Index: user/alc/PQ_LAUNDRY/Makefile.inc1
===================================================================
--- user/alc/PQ_LAUNDRY/Makefile.inc1	(revision 303641)
+++ user/alc/PQ_LAUNDRY/Makefile.inc1	(revision 303642)
@@ -1,2580 +1,2583 @@
#
# $FreeBSD$
#
# Make command line options:
#	-DNO_CLEANDIR run ${MAKE} clean, instead of ${MAKE} cleandir
#	-DNO_CLEAN do not clean at all
#	-DDB_FROM_SRC use the user/group databases in src/etc instead of
#	    the system database when installing.
#	-DNO_SHARE do not go into share subdir
#	-DKERNFAST define NO_KERNEL{CONFIG,CLEAN,OBJ}
#	-DNO_KERNELCONFIG do not run config in ${MAKE} buildkernel
#	-DNO_KERNELCLEAN do not run ${MAKE} clean in ${MAKE} buildkernel
#	-DNO_KERNELOBJ do not run ${MAKE} obj in ${MAKE} buildkernel
#	-DNO_PORTSUPDATE do not update ports in ${MAKE} update
#	-DNO_ROOT install without using root privilege
#	-DNO_DOCUPDATE do not update doc in ${MAKE} update
#	-DWITHOUT_CTF do not run the DTrace CTF conversion tools on built objects
#	LOCAL_DIRS="list of dirs" to add additional dirs to the SUBDIR list
#	LOCAL_ITOOLS="list of tools" to add additional tools to the ITOOLS list
#	LOCAL_LIB_DIRS="list of dirs" to add additional dirs to libraries target
#	LOCAL_MTREE="list of mtree files" to process to allow local directories
#	    to be created before files are installed
#	LOCAL_TOOL_DIRS="list of dirs" to add additional dirs to the build-tools
#	    list
#	LOCAL_XTOOL_DIRS="list of dirs" to add additional dirs to the
#	    cross-tools target
#	METALOG="path to metadata log" to write permission and ownership
#	    when NO_ROOT is set. (default: ${DESTDIR}/METALOG)
#	TARGET="machine" to crossbuild world for a different machine type
#	TARGET_ARCH= may be required when a TARGET supports multiple endians
#	BUILDENV_SHELL= shell to launch for the buildenv target (def:${SHELL})
#	WORLD_FLAGS= additional flags to pass to make(1) during buildworld
#	KERNEL_FLAGS= additional flags to pass to make(1) during buildkernel
#	SUBDIR_OVERRIDE="list of dirs" to build rather than everything.
#	    All libraries and includes, and some build tools will still build.
#
# The intended user-driven targets are:
# buildworld  - rebuild *everything*, including glue to help do upgrades
# installworld- install everything built by "buildworld"
# checkworld  - run test suite on installed world
# doxygen     - build API documentation of the kernel
# update      - convenient way to update your source tree (eg: svn/svnup)
#
# Standard targets (not defined here) are documented in the makefiles in
# /usr/share/mk. These include:
#		obj depend all install clean cleandepend cleanobj

.if !defined(TARGET) || !defined(TARGET_ARCH)
.error "Both TARGET and TARGET_ARCH must be defined."
.endif

SRCDIR?=	${.CURDIR}
LOCALBASE?=	/usr/local

# Cross toolchain changes must be in effect before bsd.compiler.mk
# so that gets the right CC, and pass CROSS_TOOLCHAIN to submakes.
.if defined(CROSS_TOOLCHAIN)
.include "${LOCALBASE}/share/toolchains/${CROSS_TOOLCHAIN}.mk"
CROSSENV+=CROSS_TOOLCHAIN="${CROSS_TOOLCHAIN}"
.endif
.if defined(CROSS_TOOLCHAIN_PREFIX)
CROSS_COMPILER_PREFIX?=${CROSS_TOOLCHAIN_PREFIX}
.endif

XCOMPILERS=	CC CXX CPP
.for COMPILER in ${XCOMPILERS}
.if defined(CROSS_COMPILER_PREFIX)
X${COMPILER}?=	${CROSS_COMPILER_PREFIX}${${COMPILER}}
.else
X${COMPILER}?=	${${COMPILER}}
.endif
.endfor
# If a full path to an external cross compiler is given, don't build
# a cross compiler.
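# For example (an illustrative invocation; the compiler paths are hypothetical
# and not mandated by this file), a build started as
#	make buildworld XCC=/usr/local/bin/clang XCXX=/usr/local/bin/clang++
# supplies absolute compiler paths, so the check below turns off
# MK_CLANG_BOOTSTRAP/MK_GCC_BOOTSTRAP instead of building a cross compiler.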
.if ${XCC:N${CCACHE_BIN}:M/*}
MK_CLANG_BOOTSTRAP=	no
MK_GCC_BOOTSTRAP=	no
.endif

# Pull in COMPILER_TYPE and COMPILER_FREEBSD_VERSION early.
.include <bsd.compiler.mk>
.include "share/mk/src.opts.mk"

# Check if there is a local compiler that can satisfy as an external compiler.
# Which compiler is expected to be used?
.if ${MK_CLANG_BOOTSTRAP} == "yes"
WANT_COMPILER_TYPE=	clang
.elif ${MK_GCC_BOOTSTRAP} == "yes"
WANT_COMPILER_TYPE=	gcc
.else
WANT_COMPILER_TYPE=
.endif

.if !defined(WANT_COMPILER_FREEBSD_VERSION)
.if ${WANT_COMPILER_TYPE} == "clang"
WANT_COMPILER_FREEBSD_VERSION_FILE=	lib/clang/freebsd_cc_version.h
WANT_COMPILER_FREEBSD_VERSION!= \
	awk '$$2 == "FREEBSD_CC_VERSION" {printf("%d\n", $$3)}' \
	${SRCDIR}/${WANT_COMPILER_FREEBSD_VERSION_FILE} || echo unknown
WANT_COMPILER_VERSION_FILE=	lib/clang/include/clang/Basic/Version.inc
WANT_COMPILER_VERSION!= \
	awk '$$2 == "CLANG_VERSION" {split($$3, a, "."); print a[1] * 10000 + a[2] * 100 + a[3]}' \
	${SRCDIR}/${WANT_COMPILER_VERSION_FILE} || echo unknown
.elif ${WANT_COMPILER_TYPE} == "gcc"
WANT_COMPILER_FREEBSD_VERSION_FILE=	gnu/usr.bin/cc/cc_tools/freebsd-native.h
WANT_COMPILER_FREEBSD_VERSION!= \
	awk '$$2 == "FBSD_CC_VER" {printf("%d\n", $$3)}' \
	${SRCDIR}/${WANT_COMPILER_FREEBSD_VERSION_FILE} || echo unknown
WANT_COMPILER_VERSION_FILE=	contrib/gcc/BASE-VER
WANT_COMPILER_VERSION!= \
	awk -F. '{print $$1 * 10000 + $$2 * 100 + $$3}' \
	${SRCDIR}/${WANT_COMPILER_VERSION_FILE} || echo unknown
.endif
.export WANT_COMPILER_FREEBSD_VERSION WANT_COMPILER_VERSION
.endif	# !defined(WANT_COMPILER_FREEBSD_VERSION)

# It needs to be the same revision as we would build for the bootstrap.
# If the expected vs CC is different then we can't skip.
# GCC cannot be used for cross-arch yet. For clang we pass -target later if
# TARGET_ARCH!=MACHINE_ARCH.
.if ${MK_SYSTEM_COMPILER} == "yes" && \
    (${MK_CLANG_BOOTSTRAP} == "yes" || ${MK_GCC_BOOTSTRAP} == "yes") && \
    !make(showconfig) && !make(native-xtools) && !make(xdev*) && \
    ${WANT_COMPILER_TYPE} == ${COMPILER_TYPE} && \
    (${COMPILER_TYPE} == "clang" || ${TARGET_ARCH} == ${MACHINE_ARCH}) && \
    ${COMPILER_VERSION} == ${WANT_COMPILER_VERSION} && \
    ${COMPILER_FREEBSD_VERSION} == ${WANT_COMPILER_FREEBSD_VERSION}
# Everything matches, disable the bootstrap compiler.
MK_CLANG_BOOTSTRAP=	no
MK_GCC_BOOTSTRAP=	no
USING_SYSTEM_COMPILER=	yes
.endif	# ${WANT_COMPILER_TYPE} == ${COMPILER_TYPE}
USING_SYSTEM_COMPILER?=	no

TEST_SYSTEM_COMPILER_VARS= \
	USING_SYSTEM_COMPILER MK_SYSTEM_COMPILER \
	MK_CROSS_COMPILER MK_CLANG_BOOTSTRAP MK_GCC_BOOTSTRAP \
	WANT_COMPILER_TYPE WANT_COMPILER_VERSION WANT_COMPILER_VERSION_FILE \
	WANT_COMPILER_FREEBSD_VERSION WANT_COMPILER_FREEBSD_VERSION_FILE \
	CC COMPILER_TYPE COMPILER_VERSION COMPILER_FREEBSD_VERSION
test-system-compiler: .PHONY
.for v in ${TEST_SYSTEM_COMPILER_VARS}
	${_+_}@printf "%-35s= %s\n" "${v}" "${${v}}"
.endfor
.if ${USING_SYSTEM_COMPILER} == "yes" && \
    (make(buildworld) || make(buildkernel) || make(kernel-toolchain) || \
    make(toolchain) || make(_cross-tools))
.info SYSTEM_COMPILER: Determined that CC=${CC} matches the source tree. Not bootstrapping a cross-compiler.
.endif

# For installworld need to ensure that the looked-up compiler metadata is
# passed along rather than trying to run cc from the restricted
# STRICTTMPPATH.
.if ${MK_CLANG_BOOTSTRAP} == "no" && ${MK_GCC_BOOTSTRAP} == "no" .if !defined(X_COMPILER_TYPE) CROSSENV+= COMPILER_VERSION=${COMPILER_VERSION} \ COMPILER_TYPE=${COMPILER_TYPE} \ COMPILER_FREEBSD_VERSION=${COMPILER_FREEBSD_VERSION} .else CROSSENV+= COMPILER_VERSION=${X_COMPILER_VERSION} \ COMPILER_TYPE=${X_COMPILER_TYPE} \ COMPILER_FREEBSD_VERSION=${X_COMPILER_FREEBSD_VERSION} .endif .endif # Handle external binutils. .if defined(CROSS_TOOLCHAIN_PREFIX) CROSS_BINUTILS_PREFIX?=${CROSS_TOOLCHAIN_PREFIX} .endif # If we do not have a bootstrap binutils (because the in-tree one does not # support the target architecture), provide a default cross-binutils prefix. # This allows aarch64 builds, for example, to automatically use the # aarch64-binutils port or package. .if !make(showconfig) .if !empty(BROKEN_OPTIONS:MBINUTILS_BOOTSTRAP) && \ !defined(CROSS_BINUTILS_PREFIX) CROSS_BINUTILS_PREFIX=/usr/local/${TARGET_ARCH}-freebsd/bin/ .if !exists(${CROSS_BINUTILS_PREFIX}) .error In-tree binutils does not support the ${TARGET_ARCH} architecture. Install the ${TARGET_ARCH}-binutils port or package or set CROSS_BINUTILS_PREFIX. .endif .endif .endif XBINUTILS= AS AR LD NM OBJCOPY RANLIB SIZE STRINGS .for BINUTIL in ${XBINUTILS} .if defined(CROSS_BINUTILS_PREFIX) && \ exists(${CROSS_BINUTILS_PREFIX}${${BINUTIL}}) X${BINUTIL}?= ${CROSS_BINUTILS_PREFIX}${${BINUTIL}} .else X${BINUTIL}?= ${${BINUTIL}} .endif .endfor # We must do lib/ and libexec/ before bin/ in case of a mid-install error to # keep the users system reasonably usable. For static->dynamic root upgrades, # we don't want to install a dynamic binary without rtld and the needed # libraries. More commonly, for dynamic root, we don't want to install a # binary that requires a newer library version that hasn't been installed yet. # This ordering is not a guarantee though. The only guarantee of a working # system here would require fine-grained ordering of all components based # on their dependencies. .if !empty(SUBDIR_OVERRIDE) SUBDIR= ${SUBDIR_OVERRIDE} .else SUBDIR= lib libexec .if !defined(NO_ROOT) && (make(installworld) || make(install)) # Ensure libraries are installed before progressing. SUBDIR+=.WAIT .endif SUBDIR+=bin .if ${MK_CDDL} != "no" SUBDIR+=cddl .endif SUBDIR+=gnu include .if ${MK_KERBEROS} != "no" SUBDIR+=kerberos5 .endif .if ${MK_RESCUE} != "no" SUBDIR+=rescue .endif SUBDIR+=sbin .if ${MK_CRYPT} != "no" SUBDIR+=secure .endif .if !defined(NO_SHARE) SUBDIR+=share .endif SUBDIR+=sys usr.bin usr.sbin .if ${MK_TESTS} != "no" SUBDIR+= tests .endif .if ${MK_OFED} != "no" SUBDIR+=contrib/ofed .endif # Local directories are last, since it is nice to at least get the base # system rebuilt before you do them. .for _DIR in ${LOCAL_DIRS} .if exists(${.CURDIR}/${_DIR}/Makefile) SUBDIR+= ${_DIR} .endif .endfor # Add LOCAL_LIB_DIRS, but only if they will not be picked up as a SUBDIR # of a LOCAL_DIRS directory. This allows LOCAL_DIRS=foo and # LOCAL_LIB_DIRS=foo/lib to behave as expected. .for _DIR in ${LOCAL_DIRS:M*/} ${LOCAL_DIRS:N*/:S|$|/|} _REDUNDENT_LIB_DIRS+= ${LOCAL_LIB_DIRS:M${_DIR}*} .endfor .for _DIR in ${LOCAL_LIB_DIRS} .if empty(_REDUNDENT_LIB_DIRS:M${_DIR}) && exists(${.CURDIR}/${_DIR}/Makefile) SUBDIR+= ${_DIR} .else .warning ${_DIR} not added to SUBDIR list. See UPDATING 20141121. .endif .endfor # We must do etc/ last as it hooks into building the man whatis file # by calling 'makedb' in share/man. This is only relevant for # install/distribute so they build the whatis file after every manpage is # installed. 
.if make(installworld) || make(install) SUBDIR+=.WAIT .endif SUBDIR+=etc .endif # !empty(SUBDIR_OVERRIDE) .if defined(NOCLEAN) .warning NOCLEAN option is deprecated. Use NO_CLEAN instead. NO_CLEAN= ${NOCLEAN} .endif .if defined(NO_CLEANDIR) CLEANDIR= clean cleandepend .else CLEANDIR= cleandir .endif .if ${MK_META_MODE} == "yes" # If filemon is used then we can rely on the build being incremental-safe. # The .meta files will also track the build command and rebuild should # it change. .if empty(.MAKE.MODE:Mnofilemon) NO_CLEAN= t .endif .endif LOCAL_TOOL_DIRS?= PACKAGEDIR?= ${DESTDIR}/${DISTDIR} .if empty(SHELL:M*csh*) BUILDENV_SHELL?=${SHELL} .else BUILDENV_SHELL?=/bin/sh .endif .if !defined(SVN) || empty(SVN) . for _P in /usr/bin /usr/local/bin . for _S in svn svnlite . if exists(${_P}/${_S}) SVN= ${_P}/${_S} . endif . endfor . endfor .endif SVNFLAGS?= -r HEAD MAKEOBJDIRPREFIX?= /usr/obj .if !defined(OSRELDATE) .if exists(/usr/include/osreldate.h) OSRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ /usr/include/osreldate.h .else OSRELDATE= 0 .endif .export OSRELDATE .endif # Set VERSION for CTFMERGE to use via the default CTFFLAGS=-L VERSION. .if !defined(_REVISION) _REVISION!= MK_AUTO_OBJ=no ${MAKE} -C ${SRCDIR}/release -V REVISION .export _REVISION .endif .if !defined(_BRANCH) _BRANCH!= MK_AUTO_OBJ=no ${MAKE} -C ${SRCDIR}/release -V BRANCH .export _BRANCH .endif .if !defined(SRCRELDATE) SRCRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ ${SRCDIR}/sys/sys/param.h .export SRCRELDATE .endif .if !defined(VERSION) VERSION= FreeBSD ${_REVISION}-${_BRANCH:C/-p[0-9]+$//} ${TARGET_ARCH} ${SRCRELDATE} .export VERSION .endif .if !defined(PKG_VERSION) .if ${_BRANCH:MSTABLE*} || ${_BRANCH:MCURRENT*} || ${_BRANCH:MALPHA*} TIMENOW= %Y%m%d%H%M%S EXTRA_REVISION= .s${TIMENOW:gmtime} .endif .if ${_BRANCH:M*-p*} EXTRA_REVISION= _${_BRANCH:C/.*-p([0-9]+$)/\1/} .endif PKG_VERSION= ${_REVISION}${EXTRA_REVISION} .endif KNOWN_ARCHES?= aarch64/arm64 \ amd64 \ arm \ armeb/arm \ armv6/arm \ i386 \ i386/pc98 \ mips \ mipsel/mips \ mips64el/mips \ mipsn32el/mips \ mips64/mips \ mipsn32/mips \ powerpc \ powerpc64/powerpc \ riscv64/riscv \ sparc64 .if ${TARGET} == ${TARGET_ARCH} _t= ${TARGET} .else _t= ${TARGET_ARCH}/${TARGET} .endif .for _t in ${_t} .if empty(KNOWN_ARCHES:M${_t}) .error Unknown target ${TARGET_ARCH}:${TARGET}. .endif .endfor .if ${TARGET} == ${MACHINE} TARGET_CPUTYPE?=${CPUTYPE} .else TARGET_CPUTYPE?= .endif .if !empty(TARGET_CPUTYPE) _TARGET_CPUTYPE=${TARGET_CPUTYPE} .else _TARGET_CPUTYPE=dummy .endif _CPUTYPE!= MK_AUTO_OBJ=no MAKEFLAGS= CPUTYPE=${_TARGET_CPUTYPE} ${MAKE} \ -f /dev/null -m ${.CURDIR}/share/mk -V CPUTYPE .if ${_CPUTYPE} != ${_TARGET_CPUTYPE} .error CPUTYPE global should be set with ?=. .endif .if make(buildworld) BUILD_ARCH!= uname -p .if ${MACHINE_ARCH} != ${BUILD_ARCH} .error To cross-build, set TARGET_ARCH. .endif .endif .if ${MACHINE} == ${TARGET} && ${MACHINE_ARCH} == ${TARGET_ARCH} && !defined(CROSS_BUILD_TESTING) OBJTREE= ${MAKEOBJDIRPREFIX} .else OBJTREE= ${MAKEOBJDIRPREFIX}/${TARGET}.${TARGET_ARCH} .endif WORLDTMP= ${OBJTREE}${.CURDIR}/tmp BPATH= ${WORLDTMP}/legacy/usr/sbin:${WORLDTMP}/legacy/usr/bin:${WORLDTMP}/legacy/bin XPATH= ${WORLDTMP}/usr/sbin:${WORLDTMP}/usr/bin STRICTTMPPATH= ${BPATH}:${XPATH} TMPPATH= ${STRICTTMPPATH}:${PATH} # # Avoid running mktemp(1) unless actually needed. # It may not be functional, e.g., due to new ABI # when in the middle of installing over this system. 
# .if make(distributeworld) || make(installworld) || make(stageworld) INSTALLTMP!= /usr/bin/mktemp -d -u -t install .endif .if make(stagekernel) || make(distributekernel) TAGS+= kernel PACKAGE= kernel .endif # # Building a world goes through the following stages # # 1. legacy stage [BMAKE] # This stage is responsible for creating compatibility # shims that are needed by the bootstrap-tools, # build-tools and cross-tools stages. These are generally # APIs that tools from one of those three stages need to # build that aren't present on the host. # 1. bootstrap-tools stage [BMAKE] # This stage is responsible for creating programs that # are needed for backward compatibility reasons. They # are not built as cross-tools. # 2. build-tools stage [TMAKE] # This stage is responsible for creating the object # tree and building any tools that are needed during # the build process. Some programs are listed during # this phase because they build binaries to generate # files needed to build these programs. This stage also # builds the 'build-tools' target rather than 'all'. # 3. cross-tools stage [XMAKE] # This stage is responsible for creating any tools that # are needed for building the system. A cross-compiler is one # of them. This differs from build tools in two ways: # 1. the 'all' target is built rather than 'build-tools' # 2. these tools are installed into TMPPATH for stage 4. # 4. world stage [WMAKE] # This stage actually builds the world. # 5. install stage (optional) [IMAKE] # This stage installs a previously built world. # BOOTSTRAPPING?= 0 # Keep these in sync MINIMUM_SUPPORTED_OSREL?= 900044 MINIMUM_SUPPORTED_REL?= 9.1 # Common environment for world related stages CROSSENV+= MAKEOBJDIRPREFIX=${OBJTREE} \ MACHINE_ARCH=${TARGET_ARCH} \ MACHINE=${TARGET} \ CPUTYPE=${TARGET_CPUTYPE} .if ${MK_META_MODE} != "no" # Don't rebuild build-tools targets during normal build. 
CROSSENV+= BUILD_TOOLS_META=.NOMETA_CMP .endif .if ${MK_GROFF} != "no" CROSSENV+= GROFF_BIN_PATH=${WORLDTMP}/legacy/usr/bin \ GROFF_FONT_PATH=${WORLDTMP}/legacy/usr/share/groff_font \ GROFF_TMAC_PATH=${WORLDTMP}/legacy/usr/share/tmac .endif .if defined(TARGET_CFLAGS) CROSSENV+= ${TARGET_CFLAGS} .endif # bootstrap-tools stage BMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \ TOOLS_PREFIX=${WORLDTMP} \ PATH=${BPATH}:${PATH} \ WORLDTMP=${WORLDTMP} \ MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}" # need to keep this in sync with targets/pseudo/bootstrap-tools/Makefile BSARGS= DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ MK_HTML=no NO_LINT=yes MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_TESTS=no \ MK_INCLUDES=yes BMAKE= MAKEOBJDIRPREFIX=${WORLDTMP} \ ${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ ${BSARGS} # build-tools stage TMAKE= MAKEOBJDIRPREFIX=${OBJTREE} \ ${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ -DNO_LINT \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_TESTS=no # cross-tools stage XMAKE= TOOLS_PREFIX=${WORLDTMP} ${BMAKE} \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ MK_GDB=no MK_TESTS=no # kernel-tools stage KTMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${BPATH}:${PATH} \ WORLDTMP=${WORLDTMP} KTMAKE= TOOLS_PREFIX=${WORLDTMP} MAKEOBJDIRPREFIX=${WORLDTMP} \ ${KTMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ MK_HTML=no -DNO_LINT MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no # world stage WMAKEENV= ${CROSSENV} \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${TMPPATH} # make hierarchy HMAKE= PATH=${TMPPATH} ${MAKE} LOCAL_MTREE=${LOCAL_MTREE:Q} .if defined(NO_ROOT) HMAKE+= PATH=${TMPPATH} METALOG=${METALOG} -DNO_ROOT .endif CROSSENV+= CC="${XCC} ${XCFLAGS}" CXX="${XCXX} ${XCXXFLAGS} ${XCFLAGS}" \ CPP="${XCPP} ${XCFLAGS}" \ AS="${XAS}" AR="${XAR}" LD="${XLD}" NM=${XNM} \ OBJCOPY="${XOBJCOPY}" \ RANLIB=${XRANLIB} STRINGS=${XSTRINGS} \ SIZE="${XSIZE}" .if defined(CROSS_BINUTILS_PREFIX) && exists(${CROSS_BINUTILS_PREFIX}) # In the case of xdev-build tools, CROSS_BINUTILS_PREFIX won't be a # directory, but the compiler will look in the right place for its # tools so we don't need to tell it where to look. BFLAGS+= -B${CROSS_BINUTILS_PREFIX} .endif # External compiler needs sysroot and target flags. .if ${MK_CLANG_BOOTSTRAP} == "no" && ${MK_GCC_BOOTSTRAP} == "no" .if !defined(CROSS_BINUTILS_PREFIX) || !exists(${CROSS_BINUTILS_PREFIX}) BFLAGS+= -B${WORLDTMP}/usr/bin .endif .if ${TARGET} == "arm" .if ${TARGET_ARCH:Marmv6*} != "" && ${TARGET_CPUTYPE:M*soft*} == "" TARGET_ABI= gnueabihf .else TARGET_ABI= gnueabi .endif .endif .if defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc # GCC requires -isystem and -L when using a cross-compiler. --sysroot # won't set header path and -L is used to ensure the base library path # is added before the port PREFIX library path. XCFLAGS+= -isystem ${WORLDTMP}/usr/include -L${WORLDTMP}/usr/lib # Force using libc++ for external GCC. 
# XXX: This should be checking MK_GNUCXX == no .if ${X_COMPILER_VERSION} >= 40800 XCXXFLAGS+= -isystem ${WORLDTMP}/usr/include/c++/v1 -std=c++11 \ -nostdinc++ -L${WORLDTMP}/../lib/libc++ .endif .else TARGET_ABI?= unknown TARGET_TRIPLE?= ${TARGET_ARCH:C/amd64/x86_64/}-${TARGET_ABI}-freebsd12.0 XCFLAGS+= -target ${TARGET_TRIPLE} .endif XCFLAGS+= --sysroot=${WORLDTMP} .endif # ${MK_CLANG_BOOTSTRAP} == "no" && ${MK_GCC_BOOTSTRAP} == "no" .if !empty(BFLAGS) XCFLAGS+= ${BFLAGS} .endif .if ${MK_LIB32} != "no" && (${TARGET_ARCH} == "amd64" || \ ${TARGET_ARCH} == "powerpc64") LIBCOMPAT= 32 .include "Makefile.libcompat" .elif ${MK_LIBSOFT} != "no" && ${TARGET_ARCH} == "armv6" LIBCOMPAT= SOFT .include "Makefile.libcompat" .endif WMAKE= ${WMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 DESTDIR=${WORLDTMP} IMAKEENV= ${CROSSENV} IMAKE= ${IMAKEENV} ${MAKE} -f Makefile.inc1 \ ${IMAKE_INSTALL} ${IMAKE_MTREE} .if empty(.MAKEFLAGS:M-n) IMAKEENV+= PATH=${STRICTTMPPATH}:${INSTALLTMP} \ LD_LIBRARY_PATH=${INSTALLTMP} \ PATH_LOCALE=${INSTALLTMP}/locale IMAKE+= __MAKE_SHELL=${INSTALLTMP}/sh .else IMAKEENV+= PATH=${TMPPATH}:${INSTALLTMP} .endif .if defined(DB_FROM_SRC) INSTALLFLAGS+= -N ${.CURDIR}/etc MTREEFLAGS+= -N ${.CURDIR}/etc .endif _INSTALL_DDIR= ${DESTDIR}/${DISTDIR} INSTALL_DDIR= ${_INSTALL_DDIR:S://:/:g:C:/$::} .if defined(NO_ROOT) METALOG?= ${DESTDIR}/${DISTDIR}/METALOG IMAKE+= -DNO_ROOT METALOG=${METALOG} INSTALLFLAGS+= -U -M ${METALOG} -D ${INSTALL_DDIR} MTREEFLAGS+= -W .endif .if defined(BUILD_PKGS) INSTALLFLAGS+= -h sha256 .endif .if defined(DB_FROM_SRC) || defined(NO_ROOT) IMAKE_INSTALL= INSTALL="install ${INSTALLFLAGS}" IMAKE_MTREE= MTREE_CMD="mtree ${MTREEFLAGS}" .endif # kernel stage KMAKEENV= ${WMAKEENV} KMAKE= ${KMAKEENV} ${MAKE} ${.MAKEFLAGS} ${KERNEL_FLAGS} KERNEL=${INSTKERNNAME} # # buildworld # # Attempt to rebuild the entire system, with reasonable chance of # success, regardless of how old your existing system is. # _worldtmp: .PHONY .if ${.CURDIR:C/[^,]//g} != "" # The m4 build of sendmail files doesn't like it if ',' is used # anywhere in the path of it's files. @echo @echo "*** Error: path to source tree contains a comma ','" @echo false .endif @echo @echo "--------------------------------------------------------------" @echo ">>> Rebuilding the temporary build tree" @echo "--------------------------------------------------------------" .if !defined(NO_CLEAN) rm -rf ${WORLDTMP} .if defined(LIBCOMPAT) rm -rf ${LIBCOMPATTMP} .endif .else rm -rf ${WORLDTMP}/legacy/usr/include # XXX - These can depend on any header file. 
rm -f ${OBJTREE}${.CURDIR}/lib/libsysdecode/ioctl.c rm -f ${OBJTREE}${.CURDIR}/usr.bin/kdump/kdump_subr.c .endif .for _dir in \ lib lib/casper usr legacy/bin legacy/usr mkdir -p ${WORLDTMP}/${_dir} .endfor mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/legacy/usr >/dev/null .if ${MK_GROFF} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.groff.dist \ -p ${WORLDTMP}/legacy/usr >/dev/null .endif mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${WORLDTMP}/legacy/usr/include >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${WORLDTMP}/usr/include >/dev/null ln -sf ${.CURDIR}/sys ${WORLDTMP} .if ${MK_DEBUG_FILES} != "no" # We could instead disable debug files for these build stages mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${WORLDTMP}/legacy/usr/lib >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${WORLDTMP}/usr/lib >/dev/null .endif .if defined(LIBCOMPAT) mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${WORLDTMP}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${WORLDTMP}/legacy/usr/lib/debug/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${WORLDTMP}/usr/lib/debug/usr >/dev/null .endif .endif .if ${MK_TESTS} != "no" mkdir -p ${WORLDTMP}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${WORLDTMP}${TESTSBASE} >/dev/null .if ${MK_DEBUG_FILES} != "no" mkdir -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE} >/dev/null .endif .endif .for _mtree in ${LOCAL_MTREE} mtree -deU -f ${.CURDIR}/${_mtree} -p ${WORLDTMP} > /dev/null .endfor _legacy: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.1: legacy release compatibility shims" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} legacy _bootstrap-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.2: bootstrap tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} bootstrap-tools _cleanobj: .if !defined(NO_CLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} ${CLEANDIR} .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${LIBCOMPATWMAKE} -f Makefile.inc1 ${CLEANDIR} .endif .endif _obj: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} obj _build-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${TMAKE} build-tools _cross-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3: cross tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${XMAKE} cross-tools ${_+_}cd ${.CURDIR}; ${XMAKE} kernel-tools _includes: @echo @echo "--------------------------------------------------------------" @echo 
">>> stage 4.1: building includes" @echo "--------------------------------------------------------------" # Special handling for SUBDIR_OVERRIDE in buildworld as they most likely need # headers from default SUBDIR. Do SUBDIR_OVERRIDE includes last. ${_+_}cd ${.CURDIR}; ${WMAKE} SUBDIR_OVERRIDE= SHARED=symlinks \ MK_INCLUDES=yes includes .if !empty(SUBDIR_OVERRIDE) && make(buildworld) ${_+_}cd ${.CURDIR}; ${WMAKE} MK_INCLUDES=yes SHARED=symlinks includes .endif _libraries: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.2: building libraries" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; \ ${WMAKE} -DNO_FSCHG MK_HTML=no -DNO_LINT MK_MAN=no \ MK_PROFILE=no MK_TESTS=no MK_TESTS_SUPPORT=${MK_TESTS} libraries everything: .PHONY @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.3: building everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; _PARALLEL_SUBDIR_OK=1 ${WMAKE} all WMAKE_TGTS= WMAKE_TGTS+= _worldtmp _legacy .if empty(SUBDIR_OVERRIDE) WMAKE_TGTS+= _bootstrap-tools .endif WMAKE_TGTS+= _cleanobj _obj _build-tools _cross-tools WMAKE_TGTS+= _includes _libraries WMAKE_TGTS+= everything .if defined(LIBCOMPAT) && empty(SUBDIR_OVERRIDE) WMAKE_TGTS+= build${libcompat} .endif buildworld: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue .PHONY .ORDER: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue buildworld_prologue: .PHONY @echo "--------------------------------------------------------------" @echo ">>> World build started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" buildworld_epilogue: .PHONY @echo @echo "--------------------------------------------------------------" @echo ">>> World build completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" # # We need to have this as a target because the indirection between Makefile # and Makefile.inc1 causes the correct PATH to be used, rather than a # modification of the current environment's PATH. In addition, we need # to quote multiword values. # buildenvvars: .PHONY @echo ${WMAKEENV:Q} ${.MAKE.EXPORTED:@v@$v=\"${$v}\"@} .if ${.TARGETS:Mbuildenv} .if ${.MAKEFLAGS:M-j} .error The buildenv target is incompatible with -j .endif .endif BUILDENV_DIR?= ${.CURDIR} buildenv: .PHONY @echo Entering world for ${TARGET_ARCH}:${TARGET} .if ${BUILDENV_SHELL:M*zsh*} @echo For ZSH you must run: export CPUTYPE=${TARGET_CPUTYPE} .endif @cd ${BUILDENV_DIR} && env ${WMAKEENV} BUILDENV=1 ${BUILDENV_SHELL} \ || true TOOLCHAIN_TGTS= ${WMAKE_TGTS:Neverything:Nbuild${libcompat}} toolchain: ${TOOLCHAIN_TGTS} .PHONY kernel-toolchain: ${TOOLCHAIN_TGTS:N_includes:N_libraries} .PHONY # # installcheck # # Checks to be sure system is ready for installworld/installkernel. # installcheck: _installcheck_world _installcheck_kernel .PHONY _installcheck_world: .PHONY _installcheck_kernel: .PHONY # # Require DESTDIR to be set if installing for a different architecture or # using the user/group database in the source tree. 
# .if ${TARGET_ARCH} != ${MACHINE_ARCH} || ${TARGET} != ${MACHINE} || \ defined(DB_FROM_SRC) .if !make(distributeworld) _installcheck_world: __installcheck_DESTDIR _installcheck_kernel: __installcheck_DESTDIR __installcheck_DESTDIR: .PHONY .if !defined(DESTDIR) || empty(DESTDIR) @echo "ERROR: Please set DESTDIR!"; \ false .endif .endif .endif .if !defined(DB_FROM_SRC) # # Check for missing UIDs/GIDs. # CHECK_UIDS= auditdistd CHECK_GIDS= audit .if ${MK_SENDMAIL} != "no" CHECK_UIDS+= smmsp CHECK_GIDS+= smmsp .endif .if ${MK_PF} != "no" CHECK_UIDS+= proxy CHECK_GIDS+= proxy authpf .endif .if ${MK_UNBOUND} != "no" CHECK_UIDS+= unbound CHECK_GIDS+= unbound .endif _installcheck_world: __installcheck_UGID __installcheck_UGID: .PHONY .for uid in ${CHECK_UIDS} @if ! `id -u ${uid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${uid} user is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .for gid in ${CHECK_GIDS} @if ! `find / -prune -group ${gid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${gid} group is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .endif # # Required install tools to be saved in a scratch dir for safety. # .if ${MK_ZONEINFO} != "no" _zoneinfo= zic tzsetup .endif ITOOLS= [ awk cap_mkdb cat chflags chmod chown cmp cp \ date echo egrep find grep id install ${_install-info} \ ln make mkdir mtree mv pwd_mkdb \ rm sed services_mkdb sh strip sysctl test true uname wc ${_zoneinfo} \ ${LOCAL_ITOOLS} # Needed for share/man .if ${MK_MAN_UTILS} != "no" ITOOLS+=makewhatis .endif # # distributeworld # # Distributes everything compiled by a `buildworld'. # # installworld # # Installs everything compiled by a 'buildworld'. # # Non-base distributions produced by the base system EXTRA_DISTRIBUTIONS= doc .if defined(LIBCOMPAT) EXTRA_DISTRIBUTIONS+= lib${libcompat} .endif .if ${MK_TESTS} != "no" EXTRA_DISTRIBUTIONS+= tests .endif DEBUG_DISTRIBUTIONS= .if ${MK_DEBUG_FILES} != "no" DEBUG_DISTRIBUTIONS+= base ${EXTRA_DISTRIBUTIONS:S,doc,,:S,tests,,} .endif MTREE_MAGIC?= mtree 2.0 distributeworld installworld stageworld: _installcheck_world .PHONY mkdir -p ${INSTALLTMP} progs=$$(for prog in ${ITOOLS}; do \ if progpath=`which $$prog`; then \ echo $$progpath; \ else \ echo "Required tool $$prog not found in PATH." >&2; \ exit 1; \ fi; \ done); \ libs=$$(ldd -f "%o %p\n" -f "%o %p\n" $$progs 2>/dev/null | sort -u | \ while read line; do \ set -- $$line; \ if [ "$$2 $$3" != "not found" ]; then \ echo $$2; \ else \ echo "Required library $$1 not found." 
>&2; \ exit 1; \ fi; \ done); \ cp $$libs $$progs ${INSTALLTMP} cp -R $${PATH_LOCALE:-"/usr/share/locale"} ${INSTALLTMP}/locale .if defined(NO_ROOT) -mkdir -p ${METALOG:H} echo "#${MTREE_MAGIC}" > ${METALOG} .endif .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} -mkdir ${DESTDIR}/${DISTDIR}/${dist} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${DESTDIR}/${DISTDIR}/${dist} >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/include >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib >/dev/null .endif .if defined(LIBCOMPAT) mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/usr >/dev/null .endif .endif .if ${MK_TESTS} != "no" && ${dist} == "tests" -mkdir -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/${TESTSBASE} >/dev/null .endif .endif .if defined(NO_ROOT) ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.root.dist | \ sed -e 's#^\./#./${dist}/#' >> ${METALOG} ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.usr.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.include.dist | \ sed -e 's#^\./#./${dist}/usr/include/#' >> ${METALOG} .if defined(LIBCOMPAT) ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} .endif .endif .endfor -mkdir ${DESTDIR}/${DISTDIR}/base ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ METALOG=${METALOG} ${IMAKE_INSTALL} ${IMAKE_MTREE} \ DISTBASE=/base DESTDIR=${DESTDIR}/${DISTDIR}/base \ LOCAL_MTREE=${LOCAL_MTREE:Q} distrib-dirs .endif ${_+_}cd ${.CURDIR}; ${IMAKE} re${.TARGET:S/world$//}; \ ${IMAKEENV} rm -rf ${INSTALLTMP} .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} find ${DESTDIR}/${DISTDIR}/${dist} -mindepth 1 -empty -delete .endfor .if defined(NO_ROOT) .for dist in base ${EXTRA_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediately before @# the relevant mtree line. cd ${DESTDIR}/${DISTDIR}; \ find ./${dist} | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.meta .endfor .for dist in ${DEBUG_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediately before @# the relevant mtree line. 
cd ${DESTDIR}/${DISTDIR}; \ find ./${dist}/usr/lib/debug | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.debug.meta .endfor .endif .endif packageworld: .PHONY .for dist in base ${EXTRA_DISTRIBUTIONS} .if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug \ @${DESTDIR}/${DISTDIR}/${dist}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug . | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .endif .endfor .for dist in ${DEBUG_DISTRIBUTIONS} . if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - @${DESTDIR}/${DISTDIR}/${dist}.debug.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvLf - usr/lib/debug | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . endif .endfor # # reinstall # # If you have a build server, you can NFS mount the source and obj directories # and do a 'make reinstall' on the *client* to install new binaries from the # most recent server build. # restage reinstall: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Making hierarchy" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} hierarchy .if make(restage) @echo "--------------------------------------------------------------" @echo ">>> Making distribution" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} distribution .endif @echo @echo "--------------------------------------------------------------" @echo ">>> Installing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install${libcompat} .endif redistribute: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Distributing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute${libcompat} \ DISTRIBUTION=lib${libcompat} .endif distrib-dirs distribution: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ ${IMAKE_INSTALL} ${IMAKE_MTREE} METALOG=${METALOG} ${.TARGET} .if make(distribution) ${_+_}cd ${.CURDIR}; ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} -f Makefile.inc1 ${IMAKE_INSTALL} \ METALOG=${METALOG} MK_TESTS=no installconfig .endif # # buildkernel and installkernel # # Which kernels to build and/or install is specified by setting # KERNCONF. If not defined a GENERIC kernel is built/installed. # Only the existing (depending TARGET) config files are used # for building kernels and only the first of these is designated # as the one being installed. # # Note that we have to use TARGET instead of TARGET_ARCH when # we're in kernel-land. Since only TARGET_ARCH is (expected) to # be set to cross-build, we have to make sure TARGET is set # properly. 
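# For example (illustrative; MYKERNEL is a placeholder config file under
# sys/${TARGET}/conf), a typical cycle would be
#	make buildkernel KERNCONF=MYKERNEL
#	make installkernel KERNCONF=MYKERNEL
# and, after a first full build, KERNFAST=MYKERNEL reuses the existing
# configured kernel and object tree (skipping the config, clean and obj
# stages), as handled directly below.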
.if defined(KERNFAST) NO_KERNELCLEAN= t NO_KERNELCONFIG= t NO_KERNELOBJ= t # Shortcut for KERNCONF=Blah -DKERNFAST is now KERNFAST=Blah .if !defined(KERNCONF) && ${KERNFAST} != "1" KERNCONF=${KERNFAST} .endif .endif .if ${TARGET_ARCH} == "powerpc64" KERNCONF?= GENERIC64 .else KERNCONF?= GENERIC .endif INSTKERNNAME?= kernel KERNSRCDIR?= ${.CURDIR}/sys KRNLCONFDIR= ${KERNSRCDIR}/${TARGET}/conf KRNLOBJDIR= ${OBJTREE}${KERNSRCDIR} KERNCONFDIR?= ${KRNLCONFDIR} BUILDKERNELS= INSTALLKERNEL= .if defined(NO_INSTALLKERNEL) # All of the BUILDKERNELS loops start at index 1. BUILDKERNELS+= dummy .endif .for _kernel in ${KERNCONF} .if exists(${KERNCONFDIR}/${_kernel}) BUILDKERNELS+= ${_kernel} .if empty(INSTALLKERNEL) && !defined(NO_INSTALLKERNEL) INSTALLKERNEL= ${_kernel} .endif .endif .endfor ${WMAKE_TGTS:N_worldtmp:Nbuild${libcompat}} ${.ALLTARGETS:M_*:N_worldtmp}: .MAKE .PHONY # # buildkernel # # Builds all kernels defined by BUILDKERNELS. # buildkernel: .MAKE .PHONY .if empty(BUILDKERNELS:Ndummy) @echo "ERROR: Missing kernel configuration file(s) (${KERNCONF})."; \ false .endif @echo .for _kernel in ${BUILDKERNELS:Ndummy} @echo "--------------------------------------------------------------" @echo ">>> Kernel build for ${_kernel} started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" @echo "===> ${_kernel}" mkdir -p ${KRNLOBJDIR} .if !defined(NO_KERNELCONFIG) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1: configuring the kernel" @echo "--------------------------------------------------------------" cd ${KRNLCONFDIR}; \ PATH=${TMPPATH} \ config ${CONFIGARGS} -d ${KRNLOBJDIR}/${_kernel} \ -I '${KERNCONFDIR}' '${KERNCONFDIR}/${_kernel}' .endif .if !defined(NO_CLEAN) && !defined(NO_KERNELCLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} ${CLEANDIR} .endif .if !defined(NO_KERNELOBJ) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} obj .endif @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${KTMAKE} kernel-tools @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3.1: building everything" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} all -DNO_MODULES_OBJ @echo "--------------------------------------------------------------" @echo ">>> Kernel build for ${_kernel} completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" .endfor NO_INSTALLEXTRAKERNELS?= yes # # installkernel, etc. 
# # Install the kernel defined by INSTALLKERNEL # installkernel installkernel.debug \ reinstallkernel reinstallkernel.debug: _installcheck_kernel .PHONY .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${INSTALLKERNEL}" @echo "--------------------------------------------------------------" cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME} ${.TARGET:S/kernel//} .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${_kernel}" @echo "--------------------------------------------------------------" cd ${KRNLOBJDIR}/${_kernel}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME}.${_kernel} ${.TARGET:S/kernel//} .endfor .endif distributekernel distributekernel.debug: .PHONY .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif mkdir -p ${DESTDIR}/${DISTDIR} .if defined(NO_ROOT) @echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.premeta .endif cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} KERNEL=${INSTKERNNAME} \ DESTDIR=${INSTALL_DDIR}/kernel \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) @sed -e 's|^./kernel|.|' ${DESTDIR}/${DISTDIR}/kernel.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.meta .endif .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} .if defined(NO_ROOT) @echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta .endif cd ${KRNLOBJDIR}/${_kernel}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.${_kernel}.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} \ KERNEL=${INSTKERNNAME}.${_kernel} \ DESTDIR=${INSTALL_DDIR}/kernel.${_kernel} \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) @sed -e "s|^./kernel.${_kernel}|.|" \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta .endif .endfor .endif packagekernel: .PHONY .if defined(NO_ROOT) .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --exclude '*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endfor .endif .else .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' $$(eval find .) 
| \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --include '*/*/*.debug' $$(eval find .) | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endfor .endif .endif stagekernel: .PHONY ${_+_}${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} distributekernel PORTSDIR?= /usr/ports WSTAGEDIR?= ${MAKEOBJDIRPREFIX}${.CURDIR}/${TARGET}.${TARGET_ARCH}/worldstage KSTAGEDIR?= ${MAKEOBJDIRPREFIX}${.CURDIR}/${TARGET}.${TARGET_ARCH}/kernelstage REPODIR?= ${MAKEOBJDIRPREFIX}${.CURDIR}/repo PKGSIGNKEY?= # empty .ORDER: stage-packages create-packages .ORDER: create-packages create-world-packages .ORDER: create-packages create-kernel-packages .ORDER: create-packages sign-packages _pkgbootstrap: .PHONY .if !exists(${LOCALBASE}/sbin/pkg) @env ASSUME_ALWAYS_YES=YES pkg bootstrap .endif packages: .PHONY ${_+_}${MAKE} -C ${.CURDIR} PKG_VERSION=${PKG_VERSION} real-packages package-pkg: .PHONY rm -rf /tmp/ports.${TARGET} || : env ${WMAKEENV:Q} SRCDIR=${.CURDIR} PORTSDIR=${PORTSDIR} REVISION=${_REVISION} \ PKG_VERSION=${PKG_VERSION} REPODIR=${REPODIR} WSTAGEDIR=${WSTAGEDIR} \ sh ${.CURDIR}/release/scripts/make-pkg-package.sh real-packages: stage-packages create-packages sign-packages .PHONY stage-packages: .PHONY @mkdir -p ${REPODIR} ${WSTAGEDIR} ${KSTAGEDIR} ${_+_}@cd ${.CURDIR}; \ ${MAKE} DESTDIR=${WSTAGEDIR} -DNO_ROOT -B stageworld ; \ ${MAKE} DESTDIR=${KSTAGEDIR} -DNO_ROOT -B stagekernel create-packages: _pkgbootstrap .PHONY @mkdir -p ${REPODIR} ${_+_}@cd ${.CURDIR}; \ ${MAKE} DESTDIR=${WSTAGEDIR} \ PKG_VERSION=${PKG_VERSION} create-world-packages ; \ ${MAKE} DESTDIR=${KSTAGEDIR} \ PKG_VERSION=${PKG_VERSION} DISTDIR=kernel \ create-kernel-packages create-world-packages: _pkgbootstrap .PHONY @rm -f ${WSTAGEDIR}/*.plist 2>/dev/null || : @cd ${WSTAGEDIR} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ ${WSTAGEDIR}/METALOG @for plist in ${WSTAGEDIR}/*.plist; do \ plist=$${plist##*/} ; \ pkgname=$${plist%.plist} ; \ sh ${SRCDIR}/release/packages/generate-ucl.sh -o $${pkgname} \ -s ${SRCDIR} -u ${WSTAGEDIR}/$${pkgname}.ucl ; \ done @for plist in ${WSTAGEDIR}/*.plist; do \ plist=$${plist##*/} ; \ pkgname=$${plist%.plist} ; \ awk -F\" ' \ /^name/ { printf("===> Creating %s-", $$2); next } \ /^version/ { print $$2; next } \ ' ${WSTAGEDIR}/$${pkgname}.ucl ; \ pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh -o ALLOW_BASE_SHLIBS=yes \ create -M ${WSTAGEDIR}/$${pkgname}.ucl \ -p ${WSTAGEDIR}/$${pkgname}.plist \ -r ${WSTAGEDIR} \ -o ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} ; \ done create-kernel-packages: _pkgbootstrap .PHONY .if exists(${KSTAGEDIR}/kernel.meta) .for flavor in "" -debug @cd ${KSTAGEDIR}/${DISTDIR} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ -v kernel=yes -v _kernconf=${INSTALLKERNEL} \ ${KSTAGEDIR}/kernel.meta ; \ cap_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VCAP_MKDB_ENDIAN` ; \ pwd_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VPWD_MKDB_ENDIAN` ; \ sed -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/kernel-${INSTALLKERNEL:tl}${flavor}/" \ -e "s/%COMMENT%/FreeBSD ${INSTALLKERNEL} kernel ${flavor}/" \ -e "s/%DESC%/FreeBSD ${INSTALLKERNEL} kernel ${flavor}/" \ -e "s/%CAP_MKDB_ENDIAN%/$${cap_arg}/g" \ -e "s/%PWD_MKDB_ENDIAN%/$${pwd_arg}/g" \ ${SRCDIR}/release/packages/kernel.ucl \ > 
${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl ; \ awk -F\" ' \ /name/ { printf("===> Creating %s-", $$2); next } \ /version/ {print $$2; next } ' \ ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl ; \ pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh -o ALLOW_BASE_SHLIBS=yes \ create -M ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl \ -p ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.plist \ -r ${KSTAGEDIR}/${DISTDIR} \ -o ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} .endfor .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} .if exists(${KSTAGEDIR}/kernel.${_kernel}.meta) .for flavor in "" -debug @cd ${KSTAGEDIR}/kernel.${_kernel} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ -v kernel=yes -v _kernconf=${_kernel} \ ${KSTAGEDIR}/kernel.${_kernel}.meta ; \ cap_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VCAP_MKDB_ENDIAN` ; \ pwd_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VPWD_MKDB_ENDIAN` ; \ sed -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/kernel-${_kernel:tl}${flavor}/" \ -e "s/%COMMENT%/FreeBSD ${_kernel} kernel ${flavor}/" \ -e "s/%DESC%/FreeBSD ${_kernel} kernel ${flavor}/" \ -e "s/%CAP_MKDB_ENDIAN%/$${cap_arg}/g" \ -e "s/%PWD_MKDB_ENDIAN%/$${pwd_arg}/g" \ ${SRCDIR}/release/packages/kernel.ucl \ > ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl ; \ awk -F\" ' \ /name/ { printf("===> Creating %s-", $$2); next } \ /version/ {print $$2; next } ' \ ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl ; \ pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh -o ALLOW_BASE_SHLIBS=yes \ create -M ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl \ -p ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.plist \ -r ${KSTAGEDIR}/kernel.${_kernel} \ -o ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} .endfor .endif .endfor .endif sign-packages: _pkgbootstrap .PHONY @[ -L "${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/latest" ] && \ unlink ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/latest ; \ pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh repo \ -o ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} \ ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} \ ${PKGSIGNKEY} ; \ ln -s ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} \ ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/latest # # # checkworld # # Run test suite on installed world. # checkworld: .PHONY @if [ ! -x ${LOCALBASE}/bin/kyua ]; then \ echo "You need kyua (devel/kyua) to run the test suite." | /usr/bin/fmt; \ exit 1; \ fi ${_+_}${LOCALBASE}/bin/kyua test -k ${TESTSBASE}/Kyuafile # # # doxygen # # Build the API documentation with doxygen # doxygen: .PHONY @if [ ! -x ${LOCALBASE}/bin/doxygen ]; then \ echo "You need doxygen (devel/doxygen) to generate the API documentation of the kernel." | /usr/bin/fmt; \ exit 1; \ fi ${_+_}cd ${.CURDIR}/tools/kerneldoc/subsys; ${MAKE} obj all # # update # # Update the source tree(s), by running svn/svnup to update to the # latest copy. 
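# For example (illustrative): with SVN_UPDATE defined, e.g.
#	make update SVN_UPDATE=yes
# the target below simply runs ${SVN} update ${SVNFLAGS} in ${.CURDIR}.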
# update: .PHONY .if defined(SVN_UPDATE) @echo "--------------------------------------------------------------" @echo ">>> Updating ${.CURDIR} using Subversion" @echo "--------------------------------------------------------------" @(cd ${.CURDIR}; ${SVN} update ${SVNFLAGS}) .endif # # ------------------------------------------------------------------------ # # From here onwards are utility targets used by the 'make world' and # related targets. If your 'world' breaks, you may like to try to fix # the problem and manually run the following targets to attempt to # complete the build. Beware, this is *not* guaranteed to work, you # need to have a pretty good grip on the current state of the system # to attempt to manually finish it. If in doubt, 'make world' again. # # # legacy: Build compatibility shims for the next three targets. This is a # minimal set of tools and shims necessary to compensate for older systems # which don't have the APIs required by the targets built in bootstrap-tools, # build-tools or cross-tools. # # ELF Tool Chain libraries are needed for ELF tools and dtrace tools. # r296685 fix cross-endian objcopy .if ${BOOTSTRAPPING} < 1100102 _elftoolchain_libs= lib/libelf lib/libdwarf .endif legacy: .PHONY .if ${BOOTSTRAPPING} < ${MINIMUM_SUPPORTED_OSREL} && ${BOOTSTRAPPING} != 0 @echo "ERROR: Source upgrades from versions prior to ${MINIMUM_SUPPORTED_REL} are not supported."; \ false .endif .for _tool in tools/build ${_elftoolchain_libs} ${_+_}@${ECHODIR} "===> ${_tool} (obj,includes,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy includes; \ ${MAKE} DIRPRFX=${_tool}/ MK_INCLUDES=no all; \ ${MAKE} DIRPRFX=${_tool}/ MK_INCLUDES=no \ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install .endfor # # bootstrap-tools: Build tools needed for compatibility. These are binaries that # are built to build other binaries in the system. However, the focus of these # binaries is usually quite narrow. Bootstrap tools use the host's compiler and # libraries, augmented by -legacy. 
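# As a worked example of the version gates below (using only cutoffs that
# already appear in this file): a host at __FreeBSD_version 1000030 would
# bootstrap usr.bin/m4 and usr.bin/lex (BOOTSTRAPPING < 1000033) but not
# bin/cat, whose cutoff of 1000027 is already satisfied.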
# _bt= _bootstrap-tools .if ${MK_GAMES} != "no" _strfile= usr.bin/fortune/strfile .endif .if ${MK_GCC} != "no" && ${MK_CXX} != "no" _gperf= gnu/usr.bin/gperf .endif .if ${MK_GROFF} != "no" _groff= gnu/usr.bin/groff \ usr.bin/soelim .endif .if ${MK_VT} != "no" _vtfontcvt= usr.bin/vtfontcvt .endif .if ${BOOTSTRAPPING} < 900002 _sed= usr.bin/sed .endif .if ${BOOTSTRAPPING} < 1000033 _libopenbsd= lib/libopenbsd _m4= usr.bin/m4 _lex= usr.bin/lex ${_bt}-usr.bin/m4: ${_bt}-lib/libopenbsd ${_bt}-usr.bin/lex: ${_bt}-usr.bin/m4 .endif .if ${BOOTSTRAPPING} < 1000026 _nmtree= lib/libnetbsd \ usr.sbin/nmtree ${_bt}-usr.sbin/nmtree: ${_bt}-lib/libnetbsd .endif .if ${BOOTSTRAPPING} < 1000027 _cat= bin/cat .endif # r264059 support for status= .if ${BOOTSTRAPPING} < 1100017 _dd= bin/dd .endif # r277259 crunchide: Correct 64-bit section header offset # r281674 crunchide: always include both 32- and 64-bit ELF support .if ${BOOTSTRAPPING} < 1100078 _crunchide= usr.sbin/crunch/crunchide .endif # r285986 crunchen: use STRIPBIN rather than STRIP # 1100113: Support MK_AUTO_OBJ .if ${BOOTSTRAPPING} < 1100078 || \ (${MK_AUTO_OBJ} == "yes" && ${BOOTSTRAPPING} < 1100114) _crunchgen= usr.sbin/crunch/crunchgen .endif .if ${BOOTSTRAPPING} >= 900040 && ${BOOTSTRAPPING} < 900041 _awk= usr.bin/awk .endif # r296926 -P keymap search path, MFC to stable/10 in r298297 .if ${BOOTSTRAPPING} < 1003501 || \ (${BOOTSTRAPPING} >= 1100000 && ${BOOTSTRAPPING} < 1100103) _kbdcontrol= usr.sbin/kbdcontrol .endif _yacc= lib/liby \ usr.bin/yacc ${_bt}-usr.bin/yacc: ${_bt}-lib/liby .if ${MK_BSNMP} != "no" _gensnmptree= usr.sbin/bsnmpd/gensnmptree .endif # We need to build tblgen when we're building clang either as # the bootstrap compiler, or as the part of the normal build. .if ${MK_CLANG_BOOTSTRAP} != "no" || ${MK_CLANG} != "no" _clang_tblgen= \ lib/clang/libllvmsupport \ lib/clang/libllvmtablegen \ usr.bin/clang/llvm-tblgen \ usr.bin/clang/clang-tblgen ${_bt}-usr.bin/clang/clang-tblgen: ${_bt}-lib/clang/libllvmtablegen ${_bt}-lib/clang/libllvmsupport ${_bt}-usr.bin/clang/llvm-tblgen: ${_bt}-lib/clang/libllvmtablegen ${_bt}-lib/clang/libllvmsupport .endif # Default to building the GPL DTC, but build the BSDL one if users explicitly # request it. _dtc= usr.bin/dtc .if ${MK_GPL_DTC} != "no" _dtc= gnu/usr.bin/dtc .endif .if ${MK_KERBEROS} != "no" _kerberos5_bootstrap_tools= \ kerberos5/tools/make-roken \ kerberos5/lib/libroken \ kerberos5/lib/libvers \ kerberos5/tools/asn1_compile \ kerberos5/tools/slc \ usr.bin/compile_et .ORDER: ${_kerberos5_bootstrap_tools:C/^/${_bt}-/g} .endif # r283777 makewhatis(1) replaced with mandoc version which builds a database. .if ${MK_MANDOCDB} != "no" && ${BOOTSTRAPPING} < 1100075 _libopenbsd?= lib/libopenbsd _makewhatis= lib/libsqlite3 \ usr.bin/mandoc ${_bt}-usr.bin/mandoc: ${_bt}-lib/libopenbsd ${_bt}-lib/libsqlite3 .endif bootstrap-tools: .PHONY # Please document (add comment) why something is in 'bootstrap-tools'. # Try to bound the building of the bootstrap-tool to just the # FreeBSD versions that need the tool built at this stage of the build. 
.for _tool in \ ${_clang_tblgen} \ ${_kerberos5_bootstrap_tools} \ ${_strfile} \ ${_gperf} \ ${_groff} \ ${_dtc} \ ${_awk} \ ${_cat} \ ${_dd} \ ${_kbdcontrol} \ usr.bin/lorder \ ${_libopenbsd} \ ${_makewhatis} \ usr.bin/rpcgen \ ${_sed} \ ${_yacc} \ ${_m4} \ ${_lex} \ usr.bin/xinstall \ ${_gensnmptree} \ usr.sbin/config \ ${_crunchide} \ ${_crunchgen} \ ${_nmtree} \ ${_vtfontcvt} \ usr.bin/localedef ${_bt}-${_tool}: .PHONY .MAKE ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install bootstrap-tools: ${_bt}-${_tool} .endfor # # build-tools: Build special purpose build tools # .if !defined(NO_SHARE) _share= share/syscons/scrnmaps .endif .if ${MK_GCC} != "no" _gcc_tools= gnu/usr.bin/cc/cc_tools .endif .if ${MK_RESCUE} != "no" # rescue includes programs that have build-tools targets _rescue=rescue/rescue .endif .for _tool in \ bin/csh \ bin/sh \ ${LOCAL_TOOL_DIRS} \ lib/ncurses/ncurses \ lib/ncurses/ncursesw \ ${_rescue} \ ${_share} \ usr.bin/awk \ lib/libmagic \ usr.bin/mkesdb_static \ usr.bin/mkcsmapper_static \ usr.bin/vi/catalog build-tools_${_tool}: .PHONY ${_+_}@${ECHODIR} "===> ${_tool} (obj,build-tools)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ build-tools build-tools: build-tools_${_tool} .endfor .for _tool in \ ${_gcc_tools} build-tools_${_tool}: .PHONY ${_+_}@${ECHODIR} "===> ${_tool} (obj,all)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ all build-tools: build-tools_${_tool} .endfor # # kernel-tools: Build kernel-building tools # kernel-tools: .PHONY mkdir -p ${MAKEOBJDIRPREFIX}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${MAKEOBJDIRPREFIX}/usr >/dev/null # # cross-tools: All the tools needed to build the rest of the system after # we get done with the earlier stages. It is the last set of tools needed # to begin building the target binaries. # .if ${TARGET_ARCH} != ${MACHINE_ARCH} .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386" _btxld= usr.sbin/btxld .endif .endif # Rebuild ctfconvert and ctfmerge to avoid difficult-to-diagnose failures # resulting from missing bug fixes or ELF Toolchain updates. .if ${MK_CDDL} != "no" _dtrace_tools= cddl/lib/libctf cddl/usr.bin/ctfconvert \ cddl/usr.bin/ctfmerge .endif # If we're given an XAS, don't build binutils. .if ${XAS:M/*} == "" .if ${MK_BINUTILS_BOOTSTRAP} != "no" _binutils= gnu/usr.bin/binutils .endif .if ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" _elftctools= lib/libelftc \ lib/libpe \ usr.bin/elfcopy \ usr.bin/nm \ usr.bin/size \ usr.bin/strings # These are not required by the build, but can be useful for developers who # cross-build on a FreeBSD 10 host: _elftctools+= usr.bin/addr2line .endif .elif ${TARGET_ARCH} != ${MACHINE_ARCH} && ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" # If cross-building with an external binutils we still need to build strip for # the target (for at least crunchide). 
_elftctools= lib/libelftc \ lib/libpe \ usr.bin/elfcopy .endif .if ${MK_CLANG_BOOTSTRAP} != "no" _clang= usr.bin/clang _clang_libs= lib/clang .endif .if ${MK_GCC_BOOTSTRAP} != "no" _cc= gnu/usr.bin/cc .endif .if ${MK_USB} != "no" _usb_tools= sys/boot/usb/tools .endif cross-tools: .MAKE .PHONY .for _tool in \ ${LOCAL_XTOOL_DIRS} \ ${_clang_libs} \ ${_clang} \ ${_binutils} \ ${_elftctools} \ ${_dtrace_tools} \ ${_cc} \ ${_btxld} \ ${_usb_tools} ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX} install .endfor NXBDESTDIR= ${OBJTREE}/nxb-bin NXBENV= MAKEOBJDIRPREFIX=${OBJTREE}/nxb \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${PATH}:${OBJTREE}/gperf_for_gcc/usr/bin NXBMAKE= ${NXBENV} ${MAKE} \ LLVM_TBLGEN=${NXBDESTDIR}/usr/bin/llvm-tblgen \ CLANG_TBLGEN=${NXBDESTDIR}/usr/bin/clang-tblgen \ MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} \ MK_GDB=no MK_TESTS=no \ SSP_CFLAGS= \ MK_HTML=no NO_LINT=yes MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_DEBUG_FILES=no # native-xtools is the current target for qemu-user cross builds of ports # via poudriere and the imgact_binmisc kernel module. # For non-clang enabled targets that are still using the in tree gcc # we must build a gperf binary for one instance of its Makefiles. On # clang-enabled systems, the gperf binary is obsolete. native-xtools: .PHONY .if ${MK_GCC_BOOTSTRAP} != "no" mkdir -p ${OBJTREE}/gperf_for_gcc/usr/bin ${_+_}@${ECHODIR} "===> ${_gperf} (obj,all,install)"; \ cd ${.CURDIR}/${_gperf}; \ ${NXBMAKE} DIRPRFX=${_gperf}/ obj; \ ${NXBMAKE} DIRPRFX=${_gperf}/ all; \ ${NXBMAKE} DIRPRFX=${_gperf}/ DESTDIR=${OBJTREE}/gperf_for_gcc install .endif mkdir -p ${NXBDESTDIR}/bin ${NXBDESTDIR}/sbin ${NXBDESTDIR}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${NXBDESTDIR}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${NXBDESTDIR}/usr/include >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${NXBDESTDIR}/usr/lib >/dev/null .endif .for _tool in \ bin/cat \ bin/chmod \ bin/cp \ bin/csh \ bin/echo \ bin/expr \ bin/hostname \ bin/ln \ bin/ls \ bin/mkdir \ bin/mv \ bin/ps \ bin/realpath \ bin/rm \ bin/rmdir \ bin/sh \ bin/sleep \ ${_clang_tblgen} \ usr.bin/ar \ ${_binutils} \ ${_elftctools} \ ${_cc} \ ${_gcc_tools} \ ${_clang_libs} \ ${_clang} \ sbin/md5 \ sbin/sysctl \ gnu/usr.bin/diff \ usr.bin/awk \ usr.bin/basename \ usr.bin/bmake \ usr.bin/bzip2 \ usr.bin/cmp \ usr.bin/dirname \ usr.bin/env \ usr.bin/fetch \ usr.bin/find \ usr.bin/grep \ usr.bin/gzip \ usr.bin/id \ usr.bin/lex \ usr.bin/lorder \ usr.bin/mktemp \ usr.bin/mt \ usr.bin/patch \ usr.bin/sed \ usr.bin/sort \ usr.bin/tar \ usr.bin/touch \ usr.bin/tr \ usr.bin/true \ usr.bin/uniq \ usr.bin/unzip \ usr.bin/xargs \ usr.bin/xinstall \ usr.bin/xz \ usr.bin/yacc \ usr.sbin/chown ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${NXBMAKE} DIRPRFX=${_tool}/ obj; \ ${NXBMAKE} DIRPRFX=${_tool}/ all; \ ${NXBMAKE} DIRPRFX=${_tool}/ DESTDIR=${NXBDESTDIR} install .endfor # # hierarchy - ensure that all the needed directories are present # hierarchy hier: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${HMAKE} distrib-dirs # # libraries - build all libraries, and install them under ${DESTDIR}. 
# # The list of libraries with dependents (${_prebuild_libs}) and their # interdependencies (__L) are built automatically by the # ${.CURDIR}/tools/make_libdeps.sh script. # libraries: .MAKE .PHONY ${_+_}cd ${.CURDIR}; \ ${MAKE} -f Makefile.inc1 _prereq_libs; \ ${MAKE} -f Makefile.inc1 _startup_libs; \ ${MAKE} -f Makefile.inc1 _prebuild_libs; \ ${MAKE} -f Makefile.inc1 _generic_libs # # static libgcc.a prerequisite for shared libc # _prereq_libs= gnu/lib/libssp/libssp_nonshared gnu/lib/libgcc lib/libcompiler_rt # These dependencies are not automatically generated: # # gnu/lib/csu, gnu/lib/libgcc, lib/csu and lib/libc must be built before # all shared libraries for ELF. # _startup_libs= gnu/lib/csu _startup_libs+= lib/csu _startup_libs+= gnu/lib/libgcc _startup_libs+= lib/libcompiler_rt _startup_libs+= lib/libc _startup_libs+= lib/libc_nonshared .if ${MK_LIBCPLUSPLUS} != "no" _startup_libs+= lib/libcxxrt .endif gnu/lib/libgcc__L: lib/libc__L gnu/lib/libgcc__L: lib/libc_nonshared__L .if ${MK_LIBCPLUSPLUS} != "no" lib/libcxxrt__L: gnu/lib/libgcc__L .endif _prebuild_libs= ${_kerberos5_lib_libasn1} \ ${_kerberos5_lib_libhdb} \ ${_kerberos5_lib_libheimbase} \ ${_kerberos5_lib_libheimntlm} \ ${_libsqlite3} \ ${_kerberos5_lib_libheimipcc} \ ${_kerberos5_lib_libhx509} ${_kerberos5_lib_libkrb5} \ ${_kerberos5_lib_libroken} \ ${_kerberos5_lib_libwind} \ lib/libbz2 ${_libcom_err} lib/libcrypt \ lib/libelf lib/libexpat \ lib/libfigpar \ ${_lib_libgssapi} \ lib/libkiconv lib/libkvm lib/liblzma lib/libmd lib/libnv \ ${_lib_casper} \ lib/ncurses/ncurses lib/ncurses/ncursesw \ lib/libopie lib/libpam/libpam ${_lib_libthr} \ ${_lib_libradius} lib/libsbuf lib/libtacplus \ lib/libgeom \ ${_cddl_lib_libumem} ${_cddl_lib_libnvpair} \ ${_cddl_lib_libuutil} \ ${_cddl_lib_libavl} \ ${_cddl_lib_libzfs_core} \ ${_cddl_lib_libctf} \ lib/libutil lib/libpjdlog ${_lib_libypclnt} lib/libz lib/msun \ ${_secure_lib_libcrypto} ${_lib_libldns} \ ${_secure_lib_libssh} ${_secure_lib_libssl} \ gnu/lib/libdialog .if ${MK_GNUCXX} != "no" _prebuild_libs+= gnu/lib/libstdc++ gnu/lib/libsupc++ gnu/lib/libstdc++__L: lib/msun__L gnu/lib/libsupc++__L: gnu/lib/libstdc++__L .endif .if ${MK_LIBCPLUSPLUS} != "no" _prebuild_libs+= lib/libc++ .endif lib/libgeom__L: lib/libexpat__L lib/libkvm__L: lib/libelf__L .if ${MK_LIBTHR} != "no" _lib_libthr= lib/libthr .endif .if ${MK_RADIUS_SUPPORT} != "no" _lib_libradius= lib/libradius .endif .if ${MK_OFED} != "no" _ofed_lib= contrib/ofed/usr.lib _prebuild_libs+= contrib/ofed/usr.lib/libosmcomp _prebuild_libs+= contrib/ofed/usr.lib/libopensm _prebuild_libs+= contrib/ofed/usr.lib/libibcommon _prebuild_libs+= contrib/ofed/usr.lib/libibverbs _prebuild_libs+= contrib/ofed/usr.lib/libibumad contrib/ofed/usr.lib/libopensm__L: lib/libthr__L contrib/ofed/usr.lib/libosmcomp__L: lib/libthr__L contrib/ofed/usr.lib/libibumad__L: contrib/ofed/usr.lib/libibcommon__L .endif .if ${MK_CASPER} != "no" _lib_casper= lib/libcasper .endif lib/libpjdlog__L: lib/libutil__L lib/libcasper__L: lib/libnv__L lib/liblzma__L: lib/libthr__L _generic_libs= ${_cddl_lib} gnu/lib ${_kerberos5_lib} lib ${_secure_lib} usr.bin/lex/lib ${_ofed_lib} .for _DIR in ${LOCAL_LIB_DIRS} .if exists(${.CURDIR}/${_DIR}/Makefile) && empty(_generic_libs:M${_DIR}) _generic_libs+= ${_DIR} .endif .endfor lib/libopie__L lib/libtacplus__L: lib/libmd__L .if ${MK_CDDL} != "no" _cddl_lib_libumem= cddl/lib/libumem _cddl_lib_libnvpair= cddl/lib/libnvpair _cddl_lib_libavl= cddl/lib/libavl _cddl_lib_libuutil= cddl/lib/libuutil _cddl_lib_libzfs_core= 
cddl/lib/libzfs_core _cddl_lib_libctf= cddl/lib/libctf _cddl_lib= cddl/lib cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L cddl/lib/libzfs__L: lib/libgeom__L cddl/lib/libctf__L: lib/libz__L .endif # cddl/lib/libdtrace requires lib/libproc and lib/librtld_db; it's only built # on select architectures though (see cddl/lib/Makefile) .if ${MACHINE_CPUARCH} != "sparc64" -_prebuild_libs+= lib/libproc lib/librtld_db +_prebuild_libs+= lib/libprocstat lib/libproc lib/librtld_db +lib/libprocstat__L: lib/libelf__L lib/libkvm__L lib/libutil__L +lib/libproc__L: lib/libprocstat__L +lib/librtld_db__L: lib/libprocstat__L .endif .if ${MK_CRYPT} != "no" .if ${MK_OPENSSL} != "no" _secure_lib_libcrypto= secure/lib/libcrypto _secure_lib_libssl= secure/lib/libssl lib/libradius__L secure/lib/libssl__L: secure/lib/libcrypto__L .if ${MK_LDNS} != "no" _lib_libldns= lib/libldns lib/libldns__L: secure/lib/libcrypto__L .endif .if ${MK_OPENSSH} != "no" _secure_lib_libssh= secure/lib/libssh secure/lib/libssh__L: lib/libz__L secure/lib/libcrypto__L lib/libcrypt__L .if ${MK_LDNS} != "no" secure/lib/libssh__L: lib/libldns__L .endif .if ${MK_KERBEROS_SUPPORT} != "no" secure/lib/libssh__L: lib/libgssapi__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libhx509__L kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libmd__L kerberos5/lib/libroken__L .endif .endif .endif _secure_lib= secure/lib .endif .if ${MK_KERBEROS} != "no" kerberos5/lib/libasn1__L: lib/libcom_err__L kerberos5/lib/libroken__L kerberos5/lib/libhdb__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ kerberos5/lib/libkrb5__L kerberos5/lib/libroken__L \ kerberos5/lib/libwind__L lib/libsqlite3__L kerberos5/lib/libheimntlm__L: secure/lib/libcrypto__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libhx509__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ secure/lib/libcrypto__L kerberos5/lib/libroken__L kerberos5/lib/libwind__L kerberos5/lib/libkrb5__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libcrypt__L secure/lib/libcrypto__L kerberos5/lib/libhx509__L \ kerberos5/lib/libroken__L kerberos5/lib/libwind__L \ kerberos5/lib/libheimbase__L kerberos5/lib/libheimipcc__L kerberos5/lib/libroken__L: lib/libcrypt__L kerberos5/lib/libwind__L: kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libheimbase__L: lib/libthr__L kerberos5/lib/libheimipcc__L: kerberos5/lib/libroken__L kerberos5/lib/libheimbase__L lib/libthr__L .endif lib/libsqlite3__L: lib/libthr__L .if ${MK_GSSAPI} != "no" _lib_libgssapi= lib/libgssapi .endif .if ${MK_KERBEROS} != "no" _kerberos5_lib= kerberos5/lib _kerberos5_lib_libasn1= kerberos5/lib/libasn1 _kerberos5_lib_libhdb= kerberos5/lib/libhdb _kerberos5_lib_libheimbase= kerberos5/lib/libheimbase _kerberos5_lib_libkrb5= kerberos5/lib/libkrb5 _kerberos5_lib_libhx509= kerberos5/lib/libhx509 _kerberos5_lib_libroken= kerberos5/lib/libroken _kerberos5_lib_libheimntlm= kerberos5/lib/libheimntlm _libsqlite3= lib/libsqlite3 _kerberos5_lib_libheimipcc= kerberos5/lib/libheimipcc _kerberos5_lib_libwind= kerberos5/lib/libwind _libcom_err= lib/libcom_err .endif .if ${MK_NIS} != "no" _lib_libypclnt= lib/libypclnt .endif .if ${MK_OPENSSL} == "no" lib/libradius__L: lib/libmd__L .endif lib/libproc__L: \ ${_cddl_lib_libctf:D${_cddl_lib_libctf}__L} lib/libelf__L lib/librtld_db__L lib/libutil__L .if ${MK_CXX} != "no" .if ${MK_LIBCPLUSPLUS} != "no" lib/libproc__L: lib/libcxxrt__L .else # This implies MK_GNUCXX != "no"; see lib/libproc lib/libproc__L: gnu/lib/libsupc++__L .endif .endif gnu/lib/libdialog__L: lib/msun__L 
lib/ncurses/ncursesw__L .for _lib in ${_prereq_libs} ${_lib}__PL: .PHONY .MAKE .if exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,all,install)"; \ cd ${.CURDIR}/${_lib}; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ install .endif .endfor .for _lib in ${_startup_libs} ${_prebuild_libs} ${_generic_libs} ${_lib}__L: .PHONY .MAKE .if exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,all,install)"; \ cd ${.CURDIR}/${_lib}; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ install .endif .endfor _prereq_libs: ${_prereq_libs:S/$/__PL/} _startup_libs: ${_startup_libs:S/$/__L/} _prebuild_libs: ${_prebuild_libs:S/$/__L/} _generic_libs: ${_generic_libs:S/$/__L/} # Enable SUBDIR_PARALLEL when not calling 'make all', unless called from # 'everything' with _PARALLEL_SUBDIR_OK set. This is because it is unlikely # that running 'make all' from the top-level, especially with a SUBDIR_OVERRIDE # or LOCAL_DIRS set, will have a reliable build if SUBDIRs are built in # parallel. This is safe for the world stage of buildworld though since it has # already built libraries in a proper order and installed includes into # WORLDTMP. Special handling is done for SUBDIR ordering for 'install*' to # avoid trashing a system if it crashes mid-install. .if !make(all) || defined(_PARALLEL_SUBDIR_OK) SUBDIR_PARALLEL= .endif .include .if make(check-old) || make(check-old-dirs) || \ make(check-old-files) || make(check-old-libs) || \ make(delete-old) || make(delete-old-dirs) || \ make(delete-old-files) || make(delete-old-libs) # # check for / delete old files section # .include "ObsoleteFiles.inc" OLD_LIBS_MESSAGE="Please be sure no application still uses those libraries, \ else you can not start such an application. Consult UPDATING for more \ information regarding how to cope with the removal/revision bump of a \ specific library." .if !defined(BATCH_DELETE_OLD_FILES) RM_I=-i .else RM_I=-v .endif delete-old-files: .PHONY @echo ">>> Removing old files (only deletes safe to delete libs)" # Ask for every old file if the user really wants to remove it. # It's annoying, but better safe than sorry. # NB: We cannot pass the list of OLD_FILES as a parameter because the # argument list will get too long. Using .for/.endfor make "loops" will make # the Makefile parser segfault. @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done # Remove catpages without corresponding manpages. @exec 3<&0; \ find ${DESTDIR}/usr/share/man/cat* ! -type d | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! 
-e "$${manpage}" ]; then \ rm ${RM_I} $${catpage} <&3; \ fi; \ done @echo ">>> Old files removed" check-old-files: .PHONY @echo ">>> Checking for old files" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done # Check for catpages without corresponding manpages. @find ${DESTDIR}/usr/share/man/cat* ! -type d | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! -e "$${manpage}" ]; then \ echo $${catpage}; \ fi; \ done delete-old-libs: .PHONY @echo ">>> Removing old libraries" @echo "${OLD_LIBS_MESSAGE}" | fmt @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done @echo ">>> Old libraries removed" check-old-libs: .PHONY @echo ">>> Checking for old libraries" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done delete-old-dirs: .PHONY @echo ">>> Removing old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | sort -r | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ rmdir -v "${DESTDIR}/$${dir}" || true; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done @echo ">>> Old directories removed" check-old-dirs: .PHONY @echo ">>> Checking for old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir}"; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done delete-old: delete-old-files delete-old-dirs .PHONY @echo "To remove old libraries run '${MAKE} delete-old-libs'." check-old: check-old-files check-old-libs check-old-dirs .PHONY @echo "To remove old files and directories run '${MAKE} delete-old'." @echo "To remove old libraries run '${MAKE} delete-old-libs'." .endif # # showconfig - show build configuration. 
# showconfig: .PHONY @(${MAKE} -n -f ${.CURDIR}/sys/conf/kern.opts.mk -V dummy -dg1; \ ${MAKE} -n -f ${.CURDIR}/share/mk/src.opts.mk -V dummy -dg1) 2>&1 | grep ^MK_ | sort -u .if !empty(KRNLOBJDIR) && !empty(KERNCONF) DTBOUTPUTPATH= ${KRNLOBJDIR}/${KERNCONF}/ .if !defined(FDT_DTS_FILE) || empty(FDT_DTS_FILE) .if exists(${KERNCONFDIR}/${KERNCONF}) FDT_DTS_FILE!= awk 'BEGIN {FS="="} /^makeoptions[[:space:]]+FDT_DTS_FILE/ {print $$2}' \ '${KERNCONFDIR}/${KERNCONF}' ; echo .endif .endif .endif .if !defined(DTBOUTPUTPATH) || !exists(${DTBOUTPUTPATH}) DTBOUTPUTPATH= ${.CURDIR} .endif # # Build 'standalone' Device Tree Blob # builddtb: .PHONY @PATH=${TMPPATH} MACHINE=${TARGET} \ ${.CURDIR}/sys/tools/fdt/make_dtb.sh ${.CURDIR}/sys \ "${FDT_DTS_FILE}" ${DTBOUTPUTPATH} ############### # cleanworld # In the following, the first 'rm' in a series will usually remove all # files and directories. If it does not, then there are probably some # files with file flags set, so this unsets them and tries the 'rm' a # second time. There are situations where this target will be cleaning # some directories via more than one method, but that duplication is # needed to correctly handle all the possible situations. Removing all # files without file flags set in the first 'rm' instance saves time, # because 'chflags' will need to operate on fewer files afterwards. # # It is expected that BW_CANONICALOBJDIR == the CANONICALOBJDIR as would be # created by bsd.obj.mk, except that we don't want to .include that file # in this makefile. # BW_CANONICALOBJDIR:=${OBJTREE}${.CURDIR} cleanworld: .PHONY .if exists(${BW_CANONICALOBJDIR}/) -rm -rf ${BW_CANONICALOBJDIR}/* -chflags -R 0 ${BW_CANONICALOBJDIR} rm -rf ${BW_CANONICALOBJDIR}/* .endif .if ${.CURDIR} == ${.OBJDIR} || ${.CURDIR}/obj == ${.OBJDIR} # To be safe in this case, fall back to a 'make cleandir' ${_+_}@cd ${.CURDIR}; ${MAKE} cleandir .endif .if defined(TARGET) && defined(TARGET_ARCH) .if ${TARGET} == ${MACHINE} && ${TARGET_ARCH} == ${MACHINE_ARCH} XDEV_CPUTYPE?=${CPUTYPE} .else XDEV_CPUTYPE?=${TARGET_CPUTYPE} .endif NOFUN=-DNO_FSCHG MK_HTML=no -DNO_LINT \ MK_MAN=no MK_NLS=no MK_PROFILE=no \ MK_KERBEROS=no MK_RESCUE=no MK_TESTS=no MK_WARNS=no \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ CPUTYPE=${XDEV_CPUTYPE} XDDIR=${TARGET_ARCH}-freebsd XDTP?=/usr/${XDDIR} .if ${XDTP:N/*} .error XDTP variable should be an absolute path .endif CDBENV=MAKEOBJDIRPREFIX=${MAKEOBJDIRPREFIX}/${XDDIR} \ INSTALL="sh ${.CURDIR}/tools/install.sh" CDENV= ${CDBENV} \ TOOLS_PREFIX=${XDTP} CD2CFLAGS=-isystem ${XDDESTDIR}/usr/include -L${XDDESTDIR}/usr/lib \ --sysroot=${XDDESTDIR}/ -B${XDDESTDIR}/usr/libexec \ -B${XDDESTDIR}/usr/bin -B${XDDESTDIR}/usr/lib CD2ENV=${CDENV} CC="${CC} ${CD2CFLAGS}" CXX="${CXX} ${CD2CFLAGS}" \ CPP="${CPP} ${CD2CFLAGS}" \ MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} CDTMP= ${MAKEOBJDIRPREFIX}/${XDDIR}/${.CURDIR}/tmp CDMAKE=${CDENV} PATH=${CDTMP}/usr/bin:${PATH} ${MAKE} ${NOFUN} CD2MAKE=${CD2ENV} PATH=${CDTMP}/usr/bin:${XDDESTDIR}/usr/bin:${PATH} ${MAKE} ${NOFUN} XDDESTDIR=${DESTDIR}/${XDTP} .if !defined(OSREL) OSREL!= uname -r | sed -e 's/[-(].*//' .endif .ORDER: xdev-build xdev-install xdev-links xdev: xdev-build xdev-install .PHONY .ORDER: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools xdev-build: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools .PHONY _xb-worldtmp: .PHONY mkdir -p ${CDTMP}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${CDTMP}/usr >/dev/null _xb-bootstrap-tools: .PHONY .for _tool in \ 
${_clang_tblgen} \ ${_gperf} ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ obj; \ ${CDMAKE} DIRPRFX=${_tool}/ all; \ ${CDMAKE} DIRPRFX=${_tool}/ DESTDIR=${CDTMP} install .endfor _xb-build-tools: .PHONY ${_+_}@cd ${.CURDIR}; \ ${CDBENV} ${MAKE} -f Makefile.inc1 ${NOFUN} build-tools _xb-cross-tools: .PHONY .for _tool in \ ${_binutils} \ ${_elftctools} \ usr.bin/ar \ ${_clang_libs} \ ${_clang} \ ${_cc} ${_+_}@${ECHODIR} "===> xdev ${_tool} (obj,all)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ obj; \ ${CDMAKE} DIRPRFX=${_tool}/ all .endfor _xi-mtree: .PHONY ${_+_}@${ECHODIR} "mtree populating ${XDDESTDIR}" mkdir -p ${XDDESTDIR} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${XDDESTDIR} >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${XDDESTDIR}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${XDDESTDIR}/usr/include >/dev/null .if defined(LIBCOMPAT) mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${XDDESTDIR}/usr >/dev/null .endif .if ${MK_TESTS} != "no" mkdir -p ${XDDESTDIR}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${XDDESTDIR}${TESTSBASE} >/dev/null .endif .ORDER: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries xdev-install: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries .PHONY _xi-cross-tools: .PHONY @echo "_xi-cross-tools" .for _tool in \ ${_binutils} \ ${_elftctools} \ usr.bin/ar \ ${_clang_libs} \ ${_clang} \ ${_cc} ${_+_}@${ECHODIR} "===> xdev ${_tool} (install)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ install DESTDIR=${XDDESTDIR} .endfor _xi-includes: .PHONY ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 includes \ DESTDIR=${XDDESTDIR} _xi-libraries: .PHONY ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 libraries \ DESTDIR=${XDDESTDIR} xdev-links: .PHONY ${_+_}cd ${XDDESTDIR}/usr/bin; \ mkdir -p ../../../../usr/bin; \ for i in *; do \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}-$$i; \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}${OSREL}-$$i; \ done .else xdev xdev-build xdev-install xdev-links: .PHONY @echo "*** Error: Both TARGET and TARGET_ARCH must be defined for \"${.TARGET}\" target" .endif Index: user/alc/PQ_LAUNDRY/bin/sh/expand.c =================================================================== --- user/alc/PQ_LAUNDRY/bin/sh/expand.c (revision 303641) +++ user/alc/PQ_LAUNDRY/bin/sh/expand.c (revision 303642) @@ -1,1544 +1,1545 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 1997-2005 * Herbert Xu . All rights reserved. * Copyright (c) 2010-2015 * Jilles Tjoelker . All rights reserved. * * This code is derived from software contributed to Berkeley by * Kenneth Almquist. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)expand.c 8.5 (Berkeley) 5/15/95"; #endif #endif /* not lint */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Routines to expand arguments to commands. We have to deal with * backquotes, shell variables, and file metacharacters. */ #include "shell.h" #include "main.h" #include "nodes.h" #include "eval.h" #include "expand.h" #include "syntax.h" #include "parser.h" #include "jobs.h" #include "options.h" #include "var.h" #include "input.h" #include "output.h" #include "memalloc.h" #include "error.h" #include "mystring.h" #include "arith.h" #include "show.h" #include "builtins.h" enum wordstate { WORD_IDLE, WORD_WS_DELIMITED, WORD_QUOTEMARK }; struct worddest { struct arglist *list; enum wordstate state; }; static char *expdest; /* output of current string */ static struct nodelist *argbackq; /* list of back quote expressions */ static const char *argstr(const char *, int, struct worddest *); static const char *exptilde(const char *, int); static const char *expari(const char *, int, struct worddest *); static void expbackq(union node *, int, int, struct worddest *); static void subevalvar_trim(const char *, int, int, int); static int subevalvar_misc(const char *, const char *, int, int, int); static const char *evalvar(const char *, int, struct worddest *); static int varisset(const char *, int); static void strtodest(const char *, int, int, int, struct worddest *); static void reprocess(int, int, int, int, struct worddest *); static void varvalue(const char *, int, int, int, struct worddest *); static void expandmeta(char *, struct arglist *); static void expmeta(char *, char *, struct arglist *); static int expsortcmp(const void *, const void *); static int patmatch(const char *, const char *); static void cvtnum(int, char *); static int collate_range_cmp(wchar_t, wchar_t); void emptyarglist(struct arglist *list) { list->args = list->smallarg; list->count = 0; list->capacity = sizeof(list->smallarg) / sizeof(list->smallarg[0]); } void appendarglist(struct arglist *list, char *str) { char **newargs; int newcapacity; if (list->count >= list->capacity) { newcapacity = list->capacity * 2; if (newcapacity < 16) newcapacity = 16; if (newcapacity > INT_MAX / (int)sizeof(newargs[0])) error("Too many entries in arglist"); newargs = stalloc(newcapacity * sizeof(newargs[0])); memcpy(newargs, list->args, list->count * sizeof(newargs[0])); list->args = newargs; list->capacity = newcapacity; } list->args[list->count++] = str; } static int collate_range_cmp(wchar_t c1, wchar_t c2) { static wchar_t s1[2], s2[2]; s1[0] = c1; s2[0] = c2; return (wcscoll(s1, s2)); } static char * 
stputs_quotes(const char *data, const char *syntax, char *p) { while (*data) { CHECKSTRSPACE(2, p); if (syntax[(int)*data] == CCTL) USTPUTC(CTLESC, p); USTPUTC(*data++, p); } return (p); } #define STPUTS_QUOTES(data, syntax, p) p = stputs_quotes((data), syntax, p) static char * nextword(char c, int flag, char *p, struct worddest *dst) { int is_ws; is_ws = c == '\t' || c == '\n' || c == ' '; if (p != stackblock() || (is_ws ? dst->state == WORD_QUOTEMARK : dst->state != WORD_WS_DELIMITED) || c == '\0') { STPUTC('\0', p); if (flag & EXP_GLOB) expandmeta(grabstackstr(p), dst->list); else appendarglist(dst->list, grabstackstr(p)); dst->state = is_ws ? WORD_WS_DELIMITED : WORD_IDLE; } else if (!is_ws && dst->state == WORD_WS_DELIMITED) dst->state = WORD_IDLE; /* Reserve space while the stack string is empty. */ appendarglist(dst->list, NULL); dst->list->count--; STARTSTACKSTR(p); return p; } #define NEXTWORD(c, flag, p, dstlist) p = nextword(c, flag, p, dstlist) static char * stputs_split(const char *data, const char *syntax, int flag, char *p, struct worddest *dst) { const char *ifs; char c; ifs = ifsset() ? ifsval() : " \t\n"; while (*data) { CHECKSTRSPACE(2, p); c = *data++; if (strchr(ifs, c) != NULL) { NEXTWORD(c, flag, p, dst); continue; } if (flag & EXP_GLOB && syntax[(int)c] == CCTL) USTPUTC(CTLESC, p); USTPUTC(c, p); } return (p); } #define STPUTS_SPLIT(data, syntax, flag, p, dst) p = stputs_split((data), syntax, flag, p, dst) /* * Perform expansions on an argument, placing the resulting list of arguments * in arglist. Parameter expansion, command substitution and arithmetic * expansion are always performed; additional expansions can be requested * via flag (EXP_*). * The result is left in the stack string. * When arglist is NULL, perform here document expansion. * * Caution: this function uses global state and is not reentrant. * However, a new invocation after an interrupted invocation is safe * and will reset the global state for the new call. */ void expandarg(union node *arg, struct arglist *arglist, int flag) { struct worddest exparg; if (fflag) flag &= ~EXP_GLOB; argbackq = arg->narg.backquote; exparg.list = arglist; exparg.state = WORD_IDLE; STARTSTACKSTR(expdest); argstr(arg->narg.text, flag, &exparg); if (arglist == NULL) { STACKSTRNUL(expdest); return; /* here document expanded */ } if ((flag & EXP_SPLIT) == 0 || expdest != stackblock() || exparg.state == WORD_QUOTEMARK) { STPUTC('\0', expdest); if (flag & EXP_SPLIT) { if (flag & EXP_GLOB) expandmeta(grabstackstr(expdest), exparg.list); else appendarglist(exparg.list, grabstackstr(expdest)); } } if ((flag & EXP_SPLIT) == 0) appendarglist(arglist, grabstackstr(expdest)); } /* * Perform parameter expansion, command substitution and arithmetic * expansion, and tilde expansion if requested via EXP_TILDE/EXP_VARTILDE. * Processing ends at a CTLENDVAR or CTLENDARI character as well as '\0'. * This is used to expand word in ${var+word} etc. * If EXP_GLOB or EXP_CASE are set, keep and/or generate CTLESC * characters to allow for further processing. * * If EXP_SPLIT is set, dst receives any complete words produced. 
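 *
 * A minimal caller sketch (mirroring casematch() later in this file, not a
 * new interface); dst may be NULL here because EXP_SPLIT is not requested:
 *
 *	argbackq = pattern->narg.backquote;
 *	STARTSTACKSTR(expdest);
 *	argstr(pattern->narg.text, EXP_TILDE | EXP_CASE, NULL);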
*/ static const char * argstr(const char *p, int flag, struct worddest *dst) { char c; int quotes = flag & (EXP_GLOB | EXP_CASE); /* do CTLESC */ int firsteq = 1; int split_lit; int lit_quoted; split_lit = flag & EXP_SPLIT_LIT; lit_quoted = flag & EXP_LIT_QUOTED; flag &= ~(EXP_SPLIT_LIT | EXP_LIT_QUOTED); if (*p == '~' && (flag & (EXP_TILDE | EXP_VARTILDE))) p = exptilde(p, flag); for (;;) { CHECKSTRSPACE(2, expdest); switch (c = *p++) { case '\0': return (p - 1); case CTLENDVAR: case CTLENDARI: return (p); case CTLQUOTEMARK: lit_quoted = 1; /* "$@" syntax adherence hack */ if (p[0] == CTLVAR && (p[1] & VSQUOTE) != 0 && p[2] == '@' && p[3] == '=') break; if ((flag & EXP_SPLIT) != 0 && expdest == stackblock()) dst->state = WORD_QUOTEMARK; break; case CTLQUOTEEND: lit_quoted = 0; break; case CTLESC: c = *p++; if (split_lit && !lit_quoted && strchr(ifsset() ? ifsval() : " \t\n", c) != NULL) { NEXTWORD(c, flag, expdest, dst); break; } if (quotes) USTPUTC(CTLESC, expdest); USTPUTC(c, expdest); break; case CTLVAR: p = evalvar(p, flag, dst); break; case CTLBACKQ: case CTLBACKQ|CTLQUOTE: expbackq(argbackq->n, c & CTLQUOTE, flag, dst); argbackq = argbackq->next; break; case CTLARI: p = expari(p, flag, dst); break; case ':': case '=': /* * sort of a hack - expand tildes in variable * assignments (after the first '=' and after ':'s). */ if (split_lit && !lit_quoted && strchr(ifsset() ? ifsval() : " \t\n", c) != NULL) { NEXTWORD(c, flag, expdest, dst); break; } USTPUTC(c, expdest); if (flag & EXP_VARTILDE && *p == '~' && (c != '=' || firsteq)) { if (c == '=') firsteq = 0; p = exptilde(p, flag); } break; default: if (split_lit && !lit_quoted && strchr(ifsset() ? ifsval() : " \t\n", c) != NULL) { NEXTWORD(c, flag, expdest, dst); break; } USTPUTC(c, expdest); } } } /* * Perform tilde expansion, placing the result in the stack string and * returning the next position in the input string to process. */ static const char * exptilde(const char *p, int flag) { char c; const char *startp = p; const char *user; struct passwd *pw; char *home; int len; for (;;) { c = *p; switch(c) { case CTLESC: /* This means CTL* are always considered quoted. */ case CTLVAR: case CTLBACKQ: case CTLBACKQ | CTLQUOTE: case CTLARI: case CTLENDARI: case CTLQUOTEMARK: return (startp); case ':': if ((flag & EXP_VARTILDE) == 0) break; /* FALLTHROUGH */ case '\0': case '/': case CTLENDVAR: len = p - startp - 1; STPUTBIN(startp + 1, len, expdest); STACKSTRNUL(expdest); user = expdest - len; if (*user == '\0') { home = lookupvar("HOME"); } else { pw = getpwnam(user); home = pw != NULL ? pw->pw_dir : NULL; } STADJUST(-len, expdest); if (home == NULL || *home == '\0') return (startp); strtodest(home, flag, VSNORMAL, 1, NULL); return (p); } p++; } } /* * Expand arithmetic expression. */ static const char * expari(const char *p, int flag, struct worddest *dst) { char *q, *start; arith_t result; int begoff; int quoted; int adj; quoted = *p++ == '"'; begoff = expdest - stackblock(); p = argstr(p, 0, NULL); STPUTC('\0', expdest); start = stackblock() + begoff; q = grabstackstr(expdest); result = arith(start); ungrabstackstr(q, expdest); start = stackblock() + begoff; adj = start - expdest; STADJUST(adj, expdest); CHECKSTRSPACE((int)(DIGITS(result) + 1), expdest); fmtstr(expdest, DIGITS(result), ARITH_FORMAT_STR, result); adj = strlen(expdest); STADJUST(adj, expdest); if (!quoted) reprocess(expdest - adj - stackblock(), flag, VSNORMAL, 0, dst); return p; } /* * Perform command substitution. 
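 *
 * For example (illustrative, default IFS assumed): an unquoted
 * $(printf 'a\nb\n\n') yields the two words "a" and "b" when field
 * splitting is requested, because embedded newlines found in IFS start a
 * new word while the trailing newlines are never copied at all.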
*/ static void expbackq(union node *cmd, int quoted, int flag, struct worddest *dst) { struct backcmd in; int i; char buf[128]; char *p; char *dest = expdest; struct nodelist *saveargbackq; char lastc; char const *syntax = quoted? DQSYNTAX : BASESYNTAX; int quotes = flag & (EXP_GLOB | EXP_CASE); size_t nnl; const char *ifs; INTOFF; saveargbackq = argbackq; p = grabstackstr(dest); evalbackcmd(cmd, &in); ungrabstackstr(p, dest); argbackq = saveargbackq; p = in.buf; nnl = 0; if (!quoted && flag & EXP_SPLIT) ifs = ifsset() ? ifsval() : " \t\n"; else ifs = ""; /* Don't copy trailing newlines */ for (;;) { if (--in.nleft < 0) { if (in.fd < 0) break; - while ((i = read(in.fd, buf, sizeof buf)) < 0 && errno == EINTR); + while ((i = read(in.fd, buf, sizeof buf)) < 0 && errno == EINTR) + ; TRACE(("expbackq: read returns %d\n", i)); if (i <= 0) break; p = buf; in.nleft = i - 1; } lastc = *p++; if (lastc == '\0') continue; if (lastc == '\n') { nnl++; } else { if (nnl > 0) { if (strchr(ifs, '\n') != NULL) { NEXTWORD('\n', flag, dest, dst); nnl = 0; } else { CHECKSTRSPACE(nnl + 2, dest); while (nnl > 0) { nnl--; USTPUTC('\n', dest); } } } if (strchr(ifs, lastc) != NULL) NEXTWORD(lastc, flag, dest, dst); else { CHECKSTRSPACE(2, dest); if (quotes && syntax[(int)lastc] == CCTL) USTPUTC(CTLESC, dest); USTPUTC(lastc, dest); } } } if (in.fd >= 0) close(in.fd); if (in.buf) ckfree(in.buf); if (in.jp) exitstatus = waitforjob(in.jp, (int *)NULL); TRACE(("expbackq: size=%td: \"%.*s\"\n", ((dest - stackblock()) - startloc), (int)((dest - stackblock()) - startloc), stackblock() + startloc)); expdest = dest; INTON; } static void recordleft(const char *str, const char *loc, char *startp) { int amount; amount = ((str - 1) - (loc - startp)) - expdest; STADJUST(amount, expdest); while (loc != str - 1) *startp++ = *loc++; } static void subevalvar_trim(const char *p, int strloc, int subtype, int startloc) { char *startp; char *loc = NULL; char *str; int c = 0; struct nodelist *saveargbackq = argbackq; int amount; argstr(p, EXP_CASE | EXP_TILDE, NULL); STACKSTRNUL(expdest); argbackq = saveargbackq; startp = stackblock() + startloc; str = stackblock() + strloc; switch (subtype) { case VSTRIMLEFT: for (loc = startp; loc < str; loc++) { c = *loc; *loc = '\0'; if (patmatch(str, startp)) { *loc = c; recordleft(str, loc, startp); return; } *loc = c; } break; case VSTRIMLEFTMAX: for (loc = str - 1; loc >= startp;) { c = *loc; *loc = '\0'; if (patmatch(str, startp)) { *loc = c; recordleft(str, loc, startp); return; } *loc = c; loc--; } break; case VSTRIMRIGHT: for (loc = str - 1; loc >= startp;) { if (patmatch(str, loc)) { amount = loc - expdest; STADJUST(amount, expdest); return; } loc--; } break; case VSTRIMRIGHTMAX: for (loc = startp; loc < str - 1; loc++) { if (patmatch(str, loc)) { amount = loc - expdest; STADJUST(amount, expdest); return; } } break; default: abort(); } amount = (expdest - stackblock() - strloc) + 1; STADJUST(-amount, expdest); } static int subevalvar_misc(const char *p, const char *var, int subtype, int startloc, int varflags) { char *startp; struct nodelist *saveargbackq = argbackq; int amount; argstr(p, EXP_TILDE, NULL); STACKSTRNUL(expdest); argbackq = saveargbackq; startp = stackblock() + startloc; switch (subtype) { case VSASSIGN: setvar(var, startp, 0); amount = startp - expdest; STADJUST(amount, expdest); return 1; case VSQUESTION: if (*p != CTLENDVAR) { outfmt(out2, "%s\n", startp); error((char *)NULL); } error("%.*s: parameter %snot set", (int)(p - var - 1), var, (varflags & VSNUL) ? 
"null or " : ""); return 0; default: abort(); } } /* * Expand a variable, and return a pointer to the next character in the * input string. */ static const char * evalvar(const char *p, int flag, struct worddest *dst) { int subtype; int varflags; const char *var; const char *val; int patloc; int c; int set; int special; int startloc; int varlen; int varlenb; char buf[21]; varflags = (unsigned char)*p++; subtype = varflags & VSTYPE; var = p; special = 0; if (! is_name(*p)) special = 1; p = strchr(p, '=') + 1; again: /* jump here after setting a variable with ${var=text} */ if (varflags & VSLINENO) { set = 1; special = 1; val = NULL; } else if (special) { set = varisset(var, varflags & VSNUL); val = NULL; } else { val = bltinlookup(var, 1); if (val == NULL || ((varflags & VSNUL) && val[0] == '\0')) { val = NULL; set = 0; } else set = 1; } varlen = 0; startloc = expdest - stackblock(); if (!set && uflag && *var != '@' && *var != '*') { switch (subtype) { case VSNORMAL: case VSTRIMLEFT: case VSTRIMLEFTMAX: case VSTRIMRIGHT: case VSTRIMRIGHTMAX: case VSLENGTH: error("%.*s: parameter not set", (int)(p - var - 1), var); } } if (set && subtype != VSPLUS) { /* insert the value of the variable */ if (special) { if (varflags & VSLINENO) { if (p - var > (ptrdiff_t)sizeof(buf)) abort(); memcpy(buf, var, p - var - 1); buf[p - var - 1] = '\0'; strtodest(buf, flag, subtype, varflags & VSQUOTE, dst); } else varvalue(var, varflags & VSQUOTE, subtype, flag, dst); if (subtype == VSLENGTH) { varlenb = expdest - stackblock() - startloc; varlen = varlenb; if (localeisutf8) { val = stackblock() + startloc; for (;val != expdest; val++) if ((*val & 0xC0) == 0x80) varlen--; } STADJUST(-varlenb, expdest); } } else { if (subtype == VSLENGTH) { for (;*val; val++) if (!localeisutf8 || (*val & 0xC0) != 0x80) varlen++; } else strtodest(val, flag, subtype, varflags & VSQUOTE, dst); } } if (subtype == VSPLUS) set = ! set; switch (subtype) { case VSLENGTH: cvtnum(varlen, buf); strtodest(buf, flag, VSNORMAL, varflags & VSQUOTE, dst); break; case VSNORMAL: break; case VSPLUS: case VSMINUS: if (!set) { argstr(p, flag | (flag & EXP_SPLIT ? EXP_SPLIT_LIT : 0) | (varflags & VSQUOTE ? EXP_LIT_QUOTED : 0), dst); break; } break; case VSTRIMLEFT: case VSTRIMLEFTMAX: case VSTRIMRIGHT: case VSTRIMRIGHTMAX: if (!set) break; /* * Terminate the string and start recording the pattern * right after it */ STPUTC('\0', expdest); patloc = expdest - stackblock(); subevalvar_trim(p, patloc, subtype, startloc); reprocess(startloc, flag, VSNORMAL, varflags & VSQUOTE, dst); if (flag & EXP_SPLIT && *var == '@' && varflags & VSQUOTE) dst->state = WORD_QUOTEMARK; break; case VSASSIGN: case VSQUESTION: if (!set) { if (subevalvar_misc(p, var, subtype, startloc, varflags)) { varflags &= ~VSNUL; goto again; } break; } break; case VSERROR: c = p - var - 1; error("${%.*s%s}: Bad substitution", c, var, (c > 0 && *p != CTLENDVAR) ? "..." : ""); default: abort(); } if (subtype != VSNORMAL) { /* skip to end of alternative */ int nesting = 1; for (;;) { if ((c = *p++) == CTLESC) p++; else if (c == CTLBACKQ || c == (CTLBACKQ|CTLQUOTE)) { if (set) argbackq = argbackq->next; } else if (c == CTLVAR) { if ((*p++ & VSTYPE) != VSNORMAL) nesting++; } else if (c == CTLENDVAR) { if (--nesting == 0) break; } } } return p; } /* * Test whether a specialized variable is set. 
*/ static int varisset(const char *name, int nulok) { if (*name == '!') return backgndpidset(); else if (*name == '@' || *name == '*') { if (*shellparam.p == NULL) return 0; if (nulok) { char **av; for (av = shellparam.p; *av; av++) if (**av != '\0') return 1; return 0; } } else if (is_digit(*name)) { char *ap; long num; errno = 0; num = strtol(name, NULL, 10); if (errno != 0 || num > shellparam.nparam) return 0; if (num == 0) ap = arg0; else ap = shellparam.p[num - 1]; if (nulok && (ap == NULL || *ap == '\0')) return 0; } return 1; } static void strtodest(const char *p, int flag, int subtype, int quoted, struct worddest *dst) { if (subtype == VSLENGTH || subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX || subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX) STPUTS(p, expdest); else if (flag & EXP_SPLIT && !quoted && dst != NULL) STPUTS_SPLIT(p, BASESYNTAX, flag, expdest, dst); else if (flag & (EXP_GLOB | EXP_CASE)) STPUTS_QUOTES(p, quoted ? DQSYNTAX : BASESYNTAX, expdest); else STPUTS(p, expdest); } static void reprocess(int startloc, int flag, int subtype, int quoted, struct worddest *dst) { static char *buf = NULL; static size_t buflen = 0; char *startp; size_t len, zpos, zlen; startp = stackblock() + startloc; len = expdest - startp; if (len >= SIZE_MAX / 2) abort(); INTOFF; if (len >= buflen) { ckfree(buf); buf = NULL; } if (buflen < 128) buflen = 128; while (len >= buflen) buflen <<= 1; if (buf == NULL) buf = ckmalloc(buflen); INTON; memcpy(buf, startp, len); buf[len] = '\0'; STADJUST(-len, expdest); for (zpos = 0;;) { zlen = strlen(buf + zpos); strtodest(buf + zpos, flag, subtype, quoted, dst); zpos += zlen + 1; if (zpos == len + 1) break; if (flag & EXP_SPLIT && (quoted || (zlen > 0 && zpos < len))) NEXTWORD('\0', flag, expdest, dst); } } /* * Add the value of a specialized variable to the stack string. */ static void varvalue(const char *name, int quoted, int subtype, int flag, struct worddest *dst) { int num; char *p; int i; int splitlater; char sep[2]; char **ap; char buf[(NSHORTOPTS > 10 ? 
NSHORTOPTS : 10) + 1]; if (subtype == VSLENGTH) flag &= ~EXP_FULL; splitlater = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX || subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX; switch (*name) { case '$': num = rootpid; break; case '?': num = oexitstatus; break; case '#': num = shellparam.nparam; break; case '!': num = backgndpidval(); break; case '-': p = buf; for (i = 0 ; i < NSHORTOPTS ; i++) { if (optval[i]) *p++ = optletter[i]; } *p = '\0'; strtodest(buf, flag, subtype, quoted, dst); return; case '@': if (flag & EXP_SPLIT && quoted) { for (ap = shellparam.p ; (p = *ap++) != NULL ; ) { strtodest(p, flag, subtype, quoted, dst); if (*ap) { if (splitlater) STPUTC('\0', expdest); else NEXTWORD('\0', flag, expdest, dst); } } if (shellparam.nparam > 0) dst->state = WORD_QUOTEMARK; return; } /* FALLTHROUGH */ case '*': if (ifsset()) sep[0] = ifsval()[0]; else sep[0] = ' '; sep[1] = '\0'; for (ap = shellparam.p ; (p = *ap++) != NULL ; ) { strtodest(p, flag, subtype, quoted, dst); if (!*ap) break; if (sep[0]) strtodest(sep, flag, subtype, quoted, dst); else if (flag & EXP_SPLIT && !quoted && **ap != '\0') { if (splitlater) STPUTC('\0', expdest); else NEXTWORD('\0', flag, expdest, dst); } } return; default: if (is_digit(*name)) { num = atoi(name); if (num == 0) p = arg0; else if (num > 0 && num <= shellparam.nparam) p = shellparam.p[num - 1]; else return; strtodest(p, flag, subtype, quoted, dst); } return; } cvtnum(num, buf); strtodest(buf, flag, subtype, quoted, dst); } static char expdir[PATH_MAX]; #define expdir_end (expdir + sizeof(expdir)) /* * Perform pathname generation and remove control characters. * At this point, the only control characters should be CTLESC. * The results are stored in the list dstlist. */ static void expandmeta(char *pattern, struct arglist *dstlist) { char *p; int firstmatch; char c; firstmatch = dstlist->count; p = pattern; for (; (c = *p) != '\0'; p++) { /* fast check for meta chars */ if (c == '*' || c == '?' || c == '[') { INTOFF; expmeta(expdir, pattern, dstlist); INTON; break; } } if (dstlist->count == firstmatch) { /* * no matches */ rmescapes(pattern); appendarglist(dstlist, pattern); } else { qsort(&dstlist->args[firstmatch], dstlist->count - firstmatch, sizeof(dstlist->args[0]), expsortcmp); } } /* * Do metacharacter (i.e. *, ?, [...]) expansion. */ static void expmeta(char *enddir, char *name, struct arglist *arglist) { const char *p; const char *q; const char *start; char *endname; int metaflag; struct stat statb; DIR *dirp; struct dirent *dp; int atend; int matchdot; int esc; int namlen; metaflag = 0; start = name; for (p = name; esc = 0, *p; p += esc + 1) { if (*p == '*' || *p == '?') metaflag = 1; else if (*p == '[') { q = p + 1; if (*q == '!' 
|| *q == '^') q++; for (;;) { if (*q == CTLESC) q++; if (*q == '/' || *q == '\0') break; if (*++q == ']') { metaflag = 1; break; } } } else if (*p == '\0') break; else { if (*p == CTLESC) esc++; if (p[esc] == '/') { if (metaflag) break; start = p + esc + 1; } } } if (metaflag == 0) { /* we've reached the end of the file name */ if (enddir != expdir) metaflag++; for (p = name ; ; p++) { if (*p == CTLESC) p++; *enddir++ = *p; if (*p == '\0') break; if (enddir == expdir_end) return; } if (metaflag == 0 || lstat(expdir, &statb) >= 0) appendarglist(arglist, stsavestr(expdir)); return; } endname = name + (p - name); if (start != name) { p = name; while (p < start) { if (*p == CTLESC) p++; *enddir++ = *p++; if (enddir == expdir_end) return; } } if (enddir == expdir) { p = "."; } else if (enddir == expdir + 1 && *expdir == '/') { p = "/"; } else { p = expdir; enddir[-1] = '\0'; } if ((dirp = opendir(p)) == NULL) return; if (enddir != expdir) enddir[-1] = '/'; if (*endname == 0) { atend = 1; } else { atend = 0; *endname = '\0'; endname += esc + 1; } matchdot = 0; p = start; if (*p == CTLESC) p++; if (*p == '.') matchdot++; while (! int_pending() && (dp = readdir(dirp)) != NULL) { if (dp->d_name[0] == '.' && ! matchdot) continue; if (patmatch(start, dp->d_name)) { namlen = dp->d_namlen; if (enddir + namlen + 1 > expdir_end) continue; memcpy(enddir, dp->d_name, namlen + 1); if (atend) appendarglist(arglist, stsavestr(expdir)); else { if (dp->d_type != DT_UNKNOWN && dp->d_type != DT_DIR && dp->d_type != DT_LNK) continue; if (enddir + namlen + 2 > expdir_end) continue; enddir[namlen] = '/'; enddir[namlen + 1] = '\0'; expmeta(enddir + namlen + 1, endname, arglist); } } } closedir(dirp); if (! atend) endname[-esc - 1] = esc ? CTLESC : '/'; } static int expsortcmp(const void *p1, const void *p2) { const char *s1 = *(const char * const *)p1; const char *s2 = *(const char * const *)p2; return (strcoll(s1, s2)); } static wchar_t get_wc(const char **p) { wchar_t c; int chrlen; chrlen = mbtowc(&c, *p, 4); if (chrlen == 0) return 0; else if (chrlen == -1) c = 0; else *p += chrlen; return c; } /* * See if a character matches a character class, starting at the first colon * of "[:class:]". * If a valid character class is recognized, a pointer to the next character * after the final closing bracket is stored into *end, otherwise a null * pointer is stored into *end. */ static int match_charclass(const char *p, wchar_t chr, const char **end) { char name[20]; const char *nameend; wctype_t cclass; *end = NULL; p++; nameend = strstr(p, ":]"); if (nameend == NULL || (size_t)(nameend - p) >= sizeof(name) || nameend == p) return 0; memcpy(name, p, nameend - p); name[nameend - p] = '\0'; *end = nameend + 2; cclass = wctype(name); /* An unknown class matches nothing but is valid nevertheless. */ if (cclass == 0) return 0; return iswctype(chr, cclass); } /* * Returns true if the pattern matches the string. */ static int patmatch(const char *pattern, const char *string) { const char *p, *q, *end; const char *bt_p, *bt_q; char c; wchar_t wc, wc2; p = pattern; q = string; bt_p = NULL; bt_q = NULL; for (;;) { switch (c = *p++) { case '\0': if (*q != '\0') goto backtrack; return 1; case CTLESC: if (*q++ != *p++) goto backtrack; break; case '?': if (*q == '\0') return 0; if (localeisutf8) { wc = get_wc(&q); /* * A '?' does not match invalid UTF-8 but a * '*' does, so backtrack. 
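				 * As an illustrative case: in a UTF-8
				 * locale, against the bytes "a\377b" the
				 * pattern "a?b" fails here, while "a*b"
				 * still matches because the backtracking
				 * below advances over the invalid byte.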
*/ if (wc == 0) goto backtrack; } else q++; break; case '*': c = *p; while (c == '*') c = *++p; /* * If the pattern ends here, we know the string * matches without needing to look at the rest of it. */ if (c == '\0') return 1; /* * First try the shortest match for the '*' that * could work. We can forget any earlier '*' since * there is no way having it match more characters * can help us, given that we are already here. */ bt_p = p; bt_q = q; break; case '[': { const char *savep, *saveq; int invert, found; wchar_t chr; savep = p, saveq = q; invert = 0; if (*p == '!' || *p == '^') { invert++; p++; } found = 0; if (*q == '\0') return 0; if (localeisutf8) { chr = get_wc(&q); if (chr == 0) goto backtrack; } else chr = (unsigned char)*q++; c = *p++; do { if (c == '\0') { p = savep, q = saveq; c = '['; goto dft; } if (c == '[' && *p == ':') { found |= match_charclass(p, chr, &end); if (end != NULL) p = end; } if (c == CTLESC) c = *p++; if (localeisutf8 && c & 0x80) { p--; wc = get_wc(&p); if (wc == 0) /* bad utf-8 */ return 0; } else wc = (unsigned char)c; if (*p == '-' && p[1] != ']') { p++; if (*p == CTLESC) p++; if (localeisutf8) { wc2 = get_wc(&p); if (wc2 == 0) /* bad utf-8 */ return 0; } else wc2 = (unsigned char)*p++; if ( collate_range_cmp(chr, wc) >= 0 && collate_range_cmp(chr, wc2) <= 0 ) found = 1; } else { if (chr == wc) found = 1; } } while ((c = *p++) != ']'); if (found == invert) goto backtrack; break; } dft: default: if (*q == '\0') return 0; if (*q++ == c) break; backtrack: /* * If we have a mismatch (other than hitting the end * of the string), go back to the last '*' seen and * have it match one additional character. */ if (bt_p == NULL) return 0; if (*bt_q == '\0') return 0; bt_q++; p = bt_p; q = bt_q; break; } } } /* * Remove any CTLESC and CTLQUOTEMARK characters from a string. */ void rmescapes(char *str) { char *p, *q; p = str; while (*p != CTLESC && *p != CTLQUOTEMARK && *p != CTLQUOTEEND) { if (*p++ == '\0') return; } q = p; while (*p) { if (*p == CTLQUOTEMARK || *p == CTLQUOTEEND) { p++; continue; } if (*p == CTLESC) p++; *q++ = *p++; } *q = '\0'; } /* * See if a pattern matches in a case statement. */ int casematch(union node *pattern, const char *val) { struct stackmark smark; int result; char *p; setstackmark(&smark); argbackq = pattern->narg.backquote; STARTSTACKSTR(expdest); argstr(pattern->narg.text, EXP_TILDE | EXP_CASE, NULL); STPUTC('\0', expdest); p = grabstackstr(expdest); result = patmatch(p, val); popstackmark(&smark); return result; } /* * Our own itoa(). */ static void cvtnum(int num, char *buf) { char temp[32]; int neg = num < 0; char *p = temp + 31; temp[31] = '\0'; do { *--p = num % 10 + '0'; } while ((num /= 10) != 0); if (neg) *--p = '-'; memcpy(buf, p, temp + 32 - p); } /* * Do most of the work for wordexp(3). */ int wordexpcmd(int argc, char **argv) { size_t len; int i; out1fmt("%08x", argc - 1); for (i = 1, len = 0; i < argc; i++) len += strlen(argv[i]); out1fmt("%08x", (int)len); for (i = 1; i < argc; i++) outbin(argv[i], strlen(argv[i]) + 1, out1); return (0); } /* * Do most of the work for wordexp(3), new version. 
*/ int freebsd_wordexpcmd(int argc __unused, char **argv __unused) { struct arglist arglist; union node *args, *n; size_t len; int ch; int protected = 0; int fd = -1; int i; while ((ch = nextopt("f:p")) != '\0') { switch (ch) { case 'f': fd = number(shoptarg); break; case 'p': protected = 1; break; } } if (*argptr != NULL) error("wrong number of arguments"); if (fd < 0) error("missing fd"); INTOFF; setinputfd(fd, 1); INTON; args = parsewordexp(); popfile(); /* will also close fd */ if (protected) for (n = args; n != NULL; n = n->narg.next) { if (n->narg.backquote != NULL) { outcslow('C', out1); error("command substitution disabled"); } } outcslow(' ', out1); emptyarglist(&arglist); for (n = args; n != NULL; n = n->narg.next) expandarg(n, &arglist, EXP_FULL | EXP_TILDE); for (i = 0, len = 0; i < arglist.count; i++) len += strlen(arglist.args[i]); out1fmt("%016x %016zx", arglist.count, len); for (i = 0; i < arglist.count; i++) outbin(arglist.args[i], strlen(arglist.args[i]) + 1, out1); return (0); } Index: user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libdtrace/arm/dt_isadep.c =================================================================== --- user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libdtrace/arm/dt_isadep.c (revision 303641) +++ user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libdtrace/arm/dt_isadep.c (revision 303642) @@ -1,190 +1,188 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * Copyright 2014 Howard Su * Copyright 2015 George V. 
Neville-Neil * */ #pragma ident "%Z%%M% %I% %E% SMI" #include #include #include #include #include #include #include -#if !defined(sun) -#define PR_MODEL_ILP32 1 -#define PR_MODEL_LP64 2 +#ifdef __FreeBSD__ #include #endif #define OP(x) ((x) >> 30) #define OP2(x) (((x) >> 22) & 0x07) #define COND(x) (((x) >> 25) & 0x0f) #define A(x) (((x) >> 29) & 0x01) #define OP_BRANCH 0 #define OP2_BPcc 0x1 #define OP2_Bicc 0x2 #define OP2_BPr 0x3 #define OP2_FBPfcc 0x5 #define OP2_FBfcc 0x6 /*ARGSUSED*/ int dt_pid_create_entry_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp) { ftp->ftps_type = DTFTP_ENTRY; ftp->ftps_pc = (uintptr_t)symp->st_value; ftp->ftps_size = (size_t)symp->st_size; ftp->ftps_noffs = 1; ftp->ftps_offs[0] = 0; if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { dt_dprintf("fasttrap probe creation ioctl failed: %s\n", strerror(errno)); return (dt_set_errno(dtp, errno)); } return (1); } int dt_pid_create_return_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, uint64_t *stret) { uint32_t *text; int i; int srdepth = 0; dt_dprintf("%s: unimplemented\n", __func__); return (DT_PROC_ERR); if ((text = malloc(symp->st_size + 4)) == NULL) { dt_dprintf("mr sparkle: malloc() failed\n"); return (DT_PROC_ERR); } if (Pread(P, text, symp->st_size, symp->st_value) != symp->st_size) { dt_dprintf("mr sparkle: Pread() failed\n"); free(text); return (DT_PROC_ERR); } /* * Leave a dummy instruction in the last slot to simplify edge * conditions. */ text[symp->st_size / 4] = 0; ftp->ftps_type = DTFTP_RETURN; ftp->ftps_pc = symp->st_value; ftp->ftps_size = symp->st_size; ftp->ftps_noffs = 0; free(text); if (ftp->ftps_noffs > 0) { if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { dt_dprintf("fasttrap probe creation ioctl failed: %s\n", strerror(errno)); return (dt_set_errno(dtp, errno)); } } return (ftp->ftps_noffs); } /*ARGSUSED*/ int dt_pid_create_offset_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, ulong_t off) { if (off & 0x3) return (DT_PROC_ALIGN); ftp->ftps_type = DTFTP_OFFSETS; ftp->ftps_pc = (uintptr_t)symp->st_value; ftp->ftps_size = (size_t)symp->st_size; ftp->ftps_noffs = 1; ftp->ftps_offs[0] = off; if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { dt_dprintf("fasttrap probe creation ioctl failed: %s\n", strerror(errno)); return (dt_set_errno(dtp, errno)); } return (1); } /*ARGSUSED*/ int dt_pid_create_glob_offset_probes(struct ps_prochandle *P, dtrace_hdl_t *dtp, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, const char *pattern) { ulong_t i; ftp->ftps_type = DTFTP_OFFSETS; ftp->ftps_pc = (uintptr_t)symp->st_value; ftp->ftps_size = (size_t)symp->st_size; ftp->ftps_noffs = 0; /* * If we're matching against everything, just iterate through each * instruction in the function, otherwise look for matching offset * names by constructing the string and comparing it against the * pattern. 
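 * As an illustrative example: for a 0x20-byte ARM function, the pattern "*"
 * records the offsets 0, 4, ..., 0x1c, whereas a pattern such as "1*" only
 * matches the offsets whose hex names begin with '1' (0x10 through 0x1c).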
*/ if (strcmp("*", pattern) == 0) { for (i = 0; i < symp->st_size; i += 4) { ftp->ftps_offs[ftp->ftps_noffs++] = i; } } else { char name[sizeof (i) * 2 + 1]; for (i = 0; i < symp->st_size; i += 4) { (void) sprintf(name, "%lx", i); if (gmatch(name, pattern)) ftp->ftps_offs[ftp->ftps_noffs++] = i; } } if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { dt_dprintf("fasttrap probe creation ioctl failed: %s\n", strerror(errno)); return (dt_set_errno(dtp, errno)); } return (ftp->ftps_noffs); } Index: user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libdtrace/i386/dt_isadep.c =================================================================== --- user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libdtrace/i386/dt_isadep.c (revision 303641) +++ user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libdtrace/i386/dt_isadep.c (revision 303642) @@ -1,537 +1,520 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2012 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include -#ifndef illumos -#define PR_MODEL_ILP32 1 -#define PR_MODEL_LP64 2 +#ifdef __FreeBSD__ +#include #include #endif #define DT_POPL_EBP 0x5d #define DT_RET 0xc3 #define DT_RET16 0xc2 #define DT_LEAVE 0xc9 #define DT_JMP32 0xe9 #define DT_JMP8 0xeb #define DT_REP 0xf3 #define DT_MOVL_EBP_ESP 0xe58b #define DT_ISJ32(op16) (((op16) & 0xfff0) == 0x0f80) #define DT_ISJ8(op8) (((op8) & 0xf0) == 0x70) #define DT_MODRM_REG(modrm) (((modrm) >> 3) & 0x7) static int dt_instr_size(uchar_t *, dtrace_hdl_t *, pid_t, uintptr_t, char); /*ARGSUSED*/ int dt_pid_create_entry_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp) { ftp->ftps_type = DTFTP_ENTRY; ftp->ftps_pc = (uintptr_t)symp->st_value; ftp->ftps_size = (size_t)symp->st_size; ftp->ftps_noffs = 1; ftp->ftps_offs[0] = 0; if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { dt_dprintf("fasttrap probe creation ioctl failed: %s\n", strerror(errno)); return (dt_set_errno(dtp, errno)); } return (1); } static int dt_pid_has_jump_table(struct ps_prochandle *P, dtrace_hdl_t *dtp, uint8_t *text, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp) { ulong_t i; int size; #ifdef illumos pid_t pid = Pstatus(P)->pr_pid; char dmodel = Pstatus(P)->pr_dmodel; #else pid_t pid = proc_getpid(P); -#if __i386__ - char dmodel = PR_MODEL_ILP32; -#elif __amd64__ - char dmodel = PR_MODEL_LP64; + char dmodel = proc_getmodel(P); #endif -#endif /* * Take a pass through the function looking for a register-dependant * jmp instruction. This could be a jump table so we have to be * ultra conservative. 
*/ for (i = 0; i < ftp->ftps_size; i += size) { size = dt_instr_size(&text[i], dtp, pid, symp->st_value + i, dmodel); /* * Assume the worst if we hit an illegal instruction. */ if (size <= 0) { dt_dprintf("error at %#lx (assuming jump table)\n", i); return (1); } #ifdef notyet /* * Register-dependant jmp instructions start with a 0xff byte * and have the modrm.reg field set to 4. They can have an * optional REX prefix on the 64-bit ISA. */ if ((text[i] == 0xff && DT_MODRM_REG(text[i + 1]) == 4) || (dmodel == PR_MODEL_LP64 && (text[i] & 0xf0) == 0x40 && text[i + 1] == 0xff && DT_MODRM_REG(text[i + 2]) == 4)) { dt_dprintf("found a suspected jump table at %s:%lx\n", ftp->ftps_func, i); return (1); } #endif } return (0); } /*ARGSUSED*/ int dt_pid_create_return_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, uint64_t *stret) { uint8_t *text; ulong_t i, end; int size; #ifdef illumos pid_t pid = Pstatus(P)->pr_pid; char dmodel = Pstatus(P)->pr_dmodel; #else pid_t pid = proc_getpid(P); -#if __i386__ - char dmodel = PR_MODEL_ILP32; -#elif __amd64__ - char dmodel = PR_MODEL_LP64; + char dmodel = proc_getmodel(P); #endif -#endif /* * We allocate a few extra bytes at the end so we don't have to check * for overrunning the buffer. */ if ((text = calloc(1, symp->st_size + 4)) == NULL) { dt_dprintf("mr sparkle: malloc() failed\n"); return (DT_PROC_ERR); } if (Pread(P, text, symp->st_size, symp->st_value) != symp->st_size) { dt_dprintf("mr sparkle: Pread() failed\n"); free(text); return (DT_PROC_ERR); } ftp->ftps_type = DTFTP_RETURN; ftp->ftps_pc = (uintptr_t)symp->st_value; ftp->ftps_size = (size_t)symp->st_size; ftp->ftps_noffs = 0; /* * If there's a jump table in the function we're only willing to * instrument these specific (and equivalent) instruction sequences: * leave * [rep] ret * and * movl %ebp,%esp * popl %ebp * [rep] ret * * We do this to avoid accidentally interpreting jump table * offsets as actual instructions. 
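 *
 * (Editor's note, not part of the original source.)  The FreeBSD-specific
 * change in this revision replaces the old compile-time PR_MODEL guess,
 * which was keyed to the architecture libdtrace itself was built for, with
 *
 *	char dmodel = proc_getmodel(P);
 *
 * obtained from the victim process via libproc; dt_instr_size() at the end
 * of this file then selects the disassembler mode with
 *
 *	cpu_mode = (dmodel == PR_MODEL_ILP32) ? SIZE32 : SIZE64;
 *
 * presumably so that instruction sizing stays correct when, say, a 64-bit
 * libdtrace inspects a 32-bit victim.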
*/ if (dt_pid_has_jump_table(P, dtp, text, ftp, symp)) { for (i = 0, end = ftp->ftps_size; i < end; i += size) { size = dt_instr_size(&text[i], dtp, pid, symp->st_value + i, dmodel); /* bail if we hit an invalid opcode */ if (size <= 0) break; if (text[i] == DT_LEAVE && text[i + 1] == DT_RET) { dt_dprintf("leave/ret at %lx\n", i + 1); ftp->ftps_offs[ftp->ftps_noffs++] = i + 1; size = 2; } else if (text[i] == DT_LEAVE && text[i + 1] == DT_REP && text[i + 2] == DT_RET) { dt_dprintf("leave/rep ret at %lx\n", i + 1); ftp->ftps_offs[ftp->ftps_noffs++] = i + 1; size = 3; } else if (*(uint16_t *)&text[i] == DT_MOVL_EBP_ESP && text[i + 2] == DT_POPL_EBP && text[i + 3] == DT_RET) { dt_dprintf("movl/popl/ret at %lx\n", i + 3); ftp->ftps_offs[ftp->ftps_noffs++] = i + 3; size = 4; } else if (*(uint16_t *)&text[i] == DT_MOVL_EBP_ESP && text[i + 2] == DT_POPL_EBP && text[i + 3] == DT_REP && text[i + 4] == DT_RET) { dt_dprintf("movl/popl/rep ret at %lx\n", i + 3); ftp->ftps_offs[ftp->ftps_noffs++] = i + 3; size = 5; } } } else { for (i = 0, end = ftp->ftps_size; i < end; i += size) { size = dt_instr_size(&text[i], dtp, pid, symp->st_value + i, dmodel); /* bail if we hit an invalid opcode */ if (size <= 0) break; /* ordinary ret */ if (size == 1 && text[i] == DT_RET) goto is_ret; /* two-byte ret */ if (size == 2 && text[i] == DT_REP && text[i + 1] == DT_RET) goto is_ret; /* ret */ if (size == 3 && text[i] == DT_RET16) goto is_ret; /* two-byte ret */ if (size == 4 && text[i] == DT_REP && text[i + 1] == DT_RET16) goto is_ret; /* 32-bit displacement jmp outside of the function */ if (size == 5 && text[i] == DT_JMP32 && symp->st_size <= (uintptr_t)(i + size + *(int32_t *)&text[i + 1])) goto is_ret; /* 8-bit displacement jmp outside of the function */ if (size == 2 && text[i] == DT_JMP8 && symp->st_size <= (uintptr_t)(i + size + *(int8_t *)&text[i + 1])) goto is_ret; /* 32-bit disp. conditional jmp outside of the func. */ if (size == 6 && DT_ISJ32(*(uint16_t *)&text[i]) && symp->st_size <= (uintptr_t)(i + size + *(int32_t *)&text[i + 2])) goto is_ret; /* 8-bit disp. conditional jmp outside of the func. 
*/ if (size == 2 && DT_ISJ8(text[i]) && symp->st_size <= (uintptr_t)(i + size + *(int8_t *)&text[i + 1])) goto is_ret; continue; is_ret: dt_dprintf("return at offset %lx\n", i); ftp->ftps_offs[ftp->ftps_noffs++] = i; } } free(text); if (ftp->ftps_noffs > 0) { if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { dt_dprintf("fasttrap probe creation ioctl failed: %s\n", strerror(errno)); return (dt_set_errno(dtp, errno)); } } return (ftp->ftps_noffs); } /*ARGSUSED*/ int dt_pid_create_offset_probe(struct ps_prochandle *P, dtrace_hdl_t *dtp, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, ulong_t off) { ftp->ftps_type = DTFTP_OFFSETS; ftp->ftps_pc = (uintptr_t)symp->st_value; ftp->ftps_size = (size_t)symp->st_size; ftp->ftps_noffs = 1; if (strcmp("-", ftp->ftps_func) == 0) { ftp->ftps_offs[0] = off; } else { uint8_t *text; ulong_t i; int size; #ifdef illumos pid_t pid = Pstatus(P)->pr_pid; char dmodel = Pstatus(P)->pr_dmodel; #else pid_t pid = proc_getpid(P); -#if __i386__ - char dmodel = PR_MODEL_ILP32; -#elif __amd64__ - char dmodel = PR_MODEL_LP64; + char dmodel = proc_getmodel(P); #endif -#endif if ((text = malloc(symp->st_size)) == NULL) { dt_dprintf("mr sparkle: malloc() failed\n"); return (DT_PROC_ERR); } if (Pread(P, text, symp->st_size, symp->st_value) != symp->st_size) { dt_dprintf("mr sparkle: Pread() failed\n"); free(text); return (DT_PROC_ERR); } /* * We can't instrument offsets in functions with jump tables * as we might interpret a jump table offset as an * instruction. */ if (dt_pid_has_jump_table(P, dtp, text, ftp, symp)) { free(text); return (0); } for (i = 0; i < symp->st_size; i += size) { if (i == off) { ftp->ftps_offs[0] = i; break; } /* * If we've passed the desired offset without a * match, then the given offset must not lie on a * instruction boundary. */ if (i > off) { free(text); return (DT_PROC_ALIGN); } size = dt_instr_size(&text[i], dtp, pid, symp->st_value + i, dmodel); /* * If we hit an invalid instruction, bail as if we * couldn't find the offset. */ if (size <= 0) { free(text); return (DT_PROC_ALIGN); } } free(text); } if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { dt_dprintf("fasttrap probe creation ioctl failed: %s\n", strerror(errno)); return (dt_set_errno(dtp, errno)); } return (ftp->ftps_noffs); } /*ARGSUSED*/ int dt_pid_create_glob_offset_probes(struct ps_prochandle *P, dtrace_hdl_t *dtp, fasttrap_probe_spec_t *ftp, const GElf_Sym *symp, const char *pattern) { uint8_t *text; int size; ulong_t i, end = symp->st_size; #ifdef illumos pid_t pid = Pstatus(P)->pr_pid; char dmodel = Pstatus(P)->pr_dmodel; #else pid_t pid = proc_getpid(P); -#if __i386__ - char dmodel = PR_MODEL_ILP32; -#elif __amd64__ - char dmodel = PR_MODEL_LP64; -#endif + char dmodel = proc_getmodel(P); #endif ftp->ftps_type = DTFTP_OFFSETS; ftp->ftps_pc = (uintptr_t)symp->st_value; ftp->ftps_size = (size_t)symp->st_size; ftp->ftps_noffs = 0; if ((text = malloc(symp->st_size)) == NULL) { dt_dprintf("mr sparkle: malloc() failed\n"); return (DT_PROC_ERR); } if (Pread(P, text, symp->st_size, symp->st_value) != symp->st_size) { dt_dprintf("mr sparkle: Pread() failed\n"); free(text); return (DT_PROC_ERR); } /* * We can't instrument offsets in functions with jump tables as * we might interpret a jump table offset as an instruction. 
*/ if (dt_pid_has_jump_table(P, dtp, text, ftp, symp)) { free(text); return (0); } if (strcmp("*", pattern) == 0) { for (i = 0; i < end; i += size) { ftp->ftps_offs[ftp->ftps_noffs++] = i; size = dt_instr_size(&text[i], dtp, pid, symp->st_value + i, dmodel); /* bail if we hit an invalid opcode */ if (size <= 0) break; } } else { char name[sizeof (i) * 2 + 1]; for (i = 0; i < end; i += size) { (void) snprintf(name, sizeof (name), "%lx", i); if (gmatch(name, pattern)) ftp->ftps_offs[ftp->ftps_noffs++] = i; size = dt_instr_size(&text[i], dtp, pid, symp->st_value + i, dmodel); /* bail if we hit an invalid opcode */ if (size <= 0) break; } } free(text); if (ftp->ftps_noffs > 0) { if (ioctl(dtp->dt_ftfd, FASTTRAPIOC_MAKEPROBE, ftp) != 0) { dt_dprintf("fasttrap probe creation ioctl failed: %s\n", strerror(errno)); return (dt_set_errno(dtp, errno)); } } return (ftp->ftps_noffs); } typedef struct dtrace_dis { uchar_t *instr; dtrace_hdl_t *dtp; pid_t pid; uintptr_t addr; } dtrace_dis_t; static int dt_getbyte(void *data) { dtrace_dis_t *dis = data; int ret = *dis->instr; if (ret == FASTTRAP_INSTR) { fasttrap_instr_query_t instr; instr.ftiq_pid = dis->pid; instr.ftiq_pc = dis->addr; /* * If we hit a byte that looks like the fasttrap provider's * trap instruction (which doubles as the breakpoint * instruction for debuggers) we need to query the kernel * for the real value. This may just be part of an immediate * value so there's no need to return an error if the * kernel doesn't know about this address. */ if (ioctl(dis->dtp->dt_ftfd, FASTTRAPIOC_GETINSTR, &instr) == 0) ret = instr.ftiq_instr; } dis->addr++; dis->instr++; return (ret); } static int dt_instr_size(uchar_t *instr, dtrace_hdl_t *dtp, pid_t pid, uintptr_t addr, char dmodel) { dtrace_dis_t data; dis86_t x86dis; uint_t cpu_mode; data.instr = instr; data.dtp = dtp; data.pid = pid; data.addr = addr; x86dis.d86_data = &data; x86dis.d86_get_byte = dt_getbyte; x86dis.d86_check_func = NULL; cpu_mode = (dmodel == PR_MODEL_ILP32) ? SIZE32 : SIZE64; if (dtrace_disx86(&x86dis, cpu_mode) != 0) return (-1); /* * If the instruction was a single-byte breakpoint, there may be * another debugger attached to this process. The original instruction * can't be recovered so this must fail. */ if (x86dis.d86_len == 1 && (uchar_t)x86dis.d86_bytes[0] == FASTTRAP_INSTR) return (-1); return (x86dis.d86_len); } Index: user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libzpool/common/util.c =================================================================== --- user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libzpool/common/util.c (revision 303641) +++ user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris/lib/libzpool/common/util.c (revision 303642) @@ -1,155 +1,155 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ #include #include #include #include #include #include #include #include #include /* * Routines needed by more than one client of libzpool. */ void nicenum(uint64_t num, char *buf) { uint64_t n = num; int index = 0; char u; while (n >= 1024) { n = (n + (1024 / 2)) / 1024; /* Round up or down */ index++; } u = " KMGTPE"[index]; if (index == 0) { (void) sprintf(buf, "%llu", (u_longlong_t)n); } else if (n < 10 && (num & (num - 1)) != 0) { (void) sprintf(buf, "%.2f%c", (double)num / (1ULL << 10 * index), u); } else if (n < 100 && (num & (num - 1)) != 0) { (void) sprintf(buf, "%.1f%c", (double)num / (1ULL << 10 * index), u); } else { (void) sprintf(buf, "%llu%c", (u_longlong_t)n, u); } } static void show_vdev_stats(const char *desc, const char *ctype, nvlist_t *nv, int indent) { vdev_stat_t *vs; vdev_stat_t v0 = { 0 }; uint64_t sec; uint64_t is_log = 0; nvlist_t **child; uint_t c, children; char used[6], avail[6]; char rops[6], wops[6], rbytes[6], wbytes[6], rerr[6], werr[6], cerr[6]; char *prefix = ""; if (indent == 0 && desc != NULL) { (void) printf(" " " capacity operations bandwidth ---- errors ----\n"); (void) printf("description " "used avail read write read write read write cksum\n"); } if (desc != NULL) { (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log); if (is_log) prefix = "log "; if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) != 0) vs = &v0; sec = MAX(1, vs->vs_timestamp / NANOSEC); nicenum(vs->vs_alloc, used); nicenum(vs->vs_space - vs->vs_alloc, avail); nicenum(vs->vs_ops[ZIO_TYPE_READ] / sec, rops); nicenum(vs->vs_ops[ZIO_TYPE_WRITE] / sec, wops); nicenum(vs->vs_bytes[ZIO_TYPE_READ] / sec, rbytes); nicenum(vs->vs_bytes[ZIO_TYPE_WRITE] / sec, wbytes); nicenum(vs->vs_read_errors, rerr); nicenum(vs->vs_write_errors, werr); nicenum(vs->vs_checksum_errors, cerr); (void) printf("%*s%s%*s%*s%*s %5s %5s %5s %5s %5s %5s %5s\n", indent, "", prefix, - indent + strlen(prefix) - 25 - (vs->vs_space ? 0 : 12), + (int)(indent + strlen(prefix) - 25 - (vs->vs_space ? 0 : 12)), desc, vs->vs_space ? 6 : 0, vs->vs_space ? used : "", vs->vs_space ? 6 : 0, vs->vs_space ? 
avail : "", rops, wops, rbytes, wbytes, rerr, werr, cerr); } if (nvlist_lookup_nvlist_array(nv, ctype, &child, &children) != 0) return; for (c = 0; c < children; c++) { nvlist_t *cnv = child[c]; char *cname, *tname; uint64_t np; if (nvlist_lookup_string(cnv, ZPOOL_CONFIG_PATH, &cname) && nvlist_lookup_string(cnv, ZPOOL_CONFIG_TYPE, &cname)) cname = ""; tname = calloc(1, strlen(cname) + 2); (void) strcpy(tname, cname); if (nvlist_lookup_uint64(cnv, ZPOOL_CONFIG_NPARITY, &np) == 0) tname[strlen(tname)] = '0' + np; show_vdev_stats(tname, ctype, cnv, indent + 2); free(tname); } } void show_pool_stats(spa_t *spa) { nvlist_t *config, *nvroot; char *name; VERIFY(spa_get_stats(spa_name(spa), &config, NULL, 0) == 0); VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &name) == 0); show_vdev_stats(name, ZPOOL_CONFIG_CHILDREN, nvroot, 0); show_vdev_stats(NULL, ZPOOL_CONFIG_L2CACHE, nvroot, 0); show_vdev_stats(NULL, ZPOOL_CONFIG_SPARES, nvroot, 0); nvlist_free(config); } Index: user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris =================================================================== --- user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris (revision 303641) +++ user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris (revision 303642) Property changes on: user/alc/PQ_LAUNDRY/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl/contrib/opensolaris:r303205-303641 Index: user/alc/PQ_LAUNDRY/cddl =================================================================== --- user/alc/PQ_LAUNDRY/cddl (revision 303641) +++ user/alc/PQ_LAUNDRY/cddl (revision 303642) Property changes on: user/alc/PQ_LAUNDRY/cddl ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl:r303205-303641 Index: user/alc/PQ_LAUNDRY/contrib/blacklist/lib/bl.c =================================================================== --- user/alc/PQ_LAUNDRY/contrib/blacklist/lib/bl.c (revision 303641) +++ user/alc/PQ_LAUNDRY/contrib/blacklist/lib/bl.c (revision 303642) @@ -1,524 +1,527 @@ /* $NetBSD: bl.c,v 1.27 2015/12/30 16:42:48 christos Exp $ */ /*- * Copyright (c) 2014 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Christos Zoulas. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include __RCSID("$NetBSD: bl.c,v 1.27 2015/12/30 16:42:48 christos Exp $"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _REENTRANT #include #endif #include "bl.h" typedef struct { uint32_t bl_len; uint32_t bl_version; uint32_t bl_type; uint32_t bl_salen; struct sockaddr_storage bl_ss; char bl_data[]; } bl_message_t; struct blacklist { #ifdef _REENTRANT pthread_mutex_t b_mutex; # define BL_INIT(b) pthread_mutex_init(&b->b_mutex, NULL) # define BL_LOCK(b) pthread_mutex_lock(&b->b_mutex) # define BL_UNLOCK(b) pthread_mutex_unlock(&b->b_mutex) #else # define BL_INIT(b) do {} while(/*CONSTCOND*/0) # define BL_LOCK(b) BL_INIT(b) # define BL_UNLOCK(b) BL_INIT(b) #endif int b_fd; int b_connected; struct sockaddr_un b_sun; void (*b_fun)(int, const char *, va_list); bl_info_t b_info; }; #define BL_VERSION 1 bool bl_isconnected(bl_t b) { return b->b_connected == 0; } int bl_getfd(bl_t b) { return b->b_fd; } static void bl_reset(bl_t b, bool locked) { int serrno = errno; if (!locked) BL_LOCK(b); close(b->b_fd); errno = serrno; b->b_fd = -1; b->b_connected = -1; if (!locked) BL_UNLOCK(b); } static void bl_log(void (*fun)(int, const char *, va_list), int level, const char *fmt, ...) { va_list ap; int serrno = errno; va_start(ap, fmt); (*fun)(level, fmt, ap); va_end(ap); errno = serrno; } static int bl_init(bl_t b, bool srv) { static int one = 1; /* AF_UNIX address of local logger */ mode_t om; int rv, serrno; struct sockaddr_un *sun = &b->b_sun; #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK 0 #endif #ifndef SOCK_CLOEXEC #define SOCK_CLOEXEC 0 #endif #ifndef SOCK_NOSIGPIPE #define SOCK_NOSIGPIPE 0 #endif BL_LOCK(b); if (b->b_fd == -1) { b->b_fd = socket(PF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK|SOCK_NOSIGPIPE, 0); if (b->b_fd == -1) { - bl_log(b->b_fun, LOG_ERR, "%s: socket failed (%m)", - __func__); + bl_log(b->b_fun, LOG_ERR, "%s: socket failed (%s)", + __func__, strerror(errno)); BL_UNLOCK(b); return -1; } #if SOCK_CLOEXEC == 0 fcntl(b->b_fd, F_SETFD, FD_CLOEXEC); #endif #if SOCK_NONBLOCK == 0 fcntl(b->b_fd, F_SETFL, fcntl(b->b_fd, F_GETFL) | O_NONBLOCK); #endif #if SOCK_NOSIGPIPE == 0 #ifdef SO_NOSIGPIPE int o = 1; setsockopt(b->b_fd, SOL_SOCKET, SO_NOSIGPIPE, &o, sizeof(o)); #else signal(SIGPIPE, SIG_IGN); #endif #endif } if (bl_isconnected(b)) { BL_UNLOCK(b); return 0; } /* * We try to connect anyway even when we are a server to verify * that no other server is listening to the socket. If we succeed * to connect and we are a server, someone else owns it. */ rv = connect(b->b_fd, (const void *)sun, (socklen_t)sizeof(*sun)); if (rv == 0) { if (srv) { bl_log(b->b_fun, LOG_ERR, "%s: another daemon is handling `%s'", __func__, sun->sun_path); goto out; } } else { if (!srv) { /* * If the daemon is not running, we just try a * connect, so leave the socket alone until it does * and only log once. 
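 *
 * (Editor's note, not part of the original source.)  The (%m) -> (%s) plus
 * strerror(errno) conversions throughout this file appear to be because %m
 * is a syslog(3) formatting extension: it is only expanded when b_fun ends
 * up calling vsyslog(3), not when the caller supplies its own
 * vfprintf(3)-style logger, so the errno text is now formatted explicitly.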
*/ if (b->b_connected != 1) { bl_log(b->b_fun, LOG_DEBUG, - "%s: connect failed for `%s' (%m)", - __func__, sun->sun_path); + "%s: connect failed for `%s' (%s)", + __func__, sun->sun_path, strerror(errno)); b->b_connected = 1; } BL_UNLOCK(b); return -1; } bl_log(b->b_fun, LOG_DEBUG, "Connected to blacklist server", __func__); } if (srv) { (void)unlink(sun->sun_path); om = umask(0); rv = bind(b->b_fd, (const void *)sun, (socklen_t)sizeof(*sun)); serrno = errno; (void)umask(om); errno = serrno; if (rv == -1) { bl_log(b->b_fun, LOG_ERR, - "%s: bind failed for `%s' (%m)", - __func__, sun->sun_path); + "%s: bind failed for `%s' (%s)", + __func__, sun->sun_path, strerror(errno)); goto out; } } b->b_connected = 0; #define GOT_FD 1 #if defined(LOCAL_CREDS) #define CRED_LEVEL 0 #define CRED_NAME LOCAL_CREDS #define CRED_SC_UID sc_euid #define CRED_SC_GID sc_egid #define CRED_MESSAGE SCM_CREDS #define CRED_SIZE SOCKCREDSIZE(NGROUPS_MAX) #define CRED_TYPE struct sockcred #define GOT_CRED 2 #elif defined(SO_PASSCRED) #define CRED_LEVEL SOL_SOCKET #define CRED_NAME SO_PASSCRED #define CRED_SC_UID uid #define CRED_SC_GID gid #define CRED_MESSAGE SCM_CREDENTIALS #define CRED_SIZE sizeof(struct ucred) #define CRED_TYPE struct ucred #define GOT_CRED 2 #else #define GOT_CRED 0 /* * getpeereid() and LOCAL_PEERCRED don't help here * because we are not a stream socket! */ #define CRED_SIZE 0 #define CRED_TYPE void * __unused #endif #ifdef CRED_LEVEL if (setsockopt(b->b_fd, CRED_LEVEL, CRED_NAME, &one, (socklen_t)sizeof(one)) == -1) { bl_log(b->b_fun, LOG_ERR, "%s: setsockopt %s " - "failed (%m)", __func__, __STRING(CRED_NAME)); + "failed (%s)", __func__, __STRING(CRED_NAME), + strerror(errno)); goto out; } #endif BL_UNLOCK(b); return 0; out: bl_reset(b, true); BL_UNLOCK(b); return -1; } bl_t bl_create(bool srv, const char *path, void (*fun)(int, const char *, va_list)) { bl_t b = calloc(1, sizeof(*b)); if (b == NULL) goto out; b->b_fun = fun == NULL ? vsyslog : fun; b->b_fd = -1; b->b_connected = -1; BL_INIT(b); memset(&b->b_sun, 0, sizeof(b->b_sun)); b->b_sun.sun_family = AF_LOCAL; #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN b->b_sun.sun_len = sizeof(b->b_sun); #endif strlcpy(b->b_sun.sun_path, path ? 
path : _PATH_BLSOCK, sizeof(b->b_sun.sun_path)); bl_init(b, srv); return b; out: free(b); - bl_log(fun, LOG_ERR, "%s: malloc failed (%m)", __func__); + bl_log(fun, LOG_ERR, "%s: malloc failed (%s)", __func__, + strerror(errno)); return NULL; } void bl_destroy(bl_t b) { bl_reset(b, false); free(b); } static int bl_getsock(bl_t b, struct sockaddr_storage *ss, const struct sockaddr *sa, socklen_t slen, const char *ctx) { uint8_t family; memset(ss, 0, sizeof(*ss)); switch (slen) { case 0: return 0; case sizeof(struct sockaddr_in): family = AF_INET; break; case sizeof(struct sockaddr_in6): family = AF_INET6; break; default: bl_log(b->b_fun, LOG_ERR, "%s: invalid socket len %u (%s)", __func__, (unsigned)slen, ctx); errno = EINVAL; return -1; } memcpy(ss, sa, slen); if (ss->ss_family != family) { bl_log(b->b_fun, LOG_INFO, "%s: correcting socket family %d to %d (%s)", __func__, ss->ss_family, family, ctx); ss->ss_family = family; } #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN if (ss->ss_len != slen) { bl_log(b->b_fun, LOG_INFO, "%s: correcting socket len %u to %u (%s)", __func__, ss->ss_len, (unsigned)slen, ctx); ss->ss_len = (uint8_t)slen; } #endif return 0; } int bl_send(bl_t b, bl_type_t e, int pfd, const struct sockaddr *sa, socklen_t slen, const char *ctx) { struct msghdr msg; struct iovec iov; union { char ctrl[CMSG_SPACE(sizeof(int))]; uint32_t fd; } ua; struct cmsghdr *cmsg; union { bl_message_t bl; char buf[512]; } ub; size_t ctxlen, tried; #define NTRIES 5 ctxlen = strlen(ctx); if (ctxlen > 128) ctxlen = 128; iov.iov_base = ub.buf; iov.iov_len = sizeof(bl_message_t) + ctxlen; ub.bl.bl_len = (uint32_t)iov.iov_len; ub.bl.bl_version = BL_VERSION; ub.bl.bl_type = (uint32_t)e; if (bl_getsock(b, &ub.bl.bl_ss, sa, slen, ctx) == -1) return -1; ub.bl.bl_salen = slen; memcpy(ub.bl.bl_data, ctx, ctxlen); msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_flags = 0; msg.msg_control = ua.ctrl; msg.msg_controllen = sizeof(ua.ctrl); cmsg = CMSG_FIRSTHDR(&msg); cmsg->cmsg_len = CMSG_LEN(sizeof(int)); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; memcpy(CMSG_DATA(cmsg), &pfd, sizeof(pfd)); tried = 0; again: if (bl_init(b, false) == -1) return -1; if ((sendmsg(b->b_fd, &msg, 0) == -1) && tried++ < NTRIES) { bl_reset(b, false); goto again; } return tried >= NTRIES ? 
-1 : 0; } bl_info_t * bl_recv(bl_t b) { struct msghdr msg; struct iovec iov; union { char ctrl[CMSG_SPACE(sizeof(int)) + CMSG_SPACE(CRED_SIZE)]; uint32_t fd; CRED_TYPE sc; } ua; struct cmsghdr *cmsg; CRED_TYPE *sc; union { bl_message_t bl; char buf[512]; } ub; int got; ssize_t rlen; bl_info_t *bi = &b->b_info; got = 0; memset(bi, 0, sizeof(*bi)); iov.iov_base = ub.buf; iov.iov_len = sizeof(ub); msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_flags = 0; msg.msg_control = ua.ctrl; msg.msg_controllen = sizeof(ua.ctrl) + 100; rlen = recvmsg(b->b_fd, &msg, 0); if (rlen == -1) { - bl_log(b->b_fun, LOG_ERR, "%s: recvmsg failed (%m)", __func__); + bl_log(b->b_fun, LOG_ERR, "%s: recvmsg failed (%s)", __func__, + strerror(errno)); return NULL; } for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { if (cmsg->cmsg_level != SOL_SOCKET) { bl_log(b->b_fun, LOG_ERR, "%s: unexpected cmsg_level %d", __func__, cmsg->cmsg_level); continue; } switch (cmsg->cmsg_type) { case SCM_RIGHTS: if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { bl_log(b->b_fun, LOG_ERR, "%s: unexpected cmsg_len %d != %zu", __func__, cmsg->cmsg_len, CMSG_LEN(2 * sizeof(int))); continue; } memcpy(&bi->bi_fd, CMSG_DATA(cmsg), sizeof(bi->bi_fd)); got |= GOT_FD; break; #ifdef CRED_MESSAGE case CRED_MESSAGE: sc = (void *)CMSG_DATA(cmsg); bi->bi_uid = sc->CRED_SC_UID; bi->bi_gid = sc->CRED_SC_GID; got |= GOT_CRED; break; #endif default: bl_log(b->b_fun, LOG_ERR, "%s: unexpected cmsg_type %d", __func__, cmsg->cmsg_type); continue; } } if (got != (GOT_CRED|GOT_FD)) { bl_log(b->b_fun, LOG_ERR, "message missing %s %s", #if GOT_CRED != 0 (got & GOT_CRED) == 0 ? "cred" : #endif "", (got & GOT_FD) == 0 ? "fd" : ""); return NULL; } if ((size_t)rlen <= sizeof(ub.bl)) { bl_log(b->b_fun, LOG_ERR, "message too short %zd", rlen); return NULL; } if (ub.bl.bl_version != BL_VERSION) { bl_log(b->b_fun, LOG_ERR, "bad version %d", ub.bl.bl_version); return NULL; } bi->bi_type = ub.bl.bl_type; bi->bi_slen = ub.bl.bl_salen; bi->bi_ss = ub.bl.bl_ss; #ifndef CRED_MESSAGE bi->bi_uid = -1; bi->bi_gid = -1; #endif strlcpy(bi->bi_msg, ub.bl.bl_data, MIN(sizeof(bi->bi_msg), ((size_t)rlen - sizeof(ub.bl) + 1))); return bi; } Index: user/alc/PQ_LAUNDRY/contrib/libexecinfo/backtrace.c =================================================================== --- user/alc/PQ_LAUNDRY/contrib/libexecinfo/backtrace.c (revision 303641) +++ user/alc/PQ_LAUNDRY/contrib/libexecinfo/backtrace.c (revision 303642) @@ -1,250 +1,249 @@ /* $NetBSD: backtrace.c,v 1.3 2013/08/29 14:58:56 christos Exp $ */ /*- * Copyright (c) 2012 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Christos Zoulas. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __RCSID("$NetBSD: backtrace.c,v 1.3 2013/08/29 14:58:56 christos Exp $"); #include #include -#define _WITH_DPRINTF #include #include #include #include #include #include #include #include #include #include #include "execinfo.h" #include "symtab.h" #ifdef __linux__ #define SELF "/proc/self/exe" #else #include #define SELF "/proc/curproc/file" #endif static int open_self(int flags) { const char *pathname = SELF; #ifdef KERN_PROC_PATHNAME static const int name[] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1, }; char path[MAXPATHLEN]; size_t len; len = sizeof(path); if (sysctl(name, 4, path, &len, NULL, 0) != -1) pathname = path; #endif return open(pathname, flags); } static int __printflike(4, 5) rasprintf(char **buf, size_t *bufsiz, size_t offs, const char *fmt, ...) { for (;;) { size_t nbufsiz; char *nbuf; if (*buf && offs < *bufsiz) { va_list ap; int len; va_start(ap, fmt); len = vsnprintf(*buf + offs, *bufsiz - offs, fmt, ap); va_end(ap); if (len < 0 || (size_t)len + 1 < *bufsiz - offs) return len; nbufsiz = MAX(*bufsiz + 512, (size_t)len + 1); } else nbufsiz = MAX(offs, *bufsiz) + 512; nbuf = realloc(*buf, nbufsiz); if (nbuf == NULL) return -1; *buf = nbuf; *bufsiz = nbufsiz; } } /* * format specifiers: * %a = address * %n = symbol_name * %d = symbol_address - address * %D = if symbol_address == address "" else +%d * %f = filename */ static ssize_t format_string(char **buf, size_t *bufsiz, size_t offs, const char *fmt, Dl_info *dli, const void *addr) { ptrdiff_t diff = (const char *)addr - (const char *)dli->dli_saddr; size_t o = offs; int len; for (; *fmt; fmt++) { if (*fmt != '%') goto printone; switch (*++fmt) { case 'a': len = rasprintf(buf, bufsiz, o, "%p", addr); break; case 'n': len = rasprintf(buf, bufsiz, o, "%s", dli->dli_sname); break; case 'D': if (diff) len = rasprintf(buf, bufsiz, o, "+0x%tx", diff); else len = 0; break; case 'd': len = rasprintf(buf, bufsiz, o, "0x%tx", diff); break; case 'f': len = rasprintf(buf, bufsiz, o, "%s", dli->dli_fname); break; default: printone: len = rasprintf(buf, bufsiz, o, "%c", *fmt); break; } if (len == -1) return -1; o += len; } return o - offs; } static ssize_t format_address(symtab_t *st, char **buf, size_t *bufsiz, size_t offs, const char *fmt, const void *addr) { Dl_info dli; memset(&dli, 0, sizeof(dli)); (void)dladdr(addr, &dli); if (st) symtab_find(st, addr, &dli); if (dli.dli_sname == NULL) dli.dli_sname = "???"; if (dli.dli_fname == NULL) dli.dli_fname = "???"; if (dli.dli_saddr == NULL) dli.dli_saddr = (void *)(intptr_t)addr; return format_string(buf, bufsiz, offs, fmt, &dli, addr); } char ** backtrace_symbols_fmt(void *const *trace, size_t len, const char *fmt) { static const size_t slen = sizeof(char *) + 64; /* estimate */ char *ptr; symtab_t *st; int fd; if ((fd = open_self(O_RDONLY)) != -1) st = symtab_create(fd, -1, STT_FUNC); else st = NULL; if ((ptr = calloc(len, slen)) == NULL) goto out; size_t psize = len * slen; size_t offs = len * sizeof(char *); /* We store only offsets in the first pass 
because of realloc */ for (size_t i = 0; i < len; i++) { ssize_t x; ((char **)(void *)ptr)[i] = (void *)offs; x = format_address(st, &ptr, &psize, offs, fmt, trace[i]); if (x == -1) { free(ptr); ptr = NULL; goto out; } offs += x; ptr[offs++] = '\0'; assert(offs < psize); } /* Change offsets to pointers */ for (size_t j = 0; j < len; j++) ((char **)(void *)ptr)[j] += (intptr_t)ptr; out: symtab_destroy(st); if (fd != -1) (void)close(fd); return (void *)ptr; } int backtrace_symbols_fd_fmt(void *const *trace, size_t len, int fd, const char *fmt) { char **s = backtrace_symbols_fmt(trace, len, fmt); if (s == NULL) return -1; for (size_t i = 0; i < len; i++) if (dprintf(fd, "%s\n", s[i]) < 0) break; free(s); return 0; } static const char fmt[] = "%a <%n%D> at %f"; char ** backtrace_symbols(void *const *trace, size_t len) { return backtrace_symbols_fmt(trace, len, fmt); } int backtrace_symbols_fd(void *const *trace, size_t len, int fd) { return backtrace_symbols_fd_fmt(trace, len, fd, fmt); } Index: user/alc/PQ_LAUNDRY/contrib/libexecinfo =================================================================== --- user/alc/PQ_LAUNDRY/contrib/libexecinfo (revision 303641) +++ user/alc/PQ_LAUNDRY/contrib/libexecinfo (revision 303642) Property changes on: user/alc/PQ_LAUNDRY/contrib/libexecinfo ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/libexecinfo:r293456-303641 Index: user/alc/PQ_LAUNDRY/contrib/llvm/projects/libunwind/src/Unwind-EHABI.cpp =================================================================== --- user/alc/PQ_LAUNDRY/contrib/llvm/projects/libunwind/src/Unwind-EHABI.cpp (revision 303641) +++ user/alc/PQ_LAUNDRY/contrib/llvm/projects/libunwind/src/Unwind-EHABI.cpp (revision 303642) @@ -1,977 +1,977 @@ //===--------------------------- Unwind-EHABI.cpp -------------------------===// // // The LLVM Compiler Infrastructure // // This file is dual licensed under the MIT and the University of Illinois Open // Source Licenses. See LICENSE.TXT for details. // // // Implements ARM zero-cost C++ exceptions // //===----------------------------------------------------------------------===// #include "Unwind-EHABI.h" #if _LIBUNWIND_ARM_EHABI #include #include #include #include #include #include #include "config.h" #include "libunwind.h" #include "libunwind_ext.h" #include "unwind.h" namespace { // Strange order: take words in order, but inside word, take from most to least // signinficant byte. 
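// (Editor's note, illustrative only -- not part of the original source.)
// Concretely, for unwind data words[] = { 0x80a1b2c3, 0x04050607 } on a
// little-endian target, getByte(words, 0), getByte(words, 1), ... return
// 0x80, 0xa1, 0xb2, 0xc3, 0x04, 0x05, 0x06, 0x07: the words are taken in
// order, and each word is walked from its most significant byte down.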
uint8_t getByte(const uint32_t* data, size_t offset) { const uint8_t* byteData = reinterpret_cast(data); return byteData[(offset & ~(size_t)0x03) + (3 - (offset & (size_t)0x03))]; } const char* getNextWord(const char* data, uint32_t* out) { *out = *reinterpret_cast(data); return data + 4; } const char* getNextNibble(const char* data, uint32_t* out) { *out = *reinterpret_cast(data); return data + 2; } struct Descriptor { // See # 9.2 typedef enum { SU16 = 0, // Short descriptor, 16-bit entries LU16 = 1, // Long descriptor, 16-bit entries LU32 = 3, // Long descriptor, 32-bit entries RESERVED0 = 4, RESERVED1 = 5, RESERVED2 = 6, RESERVED3 = 7, RESERVED4 = 8, RESERVED5 = 9, RESERVED6 = 10, RESERVED7 = 11, RESERVED8 = 12, RESERVED9 = 13, RESERVED10 = 14, RESERVED11 = 15 } Format; // See # 9.2 typedef enum { CLEANUP = 0x0, FUNC = 0x1, CATCH = 0x2, INVALID = 0x4 } Kind; }; _Unwind_Reason_Code ProcessDescriptors( _Unwind_State state, _Unwind_Control_Block* ucbp, struct _Unwind_Context* context, Descriptor::Format format, const char* descriptorStart, uint32_t flags) { // EHT is inlined in the index using compact form. No descriptors. #5 if (flags & 0x1) return _URC_CONTINUE_UNWIND; // TODO: We should check the state here, and determine whether we need to // perform phase1 or phase2 unwinding. (void)state; const char* descriptor = descriptorStart; uint32_t descriptorWord; getNextWord(descriptor, &descriptorWord); while (descriptorWord) { // Read descriptor based on # 9.2. uint32_t length; uint32_t offset; switch (format) { case Descriptor::LU32: descriptor = getNextWord(descriptor, &length); descriptor = getNextWord(descriptor, &offset); case Descriptor::LU16: descriptor = getNextNibble(descriptor, &length); descriptor = getNextNibble(descriptor, &offset); default: assert(false); return _URC_FAILURE; } // See # 9.2 table for decoding the kind of descriptor. It's a 2-bit value. Descriptor::Kind kind = static_cast((length & 0x1) | ((offset & 0x1) << 1)); // Clear off flag from last bit. length &= ~1u; offset &= ~1u; uintptr_t scopeStart = ucbp->pr_cache.fnstart + offset; uintptr_t scopeEnd = scopeStart + length; uintptr_t pc = _Unwind_GetIP(context); bool isInScope = (scopeStart <= pc) && (pc < scopeEnd); switch (kind) { case Descriptor::CLEANUP: { // TODO(ajwong): Handle cleanup descriptors. break; } case Descriptor::FUNC: { // TODO(ajwong): Handle function descriptors. break; } case Descriptor::CATCH: { // Catch descriptors require gobbling one more word. uint32_t landing_pad; descriptor = getNextWord(descriptor, &landing_pad); if (isInScope) { // TODO(ajwong): This is only phase1 compatible logic. Implement // phase2. 
landing_pad = signExtendPrel31(landing_pad & ~0x80000000); if (landing_pad == 0xffffffff) { return _URC_HANDLER_FOUND; } else if (landing_pad == 0xfffffffe) { return _URC_FAILURE; } else { /* bool is_reference_type = landing_pad & 0x80000000; void* matched_object; if (__cxxabiv1::__cxa_type_match( ucbp, reinterpret_cast(landing_pad), is_reference_type, &matched_object) != __cxxabiv1::ctm_failed) return _URC_HANDLER_FOUND; */ _LIBUNWIND_ABORT("Type matching not implemented"); } } break; } default: _LIBUNWIND_ABORT("Invalid descriptor kind found."); } getNextWord(descriptor, &descriptorWord); } return _URC_CONTINUE_UNWIND; } static _Unwind_Reason_Code unwindOneFrame(_Unwind_State state, _Unwind_Control_Block* ucbp, struct _Unwind_Context* context) { // Read the compact model EHT entry's header # 6.3 const uint32_t* unwindingData = ucbp->pr_cache.ehtp; assert((*unwindingData & 0xf0000000) == 0x80000000 && "Must be a compact entry"); Descriptor::Format format = static_cast((*unwindingData & 0x0f000000) >> 24); const char *lsda = reinterpret_cast(_Unwind_GetLanguageSpecificData(context)); // Handle descriptors before unwinding so they are processed in the context // of the correct stack frame. _Unwind_Reason_Code result = ProcessDescriptors(state, ucbp, context, format, lsda, ucbp->pr_cache.additional); if (result != _URC_CONTINUE_UNWIND) return result; if (unw_step(reinterpret_cast(context)) != UNW_STEP_SUCCESS) return _URC_FAILURE; return _URC_CONTINUE_UNWIND; } // Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_CORE / // _UVRSD_UINT32. uint32_t RegisterMask(uint8_t start, uint8_t count_minus_one) { return ((1U << (count_minus_one + 1)) - 1) << start; } // Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_VFP / // _UVRSD_DOUBLE. uint32_t RegisterRange(uint8_t start, uint8_t count_minus_one) { return ((uint32_t)start << 16) | ((uint32_t)count_minus_one + 1); } } // end anonymous namespace /** * Decodes an EHT entry. * * @param data Pointer to EHT. * @param[out] off Offset from return value (in bytes) to begin interpretation. * @param[out] len Number of bytes in unwind code. * @return Pointer to beginning of unwind code. */ extern "C" const uint32_t* decode_eht_entry(const uint32_t* data, size_t* off, size_t* len) { if ((*data & 0x80000000) == 0) { // 6.2: Generic Model // // EHT entry is a prel31 pointing to the PR, followed by data understood // only by the personality routine. Fortunately, all existing assembler // implementations, including GNU assembler, LLVM integrated assembler, // and ARM assembler, assume that the unwind opcodes come after the // personality rountine address. *off = 1; // First byte is size data. *len = (((data[1] >> 24) & 0xff) + 1) * 4; data++; // Skip the first word, which is the prel31 offset. 
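    // (Editor's note, illustrative only -- not part of the original source.)
    // E.g. a size byte of 3 makes this branch return data + 1 with *off = 1
    // and *len = (3 + 1) * 4 = 16: the word carrying the size byte plus three
    // more opcode words, interpretation starting just past the size byte.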
} else { // 6.3: ARM Compact Model // // EHT entries here correspond to the __aeabi_unwind_cpp_pr[012] PRs indeded // by format: Descriptor::Format format = static_cast((*data & 0x0f000000) >> 24); switch (format) { case Descriptor::SU16: *len = 4; *off = 1; break; case Descriptor::LU16: case Descriptor::LU32: *len = 4 + 4 * ((*data & 0x00ff0000) >> 16); *off = 2; break; default: return nullptr; } } return data; } _Unwind_Reason_Code _Unwind_VRS_Interpret( _Unwind_Context* context, const uint32_t* data, size_t offset, size_t len) { bool wrotePC = false; bool finish = false; while (offset < len && !finish) { uint8_t byte = getByte(data, offset++); if ((byte & 0x80) == 0) { uint32_t sp; _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); if (byte & 0x40) sp -= (((uint32_t)byte & 0x3f) << 2) + 4; else sp += ((uint32_t)byte << 2) + 4; _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); } else { switch (byte & 0xf0) { case 0x80: { if (offset >= len) return _URC_FAILURE; uint32_t registers = (((uint32_t)byte & 0x0f) << 12) | (((uint32_t)getByte(data, offset++)) << 4); if (!registers) return _URC_FAILURE; if (registers & (1 << 15)) wrotePC = true; _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); break; } case 0x90: { uint8_t reg = byte & 0x0f; if (reg == 13 || reg == 15) return _URC_FAILURE; uint32_t sp; _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_R0 + reg, _UVRSD_UINT32, &sp); _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); break; } case 0xa0: { uint32_t registers = RegisterMask(4, byte & 0x07); if (byte & 0x08) registers |= 1 << 14; _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); break; } case 0xb0: { switch (byte) { case 0xb0: finish = true; break; case 0xb1: { if (offset >= len) return _URC_FAILURE; uint8_t registers = getByte(data, offset++); if (registers & 0xf0 || !registers) return _URC_FAILURE; _Unwind_VRS_Pop(context, _UVRSC_CORE, registers, _UVRSD_UINT32); break; } case 0xb2: { uint32_t addend = 0; uint32_t shift = 0; // This decodes a uleb128 value. while (true) { if (offset >= len) return _URC_FAILURE; uint32_t v = getByte(data, offset++); addend |= (v & 0x7f) << shift; if ((v & 0x80) == 0) break; shift += 7; } uint32_t sp; _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); sp += 0x204 + (addend << 2); _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); break; } case 0xb3: { uint8_t v = getByte(data, offset++); _Unwind_VRS_Pop(context, _UVRSC_VFP, RegisterRange(static_cast(v >> 4), v & 0x0f), _UVRSD_VFPX); break; } case 0xb4: case 0xb5: case 0xb6: case 0xb7: return _URC_FAILURE; default: _Unwind_VRS_Pop(context, _UVRSC_VFP, RegisterRange(8, byte & 0x07), _UVRSD_VFPX); break; } break; } case 0xc0: { switch (byte) { case 0xc0: case 0xc1: case 0xc2: case 0xc3: case 0xc4: case 0xc5: _Unwind_VRS_Pop(context, _UVRSC_WMMXD, RegisterRange(10, byte & 0x7), _UVRSD_DOUBLE); break; case 0xc6: { uint8_t v = getByte(data, offset++); uint8_t start = static_cast(v >> 4); uint8_t count_minus_one = v & 0xf; if (start + count_minus_one >= 16) return _URC_FAILURE; _Unwind_VRS_Pop(context, _UVRSC_WMMXD, RegisterRange(start, count_minus_one), _UVRSD_DOUBLE); break; } case 0xc7: { uint8_t v = getByte(data, offset++); if (!v || v & 0xf0) return _URC_FAILURE; _Unwind_VRS_Pop(context, _UVRSC_WMMXC, v, _UVRSD_DOUBLE); break; } case 0xc8: case 0xc9: { uint8_t v = getByte(data, offset++); uint8_t start = static_cast(((byte == 0xc8) ? 
16 : 0) + (v >> 4)); uint8_t count_minus_one = v & 0xf; if (start + count_minus_one >= 32) return _URC_FAILURE; _Unwind_VRS_Pop(context, _UVRSC_VFP, RegisterRange(start, count_minus_one), _UVRSD_DOUBLE); break; } default: return _URC_FAILURE; } break; } case 0xd0: { if (byte & 0x08) return _URC_FAILURE; _Unwind_VRS_Pop(context, _UVRSC_VFP, RegisterRange(8, byte & 0x7), _UVRSD_DOUBLE); break; } default: return _URC_FAILURE; } } } if (!wrotePC) { uint32_t lr; _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_LR, _UVRSD_UINT32, &lr); _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_IP, _UVRSD_UINT32, &lr); } return _URC_CONTINUE_UNWIND; } extern "C" _Unwind_Reason_Code __aeabi_unwind_cpp_pr0( _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context) { return unwindOneFrame(state, ucbp, context); } extern "C" _Unwind_Reason_Code __aeabi_unwind_cpp_pr1( _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context) { return unwindOneFrame(state, ucbp, context); } extern "C" _Unwind_Reason_Code __aeabi_unwind_cpp_pr2( _Unwind_State state, _Unwind_Control_Block *ucbp, _Unwind_Context *context) { return unwindOneFrame(state, ucbp, context); } static _Unwind_Reason_Code unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) { // EHABI #7.3 discusses preserving the VRS in a "temporary VRS" during // phase 1 and then restoring it to the "primary VRS" for phase 2. The // effect is phase 2 doesn't see any of the VRS manipulations from phase 1. // In this implementation, the phases don't share the VRS backing store. // Instead, they are passed the original |uc| and they create a new VRS // from scratch thus achieving the same effect. unw_init_local(cursor, uc); // Walk each frame looking for a place to stop. for (bool handlerNotFound = true; handlerNotFound;) { // See if frame has code to run (has personality routine). unw_proc_info_t frameInfo; if (unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): unw_get_proc_info " "failed => _URC_FATAL_PHASE1_ERROR\n", static_cast(exception_object)); return _URC_FATAL_PHASE1_ERROR; } // When tracing, print state information. if (_LIBUNWIND_TRACING_UNWINDING) { char functionBuf[512]; const char *functionName = functionBuf; unw_word_t offset; if ((unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), &offset) != UNW_ESUCCESS) || (frameInfo.start_ip + offset > frameInfo.end_ip)) functionName = ".anonymous."; unw_word_t pc; unw_get_reg(cursor, UNW_REG_IP, &pc); _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): pc=0x%llX, start_ip=0x%llX, func=%s, " "lsda=0x%llX, personality=0x%llX\n", static_cast(exception_object), (long long)pc, (long long)frameInfo.start_ip, functionName, (long long)frameInfo.lsda, (long long)frameInfo.handler); } // If there is a personality routine, ask it if it will want to stop at // this frame. 
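    // (Editor's note, not part of the original source.)  For ARM compact-model
    // index entries frameInfo.handler resolves to one of the
    // __aeabi_unwind_cpp_pr[012] routines defined above; generic-model entries
    // instead carry a prel31 pointer to their own personality routine (for C++
    // typically __gxx_personality_v0), and that is the function invoked below.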
if (frameInfo.handler != 0) { __personality_routine p = (__personality_routine)(long)(frameInfo.handler); _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): calling personality function %p\n", static_cast(exception_object), reinterpret_cast(reinterpret_cast(p))); struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor); exception_object->pr_cache.fnstart = frameInfo.start_ip; exception_object->pr_cache.ehtp = (_Unwind_EHT_Header *)frameInfo.unwind_info; exception_object->pr_cache.additional = frameInfo.flags; _Unwind_Reason_Code personalityResult = (*p)(_US_VIRTUAL_UNWIND_FRAME, exception_object, context); _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): personality result %d start_ip %x ehtp %p " "additional %x\n", static_cast(exception_object), personalityResult, exception_object->pr_cache.fnstart, static_cast(exception_object->pr_cache.ehtp), exception_object->pr_cache.additional); switch (personalityResult) { case _URC_HANDLER_FOUND: // found a catch clause or locals that need destructing in this frame // stop search and remember stack pointer at the frame handlerNotFound = false; // p should have initialized barrier_cache. EHABI #7.3.5 _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): _URC_HANDLER_FOUND \n", static_cast(exception_object)); return _URC_NO_REASON; case _URC_CONTINUE_UNWIND: _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): _URC_CONTINUE_UNWIND\n", static_cast(exception_object)); // continue unwinding break; // EHABI #7.3.3 case _URC_FAILURE: return _URC_FAILURE; default: // something went wrong _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR\n", static_cast(exception_object)); return _URC_FATAL_PHASE1_ERROR; } } } return _URC_NO_REASON; } static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object, bool resume) { // See comment at the start of unwind_phase1 regarding VRS integrity. unw_init_local(cursor, uc); _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)\n", static_cast(exception_object)); int frame_count = 0; // Walk each frame until we reach where search phase said to stop. while (true) { // Ask libuwind to get next frame (skip over first which is // _Unwind_RaiseException or _Unwind_Resume). // // Resume only ever makes sense for 1 frame. _Unwind_State state = resume ? _US_UNWIND_FRAME_RESUME : _US_UNWIND_FRAME_STARTING; if (resume && frame_count == 1) { // On a resume, first unwind the _Unwind_Resume() frame. The next frame // is now the landing pad for the cleanup from a previous execution of // phase2. To continue unwindingly correctly, replace VRS[15] with the // IP of the frame that the previous run of phase2 installed the context // for. After this, continue unwinding as if normal. // // See #7.4.6 for details. unw_set_reg(cursor, UNW_REG_IP, exception_object->unwinder_cache.reserved2); resume = false; } // Get info about this frame. unw_word_t sp; unw_proc_info_t frameInfo; unw_get_reg(cursor, UNW_REG_SP, &sp); if (unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): unw_get_proc_info " - "failed => _URC_FATAL_PHASE1_ERROR\n", + "failed => _URC_FATAL_PHASE2_ERROR\n", static_cast(exception_object)); return _URC_FATAL_PHASE2_ERROR; } // When tracing, print state information. 
if (_LIBUNWIND_TRACING_UNWINDING) { char functionBuf[512]; const char *functionName = functionBuf; unw_word_t offset; if ((unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), &offset) != UNW_ESUCCESS) || (frameInfo.start_ip + offset > frameInfo.end_ip)) functionName = ".anonymous."; _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): start_ip=0x%llX, func=%s, sp=0x%llX, " "lsda=0x%llX, personality=0x%llX\n", static_cast(exception_object), (long long)frameInfo.start_ip, functionName, (long long)sp, (long long)frameInfo.lsda, (long long)frameInfo.handler); } // If there is a personality routine, tell it we are unwinding. if (frameInfo.handler != 0) { __personality_routine p = (__personality_routine)(long)(frameInfo.handler); struct _Unwind_Context *context = (struct _Unwind_Context *)(cursor); // EHABI #7.2 exception_object->pr_cache.fnstart = frameInfo.start_ip; exception_object->pr_cache.ehtp = (_Unwind_EHT_Header *)frameInfo.unwind_info; exception_object->pr_cache.additional = frameInfo.flags; _Unwind_Reason_Code personalityResult = (*p)(state, exception_object, context); switch (personalityResult) { case _URC_CONTINUE_UNWIND: // Continue unwinding _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND\n", static_cast(exception_object)); // EHABI #7.2 if (sp == exception_object->barrier_cache.sp) { // Phase 1 said we would stop at this frame, but we did not... _LIBUNWIND_ABORT("during phase1 personality function said it would " "stop here, but now in phase2 it did not stop here"); } break; case _URC_INSTALL_CONTEXT: _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): _URC_INSTALL_CONTEXT\n", static_cast(exception_object)); // Personality routine says to transfer control to landing pad. // We may get control back if landing pad calls _Unwind_Resume(). if (_LIBUNWIND_TRACING_UNWINDING) { unw_word_t pc; unw_get_reg(cursor, UNW_REG_IP, &pc); unw_get_reg(cursor, UNW_REG_SP, &sp); _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering " "user code with ip=0x%llX, sp=0x%llX\n", static_cast(exception_object), (long long)pc, (long long)sp); } { // EHABI #7.4.1 says we need to preserve pc for when _Unwind_Resume // is called back, to find this same frame. unw_word_t pc; unw_get_reg(cursor, UNW_REG_IP, &pc); exception_object->unwinder_cache.reserved2 = (uint32_t)pc; } unw_resume(cursor); // unw_resume() only returns if there was an error. return _URC_FATAL_PHASE2_ERROR; // # EHABI #7.4.3 case _URC_FAILURE: abort(); default: // Personality routine returned an unknown result code. _LIBUNWIND_DEBUG_LOG("personality function returned unknown result %d", personalityResult); return _URC_FATAL_PHASE2_ERROR; } } frame_count++; } // Clean up phase did not resume at the frame that the search phase // said it would... return _URC_FATAL_PHASE2_ERROR; } /// Called by __cxa_throw. Only returns if there is a fatal error. _LIBUNWIND_EXPORT _Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_RaiseException(ex_obj=%p)\n", static_cast(exception_object)); unw_context_t uc; unw_cursor_t cursor; unw_getcontext(&uc); // This field for is for compatibility with GCC to say this isn't a forced // unwind. 
EHABI #7.2 exception_object->unwinder_cache.reserved1 = 0; // phase 1: the search phase _Unwind_Reason_Code phase1 = unwind_phase1(&uc, &cursor, exception_object); if (phase1 != _URC_NO_REASON) return phase1; // phase 2: the clean up phase return unwind_phase2(&uc, &cursor, exception_object, false); } _LIBUNWIND_EXPORT void _Unwind_Complete(_Unwind_Exception* exception_object) { // This is to be called when exception handling completes to give us a chance // to perform any housekeeping. EHABI #7.2. But we have nothing to do here. (void)exception_object; } /// When _Unwind_RaiseException() is in phase2, it hands control /// to the personality function at each frame. The personality /// may force a jump to a landing pad in that function, the landing /// pad code may then call _Unwind_Resume() to continue with the /// unwinding. Note: the call to _Unwind_Resume() is from compiler /// geneated user code. All other _Unwind_* routines are called /// by the C++ runtime __cxa_* routines. /// /// Note: re-throwing an exception (as opposed to continuing the unwind) /// is implemented by having the code call __cxa_rethrow() which /// in turn calls _Unwind_Resume_or_Rethrow(). _LIBUNWIND_EXPORT void _Unwind_Resume(_Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_Resume(ex_obj=%p)\n", static_cast(exception_object)); unw_context_t uc; unw_cursor_t cursor; unw_getcontext(&uc); // _Unwind_RaiseException on EHABI will always set the reserved1 field to 0, // which is in the same position as private_1 below. // TODO(ajwong): Who wronte the above? Why is it true? unwind_phase2(&uc, &cursor, exception_object, true); // Clients assume _Unwind_Resume() does not return, so all we can do is abort. _LIBUNWIND_ABORT("_Unwind_Resume() can't return"); } /// Called by personality handler during phase 2 to get LSDA for current frame. _LIBUNWIND_EXPORT uintptr_t _Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { unw_cursor_t *cursor = (unw_cursor_t *)context; unw_proc_info_t frameInfo; uintptr_t result = 0; if (unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) result = (uintptr_t)frameInfo.lsda; _LIBUNWIND_TRACE_API( "_Unwind_GetLanguageSpecificData(context=%p) => 0x%llx\n", static_cast(context), (long long)result); return result; } static uint64_t ValueAsBitPattern(_Unwind_VRS_DataRepresentation representation, void* valuep) { uint64_t value = 0; switch (representation) { case _UVRSD_UINT32: case _UVRSD_FLOAT: memcpy(&value, valuep, sizeof(uint32_t)); break; case _UVRSD_VFPX: case _UVRSD_UINT64: case _UVRSD_DOUBLE: memcpy(&value, valuep, sizeof(uint64_t)); break; } return value; } _Unwind_VRS_Result _Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, uint32_t regno, _Unwind_VRS_DataRepresentation representation, void *valuep) { _LIBUNWIND_TRACE_API("_Unwind_VRS_Set(context=%p, regclass=%d, reg=%d, " "rep=%d, value=0x%llX)\n", static_cast(context), regclass, regno, representation, ValueAsBitPattern(representation, valuep)); unw_cursor_t *cursor = (unw_cursor_t *)context; switch (regclass) { case _UVRSC_CORE: if (representation != _UVRSD_UINT32 || regno > 15) return _UVRSR_FAILED; return unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno), *(unw_word_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; case _UVRSC_WMMXC: if (representation != _UVRSD_UINT32 || regno > 3) return _UVRSR_FAILED; return unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_WC0 + regno), *(unw_word_t *)valuep) == UNW_ESUCCESS ? 
_UVRSR_OK : _UVRSR_FAILED; case _UVRSC_VFP: if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) return _UVRSR_FAILED; if (representation == _UVRSD_VFPX) { // Can only touch d0-15 with FSTMFDX. if (regno > 15) return _UVRSR_FAILED; unw_save_vfp_as_X(cursor); } else { if (regno > 31) return _UVRSR_FAILED; } return unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno), *(unw_fpreg_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; case _UVRSC_WMMXD: if (representation != _UVRSD_DOUBLE || regno > 31) return _UVRSR_FAILED; return unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno), *(unw_fpreg_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; } _LIBUNWIND_ABORT("unsupported register class"); } static _Unwind_VRS_Result _Unwind_VRS_Get_Internal(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, uint32_t regno, _Unwind_VRS_DataRepresentation representation, void *valuep) { unw_cursor_t *cursor = (unw_cursor_t *)context; switch (regclass) { case _UVRSC_CORE: if (representation != _UVRSD_UINT32 || regno > 15) return _UVRSR_FAILED; return unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno), (unw_word_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; case _UVRSC_WMMXC: if (representation != _UVRSD_UINT32 || regno > 3) return _UVRSR_FAILED; return unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_WC0 + regno), (unw_word_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; case _UVRSC_VFP: if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) return _UVRSR_FAILED; if (representation == _UVRSD_VFPX) { // Can only touch d0-15 with FSTMFDX. if (regno > 15) return _UVRSR_FAILED; unw_save_vfp_as_X(cursor); } else { if (regno > 31) return _UVRSR_FAILED; } return unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno), (unw_fpreg_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; case _UVRSC_WMMXD: if (representation != _UVRSD_DOUBLE || regno > 31) return _UVRSR_FAILED; return unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno), (unw_fpreg_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; } _LIBUNWIND_ABORT("unsupported register class"); } _Unwind_VRS_Result _Unwind_VRS_Get( _Unwind_Context *context, _Unwind_VRS_RegClass regclass, uint32_t regno, _Unwind_VRS_DataRepresentation representation, void *valuep) { _Unwind_VRS_Result result = _Unwind_VRS_Get_Internal(context, regclass, regno, representation, valuep); _LIBUNWIND_TRACE_API("_Unwind_VRS_Get(context=%p, regclass=%d, reg=%d, " "rep=%d, value=0x%llX, result = %d)\n", static_cast(context), regclass, regno, representation, ValueAsBitPattern(representation, valuep), result); return result; } _Unwind_VRS_Result _Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, uint32_t discriminator, _Unwind_VRS_DataRepresentation representation) { _LIBUNWIND_TRACE_API("_Unwind_VRS_Pop(context=%p, regclass=%d, " "discriminator=%d, representation=%d)\n", static_cast(context), regclass, discriminator, representation); switch (regclass) { case _UVRSC_CORE: case _UVRSC_WMMXC: { if (representation != _UVRSD_UINT32) return _UVRSR_FAILED; // When popping SP from the stack, we don't want to override it from the // computed new stack location. See EHABI #7.5.4 table 3. 
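    // For example, a discriminator of 0x2010 has bits 4 and 13 set, so the
    // loop below pops r4 and then r13.  Because r13 is SP itself, poppedSP
    // becomes true and the final write-back of the computed vsp is skipped.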
bool poppedSP = false; uint32_t* sp; if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp) != _UVRSR_OK) { return _UVRSR_FAILED; } for (uint32_t i = 0; i < 16; ++i) { if (!(discriminator & static_cast(1 << i))) continue; uint32_t value = *sp++; if (regclass == _UVRSC_CORE && i == 13) poppedSP = true; if (_Unwind_VRS_Set(context, regclass, i, _UVRSD_UINT32, &value) != _UVRSR_OK) { return _UVRSR_FAILED; } } if (!poppedSP) { return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); } return _UVRSR_OK; } case _UVRSC_VFP: case _UVRSC_WMMXD: { if (representation != _UVRSD_VFPX && representation != _UVRSD_DOUBLE) return _UVRSR_FAILED; uint32_t first = discriminator >> 16; uint32_t count = discriminator & 0xffff; uint32_t end = first+count; uint32_t* sp; if (_Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp) != _UVRSR_OK) { return _UVRSR_FAILED; } // For _UVRSD_VFPX, we're assuming the data is stored in FSTMX "standard // format 1", which is equivalent to FSTMD + a padding word. for (uint32_t i = first; i < end; ++i) { // SP is only 32-bit aligned so don't copy 64-bit at a time. uint64_t value = *sp++; value |= ((uint64_t)(*sp++)) << 32; if (_Unwind_VRS_Set(context, regclass, i, representation, &value) != _UVRSR_OK) return _UVRSR_FAILED; } if (representation == _UVRSD_VFPX) ++sp; return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); } } _LIBUNWIND_ABORT("unsupported register class"); } /// Called by personality handler during phase 2 to find the start of the /// function. _LIBUNWIND_EXPORT uintptr_t _Unwind_GetRegionStart(struct _Unwind_Context *context) { unw_cursor_t *cursor = (unw_cursor_t *)context; unw_proc_info_t frameInfo; uintptr_t result = 0; if (unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) result = (uintptr_t)frameInfo.start_ip; _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%llX\n", static_cast(context), (long long)result); return result; } /// Called by personality handler during phase 2 if a foreign exception // is caught. 
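/// If the foreign runtime registered an exception_cleanup callback in the
/// exception object, the function below invokes it with
/// _URC_FOREIGN_EXCEPTION_CAUGHT so that runtime can release the object.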
_LIBUNWIND_EXPORT void _Unwind_DeleteException(_Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)\n", static_cast(exception_object)); if (exception_object->exception_cleanup != NULL) (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT, exception_object); } extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code __gnu_unwind_frame(_Unwind_Exception *exception_object, struct _Unwind_Context *context) { unw_cursor_t *cursor = (unw_cursor_t *)context; if (unw_step(cursor) != UNW_STEP_SUCCESS) return _URC_FAILURE; return _URC_OK; } #endif // _LIBUNWIND_ARM_EHABI Index: user/alc/PQ_LAUNDRY/contrib/llvm/projects/libunwind =================================================================== --- user/alc/PQ_LAUNDRY/contrib/llvm/projects/libunwind (revision 303641) +++ user/alc/PQ_LAUNDRY/contrib/llvm/projects/libunwind (revision 303642) Property changes on: user/alc/PQ_LAUNDRY/contrib/llvm/projects/libunwind ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/llvm/projects/libunwind:r303205-303641 Index: user/alc/PQ_LAUNDRY/contrib/llvm =================================================================== --- user/alc/PQ_LAUNDRY/contrib/llvm (revision 303641) +++ user/alc/PQ_LAUNDRY/contrib/llvm (revision 303642) Property changes on: user/alc/PQ_LAUNDRY/contrib/llvm ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/llvm:r303205-303641 Index: user/alc/PQ_LAUNDRY/contrib/ofed/usr.lib/libibverbs/config.h =================================================================== --- user/alc/PQ_LAUNDRY/contrib/ofed/usr.lib/libibverbs/config.h (revision 303641) +++ user/alc/PQ_LAUNDRY/contrib/ofed/usr.lib/libibverbs/config.h (revision 303642) @@ -1,2 +1 @@ -#define _WITH_GETLINE #include Index: user/alc/PQ_LAUNDRY/include/grp.h =================================================================== --- user/alc/PQ_LAUNDRY/include/grp.h (revision 303641) +++ user/alc/PQ_LAUNDRY/include/grp.h (revision 303642) @@ -1,93 +1,92 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)grp.h 8.2 (Berkeley) 1/21/94 * $FreeBSD$ */ #ifndef _GRP_H_ #define _GRP_H_ #include #include #define _PATH_GROUP "/etc/group" #ifndef _GID_T_DECLARED typedef __gid_t gid_t; #define _GID_T_DECLARED #endif #ifndef _SIZE_T_DECLARED typedef __size_t size_t; #define _SIZE_T_DECLARED #endif struct group { char *gr_name; /* group name */ char *gr_passwd; /* group password */ gid_t gr_gid; /* group id */ char **gr_mem; /* group members */ }; __BEGIN_DECLS #if __POSIX_VISIBLE >= 200112 || __XSI_VISIBLE void endgrent(void); struct group *getgrent(void); #endif struct group *getgrgid(gid_t); struct group *getgrnam(const char *); #if __BSD_VISIBLE const char *group_from_gid(gid_t, int); int gid_from_group(const char *, gid_t *); int pwcache_groupdb(int (*)(int), void (*)(void), struct group * (*)(const char *), struct group * (*)(gid_t)); #endif #if __XSI_VISIBLE -/* XXX IEEE Std 1003.1, 2003 specifies `void setgrent(void)' */ -int setgrent(void); +void setgrent(void); #endif #if __POSIX_VISIBLE >= 200112 || __XSI_VISIBLE int getgrgid_r(gid_t, struct group *, char *, size_t, struct group **); int getgrnam_r(const char *, struct group *, char *, size_t, struct group **); #endif #if __BSD_VISIBLE int getgrent_r(struct group *, char *, size_t, struct group **); int setgroupent(int); #endif __END_DECLS #endif /* !_GRP_H_ */ Index: user/alc/PQ_LAUNDRY/include/stdio.h =================================================================== --- user/alc/PQ_LAUNDRY/include/stdio.h (revision 303641) +++ user/alc/PQ_LAUNDRY/include/stdio.h (revision 303642) @@ -1,544 +1,508 @@ /*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Chris Torek. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)stdio.h 8.5 (Berkeley) 4/29/95 * $FreeBSD$ */ #ifndef _STDIO_H_ #define _STDIO_H_ #include #include #include typedef __off_t fpos_t; #ifndef _SIZE_T_DECLARED typedef __size_t size_t; #define _SIZE_T_DECLARED #endif #if __POSIX_VISIBLE >= 200809 #ifndef _OFF_T_DECLARED #define _OFF_T_DECLARED typedef __off_t off_t; #endif #ifndef _SSIZE_T_DECLARED #define _SSIZE_T_DECLARED typedef __ssize_t ssize_t; #endif #endif #ifndef _OFF64_T_DECLARED #define _OFF64_T_DECLARED typedef __off64_t off64_t; #endif #if __POSIX_VISIBLE >= 200112 || __XSI_VISIBLE #ifndef _VA_LIST_DECLARED typedef __va_list va_list; #define _VA_LIST_DECLARED #endif #endif #define _FSTDIO /* Define for new stdio with functions. */ /* * NB: to fit things in six character monocase externals, the stdio * code uses the prefix `__s' for stdio objects, typically followed * by a three-character attempt at a mnemonic. */ /* stdio buffers */ struct __sbuf { unsigned char *_base; int _size; }; /* * stdio state variables. * * The following always hold: * * if (_flags&(__SLBF|__SWR)) == (__SLBF|__SWR), * _lbfsize is -_bf._size, else _lbfsize is 0 * if _flags&__SRD, _w is 0 * if _flags&__SWR, _r is 0 * * This ensures that the getc and putc macros (or inline functions) never * try to write or read from a file that is in `read' or `write' mode. * (Moreover, they can, and do, automatically switch from read mode to * write mode, and back, on "r+" and "w+" files.) * * _lbfsize is used only to make the inline line-buffered output stream * code as compact as possible. * * _ub, _up, and _ur are used when ungetc() pushes back more characters * than fit in the current _bf, or when ungetc() pushes back a character * that does not match the previous one in _bf. When this happens, * _ub._base becomes non-nil (i.e., a stream has ungetc() data iff * _ub._base!=NULL) and _up and _ur save the current values of _p and _r. * * Certain members of __sFILE are accessed directly via macros or * inline functions. To preserve ABI compat, these members must not * be disturbed. These members are marked below with (*). 
*/ struct __sFILE { unsigned char *_p; /* (*) current position in (some) buffer */ int _r; /* (*) read space left for getc() */ int _w; /* (*) write space left for putc() */ short _flags; /* (*) flags, below; this FILE is free if 0 */ short _file; /* (*) fileno, if Unix descriptor, else -1 */ struct __sbuf _bf; /* (*) the buffer (at least 1 byte, if !NULL) */ int _lbfsize; /* (*) 0 or -_bf._size, for inline putc */ /* operations */ void *_cookie; /* (*) cookie passed to io functions */ int (*_close)(void *); int (*_read)(void *, char *, int); fpos_t (*_seek)(void *, fpos_t, int); int (*_write)(void *, const char *, int); /* separate buffer for long sequences of ungetc() */ struct __sbuf _ub; /* ungetc buffer */ unsigned char *_up; /* saved _p when _p is doing ungetc data */ int _ur; /* saved _r when _r is counting ungetc data */ /* tricks to meet minimum requirements even when malloc() fails */ unsigned char _ubuf[3]; /* guarantee an ungetc() buffer */ unsigned char _nbuf[1]; /* guarantee a getc() buffer */ /* separate buffer for fgetln() when line crosses buffer boundary */ struct __sbuf _lb; /* buffer for fgetln() */ /* Unix stdio files get aligned to block boundaries on fseek() */ int _blksize; /* stat.st_blksize (may be != _bf._size) */ fpos_t _offset; /* current lseek offset */ struct pthread_mutex *_fl_mutex; /* used for MT-safety */ struct pthread *_fl_owner; /* current owner */ int _fl_count; /* recursive lock count */ int _orientation; /* orientation for fwide() */ __mbstate_t _mbstate; /* multibyte conversion state */ int _flags2; /* additional flags */ }; #ifndef _STDFILE_DECLARED #define _STDFILE_DECLARED typedef struct __sFILE FILE; #endif #ifndef _STDSTREAM_DECLARED __BEGIN_DECLS extern FILE *__stdinp; extern FILE *__stdoutp; extern FILE *__stderrp; __END_DECLS #define _STDSTREAM_DECLARED #endif #define __SLBF 0x0001 /* line buffered */ #define __SNBF 0x0002 /* unbuffered */ #define __SRD 0x0004 /* OK to read */ #define __SWR 0x0008 /* OK to write */ /* RD and WR are never simultaneously asserted */ #define __SRW 0x0010 /* open for reading & writing */ #define __SEOF 0x0020 /* found EOF */ #define __SERR 0x0040 /* found error */ #define __SMBF 0x0080 /* _bf._base is from malloc */ #define __SAPP 0x0100 /* fdopen()ed in append mode */ #define __SSTR 0x0200 /* this is an sprintf/snprintf string */ #define __SOPT 0x0400 /* do fseek() optimization */ #define __SNPT 0x0800 /* do not do fseek() optimization */ #define __SOFF 0x1000 /* set iff _offset is in fact correct */ #define __SMOD 0x2000 /* true => fgetln modified _p text */ #define __SALC 0x4000 /* allocate string space dynamically */ #define __SIGN 0x8000 /* ignore this file in _fwalk */ #define __S2OAP 0x0001 /* O_APPEND mode is set */ /* * The following three definitions are for ANSI C, which took them * from System V, which brilliantly took internal interface macros and * made them official arguments to setvbuf(), without renaming them. * Hence, these ugly _IOxxx names are *supposed* to appear in user code. * * Although numbered as their counterparts above, the implementation * does not rely on this. */ #define _IOFBF 0 /* setvbuf should set fully buffered */ #define _IOLBF 1 /* setvbuf should set line buffered */ #define _IONBF 2 /* setvbuf should set unbuffered */ #define BUFSIZ 1024 /* size of buffer used by setbuf */ #define EOF (-1) /* * FOPEN_MAX is a minimum maximum, and is the number of streams that * stdio can provide without attempting to allocate further resources * (which could fail). 
Do not use this for anything. */ /* must be == _POSIX_STREAM_MAX */ #ifndef FOPEN_MAX #define FOPEN_MAX 20 /* must be <= OPEN_MAX */ #endif #define FILENAME_MAX 1024 /* must be <= PATH_MAX */ /* System V/ANSI C; this is the wrong way to do this, do *not* use these. */ #if __XSI_VISIBLE #define P_tmpdir "/tmp/" #endif #define L_tmpnam 1024 /* XXX must be == PATH_MAX */ #define TMP_MAX 308915776 #ifndef SEEK_SET #define SEEK_SET 0 /* set file offset to offset */ #endif #ifndef SEEK_CUR #define SEEK_CUR 1 /* set file offset to current plus offset */ #endif #ifndef SEEK_END #define SEEK_END 2 /* set file offset to EOF plus offset */ #endif #define stdin __stdinp #define stdout __stdoutp #define stderr __stderrp __BEGIN_DECLS #ifdef _XLOCALE_H_ #include #endif /* * Functions defined in ANSI C standard. */ void clearerr(FILE *); int fclose(FILE *); int feof(FILE *); int ferror(FILE *); int fflush(FILE *); int fgetc(FILE *); int fgetpos(FILE * __restrict, fpos_t * __restrict); char *fgets(char * __restrict, int, FILE * __restrict); FILE *fopen(const char * __restrict, const char * __restrict); int fprintf(FILE * __restrict, const char * __restrict, ...); int fputc(int, FILE *); int fputs(const char * __restrict, FILE * __restrict); size_t fread(void * __restrict, size_t, size_t, FILE * __restrict); FILE *freopen(const char * __restrict, const char * __restrict, FILE * __restrict); int fscanf(FILE * __restrict, const char * __restrict, ...); int fseek(FILE *, long, int); int fsetpos(FILE *, const fpos_t *); long ftell(FILE *); size_t fwrite(const void * __restrict, size_t, size_t, FILE * __restrict); int getc(FILE *); int getchar(void); char *gets(char *); void perror(const char *); int printf(const char * __restrict, ...); int putc(int, FILE *); int putchar(int); int puts(const char *); int remove(const char *); int rename(const char *, const char *); void rewind(FILE *); int scanf(const char * __restrict, ...); void setbuf(FILE * __restrict, char * __restrict); int setvbuf(FILE * __restrict, char * __restrict, int, size_t); int sprintf(char * __restrict, const char * __restrict, ...); int sscanf(const char * __restrict, const char * __restrict, ...); FILE *tmpfile(void); char *tmpnam(char *); int ungetc(int, FILE *); int vfprintf(FILE * __restrict, const char * __restrict, __va_list); int vprintf(const char * __restrict, __va_list); int vsprintf(char * __restrict, const char * __restrict, __va_list); #if __ISO_C_VISIBLE >= 1999 int snprintf(char * __restrict, size_t, const char * __restrict, ...) __printflike(3, 4); int vfscanf(FILE * __restrict, const char * __restrict, __va_list) __scanflike(2, 0); int vscanf(const char * __restrict, __va_list) __scanflike(1, 0); int vsnprintf(char * __restrict, size_t, const char * __restrict, __va_list) __printflike(3, 0); int vsscanf(const char * __restrict, const char * __restrict, __va_list) __scanflike(2, 0); #endif /* * Functions defined in all versions of POSIX 1003.1. 
*/ #if __BSD_VISIBLE || (__POSIX_VISIBLE && __POSIX_VISIBLE <= 199506) #define L_cuserid 17 /* size for cuserid(3); MAXLOGNAME, legacy */ #endif #if __POSIX_VISIBLE #define L_ctermid 1024 /* size for ctermid(3); PATH_MAX */ char *ctermid(char *); FILE *fdopen(int, const char *); int fileno(FILE *); #endif /* __POSIX_VISIBLE */ #if __POSIX_VISIBLE >= 199209 int pclose(FILE *); FILE *popen(const char *, const char *); #endif #if __POSIX_VISIBLE >= 199506 int ftrylockfile(FILE *); void flockfile(FILE *); void funlockfile(FILE *); /* * These are normally used through macros as defined below, but POSIX * requires functions as well. */ int getc_unlocked(FILE *); int getchar_unlocked(void); int putc_unlocked(int, FILE *); int putchar_unlocked(int); #endif #if __BSD_VISIBLE void clearerr_unlocked(FILE *); int feof_unlocked(FILE *); int ferror_unlocked(FILE *); int fileno_unlocked(FILE *); #endif #if __POSIX_VISIBLE >= 200112 int fseeko(FILE *, __off_t, int); __off_t ftello(FILE *); #endif #if __BSD_VISIBLE || __XSI_VISIBLE > 0 && __XSI_VISIBLE < 600 int getw(FILE *); int putw(int, FILE *); #endif /* BSD or X/Open before issue 6 */ #if __XSI_VISIBLE char *tempnam(const char *, const char *); #endif #if __POSIX_VISIBLE >= 200809 FILE *fmemopen(void * __restrict, size_t, const char * __restrict); ssize_t getdelim(char ** __restrict, size_t * __restrict, int, FILE * __restrict); FILE *open_memstream(char **, size_t *); int renameat(int, const char *, int, const char *); int vdprintf(int, const char * __restrict, __va_list); - -/* - * Every programmer and his dog wrote functions called getline() and dprintf() - * before POSIX.1-2008 came along and decided to usurp the names, so we - * don't prototype them by default unless one of the following is true: - * a) the app has requested them specifically by defining _WITH_GETLINE or - * _WITH_DPRINTF, respectively - * b) the app has requested a POSIX.1-2008 environment via _POSIX_C_SOURCE - * c) the app defines a GNUism such as _BSD_SOURCE or _GNU_SOURCE - */ -#ifndef _WITH_GETLINE -#if defined(_BSD_SOURCE) || defined(_GNU_SOURCE) -#define _WITH_GETLINE -#elif defined(_POSIX_C_SOURCE) -#if _POSIX_C_SOURCE >= 200809 -#define _WITH_GETLINE -#endif -#endif -#endif - -#ifdef _WITH_GETLINE ssize_t getline(char ** __restrict, size_t * __restrict, FILE * __restrict); -#endif - -#ifndef _WITH_DPRINTF -#if defined(_BSD_SOURCE) || defined(_GNU_SOURCE) -#define _WITH_DPRINTF -#elif defined(_POSIX_C_SOURCE) -#if _POSIX_C_SOURCE >= 200809 -#define _WITH_DPRINTF -#endif -#endif -#endif - -#ifdef _WITH_DPRINTF -int (dprintf)(int, const char * __restrict, ...); -#endif - +int dprintf(int, const char * __restrict, ...); #endif /* __POSIX_VISIBLE >= 200809 */ /* * Routines that are purely local. */ #if __BSD_VISIBLE int asprintf(char **, const char *, ...) __printflike(2, 3); char *ctermid_r(char *); void fcloseall(void); int fdclose(FILE *, int *); char *fgetln(FILE *, size_t *); const char *fmtcheck(const char *, const char *) __format_arg(2); int fpurge(FILE *); void setbuffer(FILE *, char *, int); int setlinebuf(FILE *); int vasprintf(char **, const char *, __va_list) __printflike(2, 0); /* * The system error table contains messages for the first sys_nerr * positive errno values. Use strerror() or strerror_r() from * instead. */ extern const int sys_nerr; extern const char * const sys_errlist[]; /* * Stdio function-access interface. 
*/ FILE *funopen(const void *, int (*)(void *, char *, int), int (*)(void *, const char *, int), fpos_t (*)(void *, fpos_t, int), int (*)(void *)); #define fropen(cookie, fn) funopen(cookie, fn, 0, 0, 0) #define fwopen(cookie, fn) funopen(cookie, 0, fn, 0, 0) typedef __ssize_t cookie_read_function_t(void *, char *, size_t); typedef __ssize_t cookie_write_function_t(void *, const char *, size_t); typedef int cookie_seek_function_t(void *, off64_t *, int); typedef int cookie_close_function_t(void *); typedef struct { cookie_read_function_t *read; cookie_write_function_t *write; cookie_seek_function_t *seek; cookie_close_function_t *close; } cookie_io_functions_t; FILE *fopencookie(void *, const char *, cookie_io_functions_t); /* * Portability hacks. See . */ #ifndef _FTRUNCATE_DECLARED #define _FTRUNCATE_DECLARED int ftruncate(int, __off_t); #endif #ifndef _LSEEK_DECLARED #define _LSEEK_DECLARED __off_t lseek(int, __off_t, int); #endif #ifndef _MMAP_DECLARED #define _MMAP_DECLARED void *mmap(void *, size_t, int, int, int, __off_t); #endif #ifndef _TRUNCATE_DECLARED #define _TRUNCATE_DECLARED int truncate(const char *, __off_t); #endif #endif /* __BSD_VISIBLE */ /* * Functions internal to the implementation. */ int __srget(FILE *); int __swbuf(int, FILE *); /* * The __sfoo macros are here so that we can * define function versions in the C library. */ #define __sgetc(p) (--(p)->_r < 0 ? __srget(p) : (int)(*(p)->_p++)) #if defined(__GNUC__) && defined(__STDC__) static __inline int __sputc(int _c, FILE *_p) { if (--_p->_w >= 0 || (_p->_w >= _p->_lbfsize && (char)_c != '\n')) return (*_p->_p++ = _c); else return (__swbuf(_c, _p)); } #else /* * This has been tuned to generate reasonable code on the vax using pcc. */ #define __sputc(c, p) \ (--(p)->_w < 0 ? \ (p)->_w >= (p)->_lbfsize ? \ (*(p)->_p = (c)), *(p)->_p != '\n' ? \ (int)*(p)->_p++ : \ __swbuf('\n', p) : \ __swbuf((int)(c), p) : \ (*(p)->_p = (c), (int)*(p)->_p++)) #endif extern int __isthreaded; #ifndef __cplusplus #define __sfeof(p) (((p)->_flags & __SEOF) != 0) #define __sferror(p) (((p)->_flags & __SERR) != 0) #define __sclearerr(p) ((void)((p)->_flags &= ~(__SERR|__SEOF))) #define __sfileno(p) ((p)->_file) #define feof(p) (!__isthreaded ? __sfeof(p) : (feof)(p)) #define ferror(p) (!__isthreaded ? __sferror(p) : (ferror)(p)) #define clearerr(p) (!__isthreaded ? __sclearerr(p) : (clearerr)(p)) #if __POSIX_VISIBLE #define fileno(p) (!__isthreaded ? __sfileno(p) : (fileno)(p)) #endif #define getc(fp) (!__isthreaded ? __sgetc(fp) : (getc)(fp)) #define putc(x, fp) (!__isthreaded ? __sputc(x, fp) : (putc)(x, fp)) #define getchar() getc(stdin) #define putchar(x) putc(x, stdout) #if __BSD_VISIBLE /* * See ISO/IEC 9945-1 ANSI/IEEE Std 1003.1 Second Edition 1996-07-12 * B.8.2.7 for the rationale behind the *_unlocked() macros. 
*/ #define feof_unlocked(p) __sfeof(p) #define ferror_unlocked(p) __sferror(p) #define clearerr_unlocked(p) __sclearerr(p) #define fileno_unlocked(p) __sfileno(p) #endif #if __POSIX_VISIBLE >= 199506 #define getc_unlocked(fp) __sgetc(fp) #define putc_unlocked(x, fp) __sputc(x, fp) #define getchar_unlocked() getc_unlocked(stdin) #define putchar_unlocked(x) putc_unlocked(x, stdout) #endif #endif /* __cplusplus */ __END_DECLS #endif /* !_STDIO_H_ */ Index: user/alc/PQ_LAUNDRY/lib/libc/gen/getgrent.3 =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/gen/getgrent.3 (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/gen/getgrent.3 (revision 303642) @@ -1,287 +1,286 @@ .\" Copyright (c) 1989, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 4. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" From: @(#)getgrent.3 8.2 (Berkeley) 4/19/94 .\" $FreeBSD$ .\" -.Dd April 16, 2003 +.Dd July 31, 2016 .Dt GETGRENT 3 .Os .Sh NAME .Nm getgrent , .Nm getgrent_r , .Nm getgrnam , .Nm getgrnam_r , .Nm getgrgid , .Nm getgrgid_r , .Nm setgroupent , .Nm setgrent , .Nm endgrent .Nd group database operations .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In grp.h .Ft struct group * .Fn getgrent void .Ft int .Fn getgrent_r "struct group *grp" "char *buffer" "size_t bufsize" "struct group **result" .Ft struct group * .Fn getgrnam "const char *name" .Ft int .Fn getgrnam_r "const char *name" "struct group *grp" "char *buffer" "size_t bufsize" "struct group **result" .Ft struct group * .Fn getgrgid "gid_t gid" .Ft int .Fn getgrgid_r "gid_t gid" "struct group *grp" "char *buffer" "size_t bufsize" "struct group **result" .Ft int .Fn setgroupent "int stayopen" -.Ft int +.Ft void .Fn setgrent void .Ft void .Fn endgrent void .Sh DESCRIPTION These functions operate on the group database file .Pa /etc/group which is described in .Xr group 5 . 
Each line of the database is defined by the structure .Vt group found in the include file .In grp.h : .Bd -literal -offset indent struct group { char *gr_name; /* group name */ char *gr_passwd; /* group password */ gid_t gr_gid; /* group id */ char **gr_mem; /* group members */ }; .Ed .Pp The functions .Fn getgrnam and .Fn getgrgid search the group database for the given group name pointed to by .Fa name or the group id pointed to by .Fa gid , respectively, returning the first one encountered. Identical group names or group gids may result in undefined behavior. .Pp The .Fn getgrent function sequentially reads the group database and is intended for programs that wish to step through the complete list of groups. .Pp The functions .Fn getgrent_r , .Fn getgrnam_r , and .Fn getgrgid_r are thread-safe versions of .Fn getgrent , .Fn getgrnam , and .Fn getgrgid , respectively. The caller must provide storage for the results of the search in the .Fa grp , .Fa buffer , .Fa bufsize , and .Fa result arguments. When these functions are successful, the .Fa grp argument will be filled-in, and a pointer to that argument will be stored in .Fa result . If an entry is not found or an error occurs, .Fa result will be set to .Dv NULL . .Pp These functions will open the group file for reading, if necessary. .Pp The .Fn setgroupent function opens the file, or rewinds it if it is already open. If .Fa stayopen is non-zero, file descriptors are left open, significantly speeding functions subsequent calls. This functionality is unnecessary for .Fn getgrent as it does not close its file descriptors by default. It should also be noted that it is dangerous for long-running programs to use this functionality as the group file may be updated. .Pp The .Fn setgrent function is identical to .Fn setgroupent with an argument of zero. .Pp The .Fn endgrent function closes any open files. .Sh RETURN VALUES The functions .Fn getgrent , .Fn getgrnam , and .Fn getgrgid , return a pointer to a group structure on success or .Dv NULL if the entry is not found or if an error occurs. If an error does occur, .Va errno will be set. Note that programs must explicitly set .Va errno to zero before calling any of these functions if they need to distinguish between a non-existent entry and an error. The functions .Fn getgrent_r , .Fn getgrnam_r , and .Fn getgrgid_r return 0 if no error occurred, or an error number to indicate failure. It is not an error if a matching entry is not found. (Thus, if .Fa result is set to .Dv NULL and the return value is 0, no matching entry exists.) .Pp -The functions +The function .Fn setgroupent -and -.Fn setgrent -return the value 1 if successful, otherwise the value +returns the value 1 if successful, otherwise the value 0 is returned. The functions -.Fn endgrent +.Fn endgrent , +.Fn setgrent and .Fn setgrfile have no return value. .Sh FILES .Bl -tag -width /etc/group -compact .It Pa /etc/group group database file .El .Sh COMPATIBILITY The historic function .Fn setgrfile , which allowed the specification of alternate password databases, has been deprecated and is no longer available. .Sh SEE ALSO .Xr getpwent 3 , .Xr group 5 , .Xr nsswitch.conf 5 , .Xr yp 8 .Sh STANDARDS The .Fn getgrent , .Fn getgrnam , .Fn getgrnam_r , .Fn getgrgid , .Fn getgrgid_r and .Fn endgrent functions conform to .St -p1003.1-96 . The .Fn setgrent function differs from that standard in that its return type is .Vt int rather than .Vt void . 
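Based on the description of the _r interfaces above (caller-supplied group structure, scratch buffer, and result pointer, with ERANGE reported when the buffer is too small), the following is a minimal usage sketch; the group name "wheel" and the 1024-byte starting size are arbitrary illustrations, not values taken from this page:

#include <errno.h>
#include <grp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	struct group grp, *res;
	char *buf;
	size_t bufsize = 1024;		/* arbitrary starting size */
	int rv;

	for (;;) {
		if ((buf = malloc(bufsize)) == NULL)
			return (1);
		rv = getgrnam_r("wheel", &grp, buf, bufsize, &res);
		if (rv != ERANGE)
			break;
		/* Scratch buffer too small: free it and retry, doubled. */
		free(buf);
		bufsize *= 2;
	}
	if (rv != 0)
		fprintf(stderr, "getgrnam_r: %s\n", strerror(rv));
	else if (res == NULL)
		printf("no entry for \"wheel\"\n");
	else
		printf("wheel: gid %u\n", (unsigned)grp.gr_gid);
	free(buf);
	return (rv != 0);
}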
.Sh HISTORY The functions .Fn endgrent , .Fn getgrent , .Fn getgrnam , .Fn getgrgid , and .Fn setgrent appeared in .At v7 . The functions .Fn setgrfile and .Fn setgroupent appeared in .Bx 4.3 Reno . The functions .Fn getgrent_r , .Fn getgrnam_r , and .Fn getgrgid_r appeared in .Fx 5.1 . .Sh BUGS The functions .Fn getgrent , .Fn getgrnam , .Fn getgrgid , .Fn setgroupent and .Fn setgrent leave their results in an internal static object and return a pointer to that object. Subsequent calls to the same function will modify the same object. .Pp The functions .Fn getgrent , .Fn getgrent_r , .Fn endgrent , .Fn setgroupent , and .Fn setgrent are fairly useless in a networked environment and should be avoided, if possible. The .Fn getgrent and .Fn getgrent_r functions make no attempt to suppress duplicate information if multiple sources are specified in .Xr nsswitch.conf 5 . Index: user/alc/PQ_LAUNDRY/lib/libc/gen/getgrent.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/gen/getgrent.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/gen/getgrent.c (revision 303642) @@ -1,1554 +1,1552 @@ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by * Jacques A. Vidrine, Safeport Network Services, and Network * Associates Laboratories, the Security Research Division of Network * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 * ("CBOSS"), as part of the DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include "namespace.h" #include #ifdef YP #include #include #include #endif #include #include #include #ifdef HESIOD #include #endif #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "libc_private.h" #include "nss_tls.h" #ifdef NS_CACHING #include "nscache.h" #endif enum constants { GRP_STORAGE_INITIAL = 1 << 10, /* 1 KByte */ GRP_STORAGE_MAX = 1 << 20, /* 1 MByte */ SETGRENT = 1, ENDGRENT = 2, HESIOD_NAME_MAX = 256, }; static const ns_src defaultsrc[] = { { NSSRC_COMPAT, NS_SUCCESS }, { NULL, 0 } }; int __getgroupmembership(const char *, gid_t, gid_t *, int, int *); int __gr_match_entry(const char *, size_t, enum nss_lookup_type, const char *, gid_t); int __gr_parse_entry(char *, size_t, struct group *, char *, size_t, int *); static int is_comment_line(const char *, size_t); union key { const char *name; gid_t gid; }; static struct group *getgr(int (*)(union key, struct group *, char *, size_t, struct group **), union key); static int wrap_getgrnam_r(union key, struct group *, char *, size_t, struct group **); static int wrap_getgrgid_r(union key, struct group *, char *, size_t, struct group **); static int wrap_getgrent_r(union key, struct group *, char *, size_t, struct group **); struct files_state { FILE *fp; int stayopen; }; static void files_endstate(void *); NSS_TLS_HANDLING(files); static int files_setgrent(void *, void *, va_list); static int files_group(void *, void *, va_list); #ifdef HESIOD struct dns_state { long counter; }; static void dns_endstate(void *); NSS_TLS_HANDLING(dns); static int dns_setgrent(void *, void *, va_list); static int dns_group(void *, void *, va_list); #endif #ifdef YP struct nis_state { char domain[MAXHOSTNAMELEN]; int done; char *key; int keylen; }; static void nis_endstate(void *); NSS_TLS_HANDLING(nis); static int nis_setgrent(void *, void *, va_list); static int nis_group(void *, void *, va_list); #endif struct compat_state { FILE *fp; int stayopen; char *name; enum _compat { COMPAT_MODE_OFF = 0, COMPAT_MODE_ALL, COMPAT_MODE_NAME } compat; }; static void compat_endstate(void *); NSS_TLS_HANDLING(compat); static int compat_setgrent(void *, void *, va_list); static int compat_group(void *, void *, va_list); static int gr_addgid(gid_t, gid_t *, int, int *); static int getgroupmembership_fallback(void *, void *, va_list); #ifdef NS_CACHING static int grp_id_func(char *, size_t *, va_list, void *); static int grp_marshal_func(char *, size_t *, void *, va_list, void *); static int grp_unmarshal_func(char *, size_t, void *, va_list, void *); static int grp_id_func(char *buffer, size_t *buffer_size, va_list ap, void *cache_mdata) { char *name; gid_t gid; size_t desired_size, size; int res = NS_UNAVAIL; enum nss_lookup_type lookup_type; lookup_type = (enum nss_lookup_type)cache_mdata; switch (lookup_type) { case nss_lt_name: name = va_arg(ap, char *); size = strlen(name); desired_size = sizeof(enum nss_lookup_type) + size + 1; if (desired_size > *buffer_size) { res = NS_RETURN; goto fin; } memcpy(buffer, &lookup_type, sizeof(enum nss_lookup_type)); memcpy(buffer + sizeof(enum nss_lookup_type), name, size + 1); res = NS_SUCCESS; break; case nss_lt_id: gid = va_arg(ap, gid_t); desired_size = sizeof(enum nss_lookup_type) + sizeof(gid_t); if (desired_size > *buffer_size) { res = NS_RETURN; goto fin; } memcpy(buffer, &lookup_type, sizeof(enum nss_lookup_type)); memcpy(buffer + sizeof(enum nss_lookup_type), &gid, sizeof(gid_t)); res = NS_SUCCESS; break; 
default: /* should be unreachable */ return (NS_UNAVAIL); } fin: *buffer_size = desired_size; return (res); } static int grp_marshal_func(char *buffer, size_t *buffer_size, void *retval, va_list ap, void *cache_mdata) { char *name; gid_t gid; struct group *grp; char *orig_buf; size_t orig_buf_size; struct group new_grp; size_t desired_size, size, mem_size; char *p, **mem; switch ((enum nss_lookup_type)cache_mdata) { case nss_lt_name: name = va_arg(ap, char *); break; case nss_lt_id: gid = va_arg(ap, gid_t); break; case nss_lt_all: break; default: /* should be unreachable */ return (NS_UNAVAIL); } grp = va_arg(ap, struct group *); orig_buf = va_arg(ap, char *); orig_buf_size = va_arg(ap, size_t); desired_size = _ALIGNBYTES + sizeof(struct group) + sizeof(char *); if (grp->gr_name != NULL) desired_size += strlen(grp->gr_name) + 1; if (grp->gr_passwd != NULL) desired_size += strlen(grp->gr_passwd) + 1; if (grp->gr_mem != NULL) { mem_size = 0; for (mem = grp->gr_mem; *mem; ++mem) { desired_size += strlen(*mem) + 1; ++mem_size; } desired_size += _ALIGNBYTES + (mem_size + 1) * sizeof(char *); } if (desired_size > *buffer_size) { /* this assignment is here for future use */ *buffer_size = desired_size; return (NS_RETURN); } memcpy(&new_grp, grp, sizeof(struct group)); memset(buffer, 0, desired_size); *buffer_size = desired_size; p = buffer + sizeof(struct group) + sizeof(char *); memcpy(buffer + sizeof(struct group), &p, sizeof(char *)); p = (char *)_ALIGN(p); if (new_grp.gr_name != NULL) { size = strlen(new_grp.gr_name); memcpy(p, new_grp.gr_name, size); new_grp.gr_name = p; p += size + 1; } if (new_grp.gr_passwd != NULL) { size = strlen(new_grp.gr_passwd); memcpy(p, new_grp.gr_passwd, size); new_grp.gr_passwd = p; p += size + 1; } if (new_grp.gr_mem != NULL) { p = (char *)_ALIGN(p); memcpy(p, new_grp.gr_mem, sizeof(char *) * mem_size); new_grp.gr_mem = (char **)p; p += sizeof(char *) * (mem_size + 1); for (mem = new_grp.gr_mem; *mem; ++mem) { size = strlen(*mem); memcpy(p, *mem, size); *mem = p; p += size + 1; } } memcpy(buffer, &new_grp, sizeof(struct group)); return (NS_SUCCESS); } static int grp_unmarshal_func(char *buffer, size_t buffer_size, void *retval, va_list ap, void *cache_mdata) { char *name; gid_t gid; struct group *grp; char *orig_buf; size_t orig_buf_size; int *ret_errno; char *p; char **mem; switch ((enum nss_lookup_type)cache_mdata) { case nss_lt_name: name = va_arg(ap, char *); break; case nss_lt_id: gid = va_arg(ap, gid_t); break; case nss_lt_all: break; default: /* should be unreachable */ return (NS_UNAVAIL); } grp = va_arg(ap, struct group *); orig_buf = va_arg(ap, char *); orig_buf_size = va_arg(ap, size_t); ret_errno = va_arg(ap, int *); if (orig_buf_size < buffer_size - sizeof(struct group) - sizeof(char *)) { *ret_errno = ERANGE; return (NS_RETURN); } memcpy(grp, buffer, sizeof(struct group)); memcpy(&p, buffer + sizeof(struct group), sizeof(char *)); orig_buf = (char *)_ALIGN(orig_buf); memcpy(orig_buf, buffer + sizeof(struct group) + sizeof(char *) + _ALIGN(p) - (size_t)p, buffer_size - sizeof(struct group) - sizeof(char *) - _ALIGN(p) + (size_t)p); p = (char *)_ALIGN(p); NS_APPLY_OFFSET(grp->gr_name, orig_buf, p, char *); NS_APPLY_OFFSET(grp->gr_passwd, orig_buf, p, char *); if (grp->gr_mem != NULL) { NS_APPLY_OFFSET(grp->gr_mem, orig_buf, p, char **); for (mem = grp->gr_mem; *mem; ++mem) NS_APPLY_OFFSET(*mem, orig_buf, p, char *); } if (retval != NULL) *((struct group **)retval) = grp; return (NS_SUCCESS); } NSS_MP_CACHE_HANDLING(group); #endif /* NS_CACHING */ 
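grp_marshal_func() and grp_unmarshal_func() above flatten a struct group, whose fields are pointers, into a single contiguous cache buffer and later rebuild those pointers against the caller's storage. A stripped-down sketch of the same pattern for a hypothetical two-field record; it omits the alignment and member-array handling the real functions perform:

#include <stdio.h>
#include <string.h>

struct rec {				/* stand-in for struct group */
	const char *name;
	unsigned id;
};

/* Flatten the struct followed by its string into one buffer. */
static size_t
rec_marshal(const struct rec *r, char *buf, size_t bufsize)
{
	size_t need = sizeof(*r) + strlen(r->name) + 1;

	if (need > bufsize)
		return (0);		/* caller must retry with a larger buffer */
	memcpy(buf, r, sizeof(*r));
	memcpy(buf + sizeof(*r), r->name, strlen(r->name) + 1);
	return (need);
}

/* Rebuild the struct so its pointer refers into the flat buffer. */
static void
rec_unmarshal(struct rec *r, const char *buf)
{
	memcpy(r, buf, sizeof(*r));
	r->name = buf + sizeof(*r);
}

int
main(void)
{
	struct rec in = { "wheel", 0 }, out;
	char buf[64];

	if (rec_marshal(&in, buf, sizeof(buf)) == 0)
		return (1);
	rec_unmarshal(&out, buf);
	printf("%s has id %u\n", out.name, out.id);
	return (0);
}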
#ifdef NS_CACHING static const nss_cache_info setgrent_cache_info = NS_MP_CACHE_INFO_INITIALIZER( group, (void *)nss_lt_all, NULL, NULL); #endif static const ns_dtab setgrent_dtab[] = { { NSSRC_FILES, files_setgrent, (void *)SETGRENT }, #ifdef HESIOD { NSSRC_DNS, dns_setgrent, (void *)SETGRENT }, #endif #ifdef YP { NSSRC_NIS, nis_setgrent, (void *)SETGRENT }, #endif { NSSRC_COMPAT, compat_setgrent, (void *)SETGRENT }, #ifdef NS_CACHING NS_CACHE_CB(&setgrent_cache_info) #endif { NULL, NULL, NULL } }; #ifdef NS_CACHING static const nss_cache_info endgrent_cache_info = NS_MP_CACHE_INFO_INITIALIZER( group, (void *)nss_lt_all, NULL, NULL); #endif static const ns_dtab endgrent_dtab[] = { { NSSRC_FILES, files_setgrent, (void *)ENDGRENT }, #ifdef HESIOD { NSSRC_DNS, dns_setgrent, (void *)ENDGRENT }, #endif #ifdef YP { NSSRC_NIS, nis_setgrent, (void *)ENDGRENT }, #endif { NSSRC_COMPAT, compat_setgrent, (void *)ENDGRENT }, #ifdef NS_CACHING NS_CACHE_CB(&endgrent_cache_info) #endif { NULL, NULL, NULL } }; #ifdef NS_CACHING static const nss_cache_info getgrent_r_cache_info = NS_MP_CACHE_INFO_INITIALIZER( group, (void *)nss_lt_all, grp_marshal_func, grp_unmarshal_func); #endif static const ns_dtab getgrent_r_dtab[] = { { NSSRC_FILES, files_group, (void *)nss_lt_all }, #ifdef HESIOD { NSSRC_DNS, dns_group, (void *)nss_lt_all }, #endif #ifdef YP { NSSRC_NIS, nis_group, (void *)nss_lt_all }, #endif { NSSRC_COMPAT, compat_group, (void *)nss_lt_all }, #ifdef NS_CACHING NS_CACHE_CB(&getgrent_r_cache_info) #endif { NULL, NULL, NULL } }; static int gr_addgid(gid_t gid, gid_t *groups, int maxgrp, int *grpcnt) { int ret, dupc; for (dupc = 0; dupc < MIN(maxgrp, *grpcnt); dupc++) { if (groups[dupc] == gid) return 1; } ret = 1; if (*grpcnt < maxgrp) groups[*grpcnt] = gid; else ret = 0; (*grpcnt)++; return ret; } static int getgroupmembership_fallback(void *retval, void *mdata, va_list ap) { const ns_src src[] = { { mdata, NS_SUCCESS }, { NULL, 0} }; struct group grp; struct group *grp_p; char *buf; size_t bufsize; const char *uname; gid_t *groups; gid_t agroup; int maxgrp, *grpcnt; int i, rv, ret_errno; /* * As this is a fallback method, only provided src * list will be respected during methods search. 
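 * The loop below walks the whole group database for that source through the
 * setgrent/getgrent_r/endgrent dispatch tables, doubling its scratch buffer
 * on ERANGE (up to GRP_STORAGE_MAX) and recording the gid of every group
 * whose member list contains uname.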
*/ assert(src[0].name != NULL); uname = va_arg(ap, const char *); agroup = va_arg(ap, gid_t); groups = va_arg(ap, gid_t *); maxgrp = va_arg(ap, int); grpcnt = va_arg(ap, int *); rv = NS_UNAVAIL; buf = malloc(GRP_STORAGE_INITIAL); if (buf == NULL) goto out; bufsize = GRP_STORAGE_INITIAL; gr_addgid(agroup, groups, maxgrp, grpcnt); _nsdispatch(NULL, setgrent_dtab, NSDB_GROUP, "setgrent", src, 0); for (;;) { do { ret_errno = 0; grp_p = NULL; rv = _nsdispatch(&grp_p, getgrent_r_dtab, NSDB_GROUP, "getgrent_r", src, &grp, buf, bufsize, &ret_errno); if (grp_p == NULL && ret_errno == ERANGE) { free(buf); if ((bufsize << 1) > GRP_STORAGE_MAX) { buf = NULL; errno = ERANGE; goto out; } bufsize <<= 1; buf = malloc(bufsize); if (buf == NULL) { goto out; } } } while (grp_p == NULL && ret_errno == ERANGE); if (ret_errno != 0) { errno = ret_errno; goto out; } if (grp_p == NULL) break; for (i = 0; grp.gr_mem[i]; i++) { if (strcmp(grp.gr_mem[i], uname) == 0) gr_addgid(grp.gr_gid, groups, maxgrp, grpcnt); } } _nsdispatch(NULL, endgrent_dtab, NSDB_GROUP, "endgrent", src); out: free(buf); return (rv); } -/* XXX IEEE Std 1003.1, 2003 specifies `void setgrent(void)' */ -int +void setgrent(void) { (void)_nsdispatch(NULL, setgrent_dtab, NSDB_GROUP, "setgrent", defaultsrc, 0); - return (1); } int setgroupent(int stayopen) { (void)_nsdispatch(NULL, setgrent_dtab, NSDB_GROUP, "setgrent", defaultsrc, stayopen); return (1); } void endgrent(void) { (void)_nsdispatch(NULL, endgrent_dtab, NSDB_GROUP, "endgrent", defaultsrc); } int getgrent_r(struct group *grp, char *buffer, size_t bufsize, struct group **result) { int rv, ret_errno; ret_errno = 0; *result = NULL; rv = _nsdispatch(result, getgrent_r_dtab, NSDB_GROUP, "getgrent_r", defaultsrc, grp, buffer, bufsize, &ret_errno); if (rv == NS_SUCCESS) return (0); else return (ret_errno); } int getgrnam_r(const char *name, struct group *grp, char *buffer, size_t bufsize, struct group **result) { #ifdef NS_CACHING static const nss_cache_info cache_info = NS_COMMON_CACHE_INFO_INITIALIZER( group, (void *)nss_lt_name, grp_id_func, grp_marshal_func, grp_unmarshal_func); #endif static const ns_dtab dtab[] = { { NSSRC_FILES, files_group, (void *)nss_lt_name }, #ifdef HESIOD { NSSRC_DNS, dns_group, (void *)nss_lt_name }, #endif #ifdef YP { NSSRC_NIS, nis_group, (void *)nss_lt_name }, #endif { NSSRC_COMPAT, compat_group, (void *)nss_lt_name }, #ifdef NS_CACHING NS_CACHE_CB(&cache_info) #endif { NULL, NULL, NULL } }; int rv, ret_errno; ret_errno = 0; *result = NULL; rv = _nsdispatch(result, dtab, NSDB_GROUP, "getgrnam_r", defaultsrc, name, grp, buffer, bufsize, &ret_errno); if (rv == NS_SUCCESS) return (0); else return (ret_errno); } int getgrgid_r(gid_t gid, struct group *grp, char *buffer, size_t bufsize, struct group **result) { #ifdef NS_CACHING static const nss_cache_info cache_info = NS_COMMON_CACHE_INFO_INITIALIZER( group, (void *)nss_lt_id, grp_id_func, grp_marshal_func, grp_unmarshal_func); #endif static const ns_dtab dtab[] = { { NSSRC_FILES, files_group, (void *)nss_lt_id }, #ifdef HESIOD { NSSRC_DNS, dns_group, (void *)nss_lt_id }, #endif #ifdef YP { NSSRC_NIS, nis_group, (void *)nss_lt_id }, #endif { NSSRC_COMPAT, compat_group, (void *)nss_lt_id }, #ifdef NS_CACHING NS_CACHE_CB(&cache_info) #endif { NULL, NULL, NULL } }; int rv, ret_errno; ret_errno = 0; *result = NULL; rv = _nsdispatch(result, dtab, NSDB_GROUP, "getgrgid_r", defaultsrc, gid, grp, buffer, bufsize, &ret_errno); if (rv == NS_SUCCESS) return (0); else return (ret_errno); } int __getgroupmembership(const char 
*uname, gid_t agroup, gid_t *groups, int maxgrp, int *grpcnt) { static const ns_dtab dtab[] = { NS_FALLBACK_CB(getgroupmembership_fallback) { NULL, NULL, NULL } }; assert(uname != NULL); /* groups may be NULL if just sizing when invoked with maxgrp = 0 */ assert(grpcnt != NULL); *grpcnt = 0; (void)_nsdispatch(NULL, dtab, NSDB_GROUP, "getgroupmembership", defaultsrc, uname, agroup, groups, maxgrp, grpcnt); /* too many groups found? */ return (*grpcnt > maxgrp ? -1 : 0); } static struct group grp; static char *grp_storage; static size_t grp_storage_size; static struct group * getgr(int (*fn)(union key, struct group *, char *, size_t, struct group **), union key key) { int rv; struct group *res; if (grp_storage == NULL) { grp_storage = malloc(GRP_STORAGE_INITIAL); if (grp_storage == NULL) return (NULL); grp_storage_size = GRP_STORAGE_INITIAL; } do { rv = fn(key, &grp, grp_storage, grp_storage_size, &res); if (res == NULL && rv == ERANGE) { free(grp_storage); if ((grp_storage_size << 1) > GRP_STORAGE_MAX) { grp_storage = NULL; errno = ERANGE; return (NULL); } grp_storage_size <<= 1; grp_storage = malloc(grp_storage_size); if (grp_storage == NULL) return (NULL); } } while (res == NULL && rv == ERANGE); if (rv != 0) errno = rv; return (res); } static int wrap_getgrnam_r(union key key, struct group *grp, char *buffer, size_t bufsize, struct group **res) { return (getgrnam_r(key.name, grp, buffer, bufsize, res)); } static int wrap_getgrgid_r(union key key, struct group *grp, char *buffer, size_t bufsize, struct group **res) { return (getgrgid_r(key.gid, grp, buffer, bufsize, res)); } static int wrap_getgrent_r(union key key __unused, struct group *grp, char *buffer, size_t bufsize, struct group **res) { return (getgrent_r(grp, buffer, bufsize, res)); } struct group * getgrnam(const char *name) { union key key; key.name = name; return (getgr(wrap_getgrnam_r, key)); } struct group * getgrgid(gid_t gid) { union key key; key.gid = gid; return (getgr(wrap_getgrgid_r, key)); } struct group * getgrent(void) { union key key; key.gid = 0; /* not used */ return (getgr(wrap_getgrent_r, key)); } static int is_comment_line(const char *s, size_t n) { const char *eom; eom = &s[n]; for (; s < eom; s++) if (*s == '#' || !isspace((unsigned char)*s)) break; return (*s == '#' || s == eom); } /* * files backend */ static void files_endstate(void *p) { if (p == NULL) return; if (((struct files_state *)p)->fp != NULL) fclose(((struct files_state *)p)->fp); free(p); } static int files_setgrent(void *retval, void *mdata, va_list ap) { struct files_state *st; int rv, stayopen; rv = files_getstate(&st); if (rv != 0) return (NS_UNAVAIL); switch ((enum constants)mdata) { case SETGRENT: stayopen = va_arg(ap, int); if (st->fp != NULL) rewind(st->fp); else if (stayopen) st->fp = fopen(_PATH_GROUP, "re"); break; case ENDGRENT: if (st->fp != NULL) { fclose(st->fp); st->fp = NULL; } break; default: break; } return (NS_UNAVAIL); } static int files_group(void *retval, void *mdata, va_list ap) { struct files_state *st; enum nss_lookup_type how; const char *name, *line; struct group *grp; gid_t gid; char *buffer; size_t bufsize, linesize; off_t pos; int rv, stayopen, *errnop; name = NULL; gid = (gid_t)-1; how = (enum nss_lookup_type)mdata; switch (how) { case nss_lt_name: name = va_arg(ap, const char *); break; case nss_lt_id: gid = va_arg(ap, gid_t); break; case nss_lt_all: break; default: return (NS_NOTFOUND); } grp = va_arg(ap, struct group *); buffer = va_arg(ap, char *); bufsize = va_arg(ap, size_t); errnop = va_arg(ap, int *); 
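	/*
	 * The rest of this function opens /etc/group on demand, scans it
	 * line by line with fgetln(), copies a matching line into the
	 * caller's buffer for __gr_parse_entry(), and on ERANGE seeks back
	 * to the start of that line so a retry with a larger buffer resumes
	 * at the same entry.
	 */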
*errnop = files_getstate(&st); if (*errnop != 0) return (NS_UNAVAIL); if (st->fp == NULL && ((st->fp = fopen(_PATH_GROUP, "re")) == NULL)) { *errnop = errno; return (NS_UNAVAIL); } if (how == nss_lt_all) stayopen = 1; else { rewind(st->fp); stayopen = st->stayopen; } rv = NS_NOTFOUND; pos = ftello(st->fp); while ((line = fgetln(st->fp, &linesize)) != NULL) { if (line[linesize-1] == '\n') linesize--; rv = __gr_match_entry(line, linesize, how, name, gid); if (rv != NS_SUCCESS) continue; /* We need room at least for the line, a string NUL * terminator, alignment padding, and one (char *) * pointer for the member list terminator. */ if (bufsize <= linesize + _ALIGNBYTES + sizeof(char *)) { *errnop = ERANGE; rv = NS_RETURN; break; } memcpy(buffer, line, linesize); buffer[linesize] = '\0'; rv = __gr_parse_entry(buffer, linesize, grp, &buffer[linesize + 1], bufsize - linesize - 1, errnop); if (rv & NS_TERMINATE) break; pos = ftello(st->fp); } if (st->fp != NULL && !stayopen) { fclose(st->fp); st->fp = NULL; } if (rv == NS_SUCCESS && retval != NULL) *(struct group **)retval = grp; else if (rv == NS_RETURN && *errnop == ERANGE && st->fp != NULL) fseeko(st->fp, pos, SEEK_SET); return (rv); } #ifdef HESIOD /* * dns backend */ static void dns_endstate(void *p) { free(p); } static int dns_setgrent(void *retval, void *cb_data, va_list ap) { struct dns_state *st; int rv; rv = dns_getstate(&st); if (rv != 0) return (NS_UNAVAIL); st->counter = 0; return (NS_UNAVAIL); } static int dns_group(void *retval, void *mdata, va_list ap) { char buf[HESIOD_NAME_MAX]; struct dns_state *st; struct group *grp; const char *name, *label; void *ctx; char *buffer, **hes; size_t bufsize, adjsize, linesize; gid_t gid; enum nss_lookup_type how; int rv, *errnop; ctx = NULL; hes = NULL; name = NULL; gid = (gid_t)-1; how = (enum nss_lookup_type)mdata; switch (how) { case nss_lt_name: name = va_arg(ap, const char *); break; case nss_lt_id: gid = va_arg(ap, gid_t); break; case nss_lt_all: break; } grp = va_arg(ap, struct group *); buffer = va_arg(ap, char *); bufsize = va_arg(ap, size_t); errnop = va_arg(ap, int *); *errnop = dns_getstate(&st); if (*errnop != 0) return (NS_UNAVAIL); if (hesiod_init(&ctx) != 0) { *errnop = errno; rv = NS_UNAVAIL; goto fin; } do { rv = NS_NOTFOUND; switch (how) { case nss_lt_name: label = name; break; case nss_lt_id: if (snprintf(buf, sizeof(buf), "%lu", (unsigned long)gid) >= sizeof(buf)) goto fin; label = buf; break; case nss_lt_all: if (st->counter < 0) goto fin; if (snprintf(buf, sizeof(buf), "group-%ld", st->counter++) >= sizeof(buf)) goto fin; label = buf; break; } hes = hesiod_resolve(ctx, label, how == nss_lt_id ? "gid" : "group"); if ((how == nss_lt_id && hes == NULL && (hes = hesiod_resolve(ctx, buf, "group")) == NULL) || hes == NULL) { if (how == nss_lt_all) st->counter = -1; if (errno != ENOENT) *errnop = errno; goto fin; } rv = __gr_match_entry(hes[0], strlen(hes[0]), how, name, gid); if (rv != NS_SUCCESS) { hesiod_free_list(ctx, hes); hes = NULL; continue; } /* We need room at least for the line, a string NUL * terminator, alignment padding, and one (char *) * pointer for the member list terminator. 
*/ adjsize = bufsize - _ALIGNBYTES - sizeof(char *); linesize = strlcpy(buffer, hes[0], adjsize); if (linesize >= adjsize) { *errnop = ERANGE; rv = NS_RETURN; goto fin; } hesiod_free_list(ctx, hes); hes = NULL; rv = __gr_parse_entry(buffer, linesize, grp, &buffer[linesize + 1], bufsize - linesize - 1, errnop); } while (how == nss_lt_all && !(rv & NS_TERMINATE)); fin: if (hes != NULL) hesiod_free_list(ctx, hes); if (ctx != NULL) hesiod_end(ctx); if (rv == NS_SUCCESS && retval != NULL) *(struct group **)retval = grp; return (rv); } #endif /* HESIOD */ #ifdef YP /* * nis backend */ static void nis_endstate(void *p) { if (p == NULL) return; free(((struct nis_state *)p)->key); free(p); } static int nis_setgrent(void *retval, void *cb_data, va_list ap) { struct nis_state *st; int rv; rv = nis_getstate(&st); if (rv != 0) return (NS_UNAVAIL); st->done = 0; free(st->key); st->key = NULL; return (NS_UNAVAIL); } static int nis_group(void *retval, void *mdata, va_list ap) { char *map; struct nis_state *st; struct group *grp; const char *name; char *buffer, *key, *result; size_t bufsize; gid_t gid; enum nss_lookup_type how; int *errnop, keylen, resultlen, rv; name = NULL; gid = (gid_t)-1; how = (enum nss_lookup_type)mdata; switch (how) { case nss_lt_name: name = va_arg(ap, const char *); map = "group.byname"; break; case nss_lt_id: gid = va_arg(ap, gid_t); map = "group.bygid"; break; case nss_lt_all: map = "group.byname"; break; } grp = va_arg(ap, struct group *); buffer = va_arg(ap, char *); bufsize = va_arg(ap, size_t); errnop = va_arg(ap, int *); *errnop = nis_getstate(&st); if (*errnop != 0) return (NS_UNAVAIL); if (st->domain[0] == '\0') { if (getdomainname(st->domain, sizeof(st->domain)) != 0) { *errnop = errno; return (NS_UNAVAIL); } } result = NULL; do { rv = NS_NOTFOUND; switch (how) { case nss_lt_name: if (strlcpy(buffer, name, bufsize) >= bufsize) goto erange; break; case nss_lt_id: if (snprintf(buffer, bufsize, "%lu", (unsigned long)gid) >= bufsize) goto erange; break; case nss_lt_all: if (st->done) goto fin; break; } result = NULL; if (how == nss_lt_all) { if (st->key == NULL) rv = yp_first(st->domain, map, &st->key, &st->keylen, &result, &resultlen); else { key = st->key; keylen = st->keylen; st->key = NULL; rv = yp_next(st->domain, map, key, keylen, &st->key, &st->keylen, &result, &resultlen); free(key); } if (rv != 0) { free(result); free(st->key); st->key = NULL; if (rv == YPERR_NOMORE) { st->done = 1; rv = NS_NOTFOUND; } else rv = NS_UNAVAIL; goto fin; } } else { rv = yp_match(st->domain, map, buffer, strlen(buffer), &result, &resultlen); if (rv == YPERR_KEY) { rv = NS_NOTFOUND; continue; } else if (rv != 0) { free(result); rv = NS_UNAVAIL; continue; } } /* We need room at least for the line, a string NUL * terminator, alignment padding, and one (char *) * pointer for the member list terminator. 
*/ if (resultlen >= bufsize - _ALIGNBYTES - sizeof(char *)) { free(result); goto erange; } memcpy(buffer, result, resultlen); buffer[resultlen] = '\0'; free(result); rv = __gr_match_entry(buffer, resultlen, how, name, gid); if (rv == NS_SUCCESS) rv = __gr_parse_entry(buffer, resultlen, grp, &buffer[resultlen+1], bufsize - resultlen - 1, errnop); } while (how == nss_lt_all && !(rv & NS_TERMINATE)); fin: if (rv == NS_SUCCESS && retval != NULL) *(struct group **)retval = grp; return (rv); erange: *errnop = ERANGE; return (NS_RETURN); } #endif /* YP */ /* * compat backend */ static void compat_endstate(void *p) { struct compat_state *st; if (p == NULL) return; st = (struct compat_state *)p; free(st->name); if (st->fp != NULL) fclose(st->fp); free(p); } static int compat_setgrent(void *retval, void *mdata, va_list ap) { static const ns_src compatsrc[] = { #ifdef YP { NSSRC_NIS, NS_SUCCESS }, #endif { NULL, 0 } }; ns_dtab dtab[] = { #ifdef HESIOD { NSSRC_DNS, dns_setgrent, NULL }, #endif #ifdef YP { NSSRC_NIS, nis_setgrent, NULL }, #endif { NULL, NULL, NULL } }; struct compat_state *st; int rv, stayopen; #define set_setent(x, y) do { \ int i; \ for (i = 0; i < (int)(nitems(x) - 1); i++) \ x[i].mdata = (void *)y; \ } while (0) rv = compat_getstate(&st); if (rv != 0) return (NS_UNAVAIL); switch ((enum constants)mdata) { case SETGRENT: stayopen = va_arg(ap, int); if (st->fp != NULL) rewind(st->fp); else if (stayopen) st->fp = fopen(_PATH_GROUP, "re"); set_setent(dtab, mdata); (void)_nsdispatch(NULL, dtab, NSDB_GROUP_COMPAT, "setgrent", compatsrc, 0); break; case ENDGRENT: if (st->fp != NULL) { fclose(st->fp); st->fp = NULL; } set_setent(dtab, mdata); (void)_nsdispatch(NULL, dtab, NSDB_GROUP_COMPAT, "endgrent", compatsrc, 0); break; default: break; } st->compat = COMPAT_MODE_OFF; free(st->name); st->name = NULL; return (NS_UNAVAIL); #undef set_setent } static int compat_group(void *retval, void *mdata, va_list ap) { static const ns_src compatsrc[] = { #ifdef YP { NSSRC_NIS, NS_SUCCESS }, #endif { NULL, 0 } }; ns_dtab dtab[] = { #ifdef YP { NSSRC_NIS, nis_group, NULL }, #endif #ifdef HESIOD { NSSRC_DNS, dns_group, NULL }, #endif { NULL, NULL, NULL } }; struct compat_state *st; enum nss_lookup_type how; const char *name, *line; struct group *grp; gid_t gid; char *buffer, *p; void *discard; size_t bufsize, linesize; off_t pos; int rv, stayopen, *errnop; #define set_lookup_type(x, y) do { \ int i; \ for (i = 0; i < (int)(nitems(x) - 1); i++) \ x[i].mdata = (void *)y; \ } while (0) name = NULL; gid = (gid_t)-1; how = (enum nss_lookup_type)mdata; switch (how) { case nss_lt_name: name = va_arg(ap, const char *); break; case nss_lt_id: gid = va_arg(ap, gid_t); break; case nss_lt_all: break; default: return (NS_NOTFOUND); } grp = va_arg(ap, struct group *); buffer = va_arg(ap, char *); bufsize = va_arg(ap, size_t); errnop = va_arg(ap, int *); *errnop = compat_getstate(&st); if (*errnop != 0) return (NS_UNAVAIL); if (st->fp == NULL && ((st->fp = fopen(_PATH_GROUP, "re")) == NULL)) { *errnop = errno; rv = NS_UNAVAIL; goto fin; } if (how == nss_lt_all) stayopen = 1; else { rewind(st->fp); stayopen = st->stayopen; } docompat: switch (st->compat) { case COMPAT_MODE_ALL: set_lookup_type(dtab, how); switch (how) { case nss_lt_all: rv = _nsdispatch(&discard, dtab, NSDB_GROUP_COMPAT, "getgrent_r", compatsrc, grp, buffer, bufsize, errnop); break; case nss_lt_id: rv = _nsdispatch(&discard, dtab, NSDB_GROUP_COMPAT, "getgrgid_r", compatsrc, gid, grp, buffer, bufsize, errnop); break; case nss_lt_name: rv = 
_nsdispatch(&discard, dtab, NSDB_GROUP_COMPAT, "getgrnam_r", compatsrc, name, grp, buffer, bufsize, errnop); break; } if (rv & NS_TERMINATE) goto fin; st->compat = COMPAT_MODE_OFF; break; case COMPAT_MODE_NAME: set_lookup_type(dtab, nss_lt_name); rv = _nsdispatch(&discard, dtab, NSDB_GROUP_COMPAT, "getgrnam_r", compatsrc, st->name, grp, buffer, bufsize, errnop); switch (rv) { case NS_SUCCESS: switch (how) { case nss_lt_name: if (strcmp(name, grp->gr_name) != 0) rv = NS_NOTFOUND; break; case nss_lt_id: if (gid != grp->gr_gid) rv = NS_NOTFOUND; break; default: break; } break; case NS_RETURN: goto fin; default: break; } free(st->name); st->name = NULL; st->compat = COMPAT_MODE_OFF; if (rv == NS_SUCCESS) goto fin; break; default: break; } rv = NS_NOTFOUND; pos = ftello(st->fp); while ((line = fgetln(st->fp, &linesize)) != NULL) { if (line[linesize-1] == '\n') linesize--; if (linesize > 2 && line[0] == '+') { p = memchr(&line[1], ':', linesize); if (p == NULL || p == &line[1]) st->compat = COMPAT_MODE_ALL; else { st->name = malloc(p - line); if (st->name == NULL) { syslog(LOG_ERR, "getgrent memory allocation failure"); *errnop = ENOMEM; rv = NS_UNAVAIL; break; } memcpy(st->name, &line[1], p - line - 1); st->name[p - line - 1] = '\0'; st->compat = COMPAT_MODE_NAME; } goto docompat; } rv = __gr_match_entry(line, linesize, how, name, gid); if (rv != NS_SUCCESS) continue; /* We need room at least for the line, a string NUL * terminator, alignment padding, and one (char *) * pointer for the member list terminator. */ if (bufsize <= linesize + _ALIGNBYTES + sizeof(char *)) { *errnop = ERANGE; rv = NS_RETURN; break; } memcpy(buffer, line, linesize); buffer[linesize] = '\0'; rv = __gr_parse_entry(buffer, linesize, grp, &buffer[linesize + 1], bufsize - linesize - 1, errnop); if (rv & NS_TERMINATE) break; pos = ftello(st->fp); } fin: if (st->fp != NULL && !stayopen) { fclose(st->fp); st->fp = NULL; } if (rv == NS_SUCCESS && retval != NULL) *(struct group **)retval = grp; else if (rv == NS_RETURN && *errnop == ERANGE && st->fp != NULL) fseeko(st->fp, pos, SEEK_SET); return (rv); #undef set_lookup_type } /* * common group line matching and parsing */ int __gr_match_entry(const char *line, size_t linesize, enum nss_lookup_type how, const char *name, gid_t gid) { size_t namesize; const char *p, *eol; char *q; unsigned long n; int i, needed; if (linesize == 0 || is_comment_line(line, linesize)) return (NS_NOTFOUND); switch (how) { case nss_lt_name: needed = 1; break; case nss_lt_id: needed = 2; break; default: needed = 2; break; } eol = &line[linesize]; for (p = line, i = 0; i < needed && p < eol; p++) if (*p == ':') i++; if (i < needed) return (NS_NOTFOUND); switch (how) { case nss_lt_name: namesize = strlen(name); if (namesize + 1 == (size_t)(p - line) && memcmp(line, name, namesize) == 0) return (NS_SUCCESS); break; case nss_lt_id: n = strtoul(p, &q, 10); if (q < eol && *q == ':' && gid == (gid_t)n) return (NS_SUCCESS); break; case nss_lt_all: return (NS_SUCCESS); default: break; } return (NS_NOTFOUND); } int __gr_parse_entry(char *line, size_t linesize, struct group *grp, char *membuf, size_t membufsize, int *errnop) { char *s_gid, *s_mem, *p, **members; unsigned long n; int maxmembers; memset(grp, 0, sizeof(*grp)); members = (char **)_ALIGN(membuf); membufsize -= (char *)members - membuf; maxmembers = membufsize / sizeof(*members); if (maxmembers <= 0 || (grp->gr_name = strsep(&line, ":")) == NULL || grp->gr_name[0] == '\0' || (grp->gr_passwd = strsep(&line, ":")) == NULL || (s_gid = strsep(&line, ":")) 
== NULL || s_gid[0] == '\0') return (NS_NOTFOUND); s_mem = line; n = strtoul(s_gid, &s_gid, 10); if (s_gid[0] != '\0') return (NS_NOTFOUND); grp->gr_gid = (gid_t)n; grp->gr_mem = members; while (maxmembers > 1 && s_mem != NULL) { p = strsep(&s_mem, ","); if (p != NULL && *p != '\0') { *members++ = p; maxmembers--; } } *members = NULL; if (s_mem == NULL) return (NS_SUCCESS); else { *errnop = ERANGE; return (NS_RETURN); } } Index: user/alc/PQ_LAUNDRY/lib/libc/gen/glob.3 =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/gen/glob.3 (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/gen/glob.3 (revision 303642) @@ -1,457 +1,457 @@ .\" Copyright (c) 1989, 1991, 1993, 1994 .\" The Regents of the University of California. All rights reserved. .\" .\" This code is derived from software contributed to Berkeley by .\" Guido van Rossum. .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 4. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)glob.3 8.3 (Berkeley) 4/16/94 .\" $FreeBSD$ .\" .Dd December 20, 2011 .Dt GLOB 3 .Os .Sh NAME .Nm glob , .Nm globfree .Nd generate pathnames matching a pattern .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In glob.h .Ft int .Fn glob "const char * restrict pattern" "int flags" "int (*errfunc)(const char *, int)" "glob_t * restrict pglob" .Ft void .Fn globfree "glob_t *pglob" .Sh DESCRIPTION The .Fn glob function is a pathname generator that implements the rules for file name pattern matching used by the shell. .Pp The include file .In glob.h defines the structure type .Fa glob_t , which contains at least the following fields: .Bd -literal typedef struct { size_t gl_pathc; /* count of total paths so far */ size_t gl_matchc; /* count of paths matching pattern */ size_t gl_offs; /* reserved at beginning of gl_pathv */ int gl_flags; /* returned flags */ char **gl_pathv; /* list of paths matching pattern */ } glob_t; .Ed .Pp The argument .Fa pattern is a pointer to a pathname pattern to be expanded. The .Fn glob argument matches all accessible pathnames against the pattern and creates a list of the pathnames that match. 
In order to have access to a pathname, .Fn glob requires search permission on every component of a path except the last and read permission on each directory of any filename component of .Fa pattern that contains any of the special characters .Ql * , .Ql ?\& or .Ql \&[ . .Pp The .Fn glob argument stores the number of matched pathnames into the .Fa gl_pathc field, and a pointer to a list of pointers to pathnames into the .Fa gl_pathv field. The first pointer after the last pathname is .Dv NULL . If the pattern does not match any pathnames, the returned number of matched paths is set to zero. .Pp It is the caller's responsibility to create the structure pointed to by .Fa pglob . The .Fn glob function allocates other space as needed, including the memory pointed to by .Fa gl_pathv . .Pp The argument .Fa flags is used to modify the behavior of .Fn glob . The value of .Fa flags is the bitwise inclusive .Tn OR of any of the following values defined in .In glob.h : .Bl -tag -width GLOB_ALTDIRFUNC .It Dv GLOB_APPEND Append pathnames generated to the ones from a previous call (or calls) to .Fn glob . The value of .Fa gl_pathc will be the total matches found by this call and the previous call(s). The pathnames are appended to, not merged with the pathnames returned by the previous call(s). Between calls, the caller must not change the setting of the .Dv GLOB_DOOFFS flag, nor change the value of .Fa gl_offs when .Dv GLOB_DOOFFS is set, nor (obviously) call .Fn globfree for .Fa pglob . .It Dv GLOB_DOOFFS Make use of the .Fa gl_offs field. If this flag is set, .Fa gl_offs is used to specify how many .Dv NULL pointers to prepend to the beginning of the .Fa gl_pathv field. In other words, .Fa gl_pathv will point to .Fa gl_offs .Dv NULL pointers, followed by .Fa gl_pathc pathname pointers, followed by a .Dv NULL pointer. .It Dv GLOB_ERR Causes .Fn glob to return when it encounters a directory that it cannot open or read. Ordinarily, .Fn glob continues to find matches. .It Dv GLOB_MARK Each pathname that is a directory that matches .Fa pattern has a slash appended. .It Dv GLOB_NOCHECK If .Fa pattern does not match any pathname, then .Fn glob returns a list consisting of only .Fa pattern , with the number of total pathnames set to 1, and the number of matched pathnames set to 0. The effect of backslash escaping is present in the pattern returned. .It Dv GLOB_NOESCAPE By default, a backslash .Pq Ql \e character is used to escape the following character in the pattern, avoiding any special interpretation of the character. If .Dv GLOB_NOESCAPE is set, backslash escaping is disabled. .It Dv GLOB_NOSORT By default, the pathnames are sorted in ascending collation order; this flag prevents that sorting (speeding up .Fn glob ) . .El .Pp The following values may also be included in .Fa flags , however, they are non-standard extensions to .St -p1003.2 . .Bl -tag -width GLOB_ALTDIRFUNC .It Dv GLOB_ALTDIRFUNC The following additional fields in the pglob structure have been initialized with alternate functions for glob to use to open, read, and close directories and to get stat information on names found in those directories. .Bd -literal void *(*gl_opendir)(const char * name); struct dirent *(*gl_readdir)(void *); void (*gl_closedir)(void *); int (*gl_lstat)(const char *name, struct stat *st); int (*gl_stat)(const char *name, struct stat *st); .Ed .Pp This extension is provided to allow programs such as .Xr restore 8 to provide globbing from directories stored on tape. 
.It Dv GLOB_BRACE Pre-process the pattern string to expand .Ql {pat,pat,...} strings like .Xr csh 1 . The pattern .Ql {} is left unexpanded for historical reasons (and .Xr csh 1 does the same thing to ease typing of .Xr find 1 patterns). .It Dv GLOB_MAGCHAR Set by the .Fn glob function if the pattern included globbing characters. See the description of the usage of the .Fa gl_matchc structure member for more details. .It Dv GLOB_NOMAGIC Is the same as .Dv GLOB_NOCHECK but it only appends the .Fa pattern if it does not contain any of the special characters ``*'', ``?'' or ``[''. .Dv GLOB_NOMAGIC is provided to simplify implementing the historic .Xr csh 1 globbing behavior and should probably not be used anywhere else. .It Dv GLOB_TILDE Expand patterns that start with .Ql ~ to user name home directories. .It Dv GLOB_LIMIT Limit the total number of returned pathnames to the value provided in .Fa gl_matchc (default .Dv ARG_MAX ) . This option should be set for programs that can be coerced into a denial of service attack via patterns that expand to a very large number of matches, such as a long string of .Ql */../*/.. . .El .Pp If, during the search, a directory is encountered that cannot be opened or read and .Fa errfunc is .Pf non- Dv NULL , .Fn glob calls .Fa \*(lp*errfunc\*(rp Ns ( Fa path , errno ) , however, the .Dv GLOB_ERR flag will cause an immediate return when this happens. .Pp If .Fa errfunc returns non-zero, .Fn glob stops the scan and returns .Dv GLOB_ABORTED after setting .Fa gl_pathc and .Fa gl_pathv to reflect any paths already matched. This also happens if an error is encountered and .Dv GLOB_ERR is set in .Fa flags , regardless of the return value of .Fa errfunc , if called. If .Dv GLOB_ERR is not set and either .Fa errfunc is .Dv NULL or .Fa errfunc returns zero, the error is ignored. .Pp The .Fn globfree function frees any space associated with .Fa pglob from a previous call(s) to .Fn glob . .Sh RETURN VALUES On successful completion, .Fn glob returns zero. In addition the fields of .Fa pglob contain the values described below: .Bl -tag -width GLOB_NOCHECK .It Fa gl_pathc contains the total number of matched pathnames so far. This includes other matches from previous invocations of .Fn glob if .Dv GLOB_APPEND was specified. .It Fa gl_matchc contains the number of matched pathnames in the current invocation of .Fn glob . .It Fa gl_flags contains a copy of the .Fa flags argument with the bit .Dv GLOB_MAGCHAR set if .Fa pattern contained any of the special characters ``*'', ``?'' or ``['', cleared if not. .It Fa gl_pathv contains a pointer to a .Dv NULL Ns -terminated list of matched pathnames. However, if .Fa gl_pathc is zero, the contents of .Fa gl_pathv are undefined. .El .Pp If .Fn glob terminates due to an error, it sets errno and returns one of the following non-zero constants, which are defined in the include file .In glob.h : .Bl -tag -width GLOB_NOCHECK .It Dv GLOB_NOSPACE An attempt to allocate memory failed, or if .Fa errno -was 0 +was E2BIG, .Dv GLOB_LIMIT was specified in the flags and .Fa pglob\->gl_matchc or more patterns were matched. .It Dv GLOB_ABORTED The scan was stopped because an error was encountered and either .Dv GLOB_ERR was set or .Fa \*(lp*errfunc\*(rp\*(lp\*(rp returned non-zero. .It Dv GLOB_NOMATCH The pattern did not match a pathname and .Dv GLOB_NOCHECK was not set. .El .Pp The arguments .Fa pglob\->gl_pathc and .Fa pglob\->gl_pathv are still set as specified above. 
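.Pp
For illustration only (the pattern and the
.Dv GLOB_ERR
flag below are arbitrary choices, not requirements), a caller may
distinguish the error returns described above as in the following sketch:
.Bd -literal -offset indent
glob_t g;
int rv;

/* "*.c" and GLOB_ERR are illustrative choices only. */
rv = glob("*.c", GLOB_ERR, NULL, &g);
switch (rv) {
case 0:
	/* g.gl_pathc pathnames are available in g.gl_pathv. */
	break;
case GLOB_NOMATCH:
	/* Nothing matched and GLOB_NOCHECK was not set. */
	break;
case GLOB_ABORTED:
	/* A directory could not be read and GLOB_ERR was set
	   or errfunc returned non-zero. */
	break;
case GLOB_NOSPACE:
	/* Memory allocation failed or a GLOB_LIMIT limit was reached;
	   errno gives the detail. */
	break;
}
globfree(&g);
.Ed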
.Sh EXAMPLES A rough equivalent of .Ql "ls -l *.c *.h" can be obtained with the following code: .Bd -literal -offset indent glob_t g; g.gl_offs = 2; glob("*.c", GLOB_DOOFFS, NULL, &g); glob("*.h", GLOB_DOOFFS | GLOB_APPEND, NULL, &g); g.gl_pathv[0] = "ls"; g.gl_pathv[1] = "-l"; execvp("ls", g.gl_pathv); .Ed .Sh SEE ALSO .Xr sh 1 , .Xr fnmatch 3 , .Xr regex 3 .Sh STANDARDS The current implementation of the .Fn glob function .Em does not conform to .St -p1003.2 . Collating symbol expressions, equivalence class expressions and character class expressions are not supported. .Pp The flags .Dv GLOB_ALTDIRFUNC , .Dv GLOB_BRACE , .Dv GLOB_LIMIT , .Dv GLOB_MAGCHAR , .Dv GLOB_NOMAGIC , and .Dv GLOB_TILDE , and the fields .Fa gl_matchc and .Fa gl_flags are extensions to the .Tn POSIX standard and should not be used by applications striving for strict conformance. .Sh HISTORY The .Fn glob and .Fn globfree functions first appeared in .Bx 4.4 . .Sh BUGS Patterns longer than .Dv MAXPATHLEN may cause unchecked errors. .Pp The .Fn glob argument may fail and set errno for any of the errors specified for the library routines .Xr stat 2 , .Xr closedir 3 , .Xr opendir 3 , .Xr readdir 3 , .Xr malloc 3 , and .Xr free 3 . Index: user/alc/PQ_LAUNDRY/lib/libc/gen/glob.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/gen/glob.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/gen/glob.c (revision 303642) @@ -1,1081 +1,1101 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Guido van Rossum. * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * Portions of this software were developed by David Chisnall * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)glob.c 8.3 (Berkeley) 10/13/93"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); /* * glob(3) -- a superset of the one defined in POSIX 1003.2. * * The [!...] 
convention to negate a range is supported (SysV, Posix, ksh). * * Optional extra services, controlled by flags not defined by POSIX: * * GLOB_QUOTE: * Escaping convention: \ inhibits any special meaning the following * character might have (except \ at end of string is retained). * GLOB_MAGCHAR: * Set in gl_flags if pattern contained a globbing character. * GLOB_NOMAGIC: * Same as GLOB_NOCHECK, but it will only append pattern if it did * not contain any magic characters. [Used in csh style globbing] * GLOB_ALTDIRFUNC: * Use alternately specified directory access functions. * GLOB_TILDE: * expand ~user/foo to the /home/dir/of/user/foo * GLOB_BRACE: * expand {1,2}{a,b} to 1a 1b 2a 2b * gl_matchc: * Number of matches in the current invocation of glob. */ /* * Some notes on multibyte character support: * 1. Patterns with illegal byte sequences match nothing - even if * GLOB_NOCHECK is specified. * 2. Illegal byte sequences in filenames are handled by treating them as * single-byte characters with a values of such bytes of the sequence * cast to wchar_t. * 3. State-dependent encodings are not currently supported. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "collate.h" /* * glob(3) expansion limits. Stop the expansion if any of these limits * is reached. This caps the runtime in the face of DoS attacks. See * also CVE-2010-2632 */ #define GLOB_LIMIT_BRACE 128 /* number of brace calls */ #define GLOB_LIMIT_PATH 65536 /* number of path elements */ #define GLOB_LIMIT_READDIR 16384 /* number of readdirs */ #define GLOB_LIMIT_STAT 1024 /* number of stat system calls */ #define GLOB_LIMIT_STRING ARG_MAX /* maximum total size for paths */ struct glob_limit { size_t l_brace_cnt; size_t l_path_lim; size_t l_readdir_cnt; size_t l_stat_cnt; size_t l_string_cnt; }; #define DOT L'.' #define EOS L'\0' #define LBRACKET L'[' #define NOT L'!' #define QUESTION L'?' 
#define QUOTE L'\\' #define RANGE L'-' #define RBRACKET L']' #define SEP L'/' #define STAR L'*' #define TILDE L'~' #define LBRACE L'{' #define RBRACE L'}' #define COMMA L',' #define M_QUOTE 0x8000000000ULL #define M_PROTECT 0x4000000000ULL #define M_MASK 0xffffffffffULL #define M_CHAR 0x00ffffffffULL typedef uint_fast64_t Char; #define CHAR(c) ((Char)((c)&M_CHAR)) #define META(c) ((Char)((c)|M_QUOTE)) #define UNPROT(c) ((c) & ~M_PROTECT) #define M_ALL META(L'*') #define M_END META(L']') #define M_NOT META(L'!') #define M_ONE META(L'?') #define M_RNG META(L'-') #define M_SET META(L'[') #define ismeta(c) (((c)&M_QUOTE) != 0) #ifdef DEBUG #define isprot(c) (((c)&M_PROTECT) != 0) #endif static int compare(const void *, const void *); static int g_Ctoc(const Char *, char *, size_t); static int g_lstat(Char *, struct stat *, glob_t *); static DIR *g_opendir(Char *, glob_t *); static const Char *g_strchr(const Char *, wchar_t); #ifdef notdef static Char *g_strcat(Char *, const Char *); #endif static int g_stat(Char *, struct stat *, glob_t *); static int glob0(const Char *, glob_t *, struct glob_limit *, const char *); static int glob1(Char *, glob_t *, struct glob_limit *); static int glob2(Char *, Char *, Char *, Char *, glob_t *, struct glob_limit *); static int glob3(Char *, Char *, Char *, Char *, Char *, glob_t *, struct glob_limit *); static int globextend(const Char *, glob_t *, struct glob_limit *, const char *); static const Char * globtilde(const Char *, Char *, size_t, glob_t *); static int globexp0(const Char *, glob_t *, struct glob_limit *, const char *); static int globexp1(const Char *, glob_t *, struct glob_limit *); static int globexp2(const Char *, const Char *, glob_t *, struct glob_limit *); static int globfinal(glob_t *, struct glob_limit *, size_t, const char *); static int match(Char *, Char *, Char *); #ifdef DEBUG static void qprintf(const char *, Char *); #endif int glob(const char * __restrict pattern, int flags, int (*errfunc)(const char *, int), glob_t * __restrict pglob) { struct glob_limit limit = { 0, 0, 0, 0, 0 }; const char *patnext; Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot; mbstate_t mbs; wchar_t wc; size_t clen; int too_long; patnext = pattern; if (!(flags & GLOB_APPEND)) { pglob->gl_pathc = 0; pglob->gl_pathv = NULL; if (!(flags & GLOB_DOOFFS)) pglob->gl_offs = 0; } if (flags & GLOB_LIMIT) { limit.l_path_lim = pglob->gl_matchc; if (limit.l_path_lim == 0) limit.l_path_lim = GLOB_LIMIT_PATH; } pglob->gl_flags = flags & ~GLOB_MAGCHAR; pglob->gl_errfunc = errfunc; pglob->gl_matchc = 0; bufnext = patbuf; bufend = bufnext + MAXPATHLEN - 1; too_long = 1; if (flags & GLOB_NOESCAPE) { memset(&mbs, 0, sizeof(mbs)); while (bufnext <= bufend) { clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs); if (clen == (size_t)-1 || clen == (size_t)-2) return (globfinal(pglob, &limit, pglob->gl_pathc, pattern)); else if (clen == 0) { too_long = 0; break; } *bufnext++ = wc; patnext += clen; } } else { /* Protect the quoted characters. 
*/ memset(&mbs, 0, sizeof(mbs)); while (bufnext <= bufend) { if (*patnext == '\\') { if (*++patnext == '\0') { *bufnext++ = QUOTE; continue; } prot = M_PROTECT; } else prot = 0; clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs); if (clen == (size_t)-1 || clen == (size_t)-2) return (globfinal(pglob, &limit, pglob->gl_pathc, pattern)); else if (clen == 0) { too_long = 0; break; } *bufnext++ = wc | prot; patnext += clen; } } if (too_long) return (globfinal(pglob, &limit, pglob->gl_pathc, pattern)); *bufnext = EOS; if (flags & GLOB_BRACE) return (globexp0(patbuf, pglob, &limit, pattern)); else return (glob0(patbuf, pglob, &limit, pattern)); } static int globexp0(const Char *pattern, glob_t *pglob, struct glob_limit *limit, const char *origpat) { int rv; size_t oldpathc; /* Protect a single {}, for find(1), like csh */ if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS) { if ((pglob->gl_flags & GLOB_LIMIT) && limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } return (glob0(pattern, pglob, limit, origpat)); } oldpathc = pglob->gl_pathc; if ((rv = globexp1(pattern, pglob, limit)) != 0) return rv; return (globfinal(pglob, limit, oldpathc, origpat)); } /* * Expand recursively a glob {} pattern. When there is no more expansion * invoke the standard globbing routine to glob the rest of the magic * characters */ static int globexp1(const Char *pattern, glob_t *pglob, struct glob_limit *limit) { const Char* ptr; if ((ptr = g_strchr(pattern, LBRACE)) != NULL) { if ((pglob->gl_flags & GLOB_LIMIT) && limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } return (globexp2(ptr, pattern, pglob, limit)); } return (glob0(pattern, pglob, limit, NULL)); } /* * Recursive brace globbing helper. Tries to expand a single brace. * If it succeeds then it invokes globexp1 with the new pattern. * If it fails then it tries to glob the rest of the pattern and returns. */ static int globexp2(const Char *ptr, const Char *pattern, glob_t *pglob, struct glob_limit *limit) { int i, rv; Char *lm, *ls; const Char *pe, *pm, *pm1, *pl; Char patbuf[MAXPATHLEN]; /* copy part up to the brace */ for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++) continue; *lm = EOS; ls = lm; /* Find the balanced brace */ for (i = 0, pe = ++ptr; *pe != EOS; pe++) if (*pe == LBRACKET) { /* Ignore everything between [] */ for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++) continue; if (*pe == EOS) { /* * We could not find a matching RBRACKET. * Ignore and just look for RBRACE */ pe = pm; } } else if (*pe == LBRACE) i++; else if (*pe == RBRACE) { if (i == 0) break; i--; } /* Non matching braces; just glob the pattern */ if (i != 0 || *pe == EOS) return (glob0(pattern, pglob, limit, NULL)); for (i = 0, pl = pm = ptr; pm <= pe; pm++) switch (*pm) { case LBRACKET: /* Ignore everything between [] */ for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++) continue; if (*pm == EOS) { /* * We could not find a matching RBRACKET. 
* Ignore and just look for RBRACE */ pm = pm1; } break; case LBRACE: i++; break; case RBRACE: if (i) { i--; break; } /* FALLTHROUGH */ case COMMA: if (i && *pm == COMMA) break; else { /* Append the current string */ for (lm = ls; (pl < pm); *lm++ = *pl++) continue; /* * Append the rest of the pattern after the * closing brace */ for (pl = pe + 1; (*lm++ = *pl++) != EOS;) continue; /* Expand the current pattern */ #ifdef DEBUG qprintf("globexp2:", patbuf); #endif rv = globexp1(patbuf, pglob, limit); if (rv) return (rv); /* move after the comma, to the next string */ pl = pm + 1; } break; default: break; } return (0); } /* * expand tilde from the passwd file. */ static const Char * globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob) { struct passwd *pwd; char *h, *sc; const Char *p; Char *b, *eb; wchar_t wc; wchar_t wbuf[MAXPATHLEN]; wchar_t *wbufend, *dc; size_t clen; mbstate_t mbs; int too_long; if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE)) return (pattern); /* * Copy up to the end of the string or / */ eb = &patbuf[patbuf_len - 1]; for (p = pattern + 1, b = patbuf; b < eb && *p != EOS && UNPROT(*p) != SEP; *b++ = *p++) continue; if (*p != EOS && UNPROT(*p) != SEP) return (NULL); *b = EOS; h = NULL; if (patbuf[0] == EOS) { /* * handle a plain ~ or ~/ by expanding $HOME first (iff * we're not running setuid or setgid) and then trying * the password file */ if (issetugid() != 0 || (h = getenv("HOME")) == NULL) { if (((h = getlogin()) != NULL && (pwd = getpwnam(h)) != NULL) || (pwd = getpwuid(getuid())) != NULL) h = pwd->pw_dir; else return (pattern); } } else { /* * Expand a ~user */ if (g_Ctoc(patbuf, (char *)wbuf, sizeof(wbuf))) return (NULL); if ((pwd = getpwnam((char *)wbuf)) == NULL) return (pattern); else h = pwd->pw_dir; } /* Copy the home directory */ dc = wbuf; sc = h; wbufend = wbuf + MAXPATHLEN - 1; too_long = 1; memset(&mbs, 0, sizeof(mbs)); while (dc <= wbufend) { clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs); if (clen == (size_t)-1 || clen == (size_t)-2) { /* XXX See initial comment #2. */ wc = (unsigned char)*sc; clen = 1; memset(&mbs, 0, sizeof(mbs)); } if ((*dc++ = wc) == EOS) { too_long = 0; break; } sc += clen; } if (too_long) return (NULL); dc = wbuf; for (b = patbuf; b < eb && *dc != EOS; *b++ = *dc++ | M_PROTECT) continue; if (*dc != EOS) return (NULL); /* Append the rest of the pattern */ if (*p != EOS) { too_long = 1; while (b <= eb) { if ((*b++ = *p++) == EOS) { too_long = 0; break; } } if (too_long) return (NULL); } else *b = EOS; return (patbuf); } /* * The main glob() routine: compiles the pattern (optionally processing * quotes), calls glob1() to do the real pattern matching, and finally * sorts the list (unless unsorted operation is requested). Returns 0 * if things went well, nonzero if errors occurred. */ static int glob0(const Char *pattern, glob_t *pglob, struct glob_limit *limit, const char *origpat) { const Char *qpatnext; int err; size_t oldpathc; Char *bufnext, c, patbuf[MAXPATHLEN]; qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob); if (qpatnext == NULL) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } oldpathc = pglob->gl_pathc; bufnext = patbuf; /* We don't need to check for buffer overflow any more. 
*/ while ((c = *qpatnext++) != EOS) { switch (c) { case LBRACKET: c = *qpatnext; if (c == NOT) ++qpatnext; if (*qpatnext == EOS || g_strchr(qpatnext+1, RBRACKET) == NULL) { *bufnext++ = LBRACKET; if (c == NOT) --qpatnext; break; } *bufnext++ = M_SET; if (c == NOT) *bufnext++ = M_NOT; c = *qpatnext++; do { *bufnext++ = CHAR(c); if (*qpatnext == RANGE && (c = qpatnext[1]) != RBRACKET) { *bufnext++ = M_RNG; *bufnext++ = CHAR(c); qpatnext += 2; } } while ((c = *qpatnext++) != RBRACKET); pglob->gl_flags |= GLOB_MAGCHAR; *bufnext++ = M_END; break; case QUESTION: pglob->gl_flags |= GLOB_MAGCHAR; *bufnext++ = M_ONE; break; case STAR: pglob->gl_flags |= GLOB_MAGCHAR; /* collapse adjacent stars to one, * to avoid exponential behavior */ if (bufnext == patbuf || bufnext[-1] != M_ALL) *bufnext++ = M_ALL; break; default: *bufnext++ = CHAR(c); break; } } *bufnext = EOS; #ifdef DEBUG qprintf("glob0:", patbuf); #endif if ((err = glob1(patbuf, pglob, limit)) != 0) return(err); if (origpat != NULL) return (globfinal(pglob, limit, oldpathc, origpat)); return (0); } static int globfinal(glob_t *pglob, struct glob_limit *limit, size_t oldpathc, const char *origpat) { /* * If there was no match we are going to append the origpat * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified * and the origpat did not contain any magic characters * GLOB_NOMAGIC is there just for compatibility with csh. */ if (pglob->gl_pathc == oldpathc) { if ((pglob->gl_flags & GLOB_NOCHECK) || ((pglob->gl_flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR))) return (globextend(NULL, pglob, limit, origpat)); else return (GLOB_NOMATCH); } if (!(pglob->gl_flags & GLOB_NOSORT)) qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, pglob->gl_pathc - oldpathc, sizeof(char *), compare); return (0); } static int compare(const void *p, const void *q) { return (strcoll(*(char **)p, *(char **)q)); } static int glob1(Char *pattern, glob_t *pglob, struct glob_limit *limit) { Char pathbuf[MAXPATHLEN]; /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ if (*pattern == EOS) return (0); return (glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1, pattern, pglob, limit)); } /* * The functions glob2 and glob3 are mutually recursive; there is one level * of recursion for each segment in the pattern that contains one or more * meta characters. */ static int glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, glob_t *pglob, struct glob_limit *limit) { struct stat sb; Char *p, *q; int anymeta; /* * Loop over pattern segments until end of pattern or until * segment with meta character found. */ for (anymeta = 0;;) { if (*pattern == EOS) { /* End of pattern? */ *pathend = EOS; if (g_lstat(pathbuf, &sb, pglob)) return (0); if ((pglob->gl_flags & GLOB_LIMIT) && limit->l_stat_cnt++ >= GLOB_LIMIT_STAT) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } if ((pglob->gl_flags & GLOB_MARK) && UNPROT(pathend[-1]) != SEP && (S_ISDIR(sb.st_mode) || (S_ISLNK(sb.st_mode) && g_stat(pathbuf, &sb, pglob) == 0 && S_ISDIR(sb.st_mode)))) { if (pathend + 1 > pathend_last) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } *pathend++ = SEP; *pathend = EOS; } ++pglob->gl_matchc; return (globextend(pathbuf, pglob, limit, NULL)); } /* Find end of next segment, copy tentatively to pathend. */ q = pathend; p = pattern; while (*p != EOS && UNPROT(*p) != SEP) { if (ismeta(*p)) anymeta = 1; if (q + 1 > pathend_last) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } *q++ = *p++; } if (!anymeta) { /* No expansion, do next segment. 
*/ pathend = q; pattern = p; while (UNPROT(*pattern) == SEP) { if (pathend + 1 > pathend_last) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } *pathend++ = *pattern++; } } else /* Need expansion, recurse. */ return (glob3(pathbuf, pathend, pathend_last, pattern, p, pglob, limit)); } /* NOTREACHED */ } static int glob3(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern, Char *restpattern, glob_t *pglob, struct glob_limit *limit) { struct dirent *dp; DIR *dirp; - int err, too_long, saverrno; + int err, too_long, saverrno, saverrno2; char buf[MAXPATHLEN + MB_LEN_MAX - 1]; struct dirent *(*readdirfunc)(DIR *); if (pathend > pathend_last) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } *pathend = EOS; if (pglob->gl_errfunc != NULL && g_Ctoc(pathbuf, buf, sizeof(buf))) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } + saverrno = errno; errno = 0; if ((dirp = g_opendir(pathbuf, pglob)) == NULL) { if (errno == ENOENT || errno == ENOTDIR) return (0); if ((pglob->gl_errfunc != NULL && pglob->gl_errfunc(buf, errno)) || - (pglob->gl_flags & GLOB_ERR)) + (pglob->gl_flags & GLOB_ERR)) { + if (errno == 0) + errno = saverrno; return (GLOB_ABORTED); + } + if (errno == 0) + errno = saverrno; return (0); } err = 0; /* pglob->gl_readdir takes a void *, fix this manually */ if (pglob->gl_flags & GLOB_ALTDIRFUNC) readdirfunc = (struct dirent *(*)(DIR *))pglob->gl_readdir; else readdirfunc = readdir; errno = 0; /* Search directory for matching names. */ while ((dp = (*readdirfunc)(dirp)) != NULL) { char *sc; Char *dc; wchar_t wc; size_t clen; mbstate_t mbs; if ((pglob->gl_flags & GLOB_LIMIT) && limit->l_readdir_cnt++ >= GLOB_LIMIT_READDIR) { - errno = 0; + errno = E2BIG; err = GLOB_NOSPACE; break; } /* Initial DOT must be matched literally. */ - if (dp->d_name[0] == '.' && UNPROT(*pattern) != DOT) + if (dp->d_name[0] == '.' && UNPROT(*pattern) != DOT) { + errno = 0; continue; + } memset(&mbs, 0, sizeof(mbs)); dc = pathend; sc = dp->d_name; too_long = 1; while (dc <= pathend_last) { clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs); if (clen == (size_t)-1 || clen == (size_t)-2) { /* XXX See initial comment #2. */ wc = (unsigned char)*sc; clen = 1; memset(&mbs, 0, sizeof(mbs)); } if ((*dc++ = wc) == EOS) { too_long = 0; break; } sc += clen; } + if (too_long && ((pglob->gl_errfunc != NULL && + pglob->gl_errfunc(buf, ENAMETOOLONG)) || + (pglob->gl_flags & GLOB_ERR))) { + errno = ENAMETOOLONG; + err = GLOB_ABORTED; + break; + } if (too_long || !match(pathend, pattern, restpattern)) { *pathend = EOS; + errno = 0; continue; } + if (errno == 0) + errno = saverrno; err = glob2(pathbuf, --dc, pathend_last, restpattern, pglob, limit); if (err) break; errno = 0; } - saverrno = errno; + saverrno2 = errno; if (pglob->gl_flags & GLOB_ALTDIRFUNC) (*pglob->gl_closedir)(dirp); else closedir(dirp); - errno = saverrno; + errno = saverrno2; if (err) return (err); if (dp == NULL && errno != 0 && ((pglob->gl_errfunc != NULL && pglob->gl_errfunc(buf, errno)) || (pglob->gl_flags & GLOB_ERR))) return (GLOB_ABORTED); + if (errno == 0) + errno = saverrno; return (0); } /* * Extend the gl_pathv member of a glob_t structure to accommodate a new item, * add the new item, and update gl_pathc. * * This assumes the BSD realloc, which only copies the block when its size * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic * behavior. * * Return 0 if new item added, error code if memory couldn't be allocated. 
* * Invariant of the glob_t structure: * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and * gl_pathv points to (gl_offs + gl_pathc + 1) items. */ static int globextend(const Char *path, glob_t *pglob, struct glob_limit *limit, const char *origpat) { char **pathv; size_t i, newsize, len; char *copy; const Char *p; if ((pglob->gl_flags & GLOB_LIMIT) && pglob->gl_matchc > limit->l_path_lim) { - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); /* realloc(NULL, newsize) is equivalent to malloc(newsize). */ pathv = realloc((void *)pglob->gl_pathv, newsize); if (pathv == NULL) return (GLOB_NOSPACE); if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { /* first time around -- clear initial gl_offs items */ pathv += pglob->gl_offs; for (i = pglob->gl_offs + 1; --i > 0; ) *--pathv = NULL; } pglob->gl_pathv = pathv; if (origpat != NULL) copy = strdup(origpat); else { for (p = path; *p++ != EOS;) continue; len = MB_CUR_MAX * (size_t)(p - path); /* XXX overallocation */ if ((copy = malloc(len)) != NULL) { if (g_Ctoc(path, copy, len)) { free(copy); - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } } } if (copy != NULL) { limit->l_string_cnt += strlen(copy) + 1; if ((pglob->gl_flags & GLOB_LIMIT) && limit->l_string_cnt >= GLOB_LIMIT_STRING) { free(copy); - errno = 0; + errno = E2BIG; return (GLOB_NOSPACE); } pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; } pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; return (copy == NULL ? GLOB_NOSPACE : 0); } /* * pattern matching function for filenames. Each occurrence of the * * pattern causes a recursion level. */ static int match(Char *name, Char *pat, Char *patend) { int ok, negate_range; Char c, k; struct xlocale_collate *table = (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; while (pat < patend) { c = *pat++; switch (c & M_MASK) { case M_ALL: if (pat == patend) return (1); do if (match(name, pat, patend)) return (1); while (*name++ != EOS); return (0); case M_ONE: if (*name++ == EOS) return (0); break; case M_SET: ok = 0; if ((k = *name++) == EOS) return (0); if ((negate_range = ((*pat & M_MASK) == M_NOT)) != 0) ++pat; while (((c = *pat++) & M_MASK) != M_END) if ((*pat & M_MASK) == M_RNG) { if (table->__collate_load_error ? CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]) : __wcollate_range_cmp(CHAR(c), CHAR(k)) <= 0 && __wcollate_range_cmp(CHAR(k), CHAR(pat[1])) <= 0) ok = 1; pat += 2; } else if (c == k) ok = 1; if (ok == negate_range) return (0); break; default: if (*name++ != c) return (0); break; } } return (*name == EOS); } /* Free allocated data belonging to a glob_t structure. 
*/ void globfree(glob_t *pglob) { size_t i; char **pp; if (pglob->gl_pathv != NULL) { pp = pglob->gl_pathv + pglob->gl_offs; for (i = pglob->gl_pathc; i--; ++pp) if (*pp) free(*pp); free(pglob->gl_pathv); pglob->gl_pathv = NULL; } } static DIR * g_opendir(Char *str, glob_t *pglob) { char buf[MAXPATHLEN + MB_LEN_MAX - 1]; if (*str == EOS) strcpy(buf, "."); else { if (g_Ctoc(str, buf, sizeof(buf))) { errno = ENAMETOOLONG; return (NULL); } } if (pglob->gl_flags & GLOB_ALTDIRFUNC) return ((*pglob->gl_opendir)(buf)); return (opendir(buf)); } static int g_lstat(Char *fn, struct stat *sb, glob_t *pglob) { char buf[MAXPATHLEN + MB_LEN_MAX - 1]; if (g_Ctoc(fn, buf, sizeof(buf))) { errno = ENAMETOOLONG; return (-1); } if (pglob->gl_flags & GLOB_ALTDIRFUNC) return((*pglob->gl_lstat)(buf, sb)); return (lstat(buf, sb)); } static int g_stat(Char *fn, struct stat *sb, glob_t *pglob) { char buf[MAXPATHLEN + MB_LEN_MAX - 1]; if (g_Ctoc(fn, buf, sizeof(buf))) { errno = ENAMETOOLONG; return (-1); } if (pglob->gl_flags & GLOB_ALTDIRFUNC) return ((*pglob->gl_stat)(buf, sb)); return (stat(buf, sb)); } static const Char * g_strchr(const Char *str, wchar_t ch) { do { if (*str == ch) return (str); } while (*str++); return (NULL); } static int g_Ctoc(const Char *str, char *buf, size_t len) { mbstate_t mbs; size_t clen; memset(&mbs, 0, sizeof(mbs)); while (len >= MB_CUR_MAX) { clen = wcrtomb(buf, CHAR(*str), &mbs); if (clen == (size_t)-1) { /* XXX See initial comment #2. */ *buf = (char)CHAR(*str); clen = 1; memset(&mbs, 0, sizeof(mbs)); } if (CHAR(*str) == EOS) return (0); str++; buf += clen; len -= clen; } return (1); } #ifdef DEBUG static void qprintf(const char *str, Char *s) { Char *p; (void)printf("%s\n", str); if (s != NULL) { for (p = s; *p != EOS; p++) (void)printf("%c", (char)CHAR(*p)); (void)printf("\n"); for (p = s; *p != EOS; p++) (void)printf("%c", (isprot(*p) ? '\\' : ' ')); (void)printf("\n"); for (p = s; *p != EOS; p++) (void)printf("%c", (ismeta(*p) ? '_' : ' ')); (void)printf("\n"); } } #endif Index: user/alc/PQ_LAUNDRY/lib/libc/stdio/dprintf.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/stdio/dprintf.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/stdio/dprintf.c (revision 303642) @@ -1,46 +1,45 @@ /*- * Copyright (c) 2009 David Schultz * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); -#define _WITH_DPRINTF #include "namespace.h" #include #include #include "un-namespace.h" int dprintf(int fd, const char * __restrict fmt, ...) { va_list ap; int ret; va_start(ap, fmt); ret = vdprintf(fd, fmt, ap); va_end(ap); return (ret); } Index: user/alc/PQ_LAUNDRY/lib/libc/stdio/getline.3 =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/stdio/getline.3 (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/stdio/getline.3 (revision 303642) @@ -1,166 +1,137 @@ .\" Copyright (c) 2009 David Schultz .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd November 30, 2012 +.Dd July 30, 2016 .Dt GETLINE 3 .Os .Sh NAME .Nm getdelim , .Nm getline .Nd get a line from a stream .Sh LIBRARY .Lb libc .Sh SYNOPSIS -.Fd "#define _WITH_GETLINE" .In stdio.h .Ft ssize_t .Fn getdelim "char ** restrict linep" "size_t * restrict linecapp" "int delimiter" " FILE * restrict stream" .Ft ssize_t .Fn getline "char ** restrict linep" "size_t * restrict linecapp" " FILE * restrict stream" .Sh DESCRIPTION The .Fn getdelim function reads a line from .Fa stream , delimited by the character .Fa delimiter . The .Fn getline function is equivalent to .Fn getdelim with the newline character as the delimiter. The delimiter character is included as part of the line, unless the end of the file is reached. .Pp The caller may provide a pointer to a malloced buffer for the line in .Fa *linep , and the capacity of that buffer in .Fa *linecapp . These functions expand the buffer as needed, as if via .Fn realloc . If .Fa linep points to a .Dv NULL pointer, a new buffer will be allocated. In either case, .Fa *linep and .Fa *linecapp will be updated accordingly. 
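.Pp
As an illustration (the choice of a
.Dv NUL
delimiter and the variable names are arbitrary), records separated by
.Dv NUL
bytes can be read with
.Fn getdelim
as follows:
.Bd -literal -offset indent
char *rec = NULL;
size_t cap = 0;
ssize_t len;

/* The '\e0' delimiter is only an example; any byte value works. */
while ((len = getdelim(&rec, &cap, '\e0', stdin)) > 0)
	fwrite(rec, len, 1, stdout);
free(rec);
.Ed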
.Sh RETURN VALUES The .Fn getdelim and .Fn getline functions return the number of characters stored in the buffer, excluding the terminating .Dv NUL character. The value \-1 is returned if an error occurs, or if end-of-file is reached. .Sh EXAMPLES The following code fragment reads lines from a file and writes them to standard output. The .Fn fwrite function is used in case the line contains embedded .Dv NUL characters. .Bd -literal -offset indent char *line = NULL; size_t linecap = 0; ssize_t linelen; while ((linelen = getline(&line, &linecap, fp)) > 0) fwrite(line, linelen, 1, stdout); free(line); .Ed -.Sh COMPATIBILITY -Many application writers used the name -.Va getline -before the -.Fn getline -function was introduced in -.St -p1003.1 , -so a prototype is not provided by default in order to avoid -compatibility problems. -Applications that wish to use the -.Fn getline -function described herein should either request a strict -.St -p1003.1-2008 -environment by defining the macro -.Dv _POSIX_C_SOURCE -to the value 200809 or greater, or by defining the macro -.Dv _WITH_GETLINE , -prior to the inclusion of -.In stdio.h . -For compatibility with GNU libc, defining either -.Dv _BSD_SOURCE -or -.Dv _GNU_SOURCE -prior to the inclusion of -.In stdio.h -will also make -.Fn getline -available. .Sh ERRORS These functions may fail if: .Bl -tag -width Er .It Bq Er EINVAL Either .Fa linep or .Fa linecapp is .Dv NULL . .It Bq Er EOVERFLOW No delimiter was found in the first .Dv SSIZE_MAX characters. .El .Pp These functions may also fail due to any of the errors specified for .Fn fgets and .Fn malloc . .Sh SEE ALSO .Xr fgetln 3 , .Xr fgets 3 , .Xr malloc 3 .Sh STANDARDS The .Fn getdelim and .Fn getline functions conform to .St -p1003.1-2008 . .Sh HISTORY These routines first appeared in .Fx 8.0 . .Sh BUGS There are no wide character versions of .Fn getdelim or .Fn getline . Index: user/alc/PQ_LAUNDRY/lib/libc/stdio/getline.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/stdio/getline.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/stdio/getline.c (revision 303642) @@ -1,39 +1,38 @@ /*- * Copyright (c) 2009 David Schultz * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); -#define _WITH_GETLINE #include ssize_t getline(char ** __restrict linep, size_t * __restrict linecapp, FILE * __restrict fp) { return (getdelim(linep, linecapp, '\n', fp)); } Index: user/alc/PQ_LAUNDRY/lib/libc/stdio/printf.3 =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/stdio/printf.3 (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/stdio/printf.3 (revision 303642) @@ -1,920 +1,891 @@ .\" Copyright (c) 1990, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" This code is derived from software contributed to Berkeley by .\" Chris Torek and the American National Standards Committee X3, .\" on Information Processing Systems. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 4. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)printf.3 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd December 2, 2009 +.Dd July 30, 2016 .Dt PRINTF 3 .Os .Sh NAME .Nm printf , fprintf , sprintf , snprintf , asprintf , dprintf , .Nm vprintf , vfprintf, vsprintf , vsnprintf , vasprintf, vdprintf .Nd formatted output conversion .Sh LIBRARY .Lb libc .Sh SYNOPSIS -.Fd "#define _WITH_DPRINTF" .In stdio.h .Ft int .Fn printf "const char * restrict format" ... .Ft int .Fn fprintf "FILE * restrict stream" "const char * restrict format" ... .Ft int .Fn sprintf "char * restrict str" "const char * restrict format" ... .Ft int .Fn snprintf "char * restrict str" "size_t size" "const char * restrict format" ... .Ft int .Fn asprintf "char **ret" "const char *format" ... .Ft int .Fn dprintf "int fd" "const char * restrict format" ... 
.In stdarg.h .Ft int .Fn vprintf "const char * restrict format" "va_list ap" .Ft int .Fn vfprintf "FILE * restrict stream" "const char * restrict format" "va_list ap" .Ft int .Fn vsprintf "char * restrict str" "const char * restrict format" "va_list ap" .Ft int .Fn vsnprintf "char * restrict str" "size_t size" "const char * restrict format" "va_list ap" .Ft int .Fn vasprintf "char **ret" "const char *format" "va_list ap" .Ft int .Fn vdprintf "int fd" "const char * restrict format" "va_list ap" .Sh DESCRIPTION The .Fn printf family of functions produces output according to a .Fa format as described below. The .Fn printf and .Fn vprintf functions write output to .Dv stdout , the standard output stream; .Fn fprintf and .Fn vfprintf write output to the given output .Fa stream ; .Fn dprintf and .Fn vdprintf write output to the given file descriptor; .Fn sprintf , .Fn snprintf , .Fn vsprintf , and .Fn vsnprintf write to the character string .Fa str ; and .Fn asprintf and .Fn vasprintf dynamically allocate a new string with .Xr malloc 3 . .Pp These functions write the output under the control of a .Fa format string that specifies how subsequent arguments (or arguments accessed via the variable-length argument facilities of .Xr stdarg 3 ) are converted for output. .Pp The .Fn asprintf and .Fn vasprintf functions set .Fa *ret to be a pointer to a buffer sufficiently large to hold the formatted string. This pointer should be passed to .Xr free 3 to release the allocated storage when it is no longer needed. If sufficient space cannot be allocated, .Fn asprintf and .Fn vasprintf will return \-1 and set .Fa ret to be a .Dv NULL pointer. .Pp The .Fn snprintf and .Fn vsnprintf functions will write at most .Fa size Ns \-1 of the characters printed into the output string (the .Fa size Ns 'th character then gets the terminating .Ql \e0 ) ; if the return value is greater than or equal to the .Fa size argument, the string was too short and some of the printed characters were discarded. The output is always null-terminated, unless .Fa size is 0. .Pp The .Fn sprintf and .Fn vsprintf functions effectively assume a .Fa size of .Dv INT_MAX + 1. .Pp The format string is composed of zero or more directives: ordinary .\" multibyte characters (not .Cm % ) , which are copied unchanged to the output stream; and conversion specifications, each of which results in fetching zero or more subsequent arguments. Each conversion specification is introduced by the .Cm % character. The arguments must correspond properly (after type promotion) with the conversion specifier. After the .Cm % , the following appear in sequence: .Bl -bullet .It An optional field, consisting of a decimal digit string followed by a .Cm $ , specifying the next argument to access. If this field is not provided, the argument following the last argument accessed will be used. Arguments are numbered starting at .Cm 1 . If unaccessed arguments in the format string are interspersed with ones that are accessed the results will be indeterminate. .It Zero or more of the following flags: .Bl -tag -width ".So \ Sc (space)" .It Sq Cm # The value should be converted to an .Dq alternate form . For .Cm c , d , i , n , p , s , and .Cm u conversions, this option has no effect. For .Cm o conversions, the precision of the number is increased to force the first character of the output string to a zero. For .Cm x and .Cm X conversions, a non-zero result has the string .Ql 0x (or .Ql 0X for .Cm X conversions) prepended to it. 
For .Cm a , A , e , E , f , F , g , and .Cm G conversions, the result will always contain a decimal point, even if no digits follow it (normally, a decimal point appears in the results of those conversions only if a digit follows). For .Cm g and .Cm G conversions, trailing zeros are not removed from the result as they would otherwise be. .It So Cm 0 Sc (zero) Zero padding. For all conversions except .Cm n , the converted value is padded on the left with zeros rather than blanks. If a precision is given with a numeric conversion .Cm ( d , i , o , u , i , x , and .Cm X ) , the .Cm 0 flag is ignored. .It Sq Cm \- A negative field width flag; the converted value is to be left adjusted on the field boundary. Except for .Cm n conversions, the converted value is padded on the right with blanks, rather than on the left with blanks or zeros. A .Cm \- overrides a .Cm 0 if both are given. .It So "\ " Sc (space) A blank should be left before a positive number produced by a signed conversion .Cm ( a , A , d , e , E , f , F , g , G , or .Cm i ) . .It Sq Cm + A sign must always be placed before a number produced by a signed conversion. A .Cm + overrides a space if both are used. .It So "'" Sc (apostrophe) Decimal conversions .Cm ( d , u , or .Cm i ) or the integral portion of a floating point conversion .Cm ( f or .Cm F ) should be grouped and separated by thousands using the non-monetary separator returned by .Xr localeconv 3 . .El .It An optional decimal digit string specifying a minimum field width. If the converted value has fewer characters than the field width, it will be padded with spaces on the left (or right, if the left-adjustment flag has been given) to fill out the field width. .It An optional precision, in the form of a period .Cm \&. followed by an optional digit string. If the digit string is omitted, the precision is taken as zero. This gives the minimum number of digits to appear for .Cm d , i , o , u , x , and .Cm X conversions, the number of digits to appear after the decimal-point for .Cm a , A , e , E , f , and .Cm F conversions, the maximum number of significant digits for .Cm g and .Cm G conversions, or the maximum number of characters to be printed from a string for .Cm s conversions. .It An optional length modifier, that specifies the size of the argument. The following length modifiers are valid for the .Cm d , i , n , o , u , x , or .Cm X conversion: .Bl -column ".Cm q Em (deprecated)" ".Vt signed char" ".Vt unsigned long long" ".Vt long long *" .It Sy Modifier Ta Cm d , i Ta Cm o , u , x , X Ta Cm n .It Cm hh Ta Vt "signed char" Ta Vt "unsigned char" Ta Vt "signed char *" .It Cm h Ta Vt short Ta Vt "unsigned short" Ta Vt "short *" .It Cm l No (ell) Ta Vt long Ta Vt "unsigned long" Ta Vt "long *" .It Cm ll No (ell ell) Ta Vt "long long" Ta Vt "unsigned long long" Ta Vt "long long *" .It Cm j Ta Vt intmax_t Ta Vt uintmax_t Ta Vt "intmax_t *" .It Cm t Ta Vt ptrdiff_t Ta (see note) Ta Vt "ptrdiff_t *" .It Cm z Ta (see note) Ta Vt size_t Ta (see note) .It Cm q Em (deprecated) Ta Vt quad_t Ta Vt u_quad_t Ta Vt "quad_t *" .El .Pp Note: the .Cm t modifier, when applied to a .Cm o , u , x , or .Cm X conversion, indicates that the argument is of an unsigned type equivalent in size to a .Vt ptrdiff_t . The .Cm z modifier, when applied to a .Cm d or .Cm i conversion, indicates that the argument is of a signed type equivalent in size to a .Vt size_t . 
Similarly, when applied to an .Cm n conversion, it indicates that the argument is a pointer to a signed type equivalent in size to a .Vt size_t . .Pp The following length modifier is valid for the .Cm a , A , e , E , f , F , g , or .Cm G conversion: .Bl -column ".Sy Modifier" ".Cm a , A , e , E , f , F , g , G" .It Sy Modifier Ta Cm a , A , e , E , f , F , g , G .It Cm l No (ell) Ta Vt double (ignored, same behavior as without it) .It Cm L Ta Vt "long double" .El .Pp The following length modifier is valid for the .Cm c or .Cm s conversion: .Bl -column ".Sy Modifier" ".Vt wint_t" ".Vt wchar_t *" .It Sy Modifier Ta Cm c Ta Cm s .It Cm l No (ell) Ta Vt wint_t Ta Vt "wchar_t *" .El .It A character that specifies the type of conversion to be applied. .El .Pp A field width or precision, or both, may be indicated by an asterisk .Ql * or an asterisk followed by one or more decimal digits and a .Ql $ instead of a digit string. In this case, an .Vt int argument supplies the field width or precision. A negative field width is treated as a left adjustment flag followed by a positive field width; a negative precision is treated as though it were missing. If a single format directive mixes positional .Pq Li nn$ and non-positional arguments, the results are undefined. .Pp The conversion specifiers and their meanings are: .Bl -tag -width ".Cm diouxX" .It Cm diouxX The .Vt int (or appropriate variant) argument is converted to signed decimal .Cm ( d and .Cm i ) , unsigned octal .Pq Cm o , unsigned decimal .Pq Cm u , or unsigned hexadecimal .Cm ( x and .Cm X ) notation. The letters .Dq Li abcdef are used for .Cm x conversions; the letters .Dq Li ABCDEF are used for .Cm X conversions. The precision, if any, gives the minimum number of digits that must appear; if the converted value requires fewer digits, it is padded on the left with zeros. .It Cm DOU The .Vt "long int" argument is converted to signed decimal, unsigned octal, or unsigned decimal, as if the format had been .Cm ld , lo , or .Cm lu respectively. These conversion characters are deprecated, and will eventually disappear. .It Cm eE The .Vt double argument is rounded and converted in the style .Sm off .Oo \- Oc Ar d Li \&. Ar ddd Li e \(+- Ar dd .Sm on where there is one digit before the decimal-point character and the number of digits after it is equal to the precision; if the precision is missing, it is taken as 6; if the precision is zero, no decimal-point character appears. An .Cm E conversion uses the letter .Ql E (rather than .Ql e ) to introduce the exponent. The exponent always contains at least two digits; if the value is zero, the exponent is 00. .Pp For .Cm a , A , e , E , f , F , g , and .Cm G conversions, positive and negative infinity are represented as .Li inf and .Li -inf respectively when using the lowercase conversion character, and .Li INF and .Li -INF respectively when using the uppercase conversion character. Similarly, NaN is represented as .Li nan when using the lowercase conversion, and .Li NAN when using the uppercase conversion. .It Cm fF The .Vt double argument is rounded and converted to decimal notation in the style .Sm off .Oo \- Oc Ar ddd Li \&. Ar ddd , .Sm on where the number of digits after the decimal-point character is equal to the precision specification. If the precision is missing, it is taken as 6; if the precision is explicitly zero, no decimal-point character appears. If a decimal point appears, at least one digit appears before it. 
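.Pp
As a brief, purely illustrative sketch of the precision rules described
above for the
.Cm e
and
.Cm f
conversions (the commented output is what those rules imply for an
IEEE double):
.Bd -literal -offset indent
#include <stdio.h>

int
main(void)
{
	printf("%e\en", 123.456);	/* 1.234560e+02 (default precision 6) */
	printf("%.2e\en", 123.456);	/* 1.23e+02 */
	printf("%f\en", 123.456);	/* 123.456000 */
	printf("%.0f\en", 123.456);	/* 123 (no decimal-point character) */
	return (0);
}
.Ed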
.It Cm gG The .Vt double argument is converted in style .Cm f or .Cm e (or .Cm F or .Cm E for .Cm G conversions). The precision specifies the number of significant digits. If the precision is missing, 6 digits are given; if the precision is zero, it is treated as 1. Style .Cm e is used if the exponent from its conversion is less than \-4 or greater than or equal to the precision. Trailing zeros are removed from the fractional part of the result; a decimal point appears only if it is followed by at least one digit. .It Cm aA The .Vt double argument is rounded and converted to hexadecimal notation in the style .Sm off .Oo \- Oc Li 0x Ar h Li \&. Ar hhhp Oo \(+- Oc Ar d , .Sm on where the number of digits after the hexadecimal-point character is equal to the precision specification. If the precision is missing, it is taken as enough to represent the floating-point number exactly, and no rounding occurs. If the precision is zero, no hexadecimal-point character appears. The .Cm p is a literal character .Ql p , and the exponent consists of a positive or negative sign followed by a decimal number representing an exponent of 2. The .Cm A conversion uses the prefix .Dq Li 0X (rather than .Dq Li 0x ) , the letters .Dq Li ABCDEF (rather than .Dq Li abcdef ) to represent the hex digits, and the letter .Ql P (rather than .Ql p ) to separate the mantissa and exponent. .Pp Note that there may be multiple valid ways to represent floating-point numbers in this hexadecimal format. For example, .Li 0x1.92p+1 , 0x3.24p+0 , 0x6.48p-1 , and .Li 0xc.9p-2 are all equivalent. .Fx 8.0 and later always prints finite non-zero numbers using .Ql 1 as the digit before the hexadecimal point. Zeroes are always represented with a mantissa of 0 (preceded by a .Ql - if appropriate) and an exponent of .Li +0 . .It Cm C Treated as .Cm c with the .Cm l (ell) modifier. .It Cm c The .Vt int argument is converted to an .Vt "unsigned char" , and the resulting character is written. .Pp If the .Cm l (ell) modifier is used, the .Vt wint_t argument shall be converted to a .Vt wchar_t , and the (potentially multi-byte) sequence representing the single wide character is written, including any shift sequences. If a shift sequence is used, the shift state is also restored to the original state after the character. .It Cm S Treated as .Cm s with the .Cm l (ell) modifier. .It Cm s The .Vt "char *" argument is expected to be a pointer to an array of character type (pointer to a string). Characters from the array are written up to (but not including) a terminating .Dv NUL character; if a precision is specified, no more than the number specified are written. If a precision is given, no null character need be present; if the precision is not specified, or is greater than the size of the array, the array must contain a terminating .Dv NUL character. .Pp If the .Cm l (ell) modifier is used, the .Vt "wchar_t *" argument is expected to be a pointer to an array of wide characters (pointer to a wide string). For each wide character in the string, the (potentially multi-byte) sequence representing the wide character is written, including any shift sequences. If any shift sequence is used, the shift state is also restored to the original state after the string. Wide characters from the array are written up to (but not including) a terminating wide .Dv NUL character; if a precision is specified, no more than the number of bytes specified are written (including shift sequences). Partial characters are never written. 
If a precision is given, no null character need be present; if the precision is not specified, or is greater than the number of bytes required to render the multibyte representation of the string, the array must contain a terminating wide .Dv NUL character. .It Cm p The .Vt "void *" pointer argument is printed in hexadecimal (as if by .Ql %#x or .Ql %#lx ) . .It Cm n The number of characters written so far is stored into the integer indicated by the .Vt "int *" (or variant) pointer argument. No argument is converted. .It Cm % A .Ql % is written. No argument is converted. The complete conversion specification is .Ql %% . .El .Pp The decimal point character is defined in the program's locale (category .Dv LC_NUMERIC ) . .Pp In no case does a non-existent or small field width cause truncation of a numeric field; if the result of a conversion is wider than the field width, the field is expanded to contain the conversion result. .Sh RETURN VALUES These functions return the number of characters printed (not including the trailing .Ql \e0 used to end output to strings), except for .Fn snprintf and .Fn vsnprintf , which return the number of characters that would have been printed if the .Fa size were unlimited (again, not including the final .Ql \e0 ) . These functions return a negative value if an error occurs. .Sh EXAMPLES To print a date and time in the form .Dq Li "Sunday, July 3, 10:02" , where .Fa weekday and .Fa month are pointers to strings: .Bd -literal -offset indent #include fprintf(stdout, "%s, %s %d, %.2d:%.2d\en", weekday, month, day, hour, min); .Ed .Pp To print \*(Pi to five decimal places: .Bd -literal -offset indent #include #include fprintf(stdout, "pi = %.5f\en", 4 * atan(1.0)); .Ed .Pp To allocate a 128 byte string and print into it: .Bd -literal -offset indent #include #include #include char *newfmt(const char *fmt, ...) { char *p; va_list ap; if ((p = malloc(128)) == NULL) return (NULL); va_start(ap, fmt); (void) vsnprintf(p, 128, fmt, ap); va_end(ap); return (p); } .Ed .Sh COMPATIBILITY -Many application writers used the name -.Va dprintf -before the -.Fn dprintf -function was introduced in -.St -p1003.1 , -so a prototype is not provided by default in order to avoid -compatibility problems. -Applications that wish to use the -.Fn dprintf -function described herein should either request a strict -.St -p1003.1-2008 -environment by defining the macro -.Dv _POSIX_C_SOURCE -to the value 200809 or greater, or by defining the macro -.Dv _WITH_DPRINTF , -prior to the inclusion of -.In stdio.h . -For compatibility with GNU libc, defining either -.Dv _BSD_SOURCE -or -.Dv _GNU_SOURCE -prior to the inclusion of -.In stdio.h -will also make -.Fn dprintf -available. -.Pp The conversion formats .Cm \&%D , \&%O , and .Cm \&%U are not standard and are provided only for backward compatibility. The effect of padding the .Cm %p format with zeros (either by the .Cm 0 flag or by specifying a precision), and the benign effect (i.e., none) of the .Cm # flag on .Cm %n and .Cm %p conversions, as well as other nonsensical combinations such as .Cm %Ld , are not standard; such combinations should be avoided. .Sh ERRORS In addition to the errors documented for the .Xr write 2 system call, the .Fn printf family of functions may fail if: .Bl -tag -width Er .It Bq Er EILSEQ An invalid wide character code was encountered. .It Bq Er ENOMEM Insufficient storage space is available. 
.It Bq Er EOVERFLOW The .Fa size argument exceeds .Dv INT_MAX + 1 , or the return value would be too large to be represented by an .Vt int . .El .Sh SEE ALSO .Xr printf 1 , .Xr fmtcheck 3 , .Xr scanf 3 , .Xr setlocale 3 , .Xr wprintf 3 .Sh STANDARDS Subject to the caveats noted in the .Sx BUGS section below, the .Fn fprintf , .Fn printf , .Fn sprintf , .Fn vprintf , .Fn vfprintf , and .Fn vsprintf functions conform to .St -ansiC and .St -isoC-99 . With the same reservation, the .Fn snprintf and .Fn vsnprintf functions conform to .St -isoC-99 , while .Fn dprintf and .Fn vdprintf conform to .St -p1003.1-2008 . .Sh HISTORY The functions .Fn asprintf and .Fn vasprintf first appeared in the .Tn GNU C library. These were implemented by .An Peter Wemm Aq Mt peter@FreeBSD.org in .Fx 2.2 , but were later replaced with a different implementation from .Ox 2.3 by .An Todd C. Miller Aq Mt Todd.Miller@courtesan.com . The .Fn dprintf and .Fn vdprintf functions were added in .Fx 8.0 . .Sh BUGS The .Nm family of functions do not correctly handle multibyte characters in the .Fa format argument. .Sh SECURITY CONSIDERATIONS The .Fn sprintf and .Fn vsprintf functions are easily misused in a manner which enables malicious users to arbitrarily change a running program's functionality through a buffer overflow attack. Because .Fn sprintf and .Fn vsprintf assume an infinitely long string, callers must be careful not to overflow the actual space; this is often hard to assure. For safety, programmers should use the .Fn snprintf interface instead. For example: .Bd -literal void foo(const char *arbitrary_string, const char *and_another) { char onstack[8]; #ifdef BAD /* * This first sprintf is bad behavior. Do not use sprintf! */ sprintf(onstack, "%s, %s", arbitrary_string, and_another); #else /* * The following two lines demonstrate better use of * snprintf(). */ snprintf(onstack, sizeof(onstack), "%s, %s", arbitrary_string, and_another); #endif } .Ed .Pp The .Fn printf and .Fn sprintf family of functions are also easily misused in a manner allowing malicious users to arbitrarily change a running program's functionality by either causing the program to print potentially sensitive data .Dq "left on the stack" , or causing it to generate a memory fault or bus error by dereferencing an invalid pointer. .Pp .Cm %n can be used to write arbitrary data to potentially carefully-selected addresses. Programmers are therefore strongly advised to never pass untrusted strings as the .Fa format argument, as an attacker can put format specifiers in the string to mangle your stack, leading to a possible security hole. This holds true even if the string was built using a function like .Fn snprintf , as the resulting string may still contain user-supplied conversion specifiers for later interpolation by .Fn printf . .Pp Always use the proper secure idiom: .Pp .Dl "snprintf(buffer, sizeof(buffer), \*q%s\*q, string);" Index: user/alc/PQ_LAUNDRY/lib/libc/tests/stdio/getdelim_test.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/libc/tests/stdio/getdelim_test.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libc/tests/stdio/getdelim_test.c (revision 303642) @@ -1,236 +1,235 @@ /*- * Copyright (c) 2009 David Schultz * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); -#define _WITH_GETLINE #include #include #include #include #include #define CHUNK_MAX 10 /* The assertions depend on this string. */ char apothegm[] = "All work and no play\0 makes Jack a dull boy.\n"; /* * This is a neurotic reader function designed to give getdelim() a * hard time. It reads through the string `apothegm' and returns a * random number of bytes up to the requested length. */ static int _reader(void *cookie, char *buf, int len) { size_t *offp = cookie; size_t r; r = random() % CHUNK_MAX + 1; if (len > r) len = r; if (len > sizeof(apothegm) - *offp) len = sizeof(apothegm) - *offp; memcpy(buf, apothegm + *offp, len); *offp += len; return (len); } static FILE * mkfilebuf(void) { size_t *offp; offp = malloc(sizeof(*offp)); /* XXX leak */ *offp = 0; return (fropen(offp, _reader)); } ATF_TC_WITHOUT_HEAD(getline_basic); ATF_TC_BODY(getline_basic, tc) { FILE *fp; char *line; size_t linecap; int i; srandom(0); /* * Test multiple times with different buffer sizes * and different _reader() return values. */ errno = 0; for (i = 0; i < 8; i++) { fp = mkfilebuf(); linecap = i; line = malloc(i); /* First line: the full apothegm */ ATF_REQUIRE(getline(&line, &linecap, fp) == sizeof(apothegm) - 1); ATF_REQUIRE(memcmp(line, apothegm, sizeof(apothegm)) == 0); ATF_REQUIRE(linecap >= sizeof(apothegm)); /* Second line: the NUL terminator following the newline */ ATF_REQUIRE(getline(&line, &linecap, fp) == 1); ATF_REQUIRE(line[0] == '\0' && line[1] == '\0'); /* Third line: EOF */ line[0] = 'X'; ATF_REQUIRE(getline(&line, &linecap, fp) == -1); ATF_REQUIRE(line[0] == '\0'); free(line); line = NULL; ATF_REQUIRE(feof(fp)); ATF_REQUIRE(!ferror(fp)); fclose(fp); } ATF_REQUIRE(errno == 0); } ATF_TC_WITHOUT_HEAD(stream_error); ATF_TC_BODY(stream_error, tc) { char *line; size_t linecap; /* Make sure read errors are handled properly. */ line = NULL; linecap = 0; errno = 0; ATF_REQUIRE(getline(&line, &linecap, stdout) == -1); ATF_REQUIRE(errno == EBADF); errno = 0; ATF_REQUIRE(getdelim(&line, &linecap, 'X', stdout) == -1); ATF_REQUIRE(errno == EBADF); ATF_REQUIRE(ferror(stdout)); } ATF_TC_WITHOUT_HEAD(invalid_params); ATF_TC_BODY(invalid_params, tc) { FILE *fp; char *line; size_t linecap; /* Make sure NULL linep or linecapp pointers are handled. 
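	 * The assertions below expect the implementation to reject a NULL
	 * linep or linecapp by returning -1 with errno set to EINVAL and
	 * by setting the stream's error indicator.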
*/ fp = mkfilebuf(); ATF_REQUIRE(getline(NULL, &linecap, fp) == -1); ATF_REQUIRE(errno == EINVAL); ATF_REQUIRE(getline(&line, NULL, fp) == -1); ATF_REQUIRE(errno == EINVAL); ATF_REQUIRE(ferror(fp)); fclose(fp); } ATF_TC_WITHOUT_HEAD(eof); ATF_TC_BODY(eof, tc) { FILE *fp; char *line; size_t linecap; /* Make sure getline() allocates memory as needed if fp is at EOF. */ errno = 0; fp = mkfilebuf(); while (!feof(fp)) /* advance to EOF; can't fseek this stream */ getc(fp); line = NULL; linecap = 0; printf("getline\n"); ATF_REQUIRE(getline(&line, &linecap, fp) == -1); ATF_REQUIRE(line[0] == '\0'); ATF_REQUIRE(linecap > 0); ATF_REQUIRE(errno == 0); printf("feof\n"); ATF_REQUIRE(feof(fp)); ATF_REQUIRE(!ferror(fp)); fclose(fp); } ATF_TC_WITHOUT_HEAD(nul); ATF_TC_BODY(nul, tc) { FILE *fp; char *line; size_t linecap, n; errno = 0; line = NULL; linecap = 0; /* Make sure a NUL delimiter works. */ fp = mkfilebuf(); n = strlen(apothegm); printf("getdelim\n"); ATF_REQUIRE(getdelim(&line, &linecap, '\0', fp) == n + 1); ATF_REQUIRE(strcmp(line, apothegm) == 0); ATF_REQUIRE(line[n + 1] == '\0'); ATF_REQUIRE(linecap > n + 1); n = strlen(apothegm + n + 1); printf("getdelim 2\n"); ATF_REQUIRE(getdelim(&line, &linecap, '\0', fp) == n + 1); ATF_REQUIRE(line[n + 1] == '\0'); ATF_REQUIRE(linecap > n + 1); ATF_REQUIRE(errno == 0); ATF_REQUIRE(!ferror(fp)); fclose(fp); } ATF_TC_WITHOUT_HEAD(empty_NULL_buffer); ATF_TC_BODY(empty_NULL_buffer, tc) { FILE *fp; char *line; size_t linecap; /* Make sure NULL *linep and zero *linecapp are handled. */ fp = mkfilebuf(); line = NULL; linecap = 42; ATF_REQUIRE(getline(&line, &linecap, fp) == sizeof(apothegm) - 1); ATF_REQUIRE(memcmp(line, apothegm, sizeof(apothegm)) == 0); fp = mkfilebuf(); free(line); line = malloc(100); linecap = 0; ATF_REQUIRE(getline(&line, &linecap, fp) == sizeof(apothegm) - 1); ATF_REQUIRE(memcmp(line, apothegm, sizeof(apothegm)) == 0); free(line); ATF_REQUIRE(!ferror(fp)); fclose(fp); } ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, getline_basic); ATF_TP_ADD_TC(tp, stream_error); ATF_TP_ADD_TC(tp, eof); ATF_TP_ADD_TC(tp, invalid_params); ATF_TP_ADD_TC(tp, nul); ATF_TP_ADD_TC(tp, empty_NULL_buffer); return (atf_no_error()); } Index: user/alc/PQ_LAUNDRY/lib/libproc/Makefile =================================================================== --- user/alc/PQ_LAUNDRY/lib/libproc/Makefile (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libproc/Makefile (revision 303642) @@ -1,47 +1,47 @@ # $FreeBSD$ .include PACKAGE=lib${LIB} LIB= proc SRCS= proc_bkpt.c \ proc_create.c \ proc_regs.c \ proc_sym.c \ proc_rtld.c \ proc_util.c INCS= libproc.h CFLAGS+= -I${.CURDIR} .if ${MK_CXX} == "no" CFLAGS+= -DNO_CXA_DEMANGLE .elif ${MK_LIBCPLUSPLUS} != "no" LIBADD+= cxxrt .else LIBADD+= supcplusplus .endif -LIBADD+= elf rtld_db util +LIBADD+= elf procstat rtld_db util .if ${MK_CDDL} != "no" LIBADD+= ctf IGNORE_PRAGMA= YES CFLAGS+= -I${.CURDIR}/../../cddl/contrib/opensolaris/lib/libctf/common \ -I${.CURDIR}/../../sys/cddl/contrib/opensolaris/uts/common \ -I${.CURDIR}/../../sys/cddl/compat/opensolaris .else CFLAGS+= -DNO_CTF .endif SHLIB_MAJOR= 3 MAN= .if ${MK_TESTS} != "no" SUBDIR+= tests .endif .include Index: user/alc/PQ_LAUNDRY/lib/libproc/_libproc.h =================================================================== --- user/alc/PQ_LAUNDRY/lib/libproc/_libproc.h (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libproc/_libproc.h (revision 303642) @@ -1,59 +1,60 @@ /*- * Copyright (c) 2008 John Birrell (jb@freebsd.org) * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ -#include -#include #include -#include #include + #include #include "libproc.h" +struct procstat; + struct proc_handle { pid_t pid; /* Process ID. */ - int kq; /* Kernel event queue ID. */ int flags; /* Process flags. */ int status; /* Process status (PS_*). */ int wstat; /* Process wait status. */ + int model; /* Process data model. */ rd_agent_t *rdap; /* librtld_db agent */ - rd_loadobj_t *rdobjs; - size_t rdobjsz; - size_t nobjs; - struct lwpstatus lwps; - rd_loadobj_t *rdexec; /* rdobj index of program executable. */ - char execname[MAXPATHLEN]; /* Path to program executable. */ + rd_loadobj_t *rdobjs; /* Array of loaded objects. */ + size_t rdobjsz; /* Array size. */ + size_t nobjs; /* Num. objects currently loaded. */ + rd_loadobj_t *rdexec; /* rdobj for program executable. */ + struct lwpstatus lwps; /* Process status. */ + struct procstat *procstat; /* libprocstat handle. */ + char execpath[MAXPATHLEN]; /* Path to program executable. */ }; #ifdef DEBUG #define DPRINTF(...) warn(__VA_ARGS__) #define DPRINTFX(...) warnx(__VA_ARGS__) #else #define DPRINTF(...) do { } while (0) #define DPRINTFX(...) do { } while (0) #endif Index: user/alc/PQ_LAUNDRY/lib/libproc/libproc.h =================================================================== --- user/alc/PQ_LAUNDRY/lib/libproc/libproc.h (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libproc/libproc.h (revision 303642) @@ -1,157 +1,161 @@ /*- * Copyright (c) 2010 The FreeBSD Foundation * Copyright (c) 2008 John Birrell (jb@freebsd.org) * All rights reserved. * * Portions of this software were developed by Rui Paulo under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LIBPROC_H_ #define _LIBPROC_H_ #include #include #include struct ctf_file; struct proc_handle; typedef void (*proc_child_func)(void *); /* Values returned by proc_state(). */ #define PS_IDLE 1 #define PS_STOP 2 #define PS_RUN 3 #define PS_UNDEAD 4 #define PS_DEAD 5 #define PS_LOST 6 /* Reason values for proc_detach(). */ #define PRELEASE_HANG 1 #define PRELEASE_KILL 2 typedef struct prmap { uintptr_t pr_vaddr; /* Virtual address. */ size_t pr_size; /* Mapping size in bytes */ size_t pr_offset; /* Mapping offset in object */ char pr_mapname[PATH_MAX]; /* Mapping filename */ uint8_t pr_mflags; /* Protection flags */ #define MA_READ 0x01 #define MA_WRITE 0x02 #define MA_EXEC 0x04 #define MA_COW 0x08 #define MA_NEEDS_COPY 0x10 #define MA_NOCOREDUMP 0x20 } prmap_t; typedef struct prsyminfo { u_int prs_lmid; /* Map id. */ u_int prs_id; /* Symbol id. */ } prsyminfo_t; typedef int proc_map_f(void *, const prmap_t *, const char *); typedef int proc_sym_f(void *, const GElf_Sym *, const char *); /* Values for ELF sections */ #define PR_SYMTAB 1 #define PR_DYNSYM 2 /* Values for the 'mask' parameter in the iteration functions */ #define BIND_LOCAL 0x0001 #define BIND_GLOBAL 0x0002 #define BIND_WEAK 0x0004 #define BIND_ANY (BIND_LOCAL|BIND_GLOBAL|BIND_WEAK) #define TYPE_NOTYPE 0x0100 #define TYPE_OBJECT 0x0200 #define TYPE_FUNC 0x0400 #define TYPE_SECTION 0x0800 #define TYPE_FILE 0x1000 #define TYPE_ANY (TYPE_NOTYPE|TYPE_OBJECT|TYPE_FUNC|TYPE_SECTION|\ TYPE_FILE) typedef enum { REG_PC, REG_SP, REG_RVAL1, REG_RVAL2 } proc_reg_t; #define SIG2STR_MAX 8 typedef struct lwpstatus { int pr_why; #define PR_REQUESTED 1 #define PR_FAULTED 2 #define PR_SYSENTRY 3 #define PR_SYSEXIT 4 #define PR_SIGNALLED 5 int pr_what; #define FLTBPT -1 } lwpstatus_t; +#define PR_MODEL_ILP32 1 +#define PR_MODEL_LP64 2 + /* Function prototype definitions. 
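 *
 * proc_getmodel() reports the data model of the traced process as
 * PR_MODEL_ILP32 or PR_MODEL_LP64 (defined above).  A minimal,
 * illustrative sketch of attaching to a process and querying its
 * model; "pid" is a hypothetical pid_t and error handling is elided:
 *
 *	struct proc_handle *phdl;
 *
 *	if (proc_attach(pid, 0, &phdl) == 0) {
 *		if (proc_getmodel(phdl) == PR_MODEL_LP64)
 *			printf("%d is an LP64 process\n", (int)pid);
 *		proc_detach(phdl, PRELEASE_HANG);
 *		proc_free(phdl);
 *	}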
*/ __BEGIN_DECLS prmap_t *proc_addr2map(struct proc_handle *, uintptr_t); prmap_t *proc_name2map(struct proc_handle *, const char *); char *proc_objname(struct proc_handle *, uintptr_t, char *, size_t); prmap_t *proc_obj2map(struct proc_handle *, const char *); int proc_iter_objs(struct proc_handle *, proc_map_f *, void *); int proc_iter_symbyaddr(struct proc_handle *, const char *, int, int, proc_sym_f *, void *); int proc_addr2sym(struct proc_handle *, uintptr_t, char *, size_t, GElf_Sym *); int proc_attach(pid_t pid, int flags, struct proc_handle **pphdl); int proc_continue(struct proc_handle *); int proc_clearflags(struct proc_handle *, int); int proc_create(const char *, char * const *, proc_child_func *, void *, struct proc_handle **); int proc_detach(struct proc_handle *, int); int proc_getflags(struct proc_handle *); int proc_name2sym(struct proc_handle *, const char *, const char *, GElf_Sym *, prsyminfo_t *); struct ctf_file *proc_name2ctf(struct proc_handle *, const char *); int proc_setflags(struct proc_handle *, int); int proc_state(struct proc_handle *); +int proc_getmodel(struct proc_handle *); pid_t proc_getpid(struct proc_handle *); int proc_wstatus(struct proc_handle *); int proc_getwstat(struct proc_handle *); char * proc_signame(int, char *, size_t); int proc_read(struct proc_handle *, void *, size_t, size_t); const lwpstatus_t *proc_getlwpstatus(struct proc_handle *); void proc_free(struct proc_handle *); rd_agent_t *proc_rdagent(struct proc_handle *); void proc_updatesyms(struct proc_handle *); int proc_bkptset(struct proc_handle *, uintptr_t, unsigned long *); int proc_bkptdel(struct proc_handle *, uintptr_t, unsigned long); void proc_bkptregadj(unsigned long *); int proc_bkptexec(struct proc_handle *, unsigned long); int proc_regget(struct proc_handle *, proc_reg_t, unsigned long *); int proc_regset(struct proc_handle *, proc_reg_t, unsigned long); __END_DECLS #endif /* !_LIBPROC_H_ */ Index: user/alc/PQ_LAUNDRY/lib/libproc/proc_create.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/libproc/proc_create.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libproc/proc_create.c (revision 303642) @@ -1,189 +1,236 @@ /*- * Copyright (c) 2008 John Birrell (jb@freebsd.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include #include +#include #include #include #include #include #include #include #include #include +#include +#include + #include "_libproc.h" -static int proc_init(pid_t, int, int, struct proc_handle *); +static int getelfclass(int); +static int proc_init(pid_t, int, int, struct proc_handle **); static int -proc_init(pid_t pid, int flags, int status, struct proc_handle *phdl) +getelfclass(int fd) { - int mib[4], error; - size_t len; + GElf_Ehdr ehdr; + Elf *e; + int class; + class = ELFCLASSNONE; + + if ((e = elf_begin(fd, ELF_C_READ, NULL)) == NULL) + goto out; + if (gelf_getehdr(e, &ehdr) == NULL) + goto out; + class = ehdr.e_ident[EI_CLASS]; +out: + (void)elf_end(e); + return (class); +} + +static int +proc_init(pid_t pid, int flags, int status, struct proc_handle **pphdl) +{ + struct kinfo_proc *kp; + struct proc_handle *phdl; + int error, class, count, fd; + + *pphdl = NULL; + if ((phdl = malloc(sizeof(*phdl))) == NULL) + return (ENOMEM); + memset(phdl, 0, sizeof(*phdl)); phdl->pid = pid; phdl->flags = flags; phdl->status = status; + phdl->procstat = procstat_open_sysctl(); + if (phdl->procstat == NULL) + return (ENOMEM); - mib[0] = CTL_KERN; - mib[1] = KERN_PROC; - mib[2] = KERN_PROC_PATHNAME; - mib[3] = pid; - len = sizeof(phdl->execname); - if (sysctl(mib, 4, phdl->execname, &len, NULL, 0) != 0) { - error = errno; - DPRINTF("ERROR: cannot get pathname for child process %d", pid); + /* Obtain a path to the executable. */ + if ((kp = procstat_getprocs(phdl->procstat, KERN_PROC_PID, pid, + &count)) == NULL) + return (ENOMEM); + error = procstat_getpathname(phdl->procstat, kp, phdl->execpath, + sizeof(phdl->execpath)); + procstat_freeprocs(phdl->procstat, kp); + if (error != 0) return (error); + + /* Use it to determine the data model for the process. */ + if ((fd = open(phdl->execpath, O_RDONLY)) < 0) { + error = errno; + goto out; } - if (len == 0) - phdl->execname[0] = '\0'; + class = getelfclass(fd); + switch (class) { + case ELFCLASS64: + phdl->model = PR_MODEL_LP64; + break; + case ELFCLASS32: + phdl->model = PR_MODEL_ILP32; + break; + case ELFCLASSNONE: + default: + error = EINVAL; + break; + } + (void)close(fd); - return (0); +out: + *pphdl = phdl; + return (error); } int proc_attach(pid_t pid, int flags, struct proc_handle **pphdl) { struct proc_handle *phdl; - int error = 0; - int status; + int error, status; if (pid == 0 || pid == getpid()) return (EINVAL); + if (elf_version(EV_CURRENT) == EV_NONE) + return (ENOENT); /* * Allocate memory for the process handle, a structure containing * all things related to the process. */ - if ((phdl = malloc(sizeof(struct proc_handle))) == NULL) - return (ENOMEM); - - elf_version(EV_CURRENT); - - error = proc_init(pid, flags, PS_RUN, phdl); + error = proc_init(pid, flags, PS_RUN, &phdl); if (error != 0) goto out; if (ptrace(PT_ATTACH, phdl->pid, 0, 0) != 0) { error = errno; DPRINTF("ERROR: cannot ptrace child process %d", pid); goto out; } /* Wait for the child process to stop. */ if (waitpid(pid, &status, WUNTRACED) == -1) { error = errno; DPRINTF("ERROR: child process %d didn't stop as expected", pid); goto out; } /* Check for an unexpected status. 
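	 * PT_ATTACH followed by waitpid() should leave the child stopped;
	 * if it is not, the condition is logged but the attach is still
	 * treated as successful.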
*/ - if (WIFSTOPPED(status) == 0) + if (!WIFSTOPPED(status)) DPRINTFX("ERROR: child process %d status 0x%x", pid, status); else phdl->status = PS_STOP; out: - if (error) + if (error && phdl != NULL) { proc_free(phdl); - else - *pphdl = phdl; + phdl = NULL; + } + *pphdl = phdl; return (error); } int proc_create(const char *file, char * const *argv, proc_child_func *pcf, void *child_arg, struct proc_handle **pphdl) { struct proc_handle *phdl; int error = 0; int status; pid_t pid; - /* - * Allocate memory for the process handle, a structure containing - * all things related to the process. - */ - if ((phdl = malloc(sizeof(struct proc_handle))) == NULL) - return (ENOMEM); + if (elf_version(EV_CURRENT) == EV_NONE) + return (ENOENT); - elf_version(EV_CURRENT); - /* Fork a new process. */ if ((pid = vfork()) == -1) error = errno; else if (pid == 0) { /* The child expects to be traced. */ if (ptrace(PT_TRACE_ME, 0, 0, 0) != 0) _exit(1); if (pcf != NULL) (*pcf)(child_arg); /* Execute the specified file: */ execvp(file, argv); /* Couldn't execute the file. */ _exit(2); + /* NOTREACHED */ } else { /* The parent owns the process handle. */ - error = proc_init(pid, 0, PS_IDLE, phdl); + error = proc_init(pid, 0, PS_IDLE, &phdl); if (error != 0) goto bad; /* Wait for the child process to stop. */ if (waitpid(pid, &status, WUNTRACED) == -1) { error = errno; DPRINTF("ERROR: child process %d didn't stop as expected", pid); goto bad; } /* Check for an unexpected status. */ - if (WIFSTOPPED(status) == 0) { + if (!WIFSTOPPED(status)) { error = errno; DPRINTFX("ERROR: child process %d status 0x%x", pid, status); goto bad; } else phdl->status = PS_STOP; } bad: - if (error) + if (error && phdl != NULL) { proc_free(phdl); - else - *pphdl = phdl; + phdl = NULL; + } + *pphdl = phdl; return (error); } void proc_free(struct proc_handle *phdl) { + + if (phdl->procstat != NULL) + procstat_close(phdl->procstat); free(phdl); } Index: user/alc/PQ_LAUNDRY/lib/libproc/proc_rtld.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/libproc/proc_rtld.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libproc/proc_rtld.c (revision 303642) @@ -1,84 +1,84 @@ /* * Copyright (c) 2010 The FreeBSD Foundation * All rights reserved. * * This software was developed by Rui Paulo under sponsorship from the * FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include "libproc.h" #include "_libproc.h" static int map_iter(const rd_loadobj_t *lop, void *arg) { struct proc_handle *phdl = arg; if (phdl->nobjs >= phdl->rdobjsz) { phdl->rdobjsz *= 2; phdl->rdobjs = reallocf(phdl->rdobjs, sizeof(*phdl->rdobjs) * phdl->rdobjsz); if (phdl->rdobjs == NULL) return (-1); } - if (strcmp(lop->rdl_path, phdl->execname) == 0 && + if (strcmp(lop->rdl_path, phdl->execpath) == 0 && (lop->rdl_prot & RD_RDL_X) != 0) phdl->rdexec = &phdl->rdobjs[phdl->nobjs]; memcpy(&phdl->rdobjs[phdl->nobjs++], lop, sizeof(*lop)); return (0); } rd_agent_t * proc_rdagent(struct proc_handle *phdl) { if (phdl->rdap == NULL && phdl->status != PS_UNDEAD && phdl->status != PS_IDLE) { if ((phdl->rdap = rd_new(phdl)) != NULL) { phdl->rdobjs = malloc(sizeof(*phdl->rdobjs) * 64); phdl->rdobjsz = 64; if (phdl->rdobjs == NULL) return (phdl->rdap); rd_loadobj_iter(phdl->rdap, map_iter, phdl); } } return (phdl->rdap); } void proc_updatesyms(struct proc_handle *phdl) { memset(phdl->rdobjs, 0, sizeof(*phdl->rdobjs) * phdl->rdobjsz); phdl->nobjs = 0; rd_loadobj_iter(phdl->rdap, map_iter, phdl); } Index: user/alc/PQ_LAUNDRY/lib/libproc/proc_util.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/libproc/proc_util.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/libproc/proc_util.c (revision 303642) @@ -1,230 +1,240 @@ /*- * Copyright (c) 2010 The FreeBSD Foundation * Copyright (c) 2008 John Birrell (jb@freebsd.org) * All rights reserved. * * Portions of this software were developed by Rui Paulo under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include "_libproc.h" int proc_clearflags(struct proc_handle *phdl, int mask) { if (phdl == NULL) return (EINVAL); phdl->flags &= ~mask; return (0); } /* * NB: we return -1 as the Solaris libproc Psetrun() function. */ int proc_continue(struct proc_handle *phdl) { int pending = 0; if (phdl == NULL) return (-1); if (phdl->status == PS_STOP && WSTOPSIG(phdl->wstat) != SIGTRAP) pending = WSTOPSIG(phdl->wstat); if (ptrace(PT_CONTINUE, phdl->pid, (caddr_t)(uintptr_t)1, pending) != 0) return (-1); phdl->status = PS_RUN; return (0); } int proc_detach(struct proc_handle *phdl, int reason) { int status; if (phdl == NULL) return (EINVAL); if (reason == PRELEASE_KILL) { kill(phdl->pid, SIGKILL); return (0); } if (ptrace(PT_DETACH, phdl->pid, 0, 0) != 0 && errno == ESRCH) return (0); if (errno == EBUSY) { kill(phdl->pid, SIGSTOP); waitpid(phdl->pid, &status, WUNTRACED); ptrace(PT_DETACH, phdl->pid, 0, 0); kill(phdl->pid, SIGCONT); return (0); } return (0); } int proc_getflags(struct proc_handle *phdl) { if (phdl == NULL) return (-1); return(phdl->flags); } int proc_setflags(struct proc_handle *phdl, int mask) { if (phdl == NULL) return (EINVAL); phdl->flags |= mask; return (0); } int proc_state(struct proc_handle *phdl) { if (phdl == NULL) return (-1); return (phdl->status); } pid_t proc_getpid(struct proc_handle *phdl) { if (phdl == NULL) return (-1); return (phdl->pid); } int +proc_getmodel(struct proc_handle *phdl) +{ + + if (phdl == NULL) + return (-1); + + return (phdl->model); +} + +int proc_wstatus(struct proc_handle *phdl) { int status; if (phdl == NULL) return (-1); if (waitpid(phdl->pid, &status, WUNTRACED) < 0) { if (errno != EINTR) DPRINTF("waitpid"); return (-1); } if (WIFSTOPPED(status)) phdl->status = PS_STOP; if (WIFEXITED(status) || WIFSIGNALED(status)) phdl->status = PS_UNDEAD; phdl->wstat = status; return (phdl->status); } int proc_getwstat(struct proc_handle *phdl) { if (phdl == NULL) return (-1); return (phdl->wstat); } char * proc_signame(int sig, char *name, size_t namesz) { strlcpy(name, strsignal(sig), namesz); return (name); } int proc_read(struct proc_handle *phdl, void *buf, size_t size, size_t addr) { struct ptrace_io_desc piod; if (phdl == NULL) return (-1); piod.piod_op = PIOD_READ_D; piod.piod_len = size; piod.piod_addr = (void *)buf; piod.piod_offs = (void *)addr; if (ptrace(PT_IO, phdl->pid, (caddr_t)&piod, 0) < 0) return (-1); return (piod.piod_len); } const lwpstatus_t * proc_getlwpstatus(struct proc_handle *phdl) { struct ptrace_lwpinfo lwpinfo; lwpstatus_t *psp = &phdl->lwps; siginfo_t *siginfo; if (phdl == NULL) return (NULL); if (ptrace(PT_LWPINFO, phdl->pid, (caddr_t)&lwpinfo, sizeof(lwpinfo)) < 0) return (NULL); siginfo = &lwpinfo.pl_siginfo; if (lwpinfo.pl_event == PL_EVENT_SIGNAL && (lwpinfo.pl_flags & PL_FLAG_SI) != 0) { if (siginfo->si_signo == SIGTRAP && (siginfo->si_code == TRAP_BRKPT || siginfo->si_code == TRAP_TRACE)) { psp->pr_why = PR_FAULTED; psp->pr_what = FLTBPT; } else { psp->pr_why = PR_SIGNALLED; psp->pr_what = siginfo->si_signo; } } else if (lwpinfo.pl_flags & PL_FLAG_SCE) { psp->pr_why = PR_SYSENTRY; } else if (lwpinfo.pl_flags & PL_FLAG_SCX) { psp->pr_why = PR_SYSEXIT; } return (psp); } Index: user/alc/PQ_LAUNDRY/lib/librtld_db/Makefile =================================================================== --- user/alc/PQ_LAUNDRY/lib/librtld_db/Makefile (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/librtld_db/Makefile (revision 303642) @@ -1,17 +1,19 @@ # 
$FreeBSD$ .include PACKAGE=lib${LIB} LIB= rtld_db SHLIB_MAJOR= 2 MAN= librtld_db.3 SRCS= rtld_db.c INCS= rtld_db.h CFLAGS+= -I${.CURDIR} # Avoid circular dependency, we only need the libproc.h header here. CFLAGS+= -I${.CURDIR:H}/libproc +LIBADD+= elf procstat + .include Index: user/alc/PQ_LAUNDRY/lib/librtld_db/rtld_db.c =================================================================== --- user/alc/PQ_LAUNDRY/lib/librtld_db/rtld_db.c (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/librtld_db/rtld_db.c (revision 303642) @@ -1,254 +1,389 @@ /* * Copyright (c) 2010 The FreeBSD Foundation * All rights reserved. * * This software was developed by Rui Paulo under sponsorship from the * FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - */ + */ + #include __FBSDID("$FreeBSD$"); -#include -#include +#include +#include +#include #include +#include #include +#include +#include #include #include #include -#include +#include + +#include + +#include #include +#include #include #include "rtld_db.h" static int _librtld_db_debug = 0; #define DPRINTF(...) 
do { \ if (_librtld_db_debug) { \ fprintf(stderr, "librtld_db: DEBUG: "); \ fprintf(stderr, __VA_ARGS__); \ } \ } while (0) void rd_delete(rd_agent_t *rdap) { + if (rdap->rda_procstat != NULL) + procstat_close(rdap->rda_procstat); free(rdap); } const char * rd_errstr(rd_err_e rderr) { switch (rderr) { case RD_ERR: return "generic error"; case RD_OK: return "no error"; case RD_NOCAPAB: return "capability not supported"; case RD_DBERR: return "database error"; case RD_NOBASE: return "NOBASE"; case RD_NOMAPS: return "NOMAPS"; default: return "unknown error"; } } rd_err_e rd_event_addr(rd_agent_t *rdap, rd_event_e event, rd_notify_t *notify) { rd_err_e ret; DPRINTF("%s rdap %p event %d notify %p\n", __func__, rdap, event, notify); ret = RD_OK; switch (event) { case RD_NONE: break; case RD_PREINIT: notify->type = RD_NOTIFY_BPT; notify->u.bptaddr = rdap->rda_preinit_addr; break; case RD_POSTINIT: notify->type = RD_NOTIFY_BPT; notify->u.bptaddr = rdap->rda_postinit_addr; break; case RD_DLACTIVITY: notify->type = RD_NOTIFY_BPT; notify->u.bptaddr = rdap->rda_dlactivity_addr; break; default: ret = RD_ERR; break; } return (ret); } rd_err_e rd_event_enable(rd_agent_t *rdap __unused, int onoff) { DPRINTF("%s onoff %d\n", __func__, onoff); return (RD_OK); } rd_err_e rd_event_getmsg(rd_agent_t *rdap __unused, rd_event_msg_t *msg) { DPRINTF("%s\n", __func__); msg->type = RD_POSTINIT; msg->u.state = RD_CONSISTENT; return (RD_OK); } rd_err_e rd_init(int version) { char *debug = NULL; if (version == RD_VERSION) { debug = getenv("LIBRTLD_DB_DEBUG"); _librtld_db_debug = debug ? atoi(debug) : 0; return (RD_OK); } else return (RD_NOCAPAB); } rd_err_e rd_loadobj_iter(rd_agent_t *rdap, rl_iter_f *cb, void *clnt_data) { - int cnt, i, lastvn = 0; - rd_loadobj_t rdl; struct kinfo_vmentry *kves, *kve; + rd_loadobj_t rdl; + rd_err_e ret; + int cnt, i, lastvn; DPRINTF("%s\n", __func__); if ((kves = kinfo_getvmmap(proc_getpid(rdap->rda_php), &cnt)) == NULL) { warn("ERROR: kinfo_getvmmap() failed"); return (RD_ERR); } + + ret = RD_OK; + lastvn = 0; for (i = 0; i < cnt; i++) { kve = kves + i; if (kve->kve_type == KVME_TYPE_VNODE) lastvn = i; memset(&rdl, 0, sizeof(rdl)); /* * Map the kinfo_vmentry struct to the rd_loadobj structure. 
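		 * Only KVME_TYPE_VNODE entries carry a backing file path, so
		 * anonymous mappings are attributed to the most recently seen
		 * vnode entry (tracked by lastvn above).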
*/ rdl.rdl_saddr = kve->kve_start; rdl.rdl_eaddr = kve->kve_end; rdl.rdl_offset = kve->kve_offset; if (kve->kve_protection & KVME_PROT_READ) rdl.rdl_prot |= RD_RDL_R; if (kve->kve_protection & KVME_PROT_WRITE) rdl.rdl_prot |= RD_RDL_W; if (kve->kve_protection & KVME_PROT_EXEC) rdl.rdl_prot |= RD_RDL_X; strlcpy(rdl.rdl_path, kves[lastvn].kve_path, - sizeof(rdl.rdl_path)); - (*cb)(&rdl, clnt_data); + sizeof(rdl.rdl_path)); + if ((*cb)(&rdl, clnt_data) != 0) { + ret = RD_ERR; + break; + } } free(kves); - - return (RD_OK); + return (ret); } void rd_log(const int onoff) { DPRINTF("%s\n", __func__); (void)onoff; } rd_agent_t * rd_new(struct proc_handle *php) { rd_agent_t *rdap; - rdap = malloc(sizeof(rd_agent_t)); - if (rdap) { - memset(rdap, 0, sizeof(rd_agent_t)); - rdap->rda_php = php; - rd_reset(rdap); - } + rdap = malloc(sizeof(*rdap)); + if (rdap == NULL) + return (NULL); + memset(rdap, 0, sizeof(rd_agent_t)); + rdap->rda_php = php; + rdap->rda_procstat = procstat_open_sysctl(); + + if (rd_reset(rdap) != RD_OK) { + rd_delete(rdap); + rdap = NULL; + } return (rdap); } rd_err_e rd_objpad_enable(rd_agent_t *rdap, size_t padsize) { DPRINTF("%s\n", __func__); (void)rdap; (void)padsize; return (RD_ERR); } rd_err_e rd_plt_resolution(rd_agent_t *rdap, uintptr_t pc, struct proc *proc, uintptr_t plt_base, rd_plt_info_t *rpi) { DPRINTF("%s\n", __func__); (void)rdap; (void)pc; (void)proc; (void)plt_base; (void)rpi; return (RD_ERR); } +static int +rtld_syms(rd_agent_t *rdap, const char *rtldpath, u_long base) +{ + GElf_Shdr shdr; + GElf_Sym sym; + Elf *e; + Elf_Data *data; + Elf_Scn *scn; + const char *symname; + Elf64_Word strscnidx; + int fd, i, ret; + + ret = 1; + e = NULL; + + fd = open(rtldpath, O_RDONLY); + if (fd < 0) + goto err; + + if (elf_version(EV_CURRENT) == EV_NONE) + goto err; + e = elf_begin(fd, ELF_C_READ, NULL); + if (e == NULL) { + close(fd); + goto err; + } + + scn = NULL; + while ((scn = elf_nextscn(e, scn)) != NULL) { + gelf_getshdr(scn, &shdr); + if (shdr.sh_type == SHT_DYNSYM) + break; + } + if (scn == NULL) + goto err; + + strscnidx = shdr.sh_link; + data = elf_getdata(scn, NULL); + if (data == NULL) + goto err; + + for (i = 0; gelf_getsym(data, i, &sym) != NULL; i++) { + if (GELF_ST_TYPE(sym.st_info) != STT_FUNC || + GELF_ST_BIND(sym.st_info) != STB_GLOBAL) + continue; + symname = elf_strptr(e, strscnidx, sym.st_name); + if (symname == NULL) + continue; + + if (strcmp(symname, "r_debug_state") == 0) { + rdap->rda_preinit_addr = sym.st_value + base; + rdap->rda_dlactivity_addr = sym.st_value + base; + } else if (strcmp(symname, "_r_debug_postinit") == 0) { + rdap->rda_postinit_addr = sym.st_value + base; + } + } + + if (rdap->rda_preinit_addr != 0 && + rdap->rda_postinit_addr != 0 && + rdap->rda_dlactivity_addr != 0) + ret = 0; + +err: + if (e != NULL) + (void)elf_end(e); + if (fd >= 0) + (void)close(fd); + return (ret); +} + rd_err_e rd_reset(rd_agent_t *rdap) { - GElf_Sym sym; + struct kinfo_proc *kp; + struct kinfo_vmentry *kve; + Elf_Auxinfo *auxv; + const char *rtldpath; + u_long base; + rd_err_e rderr; + int count, i; - if (proc_name2sym(rdap->rda_php, "ld-elf.so.1", "r_debug_state", - &sym, NULL) < 0) - return (RD_ERR); - DPRINTF("found r_debug_state at 0x%lx\n", (unsigned long)sym.st_value); - rdap->rda_preinit_addr = sym.st_value; - rdap->rda_dlactivity_addr = sym.st_value; + kp = NULL; + auxv = NULL; + kve = NULL; + rderr = RD_ERR; - if (proc_name2sym(rdap->rda_php, "ld-elf.so.1", "_r_debug_postinit", - &sym, NULL) < 0) + kp = procstat_getprocs(rdap->rda_procstat, 
KERN_PROC_PID, + proc_getpid(rdap->rda_php), &count); + if (kp == NULL) return (RD_ERR); - DPRINTF("found _r_debug_postinit at 0x%lx\n", - (unsigned long)sym.st_value); - rdap->rda_postinit_addr = sym.st_value; + assert(count == 1); - return (RD_OK); + auxv = procstat_getauxv(rdap->rda_procstat, kp, &count); + if (auxv == NULL) + goto err; + + base = 0; + for (i = 0; i < count; i++) { + if (auxv[i].a_type == AT_BASE) { + base = auxv[i].a_un.a_val; + break; + } + } + if (i == count) + goto err; + + rtldpath = NULL; + kve = procstat_getvmmap(rdap->rda_procstat, kp, &count); + if (kve == NULL) + goto err; + for (i = 0; i < count; i++) { + if (kve[i].kve_start == base) { + rtldpath = kve[i].kve_path; + break; + } + } + if (i == count) + goto err; + + if (rtld_syms(rdap, rtldpath, base) != 0) + goto err; + + rderr = RD_OK; + +err: + if (kve != NULL) + procstat_freevmmap(rdap->rda_procstat, kve); + if (auxv != NULL) + procstat_freeauxv(rdap->rda_procstat, auxv); + if (kp != NULL) + procstat_freeprocs(rdap->rda_procstat, kp); + return (rderr); } Index: user/alc/PQ_LAUNDRY/lib/librtld_db/rtld_db.h =================================================================== --- user/alc/PQ_LAUNDRY/lib/librtld_db/rtld_db.h (revision 303641) +++ user/alc/PQ_LAUNDRY/lib/librtld_db/rtld_db.h (revision 303642) @@ -1,152 +1,155 @@ /* * Copyright (c) 2010 The FreeBSD Foundation * All rights reserved. * * This software was developed by Rui Paulo under sponsorship from the * FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _RTLD_DB_H_ #define _RTLD_DB_H_ #include -#include -#include - #define RD_VERSION 1 typedef enum { RD_OK, RD_ERR, RD_DBERR, RD_NOCAPAB, RD_NODYNAM, RD_NOBASE, RD_NOMAPS } rd_err_e; +/* XXX struct rd_agent should be private. 
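 * rda_procstat holds a libprocstat handle; rd_reset() uses it to read
 * the target's auxiliary vector and VM map when locating the runtime
 * linker and its event symbols.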
*/ +struct procstat; + typedef struct rd_agent { struct proc_handle *rda_php; + uintptr_t rda_dlactivity_addr; uintptr_t rda_preinit_addr; uintptr_t rda_postinit_addr; + + struct procstat *rda_procstat; } rd_agent_t; typedef struct rd_loadobj { uintptr_t rdl_saddr; /* start address */ uintptr_t rdl_eaddr; /* end address */ uint32_t rdl_offset; uint8_t rdl_prot; #define RD_RDL_R 0x01 #define RD_RDL_W 0x02 #define RD_RDL_X 0x04 enum { RDL_TYPE_NONE = 0, RDL_TYPE_DEF, RDL_TYPE_VNODE, RDL_TYPE_SWAP, RDL_TYPE_DEV, /* XXX some types missing */ RDL_TYPE_UNKNOWN = 255 } rdl_type; unsigned char rdl_path[PATH_MAX]; } rd_loadobj_t; typedef enum { RD_NONE = 0, RD_PREINIT, RD_POSTINIT, RD_DLACTIVITY } rd_event_e; typedef enum { RD_NOTIFY_BPT, RD_NOTIFY_AUTOBPT, RD_NOTIFY_SYSCALL } rd_notify_e; typedef struct rd_notify { rd_notify_e type; union { uintptr_t bptaddr; long syscallno; } u; } rd_notify_t; typedef enum { RD_NOSTATE = 0, RD_CONSISTENT, RD_ADD, RD_DELETE } rd_state_e; typedef struct rd_event_msg { rd_event_e type; union { rd_state_e state; } u; } rd_event_msg_t; typedef enum { RD_RESOLVE_NONE, RD_RESOLVE_STEP, RD_RESOLVE_TARGET, RD_RESOLVE_TARGET_STEP } rd_skip_e; typedef struct rd_plt_info { rd_skip_e pi_skip_method; long pi_nstep; uintptr_t pi_target; uintptr_t pi_baddr; unsigned int pi_flags; } rd_plt_info_t; #define RD_FLG_PI_PLTBOUND 0x0001 __BEGIN_DECLS struct proc_handle; void rd_delete(rd_agent_t *); const char *rd_errstr(rd_err_e); rd_err_e rd_event_addr(rd_agent_t *, rd_event_e, rd_notify_t *); rd_err_e rd_event_enable(rd_agent_t *, int); rd_err_e rd_event_getmsg(rd_agent_t *, rd_event_msg_t *); rd_err_e rd_init(int); typedef int rl_iter_f(const rd_loadobj_t *, void *); rd_err_e rd_loadobj_iter(rd_agent_t *, rl_iter_f *, void *); void rd_log(const int); rd_agent_t *rd_new(struct proc_handle *); rd_err_e rd_objpad_enable(rd_agent_t *, size_t); struct proc; rd_err_e rd_plt_resolution(rd_agent_t *, uintptr_t, struct proc *, uintptr_t, rd_plt_info_t *); rd_err_e rd_reset(rd_agent_t *); __END_DECLS #endif /* _RTLD_DB_H_ */ Index: user/alc/PQ_LAUNDRY/sbin/fsdb/fsdb.c =================================================================== --- user/alc/PQ_LAUNDRY/sbin/fsdb/fsdb.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sbin/fsdb/fsdb.c (revision 303642) @@ -1,1210 +1,1210 @@ /* $NetBSD: fsdb.c,v 1.2 1995/10/08 23:18:10 thorpej Exp $ */ /* * Copyright (c) 1995 John T. Kohl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef lint static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fsdb.h" #include "fsck.h" static void usage(void) __dead2; int cmdloop(void); static int compare_blk32(uint32_t *wantedblk, uint32_t curblk); static int compare_blk64(uint64_t *wantedblk, uint64_t curblk); static int founddatablk(uint64_t blk); static int find_blks32(uint32_t *buf, int size, uint32_t *blknum); static int find_blks64(uint64_t *buf, int size, uint64_t *blknum); static int find_indirblks32(uint32_t blk, int ind_level, uint32_t *blknum); static int find_indirblks64(uint64_t blk, int ind_level, uint64_t *blknum); static void usage(void) { fprintf(stderr, "usage: fsdb [-d] [-f] [-r] fsname\n"); exit(1); } int returntosingle; char nflag; /* * We suck in lots of fsck code, and just pick & choose the stuff we want. * * fsreadfd is set up to read from the file system, fswritefd to write to * the file system. */ int main(int argc, char *argv[]) { int ch, rval; char *fsys = NULL; while (-1 != (ch = getopt(argc, argv, "fdr"))) { switch (ch) { case 'f': /* The -f option is left for historical * reasons and has no meaning. */ break; case 'd': debug++; break; case 'r': nflag++; /* "no" in fsck, readonly for us */ break; default: usage(); } } argc -= optind; argv += optind; if (argc != 1) usage(); else fsys = argv[0]; sblock_init(); if (!setup(fsys)) errx(1, "cannot set up file system `%s'", fsys); printf("%s file system `%s'\nLast Mounted on %s\n", nflag? 
"Examining": "Editing", fsys, sblock.fs_fsmnt); rval = cmdloop(); if (!nflag) { sblock.fs_clean = 0; /* mark it dirty */ sbdirty(); ckfini(0); printf("*** FILE SYSTEM MARKED DIRTY\n"); printf("*** BE SURE TO RUN FSCK TO CLEAN UP ANY DAMAGE\n"); printf("*** IF IT WAS MOUNTED, RE-MOUNT WITH -u -o reload\n"); } exit(rval); } #define CMDFUNC(func) int func(int argc, char *argv[]) #define CMDFUNCSTART(func) int func(int argc, char *argv[]) CMDFUNC(helpfn); CMDFUNC(focus); /* focus on inode */ CMDFUNC(active); /* print active inode */ CMDFUNC(blocks); /* print blocks for active inode */ CMDFUNC(focusname); /* focus by name */ CMDFUNC(zapi); /* clear inode */ CMDFUNC(uplink); /* incr link */ CMDFUNC(downlink); /* decr link */ CMDFUNC(linkcount); /* set link count */ CMDFUNC(quit); /* quit */ CMDFUNC(findblk); /* find block */ CMDFUNC(ls); /* list directory */ CMDFUNC(rm); /* remove name */ CMDFUNC(ln); /* add name */ CMDFUNC(newtype); /* change type */ CMDFUNC(chmode); /* change mode */ CMDFUNC(chlen); /* change length */ CMDFUNC(chaflags); /* change flags */ CMDFUNC(chgen); /* change generation */ CMDFUNC(chowner); /* change owner */ CMDFUNC(chgroup); /* Change group */ CMDFUNC(back); /* pop back to last ino */ CMDFUNC(chbtime); /* Change btime */ CMDFUNC(chmtime); /* Change mtime */ CMDFUNC(chctime); /* Change ctime */ CMDFUNC(chatime); /* Change atime */ CMDFUNC(chinum); /* Change inode # of dirent */ CMDFUNC(chname); /* Change dirname of dirent */ struct cmdtable cmds[] = { { "help", "Print out help", 1, 1, FL_RO, helpfn }, { "?", "Print out help", 1, 1, FL_RO, helpfn }, { "inode", "Set active inode to INUM", 2, 2, FL_RO, focus }, { "clri", "Clear inode INUM", 2, 2, FL_WR, zapi }, { "lookup", "Set active inode by looking up NAME", 2, 2, FL_RO | FL_ST, focusname }, { "cd", "Set active inode by looking up NAME", 2, 2, FL_RO | FL_ST, focusname }, { "back", "Go to previous active inode", 1, 1, FL_RO, back }, { "active", "Print active inode", 1, 1, FL_RO, active }, { "print", "Print active inode", 1, 1, FL_RO, active }, { "blocks", "Print block numbers of active inode", 1, 1, FL_RO, blocks }, { "uplink", "Increment link count", 1, 1, FL_WR, uplink }, { "downlink", "Decrement link count", 1, 1, FL_WR, downlink }, { "linkcount", "Set link count to COUNT", 2, 2, FL_WR, linkcount }, { "findblk", "Find inode owning disk block(s)", 2, 33, FL_RO, findblk}, { "ls", "List current inode as directory", 1, 1, FL_RO, ls }, { "rm", "Remove NAME from current inode directory", 2, 2, FL_WR | FL_ST, rm }, { "del", "Remove NAME from current inode directory", 2, 2, FL_WR | FL_ST, rm }, { "ln", "Hardlink INO into current inode directory as NAME", 3, 3, FL_WR | FL_ST, ln }, { "chinum", "Change dir entry number INDEX to INUM", 3, 3, FL_WR, chinum }, { "chname", "Change dir entry number INDEX to NAME", 3, 3, FL_WR | FL_ST, chname }, { "chtype", "Change type of current inode to TYPE", 2, 2, FL_WR, newtype }, { "chmod", "Change mode of current inode to MODE", 2, 2, FL_WR, chmode }, { "chlen", "Change length of current inode to LENGTH", 2, 2, FL_WR, chlen }, { "chown", "Change owner of current inode to OWNER", 2, 2, FL_WR, chowner }, { "chgrp", "Change group of current inode to GROUP", 2, 2, FL_WR, chgroup }, { "chflags", "Change flags of current inode to FLAGS", 2, 2, FL_WR, chaflags }, { "chgen", "Change generation number of current inode to GEN", 2, 2, FL_WR, chgen }, { "btime", "Change btime of current inode to BTIME", 2, 2, FL_WR, chbtime }, { "mtime", "Change mtime of current inode to MTIME", 2, 2, FL_WR, chmtime }, { 
"ctime", "Change ctime of current inode to CTIME", 2, 2, FL_WR, chctime }, { "atime", "Change atime of current inode to ATIME", 2, 2, FL_WR, chatime }, { "quit", "Exit", 1, 1, FL_RO, quit }, { "q", "Exit", 1, 1, FL_RO, quit }, { "exit", "Exit", 1, 1, FL_RO, quit }, { NULL, 0, 0, 0, 0, NULL }, }; int helpfn(int argc, char *argv[]) { struct cmdtable *cmdtp; printf("Commands are:\n%-10s %5s %5s %s\n", "command", "min args", "max args", "what"); for (cmdtp = cmds; cmdtp->cmd; cmdtp++) printf("%-10s %5u %5u %s\n", cmdtp->cmd, cmdtp->minargc-1, cmdtp->maxargc-1, cmdtp->helptxt); return 0; } char * prompt(EditLine *el) { static char pstring[64]; snprintf(pstring, sizeof(pstring), "fsdb (inum: %ju)> ", (uintmax_t)curinum); return pstring; } int cmdloop(void) { char *line; const char *elline; int cmd_argc, rval = 0, known; #define scratch known char **cmd_argv; struct cmdtable *cmdp; History *hist; EditLine *elptr; HistEvent he; curinode = ginode(ROOTINO); curinum = ROOTINO; printactive(0); hist = history_init(); history(hist, &he, H_SETSIZE, 100); /* 100 elt history buffer */ elptr = el_init("fsdb", stdin, stdout, stderr); el_set(elptr, EL_EDITOR, "emacs"); el_set(elptr, EL_PROMPT, prompt); el_set(elptr, EL_HIST, history, hist); el_source(elptr, NULL); while ((elline = el_gets(elptr, &scratch)) != NULL && scratch != 0) { if (debug) printf("command `%s'\n", elline); history(hist, &he, H_ENTER, elline); line = strdup(elline); cmd_argv = crack(line, &cmd_argc); /* * el_parse returns -1 to signal that it's not been handled * internally. */ if (el_parse(elptr, cmd_argc, (const char **)cmd_argv) != -1) continue; if (cmd_argc) { known = 0; for (cmdp = cmds; cmdp->cmd; cmdp++) { if (!strcmp(cmdp->cmd, cmd_argv[0])) { if ((cmdp->flags & FL_WR) == FL_WR && nflag) warnx("`%s' requires write access", cmd_argv[0]), rval = 1; else if (cmd_argc >= cmdp->minargc && cmd_argc <= cmdp->maxargc) rval = (*cmdp->handler)(cmd_argc, cmd_argv); else if (cmd_argc >= cmdp->minargc && (cmdp->flags & FL_ST) == FL_ST) { strcpy(line, elline); cmd_argv = recrack(line, &cmd_argc, cmdp->maxargc); rval = (*cmdp->handler)(cmd_argc, cmd_argv); } else rval = argcount(cmdp, cmd_argc, cmd_argv); known = 1; break; } } if (!known) warnx("unknown command `%s'", cmd_argv[0]), rval = 1; } else rval = 0; free(line); if (rval < 0) /* user typed "quit" */ return 0; if (rval) warnx("rval was %d", rval); } el_end(elptr); history_end(hist); return rval; } union dinode *curinode; ino_t curinum, ocurrent; #define GETINUM(ac,inum) inum = strtoul(argv[ac], &cp, 0); \ if (inum < ROOTINO || inum > maxino || cp == argv[ac] || *cp != '\0' ) { \ printf("inode %ju out of range; range is [%ju,%ju]\n", \ (uintmax_t)inum, (uintmax_t)ROOTINO, (uintmax_t)maxino); \ return 1; \ } /* * Focus on given inode number */ CMDFUNCSTART(focus) { ino_t inum; char *cp; GETINUM(1,inum); curinode = ginode(inum); ocurrent = curinum; curinum = inum; printactive(0); return 0; } CMDFUNCSTART(back) { curinum = ocurrent; curinode = ginode(curinum); printactive(0); return 0; } CMDFUNCSTART(zapi) { ino_t inum; union dinode *dp; char *cp; GETINUM(1,inum); dp = ginode(inum); clearinode(dp); inodirty(); if (curinode) /* re-set after potential change */ curinode = ginode(curinum); return 0; } CMDFUNCSTART(active) { printactive(0); return 0; } CMDFUNCSTART(blocks) { printactive(1); return 0; } CMDFUNCSTART(quit) { return -1; } CMDFUNCSTART(uplink) { if (!checkactive()) return 1; DIP_SET(curinode, di_nlink, DIP(curinode, di_nlink) + 1); printf("inode %ju link count now %d\n", 
(uintmax_t)curinum, DIP(curinode, di_nlink)); inodirty(); return 0; } CMDFUNCSTART(downlink) { if (!checkactive()) return 1; DIP_SET(curinode, di_nlink, DIP(curinode, di_nlink) - 1); printf("inode %ju link count now %d\n", (uintmax_t)curinum, DIP(curinode, di_nlink)); inodirty(); return 0; } const char *typename[] = { "unknown", "fifo", "char special", "unregistered #3", "directory", "unregistered #5", "blk special", "unregistered #7", "regular", "unregistered #9", "symlink", "unregistered #11", "socket", "unregistered #13", "whiteout", }; int diroff; int slot; int scannames(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; printf("slot %d off %d ino %d reclen %d: %s, `%.*s'\n", slot++, diroff, dirp->d_ino, dirp->d_reclen, typename[dirp->d_type], dirp->d_namlen, dirp->d_name); diroff += dirp->d_reclen; return (KEEPON); } CMDFUNCSTART(ls) { struct inodesc idesc; checkactivedir(); /* let it go on anyway */ slot = 0; diroff = 0; idesc.id_number = curinum; idesc.id_func = scannames; idesc.id_type = DATA; idesc.id_fix = IGNORE; ckinode(curinode, &idesc); curinode = ginode(curinum); return 0; } static int findblk_numtofind; static int wantedblksize; CMDFUNCSTART(findblk) { ino_t inum, inosused; uint32_t *wantedblk32; uint64_t *wantedblk64; struct bufarea *cgbp; struct cg *cgp; int c, i, is_ufs2; wantedblksize = (argc - 1); is_ufs2 = sblock.fs_magic == FS_UFS2_MAGIC; ocurrent = curinum; if (is_ufs2) { wantedblk64 = calloc(wantedblksize, sizeof(uint64_t)); if (wantedblk64 == NULL) err(1, "malloc"); for (i = 1; i < argc; i++) wantedblk64[i - 1] = dbtofsb(&sblock, strtoull(argv[i], NULL, 0)); } else { wantedblk32 = calloc(wantedblksize, sizeof(uint32_t)); if (wantedblk32 == NULL) err(1, "malloc"); for (i = 1; i < argc; i++) wantedblk32[i - 1] = dbtofsb(&sblock, strtoull(argv[i], NULL, 0)); } findblk_numtofind = wantedblksize; /* * sblock.fs_ncg holds a number of cylinder groups. * Iterate over all cylinder groups. */ for (c = 0; c < sblock.fs_ncg; c++) { /* * sblock.fs_ipg holds a number of inodes per cylinder group. * Calculate a highest inode number for a given cylinder group. */ inum = c * sblock.fs_ipg; /* Read cylinder group. */ cgbp = cgget(c); cgp = cgbp->b_un.b_cg; /* * Get a highest used inode number for a given cylinder group. * For UFS1 all inodes initialized at the newfs stage. */ if (is_ufs2) inosused = cgp->cg_initediblk; else inosused = sblock.fs_ipg; for (; inosused > 0; inum++, inosused--) { /* Skip magic inodes: 0, WINO, ROOTINO. */ if (inum < ROOTINO) continue; /* * Check if the block we are looking for is just an inode block. * * ino_to_fsba() - get block containing inode from its number. * INOPB() - get a number of inodes in one disk block. */ if (is_ufs2 ? compare_blk64(wantedblk64, ino_to_fsba(&sblock, inum)) : compare_blk32(wantedblk32, ino_to_fsba(&sblock, inum))) { printf("block %llu: inode block (%ju-%ju)\n", (unsigned long long)fsbtodb(&sblock, ino_to_fsba(&sblock, inum)), (uintmax_t)(inum / INOPB(&sblock)) * INOPB(&sblock), (uintmax_t)(inum / INOPB(&sblock) + 1) * INOPB(&sblock)); findblk_numtofind--; if (findblk_numtofind == 0) goto end; } /* Get on-disk inode aka dinode. */ curinum = inum; curinode = ginode(inum); /* Find IFLNK dinode with allocated data blocks. 
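Short symlinks are stored in the inode itself and have no data blocks, so they are skipped below.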
*/ switch (DIP(curinode, di_mode) & IFMT) { case IFDIR: case IFREG: if (DIP(curinode, di_blocks) == 0) continue; break; case IFLNK: { uint64_t size = DIP(curinode, di_size); if (size > 0 && size < sblock.fs_maxsymlinklen && DIP(curinode, di_blocks) == 0) continue; else break; } default: continue; } /* Look through direct data blocks. */ if (is_ufs2 ? find_blks64(curinode->dp2.di_db, NDADDR, wantedblk64) : find_blks32(curinode->dp1.di_db, NDADDR, wantedblk32)) goto end; for (i = 0; i < NIADDR; i++) { /* * Does the block we are looking for belongs to the * indirect blocks? */ if (is_ufs2 ? compare_blk64(wantedblk64, curinode->dp2.di_ib[i]) : compare_blk32(wantedblk32, curinode->dp1.di_ib[i])) if (founddatablk(is_ufs2 ? curinode->dp2.di_ib[i] : curinode->dp1.di_ib[i])) goto end; /* * Search through indirect, double and triple indirect * data blocks. */ if (is_ufs2 ? (curinode->dp2.di_ib[i] != 0) : (curinode->dp1.di_ib[i] != 0)) if (is_ufs2 ? find_indirblks64(curinode->dp2.di_ib[i], i, wantedblk64) : find_indirblks32(curinode->dp1.di_ib[i], i, wantedblk32)) goto end; } } } end: curinum = ocurrent; curinode = ginode(curinum); return 0; } static int compare_blk32(uint32_t *wantedblk, uint32_t curblk) { int i; for (i = 0; i < wantedblksize; i++) { if (wantedblk[i] != 0 && wantedblk[i] == curblk) { wantedblk[i] = 0; return 1; } } return 0; } static int compare_blk64(uint64_t *wantedblk, uint64_t curblk) { int i; for (i = 0; i < wantedblksize; i++) { if (wantedblk[i] != 0 && wantedblk[i] == curblk) { wantedblk[i] = 0; return 1; } } return 0; } static int founddatablk(uint64_t blk) { printf("%llu: data block of inode %ju\n", (unsigned long long)fsbtodb(&sblock, blk), (uintmax_t)curinum); findblk_numtofind--; if (findblk_numtofind == 0) return 1; return 0; } static int find_blks32(uint32_t *buf, int size, uint32_t *wantedblk) { int blk; for (blk = 0; blk < size; blk++) { if (buf[blk] == 0) continue; if (compare_blk32(wantedblk, buf[blk])) { if (founddatablk(buf[blk])) return 1; } } return 0; } static int find_indirblks32(uint32_t blk, int ind_level, uint32_t *wantedblk) { #define MAXNINDIR (MAXBSIZE / sizeof(uint32_t)) uint32_t idblk[MAXNINDIR]; int i; blread(fsreadfd, (char *)idblk, fsbtodb(&sblock, blk), (int)sblock.fs_bsize); if (ind_level <= 0) { if (find_blks32(idblk, sblock.fs_bsize / sizeof(uint32_t), wantedblk)) return 1; } else { ind_level--; for (i = 0; i < sblock.fs_bsize / sizeof(uint32_t); i++) { if (compare_blk32(wantedblk, idblk[i])) { if (founddatablk(idblk[i])) return 1; } if (idblk[i] != 0) if (find_indirblks32(idblk[i], ind_level, wantedblk)) return 1; } } #undef MAXNINDIR return 0; } static int find_blks64(uint64_t *buf, int size, uint64_t *wantedblk) { int blk; for (blk = 0; blk < size; blk++) { if (buf[blk] == 0) continue; if (compare_blk64(wantedblk, buf[blk])) { if (founddatablk(buf[blk])) return 1; } } return 0; } static int find_indirblks64(uint64_t blk, int ind_level, uint64_t *wantedblk) { #define MAXNINDIR (MAXBSIZE / sizeof(uint64_t)) uint64_t idblk[MAXNINDIR]; int i; blread(fsreadfd, (char *)idblk, fsbtodb(&sblock, blk), (int)sblock.fs_bsize); if (ind_level <= 0) { if (find_blks64(idblk, sblock.fs_bsize / sizeof(uint64_t), wantedblk)) return 1; } else { ind_level--; for (i = 0; i < sblock.fs_bsize / sizeof(uint64_t); i++) { if (compare_blk64(wantedblk, idblk[i])) { if (founddatablk(idblk[i])) return 1; } if (idblk[i] != 0) if (find_indirblks64(idblk[i], ind_level, wantedblk)) return 1; } } #undef MAXNINDIR return 0; } int findino(struct inodesc *idesc); /* from fsck */ 
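As an aside on the recursion above: find_indirblks32()/find_indirblks64() descend one indirection level per call, and each level multiplies the number of reachable data blocks by NINDIR (fs_bsize divided by the size of a block pointer). A small illustrative program, assuming a hypothetical 32 KB block size and UFS2's 8-byte pointers (the real values come from the superblock):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	const uint64_t bsize = 32768;				/* assumed fs_bsize */
	const uint64_t nindir = bsize / sizeof(uint64_t);	/* UFS2 pointers */
	uint64_t covered = 1;
	int level;

	/* NIADDR == 3: single, double and triple indirect blocks. */
	for (level = 0; level < 3; level++) {
		covered *= nindir;
		printf("level %d indirect block: %ju data blocks (%ju bytes)\n",
		    level, (uintmax_t)covered, (uintmax_t)(covered * bsize));
	}
	return (0);
}

With UFS1's 4-byte block pointers NINDIR doubles for the same block size.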
static int dolookup(char *name); static int dolookup(char *name) { struct inodesc idesc; if (!checkactivedir()) return 0; idesc.id_number = curinum; idesc.id_func = findino; idesc.id_name = name; idesc.id_type = DATA; idesc.id_fix = IGNORE; if (ckinode(curinode, &idesc) & FOUND) { curinum = idesc.id_parent; curinode = ginode(curinum); printactive(0); return 1; } else { warnx("name `%s' not found in current inode directory", name); return 0; } } CMDFUNCSTART(focusname) { char *p, *val; if (!checkactive()) return 1; ocurrent = curinum; if (argv[1][0] == '/') { curinum = ROOTINO; curinode = ginode(ROOTINO); } else { if (!checkactivedir()) return 1; } for (p = argv[1]; p != NULL;) { while ((val = strsep(&p, "/")) != NULL && *val == '\0'); if (val) { printf("component `%s': ", val); fflush(stdout); if (!dolookup(val)) { curinode = ginode(curinum); return(1); } } } return 0; } CMDFUNCSTART(ln) { ino_t inum; int rval; char *cp; GETINUM(1,inum); if (!checkactivedir()) return 1; rval = makeentry(curinum, inum, argv[2]); if (rval) printf("Ino %ju entered as `%s'\n", (uintmax_t)inum, argv[2]); else printf("could not enter name? weird.\n"); curinode = ginode(curinum); return rval; } CMDFUNCSTART(rm) { int rval; if (!checkactivedir()) return 1; rval = changeino(curinum, argv[1], 0); if (rval & ALTERED) { printf("Name `%s' removed\n", argv[1]); return 0; } else { printf("could not remove name ('%s')? weird.\n", argv[1]); return 1; } } long slotcount, desired; int chinumfunc(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; if (slotcount++ == desired) { dirp->d_ino = idesc->id_parent; return STOP|ALTERED|FOUND; } return KEEPON; } CMDFUNCSTART(chinum) { char *cp; ino_t inum; struct inodesc idesc; slotcount = 0; if (!checkactivedir()) return 1; GETINUM(2,inum); desired = strtol(argv[1], &cp, 0); if (cp == argv[1] || *cp != '\0' || desired < 0) { printf("invalid slot number `%s'\n", argv[1]); return 1; } idesc.id_number = curinum; idesc.id_func = chinumfunc; idesc.id_fix = IGNORE; idesc.id_type = DATA; idesc.id_parent = inum; /* XXX convenient hiding place */ if (ckinode(curinode, &idesc) & FOUND) return 0; else { warnx("no %sth slot in current directory", argv[1]); return 1; } } int chnamefunc(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; struct direct testdir; if (slotcount++ == desired) { /* will name fit? 
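(the replacement name must fit within the existing entry's d_reclen)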
*/ testdir.d_namlen = strlen(idesc->id_name); if (DIRSIZ(NEWDIRFMT, &testdir) <= dirp->d_reclen) { dirp->d_namlen = testdir.d_namlen; strcpy(dirp->d_name, idesc->id_name); return STOP|ALTERED|FOUND; } else return STOP|FOUND; /* won't fit, so give up */ } return KEEPON; } CMDFUNCSTART(chname) { int rval; char *cp; struct inodesc idesc; slotcount = 0; if (!checkactivedir()) return 1; desired = strtoul(argv[1], &cp, 0); if (cp == argv[1] || *cp != '\0') { printf("invalid slot number `%s'\n", argv[1]); return 1; } idesc.id_number = curinum; idesc.id_func = chnamefunc; idesc.id_fix = IGNORE; idesc.id_type = DATA; idesc.id_name = argv[2]; rval = ckinode(curinode, &idesc); if ((rval & (FOUND|ALTERED)) == (FOUND|ALTERED)) return 0; else if (rval & FOUND) { warnx("new name `%s' does not fit in slot %s\n", argv[2], argv[1]); return 1; } else { warnx("no %sth slot in current directory", argv[1]); return 1; } } struct typemap { const char *typename; int typebits; } typenamemap[] = { {"file", IFREG}, {"dir", IFDIR}, {"socket", IFSOCK}, {"fifo", IFIFO}, }; CMDFUNCSTART(newtype) { int type; struct typemap *tp; if (!checkactive()) return 1; type = DIP(curinode, di_mode) & IFMT; for (tp = typenamemap; - tp < &typenamemap[sizeof(typenamemap)/sizeof(*typenamemap)]; + tp < &typenamemap[nitems(typenamemap)]; tp++) { if (!strcmp(argv[1], tp->typename)) { printf("setting type to %s\n", tp->typename); type = tp->typebits; break; } } - if (tp == &typenamemap[sizeof(typenamemap)/sizeof(*typenamemap)]) { + if (tp == &typenamemap[nitems(typenamemap)]) { warnx("type `%s' not known", argv[1]); warnx("try one of `file', `dir', `socket', `fifo'"); return 1; } DIP_SET(curinode, di_mode, DIP(curinode, di_mode) & ~IFMT); DIP_SET(curinode, di_mode, DIP(curinode, di_mode) | type); inodirty(); printactive(0); return 0; } CMDFUNCSTART(chlen) { int rval = 1; long len; char *cp; if (!checkactive()) return 1; len = strtol(argv[1], &cp, 0); if (cp == argv[1] || *cp != '\0' || len < 0) { warnx("bad length `%s'", argv[1]); return 1; } DIP_SET(curinode, di_size, len); inodirty(); printactive(0); return rval; } CMDFUNCSTART(chmode) { int rval = 1; long modebits; char *cp; if (!checkactive()) return 1; modebits = strtol(argv[1], &cp, 8); if (cp == argv[1] || *cp != '\0' || (modebits & ~07777)) { warnx("bad modebits `%s'", argv[1]); return 1; } DIP_SET(curinode, di_mode, DIP(curinode, di_mode) & ~07777); DIP_SET(curinode, di_mode, DIP(curinode, di_mode) | modebits); inodirty(); printactive(0); return rval; } CMDFUNCSTART(chaflags) { int rval = 1; u_long flags; char *cp; if (!checkactive()) return 1; flags = strtoul(argv[1], &cp, 0); if (cp == argv[1] || *cp != '\0' ) { warnx("bad flags `%s'", argv[1]); return 1; } if (flags > UINT_MAX) { warnx("flags set beyond 32-bit range of field (%lx)\n", flags); return(1); } DIP_SET(curinode, di_flags, flags); inodirty(); printactive(0); return rval; } CMDFUNCSTART(chgen) { int rval = 1; long gen; char *cp; if (!checkactive()) return 1; gen = strtol(argv[1], &cp, 0); if (cp == argv[1] || *cp != '\0' ) { warnx("bad gen `%s'", argv[1]); return 1; } if (gen > INT_MAX || gen < INT_MIN) { warnx("gen set beyond 32-bit range of field (%lx)\n", gen); return(1); } DIP_SET(curinode, di_gen, gen); inodirty(); printactive(0); return rval; } CMDFUNCSTART(linkcount) { int rval = 1; int lcnt; char *cp; if (!checkactive()) return 1; lcnt = strtol(argv[1], &cp, 0); if (cp == argv[1] || *cp != '\0' ) { warnx("bad link count `%s'", argv[1]); return 1; } if (lcnt > USHRT_MAX || lcnt < 0) { warnx("max link count is 
%d\n", USHRT_MAX); return 1; } DIP_SET(curinode, di_nlink, lcnt); inodirty(); printactive(0); return rval; } CMDFUNCSTART(chowner) { int rval = 1; unsigned long uid; char *cp; struct passwd *pwd; if (!checkactive()) return 1; uid = strtoul(argv[1], &cp, 0); if (cp == argv[1] || *cp != '\0' ) { /* try looking up name */ if ((pwd = getpwnam(argv[1]))) { uid = pwd->pw_uid; } else { warnx("bad uid `%s'", argv[1]); return 1; } } DIP_SET(curinode, di_uid, uid); inodirty(); printactive(0); return rval; } CMDFUNCSTART(chgroup) { int rval = 1; unsigned long gid; char *cp; struct group *grp; if (!checkactive()) return 1; gid = strtoul(argv[1], &cp, 0); if (cp == argv[1] || *cp != '\0' ) { if ((grp = getgrnam(argv[1]))) { gid = grp->gr_gid; } else { warnx("bad gid `%s'", argv[1]); return 1; } } DIP_SET(curinode, di_gid, gid); inodirty(); printactive(0); return rval; } int dotime(char *name, time_t *secp, int32_t *nsecp) { char *p, *val; struct tm t; int32_t nsec; p = strchr(name, '.'); if (p) { *p = '\0'; nsec = strtoul(++p, &val, 0); if (val == p || *val != '\0' || nsec >= 1000000000 || nsec < 0) { warnx("invalid nanoseconds"); goto badformat; } } else nsec = 0; if (strlen(name) != 14) { badformat: warnx("date format: YYYYMMDDHHMMSS[.nsec]"); return 1; } *nsecp = nsec; for (p = name; *p; p++) if (*p < '0' || *p > '9') goto badformat; p = name; #define VAL() ((*p++) - '0') t.tm_year = VAL(); t.tm_year = VAL() + t.tm_year * 10; t.tm_year = VAL() + t.tm_year * 10; t.tm_year = VAL() + t.tm_year * 10 - 1900; t.tm_mon = VAL(); t.tm_mon = VAL() + t.tm_mon * 10 - 1; t.tm_mday = VAL(); t.tm_mday = VAL() + t.tm_mday * 10; t.tm_hour = VAL(); t.tm_hour = VAL() + t.tm_hour * 10; t.tm_min = VAL(); t.tm_min = VAL() + t.tm_min * 10; t.tm_sec = VAL(); t.tm_sec = VAL() + t.tm_sec * 10; t.tm_isdst = -1; *secp = mktime(&t); if (*secp == -1) { warnx("date/time out of range"); return 1; } return 0; } CMDFUNCSTART(chbtime) { time_t secs; int32_t nsecs; if (dotime(argv[1], &secs, &nsecs)) return 1; if (sblock.fs_magic == FS_UFS1_MAGIC) return 1; curinode->dp2.di_birthtime = _time_to_time64(secs); curinode->dp2.di_birthnsec = nsecs; inodirty(); printactive(0); return 0; } CMDFUNCSTART(chmtime) { time_t secs; int32_t nsecs; if (dotime(argv[1], &secs, &nsecs)) return 1; if (sblock.fs_magic == FS_UFS1_MAGIC) curinode->dp1.di_mtime = _time_to_time32(secs); else curinode->dp2.di_mtime = _time_to_time64(secs); DIP_SET(curinode, di_mtimensec, nsecs); inodirty(); printactive(0); return 0; } CMDFUNCSTART(chatime) { time_t secs; int32_t nsecs; if (dotime(argv[1], &secs, &nsecs)) return 1; if (sblock.fs_magic == FS_UFS1_MAGIC) curinode->dp1.di_atime = _time_to_time32(secs); else curinode->dp2.di_atime = _time_to_time64(secs); DIP_SET(curinode, di_atimensec, nsecs); inodirty(); printactive(0); return 0; } CMDFUNCSTART(chctime) { time_t secs; int32_t nsecs; if (dotime(argv[1], &secs, &nsecs)) return 1; if (sblock.fs_magic == FS_UFS1_MAGIC) curinode->dp1.di_ctime = _time_to_time32(secs); else curinode->dp2.di_ctime = _time_to_time64(secs); DIP_SET(curinode, di_ctimensec, nsecs); inodirty(); printactive(0); return 0; } Index: user/alc/PQ_LAUNDRY/sbin/ipfw/tables.c =================================================================== --- user/alc/PQ_LAUNDRY/sbin/ipfw/tables.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sbin/ipfw/tables.c (revision 303642) @@ -1,1995 +1,2007 @@ /* * Copyright (c) 2014 Yandex LLC * Copyright (c) 2014 Alexander V. 
Chernikov * * Redistribution and use in source forms, with and without modification, * are permitted provided that this entire comment appears intact. * * Redistribution in binary form may occur without any restrictions. * Obviously, it would be nice if you gave credit where credit is due * but requiring it would be too onerous. * * This software is provided ``AS IS'' without any warranties of any kind. * * in-kernel ipfw tables support. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ipfw2.h" static void table_modify_record(ipfw_obj_header *oh, int ac, char *av[], int add, int quiet, int update, int atomic); static int table_flush(ipfw_obj_header *oh); static int table_destroy(ipfw_obj_header *oh); static int table_do_create(ipfw_obj_header *oh, ipfw_xtable_info *i); static int table_do_modify(ipfw_obj_header *oh, ipfw_xtable_info *i); static int table_do_swap(ipfw_obj_header *oh, char *second); static void table_create(ipfw_obj_header *oh, int ac, char *av[]); static void table_modify(ipfw_obj_header *oh, int ac, char *av[]); static void table_lookup(ipfw_obj_header *oh, int ac, char *av[]); static void table_lock(ipfw_obj_header *oh, int lock); static int table_swap(ipfw_obj_header *oh, char *second); static int table_get_info(ipfw_obj_header *oh, ipfw_xtable_info *i); static int table_show_info(ipfw_xtable_info *i, void *arg); static void table_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint32_t set, uint16_t uidx); static int table_flush_one(ipfw_xtable_info *i, void *arg); static int table_show_one(ipfw_xtable_info *i, void *arg); static int table_do_get_list(ipfw_xtable_info *i, ipfw_obj_header **poh); static void table_show_list(ipfw_obj_header *oh, int need_header); static void table_show_entry(ipfw_xtable_info *i, ipfw_obj_tentry *tent); static void tentry_fill_key(ipfw_obj_header *oh, ipfw_obj_tentry *tent, char *key, int add, uint8_t *ptype, uint32_t *pvmask, ipfw_xtable_info *xi); static void tentry_fill_value(ipfw_obj_header *oh, ipfw_obj_tentry *tent, char *arg, uint8_t type, uint32_t vmask); static void table_show_value(char *buf, size_t bufsize, ipfw_table_value *v, uint32_t vmask, int print_ip); typedef int (table_cb_t)(ipfw_xtable_info *i, void *arg); static int tables_foreach(table_cb_t *f, void *arg, int sort); #ifndef s6_addr32 #define s6_addr32 __u6_addr.__u6_addr32 #endif static struct _s_x tabletypes[] = { { "addr", IPFW_TABLE_ADDR }, { "iface", IPFW_TABLE_INTERFACE }, { "number", IPFW_TABLE_NUMBER }, { "flow", IPFW_TABLE_FLOW }, { NULL, 0 } }; static struct _s_x tablevaltypes[] = { { "skipto", IPFW_VTYPE_SKIPTO }, { "pipe", IPFW_VTYPE_PIPE }, { "fib", IPFW_VTYPE_FIB }, { "nat", IPFW_VTYPE_NAT }, { "dscp", IPFW_VTYPE_DSCP }, { "tag", IPFW_VTYPE_TAG }, { "divert", IPFW_VTYPE_DIVERT }, { "netgraph", IPFW_VTYPE_NETGRAPH }, { "limit", IPFW_VTYPE_LIMIT }, { "ipv4", IPFW_VTYPE_NH4 }, { "ipv6", IPFW_VTYPE_NH6 }, { NULL, 0 } }; static struct _s_x tablecmds[] = { { "add", TOK_ADD }, { "delete", TOK_DEL }, { "create", TOK_CREATE }, { "destroy", TOK_DESTROY }, { "flush", TOK_FLUSH }, { "modify", TOK_MODIFY }, { "swap", TOK_SWAP }, { "info", TOK_INFO }, { "detail", TOK_DETAIL }, { "list", TOK_LIST }, { "lookup", TOK_LOOKUP }, { "atomic", TOK_ATOMIC }, { "lock", TOK_LOCK }, { "unlock", TOK_UNLOCK }, { NULL, 0 } }; static int lookup_host (char *host, struct in_addr *ipaddr) { struct hostent *he; if (!inet_aton(host, ipaddr)) { if ((he = 
gethostbyname(host)) == NULL) return(-1); *ipaddr = *(struct in_addr *)he->h_addr_list[0]; } return(0); } /* * This one handles all table-related commands * ipfw table NAME create ... * ipfw table NAME modify ... * ipfw table NAME destroy * ipfw table NAME swap NAME * ipfw table NAME lock * ipfw table NAME unlock * ipfw table NAME add addr[/masklen] [value] * ipfw table NAME add [addr[/masklen] value] [addr[/masklen] value] .. * ipfw table NAME delete addr[/masklen] [addr[/masklen]] .. * ipfw table NAME lookup addr * ipfw table {NAME | all} flush * ipfw table {NAME | all} list * ipfw table {NAME | all} info * ipfw table {NAME | all} detail */ void ipfw_table_handler(int ac, char *av[]) { int do_add, is_all; int atomic, error, tcmd; ipfw_xtable_info i; ipfw_obj_header oh; char *tablename; uint32_t set; void *arg; memset(&oh, 0, sizeof(oh)); is_all = 0; if (co.use_set != 0) set = co.use_set - 1; else set = 0; ac--; av++; NEED1("table needs name"); tablename = *av; if (table_check_name(tablename) == 0) { table_fill_ntlv(&oh.ntlv, *av, set, 1); oh.idx = 1; } else { if (strcmp(tablename, "all") == 0) is_all = 1; else errx(EX_USAGE, "table name %s is invalid", tablename); } ac--; av++; NEED1("table needs command"); tcmd = get_token(tablecmds, *av, "table command"); /* Check if atomic operation was requested */ atomic = 0; if (tcmd == TOK_ATOMIC) { ac--; av++; NEED1("atomic needs command"); tcmd = get_token(tablecmds, *av, "table command"); switch (tcmd) { case TOK_ADD: break; default: errx(EX_USAGE, "atomic is not compatible with %s", *av); } atomic = 1; } switch (tcmd) { case TOK_LIST: case TOK_INFO: case TOK_DETAIL: case TOK_FLUSH: break; default: if (is_all != 0) errx(EX_USAGE, "table name required"); } switch (tcmd) { case TOK_ADD: case TOK_DEL: do_add = **av == 'a'; ac--; av++; table_modify_record(&oh, ac, av, do_add, co.do_quiet, co.do_quiet, atomic); break; case TOK_CREATE: ac--; av++; table_create(&oh, ac, av); break; case TOK_MODIFY: ac--; av++; table_modify(&oh, ac, av); break; case TOK_DESTROY: - if (table_destroy(&oh) != 0) + if (table_destroy(&oh) == 0) + break; + if (errno != ESRCH) err(EX_OSERR, "failed to destroy table %s", tablename); + /* ESRCH isn't fatal, warn if not quiet mode */ + if (co.do_quiet == 0) + warn("failed to destroy table %s", tablename); break; case TOK_FLUSH: if (is_all == 0) { - if ((error = table_flush(&oh)) != 0) + if ((error = table_flush(&oh)) == 0) + break; + if (errno != ESRCH) err(EX_OSERR, "failed to flush table %s info", tablename); + /* ESRCH isn't fatal, warn if not quiet mode */ + if (co.do_quiet == 0) + warn("failed to flush table %s info", + tablename); } else { error = tables_foreach(table_flush_one, &oh, 1); if (error != 0) err(EX_OSERR, "failed to flush tables list"); + /* XXX: we ignore errors here */ } break; case TOK_SWAP: ac--; av++; NEED1("second table name required"); table_swap(&oh, *av); break; case TOK_LOCK: case TOK_UNLOCK: table_lock(&oh, (tcmd == TOK_LOCK)); break; case TOK_DETAIL: case TOK_INFO: arg = (tcmd == TOK_DETAIL) ? 
(void *)1 : NULL; if (is_all == 0) { if ((error = table_get_info(&oh, &i)) != 0) err(EX_OSERR, "failed to request table info"); table_show_info(&i, arg); } else { error = tables_foreach(table_show_info, arg, 1); if (error != 0) err(EX_OSERR, "failed to request tables list"); } break; case TOK_LIST: if (is_all == 0) { ipfw_xtable_info i; if ((error = table_get_info(&oh, &i)) != 0) err(EX_OSERR, "failed to request table info"); table_show_one(&i, NULL); } else { error = tables_foreach(table_show_one, NULL, 1); if (error != 0) err(EX_OSERR, "failed to request tables list"); } break; case TOK_LOOKUP: ac--; av++; table_lookup(&oh, ac, av); break; } } static void table_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint32_t set, uint16_t uidx) { ntlv->head.type = IPFW_TLV_TBL_NAME; ntlv->head.length = sizeof(ipfw_obj_ntlv); ntlv->idx = uidx; ntlv->set = set; strlcpy(ntlv->name, name, sizeof(ntlv->name)); } static void table_fill_objheader(ipfw_obj_header *oh, ipfw_xtable_info *i) { oh->idx = 1; table_fill_ntlv(&oh->ntlv, i->tablename, i->set, 1); } static struct _s_x tablenewcmds[] = { { "type", TOK_TYPE }, { "valtype", TOK_VALTYPE }, { "algo", TOK_ALGO }, { "limit", TOK_LIMIT }, { "locked", TOK_LOCK }, { NULL, 0 } }; static struct _s_x flowtypecmds[] = { { "src-ip", IPFW_TFFLAG_SRCIP }, { "proto", IPFW_TFFLAG_PROTO }, { "src-port", IPFW_TFFLAG_SRCPORT }, { "dst-ip", IPFW_TFFLAG_DSTIP }, { "dst-port", IPFW_TFFLAG_DSTPORT }, { NULL, 0 } }; int table_parse_type(uint8_t ttype, char *p, uint8_t *tflags) { uint32_t fset, fclear; char *e; /* Parse type options */ switch(ttype) { case IPFW_TABLE_FLOW: fset = fclear = 0; if (fill_flags(flowtypecmds, p, &e, &fset, &fclear) != 0) errx(EX_USAGE, "unable to parse flow option %s", e); *tflags = fset; break; default: return (EX_USAGE); } return (0); } void table_print_type(char *tbuf, size_t size, uint8_t type, uint8_t tflags) { const char *tname; int l; if ((tname = match_value(tabletypes, type)) == NULL) tname = "unknown"; l = snprintf(tbuf, size, "%s", tname); tbuf += l; size -= l; switch(type) { case IPFW_TABLE_FLOW: if (tflags != 0) { *tbuf++ = ':'; l--; print_flags_buffer(tbuf, size, flowtypecmds, tflags); } break; } } /* * Creates new table * * ipfw table NAME create [ type { addr | iface | number | flow } ] * [ algo algoname ] */ static void table_create(ipfw_obj_header *oh, int ac, char *av[]) { ipfw_xtable_info xi; int error, tcmd, val; uint32_t fset, fclear; char *e, *p; char tbuf[128]; memset(&xi, 0, sizeof(xi)); while (ac > 0) { tcmd = get_token(tablenewcmds, *av, "option"); ac--; av++; switch (tcmd) { case TOK_LIMIT: NEED1("limit value required"); xi.limit = strtol(*av, NULL, 10); ac--; av++; break; case TOK_TYPE: NEED1("table type required"); /* Type may have suboptions after ':' */ if ((p = strchr(*av, ':')) != NULL) *p++ = '\0'; val = match_token(tabletypes, *av); if (val == -1) { concat_tokens(tbuf, sizeof(tbuf), tabletypes, ", "); errx(EX_USAGE, "Unknown tabletype: %s. Supported: %s", *av, tbuf); } xi.type = val; if (p != NULL) { error = table_parse_type(val, p, &xi.tflags); if (error != 0) errx(EX_USAGE, "Unsupported suboptions: %s", p); } ac--; av++; break; case TOK_VALTYPE: NEED1("table value type required"); fset = fclear = 0; val = fill_flags(tablevaltypes, *av, &e, &fset, &fclear); if (val != -1) { xi.vmask = fset; ac--; av++; break; } concat_tokens(tbuf, sizeof(tbuf), tablevaltypes, ", "); errx(EX_USAGE, "Unknown value type: %s. 
Supported: %s", e, tbuf); break; case TOK_ALGO: NEED1("table algorithm name required"); if (strlen(*av) > sizeof(xi.algoname)) errx(EX_USAGE, "algorithm name too long"); strlcpy(xi.algoname, *av, sizeof(xi.algoname)); ac--; av++; break; case TOK_LOCK: xi.flags |= IPFW_TGFLAGS_LOCKED; break; } } /* Set some defaults to preserve compatibility. */ if (xi.algoname[0] == '\0' && xi.type == 0) xi.type = IPFW_TABLE_ADDR; if (xi.vmask == 0) xi.vmask = IPFW_VTYPE_LEGACY; if ((error = table_do_create(oh, &xi)) != 0) err(EX_OSERR, "Table creation failed"); } /* * Creates new table * * Request: [ ipfw_obj_header ipfw_xtable_info ] * * Returns 0 on success. */ static int table_do_create(ipfw_obj_header *oh, ipfw_xtable_info *i) { char tbuf[sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info)]; int error; memcpy(tbuf, oh, sizeof(*oh)); memcpy(tbuf + sizeof(*oh), i, sizeof(*i)); oh = (ipfw_obj_header *)tbuf; error = do_set3(IP_FW_TABLE_XCREATE, &oh->opheader, sizeof(tbuf)); return (error); } /* * Modifies existing table * * ipfw table NAME modify [ limit number ] */ static void table_modify(ipfw_obj_header *oh, int ac, char *av[]) { ipfw_xtable_info xi; int tcmd; memset(&xi, 0, sizeof(xi)); while (ac > 0) { tcmd = get_token(tablenewcmds, *av, "option"); ac--; av++; switch (tcmd) { case TOK_LIMIT: NEED1("limit value required"); xi.limit = strtol(*av, NULL, 10); xi.mflags |= IPFW_TMFLAGS_LIMIT; ac--; av++; break; default: errx(EX_USAGE, "cmd is not supported for modificatiob"); } } if (table_do_modify(oh, &xi) != 0) err(EX_OSERR, "Table modification failed"); } /* * Modifies existing table. * * Request: [ ipfw_obj_header ipfw_xtable_info ] * * Returns 0 on success. */ static int table_do_modify(ipfw_obj_header *oh, ipfw_xtable_info *i) { char tbuf[sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info)]; int error; memcpy(tbuf, oh, sizeof(*oh)); memcpy(tbuf + sizeof(*oh), i, sizeof(*i)); oh = (ipfw_obj_header *)tbuf; error = do_set3(IP_FW_TABLE_XMODIFY, &oh->opheader, sizeof(tbuf)); return (error); } /* * Locks or unlocks given table */ static void table_lock(ipfw_obj_header *oh, int lock) { ipfw_xtable_info xi; memset(&xi, 0, sizeof(xi)); xi.mflags |= IPFW_TMFLAGS_LOCK; xi.flags |= (lock != 0) ? IPFW_TGFLAGS_LOCKED : 0; if (table_do_modify(oh, &xi) != 0) err(EX_OSERR, "Table %s failed", lock != 0 ? "lock" : "unlock"); } /* * Destroys given table specified by @oh->ntlv. * Returns 0 on success. */ static int table_destroy(ipfw_obj_header *oh) { if (do_set3(IP_FW_TABLE_XDESTROY, &oh->opheader, sizeof(*oh)) != 0) return (-1); return (0); } /* * Flushes given table specified by @oh->ntlv. * Returns 0 on success. */ static int table_flush(ipfw_obj_header *oh) { if (do_set3(IP_FW_TABLE_XFLUSH, &oh->opheader, sizeof(*oh)) != 0) return (-1); return (0); } static int table_do_swap(ipfw_obj_header *oh, char *second) { char tbuf[sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ntlv)]; int error; memset(tbuf, 0, sizeof(tbuf)); memcpy(tbuf, oh, sizeof(*oh)); oh = (ipfw_obj_header *)tbuf; table_fill_ntlv((ipfw_obj_ntlv *)(oh + 1), second, oh->ntlv.set, 1); error = do_set3(IP_FW_TABLE_XSWAP, &oh->opheader, sizeof(tbuf)); return (error); } /* * Swaps given table with @second one. 
*/ static int table_swap(ipfw_obj_header *oh, char *second) { - int error; if (table_check_name(second) != 0) errx(EX_USAGE, "table name %s is invalid", second); - error = table_do_swap(oh, second); + if (table_do_swap(oh, second) == 0) + return (0); - switch (error) { + switch (errno) { case EINVAL: errx(EX_USAGE, "Unable to swap table: check types"); case EFBIG: errx(EX_USAGE, "Unable to swap table: check limits"); } return (0); } /* * Retrieves table in given table specified by @oh->ntlv. * it inside @i. * Returns 0 on success. */ static int table_get_info(ipfw_obj_header *oh, ipfw_xtable_info *i) { char tbuf[sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info)]; size_t sz; sz = sizeof(tbuf); memset(tbuf, 0, sizeof(tbuf)); memcpy(tbuf, oh, sizeof(*oh)); oh = (ipfw_obj_header *)tbuf; if (do_get3(IP_FW_TABLE_XINFO, &oh->opheader, &sz) != 0) return (errno); if (sz < sizeof(tbuf)) return (EINVAL); *i = *(ipfw_xtable_info *)(oh + 1); return (0); } static struct _s_x tablealgoclass[] = { { "hash", IPFW_TACLASS_HASH }, { "array", IPFW_TACLASS_ARRAY }, { "radix", IPFW_TACLASS_RADIX }, { NULL, 0 } }; struct ta_cldata { uint8_t taclass; uint8_t spare4; uint16_t itemsize; uint16_t itemsize6; uint32_t size; uint32_t count; }; /* * Print global/per-AF table @i algorithm info. */ static void table_show_tainfo(ipfw_xtable_info *i, struct ta_cldata *d, const char *af, const char *taclass) { switch (d->taclass) { case IPFW_TACLASS_HASH: case IPFW_TACLASS_ARRAY: printf(" %salgorithm %s info\n", af, taclass); if (d->itemsize == d->itemsize6) printf(" size: %u items: %u itemsize: %u\n", d->size, d->count, d->itemsize); else printf(" size: %u items: %u " "itemsize4: %u itemsize6: %u\n", d->size, d->count, d->itemsize, d->itemsize6); break; case IPFW_TACLASS_RADIX: printf(" %salgorithm %s info\n", af, taclass); if (d->itemsize == d->itemsize6) printf(" items: %u itemsize: %u\n", d->count, d->itemsize); else printf(" items: %u " "itemsize4: %u itemsize6: %u\n", d->count, d->itemsize, d->itemsize6); break; default: printf(" algo class: %s\n", taclass); } } static void table_print_valheader(char *buf, size_t bufsize, uint32_t vmask) { if (vmask == IPFW_VTYPE_LEGACY) { snprintf(buf, bufsize, "legacy"); return; } memset(buf, 0, bufsize); print_flags_buffer(buf, bufsize, tablevaltypes, vmask); } /* * Prints table info struct @i in human-readable form. 
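* Per-algorithm details are appended only when @arg is non-NULL (the "detail" command).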
*/ static int table_show_info(ipfw_xtable_info *i, void *arg) { const char *vtype; ipfw_ta_tinfo *tainfo; int afdata, afitem; struct ta_cldata d; char ttype[64], tvtype[64]; table_print_type(ttype, sizeof(ttype), i->type, i->tflags); table_print_valheader(tvtype, sizeof(tvtype), i->vmask); printf("--- table(%s), set(%u) ---\n", i->tablename, i->set); if ((i->flags & IPFW_TGFLAGS_LOCKED) != 0) printf(" kindex: %d, type: %s, locked\n", i->kidx, ttype); else printf(" kindex: %d, type: %s\n", i->kidx, ttype); printf(" references: %u, valtype: %s\n", i->refcnt, tvtype); printf(" algorithm: %s\n", i->algoname); printf(" items: %u, size: %u\n", i->count, i->size); if (i->limit > 0) printf(" limit: %u\n", i->limit); /* Print algo-specific info if requested & set */ if (arg == NULL) return (0); if ((i->ta_info.flags & IPFW_TATFLAGS_DATA) == 0) return (0); tainfo = &i->ta_info; afdata = 0; afitem = 0; if (tainfo->flags & IPFW_TATFLAGS_AFDATA) afdata = 1; if (tainfo->flags & IPFW_TATFLAGS_AFITEM) afitem = 1; memset(&d, 0, sizeof(d)); d.taclass = tainfo->taclass4; d.size = tainfo->size4; d.count = tainfo->count4; d.itemsize = tainfo->itemsize4; if (afdata == 0 && afitem != 0) d.itemsize6 = tainfo->itemsize6; else d.itemsize6 = d.itemsize; if ((vtype = match_value(tablealgoclass, d.taclass)) == NULL) vtype = "unknown"; if (afdata == 0) { table_show_tainfo(i, &d, "", vtype); } else { table_show_tainfo(i, &d, "IPv4 ", vtype); memset(&d, 0, sizeof(d)); d.taclass = tainfo->taclass6; if ((vtype = match_value(tablealgoclass, d.taclass)) == NULL) vtype = "unknown"; d.size = tainfo->size6; d.count = tainfo->count6; d.itemsize = tainfo->itemsize6; d.itemsize6 = d.itemsize; table_show_tainfo(i, &d, "IPv6 ", vtype); } return (0); } /* * Function wrappers which can be used either * as is or as foreach function parameter. 
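* Each wrapper matches the table_cb_t signature, so it can be called directly or handed to tables_foreach().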
*/ static int table_show_one(ipfw_xtable_info *i, void *arg) { ipfw_obj_header *oh; int error; if ((error = table_do_get_list(i, &oh)) != 0) { err(EX_OSERR, "Error requesting table %s list", i->tablename); return (error); } table_show_list(oh, 1); free(oh); return (0); } static int table_flush_one(ipfw_xtable_info *i, void *arg) { ipfw_obj_header *oh; oh = (ipfw_obj_header *)arg; table_fill_ntlv(&oh->ntlv, i->tablename, i->set, 1); return (table_flush(oh)); } static int table_do_modify_record(int cmd, ipfw_obj_header *oh, ipfw_obj_tentry *tent, int count, int atomic) { ipfw_obj_ctlv *ctlv; ipfw_obj_tentry *tent_base; caddr_t pbuf; char xbuf[sizeof(*oh) + sizeof(ipfw_obj_ctlv) + sizeof(*tent)]; int error, i; size_t sz; sz = sizeof(*ctlv) + sizeof(*tent) * count; if (count == 1) { memset(xbuf, 0, sizeof(xbuf)); pbuf = xbuf; } else { if ((pbuf = calloc(1, sizeof(*oh) + sz)) == NULL) return (ENOMEM); } memcpy(pbuf, oh, sizeof(*oh)); oh = (ipfw_obj_header *)pbuf; oh->opheader.version = 1; ctlv = (ipfw_obj_ctlv *)(oh + 1); ctlv->count = count; ctlv->head.length = sz; if (atomic != 0) ctlv->flags |= IPFW_CTF_ATOMIC; tent_base = tent; memcpy(ctlv + 1, tent, sizeof(*tent) * count); tent = (ipfw_obj_tentry *)(ctlv + 1); for (i = 0; i < count; i++, tent++) { tent->head.length = sizeof(ipfw_obj_tentry); tent->idx = oh->idx; } sz += sizeof(*oh); error = do_get3(cmd, &oh->opheader, &sz); tent = (ipfw_obj_tentry *)(ctlv + 1); /* Copy result back to provided buffer */ memcpy(tent_base, ctlv + 1, sizeof(*tent) * count); if (pbuf != xbuf) free(pbuf); return (error); } static void table_modify_record(ipfw_obj_header *oh, int ac, char *av[], int add, int quiet, int update, int atomic) { ipfw_obj_tentry *ptent, tent, *tent_buf; ipfw_xtable_info xi; uint8_t type; uint32_t vmask; int cmd, count, error, i, ignored; char *texterr, *etxt, *px; if (ac == 0) errx(EX_USAGE, "address required"); if (add != 0) { cmd = IP_FW_TABLE_XADD; texterr = "Adding record failed"; } else { cmd = IP_FW_TABLE_XDEL; texterr = "Deleting record failed"; } /* * Calculate number of entries: * Assume [key val] x N for add * and * key x N for delete */ count = (add != 0) ? ac / 2 + 1 : ac; if (count <= 1) { /* Adding single entry with/without value */ memset(&tent, 0, sizeof(tent)); tent_buf = &tent; } else { if ((tent_buf = calloc(count, sizeof(tent))) == NULL) errx(EX_OSERR, "Unable to allocate memory for all entries"); } ptent = tent_buf; memset(&xi, 0, sizeof(xi)); count = 0; while (ac > 0) { tentry_fill_key(oh, ptent, *av, add, &type, &vmask, &xi); /* * Compatibility layer: auto-create table if not exists. */ if (xi.tablename[0] == '\0') { xi.type = type; xi.vmask = vmask; strlcpy(xi.tablename, oh->ntlv.name, sizeof(xi.tablename)); if (quiet == 0) warnx("DEPRECATED: inserting data into " "non-existent table %s. (auto-created)", xi.tablename); table_do_create(oh, &xi); } oh->ntlv.type = type; ac--; av++; if (add != 0 && ac > 0) { tentry_fill_value(oh, ptent, *av, type, vmask); ac--; av++; } if (update != 0) ptent->head.flags |= IPFW_TF_UPDATE; count++; ptent++; } error = table_do_modify_record(cmd, oh, tent_buf, count, atomic); /* * Compatibility stuff: do not yell on duplicate keys or * failed deletions. 
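* In quiet mode EEXIST on add and ENOENT on delete are ignored silently.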
*/ if (error == 0 || (error == EEXIST && add != 0) || (error == ENOENT && add == 0)) { if (quiet != 0) { if (tent_buf != &tent) free(tent_buf); return; } } /* Report results back */ ptent = tent_buf; for (i = 0; i < count; ptent++, i++) { ignored = 0; switch (ptent->result) { case IPFW_TR_ADDED: px = "added"; break; case IPFW_TR_DELETED: px = "deleted"; break; case IPFW_TR_UPDATED: px = "updated"; break; case IPFW_TR_LIMIT: px = "limit"; ignored = 1; break; case IPFW_TR_ERROR: px = "error"; ignored = 1; break; case IPFW_TR_NOTFOUND: px = "notfound"; ignored = 1; break; case IPFW_TR_EXISTS: px = "exists"; ignored = 1; break; case IPFW_TR_IGNORED: px = "ignored"; ignored = 1; break; default: px = "unknown"; ignored = 1; } if (error != 0 && atomic != 0 && ignored == 0) printf("%s(reverted): ", px); else printf("%s: ", px); table_show_entry(&xi, ptent); } if (tent_buf != &tent) free(tent_buf); if (error == 0) return; /* Get real OS error */ error = errno; /* Try to provide more human-readable error */ switch (error) { case EEXIST: etxt = "record already exists"; break; case EFBIG: etxt = "limit hit"; break; case ESRCH: etxt = "table not found"; break; case ENOENT: etxt = "record not found"; break; case EACCES: etxt = "table is locked"; break; default: etxt = strerror(error); } errx(EX_OSERR, "%s: %s", texterr, etxt); } static int table_do_lookup(ipfw_obj_header *oh, char *key, ipfw_xtable_info *xi, ipfw_obj_tentry *xtent) { char xbuf[sizeof(ipfw_obj_header) + sizeof(ipfw_obj_tentry)]; ipfw_obj_tentry *tent; uint8_t type; uint32_t vmask; size_t sz; memcpy(xbuf, oh, sizeof(*oh)); oh = (ipfw_obj_header *)xbuf; tent = (ipfw_obj_tentry *)(oh + 1); memset(tent, 0, sizeof(*tent)); tent->head.length = sizeof(*tent); tent->idx = 1; tentry_fill_key(oh, tent, key, 0, &type, &vmask, xi); oh->ntlv.type = type; sz = sizeof(xbuf); if (do_get3(IP_FW_TABLE_XFIND, &oh->opheader, &sz) != 0) return (errno); if (sz < sizeof(xbuf)) return (EINVAL); *xtent = *tent; return (0); } static void table_lookup(ipfw_obj_header *oh, int ac, char *av[]) { ipfw_obj_tentry xtent; ipfw_xtable_info xi; char key[64]; int error; if (ac == 0) errx(EX_USAGE, "address required"); strlcpy(key, *av, sizeof(key)); memset(&xi, 0, sizeof(xi)); error = table_do_lookup(oh, key, &xi, &xtent); switch (error) { case 0: break; case ESRCH: errx(EX_UNAVAILABLE, "Table %s not found", oh->ntlv.name); case ENOENT: errx(EX_UNAVAILABLE, "Entry %s not found", *av); case ENOTSUP: errx(EX_UNAVAILABLE, "Table %s algo does not support " "\"lookup\" method", oh->ntlv.name); default: err(EX_OSERR, "getsockopt(IP_FW_TABLE_XFIND)"); } table_show_entry(&xi, &xtent); } static void tentry_fill_key_type(char *arg, ipfw_obj_tentry *tentry, uint8_t type, uint8_t tflags) { char *p, *pp; int mask, af; struct in6_addr *paddr, tmp; struct tflow_entry *tfe; uint32_t key, *pkey; uint16_t port; struct protoent *pent; struct servent *sent; int masklen; masklen = 0; af = 0; paddr = (struct in6_addr *)&tentry->k; switch (type) { case IPFW_TABLE_ADDR: /* Remove / if exists */ if ((p = strchr(arg, '/')) != NULL) { *p = '\0'; mask = atoi(p + 1); } if (inet_pton(AF_INET, arg, paddr) == 1) { if (p != NULL && mask > 32) errx(EX_DATAERR, "bad IPv4 mask width: %s", p + 1); masklen = p ? mask : 32; af = AF_INET; } else if (inet_pton(AF_INET6, arg, paddr) == 1) { if (IN6_IS_ADDR_V4COMPAT(paddr)) errx(EX_DATAERR, "Use IPv4 instead of v4-compatible"); if (p != NULL && mask > 128) errx(EX_DATAERR, "bad IPv6 mask width: %s", p + 1); masklen = p ? 
mask : 128; af = AF_INET6; } else { /* Assume FQDN */ if (lookup_host(arg, (struct in_addr *)paddr) != 0) errx(EX_NOHOST, "hostname ``%s'' unknown", arg); masklen = 32; type = IPFW_TABLE_ADDR; af = AF_INET; } break; case IPFW_TABLE_INTERFACE: /* Assume interface name. Copy significant data only */ mask = MIN(strlen(arg), IF_NAMESIZE - 1); memcpy(paddr, arg, mask); /* Set mask to exact match */ masklen = 8 * IF_NAMESIZE; break; case IPFW_TABLE_NUMBER: /* Port or any other key */ key = strtol(arg, &p, 10); if (*p != '\0') errx(EX_DATAERR, "Invalid number: %s", arg); pkey = (uint32_t *)paddr; *pkey = key; masklen = 32; break; case IPFW_TABLE_FLOW: /* Assume [src-ip][,proto][,src-port][,dst-ip][,dst-port] */ tfe = &tentry->k.flow; af = 0; /* Handle */ if ((tflags & IPFW_TFFLAG_SRCIP) != 0) { if ((p = strchr(arg, ',')) != NULL) *p++ = '\0'; /* Determine family using temporary storage */ if (inet_pton(AF_INET, arg, &tmp) == 1) { if (af != 0 && af != AF_INET) errx(EX_DATAERR, "Inconsistent address family\n"); af = AF_INET; memcpy(&tfe->a.a4.sip, &tmp, 4); } else if (inet_pton(AF_INET6, arg, &tmp) == 1) { if (af != 0 && af != AF_INET6) errx(EX_DATAERR, "Inconsistent address family\n"); af = AF_INET6; memcpy(&tfe->a.a6.sip6, &tmp, 16); } arg = p; } /* Handle */ if ((tflags & IPFW_TFFLAG_PROTO) != 0) { if (arg == NULL) errx(EX_DATAERR, "invalid key: proto missing"); if ((p = strchr(arg, ',')) != NULL) *p++ = '\0'; key = strtol(arg, &pp, 10); if (*pp != '\0') { if ((pent = getprotobyname(arg)) == NULL) errx(EX_DATAERR, "Unknown proto: %s", arg); else key = pent->p_proto; } if (key > 255) errx(EX_DATAERR, "Bad protocol number: %u",key); tfe->proto = key; arg = p; } /* Handle */ if ((tflags & IPFW_TFFLAG_SRCPORT) != 0) { if (arg == NULL) errx(EX_DATAERR, "invalid key: src port missing"); if ((p = strchr(arg, ',')) != NULL) *p++ = '\0'; if ((port = htons(strtol(arg, NULL, 10))) == 0) { if ((sent = getservbyname(arg, NULL)) == NULL) errx(EX_DATAERR, "Unknown service: %s", arg); else key = sent->s_port; } tfe->sport = port; arg = p; } /* Handle */ if ((tflags & IPFW_TFFLAG_DSTIP) != 0) { if (arg == NULL) errx(EX_DATAERR, "invalid key: dst ip missing"); if ((p = strchr(arg, ',')) != NULL) *p++ = '\0'; /* Determine family using temporary storage */ if (inet_pton(AF_INET, arg, &tmp) == 1) { if (af != 0 && af != AF_INET) errx(EX_DATAERR, "Inconsistent address family"); af = AF_INET; memcpy(&tfe->a.a4.dip, &tmp, 4); } else if (inet_pton(AF_INET6, arg, &tmp) == 1) { if (af != 0 && af != AF_INET6) errx(EX_DATAERR, "Inconsistent address family"); af = AF_INET6; memcpy(&tfe->a.a6.dip6, &tmp, 16); } arg = p; } /* Handle */ if ((tflags & IPFW_TFFLAG_DSTPORT) != 0) { if (arg == NULL) errx(EX_DATAERR, "invalid key: dst port missing"); if ((p = strchr(arg, ',')) != NULL) *p++ = '\0'; if ((port = htons(strtol(arg, NULL, 10))) == 0) { if ((sent = getservbyname(arg, NULL)) == NULL) errx(EX_DATAERR, "Unknown service: %s", arg); else key = sent->s_port; } tfe->dport = port; arg = p; } tfe->af = af; break; default: errx(EX_DATAERR, "Unsupported table type: %d", type); } tentry->subtype = af; tentry->masklen = masklen; } /* * Tries to guess table key type. * This procedure is used in legacy table auto-create * code AND in `ipfw -n` ruleset checking. * * Imported from old table_fill_xentry() parse code. 
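* Returns 0 and stores the guessed type in *ptype; returns non-zero when the key cannot be classified.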
*/ static int guess_key_type(char *key, uint8_t *ptype) { char *p; struct in6_addr addr; uint32_t kv; if (ishexnumber(*key) != 0 || *key == ':') { /* Remove / if exists */ if ((p = strchr(key, '/')) != NULL) *p = '\0'; if ((inet_pton(AF_INET, key, &addr) == 1) || (inet_pton(AF_INET6, key, &addr) == 1)) { *ptype = IPFW_TABLE_CIDR; if (p != NULL) *p = '/'; return (0); } else { /* Port or any other key */ /* Skip non-base 10 entries like 'fa1' */ kv = strtol(key, &p, 10); if (*p == '\0') { *ptype = IPFW_TABLE_NUMBER; return (0); } else if ((p != key) && (*p == '.')) { /* * Warn on IPv4 address strings * which are "valid" for inet_aton() but not * in inet_pton(). * * Typical examples: '10.5' or '10.0.0.05' */ return (1); } } } if (strchr(key, '.') == NULL) { *ptype = IPFW_TABLE_INTERFACE; return (0); } if (lookup_host(key, (struct in_addr *)&addr) != 0) return (1); *ptype = IPFW_TABLE_CIDR; return (0); } static void tentry_fill_key(ipfw_obj_header *oh, ipfw_obj_tentry *tent, char *key, int add, uint8_t *ptype, uint32_t *pvmask, ipfw_xtable_info *xi) { uint8_t type, tflags; uint32_t vmask; int error; type = 0; tflags = 0; vmask = 0; if (xi->tablename[0] == '\0') error = table_get_info(oh, xi); else error = 0; if (error == 0) { if (co.test_only == 0) { /* Table found */ type = xi->type; tflags = xi->tflags; vmask = xi->vmask; } else { /* * We're running `ipfw -n` * Compatibility layer: try to guess key type * before failing. */ if (guess_key_type(key, &type) != 0) { /* Inknown key */ errx(EX_USAGE, "Cannot guess " "key '%s' type", key); } vmask = IPFW_VTYPE_LEGACY; } } else { if (error != ESRCH) errx(EX_OSERR, "Error requesting table %s info", oh->ntlv.name); if (add == 0) errx(EX_DATAERR, "Table %s does not exist", oh->ntlv.name); /* * Table does not exist * Compatibility layer: try to guess key type before failing. */ if (guess_key_type(key, &type) != 0) { /* Inknown key */ errx(EX_USAGE, "Table %s does not exist, cannot guess " "key '%s' type", oh->ntlv.name, key); } vmask = IPFW_VTYPE_LEGACY; } tentry_fill_key_type(key, tent, type, tflags); *ptype = type; *pvmask = vmask; } static void set_legacy_value(uint32_t val, ipfw_table_value *v) { v->tag = val; v->pipe = val; v->divert = val; v->skipto = val; v->netgraph = val; v->fib = val; v->nat = val; v->nh4 = val; v->dscp = (uint8_t)val; v->limit = val; } static void tentry_fill_value(ipfw_obj_header *oh, ipfw_obj_tentry *tent, char *arg, uint8_t type, uint32_t vmask) { struct addrinfo hints, *res; uint32_t a4, flag, val; ipfw_table_value *v; uint32_t i; int dval; char *comma, *e, *etype, *n, *p; v = &tent->v.value; /* Compat layer: keep old behavior for legacy value types */ if (vmask == IPFW_VTYPE_LEGACY) { /* Try to interpret as number first */ val = strtoul(arg, &p, 0); if (*p == '\0') { set_legacy_value(val, v); return; } if (inet_pton(AF_INET, arg, &val) == 1) { set_legacy_value(ntohl(val), v); return; } /* Try hostname */ if (lookup_host(arg, (struct in_addr *)&val) == 0) { set_legacy_value(val, v); return; } errx(EX_OSERR, "Unable to parse value %s", arg); } /* * Shorthands: handle single value if vmask consists * of numbers only. 
e.g.: * vmask = "fib,skipto" -> treat input "1" as "1,1" */ n = arg; etype = NULL; for (i = 1; i < (1 << 31); i *= 2) { if ((flag = (vmask & i)) == 0) continue; vmask &= ~flag; if ((comma = strchr(n, ',')) != NULL) *comma = '\0'; switch (flag) { case IPFW_VTYPE_TAG: v->tag = strtol(n, &e, 10); if (*e != '\0') etype = "tag"; break; case IPFW_VTYPE_PIPE: v->pipe = strtol(n, &e, 10); if (*e != '\0') etype = "pipe"; break; case IPFW_VTYPE_DIVERT: v->divert = strtol(n, &e, 10); if (*e != '\0') etype = "divert"; break; case IPFW_VTYPE_SKIPTO: v->skipto = strtol(n, &e, 10); if (*e != '\0') etype = "skipto"; break; case IPFW_VTYPE_NETGRAPH: v->netgraph = strtol(n, &e, 10); if (*e != '\0') etype = "netgraph"; break; case IPFW_VTYPE_FIB: v->fib = strtol(n, &e, 10); if (*e != '\0') etype = "fib"; break; case IPFW_VTYPE_NAT: v->nat = strtol(n, &e, 10); if (*e != '\0') etype = "nat"; break; case IPFW_VTYPE_LIMIT: v->limit = strtol(n, &e, 10); if (*e != '\0') etype = "limit"; break; case IPFW_VTYPE_NH4: if (strchr(n, '.') != NULL && inet_pton(AF_INET, n, &a4) == 1) { v->nh4 = ntohl(a4); break; } if (lookup_host(n, (struct in_addr *)&v->nh4) == 0) break; etype = "ipv4"; break; case IPFW_VTYPE_DSCP: if (isalpha(*n)) { if ((dval = match_token(f_ipdscp, n)) != -1) { v->dscp = dval; break; } else etype = "DSCP code"; } else { v->dscp = strtol(n, &e, 10); if (v->dscp > 63 || *e != '\0') etype = "DSCP value"; } break; case IPFW_VTYPE_NH6: if (strchr(n, ':') != NULL) { memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_INET6; hints.ai_flags = AI_NUMERICHOST; if (getaddrinfo(n, NULL, &hints, &res) == 0) { v->nh6 = ((struct sockaddr_in6 *) res->ai_addr)->sin6_addr; v->zoneid = ((struct sockaddr_in6 *) res->ai_addr)->sin6_scope_id; freeaddrinfo(res); break; } } etype = "ipv6"; break; } if (etype != NULL) errx(EX_USAGE, "Unable to parse %s as %s", n, etype); if (comma != NULL) *comma++ = ','; if ((n = comma) != NULL) continue; /* End of input. */ if (vmask != 0) errx(EX_USAGE, "Not enough fields inside value"); } } /* * Compare table names. * Honor number comparison. */ static int tablename_cmp(const void *a, const void *b) { ipfw_xtable_info *ia, *ib; ia = (ipfw_xtable_info *)a; ib = (ipfw_xtable_info *)b; return (stringnum_cmp(ia->tablename, ib->tablename)); } /* * Retrieves table list from kernel, * optionally sorts it and calls requested function for each table. * Returns 0 on success. */ static int tables_foreach(table_cb_t *f, void *arg, int sort) { ipfw_obj_lheader *olh; ipfw_xtable_info *info; size_t sz; int i, error; /* Start with reasonable default */ sz = sizeof(*olh) + 16 * sizeof(ipfw_xtable_info); for (;;) { if ((olh = calloc(1, sz)) == NULL) return (ENOMEM); olh->size = sz; if (do_get3(IP_FW_TABLES_XLIST, &olh->opheader, &sz) != 0) { sz = olh->size; free(olh); if (errno != ENOMEM) return (errno); continue; } if (sort != 0) qsort(olh + 1, olh->count, olh->objsize, tablename_cmp); info = (ipfw_xtable_info *)(olh + 1); for (i = 0; i < olh->count; i++) { error = f(info, arg); /* Ignore errors for now */ info = (ipfw_xtable_info *)((caddr_t)info + olh->objsize); } free(olh); break; } return (0); } /* * Retrieves all entries for given table @i in * eXtended format. Allocate buffer large enough * to store result. Called needs to free it later. * * Returns 0 on success. 
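 * The IP_FW_TABLE_XLIST request below is retried (up to 8 attempts)
 * for as long as the kernel reports ENOMEM.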
*/ static int table_do_get_list(ipfw_xtable_info *i, ipfw_obj_header **poh) { ipfw_obj_header *oh; size_t sz; int c; sz = 0; oh = NULL; for (c = 0; c < 8; c++) { if (sz < i->size) sz = i->size + 44; if (oh != NULL) free(oh); if ((oh = calloc(1, sz)) == NULL) continue; table_fill_objheader(oh, i); oh->opheader.version = 1; /* Current version */ if (do_get3(IP_FW_TABLE_XLIST, &oh->opheader, &sz) == 0) { *poh = oh; return (0); } if (errno != ENOMEM) break; } free(oh); return (errno); } /* * Shows all entries from @oh in human-readable format */ static void table_show_list(ipfw_obj_header *oh, int need_header) { ipfw_obj_tentry *tent; uint32_t count; ipfw_xtable_info *i; i = (ipfw_xtable_info *)(oh + 1); tent = (ipfw_obj_tentry *)(i + 1); if (need_header) printf("--- table(%s), set(%u) ---\n", i->tablename, i->set); count = i->count; while (count > 0) { table_show_entry(i, tent); tent = (ipfw_obj_tentry *)((caddr_t)tent + tent->head.length); count--; } } static void table_show_value(char *buf, size_t bufsize, ipfw_table_value *v, uint32_t vmask, int print_ip) { char abuf[INET6_ADDRSTRLEN + IF_NAMESIZE + 2]; struct sockaddr_in6 sa6; uint32_t flag, i, l; size_t sz; struct in_addr a4; sz = bufsize; /* * Some shorthands for printing values: * legacy assumes all values are equal, so keep the first one. */ if (vmask == IPFW_VTYPE_LEGACY) { if (print_ip != 0) { flag = htonl(v->tag); inet_ntop(AF_INET, &flag, buf, sz); } else snprintf(buf, sz, "%u", v->tag); return; } for (i = 1; i < (1 << 31); i *= 2) { if ((flag = (vmask & i)) == 0) continue; l = 0; switch (flag) { case IPFW_VTYPE_TAG: l = snprintf(buf, sz, "%u,", v->tag); break; case IPFW_VTYPE_PIPE: l = snprintf(buf, sz, "%u,", v->pipe); break; case IPFW_VTYPE_DIVERT: l = snprintf(buf, sz, "%d,", v->divert); break; case IPFW_VTYPE_SKIPTO: l = snprintf(buf, sz, "%d,", v->skipto); break; case IPFW_VTYPE_NETGRAPH: l = snprintf(buf, sz, "%u,", v->netgraph); break; case IPFW_VTYPE_FIB: l = snprintf(buf, sz, "%u,", v->fib); break; case IPFW_VTYPE_NAT: l = snprintf(buf, sz, "%u,", v->nat); break; case IPFW_VTYPE_LIMIT: l = snprintf(buf, sz, "%u,", v->limit); break; case IPFW_VTYPE_NH4: a4.s_addr = htonl(v->nh4); inet_ntop(AF_INET, &a4, abuf, sizeof(abuf)); l = snprintf(buf, sz, "%s,", abuf); break; case IPFW_VTYPE_DSCP: l = snprintf(buf, sz, "%d,", v->dscp); break; case IPFW_VTYPE_NH6: sa6.sin6_family = AF_INET6; sa6.sin6_len = sizeof(sa6); sa6.sin6_addr = v->nh6; sa6.sin6_port = 0; sa6.sin6_scope_id = v->zoneid; if (getnameinfo((const struct sockaddr *)&sa6, sa6.sin6_len, abuf, sizeof(abuf), NULL, 0, NI_NUMERICHOST) == 0) l = snprintf(buf, sz, "%s,", abuf); break; } buf += l; sz -= l; } if (sz != bufsize) *(buf - 1) = '\0'; } static void table_show_entry(ipfw_xtable_info *i, ipfw_obj_tentry *tent) { char *comma, tbuf[128], pval[128]; void *paddr; struct tflow_entry *tfe; table_show_value(pval, sizeof(pval), &tent->v.value, i->vmask, co.do_value_as_ip); switch (i->type) { case IPFW_TABLE_ADDR: /* IPv4 or IPv6 prefixes */ inet_ntop(tent->subtype, &tent->k, tbuf, sizeof(tbuf)); printf("%s/%u %s\n", tbuf, tent->masklen, pval); break; case IPFW_TABLE_INTERFACE: /* Interface names */ printf("%s %s\n", tent->k.iface, pval); break; case IPFW_TABLE_NUMBER: /* numbers */ printf("%u %s\n", tent->k.key, pval); break; case IPFW_TABLE_FLOW: /* flows */ tfe = &tent->k.flow; comma = ""; if ((i->tflags & IPFW_TFFLAG_SRCIP) != 0) { if (tfe->af == AF_INET) paddr = &tfe->a.a4.sip; else paddr = &tfe->a.a6.sip6; inet_ntop(tfe->af, paddr, tbuf, sizeof(tbuf)); printf("%s%s", 
comma, tbuf); comma = ","; } if ((i->tflags & IPFW_TFFLAG_PROTO) != 0) { printf("%s%d", comma, tfe->proto); comma = ","; } if ((i->tflags & IPFW_TFFLAG_SRCPORT) != 0) { printf("%s%d", comma, ntohs(tfe->sport)); comma = ","; } if ((i->tflags & IPFW_TFFLAG_DSTIP) != 0) { if (tfe->af == AF_INET) paddr = &tfe->a.a4.dip; else paddr = &tfe->a.a6.dip6; inet_ntop(tfe->af, paddr, tbuf, sizeof(tbuf)); printf("%s%s", comma, tbuf); comma = ","; } if ((i->tflags & IPFW_TFFLAG_DSTPORT) != 0) { printf("%s%d", comma, ntohs(tfe->dport)); comma = ","; } printf(" %s\n", pval); } } static int table_do_get_stdlist(uint16_t opcode, ipfw_obj_lheader **polh) { ipfw_obj_lheader req, *olh; size_t sz; memset(&req, 0, sizeof(req)); sz = sizeof(req); if (do_get3(opcode, &req.opheader, &sz) != 0) if (errno != ENOMEM) return (errno); sz = req.size; if ((olh = calloc(1, sz)) == NULL) return (ENOMEM); olh->size = sz; if (do_get3(opcode, &olh->opheader, &sz) != 0) { free(olh); return (errno); } *polh = olh; return (0); } static int table_do_get_algolist(ipfw_obj_lheader **polh) { return (table_do_get_stdlist(IP_FW_TABLES_ALIST, polh)); } static int table_do_get_vlist(ipfw_obj_lheader **polh) { return (table_do_get_stdlist(IP_FW_TABLE_VLIST, polh)); } void ipfw_list_ta(int ac, char *av[]) { ipfw_obj_lheader *olh; ipfw_ta_info *info; int error, i; const char *atype; error = table_do_get_algolist(&olh); if (error != 0) err(EX_OSERR, "Unable to request algorithm list"); info = (ipfw_ta_info *)(olh + 1); for (i = 0; i < olh->count; i++) { if ((atype = match_value(tabletypes, info->type)) == NULL) atype = "unknown"; printf("--- %s ---\n", info->algoname); printf(" type: %s\n refcount: %u\n", atype, info->refcnt); info = (ipfw_ta_info *)((caddr_t)info + olh->objsize); } free(olh); } /* Copy of current kernel table_value structure */ struct _table_value { uint32_t tag; /* O_TAG/O_TAGGED */ uint32_t pipe; /* O_PIPE/O_QUEUE */ uint16_t divert; /* O_DIVERT/O_TEE */ uint16_t skipto; /* skipto, CALLRET */ uint32_t netgraph; /* O_NETGRAPH/O_NGTEE */ uint32_t fib; /* O_SETFIB */ uint32_t nat; /* O_NAT */ uint32_t nh4; uint8_t dscp; uint8_t spare0; uint16_t spare1; /* -- 32 bytes -- */ struct in6_addr nh6; uint32_t limit; /* O_LIMIT */ uint32_t zoneid; uint64_t refcnt; /* Number of references */ }; int compare_values(const void *_a, const void *_b) { struct _table_value *a, *b; a = (struct _table_value *)_a; b = (struct _table_value *)_b; if (a->spare1 < b->spare1) return (-1); else if (a->spare1 > b->spare1) return (1); return (0); } void ipfw_list_values(int ac, char *av[]) { ipfw_obj_lheader *olh; struct _table_value *v; int error, i; uint32_t vmask; char buf[128]; error = table_do_get_vlist(&olh); if (error != 0) err(EX_OSERR, "Unable to request value list"); vmask = 0x7FFFFFFF; /* Similar to IPFW_VTYPE_LEGACY */ table_print_valheader(buf, sizeof(buf), vmask); printf("HEADER: %s\n", buf); v = (struct _table_value *)(olh + 1); qsort(v, olh->count, olh->objsize, compare_values); for (i = 0; i < olh->count; i++) { table_show_value(buf, sizeof(buf), (ipfw_table_value *)v, vmask, 0); printf("[%u] refs=%lu %s\n", v->spare1, (u_long)v->refcnt, buf); v = (struct _table_value *)((caddr_t)v + olh->objsize); } free(olh); } int table_check_name(const char *tablename) { if (ipfw_check_object_name(tablename) != 0) return (EINVAL); /* Restrict some 'special' names */ if (strcmp(tablename, "all") == 0) return (EINVAL); return (0); } Index: user/alc/PQ_LAUNDRY/sbin/resolvconf/Makefile =================================================================== 
--- user/alc/PQ_LAUNDRY/sbin/resolvconf/Makefile (revision 303641) +++ user/alc/PQ_LAUNDRY/sbin/resolvconf/Makefile (revision 303642) @@ -1,38 +1,42 @@ # $FreeBSD$ PACKAGE=runtime DIST= ${.CURDIR}/../../contrib/openresolv .PATH: ${DIST} SCRIPTS= resolvconf FILES= libc dnsmasq named pdnsd pdns_recursor unbound FILESDIR= /libexec/resolvconf MAN= resolvconf.conf.5 resolvconf.8 CLEANFILES= ${SCRIPTS} ${FILES} ${MAN} SYSCONFDIR= /etc RCDIR= ${SYSCONFDIR}/rc.d VARDIR= /var/run/resolvconf # We don't assume to restart the services in /sbin. So, though # our service(8) is in /usr/sbin, we can use it, here. -CMD1= \1 onestatus >/dev/null 2>\&1 -CMD2= \1 restart -RESTARTCMD= /usr/sbin/service ${CMD1} \&\& /usr/sbin/service ${CMD2} +CMD1_WITH_ARG= \1 onestatus >/dev/null 2>\&1 +CMD2_WITH_ARG= \1 restart +RESTARTCMD_WITH_ARG= /usr/sbin/service ${CMD1_WITH_ARG} \&\& /usr/sbin/service ${CMD2_WITH_ARG} +CMD1= \\$$1 onestatus >/dev/null 2>\&1 +CMD2= \\$$1 restart +RESTARTCMD= "/usr/sbin/service ${CMD1} \&\& /usr/sbin/service ${CMD2}" .for f in ${SCRIPTS} ${FILES} ${MAN} ${f}: ${f}.in sed -e 's:@PREFIX@::g' \ -e 's:@SYSCONFDIR@:${SYSCONFDIR}:g' \ -e 's:@LIBEXECDIR@:${FILESDIR}:g' \ -e 's:@VARDIR@:${VARDIR}:g' \ - -e 's:@RESTARTCMD \(.*\)@:${RESTARTCMD}:g' \ + -e 's:@RESTARTCMD \(.*\)@:${RESTARTCMD_WITH_ARG}:g' \ + -e 's:@RESTARTCMD@:${RESTARTCMD}:g' \ -e 's:@RCDIR@:${RCDIR}:g' \ -e 's: vpn : ng[0-9]*&:g' \ ${DIST}/$@.in > $@ .endfor .include Index: user/alc/PQ_LAUNDRY/share/man/man4/gpioled.4 =================================================================== --- user/alc/PQ_LAUNDRY/share/man/man4/gpioled.4 (revision 303641) +++ user/alc/PQ_LAUNDRY/share/man/man4/gpioled.4 (revision 303642) @@ -1,156 +1,158 @@ .\" Copyright (c) 2013, Luiz Otavio O Souza .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd May 14, 2014 +.Dd July 30, 2016 .Dt GPIOLED 4 .Os .Sh NAME .Nm gpioled .Nd GPIO LED generic device driver .Sh SYNOPSIS To compile this driver into the kernel, place the following lines in your kernel configuration file: .Bd -ragged -offset indent .Cd "device gpio" .Cd "device gpioled" .Ed .Sh DESCRIPTION The .Nm driver provides glue to attach a .Xr led 4 compatible device to a GPIO pin. 
Each LED in the system has a .Pa name which is used to export a device as .Pa /dev/led/ . The GPIO pin can then be controlled by writing to this device as described in .Xr led 4 . .Pp On a .Xr device.hints 5 based system, like .Li MIPS , these values are configurable for .Nm : .Bl -tag -width ".Va hint.gpioiic.%d.atXXX" .It Va hint.gpioled.%d.at The gpiobus you are attaching to. Normally assigned to gpiobus0. .It Va hint.gpioled.%d.name Arbitrary name of device in .Pa /dev/led/ to create for .Xr led 4 . .It Va hint.gpioled.%d.pins Which pin on the GPIO interface to map to this instance. Please note that this mask should only ever have one bit set (any other bits - i.e., pins - will be ignored). +.It Va hint.gpioled.%d.invert +If set to 1, the pin will be set to 0 to light the LED, and 1 to clear it. .El .Pp On a .Xr FDT 4 based system, like .Li ARM , the DTS part for a .Nm gpioled device usually looks like: .Bd -literal gpio: gpio { gpio-controller; ... led0 { compatible = "gpioled"; gpios = <&gpio 16 2 0>; /* GPIO pin 16. */ name = "ok"; }; led1 { compatible = "gpioled"; gpios = <&gpio 17 2 0>; /* GPIO pin 17. */ name = "user-led1"; }; }; .Ed .Pp Optionally, you can choose to combine all the LEDs under a single .Dq gpio-leds compatible node: .Bd -literal simplebus0 { ... leds { compatible = "gpio-leds"; led0 { gpios = <&gpio 16 2 0>; name = "ok" }; led1 { gpios = <&gpio 17 2 0>; name = "user-led1" }; }; }; .Ed .Pp Both methods are equally supported and it is possible to have the LEDs defined with any sort of mix between the methods. The only restriction is that a GPIO pin cannot be mapped by two different (gpio)leds. .Pp For more details about the .Va gpios property, please consult .Pa /usr/src/sys/boot/fdt/dts/bindings-gpio.txt . .Pp The property .Va name is the arbitrary name of the device in .Pa /dev/led/ to create for .Xr led 4 . .Sh SEE ALSO .Xr fdt 4 , .Xr gpio 4 , .Xr gpioiic 4 , .Xr led 4 .Sh HISTORY The .Nm manual page first appeared in .Fx 10.1 . .Sh AUTHORS This manual page was written by .An Luiz Otavio O Souza . Index: user/alc/PQ_LAUNDRY/share/man/man4/ng_checksum.4 =================================================================== --- user/alc/PQ_LAUNDRY/share/man/man4/ng_checksum.4 (nonexistent) +++ user/alc/PQ_LAUNDRY/share/man/man4/ng_checksum.4 (revision 303642) @@ -0,0 +1,141 @@ +.\" Copyright (c) 2015 Dmitry Vagin +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd October 29, 2015 +.Dt NG_CHECKSUM 4 +.Os +.Sh NAME +.Nm ng_checksum +.Nd IP checksum node type +.Sh SYNOPSIS +.In netgraph/ng_checksum.h +.Sh DESCRIPTION +The +.Nm checksum +node can calculate and prepare for calculation in hardware +IPv4 header, TCP, UDP checksum. +.Sh HOOKS +This node type has two hooks: +.Bl -tag -width ".Va out" +.It Va in +Packets received on this hook are processed according to settings specified +in config and then forwarded to +.Ar out +hook, if it exists and connected. Otherwise they are reflected back to the +.Ar in +hook. +.It Va out +Packets received on this hook are forwarded to +.Ar in +hook without any changes. +.El +.Sh CONTROL MESSAGES +This node type supports the generic control messages, plus the following: +.Bl -tag -width foo +.It Dv NGM_CHECKSUM_SETDLT Pq Ic setdlt +Sets data link type on the +.Va in +hook. Currently, supported types are +.Cm DLT_RAW +(raw IP datagrams) and +.Cm DLT_EN10MB +(Ethernet). DLT_ definitions can be found in +.In net/bpf.h +header. Currently used values are +.Cm DLT_EN10MB += 1 and +.Cm DLT_RAW += 12. +.It Dv NGM_CHECKSUM_GETDLT Pq Ic getdlt +This control message obtains data link type on the +.Va in +hook. +.It Dv NGM_CHECKSUM_SETCONFIG Pq Ic setconfig +Sets node configuration. The following +.Vt "struct ng_checksum_config" +must be supplied as an argument: +.Bd -literal -offset 4n +struct ng_checksum_config { + uint64_t csum_flags; + uint64_t csum_offload; +}; +.Ed +.Pp +The +.Va csum_flags +can be set to any combination of CSUM_IP, CSUM_TCP, CSUM_UDP, CSUM_TCP_IPV6 and CSUM_UDP_IPV6 +(other values are ignored) for instructing node need calculate the corresponding checksum. +.Pp +The +.Va csum_offload +can be set to any combination of CSUM_IP, CSUM_TCP, CSUM_UDP, CSUM_TCP_IPV6 and CSUM_UDP_IPV6 +(other values are ignored) for instructing node what checksum can calculate in hardware. +.Pp +Also processed any combination of CSUM_IP, CSUM_TCP, CSUM_UDP, CSUM_TCP_IPV6 and CSUM_UDP_IPV6 sets before on mbuf. +.It Dv NGM_CHECKSUM_GETCONFIG Pq Ic getconfig +This control message obtains current node configuration, +returned as +.Vt "struct ng_checksum_config" . +.It Dv NGM_CHECKSUM_GET_STATS Pq Ic getstats +Returns node statistics as a +.Vt "struct ng_checksum_stats" . +.It Dv NGM_CHECKSUM_CLR_STATS Pq Ic clrstats +Clear node statistics. +.It Dv NGM_CHECKSUM_GETCLR_STATS Pq Ic getclrstats +This command is identical to +.Dv NGM_CHECKSUM_GET_STATS , +except that the statistics are also atomically cleared. +.El +.Sh SHUTDOWN +This node shuts down upon receipt of a +.Dv NGM_SHUTDOWN +control message, or when all hooks have been disconnected. +.Sh EXAMPLES +.Xr ngctl 8 +script: +.Bd -literal -offset 4n +/usr/sbin/ngctl -f- <<-SEQ + msg checksum-1: "setdlt 1" + ngctl msg checksum-1: "setconfig { csum_flags=0 csum_offload=6 }" +.Ed +.Pp +Set data link type to +.Cm DLT_EN10MB +(Ethernet), not set additional checksum flags, set hardware +can calculate CSUM_IP_UDP|CSUM_IP_TCP. 
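The same configuration could also be applied programmatically; the following is a minimal sketch using libnetgraph(3), in which the node path "checksum-1:" and the NGM_CHECKSUM_COOKIE message cookie name are assumptions following the usual netgraph conventions, and the numeric csum_offload value mirrors the ngctl example above.

	#include <err.h>
	#include <netgraph.h>
	#include <netgraph/ng_checksum.h>

	int
	main(void)
	{
		struct ng_checksum_config cfg = {
			.csum_flags = 0,
			.csum_offload = 6,	/* CSUM_IP_TCP|CSUM_IP_UDP, as above */
		};
		int cs, ds;

		/* Create a temporary socket node to talk to the graph. */
		if (NgMkSockNode(NULL, &cs, &ds) < 0)
			err(1, "NgMkSockNode");
		/* Equivalent of: ngctl msg checksum-1: "setconfig { ... }" */
		if (NgSendMsg(cs, "checksum-1:", NGM_CHECKSUM_COOKIE,
		    NGM_CHECKSUM_SETCONFIG, &cfg, sizeof(cfg)) < 0)
			err(1, "NgSendMsg");
		return (0);
	}

Link against libnetgraph (-lnetgraph).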
+.Sh SEE ALSO +.Xr netgraph 4 , +.Xr ng_patch 4 , +.Xr ngctl 8 +.Sh HISTORY +The +.Nm +node type was implemented in +.Fx 10.2 +and first submitted in +.Fx 12.0 . +.Sh AUTHORS +.An "Dmitry Vagin" Aq daemon.hammer@ya.ru . Property changes on: user/alc/PQ_LAUNDRY/share/man/man4/ng_checksum.4 ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: user/alc/PQ_LAUNDRY/share/man/man9/bitset.9 =================================================================== --- user/alc/PQ_LAUNDRY/share/man/man9/bitset.9 (revision 303641) +++ user/alc/PQ_LAUNDRY/share/man/man9/bitset.9 (revision 303642) @@ -1,391 +1,391 @@ .\" Copyright (c) 2015 Conrad Meyer .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' .\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR .\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE .\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" -.Dd October 20, 2015 +.Dd July 29, 2016 .Dt BITSET 9 .Os .Sh NAME .Nm bitset(9) \(em .Nm BITSET_DEFINE , .Nm BITSET_T_INITIALIZER , .Nm BITSET_FSET , .Nm BIT_CLR , .Nm BIT_COPY , .Nm BIT_ISSET , .Nm BIT_SET , .Nm BIT_ZERO , .Nm BIT_FILL , .Nm BIT_SETOF , .Nm BIT_EMPTY , .Nm BIT_ISFULLSET , .Nm BIT_FFS , .Nm BIT_COUNT , .Nm BIT_SUBSET , .Nm BIT_OVERLAP , .Nm BIT_CMP , .Nm BIT_OR , .Nm BIT_AND , .Nm BIT_NAND , .Nm BIT_CLR_ATOMIC , .Nm BIT_SET_ATOMIC , .Nm BIT_SET_ATOMIC_ACQ , .Nm BIT_AND_ATOMIC , .Nm BIT_OR_ATOMIC , .Nm BIT_COPY_STORE_REL .Nd bitset manipulation macros .Sh SYNOPSIS .In sys/_bitset.h .In sys/bitset.h .\" .Fn BITSET_DEFINE "STRUCTNAME" "const SETSIZE" .Fn BITSET_T_INITIALIZER "ARRAY_CONTENTS" .Fn BITSET_FSET "N_WORDS" .\" .Fn BIT_CLR "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset" .Fn BIT_COPY "const SETSIZE" "struct STRUCTNAME *from" "struct STRUCTNAME *to" .Ft bool .Fn BIT_ISSET "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset" .Fn BIT_SET "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset" .Fn BIT_ZERO "const SETSIZE" "struct STRUCTNAME *bitset" .Fn BIT_FILL "const SETSIZE" "struct STRUCTNAME *bitset" .Fn BIT_SETOF "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset" .Ft bool .Fn BIT_EMPTY "const SETSIZE" "struct STRUCTNAME *bitset" .Ft bool .Fn BIT_ISFULLSET "const SETSIZE" "struct STRUCTNAME *bitset" -.Ft size_t +.Ft int .Fn BIT_FFS "const SETSIZE" "struct STRUCTNAME *bitset" -.Ft size_t +.Ft int .Fn BIT_COUNT "const SETSIZE" "struct STRUCTNAME *bitset" .\" .Ft bool .Fo BIT_SUBSET .Fa "const SETSIZE" "struct STRUCTNAME *haystack" "struct STRUCTNAME *needle" .Fc .Ft bool .Fo BIT_OVERLAP .Fa "const SETSIZE" "struct STRUCTNAME *bitset1" "struct STRUCTNAME *bitset2" .Fc .Ft bool .Fo BIT_CMP .Fa "const SETSIZE" "struct STRUCTNAME *bitset1" "struct STRUCTNAME *bitset2" .Fc .Fn BIT_OR "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src" .Fn BIT_AND "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src" .Fn BIT_NAND "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src" .\" .Fn BIT_CLR_ATOMIC "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset" .Fn BIT_SET_ATOMIC "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset" .Fn BIT_SET_ATOMIC_ACQ "const SETSIZE" "size_t bit" "struct STRUCTNAME *bitset" .\" .Fo BIT_AND_ATOMIC .Fa "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src" .Fc .Fo BIT_OR_ATOMIC .Fa "const SETSIZE" "struct STRUCTNAME *dst" "struct STRUCTNAME *src" .Fc .Fo BIT_COPY_STORE_REL .Fa "const SETSIZE" "struct STRUCTNAME *from" "struct STRUCTNAME *to" .Fc .Sh DESCRIPTION The .Nm family of macros provide a flexible and efficient bitset implementation if the maximum size of the set is known at compilation. Throughout this manual page, the name .Fa SETSIZE refers to the size of the bitset in bits. Individual bits in bitsets are referenced with indices zero through .Fa SETSIZE - 1 . One example use of .In sys/bitset.h is .In sys/cpuset.h . .Pp The .Fn BITSET_DEFINE macro defines a bitset struct .Fa STRUCTNAME with room to represent .Fa SETSIZE bits. .Pp The .Fn BITSET_T_INITIALIZER macro allows one to initialize a bitset struct with a compile time literal value. .Pp The .Fn BITSET_FSET macro generates a compile time literal, usable by .Fn BITSET_T_INITIALIZER , representing a full bitset (all bits set). For examples of .Fn BITSET_T_INITIALIZER and .Fn BITSET_FSET usage, see the .Sx BITSET_T_INITIALIZER EXAMPLE section. 
The .Fa N_WORDS parameter to .Fn BITSET_FSET should be: .Bd -literal -offset indent __bitset_words(SETSIZE) .Ed .Pp The .Fn BIT_CLR macro clears bit .Fa bit in the bitset pointed to by .Fa bitset . The .Fn BIT_CLR_ATOMIC macro is identical, but the bit is cleared atomically. .Pp The .Fn BIT_COPY macro copies the contents of the bitset .Fa from to the bitset .Fa to . .Fn BIT_COPY_STORE_REL is similar, but copies component machine words from .Fa from and writes them to .Fa to with atomic store with release semantics. (That is, if .Fa to is composed of multiple machine words, .Fn BIT_COPY_STORE_REL performs multiple individually atomic operations.) .Pp The .Fn BIT_SET macro sets bit .Fa bit in the bitset pointed to by .Fa bitset . The .Fn BIT_SET_ATOMIC macro is identical, but the bit is set atomically. The .Fn BIT_SET_ATOMIC_ACQ macro sets the bit with acquire semantics. .Pp The .Fn BIT_ZERO macro clears all bits in .Fa bitset . .Pp The .Fn BIT_FILL macro sets all bits in .Fa bitset . .Pp The .Fn BIT_SETOF macro clears all bits in .Fa bitset before setting only bit .Fa bit . .Pp The .Fn BIT_EMPTY macro returns .Dv true if .Fa bitset is empty. .Pp The .Fn BIT_ISFULLSET macro returns .Dv true if .Fa bitset is full (all bits set). .Pp The .Fn BIT_FFS macro returns the 1-index of the first (lowest) set bit in .Fa bitset , or zero if .Fa bitset is empty. Like with .Xr ffs 3 , to use the non-zero result of .Fn BIT_FFS as a .Fa bit index parameter to any other .Nm macro, you must subtract one from the result. .Pp The .Fn BIT_COUNT macro returns the total number of set bits in .Fa bitset . .Pp The .Fn BIT_SUBSET macro returns .Dv true if .Fa needle is a subset of .Fa haystack . .Pp The .Fn BIT_OVERLAP macro returns .Dv true if .Fa bitset1 and .Fa bitset2 have any common bits. (That is, if .Fa bitset1 AND .Fa bitset2 is not the empty set.) .Pp The .Fn BIT_CMP macro returns .Dv true if .Fa bitset1 is NOT equal to .Fa bitset2 . .Pp The .Fn BIT_OR macro sets bits present in .Fa src in .Fa dst . (It is the .Nm equivalent of the scalar: .Fa dst |= .Fa src . ) .Fn BIT_OR_ATOMIC is similar, but sets bits in the component machine words in .Fa dst atomically. (That is, if .Fa dst is composed of multiple machine words, .Fn BIT_OR_ATOMIC performs multiple individually atomic operations.) .Pp The .Fn BIT_AND macro clears bits absent from .Fa src from .Fa dst . (It is the .Nm equivalent of the scalar: .Fa dst &= .Fa src . ) .Fn BIT_AND_ATOMIC is similar, with the same atomic semantics as .Fn BIT_OR_ATOMIC . .Pp The .Fn BIT_NAND macro clears bits set in .Fa src from .Fa dst . (It is the .Nm equivalent of the scalar: .Fa dst &= .Fa ~ src . ) .Sh BITSET_T_INITIALIZER EXAMPLE .Bd -literal BITSET_DEFINE(_myset, MYSETSIZE); struct _myset myset; /* Initialize myset to filled (all bits set) */ myset = BITSET_T_INITIALIZER(BITSET_FSET(__bitset_words(MYSETSIZE))); /* Initialize myset to only the lowest bit set */ myset = BITSET_T_INITIALIZER(0x1); .Ed .Sh SEE ALSO .Xr bitstring 3 , .Xr cpuset 9 .Sh HISTORY The .Nm macros first appeared in .Fx 10.0 in January 2014. They were MFCed to .Fx 9.3 , released in July 2014. .Pp This manual page first appeared in .Fx 11.0 . .Sh AUTHORS .An -nosplit The .Nm macros were generalized and pulled out of .In sys/cpuset.h as .In sys/_bitset.h and .In sys/bitset.h by .An Attilio Rao Aq Mt attilio@FreeBSD.org . This manual page was written by .An Conrad Meyer Aq Mt cem@FreeBSD.org . .Sh CAVEATS The .Fa SETSIZE argument to all of these macros must match the value given to .Fn BITSET_DEFINE . 
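A minimal sketch (the set size and names are illustrative) showing this caveat together with the one-indexed BIT_FFS() convention described above: every macro call passes the same SETSIZE that was given to BITSET_DEFINE(), and the non-zero BIT_FFS() result is converted back to a zero-based bit index before it is reused.

	#include <sys/_bitset.h>
	#include <sys/bitset.h>

	#define MYSETSIZE	136

	BITSET_DEFINE(_myset, MYSETSIZE);

	static void
	example(void)
	{
		struct _myset set;
		int idx;

		BIT_ZERO(MYSETSIZE, &set);
		BIT_SET(MYSETSIZE, 42, &set);

		idx = BIT_FFS(MYSETSIZE, &set);		/* 43: one-indexed */
		if (idx != 0)
			BIT_CLR(MYSETSIZE, idx - 1, &set);	/* zero-indexed again */
	}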
.Pp Unlike every other reference to individual set members, which are zero-indexed, .Fn BIT_FFS returns a one-indexed result (or zero if the set is empty). Index: user/alc/PQ_LAUNDRY/share/man/man9/cpuset.9 =================================================================== --- user/alc/PQ_LAUNDRY/share/man/man9/cpuset.9 (revision 303641) +++ user/alc/PQ_LAUNDRY/share/man/man9/cpuset.9 (revision 303642) @@ -1,352 +1,352 @@ .\" Copyright (c) 2015 Conrad Meyer .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' .\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR .\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE .\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" -.Dd October 20, 2015 +.Dd July 29, 2016 .Dt CPUSET 9 .Os .Sh NAME .Nm cpuset(9) \(em .Nm CPUSET_T_INITIALIZER , .Nm CPUSET_FSET , .Nm CPU_CLR , .Nm CPU_COPY , .Nm CPU_ISSET , .Nm CPU_SET , .Nm CPU_ZERO , .Nm CPU_FILL , .Nm CPU_SETOF , .Nm CPU_EMPTY , .Nm CPU_ISFULLSET , .Nm CPU_FFS , .Nm CPU_COUNT , .Nm CPU_SUBSET , .Nm CPU_OVERLAP , .Nm CPU_CMP , .Nm CPU_OR , .Nm CPU_AND , .Nm CPU_NAND , .Nm CPU_CLR_ATOMIC , .Nm CPU_SET_ATOMIC , .Nm CPU_SET_ATOMIC_ACQ , .Nm CPU_AND_ATOMIC , .Nm CPU_OR_ATOMIC , .Nm CPU_COPY_STORE_REL .Nd cpuset manipulation macros .Sh SYNOPSIS .In sys/_cpuset.h .In sys/cpuset.h .\" .Fn CPUSET_T_INITIALIZER "ARRAY_CONTENTS" .Vt CPUSET_FSET .\" .Fn CPU_CLR "size_t cpu_idx" "cpuset_t *cpuset" .Fn CPU_COPY "cpuset_t *from" "cpuset_t *to" .Ft bool .Fn CPU_ISSET "size_t cpu_idx" "cpuset_t *cpuset" .Fn CPU_SET "size_t cpu_idx" "cpuset_t *cpuset" .Fn CPU_ZERO "cpuset_t *cpuset" .Fn CPU_FILL "cpuset_t *cpuset" .Fn CPU_SETOF "size_t cpu_idx" "cpuset_t *cpuset" .Ft bool .Fn CPU_EMPTY "cpuset_t *cpuset" .Ft bool .Fn CPU_ISFULLSET "cpuset_t *cpuset" -.Ft size_t +.Ft int .Fn CPU_FFS "cpuset_t *cpuset" -.Ft size_t +.Ft int .Fn CPU_COUNT "cpuset_t *cpuset" .\" .Ft bool .Fn CPU_SUBSET "cpuset_t *haystack" "cpuset_t *needle" .Ft bool .Fn CPU_OVERLAP "cpuset_t *cpuset1" "cpuset_t *cpuset2" .Ft bool .Fn CPU_CMP "cpuset_t *cpuset1" "cpuset_t *cpuset2" .Fn CPU_OR "cpuset_t *dst" "cpuset_t *src" .Fn CPU_AND "cpuset_t *dst" "cpuset_t *src" .Fn CPU_NAND "cpuset_t *dst" "cpuset_t *src" .\" .Fn CPU_CLR_ATOMIC "size_t cpu_idx" "cpuset_t *cpuset" .Fn CPU_SET_ATOMIC "size_t cpu_idx" "cpuset_t *cpuset" .Fn CPU_SET_ATOMIC_ACQ "size_t cpu_idx" "cpuset_t *cpuset" .\" .Fn CPU_AND_ATOMIC "cpuset_t *dst" "cpuset_t *src" .Fn CPU_OR_ATOMIC "cpuset_t *dst" "cpuset_t *src" .Fn CPU_COPY_STORE_REL "cpuset_t *from" "cpuset_t *to" .Sh DESCRIPTION The .Nm family of macros provide a flexible and efficient CPU set implementation, backed by the .Xr bitset 9 macros. Each CPU is represented by a single bit. The maximum number of CPUs representable by .Vt cpuset_t is .Va MAXCPU . Individual CPUs in cpusets are referenced with indices zero through .Fa MAXCPU - 1 . .Pp The .Fn CPUSET_T_INITIALIZER macro allows one to initialize a .Vt cpuset_t with a compile time literal value. .Pp The .Fn CPUSET_FSET macro defines a compile time literal, usable by .Fn CPUSET_T_INITIALIZER , representing a full cpuset (all CPUs present). For examples of .Fn CPUSET_T_INITIALIZER and .Fn CPUSET_FSET usage, see the .Sx CPUSET_T_INITIALIZER EXAMPLE section. .Pp The .Fn CPU_CLR macro removes CPU .Fa cpu_idx from the cpuset pointed to by .Fa cpuset . The .Fn CPU_CLR_ATOMIC macro is identical, but the bit representing the CPU is cleared with atomic machine instructions. .Pp The .Fn CPU_COPY macro copies the contents of the cpuset .Fa from to the cpuset .Fa to . .Fn CPU_COPY_STORE_REL is similar, but copies component machine words from .Fa from and writes them to .Fa to with atomic store with release semantics. (That is, if .Fa to is composed of multiple machine words, .Fn CPU_COPY_STORE_REL performs multiple individually atomic operations.) .Pp The .Fn CPU_SET macro adds CPU .Fa cpu_idx to the cpuset pointed to by .Fa cpuset , if it is not already present. The .Fn CPU_SET_ATOMIC macro is identical, but the bit representing the CPU is set with atomic machine instructions. The .Fn CPU_SET_ATOMIC_ACQ macro sets the bit representing the CPU with atomic acquire semantics. .Pp The .Fn CPU_ZERO macro removes all CPUs from .Fa cpuset . 
.Pp The .Fn CPU_FILL macro adds all CPUs to .Fa cpuset . .Pp The .Fn CPU_SETOF macro removes all CPUs in .Fa cpuset before adding only CPU .Fa cpu_idx . .Pp The .Fn CPU_EMPTY macro returns .Dv true if .Fa cpuset is empty. .Pp The .Fn CPU_ISFULLSET macro returns .Dv true if .Fa cpuset is full (the set of all CPUs). .Pp The .Fn CPU_FFS macro returns the 1-index of the first (lowest) CPU in .Fa cpuset , or zero if .Fa cpuset is empty. Like with .Xr ffs 3 , to use the non-zero result of .Fn CPU_FFS as a .Fa cpu_idx index parameter to any other .Nm macro, you must subtract one from the result. .Pp The .Fn CPU_COUNT macro returns the total number of CPUs in .Fa cpuset . .Pp The .Fn CPU_SUBSET macro returns .Dv true if .Fa needle is a subset of .Fa haystack . .Pp The .Fn CPU_OVERLAP macro returns .Dv true if .Fa cpuset1 and .Fa cpuset2 have any common CPUs. (That is, if .Fa cpuset1 AND .Fa cpuset2 is not the empty set.) .Pp The .Fn CPU_CMP macro returns .Dv true if .Fa cpuset1 is NOT equal to .Fa cpuset2 . .Pp The .Fn CPU_OR macro adds CPUs present in .Fa src to .Fa dst . (It is the .Nm equivalent of the scalar: .Fa dst |= .Fa src . ) .Fn CPU_OR_ATOMIC is similar, but sets the bits representing CPUs in the component machine words in .Fa dst with atomic machine instructions. (That is, if .Fa dst is composed of multiple machine words, .Fn CPU_OR_ATOMIC performs multiple individually atomic operations.) .Pp The .Fn CPU_AND macro removes CPUs absent from .Fa src from .Fa dst . (It is the .Nm equivalent of the scalar: .Fa dst &= .Fa src . ) .Fn CPU_AND_ATOMIC is similar, with the same atomic semantics as .Fn CPU_OR_ATOMIC . .Pp The .Fn CPU_NAND macro removes CPUs in .Fa src from .Fa dst . (It is the .Nm equivalent of the scalar: .Fa dst &= .Fa ~ src . ) .Sh CPUSET_T_INITIALIZER EXAMPLE .Bd -literal cpuset_t myset; /* Initialize myset to filled (all CPUs) */ myset = CPUSET_T_INITIALIZER(CPUSET_FSET); /* Initialize myset to only the lowest CPU */ myset = CPUSET_T_INITIALIZER(0x1); .Ed .Sh SEE ALSO .Xr cpuset 1 , .Xr cpuset 2 , .Xr bitset 9 .Sh HISTORY .In sys/cpuset.h first appeared in .Fx 7.1 , released in January 2009, and in .Fx 8.0 , released in November 2009. .Pp This manual page first appeared in .Fx 11.0 . .Sh AUTHORS .An -nosplit The .Nm macros were written by .An Jeff Roberson Aq Mt jeff@FreeBSD.org . This manual page was written by .An Conrad Meyer Aq Mt cem@FreeBSD.org . .Sh CAVEATS Unlike every other reference to individual set members, which are zero-indexed, .Fn CPU_FFS returns a one-indexed result (or zero if the cpuset is empty). Index: user/alc/PQ_LAUNDRY/share/mk/src.libnames.mk =================================================================== --- user/alc/PQ_LAUNDRY/share/mk/src.libnames.mk (revision 303641) +++ user/alc/PQ_LAUNDRY/share/mk/src.libnames.mk (revision 303642) @@ -1,576 +1,577 @@ # $FreeBSD$ # # The include file define library names suitable # for INTERNALLIB and PRIVATELIB definition .if !target(____) .error src.libnames.mk cannot be included directly. 
.endif .if !target(____) ____: .include _PRIVATELIBS= \ atf_c \ atf_cxx \ bsdstat \ devdctl \ event \ heimipcc \ heimipcs \ ldns \ sqlite3 \ ssh \ ucl \ unbound _INTERNALLIBS= \ amu \ bsnmptools \ cron \ elftc \ fifolog \ ipf \ lpr \ netbsd \ ntp \ ntpevent \ openbsd \ opts \ parse \ pe \ readline \ sl \ sm \ smdb \ smutil \ telnet \ vers _LIBRARIES= \ ${_PRIVATELIBS} \ ${_INTERNALLIBS} \ ${LOCAL_LIBRARIES} \ 80211 \ alias \ archive \ asn1 \ auditd \ avl \ begemot \ bluetooth \ bsdxml \ bsm \ bsnmp \ bz2 \ c \ c_pic \ calendar \ cam \ casper \ cap_dns \ cap_grp \ cap_pwd \ cap_random \ cap_sysctl \ com_err \ compiler_rt \ crypt \ crypto \ ctf \ cuse \ cxxrt \ devctl \ devdctl \ devinfo \ devstat \ dialog \ dpv \ dtrace \ dwarf \ edit \ elf \ execinfo \ fetch \ figpar \ geom \ gnuregex \ gpio \ gssapi \ gssapi_krb5 \ hdb \ heimbase \ heimntlm \ heimsqlite \ hx509 \ ipsec \ jail \ kadm5clnt \ kadm5srv \ kafs5 \ kdc \ kiconv \ krb5 \ kvm \ l \ lzma \ m \ magic \ md \ memstat \ mp \ mt \ nandfs \ ncurses \ ncursesw \ netgraph \ ngatm \ nv \ nvpair \ opie \ pam \ panel \ panelw \ pcap \ pcsclite \ pjdlog \ pmc \ proc \ procstat \ pthread \ radius \ readline \ roken \ rpcsec_gss \ rpcsvc \ rt \ rtld_db \ sbuf \ sdp \ sm \ smb \ ssl \ ssp_nonshared \ stdthreads \ supcplusplus \ sysdecode \ tacplus \ termcap \ termcapw \ ufs \ ugidfw \ ulog \ umem \ usb \ usbhid \ util \ uutil \ vmmapi \ wind \ wrap \ xo \ y \ ypclnt \ z \ zfs_core \ zfs \ zpool \ .if ${MK_BLACKLIST} != "no" _LIBRARIES+= \ blacklist \ .endif .if ${MK_OFED} != "no" _LIBRARIES+= \ cxgb4 \ ibcm \ ibcommon \ ibmad \ ibsdp \ ibumad \ ibverbs \ mlx4 \ mthca \ opensm \ osmcomp \ osmvendor \ rdmacm \ .endif # Each library's LIBADD needs to be duplicated here for static linkage of # 2nd+ order consumers. Auto-generating this would be better. 
_DP_80211= sbuf bsdxml _DP_archive= z bz2 lzma bsdxml .if ${MK_BLACKLIST} != "no" _DP_blacklist+= pthread .endif .if ${MK_OPENSSL} != "no" _DP_archive+= crypto .else _DP_archive+= md .endif _DP_sqlite3= pthread _DP_ssl= crypto _DP_ssh= crypto crypt z .if ${MK_LDNS} != "no" _DP_ssh+= ldns .endif _DP_edit= ncursesw .if ${MK_OPENSSL} != "no" _DP_bsnmp= crypto .endif _DP_geom= bsdxml sbuf _DP_cam= sbuf _DP_kvm= elf _DP_casper= nv _DP_cap_dns= nv _DP_cap_grp= nv _DP_cap_pwd= nv _DP_cap_random= nv _DP_cap_sysctl= nv _DP_pjdlog= util _DP_opie= md _DP_usb= pthread _DP_unbound= ssl crypto pthread _DP_rt= pthread .if ${MK_OPENSSL} == "no" _DP_radius= md .else _DP_radius= crypto .endif +_DP_rtld_db= elf procstat _DP_procstat= kvm util elf .if ${MK_CXX} == "yes" .if ${MK_LIBCPLUSPLUS} != "no" _DP_proc= cxxrt .else _DP_proc= supcplusplus .endif .endif .if ${MK_CDDL} != "no" _DP_proc+= ctf .endif -_DP_proc+= elf rtld_db util +_DP_proc+= elf procstat rtld_db util _DP_mp= crypto _DP_memstat= kvm _DP_magic= z _DP_mt= sbuf bsdxml _DP_ldns= crypto .if ${MK_OPENSSL} != "no" _DP_fetch= ssl crypto .else _DP_fetch= md .endif _DP_execinfo= elf _DP_dwarf= elf _DP_dpv= dialog figpar util ncursesw _DP_dialog= ncursesw m _DP_cuse= pthread _DP_atf_cxx= atf_c _DP_devstat= kvm _DP_pam= radius tacplus opie md util .if ${MK_KERBEROS} != "no" _DP_pam+= krb5 .endif .if ${MK_OPENSSH} != "no" _DP_pam+= ssh .endif .if ${MK_NIS} != "no" _DP_pam+= ypclnt .endif _DP_readline= ncursesw _DP_roken= crypt _DP_kadm5clnt= com_err krb5 roken _DP_kadm5srv= com_err hdb krb5 roken _DP_heimntlm= crypto com_err krb5 roken _DP_hx509= asn1 com_err crypto roken wind _DP_hdb= asn1 com_err krb5 roken sqlite3 _DP_asn1= com_err roken _DP_kdc= roken hdb hx509 krb5 heimntlm asn1 crypto _DP_wind= com_err roken _DP_heimbase= pthread _DP_heimipcc= heimbase roken pthread _DP_heimipcs= heimbase roken pthread _DP_kafs5= asn1 krb5 roken _DP_krb5+= asn1 com_err crypt crypto hx509 roken wind heimbase heimipcc _DP_gssapi_krb5+= gssapi krb5 crypto roken asn1 com_err _DP_lzma= pthread _DP_ucl= m _DP_vmmapi= util _DP_ctf= z _DP_dtrace= ctf elf proc pthread rtld_db _DP_xo= util # The libc dependencies are not strictly needed but are defined to make the # assert happy. _DP_c= compiler_rt .if ${MK_SSP} != "no" _DP_c+= ssp_nonshared .endif _DP_stdthreads= pthread _DP_tacplus= md _DP_panel= ncurses _DP_panelw= ncursesw _DP_rpcsec_gss= gssapi _DP_smb= kiconv _DP_ulog= md _DP_fifolog= z _DP_ipf= kvm _DP_zfs= md pthread umem util uutil m nvpair avl bsdxml geom nvpair z \ zfs_core _DP_zfs_core= nvpair _DP_zpool= md pthread z nvpair avl umem .if ${MK_OFED} != "no" _DP_cxgb4= ibverbs pthread _DP_ibcm= ibverbs _DP_ibmad= ibcommon ibumad _DP_ibumad= ibcommon _DP_mlx4= ibverbs pthread _DP_mthca= ibverbs pthread _DP_opensm= pthread _DP_osmcomp= pthread _DP_osmvendor= ibumad opensm osmcomp pthread _DP_rdmacm= ibverbs .endif # Define special cases LDADD_supcplusplus= -lsupc++ LIBATF_C= ${DESTDIR}${LIBDIR}/libprivateatf-c.a LIBATF_CXX= ${DESTDIR}${LIBDIR}/libprivateatf-c++.a LDADD_atf_c= -lprivateatf-c LDADD_atf_cxx= -lprivateatf-c++ .for _l in ${_PRIVATELIBS} LIB${_l:tu}?= ${DESTDIR}${LIBDIR}/libprivate${_l}.a .endfor .for _l in ${_LIBRARIES} .if ${_INTERNALLIBS:M${_l}} LDADD_${_l}_L+= -L${LIB${_l:tu}DIR} .endif DPADD_${_l}?= ${LIB${_l:tu}} .if ${_PRIVATELIBS:M${_l}} LDADD_${_l}?= -lprivate${_l} .else LDADD_${_l}?= ${LDADD_${_l}_L} -l${_l} .endif # Add in all dependencies for static linkage. 
.if defined(_DP_${_l}) && (${_INTERNALLIBS:M${_l}} || \ (defined(NO_SHARED) && (${NO_SHARED} != "no" && ${NO_SHARED} != "NO"))) .for _d in ${_DP_${_l}} DPADD_${_l}+= ${DPADD_${_d}} LDADD_${_l}+= ${LDADD_${_d}} .endfor .endif .endfor # These are special cases where the library is broken and anything that uses # it needs to add more dependencies. Broken usually means that it has a # cyclic dependency and cannot link its own dependencies. This is bad, please # fix the library instead. # Unless the library itself is broken then the proper place to define # dependencies is _DP_* above. # libatf-c++ exposes libatf-c abi hence we need to explicit link to atf_c for # atf_cxx DPADD_atf_cxx+= ${DPADD_atf_c} LDADD_atf_cxx+= ${LDADD_atf_c} # Detect LDADD/DPADD that should be LIBADD, before modifying LDADD here. _BADLDADD= .for _l in ${LDADD:M-l*:N-l*/*:C,^-l,,} .if ${_LIBRARIES:M${_l}} && !${_PRIVATELIBS:M${_l}} _BADLDADD+= ${_l} .endif .endfor .if !empty(_BADLDADD) .error ${.CURDIR}: These libraries should be LIBADD+=foo rather than DPADD/LDADD+=-lfoo: ${_BADLDADD} .endif .for _l in ${LIBADD} DPADD+= ${DPADD_${_l}} LDADD+= ${LDADD_${_l}} .endfor # INTERNALLIB definitions. LIBELFTCDIR= ${OBJTOP}/lib/libelftc LIBELFTC?= ${LIBELFTCDIR}/libelftc.a LIBPEDIR= ${OBJTOP}/lib/libpe LIBPE?= ${LIBPEDIR}/libpe.a LIBREADLINEDIR= ${OBJTOP}/gnu/lib/libreadline/readline LIBREADLINE?= ${LIBREADLINEDIR}/libreadline.a LIBOPENBSDDIR= ${OBJTOP}/lib/libopenbsd LIBOPENBSD?= ${LIBOPENBSDDIR}/libopenbsd.a LIBSMDIR= ${OBJTOP}/lib/libsm LIBSM?= ${LIBSMDIR}/libsm.a LIBSMDBDIR= ${OBJTOP}/lib/libsmdb LIBSMDB?= ${LIBSMDBDIR}/libsmdb.a LIBSMUTILDIR= ${OBJTOP}/lib/libsmutil LIBSMUTIL?= ${LIBSMDBDIR}/libsmutil.a LIBNETBSDDIR?= ${OBJTOP}/lib/libnetbsd LIBNETBSD?= ${LIBNETBSDDIR}/libnetbsd.a LIBVERSDIR?= ${OBJTOP}/kerberos5/lib/libvers LIBVERS?= ${LIBVERSDIR}/libvers.a LIBSLDIR= ${OBJTOP}/kerberos5/lib/libsl LIBSL?= ${LIBSLDIR}/libsl.a LIBIPFDIR= ${OBJTOP}/sbin/ipf/libipf LIBIPF?= ${LIBIPFDIR}/libipf.a LIBTELNETDIR= ${OBJTOP}/lib/libtelnet LIBTELNET?= ${LIBTELNETDIR}/libtelnet.a LIBCRONDIR= ${OBJTOP}/usr.sbin/cron/lib LIBCRON?= ${LIBCRONDIR}/libcron.a LIBNTPDIR= ${OBJTOP}/usr.sbin/ntp/libntp LIBNTP?= ${LIBNTPDIR}/libntp.a LIBNTPEVENTDIR= ${OBJTOP}/usr.sbin/ntp/libntpevent LIBNTPEVENT?= ${LIBNTPEVENTDIR}/libntpevent.a LIBOPTSDIR= ${OBJTOP}/usr.sbin/ntp/libopts LIBOPTS?= ${LIBOPTSDIR}/libopts.a LIBPARSEDIR= ${OBJTOP}/usr.sbin/ntp/libparse LIBPARSE?= ${LIBPARSEDIR}/libparse.a LIBLPRDIR= ${OBJTOP}/usr.sbin/lpr/common_source LIBLPR?= ${LIBOPTSDIR}/liblpr.a LIBFIFOLOGDIR= ${OBJTOP}/usr.sbin/fifolog/lib LIBFIFOLOG?= ${LIBOPTSDIR}/libfifolog.a LIBBSNMPTOOLSDIR= ${OBJTOP}/usr.sbin/bsnmpd/tools/libbsnmptools LIBBSNMPTOOLS?= ${LIBBSNMPTOOLSDIR}/libbsnmptools.a LIBAMUDIR= ${OBJTOP}/usr.sbin/amd/libamu LIBAMU?= ${LIBAMUDIR}/libamu/libamu.a # Define a directory for each library. This is useful for adding -L in when # not using a --sysroot or for meta mode bootstrapping when there is no # Makefile.depend. These are sorted by directory. 
LIBAVLDIR= ${OBJTOP}/cddl/lib/libavl LIBCTFDIR= ${OBJTOP}/cddl/lib/libctf LIBDTRACEDIR= ${OBJTOP}/cddl/lib/libdtrace LIBNVPAIRDIR= ${OBJTOP}/cddl/lib/libnvpair LIBUMEMDIR= ${OBJTOP}/cddl/lib/libumem LIBUUTILDIR= ${OBJTOP}/cddl/lib/libuutil LIBZFSDIR= ${OBJTOP}/cddl/lib/libzfs LIBZFS_COREDIR= ${OBJTOP}/cddl/lib/libzfs_core LIBZPOOLDIR= ${OBJTOP}/cddl/lib/libzpool LIBCXGB4DIR= ${OBJTOP}/contrib/ofed/usr.lib/libcxgb4 LIBIBCMDIR= ${OBJTOP}/contrib/ofed/usr.lib/libibcm LIBIBCOMMONDIR= ${OBJTOP}/contrib/ofed/usr.lib/libibcommon LIBIBMADDIR= ${OBJTOP}/contrib/ofed/usr.lib/libibmad LIBIBUMADDIR= ${OBJTOP}/contrib/ofed/usr.lib/libibumad LIBIBVERBSDIR= ${OBJTOP}/contrib/ofed/usr.lib/libibverbs LIBMLX4DIR= ${OBJTOP}/contrib/ofed/usr.lib/libmlx4 LIBMTHCADIR= ${OBJTOP}/contrib/ofed/usr.lib/libmthca LIBOPENSMDIR= ${OBJTOP}/contrib/ofed/usr.lib/libopensm LIBOSMCOMPDIR= ${OBJTOP}/contrib/ofed/usr.lib/libosmcomp LIBOSMVENDORDIR= ${OBJTOP}/contrib/ofed/usr.lib/libosmvendor LIBRDMACMDIR= ${OBJTOP}/contrib/ofed/usr.lib/librdmacm LIBIBSDPDIR= ${OBJTOP}/contrib/ofed/usr.lib/libsdp LIBDIALOGDIR= ${OBJTOP}/gnu/lib/libdialog LIBGCOVDIR= ${OBJTOP}/gnu/lib/libgcov LIBGOMPDIR= ${OBJTOP}/gnu/lib/libgomp LIBGNUREGEXDIR= ${OBJTOP}/gnu/lib/libregex LIBSSPDIR= ${OBJTOP}/gnu/lib/libssp LIBSSP_NONSHAREDDIR= ${OBJTOP}/gnu/lib/libssp/libssp_nonshared LIBSUPCPLUSPLUSDIR= ${OBJTOP}/gnu/lib/libsupc++ LIBASN1DIR= ${OBJTOP}/kerberos5/lib/libasn1 LIBGSSAPI_KRB5DIR= ${OBJTOP}/kerberos5/lib/libgssapi_krb5 LIBGSSAPI_NTLMDIR= ${OBJTOP}/kerberos5/lib/libgssapi_ntlm LIBGSSAPI_SPNEGODIR= ${OBJTOP}/kerberos5/lib/libgssapi_spnego LIBHDBDIR= ${OBJTOP}/kerberos5/lib/libhdb LIBHEIMBASEDIR= ${OBJTOP}/kerberos5/lib/libheimbase LIBHEIMIPCCDIR= ${OBJTOP}/kerberos5/lib/libheimipcc LIBHEIMIPCSDIR= ${OBJTOP}/kerberos5/lib/libheimipcs LIBHEIMNTLMDIR= ${OBJTOP}/kerberos5/lib/libheimntlm LIBHX509DIR= ${OBJTOP}/kerberos5/lib/libhx509 LIBKADM5CLNTDIR= ${OBJTOP}/kerberos5/lib/libkadm5clnt LIBKADM5SRVDIR= ${OBJTOP}/kerberos5/lib/libkadm5srv LIBKAFS5DIR= ${OBJTOP}/kerberos5/lib/libkafs5 LIBKDCDIR= ${OBJTOP}/kerberos5/lib/libkdc LIBKRB5DIR= ${OBJTOP}/kerberos5/lib/libkrb5 LIBROKENDIR= ${OBJTOP}/kerberos5/lib/libroken LIBWINDDIR= ${OBJTOP}/kerberos5/lib/libwind LIBATF_CDIR= ${OBJTOP}/lib/atf/libatf-c LIBATF_CXXDIR= ${OBJTOP}/lib/atf/libatf-c++ LIBALIASDIR= ${OBJTOP}/lib/libalias/libalias LIBBLACKLISTDIR= ${OBJTOP}/lib/libblacklist LIBBLOCKSRUNTIMEDIR= ${OBJTOP}/lib/libblocksruntime LIBBSNMPDIR= ${OBJTOP}/lib/libbsnmp/libbsnmp LIBCASPERDIR= ${OBJTOP}/lib/libcasper/libcasper LIBCAP_DNSDIR= ${OBJTOP}/lib/libcasper/services/cap_dns LIBCAP_GRPDIR= ${OBJTOP}/lib/libcasper/services/cap_grp LIBCAP_PWDDIR= ${OBJTOP}/lib/libcasper/services/cap_pwd LIBCAP_RANDOMDIR= ${OBJTOP}/lib/libcasper/services/cap_random LIBCAP_SYSCTLDIR= ${OBJTOP}/lib/libcasper/services/cap_sysctl LIBBSDXMLDIR= ${OBJTOP}/lib/libexpat LIBKVMDIR= ${OBJTOP}/lib/libkvm LIBPTHREADDIR= ${OBJTOP}/lib/libthr LIBMDIR= ${OBJTOP}/lib/msun LIBFORMDIR= ${OBJTOP}/lib/ncurses/form LIBFORMLIBWDIR= ${OBJTOP}/lib/ncurses/formw LIBMENUDIR= ${OBJTOP}/lib/ncurses/menu LIBMENULIBWDIR= ${OBJTOP}/lib/ncurses/menuw LIBNCURSESDIR= ${OBJTOP}/lib/ncurses/ncurses LIBNCURSESWDIR= ${OBJTOP}/lib/ncurses/ncursesw LIBPANELDIR= ${OBJTOP}/lib/ncurses/panel LIBPANELWDIR= ${OBJTOP}/lib/ncurses/panelw LIBCRYPTODIR= ${OBJTOP}/secure/lib/libcrypto LIBSSHDIR= ${OBJTOP}/secure/lib/libssh LIBSSLDIR= ${OBJTOP}/secure/lib/libssl LIBTEKENDIR= ${OBJTOP}/sys/teken/libteken LIBEGACYDIR= ${OBJTOP}/tools/build LIBLNDIR= ${OBJTOP}/usr.bin/lex/lib 
LIBTERMCAPDIR= ${LIBNCURSESDIR} LIBTERMCAPWDIR= ${LIBNCURSESWDIR} # Default other library directories to lib/libNAME. .for lib in ${_LIBRARIES} LIB${lib:tu}DIR?= ${OBJTOP}/lib/lib${lib} .endfor # Validate that listed LIBADD are valid. .for _l in ${LIBADD} .if empty(_LIBRARIES:M${_l}) _BADLIBADD+= ${_l} .endif .endfor .if !empty(_BADLIBADD) .error ${.CURDIR}: Invalid LIBADD used which may need to be added to ${_this:T}: ${_BADLIBADD} .endif # Sanity check that libraries are defined here properly when building them. .if defined(LIB) && ${_LIBRARIES:M${LIB}} != "" .if !empty(LIBADD) && \ (!defined(_DP_${LIB}) || ${LIBADD:O:u} != ${_DP_${LIB}:O:u}) .error ${.CURDIR}: Missing or incorrect _DP_${LIB} entry in ${_this:T}. Should match LIBADD for ${LIB} ('${LIBADD}' vs '${_DP_${LIB}}') .endif # Note that OBJTOP is not yet defined here but for the purpose of the check # it is fine as it resolves to the SRC directory. .if !defined(LIB${LIB:tu}DIR) || !exists(${SRCTOP}/${LIB${LIB:tu}DIR:S,^${OBJTOP}/,,}) .error ${.CURDIR}: Missing or incorrect value for LIB${LIB:tu}DIR in ${_this:T}: ${LIB${LIB:tu}DIR:S,^${OBJTOP}/,,} .endif .if ${_INTERNALLIBS:M${LIB}} != "" && !defined(LIB${LIB:tu}) .error ${.CURDIR}: Missing value for LIB${LIB:tu} in ${_this:T}. Likely should be: LIB${LIB:tu}?= $${LIB${LIB:tu}DIR}/lib${LIB}.a .endif .endif .endif # !target(____) Index: user/alc/PQ_LAUNDRY/sys/amd64/amd64/support.S =================================================================== --- user/alc/PQ_LAUNDRY/sys/amd64/amd64/support.S (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/amd64/amd64/support.S (revision 303642) @@ -1,796 +1,789 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include "opt_ddb.h" #include #include #include "assym.s" .text /* * bcopy family * void bzero(void *buf, u_int len) */ /* done */ ENTRY(bzero) PUSH_FRAME_POINTER movq %rsi,%rcx xorl %eax,%eax shrq $3,%rcx cld rep stosq movq %rsi,%rcx andq $7,%rcx rep stosb POP_FRAME_POINTER ret END(bzero) /* Address: %rdi */ ENTRY(pagezero) PUSH_FRAME_POINTER - movq $-PAGE_SIZE,%rdx - subq %rdx,%rdi + movq $PAGE_SIZE/8,%rcx xorl %eax,%eax -1: - movnti %rax,(%rdi,%rdx) - movnti %rax,8(%rdi,%rdx) - movnti %rax,16(%rdi,%rdx) - movnti %rax,24(%rdi,%rdx) - addq $32,%rdx - jne 1b - sfence + rep + stosq POP_FRAME_POINTER ret END(pagezero) ENTRY(bcmp) PUSH_FRAME_POINTER movq %rdx,%rcx shrq $3,%rcx cld /* compare forwards */ repe cmpsq jne 1f movq %rdx,%rcx andq $7,%rcx repe cmpsb 1: setne %al movsbl %al,%eax POP_FRAME_POINTER ret END(bcmp) /* * bcopy(src, dst, cnt) * rdi, rsi, rdx * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ ENTRY(bcopy) PUSH_FRAME_POINTER xchgq %rsi,%rdi movq %rdx,%rcx movq %rdi,%rax subq %rsi,%rax cmpq %rcx,%rax /* overlapping && src < dst? */ jb 1f shrq $3,%rcx /* copy by 64-bit words */ cld /* nope, copy forwards */ rep movsq movq %rdx,%rcx andq $7,%rcx /* any bytes left? */ rep movsb POP_FRAME_POINTER ret /* ALIGN_TEXT */ 1: addq %rcx,%rdi /* copy backwards */ addq %rcx,%rsi decq %rdi decq %rsi andq $7,%rcx /* any fractional bytes? */ std rep movsb movq %rdx,%rcx /* copy remainder by 32-bit words */ shrq $3,%rcx subq $7,%rsi subq $7,%rdi rep movsq cld POP_FRAME_POINTER ret END(bcopy) /* * Note: memcpy does not support overlapping copies */ ENTRY(memcpy) PUSH_FRAME_POINTER movq %rdi,%rax movq %rdx,%rcx shrq $3,%rcx /* copy by 64-bit words */ cld /* copy forwards */ rep movsq movq %rdx,%rcx andq $7,%rcx /* any bytes left? */ rep movsb POP_FRAME_POINTER ret END(memcpy) /* * pagecopy(%rdi=from, %rsi=to) */ ENTRY(pagecopy) PUSH_FRAME_POINTER movq $-PAGE_SIZE,%rax movq %rax,%rdx subq %rax,%rdi subq %rax,%rsi 1: prefetchnta (%rdi,%rax) addq $64,%rax jne 1b 2: movq (%rdi,%rdx),%rax movnti %rax,(%rsi,%rdx) movq 8(%rdi,%rdx),%rax movnti %rax,8(%rsi,%rdx) movq 16(%rdi,%rdx),%rax movnti %rax,16(%rsi,%rdx) movq 24(%rdi,%rdx),%rax movnti %rax,24(%rsi,%rdx) addq $32,%rdx jne 2b sfence POP_FRAME_POINTER ret END(pagecopy) /* fillw(pat, base, cnt) */ /* %rdi,%rsi, %rdx */ ENTRY(fillw) PUSH_FRAME_POINTER movq %rdi,%rax movq %rsi,%rdi movq %rdx,%rcx cld rep stosw POP_FRAME_POINTER ret END(fillw) /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ /* * Access user memory from inside the kernel. These routines should be * the only places that do this. * * These routines set curpcb->pcb_onfault for the time they execute. When a * protection violation occurs inside the functions, the trap handler * returns to *curpcb->pcb_onfault instead of the function. */ /* * copyout(from_kernel, to_user, len) - MP SAFE * %rdi, %rsi, %rdx */ ENTRY(copyout) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rax movq $copyout_fault,PCB_ONFAULT(%rax) testq %rdx,%rdx /* anything to do? */ jz done_copyout /* * Check explicitly for non-user addresses. If 486 write protection * is being used, this check is essential because we are in kernel * mode so the h/w does not provide any protection against writing * kernel addresses. */ /* * First, prevent address wrapping. */ movq %rsi,%rax addq %rdx,%rax jc copyout_fault /* * XXX STOP USING VM_MAXUSER_ADDRESS. 
* It is an end address, not a max, so every time it is used correctly it * looks like there is an off by one error, and of course it caused an off * by one error in several places. */ movq $VM_MAXUSER_ADDRESS,%rcx cmpq %rcx,%rax ja copyout_fault xchgq %rdi,%rsi /* bcopy(%rsi, %rdi, %rdx) */ movq %rdx,%rcx shrq $3,%rcx cld rep movsq movb %dl,%cl andb $7,%cl rep movsb done_copyout: xorl %eax,%eax movq PCPU(CURPCB),%rdx movq %rax,PCB_ONFAULT(%rdx) POP_FRAME_POINTER ret ALIGN_TEXT copyout_fault: movq PCPU(CURPCB),%rdx movq $0,PCB_ONFAULT(%rdx) movq $EFAULT,%rax POP_FRAME_POINTER ret END(copyout) /* * copyin(from_user, to_kernel, len) - MP SAFE * %rdi, %rsi, %rdx */ ENTRY(copyin) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rax movq $copyin_fault,PCB_ONFAULT(%rax) testq %rdx,%rdx /* anything to do? */ jz done_copyin /* * make sure address is valid */ movq %rdi,%rax addq %rdx,%rax jc copyin_fault movq $VM_MAXUSER_ADDRESS,%rcx cmpq %rcx,%rax ja copyin_fault xchgq %rdi,%rsi movq %rdx,%rcx movb %cl,%al shrq $3,%rcx /* copy longword-wise */ cld rep movsq movb %al,%cl andb $7,%cl /* copy remaining bytes */ rep movsb done_copyin: xorl %eax,%eax movq PCPU(CURPCB),%rdx movq %rax,PCB_ONFAULT(%rdx) POP_FRAME_POINTER ret ALIGN_TEXT copyin_fault: movq PCPU(CURPCB),%rdx movq $0,PCB_ONFAULT(%rdx) movq $EFAULT,%rax POP_FRAME_POINTER ret END(copyin) /* * casueword32. Compare and set user integer. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %esi, oldp = %rdx, new = %ecx */ ENTRY(casueword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movl %esi,%eax /* old */ #ifdef SMP lock #endif cmpxchgl %ecx,(%rdi) /* new = %ecx */ /* * The old value is in %eax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. Save %eax into %esi to prepare the return * value. */ movl %eax,%esi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) /* * Access the oldp after the pcb_onfault is cleared, to correctly * catch corrupted pointer. */ movl %esi,(%rdx) /* oldp = %rdx */ POP_FRAME_POINTER ret END(casueword32) /* * casueword. Compare and set user long. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx */ ENTRY(casueword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movq %rsi,%rax /* old */ #ifdef SMP lock #endif cmpxchgq %rcx,(%rdi) /* new = %rcx */ /* * The old value is in %rax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. */ movq %rax,%rsi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) movq %rsi,(%rdx) POP_FRAME_POINTER ret END(casueword) /* * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit * byte from user memory. 
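The casueword32/casueword routines above return -1 only when the user access faults; on success they return 0 and store the value observed at the target address in *oldp, leaving the caller to decide whether the compare actually matched. A minimal caller sketch (hypothetical helper and names, not part of this change; the declarations live in sys/systm.h):

	/* Hedged sketch: atomically OR a bit into a word in user memory. */
	static int
	set_user_bit(volatile uint32_t *uaddr, uint32_t bit)
	{
		uint32_t old, prev;

		if (fueword32(uaddr, &old) == -1)
			return (EFAULT);		/* initial read faulted */
		for (;;) {
			if (casueword32(uaddr, old, &prev, old | bit) == -1)
				return (EFAULT);	/* cmpxchg faulted */
			if (prev == old)
				return (0);		/* store took effect */
			old = prev;			/* raced; retry with observed value */
		}
	}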
* addr = %rdi, valp = %rsi */ ALTENTRY(fueword64) ENTRY(fueword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax movq (%rdi),%r11 movq %rax,PCB_ONFAULT(%rcx) movq %r11,(%rsi) POP_FRAME_POINTER ret END(fueword64) END(fueword) ENTRY(fueword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax movl (%rdi),%r11d movq %rax,PCB_ONFAULT(%rcx) movl %r11d,(%rsi) POP_FRAME_POINTER ret END(fueword32) /* * fuswintr() and suswintr() are specialized variants of fuword16() and * suword16(), respectively. They are called from the profiling code, * potentially at interrupt time. If they fail, that's okay; good things * will happen later. They always fail for now, until the trap code is * able to deal with this. */ ALTENTRY(suswintr) ENTRY(fuswintr) movq $-1,%rax ret END(suswintr) END(fuswintr) ENTRY(fuword16) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi ja fusufault movzwl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fuword16) ENTRY(fubyte) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi ja fusufault movzbl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fubyte) ALIGN_TEXT fusufault: movq PCPU(CURPCB),%rcx xorl %eax,%eax movq %rax,PCB_ONFAULT(%rcx) decq %rax POP_FRAME_POINTER ret /* * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to * user memory. All these functions are MPSAFE. * addr = %rdi, value = %rsi */ ALTENTRY(suword64) ENTRY(suword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movq %rsi,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword64) END(suword) ENTRY(suword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword32) ENTRY(suword16) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movw %si,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx /* restore trashed register */ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword16) ENTRY(subyte) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,%eax movb %al,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx /* restore trashed register */ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(subyte) /* * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE * %rdi, %rsi, %rdx, %rcx * * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. 
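copyinstr() below bounds the copy by the smaller of maxlen and the remaining user address space, returns EFAULT on a fault and ENAMETOOLONG when no NUL is found within the limit, and the length it reports includes the terminating NUL. A hedged usage sketch (user_path is a hypothetical user-space pointer):

	char path[MAXPATHLEN];
	size_t done;
	int error;

	error = copyinstr(user_path, path, sizeof(path), &done);
	if (error != 0)
		return (error);			/* EFAULT or ENAMETOOLONG */
	/* "done" counts the NUL, so strlen(path) == done - 1. */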
*/ ENTRY(copyinstr) PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ movq %rcx,%r9 /* %r9 = *len */ xchgq %rdi,%rsi /* %rdi = from, %rsi = to */ movq PCPU(CURPCB),%rcx movq $cpystrflt,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS,%rax /* make sure 'from' is within bounds */ subq %rsi,%rax jbe cpystrflt /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ cmpq %rdx,%rax jae 1f movq %rax,%rdx movq %rax,%r8 1: incq %rdx cld 2: decq %rdx jz 3f lodsb stosb orb %al,%al jnz 2b /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax jmp cpystrflt_x 3: /* rdx is zero - return ENAMETOOLONG or EFAULT */ movq $VM_MAXUSER_ADDRESS,%rax cmpq %rax,%rsi jae cpystrflt 4: movq $ENAMETOOLONG,%rax jmp cpystrflt_x cpystrflt: movq $EFAULT,%rax cpystrflt_x: /* set *lencopied and return %eax */ movq PCPU(CURPCB),%rcx movq $0,PCB_ONFAULT(%rcx) testq %r9,%r9 jz 1f subq %rdx,%r8 movq %r8,(%r9) 1: POP_FRAME_POINTER ret END(copyinstr) /* * copystr(from, to, maxlen, int *lencopied) - MP SAFE * %rdi, %rsi, %rdx, %rcx */ ENTRY(copystr) PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ xchgq %rdi,%rsi incq %rdx cld 1: decq %rdx jz 4f lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax jmp 6f 4: /* rdx is zero -- return ENAMETOOLONG */ movq $ENAMETOOLONG,%rax 6: testq %rcx,%rcx jz 7f /* set *lencopied and return %rax */ subq %rdx,%r8 movq %r8,(%rcx) 7: POP_FRAME_POINTER ret END(copystr) /* * Handling of special amd64 registers and descriptor tables etc * %rdi */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) /* reload the descriptor table */ lgdt (%rdi) /* flush the prefetch q */ jmp 1f nop 1: movl $KDSEL,%eax movl %eax,%ds movl %eax,%es movl %eax,%fs /* Beware, use wrmsr to set 64 bit base */ movl %eax,%gs movl %eax,%ss /* reload code selector by turning return into intersegmental return */ popq %rax pushq $KCSEL pushq %rax MEXITCOUNT lretq END(lgdt) /*****************************************************************************/ /* setjump, longjump */ /*****************************************************************************/ ENTRY(setjmp) movq %rbx,0(%rdi) /* save rbx */ movq %rsp,8(%rdi) /* save rsp */ movq %rbp,16(%rdi) /* save rbp */ movq %r12,24(%rdi) /* save r12 */ movq %r13,32(%rdi) /* save r13 */ movq %r14,40(%rdi) /* save r14 */ movq %r15,48(%rdi) /* save r15 */ movq 0(%rsp),%rdx /* get rta */ movq %rdx,56(%rdi) /* save rip */ xorl %eax,%eax /* return(0); */ ret END(setjmp) ENTRY(longjmp) movq 0(%rdi),%rbx /* restore rbx */ movq 8(%rdi),%rsp /* restore rsp */ movq 16(%rdi),%rbp /* restore rbp */ movq 24(%rdi),%r12 /* restore r12 */ movq 32(%rdi),%r13 /* restore r13 */ movq 40(%rdi),%r14 /* restore r14 */ movq 48(%rdi),%r15 /* restore r15 */ movq 56(%rdi),%rdx /* get rta */ movq %rdx,0(%rsp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret END(longjmp) /* * Support for reading MSRs in the safe manner. */ ENTRY(rdmsr_safe) /* int rdmsr_safe(u_int msr, uint64_t *data) */ PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $msr_onfault,PCB_ONFAULT(%r8) movl %edi,%ecx rdmsr /* Read MSR pointed by %ecx. Returns hi byte in edx, lo in %eax */ salq $32,%rdx /* sign-shift %rdx left */ movl %eax,%eax /* zero-extend %eax -> %rax */ orq %rdx,%rax movq %rax,(%rsi) xorq %rax,%rax movq %rax,PCB_ONFAULT(%r8) POP_FRAME_POINTER ret /* * Support for writing MSRs in the safe manner. 
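Both rdmsr_safe() above and wrmsr_safe() below reuse the pcb_onfault hook, so a #GP raised by a non-existent MSR unwinds to msr_onfault and surfaces as EFAULT instead of a panic. A hedged caller sketch (the MSR number is illustrative only):

	#define	MSR_EXAMPLE	0x17f		/* hypothetical MSR number */
	uint64_t val;

	if (rdmsr_safe(MSR_EXAMPLE, &val) != 0)
		val = 0;			/* MSR not implemented on this CPU */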
*/ ENTRY(wrmsr_safe) /* int wrmsr_safe(u_int msr, uint64_t data) */ PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $msr_onfault,PCB_ONFAULT(%r8) movl %edi,%ecx movl %esi,%eax sarq $32,%rsi movl %esi,%edx wrmsr /* Write MSR pointed by %ecx. Accepts hi byte in edx, lo in %eax. */ xorq %rax,%rax movq %rax,PCB_ONFAULT(%r8) POP_FRAME_POINTER ret /* * MSR operations fault handler */ ALIGN_TEXT msr_onfault: movq $0,PCB_ONFAULT(%r8) movl $EFAULT,%eax POP_FRAME_POINTER ret Index: user/alc/PQ_LAUNDRY/sys/arm/arm/gic.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm/arm/gic.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/arm/arm/gic.c (revision 303642) @@ -1,1806 +1,1546 @@ /*- * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * * Developed by Damjan Marion * * Based on OMAP4 GIC code by Ben Gray * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INTRNG #include #endif #include #include #include #include #include #include -#include -#include #include +#include + #ifdef INTRNG #include "pic_if.h" #include "msi_if.h" #endif -#define GIC_DEBUG_SPURIOUS - /* We are using GICv2 register naming */ /* Distributor Registers */ #define GICD_CTLR 0x000 /* v1 ICDDCR */ #define GICD_TYPER 0x004 /* v1 ICDICTR */ #define GICD_IIDR 0x008 /* v1 ICDIIDR */ #define GICD_IGROUPR(n) (0x0080 + ((n) * 4)) /* v1 ICDISER */ #define GICD_ISENABLER(n) (0x0100 + ((n) * 4)) /* v1 ICDISER */ #define GICD_ICENABLER(n) (0x0180 + ((n) * 4)) /* v1 ICDICER */ #define GICD_ISPENDR(n) (0x0200 + ((n) * 4)) /* v1 ICDISPR */ #define GICD_ICPENDR(n) (0x0280 + ((n) * 4)) /* v1 ICDICPR */ #define GICD_ICACTIVER(n) (0x0380 + ((n) * 4)) /* v1 ICDABR */ #define GICD_IPRIORITYR(n) (0x0400 + ((n) * 4)) /* v1 ICDIPR */ #define GICD_ITARGETSR(n) (0x0800 + ((n) * 4)) /* v1 ICDIPTR */ #define GICD_ICFGR(n) (0x0C00 + ((n) * 4)) /* v1 ICDICFR */ #define GICD_SGIR(n) (0x0F00 + ((n) * 4)) /* v1 ICDSGIR */ #define GICD_SGI_TARGET_SHIFT 16 /* CPU Registers */ #define GICC_CTLR 0x0000 /* v1 ICCICR */ #define GICC_PMR 0x0004 /* v1 ICCPMR */ #define GICC_BPR 0x0008 /* v1 ICCBPR */ #define GICC_IAR 0x000C /* v1 ICCIAR */ #define GICC_EOIR 0x0010 /* v1 ICCEOIR */ #define GICC_RPR 0x0014 /* v1 ICCRPR */ #define GICC_HPPIR 0x0018 /* v1 ICCHPIR */ #define GICC_ABPR 0x001C /* v1 ICCABPR */ #define GICC_IIDR 0x00FC /* v1 ICCIIDR*/ -#define GIC_FIRST_SGI 0 /* Irqs 0-15 are SGIs/IPIs. */ -#define GIC_LAST_SGI 15 -#define GIC_FIRST_PPI 16 /* Irqs 16-31 are private (per */ -#define GIC_LAST_PPI 31 /* core) peripheral interrupts. */ -#define GIC_FIRST_SPI 32 /* Irqs 32+ are shared peripherals. */ - /* TYPER Registers */ #define GICD_TYPER_SECURITYEXT 0x400 #define GIC_SUPPORT_SECEXT(_sc) \ ((_sc->typer & GICD_TYPER_SECURITYEXT) == GICD_TYPER_SECURITYEXT) /* First bit is a polarity bit (0 - low, 1 - high) */ #define GICD_ICFGR_POL_LOW (0 << 0) #define GICD_ICFGR_POL_HIGH (1 << 0) #define GICD_ICFGR_POL_MASK 0x1 /* Second bit is a trigger bit (0 - level, 1 - edge) */ #define GICD_ICFGR_TRIG_LVL (0 << 1) #define GICD_ICFGR_TRIG_EDGE (1 << 1) #define GICD_ICFGR_TRIG_MASK 0x2 #ifndef GIC_DEFAULT_ICFGR_INIT #define GIC_DEFAULT_ICFGR_INIT 0x00000000 #endif #ifdef INTRNG struct gic_irqsrc { struct intr_irqsrc gi_isrc; uint32_t gi_irq; enum intr_polarity gi_pol; enum intr_trigger gi_trig; #define GI_FLAG_EARLY_EOI (1 << 0) #define GI_FLAG_MSI (1 << 1) /* This interrupt source should only */ /* be used for MSI/MSI-X interrupts */ #define GI_FLAG_MSI_USED (1 << 2) /* This irq is already allocated */ /* for a MSI/MSI-X interrupt */ u_int gi_flags; }; static u_int gic_irq_cpu; -static int arm_gic_intr(void *); static int arm_gic_bind_intr(device_t dev, struct intr_irqsrc *isrc); #ifdef SMP static u_int sgi_to_ipi[GIC_LAST_SGI - GIC_FIRST_SGI + 1]; static u_int sgi_first_unused = GIC_FIRST_SGI; #endif -#endif -#ifdef INTRNG -struct arm_gic_range { - uint64_t bus; - uint64_t host; - uint64_t size; +#define GIC_INTR_ISRC(sc, irq) (&sc->gic_irqs[irq].gi_isrc) +#else /* !INTRNG */ +static struct ofw_compat_data compat_data[] = { + {"arm,gic", true}, /* Non-standard, used in FreeBSD dts. 
*/ + {"arm,gic-400", true}, + {"arm,cortex-a15-gic", true}, + {"arm,cortex-a9-gic", true}, + {"arm,cortex-a7-gic", true}, + {"arm,arm11mp-gic", true}, + {"brcm,brahma-b15-gic", true}, + {"qcom,msm-qgic2", true}, + {NULL, false} }; - -struct arm_gic_devinfo { - struct ofw_bus_devinfo obdinfo; - struct resource_list rl; -}; #endif -struct arm_gic_softc { - device_t gic_dev; -#ifdef INTRNG - void * gic_intrhand; - struct gic_irqsrc * gic_irqs; -#endif - struct resource * gic_res[3]; - bus_space_tag_t gic_c_bst; - bus_space_tag_t gic_d_bst; - bus_space_handle_t gic_c_bsh; - bus_space_handle_t gic_d_bsh; - uint8_t ver; - struct mtx mutex; - uint32_t nirqs; - uint32_t typer; -#ifdef GIC_DEBUG_SPURIOUS - uint32_t last_irq[MAXCPU]; -#endif - -#ifdef INTRNG - /* FDT child data */ - pcell_t addr_cells; - pcell_t size_cells; - int nranges; - struct arm_gic_range * ranges; -#endif -}; - -#ifdef INTRNG -#define GIC_INTR_ISRC(sc, irq) (&sc->gic_irqs[irq].gi_isrc) -#endif - static struct resource_spec arm_gic_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, /* Distributor registers */ { SYS_RES_MEMORY, 1, RF_ACTIVE }, /* CPU Interrupt Intf. registers */ #ifdef INTRNG { SYS_RES_IRQ, 0, RF_ACTIVE | RF_OPTIONAL }, /* Parent interrupt */ #endif { -1, 0 } }; static u_int arm_gic_map[MAXCPU]; static struct arm_gic_softc *gic_sc = NULL; #define gic_c_read_4(_sc, _reg) \ bus_space_read_4((_sc)->gic_c_bst, (_sc)->gic_c_bsh, (_reg)) #define gic_c_write_4(_sc, _reg, _val) \ bus_space_write_4((_sc)->gic_c_bst, (_sc)->gic_c_bsh, (_reg), (_val)) #define gic_d_read_4(_sc, _reg) \ bus_space_read_4((_sc)->gic_d_bst, (_sc)->gic_d_bsh, (_reg)) #define gic_d_write_1(_sc, _reg, _val) \ bus_space_write_1((_sc)->gic_d_bst, (_sc)->gic_d_bsh, (_reg), (_val)) #define gic_d_write_4(_sc, _reg, _val) \ bus_space_write_4((_sc)->gic_d_bst, (_sc)->gic_d_bsh, (_reg), (_val)) #ifndef INTRNG static int gic_config_irq(int irq, enum intr_trigger trig, enum intr_polarity pol); static void gic_post_filter(void *); #endif -static struct ofw_compat_data compat_data[] = { - {"arm,gic", true}, /* Non-standard, used in FreeBSD dts. 
*/ - {"arm,gic-400", true}, - {"arm,cortex-a15-gic", true}, - {"arm,cortex-a9-gic", true}, - {"arm,cortex-a7-gic", true}, - {"arm,arm11mp-gic", true}, - {"brcm,brahma-b15-gic", true}, - {"qcom,msm-qgic2", true}, - {NULL, false} -}; - -static int -arm_gic_probe(device_t dev) -{ - - if (!ofw_bus_status_okay(dev)) - return (ENXIO); - - if (!ofw_bus_search_compatible(dev, compat_data)->ocd_data) - return (ENXIO); - device_set_desc(dev, "ARM Generic Interrupt Controller"); - return (BUS_PROBE_DEFAULT); -} - #ifdef INTRNG static inline void gic_irq_unmask(struct arm_gic_softc *sc, u_int irq) { gic_d_write_4(sc, GICD_ISENABLER(irq >> 5), (1UL << (irq & 0x1F))); } static inline void gic_irq_mask(struct arm_gic_softc *sc, u_int irq) { gic_d_write_4(sc, GICD_ICENABLER(irq >> 5), (1UL << (irq & 0x1F))); } #endif static uint8_t gic_cpu_mask(struct arm_gic_softc *sc) { uint32_t mask; int i; /* Read the current cpuid mask by reading ITARGETSR{0..7} */ for (i = 0; i < 8; i++) { mask = gic_d_read_4(sc, GICD_ITARGETSR(i)); if (mask != 0) break; } /* No mask found, assume we are on CPU interface 0 */ if (mask == 0) return (1); /* Collect the mask in the lower byte */ mask |= mask >> 16; mask |= mask >> 8; return (mask); } #ifdef SMP #ifdef INTRNG static void arm_gic_init_secondary(device_t dev) { struct arm_gic_softc *sc = device_get_softc(dev); u_int irq, cpu; /* Set the mask so we can find this CPU to send it IPIs */ cpu = PCPU_GET(cpuid); arm_gic_map[cpu] = gic_cpu_mask(sc); for (irq = 0; irq < sc->nirqs; irq += 4) gic_d_write_4(sc, GICD_IPRIORITYR(irq >> 2), 0); /* Set all the interrupts to be in Group 0 (secure) */ for (irq = 0; GIC_SUPPORT_SECEXT(sc) && irq < sc->nirqs; irq += 32) { gic_d_write_4(sc, GICD_IGROUPR(irq >> 5), 0); } /* Enable CPU interface */ gic_c_write_4(sc, GICC_CTLR, 1); /* Set priority mask register. */ gic_c_write_4(sc, GICC_PMR, 0xff); /* Enable interrupt distribution */ gic_d_write_4(sc, GICD_CTLR, 0x01); /* Unmask attached SGI interrupts. */ for (irq = GIC_FIRST_SGI; irq <= GIC_LAST_SGI; irq++) if (intr_isrc_init_on_cpu(GIC_INTR_ISRC(sc, irq), cpu)) gic_irq_unmask(sc, irq); /* Unmask attached PPI interrupts. */ for (irq = GIC_FIRST_PPI; irq <= GIC_LAST_PPI; irq++) if (intr_isrc_init_on_cpu(GIC_INTR_ISRC(sc, irq), cpu)) gic_irq_unmask(sc, irq); } #else static void arm_gic_init_secondary(device_t dev) { struct arm_gic_softc *sc = device_get_softc(dev); int i; /* Set the mask so we can find this CPU to send it IPIs */ arm_gic_map[PCPU_GET(cpuid)] = gic_cpu_mask(sc); for (i = 0; i < sc->nirqs; i += 4) gic_d_write_4(sc, GICD_IPRIORITYR(i >> 2), 0); /* Set all the interrupts to be in Group 0 (secure) */ for (i = 0; GIC_SUPPORT_SECEXT(sc) && i < sc->nirqs; i += 32) { gic_d_write_4(sc, GICD_IGROUPR(i >> 5), 0); } /* Enable CPU interface */ gic_c_write_4(sc, GICC_CTLR, 1); /* Set priority mask register. */ gic_c_write_4(sc, GICC_PMR, 0xff); /* Enable interrupt distribution */ gic_d_write_4(sc, GICD_CTLR, 0x01); /* * Activate the timer interrupts: virtual, secure, and non-secure. 
*/ gic_d_write_4(sc, GICD_ISENABLER(27 >> 5), (1UL << (27 & 0x1F))); gic_d_write_4(sc, GICD_ISENABLER(29 >> 5), (1UL << (29 & 0x1F))); gic_d_write_4(sc, GICD_ISENABLER(30 >> 5), (1UL << (30 & 0x1F))); } #endif /* INTRNG */ #endif /* SMP */ #ifndef INTRNG int gic_decode_fdt(phandle_t iparent, pcell_t *intr, int *interrupt, int *trig, int *pol) { static u_int num_intr_cells; static phandle_t self; struct ofw_compat_data *ocd; if (self == 0) { for (ocd = compat_data; ocd->ocd_str != NULL; ocd++) { if (fdt_is_compatible(iparent, ocd->ocd_str)) { self = iparent; break; } } } if (self != iparent) return (ENXIO); if (num_intr_cells == 0) { if (OF_searchencprop(OF_node_from_xref(iparent), "#interrupt-cells", &num_intr_cells, sizeof(num_intr_cells)) == -1) { num_intr_cells = 1; } } if (num_intr_cells == 1) { *interrupt = fdt32_to_cpu(intr[0]); *trig = INTR_TRIGGER_CONFORM; *pol = INTR_POLARITY_CONFORM; } else { if (fdt32_to_cpu(intr[0]) == 0) *interrupt = fdt32_to_cpu(intr[1]) + GIC_FIRST_SPI; else *interrupt = fdt32_to_cpu(intr[1]) + GIC_FIRST_PPI; /* * In intr[2], bits[3:0] are trigger type and level flags. * 1 = low-to-high edge triggered * 2 = high-to-low edge triggered * 4 = active high level-sensitive * 8 = active low level-sensitive * The hardware only supports active-high-level or rising-edge * for SPIs */ if (*interrupt >= GIC_FIRST_SPI && fdt32_to_cpu(intr[2]) & 0x0a) { printf("unsupported trigger/polarity configuration " "0x%02x\n", fdt32_to_cpu(intr[2]) & 0x0f); } *pol = INTR_POLARITY_CONFORM; if (fdt32_to_cpu(intr[2]) & 0x03) *trig = INTR_TRIGGER_EDGE; else *trig = INTR_TRIGGER_LEVEL; } return (0); } #endif #ifdef INTRNG -static inline intptr_t -gic_xref(device_t dev) -{ -#ifdef FDT - return (OF_xref_from_node(ofw_bus_get_node(dev))); -#else - return (0); -#endif -} - static int arm_gic_register_isrcs(struct arm_gic_softc *sc, uint32_t num) { int error; uint32_t irq; struct gic_irqsrc *irqs; struct intr_irqsrc *isrc; const char *name; irqs = malloc(num * sizeof(struct gic_irqsrc), M_DEVBUF, M_WAITOK | M_ZERO); name = device_get_nameunit(sc->gic_dev); for (irq = 0; irq < num; irq++) { irqs[irq].gi_irq = irq; irqs[irq].gi_pol = INTR_POLARITY_CONFORM; irqs[irq].gi_trig = INTR_TRIGGER_CONFORM; isrc = &irqs[irq].gi_isrc; if (irq <= GIC_LAST_SGI) { error = intr_isrc_register(isrc, sc->gic_dev, INTR_ISRCF_IPI, "%s,i%u", name, irq - GIC_FIRST_SGI); } else if (irq <= GIC_LAST_PPI) { error = intr_isrc_register(isrc, sc->gic_dev, INTR_ISRCF_PPI, "%s,p%u", name, irq - GIC_FIRST_PPI); } else { error = intr_isrc_register(isrc, sc->gic_dev, 0, "%s,s%u", name, irq - GIC_FIRST_SPI); } if (error != 0) { /* XXX call intr_isrc_deregister() */ free(irqs, M_DEVBUF); return (error); } } sc->gic_irqs = irqs; sc->nirqs = num; return (0); } -static int -arm_gic_fill_ranges(phandle_t node, struct arm_gic_softc *sc) -{ - pcell_t host_cells; - cell_t *base_ranges; - ssize_t nbase_ranges; - int i, j, k; - - host_cells = 1; - OF_getencprop(OF_parent(node), "#address-cells", &host_cells, - sizeof(host_cells)); - sc->addr_cells = 2; - OF_getencprop(node, "#address-cells", &sc->addr_cells, - sizeof(sc->addr_cells)); - sc->size_cells = 2; - OF_getencprop(node, "#size-cells", &sc->size_cells, - sizeof(sc->size_cells)); - - nbase_ranges = OF_getproplen(node, "ranges"); - if (nbase_ranges < 0) - return (-1); - sc->nranges = nbase_ranges / sizeof(cell_t) / - (sc->addr_cells + host_cells + sc->size_cells); - if (sc->nranges == 0) - return (0); - - sc->ranges = malloc(sc->nranges * sizeof(sc->ranges[0]), - M_DEVBUF, 
M_WAITOK); - base_ranges = malloc(nbase_ranges, M_DEVBUF, M_WAITOK); - OF_getencprop(node, "ranges", base_ranges, nbase_ranges); - - for (i = 0, j = 0; i < sc->nranges; i++) { - sc->ranges[i].bus = 0; - for (k = 0; k < sc->addr_cells; k++) { - sc->ranges[i].bus <<= 32; - sc->ranges[i].bus |= base_ranges[j++]; - } - sc->ranges[i].host = 0; - for (k = 0; k < host_cells; k++) { - sc->ranges[i].host <<= 32; - sc->ranges[i].host |= base_ranges[j++]; - } - sc->ranges[i].size = 0; - for (k = 0; k < sc->size_cells; k++) { - sc->ranges[i].size <<= 32; - sc->ranges[i].size |= base_ranges[j++]; - } - } - - free(base_ranges, M_DEVBUF); - return (sc->nranges); -} - -static bool -arm_gic_add_children(device_t dev) -{ - struct arm_gic_softc *sc; - struct arm_gic_devinfo *dinfo; - phandle_t child, node; - device_t cdev; - - sc = device_get_softc(dev); - node = ofw_bus_get_node(dev); - - /* If we have no children don't probe for them */ - child = OF_child(node); - if (child == 0) - return (false); - - if (arm_gic_fill_ranges(node, sc) < 0) { - device_printf(dev, "Have a child, but no ranges\n"); - return (false); - } - - for (; child != 0; child = OF_peer(child)) { - dinfo = malloc(sizeof(*dinfo), M_DEVBUF, M_WAITOK | M_ZERO); - - if (ofw_bus_gen_setup_devinfo(&dinfo->obdinfo, child) != 0) { - free(dinfo, M_DEVBUF); - continue; - } - - resource_list_init(&dinfo->rl); - ofw_bus_reg_to_rl(dev, child, sc->addr_cells, - sc->size_cells, &dinfo->rl); - - cdev = device_add_child(dev, NULL, -1); - if (cdev == NULL) { - device_printf(dev, "<%s>: device_add_child failed\n", - dinfo->obdinfo.obd_name); - resource_list_free(&dinfo->rl); - ofw_bus_gen_destroy_devinfo(&dinfo->obdinfo); - free(dinfo, M_DEVBUF); - continue; - } - device_set_ivars(cdev, dinfo); - } - - return (true); -} - static void arm_gic_reserve_msi_range(device_t dev, u_int start, u_int count) { struct arm_gic_softc *sc; int i; sc = device_get_softc(dev); KASSERT((start + count) < sc->nirqs, ("%s: Trying to allocate too many MSI IRQs: %d + %d > %d", __func__, start, count, sc->nirqs)); for (i = 0; i < count; i++) { KASSERT(sc->gic_irqs[start + i].gi_isrc.isrc_handlers == 0, ("%s: MSI interrupt %d already has a handler", __func__, count + i)); KASSERT(sc->gic_irqs[start + i].gi_pol == INTR_POLARITY_CONFORM, ("%s: MSI interrupt %d already has a polarity", __func__, count + i)); KASSERT(sc->gic_irqs[start + i].gi_trig == INTR_TRIGGER_CONFORM, ("%s: MSI interrupt %d already has a trigger", __func__, count + i)); sc->gic_irqs[start + i].gi_pol = INTR_POLARITY_HIGH; sc->gic_irqs[start + i].gi_trig = INTR_TRIGGER_EDGE; sc->gic_irqs[start + i].gi_flags |= GI_FLAG_MSI; } } #endif -static int +int arm_gic_attach(device_t dev) { struct arm_gic_softc *sc; int i; uint32_t icciidr, mask, nirqs; -#ifdef INTRNG - phandle_t pxref; - intptr_t xref = gic_xref(dev); -#endif if (gic_sc) return (ENXIO); sc = device_get_softc(dev); if (bus_alloc_resources(dev, arm_gic_spec, sc->gic_res)) { device_printf(dev, "could not allocate resources\n"); return (ENXIO); } sc->gic_dev = dev; gic_sc = sc; /* Initialize mutex */ mtx_init(&sc->mutex, "GIC lock", "", MTX_SPIN); /* Distributor Interface */ sc->gic_d_bst = rman_get_bustag(sc->gic_res[0]); sc->gic_d_bsh = rman_get_bushandle(sc->gic_res[0]); /* CPU Interface */ sc->gic_c_bst = rman_get_bustag(sc->gic_res[1]); sc->gic_c_bsh = rman_get_bushandle(sc->gic_res[1]); /* Disable interrupt forwarding to the CPU interface */ gic_d_write_4(sc, GICD_CTLR, 0x00); /* Get the number of interrupts */ sc->typer = gic_d_read_4(sc, GICD_TYPER); 
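GICD_TYPER bits [4:0] (ITLinesNumber) encode the number of implemented interrupt IDs as 32 * (N + 1), which is what the computation on the next line evaluates. A hedged worked example:

	/* ITLinesNumber == 3 -> 32 * (3 + 1) == 128 interrupt IDs (0-127). */
	uint32_t typer_sample = 0x3;
	uint32_t nirqs_sample = 32 * ((typer_sample & 0x1f) + 1);	/* 128 */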
nirqs = 32 * ((sc->typer & 0x1f) + 1); #ifdef INTRNG if (arm_gic_register_isrcs(sc, nirqs)) { device_printf(dev, "could not register irqs\n"); goto cleanup; } #else sc->nirqs = nirqs; /* Set up function pointers */ arm_post_filter = gic_post_filter; arm_config_irq = gic_config_irq; #endif icciidr = gic_c_read_4(sc, GICC_IIDR); device_printf(dev,"pn 0x%x, arch 0x%x, rev 0x%x, implementer 0x%x irqs %u\n", icciidr>>20, (icciidr>>16) & 0xF, (icciidr>>12) & 0xf, (icciidr & 0xfff), sc->nirqs); /* Set all global interrupts to be level triggered, active low. */ for (i = 32; i < sc->nirqs; i += 16) { gic_d_write_4(sc, GICD_ICFGR(i >> 4), GIC_DEFAULT_ICFGR_INIT); } /* Disable all interrupts. */ for (i = 32; i < sc->nirqs; i += 32) { gic_d_write_4(sc, GICD_ICENABLER(i >> 5), 0xFFFFFFFF); } /* Find the current cpu mask */ mask = gic_cpu_mask(sc); /* Set the mask so we can find this CPU to send it IPIs */ arm_gic_map[PCPU_GET(cpuid)] = mask; /* Set all four targets to this cpu */ mask |= mask << 8; mask |= mask << 16; for (i = 0; i < sc->nirqs; i += 4) { gic_d_write_4(sc, GICD_IPRIORITYR(i >> 2), 0); if (i > 32) { gic_d_write_4(sc, GICD_ITARGETSR(i >> 2), mask); } } /* Set all the interrupts to be in Group 0 (secure) */ for (i = 0; GIC_SUPPORT_SECEXT(sc) && i < sc->nirqs; i += 32) { gic_d_write_4(sc, GICD_IGROUPR(i >> 5), 0); } /* Enable CPU interface */ gic_c_write_4(sc, GICC_CTLR, 1); /* Set priority mask register. */ gic_c_write_4(sc, GICC_PMR, 0xff); /* Enable interrupt distribution */ gic_d_write_4(sc, GICD_CTLR, 0x01); -#ifndef INTRNG return (0); -#else - /* - * Now, when everything is initialized, it's right time to - * register interrupt controller to interrupt framefork. - */ - if (intr_pic_register(dev, xref) == NULL) { - device_printf(dev, "could not register PIC\n"); - goto cleanup; - } - /* - * Controller is root if: - * - doesn't have interrupt parent - * - his interrupt parent is this controller - */ - pxref = ofw_bus_find_iparent(ofw_bus_get_node(dev)); - if (pxref == 0 || xref == pxref) { - if (intr_pic_claim_root(dev, xref, arm_gic_intr, sc, - GIC_LAST_SGI - GIC_FIRST_SGI + 1) != 0) { - device_printf(dev, "could not set PIC as a root\n"); - intr_pic_deregister(dev, xref); - goto cleanup; - } - } else { - if (sc->gic_res[2] == NULL) { - device_printf(dev, - "not root PIC must have defined interrupt\n"); - intr_pic_deregister(dev, xref); - goto cleanup; - } - if (bus_setup_intr(dev, sc->gic_res[2], INTR_TYPE_CLK, - arm_gic_intr, NULL, sc, &sc->gic_intrhand)) { - device_printf(dev, "could not setup irq handler\n"); - intr_pic_deregister(dev, xref); - goto cleanup; - } - } +#ifdef INTRNG +cleanup: + arm_gic_detach(dev); + return(ENXIO); +#endif +} - OF_device_register_xref(xref, dev); +int +arm_gic_detach(device_t dev) +{ +#ifdef INTRNG + struct arm_gic_softc *sc; - /* If we have children probe and attach them */ - if (arm_gic_add_children(dev)) { - bus_generic_probe(dev); - return (bus_generic_attach(dev)); - } + sc = device_get_softc(dev); - return (0); - -cleanup: - /* - * XXX - not implemented arm_gic_detach() should be called ! 
- */ if (sc->gic_irqs != NULL) free(sc->gic_irqs, M_DEVBUF); + bus_release_resources(dev, arm_gic_spec, sc->gic_res); - return(ENXIO); #endif + + return (0); } #ifdef INTRNG +static int +arm_gic_print_child(device_t bus, device_t child) +{ + struct resource_list *rl; + int rv; + + rv = bus_print_child_header(bus, child); + + rl = BUS_GET_RESOURCE_LIST(bus, child); + if (rl != NULL) { + rv += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, + "%#jx"); + rv += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%jd"); + } + + rv += bus_print_child_footer(bus, child); + + return (rv); +} + static struct resource * arm_gic_alloc_resource(device_t bus, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct arm_gic_softc *sc; - struct arm_gic_devinfo *di; struct resource_list_entry *rle; + struct resource_list *rl; int j; KASSERT(type == SYS_RES_MEMORY, ("Invalid resoure type %x", type)); sc = device_get_softc(bus); /* * Request for the default allocation with a given rid: use resource * list stored in the local device info. */ if (RMAN_IS_DEFAULT_RANGE(start, end)) { - if ((di = device_get_ivars(child)) == NULL) - return (NULL); + rl = BUS_GET_RESOURCE_LIST(bus, child); if (type == SYS_RES_IOPORT) type = SYS_RES_MEMORY; - rle = resource_list_find(&di->rl, type, *rid); + rle = resource_list_find(rl, type, *rid); if (rle == NULL) { if (bootverbose) device_printf(bus, "no default resources for " "rid = %d, type = %d\n", *rid, type); return (NULL); } start = rle->start; end = rle->end; count = rle->count; } /* Remap through ranges property */ for (j = 0; j < sc->nranges; j++) { if (start >= sc->ranges[j].bus && end < sc->ranges[j].bus + sc->ranges[j].size) { start -= sc->ranges[j].bus; start += sc->ranges[j].host; end -= sc->ranges[j].bus; end += sc->ranges[j].host; break; } } if (j == sc->nranges && sc->nranges != 0) { if (bootverbose) device_printf(bus, "Could not map resource " "%#jx-%#jx\n", (uintmax_t)start, (uintmax_t)end); return (NULL); } return (bus_generic_alloc_resource(bus, child, type, rid, start, end, count, flags)); } -static const struct ofw_bus_devinfo * -arm_gic_ofw_get_devinfo(device_t bus __unused, device_t child) -{ - struct arm_gic_devinfo *di; - - di = device_get_ivars(child); - - return (&di->obdinfo); -} - -static int +int arm_gic_intr(void *arg) { struct arm_gic_softc *sc = arg; struct gic_irqsrc *gi; uint32_t irq_active_reg, irq; struct trapframe *tf; irq_active_reg = gic_c_read_4(sc, GICC_IAR); irq = irq_active_reg & 0x3FF; /* * 1. We do EOI here because recent read value from active interrupt * register must be used for it. Another approach is to save this * value into associated interrupt source. * 2. EOI must be done on same CPU where interrupt has fired. Thus * we must ensure that interrupted thread does not migrate to * another CPU. * 3. EOI cannot be delayed by any preemption which could happen on * critical_exit() used in MI intr code, when interrupt thread is * scheduled. See next point. * 4. IPI_RENDEZVOUS assumes that no preemption is permitted during * an action and any use of critical_exit() could break this * assumption. See comments within smp_rendezvous_action(). * 5. We always return FILTER_HANDLED as this is an interrupt * controller dispatch function. Otherwise, in cascaded interrupt * case, the whole interrupt subtree would be masked. 
*/ if (irq >= sc->nirqs) { #ifdef GIC_DEBUG_SPURIOUS device_printf(sc->gic_dev, "Spurious interrupt detected: last irq: %d on CPU%d\n", sc->last_irq[PCPU_GET(cpuid)], PCPU_GET(cpuid)); #endif return (FILTER_HANDLED); } tf = curthread->td_intr_frame; dispatch_irq: gi = sc->gic_irqs + irq; /* * Note that GIC_FIRST_SGI is zero and is not used in 'if' statement * as compiler complains that comparing u_int >= 0 is always true. */ if (irq <= GIC_LAST_SGI) { #ifdef SMP /* Call EOI for all IPI before dispatch. */ gic_c_write_4(sc, GICC_EOIR, irq_active_reg); intr_ipi_dispatch(sgi_to_ipi[gi->gi_irq], tf); goto next_irq; #else device_printf(sc->gic_dev, "SGI %u on UP system detected\n", irq - GIC_FIRST_SGI); gic_c_write_4(sc, GICC_EOIR, irq_active_reg); goto next_irq; #endif } #ifdef GIC_DEBUG_SPURIOUS sc->last_irq[PCPU_GET(cpuid)] = irq; #endif if ((gi->gi_flags & GI_FLAG_EARLY_EOI) == GI_FLAG_EARLY_EOI) gic_c_write_4(sc, GICC_EOIR, irq_active_reg); if (intr_isrc_dispatch(&gi->gi_isrc, tf) != 0) { gic_irq_mask(sc, irq); if ((gi->gi_flags & GI_FLAG_EARLY_EOI) != GI_FLAG_EARLY_EOI) gic_c_write_4(sc, GICC_EOIR, irq_active_reg); device_printf(sc->gic_dev, "Stray irq %u disabled\n", irq); } next_irq: arm_irq_memory_barrier(irq); irq_active_reg = gic_c_read_4(sc, GICC_IAR); irq = irq_active_reg & 0x3FF; if (irq < sc->nirqs) goto dispatch_irq; return (FILTER_HANDLED); } static void gic_config(struct arm_gic_softc *sc, u_int irq, enum intr_trigger trig, enum intr_polarity pol) { uint32_t reg; uint32_t mask; if (irq < GIC_FIRST_SPI) return; mtx_lock_spin(&sc->mutex); reg = gic_d_read_4(sc, GICD_ICFGR(irq >> 4)); mask = (reg >> 2*(irq % 16)) & 0x3; if (pol == INTR_POLARITY_LOW) { mask &= ~GICD_ICFGR_POL_MASK; mask |= GICD_ICFGR_POL_LOW; } else if (pol == INTR_POLARITY_HIGH) { mask &= ~GICD_ICFGR_POL_MASK; mask |= GICD_ICFGR_POL_HIGH; } if (trig == INTR_TRIGGER_LEVEL) { mask &= ~GICD_ICFGR_TRIG_MASK; mask |= GICD_ICFGR_TRIG_LVL; } else if (trig == INTR_TRIGGER_EDGE) { mask &= ~GICD_ICFGR_TRIG_MASK; mask |= GICD_ICFGR_TRIG_EDGE; } /* Set mask */ reg = reg & ~(0x3 << 2*(irq % 16)); reg = reg | (mask << 2*(irq % 16)); gic_d_write_4(sc, GICD_ICFGR(irq >> 4), reg); mtx_unlock_spin(&sc->mutex); } static int gic_bind(struct arm_gic_softc *sc, u_int irq, cpuset_t *cpus) { uint32_t cpu, end, mask; end = min(mp_ncpus, 8); for (cpu = end; cpu < MAXCPU; cpu++) if (CPU_ISSET(cpu, cpus)) return (EINVAL); for (mask = 0, cpu = 0; cpu < end; cpu++) if (CPU_ISSET(cpu, cpus)) mask |= arm_gic_map[cpu]; gic_d_write_1(sc, GICD_ITARGETSR(0) + irq, mask); return (0); } #ifdef FDT static int gic_map_fdt(device_t dev, u_int ncells, pcell_t *cells, u_int *irqp, enum intr_polarity *polp, enum intr_trigger *trigp) { if (ncells == 1) { *irqp = cells[0]; *polp = INTR_POLARITY_CONFORM; *trigp = INTR_TRIGGER_CONFORM; return (0); } if (ncells == 3) { u_int irq, tripol; /* * The 1st cell is the interrupt type: * 0 = SPI * 1 = PPI * The 2nd cell contains the interrupt number: * [0 - 987] for SPI * [0 - 15] for PPI * The 3rd cell is the flags, encoded as follows: * bits[3:0] trigger type and level flags * 1 = low-to-high edge triggered * 2 = high-to-low edge triggered * 4 = active high level-sensitive * 8 = active low level-sensitive * bits[15:8] PPI interrupt cpu mask * Each bit corresponds to each of the 8 possible cpus * attached to the GIC. A bit set to '1' indicated * the interrupt is wired to that CPU. */ switch (cells[0]) { case 0: irq = GIC_FIRST_SPI + cells[1]; /* SPI irq is checked later. 
*/ break; case 1: irq = GIC_FIRST_PPI + cells[1]; if (irq > GIC_LAST_PPI) { device_printf(dev, "unsupported PPI interrupt " "number %u\n", cells[1]); return (EINVAL); } break; default: device_printf(dev, "unsupported interrupt type " "configuration %u\n", cells[0]); return (EINVAL); } tripol = cells[2] & 0xff; if (tripol & 0xf0 || (tripol & 0x0a && cells[0] == 0)) device_printf(dev, "unsupported trigger/polarity " "configuration 0x%02x\n", tripol); *irqp = irq; *polp = INTR_POLARITY_CONFORM; *trigp = tripol & 0x03 ? INTR_TRIGGER_EDGE : INTR_TRIGGER_LEVEL; return (0); } return (EINVAL); } #endif static int gic_map_intr(device_t dev, struct intr_map_data *data, u_int *irqp, enum intr_polarity *polp, enum intr_trigger *trigp) { u_int irq; enum intr_polarity pol; enum intr_trigger trig; struct arm_gic_softc *sc; #ifdef FDT struct intr_map_data_fdt *daf; #endif sc = device_get_softc(dev); switch (data->type) { #ifdef FDT case INTR_MAP_DATA_FDT: daf = (struct intr_map_data_fdt *)data; if (gic_map_fdt(dev, daf->ncells, daf->cells, &irq, &pol, &trig) != 0) return (EINVAL); KASSERT(irq >= sc->nirqs || (sc->gic_irqs[irq].gi_flags & GI_FLAG_MSI) == 0, ("%s: Attempting to map a MSI interrupt from FDT", __func__)); break; #endif default: return (ENOTSUP); } if (irq >= sc->nirqs) return (EINVAL); if (pol != INTR_POLARITY_CONFORM && pol != INTR_POLARITY_LOW && pol != INTR_POLARITY_HIGH) return (EINVAL); if (trig != INTR_TRIGGER_CONFORM && trig != INTR_TRIGGER_EDGE && trig != INTR_TRIGGER_LEVEL) return (EINVAL); *irqp = irq; if (polp != NULL) *polp = pol; if (trigp != NULL) *trigp = trig; return (0); } static int arm_gic_map_intr(device_t dev, struct intr_map_data *data, struct intr_irqsrc **isrcp) { int error; u_int irq; struct arm_gic_softc *sc; error = gic_map_intr(dev, data, &irq, NULL, NULL); if (error == 0) { sc = device_get_softc(dev); *isrcp = GIC_INTR_ISRC(sc, irq); } return (error); } static int arm_gic_setup_intr(device_t dev, struct intr_irqsrc *isrc, struct resource *res, struct intr_map_data *data) { struct arm_gic_softc *sc = device_get_softc(dev); struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; enum intr_trigger trig; enum intr_polarity pol; if ((gi->gi_flags & GI_FLAG_MSI) == GI_FLAG_MSI) { pol = gi->gi_pol; trig = gi->gi_trig; KASSERT(pol == INTR_POLARITY_HIGH, ("%s: MSI interrupts must be active-high", __func__)); KASSERT(trig == INTR_TRIGGER_EDGE, ("%s: MSI interrupts must be edge triggered", __func__)); } else if (data != NULL) { u_int irq; /* Get config for resource. */ if (gic_map_intr(dev, data, &irq, &pol, &trig) || gi->gi_irq != irq) return (EINVAL); } else { pol = INTR_POLARITY_CONFORM; trig = INTR_TRIGGER_CONFORM; } /* Compare config if this is not first setup. */ if (isrc->isrc_handlers != 0) { if ((pol != INTR_POLARITY_CONFORM && pol != gi->gi_pol) || (trig != INTR_TRIGGER_CONFORM && trig != gi->gi_trig)) return (EINVAL); else return (0); } /* For MSI/MSI-X we should have already configured these */ if ((gi->gi_flags & GI_FLAG_MSI) == 0) { if (pol == INTR_POLARITY_CONFORM) pol = INTR_POLARITY_LOW; /* just pick some */ if (trig == INTR_TRIGGER_CONFORM) trig = INTR_TRIGGER_EDGE; /* just pick some */ gi->gi_pol = pol; gi->gi_trig = trig; /* Edge triggered interrupts need an early EOI sent */ if (gi->gi_pol == INTR_TRIGGER_EDGE) gi->gi_flags |= GI_FLAG_EARLY_EOI; } /* * XXX - In case that per CPU interrupt is going to be enabled in time * when SMP is already started, we need some IPI call which * enables it on others CPUs. 
Further, it's more complicated as * pic_enable_source() and pic_disable_source() should act on * per CPU basis only. Thus, it should be solved here somehow. */ if (isrc->isrc_flags & INTR_ISRCF_PPI) CPU_SET(PCPU_GET(cpuid), &isrc->isrc_cpu); gic_config(sc, gi->gi_irq, gi->gi_trig, gi->gi_pol); arm_gic_bind_intr(dev, isrc); return (0); } static int arm_gic_teardown_intr(device_t dev, struct intr_irqsrc *isrc, struct resource *res, struct intr_map_data *data) { struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; if (isrc->isrc_handlers == 0 && (gi->gi_flags & GI_FLAG_MSI) == 0) { gi->gi_pol = INTR_POLARITY_CONFORM; gi->gi_trig = INTR_TRIGGER_CONFORM; } return (0); } static void arm_gic_enable_intr(device_t dev, struct intr_irqsrc *isrc) { struct arm_gic_softc *sc = device_get_softc(dev); struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; arm_irq_memory_barrier(gi->gi_irq); gic_irq_unmask(sc, gi->gi_irq); } static void arm_gic_disable_intr(device_t dev, struct intr_irqsrc *isrc) { struct arm_gic_softc *sc = device_get_softc(dev); struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; gic_irq_mask(sc, gi->gi_irq); } static void arm_gic_pre_ithread(device_t dev, struct intr_irqsrc *isrc) { struct arm_gic_softc *sc = device_get_softc(dev); struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; arm_gic_disable_intr(dev, isrc); gic_c_write_4(sc, GICC_EOIR, gi->gi_irq); } static void arm_gic_post_ithread(device_t dev, struct intr_irqsrc *isrc) { arm_irq_memory_barrier(0); arm_gic_enable_intr(dev, isrc); } static void arm_gic_post_filter(device_t dev, struct intr_irqsrc *isrc) { struct arm_gic_softc *sc = device_get_softc(dev); struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; /* EOI for edge-triggered done earlier. */ if ((gi->gi_flags & GI_FLAG_EARLY_EOI) == GI_FLAG_EARLY_EOI) return; arm_irq_memory_barrier(0); gic_c_write_4(sc, GICC_EOIR, gi->gi_irq); } static int arm_gic_bind_intr(device_t dev, struct intr_irqsrc *isrc) { struct arm_gic_softc *sc = device_get_softc(dev); struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; if (gi->gi_irq < GIC_FIRST_SPI) return (EINVAL); if (CPU_EMPTY(&isrc->isrc_cpu)) { gic_irq_cpu = intr_irq_next_cpu(gic_irq_cpu, &all_cpus); CPU_SETOF(gic_irq_cpu, &isrc->isrc_cpu); } return (gic_bind(sc, gi->gi_irq, &isrc->isrc_cpu)); } #ifdef SMP static void arm_gic_ipi_send(device_t dev, struct intr_irqsrc *isrc, cpuset_t cpus, u_int ipi) { struct arm_gic_softc *sc = device_get_softc(dev); struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; uint32_t val = 0, i; for (i = 0; i < MAXCPU; i++) if (CPU_ISSET(i, &cpus)) val |= arm_gic_map[i] << GICD_SGI_TARGET_SHIFT; gic_d_write_4(sc, GICD_SGIR(0), val | gi->gi_irq); } static int arm_gic_ipi_setup(device_t dev, u_int ipi, struct intr_irqsrc **isrcp) { struct intr_irqsrc *isrc; struct arm_gic_softc *sc = device_get_softc(dev); if (sgi_first_unused > GIC_LAST_SGI) return (ENOSPC); isrc = GIC_INTR_ISRC(sc, sgi_first_unused); sgi_to_ipi[sgi_first_unused++] = ipi; CPU_SET(PCPU_GET(cpuid), &isrc->isrc_cpu); *isrcp = isrc; return (0); } #endif #else static int arm_gic_next_irq(struct arm_gic_softc *sc, int last_irq) { uint32_t active_irq; active_irq = gic_c_read_4(sc, GICC_IAR); /* * Immediately EOIR the SGIs, because doing so requires the other * bits (ie CPU number), not just the IRQ number, and we do not * have this information later. 
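On GICv2 the IAR value read for an SGI carries the requesting CPU in bits [12:10] alongside the interrupt ID in bits [9:0], and the EOIR write must echo those bits back; that is why the code below completes SGIs immediately, while the full register value is still in hand. A hedged worked example:

	/* IAR == 0x0c05: SGI 5 signalled by CPU 3 (bits [12:10] == 3);
	 * the same value 0x0c05, not just 5, must go back to GICC_EOIR. */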
*/ if ((active_irq & 0x3ff) <= GIC_LAST_SGI) gic_c_write_4(sc, GICC_EOIR, active_irq); active_irq &= 0x3FF; if (active_irq == 0x3FF) { if (last_irq == -1) device_printf(sc->gic_dev, "Spurious interrupt detected\n"); return -1; } return active_irq; } static int arm_gic_config(device_t dev, int irq, enum intr_trigger trig, enum intr_polarity pol) { struct arm_gic_softc *sc = device_get_softc(dev); uint32_t reg; uint32_t mask; /* Function is public-accessible, so validate input arguments */ if ((irq < 0) || (irq >= sc->nirqs)) goto invalid_args; if ((trig != INTR_TRIGGER_EDGE) && (trig != INTR_TRIGGER_LEVEL) && (trig != INTR_TRIGGER_CONFORM)) goto invalid_args; if ((pol != INTR_POLARITY_HIGH) && (pol != INTR_POLARITY_LOW) && (pol != INTR_POLARITY_CONFORM)) goto invalid_args; mtx_lock_spin(&sc->mutex); reg = gic_d_read_4(sc, GICD_ICFGR(irq >> 4)); mask = (reg >> 2*(irq % 16)) & 0x3; if (pol == INTR_POLARITY_LOW) { mask &= ~GICD_ICFGR_POL_MASK; mask |= GICD_ICFGR_POL_LOW; } else if (pol == INTR_POLARITY_HIGH) { mask &= ~GICD_ICFGR_POL_MASK; mask |= GICD_ICFGR_POL_HIGH; } if (trig == INTR_TRIGGER_LEVEL) { mask &= ~GICD_ICFGR_TRIG_MASK; mask |= GICD_ICFGR_TRIG_LVL; } else if (trig == INTR_TRIGGER_EDGE) { mask &= ~GICD_ICFGR_TRIG_MASK; mask |= GICD_ICFGR_TRIG_EDGE; } /* Set mask */ reg = reg & ~(0x3 << 2*(irq % 16)); reg = reg | (mask << 2*(irq % 16)); gic_d_write_4(sc, GICD_ICFGR(irq >> 4), reg); mtx_unlock_spin(&sc->mutex); return (0); invalid_args: device_printf(dev, "gic_config_irg, invalid parameters\n"); return (EINVAL); } static void arm_gic_mask(device_t dev, int irq) { struct arm_gic_softc *sc = device_get_softc(dev); gic_d_write_4(sc, GICD_ICENABLER(irq >> 5), (1UL << (irq & 0x1F))); gic_c_write_4(sc, GICC_EOIR, irq); /* XXX - not allowed */ } static void arm_gic_unmask(device_t dev, int irq) { struct arm_gic_softc *sc = device_get_softc(dev); if (irq > GIC_LAST_SGI) arm_irq_memory_barrier(irq); gic_d_write_4(sc, GICD_ISENABLER(irq >> 5), (1UL << (irq & 0x1F))); } #ifdef SMP static void arm_gic_ipi_send(device_t dev, cpuset_t cpus, u_int ipi) { struct arm_gic_softc *sc = device_get_softc(dev); uint32_t val = 0, i; for (i = 0; i < MAXCPU; i++) if (CPU_ISSET(i, &cpus)) val |= arm_gic_map[i] << GICD_SGI_TARGET_SHIFT; gic_d_write_4(sc, GICD_SGIR(0), val | ipi); } static int arm_gic_ipi_read(device_t dev, int i) { if (i != -1) { /* * The intr code will automagically give the frame pointer * if the interrupt argument is 0. 
*/ if ((unsigned int)i > 16) return (0); return (i); } return (0x3ff); } static void arm_gic_ipi_clear(device_t dev, int ipi) { /* no-op */ } #endif static void gic_post_filter(void *arg) { struct arm_gic_softc *sc = gic_sc; uintptr_t irq = (uintptr_t) arg; if (irq > GIC_LAST_SGI) arm_irq_memory_barrier(irq); gic_c_write_4(sc, GICC_EOIR, irq); } static int gic_config_irq(int irq, enum intr_trigger trig, enum intr_polarity pol) { return (arm_gic_config(gic_sc->gic_dev, irq, trig, pol)); } void arm_mask_irq(uintptr_t nb) { arm_gic_mask(gic_sc->gic_dev, nb); } void arm_unmask_irq(uintptr_t nb) { arm_gic_unmask(gic_sc->gic_dev, nb); } int arm_get_next_irq(int last_irq) { return (arm_gic_next_irq(gic_sc, last_irq)); } #ifdef SMP void intr_pic_init_secondary(void) { arm_gic_init_secondary(gic_sc->gic_dev); } void pic_ipi_send(cpuset_t cpus, u_int ipi) { arm_gic_ipi_send(gic_sc->gic_dev, cpus, ipi); } int pic_ipi_read(int i) { return (arm_gic_ipi_read(gic_sc->gic_dev, i)); } void pic_ipi_clear(int ipi) { arm_gic_ipi_clear(gic_sc->gic_dev, ipi); } #endif #endif /* INTRNG */ static device_method_t arm_gic_methods[] = { - /* Device interface */ - DEVMETHOD(device_probe, arm_gic_probe), - DEVMETHOD(device_attach, arm_gic_attach), - #ifdef INTRNG /* Bus interface */ + DEVMETHOD(bus_print_child, arm_gic_print_child), DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_alloc_resource, arm_gic_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource,bus_generic_activate_resource), - /* ofw_bus interface */ - DEVMETHOD(ofw_bus_get_devinfo, arm_gic_ofw_get_devinfo), - DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat), - DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model), - DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name), - DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node), - DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type), - /* Interrupt controller interface */ DEVMETHOD(pic_disable_intr, arm_gic_disable_intr), DEVMETHOD(pic_enable_intr, arm_gic_enable_intr), DEVMETHOD(pic_map_intr, arm_gic_map_intr), DEVMETHOD(pic_setup_intr, arm_gic_setup_intr), DEVMETHOD(pic_teardown_intr, arm_gic_teardown_intr), DEVMETHOD(pic_post_filter, arm_gic_post_filter), DEVMETHOD(pic_post_ithread, arm_gic_post_ithread), DEVMETHOD(pic_pre_ithread, arm_gic_pre_ithread), #ifdef SMP DEVMETHOD(pic_bind_intr, arm_gic_bind_intr), DEVMETHOD(pic_init_secondary, arm_gic_init_secondary), DEVMETHOD(pic_ipi_send, arm_gic_ipi_send), DEVMETHOD(pic_ipi_setup, arm_gic_ipi_setup), #endif #endif { 0, 0 } }; -static driver_t arm_gic_driver = { - "gic", - arm_gic_methods, - sizeof(struct arm_gic_softc), -}; +DEFINE_CLASS_0(gic, arm_gic_driver, arm_gic_methods, + sizeof(struct arm_gic_softc)); -static devclass_t arm_gic_devclass; - -EARLY_DRIVER_MODULE(gic, simplebus, arm_gic_driver, arm_gic_devclass, 0, 0, - BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); -EARLY_DRIVER_MODULE(gic, ofwbus, arm_gic_driver, arm_gic_devclass, 0, 0, - BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); - #ifdef INTRNG /* * GICv2m support -- the GICv2 MSI/MSI-X controller. 
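A GICv2m frame turns MSI writes into ordinary SPIs: the device is programmed with the physical address of the frame's MSI_SETSPI_NS doorbell (offset 0x040) and a data value equal to the SPI number, exactly the pair arm_gicv2m_map_msi() below hands back. A hedged worked example:

	/* If MSI_TYPER reports SPI base 64 and count 32, a device given
	 * data == 70 raises SPI 70 in the distributor on every MSI write
	 * to <frame physical address> + 0x040. */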
*/ #define GICV2M_MSI_TYPER 0x008 #define MSI_TYPER_SPI_BASE(x) (((x) >> 16) & 0x3ff) #define MSI_TYPER_SPI_COUNT(x) (((x) >> 0) & 0x3ff) #define GICv2M_MSI_SETSPI_NS 0x040 #define GICV2M_MSI_IIDR 0xFCC -struct arm_gicv2m_softc { - struct resource *sc_mem; - struct mtx sc_mutex; - u_int sc_spi_start; - u_int sc_spi_end; - u_int sc_spi_count; -}; - -static struct ofw_compat_data gicv2m_compat_data[] = { - {"arm,gic-v2m-frame", true}, - {NULL, false} -}; - -static int -arm_gicv2m_probe(device_t dev) -{ - - if (!ofw_bus_status_okay(dev)) - return (ENXIO); - - if (!ofw_bus_search_compatible(dev, gicv2m_compat_data)->ocd_data) - return (ENXIO); - - device_set_desc(dev, "ARM Generic Interrupt Controller MSI/MSIX"); - return (BUS_PROBE_DEFAULT); -} - -static int +int arm_gicv2m_attach(device_t dev) { struct arm_gicv2m_softc *sc; struct arm_gic_softc *psc; uint32_t typer; int rid; psc = device_get_softc(device_get_parent(dev)); sc = device_get_softc(dev); rid = 0; sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->sc_mem == NULL) { device_printf(dev, "Unable to allocate resources\n"); return (ENXIO); } typer = bus_read_4(sc->sc_mem, GICV2M_MSI_TYPER); sc->sc_spi_start = MSI_TYPER_SPI_BASE(typer); sc->sc_spi_count = MSI_TYPER_SPI_COUNT(typer); sc->sc_spi_end = sc->sc_spi_start + sc->sc_spi_count; /* Reserve these interrupts for MSI/MSI-X use */ arm_gic_reserve_msi_range(device_get_parent(dev), sc->sc_spi_start, sc->sc_spi_count); mtx_init(&sc->sc_mutex, "GICv2m lock", "", MTX_DEF); - intr_msi_register(dev, gic_xref(dev)); + intr_msi_register(dev, sc->sc_xref); if (bootverbose) device_printf(dev, "using spi %u to %u\n", sc->sc_spi_start, sc->sc_spi_start + sc->sc_spi_count - 1); return (0); } static int arm_gicv2m_alloc_msi(device_t dev, device_t child, int count, int maxcount, device_t *pic, struct intr_irqsrc **srcs) { struct arm_gic_softc *psc; struct arm_gicv2m_softc *sc; int i, irq, end_irq; bool found; KASSERT(powerof2(count), ("%s: bad count", __func__)); KASSERT(powerof2(maxcount), ("%s: bad maxcount", __func__)); psc = device_get_softc(device_get_parent(dev)); sc = device_get_softc(dev); mtx_lock(&sc->sc_mutex); found = false; for (irq = sc->sc_spi_start; irq < sc->sc_spi_end && !found; irq++) { /* Start on an aligned interrupt */ if ((irq & (maxcount - 1)) != 0) continue; /* Assume we found a valid range until shown otherwise */ found = true; /* Check this range is valid */ for (end_irq = irq; end_irq != irq + count - 1; end_irq++) { /* No free interrupts */ if (end_irq == sc->sc_spi_end) { found = false; break; } KASSERT((psc->gic_irqs[irq].gi_flags & GI_FLAG_MSI)!= 0, ("%s: Non-MSI interrupt found", __func__)); /* This is already used */ if ((psc->gic_irqs[irq].gi_flags & GI_FLAG_MSI_USED) == GI_FLAG_MSI_USED) { found = false; break; } } } /* Not enough interrupts were found */ if (!found || irq == sc->sc_spi_end) { mtx_unlock(&sc->sc_mutex); return (ENXIO); } for (i = 0; i < count; i++) { /* Mark the interrupt as used */ psc->gic_irqs[irq + i].gi_flags |= GI_FLAG_MSI_USED; } mtx_unlock(&sc->sc_mutex); for (i = 0; i < count; i++) srcs[i] = (struct intr_irqsrc *)&psc->gic_irqs[irq + i]; *pic = device_get_parent(dev); return (0); } static int arm_gicv2m_release_msi(device_t dev, device_t child, int count, struct intr_irqsrc **isrc) { struct arm_gicv2m_softc *sc; struct gic_irqsrc *gi; int i; sc = device_get_softc(dev); mtx_lock(&sc->sc_mutex); for (i = 0; i < count; i++) { gi = (struct gic_irqsrc *)isrc; KASSERT((gi->gi_flags & GI_FLAG_MSI_USED) == 
GI_FLAG_MSI_USED, ("%s: Trying to release an unused MSI-X interrupt", __func__)); gi->gi_flags &= ~GI_FLAG_MSI_USED; mtx_unlock(&sc->sc_mutex); } return (0); } static int arm_gicv2m_alloc_msix(device_t dev, device_t child, device_t *pic, struct intr_irqsrc **isrcp) { struct arm_gicv2m_softc *sc; struct arm_gic_softc *psc; int irq; psc = device_get_softc(device_get_parent(dev)); sc = device_get_softc(dev); mtx_lock(&sc->sc_mutex); /* Find an unused interrupt */ for (irq = sc->sc_spi_start; irq < sc->sc_spi_end; irq++) { KASSERT((psc->gic_irqs[irq].gi_flags & GI_FLAG_MSI) != 0, ("%s: Non-MSI interrupt found", __func__)); if ((psc->gic_irqs[irq].gi_flags & GI_FLAG_MSI_USED) == 0) break; } /* No free interrupt was found */ if (irq == sc->sc_spi_end) { mtx_unlock(&sc->sc_mutex); return (ENXIO); } /* Mark the interrupt as used */ psc->gic_irqs[irq].gi_flags |= GI_FLAG_MSI_USED; mtx_unlock(&sc->sc_mutex); *isrcp = (struct intr_irqsrc *)&psc->gic_irqs[irq]; *pic = device_get_parent(dev); return (0); } static int arm_gicv2m_release_msix(device_t dev, device_t child, struct intr_irqsrc *isrc) { struct arm_gicv2m_softc *sc; struct gic_irqsrc *gi; sc = device_get_softc(dev); gi = (struct gic_irqsrc *)isrc; KASSERT((gi->gi_flags & GI_FLAG_MSI_USED) == GI_FLAG_MSI_USED, ("%s: Trying to release an unused MSI-X interrupt", __func__)); mtx_lock(&sc->sc_mutex); gi->gi_flags &= ~GI_FLAG_MSI_USED; mtx_unlock(&sc->sc_mutex); return (0); } static int arm_gicv2m_map_msi(device_t dev, device_t child, struct intr_irqsrc *isrc, uint64_t *addr, uint32_t *data) { struct arm_gicv2m_softc *sc = device_get_softc(dev); struct gic_irqsrc *gi = (struct gic_irqsrc *)isrc; *addr = vtophys(rman_get_virtual(sc->sc_mem)) + GICv2M_MSI_SETSPI_NS; *data = gi->gi_irq; return (0); } static device_method_t arm_gicv2m_methods[] = { /* Device interface */ - DEVMETHOD(device_probe, arm_gicv2m_probe), DEVMETHOD(device_attach, arm_gicv2m_attach), /* MSI/MSI-X */ DEVMETHOD(msi_alloc_msi, arm_gicv2m_alloc_msi), DEVMETHOD(msi_release_msi, arm_gicv2m_release_msi), DEVMETHOD(msi_alloc_msix, arm_gicv2m_alloc_msix), DEVMETHOD(msi_release_msix, arm_gicv2m_release_msix), DEVMETHOD(msi_map_msi, arm_gicv2m_map_msi), /* End */ DEVMETHOD_END }; DEFINE_CLASS_0(gicv2m, arm_gicv2m_driver, arm_gicv2m_methods, sizeof(struct arm_gicv2m_softc)); - -static devclass_t arm_gicv2m_devclass; - -EARLY_DRIVER_MODULE(gicv2m, gic, arm_gicv2m_driver, - arm_gicv2m_devclass, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); #endif Index: user/alc/PQ_LAUNDRY/sys/arm/arm/gic.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm/arm/gic.h (nonexistent) +++ user/alc/PQ_LAUNDRY/sys/arm/arm/gic.h (revision 303642) @@ -0,0 +1,106 @@ +/*- + * Copyright (c) 2011,2016 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Andrew Turner under + * sponsorship from the FreeBSD Foundation. + * + * Developed by Damjan Marion + * + * Based on OMAP4 GIC code by Ben Gray + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
The name of the company nor the name of the author may be used to + * endorse or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _ARM_GIC_H_ +#define _ARM_GIC_H_ + +#define GIC_DEBUG_SPURIOUS + +#define GIC_FIRST_SGI 0 /* Irqs 0-15 are SGIs/IPIs. */ +#define GIC_LAST_SGI 15 +#define GIC_FIRST_PPI 16 /* Irqs 16-31 are private (per */ +#define GIC_LAST_PPI 31 /* core) peripheral interrupts. */ +#define GIC_FIRST_SPI 32 /* Irqs 32+ are shared peripherals. */ + +#ifdef INTRNG +struct arm_gic_range { + uint64_t bus; + uint64_t host; + uint64_t size; +}; +#endif + +struct arm_gic_softc { + device_t gic_dev; +#ifdef INTRNG + void * gic_intrhand; + struct gic_irqsrc * gic_irqs; +#endif + struct resource * gic_res[3]; + bus_space_tag_t gic_c_bst; + bus_space_tag_t gic_d_bst; + bus_space_handle_t gic_c_bsh; + bus_space_handle_t gic_d_bsh; + uint8_t ver; + struct mtx mutex; + uint32_t nirqs; + uint32_t typer; +#ifdef GIC_DEBUG_SPURIOUS + uint32_t last_irq[MAXCPU]; +#endif + +#ifdef INTRNG + /* FDT child data */ + pcell_t addr_cells; + pcell_t size_cells; + int nranges; + struct arm_gic_range * ranges; +#endif +}; + +DECLARE_CLASS(arm_gic_driver); + +#ifdef INTRNG +struct arm_gicv2m_softc { + struct resource *sc_mem; + struct mtx sc_mutex; + uintptr_t sc_xref; + u_int sc_spi_start; + u_int sc_spi_end; + u_int sc_spi_count; +}; + +DECLARE_CLASS(arm_gicv2m_driver); +#endif + +int arm_gic_attach(device_t); +int arm_gic_detach(device_t); +int arm_gicv2m_attach(device_t); +int arm_gic_intr(void *); + +#endif /* _ARM_GIC_H_ */ Property changes on: user/alc/PQ_LAUNDRY/sys/arm/arm/gic.h ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Index: user/alc/PQ_LAUNDRY/sys/arm/arm/gic_fdt.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm/arm/gic_fdt.c (nonexistent) +++ user/alc/PQ_LAUNDRY/sys/arm/arm/gic_fdt.c (revision 303642) @@ -0,0 +1,369 @@ +/*- + * Copyright (c) 2011,2016 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Andrew Turner under + * sponsorship from the FreeBSD Foundation. + * + * Developed by Damjan Marion + * + * Based on OMAP4 GIC code by Ben Gray + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company nor the name of the author may be used to + * endorse or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "opt_platform.h" + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + +#ifdef INTRNG +struct arm_gic_devinfo { + struct ofw_bus_devinfo obdinfo; + struct resource_list rl; +}; +#endif + +static device_probe_t gic_fdt_probe; +static device_attach_t gic_fdt_attach; +static ofw_bus_get_devinfo_t gic_ofw_get_devinfo; +#ifdef INTRNG +static bus_get_resource_list_t gic_fdt_get_resource_list; +static bool arm_gic_add_children(device_t); +#endif + +static struct ofw_compat_data compat_data[] = { + {"arm,gic", true}, /* Non-standard, used in FreeBSD dts. 
*/ + {"arm,gic-400", true}, + {"arm,cortex-a15-gic", true}, + {"arm,cortex-a9-gic", true}, + {"arm,cortex-a7-gic", true}, + {"arm,arm11mp-gic", true}, + {"brcm,brahma-b15-gic", true}, + {"qcom,msm-qgic2", true}, + {NULL, false} +}; + +static device_method_t gic_fdt_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, gic_fdt_probe), + DEVMETHOD(device_attach, gic_fdt_attach), + +#ifdef INTRNG + /* Bus interface */ + DEVMETHOD(bus_get_resource_list,gic_fdt_get_resource_list), + + /* ofw_bus interface */ + DEVMETHOD(ofw_bus_get_devinfo, gic_ofw_get_devinfo), + DEVMETHOD(ofw_bus_get_compat, ofw_bus_gen_get_compat), + DEVMETHOD(ofw_bus_get_model, ofw_bus_gen_get_model), + DEVMETHOD(ofw_bus_get_name, ofw_bus_gen_get_name), + DEVMETHOD(ofw_bus_get_node, ofw_bus_gen_get_node), + DEVMETHOD(ofw_bus_get_type, ofw_bus_gen_get_type), +#endif + + DEVMETHOD_END, +}; + +DEFINE_CLASS_1(gic, gic_fdt_driver, gic_fdt_methods, + sizeof(struct arm_gic_softc), arm_gic_driver); + +static devclass_t gic_fdt_devclass; + +EARLY_DRIVER_MODULE(gic, simplebus, gic_fdt_driver, gic_fdt_devclass, 0, 0, + BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); +EARLY_DRIVER_MODULE(gic, ofwbus, gic_fdt_driver, gic_fdt_devclass, 0, 0, + BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); + +static int +gic_fdt_probe(device_t dev) +{ + + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + + if (!ofw_bus_search_compatible(dev, compat_data)->ocd_data) + return (ENXIO); + device_set_desc(dev, "ARM Generic Interrupt Controller"); + return (BUS_PROBE_DEFAULT); +} + +static int +gic_fdt_attach(device_t dev) +{ +#ifdef INTRNG + struct arm_gic_softc *sc = device_get_softc(dev); + phandle_t pxref; + intptr_t xref; +#endif + int err; + + err = arm_gic_attach(dev); + if (err != 0) + return (err); + +#ifdef INTRNG + xref = OF_xref_from_node(ofw_bus_get_node(dev)); + + /* + * Now, when everything is initialized, it's right time to + * register interrupt controller to interrupt framefork. 
+ */ + if (intr_pic_register(dev, xref) == NULL) { + device_printf(dev, "could not register PIC\n"); + goto cleanup; + } + + /* + * Controller is root if: + * - doesn't have interrupt parent + * - his interrupt parent is this controller + */ + pxref = ofw_bus_find_iparent(ofw_bus_get_node(dev)); + if (pxref == 0 || xref == pxref) { + if (intr_pic_claim_root(dev, xref, arm_gic_intr, sc, + GIC_LAST_SGI - GIC_FIRST_SGI + 1) != 0) { + device_printf(dev, "could not set PIC as a root\n"); + intr_pic_deregister(dev, xref); + goto cleanup; + } + } else { + if (sc->gic_res[2] == NULL) { + device_printf(dev, + "not root PIC must have defined interrupt\n"); + intr_pic_deregister(dev, xref); + goto cleanup; + } + if (bus_setup_intr(dev, sc->gic_res[2], INTR_TYPE_CLK, + arm_gic_intr, NULL, sc, &sc->gic_intrhand)) { + device_printf(dev, "could not setup irq handler\n"); + intr_pic_deregister(dev, xref); + goto cleanup; + } + } + + OF_device_register_xref(xref, dev); + + /* If we have children probe and attach them */ + if (arm_gic_add_children(dev)) { + bus_generic_probe(dev); + return (bus_generic_attach(dev)); + } +#endif + + return (0); + +#ifdef INTRNG +cleanup: + arm_gic_detach(dev); + return(ENXIO); +#endif +} + +#ifdef INTRNG +static struct resource_list * +gic_fdt_get_resource_list(device_t bus, device_t child) +{ + struct arm_gic_devinfo *di; + + di = device_get_ivars(child); + KASSERT(di != NULL, ("gic_fdt_get_resource_list: No devinfo")); + + return (&di->rl); +} + +static int +arm_gic_fill_ranges(phandle_t node, struct arm_gic_softc *sc) +{ + pcell_t host_cells; + cell_t *base_ranges; + ssize_t nbase_ranges; + int i, j, k; + + host_cells = 1; + OF_getencprop(OF_parent(node), "#address-cells", &host_cells, + sizeof(host_cells)); + sc->addr_cells = 2; + OF_getencprop(node, "#address-cells", &sc->addr_cells, + sizeof(sc->addr_cells)); + sc->size_cells = 2; + OF_getencprop(node, "#size-cells", &sc->size_cells, + sizeof(sc->size_cells)); + + nbase_ranges = OF_getproplen(node, "ranges"); + if (nbase_ranges < 0) + return (-1); + sc->nranges = nbase_ranges / sizeof(cell_t) / + (sc->addr_cells + host_cells + sc->size_cells); + if (sc->nranges == 0) + return (0); + + sc->ranges = malloc(sc->nranges * sizeof(sc->ranges[0]), + M_DEVBUF, M_WAITOK); + base_ranges = malloc(nbase_ranges, M_DEVBUF, M_WAITOK); + OF_getencprop(node, "ranges", base_ranges, nbase_ranges); + + for (i = 0, j = 0; i < sc->nranges; i++) { + sc->ranges[i].bus = 0; + for (k = 0; k < sc->addr_cells; k++) { + sc->ranges[i].bus <<= 32; + sc->ranges[i].bus |= base_ranges[j++]; + } + sc->ranges[i].host = 0; + for (k = 0; k < host_cells; k++) { + sc->ranges[i].host <<= 32; + sc->ranges[i].host |= base_ranges[j++]; + } + sc->ranges[i].size = 0; + for (k = 0; k < sc->size_cells; k++) { + sc->ranges[i].size <<= 32; + sc->ranges[i].size |= base_ranges[j++]; + } + } + + free(base_ranges, M_DEVBUF); + return (sc->nranges); +} + +static bool +arm_gic_add_children(device_t dev) +{ + struct arm_gic_softc *sc; + struct arm_gic_devinfo *dinfo; + phandle_t child, node; + device_t cdev; + + sc = device_get_softc(dev); + node = ofw_bus_get_node(dev); + + /* If we have no children don't probe for them */ + child = OF_child(node); + if (child == 0) + return (false); + + if (arm_gic_fill_ranges(node, sc) < 0) { + device_printf(dev, "Have a child, but no ranges\n"); + return (false); + } + + for (; child != 0; child = OF_peer(child)) { + dinfo = malloc(sizeof(*dinfo), M_DEVBUF, M_WAITOK | M_ZERO); + + if (ofw_bus_gen_setup_devinfo(&dinfo->obdinfo, child) != 
0) { + free(dinfo, M_DEVBUF); + continue; + } + + resource_list_init(&dinfo->rl); + ofw_bus_reg_to_rl(dev, child, sc->addr_cells, + sc->size_cells, &dinfo->rl); + + cdev = device_add_child(dev, NULL, -1); + if (cdev == NULL) { + device_printf(dev, "<%s>: device_add_child failed\n", + dinfo->obdinfo.obd_name); + resource_list_free(&dinfo->rl); + ofw_bus_gen_destroy_devinfo(&dinfo->obdinfo); + free(dinfo, M_DEVBUF); + continue; + } + device_set_ivars(cdev, dinfo); + } + + return (true); +} + +static const struct ofw_bus_devinfo * +gic_ofw_get_devinfo(device_t bus __unused, device_t child) +{ + struct arm_gic_devinfo *di; + + di = device_get_ivars(child); + + return (&di->obdinfo); +} + +static struct ofw_compat_data gicv2m_compat_data[] = { + {"arm,gic-v2m-frame", true}, + {NULL, false} +}; + +static int +arm_gicv2m_fdt_probe(device_t dev) +{ + + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + + if (!ofw_bus_search_compatible(dev, gicv2m_compat_data)->ocd_data) + return (ENXIO); + + device_set_desc(dev, "ARM Generic Interrupt Controller MSI/MSIX"); + return (BUS_PROBE_DEFAULT); +} + +static int +arm_gicv2m_fdt_attach(device_t dev) +{ + struct arm_gicv2m_softc *sc; + + sc = device_get_softc(dev); + sc->sc_xref = OF_xref_from_node(ofw_bus_get_node(dev)); + + return (arm_gicv2m_attach(dev)); +} + +static device_method_t arm_gicv2m_fdt_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, arm_gicv2m_fdt_probe), + DEVMETHOD(device_attach, arm_gicv2m_fdt_attach), + + /* End */ + DEVMETHOD_END +}; + +DEFINE_CLASS_1(gicv2m, arm_gicv2m_fdt_driver, arm_gicv2m_fdt_methods, + sizeof(struct arm_gicv2m_softc), arm_gicv2m_driver); + +static devclass_t arm_gicv2m_fdt_devclass; + +EARLY_DRIVER_MODULE(gicv2m, gic, arm_gicv2m_fdt_driver, + arm_gicv2m_fdt_devclass, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); +#endif Property changes on: user/alc/PQ_LAUNDRY/sys/arm/arm/gic_fdt.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: user/alc/PQ_LAUNDRY/sys/arm64/arm64/machdep.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm64/arm64/machdep.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/arm64/arm64/machdep.c (revision 303642) @@ -1,1036 +1,1107 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
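The GICv2m support is now split between the bus-neutral back end earlier in this change (arm_gicv2m_attach() and the MSI methods) and this FDT front end, which only resolves the OFW xref, stores it in sc_xref and then calls the common attach. The sketch below is illustrative and not part of the diff: it shows how a frame's MSI_TYPER value and base address translate into the doorbell a device would be programmed with, mirroring the GICV2M_MSI_TYPER and GICv2M_MSI_SETSPI_NS definitions above.

    /*
     * Worked example: decode an MSI_TYPER value and compute the doorbell
     * address/data pair.  A TYPER of 0x00400020 describes SPIs 64..95.
     */
    static void
    gicv2m_doorbell_example(uint32_t typer, uint64_t frame_pa)
    {
    	u_int spi_base, spi_count;
    	uint64_t doorbell;

    	spi_base = (typer >> 16) & 0x3ff;	/* MSI_TYPER_SPI_BASE() */
    	spi_count = typer & 0x3ff;		/* MSI_TYPER_SPI_COUNT() */
    	doorbell = frame_pa + 0x040;		/* GICv2M_MSI_SETSPI_NS */

    	printf("SPIs %u-%u, MSI address %#jx, MSI data = SPI number\n",
    	    spi_base, spi_base + spi_count - 1, (uintmax_t)doorbell);
    }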
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ +#include "opt_acpi.h" #include "opt_platform.h" #include "opt_ddb.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif +#ifdef DEV_ACPI +#include +#include +#endif + #ifdef FDT #include #include #endif + +enum arm64_bus arm64_bus_method = ARM64_BUS_NONE; + struct pcpu __pcpu[MAXCPU]; static struct trapframe proc0_tf; vm_paddr_t phys_avail[PHYS_AVAIL_SIZE + 2]; vm_paddr_t dump_avail[PHYS_AVAIL_SIZE + 2]; int early_boot = 1; int cold = 1; long realmem = 0; long Maxmem = 0; #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t physmap[PHYSMAP_SIZE]; u_int physmap_idx; struct kva_md_info kmi; int64_t dcache_line_size; /* The minimum D cache line size */ int64_t icache_line_size; /* The minimum I cache line size */ int64_t idcache_line_size; /* The minimum cache line size */ int64_t dczva_line_size; /* The size of cache line the dc zva zeroes */ /* pagezero_* implementations are provided in support.S */ void pagezero_simple(void *); void pagezero_cache(void *); /* pagezero_simple is default pagezero */ void (*pagezero)(void *p) = pagezero_simple; static void cpu_startup(void *dummy) { identify_cpu(); vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); int cpu_idle_wakeup(int cpu) { return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; regs->sp = frame->tf_sp; regs->lr = frame->tf_lr; regs->elr = frame->tf_elr; regs->spsr = frame->tf_spsr; memcpy(regs->x, frame->tf_x, sizeof(regs->x)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; frame->tf_sp = regs->sp; frame->tf_lr = regs->lr; frame->tf_elr = regs->elr; frame->tf_spsr = regs->spsr; memcpy(frame->tf_x, regs->x, sizeof(frame->tf_x)); return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { #ifdef VFP struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. 
*/ vfp_save_state(td, pcb); memcpy(regs->fp_q, pcb->pcb_vfp, sizeof(regs->fp_q)); regs->fp_cr = pcb->pcb_fpcr; regs->fp_sr = pcb->pcb_fpsr; } else #endif memset(regs->fp_q, 0, sizeof(regs->fp_q)); return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { #ifdef VFP struct pcb *pcb; pcb = td->td_pcb; memcpy(pcb->pcb_vfp, regs->fp_q, sizeof(regs->fp_q)); pcb->pcb_fpcr = regs->fp_cr; pcb->pcb_fpsr = regs->fp_sr; #endif return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { panic("ARM64TODO: fill_dbregs"); } int set_dbregs(struct thread *td, struct dbreg *regs) { panic("ARM64TODO: set_dbregs"); } int ptrace_set_pc(struct thread *td, u_long addr) { panic("ARM64TODO: ptrace_set_pc"); return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_spsr |= PSR_SS; td->td_pcb->pcb_flags |= PCB_SINGLE_STEP; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_spsr &= ~PSR_SS; td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP; return (0); } void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf = td->td_frame; memset(tf, 0, sizeof(struct trapframe)); /* * We need to set x0 for init as it doesn't call * cpu_set_syscall_retval to copy the value. We also * need to set td_retval for the cases where we do. */ tf->tf_x[0] = td->td_retval[0] = stack; tf->tf_sp = STACKALIGN(stack); tf->tf_lr = imgp->entry_addr; tf->tf_elr = imgp->entry_addr; } /* Sanity check these are the same size, they will be memcpy'd to and fro */ CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == sizeof((struct gpregs *)0)->gp_x); CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == sizeof((struct reg *)0)->x); int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; if (clear_ret & GET_MC_CLEAR_RET) { mcp->mc_gpregs.gp_x[0] = 0; mcp->mc_gpregs.gp_spsr = tf->tf_spsr & ~PSR_C; } else { mcp->mc_gpregs.gp_x[0] = tf->tf_x[0]; mcp->mc_gpregs.gp_spsr = tf->tf_spsr; } memcpy(&mcp->mc_gpregs.gp_x[1], &tf->tf_x[1], sizeof(mcp->mc_gpregs.gp_x[1]) * (nitems(mcp->mc_gpregs.gp_x) - 1)); mcp->mc_gpregs.gp_sp = tf->tf_sp; mcp->mc_gpregs.gp_lr = tf->tf_lr; mcp->mc_gpregs.gp_elr = tf->tf_elr; return (0); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf = td->td_frame; memcpy(tf->tf_x, mcp->mc_gpregs.gp_x, sizeof(tf->tf_x)); tf->tf_sp = mcp->mc_gpregs.gp_sp; tf->tf_lr = mcp->mc_gpregs.gp_lr; tf->tf_elr = mcp->mc_gpregs.gp_elr; tf->tf_spsr = mcp->mc_gpregs.gp_spsr; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef VFP struct pcb *curpcb; critical_enter(); curpcb = curthread->td_pcb; if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. */ vfp_save_state(td, curpcb); memcpy(mcp->mc_fpregs.fp_q, curpcb->pcb_vfp, sizeof(mcp->mc_fpregs)); mcp->mc_fpregs.fp_cr = curpcb->pcb_fpcr; mcp->mc_fpregs.fp_sr = curpcb->pcb_fpsr; mcp->mc_fpregs.fp_flags = curpcb->pcb_fpflags; mcp->mc_flags |= _MC_FP_VALID; } critical_exit(); #endif } static void set_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef VFP struct pcb *curpcb; critical_enter(); if ((mcp->mc_flags & _MC_FP_VALID) != 0) { curpcb = curthread->td_pcb; /* * Discard any vfp state for the current thread, we * are about to override it. 
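fill_fpregs(), get_fpcontext() and set_fpcontext() all rely on the same lazy-VFP convention: the in-register FP state is only spilled to the PCB when PCB_FP_STARTED says the thread has actually used the FPU. A condensed sketch of that pattern, assuming the usual machine/vfp.h declarations and not part of the diff itself:

    /*
     * Illustrative only: copy a thread's FP state out, flushing the live
     * registers first when the thread has executed FP instructions.
     */
    static void
    copy_fpstate_sketch(struct thread *td, struct fpreg *regs)
    {
    	struct pcb *pcb = td->td_pcb;

    	if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) {
    		vfp_save_state(td, pcb);	/* spill live VFP registers */
    		memcpy(regs->fp_q, pcb->pcb_vfp, sizeof(regs->fp_q));
    		regs->fp_cr = pcb->pcb_fpcr;
    		regs->fp_sr = pcb->pcb_fpsr;
    	} else {
    		/* Never used the FPU: report clean state. */
    		memset(regs, 0, sizeof(*regs));
    	}
    }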
*/ vfp_discard(td); memcpy(curpcb->pcb_vfp, mcp->mc_fpregs.fp_q, sizeof(mcp->mc_fpregs)); curpcb->pcb_fpcr = mcp->mc_fpregs.fp_cr; curpcb->pcb_fpsr = mcp->mc_fpregs.fp_sr; curpcb->pcb_fpflags = mcp->mc_fpregs.fp_flags; } critical_exit(); #endif } void cpu_idle(int busy) { spinlock_enter(); if (!busy) cpu_idleclock(); if (!sched_runnable()) __asm __volatile( "dsb sy \n" "wfi \n"); if (!busy) cpu_activeclock(); spinlock_exit(); } void cpu_halt(void) { /* We should have shutdown by now, if not enter a low power sleep */ intr_disable(); while (1) { __asm __volatile("wfi"); } } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* ARM64TODO TBD */ } /* Get current clock frequency for the given CPU ID. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { panic("ARM64TODO: cpu_est_clockrate"); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } void spinlock_enter(void) { struct thread *td; register_t daif; td = curthread; if (td->td_md.md_spinlock_count == 0) { daif = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_daif = daif; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t daif; td = curthread; critical_exit(); daif = td->td_md.md_saved_daif; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(daif); } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { ucontext_t uc; uint32_t spsr; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); spsr = uc.uc_mcontext.mc_gpregs.gp_spsr; if ((spsr & PSR_M_MASK) != PSR_M_EL0t || (spsr & (PSR_F | PSR_I | PSR_A | PSR_D)) != 0) return (EINVAL); set_mcontext(td, &uc.uc_mcontext); set_fpcontext(td, &uc.uc_mcontext); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { int i; for (i = 0; i < PCB_LR; i++) pcb->pcb_x[i] = tf->tf_x[i]; pcb->pcb_x[PCB_LR] = tf->tf_lr; pcb->pcb_pc = tf->tf_elr; pcb->pcb_sp = tf->tf_sp; } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td; struct proc *p; struct trapframe *tf; struct sigframe *fp, frame; struct sigacts *psp; struct sysentvec *sysent; int code, onstack, sig; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. 
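spinlock_enter() and spinlock_exit() above implement the usual nested-spinlock discipline: only the outermost enter masks interrupts (saving DAIF), and only the matching outermost exit restores them. Reduced to its essentials (the real code keeps the count in the per-thread MD area and also pairs with critical_enter()/critical_exit()); the struct and function names here are illustrative:

    struct spin_nest {
    	int		count;		/* nesting depth */
    	register_t	saved_daif;	/* DAIF at the outermost enter */
    };

    static void
    spin_enter_sketch(struct spin_nest *sn)
    {
    	if (sn->count++ == 0)
    		sn->saved_daif = intr_disable();
    }

    static void
    spin_exit_sketch(struct spin_nest *sn)
    {
    	if (--sn->count == 0)
    		intr_restore(sn->saved_daif);
    }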
*/ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else { fp = (struct sigframe *)td->td_frame->tf_sp; } /* Make room, keeping the stack aligned */ fp--; fp = (struct sigframe *)STACKALIGN(fp); /* Fill in the frame to copy out */ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); get_fpcontext(td, &frame.sf_uc.uc_mcontext); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack = td->td_sigstk; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } tf->tf_x[0]= sig; tf->tf_x[1] = (register_t)&fp->sf_si; tf->tf_x[2] = (register_t)&fp->sf_uc; tf->tf_elr = (register_t)catcher; tf->tf_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_lr = (register_t)sysent->sv_sigcode_base; else tf->tf_lr = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_elr, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } static void init_proc0(vm_offset_t kstack) { struct pcpu *pcpup = &__pcpu[0]; proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_pcb = (struct pcb *)(thread0.td_kstack) - 1; thread0.td_pcb->pcb_fpflags = 0; thread0.td_pcb->pcb_vfpcpu = UINT_MAX; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } typedef struct { uint32_t type; uint64_t phys_start; uint64_t virt_start; uint64_t num_pages; uint64_t attr; } EFI_MEMORY_DESCRIPTOR; static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, u_int *physmap_idxp) { u_int i, insert_idx, _physmap_idx; _physmap_idx = *physmap_idxp; if (length == 0) return (1); /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = _physmap_idx; for (i = 0; i <= _physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= _physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } _physmap_idx += 2; *physmap_idxp = _physmap_idx; if (_physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = _physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. 
*/ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } #ifdef FDT static void add_fdt_mem_regions(struct mem_region *mr, int mrcnt, vm_paddr_t *physmap, u_int *physmap_idxp) { for (int i = 0; i < mrcnt; i++) { if (!add_physmap_entry(mr[i].mr_start, mr[i].mr_size, physmap, physmap_idxp)) break; } } #endif #define efi_next_descriptor(ptr, size) \ ((struct efi_md *)(((uint8_t *) ptr) + size)) static void add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, u_int *physmap_idxp) { struct efi_md *map, *p; const char *type; size_t efisz; int ndesc, i; static const char *types[] = { "Reserved", "LoaderCode", "LoaderData", "BootServicesCode", "BootServicesData", "RuntimeServicesCode", "RuntimeServicesData", "ConventionalMemory", "UnusableMemory", "ACPIReclaimMemory", "ACPIMemoryNVS", "MemoryMappedIO", "MemoryMappedIOPortSpace", "PalCode", "PersistentMemory" }; /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. */ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return; ndesc = efihdr->memory_size / efihdr->descriptor_size; if (boothowto & RB_VERBOSE) printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { if (boothowto & RB_VERBOSE) { if (p->md_type < nitems(types)) type = types[p->md_type]; else type = ""; printf("%23s %012lx %12p %08lx ", type, p->md_phys, p->md_virt, p->md_pages); if (p->md_attr & EFI_MD_ATTR_UC) printf("UC "); if (p->md_attr & EFI_MD_ATTR_WC) printf("WC "); if (p->md_attr & EFI_MD_ATTR_WT) printf("WT "); if (p->md_attr & EFI_MD_ATTR_WB) printf("WB "); if (p->md_attr & EFI_MD_ATTR_UCE) printf("UCE "); if (p->md_attr & EFI_MD_ATTR_WP) printf("WP "); if (p->md_attr & EFI_MD_ATTR_RP) printf("RP "); if (p->md_attr & EFI_MD_ATTR_XP) printf("XP "); if (p->md_attr & EFI_MD_ATTR_NV) printf("NV "); if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE) printf("MORE_RELIABLE "); if (p->md_attr & EFI_MD_ATTR_RO) printf("RO "); if (p->md_attr & EFI_MD_ATTR_RT) printf("RUNTIME"); printf("\n"); } switch (p->md_type) { case EFI_MD_TYPE_CODE: case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. 
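add_physmap_entry() maintains physmap[] as a sorted list of (start, end) pairs and folds abutting regions into existing entries instead of growing the array. A usage sketch with made-up addresses (the demo array and values are illustrative; the second call extends the first entry rather than creating a new one):

    static vm_paddr_t physmap_demo[PHYSMAP_SIZE];
    static u_int physmap_demo_idx;

    static void
    physmap_demo_fill(void)
    {
    	/* 0x80000000-0x80100000 becomes physmap_demo[0]/[1]. */
    	add_physmap_entry(0x80000000, 0x100000, physmap_demo,
    	    &physmap_demo_idx);
    	/* Abuts the previous entry, so its end is simply extended. */
    	add_physmap_entry(0x80100000, 0x100000, physmap_demo,
    	    &physmap_demo_idx);
    }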
*/ break; default: continue; } if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE), physmap, physmap_idxp)) break; } } #ifdef FDT static void try_load_dtb(caddr_t kmdp) { vm_offset_t dtbp; dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); if (dtbp == (vm_offset_t)NULL) { printf("ERROR loading DTB\n"); return; } if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); } #endif +static bool +bus_probe(void) +{ + bool has_acpi, has_fdt; + char *order, *env; + + has_acpi = has_fdt = false; + +#ifdef FDT + has_fdt = (OF_peer(0) != 0); +#endif +#ifdef DEV_ACPI + has_acpi = (acpi_find_table(ACPI_SIG_SPCR) != 0); +#endif + + env = kern_getenv("kern.cfg.order"); + if (env != NULL) { + order = env; + while (order != NULL) { + if (has_acpi && + strncmp(order, "acpi", 4) == 0 && + (order[4] == ',' || order[4] == '\0')) { + arm64_bus_method = ARM64_BUS_ACPI; + break; + } + if (has_fdt && + strncmp(order, "fdt", 3) == 0 && + (order[3] == ',' || order[3] == '\0')) { + arm64_bus_method = ARM64_BUS_FDT; + break; + } + order = strchr(order, ','); + } + freeenv(env); + + /* If we set the bus method it is valid */ + if (arm64_bus_method != ARM64_BUS_NONE) + return (true); + } + /* If no order or an invalid order was set use the default */ + if (arm64_bus_method == ARM64_BUS_NONE) { + if (has_fdt) + arm64_bus_method = ARM64_BUS_FDT; + else if (has_acpi) + arm64_bus_method = ARM64_BUS_ACPI; + } + + /* + * If no option was set the default is valid, otherwise we are + * setting one to get cninit() working, then calling panic to tell + * the user about the invalid bus setup. + */ + return (env == NULL); +} + static void cache_setup(void) { int dcache_line_shift, icache_line_shift, dczva_line_shift; uint32_t ctr_el0; uint32_t dczid_el0; ctr_el0 = READ_SPECIALREG(ctr_el0); /* Read the log2 words in each D cache line */ dcache_line_shift = CTR_DLINE_SIZE(ctr_el0); /* Get the D cache line size */ dcache_line_size = sizeof(int) << dcache_line_shift; /* And the same for the I cache */ icache_line_shift = CTR_ILINE_SIZE(ctr_el0); icache_line_size = sizeof(int) << icache_line_shift; idcache_line_size = MIN(dcache_line_size, icache_line_size); dczid_el0 = READ_SPECIALREG(dczid_el0); /* Check if dc zva is not prohibited */ if (dczid_el0 & DCZID_DZP) dczva_line_size = 0; else { /* Same as with above calculations */ dczva_line_shift = DCZID_BS_SIZE(dczid_el0); dczva_line_size = sizeof(int) << dczva_line_shift; /* Change pagezero function */ pagezero = pagezero_cache; } } void initarm(struct arm64_bootparams *abp) { struct efi_map_header *efihdr; struct pcpu *pcpup; #ifdef FDT struct mem_region mem_regions[FDT_MEM_REGIONS]; int mem_regions_sz; #endif vm_offset_t lastaddr; caddr_t kmdp; vm_paddr_t mem_len; + bool valid; int i; /* Set the module data location */ preload_metadata = (caddr_t)(uintptr_t)(abp->modulep); /* Find the kernel address */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); init_static_kenv(MD_FETCH(kmdp, MODINFOMD_ENVP, char *), 0); #ifdef FDT try_load_dtb(kmdp); #endif /* Find the address to start allocating from */ lastaddr = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); /* Load the physical memory ranges */ physmap_idx = 0; efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); if (efihdr != NULL) add_efi_map_entries(efihdr, physmap, &physmap_idx); 
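bus_probe() above decides between FDT and ACPI enumeration: it honours a kern.cfg.order preference when one is set and the corresponding support is present, otherwise it prefers FDT and falls back to ACPI. A standalone sketch of the list walk follows; pick_bus_method() and its parameters are illustrative, and for clarity the sketch explicitly steps past each comma separator:

    #include <stdbool.h>
    #include <string.h>

    /* Return the first supported entry of a list such as "acpi,fdt". */
    static const char *
    pick_bus_method(const char *order, bool has_acpi, bool has_fdt)
    {
    	while (order != NULL && *order != '\0') {
    		if (has_acpi && strncmp(order, "acpi", 4) == 0 &&
    		    (order[4] == ',' || order[4] == '\0'))
    			return ("acpi");
    		if (has_fdt && strncmp(order, "fdt", 3) == 0 &&
    		    (order[3] == ',' || order[3] == '\0'))
    			return ("fdt");
    		order = strchr(order, ',');
    		if (order != NULL)
    			order++;	/* step past the separator */
    	}
    	return (NULL);			/* caller applies the default */
    }

In practice the preference would be supplied as a loader tunable, e.g. kern.cfg.order="acpi,fdt" in loader.conf.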
#ifdef FDT else { /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, NULL) != 0) panic("Cannot get physical memory regions"); add_fdt_mem_regions(mem_regions, mem_regions_sz, physmap, &physmap_idx); } #endif /* Print the memory map */ mem_len = 0; for (i = 0; i < physmap_idx; i += 2) { dump_avail[i] = physmap[i]; dump_avail[i + 1] = physmap[i + 1]; mem_len += physmap[i + 1] - physmap[i]; } dump_avail[i] = 0; dump_avail[i + 1] = 0; /* Set the pcpu data, this is needed by pmap_bootstrap */ pcpup = &__pcpu[0]; pcpu_init(pcpup, 0, sizeof(struct pcpu)); /* * Set the pcpu pointer with a backup in tpidr_el1 to be * loaded when entering the kernel from userland. */ __asm __volatile( "mov x18, %0 \n" "msr tpidr_el1, %0" :: "r"(pcpup)); PCPU_SET(curthread, &thread0); /* Do basic tuning, hz etc */ init_param1(); cache_setup(); /* Bootstrap enough of pmap to enter the kernel proper */ pmap_bootstrap(abp->kern_l0pt, abp->kern_l1pt, KERNBASE - abp->kern_delta, lastaddr - KERNBASE); devmap_bootstrap(0, NULL); + valid = bus_probe(); + cninit(); + + if (!valid) + panic("Invalid bus configuration: %s", + kern_getenv("kern.cfg.order")); init_proc0(abp->kern_stack); msgbufinit(msgbufp, msgbufsize); mutex_init(); init_param2(physmem); dbg_monitor_init(); kdb_init(); early_boot = 0; } uint32_t (*arm_cpu_fill_vdso_timehands)(struct vdso_timehands *, struct timecounter *); uint32_t cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc) { return (arm_cpu_fill_vdso_timehands != NULL ? arm_cpu_fill_vdso_timehands(vdso_th, tc) : 0); } #ifdef DDB #include DB_SHOW_COMMAND(specialregs, db_show_spregs) { #define PRINT_REG(reg) \ db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg)) PRINT_REG(actlr_el1); PRINT_REG(afsr0_el1); PRINT_REG(afsr1_el1); PRINT_REG(aidr_el1); PRINT_REG(amair_el1); PRINT_REG(ccsidr_el1); PRINT_REG(clidr_el1); PRINT_REG(contextidr_el1); PRINT_REG(cpacr_el1); PRINT_REG(csselr_el1); PRINT_REG(ctr_el0); PRINT_REG(currentel); PRINT_REG(daif); PRINT_REG(dczid_el0); PRINT_REG(elr_el1); PRINT_REG(esr_el1); PRINT_REG(far_el1); #if 0 /* ARM64TODO: Enable VFP before reading floating-point registers */ PRINT_REG(fpcr); PRINT_REG(fpsr); #endif PRINT_REG(id_aa64afr0_el1); PRINT_REG(id_aa64afr1_el1); PRINT_REG(id_aa64dfr0_el1); PRINT_REG(id_aa64dfr1_el1); PRINT_REG(id_aa64isar0_el1); PRINT_REG(id_aa64isar1_el1); PRINT_REG(id_aa64pfr0_el1); PRINT_REG(id_aa64pfr1_el1); PRINT_REG(id_afr0_el1); PRINT_REG(id_dfr0_el1); PRINT_REG(id_isar0_el1); PRINT_REG(id_isar1_el1); PRINT_REG(id_isar2_el1); PRINT_REG(id_isar3_el1); PRINT_REG(id_isar4_el1); PRINT_REG(id_isar5_el1); PRINT_REG(id_mmfr0_el1); PRINT_REG(id_mmfr1_el1); PRINT_REG(id_mmfr2_el1); PRINT_REG(id_mmfr3_el1); #if 0 /* Missing from llvm */ PRINT_REG(id_mmfr4_el1); #endif PRINT_REG(id_pfr0_el1); PRINT_REG(id_pfr1_el1); PRINT_REG(isr_el1); PRINT_REG(mair_el1); PRINT_REG(midr_el1); PRINT_REG(mpidr_el1); PRINT_REG(mvfr0_el1); PRINT_REG(mvfr1_el1); PRINT_REG(mvfr2_el1); PRINT_REG(revidr_el1); PRINT_REG(sctlr_el1); PRINT_REG(sp_el0); PRINT_REG(spsel); PRINT_REG(spsr_el1); PRINT_REG(tcr_el1); PRINT_REG(tpidr_el0); PRINT_REG(tpidr_el1); PRINT_REG(tpidrro_el0); PRINT_REG(ttbr0_el1); PRINT_REG(ttbr1_el1); PRINT_REG(vbar_el1); #undef PRINT_REG } DB_SHOW_COMMAND(vtop, db_show_vtop) { uint64_t phys; if (have_addr) { phys = arm64_address_translate_s1e1r(addr); db_printf("Physical address reg: 0x%016lx\n", phys); } else db_printf("show vtop \n"); } #endif Index: 
user/alc/PQ_LAUNDRY/sys/arm64/arm64/mp_machdep.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm64/arm64/mp_machdep.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/arm64/arm64/mp_machdep.c (revision 303642) @@ -1,707 +1,703 @@ /*- * Copyright (c) 2015-2016 The FreeBSD Foundation * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include "opt_kstack_pages.h" #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #ifdef VFP #include #endif #ifdef FDT #include #include #endif #include #include "pic_if.h" typedef void intr_ipi_send_t(void *, cpuset_t, u_int); typedef void intr_ipi_handler_t(void *); #define INTR_IPI_NAMELEN (MAXCOMLEN + 1) struct intr_ipi { intr_ipi_handler_t * ii_handler; void * ii_handler_arg; intr_ipi_send_t * ii_send; void * ii_send_arg; char ii_name[INTR_IPI_NAMELEN]; u_long * ii_count; }; static struct intr_ipi ipi_sources[INTR_IPI_COUNT]; static struct intr_ipi *intr_ipi_lookup(u_int); static void intr_pic_ipi_setup(u_int, const char *, intr_ipi_handler_t *, void *); boolean_t ofw_cpu_reg(phandle_t node, u_int, cell_t *); extern struct pcpu __pcpu[]; -static enum { - CPUS_UNKNOWN, -#ifdef FDT - CPUS_FDT, -#endif -} cpu_enum_method; - static device_identify_t arm64_cpu_identify; static device_probe_t arm64_cpu_probe; static device_attach_t arm64_cpu_attach; static void ipi_ast(void *); static void ipi_hardclock(void *); static void ipi_preempt(void *); static void ipi_rendezvous(void *); static void ipi_stop(void *); static int ipi_handler(void *arg); struct mtx ap_boot_mtx; struct pcb stoppcbs[MAXCPU]; static device_t cpu_list[MAXCPU]; /* * Not all systems boot from the first CPU in the device tree. To work around * this we need to find which CPU we have booted from so when we later * enable the secondary CPUs we skip this one. 
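The ipi_sources[] table above is the IPI registration mechanism in miniature: intr_pic_ipi_setup() fills in a slot per IPI number and intr_ipi_send()/intr_ipi_dispatch() index back into it. Stripped of the interrupt-framework plumbing and per-CPU counters, the idea is just the following (slot names are illustrative):

    struct ipi_slot {
    	void	(*handler)(void *);	/* e.g. ipi_preempt */
    	void	*arg;
    };

    static struct ipi_slot ipi_slots[INTR_IPI_COUNT];

    static void
    ipi_slot_setup(u_int ipi, void (*hand)(void *), void *arg)
    {
    	ipi_slots[ipi].handler = hand;
    	ipi_slots[ipi].arg = arg;
    }

    static void
    ipi_slot_dispatch(u_int ipi)
    {
    	if (ipi_slots[ipi].handler != NULL)
    		ipi_slots[ipi].handler(ipi_slots[ipi].arg);
    }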
*/ static int cpu0 = -1; void mpentry(unsigned long cpuid); void init_secondary(uint64_t); uint8_t secondary_stacks[MAXCPU - 1][PAGE_SIZE * KSTACK_PAGES] __aligned(16); /* Set to 1 once we're ready to let the APs out of the pen. */ volatile int aps_ready = 0; /* Temporary variables for init_secondary() */ void *dpcpu[MAXCPU - 1]; static device_method_t arm64_cpu_methods[] = { /* Device interface */ DEVMETHOD(device_identify, arm64_cpu_identify), DEVMETHOD(device_probe, arm64_cpu_probe), DEVMETHOD(device_attach, arm64_cpu_attach), DEVMETHOD_END }; static devclass_t arm64_cpu_devclass; static driver_t arm64_cpu_driver = { "arm64_cpu", arm64_cpu_methods, 0 }; DRIVER_MODULE(arm64_cpu, cpu, arm64_cpu_driver, arm64_cpu_devclass, 0, 0); static void arm64_cpu_identify(driver_t *driver, device_t parent) { if (device_find_child(parent, "arm64_cpu", -1) != NULL) return; if (BUS_ADD_CHILD(parent, 0, "arm64_cpu", -1) == NULL) device_printf(parent, "add child failed\n"); } static int arm64_cpu_probe(device_t dev) { u_int cpuid; cpuid = device_get_unit(dev); if (cpuid >= MAXCPU || cpuid > mp_maxid) return (EINVAL); device_quiet(dev); return (0); } static int arm64_cpu_attach(device_t dev) { const uint32_t *reg; size_t reg_size; u_int cpuid; int i; cpuid = device_get_unit(dev); if (cpuid >= MAXCPU || cpuid > mp_maxid) return (EINVAL); KASSERT(cpu_list[cpuid] == NULL, ("Already have cpu %u", cpuid)); reg = cpu_get_cpuid(dev, ®_size); if (reg == NULL) return (EINVAL); if (bootverbose) { device_printf(dev, "register <"); for (i = 0; i < reg_size; i++) printf("%s%x", (i == 0) ? "" : " ", reg[i]); printf(">\n"); } /* Set the device to start it later */ cpu_list[cpuid] = dev; return (0); } static void release_aps(void *dummy __unused) { int cpu, i; intr_pic_ipi_setup(IPI_AST, "ast", ipi_ast, NULL); intr_pic_ipi_setup(IPI_PREEMPT, "preempt", ipi_preempt, NULL); intr_pic_ipi_setup(IPI_RENDEZVOUS, "rendezvous", ipi_rendezvous, NULL); intr_pic_ipi_setup(IPI_STOP, "stop", ipi_stop, NULL); intr_pic_ipi_setup(IPI_STOP_HARD, "stop hard", ipi_stop, NULL); intr_pic_ipi_setup(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL); atomic_store_rel_int(&aps_ready, 1); /* Wake up the other CPUs */ __asm __volatile("sev"); printf("Release APs\n"); for (i = 0; i < 2000; i++) { if (smp_started) { for (cpu = 0; cpu <= mp_maxid; cpu++) { if (CPU_ABSENT(cpu)) continue; print_cpu_features(cpu); } return; } DELAY(1000); } printf("APs not started\n"); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); void init_secondary(uint64_t cpu) { struct pcpu *pcpup; pcpup = &__pcpu[cpu]; /* * Set the pcpu pointer with a backup in tpidr_el1 to be * loaded when entering the kernel from userland. */ __asm __volatile( "mov x18, %0 \n" "msr tpidr_el1, %0" :: "r"(pcpup)); /* Spin until the BSP releases the APs */ while (!aps_ready) __asm __volatile("wfe"); /* Initialize curthread */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); pcpup->pc_curthread = pcpup->pc_idlethread; pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb; /* * Identify current CPU. This is necessary to setup * affinity registers and to provide support for * runtime chip identification. */ identify_cpu(); intr_pic_init_secondary(); /* Start per-CPU event timers. 
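release_aps() and init_secondary() above pair up as a simple release/acquire handshake: the BSP installs the IPI handlers, publishes aps_ready with a release store and executes SEV, while every AP parks in WFE until it observes the flag. The shape of it, in isolation (the demo flag is illustrative; the real code also waits for smp_started and prints the CPU features):

    static volatile u_int aps_ready_demo;

    /* Boot CPU: let the application processors out of the pen. */
    static void
    bsp_release_sketch(void)
    {
    	atomic_store_rel_int(&aps_ready_demo, 1);
    	__asm __volatile("sev");
    }

    /* Application processor: wait until the BSP releases us. */
    static void
    ap_wait_sketch(void)
    {
    	while (!aps_ready_demo)
    		__asm __volatile("wfe");
    }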
*/ cpu_initclocks_ap(); #ifdef VFP vfp_init(); #endif dbg_monitor_init(); /* Enable interrupts */ intr_enable(); mtx_lock_spin(&ap_boot_mtx); atomic_add_rel_32(&smp_cpus, 1); if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); } mtx_unlock_spin(&ap_boot_mtx); /* Enter the scheduler */ sched_throw(NULL); panic("scheduler returned us to init_secondary"); /* NOTREACHED */ } /* * Send IPI thru interrupt controller. */ static void pic_ipi_send(void *arg, cpuset_t cpus, u_int ipi) { KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); PIC_IPI_SEND(intr_irq_root_dev, arg, cpus, ipi); } /* * Setup IPI handler on interrupt controller. * * Not SMP coherent. */ static void intr_pic_ipi_setup(u_int ipi, const char *name, intr_ipi_handler_t *hand, void *arg) { struct intr_irqsrc *isrc; struct intr_ipi *ii; int error; KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); KASSERT(hand != NULL, ("%s: ipi %u no handler", __func__, ipi)); error = PIC_IPI_SETUP(intr_irq_root_dev, ipi, &isrc); if (error != 0) return; isrc->isrc_handlers++; ii = intr_ipi_lookup(ipi); KASSERT(ii->ii_count == NULL, ("%s: ipi %u reused", __func__, ipi)); ii->ii_handler = hand; ii->ii_handler_arg = arg; ii->ii_send = pic_ipi_send; ii->ii_send_arg = isrc; strlcpy(ii->ii_name, name, INTR_IPI_NAMELEN); ii->ii_count = intr_ipi_setup_counters(name); } static void intr_ipi_send(cpuset_t cpus, u_int ipi) { struct intr_ipi *ii; ii = intr_ipi_lookup(ipi); if (ii->ii_count == NULL) panic("%s: not setup IPI %u", __func__, ipi); ii->ii_send(ii->ii_send_arg, cpus, ipi); } static void ipi_ast(void *dummy __unused) { CTR0(KTR_SMP, "IPI_AST"); } static void ipi_hardclock(void *dummy __unused) { CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__); hardclockintr(); } static void ipi_preempt(void *dummy __unused) { CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__); sched_preempt(curthread); } static void ipi_rendezvous(void *dummy __unused) { CTR0(KTR_SMP, "IPI_RENDEZVOUS"); smp_rendezvous_action(); } static void ipi_stop(void *dummy __unused) { u_int cpu; CTR0(KTR_SMP, "IPI_STOP"); cpu = PCPU_GET(cpuid); savectx(&stoppcbs[cpu]); /* Indicate we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) cpu_spinwait(); CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); CTR0(KTR_SMP, "IPI_STOP (restart)"); } struct cpu_group * cpu_topo(void) { return (smp_topo_none()); } /* Determine if we running MP machine */ int cpu_mp_probe(void) { /* ARM64TODO: Read the u bit of mpidr_el1 to determine this */ return (1); } #ifdef FDT static boolean_t cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) { uint64_t target_cpu; struct pcpu *pcpup; vm_paddr_t pa; u_int cpuid; int err; /* Check we are able to start this cpu */ if (id > mp_maxid) return (0); KASSERT(id < MAXCPU, ("Too many CPUs")); /* We are already running on cpu 0 */ if (id == cpu0) return (1); /* * Rotate the CPU IDs to put the boot CPU as CPU 0. We keep the other * CPUs ordered as the are likely grouped into clusters so it can be * useful to keep that property, e.g. for the GICv3 driver to send * an IPI to all CPUs in the cluster. 
*/ cpuid = id; if (cpuid < cpu0) cpuid += mp_maxid + 1; cpuid -= cpu0; pcpup = &__pcpu[cpuid]; pcpu_init(pcpup, cpuid, sizeof(struct pcpu)); dpcpu[cpuid - 1] = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); dpcpu_init(dpcpu[cpuid - 1], cpuid); target_cpu = reg[0]; if (addr_size == 2) { target_cpu <<= 32; target_cpu |= reg[1]; } printf("Starting CPU %u (%lx)\n", cpuid, target_cpu); pa = pmap_extract(kernel_pmap, (vm_offset_t)mpentry); err = psci_cpu_on(target_cpu, pa, cpuid); if (err != PSCI_RETVAL_SUCCESS) { /* Panic here if INVARIANTS are enabled */ KASSERT(0, ("Failed to start CPU %u (%lx)\n", id, target_cpu)); pcpu_destroy(pcpup); kmem_free(kernel_arena, (vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE); dpcpu[cpuid - 1] = NULL; /* Notify the user that the CPU failed to start */ printf("Failed to start CPU %u (%lx)\n", id, target_cpu); } else CPU_SET(cpuid, &all_cpus); return (1); } #endif /* Initialize and fire up non-boot processors */ void cpu_mp_start(void) { mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); CPU_SET(0, &all_cpus); - switch(cpu_enum_method) { + switch(arm64_bus_method) { #ifdef FDT - case CPUS_FDT: + case ARM64_BUS_FDT: KASSERT(cpu0 >= 0, ("Current CPU was not found")); ofw_cpu_early_foreach(cpu_init_fdt, true); break; #endif - case CPUS_UNKNOWN: + default: break; } } /* Introduce rest of cores to the world */ void cpu_mp_announce(void) { } static boolean_t cpu_find_cpu0_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) { uint64_t mpidr_fdt, mpidr_reg; if (cpu0 < 0) { mpidr_fdt = reg[0]; if (addr_size == 2) { mpidr_fdt <<= 32; mpidr_fdt |= reg[1]; } mpidr_reg = READ_SPECIALREG(mpidr_el1); if ((mpidr_reg & 0xff00fffffful) == mpidr_fdt) cpu0 = id; } return (TRUE); } void cpu_mp_setmaxid(void) { #ifdef FDT int cores; - cores = ofw_cpu_early_foreach(cpu_find_cpu0_fdt, false); - if (cores > 0) { - cores = MIN(cores, MAXCPU); - if (bootverbose) - printf("Found %d CPUs in the device tree\n", cores); - mp_ncpus = cores; - mp_maxid = cores - 1; - cpu_enum_method = CPUS_FDT; - return; + if (arm64_bus_method == ARM64_BUS_FDT) { + cores = ofw_cpu_early_foreach(cpu_find_cpu0_fdt, false); + if (cores > 0) { + cores = MIN(cores, MAXCPU); + if (bootverbose) + printf("Found %d CPUs in the device tree\n", + cores); + mp_ncpus = cores; + mp_maxid = cores - 1; + return; + } } #endif if (bootverbose) printf("No CPU data, limiting to 1 core\n"); mp_ncpus = 1; mp_maxid = 0; } /* * Lookup IPI source. */ static struct intr_ipi * intr_ipi_lookup(u_int ipi) { if (ipi >= INTR_IPI_COUNT) panic("%s: no such IPI %u", __func__, ipi); return (&ipi_sources[ipi]); } /* * interrupt controller dispatch function for IPIs. It should * be called straight from the interrupt controller, when associated * interrupt source is learned. Or from anybody who has an interrupt * source mapped. */ void intr_ipi_dispatch(u_int ipi, struct trapframe *tf) { void *arg; struct intr_ipi *ii; ii = intr_ipi_lookup(ipi); if (ii->ii_count == NULL) panic("%s: not setup IPI %u", __func__, ipi); intr_ipi_increment_count(ii->ii_count, PCPU_GET(cpuid)); /* * Supply ipi filter with trapframe argument * if none is registered. */ arg = ii->ii_handler_arg != NULL ? ii->ii_handler_arg : tf; ii->ii_handler(arg); } #ifdef notyet /* * Map IPI into interrupt controller. * * Not SMP coherent. 
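The renumbering in cpu_init_fdt() above ("rotate the CPU IDs") keeps the CPUs in their original relative order while forcing the booting CPU to become logical CPU 0. A worked example of the same arithmetic; rotate_cpu_id() is an illustrative helper, not part of the change:

    static u_int
    rotate_cpu_id(u_int id, u_int boot_id, u_int maxid)
    {
    	u_int cpuid = id;

    	if (cpuid < boot_id)
    		cpuid += maxid + 1;	/* wrap IDs below the boot CPU */
    	return (cpuid - boot_id);
    }
    /*
     * With boot_id = 2 and maxid = 3 (four CPUs):
     *   2 -> 0, 3 -> 1, 0 -> 2, 1 -> 3
     */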
*/ static int ipi_map(struct intr_irqsrc *isrc, u_int ipi) { boolean_t is_percpu; int error; if (ipi >= INTR_IPI_COUNT) panic("%s: no such IPI %u", __func__, ipi); KASSERT(intr_irq_root_dev != NULL, ("%s: no root attached", __func__)); isrc->isrc_type = INTR_ISRCT_NAMESPACE; isrc->isrc_nspc_type = INTR_IRQ_NSPC_IPI; isrc->isrc_nspc_num = ipi_next_num; error = PIC_REGISTER(intr_irq_root_dev, isrc, &is_percpu); if (error == 0) { isrc->isrc_dev = intr_irq_root_dev; ipi_next_num++; } return (error); } /* * Setup IPI handler to interrupt source. * * Note that there could be more ways how to send and receive IPIs * on a platform like fast interrupts for example. In that case, * one can call this function with ASIF_NOALLOC flag set and then * call intr_ipi_dispatch() when appropriate. * * Not SMP coherent. */ int intr_ipi_set_handler(u_int ipi, const char *name, intr_ipi_filter_t *filter, void *arg, u_int flags) { struct intr_irqsrc *isrc; int error; if (filter == NULL) return(EINVAL); isrc = intr_ipi_lookup(ipi); if (isrc->isrc_ipifilter != NULL) return (EEXIST); if ((flags & AISHF_NOALLOC) == 0) { error = ipi_map(isrc, ipi); if (error != 0) return (error); } isrc->isrc_ipifilter = filter; isrc->isrc_arg = arg; isrc->isrc_handlers = 1; isrc->isrc_count = intr_ipi_setup_counters(name); isrc->isrc_index = 0; /* it should not be used in IPI case */ if (isrc->isrc_dev != NULL) { PIC_ENABLE_INTR(isrc->isrc_dev, isrc); PIC_ENABLE_SOURCE(isrc->isrc_dev, isrc); } return (0); } #endif /* Sending IPI */ void ipi_all_but_self(u_int ipi) { cpuset_t cpus; cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); intr_ipi_send(cpus, ipi); } void ipi_cpu(int cpu, u_int ipi) { cpuset_t cpus; CPU_ZERO(&cpus); CPU_SET(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x", __func__, cpu, ipi); intr_ipi_send(cpus, ipi); } void ipi_selected(cpuset_t cpus, u_int ipi) { CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); intr_ipi_send(cpus, ipi); } Index: user/alc/PQ_LAUNDRY/sys/arm64/arm64/nexus.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm64/arm64/nexus.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/arm64/arm64/nexus.c (revision 303642) @@ -1,472 +1,473 @@ /*- * Copyright 1998 Massachusetts Institute of Technology * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. 
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * This code implements a `root nexus' for Arm Architecture * machines. The function of the root nexus is to serve as an * attachment point for both processors and buses, and to manage * resources which are common to all of them. In particular, * this code implements the core resource managers for interrupt * requests, DMA requests (which rightfully should be a part of the * ISA code but it's easier to do it here for now), I/O port addresses, * and I/O memory address space. */ #include "opt_acpi.h" #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #ifdef FDT #include #include "ofw_bus_if.h" #endif #ifdef DEV_ACPI #include #include #endif extern struct bus_space memmap_bus; static MALLOC_DEFINE(M_NEXUSDEV, "nexusdev", "Nexus device"); struct nexus_device { struct resource_list nx_resources; }; #define DEVTONX(dev) ((struct nexus_device *)device_get_ivars(dev)) static struct rman mem_rman; static struct rman irq_rman; static int nexus_attach(device_t); #ifdef FDT static device_probe_t nexus_fdt_probe; static device_attach_t nexus_fdt_attach; #endif #ifdef DEV_ACPI static device_probe_t nexus_acpi_probe; static device_attach_t nexus_acpi_attach; #endif static int nexus_print_child(device_t, device_t); static device_t nexus_add_child(device_t, u_int, const char *, int); static struct resource *nexus_alloc_resource(device_t, device_t, int, int *, rman_res_t, rman_res_t, rman_res_t, u_int); static int nexus_activate_resource(device_t, device_t, int, int, struct resource *); static int nexus_config_intr(device_t dev, int irq, enum intr_trigger trig, enum intr_polarity pol); static struct resource_list *nexus_get_reslist(device_t, device_t); static int nexus_set_resource(device_t, device_t, int, int, rman_res_t, rman_res_t); static int nexus_deactivate_resource(device_t, device_t, int, int, struct resource *); static int nexus_setup_intr(device_t dev, device_t child, struct resource *res, int flags, driver_filter_t *filt, driver_intr_t *intr, void *arg, void **cookiep); static int nexus_teardown_intr(device_t, device_t, struct resource *, void *); static bus_space_tag_t nexus_get_bus_tag(device_t, device_t); #ifdef SMP static int nexus_bind_intr(device_t, device_t, struct resource *, int); #endif #ifdef FDT static int nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent, int icells, pcell_t *intr); #endif static device_method_t nexus_methods[] = { /* Bus interface */ DEVMETHOD(bus_print_child, nexus_print_child), DEVMETHOD(bus_add_child, nexus_add_child), DEVMETHOD(bus_alloc_resource, nexus_alloc_resource), DEVMETHOD(bus_activate_resource, nexus_activate_resource), DEVMETHOD(bus_config_intr, nexus_config_intr), DEVMETHOD(bus_get_resource_list, nexus_get_reslist), DEVMETHOD(bus_set_resource, nexus_set_resource), DEVMETHOD(bus_deactivate_resource, nexus_deactivate_resource), DEVMETHOD(bus_setup_intr, nexus_setup_intr), DEVMETHOD(bus_teardown_intr, 
nexus_teardown_intr), DEVMETHOD(bus_get_bus_tag, nexus_get_bus_tag), #ifdef SMP DEVMETHOD(bus_bind_intr, nexus_bind_intr), #endif { 0, 0 } }; static driver_t nexus_driver = { "nexus", nexus_methods, 1 /* no softc */ }; static int nexus_attach(device_t dev) { mem_rman.rm_start = 0; mem_rman.rm_end = BUS_SPACE_MAXADDR; mem_rman.rm_type = RMAN_ARRAY; mem_rman.rm_descr = "I/O memory addresses"; if (rman_init(&mem_rman) || rman_manage_region(&mem_rman, 0, BUS_SPACE_MAXADDR)) panic("nexus_attach mem_rman"); irq_rman.rm_start = 0; irq_rman.rm_end = ~0; irq_rman.rm_type = RMAN_ARRAY; irq_rman.rm_descr = "Interrupts"; if (rman_init(&irq_rman) || rman_manage_region(&irq_rman, 0, ~0)) panic("nexus_attach irq_rman"); bus_generic_probe(dev); bus_generic_attach(dev); return (0); } static int nexus_print_child(device_t bus, device_t child) { int retval = 0; retval += bus_print_child_header(bus, child); retval += printf("\n"); return (retval); } static device_t nexus_add_child(device_t bus, u_int order, const char *name, int unit) { device_t child; struct nexus_device *ndev; ndev = malloc(sizeof(struct nexus_device), M_NEXUSDEV, M_NOWAIT|M_ZERO); if (!ndev) return (0); resource_list_init(&ndev->nx_resources); child = device_add_child_ordered(bus, order, name, unit); /* should we free this in nexus_child_detached? */ device_set_ivars(child, ndev); return (child); } /* * Allocate a resource on behalf of child. NB: child is usually going to be a * child of one of our descendants, not a direct child of nexus0. * (Exceptions include footbridge.) */ static struct resource * nexus_alloc_resource(device_t bus, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct nexus_device *ndev = DEVTONX(child); struct resource *rv; struct resource_list_entry *rle; struct rman *rm; int needactivate = flags & RF_ACTIVE; /* * If this is an allocation of the "default" range for a given * RID, and we know what the resources for this device are * (ie. they aren't maintained by a child bus), then work out * the start/end values. */ if (RMAN_IS_DEFAULT_RANGE(start, end) && (count == 1)) { if (device_get_parent(child) != bus || ndev == NULL) return(NULL); rle = resource_list_find(&ndev->nx_resources, type, *rid); if (rle == NULL) return(NULL); start = rle->start; end = rle->end; count = rle->count; } switch (type) { case SYS_RES_IRQ: rm = &irq_rman; break; case SYS_RES_MEMORY: case SYS_RES_IOPORT: rm = &mem_rman; break; default: return (NULL); } rv = rman_reserve_resource(rm, start, end, count, flags, child); if (rv == NULL) return (NULL); rman_set_rid(rv, *rid); rman_set_bushandle(rv, rman_get_start(rv)); if (needactivate) { if (bus_activate_resource(child, type, *rid, rv)) { rman_release_resource(rv); return (NULL); } } return (rv); } static int nexus_config_intr(device_t dev, int irq, enum intr_trigger trig, enum intr_polarity pol) { /* TODO: This is wrong, it's needed for ACPI */ device_printf(dev, "bus_config_intr is obsolete and not supported!\n"); return (EOPNOTSUPP); } static int nexus_setup_intr(device_t dev, device_t child, struct resource *res, int flags, driver_filter_t *filt, driver_intr_t *intr, void *arg, void **cookiep) { int error; if ((rman_get_flags(res) & RF_SHAREABLE) == 0) flags |= INTR_EXCL; /* We depend here on rman_activate_resource() being idempotent. 
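nexus_alloc_resource() serves both explicit requests and "default range" requests, where the start/end come from the child's resource list; memory and IRQ ranges are then carved out of mem_rman and irq_rman. From a child driver's point of view the machinery is driven by the ordinary resource API, roughly as in this hypothetical attach routine (not part of the diff):

    static int
    example_child_attach(device_t dev)
    {
    	struct resource *mem, *irq;
    	int mem_rid = 0, irq_rid = 0;

    	/* Resolved from mem_rman and mapped by nexus_activate_resource(). */
    	mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &mem_rid,
    	    RF_ACTIVE);
    	if (mem == NULL)
    		return (ENXIO);

    	/* Resolved from irq_rman; nexus_setup_intr() wires the handler. */
    	irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq_rid,
    	    RF_ACTIVE | RF_SHAREABLE);
    	if (irq == NULL) {
    		bus_release_resource(dev, SYS_RES_MEMORY, mem_rid, mem);
    		return (ENXIO);
    	}
    	/* ... bus_setup_intr(dev, irq, ...) and device setup follow ... */
    	return (0);
    }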
*/ error = rman_activate_resource(res); if (error) return (error); error = intr_setup_irq(child, res, filt, intr, arg, flags, cookiep); return (error); } static int nexus_teardown_intr(device_t dev, device_t child, struct resource *r, void *ih) { return (intr_teardown_irq(child, r, ih)); } #ifdef SMP static int nexus_bind_intr(device_t dev, device_t child, struct resource *irq, int cpu) { return (intr_bind_irq(child, irq, cpu)); } #endif static bus_space_tag_t nexus_get_bus_tag(device_t bus __unused, device_t child __unused) { return(&memmap_bus); } static int nexus_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { int err; bus_addr_t paddr; bus_size_t psize; bus_space_handle_t vaddr; if ((err = rman_activate_resource(r)) != 0) return (err); /* * If this is a memory resource, map it into the kernel. */ if (type == SYS_RES_MEMORY || type == SYS_RES_IOPORT) { paddr = (bus_addr_t)rman_get_start(r); psize = (bus_size_t)rman_get_size(r); err = bus_space_map(&memmap_bus, paddr, psize, 0, &vaddr); if (err != 0) { rman_deactivate_resource(r); return (err); } rman_set_bustag(r, &memmap_bus); rman_set_virtual(r, (void *)vaddr); rman_set_bushandle(r, vaddr); } return (0); } static struct resource_list * nexus_get_reslist(device_t dev, device_t child) { struct nexus_device *ndev = DEVTONX(child); return (&ndev->nx_resources); } static int nexus_set_resource(device_t dev, device_t child, int type, int rid, rman_res_t start, rman_res_t count) { struct nexus_device *ndev = DEVTONX(child); struct resource_list *rl = &ndev->nx_resources; /* XXX this should return a success/failure indicator */ resource_list_add(rl, type, rid, start, start + count - 1, count); return(0); } static int nexus_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { bus_size_t psize; bus_space_handle_t vaddr; psize = (bus_size_t)rman_get_size(r); vaddr = rman_get_bushandle(r); if (vaddr != 0) { bus_space_unmap(&memmap_bus, vaddr, psize); rman_set_virtual(r, NULL); rman_set_bushandle(r, 0); } return (rman_deactivate_resource(r)); } #ifdef FDT static device_method_t nexus_fdt_methods[] = { /* Device interface */ DEVMETHOD(device_probe, nexus_fdt_probe), DEVMETHOD(device_attach, nexus_fdt_attach), /* OFW interface */ DEVMETHOD(ofw_bus_map_intr, nexus_ofw_map_intr), }; #define nexus_baseclasses nexus_fdt_baseclasses DEFINE_CLASS_1(nexus, nexus_fdt_driver, nexus_fdt_methods, 1, nexus_driver); #undef nexus_baseclasses static devclass_t nexus_fdt_devclass; EARLY_DRIVER_MODULE(nexus_fdt, root, nexus_fdt_driver, nexus_fdt_devclass, 0, 0, BUS_PASS_BUS + BUS_PASS_ORDER_FIRST); static int nexus_fdt_probe(device_t dev) { - if (OF_peer(0) == 0) + if (arm64_bus_method != ARM64_BUS_FDT) return (ENXIO); device_quiet(dev); return (BUS_PROBE_DEFAULT); } static int nexus_fdt_attach(device_t dev) { nexus_add_child(dev, 10, "ofwbus", 0); return (nexus_attach(dev)); } static int nexus_ofw_map_intr(device_t dev, device_t child, phandle_t iparent, int icells, pcell_t *intr) { return (INTR_IRQ_INVALID); } #endif #ifdef DEV_ACPI static device_method_t nexus_acpi_methods[] = { /* Device interface */ DEVMETHOD(device_probe, nexus_acpi_probe), DEVMETHOD(device_attach, nexus_acpi_attach), }; #define nexus_baseclasses nexus_acpi_baseclasses DEFINE_CLASS_1(nexus, nexus_acpi_driver, nexus_acpi_methods, 1, nexus_driver); #undef nexus_baseclasses static devclass_t nexus_acpi_devclass; EARLY_DRIVER_MODULE(nexus_acpi, root, nexus_acpi_driver, nexus_acpi_devclass, 0, 0, BUS_PASS_BUS + 
BUS_PASS_ORDER_FIRST); static int nexus_acpi_probe(device_t dev) { - if (acpi_identify() != 0) + if (arm64_bus_method != ARM64_BUS_ACPI || acpi_identify() != 0) return (ENXIO); device_quiet(dev); return (BUS_PROBE_LOW_PRIORITY); } static int nexus_acpi_attach(device_t dev) { nexus_add_child(dev, 10, "acpi", 0); return (nexus_attach(dev)); } #endif Index: user/alc/PQ_LAUNDRY/sys/arm64/arm64/pmap.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm64/arm64/pmap.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/arm64/arm64/pmap.c (revision 303642) @@ -1,3590 +1,3595 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * Copyright (c) 2014 Andrew Turner * All rights reserved. * Copyright (c) 2014-2016 The FreeBSD Foundation * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * This software was developed by Andrew Turner under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NL0PG (PAGE_SIZE/(sizeof (pd_entry_t))) #define NL1PG (PAGE_SIZE/(sizeof (pd_entry_t))) #define NL2PG (PAGE_SIZE/(sizeof (pd_entry_t))) #define NL3PG (PAGE_SIZE/(sizeof (pt_entry_t))) #define NUL0E L0_ENTRIES #define NUL1E (NUL0E * NL1PG) #define NUL2E (NUL1E * NL2PG) #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif /* * These are configured by the mair_el1 register. 
This is set up in locore.S */ #define DEVICE_MEMORY 0 #define UNCACHED_MEMORY 1 #define CACHED_MEMORY 2 #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) #define NPV_LIST_LOCKS MAXCPU #define PHYS_TO_PV_LIST_LOCK(pa) \ (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ struct rwlock **_lockp = (lockp); \ struct rwlock *_new_lock; \ \ _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ if (_new_lock != *_lockp) { \ if (*_lockp != NULL) \ rw_wunlock(*_lockp); \ *_lockp = _new_lock; \ rw_wlock(*_lockp); \ } \ } while (0) #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) #define RELEASE_PV_LIST_LOCK(lockp) do { \ struct rwlock **_lockp = (lockp); \ \ if (*_lockp != NULL) { \ rw_wunlock(*_lockp); \ *_lockp = NULL; \ } \ } while (0) #define VM_PAGE_TO_PV_LIST_LOCK(m) \ PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) struct pmap kernel_pmap_store; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ vm_offset_t kernel_vm_end = 0; struct msgbuf *msgbufp = NULL; static struct rwlock_padalign pvh_global_lock; vm_paddr_t dmap_phys_base; /* The start of the dmap region */ vm_paddr_t dmap_phys_max; /* The limit of the dmap region */ vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */ /* This code assumes all L1 DMAP entries will be used */ CTASSERT((DMAP_MIN_ADDRESS & ~L0_OFFSET) == DMAP_MIN_ADDRESS); CTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET) == DMAP_MAX_ADDRESS); #define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT) extern pt_entry_t pagetable_dmap[]; /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static struct mtx pv_chunks_mutex; static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp); static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free); static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); /* * These load the old table data and store the new value. * They need to be atomic as the System MMU may write to the table at * the same time as the CPU. 
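 *
 * Illustrative use (mirroring pmap_remove_l3() below): swapping the entry
 * out atomically returns the old bits, so anything set concurrently (e.g.
 * by the System MMU) is observed rather than lost:
 *
 *	old_l3 = pmap_load_clear(l3);
 *	if (old_l3 & ATTR_AF)
 *		vm_page_aflag_set(m, PGA_REFERENCED);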
*/ #define pmap_load_store(table, entry) atomic_swap_64(table, entry) #define pmap_set(table, mask) atomic_set_64(table, mask) #define pmap_load_clear(table) atomic_swap_64(table, 0) #define pmap_load(table) (*table) /********************/ /* Inline functions */ /********************/ static __inline void pagecopy(void *s, void *d) { memcpy(d, s, PAGE_SIZE); } #define pmap_l0_index(va) (((va) >> L0_SHIFT) & L0_ADDR_MASK) #define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) #define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) #define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) static __inline pd_entry_t * pmap_l0(pmap_t pmap, vm_offset_t va) { return (&pmap->pm_l0[pmap_l0_index(va)]); } static __inline pd_entry_t * pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va) { pd_entry_t *l1; l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); return (&l1[pmap_l1_index(va)]); } static __inline pd_entry_t * pmap_l1(pmap_t pmap, vm_offset_t va) { pd_entry_t *l0; l0 = pmap_l0(pmap, va); if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE) return (NULL); return (pmap_l0_to_l1(l0, va)); } static __inline pd_entry_t * pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va) { pd_entry_t *l2; l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); return (&l2[pmap_l2_index(va)]); } static __inline pd_entry_t * pmap_l2(pmap_t pmap, vm_offset_t va) { pd_entry_t *l1; l1 = pmap_l1(pmap, va); if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE) return (NULL); return (pmap_l1_to_l2(l1, va)); } static __inline pt_entry_t * pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va) { pt_entry_t *l3; l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK); return (&l3[pmap_l3_index(va)]); } /* * Returns the lowest valid pde for a given virtual address. * The next level may or may not point to a valid page or block. */ static __inline pd_entry_t * pmap_pde(pmap_t pmap, vm_offset_t va, int *level) { pd_entry_t *l0, *l1, *l2, desc; l0 = pmap_l0(pmap, va); desc = pmap_load(l0) & ATTR_DESCR_MASK; if (desc != L0_TABLE) { *level = -1; return (NULL); } l1 = pmap_l0_to_l1(l0, va); desc = pmap_load(l1) & ATTR_DESCR_MASK; if (desc != L1_TABLE) { *level = 0; return (l0); } l2 = pmap_l1_to_l2(l1, va); desc = pmap_load(l2) & ATTR_DESCR_MASK; if (desc != L2_TABLE) { *level = 1; return (l1); } *level = 2; return (l2); } /* * Returns the lowest valid pte block or table entry for a given virtual * address. If there are no valid entries return NULL and set the level to * the first invalid level. 
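 *
 * Callers use the returned level to pick the matching offset mask; an
 * illustrative sketch (mirroring pmap_extract() below) for a level 2 block:
 *
 *	pte = pmap_pte(pmap, va, &lvl);
 *	if (pte != NULL && lvl == 2)
 *		pa = (pmap_load(pte) & ~ATTR_MASK) | (va & L2_OFFSET);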
*/ static __inline pt_entry_t * pmap_pte(pmap_t pmap, vm_offset_t va, int *level) { pd_entry_t *l1, *l2, desc; pt_entry_t *l3; l1 = pmap_l1(pmap, va); if (l1 == NULL) { *level = 0; return (NULL); } desc = pmap_load(l1) & ATTR_DESCR_MASK; if (desc == L1_BLOCK) { *level = 1; return (l1); } if (desc != L1_TABLE) { *level = 1; return (NULL); } l2 = pmap_l1_to_l2(l1, va); desc = pmap_load(l2) & ATTR_DESCR_MASK; if (desc == L2_BLOCK) { *level = 2; return (l2); } if (desc != L2_TABLE) { *level = 2; return (NULL); } *level = 3; l3 = pmap_l2_to_l3(l2, va); if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE) return (NULL); return (l3); } bool pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1, pd_entry_t **l2, pt_entry_t **l3) { pd_entry_t *l0p, *l1p, *l2p; if (pmap->pm_l0 == NULL) return (false); l0p = pmap_l0(pmap, va); *l0 = l0p; if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE) return (false); l1p = pmap_l0_to_l1(l0p, va); *l1 = l1p; if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) { *l2 = NULL; *l3 = NULL; return (true); } if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE) return (false); l2p = pmap_l1_to_l2(l1p, va); *l2 = l2p; if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) { *l3 = NULL; return (true); } *l3 = pmap_l2_to_l3(l2p, va); return (true); } static __inline int pmap_is_current(pmap_t pmap) { return ((pmap == pmap_kernel()) || (pmap == curthread->td_proc->p_vmspace->vm_map.pmap)); } static __inline int pmap_l3_valid(pt_entry_t l3) { return ((l3 & ATTR_DESCR_MASK) == L3_PAGE); } static __inline int pmap_l3_valid_cacheable(pt_entry_t l3) { return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) && ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY))); } #define PTE_SYNC(pte) cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte)) /* * Checks if the page is dirty. We currently lack proper tracking of this on * arm64 so for now assume is a page mapped as rw was accessed it is. 
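 *
 * Illustrative use (as in pmap_remove_l3() below): a mapping that tests
 * dirty has its modification reflected back to the vm_page before the
 * mapping is destroyed:
 *
 *	if (pmap_page_dirty(old_l3))
 *		vm_page_dirty(m);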
*/ static inline int pmap_page_dirty(pt_entry_t pte) { return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) == (ATTR_AF | ATTR_AP(ATTR_AP_RW))); } static __inline void pmap_resident_count_inc(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); pmap->pm_stats.resident_count += count; } static __inline void pmap_resident_count_dec(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(pmap->pm_stats.resident_count >= count, ("pmap %p resident count underflow %ld %d", pmap, pmap->pm_stats.resident_count, count)); pmap->pm_stats.resident_count -= count; } static pt_entry_t * pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot, u_int *l2_slot) { pt_entry_t *l2; pd_entry_t *l1; l1 = (pd_entry_t *)l1pt; *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK; /* Check locore has used a table L1 map */ KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE, ("Invalid bootstrap L1 table")); /* Find the address of the L2 table */ l2 = (pt_entry_t *)init_pt_va; *l2_slot = pmap_l2_index(va); return (l2); } static vm_paddr_t pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) { u_int l1_slot, l2_slot; pt_entry_t *l2; l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET)); } static void pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa) { vm_offset_t va; vm_paddr_t pa; u_int l1_slot; pa = dmap_phys_base = min_pa & ~L1_OFFSET; va = DMAP_MIN_ADDRESS; for (; va < DMAP_MAX_ADDRESS && pa < max_pa; pa += L1_SIZE, va += L1_SIZE, l1_slot++) { l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT); pmap_load_store(&pagetable_dmap[l1_slot], (pa & ~L1_OFFSET) | ATTR_DEFAULT | ATTR_IDX(CACHED_MEMORY) | L1_BLOCK); } /* Set the upper limit of the DMAP region */ dmap_phys_max = pa; dmap_max_addr = va; cpu_dcache_wb_range((vm_offset_t)pagetable_dmap, PAGE_SIZE * DMAP_TABLES); cpu_tlb_flushID(); } static vm_offset_t pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start) { vm_offset_t l2pt; vm_paddr_t pa; pd_entry_t *l1; u_int l1_slot; KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address")); l1 = (pd_entry_t *)l1pt; l1_slot = pmap_l1_index(va); l2pt = l2_start; for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) { KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); pa = pmap_early_vtophys(l1pt, l2pt); pmap_load_store(&l1[l1_slot], (pa & ~Ln_TABLE_MASK) | L1_TABLE); l2pt += PAGE_SIZE; } /* Clean the L2 page table */ memset((void *)l2_start, 0, l2pt - l2_start); cpu_dcache_wb_range(l2_start, l2pt - l2_start); /* Flush the l1 table to ram */ cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); return l2pt; } static vm_offset_t pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start) { vm_offset_t l2pt, l3pt; vm_paddr_t pa; pd_entry_t *l2; u_int l2_slot; KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address")); l2 = pmap_l2(kernel_pmap, va); l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE); l2pt = (vm_offset_t)l2; l2_slot = pmap_l2_index(va); l3pt = l3_start; for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) { KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index")); pa = pmap_early_vtophys(l1pt, l3pt); pmap_load_store(&l2[l2_slot], (pa & ~Ln_TABLE_MASK) | L2_TABLE); l3pt += PAGE_SIZE; } /* Clean the L2 page table */ memset((void *)l3_start, 0, l3pt - l3_start); cpu_dcache_wb_range(l3_start, l3pt - l3_start); cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE); return l3pt; } /* * Bootstrap the system enough to run with virtual memory. 
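 * The rough order below: build the direct map first (pmap_bootstrap_dmap(),
 * so PHYS_TO_DMAP() works for the rest of bootstrap), then the kernel L2/L3
 * tables up to VM_MAX_KERNEL_ADDRESS, then the early dpcpu and msgbuf
 * allocations, and finally the remainder of phys_avail[].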
*/ void pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) { u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot; uint64_t kern_delta; pt_entry_t *l2; vm_offset_t va, freemempos; vm_offset_t dpcpu, msgbufpv; vm_paddr_t pa, max_pa, min_pa; int i; kern_delta = KERNBASE - kernstart; physmem = 0; printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen); printf("%lx\n", l1pt); printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK); /* Set this early so we can use the pagetable walking functions */ kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt; PMAP_LOCK_INIT(kernel_pmap); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); /* Assume the address we were loaded to is a valid physical address */ min_pa = max_pa = KERNBASE - kern_delta; /* * Find the minimum physical address. physmap is sorted, * but may contain empty ranges. */ for (i = 0; i < (physmap_idx * 2); i += 2) { if (physmap[i] == physmap[i + 1]) continue; if (physmap[i] <= min_pa) min_pa = physmap[i]; if (physmap[i + 1] > max_pa) max_pa = physmap[i + 1]; } /* Create a direct map region early so we can use it for pa -> va */ pmap_bootstrap_dmap(l1pt, min_pa, max_pa); va = KERNBASE; pa = KERNBASE - kern_delta; /* * Start to initialise phys_avail by copying from physmap * up to the physical address KERNBASE points at. */ map_slot = avail_slot = 0; for (; map_slot < (physmap_idx * 2) && avail_slot < (PHYS_AVAIL_SIZE - 2); map_slot += 2) { if (physmap[map_slot] == physmap[map_slot + 1]) continue; if (physmap[map_slot] <= pa && physmap[map_slot + 1] > pa) break; phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } /* Add the memory before the kernel */ if (physmap[avail_slot] < pa && avail_slot < (PHYS_AVAIL_SIZE - 2)) { phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = pa; physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } used_map_slot = map_slot; /* * Read the page table to find out what is already mapped. * This assumes we have mapped a block of memory from KERNBASE * using a single L1 entry. */ l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot); /* Sanity check the index, KERNBASE should be the first VA */ KASSERT(l2_slot == 0, ("The L2 index is non-zero")); /* Find how many pages we have mapped */ for (; l2_slot < Ln_ENTRIES; l2_slot++) { if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0) break; /* Check locore used L2 blocks */ KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK, ("Invalid bootstrap L2 table")); KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa, ("Incorrect PA in L2 table")); va += L2_SIZE; pa += L2_SIZE; } va = roundup2(va, L1_SIZE); freemempos = KERNBASE + kernlen; freemempos = roundup2(freemempos, PAGE_SIZE); /* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */ freemempos = pmap_bootstrap_l2(l1pt, va, freemempos); /* And the l3 tables for the early devmap */ freemempos = pmap_bootstrap_l3(l1pt, VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos); cpu_tlb_flushID(); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); /* Allocate dynamic per-cpu area. */ alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu, 0); /* Allocate memory for the msgbuf, e.g. 
for /sbin/dmesg */ alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); msgbufp = (void *)msgbufpv; virtual_avail = roundup2(freemempos, L1_SIZE); virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE; kernel_vm_end = virtual_avail; pa = pmap_early_vtophys(l1pt, freemempos); /* Finish initialising physmap */ map_slot = used_map_slot; for (; avail_slot < (PHYS_AVAIL_SIZE - 2) && map_slot < (physmap_idx * 2); map_slot += 2) { if (physmap[map_slot] == physmap[map_slot + 1]) continue; /* Have we used the current range? */ if (physmap[map_slot + 1] <= pa) continue; /* Do we need to split the entry? */ if (physmap[map_slot] < pa) { phys_avail[avail_slot] = pa; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; } else { phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; } physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } phys_avail[avail_slot] = 0; phys_avail[avail_slot + 1] = 0; /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". */ Maxmem = atop(phys_avail[avail_slot - 1]); cpu_tlb_flushID(); } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { int i; /* * Initialize the pv chunk list mutex. */ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); /* * Initialize the pool of pv list locks. */ for (i = 0; i < NPV_LIST_LOCKS; i++) rw_init(&pv_list_locks[i], "pmap pv list"); } /* - * Normal, non-SMP, invalidation functions. - * We inline these within pmap.c for speed. + * Invalidate a single TLB entry. */ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { sched_pin(); __asm __volatile( - "dsb sy \n" + "dsb ishst \n" "tlbi vaae1is, %0 \n" - "dsb sy \n" + "dsb ish \n" "isb \n" : : "r"(va >> PAGE_SHIFT)); sched_unpin(); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; sched_pin(); - __asm __volatile("dsb sy"); + dsb(ishst); for (addr = sva; addr < eva; addr += PAGE_SIZE) { __asm __volatile( "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT)); } __asm __volatile( - "dsb sy \n" + "dsb ish \n" "isb \n"); sched_unpin(); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { sched_pin(); __asm __volatile( - "dsb sy \n" + "dsb ishst \n" "tlbi vmalle1is \n" - "dsb sy \n" + "dsb ish \n" "isb \n"); sched_unpin(); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { pt_entry_t *pte, tpte; vm_paddr_t pa; int lvl; pa = 0; PMAP_LOCK(pmap); /* * Find the block or page map for this virtual address. pmap_pte * will return either a valid block/page entry, or NULL. 
*/ pte = pmap_pte(pmap, va, &lvl); if (pte != NULL) { tpte = pmap_load(pte); pa = tpte & ~ATTR_MASK; switch(lvl) { case 1: KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, ("pmap_extract: Invalid L1 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L1_OFFSET); break; case 2: KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, ("pmap_extract: Invalid L2 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L2_OFFSET); break; case 3: KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, ("pmap_extract: Invalid L3 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L3_OFFSET); break; } } PMAP_UNLOCK(pmap); return (pa); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t *pte, tpte; vm_paddr_t pa; vm_page_t m; int lvl; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: pte = pmap_pte(pmap, va, &lvl); if (pte != NULL) { tpte = pmap_load(pte); KASSERT(lvl > 0 && lvl <= 3, ("pmap_extract_and_hold: Invalid level %d", lvl)); CTASSERT(L1_BLOCK == L2_BLOCK); KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) || (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK), ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl, tpte & ATTR_DESCR_MASK)); if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) || ((prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, tpte & ~ATTR_MASK, &pa)) goto retry; m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK); vm_page_hold(m); } } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } vm_paddr_t pmap_kextract(vm_offset_t va) { pt_entry_t *pte, tpte; vm_paddr_t pa; int lvl; if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { pa = DMAP_TO_PHYS(va); } else { pa = 0; pte = pmap_pte(kernel_pmap, va, &lvl); if (pte != NULL) { tpte = pmap_load(pte); pa = tpte & ~ATTR_MASK; switch(lvl) { case 1: KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, ("pmap_kextract: Invalid L1 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L1_OFFSET); break; case 2: KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, ("pmap_kextract: Invalid L2 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L2_OFFSET); break; case 3: KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, ("pmap_kextract: Invalid L3 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L3_OFFSET); break; } } } return (pa); } /*************************************************** * Low level mapping routines..... 
***************************************************/ -void -pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) +static void +pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode) { pd_entry_t *pde; pt_entry_t *pte; vm_offset_t va; int lvl; KASSERT((pa & L3_OFFSET) == 0, - ("pmap_kenter_device: Invalid physical address")); + ("pmap_kenter: Invalid physical address")); KASSERT((sva & L3_OFFSET) == 0, - ("pmap_kenter_device: Invalid virtual address")); + ("pmap_kenter: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, - ("pmap_kenter_device: Mapping is not page-sized")); + ("pmap_kenter: Mapping is not page-sized")); va = sva; while (size != 0) { pde = pmap_pde(kernel_pmap, va, &lvl); KASSERT(pde != NULL, - ("pmap_kenter_device: Invalid page entry, va: 0x%lx", va)); - KASSERT(lvl == 2, - ("pmap_kenter_device: Invalid level %d", lvl)); + ("pmap_kenter: Invalid page entry, va: 0x%lx", va)); + KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl)); pte = pmap_l2_to_l3(pde, va); pmap_load_store(pte, (pa & ~L3_OFFSET) | ATTR_DEFAULT | - ATTR_IDX(DEVICE_MEMORY) | L3_PAGE); + ATTR_IDX(mode) | L3_PAGE); PTE_SYNC(pte); va += PAGE_SIZE; pa += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); +} + +void +pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) +{ + + pmap_kenter(sva, size, pa, DEVICE_MEMORY); } /* * Remove a page from the kernel pagetables. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; int lvl; pte = pmap_pte(kernel_pmap, va, &lvl); KASSERT(pte != NULL, ("pmap_kremove: Invalid address")); KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl)); if (pmap_l3_valid_cacheable(pmap_load(pte))) cpu_dcache_wb_range(va, L3_SIZE); pmap_load_clear(pte); PTE_SYNC(pte); pmap_invalidate_page(kernel_pmap, va); } void pmap_kremove_device(vm_offset_t sva, vm_size_t size) { pt_entry_t *pte; vm_offset_t va; int lvl; KASSERT((sva & L3_OFFSET) == 0, ("pmap_kremove_device: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kremove_device: Mapping is not page-sized")); va = sva; while (size != 0) { pte = pmap_pte(kernel_pmap, va, &lvl); KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va)); KASSERT(lvl == 3, ("Invalid device pagetable level: %d != 3", lvl)); pmap_load_clear(pte); PTE_SYNC(pte); va += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { return PHYS_TO_DMAP(start); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. 
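 *
 * Illustrative sketch (not part of this change), assuming the pages in
 * "ma" are already wired:
 *
 *	va = kva_alloc(ptoa(npages));
 *	pmap_qenter(va, ma, npages);
 *	... use the temporary mapping ...
 *	pmap_qremove(va, npages);
 *	kva_free(va, ptoa(npages));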
*/ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pd_entry_t *pde; pt_entry_t *pte, pa; vm_offset_t va; vm_page_t m; int i, lvl; va = sva; for (i = 0; i < count; i++) { pde = pmap_pde(kernel_pmap, va, &lvl); KASSERT(pde != NULL, ("pmap_qenter: Invalid page entry, va: 0x%lx", va)); KASSERT(lvl == 2, ("pmap_qenter: Invalid level %d", lvl)); m = ma[i]; pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) | ATTR_IDX(m->md.pv_memattr) | L3_PAGE; pte = pmap_l2_to_l3(pde, va); pmap_load_store(pte, pa); PTE_SYNC(pte); va += L3_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. */ void pmap_qremove(vm_offset_t sva, int count) { pt_entry_t *pte; vm_offset_t va; int lvl; KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); va = sva; while (count-- > 0) { pte = pmap_pte(kernel_pmap, va, &lvl); KASSERT(lvl == 3, ("Invalid device pagetable level: %d != 3", lvl)); if (pte != NULL) { if (pmap_l3_valid_cacheable(pmap_load(pte))) cpu_dcache_wb_range(va, L3_SIZE); pmap_load_clear(pte); PTE_SYNC(pte); } va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /*************************************************** * Page table page management routines..... ***************************************************/ static __inline void pmap_free_zero_pages(struct spglist *free) { vm_page_t m; while ((m = SLIST_FIRST(free)) != NULL) { SLIST_REMOVE_HEAD(free, plinks.s.ss); /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } } /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Decrements a page table page's wire count, which is used to record the * number of valid page table entries within the page. If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. 
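 *
 * Illustrative use (as in pmap_unuse_l3() below): once an L3 entry has been
 * removed, the page table page named by the parent entry gives up one wire
 * count and is freed when the last one goes:
 *
 *	mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK);
 *	return (pmap_unwire_l3(pmap, va, mpte, free));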
*/ static inline boolean_t pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { _pmap_unwire_l3(pmap, va, m, free); return (TRUE); } else return (FALSE); } static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * unmap the page table page */ if (m->pindex >= (NUL2E + NUL1E)) { /* l1 page */ pd_entry_t *l0; l0 = pmap_l0(pmap, va); pmap_load_clear(l0); PTE_SYNC(l0); } else if (m->pindex >= NUL2E) { /* l2 page */ pd_entry_t *l1; l1 = pmap_l1(pmap, va); pmap_load_clear(l1); PTE_SYNC(l1); } else { /* l3 page */ pd_entry_t *l2; l2 = pmap_l2(pmap, va); pmap_load_clear(l2); PTE_SYNC(l2); } pmap_resident_count_dec(pmap, 1); if (m->pindex < NUL2E) { /* We just released an l3, unhold the matching l2 */ pd_entry_t *l1, tl1; vm_page_t l2pg; l1 = pmap_l1(pmap, va); tl1 = pmap_load(l1); l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); pmap_unwire_l3(pmap, va, l2pg, free); } else if (m->pindex < (NUL2E + NUL1E)) { /* We just released an l2, unhold the matching l1 */ pd_entry_t *l0, tl0; vm_page_t l1pg; l0 = pmap_l0(pmap, va); tl0 = pmap_load(l0); l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); pmap_unwire_l3(pmap, va, l1pg, free); } pmap_invalidate_page(pmap, va); /* * This is a release store so that the ordinary store unmapping * the page table page is globally performed before TLB shoot- * down is begun. */ atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); } /* * After removing an l3 entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, struct spglist *free) { vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK); return (pmap_unwire_l3(pmap, va, mpte, free)); } void pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_l0 = kernel_pmap->pm_l0; } int pmap_pinit(pmap_t pmap) { vm_paddr_t l0phys; vm_page_t l0pt; /* * allocate the l0 page */ while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) VM_WAIT; l0phys = VM_PAGE_TO_PHYS(l0pt); pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys); if ((l0pt->flags & PG_ZERO) == 0) pagezero(pmap->pm_l0); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); return (1); } /* * This routine is called if the desired page table page does not exist. * * If page table page allocation fails, this routine may sleep before * returning NULL. It sleeps only if a lock pointer was given. * * Note: If a page allocation fails at page table level two or three, * one or two pages may be held during the wait, only to be released * afterwards. This conservative approach is easily argued to avoid * race conditions. */ static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) { vm_page_t m, l1pg, l2pg; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if (lockp != NULL) { RELEASE_PV_LIST_LOCK(lockp); PMAP_UNLOCK(pmap); rw_runlock(&pvh_global_lock); VM_WAIT; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); } /* * Indicate the need to retry. 
While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ if (ptepindex >= (NUL2E + NUL1E)) { pd_entry_t *l0; vm_pindex_t l0index; l0index = ptepindex - (NUL2E + NUL1E); l0 = &pmap->pm_l0[l0index]; pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE); PTE_SYNC(l0); } else if (ptepindex >= NUL2E) { vm_pindex_t l0index, l1index; pd_entry_t *l0, *l1; pd_entry_t tl0; l1index = ptepindex - NUL2E; l0index = l1index >> L0_ENTRIES_SHIFT; l0 = &pmap->pm_l0[l0index]; tl0 = pmap_load(l0); if (tl0 == 0) { /* recurse for allocating page dir */ if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index, lockp) == NULL) { --m->wire_count; /* XXX: release mem barrier? */ atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } } else { l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); l1pg->wire_count++; } l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); l1 = &l1[ptepindex & Ln_ADDR_MASK]; pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE); PTE_SYNC(l1); } else { vm_pindex_t l0index, l1index; pd_entry_t *l0, *l1, *l2; pd_entry_t tl0, tl1; l1index = ptepindex >> Ln_ENTRIES_SHIFT; l0index = l1index >> L0_ENTRIES_SHIFT; l0 = &pmap->pm_l0[l0index]; tl0 = pmap_load(l0); if (tl0 == 0) { /* recurse for allocating page dir */ if (_pmap_alloc_l3(pmap, NUL2E + l1index, lockp) == NULL) { --m->wire_count; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } tl0 = pmap_load(l0); l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); l1 = &l1[l1index & Ln_ADDR_MASK]; } else { l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); l1 = &l1[l1index & Ln_ADDR_MASK]; tl1 = pmap_load(l1); if (tl1 == 0) { /* recurse for allocating page dir */ if (_pmap_alloc_l3(pmap, NUL2E + l1index, lockp) == NULL) { --m->wire_count; /* XXX: release mem barrier? */ atomic_subtract_int( &vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } } else { l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); l2pg->wire_count++; } } l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); l2 = &l2[ptepindex & Ln_ADDR_MASK]; pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE); PTE_SYNC(l2); } pmap_resident_count_inc(pmap, 1); return (m); } static vm_page_t pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t ptepindex; pd_entry_t *pde, tpde; vm_page_t m; int lvl; /* * Calculate pagetable page index */ ptepindex = pmap_l2_pindex(va); retry: /* * Get the page directory entry */ pde = pmap_pde(pmap, va, &lvl); /* * If the page table page is mapped, we just increment the hold count, * and activate it. If we get a level 2 pde it will point to a level 3 * table. */ if (lvl == 2) { tpde = pmap_load(pde); if (tpde != 0) { m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK); m->wire_count++; return (m); } } /* * Here if the pte page isn't mapped, or if it has been deallocated. */ m = _pmap_alloc_l3(pmap, ptepindex, lockp); if (m == NULL && lockp != NULL) goto retry; return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
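 *
 * A hypothetical teardown sketch, assuming every mapping has already been
 * removed (the function below asserts a zero resident count):
 *
 *	pmap_remove(pmap, 0, VM_MAXUSER_ADDRESS);
 *	pmap_release(pmap);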
*/ void pmap_release(pmap_t pmap) { vm_page_t m; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0)); m->wire_count--; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); } #if 0 static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "LU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "LU", "Amount of KVM free"); #endif /* 0 */ /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t paddr; vm_page_t nkpg; pd_entry_t *l0, *l1, *l2; mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, L2_SIZE); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { l0 = pmap_l0(kernel_pmap, kernel_vm_end); KASSERT(pmap_load(l0) != 0, ("pmap_growkernel: No level 0 kernel entry")); l1 = pmap_l0_to_l1(l0, kernel_vm_end); if (pmap_load(l1) == 0) { /* We need a new PDP entry */ nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); pmap_load_store(l1, paddr | L1_TABLE); PTE_SYNC(l1); continue; /* try again */ } l2 = pmap_l1_to_l2(l1, kernel_vm_end); if ((pmap_load(l2) & ATTR_AF) != 0) { kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } continue; } nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); pmap_load_store(l2, paddr | L2_TABLE); PTE_SYNC(l2); pmap_invalidate_page(kernel_pmap, kernel_vm_end); kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } /*************************************************** * page management routines. 
***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 3); CTASSERT(_NPCPV == 168); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0 0xfffffffffffffffful #define PC_FREE1 0xfffffffffffffffful #define PC_FREE2 0x000000fffffffffful static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; #if 0 #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs, pv_entry_count; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif #endif /* 0 */ /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. * * Returns NULL if PV entries were reclaimed from the specified pmap. * * We do not, however, unmap 2mpages because subsequent accesses will * allocate per-page pv entries until repromotion occurs, thereby * exacerbating the shortage of free pv entries. */ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) { panic("ARM64TODO: reclaim_pv_chunk"); } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int idx, field, bit; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_frees, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, 1)); PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 64; bit = idx % 64; pc->pc_map[field] |= 1ul << bit; if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || pc->pc_map[2] != PC_FREE2) { /* 98% of the time, pc is already at the head of the list. 
*/ if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; mtx_lock(&pv_chunks_mutex); TAILQ_REMOVE(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); vm_page_unwire(m, PQ_NONE); vm_page_free(m); } /* * Returns a new PV entry, allocating a new PV chunk from the system when * needed. If this PV chunk allocation fails and a PV list lock pointer was * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is * returned. * * The given PV list lock may be released. */ static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp) { int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = ffsl(pc->pc_map[field]) - 1; break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 64 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); return (pv); } } /* No free items, allocate another chunk */ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { if (lockp == NULL) { PV_STAT(pc_chunk_tryfail++); return (NULL); } m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; } PV_STAT(atomic_add_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); dump_add_page(m->phys_addr); pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; mtx_lock(&pv_chunks_mutex); TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); return (pv); } /* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. Returns the pv entry if found and NULL * otherwise. This operation can be performed on pv lists for either 4KB or * 2MB page mappings. */ static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; break; } } return (pv); } /* * First find and then destroy the pv entry for the specified pmap and virtual * address. This operation can be performed on pv lists for either 4KB or 2MB * page mappings. 
*/ static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } /* * Conditionally create the PV entry for a 4KB page mapping if the required * memory can be allocated without resorting to reclamation. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, NULL)) != NULL) { pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; return (TRUE); } else return (FALSE); } /* * pmap_remove_l3: do the things to unmap a page in a process */ static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) { pt_entry_t old_l3; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(va, L3_SIZE); old_l3 = pmap_load_clear(l3); PTE_SYNC(l3); pmap_invalidate_page(pmap, va); if (old_l3 & ATTR_SW_WIRED) pmap->pm_stats.wired_count -= 1; pmap_resident_count_dec(pmap, 1); if (old_l3 & ATTR_SW_MANAGED) { m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK); if (pmap_page_dirty(old_l3)) vm_page_dirty(m); if (old_l3 & ATTR_AF) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); pmap_pvh_free(&m->md, pmap, va); } return (pmap_unuse_l3(pmap, va, l2e, free)); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct rwlock *lock; vm_offset_t va, va_next; pd_entry_t *l0, *l1, *l2; pt_entry_t l3_paddr, *l3; struct spglist free; int anyvalid; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); lock = NULL; for (; sva < eva; sva = va_next) { if (pmap->pm_stats.resident_count == 0) break; l0 = pmap_l0(pmap, sva); if (pmap_load(l0) == 0) { va_next = (sva + L0_SIZE) & ~L0_OFFSET; if (va_next < sva) va_next = eva; continue; } l1 = pmap_l0_to_l1(l0, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } /* * Calculate index for next page table. */ va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL) continue; l3_paddr = pmap_load(l2); /* * Weed out invalid mappings. */ if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE) continue; /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. 
*/ if (va_next > eva) va_next = eva; va = va_next; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if (l3 == NULL) panic("l3 == NULL"); if (pmap_load(l3) == 0) { if (va != va_next) { pmap_invalidate_range(pmap, va, sva); va = va_next; } continue; } if (va == va_next) va = sva; if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free, &lock)) { sva += L3_SIZE; break; } } if (va != va_next) pmap_invalidate_range(pmap, va, sva); } if (lock != NULL) rw_wunlock(lock); if (anyvalid) pmap_invalidate_all(pmap); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { pv_entry_t pv; pmap_t pmap; pd_entry_t *pde, tpde; pt_entry_t *pte, tpte; struct spglist free; int lvl; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); rw_wlock(&pvh_global_lock); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap_resident_count_dec(pmap, 1); pde = pmap_pde(pmap, pv->pv_va, &lvl); KASSERT(pde != NULL, ("pmap_remove_all: no page directory entry found")); KASSERT(lvl == 2, ("pmap_remove_all: invalid pde level %d", lvl)); tpde = pmap_load(pde); pte = pmap_l2_to_l3(pde, pv->pv_va); tpte = pmap_load(pte); if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(tpte)) cpu_dcache_wb_range(pv->pv_va, L3_SIZE); pmap_load_clear(pte); PTE_SYNC(pte); pmap_invalidate_page(pmap, pv->pv_va); if (tpte & ATTR_SW_WIRED) pmap->pm_stats.wired_count--; if ((tpte & ATTR_AF) != 0) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if (pmap_page_dirty(tpte)) vm_page_dirty(m); pmap_unuse_l3(pmap, pv->pv_va, tpde, &free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); pmap_free_zero_pages(&free); } /* * Set the physical protection on the * specified range of this map as requested. 
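 *
 * Illustrative call (not part of this change): write-protecting a range
 * leaves it readable, while requesting no access at all falls through to
 * pmap_remove() instead:
 *
 *	pmap_protect(pmap, sva, eva, VM_PROT_READ);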
*/ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t va, va_next; pd_entry_t *l0, *l1, *l2; pt_entry_t *l3p, l3; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE) return; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l0 = pmap_l0(pmap, sva); if (pmap_load(l0) == 0) { va_next = (sva + L0_SIZE) & ~L0_OFFSET; if (va_next < sva) va_next = eva; continue; } l1 = pmap_l0_to_l1(l0, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE) continue; if (va_next > eva) va_next = eva; va = va_next; for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, sva += L3_SIZE) { l3 = pmap_load(l3p); if (pmap_l3_valid(l3)) { pmap_set(l3p, ATTR_AP(ATTR_AP_RO)); PTE_SYNC(l3p); /* XXX: Use pmap_invalidate_range */ pmap_invalidate_page(pmap, va); } } } PMAP_UNLOCK(pmap); /* TODO: Only invalidate entries we are touching */ pmap_invalidate_all(pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind __unused) { struct rwlock *lock; pd_entry_t *pde; pt_entry_t new_l3, orig_l3; pt_entry_t *l3; pv_entry_t pv; vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa; vm_page_t mpte, om, l1_m, l2_m, l3_m; boolean_t nosleep; int lvl; va = trunc_page(va); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); pa = VM_PAGE_TO_PHYS(m); new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | L3_PAGE); if ((prot & VM_PROT_WRITE) == 0) new_l3 |= ATTR_AP(ATTR_AP_RO); if ((flags & PMAP_ENTER_WIRED) != 0) new_l3 |= ATTR_SW_WIRED; if ((va >> 63) == 0) new_l3 |= ATTR_AP(ATTR_AP_USER); CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); mpte = NULL; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); if (va < VM_MAXUSER_ADDRESS) { nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = pmap_alloc_l3(pmap, va, nosleep ? 
NULL : &lock); if (mpte == NULL && nosleep) { CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_RESOURCE_SHORTAGE); } pde = pmap_pde(pmap, va, &lvl); KASSERT(pde != NULL, ("pmap_enter: Invalid page entry, va: 0x%lx", va)); KASSERT(lvl == 2, ("pmap_enter: Invalid level %d", lvl)); l3 = pmap_l2_to_l3(pde, va); } else { pde = pmap_pde(pmap, va, &lvl); /* * If we get a level 2 pde it must point to a level 3 entry * otherwise we will need to create the intermediate tables */ if (lvl < 2) { switch(lvl) { default: case -1: /* Get the l0 pde to update */ pde = pmap_l0(pmap, va); KASSERT(pde != NULL, ("...")); l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l1_m == NULL) panic("pmap_enter: l1 pte_m == NULL"); if ((l1_m->flags & PG_ZERO) == 0) pmap_zero_page(l1_m); l1_pa = VM_PAGE_TO_PHYS(l1_m); pmap_load_store(pde, l1_pa | L0_TABLE); PTE_SYNC(pde); /* FALLTHROUGH */ case 0: /* Get the l1 pde to update */ pde = pmap_l1_to_l2(pde, va); KASSERT(pde != NULL, ("...")); l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l2_m == NULL) panic("pmap_enter: l2 pte_m == NULL"); if ((l2_m->flags & PG_ZERO) == 0) pmap_zero_page(l2_m); l2_pa = VM_PAGE_TO_PHYS(l2_m); pmap_load_store(pde, l2_pa | L1_TABLE); PTE_SYNC(pde); /* FALLTHROUGH */ case 1: /* Get the l2 pde to update */ pde = pmap_l1_to_l2(pde, va); l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l3_m == NULL) panic("pmap_enter: l3 pte_m == NULL"); if ((l3_m->flags & PG_ZERO) == 0) pmap_zero_page(l3_m); l3_pa = VM_PAGE_TO_PHYS(l3_m); pmap_load_store(pde, l3_pa | L2_TABLE); PTE_SYNC(pde); break; } } l3 = pmap_l2_to_l3(pde, va); pmap_invalidate_page(pmap, va); } om = NULL; orig_l3 = pmap_load(l3); opa = orig_l3 & ~ATTR_MASK; /* * Is the specified virtual address already mapped? */ if (pmap_l3_valid(orig_l3)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if ((flags & PMAP_ENTER_WIRED) != 0 && (orig_l3 & ATTR_SW_WIRED) == 0) pmap->pm_stats.wired_count++; else if ((flags & PMAP_ENTER_WIRED) == 0 && (orig_l3 & ATTR_SW_WIRED) != 0) pmap->pm_stats.wired_count--; /* * Remove the extra PT page reference. */ if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%lx", va)); } /* * Has the physical page changed? */ if (opa == pa) { /* * No, might be a protection or wiring change. */ if ((orig_l3 & ATTR_SW_MANAGED) != 0) { new_l3 |= ATTR_SW_MANAGED; if ((new_l3 & ATTR_AP(ATTR_AP_RW)) == ATTR_AP(ATTR_AP_RW)) { vm_page_aflag_set(m, PGA_WRITEABLE); } } goto validate; } /* Flush the cache, there might be uncommitted data in it */ if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3)) cpu_dcache_wb_range(va, L3_SIZE); } else { /* * Increment the counters. */ if ((new_l3 & ATTR_SW_WIRED) != 0) pmap->pm_stats.wired_count++; pmap_resident_count_inc(pmap, 1); } /* * Enter on the PV list if part of our managed memory. 
*/ if ((m->oflags & VPO_UNMANAGED) == 0) { new_l3 |= ATTR_SW_MANAGED; pv = get_pv_entry(pmap, &lock); pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) vm_page_aflag_set(m, PGA_WRITEABLE); } /* * Update the L3 entry. */ if (orig_l3 != 0) { validate: orig_l3 = pmap_load_store(l3, new_l3); PTE_SYNC(l3); opa = orig_l3 & ~ATTR_MASK; if (opa != pa) { if ((orig_l3 & ATTR_SW_MANAGED) != 0) { om = PHYS_TO_VM_PAGE(opa); if (pmap_page_dirty(orig_l3)) vm_page_dirty(om); if ((orig_l3 & ATTR_AF) != 0) vm_page_aflag_set(om, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); pmap_pvh_free(&om->md, pmap, va); } } else if (pmap_page_dirty(orig_l3)) { if ((orig_l3 & ATTR_SW_MANAGED) != 0) vm_page_dirty(m); } } else { pmap_load_store(l3, new_l3); PTE_SYNC(l3); } pmap_invalidate_page(pmap, va); if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) cpu_icache_sync_range(va, PAGE_SIZE); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { struct rwlock *lock; vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpte = NULL; m = m_start; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); m = TAILQ_NEXT(m, listq); } if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { struct rwlock *lock; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) { struct spglist free; pd_entry_t *pde; pt_entry_t *l3; vm_paddr_t pa; int lvl; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); /* * In the case that a page table page is not * resident, we are creating it here. 
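 *
 * (This on-demand allocation only applies to user addresses; for kernel
 * addresses the code below expects the page table page to exist already.)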
*/ if (va < VM_MAXUSER_ADDRESS) { vm_pindex_t l2pindex; /* * Calculate pagetable page index */ l2pindex = pmap_l2_pindex(va); if (mpte && (mpte->pindex == l2pindex)) { mpte->wire_count++; } else { /* * Get the l2 entry */ pde = pmap_pde(pmap, va, &lvl); /* * If the page table page is mapped, we just increment * the hold count, and activate it. Otherwise, we * attempt to allocate a page table page. If this * attempt fails, we don't retry. Instead, we give up. */ if (lvl == 2 && pmap_load(pde) != 0) { mpte = PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK); mpte->wire_count++; } else { /* * Pass NULL instead of the PV list lock * pointer, because we don't intend to sleep. */ mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); if (mpte == NULL) return (mpte); } } l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); l3 = &l3[pmap_l3_index(va)]; } else { mpte = NULL; pde = pmap_pde(kernel_pmap, va, &lvl); KASSERT(pde != NULL, ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx", va)); KASSERT(lvl == 2, ("pmap_enter_quick_locked: Invalid level %d", lvl)); l3 = pmap_l2_to_l3(pde, va); } if (pmap_load(l3) != 0) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_l3(pmap, va, mpte, &free)) { pmap_invalidate_page(pmap, va); pmap_free_zero_pages(&free); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap_resident_count_inc(pmap, 1); pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | ATTR_AP(ATTR_AP_RW) | L3_PAGE; /* * Now validate mapping with RO protection */ if ((m->oflags & VPO_UNMANAGED) == 0) pa |= ATTR_SW_MANAGED; pmap_load_store(l3, pa); PTE_SYNC(l3); pmap_invalidate_page(pmap, va); return (mpte); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, * so there is no need to invalidate any TLB entries. 
*/ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va_next; pd_entry_t *l0, *l1, *l2; pt_entry_t *l3; boolean_t pv_lists_locked; pv_lists_locked = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l0 = pmap_l0(pmap, sva); if (pmap_load(l0) == 0) { va_next = (sva + L0_SIZE) & ~L0_OFFSET; if (va_next < sva) va_next = eva; continue; } l1 = pmap_l0_to_l1(l0, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (pmap_load(l2) == 0) continue; if (va_next > eva) va_next = eva; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if (pmap_load(l3) == 0) continue; if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) panic("pmap_unwire: l3 %#jx is missing " "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3)); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. */ atomic_clear_long(l3, ATTR_SW_WIRED); pmap->pm_stats.wired_count--; } } if (pv_lists_locked) rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); if (off == 0 && size == PAGE_SIZE) pagezero((void *)va); else bzero((char *)va + off, size); } /* * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. 
*/ void pmap_copy_page(vm_page_t msrc, vm_page_t mdst) { vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); pagecopy((void *)src, (void *)dst); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_page_t m_a, m_b; vm_paddr_t p_a, p_b; vm_offset_t a_pg_offset, b_pg_offset; int cnt; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; m_a = ma[a_offset >> PAGE_SHIFT]; p_a = m_a->phys_addr; b_pg_offset = b_offset & PAGE_MASK; m_b = mb[b_offset >> PAGE_SHIFT]; p_b = m_b->phys_addr; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); cnt = min(cnt, PAGE_SIZE - b_pg_offset); if (__predict_false(!PHYS_IN_DMAP(p_a))) { panic("!DMAP a %lx", p_a); } else { a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; } if (__predict_false(!PHYS_IN_DMAP(p_b))) { panic("!DMAP b %lx", p_b); } else { b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; } bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } vm_offset_t pmap_quick_enter_page(vm_page_t m) { return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); } void pmap_quick_remove_page(vm_offset_t addr) { } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct rwlock *lock; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { struct rwlock *lock; pmap_t pmap; pt_entry_t *pte; pv_entry_t pv; int count, lvl, md_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: count = 0; TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pte(pmap, pv->pv_va, &lvl); if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0) count++; PMAP_UNLOCK(pmap); } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (count); } /* * Destroy all managed, non-wired mappings in the given user-space * pmap. This pmap cannot be active on any processor besides the * caller. * * This function cannot be applied to the kernel pmap. Moreover, it * is not intended for general use. It is only to be used during * process termination. Consequently, it can be implemented in ways * that make it faster than pmap_remove(). First, it can more quickly * destroy mappings by iterating over the pmap's collection of PV * entries, rather than searching the page table. Second, it doesn't * have to test and clear the page table entries atomically, because * no processor is currently accessing the user address space. 
In * particular, a page table entry's dirty bit won't change state once * this function starts. */ void pmap_remove_pages(pmap_t pmap) { pd_entry_t *pde; pt_entry_t *pte, tpte; struct spglist free; vm_page_t m; pv_entry_t pv; struct pv_chunk *pc, *npc; struct rwlock *lock; int64_t bit; uint64_t inuse, bitmask; int allfree, field, freed, idx, lvl; vm_paddr_t pa; lock = NULL; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; freed = 0; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = ffsl(inuse) - 1; bitmask = 1UL << bit; idx = field * 64 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; pde = pmap_pde(pmap, pv->pv_va, &lvl); KASSERT(pde != NULL, ("Attempting to remove an unmapped page")); KASSERT(lvl == 2, ("Invalid page directory level: %d", lvl)); pte = pmap_l2_to_l3(pde, pv->pv_va); KASSERT(pte != NULL, ("Attempting to remove an unmapped page")); tpte = pmap_load(pte); /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & ATTR_SW_WIRED) { allfree = 0; continue; } pa = tpte & ~ATTR_MASK; m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == pa, ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad pte %#jx", (uintmax_t)tpte)); /* XXX: assumes tpte is level 3 */ if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(tpte)) cpu_dcache_wb_range(pv->pv_va, L3_SIZE); pmap_load_clear(pte); PTE_SYNC(pte); pmap_invalidate_page(pmap, pv->pv_va); /* * Update the vm_page_t clean/reference bits. */ if ((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) vm_page_dirty(m); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); /* Mark free */ pc->pc_map[field] |= bitmask; pmap_resident_count_dec(pmap, 1); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; pmap_unuse_l3(pmap, pv->pv_va, pmap_load(pde), &free); freed++; } } PV_STAT(atomic_add_long(&pv_entry_frees, freed)); PV_STAT(atomic_add_int(&pv_entry_spare, freed)); PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } pmap_invalidate_all(pmap); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * This is used to check if a page has been accessed or modified. As we * don't have a bit to see if it has been modified we have to assume it * has been if the page is read/write. 
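 *
 * Concretely, a mapping is reported as modified when its access
 * permissions are read/write (ATTR_AP_RW), and as accessed when ATTR_AF
 * is set on a valid L3 page entry; these are the mask/value pairs built
 * below.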
*/ static boolean_t pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) { struct rwlock *lock; pv_entry_t pv; pt_entry_t *pte, mask, value; pmap_t pmap; int lvl, md_gen; boolean_t rv; rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pte(pmap, pv->pv_va, &lvl); KASSERT(lvl == 3, ("pmap_page_test_mappings: Invalid level %d", lvl)); mask = 0; value = 0; if (modified) { mask |= ATTR_AP_RW_BIT; value |= ATTR_AP(ATTR_AP_RW); } if (accessed) { mask |= ATTR_AF | ATTR_DESCR_MASK; value |= ATTR_AF | L3_PAGE; } rv = (pmap_load(pte) & mask) == value; PMAP_UNLOCK(pmap); if (rv) goto out; } out: rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (pmap_page_test_mappings(m, FALSE, TRUE)); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pt_entry_t *pte; boolean_t rv; int lvl; rv = FALSE; PMAP_LOCK(pmap); pte = pmap_pte(pmap, addr, &lvl); if (pte != NULL && pmap_load(pte) != 0) { rv = TRUE; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); return (pmap_page_test_mappings(m, TRUE, FALSE)); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { pmap_t pmap; struct rwlock *lock; pv_entry_t pv; pt_entry_t oldpte, *pte; int lvl, md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. 
*/ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); retry_pv_loop: rw_wlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } pte = pmap_pte(pmap, pv->pv_va, &lvl); retry: oldpte = pmap_load(pte); if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) { if (!atomic_cmpset_long(pte, oldpte, oldpte | ATTR_AP(ATTR_AP_RO))) goto retry; if ((oldpte & ATTR_AF) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); vm_page_aflag_clear(m, PGA_WRITEABLE); rw_runlock(&pvh_global_lock); } static __inline boolean_t safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) { return (FALSE); } #define PMAP_TS_REFERENCED_MAX 5 /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int pmap_ts_referenced(vm_page_t m) { pv_entry_t pv, pvf; pmap_t pmap; struct rwlock *lock; pd_entry_t *pde, tpde; pt_entry_t *pte, tpte; vm_paddr_t pa; int cleared, md_gen, not_cleared, lvl; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); SLIST_INIT(&free); cleared = 0; pa = VM_PAGE_TO_PHYS(m); lock = PHYS_TO_PV_LIST_LOCK(pa); rw_rlock(&pvh_global_lock); rw_wlock(lock); retry: not_cleared = 0; if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { if (pvf == NULL) pvf = pv; pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } pde = pmap_pde(pmap, pv->pv_va, &lvl); KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found")); KASSERT(lvl == 2, ("pmap_ts_referenced: invalid pde level %d", lvl)); tpde = pmap_load(pde); KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE, ("pmap_ts_referenced: found an invalid l2 table")); pte = pmap_l2_to_l3(pde, pv->pv_va); tpte = pmap_load(pte); if ((tpte & ATTR_AF) != 0) { if (safe_to_clear_referenced(pmap, tpte)) { /* * TODO: We don't handle the access flag * at all. We need to be able to set it in * the exception handler. */ panic("ARM64TODO: safe_to_clear_referenced\n"); } else if ((tpte & ATTR_SW_WIRED) == 0) { /* * Wired pages cannot be paged out so * doing accessed bit emulation for * them is wasted effort. We do the * hard work for unwired pages only. */ pmap_remove_l3(pmap, pte, pv->pv_va, tpde, &free, &lock); pmap_invalidate_page(pmap, pv->pv_va); cleared++; if (pvf == pv) pvf = NULL; pv = NULL; KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); } else not_cleared++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. 
*/ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + not_cleared < PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); rw_runlock(&pvh_global_lock); pmap_free_zero_pages(&free); return (cleared + not_cleared); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. */ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; /* ARM64TODO: We lack support for tracking if a page is modified */ } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return ((void *)PHYS_TO_DMAP(pa)); } void pmap_unmapbios(vm_paddr_t pa, vm_size_t size) { } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { m->md.pv_memattr = ma; /* * ARM64TODO: Implement the below (from the amd64 pmap) * If "m" is a normal page, update its direct mapping. This update * can be relied upon to perform any cache operations that are * required for data coherence. 
*/ if ((m->flags & PG_FICTITIOUS) == 0 && PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m))) panic("ARM64TODO: pmap_page_set_memattr"); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pd_entry_t *l1p, l1; pd_entry_t *l2p, l2; pt_entry_t *l3p, l3; vm_paddr_t pa; bool managed; int val; PMAP_LOCK(pmap); retry: pa = 0; val = 0; managed = false; l1p = pmap_l1(pmap, addr); if (l1p == NULL) /* No l1 */ goto done; l1 = pmap_load(l1p); if ((l1 & ATTR_DESCR_MASK) == L1_INVAL) goto done; if ((l1 & ATTR_DESCR_MASK) == L1_BLOCK) { pa = (l1 & ~ATTR_MASK) | (addr & L1_OFFSET); managed = (l1 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; val = MINCORE_SUPER | MINCORE_INCORE; if (pmap_page_dirty(l1)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((l1 & ATTR_AF) == ATTR_AF) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; goto done; } l2p = pmap_l1_to_l2(l1p, addr); if (l2p == NULL) /* No l2 */ goto done; l2 = pmap_load(l2p); if ((l2 & ATTR_DESCR_MASK) == L2_INVAL) goto done; if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) { pa = (l2 & ~ATTR_MASK) | (addr & L2_OFFSET); managed = (l2 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; val = MINCORE_SUPER | MINCORE_INCORE; if (pmap_page_dirty(l2)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((l2 & ATTR_AF) == ATTR_AF) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; goto done; } l3p = pmap_l2_to_l3(l2p, addr); if (l3p == NULL) /* No l3 */ goto done; l3 = pmap_load(l3p); if ((l3 & ATTR_DESCR_MASK) == L3_INVAL) goto done; if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) { pa = (l3 & ~ATTR_MASK) | (addr & L3_OFFSET); managed = (l3 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; val = MINCORE_INCORE; if (pmap_page_dirty(l3)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((l3 & ATTR_AF) == ATTR_AF) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } done: if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } void pmap_activate(struct thread *td) { pmap_t pmap; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); td->td_pcb->pcb_l0addr = vtophys(pmap->pm_l0); __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l0addr)); pmap_invalidate_all(pmap); critical_exit(); } void pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) { if (va >= VM_MIN_KERNEL_ADDRESS) { cpu_icache_sync_range(va, sz); } else { u_int len, offset; vm_paddr_t pa; /* Find the length of data in this page to flush */ offset = va & PAGE_MASK; len = imin(PAGE_SIZE - offset, sz); while (sz != 0) { /* Extract the physical address & find it in the DMAP */ pa = pmap_extract(pmap, va); if (pa != 0) cpu_icache_sync_range(PHYS_TO_DMAP(pa), len); /* Move to the next page */ sz -= len; va += len; /* Set the length for the next iteration */ len = imin(PAGE_SIZE, sz); } } } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { } /** * Get the kernel virtual address of a set of physical pages. If there are * physical addresses not covered by the DMAP perform a transient mapping * that will be removed when calling pmap_unmap_io_transient.
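 *
 * A sketch of the expected calling pattern (the local names below are
 * purely illustrative):
 *
 *	vm_offset_t va[1];
 *	boolean_t mapped;
 *
 *	mapped = pmap_map_io_transient(&m, va, 1, FALSE);
 *	... access the page through va[0] ...
 *	if (mapped)
 *		pmap_unmap_io_transient(&m, va, 1, FALSE);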
* * \param page The pages the caller wishes to obtain the virtual * address on the kernel memory map. * \param vaddr On return contains the kernel virtual memory address * of the pages passed in the page parameter. * \param count Number of pages passed in. * \param can_fault TRUE if the thread using the mapped pages can take * page faults, FALSE otherwise. * * \returns TRUE if the caller must call pmap_unmap_io_transient when * finished or FALSE otherwise. * */ boolean_t pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; boolean_t needs_mapping; int error, i; /* * Allocate any KVA space that we need, this is done in a separate * loop to prevent calling vmem_alloc while pinned. */ needs_mapping = FALSE; for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (__predict_false(!PHYS_IN_DMAP(paddr))) { error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, &vaddr[i]); KASSERT(error == 0, ("vmem_alloc failed: %d", error)); needs_mapping = TRUE; } else { vaddr[i] = PHYS_TO_DMAP(paddr); } } /* Exit early if everything is covered by the DMAP */ if (!needs_mapping) return (FALSE); if (!can_fault) sched_pin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (!PHYS_IN_DMAP(paddr)) { panic( "pmap_map_io_transient: TODO: Map out of DMAP data"); } } return (needs_mapping); } void pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; int i; if (!can_fault) sched_unpin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (!PHYS_IN_DMAP(paddr)) { panic("ARM64TODO: pmap_unmap_io_transient: Unmap data"); } } } Index: user/alc/PQ_LAUNDRY/sys/arm64/include/armreg.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm64/include/armreg.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/arm64/include/armreg.h (revision 303642) @@ -1,411 +1,458 @@ /*- * Copyright (c) 2013, 2014 Andrew Turner * Copyright (c) 2015 The FreeBSD Foundation * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _MACHINE_ARMREG_H_ #define _MACHINE_ARMREG_H_ #define INSN_SIZE 4 #define READ_SPECIALREG(reg) \ ({ uint64_t val; \ __asm __volatile("mrs %0, " __STRING(reg) : "=&r" (val)); \ val; \ }) #define WRITE_SPECIALREG(reg, val) \ __asm __volatile("msr " __STRING(reg) ", %0" : : "r"((uint64_t)val)) /* CNTHCTL_EL2 - Counter-timer Hypervisor Control register */ #define CNTHCTL_EVNTI_MASK (0xf << 4) /* Bit to trigger event stream */ #define CNTHCTL_EVNTDIR (1 << 3) /* Control transition trigger bit */ #define CNTHCTL_EVNTEN (1 << 2) /* Enable event stream */ #define CNTHCTL_EL1PCEN (1 << 1) /* Allow EL0/1 physical timer access */ #define CNTHCTL_EL1PCTEN (1 << 0) /*Allow EL0/1 physical counter access*/ /* CPACR_EL1 */ #define CPACR_FPEN_MASK (0x3 << 20) #define CPACR_FPEN_TRAP_ALL1 (0x0 << 20) /* Traps from EL0 and EL1 */ #define CPACR_FPEN_TRAP_EL0 (0x1 << 20) /* Traps from EL0 */ #define CPACR_FPEN_TRAP_ALL2 (0x2 << 20) /* Traps from EL0 and EL1 */ #define CPACR_FPEN_TRAP_NONE (0x3 << 20) /* No traps */ #define CPACR_TTA (0x1 << 28) /* CTR_EL0 - Cache Type Register */ #define CTR_DLINE_SHIFT 16 #define CTR_DLINE_MASK (0xf << CTR_DLINE_SHIFT) #define CTR_DLINE_SIZE(reg) (((reg) & CTR_DLINE_MASK) >> CTR_DLINE_SHIFT) #define CTR_ILINE_SHIFT 0 #define CTR_ILINE_MASK (0xf << CTR_ILINE_SHIFT) #define CTR_ILINE_SIZE(reg) (((reg) & CTR_ILINE_MASK) >> CTR_ILINE_SHIFT) /* DCZID_EL0 - Data Cache Zero ID register */ #define DCZID_DZP (1 << 4) /* DC ZVA prohibited if non-0 */ #define DCZID_BS_SHIFT 0 #define DCZID_BS_MASK (0xf << DCZID_BS_SHIFT) #define DCZID_BS_SIZE(reg) (((reg) & DCZID_BS_MASK) >> DCZID_BS_SHIFT) /* ESR_ELx */ #define ESR_ELx_ISS_MASK 0x00ffffff #define ISS_INSN_FnV (0x01 << 10) #define ISS_INSN_EA (0x01 << 9) #define ISS_INSN_S1PTW (0x01 << 7) #define ISS_INSN_IFSC_MASK (0x1f << 0) #define ISS_DATA_ISV (0x01 << 24) #define ISS_DATA_SAS_MASK (0x03 << 22) #define ISS_DATA_SSE (0x01 << 21) #define ISS_DATA_SRT_MASK (0x1f << 16) #define ISS_DATA_SF (0x01 << 15) #define ISS_DATA_AR (0x01 << 14) #define ISS_DATA_FnV (0x01 << 10) #define ISS_DATa_EA (0x01 << 9) #define ISS_DATa_CM (0x01 << 8) #define ISS_INSN_S1PTW (0x01 << 7) #define ISS_DATa_WnR (0x01 << 6) #define ISS_DATA_DFSC_MASK (0x1f << 0) +#define ISS_DATA_DFSC_ASF_L0 (0x00 << 0) +#define ISS_DATA_DFSC_ASF_L1 (0x01 << 0) +#define ISS_DATA_DFSC_ASF_L2 (0x02 << 0) +#define ISS_DATA_DFSC_ASF_L3 (0x03 << 0) +#define ISS_DATA_DFSC_TF_L0 (0x04 << 0) +#define ISS_DATA_DFSC_TF_L1 (0x05 << 0) +#define ISS_DATA_DFSC_TF_L2 (0x06 << 0) +#define ISS_DATA_DFSC_TF_L3 (0x07 << 0) +#define ISS_DATA_DFSC_AFF_L1 (0x09 << 0) +#define ISS_DATA_DFSC_AFF_L2 (0x0a << 0) +#define ISS_DATA_DFSC_AFF_L3 (0x0b << 0) +#define ISS_DATA_DFSC_PF_L1 (0x0d << 0) +#define ISS_DATA_DFSC_PF_L2 (0x0e << 0) +#define ISS_DATA_DFSC_PF_L3 (0x0f << 0) +#define ISS_DATA_DFSC_EXT (0x10 << 0) +#define ISS_DATA_DFSC_EXT_L0 (0x14 << 0) +#define ISS_DATA_DFSC_EXT_L1 (0x15 << 0) +#define ISS_DATA_DFSC_EXT_L2 (0x16 << 0) +#define ISS_DATA_DFSC_EXT_L3 (0x17 << 0) +#define ISS_DATA_DFSC_ECC (0x18 << 0) +#define ISS_DATA_DFSC_ECC_L0 (0x1c << 0) +#define ISS_DATA_DFSC_ECC_L1 (0x1d << 0) +#define ISS_DATA_DFSC_ECC_L2 (0x1e << 0) +#define ISS_DATA_DFSC_ECC_L3 (0x1f << 0) +#define ISS_DATA_DFSC_ALIGN (0x21 << 0) +#define ISS_DATA_DFSC_TLB_CONFLICT (0x28 << 0) #define ESR_ELx_IL (0x01 << 25) #define ESR_ELx_EC_SHIFT 26 #define ESR_ELx_EC_MASK (0x3f << 26) #define ESR_ELx_EXCEPTION(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) #define EXCP_UNKNOWN 0x00 /* Unkwn 
exception */ #define EXCP_FP_SIMD 0x07 /* VFP/SIMD trap */ #define EXCP_ILL_STATE 0x0e /* Illegal execution state */ #define EXCP_SVC 0x15 /* SVC trap */ #define EXCP_MSR 0x18 /* MSR/MRS trap */ #define EXCP_INSN_ABORT_L 0x20 /* Instruction abort, from lower EL */ #define EXCP_INSN_ABORT 0x21 /* Instruction abort, from same EL */ #define EXCP_PC_ALIGN 0x22 /* PC alignment fault */ #define EXCP_DATA_ABORT_L 0x24 /* Data abort, from lower EL */ #define EXCP_DATA_ABORT 0x25 /* Data abort, from same EL */ #define EXCP_SP_ALIGN 0x26 /* SP slignment fault */ #define EXCP_TRAP_FP 0x2c /* Trapped FP exception */ #define EXCP_SERROR 0x2f /* SError interrupt */ #define EXCP_SOFTSTP_EL0 0x32 /* Software Step, from lower EL */ #define EXCP_SOFTSTP_EL1 0x33 /* Software Step, from same EL */ #define EXCP_WATCHPT_EL1 0x35 /* Watchpoint, from same EL */ #define EXCP_BRK 0x3c /* Breakpoint */ /* ICC_CTLR_EL1 */ #define ICC_CTLR_EL1_EOIMODE (1U << 1) /* ICC_IAR1_EL1 */ #define ICC_IAR1_EL1_SPUR (0x03ff) /* ICC_IGRPEN0_EL1 */ #define ICC_IGRPEN0_EL1_EN (1U << 0) /* ICC_PMR_EL1 */ #define ICC_PMR_EL1_PRIO_MASK (0xFFUL) /* ICC_SGI1R_EL1 */ #define ICC_SGI1R_EL1_TL_MASK 0xffffUL #define ICC_SGI1R_EL1_AFF1_SHIFT 16 #define ICC_SGI1R_EL1_SGIID_SHIFT 24 #define ICC_SGI1R_EL1_AFF2_SHIFT 32 #define ICC_SGI1R_EL1_AFF3_SHIFT 48 #define ICC_SGI1R_EL1_SGIID_MASK 0xfUL #define ICC_SGI1R_EL1_IRM (0x1UL << 40) /* ICC_SRE_EL1 */ #define ICC_SRE_EL1_SRE (1U << 0) /* ICC_SRE_EL2 */ #define ICC_SRE_EL2_SRE (1U << 0) #define ICC_SRE_EL2_EN (1U << 3) /* ID_AA64DFR0_EL1 */ #define ID_AA64DFR0_MASK 0xf0f0ffff #define ID_AA64DFR0_DEBUG_VER_SHIFT 0 #define ID_AA64DFR0_DEBUG_VER_MASK (0xf << ID_AA64DFR0_DEBUG_VER_SHIFT) #define ID_AA64DFR0_DEBUG_VER(x) ((x) & ID_AA64DFR0_DEBUG_VER_MASK) #define ID_AA64DFR0_DEBUG_VER_8 (0x6 << ID_AA64DFR0_DEBUG_VER_SHIFT) #define ID_AA64DFR0_TRACE_VER_SHIFT 4 #define ID_AA64DFR0_TRACE_VER_MASK (0xf << ID_AA64DFR0_TRACE_VER_SHIFT) #define ID_AA64DFR0_TRACE_VER(x) ((x) & ID_AA64DFR0_TRACE_VER_MASK) #define ID_AA64DFR0_TRACE_VER_NONE (0x0 << ID_AA64DFR0_TRACE_VER_SHIFT) #define ID_AA64DFR0_TRACE_VER_IMPL (0x1 << ID_AA64DFR0_TRACE_VER_SHIFT) #define ID_AA64DFR0_PMU_VER_SHIFT 8 #define ID_AA64DFR0_PMU_VER_MASK (0xf << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_PMU_VER(x) ((x) & ID_AA64DFR0_PMU_VER_MASK) #define ID_AA64DFR0_PMU_VER_NONE (0x0 << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_PMU_VER_3 (0x1 << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_PMU_VER_IMPL (0xf << ID_AA64DFR0_PMU_VER_SHIFT) #define ID_AA64DFR0_BRPS_SHIFT 12 #define ID_AA64DFR0_BRPS_MASK (0xf << ID_AA64DFR0_BRPS_SHIFT) #define ID_AA64DFR0_BRPS(x) \ ((((x) >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) + 1) #define ID_AA64DFR0_WRPS_SHIFT 20 #define ID_AA64DFR0_WRPS_MASK (0xf << ID_AA64DFR0_WRPS_SHIFT) #define ID_AA64DFR0_WRPS(x) \ ((((x) >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) + 1) #define ID_AA64DFR0_CTX_CMPS_SHIFT 28 #define ID_AA64DFR0_CTX_CMPS_MASK (0xf << ID_AA64DFR0_CTX_CMPS_SHIFT) #define ID_AA64DFR0_CTX_CMPS(x) \ ((((x) >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) + 1) /* ID_AA64ISAR0_EL1 */ #define ID_AA64ISAR0_MASK 0x000ffff0 #define ID_AA64ISAR0_AES_SHIFT 4 #define ID_AA64ISAR0_AES_MASK (0xf << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_AES(x) ((x) & ID_AA64ISAR0_AES_MASK) #define ID_AA64ISAR0_AES_NONE (0x0 << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_AES_BASE (0x1 << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_AES_PMULL (0x2 << ID_AA64ISAR0_AES_SHIFT) #define ID_AA64ISAR0_SHA1_SHIFT 8 #define ID_AA64ISAR0_SHA1_MASK (0xf << 
ID_AA64ISAR0_SHA1_SHIFT) #define ID_AA64ISAR0_SHA1(x) ((x) & ID_AA64ISAR0_SHA1_MASK) #define ID_AA64ISAR0_SHA1_NONE (0x0 << ID_AA64ISAR0_SHA1_SHIFT) #define ID_AA64ISAR0_SHA1_BASE (0x1 << ID_AA64ISAR0_SHA1_SHIFT) #define ID_AA64ISAR0_SHA2_SHIFT 12 #define ID_AA64ISAR0_SHA2_MASK (0xf << ID_AA64ISAR0_SHA2_SHIFT) #define ID_AA64ISAR0_SHA2(x) ((x) & ID_AA64ISAR0_SHA2_MASK) #define ID_AA64ISAR0_SHA2_NONE (0x0 << ID_AA64ISAR0_SHA2_SHIFT) #define ID_AA64ISAR0_SHA2_BASE (0x1 << ID_AA64ISAR0_SHA2_SHIFT) #define ID_AA64ISAR0_CRC32_SHIFT 16 #define ID_AA64ISAR0_CRC32_MASK (0xf << ID_AA64ISAR0_CRC32_SHIFT) #define ID_AA64ISAR0_CRC32(x) ((x) & ID_AA64ISAR0_CRC32_MASK) #define ID_AA64ISAR0_CRC32_NONE (0x0 << ID_AA64ISAR0_CRC32_SHIFT) #define ID_AA64ISAR0_CRC32_BASE (0x1 << ID_AA64ISAR0_CRC32_SHIFT) /* ID_AA64MMFR0_EL1 */ #define ID_AA64MMFR0_MASK 0xffffffff #define ID_AA64MMFR0_PA_RANGE_SHIFT 0 #define ID_AA64MMFR0_PA_RANGE_MASK (0xf << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE(x) ((x) & ID_AA64MMFR0_PA_RANGE_MASK) #define ID_AA64MMFR0_PA_RANGE_4G (0x0 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_64G (0x1 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_1T (0x2 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_4T (0x3 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_16T (0x4 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_PA_RANGE_256T (0x5 << ID_AA64MMFR0_PA_RANGE_SHIFT) #define ID_AA64MMFR0_ASID_BITS_SHIFT 4 #define ID_AA64MMFR0_ASID_BITS_MASK (0xf << ID_AA64MMFR0_ASID_BITS_SHIFT) #define ID_AA64MMFR0_ASID_BITS(x) ((x) & ID_AA64MMFR0_ASID_BITS_MASK) #define ID_AA64MMFR0_ASID_BITS_8 (0x0 << ID_AA64MMFR0_ASID_BITS_SHIFT) #define ID_AA64MMFR0_ASID_BITS_16 (0x2 << ID_AA64MMFR0_ASID_BITS_SHIFT) #define ID_AA64MMFR0_BIGEND_SHIFT 8 #define ID_AA64MMFR0_BIGEND_MASK (0xf << ID_AA64MMFR0_BIGEND_SHIFT) #define ID_AA64MMFR0_BIGEND(x) ((x) & ID_AA64MMFR0_BIGEND_MASK) #define ID_AA64MMFR0_BIGEND_FIXED (0x0 << ID_AA64MMFR0_BIGEND_SHIFT) #define ID_AA64MMFR0_BIGEND_MIXED (0x1 << ID_AA64MMFR0_BIGEND_SHIFT) #define ID_AA64MMFR0_S_NS_MEM_SHIFT 12 #define ID_AA64MMFR0_S_NS_MEM_MASK (0xf << ID_AA64MMFR0_S_NS_MEM_SHIFT) #define ID_AA64MMFR0_S_NS_MEM(x) ((x) & ID_AA64MMFR0_S_NS_MEM_MASK) #define ID_AA64MMFR0_S_NS_MEM_NONE (0x0 << ID_AA64MMFR0_S_NS_MEM_SHIFT) #define ID_AA64MMFR0_S_NS_MEM_DISTINCT (0x1 << ID_AA64MMFR0_S_NS_MEM_SHIFT) #define ID_AA64MMFR0_BIGEND_EL0_SHIFT 16 #define ID_AA64MMFR0_BIGEND_EL0_MASK (0xf << ID_AA64MMFR0_BIGEND_EL0_SHIFT) #define ID_AA64MMFR0_BIGEND_EL0(x) ((x) & ID_AA64MMFR0_BIGEND_EL0_MASK) #define ID_AA64MMFR0_BIGEND_EL0_FIXED (0x0 << ID_AA64MMFR0_BIGEND_EL0_SHIFT) #define ID_AA64MMFR0_BIGEND_EL0_MIXED (0x1 << ID_AA64MMFR0_BIGEND_EL0_SHIFT) #define ID_AA64MMFR0_TGRAN16_SHIFT 20 #define ID_AA64MMFR0_TGRAN16_MASK (0xf << ID_AA64MMFR0_TGRAN16_SHIFT) #define ID_AA64MMFR0_TGRAN16(x) ((x) & ID_AA64MMFR0_TGRAN16_MASK) #define ID_AA64MMFR0_TGRAN16_NONE (0x0 << ID_AA64MMFR0_TGRAN16_SHIFT) #define ID_AA64MMFR0_TGRAN16_IMPL (0x1 << ID_AA64MMFR0_TGRAN16_SHIFT) #define ID_AA64MMFR0_TGRAN64_SHIFT 24 #define ID_AA64MMFR0_TGRAN64_MASK (0xf << ID_AA64MMFR0_TGRAN64_SHIFT) #define ID_AA64MMFR0_TGRAN64(x) ((x) & ID_AA64MMFR0_TGRAN64_MASK) #define ID_AA64MMFR0_TGRAN64_IMPL (0x0 << ID_AA64MMFR0_TGRAN64_SHIFT) #define ID_AA64MMFR0_TGRAN64_NONE (0xf << ID_AA64MMFR0_TGRAN64_SHIFT) #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN4_MASK (0xf << ID_AA64MMFR0_TGRAN4_SHIFT) #define ID_AA64MMFR0_TGRAN4(x) ((x) & 
ID_AA64MMFR0_TGRAN4_MASK) #define ID_AA64MMFR0_TGRAN4_IMPL (0x0 << ID_AA64MMFR0_TGRAN4_SHIFT) #define ID_AA64MMFR0_TGRAN4_NONE (0xf << ID_AA64MMFR0_TGRAN4_SHIFT) /* ID_AA64PFR0_EL1 */ #define ID_AA64PFR0_MASK 0x0fffffff #define ID_AA64PFR0_EL0_SHIFT 0 #define ID_AA64PFR0_EL0_MASK (0xf << ID_AA64PFR0_EL0_SHIFT) #define ID_AA64PFR0_EL0(x) ((x) & ID_AA64PFR0_EL0_MASK) #define ID_AA64PFR0_EL0_64 (1 << ID_AA64PFR0_EL0_SHIFT) #define ID_AA64PFR0_EL0_64_32 (2 << ID_AA64PFR0_EL0_SHIFT) #define ID_AA64PFR0_EL1_SHIFT 4 #define ID_AA64PFR0_EL1_MASK (0xf << ID_AA64PFR0_EL1_SHIFT) #define ID_AA64PFR0_EL1(x) ((x) & ID_AA64PFR0_EL1_MASK) #define ID_AA64PFR0_EL1_64 (1 << ID_AA64PFR0_EL1_SHIFT) #define ID_AA64PFR0_EL1_64_32 (2 << ID_AA64PFR0_EL1_SHIFT) #define ID_AA64PFR0_EL2_SHIFT 8 #define ID_AA64PFR0_EL2_MASK (0xf << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL2(x) ((x) & ID_AA64PFR0_EL2_MASK) #define ID_AA64PFR0_EL2_NONE (0 << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL2_64 (1 << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL2_64_32 (2 << ID_AA64PFR0_EL2_SHIFT) #define ID_AA64PFR0_EL3_SHIFT 12 #define ID_AA64PFR0_EL3_MASK (0xf << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_EL3(x) ((x) & ID_AA64PFR0_EL3_MASK) #define ID_AA64PFR0_EL3_NONE (0 << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_EL3_64 (1 << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_EL3_64_32 (2 << ID_AA64PFR0_EL3_SHIFT) #define ID_AA64PFR0_FP_SHIFT 16 #define ID_AA64PFR0_FP_MASK (0xf << ID_AA64PFR0_FP_SHIFT) #define ID_AA64PFR0_FP(x) ((x) & ID_AA64PFR0_FP_MASK) #define ID_AA64PFR0_FP_IMPL (0x0 << ID_AA64PFR0_FP_SHIFT) #define ID_AA64PFR0_FP_NONE (0xf << ID_AA64PFR0_FP_SHIFT) #define ID_AA64PFR0_ADV_SIMD_SHIFT 20 #define ID_AA64PFR0_ADV_SIMD_MASK (0xf << ID_AA64PFR0_ADV_SIMD_SHIFT) #define ID_AA64PFR0_ADV_SIMD(x) ((x) & ID_AA64PFR0_ADV_SIMD_MASK) #define ID_AA64PFR0_ADV_SIMD_IMPL (0x0 << ID_AA64PFR0_ADV_SIMD_SHIFT) #define ID_AA64PFR0_ADV_SIMD_NONE (0xf << ID_AA64PFR0_ADV_SIMD_SHIFT) #define ID_AA64PFR0_GIC_BITS 0x4 /* Number of bits in GIC field */ #define ID_AA64PFR0_GIC_SHIFT 24 #define ID_AA64PFR0_GIC_MASK (0xf << ID_AA64PFR0_GIC_SHIFT) #define ID_AA64PFR0_GIC(x) ((x) & ID_AA64PFR0_GIC_MASK) #define ID_AA64PFR0_GIC_CPUIF_NONE (0x0 << ID_AA64PFR0_GIC_SHIFT) #define ID_AA64PFR0_GIC_CPUIF_EN (0x1 << ID_AA64PFR0_GIC_SHIFT) /* MAIR_EL1 - Memory Attribute Indirection Register */ #define MAIR_ATTR_MASK(idx) (0xff << ((idx) * 8)) #define MAIR_ATTR(attr, idx) ((attr) << ((idx) * 8)) + +/* PAR_EL1 - Physical Address Register */ +#define PAR_F_SHIFT 0 +#define PAR_F (0x1 << PAR_F_SHIFT) +#define PAR_SUCCESS(x) (((x) & PAR_F) == 0) +/* When PAR_F == 0 (success) */ +#define PAR_SH_SHIFT 7 +#define PAR_SH_MASK (0x3 << PAR_SH_SHIFT) +#define PAR_NS_SHIFT 9 +#define PAR_NS_MASK (0x3 << PAR_NS_SHIFT) +#define PAR_PA_SHIFT 12 +#define PAR_PA_MASK 0x0000fffffffff000 +#define PAR_ATTR_SHIFT 56 +#define PAR_ATTR_MASK (0xffUL << PAR_ATTR_SHIFT) +/* When PAR_F == 1 (aborted) */ +#define PAR_FST_SHIFT 1 +#define PAR_FST_MASK (0x3f << PAR_FST_SHIFT) +#define PAR_PTW_SHIFT 8 +#define PAR_PTW_MASK (0x1 << PAR_PTW_SHIFT) +#define PAR_S_SHIFT 9 +#define PAR_S_MASK (0x1 << PAR_S_SHIFT) /* SCTLR_EL1 - System Control Register */ #define SCTLR_RES0 0xc8222400 /* Reserved, write 0 */ #define SCTLR_RES1 0x30d00800 /* Reserved, write 1 */ #define SCTLR_M 0x00000001 #define SCTLR_A 0x00000002 #define SCTLR_C 0x00000004 #define SCTLR_SA 0x00000008 #define SCTLR_SA0 0x00000010 #define SCTLR_CP15BEN 0x00000020 #define SCTLR_THEE 0x00000040 #define SCTLR_ITD 0x00000080 #define
SCTLR_SED 0x00000100 #define SCTLR_UMA 0x00000200 #define SCTLR_I 0x00001000 #define SCTLR_DZE 0x00004000 #define SCTLR_UCT 0x00008000 #define SCTLR_nTWI 0x00010000 #define SCTLR_nTWE 0x00040000 #define SCTLR_WXN 0x00080000 #define SCTLR_EOE 0x01000000 #define SCTLR_EE 0x02000000 #define SCTLR_UCI 0x04000000 /* SPSR_EL1 */ /* * When the exception is taken in AArch64: * M[4] is 0 for AArch64 mode * M[3:2] is the exception level * M[1] is unused * M[0] is the SP select: * 0: always SP0 * 1: current ELs SP */ #define PSR_M_EL0t 0x00000000 #define PSR_M_EL1t 0x00000004 #define PSR_M_EL1h 0x00000005 #define PSR_M_EL2t 0x00000008 #define PSR_M_EL2h 0x00000009 #define PSR_M_MASK 0x0000001f #define PSR_F 0x00000040 #define PSR_I 0x00000080 #define PSR_A 0x00000100 #define PSR_D 0x00000200 #define PSR_IL 0x00100000 #define PSR_SS 0x00200000 #define PSR_V 0x10000000 #define PSR_C 0x20000000 #define PSR_Z 0x40000000 #define PSR_N 0x80000000 /* TCR_EL1 - Translation Control Register */ #define TCR_ASID_16 (1 << 36) #define TCR_IPS_SHIFT 32 #define TCR_IPS_32BIT (0 << TCR_IPS_SHIFT) #define TCR_IPS_36BIT (1 << TCR_IPS_SHIFT) #define TCR_IPS_40BIT (2 << TCR_IPS_SHIFT) #define TCR_IPS_42BIT (3 << TCR_IPS_SHIFT) #define TCR_IPS_44BIT (4 << TCR_IPS_SHIFT) #define TCR_IPS_48BIT (5 << TCR_IPS_SHIFT) #define TCR_TG1_SHIFT 30 #define TCR_TG1_16K (1 << TCR_TG1_SHIFT) #define TCR_TG1_4K (2 << TCR_TG1_SHIFT) #define TCR_TG1_64K (3 << TCR_TG1_SHIFT) #define TCR_SH1_SHIFT 28 #define TCR_SH1_IS (0x3UL << TCR_SH1_SHIFT) #define TCR_ORGN1_SHIFT 26 #define TCR_ORGN1_WBWA (0x1UL << TCR_ORGN1_SHIFT) #define TCR_IRGN1_SHIFT 24 #define TCR_IRGN1_WBWA (0x1UL << TCR_IRGN1_SHIFT) #define TCR_SH0_SHIFT 12 #define TCR_SH0_IS (0x3UL << TCR_SH0_SHIFT) #define TCR_ORGN0_SHIFT 10 #define TCR_ORGN0_WBWA (0x1UL << TCR_ORGN0_SHIFT) #define TCR_IRGN0_SHIFT 8 #define TCR_IRGN0_WBWA (0x1UL << TCR_IRGN0_SHIFT) #define TCR_CACHE_ATTRS ((TCR_IRGN0_WBWA | TCR_IRGN1_WBWA) |\ (TCR_ORGN0_WBWA | TCR_ORGN1_WBWA)) #ifdef SMP #define TCR_SMP_ATTRS (TCR_SH0_IS | TCR_SH1_IS) #else #define TCR_SMP_ATTRS 0 #endif #define TCR_T1SZ_SHIFT 16 #define TCR_T0SZ_SHIFT 0 #define TCR_T1SZ(x) ((x) << TCR_T1SZ_SHIFT) #define TCR_T0SZ(x) ((x) << TCR_T0SZ_SHIFT) #define TCR_TxSZ(x) (TCR_T1SZ(x) | TCR_T0SZ(x)) /* Saved Program Status Register */ #define DBG_SPSR_SS (0x1 << 21) /* Monitor Debug System Control Register */ #define DBG_MDSCR_SS (0x1 << 0) #define DBG_MDSCR_KDE (0x1 << 13) #define DBG_MDSCR_MDE (0x1 << 15) /* Perfomance Monitoring Counters */ #define PMCR_E (1 << 0) /* Enable all counters */ #define PMCR_P (1 << 1) /* Reset all counters */ #define PMCR_C (1 << 2) /* Clock counter reset */ #define PMCR_D (1 << 3) /* CNTR counts every 64 clk cycles */ #define PMCR_X (1 << 4) /* Export to ext. 
monitoring (ETM) */ #define PMCR_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ #define PMCR_LC (1 << 6) /* Long cycle count enable */ #define PMCR_IMP_SHIFT 24 /* Implementer code */ #define PMCR_IMP_MASK (0xff << PMCR_IMP_SHIFT) #define PMCR_IDCODE_SHIFT 16 /* Identification code */ #define PMCR_IDCODE_MASK (0xff << PMCR_IDCODE_SHIFT) #define PMCR_IDCODE_CORTEX_A57 0x01 #define PMCR_IDCODE_CORTEX_A72 0x02 #define PMCR_IDCODE_CORTEX_A53 0x03 #define PMCR_N_SHIFT 11 /* Number of counters implemented */ #define PMCR_N_MASK (0x1f << PMCR_N_SHIFT) #endif /* !_MACHINE_ARMREG_H_ */ Index: user/alc/PQ_LAUNDRY/sys/arm64/include/machdep.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm64/include/machdep.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/arm64/include/machdep.h (revision 303642) @@ -1,46 +1,54 @@ /*- * Copyright (c) 2013 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_MACHDEP_H_ #define _MACHINE_MACHDEP_H_ struct arm64_bootparams { vm_offset_t modulep; vm_offset_t kern_l1pt; /* L1 page table for the kernel */ uint64_t kern_delta; vm_offset_t kern_stack; vm_offset_t kern_l0pt; /* L1 page table for the kernel */ }; +enum arm64_bus { + ARM64_BUS_NONE, + ARM64_BUS_FDT, + ARM64_BUS_ACPI, +}; + +extern enum arm64_bus arm64_bus_method; + extern vm_paddr_t physmap[]; extern u_int physmap_idx; void initarm(struct arm64_bootparams *); extern void (*pagezero)(void *); #endif /* _MACHINE_MACHDEP_H_ */ Index: user/alc/PQ_LAUNDRY/sys/boot/common/bcache.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/boot/common/bcache.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/boot/common/bcache.c (revision 303642) @@ -1,468 +1,470 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright 2015 Toomas Soome * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include __FBSDID("$FreeBSD$"); /* * Simple hashed block cache */ #include #include #include #include #include "bootstrap.h" /* #define BCACHE_DEBUG */ #ifdef BCACHE_DEBUG # define DEBUG(fmt, args...) printf("%s: " fmt "\n" , __func__ , ## args) #else # define DEBUG(fmt, args...) #endif struct bcachectl { daddr_t bc_blkno; int bc_count; }; /* * bcache per device node. cache is allocated on device first open and freed * on last close, to save memory. The issue there is the size; biosdisk * supports up to 31 (0x1f) devices. Classic setup would use single disk * to boot from, but this has changed with zfs. */ struct bcache { struct bcachectl *bcache_ctl; caddr_t bcache_data; u_int bcache_nblks; size_t ra; }; static u_int bcache_total_nblks; /* set by bcache_init */ static u_int bcache_blksize; /* set by bcache_init */ static u_int bcache_numdev; /* set by bcache_add_dev */ /* statistics */ static u_int bcache_units; /* number of devices with cache */ static u_int bcache_unit_nblks; /* nblocks per unit */ static u_int bcache_hits; static u_int bcache_misses; static u_int bcache_ops; static u_int bcache_bypasses; static u_int bcache_bcount; static u_int bcache_rablks; #define BHASH(bc, blkno) ((blkno) & ((bc)->bcache_nblks - 1)) #define BCACHE_LOOKUP(bc, blkno) \ ((bc)->bcache_ctl[BHASH((bc), (blkno))].bc_blkno != (blkno)) #define BCACHE_READAHEAD 256 #define BCACHE_MINREADAHEAD 32 static void bcache_invalidate(struct bcache *bc, daddr_t blkno); static void bcache_insert(struct bcache *bc, daddr_t blkno); static void bcache_free_instance(struct bcache *bc); /* * Initialise the cache for (nblks) of (bsize). */ void bcache_init(u_int nblks, size_t bsize) { /* set up control data */ bcache_total_nblks = nblks; bcache_blksize = bsize; } /* * add number of devices to bcache. we have to divide cache space * between the devices, so bcache_add_dev() can be used to set up the * number. The issue is, we need to get the number before actual allocations. * bcache_add_dev() is supposed to be called from device init() call, so the * assumption is, devsw dv_init is called for plain devices first, and * for zfs, last. 
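 *
 * As a worked example (numbers are illustrative only): with
 * bcache_total_nblks = 512 and three devices registered here,
 * bcache_allocate() rounds the device count up to the next power of
 * two (4), so each per-device cache receives 512 >> 2 = 128 blocks.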
*/ void bcache_add_dev(int devices) { bcache_numdev += devices; } void * bcache_allocate(void) { u_int i; struct bcache *bc = malloc(sizeof (struct bcache)); int disks = bcache_numdev; if (disks == 0) disks = 1; /* safe guard */ if (bc == NULL) { errno = ENOMEM; return (bc); } /* * the bcache block count must be power of 2 for hash function */ i = fls(disks) - 1; /* highbit - 1 */ if (disks > (1 << i)) /* next power of 2 */ i++; bc->bcache_nblks = bcache_total_nblks >> i; bcache_unit_nblks = bc->bcache_nblks; bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize); if (bc->bcache_data == NULL) { /* dont error out yet. fall back to 32 blocks and try again */ bc->bcache_nblks = 32; bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize); } bc->bcache_ctl = malloc(bc->bcache_nblks * sizeof(struct bcachectl)); if ((bc->bcache_data == NULL) || (bc->bcache_ctl == NULL)) { bcache_free_instance(bc); errno = ENOMEM; return(NULL); } /* Flush the cache */ for (i = 0; i < bc->bcache_nblks; i++) { bc->bcache_ctl[i].bc_count = -1; bc->bcache_ctl[i].bc_blkno = -1; } bcache_units++; bc->ra = BCACHE_READAHEAD; /* optimistic read ahead */ return (bc); } void bcache_free(void *cache) { struct bcache *bc = cache; if (bc == NULL) return; bcache_free_instance(bc); bcache_units--; } /* * Handle a write request; write directly to the disk, and populate the * cache with the new values. */ static int write_strategy(void *devdata, int rw, daddr_t blk, size_t offset, size_t size, char *buf, size_t *rsize) { struct bcache_devdata *dd = (struct bcache_devdata *)devdata; struct bcache *bc = dd->dv_cache; daddr_t i, nblk; nblk = size / bcache_blksize; /* Invalidate the blocks being written */ for (i = 0; i < nblk; i++) { bcache_invalidate(bc, blk + i); } /* Write the blocks */ return (dd->dv_strategy(dd->dv_devdata, rw, blk, offset, size, buf, rsize)); } /* * Handle a read request; fill in parts of the request that can * be satisfied by the cache, use the supplied strategy routine to do * device I/O and then use the I/O results to populate the cache. */ static int read_strategy(void *devdata, int rw, daddr_t blk, size_t offset, size_t size, char *buf, size_t *rsize) { struct bcache_devdata *dd = (struct bcache_devdata *)devdata; struct bcache *bc = dd->dv_cache; size_t i, nblk, p_size, r_size, complete, ra; int result; daddr_t p_blk; caddr_t p_buf; if (bc == NULL) { errno = ENODEV; return (-1); } if (rsize != NULL) *rsize = 0; nblk = size / bcache_blksize; if ((nblk == 0 && size != 0) || offset != 0) nblk++; result = 0; complete = 1; /* Satisfy any cache hits up front, break on first miss */ for (i = 0; i < nblk; i++) { if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) { bcache_misses += (nblk - i); complete = 0; if (nblk - i > BCACHE_MINREADAHEAD && bc->ra > BCACHE_MINREADAHEAD) bc->ra >>= 1; /* reduce read ahead */ break; } else { bcache_hits++; } } if (complete) { /* whole set was in cache, return it */ if (bc->ra < BCACHE_READAHEAD) bc->ra <<= 1; /* increase read ahead */ bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset, buf, size); goto done; } /* * Fill in any misses. From check we have i pointing to first missing * block, read in all remaining blocks + readahead. * We have space at least for nblk - i before bcache wraps. 
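/*
 * Sketch of the sizing rule in bcache_allocate() above: the global
 * block budget from bcache_init() is divided by the next power of two
 * that covers the device count, so each unit's cache stays a power of
 * two and BHASH() can remain a simple mask.  Standalone example with
 * assumed numbers:
 */
#include <stdio.h>

static unsigned
next_pow2_shift(unsigned n)		/* smallest i with (1 << i) >= n */
{
	unsigned i = 0;

	while ((1u << i) < n)
		i++;
	return (i);
}

int
main(void)
{
	unsigned total_nblks = 512;	/* assumed bcache_init() budget */
	unsigned devs[] = { 1, 2, 3, 5, 8 };
	unsigned k;

	for (k = 0; k < sizeof(devs) / sizeof(devs[0]); k++)
		printf("%u device(s) -> %u blocks per unit\n",
		    devs[k], total_nblks >> next_pow2_shift(devs[k]));
	return (0);
}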
*/ p_blk = blk + i; p_buf = bc->bcache_data + (bcache_blksize * BHASH(bc, p_blk)); r_size = bc->bcache_nblks - BHASH(bc, p_blk); /* remaining blocks */ p_size = MIN(r_size, nblk - i); /* read at least those blocks */ ra = bc->bcache_nblks - BHASH(bc, p_blk + p_size); if (ra != bc->bcache_nblks) { /* do we have RA space? */ ra = MIN(bc->ra, ra); p_size += ra; } /* invalidate bcache */ for (i = 0; i < p_size; i++) { bcache_invalidate(bc, p_blk + i); } r_size = 0; /* * with read-ahead, it may happen we are attempting to read past * disk end, as bcache has no information about disk size. * in such case we should get partial read if some blocks can be * read or error, if no blocks can be read. * in either case we should return the data in bcache and only * return error if there is no data. */ result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, 0, p_size * bcache_blksize, p_buf, &r_size); r_size /= bcache_blksize; for (i = 0; i < r_size; i++) bcache_insert(bc, p_blk + i); /* update ra statistics */ if (r_size != 0) { if (r_size < p_size) bcache_rablks += (p_size - r_size); else bcache_rablks += ra; } /* check how much data can we copy */ for (i = 0; i < nblk; i++) { if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) break; } - size = i * bcache_blksize; + if (size > i * bcache_blksize) + size = i * bcache_blksize; + if (size != 0) { bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset, buf, size); result = 0; } done: if ((result == 0) && (rsize != NULL)) *rsize = size; return(result); } /* * Requests larger than 1/2 cache size will be bypassed and go * directly to the disk. XXX tune this. */ int bcache_strategy(void *devdata, int rw, daddr_t blk, size_t offset, size_t size, char *buf, size_t *rsize) { struct bcache_devdata *dd = (struct bcache_devdata *)devdata; struct bcache *bc = dd->dv_cache; u_int bcache_nblks = 0; int nblk, cblk, ret; size_t csize, isize, total; bcache_ops++; if (bc != NULL) bcache_nblks = bc->bcache_nblks; /* bypass large requests, or when the cache is inactive */ if (bc == NULL || (offset == 0 && ((size * 2 / bcache_blksize) > bcache_nblks))) { DEBUG("bypass %d from %d", size / bcache_blksize, blk); bcache_bypasses++; return (dd->dv_strategy(dd->dv_devdata, rw, blk, offset, size, buf, rsize)); } /* normalize offset */ while (offset >= bcache_blksize) { blk++; offset -= bcache_blksize; } switch (rw) { case F_READ: nblk = size / bcache_blksize; if (offset || (size != 0 && nblk == 0)) nblk++; /* read at least one block */ ret = 0; total = 0; while(size) { cblk = bcache_nblks - BHASH(bc, blk); /* # of blocks left */ cblk = MIN(cblk, nblk); if (size <= bcache_blksize) csize = size; else { csize = cblk * bcache_blksize; if (offset) csize -= (bcache_blksize - offset); } ret = read_strategy(devdata, rw, blk, offset, csize, buf+total, &isize); /* * we may have error from read ahead, if we have read some data * return partial read. */ if (ret != 0 || isize == 0) { if (total != 0) ret = 0; break; } blk += (offset+isize) / bcache_blksize; offset = 0; total += isize; size -= isize; nblk = size / bcache_blksize; } if (rsize) *rsize = total; return (ret); case F_WRITE: return write_strategy(devdata, rw, blk, offset, size, buf, rsize); } return -1; } /* * Free allocated bcache instance */ static void bcache_free_instance(struct bcache *bc) { if (bc != NULL) { if (bc->bcache_ctl) free(bc->bcache_ctl); if (bc->bcache_data) free(bc->bcache_data); free(bc); } } /* * Insert a block into the cache. 
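/*
 * Sketch of the clamp this revision adds in read_strategy(): after the
 * cache is refilled, i counts how many blocks starting at blk are now
 * resident.  The old unconditional "size = i * bcache_blksize" could
 * report (and copy) more bytes than the caller asked for when the
 * request was not block-aligned; the new code only ever shrinks size.
 */
#include <stddef.h>

static size_t
clamp_copy_size(size_t requested, size_t cached_blocks, size_t blksize)
{
	size_t avail = cached_blocks * blksize;

	return (requested > avail ? avail : requested);
}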
*/ static void bcache_insert(struct bcache *bc, daddr_t blkno) { u_int cand; cand = BHASH(bc, blkno); DEBUG("insert blk %llu -> %u # %d", blkno, cand, bcache_bcount); bc->bcache_ctl[cand].bc_blkno = blkno; bc->bcache_ctl[cand].bc_count = bcache_bcount++; } /* * Invalidate a block from the cache. */ static void bcache_invalidate(struct bcache *bc, daddr_t blkno) { u_int i; i = BHASH(bc, blkno); if (bc->bcache_ctl[i].bc_blkno == blkno) { bc->bcache_ctl[i].bc_count = -1; bc->bcache_ctl[i].bc_blkno = -1; DEBUG("invalidate blk %llu", blkno); } } #ifndef BOOT2 COMMAND_SET(bcachestat, "bcachestat", "get disk block cache stats", command_bcache); static int command_bcache(int argc, char *argv[]) { if (argc != 1) { command_errmsg = "wrong number of arguments"; return(CMD_ERROR); } printf("\ncache blocks: %d\n", bcache_total_nblks); printf("cache blocksz: %d\n", bcache_blksize); printf("cache readahead: %d\n", bcache_rablks); printf("unit cache blocks: %d\n", bcache_unit_nblks); printf("cached units: %d\n", bcache_units); printf("%d ops %d bypasses %d hits %d misses\n", bcache_ops, bcache_bypasses, bcache_hits, bcache_misses); return(CMD_OK); } #endif Index: user/alc/PQ_LAUNDRY/sys/boot/common/interp_parse.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/boot/common/interp_parse.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/boot/common/interp_parse.c (revision 303642) @@ -1,204 +1,222 @@ /*- * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Jordan K. Hubbard * 29 August 1998 * * The meat of the simple parser. */ #include __FBSDID("$FreeBSD$"); #include #include #include "bootstrap.h" static void clean(void); static int insert(int *argcp, char *buf); static char *variable_lookup(char *name); #define PARSE_BUFSIZE 1024 /* maximum size of one element */ #define MAXARGS 20 /* maximum number of elements */ static char *args[MAXARGS]; /* * parse: accept a string of input and "parse" it for backslash * substitutions and environment variable expansions (${var}), * returning an argc/argv style vector of whitespace separated * arguments. Returns 0 on success, 1 on failure (ok, ok, so I * wimped-out on the error codes! :). * * Note that the argv array returned must be freed by the caller, but * we own the space allocated for arguments and will free that on next * invocation. This allows argv consumers to modify the array if * required. * * NB: environment variables that expand to more than one whitespace * separated token will be returned as a single argv[] element, not * split in turn. Expanded text is also immune to further backslash * elimination or expansion since this is a one-pass, non-recursive * parser. You didn't specify more than this so if you want more, ask * me. 
- jkh */ #define PARSE_FAIL(expr) \ if (expr) { \ printf("fail at line %d\n", __LINE__); \ clean(); \ free(copy); \ free(buf); \ return 1; \ } /* Accept the usual delimiters for a variable, returning counterpart */ static char isdelim(int ch) { if (ch == '{') return '}'; else if (ch == '(') return ')'; return '\0'; } static int isquote(int ch) { - return (ch == '\'' || ch == '"'); + return (ch == '\''); } +static int +isdquote(int ch) +{ + return (ch == '"'); +} + int parse(int *argc, char ***argv, char *str) { int ac; char *val, *p, *q, *copy = NULL; size_t i = 0; - char token, tmp, quote, *buf; + char token, tmp, quote, dquote, *buf; enum { STR, VAR, WHITE } state; ac = *argc = 0; - quote = 0; + dquote = quote = 0; if (!str || (p = copy = backslash(str)) == NULL) return 1; /* Initialize vector and state */ clean(); state = STR; buf = (char *)malloc(PARSE_BUFSIZE); token = 0; /* And awaaaaaaaaay we go! */ while (*p) { switch (state) { case STR: if ((*p == '\\') && p[1]) { p++; PARSE_FAIL(i == (PARSE_BUFSIZE - 1)); buf[i++] = *p++; } else if (isquote(*p)) { quote = quote ? 0 : *p; - ++p; - } - else if (isspace(*p) && !quote) { + if (dquote) { /* keep quote */ + PARSE_FAIL(i == (PARSE_BUFSIZE - 1)); + buf[i++] = *p++; + } else + ++p; + } else if (isdquote(*p)) { + dquote = dquote ? 0 : *p; + if (quote) { /* keep dquote */ + PARSE_FAIL(i == (PARSE_BUFSIZE - 1)); + buf[i++] = *p++; + } else + ++p; + } else if (isspace(*p) && !quote && !dquote) { state = WHITE; if (i) { buf[i] = '\0'; PARSE_FAIL(insert(&ac, buf)); i = 0; } ++p; - } else if (*p == '$') { + } else if (*p == '$' && !quote) { token = isdelim(*(p + 1)); if (token) p += 2; else ++p; state = VAR; } else { PARSE_FAIL(i == (PARSE_BUFSIZE - 1)); buf[i++] = *p++; } break; case WHITE: if (isspace(*p)) ++p; else state = STR; break; case VAR: if (token) { PARSE_FAIL((q = strchr(p, token)) == NULL); } else { q = p; while (*q && !isspace(*q)) ++q; } tmp = *q; *q = '\0'; if ((val = variable_lookup(p)) != NULL) { size_t len = strlen(val); strncpy(buf + i, val, PARSE_BUFSIZE - (i + 1)); i += min(len, PARSE_BUFSIZE - 1); } *q = tmp; /* restore value */ p = q + (token ? 1 : 0); state = STR; break; } } + /* missing terminating ' or " */ + PARSE_FAIL(quote || dquote); /* If at end of token, add it */ if (i && state == STR) { buf[i] = '\0'; PARSE_FAIL(insert(&ac, buf)); } args[ac] = NULL; *argc = ac; *argv = (char **)malloc((sizeof(char *) * ac + 1)); bcopy(args, *argv, sizeof(char *) * ac + 1); free(buf); free(copy); return 0; } #define MAXARGS 20 /* Clean vector space */ static void clean(void) { int i; for (i = 0; i < MAXARGS; i++) { if (args[i] != NULL) { free(args[i]); args[i] = NULL; } } } static int insert(int *argcp, char *buf) { if (*argcp >= MAXARGS) return 1; args[(*argcp)++] = strdup(buf); return 0; } static char * variable_lookup(char *name) { /* XXX search "special variable" space first? */ return (char *)getenv(name); } Index: user/alc/PQ_LAUNDRY/sys/boot/zfs/libzfs.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/boot/zfs/libzfs.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/boot/zfs/libzfs.h (revision 303642) @@ -1,74 +1,74 @@ /*- * Copyright (c) 2012 Andriy Gapon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
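/*
 * Sketch of the quoting rules interp_parse.c gains in this revision:
 * single and double quotes are tracked separately, each is literal
 * inside a span opened by the other, $-expansion is suppressed only
 * inside single quotes, and a quote left open is now a parse error.
 * The standalone reduction below keeps just the quote state (no
 * whitespace splitting or ${} expansion).
 */
#include <stdio.h>

static int
strip_quotes(const char *p, char *out, unsigned outlen)
{
	char quote = 0, dquote = 0;
	unsigned i = 0;

	for (; *p != '\0' && i + 1 < outlen; p++) {
		if (*p == '\'' && !dquote)
			quote = !quote;		/* toggle, drop the quote */
		else if (*p == '"' && !quote)
			dquote = !dquote;
		else
			out[i++] = *p;		/* literal, incl. nested quote */
	}
	out[i] = '\0';
	return ((quote || dquote) ? -1 : 0);	/* missing terminator */
}

int
main(void)
{
	char buf[64];

	if (strip_quotes("set x='a \"b\" $c'", buf, sizeof(buf)) == 0)
		printf("%s\n", buf);		/* set x=a "b" $c */
	return (0);
}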
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _BOOT_LIBZFS_H_ #define _BOOT_LIBZFS_H_ #define ZFS_MAXNAMELEN 256 /* * ZFS fully-qualified device descriptor. * Note, this must match the 'struct devdesc' declaration in bootstrap.h. * Arch-specific device descriptors should be binary compatible with this * structure if they are to support ZFS. */ struct zfs_devdesc { struct devsw *d_dev; int d_type; int d_unit; void *d_opendata; uint64_t pool_guid; uint64_t root_guid; }; struct zfs_boot_args { uint32_t size; uint32_t reserved; uint64_t pool; uint64_t root; uint64_t primary_pool; uint64_t primary_vdev; char gelipw[256]; }; int zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path); char *zfs_fmtdev(void *vdev); int zfs_probe_dev(const char *devname, uint64_t *pool_guid); int zfs_list(const char *name); void init_zfs_bootenv(char *currdev); int zfs_bootenv(const char *name); -int zfs_belist_add(const char *name); +int zfs_belist_add(const char *name, uint64_t __unused); int zfs_set_env(void); extern struct devsw zfs_dev; extern struct fs_ops zfs_fsops; #endif /*_BOOT_LIBZFS_H_*/ Index: user/alc/PQ_LAUNDRY/sys/boot/zfs/zfs.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/boot/zfs/zfs.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/boot/zfs/zfs.c (revision 303642) @@ -1,897 +1,897 @@ /*- * Copyright (c) 2007 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); /* * Stand-alone file reading package. */ #include #include #include #include #include #include #include #include #include #include #include "libzfs.h" #include "zfsimpl.c" /* Define the range of indexes to be populated with ZFS Boot Environments */ #define ZFS_BE_FIRST 4 #define ZFS_BE_LAST 8 static int zfs_open(const char *path, struct open_file *f); static int zfs_write(struct open_file *f, void *buf, size_t size, size_t *resid); static int zfs_close(struct open_file *f); static int zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid); static off_t zfs_seek(struct open_file *f, off_t offset, int where); static int zfs_stat(struct open_file *f, struct stat *sb); static int zfs_readdir(struct open_file *f, struct dirent *d); struct devsw zfs_dev; struct fs_ops zfs_fsops = { "zfs", zfs_open, zfs_close, zfs_read, zfs_write, zfs_seek, zfs_stat, zfs_readdir }; /* * In-core open file. */ struct file { off_t f_seekp; /* seek pointer */ dnode_phys_t f_dnode; uint64_t f_zap_type; /* zap type for readdir */ uint64_t f_num_leafs; /* number of fzap leaf blocks */ zap_leaf_phys_t *f_zap_leaf; /* zap leaf buffer */ }; static int zfs_env_index; static int zfs_env_count; SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head); struct zfs_be_list *zfs_be_headp; struct zfs_be_entry { const char *name; SLIST_ENTRY(zfs_be_entry) entries; } *zfs_be, *zfs_be_tmp; /* * Open a file. */ static int zfs_open(const char *upath, struct open_file *f) { struct zfsmount *mount = (struct zfsmount *)f->f_devdata; struct file *fp; int rc; if (f->f_dev != &zfs_dev) return (EINVAL); /* allocate file system specific data structure */ fp = malloc(sizeof(struct file)); bzero(fp, sizeof(struct file)); f->f_fsdata = (void *)fp; rc = zfs_lookup(mount, upath, &fp->f_dnode); fp->f_seekp = 0; if (rc) { f->f_fsdata = NULL; free(fp); } return (rc); } static int zfs_close(struct open_file *f) { struct file *fp = (struct file *)f->f_fsdata; dnode_cache_obj = 0; f->f_fsdata = (void *)0; if (fp == (struct file *)0) return (0); free(fp); return (0); } /* * Copy a portion of a file into kernel memory. * Cross block boundaries when necessary. */ static int zfs_read(struct open_file *f, void *start, size_t size, size_t *resid /* out */) { const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; struct file *fp = (struct file *)f->f_fsdata; struct stat sb; size_t n; int rc; rc = zfs_stat(f, &sb); if (rc) return (rc); n = size; if (fp->f_seekp + n > sb.st_size) n = sb.st_size - fp->f_seekp; rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n); if (rc) return (rc); if (0) { int i; for (i = 0; i < n; i++) putchar(((char*) start)[i]); } fp->f_seekp += n; if (resid) *resid = size - n; return (0); } /* * Don't be silly - the bootstrap has no business writing anything. 
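/*
 * Sketch of the end-of-file clamp in zfs_read() above: the request is
 * trimmed so it never runs past st_size, and the shortfall is reported
 * back through resid.  (The loader assumes f_seekp never exceeds the
 * file size; the extra guard here is only for the standalone sketch.)
 */
#include <stddef.h>
#include <stdint.h>

static size_t
clamp_read(uint64_t seekp, uint64_t st_size, size_t want, size_t *resid)
{
	size_t n = want;

	if (seekp >= st_size)
		n = 0;
	else if (seekp + n > st_size)
		n = (size_t)(st_size - seekp);
	if (resid != NULL)
		*resid = want - n;
	return (n);
}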
*/ static int zfs_write(struct open_file *f, void *start, size_t size, size_t *resid /* out */) { return (EROFS); } static off_t zfs_seek(struct open_file *f, off_t offset, int where) { struct file *fp = (struct file *)f->f_fsdata; switch (where) { case SEEK_SET: fp->f_seekp = offset; break; case SEEK_CUR: fp->f_seekp += offset; break; case SEEK_END: { struct stat sb; int error; error = zfs_stat(f, &sb); if (error != 0) { errno = error; return (-1); } fp->f_seekp = sb.st_size - offset; break; } default: errno = EINVAL; return (-1); } return (fp->f_seekp); } static int zfs_stat(struct open_file *f, struct stat *sb) { const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; struct file *fp = (struct file *)f->f_fsdata; return (zfs_dnode_stat(spa, &fp->f_dnode, sb)); } static int zfs_readdir(struct open_file *f, struct dirent *d) { const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; struct file *fp = (struct file *)f->f_fsdata; mzap_ent_phys_t mze; struct stat sb; size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT; int rc; rc = zfs_stat(f, &sb); if (rc) return (rc); if (!S_ISDIR(sb.st_mode)) return (ENOTDIR); /* * If this is the first read, get the zap type. */ if (fp->f_seekp == 0) { rc = dnode_read(spa, &fp->f_dnode, 0, &fp->f_zap_type, sizeof(fp->f_zap_type)); if (rc) return (rc); if (fp->f_zap_type == ZBT_MICRO) { fp->f_seekp = offsetof(mzap_phys_t, mz_chunk); } else { rc = dnode_read(spa, &fp->f_dnode, offsetof(zap_phys_t, zap_num_leafs), &fp->f_num_leafs, sizeof(fp->f_num_leafs)); if (rc) return (rc); fp->f_seekp = bsize; fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize); rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, fp->f_zap_leaf, bsize); if (rc) return (rc); } } if (fp->f_zap_type == ZBT_MICRO) { mzap_next: if (fp->f_seekp >= bsize) return (ENOENT); rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, &mze, sizeof(mze)); if (rc) return (rc); fp->f_seekp += sizeof(mze); if (!mze.mze_name[0]) goto mzap_next; d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value); d->d_type = ZFS_DIRENT_TYPE(mze.mze_value); strcpy(d->d_name, mze.mze_name); d->d_namlen = strlen(d->d_name); return (0); } else { zap_leaf_t zl; zap_leaf_chunk_t *zc, *nc; int chunk; size_t namelen; char *p; uint64_t value; /* * Initialise this so we can use the ZAP size * calculating macros. */ zl.l_bs = ilog2(bsize); zl.l_phys = fp->f_zap_leaf; /* * Figure out which chunk we are currently looking at * and consider seeking to the next leaf. We use the * low bits of f_seekp as a simple chunk index. */ fzap_next: chunk = fp->f_seekp & (bsize - 1); if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) { fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize; chunk = 0; /* * Check for EOF and read the new leaf. */ if (fp->f_seekp >= bsize * fp->f_num_leafs) return (ENOENT); rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, fp->f_zap_leaf, bsize); if (rc) return (rc); } zc = &ZAP_LEAF_CHUNK(&zl, chunk); fp->f_seekp++; if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY) goto fzap_next; namelen = zc->l_entry.le_name_numints; if (namelen > sizeof(d->d_name)) namelen = sizeof(d->d_name); /* * Paste the name back together. */ nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk); p = d->d_name; while (namelen > 0) { int len; len = namelen; if (len > ZAP_LEAF_ARRAY_BYTES) len = ZAP_LEAF_ARRAY_BYTES; memcpy(p, nc->l_array.la_array, len); p += len; namelen -= len; nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next); } d->d_name[sizeof(d->d_name) - 1] = 0; /* * Assume the first eight bytes of the value are * a uint64_t. 
*/ value = fzap_leaf_value(&zl, zc); d->d_fileno = ZFS_DIRENT_OBJ(value); d->d_type = ZFS_DIRENT_TYPE(value); d->d_namlen = strlen(d->d_name); return (0); } } static int vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t size) { int fd; fd = (uintptr_t) priv; lseek(fd, offset, SEEK_SET); if (read(fd, buf, size) == size) { return 0; } else { return (EIO); } } static int zfs_dev_init(void) { spa_t *spa; spa_t *next; spa_t *prev; zfs_init(); if (archsw.arch_zfs_probe == NULL) return (ENXIO); archsw.arch_zfs_probe(); prev = NULL; spa = STAILQ_FIRST(&zfs_pools); while (spa != NULL) { next = STAILQ_NEXT(spa, spa_link); if (zfs_spa_init(spa)) { if (prev == NULL) STAILQ_REMOVE_HEAD(&zfs_pools, spa_link); else STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link); } else prev = spa; spa = next; } return (0); } struct zfs_probe_args { int fd; const char *devname; uint64_t *pool_guid; u_int secsz; }; static int zfs_diskread(void *arg, void *buf, size_t blocks, off_t offset) { struct zfs_probe_args *ppa; ppa = (struct zfs_probe_args *)arg; return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd, offset * ppa->secsz, buf, blocks * ppa->secsz)); } static int zfs_probe(int fd, uint64_t *pool_guid) { spa_t *spa; int ret; ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa); if (ret == 0 && pool_guid != NULL) *pool_guid = spa->spa_guid; return (ret); } static int zfs_probe_partition(void *arg, const char *partname, const struct ptable_entry *part) { struct zfs_probe_args *ppa, pa; struct ptable *table; char devname[32]; int ret; /* Probe only freebsd-zfs and freebsd partitions */ if (part->type != PART_FREEBSD && part->type != PART_FREEBSD_ZFS) return (0); ppa = (struct zfs_probe_args *)arg; strncpy(devname, ppa->devname, strlen(ppa->devname) - 1); devname[strlen(ppa->devname) - 1] = '\0'; sprintf(devname, "%s%s:", devname, partname); pa.fd = open(devname, O_RDONLY); if (pa.fd == -1) return (0); ret = zfs_probe(pa.fd, ppa->pool_guid); if (ret == 0) return (0); /* Do we have BSD label here? */ if (part->type == PART_FREEBSD) { pa.devname = devname; pa.pool_guid = ppa->pool_guid; pa.secsz = ppa->secsz; table = ptable_open(&pa, part->end - part->start + 1, ppa->secsz, zfs_diskread); if (table != NULL) { ptable_iterate(table, &pa, zfs_probe_partition); ptable_close(table); } } close(pa.fd); return (0); } int zfs_probe_dev(const char *devname, uint64_t *pool_guid) { struct ptable *table; struct zfs_probe_args pa; off_t mediasz; int ret; pa.fd = open(devname, O_RDONLY); if (pa.fd == -1) return (ENXIO); /* Probe the whole disk */ ret = zfs_probe(pa.fd, pool_guid); if (ret == 0) return (0); /* Probe each partition */ ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz); if (ret == 0) ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz); if (ret == 0) { pa.devname = devname; pa.pool_guid = pool_guid; table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz, zfs_diskread); if (table != NULL) { ptable_iterate(table, &pa, zfs_probe_partition); ptable_close(table); } } close(pa.fd); return (ret); } /* * Print information about ZFS pools */ static void zfs_dev_print(int verbose) { spa_t *spa; char line[80]; if (verbose) { spa_all_status(); return; } STAILQ_FOREACH(spa, &zfs_pools, spa_link) { sprintf(line, " zfs:%s\n", spa->spa_name); pager_output(line); } } /* * Attempt to open the pool described by (dev) for use by (f). */ static int zfs_dev_open(struct open_file *f, ...) 
{ va_list args; struct zfs_devdesc *dev; struct zfsmount *mount; spa_t *spa; int rv; va_start(args, f); dev = va_arg(args, struct zfs_devdesc *); va_end(args); if (dev->pool_guid == 0) spa = STAILQ_FIRST(&zfs_pools); else spa = spa_find_by_guid(dev->pool_guid); if (!spa) return (ENXIO); mount = malloc(sizeof(*mount)); rv = zfs_mount(spa, dev->root_guid, mount); if (rv != 0) { free(mount); return (rv); } if (mount->objset.os_type != DMU_OST_ZFS) { printf("Unexpected object set type %ju\n", (uintmax_t)mount->objset.os_type); free(mount); return (EIO); } f->f_devdata = mount; free(dev); return (0); } static int zfs_dev_close(struct open_file *f) { free(f->f_devdata); f->f_devdata = NULL; return (0); } static int zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize) { return (ENOSYS); } struct devsw zfs_dev = { .dv_name = "zfs", .dv_type = DEVT_ZFS, .dv_init = zfs_dev_init, .dv_strategy = zfs_dev_strategy, .dv_open = zfs_dev_open, .dv_close = zfs_dev_close, .dv_ioctl = noioctl, .dv_print = zfs_dev_print, .dv_cleanup = NULL }; int zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path) { static char rootname[ZFS_MAXNAMELEN]; static char poolname[ZFS_MAXNAMELEN]; spa_t *spa; const char *end; const char *np; const char *sep; int rv; np = devspec; if (*np != ':') return (EINVAL); np++; end = strchr(np, ':'); if (end == NULL) return (EINVAL); sep = strchr(np, '/'); if (sep == NULL || sep >= end) sep = end; memcpy(poolname, np, sep - np); poolname[sep - np] = '\0'; if (sep < end) { sep++; memcpy(rootname, sep, end - sep); rootname[end - sep] = '\0'; } else rootname[0] = '\0'; spa = spa_find_by_name(poolname); if (!spa) return (ENXIO); dev->pool_guid = spa->spa_guid; rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid); if (rv != 0) return (rv); if (path != NULL) *path = (*end == '\0') ? 
end : end + 1; dev->d_dev = &zfs_dev; dev->d_type = zfs_dev.dv_type; return (0); } char * zfs_fmtdev(void *vdev) { static char rootname[ZFS_MAXNAMELEN]; static char buf[2 * ZFS_MAXNAMELEN + 8]; struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; spa_t *spa; buf[0] = '\0'; if (dev->d_type != DEVT_ZFS) return (buf); if (dev->pool_guid == 0) { spa = STAILQ_FIRST(&zfs_pools); dev->pool_guid = spa->spa_guid; } else spa = spa_find_by_guid(dev->pool_guid); if (spa == NULL) { printf("ZFS: can't find pool by guid\n"); return (buf); } if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) { printf("ZFS: can't find root filesystem\n"); return (buf); } if (zfs_rlookup(spa, dev->root_guid, rootname)) { printf("ZFS: can't find filesystem by guid\n"); return (buf); } if (rootname[0] == '\0') sprintf(buf, "%s:%s:", dev->d_dev->dv_name, spa->spa_name); else sprintf(buf, "%s:%s/%s:", dev->d_dev->dv_name, spa->spa_name, rootname); return (buf); } int zfs_list(const char *name) { static char poolname[ZFS_MAXNAMELEN]; uint64_t objid; spa_t *spa; const char *dsname; int len; int rv; len = strlen(name); dsname = strchr(name, '/'); if (dsname != NULL) { len = dsname - name; dsname++; } else dsname = ""; memcpy(poolname, name, len); poolname[len] = '\0'; spa = spa_find_by_name(poolname); if (!spa) return (ENXIO); rv = zfs_lookup_dataset(spa, dsname, &objid); if (rv != 0) return (rv); return (zfs_list_dataset(spa, objid)); } void init_zfs_bootenv(char *currdev) { char *beroot; if (strlen(currdev) == 0) return; if(strncmp(currdev, "zfs:", 4) != 0) return; /* Remove the trailing : */ currdev[strlen(currdev) - 1] = '\0'; setenv("zfs_be_active", currdev, 1); setenv("zfs_be_currpage", "1", 1); /* Forward past zfs: */ currdev = strchr(currdev, ':'); currdev++; /* Remove the last element (current bootenv) */ beroot = strrchr(currdev, '/'); if (beroot != NULL) beroot[0] = '\0'; beroot = currdev; setenv("zfs_be_root", beroot, 1); } int zfs_bootenv(const char *name) { static char poolname[ZFS_MAXNAMELEN], *dsname, *root; char becount[4]; uint64_t objid; spa_t *spa; int len, rv, pages, perpage, currpage; if (name == NULL) return (EINVAL); if ((root = getenv("zfs_be_root")) == NULL) return (EINVAL); if (strcmp(name, root) != 0) { if (setenv("zfs_be_root", name, 1) != 0) return (ENOMEM); } SLIST_INIT(&zfs_be_head); zfs_env_count = 0; len = strlen(name); dsname = strchr(name, '/'); if (dsname != NULL) { len = dsname - name; dsname++; } else dsname = ""; memcpy(poolname, name, len); poolname[len] = '\0'; spa = spa_find_by_name(poolname); if (!spa) return (ENXIO); rv = zfs_lookup_dataset(spa, dsname, &objid); if (rv != 0) return (rv); rv = zfs_callback_dataset(spa, objid, zfs_belist_add); /* Calculate and store the number of pages of BEs */ perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1); pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 
1 : 0); snprintf(becount, 4, "%d", pages); if (setenv("zfs_be_pages", becount, 1) != 0) return (ENOMEM); /* Roll over the page counter if it has exceeded the maximum */ currpage = strtol(getenv("zfs_be_currpage"), NULL, 10); if (currpage > pages) { if (setenv("zfs_be_currpage", "1", 1) != 0) return (ENOMEM); } /* Populate the menu environment variables */ zfs_set_env(); /* Clean up the SLIST of ZFS BEs */ while (!SLIST_EMPTY(&zfs_be_head)) { zfs_be = SLIST_FIRST(&zfs_be_head); SLIST_REMOVE_HEAD(&zfs_be_head, entries); free(zfs_be); } return (rv); } int -zfs_belist_add(const char *name) +zfs_belist_add(const char *name, uint64_t value __unused) { /* Skip special datasets that start with a $ character */ if (strncmp(name, "$", 1) == 0) { return (0); } /* Add the boot environment to the head of the SLIST */ zfs_be = malloc(sizeof(struct zfs_be_entry)); if (zfs_be == NULL) { return (ENOMEM); } zfs_be->name = name; SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries); zfs_env_count++; return (0); } int zfs_set_env(void) { char envname[32], envval[256]; char *beroot, *pagenum; int rv, page, ctr; beroot = getenv("zfs_be_root"); if (beroot == NULL) { return (1); } pagenum = getenv("zfs_be_currpage"); if (pagenum != NULL) { page = strtol(pagenum, NULL, 10); } else { page = 1; } ctr = 1; rv = 0; zfs_env_index = ZFS_BE_FIRST; SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) { /* Skip to the requested page number */ if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) { ctr++; continue; } snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index); snprintf(envval, sizeof(envval), "%s", zfs_be->name); rv = setenv(envname, envval, 1); if (rv != 0) { break; } snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index); rv = setenv(envname, envval, 1); if (rv != 0){ break; } snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index); rv = setenv(envname, "set_bootenv", 1); if (rv != 0){ break; } snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index); snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name); rv = setenv(envname, envval, 1); if (rv != 0){ break; } zfs_env_index++; if (zfs_env_index > ZFS_BE_LAST) { break; } } for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) { snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index); (void)unsetenv(envname); snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index); (void)unsetenv(envname); snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index); (void)unsetenv(envname); snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index); (void)unsetenv(envname); } return (rv); } Index: user/alc/PQ_LAUNDRY/sys/boot/zfs/zfsimpl.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/boot/zfs/zfsimpl.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/boot/zfs/zfsimpl.c (revision 303642) @@ -1,2199 +1,2267 @@ /*- * Copyright (c) 2007 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
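/*
 * Sketch of why zfs_belist_add() (and its prototype in libzfs.h) gains
 * a second, unused parameter in this revision: the dataset iterator
 * hands every callback a (name, value) pair, so even callbacks that
 * only care about the name must accept both.  The callback type and
 * the iterator below are illustrative stand-ins, not the loader's
 * actual zfs_callback_dataset() declaration.
 */
#include <stdint.h>
#include <stdio.h>

typedef int (*dataset_cb_t)(const char *name, uint64_t value);

static int
iterate_datasets(dataset_cb_t cb)
{
	static const char *names[] = { "default", "11.0-RELEASE", "$MOS" };
	unsigned i;
	int rc;

	for (i = 0; i < sizeof(names) / sizeof(names[0]); i++)
		if ((rc = cb(names[i], (uint64_t)i)) != 0)
			return (rc);
	return (0);
}

static int
belist_add(const char *name, uint64_t value)
{
	(void)value;			/* unused, like __unused upstream */
	if (name[0] == '$')		/* skip special datasets */
		return (0);
	printf("BE candidate: %s\n", name);
	return (0);
}

int
main(void)
{
	return (iterate_datasets(belist_add));
}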
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Stand-alone ZFS file reader. */ #include #include #include "zfsimpl.h" #include "zfssubr.c" struct zfsmount { const spa_t *spa; objset_phys_t objset; uint64_t rootobj; }; /* * List of all vdevs, chained through v_alllink. */ static vdev_list_t zfs_vdevs; /* * List of ZFS features supported for read */ static const char *features_for_read[] = { "org.illumos:lz4_compress", "com.delphix:hole_birth", "com.delphix:extensible_dataset", "com.delphix:embedded_data", "org.open-zfs:large_blocks", NULL }; /* * List of all pools, chained through spa_link. */ static spa_list_t zfs_pools; static uint64_t zfs_crc64_table[256]; static const dnode_phys_t *dnode_cache_obj = 0; static uint64_t dnode_cache_bn; static char *dnode_cache_buf; static char *zap_scratch; static char *zfs_temp_buf, *zfs_temp_end, *zfs_temp_ptr; #define TEMP_SIZE (1024 * 1024) static int zio_read(const spa_t *spa, const blkptr_t *bp, void *buf); static int zfs_get_root(const spa_t *spa, uint64_t *objid); static int zfs_rlookup(const spa_t *spa, uint64_t objnum, char *result); static void zfs_init(void) { STAILQ_INIT(&zfs_vdevs); STAILQ_INIT(&zfs_pools); zfs_temp_buf = malloc(TEMP_SIZE); zfs_temp_end = zfs_temp_buf + TEMP_SIZE; zfs_temp_ptr = zfs_temp_buf; dnode_cache_buf = malloc(SPA_MAXBLOCKSIZE); zap_scratch = malloc(SPA_MAXBLOCKSIZE); zfs_init_crc(); } static void * zfs_alloc(size_t size) { char *ptr; if (zfs_temp_ptr + size > zfs_temp_end) { printf("ZFS: out of temporary buffer space\n"); for (;;) ; } ptr = zfs_temp_ptr; zfs_temp_ptr += size; return (ptr); } static void zfs_free(void *ptr, size_t size) { zfs_temp_ptr -= size; if (zfs_temp_ptr != ptr) { printf("ZFS: zfs_alloc()/zfs_free() mismatch\n"); for (;;) ; } } static int xdr_int(const unsigned char **xdr, int *ip) { *ip = ((*xdr)[0] << 24) | ((*xdr)[1] << 16) | ((*xdr)[2] << 8) | ((*xdr)[3] << 0); (*xdr) += 4; return (0); } static int xdr_u_int(const unsigned char **xdr, u_int *ip) { *ip = ((*xdr)[0] << 24) | ((*xdr)[1] << 16) | ((*xdr)[2] << 8) | ((*xdr)[3] << 0); (*xdr) += 4; return (0); } static int xdr_uint64_t(const unsigned char **xdr, uint64_t *lp) { u_int hi, lo; xdr_u_int(xdr, &hi); xdr_u_int(xdr, &lo); *lp = (((uint64_t) hi) << 32) | lo; return (0); } static int nvlist_find(const unsigned char *nvlist, const char *name, int type, int* elementsp, void *valuep) { const unsigned char *p, *pair; int junk; int encoded_size, decoded_size; p = nvlist; xdr_int(&p, &junk); xdr_int(&p, &junk); pair = p; xdr_int(&p, &encoded_size); xdr_int(&p, &decoded_size); while (encoded_size && decoded_size) { int namelen, pairtype, elements; const char *pairname; xdr_int(&p, &namelen); pairname = (const char*) p; p += roundup(namelen, 4); xdr_int(&p, &pairtype); if (!memcmp(name, pairname, namelen) && 
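/*
 * Sketch of the XDR decoding used on the label nvlist by xdr_int() and
 * friends above: integers are stored as big-endian 32-bit words, and
 * 64-bit values as a high word followed by a low word.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t
xdr_get_u32(const unsigned char **p)
{
	uint32_t v;

	v = ((uint32_t)(*p)[0] << 24) | ((uint32_t)(*p)[1] << 16) |
	    ((uint32_t)(*p)[2] << 8) | (*p)[3];
	*p += 4;
	return (v);
}

static uint64_t
xdr_get_u64(const unsigned char **p)
{
	uint64_t hi = xdr_get_u32(p);

	return ((hi << 32) | xdr_get_u32(p));
}

int
main(void)
{
	const unsigned char buf[8] = { 0, 0, 0, 1, 0, 0, 0, 2 };
	const unsigned char *p = buf;

	printf("%llu\n", (unsigned long long)xdr_get_u64(&p)); /* 4294967298 */
	return (0);
}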
type == pairtype) { xdr_int(&p, &elements); if (elementsp) *elementsp = elements; if (type == DATA_TYPE_UINT64) { xdr_uint64_t(&p, (uint64_t *) valuep); return (0); } else if (type == DATA_TYPE_STRING) { int len; xdr_int(&p, &len); (*(const char**) valuep) = (const char*) p; return (0); } else if (type == DATA_TYPE_NVLIST || type == DATA_TYPE_NVLIST_ARRAY) { (*(const unsigned char**) valuep) = (const unsigned char*) p; return (0); } else { return (EIO); } } else { /* * Not the pair we are looking for, skip to the next one. */ p = pair + encoded_size; } pair = p; xdr_int(&p, &encoded_size); xdr_int(&p, &decoded_size); } return (EIO); } static int nvlist_check_features_for_read(const unsigned char *nvlist) { const unsigned char *p, *pair; int junk; int encoded_size, decoded_size; int rc; rc = 0; p = nvlist; xdr_int(&p, &junk); xdr_int(&p, &junk); pair = p; xdr_int(&p, &encoded_size); xdr_int(&p, &decoded_size); while (encoded_size && decoded_size) { int namelen, pairtype; const char *pairname; int i, found; found = 0; xdr_int(&p, &namelen); pairname = (const char*) p; p += roundup(namelen, 4); xdr_int(&p, &pairtype); for (i = 0; features_for_read[i] != NULL; i++) { if (!memcmp(pairname, features_for_read[i], namelen)) { found = 1; break; } } if (!found) { printf("ZFS: unsupported feature: %s\n", pairname); rc = EIO; } p = pair + encoded_size; pair = p; xdr_int(&p, &encoded_size); xdr_int(&p, &decoded_size); } return (rc); } /* * Return the next nvlist in an nvlist array. */ static const unsigned char * nvlist_next(const unsigned char *nvlist) { const unsigned char *p, *pair; int junk; int encoded_size, decoded_size; p = nvlist; xdr_int(&p, &junk); xdr_int(&p, &junk); pair = p; xdr_int(&p, &encoded_size); xdr_int(&p, &decoded_size); while (encoded_size && decoded_size) { p = pair + encoded_size; pair = p; xdr_int(&p, &encoded_size); xdr_int(&p, &decoded_size); } return p; } #ifdef TEST static const unsigned char * nvlist_print(const unsigned char *nvlist, unsigned int indent) { static const char* typenames[] = { "DATA_TYPE_UNKNOWN", "DATA_TYPE_BOOLEAN", "DATA_TYPE_BYTE", "DATA_TYPE_INT16", "DATA_TYPE_UINT16", "DATA_TYPE_INT32", "DATA_TYPE_UINT32", "DATA_TYPE_INT64", "DATA_TYPE_UINT64", "DATA_TYPE_STRING", "DATA_TYPE_BYTE_ARRAY", "DATA_TYPE_INT16_ARRAY", "DATA_TYPE_UINT16_ARRAY", "DATA_TYPE_INT32_ARRAY", "DATA_TYPE_UINT32_ARRAY", "DATA_TYPE_INT64_ARRAY", "DATA_TYPE_UINT64_ARRAY", "DATA_TYPE_STRING_ARRAY", "DATA_TYPE_HRTIME", "DATA_TYPE_NVLIST", "DATA_TYPE_NVLIST_ARRAY", "DATA_TYPE_BOOLEAN_VALUE", "DATA_TYPE_INT8", "DATA_TYPE_UINT8", "DATA_TYPE_BOOLEAN_ARRAY", "DATA_TYPE_INT8_ARRAY", "DATA_TYPE_UINT8_ARRAY" }; unsigned int i, j; const unsigned char *p, *pair; int junk; int encoded_size, decoded_size; p = nvlist; xdr_int(&p, &junk); xdr_int(&p, &junk); pair = p; xdr_int(&p, &encoded_size); xdr_int(&p, &decoded_size); while (encoded_size && decoded_size) { int namelen, pairtype, elements; const char *pairname; xdr_int(&p, &namelen); pairname = (const char*) p; p += roundup(namelen, 4); xdr_int(&p, &pairtype); for (i = 0; i < indent; i++) printf(" "); printf("%s %s", typenames[pairtype], pairname); xdr_int(&p, &elements); switch (pairtype) { case DATA_TYPE_UINT64: { uint64_t val; xdr_uint64_t(&p, &val); printf(" = 0x%jx\n", (uintmax_t)val); break; } case DATA_TYPE_STRING: { int len; xdr_int(&p, &len); printf(" = \"%s\"\n", p); break; } case DATA_TYPE_NVLIST: printf("\n"); nvlist_print(p, indent + 1); break; case DATA_TYPE_NVLIST_ARRAY: for (j = 0; j < elements; j++) { printf("[%d]\n", j); p = 
nvlist_print(p, indent + 1); if (j != elements - 1) { for (i = 0; i < indent; i++) printf(" "); printf("%s %s", typenames[pairtype], pairname); } } break; default: printf("\n"); } p = pair + encoded_size; pair = p; xdr_int(&p, &encoded_size); xdr_int(&p, &decoded_size); } return p; } #endif static int vdev_read_phys(vdev_t *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t size) { size_t psize; int rc; if (!vdev->v_phys_read) return (EIO); if (bp) { psize = BP_GET_PSIZE(bp); } else { psize = size; } /*printf("ZFS: reading %d bytes at 0x%jx to %p\n", psize, (uintmax_t)offset, buf);*/ rc = vdev->v_phys_read(vdev, vdev->v_read_priv, offset, buf, psize); if (rc) return (rc); if (bp && zio_checksum_verify(bp, buf)) return (EIO); return (0); } static int vdev_disk_read(vdev_t *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t bytes) { return (vdev_read_phys(vdev, bp, buf, offset + VDEV_LABEL_START_SIZE, bytes)); } static int vdev_mirror_read(vdev_t *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t bytes) { vdev_t *kid; int rc; rc = EIO; STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { if (kid->v_state != VDEV_STATE_HEALTHY) continue; rc = kid->v_read(kid, bp, buf, offset, bytes); if (!rc) return (0); } return (rc); } static int vdev_replacing_read(vdev_t *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t bytes) { vdev_t *kid; /* * Here we should have two kids: * First one which is the one we are replacing and we can trust * only this one to have valid data, but it might not be present. * Second one is that one we are replacing with. It is most likely * healthy, but we can't trust it has needed data, so we won't use it. */ kid = STAILQ_FIRST(&vdev->v_children); if (kid == NULL) return (EIO); if (kid->v_state != VDEV_STATE_HEALTHY) return (EIO); return (kid->v_read(kid, bp, buf, offset, bytes)); } static vdev_t * vdev_find(uint64_t guid) { vdev_t *vdev; STAILQ_FOREACH(vdev, &zfs_vdevs, v_alllink) if (vdev->v_guid == guid) return (vdev); return (0); } static vdev_t * vdev_create(uint64_t guid, vdev_read_t *read) { vdev_t *vdev; vdev = malloc(sizeof(vdev_t)); memset(vdev, 0, sizeof(vdev_t)); STAILQ_INIT(&vdev->v_children); vdev->v_guid = guid; vdev->v_state = VDEV_STATE_OFFLINE; vdev->v_read = read; vdev->v_phys_read = 0; vdev->v_read_priv = 0; STAILQ_INSERT_TAIL(&zfs_vdevs, vdev, v_alllink); return (vdev); } static int vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t *pvdev, vdev_t **vdevp, int is_newer) { int rc; uint64_t guid, id, ashift, nparity; const char *type; const char *path; vdev_t *vdev, *kid; const unsigned char *kids; int nkids, i, is_new; uint64_t is_offline, is_faulted, is_degraded, is_removed, isnt_present; if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, 0, &guid) || nvlist_find(nvlist, ZPOOL_CONFIG_ID, DATA_TYPE_UINT64, 0, &id) || nvlist_find(nvlist, ZPOOL_CONFIG_TYPE, DATA_TYPE_STRING, 0, &type)) { printf("ZFS: can't find vdev details\n"); return (ENOENT); } if (strcmp(type, VDEV_TYPE_MIRROR) && strcmp(type, VDEV_TYPE_DISK) #ifdef ZFS_TEST && strcmp(type, VDEV_TYPE_FILE) #endif && strcmp(type, VDEV_TYPE_RAIDZ) && strcmp(type, VDEV_TYPE_REPLACING)) { printf("ZFS: can only boot from disk, mirror, raidz1, raidz2 and raidz3 vdevs\n"); return (EIO); } is_offline = is_removed = is_faulted = is_degraded = isnt_present = 0; nvlist_find(nvlist, ZPOOL_CONFIG_OFFLINE, DATA_TYPE_UINT64, 0, &is_offline); nvlist_find(nvlist, ZPOOL_CONFIG_REMOVED, DATA_TYPE_UINT64, 0, &is_removed); nvlist_find(nvlist, ZPOOL_CONFIG_FAULTED, 
DATA_TYPE_UINT64, 0, &is_faulted); nvlist_find(nvlist, ZPOOL_CONFIG_DEGRADED, DATA_TYPE_UINT64, 0, &is_degraded); nvlist_find(nvlist, ZPOOL_CONFIG_NOT_PRESENT, DATA_TYPE_UINT64, 0, &isnt_present); vdev = vdev_find(guid); if (!vdev) { is_new = 1; if (!strcmp(type, VDEV_TYPE_MIRROR)) vdev = vdev_create(guid, vdev_mirror_read); else if (!strcmp(type, VDEV_TYPE_RAIDZ)) vdev = vdev_create(guid, vdev_raidz_read); else if (!strcmp(type, VDEV_TYPE_REPLACING)) vdev = vdev_create(guid, vdev_replacing_read); else vdev = vdev_create(guid, vdev_disk_read); vdev->v_id = id; vdev->v_top = pvdev != NULL ? pvdev : vdev; if (nvlist_find(nvlist, ZPOOL_CONFIG_ASHIFT, DATA_TYPE_UINT64, 0, &ashift) == 0) vdev->v_ashift = ashift; else vdev->v_ashift = 0; if (nvlist_find(nvlist, ZPOOL_CONFIG_NPARITY, DATA_TYPE_UINT64, 0, &nparity) == 0) vdev->v_nparity = nparity; else vdev->v_nparity = 0; if (nvlist_find(nvlist, ZPOOL_CONFIG_PATH, DATA_TYPE_STRING, 0, &path) == 0) { if (strncmp(path, "/dev/", 5) == 0) path += 5; vdev->v_name = strdup(path); } else { if (!strcmp(type, "raidz")) { if (vdev->v_nparity == 1) vdev->v_name = "raidz1"; else if (vdev->v_nparity == 2) vdev->v_name = "raidz2"; else if (vdev->v_nparity == 3) vdev->v_name = "raidz3"; else { printf("ZFS: can only boot from disk, mirror, raidz1, raidz2 and raidz3 vdevs\n"); return (EIO); } } else { vdev->v_name = strdup(type); } } } else { is_new = 0; } if (is_new || is_newer) { /* * This is either new vdev or we've already seen this vdev, * but from an older vdev label, so let's refresh its state * from the newer label. */ if (is_offline) vdev->v_state = VDEV_STATE_OFFLINE; else if (is_removed) vdev->v_state = VDEV_STATE_REMOVED; else if (is_faulted) vdev->v_state = VDEV_STATE_FAULTED; else if (is_degraded) vdev->v_state = VDEV_STATE_DEGRADED; else if (isnt_present) vdev->v_state = VDEV_STATE_CANT_OPEN; } rc = nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY, &nkids, &kids); /* * Its ok if we don't have any kids. */ if (rc == 0) { vdev->v_nchildren = nkids; for (i = 0; i < nkids; i++) { rc = vdev_init_from_nvlist(kids, vdev, &kid, is_newer); if (rc) return (rc); if (is_new) STAILQ_INSERT_TAIL(&vdev->v_children, kid, v_childlink); kids = nvlist_next(kids); } } else { vdev->v_nchildren = 0; } if (vdevp) *vdevp = vdev; return (0); } static void vdev_set_state(vdev_t *vdev) { vdev_t *kid; int good_kids; int bad_kids; /* * A mirror or raidz is healthy if all its kids are healthy. A * mirror is degraded if any of its kids is healthy; a raidz * is degraded if at most nparity kids are offline. 
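/*
 * Sketch of the child-state aggregation vdev_set_state() performs: a
 * mirror stays usable while any child is healthy, a raidz while no
 * more than nparity children are bad; with no bad children either one
 * is healthy.
 */
enum vstate { V_OFFLINE, V_DEGRADED, V_HEALTHY };

static enum vstate
mirror_state(int good_kids, int bad_kids)
{
	if (bad_kids == 0)
		return (V_HEALTHY);
	return (good_kids > 0 ? V_DEGRADED : V_OFFLINE);
}

static enum vstate
raidz_state(int bad_kids, int nparity)
{
	if (bad_kids == 0)
		return (V_HEALTHY);
	return (bad_kids <= nparity ? V_DEGRADED : V_OFFLINE);
}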
*/ if (STAILQ_FIRST(&vdev->v_children)) { good_kids = 0; bad_kids = 0; STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { if (kid->v_state == VDEV_STATE_HEALTHY) good_kids++; else bad_kids++; } if (bad_kids == 0) { vdev->v_state = VDEV_STATE_HEALTHY; } else { if (vdev->v_read == vdev_mirror_read) { if (good_kids) { vdev->v_state = VDEV_STATE_DEGRADED; } else { vdev->v_state = VDEV_STATE_OFFLINE; } } else if (vdev->v_read == vdev_raidz_read) { if (bad_kids > vdev->v_nparity) { vdev->v_state = VDEV_STATE_OFFLINE; } else { vdev->v_state = VDEV_STATE_DEGRADED; } } } } } static spa_t * spa_find_by_guid(uint64_t guid) { spa_t *spa; STAILQ_FOREACH(spa, &zfs_pools, spa_link) if (spa->spa_guid == guid) return (spa); return (0); } static spa_t * spa_find_by_name(const char *name) { spa_t *spa; STAILQ_FOREACH(spa, &zfs_pools, spa_link) if (!strcmp(spa->spa_name, name)) return (spa); return (0); } #ifdef BOOT2 static spa_t * spa_get_primary(void) { return (STAILQ_FIRST(&zfs_pools)); } static vdev_t * spa_get_primary_vdev(const spa_t *spa) { vdev_t *vdev; vdev_t *kid; if (spa == NULL) spa = spa_get_primary(); if (spa == NULL) return (NULL); vdev = STAILQ_FIRST(&spa->spa_vdevs); if (vdev == NULL) return (NULL); for (kid = STAILQ_FIRST(&vdev->v_children); kid != NULL; kid = STAILQ_FIRST(&vdev->v_children)) vdev = kid; return (vdev); } #endif static spa_t * spa_create(uint64_t guid) { spa_t *spa; spa = malloc(sizeof(spa_t)); memset(spa, 0, sizeof(spa_t)); STAILQ_INIT(&spa->spa_vdevs); spa->spa_guid = guid; STAILQ_INSERT_TAIL(&zfs_pools, spa, spa_link); return (spa); } static const char * state_name(vdev_state_t state) { static const char* names[] = { "UNKNOWN", "CLOSED", "OFFLINE", "REMOVED", "CANT_OPEN", "FAULTED", "DEGRADED", "ONLINE" }; return names[state]; } #ifdef BOOT2 #define pager_printf printf #else static void pager_printf(const char *fmt, ...) 
{ char line[80]; va_list args; va_start(args, fmt); vsprintf(line, fmt, args); va_end(args); pager_output(line); } #endif #define STATUS_FORMAT " %s %s\n" static void print_state(int indent, const char *name, vdev_state_t state) { int i; char buf[512]; buf[0] = 0; for (i = 0; i < indent; i++) strcat(buf, " "); strcat(buf, name); pager_printf(STATUS_FORMAT, buf, state_name(state)); } static void vdev_status(vdev_t *vdev, int indent) { vdev_t *kid; print_state(indent, vdev->v_name, vdev->v_state); STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { vdev_status(kid, indent + 1); } } static void spa_status(spa_t *spa) { static char bootfs[ZFS_MAXNAMELEN]; uint64_t rootid; vdev_t *vdev; int good_kids, bad_kids, degraded_kids; vdev_state_t state; pager_printf(" pool: %s\n", spa->spa_name); if (zfs_get_root(spa, &rootid) == 0 && zfs_rlookup(spa, rootid, bootfs) == 0) { if (bootfs[0] == '\0') pager_printf("bootfs: %s\n", spa->spa_name); else pager_printf("bootfs: %s/%s\n", spa->spa_name, bootfs); } pager_printf("config:\n\n"); pager_printf(STATUS_FORMAT, "NAME", "STATE"); good_kids = 0; degraded_kids = 0; bad_kids = 0; STAILQ_FOREACH(vdev, &spa->spa_vdevs, v_childlink) { if (vdev->v_state == VDEV_STATE_HEALTHY) good_kids++; else if (vdev->v_state == VDEV_STATE_DEGRADED) degraded_kids++; else bad_kids++; } state = VDEV_STATE_CLOSED; if (good_kids > 0 && (degraded_kids + bad_kids) == 0) state = VDEV_STATE_HEALTHY; else if ((good_kids + degraded_kids) > 0) state = VDEV_STATE_DEGRADED; print_state(0, spa->spa_name, state); STAILQ_FOREACH(vdev, &spa->spa_vdevs, v_childlink) { vdev_status(vdev, 1); } } static void spa_all_status(void) { spa_t *spa; int first = 1; STAILQ_FOREACH(spa, &zfs_pools, spa_link) { if (!first) pager_printf("\n"); first = 0; spa_status(spa); } } static int vdev_probe(vdev_phys_read_t *read, void *read_priv, spa_t **spap) { vdev_t vtmp; vdev_phys_t *vdev_label = (vdev_phys_t *) zap_scratch; spa_t *spa; vdev_t *vdev, *top_vdev, *pool_vdev; off_t off; blkptr_t bp; const unsigned char *nvlist; uint64_t val; uint64_t guid; uint64_t pool_txg, pool_guid; uint64_t is_log; const char *pool_name; const unsigned char *vdevs; const unsigned char *features; int i, rc, is_newer; char *upbuf; const struct uberblock *up; /* * Load the vdev label and figure out which * uberblock is most current. */ memset(&vtmp, 0, sizeof(vtmp)); vtmp.v_phys_read = read; vtmp.v_read_priv = read_priv; off = offsetof(vdev_label_t, vl_vdev_phys); BP_ZERO(&bp); BP_SET_LSIZE(&bp, sizeof(vdev_phys_t)); BP_SET_PSIZE(&bp, sizeof(vdev_phys_t)); BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); DVA_SET_OFFSET(BP_IDENTITY(&bp), off); ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0); if (vdev_read_phys(&vtmp, &bp, vdev_label, off, 0)) return (EIO); if (vdev_label->vp_nvlist[0] != NV_ENCODE_XDR) { return (EIO); } nvlist = (const unsigned char *) vdev_label->vp_nvlist + 4; if (nvlist_find(nvlist, ZPOOL_CONFIG_VERSION, DATA_TYPE_UINT64, 0, &val)) { return (EIO); } if (!SPA_VERSION_IS_SUPPORTED(val)) { printf("ZFS: unsupported ZFS version %u (should be %u)\n", (unsigned) val, (unsigned) SPA_VERSION); return (EIO); } /* Check ZFS features for read */ if (nvlist_find(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ, DATA_TYPE_NVLIST, 0, &features) == 0 && nvlist_check_features_for_read(features) != 0) return (EIO); if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_STATE, DATA_TYPE_UINT64, 0, &val)) { return (EIO); } if (val == POOL_STATE_DESTROYED) { /* We don't boot only from destroyed pools. 
*/ return (EIO); } if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64, 0, &pool_txg) || nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, 0, &pool_guid) || nvlist_find(nvlist, ZPOOL_CONFIG_POOL_NAME, DATA_TYPE_STRING, 0, &pool_name)) { /* * Cache and spare devices end up here - just ignore * them. */ /*printf("ZFS: can't find pool details\n");*/ return (EIO); } is_log = 0; (void) nvlist_find(nvlist, ZPOOL_CONFIG_IS_LOG, DATA_TYPE_UINT64, 0, &is_log); if (is_log) return (EIO); /* * Create the pool if this is the first time we've seen it. */ spa = spa_find_by_guid(pool_guid); if (!spa) { spa = spa_create(pool_guid); spa->spa_name = strdup(pool_name); } if (pool_txg > spa->spa_txg) { spa->spa_txg = pool_txg; is_newer = 1; } else is_newer = 0; /* * Get the vdev tree and create our in-core copy of it. * If we already have a vdev with this guid, this must * be some kind of alias (overlapping slices, dangerously dedicated * disks etc). */ if (nvlist_find(nvlist, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, 0, &guid)) { return (EIO); } vdev = vdev_find(guid); if (vdev && vdev->v_phys_read) /* Has this vdev already been inited? */ return (EIO); if (nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST, 0, &vdevs)) { return (EIO); } rc = vdev_init_from_nvlist(vdevs, NULL, &top_vdev, is_newer); if (rc) return (rc); /* * Add the toplevel vdev to the pool if its not already there. */ STAILQ_FOREACH(pool_vdev, &spa->spa_vdevs, v_childlink) if (top_vdev == pool_vdev) break; if (!pool_vdev && top_vdev) STAILQ_INSERT_TAIL(&spa->spa_vdevs, top_vdev, v_childlink); /* * We should already have created an incomplete vdev for this * vdev. Find it and initialise it with our read proc. */ vdev = vdev_find(guid); if (vdev) { vdev->v_phys_read = read; vdev->v_read_priv = read_priv; vdev->v_state = VDEV_STATE_HEALTHY; } else { printf("ZFS: inconsistent nvlist contents\n"); return (EIO); } /* * Re-evaluate top-level vdev state. */ vdev_set_state(top_vdev); /* * Ok, we are happy with the pool so far. Lets find * the best uberblock and then we can actually access * the contents of the pool. */ upbuf = zfs_alloc(VDEV_UBERBLOCK_SIZE(vdev)); up = (const struct uberblock *)upbuf; for (i = 0; i < VDEV_UBERBLOCK_COUNT(vdev); i++) { off = VDEV_UBERBLOCK_OFFSET(vdev, i); BP_ZERO(&bp); DVA_SET_OFFSET(&bp.blk_dva[0], off); BP_SET_LSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev)); BP_SET_PSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev)); BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0); if (vdev_read_phys(vdev, &bp, upbuf, off, 0)) continue; if (up->ub_magic != UBERBLOCK_MAGIC) continue; if (up->ub_txg < spa->spa_txg) continue; if (up->ub_txg > spa->spa_uberblock.ub_txg) { spa->spa_uberblock = *up; } else if (up->ub_txg == spa->spa_uberblock.ub_txg) { if (up->ub_timestamp > spa->spa_uberblock.ub_timestamp) spa->spa_uberblock = *up; } } zfs_free(upbuf, VDEV_UBERBLOCK_SIZE(vdev)); if (spap) *spap = spa; return (0); } static int ilog2(int n) { int v; for (v = 0; v < 32; v++) if (n == (1 << v)) return v; return -1; } static int zio_read_gang(const spa_t *spa, const blkptr_t *bp, void *buf) { blkptr_t gbh_bp; zio_gbh_phys_t zio_gb; char *pbuf; int i; /* Artificial BP for gang block header. 
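/*
 * Sketch of the "best uberblock" rule in vdev_probe() above: among the
 * candidates whose txg is at least the label's pool_txg, prefer the
 * highest txg and break ties with the newer timestamp.
 */
#include <stdint.h>

struct ub_sketch { uint64_t txg, timestamp; };

static int
ub_better(const struct ub_sketch *cand, const struct ub_sketch *best)
{
	if (cand->txg != best->txg)
		return (cand->txg > best->txg);
	return (cand->timestamp > best->timestamp);
}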
*/ gbh_bp = *bp; BP_SET_PSIZE(&gbh_bp, SPA_GANGBLOCKSIZE); BP_SET_LSIZE(&gbh_bp, SPA_GANGBLOCKSIZE); BP_SET_CHECKSUM(&gbh_bp, ZIO_CHECKSUM_GANG_HEADER); BP_SET_COMPRESS(&gbh_bp, ZIO_COMPRESS_OFF); for (i = 0; i < SPA_DVAS_PER_BP; i++) DVA_SET_GANG(&gbh_bp.blk_dva[i], 0); /* Read gang header block using the artificial BP. */ if (zio_read(spa, &gbh_bp, &zio_gb)) return (EIO); pbuf = buf; for (i = 0; i < SPA_GBH_NBLKPTRS; i++) { blkptr_t *gbp = &zio_gb.zg_blkptr[i]; if (BP_IS_HOLE(gbp)) continue; if (zio_read(spa, gbp, pbuf)) return (EIO); pbuf += BP_GET_PSIZE(gbp); } if (zio_checksum_verify(bp, buf)) return (EIO); return (0); } static int zio_read(const spa_t *spa, const blkptr_t *bp, void *buf) { int cpfunc = BP_GET_COMPRESS(bp); uint64_t align, size; void *pbuf; int i, error; /* * Process data embedded in block pointer */ if (BP_IS_EMBEDDED(bp)) { ASSERT(BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA); size = BPE_GET_PSIZE(bp); ASSERT(size <= BPE_PAYLOAD_SIZE); if (cpfunc != ZIO_COMPRESS_OFF) pbuf = zfs_alloc(size); else pbuf = buf; decode_embedded_bp_compressed(bp, pbuf); error = 0; if (cpfunc != ZIO_COMPRESS_OFF) { error = zio_decompress_data(cpfunc, pbuf, size, buf, BP_GET_LSIZE(bp)); zfs_free(pbuf, size); } if (error != 0) printf("ZFS: i/o error - unable to decompress block pointer data, error %d\n", error); return (error); } error = EIO; for (i = 0; i < SPA_DVAS_PER_BP; i++) { const dva_t *dva = &bp->blk_dva[i]; vdev_t *vdev; int vdevid; off_t offset; if (!dva->dva_word[0] && !dva->dva_word[1]) continue; vdevid = DVA_GET_VDEV(dva); offset = DVA_GET_OFFSET(dva); STAILQ_FOREACH(vdev, &spa->spa_vdevs, v_childlink) { if (vdev->v_id == vdevid) break; } if (!vdev || !vdev->v_read) continue; size = BP_GET_PSIZE(bp); if (vdev->v_read == vdev_raidz_read) { align = 1ULL << vdev->v_top->v_ashift; if (P2PHASE(size, align) != 0) size = P2ROUNDUP(size, align); } if (size != BP_GET_PSIZE(bp) || cpfunc != ZIO_COMPRESS_OFF) pbuf = zfs_alloc(size); else pbuf = buf; if (DVA_GET_GANG(dva)) error = zio_read_gang(spa, bp, pbuf); else error = vdev->v_read(vdev, bp, pbuf, offset, size); if (error == 0) { if (cpfunc != ZIO_COMPRESS_OFF) error = zio_decompress_data(cpfunc, pbuf, BP_GET_PSIZE(bp), buf, BP_GET_LSIZE(bp)); else if (size != BP_GET_PSIZE(bp)) bcopy(pbuf, buf, BP_GET_PSIZE(bp)); } if (buf != pbuf) zfs_free(pbuf, size); if (error == 0) break; } if (error != 0) printf("ZFS: i/o error - all block copies unavailable\n"); return (error); } static int dnode_read(const spa_t *spa, const dnode_phys_t *dnode, off_t offset, void *buf, size_t buflen) { int ibshift = dnode->dn_indblkshift - SPA_BLKPTRSHIFT; int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; int nlevels = dnode->dn_nlevels; int i, rc; if (bsize > SPA_MAXBLOCKSIZE) { printf("ZFS: I/O error - blocks larger than 128K are not supported\n"); return (EIO); } /* * Note: bsize may not be a power of two here so we need to do an * actual divide rather than a bitshift. */ while (buflen > 0) { uint64_t bn = offset / bsize; int boff = offset % bsize; int ibn; const blkptr_t *indbp; blkptr_t bp; if (bn > dnode->dn_maxblkid) return (EIO); if (dnode == dnode_cache_obj && bn == dnode_cache_bn) goto cached; indbp = dnode->dn_blkptr; for (i = 0; i < nlevels; i++) { /* * Copy the bp from the indirect array so that * we can re-use the scratch buffer for multi-level * objects. 
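 */

/*
 * Illustrative sketch (not part of the original source): the per-level index
 * arithmetic used just below, isolated as a helper. "level" counts down from
 * nlevels - 1 (top indirect level) to 0 (the data level), and ibshift is
 * dn_indblkshift - SPA_BLKPTRSHIFT, i.e. log2 of the number of block
 * pointers per indirect block. The example_* name is invented.
 */
#include <stdint.h>

static int
example_indirect_index(uint64_t bn, int level, int ibshift)
{
        /* Select this level's ibshift-bit slice of the block number. */
        return ((int)(bn >> (level * ibshift)) & ((1 << ibshift) - 1));
}

/*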
*/ ibn = bn >> ((nlevels - i - 1) * ibshift); ibn &= ((1 << ibshift) - 1); bp = indbp[ibn]; if (BP_IS_HOLE(&bp)) { memset(dnode_cache_buf, 0, bsize); break; } rc = zio_read(spa, &bp, dnode_cache_buf); if (rc) return (rc); indbp = (const blkptr_t *) dnode_cache_buf; } dnode_cache_obj = dnode; dnode_cache_bn = bn; cached: /* * The buffer contains our data block. Copy what we * need from it and loop. */ i = bsize - boff; if (i > buflen) i = buflen; memcpy(buf, &dnode_cache_buf[boff], i); buf = ((char*) buf) + i; offset += i; buflen -= i; } return (0); } /* * Lookup a value in a microzap directory. Assumes that the zap * scratch buffer contains the directory contents. */ static int mzap_lookup(const dnode_phys_t *dnode, const char *name, uint64_t *value) { const mzap_phys_t *mz; const mzap_ent_phys_t *mze; size_t size; int chunks, i; /* * Microzap objects use exactly one block. Read the whole * thing. */ size = dnode->dn_datablkszsec * 512; mz = (const mzap_phys_t *) zap_scratch; chunks = size / MZAP_ENT_LEN - 1; for (i = 0; i < chunks; i++) { mze = &mz->mz_chunk[i]; if (!strcmp(mze->mze_name, name)) { *value = mze->mze_value; return (0); } } return (ENOENT); } /* * Compare a name with a zap leaf entry. Return non-zero if the name * matches. */ static int fzap_name_equal(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc, const char *name) { size_t namelen; const zap_leaf_chunk_t *nc; const char *p; namelen = zc->l_entry.le_name_numints; nc = &ZAP_LEAF_CHUNK(zl, zc->l_entry.le_name_chunk); p = name; while (namelen > 0) { size_t len; len = namelen; if (len > ZAP_LEAF_ARRAY_BYTES) len = ZAP_LEAF_ARRAY_BYTES; if (memcmp(p, nc->l_array.la_array, len)) return (0); p += len; namelen -= len; nc = &ZAP_LEAF_CHUNK(zl, nc->l_array.la_next); } return 1; } /* * Extract a uint64_t value from a zap leaf entry. */ static uint64_t fzap_leaf_value(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc) { const zap_leaf_chunk_t *vc; int i; uint64_t value; const uint8_t *p; vc = &ZAP_LEAF_CHUNK(zl, zc->l_entry.le_value_chunk); for (i = 0, value = 0, p = vc->l_array.la_array; i < 8; i++) { value = (value << 8) | p[i]; } return value; } /* * Lookup a value in a fatzap directory. Assumes that the zap scratch * buffer contains the directory header. */ static int fzap_lookup(const spa_t *spa, const dnode_phys_t *dnode, const char *name, uint64_t *value) { int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; zap_phys_t zh = *(zap_phys_t *) zap_scratch; fat_zap_t z; uint64_t *ptrtbl; uint64_t hash; int rc; if (zh.zap_magic != ZAP_MAGIC) return (EIO); z.zap_block_shift = ilog2(bsize); z.zap_phys = (zap_phys_t *) zap_scratch; /* * Figure out where the pointer table is and read it in if necessary. */ if (zh.zap_ptrtbl.zt_blk) { rc = dnode_read(spa, dnode, zh.zap_ptrtbl.zt_blk * bsize, zap_scratch, bsize); if (rc) return (rc); ptrtbl = (uint64_t *) zap_scratch; } else { ptrtbl = &ZAP_EMBEDDED_PTRTBL_ENT(&z, 0); } hash = zap_hash(zh.zap_salt, name); zap_leaf_t zl; zl.l_bs = z.zap_block_shift; off_t off = ptrtbl[hash >> (64 - zh.zap_ptrtbl.zt_shift)] << zl.l_bs; zap_leaf_chunk_t *zc; rc = dnode_read(spa, dnode, off, zap_scratch, bsize); if (rc) return (rc); zl.l_phys = (zap_leaf_phys_t *) zap_scratch; /* * Make sure this chunk matches our hash. */ if (zl.l_phys->l_hdr.lh_prefix_len > 0 && zl.l_phys->l_hdr.lh_prefix != hash >> (64 - zl.l_phys->l_hdr.lh_prefix_len)) return (ENOENT); /* * Hash within the chunk to find our entry. 
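 */

/*
 * Illustrative sketch (not from the original source): the leaf hash-table
 * index computed just below, as a stand-alone helper. The 64-bit ZAP hash is
 * consumed from the top: the first prefix_len bits routed us to this leaf,
 * and the next hash_shift bits select a bucket inside it. The example_* name
 * is invented.
 */
#include <stdint.h>

static int
example_leaf_bucket(uint64_t hash, int prefix_len, int hash_shift)
{
        int shift = 64 - hash_shift - prefix_len;

        return ((int)(hash >> shift) & ((1 << hash_shift) - 1));
}

/*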
*/ int shift = (64 - ZAP_LEAF_HASH_SHIFT(&zl) - zl.l_phys->l_hdr.lh_prefix_len); int h = (hash >> shift) & ((1 << ZAP_LEAF_HASH_SHIFT(&zl)) - 1); h = zl.l_phys->l_hash[h]; if (h == 0xffff) return (ENOENT); zc = &ZAP_LEAF_CHUNK(&zl, h); while (zc->l_entry.le_hash != hash) { if (zc->l_entry.le_next == 0xffff) { zc = 0; break; } zc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_next); } if (fzap_name_equal(&zl, zc, name)) { if (zc->l_entry.le_value_intlen * zc->l_entry.le_value_numints > 8) return (E2BIG); *value = fzap_leaf_value(&zl, zc); return (0); } return (ENOENT); } /* * Lookup a name in a zap object and return its value as a uint64_t. */ static int zap_lookup(const spa_t *spa, const dnode_phys_t *dnode, const char *name, uint64_t *value) { int rc; uint64_t zap_type; size_t size = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; rc = dnode_read(spa, dnode, 0, zap_scratch, size); if (rc) return (rc); zap_type = *(uint64_t *) zap_scratch; if (zap_type == ZBT_MICRO) return mzap_lookup(dnode, name, value); else if (zap_type == ZBT_HEADER) return fzap_lookup(spa, dnode, name, value); printf("ZFS: invalid zap_type=%d\n", (int)zap_type); return (EIO); } /* * List a microzap directory. Assumes that the zap scratch buffer contains * the directory contents. */ static int -mzap_list(const dnode_phys_t *dnode, int (*callback)(const char *)) +mzap_list(const dnode_phys_t *dnode, int (*callback)(const char *, uint64_t)) { const mzap_phys_t *mz; const mzap_ent_phys_t *mze; size_t size; - int chunks, i; + int chunks, i, rc; /* * Microzap objects use exactly one block. Read the whole * thing. */ size = dnode->dn_datablkszsec * 512; mz = (const mzap_phys_t *) zap_scratch; chunks = size / MZAP_ENT_LEN - 1; for (i = 0; i < chunks; i++) { mze = &mz->mz_chunk[i]; - if (mze->mze_name[0]) - //printf("%-32s 0x%jx\n", mze->mze_name, (uintmax_t)mze->mze_value); - callback(mze->mze_name); + if (mze->mze_name[0]) { + rc = callback(mze->mze_name, mze->mze_value); + if (rc != 0) + return (rc); + } } return (0); } /* * List a fatzap directory. Assumes that the zap scratch buffer contains * the directory header. */ static int -fzap_list(const spa_t *spa, const dnode_phys_t *dnode, int (*callback)(const char *)) +fzap_list(const spa_t *spa, const dnode_phys_t *dnode, int (*callback)(const char *, uint64_t)) { int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; zap_phys_t zh = *(zap_phys_t *) zap_scratch; fat_zap_t z; - int i, j; + int i, j, rc; if (zh.zap_magic != ZAP_MAGIC) return (EIO); z.zap_block_shift = ilog2(bsize); z.zap_phys = (zap_phys_t *) zap_scratch; /* * This assumes that the leaf blocks start at block 1. The * documentation isn't exactly clear on this. */ zap_leaf_t zl; zl.l_bs = z.zap_block_shift; for (i = 0; i < zh.zap_num_leafs; i++) { off_t off = (i + 1) << zl.l_bs; char name[256], *p; uint64_t value; if (dnode_read(spa, dnode, off, zap_scratch, bsize)) return (EIO); zl.l_phys = (zap_leaf_phys_t *) zap_scratch; for (j = 0; j < ZAP_LEAF_NUMCHUNKS(&zl); j++) { zap_leaf_chunk_t *zc, *nc; int namelen; zc = &ZAP_LEAF_CHUNK(&zl, j); if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY) continue; namelen = zc->l_entry.le_name_numints; if (namelen > sizeof(name)) namelen = sizeof(name); /* * Paste the name back together. 
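 */

/*
 * Illustrative sketch (not part of the original source): the chunk-chaining
 * idea used below to paste a name back together. Leaf array chunks each hold
 * a small fixed number of bytes and are linked by a "next" index; the struct
 * and constants here are simplified stand-ins, not the on-disk layout.
 */
#include <stddef.h>
#include <string.h>

#define EXAMPLE_ARRAY_BYTES     21      /* per-chunk payload, like ZAP_LEAF_ARRAY_BYTES */
#define EXAMPLE_CHAIN_END       0xffff

struct example_chunk {
        unsigned char   bytes[EXAMPLE_ARRAY_BYTES];
        unsigned short  next;           /* next chunk index, or EXAMPLE_CHAIN_END */
};

static void
example_copy_name(const struct example_chunk *chunks, unsigned short first,
    size_t namelen, char *out)
{
        unsigned short idx = first;

        while (namelen > 0) {
                size_t len = namelen > EXAMPLE_ARRAY_BYTES ?
                    EXAMPLE_ARRAY_BYTES : namelen;

                memcpy(out, chunks[idx].bytes, len);
                out += len;
                namelen -= len;
                idx = chunks[idx].next;
        }
        *out = '\0';
}

/*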
*/ nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk); p = name; while (namelen > 0) { int len; len = namelen; if (len > ZAP_LEAF_ARRAY_BYTES) len = ZAP_LEAF_ARRAY_BYTES; memcpy(p, nc->l_array.la_array, len); p += len; namelen -= len; nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next); } /* * Assume the first eight bytes of the value are * a uint64_t. */ value = fzap_leaf_value(&zl, zc); //printf("%s 0x%jx\n", name, (uintmax_t)value); - callback((const char *)name); + rc = callback((const char *)name, value); + if (rc != 0) + return (rc); } } return (0); } -static int zfs_printf(const char *name) +static int zfs_printf(const char *name, uint64_t value __unused) { printf("%s\n", name); return (0); } /* * List a zap directory. */ static int zap_list(const spa_t *spa, const dnode_phys_t *dnode) { uint64_t zap_type; size_t size = dnode->dn_datablkszsec * 512; if (dnode_read(spa, dnode, 0, zap_scratch, size)) return (EIO); zap_type = *(uint64_t *) zap_scratch; if (zap_type == ZBT_MICRO) return mzap_list(dnode, zfs_printf); else return fzap_list(spa, dnode, zfs_printf); } static int objset_get_dnode(const spa_t *spa, const objset_phys_t *os, uint64_t objnum, dnode_phys_t *dnode) { off_t offset; offset = objnum * sizeof(dnode_phys_t); return dnode_read(spa, &os->os_meta_dnode, offset, dnode, sizeof(dnode_phys_t)); } static int mzap_rlookup(const spa_t *spa, const dnode_phys_t *dnode, char *name, uint64_t value) { const mzap_phys_t *mz; const mzap_ent_phys_t *mze; size_t size; int chunks, i; /* * Microzap objects use exactly one block. Read the whole * thing. */ size = dnode->dn_datablkszsec * 512; mz = (const mzap_phys_t *) zap_scratch; chunks = size / MZAP_ENT_LEN - 1; for (i = 0; i < chunks; i++) { mze = &mz->mz_chunk[i]; if (value == mze->mze_value) { strcpy(name, mze->mze_name); return (0); } } return (ENOENT); } static void fzap_name_copy(const zap_leaf_t *zl, const zap_leaf_chunk_t *zc, char *name) { size_t namelen; const zap_leaf_chunk_t *nc; char *p; namelen = zc->l_entry.le_name_numints; nc = &ZAP_LEAF_CHUNK(zl, zc->l_entry.le_name_chunk); p = name; while (namelen > 0) { size_t len; len = namelen; if (len > ZAP_LEAF_ARRAY_BYTES) len = ZAP_LEAF_ARRAY_BYTES; memcpy(p, nc->l_array.la_array, len); p += len; namelen -= len; nc = &ZAP_LEAF_CHUNK(zl, nc->l_array.la_next); } *p = '\0'; } static int fzap_rlookup(const spa_t *spa, const dnode_phys_t *dnode, char *name, uint64_t value) { int bsize = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT; zap_phys_t zh = *(zap_phys_t *) zap_scratch; fat_zap_t z; int i, j; if (zh.zap_magic != ZAP_MAGIC) return (EIO); z.zap_block_shift = ilog2(bsize); z.zap_phys = (zap_phys_t *) zap_scratch; /* * This assumes that the leaf blocks start at block 1. The * documentation isn't exactly clear on this. 
*/ zap_leaf_t zl; zl.l_bs = z.zap_block_shift; for (i = 0; i < zh.zap_num_leafs; i++) { off_t off = (i + 1) << zl.l_bs; if (dnode_read(spa, dnode, off, zap_scratch, bsize)) return (EIO); zl.l_phys = (zap_leaf_phys_t *) zap_scratch; for (j = 0; j < ZAP_LEAF_NUMCHUNKS(&zl); j++) { zap_leaf_chunk_t *zc; zc = &ZAP_LEAF_CHUNK(&zl, j); if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY) continue; if (zc->l_entry.le_value_intlen != 8 || zc->l_entry.le_value_numints != 1) continue; if (fzap_leaf_value(&zl, zc) == value) { fzap_name_copy(&zl, zc, name); return (0); } } } return (ENOENT); } static int zap_rlookup(const spa_t *spa, const dnode_phys_t *dnode, char *name, uint64_t value) { int rc; uint64_t zap_type; size_t size = dnode->dn_datablkszsec * 512; rc = dnode_read(spa, dnode, 0, zap_scratch, size); if (rc) return (rc); zap_type = *(uint64_t *) zap_scratch; if (zap_type == ZBT_MICRO) return mzap_rlookup(spa, dnode, name, value); else return fzap_rlookup(spa, dnode, name, value); } static int zfs_rlookup(const spa_t *spa, uint64_t objnum, char *result) { char name[256]; char component[256]; uint64_t dir_obj, parent_obj, child_dir_zapobj; dnode_phys_t child_dir_zap, dataset, dir, parent; dsl_dir_phys_t *dd; dsl_dataset_phys_t *ds; char *p; int len; p = &name[sizeof(name) - 1]; *p = '\0'; if (objset_get_dnode(spa, &spa->spa_mos, objnum, &dataset)) { printf("ZFS: can't find dataset %ju\n", (uintmax_t)objnum); return (EIO); } ds = (dsl_dataset_phys_t *)&dataset.dn_bonus; dir_obj = ds->ds_dir_obj; for (;;) { if (objset_get_dnode(spa, &spa->spa_mos, dir_obj, &dir) != 0) return (EIO); dd = (dsl_dir_phys_t *)&dir.dn_bonus; /* Actual loop condition. */ parent_obj = dd->dd_parent_obj; if (parent_obj == 0) break; if (objset_get_dnode(spa, &spa->spa_mos, parent_obj, &parent) != 0) return (EIO); dd = (dsl_dir_phys_t *)&parent.dn_bonus; child_dir_zapobj = dd->dd_child_dir_zapobj; if (objset_get_dnode(spa, &spa->spa_mos, child_dir_zapobj, &child_dir_zap) != 0) return (EIO); if (zap_rlookup(spa, &child_dir_zap, component, dir_obj) != 0) return (EIO); len = strlen(component); p -= len; memcpy(p, component, len); --p; *p = '/'; /* Actual loop iteration. */ dir_obj = parent_obj; } if (*p != '\0') ++p; strcpy(result, p); return (0); } static int zfs_lookup_dataset(const spa_t *spa, const char *name, uint64_t *objnum) { char element[256]; uint64_t dir_obj, child_dir_zapobj; dnode_phys_t child_dir_zap, dir; dsl_dir_phys_t *dd; const char *p, *q; if (objset_get_dnode(spa, &spa->spa_mos, DMU_POOL_DIRECTORY_OBJECT, &dir)) return (EIO); if (zap_lookup(spa, &dir, DMU_POOL_ROOT_DATASET, &dir_obj)) return (EIO); p = name; for (;;) { if (objset_get_dnode(spa, &spa->spa_mos, dir_obj, &dir)) return (EIO); dd = (dsl_dir_phys_t *)&dir.dn_bonus; while (*p == '/') p++; /* Actual loop condition #1. */ if (*p == '\0') break; q = strchr(p, '/'); if (q) { memcpy(element, p, q - p); element[q - p] = '\0'; p = q + 1; } else { strcpy(element, p); p += strlen(p); } child_dir_zapobj = dd->dd_child_dir_zapobj; if (objset_get_dnode(spa, &spa->spa_mos, child_dir_zapobj, &child_dir_zap) != 0) return (EIO); /* Actual loop condition #2. 
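 */

/*
 * Illustrative sketch (not from the original source): the component-splitting
 * pattern the lookup loop above applies to a dataset path. Each call peels
 * one "/"-separated element off the front; e.g. for "tank/ROOT/default" the
 * first call stores "tank" and returns a pointer to "ROOT/default". The
 * example_* name is invented.
 */
#include <stddef.h>
#include <string.h>

static const char *
example_next_element(const char *path, char *element, size_t elemsize)
{
        const char *p = path, *q;
        size_t len;

        while (*p == '/')
                p++;
        q = strchr(p, '/');
        len = (q != NULL) ? (size_t)(q - p) : strlen(p);
        if (len >= elemsize)            /* truncate over-long components */
                len = elemsize - 1;
        memcpy(element, p, len);
        element[len] = '\0';
        return ((q != NULL) ? q + 1 : p + strlen(p));
}

/*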
*/ if (zap_lookup(spa, &child_dir_zap, element, &dir_obj) != 0) return (ENOENT); } *objnum = dd->dd_head_dataset_obj; return (0); } #ifndef BOOT2 static int zfs_list_dataset(const spa_t *spa, uint64_t objnum/*, int pos, char *entry*/) { uint64_t dir_obj, child_dir_zapobj; dnode_phys_t child_dir_zap, dir, dataset; dsl_dataset_phys_t *ds; dsl_dir_phys_t *dd; if (objset_get_dnode(spa, &spa->spa_mos, objnum, &dataset)) { printf("ZFS: can't find dataset %ju\n", (uintmax_t)objnum); return (EIO); } ds = (dsl_dataset_phys_t *) &dataset.dn_bonus; dir_obj = ds->ds_dir_obj; if (objset_get_dnode(spa, &spa->spa_mos, dir_obj, &dir)) { printf("ZFS: can't find dirobj %ju\n", (uintmax_t)dir_obj); return (EIO); } dd = (dsl_dir_phys_t *)&dir.dn_bonus; child_dir_zapobj = dd->dd_child_dir_zapobj; if (objset_get_dnode(spa, &spa->spa_mos, child_dir_zapobj, &child_dir_zap) != 0) { printf("ZFS: can't find child zap %ju\n", (uintmax_t)dir_obj); return (EIO); } return (zap_list(spa, &child_dir_zap) != 0); } int -zfs_callback_dataset(const spa_t *spa, uint64_t objnum, int (*callback)(const char *name)) +zfs_callback_dataset(const spa_t *spa, uint64_t objnum, int (*callback)(const char *, uint64_t)) { uint64_t dir_obj, child_dir_zapobj, zap_type; dnode_phys_t child_dir_zap, dir, dataset; dsl_dataset_phys_t *ds; dsl_dir_phys_t *dd; int err; err = objset_get_dnode(spa, &spa->spa_mos, objnum, &dataset); if (err != 0) { printf("ZFS: can't find dataset %ju\n", (uintmax_t)objnum); return (err); } ds = (dsl_dataset_phys_t *) &dataset.dn_bonus; dir_obj = ds->ds_dir_obj; err = objset_get_dnode(spa, &spa->spa_mos, dir_obj, &dir); if (err != 0) { printf("ZFS: can't find dirobj %ju\n", (uintmax_t)dir_obj); return (err); } dd = (dsl_dir_phys_t *)&dir.dn_bonus; child_dir_zapobj = dd->dd_child_dir_zapobj; err = objset_get_dnode(spa, &spa->spa_mos, child_dir_zapobj, &child_dir_zap); if (err != 0) { printf("ZFS: can't find child zap %ju\n", (uintmax_t)dir_obj); return (err); } err = dnode_read(spa, &child_dir_zap, 0, zap_scratch, child_dir_zap.dn_datablkszsec * 512); if (err != 0) return (err); zap_type = *(uint64_t *) zap_scratch; if (zap_type == ZBT_MICRO) return mzap_list(&child_dir_zap, callback); else return fzap_list(spa, &child_dir_zap, callback); } #endif /* * Find the object set given the object number of its dataset object * and return its details in *objset */ static int zfs_mount_dataset(const spa_t *spa, uint64_t objnum, objset_phys_t *objset) { dnode_phys_t dataset; dsl_dataset_phys_t *ds; if (objset_get_dnode(spa, &spa->spa_mos, objnum, &dataset)) { printf("ZFS: can't find dataset %ju\n", (uintmax_t)objnum); return (EIO); } ds = (dsl_dataset_phys_t *) &dataset.dn_bonus; if (zio_read(spa, &ds->ds_bp, objset)) { printf("ZFS: can't read object set for dataset %ju\n", (uintmax_t)objnum); return (EIO); } return (0); } /* * Find the object set pointed to by the BOOTFS property or the root * dataset if there is none and return its details in *objset */ static int zfs_get_root(const spa_t *spa, uint64_t *objid) { dnode_phys_t dir, propdir; uint64_t props, bootfs, root; *objid = 0; /* * Start with the MOS directory object. */ if (objset_get_dnode(spa, &spa->spa_mos, DMU_POOL_DIRECTORY_OBJECT, &dir)) { printf("ZFS: can't read MOS object directory\n"); return (EIO); } /* * Lookup the pool_props and see if we can find a bootfs. 
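 */

/*
 * Illustrative sketch (not part of the original revision): how a caller might
 * use the widened callback signature introduced above. Callbacks now receive
 * the ZAP entry's value alongside its name, and a non-zero return value stops
 * the walk and is propagated back out of mzap_list()/fzap_list(). The
 * callback below is hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

static int
example_print_child(const char *name, uint64_t value)
{
        printf("%-32s %ju\n", name, (uintmax_t)value);
        return (0);             /* keep walking */
}

/*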
*/ if (zap_lookup(spa, &dir, DMU_POOL_PROPS, &props) == 0 && objset_get_dnode(spa, &spa->spa_mos, props, &propdir) == 0 && zap_lookup(spa, &propdir, "bootfs", &bootfs) == 0 && bootfs != 0) { *objid = bootfs; return (0); } /* * Lookup the root dataset directory */ if (zap_lookup(spa, &dir, DMU_POOL_ROOT_DATASET, &root) || objset_get_dnode(spa, &spa->spa_mos, root, &dir)) { printf("ZFS: can't find root dsl_dir\n"); return (EIO); } /* * Use the information from the dataset directory's bonus buffer * to find the dataset object and from that the object set itself. */ dsl_dir_phys_t *dd = (dsl_dir_phys_t *) &dir.dn_bonus; *objid = dd->dd_head_dataset_obj; return (0); } static int zfs_mount(const spa_t *spa, uint64_t rootobj, struct zfsmount *mount) { mount->spa = spa; /* * Find the root object set if not explicitly provided */ if (rootobj == 0 && zfs_get_root(spa, &rootobj)) { printf("ZFS: can't find root filesystem\n"); return (EIO); } if (zfs_mount_dataset(spa, rootobj, &mount->objset)) { printf("ZFS: can't open root filesystem\n"); return (EIO); } mount->rootobj = rootobj; return (0); } +/* + * callback function for feature name checks. + */ static int +check_feature(const char *name, uint64_t value) +{ + int i; + + if (value == 0) + return (0); + if (name[0] == '\0') + return (0); + + for (i = 0; features_for_read[i] != NULL; i++) { + if (strcmp(name, features_for_read[i]) == 0) + return (0); + } + printf("ZFS: unsupported feature: %s\n", name); + return (EIO); +} + +/* + * Checks whether the MOS features that are active are supported. + */ +static int +check_mos_features(const spa_t *spa) +{ + dnode_phys_t dir; + uint64_t objnum, zap_type; + size_t size; + int rc; + + if ((rc = objset_get_dnode(spa, &spa->spa_mos, DMU_OT_OBJECT_DIRECTORY, + &dir)) != 0) + return (rc); + if ((rc = zap_lookup(spa, &dir, DMU_POOL_FEATURES_FOR_READ, &objnum)) != 0) + return (rc); + + if ((rc = objset_get_dnode(spa, &spa->spa_mos, objnum, &dir)) != 0) + return (rc); + + if (dir.dn_type != DMU_OTN_ZAP_METADATA) + return (EIO); + + size = dir.dn_datablkszsec * 512; + if (dnode_read(spa, &dir, 0, zap_scratch, size)) + return (EIO); + + zap_type = *(uint64_t *) zap_scratch; + if (zap_type == ZBT_MICRO) + rc = mzap_list(&dir, check_feature); + else + rc = fzap_list(spa, &dir, check_feature); + + return (rc); +} + +static int zfs_spa_init(spa_t *spa) { + int rc; if (zio_read(spa, &spa->spa_uberblock.ub_rootbp, &spa->spa_mos)) { printf("ZFS: can't read MOS of pool %s\n", spa->spa_name); return (EIO); } if (spa->spa_mos.os_type != DMU_OST_META) { printf("ZFS: corrupted MOS of pool %s\n", spa->spa_name); return (EIO); } - return (0); + + rc = check_mos_features(spa); + if (rc != 0) { + printf("ZFS: pool %s is not supported\n", spa->spa_name); + } + + return (rc); } static int zfs_dnode_stat(const spa_t *spa, dnode_phys_t *dn, struct stat *sb) { if (dn->dn_bonustype != DMU_OT_SA) { znode_phys_t *zp = (znode_phys_t *)dn->dn_bonus; sb->st_mode = zp->zp_mode; sb->st_uid = zp->zp_uid; sb->st_gid = zp->zp_gid; sb->st_size = zp->zp_size; } else { sa_hdr_phys_t *sahdrp; int hdrsize; size_t size = 0; void *buf = NULL; if (dn->dn_bonuslen != 0) sahdrp = (sa_hdr_phys_t *)DN_BONUS(dn); else { if ((dn->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0) { blkptr_t *bp = &dn->dn_spill; int error; size = BP_GET_LSIZE(bp); buf = zfs_alloc(size); error = zio_read(spa, bp, buf); if (error != 0) { zfs_free(buf, size); return (error); } sahdrp = buf; } else { return (EIO); } } hdrsize = SA_HDR_SIZE(sahdrp); sb->st_mode = *(uint64_t *)((char *)sahdrp 
+ hdrsize + SA_MODE_OFFSET); sb->st_uid = *(uint64_t *)((char *)sahdrp + hdrsize + SA_UID_OFFSET); sb->st_gid = *(uint64_t *)((char *)sahdrp + hdrsize + SA_GID_OFFSET); sb->st_size = *(uint64_t *)((char *)sahdrp + hdrsize + SA_SIZE_OFFSET); if (buf != NULL) zfs_free(buf, size); } return (0); } /* * Lookup a file and return its dnode. */ static int zfs_lookup(const struct zfsmount *mount, const char *upath, dnode_phys_t *dnode) { int rc; uint64_t objnum, rootnum, parentnum; const spa_t *spa; dnode_phys_t dn; const char *p, *q; char element[256]; char path[1024]; int symlinks_followed = 0; struct stat sb; spa = mount->spa; if (mount->objset.os_type != DMU_OST_ZFS) { printf("ZFS: unexpected object set type %ju\n", (uintmax_t)mount->objset.os_type); return (EIO); } /* * Get the root directory dnode. */ rc = objset_get_dnode(spa, &mount->objset, MASTER_NODE_OBJ, &dn); if (rc) return (rc); rc = zap_lookup(spa, &dn, ZFS_ROOT_OBJ, &rootnum); if (rc) return (rc); rc = objset_get_dnode(spa, &mount->objset, rootnum, &dn); if (rc) return (rc); objnum = rootnum; p = upath; while (p && *p) { while (*p == '/') p++; if (!*p) break; q = strchr(p, '/'); if (q) { memcpy(element, p, q - p); element[q - p] = 0; p = q; } else { strcpy(element, p); p = 0; } rc = zfs_dnode_stat(spa, &dn, &sb); if (rc) return (rc); if (!S_ISDIR(sb.st_mode)) return (ENOTDIR); parentnum = objnum; rc = zap_lookup(spa, &dn, element, &objnum); if (rc) return (rc); objnum = ZFS_DIRENT_OBJ(objnum); rc = objset_get_dnode(spa, &mount->objset, objnum, &dn); if (rc) return (rc); /* * Check for symlink. */ rc = zfs_dnode_stat(spa, &dn, &sb); if (rc) return (rc); if (S_ISLNK(sb.st_mode)) { if (symlinks_followed > 10) return (EMLINK); symlinks_followed++; /* * Read the link value and copy the tail of our * current path onto the end. */ if (p) strcpy(&path[sb.st_size], p); else path[sb.st_size] = 0; /* * Second test is purely to silence bogus compiler * warning about accessing past the end of dn_bonus. */ if (sb.st_size + sizeof(znode_phys_t) <= dn.dn_bonuslen && sizeof(znode_phys_t) <= sizeof(dn.dn_bonus)) { memcpy(path, &dn.dn_bonus[sizeof(znode_phys_t)], sb.st_size); } else { rc = dnode_read(spa, &dn, 0, path, sb.st_size); if (rc) return (rc); } /* * Restart with the new path, starting either at * the root or at the parent depending whether or * not the link is relative. */ p = path; if (*p == '/') objnum = rootnum; else objnum = parentnum; objset_get_dnode(spa, &mount->objset, objnum, &dn); } } *dnode = dn; return (0); } Index: user/alc/PQ_LAUNDRY/sys/cddl/boot/zfs/zfsimpl.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/cddl/boot/zfs/zfsimpl.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/cddl/boot/zfs/zfsimpl.h (revision 303642) @@ -1,1453 +1,1505 @@ /*- * Copyright (c) 2002 McAfee, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and McAfee Research,, the Security Research Division of * McAfee, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as * part of the DARPA CHATS research program * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright 2013 by Saso Kiselkov. All rights reserved. */ /* * Copyright (c) 2013 by Delphix. All rights reserved. */ #define MAXNAMELEN 256 #define _NOTE(s) +typedef enum { B_FALSE, B_TRUE } boolean_t; + /* CRC64 table */ #define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */ /* * Macros for various sorts of alignment and rounding when the alignment * is known to be a power of 2. */ #define P2ALIGN(x, align) ((x) & -(align)) #define P2PHASE(x, align) ((x) & ((align) - 1)) #define P2NPHASE(x, align) (-(x) & ((align) - 1)) #define P2ROUNDUP(x, align) (-(-(x) & -(align))) #define P2END(x, align) (-(~(x) & -(align))) #define P2PHASEUP(x, align, phase) ((phase) - (((phase) - (x)) & -(align))) #define P2BOUNDARY(off, len, align) (((off) ^ ((off) + (len) - 1)) > (align) - 1) /* * General-purpose 32-bit and 64-bit bitfield encodings. 
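 */

/*
 * Illustrative sketch (not from the original source): what the power-of-two
 * and bitfield helpers defined around here compute, written as plain
 * functions with worked values in the comments. The example_* names are
 * invented.
 */
#include <stdint.h>

/*
 * P2ROUNDUP(x, align): round x up to a power-of-two boundary,
 * e.g. example_p2roundup(3000, 512) == 3072.
 */
static uint64_t
example_p2roundup(uint64_t x, uint64_t align)
{
        return (-(-x & -align));
}

/*
 * BF64_GET(x, low, len): extract a len-bit field starting at bit "low",
 * e.g. example_bf64_get(0xabcd, 8, 4) == 0xb.
 */
static uint64_t
example_bf64_get(uint64_t x, int low, int len)
{
        return ((x >> low) & ((1ULL << len) - 1));
}

/*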
*/ #define BF32_DECODE(x, low, len) P2PHASE((x) >> (low), 1U << (len)) #define BF64_DECODE(x, low, len) P2PHASE((x) >> (low), 1ULL << (len)) #define BF32_ENCODE(x, low, len) (P2PHASE((x), 1U << (len)) << (low)) #define BF64_ENCODE(x, low, len) (P2PHASE((x), 1ULL << (len)) << (low)) #define BF32_GET(x, low, len) BF32_DECODE(x, low, len) #define BF64_GET(x, low, len) BF64_DECODE(x, low, len) #define BF32_SET(x, low, len, val) \ ((x) ^= BF32_ENCODE((x >> low) ^ (val), low, len)) #define BF64_SET(x, low, len, val) \ ((x) ^= BF64_ENCODE((x >> low) ^ (val), low, len)) #define BF32_GET_SB(x, low, len, shift, bias) \ ((BF32_GET(x, low, len) + (bias)) << (shift)) #define BF64_GET_SB(x, low, len, shift, bias) \ ((BF64_GET(x, low, len) + (bias)) << (shift)) #define BF32_SET_SB(x, low, len, shift, bias, val) \ BF32_SET(x, low, len, ((val) >> (shift)) - (bias)) #define BF64_SET_SB(x, low, len, shift, bias, val) \ BF64_SET(x, low, len, ((val) >> (shift)) - (bias)) /* * Macros to reverse byte order */ #define BSWAP_8(x) ((x) & 0xff) #define BSWAP_16(x) ((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8)) #define BSWAP_32(x) ((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16)) #define BSWAP_64(x) ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32)) /* * Note: the boot loader can't actually read blocks larger than 128KB, * due to lack of memory. Therefore its SPA_MAXBLOCKSIZE is still 128KB. */ #define SPA_MINBLOCKSHIFT 9 #define SPA_MAXBLOCKSHIFT 17 #define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT) #define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT) /* * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB. * The ASIZE encoding should be at least 64 times larger (6 more bits) * to support up to 4-way RAID-Z mirror mode with worst-case gang block * overhead, three DVAs per bp, plus one more bit in case we do anything * else that expands the ASIZE. */ #define SPA_LSIZEBITS 16 /* LSIZE up to 32M (2^16 * 512) */ #define SPA_PSIZEBITS 16 /* PSIZE up to 32M (2^16 * 512) */ #define SPA_ASIZEBITS 24 /* ASIZE up to 64 times larger */ /* * All SPA data is represented by 128-bit data virtual addresses (DVAs). * The members of the dva_t should be considered opaque outside the SPA. */ typedef struct dva { uint64_t dva_word[2]; } dva_t; /* * Each block has a 256-bit checksum -- strong enough for cryptographic hashes. */ typedef struct zio_cksum { uint64_t zc_word[4]; } zio_cksum_t; /* * Each block is described by its DVAs, time of birth, checksum, etc. 
* The word-by-word, bit-by-bit layout of the blkptr is as follows: * * 64 56 48 40 32 24 16 8 0 * +-------+-------+-------+-------+-------+-------+-------+-------+ * 0 | vdev1 | GRID | ASIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 1 |G| offset1 | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 2 | vdev2 | GRID | ASIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 3 |G| offset2 | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 4 | vdev3 | GRID | ASIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 5 |G| offset3 | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 7 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 8 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 9 | physical birth txg | * +-------+-------+-------+-------+-------+-------+-------+-------+ * a | logical birth txg | * +-------+-------+-------+-------+-------+-------+-------+-------+ * b | fill count | * +-------+-------+-------+-------+-------+-------+-------+-------+ * c | checksum[0] | * +-------+-------+-------+-------+-------+-------+-------+-------+ * d | checksum[1] | * +-------+-------+-------+-------+-------+-------+-------+-------+ * e | checksum[2] | * +-------+-------+-------+-------+-------+-------+-------+-------+ * f | checksum[3] | * +-------+-------+-------+-------+-------+-------+-------+-------+ * * Legend: * * vdev virtual device ID * offset offset into virtual device * LSIZE logical size * PSIZE physical size (after compression) * ASIZE allocated size (including RAID-Z parity and gang block headers) * GRID RAID-Z layout information (reserved for future use) * cksum checksum function * comp compression function * G gang block indicator * B byteorder (endianness) * D dedup * X encryption (on version 30, which is not supported) * E blkptr_t contains embedded data (see below) * lvl level of indirection * type DMU object type * phys birth txg of block allocation; zero if same as logical birth txg * log. birth transaction group in which the block was logically born * fill count number of non-zero blocks under this bp * checksum[4] 256-bit checksum of the data this bp describes */ /* * "Embedded" blkptr_t's don't actually point to a block, instead they * have a data payload embedded in the blkptr_t itself. See the comment * in blkptr.c for more details. 
* * The blkptr_t is laid out as follows: * * 64 56 48 40 32 24 16 8 0 * +-------+-------+-------+-------+-------+-------+-------+-------+ * 0 | payload | * 1 | payload | * 2 | payload | * 3 | payload | * 4 | payload | * 5 | payload | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 6 |BDX|lvl| type | etype |E| comp| PSIZE| LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 7 | payload | * 8 | payload | * 9 | payload | * +-------+-------+-------+-------+-------+-------+-------+-------+ * a | logical birth txg | * +-------+-------+-------+-------+-------+-------+-------+-------+ * b | payload | * c | payload | * d | payload | * e | payload | * f | payload | * +-------+-------+-------+-------+-------+-------+-------+-------+ * * Legend: * * payload contains the embedded data * B (byteorder) byteorder (endianness) * D (dedup) padding (set to zero) * X encryption (set to zero; see above) * E (embedded) set to one * lvl indirection level * type DMU object type * etype how to interpret embedded data (BP_EMBEDDED_TYPE_*) * comp compression function of payload * PSIZE size of payload after compression, in bytes * LSIZE logical size of payload, in bytes * note that 25 bits is enough to store the largest * "normal" BP's LSIZE (2^16 * 2^9) in bytes * log. birth transaction group in which the block was logically born * * Note that LSIZE and PSIZE are stored in bytes, whereas for non-embedded * bp's they are stored in units of SPA_MINBLOCKSHIFT. * Generally, the generic BP_GET_*() macros can be used on embedded BP's. * The B, D, X, lvl, type, and comp fields are stored the same as with normal * BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must * be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before * other macros, as they assert that they are only used on BP's of the correct * "embedded-ness". */ #define BPE_GET_ETYPE(bp) \ (ASSERT(BP_IS_EMBEDDED(bp)), \ BF64_GET((bp)->blk_prop, 40, 8)) #define BPE_SET_ETYPE(bp, t) do { \ ASSERT(BP_IS_EMBEDDED(bp)); \ BF64_SET((bp)->blk_prop, 40, 8, t); \ _NOTE(CONSTCOND) } while (0) #define BPE_GET_LSIZE(bp) \ (ASSERT(BP_IS_EMBEDDED(bp)), \ BF64_GET_SB((bp)->blk_prop, 0, 25, 0, 1)) #define BPE_SET_LSIZE(bp, x) do { \ ASSERT(BP_IS_EMBEDDED(bp)); \ BF64_SET_SB((bp)->blk_prop, 0, 25, 0, 1, x); \ _NOTE(CONSTCOND) } while (0) #define BPE_GET_PSIZE(bp) \ (ASSERT(BP_IS_EMBEDDED(bp)), \ BF64_GET_SB((bp)->blk_prop, 25, 7, 0, 1)) #define BPE_SET_PSIZE(bp, x) do { \ ASSERT(BP_IS_EMBEDDED(bp)); \ BF64_SET_SB((bp)->blk_prop, 25, 7, 0, 1, x); \ _NOTE(CONSTCOND) } while (0) typedef enum bp_embedded_type { BP_EMBEDDED_TYPE_DATA, BP_EMBEDDED_TYPE_RESERVED, /* Reserved for an unintegrated feature. */ NUM_BP_EMBEDDED_TYPES = BP_EMBEDDED_TYPE_RESERVED } bp_embedded_type_t; #define BPE_NUM_WORDS 14 #define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t)) #define BPE_IS_PAYLOADWORD(bp, wp) \ ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth) #define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */ #define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */ typedef struct blkptr { dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */ uint64_t blk_prop; /* size, compression, type, etc */ uint64_t blk_pad[2]; /* Extra space for the future */ uint64_t blk_phys_birth; /* txg when block was allocated */ uint64_t blk_birth; /* transaction group at birth */ uint64_t blk_fill; /* fill count */ zio_cksum_t blk_cksum; /* 256-bit checksum */ } blkptr_t; /* * Macros to get and set fields in a bp or DVA. 
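 */

/*
 * Illustrative sketch (not part of the original source): decoding one DVA by
 * hand, following the bit layout in the block-pointer diagram above. On-disk
 * sizes and offsets are stored in 512-byte (SPA_MINBLOCKSHIFT) units, hence
 * the << 9 conversions. The example_* names are invented.
 */
#include <stdint.h>

struct example_dva_fields {
        uint64_t vdev;          /* virtual device id */
        uint64_t offset;        /* byte offset into the virtual device */
        uint64_t asize;         /* allocated bytes, incl. parity/gang overhead */
        int      gang;          /* non-zero if this DVA points at a gang block */
};

static void
example_decode_dva(const uint64_t word[2], struct example_dva_fields *out)
{
        out->asize = (word[0] & ((1ULL << 24) - 1)) << 9;       /* bits 0-23 */
        out->vdev = word[0] >> 32;                              /* bits 32-63 */
        out->offset = (word[1] & ((1ULL << 63) - 1)) << 9;      /* bits 0-62 */
        out->gang = (int)(word[1] >> 63);                       /* bit 63 */
}

/*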
*/ #define DVA_GET_ASIZE(dva) \ BF64_GET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, SPA_MINBLOCKSHIFT, 0) #define DVA_SET_ASIZE(dva, x) \ BF64_SET_SB((dva)->dva_word[0], 0, SPA_ASIZEBITS, \ SPA_MINBLOCKSHIFT, 0, x) #define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8) #define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x) #define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, 32) #define DVA_SET_VDEV(dva, x) BF64_SET((dva)->dva_word[0], 32, 32, x) #define DVA_GET_OFFSET(dva) \ BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0) #define DVA_SET_OFFSET(dva, x) \ BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x) #define DVA_GET_GANG(dva) BF64_GET((dva)->dva_word[1], 63, 1) #define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x) #define BP_GET_LSIZE(bp) \ (BP_IS_EMBEDDED(bp) ? \ (BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA ? BPE_GET_LSIZE(bp) : 0): \ BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)) #define BP_SET_LSIZE(bp, x) do { \ ASSERT(!BP_IS_EMBEDDED(bp)); \ BF64_SET_SB((bp)->blk_prop, \ 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x); \ _NOTE(CONSTCOND) } while (0) #define BP_GET_PSIZE(bp) \ BF64_GET_SB((bp)->blk_prop, 16, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1) #define BP_SET_PSIZE(bp, x) \ BF64_SET_SB((bp)->blk_prop, 16, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x) #define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 7) #define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 7, x) #define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8) #define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x) #define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8) #define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x) #define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) #define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) #define BP_IS_EMBEDDED(bp) BF64_GET((bp)->blk_prop, 39, 1) #define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1) #define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x) #define BP_GET_BYTEORDER(bp) BF64_GET((bp)->blk_prop, 63, 1) #define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x) #define BP_PHYSICAL_BIRTH(bp) \ ((bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth) #define BP_GET_ASIZE(bp) \ (DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ DVA_GET_ASIZE(&(bp)->blk_dva[2])) #define BP_GET_UCSIZE(bp) \ ((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? 
\ BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)); #define BP_GET_NDVAS(bp) \ (!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ !!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ !!DVA_GET_ASIZE(&(bp)->blk_dva[2])) #define DVA_EQUAL(dva1, dva2) \ ((dva1)->dva_word[1] == (dva2)->dva_word[1] && \ (dva1)->dva_word[0] == (dva2)->dva_word[0]) #define ZIO_CHECKSUM_EQUAL(zc1, zc2) \ (0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \ ((zc1).zc_word[1] - (zc2).zc_word[1]) | \ ((zc1).zc_word[2] - (zc2).zc_word[2]) | \ ((zc1).zc_word[3] - (zc2).zc_word[3]))) #define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0) #define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \ { \ (zcp)->zc_word[0] = w0; \ (zcp)->zc_word[1] = w1; \ (zcp)->zc_word[2] = w2; \ (zcp)->zc_word[3] = w3; \ } #define BP_IDENTITY(bp) (&(bp)->blk_dva[0]) #define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp)) #define DVA_IS_EMPTY(dva) ((dva)->dva_word[0] == 0ULL && \ (dva)->dva_word[1] == 0ULL) #define BP_IS_HOLE(bp) DVA_IS_EMPTY(BP_IDENTITY(bp)) #define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg)) #define BP_ZERO(bp) \ { \ (bp)->blk_dva[0].dva_word[0] = 0; \ (bp)->blk_dva[0].dva_word[1] = 0; \ (bp)->blk_dva[1].dva_word[0] = 0; \ (bp)->blk_dva[1].dva_word[1] = 0; \ (bp)->blk_dva[2].dva_word[0] = 0; \ (bp)->blk_dva[2].dva_word[1] = 0; \ (bp)->blk_prop = 0; \ (bp)->blk_pad[0] = 0; \ (bp)->blk_pad[1] = 0; \ (bp)->blk_phys_birth = 0; \ (bp)->blk_birth = 0; \ (bp)->blk_fill = 0; \ ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \ } #define BPE_NUM_WORDS 14 #define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t)) #define BPE_IS_PAYLOADWORD(bp, wp) \ ((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth) /* * Embedded checksum */ #define ZEC_MAGIC 0x210da7ab10c7a11ULL typedef struct zio_eck { uint64_t zec_magic; /* for validation, endianness */ zio_cksum_t zec_cksum; /* 256-bit checksum */ } zio_eck_t; /* * Gang block headers are self-checksumming and contain an array * of block pointers. */ #define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE #define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \ sizeof (zio_eck_t)) / sizeof (blkptr_t)) #define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \ sizeof (zio_eck_t) - \ (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\ sizeof (uint64_t)) typedef struct zio_gbh { blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS]; uint64_t zg_filler[SPA_GBH_FILLER]; zio_eck_t zg_tail; } zio_gbh_phys_t; #define VDEV_RAIDZ_MAXPARITY 3 #define VDEV_PAD_SIZE (8 << 10) /* 2 padding areas (vl_pad1 and vl_pad2) to skip */ #define VDEV_SKIP_SIZE VDEV_PAD_SIZE * 2 #define VDEV_PHYS_SIZE (112 << 10) #define VDEV_UBERBLOCK_RING (128 << 10) #define VDEV_UBERBLOCK_SHIFT(vd) \ MAX((vd)->v_top->v_ashift, UBERBLOCK_SHIFT) #define VDEV_UBERBLOCK_COUNT(vd) \ (VDEV_UBERBLOCK_RING >> VDEV_UBERBLOCK_SHIFT(vd)) #define VDEV_UBERBLOCK_OFFSET(vd, n) \ offsetof(vdev_label_t, vl_uberblock[(n) << VDEV_UBERBLOCK_SHIFT(vd)]) #define VDEV_UBERBLOCK_SIZE(vd) (1ULL << VDEV_UBERBLOCK_SHIFT(vd)) typedef struct vdev_phys { char vp_nvlist[VDEV_PHYS_SIZE - sizeof (zio_eck_t)]; zio_eck_t vp_zbt; } vdev_phys_t; typedef struct vdev_label { char vl_pad1[VDEV_PAD_SIZE]; /* 8K */ char vl_pad2[VDEV_PAD_SIZE]; /* 8K */ vdev_phys_t vl_vdev_phys; /* 112K */ char vl_uberblock[VDEV_UBERBLOCK_RING]; /* 128K */ } vdev_label_t; /* 256K total */ /* * vdev_dirty() flags */ #define VDD_METASLAB 0x01 #define VDD_DTL 0x02 /* * Size and offset of embedded boot loader region on each label. * The total size of the first two labels plus the boot area is 4MB. 
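 */

/*
 * Illustrative sketch (not from the original source): the uberblock-ring
 * arithmetic behind the VDEV_UBERBLOCK_* macros above. Each label reserves a
 * 128K ring, and the slot size tracks the vdev's ashift, so large-sector
 * devices hold fewer, larger uberblock slots. The example_* names are
 * invented.
 */
#define EXAMPLE_UBERBLOCK_SHIFT 10              /* minimum slot size: 1K */
#define EXAMPLE_UBERBLOCK_RING  (128 << 10)     /* 128K ring per label */

static int
example_uberblock_count(int ashift)
{
        int shift = ashift > EXAMPLE_UBERBLOCK_SHIFT ?
            ashift : EXAMPLE_UBERBLOCK_SHIFT;

        /* e.g. ashift 9 -> 128 slots of 1K, ashift 12 -> 32 slots of 4K. */
        return (EXAMPLE_UBERBLOCK_RING >> shift);
}

/*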
*/ #define VDEV_BOOT_OFFSET (2 * sizeof (vdev_label_t)) #define VDEV_BOOT_SIZE (7ULL << 19) /* 3.5M */ /* * Size of label regions at the start and end of each leaf device. */ #define VDEV_LABEL_START_SIZE (2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE) #define VDEV_LABEL_END_SIZE (2 * sizeof (vdev_label_t)) #define VDEV_LABELS 4 enum zio_checksum { ZIO_CHECKSUM_INHERIT = 0, ZIO_CHECKSUM_ON, ZIO_CHECKSUM_OFF, ZIO_CHECKSUM_LABEL, ZIO_CHECKSUM_GANG_HEADER, ZIO_CHECKSUM_ZILOG, ZIO_CHECKSUM_FLETCHER_2, ZIO_CHECKSUM_FLETCHER_4, ZIO_CHECKSUM_SHA256, ZIO_CHECKSUM_ZILOG2, ZIO_CHECKSUM_FUNCTIONS }; #define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_4 #define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON enum zio_compress { ZIO_COMPRESS_INHERIT = 0, ZIO_COMPRESS_ON, ZIO_COMPRESS_OFF, ZIO_COMPRESS_LZJB, ZIO_COMPRESS_EMPTY, ZIO_COMPRESS_GZIP_1, ZIO_COMPRESS_GZIP_2, ZIO_COMPRESS_GZIP_3, ZIO_COMPRESS_GZIP_4, ZIO_COMPRESS_GZIP_5, ZIO_COMPRESS_GZIP_6, ZIO_COMPRESS_GZIP_7, ZIO_COMPRESS_GZIP_8, ZIO_COMPRESS_GZIP_9, ZIO_COMPRESS_ZLE, ZIO_COMPRESS_LZ4, ZIO_COMPRESS_FUNCTIONS }; #define ZIO_COMPRESS_ON_VALUE ZIO_COMPRESS_LZJB #define ZIO_COMPRESS_DEFAULT ZIO_COMPRESS_OFF /* nvlist pack encoding */ #define NV_ENCODE_NATIVE 0 #define NV_ENCODE_XDR 1 typedef enum { DATA_TYPE_UNKNOWN = 0, DATA_TYPE_BOOLEAN, DATA_TYPE_BYTE, DATA_TYPE_INT16, DATA_TYPE_UINT16, DATA_TYPE_INT32, DATA_TYPE_UINT32, DATA_TYPE_INT64, DATA_TYPE_UINT64, DATA_TYPE_STRING, DATA_TYPE_BYTE_ARRAY, DATA_TYPE_INT16_ARRAY, DATA_TYPE_UINT16_ARRAY, DATA_TYPE_INT32_ARRAY, DATA_TYPE_UINT32_ARRAY, DATA_TYPE_INT64_ARRAY, DATA_TYPE_UINT64_ARRAY, DATA_TYPE_STRING_ARRAY, DATA_TYPE_HRTIME, DATA_TYPE_NVLIST, DATA_TYPE_NVLIST_ARRAY, DATA_TYPE_BOOLEAN_VALUE, DATA_TYPE_INT8, DATA_TYPE_UINT8, DATA_TYPE_BOOLEAN_ARRAY, DATA_TYPE_INT8_ARRAY, DATA_TYPE_UINT8_ARRAY } data_type_t; /* * On-disk version number. */ #define SPA_VERSION_1 1ULL #define SPA_VERSION_2 2ULL #define SPA_VERSION_3 3ULL #define SPA_VERSION_4 4ULL #define SPA_VERSION_5 5ULL #define SPA_VERSION_6 6ULL #define SPA_VERSION_7 7ULL #define SPA_VERSION_8 8ULL #define SPA_VERSION_9 9ULL #define SPA_VERSION_10 10ULL #define SPA_VERSION_11 11ULL #define SPA_VERSION_12 12ULL #define SPA_VERSION_13 13ULL #define SPA_VERSION_14 14ULL #define SPA_VERSION_15 15ULL #define SPA_VERSION_16 16ULL #define SPA_VERSION_17 17ULL #define SPA_VERSION_18 18ULL #define SPA_VERSION_19 19ULL #define SPA_VERSION_20 20ULL #define SPA_VERSION_21 21ULL #define SPA_VERSION_22 22ULL #define SPA_VERSION_23 23ULL #define SPA_VERSION_24 24ULL #define SPA_VERSION_25 25ULL #define SPA_VERSION_26 26ULL #define SPA_VERSION_27 27ULL #define SPA_VERSION_28 28ULL #define SPA_VERSION_5000 5000ULL /* * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk * format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*}, * and do the appropriate changes. Also bump the version number in * usr/src/grub/capability. */ #define SPA_VERSION SPA_VERSION_5000 #define SPA_VERSION_STRING "5000" /* * Symbolic names for the changes that caused a SPA_VERSION switch. * Used in the code when checking for presence or absence of a feature. * Feel free to define multiple symbolic names for each version if there * were multiple changes to on-disk structures during that version. * * NOTE: When checking the current SPA_VERSION in your code, be sure * to use spa_version() since it reports the version of the * last synced uberblock. Checking the in-flight version can * be dangerous in some cases. 
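 */

/*
 * Illustrative sketch (not part of the original source): the acceptance rule
 * that SPA_VERSION_IS_SUPPORTED() just below encodes -- legacy on-disk
 * versions 1 through 28 are accepted, after which versioning jumps straight
 * to 5000 once feature flags replace plain version bumps. The example_* name
 * is invented.
 */
#include <stdint.h>

static int
example_version_supported(uint64_t v)
{
        /* SPA_VERSION_BEFORE_FEATURES is 28; SPA_VERSION is currently 5000. */
        return ((v >= 1 && v <= 28) || v == 5000);
}

/*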
*/ #define SPA_VERSION_INITIAL SPA_VERSION_1 #define SPA_VERSION_DITTO_BLOCKS SPA_VERSION_2 #define SPA_VERSION_SPARES SPA_VERSION_3 #define SPA_VERSION_RAID6 SPA_VERSION_3 #define SPA_VERSION_BPLIST_ACCOUNT SPA_VERSION_3 #define SPA_VERSION_RAIDZ_DEFLATE SPA_VERSION_3 #define SPA_VERSION_DNODE_BYTES SPA_VERSION_3 #define SPA_VERSION_ZPOOL_HISTORY SPA_VERSION_4 #define SPA_VERSION_GZIP_COMPRESSION SPA_VERSION_5 #define SPA_VERSION_BOOTFS SPA_VERSION_6 #define SPA_VERSION_SLOGS SPA_VERSION_7 #define SPA_VERSION_DELEGATED_PERMS SPA_VERSION_8 #define SPA_VERSION_FUID SPA_VERSION_9 #define SPA_VERSION_REFRESERVATION SPA_VERSION_9 #define SPA_VERSION_REFQUOTA SPA_VERSION_9 #define SPA_VERSION_UNIQUE_ACCURATE SPA_VERSION_9 #define SPA_VERSION_L2CACHE SPA_VERSION_10 #define SPA_VERSION_NEXT_CLONES SPA_VERSION_11 #define SPA_VERSION_ORIGIN SPA_VERSION_11 #define SPA_VERSION_DSL_SCRUB SPA_VERSION_11 #define SPA_VERSION_SNAP_PROPS SPA_VERSION_12 #define SPA_VERSION_USED_BREAKDOWN SPA_VERSION_13 #define SPA_VERSION_PASSTHROUGH_X SPA_VERSION_14 #define SPA_VERSION_USERSPACE SPA_VERSION_15 #define SPA_VERSION_STMF_PROP SPA_VERSION_16 #define SPA_VERSION_RAIDZ3 SPA_VERSION_17 #define SPA_VERSION_USERREFS SPA_VERSION_18 #define SPA_VERSION_HOLES SPA_VERSION_19 #define SPA_VERSION_ZLE_COMPRESSION SPA_VERSION_20 #define SPA_VERSION_DEDUP SPA_VERSION_21 #define SPA_VERSION_RECVD_PROPS SPA_VERSION_22 #define SPA_VERSION_SLIM_ZIL SPA_VERSION_23 #define SPA_VERSION_SA SPA_VERSION_24 #define SPA_VERSION_SCAN SPA_VERSION_25 #define SPA_VERSION_DIR_CLONES SPA_VERSION_26 #define SPA_VERSION_DEADLISTS SPA_VERSION_26 #define SPA_VERSION_FAST_SNAP SPA_VERSION_27 #define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28 #define SPA_VERSION_BEFORE_FEATURES SPA_VERSION_28 #define SPA_VERSION_FEATURES SPA_VERSION_5000 #define SPA_VERSION_IS_SUPPORTED(v) \ (((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \ ((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION)) /* * The following are configuration names used in the nvlist describing a pool's * configuration. */ #define ZPOOL_CONFIG_VERSION "version" #define ZPOOL_CONFIG_POOL_NAME "name" #define ZPOOL_CONFIG_POOL_STATE "state" #define ZPOOL_CONFIG_POOL_TXG "txg" #define ZPOOL_CONFIG_POOL_GUID "pool_guid" #define ZPOOL_CONFIG_CREATE_TXG "create_txg" #define ZPOOL_CONFIG_TOP_GUID "top_guid" #define ZPOOL_CONFIG_VDEV_TREE "vdev_tree" #define ZPOOL_CONFIG_TYPE "type" #define ZPOOL_CONFIG_CHILDREN "children" #define ZPOOL_CONFIG_ID "id" #define ZPOOL_CONFIG_GUID "guid" #define ZPOOL_CONFIG_PATH "path" #define ZPOOL_CONFIG_DEVID "devid" #define ZPOOL_CONFIG_METASLAB_ARRAY "metaslab_array" #define ZPOOL_CONFIG_METASLAB_SHIFT "metaslab_shift" #define ZPOOL_CONFIG_ASHIFT "ashift" #define ZPOOL_CONFIG_ASIZE "asize" #define ZPOOL_CONFIG_DTL "DTL" #define ZPOOL_CONFIG_STATS "stats" #define ZPOOL_CONFIG_WHOLE_DISK "whole_disk" #define ZPOOL_CONFIG_ERRCOUNT "error_count" #define ZPOOL_CONFIG_NOT_PRESENT "not_present" #define ZPOOL_CONFIG_SPARES "spares" #define ZPOOL_CONFIG_IS_SPARE "is_spare" #define ZPOOL_CONFIG_NPARITY "nparity" #define ZPOOL_CONFIG_HOSTID "hostid" #define ZPOOL_CONFIG_HOSTNAME "hostname" #define ZPOOL_CONFIG_IS_LOG "is_log" #define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ #define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read" /* * The persistent vdev state is stored as separate values rather than a single * 'vdev_state' entry. This is because a device can be in multiple states, such * as offline and degraded. 
*/ #define ZPOOL_CONFIG_OFFLINE "offline" #define ZPOOL_CONFIG_FAULTED "faulted" #define ZPOOL_CONFIG_DEGRADED "degraded" #define ZPOOL_CONFIG_REMOVED "removed" #define ZPOOL_CONFIG_FRU "fru" #define ZPOOL_CONFIG_AUX_STATE "aux_state" #define VDEV_TYPE_ROOT "root" #define VDEV_TYPE_MIRROR "mirror" #define VDEV_TYPE_REPLACING "replacing" #define VDEV_TYPE_RAIDZ "raidz" #define VDEV_TYPE_DISK "disk" #define VDEV_TYPE_FILE "file" #define VDEV_TYPE_MISSING "missing" #define VDEV_TYPE_HOLE "hole" #define VDEV_TYPE_SPARE "spare" #define VDEV_TYPE_LOG "log" #define VDEV_TYPE_L2CACHE "l2cache" /* * This is needed in userland to report the minimum necessary device size. */ #define SPA_MINDEVSIZE (64ULL << 20) /* * The location of the pool configuration repository, shared between kernel and * userland. */ #define ZPOOL_CACHE "/boot/zfs/zpool.cache" /* * vdev states are ordered from least to most healthy. * A vdev that's CANT_OPEN or below is considered unusable. */ typedef enum vdev_state { VDEV_STATE_UNKNOWN = 0, /* Uninitialized vdev */ VDEV_STATE_CLOSED, /* Not currently open */ VDEV_STATE_OFFLINE, /* Not allowed to open */ VDEV_STATE_REMOVED, /* Explicitly removed from system */ VDEV_STATE_CANT_OPEN, /* Tried to open, but failed */ VDEV_STATE_FAULTED, /* External request to fault device */ VDEV_STATE_DEGRADED, /* Replicated vdev with unhealthy kids */ VDEV_STATE_HEALTHY /* Presumed good */ } vdev_state_t; /* * vdev aux states. When a vdev is in the CANT_OPEN state, the aux field * of the vdev stats structure uses these constants to distinguish why. */ typedef enum vdev_aux { VDEV_AUX_NONE, /* no error */ VDEV_AUX_OPEN_FAILED, /* ldi_open_*() or vn_open() failed */ VDEV_AUX_CORRUPT_DATA, /* bad label or disk contents */ VDEV_AUX_NO_REPLICAS, /* insufficient number of replicas */ VDEV_AUX_BAD_GUID_SUM, /* vdev guid sum doesn't match */ VDEV_AUX_TOO_SMALL, /* vdev size is too small */ VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */ VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */ VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */ VDEV_AUX_SPARED /* hot spare used in another pool */ } vdev_aux_t; /* * pool state. The following states are written to disk as part of the normal * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE. The remaining states are * software abstractions used at various levels to communicate pool state. */ typedef enum pool_state { POOL_STATE_ACTIVE = 0, /* In active use */ POOL_STATE_EXPORTED, /* Explicitly exported */ POOL_STATE_DESTROYED, /* Explicitly destroyed */ POOL_STATE_SPARE, /* Reserved for hot spare use */ POOL_STATE_UNINITIALIZED, /* Internal spa_t state */ POOL_STATE_UNAVAIL, /* Internal libzfs state */ POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */ } pool_state_t; /* * The uberblock version is incremented whenever an incompatible on-disk * format change is made to the SPA, DMU, or ZAP. * * Note: the first two fields should never be moved. When a storage pool * is opened, the uberblock must be read off the disk before the version * can be checked. If the ub_version field is moved, we may not detect * version mismatch. If the ub_magic field is moved, applications that * expect the magic number in the first word won't work. */ #define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! 
*/ #define UBERBLOCK_SHIFT 10 /* up to 1K */ struct uberblock { uint64_t ub_magic; /* UBERBLOCK_MAGIC */ uint64_t ub_version; /* SPA_VERSION */ uint64_t ub_txg; /* txg of last sync */ uint64_t ub_guid_sum; /* sum of all vdev guids */ uint64_t ub_timestamp; /* UTC time of last sync */ blkptr_t ub_rootbp; /* MOS objset_phys_t */ }; /* * Flags. */ #define DNODE_MUST_BE_ALLOCATED 1 #define DNODE_MUST_BE_FREE 2 /* * Fixed constants. */ #define DNODE_SHIFT 9 /* 512 bytes */ #define DN_MIN_INDBLKSHIFT 12 /* 4k */ #define DN_MAX_INDBLKSHIFT 14 /* 16k */ #define DNODE_BLOCK_SHIFT 14 /* 16k */ #define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */ #define DN_MAX_OBJECT_SHIFT 48 /* 256 trillion (zfs_fid_t limit) */ #define DN_MAX_OFFSET_SHIFT 64 /* 2^64 bytes in a dnode */ /* * Derived constants. */ #define DNODE_SIZE (1 << DNODE_SHIFT) #define DN_MAX_NBLKPTR ((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT) #define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT)) #define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT) #define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT) #define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT) #define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT) /* The +2 here is a cheesy way to round up */ #define DN_MAX_LEVELS (2 + ((DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT) / \ (DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT))) #define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \ (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t)))) #define DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \ (dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT) #define EPB(blkshift, typeshift) (1 << (blkshift - typeshift)) /* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */ #define DNODE_FLAG_USED_BYTES (1<<0) #define DNODE_FLAG_USERUSED_ACCOUNTED (1<<1) /* Does dnode have a SA spill blkptr in bonus? */ #define DNODE_FLAG_SPILL_BLKPTR (1<<2) typedef struct dnode_phys { uint8_t dn_type; /* dmu_object_type_t */ uint8_t dn_indblkshift; /* ln2(indirect block size) */ uint8_t dn_nlevels; /* 1=dn_blkptr->data blocks */ uint8_t dn_nblkptr; /* length of dn_blkptr */ uint8_t dn_bonustype; /* type of data in bonus buffer */ uint8_t dn_checksum; /* ZIO_CHECKSUM type */ uint8_t dn_compress; /* ZIO_COMPRESS type */ uint8_t dn_flags; /* DNODE_FLAG_* */ uint16_t dn_datablkszsec; /* data block size in 512b sectors */ uint16_t dn_bonuslen; /* length of dn_bonus */ uint8_t dn_pad2[4]; /* accounting is protected by dn_dirty_mtx */ uint64_t dn_maxblkid; /* largest allocated block ID */ uint64_t dn_used; /* bytes (or sectors) of disk space */ uint64_t dn_pad3[4]; blkptr_t dn_blkptr[1]; uint8_t dn_bonus[DN_MAX_BONUSLEN - sizeof (blkptr_t)]; blkptr_t dn_spill; } dnode_phys_t; +typedef enum dmu_object_byteswap { + DMU_BSWAP_UINT8, + DMU_BSWAP_UINT16, + DMU_BSWAP_UINT32, + DMU_BSWAP_UINT64, + DMU_BSWAP_ZAP, + DMU_BSWAP_DNODE, + DMU_BSWAP_OBJSET, + DMU_BSWAP_ZNODE, + DMU_BSWAP_OLDACL, + DMU_BSWAP_ACL, + /* + * Allocating a new byteswap type number makes the on-disk format + * incompatible with any other format that uses the same number. + * + * Data can usually be structured to work with one of the + * DMU_BSWAP_UINT* or DMU_BSWAP_ZAP types. + */ + DMU_BSWAP_NUMFUNCS +} dmu_object_byteswap_t; + +#define DMU_OT_NEWTYPE 0x80 +#define DMU_OT_METADATA 0x40 +#define DMU_OT_BYTESWAP_MASK 0x3f + +/* + * Defines a uint8_t object type. 
Object types specify if the data + * in the object is metadata (boolean) and how to byteswap the data + * (dmu_object_byteswap_t). + */ +#define DMU_OT(byteswap, metadata) \ + (DMU_OT_NEWTYPE | \ + ((metadata) ? DMU_OT_METADATA : 0) | \ + ((byteswap) & DMU_OT_BYTESWAP_MASK)) + typedef enum dmu_object_type { DMU_OT_NONE, /* general: */ DMU_OT_OBJECT_DIRECTORY, /* ZAP */ DMU_OT_OBJECT_ARRAY, /* UINT64 */ DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */ DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */ DMU_OT_BPLIST, /* UINT64 */ DMU_OT_BPLIST_HDR, /* UINT64 */ /* spa: */ DMU_OT_SPACE_MAP_HEADER, /* UINT64 */ DMU_OT_SPACE_MAP, /* UINT64 */ /* zil: */ DMU_OT_INTENT_LOG, /* UINT64 */ /* dmu: */ DMU_OT_DNODE, /* DNODE */ DMU_OT_OBJSET, /* OBJSET */ /* dsl: */ DMU_OT_DSL_DIR, /* UINT64 */ DMU_OT_DSL_DIR_CHILD_MAP, /* ZAP */ DMU_OT_DSL_DS_SNAP_MAP, /* ZAP */ DMU_OT_DSL_PROPS, /* ZAP */ DMU_OT_DSL_DATASET, /* UINT64 */ /* zpl: */ DMU_OT_ZNODE, /* ZNODE */ DMU_OT_OLDACL, /* Old ACL */ DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */ DMU_OT_DIRECTORY_CONTENTS, /* ZAP */ DMU_OT_MASTER_NODE, /* ZAP */ DMU_OT_UNLINKED_SET, /* ZAP */ /* zvol: */ DMU_OT_ZVOL, /* UINT8 */ DMU_OT_ZVOL_PROP, /* ZAP */ /* other; for testing only! */ DMU_OT_PLAIN_OTHER, /* UINT8 */ DMU_OT_UINT64_OTHER, /* UINT64 */ DMU_OT_ZAP_OTHER, /* ZAP */ /* new object types: */ DMU_OT_ERROR_LOG, /* ZAP */ DMU_OT_SPA_HISTORY, /* UINT8 */ DMU_OT_SPA_HISTORY_OFFSETS, /* spa_his_phys_t */ DMU_OT_POOL_PROPS, /* ZAP */ DMU_OT_DSL_PERMS, /* ZAP */ DMU_OT_ACL, /* ACL */ DMU_OT_SYSACL, /* SYSACL */ DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */ DMU_OT_FUID_SIZE, /* FUID table size UINT64 */ DMU_OT_NEXT_CLONES, /* ZAP */ DMU_OT_SCAN_QUEUE, /* ZAP */ DMU_OT_USERGROUP_USED, /* ZAP */ DMU_OT_USERGROUP_QUOTA, /* ZAP */ DMU_OT_USERREFS, /* ZAP */ DMU_OT_DDT_ZAP, /* ZAP */ DMU_OT_DDT_STATS, /* ZAP */ DMU_OT_SA, /* System attr */ DMU_OT_SA_MASTER_NODE, /* ZAP */ DMU_OT_SA_ATTR_REGISTRATION, /* ZAP */ DMU_OT_SA_ATTR_LAYOUTS, /* ZAP */ DMU_OT_SCAN_XLATE, /* ZAP */ DMU_OT_DEDUP, /* fake dedup BP from ddt_bp_create() */ - DMU_OT_NUMTYPES + DMU_OT_NUMTYPES, + + /* + * Names for valid types declared with DMU_OT(). + */ + DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE), + DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE), + DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE), + DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE), + DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE), + DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE), + DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE), + DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE), + DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE), + DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE) } dmu_object_type_t; typedef enum dmu_objset_type { DMU_OST_NONE, DMU_OST_META, DMU_OST_ZFS, DMU_OST_ZVOL, DMU_OST_OTHER, /* For testing only! */ DMU_OST_ANY, /* Be careful! */ DMU_OST_NUMTYPES } dmu_objset_type_t; /* * header for all bonus and spill buffers. * The header has a fixed portion with a variable number * of "lengths" depending on the number of variable sized * attribues which are determined by the "layout number" */ #define SA_MAGIC 0x2F505A /* ZFS SA */ typedef struct sa_hdr_phys { uint32_t sa_magic; uint16_t sa_layout_info; /* Encoded with hdrsize and layout number */ uint16_t sa_lengths[1]; /* optional sizes for variable length attrs */ /* ... Data follows the lengths. 
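The new DMU_OT() encoding packs three facts into the single uint8_t stored in dn_type: the high bit flags the new-style encoding, the next bit says whether the object holds metadata, and the low six bits select the byteswap function. A stand-alone sketch of encoding and decoding that byte (the OT_* decode macros below are illustrative only and are not part of this change):

#include <stdio.h>
#include <stdint.h>

/* Same on-disk encoding as the header above. */
#define DMU_OT_NEWTYPE		0x80
#define DMU_OT_METADATA		0x40
#define DMU_OT_BYTESWAP_MASK	0x3f
#define DMU_OT(byteswap, metadata) \
	(DMU_OT_NEWTYPE | ((metadata) ? DMU_OT_METADATA : 0) | \
	((byteswap) & DMU_OT_BYTESWAP_MASK))

/* Hypothetical decode helpers, for illustration only. */
#define OT_IS_NEWTYPE(ot)	(((ot) & DMU_OT_NEWTYPE) != 0)
#define OT_IS_METADATA(ot)	(((ot) & DMU_OT_METADATA) != 0)
#define OT_BYTESWAP(ot)		((ot) & DMU_OT_BYTESWAP_MASK)

int
main(void)
{
	uint8_t ot = DMU_OT(4 /* DMU_BSWAP_ZAP */, 1);

	/* Prints value 0xc4: new-type 1, metadata 1, byteswap func 4. */
	printf("value 0x%02x: new-type %d, metadata %d, byteswap func %d\n",
	    ot, OT_IS_NEWTYPE(ot), OT_IS_METADATA(ot), OT_BYTESWAP(ot));
	return (0);
}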
*/ } sa_hdr_phys_t; /* * sa_hdr_phys -> sa_layout_info * * 16 10 0 * +--------+-------+ * | hdrsz |layout | * +--------+-------+ * * Bits 0-10 are the layout number * Bits 11-16 are the size of the header. * The hdrsize is the number * 8 * * For example. * hdrsz of 1 ==> 8 byte header * 2 ==> 16 byte header * */ #define SA_HDR_LAYOUT_NUM(hdr) BF32_GET(hdr->sa_layout_info, 0, 10) #define SA_HDR_SIZE(hdr) BF32_GET_SB(hdr->sa_layout_info, 10, 16, 3, 0) #define SA_HDR_LAYOUT_INFO_ENCODE(x, num, size) \ { \ BF32_SET_SB(x, 10, 6, 3, 0, size); \ BF32_SET(x, 0, 10, num); \ } #define SA_MODE_OFFSET 0 #define SA_SIZE_OFFSET 8 #define SA_GEN_OFFSET 16 #define SA_UID_OFFSET 24 #define SA_GID_OFFSET 32 #define SA_PARENT_OFFSET 40 /* * Intent log header - this on disk structure holds fields to manage * the log. All fields are 64 bit to easily handle cross architectures. */ typedef struct zil_header { uint64_t zh_claim_txg; /* txg in which log blocks were claimed */ uint64_t zh_replay_seq; /* highest replayed sequence number */ blkptr_t zh_log; /* log chain */ uint64_t zh_claim_seq; /* highest claimed sequence number */ uint64_t zh_pad[5]; } zil_header_t; #define OBJSET_PHYS_SIZE 2048 typedef struct objset_phys { dnode_phys_t os_meta_dnode; zil_header_t os_zil_header; uint64_t os_type; uint64_t os_flags; char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 - sizeof (zil_header_t) - sizeof (uint64_t)*2]; dnode_phys_t os_userused_dnode; dnode_phys_t os_groupused_dnode; } objset_phys_t; typedef struct dsl_dir_phys { uint64_t dd_creation_time; /* not actually used */ uint64_t dd_head_dataset_obj; uint64_t dd_parent_obj; uint64_t dd_clone_parent_obj; uint64_t dd_child_dir_zapobj; /* * how much space our children are accounting for; for leaf * datasets, == physical space used by fs + snaps */ uint64_t dd_used_bytes; uint64_t dd_compressed_bytes; uint64_t dd_uncompressed_bytes; /* Administrative quota setting */ uint64_t dd_quota; /* Administrative reservation setting */ uint64_t dd_reserved; uint64_t dd_props_zapobj; uint64_t dd_pad[21]; /* pad out to 256 bytes for good measure */ } dsl_dir_phys_t; typedef struct dsl_dataset_phys { uint64_t ds_dir_obj; uint64_t ds_prev_snap_obj; uint64_t ds_prev_snap_txg; uint64_t ds_next_snap_obj; uint64_t ds_snapnames_zapobj; /* zap obj of snaps; ==0 for snaps */ uint64_t ds_num_children; /* clone/snap children; ==0 for head */ uint64_t ds_creation_time; /* seconds since 1970 */ uint64_t ds_creation_txg; uint64_t ds_deadlist_obj; uint64_t ds_used_bytes; uint64_t ds_compressed_bytes; uint64_t ds_uncompressed_bytes; uint64_t ds_unique_bytes; /* only relevant to snapshots */ /* * The ds_fsid_guid is a 56-bit ID that can change to avoid * collisions. The ds_guid is a 64-bit ID that will never * change, so there is a small probability that it will collide. */ uint64_t ds_fsid_guid; uint64_t ds_guid; uint64_t ds_flags; blkptr_t ds_bp; uint64_t ds_pad[8]; /* pad out to 320 bytes for good measure */ } dsl_dataset_phys_t; /* * The names of zap entries in the DIRECTORY_OBJECT of the MOS. 
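The SA_HDR_* macros above are plain bit-field accessors over sa_layout_info: following the macro parameters, the low ten bits carry the layout number and the bits above them carry the header size in 8-byte units, so an encoded size of 2 means a 16-byte header. A minimal user-space sketch of the same packing, with the BF32_* calls spelled out as shifts and masks (sa_layout_encode() is an illustrative name, not part of the header):

#include <stdio.h>
#include <stdint.h>

/* Layout number in the low 10 bits, header size / 8 above it. */
static uint16_t
sa_layout_encode(unsigned layout, unsigned hdrsize_bytes)
{
	return ((uint16_t)(((hdrsize_bytes / 8) << 10) | (layout & 0x3ff)));
}

int
main(void)
{
	uint16_t info = sa_layout_encode(3, 16);

	/* Prints: layout 3, header 16 bytes */
	printf("layout %u, header %u bytes\n",
	    info & 0x3ff, (unsigned)((info >> 10) * 8));
	return (0);
}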
*/ #define DMU_POOL_DIRECTORY_OBJECT 1 #define DMU_POOL_CONFIG "config" +#define DMU_POOL_FEATURES_FOR_READ "features_for_read" #define DMU_POOL_ROOT_DATASET "root_dataset" #define DMU_POOL_SYNC_BPLIST "sync_bplist" #define DMU_POOL_ERRLOG_SCRUB "errlog_scrub" #define DMU_POOL_ERRLOG_LAST "errlog_last" #define DMU_POOL_SPARES "spares" #define DMU_POOL_DEFLATE "deflate" #define DMU_POOL_HISTORY "history" #define DMU_POOL_PROPS "pool_props" #define ZAP_MAGIC 0x2F52AB2ABULL #define FZAP_BLOCK_SHIFT(zap) ((zap)->zap_block_shift) #define ZAP_MAXCD (uint32_t)(-1) #define ZAP_HASHBITS 28 #define MZAP_ENT_LEN 64 #define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2) #define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT #define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT) typedef struct mzap_ent_phys { uint64_t mze_value; uint32_t mze_cd; uint16_t mze_pad; /* in case we want to chain them someday */ char mze_name[MZAP_NAME_LEN]; } mzap_ent_phys_t; typedef struct mzap_phys { uint64_t mz_block_type; /* ZBT_MICRO */ uint64_t mz_salt; uint64_t mz_pad[6]; mzap_ent_phys_t mz_chunk[1]; /* actually variable size depending on block size */ } mzap_phys_t; /* * The (fat) zap is stored in one object. It is an array of * 1<= 6] [zap_leaf_t] [ptrtbl] ... * */ #define ZBT_LEAF ((1ULL << 63) + 0) #define ZBT_HEADER ((1ULL << 63) + 1) #define ZBT_MICRO ((1ULL << 63) + 3) /* any other values are ptrtbl blocks */ /* * the embedded pointer table takes up half a block: * block size / entry size (2^3) / 2 */ #define ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1) /* * The embedded pointer table starts half-way through the block. Since * the pointer table itself is half the block, it starts at (64-bit) * word number (1<zap_phys) \ [(idx) + (1<l_bs) - hash entry size (2) * number of hash * entries - header space (2*chunksize) */ #define ZAP_LEAF_NUMCHUNKS(l) \ (((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \ ZAP_LEAF_CHUNKSIZE - 2) /* * The amount of space within the chunk available for the array is: * chunk size - space for type (1) - space for next pointer (2) */ #define ZAP_LEAF_ARRAY_BYTES (ZAP_LEAF_CHUNKSIZE - 3) #define ZAP_LEAF_ARRAY_NCHUNKS(bytes) \ (((bytes)+ZAP_LEAF_ARRAY_BYTES-1)/ZAP_LEAF_ARRAY_BYTES) /* * Low water mark: when there are only this many chunks free, start * growing the ptrtbl. Ideally, this should be larger than a * "reasonably-sized" entry. 20 chunks is more than enough for the * largest directory entry (MAXNAMELEN (256) byte name, 8-byte value), * while still being only around 3% for 16k blocks. */ #define ZAP_LEAF_LOW_WATER (20) /* * The leaf hash table has block size / 2^5 (32) number of entries, * which should be more than enough for the maximum number of entries, * which is less than block size / CHUNKSIZE (24) / minimum number of * chunks per entry (3). */ #define ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5) #define ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l)) /* * The chunks start immediately after the hash table. The end of the * hash table is at l_hash + HASH_NUMENTRIES, which we simply cast to a * chunk_t. */ #define ZAP_LEAF_CHUNK(l, idx) \ ((zap_leaf_chunk_t *) \ ((l)->l_phys->l_hash + ZAP_LEAF_HASH_NUMENTRIES(l)))[idx] #define ZAP_LEAF_ENTRY(l, idx) (&ZAP_LEAF_CHUNK(l, idx).l_entry) typedef enum zap_chunk_type { ZAP_CHUNK_FREE = 253, ZAP_CHUNK_ENTRY = 252, ZAP_CHUNK_ARRAY = 251, ZAP_CHUNK_TYPE_MAX = 250 } zap_chunk_type_t; /* * TAKE NOTE: * If zap_leaf_phys_t is modified, zap_leaf_byteswap() must be modified. 
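The leaf and pointer-table sizing macros above are pure arithmetic on the block shift. A quick worked computation for a 16 KB (2^14) ZAP block, using the 24-byte chunk size mentioned in the comments, may make the constants easier to follow (stand-alone sketch, not part of the header):

#include <stdio.h>

int
main(void)
{
	int bs = 14;				/* 16 KB ZAP block */
	int chunksize = 24;			/* per the comments above */
	int hash_entries = 1 << (bs - 5);	/* ZAP_LEAF_HASH_NUMENTRIES */
	/* Hash entries are uint16_t, hence the 2 * hash_entries bytes. */
	int chunks = ((1 << bs) - 2 * hash_entries) / chunksize - 2;
	int ptrtbl_shift = bs - 3 - 1;		/* ZAP_EMBEDDED_PTRTBL_SHIFT */

	/* Prints: 512 hash entries, 638 leaf chunks, 1024 ptrtbl entries */
	printf("hash entries %d, leaf chunks %d, embedded ptrtbl entries %d\n",
	    hash_entries, chunks, 1 << ptrtbl_shift);
	return (0);
}

The 1024 embedded pointer-table entries of 8 bytes each come to 8 KB, which is exactly the "half a block" the comment above promises for a 16 KB block.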
*/ typedef struct zap_leaf_phys { struct zap_leaf_header { uint64_t lh_block_type; /* ZBT_LEAF */ uint64_t lh_pad1; uint64_t lh_prefix; /* hash prefix of this leaf */ uint32_t lh_magic; /* ZAP_LEAF_MAGIC */ uint16_t lh_nfree; /* number free chunks */ uint16_t lh_nentries; /* number of entries */ uint16_t lh_prefix_len; /* num bits used to id this */ /* above is accessable to zap, below is zap_leaf private */ uint16_t lh_freelist; /* chunk head of free list */ uint8_t lh_pad2[12]; } l_hdr; /* 2 24-byte chunks */ /* * The header is followed by a hash table with * ZAP_LEAF_HASH_NUMENTRIES(zap) entries. The hash table is * followed by an array of ZAP_LEAF_NUMCHUNKS(zap) * zap_leaf_chunk structures. These structures are accessed * with the ZAP_LEAF_CHUNK() macro. */ uint16_t l_hash[1]; } zap_leaf_phys_t; typedef union zap_leaf_chunk { struct zap_leaf_entry { uint8_t le_type; /* always ZAP_CHUNK_ENTRY */ uint8_t le_value_intlen; /* size of ints */ uint16_t le_next; /* next entry in hash chain */ uint16_t le_name_chunk; /* first chunk of the name */ uint16_t le_name_numints; /* bytes in name, incl null */ uint16_t le_value_chunk; /* first chunk of the value */ uint16_t le_value_numints; /* value length in ints */ uint32_t le_cd; /* collision differentiator */ uint64_t le_hash; /* hash value of the name */ } l_entry; struct zap_leaf_array { uint8_t la_type; /* always ZAP_CHUNK_ARRAY */ uint8_t la_array[ZAP_LEAF_ARRAY_BYTES]; uint16_t la_next; /* next blk or CHAIN_END */ } l_array; struct zap_leaf_free { uint8_t lf_type; /* always ZAP_CHUNK_FREE */ uint8_t lf_pad[ZAP_LEAF_ARRAY_BYTES]; uint16_t lf_next; /* next in free list, or CHAIN_END */ } l_free; } zap_leaf_chunk_t; typedef struct zap_leaf { int l_bs; /* block size shift */ zap_leaf_phys_t *l_phys; } zap_leaf_t; /* * Define special zfs pflags */ #define ZFS_XATTR 0x1 /* is an extended attribute */ #define ZFS_INHERIT_ACE 0x2 /* ace has inheritable ACEs */ #define ZFS_ACL_TRIVIAL 0x4 /* files ACL is trivial */ #define MASTER_NODE_OBJ 1 /* * special attributes for master node. */ #define ZFS_FSID "FSID" #define ZFS_UNLINKED_SET "DELETE_QUEUE" #define ZFS_ROOT_OBJ "ROOT" #define ZPL_VERSION_OBJ "VERSION" #define ZFS_PROP_BLOCKPERPAGE "BLOCKPERPAGE" #define ZFS_PROP_NOGROWBLOCKS "NOGROWBLOCKS" #define ZFS_FLAG_BLOCKPERPAGE 0x1 #define ZFS_FLAG_NOGROWBLOCKS 0x2 /* * ZPL version - rev'd whenever an incompatible on-disk format change * occurs. Independent of SPA/DMU/ZAP versioning. */ #define ZPL_VERSION 1ULL /* * The directory entry has the type (currently unused on Solaris) in the * top 4 bits, and the object number in the low 48 bits. The "middle" * 12 bits are unused. */ #define ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4) #define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48) #define ZFS_DIRENT_MAKE(type, obj) (((uint64_t)type << 60) | obj) typedef struct ace { uid_t a_who; /* uid or gid */ uint32_t a_access_mask; /* read,write,... */ uint16_t a_flags; /* see below */ uint16_t a_type; /* allow or deny */ } ace_t; #define ACE_SLOT_CNT 6 typedef struct zfs_znode_acl { uint64_t z_acl_extern_obj; /* ext acl pieces */ uint32_t z_acl_count; /* Number of ACEs */ uint16_t z_acl_version; /* acl version */ uint16_t z_acl_pad; /* pad */ ace_t z_ace_data[ACE_SLOT_CNT]; /* 6 standard ACEs */ } zfs_znode_acl_t; /* * This is the persistent portion of the znode. It is stored * in the "bonus buffer" of the file. Short symbolic links * are also stored in the bonus buffer. 
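The directory-entry macros above pack the object number into the low 48 bits of a single uint64_t and the type into the top 4 bits, leaving the middle 12 bits unused. A stand-alone round-trip of that packing, with BF64_GET written out as plain shifts and masks (the DIRENT_* names here are illustrative only):

#include <stdio.h>
#include <stdint.h>

#define DIRENT_MAKE(type, obj)	(((uint64_t)(type) << 60) | (obj))
#define DIRENT_TYPE(de)		(((de) >> 60) & 0xf)		/* top 4 bits */
#define DIRENT_OBJ(de)		((de) & ((1ULL << 48) - 1))	/* low 48 bits */

int
main(void)
{
	uint64_t de = DIRENT_MAKE(8 /* e.g. a regular file type */, 0x1234ULL);

	/* Prints: type 8 obj 0x1234 */
	printf("type %llu obj 0x%llx\n",
	    (unsigned long long)DIRENT_TYPE(de),
	    (unsigned long long)DIRENT_OBJ(de));
	return (0);
}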
*/ typedef struct znode_phys { uint64_t zp_atime[2]; /* 0 - last file access time */ uint64_t zp_mtime[2]; /* 16 - last file modification time */ uint64_t zp_ctime[2]; /* 32 - last file change time */ uint64_t zp_crtime[2]; /* 48 - creation time */ uint64_t zp_gen; /* 64 - generation (txg of creation) */ uint64_t zp_mode; /* 72 - file mode bits */ uint64_t zp_size; /* 80 - size of file */ uint64_t zp_parent; /* 88 - directory parent (`..') */ uint64_t zp_links; /* 96 - number of links to file */ uint64_t zp_xattr; /* 104 - DMU object for xattrs */ uint64_t zp_rdev; /* 112 - dev_t for VBLK & VCHR files */ uint64_t zp_flags; /* 120 - persistent flags */ uint64_t zp_uid; /* 128 - file owner */ uint64_t zp_gid; /* 136 - owning group */ uint64_t zp_pad[4]; /* 144 - future */ zfs_znode_acl_t zp_acl; /* 176 - 263 ACL */ /* * Data may pad out any remaining bytes in the znode buffer, eg: * * |<---------------------- dnode_phys (512) ------------------------>| * |<-- dnode (192) --->|<----------- "bonus" buffer (320) ---------->| * |<---- znode (264) ---->|<---- data (56) ---->| * * At present, we only use this space to store symbolic links. */ } znode_phys_t; /* * In-core vdev representation. */ struct vdev; typedef int vdev_phys_read_t(struct vdev *vdev, void *priv, off_t offset, void *buf, size_t bytes); typedef int vdev_read_t(struct vdev *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t bytes); typedef STAILQ_HEAD(vdev_list, vdev) vdev_list_t; typedef struct vdev { STAILQ_ENTRY(vdev) v_childlink; /* link in parent's child list */ STAILQ_ENTRY(vdev) v_alllink; /* link in global vdev list */ vdev_list_t v_children; /* children of this vdev */ const char *v_name; /* vdev name */ uint64_t v_guid; /* vdev guid */ int v_id; /* index in parent */ int v_ashift; /* offset to block shift */ int v_nparity; /* # parity for raidz */ struct vdev *v_top; /* parent vdev */ int v_nchildren; /* # children */ vdev_state_t v_state; /* current state */ vdev_phys_read_t *v_phys_read; /* read from raw leaf vdev */ vdev_read_t *v_read; /* read from vdev */ void *v_read_priv; /* private data for read function */ } vdev_t; /* * In-core pool representation. */ typedef STAILQ_HEAD(spa_list, spa) spa_list_t; typedef struct spa { STAILQ_ENTRY(spa) spa_link; /* link in global pool list */ char *spa_name; /* pool name */ uint64_t spa_guid; /* pool guid */ uint64_t spa_txg; /* most recent transaction */ struct uberblock spa_uberblock; /* best uberblock so far */ vdev_list_t spa_vdevs; /* list of all toplevel vdevs */ objset_phys_t spa_mos; /* MOS for this pool */ int spa_inited; /* initialized */ } spa_t; static void decode_embedded_bp_compressed(const blkptr_t *, void *); Index: user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/dtrace/fasttrap.c (revision 303642) @@ -1,2755 +1,2763 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. 
* * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Portions Copyright 2010 The FreeBSD Foundation * * $FreeBSD$ */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2015, Joyent, Inc. All rights reserved. */ #include #include #include #include #include #include #ifdef illumos #include #endif #include #include #include #ifdef illumos #include #endif #include #include #include #include #include #include #include #include #ifdef illumos #include #endif #include #include #ifndef illumos #include #include #include +#include #include #include #include #include #include #include #include #include #endif /* * User-Land Trap-Based Tracing * ---------------------------- * * The fasttrap provider allows DTrace consumers to instrument any user-level * instruction to gather data; this includes probes with semantic * signifigance like entry and return as well as simple offsets into the * function. While the specific techniques used are very ISA specific, the * methodology is generalizable to any architecture. * * * The General Methodology * ----------------------- * * With the primary goal of tracing every user-land instruction and the * limitation that we can't trust user space so don't want to rely on much * information there, we begin by replacing the instructions we want to trace * with trap instructions. Each instruction we overwrite is saved into a hash * table keyed by process ID and pc address. When we enter the kernel due to * this trap instruction, we need the effects of the replaced instruction to * appear to have occurred before we proceed with the user thread's * execution. * * Each user level thread is represented by a ulwp_t structure which is * always easily accessible through a register. The most basic way to produce * the effects of the instruction we replaced is to copy that instruction out * to a bit of scratch space reserved in the user thread's ulwp_t structure * (a sort of kernel-private thread local storage), set the PC to that * scratch space and single step. When we reenter the kernel after single * stepping the instruction we must then adjust the PC to point to what would * normally be the next instruction. Of course, special care must be taken * for branches and jumps, but these represent such a small fraction of any * instruction set that writing the code to emulate these in the kernel is * not too difficult. * * Return probes may require several tracepoints to trace every return site, * and, conversely, each tracepoint may activate several probes (the entry * and offset 0 probes, for example). To solve this muliplexing problem, * tracepoints contain lists of probes to activate and probes contain lists * of tracepoints to enable. If a probe is activated, it adds its ID to * existing tracepoints or creates new ones as necessary. * * Most probes are activated _before_ the instruction is executed, but return * probes are activated _after_ the effects of the last instruction of the * function are visible. Return probes must be fired _after_ we have * single-stepped the instruction whereas all other probes are fired * beforehand. 
* * * Lock Ordering * ------------- * * The lock ordering below -- both internally and with respect to the DTrace * framework -- is a little tricky and bears some explanation. Each provider * has a lock (ftp_mtx) that protects its members including reference counts * for enabled probes (ftp_rcount), consumers actively creating probes * (ftp_ccount) and USDT consumers (ftp_mcount); all three prevent a provider * from being freed. A provider is looked up by taking the bucket lock for the * provider hash table, and is returned with its lock held. The provider lock * may be taken in functions invoked by the DTrace framework, but may not be * held while calling functions in the DTrace framework. * * To ensure consistency over multiple calls to the DTrace framework, the * creation lock (ftp_cmtx) should be held. Naturally, the creation lock may * not be taken when holding the provider lock as that would create a cyclic * lock ordering. In situations where one would naturally take the provider * lock and then the creation lock, we instead up a reference count to prevent * the provider from disappearing, drop the provider lock, and acquire the * creation lock. * * Briefly: * bucket lock before provider lock * DTrace before provider lock * creation lock before DTrace * never hold the provider lock and creation lock simultaneously */ static d_open_t fasttrap_open; static d_ioctl_t fasttrap_ioctl; static struct cdevsw fasttrap_cdevsw = { .d_version = D_VERSION, .d_open = fasttrap_open, .d_ioctl = fasttrap_ioctl, .d_name = "fasttrap", }; static struct cdev *fasttrap_cdev; static dtrace_meta_provider_id_t fasttrap_meta_id; static struct proc *fasttrap_cleanup_proc; static struct mtx fasttrap_cleanup_mtx; static uint_t fasttrap_cleanup_work, fasttrap_cleanup_drain, fasttrap_cleanup_cv; /* * Generation count on modifications to the global tracepoint lookup table. */ static volatile uint64_t fasttrap_mod_gen; /* * When the fasttrap provider is loaded, fasttrap_max is set to either * FASTTRAP_MAX_DEFAULT, or the value for fasttrap-max-probes in the * fasttrap.conf file (Illumos), or the value provied in the loader.conf (FreeBSD). * Each time a probe is created, fasttrap_total is incremented by the number * of tracepoints that may be associated with that probe; fasttrap_total is capped * at fasttrap_max. */ #define FASTTRAP_MAX_DEFAULT 250000 static uint32_t fasttrap_max = FASTTRAP_MAX_DEFAULT; static uint32_t fasttrap_total; /* * Copyright (c) 2011, Joyent, Inc. All rights reserved. 
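The ordering rules above lead to a recurring pattern in this file: code holding the provider lock that needs the creation lock cannot simply take it, because that would invert the documented order. Instead it bumps a reference count to keep the provider alive, drops the provider lock, and only then acquires the creation lock. A rough user-space analogue of that dance, using pthreads purely for illustration (none of these names exist in the kernel code):

#include <pthread.h>
#include <stdio.h>

struct provider {
	pthread_mutex_t	p_mtx;		/* stands in for ftp_mtx */
	pthread_mutex_t	p_cmtx;		/* stands in for ftp_cmtx */
	int		p_ccount;	/* consumers keeping it alive */
};

static void
create_probes(struct provider *p)
{
	printf("creating probes, ccount=%d\n", p->p_ccount);
}

int
main(void)
{
	struct provider p = { PTHREAD_MUTEX_INITIALIZER,
	    PTHREAD_MUTEX_INITIALIZER, 0 };

	/* Take a reference under the provider lock... */
	pthread_mutex_lock(&p.p_mtx);
	p.p_ccount++;
	pthread_mutex_unlock(&p.p_mtx);

	/* ...so the creation lock is never taken while p_mtx is held. */
	pthread_mutex_lock(&p.p_cmtx);
	create_probes(&p);
	pthread_mutex_unlock(&p.p_cmtx);

	/* Drop the reference once the creation lock has been released. */
	pthread_mutex_lock(&p.p_mtx);
	p.p_ccount--;
	pthread_mutex_unlock(&p.p_mtx);
	return (0);
}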
*/ #define FASTTRAP_TPOINTS_DEFAULT_SIZE 0x4000 #define FASTTRAP_PROVIDERS_DEFAULT_SIZE 0x100 #define FASTTRAP_PROCS_DEFAULT_SIZE 0x100 #define FASTTRAP_PID_NAME "pid" fasttrap_hash_t fasttrap_tpoints; static fasttrap_hash_t fasttrap_provs; static fasttrap_hash_t fasttrap_procs; static uint64_t fasttrap_pid_count; /* pid ref count */ static kmutex_t fasttrap_count_mtx; /* lock on ref count */ #define FASTTRAP_ENABLE_FAIL 1 #define FASTTRAP_ENABLE_PARTIAL 2 static int fasttrap_tracepoint_enable(proc_t *, fasttrap_probe_t *, uint_t); static void fasttrap_tracepoint_disable(proc_t *, fasttrap_probe_t *, uint_t); static fasttrap_provider_t *fasttrap_provider_lookup(pid_t, const char *, const dtrace_pattr_t *); static void fasttrap_provider_retire(pid_t, const char *, int); static void fasttrap_provider_free(fasttrap_provider_t *); static fasttrap_proc_t *fasttrap_proc_lookup(pid_t); static void fasttrap_proc_release(fasttrap_proc_t *); #ifndef illumos static void fasttrap_thread_dtor(void *, struct thread *); #endif #define FASTTRAP_PROVS_INDEX(pid, name) \ ((fasttrap_hash_str(name) + (pid)) & fasttrap_provs.fth_mask) #define FASTTRAP_PROCS_INDEX(pid) ((pid) & fasttrap_procs.fth_mask) #ifndef illumos struct rmlock fasttrap_tp_lock; static eventhandler_tag fasttrap_thread_dtor_tag; #endif static unsigned long tpoints_hash_size = FASTTRAP_TPOINTS_DEFAULT_SIZE; #ifdef __FreeBSD__ SYSCTL_DECL(_kern_dtrace); SYSCTL_NODE(_kern_dtrace, OID_AUTO, fasttrap, CTLFLAG_RD, 0, "DTrace fasttrap parameters"); SYSCTL_UINT(_kern_dtrace_fasttrap, OID_AUTO, max_probes, CTLFLAG_RWTUN, &fasttrap_max, FASTTRAP_MAX_DEFAULT, "Maximum number of fasttrap probes"); SYSCTL_ULONG(_kern_dtrace_fasttrap, OID_AUTO, tpoints_hash_size, CTLFLAG_RDTUN, &tpoints_hash_size, FASTTRAP_TPOINTS_DEFAULT_SIZE, "Size of the tracepoint hash table"); #endif static int fasttrap_highbit(ulong_t i) { int h = 1; if (i == 0) return (0); #ifdef _LP64 if (i & 0xffffffff00000000ul) { h += 32; i >>= 32; } #endif if (i & 0xffff0000) { h += 16; i >>= 16; } if (i & 0xff00) { h += 8; i >>= 8; } if (i & 0xf0) { h += 4; i >>= 4; } if (i & 0xc) { h += 2; i >>= 2; } if (i & 0x2) { h += 1; } return (h); } static uint_t fasttrap_hash_str(const char *p) { unsigned int g; uint_t hval = 0; while (*p) { hval = (hval << 4) + *p++; if ((g = (hval & 0xf0000000)) != 0) hval ^= g >> 24; hval &= ~g; } return (hval); } void fasttrap_sigtrap(proc_t *p, kthread_t *t, uintptr_t pc) { #ifdef illumos sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); sqp->sq_info.si_signo = SIGTRAP; sqp->sq_info.si_code = TRAP_DTRACE; sqp->sq_info.si_addr = (caddr_t)pc; mutex_enter(&p->p_lock); sigaddqa(p, t, sqp); mutex_exit(&p->p_lock); if (t != NULL) aston(t); #else ksiginfo_t *ksi = kmem_zalloc(sizeof (ksiginfo_t), KM_SLEEP); ksiginfo_init(ksi); ksi->ksi_signo = SIGTRAP; ksi->ksi_code = TRAP_DTRACE; ksi->ksi_addr = (caddr_t)pc; PROC_LOCK(p); (void) tdsendsignal(p, t, SIGTRAP, ksi); PROC_UNLOCK(p); #endif } #ifndef illumos /* * Obtain a chunk of scratch space in the address space of the target process. */ fasttrap_scrspace_t * fasttrap_scraddr(struct thread *td, fasttrap_proc_t *fprc) { fasttrap_scrblock_t *scrblk; fasttrap_scrspace_t *scrspc; struct proc *p; vm_offset_t addr; int error, i; scrspc = NULL; if (td->t_dtrace_sscr != NULL) { /* If the thread already has scratch space, we're done. 
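fasttrap_hash_str() above is a classic shift-and-fold string hash, and the *_INDEX macros mix in the pid and mask the result with fth_mask; that only distributes well because the table sizes are powers of two, so the mask is size - 1. A small stand-alone illustration of computing a provider bucket index, assuming the default table size of 0x100 from above (the helper names are illustrative):

#include <stdio.h>

/* Same hash function as fasttrap_hash_str() above. */
static unsigned int
hash_str(const char *p)
{
	unsigned int g, hval = 0;

	while (*p) {
		hval = (hval << 4) + *p++;
		if ((g = (hval & 0xf0000000)) != 0)
			hval ^= g >> 24;
		hval &= ~g;
	}
	return (hval);
}

int
main(void)
{
	unsigned int nent = 0x100;	/* FASTTRAP_PROVIDERS_DEFAULT_SIZE */
	unsigned int mask = nent - 1;	/* valid because nent is a power of two */
	unsigned int pid = 1234;

	/* Equivalent of FASTTRAP_PROVS_INDEX(pid, "pid"). */
	printf("bucket %u of %u\n", (hash_str("pid") + pid) & mask, nent);
	return (0);
}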
*/ scrspc = (fasttrap_scrspace_t *)td->t_dtrace_sscr; return (scrspc); } p = td->td_proc; mutex_enter(&fprc->ftpc_mtx); if (LIST_EMPTY(&fprc->ftpc_fscr)) { /* * No scratch space is available, so we'll map a new scratch * space block into the traced process' address space. */ addr = 0; error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, FASTTRAP_SCRBLOCK_SIZE, 0, VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error != KERN_SUCCESS) goto done; scrblk = malloc(sizeof(*scrblk), M_SOLARIS, M_WAITOK); scrblk->ftsb_addr = addr; LIST_INSERT_HEAD(&fprc->ftpc_scrblks, scrblk, ftsb_next); /* * Carve the block up into chunks and put them on the free list. */ for (i = 0; i < FASTTRAP_SCRBLOCK_SIZE / FASTTRAP_SCRSPACE_SIZE; i++) { scrspc = malloc(sizeof(*scrspc), M_SOLARIS, M_WAITOK); scrspc->ftss_addr = addr + i * FASTTRAP_SCRSPACE_SIZE; LIST_INSERT_HEAD(&fprc->ftpc_fscr, scrspc, ftss_next); } } /* * Take the first scratch chunk off the free list, put it on the * allocated list, and return its address. */ scrspc = LIST_FIRST(&fprc->ftpc_fscr); LIST_REMOVE(scrspc, ftss_next); LIST_INSERT_HEAD(&fprc->ftpc_ascr, scrspc, ftss_next); /* * This scratch space is reserved for use by td until the thread exits. */ td->t_dtrace_sscr = scrspc; done: mutex_exit(&fprc->ftpc_mtx); return (scrspc); } /* * Return any allocated per-thread scratch space chunks back to the process' * free list. */ static void fasttrap_thread_dtor(void *arg __unused, struct thread *td) { fasttrap_bucket_t *bucket; fasttrap_proc_t *fprc; fasttrap_scrspace_t *scrspc; pid_t pid; if (td->t_dtrace_sscr == NULL) return; pid = td->td_proc->p_pid; bucket = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)]; fprc = NULL; /* Look up the fasttrap process handle for this process. */ mutex_enter(&bucket->ftb_mtx); for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) { if (fprc->ftpc_pid == pid) { mutex_enter(&fprc->ftpc_mtx); mutex_exit(&bucket->ftb_mtx); break; } } if (fprc == NULL) { mutex_exit(&bucket->ftb_mtx); return; } scrspc = (fasttrap_scrspace_t *)td->t_dtrace_sscr; LIST_REMOVE(scrspc, ftss_next); LIST_INSERT_HEAD(&fprc->ftpc_fscr, scrspc, ftss_next); mutex_exit(&fprc->ftpc_mtx); } #endif /* * This function ensures that no threads are actively using the memory * associated with probes that were formerly live. */ static void fasttrap_mod_barrier(uint64_t gen) { int i; if (gen < fasttrap_mod_gen) return; fasttrap_mod_gen++; #ifdef illumos CPU_FOREACH(i) { mutex_enter(&fasttrap_cpuc_pid_lock[i]); mutex_exit(&fasttrap_cpuc_pid_lock[i]); } #else rm_wlock(&fasttrap_tp_lock); rm_wunlock(&fasttrap_tp_lock); #endif } /* * This function performs asynchronous cleanup of fasttrap providers. The * Solaris implementation of this mechanism use a timeout that's activated in * fasttrap_pid_cleanup(), but this doesn't work in FreeBSD: one may sleep while * holding the DTrace mutexes, but it is unsafe to sleep in a callout handler. * Thus we use a dedicated process to perform the cleanup when requested. */ /*ARGSUSED*/ static void fasttrap_pid_cleanup_cb(void *data) { fasttrap_provider_t **fpp, *fp; fasttrap_bucket_t *bucket; dtrace_provider_id_t provid; int i, later = 0, rval; mtx_lock(&fasttrap_cleanup_mtx); while (!fasttrap_cleanup_drain || later > 0) { fasttrap_cleanup_work = 0; mtx_unlock(&fasttrap_cleanup_mtx); later = 0; /* * Iterate over all the providers trying to remove the marked * ones. If a provider is marked but not retired, we just * have to take a crack at removing it -- it's no big deal if * we can't. 
*/ for (i = 0; i < fasttrap_provs.fth_nent; i++) { bucket = &fasttrap_provs.fth_table[i]; mutex_enter(&bucket->ftb_mtx); fpp = (fasttrap_provider_t **)&bucket->ftb_data; while ((fp = *fpp) != NULL) { if (!fp->ftp_marked) { fpp = &fp->ftp_next; continue; } mutex_enter(&fp->ftp_mtx); /* * If this provider has consumers actively * creating probes (ftp_ccount) or is a USDT * provider (ftp_mcount), we can't unregister * or even condense. */ if (fp->ftp_ccount != 0 || fp->ftp_mcount != 0) { mutex_exit(&fp->ftp_mtx); fp->ftp_marked = 0; continue; } if (!fp->ftp_retired || fp->ftp_rcount != 0) fp->ftp_marked = 0; mutex_exit(&fp->ftp_mtx); /* * If we successfully unregister this * provider we can remove it from the hash * chain and free the memory. If our attempt * to unregister fails and this is a retired * provider, increment our flag to try again * pretty soon. If we've consumed more than * half of our total permitted number of * probes call dtrace_condense() to try to * clean out the unenabled probes. */ provid = fp->ftp_provid; if ((rval = dtrace_unregister(provid)) != 0) { if (fasttrap_total > fasttrap_max / 2) (void) dtrace_condense(provid); if (rval == EAGAIN) fp->ftp_marked = 1; later += fp->ftp_marked; fpp = &fp->ftp_next; } else { *fpp = fp->ftp_next; fasttrap_provider_free(fp); } } mutex_exit(&bucket->ftb_mtx); } mtx_lock(&fasttrap_cleanup_mtx); /* * If we were unable to retire a provider, try again after a * second. This situation can occur in certain circumstances * where providers cannot be unregistered even though they have * no probes enabled because of an execution of dtrace -l or * something similar. */ if (later > 0 || fasttrap_cleanup_work || fasttrap_cleanup_drain) { mtx_unlock(&fasttrap_cleanup_mtx); pause("ftclean", hz); mtx_lock(&fasttrap_cleanup_mtx); } else mtx_sleep(&fasttrap_cleanup_cv, &fasttrap_cleanup_mtx, 0, "ftcl", 0); } /* * Wake up the thread in fasttrap_unload() now that we're done. */ wakeup(&fasttrap_cleanup_drain); mtx_unlock(&fasttrap_cleanup_mtx); kthread_exit(); } /* * Activates the asynchronous cleanup mechanism. */ static void fasttrap_pid_cleanup(void) { mtx_lock(&fasttrap_cleanup_mtx); if (!fasttrap_cleanup_work) { fasttrap_cleanup_work = 1; wakeup(&fasttrap_cleanup_cv); } mtx_unlock(&fasttrap_cleanup_mtx); } /* * This is called from cfork() via dtrace_fasttrap_fork(). The child * process's address space is (roughly) a copy of the parent process's so * we have to remove all the instrumentation we had previously enabled in the * parent. */ static void fasttrap_fork(proc_t *p, proc_t *cp) { #ifndef illumos fasttrap_scrblock_t *scrblk; fasttrap_proc_t *fprc = NULL; #endif pid_t ppid = p->p_pid; int i; #ifdef illumos ASSERT(curproc == p); ASSERT(p->p_proc_flag & P_PR_LOCK); #else PROC_LOCK_ASSERT(p, MA_OWNED); #endif #ifdef illumos ASSERT(p->p_dtrace_count > 0); #else if (p->p_dtrace_helpers) { /* * dtrace_helpers_duplicate() allocates memory. */ _PHOLD(cp); PROC_UNLOCK(p); PROC_UNLOCK(cp); dtrace_helpers_duplicate(p, cp); PROC_LOCK(cp); PROC_LOCK(p); _PRELE(cp); } /* * This check is purposely here instead of in kern_fork.c because, * for legal resons, we cannot include the dtrace_cddl.h header * inside kern_fork.c and insert if-clause there. */ if (p->p_dtrace_count == 0) return; #endif ASSERT(cp->p_dtrace_count == 0); /* * This would be simpler and faster if we maintained per-process * hash tables of enabled tracepoints. It could, however, potentially * slow down execution of a tracepoint since we'd need to go * through two levels of indirection. 
In the future, we should * consider either maintaining per-process ancillary lists of * enabled tracepoints or hanging a pointer to a per-process hash * table of enabled tracepoints off the proc structure. */ /* * We don't have to worry about the child process disappearing * because we're in fork(). */ #ifdef illumos mtx_lock_spin(&cp->p_slock); sprlock_proc(cp); mtx_unlock_spin(&cp->p_slock); #else /* * fasttrap_tracepoint_remove() expects the child process to be * unlocked and the VM then expects curproc to be unlocked. */ _PHOLD(cp); PROC_UNLOCK(cp); PROC_UNLOCK(p); #endif /* * Iterate over every tracepoint looking for ones that belong to the * parent process, and remove each from the child process. */ for (i = 0; i < fasttrap_tpoints.fth_nent; i++) { fasttrap_tracepoint_t *tp; fasttrap_bucket_t *bucket = &fasttrap_tpoints.fth_table[i]; mutex_enter(&bucket->ftb_mtx); for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (tp->ftt_pid == ppid && tp->ftt_proc->ftpc_acount != 0) { int ret = fasttrap_tracepoint_remove(cp, tp); ASSERT(ret == 0); /* * The count of active providers can only be * decremented (i.e. to zero) during exec, * exit, and removal of a meta provider so it * should be impossible to drop the count * mid-fork. */ ASSERT(tp->ftt_proc->ftpc_acount != 0); #ifndef illumos fprc = tp->ftt_proc; #endif } } mutex_exit(&bucket->ftb_mtx); #ifndef illumos /* * Unmap any scratch space inherited from the parent's address * space. */ if (fprc != NULL) { mutex_enter(&fprc->ftpc_mtx); LIST_FOREACH(scrblk, &fprc->ftpc_scrblks, ftsb_next) { vm_map_remove(&cp->p_vmspace->vm_map, scrblk->ftsb_addr, scrblk->ftsb_addr + FASTTRAP_SCRBLOCK_SIZE); } mutex_exit(&fprc->ftpc_mtx); } #endif } #ifdef illumos mutex_enter(&cp->p_lock); sprunlock(cp); #else PROC_LOCK(p); PROC_LOCK(cp); _PRELE(cp); #endif } /* * This is called from proc_exit() or from exec_common() if p_dtrace_probes * is set on the proc structure to indicate that there is a pid provider * associated with this process. */ static void fasttrap_exec_exit(proc_t *p) { #ifndef illumos struct thread *td; #endif #ifdef illumos ASSERT(p == curproc); #else PROC_LOCK_ASSERT(p, MA_OWNED); _PHOLD(p); /* * Since struct threads may be recycled, we cannot rely on t_dtrace_sscr * fields to be zeroed by kdtrace_thread_ctor. Thus we must zero it * ourselves when a process exits. */ FOREACH_THREAD_IN_PROC(p, td) td->t_dtrace_sscr = NULL; PROC_UNLOCK(p); #endif /* * We clean up the pid provider for this process here; user-land * static probes are handled by the meta-provider remove entry point. */ fasttrap_provider_retire(p->p_pid, FASTTRAP_PID_NAME, 0); #ifndef illumos if (p->p_dtrace_helpers) dtrace_helpers_destroy(p); PROC_LOCK(p); _PRELE(p); #endif } /*ARGSUSED*/ static void fasttrap_pid_provide(void *arg, dtrace_probedesc_t *desc) { /* * There are no "default" pid probes. */ } static int fasttrap_tracepoint_enable(proc_t *p, fasttrap_probe_t *probe, uint_t index) { fasttrap_tracepoint_t *tp, *new_tp = NULL; fasttrap_bucket_t *bucket; fasttrap_id_t *id; pid_t pid; uintptr_t pc; ASSERT(index < probe->ftp_ntps); pid = probe->ftp_pid; pc = probe->ftp_tps[index].fit_tp->ftt_pc; id = &probe->ftp_tps[index].fit_id; ASSERT(probe->ftp_tps[index].fit_tp->ftt_pid == pid); #ifdef illumos ASSERT(!(p->p_flag & SVFORK)); #endif /* * Before we make any modifications, make sure we've imposed a barrier * on the generation in which this probe was last modified. 
*/ fasttrap_mod_barrier(probe->ftp_gen); bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; /* * If the tracepoint has already been enabled, just add our id to the * list of interested probes. This may be our second time through * this path in which case we'll have constructed the tracepoint we'd * like to install. If we can't find a match, and have an allocated * tracepoint ready to go, enable that one now. * * A tracepoint whose process is defunct is also considered defunct. */ again: mutex_enter(&bucket->ftb_mtx); for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { /* * Note that it's safe to access the active count on the * associated proc structure because we know that at least one * provider (this one) will still be around throughout this * operation. */ if (tp->ftt_pid != pid || tp->ftt_pc != pc || tp->ftt_proc->ftpc_acount == 0) continue; /* * Now that we've found a matching tracepoint, it would be * a decent idea to confirm that the tracepoint is still * enabled and the trap instruction hasn't been overwritten. * Since this is a little hairy, we'll punt for now. */ /* * This can't be the first interested probe. We don't have * to worry about another thread being in the midst of * deleting this tracepoint (which would be the only valid * reason for a tracepoint to have no interested probes) * since we're holding P_PR_LOCK for this process. */ ASSERT(tp->ftt_ids != NULL || tp->ftt_retids != NULL); switch (id->fti_ptype) { case DTFTP_ENTRY: case DTFTP_OFFSETS: case DTFTP_IS_ENABLED: id->fti_next = tp->ftt_ids; membar_producer(); tp->ftt_ids = id; membar_producer(); break; case DTFTP_RETURN: case DTFTP_POST_OFFSETS: id->fti_next = tp->ftt_retids; membar_producer(); tp->ftt_retids = id; membar_producer(); break; default: ASSERT(0); } mutex_exit(&bucket->ftb_mtx); if (new_tp != NULL) { new_tp->ftt_ids = NULL; new_tp->ftt_retids = NULL; } return (0); } /* * If we have a good tracepoint ready to go, install it now while * we have the lock held and no one can screw with us. */ if (new_tp != NULL) { int rc = 0; new_tp->ftt_next = bucket->ftb_data; membar_producer(); bucket->ftb_data = new_tp; membar_producer(); mutex_exit(&bucket->ftb_mtx); /* * Activate the tracepoint in the ISA-specific manner. * If this fails, we need to report the failure, but * indicate that this tracepoint must still be disabled * by calling fasttrap_tracepoint_disable(). */ if (fasttrap_tracepoint_install(p, new_tp) != 0) rc = FASTTRAP_ENABLE_PARTIAL; /* * Increment the count of the number of tracepoints active in * the victim process. */ #ifdef illumos ASSERT(p->p_proc_flag & P_PR_LOCK); #endif p->p_dtrace_count++; return (rc); } mutex_exit(&bucket->ftb_mtx); /* * Initialize the tracepoint that's been preallocated with the probe. */ new_tp = probe->ftp_tps[index].fit_tp; ASSERT(new_tp->ftt_pid == pid); ASSERT(new_tp->ftt_pc == pc); ASSERT(new_tp->ftt_proc == probe->ftp_prov->ftp_proc); ASSERT(new_tp->ftt_ids == NULL); ASSERT(new_tp->ftt_retids == NULL); switch (id->fti_ptype) { case DTFTP_ENTRY: case DTFTP_OFFSETS: case DTFTP_IS_ENABLED: id->fti_next = NULL; new_tp->ftt_ids = id; break; case DTFTP_RETURN: case DTFTP_POST_OFFSETS: id->fti_next = NULL; new_tp->ftt_retids = id; break; default: ASSERT(0); } + +#ifdef __FreeBSD__ + if (SV_PROC_FLAG(p, SV_LP64)) + p->p_model = DATAMODEL_LP64; + else + p->p_model = DATAMODEL_ILP32; +#endif /* * If the ISA-dependent initialization goes to plan, go back to the * beginning and try to install this freshly made tracepoint. 
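The enable path above initializes the new tracepoint (or id) completely and only then publishes the pointer, with membar_producer() between the two stores, so that probe context walking these lists without the bucket lock never observes a half-built entry. A rough user-space analogue of that publish ordering using C11 release/acquire fences (illustrative only, not kernel code):

#include <stdatomic.h>
#include <stddef.h>

struct tp {
	int		tp_pc;		/* payload filled in before publishing */
	struct tp	*tp_next;
};

static _Atomic(struct tp *) head;

/* Writer: fully initialize the node, fence, then make it reachable. */
static void
publish(struct tp *new_tp, int pc)
{
	new_tp->tp_pc = pc;
	new_tp->tp_next = atomic_load_explicit(&head, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);	/* like membar_producer() */
	atomic_store_explicit(&head, new_tp, memory_order_relaxed);
}

/* Reader: never sees a node before its fields were initialized. */
static int
first_pc(void)
{
	struct tp *tp = atomic_load_explicit(&head, memory_order_acquire);

	return (tp != NULL ? tp->tp_pc : -1);
}

int
main(void)
{
	static struct tp t;

	publish(&t, 0x1000);
	return (first_pc() == 0x1000 ? 0 : 1);
}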
*/ if (fasttrap_tracepoint_init(p, new_tp, pc, id->fti_ptype) == 0) goto again; new_tp->ftt_ids = NULL; new_tp->ftt_retids = NULL; return (FASTTRAP_ENABLE_FAIL); } static void fasttrap_tracepoint_disable(proc_t *p, fasttrap_probe_t *probe, uint_t index) { fasttrap_bucket_t *bucket; fasttrap_provider_t *provider = probe->ftp_prov; fasttrap_tracepoint_t **pp, *tp; fasttrap_id_t *id, **idp = NULL; pid_t pid; uintptr_t pc; ASSERT(index < probe->ftp_ntps); pid = probe->ftp_pid; pc = probe->ftp_tps[index].fit_tp->ftt_pc; id = &probe->ftp_tps[index].fit_id; ASSERT(probe->ftp_tps[index].fit_tp->ftt_pid == pid); /* * Find the tracepoint and make sure that our id is one of the * ones registered with it. */ bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; mutex_enter(&bucket->ftb_mtx); for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (tp->ftt_pid == pid && tp->ftt_pc == pc && tp->ftt_proc == provider->ftp_proc) break; } /* * If we somehow lost this tracepoint, we're in a world of hurt. */ ASSERT(tp != NULL); switch (id->fti_ptype) { case DTFTP_ENTRY: case DTFTP_OFFSETS: case DTFTP_IS_ENABLED: ASSERT(tp->ftt_ids != NULL); idp = &tp->ftt_ids; break; case DTFTP_RETURN: case DTFTP_POST_OFFSETS: ASSERT(tp->ftt_retids != NULL); idp = &tp->ftt_retids; break; default: ASSERT(0); } while ((*idp)->fti_probe != probe) { idp = &(*idp)->fti_next; ASSERT(*idp != NULL); } id = *idp; *idp = id->fti_next; membar_producer(); ASSERT(id->fti_probe == probe); /* * If there are other registered enablings of this tracepoint, we're * all done, but if this was the last probe assocated with this * this tracepoint, we need to remove and free it. */ if (tp->ftt_ids != NULL || tp->ftt_retids != NULL) { /* * If the current probe's tracepoint is in use, swap it * for an unused tracepoint. */ if (tp == probe->ftp_tps[index].fit_tp) { fasttrap_probe_t *tmp_probe; fasttrap_tracepoint_t **tmp_tp; uint_t tmp_index; if (tp->ftt_ids != NULL) { tmp_probe = tp->ftt_ids->fti_probe; /* LINTED - alignment */ tmp_index = FASTTRAP_ID_INDEX(tp->ftt_ids); tmp_tp = &tmp_probe->ftp_tps[tmp_index].fit_tp; } else { tmp_probe = tp->ftt_retids->fti_probe; /* LINTED - alignment */ tmp_index = FASTTRAP_ID_INDEX(tp->ftt_retids); tmp_tp = &tmp_probe->ftp_tps[tmp_index].fit_tp; } ASSERT(*tmp_tp != NULL); ASSERT(*tmp_tp != probe->ftp_tps[index].fit_tp); ASSERT((*tmp_tp)->ftt_ids == NULL); ASSERT((*tmp_tp)->ftt_retids == NULL); probe->ftp_tps[index].fit_tp = *tmp_tp; *tmp_tp = tp; } mutex_exit(&bucket->ftb_mtx); /* * Tag the modified probe with the generation in which it was * changed. */ probe->ftp_gen = fasttrap_mod_gen; return; } mutex_exit(&bucket->ftb_mtx); /* * We can't safely remove the tracepoint from the set of active * tracepoints until we've actually removed the fasttrap instruction * from the process's text. We can, however, operate on this * tracepoint secure in the knowledge that no other thread is going to * be looking at it since we hold P_PR_LOCK on the process if it's * live or we hold the provider lock on the process if it's dead and * gone. */ /* * We only need to remove the actual instruction if we're looking * at an existing process */ if (p != NULL) { /* * If we fail to restore the instruction we need to kill * this process since it's in a completely unrecoverable * state. */ if (fasttrap_tracepoint_remove(p, tp) != 0) fasttrap_sigtrap(p, NULL, pc); /* * Decrement the count of the number of tracepoints active * in the victim process. 
*/ #ifdef illumos ASSERT(p->p_proc_flag & P_PR_LOCK); #endif p->p_dtrace_count--; } /* * Remove the probe from the hash table of active tracepoints. */ mutex_enter(&bucket->ftb_mtx); pp = (fasttrap_tracepoint_t **)&bucket->ftb_data; ASSERT(*pp != NULL); while (*pp != tp) { pp = &(*pp)->ftt_next; ASSERT(*pp != NULL); } *pp = tp->ftt_next; membar_producer(); mutex_exit(&bucket->ftb_mtx); /* * Tag the modified probe with the generation in which it was changed. */ probe->ftp_gen = fasttrap_mod_gen; } static void fasttrap_enable_callbacks(void) { /* * We don't have to play the rw lock game here because we're * providing something rather than taking something away -- * we can be sure that no threads have tried to follow this * function pointer yet. */ mutex_enter(&fasttrap_count_mtx); if (fasttrap_pid_count == 0) { ASSERT(dtrace_pid_probe_ptr == NULL); ASSERT(dtrace_return_probe_ptr == NULL); dtrace_pid_probe_ptr = &fasttrap_pid_probe; dtrace_return_probe_ptr = &fasttrap_return_probe; } ASSERT(dtrace_pid_probe_ptr == &fasttrap_pid_probe); ASSERT(dtrace_return_probe_ptr == &fasttrap_return_probe); fasttrap_pid_count++; mutex_exit(&fasttrap_count_mtx); } static void fasttrap_disable_callbacks(void) { #ifdef illumos ASSERT(MUTEX_HELD(&cpu_lock)); #endif mutex_enter(&fasttrap_count_mtx); ASSERT(fasttrap_pid_count > 0); fasttrap_pid_count--; if (fasttrap_pid_count == 0) { #ifdef illumos cpu_t *cur, *cpu = CPU; for (cur = cpu->cpu_next_onln; cur != cpu; cur = cur->cpu_next_onln) { rw_enter(&cur->cpu_ft_lock, RW_WRITER); } #endif dtrace_pid_probe_ptr = NULL; dtrace_return_probe_ptr = NULL; #ifdef illumos for (cur = cpu->cpu_next_onln; cur != cpu; cur = cur->cpu_next_onln) { rw_exit(&cur->cpu_ft_lock); } #endif } mutex_exit(&fasttrap_count_mtx); } /*ARGSUSED*/ static void fasttrap_pid_enable(void *arg, dtrace_id_t id, void *parg) { fasttrap_probe_t *probe = parg; proc_t *p = NULL; int i, rc; ASSERT(probe != NULL); ASSERT(!probe->ftp_enabled); ASSERT(id == probe->ftp_id); #ifdef illumos ASSERT(MUTEX_HELD(&cpu_lock)); #endif /* * Increment the count of enabled probes on this probe's provider; * the provider can't go away while the probe still exists. We * must increment this even if we aren't able to properly enable * this probe. */ mutex_enter(&probe->ftp_prov->ftp_mtx); probe->ftp_prov->ftp_rcount++; mutex_exit(&probe->ftp_prov->ftp_mtx); /* * If this probe's provider is retired (meaning it was valid in a * previously exec'ed incarnation of this address space), bail out. The * provider can't go away while we're in this code path. */ if (probe->ftp_prov->ftp_retired) return; /* * If we can't find the process, it may be that we're in the context of * a fork in which the traced process is being born and we're copying * USDT probes. Otherwise, the process is gone so bail. */ #ifdef illumos if ((p = sprlock(probe->ftp_pid)) == NULL) { if ((curproc->p_flag & SFORKING) == 0) return; mutex_enter(&pidlock); p = prfind(probe->ftp_pid); if (p == NULL) { /* * So it's not that the target process is being born, * it's that it isn't there at all (and we simply * happen to be forking). Anyway, we know that the * target is definitely gone, so bail out. */ mutex_exit(&pidlock); return (0); } /* * Confirm that curproc is indeed forking the process in which * we're trying to enable probes. 
*/ ASSERT(p->p_parent == curproc); ASSERT(p->p_stat == SIDL); mutex_enter(&p->p_lock); mutex_exit(&pidlock); sprlock_proc(p); } ASSERT(!(p->p_flag & SVFORK)); mutex_exit(&p->p_lock); #else if ((p = pfind(probe->ftp_pid)) == NULL) return; #endif /* * We have to enable the trap entry point before any user threads have * the chance to execute the trap instruction we're about to place * in their process's text. */ #ifdef __FreeBSD__ /* * pfind() returns a locked process. */ _PHOLD(p); PROC_UNLOCK(p); #endif fasttrap_enable_callbacks(); /* * Enable all the tracepoints and add this probe's id to each * tracepoint's list of active probes. */ for (i = 0; i < probe->ftp_ntps; i++) { if ((rc = fasttrap_tracepoint_enable(p, probe, i)) != 0) { /* * If enabling the tracepoint failed completely, * we don't have to disable it; if the failure * was only partial we must disable it. */ if (rc == FASTTRAP_ENABLE_FAIL) i--; else ASSERT(rc == FASTTRAP_ENABLE_PARTIAL); /* * Back up and pull out all the tracepoints we've * created so far for this probe. */ while (i >= 0) { fasttrap_tracepoint_disable(p, probe, i); i--; } #ifdef illumos mutex_enter(&p->p_lock); sprunlock(p); #else PRELE(p); #endif /* * Since we're not actually enabling this probe, * drop our reference on the trap table entry. */ fasttrap_disable_callbacks(); return; } } #ifdef illumos mutex_enter(&p->p_lock); sprunlock(p); #else PRELE(p); #endif probe->ftp_enabled = 1; } /*ARGSUSED*/ static void fasttrap_pid_disable(void *arg, dtrace_id_t id, void *parg) { fasttrap_probe_t *probe = parg; fasttrap_provider_t *provider = probe->ftp_prov; proc_t *p; int i, whack = 0; ASSERT(id == probe->ftp_id); mutex_enter(&provider->ftp_mtx); /* * We won't be able to acquire a /proc-esque lock on the process * iff the process is dead and gone. In this case, we rely on the * provider lock as a point of mutual exclusion to prevent other * DTrace consumers from disabling this probe. */ if ((p = pfind(probe->ftp_pid)) != NULL) { #ifdef __FreeBSD__ if (p->p_flag & P_WEXIT) { PROC_UNLOCK(p); p = NULL; } else { _PHOLD(p); PROC_UNLOCK(p); } #endif } /* * Disable all the associated tracepoints (for fully enabled probes). */ if (probe->ftp_enabled) { for (i = 0; i < probe->ftp_ntps; i++) { fasttrap_tracepoint_disable(p, probe, i); } } ASSERT(provider->ftp_rcount > 0); provider->ftp_rcount--; if (p != NULL) { /* * Even though we may not be able to remove it entirely, we * mark this retired provider to get a chance to remove some * of the associated probes. */ if (provider->ftp_retired && !provider->ftp_marked) whack = provider->ftp_marked = 1; mutex_exit(&provider->ftp_mtx); } else { /* * If the process is dead, we're just waiting for the * last probe to be disabled to be able to free it. */ if (provider->ftp_rcount == 0 && !provider->ftp_marked) whack = provider->ftp_marked = 1; mutex_exit(&provider->ftp_mtx); } if (whack) fasttrap_pid_cleanup(); #ifdef __FreeBSD__ if (p != NULL) PRELE(p); #endif if (!probe->ftp_enabled) return; probe->ftp_enabled = 0; #ifdef illumos ASSERT(MUTEX_HELD(&cpu_lock)); #endif fasttrap_disable_callbacks(); } /*ARGSUSED*/ static void fasttrap_pid_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) { fasttrap_probe_t *probe = parg; char *str; int i, ndx; desc->dtargd_native[0] = '\0'; desc->dtargd_xlate[0] = '\0'; if (probe->ftp_prov->ftp_retired != 0 || desc->dtargd_ndx >= probe->ftp_nargs) { desc->dtargd_ndx = DTRACE_ARGNONE; return; } ndx = (probe->ftp_argmap != NULL) ? 
probe->ftp_argmap[desc->dtargd_ndx] : desc->dtargd_ndx; str = probe->ftp_ntypes; for (i = 0; i < ndx; i++) { str += strlen(str) + 1; } ASSERT(strlen(str + 1) < sizeof (desc->dtargd_native)); (void) strcpy(desc->dtargd_native, str); if (probe->ftp_xtypes == NULL) return; str = probe->ftp_xtypes; for (i = 0; i < desc->dtargd_ndx; i++) { str += strlen(str) + 1; } ASSERT(strlen(str + 1) < sizeof (desc->dtargd_xlate)); (void) strcpy(desc->dtargd_xlate, str); } /*ARGSUSED*/ static void fasttrap_pid_destroy(void *arg, dtrace_id_t id, void *parg) { fasttrap_probe_t *probe = parg; int i; size_t size; ASSERT(probe != NULL); ASSERT(!probe->ftp_enabled); ASSERT(fasttrap_total >= probe->ftp_ntps); atomic_add_32(&fasttrap_total, -probe->ftp_ntps); size = offsetof(fasttrap_probe_t, ftp_tps[probe->ftp_ntps]); if (probe->ftp_gen + 1 >= fasttrap_mod_gen) fasttrap_mod_barrier(probe->ftp_gen); for (i = 0; i < probe->ftp_ntps; i++) { kmem_free(probe->ftp_tps[i].fit_tp, sizeof (fasttrap_tracepoint_t)); } kmem_free(probe, size); } static const dtrace_pattr_t pid_attr = { { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, }; static dtrace_pops_t pid_pops = { fasttrap_pid_provide, NULL, fasttrap_pid_enable, fasttrap_pid_disable, NULL, NULL, fasttrap_pid_getargdesc, fasttrap_pid_getarg, NULL, fasttrap_pid_destroy }; static dtrace_pops_t usdt_pops = { fasttrap_pid_provide, NULL, fasttrap_pid_enable, fasttrap_pid_disable, NULL, NULL, fasttrap_pid_getargdesc, fasttrap_usdt_getarg, NULL, fasttrap_pid_destroy }; static fasttrap_proc_t * fasttrap_proc_lookup(pid_t pid) { fasttrap_bucket_t *bucket; fasttrap_proc_t *fprc, *new_fprc; bucket = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)]; mutex_enter(&bucket->ftb_mtx); for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) { if (fprc->ftpc_pid == pid && fprc->ftpc_acount != 0) { mutex_enter(&fprc->ftpc_mtx); mutex_exit(&bucket->ftb_mtx); fprc->ftpc_rcount++; atomic_inc_64(&fprc->ftpc_acount); ASSERT(fprc->ftpc_acount <= fprc->ftpc_rcount); mutex_exit(&fprc->ftpc_mtx); return (fprc); } } /* * Drop the bucket lock so we don't try to perform a sleeping * allocation under it. */ mutex_exit(&bucket->ftb_mtx); new_fprc = kmem_zalloc(sizeof (fasttrap_proc_t), KM_SLEEP); new_fprc->ftpc_pid = pid; new_fprc->ftpc_rcount = 1; new_fprc->ftpc_acount = 1; #ifndef illumos mutex_init(&new_fprc->ftpc_mtx, "fasttrap proc mtx", MUTEX_DEFAULT, NULL); #endif mutex_enter(&bucket->ftb_mtx); /* * Take another lap through the list to make sure a proc hasn't * been created for this pid while we weren't under the bucket lock. 
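fasttrap_proc_lookup() here, and fasttrap_provider_lookup() below, follow the same lookup-or-create discipline: search under the bucket lock, drop the lock to perform a sleeping allocation, then re-take the lock and search again before inserting, freeing the new object if another thread won the race. A compact user-space sketch of that discipline with a single pthread mutex (all names are illustrative):

#include <pthread.h>
#include <stdlib.h>

struct node {
	int		n_pid;
	struct node	*n_next;
};

static pthread_mutex_t bucket_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct node *bucket;

static struct node *
find_locked(int pid)
{
	struct node *n;

	for (n = bucket; n != NULL; n = n->n_next)
		if (n->n_pid == pid)
			return (n);
	return (NULL);
}

static struct node *
lookup_or_create(int pid)
{
	struct node *n, *new_n;

	pthread_mutex_lock(&bucket_mtx);
	if ((n = find_locked(pid)) != NULL) {
		pthread_mutex_unlock(&bucket_mtx);
		return (n);
	}
	/* Drop the lock: the allocation may sleep. */
	pthread_mutex_unlock(&bucket_mtx);
	if ((new_n = calloc(1, sizeof(*new_n))) == NULL)
		abort();
	new_n->n_pid = pid;

	/* Take another lap in case someone raced us while unlocked. */
	pthread_mutex_lock(&bucket_mtx);
	if ((n = find_locked(pid)) != NULL) {
		pthread_mutex_unlock(&bucket_mtx);
		free(new_n);		/* lost the race */
		return (n);
	}
	new_n->n_next = bucket;
	bucket = new_n;
	pthread_mutex_unlock(&bucket_mtx);
	return (new_n);
}

int
main(void)
{
	return (lookup_or_create(1234) == lookup_or_create(1234) ? 0 : 1);
}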
*/ for (fprc = bucket->ftb_data; fprc != NULL; fprc = fprc->ftpc_next) { if (fprc->ftpc_pid == pid && fprc->ftpc_acount != 0) { mutex_enter(&fprc->ftpc_mtx); mutex_exit(&bucket->ftb_mtx); fprc->ftpc_rcount++; atomic_inc_64(&fprc->ftpc_acount); ASSERT(fprc->ftpc_acount <= fprc->ftpc_rcount); mutex_exit(&fprc->ftpc_mtx); kmem_free(new_fprc, sizeof (fasttrap_proc_t)); return (fprc); } } new_fprc->ftpc_next = bucket->ftb_data; bucket->ftb_data = new_fprc; mutex_exit(&bucket->ftb_mtx); return (new_fprc); } static void fasttrap_proc_release(fasttrap_proc_t *proc) { fasttrap_bucket_t *bucket; fasttrap_proc_t *fprc, **fprcp; pid_t pid = proc->ftpc_pid; #ifndef illumos fasttrap_scrblock_t *scrblk, *scrblktmp; fasttrap_scrspace_t *scrspc, *scrspctmp; struct proc *p; struct thread *td; #endif mutex_enter(&proc->ftpc_mtx); ASSERT(proc->ftpc_rcount != 0); ASSERT(proc->ftpc_acount <= proc->ftpc_rcount); if (--proc->ftpc_rcount != 0) { mutex_exit(&proc->ftpc_mtx); return; } #ifndef illumos /* * Free all structures used to manage per-thread scratch space. */ LIST_FOREACH_SAFE(scrblk, &proc->ftpc_scrblks, ftsb_next, scrblktmp) { LIST_REMOVE(scrblk, ftsb_next); free(scrblk, M_SOLARIS); } LIST_FOREACH_SAFE(scrspc, &proc->ftpc_fscr, ftss_next, scrspctmp) { LIST_REMOVE(scrspc, ftss_next); free(scrspc, M_SOLARIS); } LIST_FOREACH_SAFE(scrspc, &proc->ftpc_ascr, ftss_next, scrspctmp) { LIST_REMOVE(scrspc, ftss_next); free(scrspc, M_SOLARIS); } if ((p = pfind(pid)) != NULL) { FOREACH_THREAD_IN_PROC(p, td) td->t_dtrace_sscr = NULL; PROC_UNLOCK(p); } #endif mutex_exit(&proc->ftpc_mtx); /* * There should definitely be no live providers associated with this * process at this point. */ ASSERT(proc->ftpc_acount == 0); bucket = &fasttrap_procs.fth_table[FASTTRAP_PROCS_INDEX(pid)]; mutex_enter(&bucket->ftb_mtx); fprcp = (fasttrap_proc_t **)&bucket->ftb_data; while ((fprc = *fprcp) != NULL) { if (fprc == proc) break; fprcp = &fprc->ftpc_next; } /* * Something strange has happened if we can't find the proc. */ ASSERT(fprc != NULL); *fprcp = fprc->ftpc_next; mutex_exit(&bucket->ftb_mtx); kmem_free(fprc, sizeof (fasttrap_proc_t)); } /* * Lookup a fasttrap-managed provider based on its name and associated pid. * If the pattr argument is non-NULL, this function instantiates the provider * if it doesn't exist otherwise it returns NULL. The provider is returned * with its lock held. */ static fasttrap_provider_t * fasttrap_provider_lookup(pid_t pid, const char *name, const dtrace_pattr_t *pattr) { fasttrap_provider_t *fp, *new_fp = NULL; fasttrap_bucket_t *bucket; char provname[DTRACE_PROVNAMELEN]; proc_t *p; cred_t *cred; ASSERT(strlen(name) < sizeof (fp->ftp_name)); ASSERT(pattr != NULL); bucket = &fasttrap_provs.fth_table[FASTTRAP_PROVS_INDEX(pid, name)]; mutex_enter(&bucket->ftb_mtx); /* * Take a lap through the list and return the match if we find it. */ for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) { if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 && !fp->ftp_retired) { mutex_enter(&fp->ftp_mtx); mutex_exit(&bucket->ftb_mtx); return (fp); } } /* * Drop the bucket lock so we don't try to perform a sleeping * allocation under it. */ mutex_exit(&bucket->ftb_mtx); /* * Make sure the process exists, isn't a child created as the result * of a vfork(2), and isn't a zombie (but may be in fork). */ if ((p = pfind(pid)) == NULL) return (NULL); /* * Increment p_dtrace_probes so that the process knows to inform us * when it exits or execs. 
fasttrap_provider_free() decrements this * when we're done with this provider. */ p->p_dtrace_probes++; /* * Grab the credentials for this process so we have * something to pass to dtrace_register(). */ PROC_LOCK_ASSERT(p, MA_OWNED); crhold(p->p_ucred); cred = p->p_ucred; PROC_UNLOCK(p); new_fp = kmem_zalloc(sizeof (fasttrap_provider_t), KM_SLEEP); new_fp->ftp_pid = pid; new_fp->ftp_proc = fasttrap_proc_lookup(pid); #ifndef illumos mutex_init(&new_fp->ftp_mtx, "provider mtx", MUTEX_DEFAULT, NULL); mutex_init(&new_fp->ftp_cmtx, "lock on creating", MUTEX_DEFAULT, NULL); #endif ASSERT(new_fp->ftp_proc != NULL); mutex_enter(&bucket->ftb_mtx); /* * Take another lap through the list to make sure a provider hasn't * been created for this pid while we weren't under the bucket lock. */ for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) { if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 && !fp->ftp_retired) { mutex_enter(&fp->ftp_mtx); mutex_exit(&bucket->ftb_mtx); fasttrap_provider_free(new_fp); crfree(cred); return (fp); } } (void) strcpy(new_fp->ftp_name, name); /* * Fail and return NULL if either the provider name is too long * or we fail to register this new provider with the DTrace * framework. Note that this is the only place we ever construct * the full provider name -- we keep it in pieces in the provider * structure. */ if (snprintf(provname, sizeof (provname), "%s%u", name, (uint_t)pid) >= sizeof (provname) || dtrace_register(provname, pattr, DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER | DTRACE_PRIV_ZONEOWNER, cred, pattr == &pid_attr ? &pid_pops : &usdt_pops, new_fp, &new_fp->ftp_provid) != 0) { mutex_exit(&bucket->ftb_mtx); fasttrap_provider_free(new_fp); crfree(cred); return (NULL); } new_fp->ftp_next = bucket->ftb_data; bucket->ftb_data = new_fp; mutex_enter(&new_fp->ftp_mtx); mutex_exit(&bucket->ftb_mtx); crfree(cred); return (new_fp); } static void fasttrap_provider_free(fasttrap_provider_t *provider) { pid_t pid = provider->ftp_pid; proc_t *p; /* * There need to be no associated enabled probes, no consumers * creating probes, and no meta providers referencing this provider. */ ASSERT(provider->ftp_rcount == 0); ASSERT(provider->ftp_ccount == 0); ASSERT(provider->ftp_mcount == 0); /* * If this provider hasn't been retired, we need to explicitly drop the * count of active providers on the associated process structure. */ if (!provider->ftp_retired) { atomic_dec_64(&provider->ftp_proc->ftpc_acount); ASSERT(provider->ftp_proc->ftpc_acount < provider->ftp_proc->ftpc_rcount); } fasttrap_proc_release(provider->ftp_proc); #ifndef illumos mutex_destroy(&provider->ftp_mtx); mutex_destroy(&provider->ftp_cmtx); #endif kmem_free(provider, sizeof (fasttrap_provider_t)); /* * Decrement p_dtrace_probes on the process whose provider we're * freeing. We don't have to worry about clobbering somone else's * modifications to it because we have locked the bucket that * corresponds to this process's hash chain in the provider hash * table. Don't sweat it if we can't find the process. 
*/ if ((p = pfind(pid)) == NULL) { return; } p->p_dtrace_probes--; #ifndef illumos PROC_UNLOCK(p); #endif } static void fasttrap_provider_retire(pid_t pid, const char *name, int mprov) { fasttrap_provider_t *fp; fasttrap_bucket_t *bucket; dtrace_provider_id_t provid; ASSERT(strlen(name) < sizeof (fp->ftp_name)); bucket = &fasttrap_provs.fth_table[FASTTRAP_PROVS_INDEX(pid, name)]; mutex_enter(&bucket->ftb_mtx); for (fp = bucket->ftb_data; fp != NULL; fp = fp->ftp_next) { if (fp->ftp_pid == pid && strcmp(fp->ftp_name, name) == 0 && !fp->ftp_retired) break; } if (fp == NULL) { mutex_exit(&bucket->ftb_mtx); return; } mutex_enter(&fp->ftp_mtx); ASSERT(!mprov || fp->ftp_mcount > 0); if (mprov && --fp->ftp_mcount != 0) { mutex_exit(&fp->ftp_mtx); mutex_exit(&bucket->ftb_mtx); return; } /* * Mark the provider to be removed in our post-processing step, mark it * retired, and drop the active count on its proc. Marking it indicates * that we should try to remove it; setting the retired flag indicates * that we're done with this provider; dropping the active the proc * releases our hold, and when this reaches zero (as it will during * exit or exec) the proc and associated providers become defunct. * * We obviously need to take the bucket lock before the provider lock * to perform the lookup, but we need to drop the provider lock * before calling into the DTrace framework since we acquire the * provider lock in callbacks invoked from the DTrace framework. The * bucket lock therefore protects the integrity of the provider hash * table. */ atomic_dec_64(&fp->ftp_proc->ftpc_acount); ASSERT(fp->ftp_proc->ftpc_acount < fp->ftp_proc->ftpc_rcount); fp->ftp_retired = 1; fp->ftp_marked = 1; provid = fp->ftp_provid; mutex_exit(&fp->ftp_mtx); /* * We don't have to worry about invalidating the same provider twice * since fasttrap_provider_lookup() will ignore provider that have * been marked as retired. */ dtrace_invalidate(provid); mutex_exit(&bucket->ftb_mtx); fasttrap_pid_cleanup(); } static int fasttrap_uint32_cmp(const void *ap, const void *bp) { return (*(const uint32_t *)ap - *(const uint32_t *)bp); } static int fasttrap_uint64_cmp(const void *ap, const void *bp) { return (*(const uint64_t *)ap - *(const uint64_t *)bp); } static int fasttrap_add_probe(fasttrap_probe_spec_t *pdata) { fasttrap_provider_t *provider; fasttrap_probe_t *pp; fasttrap_tracepoint_t *tp; char *name; int i, aframes = 0, whack; /* * There needs to be at least one desired trace point. */ if (pdata->ftps_noffs == 0) return (EINVAL); switch (pdata->ftps_type) { case DTFTP_ENTRY: name = "entry"; aframes = FASTTRAP_ENTRY_AFRAMES; break; case DTFTP_RETURN: name = "return"; aframes = FASTTRAP_RETURN_AFRAMES; break; case DTFTP_OFFSETS: name = NULL; break; default: return (EINVAL); } if ((provider = fasttrap_provider_lookup(pdata->ftps_pid, FASTTRAP_PID_NAME, &pid_attr)) == NULL) return (ESRCH); /* * Increment this reference count to indicate that a consumer is * actively adding a new probe associated with this provider. This * prevents the provider from being deleted -- we'll need to check * for pending deletions when we drop this reference count. */ provider->ftp_ccount++; mutex_exit(&provider->ftp_mtx); /* * Grab the creation lock to ensure consistency between calls to * dtrace_probe_lookup() and dtrace_probe_create() in the face of * other threads creating probes. We must drop the provider lock * before taking this lock to avoid a three-way deadlock with the * DTrace framework. 
*/ mutex_enter(&provider->ftp_cmtx); if (name == NULL) { for (i = 0; i < pdata->ftps_noffs; i++) { char name_str[17]; (void) sprintf(name_str, "%llx", (unsigned long long)pdata->ftps_offs[i]); if (dtrace_probe_lookup(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name_str) != 0) continue; atomic_inc_32(&fasttrap_total); if (fasttrap_total > fasttrap_max) { atomic_dec_32(&fasttrap_total); goto no_mem; } pp = kmem_zalloc(sizeof (fasttrap_probe_t), KM_SLEEP); pp->ftp_prov = provider; pp->ftp_faddr = pdata->ftps_pc; pp->ftp_fsize = pdata->ftps_size; pp->ftp_pid = pdata->ftps_pid; pp->ftp_ntps = 1; tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = pdata->ftps_offs[i] + pdata->ftps_pc; tp->ftt_pid = pdata->ftps_pid; pp->ftp_tps[0].fit_tp = tp; pp->ftp_tps[0].fit_id.fti_probe = pp; pp->ftp_tps[0].fit_id.fti_ptype = pdata->ftps_type; pp->ftp_id = dtrace_probe_create(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name_str, FASTTRAP_OFFSET_AFRAMES, pp); } } else if (dtrace_probe_lookup(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name) == 0) { atomic_add_32(&fasttrap_total, pdata->ftps_noffs); if (fasttrap_total > fasttrap_max) { atomic_add_32(&fasttrap_total, -pdata->ftps_noffs); goto no_mem; } /* * Make sure all tracepoint program counter values are unique. * We later assume that each probe has exactly one tracepoint * for a given pc. */ qsort(pdata->ftps_offs, pdata->ftps_noffs, sizeof (uint64_t), fasttrap_uint64_cmp); for (i = 1; i < pdata->ftps_noffs; i++) { if (pdata->ftps_offs[i] > pdata->ftps_offs[i - 1]) continue; atomic_add_32(&fasttrap_total, -pdata->ftps_noffs); goto no_mem; } ASSERT(pdata->ftps_noffs > 0); pp = kmem_zalloc(offsetof(fasttrap_probe_t, ftp_tps[pdata->ftps_noffs]), KM_SLEEP); pp->ftp_prov = provider; pp->ftp_faddr = pdata->ftps_pc; pp->ftp_fsize = pdata->ftps_size; pp->ftp_pid = pdata->ftps_pid; pp->ftp_ntps = pdata->ftps_noffs; for (i = 0; i < pdata->ftps_noffs; i++) { tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = pdata->ftps_offs[i] + pdata->ftps_pc; tp->ftt_pid = pdata->ftps_pid; pp->ftp_tps[i].fit_tp = tp; pp->ftp_tps[i].fit_id.fti_probe = pp; pp->ftp_tps[i].fit_id.fti_ptype = pdata->ftps_type; } pp->ftp_id = dtrace_probe_create(provider->ftp_provid, pdata->ftps_mod, pdata->ftps_func, name, aframes, pp); } mutex_exit(&provider->ftp_cmtx); /* * We know that the provider is still valid since we incremented the * creation reference count. If someone tried to clean up this provider * while we were using it (e.g. because the process called exec(2) or * exit(2)), take note of that and try to clean it up now. */ mutex_enter(&provider->ftp_mtx); provider->ftp_ccount--; whack = provider->ftp_retired; mutex_exit(&provider->ftp_mtx); if (whack) fasttrap_pid_cleanup(); return (0); no_mem: /* * If we've exhausted the allowable resources, we'll try to remove * this provider to free some up. This is to cover the case where * the user has accidentally created many more probes than was * intended (e.g. pid123:::). 
*/ mutex_exit(&provider->ftp_cmtx); mutex_enter(&provider->ftp_mtx); provider->ftp_ccount--; provider->ftp_marked = 1; mutex_exit(&provider->ftp_mtx); fasttrap_pid_cleanup(); return (ENOMEM); } /*ARGSUSED*/ static void * fasttrap_meta_provide(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) { fasttrap_provider_t *provider; /* * A 32-bit unsigned integer (like a pid for example) can be * expressed in 10 or fewer decimal digits. Make sure that we'll * have enough space for the provider name. */ if (strlen(dhpv->dthpv_provname) + 10 >= sizeof (provider->ftp_name)) { printf("failed to instantiate provider %s: " "name too long to accomodate pid", dhpv->dthpv_provname); return (NULL); } /* * Don't let folks spoof the true pid provider. */ if (strcmp(dhpv->dthpv_provname, FASTTRAP_PID_NAME) == 0) { printf("failed to instantiate provider %s: " "%s is an invalid name", dhpv->dthpv_provname, FASTTRAP_PID_NAME); return (NULL); } /* * The highest stability class that fasttrap supports is ISA; cap * the stability of the new provider accordingly. */ if (dhpv->dthpv_pattr.dtpa_provider.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_provider.dtat_class = DTRACE_CLASS_ISA; if (dhpv->dthpv_pattr.dtpa_mod.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_mod.dtat_class = DTRACE_CLASS_ISA; if (dhpv->dthpv_pattr.dtpa_func.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_func.dtat_class = DTRACE_CLASS_ISA; if (dhpv->dthpv_pattr.dtpa_name.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_name.dtat_class = DTRACE_CLASS_ISA; if (dhpv->dthpv_pattr.dtpa_args.dtat_class > DTRACE_CLASS_ISA) dhpv->dthpv_pattr.dtpa_args.dtat_class = DTRACE_CLASS_ISA; if ((provider = fasttrap_provider_lookup(pid, dhpv->dthpv_provname, &dhpv->dthpv_pattr)) == NULL) { printf("failed to instantiate provider %s for " "process %u", dhpv->dthpv_provname, (uint_t)pid); return (NULL); } /* * Up the meta provider count so this provider isn't removed until * the meta provider has been told to remove it. */ provider->ftp_mcount++; mutex_exit(&provider->ftp_mtx); return (provider); } /* * We know a few things about our context here: we know that the probe being * created doesn't already exist (DTrace won't load DOF at the same address * twice, even if explicitly told to do so) and we know that we are * single-threaded with respect to the meta provider machinery. Knowing that * this is a new probe and that there is no way for us to race with another * operation on this provider allows us an important optimization: we need not * lookup a probe before adding it. Saving this lookup is important because * this code is in the fork path for processes with USDT probes, and lookups * here are potentially very expensive because of long hash conflicts on * module, function and name (DTrace doesn't hash on provider name). */ /*ARGSUSED*/ static void fasttrap_meta_create_probe(void *arg, void *parg, dtrace_helper_probedesc_t *dhpb) { fasttrap_provider_t *provider = parg; fasttrap_probe_t *pp; fasttrap_tracepoint_t *tp; int i, j; uint32_t ntps; /* * Since the meta provider count is non-zero we don't have to worry * about this provider disappearing. */ ASSERT(provider->ftp_mcount > 0); /* * The offsets must be unique. 
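 * For example, if the same offset appears twice in dthpb_offs, the sorted
 * scan below sees a non-increasing pair and returns without creating the
 * probe.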
*/ qsort(dhpb->dthpb_offs, dhpb->dthpb_noffs, sizeof (uint32_t), fasttrap_uint32_cmp); for (i = 1; i < dhpb->dthpb_noffs; i++) { if (dhpb->dthpb_base + dhpb->dthpb_offs[i] <= dhpb->dthpb_base + dhpb->dthpb_offs[i - 1]) return; } qsort(dhpb->dthpb_enoffs, dhpb->dthpb_nenoffs, sizeof (uint32_t), fasttrap_uint32_cmp); for (i = 1; i < dhpb->dthpb_nenoffs; i++) { if (dhpb->dthpb_base + dhpb->dthpb_enoffs[i] <= dhpb->dthpb_base + dhpb->dthpb_enoffs[i - 1]) return; } ntps = dhpb->dthpb_noffs + dhpb->dthpb_nenoffs; ASSERT(ntps > 0); atomic_add_32(&fasttrap_total, ntps); if (fasttrap_total > fasttrap_max) { atomic_add_32(&fasttrap_total, -ntps); return; } pp = kmem_zalloc(offsetof(fasttrap_probe_t, ftp_tps[ntps]), KM_SLEEP); pp->ftp_prov = provider; pp->ftp_pid = provider->ftp_pid; pp->ftp_ntps = ntps; pp->ftp_nargs = dhpb->dthpb_xargc; pp->ftp_xtypes = dhpb->dthpb_xtypes; pp->ftp_ntypes = dhpb->dthpb_ntypes; /* * First create a tracepoint for each actual point of interest. */ for (i = 0; i < dhpb->dthpb_noffs; i++) { tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = dhpb->dthpb_base + dhpb->dthpb_offs[i]; tp->ftt_pid = provider->ftp_pid; pp->ftp_tps[i].fit_tp = tp; pp->ftp_tps[i].fit_id.fti_probe = pp; #ifdef __sparc pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_POST_OFFSETS; #else pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_OFFSETS; #endif } /* * Then create a tracepoint for each is-enabled point. */ for (j = 0; i < ntps; i++, j++) { tp = kmem_zalloc(sizeof (fasttrap_tracepoint_t), KM_SLEEP); tp->ftt_proc = provider->ftp_proc; tp->ftt_pc = dhpb->dthpb_base + dhpb->dthpb_enoffs[j]; tp->ftt_pid = provider->ftp_pid; pp->ftp_tps[i].fit_tp = tp; pp->ftp_tps[i].fit_id.fti_probe = pp; pp->ftp_tps[i].fit_id.fti_ptype = DTFTP_IS_ENABLED; } /* * If the arguments are shuffled around we set the argument remapping * table. Later, when the probe fires, we only remap the arguments * if the table is non-NULL. */ for (i = 0; i < dhpb->dthpb_xargc; i++) { if (dhpb->dthpb_args[i] != i) { pp->ftp_argmap = dhpb->dthpb_args; break; } } /* * The probe is fully constructed -- register it with DTrace. */ pp->ftp_id = dtrace_probe_create(provider->ftp_provid, dhpb->dthpb_mod, dhpb->dthpb_func, dhpb->dthpb_name, FASTTRAP_OFFSET_AFRAMES, pp); } /*ARGSUSED*/ static void fasttrap_meta_remove(void *arg, dtrace_helper_provdesc_t *dhpv, pid_t pid) { /* * Clean up the USDT provider. There may be active consumers of the * provider busy adding probes, no damage will actually befall the * provider until that count has dropped to zero. This just puts * the provider on death row. */ fasttrap_provider_retire(pid, dhpv->dthpv_provname, 1); } static dtrace_mops_t fasttrap_mops = { fasttrap_meta_create_probe, fasttrap_meta_provide, fasttrap_meta_remove }; /*ARGSUSED*/ static int fasttrap_open(struct cdev *dev __unused, int oflags __unused, int devtype __unused, struct thread *td __unused) { return (0); } /*ARGSUSED*/ static int fasttrap_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int fflag, struct thread *td) { #ifdef notyet struct kinfo_proc kp; const cred_t *cr = td->td_ucred; #endif if (!dtrace_attached()) return (EAGAIN); if (cmd == FASTTRAPIOC_MAKEPROBE) { fasttrap_probe_spec_t *uprobe = *(fasttrap_probe_spec_t **)arg; fasttrap_probe_spec_t *probe; uint64_t noffs; size_t size; int ret, err; if (copyin(&uprobe->ftps_noffs, &noffs, sizeof (uprobe->ftps_noffs))) return (EFAULT); /* * Probes must have at least one tracepoint. 
*/ if (noffs == 0) return (EINVAL); size = sizeof (fasttrap_probe_spec_t) + sizeof (probe->ftps_offs[0]) * (noffs - 1); if (size > 1024 * 1024) return (ENOMEM); probe = kmem_alloc(size, KM_SLEEP); if (copyin(uprobe, probe, size) != 0 || probe->ftps_noffs != noffs) { kmem_free(probe, size); return (EFAULT); } /* * Verify that the function and module strings contain no * funny characters. */ if (u8_validate(probe->ftps_func, strlen(probe->ftps_func), NULL, U8_VALIDATE_ENTIRE, &err) < 0) { ret = EINVAL; goto err; } if (u8_validate(probe->ftps_mod, strlen(probe->ftps_mod), NULL, U8_VALIDATE_ENTIRE, &err) < 0) { ret = EINVAL; goto err; } #ifdef notyet if (!PRIV_POLICY_CHOICE(cr, PRIV_ALL, B_FALSE)) { proc_t *p; pid_t pid = probe->ftps_pid; #ifdef illumos mutex_enter(&pidlock); #endif /* * Report an error if the process doesn't exist * or is actively being birthed. */ sx_slock(&proctree_lock); p = pfind(pid); if (p) fill_kinfo_proc(p, &kp); sx_sunlock(&proctree_lock); if (p == NULL || kp.ki_stat == SIDL) { #ifdef illumos mutex_exit(&pidlock); #endif return (ESRCH); } #ifdef illumos mutex_enter(&p->p_lock); mutex_exit(&pidlock); #else PROC_LOCK_ASSERT(p, MA_OWNED); #endif #ifdef notyet if ((ret = priv_proc_cred_perm(cr, p, NULL, VREAD | VWRITE)) != 0) { #ifdef illumos mutex_exit(&p->p_lock); #else PROC_UNLOCK(p); #endif return (ret); } #endif /* notyet */ #ifdef illumos mutex_exit(&p->p_lock); #else PROC_UNLOCK(p); #endif } #endif /* notyet */ ret = fasttrap_add_probe(probe); err: kmem_free(probe, size); return (ret); } else if (cmd == FASTTRAPIOC_GETINSTR) { fasttrap_instr_query_t instr; fasttrap_tracepoint_t *tp; uint_t index; #ifdef illumos int ret; #endif #ifdef illumos if (copyin((void *)arg, &instr, sizeof (instr)) != 0) return (EFAULT); #endif #ifdef notyet if (!PRIV_POLICY_CHOICE(cr, PRIV_ALL, B_FALSE)) { proc_t *p; pid_t pid = instr.ftiq_pid; #ifdef illumos mutex_enter(&pidlock); #endif /* * Report an error if the process doesn't exist * or is actively being birthed. */ sx_slock(&proctree_lock); p = pfind(pid); if (p) fill_kinfo_proc(p, &kp); sx_sunlock(&proctree_lock); if (p == NULL || kp.ki_stat == SIDL) { #ifdef illumos mutex_exit(&pidlock); #endif return (ESRCH); } #ifdef illumos mutex_enter(&p->p_lock); mutex_exit(&pidlock); #else PROC_LOCK_ASSERT(p, MA_OWNED); #endif #ifdef notyet if ((ret = priv_proc_cred_perm(cr, p, NULL, VREAD)) != 0) { #ifdef illumos mutex_exit(&p->p_lock); #else PROC_UNLOCK(p); #endif return (ret); } #endif /* notyet */ #ifdef illumos mutex_exit(&p->p_lock); #else PROC_UNLOCK(p); #endif } #endif /* notyet */ index = FASTTRAP_TPOINTS_INDEX(instr.ftiq_pid, instr.ftiq_pc); mutex_enter(&fasttrap_tpoints.fth_table[index].ftb_mtx); tp = fasttrap_tpoints.fth_table[index].ftb_data; while (tp != NULL) { if (instr.ftiq_pid == tp->ftt_pid && instr.ftiq_pc == tp->ftt_pc && tp->ftt_proc->ftpc_acount != 0) break; tp = tp->ftt_next; } if (tp == NULL) { mutex_exit(&fasttrap_tpoints.fth_table[index].ftb_mtx); return (ENOENT); } bcopy(&tp->ftt_instr, &instr.ftiq_instr, sizeof (instr.ftiq_instr)); mutex_exit(&fasttrap_tpoints.fth_table[index].ftb_mtx); if (copyout(&instr, (void *)arg, sizeof (instr)) != 0) return (EFAULT); return (0); } return (EINVAL); } static int fasttrap_load(void) { ulong_t nent; int i, ret; /* Create the /dev/dtrace/fasttrap entry. 
*/ fasttrap_cdev = make_dev(&fasttrap_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "dtrace/fasttrap"); mtx_init(&fasttrap_cleanup_mtx, "fasttrap clean", "dtrace", MTX_DEF); mutex_init(&fasttrap_count_mtx, "fasttrap count mtx", MUTEX_DEFAULT, NULL); #ifdef illumos fasttrap_max = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, "fasttrap-max-probes", FASTTRAP_MAX_DEFAULT); #endif fasttrap_total = 0; /* * Conjure up the tracepoints hashtable... */ #ifdef illumos nent = ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS, "fasttrap-hash-size", FASTTRAP_TPOINTS_DEFAULT_SIZE); #else nent = tpoints_hash_size; #endif if (nent == 0 || nent > 0x1000000) nent = FASTTRAP_TPOINTS_DEFAULT_SIZE; tpoints_hash_size = nent; if (ISP2(nent)) fasttrap_tpoints.fth_nent = nent; else fasttrap_tpoints.fth_nent = 1 << fasttrap_highbit(nent); ASSERT(fasttrap_tpoints.fth_nent > 0); fasttrap_tpoints.fth_mask = fasttrap_tpoints.fth_nent - 1; fasttrap_tpoints.fth_table = kmem_zalloc(fasttrap_tpoints.fth_nent * sizeof (fasttrap_bucket_t), KM_SLEEP); #ifndef illumos for (i = 0; i < fasttrap_tpoints.fth_nent; i++) mutex_init(&fasttrap_tpoints.fth_table[i].ftb_mtx, "tracepoints bucket mtx", MUTEX_DEFAULT, NULL); #endif /* * ... and the providers hash table... */ nent = FASTTRAP_PROVIDERS_DEFAULT_SIZE; if (ISP2(nent)) fasttrap_provs.fth_nent = nent; else fasttrap_provs.fth_nent = 1 << fasttrap_highbit(nent); ASSERT(fasttrap_provs.fth_nent > 0); fasttrap_provs.fth_mask = fasttrap_provs.fth_nent - 1; fasttrap_provs.fth_table = kmem_zalloc(fasttrap_provs.fth_nent * sizeof (fasttrap_bucket_t), KM_SLEEP); #ifndef illumos for (i = 0; i < fasttrap_provs.fth_nent; i++) mutex_init(&fasttrap_provs.fth_table[i].ftb_mtx, "providers bucket mtx", MUTEX_DEFAULT, NULL); #endif ret = kproc_create(fasttrap_pid_cleanup_cb, NULL, &fasttrap_cleanup_proc, 0, 0, "ftcleanup"); if (ret != 0) { destroy_dev(fasttrap_cdev); #ifndef illumos for (i = 0; i < fasttrap_provs.fth_nent; i++) mutex_destroy(&fasttrap_provs.fth_table[i].ftb_mtx); for (i = 0; i < fasttrap_tpoints.fth_nent; i++) mutex_destroy(&fasttrap_tpoints.fth_table[i].ftb_mtx); #endif kmem_free(fasttrap_provs.fth_table, fasttrap_provs.fth_nent * sizeof (fasttrap_bucket_t)); mtx_destroy(&fasttrap_cleanup_mtx); mutex_destroy(&fasttrap_count_mtx); return (ret); } /* * ... and the procs hash table. */ nent = FASTTRAP_PROCS_DEFAULT_SIZE; if (ISP2(nent)) fasttrap_procs.fth_nent = nent; else fasttrap_procs.fth_nent = 1 << fasttrap_highbit(nent); ASSERT(fasttrap_procs.fth_nent > 0); fasttrap_procs.fth_mask = fasttrap_procs.fth_nent - 1; fasttrap_procs.fth_table = kmem_zalloc(fasttrap_procs.fth_nent * sizeof (fasttrap_bucket_t), KM_SLEEP); #ifndef illumos for (i = 0; i < fasttrap_procs.fth_nent; i++) mutex_init(&fasttrap_procs.fth_table[i].ftb_mtx, "processes bucket mtx", MUTEX_DEFAULT, NULL); rm_init(&fasttrap_tp_lock, "fasttrap tracepoint"); /* * This event handler must run before kdtrace_thread_dtor() since it * accesses the thread's struct kdtrace_thread. */ fasttrap_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, fasttrap_thread_dtor, NULL, EVENTHANDLER_PRI_FIRST); #endif /* * Install our hooks into fork(2), exec(2), and exit(2). 
*/ dtrace_fasttrap_fork = &fasttrap_fork; dtrace_fasttrap_exit = &fasttrap_exec_exit; dtrace_fasttrap_exec = &fasttrap_exec_exit; (void) dtrace_meta_register("fasttrap", &fasttrap_mops, NULL, &fasttrap_meta_id); return (0); } static int fasttrap_unload(void) { int i, fail = 0; /* * Unregister the meta-provider to make sure no new fasttrap- * managed providers come along while we're trying to close up * shop. If we fail to detach, we'll need to re-register as a * meta-provider. We can fail to unregister as a meta-provider * if providers we manage still exist. */ if (fasttrap_meta_id != DTRACE_METAPROVNONE && dtrace_meta_unregister(fasttrap_meta_id) != 0) return (-1); /* * Iterate over all of our providers. If there's still a process * that corresponds to that pid, fail to detach. */ for (i = 0; i < fasttrap_provs.fth_nent; i++) { fasttrap_provider_t **fpp, *fp; fasttrap_bucket_t *bucket = &fasttrap_provs.fth_table[i]; mutex_enter(&bucket->ftb_mtx); fpp = (fasttrap_provider_t **)&bucket->ftb_data; while ((fp = *fpp) != NULL) { /* * Acquire and release the lock as a simple way of * waiting for any other consumer to finish with * this provider. A thread must first acquire the * bucket lock so there's no chance of another thread * blocking on the provider's lock. */ mutex_enter(&fp->ftp_mtx); mutex_exit(&fp->ftp_mtx); if (dtrace_unregister(fp->ftp_provid) != 0) { fail = 1; fpp = &fp->ftp_next; } else { *fpp = fp->ftp_next; fasttrap_provider_free(fp); } } mutex_exit(&bucket->ftb_mtx); } if (fail) { (void) dtrace_meta_register("fasttrap", &fasttrap_mops, NULL, &fasttrap_meta_id); return (-1); } /* * Stop new processes from entering these hooks now, before the * fasttrap_cleanup thread runs. That way all processes will hopefully * be out of these hooks before we free fasttrap_provs.fth_table */ ASSERT(dtrace_fasttrap_fork == &fasttrap_fork); dtrace_fasttrap_fork = NULL; ASSERT(dtrace_fasttrap_exec == &fasttrap_exec_exit); dtrace_fasttrap_exec = NULL; ASSERT(dtrace_fasttrap_exit == &fasttrap_exec_exit); dtrace_fasttrap_exit = NULL; mtx_lock(&fasttrap_cleanup_mtx); fasttrap_cleanup_drain = 1; /* Wait for the cleanup thread to finish up and signal us. 
*/ wakeup(&fasttrap_cleanup_cv); mtx_sleep(&fasttrap_cleanup_drain, &fasttrap_cleanup_mtx, 0, "ftcld", 0); fasttrap_cleanup_proc = NULL; mtx_destroy(&fasttrap_cleanup_mtx); #ifdef DEBUG mutex_enter(&fasttrap_count_mtx); ASSERT(fasttrap_pid_count == 0); mutex_exit(&fasttrap_count_mtx); #endif #ifndef illumos EVENTHANDLER_DEREGISTER(thread_dtor, fasttrap_thread_dtor_tag); for (i = 0; i < fasttrap_tpoints.fth_nent; i++) mutex_destroy(&fasttrap_tpoints.fth_table[i].ftb_mtx); for (i = 0; i < fasttrap_provs.fth_nent; i++) mutex_destroy(&fasttrap_provs.fth_table[i].ftb_mtx); for (i = 0; i < fasttrap_procs.fth_nent; i++) mutex_destroy(&fasttrap_procs.fth_table[i].ftb_mtx); #endif kmem_free(fasttrap_tpoints.fth_table, fasttrap_tpoints.fth_nent * sizeof (fasttrap_bucket_t)); fasttrap_tpoints.fth_nent = 0; kmem_free(fasttrap_provs.fth_table, fasttrap_provs.fth_nent * sizeof (fasttrap_bucket_t)); fasttrap_provs.fth_nent = 0; kmem_free(fasttrap_procs.fth_table, fasttrap_procs.fth_nent * sizeof (fasttrap_bucket_t)); fasttrap_procs.fth_nent = 0; #ifndef illumos destroy_dev(fasttrap_cdev); mutex_destroy(&fasttrap_count_mtx); rm_destroy(&fasttrap_tp_lock); #endif return (0); } /* ARGSUSED */ static int fasttrap_modevent(module_t mod __unused, int type, void *data __unused) { int error = 0; switch (type) { case MOD_LOAD: break; case MOD_UNLOAD: break; case MOD_SHUTDOWN: break; default: error = EOPNOTSUPP; break; } return (error); } SYSINIT(fasttrap_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fasttrap_load, NULL); SYSUNINIT(fasttrap_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY, fasttrap_unload, NULL); DEV_MODULE(fasttrap, fasttrap_modevent, NULL); MODULE_VERSION(fasttrap, 1); MODULE_DEPEND(fasttrap, dtrace, 1, 1, 1); MODULE_DEPEND(fasttrap, opensolaris, 1, 1, 1); Index: user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c (revision 303642) @@ -1,661 +1,673 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2012, 2014 by Delphix. All rights reserved. */ #include #include #include #include #include /* * Virtual device vector for mirroring. 
*/ typedef struct mirror_child { vdev_t *mc_vd; uint64_t mc_offset; int mc_error; int mc_load; uint8_t mc_tried; uint8_t mc_skipped; uint8_t mc_speculative; } mirror_child_t; typedef struct mirror_map { int *mm_preferred; int mm_preferred_cnt; int mm_children; boolean_t mm_replacing; boolean_t mm_root; mirror_child_t mm_child[]; } mirror_map_t; static int vdev_mirror_shift = 21; +#ifdef _KERNEL SYSCTL_DECL(_vfs_zfs_vdev); static SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, mirror, CTLFLAG_RD, 0, "ZFS VDEV Mirror"); +#endif /* * The load configuration settings below are tuned by default for * the case where all devices are of the same rotational type. * * If there is a mixture of rotating and non-rotating media, setting * non_rotating_seek_inc to 0 may well provide better results as it * will direct more reads to the non-rotating vdevs which are more * likely to have a higher performance. */ /* Rotating media load calculation configuration. */ static int rotating_inc = 0; +#ifdef _KERNEL SYSCTL_INT(_vfs_zfs_vdev_mirror, OID_AUTO, rotating_inc, CTLFLAG_RWTUN, &rotating_inc, 0, "Rotating media load increment for non-seeking I/O's"); +#endif static int rotating_seek_inc = 5; +#ifdef _KERNEL SYSCTL_INT(_vfs_zfs_vdev_mirror, OID_AUTO, rotating_seek_inc, CTLFLAG_RWTUN, &rotating_seek_inc, 0, "Rotating media load increment for seeking I/O's"); +#endif static int rotating_seek_offset = 1 * 1024 * 1024; +#ifdef _KERNEL SYSCTL_INT(_vfs_zfs_vdev_mirror, OID_AUTO, rotating_seek_offset, CTLFLAG_RWTUN, &rotating_seek_offset, 0, "Offset in bytes from the last I/O which " "triggers a reduced rotating media seek increment"); +#endif /* Non-rotating media load calculation configuration. */ static int non_rotating_inc = 0; +#ifdef _KERNEL SYSCTL_INT(_vfs_zfs_vdev_mirror, OID_AUTO, non_rotating_inc, CTLFLAG_RWTUN, &non_rotating_inc, 0, "Non-rotating media load increment for non-seeking I/O's"); +#endif static int non_rotating_seek_inc = 1; +#ifdef _KERNEL SYSCTL_INT(_vfs_zfs_vdev_mirror, OID_AUTO, non_rotating_seek_inc, CTLFLAG_RWTUN, &non_rotating_seek_inc, 0, "Non-rotating media load increment for seeking I/O's"); +#endif static inline size_t vdev_mirror_map_size(int children) { return (offsetof(mirror_map_t, mm_child[children]) + sizeof(int) * children); } static inline mirror_map_t * vdev_mirror_map_alloc(int children, boolean_t replacing, boolean_t root) { mirror_map_t *mm; mm = kmem_zalloc(vdev_mirror_map_size(children), KM_SLEEP); mm->mm_children = children; mm->mm_replacing = replacing; mm->mm_root = root; mm->mm_preferred = (int *)((uintptr_t)mm + offsetof(mirror_map_t, mm_child[children])); return mm; } static void vdev_mirror_map_free(zio_t *zio) { mirror_map_t *mm = zio->io_vsd; kmem_free(mm, vdev_mirror_map_size(mm->mm_children)); } static const zio_vsd_ops_t vdev_mirror_vsd_ops = { vdev_mirror_map_free, zio_vsd_default_cksum_report }; static int vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset) { uint64_t lastoffset; int load; /* All DVAs have equal weight at the root. */ if (mm->mm_root) return (INT_MAX); /* * We don't return INT_MAX if the device is resilvering i.e. * vdev_resilver_txg != 0 as when tested performance was slightly * worse overall when resilvering with compared to without. */ /* Standard load based on pending queue length. */ load = vdev_queue_length(vd); lastoffset = vdev_queue_lastoffset(vd); if (vd->vdev_rotation_rate == VDEV_RATE_NON_ROTATING) { /* Non-rotating media. 
*/ if (lastoffset == zio_offset) return (load + non_rotating_inc); /* * Apply a seek penalty even for non-rotating devices as * sequential I/O'a can be aggregated into fewer operations * on the device, thus avoiding unnecessary per-command * overhead and boosting performance. */ return (load + non_rotating_seek_inc); } /* Rotating media I/O's which directly follow the last I/O. */ if (lastoffset == zio_offset) return (load + rotating_inc); /* * Apply half the seek increment to I/O's within seek offset * of the last I/O queued to this vdev as they should incure less * of a seek increment. */ if (ABS(lastoffset - zio_offset) < rotating_seek_offset) return (load + (rotating_seek_inc / 2)); /* Apply the full seek increment to all other I/O's. */ return (load + rotating_seek_inc); } static mirror_map_t * vdev_mirror_map_init(zio_t *zio) { mirror_map_t *mm = NULL; mirror_child_t *mc; vdev_t *vd = zio->io_vd; int c; if (vd == NULL) { dva_t *dva = zio->io_bp->blk_dva; spa_t *spa = zio->io_spa; mm = vdev_mirror_map_alloc(BP_GET_NDVAS(zio->io_bp), B_FALSE, B_TRUE); for (c = 0; c < mm->mm_children; c++) { mc = &mm->mm_child[c]; mc->mc_vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[c])); mc->mc_offset = DVA_GET_OFFSET(&dva[c]); } } else { mm = vdev_mirror_map_alloc(vd->vdev_children, (vd->vdev_ops == &vdev_replacing_ops || vd->vdev_ops == &vdev_spare_ops), B_FALSE); for (c = 0; c < mm->mm_children; c++) { mc = &mm->mm_child[c]; mc->mc_vd = vd->vdev_child[c]; mc->mc_offset = zio->io_offset; } } zio->io_vsd = mm; zio->io_vsd_ops = &vdev_mirror_vsd_ops; return (mm); } static int vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize, uint64_t *logical_ashift, uint64_t *physical_ashift) { int numerrors = 0; int lasterror = 0; if (vd->vdev_children == 0) { vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return (SET_ERROR(EINVAL)); } vdev_open_children(vd); for (int c = 0; c < vd->vdev_children; c++) { vdev_t *cvd = vd->vdev_child[c]; if (cvd->vdev_open_error) { lasterror = cvd->vdev_open_error; numerrors++; continue; } *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1; *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1; *logical_ashift = MAX(*logical_ashift, cvd->vdev_ashift); *physical_ashift = MAX(*physical_ashift, cvd->vdev_physical_ashift); } if (numerrors == vd->vdev_children) { vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS; return (lasterror); } return (0); } static void vdev_mirror_close(vdev_t *vd) { for (int c = 0; c < vd->vdev_children; c++) vdev_close(vd->vdev_child[c]); } static void vdev_mirror_child_done(zio_t *zio) { mirror_child_t *mc = zio->io_private; mc->mc_error = zio->io_error; mc->mc_tried = 1; mc->mc_skipped = 0; } static void vdev_mirror_scrub_done(zio_t *zio) { mirror_child_t *mc = zio->io_private; if (zio->io_error == 0) { zio_t *pio; mutex_enter(&zio->io_lock); while ((pio = zio_walk_parents(zio)) != NULL) { mutex_enter(&pio->io_lock); ASSERT3U(zio->io_size, >=, pio->io_size); bcopy(zio->io_data, pio->io_data, pio->io_size); mutex_exit(&pio->io_lock); } mutex_exit(&zio->io_lock); } zio_buf_free(zio->io_data, zio->io_size); mc->mc_error = zio->io_error; mc->mc_tried = 1; mc->mc_skipped = 0; } /* * Check the other, lower-index DVAs to see if they're on the same * vdev as the child we picked. If they are, use them since they * are likely to have been allocated from the primary metaslab in * use at the time, and hence are more likely to have locality with * single-copy data. 
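 * For example, if the child initially picked and a lower-numbered DVA both
 * map to the same top-level vdev, the lower-numbered DVA is used instead.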
*/ static int vdev_mirror_dva_select(zio_t *zio, int p) { dva_t *dva = zio->io_bp->blk_dva; mirror_map_t *mm = zio->io_vsd; int preferred; int c; preferred = mm->mm_preferred[p]; for (p-- ; p >= 0; p--) { c = mm->mm_preferred[p]; if (DVA_GET_VDEV(&dva[c]) == DVA_GET_VDEV(&dva[preferred])) preferred = c; } return (preferred); } static int vdev_mirror_preferred_child_randomize(zio_t *zio) { mirror_map_t *mm = zio->io_vsd; int p; if (mm->mm_root) { p = spa_get_random(mm->mm_preferred_cnt); return (vdev_mirror_dva_select(zio, p)); } /* * To ensure we don't always favour the first matching vdev, * which could lead to wear leveling issues on SSD's, we * use the I/O offset as a pseudo random seed into the vdevs * which have the lowest load. */ p = (zio->io_offset >> vdev_mirror_shift) % mm->mm_preferred_cnt; return (mm->mm_preferred[p]); } /* * Try to find a vdev whose DTL doesn't contain the block we want to read * prefering vdevs based on determined load. * * If we can't, try the read on any vdev we haven't already tried. */ static int vdev_mirror_child_select(zio_t *zio) { mirror_map_t *mm = zio->io_vsd; uint64_t txg = zio->io_txg; int c, lowest_load; ASSERT(zio->io_bp == NULL || BP_PHYSICAL_BIRTH(zio->io_bp) == txg); lowest_load = INT_MAX; mm->mm_preferred_cnt = 0; for (c = 0; c < mm->mm_children; c++) { mirror_child_t *mc; mc = &mm->mm_child[c]; if (mc->mc_tried || mc->mc_skipped) continue; if (!vdev_readable(mc->mc_vd)) { mc->mc_error = SET_ERROR(ENXIO); mc->mc_tried = 1; /* don't even try */ mc->mc_skipped = 1; continue; } if (vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1)) { mc->mc_error = SET_ERROR(ESTALE); mc->mc_skipped = 1; mc->mc_speculative = 1; continue; } mc->mc_load = vdev_mirror_load(mm, mc->mc_vd, mc->mc_offset); if (mc->mc_load > lowest_load) continue; if (mc->mc_load < lowest_load) { lowest_load = mc->mc_load; mm->mm_preferred_cnt = 0; } mm->mm_preferred[mm->mm_preferred_cnt] = c; mm->mm_preferred_cnt++; } if (mm->mm_preferred_cnt == 1) { vdev_queue_register_lastoffset( mm->mm_child[mm->mm_preferred[0]].mc_vd, zio); return (mm->mm_preferred[0]); } if (mm->mm_preferred_cnt > 1) { int c = vdev_mirror_preferred_child_randomize(zio); vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd, zio); return (c); } /* * Every device is either missing or has this txg in its DTL. * Look for any child we haven't already tried before giving up. */ for (c = 0; c < mm->mm_children; c++) { if (!mm->mm_child[c].mc_tried) { vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd, zio); return (c); } } /* * Every child failed. There's no place left to look. */ return (-1); } static void vdev_mirror_io_start(zio_t *zio) { mirror_map_t *mm; mirror_child_t *mc; int c, children; mm = vdev_mirror_map_init(zio); if (zio->io_type == ZIO_TYPE_READ) { if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_replacing && mm->mm_children > 1) { /* * For scrubbing reads we need to allocate a read * buffer for each child and issue reads to all * children. If any child succeeds, it will copy its * data into zio->io_data in vdev_mirror_scrub_done. */ for (c = 0; c < mm->mm_children; c++) { mc = &mm->mm_child[c]; zio_nowait(zio_vdev_child_io(zio, zio->io_bp, mc->mc_vd, mc->mc_offset, zio_buf_alloc(zio->io_size), zio->io_size, zio->io_type, zio->io_priority, 0, vdev_mirror_scrub_done, mc)); } zio_execute(zio); return; } /* * For normal reads just pick one child. 
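 * vdev_mirror_child_select() picks the least-loaded readable child whose
 * DTL does not contain this txg, falling back to any child not yet tried.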
*/ c = vdev_mirror_child_select(zio); children = (c >= 0); } else { ASSERT(zio->io_type == ZIO_TYPE_WRITE || zio->io_type == ZIO_TYPE_FREE); /* * Writes and frees go to all children. */ c = 0; children = mm->mm_children; } while (children--) { mc = &mm->mm_child[c]; zio_nowait(zio_vdev_child_io(zio, zio->io_bp, mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, zio->io_type, zio->io_priority, 0, vdev_mirror_child_done, mc)); c++; } zio_execute(zio); } static int vdev_mirror_worst_error(mirror_map_t *mm) { int error[2] = { 0, 0 }; for (int c = 0; c < mm->mm_children; c++) { mirror_child_t *mc = &mm->mm_child[c]; int s = mc->mc_speculative; error[s] = zio_worst_error(error[s], mc->mc_error); } return (error[0] ? error[0] : error[1]); } static void vdev_mirror_io_done(zio_t *zio) { mirror_map_t *mm = zio->io_vsd; mirror_child_t *mc; int c; int good_copies = 0; int unexpected_errors = 0; for (c = 0; c < mm->mm_children; c++) { mc = &mm->mm_child[c]; if (mc->mc_error) { if (!mc->mc_skipped) unexpected_errors++; } else if (mc->mc_tried) { good_copies++; } } if (zio->io_type == ZIO_TYPE_WRITE) { /* * XXX -- for now, treat partial writes as success. * * Now that we support write reallocation, it would be better * to treat partial failure as real failure unless there are * no non-degraded top-level vdevs left, and not update DTLs * if we intend to reallocate. */ /* XXPOLICY */ if (good_copies != mm->mm_children) { /* * Always require at least one good copy. * * For ditto blocks (io_vd == NULL), require * all copies to be good. * * XXX -- for replacing vdevs, there's no great answer. * If the old device is really dead, we may not even * be able to access it -- so we only want to * require good writes to the new device. But if * the new device turns out to be flaky, we want * to be able to detach it -- which requires all * writes to the old device to have succeeded. */ if (good_copies == 0 || zio->io_vd == NULL) zio->io_error = vdev_mirror_worst_error(mm); } return; } else if (zio->io_type == ZIO_TYPE_FREE) { return; } ASSERT(zio->io_type == ZIO_TYPE_READ); /* * If we don't have a good copy yet, keep trying other children. */ /* XXPOLICY */ if (good_copies == 0 && (c = vdev_mirror_child_select(zio)) != -1) { ASSERT(c >= 0 && c < mm->mm_children); mc = &mm->mm_child[c]; zio_vdev_io_redone(zio); zio_nowait(zio_vdev_child_io(zio, zio->io_bp, mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, ZIO_TYPE_READ, zio->io_priority, 0, vdev_mirror_child_done, mc)); return; } /* XXPOLICY */ if (good_copies == 0) { zio->io_error = vdev_mirror_worst_error(mm); ASSERT(zio->io_error != 0); } if (good_copies && spa_writeable(zio->io_spa) && (unexpected_errors || (zio->io_flags & ZIO_FLAG_RESILVER) || ((zio->io_flags & ZIO_FLAG_SCRUB) && mm->mm_replacing))) { /* * Use the good data we have in hand to repair damaged children. */ for (c = 0; c < mm->mm_children; c++) { /* * Don't rewrite known good children. * Not only is it unnecessary, it could * actually be harmful: if the system lost * power while rewriting the only good copy, * there would be no good copies left! */ mc = &mm->mm_child[c]; if (mc->mc_error == 0) { if (mc->mc_tried) continue; if (!(zio->io_flags & ZIO_FLAG_SCRUB) && !vdev_dtl_contains(mc->mc_vd, DTL_PARTIAL, zio->io_txg, 1)) continue; mc->mc_error = SET_ERROR(ESTALE); } zio_nowait(zio_vdev_child_io(zio, zio->io_bp, mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, ZIO_TYPE_WRITE, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_IO_REPAIR | (unexpected_errors ? 
ZIO_FLAG_SELF_HEAL : 0), NULL, NULL)); } } } static void vdev_mirror_state_change(vdev_t *vd, int faulted, int degraded) { if (faulted == vd->vdev_children) vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN, VDEV_AUX_NO_REPLICAS); else if (degraded + faulted != 0) vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE); else vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE); } vdev_ops_t vdev_mirror_ops = { vdev_mirror_open, vdev_mirror_close, vdev_default_asize, vdev_mirror_io_start, vdev_mirror_io_done, vdev_mirror_state_change, NULL, NULL, VDEV_TYPE_MIRROR, /* name of this vdev type */ B_FALSE /* not a leaf vdev */ }; vdev_ops_t vdev_replacing_ops = { vdev_mirror_open, vdev_mirror_close, vdev_default_asize, vdev_mirror_io_start, vdev_mirror_io_done, vdev_mirror_state_change, NULL, NULL, VDEV_TYPE_REPLACING, /* name of this vdev type */ B_FALSE /* not a leaf vdev */ }; vdev_ops_t vdev_spare_ops = { vdev_mirror_open, vdev_mirror_close, vdev_default_asize, vdev_mirror_io_start, vdev_mirror_io_done, vdev_mirror_state_change, NULL, NULL, VDEV_TYPE_SPARE, /* name of this vdev type */ B_FALSE /* not a leaf vdev */ }; Index: user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c (revision 303642) @@ -1,916 +1,918 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] */ #include #include #include #include #include #include /* * ZFS I/O Scheduler * --------------- * * ZFS issues I/O operations to leaf vdevs to satisfy and complete zios. The * I/O scheduler determines when and in what order those operations are * issued. The I/O scheduler divides operations into six I/O classes * prioritized in the following order: sync read, sync write, async read, * async write, scrub/resilver and trim. Each queue defines the minimum and * maximum number of concurrent operations that may be issued to the device. * In addition, the device has an aggregate maximum. Note that the sum of the * per-queue minimums must not exceed the aggregate maximum, and if the * aggregate maximum is equal to or greater than the sum of the per-queue * maximums, the per-queue minimum has no effect. * * For many physical devices, throughput increases with the number of * concurrent operations, but latency typically suffers. 
Further, physical * devices typically have a limit at which more concurrent operations have no * effect on throughput or can actually cause it to decrease. * * The scheduler selects the next operation to issue by first looking for an * I/O class whose minimum has not been satisfied. Once all are satisfied and * the aggregate maximum has not been hit, the scheduler looks for classes * whose maximum has not been satisfied. Iteration through the I/O classes is * done in the order specified above. No further operations are issued if the * aggregate maximum number of concurrent operations has been hit or if there * are no operations queued for an I/O class that has not hit its maximum. * Every time an I/O is queued or an operation completes, the I/O scheduler * looks for new operations to issue. * * All I/O classes have a fixed maximum number of outstanding operations * except for the async write class. Asynchronous writes represent the data * that is committed to stable storage during the syncing stage for * transaction groups (see txg.c). Transaction groups enter the syncing state * periodically so the number of queued async writes will quickly burst up and * then bleed down to zero. Rather than servicing them as quickly as possible, * the I/O scheduler changes the maximum number of active async write I/Os * according to the amount of dirty data in the pool (see dsl_pool.c). Since * both throughput and latency typically increase with the number of * concurrent operations issued to physical devices, reducing the burstiness * in the number of concurrent operations also stabilizes the response time of * operations from other -- and in particular synchronous -- queues. In broad * strokes, the I/O scheduler will issue more concurrent operations from the * async write queue as there's more dirty data in the pool. * * Async Writes * * The number of concurrent operations issued for the async write I/O class * follows a piece-wise linear function defined by a few adjustable points. * * | o---------| <-- zfs_vdev_async_write_max_active * ^ | /^ | * | | / | | * active | / | | * I/O | / | | * count | / | | * | / | | * |------------o | | <-- zfs_vdev_async_write_min_active * 0|____________^______|_________| * 0% | | 100% of zfs_dirty_data_max * | | * | `-- zfs_vdev_async_write_active_max_dirty_percent * `--------- zfs_vdev_async_write_active_min_dirty_percent * * Until the amount of dirty data exceeds a minimum percentage of the dirty * data allowed in the pool, the I/O scheduler will limit the number of * concurrent operations to the minimum. As that threshold is crossed, the * number of concurrent operations issued increases linearly to the maximum at * the specified maximum percentage of the dirty data allowed in the pool. * * Ideally, the amount of dirty data on a busy pool will stay in the sloped * part of the function between zfs_vdev_async_write_active_min_dirty_percent * and zfs_vdev_async_write_active_max_dirty_percent. If it exceeds the * maximum percentage, this indicates that the rate of incoming data is * greater than the rate that the backend storage can handle. In this case, we * must further throttle incoming writes (see dmu_tx_delay() for details). */ /* * The maximum number of I/Os active to each device. Ideally, this will be >= * the sum of each queue's max_active. It must be at least the sum of each * queue's min_active. */ uint32_t zfs_vdev_max_active = 1000; /* * Per-queue limits on the number of I/Os active to each device. 
If the * sum of the queue's max_active is < zfs_vdev_max_active, then the * min_active comes into play. We will send min_active from each queue, * and then select from queues in the order defined by zio_priority_t. * * In general, smaller max_active's will lead to lower latency of synchronous * operations. Larger max_active's may lead to higher overall throughput, * depending on underlying storage. * * The ratio of the queues' max_actives determines the balance of performance * between reads, writes, and scrubs. E.g., increasing * zfs_vdev_scrub_max_active will cause the scrub or resilver to complete * more quickly, but reads and writes to have higher latency and lower * throughput. */ uint32_t zfs_vdev_sync_read_min_active = 10; uint32_t zfs_vdev_sync_read_max_active = 10; uint32_t zfs_vdev_sync_write_min_active = 10; uint32_t zfs_vdev_sync_write_max_active = 10; uint32_t zfs_vdev_async_read_min_active = 1; uint32_t zfs_vdev_async_read_max_active = 3; uint32_t zfs_vdev_async_write_min_active = 1; uint32_t zfs_vdev_async_write_max_active = 10; uint32_t zfs_vdev_scrub_min_active = 1; uint32_t zfs_vdev_scrub_max_active = 2; uint32_t zfs_vdev_trim_min_active = 1; /* * TRIM max active is large in comparison to the other values due to the fact * that TRIM IOs are coalesced at the device layer. This value is set such * that a typical SSD can process the queued IOs in a single request. */ uint32_t zfs_vdev_trim_max_active = 64; /* * When the pool has less than zfs_vdev_async_write_active_min_dirty_percent * dirty data, use zfs_vdev_async_write_min_active. When it has more than * zfs_vdev_async_write_active_max_dirty_percent, use * zfs_vdev_async_write_max_active. The value is linearly interpolated * between min and max. */ int zfs_vdev_async_write_active_min_dirty_percent = 30; int zfs_vdev_async_write_active_max_dirty_percent = 60; /* * To reduce IOPs, we aggregate small adjacent I/Os into one large I/O. * For read I/Os, we also aggregate across small adjacency gaps; for writes * we include spans of optional I/Os to aid aggregation at the disk even when * they aren't able to help us aggregate at this level. 
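 * For example, with the default zfs_vdev_read_gap_limit of 32KB defined
 * below, two reads separated by a 16KB gap may be issued as a single larger
 * read that also covers the gap.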
*/ int zfs_vdev_aggregation_limit = SPA_OLD_MAXBLOCKSIZE; int zfs_vdev_read_gap_limit = 32 << 10; int zfs_vdev_write_gap_limit = 4 << 10; #ifdef __FreeBSD__ +#ifdef _KERNEL SYSCTL_DECL(_vfs_zfs_vdev); static int sysctl_zfs_async_write_active_min_dirty_percent(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vfs_zfs_vdev, OID_AUTO, async_write_active_min_dirty_percent, CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, 0, sizeof(int), sysctl_zfs_async_write_active_min_dirty_percent, "I", "Percentage of async write dirty data below which " "async_write_min_active is used."); static int sysctl_zfs_async_write_active_max_dirty_percent(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vfs_zfs_vdev, OID_AUTO, async_write_active_max_dirty_percent, CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, 0, sizeof(int), sysctl_zfs_async_write_active_max_dirty_percent, "I", "Percentage of async write dirty data above which " "async_write_max_active is used."); SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, max_active, CTLFLAG_RWTUN, &zfs_vdev_max_active, 0, "The maximum number of I/Os of all types active for each device."); #define ZFS_VDEV_QUEUE_KNOB_MIN(name) \ SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, name ## _min_active, CTLFLAG_RWTUN,\ &zfs_vdev_ ## name ## _min_active, 0, \ "Initial number of I/O requests of type " #name \ " active for each device"); #define ZFS_VDEV_QUEUE_KNOB_MAX(name) \ SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, name ## _max_active, CTLFLAG_RWTUN,\ &zfs_vdev_ ## name ## _max_active, 0, \ "Maximum number of I/O requests of type " #name \ " active for each device"); ZFS_VDEV_QUEUE_KNOB_MIN(sync_read); ZFS_VDEV_QUEUE_KNOB_MAX(sync_read); ZFS_VDEV_QUEUE_KNOB_MIN(sync_write); ZFS_VDEV_QUEUE_KNOB_MAX(sync_write); ZFS_VDEV_QUEUE_KNOB_MIN(async_read); ZFS_VDEV_QUEUE_KNOB_MAX(async_read); ZFS_VDEV_QUEUE_KNOB_MIN(async_write); ZFS_VDEV_QUEUE_KNOB_MAX(async_write); ZFS_VDEV_QUEUE_KNOB_MIN(scrub); ZFS_VDEV_QUEUE_KNOB_MAX(scrub); ZFS_VDEV_QUEUE_KNOB_MIN(trim); ZFS_VDEV_QUEUE_KNOB_MAX(trim); #undef ZFS_VDEV_QUEUE_KNOB SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, aggregation_limit, CTLFLAG_RWTUN, &zfs_vdev_aggregation_limit, 0, "I/O requests are aggregated up to this size"); SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, read_gap_limit, CTLFLAG_RWTUN, &zfs_vdev_read_gap_limit, 0, "Acceptable gap between two reads being aggregated"); SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, write_gap_limit, CTLFLAG_RWTUN, &zfs_vdev_write_gap_limit, 0, "Acceptable gap between two writes being aggregated"); static int sysctl_zfs_async_write_active_min_dirty_percent(SYSCTL_HANDLER_ARGS) { int val, err; val = zfs_vdev_async_write_active_min_dirty_percent; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val < 0 || val > 100 || val >= zfs_vdev_async_write_active_max_dirty_percent) return (EINVAL); zfs_vdev_async_write_active_min_dirty_percent = val; return (0); } static int sysctl_zfs_async_write_active_max_dirty_percent(SYSCTL_HANDLER_ARGS) { int val, err; val = zfs_vdev_async_write_active_max_dirty_percent; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val < 0 || val > 100 || val <= zfs_vdev_async_write_active_min_dirty_percent) return (EINVAL); zfs_vdev_async_write_active_max_dirty_percent = val; return (0); } +#endif #endif int vdev_queue_offset_compare(const void *x1, const void *x2) { const zio_t *z1 = x1; const zio_t *z2 = x2; if (z1->io_offset < z2->io_offset) return (-1); if (z1->io_offset > z2->io_offset) return (1); if (z1 < z2) return (-1); if (z1 > z2) return (1); return (0); } 
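/*
 * An illustrative, standalone restatement of the piecewise-linear
 * async-write scaling described in the scheduler comment above, using only
 * the default tunables defined in this file (min_active 1, max_active 10,
 * thresholds at 30% and 60% of zfs_dirty_data_max).  The authoritative
 * logic lives in vdev_queue_max_async_writes() below; this helper and its
 * name are hypothetical and exist only to make the arithmetic explicit.
 */
static int
async_writes_for_dirty_pct_sketch(int dirty_pct)
{
	int min_active = 1, max_active = 10;	/* async_write_{min,max}_active */
	int min_pct = 30, max_pct = 60;		/* active_{min,max}_dirty_percent */

	if (dirty_pct < min_pct)	/* below the low threshold: use the minimum */
		return (min_active);
	if (dirty_pct > max_pct)	/* above the high threshold: use the maximum */
		return (max_active);
	/* Linear interpolation; e.g. 45% dirty yields 4 + 1 = 5 writes. */
	return ((dirty_pct - min_pct) * (max_active - min_active) /
	    (max_pct - min_pct) + min_active);
}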
static inline avl_tree_t * vdev_queue_class_tree(vdev_queue_t *vq, zio_priority_t p) { return (&vq->vq_class[p].vqc_queued_tree); } static inline avl_tree_t * vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t) { if (t == ZIO_TYPE_READ) return (&vq->vq_read_offset_tree); else if (t == ZIO_TYPE_WRITE) return (&vq->vq_write_offset_tree); else return (NULL); } int vdev_queue_timestamp_compare(const void *x1, const void *x2) { const zio_t *z1 = x1; const zio_t *z2 = x2; if (z1->io_timestamp < z2->io_timestamp) return (-1); if (z1->io_timestamp > z2->io_timestamp) return (1); if (z1->io_offset < z2->io_offset) return (-1); if (z1->io_offset > z2->io_offset) return (1); if (z1 < z2) return (-1); if (z1 > z2) return (1); return (0); } void vdev_queue_init(vdev_t *vd) { vdev_queue_t *vq = &vd->vdev_queue; mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL); vq->vq_vdev = vd; avl_create(&vq->vq_active_tree, vdev_queue_offset_compare, sizeof (zio_t), offsetof(struct zio, io_queue_node)); avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_READ), vdev_queue_offset_compare, sizeof (zio_t), offsetof(struct zio, io_offset_node)); avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE), vdev_queue_offset_compare, sizeof (zio_t), offsetof(struct zio, io_offset_node)); for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { int (*compfn) (const void *, const void *); /* * The synchronous i/o queues are dispatched in FIFO rather * than LBA order. This provides more consistent latency for * these i/os. */ if (p == ZIO_PRIORITY_SYNC_READ || p == ZIO_PRIORITY_SYNC_WRITE) compfn = vdev_queue_timestamp_compare; else compfn = vdev_queue_offset_compare; avl_create(vdev_queue_class_tree(vq, p), compfn, sizeof (zio_t), offsetof(struct zio, io_queue_node)); } vq->vq_lastoffset = 0; } void vdev_queue_fini(vdev_t *vd) { vdev_queue_t *vq = &vd->vdev_queue; for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) avl_destroy(vdev_queue_class_tree(vq, p)); avl_destroy(&vq->vq_active_tree); avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_READ)); avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE)); mutex_destroy(&vq->vq_lock); } static void vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio) { spa_t *spa = zio->io_spa; avl_tree_t *qtt; ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio); qtt = vdev_queue_type_tree(vq, zio->io_type); if (qtt) avl_add(qtt, zio); #ifdef illumos mutex_enter(&spa->spa_iokstat_lock); spa->spa_queue_stats[zio->io_priority].spa_queued++; if (spa->spa_iokstat != NULL) kstat_waitq_enter(spa->spa_iokstat->ks_data); mutex_exit(&spa->spa_iokstat_lock); #endif } static void vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio) { spa_t *spa = zio->io_spa; avl_tree_t *qtt; ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio); qtt = vdev_queue_type_tree(vq, zio->io_type); if (qtt) avl_remove(qtt, zio); #ifdef illumos mutex_enter(&spa->spa_iokstat_lock); ASSERT3U(spa->spa_queue_stats[zio->io_priority].spa_queued, >, 0); spa->spa_queue_stats[zio->io_priority].spa_queued--; if (spa->spa_iokstat != NULL) kstat_waitq_exit(spa->spa_iokstat->ks_data); mutex_exit(&spa->spa_iokstat_lock); #endif } static void vdev_queue_pending_add(vdev_queue_t *vq, zio_t *zio) { spa_t *spa = zio->io_spa; ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); 
vq->vq_class[zio->io_priority].vqc_active++; avl_add(&vq->vq_active_tree, zio); #ifdef illumos mutex_enter(&spa->spa_iokstat_lock); spa->spa_queue_stats[zio->io_priority].spa_active++; if (spa->spa_iokstat != NULL) kstat_runq_enter(spa->spa_iokstat->ks_data); mutex_exit(&spa->spa_iokstat_lock); #endif } static void vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio) { spa_t *spa = zio->io_spa; ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); vq->vq_class[zio->io_priority].vqc_active--; avl_remove(&vq->vq_active_tree, zio); #ifdef illumos mutex_enter(&spa->spa_iokstat_lock); ASSERT3U(spa->spa_queue_stats[zio->io_priority].spa_active, >, 0); spa->spa_queue_stats[zio->io_priority].spa_active--; if (spa->spa_iokstat != NULL) { kstat_io_t *ksio = spa->spa_iokstat->ks_data; kstat_runq_exit(spa->spa_iokstat->ks_data); if (zio->io_type == ZIO_TYPE_READ) { ksio->reads++; ksio->nread += zio->io_size; } else if (zio->io_type == ZIO_TYPE_WRITE) { ksio->writes++; ksio->nwritten += zio->io_size; } } mutex_exit(&spa->spa_iokstat_lock); #endif } static void vdev_queue_agg_io_done(zio_t *aio) { if (aio->io_type == ZIO_TYPE_READ) { zio_t *pio; while ((pio = zio_walk_parents(aio)) != NULL) { bcopy((char *)aio->io_data + (pio->io_offset - aio->io_offset), pio->io_data, pio->io_size); } } zio_buf_free(aio->io_data, aio->io_size); } static int vdev_queue_class_min_active(zio_priority_t p) { switch (p) { case ZIO_PRIORITY_SYNC_READ: return (zfs_vdev_sync_read_min_active); case ZIO_PRIORITY_SYNC_WRITE: return (zfs_vdev_sync_write_min_active); case ZIO_PRIORITY_ASYNC_READ: return (zfs_vdev_async_read_min_active); case ZIO_PRIORITY_ASYNC_WRITE: return (zfs_vdev_async_write_min_active); case ZIO_PRIORITY_SCRUB: return (zfs_vdev_scrub_min_active); case ZIO_PRIORITY_TRIM: return (zfs_vdev_trim_min_active); default: panic("invalid priority %u", p); return (0); } } static __noinline int vdev_queue_max_async_writes(spa_t *spa) { int writes; uint64_t dirty = spa->spa_dsl_pool->dp_dirty_total; uint64_t min_bytes = zfs_dirty_data_max * zfs_vdev_async_write_active_min_dirty_percent / 100; uint64_t max_bytes = zfs_dirty_data_max * zfs_vdev_async_write_active_max_dirty_percent / 100; /* * Sync tasks correspond to interactive user actions. To reduce the * execution time of those actions we push data out as fast as possible. 
*/ if (spa_has_pending_synctask(spa)) { return (zfs_vdev_async_write_max_active); } if (dirty < min_bytes) return (zfs_vdev_async_write_min_active); if (dirty > max_bytes) return (zfs_vdev_async_write_max_active); /* * linear interpolation: * slope = (max_writes - min_writes) / (max_bytes - min_bytes) * move right by min_bytes * move up by min_writes */ writes = (dirty - min_bytes) * (zfs_vdev_async_write_max_active - zfs_vdev_async_write_min_active) / (max_bytes - min_bytes) + zfs_vdev_async_write_min_active; ASSERT3U(writes, >=, zfs_vdev_async_write_min_active); ASSERT3U(writes, <=, zfs_vdev_async_write_max_active); return (writes); } static int vdev_queue_class_max_active(spa_t *spa, zio_priority_t p) { switch (p) { case ZIO_PRIORITY_SYNC_READ: return (zfs_vdev_sync_read_max_active); case ZIO_PRIORITY_SYNC_WRITE: return (zfs_vdev_sync_write_max_active); case ZIO_PRIORITY_ASYNC_READ: return (zfs_vdev_async_read_max_active); case ZIO_PRIORITY_ASYNC_WRITE: return (vdev_queue_max_async_writes(spa)); case ZIO_PRIORITY_SCRUB: return (zfs_vdev_scrub_max_active); case ZIO_PRIORITY_TRIM: return (zfs_vdev_trim_max_active); default: panic("invalid priority %u", p); return (0); } } /* * Return the i/o class to issue from, or ZIO_PRIORITY_MAX_QUEUEABLE if * there is no eligible class. */ static zio_priority_t vdev_queue_class_to_issue(vdev_queue_t *vq) { spa_t *spa = vq->vq_vdev->vdev_spa; zio_priority_t p; ASSERT(MUTEX_HELD(&vq->vq_lock)); if (avl_numnodes(&vq->vq_active_tree) >= zfs_vdev_max_active) return (ZIO_PRIORITY_NUM_QUEUEABLE); /* find a queue that has not reached its minimum # outstanding i/os */ for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 && vq->vq_class[p].vqc_active < vdev_queue_class_min_active(p)) return (p); } /* * If we haven't found a queue, look for one that hasn't reached its * maximum # outstanding i/os. */ for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 && vq->vq_class[p].vqc_active < vdev_queue_class_max_active(spa, p)) return (p); } /* No eligible queued i/os */ return (ZIO_PRIORITY_NUM_QUEUEABLE); } /* * Compute the range spanned by two i/os, which is the endpoint of the last * (lio->io_offset + lio->io_size) minus start of the first (fio->io_offset). * Conveniently, the gap between fio and lio is given by -IO_SPAN(lio, fio); * thus fio and lio are adjacent if and only if IO_SPAN(lio, fio) == 0. */ #define IO_SPAN(fio, lio) ((lio)->io_offset + (lio)->io_size - (fio)->io_offset) #define IO_GAP(fio, lio) (-IO_SPAN(lio, fio)) static zio_t * vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) { zio_t *first, *last, *aio, *dio, *mandatory, *nio; uint64_t maxgap = 0; uint64_t size; boolean_t stretch; avl_tree_t *t; enum zio_flag flags; ASSERT(MUTEX_HELD(&vq->vq_lock)); if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE) return (NULL); first = last = zio; if (zio->io_type == ZIO_TYPE_READ) maxgap = zfs_vdev_read_gap_limit; /* * We can aggregate I/Os that are sufficiently adjacent and of * the same flavor, as expressed by the AGG_INHERIT flags. * The latter requirement is necessary so that certain * attributes of the I/O, such as whether it's a normal I/O * or a scrub/resilver, can be preserved in the aggregate. * We can include optional I/Os, but don't allow them * to begin a range as they add no benefit in that situation. */ /* * We keep track of the last non-optional I/O. */ mandatory = (first->io_flags & ZIO_FLAG_OPTIONAL) ? 
NULL : first; /* * Walk backwards through sufficiently contiguous I/Os * recording the last non-option I/O. */ flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT; t = vdev_queue_type_tree(vq, zio->io_type); while (t != NULL && (dio = AVL_PREV(t, first)) != NULL && (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags && IO_SPAN(dio, last) <= zfs_vdev_aggregation_limit && IO_GAP(dio, first) <= maxgap) { first = dio; if (mandatory == NULL && !(first->io_flags & ZIO_FLAG_OPTIONAL)) mandatory = first; } /* * Skip any initial optional I/Os. */ while ((first->io_flags & ZIO_FLAG_OPTIONAL) && first != last) { first = AVL_NEXT(t, first); ASSERT(first != NULL); } /* * Walk forward through sufficiently contiguous I/Os. */ while ((dio = AVL_NEXT(t, last)) != NULL && (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags && IO_SPAN(first, dio) <= zfs_vdev_aggregation_limit && IO_GAP(last, dio) <= maxgap) { last = dio; if (!(last->io_flags & ZIO_FLAG_OPTIONAL)) mandatory = last; } /* * Now that we've established the range of the I/O aggregation * we must decide what to do with trailing optional I/Os. * For reads, there's nothing to do. While we are unable to * aggregate further, it's possible that a trailing optional * I/O would allow the underlying device to aggregate with * subsequent I/Os. We must therefore determine if the next * non-optional I/O is close enough to make aggregation * worthwhile. */ stretch = B_FALSE; if (zio->io_type == ZIO_TYPE_WRITE && mandatory != NULL) { zio_t *nio = last; while ((dio = AVL_NEXT(t, nio)) != NULL && IO_GAP(nio, dio) == 0 && IO_GAP(mandatory, dio) <= zfs_vdev_write_gap_limit) { nio = dio; if (!(nio->io_flags & ZIO_FLAG_OPTIONAL)) { stretch = B_TRUE; break; } } } if (stretch) { /* This may be a no-op. */ dio = AVL_NEXT(t, last); dio->io_flags &= ~ZIO_FLAG_OPTIONAL; } else { while (last != mandatory && last != first) { ASSERT(last->io_flags & ZIO_FLAG_OPTIONAL); last = AVL_PREV(t, last); ASSERT(last != NULL); } } if (first == last) return (NULL); size = IO_SPAN(first, last); ASSERT3U(size, <=, zfs_vdev_aggregation_limit); aio = zio_vdev_delegated_io(first->io_vd, first->io_offset, zio_buf_alloc(size), size, first->io_type, zio->io_priority, flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE, vdev_queue_agg_io_done, NULL); aio->io_timestamp = first->io_timestamp; nio = first; do { dio = nio; nio = AVL_NEXT(t, dio); ASSERT3U(dio->io_type, ==, aio->io_type); if (dio->io_flags & ZIO_FLAG_NODATA) { ASSERT3U(dio->io_type, ==, ZIO_TYPE_WRITE); bzero((char *)aio->io_data + (dio->io_offset - aio->io_offset), dio->io_size); } else if (dio->io_type == ZIO_TYPE_WRITE) { bcopy(dio->io_data, (char *)aio->io_data + (dio->io_offset - aio->io_offset), dio->io_size); } zio_add_child(dio, aio); vdev_queue_io_remove(vq, dio); zio_vdev_io_bypass(dio); zio_execute(dio); } while (dio != last); return (aio); } static zio_t * vdev_queue_io_to_issue(vdev_queue_t *vq) { zio_t *zio, *aio; zio_priority_t p; avl_index_t idx; avl_tree_t *tree; zio_t search; again: ASSERT(MUTEX_HELD(&vq->vq_lock)); p = vdev_queue_class_to_issue(vq); if (p == ZIO_PRIORITY_NUM_QUEUEABLE) { /* No eligible queued i/os */ return (NULL); } /* * For LBA-ordered queues (async / scrub), issue the i/o which follows * the most recently issued i/o in LBA (offset) order. * * For FIFO queues (sync), issue the i/o with the lowest timestamp. 
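 * (Both cases are handled by the avl_find()/avl_nearest() pair below:
 * searching with io_timestamp == 0 and io_offset == vq_last_offset + 1
 * positions the cursor before any queued entry in a FIFO tree and just
 * past the last issued offset in an LBA tree, and AVL_AFTER then yields
 * the next eligible i/o, falling back to avl_first() on wrap-around.)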
*/ tree = vdev_queue_class_tree(vq, p); search.io_timestamp = 0; search.io_offset = vq->vq_last_offset + 1; VERIFY3P(avl_find(tree, &search, &idx), ==, NULL); zio = avl_nearest(tree, idx, AVL_AFTER); if (zio == NULL) zio = avl_first(tree); ASSERT3U(zio->io_priority, ==, p); aio = vdev_queue_aggregate(vq, zio); if (aio != NULL) zio = aio; else vdev_queue_io_remove(vq, zio); /* * If the I/O is or was optional and therefore has no data, we need to * simply discard it. We need to drop the vdev queue's lock to avoid a * deadlock that we could encounter since this I/O will complete * immediately. */ if (zio->io_flags & ZIO_FLAG_NODATA) { mutex_exit(&vq->vq_lock); zio_vdev_io_bypass(zio); zio_execute(zio); mutex_enter(&vq->vq_lock); goto again; } vdev_queue_pending_add(vq, zio); vq->vq_last_offset = zio->io_offset; return (zio); } zio_t * vdev_queue_io(zio_t *zio) { vdev_queue_t *vq = &zio->io_vd->vdev_queue; zio_t *nio; if (zio->io_flags & ZIO_FLAG_DONT_QUEUE) return (zio); /* * Children i/os inherent their parent's priority, which might * not match the child's i/o type. Fix it up here. */ if (zio->io_type == ZIO_TYPE_READ) { if (zio->io_priority != ZIO_PRIORITY_SYNC_READ && zio->io_priority != ZIO_PRIORITY_ASYNC_READ && zio->io_priority != ZIO_PRIORITY_SCRUB) zio->io_priority = ZIO_PRIORITY_ASYNC_READ; } else if (zio->io_type == ZIO_TYPE_WRITE) { if (zio->io_priority != ZIO_PRIORITY_SYNC_WRITE && zio->io_priority != ZIO_PRIORITY_ASYNC_WRITE) zio->io_priority = ZIO_PRIORITY_ASYNC_WRITE; } else { ASSERT(zio->io_type == ZIO_TYPE_FREE); zio->io_priority = ZIO_PRIORITY_TRIM; } zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE; mutex_enter(&vq->vq_lock); zio->io_timestamp = gethrtime(); vdev_queue_io_add(vq, zio); nio = vdev_queue_io_to_issue(vq); mutex_exit(&vq->vq_lock); if (nio == NULL) return (NULL); if (nio->io_done == vdev_queue_agg_io_done) { zio_nowait(nio); return (NULL); } return (nio); } void vdev_queue_io_done(zio_t *zio) { vdev_queue_t *vq = &zio->io_vd->vdev_queue; zio_t *nio; mutex_enter(&vq->vq_lock); vdev_queue_pending_remove(vq, zio); vq->vq_io_complete_ts = gethrtime(); while ((nio = vdev_queue_io_to_issue(vq)) != NULL) { mutex_exit(&vq->vq_lock); if (nio->io_done == vdev_queue_agg_io_done) { zio_nowait(nio); } else { zio_vdev_io_reissue(nio); zio_execute(nio); } mutex_enter(&vq->vq_lock); } mutex_exit(&vq->vq_lock); } /* * As these three methods are only used for load calculations we're not concerned * if we get an incorrect value on 32bit platforms due to lack of vq_lock mutex * use here, instead we prefer to keep it lock free for performance. */ int vdev_queue_length(vdev_t *vd) { return (avl_numnodes(&vd->vdev_queue.vq_active_tree)); } uint64_t vdev_queue_lastoffset(vdev_t *vd) { return (vd->vdev_queue.vq_lastoffset); } void vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio) { vd->vdev_queue.vq_lastoffset = zio->io_offset + zio->io_size; } Index: user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c (revision 303642) @@ -1,1935 +1,1922 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Portions Copyright 2010 The FreeBSD Foundation * * $FreeBSD$ */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifdef illumos #pragma ident "%Z%%M% %I% %E% SMI" #endif #include #include #include #include #include #ifdef illumos #include #include #include #include #else -#include #include +#include #include #include -#include +#include #include #include #include #include #endif #include #ifdef illumos #include #include #else #include static int uread(proc_t *p, void *kaddr, size_t len, uintptr_t uaddr) { ssize_t n; PHOLD(p); n = proc_readmem(curthread, p, uaddr, kaddr, len); PRELE(p); if (n != len) return (ENOMEM); return (0); } static int uwrite(proc_t *p, void *kaddr, size_t len, uintptr_t uaddr) { ssize_t n; PHOLD(p); n = proc_writemem(curthread, p, uaddr, kaddr, len); PRELE(p); if (n != len) return (ENOMEM); return (0); } #endif /* illumos */ #ifdef __i386__ #define r_rax r_eax #define r_rbx r_ebx #define r_rip r_eip #define r_rflags r_eflags #define r_rsp r_esp #define r_rbp r_ebp #endif /* * Lossless User-Land Tracing on x86 * --------------------------------- * * The execution of most instructions is not dependent on the address; for * these instructions it is sufficient to copy them into the user process's * address space and execute them. To effectively single-step an instruction * in user-land, we copy out the following sequence of instructions to scratch * space in the user thread's ulwp_t structure. * * We then set the program counter (%eip or %rip) to point to this scratch * space. Once execution resumes, the original instruction is executed and * then control flow is redirected to what was originally the subsequent * instruction. If the kernel attemps to deliver a signal while single- * stepping, the signal is deferred and the program counter is moved into the * second sequence of instructions. The second sequence ends in a trap into * the kernel where the deferred signal is then properly handled and delivered. * * For instructions whose execute is position dependent, we perform simple * emulation. These instructions are limited to control transfer * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle * of %rip-relative addressing that means that almost any instruction can be * position dependent. For all the details on how we emulate generic * instructions included %rip-relative instructions, see the code in * fasttrap_pid_probe() below where we handle instructions of type * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing). 
*/ #define FASTTRAP_MODRM_MOD(modrm) (((modrm) >> 6) & 0x3) #define FASTTRAP_MODRM_REG(modrm) (((modrm) >> 3) & 0x7) #define FASTTRAP_MODRM_RM(modrm) ((modrm) & 0x7) #define FASTTRAP_MODRM(mod, reg, rm) (((mod) << 6) | ((reg) << 3) | (rm)) #define FASTTRAP_SIB_SCALE(sib) (((sib) >> 6) & 0x3) #define FASTTRAP_SIB_INDEX(sib) (((sib) >> 3) & 0x7) #define FASTTRAP_SIB_BASE(sib) ((sib) & 0x7) #define FASTTRAP_REX_W(rex) (((rex) >> 3) & 1) #define FASTTRAP_REX_R(rex) (((rex) >> 2) & 1) #define FASTTRAP_REX_X(rex) (((rex) >> 1) & 1) #define FASTTRAP_REX_B(rex) ((rex) & 1) #define FASTTRAP_REX(w, r, x, b) \ (0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b)) /* * Single-byte op-codes. */ #define FASTTRAP_PUSHL_EBP 0x55 #define FASTTRAP_JO 0x70 #define FASTTRAP_JNO 0x71 #define FASTTRAP_JB 0x72 #define FASTTRAP_JAE 0x73 #define FASTTRAP_JE 0x74 #define FASTTRAP_JNE 0x75 #define FASTTRAP_JBE 0x76 #define FASTTRAP_JA 0x77 #define FASTTRAP_JS 0x78 #define FASTTRAP_JNS 0x79 #define FASTTRAP_JP 0x7a #define FASTTRAP_JNP 0x7b #define FASTTRAP_JL 0x7c #define FASTTRAP_JGE 0x7d #define FASTTRAP_JLE 0x7e #define FASTTRAP_JG 0x7f #define FASTTRAP_NOP 0x90 #define FASTTRAP_MOV_EAX 0xb8 #define FASTTRAP_MOV_ECX 0xb9 #define FASTTRAP_RET16 0xc2 #define FASTTRAP_RET 0xc3 #define FASTTRAP_LOOPNZ 0xe0 #define FASTTRAP_LOOPZ 0xe1 #define FASTTRAP_LOOP 0xe2 #define FASTTRAP_JCXZ 0xe3 #define FASTTRAP_CALL 0xe8 #define FASTTRAP_JMP32 0xe9 #define FASTTRAP_JMP8 0xeb #define FASTTRAP_INT3 0xcc #define FASTTRAP_INT 0xcd #define FASTTRAP_2_BYTE_OP 0x0f #define FASTTRAP_GROUP5_OP 0xff /* * Two-byte op-codes (second byte only). */ #define FASTTRAP_0F_JO 0x80 #define FASTTRAP_0F_JNO 0x81 #define FASTTRAP_0F_JB 0x82 #define FASTTRAP_0F_JAE 0x83 #define FASTTRAP_0F_JE 0x84 #define FASTTRAP_0F_JNE 0x85 #define FASTTRAP_0F_JBE 0x86 #define FASTTRAP_0F_JA 0x87 #define FASTTRAP_0F_JS 0x88 #define FASTTRAP_0F_JNS 0x89 #define FASTTRAP_0F_JP 0x8a #define FASTTRAP_0F_JNP 0x8b #define FASTTRAP_0F_JL 0x8c #define FASTTRAP_0F_JGE 0x8d #define FASTTRAP_0F_JLE 0x8e #define FASTTRAP_0F_JG 0x8f #define FASTTRAP_EFLAGS_OF 0x800 #define FASTTRAP_EFLAGS_DF 0x400 #define FASTTRAP_EFLAGS_SF 0x080 #define FASTTRAP_EFLAGS_ZF 0x040 #define FASTTRAP_EFLAGS_AF 0x010 #define FASTTRAP_EFLAGS_PF 0x004 #define FASTTRAP_EFLAGS_CF 0x001 /* * Instruction prefixes. */ #define FASTTRAP_PREFIX_OPERAND 0x66 #define FASTTRAP_PREFIX_ADDRESS 0x67 #define FASTTRAP_PREFIX_CS 0x2E #define FASTTRAP_PREFIX_DS 0x3E #define FASTTRAP_PREFIX_ES 0x26 #define FASTTRAP_PREFIX_FS 0x64 #define FASTTRAP_PREFIX_GS 0x65 #define FASTTRAP_PREFIX_SS 0x36 #define FASTTRAP_PREFIX_LOCK 0xF0 #define FASTTRAP_PREFIX_REP 0xF3 #define FASTTRAP_PREFIX_REPNE 0xF2 #define FASTTRAP_NOREG 0xff /* * Map between instruction register encodings and the kernel constants which * correspond to indicies into struct regs. */ #ifdef __amd64 static const uint8_t regmap[16] = { REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15, }; #else static const uint8_t regmap[8] = { EAX, ECX, EDX, EBX, UESP, EBP, ESI, EDI }; #endif static ulong_t fasttrap_getreg(struct reg *, uint_t); static uint64_t fasttrap_anarg(struct reg *rp, int function_entry, int argno) { uint64_t value = 0; int shift = function_entry ? 1 : 0; #ifdef __amd64 if (curproc->p_model == DATAMODEL_LP64) { uintptr_t *stack; /* * In 64-bit mode, the first six arguments are stored in * registers. 
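	 * Arguments beyond the sixth are read from the user stack below; for
	 * a 32-bit process every argument comes from the stack.  At function
	 * entry the index is shifted by one slot to step over the saved
	 * return address.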
*/ if (argno < 6) switch (argno) { case 0: return (rp->r_rdi); case 1: return (rp->r_rsi); case 2: return (rp->r_rdx); case 3: return (rp->r_rcx); case 4: return (rp->r_r8); case 5: return (rp->r_r9); } stack = (uintptr_t *)rp->r_rsp; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); value = dtrace_fulword(&stack[argno - 6 + shift]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); } else { #endif -#ifdef __i386 - uint32_t *stack = (uint32_t *)rp->r_esp; + uint32_t *stack = (uint32_t *)rp->r_rsp; DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); value = dtrace_fuword32(&stack[argno + shift]); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); -#endif #ifdef __amd64 } #endif return (value); } /*ARGSUSED*/ int fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc, fasttrap_probe_type_t type) { uint8_t instr[FASTTRAP_MAX_INSTR_SIZE + 10]; size_t len = FASTTRAP_MAX_INSTR_SIZE; size_t first = MIN(len, PAGESIZE - (pc & PAGEOFFSET)); uint_t start = 0; int rmindex, size; uint8_t seg, rex = 0; /* * Read the instruction at the given address out of the process's * address space. We don't have to worry about a debugger * changing this instruction before we overwrite it with our trap * instruction since P_PR_LOCK is set. Since instructions can span * pages, we potentially read the instruction in two parts. If the * second part fails, we just zero out that part of the instruction. */ if (uread(p, &instr[0], first, pc) != 0) return (-1); if (len > first && uread(p, &instr[first], len - first, pc + first) != 0) { bzero(&instr[first], len - first); len = first; } /* * If the disassembly fails, then we have a malformed instruction. */ if ((size = dtrace_instr_size_isa(instr, p->p_model, &rmindex)) <= 0) return (-1); /* * Make sure the disassembler isn't completely broken. */ ASSERT(-1 <= rmindex && rmindex < size); /* * If the computed size is greater than the number of bytes read, * then it was a malformed instruction possibly because it fell on a * page boundary and the subsequent page was missing or because of * some malicious user. */ if (size > len) return (-1); tp->ftt_size = (uint8_t)size; tp->ftt_segment = FASTTRAP_SEG_NONE; /* * Find the start of the instruction's opcode by processing any * legacy prefixes. */ for (;;) { seg = 0; switch (instr[start]) { case FASTTRAP_PREFIX_SS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_GS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_FS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_ES: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_DS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_CS: seg++; /*FALLTHRU*/ case FASTTRAP_PREFIX_OPERAND: case FASTTRAP_PREFIX_ADDRESS: case FASTTRAP_PREFIX_LOCK: case FASTTRAP_PREFIX_REP: case FASTTRAP_PREFIX_REPNE: if (seg != 0) { /* * It's illegal for an instruction to specify * two segment prefixes -- give up on this * illegal instruction. */ if (tp->ftt_segment != FASTTRAP_SEG_NONE) return (-1); tp->ftt_segment = seg; } start++; continue; } break; } #ifdef __amd64 /* * Identify the REX prefix on 64-bit processes. */ if (p->p_model == DATAMODEL_LP64 && (instr[start] & 0xf0) == 0x40) rex = instr[start++]; #endif /* * Now that we're pretty sure that the instruction is okay, copy the * valid part to the tracepoint. 
*/ bcopy(instr, tp->ftt_instr, FASTTRAP_MAX_INSTR_SIZE); tp->ftt_type = FASTTRAP_T_COMMON; if (instr[start] == FASTTRAP_2_BYTE_OP) { switch (instr[start + 1]) { case FASTTRAP_0F_JO: case FASTTRAP_0F_JNO: case FASTTRAP_0F_JB: case FASTTRAP_0F_JAE: case FASTTRAP_0F_JE: case FASTTRAP_0F_JNE: case FASTTRAP_0F_JBE: case FASTTRAP_0F_JA: case FASTTRAP_0F_JS: case FASTTRAP_0F_JNS: case FASTTRAP_0F_JP: case FASTTRAP_0F_JNP: case FASTTRAP_0F_JL: case FASTTRAP_0F_JGE: case FASTTRAP_0F_JLE: case FASTTRAP_0F_JG: tp->ftt_type = FASTTRAP_T_JCC; tp->ftt_code = (instr[start + 1] & 0x0f) | FASTTRAP_JO; tp->ftt_dest = pc + tp->ftt_size + /* LINTED - alignment */ *(int32_t *)&instr[start + 2]; break; } } else if (instr[start] == FASTTRAP_GROUP5_OP) { uint_t mod = FASTTRAP_MODRM_MOD(instr[start + 1]); uint_t reg = FASTTRAP_MODRM_REG(instr[start + 1]); uint_t rm = FASTTRAP_MODRM_RM(instr[start + 1]); if (reg == 2 || reg == 4) { uint_t i, sz; if (reg == 2) tp->ftt_type = FASTTRAP_T_CALL; else tp->ftt_type = FASTTRAP_T_JMP; if (mod == 3) tp->ftt_code = 2; else tp->ftt_code = 1; ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); /* * See AMD x86-64 Architecture Programmer's Manual * Volume 3, Section 1.2.7, Table 1-12, and * Appendix A.3.1, Table A-15. */ if (mod != 3 && rm == 4) { uint8_t sib = instr[start + 2]; uint_t index = FASTTRAP_SIB_INDEX(sib); uint_t base = FASTTRAP_SIB_BASE(sib); tp->ftt_scale = FASTTRAP_SIB_SCALE(sib); tp->ftt_index = (index == 4) ? FASTTRAP_NOREG : regmap[index | (FASTTRAP_REX_X(rex) << 3)]; tp->ftt_base = (mod == 0 && base == 5) ? FASTTRAP_NOREG : regmap[base | (FASTTRAP_REX_B(rex) << 3)]; i = 3; sz = mod == 1 ? 1 : 4; } else { /* * In 64-bit mode, mod == 0 and r/m == 5 * denotes %rip-relative addressing; in 32-bit * mode, the base register isn't used. In both * modes, there is a 32-bit operand. */ if (mod == 0 && rm == 5) { #ifdef __amd64 if (p->p_model == DATAMODEL_LP64) tp->ftt_base = REG_RIP; else #endif tp->ftt_base = FASTTRAP_NOREG; sz = 4; } else { uint8_t base = rm | (FASTTRAP_REX_B(rex) << 3); tp->ftt_base = regmap[base]; sz = mod == 1 ? 1 : mod == 2 ? 
4 : 0; } tp->ftt_index = FASTTRAP_NOREG; i = 2; } if (sz == 1) { tp->ftt_dest = *(int8_t *)&instr[start + i]; } else if (sz == 4) { /* LINTED - alignment */ tp->ftt_dest = *(int32_t *)&instr[start + i]; } else { tp->ftt_dest = 0; } } } else { switch (instr[start]) { case FASTTRAP_RET: tp->ftt_type = FASTTRAP_T_RET; break; case FASTTRAP_RET16: tp->ftt_type = FASTTRAP_T_RET16; /* LINTED - alignment */ tp->ftt_dest = *(uint16_t *)&instr[start + 1]; break; case FASTTRAP_JO: case FASTTRAP_JNO: case FASTTRAP_JB: case FASTTRAP_JAE: case FASTTRAP_JE: case FASTTRAP_JNE: case FASTTRAP_JBE: case FASTTRAP_JA: case FASTTRAP_JS: case FASTTRAP_JNS: case FASTTRAP_JP: case FASTTRAP_JNP: case FASTTRAP_JL: case FASTTRAP_JGE: case FASTTRAP_JLE: case FASTTRAP_JG: tp->ftt_type = FASTTRAP_T_JCC; tp->ftt_code = instr[start]; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_LOOPNZ: case FASTTRAP_LOOPZ: case FASTTRAP_LOOP: tp->ftt_type = FASTTRAP_T_LOOP; tp->ftt_code = instr[start]; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_JCXZ: tp->ftt_type = FASTTRAP_T_JCXZ; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_CALL: tp->ftt_type = FASTTRAP_T_CALL; tp->ftt_dest = pc + tp->ftt_size + /* LINTED - alignment */ *(int32_t *)&instr[start + 1]; tp->ftt_code = 0; break; case FASTTRAP_JMP32: tp->ftt_type = FASTTRAP_T_JMP; tp->ftt_dest = pc + tp->ftt_size + /* LINTED - alignment */ *(int32_t *)&instr[start + 1]; break; case FASTTRAP_JMP8: tp->ftt_type = FASTTRAP_T_JMP; tp->ftt_dest = pc + tp->ftt_size + (int8_t)instr[start + 1]; break; case FASTTRAP_PUSHL_EBP: if (start == 0) tp->ftt_type = FASTTRAP_T_PUSHL_EBP; break; case FASTTRAP_NOP: #ifdef __amd64 ASSERT(p->p_model == DATAMODEL_LP64 || rex == 0); /* * On amd64 we have to be careful not to confuse a nop * (actually xchgl %eax, %eax) with an instruction using * the same opcode, but that does something different * (e.g. xchgl %r8d, %eax or xcghq %r8, %rax). */ if (FASTTRAP_REX_B(rex) == 0) #endif tp->ftt_type = FASTTRAP_T_NOP; break; case FASTTRAP_INT3: /* * The pid provider shares the int3 trap with debugger * breakpoints so we can't instrument them. */ ASSERT(instr[start] == FASTTRAP_INSTR); return (-1); case FASTTRAP_INT: /* * Interrupts seem like they could be traced with * no negative implications, but it's possible that * a thread could be redirected by the trap handling * code which would eventually return to the * instruction after the interrupt. If the interrupt * were in our scratch space, the subsequent * instruction might be overwritten before we return. * Accordingly we refuse to instrument any interrupt. */ return (-1); } } #ifdef __amd64 if (p->p_model == DATAMODEL_LP64 && tp->ftt_type == FASTTRAP_T_COMMON) { /* * If the process is 64-bit and the instruction type is still * FASTTRAP_T_COMMON -- meaning we're going to copy it out an * execute it -- we need to watch for %rip-relative * addressing mode. See the portion of fasttrap_pid_probe() * below where we handle tracepoints with type * FASTTRAP_T_COMMON for how we emulate instructions that * employ %rip-relative addressing. */ if (rmindex != -1) { uint_t mod = FASTTRAP_MODRM_MOD(instr[rmindex]); uint_t reg = FASTTRAP_MODRM_REG(instr[rmindex]); uint_t rm = FASTTRAP_MODRM_RM(instr[rmindex]); ASSERT(rmindex > start); if (mod == 0 && rm == 5) { /* * We need to be sure to avoid other * registers used by this instruction. 
While * the reg field may determine the op code * rather than denoting a register, assuming * that it denotes a register is always safe. * We leave the REX field intact and use * whatever value's there for simplicity. */ if (reg != 0) { tp->ftt_ripmode = FASTTRAP_RIP_1 | (FASTTRAP_RIP_X * FASTTRAP_REX_B(rex)); rm = 0; } else { tp->ftt_ripmode = FASTTRAP_RIP_2 | (FASTTRAP_RIP_X * FASTTRAP_REX_B(rex)); rm = 1; } tp->ftt_modrm = tp->ftt_instr[rmindex]; tp->ftt_instr[rmindex] = FASTTRAP_MODRM(2, reg, rm); } } } #endif return (0); } int fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp) { fasttrap_instr_t instr = FASTTRAP_INSTR; if (uwrite(p, &instr, 1, tp->ftt_pc) != 0) return (-1); return (0); } int fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp) { uint8_t instr; /* * Distinguish between read or write failures and a changed * instruction. */ if (uread(p, &instr, 1, tp->ftt_pc) != 0) return (0); if (instr != FASTTRAP_INSTR) return (0); if (uwrite(p, &tp->ftt_instr[0], 1, tp->ftt_pc) != 0) return (-1); return (0); } #ifdef __amd64 static uintptr_t fasttrap_fulword_noerr(const void *uaddr) { uintptr_t ret; if ((ret = fasttrap_fulword(uaddr)) != -1) return (ret); return (0); } #endif -#ifdef __i386__ static uint32_t fasttrap_fuword32_noerr(const void *uaddr) { uint32_t ret; if ((ret = fasttrap_fuword32(uaddr)) != -1) return (ret); return (0); } -#endif static void fasttrap_return_common(struct reg *rp, uintptr_t pc, pid_t pid, uintptr_t new_pc) { fasttrap_tracepoint_t *tp; fasttrap_bucket_t *bucket; fasttrap_id_t *id; #ifdef illumos kmutex_t *pid_mtx; pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; mutex_enter(pid_mtx); #else struct rm_priotracker tracker; rm_rlock(&fasttrap_tp_lock, &tracker); #endif bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && pc == tp->ftt_pc && tp->ftt_proc->ftpc_acount != 0) break; } /* * Don't sweat it if we can't find the tracepoint again; unlike * when we're in fasttrap_pid_probe(), finding the tracepoint here * is not essential to the correct execution of the process. */ if (tp == NULL) { #ifdef illumos mutex_exit(pid_mtx); #else rm_runlock(&fasttrap_tp_lock, &tracker); #endif return; } for (id = tp->ftt_retids; id != NULL; id = id->fti_next) { /* * If there's a branch that could act as a return site, we * need to trace it, and check here if the program counter is * external to the function. 
*/ if (tp->ftt_type != FASTTRAP_T_RET && tp->ftt_type != FASTTRAP_T_RET16 && new_pc - id->fti_probe->ftp_faddr < id->fti_probe->ftp_fsize) continue; dtrace_probe(id->fti_probe->ftp_id, pc - id->fti_probe->ftp_faddr, rp->r_rax, rp->r_rbx, 0, 0); } #ifdef illumos mutex_exit(pid_mtx); #else rm_runlock(&fasttrap_tp_lock, &tracker); #endif } static void fasttrap_sigsegv(proc_t *p, kthread_t *t, uintptr_t addr) { #ifdef illumos sigqueue_t *sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP); sqp->sq_info.si_signo = SIGSEGV; sqp->sq_info.si_code = SEGV_MAPERR; sqp->sq_info.si_addr = (caddr_t)addr; mutex_enter(&p->p_lock); sigaddqa(p, t, sqp); mutex_exit(&p->p_lock); if (t != NULL) aston(t); #else ksiginfo_t *ksi = kmem_zalloc(sizeof (ksiginfo_t), KM_SLEEP); ksiginfo_init(ksi); ksi->ksi_signo = SIGSEGV; ksi->ksi_code = SEGV_MAPERR; ksi->ksi_addr = (caddr_t)addr; (void) tdksignal(t, SIGSEGV, ksi); #endif } #ifdef __amd64 static void fasttrap_usdt_args64(fasttrap_probe_t *probe, struct reg *rp, int argc, uintptr_t *argv) { int i, x, cap = MIN(argc, probe->ftp_nargs); uintptr_t *stack = (uintptr_t *)rp->r_rsp; for (i = 0; i < cap; i++) { x = probe->ftp_argmap[i]; if (x < 6) argv[i] = (&rp->r_rdi)[x]; else argv[i] = fasttrap_fulword_noerr(&stack[x]); } for (; i < argc; i++) { argv[i] = 0; } } #endif -#ifdef __i386__ static void fasttrap_usdt_args32(fasttrap_probe_t *probe, struct reg *rp, int argc, uint32_t *argv) { int i, x, cap = MIN(argc, probe->ftp_nargs); uint32_t *stack = (uint32_t *)rp->r_rsp; for (i = 0; i < cap; i++) { x = probe->ftp_argmap[i]; argv[i] = fasttrap_fuword32_noerr(&stack[x]); } for (; i < argc; i++) { argv[i] = 0; } } -#endif static int fasttrap_do_seg(fasttrap_tracepoint_t *tp, struct reg *rp, uintptr_t *addr) { proc_t *p = curproc; #ifdef __i386__ struct segment_descriptor *desc; #else struct user_segment_descriptor *desc; #endif uint16_t sel = 0, ndx, type; uintptr_t limit; switch (tp->ftt_segment) { case FASTTRAP_SEG_CS: sel = rp->r_cs; break; case FASTTRAP_SEG_DS: sel = rp->r_ds; break; case FASTTRAP_SEG_ES: sel = rp->r_es; break; case FASTTRAP_SEG_FS: sel = rp->r_fs; break; case FASTTRAP_SEG_GS: sel = rp->r_gs; break; case FASTTRAP_SEG_SS: sel = rp->r_ss; break; } /* * Make sure the given segment register specifies a user priority * selector rather than a kernel selector. */ if (ISPL(sel) != SEL_UPL) return (-1); ndx = IDXSEL(sel); /* * Check the bounds and grab the descriptor out of the specified * descriptor table. */ if (ISLDT(sel)) { #ifdef __i386__ if (ndx > p->p_md.md_ldt->ldt_len) return (-1); desc = (struct segment_descriptor *) p->p_md.md_ldt[ndx].ldt_base; #else if (ndx > max_ldt_segment) return (-1); desc = (struct user_segment_descriptor *) p->p_md.md_ldt[ndx].ldt_base; #endif } else { if (ndx >= NGDT) return (-1); #ifdef __i386__ desc = &gdt[ndx].sd; #else desc = &gdt[ndx]; #endif } /* * The descriptor must have user privilege level and it must be * present in memory. */ if (desc->sd_dpl != SEL_UPL || desc->sd_p != 1) return (-1); type = desc->sd_type; /* * If the S bit in the type field is not set, this descriptor can * only be used in system context. */ if ((type & 0x10) != 0x10) return (-1); limit = USD_GETLIMIT(desc) * (desc->sd_gran ? PAGESIZE : 1); if (tp->ftt_segment == FASTTRAP_SEG_CS) { /* * The code/data bit and readable bit must both be set. */ if ((type & 0xa) != 0xa) return (-1); if (*addr > limit) return (-1); } else { /* * The code/data bit must be clear. 
*/ if ((type & 0x8) != 0) return (-1); /* * If the expand-down bit is clear, we just check the limit as * it would naturally be applied. Otherwise, we need to check * that the address is the range [limit + 1 .. 0xffff] or * [limit + 1 ... 0xffffffff] depending on if the default * operand size bit is set. */ if ((type & 0x4) == 0) { if (*addr > limit) return (-1); } else if (desc->sd_def32) { if (*addr < limit + 1 || 0xffff < *addr) return (-1); } else { if (*addr < limit + 1 || 0xffffffff < *addr) return (-1); } } *addr += USD_GETBASE(desc); return (0); } int fasttrap_pid_probe(struct reg *rp) { proc_t *p = curproc; #ifndef illumos struct rm_priotracker tracker; proc_t *pp; #endif uintptr_t pc = rp->r_rip - 1; uintptr_t new_pc = 0; fasttrap_bucket_t *bucket; #ifdef illumos kmutex_t *pid_mtx; #endif fasttrap_tracepoint_t *tp, tp_local; pid_t pid; dtrace_icookie_t cookie; uint_t is_enabled = 0; /* * It's possible that a user (in a veritable orgy of bad planning) * could redirect this thread's flow of control before it reached the * return probe fasttrap. In this case we need to kill the process * since it's in a unrecoverable state. */ if (curthread->t_dtrace_step) { ASSERT(curthread->t_dtrace_on); fasttrap_sigtrap(p, curthread, pc); return (0); } /* * Clear all user tracing flags. */ curthread->t_dtrace_ft = 0; curthread->t_dtrace_pc = 0; curthread->t_dtrace_npc = 0; curthread->t_dtrace_scrpc = 0; curthread->t_dtrace_astpc = 0; #ifdef __amd64 curthread->t_dtrace_regv = 0; #endif /* * Treat a child created by a call to vfork(2) as if it were its * parent. We know that there's only one thread of control in such a * process: this one. */ #ifdef illumos while (p->p_flag & SVFORK) { p = p->p_parent; } pid = p->p_pid; pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock; mutex_enter(pid_mtx); #else pp = p; sx_slock(&proctree_lock); while (pp->p_vmspace == pp->p_pptr->p_vmspace) pp = pp->p_pptr; pid = pp->p_pid; sx_sunlock(&proctree_lock); pp = NULL; rm_rlock(&fasttrap_tp_lock, &tracker); #endif bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)]; /* * Lookup the tracepoint that the process just hit. */ for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) { if (pid == tp->ftt_pid && pc == tp->ftt_pc && tp->ftt_proc->ftpc_acount != 0) break; } /* * If we couldn't find a matching tracepoint, either a tracepoint has * been inserted without using the pid ioctl interface (see * fasttrap_ioctl), or somehow we have mislaid this tracepoint. */ if (tp == NULL) { #ifdef illumos mutex_exit(pid_mtx); #else rm_runlock(&fasttrap_tp_lock, &tracker); #endif return (-1); } /* * Set the program counter to the address of the traced instruction * so that it looks right in ustack() output. */ rp->r_rip = pc; if (tp->ftt_ids != NULL) { fasttrap_id_t *id; #ifdef __amd64 if (p->p_model == DATAMODEL_LP64) { for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { fasttrap_probe_t *probe = id->fti_probe; if (id->fti_ptype == DTFTP_ENTRY) { /* * We note that this was an entry * probe to help ustack() find the * first caller. */ cookie = dtrace_interrupt_disable(); DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); dtrace_probe(probe->ftp_id, rp->r_rdi, rp->r_rsi, rp->r_rdx, rp->r_rcx, rp->r_r8); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); dtrace_interrupt_enable(cookie); } else if (id->fti_ptype == DTFTP_IS_ENABLED) { /* * Note that in this case, we don't * call dtrace_probe() since it's only * an artificial probe meant to change * the flow of control so that it * encounters the true probe. 
*/ is_enabled = 1; } else if (probe->ftp_argmap == NULL) { dtrace_probe(probe->ftp_id, rp->r_rdi, rp->r_rsi, rp->r_rdx, rp->r_rcx, rp->r_r8); } else { uintptr_t t[5]; fasttrap_usdt_args64(probe, rp, sizeof (t) / sizeof (t[0]), t); dtrace_probe(probe->ftp_id, t[0], t[1], t[2], t[3], t[4]); } } } else { -#else /* __amd64 */ +#endif uintptr_t s0, s1, s2, s3, s4, s5; - uint32_t *stack = (uint32_t *)rp->r_esp; + uint32_t *stack = (uint32_t *)rp->r_rsp; /* * In 32-bit mode, all arguments are passed on the * stack. If this is a function entry probe, we need * to skip the first entry on the stack as it * represents the return address rather than a * parameter to the function. */ s0 = fasttrap_fuword32_noerr(&stack[0]); s1 = fasttrap_fuword32_noerr(&stack[1]); s2 = fasttrap_fuword32_noerr(&stack[2]); s3 = fasttrap_fuword32_noerr(&stack[3]); s4 = fasttrap_fuword32_noerr(&stack[4]); s5 = fasttrap_fuword32_noerr(&stack[5]); for (id = tp->ftt_ids; id != NULL; id = id->fti_next) { fasttrap_probe_t *probe = id->fti_probe; if (id->fti_ptype == DTFTP_ENTRY) { /* * We note that this was an entry * probe to help ustack() find the * first caller. */ cookie = dtrace_interrupt_disable(); DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY); dtrace_probe(probe->ftp_id, s1, s2, s3, s4, s5); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY); dtrace_interrupt_enable(cookie); } else if (id->fti_ptype == DTFTP_IS_ENABLED) { /* * Note that in this case, we don't * call dtrace_probe() since it's only * an artificial probe meant to change * the flow of control so that it * encounters the true probe. */ is_enabled = 1; } else if (probe->ftp_argmap == NULL) { dtrace_probe(probe->ftp_id, s0, s1, s2, s3, s4); } else { uint32_t t[5]; fasttrap_usdt_args32(probe, rp, sizeof (t) / sizeof (t[0]), t); dtrace_probe(probe->ftp_id, t[0], t[1], t[2], t[3], t[4]); } } -#endif /* __amd64 */ #ifdef __amd64 } #endif } /* * We're about to do a bunch of work so we cache a local copy of * the tracepoint to emulate the instruction, and then find the * tracepoint again later if we need to light up any return probes. */ tp_local = *tp; #ifdef illumos mutex_exit(pid_mtx); #else rm_runlock(&fasttrap_tp_lock, &tracker); #endif tp = &tp_local; /* * Set the program counter to appear as though the traced instruction * had completely executed. This ensures that fasttrap_getreg() will * report the expected value for REG_RIP. */ rp->r_rip = pc + tp->ftt_size; /* * If there's an is-enabled probe connected to this tracepoint it * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax' * instruction that was placed there by DTrace when the binary was * linked. As this probe is, in fact, enabled, we need to stuff 1 * into %eax or %rax. Accordingly, we can bypass all the instruction * emulation logic since we know the inevitable result. It's possible * that a user could construct a scenario where the 'is-enabled' * probe was on some other instruction, but that would be a rather * exotic way to shoot oneself in the foot. */ if (is_enabled) { rp->r_rax = 1; new_pc = rp->r_rip; goto done; } /* * We emulate certain types of instructions to ensure correctness * (in the case of position dependent instructions) or optimize * common cases. The rest we have the thread execute back in user- * land. */ switch (tp->ftt_type) { case FASTTRAP_T_RET: case FASTTRAP_T_RET16: { uintptr_t dst = 0; uintptr_t addr = 0; int ret = 0; /* * We have to emulate _every_ facet of the behavior of a ret * instruction including what happens if the load from %esp * fails; in that case, we send a SIGSEGV. 
*/ #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { ret = dst = fasttrap_fulword((void *)rp->r_rsp); addr = rp->r_rsp + sizeof (uintptr_t); } else { #endif -#ifdef __i386__ uint32_t dst32; - ret = dst32 = fasttrap_fuword32((void *)rp->r_esp); + ret = dst32 = fasttrap_fuword32((void *)rp->r_rsp); dst = dst32; - addr = rp->r_esp + sizeof (uint32_t); -#endif + addr = rp->r_rsp + sizeof (uint32_t); #ifdef __amd64 } #endif if (ret == -1) { fasttrap_sigsegv(p, curthread, rp->r_rsp); new_pc = pc; break; } if (tp->ftt_type == FASTTRAP_T_RET16) addr += tp->ftt_dest; rp->r_rsp = addr; new_pc = dst; break; } case FASTTRAP_T_JCC: { uint_t taken = 0; switch (tp->ftt_code) { case FASTTRAP_JO: taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) != 0; break; case FASTTRAP_JNO: taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0; break; case FASTTRAP_JB: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0; break; case FASTTRAP_JAE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0; break; case FASTTRAP_JE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0; break; case FASTTRAP_JNE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0; break; case FASTTRAP_JBE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0 || (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0; break; case FASTTRAP_JA: taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0 && (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0; break; case FASTTRAP_JS: taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) != 0; break; case FASTTRAP_JNS: taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0; break; case FASTTRAP_JP: taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) != 0; break; case FASTTRAP_JNP: taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) == 0; break; case FASTTRAP_JL: taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) != ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; case FASTTRAP_JGE: taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) == ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; case FASTTRAP_JLE: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 || ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) != ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; case FASTTRAP_JG: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 && ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) == ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0); break; } if (taken) new_pc = tp->ftt_dest; else new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_LOOP: { uint_t taken = 0; #ifdef __amd64 greg_t cx = rp->r_rcx--; #else greg_t cx = rp->r_ecx--; #endif switch (tp->ftt_code) { case FASTTRAP_LOOPNZ: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 && cx != 0; break; case FASTTRAP_LOOPZ: taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 && cx != 0; break; case FASTTRAP_LOOP: taken = (cx != 0); break; } if (taken) new_pc = tp->ftt_dest; else new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_JCXZ: { #ifdef __amd64 greg_t cx = rp->r_rcx; #else greg_t cx = rp->r_ecx; #endif if (cx == 0) new_pc = tp->ftt_dest; else new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_PUSHL_EBP: { int ret = 0; #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { rp->r_rsp -= sizeof (uintptr_t); ret = fasttrap_sulword((void *)rp->r_rsp, rp->r_rbp); } else { #endif -#ifdef __i386__ rp->r_rsp -= sizeof (uint32_t); ret = fasttrap_suword32((void *)rp->r_rsp, rp->r_rbp); -#endif #ifdef __amd64 } #endif if (ret == -1) { fasttrap_sigsegv(p, curthread, rp->r_rsp); new_pc = pc; break; } new_pc = pc + tp->ftt_size; break; } case FASTTRAP_T_NOP: new_pc = pc + tp->ftt_size; break; case FASTTRAP_T_JMP: case FASTTRAP_T_CALL: if (tp->ftt_code == 0) { new_pc = tp->ftt_dest; } 
else { uintptr_t value, addr = tp->ftt_dest; if (tp->ftt_base != FASTTRAP_NOREG) addr += fasttrap_getreg(rp, tp->ftt_base); if (tp->ftt_index != FASTTRAP_NOREG) addr += fasttrap_getreg(rp, tp->ftt_index) << tp->ftt_scale; if (tp->ftt_code == 1) { /* * If there's a segment prefix for this * instruction, we'll need to check permissions * and bounds on the given selector, and adjust * the address accordingly. */ if (tp->ftt_segment != FASTTRAP_SEG_NONE && fasttrap_do_seg(tp, rp, &addr) != 0) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { #endif if ((value = fasttrap_fulword((void *)addr)) == -1) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } new_pc = value; #ifdef __amd64 } else { uint32_t value32; addr = (uintptr_t)(uint32_t)addr; if ((value32 = fasttrap_fuword32((void *)addr)) == -1) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } new_pc = value32; } #endif } else { new_pc = addr; } } /* * If this is a call instruction, we need to push the return * address onto the stack. If this fails, we send the process * a SIGSEGV and reset the pc to emulate what would happen if * this instruction weren't traced. */ if (tp->ftt_type == FASTTRAP_T_CALL) { int ret = 0; uintptr_t addr = 0, pcps; #ifdef __amd64 if (p->p_model == DATAMODEL_NATIVE) { addr = rp->r_rsp - sizeof (uintptr_t); pcps = pc + tp->ftt_size; ret = fasttrap_sulword((void *)addr, pcps); } else { #endif addr = rp->r_rsp - sizeof (uint32_t); pcps = (uint32_t)(pc + tp->ftt_size); ret = fasttrap_suword32((void *)addr, pcps); #ifdef __amd64 } #endif if (ret == -1) { fasttrap_sigsegv(p, curthread, addr); new_pc = pc; break; } rp->r_rsp = addr; } break; case FASTTRAP_T_COMMON: { uintptr_t addr; #if defined(__amd64) uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22]; #else uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7]; #endif uint_t i = 0; #ifdef illumos klwp_t *lwp = ttolwp(curthread); /* * Compute the address of the ulwp_t and step over the * ul_self pointer. The method used to store the user-land * thread pointer is very different on 32- and 64-bit * kernels. */ #if defined(__amd64) if (p->p_model == DATAMODEL_LP64) { addr = lwp->lwp_pcb.pcb_fsbase; addr += sizeof (void *); } else { addr = lwp->lwp_pcb.pcb_gsbase; addr += sizeof (caddr32_t); } #else addr = USD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc); addr += sizeof (void *); #endif #else /* !illumos */ fasttrap_scrspace_t *scrspace; scrspace = fasttrap_scraddr(curthread, tp->ftt_proc); if (scrspace == NULL) { /* * We failed to allocate scratch space for this thread. * Try to write the original instruction back out and * reset the pc. */ if (fasttrap_copyout(tp->ftt_instr, (void *)pc, tp->ftt_size)) fasttrap_sigtrap(p, curthread, pc); new_pc = pc; break; } addr = scrspace->ftss_addr; #endif /* illumos */ /* * Generic Instruction Tracing * --------------------------- * * This is the layout of the scratch space in the user-land * thread structure for our generated instructions. * * 32-bit mode bytes * ------------------------ ----- * a: <= 15 * jmp ftt_size> 5 * b: <= 15 * int T_DTRACE_RET 2 * ----- * <= 37 * * 64-bit mode bytes * ------------------------ ----- * a: <= 15 * jmp 0(%rip) 6 * ftt_size> 8 * b: <= 15 * int T_DTRACE_RET 2 * ----- * <= 46 * * The %pc is set to a, and curthread->t_dtrace_astpc is set * to b. 
If we encounter a signal on the way out of the * kernel, trap() will set %pc to curthread->t_dtrace_astpc * so that we execute the original instruction and re-enter * the kernel rather than redirecting to the next instruction. * * If there are return probes (so we know that we're going to * need to reenter the kernel after executing the original * instruction), the scratch space will just contain the * original instruction followed by an interrupt -- the same * data as at b. * * %rip-relative Addressing * ------------------------ * * There's a further complication in 64-bit mode due to %rip- * relative addressing. While this is clearly a beneficial * architectural decision for position independent code, it's * hard not to see it as a personal attack against the pid * provider since before there was a relatively small set of * instructions to emulate; with %rip-relative addressing, * almost every instruction can potentially depend on the * address at which it's executed. Rather than emulating * the broad spectrum of instructions that can now be * position dependent, we emulate jumps and others as in * 32-bit mode, and take a different tack for instructions * using %rip-relative addressing. * * For every instruction that uses the ModRM byte, the * in-kernel disassembler reports its location. We use the * ModRM byte to identify that an instruction uses * %rip-relative addressing and to see what other registers * the instruction uses. To emulate those instructions, * we modify the instruction to be %rax-relative rather than * %rip-relative (or %rcx-relative if the instruction uses * %rax; or %r8- or %r9-relative if the REX.B is present so * we don't have to rewrite the REX prefix). We then load * the value that %rip would have been into the scratch * register and generate an instruction to reset the scratch * register back to its original value. The instruction * sequence looks like this: * * 64-mode %rip-relative bytes * ------------------------ ----- * a: <= 15 * movq $, % 6 * jmp 0(%rip) 6 * ftt_size> 8 * b: <= 15 * int T_DTRACE_RET 2 * ----- * 52 * * We set curthread->t_dtrace_regv so that upon receiving * a signal we can reset the value of the scratch register. */ ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE); curthread->t_dtrace_scrpc = addr; bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); i += tp->ftt_size; #ifdef __amd64 if (tp->ftt_ripmode != 0) { greg_t *reg = NULL; ASSERT(p->p_model == DATAMODEL_LP64); ASSERT(tp->ftt_ripmode & (FASTTRAP_RIP_1 | FASTTRAP_RIP_2)); /* * If this was a %rip-relative instruction, we change * it to be either a %rax- or %rcx-relative * instruction (depending on whether those registers * are used as another operand; or %r8- or %r9- * relative depending on the value of REX.B). We then * set that register and generate a movq instruction * to reset the value. 
*/ if (tp->ftt_ripmode & FASTTRAP_RIP_X) scratch[i++] = FASTTRAP_REX(1, 0, 0, 1); else scratch[i++] = FASTTRAP_REX(1, 0, 0, 0); if (tp->ftt_ripmode & FASTTRAP_RIP_1) scratch[i++] = FASTTRAP_MOV_EAX; else scratch[i++] = FASTTRAP_MOV_ECX; switch (tp->ftt_ripmode) { case FASTTRAP_RIP_1: reg = &rp->r_rax; curthread->t_dtrace_reg = REG_RAX; break; case FASTTRAP_RIP_2: reg = &rp->r_rcx; curthread->t_dtrace_reg = REG_RCX; break; case FASTTRAP_RIP_1 | FASTTRAP_RIP_X: reg = &rp->r_r8; curthread->t_dtrace_reg = REG_R8; break; case FASTTRAP_RIP_2 | FASTTRAP_RIP_X: reg = &rp->r_r9; curthread->t_dtrace_reg = REG_R9; break; } /* LINTED - alignment */ *(uint64_t *)&scratch[i] = *reg; curthread->t_dtrace_regv = *reg; *reg = pc + tp->ftt_size; i += sizeof (uint64_t); } #endif /* * Generate the branch instruction to what would have * normally been the subsequent instruction. In 32-bit mode, * this is just a relative branch; in 64-bit mode this is a * %rip-relative branch that loads the 64-bit pc value * immediately after the jmp instruction. */ #ifdef __amd64 if (p->p_model == DATAMODEL_LP64) { scratch[i++] = FASTTRAP_GROUP5_OP; scratch[i++] = FASTTRAP_MODRM(0, 4, 5); /* LINTED - alignment */ *(uint32_t *)&scratch[i] = 0; i += sizeof (uint32_t); /* LINTED - alignment */ *(uint64_t *)&scratch[i] = pc + tp->ftt_size; i += sizeof (uint64_t); } else { #endif -#ifdef __i386__ /* * Set up the jmp to the next instruction; note that * the size of the traced instruction cancels out. */ scratch[i++] = FASTTRAP_JMP32; /* LINTED - alignment */ *(uint32_t *)&scratch[i] = pc - addr - 5; i += sizeof (uint32_t); -#endif #ifdef __amd64 } #endif curthread->t_dtrace_astpc = addr + i; bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size); i += tp->ftt_size; scratch[i++] = FASTTRAP_INT; scratch[i++] = T_DTRACE_RET; ASSERT(i <= sizeof (scratch)); #ifdef illumos if (fasttrap_copyout(scratch, (char *)addr, i)) { #else if (uwrite(p, scratch, i, addr)) { #endif fasttrap_sigtrap(p, curthread, pc); new_pc = pc; break; } if (tp->ftt_retids != NULL) { curthread->t_dtrace_step = 1; curthread->t_dtrace_ret = 1; new_pc = curthread->t_dtrace_astpc; } else { new_pc = curthread->t_dtrace_scrpc; } curthread->t_dtrace_pc = pc; curthread->t_dtrace_npc = pc + tp->ftt_size; curthread->t_dtrace_on = 1; break; } default: panic("fasttrap: mishandled an instruction"); } done: /* * If there were no return probes when we first found the tracepoint, * we should feel no obligation to honor any return probes that were * subsequently enabled -- they'll just have to wait until the next * time around. */ if (tp->ftt_retids != NULL) { /* * We need to wait until the results of the instruction are * apparent before invoking any return probes. If this * instruction was emulated we can just call * fasttrap_return_common(); if it needs to be executed, we * need to wait until the user thread returns to the kernel. */ if (tp->ftt_type != FASTTRAP_T_COMMON) { /* * Set the program counter to the address of the traced * instruction so that it looks right in ustack() * output. We had previously set it to the end of the * instruction to simplify %rip-relative addressing. 
*/ rp->r_rip = pc; fasttrap_return_common(rp, pc, pid, new_pc); } else { ASSERT(curthread->t_dtrace_ret != 0); ASSERT(curthread->t_dtrace_pc == pc); ASSERT(curthread->t_dtrace_scrpc != 0); ASSERT(new_pc == curthread->t_dtrace_astpc); } } rp->r_rip = new_pc; #ifndef illumos PROC_LOCK(p); proc_write_regs(curthread, rp); PROC_UNLOCK(p); #endif return (0); } int fasttrap_return_probe(struct reg *rp) { proc_t *p = curproc; uintptr_t pc = curthread->t_dtrace_pc; uintptr_t npc = curthread->t_dtrace_npc; curthread->t_dtrace_pc = 0; curthread->t_dtrace_npc = 0; curthread->t_dtrace_scrpc = 0; curthread->t_dtrace_astpc = 0; #ifdef illumos /* * Treat a child created by a call to vfork(2) as if it were its * parent. We know that there's only one thread of control in such a * process: this one. */ while (p->p_flag & SVFORK) { p = p->p_parent; } #endif /* * We set rp->r_rip to the address of the traced instruction so * that it appears to dtrace_probe() that we're on the original * instruction, and so that the user can't easily detect our * complex web of lies. dtrace_return_probe() (our caller) * will correctly set %pc after we return. */ rp->r_rip = pc; fasttrap_return_common(rp, pc, p->p_pid, npc); return (0); } /*ARGSUSED*/ uint64_t fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) { struct reg r; fill_regs(curthread, &r); return (fasttrap_anarg(&r, 1, argno)); } /*ARGSUSED*/ uint64_t fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) { struct reg r; fill_regs(curthread, &r); return (fasttrap_anarg(&r, 0, argno)); } static ulong_t fasttrap_getreg(struct reg *rp, uint_t reg) { #ifdef __amd64 switch (reg) { case REG_R15: return (rp->r_r15); case REG_R14: return (rp->r_r14); case REG_R13: return (rp->r_r13); case REG_R12: return (rp->r_r12); case REG_R11: return (rp->r_r11); case REG_R10: return (rp->r_r10); case REG_R9: return (rp->r_r9); case REG_R8: return (rp->r_r8); case REG_RDI: return (rp->r_rdi); case REG_RSI: return (rp->r_rsi); case REG_RBP: return (rp->r_rbp); case REG_RBX: return (rp->r_rbx); case REG_RDX: return (rp->r_rdx); case REG_RCX: return (rp->r_rcx); case REG_RAX: return (rp->r_rax); case REG_TRAPNO: return (rp->r_trapno); case REG_ERR: return (rp->r_err); case REG_RIP: return (rp->r_rip); case REG_CS: return (rp->r_cs); #ifdef illumos case REG_RFL: return (rp->r_rfl); #endif case REG_RSP: return (rp->r_rsp); case REG_SS: return (rp->r_ss); case REG_FS: return (rp->r_fs); case REG_GS: return (rp->r_gs); case REG_DS: return (rp->r_ds); case REG_ES: return (rp->r_es); case REG_FSBASE: return (rdmsr(MSR_FSBASE)); case REG_GSBASE: return (rdmsr(MSR_GSBASE)); } panic("dtrace: illegal register constant"); /*NOTREACHED*/ #else #define _NGREG 19 if (reg >= _NGREG) panic("dtrace: illegal register constant"); return (((greg_t *)&rp->r_gs)[reg]); #endif } Index: user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris =================================================================== --- user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris (revision 303642) Property changes on: user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/cddl/contrib/opensolaris:r303053-303641 Index: user/alc/PQ_LAUNDRY/sys/conf/files.arm =================================================================== --- user/alc/PQ_LAUNDRY/sys/conf/files.arm (revision 303641) +++ 
user/alc/PQ_LAUNDRY/sys/conf/files.arm (revision 303642) @@ -1,144 +1,145 @@ # $FreeBSD$ arm/arm/autoconf.c standard arm/arm/bcopy_page.S standard arm/arm/bcopyinout.S standard arm/arm/blockio.S standard arm/arm/bus_space_asm_generic.S standard arm/arm/bus_space_base.c optional fdt arm/arm/bus_space_generic.c standard arm/arm/busdma_machdep-v4.c optional !armv6 arm/arm/busdma_machdep-v6.c optional armv6 arm/arm/copystr.S standard arm/arm/cpufunc.c standard arm/arm/cpufunc_asm.S standard arm/arm/cpufunc_asm_arm9.S optional cpu_arm9 | cpu_arm9e arm/arm/cpufunc_asm_arm11.S optional cpu_arm1176 arm/arm/cpufunc_asm_arm11x6.S optional cpu_arm1176 arm/arm/cpufunc_asm_armv4.S optional cpu_arm9 | cpu_arm9e | cpu_fa526 | cpu_xscale_pxa2x0 | cpu_xscale_ixp425 | cpu_xscale_81342 arm/arm/cpufunc_asm_armv5_ec.S optional cpu_arm9e arm/arm/cpufunc_asm_armv6.S optional cpu_arm1176 arm/arm/cpufunc_asm_armv7.S optional cpu_cortexa | cpu_krait | cpu_mv_pj4b arm/arm/cpufunc_asm_fa526.S optional cpu_fa526 arm/arm/cpufunc_asm_pj4b.S optional cpu_mv_pj4b arm/arm/cpufunc_asm_sheeva.S optional cpu_arm9e arm/arm/cpufunc_asm_xscale.S optional cpu_xscale_pxa2x0 | cpu_xscale_ixp425 | cpu_xscale_81342 arm/arm/cpufunc_asm_xscale_c3.S optional cpu_xscale_81342 arm/arm/cpuinfo.c standard arm/arm/cpu_asm-v6.S optional armv6 arm/arm/db_disasm.c optional ddb arm/arm/db_interface.c optional ddb arm/arm/db_trace.c optional ddb arm/arm/debug_monitor.c optional ddb armv6 arm/arm/disassem.c optional ddb arm/arm/dump_machdep.c standard arm/arm/elf_machdep.c standard arm/arm/elf_note.S standard arm/arm/exception.S standard arm/arm/fiq.c standard arm/arm/fiq_subr.S standard arm/arm/fusu.S standard arm/arm/gdb_machdep.c optional gdb arm/arm/generic_timer.c optional generic_timer arm/arm/gic.c optional gic +arm/arm/gic_fdt.c optional gic fdt arm/arm/hdmi_if.m optional hdmi arm/arm/identcpu.c standard arm/arm/in_cksum.c optional inet | inet6 arm/arm/in_cksum_arm.S optional inet | inet6 arm/arm/intr.c optional !intrng kern/subr_intr.c optional intrng arm/arm/locore.S standard no-obj arm/arm/machdep.c standard arm/arm/machdep_intr.c standard arm/arm/mem.c optional mem arm/arm/minidump_machdep.c optional mem arm/arm/mp_machdep.c optional smp arm/arm/mpcore_timer.c optional mpcore_timer arm/arm/nexus.c standard arm/arm/ofw_machdep.c optional fdt arm/arm/physmem.c standard arm/arm/pl190.c optional pl190 arm/arm/pl310.c optional pl310 arm/arm/platform.c optional platform arm/arm/platform_if.m optional platform arm/arm/pmap-v4.c optional !armv6 arm/arm/pmap-v6.c optional armv6 arm/arm/pmu.c optional pmu | fdt hwpmc arm/arm/sc_machdep.c optional sc arm/arm/setcpsr.S standard arm/arm/setstack.s standard arm/arm/stack_machdep.c optional ddb | stack arm/arm/stdatomic.c standard \ compile-with "${NORMAL_C:N-Wmissing-prototypes}" arm/arm/support.S standard arm/arm/swtch.S standard arm/arm/swtch-v4.S optional !armv6 arm/arm/swtch-v6.S optional armv6 arm/arm/sys_machdep.c standard arm/arm/syscall.c standard arm/arm/trap-v4.c optional !armv6 arm/arm/trap-v6.c optional armv6 arm/arm/uio_machdep.c standard arm/arm/undefined.c standard arm/arm/unwind.c optional ddb | kdtrace_hooks arm/arm/vm_machdep.c standard arm/arm/vfp.c standard board_id.h standard \ dependency "$S/arm/conf/genboardid.awk $S/arm/conf/mach-types" \ compile-with "${AWK} -f $S/arm/conf/genboardid.awk $S/arm/conf/mach-types > board_id.h" \ no-obj no-implicit-rule before-depend \ clean "board_id.h" cddl/compat/opensolaris/kern/opensolaris_atomic.c optional zfs | dtrace compile-with 
"${CDDL_C}" cddl/dev/dtrace/arm/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/arm/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/arm/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" crypto/blowfish/bf_enc.c optional crypto | ipsec crypto/des/des_enc.c optional crypto | ipsec | netsmb dev/dwc/if_dwc.c optional dwc dev/dwc/if_dwc_if.m optional dwc dev/fb/fb.c optional sc dev/fdt/fdt_arm_platform.c optional platform fdt dev/hwpmc/hwpmc_arm.c optional hwpmc dev/hwpmc/hwpmc_armv7.c optional hwpmc armv6 dev/iicbus/twsi/twsi.c optional twsi dev/ofw/ofw_cpu.c optional fdt dev/ofw/ofwpci.c optional fdt pci dev/pci/pci_host_generic.c optional pci_host_generic pci fdt dev/psci/psci.c optional psci dev/psci/psci_arm.S optional psci dev/syscons/scgfbrndr.c optional sc dev/syscons/scterm-teken.c optional sc dev/syscons/scvtb.c optional sc dev/uart/uart_cpu_fdt.c optional uart fdt font.h optional sc \ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ no-obj no-implicit-rule before-depend \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" kern/msi_if.m optional intrng kern/pic_if.m optional intrng kern/subr_busdma_bufalloc.c standard kern/subr_devmap.c standard kern/subr_sfbuf.c standard libkern/arm/aeabi_unwind.c standard libkern/arm/divsi3.S standard libkern/arm/ffs.S standard libkern/arm/ldivmod.S standard libkern/arm/ldivmod_helper.c standard libkern/arm/memclr.S standard libkern/arm/memcpy.S standard libkern/arm/memset.S standard libkern/arm/muldi3.c standard libkern/ashldi3.c standard libkern/ashrdi3.c standard libkern/divdi3.c standard libkern/ffsl.c standard libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/lshrdi3.c standard libkern/moddi3.c standard libkern/qdivrem.c standard libkern/ucmpdi2.c standard libkern/udivdi3.c standard libkern/umoddi3.c standard Index: user/alc/PQ_LAUNDRY/sys/conf/files.arm64 =================================================================== --- user/alc/PQ_LAUNDRY/sys/conf/files.arm64 (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/conf/files.arm64 (revision 303642) @@ -1,101 +1,102 @@ # $FreeBSD$ arm/arm/generic_timer.c standard -arm/arm/gic.c optional intrng +arm/arm/gic.c standard +arm/arm/gic_fdt.c optional fdt arm/arm/pmu.c standard arm64/acpica/acpi_machdep.c optional acpi arm64/acpica/OsdEnvironment.c optional acpi arm64/acpica/acpi_wakeup.c optional acpi arm64/acpica/pci_cfgreg.c optional acpi pci arm64/arm64/autoconf.c standard arm64/arm64/bcopy.c standard arm64/arm64/bus_machdep.c standard arm64/arm64/bus_space_asm.S standard arm64/arm64/busdma_bounce.c standard arm64/arm64/busdma_machdep.c standard arm64/arm64/bzero.S standard arm64/arm64/clock.c standard arm64/arm64/copyinout.S standard arm64/arm64/copystr.c standard arm64/arm64/cpufunc_asm.S standard arm64/arm64/db_disasm.c optional ddb arm64/arm64/db_interface.c optional ddb arm64/arm64/db_trace.c optional ddb arm64/arm64/debug_monitor.c optional kdb arm64/arm64/disassem.c optional ddb arm64/arm64/dump_machdep.c standard arm64/arm64/elf_machdep.c standard 
arm64/arm64/exception.S standard arm64/arm64/gicv3_its.c optional intrng arm64/arm64/gic_v3.c standard arm64/arm64/gic_v3_fdt.c optional fdt arm64/arm64/identcpu.c standard arm64/arm64/in_cksum.c optional inet | inet6 arm64/arm64/locore.S standard no-obj arm64/arm64/machdep.c standard arm64/arm64/mem.c standard arm64/arm64/minidump_machdep.c standard arm64/arm64/mp_machdep.c optional smp arm64/arm64/nexus.c standard arm64/arm64/ofw_machdep.c optional fdt arm64/arm64/pmap.c standard arm64/arm64/stack_machdep.c optional ddb | stack arm64/arm64/support.S standard arm64/arm64/swtch.S standard arm64/arm64/sys_machdep.c standard arm64/arm64/trap.c standard arm64/arm64/uio_machdep.c standard arm64/arm64/uma_machdep.c standard arm64/arm64/unwind.c optional ddb | kdtrace_hooks | stack arm64/arm64/vfp.c standard arm64/arm64/vm_machdep.c standard arm64/cavium/thunder_pcie_fdt.c optional soc_cavm_thunderx pci fdt arm64/cavium/thunder_pcie_pem.c optional soc_cavm_thunderx pci arm64/cavium/thunder_pcie_pem_fdt.c optional soc_cavm_thunderx pci fdt arm64/cavium/thunder_pcie_common.c optional soc_cavm_thunderx pci arm64/cloudabi64/cloudabi64_sysvec.c optional compat_cloudabi64 crypto/blowfish/bf_enc.c optional crypto | ipsec crypto/des/des_enc.c optional crypto | ipsec | netsmb dev/acpica/acpi_if.m optional acpi dev/ahci/ahci_generic.c optional ahci fdt dev/hwpmc/hwpmc_arm64.c optional hwpmc dev/hwpmc/hwpmc_arm64_md.c optional hwpmc dev/mmc/host/dwmmc.c optional dwmmc fdt dev/mmc/host/dwmmc_hisi.c optional dwmmc fdt soc_hisi_hi6220 dev/ofw/ofw_cpu.c optional fdt dev/ofw/ofwpci.c optional fdt pci dev/pci/pci_host_generic.c optional pci fdt dev/psci/psci.c optional psci dev/psci/psci_arm64.S optional psci dev/uart/uart_cpu_fdt.c optional uart fdt dev/uart/uart_dev_pl011.c optional uart pl011 dev/usb/controller/dwc_otg_hisi.c optional dwcotg fdt soc_hisi_hi6220 dev/usb/controller/generic_ehci.c optional ehci acpi dev/usb/controller/generic_ohci.c optional ohci fdt dev/usb/controller/generic_usb_if.m optional ohci fdt dev/vnic/mrml_bridge.c optional vnic fdt dev/vnic/nic_main.c optional vnic pci dev/vnic/nicvf_main.c optional vnic pci pci_iov dev/vnic/nicvf_queues.c optional vnic pci pci_iov dev/vnic/thunder_bgx_fdt.c optional vnic fdt dev/vnic/thunder_bgx.c optional vnic pci dev/vnic/thunder_mdio_fdt.c optional vnic fdt dev/vnic/thunder_mdio.c optional vnic dev/vnic/lmac_if.m optional inet | inet6 | vnic kern/kern_clocksource.c standard kern/msi_if.m optional intrng kern/pic_if.m optional intrng kern/subr_devmap.c standard kern/subr_intr.c optional intrng libkern/bcmp.c standard libkern/ffs.c standard libkern/ffsl.c standard libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/memmove.c standard libkern/memset.c standard cddl/contrib/opensolaris/common/atomic/aarch64/opensolaris_atomic.S optional zfs | dtrace compile-with "${CDDL_C}" cddl/dev/dtrace/aarch64/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/aarch64/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/aarch64/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" Index: user/alc/PQ_LAUNDRY/sys/contrib/libnv/nvlist.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/contrib/libnv/nvlist.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/contrib/libnv/nvlist.c (revision 303642) @@ -1,2026 +1,2025 @@ /*- * Copyright (c) 2009-2013 The FreeBSD Foundation * Copyright (c) 2013-2015 Mariusz Zaborski * All 
rights reserved. * * This software was developed by Pawel Jakub Dawidek under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #ifdef _KERNEL #include #include #include #include #include #include #else #include #include #include #include #include -#define _WITH_DPRINTF #include #include #include #include #include "msgio.h" #endif #ifdef HAVE_PJDLOG #include #endif #include #include "nv_impl.h" #include "nvlist_impl.h" #include "nvpair_impl.h" #ifndef HAVE_PJDLOG #ifdef _KERNEL #define PJDLOG_ASSERT(...) MPASS(__VA_ARGS__) #define PJDLOG_RASSERT(expr, ...) KASSERT(expr, (__VA_ARGS__)) #define PJDLOG_ABORT(...) panic(__VA_ARGS__) #else #include #define PJDLOG_ASSERT(...) assert(__VA_ARGS__) #define PJDLOG_RASSERT(expr, ...) assert(expr) #define PJDLOG_ABORT(...) 
do { \ fprintf(stderr, "%s:%u: ", __FILE__, __LINE__); \ fprintf(stderr, __VA_ARGS__); \ fprintf(stderr, "\n"); \ abort(); \ } while (0) #endif #endif #define NV_FLAG_PRIVATE_MASK (NV_FLAG_BIG_ENDIAN | NV_FLAG_IN_ARRAY) #define NV_FLAG_PUBLIC_MASK (NV_FLAG_IGNORE_CASE | NV_FLAG_NO_UNIQUE) #define NV_FLAG_ALL_MASK (NV_FLAG_PRIVATE_MASK | NV_FLAG_PUBLIC_MASK) #define NVLIST_MAGIC 0x6e766c /* "nvl" */ struct nvlist { int nvl_magic; int nvl_error; int nvl_flags; nvpair_t *nvl_parent; nvpair_t *nvl_array_next; struct nvl_head nvl_head; }; #define NVLIST_ASSERT(nvl) do { \ PJDLOG_ASSERT((nvl) != NULL); \ PJDLOG_ASSERT((nvl)->nvl_magic == NVLIST_MAGIC); \ } while (0) #ifdef _KERNEL MALLOC_DEFINE(M_NVLIST, "nvlist", "kernel nvlist"); #endif #define NVPAIR_ASSERT(nvp) nvpair_assert(nvp) #define NVLIST_HEADER_MAGIC 0x6c #define NVLIST_HEADER_VERSION 0x00 struct nvlist_header { uint8_t nvlh_magic; uint8_t nvlh_version; uint8_t nvlh_flags; uint64_t nvlh_descriptors; uint64_t nvlh_size; } __packed; nvlist_t * nvlist_create(int flags) { nvlist_t *nvl; PJDLOG_ASSERT((flags & ~(NV_FLAG_PUBLIC_MASK)) == 0); nvl = nv_malloc(sizeof(*nvl)); if (nvl == NULL) return (NULL); nvl->nvl_error = 0; nvl->nvl_flags = flags; nvl->nvl_parent = NULL; nvl->nvl_array_next = NULL; TAILQ_INIT(&nvl->nvl_head); nvl->nvl_magic = NVLIST_MAGIC; return (nvl); } void nvlist_destroy(nvlist_t *nvl) { nvpair_t *nvp; if (nvl == NULL) return; ERRNO_SAVE(); NVLIST_ASSERT(nvl); while ((nvp = nvlist_first_nvpair(nvl)) != NULL) { nvlist_remove_nvpair(nvl, nvp); nvpair_free(nvp); } if (nvl->nvl_array_next != NULL) nvpair_free_structure(nvl->nvl_array_next); nvl->nvl_array_next = NULL; nvl->nvl_parent = NULL; nvl->nvl_magic = 0; nv_free(nvl); ERRNO_RESTORE(); } void nvlist_set_error(nvlist_t *nvl, int error) { PJDLOG_ASSERT(error != 0); /* * Check for error != 0 so that we don't do the wrong thing if somebody * tries to abuse this API when asserts are disabled. 
*/ if (nvl != NULL && error != 0 && nvl->nvl_error == 0) nvl->nvl_error = error; } int nvlist_error(const nvlist_t *nvl) { if (nvl == NULL) return (ENOMEM); NVLIST_ASSERT(nvl); return (nvl->nvl_error); } nvpair_t * nvlist_get_nvpair_parent(const nvlist_t *nvl) { NVLIST_ASSERT(nvl); return (nvl->nvl_parent); } const nvlist_t * nvlist_get_parent(const nvlist_t *nvl, void **cookiep) { nvpair_t *nvp; NVLIST_ASSERT(nvl); nvp = nvl->nvl_parent; if (cookiep != NULL) *cookiep = nvp; if (nvp == NULL) return (NULL); return (nvpair_nvlist(nvp)); } void nvlist_set_parent(nvlist_t *nvl, nvpair_t *parent) { NVLIST_ASSERT(nvl); nvl->nvl_parent = parent; } void nvlist_set_array_next(nvlist_t *nvl, nvpair_t *ele) { NVLIST_ASSERT(nvl); if (ele != NULL) { nvl->nvl_flags |= NV_FLAG_IN_ARRAY; } else { nvl->nvl_flags &= ~NV_FLAG_IN_ARRAY; nv_free(nvl->nvl_array_next); } nvl->nvl_array_next = ele; } bool nvlist_in_array(const nvlist_t *nvl) { NVLIST_ASSERT(nvl); return ((nvl->nvl_flags & NV_FLAG_IN_ARRAY) != 0); } const nvlist_t * nvlist_get_array_next(const nvlist_t *nvl) { nvpair_t *nvp; NVLIST_ASSERT(nvl); nvp = nvl->nvl_array_next; if (nvp == NULL) return (NULL); return (nvpair_get_nvlist(nvp)); } const nvlist_t * nvlist_get_pararr(const nvlist_t *nvl, void **cookiep) { const nvlist_t *ret; ret = nvlist_get_array_next(nvl); if (ret != NULL) { if (cookiep != NULL) *cookiep = NULL; return (ret); } ret = nvlist_get_parent(nvl, cookiep); return (ret); } bool nvlist_empty(const nvlist_t *nvl) { NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); return (nvlist_first_nvpair(nvl) == NULL); } int nvlist_flags(const nvlist_t *nvl) { NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); return (nvl->nvl_flags & NV_FLAG_PUBLIC_MASK); } void nvlist_set_flags(nvlist_t *nvl, int flags) { NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); nvl->nvl_flags = flags; } static void nvlist_report_missing(int type, const char *name) { PJDLOG_ABORT("Element '%s' of type %s doesn't exist.", name, nvpair_type_string(type)); } static nvpair_t * nvlist_find(const nvlist_t *nvl, int type, const char *name) { nvpair_t *nvp; NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); PJDLOG_ASSERT(type == NV_TYPE_NONE || (type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST)); for (nvp = nvlist_first_nvpair(nvl); nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) { if (type != NV_TYPE_NONE && nvpair_type(nvp) != type) continue; if ((nvl->nvl_flags & NV_FLAG_IGNORE_CASE) != 0) { if (strcasecmp(nvpair_name(nvp), name) != 0) continue; } else { if (strcmp(nvpair_name(nvp), name) != 0) continue; } break; } if (nvp == NULL) ERRNO_SET(ENOENT); return (nvp); } bool nvlist_exists_type(const nvlist_t *nvl, const char *name, int type) { NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); PJDLOG_ASSERT(type == NV_TYPE_NONE || (type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST)); return (nvlist_find(nvl, type, name) != NULL); } void nvlist_free_type(nvlist_t *nvl, const char *name, int type) { nvpair_t *nvp; NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); PJDLOG_ASSERT(type == NV_TYPE_NONE || (type >= NV_TYPE_FIRST && type <= NV_TYPE_LAST)); nvp = nvlist_find(nvl, type, name); if (nvp != NULL) nvlist_free_nvpair(nvl, nvp); else nvlist_report_missing(type, name); } nvlist_t * nvlist_clone(const nvlist_t *nvl) { nvlist_t *newnvl; nvpair_t *nvp, *newnvp; NVLIST_ASSERT(nvl); if (nvl->nvl_error != 0) { ERRNO_SET(nvl->nvl_error); return (NULL); } newnvl = nvlist_create(nvl->nvl_flags & NV_FLAG_PUBLIC_MASK); for (nvp = nvlist_first_nvpair(nvl); nvp != 
NULL; nvp = nvlist_next_nvpair(nvl, nvp)) { newnvp = nvpair_clone(nvp); if (newnvp == NULL) break; (void)nvlist_move_nvpair(newnvl, newnvp); } if (nvp != NULL) { nvlist_destroy(newnvl); return (NULL); } return (newnvl); } #ifndef _KERNEL static bool nvlist_dump_error_check(const nvlist_t *nvl, int fd, int level) { if (nvlist_error(nvl) != 0) { dprintf(fd, "%*serror: %d\n", level * 4, "", nvlist_error(nvl)); return (true); } return (false); } /* * Dump content of nvlist. */ void nvlist_dump(const nvlist_t *nvl, int fd) { const nvlist_t *tmpnvl; nvpair_t *nvp, *tmpnvp; void *cookie; int level; level = 0; if (nvlist_dump_error_check(nvl, fd, level)) return; nvp = nvlist_first_nvpair(nvl); while (nvp != NULL) { dprintf(fd, "%*s%s (%s):", level * 4, "", nvpair_name(nvp), nvpair_type_string(nvpair_type(nvp))); switch (nvpair_type(nvp)) { case NV_TYPE_NULL: dprintf(fd, " null\n"); break; case NV_TYPE_BOOL: dprintf(fd, " %s\n", nvpair_get_bool(nvp) ? "TRUE" : "FALSE"); break; case NV_TYPE_NUMBER: dprintf(fd, " %ju (%jd) (0x%jx)\n", (uintmax_t)nvpair_get_number(nvp), (intmax_t)nvpair_get_number(nvp), (uintmax_t)nvpair_get_number(nvp)); break; case NV_TYPE_STRING: dprintf(fd, " [%s]\n", nvpair_get_string(nvp)); break; case NV_TYPE_NVLIST: dprintf(fd, "\n"); tmpnvl = nvpair_get_nvlist(nvp); if (nvlist_dump_error_check(tmpnvl, fd, level + 1)) break; tmpnvp = nvlist_first_nvpair(tmpnvl); if (tmpnvp != NULL) { nvl = tmpnvl; nvp = tmpnvp; level++; continue; } break; case NV_TYPE_DESCRIPTOR: dprintf(fd, " %d\n", nvpair_get_descriptor(nvp)); break; case NV_TYPE_BINARY: { const unsigned char *binary; unsigned int ii; size_t size; binary = nvpair_get_binary(nvp, &size); dprintf(fd, " %zu ", size); for (ii = 0; ii < size; ii++) dprintf(fd, "%02hhx", binary[ii]); dprintf(fd, "\n"); break; } case NV_TYPE_BOOL_ARRAY: { const bool *value; unsigned int ii; size_t nitems; value = nvpair_get_bool_array(nvp, &nitems); dprintf(fd, " [ "); for (ii = 0; ii < nitems; ii++) { dprintf(fd, "%s", value[ii] ? 
"TRUE" : "FALSE"); if (ii != nitems - 1) dprintf(fd, ", "); } dprintf(fd, " ]\n"); break; } case NV_TYPE_STRING_ARRAY: { const char * const *value; unsigned int ii; size_t nitems; value = nvpair_get_string_array(nvp, &nitems); dprintf(fd, " [ "); for (ii = 0; ii < nitems; ii++) { if (value[ii] == NULL) dprintf(fd, "NULL"); else dprintf(fd, "\"%s\"", value[ii]); if (ii != nitems - 1) dprintf(fd, ", "); } dprintf(fd, " ]\n"); break; } case NV_TYPE_NUMBER_ARRAY: { const uint64_t *value; unsigned int ii; size_t nitems; value = nvpair_get_number_array(nvp, &nitems); dprintf(fd, " [ "); for (ii = 0; ii < nitems; ii++) { dprintf(fd, "%ju (%jd) (0x%jx)", value[ii], value[ii], value[ii]); if (ii != nitems - 1) dprintf(fd, ", "); } dprintf(fd, " ]\n"); break; } case NV_TYPE_DESCRIPTOR_ARRAY: { const int *value; unsigned int ii; size_t nitems; value = nvpair_get_descriptor_array(nvp, &nitems); dprintf(fd, " [ "); for (ii = 0; ii < nitems; ii++) { dprintf(fd, "%d", value[ii]); if (ii != nitems - 1) dprintf(fd, ", "); } dprintf(fd, " ]\n"); break; } case NV_TYPE_NVLIST_ARRAY: { const nvlist_t * const *value; unsigned int ii; size_t nitems; value = nvpair_get_nvlist_array(nvp, &nitems); dprintf(fd, " %zu\n", nitems); tmpnvl = NULL; tmpnvp = NULL; for (ii = 0; ii < nitems; ii++) { if (nvlist_dump_error_check(value[ii], fd, level + 1)) { break; } if (tmpnvl == NULL) { tmpnvp = nvlist_first_nvpair(value[ii]); if (tmpnvp != NULL) { tmpnvl = value[ii]; } else { dprintf(fd, "%*s,\n", (level + 1) * 4, ""); } } } if (tmpnvp != NULL) { nvl = tmpnvl; nvp = tmpnvp; level++; continue; } break; } default: PJDLOG_ABORT("Unknown type: %d.", nvpair_type(nvp)); } while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) { do { cookie = NULL; if (nvlist_in_array(nvl)) dprintf(fd, "%*s,\n", level * 4, ""); nvl = nvlist_get_pararr(nvl, &cookie); if (nvl == NULL) return; if (nvlist_in_array(nvl) && cookie == NULL) { nvp = nvlist_first_nvpair(nvl); } else { nvp = cookie; level--; } } while (nvp == NULL); if (nvlist_in_array(nvl) && cookie == NULL) break; } } } void nvlist_fdump(const nvlist_t *nvl, FILE *fp) { fflush(fp); nvlist_dump(nvl, fileno(fp)); } #endif /* * The function obtains size of the nvlist after nvlist_pack(). 
*/ size_t nvlist_size(const nvlist_t *nvl) { const nvlist_t *tmpnvl; const nvlist_t * const *nvlarray; const nvpair_t *nvp, *tmpnvp; void *cookie; size_t size, nitems; unsigned int ii; NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); size = sizeof(struct nvlist_header); nvp = nvlist_first_nvpair(nvl); while (nvp != NULL) { size += nvpair_header_size(); size += strlen(nvpair_name(nvp)) + 1; if (nvpair_type(nvp) == NV_TYPE_NVLIST) { size += sizeof(struct nvlist_header); size += nvpair_header_size() + 1; tmpnvl = nvpair_get_nvlist(nvp); PJDLOG_ASSERT(tmpnvl->nvl_error == 0); tmpnvp = nvlist_first_nvpair(tmpnvl); if (tmpnvp != NULL) { nvl = tmpnvl; nvp = tmpnvp; continue; } } else if (nvpair_type(nvp) == NV_TYPE_NVLIST_ARRAY) { nvlarray = nvpair_get_nvlist_array(nvp, &nitems); PJDLOG_ASSERT(nitems > 0); size += (nvpair_header_size() + 1) * nitems; size += sizeof(struct nvlist_header) * nitems; tmpnvl = NULL; tmpnvp = NULL; for (ii = 0; ii < nitems; ii++) { PJDLOG_ASSERT(nvlarray[ii]->nvl_error == 0); tmpnvp = nvlist_first_nvpair(nvlarray[ii]); if (tmpnvp != NULL) { tmpnvl = nvlarray[ii]; break; } } if (tmpnvp != NULL) { nvp = tmpnvp; nvl = tmpnvl; continue; } } else { size += nvpair_size(nvp); } while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) { do { cookie = NULL; nvl = nvlist_get_pararr(nvl, &cookie); if (nvl == NULL) goto out; if (nvlist_in_array(nvl) && cookie == NULL) { nvp = nvlist_first_nvpair(nvl); } else { nvp = cookie; } } while (nvp == NULL); if (nvlist_in_array(nvl) && cookie == NULL) break; } } out: return (size); } #ifndef _KERNEL static int * nvlist_xdescriptors(const nvlist_t *nvl, int *descs) { nvpair_t *nvp; const char *name; int type; NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); nvp = NULL; do { while ((name = nvlist_next(nvl, &type, (void**)&nvp)) != NULL) { switch (type) { case NV_TYPE_DESCRIPTOR: *descs = nvpair_get_descriptor(nvp); descs++; break; case NV_TYPE_DESCRIPTOR_ARRAY: { const int *value; size_t nitems; unsigned int ii; value = nvpair_get_descriptor_array(nvp, &nitems); for (ii = 0; ii < nitems; ii++) { *descs = value[ii]; descs++; } break; } case NV_TYPE_NVLIST: nvl = nvpair_get_nvlist(nvp); nvp = NULL; break; case NV_TYPE_NVLIST_ARRAY: { const nvlist_t * const *value; size_t nitems; value = nvpair_get_nvlist_array(nvp, &nitems); PJDLOG_ASSERT(value != NULL); PJDLOG_ASSERT(nitems > 0); nvl = value[0]; nvp = NULL; break; } } } } while ((nvl = nvlist_get_pararr(nvl, (void**)&nvp)) != NULL); return (descs); } #endif #ifndef _KERNEL int * nvlist_descriptors(const nvlist_t *nvl, size_t *nitemsp) { size_t nitems; int *fds; nitems = nvlist_ndescriptors(nvl); fds = nv_malloc(sizeof(fds[0]) * (nitems + 1)); if (fds == NULL) return (NULL); if (nitems > 0) nvlist_xdescriptors(nvl, fds); fds[nitems] = -1; if (nitemsp != NULL) *nitemsp = nitems; return (fds); } #endif size_t nvlist_ndescriptors(const nvlist_t *nvl) { #ifndef _KERNEL nvpair_t *nvp; const char *name; size_t ndescs; int type; NVLIST_ASSERT(nvl); PJDLOG_ASSERT(nvl->nvl_error == 0); ndescs = 0; nvp = NULL; do { while ((name = nvlist_next(nvl, &type, (void**)&nvp)) != NULL) { switch (type) { case NV_TYPE_DESCRIPTOR: ndescs++; break; case NV_TYPE_NVLIST: nvl = nvpair_get_nvlist(nvp); nvp = NULL; break; case NV_TYPE_NVLIST_ARRAY: { const nvlist_t * const *value; size_t nitems; value = nvpair_get_nvlist_array(nvp, &nitems); PJDLOG_ASSERT(value != NULL); PJDLOG_ASSERT(nitems > 0); nvl = value[0]; nvp = NULL; break; } case NV_TYPE_DESCRIPTOR_ARRAY: { size_t nitems; 
(void)nvpair_get_descriptor_array(nvp, &nitems); ndescs += nitems; break; } } } } while ((nvl = nvlist_get_pararr(nvl, (void**)&nvp)) != NULL); return (ndescs); #else return (0); #endif } static unsigned char * nvlist_pack_header(const nvlist_t *nvl, unsigned char *ptr, size_t *leftp) { struct nvlist_header nvlhdr; NVLIST_ASSERT(nvl); nvlhdr.nvlh_magic = NVLIST_HEADER_MAGIC; nvlhdr.nvlh_version = NVLIST_HEADER_VERSION; nvlhdr.nvlh_flags = nvl->nvl_flags; #if BYTE_ORDER == BIG_ENDIAN nvlhdr.nvlh_flags |= NV_FLAG_BIG_ENDIAN; #endif nvlhdr.nvlh_descriptors = nvlist_ndescriptors(nvl); nvlhdr.nvlh_size = *leftp - sizeof(nvlhdr); PJDLOG_ASSERT(*leftp >= sizeof(nvlhdr)); memcpy(ptr, &nvlhdr, sizeof(nvlhdr)); ptr += sizeof(nvlhdr); *leftp -= sizeof(nvlhdr); return (ptr); } static void * nvlist_xpack(const nvlist_t *nvl, int64_t *fdidxp, size_t *sizep) { unsigned char *buf, *ptr; size_t left, size; const nvlist_t *tmpnvl; nvpair_t *nvp, *tmpnvp; void *cookie; NVLIST_ASSERT(nvl); if (nvl->nvl_error != 0) { ERRNO_SET(nvl->nvl_error); return (NULL); } size = nvlist_size(nvl); buf = nv_malloc(size); if (buf == NULL) return (NULL); ptr = buf; left = size; ptr = nvlist_pack_header(nvl, ptr, &left); nvp = nvlist_first_nvpair(nvl); while (nvp != NULL) { NVPAIR_ASSERT(nvp); nvpair_init_datasize(nvp); ptr = nvpair_pack_header(nvp, ptr, &left); if (ptr == NULL) goto fail; switch (nvpair_type(nvp)) { case NV_TYPE_NULL: ptr = nvpair_pack_null(nvp, ptr, &left); break; case NV_TYPE_BOOL: ptr = nvpair_pack_bool(nvp, ptr, &left); break; case NV_TYPE_NUMBER: ptr = nvpair_pack_number(nvp, ptr, &left); break; case NV_TYPE_STRING: ptr = nvpair_pack_string(nvp, ptr, &left); break; case NV_TYPE_NVLIST: tmpnvl = nvpair_get_nvlist(nvp); ptr = nvlist_pack_header(tmpnvl, ptr, &left); if (ptr == NULL) goto fail; tmpnvp = nvlist_first_nvpair(tmpnvl); if (tmpnvp != NULL) { nvl = tmpnvl; nvp = tmpnvp; continue; } ptr = nvpair_pack_nvlist_up(ptr, &left); break; #ifndef _KERNEL case NV_TYPE_DESCRIPTOR: ptr = nvpair_pack_descriptor(nvp, ptr, fdidxp, &left); break; case NV_TYPE_DESCRIPTOR_ARRAY: ptr = nvpair_pack_descriptor_array(nvp, ptr, fdidxp, &left); break; #endif case NV_TYPE_BINARY: ptr = nvpair_pack_binary(nvp, ptr, &left); break; case NV_TYPE_BOOL_ARRAY: ptr = nvpair_pack_bool_array(nvp, ptr, &left); break; case NV_TYPE_NUMBER_ARRAY: ptr = nvpair_pack_number_array(nvp, ptr, &left); break; case NV_TYPE_STRING_ARRAY: ptr = nvpair_pack_string_array(nvp, ptr, &left); break; case NV_TYPE_NVLIST_ARRAY: { const nvlist_t * const * value; size_t nitems; unsigned int ii; tmpnvl = NULL; value = nvpair_get_nvlist_array(nvp, &nitems); for (ii = 0; ii < nitems; ii++) { ptr = nvlist_pack_header(value[ii], ptr, &left); if (ptr == NULL) goto out; tmpnvp = nvlist_first_nvpair(value[ii]); if (tmpnvp != NULL) { tmpnvl = value[ii]; break; } ptr = nvpair_pack_nvlist_array_next(ptr, &left); if (ptr == NULL) goto out; } if (tmpnvl != NULL) { nvl = tmpnvl; nvp = tmpnvp; continue; } break; } default: PJDLOG_ABORT("Invalid type (%d).", nvpair_type(nvp)); } if (ptr == NULL) goto fail; while ((nvp = nvlist_next_nvpair(nvl, nvp)) == NULL) { do { cookie = NULL; if (nvlist_in_array(nvl)) { ptr = nvpair_pack_nvlist_array_next(ptr, &left); if (ptr == NULL) goto fail; } nvl = nvlist_get_pararr(nvl, &cookie); if (nvl == NULL) goto out; if (nvlist_in_array(nvl) && cookie == NULL) { nvp = nvlist_first_nvpair(nvl); ptr = nvlist_pack_header(nvl, ptr, &left); if (ptr == NULL) goto fail; } else if (nvpair_type((nvpair_t *)cookie) != NV_TYPE_NVLIST_ARRAY) { ptr = 
nvpair_pack_nvlist_up(ptr, &left); if (ptr == NULL) goto fail; nvp = cookie; } else { nvp = cookie; } } while (nvp == NULL); if (nvlist_in_array(nvl) && cookie == NULL) break; } } out: if (sizep != NULL) *sizep = size; return (buf); fail: nv_free(buf); return (NULL); } void * nvlist_pack(const nvlist_t *nvl, size_t *sizep) { NVLIST_ASSERT(nvl); if (nvl->nvl_error != 0) { ERRNO_SET(nvl->nvl_error); return (NULL); } if (nvlist_ndescriptors(nvl) > 0) { ERRNO_SET(EOPNOTSUPP); return (NULL); } return (nvlist_xpack(nvl, NULL, sizep)); } static bool nvlist_check_header(struct nvlist_header *nvlhdrp) { if (nvlhdrp->nvlh_magic != NVLIST_HEADER_MAGIC) { ERRNO_SET(EINVAL); return (false); } if ((nvlhdrp->nvlh_flags & ~NV_FLAG_ALL_MASK) != 0) { ERRNO_SET(EINVAL); return (false); } #if BYTE_ORDER == BIG_ENDIAN if ((nvlhdrp->nvlh_flags & NV_FLAG_BIG_ENDIAN) == 0) { nvlhdrp->nvlh_size = le64toh(nvlhdrp->nvlh_size); nvlhdrp->nvlh_descriptors = le64toh(nvlhdrp->nvlh_descriptors); } #else if ((nvlhdrp->nvlh_flags & NV_FLAG_BIG_ENDIAN) != 0) { nvlhdrp->nvlh_size = be64toh(nvlhdrp->nvlh_size); nvlhdrp->nvlh_descriptors = be64toh(nvlhdrp->nvlh_descriptors); } #endif return (true); } const unsigned char * nvlist_unpack_header(nvlist_t *nvl, const unsigned char *ptr, size_t nfds, bool *isbep, size_t *leftp) { struct nvlist_header nvlhdr; int inarrayf; if (*leftp < sizeof(nvlhdr)) goto failed; memcpy(&nvlhdr, ptr, sizeof(nvlhdr)); if (!nvlist_check_header(&nvlhdr)) goto failed; if (nvlhdr.nvlh_size != *leftp - sizeof(nvlhdr)) goto failed; /* * nvlh_descriptors might be smaller than nfds in embedded nvlists. */ if (nvlhdr.nvlh_descriptors > nfds) goto failed; if ((nvlhdr.nvlh_flags & ~NV_FLAG_ALL_MASK) != 0) goto failed; inarrayf = (nvl->nvl_flags & NV_FLAG_IN_ARRAY); nvl->nvl_flags = (nvlhdr.nvlh_flags & NV_FLAG_PUBLIC_MASK) | inarrayf; ptr += sizeof(nvlhdr); if (isbep != NULL) *isbep = (((int)nvlhdr.nvlh_flags & NV_FLAG_BIG_ENDIAN) != 0); *leftp -= sizeof(nvlhdr); return (ptr); failed: ERRNO_SET(EINVAL); return (NULL); } static nvlist_t * nvlist_xunpack(const void *buf, size_t size, const int *fds, size_t nfds, int flags) { const unsigned char *ptr; nvlist_t *nvl, *retnvl, *tmpnvl, *array; nvpair_t *nvp; size_t left; bool isbe; PJDLOG_ASSERT((flags & ~(NV_FLAG_PUBLIC_MASK)) == 0); left = size; ptr = buf; tmpnvl = array = NULL; nvl = retnvl = nvlist_create(0); if (nvl == NULL) goto failed; ptr = nvlist_unpack_header(nvl, ptr, nfds, &isbe, &left); if (ptr == NULL) goto failed; if (nvl->nvl_flags != flags) { ERRNO_SET(EILSEQ); goto failed; } while (left > 0) { ptr = nvpair_unpack(isbe, ptr, &left, &nvp); if (ptr == NULL) goto failed; switch (nvpair_type(nvp)) { case NV_TYPE_NULL: ptr = nvpair_unpack_null(isbe, nvp, ptr, &left); break; case NV_TYPE_BOOL: ptr = nvpair_unpack_bool(isbe, nvp, ptr, &left); break; case NV_TYPE_NUMBER: ptr = nvpair_unpack_number(isbe, nvp, ptr, &left); break; case NV_TYPE_STRING: ptr = nvpair_unpack_string(isbe, nvp, ptr, &left); break; case NV_TYPE_NVLIST: ptr = nvpair_unpack_nvlist(isbe, nvp, ptr, &left, nfds, &tmpnvl); if (tmpnvl == NULL || ptr == NULL) goto failed; nvlist_set_parent(tmpnvl, nvp); break; #ifndef _KERNEL case NV_TYPE_DESCRIPTOR: ptr = nvpair_unpack_descriptor(isbe, nvp, ptr, &left, fds, nfds); break; case NV_TYPE_DESCRIPTOR_ARRAY: ptr = nvpair_unpack_descriptor_array(isbe, nvp, ptr, &left, fds, nfds); break; #endif case NV_TYPE_BINARY: ptr = nvpair_unpack_binary(isbe, nvp, ptr, &left); break; case NV_TYPE_NVLIST_UP: if (nvl->nvl_parent == NULL) goto failed; nvl = 
nvpair_nvlist(nvl->nvl_parent); nvpair_free_structure(nvp); continue; case NV_TYPE_NVLIST_ARRAY_NEXT: if (nvl->nvl_array_next == NULL) { if (nvl->nvl_parent == NULL) goto failed; nvl = nvpair_nvlist(nvl->nvl_parent); } else { nvl = __DECONST(nvlist_t *, nvlist_get_array_next(nvl)); ptr = nvlist_unpack_header(nvl, ptr, nfds, &isbe, &left); if (ptr == NULL) goto failed; } nvpair_free_structure(nvp); continue; case NV_TYPE_BOOL_ARRAY: ptr = nvpair_unpack_bool_array(isbe, nvp, ptr, &left); break; case NV_TYPE_NUMBER_ARRAY: ptr = nvpair_unpack_number_array(isbe, nvp, ptr, &left); break; case NV_TYPE_STRING_ARRAY: ptr = nvpair_unpack_string_array(isbe, nvp, ptr, &left); break; case NV_TYPE_NVLIST_ARRAY: ptr = nvpair_unpack_nvlist_array(isbe, nvp, ptr, &left, &array); if (ptr == NULL) goto failed; tmpnvl = array; while (array != NULL) { nvlist_set_parent(array, nvp); array = __DECONST(nvlist_t *, nvlist_get_array_next(array)); } ptr = nvlist_unpack_header(tmpnvl, ptr, nfds, &isbe, &left); break; default: PJDLOG_ABORT("Invalid type (%d).", nvpair_type(nvp)); } if (ptr == NULL) goto failed; if (!nvlist_move_nvpair(nvl, nvp)) goto failed; if (tmpnvl != NULL) { nvl = tmpnvl; tmpnvl = NULL; } } return (retnvl); failed: nvlist_destroy(retnvl); return (NULL); } nvlist_t * nvlist_unpack(const void *buf, size_t size, int flags) { return (nvlist_xunpack(buf, size, NULL, 0, flags)); } #ifndef _KERNEL int nvlist_send(int sock, const nvlist_t *nvl) { size_t datasize, nfds; int *fds; void *data; int64_t fdidx; int ret; if (nvlist_error(nvl) != 0) { ERRNO_SET(nvlist_error(nvl)); return (-1); } fds = nvlist_descriptors(nvl, &nfds); if (fds == NULL) return (-1); ret = -1; data = NULL; fdidx = 0; data = nvlist_xpack(nvl, &fdidx, &datasize); if (data == NULL) goto out; if (buf_send(sock, data, datasize) == -1) goto out; if (nfds > 0) { if (fd_send(sock, fds, nfds) == -1) goto out; } ret = 0; out: ERRNO_SAVE(); nv_free(fds); nv_free(data); ERRNO_RESTORE(); return (ret); } nvlist_t * nvlist_recv(int sock, int flags) { struct nvlist_header nvlhdr; nvlist_t *nvl, *ret; unsigned char *buf; size_t nfds, size, i; int *fds; if (buf_recv(sock, &nvlhdr, sizeof(nvlhdr)) == -1) return (NULL); if (!nvlist_check_header(&nvlhdr)) return (NULL); nfds = (size_t)nvlhdr.nvlh_descriptors; size = sizeof(nvlhdr) + (size_t)nvlhdr.nvlh_size; buf = nv_malloc(size); if (buf == NULL) return (NULL); memcpy(buf, &nvlhdr, sizeof(nvlhdr)); ret = NULL; fds = NULL; if (buf_recv(sock, buf + sizeof(nvlhdr), size - sizeof(nvlhdr)) == -1) goto out; if (nfds > 0) { fds = nv_malloc(nfds * sizeof(fds[0])); if (fds == NULL) goto out; if (fd_recv(sock, fds, nfds) == -1) goto out; } nvl = nvlist_xunpack(buf, size, fds, nfds, flags); if (nvl == NULL) { ERRNO_SAVE(); for (i = 0; i < nfds; i++) close(fds[i]); ERRNO_RESTORE(); goto out; } ret = nvl; out: ERRNO_SAVE(); nv_free(buf); nv_free(fds); ERRNO_RESTORE(); return (ret); } nvlist_t * nvlist_xfer(int sock, nvlist_t *nvl, int flags) { if (nvlist_send(sock, nvl) < 0) { nvlist_destroy(nvl); return (NULL); } nvlist_destroy(nvl); return (nvlist_recv(sock, flags)); } #endif nvpair_t * nvlist_first_nvpair(const nvlist_t *nvl) { NVLIST_ASSERT(nvl); return (TAILQ_FIRST(&nvl->nvl_head)); } nvpair_t * nvlist_next_nvpair(const nvlist_t *nvl, const nvpair_t *nvp) { nvpair_t *retnvp; NVLIST_ASSERT(nvl); NVPAIR_ASSERT(nvp); PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl); retnvp = nvpair_next(nvp); PJDLOG_ASSERT(retnvp == NULL || nvpair_nvlist(retnvp) == nvl); return (retnvp); } nvpair_t * nvlist_prev_nvpair(const nvlist_t *nvl, 
const nvpair_t *nvp) { nvpair_t *retnvp; NVLIST_ASSERT(nvl); NVPAIR_ASSERT(nvp); PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl); retnvp = nvpair_prev(nvp); PJDLOG_ASSERT(nvpair_nvlist(retnvp) == nvl); return (retnvp); } const char * nvlist_next(const nvlist_t *nvl, int *typep, void **cookiep) { nvpair_t *nvp; NVLIST_ASSERT(nvl); if (cookiep == NULL || *cookiep == NULL) nvp = nvlist_first_nvpair(nvl); else nvp = nvlist_next_nvpair(nvl, *cookiep); if (nvp == NULL) return (NULL); if (typep != NULL) *typep = nvpair_type(nvp); if (cookiep != NULL) *cookiep = nvp; return (nvpair_name(nvp)); } bool nvlist_exists(const nvlist_t *nvl, const char *name) { return (nvlist_find(nvl, NV_TYPE_NONE, name) != NULL); } #define NVLIST_EXISTS(type, TYPE) \ bool \ nvlist_exists_##type(const nvlist_t *nvl, const char *name) \ { \ \ return (nvlist_find(nvl, NV_TYPE_##TYPE, name) != NULL); \ } NVLIST_EXISTS(null, NULL) NVLIST_EXISTS(bool, BOOL) NVLIST_EXISTS(number, NUMBER) NVLIST_EXISTS(string, STRING) NVLIST_EXISTS(nvlist, NVLIST) NVLIST_EXISTS(binary, BINARY) NVLIST_EXISTS(bool_array, BOOL_ARRAY) NVLIST_EXISTS(number_array, NUMBER_ARRAY) NVLIST_EXISTS(string_array, STRING_ARRAY) NVLIST_EXISTS(nvlist_array, NVLIST_ARRAY) #ifndef _KERNEL NVLIST_EXISTS(descriptor, DESCRIPTOR) NVLIST_EXISTS(descriptor_array, DESCRIPTOR_ARRAY) #endif #undef NVLIST_EXISTS void nvlist_add_nvpair(nvlist_t *nvl, const nvpair_t *nvp) { nvpair_t *newnvp; NVPAIR_ASSERT(nvp); if (nvlist_error(nvl) != 0) { ERRNO_SET(nvlist_error(nvl)); return; } if ((nvl->nvl_flags & NV_FLAG_NO_UNIQUE) == 0) { if (nvlist_exists(nvl, nvpair_name(nvp))) { nvl->nvl_error = EEXIST; ERRNO_SET(nvlist_error(nvl)); return; } } newnvp = nvpair_clone(nvp); if (newnvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvlist_error(nvl)); return; } nvpair_insert(&nvl->nvl_head, newnvp, nvl); } void nvlist_add_stringf(nvlist_t *nvl, const char *name, const char *valuefmt, ...) 
{ va_list valueap; va_start(valueap, valuefmt); nvlist_add_stringv(nvl, name, valuefmt, valueap); va_end(valueap); } void nvlist_add_stringv(nvlist_t *nvl, const char *name, const char *valuefmt, va_list valueap) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_create_stringv(name, valuefmt, valueap); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } void nvlist_add_null(nvlist_t *nvl, const char *name) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_create_null(name); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } void nvlist_add_binary(nvlist_t *nvl, const char *name, const void *value, size_t size) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_create_binary(name, value, size); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } #define NVLIST_ADD(vtype, type) \ void \ nvlist_add_##type(nvlist_t *nvl, const char *name, vtype value) \ { \ nvpair_t *nvp; \ \ if (nvlist_error(nvl) != 0) { \ ERRNO_SET(nvlist_error(nvl)); \ return; \ } \ \ nvp = nvpair_create_##type(name, value); \ if (nvp == NULL) { \ nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); \ ERRNO_SET(nvl->nvl_error); \ } else { \ (void)nvlist_move_nvpair(nvl, nvp); \ } \ } NVLIST_ADD(bool, bool) NVLIST_ADD(uint64_t, number) NVLIST_ADD(const char *, string) NVLIST_ADD(const nvlist_t *, nvlist) #ifndef _KERNEL NVLIST_ADD(int, descriptor); #endif #undef NVLIST_ADD #define NVLIST_ADD_ARRAY(vtype, type) \ void \ nvlist_add_##type##_array(nvlist_t *nvl, const char *name, vtype value, \ size_t nitems) \ { \ nvpair_t *nvp; \ \ if (nvlist_error(nvl) != 0) { \ ERRNO_SET(nvlist_error(nvl)); \ return; \ } \ \ nvp = nvpair_create_##type##_array(name, value, nitems); \ if (nvp == NULL) { \ nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); \ ERRNO_SET(nvl->nvl_error); \ } else { \ (void)nvlist_move_nvpair(nvl, nvp); \ } \ } NVLIST_ADD_ARRAY(const bool *, bool) NVLIST_ADD_ARRAY(const uint64_t *, number) NVLIST_ADD_ARRAY(const char * const *, string) NVLIST_ADD_ARRAY(const nvlist_t * const *, nvlist) #ifndef _KERNEL NVLIST_ADD_ARRAY(const int *, descriptor) #endif #undef NVLIST_ADD_ARRAY bool nvlist_move_nvpair(nvlist_t *nvl, nvpair_t *nvp) { NVPAIR_ASSERT(nvp); PJDLOG_ASSERT(nvpair_nvlist(nvp) == NULL); if (nvlist_error(nvl) != 0) { nvpair_free(nvp); ERRNO_SET(nvlist_error(nvl)); return (false); } if ((nvl->nvl_flags & NV_FLAG_NO_UNIQUE) == 0) { if (nvlist_exists(nvl, nvpair_name(nvp))) { nvpair_free(nvp); nvl->nvl_error = EEXIST; ERRNO_SET(nvl->nvl_error); return (false); } } nvpair_insert(&nvl->nvl_head, nvp, nvl); return (true); } void nvlist_move_string(nvlist_t *nvl, const char *name, char *value) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { nv_free(value); ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_move_string(name, value); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } void nvlist_move_nvlist(nvlist_t *nvl, const char *name, nvlist_t *value) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { if (value != NULL && nvlist_get_nvpair_parent(value) != NULL) nvlist_destroy(value); ERRNO_SET(nvlist_error(nvl)); return; } nvp = 
nvpair_move_nvlist(name, value); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } #ifndef _KERNEL void nvlist_move_descriptor(nvlist_t *nvl, const char *name, int value) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { close(value); ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_move_descriptor(name, value); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } #endif void nvlist_move_binary(nvlist_t *nvl, const char *name, void *value, size_t size) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { nv_free(value); ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_move_binary(name, value, size); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } void nvlist_move_bool_array(nvlist_t *nvl, const char *name, bool *value, size_t nitems) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { nv_free(value); ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_move_bool_array(name, value, nitems); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } void nvlist_move_string_array(nvlist_t *nvl, const char *name, char **value, size_t nitems) { nvpair_t *nvp; size_t i; if (nvlist_error(nvl) != 0) { if (value != NULL) { for (i = 0; i < nitems; i++) nv_free(value[i]); nv_free(value); } ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_move_string_array(name, value, nitems); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } void nvlist_move_nvlist_array(nvlist_t *nvl, const char *name, nvlist_t **value, size_t nitems) { nvpair_t *nvp; size_t i; if (nvlist_error(nvl) != 0) { if (value != NULL) { for (i = 0; i < nitems; i++) { if (nvlist_get_pararr(value[i], NULL) == NULL) nvlist_destroy(value[i]); } } nv_free(value); ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_move_nvlist_array(name, value, nitems); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } void nvlist_move_number_array(nvlist_t *nvl, const char *name, uint64_t *value, size_t nitems) { nvpair_t *nvp; if (nvlist_error(nvl) != 0) { nv_free(value); ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_move_number_array(name, value, nitems); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } #ifndef _KERNEL void nvlist_move_descriptor_array(nvlist_t *nvl, const char *name, int *value, size_t nitems) { nvpair_t *nvp; size_t i; if (nvlist_error(nvl) != 0) { if (value != 0) { for (i = 0; i < nitems; i++) close(value[i]); nv_free(value); } ERRNO_SET(nvlist_error(nvl)); return; } nvp = nvpair_move_descriptor_array(name, value, nitems); if (nvp == NULL) { nvl->nvl_error = ERRNO_OR_DEFAULT(ENOMEM); ERRNO_SET(nvl->nvl_error); } else { (void)nvlist_move_nvpair(nvl, nvp); } } #endif const nvpair_t * nvlist_get_nvpair(const nvlist_t *nvl, const char *name) { return (nvlist_find(nvl, NV_TYPE_NONE, name)); } #define NVLIST_GET(ftype, type, TYPE) \ ftype \ nvlist_get_##type(const nvlist_t *nvl, const char *name) \ { \ const nvpair_t *nvp; \ \ nvp = nvlist_find(nvl, NV_TYPE_##TYPE, name); \ if (nvp == NULL) \ nvlist_report_missing(NV_TYPE_##TYPE, name); \ 
return (nvpair_get_##type(nvp)); \ } NVLIST_GET(bool, bool, BOOL) NVLIST_GET(uint64_t, number, NUMBER) NVLIST_GET(const char *, string, STRING) NVLIST_GET(const nvlist_t *, nvlist, NVLIST) #ifndef _KERNEL NVLIST_GET(int, descriptor, DESCRIPTOR) #endif #undef NVLIST_GET const void * nvlist_get_binary(const nvlist_t *nvl, const char *name, size_t *sizep) { nvpair_t *nvp; nvp = nvlist_find(nvl, NV_TYPE_BINARY, name); if (nvp == NULL) nvlist_report_missing(NV_TYPE_BINARY, name); return (nvpair_get_binary(nvp, sizep)); } #define NVLIST_GET_ARRAY(ftype, type, TYPE) \ ftype \ nvlist_get_##type##_array(const nvlist_t *nvl, const char *name, \ size_t *nitems) \ { \ const nvpair_t *nvp; \ \ nvp = nvlist_find(nvl, NV_TYPE_##TYPE##_ARRAY, name); \ if (nvp == NULL) \ nvlist_report_missing(NV_TYPE_##TYPE##_ARRAY, name); \ return (nvpair_get_##type##_array(nvp, nitems)); \ } NVLIST_GET_ARRAY(const bool *, bool, BOOL) NVLIST_GET_ARRAY(const uint64_t *, number, NUMBER) NVLIST_GET_ARRAY(const char * const *, string, STRING) NVLIST_GET_ARRAY(const nvlist_t * const *, nvlist, NVLIST) #ifndef _KERNEL NVLIST_GET_ARRAY(const int *, descriptor, DESCRIPTOR) #endif #undef NVLIST_GET_ARRAY #define NVLIST_TAKE(ftype, type, TYPE) \ ftype \ nvlist_take_##type(nvlist_t *nvl, const char *name) \ { \ nvpair_t *nvp; \ ftype value; \ \ nvp = nvlist_find(nvl, NV_TYPE_##TYPE, name); \ if (nvp == NULL) \ nvlist_report_missing(NV_TYPE_##TYPE, name); \ value = (ftype)(intptr_t)nvpair_get_##type(nvp); \ nvlist_remove_nvpair(nvl, nvp); \ nvpair_free_structure(nvp); \ return (value); \ } NVLIST_TAKE(bool, bool, BOOL) NVLIST_TAKE(uint64_t, number, NUMBER) NVLIST_TAKE(char *, string, STRING) NVLIST_TAKE(nvlist_t *, nvlist, NVLIST) #ifndef _KERNEL NVLIST_TAKE(int, descriptor, DESCRIPTOR) #endif #undef NVLIST_TAKE void * nvlist_take_binary(nvlist_t *nvl, const char *name, size_t *sizep) { nvpair_t *nvp; void *value; nvp = nvlist_find(nvl, NV_TYPE_BINARY, name); if (nvp == NULL) nvlist_report_missing(NV_TYPE_BINARY, name); value = (void *)(intptr_t)nvpair_get_binary(nvp, sizep); nvlist_remove_nvpair(nvl, nvp); nvpair_free_structure(nvp); return (value); } #define NVLIST_TAKE_ARRAY(ftype, type, TYPE) \ ftype \ nvlist_take_##type##_array(nvlist_t *nvl, const char *name, \ size_t *nitems) \ { \ nvpair_t *nvp; \ ftype value; \ \ nvp = nvlist_find(nvl, NV_TYPE_##TYPE##_ARRAY, name); \ if (nvp == NULL) \ nvlist_report_missing(NV_TYPE_##TYPE##_ARRAY, name); \ value = (ftype)(intptr_t)nvpair_get_##type##_array(nvp, nitems);\ nvlist_remove_nvpair(nvl, nvp); \ nvpair_free_structure(nvp); \ return (value); \ } NVLIST_TAKE_ARRAY(bool *, bool, BOOL) NVLIST_TAKE_ARRAY(uint64_t *, number, NUMBER) NVLIST_TAKE_ARRAY(char **, string, STRING) NVLIST_TAKE_ARRAY(nvlist_t **, nvlist, NVLIST) #ifndef _KERNEL NVLIST_TAKE_ARRAY(int *, descriptor, DESCRIPTOR) #endif void nvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *nvp) { NVLIST_ASSERT(nvl); NVPAIR_ASSERT(nvp); PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl); nvpair_remove(&nvl->nvl_head, nvp, nvl); } void nvlist_free(nvlist_t *nvl, const char *name) { nvlist_free_type(nvl, name, NV_TYPE_NONE); } #define NVLIST_FREE(type, TYPE) \ void \ nvlist_free_##type(nvlist_t *nvl, const char *name) \ { \ \ nvlist_free_type(nvl, name, NV_TYPE_##TYPE); \ } NVLIST_FREE(null, NULL) NVLIST_FREE(bool, BOOL) NVLIST_FREE(number, NUMBER) NVLIST_FREE(string, STRING) NVLIST_FREE(nvlist, NVLIST) NVLIST_FREE(binary, BINARY) NVLIST_FREE(bool_array, BOOL_ARRAY) NVLIST_FREE(number_array, NUMBER_ARRAY) NVLIST_FREE(string_array, STRING_ARRAY) 
NVLIST_FREE(nvlist_array, NVLIST_ARRAY) #ifndef _KERNEL NVLIST_FREE(descriptor, DESCRIPTOR) NVLIST_FREE(descriptor_array, DESCRIPTOR_ARRAY) #endif #undef NVLIST_FREE void nvlist_free_nvpair(nvlist_t *nvl, nvpair_t *nvp) { NVLIST_ASSERT(nvl); NVPAIR_ASSERT(nvp); PJDLOG_ASSERT(nvpair_nvlist(nvp) == nvl); nvlist_remove_nvpair(nvl, nvp); nvpair_free(nvp); } Index: user/alc/PQ_LAUNDRY/sys/dev/ath/if_athioctl.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/ath/if_athioctl.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/ath/if_athioctl.h (revision 303642) @@ -1,452 +1,452 @@ /*- * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. * * $FreeBSD$ */ /* * Ioctl-related defintions for the Atheros Wireless LAN controller driver. 
*/ #ifndef _DEV_ATH_ATHIOCTL_H #define _DEV_ATH_ATHIOCTL_H struct ath_tx_aggr_stats { u_int32_t aggr_pkts[64]; u_int32_t aggr_single_pkt; u_int32_t aggr_nonbaw_pkt; u_int32_t aggr_aggr_pkt; u_int32_t aggr_baw_closed_single_pkt; u_int32_t aggr_low_hwq_single_pkt; u_int32_t aggr_sched_nopkt; u_int32_t aggr_rts_aggr_limited; }; struct ath_intr_stats { u_int32_t sync_intr[32]; }; struct ath_stats { u_int32_t ast_watchdog; /* device reset by watchdog */ u_int32_t ast_hardware; /* fatal hardware error interrupts */ u_int32_t ast_bmiss; /* beacon miss interrupts */ u_int32_t ast_bmiss_phantom;/* beacon miss interrupts */ u_int32_t ast_bstuck; /* beacon stuck interrupts */ u_int32_t ast_rxorn; /* rx overrun interrupts */ u_int32_t ast_rxeol; /* rx eol interrupts */ u_int32_t ast_txurn; /* tx underrun interrupts */ u_int32_t ast_mib; /* mib interrupts */ u_int32_t ast_intrcoal; /* interrupts coalesced */ u_int32_t ast_tx_packets; /* packet sent on the interface */ u_int32_t ast_tx_mgmt; /* management frames transmitted */ u_int32_t ast_tx_discard; /* frames discarded prior to assoc */ u_int32_t ast_tx_qstop; /* output stopped 'cuz no buffer */ u_int32_t ast_tx_encap; /* tx encapsulation failed */ u_int32_t ast_tx_nonode; /* tx failed 'cuz no node */ u_int32_t ast_tx_nombuf; /* tx failed 'cuz no mbuf */ u_int32_t ast_tx_nomcl; /* tx failed 'cuz no cluster */ u_int32_t ast_tx_linear; /* tx linearized to cluster */ u_int32_t ast_tx_nodata; /* tx discarded empty frame */ u_int32_t ast_tx_busdma; /* tx failed for dma resrcs */ u_int32_t ast_tx_xretries;/* tx failed 'cuz too many retries */ u_int32_t ast_tx_fifoerr; /* tx failed 'cuz FIFO underrun */ u_int32_t ast_tx_filtered;/* tx failed 'cuz xmit filtered */ u_int32_t ast_tx_shortretry;/* tx on-chip retries (short) */ u_int32_t ast_tx_longretry;/* tx on-chip retries (long) */ u_int32_t ast_tx_badrate; /* tx failed 'cuz bogus xmit rate */ u_int32_t ast_tx_noack; /* tx frames with no ack marked */ u_int32_t ast_tx_rts; /* tx frames with rts enabled */ u_int32_t ast_tx_cts; /* tx frames with cts enabled */ u_int32_t ast_tx_shortpre;/* tx frames with short preamble */ u_int32_t ast_tx_altrate; /* tx frames with alternate rate */ u_int32_t ast_tx_protect; /* tx frames with protection */ u_int32_t ast_tx_ctsburst;/* tx frames with cts and bursting */ u_int32_t ast_tx_ctsext; /* tx frames with cts extension */ u_int32_t ast_rx_nombuf; /* rx setup failed 'cuz no mbuf */ u_int32_t ast_rx_busdma; /* rx setup failed for dma resrcs */ u_int32_t ast_rx_orn; /* rx failed 'cuz of desc overrun */ u_int32_t ast_rx_crcerr; /* rx failed 'cuz of bad CRC */ u_int32_t ast_rx_fifoerr; /* rx failed 'cuz of FIFO overrun */ u_int32_t ast_rx_badcrypt;/* rx failed 'cuz decryption */ u_int32_t ast_rx_badmic; /* rx failed 'cuz MIC failure */ u_int32_t ast_rx_phyerr; /* rx failed 'cuz of PHY err */ u_int32_t ast_rx_phy[64]; /* rx PHY error per-code counts */ u_int32_t ast_rx_tooshort;/* rx discarded 'cuz frame too short */ u_int32_t ast_rx_toobig; /* rx discarded 'cuz frame too large */ u_int32_t ast_rx_packets; /* packet recv on the interface */ u_int32_t ast_rx_mgt; /* management frames received */ u_int32_t ast_rx_ctl; /* rx discarded 'cuz ctl frame */ int8_t ast_tx_rssi; /* tx rssi of last ack */ int8_t ast_rx_rssi; /* rx rssi from histogram */ u_int8_t ast_tx_rate; /* IEEE rate of last unicast tx */ u_int32_t ast_be_xmit; /* beacons transmitted */ u_int32_t ast_be_nombuf; /* beacon setup failed 'cuz no mbuf */ u_int32_t ast_per_cal; /* periodic calibration calls */ u_int32_t 
ast_per_calfail;/* periodic calibration failed */ u_int32_t ast_per_rfgain; /* periodic calibration rfgain reset */ u_int32_t ast_rate_calls; /* rate control checks */ u_int32_t ast_rate_raise; /* rate control raised xmit rate */ u_int32_t ast_rate_drop; /* rate control dropped xmit rate */ u_int32_t ast_ant_defswitch;/* rx/default antenna switches */ u_int32_t ast_ant_txswitch;/* tx antenna switches */ u_int32_t ast_ant_rx[8]; /* rx frames with antenna */ u_int32_t ast_ant_tx[8]; /* tx frames with antenna */ u_int32_t ast_cabq_xmit; /* cabq frames transmitted */ u_int32_t ast_cabq_busy; /* cabq found busy */ u_int32_t ast_tx_raw; /* tx frames through raw api */ u_int32_t ast_ff_txok; /* fast frames tx'd successfully */ u_int32_t ast_ff_txerr; /* fast frames tx'd w/ error */ u_int32_t ast_ff_rx; /* fast frames rx'd */ u_int32_t ast_ff_flush; /* fast frames flushed from staging q */ u_int32_t ast_tx_qfull; /* tx dropped 'cuz of queue limit */ int8_t ast_rx_noise; /* rx noise floor */ u_int32_t ast_tx_nobuf; /* tx dropped 'cuz no ath buffer */ u_int32_t ast_tdma_update;/* TDMA slot timing updates */ u_int32_t ast_tdma_timers;/* TDMA slot update set beacon timers */ u_int32_t ast_tdma_tsf; /* TDMA slot update set TSF */ u_int16_t ast_tdma_tsfadjp;/* TDMA slot adjust+ (usec, smoothed)*/ u_int16_t ast_tdma_tsfadjm;/* TDMA slot adjust- (usec, smoothed)*/ u_int32_t ast_tdma_ack; /* TDMA tx failed 'cuz ACK required */ u_int32_t ast_tx_raw_fail;/* raw tx failed 'cuz h/w down */ u_int32_t ast_tx_nofrag; /* tx dropped 'cuz no ath frag buffer */ u_int32_t ast_be_missed; /* missed beacons */ u_int32_t ast_ani_cal; /* ANI calibrations performed */ u_int32_t ast_rx_agg; /* number of aggregate frames RX'ed */ u_int32_t ast_rx_halfgi; /* RX half-GI */ u_int32_t ast_rx_2040; /* RX 40mhz frame */ u_int32_t ast_rx_pre_crc_err; /* RX pre-delimiter CRC error */ u_int32_t ast_rx_post_crc_err; /* RX post-delimiter CRC error */ u_int32_t ast_rx_decrypt_busy_err; /* RX decrypt engine busy error */ u_int32_t ast_rx_hi_rx_chain; u_int32_t ast_tx_htprotect; /* HT tx frames with protection */ u_int32_t ast_rx_hitqueueend; /* RX hit descr queue end */ u_int32_t ast_tx_timeout; /* Global TX timeout */ u_int32_t ast_tx_cst; /* Carrier sense timeout */ u_int32_t ast_tx_xtxop; /* tx exceeded TXOP */ u_int32_t ast_tx_timerexpired; /* tx exceeded TX_TIMER */ u_int32_t ast_tx_desccfgerr; /* tx desc cfg error */ u_int32_t ast_tx_swretries; /* software TX retries */ u_int32_t ast_tx_swretrymax; /* software TX retry max limit reach */ u_int32_t ast_tx_data_underrun; u_int32_t ast_tx_delim_underrun; u_int32_t ast_tx_aggr_failall; /* aggregate TX failed in its entirety */ u_int32_t ast_tx_getnobuf; u_int32_t ast_tx_getbusybuf; u_int32_t ast_tx_intr; u_int32_t ast_rx_intr; u_int32_t ast_tx_aggr_ok; /* aggregate TX ok */ u_int32_t ast_tx_aggr_fail; /* aggregate TX failed */ u_int32_t ast_tx_mcastq_overflow; /* multicast queue overflow */ u_int32_t ast_rx_keymiss; u_int32_t ast_tx_swfiltered; u_int32_t ast_tx_node_psq_overflow; u_int32_t ast_rx_stbc; /* RX STBC frame */ u_int32_t ast_tx_nodeq_overflow; /* node sw queue overflow */ u_int32_t ast_tx_ldpc; /* TX LDPC frame */ u_int32_t ast_tx_stbc; /* TX STBC frame */ u_int32_t ast_pad[10]; }; #define SIOCGATHSTATS _IOWR('i', 137, struct ifreq) #define SIOCZATHSTATS _IOWR('i', 139, struct ifreq) #define SIOCGATHAGSTATS _IOWR('i', 141, struct ifreq) struct ath_diag { char ad_name[IFNAMSIZ]; /* if name, e.g. 
"ath0" */ u_int16_t ad_id; #define ATH_DIAG_DYN 0x8000 /* allocate buffer in caller */ #define ATH_DIAG_IN 0x4000 /* copy in parameters */ #define ATH_DIAG_OUT 0x0000 /* copy out results (always) */ #define ATH_DIAG_ID 0x0fff u_int16_t ad_in_size; /* pack to fit, yech */ caddr_t ad_in_data; caddr_t ad_out_data; u_int ad_out_size; }; #define SIOCGATHDIAG _IOWR('i', 138, struct ath_diag) #define SIOCGATHPHYERR _IOWR('i', 140, struct ath_diag) /* * The rate control ioctl has to support multiple potential rate * control classes. For now, instead of trying to support an * abstraction for this in the API, let's just use a TLV * representation for the payload and let userspace sort it out. */ struct ath_rateioctl_tlv { uint16_t tlv_id; uint16_t tlv_len; /* length excluding TLV header */ }; /* * This is purely the six byte MAC address. */ #define ATH_RATE_TLV_MACADDR 0xaab0 /* * The rate control modules may decide to push a mapping table * of rix -> net80211 ratecode as part of the update. */ #define ATH_RATE_TLV_RATETABLE_NENTRIES 64 struct ath_rateioctl_rt { uint16_t nentries; uint16_t pad[1]; uint8_t ratecode[ATH_RATE_TLV_RATETABLE_NENTRIES]; }; #define ATH_RATE_TLV_RATETABLE 0xaab1 /* * This is the sample node statistics structure. * More in ath_rate/sample/sample.h. */ #define ATH_RATE_TLV_SAMPLENODE 0xaab2 struct ath_rateioctl { char if_name[IFNAMSIZ]; /* if name */ union { uint8_t macaddr[IEEE80211_ADDR_LEN]; uint64_t pad; } is_u; uint32_t len; caddr_t buf; }; #define SIOCGATHNODERATESTATS _IOWR('i', 149, struct ath_rateioctl) #define SIOCGATHRATESTATS _IOWR('i', 150, struct ath_rateioctl) /* * Radio capture format. */ #define ATH_RX_RADIOTAP_PRESENT_BASE ( \ (1 << IEEE80211_RADIOTAP_TSFT) | \ (1 << IEEE80211_RADIOTAP_FLAGS) | \ (1 << IEEE80211_RADIOTAP_RATE) | \ (1 << IEEE80211_RADIOTAP_ANTENNA) | \ (1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL) | \ (1 << IEEE80211_RADIOTAP_DBM_ANTNOISE) | \ (1 << IEEE80211_RADIOTAP_XCHANNEL) | \ 0) #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT #define ATH_RX_RADIOTAP_PRESENT \ (ATH_RX_RADIOTAP_PRESENT_BASE | \ (1 << IEEE80211_RADIOTAP_VENDOREXT) | \ (1 << IEEE80211_RADIOTAP_EXT) | \ 0) #else #define ATH_RX_RADIOTAP_PRESENT ATH_RX_RADIOTAP_PRESENT_BASE #endif /* ATH_ENABLE_RADIOTAP_PRESENT */ #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT /* * This is higher than the vendor bitmap used inside * the Atheros reference codebase. */ /* Bit 8 */ #define ATH_RADIOTAP_VENDOR_HEADER 8 /* * Using four chains makes all the fields in the * per-chain info header be 4-byte aligned. */ #define ATH_RADIOTAP_MAX_CHAINS 4 /* * AR9380 and later chips are 3x3, which requires * 5 EVM DWORDs in HT40 mode. */ #define ATH_RADIOTAP_MAX_EVM 5 /* * The vendor radiotap header data needs to be: * * + Aligned to a 4 byte address * + .. so all internal fields are 4 bytes aligned; * + .. and no 64 bit fields are allowed. * * So padding is required to ensure this is the case. * * Note that because of the lack of alignment with the * vendor header (6 bytes), the first field must be * two bytes so it can be accessed by alignment-strict * platform (eg MIPS.) 
*/ struct ath_radiotap_vendor_hdr { /* 30 bytes */ uint8_t vh_version; /* 1 */ uint8_t vh_rx_chainmask; /* 1 */ /* At this point it should be 4 byte aligned */ uint32_t evm[ATH_RADIOTAP_MAX_EVM]; /* 5 * 4 = 20 */ - uint8_t rssi_ctl[ATH_RADIOTAP_MAX_CHAINS]; /* 4 */ - uint8_t rssi_ext[ATH_RADIOTAP_MAX_CHAINS]; /* 4 */ + uint8_t rssi_ctl[ATH_RADIOTAP_MAX_CHAINS]; /* 4 * 4 = 16 */ + uint8_t rssi_ext[ATH_RADIOTAP_MAX_CHAINS]; /* 4 * 4 = 16 */ uint8_t vh_phyerr_code; /* Phy error code, or 0xff */ uint8_t vh_rs_status; /* RX status */ uint8_t vh_rssi; /* Raw RSSI */ uint8_t vh_flags; /* General flags */ #define ATH_VENDOR_PKT_RX 0x01 #define ATH_VENDOR_PKT_TX 0x02 #define ATH_VENDOR_PKT_RXPHYERR 0x04 #define ATH_VENDOR_PKT_ISAGGR 0x08 #define ATH_VENDOR_PKT_MOREAGGR 0x10 uint8_t vh_rx_hwrate; /* hardware RX ratecode */ uint8_t vh_rs_flags; /* RX HAL flags */ uint8_t vh_pad[2]; /* pad to DWORD boundary */ } __packed; #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ struct ath_rx_radiotap_header { struct ieee80211_radiotap_header wr_ihdr; #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT /* Vendor extension header bitmap */ uint32_t wr_ext_bitmap; /* 4 */ /* * This padding is needed because: * + the radiotap header is 8 bytes; * + the extension bitmap is 4 bytes; * + the tsf is 8 bytes, so it must start on an 8 byte * boundary. */ uint32_t wr_pad1; #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ /* Normal radiotap fields */ u_int64_t wr_tsf; u_int8_t wr_flags; u_int8_t wr_rate; int8_t wr_antsignal; int8_t wr_antnoise; u_int8_t wr_antenna; u_int8_t wr_pad[3]; u_int32_t wr_chan_flags; u_int16_t wr_chan_freq; u_int8_t wr_chan_ieee; int8_t wr_chan_maxpow; #ifdef ATH_ENABLE_RADIOTAP_VENDOR_EXT /* * Vendor header section, as required by the * presence of the vendor extension bit and bitmap * entry. * * XXX This must be aligned to a 4 byte address? * XXX or 8 byte address? */ struct ieee80211_radiotap_vendor_header wr_vh; /* 6 bytes */ /* * Because of the lack of alignment enforced by the above * header, this vendor section won't be aligned in any * useful way. So, this will include a two-byte version * value which will force the structure to be 4-byte aligned. 
*/ struct ath_radiotap_vendor_hdr wr_v; #endif /* ATH_ENABLE_RADIOTAP_VENDOR_EXT */ } __packed; #define ATH_TX_RADIOTAP_PRESENT ( \ (1 << IEEE80211_RADIOTAP_TSFT) | \ (1 << IEEE80211_RADIOTAP_FLAGS) | \ (1 << IEEE80211_RADIOTAP_RATE) | \ (1 << IEEE80211_RADIOTAP_DBM_TX_POWER) | \ (1 << IEEE80211_RADIOTAP_ANTENNA) | \ (1 << IEEE80211_RADIOTAP_XCHANNEL) | \ 0) struct ath_tx_radiotap_header { struct ieee80211_radiotap_header wt_ihdr; u_int64_t wt_tsf; u_int8_t wt_flags; u_int8_t wt_rate; u_int8_t wt_txpower; u_int8_t wt_antenna; u_int32_t wt_chan_flags; u_int16_t wt_chan_freq; u_int8_t wt_chan_ieee; int8_t wt_chan_maxpow; } __packed; /* * DFS ioctl commands */ #define DFS_SET_THRESH 2 #define DFS_GET_THRESH 3 #define DFS_RADARDETECTS 6 /* * DFS ioctl parameter types */ #define DFS_PARAM_FIRPWR 1 #define DFS_PARAM_RRSSI 2 #define DFS_PARAM_HEIGHT 3 #define DFS_PARAM_PRSSI 4 #define DFS_PARAM_INBAND 5 #define DFS_PARAM_NOL 6 /* XXX not used in FreeBSD */ #define DFS_PARAM_RELSTEP_EN 7 #define DFS_PARAM_RELSTEP 8 #define DFS_PARAM_RELPWR_EN 9 #define DFS_PARAM_RELPWR 10 #define DFS_PARAM_MAXLEN 11 #define DFS_PARAM_USEFIR128 12 #define DFS_PARAM_BLOCKRADAR 13 #define DFS_PARAM_MAXRSSI_EN 14 /* FreeBSD-specific start at 32 */ #define DFS_PARAM_ENABLE 32 #define DFS_PARAM_EN_EXTCH 33 /* * Spectral ioctl parameter types */ #define SPECTRAL_PARAM_FFT_PERIOD 1 #define SPECTRAL_PARAM_SS_PERIOD 2 #define SPECTRAL_PARAM_SS_COUNT 3 #define SPECTRAL_PARAM_SS_SHORT_RPT 4 #define SPECTRAL_PARAM_ENABLED 5 #define SPECTRAL_PARAM_ACTIVE 6 /* * Spectral control parameters */ #define SIOCGATHSPECTRAL _IOWR('i', 151, struct ath_diag) #define SPECTRAL_CONTROL_ENABLE 2 #define SPECTRAL_CONTROL_DISABLE 3 #define SPECTRAL_CONTROL_START 4 #define SPECTRAL_CONTROL_STOP 5 #define SPECTRAL_CONTROL_GET_PARAMS 6 #define SPECTRAL_CONTROL_SET_PARAMS 7 #define SPECTRAL_CONTROL_ENABLE_AT_RESET 8 #define SPECTRAL_CONTROL_DISABLE_AT_RESET 9 #endif /* _DEV_ATH_ATHIOCTL_H */ Index: user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api.c (revision 303642) @@ -1,538 +1,558 @@ -/* $NetBSD: cfe_api.c,v 1.5 2005/12/11 12:18:07 christos Exp $ */ -/* from: SiByte Id: cfe_api.c,v 1.16 2002/07/09 23:29:11 cgd Exp $ */ +/* from: Broadcom Id: cfe_api.c,v 1.18 2006/08/24 02:13:56 binh Exp $ */ /*- * Copyright 2000, 2001, 2002 * Broadcom Corporation. All rights reserved. * * This software is furnished under license and may be used and copied only * in accordance with the following terms and conditions. Subject to these * conditions, you may download, copy, install, use, modify and distribute * modified or unmodified copies of this software in source and/or binary * form. No title or ownership is transferred hereby. * * 1) Any source code used, modified or distributed must reproduce and * retain this copyright notice and list of conditions as they appear in * the source file. * * 2) No right is granted to use any trade name, trademark, or logo of * Broadcom Corporation. The "Broadcom Corporation" name may not be * used to endorse or promote products derived from this software * without the prior written permission of Broadcom Corporation. * * 3) THIS SOFTWARE IS PROVIDED "AS-IS" AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING BUT NOT LIMITED TO, ANY IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR * NON-INFRINGEMENT ARE DISCLAIMED. 
IN NO EVENT SHALL BROADCOM BE LIABLE * FOR ANY DAMAGES WHATSOEVER, AND IN PARTICULAR, BROADCOM SHALL NOT BE * LIABLE FOR DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE), EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* ********************************************************************* * * Broadcom Common Firmware Environment (CFE) * * Device Function stubs File: cfe_api.c * * This module contains device function stubs (small routines to * call the standard "iocb" interface entry point to CFE). * There should be one routine here per iocb function call. * * Authors: Mitch Lichtenberg, Chris Demetriou * ********************************************************************* */ #include __FBSDID("$FreeBSD$"); #include #include /* Cast from a native pointer to a cfe_xptr_t and back. */ #define XPTR_FROM_NATIVE(n) ((cfe_xptr_t) (intptr_t) (n)) #define NATIVE_FROM_XPTR(x) ((void *) (intptr_t) (x)) #ifdef CFE_API_IMPL_NAMESPACE #define cfe_iocb_dispatch(a) __cfe_iocb_dispatch(a) #endif int cfe_iocb_dispatch(cfe_xiocb_t *xiocb); #if defined(CFE_API_common) || defined(CFE_API_ALL) /* * Declare the dispatch function with args of "intptr_t". * This makes sure whatever model we're compiling in * puts the pointers in a single register. For example, * combining -mlong64 and -mips1 or -mips2 would lead to * trouble, since the handle and IOCB pointer will be * passed in two registers each, and CFE expects one. */ static int (*cfe_dispfunc)(intptr_t handle, intptr_t xiocb) = 0; static cfe_xuint_t cfe_handle = 0; int cfe_init(cfe_xuint_t handle, cfe_xuint_t ept) { cfe_dispfunc = NATIVE_FROM_XPTR(ept); cfe_handle = handle; return 0; } int cfe_iocb_dispatch(cfe_xiocb_t *xiocb) { if (!cfe_dispfunc) return -1; return (*cfe_dispfunc)((intptr_t)cfe_handle, (intptr_t)xiocb); } #endif /* CFE_API_common || CFE_API_ALL */ #if defined(CFE_API_close) || defined(CFE_API_ALL) int cfe_close(int handle) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_DEV_CLOSE; xiocb.xiocb_status = 0; xiocb.xiocb_handle = handle; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = 0; cfe_iocb_dispatch(&xiocb); return xiocb.xiocb_status; } #endif /* CFE_API_close || CFE_API_ALL */ #if defined(CFE_API_cpu_start) || defined(CFE_API_ALL) int cfe_cpu_start(int cpu, void (*fn)(void), long sp, long gp, long a1) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_FW_CPUCTL; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_cpuctl_t); xiocb.plist.xiocb_cpuctl.cpu_number = cpu; xiocb.plist.xiocb_cpuctl.cpu_command = CFE_CPU_CMD_START; xiocb.plist.xiocb_cpuctl.gp_val = gp; xiocb.plist.xiocb_cpuctl.sp_val = sp; xiocb.plist.xiocb_cpuctl.a1_val = a1; xiocb.plist.xiocb_cpuctl.start_addr = (long)fn; cfe_iocb_dispatch(&xiocb); return xiocb.xiocb_status; } #endif /* CFE_API_cpu_start || CFE_API_ALL */ #if defined(CFE_API_cpu_stop) || defined(CFE_API_ALL) int cfe_cpu_stop(int cpu) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_FW_CPUCTL; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_cpuctl_t); xiocb.plist.xiocb_cpuctl.cpu_number = cpu; xiocb.plist.xiocb_cpuctl.cpu_command = CFE_CPU_CMD_STOP; cfe_iocb_dispatch(&xiocb); return xiocb.xiocb_status; } #endif 
/* CFE_API_cpu_stop || CFE_API_ALL */ #if defined(CFE_API_enumenv) || defined(CFE_API_ALL) int cfe_enumenv(int idx, char *name, int namelen, char *val, int vallen) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_ENV_ENUM; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_envbuf_t); xiocb.plist.xiocb_envbuf.enum_idx = idx; xiocb.plist.xiocb_envbuf.name_ptr = XPTR_FROM_NATIVE(name); xiocb.plist.xiocb_envbuf.name_length = namelen; xiocb.plist.xiocb_envbuf.val_ptr = XPTR_FROM_NATIVE(val); xiocb.plist.xiocb_envbuf.val_length = vallen; cfe_iocb_dispatch(&xiocb); return xiocb.xiocb_status; } #endif /* CFE_API_enumenv || CFE_API_ALL */ + +#if defined(CFE_API_enumdev) || defined(CFE_API_ALL) +int +cfe_enumdev(int idx, char *name, int namelen) +{ + cfe_xiocb_t xiocb; + + xiocb.xiocb_fcode = CFE_CMD_DEV_ENUM; + xiocb.xiocb_status = 0; + xiocb.xiocb_handle = 0; + xiocb.xiocb_flags = 0; + xiocb.xiocb_psize = sizeof(xiocb_envbuf_t); + xiocb.plist.xiocb_envbuf.enum_idx = idx; + xiocb.plist.xiocb_envbuf.name_ptr = XPTR_FROM_NATIVE(name); + xiocb.plist.xiocb_envbuf.name_length = namelen; + + cfe_iocb_dispatch(&xiocb); + + return xiocb.xiocb_status; +} +#endif /* CFE_API_enumdev || CFE_API_ALL */ #if defined(CFE_API_enummem) || defined(CFE_API_ALL) int cfe_enummem(int idx, int flags, cfe_xuint_t *start, cfe_xuint_t *length, cfe_xuint_t *type) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_FW_MEMENUM; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = flags; xiocb.xiocb_psize = sizeof(xiocb_meminfo_t); xiocb.plist.xiocb_meminfo.mi_idx = idx; cfe_iocb_dispatch(&xiocb); if (xiocb.xiocb_status < 0) return xiocb.xiocb_status; *start = xiocb.plist.xiocb_meminfo.mi_addr; *length = xiocb.plist.xiocb_meminfo.mi_size; *type = xiocb.plist.xiocb_meminfo.mi_type; return 0; } #endif /* CFE_API_enummem || CFE_API_ALL */ #if defined(CFE_API_exit) || defined(CFE_API_ALL) int cfe_exit(int warm, int status) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_FW_RESTART; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = warm ? 
CFE_FLG_WARMSTART : 0; xiocb.xiocb_psize = sizeof(xiocb_exitstat_t); xiocb.plist.xiocb_exitstat.status = status; cfe_iocb_dispatch(&xiocb); return xiocb.xiocb_status; } #endif /* CFE_API_exit || CFE_API_ALL */ #if defined(CFE_API_flushcache) || defined(CFE_API_ALL) int cfe_flushcache(int flg) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_FW_FLUSHCACHE; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = flg; xiocb.xiocb_psize = 0; cfe_iocb_dispatch(&xiocb); return xiocb.xiocb_status; } #endif /* CFE_API_flushcache || CFE_API_ALL */ #if defined(CFE_API_getdevinfo) || defined(CFE_API_ALL) int cfe_getdevinfo(char *name) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_DEV_GETINFO; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_buffer_t); xiocb.plist.xiocb_buffer.buf_offset = 0; xiocb.plist.xiocb_buffer.buf_ptr = XPTR_FROM_NATIVE(name); xiocb.plist.xiocb_buffer.buf_length = cfe_strlen(name); cfe_iocb_dispatch(&xiocb); if (xiocb.xiocb_status < 0) return xiocb.xiocb_status; return xiocb.plist.xiocb_buffer.buf_devflags; } #endif /* CFE_API_getdevinfo || CFE_API_ALL */ #if defined(CFE_API_getenv) || defined(CFE_API_ALL) int cfe_getenv(char *name, char *dest, int destlen) { cfe_xiocb_t xiocb; *dest = 0; xiocb.xiocb_fcode = CFE_CMD_ENV_GET; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_envbuf_t); xiocb.plist.xiocb_envbuf.enum_idx = 0; xiocb.plist.xiocb_envbuf.name_ptr = XPTR_FROM_NATIVE(name); xiocb.plist.xiocb_envbuf.name_length = cfe_strlen(name); xiocb.plist.xiocb_envbuf.val_ptr = XPTR_FROM_NATIVE(dest); xiocb.plist.xiocb_envbuf.val_length = destlen; cfe_iocb_dispatch(&xiocb); return xiocb.xiocb_status; } #endif /* CFE_API_getenv || CFE_API_ALL */ #if defined(CFE_API_getfwinfo) || defined(CFE_API_ALL) int cfe_getfwinfo(cfe_fwinfo_t *info) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_FW_GETINFO; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_fwinfo_t); cfe_iocb_dispatch(&xiocb); if (xiocb.xiocb_status < 0) return xiocb.xiocb_status; info->fwi_version = xiocb.plist.xiocb_fwinfo.fwi_version; info->fwi_totalmem = xiocb.plist.xiocb_fwinfo.fwi_totalmem; info->fwi_flags = xiocb.plist.xiocb_fwinfo.fwi_flags; info->fwi_boardid = xiocb.plist.xiocb_fwinfo.fwi_boardid; info->fwi_bootarea_va = xiocb.plist.xiocb_fwinfo.fwi_bootarea_va; info->fwi_bootarea_pa = xiocb.plist.xiocb_fwinfo.fwi_bootarea_pa; info->fwi_bootarea_size = xiocb.plist.xiocb_fwinfo.fwi_bootarea_size; #if 0 info->fwi_reserved1 = xiocb.plist.xiocb_fwinfo.fwi_reserved1; info->fwi_reserved2 = xiocb.plist.xiocb_fwinfo.fwi_reserved2; info->fwi_reserved3 = xiocb.plist.xiocb_fwinfo.fwi_reserved3; #endif return 0; } #endif /* CFE_API_getfwinfo || CFE_API_ALL */ #if defined(CFE_API_getstdhandle) || defined(CFE_API_ALL) int cfe_getstdhandle(int flg) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_DEV_GETHANDLE; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = flg; xiocb.xiocb_psize = 0; cfe_iocb_dispatch(&xiocb); if (xiocb.xiocb_status < 0) return xiocb.xiocb_status; return xiocb.xiocb_handle; } #endif /* CFE_API_getstdhandle || CFE_API_ALL */ #if defined(CFE_API_getticks) || defined(CFE_API_ALL) int64_t #ifdef CFE_API_IMPL_NAMESPACE __cfe_getticks(void) #else cfe_getticks(void) #endif { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_FW_GETTIME; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = 
sizeof(xiocb_time_t); xiocb.plist.xiocb_time.ticks = 0; cfe_iocb_dispatch(&xiocb); return xiocb.plist.xiocb_time.ticks; } #endif /* CFE_API_getticks || CFE_API_ALL */ #if defined(CFE_API_inpstat) || defined(CFE_API_ALL) int cfe_inpstat(int handle) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_DEV_INPSTAT; xiocb.xiocb_status = 0; xiocb.xiocb_handle = handle; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_inpstat_t); xiocb.plist.xiocb_inpstat.inp_status = 0; cfe_iocb_dispatch(&xiocb); if (xiocb.xiocb_status < 0) return xiocb.xiocb_status; return xiocb.plist.xiocb_inpstat.inp_status; } #endif /* CFE_API_inpstat || CFE_API_ALL */ #if defined(CFE_API_ioctl) || defined(CFE_API_ALL) int cfe_ioctl(int handle, unsigned int ioctlnum, unsigned char *buffer, int length, int *retlen, cfe_xuint_t offset) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_DEV_IOCTL; xiocb.xiocb_status = 0; xiocb.xiocb_handle = handle; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_buffer_t); xiocb.plist.xiocb_buffer.buf_offset = offset; xiocb.plist.xiocb_buffer.buf_ioctlcmd = ioctlnum; xiocb.plist.xiocb_buffer.buf_ptr = XPTR_FROM_NATIVE(buffer); xiocb.plist.xiocb_buffer.buf_length = length; cfe_iocb_dispatch(&xiocb); if (retlen) *retlen = xiocb.plist.xiocb_buffer.buf_retlen; return xiocb.xiocb_status; } #endif /* CFE_API_ioctl || CFE_API_ALL */ #if defined(CFE_API_open) || defined(CFE_API_ALL) int cfe_open(char *name) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_DEV_OPEN; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_buffer_t); xiocb.plist.xiocb_buffer.buf_offset = 0; xiocb.plist.xiocb_buffer.buf_ptr = XPTR_FROM_NATIVE(name); xiocb.plist.xiocb_buffer.buf_length = cfe_strlen(name); cfe_iocb_dispatch(&xiocb); if (xiocb.xiocb_status < 0) return xiocb.xiocb_status; return xiocb.xiocb_handle; } #endif /* CFE_API_open || CFE_API_ALL */ #if defined(CFE_API_read) || defined(CFE_API_ALL) int cfe_read(int handle, unsigned char *buffer, int length) { return cfe_readblk(handle, 0, buffer, length); } #endif /* CFE_API_read || CFE_API_ALL */ #if defined(CFE_API_readblk) || defined(CFE_API_ALL) int cfe_readblk(int handle, cfe_xint_t offset, unsigned char *buffer, int length) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_DEV_READ; xiocb.xiocb_status = 0; xiocb.xiocb_handle = handle; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_buffer_t); xiocb.plist.xiocb_buffer.buf_offset = offset; xiocb.plist.xiocb_buffer.buf_ptr = XPTR_FROM_NATIVE(buffer); xiocb.plist.xiocb_buffer.buf_length = length; cfe_iocb_dispatch(&xiocb); if (xiocb.xiocb_status < 0) return xiocb.xiocb_status; return xiocb.plist.xiocb_buffer.buf_retlen; } #endif /* CFE_API_readblk || CFE_API_ALL */ #if defined(CFE_API_setenv) || defined(CFE_API_ALL) int cfe_setenv(char *name, char *val) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_ENV_SET; xiocb.xiocb_status = 0; xiocb.xiocb_handle = 0; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_envbuf_t); xiocb.plist.xiocb_envbuf.enum_idx = 0; xiocb.plist.xiocb_envbuf.name_ptr = XPTR_FROM_NATIVE(name); xiocb.plist.xiocb_envbuf.name_length = cfe_strlen(name); xiocb.plist.xiocb_envbuf.val_ptr = XPTR_FROM_NATIVE(val); xiocb.plist.xiocb_envbuf.val_length = cfe_strlen(val); cfe_iocb_dispatch(&xiocb); return xiocb.xiocb_status; } #endif /* CFE_API_setenv || CFE_API_ALL */ #if (defined(CFE_API_strlen) || defined(CFE_API_ALL)) \ && !defined(CFE_API_STRLEN_CUSTOM) int cfe_strlen(char *name) { int count = 0; while (*name++) count++; return 
count; } #endif /* CFE_API_strlen || CFE_API_ALL */ #if defined(CFE_API_write) || defined(CFE_API_ALL) int cfe_write(int handle, unsigned char *buffer, int length) { return cfe_writeblk(handle, 0, buffer, length); } #endif /* CFE_API_write || CFE_API_ALL */ #if defined(CFE_API_writeblk) || defined(CFE_API_ALL) int cfe_writeblk(int handle, cfe_xint_t offset, unsigned char *buffer, int length) { cfe_xiocb_t xiocb; xiocb.xiocb_fcode = CFE_CMD_DEV_WRITE; xiocb.xiocb_status = 0; xiocb.xiocb_handle = handle; xiocb.xiocb_flags = 0; xiocb.xiocb_psize = sizeof(xiocb_buffer_t); xiocb.plist.xiocb_buffer.buf_offset = offset; xiocb.plist.xiocb_buffer.buf_ptr = XPTR_FROM_NATIVE(buffer); xiocb.plist.xiocb_buffer.buf_length = length; cfe_iocb_dispatch(&xiocb); if (xiocb.xiocb_status < 0) return xiocb.xiocb_status; return xiocb.plist.xiocb_buffer.buf_retlen; } #endif /* CFE_API_writeblk || CFE_API_ALL */ Index: user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api.h (revision 303642) @@ -1,199 +1,200 @@ -/* $NetBSD: cfe_api.h,v 1.3 2003/02/07 17:38:48 cgd Exp $ */ -/* from: SiByte Id: cfe_api.h,v 1.29 2002/07/09 23:29:11 cgd Exp $ */ +/* from: Broadcom Id: cfe_api.h,v 1.31 2006/08/24 02:13:56 binh Exp $ */ /*- * Copyright 2000, 2001, 2002 * Broadcom Corporation. All rights reserved. * * This software is furnished under license and may be used and copied only * in accordance with the following terms and conditions. Subject to these * conditions, you may download, copy, install, use, modify and distribute * modified or unmodified copies of this software in source and/or binary * form. No title or ownership is transferred hereby. * * 1) Any source code used, modified or distributed must reproduce and * retain this copyright notice and list of conditions as they appear in * the source file. * * 2) No right is granted to use any trade name, trademark, or logo of * Broadcom Corporation. The "Broadcom Corporation" name may not be * used to endorse or promote products derived from this software * without the prior written permission of Broadcom Corporation. * * 3) THIS SOFTWARE IS PROVIDED "AS-IS" AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING BUT NOT LIMITED TO, ANY IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL BROADCOM BE LIABLE * FOR ANY DAMAGES WHATSOEVER, AND IN PARTICULAR, BROADCOM SHALL NOT BE * LIABLE FOR DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE), EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* ********************************************************************* * * Broadcom Common Firmware Environment (CFE) * * Device function prototypes File: cfe_api.h * * This file contains declarations for doing callbacks to * cfe from an application. It should be the only header * needed by the application to use this library * * Authors: Mitch Lichtenberg, Chris Demetriou * ********************************************************************* */ #ifndef CFE_API_H #define CFE_API_H /* * Apply customizations here for different OSes. 
These need to: * * typedef uint64_t, int64_t, intptr_t, uintptr_t. * * define cfe_strlen() if use of an existing function is desired. * * define CFE_API_IMPL_NAMESPACE if API functions are to use * names in the implementation namespace. * Also, optionally, if the build environment does not do so automatically, * CFE_API_* can be defined here as desired. */ /* Begin customization. */ #include /* All of the typedefs. */ #include /* strlen() prototype. */ #define CFE_API_ALL #define cfe_strlen(x) strlen(x) /* End customization. */ /* ********************************************************************* * Constants ********************************************************************* */ /* Seal indicating CFE's presence, passed to user program. */ #define CFE_EPTSEAL 0x43464531 #define CFE_MI_RESERVED 0 /* memory is reserved, do not use */ #define CFE_MI_AVAILABLE 1 /* memory is available */ #define CFE_FLG_WARMSTART 0x00000001 #define CFE_FLG_FULL_ARENA 0x00000001 #define CFE_FLG_ENV_PERMANENT 0x00000001 #define CFE_CPU_CMD_START 1 #define CFE_CPU_CMD_STOP 0 #define CFE_STDHANDLE_CONSOLE 0 #define CFE_DEV_NETWORK 1 #define CFE_DEV_DISK 2 #define CFE_DEV_FLASH 3 #define CFE_DEV_SERIAL 4 #define CFE_DEV_CPU 5 #define CFE_DEV_NVRAM 6 #define CFE_DEV_CLOCK 7 #define CFE_DEV_OTHER 8 #define CFE_DEV_MASK 0x0F #define CFE_CACHE_FLUSH_D 1 #define CFE_CACHE_INVAL_I 2 #define CFE_CACHE_INVAL_D 4 #define CFE_CACHE_INVAL_L2 8 #define CFE_FWI_64BIT 0x00000001 #define CFE_FWI_32BIT 0x00000002 #define CFE_FWI_RELOC 0x00000004 #define CFE_FWI_UNCACHED 0x00000008 #define CFE_FWI_MULTICPU 0x00000010 #define CFE_FWI_FUNCSIM 0x00000020 #define CFE_FWI_RTLSIM 0x00000040 typedef struct { int64_t fwi_version; /* major, minor, eco version */ int64_t fwi_totalmem; /* total installed mem */ int64_t fwi_flags; /* various flags */ int64_t fwi_boardid; /* board ID */ int64_t fwi_bootarea_va; /* VA of boot area */ int64_t fwi_bootarea_pa; /* PA of boot area */ int64_t fwi_bootarea_size; /* size of boot area */ } cfe_fwinfo_t; /* * cfe_strlen is handled specially: If already defined, it has been * overridden in this environment with a standard strlen-like function. */ #ifdef cfe_strlen # define CFE_API_STRLEN_CUSTOM #else # ifdef CFE_API_IMPL_NAMESPACE # define cfe_strlen(a) __cfe_strlen(a) # endif int cfe_strlen(char *name); #endif /* * Defines and prototypes for functions which take no arguments. */ #ifdef CFE_API_IMPL_NAMESPACE int64_t __cfe_getticks(void); #define cfe_getticks() __cfe_getticks() #else int64_t cfe_getticks(void); #endif /* * Defines and prototypes for the rest of the functions. 
*/ #ifdef CFE_API_IMPL_NAMESPACE #define cfe_close(a) __cfe_close(a) #define cfe_cpu_start(a,b,c,d,e) __cfe_cpu_start(a,b,c,d,e) #define cfe_cpu_stop(a) __cfe_cpu_stop(a) #define cfe_enumenv(a,b,d,e,f) __cfe_enumenv(a,b,d,e,f) +#define cfe_enumdev(a,b,c) __cfe_enumdev(a,b,c) #define cfe_enummem(a,b,c,d,e) __cfe_enummem(a,b,c,d,e) #define cfe_exit(a,b) __cfe_exit(a,b) #define cfe_flushcache(a) __cfe_cacheflush(a) #define cfe_getdevinfo(a) __cfe_getdevinfo(a) #define cfe_getenv(a,b,c) __cfe_getenv(a,b,c) #define cfe_getfwinfo(a) __cfe_getfwinfo(a) #define cfe_getstdhandle(a) __cfe_getstdhandle(a) #define cfe_init(a,b) __cfe_init(a,b) #define cfe_inpstat(a) __cfe_inpstat(a) #define cfe_ioctl(a,b,c,d,e,f) __cfe_ioctl(a,b,c,d,e,f) #define cfe_open(a) __cfe_open(a) #define cfe_read(a,b,c) __cfe_read(a,b,c) #define cfe_readblk(a,b,c,d) __cfe_readblk(a,b,c,d) #define cfe_setenv(a,b) __cfe_setenv(a,b) #define cfe_write(a,b,c) __cfe_write(a,b,c) #define cfe_writeblk(a,b,c,d) __cfe_writeblk(a,b,c,d) #endif /* CFE_API_IMPL_NAMESPACE */ int cfe_close(int handle); int cfe_cpu_start(int cpu, void (*fn)(void), long sp, long gp, long a1); int cfe_cpu_stop(int cpu); int cfe_enumenv(int idx, char *name, int namelen, char *val, int vallen); +int cfe_enumdev(int idx, char *name, int namelen); int cfe_enummem(int idx, int flags, uint64_t *start, uint64_t *length, uint64_t *type); int cfe_exit(int warm,int status); int cfe_flushcache(int flg); int cfe_getdevinfo(char *name); int cfe_getenv(char *name, char *dest, int destlen); int cfe_getfwinfo(cfe_fwinfo_t *info); int cfe_getstdhandle(int flg); int cfe_init(uint64_t handle,uint64_t ept); int cfe_inpstat(int handle); int cfe_ioctl(int handle, unsigned int ioctlnum, unsigned char *buffer, int length, int *retlen, uint64_t offset); int cfe_open(char *name); int cfe_read(int handle, unsigned char *buffer, int length); int cfe_readblk(int handle, int64_t offset, unsigned char *buffer, int length); int cfe_setenv(char *name, char *val); int cfe_write(int handle, unsigned char *buffer, int length); int cfe_writeblk(int handle, int64_t offset, unsigned char *buffer, int length); #endif /* CFE_API_H */ Index: user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api_int.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api_int.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_api_int.h (revision 303642) @@ -1,171 +1,170 @@ -/* $NetBSD: cfe_api_int.h,v 1.2 2003/02/07 17:38:48 cgd Exp $ */ -/* from: SiByte Id: cfe_api_int.h,v 1.21 2002/07/09 23:29:11 cgd Exp $ */ +/* from: Broadcom Id: cfe_api_int.h,v 1.22 2003/02/07 17:27:56 cgd Exp $ */ /*- * Copyright 2000, 2001, 2002 * Broadcom Corporation. All rights reserved. * * This software is furnished under license and may be used and copied only * in accordance with the following terms and conditions. Subject to these * conditions, you may download, copy, install, use, modify and distribute * modified or unmodified copies of this software in source and/or binary * form. No title or ownership is transferred hereby. * * 1) Any source code used, modified or distributed must reproduce and * retain this copyright notice and list of conditions as they appear in * the source file. * * 2) No right is granted to use any trade name, trademark, or logo of * Broadcom Corporation. The "Broadcom Corporation" name may not be * used to endorse or promote products derived from this software * without the prior written permission of Broadcom Corporation. 
* * 3) THIS SOFTWARE IS PROVIDED "AS-IS" AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING BUT NOT LIMITED TO, ANY IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL BROADCOM BE LIABLE * FOR ANY DAMAGES WHATSOEVER, AND IN PARTICULAR, BROADCOM SHALL NOT BE * LIABLE FOR DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE), EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* ********************************************************************* * * Broadcom Common Firmware Environment (CFE) * * Device function prototypes File: cfe_api_int.h * * This header defines all internal types and macros for the * library. This is stuff that's not exported to an app * using the library. * * Authors: Mitch Lichtenberg, Chris Demetriou * ********************************************************************* */ #ifndef CFE_API_INT_H #define CFE_API_INT_H /* ********************************************************************* * Constants ********************************************************************* */ #define CFE_CMD_FW_GETINFO 0 #define CFE_CMD_FW_RESTART 1 #define CFE_CMD_FW_BOOT 2 #define CFE_CMD_FW_CPUCTL 3 #define CFE_CMD_FW_GETTIME 4 #define CFE_CMD_FW_MEMENUM 5 #define CFE_CMD_FW_FLUSHCACHE 6 #define CFE_CMD_DEV_GETHANDLE 9 #define CFE_CMD_DEV_ENUM 10 #define CFE_CMD_DEV_OPEN 11 #define CFE_CMD_DEV_INPSTAT 12 #define CFE_CMD_DEV_READ 13 #define CFE_CMD_DEV_WRITE 14 #define CFE_CMD_DEV_IOCTL 15 #define CFE_CMD_DEV_CLOSE 16 #define CFE_CMD_DEV_GETINFO 17 #define CFE_CMD_ENV_ENUM 20 #define CFE_CMD_ENV_GET 22 #define CFE_CMD_ENV_SET 23 #define CFE_CMD_ENV_DEL 24 #define CFE_CMD_MAX 32 #define CFE_CMD_VENDOR_USE 0x8000 /* codes above this are for customer use */ /* ********************************************************************* * Structures ********************************************************************* */ typedef uint64_t cfe_xuint_t; typedef int64_t cfe_xint_t; typedef int64_t cfe_xptr_t; typedef struct xiocb_buffer_s { cfe_xuint_t buf_offset; /* offset on device (bytes) */ cfe_xptr_t buf_ptr; /* pointer to a buffer */ cfe_xuint_t buf_length; /* length of this buffer */ cfe_xuint_t buf_retlen; /* returned length (for read ops) */ cfe_xuint_t buf_ioctlcmd; /* IOCTL command (used only for IOCTLs) */ } xiocb_buffer_t; #define buf_devflags buf_ioctlcmd /* returned device info flags */ typedef struct xiocb_inpstat_s { cfe_xuint_t inp_status; /* 1 means input available */ } xiocb_inpstat_t; typedef struct xiocb_envbuf_s { cfe_xint_t enum_idx; /* 0-based enumeration index */ cfe_xptr_t name_ptr; /* name string buffer */ cfe_xint_t name_length; /* size of name buffer */ cfe_xptr_t val_ptr; /* value string buffer */ cfe_xint_t val_length; /* size of value string buffer */ } xiocb_envbuf_t; typedef struct xiocb_cpuctl_s { cfe_xuint_t cpu_number; /* cpu number to control */ cfe_xuint_t cpu_command; /* command to issue to CPU */ cfe_xuint_t start_addr; /* CPU start address */ cfe_xuint_t gp_val; /* starting GP value */ cfe_xuint_t sp_val; /* starting SP value */ cfe_xuint_t a1_val; /* starting A1 value */ } xiocb_cpuctl_t; typedef struct xiocb_time_s { cfe_xint_t ticks; /* current time in ticks */ } 
xiocb_time_t; typedef struct xiocb_exitstat_s { cfe_xint_t status; } xiocb_exitstat_t; typedef struct xiocb_meminfo_s { cfe_xint_t mi_idx; /* 0-based enumeration index */ cfe_xint_t mi_type; /* type of memory block */ cfe_xuint_t mi_addr; /* physical start address */ cfe_xuint_t mi_size; /* block size */ } xiocb_meminfo_t; typedef struct xiocb_fwinfo_s { cfe_xint_t fwi_version; /* major, minor, eco version */ cfe_xint_t fwi_totalmem; /* total installed mem */ cfe_xint_t fwi_flags; /* various flags */ cfe_xint_t fwi_boardid; /* board ID */ cfe_xint_t fwi_bootarea_va; /* VA of boot area */ cfe_xint_t fwi_bootarea_pa; /* PA of boot area */ cfe_xint_t fwi_bootarea_size; /* size of boot area */ cfe_xint_t fwi_reserved1; cfe_xint_t fwi_reserved2; cfe_xint_t fwi_reserved3; } xiocb_fwinfo_t; typedef struct cfe_xiocb_s { cfe_xuint_t xiocb_fcode; /* IOCB function code */ cfe_xint_t xiocb_status; /* return status */ cfe_xint_t xiocb_handle; /* file/device handle */ cfe_xuint_t xiocb_flags; /* flags for this IOCB */ cfe_xuint_t xiocb_psize; /* size of parameter list */ union { xiocb_buffer_t xiocb_buffer; /* buffer parameters */ xiocb_inpstat_t xiocb_inpstat; /* input status parameters */ xiocb_envbuf_t xiocb_envbuf; /* environment function parameters */ xiocb_cpuctl_t xiocb_cpuctl; /* CPU control parameters */ xiocb_time_t xiocb_time; /* timer parameters */ xiocb_meminfo_t xiocb_meminfo; /* memory arena info parameters */ xiocb_fwinfo_t xiocb_fwinfo; /* firmware information */ xiocb_exitstat_t xiocb_exitstat; /* Exit Status */ } plist; } cfe_xiocb_t; #endif /* CFE_API_INT_H */ Index: user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_error.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_error.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_error.h (revision 303642) @@ -1,104 +1,103 @@ -/* $NetBSD: cfe_error.h,v 1.2 2003/02/07 17:38:48 cgd Exp $ */ -/* from: SiByte Id: cfe_error.h,v 1.2 2002/07/09 19:37:52 cgd Exp $ */ +/* from: Broadcom Id: cfe_error.h,v 1.3 2003/02/07 17:27:56 cgd Exp $ */ /*- * Copyright 2000, 2001, 2002 * Broadcom Corporation. All rights reserved. * * This software is furnished under license and may be used and copied only * in accordance with the following terms and conditions. Subject to these * conditions, you may download, copy, install, use, modify and distribute * modified or unmodified copies of this software in source and/or binary * form. No title or ownership is transferred hereby. * * 1) Any source code used, modified or distributed must reproduce and * retain this copyright notice and list of conditions as they appear in * the source file. * * 2) No right is granted to use any trade name, trademark, or logo of * Broadcom Corporation. The "Broadcom Corporation" name may not be * used to endorse or promote products derived from this software * without the prior written permission of Broadcom Corporation. * * 3) THIS SOFTWARE IS PROVIDED "AS-IS" AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING BUT NOT LIMITED TO, ANY IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR * NON-INFRINGEMENT ARE DISCLAIMED. 
IN NO EVENT SHALL BROADCOM BE LIABLE * FOR ANY DAMAGES WHATSOEVER, AND IN PARTICULAR, BROADCOM SHALL NOT BE * LIABLE FOR DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE), EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* ********************************************************************* * * Broadcom Common Firmware Environment (CFE) * * Error codes File: cfe_error.h * * CFE's global error code list is here. * * Author: Mitch Lichtenberg * ********************************************************************* */ #define CFE_OK 0 #define CFE_ERR -1 /* generic error */ #define CFE_ERR_INV_COMMAND -2 #define CFE_ERR_EOF -3 #define CFE_ERR_IOERR -4 #define CFE_ERR_NOMEM -5 #define CFE_ERR_DEVNOTFOUND -6 #define CFE_ERR_DEVOPEN -7 #define CFE_ERR_INV_PARAM -8 #define CFE_ERR_ENVNOTFOUND -9 #define CFE_ERR_ENVREADONLY -10 #define CFE_ERR_NOTELF -11 #define CFE_ERR_NOT32BIT -12 #define CFE_ERR_WRONGENDIAN -13 #define CFE_ERR_BADELFVERS -14 #define CFE_ERR_NOTMIPS -15 #define CFE_ERR_BADELFFMT -16 #define CFE_ERR_BADADDR -17 #define CFE_ERR_FILENOTFOUND -18 #define CFE_ERR_UNSUPPORTED -19 #define CFE_ERR_HOSTUNKNOWN -20 #define CFE_ERR_TIMEOUT -21 #define CFE_ERR_PROTOCOLERR -22 #define CFE_ERR_NETDOWN -23 #define CFE_ERR_NONAMESERVER -24 #define CFE_ERR_NOHANDLES -25 #define CFE_ERR_ALREADYBOUND -26 #define CFE_ERR_CANNOTSET -27 #define CFE_ERR_NOMORE -28 #define CFE_ERR_BADFILESYS -29 #define CFE_ERR_FSNOTAVAIL -30 #define CFE_ERR_INVBOOTBLOCK -31 #define CFE_ERR_WRONGDEVTYPE -32 #define CFE_ERR_BBCHECKSUM -33 #define CFE_ERR_BOOTPROGCHKSUM -34 #define CFE_ERR_LDRNOTAVAIL -35 #define CFE_ERR_NOTREADY -36 #define CFE_ERR_GETMEM -37 #define CFE_ERR_SETMEM -38 #define CFE_ERR_NOTCONN -39 #define CFE_ERR_ADDRINUSE -40 Index: user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_ioctl.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_ioctl.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/cfe/cfe_ioctl.h (revision 303642) @@ -1,75 +1,166 @@ -/* $NetBSD: cfe_ioctl.h,v 1.2 2003/02/07 17:52:08 cgd Exp $ */ - /*- - * Copyright 2000, 2001 + * Copyright 2000, 2001, 2002, 2003 * Broadcom Corporation. All rights reserved. * - * This software is furnished under license and may be used and copied only - * in accordance with the following terms and conditions. Subject to these - * conditions, you may download, copy, install, use, modify and distribute - * modified or unmodified copies of this software in source and/or binary - * form. No title or ownership is transferred hereby. + * This software is furnished under license and may be used and + * copied only in accordance with the following terms and + * conditions. Subject to these conditions, you may download, + * copy, install, use, modify and distribute modified or unmodified + * copies of this software in source and/or binary form. No title + * or ownership is transferred hereby. * * 1) Any source code used, modified or distributed must reproduce and * retain this copyright notice and list of conditions as they appear in * the source file. * * 2) No right is granted to use any trade name, trademark, or logo of * Broadcom Corporation. 
The "Broadcom Corporation" name may not be * used to endorse or promote products derived from this software * without the prior written permission of Broadcom Corporation. * * 3) THIS SOFTWARE IS PROVIDED "AS-IS" AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING BUT NOT LIMITED TO, ANY IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR * NON-INFRINGEMENT ARE DISCLAIMED. IN NO EVENT SHALL BROADCOM BE LIABLE * FOR ANY DAMAGES WHATSOEVER, AND IN PARTICULAR, BROADCOM SHALL NOT BE * LIABLE FOR DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE), EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* ********************************************************************* * Broadcom Common Firmware Environment (CFE) * * IOCTL definitions File: cfe_ioctl.h * * IOCTL function numbers and I/O data structures. * - * Author: Mitch Lichtenberg (mpl@broadcom.com) + * Author: Mitch Lichtenberg * ********************************************************************* */ /* ********************************************************************* * NVFAM and FLASH stuff ********************************************************************* */ #define IOCTL_NVRAM_GETINFO 1 /* return nvram_info_t */ #define IOCTL_NVRAM_ERASE 2 /* erase sector containing nvram_info_t area */ #define IOCTL_FLASH_ERASE_SECTOR 3 /* erase an arbitrary sector */ -#define IOCTL_FLASH_ERASE_ALL 4 /* Erase the entire flash */ +#define IOCTL_FLASH_ERASE_ALL 4 /* Erase the entire flash */ +#define IOCTL_FLASH_WRITE_ALL 5 /* write entire flash */ +#define IOCTL_FLASH_GETINFO 6 /* get flash device info */ +#define IOCTL_FLASH_GETSECTORS 7 /* get sector information */ +#define IOCTL_FLASH_ERASE_RANGE 8 /* erase range of bytes */ +#define IOCTL_NVRAM_UNLOCK 9 /* allow r/w beyond logical end of device */ +#define IOCTL_FLASH_PROTECT_RANGE 10 /* Protect a group of sectors */ +#define IOCTL_FLASH_UNPROTECT_RANGE 11 /* unprotect a group of sectors */ +#define IOCTL_FLASH_DATA_WIDTH_MODE 12 /* switch flash and gen bus to support 8 or 16-bit mode I/Os */ +#define IOCTL_FLASH_BURST_MODE 13 /* configure gen bus for burst mode */ +typedef struct flash_range_s { + unsigned int range_base; + unsigned int range_length; +} flash_range_t; + +typedef struct flash_info_s { + unsigned long long flash_base; /* flash physical base address */ + unsigned int flash_size; /* available device size in bytes */ + unsigned int flash_type; /* type, from FLASH_TYPE below */ + unsigned int flash_flags; /* Various flags (FLASH_FLAG_xxx) */ +} flash_info_t; + +typedef struct flash_sector_s { + int flash_sector_idx; + int flash_sector_status; + unsigned int flash_sector_offset; + unsigned int flash_sector_size; +} flash_sector_t; + +#define FLASH_SECTOR_OK 0 +#define FLASH_SECTOR_INVALID -1 + +#define FLASH_TYPE_UNKNOWN 0 /* not sure what kind of flash */ +#define FLASH_TYPE_SRAM 1 /* not flash: it's SRAM */ +#define FLASH_TYPE_ROM 2 /* not flash: it's ROM */ +#define FLASH_TYPE_FLASH 3 /* it's flash memory of some sort */ + +#define FLASH_FLAG_NOERASE 1 /* Byte-range writes supported, + Erasing is not necessary */ + typedef struct nvram_info_s { - int nvram_offset; /* offset of environment area */ - int nvram_size; /* size of environment area */ 
- int nvram_eraseflg; /* true if we need to erase first */ + int nvram_offset; /* offset of environment area */ + int nvram_size; /* size of environment area */ + int nvram_eraseflg; /* true if we need to erase first */ } nvram_info_t; /* ********************************************************************* * Ethernet stuff ********************************************************************* */ -#define IOCTL_ETHER_GETHWADDR 1 +#define IOCTL_ETHER_GETHWADDR 1 /* Get hardware address (6bytes) */ +#define IOCTL_ETHER_SETHWADDR 2 /* Set hardware address (6bytes) */ +#define IOCTL_ETHER_GETSPEED 3 /* Get Speed and Media (int) */ +#define IOCTL_ETHER_SETSPEED 4 /* Set Speed and Media (int) */ +#define IOCTL_ETHER_GETLINK 5 /* get link status (int) */ +#define IOCTL_ETHER_GETLOOPBACK 7 /* get loopback state */ +#define IOCTL_ETHER_SETLOOPBACK 8 /* set loopback state */ +#define IOCTL_ETHER_SETPACKETFIFO 9 /* set packet fifo mode (int) */ +#define IOCTL_ETHER_SETSTROBESIG 10 /* set strobe signal (int) */ +#define ETHER_LOOPBACK_OFF 0 /* no loopback */ +#define ETHER_LOOPBACK_INT 1 /* Internal loopback */ +#define ETHER_LOOPBACK_EXT 2 /* External loopback (through PHY) */ + +#define ETHER_SPEED_AUTO 0 /* Auto detect */ +#define ETHER_SPEED_UNKNOWN 0 /* Speed not known (on link status) */ +#define ETHER_SPEED_10HDX 1 /* 10MB hdx and fdx */ +#define ETHER_SPEED_10FDX 2 +#define ETHER_SPEED_100HDX 3 /* 100MB hdx and fdx */ +#define ETHER_SPEED_100FDX 4 +#define ETHER_SPEED_1000HDX 5 /* 1000MB hdx and fdx */ +#define ETHER_SPEED_1000FDX 6 + +#define ETHER_FIFO_8 0 /* 8-bit packet fifo mode */ +#define ETHER_FIFO_16 1 /* 16-bit packet fifo mode */ +#define ETHER_ETHER 2 /* Standard ethernet mode */ + +#define ETHER_STROBE_GMII 0 /* GMII style strobe signal */ +#define ETHER_STROBE_ENCODED 1 /* Encoded */ +#define ETHER_STROBE_SOP 2 /* SOP flagged. Only in 8-bit mode*/ +#define ETHER_STROBE_EOP 3 /* EOP flagged. Only in 8-bit mode*/ + /* ********************************************************************* + * Serial Ports + ********************************************************************* */ + +#define IOCTL_SERIAL_SETSPEED 1 /* get baud rate (int) */ +#define IOCTL_SERIAL_GETSPEED 2 /* set baud rate (int) */ +#define IOCTL_SERIAL_SETFLOW 3 /* Set Flow Control */ +#define IOCTL_SERIAL_GETFLOW 4 /* Get Flow Control */ + +#define SERIAL_FLOW_NONE 0 /* no flow control */ +#define SERIAL_FLOW_SOFTWARE 1 /* software flow control (not impl) */ +#define SERIAL_FLOW_HARDWARE 2 /* hardware flow control */ + +/* ********************************************************************* * Block device stuff ********************************************************************* */ -#define IOCTL_BLOCK_GETBLOCKSIZE 1 -#define IOCTL_BLOCK_GETTOTALBLOCKS 2 +#define IOCTL_BLOCK_GETBLOCKSIZE 1 /* get block size (int) */ +#define IOCTL_BLOCK_GETTOTALBLOCKS 2 /* get total bocks (long long) */ +#define IOCTL_BLOCK_GETDEVTYPE 3 /* get device type (struct) */ + +typedef struct blockdev_info_s { + unsigned long long blkdev_totalblocks; + unsigned int blkdev_blocksize; + unsigned int blkdev_devtype; +} blockdev_info_t; + +#define BLOCK_DEVTYPE_DISK 0 +#define BLOCK_DEVTYPE_CDROM 1 Index: user/alc/PQ_LAUNDRY/sys/dev/cxgbe/t4_main.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/cxgbe/t4_main.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/cxgbe/t4_main.c (revision 303642) @@ -1,9552 +1,9530 @@ /*- * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. 
* Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #endif #if defined(__i386__) || defined(__amd64__) #include #include #endif #ifdef DDB #include #include #endif #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "t4_ioctl.h" #include "t4_l2t.h" #include "t4_mp_ring.h" #include "t4_if.h" /* T4 bus driver interface */ static int t4_probe(device_t); static int t4_attach(device_t); static int t4_detach(device_t); static int t4_ready(device_t); static int t4_read_port_unit(device_t, int, int *); static device_method_t t4_methods[] = { DEVMETHOD(device_probe, t4_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD(t4_is_main_ready, t4_ready), DEVMETHOD(t4_read_port_unit, t4_read_port_unit), DEVMETHOD_END }; static driver_t t4_driver = { "t4nex", t4_methods, sizeof(struct adapter) }; /* T4 port (cxgbe) interface */ static int cxgbe_probe(device_t); static int cxgbe_attach(device_t); static int cxgbe_detach(device_t); static device_method_t cxgbe_methods[] = { DEVMETHOD(device_probe, cxgbe_probe), DEVMETHOD(device_attach, cxgbe_attach), DEVMETHOD(device_detach, cxgbe_detach), { 0, 0 } }; static driver_t cxgbe_driver = { "cxgbe", cxgbe_methods, sizeof(struct port_info) }; /* T4 VI (vcxgbe) interface */ static int vcxgbe_probe(device_t); static int vcxgbe_attach(device_t); static int vcxgbe_detach(device_t); static device_method_t vcxgbe_methods[] = { DEVMETHOD(device_probe, vcxgbe_probe), DEVMETHOD(device_attach, vcxgbe_attach), DEVMETHOD(device_detach, vcxgbe_detach), { 0, 0 } }; static driver_t vcxgbe_driver = { "vcxgbe", vcxgbe_methods, sizeof(struct vi_info) }; static d_ioctl_t t4_ioctl; -static d_open_t t4_open; -static d_close_t t4_close; static struct cdevsw t4_cdevsw = { .d_version = D_VERSION, - .d_flags = 0, - .d_open = t4_open, - .d_close = t4_close, .d_ioctl = t4_ioctl, .d_name = "t4nex", }; /* T5 bus driver 
interface */ static int t5_probe(device_t); static device_method_t t5_methods[] = { DEVMETHOD(device_probe, t5_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD(t4_is_main_ready, t4_ready), DEVMETHOD(t4_read_port_unit, t4_read_port_unit), DEVMETHOD_END }; static driver_t t5_driver = { "t5nex", t5_methods, sizeof(struct adapter) }; /* T5 port (cxl) interface */ static driver_t cxl_driver = { "cxl", cxgbe_methods, sizeof(struct port_info) }; /* T5 VI (vcxl) interface */ static driver_t vcxl_driver = { "vcxl", vcxgbe_methods, sizeof(struct vi_info) }; -static struct cdevsw t5_cdevsw = { - .d_version = D_VERSION, - .d_flags = 0, - .d_open = t4_open, - .d_close = t4_close, - .d_ioctl = t4_ioctl, - .d_name = "t5nex", -}; - /* ifnet + media interface */ static void cxgbe_init(void *); static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t); static int cxgbe_transmit(struct ifnet *, struct mbuf *); static void cxgbe_qflush(struct ifnet *); static int cxgbe_media_change(struct ifnet *); static void cxgbe_media_status(struct ifnet *, struct ifmediareq *); MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services"); /* * Correct lock order when you need to acquire multiple locks is t4_list_lock, * then ADAPTER_LOCK, then t4_uld_list_lock. */ static struct sx t4_list_lock; SLIST_HEAD(, adapter) t4_list; #ifdef TCP_OFFLOAD static struct sx t4_uld_list_lock; SLIST_HEAD(, uld_info) t4_uld_list; #endif /* * Tunables. See tweak_tunables() too. * * Each tunable is set to a default value here if it's known at compile-time. * Otherwise it is set to -1 as an indication to tweak_tunables() that it should * provide a reasonable default when the driver is loaded. * * Tunables applicable to both T4 and T5 are under hw.cxgbe. Those specific to * T5 are under hw.cxl. */ /* * Number of queues for tx and rx, 10G and 1G, NIC and offload. */ #define NTXQ_10G 16 static int t4_ntxq10g = -1; TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq10g); #define NRXQ_10G 8 static int t4_nrxq10g = -1; TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq10g); #define NTXQ_1G 4 static int t4_ntxq1g = -1; TUNABLE_INT("hw.cxgbe.ntxq1g", &t4_ntxq1g); #define NRXQ_1G 2 static int t4_nrxq1g = -1; TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g); #define NTXQ_VI 1 static int t4_ntxq_vi = -1; TUNABLE_INT("hw.cxgbe.ntxq_vi", &t4_ntxq_vi); #define NRXQ_VI 1 static int t4_nrxq_vi = -1; TUNABLE_INT("hw.cxgbe.nrxq_vi", &t4_nrxq_vi); static int t4_rsrv_noflowq = 0; TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq); #ifdef TCP_OFFLOAD #define NOFLDTXQ_10G 8 static int t4_nofldtxq10g = -1; TUNABLE_INT("hw.cxgbe.nofldtxq10g", &t4_nofldtxq10g); #define NOFLDRXQ_10G 2 static int t4_nofldrxq10g = -1; TUNABLE_INT("hw.cxgbe.nofldrxq10g", &t4_nofldrxq10g); #define NOFLDTXQ_1G 2 static int t4_nofldtxq1g = -1; TUNABLE_INT("hw.cxgbe.nofldtxq1g", &t4_nofldtxq1g); #define NOFLDRXQ_1G 1 static int t4_nofldrxq1g = -1; TUNABLE_INT("hw.cxgbe.nofldrxq1g", &t4_nofldrxq1g); #define NOFLDTXQ_VI 1 static int t4_nofldtxq_vi = -1; TUNABLE_INT("hw.cxgbe.nofldtxq_vi", &t4_nofldtxq_vi); #define NOFLDRXQ_VI 1 static int t4_nofldrxq_vi = -1; TUNABLE_INT("hw.cxgbe.nofldrxq_vi", &t4_nofldrxq_vi); #endif #ifdef DEV_NETMAP #define NNMTXQ_VI 2 static int t4_nnmtxq_vi = -1; TUNABLE_INT("hw.cxgbe.nnmtxq_vi", &t4_nnmtxq_vi); #define NNMRXQ_VI 2 static int t4_nnmrxq_vi = -1; TUNABLE_INT("hw.cxgbe.nnmrxq_vi", &t4_nnmrxq_vi); #endif /* * Holdoff parameters for 10G and 1G ports. 
*/ #define TMR_IDX_10G 1 static int t4_tmr_idx_10g = TMR_IDX_10G; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx_10g); #define PKTC_IDX_10G (-1) static int t4_pktc_idx_10g = PKTC_IDX_10G; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx_10g); #define TMR_IDX_1G 1 static int t4_tmr_idx_1g = TMR_IDX_1G; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &t4_tmr_idx_1g); #define PKTC_IDX_1G (-1) static int t4_pktc_idx_1g = PKTC_IDX_1G; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &t4_pktc_idx_1g); /* * Size (# of entries) of each tx and rx queue. */ static unsigned int t4_qsize_txq = TX_EQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq); static unsigned int t4_qsize_rxq = RX_IQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq); /* * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively). */ static int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX; TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types); /* * Configuration file. */ #define DEFAULT_CF "default" #define FLASH_CF "flash" #define UWIRE_CF "uwire" #define FPGA_CF "fpga" static char t4_cfg_file[32] = DEFAULT_CF; TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file)); /* * PAUSE settings (bit 0, 1 = rx_pause, tx_pause respectively). * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them. * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water * mark or when signalled to do so, 0 to never emit PAUSE. */ static int t4_pause_settings = PAUSE_TX | PAUSE_RX; TUNABLE_INT("hw.cxgbe.pause_settings", &t4_pause_settings); /* * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed, * encouraged respectively). */ static unsigned int t4_fw_install = 1; TUNABLE_INT("hw.cxgbe.fw_install", &t4_fw_install); /* * ASIC features that will be used. Disable the ones you don't want so that the * chip resources aren't wasted on features that will not be used. */ static int t4_nbmcaps_allowed = 0; TUNABLE_INT("hw.cxgbe.nbmcaps_allowed", &t4_nbmcaps_allowed); static int t4_linkcaps_allowed = 0; /* No DCBX, PPP, etc. by default */ TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed); static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS | FW_CAPS_CONFIG_SWITCH_EGRESS; TUNABLE_INT("hw.cxgbe.switchcaps_allowed", &t4_switchcaps_allowed); static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC; TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed); static int t4_toecaps_allowed = -1; TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed); static int t4_rdmacaps_allowed = -1; TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed); static int t4_tlscaps_allowed = 0; TUNABLE_INT("hw.cxgbe.tlscaps_allowed", &t4_tlscaps_allowed); static int t4_iscsicaps_allowed = -1; TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed); static int t4_fcoecaps_allowed = 0; TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed); static int t5_write_combine = 0; TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine); static int t4_num_vis = 1; TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis); /* Functions used by extra VIs to obtain unique MAC addresses for each VI. 
*/ static int vi_mac_funcs[] = { FW_VI_FUNC_OFLD, FW_VI_FUNC_IWARP, FW_VI_FUNC_OPENISCSI, FW_VI_FUNC_OPENFCOE, FW_VI_FUNC_FOISCSI, FW_VI_FUNC_FOFCOE, }; struct intrs_and_queues { uint16_t intr_type; /* INTx, MSI, or MSI-X */ uint16_t nirq; /* Total # of vectors */ uint16_t intr_flags_10g;/* Interrupt flags for each 10G port */ uint16_t intr_flags_1g; /* Interrupt flags for each 1G port */ uint16_t ntxq10g; /* # of NIC txq's for each 10G port */ uint16_t nrxq10g; /* # of NIC rxq's for each 10G port */ uint16_t ntxq1g; /* # of NIC txq's for each 1G port */ uint16_t nrxq1g; /* # of NIC rxq's for each 1G port */ uint16_t rsrv_noflowq; /* Flag whether to reserve queue 0 */ uint16_t nofldtxq10g; /* # of TOE txq's for each 10G port */ uint16_t nofldrxq10g; /* # of TOE rxq's for each 10G port */ uint16_t nofldtxq1g; /* # of TOE txq's for each 1G port */ uint16_t nofldrxq1g; /* # of TOE rxq's for each 1G port */ /* The vcxgbe/vcxl interfaces use these and not the ones above. */ uint16_t ntxq_vi; /* # of NIC txq's */ uint16_t nrxq_vi; /* # of NIC rxq's */ uint16_t nofldtxq_vi; /* # of TOE txq's */ uint16_t nofldrxq_vi; /* # of TOE rxq's */ uint16_t nnmtxq_vi; /* # of netmap txq's */ uint16_t nnmrxq_vi; /* # of netmap rxq's */ }; struct filter_entry { uint32_t valid:1; /* filter allocated and valid */ uint32_t locked:1; /* filter is administratively locked */ uint32_t pending:1; /* filter action is pending firmware reply */ uint32_t smtidx:8; /* Source MAC Table index for smac */ struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ struct t4_filter_specification fs; }; static int map_bars_0_and_4(struct adapter *); static int map_bar_2(struct adapter *); static void setup_memwin(struct adapter *); static void position_memwin(struct adapter *, int, uint32_t); static int rw_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int, int); static inline int read_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int); static inline int write_via_memwin(struct adapter *, int, uint32_t, const uint32_t *, int); static int validate_mem_range(struct adapter *, uint32_t, int); static int fwmtype_to_hwmtype(int); static int validate_mt_off_len(struct adapter *, int, uint32_t, int, uint32_t *); static int fixup_devlog_params(struct adapter *); static int cfg_itype_and_nqueues(struct adapter *, int, int, int, struct intrs_and_queues *); static int prep_firmware(struct adapter *); static int partition_resources(struct adapter *, const struct firmware *, const char *); static int get_params__pre_init(struct adapter *); static int get_params__post_init(struct adapter *); static int set_params__post_init(struct adapter *); static void t4_set_desc(struct adapter *); static void build_medialist(struct port_info *, struct ifmedia *); static int cxgbe_init_synchronized(struct vi_info *); static int cxgbe_uninit_synchronized(struct vi_info *); static int setup_intr_handlers(struct adapter *); static void quiesce_txq(struct adapter *, struct sge_txq *); static void quiesce_wrq(struct adapter *, struct sge_wrq *); static void quiesce_iq(struct adapter *, struct sge_iq *); static void quiesce_fl(struct adapter *, struct sge_fl *); static int t4_alloc_irq(struct adapter *, struct irq *, int rid, driver_intr_t *, void *, char *); static int t4_free_irq(struct adapter *, struct irq *); static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *); static void vi_refresh_stats(struct adapter *, struct vi_info *); static void cxgbe_refresh_stats(struct adapter *, struct port_info *); static void 
cxgbe_tick(void *); static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t); static void t4_sysctls(struct adapter *); static void cxgbe_sysctls(struct port_info *); static int sysctl_int_array(SYSCTL_HANDLER_ARGS); static int sysctl_bitfield(SYSCTL_HANDLER_ARGS); static int sysctl_btphy(SYSCTL_HANDLER_ARGS); static int sysctl_noflowq(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS); static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS); static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS); static int sysctl_temperature(SYSCTL_HANDLER_ARGS); #ifdef SBUF_DRAIN static int sysctl_cctrl(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS); static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS); static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_devlog(SYSCTL_HANDLER_ARGS); static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS); static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS); static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS); static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS); static int sysctl_meminfo(SYSCTL_HANDLER_ARGS); static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS); static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS); static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS); static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS); static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tids(SYSCTL_HANDLER_ARGS); static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS); static int sysctl_tp_la(SYSCTL_HANDLER_ARGS); static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS); static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS); static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tc_params(SYSCTL_HANDLER_ARGS); #endif #ifdef TCP_OFFLOAD static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS); static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS); static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS); #endif static uint32_t fconf_iconf_to_mode(uint32_t, uint32_t); static uint32_t mode_to_fconf(uint32_t); static uint32_t mode_to_iconf(uint32_t); static int check_fspec_against_fconf_iconf(struct adapter *, struct t4_filter_specification *); static int get_filter_mode(struct adapter *, uint32_t *); static int set_filter_mode(struct adapter *, uint32_t); static inline uint64_t get_filter_hits(struct adapter *, uint32_t); static int get_filter(struct adapter *, struct t4_filter *); static int set_filter(struct adapter *, struct t4_filter *); static int del_filter(struct adapter *, struct t4_filter *); static void clear_filter(struct filter_entry *); static int set_filter_wr(struct adapter *, int); static int del_filter_wr(struct adapter *, int); static int set_tcb_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); static int get_sge_context(struct adapter *, struct t4_sge_context *); static int load_fw(struct adapter *, struct t4_data *); static int read_card_mem(struct adapter *, int, struct t4_mem_range *); static int read_i2c(struct adapter *, struct t4_i2c_data *); static int set_sched_class(struct adapter *, struct t4_sched_params *); static int 
set_sched_queue(struct adapter *, struct t4_sched_queue *); #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *, int); #endif static int mod_event(module_t, int, void *); static int notify_siblings(device_t, int); struct { uint16_t device; char *desc; } t4_pciids[] = { {0xa000, "Chelsio Terminator 4 FPGA"}, {0x4400, "Chelsio T440-dbg"}, {0x4401, "Chelsio T420-CR"}, {0x4402, "Chelsio T422-CR"}, {0x4403, "Chelsio T440-CR"}, {0x4404, "Chelsio T420-BCH"}, {0x4405, "Chelsio T440-BCH"}, {0x4406, "Chelsio T440-CH"}, {0x4407, "Chelsio T420-SO"}, {0x4408, "Chelsio T420-CX"}, {0x4409, "Chelsio T420-BT"}, {0x440a, "Chelsio T404-BT"}, {0x440e, "Chelsio T440-LP-CR"}, }, t5_pciids[] = { {0xb000, "Chelsio Terminator 5 FPGA"}, {0x5400, "Chelsio T580-dbg"}, {0x5401, "Chelsio T520-CR"}, /* 2 x 10G */ {0x5402, "Chelsio T522-CR"}, /* 2 x 10G, 2 X 1G */ {0x5403, "Chelsio T540-CR"}, /* 4 x 10G */ {0x5407, "Chelsio T520-SO"}, /* 2 x 10G, nomem */ {0x5409, "Chelsio T520-BT"}, /* 2 x 10GBaseT */ {0x540a, "Chelsio T504-BT"}, /* 4 x 1G */ {0x540d, "Chelsio T580-CR"}, /* 2 x 40G */ {0x540e, "Chelsio T540-LP-CR"}, /* 4 x 10G */ {0x5410, "Chelsio T580-LP-CR"}, /* 2 x 40G */ {0x5411, "Chelsio T520-LL-CR"}, /* 2 x 10G */ {0x5412, "Chelsio T560-CR"}, /* 1 x 40G, 2 x 10G */ {0x5414, "Chelsio T580-LP-SO-CR"}, /* 2 x 40G, nomem */ {0x5415, "Chelsio T502-BT"}, /* 2 x 1G */ #ifdef notyet {0x5404, "Chelsio T520-BCH"}, {0x5405, "Chelsio T540-BCH"}, {0x5406, "Chelsio T540-CH"}, {0x5408, "Chelsio T520-CX"}, {0x540b, "Chelsio B520-SR"}, {0x540c, "Chelsio B504-BT"}, {0x540f, "Chelsio Amsterdam"}, {0x5413, "Chelsio T580-CHR"}, #endif }; #ifdef TCP_OFFLOAD /* * service_iq() has an iq and needs the fl. Offset of fl from the iq should be * exactly the same for both rxq and ofld_rxq. */ CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq)); CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl)); #endif CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE); static int t4_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xa000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t4_pciids); i++) { if (d == t4_pciids[i].device) { device_set_desc(dev, t4_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int t5_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xb000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t5_pciids); i++) { if (d == t5_pciids[i].device) { device_set_desc(dev, t5_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static void t5_attribute_workaround(device_t dev) { device_t root_port; uint32_t v; /* * The T5 chips do not properly echo the No Snoop and Relaxed * Ordering attributes when replying to a TLP from a Root * Port. As a workaround, find the parent Root Port and * disable No Snoop and Relaxed Ordering. Note that this * affects all devices under this root port. 
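 * (Concretely, the pcie_adjust_config() call below clears
 * PCIEM_CTL_RELAXED_ORD_ENABLE and PCIEM_CTL_NOSNOOP_ENABLE in the root
 * port's PCIe device control register, and the change is reported only if
 * either bit was previously set.)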
*/ root_port = pci_find_pcie_root_port(dev); if (root_port == NULL) { device_printf(dev, "Unable to find parent root port\n"); return; } v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL, PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2); if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) != 0) device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n", device_get_nameunit(root_port)); } static int t4_attach(device_t dev) { struct adapter *sc; int rc = 0, i, j, n10g, n1g, rqidx, tqidx; + struct make_dev_args mda; struct intrs_and_queues iaq; struct sge *s; uint8_t *buf; #ifdef TCP_OFFLOAD int ofld_rqidx, ofld_tqidx; #endif #ifdef DEV_NETMAP int nm_rqidx, nm_tqidx; #endif int num_vis; sc = device_get_softc(dev); sc->dev = dev; TUNABLE_INT_FETCH("hw.cxgbe.debug_flags", &sc->debug_flags); if ((pci_get_device(dev) & 0xff00) == 0x5400) t5_attribute_workaround(dev); pci_enable_busmaster(dev); if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) { uint32_t v; pci_set_max_read_req(dev, 4096); v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2); v |= PCIEM_CTL_RELAXED_ORD_ENABLE; pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2); sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5); } sc->traceq = -1; mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF); snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer", device_get_nameunit(dev)); snprintf(sc->lockname, sizeof(sc->lockname), "%s", device_get_nameunit(dev)); mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF); sx_xlock(&t4_list_lock); SLIST_INSERT_HEAD(&t4_list, sc, link); sx_xunlock(&t4_list_lock); mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF); TAILQ_INIT(&sc->sfl); callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0); mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF); rc = map_bars_0_and_4(sc); if (rc != 0) goto done; /* error message displayed already */ /* * This is the real PF# to which we're attaching. Works from within PCI * passthrough environments too, where pci_get_function() could return a * different PF# depending on the passthrough configuration. We need to * use the real PF# in all our communication with the firmware. */ sc->pf = G_SOURCEPF(t4_read_reg(sc, A_PL_WHOAMI)); sc->mbox = sc->pf; memset(sc->chan_map, 0xff, sizeof(sc->chan_map)); /* Prepare the adapter for operation. */ buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_prep_adapter(sc, buf); free(buf, M_CXGBE); if (rc != 0) { device_printf(dev, "failed to prepare adapter: %d.\n", rc); goto done; } /* * Do this really early, with the memory windows set up even before the * character device. The userland tool's register i/o and mem read * will work even in "recovery mode". */ setup_memwin(sc); if (t4_init_devlog_params(sc, 0) == 0) fixup_devlog_params(sc); - sc->cdev = make_dev(is_t4(sc) ? &t4_cdevsw : &t5_cdevsw, - device_get_unit(dev), UID_ROOT, GID_WHEEL, 0600, "%s", - device_get_nameunit(dev)); - if (sc->cdev == NULL) - device_printf(dev, "failed to create nexus char device.\n"); - else - sc->cdev->si_drv1 = sc; + make_dev_args_init(&mda); + mda.mda_devsw = &t4_cdevsw; + mda.mda_uid = UID_ROOT; + mda.mda_gid = GID_WHEEL; + mda.mda_mode = 0600; + mda.mda_si_drv1 = sc; + rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev)); + if (rc != 0) + device_printf(dev, "failed to create nexus char device: %d.\n", + rc); /* Go no further if recovery mode has been requested. 
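 * (Recovery mode is requested with the hw.cxgbe.sos loader tunable; any
 * non-zero value, e.g. hw.cxgbe.sos=1 in loader.conf(5), stops the attach at
 * this point, leaving only the nexus char device and memory windows set up.)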
*/ if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) { device_printf(dev, "recovery mode.\n"); goto done; } #if defined(__i386__) if ((cpu_feature & CPUID_CX8) == 0) { device_printf(dev, "64 bit atomics not available.\n"); rc = ENOTSUP; goto done; } #endif /* Prepare the firmware for operation */ rc = prep_firmware(sc); if (rc != 0) goto done; /* error message displayed already */ rc = get_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = set_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = map_bar_2(sc); if (rc != 0) goto done; /* error message displayed already */ rc = t4_create_dma_tag(sc); if (rc != 0) goto done; /* error message displayed already */ /* * Number of VIs to create per-port. The first VI is the "main" regular * VI for the port. The rest are additional virtual interfaces on the * same physical port. Note that the main VI does not have native * netmap support but the extra VIs do. * * Limit the number of VIs per port to the number of available * MAC addresses per port. */ if (t4_num_vis >= 1) num_vis = t4_num_vis; else num_vis = 1; if (num_vis > nitems(vi_mac_funcs)) { num_vis = nitems(vi_mac_funcs); device_printf(dev, "Number of VIs limited to %d\n", num_vis); } /* * First pass over all the ports - allocate VIs and initialize some * basic parameters like mac address, port type, etc. We also figure * out whether a port is 10G or 1G and use that information when * calculating how many interrupts to attempt to allocate. */ n10g = n1g = 0; for_each_port(sc, i) { struct port_info *pi; pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK); sc->port[i] = pi; /* These must be set before t4_port_init */ pi->adapter = sc; pi->port_id = i; /* * XXX: vi[0] is special so we can't delay this allocation until * pi->nvi's final value is known. */ pi->vi = malloc(sizeof(struct vi_info) * num_vis, M_CXGBE, M_ZERO | M_WAITOK); /* * Allocate the "main" VI and initialize parameters * like mac addr. */ rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i); if (rc != 0) { device_printf(dev, "unable to initialize port %d: %d\n", i, rc); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); sc->port[i] = NULL; goto done; } pi->link_cfg.requested_fc &= ~(PAUSE_TX | PAUSE_RX); pi->link_cfg.requested_fc |= t4_pause_settings; pi->link_cfg.fc &= ~(PAUSE_TX | PAUSE_RX); pi->link_cfg.fc |= t4_pause_settings; rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, &pi->link_cfg); if (rc != 0) { device_printf(dev, "port %d l1cfg failed: %d\n", i, rc); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); sc->port[i] = NULL; goto done; } snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d", device_get_nameunit(dev), i); mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF); sc->chan_map[pi->tx_chan] = i; pi->tc = malloc(sizeof(struct tx_sched_class) * sc->chip_params->nsched_cls, M_CXGBE, M_ZERO | M_WAITOK); if (is_10G_port(pi) || is_40G_port(pi)) { n10g++; } else { n1g++; } pi->linkdnrc = -1; pi->dev = device_add_child(dev, is_t4(sc) ? "cxgbe" : "cxl", -1); if (pi->dev == NULL) { device_printf(dev, "failed to add device for port %d.\n", i); rc = ENXIO; goto done; } pi->vi[0].dev = pi->dev; device_set_softc(pi->dev, pi); } /* * Interrupt type, # of interrupts, # of rx/tx queues, etc. 
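 * (Which types are tried is governed by the hw.cxgbe.interrupt_types bitmask
 * declared above; e.g. a value of 2 restricts the driver to MSI.)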
*/ rc = cfg_itype_and_nqueues(sc, n10g, n1g, num_vis, &iaq); if (rc != 0) goto done; /* error message displayed already */ if (iaq.nrxq_vi + iaq.nofldrxq_vi + iaq.nnmrxq_vi == 0) num_vis = 1; sc->intr_type = iaq.intr_type; sc->intr_count = iaq.nirq; s = &sc->sge; s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g; s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g; if (num_vis > 1) { s->nrxq += (n10g + n1g) * (num_vis - 1) * iaq.nrxq_vi; s->ntxq += (n10g + n1g) * (num_vis - 1) * iaq.ntxq_vi; } s->neq = s->ntxq + s->nrxq; /* the free list in an rxq is an eq */ s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */ s->niq = s->nrxq + 1; /* 1 extra for firmware event queue */ #ifdef TCP_OFFLOAD if (is_offload(sc)) { s->nofldrxq = n10g * iaq.nofldrxq10g + n1g * iaq.nofldrxq1g; s->nofldtxq = n10g * iaq.nofldtxq10g + n1g * iaq.nofldtxq1g; if (num_vis > 1) { s->nofldrxq += (n10g + n1g) * (num_vis - 1) * iaq.nofldrxq_vi; s->nofldtxq += (n10g + n1g) * (num_vis - 1) * iaq.nofldtxq_vi; } s->neq += s->nofldtxq + s->nofldrxq; s->niq += s->nofldrxq; s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); } #endif #ifdef DEV_NETMAP if (num_vis > 1) { s->nnmrxq = (n10g + n1g) * (num_vis - 1) * iaq.nnmrxq_vi; s->nnmtxq = (n10g + n1g) * (num_vis - 1) * iaq.nnmtxq_vi; } s->neq += s->nnmtxq + s->nnmrxq; s->niq += s->nnmrxq; s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq), M_CXGBE, M_ZERO | M_WAITOK); #endif s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE, M_ZERO | M_WAITOK); s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE, M_ZERO | M_WAITOK); s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE, M_ZERO | M_WAITOK); sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE, M_ZERO | M_WAITOK); t4_init_l2t(sc, M_WAITOK); /* * Second pass over the ports. This time we know the number of rx and * tx queues that each port should get. */ rqidx = tqidx = 0; #ifdef TCP_OFFLOAD ofld_rqidx = ofld_tqidx = 0; #endif #ifdef DEV_NETMAP nm_rqidx = nm_tqidx = 0; #endif for_each_port(sc, i) { struct port_info *pi = sc->port[i]; struct vi_info *vi; if (pi == NULL) continue; pi->nvi = num_vis; for_each_vi(pi, j, vi) { vi->pi = pi; vi->qsize_rxq = t4_qsize_rxq; vi->qsize_txq = t4_qsize_txq; vi->first_rxq = rqidx; vi->first_txq = tqidx; if (is_10G_port(pi) || is_40G_port(pi)) { vi->tmr_idx = t4_tmr_idx_10g; vi->pktc_idx = t4_pktc_idx_10g; vi->flags |= iaq.intr_flags_10g & INTR_RXQ; vi->nrxq = j == 0 ? iaq.nrxq10g : iaq.nrxq_vi; vi->ntxq = j == 0 ? iaq.ntxq10g : iaq.ntxq_vi; } else { vi->tmr_idx = t4_tmr_idx_1g; vi->pktc_idx = t4_pktc_idx_1g; vi->flags |= iaq.intr_flags_1g & INTR_RXQ; vi->nrxq = j == 0 ? iaq.nrxq1g : iaq.nrxq_vi; vi->ntxq = j == 0 ? iaq.ntxq1g : iaq.ntxq_vi; } rqidx += vi->nrxq; tqidx += vi->ntxq; if (j == 0 && vi->ntxq > 1) vi->rsrv_noflowq = iaq.rsrv_noflowq ? 1 : 0; else vi->rsrv_noflowq = 0; #ifdef TCP_OFFLOAD vi->first_ofld_rxq = ofld_rqidx; vi->first_ofld_txq = ofld_tqidx; if (is_10G_port(pi) || is_40G_port(pi)) { vi->flags |= iaq.intr_flags_10g & INTR_OFLD_RXQ; vi->nofldrxq = j == 0 ? iaq.nofldrxq10g : iaq.nofldrxq_vi; vi->nofldtxq = j == 0 ? 
iaq.nofldtxq10g : iaq.nofldtxq_vi; } else { vi->flags |= iaq.intr_flags_1g & INTR_OFLD_RXQ; vi->nofldrxq = j == 0 ? iaq.nofldrxq1g : iaq.nofldrxq_vi; vi->nofldtxq = j == 0 ? iaq.nofldtxq1g : iaq.nofldtxq_vi; } ofld_rqidx += vi->nofldrxq; ofld_tqidx += vi->nofldtxq; #endif #ifdef DEV_NETMAP if (j > 0) { vi->first_nm_rxq = nm_rqidx; vi->first_nm_txq = nm_tqidx; vi->nnmrxq = iaq.nnmrxq_vi; vi->nnmtxq = iaq.nnmtxq_vi; nm_rqidx += vi->nnmrxq; nm_tqidx += vi->nnmtxq; } #endif } } rc = setup_intr_handlers(sc); if (rc != 0) { device_printf(dev, "failed to setup interrupt handlers: %d\n", rc); goto done; } rc = bus_generic_attach(dev); if (rc != 0) { device_printf(dev, "failed to attach all child ports: %d\n", rc); goto done; } device_printf(dev, "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n", sc->params.pci.speed, sc->params.pci.width, sc->params.nports, sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" : (sc->intr_type == INTR_MSI ? "MSI" : "INTx"), sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq); t4_set_desc(sc); notify_siblings(dev, 0); done: if (rc != 0 && sc->cdev) { /* cdev was created and so cxgbetool works; recover that way. */ device_printf(dev, "error during attach, adapter is now in recovery mode.\n"); rc = 0; } if (rc != 0) t4_detach(dev); else t4_sysctls(sc); return (rc); } static int t4_ready(device_t dev) { struct adapter *sc; sc = device_get_softc(dev); if (sc->flags & FW_OK) return (0); return (ENXIO); } static int t4_read_port_unit(device_t dev, int port, int *unit) { struct adapter *sc; struct port_info *pi; sc = device_get_softc(dev); if (port < 0 || port >= MAX_NPORTS) return (EINVAL); pi = sc->port[port]; if (pi == NULL || pi->dev == NULL) return (ENXIO); *unit = device_get_unit(pi->dev); return (0); } static int notify_siblings(device_t dev, int detaching) { device_t sibling; int error, i; error = 0; for (i = 0; i < PCI_FUNCMAX; i++) { if (i == pci_get_function(dev)) continue; sibling = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev), i); if (sibling == NULL || !device_is_attached(sibling)) continue; if (detaching) error = T4_DETACH_CHILD(sibling); else (void)T4_ATTACH_CHILD(sibling); if (error) break; } return (error); } /* * Idempotent */ static int t4_detach(device_t dev) { struct adapter *sc; struct port_info *pi; int i, rc; sc = device_get_softc(dev); rc = notify_siblings(dev, 1); if (rc) { device_printf(dev, "failed to detach sibling devices: %d\n", rc); return (rc); } if (sc->flags & FULL_INIT_DONE) t4_intr_disable(sc); if (sc->cdev) { destroy_dev(sc->cdev); sc->cdev = NULL; } rc = bus_generic_detach(dev); if (rc) { device_printf(dev, "failed to detach child devices: %d\n", rc); return (rc); } for (i = 0; i < sc->intr_count; i++) t4_free_irq(sc, &sc->irq[i]); for (i = 0; i < MAX_NPORTS; i++) { pi = sc->port[i]; if (pi) { t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid); if (pi->dev) device_delete_child(dev, pi->dev); mtx_destroy(&pi->pi_lock); free(pi->vi, M_CXGBE); free(pi->tc, M_CXGBE); free(pi, M_CXGBE); } } if (sc->flags & FULL_INIT_DONE) adapter_full_uninit(sc); if (sc->flags & FW_OK) t4_fw_bye(sc, sc->mbox); if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX) pci_release_msi(dev); if (sc->regs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid, sc->regs_res); if (sc->udbs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid, sc->udbs_res); if (sc->msix_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid, sc->msix_res); if (sc->l2t) t4_free_l2t(sc->l2t); #ifdef TCP_OFFLOAD 
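	/* Release the offload queue arrays that t4_attach() allocated. */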
free(sc->sge.ofld_rxq, M_CXGBE); free(sc->sge.ofld_txq, M_CXGBE); #endif #ifdef DEV_NETMAP free(sc->sge.nm_rxq, M_CXGBE); free(sc->sge.nm_txq, M_CXGBE); #endif free(sc->irq, M_CXGBE); free(sc->sge.rxq, M_CXGBE); free(sc->sge.txq, M_CXGBE); free(sc->sge.ctrlq, M_CXGBE); free(sc->sge.iqmap, M_CXGBE); free(sc->sge.eqmap, M_CXGBE); free(sc->tids.ftid_tab, M_CXGBE); t4_destroy_dma_tag(sc); if (mtx_initialized(&sc->sc_lock)) { sx_xlock(&t4_list_lock); SLIST_REMOVE(&t4_list, sc, adapter, link); sx_xunlock(&t4_list_lock); mtx_destroy(&sc->sc_lock); } callout_drain(&sc->sfl_callout); if (mtx_initialized(&sc->tids.ftid_lock)) mtx_destroy(&sc->tids.ftid_lock); if (mtx_initialized(&sc->sfl_lock)) mtx_destroy(&sc->sfl_lock); if (mtx_initialized(&sc->ifp_lock)) mtx_destroy(&sc->ifp_lock); if (mtx_initialized(&sc->reg_lock)) mtx_destroy(&sc->reg_lock); for (i = 0; i < NUM_MEMWIN; i++) { struct memwin *mw = &sc->memwin[i]; if (rw_initialized(&mw->mw_lock)) rw_destroy(&mw->mw_lock); } bzero(sc, sizeof(*sc)); return (0); } static int cxgbe_probe(device_t dev) { char buf[128]; struct port_info *pi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d", pi->port_id); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } #define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \ IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \ IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS) #define T4_CAP_ENABLE (T4_CAP) static int cxgbe_vi_attach(device_t dev, struct vi_info *vi) { struct ifnet *ifp; struct sbuf *sb; vi->xact_addr_filt = -1; callout_init(&vi->tick, 1); /* Allocate an ifnet and set it up */ ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Cannot allocate ifnet\n"); return (ENOMEM); } vi->ifp = ifp; ifp->if_softc = vi; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = cxgbe_init; ifp->if_ioctl = cxgbe_ioctl; ifp->if_transmit = cxgbe_transmit; ifp->if_qflush = cxgbe_qflush; ifp->if_get_counter = cxgbe_get_counter; ifp->if_capabilities = T4_CAP; #ifdef TCP_OFFLOAD if (vi->nofldrxq != 0) ifp->if_capabilities |= IFCAP_TOE; #endif ifp->if_capenable = T4_CAP_ENABLE; ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO | CSUM_UDP_IPV6 | CSUM_TCP_IPV6; ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS; ifp->if_hw_tsomaxsegsize = 65536; /* Initialize ifmedia for this VI */ ifmedia_init(&vi->media, IFM_IMASK, cxgbe_media_change, cxgbe_media_status); build_medialist(vi->pi, &vi->media); vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp, EVENTHANDLER_PRI_ANY); ether_ifattach(ifp, vi->hw_addr); #ifdef DEV_NETMAP if (vi->nnmrxq != 0) cxgbe_nm_attach(vi); #endif sb = sbuf_new_auto(); sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq); #ifdef TCP_OFFLOAD if (ifp->if_capabilities & IFCAP_TOE) sbuf_printf(sb, "; %d txq, %d rxq (TOE)", vi->nofldtxq, vi->nofldrxq); #endif #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) sbuf_printf(sb, "; %d txq, %d rxq (netmap)", vi->nnmtxq, vi->nnmrxq); #endif sbuf_finish(sb); device_printf(dev, "%s\n", sbuf_data(sb)); sbuf_delete(sb); vi_sysctls(vi); return (0); } static int cxgbe_attach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct vi_info *vi; int i, rc; callout_init_mtx(&pi->tick, &pi->pi_lock, 0); rc = cxgbe_vi_attach(dev, &pi->vi[0]); if (rc) return (rc); for_each_vi(pi, i, vi) { if (i == 0) 
continue; vi->dev = device_add_child(dev, is_t4(pi->adapter) ? "vcxgbe" : "vcxl", -1); if (vi->dev == NULL) { device_printf(dev, "failed to add VI %d\n", i); continue; } device_set_softc(vi->dev, vi); } cxgbe_sysctls(pi); bus_generic_attach(dev); return (0); } static void cxgbe_vi_detach(struct vi_info *vi) { struct ifnet *ifp = vi->ifp; ether_ifdetach(ifp); if (vi->vlan_c) EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c); /* Let detach proceed even if these fail. */ #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) cxgbe_nm_detach(vi); #endif cxgbe_uninit_synchronized(vi); callout_drain(&vi->tick); vi_full_uninit(vi); ifmedia_removeall(&vi->media); if_free(vi->ifp); vi->ifp = NULL; } static int cxgbe_detach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct adapter *sc = pi->adapter; int rc; /* Detach the extra VIs first. */ rc = bus_generic_detach(dev); if (rc) return (rc); device_delete_children(dev); doom_vi(sc, &pi->vi[0]); if (pi->flags & HAS_TRACEQ) { sc->traceq = -1; /* cloner should not create ifnet */ t4_tracer_port_detach(sc); } cxgbe_vi_detach(&pi->vi[0]); callout_drain(&pi->tick); end_synchronized_op(sc, 0); return (0); } static void cxgbe_init(void *arg) { struct vi_info *vi = arg; struct adapter *sc = vi->pi->adapter; if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0) return; cxgbe_init_synchronized(vi); end_synchronized_op(sc, 0); } static int cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) { int rc = 0, mtu, flags, can_sleep; struct vi_info *vi = ifp->if_softc; struct adapter *sc = vi->pi->adapter; struct ifreq *ifr = (struct ifreq *)data; uint32_t mask; switch (cmd) { case SIOCSIFMTU: mtu = ifr->ifr_mtu; if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) return (EINVAL); rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu"); if (rc) return (rc); ifp->if_mtu = mtu; if (vi->flags & VI_INIT_DONE) { t4_update_fl_bufsize(ifp); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MTU); } end_synchronized_op(sc, 0); break; case SIOCSIFFLAGS: can_sleep = 0; redo_sifflags: rc = begin_synchronized_op(sc, vi, can_sleep ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4flg"); if (rc) return (rc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { flags = vi->if_flags; if ((ifp->if_flags ^ flags) & (IFF_PROMISC | IFF_ALLMULTI)) { if (can_sleep == 1) { end_synchronized_op(sc, 0); can_sleep = 0; goto redo_sifflags; } rc = update_mac_settings(ifp, XGMAC_PROMISC | XGMAC_ALLMULTI); } } else { if (can_sleep == 0) { end_synchronized_op(sc, LOCK_HELD); can_sleep = 1; goto redo_sifflags; } rc = cxgbe_init_synchronized(vi); } vi->if_flags = ifp->if_flags; } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (can_sleep == 0) { end_synchronized_op(sc, LOCK_HELD); can_sleep = 1; goto redo_sifflags; } rc = cxgbe_uninit_synchronized(vi); } end_synchronized_op(sc, can_sleep ? 
0 : LOCK_HELD); break; case SIOCADDMULTI: case SIOCDELMULTI: /* these two are called with a mutex held :-( */ rc = begin_synchronized_op(sc, vi, HOLD_LOCK, "t4multi"); if (rc) return (rc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MCADDRS); end_synchronized_op(sc, LOCK_HELD); break; case SIOCSIFCAP: rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap"); if (rc) return (rc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO4; if_printf(ifp, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & ifp->if_capenable && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO6; if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; /* * Note that we leave CSUM_TSO alone (it is always set). The * kernel takes both IFCAP_TSOx and CSUM_TSO into account before * sending a TSO request our way, so it's sufficient to toggle * IFCAP_TSOx only. */ if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & ifp->if_capenable) && !(IFCAP_TXCSUM & ifp->if_capenable)) { if_printf(ifp, "enable txcsum first.\n"); rc = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO4; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & ifp->if_capenable) && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { if_printf(ifp, "enable txcsum6 first.\n"); rc = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO6; } if (mask & IFCAP_LRO) { #if defined(INET) || defined(INET6) int i; struct sge_rxq *rxq; ifp->if_capenable ^= IFCAP_LRO; for_each_rxq(vi, i, rxq) { if (ifp->if_capenable & IFCAP_LRO) rxq->iq.flags |= IQ_LRO_ENABLED; else rxq->iq.flags &= ~IQ_LRO_ENABLED; } #endif } #ifdef TCP_OFFLOAD if (mask & IFCAP_TOE) { int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE; rc = toe_capability(vi, enable); if (rc != 0) goto fail; ifp->if_capenable ^= mask; } #endif if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_VLANEX); } if (mask & IFCAP_VLAN_MTU) { ifp->if_capenable ^= IFCAP_VLAN_MTU; /* Need to find out how to disable auto-mtu-inflation */ } if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWCSUM) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); #endif fail: end_synchronized_op(sc, 0); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: ifmedia_ioctl(ifp, ifr, &vi->media, cmd); break; case SIOCGI2C: { struct ifi2creq i2c; rc = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); if (rc != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { rc = EPERM; break; } if (i2c.len > sizeof(i2c.data)) { rc = EINVAL; break; } rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, vi->pi->port_id, i2c.dev_addr, i2c.offset, i2c.len, &i2c.data[0]); end_synchronized_op(sc, 0); if (rc == 0) rc = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); break; } default: rc = ether_ioctl(ifp, cmd, data); } return (rc); } static int cxgbe_transmit(struct ifnet *ifp, struct mbuf *m) { struct vi_info *vi = ifp->if_softc; struct 
port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_txq *txq; void *items[1]; int rc; M_ASSERTPKTHDR(m); MPASS(m->m_nextpkt == NULL); /* not quite ready for this yet */ if (__predict_false(pi->link_cfg.link_ok == 0)) { m_freem(m); return (ENETDOWN); } rc = parse_pkt(&m); if (__predict_false(rc != 0)) { MPASS(m == NULL); /* was freed already */ atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */ return (rc); } /* Select a txq. */ txq = &sc->sge.txq[vi->first_txq]; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) + vi->rsrv_noflowq); items[0] = m; rc = mp_ring_enqueue(txq->r, items, 1, 4096); if (__predict_false(rc != 0)) m_freem(m); return (rc); } static void cxgbe_qflush(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; struct sge_txq *txq; int i; /* queues do not exist if !VI_INIT_DONE. */ if (vi->flags & VI_INIT_DONE) { for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags &= ~EQ_ENABLED; TXQ_UNLOCK(txq); while (!mp_ring_is_idle(txq->r)) { mp_ring_check_drainage(txq->r, 0); pause("qflush", 1); } } } if_qflush(ifp); } static uint64_t vi_get_counter(struct ifnet *ifp, ift_counter c) { struct vi_info *vi = ifp->if_softc; struct fw_vi_stats_vf *s = &vi->stats; vi_refresh_stats(vi->pi->adapter, vi); switch (c) { case IFCOUNTER_IPACKETS: return (s->rx_bcast_frames + s->rx_mcast_frames + s->rx_ucast_frames); case IFCOUNTER_IERRORS: return (s->rx_err_frames); case IFCOUNTER_OPACKETS: return (s->tx_bcast_frames + s->tx_mcast_frames + s->tx_ucast_frames + s->tx_offload_frames); case IFCOUNTER_OERRORS: return (s->tx_drop_frames); case IFCOUNTER_IBYTES: return (s->rx_bcast_bytes + s->rx_mcast_bytes + s->rx_ucast_bytes); case IFCOUNTER_OBYTES: return (s->tx_bcast_bytes + s->tx_mcast_bytes + s->tx_ucast_bytes + s->tx_offload_bytes); case IFCOUNTER_IMCASTS: return (s->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (s->tx_mcast_frames); case IFCOUNTER_OQDROPS: { uint64_t drops; drops = 0; if (vi->flags & VI_INIT_DONE) { int i; struct sge_txq *txq; for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); } return (drops); } default: return (if_get_counter_default(ifp, c)); } } uint64_t cxgbe_get_counter(struct ifnet *ifp, ift_counter c) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct port_stats *s = &pi->stats; if (pi->nvi > 1) return (vi_get_counter(ifp, c)); cxgbe_refresh_stats(sc, pi); switch (c) { case IFCOUNTER_IPACKETS: return (s->rx_frames); case IFCOUNTER_IERRORS: return (s->rx_jabber + s->rx_runt + s->rx_too_long + s->rx_fcs_err + s->rx_len_err); case IFCOUNTER_OPACKETS: return (s->tx_frames); case IFCOUNTER_OERRORS: return (s->tx_error_frames); case IFCOUNTER_IBYTES: return (s->rx_octets); case IFCOUNTER_OBYTES: return (s->tx_octets); case IFCOUNTER_IMCASTS: return (s->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (s->tx_mcast_frames); case IFCOUNTER_IQDROPS: return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 + s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 + s->rx_trunc3 + pi->tnl_cong_drops); case IFCOUNTER_OQDROPS: { uint64_t drops; drops = s->tx_drop; if (vi->flags & VI_INIT_DONE) { int i; struct sge_txq *txq; for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); } return (drops); } default: return (if_get_counter_default(ifp, c)); } } static int cxgbe_media_change(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; device_printf(vi->dev, "%s unimplemented.\n", __func__); return 
(EOPNOTSUPP); } static void cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct ifmedia_entry *cur; int speed = pi->link_cfg.speed; cur = vi->media.ifm_cur; ifmr->ifm_status = IFM_AVALID; if (!pi->link_cfg.link_ok) return; ifmr->ifm_status |= IFM_ACTIVE; /* active and current will differ iff current media is autoselect. */ if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO) return; ifmr->ifm_active = IFM_ETHER | IFM_FDX; if (speed == 10000) ifmr->ifm_active |= IFM_10G_T; else if (speed == 1000) ifmr->ifm_active |= IFM_1000_T; else if (speed == 100) ifmr->ifm_active |= IFM_100_TX; else if (speed == 10) ifmr->ifm_active |= IFM_10_T; else KASSERT(0, ("%s: link up but speed unknown (%u)", __func__, speed)); } static int vcxgbe_probe(device_t dev) { char buf[128]; struct vi_info *vi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id, vi - vi->pi->vi); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } static int vcxgbe_attach(device_t dev) { struct vi_info *vi; struct port_info *pi; struct adapter *sc; int func, index, rc; u32 param, val; vi = device_get_softc(dev); pi = vi->pi; sc = pi->adapter; index = vi - pi->vi; KASSERT(index < nitems(vi_mac_funcs), ("%s: VI %s doesn't have a MAC func", __func__, device_get_nameunit(dev))); func = vi_mac_funcs[index]; rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1, vi->hw_addr, &vi->rss_size, func, 0); if (rc < 0) { device_printf(dev, "Failed to allocate virtual interface " "for port %d: %d\n", pi->port_id, -rc); return (-rc); } vi->viid = rc; param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) | V_FW_PARAMS_PARAM_YZ(vi->viid); rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc) vi->rss_base = 0xffff; else { /* MPASS((val >> 16) == rss_size); */ vi->rss_base = val & 0xffff; } rc = cxgbe_vi_attach(dev, vi); if (rc) { t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); return (rc); } return (0); } static int vcxgbe_detach(device_t dev) { struct vi_info *vi; struct adapter *sc; vi = device_get_softc(dev); sc = vi->pi->adapter; doom_vi(sc, vi); cxgbe_vi_detach(vi); t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); end_synchronized_op(sc, 0); return (0); } void t4_fatal_err(struct adapter *sc) { t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0); t4_intr_disable(sc); log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n", device_get_nameunit(sc->dev)); } static int map_bars_0_and_4(struct adapter *sc) { sc->regs_rid = PCIR_BAR(0); sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->regs_rid, RF_ACTIVE); if (sc->regs_res == NULL) { device_printf(sc->dev, "cannot map registers.\n"); return (ENXIO); } sc->bt = rman_get_bustag(sc->regs_res); sc->bh = rman_get_bushandle(sc->regs_res); sc->mmio_len = rman_get_size(sc->regs_res); setbit(&sc->doorbells, DOORBELL_KDB); sc->msix_rid = PCIR_BAR(4); sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->msix_rid, RF_ACTIVE); if (sc->msix_res == NULL) { device_printf(sc->dev, "cannot map MSI-X BAR.\n"); return (ENXIO); } return (0); } static int map_bar_2(struct adapter *sc) { /* * T4: only iWARP driver uses the userspace doorbells. There is no need * to map it if RDMA is disabled. 
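 * (So on T4 the BAR is left unmapped when sc->rdmacaps ends up zero, which is
 * the case, for example, when RDMA was disabled via the
 * hw.cxgbe.rdmacaps_allowed tunable; T5 maps it unconditionally.)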
*/ if (is_t4(sc) && sc->rdmacaps == 0) return (0); sc->udbs_rid = PCIR_BAR(2); sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->udbs_rid, RF_ACTIVE); if (sc->udbs_res == NULL) { device_printf(sc->dev, "cannot map doorbell BAR.\n"); return (ENXIO); } sc->udbs_base = rman_get_virtual(sc->udbs_res); if (is_t5(sc)) { setbit(&sc->doorbells, DOORBELL_UDB); #if defined(__i386__) || defined(__amd64__) if (t5_write_combine) { int rc; /* * Enable write combining on BAR2. This is the * userspace doorbell BAR and is split into 128B * (UDBS_SEG_SIZE) doorbell regions, each associated * with an egress queue. The first 64B has the doorbell * and the second 64B can be used to submit a tx work * request with an implicit doorbell. */ rc = pmap_change_attr((vm_offset_t)sc->udbs_base, rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING); if (rc == 0) { clrbit(&sc->doorbells, DOORBELL_UDB); setbit(&sc->doorbells, DOORBELL_WCWR); setbit(&sc->doorbells, DOORBELL_UDBWC); } else { device_printf(sc->dev, "couldn't enable write combining: %d\n", rc); } t4_write_reg(sc, A_SGE_STAT_CFG, V_STATSOURCE_T5(7) | V_STATMODE(0)); } #endif } return (0); } struct memwin_init { uint32_t base; uint32_t aperture; }; static const struct memwin_init t4_memwin[NUM_MEMWIN] = { { MEMWIN0_BASE, MEMWIN0_APERTURE }, { MEMWIN1_BASE, MEMWIN1_APERTURE }, { MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 } }; static const struct memwin_init t5_memwin[NUM_MEMWIN] = { { MEMWIN0_BASE, MEMWIN0_APERTURE }, { MEMWIN1_BASE, MEMWIN1_APERTURE }, { MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 }, }; static void setup_memwin(struct adapter *sc) { const struct memwin_init *mw_init; struct memwin *mw; int i; uint32_t bar0; if (is_t4(sc)) { /* * Read low 32b of bar0 indirectly via the hardware backdoor * mechanism. Works from within PCI passthrough environments * too, where rman_get_start() can return a different value. We * need to program the T4 memory window decoders with the actual * addresses that will be coming across the PCIe link. */ bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0)); bar0 &= (uint32_t) PCIM_BAR_MEM_BASE; mw_init = &t4_memwin[0]; } else { /* T5+ use the relative offset inside the PCIe BAR */ bar0 = 0; mw_init = &t5_memwin[0]; } for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) { rw_init(&mw->mw_lock, "memory window access"); mw->mw_base = mw_init->base; mw->mw_aperture = mw_init->aperture; mw->mw_curpos = 0; t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i), (mw->mw_base + bar0) | V_BIR(0) | V_WINDOW(ilog2(mw->mw_aperture) - 10)); rw_wlock(&mw->mw_lock); position_memwin(sc, i, 0); rw_wunlock(&mw->mw_lock); } /* flush */ t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2)); } /* * Positions the memory window at the given address in the card's address space. * There are some alignment requirements and the actual position may be at an * address prior to the requested address. mw->mw_curpos always has the actual * position of the window. 
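 * (Alignment example, per the masks below: a request for address 0x1234
 * positions a T4 window at 0x1230 (16B alignment) and a T5+ window at 0x1200
 * (128B alignment).)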
*/ static void position_memwin(struct adapter *sc, int idx, uint32_t addr) { struct memwin *mw; uint32_t pf; uint32_t reg; MPASS(idx >= 0 && idx < NUM_MEMWIN); mw = &sc->memwin[idx]; rw_assert(&mw->mw_lock, RA_WLOCKED); if (is_t4(sc)) { pf = 0; mw->mw_curpos = addr & ~0xf; /* start must be 16B aligned */ } else { pf = V_PFNUM(sc->pf); mw->mw_curpos = addr & ~0x7f; /* start must be 128B aligned */ } reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx); t4_write_reg(sc, reg, mw->mw_curpos | pf); t4_read_reg(sc, reg); /* flush */ } static int rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val, int len, int rw) { struct memwin *mw; uint32_t mw_end, v; MPASS(idx >= 0 && idx < NUM_MEMWIN); /* Memory can only be accessed in naturally aligned 4 byte units */ if (addr & 3 || len & 3 || len <= 0) return (EINVAL); mw = &sc->memwin[idx]; while (len > 0) { rw_rlock(&mw->mw_lock); mw_end = mw->mw_curpos + mw->mw_aperture; if (addr >= mw_end || addr < mw->mw_curpos) { /* Will need to reposition the window */ if (!rw_try_upgrade(&mw->mw_lock)) { rw_runlock(&mw->mw_lock); rw_wlock(&mw->mw_lock); } rw_assert(&mw->mw_lock, RA_WLOCKED); position_memwin(sc, idx, addr); rw_downgrade(&mw->mw_lock); mw_end = mw->mw_curpos + mw->mw_aperture; } rw_assert(&mw->mw_lock, RA_RLOCKED); while (addr < mw_end && len > 0) { if (rw == 0) { v = t4_read_reg(sc, mw->mw_base + addr - mw->mw_curpos); *val++ = le32toh(v); } else { v = *val++; t4_write_reg(sc, mw->mw_base + addr - mw->mw_curpos, htole32(v)); } addr += 4; len -= 4; } rw_runlock(&mw->mw_lock); } return (0); } static inline int read_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val, int len) { return (rw_via_memwin(sc, idx, addr, val, len, 0)); } static inline int write_via_memwin(struct adapter *sc, int idx, uint32_t addr, const uint32_t *val, int len) { return (rw_via_memwin(sc, idx, addr, (void *)(uintptr_t)val, len, 1)); } static int t4_range_cmp(const void *a, const void *b) { return ((const struct t4_range *)a)->start - ((const struct t4_range *)b)->start; } /* * Verify that the memory range specified by the addr/len pair is valid within * the card's address space. 
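 * (Both addr and len must be multiples of 4 and len must be positive; an
 * unaligned request such as addr = 0x1002 is rejected with EINVAL before any
 * range checks are made.)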
*/ static int validate_mem_range(struct adapter *sc, uint32_t addr, int len) { struct t4_range mem_ranges[4], *r, *next; uint32_t em, addr_len; int i, n, remaining; /* Memory can only be accessed in naturally aligned 4 byte units */ if (addr & 3 || len & 3 || len <= 0) return (EINVAL); /* Enabled memories */ em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); r = &mem_ranges[0]; n = 0; bzero(r, sizeof(mem_ranges)); if (em & F_EDRAM0_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR); r->size = G_EDRAM0_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EDRAM0_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (em & F_EDRAM1_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR); r->size = G_EDRAM1_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EDRAM1_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (em & F_EXT_MEM_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); r->size = G_EXT_MEM_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EXT_MEM_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); r->size = G_EXT_MEM1_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EXT_MEM1_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } MPASS(n <= nitems(mem_ranges)); if (n > 1) { /* Sort and merge the ranges. */ qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp); /* Start from index 0 and examine the next n - 1 entries. */ r = &mem_ranges[0]; for (remaining = n - 1; remaining > 0; remaining--, r++) { MPASS(r->size > 0); /* r is a valid entry. */ next = r + 1; MPASS(next->size > 0); /* and so is the next one. */ while (r->start + r->size >= next->start) { /* Merge the next one into the current entry. */ r->size = max(r->start + r->size, next->start + next->size) - r->start; n--; /* One fewer entry in total. */ if (--remaining == 0) goto done; /* short circuit */ next++; } if (next != r + 1) { /* * Some entries were merged into r and next * points to the first valid entry that couldn't * be merged. */ MPASS(next->size > 0); /* must be valid */ memcpy(r + 1, next, remaining * sizeof(*r)); #ifdef INVARIANTS /* * This so that the foo->size assertion in the * next iteration of the loop do the right * thing for entries that were pulled up and are * no longer valid. */ MPASS(n < nitems(mem_ranges)); bzero(&mem_ranges[n], (nitems(mem_ranges) - n) * sizeof(struct t4_range)); #endif } } done: /* Done merging the ranges. */ MPASS(n > 0); r = &mem_ranges[0]; for (i = 0; i < n; i++, r++) { if (addr >= r->start && addr + len <= r->start + r->size) return (0); } } return (EFAULT); } static int fwmtype_to_hwmtype(int mtype) { switch (mtype) { case FW_MEMTYPE_EDC0: return (MEM_EDC0); case FW_MEMTYPE_EDC1: return (MEM_EDC1); case FW_MEMTYPE_EXTMEM: return (MEM_MC0); case FW_MEMTYPE_EXTMEM1: return (MEM_MC1); default: panic("%s: cannot translate fw mtype %d.", __func__, mtype); } } /* * Verify that the memory range specified by the memtype/offset/len pair is * valid and lies entirely within the memtype specified. The global address of * the start of the range is returned in addr. 
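 * (The base parsed out of the corresponding BAR register is in 1MB units, so
 * the << 20 below converts it to a byte address; a base field of 1, for
 * example, corresponds to byte address 0x100000.)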
*/ static int validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, int len, uint32_t *addr) { uint32_t em, addr_len, maddr; /* Memory can only be accessed in naturally aligned 4 byte units */ if (off & 3 || len & 3 || len == 0) return (EINVAL); em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); switch (fwmtype_to_hwmtype(mtype)) { case MEM_EDC0: if (!(em & F_EDRAM0_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR); maddr = G_EDRAM0_BASE(addr_len) << 20; break; case MEM_EDC1: if (!(em & F_EDRAM1_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR); maddr = G_EDRAM1_BASE(addr_len) << 20; break; case MEM_MC: if (!(em & F_EXT_MEM_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); maddr = G_EXT_MEM_BASE(addr_len) << 20; break; case MEM_MC1: if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); maddr = G_EXT_MEM1_BASE(addr_len) << 20; break; default: return (EINVAL); } *addr = maddr + off; /* global address */ return (validate_mem_range(sc, *addr, len)); } static int fixup_devlog_params(struct adapter *sc) { struct devlog_params *dparams = &sc->params.devlog; int rc; rc = validate_mt_off_len(sc, dparams->memtype, dparams->start, dparams->size, &dparams->addr); return (rc); } static int cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, int num_vis, struct intrs_and_queues *iaq) { int rc, itype, navail, nrxq10g, nrxq1g, n; int nofldrxq10g = 0, nofldrxq1g = 0; bzero(iaq, sizeof(*iaq)); iaq->ntxq10g = t4_ntxq10g; iaq->ntxq1g = t4_ntxq1g; iaq->ntxq_vi = t4_ntxq_vi; iaq->nrxq10g = nrxq10g = t4_nrxq10g; iaq->nrxq1g = nrxq1g = t4_nrxq1g; iaq->nrxq_vi = t4_nrxq_vi; iaq->rsrv_noflowq = t4_rsrv_noflowq; #ifdef TCP_OFFLOAD if (is_offload(sc)) { iaq->nofldtxq10g = t4_nofldtxq10g; iaq->nofldtxq1g = t4_nofldtxq1g; iaq->nofldtxq_vi = t4_nofldtxq_vi; iaq->nofldrxq10g = nofldrxq10g = t4_nofldrxq10g; iaq->nofldrxq1g = nofldrxq1g = t4_nofldrxq1g; iaq->nofldrxq_vi = t4_nofldrxq_vi; } #endif #ifdef DEV_NETMAP iaq->nnmtxq_vi = t4_nnmtxq_vi; iaq->nnmrxq_vi = t4_nnmrxq_vi; #endif for (itype = INTR_MSIX; itype; itype >>= 1) { if ((itype & t4_intr_types) == 0) continue; /* not allowed */ if (itype == INTR_MSIX) navail = pci_msix_count(sc->dev); else if (itype == INTR_MSI) navail = pci_msi_count(sc->dev); else navail = 1; restart: if (navail == 0) continue; iaq->intr_type = itype; iaq->intr_flags_10g = 0; iaq->intr_flags_1g = 0; /* * Best option: an interrupt vector for errors, one for the * firmware event queue, and one for every rxq (NIC and TOE) of * every VI. The VIs that support netmap use the same * interrupts for the NIC rx queues and the netmap rx queues * because only one set of queues is active at a time. */ iaq->nirq = T4_EXTRA_INTR; iaq->nirq += n10g * (nrxq10g + nofldrxq10g); iaq->nirq += n1g * (nrxq1g + nofldrxq1g); iaq->nirq += (n10g + n1g) * (num_vis - 1) * max(iaq->nrxq_vi, iaq->nnmrxq_vi); /* See comment above. 
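 * (Worked example, assuming the compile-time defaults above with two 10G
 * ports, TOE enabled and num_vis = 1: nrxq10g = 8 and nofldrxq10g = 2 give
 * nirq = T4_EXTRA_INTR + 2 * (8 + 2).)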
*/ iaq->nirq += (n10g + n1g) * (num_vis - 1) * iaq->nofldrxq_vi; if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { iaq->intr_flags_10g = INTR_ALL; iaq->intr_flags_1g = INTR_ALL; goto allocate; } /* Disable the VIs (and netmap) if there aren't enough intrs */ if (num_vis > 1) { device_printf(sc->dev, "virtual interfaces disabled " "because num_vis=%u with current settings " "(nrxq10g=%u, nrxq1g=%u, nofldrxq10g=%u, " "nofldrxq1g=%u, nrxq_vi=%u nofldrxq_vi=%u, " "nnmrxq_vi=%u) would need %u interrupts but " "only %u are available.\n", num_vis, nrxq10g, nrxq1g, nofldrxq10g, nofldrxq1g, iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi, iaq->nirq, navail); num_vis = 1; iaq->ntxq_vi = iaq->nrxq_vi = 0; iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0; iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0; goto restart; } /* * Second best option: a vector for errors, one for the firmware * event queue, and vectors for either all the NIC rx queues or * all the TOE rx queues. The queues that don't get vectors * will forward their interrupts to those that do. */ iaq->nirq = T4_EXTRA_INTR; if (nrxq10g >= nofldrxq10g) { iaq->intr_flags_10g = INTR_RXQ; iaq->nirq += n10g * nrxq10g; } else { iaq->intr_flags_10g = INTR_OFLD_RXQ; iaq->nirq += n10g * nofldrxq10g; } if (nrxq1g >= nofldrxq1g) { iaq->intr_flags_1g = INTR_RXQ; iaq->nirq += n1g * nrxq1g; } else { iaq->intr_flags_1g = INTR_OFLD_RXQ; iaq->nirq += n1g * nofldrxq1g; } if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) goto allocate; /* * Next best option: an interrupt vector for errors, one for the * firmware event queue, and at least one per main-VI. At this * point we know we'll have to downsize nrxq and/or nofldrxq to * fit what's available to us. */ iaq->nirq = T4_EXTRA_INTR; iaq->nirq += n10g + n1g; if (iaq->nirq <= navail) { int leftover = navail - iaq->nirq; if (n10g > 0) { int target = max(nrxq10g, nofldrxq10g); iaq->intr_flags_10g = nrxq10g >= nofldrxq10g ? INTR_RXQ : INTR_OFLD_RXQ; n = 1; while (n < target && leftover >= n10g) { leftover -= n10g; iaq->nirq += n10g; n++; } iaq->nrxq10g = min(n, nrxq10g); #ifdef TCP_OFFLOAD iaq->nofldrxq10g = min(n, nofldrxq10g); #endif } if (n1g > 0) { int target = max(nrxq1g, nofldrxq1g); iaq->intr_flags_1g = nrxq1g >= nofldrxq1g ? INTR_RXQ : INTR_OFLD_RXQ; n = 1; while (n < target && leftover >= n1g) { leftover -= n1g; iaq->nirq += n1g; n++; } iaq->nrxq1g = min(n, nrxq1g); #ifdef TCP_OFFLOAD iaq->nofldrxq1g = min(n, nofldrxq1g); #endif } if (itype != INTR_MSI || powerof2(iaq->nirq)) goto allocate; } /* * Least desirable option: one interrupt vector for everything. */ iaq->nirq = iaq->nrxq10g = iaq->nrxq1g = 1; iaq->intr_flags_10g = iaq->intr_flags_1g = 0; #ifdef TCP_OFFLOAD if (is_offload(sc)) iaq->nofldrxq10g = iaq->nofldrxq1g = 1; #endif allocate: navail = iaq->nirq; rc = 0; if (itype == INTR_MSIX) rc = pci_alloc_msix(sc->dev, &navail); else if (itype == INTR_MSI) rc = pci_alloc_msi(sc->dev, &navail); if (rc == 0) { if (navail == iaq->nirq) return (0); /* * Didn't get the number requested. Use whatever number * the kernel is willing to allocate (it's in navail). */ device_printf(sc->dev, "fewer vectors than requested, " "type=%d, req=%d, rcvd=%d; will downshift req.\n", itype, iaq->nirq, navail); pci_release_msi(sc->dev); goto restart; } device_printf(sc->dev, "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n", itype, rc, iaq->nirq, navail); } device_printf(sc->dev, "failed to find a usable interrupt type. 
" "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types, pci_msix_count(sc->dev), pci_msi_count(sc->dev)); return (ENXIO); } #define FW_VERSION(chip) ( \ V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \ V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \ V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \ V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD)) #define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf) struct fw_info { uint8_t chip; char *kld_name; char *fw_mod_name; struct fw_hdr fw_hdr; /* XXX: waste of space, need a sparse struct */ } fw_info[] = { { .chip = CHELSIO_T4, .kld_name = "t4fw_cfg", .fw_mod_name = "t4fw", .fw_hdr = { .chip = FW_HDR_CHIP_T4, .fw_ver = htobe32_const(FW_VERSION(T4)), .intfver_nic = FW_INTFVER(T4, NIC), .intfver_vnic = FW_INTFVER(T4, VNIC), .intfver_ofld = FW_INTFVER(T4, OFLD), .intfver_ri = FW_INTFVER(T4, RI), .intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T4, ISCSI), .intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU), .intfver_fcoe = FW_INTFVER(T4, FCOE), }, }, { .chip = CHELSIO_T5, .kld_name = "t5fw_cfg", .fw_mod_name = "t5fw", .fw_hdr = { .chip = FW_HDR_CHIP_T5, .fw_ver = htobe32_const(FW_VERSION(T5)), .intfver_nic = FW_INTFVER(T5, NIC), .intfver_vnic = FW_INTFVER(T5, VNIC), .intfver_ofld = FW_INTFVER(T5, OFLD), .intfver_ri = FW_INTFVER(T5, RI), .intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T5, ISCSI), .intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU), .intfver_fcoe = FW_INTFVER(T5, FCOE), }, } }; static struct fw_info * find_fw_info(int chip) { int i; for (i = 0; i < nitems(fw_info); i++) { if (fw_info[i].chip == chip) return (&fw_info[i]); } return (NULL); } /* * Is the given firmware API compatible with the one the driver was compiled * with? */ static int fw_compatible(const struct fw_hdr *hdr1, const struct fw_hdr *hdr2) { /* short circuit if it's the exact same firmware version */ if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver) return (1); /* * XXX: Is this too conservative? Perhaps I should limit this to the * features that are supported in the driver. */ #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x) if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) && SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) && SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe)) return (1); #undef SAME_INTF return (0); } /* * The firmware in the KLD is usable, but should it be installed? This routine * explains itself in detail if it indicates the KLD firmware should be * installed. 
*/ static int should_install_kld_fw(struct adapter *sc, int card_fw_usable, int k, int c) { const char *reason; if (!card_fw_usable) { reason = "incompatible or unusable"; goto install; } if (k > c) { reason = "older than the version bundled with this driver"; goto install; } if (t4_fw_install == 2 && k != c) { reason = "different than the version bundled with this driver"; goto install; } return (0); install: if (t4_fw_install == 0) { device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "but the driver is prohibited from installing a different " "firmware on the card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason); return (0); } device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "installing firmware %u.%u.%u.%u on card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason, G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); return (1); } /* * Establish contact with the firmware and determine if we are the master driver * or not, and whether we are responsible for chip initialization. */ static int prep_firmware(struct adapter *sc) { const struct firmware *fw = NULL, *default_cfg; int rc, pf, card_fw_usable, kld_fw_usable, need_fw_reset = 1; enum dev_state state; struct fw_info *fw_info; struct fw_hdr *card_fw; /* fw on the card */ const struct fw_hdr *kld_fw; /* fw in the KLD */ const struct fw_hdr *drv_fw; /* fw header the driver was compiled against */ /* Contact firmware. */ rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state); if (rc < 0 || state == DEV_STATE_ERR) { rc = -rc; device_printf(sc->dev, "failed to connect to the firmware: %d, %d.\n", rc, state); return (rc); } pf = rc; if (pf == sc->mbox) sc->flags |= MASTER_PF; else if (state == DEV_STATE_UNINIT) { /* * We didn't get to be the master so we definitely won't be * configuring the chip. It's a bug if someone else hasn't * configured it already. */ device_printf(sc->dev, "couldn't be master(%d), " "device not already initialized either(%d).\n", rc, state); return (EDOOFUS); } /* This is the firmware whose headers the driver was compiled against */ fw_info = find_fw_info(chip_id(sc)); if (fw_info == NULL) { device_printf(sc->dev, "unable to look up firmware information for chip %d.\n", chip_id(sc)); return (EINVAL); } drv_fw = &fw_info->fw_hdr; /* * The firmware KLD contains many modules. The KLD name is also the * name of the module that contains the default config file. */ default_cfg = firmware_get(fw_info->kld_name); /* Read the header of the firmware on the card */ card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_read_flash(sc, FLASH_FW_START, sizeof (*card_fw) / sizeof (uint32_t), (uint32_t *)card_fw, 1); if (rc == 0) card_fw_usable = fw_compatible(drv_fw, (const void*)card_fw); else { device_printf(sc->dev, "Unable to read card's firmware header: %d\n", rc); card_fw_usable = 0; } /* This is the firmware in the KLD */ fw = firmware_get(fw_info->fw_mod_name); if (fw != NULL) { kld_fw = (const void *)fw->data; kld_fw_usable = fw_compatible(drv_fw, kld_fw); } else { kld_fw = NULL; kld_fw_usable = 0; } if (card_fw_usable && card_fw->fw_ver == drv_fw->fw_ver && (!kld_fw_usable || kld_fw->fw_ver == drv_fw->fw_ver)) { /* * Common case: the firmware on the card is an exact match and * the KLD is an exact match too, or the KLD is * absent/incompatible. 
Note that t4_fw_install = 2 is ignored * here -- use cxgbetool loadfw if you want to reinstall the * same firmware as the one on the card. */ } else if (kld_fw_usable && state == DEV_STATE_UNINIT && should_install_kld_fw(sc, card_fw_usable, be32toh(kld_fw->fw_ver), be32toh(card_fw->fw_ver))) { rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0); if (rc != 0) { device_printf(sc->dev, "failed to install firmware: %d\n", rc); goto done; } /* Installed successfully, update the cached header too. */ memcpy(card_fw, kld_fw, sizeof(*card_fw)); card_fw_usable = 1; need_fw_reset = 0; /* already reset as part of load_fw */ } if (!card_fw_usable) { uint32_t d, c, k; d = ntohl(drv_fw->fw_ver); c = ntohl(card_fw->fw_ver); k = kld_fw ? ntohl(kld_fw->fw_ver) : 0; device_printf(sc->dev, "Cannot find a usable firmware: " "fw_install %d, chip state %d, " "driver compiled with %d.%d.%d.%d, " "card has %d.%d.%d.%d, KLD has %d.%d.%d.%d\n", t4_fw_install, state, G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d), G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d), G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); rc = EINVAL; goto done; } /* We're using whatever's on the card and it's known to be good. */ sc->params.fw_vers = ntohl(card_fw->fw_ver); snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers), G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers)); t4_get_tp_version(sc, &sc->params.tp_vers); snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers), G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers), G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers), G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers)); if (t4_get_exprom_version(sc, &sc->params.exprom_vers) != 0) sc->params.exprom_vers = 0; else { snprintf(sc->exprom_version, sizeof(sc->exprom_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.exprom_vers), G_FW_HDR_FW_VER_MINOR(sc->params.exprom_vers), G_FW_HDR_FW_VER_MICRO(sc->params.exprom_vers), G_FW_HDR_FW_VER_BUILD(sc->params.exprom_vers)); } /* Reset device */ if (need_fw_reset && (rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST)) != 0) { device_printf(sc->dev, "firmware reset failed: %d.\n", rc); if (rc != ETIMEDOUT && rc != EIO) t4_fw_bye(sc, sc->mbox); goto done; } sc->flags |= FW_OK; rc = get_params__pre_init(sc); if (rc != 0) goto done; /* error message displayed already */ /* Partition adapter resources as specified in the config file. 
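* Only the master PF does this, and only while the chip is still uninitialized; any other PF just records which PF's configuration is already in effect.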
*/ if (state == DEV_STATE_UNINIT) { KASSERT(sc->flags & MASTER_PF, ("%s: trying to change chip settings when not master.", __func__)); rc = partition_resources(sc, default_cfg, fw_info->kld_name); if (rc != 0) goto done; /* error message displayed already */ t4_tweak_chip_settings(sc); /* get basic stuff going */ rc = -t4_fw_initialize(sc, sc->mbox); if (rc != 0) { device_printf(sc->dev, "fw init failed: %d.\n", rc); goto done; } } else { snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", pf); sc->cfcsum = 0; } done: free(card_fw, M_CXGBE); if (fw != NULL) firmware_put(fw, FIRMWARE_UNLOAD); if (default_cfg != NULL) firmware_put(default_cfg, FIRMWARE_UNLOAD); return (rc); } #define FW_PARAM_DEV(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param)) #define FW_PARAM_PFVF(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)) /* * Partition chip resources for use between various PFs, VFs, etc. */ static int partition_resources(struct adapter *sc, const struct firmware *default_cfg, const char *name_prefix) { const struct firmware *cfg = NULL; int rc = 0; struct fw_caps_config_cmd caps; uint32_t mtype, moff, finicsum, cfcsum; /* * Figure out what configuration file to use. Pick the default config * file for the card if the user hasn't specified one explicitly. */ snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", t4_cfg_file); if (strncmp(t4_cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) { /* Card specific overrides go here. */ if (pci_get_device(sc->dev) == 0x440a) snprintf(sc->cfg_file, sizeof(sc->cfg_file), UWIRE_CF); if (is_fpga(sc)) snprintf(sc->cfg_file, sizeof(sc->cfg_file), FPGA_CF); } /* * We need to load another module if the profile is anything except * "default" or "flash". */ if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) != 0 && strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { char s[32]; snprintf(s, sizeof(s), "%s_%s", name_prefix, sc->cfg_file); cfg = firmware_get(s); if (cfg == NULL) { if (default_cfg != NULL) { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the default config file instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", DEFAULT_CF); } else { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the config file on the card's flash " "instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } } } if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) == 0 && default_cfg == NULL) { device_printf(sc->dev, "default config file not available, will use the config " "file on the card's flash instead.\n"); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } if (strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { u_int cflen; const uint32_t *cfdata; uint32_t param, val, addr; KASSERT(cfg != NULL || default_cfg != NULL, ("%s: no config to upload", __func__)); /* * Ask the firmware where it wants us to upload the config file. */ param = FW_PARAM_DEV(CF); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val); if (rc != 0) { /* No support for config file? Shouldn't happen. */ device_printf(sc->dev, "failed to query config file location: %d.\n", rc); goto done; } mtype = G_FW_PARAMS_PARAM_Y(val); moff = G_FW_PARAMS_PARAM_Z(val) << 16; /* * XXX: sheer laziness. 
We deliberately added 4 bytes of * useless stuffing/comments at the end of the config file so * it's ok to simply throw away the last remaining bytes when * the config file is not an exact multiple of 4. This also * helps with the validate_mt_off_len check. */ if (cfg != NULL) { cflen = cfg->datasize & ~3; cfdata = cfg->data; } else { cflen = default_cfg->datasize & ~3; cfdata = default_cfg->data; } if (cflen > FLASH_CFG_MAX_SIZE) { device_printf(sc->dev, "config file too long (%d, max allowed is %d). " "Will try to use the config on the card, if any.\n", cflen, FLASH_CFG_MAX_SIZE); goto use_config_on_flash; } rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr); if (rc != 0) { device_printf(sc->dev, "%s: addr (%d/0x%x) or len %d is not valid: %d. " "Will try to use the config on the card, if any.\n", __func__, mtype, moff, cflen, rc); goto use_config_on_flash; } write_via_memwin(sc, 2, addr, cfdata, cflen); } else { use_config_on_flash: mtype = FW_MEMTYPE_FLASH; moff = t4_flash_cfg_addr(sc); } bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID | V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) | V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) | FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to pre-process config file: %d " "(mtype %d, moff 0x%x).\n", rc, mtype, moff); goto done; } finicsum = be32toh(caps.finicsum); cfcsum = be32toh(caps.cfcsum); if (finicsum != cfcsum) { device_printf(sc->dev, "WARNING: config file checksum mismatch: %08x %08x\n", finicsum, cfcsum); } sc->cfcsum = cfcsum; #define LIMIT_CAPS(x) do { \ caps.x &= htobe16(t4_##x##_allowed); \ } while (0) /* * Let the firmware know what features will (not) be used so it can tune * things accordingly. */ LIMIT_CAPS(nbmcaps); LIMIT_CAPS(linkcaps); LIMIT_CAPS(switchcaps); LIMIT_CAPS(niccaps); LIMIT_CAPS(toecaps); LIMIT_CAPS(rdmacaps); LIMIT_CAPS(tlscaps); LIMIT_CAPS(iscsicaps); LIMIT_CAPS(fcoecaps); #undef LIMIT_CAPS caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL); if (rc != 0) { device_printf(sc->dev, "failed to process config file: %d.\n", rc); } done: if (cfg != NULL) firmware_put(cfg, FIRMWARE_UNLOAD); return (rc); } /* * Retrieve parameters that are needed (or nice to have) very early. */ static int get_params__pre_init(struct adapter *sc) { int rc; uint32_t param[2], val[2]; param[0] = FW_PARAM_DEV(PORTVEC); param[1] = FW_PARAM_DEV(CCLK); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (pre_init): %d.\n", rc); return (rc); } sc->params.portvec = val[0]; sc->params.nports = bitcount32(val[0]); sc->params.vpd.cclk = val[1]; /* Read device log parameters. */ rc = -t4_init_devlog_params(sc, 1); if (rc == 0) fixup_devlog_params(sc); else { device_printf(sc->dev, "failed to get devlog parameters: %d.\n", rc); rc = 0; /* devlog isn't critical for device operation */ } return (rc); } /* * Retrieve various parameters that are of interest to the driver. The device * has been initialized by the firmware at this point. 
*/ static int get_params__post_init(struct adapter *sc) { int rc; uint32_t param[7], val[7]; struct fw_caps_config_cmd caps; param[0] = FW_PARAM_PFVF(IQFLINT_START); param[1] = FW_PARAM_PFVF(EQ_START); param[2] = FW_PARAM_PFVF(FILTER_START); param[3] = FW_PARAM_PFVF(FILTER_END); param[4] = FW_PARAM_PFVF(L2T_START); param[5] = FW_PARAM_PFVF(L2T_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (post_init): %d.\n", rc); return (rc); } sc->sge.iq_start = val[0]; sc->sge.eq_start = val[1]; sc->tids.ftid_base = val[2]; sc->tids.nftids = val[3] - val[2] + 1; sc->params.ftid_min = val[2]; sc->params.ftid_max = val[3]; sc->vres.l2t.start = val[4]; sc->vres.l2t.size = val[5] - val[4] + 1; KASSERT(sc->vres.l2t.size <= L2T_SIZE, ("%s: L2 table size (%u) larger than expected (%u)", __func__, sc->vres.l2t.size, L2T_SIZE)); /* get capabilites */ bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to get card capabilities: %d.\n", rc); return (rc); } #define READ_CAPS(x) do { \ sc->x = htobe16(caps.x); \ } while (0) READ_CAPS(nbmcaps); READ_CAPS(linkcaps); READ_CAPS(switchcaps); READ_CAPS(niccaps); READ_CAPS(toecaps); READ_CAPS(rdmacaps); READ_CAPS(tlscaps); READ_CAPS(iscsicaps); READ_CAPS(fcoecaps); if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) { param[0] = FW_PARAM_PFVF(ETHOFLD_START); param[1] = FW_PARAM_PFVF(ETHOFLD_END); param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query NIC parameters: %d.\n", rc); return (rc); } sc->tids.etid_base = val[0]; sc->params.etid_min = val[0]; sc->tids.netids = val[1] - val[0] + 1; sc->params.netids = sc->tids.netids; sc->params.eo_wr_cred = val[2]; sc->params.ethoffload = 1; } if (sc->toecaps) { /* query offload-related parameters */ param[0] = FW_PARAM_DEV(NTID); param[1] = FW_PARAM_PFVF(SERVER_START); param[2] = FW_PARAM_PFVF(SERVER_END); param[3] = FW_PARAM_PFVF(TDDP_START); param[4] = FW_PARAM_PFVF(TDDP_END); param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query TOE parameters: %d.\n", rc); return (rc); } sc->tids.ntids = val[0]; sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS); sc->tids.stid_base = val[1]; sc->tids.nstids = val[2] - val[1] + 1; sc->vres.ddp.start = val[3]; sc->vres.ddp.size = val[4] - val[3] + 1; sc->params.ofldq_wr_cred = val[5]; sc->params.offload = 1; } if (sc->rdmacaps) { param[0] = FW_PARAM_PFVF(STAG_START); param[1] = FW_PARAM_PFVF(STAG_END); param[2] = FW_PARAM_PFVF(RQ_START); param[3] = FW_PARAM_PFVF(RQ_END); param[4] = FW_PARAM_PFVF(PBL_START); param[5] = FW_PARAM_PFVF(PBL_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(1): %d.\n", rc); return (rc); } sc->vres.stag.start = val[0]; sc->vres.stag.size = val[1] - val[0] + 1; sc->vres.rq.start = val[2]; sc->vres.rq.size = val[3] - val[2] + 1; sc->vres.pbl.start = val[4]; sc->vres.pbl.size = val[5] - val[4] + 1; param[0] = FW_PARAM_PFVF(SQRQ_START); param[1] = FW_PARAM_PFVF(SQRQ_END); param[2] = FW_PARAM_PFVF(CQ_START); param[3] = FW_PARAM_PFVF(CQ_END); param[4] = 
FW_PARAM_PFVF(OCQ_START); param[5] = FW_PARAM_PFVF(OCQ_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(2): %d.\n", rc); return (rc); } sc->vres.qp.start = val[0]; sc->vres.qp.size = val[1] - val[0] + 1; sc->vres.cq.start = val[2]; sc->vres.cq.size = val[3] - val[2] + 1; sc->vres.ocq.start = val[4]; sc->vres.ocq.size = val[5] - val[4] + 1; } if (sc->iscsicaps) { param[0] = FW_PARAM_PFVF(ISCSI_START); param[1] = FW_PARAM_PFVF(ISCSI_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query iSCSI parameters: %d.\n", rc); return (rc); } sc->vres.iscsi.start = val[0]; sc->vres.iscsi.size = val[1] - val[0] + 1; } /* * We've got the params we wanted to query via the firmware. Now grab * some others directly from the chip. */ rc = t4_read_chip_settings(sc); return (rc); } static int set_params__post_init(struct adapter *sc) { uint32_t param, val; /* ask for encapsulated CPLs */ param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP); val = 1; (void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val); return (0); } #undef FW_PARAM_PFVF #undef FW_PARAM_DEV static void t4_set_desc(struct adapter *sc) { char buf[128]; struct adapter_params *p = &sc->params; snprintf(buf, sizeof(buf), "Chelsio %s %sNIC (rev %d), S/N:%s, " "P/N:%s, E/C:%s", p->vpd.id, is_offload(sc) ? "R" : "", chip_rev(sc), p->vpd.sn, p->vpd.pn, p->vpd.ec); device_set_desc_copy(sc->dev, buf); } static void build_medialist(struct port_info *pi, struct ifmedia *media) { int m; PORT_LOCK(pi); ifmedia_removeall(media); m = IFM_ETHER | IFM_FDX; switch(pi->port_type) { case FW_PORT_TYPE_BT_XFI: case FW_PORT_TYPE_BT_XAUI: ifmedia_add(media, m | IFM_10G_T, 0, NULL); /* fall through */ case FW_PORT_TYPE_BT_SGMII: ifmedia_add(media, m | IFM_1000_T, 0, NULL); ifmedia_add(media, m | IFM_100_TX, 0, NULL); ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(media, IFM_ETHER | IFM_AUTO); break; case FW_PORT_TYPE_CX4: ifmedia_add(media, m | IFM_10G_CX4, 0, NULL); ifmedia_set(media, m | IFM_10G_CX4); break; case FW_PORT_TYPE_QSFP_10G: case FW_PORT_TYPE_SFP: case FW_PORT_TYPE_FIBER_XFI: case FW_PORT_TYPE_FIBER_XAUI: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_10G_LR, 0, NULL); ifmedia_set(media, m | IFM_10G_LR); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_10G_SR, 0, NULL); ifmedia_set(media, m | IFM_10G_SR); break; case FW_PORT_MOD_TYPE_LRM: ifmedia_add(media, m | IFM_10G_LRM, 0, NULL); ifmedia_set(media, m | IFM_10G_LRM); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_10G_TWINAX, 0, NULL); ifmedia_set(media, m | IFM_10G_TWINAX); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; case FW_PORT_MOD_TYPE_NA: case FW_PORT_MOD_TYPE_ER: default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; case FW_PORT_TYPE_QSFP: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_40G_LR4, 0, NULL); ifmedia_set(media, m | IFM_40G_LR4); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_40G_SR4, 0, NULL); ifmedia_set(media, m | IFM_40G_SR4); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_40G_CR4, 
0, NULL); ifmedia_set(media, m | IFM_40G_CR4); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } PORT_UNLOCK(pi); } #define FW_MAC_EXACT_CHUNK 7 /* * Program the port's XGMAC based on parameters in ifnet. The caller also * indicates which parameters should be programmed (the rest are left alone). */ int update_mac_settings(struct ifnet *ifp, int flags) { int rc = 0; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1; ASSERT_SYNCHRONIZED_OP(sc); KASSERT(flags, ("%s: not told what to update.", __func__)); if (flags & XGMAC_MTU) mtu = ifp->if_mtu; if (flags & XGMAC_PROMISC) promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0; if (flags & XGMAC_ALLMULTI) allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0; if (flags & XGMAC_VLANEX) vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0; if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) { rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc, allmulti, 1, vlanex, false); if (rc) { if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags, rc); return (rc); } } if (flags & XGMAC_UCADDR) { uint8_t ucaddr[ETHER_ADDR_LEN]; bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr)); rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt, ucaddr, true, true); if (rc < 0) { rc = -rc; if_printf(ifp, "change_mac failed: %d\n", rc); return (rc); } else { vi->xact_addr_filt = rc; rc = 0; } } if (flags & XGMAC_MCADDRS) { const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK]; int del = 1; uint64_t hash = 0; struct ifmultiaddr *ifma; int i = 0, j; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mcaddr[i] = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); MPASS(ETHER_IS_MULTICAST(mcaddr[i])); i++; if (i == FW_MAC_EXACT_CHUNK) { rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } del = 0; i = 0; } } if (i > 0) { rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } } rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0); if (rc != 0) if_printf(ifp, "failed to set mc address hash: %d", rc); mcfail: if_maddr_runlock(ifp); } return (rc); } /* * {begin|end}_synchronized_op must be called from the same thread. */ int begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags, char *wmesg) { int rc, pri; #ifdef WITNESS /* the caller thinks it's ok to sleep, but is it really? 
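* (under WITNESS this warns if the thread holds any non-sleepable lock, Giant excepted, at this point)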
*/ if (flags & SLEEP_OK) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "begin_synchronized_op"); #endif if (INTR_OK) pri = PCATCH; else pri = 0; ADAPTER_LOCK(sc); for (;;) { if (vi && IS_DOOMED(vi)) { rc = ENXIO; goto done; } if (!IS_BUSY(sc)) { rc = 0; break; } if (!(flags & SLEEP_OK)) { rc = EBUSY; goto done; } if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) { rc = EINTR; goto done; } } KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = wmesg; sc->last_op_thr = curthread; sc->last_op_flags = flags; #endif done: if (!(flags & HOLD_LOCK) || rc) ADAPTER_UNLOCK(sc); return (rc); } /* * Tell if_ioctl and if_init that the VI is going away. This is * special variant of begin_synchronized_op and must be paired with a * call to end_synchronized_op. */ void doom_vi(struct adapter *sc, struct vi_info *vi) { ADAPTER_LOCK(sc); SET_DOOMED(vi); wakeup(&sc->flags); while (IS_BUSY(sc)) mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = "t4detach"; sc->last_op_thr = curthread; sc->last_op_flags = 0; #endif ADAPTER_UNLOCK(sc); } /* * {begin|end}_synchronized_op must be called from the same thread. */ void end_synchronized_op(struct adapter *sc, int flags) { if (flags & LOCK_HELD) ADAPTER_LOCK_ASSERT_OWNED(sc); else ADAPTER_LOCK(sc); KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); wakeup(&sc->flags); ADAPTER_UNLOCK(sc); } static int cxgbe_init_synchronized(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; int rc = 0, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return (0); /* already running */ if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_full_init(sc)) != 0)) return (rc); /* error message displayed already */ if (!(vi->flags & VI_INIT_DONE) && ((rc = vi_full_init(vi)) != 0)) return (rc); /* error message displayed already */ rc = update_mac_settings(ifp, XGMAC_ALL); if (rc) goto done; /* error message displayed already */ rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true); if (rc != 0) { if_printf(ifp, "enable_vi failed: %d\n", rc); goto done; } /* * Can't fail from this point onwards. Review cxgbe_uninit_synchronized * if this changes. */ for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags |= EQ_ENABLED; TXQ_UNLOCK(txq); } /* * The first iq of the first port to come up is used for tracing. */ if (sc->traceq < 0 && IS_MAIN_VI(vi)) { sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id; t4_write_reg(sc, is_t4(sc) ? A_MPS_TRC_RSS_CONTROL : A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) | V_QUEUENUMBER(sc->traceq)); pi->flags |= HAS_TRACEQ; } /* all ok */ PORT_LOCK(pi); ifp->if_drv_flags |= IFF_DRV_RUNNING; pi->up_vis++; if (pi->nvi > 1) callout_reset(&vi->tick, hz, vi_tick, vi); else callout_reset(&pi->tick, hz, cxgbe_tick, pi); PORT_UNLOCK(pi); done: if (rc != 0) cxgbe_uninit_synchronized(vi); return (rc); } /* * Idempotent. */ static int cxgbe_uninit_synchronized(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; int rc, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); if (!(vi->flags & VI_INIT_DONE)) { KASSERT(!(ifp->if_drv_flags & IFF_DRV_RUNNING), ("uninited VI is running")); return (0); } /* * Disable the VI so that all its data in either direction is discarded * by the MPS. 
Leave everything else (the queues, interrupts, and 1Hz * tick) intact as the TP can deliver negative advice or data that it's * holding in its RAM (for an offloaded connection) even after the VI is * disabled. */ rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false); if (rc) { if_printf(ifp, "disable_vi failed: %d\n", rc); return (rc); } for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags &= ~EQ_ENABLED; TXQ_UNLOCK(txq); } PORT_LOCK(pi); if (pi->nvi == 1) callout_stop(&pi->tick); else callout_stop(&vi->tick); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { PORT_UNLOCK(pi); return (0); } ifp->if_drv_flags &= ~IFF_DRV_RUNNING; pi->up_vis--; if (pi->up_vis > 0) { PORT_UNLOCK(pi); return (0); } PORT_UNLOCK(pi); pi->link_cfg.link_ok = 0; pi->link_cfg.speed = 0; pi->linkdnrc = -1; t4_os_link_changed(sc, pi->port_id, 0, -1); return (0); } /* * It is ok for this function to fail midway and return right away. t4_detach * will walk the entire sc->irq list and clean up whatever is valid. */ static int setup_intr_handlers(struct adapter *sc) { int rc, rid, p, q, v; char s[8]; struct irq *irq; struct port_info *pi; struct vi_info *vi; struct sge *sge = &sc->sge; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif #ifdef DEV_NETMAP struct sge_nm_rxq *nm_rxq; #endif #ifdef RSS int nbuckets = rss_getnumbuckets(); #endif /* * Setup interrupts. */ irq = &sc->irq[0]; rid = sc->intr_type == INTR_INTX ? 0 : 1; if (sc->intr_count == 1) return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all")); /* Multiple interrupts. */ KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports, ("%s: too few intr.", __func__)); /* The first one is always error intr */ rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err"); if (rc != 0) return (rc); irq++; rid++; /* The second one is always the firmware event queue */ rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt"); if (rc != 0) return (rc); irq++; rid++; for_each_port(sc, p) { pi = sc->port[p]; for_each_vi(pi, v, vi) { vi->first_intr = rid - 1; if (vi->nnmrxq > 0) { int n = max(vi->nrxq, vi->nnmrxq); MPASS(vi->flags & INTR_RXQ); rxq = &sge->rxq[vi->first_rxq]; #ifdef DEV_NETMAP nm_rxq = &sge->nm_rxq[vi->first_nm_rxq]; #endif for (q = 0; q < n; q++) { snprintf(s, sizeof(s), "%x%c%x", p, 'a' + v, q); if (q < vi->nrxq) irq->rxq = rxq++; #ifdef DEV_NETMAP if (q < vi->nnmrxq) irq->nm_rxq = nm_rxq++; #endif rc = t4_alloc_irq(sc, irq, rid, t4_vi_intr, irq, s); if (rc != 0) return (rc); irq++; rid++; vi->nintr++; } } else if (vi->flags & INTR_RXQ) { for_each_rxq(vi, q, rxq) { snprintf(s, sizeof(s), "%x%c%x", p, 'a' + v, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, rxq, s); if (rc != 0) return (rc); #ifdef RSS bus_bind_intr(sc->dev, irq->res, rss_getcpu(q % nbuckets)); #endif irq++; rid++; vi->nintr++; } } #ifdef TCP_OFFLOAD if (vi->flags & INTR_OFLD_RXQ) { for_each_ofld_rxq(vi, q, ofld_rxq) { snprintf(s, sizeof(s), "%x%c%x", p, 'A' + v, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, ofld_rxq, s); if (rc != 0) return (rc); irq++; rid++; vi->nintr++; } } #endif } } MPASS(irq == &sc->irq[sc->intr_count]); return (0); } int adapter_full_init(struct adapter *sc) { int rc, i; ASSERT_SYNCHRONIZED_OP(sc); ADAPTER_LOCK_ASSERT_NOTOWNED(sc); KASSERT((sc->flags & FULL_INIT_DONE) == 0, ("%s: FULL_INIT_DONE already", __func__)); /* * queues that belong to the adapter (not any particular port). 
*/ rc = t4_setup_adapter_queues(sc); if (rc != 0) goto done; for (i = 0; i < nitems(sc->tq); i++) { sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->tq[i]); if (sc->tq[i] == NULL) { device_printf(sc->dev, "failed to allocate task queue %d\n", i); rc = ENOMEM; goto done; } taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d", device_get_nameunit(sc->dev), i); } t4_intr_enable(sc); sc->flags |= FULL_INIT_DONE; done: if (rc != 0) adapter_full_uninit(sc); return (rc); } int adapter_full_uninit(struct adapter *sc) { int i; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); t4_teardown_adapter_queues(sc); for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) { taskqueue_free(sc->tq[i]); sc->tq[i] = NULL; } sc->flags &= ~FULL_INIT_DONE; return (0); } #ifdef RSS #define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \ RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \ RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \ RSS_HASHTYPE_RSS_UDP_IPV6) /* Translates kernel hash types to hardware. */ static int hashconfig_to_hashen(int hashconfig) { int hashen = 0; if (hashconfig & RSS_HASHTYPE_RSS_IPV4) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_IPV6) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) { hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; } if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) { hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; } if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; return (hashen); } /* Translates hardware hash types to kernel. */ static int hashen_to_hashconfig(int hashen) { int hashconfig = 0; if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) { /* * If UDP hashing was enabled it must have been enabled for * either IPv4 or IPv6 (inclusive or). Enabling UDP without * enabling any 4-tuple hash is nonsense configuration. */ MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)); if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6; } if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN) hashconfig |= RSS_HASHTYPE_RSS_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN) hashconfig |= RSS_HASHTYPE_RSS_IPV6; return (hashconfig); } #endif int vi_full_init(struct vi_info *vi) { struct adapter *sc = vi->pi->adapter; struct ifnet *ifp = vi->ifp; uint16_t *rss; struct sge_rxq *rxq; int rc, i, j, hashen; #ifdef RSS int nbuckets = rss_getnumbuckets(); int hashconfig = rss_gethashconfig(); int extra; uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; #endif ASSERT_SYNCHRONIZED_OP(sc); KASSERT((vi->flags & VI_INIT_DONE) == 0, ("%s: VI_INIT_DONE already", __func__)); sysctl_ctx_init(&vi->ctx); vi->flags |= VI_SYSCTL_CTX; /* * Allocate tx/rx/fl queues for this VI. */ rc = t4_setup_vi_queues(vi); if (rc != 0) goto done; /* error message displayed already */ /* * Setup RSS for this VI. Save a copy of the RSS table for later use. 
*/ if (vi->nrxq > vi->rss_size) { if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); " "some queues will never receive traffic.\n", vi->nrxq, vi->rss_size); } else if (vi->rss_size % vi->nrxq) { if_printf(ifp, "nrxq (%d), hw RSS table size (%d); " "expect uneven traffic distribution.\n", vi->nrxq, vi->rss_size); } #ifdef RSS MPASS(RSS_KEYSIZE == 40); if (vi->nrxq != nbuckets) { if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);" "performance will be impacted.\n", vi->nrxq, nbuckets); } rss_getkey((void *)&raw_rss_key[0]); for (i = 0; i < nitems(rss_key); i++) { rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]); } t4_write_rss_key(sc, &rss_key[0], -1); #endif rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK); for (i = 0; i < vi->rss_size;) { #ifdef RSS j = rss_get_indirection_to_bucket(i); j %= vi->nrxq; rxq = &sc->sge.rxq[vi->first_rxq + j]; rss[i++] = rxq->iq.abs_id; #else for_each_rxq(vi, j, rxq) { rss[i++] = rxq->iq.abs_id; if (i == vi->rss_size) break; } #endif } rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss, vi->rss_size); if (rc != 0) { if_printf(ifp, "rss_config failed: %d\n", rc); goto done; } #ifdef RSS hashen = hashconfig_to_hashen(hashconfig); /* * We may have had to enable some hashes even though the global config * wants them disabled. This is a potential problem that must be * reported to the user. */ extra = hashen_to_hashconfig(hashen) ^ hashconfig; /* * If we consider only the supported hash types, then the enabled hashes * are a superset of the requested hashes. In other words, there cannot * be any supported hash that was requested but not enabled, but there * can be hashes that were not requested but had to be enabled. */ extra &= SUPPORTED_RSS_HASHTYPES; MPASS((extra & hashconfig) == 0); if (extra) { if_printf(ifp, "global RSS config (0x%x) cannot be accommodated.\n", hashconfig); } if (extra & RSS_HASHTYPE_RSS_IPV4) if_printf(ifp, "IPv4 2-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_TCP_IPV4) if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_IPV6) if_printf(ifp, "IPv6 2-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_TCP_IPV6) if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_UDP_IPV4) if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_UDP_IPV6) if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n"); #else hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN; #endif rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, hashen, rss[0]); if (rc != 0) { if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc); goto done; } vi->rss = rss; vi->flags |= VI_INIT_DONE; done: if (rc != 0) vi_full_uninit(vi); return (rc); } /* * Idempotent. */ int vi_full_uninit(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int i; struct sge_rxq *rxq; struct sge_txq *txq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ofld_txq; #endif if (vi->flags & VI_INIT_DONE) { /* Need to quiesce queues. */ /* XXX: Only for the first VI? 
*/ if (IS_MAIN_VI(vi)) quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]); for_each_txq(vi, i, txq) { quiesce_txq(sc, txq); } #ifdef TCP_OFFLOAD for_each_ofld_txq(vi, i, ofld_txq) { quiesce_wrq(sc, ofld_txq); } #endif for_each_rxq(vi, i, rxq) { quiesce_iq(sc, &rxq->iq); quiesce_fl(sc, &rxq->fl); } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { quiesce_iq(sc, &ofld_rxq->iq); quiesce_fl(sc, &ofld_rxq->fl); } #endif free(vi->rss, M_CXGBE); free(vi->nm_rss, M_CXGBE); } t4_teardown_vi_queues(vi); vi->flags &= ~VI_INIT_DONE; return (0); } static void quiesce_txq(struct adapter *sc, struct sge_txq *txq) { struct sge_eq *eq = &txq->eq; struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; (void) sc; /* unused */ #ifdef INVARIANTS TXQ_LOCK(txq); MPASS((eq->flags & EQ_ENABLED) == 0); TXQ_UNLOCK(txq); #endif /* Wait for the mp_ring to empty. */ while (!mp_ring_is_idle(txq->r)) { mp_ring_check_drainage(txq->r, 0); pause("rquiesce", 1); } /* Then wait for the hardware to finish. */ while (spg->cidx != htobe16(eq->pidx)) pause("equiesce", 1); /* Finally, wait for the driver to reclaim all descriptors. */ while (eq->cidx != eq->pidx) pause("dquiesce", 1); } static void quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq) { /* XXXTX */ } static void quiesce_iq(struct adapter *sc, struct sge_iq *iq) { (void) sc; /* unused */ /* Synchronize with the interrupt handler */ while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED)) pause("iqfree", 1); } static void quiesce_fl(struct adapter *sc, struct sge_fl *fl) { mtx_lock(&sc->sfl_lock); FL_LOCK(fl); fl->flags |= FL_DOOMED; FL_UNLOCK(fl); callout_stop(&sc->sfl_callout); mtx_unlock(&sc->sfl_lock); KASSERT((fl->flags & FL_STARVING) == 0, ("%s: still starving", __func__)); } static int t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid, driver_intr_t *handler, void *arg, char *name) { int rc; irq->rid = rid; irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid, RF_SHAREABLE | RF_ACTIVE); if (irq->res == NULL) { device_printf(sc->dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET, NULL, handler, arg, &irq->tag); if (rc != 0) { device_printf(sc->dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name, rc); } else if (name) bus_describe_intr(sc->dev, irq->res, irq->tag, name); return (rc); } static int t4_free_irq(struct adapter *sc, struct irq *irq) { if (irq->tag) bus_teardown_intr(sc->dev, irq->res, irq->tag); if (irq->res) bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res); bzero(irq, sizeof(*irq)); return (0); } static void get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf) { regs->version = chip_id(sc) | chip_rev(sc) << 10; t4_get_regs(sc, buf, regs->len); } #define A_PL_INDIR_CMD 0x1f8 #define S_PL_AUTOINC 31 #define M_PL_AUTOINC 0x1U #define V_PL_AUTOINC(x) ((x) << S_PL_AUTOINC) #define G_PL_AUTOINC(x) (((x) >> S_PL_AUTOINC) & M_PL_AUTOINC) #define S_PL_VFID 20 #define M_PL_VFID 0xffU #define V_PL_VFID(x) ((x) << S_PL_VFID) #define G_PL_VFID(x) (((x) >> S_PL_VFID) & M_PL_VFID) #define S_PL_ADDR 0 #define M_PL_ADDR 0xfffffU #define V_PL_ADDR(x) ((x) << S_PL_ADDR) #define G_PL_ADDR(x) (((x) >> S_PL_ADDR) & M_PL_ADDR) #define A_PL_INDIR_DATA 0x1fc static uint64_t read_vf_stat(struct adapter *sc, unsigned int viid, int reg) { u32 stats[2]; mtx_assert(&sc->reg_lock, MA_OWNED); t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(reg))); 
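/* PL_AUTOINC is set, so consecutive reads of A_PL_INDIR_DATA return the low and then the high 32 bits of the 64-bit statistic. */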
stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA); stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA); return (((uint64_t)stats[1]) << 32 | stats[0]); } static void t4_get_vi_stats(struct adapter *sc, unsigned int viid, struct fw_vi_stats_vf *stats) { #define GET_STAT(name) \ read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L) stats->tx_bcast_bytes = GET_STAT(TX_VF_BCAST_BYTES); stats->tx_bcast_frames = GET_STAT(TX_VF_BCAST_FRAMES); stats->tx_mcast_bytes = GET_STAT(TX_VF_MCAST_BYTES); stats->tx_mcast_frames = GET_STAT(TX_VF_MCAST_FRAMES); stats->tx_ucast_bytes = GET_STAT(TX_VF_UCAST_BYTES); stats->tx_ucast_frames = GET_STAT(TX_VF_UCAST_FRAMES); stats->tx_drop_frames = GET_STAT(TX_VF_DROP_FRAMES); stats->tx_offload_bytes = GET_STAT(TX_VF_OFFLOAD_BYTES); stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES); stats->rx_bcast_bytes = GET_STAT(RX_VF_BCAST_BYTES); stats->rx_bcast_frames = GET_STAT(RX_VF_BCAST_FRAMES); stats->rx_mcast_bytes = GET_STAT(RX_VF_MCAST_BYTES); stats->rx_mcast_frames = GET_STAT(RX_VF_MCAST_FRAMES); stats->rx_ucast_bytes = GET_STAT(RX_VF_UCAST_BYTES); stats->rx_ucast_frames = GET_STAT(RX_VF_UCAST_FRAMES); stats->rx_err_frames = GET_STAT(RX_VF_ERR_FRAMES); #undef GET_STAT } static void t4_clr_vi_stats(struct adapter *sc, unsigned int viid) { int reg; t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L))); for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L; reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4) t4_write_reg(sc, A_PL_INDIR_DATA, 0); } static void vi_refresh_stats(struct adapter *sc, struct vi_info *vi) { struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ if (!(vi->flags & VI_INIT_DONE)) return; getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &vi->last_refreshed, <)) return; mtx_lock(&sc->reg_lock); t4_get_vi_stats(sc, vi->viid, &vi->stats); getmicrotime(&vi->last_refreshed); mtx_unlock(&sc->reg_lock); } static void cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi) { int i; u_int v, tnl_cong_drops; struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &pi->last_refreshed, <)) return; tnl_cong_drops = 0; t4_get_port_stats(sc, pi->tx_chan, &pi->stats); for (i = 0; i < sc->chip_params->nchan; i++) { if (pi->rx_chan_map & (1 << i)) { mtx_lock(&sc->reg_lock); t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1, A_TP_MIB_TNL_CNG_DROP_0 + i); mtx_unlock(&sc->reg_lock); tnl_cong_drops += v; } } pi->tnl_cong_drops = tnl_cong_drops; getmicrotime(&pi->last_refreshed); } static void cxgbe_tick(void *arg) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; PORT_LOCK_ASSERT_OWNED(pi); cxgbe_refresh_stats(sc, pi); callout_schedule(&pi->tick, hz); } void vi_tick(void *arg) { struct vi_info *vi = arg; struct adapter *sc = vi->pi->adapter; vi_refresh_stats(sc, vi); callout_schedule(&vi->tick, hz); } static void cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid) { struct ifnet *vlan; if (arg != ifp || ifp->if_type != IFT_ETHER) return; vlan = VLAN_DEVAT(ifp, vid); VLAN_SETCOOKIE(vlan, ifp); } /* * Should match fw_caps_config_ enums in t4fw_interface.h */ static char *caps_decoder[] = { "\20\001IPMI\002NCSI", /* 0: NBM */ "\20\001PPP\002QFC\003DCBX", /* 1: link */ "\20\001INGRESS\002EGRESS", /* 2: switch */ "\20\001NIC\002VM\003IDS\004UM\005UM_ISGL" /* 3: NIC */ "\006HASHFILTER\007ETHOFLD", "\20\001TOE", /* 4: TOE */ "\20\001RDDP\002RDMAC", /* 
5: RDMA */ "\20\001INITIATOR_PDU\002TARGET_PDU" /* 6: iSCSI */ "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD" "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD" "\007T10DIF" "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD", "\20\00KEYS", /* 7: TLS */ "\20\001INITIATOR\002TARGET\003CTRL_OFLD" /* 8: FCoE */ "\004PO_INITIATOR\005PO_TARGET", }; static void t4_sysctls(struct adapter *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children, *c0; static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"}; ctx = device_get_sysctl_ctx(sc->dev); /* * dev.t4nex.X. */ oid = device_get_sysctl_tree(sc->dev); c0 = children = SYSCTL_CHILDREN(oid); sc->sc_do_rxcopy = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW, &sc->sc_do_rxcopy, 1, "Do RX copy of small frames"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL, sc->params.nports, "# of ports"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD, NULL, chip_rev(sc), "chip hardware revision"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version", CTLFLAG_RD, sc->tp_version, 0, "TP microcode version"); if (sc->params.exprom_vers != 0) { SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "exprom_version", CTLFLAG_RD, sc->exprom_version, 0, "expansion ROM version"); } SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", CTLFLAG_RD, sc->fw_version, 0, "firmware version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf", CTLFLAG_RD, sc->cfg_file, 0, "configuration file"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL, sc->cfcsum, "config file checksum"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells", CTLTYPE_STRING | CTLFLAG_RD, doorbells, sc->doorbells, sysctl_bitfield, "A", "available doorbells"); #define SYSCTL_CAP(name, n, text) \ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \ CTLTYPE_STRING | CTLFLAG_RD, caps_decoder[n], sc->name, \ sysctl_bitfield, "A", "available " text "capabilities") SYSCTL_CAP(nbmcaps, 0, "NBM"); SYSCTL_CAP(linkcaps, 1, "link"); SYSCTL_CAP(switchcaps, 2, "switch"); SYSCTL_CAP(niccaps, 3, "NIC"); SYSCTL_CAP(toecaps, 4, "TCP offload"); SYSCTL_CAP(rdmacaps, 5, "RDMA"); SYSCTL_CAP(iscsicaps, 6, "iSCSI"); SYSCTL_CAP(tlscaps, 7, "TLS"); SYSCTL_CAP(fcoecaps, 8, "FCoE"); #undef SYSCTL_CAP SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL, sc->params.vpd.cclk, "core clock frequency (in KHz)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers", CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.timer_val, sizeof(sc->params.sge.timer_val), sysctl_int_array, "A", "interrupt holdoff timer values (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts", CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.counter_val, sizeof(sc->params.sge.counter_val), sysctl_int_array, "A", "interrupt holdoff packet counter values"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD, NULL, sc->tids.nftids, "number of filters"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, sc, 0, sysctl_temperature, "I", "chip temperature (in Celsius)"); t4_sge_sysctls(sc, ctx, children); sc->lro_timeout = 100; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW, &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "debug_flags", CTLFLAG_RW, &sc->debug_flags, 0, "flags to enable runtime debugging"); #ifdef SBUF_DRAIN /* * dev.t4nex.X.misc. Marked CTLFLAG_SKIP to avoid information overload. 
*/ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc", CTLFLAG_RD | CTLFLAG_SKIP, NULL, "logs and miscellaneous information"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cctrl, "A", "congestion control"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp", CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0", CTLTYPE_STRING | CTLFLAG_RD, sc, 3, sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1", CTLTYPE_STRING | CTLFLAG_RD, sc, 4, sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5, sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, chip_id(sc) <= CHELSIO_T5 ? sysctl_cim_la : sysctl_cim_la_t6, "A", "CIM logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ma_la, "A", "CIM MA logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2", CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3", CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge", CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)"); if (chip_id(sc) > CHELSIO_T4) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_pif_la, "A", "CIM PIF logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_qcfg, "A", "CIM queue configuration"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cpl_stats, "A", "CPL statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ddp_stats, "A", "non-TCP DDP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_devlog, "A", "firmware's device log"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_fcoe_stats, "A", "FCoE statistics"); SYSCTL_ADD_PROC(ctx, children, 
OID_AUTO, "hw_sched", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_hw_sched, "A", "hardware scheduler "); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_l2t, "A", "hardware L2 table"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_lb_stats, "A", "loopback statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_meminfo, "A", "memory regions"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6, "A", "MPS TCAM entries"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_path_mtus, "A", "path MTUs"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_pm_stats, "A", "PM statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_rdma_stats, "A", "RDMA statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tcp_stats, "A", "TCP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tids, "A", "TID information"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_err_stats, "A", "TP error statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask", CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tp_la_mask, "I", "TP logic analyzer event capture mask"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_la, "A", "TP logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tx_rate, "A", "Tx rate"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ulprx_la, "A", "ULPRX logic analyzer"); if (is_t5(sc)) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_wcwr_stats, "A", "write combined work requests"); } #endif #ifdef TCP_OFFLOAD if (is_offload(sc)) { /* * dev.t4nex.X.toe. 
*/ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD, NULL, "TOE parameters"); children = SYSCTL_CHILDREN(oid); sc->tt.sndbuf = 256 * 1024; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW, &sc->tt.sndbuf, 0, "max hardware send buffer size"); sc->tt.ddp = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW, &sc->tt.ddp, 0, "DDP allowed"); sc->tt.rx_coalesce = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce", CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing"); sc->tt.tx_align = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align", CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload"); sc->tt.tx_zcopy = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy", CTLFLAG_RW, &sc->tt.tx_zcopy, 0, "Enable zero-copy aio_write(2)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A", "TP timer tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_tp_tick, "A", "TCP timestamp tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_tp_tick, "A", "DACK tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer", CTLTYPE_UINT | CTLFLAG_RD, sc, 0, sysctl_tp_dack_timer, "IU", "DACK timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MIN, sysctl_tp_timer, "LU", "Retransmit min (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MAX, sysctl_tp_timer, "LU", "Retransmit max (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MIN, sysctl_tp_timer, "LU", "Persist timer min (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MAX, sysctl_tp_timer, "LU", "Persist timer max (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_IDLE, sysctl_tp_timer, "LU", "Keepidle idle timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_intvl", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_INTVL, sysctl_tp_timer, "LU", "Keepidle interval (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_INIT_SRTT, sysctl_tp_timer, "LU", "Initial SRTT (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_FINWAIT2_TIMER, sysctl_tp_timer, "LU", "FINWAIT2 timer (us)"); } #endif } void vi_sysctls(struct vi_info *vi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children; ctx = device_get_sysctl_ctx(vi->dev); /* * dev.v?(cxgbe|cxl).X. 
*/ oid = device_get_sysctl_tree(vi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL, vi->viid, "VI identifier"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD, &vi->nrxq, 0, "# of rx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD, &vi->ntxq, 0, "# of tx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD, &vi->first_rxq, 0, "index of first rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD, &vi->first_txq, 0, "index of first tx queue"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_size", CTLFLAG_RD, NULL, vi->rss_size, "size of RSS indirection table"); if (IS_MAIN_VI(vi)) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU", "Reserve queue 0 for non-flowid packets"); } #ifdef TCP_OFFLOAD if (vi->nofldrxq != 0) { SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD, &vi->nofldrxq, 0, "# of rx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD, &vi->nofldtxq, 0, "# of tx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq", CTLFLAG_RD, &vi->first_ofld_rxq, 0, "index of first TOE rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq", CTLFLAG_RD, &vi->first_ofld_txq, 0, "index of first TOE tx queue"); } #endif #ifdef DEV_NETMAP if (vi->nnmrxq != 0) { SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD, &vi->nnmrxq, 0, "# of netmap rx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD, &vi->nnmtxq, 0, "# of netmap tx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq", CTLFLAG_RD, &vi->first_nm_rxq, 0, "index of first netmap rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq", CTLFLAG_RD, &vi->first_nm_txq, 0, "index of first netmap tx queue"); } #endif SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I", "holdoff timer index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I", "holdoff packet counter index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I", "rx queue size"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I", "tx queue size"); } static void cxgbe_sysctls(struct port_info *pi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children, *children2; struct adapter *sc = pi->adapter; int i; char name[16]; ctx = device_get_sysctl_ctx(pi->dev); /* * dev.cxgbe.X. 
*/ oid = device_get_sysctl_tree(pi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING | CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down"); if (pi->port_type == FW_PORT_TYPE_BT_XAUI) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I", "PHY temperature (in Celsius)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version", CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I", "PHY firmware version"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings", CTLTYPE_STRING | CTLFLAG_RW, pi, PAUSE_TX, sysctl_pause_settings, "A", "PAUSE settings (bit 0 = rx_pause, bit 1 = tx_pause)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL, port_top_speed(pi), "max speed (in Gbps)"); /* * dev.(cxgbe|cxl).X.tc. */ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc", CTLFLAG_RD, NULL, "Tx scheduler traffic classes"); for (i = 0; i < sc->chip_params->nsched_cls; i++) { struct tx_sched_class *tc = &pi->tc[i]; snprintf(name, sizeof(name), "%d", i); children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL, "traffic class")); SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "flags", CTLFLAG_RD, &tc->flags, 0, "flags"); SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount", CTLFLAG_RD, &tc->refcount, 0, "references to this class"); #ifdef SBUF_DRAIN SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params", CTLTYPE_STRING | CTLFLAG_RD, sc, (pi->port_id << 16) | i, sysctl_tc_params, "A", "traffic class parameters"); #endif } /* * dev.cxgbe.X.stats. */ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD, NULL, "port statistics"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD, &pi->tx_parse_error, 0, "# of tx packets with invalid length or # of segments"); #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \ SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \ CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \ sysctl_handle_t4_reg64, "QU", desc) SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_64", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, 
"tx_frames_1024_1518", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L)); SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L)); SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err", "# of frames received with bad FCS", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_len_err", "# of frames received with length error", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_64", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023", "# of rx frames in this range", 
PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L)); #undef SYSCTL_ADD_T4_REG64 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \ &pi->stats.name, desc) /* We get these from port_stats and they may be stale by up to 1s */ SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0, "# drops due to buffer-group 0 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1, "# drops due to buffer-group 1 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2, "# drops due to buffer-group 2 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3, "# drops due to buffer-group 3 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc0, "# of buffer-group 0 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc1, "# of buffer-group 1 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc2, "# of buffer-group 2 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc3, "# of buffer-group 3 truncated packets"); #undef SYSCTL_ADD_T4_PORTSTAT } static int sysctl_int_array(SYSCTL_HANDLER_ARGS) { int rc, *i, space = 0; struct sbuf sb; sbuf_new_for_sysctl(&sb, NULL, 64, req); for (i = arg1; arg2; arg2 -= sizeof(int), i++) { if (space) sbuf_printf(&sb, " "); sbuf_printf(&sb, "%d", *i); space = 1; } rc = sbuf_finish(&sb); sbuf_delete(&sb); return (rc); } static int sysctl_bitfield(SYSCTL_HANDLER_ARGS) { int rc; struct sbuf *sb; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", (int)arg2, (char *)arg1); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_btphy(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; int op = arg2; struct adapter *sc = pi->adapter; u_int v; int rc; rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt"); if (rc) return (rc); /* XXX: magic numbers */ rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 
0x20 : 0xc820, &v); end_synchronized_op(sc, 0); if (rc) return (rc); if (op == 0) v /= 256; rc = sysctl_handle_int(oidp, &v, 0, req); return (rc); } static int sysctl_noflowq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; int rc, val; val = vi->rsrv_noflowq; rc = sysctl_handle_int(oidp, &val, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if ((val >= 1) && (vi->ntxq > 1)) vi->rsrv_noflowq = 1; else vi->rsrv_noflowq = 0; return (rc); } static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc, i; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif uint8_t v; idx = vi->tmr_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < 0 || idx >= SGE_NTIMERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4tmr"); if (rc) return (rc); v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1); for_each_rxq(vi, i, rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&rxq->iq.intr_params, v); #else rxq->iq.intr_params = v; #endif } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&ofld_rxq->iq.intr_params, v); #else ofld_rxq->iq.intr_params = v; #endif } #endif vi->tmr_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (0); } static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc; idx = vi->pktc_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < -1 || idx >= SGE_NCOUNTERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4pktc"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->pktc_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int qsize, rc; qsize = vi->qsize_rxq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || (qsize & 7)) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4rxqs"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->qsize_rxq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int qsize, rc; qsize = vi->qsize_txq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || qsize > 65536) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4txqs"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->qsize_txq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc; if (req->newptr == NULL) { struct sbuf *sb; static char *bits = "\20\1PAUSE_RX\2PAUSE_TX"; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", lc->fc & (PAUSE_TX 
| PAUSE_RX), bits); rc = sbuf_finish(sb); sbuf_delete(sb); } else { char s[2]; int n; s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX)); s[1] = 0; rc = sysctl_handle_string(oidp, s, sizeof(s), req); if (rc != 0) return(rc); if (s[1] != 0) return (EINVAL); if (s[0] < '0' || s[0] > '9') return (EINVAL); /* not a number */ n = s[0] - '0'; if (n & ~(PAUSE_TX | PAUSE_RX)) return (EINVAL); /* some other bit is set too */ rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4PAUSE"); if (rc) return (rc); if ((lc->requested_fc & (PAUSE_TX | PAUSE_RX)) != n) { int link_ok = lc->link_ok; lc->requested_fc &= ~(PAUSE_TX | PAUSE_RX); lc->requested_fc |= n; rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); lc->link_ok = link_ok; /* restore */ } end_synchronized_op(sc, 0); } return (rc); } static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int reg = arg2; uint64_t val; val = t4_read_reg64(sc, reg); return (sysctl_handle_64(oidp, &val, 0, req)); } static int sysctl_temperature(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int rc, t; uint32_t param, val; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp"); if (rc) return (rc); param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) | V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); end_synchronized_op(sc, 0); if (rc) return (rc); /* unknown is returned as 0 but we display -1 in that case */ t = val == 0 ? -1 : val; rc = sysctl_handle_int(oidp, &t, 0, req); return (rc); } #ifdef SBUF_DRAIN static int sysctl_cctrl(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint16_t incr[NMTUS][NCCTRL_WIN]; static const char *dec_fac[] = { "0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875", "0.9375" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); t4_read_cong_tbl(sc, incr); for (i = 0; i < NCCTRL_WIN; ++i) { sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i, incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i], incr[5][i], incr[6][i], incr[7][i]); sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n", incr[8][i], incr[9][i], incr[10][i], incr[11][i], incr[12][i], incr[13][i], incr[14][i], incr[15][i], sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = { "TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI", /* ibq's */ "ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI", /* obq's */ "SGE0-RX", "SGE1-RX" /* additional obq's (T5 onwards) */ }; static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n, qid = arg2; uint32_t *buf, *p; char *qtype; u_int cim_num_obq = sc->chip_params->cim_num_obq; KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq, ("%s: bad qid %d\n", __func__, qid)); if (qid < CIM_NUM_IBQ) { /* inbound queue */ qtype = "IBQ"; n = 4 * CIM_IBQ_SIZE; buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = t4_read_cim_ibq(sc, qid, buf, n); } else { /* outbound queue */ qtype = "OBQ"; qid -= CIM_NUM_IBQ; n = 4 * cim_num_obq * CIM_OBQ_SIZE; buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = t4_read_cim_obq(sc, qid, buf, n); } if (rc < 0) { rc = -rc; goto done; } n = rc * sizeof(uint32_t); /* rc has # of words actually read */ rc = sysctl_wire_old_buffer(req, 0); 
if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) { rc = ENOMEM; goto done; } sbuf_printf(sb, "%s%d %s", qtype , qid, qname[arg2]); for (i = 0, p = buf; i < n; i += 16, p += 4) sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1], p[2], p[3]); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int cfg; struct sbuf *sb; uint32_t *buf, *p; int rc; MPASS(chip_id(sc) <= CHELSIO_T5); rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_cim_read_la(sc, buf, NULL); if (rc != 0) goto done; sbuf_printf(sb, "Status Data PC%s", cfg & F_UPDBGLACAPTPCONLY ? "" : " LS0Stat LS0Addr LS0Data"); for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) { if (cfg & F_UPDBGLACAPTPCONLY) { sbuf_printf(sb, "\n %02x %08x %08x", p[5] & 0xff, p[6], p[7]); sbuf_printf(sb, "\n %02x %02x%06x %02x%06x", (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8, p[4] & 0xff, p[5] >> 8); sbuf_printf(sb, "\n %02x %x%07x %x%07x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4); } else { sbuf_printf(sb, "\n %02x %x%07x %x%07x %08x %08x " "%08x%08x%08x%08x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5], p[6], p[7]); } } rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int cfg; struct sbuf *sb; uint32_t *buf, *p; int rc; MPASS(chip_id(sc) > CHELSIO_T5); rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_cim_read_la(sc, buf, NULL); if (rc != 0) goto done; sbuf_printf(sb, "Status Inst Data PC%s", cfg & F_UPDBGLACAPTPCONLY ? 
"" : " LS0Stat LS0Addr LS0Data LS1Stat LS1Addr LS1Data"); for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) { if (cfg & F_UPDBGLACAPTPCONLY) { sbuf_printf(sb, "\n %02x %08x %08x %08x", p[3] & 0xff, p[2], p[1], p[0]); sbuf_printf(sb, "\n %02x %02x%06x %02x%06x %02x%06x", (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8, p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8); sbuf_printf(sb, "\n %02x %04x%04x %04x%04x %04x%04x", (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16, p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff, p[6] >> 16); } else { sbuf_printf(sb, "\n %02x %04x%04x %04x%04x %04x%04x " "%08x %08x %08x %08x %08x %08x", (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16, p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff, p[6] >> 16, p[2], p[1], p[0], p[5], p[4], p[3]); } } rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE); p = buf; for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCnt ID Tag UE Data RDY VLD"); for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%3u %2u %x %u %08x%08x %u %u", (p[2] >> 10) & 0xff, (p[2] >> 7) & 7, (p[2] >> 3) & 0xf, (p[2] >> 2) & 1, (p[1] >> 2) | ((p[2] & 3) << 30), (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1, p[0] & 1); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL); p = buf; sbuf_printf(sb, "Cntl ID DataBE Addr Data"); for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %04x %08x %08x%08x%08x%08x", (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff, p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCntl ID Data"); for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %08x%08x%08x%08x", (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t thres[CIM_NUM_IBQ]; uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr; uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat; u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq; cim_num_obq = sc->chip_params->cim_num_obq; if (is_t4(sc)) { ibq_rdaddr = A_UP_IBQ_0_RDADDR; obq_rdaddr = A_UP_OBQ_0_REALADDR; } else { ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR; obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR; } nq = CIM_NUM_IBQ + cim_num_obq; rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat); if (rc == 0) rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr); if (rc != 0) return (rc); t4_read_cimq_cfg(sc, base, size, 
thres); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Queue Base Size Thres RdPtr WrPtr SOP EOP Avail"); for (i = 0; i < CIM_NUM_IBQ; i++, p += 4) sbuf_printf(sb, "\n%7s %5x %5u %5u %6x %4x %4u %4u %5u", qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]), G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); for ( ; i < nq; i++, p += 4, wr += 2) sbuf_printf(sb, "\n%7s %5x %5u %12x %4x %4u %4u %5u", qname[i], base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff, wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_cpl_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_cpl_stats(sc, &stats); mtx_unlock(&sc->reg_lock); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3"); sbuf_printf(sb, "\nCPL requests: %10u %10u %10u %10u", stats.req[0], stats.req[1], stats.req[2], stats.req[3]); sbuf_printf(sb, "\nCPL responses: %10u %10u %10u %10u", stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]); } else { sbuf_printf(sb, " channel 0 channel 1"); sbuf_printf(sb, "\nCPL requests: %10u %10u", stats.req[0], stats.req[1]); sbuf_printf(sb, "\nCPL responses: %10u %10u", stats.rsp[0], stats.rsp[1]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_usm_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_usm_stats(sc, &stats); sbuf_printf(sb, "Frames: %u\n", stats.frames); sbuf_printf(sb, "Octets: %ju\n", stats.octets); sbuf_printf(sb, "Drops: %u", stats.drops); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static const char * const devlog_level_strings[] = { [FW_DEVLOG_LEVEL_EMERG] = "EMERG", [FW_DEVLOG_LEVEL_CRIT] = "CRIT", [FW_DEVLOG_LEVEL_ERR] = "ERR", [FW_DEVLOG_LEVEL_NOTICE] = "NOTICE", [FW_DEVLOG_LEVEL_INFO] = "INFO", [FW_DEVLOG_LEVEL_DEBUG] = "DEBUG" }; static const char * const devlog_facility_strings[] = { [FW_DEVLOG_FACILITY_CORE] = "CORE", [FW_DEVLOG_FACILITY_CF] = "CF", [FW_DEVLOG_FACILITY_SCHED] = "SCHED", [FW_DEVLOG_FACILITY_TIMER] = "TIMER", [FW_DEVLOG_FACILITY_RES] = "RES", [FW_DEVLOG_FACILITY_HW] = "HW", [FW_DEVLOG_FACILITY_FLR] = "FLR", [FW_DEVLOG_FACILITY_DMAQ] = "DMAQ", [FW_DEVLOG_FACILITY_PHY] = "PHY", [FW_DEVLOG_FACILITY_MAC] = "MAC", [FW_DEVLOG_FACILITY_PORT] = "PORT", [FW_DEVLOG_FACILITY_VI] = "VI", [FW_DEVLOG_FACILITY_FILTER] = "FILTER", [FW_DEVLOG_FACILITY_ACL] = "ACL", [FW_DEVLOG_FACILITY_TM] = "TM", [FW_DEVLOG_FACILITY_QFC] = "QFC", [FW_DEVLOG_FACILITY_DCB] = "DCB", [FW_DEVLOG_FACILITY_ETH] = "ETH", [FW_DEVLOG_FACILITY_OFLD] = "OFLD", [FW_DEVLOG_FACILITY_RI] = "RI", [FW_DEVLOG_FACILITY_ISCSI] = "ISCSI", [FW_DEVLOG_FACILITY_FCOE] = "FCOE", [FW_DEVLOG_FACILITY_FOISCSI] = "FOISCSI", [FW_DEVLOG_FACILITY_FOFCOE] = "FOFCOE", [FW_DEVLOG_FACILITY_CHNET] = "CHNET", }; static int sysctl_devlog(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e *buf, *e; int i, j, 
rc, nentries, first = 0; struct sbuf *sb; uint64_t ftstamp = UINT64_MAX; if (dparams->addr == 0) return (ENXIO); buf = malloc(dparams->size, M_CXGBE, M_NOWAIT); if (buf == NULL) return (ENOMEM); rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size); if (rc != 0) goto done; nentries = dparams->size / sizeof(struct fw_devlog_e); for (i = 0; i < nentries; i++) { e = &buf[i]; if (e->timestamp == 0) break; /* end */ e->timestamp = be64toh(e->timestamp); e->seqno = be32toh(e->seqno); for (j = 0; j < 8; j++) e->params[j] = be32toh(e->params[j]); if (e->timestamp < ftstamp) { ftstamp = e->timestamp; first = i; } } if (buf[first].timestamp == 0) goto done; /* nothing in the log */ rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) { rc = ENOMEM; goto done; } sbuf_printf(sb, "%10s %15s %8s %8s %s\n", "Seq#", "Tstamp", "Level", "Facility", "Message"); i = first; do { e = &buf[i]; if (e->timestamp == 0) break; /* end */ sbuf_printf(sb, "%10d %15ju %8s %8s ", e->seqno, e->timestamp, (e->level < nitems(devlog_level_strings) ? devlog_level_strings[e->level] : "UNKNOWN"), (e->facility < nitems(devlog_facility_strings) ? devlog_facility_strings[e->facility] : "UNKNOWN")); sbuf_printf(sb, e->fmt, e->params[0], e->params[1], e->params[2], e->params[3], e->params[4], e->params[5], e->params[6], e->params[7]); if (++i == nentries) i = 0; } while (i != first); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_fcoe_stats stats[MAX_NCHAN]; int i, nchan = sc->chip_params->nchan; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); for (i = 0; i < nchan; i++) t4_get_fcoe_stats(sc, i, &stats[i]); if (nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3"); sbuf_printf(sb, "\noctetsDDP: %16ju %16ju %16ju %16ju", stats[0].octets_ddp, stats[1].octets_ddp, stats[2].octets_ddp, stats[3].octets_ddp); sbuf_printf(sb, "\nframesDDP: %16u %16u %16u %16u", stats[0].frames_ddp, stats[1].frames_ddp, stats[2].frames_ddp, stats[3].frames_ddp); sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u", stats[0].frames_drop, stats[1].frames_drop, stats[2].frames_drop, stats[3].frames_drop); } else { sbuf_printf(sb, " channel 0 channel 1"); sbuf_printf(sb, "\noctetsDDP: %16ju %16ju", stats[0].octets_ddp, stats[1].octets_ddp); sbuf_printf(sb, "\nframesDDP: %16u %16u", stats[0].frames_ddp, stats[1].frames_ddp); sbuf_printf(sb, "\nframesDrop: %16u %16u", stats[0].frames_drop, stats[1].frames_drop); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; unsigned int map, kbps, ipg, mode; unsigned int pace_tab[NTX_SCHED]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP); mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG)); t4_read_pace_tbl(sc, pace_tab); sbuf_printf(sb, "Scheduler Mode Channel Rate (Kbps) " "Class IPG (0.1 ns) Flow IPG (us)"); for (i = 0; i < NTX_SCHED; ++i, map >>= 2) { t4_get_tx_sched(sc, i, &kbps, &ipg); sbuf_printf(sb, "\n %u %-5s %u ", i, (mode & (1 << i)) ? 
"flow" : "class", map & 3); if (kbps) sbuf_printf(sb, "%9u ", kbps); else sbuf_printf(sb, " disabled "); if (ipg) sbuf_printf(sb, "%13u ", ipg); else sbuf_printf(sb, " disabled "); if (pace_tab[i]) sbuf_printf(sb, "%10u", pace_tab[i]); else sbuf_printf(sb, " disabled"); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, j; uint64_t *p0, *p1; struct lb_port_stats s[2]; static const char *stat_name[] = { "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:", "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:", "Frames128To255:", "Frames256To511:", "Frames512To1023:", "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:", "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:", "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:", "BG2FramesTrunc:", "BG3FramesTrunc:" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); memset(s, 0, sizeof(s)); for (i = 0; i < sc->chip_params->nchan; i += 2) { t4_get_lb_stats(sc, i, &s[0]); t4_get_lb_stats(sc, i + 1, &s[1]); p0 = &s[0].octets; p1 = &s[1].octets; sbuf_printf(sb, "%s Loopback %u" " Loopback %u", i == 0 ? "" : "\n", i, i + 1); for (j = 0; j < nitems(stat_name); j++) sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j], *p0++, *p1++); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS) { int rc = 0; struct port_info *pi = arg1; struct sbuf *sb; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 64, req); if (sb == NULL) return (ENOMEM); if (pi->linkdnrc < 0) sbuf_printf(sb, "n/a"); else sbuf_printf(sb, "%s", t4_link_down_rc_str(pi->linkdnrc)); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } struct mem_desc { unsigned int base; unsigned int limit; unsigned int idx; }; static int mem_desc_cmp(const void *a, const void *b) { return ((const struct mem_desc *)a)->base - ((const struct mem_desc *)b)->base; } static void mem_region_show(struct sbuf *sb, const char *name, unsigned int from, unsigned int to) { unsigned int size; if (from == to) return; size = to - from + 1; if (size == 0) return; /* XXX: need humanize_number(3) in libkern for a more readable 'size' */ sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size); } static int sysctl_meminfo(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n; uint32_t lo, hi, used, alloc; static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"}; static const char *region[] = { "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:", "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:", "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:", "TDDP region:", "TPT region:", "STAG region:", "RQ region:", "RQUDP region:", "PBL region:", "TXPBL region:", "DBVFIFO region:", "ULPRX state:", "ULPTX state:", "On-chip queues:" }; struct mem_desc avail[4]; struct mem_desc mem[nitems(region) + 3]; /* up to 3 holes */ struct mem_desc *md = mem; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); for (i = 0; i < nitems(mem); i++) { mem[i].limit = 0; mem[i].idx = i; } /* Find and sort the populated memory ranges */ i = 0; lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); if (lo & F_EDRAM0_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM0_BAR); avail[i].base = 
G_EDRAM0_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20); avail[i].idx = 0; i++; } if (lo & F_EDRAM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM1_BAR); avail[i].base = G_EDRAM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20); avail[i].idx = 1; i++; } if (lo & F_EXT_MEM_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); avail[i].base = G_EXT_MEM_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM_SIZE(hi) << 20); avail[i].idx = is_t5(sc) ? 3 : 2; /* Call it MC0 for T5 */ i++; } if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); avail[i].base = G_EXT_MEM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM1_SIZE(hi) << 20); avail[i].idx = 4; i++; } if (!i) /* no memory available */ return 0; qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp); (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR); (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE); /* the next few have explicit upper bounds */ md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) * G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE)); md++; md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) * G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE)); md++; if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { if (chip_id(sc) <= CHELSIO_T5) md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE); else md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR); md->limit = 0; } else { md->base = 0; md->idx = nitems(region); /* hide it */ } md++; #define ulp_region(reg) \ md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\ (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT) ulp_region(RX_ISCSI); ulp_region(RX_TDDP); ulp_region(TX_TPT); ulp_region(RX_STAG); ulp_region(RX_RQ); ulp_region(RX_RQUDP); ulp_region(RX_PBL); ulp_region(TX_PBL); #undef ulp_region md->base = 0; md->idx = nitems(region); if (!is_t4(sc)) { uint32_t size = 0; uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2); uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE); if (is_t5(sc)) { if (sge_ctrl & F_VFIFO_ENABLE) size = G_DBVFIFO_SIZE(fifo_size); } else size = G_T6_DBVFIFO_SIZE(fifo_size); if (size) { md->base = G_BASEADDR(t4_read_reg(sc, A_SGE_DBVFIFO_BADDR)); md->limit = md->base + (size << 2) - 1; } } md++; md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE); md->limit = 0; md++; md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE); md->limit = 0; md++; md->base = sc->vres.ocq.start; if (sc->vres.ocq.size) md->limit = md->base + sc->vres.ocq.size - 1; else md->idx = nitems(region); /* hide it */ md++; /* add any address-space holes, there can be up to 3 */ for (n = 0; n < i - 1; n++) if (avail[n].limit < avail[n + 1].base) (md++)->base = avail[n].limit; if (avail[n].limit) (md++)->base = avail[n].limit; n = md - mem; qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp); for (lo = 0; lo < i; lo++) mem_region_show(sb, memory[avail[lo].idx], avail[lo].base, avail[lo].limit - 1); sbuf_printf(sb, "\n"); for (i = 0; i < n; i++) { if (mem[i].idx >= nitems(region)) continue; 
/* skip holes */ if (!mem[i].limit) mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0; mem_region_show(sb, region[mem[i].idx], mem[i].base, mem[i].limit); } sbuf_printf(sb, "\n"); lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP RAM:", lo, hi); lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP Extmem2:", lo, hi); lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE); sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n", G_PMRXMAXPAGE(lo), t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10, (lo & F_PMRXNUMCHN) ? 2 : 1); lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE); hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE); sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n", G_PMTXMAXPAGE(lo), hi >= (1 << 20) ? (hi >> 20) : (hi >> 10), hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo)); sbuf_printf(sb, "%u p-structs\n", t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT)); for (i = 0; i < 4; i++) { if (chip_id(sc) > CHELSIO_T5) lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4); else lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4); if (is_t5(sc)) { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } else { used = G_USED(lo); alloc = G_ALLOC(lo); } /* For T6 these are MAC buffer groups */ sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated", i, used, alloc); } for (i = 0; i < sc->chip_params->nchan; i++) { if (chip_id(sc) > CHELSIO_T5) lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4); else lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4); if (is_t5(sc)) { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } else { used = G_USED(lo); alloc = G_ALLOC(lo); } /* For T6 these are MAC buffer groups */ sbuf_printf(sb, "\nLoopback %d using %u pages out of %u allocated", i, used, alloc); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static inline void tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask) { *mask = x | y; y = htobe64(y); memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN); } static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; MPASS(chip_id(sc) <= CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Idx Ethernet address Mask Vld Ports PF" " VF Replication P0 P1 P2 P3 ML"); for (i = 0; i < sc->chip_params->mps_tcam_size; i++) { uint64_t tcamx, tcamy, mask; uint32_t cls_lo, cls_hi; uint8_t addr[ETHER_ADDR_LEN]; tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i)); tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i)); if (tcamx & tcamy) continue; tcamxy2valmask(tcamx, tcamy, addr, &mask); cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i)); cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i)); sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx" " %c %#x%4u%4d", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask, (cls_lo & F_SRAM_VLD) ? 'Y' : 'N', G_PORTMAP(cls_hi), G_PF(cls_lo), (cls_lo & F_VF_VALID) ? 
G_VF(cls_lo) : -1); if (cls_lo & F_REPLICATE) { struct fw_ldst_cmd ldst_cmd; memset(&ldst_cmd, 0, sizeof(ldst_cmd)); ldst_cmd.op_to_addrspace = htobe32(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS)); ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd)); ldst_cmd.u.mps.rplc.fid_idx = htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) | V_FW_LDST_CMD_IDX(i)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mps"); if (rc) break; rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd, sizeof(ldst_cmd), &ldst_cmd); end_synchronized_op(sc, 0); if (rc != 0) { sbuf_printf(sb, "%36d", rc); rc = 0; } else { sbuf_printf(sb, " %08x %08x %08x %08x", be32toh(ldst_cmd.u.mps.rplc.rplc127_96), be32toh(ldst_cmd.u.mps.rplc.rplc95_64), be32toh(ldst_cmd.u.mps.rplc.rplc63_32), be32toh(ldst_cmd.u.mps.rplc.rplc31_0)); } } else sbuf_printf(sb, "%36s", ""); sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo), G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo), G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf); } if (rc) (void) sbuf_finish(sb); else rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; MPASS(chip_id(sc) > CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Idx Ethernet address Mask VNI Mask" " IVLAN Vld DIP_Hit Lookup Port Vld Ports PF VF" " Replication" " P0 P1 P2 P3 ML\n"); for (i = 0; i < sc->chip_params->mps_tcam_size; i++) { uint8_t dip_hit, vlan_vld, lookup_type, port_num; uint16_t ivlan; uint64_t tcamx, tcamy, val, mask; uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy; uint8_t addr[ETHER_ADDR_LEN]; ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0); if (i < 256) ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0); else ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1); t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl); val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1); tcamy = G_DMACH(val) << 32; tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1); data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1); lookup_type = G_DATALKPTYPE(data2); port_num = G_DATAPORTNUM(data2); if (lookup_type && lookup_type != M_DATALKPTYPE) { /* Inner header VNI */ vniy = ((data2 & F_DATAVIDH2) << 23) | (G_DATAVIDH1(data2) << 16) | G_VIDL(val); dip_hit = data2 & F_DATADIPHIT; vlan_vld = 0; } else { vniy = 0; dip_hit = 0; vlan_vld = data2 & F_DATAVIDH2; ivlan = G_VIDL(val); } ctl |= V_CTLXYBITSEL(1); t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl); val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1); tcamx = G_DMACH(val) << 32; tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1); data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1); if (lookup_type && lookup_type != M_DATALKPTYPE) { /* Inner header VNI mask */ vnix = ((data2 & F_DATAVIDH2) << 23) | (G_DATAVIDH1(data2) << 16) | G_VIDL(val); } else vnix = 0; if (tcamx & tcamy) continue; tcamxy2valmask(tcamx, tcamy, addr, &mask); cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i)); cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i)); if (lookup_type && lookup_type != M_DATALKPTYPE) { sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x " "%012jx %06x %06x - - %3c" " 'I' %4x %3c %#x%4u%4d", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N', port_num, cls_lo & F_T6_SRAM_VLD ? 
'Y' : 'N', G_PORTMAP(cls_hi), G_T6_PF(cls_lo), cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1); } else { sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x " "%012jx - - ", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask); if (vlan_vld) sbuf_printf(sb, "%4u Y ", ivlan); else sbuf_printf(sb, " - N "); sbuf_printf(sb, "- %3c %4x %3c %#x%4u%4d", lookup_type ? 'I' : 'O', port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N', G_PORTMAP(cls_hi), G_T6_PF(cls_lo), cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1); } if (cls_lo & F_T6_REPLICATE) { struct fw_ldst_cmd ldst_cmd; memset(&ldst_cmd, 0, sizeof(ldst_cmd)); ldst_cmd.op_to_addrspace = htobe32(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS)); ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd)); ldst_cmd.u.mps.rplc.fid_idx = htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) | V_FW_LDST_CMD_IDX(i)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t6mps"); if (rc) break; rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd, sizeof(ldst_cmd), &ldst_cmd); end_synchronized_op(sc, 0); if (rc != 0) { sbuf_printf(sb, "%72d", rc); rc = 0; } else { sbuf_printf(sb, " %08x %08x %08x %08x" " %08x %08x %08x %08x", be32toh(ldst_cmd.u.mps.rplc.rplc255_224), be32toh(ldst_cmd.u.mps.rplc.rplc223_192), be32toh(ldst_cmd.u.mps.rplc.rplc191_160), be32toh(ldst_cmd.u.mps.rplc.rplc159_128), be32toh(ldst_cmd.u.mps.rplc.rplc127_96), be32toh(ldst_cmd.u.mps.rplc.rplc95_64), be32toh(ldst_cmd.u.mps.rplc.rplc63_32), be32toh(ldst_cmd.u.mps.rplc.rplc31_0)); } } else sbuf_printf(sb, "%72s", ""); sbuf_printf(sb, "%4u%3u%3u%3u %#x", G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo), G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo), (cls_lo >> S_T6_MULTILISTEN0) & 0xf); } if (rc) (void) sbuf_finish(sb); else rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; uint16_t mtus[NMTUS]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_read_mtu_tbl(sc, mtus, NULL); sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u", mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6], mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13], mtus[14], mtus[15]); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS]; uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS]; static const char *tx_stats[MAX_PM_NSTATS] = { "Read:", "Write bypass:", "Write mem:", "Bypass + mem:", "Tx FIFO wait", NULL, "Tx latency" }; static const char *rx_stats[MAX_PM_NSTATS] = { "Read:", "Write bypass:", "Write mem:", "Flush:", " Rx FIFO wait", NULL, "Rx latency" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_pmtx_get_stats(sc, tx_cnt, tx_cyc); t4_pmrx_get_stats(sc, rx_cnt, rx_cyc); sbuf_printf(sb, " Tx pcmds Tx bytes"); for (i = 0; i < 4; i++) { sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); } sbuf_printf(sb, "\n Rx pcmds Rx bytes"); for (i = 0; i < 4; i++) { sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); } if (chip_id(sc) > CHELSIO_T5) { sbuf_printf(sb, "\n Total wait Total occupancy"); sbuf_printf(sb, "\n%-13s %10u 
%20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); i += 2; MPASS(i < nitems(tx_stats)); sbuf_printf(sb, "\n Reads Total wait"); sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_rdma_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_rdma_stats(sc, &stats); mtx_unlock(&sc->reg_lock); sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod); sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_tcp_stats v4, v6; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_tcp_stats(sc, &v4, &v6); mtx_unlock(&sc->reg_lock); sbuf_printf(sb, " IP IPv6\n"); sbuf_printf(sb, "OutRsts: %20u %20u\n", v4.tcp_out_rsts, v6.tcp_out_rsts); sbuf_printf(sb, "InSegs: %20ju %20ju\n", v4.tcp_in_segs, v6.tcp_in_segs); sbuf_printf(sb, "OutSegs: %20ju %20ju\n", v4.tcp_out_segs, v6.tcp_out_segs); sbuf_printf(sb, "RetransSegs: %20ju %20ju", v4.tcp_retrans_segs, v6.tcp_retrans_segs); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tids(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tid_info *t = &sc->tids; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); if (t->natids) { sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1, t->atids_in_use); } if (t->ntids) { if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { uint32_t b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4; if (b) { sbuf_printf(sb, "TID range: 0-%u, %u-%u", b - 1, t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4, t->ntids - 1); } else { sbuf_printf(sb, "TID range: %u-%u", t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4, t->ntids - 1); } } else sbuf_printf(sb, "TID range: 0-%u", t->ntids - 1); sbuf_printf(sb, ", in use: %u\n", atomic_load_acq_int(&t->tids_in_use)); } if (t->nstids) { sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base, t->stid_base + t->nstids - 1, t->stids_in_use); } if (t->nftids) { sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base, t->ftid_base + t->nftids - 1); } if (t->netids) { sbuf_printf(sb, "ETID range: %u-%u\n", t->etid_base, t->etid_base + t->netids - 1); } sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users", t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4), t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6)); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_err_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_err_stats(sc, &stats); mtx_unlock(&sc->reg_lock); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3\n"); 
sbuf_printf(sb, "macInErrs: %10u %10u %10u %10u\n", stats.mac_in_errs[0], stats.mac_in_errs[1], stats.mac_in_errs[2], stats.mac_in_errs[3]); sbuf_printf(sb, "hdrInErrs: %10u %10u %10u %10u\n", stats.hdr_in_errs[0], stats.hdr_in_errs[1], stats.hdr_in_errs[2], stats.hdr_in_errs[3]); sbuf_printf(sb, "tcpInErrs: %10u %10u %10u %10u\n", stats.tcp_in_errs[0], stats.tcp_in_errs[1], stats.tcp_in_errs[2], stats.tcp_in_errs[3]); sbuf_printf(sb, "tcp6InErrs: %10u %10u %10u %10u\n", stats.tcp6_in_errs[0], stats.tcp6_in_errs[1], stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]); sbuf_printf(sb, "tnlCongDrops: %10u %10u %10u %10u\n", stats.tnl_cong_drops[0], stats.tnl_cong_drops[1], stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]); sbuf_printf(sb, "tnlTxDrops: %10u %10u %10u %10u\n", stats.tnl_tx_drops[0], stats.tnl_tx_drops[1], stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]); sbuf_printf(sb, "ofldVlanDrops: %10u %10u %10u %10u\n", stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1], stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]); sbuf_printf(sb, "ofldChanDrops: %10u %10u %10u %10u\n\n", stats.ofld_chan_drops[0], stats.ofld_chan_drops[1], stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]); } else { sbuf_printf(sb, " channel 0 channel 1\n"); sbuf_printf(sb, "macInErrs: %10u %10u\n", stats.mac_in_errs[0], stats.mac_in_errs[1]); sbuf_printf(sb, "hdrInErrs: %10u %10u\n", stats.hdr_in_errs[0], stats.hdr_in_errs[1]); sbuf_printf(sb, "tcpInErrs: %10u %10u\n", stats.tcp_in_errs[0], stats.tcp_in_errs[1]); sbuf_printf(sb, "tcp6InErrs: %10u %10u\n", stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]); sbuf_printf(sb, "tnlCongDrops: %10u %10u\n", stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]); sbuf_printf(sb, "tnlTxDrops: %10u %10u\n", stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]); sbuf_printf(sb, "ofldVlanDrops: %10u %10u\n", stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]); sbuf_printf(sb, "ofldChanDrops: %10u %10u\n\n", stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]); } sbuf_printf(sb, "ofldNoNeigh: %u\nofldCongDefer: %u", stats.ofld_no_neigh, stats.ofld_cong_defer); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct tp_params *tpp = &sc->params.tp; u_int mask; int rc; mask = tpp->la_mask >> 16; rc = sysctl_handle_int(oidp, &mask, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (mask > 0xffff) return (EINVAL); tpp->la_mask = mask << 16; t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask); return (0); } struct field_desc { const char *name; u_int start; u_int width; }; static void field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f) { char buf[32]; int line_size = 0; while (f->name) { uint64_t mask = (1ULL << f->width) - 1; int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name, ((uintmax_t)v >> f->start) & mask); if (line_size + len >= 79) { line_size = 8; sbuf_printf(sb, "\n "); } sbuf_printf(sb, "%s ", buf); line_size += len + 1; f++; } sbuf_printf(sb, "\n"); } static const struct field_desc tp_la0[] = { { "RcfOpCodeOut", 60, 4 }, { "State", 56, 4 }, { "WcfState", 52, 4 }, { "RcfOpcSrcOut", 50, 2 }, { "CRxError", 49, 1 }, { "ERxError", 48, 1 }, { "SanityFailed", 47, 1 }, { "SpuriousMsg", 46, 1 }, { "FlushInputMsg", 45, 1 }, { "FlushInputCpl", 44, 1 }, { "RssUpBit", 43, 1 }, { "RssFilterHit", 42, 1 }, { "Tid", 32, 10 }, { "InitTcb", 31, 1 }, { "LineNumber", 24, 7 }, { "Emsg", 23, 1 }, { "EdataOut", 22, 1 }, { "Cmsg", 21, 1 }, { "CdataOut", 20, 1 }, { "EreadPdu", 19, 1 }, 
{ "CreadPdu", 18, 1 }, { "TunnelPkt", 17, 1 }, { "RcfPeerFin", 16, 1 }, { "RcfReasonOut", 12, 4 }, { "TxCchannel", 10, 2 }, { "RcfTxChannel", 8, 2 }, { "RxEchannel", 6, 2 }, { "RcfRxChannel", 5, 1 }, { "RcfDataOutSrdy", 4, 1 }, { "RxDvld", 3, 1 }, { "RxOoDvld", 2, 1 }, { "RxCongestion", 1, 1 }, { "TxCongestion", 0, 1 }, { NULL } }; static const struct field_desc tp_la1[] = { { "CplCmdIn", 56, 8 }, { "CplCmdOut", 48, 8 }, { "ESynOut", 47, 1 }, { "EAckOut", 46, 1 }, { "EFinOut", 45, 1 }, { "ERstOut", 44, 1 }, { "SynIn", 43, 1 }, { "AckIn", 42, 1 }, { "FinIn", 41, 1 }, { "RstIn", 40, 1 }, { "DataIn", 39, 1 }, { "DataInVld", 38, 1 }, { "PadIn", 37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { "ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static const struct field_desc tp_la2[] = { { "CplCmdIn", 56, 8 }, { "MpsVfVld", 55, 1 }, { "MpsPf", 52, 3 }, { "MpsVf", 44, 8 }, { "SynIn", 43, 1 }, { "AckIn", 42, 1 }, { "FinIn", 41, 1 }, { "RstIn", 40, 1 }, { "DataIn", 39, 1 }, { "DataInVld", 38, 1 }, { "PadIn", 37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { "ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static void tp_la_show(struct sbuf *sb, uint64_t *p, int idx) { field_desc_show(sb, *p, tp_la0); } static void tp_la_show2(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], tp_la0); } static void tp_la_show3(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? 
tp_la2 : tp_la1); } static int sysctl_tp_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint64_t *buf, *p; int rc; u_int i, inc; void (*show_func)(struct sbuf *, uint64_t *, int); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK); t4_tp_read_la(sc, buf, NULL); p = buf; switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) { case 2: inc = 2; show_func = tp_la_show2; break; case 3: inc = 2; show_func = tp_la_show3; break; default: inc = 1; show_func = tp_la_show; } for (i = 0; i < TPLA_SIZE / inc; i++, p += inc) (*show_func)(sb, p, i); rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; u64 nrate[MAX_NCHAN], orate[MAX_NCHAN]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_chan_txrate(sc, nrate, orate); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3\n"); sbuf_printf(sb, "NIC B/s: %10ju %10ju %10ju %10ju\n", nrate[0], nrate[1], nrate[2], nrate[3]); sbuf_printf(sb, "Offload B/s: %10ju %10ju %10ju %10ju", orate[0], orate[1], orate[2], orate[3]); } else { sbuf_printf(sb, " channel 0 channel 1\n"); sbuf_printf(sb, "NIC B/s: %10ju %10ju\n", nrate[0], nrate[1]); sbuf_printf(sb, "Offload B/s: %10ju %10ju", orate[0], orate[1]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint32_t *buf, *p; int rc, i; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_ulprx_read_la(sc, buf); p = buf; sbuf_printf(sb, " Pcmd Type Message" " Data"); for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) { sbuf_printf(sb, "\n%08x%08x %4x %08x %08x%08x%08x%08x", p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, v; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); v = t4_read_reg(sc, A_SGE_STAT_CFG); if (G_STATSOURCE_T5(v) == 7) { if (G_STATMODE(v) == 0) { sbuf_printf(sb, "total %d, incomplete %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } else if (G_STATMODE(v) == 1) { sbuf_printf(sb, "total %d, data overflow %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tc_params(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct tx_sched_class *tc; struct t4_sched_class_params p; struct sbuf *sb; int i, rc, port_id, flags, mbps, gbps; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); port_id = arg2 >> 16; MPASS(port_id < sc->params.nports); MPASS(sc->port[port_id] != NULL); i = arg2 & 0xffff; MPASS(i < sc->chip_params->nsched_cls); tc = &sc->port[port_id]->tc[i]; rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | 
SLEEP_OK | INTR_OK, "t4tc_p"); if (rc) goto done; flags = tc->flags; p = tc->params; end_synchronized_op(sc, LOCK_HELD); if ((flags & TX_SC_OK) == 0) { sbuf_printf(sb, "none"); goto done; } if (p.level == SCHED_CLASS_LEVEL_CL_WRR) { sbuf_printf(sb, "cl-wrr weight %u", p.weight); goto done; } else if (p.level == SCHED_CLASS_LEVEL_CL_RL) sbuf_printf(sb, "cl-rl"); else if (p.level == SCHED_CLASS_LEVEL_CH_RL) sbuf_printf(sb, "ch-rl"); else { rc = ENXIO; goto done; } if (p.ratemode == SCHED_CLASS_RATEMODE_REL) { /* XXX: top speed or actual link speed? */ gbps = port_top_speed(sc->port[port_id]); sbuf_printf(sb, " %u%% of %uGbps", p.maxrate, gbps); } else if (p.ratemode == SCHED_CLASS_RATEMODE_ABS) { switch (p.rateunit) { case SCHED_CLASS_RATEUNIT_BITS: mbps = p.maxrate / 1000; gbps = p.maxrate / 1000000; if (p.maxrate == gbps * 1000000) sbuf_printf(sb, " %uGbps", gbps); else if (p.maxrate == mbps * 1000) sbuf_printf(sb, " %uMbps", mbps); else sbuf_printf(sb, " %uKbps", p.maxrate); break; case SCHED_CLASS_RATEUNIT_PKTS: sbuf_printf(sb, " %upps", p.maxrate); break; default: rc = ENXIO; goto done; } } switch (p.mode) { case SCHED_CLASS_MODE_CLASS: sbuf_printf(sb, " aggregate"); break; case SCHED_CLASS_MODE_FLOW: sbuf_printf(sb, " per-flow"); break; default: rc = ENXIO; goto done; } done: if (rc == 0) rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } #endif #ifdef TCP_OFFLOAD static void unit_conv(char *buf, size_t len, u_int val, u_int factor) { u_int rem = val % factor; if (rem == 0) snprintf(buf, len, "%u", val / factor); else { while (rem % 10 == 0) rem /= 10; snprintf(buf, len, "%u.%u", val / factor, rem); } } static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; char buf[16]; u_int res, re; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); switch (arg2) { case 0: /* timer_tick */ re = G_TIMERRESOLUTION(res); break; case 1: /* TCP timestamp tick */ re = G_TIMESTAMPRESOLUTION(res); break; case 2: /* DACK tick */ re = G_DELAYEDACKRESOLUTION(res); break; default: return (EDOOFUS); } unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000); return (sysctl_handle_string(oidp, buf, sizeof(buf), req)); } static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int res, dack_re, v; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); dack_re = G_DELAYEDACKRESOLUTION(res); v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER); return (sysctl_handle_int(oidp, &v, 0, req)); } static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int reg = arg2; u_int tre; u_long tp_tick_us, v; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX || reg == A_TP_PERS_MIN || reg == A_TP_PERS_MAX || reg == A_TP_KEEP_IDLE || A_TP_KEEP_INTVL || reg == A_TP_INIT_SRTT || reg == A_TP_FINWAIT2_TIMER); tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION)); tp_tick_us = (cclk_ps << tre) / 1000000; if (reg == A_TP_INIT_SRTT) v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg)); else v = tp_tick_us * t4_read_reg(sc, reg); return (sysctl_handle_long(oidp, &v, 0, req)); } #endif static uint32_t fconf_iconf_to_mode(uint32_t fconf, uint32_t iconf) { uint32_t mode; mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR | T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT; if (fconf & F_FRAGMENTATION) mode |= T4_FILTER_IP_FRAGMENT; if (fconf & F_MPSHITTYPE) mode |= T4_FILTER_MPS_HIT_TYPE; if 
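The sysctl_tp_tick() and sysctl_tp_timer() handlers above convert TP timer registers into wall-clock units: the core-clock period in picoseconds is 10^9 / cclk (cclk being reported in kHz), and one TP timer tick is that period shifted left by the programmed resolution. A hedged worked example of the same arithmetic follows; the clock rate, resolution, and register value are made up and do not describe real hardware:

#include <stdio.h>

int
main(void)
{
	unsigned cclk_khz = 250000;			/* hypothetical 250 MHz core clock */
	unsigned cclk_ps = 1000000000 / cclk_khz;	/* 4000 ps per core-clock cycle */
	unsigned tre = 12;				/* hypothetical TIMERRESOLUTION field */
	unsigned long tp_tick_us =
	    ((unsigned long)cclk_ps << tre) / 1000000;	/* 16384000 ps ~= 16 us */
	unsigned reg_val = 3;				/* hypothetical A_TP_RXT_MIN contents */

	printf("tick = %lu us, rxt_min = %lu us\n",
	    tp_tick_us, tp_tick_us * reg_val);
	return (0);
}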
(fconf & F_MACMATCH) mode |= T4_FILTER_MAC_IDX; if (fconf & F_ETHERTYPE) mode |= T4_FILTER_ETH_TYPE; if (fconf & F_PROTOCOL) mode |= T4_FILTER_IP_PROTO; if (fconf & F_TOS) mode |= T4_FILTER_IP_TOS; if (fconf & F_VLAN) mode |= T4_FILTER_VLAN; if (fconf & F_VNIC_ID) { mode |= T4_FILTER_VNIC; if (iconf & F_VNIC) mode |= T4_FILTER_IC_VNIC; } if (fconf & F_PORT) mode |= T4_FILTER_PORT; if (fconf & F_FCOE) mode |= T4_FILTER_FCoE; return (mode); } static uint32_t mode_to_fconf(uint32_t mode) { uint32_t fconf = 0; if (mode & T4_FILTER_IP_FRAGMENT) fconf |= F_FRAGMENTATION; if (mode & T4_FILTER_MPS_HIT_TYPE) fconf |= F_MPSHITTYPE; if (mode & T4_FILTER_MAC_IDX) fconf |= F_MACMATCH; if (mode & T4_FILTER_ETH_TYPE) fconf |= F_ETHERTYPE; if (mode & T4_FILTER_IP_PROTO) fconf |= F_PROTOCOL; if (mode & T4_FILTER_IP_TOS) fconf |= F_TOS; if (mode & T4_FILTER_VLAN) fconf |= F_VLAN; if (mode & T4_FILTER_VNIC) fconf |= F_VNIC_ID; if (mode & T4_FILTER_PORT) fconf |= F_PORT; if (mode & T4_FILTER_FCoE) fconf |= F_FCOE; return (fconf); } static uint32_t mode_to_iconf(uint32_t mode) { if (mode & T4_FILTER_IC_VNIC) return (F_VNIC); return (0); } static int check_fspec_against_fconf_iconf(struct adapter *sc, struct t4_filter_specification *fs) { struct tp_params *tpp = &sc->params.tp; uint32_t fconf = 0; if (fs->val.frag || fs->mask.frag) fconf |= F_FRAGMENTATION; if (fs->val.matchtype || fs->mask.matchtype) fconf |= F_MPSHITTYPE; if (fs->val.macidx || fs->mask.macidx) fconf |= F_MACMATCH; if (fs->val.ethtype || fs->mask.ethtype) fconf |= F_ETHERTYPE; if (fs->val.proto || fs->mask.proto) fconf |= F_PROTOCOL; if (fs->val.tos || fs->mask.tos) fconf |= F_TOS; if (fs->val.vlan_vld || fs->mask.vlan_vld) fconf |= F_VLAN; if (fs->val.ovlan_vld || fs->mask.ovlan_vld) { fconf |= F_VNIC_ID; if (tpp->ingress_config & F_VNIC) return (EINVAL); } if (fs->val.pfvf_vld || fs->mask.pfvf_vld) { fconf |= F_VNIC_ID; if ((tpp->ingress_config & F_VNIC) == 0) return (EINVAL); } if (fs->val.iport || fs->mask.iport) fconf |= F_PORT; if (fs->val.fcoe || fs->mask.fcoe) fconf |= F_FCOE; if ((tpp->vlan_pri_map | fconf) != tpp->vlan_pri_map) return (E2BIG); return (0); } static int get_filter_mode(struct adapter *sc, uint32_t *mode) { struct tp_params *tpp = &sc->params.tp; /* * We trust the cached values of the relevant TP registers. This means * things work reliably only if writes to those registers are always via * t4_set_filter_mode. */ *mode = fconf_iconf_to_mode(tpp->vlan_pri_map, tpp->ingress_config); return (0); } static int set_filter_mode(struct adapter *sc, uint32_t mode) { struct tp_params *tpp = &sc->params.tp; uint32_t fconf, iconf; int rc; iconf = mode_to_iconf(mode); if ((iconf ^ tpp->ingress_config) & F_VNIC) { /* * For now we just complain if A_TP_INGRESS_CONFIG is not * already set to the correct value for the requested filter * mode. It's not clear if it's safe to write to this register * on the fly. (And we trust the cached value of the register). 
*/ return (EBUSY); } fconf = mode_to_fconf(mode); rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4setfm"); if (rc) return (rc); if (sc->tids.ftids_in_use > 0) { rc = EBUSY; goto done; } #ifdef TCP_OFFLOAD if (uld_active(sc, ULD_TOM)) { rc = EBUSY; goto done; } #endif rc = -t4_set_filter_mode(sc, fconf); done: end_synchronized_op(sc, LOCK_HELD); return (rc); } static inline uint64_t get_filter_hits(struct adapter *sc, uint32_t fid) { uint32_t tcb_addr; tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + (fid + sc->tids.ftid_base) * TCB_SIZE; if (is_t4(sc)) { uint64_t hits; read_via_memwin(sc, 0, tcb_addr + 16, (uint32_t *)&hits, 8); return (be64toh(hits)); } else { uint32_t hits; read_via_memwin(sc, 0, tcb_addr + 24, &hits, 4); return (be32toh(hits)); } } static int get_filter(struct adapter *sc, struct t4_filter *t) { int i, rc, nfilters = sc->tids.nftids; struct filter_entry *f; rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4getf"); if (rc) return (rc); if (sc->tids.ftids_in_use == 0 || sc->tids.ftid_tab == NULL || t->idx >= nfilters) { t->idx = 0xffffffff; goto done; } f = &sc->tids.ftid_tab[t->idx]; for (i = t->idx; i < nfilters; i++, f++) { if (f->valid) { t->idx = i; t->l2tidx = f->l2t ? f->l2t->idx : 0; t->smtidx = f->smtidx; if (f->fs.hitcnts) t->hits = get_filter_hits(sc, t->idx); else t->hits = UINT64_MAX; t->fs = f->fs; goto done; } } t->idx = 0xffffffff; done: end_synchronized_op(sc, LOCK_HELD); return (0); } static int set_filter(struct adapter *sc, struct t4_filter *t) { unsigned int nfilters, nports; struct filter_entry *f; int i, rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf"); if (rc) return (rc); nfilters = sc->tids.nftids; nports = sc->params.nports; if (nfilters == 0) { rc = ENOTSUP; goto done; } if (t->idx >= nfilters) { rc = EINVAL; goto done; } /* Validate against the global filter mode and ingress config */ rc = check_fspec_against_fconf_iconf(sc, &t->fs); if (rc != 0) goto done; if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) { rc = EINVAL; goto done; } if (t->fs.val.iport >= nports) { rc = EINVAL; goto done; } /* Can't specify an iq if not steering to it */ if (!t->fs.dirsteer && t->fs.iq) { rc = EINVAL; goto done; } /* IPv6 filter idx must be 4 aligned */ if (t->fs.type == 1 && ((t->idx & 0x3) || t->idx + 4 >= nfilters)) { rc = EINVAL; goto done; } if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_full_init(sc)) != 0)) goto done; if (sc->tids.ftid_tab == NULL) { KASSERT(sc->tids.ftids_in_use == 0, ("%s: no memory allocated but filters_in_use > 0", __func__)); sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) * nfilters, M_CXGBE, M_NOWAIT | M_ZERO); if (sc->tids.ftid_tab == NULL) { rc = ENOMEM; goto done; } mtx_init(&sc->tids.ftid_lock, "T4 filters", 0, MTX_DEF); } for (i = 0; i < 4; i++) { f = &sc->tids.ftid_tab[t->idx + i]; if (f->pending || f->valid) { rc = EBUSY; goto done; } if (f->locked) { rc = EPERM; goto done; } if (t->fs.type == 0) break; } f = &sc->tids.ftid_tab[t->idx]; f->fs = t->fs; rc = set_filter_wr(sc, t->idx); done: end_synchronized_op(sc, 0); if (rc == 0) { mtx_lock(&sc->tids.ftid_lock); for (;;) { if (f->pending == 0) { rc = f->valid ? 
0 : EIO; break; } if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, PCATCH, "t4setfw", 0)) { rc = EINPROGRESS; break; } } mtx_unlock(&sc->tids.ftid_lock); } return (rc); } static int del_filter(struct adapter *sc, struct t4_filter *t) { unsigned int nfilters; struct filter_entry *f; int rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4delf"); if (rc) return (rc); nfilters = sc->tids.nftids; if (nfilters == 0) { rc = ENOTSUP; goto done; } if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 || t->idx >= nfilters) { rc = EINVAL; goto done; } if (!(sc->flags & FULL_INIT_DONE)) { rc = EAGAIN; goto done; } f = &sc->tids.ftid_tab[t->idx]; if (f->pending) { rc = EBUSY; goto done; } if (f->locked) { rc = EPERM; goto done; } if (f->valid) { t->fs = f->fs; /* extra info for the caller */ rc = del_filter_wr(sc, t->idx); } done: end_synchronized_op(sc, 0); if (rc == 0) { mtx_lock(&sc->tids.ftid_lock); for (;;) { if (f->pending == 0) { rc = f->valid ? EIO : 0; break; } if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, PCATCH, "t4delfw", 0)) { rc = EINPROGRESS; break; } } mtx_unlock(&sc->tids.ftid_lock); } return (rc); } static void clear_filter(struct filter_entry *f) { if (f->l2t) t4_l2t_release(f->l2t); bzero(f, sizeof (*f)); } static int set_filter_wr(struct adapter *sc, int fidx) { struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; unsigned int ftid, vnic_vld, vnic_vld_mask; struct wrq_cookie cookie; ASSERT_SYNCHRONIZED_OP(sc); if (f->fs.newdmac || f->fs.newvlan) { /* This filter needs an L2T entry; allocate one. */ f->l2t = t4_l2t_alloc_switching(sc->l2t); if (f->l2t == NULL) return (EAGAIN); if (t4_l2t_set_switching(sc, f->l2t, f->fs.vlan, f->fs.eport, f->fs.dmac)) { t4_l2t_release(f->l2t); f->l2t = NULL; return (ENOMEM); } } /* Already validated against fconf, iconf */ MPASS((f->fs.val.pfvf_vld & f->fs.val.ovlan_vld) == 0); MPASS((f->fs.mask.pfvf_vld & f->fs.mask.ovlan_vld) == 0); if (f->fs.val.pfvf_vld || f->fs.val.ovlan_vld) vnic_vld = 1; else vnic_vld = 0; if (f->fs.mask.pfvf_vld || f->fs.mask.ovlan_vld) vnic_vld_mask = 1; else vnic_vld_mask = 0; ftid = sc->tids.ftid_base + fidx; fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie); if (fwr == NULL) return (ENOMEM); bzero(fwr, sizeof(*fwr)); fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR)); fwr->len16_pkd = htobe32(FW_LEN16(*fwr)); fwr->tid_to_iq = htobe32(V_FW_FILTER_WR_TID(ftid) | V_FW_FILTER_WR_RQTYPE(f->fs.type) | V_FW_FILTER_WR_NOREPLY(0) | V_FW_FILTER_WR_IQ(f->fs.iq)); fwr->del_filter_to_l2tix = htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) | V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) | V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) | V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) | V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) | V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) | V_FW_FILTER_WR_DMAC(f->fs.newdmac) | V_FW_FILTER_WR_SMAC(f->fs.newsmac) | V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) | V_FW_FILTER_WR_TXCHAN(f->fs.eport) | V_FW_FILTER_WR_PRIO(f->fs.prio) | V_FW_FILTER_WR_L2TIX(f->l2t ? 
f->l2t->idx : 0)); fwr->ethtype = htobe16(f->fs.val.ethtype); fwr->ethtypem = htobe16(f->fs.mask.ethtype); fwr->frag_to_ovlan_vldm = (V_FW_FILTER_WR_FRAG(f->fs.val.frag) | V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) | V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLD(vnic_vld) | V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLDM(vnic_vld_mask)); fwr->smac_sel = 0; fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) | V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id)); fwr->maci_to_matchtypem = htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) | V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) | V_FW_FILTER_WR_PORT(f->fs.val.iport) | V_FW_FILTER_WR_PORTM(f->fs.mask.iport) | V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) | V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype)); fwr->ptcl = f->fs.val.proto; fwr->ptclm = f->fs.mask.proto; fwr->ttyp = f->fs.val.tos; fwr->ttypm = f->fs.mask.tos; fwr->ivlan = htobe16(f->fs.val.vlan); fwr->ivlanm = htobe16(f->fs.mask.vlan); fwr->ovlan = htobe16(f->fs.val.vnic); fwr->ovlanm = htobe16(f->fs.mask.vnic); bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip)); bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm)); bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip)); bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm)); fwr->lp = htobe16(f->fs.val.dport); fwr->lpm = htobe16(f->fs.mask.dport); fwr->fp = htobe16(f->fs.val.sport); fwr->fpm = htobe16(f->fs.mask.sport); if (f->fs.newsmac) bcopy(f->fs.smac, fwr->sma, sizeof (fwr->sma)); f->pending = 1; sc->tids.ftids_in_use++; commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie); return (0); } static int del_filter_wr(struct adapter *sc, int fidx) { struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; unsigned int ftid; struct wrq_cookie cookie; ftid = sc->tids.ftid_base + fidx; fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie); if (fwr == NULL) return (ENOMEM); bzero(fwr, sizeof (*fwr)); t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id); f->pending = 1; commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie); return (0); } int t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1); unsigned int idx = GET_TID(rpl); unsigned int rc; struct filter_entry *f; KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, rss->opcode)); MPASS(iq == &sc->sge.fwq); MPASS(is_ftid(sc, idx)); idx -= sc->tids.ftid_base; f = &sc->tids.ftid_tab[idx]; rc = G_COOKIE(rpl->cookie); mtx_lock(&sc->tids.ftid_lock); if (rc == FW_FILTER_WR_FLT_ADDED) { KASSERT(f->pending, ("%s: filter[%u] isn't pending.", __func__, idx)); f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff; f->pending = 0; /* asynchronous setup completed */ f->valid = 1; } else { if (rc != FW_FILTER_WR_FLT_DELETED) { /* Add or delete failed, display an error */ log(LOG_ERR, "filter %u setup failed with error %u\n", idx, rc); } clear_filter(f); sc->tids.ftids_in_use--; } wakeup(&sc->tids.ftid_tab); mtx_unlock(&sc->tids.ftid_lock); return (0); } static int set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { MPASS(iq->set_tcb_rpl != NULL); return (iq->set_tcb_rpl(iq, rss, m)); } static int l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { MPASS(iq->l2t_write_rpl != NULL); return (iq->l2t_write_rpl(iq, rss, m)); } static int get_sge_context(struct adapter *sc, struct t4_sge_context 
*cntxt) { int rc; if (cntxt->cid > M_CTXTQID) return (EINVAL); if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS && cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM) return (EINVAL); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt"); if (rc) return (rc); if (sc->flags & FW_OK) { rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); if (rc == 0) goto done; } /* * Read via firmware failed or wasn't even attempted. Read directly via * the backdoor. */ rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); done: end_synchronized_op(sc, 0); return (rc); } static int load_fw(struct adapter *sc, struct t4_data *fw) { int rc; uint8_t *fw_data; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw"); if (rc) return (rc); if (sc->flags & FULL_INIT_DONE) { rc = EBUSY; goto done; } fw_data = malloc(fw->len, M_CXGBE, M_WAITOK); if (fw_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(fw->data, fw_data, fw->len); if (rc == 0) rc = -t4_load_fw(sc, fw_data, fw->len); free(fw_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } #define MAX_READ_BUF_SIZE (128 * 1024) static int read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr) { uint32_t addr, remaining, n; uint32_t *buf; int rc; uint8_t *dst; rc = validate_mem_range(sc, mr->addr, mr->len); if (rc != 0) return (rc); buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK); addr = mr->addr; remaining = mr->len; dst = (void *)mr->data; while (remaining) { n = min(remaining, MAX_READ_BUF_SIZE); read_via_memwin(sc, 2, addr, buf, n); rc = copyout(buf, dst, n); if (rc != 0) break; dst += n; remaining -= n; addr += n; } free(buf, M_CXGBE); return (rc); } #undef MAX_READ_BUF_SIZE static int read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd) { int rc; if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports) return (EINVAL); if (i2cd->len > sizeof(i2cd->data)) return (EFBIG); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr, i2cd->offset, i2cd->len, &i2cd->data[0]); end_synchronized_op(sc, 0); return (rc); } static int in_range(int val, int lo, int hi) { return (val < 0 || (val <= hi && val >= lo)); } static int set_sched_class_config(struct adapter *sc, int minmax) { int rc; if (minmax < 0) return (EINVAL); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sscc"); if (rc) return (rc); rc = -t4_sched_config(sc, FW_SCHED_TYPE_PKTSCHED, minmax, 1); end_synchronized_op(sc, 0); return (rc); } static int set_sched_class_params(struct adapter *sc, struct t4_sched_class_params *p, int sleep_ok) { int rc, top_speed, fw_level, fw_mode, fw_rateunit, fw_ratemode; struct port_info *pi; struct tx_sched_class *tc; if (p->level == SCHED_CLASS_LEVEL_CL_RL) fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL; else if (p->level == SCHED_CLASS_LEVEL_CL_WRR) fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR; else if (p->level == SCHED_CLASS_LEVEL_CH_RL) fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL; else return (EINVAL); if (p->mode == SCHED_CLASS_MODE_CLASS) fw_mode = FW_SCHED_PARAMS_MODE_CLASS; else if (p->mode == SCHED_CLASS_MODE_FLOW) fw_mode = FW_SCHED_PARAMS_MODE_FLOW; else return (EINVAL); if (p->rateunit == SCHED_CLASS_RATEUNIT_BITS) fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE; else if (p->rateunit == SCHED_CLASS_RATEUNIT_PKTS) fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE; else return (EINVAL); if (p->ratemode == SCHED_CLASS_RATEMODE_REL) fw_ratemode = 
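The in_range() helper above treats a negative value as "not specified", so unset scheduler parameters pass the range check and are defaulted or rejected later in set_sched_class_params(). A minimal sketch of that convention, with invented bounds:

#include <stdio.h>

/* Same semantics as the driver's in_range(): negative means "unset". */
static int
in_range_ex(int val, int lo, int hi)
{
	return (val < 0 || (val <= hi && val >= lo));
}

int
main(void)
{
	printf("unset (-1):    %d\n", in_range_ex(-1, 0, 100));		/* 1: passes, handled later */
	printf("in range (42): %d\n", in_range_ex(42, 0, 100));		/* 1 */
	printf("too big (200): %d\n", in_range_ex(200, 0, 100));	/* 0 -> ERANGE */
	return (0);
}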
FW_SCHED_PARAMS_RATE_REL; else if (p->ratemode == SCHED_CLASS_RATEMODE_ABS) fw_ratemode = FW_SCHED_PARAMS_RATE_ABS; else return (EINVAL); /* Vet our parameters ... */ if (!in_range(p->channel, 0, sc->chip_params->nchan - 1)) return (ERANGE); pi = sc->port[sc->chan_map[p->channel]]; if (pi == NULL) return (ENXIO); MPASS(pi->tx_chan == p->channel); top_speed = port_top_speed(pi) * 1000000; /* Gbps -> Kbps */ if (!in_range(p->cl, 0, sc->chip_params->nsched_cls) || !in_range(p->minrate, 0, top_speed) || !in_range(p->maxrate, 0, top_speed) || !in_range(p->weight, 0, 100)) return (ERANGE); /* * Translate any unset parameters into the firmware's * nomenclature and/or fail the call if the parameters * are required ... */ if (p->rateunit < 0 || p->ratemode < 0 || p->channel < 0 || p->cl < 0) return (EINVAL); if (p->minrate < 0) p->minrate = 0; if (p->maxrate < 0) { if (p->level == SCHED_CLASS_LEVEL_CL_RL || p->level == SCHED_CLASS_LEVEL_CH_RL) return (EINVAL); else p->maxrate = 0; } if (p->weight < 0) { if (p->level == SCHED_CLASS_LEVEL_CL_WRR) return (EINVAL); else p->weight = 0; } if (p->pktsize < 0) { if (p->level == SCHED_CLASS_LEVEL_CL_RL || p->level == SCHED_CLASS_LEVEL_CH_RL) return (EINVAL); else p->pktsize = 0; } rc = begin_synchronized_op(sc, NULL, sleep_ok ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4sscp"); if (rc) return (rc); tc = &pi->tc[p->cl]; tc->params = *p; rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level, fw_mode, fw_rateunit, fw_ratemode, p->channel, p->cl, p->minrate, p->maxrate, p->weight, p->pktsize, sleep_ok); if (rc == 0) tc->flags |= TX_SC_OK; else { /* * Unknown state at this point, see tc->params for what was * attempted. */ tc->flags &= ~TX_SC_OK; } end_synchronized_op(sc, sleep_ok ? 0 : LOCK_HELD); return (rc); } static int set_sched_class(struct adapter *sc, struct t4_sched_params *p) { if (p->type != SCHED_CLASS_TYPE_PACKET) return (EINVAL); if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG) return (set_sched_class_config(sc, p->u.config.minmax)); if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS) return (set_sched_class_params(sc, &p->u.params, 1)); return (EINVAL); } static int set_sched_queue(struct adapter *sc, struct t4_sched_queue *p) { struct port_info *pi = NULL; struct vi_info *vi; struct sge_txq *txq; uint32_t fw_mnem, fw_queue, fw_class; int i, rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsq"); if (rc) return (rc); if (p->port >= sc->params.nports) { rc = EINVAL; goto done; } /* XXX: Only supported for the main VI. */ pi = sc->port[p->port]; vi = &pi->vi[0]; if (!(vi->flags & VI_INIT_DONE)) { /* tx queues not set up yet */ rc = EAGAIN; goto done; } if (!in_range(p->queue, 0, vi->ntxq - 1) || !in_range(p->cl, 0, sc->chip_params->nsched_cls - 1)) { rc = EINVAL; goto done; } /* * Create a template for the FW_PARAMS_CMD mnemonic and value (TX * Scheduling Class in this case). */ fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH)); fw_class = p->cl < 0 ? 0xffffffff : p->cl; /* * If op.queue is non-negative, then we're only changing the scheduling * on a single specified TX queue. */ if (p->queue >= 0) { txq = &sc->sge.txq[vi->first_txq + p->queue]; fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id)); rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue, &fw_class); goto done; } /* * Change the scheduling on all the TX queues for the * interface. 
*/ for_each_txq(vi, i, txq) { fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id)); rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue, &fw_class); if (rc) goto done; } rc = 0; done: end_synchronized_op(sc, 0); return (rc); } int t4_os_find_pci_capability(struct adapter *sc, int cap) { int i; return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0); } int t4_os_pci_save_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_save(dev, dinfo, 0); return (0); } int t4_os_pci_restore_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_restore(dev, dinfo); return (0); } void t4_os_portmod_changed(const struct adapter *sc, int idx) { struct port_info *pi = sc->port[idx]; struct vi_info *vi; struct ifnet *ifp; int v; static const char *mod_str[] = { NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM" }; for_each_vi(pi, v, vi) { build_medialist(pi, &vi->media); } ifp = pi->vi[0].ifp; if (pi->mod_type == FW_PORT_MOD_TYPE_NONE) if_printf(ifp, "transceiver unplugged.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN) if_printf(ifp, "unknown transceiver inserted.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED) if_printf(ifp, "unsupported transceiver inserted.\n"); else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) { if_printf(ifp, "%s transceiver inserted.\n", mod_str[pi->mod_type]); } else { if_printf(ifp, "transceiver (type %d) inserted.\n", pi->mod_type); } } void t4_os_link_changed(struct adapter *sc, int idx, int link_stat, int reason) { struct port_info *pi = sc->port[idx]; struct vi_info *vi; struct ifnet *ifp; int v; if (link_stat) pi->linkdnrc = -1; else { if (reason >= 0) pi->linkdnrc = reason; } for_each_vi(pi, v, vi) { ifp = vi->ifp; if (ifp == NULL) continue; if (link_stat) { ifp->if_baudrate = IF_Mbps(pi->link_cfg.speed); if_link_state_change(ifp, LINK_STATE_UP); } else { if_link_state_change(ifp, LINK_STATE_DOWN); } } } void t4_iterate(void (*func)(struct adapter *, void *), void *arg) { struct adapter *sc; sx_slock(&t4_list_lock); SLIST_FOREACH(sc, &t4_list, link) { /* * func should not make any assumptions about what state sc is * in - the only guarantee is that sc->sc_lock is a valid lock. 
*/ func(sc, arg); } sx_sunlock(&t4_list_lock); } static int -t4_open(struct cdev *dev, int flags, int type, struct thread *td) -{ - return (0); -} - -static int -t4_close(struct cdev *dev, int flags, int type, struct thread *td) -{ - return (0); -} - -static int t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, struct thread *td) { int rc; struct adapter *sc = dev->si_drv1; rc = priv_check(td, PRIV_DRIVER); if (rc != 0) return (rc); switch (cmd) { case CHELSIO_T4_GETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) edata->val = t4_read_reg(sc, edata->addr); else if (edata->size == 8) edata->val = t4_read_reg64(sc, edata->addr); else return (EINVAL); break; } case CHELSIO_T4_SETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) { if (edata->val & 0xffffffff00000000) return (EINVAL); t4_write_reg(sc, edata->addr, (uint32_t) edata->val); } else if (edata->size == 8) t4_write_reg64(sc, edata->addr, edata->val); else return (EINVAL); break; } case CHELSIO_T4_REGDUMP: { struct t4_regdump *regs = (struct t4_regdump *)data; int reglen = is_t4(sc) ? T4_REGDUMP_SIZE : T5_REGDUMP_SIZE; uint8_t *buf; if (regs->len < reglen) { regs->len = reglen; /* hint to the caller */ return (ENOBUFS); } regs->len = reglen; buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO); get_regs(sc, regs, buf); rc = copyout(buf, regs->data, reglen); free(buf, M_CXGBE); break; } case CHELSIO_T4_GET_FILTER_MODE: rc = get_filter_mode(sc, (uint32_t *)data); break; case CHELSIO_T4_SET_FILTER_MODE: rc = set_filter_mode(sc, *(uint32_t *)data); break; case CHELSIO_T4_GET_FILTER: rc = get_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_SET_FILTER: rc = set_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_DEL_FILTER: rc = del_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_GET_SGE_CONTEXT: rc = get_sge_context(sc, (struct t4_sge_context *)data); break; case CHELSIO_T4_LOAD_FW: rc = load_fw(sc, (struct t4_data *)data); break; case CHELSIO_T4_GET_MEM: rc = read_card_mem(sc, 2, (struct t4_mem_range *)data); break; case CHELSIO_T4_GET_I2C: rc = read_i2c(sc, (struct t4_i2c_data *)data); break; case CHELSIO_T4_CLEAR_STATS: { int i, v; u_int port_id = *(uint32_t *)data; struct port_info *pi; struct vi_info *vi; if (port_id >= sc->params.nports) return (EINVAL); pi = sc->port[port_id]; /* MAC stats */ t4_clr_port_stats(sc, pi->tx_chan); pi->tx_parse_error = 0; mtx_lock(&sc->reg_lock); for_each_vi(pi, v, vi) { if (vi->flags & VI_INIT_DONE) t4_clr_vi_stats(sc, vi->viid); } mtx_unlock(&sc->reg_lock); /* * Since this command accepts a port, clear stats for * all VIs on this port. 
*/ for_each_vi(pi, v, vi) { if (vi->flags & VI_INIT_DONE) { struct sge_rxq *rxq; struct sge_txq *txq; struct sge_wrq *wrq; for_each_rxq(vi, i, rxq) { #if defined(INET) || defined(INET6) rxq->lro.lro_queued = 0; rxq->lro.lro_flushed = 0; #endif rxq->rxcsum = 0; rxq->vlan_extraction = 0; } for_each_txq(vi, i, txq) { txq->txcsum = 0; txq->tso_wrs = 0; txq->vlan_insertion = 0; txq->imm_wrs = 0; txq->sgl_wrs = 0; txq->txpkt_wrs = 0; txq->txpkts0_wrs = 0; txq->txpkts1_wrs = 0; txq->txpkts0_pkts = 0; txq->txpkts1_pkts = 0; mp_ring_reset_stats(txq->r); } #ifdef TCP_OFFLOAD /* nothing to clear for each ofld_rxq */ for_each_ofld_txq(vi, i, wrq) { wrq->tx_wrs_direct = 0; wrq->tx_wrs_copied = 0; } #endif if (IS_MAIN_VI(vi)) { wrq = &sc->sge.ctrlq[pi->port_id]; wrq->tx_wrs_direct = 0; wrq->tx_wrs_copied = 0; } } } break; } case CHELSIO_T4_SCHED_CLASS: rc = set_sched_class(sc, (struct t4_sched_params *)data); break; case CHELSIO_T4_SCHED_QUEUE: rc = set_sched_queue(sc, (struct t4_sched_queue *)data); break; case CHELSIO_T4_GET_TRACER: rc = t4_get_tracer(sc, (struct t4_tracer *)data); break; case CHELSIO_T4_SET_TRACER: rc = t4_set_tracer(sc, (struct t4_tracer *)data); break; default: - rc = EINVAL; + rc = ENOTTY; } return (rc); } void t4_db_full(struct adapter *sc) { CXGBE_UNIMPLEMENTED(__func__); } void t4_db_dropped(struct adapter *sc) { CXGBE_UNIMPLEMENTED(__func__); } #ifdef TCP_OFFLOAD void t4_iscsi_init(struct adapter *sc, u_int tag_mask, const u_int *pgsz_order) { t4_write_reg(sc, A_ULP_RX_ISCSI_TAGMASK, tag_mask); t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, V_HPZ0(pgsz_order[0]) | V_HPZ1(pgsz_order[1]) | V_HPZ2(pgsz_order[2]) | V_HPZ3(pgsz_order[3])); } static int toe_capability(struct vi_info *vi, int enable) { int rc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; ASSERT_SYNCHRONIZED_OP(sc); if (!is_offload(sc)) return (ENODEV); if (enable) { if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) { /* TOE is already enabled. */ return (0); } /* * We need the port's queues around so that we're able to send * and receive CPLs to/from the TOE even if the ifnet for this * port has never been UP'd administratively. */ if (!(vi->flags & VI_INIT_DONE)) { rc = vi_full_init(vi); if (rc) return (rc); } if (!(pi->vi[0].flags & VI_INIT_DONE)) { rc = vi_full_init(&pi->vi[0]); if (rc) return (rc); } if (isset(&sc->offload_map, pi->port_id)) { /* TOE is enabled on another VI of this port. */ pi->uld_vis++; return (0); } if (!uld_active(sc, ULD_TOM)) { rc = t4_activate_uld(sc, ULD_TOM); if (rc == EAGAIN) { log(LOG_WARNING, "You must kldload t4_tom.ko before trying " "to enable TOE on a cxgbe interface.\n"); } if (rc != 0) return (rc); KASSERT(sc->tom_softc != NULL, ("%s: TOM activated but softc NULL", __func__)); KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM activated but flag not set", __func__)); } /* Activate iWARP and iSCSI too, if the modules are loaded. */ if (!uld_active(sc, ULD_IWARP)) (void) t4_activate_uld(sc, ULD_IWARP); if (!uld_active(sc, ULD_ISCSI)) (void) t4_activate_uld(sc, ULD_ISCSI); pi->uld_vis++; setbit(&sc->offload_map, pi->port_id); } else { pi->uld_vis--; if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0) return (0); KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM never initialized?", __func__)); clrbit(&sc->offload_map, pi->port_id); } return (0); } /* * Add an upper layer driver to the global list. 
*/ int t4_register_uld(struct uld_info *ui) { int rc = 0; struct uld_info *u; sx_xlock(&t4_uld_list_lock); SLIST_FOREACH(u, &t4_uld_list, link) { if (u->uld_id == ui->uld_id) { rc = EEXIST; goto done; } } SLIST_INSERT_HEAD(&t4_uld_list, ui, link); ui->refcount = 0; done: sx_xunlock(&t4_uld_list_lock); return (rc); } int t4_unregister_uld(struct uld_info *ui) { int rc = EINVAL; struct uld_info *u; sx_xlock(&t4_uld_list_lock); SLIST_FOREACH(u, &t4_uld_list, link) { if (u == ui) { if (ui->refcount > 0) { rc = EBUSY; goto done; } SLIST_REMOVE(&t4_uld_list, ui, uld_info, link); rc = 0; goto done; } } done: sx_xunlock(&t4_uld_list_lock); return (rc); } int t4_activate_uld(struct adapter *sc, int id) { int rc; struct uld_info *ui; ASSERT_SYNCHRONIZED_OP(sc); if (id < 0 || id > ULD_MAX) return (EINVAL); rc = EAGAIN; /* kldoad the module with this ULD and try again. */ sx_slock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { if (ui->uld_id == id) { if (!(sc->flags & FULL_INIT_DONE)) { rc = adapter_full_init(sc); if (rc != 0) break; } rc = ui->activate(sc); if (rc == 0) { setbit(&sc->active_ulds, id); ui->refcount++; } break; } } sx_sunlock(&t4_uld_list_lock); return (rc); } int t4_deactivate_uld(struct adapter *sc, int id) { int rc; struct uld_info *ui; ASSERT_SYNCHRONIZED_OP(sc); if (id < 0 || id > ULD_MAX) return (EINVAL); rc = ENXIO; sx_slock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { if (ui->uld_id == id) { rc = ui->deactivate(sc); if (rc == 0) { clrbit(&sc->active_ulds, id); ui->refcount--; } break; } } sx_sunlock(&t4_uld_list_lock); return (rc); } int uld_active(struct adapter *sc, int uld_id) { MPASS(uld_id >= 0 && uld_id <= ULD_MAX); return (isset(&sc->active_ulds, uld_id)); } #endif /* * Come up with reasonable defaults for some of the tunables, provided they're * not set by the user (in which case we'll use the values as is). */ static void tweak_tunables(void) { int nc = mp_ncpus; /* our snapshot of the number of CPUs */ if (t4_ntxq10g < 1) { #ifdef RSS t4_ntxq10g = rss_getnumbuckets(); #else t4_ntxq10g = min(nc, NTXQ_10G); #endif } if (t4_ntxq1g < 1) { #ifdef RSS /* XXX: way too many for 1GbE? */ t4_ntxq1g = rss_getnumbuckets(); #else t4_ntxq1g = min(nc, NTXQ_1G); #endif } if (t4_ntxq_vi < 1) t4_ntxq_vi = min(nc, NTXQ_VI); if (t4_nrxq10g < 1) { #ifdef RSS t4_nrxq10g = rss_getnumbuckets(); #else t4_nrxq10g = min(nc, NRXQ_10G); #endif } if (t4_nrxq1g < 1) { #ifdef RSS /* XXX: way too many for 1GbE? 
*/ t4_nrxq1g = rss_getnumbuckets(); #else t4_nrxq1g = min(nc, NRXQ_1G); #endif } if (t4_nrxq_vi < 1) t4_nrxq_vi = min(nc, NRXQ_VI); #ifdef TCP_OFFLOAD if (t4_nofldtxq10g < 1) t4_nofldtxq10g = min(nc, NOFLDTXQ_10G); if (t4_nofldtxq1g < 1) t4_nofldtxq1g = min(nc, NOFLDTXQ_1G); if (t4_nofldtxq_vi < 1) t4_nofldtxq_vi = min(nc, NOFLDTXQ_VI); if (t4_nofldrxq10g < 1) t4_nofldrxq10g = min(nc, NOFLDRXQ_10G); if (t4_nofldrxq1g < 1) t4_nofldrxq1g = min(nc, NOFLDRXQ_1G); if (t4_nofldrxq_vi < 1) t4_nofldrxq_vi = min(nc, NOFLDRXQ_VI); if (t4_toecaps_allowed == -1) t4_toecaps_allowed = FW_CAPS_CONFIG_TOE; if (t4_rdmacaps_allowed == -1) { t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP | FW_CAPS_CONFIG_RDMA_RDMAC; } if (t4_iscsicaps_allowed == -1) { t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU | FW_CAPS_CONFIG_ISCSI_TARGET_PDU | FW_CAPS_CONFIG_ISCSI_T10DIF; } #else if (t4_toecaps_allowed == -1) t4_toecaps_allowed = 0; if (t4_rdmacaps_allowed == -1) t4_rdmacaps_allowed = 0; if (t4_iscsicaps_allowed == -1) t4_iscsicaps_allowed = 0; #endif #ifdef DEV_NETMAP if (t4_nnmtxq_vi < 1) t4_nnmtxq_vi = min(nc, NNMTXQ_VI); if (t4_nnmrxq_vi < 1) t4_nnmrxq_vi = min(nc, NNMRXQ_VI); #endif if (t4_tmr_idx_10g < 0 || t4_tmr_idx_10g >= SGE_NTIMERS) t4_tmr_idx_10g = TMR_IDX_10G; if (t4_pktc_idx_10g < -1 || t4_pktc_idx_10g >= SGE_NCOUNTERS) t4_pktc_idx_10g = PKTC_IDX_10G; if (t4_tmr_idx_1g < 0 || t4_tmr_idx_1g >= SGE_NTIMERS) t4_tmr_idx_1g = TMR_IDX_1G; if (t4_pktc_idx_1g < -1 || t4_pktc_idx_1g >= SGE_NCOUNTERS) t4_pktc_idx_1g = PKTC_IDX_1G; if (t4_qsize_txq < 128) t4_qsize_txq = 128; if (t4_qsize_rxq < 128) t4_qsize_rxq = 128; while (t4_qsize_rxq & 7) t4_qsize_rxq++; t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX; } #ifdef DDB static void t4_dump_tcb(struct adapter *sc, int tid) { uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos; reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2); save = t4_read_reg(sc, reg); base = sc->memwin[2].mw_base; /* Dump TCB for the tid */ tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE); tcb_addr += tid * TCB_SIZE; if (is_t4(sc)) { pf = 0; win_pos = tcb_addr & ~0xf; /* start must be 16B aligned */ } else { pf = V_PFNUM(sc->pf); win_pos = tcb_addr & ~0x7f; /* start must be 128B aligned */ } t4_write_reg(sc, reg, win_pos | pf); t4_read_reg(sc, reg); off = tcb_addr - win_pos; for (i = 0; i < 4; i++) { uint32_t buf[8]; for (j = 0; j < 8; j++, off += 4) buf[j] = htonl(t4_read_reg(sc, base + off)); db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); } t4_write_reg(sc, reg, save); t4_read_reg(sc, reg); } static void t4_dump_devlog(struct adapter *sc) { struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e e; int i, first, j, m, nentries, rc; uint64_t ftstamp = UINT64_MAX; if (dparams->start == 0) { db_printf("devlog params not valid\n"); return; } nentries = dparams->size / sizeof(struct fw_devlog_e); m = fwmtype_to_hwmtype(dparams->memtype); /* Find the first entry. 
*/ first = -1; for (i = 0; i < nentries && !db_pager_quit; i++) { rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e), sizeof(e), (void *)&e); if (rc != 0) break; if (e.timestamp == 0) break; e.timestamp = be64toh(e.timestamp); if (e.timestamp < ftstamp) { ftstamp = e.timestamp; first = i; } } if (first == -1) return; i = first; do { rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e), sizeof(e), (void *)&e); if (rc != 0) return; if (e.timestamp == 0) return; e.timestamp = be64toh(e.timestamp); e.seqno = be32toh(e.seqno); for (j = 0; j < 8; j++) e.params[j] = be32toh(e.params[j]); db_printf("%10d %15ju %8s %8s ", e.seqno, e.timestamp, (e.level < nitems(devlog_level_strings) ? devlog_level_strings[e.level] : "UNKNOWN"), (e.facility < nitems(devlog_facility_strings) ? devlog_facility_strings[e.facility] : "UNKNOWN")); db_printf(e.fmt, e.params[0], e.params[1], e.params[2], e.params[3], e.params[4], e.params[5], e.params[6], e.params[7]); if (++i == nentries) i = 0; } while (i != first && !db_pager_quit); } static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table); _DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table); DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL) { device_t dev; int t; bool valid; valid = false; t = db_read_token(); if (t == tIDENT) { dev = device_lookup_by_name(db_tok_string); valid = true; } db_skip_to_eol(); if (!valid) { db_printf("usage: show t4 devlog \n"); return; } if (dev == NULL) { db_printf("device not found\n"); return; } t4_dump_devlog(device_get_softc(dev)); } DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL) { device_t dev; int radix, tid, t; bool valid; valid = false; radix = db_radix; db_radix = 10; t = db_read_token(); if (t == tIDENT) { dev = device_lookup_by_name(db_tok_string); t = db_read_token(); if (t == tNUMBER) { tid = db_tok_number; valid = true; } } db_radix = radix; db_skip_to_eol(); if (!valid) { db_printf("usage: show t4 tcb \n"); return; } if (dev == NULL) { db_printf("device not found\n"); return; } if (tid < 0) { db_printf("invalid tid\n"); return; } t4_dump_tcb(device_get_softc(dev), tid); } #endif static struct sx mlu; /* mod load unload */ SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload"); static int mod_event(module_t mod, int cmd, void *arg) { int rc = 0; static int loaded = 0; switch (cmd) { case MOD_LOAD: sx_xlock(&mlu); if (loaded++ == 0) { t4_sge_modload(); t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl); t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl); t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt); t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt); sx_init(&t4_list_lock, "T4/T5 adapters"); SLIST_INIT(&t4_list); #ifdef TCP_OFFLOAD sx_init(&t4_uld_list_lock, "T4/T5 ULDs"); SLIST_INIT(&t4_uld_list); #endif t4_tracer_modload(); tweak_tunables(); } sx_xunlock(&mlu); break; case MOD_UNLOAD: sx_xlock(&mlu); if (--loaded == 0) { int tries; sx_slock(&t4_list_lock); if (!SLIST_EMPTY(&t4_list)) { rc = EBUSY; sx_sunlock(&t4_list_lock); goto done_unload; } #ifdef TCP_OFFLOAD sx_slock(&t4_uld_list_lock); if (!SLIST_EMPTY(&t4_uld_list)) { rc = EBUSY; sx_sunlock(&t4_uld_list_lock); sx_sunlock(&t4_list_lock); goto done_unload; } #endif tries = 0; while (tries++ < 5 && t4_sge_extfree_refs() != 0) { uprintf("%ju clusters with custom free routine " "still is use.\n", t4_sge_extfree_refs()); pause("t4unload", 2 * hz); } #ifdef TCP_OFFLOAD sx_sunlock(&t4_uld_list_lock); #endif sx_sunlock(&t4_list_lock); if (t4_sge_extfree_refs() == 0) { t4_tracer_modunload(); #ifdef 
TCP_OFFLOAD sx_destroy(&t4_uld_list_lock); #endif sx_destroy(&t4_list_lock); t4_sge_modunload(); loaded = 0; } else { rc = EBUSY; loaded++; /* undo earlier decrement */ } } done_unload: sx_xunlock(&mlu); break; } return (rc); } static devclass_t t4_devclass, t5_devclass; static devclass_t cxgbe_devclass, cxl_devclass; static devclass_t vcxgbe_devclass, vcxl_devclass; DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0); MODULE_VERSION(t4nex, 1); MODULE_DEPEND(t4nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t4nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0); MODULE_VERSION(t5nex, 1); MODULE_DEPEND(t5nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t5nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0); MODULE_VERSION(cxgbe, 1); DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0); MODULE_VERSION(cxl, 1); DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0); MODULE_VERSION(vcxgbe, 1); DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0); MODULE_VERSION(vcxl, 1); Index: user/alc/PQ_LAUNDRY/sys/dev/e1000/if_em.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/e1000/if_em.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/e1000/if_em.c (revision 303642) @@ -1,6235 +1,6236 @@ /****************************************************************************** Copyright (c) 2001-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
******************************************************************************/ /*$FreeBSD$*/ #include "opt_em.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #ifdef HAVE_KERNEL_OPTION_HEADERS #include "opt_device_polling.h" #endif #include #include #ifdef DDB #include #include #endif #if __FreeBSD_version >= 800000 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "e1000_api.h" #include "e1000_82571.h" #include "if_em.h" /********************************************************************* * Driver version: *********************************************************************/ char em_driver_version[] = "7.6.1-k"; /********************************************************************* * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into e1000_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } *********************************************************************/ static em_vendor_info_t em_vendor_info_array[] = { /* Intel(R) PRO/1000 Network Connection */ { 0x8086, E1000_DEV_ID_82571EB_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_FIBER, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82572EI, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573E, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573E_IAMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82573L, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82583V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH8_82567V_3, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_AMT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_C, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, 
E1000_DEV_ID_ICH9_IGP_M, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IGP_M_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE_GT, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_IFE_G, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH9_BM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82574L, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_82574LA, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_R_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_LF, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_ICH10_D_BM_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_M_HV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_M_HV_LC, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_D_HV_DM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_D_HV_DC, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH2_LV_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH2_LV_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPT_I217_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_LM2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_V2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_LM3, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_I218_V3, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_SPT_I219_LM2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_SPT_I219_V2, PCI_ANY_ID, PCI_ANY_ID, 0}, { 0x8086, E1000_DEV_ID_PCH_LBG_I219_LM3, PCI_ANY_ID, PCI_ANY_ID, 0}, /* required last entry */ { 0, 0, 0, 0, 0} }; /********************************************************************* * Table of branding strings for all supported NICs. 
*********************************************************************/ static char *em_strings[] = { "Intel(R) PRO/1000 Network Connection" }; /********************************************************************* * Function prototypes *********************************************************************/ static int em_probe(device_t); static int em_attach(device_t); static int em_detach(device_t); static int em_shutdown(device_t); static int em_suspend(device_t); static int em_resume(device_t); #ifdef EM_MULTIQUEUE static int em_mq_start(if_t, struct mbuf *); static int em_mq_start_locked(if_t, struct tx_ring *); static void em_qflush(if_t); #else static void em_start(if_t); static void em_start_locked(if_t, struct tx_ring *); #endif static int em_ioctl(if_t, u_long, caddr_t); static uint64_t em_get_counter(if_t, ift_counter); static void em_init(void *); static void em_init_locked(struct adapter *); static void em_stop(void *); static void em_media_status(if_t, struct ifmediareq *); static int em_media_change(if_t); static void em_identify_hardware(struct adapter *); static int em_allocate_pci_resources(struct adapter *); static int em_allocate_legacy(struct adapter *); static int em_allocate_msix(struct adapter *); static int em_allocate_queues(struct adapter *); static int em_setup_msix(struct adapter *); static void em_free_pci_resources(struct adapter *); static void em_local_timer(void *); static void em_reset(struct adapter *); static int em_setup_interface(device_t, struct adapter *); static void em_flush_desc_rings(struct adapter *); static void em_setup_transmit_structures(struct adapter *); static void em_initialize_transmit_unit(struct adapter *); static int em_allocate_transmit_buffers(struct tx_ring *); static void em_free_transmit_structures(struct adapter *); static void em_free_transmit_buffers(struct tx_ring *); static int em_setup_receive_structures(struct adapter *); static int em_allocate_receive_buffers(struct rx_ring *); static void em_initialize_receive_unit(struct adapter *); static void em_free_receive_structures(struct adapter *); static void em_free_receive_buffers(struct rx_ring *); static void em_enable_intr(struct adapter *); static void em_disable_intr(struct adapter *); static void em_update_stats_counters(struct adapter *); static void em_add_hw_stats(struct adapter *adapter); static void em_txeof(struct tx_ring *); static bool em_rxeof(struct rx_ring *, int, int *); #ifndef __NO_STRICT_ALIGNMENT static int em_fixup_rx(struct rx_ring *); #endif static void em_setup_rxdesc(union e1000_rx_desc_extended *, const struct em_rxbuffer *rxbuf); static void em_receive_checksum(uint32_t status, struct mbuf *); static void em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int, struct ip *, u32 *, u32 *); static void em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *, struct tcphdr *, u32 *, u32 *); static void em_set_promisc(struct adapter *); static void em_disable_promisc(struct adapter *); static void em_set_multi(struct adapter *); static void em_update_link_status(struct adapter *); static void em_refresh_mbufs(struct rx_ring *, int); static void em_register_vlan(void *, if_t, u16); static void em_unregister_vlan(void *, if_t, u16); static void em_setup_vlan_hw_support(struct adapter *); static int em_xmit(struct tx_ring *, struct mbuf **); static int em_dma_malloc(struct adapter *, bus_size_t, struct em_dma_alloc *, int); static void em_dma_free(struct adapter *, struct em_dma_alloc *); static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS); 
static void em_print_nvm_info(struct adapter *); static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS); static void em_print_debug_info(struct adapter *); static int em_is_valid_ether_addr(u8 *); static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS); static void em_add_int_delay_sysctl(struct adapter *, const char *, const char *, struct em_int_delay_info *, int, int); /* Management and WOL Support */ static void em_init_manageability(struct adapter *); static void em_release_manageability(struct adapter *); static void em_get_hw_control(struct adapter *); static void em_release_hw_control(struct adapter *); static void em_get_wakeup(device_t); static void em_enable_wakeup(device_t); static int em_enable_phy_wakeup(struct adapter *); static void em_led_func(void *, int); static void em_disable_aspm(struct adapter *); static int em_irq_fast(void *); /* MSIX handlers */ static void em_msix_tx(void *); static void em_msix_rx(void *); static void em_msix_link(void *); static void em_handle_tx(void *context, int pending); static void em_handle_rx(void *context, int pending); static void em_handle_link(void *context, int pending); #ifdef EM_MULTIQUEUE static void em_enable_vectors_82574(struct adapter *); #endif static void em_set_sysctl_value(struct adapter *, const char *, const char *, int *, int); static int em_set_flowcntl(SYSCTL_HANDLER_ARGS); static int em_sysctl_eee(SYSCTL_HANDLER_ARGS); static __inline void em_rx_discard(struct rx_ring *, int); #ifdef DEVICE_POLLING static poll_handler_t em_poll; #endif /* POLLING */ /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t em_methods[] = { /* Device interface */ DEVMETHOD(device_probe, em_probe), DEVMETHOD(device_attach, em_attach), DEVMETHOD(device_detach, em_detach), DEVMETHOD(device_shutdown, em_shutdown), DEVMETHOD(device_suspend, em_suspend), DEVMETHOD(device_resume, em_resume), DEVMETHOD_END }; static driver_t em_driver = { "em", em_methods, sizeof(struct adapter), }; devclass_t em_devclass; DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0); MODULE_DEPEND(em, pci, 1, 1, 1); MODULE_DEPEND(em, ether, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(em, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ /********************************************************************* * Tunable default values. 
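 *
 * A rough guide to the values below: EM_TICKS_TO_USECS() and
 * EM_USECS_TO_TICKS() convert between microseconds and the 1.024 us
 * delay-timer units implied by the 1024/1000 scaling, rounding to the
 * nearest unit.  DEFAULT_ITR appears to assume the throttle register
 * counts 256 ns intervals, so with MAX_INTS_PER_SEC = 8000:
 *
 *	DEFAULT_ITR = 1000000000 / (8000 * 256) = 488
 *
 * which is one interrupt roughly every 488 * 256 ns, about 125 us,
 * or close to 8000 interrupts per second.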
*********************************************************************/ #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) #define M_TSO_LEN 66 #define MAX_INTS_PER_SEC 8000 #define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) /* Allow common code without TSO */ #ifndef CSUM_TSO #define CSUM_TSO 0 #endif #define TSO_WORKAROUND 4 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters"); static int em_disable_crc_stripping = 0; SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN, &em_disable_crc_stripping, 0, "Disable CRC Stripping"); static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt, 0, "Default transmit interrupt delay in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt, 0, "Default receive interrupt delay in usecs"); static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV); SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN, &em_tx_abs_int_delay_dflt, 0, "Default transmit interrupt delay limit in usecs"); SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN, &em_rx_abs_int_delay_dflt, 0, "Default receive interrupt delay limit in usecs"); static int em_rxd = EM_DEFAULT_RXD; static int em_txd = EM_DEFAULT_TXD; SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0, "Number of receive descriptors per queue"); SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0, "Number of transmit descriptors per queue"); static int em_smart_pwr_down = FALSE; SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down, 0, "Set to true to leave smart power down enabled on newer adapters"); /* Controls whether promiscuous also shows bad packets */ static int em_debug_sbp = FALSE; SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0, "Show bad packets in promiscuous mode"); static int em_enable_msix = TRUE; SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0, "Enable MSI-X interrupts"); #ifdef EM_MULTIQUEUE static int em_num_queues = 1; SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0, "82574 only: Number of queues to configure, 0 indicates autoconfigure"); #endif /* ** Global variable to store last used CPU when binding queues ** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a ** queue is bound to a cpu. */ static int em_last_bind_cpu = -1; /* How many packets rxeof tries to clean at a time */ static int em_rx_process_limit = 100; SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &em_rx_process_limit, 0, "Maximum number of received packets to process " "at a time, -1 means unlimited"); /* Energy efficient ethernet - default to OFF */ static int eee_setting = 1; SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0, "Enable Energy Efficient Ethernet"); /* Global used in WOL setup with multiport cards */ static int global_quad_port_a = 0; #ifdef DEV_NETMAP /* see ixgbe.c for details */ #include #endif /* DEV_NETMAP */ /********************************************************************* * Device identification routine * * em_probe determines if the driver should be loaded on * adapter based on PCI vendor/device id of the adapter. 
* * return BUS_PROBE_DEFAULT on success, positive on failure *********************************************************************/ static int em_probe(device_t dev) { char adapter_name[60]; uint16_t pci_vendor_id = 0; uint16_t pci_device_id = 0; uint16_t pci_subvendor_id = 0; uint16_t pci_subdevice_id = 0; em_vendor_info_t *ent; INIT_DEBUGOUT("em_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != EM_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = em_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == PCI_ANY_ID)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == PCI_ANY_ID))) { sprintf(adapter_name, "%s %s", em_strings[ent->index], em_driver_version); device_set_desc_copy(dev, adapter_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /********************************************************************* * Device initialization routine * * The attach entry point is called when the driver is being loaded. * This routine identifies the type of hardware, allocates all resources * and initializes the hardware. * * return 0 on success, positive on failure *********************************************************************/ static int em_attach(device_t dev) { struct adapter *adapter; struct e1000_hw *hw; int error = 0; INIT_DEBUGOUT("em_attach: begin"); if (resource_disabled("em", device_get_unit(dev))) { device_printf(dev, "Disabled by device hint\n"); return (ENXIO); } adapter = device_get_softc(dev); adapter->dev = adapter->osdep.dev = dev; hw = &adapter->hw; EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_nvm_info, "I", "NVM Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_debug_info, "I", "Debug Information"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_set_flowcntl, "I", "Flow Control"); callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); /* Determine hardware and mac info */ em_identify_hardware(adapter); /* Setup PCI resources */ if (em_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_pci; } /* ** For ICH8 and family we need to ** map the flash memory, and this ** must happen after the MAC is ** identified */ if ((hw->mac.type == e1000_ich8lan) || (hw->mac.type == e1000_ich9lan) || (hw->mac.type == e1000_ich10lan) || (hw->mac.type == e1000_pchlan) || (hw->mac.type == e1000_pch2lan) || (hw->mac.type == e1000_pch_lpt)) { int rid = EM_BAR_TYPE_FLASH; adapter->flash = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->flash == NULL) { device_printf(dev, "Mapping of Flash failed\n"); error = ENXIO; goto err_pci; } /* This is used in the shared code */ hw->flash_address = (u8 *)adapter->flash; adapter->osdep.flash_bus_space_tag = rman_get_bustag(adapter->flash); adapter->osdep.flash_bus_space_handle = rman_get_bushandle(adapter->flash); } /* ** In the new SPT device flash is not a ** separate BAR, 
rather it is also in BAR0, ** so use the same tag and an offset handle for the ** FLASH read/write macros in the shared code. */ else if (hw->mac.type == e1000_pch_spt) { adapter->osdep.flash_bus_space_tag = adapter->osdep.mem_bus_space_tag; adapter->osdep.flash_bus_space_handle = adapter->osdep.mem_bus_space_handle + E1000_FLASH_BASE_ADDR; } /* Do Shared Code initialization */ error = e1000_setup_init_funcs(hw, TRUE); if (error) { device_printf(dev, "Setup of Shared code failed, error %d\n", error); error = ENXIO; goto err_pci; } /* * Setup MSI/X or MSI if PCI Express */ adapter->msix = em_setup_msix(adapter); e1000_get_bus_info(hw); /* Set up some sysctls for the tunable interrupt delays */ em_add_int_delay_sysctl(adapter, "rx_int_delay", "receive interrupt delay in usecs", &adapter->rx_int_delay, E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_int_delay", "transmit interrupt delay in usecs", &adapter->tx_int_delay, E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt); em_add_int_delay_sysctl(adapter, "rx_abs_int_delay", "receive interrupt delay limit in usecs", &adapter->rx_abs_int_delay, E1000_REGISTER(hw, E1000_RADV), em_rx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "tx_abs_int_delay", "transmit interrupt delay limit in usecs", &adapter->tx_abs_int_delay, E1000_REGISTER(hw, E1000_TADV), em_tx_abs_int_delay_dflt); em_add_int_delay_sysctl(adapter, "itr", "interrupt delay limit in usecs/4", &adapter->tx_itr, E1000_REGISTER(hw, E1000_ITR), DEFAULT_ITR); /* Sysctl for limiting the amount of work done in the taskqueue */ em_set_sysctl_value(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, em_rx_process_limit); /* * Validate number of transmit and receive descriptors. It * must not exceed hardware maximum, and must be multiple * of E1000_DBA_ALIGN. */ if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 || (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) { device_printf(dev, "Using %d TX descriptors instead of %d!\n", EM_DEFAULT_TXD, em_txd); adapter->num_tx_desc = EM_DEFAULT_TXD; } else adapter->num_tx_desc = em_txd; if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 || (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) { device_printf(dev, "Using %d RX descriptors instead of %d!\n", EM_DEFAULT_RXD, em_rxd); adapter->num_rx_desc = EM_DEFAULT_RXD; } else adapter->num_rx_desc = em_rxd; hw->mac.autoneg = DO_AUTO_NEG; hw->phy.autoneg_wait_to_complete = FALSE; hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; /* Copper options */ if (hw->phy.media_type == e1000_media_type_copper) { hw->phy.mdix = AUTO_ALL_MODES; hw->phy.disable_polarity_correction = FALSE; hw->phy.ms_type = EM_MASTER_SLAVE; } /* * Set the frame limits assuming * standard ethernet sized frames. */ adapter->hw.mac.max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE; /* * This controls when hardware reports transmit completion * status. */ hw->mac.report_tx_early = 1; /* ** Get queue/ring memory */ if (em_allocate_queues(adapter)) { error = ENOMEM; goto err_pci; } /* Allocate multicast array memory. 
*/ adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_late; } /* Check SOL/IDER usage */ if (e1000_check_reset_block(hw)) device_printf(dev, "PHY reset is blocked" " due to SOL/IDER session.\n"); /* Sysctl for setting Energy Efficient Ethernet */ hw->dev_spec.ich8lan.eee_disable = eee_setting; SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_eee, "I", "Disable Energy Efficient Ethernet"); /* ** Start from a known state, this is ** important in reading the nvm and ** mac from that. */ e1000_reset_hw(hw); /* Make sure we have a good EEPROM before we read from it */ if (e1000_validate_nvm_checksum(hw) < 0) { /* ** Some PCI-E parts fail the first check due to ** the link being in sleep state, call it again, ** if it fails a second time its a real issue. */ if (e1000_validate_nvm_checksum(hw) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } } /* Copy the permanent MAC address out of the EEPROM */ if (e1000_read_mac_addr(hw) < 0) { device_printf(dev, "EEPROM read error while reading MAC" " address\n"); error = EIO; goto err_late; } if (!em_is_valid_ether_addr(hw->mac.addr)) { device_printf(dev, "Invalid MAC address\n"); error = EIO; goto err_late; } /* Disable ULP support */ e1000_disable_ulp_lpt_lp(hw, TRUE); /* ** Do interrupt configuration */ if (adapter->msix > 1) /* Do MSIX */ error = em_allocate_msix(adapter); else /* MSI or Legacy */ error = em_allocate_legacy(adapter); if (error) goto err_late; /* * Get Wake-on-Lan and Management info for later use */ em_get_wakeup(dev); /* Setup OS specific network interface */ if (em_setup_interface(dev, adapter) != 0) goto err_late; em_reset(adapter); /* Initialize statistics */ em_update_stats_counters(adapter); hw->mac.get_link_status = 1; em_update_link_status(adapter); /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); em_add_hw_stats(adapter); /* Non-AMT based hardware can now take control from firmware */ if (adapter->has_manage && !adapter->has_amt) em_get_hw_control(adapter); /* Tell the stack that the interface is not active */ if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); adapter->led_dev = led_create(em_led_func, adapter, device_get_nameunit(dev)); #ifdef DEV_NETMAP em_netmap_attach(adapter); #endif /* DEV_NETMAP */ INIT_DEBUGOUT("em_attach: end"); return (0); err_late: em_free_transmit_structures(adapter); em_free_receive_structures(adapter); em_release_hw_control(adapter); if (adapter->ifp != (void *)NULL) if_free(adapter->ifp); err_pci: em_free_pci_resources(adapter); free(adapter->mta, M_DEVBUF); EM_CORE_LOCK_DESTROY(adapter); return (error); } /********************************************************************* * Device removal routine * * The detach entry point is called when the driver is being removed. * This routine stops the adapter and deallocates all the resources * that were allocated for driver operation. 
* * return 0 on success, positive on failure *********************************************************************/ static int em_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); if_t ifp = adapter->ifp; INIT_DEBUGOUT("em_detach: begin"); /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } #ifdef DEVICE_POLLING if (if_getcapenable(ifp) & IFCAP_POLLING) ether_poll_deregister(ifp); #endif if (adapter->led_dev != NULL) led_destroy(adapter->led_dev); EM_CORE_LOCK(adapter); adapter->in_detach = 1; em_stop(adapter); EM_CORE_UNLOCK(adapter); EM_CORE_LOCK_DESTROY(adapter); e1000_phy_hw_reset(&adapter->hw); em_release_manageability(adapter); em_release_hw_control(adapter); /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); ether_ifdetach(adapter->ifp); callout_drain(&adapter->timer); #ifdef DEV_NETMAP netmap_detach(ifp); #endif /* DEV_NETMAP */ em_free_pci_resources(adapter); bus_generic_detach(dev); if_free(ifp); em_free_transmit_structures(adapter); em_free_receive_structures(adapter); em_release_hw_control(adapter); free(adapter->mta, M_DEVBUF); return (0); } /********************************************************************* * * Shutdown entry point * **********************************************************************/ static int em_shutdown(device_t dev) { return em_suspend(dev); } /* * Suspend/resume device methods. */ static int em_suspend(device_t dev) { struct adapter *adapter = device_get_softc(dev); EM_CORE_LOCK(adapter); em_release_manageability(adapter); em_release_hw_control(adapter); em_enable_wakeup(dev); EM_CORE_UNLOCK(adapter); return bus_generic_suspend(dev); } static int em_resume(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct tx_ring *txr = adapter->tx_rings; if_t ifp = adapter->ifp; EM_CORE_LOCK(adapter); if (adapter->hw.mac.type == e1000_pch2lan) e1000_resume_workarounds_pchlan(&adapter->hw); em_init_locked(adapter); em_init_manageability(adapter); if ((if_getflags(ifp) & IFF_UP) && (if_getdrvflags(ifp) & IFF_DRV_RUNNING) && adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); } } EM_CORE_UNLOCK(adapter); return bus_generic_resume(dev); } #ifndef EM_MULTIQUEUE static void em_start_locked(if_t ifp, struct tx_ring *txr) { struct adapter *adapter = if_getsoftc(ifp); struct mbuf *m_head; EM_TX_LOCK_ASSERT(txr); if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; if (!adapter->link_active) return; while (!if_sendq_empty(ifp)) { /* Call cleanup if number of TX descriptors low */ if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD) em_txeof(txr); if (txr->tx_avail < EM_MAX_SCATTER) { if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0); break; } m_head = if_dequeue(ifp); if (m_head == NULL) break; /* * Encapsulation can modify our pointer, and or make it * NULL on failure. In that event, we can't requeue. 
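 *
 * Concretely, in the branch below: if em_xmit() failed and cleared
 * m_head, the chain was already freed and we just stop; if m_head is
 * still valid it is pushed back with if_sendq_prepend() and we stop,
 * leaving the packet to be retried on the next start call.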
*/ if (em_xmit(txr, &m_head)) { if (m_head == NULL) break; if_sendq_prepend(ifp, m_head); break; } /* Mark the queue as having work */ if (txr->busy == EM_TX_IDLE) txr->busy = EM_TX_BUSY; /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); } return; } static void em_start(if_t ifp) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { EM_TX_LOCK(txr); em_start_locked(ifp, txr); EM_TX_UNLOCK(txr); } return; } #else /* EM_MULTIQUEUE */ /********************************************************************* * Multiqueue Transmit routines * * em_mq_start is called by the stack to initiate a transmit. * however, if busy the driver can queue the request rather * than do an immediate send. It is this that is an advantage * in this driver, rather than also having multiple tx queues. **********************************************************************/ /* ** Multiqueue capable stack interface */ static int em_mq_start(if_t ifp, struct mbuf *m) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; unsigned int i, error; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % adapter->num_queues; else i = curcpu % adapter->num_queues; txr = &adapter->tx_rings[i]; error = drbr_enqueue(ifp, txr->br, m); if (error) return (error); if (EM_TX_TRYLOCK(txr)) { em_mq_start_locked(ifp, txr); EM_TX_UNLOCK(txr); } else taskqueue_enqueue(txr->tq, &txr->tx_task); return (0); } static int em_mq_start_locked(if_t ifp, struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct mbuf *next; int err = 0, enq = 0; EM_TX_LOCK_ASSERT(txr); if (((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) || adapter->link_active == 0) { return (ENETDOWN); } /* Process the queue */ while ((next = drbr_peek(ifp, txr->br)) != NULL) { if ((err = em_xmit(txr, &next)) != 0) { if (next == NULL) { /* It was freed, move forward */ drbr_advance(ifp, txr->br); } else { /* * Still have one left, it may not be * the same since the transmit function * may have changed it. */ drbr_putback(ifp, txr->br, next); } break; } drbr_advance(ifp, txr->br); enq++; if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len); if (next->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); ETHER_BPF_MTAP(ifp, next); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) break; } /* Mark the queue as having work */ if ((enq > 0) && (txr->busy == EM_TX_IDLE)) txr->busy = EM_TX_BUSY; if (txr->tx_avail < EM_MAX_SCATTER) em_txeof(txr); if (txr->tx_avail < EM_MAX_SCATTER) { if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0); } return (err); } /* ** Flush all ring buffers */ static void em_qflush(if_t ifp) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; struct mbuf *m; for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); while ((m = buf_ring_dequeue_sc(txr->br)) != NULL) m_freem(m); EM_TX_UNLOCK(txr); } if_qflush(ifp); } #endif /* EM_MULTIQUEUE */ /********************************************************************* * Ioctl entry point * * em_ioctl is called when the user wants to configure the * interface. 
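 *
 * Two cases below are worth noting: SIOCSIFADDR avoids a full
 * reinit when it can, since em_init() forces a link renegotiation,
 * and SIOCSIFMTU clamps the MTU against a per-MAC maximum frame
 * size (ICH8 parts, for example, take no jumbo frames at all).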
* * return 0 on success, positive on failure **********************************************************************/ static int em_ioctl(if_t ifp, u_long command, caddr_t data) { struct adapter *adapter = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = FALSE; int error = 0; if (adapter->in_detach) return (error); switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { if_setflagbits(ifp,IFF_UP,0); if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING)) em_init(adapter); #ifdef INET if (!(if_getflags(ifp) & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: { int max_frame_size; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); EM_CORE_LOCK(adapter); switch (adapter->hw.mac.type) { case e1000_82571: case e1000_82572: case e1000_ich9lan: case e1000_ich10lan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: case e1000_82574: case e1000_82583: case e1000_80003es2lan: /* 9K Jumbo Frame size */ max_frame_size = 9234; break; case e1000_pchlan: max_frame_size = 4096; break; /* Adapters that do not support jumbo frames */ case e1000_ich8lan: max_frame_size = ETHER_MAX_LEN; break; default: max_frame_size = MAX_JUMBO_FRAME_SIZE; } if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { EM_CORE_UNLOCK(adapter); error = EINVAL; break; } if_setmtu(ifp, ifr->ifr_mtu); adapter->hw.mac.max_frame_size = if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) em_init_locked(adapter); EM_CORE_UNLOCK(adapter); break; } case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl rcv'd:\ SIOCSIFFLAGS (Set Interface Flags)"); EM_CORE_LOCK(adapter); if (if_getflags(ifp) & IFF_UP) { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if ((if_getflags(ifp) ^ adapter->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { em_disable_promisc(adapter); em_set_promisc(adapter); } } else em_init_locked(adapter); } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) em_stop(adapter); adapter->if_flags = if_getflags(ifp); EM_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { EM_CORE_LOCK(adapter); em_disable_intr(adapter); em_set_multi(adapter); #ifdef DEVICE_POLLING if (!(if_getcapenable(ifp) & IFCAP_POLLING)) #endif em_enable_intr(adapter); EM_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: /* Check SOL/IDER usage */ EM_CORE_LOCK(adapter); if (e1000_check_reset_block(&adapter->hw)) { EM_CORE_UNLOCK(adapter); device_printf(adapter->dev, "Media change is" " blocked due to SOL/IDER session.\n"); break; } EM_CORE_UNLOCK(adapter); /* falls thru */ case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl rcv'd: \ SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { int mask, reinit; IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)"); reinit = 0; mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); #ifdef DEVICE_POLLING if (mask & IFCAP_POLLING) { if (ifr->ifr_reqcap & IFCAP_POLLING) { error = ether_poll_register(em_poll, ifp); if (error) return (error); EM_CORE_LOCK(adapter); em_disable_intr(adapter); 
if_setcapenablebit(ifp, IFCAP_POLLING, 0); EM_CORE_UNLOCK(adapter); } else { error = ether_poll_deregister(ifp); /* Enable interrupt even in error case */ EM_CORE_LOCK(adapter); em_enable_intr(adapter); if_setcapenablebit(ifp, 0, IFCAP_POLLING); EM_CORE_UNLOCK(adapter); } } #endif if (mask & IFCAP_HWCSUM) { if_togglecapenable(ifp,IFCAP_HWCSUM); reinit = 1; } if (mask & IFCAP_TSO4) { if_togglecapenable(ifp,IFCAP_TSO4); reinit = 1; } if (mask & IFCAP_VLAN_HWTAGGING) { if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING); reinit = 1; } if (mask & IFCAP_VLAN_HWFILTER) { if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER); reinit = 1; } if (mask & IFCAP_VLAN_HWTSO) { if_togglecapenable(ifp, IFCAP_VLAN_HWTSO); reinit = 1; } if ((mask & IFCAP_WOL) && (if_getcapabilities(ifp) & IFCAP_WOL) != 0) { if (mask & IFCAP_WOL_MCAST) if_togglecapenable(ifp, IFCAP_WOL_MCAST); if (mask & IFCAP_WOL_MAGIC) if_togglecapenable(ifp, IFCAP_WOL_MAGIC); } if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING)) em_init(adapter); if_vlancap(ifp); break; } default: error = ether_ioctl(ifp, command, data); break; } return (error); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ static void em_init_locked(struct adapter *adapter) { if_t ifp = adapter->ifp; device_t dev = adapter->dev; INIT_DEBUGOUT("em_init: begin"); EM_CORE_LOCK_ASSERT(adapter); em_disable_intr(adapter); callout_stop(&adapter->timer); /* Get the latest mac address, User can use a LAA */ bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr, ETHER_ADDR_LEN); /* Put the address into the Receive Address Array */ e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* * With the 82571 adapter, RAR[0] may be overwritten * when the other port is reset, we make a duplicate * in RAR[14] for that eventuality, this assures * the interface continues to function. */ if (adapter->hw.mac.type == e1000_82571) { e1000_set_laa_state_82571(&adapter->hw, TRUE); e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, E1000_RAR_ENTRIES - 1); } /* Initialize the hardware */ em_reset(adapter); em_update_link_status(adapter); /* Setup VLAN support, basic and offload if available */ E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); /* Set hardware offload abilities */ if_clearhwassist(ifp); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0); /* ** There have proven to be problems with TSO when not ** at full gigabit speed, so disable the assist automatically ** when at lower speeds. 
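**
** In practice "disable" just means CSUM_TSO is only added to the
** interface hwassist bits when the link is at SPEED_1000, as the
** check below shows; nothing is torn down at lower speeds.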
-jfv */ if (if_getcapenable(ifp) & IFCAP_TSO4) { if (adapter->link_speed == SPEED_1000) if_sethwassistbits(ifp, CSUM_TSO, 0); } /* Configure for OS presence */ em_init_manageability(adapter); /* Prepare transmit descriptors and buffers */ em_setup_transmit_structures(adapter); em_initialize_transmit_unit(adapter); /* Setup Multicast table */ em_set_multi(adapter); /* ** Figure out the desired mbuf ** pool for doing jumbos */ if (adapter->hw.mac.max_frame_size <= 2048) adapter->rx_mbuf_sz = MCLBYTES; else if (adapter->hw.mac.max_frame_size <= 4096) adapter->rx_mbuf_sz = MJUMPAGESIZE; else adapter->rx_mbuf_sz = MJUM9BYTES; /* Prepare receive descriptors and buffers */ if (em_setup_receive_structures(adapter)) { device_printf(dev, "Could not setup receive structures\n"); em_stop(adapter); return; } em_initialize_receive_unit(adapter); /* Use real VLAN Filter support? */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) { if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) /* Use real VLAN Filter support */ em_setup_vlan_hw_support(adapter); else { u32 ctrl; ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= E1000_CTRL_VME; E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); } } /* Don't lose promiscuous settings */ em_set_promisc(adapter); /* Set the interface as ACTIVE */ if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); callout_reset(&adapter->timer, hz, em_local_timer, adapter); e1000_clear_hw_cntrs_base_generic(&adapter->hw); /* MSI/X configuration for 82574 */ if (adapter->hw.mac.type == e1000_82574) { int tmp; tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); tmp |= E1000_CTRL_EXT_PBA_CLR; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp); /* Set the IVAR - interrupt vector routing. */ E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars); } #ifdef DEVICE_POLLING /* * Only enable interrupts if we are not polling, make sure * they are off otherwise. 
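 *
 * The layout below is a little subtle: the C "else" sits just before
 * the #endif, so with DEVICE_POLLING compiled out only the
 * em_enable_intr() call remains and interrupts are always re-enabled
 * at the end of init.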
*/ if (if_getcapenable(ifp) & IFCAP_POLLING) em_disable_intr(adapter); else #endif /* DEVICE_POLLING */ em_enable_intr(adapter); /* AMT based hardware can now take control from firmware */ if (adapter->has_manage && adapter->has_amt) em_get_hw_control(adapter); } static void em_init(void *arg) { struct adapter *adapter = arg; EM_CORE_LOCK(adapter); em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } #ifdef DEVICE_POLLING /********************************************************************* * * Legacy polling routine: note this only works with single queue * *********************************************************************/ static int em_poll(if_t ifp, enum poll_cmd cmd, int count) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; u32 reg_icr; int rx_done; EM_CORE_LOCK(adapter); if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { EM_CORE_UNLOCK(adapter); return (0); } if (cmd == POLL_AND_CHECK_STATUS) { reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { callout_stop(&adapter->timer); adapter->hw.mac.get_link_status = 1; em_update_link_status(adapter); callout_reset(&adapter->timer, hz, em_local_timer, adapter); } } EM_CORE_UNLOCK(adapter); em_rxeof(rxr, count, &rx_done); EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); return (rx_done); } #endif /* DEVICE_POLLING */ /********************************************************************* * * Fast Legacy/MSI Combined Interrupt Service routine * *********************************************************************/ static int em_irq_fast(void *arg) { struct adapter *adapter = arg; if_t ifp; u32 reg_icr; ifp = adapter->ifp; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Hot eject? */ if (reg_icr == 0xffffffff) return FILTER_STRAY; /* Definitely not our interrupt. */ if (reg_icr == 0x0) return FILTER_STRAY; /* * Starting with the 82571 chip, bit 31 should be used to * determine whether the interrupt belongs to us. 
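 *
 * E1000_ICR_INT_ASSERTED is that bit; on a shared legacy interrupt
 * line a clear bit means another device raised the IRQ, so the
 * filter returns FILTER_STRAY without touching the adapter.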
*/ if (adapter->hw.mac.type >= e1000_82571 && (reg_icr & E1000_ICR_INT_ASSERTED) == 0) return FILTER_STRAY; em_disable_intr(adapter); taskqueue_enqueue(adapter->tq, &adapter->que_task); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; taskqueue_enqueue(taskqueue_fast, &adapter->link_task); } if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; return FILTER_HANDLED; } /* Combined RX/TX handler, used by Legacy and MSI */ static void em_handle_que(void *context, int pending) { struct adapter *adapter = context; if_t ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL); EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); if (more) { taskqueue_enqueue(adapter->tq, &adapter->que_task); return; } } em_enable_intr(adapter); return; } /********************************************************************* * * MSIX Interrupt Service Routines * **********************************************************************/ static void em_msix_tx(void *arg) { struct tx_ring *txr = arg; struct adapter *adapter = txr->adapter; if_t ifp = adapter->ifp; ++txr->tx_irq; EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); EM_TX_UNLOCK(txr); return; } /********************************************************************* * * MSIX RX Interrupt Service routine * **********************************************************************/ static void em_msix_rx(void *arg) { struct rx_ring *rxr = arg; struct adapter *adapter = rxr->adapter; bool more; ++rxr->rx_irq; if (!(if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)) return; more = em_rxeof(rxr, adapter->rx_process_limit, NULL); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); else { /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); } return; } /********************************************************************* * * MSIX Link Fast Interrupt Service routine * **********************************************************************/ static void em_msix_link(void *arg) { struct adapter *adapter = arg; u32 reg_icr; ++adapter->link_irq; reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { adapter->hw.mac.get_link_status = 1; em_handle_link(adapter, 0); } else E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); /* ** Because we must read the ICR for this interrupt ** it may clear other causes using autoclear, for ** this reason we simply create a soft interrupt ** for all these vectors. 
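**
** Writing the queue bits (adapter->ims) back into E1000_ICS below
** re-raises any RX/TX causes that the ICR read auto-cleared, so
** their MSI-X vectors still fire and no completion work is lost.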
*/ if (reg_icr) { E1000_WRITE_REG(&adapter->hw, E1000_ICS, adapter->ims); } return; } static void em_handle_rx(void *context, int pending) { struct rx_ring *rxr = context; struct adapter *adapter = rxr->adapter; bool more; more = em_rxeof(rxr, adapter->rx_process_limit, NULL); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); else { /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); } } static void em_handle_tx(void *context, int pending) { struct tx_ring *txr = context; struct adapter *adapter = txr->adapter; if_t ifp = adapter->ifp; EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); EM_TX_UNLOCK(txr); } static void em_handle_link(void *context, int pending) { struct adapter *adapter = context; struct tx_ring *txr = adapter->tx_rings; if_t ifp = adapter->ifp; if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) return; EM_CORE_LOCK(adapter); callout_stop(&adapter->timer); em_update_link_status(adapter); callout_reset(&adapter->timer, hz, em_local_timer, adapter); E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); if (adapter->link_active) { for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif EM_TX_UNLOCK(txr); } } EM_CORE_UNLOCK(adapter); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ static void em_media_status(if_t ifp, struct ifmediareq *ifmr) { struct adapter *adapter = if_getsoftc(ifp); u_char fiber_type = IFM_1000_SX; INIT_DEBUGOUT("em_media_status: begin"); EM_CORE_LOCK(adapter); em_update_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { EM_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { ifmr->ifm_active |= fiber_type | IFM_FDX; } else { switch (adapter->link_speed) { case 10: ifmr->ifm_active |= IFM_10_T; break; case 100: ifmr->ifm_active |= IFM_100_TX; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; } if (adapter->link_duplex == FULL_DUPLEX) ifmr->ifm_active |= IFM_FDX; else ifmr->ifm_active |= IFM_HDX; } EM_CORE_UNLOCK(adapter); } /********************************************************************* * * Media Ioctl callback * * This routine is called when the user changes speed/duplex using * media/mediopt option with ifconfig. 
* **********************************************************************/ static int em_media_change(if_t ifp) { struct adapter *adapter = if_getsoftc(ifp); struct ifmedia *ifm = &adapter->media; INIT_DEBUGOUT("em_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); EM_CORE_LOCK(adapter); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT; break; case IFM_1000_LX: case IFM_1000_SX: case IFM_1000_T: adapter->hw.mac.autoneg = DO_AUTO_NEG; adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case IFM_100_TX: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF; break; case IFM_10_T: adapter->hw.mac.autoneg = FALSE; adapter->hw.phy.autoneg_advertised = 0; if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL; else adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF; break; default: device_printf(adapter->dev, "Unsupported media type\n"); } em_init_locked(adapter); EM_CORE_UNLOCK(adapter); return (0); } /********************************************************************* * * This routine maps the mbufs to tx descriptors. * * return 0 on success, positive on failure **********************************************************************/ static int em_xmit(struct tx_ring *txr, struct mbuf **m_headp) { struct adapter *adapter = txr->adapter; bus_dma_segment_t segs[EM_MAX_SCATTER]; bus_dmamap_t map; struct em_txbuffer *tx_buffer, *tx_buffer_mapped; struct e1000_tx_desc *ctxd = NULL; struct mbuf *m_head; struct ether_header *eh; struct ip *ip = NULL; struct tcphdr *tp = NULL; u32 txd_upper = 0, txd_lower = 0; int ip_off, poff; int nsegs, i, j, first, last = 0; int error; bool do_tso, tso_desc, remap = TRUE; m_head = *m_headp; do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO); tso_desc = FALSE; ip_off = poff = 0; /* * Intel recommends entire IP/TCP header length reside in a single * buffer. If multiple descriptors are used to describe the IP and * TCP header, each descriptor should describe one or more * complete headers; descriptors referencing only parts of headers * are not supported. If all layer headers are not coalesced into * a single buffer, each buffer should not cross a 4KB boundary, * or be larger than the maximum read request size. * Controller also requires modifing IP/TCP header to make TSO work * so we firstly get a writable mbuf chain then coalesce ethernet/ * IP/TCP header into a single buffer to meet the requirement of * controller. This also simplifies IP/TCP/UDP checksum offloading * which also has similar restrictions. */ if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) { if (do_tso || (m_head->m_next != NULL && m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) { if (M_WRITABLE(*m_headp) == 0) { m_head = m_dup(*m_headp, M_NOWAIT); m_freem(*m_headp); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } *m_headp = m_head; } } /* * XXX * Assume IPv4, we don't have TSO/checksum offload support * for IPv6 yet. 
*/ ip_off = sizeof(struct ether_header); if (m_head->m_len < ip_off) { m_head = m_pullup(m_head, ip_off); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } eh = mtod(m_head, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { ip_off = sizeof(struct ether_vlan_header); if (m_head->m_len < ip_off) { m_head = m_pullup(m_head, ip_off); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } } if (m_head->m_len < ip_off + sizeof(struct ip)) { m_head = m_pullup(m_head, ip_off + sizeof(struct ip)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } ip = (struct ip *)(mtod(m_head, char *) + ip_off); poff = ip_off + (ip->ip_hl << 2); if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) { if (m_head->m_len < poff + sizeof(struct tcphdr)) { m_head = m_pullup(m_head, poff + sizeof(struct tcphdr)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } tp = (struct tcphdr *)(mtod(m_head, char *) + poff); /* * TSO workaround: * pull 4 more bytes of data into it. */ if (m_head->m_len < poff + (tp->th_off << 2)) { m_head = m_pullup(m_head, poff + (tp->th_off << 2) + TSO_WORKAROUND); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } ip = (struct ip *)(mtod(m_head, char *) + ip_off); tp = (struct tcphdr *)(mtod(m_head, char *) + poff); if (do_tso) { ip->ip_len = htons(m_head->m_pkthdr.tso_segsz + (ip->ip_hl << 2) + (tp->th_off << 2)); ip->ip_sum = 0; /* * The pseudo TCP checksum does not include TCP * payload length so driver should recompute * the checksum here what hardware expect to * see. This is adherence of Microsoft's Large * Send specification. */ tp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { if (m_head->m_len < poff + sizeof(struct udphdr)) { m_head = m_pullup(m_head, poff + sizeof(struct udphdr)); if (m_head == NULL) { *m_headp = NULL; return (ENOBUFS); } } ip = (struct ip *)(mtod(m_head, char *) + ip_off); } *m_headp = m_head; } /* * Map the packet for DMA * * Capture the first descriptor index, * this descriptor will have the index * of the EOP which is the only one that * now gets a DONE bit writeback. */ first = txr->next_avail_desc; tx_buffer = &txr->tx_buffers[first]; tx_buffer_mapped = tx_buffer; map = tx_buffer->map; retry: error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); /* * There are two types of errors we can (try) to handle: * - EFBIG means the mbuf chain was too long and bus_dma ran * out of segments. Defragment the mbuf chain and try again. * - ENOMEM means bus_dma could not obtain enough bounce buffers * at this point in time. Defer sending and try again later. * All other errors, in particular EINVAL, are fatal and prevent the * mbuf chain from ever going through. Drop it and report error. */ if (error == EFBIG && remap) { struct mbuf *m; m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER); if (m == NULL) { adapter->mbuf_defrag_failed++; m_freem(*m_headp); *m_headp = NULL; return (ENOBUFS); } *m_headp = m; /* Try it again, but only once */ remap = FALSE; goto retry; } else if (error != 0) { adapter->no_tx_dma_setup++; m_freem(*m_headp); *m_headp = NULL; return (error); } /* * TSO Hardware workaround, if this packet is not * TSO, and is only a single descriptor long, and * it follows a TSO burst, then we need to add a * sentinel descriptor to prevent premature writeback. 
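 *
 * The sentinel itself is built further down: when tso_desc is set
 * and the final segment is longer than 8 bytes, that segment is
 * split so a small TSO_WORKAROUND-byte trailing descriptor ends up
 * carrying the EOP/RS bits instead of the large one.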
*/ if ((!do_tso) && (txr->tx_tso == TRUE)) { if (nsegs == 1) tso_desc = TRUE; txr->tx_tso = FALSE; } if (txr->tx_avail < (nsegs + EM_MAX_SCATTER)) { txr->no_desc_avail++; bus_dmamap_unload(txr->txtag, map); return (ENOBUFS); } m_head = *m_headp; /* Do hardware assists */ if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { em_tso_setup(txr, m_head, ip_off, ip, tp, &txd_upper, &txd_lower); /* we need to make a final sentinel transmit desc */ tso_desc = TRUE; } else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) em_transmit_checksum_setup(txr, m_head, ip_off, ip, &txd_upper, &txd_lower); if (m_head->m_flags & M_VLANTAG) { /* Set the vlan id. */ txd_upper |= htole16(if_getvtag(m_head)) << 16; /* Tell hardware to add tag */ txd_lower |= htole32(E1000_TXD_CMD_VLE); } i = txr->next_avail_desc; /* Set up our transmit descriptors */ for (j = 0; j < nsegs; j++) { bus_size_t seg_len; bus_addr_t seg_addr; tx_buffer = &txr->tx_buffers[i]; ctxd = &txr->tx_base[i]; seg_addr = segs[j].ds_addr; seg_len = segs[j].ds_len; /* ** TSO Workaround: ** If this is the last descriptor, we want to ** split it so we have a small final sentinel */ if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) { seg_len -= TSO_WORKAROUND; ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); if (++i == adapter->num_tx_desc) i = 0; /* Now make the sentinel */ txr->tx_avail--; ctxd = &txr->tx_base[i]; tx_buffer = &txr->tx_buffers[i]; ctxd->buffer_addr = htole64(seg_addr + seg_len); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | TSO_WORKAROUND); ctxd->upper.data = htole32(txd_upper); last = i; if (++i == adapter->num_tx_desc) i = 0; } else { ctxd->buffer_addr = htole64(seg_addr); ctxd->lower.data = htole32( adapter->txd_cmd | txd_lower | seg_len); ctxd->upper.data = htole32(txd_upper); last = i; if (++i == adapter->num_tx_desc) i = 0; } tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; } txr->next_avail_desc = i; txr->tx_avail -= nsegs; tx_buffer->m_head = m_head; /* ** Here we swap the map so the last descriptor, ** which gets the completion interrupt has the ** real map, and the first descriptor gets the ** unused map from this descriptor. */ tx_buffer_mapped->map = tx_buffer->map; tx_buffer->map = map; bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); /* * Last Descriptor of Packet * needs End Of Packet (EOP) * and Report Status (RS) */ ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); /* * Keep track in the first buffer which * descriptor will be written back */ tx_buffer = &txr->tx_buffers[first]; tx_buffer->next_eop = last; /* * Advance the Transmit Descriptor Tail (TDT), this tells the E1000 * that this frame is available to transmit. 
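 *
 * The ring is synced with BUS_DMASYNC_PREWRITE first, so the
 * hardware never sees the new tail value before the descriptors
 * themselves are visible in memory.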
*/ bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); return (0); } static void em_set_promisc(struct adapter *adapter) { if_t ifp = adapter->ifp; u32 reg_rctl; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (if_getflags(ifp) & IFF_PROMISC) { reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); /* Turn this on if you want to see bad packets */ if (em_debug_sbp) reg_rctl |= E1000_RCTL_SBP; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else if (if_getflags(ifp) & IFF_ALLMULTI) { reg_rctl |= E1000_RCTL_MPE; reg_rctl &= ~E1000_RCTL_UPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } } static void em_disable_promisc(struct adapter *adapter) { if_t ifp = adapter->ifp; u32 reg_rctl; int mcnt = 0; reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= (~E1000_RCTL_UPE); if (if_getflags(ifp) & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES); /* Don't disable if in MAX groups */ if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) reg_rctl &= (~E1000_RCTL_MPE); reg_rctl &= (~E1000_RCTL_SBP); E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } /********************************************************************* * Multicast Update * * This routine is called whenever multicast address list is updated. * **********************************************************************/ static void em_set_multi(struct adapter *adapter) { if_t ifp = adapter->ifp; u32 reg_rctl = 0; u8 *mta; /* Multicast array memory */ int mcnt = 0; IOCTL_DEBUGOUT("em_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_clear_mwi(&adapter->hw); reg_rctl |= E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); } if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES); if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); } else e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); if (adapter->hw.mac.type == e1000_82542 && adapter->hw.revision_id == E1000_REVISION_2) { reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); reg_rctl &= ~E1000_RCTL_RST; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); msec_delay(5); if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE) e1000_pci_set_mwi(&adapter->hw); } } /********************************************************************* * Timer routine * * This routine checks for link status and updates statistics. 
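 *
 * It also drives the transmit watchdog: once a ring's busy counter
 * reaches EM_TX_MAXTRIES the ring is marked EM_TX_HUNG, and a ring
 * found already hung on the next tick causes the interface to be
 * reset via em_init_locked().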
* **********************************************************************/ static void em_local_timer(void *arg) { struct adapter *adapter = arg; if_t ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; u32 trigger = 0; EM_CORE_LOCK_ASSERT(adapter); em_update_link_status(adapter); em_update_stats_counters(adapter); /* Reset LAA into RAR[0] on 82571 */ if ((adapter->hw.mac.type == e1000_82571) && e1000_get_laa_state_82571(&adapter->hw)) e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* Mask to use in the irq trigger */ if (adapter->msix_mem) { for (int i = 0; i < adapter->num_queues; i++, rxr++) trigger |= rxr->ims; rxr = adapter->rx_rings; } else trigger = E1000_ICS_RXDMT0; /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. */ for (int i = 0; i < adapter->num_queues; i++, txr++) { if (txr->busy == EM_TX_HUNG) goto hung; if (txr->busy >= EM_TX_MAXTRIES) txr->busy = EM_TX_HUNG; /* Schedule a TX tasklet if needed */ if (txr->tx_avail <= EM_MAX_SCATTER) taskqueue_enqueue(txr->tq, &txr->tx_task); } callout_reset(&adapter->timer, hz, em_local_timer, adapter); #ifndef DEVICE_POLLING /* Trigger an RX interrupt to guarantee mbuf refresh */ E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger); #endif return; hung: /* Looks like we're hung */ device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n", txr->me); em_print_debug_info(adapter); if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); adapter->watchdog_events++; em_init_locked(adapter); } static void em_update_link_status(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; if_t ifp = adapter->ifp; device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; u32 link_check = 0; /* Get the cached link value or read phy for real */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { if (hw->mac.type == e1000_pch_spt) msec_delay(50); /* Do the work to read phy */ e1000_check_for_link(hw); link_check = !hw->mac.get_link_status; if (link_check) /* ESB2 fix */ e1000_cfg_on_link_up(hw); } else link_check = TRUE; break; case e1000_media_type_fiber: e1000_check_for_link(hw); link_check = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU); break; case e1000_media_type_internal_serdes: e1000_check_for_link(hw); link_check = adapter->hw.mac.serdes_has_link; break; default: case e1000_media_type_unknown: break; } /* Now check for a transition */ if (link_check && (adapter->link_active == 0)) { e1000_get_speed_and_duplex(hw, &adapter->link_speed, &adapter->link_duplex); /* Check if we must disable SPEED_MODE bit on PCI-E */ if ((adapter->link_speed != SPEED_1000) && ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572))) { int tarc0; tarc0 = E1000_READ_REG(hw, E1000_TARC(0)); tarc0 &= ~TARC_SPEED_MODE_BIT; E1000_WRITE_REG(hw, E1000_TARC(0), tarc0); } if (bootverbose) device_printf(dev, "Link is up %d Mbps %s\n", adapter->link_speed, ((adapter->link_duplex == FULL_DUPLEX) ? 
"Full Duplex" : "Half Duplex")); adapter->link_active = 1; adapter->smartspeed = 0; if_setbaudrate(ifp, adapter->link_speed * 1000000); if_link_state_change(ifp, LINK_STATE_UP); } else if (!link_check && (adapter->link_active == 1)) { if_setbaudrate(ifp, 0); adapter->link_speed = 0; adapter->link_duplex = 0; if (bootverbose) device_printf(dev, "Link is Down\n"); adapter->link_active = 0; /* Link down, disable hang detection */ for (int i = 0; i < adapter->num_queues; i++, txr++) txr->busy = EM_TX_IDLE; if_link_state_change(ifp, LINK_STATE_DOWN); } } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * * This routine should always be called with BOTH the CORE * and TX locks. **********************************************************************/ static void em_stop(void *arg) { struct adapter *adapter = arg; if_t ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; EM_CORE_LOCK_ASSERT(adapter); INIT_DEBUGOUT("em_stop: begin"); em_disable_intr(adapter); callout_stop(&adapter->timer); /* Tell the stack that the interface is no longer active */ if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); /* Disarm Hang Detection. */ for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); txr->busy = EM_TX_IDLE; EM_TX_UNLOCK(txr); } /* I219 needs some special flushing to avoid hangs */ if (adapter->hw.mac.type == e1000_pch_spt) em_flush_desc_rings(adapter); e1000_reset_hw(&adapter->hw); E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } /********************************************************************* * * Determine hardware revision. 
* **********************************************************************/ static void em_identify_hardware(struct adapter *adapter) { device_t dev = adapter->dev; /* Make sure our PCI config space has the necessary stuff set */ pci_enable_busmaster(dev); adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); /* Save off the information about this board */ adapter->hw.vendor_id = pci_get_vendor(dev); adapter->hw.device_id = pci_get_device(dev); adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); adapter->hw.subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); adapter->hw.subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); /* Do Shared Code Init and Setup */ if (e1000_set_mac_type(&adapter->hw)) { device_printf(dev, "Setup init failure\n"); return; } } static int em_allocate_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; int rid; rid = PCIR_BAR(0); adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->memory == NULL) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->memory); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->memory); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; adapter->hw.back = &adapter->osdep; return (0); } /********************************************************************* * * Setup the Legacy or MSI Interrupt handler * **********************************************************************/ int em_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; int error, rid = 0; /* Manually turn off all interrupts */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); if (adapter->msix == 1) /* using MSI */ rid = 1; /* We allocate a single interrupt resource */ adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "interrupt\n"); return (ENXIO); } /* * Allocate a fast interrupt and the associated * deferred processing contexts. */ TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter); adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); /* Use a TX only tasklet for local timer */ TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, taskqueue_thread_enqueue, &txr->tq); taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq", device_get_nameunit(adapter->dev)); TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter); if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET, em_irq_fast, NULL, adapter, &adapter->tag)) != 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); taskqueue_free(adapter->tq); adapter->tq = NULL; return (error); } return (0); } /********************************************************************* * * Setup the MSIX Interrupt handlers * This is not really Multiqueue, rather * its just separate interrupt vectors * for TX, RX, and Link. 
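 *
 * Vectors are handed out RX rings first, then TX rings, then the
 * link interrupt, and each queue vector is bound to the next CPU
 * tracked in em_last_bind_cpu.  With EM_MULTIQUEUE and two queues
 * this is the five-vector layout em_setup_msix() asks for.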
* **********************************************************************/ int em_allocate_msix(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; int error, rid, vector = 0; int cpu_id = 0; /* Make sure all interrupts are disabled */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); /* First set up ring resources */ for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) { /* RX ring */ rid = vector + 1; rxr->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (rxr->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "RX MSIX Interrupt %d\n", i); return (ENXIO); } if ((error = bus_setup_intr(dev, rxr->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx, rxr, &rxr->tag)) != 0) { device_printf(dev, "Failed to register RX handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i); #endif rxr->msix = vector; if (em_last_bind_cpu < 0) em_last_bind_cpu = CPU_FIRST(); cpu_id = em_last_bind_cpu; bus_bind_intr(dev, rxr->res, cpu_id); TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr); rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT, taskqueue_thread_enqueue, &rxr->tq); taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)", device_get_nameunit(adapter->dev), cpu_id); /* ** Set the bit to enable interrupt ** in E1000_IMS -- bits 20 and 21 ** are for RX0 and RX1, note this has ** NOTHING to do with the MSIX vector */ rxr->ims = 1 << (20 + i); adapter->ims |= rxr->ims; adapter->ivars |= (8 | rxr->msix) << (i * 4); em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); } for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) { /* TX ring */ rid = vector + 1; txr->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (txr->res == NULL) { device_printf(dev, "Unable to allocate bus resource: " "TX MSIX Interrupt %d\n", i); return (ENXIO); } if ((error = bus_setup_intr(dev, txr->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx, txr, &txr->tag)) != 0) { device_printf(dev, "Failed to register TX handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i); #endif txr->msix = vector; if (em_last_bind_cpu < 0) em_last_bind_cpu = CPU_FIRST(); cpu_id = em_last_bind_cpu; bus_bind_intr(dev, txr->res, cpu_id); TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, taskqueue_thread_enqueue, &txr->tq); taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)", device_get_nameunit(adapter->dev), cpu_id); /* ** Set the bit to enable interrupt ** in E1000_IMS -- bits 22 and 23 ** are for TX0 and TX1, note this has ** NOTHING to do with the MSIX vector */ txr->ims = 1 << (22 + i); adapter->ims |= txr->ims; adapter->ivars |= (8 | txr->msix) << (8 + (i * 4)); em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); } /* Link interrupt */ rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!adapter->res) { device_printf(dev,"Unable to allocate " "bus resource: Link interrupt [%d]\n", rid); return (ENXIO); } /* Set the link handler function */ error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_link, adapter, &adapter->tag); if (error) { adapter->res = NULL; device_printf(dev, "Failed to register LINK handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); #endif 
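	/*
	 * Added note: adapter->ivars, assembled above and here, packs one 4-bit
	 * entry per interrupt cause for the 82574 IVAR register -- the OR'ed-in
	 * "8" marks the entry valid and the low bits carry the MSI-X vector:
	 * RX queues in nibbles 0-1, TX queues in nibbles 2-3, the link cause in
	 * bits 16-19, with bit 31 also set.  The assembled value is programmed
	 * into the hardware elsewhere in the driver; see the 82574 datasheet
	 * for the authoritative field definitions.
	 */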
adapter->linkvec = vector; adapter->ivars |= (8 | vector) << 16; adapter->ivars |= 0x80000000; return (0); } static void em_free_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr; struct rx_ring *rxr; int rid; /* ** Release all the queue interrupt resources: */ for (int i = 0; i < adapter->num_queues; i++) { txr = &adapter->tx_rings[i]; /* an early abort? */ if (txr == NULL) break; rid = txr->msix +1; if (txr->tag != NULL) { bus_teardown_intr(dev, txr->res, txr->tag); txr->tag = NULL; } if (txr->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, txr->res); rxr = &adapter->rx_rings[i]; /* an early abort? */ if (rxr == NULL) break; rid = rxr->msix +1; if (rxr->tag != NULL) { bus_teardown_intr(dev, rxr->res, rxr->tag); rxr->tag = NULL; } if (rxr->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, rxr->res); } if (adapter->linkvec) /* we are doing MSIX */ rid = adapter->linkvec + 1; else (adapter->msix != 0) ? (rid = 1):(rid = 0); if (adapter->tag != NULL) { bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); if (adapter->msix) pci_release_msi(dev); if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); if (adapter->memory != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->memory); if (adapter->flash != NULL) bus_release_resource(dev, SYS_RES_MEMORY, EM_FLASH, adapter->flash); } /* * Setup MSI or MSI/X */ static int em_setup_msix(struct adapter *adapter) { device_t dev = adapter->dev; int val; /* Nearly always going to use one queue */ adapter->num_queues = 1; /* ** Try using MSI-X for Hartwell adapters */ if ((adapter->hw.mac.type == e1000_82574) && (em_enable_msix == TRUE)) { #ifdef EM_MULTIQUEUE adapter->num_queues = (em_num_queues == 1) ? 1 : 2; if (adapter->num_queues > 1) em_enable_vectors_82574(adapter); #endif /* Map the MSIX BAR */ int rid = PCIR_BAR(EM_MSIX_BAR); adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->msix_mem == NULL) { /* May not be enabled */ device_printf(adapter->dev, "Unable to map MSIX table \n"); goto msi; } val = pci_msix_count(dev); #ifdef EM_MULTIQUEUE /* We need 5 vectors in the multiqueue case */ if (adapter->num_queues > 1 ) { if (val >= 5) val = 5; else { adapter->num_queues = 1; device_printf(adapter->dev, "Insufficient MSIX vectors for >1 queue, " "using single queue...\n"); goto msix_one; } } else { msix_one: #endif if (val >= 3) val = 3; else { device_printf(adapter->dev, "Insufficient MSIX vectors, using MSI\n"); goto msi; } #ifdef EM_MULTIQUEUE } #endif if ((pci_alloc_msix(dev, &val) == 0)) { device_printf(adapter->dev, "Using MSIX interrupts " "with %d vectors\n", val); return (val); } /* ** If MSIX alloc failed or provided us with ** less than needed, free and fall through to MSI */ pci_release_msi(dev); } msi: if (adapter->msix_mem != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem); adapter->msix_mem = NULL; } val = 1; if (pci_alloc_msi(dev, &val) == 0) { device_printf(adapter->dev, "Using an MSI interrupt\n"); return (val); } /* Should only happen due to manual configuration */ device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n"); return (0); } /* ** The 3 following flush routines are used as a workaround in the ** I219 client parts and only for them. 
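**
** Added summary: em_flush_desc_rings() below drives the sequence -- it first
** sets the DISABLE_MULR_FIX bit in FEXTNVM11, then reads the descriptor ring
** status word from PCI config space (PCICFG_DESC_RING_STATUS); only when
** FLUSH_DESC_REQUIRED is set and TDLEN(0) is non-zero does it flush the TX
** ring, rechecking afterwards to decide whether the RX ring needs flushing
** as well.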
** ** em_flush_tx_ring - remove all descriptors from the tx_ring ** ** We want to clear all pending descriptors from the TX ring. ** zeroing happens when the HW reads the regs. We assign the ring itself as ** the data of the next descriptor. We don't care about the data we are about ** to reset the HW. */ static void em_flush_tx_ring(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct tx_ring *txr = adapter->tx_rings; struct e1000_tx_desc *txd; u32 tctl, txd_lower = E1000_TXD_CMD_IFCS; u16 size = 512; tctl = E1000_READ_REG(hw, E1000_TCTL); E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN); txd = &txr->tx_base[txr->next_avail_desc++]; if (txr->next_avail_desc == adapter->num_tx_desc) txr->next_avail_desc = 0; /* Just use the ring as a dummy buffer addr */ txd->buffer_addr = txr->txdma.dma_paddr; txd->lower.data = htole32(txd_lower | size); txd->upper.data = 0; /* flush descriptors to memory before notifying the HW */ wmb(); E1000_WRITE_REG(hw, E1000_TDT(0), txr->next_avail_desc); mb(); usec_delay(250); } /* ** em_flush_rx_ring - remove all descriptors from the rx_ring ** ** Mark all descriptors in the RX ring as consumed and disable the rx ring */ static void em_flush_rx_ring(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 rctl, rxdctl; rctl = E1000_READ_REG(hw, E1000_RCTL); E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); E1000_WRITE_FLUSH(hw); usec_delay(150); rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); /* zero the lower 14 bits (prefetch and host thresholds) */ rxdctl &= 0xffffc000; /* * update thresholds: prefetch threshold to 31, host threshold to 1 * and make sure the granularity is "descriptors" and not "cache lines" */ rxdctl |= (0x1F | (1 << 8) | E1000_RXDCTL_THRESH_UNIT_DESC); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl); /* momentarily enable the RX ring for the changes to take effect */ E1000_WRITE_REG(hw, E1000_RCTL, rctl | E1000_RCTL_EN); E1000_WRITE_FLUSH(hw); usec_delay(150); E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); } /* ** em_flush_desc_rings - remove all descriptors from the descriptor rings ** ** In i219, the descriptor rings must be emptied before resetting the HW ** or before changing the device state to D3 during runtime (runtime PM). ** ** Failure to do this will cause the HW to enter a unit hang state which can ** only be released by PCI reset on the device ** */ static void em_flush_desc_rings(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; device_t dev = adapter->dev; u16 hang_state; u32 fext_nvm11, tdlen; /* First, disable MULR fix in FEXTNVM11 */ fext_nvm11 = E1000_READ_REG(hw, E1000_FEXTNVM11); fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX; E1000_WRITE_REG(hw, E1000_FEXTNVM11, fext_nvm11); /* do nothing if we're not in faulty state, or if the queue is empty */ tdlen = E1000_READ_REG(hw, E1000_TDLEN(0)); hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2); if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen) return; em_flush_tx_ring(adapter); /* recheck, maybe the fault is caused by the rx ring */ hang_state = pci_read_config(dev, PCICFG_DESC_RING_STATUS, 2); if (hang_state & FLUSH_DESC_REQUIRED) em_flush_rx_ring(adapter); } /********************************************************************* * * Initialize the hardware to a configuration * as specified by the adapter structure. 
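 *  Worked example for the flow-control setup below (illustrative numbers):
 *  with a 32K receive packet buffer and a standard ~1518-byte max frame,
 *  rx_buffer_size = 32 * 1024 = 32768, so the high water mark becomes
 *  32768 - roundup2(1518, 1024) = 30720 bytes and the low water mark
 *  30720 - 1500 = 29220 bytes.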
* **********************************************************************/ static void em_reset(struct adapter *adapter) { device_t dev = adapter->dev; if_t ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u16 rx_buffer_size; u32 pba; INIT_DEBUGOUT("em_reset: begin"); /* Set up smart power down as default off on newer adapters. */ if (!em_smart_pwr_down && (hw->mac.type == e1000_82571 || hw->mac.type == e1000_82572)) { u16 phy_tmp = 0; /* Speed up time to link by disabling smart power down. */ e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &phy_tmp); phy_tmp &= ~IGP02E1000_PM_SPD; e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, phy_tmp); } /* * Packet Buffer Allocation (PBA) * Writing PBA sets the receive portion of the buffer * the remainder is used for the transmit buffer. */ switch (hw->mac.type) { /* Total Packet Buffer on these is 48K */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */ break; case e1000_82573: /* 82573: Total Packet Buffer is 32K */ pba = E1000_PBA_12K; /* 12K for Rx, 20K for Tx */ break; case e1000_82574: case e1000_82583: pba = E1000_PBA_20K; /* 20K for Rx, 20K for Tx */ break; case e1000_ich8lan: pba = E1000_PBA_8K; break; case e1000_ich9lan: case e1000_ich10lan: /* Boost Receive side for jumbo frames */ if (adapter->hw.mac.max_frame_size > 4096) pba = E1000_PBA_14K; else pba = E1000_PBA_10K; break; case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: pba = E1000_PBA_26K; break; default: if (adapter->hw.mac.max_frame_size > 8192) pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */ else pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */ } E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba); /* * These parameters control the automatic generation (Tx) and * response (Rx) to Ethernet PAUSE frames. * - High water mark should allow for at least two frames to be * received after sending an XOFF. * - Low water mark works best when it is very near the high water mark. * This allows the receiver to restart by sending XON when it has * drained a bit. Here we use an arbitrary value of 1500 which will * restart after one full frame is pulled from the buffer. There * could be several smaller frames in the buffer and if so they will * not trigger the XON until their total number reduces the buffer * by 1500. * - The pause time is fairly large at 1000 x 512ns = 512 usec. */ rx_buffer_size = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 ); hw->fc.high_water = rx_buffer_size - roundup2(adapter->hw.mac.max_frame_size, 1024); hw->fc.low_water = hw->fc.high_water - 1500; if (adapter->fc) /* locally set flow control value? 
*/ hw->fc.requested_mode = adapter->fc; else hw->fc.requested_mode = e1000_fc_full; if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; else hw->fc.pause_time = EM_FC_PAUSE_TIME; hw->fc.send_xon = TRUE; /* Device specific overrides/settings */ switch (hw->mac.type) { case e1000_pchlan: /* Workaround: no TX flow ctrl for PCH */ hw->fc.requested_mode = e1000_fc_rx_pause; hw->fc.pause_time = 0xFFFF; /* override */ if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x3500; hw->fc.low_water = 0x1500; } else { hw->fc.high_water = 0x5000; hw->fc.low_water = 0x3000; } hw->fc.refresh_time = 0x1000; break; case e1000_pch2lan: case e1000_pch_lpt: case e1000_pch_spt: hw->fc.high_water = 0x5C20; hw->fc.low_water = 0x5048; hw->fc.pause_time = 0x0650; hw->fc.refresh_time = 0x0400; /* Jumbos need adjusted PBA */ if (if_getmtu(ifp) > ETHERMTU) E1000_WRITE_REG(hw, E1000_PBA, 12); else E1000_WRITE_REG(hw, E1000_PBA, 26); break; case e1000_ich9lan: case e1000_ich10lan: if (if_getmtu(ifp) > ETHERMTU) { hw->fc.high_water = 0x2800; hw->fc.low_water = hw->fc.high_water - 8; break; } /* else fall thru */ default: if (hw->mac.type == e1000_80003es2lan) hw->fc.pause_time = 0xFFFF; break; } /* I219 needs some special flushing to avoid hangs */ if (hw->mac.type == e1000_pch_spt) em_flush_desc_rings(adapter); /* Issue a global reset */ e1000_reset_hw(hw); E1000_WRITE_REG(hw, E1000_WUC, 0); em_disable_aspm(adapter); /* and a re-init */ if (e1000_init_hw(hw) < 0) { device_printf(dev, "Hardware Initialization Failed\n"); return; } E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN); e1000_get_phy_info(hw); e1000_check_for_link(hw); return; } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ static int em_setup_interface(device_t dev, struct adapter *adapter) { if_t ifp; INIT_DEBUGOUT("em_setup_interface: begin"); ifp = adapter->ifp = if_gethandle(IFT_ETHER); if (ifp == 0) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setdev(ifp, dev); if_setinitfn(ifp, em_init); if_setsoftc(ifp, adapter); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); if_setioctlfn(ifp, em_ioctl); if_setgetcounterfn(ifp, em_get_counter); /* TSO parameters */ ifp->if_hw_tsomax = IP_MAXPACKET; /* Take m_pullup(9)'s in em_xmit() w/ TSO into acount. */ ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER - 5; ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE; #ifdef EM_MULTIQUEUE /* Multiqueue stack interface */ if_settransmitfn(ifp, em_mq_start); if_setqflushfn(ifp, em_qflush); #else if_setstartfn(ifp, em_start); if_setsendqlen(ifp, adapter->num_tx_desc - 1); if_setsendqready(ifp); #endif ether_ifattach(ifp, adapter->hw.mac.addr); if_setcapabilities(ifp, 0); if_setcapenable(ifp, 0); if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4, 0); /* * Tell the upper layer(s) we * support full VLAN capability */ if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU, 0); if_setcapenable(ifp, if_getcapabilities(ifp)); /* ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. 
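** (At runtime the capability can typically be toggled with
** "ifconfig em0 vlanhwfilter" / "ifconfig em0 -vlanhwfilter", assuming the
** standard ifconfig(8) capability keywords.)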
If ** using vlans directly on the em driver you can ** enable this and get full hardware tag filtering. */ if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0); #ifdef DEVICE_POLLING if_setcapabilitiesbit(ifp, IFCAP_POLLING,0); #endif /* Enable only WOL MAGIC by default */ if (adapter->wol) { if_setcapabilitiesbit(ifp, IFCAP_WOL, 0); if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0); } /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, em_media_change, em_media_status); if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { u_char fiber_type = IFM_1000_SX; /* default type */ ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL); } else { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 0, NULL); if (adapter->hw.phy.type != e1000_phy_ife) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /* * Manage DMA'able memory. */ static void em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs[0].ds_addr; } static int em_dma_malloc(struct adapter *adapter, bus_size_t size, struct em_dma_alloc *dma, int mapflags) { int error; error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ EM_DBA_ALIGN, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->dma_tag); if (error) { device_printf(adapter->dev, "%s: bus_dma_tag_create failed: %d\n", __func__, error); goto fail_0; } error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); if (error) { device_printf(adapter->dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, error); goto fail_2; } dma->dma_paddr = 0; error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (error || dma->dma_paddr == 0) { device_printf(adapter->dev, "%s: bus_dmamap_load failed: %d\n", __func__, error); goto fail_3; } return (0); fail_3: bus_dmamap_unload(dma->dma_tag, dma->dma_map); fail_2: bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); bus_dma_tag_destroy(dma->dma_tag); fail_0: dma->dma_tag = NULL; return (error); } static void em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma) { if (dma->dma_tag == NULL) return; if (dma->dma_paddr != 0) { bus_dmamap_sync(dma->dma_tag, dma->dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->dma_tag, dma->dma_map); dma->dma_paddr = 0; } if (dma->dma_vaddr != NULL) { bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); dma->dma_vaddr = NULL; } bus_dma_tag_destroy(dma->dma_tag); dma->dma_tag = NULL; } /********************************************************************* * * Allocate 
memory for the transmit and receive rings, and then * the descriptors associated with each, called only once at attach. * **********************************************************************/ static int em_allocate_queues(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = NULL; struct rx_ring *rxr = NULL; int rsize, tsize, error = E1000_SUCCESS; int txconf = 0, rxconf = 0; /* Allocate the TX ring struct memory */ if (!(adapter->tx_rings = (struct tx_ring *) malloc(sizeof(struct tx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); error = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(adapter->rx_rings = (struct rx_ring *) malloc(sizeof(struct rx_ring) * adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); error = ENOMEM; goto rx_fail; } tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc), EM_DBA_ALIGN); /* * Now set up the TX queues, txconf is needed to handle the * possibility that things fail midcourse and we need to * undo memory gracefully */ for (int i = 0; i < adapter->num_queues; i++, txconf++) { /* Set up some basics */ txr = &adapter->tx_rings[i]; txr->adapter = adapter; txr->me = i; /* Initialize the TX lock */ snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); if (em_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate TX Descriptor memory\n"); error = ENOMEM; goto err_tx_desc; } txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr; bzero((void *)txr->tx_base, tsize); if (em_allocate_transmit_buffers(txr)) { device_printf(dev, "Critical Failure setting up transmit buffers\n"); error = ENOMEM; goto err_tx_desc; } #if __FreeBSD_version >= 800000 /* Allocate a buf ring */ txr->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &txr->tx_mtx); #endif } /* * Next the RX queues... */ rsize = roundup2(adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); for (int i = 0; i < adapter->num_queues; i++, rxconf++) { rxr = &adapter->rx_rings[i]; rxr->adapter = adapter; rxr->me = i; /* Initialize the RX lock */ snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", device_get_nameunit(dev), txr->me); mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF); if (em_dma_malloc(adapter, rsize, &rxr->rxdma, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate RxDescriptor memory\n"); error = ENOMEM; goto err_rx_desc; } rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr; bzero((void *)rxr->rx_base, rsize); /* Allocate receive buffers for the ring*/ if (em_allocate_receive_buffers(rxr)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); error = ENOMEM; goto err_rx_desc; } } return (0); err_rx_desc: for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--) em_dma_free(adapter, &rxr->rxdma); err_tx_desc: for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) em_dma_free(adapter, &txr->txdma); free(adapter->rx_rings, M_DEVBUF); rx_fail: #if __FreeBSD_version >= 800000 buf_ring_free(txr->br, M_DEVBUF); #endif free(adapter->tx_rings, M_DEVBUF); fail: return (error); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. 
This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int em_allocate_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; device_t dev = adapter->dev; struct em_txbuffer *txbuf; int error, i; /* * Setup DMA descriptor areas. */ if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ EM_TSO_SIZE, /* maxsize */ EM_MAX_SCATTER, /* nsegments */ PAGE_SIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->txtag))) { device_printf(dev,"Unable to allocate TX DMA tag\n"); goto fail; } if (!(txr->tx_buffers = (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) * adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); error = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); if (error != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } return 0; fail: /* We free all, it handles case where we are in the middle */ em_free_transmit_structures(adapter); return (error); } /********************************************************************* * * Initialize a transmit ring. * **********************************************************************/ static void em_setup_transmit_ring(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct em_txbuffer *txbuf; int i; #ifdef DEV_NETMAP struct netmap_slot *slot; struct netmap_adapter *na = netmap_getna(adapter->ifp); #endif /* DEV_NETMAP */ /* Clear the old descriptor contents */ EM_TX_LOCK(txr); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_TX, txr->me, 0); #endif /* DEV_NETMAP */ bzero((void *)txr->tx_base, (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc); /* Reset indices */ txr->next_avail_desc = 0; txr->next_to_clean = 0; /* Free any existing tx buffers. */ txbuf = txr->tx_buffers; for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; } #ifdef DEV_NETMAP if (slot) { int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); uint64_t paddr; void *addr; addr = PNMB(na, slot + si, &paddr); txr->tx_base[i].buffer_addr = htole64(paddr); /* reload the map for netmap mode */ netmap_load_map(na, txr->txtag, txbuf->map, addr); } #endif /* DEV_NETMAP */ /* clear the watch index */ txbuf->next_eop = -1; } /* Set number of descriptors available */ txr->tx_avail = adapter->num_tx_desc; txr->busy = EM_TX_IDLE; /* Clear checksum offload context. */ txr->last_hw_offload = 0; txr->last_hw_ipcss = 0; txr->last_hw_ipcso = 0; txr->last_hw_tucss = 0; txr->last_hw_tucso = 0; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); EM_TX_UNLOCK(txr); } /********************************************************************* * * Initialize all transmit rings. 
* **********************************************************************/ static void em_setup_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) em_setup_transmit_ring(txr); return; } /********************************************************************* * * Enable transmit unit. * **********************************************************************/ static void em_initialize_transmit_unit(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct e1000_hw *hw = &adapter->hw; u32 tctl, txdctl = 0, tarc, tipg = 0; INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); for (int i = 0; i < adapter->num_queues; i++, txr++) { u64 bus_addr = txr->txdma.dma_paddr; /* Base and Len of TX Ring */ E1000_WRITE_REG(hw, E1000_TDLEN(i), adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); E1000_WRITE_REG(hw, E1000_TDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_TDBAL(i), (u32)bus_addr); /* Init the HEAD/TAIL indices */ E1000_WRITE_REG(hw, E1000_TDT(i), 0); E1000_WRITE_REG(hw, E1000_TDH(i), 0); HW_DEBUGOUT2("Base = %x, Length = %x\n", E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)), E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); txr->busy = EM_TX_IDLE; txdctl = 0; /* clear txdctl */ txdctl |= 0x1f; /* PTHRESH */ txdctl |= 1 << 8; /* HTHRESH */ txdctl |= 1 << 16;/* WTHRESH */ txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */ txdctl |= E1000_TXDCTL_GRAN; txdctl |= 1 << 25; /* LWTHRESH */ E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } /* Set the default values for the Tx Inter Packet Gap timer */ switch (adapter->hw.mac.type) { case e1000_80003es2lan: tipg = DEFAULT_82543_TIPG_IPGR1; tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; break; default: if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) tipg = DEFAULT_82543_TIPG_IPGT_FIBER; else tipg = DEFAULT_82543_TIPG_IPGT_COPPER; tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; } E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg); E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value); if(adapter->hw.mac.type >= e1000_82540) E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value); if ((adapter->hw.mac.type == e1000_82571) || (adapter->hw.mac.type == e1000_82572)) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_SPEED_MODE_BIT; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } else if (adapter->hw.mac.type == e1000_80003es2lan) { /* errata: program both queues to unweighted RR */ tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else if (adapter->hw.mac.type == e1000_82574) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= TARC_ERRATA_BIT; if ( adapter->num_queues > 1) { tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX); E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); } else E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } adapter->txd_cmd = E1000_TXD_CMD_IFCS; if (adapter->tx_int_delay.value > 0) adapter->txd_cmd |= E1000_TXD_CMD_IDE; /* Program the Transmit Control Register */ tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL); tctl &= ~E1000_TCTL_CT; tctl |= (E1000_TCTL_PSP | 
E1000_TCTL_RTLC | E1000_TCTL_EN | (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); if (adapter->hw.mac.type >= e1000_82571) tctl |= E1000_TCTL_MULR; /* This write will effectively turn on the transmit unit. */ E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl); if (hw->mac.type == e1000_pch_spt) { u32 reg; reg = E1000_READ_REG(hw, E1000_IOSFPC); reg |= E1000_RCTL_RDMTS_HEX; E1000_WRITE_REG(hw, E1000_IOSFPC, reg); reg = E1000_READ_REG(hw, E1000_TARC(0)); reg |= E1000_TARC0_CB_MULTIQ_3_REQ; E1000_WRITE_REG(hw, E1000_TARC(0), reg); } } /********************************************************************* * * Free all transmit rings. * **********************************************************************/ static void em_free_transmit_structures(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) { EM_TX_LOCK(txr); em_free_transmit_buffers(txr); em_dma_free(adapter, &txr->txdma); EM_TX_UNLOCK(txr); EM_TX_LOCK_DESTROY(txr); } free(adapter->tx_rings, M_DEVBUF); } /********************************************************************* * * Free transmit ring related data structures. * **********************************************************************/ static void em_free_transmit_buffers(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct em_txbuffer *txbuf; INIT_DEBUGOUT("free_transmit_ring: begin"); if (txr->tx_buffers == NULL) return; for (int i = 0; i < adapter->num_tx_desc; i++) { txbuf = &txr->tx_buffers[i]; if (txbuf->m_head != NULL) { bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, txbuf->map); m_freem(txbuf->m_head); txbuf->m_head = NULL; if (txbuf->map != NULL) { bus_dmamap_destroy(txr->txtag, txbuf->map); txbuf->map = NULL; } } else if (txbuf->map != NULL) { bus_dmamap_unload(txr->txtag, txbuf->map); bus_dmamap_destroy(txr->txtag, txbuf->map); txbuf->map = NULL; } } #if __FreeBSD_version >= 800000 if (txr->br != NULL) buf_ring_free(txr->br, M_DEVBUF); #endif if (txr->tx_buffers != NULL) { free(txr->tx_buffers, M_DEVBUF); txr->tx_buffers = NULL; } if (txr->txtag != NULL) { bus_dma_tag_destroy(txr->txtag); txr->txtag = NULL; } return; } /********************************************************************* * The offload context is protocol specific (TCP/UDP) and thus * only needs to be set when the protocol changes. The occasion * of a context change can be a performance detriment, and * might be better just disabled. The reason arises in the way * in which the controller supports pipelined requests from the * Tx data DMA. Up to four requests can be pipelined, and they may * belong to the same packet or to multiple packets. However all * requests for one packet are issued before a request is issued * for a subsequent packet and if a request for the next packet * requires a context change, that request will be stalled * until the previous request completes. This means setting up * a new context effectively disables pipelined Tx data DMA which * in turn greatly slow down performance to send small sized * frames. 
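 * For that reason the code below caches the last programmed values
 * (last_hw_offload, last_hw_ipcss/ipcso, last_hw_tucss/tucso) and skips
 * emitting an identical context descriptor when nothing has changed --
 * except on the 82574 with more than one queue, where the hardware only
 * remembers a single context and a fresh one must be set up every time.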
**********************************************************************/ static void em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, struct ip *ip, u32 *txd_upper, u32 *txd_lower) { struct adapter *adapter = txr->adapter; struct e1000_context_desc *TXD = NULL; struct em_txbuffer *tx_buffer; int cur, hdr_len; u32 cmd = 0; u16 offload = 0; u8 ipcso, ipcss, tucso, tucss; ipcss = ipcso = tucss = tucso = 0; hdr_len = ip_off + (ip->ip_hl << 2); cur = txr->next_avail_desc; /* Setup of IP header checksum. */ if (mp->m_pkthdr.csum_flags & CSUM_IP) { *txd_upper |= E1000_TXD_POPTS_IXSM << 8; offload |= CSUM_IP; ipcss = ip_off; ipcso = ip_off + offsetof(struct ip, ip_sum); /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; TXD->lower_setup.ip_fields.ipcss = ipcss; TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len); TXD->lower_setup.ip_fields.ipcso = ipcso; cmd |= E1000_TXD_CMD_IP; } if (mp->m_pkthdr.csum_flags & CSUM_TCP) { *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; offload |= CSUM_TCP; tucss = hdr_len; tucso = hdr_len + offsetof(struct tcphdr, th_sum); /* * The 82574L can only remember the *last* context used * regardless of queue that it was use for. We cannot reuse * contexts on this hardware platform and must generate a new * context every time. 82574L hardware spec, section 7.2.6, * second note. */ if (adapter->num_queues < 2) { /* * Setting up new checksum offload context for every * frames takes a lot of processing time for hardware. * This also reduces performance a lot for small sized * frames so avoid it if driver can use previously * configured checksum offload context. */ if (txr->last_hw_offload == offload) { if (offload & CSUM_IP) { if (txr->last_hw_ipcss == ipcss && txr->last_hw_ipcso == ipcso && txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } else { if (txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } } txr->last_hw_offload = offload; txr->last_hw_tucss = tucss; txr->last_hw_tucso = tucso; } /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; TXD->upper_setup.tcp_fields.tucss = hdr_len; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = tucso; cmd |= E1000_TXD_CMD_TCP; } else if (mp->m_pkthdr.csum_flags & CSUM_UDP) { *txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; *txd_upper |= E1000_TXD_POPTS_TXSM << 8; tucss = hdr_len; tucso = hdr_len + offsetof(struct udphdr, uh_sum); /* * The 82574L can only remember the *last* context used * regardless of queue that it was use for. We cannot reuse * contexts on this hardware platform and must generate a new * context every time. 82574L hardware spec, section 7.2.6, * second note. */ if (adapter->num_queues < 2) { /* * Setting up new checksum offload context for every * frames takes a lot of processing time for hardware. * This also reduces performance a lot for small sized * frames so avoid it if driver can use previously * configured checksum offload context. 
*/ if (txr->last_hw_offload == offload) { if (offload & CSUM_IP) { if (txr->last_hw_ipcss == ipcss && txr->last_hw_ipcso == ipcso && txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } else { if (txr->last_hw_tucss == tucss && txr->last_hw_tucso == tucso) return; } } txr->last_hw_offload = offload; txr->last_hw_tucss = tucss; txr->last_hw_tucso = tucso; } /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place to put the checksum. */ TXD = (struct e1000_context_desc *)&txr->tx_base[cur]; TXD->upper_setup.tcp_fields.tucss = tucss; TXD->upper_setup.tcp_fields.tucse = htole16(0); TXD->upper_setup.tcp_fields.tucso = tucso; } if (offload & CSUM_IP) { txr->last_hw_ipcss = ipcss; txr->last_hw_ipcso = ipcso; } TXD->tcp_seg_setup.data = htole32(0); TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd); tx_buffer = &txr->tx_buffers[cur]; tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; if (++cur == adapter->num_tx_desc) cur = 0; txr->tx_avail--; txr->next_avail_desc = cur; } /********************************************************************** * * Setup work for hardware segmentation offload (TSO) * **********************************************************************/ static void em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off, struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower) { struct adapter *adapter = txr->adapter; struct e1000_context_desc *TXD; struct em_txbuffer *tx_buffer; int cur, hdr_len; /* * In theory we can use the same TSO context if and only if * frame is the same type(IP/TCP) and the same MSS. However * checking whether a frame has the same IP/TCP structure is * hard thing so just ignore that and always restablish a * new TSO context. */ hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2); *txd_lower = (E1000_TXD_CMD_DEXT | /* Extended descr type */ E1000_TXD_DTYP_D | /* Data descr type */ E1000_TXD_CMD_TSE); /* Do TSE on this packet */ /* IP and/or TCP header checksum calculation and insertion. */ *txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8; cur = txr->next_avail_desc; tx_buffer = &txr->tx_buffers[cur]; TXD = (struct e1000_context_desc *) &txr->tx_base[cur]; /* * Start offset for header checksum calculation. * End offset for header checksum calculation. * Offset of place put the checksum. */ TXD->lower_setup.ip_fields.ipcss = ip_off; TXD->lower_setup.ip_fields.ipcse = htole16(ip_off + (ip->ip_hl << 2) - 1); TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum); /* * Start offset for payload checksum calculation. * End offset for payload checksum calculation. * Offset of place to put the checksum. */ TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2); TXD->upper_setup.tcp_fields.tucse = 0; TXD->upper_setup.tcp_fields.tucso = ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum); /* * Payload size per packet w/o any headers. * Length of all headers up to payload. 
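 * For illustration (typical values, not taken from this code): on a plain
 * Ethernet TCP/IPv4 frame with no options ip_off is 14, so hdr_len =
 * 14 + 20 + 20 = 54, and the stack usually hands down tso_segsz = 1460 for
 * a 1500-byte MTU; the hardware then replicates those 54 header bytes in
 * front of each mss-sized chunk of payload it segments out.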
*/ TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz); TXD->tcp_seg_setup.fields.hdr_len = hdr_len; TXD->cmd_and_length = htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | /* Extended descr */ E1000_TXD_CMD_TSE | /* TSE context */ E1000_TXD_CMD_IP | /* Do IP csum */ E1000_TXD_CMD_TCP | /* Do TCP checksum */ (mp->m_pkthdr.len - (hdr_len))); /* Total len */ tx_buffer->m_head = NULL; tx_buffer->next_eop = -1; if (++cur == adapter->num_tx_desc) cur = 0; txr->tx_avail--; txr->next_avail_desc = cur; txr->tx_tso = TRUE; } /********************************************************************** * * Examine each tx_buffer in the used queue. If the hardware is done * processing the packet then free associated resources. The * tx_buffer is put back on the free queue. * **********************************************************************/ static void em_txeof(struct tx_ring *txr) { struct adapter *adapter = txr->adapter; int first, last, done, processed; struct em_txbuffer *tx_buffer; struct e1000_tx_desc *tx_desc, *eop_desc; if_t ifp = adapter->ifp; EM_TX_LOCK_ASSERT(txr); #ifdef DEV_NETMAP if (netmap_tx_irq(ifp, txr->me)) return; #endif /* DEV_NETMAP */ /* No work, make sure hang detection is disabled */ if (txr->tx_avail == adapter->num_tx_desc) { txr->busy = EM_TX_IDLE; return; } processed = 0; first = txr->next_to_clean; tx_desc = &txr->tx_base[first]; tx_buffer = &txr->tx_buffers[first]; last = tx_buffer->next_eop; eop_desc = &txr->tx_base[last]; /* * What this does is get the index of the * first descriptor AFTER the EOP of the * first packet, that way we can do the * simple comparison on the inner while loop. */ if (++last == adapter->num_tx_desc) last = 0; done = last; bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_POSTREAD); while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) { /* We clean the range of the packet */ while (first != done) { tx_desc->upper.data = 0; tx_desc->lower.data = 0; tx_desc->buffer_addr = 0; ++txr->tx_avail; ++processed; if (tx_buffer->m_head) { bus_dmamap_sync(txr->txtag, tx_buffer->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->txtag, tx_buffer->map); m_freem(tx_buffer->m_head); tx_buffer->m_head = NULL; } tx_buffer->next_eop = -1; if (++first == adapter->num_tx_desc) first = 0; tx_buffer = &txr->tx_buffers[first]; tx_desc = &txr->tx_base[first]; } if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* See if we can continue to the next packet */ last = tx_buffer->next_eop; if (last != -1) { eop_desc = &txr->tx_base[last]; /* Get new done point */ if (++last == adapter->num_tx_desc) last = 0; done = last; } else break; } bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); txr->next_to_clean = first; /* ** Hang detection: we know there's work outstanding ** or the entry return would have been taken, so no ** descriptor processed here indicates a potential hang. ** The local timer will examine this and do a reset if needed. */ if (processed == 0) { if (txr->busy != EM_TX_HUNG) ++txr->busy; } else /* At least one descriptor was cleaned */ txr->busy = EM_TX_BUSY; /* note this clears HUNG */ /* * If we have a minimum free, clear IFF_DRV_OACTIVE * to tell the stack that it is OK to send packets. * Notice that all writes of OACTIVE happen under the * TX lock which, with a single queue, guarantees * sanity. 
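 * The threshold used below is EM_MAX_SCATTER free descriptors, i.e. enough
 * room to accept at least one maximally fragmented frame before the stack
 * is told it may transmit again.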
*/ if (txr->tx_avail >= EM_MAX_SCATTER) { if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); } /* Disable hang detection if all clean */ if (txr->tx_avail == adapter->num_tx_desc) txr->busy = EM_TX_IDLE; } /********************************************************************* * * Refresh RX descriptor mbufs from system mbuf buffer pool. * **********************************************************************/ static void em_refresh_mbufs(struct rx_ring *rxr, int limit) { struct adapter *adapter = rxr->adapter; struct mbuf *m; bus_dma_segment_t segs; struct em_rxbuffer *rxbuf; int i, j, error, nsegs; bool cleaned = FALSE; i = j = rxr->next_to_refresh; /* ** Get one descriptor beyond ** our work mark to control ** the loop. */ if (++j == adapter->num_rx_desc) j = 0; while (j != limit) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head == NULL) { m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); /* ** If we have a temporary resource shortage ** that causes a failure, just abort refresh ** for now, we will return to this point when ** reinvoked from em_rxeof. */ if (m == NULL) goto update; } else m = rxbuf->m_head; m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz; m->m_flags |= M_PKTHDR; m->m_data = m->m_ext.ext_buf; /* Use bus_dma machinery to setup the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, m, &segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { printf("Refresh mbufs: hdr dmamap load" " failure - %d\n", error); m_free(m); rxbuf->m_head = NULL; goto update; } rxbuf->m_head = m; rxbuf->paddr = segs.ds_addr; bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); em_setup_rxdesc(&rxr->rx_base[i], rxbuf); cleaned = TRUE; i = j; /* Next is precalulated for us */ rxr->next_to_refresh = i; /* Calculate next controlling index */ if (++j == adapter->num_rx_desc) j = 0; } update: /* ** Update the tail pointer only if, ** and as far as we have refreshed. */ if (cleaned) E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->next_to_refresh); return; } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. 
* **********************************************************************/ static int em_allocate_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; device_t dev = adapter->dev; struct em_rxbuffer *rxbuf; int error; rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) * adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO); if (rxr->rx_buffers == NULL) { device_printf(dev, "Unable to allocate rx_buffer memory\n"); return (ENOMEM); } error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &rxr->rxtag); if (error) { device_printf(dev, "%s: bus_dma_tag_create failed %d\n", __func__, error); goto fail; } rxbuf = rxr->rx_buffers; for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) { rxbuf = &rxr->rx_buffers[i]; error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map); if (error) { device_printf(dev, "%s: bus_dmamap_create failed: %d\n", __func__, error); goto fail; } } return (0); fail: em_free_receive_structures(adapter); return (error); } /********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int em_setup_receive_ring(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct em_rxbuffer *rxbuf; bus_dma_segment_t seg[1]; int rsize, nsegs, error = 0; #ifdef DEV_NETMAP struct netmap_slot *slot; struct netmap_adapter *na = netmap_getna(adapter->ifp); #endif /* Clear the ring contents */ EM_RX_LOCK(rxr); rsize = roundup2(adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN); bzero((void *)rxr->rx_base, rsize); #ifdef DEV_NETMAP slot = netmap_reset(na, NR_RX, rxr->me, 0); #endif /* ** Free current RX buffer structs and their mbufs */ for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); m_freem(rxbuf->m_head); rxbuf->m_head = NULL; /* mark as freed */ } } /* Now replenish the mbufs */ for (int j = 0; j != adapter->num_rx_desc; ++j) { rxbuf = &rxr->rx_buffers[j]; #ifdef DEV_NETMAP if (slot) { int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j); uint64_t paddr; void *addr; addr = PNMB(na, slot + si, &paddr); netmap_load_map(na, rxr->rxtag, rxbuf->map, addr); + rxbuf->paddr = paddr; em_setup_rxdesc(&rxr->rx_base[j], rxbuf); continue; } #endif /* DEV_NETMAP */ rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, adapter->rx_mbuf_sz); if (rxbuf->m_head == NULL) { error = ENOBUFS; goto fail; } rxbuf->m_head->m_len = adapter->rx_mbuf_sz; rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */ rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz; /* Get the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, rxbuf->m_head, seg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { m_freem(rxbuf->m_head); rxbuf->m_head = NULL; goto fail; } bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); rxbuf->paddr = seg[0].ds_addr; em_setup_rxdesc(&rxr->rx_base[j], rxbuf); } rxr->next_to_check = 0; rxr->next_to_refresh = 0; bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); fail: EM_RX_UNLOCK(rxr); return (error); } 
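
/*
 * Note: the cluster size used to replenish the ring above (adapter->rx_mbuf_sz,
 * anywhere from MCLBYTES up to a 9k cluster given the MJUM9BYTES DMA tag) is
 * what em_initialize_receive_unit() below keys the RCTL buffer-size bits off
 * of (SZ_2048, SZ_4096|BSEX or SZ_8192|BSEX).
 */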
/********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int em_setup_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; int q; for (q = 0; q < adapter->num_queues; q++, rxr++) if (em_setup_receive_ring(rxr)) goto fail; return (0); fail: /* * Free RX buffers allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'q' failed, so its the terminus. */ for (int i = 0; i < q; ++i) { rxr = &adapter->rx_rings[i]; for (int n = 0; n < adapter->num_rx_desc; n++) { struct em_rxbuffer *rxbuf; rxbuf = &rxr->rx_buffers[n]; if (rxbuf->m_head != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); m_freem(rxbuf->m_head); rxbuf->m_head = NULL; } } rxr->next_to_check = 0; rxr->next_to_refresh = 0; } return (ENOBUFS); } /********************************************************************* * * Free all receive rings. * **********************************************************************/ static void em_free_receive_structures(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; for (int i = 0; i < adapter->num_queues; i++, rxr++) { em_free_receive_buffers(rxr); /* Free the ring memory as well */ em_dma_free(adapter, &rxr->rxdma); EM_RX_LOCK_DESTROY(rxr); } free(adapter->rx_rings, M_DEVBUF); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void em_free_receive_buffers(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct em_rxbuffer *rxbuf = NULL; INIT_DEBUGOUT("free_receive_buffers: begin"); if (rxr->rx_buffers != NULL) { for (int i = 0; i < adapter->num_rx_desc; i++) { rxbuf = &rxr->rx_buffers[i]; if (rxbuf->map != NULL) { bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(rxr->rxtag, rxbuf->map); bus_dmamap_destroy(rxr->rxtag, rxbuf->map); } if (rxbuf->m_head != NULL) { m_freem(rxbuf->m_head); rxbuf->m_head = NULL; } } free(rxr->rx_buffers, M_DEVBUF); rxr->rx_buffers = NULL; rxr->next_to_check = 0; rxr->next_to_refresh = 0; } if (rxr->rxtag != NULL) { bus_dma_tag_destroy(rxr->rxtag); rxr->rxtag = NULL; } return; } /********************************************************************* * * Enable receive unit. 
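 *  As a worked example of the interrupt throttling programmed below: assuming
 *  the header's usual MAX_INTS_PER_SEC of 8000, DEFAULT_ITR = 1/(8000 * 256ns)
 *  ~= 488 ITR units, i.e. a minimum of roughly 125us between receive
 *  interrupts.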
* **********************************************************************/ static void em_initialize_receive_unit(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; if_t ifp = adapter->ifp; struct e1000_hw *hw = &adapter->hw; u32 rctl, rxcsum, rfctl; INIT_DEBUGOUT("em_initialize_receive_units: begin"); /* * Make sure receives are disabled while setting * up the descriptor ring */ rctl = E1000_READ_REG(hw, E1000_RCTL); /* Do not disable if ever enabled on this hardware */ if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583)) E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); /* Setup the Receive Control Register */ rctl &= ~(3 << E1000_RCTL_MO_SHIFT); rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Do not store bad packets */ rctl &= ~E1000_RCTL_SBP; /* Enable Long Packet receive */ if (if_getmtu(ifp) > ETHERMTU) rctl |= E1000_RCTL_LPE; else rctl &= ~E1000_RCTL_LPE; /* Strip the CRC */ if (!em_disable_crc_stripping) rctl |= E1000_RCTL_SECRC; E1000_WRITE_REG(&adapter->hw, E1000_RADV, adapter->rx_abs_int_delay.value); E1000_WRITE_REG(&adapter->hw, E1000_RDTR, adapter->rx_int_delay.value); /* * Set the interrupt throttling rate. Value is calculated * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) */ E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR); /* Use extended rx descriptor formats */ rfctl = E1000_READ_REG(hw, E1000_RFCTL); rfctl |= E1000_RFCTL_EXTEN; /* ** When using MSIX interrupts we need to throttle ** using the EITR register (82574 only) */ if (hw->mac.type == e1000_82574) { for (int i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_EITR_82574(i), DEFAULT_ITR); /* Disable accelerated acknowledge */ rfctl |= E1000_RFCTL_ACK_DIS; } E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); if (if_getcapenable(ifp) & IFCAP_RXCSUM) { #ifdef EM_MULTIQUEUE rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL | E1000_RXCSUM_PCSD; #else rxcsum |= E1000_RXCSUM_TUOFL; #endif } else rxcsum &= ~E1000_RXCSUM_TUOFL; E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); #ifdef EM_MULTIQUEUE #define RSSKEYLEN 10 if (adapter->num_queues > 1) { uint8_t rss_key[4 * RSSKEYLEN]; uint32_t reta = 0; int i; /* * Configure RSS key */ arc4rand(rss_key, sizeof(rss_key), 0); for (i = 0; i < RSSKEYLEN; ++i) { uint32_t rssrk = 0; rssrk = EM_RSSRK_VAL(rss_key, i); E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk); } /* * Configure RSS redirect table in following fashion: * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] */ for (i = 0; i < sizeof(reta); ++i) { uint32_t q; q = (i % adapter->num_queues) << 7; reta |= q << (8 * i); } for (i = 0; i < 32; ++i) { E1000_WRITE_REG(hw, E1000_RETA(i), reta); } E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | E1000_MRQC_RSS_FIELD_IPV4_TCP | E1000_MRQC_RSS_FIELD_IPV4 | E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | E1000_MRQC_RSS_FIELD_IPV6_EX | E1000_MRQC_RSS_FIELD_IPV6); } #endif /* ** XXX TEMPORARY WORKAROUND: on some systems with 82573 ** long latencies are observed, like Lenovo X60. This ** change eliminates the problem, but since having positive ** values in RDTR is a known source of problems on other ** platforms another solution is being sought. 
*/ if (hw->mac.type == e1000_82573) E1000_WRITE_REG(hw, E1000_RDTR, 0x20); for (int i = 0; i < adapter->num_queues; i++, rxr++) { /* Setup the Base and Length of the Rx Descriptor Ring */ u64 bus_addr = rxr->rxdma.dma_paddr; u32 rdt = adapter->num_rx_desc - 1; /* default */ E1000_WRITE_REG(hw, E1000_RDLEN(i), adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended)); E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32)); E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr); /* Setup the Head and Tail Descriptor Pointers */ E1000_WRITE_REG(hw, E1000_RDH(i), 0); #ifdef DEV_NETMAP /* * an init() while a netmap client is active must * preserve the rx buffers passed to userspace. */ if (if_getcapenable(ifp) & IFCAP_NETMAP) { struct netmap_adapter *na = netmap_getna(adapter->ifp); rdt -= nm_kr_rxspace(&na->rx_rings[i]); } #endif /* DEV_NETMAP */ E1000_WRITE_REG(hw, E1000_RDT(i), rdt); } /* * Set PTHRESH for improved jumbo performance * According to 10.2.5.11 of Intel 82574 Datasheet, * RXDCTL(1) is written whenever RXDCTL(0) is written. * Only write to RXDCTL(1) if there is a need for different * settings. */ if (((adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_pch2lan) || (adapter->hw.mac.type == e1000_ich10lan)) && (if_getmtu(ifp) > ETHERMTU)) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); } else if (adapter->hw.mac.type == e1000_82574) { for (int i = 0; i < adapter->num_queues; i++) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); rxdctl |= 0x20; /* PTHRESH */ rxdctl |= 4 << 8; /* HTHRESH */ rxdctl |= 4 << 16;/* WTHRESH */ rxdctl |= 1 << 24; /* Switch to granularity */ E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); } } if (adapter->hw.mac.type >= e1000_pch2lan) { if (if_getmtu(ifp) > ETHERMTU) e1000_lv_jumbo_workaround_ich8lan(hw, TRUE); else e1000_lv_jumbo_workaround_ich8lan(hw, FALSE); } /* Make sure VLAN Filters are off */ rctl &= ~E1000_RCTL_VFE; if (adapter->rx_mbuf_sz == MCLBYTES) rctl |= E1000_RCTL_SZ_2048; else if (adapter->rx_mbuf_sz == MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; /* ensure we clear use DTYPE of 00 here */ rctl &= ~0x00000C00; /* Write out the settings */ E1000_WRITE_REG(hw, E1000_RCTL, rctl); return; } /********************************************************************* * * This routine executes in interrupt context. It replenishes * the mbufs in the descriptor and sends data which has been * dma'ed into host memory to upper layer. * * We loop at most count times if count is > 0, or until done if * count < 0. 
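 *  (Callers outside this hunk typically pass the rx_process_limit budget from
 *  the interrupt/taskqueue path, while DEVICE_POLLING supplies its own count.)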
* * For polling we also now return the number of cleaned packets *********************************************************************/ static bool em_rxeof(struct rx_ring *rxr, int count, int *done) { struct adapter *adapter = rxr->adapter; if_t ifp = adapter->ifp; struct mbuf *mp, *sendmp; u32 status = 0; u16 len; int i, processed, rxdone = 0; bool eop; union e1000_rx_desc_extended *cur; EM_RX_LOCK(rxr); /* Sync the ring */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef DEV_NETMAP if (netmap_rx_irq(ifp, rxr->me, &processed)) { EM_RX_UNLOCK(rxr); return (FALSE); } #endif /* DEV_NETMAP */ for (i = rxr->next_to_check, processed = 0; count != 0;) { if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) break; cur = &rxr->rx_base[i]; status = le32toh(cur->wb.upper.status_error); mp = sendmp = NULL; if ((status & E1000_RXD_STAT_DD) == 0) break; len = le16toh(cur->wb.upper.length); eop = (status & E1000_RXD_STAT_EOP) != 0; if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) || (rxr->discard == TRUE)) { adapter->dropped_pkts++; ++rxr->rx_discarded; if (!eop) /* Catch subsequent segs */ rxr->discard = TRUE; else rxr->discard = FALSE; em_rx_discard(rxr, i); goto next_desc; } bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map); /* Assign correct length to the current fragment */ mp = rxr->rx_buffers[i].m_head; mp->m_len = len; /* Trigger for refresh */ rxr->rx_buffers[i].m_head = NULL; /* First segment? */ if (rxr->fmp == NULL) { mp->m_pkthdr.len = len; rxr->fmp = rxr->lmp = mp; } else { /* Chain mbuf's together */ mp->m_flags &= ~M_PKTHDR; rxr->lmp->m_next = mp; rxr->lmp = mp; rxr->fmp->m_pkthdr.len += len; } if (eop) { --count; sendmp = rxr->fmp; if_setrcvif(sendmp, ifp); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); em_receive_checksum(status, sendmp); #ifndef __NO_STRICT_ALIGNMENT if (adapter->hw.mac.max_frame_size > (MCLBYTES - ETHER_ALIGN) && em_fixup_rx(rxr) != 0) goto skip; #endif if (status & E1000_RXD_STAT_VP) { if_setvtag(sendmp, le16toh(cur->wb.upper.vlan)); sendmp->m_flags |= M_VLANTAG; } #ifndef __NO_STRICT_ALIGNMENT skip: #endif rxr->fmp = rxr->lmp = NULL; } next_desc: /* Sync the ring */ bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* Zero out the receive descriptors status. */ cur->wb.upper.status_error &= htole32(~0xFF); ++rxdone; /* cumulative for POLL */ ++processed; /* Advance our pointers to the next descriptor. */ if (++i == adapter->num_rx_desc) i = 0; /* Send to the stack */ if (sendmp != NULL) { rxr->next_to_check = i; EM_RX_UNLOCK(rxr); if_input(ifp, sendmp); EM_RX_LOCK(rxr); i = rxr->next_to_check; } /* Only refresh mbufs every 8 descriptors */ if (processed == 8) { em_refresh_mbufs(rxr, i); processed = 0; } } /* Catch any remaining refresh work */ if (e1000_rx_unrefreshed(rxr)) em_refresh_mbufs(rxr, i); rxr->next_to_check = i; if (done != NULL) *done = rxdone; EM_RX_UNLOCK(rxr); return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE); } static __inline void em_rx_discard(struct rx_ring *rxr, int i) { struct em_rxbuffer *rbuf; rbuf = &rxr->rx_buffers[i]; bus_dmamap_unload(rxr->rxtag, rbuf->map); /* Free any previous pieces */ if (rxr->fmp != NULL) { rxr->fmp->m_flags |= M_PKTHDR; m_freem(rxr->fmp); rxr->fmp = NULL; rxr->lmp = NULL; } /* ** Free buffer and allow em_refresh_mbufs() ** to clean up and recharge buffer. 
*/ if (rbuf->m_head) { m_free(rbuf->m_head); rbuf->m_head = NULL; } return; } #ifndef __NO_STRICT_ALIGNMENT /* * When jumbo frames are enabled we should realign entire payload on * architectures with strict alignment. This is a serious design mistake of the 8254x * as it nullifies DMA operations. 8254x just allows RX buffer size to be * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its * payload. On architectures without strict alignment restrictions 8254x still * performs unaligned memory access which would reduce the performance too. * To avoid copying over an entire frame to align, we allocate a new mbuf and * copy ethernet header to the new mbuf. The new mbuf is prepended into the * existing mbuf chain. * * Be aware, best performance of the 8254x is achieved only when jumbo frame is * not used at all on architectures with strict alignment. */ static int em_fixup_rx(struct rx_ring *rxr) { struct adapter *adapter = rxr->adapter; struct mbuf *m, *n; int error; error = 0; m = rxr->fmp; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; } else { MGETHDR(n, M_NOWAIT, MT_DATA); if (n != NULL) { bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; rxr->fmp = n; } else { adapter->dropped_pkts++; m_freem(rxr->fmp); rxr->fmp = NULL; error = ENOMEM; } } return (error); } #endif static void em_setup_rxdesc(union e1000_rx_desc_extended *rxd, const struct em_rxbuffer *rxbuf) { rxd->read.buffer_addr = htole64(rxbuf->paddr); /* DD bits must be cleared */ rxd->wb.upper.status_error = 0; } /********************************************************************* * * Verify that the hardware indicated that the checksum is valid. * Inform the stack about the status of the checksum so that the stack * doesn't spend time verifying the checksum.
* *********************************************************************/ static void em_receive_checksum(uint32_t status, struct mbuf *mp) { mp->m_pkthdr.csum_flags = 0; /* Ignore Checksum bit is set */ if (status & E1000_RXD_STAT_IXSM) return; /* If the IP checksum exists and there is no IP Checksum error */ if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) == E1000_RXD_STAT_IPCS) { mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID); } /* TCP or UDP checksum */ if ((status & (E1000_RXD_STAT_TCPCS | E1000_RXDEXT_STATERR_TCPE)) == E1000_RXD_STAT_TCPCS) { mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data = htons(0xffff); } if (status & E1000_RXD_STAT_UDPCS) { mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data = htons(0xffff); } } /* * This routine is run via an vlan * config EVENT */ static void em_register_vlan(void *arg, if_t ifp, u16 vtag) { struct adapter *adapter = if_getsoftc(ifp); u32 index, bit; if ((void*)adapter != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid ID */ return; EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } /* * This routine is run via an vlan * unconfig EVENT */ static void em_unregister_vlan(void *arg, if_t ifp, u16 vtag) { struct adapter *adapter = if_getsoftc(ifp); u32 index, bit; if (adapter != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; EM_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } static void em_setup_vlan_hw_support(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 reg; /* ** We get here thru init_locked, meaning ** a soft reset, this has already cleared ** the VFTA and other state, so if there ** have been no vlan's registered do nothing. */ if (adapter->num_vlans == 0) return; /* ** A soft reset zero's out the VFTA, so ** we need to repopulate it now. */ for (int i = 0; i < EM_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, i, adapter->shadow_vfta[i]); reg = E1000_READ_REG(hw, E1000_CTRL); reg |= E1000_CTRL_VME; E1000_WRITE_REG(hw, E1000_CTRL, reg); /* Enable the Filter Table */ reg = E1000_READ_REG(hw, E1000_RCTL); reg &= ~E1000_RCTL_CFIEN; reg |= E1000_RCTL_VFE; E1000_WRITE_REG(hw, E1000_RCTL, reg); } static void em_enable_intr(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 ims_mask = IMS_ENABLE_MASK; if (hw->mac.type == e1000_82574) { E1000_WRITE_REG(hw, EM_EIAC, adapter->ims); ims_mask |= adapter->ims; } E1000_WRITE_REG(hw, E1000_IMS, ims_mask); } static void em_disable_intr(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; if (hw->mac.type == e1000_82574) E1000_WRITE_REG(hw, EM_EIAC, 0); E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); } /* * Bit of a misnomer, what this really means is * to enable OS management of the system... 
aka * to disable special hardware management features */ static void em_init_manageability(struct adapter *adapter) { /* A shared code workaround */ #define E1000_82542_MANC2H E1000_MANC2H if (adapter->has_manage) { int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* disable hardware interception of ARP */ manc &= ~(E1000_MANC_ARP_EN); /* enable receiving management packets to the host */ manc |= E1000_MANC_EN_MNG2HOST; #define E1000_MNG2HOST_PORT_623 (1 << 5) #define E1000_MNG2HOST_PORT_664 (1 << 6) manc2h |= E1000_MNG2HOST_PORT_623; manc2h |= E1000_MNG2HOST_PORT_664; E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * Give control back to hardware management * controller if there is one. */ static void em_release_manageability(struct adapter *adapter) { if (adapter->has_manage) { int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); /* re-enable hardware interception of ARP */ manc |= E1000_MANC_ARP_EN; manc &= ~E1000_MANC_EN_MNG2HOST; E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); } } /* * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means * that the driver is loaded. For AMT version type f/w * this means that the network i/f is open. */ static void em_get_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm | E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); return; } /* * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit. * For ASF and Pass Through versions of f/w this means that * the driver is no longer loaded. For AMT versions of the * f/w this means that the network i/f is closed. */ static void em_release_hw_control(struct adapter *adapter) { u32 ctrl_ext, swsm; if (!adapter->has_manage) return; if (adapter->hw.mac.type == e1000_82573) { swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM); E1000_WRITE_REG(&adapter->hw, E1000_SWSM, swsm & ~E1000_SWSM_DRV_LOAD); return; } /* else */ ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); return; } static int em_is_valid_ether_addr(u8 *addr) { char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { return (FALSE); } return (TRUE); } /* ** Parse the interface capabilities with regard ** to both system management and wake-on-lan for ** later use. 
*/ static void em_get_wakeup(device_t dev) { struct adapter *adapter = device_get_softc(dev); u16 eeprom_data = 0, device_id, apme_mask; adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw); apme_mask = EM_EEPROM_APME; switch (adapter->hw.mac.type) { case e1000_82573: case e1000_82583: adapter->has_amt = TRUE; /* Falls thru */ case e1000_82571: case e1000_82572: case e1000_80003es2lan: if (adapter->hw.bus.func == 1) { e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); break; } else e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; case e1000_ich8lan: case e1000_ich9lan: case e1000_ich10lan: case e1000_pchlan: case e1000_pch2lan: apme_mask = E1000_WUC_APME; adapter->has_amt = TRUE; eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC); break; default: e1000_read_nvm(&adapter->hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); break; } if (eeprom_data & apme_mask) adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC); /* * We have the eeprom settings, now apply the special cases * where the eeprom may be wrong or the board won't support * wake on lan on a particular port */ device_id = pci_get_device(dev); switch (device_id) { case E1000_DEV_ID_82571EB_FIBER: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_FUNC_1) adapter->wol = 0; break; case E1000_DEV_ID_82571EB_QUAD_COPPER: case E1000_DEV_ID_82571EB_QUAD_FIBER: case E1000_DEV_ID_82571EB_QUAD_COPPER_LP: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) adapter->wol = 0; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; } return; } /* * Enable PCI Wake On Lan capability */ static void em_enable_wakeup(device_t dev) { struct adapter *adapter = device_get_softc(dev); if_t ifp = adapter->ifp; u32 pmc, ctrl, ctrl_ext, rctl; u16 status; if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0)) return; /* Advertise the wakeup capability */ ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL); ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3); E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl); E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); if ((adapter->hw.mac.type == e1000_ich8lan) || (adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_ich10lan)) e1000_suspend_workarounds_ich8lan(&adapter->hw); /* Keep the laser running on Fiber adapters */ if (adapter->hw.phy.media_type == e1000_media_type_fiber || adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA; E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext); } /* ** Determine type of Wakeup: note that wol ** is set with all bits on by default. 
*/ if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0) adapter->wol &= ~E1000_WUFC_MAG; if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0) adapter->wol &= ~E1000_WUFC_MC; else { rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); rctl |= E1000_RCTL_MPE; E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl); } if ((adapter->hw.mac.type == e1000_pchlan) || (adapter->hw.mac.type == e1000_pch2lan)) { if (em_enable_phy_wakeup(adapter)) return; } else { E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN); E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol); } if (adapter->hw.phy.type == e1000_phy_igp_3) e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw); /* Request PME */ status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2); status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE); if (if_getcapenable(ifp) & IFCAP_WOL) status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2); return; } /* ** WOL in the newer chipset interfaces (pchlan) ** require thing to be copied into the phy */ static int em_enable_phy_wakeup(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 mreg, ret = 0; u16 preg; /* copy MAC RARs to PHY RARs */ e1000_copy_rx_addrs_to_phy_ich8lan(hw); /* copy MAC MTA to PHY MTA */ for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) { mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i); e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF)); e1000_write_phy_reg(hw, BM_MTA(i) + 1, (u16)((mreg >> 16) & 0xFFFF)); } /* configure PHY Rx Control register */ e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg); mreg = E1000_READ_REG(hw, E1000_RCTL); if (mreg & E1000_RCTL_UPE) preg |= BM_RCTL_UPE; if (mreg & E1000_RCTL_MPE) preg |= BM_RCTL_MPE; preg &= ~(BM_RCTL_MO_MASK); if (mreg & E1000_RCTL_MO_3) preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT) << BM_RCTL_MO_SHIFT); if (mreg & E1000_RCTL_BAM) preg |= BM_RCTL_BAM; if (mreg & E1000_RCTL_PMCF) preg |= BM_RCTL_PMCF; mreg = E1000_READ_REG(hw, E1000_CTRL); if (mreg & E1000_CTRL_RFCE) preg |= BM_RCTL_RFCE; e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg); /* enable PHY wakeup in MAC register */ E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN); E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol); /* configure and enable PHY wakeup in PHY registers */ e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol); e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN); /* activate PHY wakeup */ ret = hw->phy.ops.acquire(hw); if (ret) { printf("Could not acquire PHY\n"); return ret; } e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT)); ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg); if (ret) { printf("Could not read PHY page 769\n"); goto out; } preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT; ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg); if (ret) printf("Could not set PHY Host Wakeup bit\n"); out: hw->phy.ops.release(hw); return ret; } static void em_led_func(void *arg, int onoff) { struct adapter *adapter = arg; EM_CORE_LOCK(adapter); if (onoff) { e1000_setup_led(&adapter->hw); e1000_led_on(&adapter->hw); } else { e1000_led_off(&adapter->hw); e1000_cleanup_led(&adapter->hw); } EM_CORE_UNLOCK(adapter); } /* ** Disable the L0S and L1 LINK states */ static void em_disable_aspm(struct adapter *adapter) { int base, reg; u16 link_cap,link_ctrl; device_t dev = adapter->dev; switch (adapter->hw.mac.type) { case e1000_82573: case e1000_82574: case e1000_82583: break; default: return; } if 
(pci_find_cap(dev, PCIY_EXPRESS, &base) != 0) return; reg = base + PCIER_LINK_CAP; link_cap = pci_read_config(dev, reg, 2); if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0) return; reg = base + PCIER_LINK_CTL; link_ctrl = pci_read_config(dev, reg, 2); link_ctrl &= ~PCIEM_LINK_CTL_ASPMC; pci_write_config(dev, reg, link_ctrl, 2); return; } /********************************************************************** * * Update the board statistics counters. * **********************************************************************/ static void em_update_stats_counters(struct adapter *adapter) { if(adapter->hw.phy.media_type == e1000_media_type_copper || (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) { adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS); adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC); } adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS); adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC); adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC); adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL); adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC); adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL); adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC); adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC); adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC); adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC); adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC); adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC); adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC); adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64); adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127); adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255); adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511); adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023); adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522); adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC); adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC); adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC); adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC); /* For the 64-bit byte counters the low dword must be read first. 
*/ /* Both registers clear on the read of the high dword */ adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32); adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32); adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC); adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC); adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC); adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC); adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC); adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH); adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH); adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR); adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT); adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64); adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127); adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255); adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511); adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023); adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522); adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC); adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC); /* Interrupt Counts */ adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC); adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC); adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC); adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC); adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC); adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC); adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC); adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC); adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC); if (adapter->hw.mac.type >= e1000_82543) { adapter->stats.algnerrc += E1000_READ_REG(&adapter->hw, E1000_ALGNERRC); adapter->stats.rxerrc += E1000_READ_REG(&adapter->hw, E1000_RXERRC); adapter->stats.tncrs += E1000_READ_REG(&adapter->hw, E1000_TNCRS); adapter->stats.cexterr += E1000_READ_REG(&adapter->hw, E1000_CEXTERR); adapter->stats.tsctc += E1000_READ_REG(&adapter->hw, E1000_TSCTC); adapter->stats.tsctfc += E1000_READ_REG(&adapter->hw, E1000_TSCTFC); } } static uint64_t em_get_counter(if_t ifp, ift_counter cnt) { struct adapter *adapter; adapter = if_getsoftc(ifp); switch (cnt) { case IFCOUNTER_COLLISIONS: return (adapter->stats.colc); case IFCOUNTER_IERRORS: return (adapter->dropped_pkts + adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc + adapter->stats.mpc + adapter->stats.cexterr); case IFCOUNTER_OERRORS: return (adapter->stats.ecol + adapter->stats.latecol + adapter->watchdog_events); default: return (if_get_counter_default(ifp, cnt)); } } /* Export a single 32-bit register via a read-only sysctl. */ static int em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; u_int val; adapter = oidp->oid_arg1; val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2); return (sysctl_handle_int(oidp, &val, 0, req)); } /* * Add sysctl variables, one per statistic, to the system. 
*/ static void em_add_hw_stats(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct e1000_hw_stats *stats = &adapter->stats; struct sysctl_oid *stat_node, *queue_node, *int_node; struct sysctl_oid_list *stat_list, *queue_list, *int_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSIX IRQ Handled"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail", CTLFLAG_RD, &adapter->mbuf_defrag_failed, "Defragmenting mbuf chain failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", CTLFLAG_RD, &adapter->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns", CTLFLAG_RD, &adapter->rx_overruns, "RX overruns"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL, em_sysctl_reg_handler, "IU", "Device Control Register"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL, em_sysctl_reg_handler, "IU", "Receiver Control Register"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water", CTLFLAG_RD, &adapter->hw.fc.high_water, 0, "Flow Control High Watermark"); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", CTLFLAG_RD, &adapter->hw.fc.low_water, 0, "Flow Control Low Watermark"); for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me), em_sysctl_reg_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq", CTLFLAG_RD, &txr->tx_irq, "Queue MSI-X Transmit Interrupts"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txr->no_desc_avail, "Queue No Descriptor Available"); snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me), em_sysctl_reg_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq", CTLFLAG_RD, &rxr->rx_irq, "Queue MSI-X Receive Interrupts"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, 
OID_AUTO, "excess_coll", CTLFLAG_RD, &stats->ecol, "Excessive collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll", CTLFLAG_RD, &stats->scc, "Single collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll", CTLFLAG_RD, &stats->mcc, "Multiple collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll", CTLFLAG_RD, &stats->latecol, "Late collisions"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count", CTLFLAG_RD, &stats->colc, "Collision Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors", CTLFLAG_RD, &adapter->stats.symerrs, "Symbol Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors", CTLFLAG_RD, &adapter->stats.sec, "Sequence Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count", CTLFLAG_RD, &adapter->stats.dc, "Defer Count"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets", CTLFLAG_RD, &adapter->stats.mpc, "Missed Packets"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff", CTLFLAG_RD, &adapter->stats.rnbc, "Receive No Buffers"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize", CTLFLAG_RD, &adapter->stats.ruc, "Receive Undersize"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &adapter->stats.rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize", CTLFLAG_RD, &adapter->stats.roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber", CTLFLAG_RD, &adapter->stats.rjc, "Recevied Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs", CTLFLAG_RD, &adapter->stats.rxerrc, "Receive Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &adapter->stats.crcerrs, "CRC errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs", CTLFLAG_RD, &adapter->stats.algnerrc, "Alignment Errors"); /* On 82575 these are collision counts */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs", CTLFLAG_RD, &adapter->stats.cexterr, "Collision/Carrier extension errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &adapter->stats.xonrxc, "XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &adapter->stats.xontxc, "XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &adapter->stats.xoffrxc, "XOFF Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &adapter->stats.xofftxc, "XOFF Transmitted"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd", CTLFLAG_RD, &adapter->stats.tpr, "Total Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd", CTLFLAG_RD, &adapter->stats.gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd", CTLFLAG_RD, &adapter->stats.mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &adapter->stats.prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &adapter->stats.prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &adapter->stats.prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &adapter->stats.prc511, "256-511 byte frames 
received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &adapter->stats.prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd", CTLFLAG_RD, &adapter->stats.gorc, "Good Octets Received"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &adapter->stats.gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &adapter->stats.tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &adapter->stats.gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &adapter->stats.bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &adapter->stats.mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &adapter->stats.ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &adapter->stats.ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &adapter->stats.ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &adapter->stats.ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &adapter->stats.ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &adapter->stats.ptc1522, "1024-1522 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd", CTLFLAG_RD, &adapter->stats.tsctc, "TSO Contexts Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail", CTLFLAG_RD, &adapter->stats.tsctfc, "TSO Contexts Failed"); /* Interrupt Stats */ int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", CTLFLAG_RD, NULL, "Interrupt Statistics"); int_list = SYSCTL_CHILDREN(int_node); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts", CTLFLAG_RD, &adapter->stats.iac, "Interrupt Assertion Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer", CTLFLAG_RD, &adapter->stats.icrxptc, "Interrupt Cause Rx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer", CTLFLAG_RD, &adapter->stats.icrxatc, "Interrupt Cause Rx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer", CTLFLAG_RD, &adapter->stats.ictxptc, "Interrupt Cause Tx Pkt Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer", CTLFLAG_RD, &adapter->stats.ictxatc, "Interrupt Cause Tx Abs Timer Expire Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty", CTLFLAG_RD, &adapter->stats.ictxqec, "Interrupt Cause Tx Queue Empty Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh", CTLFLAG_RD, &adapter->stats.ictxqmtc, "Interrupt Cause Tx Queue Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh", CTLFLAG_RD, &adapter->stats.icrxdmtc, "Interrupt Cause Rx Desc Min Thresh Count"); SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun", CTLFLAG_RD, &adapter->stats.icrxoc, "Interrupt Cause Receiver Overrun Count"); } 
/********************************************************************** * * This routine provides a way to dump out the adapter eeprom, * often a useful debug/service tool. This only dumps the first * 32 words, stuff that matters is in that extent. * **********************************************************************/ static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); /* * This value will cause a hex dump of the * first 32 16-bit words of the EEPROM to * the screen. */ if (result == 1) em_print_nvm_info(adapter); return (error); } static void em_print_nvm_info(struct adapter *adapter) { u16 eeprom_data; int i, j, row = 0; /* Its a bit crude, but it gets the job done */ printf("\nInterface EEPROM Dump:\n"); printf("Offset\n0x0000 "); for (i = 0, j = 0; i < 32; i++, j++) { if (j == 8) { /* Make the offset block */ j = 0; ++row; printf("\n0x00%x0 ",row); } e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data); printf("%04x ", eeprom_data); } printf("\n"); } static int em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { struct em_int_delay_info *info; struct adapter *adapter; u32 regval; int error, usecs, ticks; info = (struct em_int_delay_info *)arg1; usecs = info->value; error = sysctl_handle_int(oidp, &usecs, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535)) return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); if (info->offset == E1000_ITR) /* units are 256ns here */ ticks *= 4; adapter = info->adapter; EM_CORE_LOCK(adapter); regval = E1000_READ_OFFSET(&adapter->hw, info->offset); regval = (regval & ~0xffff) | (ticks & 0xffff); /* Handle a few special cases. */ switch (info->offset) { case E1000_RDTR: break; case E1000_TIDV: if (ticks == 0) { adapter->txd_cmd &= ~E1000_TXD_CMD_IDE; /* Don't write 0 into the TIDV register. */ regval++; } else adapter->txd_cmd |= E1000_TXD_CMD_IDE; break; } E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); EM_CORE_UNLOCK(adapter); return (0); } static void em_add_int_delay_sysctl(struct adapter *adapter, const char *name, const char *description, struct em_int_delay_info *info, int offset, int value) { info->adapter = adapter; info->offset = offset; info->value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, em_sysctl_int_delay, "I", description); } static void em_set_sysctl_value(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLFLAG_RW, limit, value, description); } /* ** Set flow control using sysctl: ** Flow control values: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ static int em_set_flowcntl(SYSCTL_HANDLER_ARGS) { int error; static int input = 3; /* default is full */ struct adapter *adapter = (struct adapter *) arg1; error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (input == adapter->fc) /* no change? 
*/ return (error); switch (input) { case e1000_fc_rx_pause: case e1000_fc_tx_pause: case e1000_fc_full: case e1000_fc_none: adapter->hw.fc.requested_mode = input; adapter->fc = input; break; default: /* Do nothing */ return (error); } adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode; e1000_force_mac_fc(&adapter->hw); return (error); } /* ** Manage Energy Efficient Ethernet: ** Control values: ** 0/1 - enabled/disabled */ static int em_sysctl_eee(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *) arg1; int error, value; value = adapter->hw.dev_spec.ich8lan.eee_disable; error = sysctl_handle_int(oidp, &value, 0, req); if (error || req->newptr == NULL) return (error); EM_CORE_LOCK(adapter); adapter->hw.dev_spec.ich8lan.eee_disable = (value != 0); em_init_locked(adapter); EM_CORE_UNLOCK(adapter); return (0); } static int em_sysctl_debug_info(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error; int result; result = -1; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { adapter = (struct adapter *)arg1; em_print_debug_info(adapter); } return (error); } /* ** This routine is meant to be fluid, add whatever is ** needed for debugging a problem. -jfv */ static void em_print_debug_info(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) printf("Interface is RUNNING "); else printf("Interface is NOT RUNNING\n"); if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE) printf("and INACTIVE\n"); else printf("and ACTIVE\n"); for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { device_printf(dev, "TX Queue %d ------\n", i); device_printf(dev, "hw tdh = %d, hw tdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_TDH(i)), E1000_READ_REG(&adapter->hw, E1000_TDT(i))); device_printf(dev, "Tx Queue Status = %d\n", txr->busy); device_printf(dev, "TX descriptors avail = %d\n", txr->tx_avail); device_printf(dev, "Tx Descriptors avail failure = %ld\n", txr->no_desc_avail); device_printf(dev, "RX Queue %d ------\n", i); device_printf(dev, "hw rdh = %d, hw rdt = %d\n", E1000_READ_REG(&adapter->hw, E1000_RDH(i)), E1000_READ_REG(&adapter->hw, E1000_RDT(i))); device_printf(dev, "RX discarded packets = %ld\n", rxr->rx_discarded); device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check); device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh); } } #ifdef EM_MULTIQUEUE /* * 82574 only: * Write a new value to the EEPROM increasing the number of MSIX * vectors from 3 to 5, for proper multiqueue support. 
*/ static void em_enable_vectors_82574(struct adapter *adapter) { struct e1000_hw *hw = &adapter->hw; device_t dev = adapter->dev; u16 edata; e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); printf("Current cap: %#06x\n", edata); if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) { device_printf(dev, "Writing to eeprom: increasing " "reported MSIX vectors from 3 to 5...\n"); edata &= ~(EM_NVM_MSIX_N_MASK); edata |= 4 << EM_NVM_MSIX_N_SHIFT; e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata); e1000_update_nvm_checksum(hw); device_printf(dev, "Writing to eeprom: done\n"); } } #endif #ifdef DDB DB_COMMAND(em_reset_dev, em_ddb_reset_dev) { devclass_t dc; int max_em; dc = devclass_find("em"); max_em = devclass_get_maxunit(dc); for (int index = 0; index < (max_em - 1); index++) { device_t dev; dev = devclass_get_device(dc, index); if (device_get_driver(dev) == &em_driver) { struct adapter *adapter = device_get_softc(dev); EM_CORE_LOCK(adapter); em_init_locked(adapter); EM_CORE_UNLOCK(adapter); } } } DB_COMMAND(em_dump_queue, em_ddb_dump_queue) { devclass_t dc; int max_em; dc = devclass_find("em"); max_em = devclass_get_maxunit(dc); for (int index = 0; index < (max_em - 1); index++) { device_t dev; dev = devclass_get_device(dc, index); if (device_get_driver(dev) == &em_driver) em_print_debug_info(device_get_softc(dev)); } } #endif Index: user/alc/PQ_LAUNDRY/sys/dev/gpio/gpioled.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/gpio/gpioled.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/gpio/gpioled.c (revision 303642) @@ -1,249 +1,254 @@ /*- * Copyright (c) 2009 Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #endif #include #include #include "gpiobus_if.h" /* * Only one pin for led */ #define GPIOLED_PIN 0 #define GPIOLED_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define GPIOLED_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) #define GPIOLED_LOCK_INIT(_sc) mtx_init(&(_sc)->sc_mtx, \ device_get_nameunit((_sc)->sc_dev), "gpioled", MTX_DEF) #define GPIOLED_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->sc_mtx) struct gpioled_softc { device_t sc_dev; device_t sc_busdev; struct mtx sc_mtx; struct cdev *sc_leddev; + int sc_invert; }; static void gpioled_control(void *, int); static int gpioled_probe(device_t); static int gpioled_attach(device_t); static int gpioled_detach(device_t); static void gpioled_control(void *priv, int onoff) { struct gpioled_softc *sc; sc = (struct gpioled_softc *)priv; GPIOLED_LOCK(sc); if (GPIOBUS_PIN_SETFLAGS(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN, GPIO_PIN_OUTPUT) == 0) { + if (sc->sc_invert) + onoff = !onoff; GPIOBUS_PIN_SET(sc->sc_busdev, sc->sc_dev, GPIOLED_PIN, onoff ? GPIO_PIN_HIGH : GPIO_PIN_LOW); } GPIOLED_UNLOCK(sc); } #ifdef FDT static void gpioled_identify(driver_t *driver, device_t bus) { phandle_t child, leds, root; root = OF_finddevice("/"); if (root == 0) return; for (leds = OF_child(root); leds != 0; leds = OF_peer(leds)) { if (!fdt_is_compatible_strict(leds, "gpio-leds")) continue; /* Traverse the 'gpio-leds' node and add its children. */ for (child = OF_child(leds); child != 0; child = OF_peer(child)) { if (!OF_hasprop(child, "gpios")) continue; if (ofw_gpiobus_add_fdt_child(bus, driver->name, child) == NULL) continue; } } } #endif static int gpioled_probe(device_t dev) { #ifdef FDT int match; phandle_t node; char *compat; /* * We can match against our own node compatible string and also against * our parent node compatible string. The first is normally used to * describe leds on a gpiobus and the later when there is a common node * compatible with 'gpio-leds' which is used to concentrate all the * leds nodes on the dts. 
*/ match = 0; if (ofw_bus_is_compatible(dev, "gpioled")) match = 1; if (match == 0) { if ((node = ofw_bus_get_node(dev)) == -1) return (ENXIO); if ((node = OF_parent(node)) == -1) return (ENXIO); if (OF_getprop_alloc(node, "compatible", 1, (void **)&compat) == -1) return (ENXIO); if (strcasecmp(compat, "gpio-leds") == 0) match = 1; OF_prop_free(compat); } if (match == 0) return (ENXIO); #endif device_set_desc(dev, "GPIO led"); return (BUS_PROBE_DEFAULT); } static int gpioled_attach(device_t dev) { struct gpioled_softc *sc; int state; #ifdef FDT phandle_t node; char *default_state; char *name; #else const char *name; #endif sc = device_get_softc(dev); sc->sc_dev = dev; sc->sc_busdev = device_get_parent(dev); GPIOLED_LOCK_INIT(sc); state = 0; #ifdef FDT if ((node = ofw_bus_get_node(dev)) == -1) return (ENXIO); if (OF_getprop_alloc(node, "default-state", sizeof(char), (void **)&default_state) != -1) { if (strcasecmp(default_state, "on") == 0) state = 1; else if (strcasecmp(default_state, "off") == 0) state = 0; else if (strcasecmp(default_state, "keep") == 0) state = -1; else { device_printf(dev, "unknown value for default-state in FDT\n"); } OF_prop_free(default_state); } name = NULL; if (OF_getprop_alloc(node, "label", 1, (void **)&name) == -1) OF_getprop_alloc(node, "name", 1, (void **)&name); #else if (resource_string_value(device_get_name(dev), device_get_unit(dev), "name", &name)) name = NULL; + resource_int_value(device_get_name(dev), + device_get_unit(dev), "invert", &sc->sc_invert); #endif sc->sc_leddev = led_create_state(gpioled_control, sc, name ? name : device_get_nameunit(dev), state); #ifdef FDT if (name != NULL) OF_prop_free(name); #endif return (0); } static int gpioled_detach(device_t dev) { struct gpioled_softc *sc; sc = device_get_softc(dev); if (sc->sc_leddev) { led_destroy(sc->sc_leddev); sc->sc_leddev = NULL; } GPIOLED_LOCK_DESTROY(sc); return (0); } static devclass_t gpioled_devclass; static device_method_t gpioled_methods[] = { /* Device interface */ #ifdef FDT DEVMETHOD(device_identify, gpioled_identify), #endif DEVMETHOD(device_probe, gpioled_probe), DEVMETHOD(device_attach, gpioled_attach), DEVMETHOD(device_detach, gpioled_detach), DEVMETHOD_END }; static driver_t gpioled_driver = { "gpioled", gpioled_methods, sizeof(struct gpioled_softc), }; DRIVER_MODULE(gpioled, gpiobus, gpioled_driver, gpioled_devclass, 0, 0); MODULE_DEPEND(gpioled, gpiobus, 1, 1, 1); Index: user/alc/PQ_LAUNDRY/sys/dev/hyperv/include/vmbus.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/hyperv/include/vmbus.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/hyperv/include/vmbus.h (revision 303642) @@ -1,162 +1,159 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMBUS_H_ #define _VMBUS_H_ #include #include /* * VMBUS version is 32 bit, upper 16 bit for major_number and lower * 16 bit for minor_number. * * 0.13 -- Windows Server 2008 * 1.1 -- Windows 7 * 2.4 -- Windows 8 * 3.0 -- Windows 8.1 */ #define VMBUS_VERSION_WS2008 ((0 << 16) | (13)) #define VMBUS_VERSION_WIN7 ((1 << 16) | (1)) #define VMBUS_VERSION_WIN8 ((2 << 16) | (4)) #define VMBUS_VERSION_WIN8_1 ((3 << 16) | (0)) #define VMBUS_VERSION_MAJOR(ver) (((uint32_t)(ver)) >> 16) #define VMBUS_VERSION_MINOR(ver) (((uint32_t)(ver)) & 0xffff) /* * GPA stuffs. */ struct vmbus_gpa_range { uint32_t gpa_len; uint32_t gpa_ofs; uint64_t gpa_page[0]; } __packed; /* This is actually vmbus_gpa_range.gpa_page[1] */ struct vmbus_gpa { uint32_t gpa_len; uint32_t gpa_ofs; uint64_t gpa_page; } __packed; #define VMBUS_CHANPKT_SIZE_SHIFT 3 #define VMBUS_CHANPKT_GETLEN(pktlen) \ (((int)(pktlen)) << VMBUS_CHANPKT_SIZE_SHIFT) struct vmbus_chanpkt_hdr { uint16_t cph_type; /* VMBUS_CHANPKT_TYPE_ */ uint16_t cph_hlen; /* header len, in 8 bytes */ uint16_t cph_tlen; /* total len, in 8 bytes */ uint16_t cph_flags; /* VMBUS_CHANPKT_FLAG_ */ uint64_t cph_xactid; } __packed; #define VMBUS_CHANPKT_TYPE_INBAND 0x0006 #define VMBUS_CHANPKT_TYPE_RXBUF 0x0007 #define VMBUS_CHANPKT_TYPE_GPA 0x0009 #define VMBUS_CHANPKT_TYPE_COMP 0x000b #define VMBUS_CHANPKT_FLAG_RC 0x0001 /* report completion */ #define VMBUS_CHANPKT_CONST_DATA(pkt) \ (const void *)((const uint8_t *)(pkt) + \ VMBUS_CHANPKT_GETLEN((pkt)->cph_hlen)) struct vmbus_rxbuf_desc { uint32_t rb_len; uint32_t rb_ofs; } __packed; struct vmbus_chanpkt_rxbuf { struct vmbus_chanpkt_hdr cp_hdr; uint16_t cp_rxbuf_id; uint16_t cp_rsvd; uint32_t cp_rxbuf_cnt; struct vmbus_rxbuf_desc cp_rxbuf[]; } __packed; -#define VMBUS_CHAN_SGLIST_MAX 32 -#define VMBUS_CHAN_PRPLIST_MAX 32 - struct vmbus_channel; struct hyperv_guid; typedef void (*vmbus_chan_callback_t)(struct vmbus_channel *, void *); static __inline struct vmbus_channel * vmbus_get_channel(device_t dev) { return device_get_ivars(dev); } int vmbus_chan_open(struct vmbus_channel *chan, int txbr_size, int rxbr_size, const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg); void vmbus_chan_close(struct vmbus_channel *chan); int vmbus_chan_gpadl_connect(struct vmbus_channel *chan, bus_addr_t paddr, int size, uint32_t *gpadl); int vmbus_chan_gpadl_disconnect(struct vmbus_channel *chan, uint32_t gpadl); void vmbus_chan_cpu_set(struct vmbus_channel *chan, int cpu); void vmbus_chan_cpu_rr(struct vmbus_channel *chan); struct vmbus_channel * vmbus_chan_cpu2chan(struct vmbus_channel *chan, int cpu); void vmbus_chan_set_readbatch(struct vmbus_channel *chan, bool on); struct vmbus_channel ** vmbus_subchan_get(struct vmbus_channel *pri_chan, int subchan_cnt); void vmbus_subchan_rel(struct vmbus_channel **subchan, int subchan_cnt); void vmbus_subchan_drain(struct vmbus_channel *pri_chan); int vmbus_chan_recv(struct vmbus_channel *chan, void *data, int *dlen, uint64_t *xactid); int vmbus_chan_recv_pkt(struct 
vmbus_channel *chan, struct vmbus_chanpkt_hdr *pkt, int *pktlen); int vmbus_chan_send(struct vmbus_channel *chan, uint16_t type, uint16_t flags, void *data, int dlen, uint64_t xactid); int vmbus_chan_send_sglist(struct vmbus_channel *chan, struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid); int vmbus_chan_send_prplist(struct vmbus_channel *chan, struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen, uint64_t xactid); uint32_t vmbus_chan_id(const struct vmbus_channel *chan); uint32_t vmbus_chan_subidx(const struct vmbus_channel *chan); bool vmbus_chan_is_primary(const struct vmbus_channel *chan); const struct hyperv_guid * vmbus_chan_guid_inst(const struct vmbus_channel *chan); #endif /* !_VMBUS_H_ */ Index: user/alc/PQ_LAUNDRY/sys/dev/hyperv/netvsc/hv_net_vsc.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 303642) @@ -1,1283 +1,1284 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ /* * HyperV vmbus (virtual machine bus) network VSC (virtual services client) * header file * * (Updated from unencumbered NvspProtocol.h) */ #ifndef __HV_NET_VSC_H__ #define __HV_NET_VSC_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define HN_USE_TXDESC_BUFRING MALLOC_DECLARE(M_NETVSC); #define NVSP_INVALID_PROTOCOL_VERSION (0xFFFFFFFF) #define NVSP_PROTOCOL_VERSION_1 2 #define NVSP_PROTOCOL_VERSION_2 0x30002 #define NVSP_PROTOCOL_VERSION_4 0x40000 #define NVSP_PROTOCOL_VERSION_5 0x50000 #define NVSP_MIN_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_1) #define NVSP_MAX_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_2) #define NVSP_PROTOCOL_VERSION_CURRENT NVSP_PROTOCOL_VERSION_2 #define VERSION_4_OFFLOAD_SIZE 22 #define NVSP_OPERATIONAL_STATUS_OK (0x00000000) #define NVSP_OPERATIONAL_STATUS_DEGRADED (0x00000001) #define NVSP_OPERATIONAL_STATUS_NONRECOVERABLE (0x00000002) #define NVSP_OPERATIONAL_STATUS_NO_CONTACT (0x00000003) #define NVSP_OPERATIONAL_STATUS_LOST_COMMUNICATION (0x00000004) /* * Maximun number of transfer pages (packets) the VSP will use on a receive */ #define NVSP_MAX_PACKETS_PER_RECEIVE 375 /* vRSS stuff */ #define RNDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88 #define RNDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89 #define RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2 #define RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2 struct rndis_obj_header { uint8_t type; uint8_t rev; uint16_t size; } __packed; /* rndis_recv_scale_cap/cap_flag */ #define RNDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000 #define RNDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000 #define RNDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000 #define RNDIS_RSS_CAPS_USING_MSI_X 0x08000000 #define RNDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000 #define RNDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400 /* RNDIS_RECEIVE_SCALE_CAPABILITIES */ struct rndis_recv_scale_cap { struct rndis_obj_header hdr; uint32_t cap_flag; uint32_t num_int_msg; uint32_t num_recv_que; uint16_t num_indirect_tabent; } __packed; /* rndis_recv_scale_param flags */ #define RNDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001 #define RNDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002 #define RNDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004 #define RNDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008 #define RNDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010 /* Hash info bits */ #define RNDIS_HASH_FUNC_TOEPLITZ 0x00000001 #define RNDIS_HASH_IPV4 0x00000100 #define RNDIS_HASH_TCP_IPV4 0x00000200 #define RNDIS_HASH_IPV6 0x00000400 #define RNDIS_HASH_IPV6_EX 0x00000800 #define RNDIS_HASH_TCP_IPV6 0x00001000 #define RNDIS_HASH_TCP_IPV6_EX 0x00002000 #define RNDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4) #define RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40 #define ITAB_NUM 128 #define HASH_KEYLEN RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 /* RNDIS_RECEIVE_SCALE_PARAMETERS */ typedef struct rndis_recv_scale_param_ { struct rndis_obj_header hdr; /* Qualifies the rest of the information */ uint16_t flag; /* The base CPU number to do receive processing. 
not used */ uint16_t base_cpu_number; /* This describes the hash function and type being enabled */ uint32_t hashinfo; /* The size of indirection table array */ uint16_t indirect_tabsize; /* The offset of the indirection table from the beginning of this * structure */ uint32_t indirect_taboffset; /* The size of the hash secret key */ uint16_t hashkey_size; /* The offset of the secret key from the beginning of this structure */ uint32_t hashkey_offset; uint32_t processor_masks_offset; uint32_t num_processor_masks; uint32_t processor_masks_entry_size; } rndis_recv_scale_param; typedef enum nvsp_msg_type_ { nvsp_msg_type_none = 0, /* * Init Messages */ nvsp_msg_type_init = 1, nvsp_msg_type_init_complete = 2, nvsp_version_msg_start = 100, /* * Version 1 Messages */ nvsp_msg_1_type_send_ndis_vers = nvsp_version_msg_start, nvsp_msg_1_type_send_rx_buf, nvsp_msg_1_type_send_rx_buf_complete, nvsp_msg_1_type_revoke_rx_buf, nvsp_msg_1_type_send_send_buf, nvsp_msg_1_type_send_send_buf_complete, nvsp_msg_1_type_revoke_send_buf, nvsp_msg_1_type_send_rndis_pkt, nvsp_msg_1_type_send_rndis_pkt_complete, /* * Version 2 Messages */ nvsp_msg_2_type_send_chimney_delegated_buf, nvsp_msg_2_type_send_chimney_delegated_buf_complete, nvsp_msg_2_type_revoke_chimney_delegated_buf, nvsp_msg_2_type_resume_chimney_rx_indication, nvsp_msg_2_type_terminate_chimney, nvsp_msg_2_type_terminate_chimney_complete, nvsp_msg_2_type_indicate_chimney_event, nvsp_msg_2_type_send_chimney_packet, nvsp_msg_2_type_send_chimney_packet_complete, nvsp_msg_2_type_post_chimney_rx_request, nvsp_msg_2_type_post_chimney_rx_request_complete, nvsp_msg_2_type_alloc_rx_buf, nvsp_msg_2_type_alloc_rx_buf_complete, nvsp_msg_2_type_free_rx_buf, nvsp_msg_2_send_vmq_rndis_pkt, nvsp_msg_2_send_vmq_rndis_pkt_complete, nvsp_msg_2_type_send_ndis_config, nvsp_msg_2_type_alloc_chimney_handle, nvsp_msg_2_type_alloc_chimney_handle_complete, nvsp_msg2_max = nvsp_msg_2_type_alloc_chimney_handle_complete, /* * Version 4 Messages */ nvsp_msg4_type_send_vf_association, nvsp_msg4_type_switch_data_path, nvsp_msg4_type_uplink_connect_state_deprecated, nvsp_msg4_max = nvsp_msg4_type_uplink_connect_state_deprecated, /* * Version 5 Messages */ nvsp_msg5_type_oid_query_ex, nvsp_msg5_type_oid_query_ex_comp, nvsp_msg5_type_subchannel, nvsp_msg5_type_send_indirection_table, nvsp_msg5_max = nvsp_msg5_type_send_indirection_table, } nvsp_msg_type; typedef enum nvsp_status_ { nvsp_status_none = 0, nvsp_status_success, nvsp_status_failure, /* Deprecated */ nvsp_status_prot_vers_range_too_new, /* Deprecated */ nvsp_status_prot_vers_range_too_old, nvsp_status_invalid_rndis_pkt, nvsp_status_busy, nvsp_status_max, } nvsp_status; typedef struct nvsp_msg_hdr_ { uint32_t msg_type; } __packed nvsp_msg_hdr; /* * Init Messages */ /* * This message is used by the VSC to initialize the channel * after the channels has been opened. This message should * never include anything other then versioning (i.e. this * message will be the same for ever). * * Forever is a long time. The values have been redefined * in Win7 to indicate major and minor protocol version * number. */ typedef struct nvsp_msg_init_ { union { struct { uint16_t minor_protocol_version; uint16_t major_protocol_version; } s; /* Formerly min_protocol_version */ uint32_t protocol_version; } p1; /* Formerly max_protocol_version */ uint32_t protocol_version_2; } __packed nvsp_msg_init; /* * This message is used by the VSP to complete the initialization * of the channel. 
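/*
 * Illustrative sketch, not from the original header: the 32-bit NVSP
 * protocol version carries the major number in the high 16 bits and the
 * minor number in the low 16 bits, matching the union layout above on the
 * little-endian machines Hyper-V guests run on.  For example,
 * NVSP_PROTOCOL_VERSION_2 (0x30002) decodes to major 3, minor 2, and
 * NVSP_PROTOCOL_VERSION_5 (0x50000) to major 5, minor 0.
 */
static __inline uint16_t
example_nvsp_ver_major(uint32_t proto_ver)
{
	return (proto_ver >> 16);
}

static __inline uint16_t
example_nvsp_ver_minor(uint32_t proto_ver)
{
	return (proto_ver & 0xffff);
}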
This message should never include anything other * then versioning (i.e. this message will be the same forever). */ typedef struct nvsp_msg_init_complete_ { /* Deprecated */ uint32_t negotiated_prot_vers; uint32_t max_mdl_chain_len; uint32_t status; } __packed nvsp_msg_init_complete; typedef union nvsp_msg_init_uber_ { nvsp_msg_init init; nvsp_msg_init_complete init_compl; } __packed nvsp_msg_init_uber; /* * Version 1 Messages */ /* * This message is used by the VSC to send the NDIS version * to the VSP. The VSP can use this information when handling * OIDs sent by the VSC. */ typedef struct nvsp_1_msg_send_ndis_version_ { uint32_t ndis_major_vers; /* Deprecated */ uint32_t ndis_minor_vers; } __packed nvsp_1_msg_send_ndis_version; /* * This message is used by the VSC to send a receive buffer * to the VSP. The VSP can then use the receive buffer to * send data to the VSC. */ typedef struct nvsp_1_msg_send_rx_buf_ { uint32_t gpadl_handle; uint16_t id; } __packed nvsp_1_msg_send_rx_buf; typedef struct nvsp_1_rx_buf_section_ { uint32_t offset; uint32_t sub_allocation_size; uint32_t num_sub_allocations; uint32_t end_offset; } __packed nvsp_1_rx_buf_section; /* * This message is used by the VSP to acknowledge a receive * buffer send by the VSC. This message must be sent by the * VSP before the VSP uses the receive buffer. */ typedef struct nvsp_1_msg_send_rx_buf_complete_ { uint32_t status; uint32_t num_sections; /* * The receive buffer is split into two parts, a large * suballocation section and a small suballocation * section. These sections are then suballocated by a * certain size. * * For example, the following break up of the receive * buffer has 6 large suballocations and 10 small * suballocations. * * | Large Section | | Small Section | * ------------------------------------------------------------ * | | | | | | | | | | | | | | | | | | * | | * LargeOffset SmallOffset */ nvsp_1_rx_buf_section sections[1]; } __packed nvsp_1_msg_send_rx_buf_complete; /* * This message is sent by the VSC to revoke the receive buffer. * After the VSP completes this transaction, the VSP should never * use the receive buffer again. */ typedef struct nvsp_1_msg_revoke_rx_buf_ { uint16_t id; } __packed nvsp_1_msg_revoke_rx_buf; /* * This message is used by the VSC to send a send buffer * to the VSP. The VSC can then use the send buffer to * send data to the VSP. */ typedef struct nvsp_1_msg_send_send_buf_ { uint32_t gpadl_handle; uint16_t id; } __packed nvsp_1_msg_send_send_buf; /* * This message is used by the VSP to acknowledge a send * buffer sent by the VSC. This message must be sent by the * VSP before the VSP uses the sent buffer. */ typedef struct nvsp_1_msg_send_send_buf_complete_ { uint32_t status; /* * The VSC gets to choose the size of the send buffer and * the VSP gets to choose the sections size of the buffer. * This was done to enable dynamic reconfigurations when * the cost of GPA-direct buffers decreases. */ uint32_t section_size; } __packed nvsp_1_msg_send_send_buf_complete; /* * This message is sent by the VSC to revoke the send buffer. * After the VSP completes this transaction, the vsp should never * use the send buffer again. */ typedef struct nvsp_1_msg_revoke_send_buf_ { uint16_t id; } __packed nvsp_1_msg_revoke_send_buf; /* * This message is used by both the VSP and the VSC to send * an RNDIS message to the opposite channel endpoint. */ typedef struct nvsp_1_msg_send_rndis_pkt_ { /* * This field is specified by RNIDS. They assume there's * two different channels of communication. 
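/*
 * Illustrative sketch, not from the original header: each
 * nvsp_1_rx_buf_section describes a run of equally sized sub-allocations
 * inside the receive buffer, so the address of sub-allocation 'i' is the
 * buffer base plus the section offset plus i times the sub-allocation
 * size.  'rx_buf' stands for the GPADL-backed receive buffer mapped by
 * the VSC (hypothetical parameter, not a name from this header).
 */
static __inline void *
example_rx_sub_allocation(void *rx_buf, const nvsp_1_rx_buf_section *sec,
    uint32_t i)
{
	return ((uint8_t *)rx_buf + sec->offset +
	    (uint64_t)i * sec->sub_allocation_size);
}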
However, * the Network VSP only has one. Therefore, the channel * travels with the RNDIS packet. */ uint32_t chan_type; /* * This field is used to send part or all of the data * through a send buffer. This values specifies an * index into the send buffer. If the index is * 0xFFFFFFFF, then the send buffer is not being used * and all of the data was sent through other VMBus * mechanisms. */ uint32_t send_buf_section_idx; uint32_t send_buf_section_size; } __packed nvsp_1_msg_send_rndis_pkt; /* * This message is used by both the VSP and the VSC to complete * a RNDIS message to the opposite channel endpoint. At this * point, the initiator of this message cannot use any resources * associated with the original RNDIS packet. */ typedef struct nvsp_1_msg_send_rndis_pkt_complete_ { uint32_t status; } __packed nvsp_1_msg_send_rndis_pkt_complete; /* * Version 2 Messages */ /* * This message is used by the VSC to send the NDIS version * to the VSP. The VSP can use this information when handling * OIDs sent by the VSC. */ typedef struct nvsp_2_netvsc_capabilities_ { union { uint64_t as_uint64; struct { uint64_t vmq : 1; uint64_t chimney : 1; uint64_t sriov : 1; uint64_t ieee8021q : 1; uint64_t correlationid : 1; uint64_t teaming : 1; } u2; } u1; } __packed nvsp_2_netvsc_capabilities; typedef struct nvsp_2_msg_send_ndis_config_ { uint32_t mtu; uint32_t reserved; nvsp_2_netvsc_capabilities capabilities; } __packed nvsp_2_msg_send_ndis_config; /* * NvspMessage2TypeSendChimneyDelegatedBuffer */ typedef struct nvsp_2_msg_send_chimney_buf_ { /* * On WIN7 beta, delegated_obj_max_size is defined as a uint32_t * Since WIN7 RC, it was split into two uint16_t. To have the same * struct layout, delegated_obj_max_size shall be the first field. */ uint16_t delegated_obj_max_size; /* * The revision # of chimney protocol used between NVSC and NVSP. * * This revision is NOT related to the chimney revision between * NDIS protocol and miniport drivers. */ uint16_t revision; uint32_t gpadl_handle; } __packed nvsp_2_msg_send_chimney_buf; /* Unsupported chimney revision 0 (only present in WIN7 beta) */ #define NVSP_CHIMNEY_REVISION_0 0 /* WIN7 Beta Chimney QFE */ #define NVSP_CHIMNEY_REVISION_1 1 /* The chimney revision since WIN7 RC */ #define NVSP_CHIMNEY_REVISION_2 2 /* * NvspMessage2TypeSendChimneyDelegatedBufferComplete */ typedef struct nvsp_2_msg_send_chimney_buf_complete_ { uint32_t status; /* * Maximum number outstanding sends and pre-posted receives. * * NVSC should not post more than SendQuota/ReceiveQuota packets. * Otherwise, it can block the non-chimney path for an indefinite * amount of time. * (since chimney sends/receives are affected by the remote peer). * * Note: NVSP enforces the quota restrictions on a per-VMBCHANNEL * basis. It doesn't enforce the restriction separately for chimney * send/receive. If NVSC doesn't voluntarily enforce "SendQuota", * it may kill its own network connectivity. 
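/*
 * Illustrative sketch, not from the original header: send_buf_section_idx
 * is a real section index when the RNDIS message was copied into the
 * send buffer, and the 0xFFFFFFFF sentinel when the data was carried as a
 * GPA list through other VMBus mechanisms, so the receiving side can key
 * on that sentinel.
 */
static __inline int
example_uses_send_buffer(const nvsp_1_msg_send_rndis_pkt *pkt)
{
	return (pkt->send_buf_section_idx != 0xFFFFFFFF);
}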
*/ uint32_t send_quota; uint32_t rx_quota; } __packed nvsp_2_msg_send_chimney_buf_complete; /* * NvspMessage2TypeRevokeChimneyDelegatedBuffer */ typedef struct nvsp_2_msg_revoke_chimney_buf_ { uint32_t gpadl_handle; } __packed nvsp_2_msg_revoke_chimney_buf; #define NVSP_CHIMNEY_OBJECT_TYPE_NEIGHBOR 0 #define NVSP_CHIMNEY_OBJECT_TYPE_PATH4 1 #define NVSP_CHIMNEY_OBJECT_TYPE_PATH6 2 #define NVSP_CHIMNEY_OBJECT_TYPE_TCP 3 /* * NvspMessage2TypeAllocateChimneyHandle */ typedef struct nvsp_2_msg_alloc_chimney_handle_ { uint64_t vsc_context; uint32_t object_type; } __packed nvsp_2_msg_alloc_chimney_handle; /* * NvspMessage2TypeAllocateChimneyHandleComplete */ typedef struct nvsp_2_msg_alloc_chimney_handle_complete_ { uint32_t vsp_handle; } __packed nvsp_2_msg_alloc_chimney_handle_complete; /* * NvspMessage2TypeResumeChimneyRXIndication */ typedef struct nvsp_2_msg_resume_chimney_rx_indication { /* * Handle identifying the offloaded connection */ uint32_t vsp_tcp_handle; } __packed nvsp_2_msg_resume_chimney_rx_indication; #define NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_FIRST_STAGE (0x01u) #define NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_RESERVED (~(0x01u)) /* * NvspMessage2TypeTerminateChimney */ typedef struct nvsp_2_msg_terminate_chimney_ { /* * Handle identifying the offloaded object */ uint32_t vsp_handle; /* * Terminate Offload Flags * Bit 0: * When set to 0, terminate the offload at the destination NIC * Bit 1-31: Reserved, shall be zero */ uint32_t flags; union { /* * This field is valid only when bit 0 of flags is clear. * It specifies the index into the premapped delegated * object buffer. The buffer was sent through the * NvspMessage2TypeSendChimneyDelegatedBuffer * message at initialization time. * * NVSP will write the delegated state into the delegated * buffer upon upload completion. */ uint32_t index; /* * This field is valid only when bit 0 of flags is set. * * The seqence number of the most recently accepted RX * indication when VSC sets its TCP context into * "terminating" state. * * This allows NVSP to determines if there are any in-flight * RX indications for which the acceptance state is still * undefined. */ uint64_t last_accepted_rx_seq_no; } f0; } __packed nvsp_2_msg_terminate_chimney; #define NVSP_TERMINATE_CHIMNEY_COMPLETE_FLAG_DATA_CORRUPTED 0x0000001u /* * NvspMessage2TypeTerminateChimneyComplete */ typedef struct nvsp_2_msg_terminate_chimney_complete_ { uint64_t vsc_context; uint32_t flags; } __packed nvsp_2_msg_terminate_chimney_complete; /* * NvspMessage2TypeIndicateChimneyEvent */ typedef struct nvsp_2_msg_indicate_chimney_event_ { /* * When VscTcpContext is 0, event_type is an NDIS_STATUS event code * Otherwise, EventType is an TCP connection event (defined in * NdisTcpOffloadEventHandler chimney DDK document). */ uint32_t event_type; /* * When VscTcpContext is 0, EventType is an NDIS_STATUS event code * Otherwise, EventType is an TCP connection event specific information * (defined in NdisTcpOffloadEventHandler chimney DDK document). */ uint32_t event_specific_info; /* * If not 0, the event is per-TCP connection event. This field * contains the VSC's TCP context. * If 0, the event indication is global. 
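/*
 * Illustrative sketch, not from the original header: bit 0 of the
 * terminate-chimney flags selects which member of the trailing union is
 * meaningful (the delegated-buffer index when clear, the last accepted
 * RX sequence number when set), per the field comments above.
 */
static __inline int
example_terminate_chimney_first_stage(const nvsp_2_msg_terminate_chimney *msg)
{
	return ((msg->flags &
	    NVSP_2_MSG_TERMINATE_CHIMNEY_FLAGS_FIRST_STAGE) != 0);
}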
*/ uint64_t vsc_tcp_context; } __packed nvsp_2_msg_indicate_chimney_event; #define NVSP_1_CHIMNEY_SEND_INVALID_OOB_INDEX 0xffffu #define NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX 0xffffffff /* * NvspMessage2TypeSendChimneyPacket */ typedef struct nvsp_2_msg_send_chimney_pkt_ { /* * Identify the TCP connection for which this chimney send is */ uint32_t vsp_tcp_handle; /* * This field is used to send part or all of the data * through a send buffer. This values specifies an * index into the send buffer. If the index is * 0xFFFF, then the send buffer is not being used * and all of the data was sent through other VMBus * mechanisms. */ uint16_t send_buf_section_index; uint16_t send_buf_section_size; /* * OOB Data Index * This an index to the OOB data buffer. If the index is 0xFFFFFFFF, * then there is no OOB data. * * This field shall be always 0xFFFFFFFF for now. It is reserved for * the future. */ uint16_t oob_data_index; /* * DisconnectFlags = 0 * Normal chimney send. See MiniportTcpOffloadSend for details. * * DisconnectFlags = TCP_DISCONNECT_GRACEFUL_CLOSE (0x01) * Graceful disconnect. See MiniportTcpOffloadDisconnect for details. * * DisconnectFlags = TCP_DISCONNECT_ABORTIVE_CLOSE (0x02) * Abortive disconnect. See MiniportTcpOffloadDisconnect for details. */ uint16_t disconnect_flags; uint32_t seq_no; } __packed nvsp_2_msg_send_chimney_pkt; /* * NvspMessage2TypeSendChimneyPacketComplete */ typedef struct nvsp_2_msg_send_chimney_pkt_complete_ { /* * The NDIS_STATUS for the chimney send */ uint32_t status; /* * Number of bytes that have been sent to the peer (and ACKed by the peer). */ uint32_t bytes_transferred; } __packed nvsp_2_msg_send_chimney_pkt_complete; #define NVSP_1_CHIMNEY_RECV_FLAG_NO_PUSH 0x0001u #define NVSP_1_CHIMNEY_RECV_INVALID_OOB_INDEX 0xffffu /* * NvspMessage2TypePostChimneyRecvRequest */ typedef struct nvsp_2_msg_post_chimney_rx_request_ { /* * Identify the TCP connection which this chimney receive request * is for. */ uint32_t vsp_tcp_handle; /* * OOB Data Index * This an index to the OOB data buffer. If the index is 0xFFFFFFFF, * then there is no OOB data. * * This field shall be always 0xFFFFFFFF for now. It is reserved for * the future. */ uint32_t oob_data_index; /* * Bit 0 * When it is set, this is a "no-push" receive. * When it is clear, this is a "push" receive. * * Bit 1-15: Reserved and shall be zero */ uint16_t flags; /* * For debugging and diagnoses purpose. * The SeqNo is per TCP connection and starts from 0. */ uint32_t seq_no; } __packed nvsp_2_msg_post_chimney_rx_request; /* * NvspMessage2TypePostChimneyRecvRequestComplete */ typedef struct nvsp_2_msg_post_chimney_rx_request_complete_ { /* * The NDIS_STATUS for the chimney send */ uint32_t status; /* * Number of bytes that have been sent to the peer (and ACKed by * the peer). 
*/ uint32_t bytes_xferred; } __packed nvsp_2_msg_post_chimney_rx_request_complete; /* * NvspMessage2TypeAllocateReceiveBuffer */ typedef struct nvsp_2_msg_alloc_rx_buf_ { /* * Allocation ID to match the allocation request and response */ uint32_t allocation_id; /* * Length of the VM shared memory receive buffer that needs to * be allocated */ uint32_t length; } __packed nvsp_2_msg_alloc_rx_buf; /* * NvspMessage2TypeAllocateReceiveBufferComplete */ typedef struct nvsp_2_msg_alloc_rx_buf_complete_ { /* * The NDIS_STATUS code for buffer allocation */ uint32_t status; /* * Allocation ID from NVSP_2_MESSAGE_ALLOCATE_RECEIVE_BUFFER */ uint32_t allocation_id; /* * GPADL handle for the allocated receive buffer */ uint32_t gpadl_handle; /* * Receive buffer ID that is further used in * NvspMessage2SendVmqRndisPacket */ uint64_t rx_buf_id; } __packed nvsp_2_msg_alloc_rx_buf_complete; /* * NvspMessage2TypeFreeReceiveBuffer */ typedef struct nvsp_2_msg_free_rx_buf_ { /* * Receive buffer ID previous returned in * NvspMessage2TypeAllocateReceiveBufferComplete message */ uint64_t rx_buf_id; } __packed nvsp_2_msg_free_rx_buf; /* * This structure is used in defining the buffers in * NVSP_2_MESSAGE_SEND_VMQ_RNDIS_PACKET structure */ typedef struct nvsp_xfer_page_range_ { /* * Specifies the ID of the receive buffer that has the buffer. This * ID can be the general receive buffer ID specified in * NvspMessage1TypeSendReceiveBuffer or it can be the shared memory * receive buffer ID allocated by the VSC and specified in * NvspMessage2TypeAllocateReceiveBufferComplete message */ uint64_t xfer_page_set_id; /* * Number of bytes */ uint32_t byte_count; /* * Offset in bytes from the beginning of the buffer */ uint32_t byte_offset; } __packed nvsp_xfer_page_range; /* * NvspMessage2SendVmqRndisPacket */ typedef struct nvsp_2_msg_send_vmq_rndis_pkt_ { /* * This field is specified by RNIDS. They assume there's * two different channels of communication. However, * the Network VSP only has one. Therefore, the channel * travels with the RNDIS packet. It must be RMC_DATA */ uint32_t channel_type; /* * Only the Range element corresponding to the RNDIS header of * the first RNDIS message in the multiple RNDIS messages sent * in one NVSP message. Information about the data portions as well * as the subsequent RNDIS messages in the same NVSP message are * embedded in the RNDIS header itself */ nvsp_xfer_page_range range; } __packed nvsp_2_msg_send_vmq_rndis_pkt; /* * This message is used by the VSC to complete * a RNDIS VMQ message to the VSP. At this point, * the initiator of this message can use any resources * associated with the original RNDIS VMQ packet. */ typedef struct nvsp_2_msg_send_vmq_rndis_pkt_complete_ { uint32_t status; } __packed nvsp_2_msg_send_vmq_rndis_pkt_complete; /* * Version 5 messages */ enum nvsp_subchannel_operation { NVSP_SUBCHANNEL_NONE = 0, NVSP_SUBCHANNE_ALLOCATE, NVSP_SUBCHANNE_MAX }; typedef struct nvsp_5_subchannel_request_ { uint32_t op; uint32_t num_subchannels; } __packed nvsp_5_subchannel_request; typedef struct nvsp_5_subchannel_complete_ { uint32_t status; /* Actual number of subchannels allocated */ uint32_t num_subchannels; } __packed nvsp_5_subchannel_complete; typedef struct nvsp_5_send_indirect_table_ { /* The number of entries in the send indirection table */ uint32_t count; /* * The offset of the send indireciton table from top of * this struct. The send indirection table tells which channel * to put the send traffic on. Each entry is a channel number. 
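/*
 * Illustrative sketch, not from the original header: a VSC asking for 'n'
 * extra channels fills an NVSP v5 subchannel request as below; the host
 * replies with an nvsp_5_subchannel_complete whose num_subchannels is the
 * count actually granted, which may be smaller than requested.  (The
 * NVSP_SUBCHANNE_ALLOCATE spelling is the enumerator defined above.)
 */
static __inline void
example_fill_subchannel_request(nvsp_5_subchannel_request *req, uint32_t n)
{
	req->op = NVSP_SUBCHANNE_ALLOCATE;
	req->num_subchannels = n;
}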
*/ uint32_t offset; } __packed nvsp_5_send_indirect_table; typedef union nvsp_1_msg_uber_ { nvsp_1_msg_send_ndis_version send_ndis_vers; nvsp_1_msg_send_rx_buf send_rx_buf; nvsp_1_msg_send_rx_buf_complete send_rx_buf_complete; nvsp_1_msg_revoke_rx_buf revoke_rx_buf; nvsp_1_msg_send_send_buf send_send_buf; nvsp_1_msg_send_send_buf_complete send_send_buf_complete; nvsp_1_msg_revoke_send_buf revoke_send_buf; nvsp_1_msg_send_rndis_pkt send_rndis_pkt; nvsp_1_msg_send_rndis_pkt_complete send_rndis_pkt_complete; } __packed nvsp_1_msg_uber; typedef union nvsp_2_msg_uber_ { nvsp_2_msg_send_ndis_config send_ndis_config; nvsp_2_msg_send_chimney_buf send_chimney_buf; nvsp_2_msg_send_chimney_buf_complete send_chimney_buf_complete; nvsp_2_msg_revoke_chimney_buf revoke_chimney_buf; nvsp_2_msg_resume_chimney_rx_indication resume_chimney_rx_indication; nvsp_2_msg_terminate_chimney terminate_chimney; nvsp_2_msg_terminate_chimney_complete terminate_chimney_complete; nvsp_2_msg_indicate_chimney_event indicate_chimney_event; nvsp_2_msg_send_chimney_pkt send_chimney_packet; nvsp_2_msg_send_chimney_pkt_complete send_chimney_packet_complete; nvsp_2_msg_post_chimney_rx_request post_chimney_rx_request; nvsp_2_msg_post_chimney_rx_request_complete post_chimney_rx_request_complete; nvsp_2_msg_alloc_rx_buf alloc_rx_buffer; nvsp_2_msg_alloc_rx_buf_complete alloc_rx_buffer_complete; nvsp_2_msg_free_rx_buf free_rx_buffer; nvsp_2_msg_send_vmq_rndis_pkt send_vmq_rndis_pkt; nvsp_2_msg_send_vmq_rndis_pkt_complete send_vmq_rndis_pkt_complete; nvsp_2_msg_alloc_chimney_handle alloc_chimney_handle; nvsp_2_msg_alloc_chimney_handle_complete alloc_chimney_handle_complete; } __packed nvsp_2_msg_uber; typedef union nvsp_5_msg_uber_ { nvsp_5_subchannel_request subchannel_request; nvsp_5_subchannel_complete subchn_complete; nvsp_5_send_indirect_table send_table; } __packed nvsp_5_msg_uber; typedef union nvsp_all_msgs_ { nvsp_msg_init_uber init_msgs; nvsp_1_msg_uber vers_1_msgs; nvsp_2_msg_uber vers_2_msgs; nvsp_5_msg_uber vers_5_msgs; } __packed nvsp_all_msgs; /* * ALL Messages */ typedef struct nvsp_msg_ { nvsp_msg_hdr hdr; nvsp_all_msgs msgs; } __packed nvsp_msg; /* * The following arguably belongs in a separate header file */ /* * Defines */ #define NETVSC_SEND_BUFFER_SIZE (1024*1024*15) /* 15M */ #define NETVSC_SEND_BUFFER_ID 0xface #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ #define NETVSC_RECEIVE_BUFFER_ID 0xcafe #define NETVSC_RECEIVE_SG_COUNT 1 /* Preallocated receive packets */ #define NETVSC_RECEIVE_PACKETLIST_COUNT 256 /* * Maximum MTU we permit to be configured for a netvsc interface. * When the code was developed, a max MTU of 12232 was tested and * proven to work. 9K is a reasonable maximum for an Ethernet. 
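/*
 * Illustrative sketch, not from the original header: the send indirection
 * table sits 'offset' bytes from the start of nvsp_5_send_indirect_table
 * and holds 'count' entries, each naming the channel to transmit on; the
 * 32-bit entry width matches the driver's vrss_send_table copy.  A lookup
 * by hash value could look like this (hypothetical helper).
 */
static __inline uint32_t
example_send_table_lookup(const nvsp_5_send_indirect_table *tbl,
    uint32_t hash)
{
	const uint32_t *ent;

	ent = (const uint32_t *)((const uint8_t *)tbl + tbl->offset);
	return (ent[hash % tbl->count]);
}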
*/ #define NETVSC_MAX_CONFIGURABLE_MTU (9 * 1024) #define NETVSC_PACKET_SIZE PAGE_SIZE #define VRSS_SEND_TABLE_SIZE 16 /* * Data types */ /* * Per netvsc channel-specific */ typedef struct netvsc_dev_ { struct hn_softc *sc; /* Send buffer allocated by us but manages by NetVSP */ void *send_buf; uint32_t send_buf_size; uint32_t send_buf_gpadl_handle; uint32_t send_section_size; uint32_t send_section_count; unsigned long bitsmap_words; unsigned long *send_section_bitsmap; /* Receive buffer allocated by us but managed by NetVSP */ void *rx_buf; uint32_t rx_buf_size; uint32_t rx_buf_gpadl_handle; uint32_t rx_section_count; nvsp_1_rx_buf_section *rx_sections; /* Used for NetVSP initialization protocol */ struct sema channel_init_sema; nvsp_msg channel_init_packet; nvsp_msg revoke_packet; /*uint8_t hw_mac_addr[ETHER_ADDR_LEN];*/ /* Holds rndis device info */ void *extension; uint8_t destroy; /* Negotiated NVSP version */ uint32_t nvsp_version; uint32_t num_channel; struct hyperv_dma rxbuf_dma; struct hyperv_dma txbuf_dma; uint32_t vrss_send_table[VRSS_SEND_TABLE_SIZE]; } netvsc_dev; struct vmbus_channel; typedef void (*pfn_on_send_rx_completion)(struct vmbus_channel *, void *); #define NETVSC_DEVICE_RING_BUFFER_SIZE (128 * PAGE_SIZE) +#define NETVSC_PACKET_MAXPAGE 32 #define NETVSC_VLAN_PRIO_MASK 0xe000 #define NETVSC_VLAN_PRIO_SHIFT 13 #define NETVSC_VLAN_VID_MASK 0x0fff #define TYPE_IPV4 2 #define TYPE_IPV6 4 #define TYPE_TCP 2 #define TYPE_UDP 4 #define TRANSPORT_TYPE_NOT_IP 0 #define TRANSPORT_TYPE_IPV4_TCP ((TYPE_IPV4 << 16) | TYPE_TCP) #define TRANSPORT_TYPE_IPV4_UDP ((TYPE_IPV4 << 16) | TYPE_UDP) #define TRANSPORT_TYPE_IPV6_TCP ((TYPE_IPV6 << 16) | TYPE_TCP) #define TRANSPORT_TYPE_IPV6_UDP ((TYPE_IPV6 << 16) | TYPE_UDP) #ifdef __LP64__ #define BITS_PER_LONG 64 #else #define BITS_PER_LONG 32 #endif typedef struct netvsc_packet_ { uint8_t is_data_pkt; /* One byte */ uint16_t vlan_tci; uint32_t status; /* Completion */ union { struct { uint64_t rx_completion_tid; void *rx_completion_context; /* This is no longer used */ pfn_on_send_rx_completion on_rx_completion; } rx; struct { uint64_t send_completion_tid; void *send_completion_context; /* Still used in netvsc and filter code */ pfn_on_send_rx_completion on_send_completion; } send; } compl; uint32_t send_buf_section_idx; uint32_t send_buf_section_size; void *rndis_mesg; uint32_t tot_data_buf_len; void *data; uint32_t gpa_cnt; - struct vmbus_gpa gpa[VMBUS_CHAN_SGLIST_MAX]; + struct vmbus_gpa gpa[NETVSC_PACKET_MAXPAGE]; } netvsc_packet; typedef struct { uint8_t mac_addr[6]; /* Assumption unsigned long */ uint8_t link_state; } netvsc_device_info; #ifndef HN_USE_TXDESC_BUFRING struct hn_txdesc; SLIST_HEAD(hn_txdesc_list, hn_txdesc); #else struct buf_ring; #endif struct hn_tx_ring; struct hn_rx_ring { struct ifnet *hn_ifp; struct hn_tx_ring *hn_txr; void *hn_rdbuf; int hn_rx_idx; /* Trust csum verification on host side */ int hn_trust_hcsum; /* HN_TRUST_HCSUM_ */ struct lro_ctrl hn_lro; u_long hn_csum_ip; u_long hn_csum_tcp; u_long hn_csum_udp; u_long hn_csum_trusted; u_long hn_lro_tried; u_long hn_small_pkts; u_long hn_pkts; u_long hn_rss_pkts; /* Rarely used stuffs */ struct sysctl_oid *hn_rx_sysctl_tree; int hn_rx_flags; } __aligned(CACHE_LINE_SIZE); #define HN_TRUST_HCSUM_IP 0x0001 #define HN_TRUST_HCSUM_TCP 0x0002 #define HN_TRUST_HCSUM_UDP 0x0004 #define HN_RX_FLAG_ATTACHED 0x1 struct hn_tx_ring { #ifndef HN_USE_TXDESC_BUFRING struct mtx hn_txlist_spin; struct hn_txdesc_list hn_txlist; #else struct buf_ring *hn_txdesc_br; #endif int 
hn_txdesc_cnt; int hn_txdesc_avail; u_short hn_has_txeof; u_short hn_txdone_cnt; int hn_sched_tx; void (*hn_txeof)(struct hn_tx_ring *); struct taskqueue *hn_tx_taskq; struct task hn_tx_task; struct task hn_txeof_task; struct buf_ring *hn_mbuf_br; int hn_oactive; int hn_tx_idx; struct mtx hn_tx_lock; struct hn_softc *hn_sc; struct vmbus_channel *hn_chan; int hn_direct_tx_size; int hn_tx_chimney_size; bus_dma_tag_t hn_tx_data_dtag; uint64_t hn_csum_assist; u_long hn_no_txdescs; u_long hn_send_failed; u_long hn_txdma_failed; u_long hn_tx_collapsed; u_long hn_tx_chimney_tried; u_long hn_tx_chimney; u_long hn_pkts; /* Rarely used stuffs */ struct hn_txdesc *hn_txdesc; bus_dma_tag_t hn_tx_rndis_dtag; struct sysctl_oid *hn_tx_sysctl_tree; int hn_tx_flags; } __aligned(CACHE_LINE_SIZE); #define HN_TX_FLAG_ATTACHED 0x1 /* * Device-specific softc structure */ typedef struct hn_softc { struct ifnet *hn_ifp; struct ifmedia hn_media; device_t hn_dev; uint8_t hn_unit; int hn_carrier; int hn_if_flags; struct mtx hn_lock; int hn_initdone; /* See hv_netvsc_drv_freebsd.c for rules on how to use */ int temp_unusable; netvsc_dev *net_dev; struct vmbus_channel *hn_prichan; int hn_rx_ring_cnt; int hn_rx_ring_inuse; struct hn_rx_ring *hn_rx_ring; int hn_tx_ring_cnt; int hn_tx_ring_inuse; struct hn_tx_ring *hn_tx_ring; int hn_cpu; int hn_tx_chimney_max; struct taskqueue *hn_tx_taskq; struct sysctl_oid *hn_tx_sysctl_tree; struct sysctl_oid *hn_rx_sysctl_tree; } hn_softc_t; /* * Externs */ extern int hv_promisc_mode; void netvsc_linkstatus_callback(struct hn_softc *sc, uint32_t status); netvsc_dev *hv_nv_on_device_add(struct hn_softc *sc, void *additional_info, struct hn_rx_ring *rxr); int hv_nv_on_device_remove(struct hn_softc *sc, boolean_t destroy_channel); int hv_nv_on_send(struct vmbus_channel *chan, netvsc_packet *pkt); int hv_nv_get_next_send_section(netvsc_dev *net_dev); void hv_nv_subchan_attach(struct vmbus_channel *chan, struct hn_rx_ring *rxr); #endif /* __HV_NET_VSC_H__ */ Index: user/alc/PQ_LAUNDRY/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 303642) @@ -1,3040 +1,3040 @@ /*- * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_net_vsc.h" #include "hv_rndis.h" #include "hv_rndis_filter.h" #include "vmbus_if.h" /* Short for Hyper-V network interface */ #define NETVSC_DEVNAME "hn" /* * It looks like offset 0 of buf is reserved to hold the softc pointer. * The sc pointer evidently not needed, and is not presently populated. * The packet offset is where the netvsc_packet starts in the buffer. 
*/ #define HV_NV_SC_PTR_OFFSET_IN_BUF 0 #define HV_NV_PACKET_OFFSET_IN_BUF 16 /* YYY should get it from the underlying channel */ #define HN_TX_DESC_CNT 512 #define HN_LROENT_CNT_DEF 128 #define HN_RING_CNT_DEF_MAX 8 #define HN_RNDIS_MSG_LEN \ (sizeof(rndis_msg) + \ RNDIS_HASHVAL_PPI_SIZE + \ RNDIS_VLAN_PPI_SIZE + \ RNDIS_TSO_PPI_SIZE + \ RNDIS_CSUM_PPI_SIZE) #define HN_RNDIS_MSG_BOUNDARY PAGE_SIZE #define HN_RNDIS_MSG_ALIGN CACHE_LINE_SIZE #define HN_TX_DATA_BOUNDARY PAGE_SIZE #define HN_TX_DATA_MAXSIZE IP_MAXPACKET #define HN_TX_DATA_SEGSIZE PAGE_SIZE #define HN_TX_DATA_SEGCNT_MAX \ - (VMBUS_CHAN_SGLIST_MAX - HV_RF_NUM_TX_RESERVED_PAGE_BUFS) + (NETVSC_PACKET_MAXPAGE - HV_RF_NUM_TX_RESERVED_PAGE_BUFS) #define HN_DIRECT_TX_SIZE_DEF 128 #define HN_EARLY_TXEOF_THRESH 8 struct hn_txdesc { #ifndef HN_USE_TXDESC_BUFRING SLIST_ENTRY(hn_txdesc) link; #endif struct mbuf *m; struct hn_tx_ring *txr; int refs; uint32_t flags; /* HN_TXD_FLAG_ */ netvsc_packet netvsc_pkt; /* XXX to be removed */ bus_dmamap_t data_dmap; bus_addr_t rndis_msg_paddr; rndis_msg *rndis_msg; bus_dmamap_t rndis_msg_dmap; }; #define HN_TXD_FLAG_ONLIST 0x1 #define HN_TXD_FLAG_DMAMAP 0x2 /* * Only enable UDP checksum offloading when it is on 2012R2 or * later. UDP checksum offloading doesn't work on earlier * Windows releases. */ #define HN_CSUM_ASSIST_WIN8 (CSUM_IP | CSUM_TCP) #define HN_CSUM_ASSIST (CSUM_IP | CSUM_UDP | CSUM_TCP) #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) /* YYY 2*MTU is a bit rough, but should be good enough. */ #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) #define HN_LRO_ACKCNT_DEF 1 /* * Be aware that this sleepable mutex will exhibit WITNESS errors when * certain TCP and ARP code paths are taken. This appears to be a * well-known condition, as all other drivers checked use a sleeping * mutex to protect their transmit paths. * Also Be aware that mutexes do not play well with semaphores, and there * is a conflicting semaphore in a certain channel code path. */ #define NV_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF) #define NV_LOCK(_sc) mtx_lock(&(_sc)->hn_lock) #define NV_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->hn_lock, MA_OWNED) #define NV_UNLOCK(_sc) mtx_unlock(&(_sc)->hn_lock) #define NV_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->hn_lock) /* * Globals */ int hv_promisc_mode = 0; /* normal mode by default */ SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Hyper-V network interface"); /* Trust tcp segements verification on host side. */ static int hn_trust_hosttcp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, &hn_trust_hosttcp, 0, "Trust tcp segement verification on host side, " "when csum info is missing (global setting)"); /* Trust udp datagrams verification on host side. */ static int hn_trust_hostudp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, &hn_trust_hostudp, 0, "Trust udp datagram verification on host side, " "when csum info is missing (global setting)"); /* Trust ip packets verification on host side. 
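/*
 * Illustrative check, not from the original source: with this revision
 * the per-packet GPA array is sized by NETVSC_PACKET_MAXPAGE instead of
 * the VMBus scatter/gather limit, and HN_TX_DATA_SEGCNT_MAX above leaves
 * room for the reserved page buffer(s) that carry the RNDIS message.  A
 * compile-time statement of that relationship would be:
 */
CTASSERT(HN_TX_DATA_SEGCNT_MAX + HV_RF_NUM_TX_RESERVED_PAGE_BUFS ==
    NETVSC_PACKET_MAXPAGE);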
*/ static int hn_trust_hostip = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, &hn_trust_hostip, 0, "Trust ip packet verification on host side, " "when csum info is missing (global setting)"); #if __FreeBSD_version >= 1100045 /* Limit TSO burst size */ static int hn_tso_maxlen = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, &hn_tso_maxlen, 0, "TSO burst limit"); #endif /* Limit chimney send size */ static int hn_tx_chimney_size = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, &hn_tx_chimney_size, 0, "Chimney send packet size limit"); /* Limit the size of packet for direct transmission */ static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, &hn_lro_entry_count, 0, "LRO entry count"); #endif #endif static int hn_share_tx_taskq = 0; SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN, &hn_share_tx_taskq, 0, "Enable shared TX taskqueue"); static struct taskqueue *hn_tx_taskq; #ifndef HN_USE_TXDESC_BUFRING static int hn_use_txdesc_bufring = 0; #else static int hn_use_txdesc_bufring = 1; #endif SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); static int hn_bind_tx_taskq = -1; SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN, &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu"); static int hn_use_if_start = 0; SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, &hn_use_if_start, 0, "Use if_start TX method"); static int hn_chan_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, &hn_chan_cnt, 0, "# of channels to use; each channel has one RX ring and one TX ring"); static int hn_tx_ring_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, &hn_tx_ring_cnt, 0, "# of TX rings to use"); static int hn_tx_swq_depth = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); #if __FreeBSD_version >= 1100095 static u_int hn_lro_mbufq_depth = 0; SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); #endif static u_int hn_cpu_index; /* * Forward declarations */ static void hn_stop(hn_softc_t *sc); static void hn_ifinit_locked(hn_softc_t *sc); static void hn_ifinit(void *xsc); static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int hn_start_locked(struct hn_tx_ring *txr, int len); static void hn_start(struct ifnet *ifp); static void hn_start_txeof(struct hn_tx_ring *); static int hn_ifmedia_upd(struct ifnet *ifp); static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); static int hn_check_iplen(const struct mbuf *, int); static int hn_create_tx_ring(struct hn_softc *, int); static void 
hn_destroy_tx_ring(struct hn_tx_ring *); static int hn_create_tx_data(struct hn_softc *, int); static void hn_destroy_tx_data(struct hn_softc *); static void hn_start_taskfunc(void *, int); static void hn_start_txeof_taskfunc(void *, int); static void hn_stop_tx_tasks(struct hn_softc *); static int hn_encap(struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **); static void hn_create_rx_data(struct hn_softc *sc, int); static void hn_destroy_rx_data(struct hn_softc *sc); static void hn_set_tx_chimney_size(struct hn_softc *, int); static void hn_channel_attach(struct hn_softc *, struct vmbus_channel *); static void hn_subchan_attach(struct hn_softc *, struct vmbus_channel *); static void hn_subchan_setup(struct hn_softc *); static int hn_transmit(struct ifnet *, struct mbuf *); static void hn_xmit_qflush(struct ifnet *); static int hn_xmit(struct hn_tx_ring *, int); static void hn_xmit_txeof(struct hn_tx_ring *); static void hn_xmit_taskfunc(void *, int); static void hn_xmit_txeof_taskfunc(void *, int); #if __FreeBSD_version >= 1100099 static void hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) { int i; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; } #endif static int hn_get_txswq_depth(const struct hn_tx_ring *txr) { KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); if (hn_tx_swq_depth < txr->hn_txdesc_cnt) return txr->hn_txdesc_cnt; return hn_tx_swq_depth; } static int hn_ifmedia_upd(struct ifnet *ifp __unused) { return EOPNOTSUPP; } static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct hn_softc *sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!sc->hn_carrier) { ifmr->ifm_active |= IFM_NONE; return; } ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_10G_T | IFM_FDX; } /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ static const struct hyperv_guid g_net_vsc_device_type = { .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} }; /* * Standard probe entry point. * */ static int netvsc_probe(device_t dev) { if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &g_net_vsc_device_type) == 0) { device_set_desc(dev, "Hyper-V Network Interface"); return BUS_PROBE_DEFAULT; } return ENXIO; } /* * Standard attach entry point. * * Called when the driver is loaded. It allocates needed resources, * and initializes the "hardware" and software. 
*/ static int netvsc_attach(device_t dev) { netvsc_device_info device_info; hn_softc_t *sc; int unit = device_get_unit(dev); struct ifnet *ifp = NULL; int error, ring_cnt, tx_ring_cnt; #if __FreeBSD_version >= 1100045 int tso_maxlen; #endif sc = device_get_softc(dev); sc->hn_unit = unit; sc->hn_dev = dev; sc->hn_prichan = vmbus_get_channel(dev); if (hn_tx_taskq == NULL) { sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&sc->hn_tx_taskq, 1, PI_NET, &cpu_set, "%s tx", device_get_nameunit(dev)); } else { taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx", device_get_nameunit(dev)); } } else { sc->hn_tx_taskq = hn_tx_taskq; } NV_LOCK_INIT(sc, "NetVSCLock"); ifp = sc->hn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); /* * Figure out the # of RX rings (ring_cnt) and the # of TX rings * to use (tx_ring_cnt). * * NOTE: * The # of RX rings to use is same as the # of channels to use. */ ring_cnt = hn_chan_cnt; if (ring_cnt <= 0) { /* Default */ ring_cnt = mp_ncpus; if (ring_cnt > HN_RING_CNT_DEF_MAX) ring_cnt = HN_RING_CNT_DEF_MAX; } else if (ring_cnt > mp_ncpus) { ring_cnt = mp_ncpus; } tx_ring_cnt = hn_tx_ring_cnt; if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) tx_ring_cnt = ring_cnt; if (hn_use_if_start) { /* ifnet.if_start only needs one TX ring. */ tx_ring_cnt = 1; } /* * Set the leader CPU for channels. */ sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; error = hn_create_tx_data(sc, tx_ring_cnt); if (error) goto failed; hn_create_rx_data(sc, ring_cnt); /* * Associate the first TX/RX ring w/ the primary channel. */ hn_channel_attach(sc, sc->hn_prichan); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = hn_ioctl; ifp->if_init = hn_ifinit; /* needed by hv_rf_on_device_add() code */ ifp->if_mtu = ETHERMTU; if (hn_use_if_start) { int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); ifp->if_start = hn_start; IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); ifp->if_snd.ifq_drv_maxlen = qdepth - 1; IFQ_SET_READY(&ifp->if_snd); } else { ifp->if_transmit = hn_transmit; ifp->if_qflush = hn_xmit_qflush; } ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); /* XXX ifmedia_set really should do this for us */ sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; /* * Tell upper layers that we support full VLAN capability. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO; error = hv_rf_on_device_add(sc, &device_info, ring_cnt, &sc->hn_rx_ring[0]); if (error) goto failed; KASSERT(sc->net_dev->num_channel > 0 && sc->net_dev->num_channel <= sc->hn_rx_ring_inuse, ("invalid channel count %u, should be less than %d", sc->net_dev->num_channel, sc->hn_rx_ring_inuse)); /* * Set the # of TX/RX rings that could be used according to * the # of channels that host offered. 
*/ if (sc->hn_tx_ring_inuse > sc->net_dev->num_channel) sc->hn_tx_ring_inuse = sc->net_dev->num_channel; sc->hn_rx_ring_inuse = sc->net_dev->num_channel; device_printf(dev, "%d TX ring, %d RX ring\n", sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); if (sc->net_dev->num_channel > 1) hn_subchan_setup(sc); #if __FreeBSD_version >= 1100099 if (sc->hn_rx_ring_inuse > 1) { /* * Reduce TCP segment aggregation limit for multiple * RX rings to increase ACK timeliness. */ hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); } #endif if (device_info.link_state == 0) { sc->hn_carrier = 1; } #if __FreeBSD_version >= 1100045 tso_maxlen = hn_tso_maxlen; if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET) tso_maxlen = IP_MAXPACKET; ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); #endif ether_ifattach(ifp, device_info.mac_addr); #if __FreeBSD_version >= 1100045 if_printf(ifp, "TSO: %u/%u/%u\n", ifp->if_hw_tsomax, ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); #endif sc->hn_tx_chimney_max = sc->net_dev->send_section_size; hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max); if (hn_tx_chimney_size > 0 && hn_tx_chimney_size < sc->hn_tx_chimney_max) hn_set_tx_chimney_size(sc, hn_tx_chimney_size); return (0); failed: hn_destroy_tx_data(sc); if (ifp != NULL) if_free(ifp); return (error); } /* * Standard detach entry point */ static int netvsc_detach(device_t dev) { struct hn_softc *sc = device_get_softc(dev); if (bootverbose) printf("netvsc_detach\n"); /* * XXXKYS: Need to clean up all our * driver state; this is the driver * unloading. */ /* * XXXKYS: Need to stop outgoing traffic and unregister * the netdevice. */ hv_rf_on_device_remove(sc, HV_RF_NV_DESTROY_CHANNEL); hn_stop_tx_tasks(sc); ifmedia_removeall(&sc->hn_media); hn_destroy_rx_data(sc); hn_destroy_tx_data(sc); if (sc->hn_tx_taskq != hn_tx_taskq) taskqueue_free(sc->hn_tx_taskq); return (0); } /* * Standard shutdown entry point */ static int netvsc_shutdown(device_t dev) { return (0); } static __inline int hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) { struct mbuf *m = *m_head; int error; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { struct mbuf *m_new; m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); if (m_new == NULL) return ENOBUFS; else *m_head = m = m_new; txr->hn_tx_collapsed++; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); } if (!error) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_PREWRITE); txd->flags |= HN_TXD_FLAG_DMAMAP; } return error; } static __inline void hn_txdesc_dmamap_unload(struct hn_tx_ring *txr, struct hn_txdesc *txd) { if (txd->flags & HN_TXD_FLAG_DMAMAP) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->hn_tx_data_dtag, txd->data_dmap); txd->flags &= ~HN_TXD_FLAG_DMAMAP; } } static __inline int hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, ("put an onlist txd %#x", txd->flags)); KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); if (atomic_fetchadd_int(&txd->refs, -1) != 1) return 0; hn_txdesc_dmamap_unload(txr, txd); if (txd->m != NULL) { m_freem(txd->m); txd->m = NULL; } txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING 
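/*
 * Note added for illustration, not from the original source: the DMA map
 * helper above retries bus_dmamap_load_mbuf_sg() exactly once when it
 * returns EFBIG, first compacting the mbuf chain with m_collapse() so it
 * fits in at most HN_TX_DATA_SEGCNT_MAX segments; if the compaction
 * itself fails the helper gives up with ENOBUFS.
 */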
mtx_lock_spin(&txr->hn_txlist_spin); KASSERT(txr->hn_txdesc_avail >= 0 && txr->hn_txdesc_avail < txr->hn_txdesc_cnt, ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail++; SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); mtx_unlock_spin(&txr->hn_txlist_spin); #else atomic_add_int(&txr->hn_txdesc_avail, 1); buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif return 1; } static __inline struct hn_txdesc * hn_txdesc_get(struct hn_tx_ring *txr) { struct hn_txdesc *txd; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); txd = SLIST_FIRST(&txr->hn_txlist); if (txd != NULL) { KASSERT(txr->hn_txdesc_avail > 0, ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail--; SLIST_REMOVE_HEAD(&txr->hn_txlist, link); } mtx_unlock_spin(&txr->hn_txlist_spin); #else txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); #endif if (txd != NULL) { #ifdef HN_USE_TXDESC_BUFRING atomic_subtract_int(&txr->hn_txdesc_avail, 1); #endif KASSERT(txd->m == NULL && txd->refs == 0 && (txd->flags & HN_TXD_FLAG_ONLIST), ("invalid txd")); txd->flags &= ~HN_TXD_FLAG_ONLIST; txd->refs = 1; } return txd; } static __inline void hn_txdesc_hold(struct hn_txdesc *txd) { /* 0->1 transition will never work */ KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs)); atomic_add_int(&txd->refs, 1); } static __inline void hn_txeof(struct hn_tx_ring *txr) { txr->hn_has_txeof = 0; txr->hn_txeof(txr); } static void hn_tx_done(struct vmbus_channel *chan, void *xpkt) { netvsc_packet *packet = xpkt; struct hn_txdesc *txd; struct hn_tx_ring *txr; txd = (struct hn_txdesc *)(uintptr_t) packet->compl.send.send_completion_tid; txr = txd->txr; KASSERT(txr->hn_chan == chan, ("channel mismatch, on chan%u, should be chan%u", vmbus_chan_subidx(chan), vmbus_chan_subidx(txr->hn_chan))); txr->hn_has_txeof = 1; hn_txdesc_put(txr, txd); ++txr->hn_txdone_cnt; if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { txr->hn_txdone_cnt = 0; if (txr->hn_oactive) hn_txeof(txr); } } void netvsc_channel_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) { #if defined(INET) || defined(INET6) tcp_lro_flush_all(&rxr->hn_lro); #endif /* * NOTE: * 'txr' could be NULL, if multiple channels and * ifnet.if_start method are enabled. */ if (txr == NULL || !txr->hn_has_txeof) return; txr->hn_txdone_cnt = 0; hn_txeof(txr); } /* * NOTE: * If this function fails, then both txd and m_head0 will be freed. */ static int hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) { bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; int error, nsegs, i; struct mbuf *m_head = *m_head0; netvsc_packet *packet; rndis_msg *rndis_mesg; rndis_packet *rndis_pkt; rndis_per_packet_info *rppi; struct rndis_hash_value *hash_value; uint32_t rndis_msg_size; packet = &txd->netvsc_pkt; packet->is_data_pkt = TRUE; packet->tot_data_buf_len = m_head->m_pkthdr.len; /* * extension points to the area reserved for the * rndis_filter_packet, which is placed just after * the netvsc_packet (and rppi struct, if present; * length is updated later). 
*/ rndis_mesg = txd->rndis_msg; /* XXX not necessary */ memset(rndis_mesg, 0, HN_RNDIS_MSG_LEN); rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG; rndis_pkt = &rndis_mesg->msg.packet; rndis_pkt->data_offset = sizeof(rndis_packet); rndis_pkt->data_length = packet->tot_data_buf_len; rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet); rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet); /* * Set the hash value for this packet, so that the host could * dispatch the TX done event for this packet back to this TX * ring's channel. */ rndis_msg_size += RNDIS_HASHVAL_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_HASHVAL_PPI_SIZE, nbl_hash_value); hash_value = (struct rndis_hash_value *)((uint8_t *)rppi + rppi->per_packet_info_offset); hash_value->hash_value = txr->hn_tx_idx; if (m_head->m_flags & M_VLANTAG) { ndis_8021q_info *rppi_vlan_info; rndis_msg_size += RNDIS_VLAN_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE, ieee_8021q_info); rppi_vlan_info = (ndis_8021q_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); rppi_vlan_info->u1.s1.vlan_id = m_head->m_pkthdr.ether_vtag & 0xfff; } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { rndis_tcp_tso_info *tso_info; struct ether_vlan_header *eh; int ether_len; /* * XXX need m_pullup and use mtodo */ eh = mtod(m_head, struct ether_vlan_header*); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; else ether_len = ETHER_HDR_LEN; rndis_msg_size += RNDIS_TSO_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_TSO_PPI_SIZE, tcp_large_send_info); tso_info = (rndis_tcp_tso_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); tso_info->lso_v2_xmit.type = RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; #ifdef INET if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { struct ip *ip = (struct ip *)(m_head->m_data + ether_len); unsigned long iph_len = ip->ip_hl << 2; struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iph_len); tso_info->lso_v2_xmit.ip_version = RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; ip->ip_len = 0; ip->ip_sum = 0; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { struct ip6_hdr *ip6 = (struct ip6_hdr *) (m_head->m_data + ether_len); struct tcphdr *th = (struct tcphdr *)(ip6 + 1); tso_info->lso_v2_xmit.ip_version = RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; ip6->ip6_plen = 0; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); } #endif tso_info->lso_v2_xmit.tcp_header_offset = 0; tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz; } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { rndis_tcp_ip_csum_info *csum_info; rndis_msg_size += RNDIS_CSUM_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE, tcpip_chksum_info); csum_info = (rndis_tcp_ip_csum_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); csum_info->xmit.is_ipv4 = 1; if (m_head->m_pkthdr.csum_flags & CSUM_IP) csum_info->xmit.ip_header_csum = 1; if (m_head->m_pkthdr.csum_flags & CSUM_TCP) { csum_info->xmit.tcp_csum = 1; csum_info->xmit.tcp_header_offset = 0; } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { csum_info->xmit.udp_csum = 1; } } rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size; packet->tot_data_buf_len = rndis_mesg->msg_len; /* * Chimney send, if the packet could fit into one chimney buffer. 
*/ if (packet->tot_data_buf_len < txr->hn_tx_chimney_size) { netvsc_dev *net_dev = txr->hn_sc->net_dev; uint32_t send_buf_section_idx; txr->hn_tx_chimney_tried++; send_buf_section_idx = hv_nv_get_next_send_section(net_dev); if (send_buf_section_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { uint8_t *dest = ((uint8_t *)net_dev->send_buf + (send_buf_section_idx * net_dev->send_section_size)); memcpy(dest, rndis_mesg, rndis_msg_size); dest += rndis_msg_size; m_copydata(m_head, 0, m_head->m_pkthdr.len, dest); packet->send_buf_section_idx = send_buf_section_idx; packet->send_buf_section_size = packet->tot_data_buf_len; packet->gpa_cnt = 0; txr->hn_tx_chimney++; goto done; } } error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); if (error) { int freed; /* * This mbuf is not linked w/ the txd yet, so free it now. */ m_freem(m_head); *m_head0 = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon txdma error")); txr->hn_txdma_failed++; if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1); return error; } *m_head0 = m_head; packet->gpa_cnt = nsegs + HV_RF_NUM_TX_RESERVED_PAGE_BUFS; /* send packet with page buffer */ packet->gpa[0].gpa_page = atop(txd->rndis_msg_paddr); packet->gpa[0].gpa_ofs = txd->rndis_msg_paddr & PAGE_MASK; packet->gpa[0].gpa_len = rndis_msg_size; /* * Fill the page buffers with mbuf info starting at index * HV_RF_NUM_TX_RESERVED_PAGE_BUFS. */ for (i = 0; i < nsegs; ++i) { struct vmbus_gpa *gpa = &packet->gpa[ i + HV_RF_NUM_TX_RESERVED_PAGE_BUFS]; gpa->gpa_page = atop(segs[i].ds_addr); gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; gpa->gpa_len = segs[i].ds_len; } packet->send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; packet->send_buf_section_size = 0; done: txd->m = m_head; /* Set the completion routine */ packet->compl.send.on_send_completion = hn_tx_done; packet->compl.send.send_completion_context = packet; packet->compl.send.send_completion_tid = (uint64_t)(uintptr_t)txd; return 0; } /* * NOTE: * If this function fails, then txd will be freed, but the mbuf * associated w/ the txd will _not_ be freed. */ static int hn_send_pkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) { int error, send_failed = 0; again: /* * Make sure that txd is not freed before ETHER_BPF_MTAP. */ hn_txdesc_hold(txd); error = hv_nv_on_send(txr->hn_chan, &txd->netvsc_pkt); if (!error) { ETHER_BPF_MTAP(ifp, txd->m); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (!hn_use_if_start) { if_inc_counter(ifp, IFCOUNTER_OBYTES, txd->m->m_pkthdr.len); if (txd->m->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } txr->hn_pkts++; } hn_txdesc_put(txr, txd); if (__predict_false(error)) { int freed; /* * This should "really rarely" happen. * * XXX Too many RX to be acked or too many sideband * commands to run? Ask netvsc_channel_rollup() * to kick start later. */ txr->hn_has_txeof = 1; if (!send_failed) { txr->hn_send_failed++; send_failed = 1; /* * Try sending again after set hn_has_txeof; * in case that we missed the last * netvsc_channel_rollup(). */ goto again; } if_printf(ifp, "send failed\n"); /* * Caller will perform further processing on the * associated mbuf, so don't free it in hn_txdesc_put(); * only unload it from the DMA map in hn_txdesc_put(), * if it was loaded. 
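/*
 * Note added for illustration, not from the original source: on a
 * successful transmit a descriptor leaves hn_txdesc_get() with refs == 1,
 * hn_send_pkt() takes an extra reference before hv_nv_on_send() so the
 * mbuf stays valid for ETHER_BPF_MTAP(), then drops it again, and the
 * send-completion callback (hn_tx_done()) drops the last reference;
 * whichever hn_txdesc_put() call brings refs to zero unloads the DMA map,
 * frees the mbuf and returns the descriptor to the free list.
 */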
*/ txd->m = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon send error")); txr->hn_send_failed++; } return error; } /* * Start a transmit of one or more packets */ static int hn_start_locked(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(hn_use_if_start, ("hn_start_locked is called, when if_start is disabled")); KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); mtx_assert(&txr->hn_tx_lock, MA_OWNED); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return 0; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { struct hn_txdesc *txd; struct mbuf *m_head; int error; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); return 1; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } error = hn_encap(txr, txd, &m_head); if (error) { /* Both txd and m_head are freed */ continue; } error = hn_send_pkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } return 0; } /* * Link up/down notification */ void netvsc_linkstatus_callback(struct hn_softc *sc, uint32_t status) { if (status == 1) { sc->hn_carrier = 1; } else { sc->hn_carrier = 0; } } /* * Append the specified data to the indicated mbuf chain, * Extend the mbuf chain if the new data does not fit in * existing space. * * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. * There should be an equivalent in the kernel mbuf code, * but there does not appear to be one yet. * * Differs from m_append() in that additional mbufs are * allocated with cluster size MJUMPAGESIZE, and filled * accordingly. * * Return 1 if able to complete the job; otherwise 0. */ static int hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) { struct mbuf *m, *n; int remainder, space; for (m = m0; m->m_next != NULL; m = m->m_next) ; remainder = len; space = M_TRAILINGSPACE(m); if (space > 0) { /* * Copy into available space. */ if (space > remainder) space = remainder; bcopy(cp, mtod(m, caddr_t) + m->m_len, space); m->m_len += space; cp += space; remainder -= space; } while (remainder > 0) { /* * Allocate a new mbuf; could check space * and allocate a cluster instead. 
*/ n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); if (n == NULL) break; n->m_len = min(MJUMPAGESIZE, remainder); bcopy(cp, mtod(n, caddr_t), n->m_len); cp += n->m_len; remainder -= n->m_len; m->m_next = n; m = n; } if (m0->m_flags & M_PKTHDR) m0->m_pkthdr.len += len - remainder; return (remainder == 0); } #if defined(INET) || defined(INET6) static __inline int hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) { #if __FreeBSD_version >= 1100095 if (hn_lro_mbufq_depth) { tcp_lro_queue_mbuf(lc, m); return 0; } #endif return tcp_lro_rx(lc, m, 0); } #endif /* * Called when we receive a data packet from the "wire" on the * specified device * * Note: This is no longer used as a callback */ int netvsc_recv(struct hn_rx_ring *rxr, netvsc_packet *packet, const rndis_tcp_ip_csum_info *csum_info, const struct rndis_hash_info *hash_info, const struct rndis_hash_value *hash_value) { struct ifnet *ifp = rxr->hn_ifp; struct mbuf *m_new; int size, do_lro = 0, do_csum = 1; int hash_type = M_HASHTYPE_OPAQUE_HASH; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return (0); /* * Bail out if packet contains more data than configured MTU. */ if (packet->tot_data_buf_len > (ifp->if_mtu + ETHER_HDR_LEN)) { return (0); } else if (packet->tot_data_buf_len <= MHLEN) { m_new = m_gethdr(M_NOWAIT, MT_DATA); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } memcpy(mtod(m_new, void *), packet->data, packet->tot_data_buf_len); m_new->m_pkthdr.len = m_new->m_len = packet->tot_data_buf_len; rxr->hn_small_pkts++; } else { /* * Get an mbuf with a cluster. For packets 2K or less, * get a standard 2K cluster. For anything larger, get a * 4K cluster. Any buffers larger than 4K can cause problems * if looped around to the Hyper-V TX channel, so avoid them. */ size = MCLBYTES; if (packet->tot_data_buf_len > MCLBYTES) { /* 4096 */ size = MJUMPAGESIZE; } m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } hv_m_append(m_new, packet->tot_data_buf_len, packet->data); } m_new->m_pkthdr.rcvif = ifp; if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0)) do_csum = 0; /* receive side checksum offload */ if (csum_info != NULL) { /* IP csum offload */ if (csum_info->receive.ip_csum_succeeded && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); rxr->hn_csum_ip++; } /* TCP/UDP csum offload */ if ((csum_info->receive.tcp_csum_succeeded || csum_info->receive.udp_csum_succeeded) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; if (csum_info->receive.tcp_csum_succeeded) rxr->hn_csum_tcp++; else rxr->hn_csum_udp++; } if (csum_info->receive.ip_csum_succeeded && csum_info->receive.tcp_csum_succeeded) do_lro = 1; } else { const struct ether_header *eh; uint16_t etype; int hoff; hoff = sizeof(*eh); if (m_new->m_len < hoff) goto skip; eh = mtod(m_new, struct ether_header *); etype = ntohs(eh->ether_type); if (etype == ETHERTYPE_VLAN) { const struct ether_vlan_header *evl; hoff = sizeof(*evl); if (m_new->m_len < hoff) goto skip; evl = mtod(m_new, struct ether_vlan_header *); etype = ntohs(evl->evl_proto); } if (etype == ETHERTYPE_IP) { int pr; pr = hn_check_iplen(m_new, hoff); if (pr == IPPROTO_TCP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_TCP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } do_lro = 1; } else if (pr == 
IPPROTO_UDP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_UDP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } } else if (pr != IPPROTO_DONE && do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); } } } skip: if ((packet->vlan_tci != 0) && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { m_new->m_pkthdr.ether_vtag = packet->vlan_tci; m_new->m_flags |= M_VLANTAG; } if (hash_info != NULL && hash_value != NULL) { rxr->hn_rss_pkts++; m_new->m_pkthdr.flowid = hash_value->hash_value; if ((hash_info->hash_info & NDIS_HASH_FUNCTION_MASK) == NDIS_HASH_FUNCTION_TOEPLITZ) { uint32_t type = (hash_info->hash_info & NDIS_HASH_TYPE_MASK); switch (type) { case NDIS_HASH_IPV4: hash_type = M_HASHTYPE_RSS_IPV4; break; case NDIS_HASH_TCP_IPV4: hash_type = M_HASHTYPE_RSS_TCP_IPV4; break; case NDIS_HASH_IPV6: hash_type = M_HASHTYPE_RSS_IPV6; break; case NDIS_HASH_IPV6_EX: hash_type = M_HASHTYPE_RSS_IPV6_EX; break; case NDIS_HASH_TCP_IPV6: hash_type = M_HASHTYPE_RSS_TCP_IPV6; break; case NDIS_HASH_TCP_IPV6_EX: hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; break; } } } else { if (hash_value != NULL) { m_new->m_pkthdr.flowid = hash_value->hash_value; } else { m_new->m_pkthdr.flowid = rxr->hn_rx_idx; hash_type = M_HASHTYPE_OPAQUE; } } M_HASHTYPE_SET(m_new, hash_type); /* * Note: Moved RX completion back to hv_nv_on_receive() so all * messages (not just data messages) will trigger a response. */ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); rxr->hn_pkts++; if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { #if defined(INET) || defined(INET6) struct lro_ctrl *lro = &rxr->hn_lro; if (lro->lro_cnt) { rxr->hn_lro_tried++; if (hn_lro_rx(lro, m_new) == 0) { /* DONE! */ return 0; } } #endif } /* We're not holding the lock here, so don't release it */ (*ifp->if_input)(ifp, m_new); return (0); } /* * Rules for using sc->temp_unusable: * 1. sc->temp_unusable can only be read or written while holding NV_LOCK() * 2. code reading sc->temp_unusable under NV_LOCK(), and finding * sc->temp_unusable set, must release NV_LOCK() and exit * 3. to retain exclusive control of the interface, * sc->temp_unusable must be set by code before releasing NV_LOCK() * 4. only code setting sc->temp_unusable can clear sc->temp_unusable * 5. code setting sc->temp_unusable must eventually clear sc->temp_unusable */ /* * Standard ioctl entry point. Called when the user wants to configure * the interface. */ static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { hn_softc_t *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; #ifdef INET struct ifaddr *ifa = (struct ifaddr *)data; #endif netvsc_device_info device_info; int mask, error = 0; int retry_cnt = 500; switch(cmd) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) hn_ifinit(sc); arp_ifinit(ifp, ifa); } else #endif error = ether_ioctl(ifp, cmd, data); break; case SIOCSIFMTU: /* Check MTU value change */ if (ifp->if_mtu == ifr->ifr_mtu) break; if (ifr->ifr_mtu > NETVSC_MAX_CONFIGURABLE_MTU) { error = EINVAL; break; } /* Obtain and record requested MTU */ ifp->if_mtu = ifr->ifr_mtu; #if __FreeBSD_version >= 1100099 /* * Make sure that LRO aggregation length limit is still * valid, after the MTU change. 
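 *
 * HN_LRO_LENLIM_MIN(ifp) is presumably computed from the interface
 * MTU (hence the ifp argument), so after growing the MTU the
 * per-ring lro_length_lim may fall below that minimum and has to be
 * bumped via hn_set_lro_lenlim() before the device is reinitialized
 * below.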
*/ NV_LOCK(sc); if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); NV_UNLOCK(sc); #endif do { NV_LOCK(sc); if (!sc->temp_unusable) { sc->temp_unusable = TRUE; retry_cnt = -1; } NV_UNLOCK(sc); if (retry_cnt > 0) { retry_cnt--; DELAY(5 * 1000); } } while (retry_cnt > 0); if (retry_cnt == 0) { error = EINVAL; break; } /* We must remove and add back the device to cause the new * MTU to take effect. This includes tearing down, but not * deleting the channel, then bringing it back up. */ error = hv_rf_on_device_remove(sc, HV_RF_NV_RETAIN_CHANNEL); if (error) { NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; } /* Wait for subchannels to be destroyed */ vmbus_subchan_drain(sc->hn_prichan); error = hv_rf_on_device_add(sc, &device_info, sc->hn_rx_ring_inuse, &sc->hn_rx_ring[0]); if (error) { NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; } KASSERT(sc->hn_rx_ring_cnt == sc->net_dev->num_channel, ("RX ring count %d and channel count %u mismatch", sc->hn_rx_ring_cnt, sc->net_dev->num_channel)); if (sc->net_dev->num_channel > 1) { int r; /* * Skip the rings on primary channel; they are * handled by the hv_rf_on_device_add() above. */ for (r = 1; r < sc->hn_rx_ring_cnt; ++r) { sc->hn_rx_ring[r].hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; } for (r = 1; r < sc->hn_tx_ring_cnt; ++r) { sc->hn_tx_ring[r].hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; } hn_subchan_setup(sc); } sc->hn_tx_chimney_max = sc->net_dev->send_section_size; if (sc->hn_tx_ring[0].hn_tx_chimney_size > sc->hn_tx_chimney_max) hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max); hn_ifinit_locked(sc); NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; case SIOCSIFFLAGS: do { NV_LOCK(sc); if (!sc->temp_unusable) { sc->temp_unusable = TRUE; retry_cnt = -1; } NV_UNLOCK(sc); if (retry_cnt > 0) { retry_cnt--; DELAY(5 * 1000); } } while (retry_cnt > 0); if (retry_cnt == 0) { error = EINVAL; break; } if (ifp->if_flags & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. */ #ifdef notyet /* Fixme: Promiscuous mode? 
*/ if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->hn_if_flags & IFF_PROMISC)) { /* do something here for Hyper-V */ } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->hn_if_flags & IFF_PROMISC) { /* do something here for Hyper-V */ } else #endif hn_ifinit_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { hn_stop(sc); } } NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); sc->hn_if_flags = ifp->if_flags; error = 0; break; case SIOCSIFCAP: NV_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= sc->hn_tx_ring[0].hn_csum_assist; } else { ifp->if_hwassist &= ~sc->hn_tx_ring[0].hn_csum_assist; } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; else ifp->if_hwassist &= ~CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; else ifp->if_hwassist &= ~CSUM_IP6_TSO; } NV_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: #ifdef notyet /* Fixme: Multicast mode? */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) { NV_LOCK(sc); netvsc_setmulti(sc); NV_UNLOCK(sc); error = 0; } #endif error = EINVAL; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } /* * */ static void hn_stop(hn_softc_t *sc) { struct ifnet *ifp; int ret, i; ifp = sc->hn_ifp; if (bootverbose) printf(" Closing Device ...\n"); atomic_clear_int(&ifp->if_drv_flags, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; if_link_state_change(ifp, LINK_STATE_DOWN); sc->hn_initdone = 0; ret = hv_rf_on_close(sc); } /* * FreeBSD transmit entry point */ static void hn_start(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } static void hn_start_txeof(struct hn_tx_ring *txr) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the OACTIVE earlier, with the hope, that * others could catch up. The task will clear the * flag again with the hn_tx_lock to avoid possible * races. 
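 *
 * The race being tolerated here: clearing IFF_DRV_OACTIVE without
 * holding hn_tx_lock may interleave with a concurrent sender that
 * sets the flag again.  The hn_txeof_task scheduled below re-clears
 * the flag under hn_tx_lock (see hn_start_txeof_taskfunc()), so
 * forward progress is still guaranteed.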
*/ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } /* * */ static void hn_ifinit_locked(hn_softc_t *sc) { struct ifnet *ifp; int ret, i; ifp = sc->hn_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { return; } hv_promisc_mode = 1; ret = hv_rf_on_open(sc); if (ret != 0) { return; } else { sc->hn_initdone = 1; } atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); if_link_state_change(ifp, LINK_STATE_UP); } /* * */ static void hn_ifinit(void *xsc) { hn_softc_t *sc = xsc; NV_LOCK(sc); if (sc->temp_unusable) { NV_UNLOCK(sc); return; } sc->temp_unusable = TRUE; NV_UNLOCK(sc); hn_ifinit_locked(sc); NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); } #ifdef LATER /* * */ static void hn_watchdog(struct ifnet *ifp) { hn_softc_t *sc; sc = ifp->if_softc; printf("hn%d: watchdog timeout -- resetting\n", sc->hn_unit); hn_ifinit(sc); /*???*/ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } #endif #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; unsigned int lenlim; int error; lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; error = sysctl_handle_int(oidp, &lenlim, 0, req); if (error || req->newptr == NULL) return error; if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || lenlim > TCP_LRO_LENGTH_MAX) return EINVAL; NV_LOCK(sc); hn_set_lro_lenlim(sc, lenlim); NV_UNLOCK(sc); return 0; } static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ackcnt, error, i; /* * lro_ackcnt_lim is append count limit, * +1 to turn it into aggregation limit. */ ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; error = sysctl_handle_int(oidp, &ackcnt, 0, req); if (error || req->newptr == NULL) return error; if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) return EINVAL; /* * Convert aggregation limit back to append * count limit. */ --ackcnt; NV_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_inuse; ++i) sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; NV_UNLOCK(sc); return 0; } #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int hcsum = arg2; int on, error, i; on = 0; if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) on = 1; error = sysctl_handle_int(oidp, &on, 0, req); if (error || req->newptr == NULL) return error; NV_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (on) rxr->hn_trust_hcsum |= hcsum; else rxr->hn_trust_hcsum &= ~hcsum; } NV_UNLOCK(sc); return 0; } static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int chimney_size, error; chimney_size = sc->hn_tx_ring[0].hn_tx_chimney_size; error = sysctl_handle_int(oidp, &chimney_size, 0, req); if (error || req->newptr == NULL) return error; if (chimney_size > sc->hn_tx_chimney_max || chimney_size <= 0) return EINVAL; hn_set_tx_chimney_size(sc, chimney_size); return 0; } static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; u_long stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((u_long *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. 
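 *
 * The aggregated counter is only zeroed when the sysctl is written
 * (req->newptr != NULL); plain reads, e.g. through sysctl(8), leave
 * the per-ring values intact.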
*/ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; *((u_long *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; uint64_t stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((uint64_t *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_64(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_tx_ring *txr; u_long stat; stat = 0; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; stat += *((u_long *)((uint8_t *)txr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; *((u_long *)((uint8_t *)txr + ofs)) = 0; } return 0; } static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error, conf; struct hn_tx_ring *txr; txr = &sc->hn_tx_ring[0]; conf = *((int *)((uint8_t *)txr + ofs)); error = sysctl_handle_int(oidp, &conf, 0, req); if (error || req->newptr == NULL) return error; NV_LOCK(sc); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; *((int *)((uint8_t *)txr + ofs)) = conf; } NV_UNLOCK(sc); return 0; } static int hn_check_iplen(const struct mbuf *m, int hoff) { const struct ip *ip; int len, iphlen, iplen; const struct tcphdr *th; int thoff; /* TCP data offset */ len = hoff + sizeof(struct ip); /* The packet must be at least the size of an IP header. */ if (m->m_pkthdr.len < len) return IPPROTO_DONE; /* The fixed IP header must reside completely in the first mbuf. */ if (m->m_len < len) return IPPROTO_DONE; ip = mtodo(m, hoff); /* Bound check the packet's stated IP header length. */ iphlen = ip->ip_hl << 2; if (iphlen < sizeof(struct ip)) /* minimum header length */ return IPPROTO_DONE; /* The full IP header must reside completely in the one mbuf. */ if (m->m_len < hoff + iphlen) return IPPROTO_DONE; iplen = ntohs(ip->ip_len); /* * Check that the amount of data in the buffers is as * at least much as the IP header would have us expect. */ if (m->m_pkthdr.len < hoff + iplen) return IPPROTO_DONE; /* * Ignore IP fragments. */ if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) return IPPROTO_DONE; /* * The TCP/IP or UDP/IP header must be entirely contained within * the first fragment of a packet. 
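 *
 * The return value is the IP protocol (ip_p) when the headers pass
 * these checks, or IPPROTO_DONE when the packet is fragmented,
 * truncated or otherwise unsafe to treat as a complete TCP/UDP
 * segment; callers use IPPROTO_DONE as "do not trust the host
 * checksum and do not attempt LRO" (see netvsc_recv()).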
*/ switch (ip->ip_p) { case IPPROTO_TCP: if (iplen < iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); thoff = th->th_off << 2; if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + thoff) return IPPROTO_DONE; break; case IPPROTO_UDP: if (iplen < iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; break; default: if (iplen < iphlen) return IPPROTO_DONE; break; } return ip->ip_p; } static void hn_create_rx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; device_t dev = sc->hn_dev; #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 int lroent_cnt; #endif #endif int i; sc->hn_rx_ring_cnt = ring_cnt; sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, M_NETVSC, M_WAITOK | M_ZERO); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 lroent_cnt = hn_lro_entry_count; if (lroent_cnt < TCP_LRO_ENTRIES) lroent_cnt = TCP_LRO_ENTRIES; device_printf(dev, "LRO: entry count %d\n", lroent_cnt); #endif #endif /* INET || INET6 */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Create dev.hn.UNIT.rx sysctl tree */ sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (hn_trust_hosttcp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; if (hn_trust_hostudp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; if (hn_trust_hostip) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; rxr->hn_ifp = sc->hn_ifp; if (i < sc->hn_tx_ring_cnt) rxr->hn_txr = &sc->hn_tx_ring[i]; rxr->hn_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK); rxr->hn_rx_idx = i; /* * Initialize LRO. 
*/ #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, hn_lro_mbufq_depth); #else tcp_lro_init(&rxr->hn_lro); rxr->hn_lro.ifp = sc->hn_ifp; #endif #if __FreeBSD_version >= 1100099 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; #endif #endif /* INET || INET6 */ if (sc->hn_rx_sysctl_tree != NULL) { char name[16]; /* * Create per RX ring sysctl tree: * dev.hn.UNIT.rx.RINGID */ snprintf(name, sizeof(name), "%d", i); rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (rxr->hn_rx_sysctl_tree != NULL) { SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "packets", CTLFLAG_RW, &rxr->hn_pkts, "# of packets received"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "rss_pkts", CTLFLAG_RW, &rxr->hn_rss_pkts, "# of packets w/ RSS info received"); } } } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_queued), hn_rx_stat_u64_sysctl, "LU", "LRO queued"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), hn_rx_stat_u64_sysctl, "LU", "LRO flushed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro_tried), hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); #if __FreeBSD_version >= 1100099 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_lenlim_sysctl, "IU", "Max # of data bytes to be aggregated by LRO"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_ackcnt_sysctl, "I", "Max # of ACKs to be aggregated by LRO"); #endif SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, hn_trust_hcsum_sysctl, "I", "Trust tcp segement verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, hn_trust_hcsum_sysctl, "I", "Trust udp datagram verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, hn_trust_hcsum_sysctl, "I", "Trust ip packet verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_ip), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_tcp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_udp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_trusted), hn_rx_stat_ulong_sysctl, "LU", "# of packets that we trust host's csum verification"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", CTLTYPE_ULONG | 
CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_small_pkts), hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); } static void hn_destroy_rx_data(struct hn_softc *sc) { int i; if (sc->hn_rx_ring_cnt == 0) return; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; #if defined(INET) || defined(INET6) tcp_lro_free(&rxr->hn_lro); #endif free(rxr->hn_rdbuf, M_NETVSC); } free(sc->hn_rx_ring, M_NETVSC); sc->hn_rx_ring = NULL; sc->hn_rx_ring_cnt = 0; sc->hn_rx_ring_inuse = 0; } static int hn_create_tx_ring(struct hn_softc *sc, int id) { struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; device_t dev = sc->hn_dev; bus_dma_tag_t parent_dtag; int error, i; uint32_t version; txr->hn_sc = sc; txr->hn_tx_idx = id; #ifndef HN_USE_TXDESC_BUFRING mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); #endif mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); txr->hn_txdesc_cnt = HN_TX_DESC_CNT; txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK | M_ZERO); #ifndef HN_USE_TXDESC_BUFRING SLIST_INIT(&txr->hn_txlist); #else txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK, &txr->hn_tx_lock); #endif txr->hn_tx_taskq = sc->hn_tx_taskq; if (hn_use_if_start) { txr->hn_txeof = hn_start_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); } else { int br_depth; txr->hn_txeof = hn_xmit_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); br_depth = hn_get_txswq_depth(txr); txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_NETVSC, M_WAITOK, &txr->hn_tx_lock); } txr->hn_direct_tx_size = hn_direct_tx_size; version = VMBUS_GET_VERSION(device_get_parent(dev), dev); if (version >= VMBUS_VERSION_WIN8_1) { txr->hn_csum_assist = HN_CSUM_ASSIST; } else { txr->hn_csum_assist = HN_CSUM_ASSIST_WIN8; if (id == 0) { device_printf(dev, "bus version %u.%u, " "no UDP checksum offloading\n", VMBUS_VERSION_MAJOR(version), VMBUS_VERSION_MINOR(version)); } } /* * Always schedule transmission instead of trying to do direct * transmission. This one gives the best performance so far. */ txr->hn_sched_tx = 1; parent_dtag = bus_get_dma_tag(dev); /* DMA tag for RNDIS messages. */ error = bus_dma_tag_create(parent_dtag, /* parent */ HN_RNDIS_MSG_ALIGN, /* alignment */ HN_RNDIS_MSG_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_RNDIS_MSG_LEN, /* maxsize */ 1, /* nsegments */ HN_RNDIS_MSG_LEN, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_rndis_dtag); if (error) { device_printf(dev, "failed to create rndis dmatag\n"); return error; } /* DMA tag for data. 
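 *
 * This tag bounds a loaded mbuf chain to HN_TX_DATA_SEGCNT_MAX
 * segments of at most HN_TX_DATA_SEGSIZE bytes each, which keeps
 * the GPA array built in hn_encap() (nsegs plus the
 * HV_RF_NUM_TX_RESERVED_PAGE_BUFS reserved RNDIS entries) within
 * the bounds of packet->gpa[].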
*/ error = bus_dma_tag_create(parent_dtag, /* parent */ 1, /* alignment */ HN_TX_DATA_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_TX_DATA_MAXSIZE, /* maxsize */ HN_TX_DATA_SEGCNT_MAX, /* nsegments */ HN_TX_DATA_SEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_data_dtag); if (error) { device_printf(dev, "failed to create data dmatag\n"); return error; } for (i = 0; i < txr->hn_txdesc_cnt; ++i) { struct hn_txdesc *txd = &txr->hn_txdesc[i]; txd->txr = txr; /* * Allocate and load RNDIS messages. */ error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, (void **)&txd->rndis_msg, BUS_DMA_WAITOK | BUS_DMA_COHERENT, &txd->rndis_msg_dmap); if (error) { device_printf(dev, "failed to allocate rndis_msg, %d\n", i); return error; } error = bus_dmamap_load(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap, txd->rndis_msg, HN_RNDIS_MSG_LEN, hyperv_dma_map_paddr, &txd->rndis_msg_paddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "failed to load rndis_msg, %d\n", i); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); return error; } /* DMA map for TX data. */ error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, &txd->data_dmap); if (error) { device_printf(dev, "failed to allocate tx data dmamap\n"); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); return error; } /* All set, put it to list */ txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); #else buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif } txr->hn_txdesc_avail = txr->hn_txdesc_cnt; if (sc->hn_tx_sysctl_tree != NULL) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; char name[16]; /* * Create per TX ring sysctl tree: * dev.hn.UNIT.tx.RINGID */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); snprintf(name, sizeof(name), "%d", id); txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (txr->hn_tx_sysctl_tree != NULL) { child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", CTLFLAG_RD, &txr->hn_txdesc_avail, 0, "# of available TX descs"); if (!hn_use_if_start) { SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", CTLFLAG_RD, &txr->hn_oactive, 0, "over active"); } SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", CTLFLAG_RW, &txr->hn_pkts, "# of packets transmitted"); } } return 0; } static void hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) { struct hn_tx_ring *txr = txd->txr; KASSERT(txd->m == NULL, ("still has mbuf installed")); KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); } static void hn_destroy_tx_ring(struct hn_tx_ring *txr) { struct hn_txdesc *txd; if (txr->hn_txdesc == NULL) return; #ifndef HN_USE_TXDESC_BUFRING while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) { SLIST_REMOVE_HEAD(&txr->hn_txlist, link); hn_txdesc_dmamap_destroy(txd); } #else mtx_lock(&txr->hn_tx_lock); while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL) hn_txdesc_dmamap_destroy(txd); mtx_unlock(&txr->hn_tx_lock); #endif if (txr->hn_tx_data_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_data_dtag); if 
(txr->hn_tx_rndis_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); #ifdef HN_USE_TXDESC_BUFRING buf_ring_free(txr->hn_txdesc_br, M_NETVSC); #endif free(txr->hn_txdesc, M_NETVSC); txr->hn_txdesc = NULL; if (txr->hn_mbuf_br != NULL) buf_ring_free(txr->hn_mbuf_br, M_NETVSC); #ifndef HN_USE_TXDESC_BUFRING mtx_destroy(&txr->hn_txlist_spin); #endif mtx_destroy(&txr->hn_tx_lock); } static int hn_create_tx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int i; sc->hn_tx_ring_cnt = ring_cnt; sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, M_NETVSC, M_WAITOK | M_ZERO); ctx = device_get_sysctl_ctx(sc->hn_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); /* Create dev.hn.UNIT.tx sysctl tree */ sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { int error; error = hn_create_tx_ring(sc, i); if (error) return error; } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_no_txdescs), hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_send_failed), hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_txdma_failed), hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_collapsed), hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, "# of total TX descs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", CTLFLAG_RD, &sc->hn_tx_chimney_max, 0, "Chimney send packet size upper boundary"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_tx_chimney_size_sysctl, "I", "Chimney send packet size limit"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_direct_tx_size), hn_tx_conf_int_sysctl, "I", "Size of the packet for direct transmission"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_sched_tx), hn_tx_conf_int_sysctl, "I", "Always schedule transmission " "instead of doing direct transmission"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); return 0; } static void hn_set_tx_chimney_size(struct hn_softc *sc, int chimney_size) { int i; NV_LOCK(sc); 
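	/*
	 * Apply the new chimney (copy-mode) threshold to every TX ring
	 * currently in use; hn_encap() compares each outgoing packet
	 * against this per-ring value when choosing between the
	 * send-buffer copy path and the GPA page-list path.
	 */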
for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_tx_chimney_size = chimney_size; NV_UNLOCK(sc); } static void hn_destroy_tx_data(struct hn_softc *sc) { int i; if (sc->hn_tx_ring_cnt == 0) return; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) hn_destroy_tx_ring(&sc->hn_tx_ring[i]); free(sc->hn_tx_ring, M_NETVSC); sc->hn_tx_ring = NULL; sc->hn_tx_ring_cnt = 0; sc->hn_tx_ring_inuse = 0; } static void hn_start_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_start_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_stop_tx_tasks(struct hn_softc *sc) { int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static int hn_xmit(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; struct mbuf *m_head; mtx_assert(&txr->hn_tx_lock, MA_OWNED); KASSERT(hn_use_if_start == 0, ("hn_xmit is called, when if_start is enabled")); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) return 0; while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { struct hn_txdesc *txd; int error; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); return 1; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } error = hn_encap(txr, txd, &m_head); if (error) { /* Both txd and m_head are freed; discard */ drbr_advance(ifp, txr->hn_mbuf_br); continue; } error = hn_send_pkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } /* Sent */ drbr_advance(ifp, txr->hn_mbuf_br); } return 0; } static int hn_transmit(struct ifnet *ifp, struct mbuf *m) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr; int error, idx = 0; /* * Select the TX ring based on flowid */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; txr = &sc->hn_tx_ring[idx]; error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); if (error) { if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); return error; } if (txr->hn_oactive) return 0; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return 0; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); return 0; } static void hn_xmit_qflush(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; struct mbuf *m; mtx_lock(&txr->hn_tx_lock); while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) m_freem(m); mtx_unlock(&txr->hn_tx_lock); } if_qflush(ifp); } static void hn_xmit_txeof(struct hn_tx_ring *txr) { if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; txr->hn_oactive = 0; sched = hn_xmit(txr, 
txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the oactive earlier, with the hope, that * others could catch up. The task will clear the * oactive again with the hn_tx_lock to avoid possible * races. */ txr->hn_oactive = 0; taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_xmit_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); txr->hn_oactive = 0; hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_channel_attach(struct hn_softc *sc, struct vmbus_channel *chan) { struct hn_rx_ring *rxr; int idx; idx = vmbus_chan_subidx(chan); KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, ("invalid channel index %d, should > 0 && < %d", idx, sc->hn_rx_ring_inuse)); rxr = &sc->hn_rx_ring[idx]; KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, ("RX ring %d already attached", idx)); rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; if (bootverbose) { if_printf(sc->hn_ifp, "link RX ring %d to channel%u\n", idx, vmbus_chan_id(chan)); } if (idx < sc->hn_tx_ring_inuse) { struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, ("TX ring %d already attached", idx)); txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; txr->hn_chan = chan; if (bootverbose) { if_printf(sc->hn_ifp, "link TX ring %d to channel%u\n", idx, vmbus_chan_id(chan)); } } /* Bind channel to a proper CPU */ vmbus_chan_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus); } static void hn_subchan_attach(struct hn_softc *sc, struct vmbus_channel *chan) { KASSERT(!vmbus_chan_is_primary(chan), ("subchannel callback on primary channel")); hn_channel_attach(sc, chan); } static void hn_subchan_setup(struct hn_softc *sc) { struct vmbus_channel **subchans; int subchan_cnt = sc->net_dev->num_channel - 1; int i; /* Wait for sub-channels setup to complete. */ subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); /* Attach the sub-channels. */ for (i = 0; i < subchan_cnt; ++i) { struct vmbus_channel *subchan = subchans[i]; /* NOTE: Calling order is critical. 
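 *
 * hn_subchan_attach() must run first so the sub-channel is bound to
 * its CPU and linked to the matching RX/TX rings before
 * hv_nv_subchan_attach() makes it operational (assumption: the
 * latter opens the channel, after which packets may arrive at any
 * time and the ring linkage has to already be in place).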
*/ hn_subchan_attach(sc, subchan); hv_nv_subchan_attach(subchan, &sc->hn_rx_ring[vmbus_chan_subidx(subchan)]); } /* Release the sub-channels */ vmbus_subchan_rel(subchans, subchan_cnt); if_printf(sc->hn_ifp, "%d sub-channels setup done\n", subchan_cnt); } static void hn_tx_taskq_create(void *arg __unused) { if (!hn_share_tx_taskq) return; hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&hn_tx_taskq, 1, PI_NET, &cpu_set, "hn tx"); } else { taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx"); } } SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_create, NULL); static void hn_tx_taskq_destroy(void *arg __unused) { if (hn_tx_taskq != NULL) taskqueue_free(hn_tx_taskq); } SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_destroy, NULL); static device_method_t netvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, netvsc_probe), DEVMETHOD(device_attach, netvsc_attach), DEVMETHOD(device_detach, netvsc_detach), DEVMETHOD(device_shutdown, netvsc_shutdown), { 0, 0 } }; static driver_t netvsc_driver = { NETVSC_DEVNAME, netvsc_methods, sizeof(hn_softc_t) }; static devclass_t netvsc_devclass; DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0); MODULE_VERSION(hn, 1); MODULE_DEPEND(hn, vmbus, 1, 1, 1); Index: user/alc/PQ_LAUNDRY/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 303642) @@ -1,2203 +1,2234 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * StorVSC driver for Hyper-V. This driver presents a SCSI HBA interface * to the Comman Access Method (CAM) layer. CAM control blocks (CCBs) are * converted into VSCSI protocol messages which are delivered to the parent * partition StorVSP driver over the Hyper-V VMBUS. 
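 *
 * Rough request flow, as implemented in this file: storvsc_action()
 * receives a CCB from CAM, create_storvsc_request() translates it
 * into a struct hv_storvsc_request carrying a vstor_packet plus a
 * GPA range describing the data buffer, hv_storvsc_io_request()
 * sends it over a VMBUS channel, and the completion returns through
 * hv_storvsc_on_channel_callback() and hv_storvsc_on_iocompletion()
 * to storvsc_io_done(), which hands the CCB back to CAM.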
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "hv_vstorage.h" #include "vmbus_if.h" #define STORVSC_RINGBUFFER_SIZE (20*PAGE_SIZE) #define STORVSC_MAX_LUNS_PER_TARGET (64) #define STORVSC_MAX_IO_REQUESTS (STORVSC_MAX_LUNS_PER_TARGET * 2) #define BLKVSC_MAX_IDE_DISKS_PER_TARGET (1) #define BLKVSC_MAX_IO_REQUESTS STORVSC_MAX_IO_REQUESTS #define STORVSC_MAX_TARGETS (2) #define VSTOR_PKT_SIZE (sizeof(struct vstor_packet) - vmscsi_size_delta) -#define STORVSC_DATA_SEGCNT_MAX VMBUS_CHAN_PRPLIST_MAX +/* + * 33 segments are needed to allow 128KB maxio, in case the data + * in the first page is _not_ PAGE_SIZE aligned, e.g. + * + * |<----------- 128KB ----------->| + * | | + * 0 2K 4K 8K 16K 124K 128K 130K + * | | | | | | | | + * +--+--+-----+-----+.......+-----+--+--+ + * | | | | | | | | | DATA + * | | | | | | | | | + * +--+--+-----+-----+.......------+--+--+ + * | | | | + * | 1| 31 | 1| ...... # of segments + */ +#define STORVSC_DATA_SEGCNT_MAX 33 #define STORVSC_DATA_SEGSZ_MAX PAGE_SIZE #define STORVSC_DATA_SIZE_MAX \ - (STORVSC_DATA_SEGCNT_MAX * STORVSC_DATA_SEGSZ_MAX) + ((STORVSC_DATA_SEGCNT_MAX - 1) * STORVSC_DATA_SEGSZ_MAX) struct storvsc_softc; struct hv_sgl_node { LIST_ENTRY(hv_sgl_node) link; struct sglist *sgl_data; }; struct hv_sgl_page_pool{ LIST_HEAD(, hv_sgl_node) in_use_sgl_list; LIST_HEAD(, hv_sgl_node) free_sgl_list; boolean_t is_init; } g_hv_sgl_page_pool; #define STORVSC_MAX_SG_PAGE_CNT STORVSC_MAX_IO_REQUESTS * STORVSC_DATA_SEGCNT_MAX enum storvsc_request_type { WRITE_TYPE, READ_TYPE, UNKNOWN_TYPE }; SYSCTL_NODE(_hw, OID_AUTO, storvsc, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Hyper-V storage interface"); static u_int hv_storvsc_use_pim_unmapped = 1; SYSCTL_INT(_hw_storvsc, OID_AUTO, use_pim_unmapped, CTLFLAG_RDTUN, &hv_storvsc_use_pim_unmapped, 0, "Optimize storvsc by using unmapped I/O"); struct hv_storvsc_sysctl { u_long data_bio_cnt; u_long data_vaddr_cnt; u_long data_sg_cnt; }; struct storvsc_gpa_range { struct vmbus_gpa_range gpa_range; uint64_t gpa_page[STORVSC_DATA_SEGCNT_MAX]; } __packed; struct hv_storvsc_request { LIST_ENTRY(hv_storvsc_request) link; struct vstor_packet vstor_packet; int prp_cnt; struct storvsc_gpa_range prp_list; void *sense_data; uint8_t sense_info_len; uint8_t retries; union ccb *ccb; struct storvsc_softc *softc; struct callout callout; struct sema synch_sema; /*Synchronize the request/response if needed */ struct sglist *bounce_sgl; unsigned int bounce_sgl_count; uint64_t not_aligned_seg_bits; bus_dmamap_t data_dmap; }; struct storvsc_softc { struct vmbus_channel *hs_chan; LIST_HEAD(, hv_storvsc_request) hs_free_list; struct mtx hs_lock; struct storvsc_driver_props *hs_drv_props; int hs_unit; uint32_t hs_frozen; struct cam_sim *hs_sim; struct cam_path *hs_path; uint32_t hs_num_out_reqs; boolean_t hs_destroy; boolean_t hs_drain_notify; struct sema hs_drain_sema; struct hv_storvsc_request hs_init_req; struct hv_storvsc_request hs_reset_req; device_t hs_dev; bus_dma_tag_t storvsc_req_dtag; struct hv_storvsc_sysctl sysctl_data; struct vmbus_channel *hs_cpu2chan[MAXCPU]; }; /** * HyperV storvsc timeout testing cases: * a. IO returned after first timeout; * b. IO returned after second timeout and queue freeze; * c. 
IO returned while timer handler is running * The first can be tested by "sg_senddiag -vv /dev/daX", * and the second and third can be done by * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX". */ #define HVS_TIMEOUT_TEST 0 /* * Bus/adapter reset functionality on the Hyper-V host is * buggy and it will be disabled until * it can be further tested. */ #define HVS_HOST_RESET 0 struct storvsc_driver_props { char *drv_name; char *drv_desc; uint8_t drv_max_luns_per_target; uint8_t drv_max_ios_per_target; uint32_t drv_ringbuffer_size; }; enum hv_storage_type { DRIVER_BLKVSC, DRIVER_STORVSC, DRIVER_UNKNOWN }; #define HS_MAX_ADAPTERS 10 #define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1 /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */ static const struct hyperv_guid gStorVscDeviceType={ .hv_guid = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d, 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f} }; /* {32412632-86cb-44a2-9b5c-50d1417354f5} */ static const struct hyperv_guid gBlkVscDeviceType={ .hv_guid = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44, 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5} }; static struct storvsc_driver_props g_drv_props_table[] = { {"blkvsc", "Hyper-V IDE Storage Interface", BLKVSC_MAX_IDE_DISKS_PER_TARGET, BLKVSC_MAX_IO_REQUESTS, STORVSC_RINGBUFFER_SIZE}, {"storvsc", "Hyper-V SCSI Storage Interface", STORVSC_MAX_LUNS_PER_TARGET, STORVSC_MAX_IO_REQUESTS, STORVSC_RINGBUFFER_SIZE} }; /* * Sense buffer size changed in win8; have a run-time * variable to track the size we should use. */ static int sense_buffer_size = PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE; /* * The size of the vmscsi_request has changed in win8. The * additional size is for the newly added elements in the * structure. These elements are valid only when we are talking * to a win8 host. * Track the correct size we need to apply. */ static int vmscsi_size_delta; /* * The storage protocol version is determined during the * initial exchange with the host. It will indicate which * storage functionality is available in the host. 
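 *
 * The value is negotiated in hv_storvsc_channel_init(): the driver
 * walks vmstor_proto_list from the newest to the oldest entry and
 * settles on the first version the host accepts (status == 0),
 * which also fixes sense_buffer_size and vmscsi_size_delta.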
*/ static int vmstor_proto_version; struct vmstor_proto { int proto_version; int sense_buffer_size; int vmscsi_size_delta; }; static const struct vmstor_proto vmstor_proto_list[] = { { VMSTOR_PROTOCOL_VERSION_WIN10, POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, 0 }, { VMSTOR_PROTOCOL_VERSION_WIN8_1, POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, 0 }, { VMSTOR_PROTOCOL_VERSION_WIN8, POST_WIN7_STORVSC_SENSE_BUFFER_SIZE, 0 }, { VMSTOR_PROTOCOL_VERSION_WIN7, PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE, sizeof(struct vmscsi_win8_extension), }, { VMSTOR_PROTOCOL_VERSION_WIN6, PRE_WIN8_STORVSC_SENSE_BUFFER_SIZE, sizeof(struct vmscsi_win8_extension), } }; /* static functions */ static int storvsc_probe(device_t dev); static int storvsc_attach(device_t dev); static int storvsc_detach(device_t dev); static void storvsc_poll(struct cam_sim * sim); static void storvsc_action(struct cam_sim * sim, union ccb * ccb); static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp); static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp); static enum hv_storage_type storvsc_get_storage_type(device_t dev); static void hv_storvsc_rescan_target(struct storvsc_softc *sc); static void hv_storvsc_on_channel_callback(struct vmbus_channel *chan, void *xsc); static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request); static int hv_storvsc_connect_vsp(struct storvsc_softc *); static void storvsc_io_done(struct hv_storvsc_request *reqp); static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, bus_dma_segment_t *orig_sgl, unsigned int orig_sgl_count, uint64_t seg_bits); void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, unsigned int dest_sgl_count, struct sglist* src_sgl, uint64_t seg_bits); static device_method_t storvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, storvsc_probe), DEVMETHOD(device_attach, storvsc_attach), DEVMETHOD(device_detach, storvsc_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD_END }; static driver_t storvsc_driver = { "storvsc", storvsc_methods, sizeof(struct storvsc_softc), }; static devclass_t storvsc_devclass; DRIVER_MODULE(storvsc, vmbus, storvsc_driver, storvsc_devclass, 0, 0); MODULE_VERSION(storvsc, 1); MODULE_DEPEND(storvsc, vmbus, 1, 1, 1); static void storvsc_subchan_attach(struct storvsc_softc *sc, struct vmbus_channel *new_channel) { struct vmstor_chan_props props; int ret = 0; memset(&props, 0, sizeof(props)); vmbus_chan_cpu_rr(new_channel); ret = vmbus_chan_open(new_channel, sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_ringbuffer_size, (void *)&props, sizeof(struct vmstor_chan_props), hv_storvsc_on_channel_callback, sc); } /** * @brief Send multi-channel creation request to host * * @param device a Hyper-V device pointer * @param max_chans the max channels supported by vmbus */ static void storvsc_send_multichannel_request(struct storvsc_softc *sc, int max_chans) { struct vmbus_channel **subchan; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; int request_channels_cnt = 0; int ret, i; /* get multichannels count that need to create */ request_channels_cnt = MIN(max_chans, mp_ncpus); request = &sc->hs_init_req; /* request the host to create multi-channel */ memset(request, 0, sizeof(struct hv_storvsc_request)); sema_init(&request->synch_sema, 0, ("stor_synch_sema")); vstor_packet = &request->vstor_packet; vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS; 
vstor_packet->flags = REQUEST_COMPLETION_FLAG; vstor_packet->u.multi_channels_cnt = request_channels_cnt; ret = vmbus_chan_send(sc->hs_chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); /* wait for 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) { printf("Storvsc_error: create multi-channel timeout, %d\n", ret); return; } if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { printf("Storvsc_error: create multi-channel invalid operation " "(%d) or statue (%u)\n", vstor_packet->operation, vstor_packet->status); return; } /* Wait for sub-channels setup to complete. */ subchan = vmbus_subchan_get(sc->hs_chan, request_channels_cnt); /* Attach the sub-channels. */ for (i = 0; i < request_channels_cnt; ++i) storvsc_subchan_attach(sc, subchan[i]); /* Release the sub-channels. */ vmbus_subchan_rel(subchan, request_channels_cnt); if (bootverbose) printf("Storvsc create multi-channel success!\n"); } /** * @brief initialize channel connection to parent partition * * @param dev a Hyper-V device pointer * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_channel_init(struct storvsc_softc *sc) { int ret = 0, i; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; uint16_t max_chans = 0; boolean_t support_multichannel = FALSE; uint32_t version; max_chans = 0; support_multichannel = FALSE; request = &sc->hs_init_req; memset(request, 0, sizeof(struct hv_storvsc_request)); vstor_packet = &request->vstor_packet; request->softc = sc; /** * Initiate the vsc/vsp initialization protocol on the open channel */ sema_init(&request->synch_sema, 0, ("stor_synch_sema")); vstor_packet->operation = VSTOR_OPERATION_BEGININITIALIZATION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = vmbus_chan_send(sc->hs_chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); if (ret != 0) goto cleanup; /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) goto cleanup; if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { goto cleanup; } for (i = 0; i < nitems(vmstor_proto_list); i++) { /* reuse the packet for version range supported */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; vstor_packet->u.version.major_minor = vmstor_proto_list[i].proto_version; /* revision is only significant for Windows guests */ vstor_packet->u.version.revision = 0; ret = vmbus_chan_send(sc->hs_chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); if (ret != 0) goto cleanup; /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret) goto cleanup; if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO) { ret = EINVAL; goto cleanup; } if (vstor_packet->status == 0) { vmstor_proto_version = vmstor_proto_list[i].proto_version; sense_buffer_size = vmstor_proto_list[i].sense_buffer_size; vmscsi_size_delta = vmstor_proto_list[i].vmscsi_size_delta; break; } } if (vstor_packet->status != 0) { ret = EINVAL; goto cleanup; } /** * Query channel properties */ memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_QUERYPROPERTIES; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = vmbus_chan_send(sc->hs_chan, 
VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); if ( ret != 0) goto cleanup; /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) goto cleanup; /* TODO: Check returned version */ if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) { goto cleanup; } /* multi-channels feature is supported by WIN8 and above version */ max_chans = vstor_packet->u.chan_props.max_channel_cnt; version = VMBUS_GET_VERSION(device_get_parent(sc->hs_dev), sc->hs_dev); if (version != VMBUS_VERSION_WIN7 && version != VMBUS_VERSION_WS2008 && (vstor_packet->u.chan_props.flags & HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) { support_multichannel = TRUE; } memset(vstor_packet, 0, sizeof(struct vstor_packet)); vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = vmbus_chan_send(sc->hs_chan, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); if (ret != 0) { goto cleanup; } /* wait 5 seconds */ ret = sema_timedwait(&request->synch_sema, 5 * hz); if (ret != 0) goto cleanup; if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO || vstor_packet->status != 0) goto cleanup; /* * If multi-channel is supported, send multichannel create * request to host. */ if (support_multichannel) storvsc_send_multichannel_request(sc, max_chans); cleanup: sema_destroy(&request->synch_sema); return (ret); } /** * @brief Open channel connection to paraent partition StorVSP driver * * Open and initialize channel connection to parent partition StorVSP driver. * * @param pointer to a Hyper-V device * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_connect_vsp(struct storvsc_softc *sc) { int ret = 0; struct vmstor_chan_props props; memset(&props, 0, sizeof(struct vmstor_chan_props)); /* * Open the channel */ vmbus_chan_cpu_rr(sc->hs_chan); ret = vmbus_chan_open( sc->hs_chan, sc->hs_drv_props->drv_ringbuffer_size, sc->hs_drv_props->drv_ringbuffer_size, (void *)&props, sizeof(struct vmstor_chan_props), hv_storvsc_on_channel_callback, sc); if (ret != 0) { return ret; } ret = hv_storvsc_channel_init(sc); return (ret); } #if HVS_HOST_RESET static int hv_storvsc_host_reset(struct storvsc_softc *sc) { int ret = 0; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; request = &sc->hs_reset_req; request->softc = sc; vstor_packet = &request->vstor_packet; sema_init(&request->synch_sema, 0, "stor synch sema"); vstor_packet->operation = VSTOR_OPERATION_RESETBUS; vstor_packet->flags = REQUEST_COMPLETION_FLAG; ret = vmbus_chan_send(dev->channel, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)&sc->hs_reset_req); if (ret != 0) { goto cleanup; } ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */ if (ret) { goto cleanup; } /* * At this point, all outstanding requests in the adapter * should have been flushed out and return to us */ cleanup: sema_destroy(&request->synch_sema); return (ret); } #endif /* HVS_HOST_RESET */ /** * @brief Function to initiate an I/O request * * @param device Hyper-V device pointer * @param request pointer to a request structure * @returns 0 on success, non-zero error on failure */ static int hv_storvsc_io_request(struct storvsc_softc *sc, struct hv_storvsc_request *request) { struct vstor_packet *vstor_packet = &request->vstor_packet; struct vmbus_channel* outgoing_channel = NULL; 
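	/*
	 * The packet is queued on the channel that storvsc_create_cpu2chan()
	 * mapped to the current CPU, and the softc lock is dropped around the
	 * ring buffer write below.
	 */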
int ret = 0; vstor_packet->flags |= REQUEST_COMPLETION_FLAG; vstor_packet->u.vm_srb.length = VSTOR_PKT_SIZE; vstor_packet->u.vm_srb.sense_info_len = sense_buffer_size; vstor_packet->u.vm_srb.transfer_len = request->prp_list.gpa_range.gpa_len; vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB; outgoing_channel = sc->hs_cpu2chan[curcpu]; mtx_unlock(&request->softc->hs_lock); if (request->prp_list.gpa_range.gpa_len) { ret = vmbus_chan_send_prplist(outgoing_channel, &request->prp_list.gpa_range, request->prp_cnt, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); } else { ret = vmbus_chan_send(outgoing_channel, VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC, vstor_packet, VSTOR_PKT_SIZE, (uint64_t)(uintptr_t)request); } mtx_lock(&request->softc->hs_lock); if (ret != 0) { printf("Unable to send packet %p ret %d", vstor_packet, ret); } else { atomic_add_int(&sc->hs_num_out_reqs, 1); } return (ret); } /** * Process IO_COMPLETION_OPERATION and ready * the result to be completed for upper layer * processing by the CAM layer. */ static void hv_storvsc_on_iocompletion(struct storvsc_softc *sc, struct vstor_packet *vstor_packet, struct hv_storvsc_request *request) { struct vmscsi_req *vm_srb; vm_srb = &vstor_packet->u.vm_srb; /* * Copy some fields of the host's response into the request structure, * because the fields will be used later in storvsc_io_done(). */ request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status; request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len; if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) && (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) { /* Autosense data available */ KASSERT(vm_srb->sense_info_len <= request->sense_info_len, ("vm_srb->sense_info_len <= " "request->sense_info_len")); memcpy(request->sense_data, vm_srb->u.sense_data, vm_srb->sense_info_len); request->sense_info_len = vm_srb->sense_info_len; } /* Complete request by passing to the CAM layer */ storvsc_io_done(request); atomic_subtract_int(&sc->hs_num_out_reqs, 1); if (sc->hs_drain_notify && (sc->hs_num_out_reqs == 0)) { sema_post(&sc->hs_drain_sema); } } static void hv_storvsc_rescan_target(struct storvsc_softc *sc) { path_id_t pathid; target_id_t targetid; union ccb *ccb; pathid = cam_sim_path(sc->hs_sim); targetid = CAM_TARGET_WILDCARD; /* * Allocate a CCB and schedule a rescan. 
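	 * The wildcard target id is used, so the request below turns into an
	 * XPT_SCAN_BUS of the whole bus rather than a single-target scan.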
*/ ccb = xpt_alloc_ccb_nowait(); if (ccb == NULL) { printf("unable to alloc CCB for rescan\n"); return; } if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { printf("unable to create path for rescan, pathid: %u," "targetid: %u\n", pathid, targetid); xpt_free_ccb(ccb); return; } if (targetid == CAM_TARGET_WILDCARD) ccb->ccb_h.func_code = XPT_SCAN_BUS; else ccb->ccb_h.func_code = XPT_SCAN_TGT; xpt_rescan(ccb); } static void hv_storvsc_on_channel_callback(struct vmbus_channel *channel, void *xsc) { int ret = 0; struct storvsc_softc *sc = xsc; uint32_t bytes_recvd; uint64_t request_id; uint8_t packet[roundup2(sizeof(struct vstor_packet), 8)]; struct hv_storvsc_request *request; struct vstor_packet *vstor_packet; bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8); ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id); KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough")); /* XXX check bytes_recvd to make sure that it contains enough data */ while ((ret == 0) && (bytes_recvd > 0)) { request = (struct hv_storvsc_request *)(uintptr_t)request_id; if ((request == &sc->hs_init_req) || (request == &sc->hs_reset_req)) { memcpy(&request->vstor_packet, packet, sizeof(struct vstor_packet)); sema_post(&request->synch_sema); } else { vstor_packet = (struct vstor_packet *)packet; switch(vstor_packet->operation) { case VSTOR_OPERATION_COMPLETEIO: if (request == NULL) panic("VMBUS: storvsc received a " "packet with NULL request id in " "COMPLETEIO operation."); hv_storvsc_on_iocompletion(sc, vstor_packet, request); break; case VSTOR_OPERATION_REMOVEDEVICE: printf("VMBUS: storvsc operation %d not " "implemented.\n", vstor_packet->operation); /* TODO: implement */ break; case VSTOR_OPERATION_ENUMERATE_BUS: hv_storvsc_rescan_target(sc); break; default: break; } } bytes_recvd = roundup2(VSTOR_PKT_SIZE, 8), ret = vmbus_chan_recv(channel, packet, &bytes_recvd, &request_id); KASSERT(ret != ENOBUFS, ("storvsc recvbuf is not large enough")); /* * XXX check bytes_recvd to make sure that it contains * enough data */ } } /** * @brief StorVSC probe function * * Device probe function. Returns 0 if the input device is a StorVSC * device. Otherwise, a ENXIO is returned. If the input device is * for BlkVSC (paravirtual IDE) device and this support is disabled in * favor of the emulated ATA/IDE device, return ENXIO. 
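 * For BlkVSC devices the hw.ata.disk_enable tunable selects the emulated
 * ATA/IDE path; when the tunable is not set, the enlightened driver claims
 * the device.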
* * @param a device * @returns 0 on success, ENXIO if not a matcing StorVSC device */ static int storvsc_probe(device_t dev) { int ata_disk_enable = 0; int ret = ENXIO; switch (storvsc_get_storage_type(dev)) { case DRIVER_BLKVSC: if(bootverbose) device_printf(dev, "DRIVER_BLKVSC-Emulated ATA/IDE probe\n"); if (!getenv_int("hw.ata.disk_enable", &ata_disk_enable)) { if(bootverbose) device_printf(dev, "Enlightened ATA/IDE detected\n"); device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc); ret = BUS_PROBE_DEFAULT; } else if(bootverbose) device_printf(dev, "Emulated ATA/IDE set (hw.ata.disk_enable set)\n"); break; case DRIVER_STORVSC: if(bootverbose) device_printf(dev, "Enlightened SCSI device detected\n"); device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc); ret = BUS_PROBE_DEFAULT; break; default: ret = ENXIO; } return (ret); } static void storvsc_create_cpu2chan(struct storvsc_softc *sc) { int cpu; CPU_FOREACH(cpu) { sc->hs_cpu2chan[cpu] = vmbus_chan_cpu2chan(sc->hs_chan, cpu); if (bootverbose) { device_printf(sc->hs_dev, "cpu%d -> chan%u\n", cpu, vmbus_chan_id(sc->hs_cpu2chan[cpu])); } } } static int storvsc_init_requests(device_t dev) { struct storvsc_softc *sc = device_get_softc(dev); struct hv_storvsc_request *reqp; int error, i; LIST_INIT(&sc->hs_free_list); error = bus_dma_tag_create( bus_get_dma_tag(dev), /* parent */ 1, /* alignment */ PAGE_SIZE, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ STORVSC_DATA_SIZE_MAX, /* maxsize */ STORVSC_DATA_SEGCNT_MAX, /* nsegments */ STORVSC_DATA_SEGSZ_MAX, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &sc->storvsc_req_dtag); if (error) { device_printf(dev, "failed to create storvsc dma tag\n"); return (error); } for (i = 0; i < sc->hs_drv_props->drv_max_ios_per_target; ++i) { reqp = malloc(sizeof(struct hv_storvsc_request), M_DEVBUF, M_WAITOK|M_ZERO); reqp->softc = sc; error = bus_dmamap_create(sc->storvsc_req_dtag, 0, &reqp->data_dmap); if (error) { device_printf(dev, "failed to allocate storvsc " "data dmamap\n"); goto cleanup; } LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } return (0); cleanup: while ((reqp = LIST_FIRST(&sc->hs_free_list)) != NULL) { LIST_REMOVE(reqp, link); bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap); free(reqp, M_DEVBUF); } return (error); } static void storvsc_sysctl(device_t dev) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; struct storvsc_softc *sc; sc = device_get_softc(dev); ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_bio_cnt", CTLFLAG_RW, &sc->sysctl_data.data_bio_cnt, "# of bio data block"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_vaddr_cnt", CTLFLAG_RW, &sc->sysctl_data.data_vaddr_cnt, "# of vaddr data block"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "data_sg_cnt", CTLFLAG_RW, &sc->sysctl_data.data_sg_cnt, "# of sg data block"); } /** * @brief StorVSC attach function * * Function responsible for allocating per-device structures, * setting up CAM interfaces and scanning for available LUNs to * be used for SCSI device peripherals. 
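 * A root mount hold is taken for the duration of the attach so that the
 * root file system is not mounted before this adapter has registered its
 * SCSI bus.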
* * @param a device * @returns 0 on success or an error on failure */ static int storvsc_attach(device_t dev) { enum hv_storage_type stor_type; struct storvsc_softc *sc; struct cam_devq *devq; int ret, i, j; struct hv_storvsc_request *reqp; struct root_hold_token *root_mount_token = NULL; struct hv_sgl_node *sgl_node = NULL; void *tmp_buff = NULL; /* * We need to serialize storvsc attach calls. */ root_mount_token = root_mount_hold("storvsc"); sc = device_get_softc(dev); sc->hs_chan = vmbus_get_channel(dev); stor_type = storvsc_get_storage_type(dev); if (stor_type == DRIVER_UNKNOWN) { ret = ENODEV; goto cleanup; } /* fill in driver specific properties */ sc->hs_drv_props = &g_drv_props_table[stor_type]; /* fill in device specific properties */ sc->hs_unit = device_get_unit(dev); sc->hs_dev = dev; mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF); ret = storvsc_init_requests(dev); if (ret != 0) goto cleanup; /* create sg-list page pool */ if (FALSE == g_hv_sgl_page_pool.is_init) { g_hv_sgl_page_pool.is_init = TRUE; LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list); LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list); /* * Pre-create SG list, each SG list with * STORVSC_DATA_SEGCNT_MAX segments, each * segment has one page buffer */ for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) { sgl_node = malloc(sizeof(struct hv_sgl_node), M_DEVBUF, M_WAITOK|M_ZERO); sgl_node->sgl_data = sglist_alloc(STORVSC_DATA_SEGCNT_MAX, M_WAITOK|M_ZERO); for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) { tmp_buff = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK|M_ZERO); sgl_node->sgl_data->sg_segs[j].ss_paddr = (vm_paddr_t)tmp_buff; } LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); } } sc->hs_destroy = FALSE; sc->hs_drain_notify = FALSE; sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema"); ret = hv_storvsc_connect_vsp(sc); if (ret != 0) { goto cleanup; } /* Construct cpu to channel mapping */ storvsc_create_cpu2chan(sc); /* * Create the device queue. * Hyper-V maps each target to one SCSI HBA */ devq = cam_simq_alloc(sc->hs_drv_props->drv_max_ios_per_target); if (devq == NULL) { device_printf(dev, "Failed to alloc device queue\n"); ret = ENOMEM; goto cleanup; } sc->hs_sim = cam_sim_alloc(storvsc_action, storvsc_poll, sc->hs_drv_props->drv_name, sc, sc->hs_unit, &sc->hs_lock, 1, sc->hs_drv_props->drv_max_ios_per_target, devq); if (sc->hs_sim == NULL) { device_printf(dev, "Failed to alloc sim\n"); cam_simq_free(devq); ret = ENOMEM; goto cleanup; } mtx_lock(&sc->hs_lock); /* bus_id is set to 0, need to get it from VMBUS channel query? 
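	 * Each storvsc instance currently registers a single CAM bus, so a
	 * fixed bus id of 0 is adequate until the channel properties provide
	 * something better.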
*/ if (xpt_bus_register(sc->hs_sim, dev, 0) != CAM_SUCCESS) { cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to register SCSI bus\n"); ret = ENXIO; goto cleanup; } if (xpt_create_path(&sc->hs_path, /*periph*/NULL, cam_sim_path(sc->hs_sim), CAM_TARGET_WILDCARD, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { xpt_bus_deregister(cam_sim_path(sc->hs_sim)); cam_sim_free(sc->hs_sim, /*free_devq*/TRUE); mtx_unlock(&sc->hs_lock); device_printf(dev, "Unable to create path\n"); ret = ENXIO; goto cleanup; } mtx_unlock(&sc->hs_lock); storvsc_sysctl(dev); root_mount_rel(root_mount_token); return (0); cleanup: root_mount_rel(root_mount_token); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap); free(reqp, M_DEVBUF); } while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++) { if (NULL != (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); } } sglist_free(sgl_node->sgl_data); free(sgl_node, M_DEVBUF); } return (ret); } /** * @brief StorVSC device detach function * * This function is responsible for safely detaching a * StorVSC device. This includes waiting for inbound responses * to complete and freeing associated per-device structures. * * @param dev a device * returns 0 on success */ static int storvsc_detach(device_t dev) { struct storvsc_softc *sc = device_get_softc(dev); struct hv_storvsc_request *reqp = NULL; struct hv_sgl_node *sgl_node = NULL; int j = 0; sc->hs_destroy = TRUE; /* * At this point, all outbound traffic should be disabled. We * only allow inbound traffic (responses) to proceed so that * outstanding requests can be completed. */ sc->hs_drain_notify = TRUE; sema_wait(&sc->hs_drain_sema); sc->hs_drain_notify = FALSE; /* * Since we have already drained, we don't need to busy wait. * The call to close the channel will reset the callback * under the protection of the incoming channel lock. */ vmbus_chan_close(sc->hs_chan); mtx_lock(&sc->hs_lock); while (!LIST_EMPTY(&sc->hs_free_list)) { reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); bus_dmamap_destroy(sc->storvsc_req_dtag, reqp->data_dmap); free(reqp, M_DEVBUF); } mtx_unlock(&sc->hs_lock); while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); for (j = 0; j < STORVSC_DATA_SEGCNT_MAX; j++){ if (NULL != (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) { free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF); } } sglist_free(sgl_node->sgl_data); free(sgl_node, M_DEVBUF); } return (0); } #if HVS_TIMEOUT_TEST /** * @brief unit test for timed out operations * * This function provides unit testing capability to simulate * timed out operations. Recompilation with HV_TIMEOUT_TEST=1 * is required. 
* * @param reqp pointer to a request structure * @param opcode SCSI operation being performed * @param wait if 1, wait for I/O to complete */ static void storvsc_timeout_test(struct hv_storvsc_request *reqp, uint8_t opcode, int wait) { int ret; union ccb *ccb = reqp->ccb; struct storvsc_softc *sc = reqp->softc; if (reqp->vstor_packet.vm_srb.cdb[0] != opcode) { return; } if (wait) { mtx_lock(&reqp->event.mtx); } ret = hv_storvsc_io_request(sc, reqp); if (ret != 0) { if (wait) { mtx_unlock(&reqp->event.mtx); } printf("%s: io_request failed with %d.\n", __func__, ret); ccb->ccb_h.status = CAM_PROVIDE_FAIL; mtx_lock(&sc->hs_lock); storvsc_free_request(sc, reqp); xpt_done(ccb); mtx_unlock(&sc->hs_lock); return; } if (wait) { xpt_print(ccb->ccb_h.path, "%u: %s: waiting for IO return.\n", ticks, __func__); ret = cv_timedwait(&reqp->event.cv, &reqp->event.mtx, 60*hz); mtx_unlock(&reqp->event.mtx); xpt_print(ccb->ccb_h.path, "%u: %s: %s.\n", ticks, __func__, (ret == 0)? "IO return detected" : "IO return not detected"); /* * Now both the timer handler and io done are running * simultaneously. We want to confirm the io done always * finishes after the timer handler exits. So reqp used by * timer handler is not freed or stale. Do busy loop for * another 1/10 second to make sure io done does * wait for the timer handler to complete. */ DELAY(100*1000); mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: %s: finishing, queue frozen %d, " "ccb status 0x%x scsi_status 0x%x.\n", ticks, __func__, sc->hs_frozen, ccb->ccb_h.status, ccb->csio.scsi_status); mtx_unlock(&sc->hs_lock); } } #endif /* HVS_TIMEOUT_TEST */ #ifdef notyet /** * @brief timeout handler for requests * * This function is called as a result of a callout expiring. * * @param arg pointer to a request */ static void storvsc_timeout(void *arg) { struct hv_storvsc_request *reqp = arg; struct storvsc_softc *sc = reqp->softc; union ccb *ccb = reqp->ccb; if (reqp->retries == 0) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO timed out (req=0x%p), wait for another %u secs.\n", ticks, reqp, ccb->ccb_h.timeout / 1000); cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL); mtx_unlock(&sc->hs_lock); reqp->retries++; callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, SEND_DIAGNOSTIC, 0); #endif return; } mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "%u: IO (reqp = 0x%p) did not return for %u seconds, %s.\n", ticks, reqp, ccb->ccb_h.timeout * (reqp->retries+1) / 1000, (sc->hs_frozen == 0)? "freezing the queue" : "the queue is already frozen"); if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(xpt_path_sim(ccb->ccb_h.path), 1); } mtx_unlock(&sc->hs_lock); #if HVS_TIMEOUT_TEST storvsc_timeout_test(reqp, MODE_SELECT_10, 1); #endif } #endif /** * @brief StorVSC device poll function * * This function is responsible for servicing requests when * interrupts are disabled (i.e when we are dumping core.) * * @param sim a pointer to a CAM SCSI interface module */ static void storvsc_poll(struct cam_sim *sim) { struct storvsc_softc *sc = cam_sim_softc(sim); mtx_assert(&sc->hs_lock, MA_OWNED); mtx_unlock(&sc->hs_lock); hv_storvsc_on_channel_callback(sc->hs_chan, sc); mtx_lock(&sc->hs_lock); } /** * @brief StorVSC device action function * * This function is responsible for handling SCSI operations which * are passed from the CAM layer. The requests are in the form of * CAM control blocks which indicate the action being performed. 
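 * The requested action is identified by ccb->ccb_h.func_code.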
* Not all actions require converting the request to a VSCSI protocol * message - these actions can be responded to by this driver. * Requests which are destined for a backend storage device are converted * to a VSCSI protocol message and sent on the channel connection associated * with this device. * * @param sim pointer to a CAM SCSI interface module * @param ccb pointer to a CAM control block */ static void storvsc_action(struct cam_sim *sim, union ccb *ccb) { struct storvsc_softc *sc = cam_sim_softc(sim); int res; mtx_assert(&sc->hs_lock, MA_OWNED); switch (ccb->ccb_h.func_code) { case XPT_PATH_INQ: { struct ccb_pathinq *cpi = &ccb->cpi; cpi->version_num = 1; cpi->hba_inquiry = PI_TAG_ABLE|PI_SDTR_ABLE; cpi->target_sprt = 0; cpi->hba_misc = PIM_NOBUSRESET; if (hv_storvsc_use_pim_unmapped) cpi->hba_misc |= PIM_UNMAPPED; + cpi->maxio = STORVSC_DATA_SIZE_MAX; cpi->hba_eng_cnt = 0; cpi->max_target = STORVSC_MAX_TARGETS; cpi->max_lun = sc->hs_drv_props->drv_max_luns_per_target; cpi->initiator_id = cpi->max_target; cpi->bus_id = cam_sim_bus(sim); cpi->base_transfer_speed = 300000; cpi->transport = XPORT_SAS; cpi->transport_version = 0; cpi->protocol = PROTO_SCSI; cpi->protocol_version = SCSI_REV_SPC2; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); strncpy(cpi->hba_vid, sc->hs_drv_props->drv_name, HBA_IDLEN); strncpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN); cpi->unit_number = cam_sim_unit(sim); ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_GET_TRAN_SETTINGS: { struct ccb_trans_settings *cts = &ccb->cts; cts->transport = XPORT_SAS; cts->transport_version = 0; cts->protocol = PROTO_SCSI; cts->protocol_version = SCSI_REV_SPC2; /* enable tag queuing and disconnected mode */ cts->proto_specific.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.valid = CTS_SCSI_VALID_TQ; cts->proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB; cts->xport_specific.valid = CTS_SPI_VALID_DISC; cts->xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB; ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_SET_TRAN_SETTINGS: { ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; } case XPT_CALC_GEOMETRY:{ cam_calc_geometry(&ccb->ccg, 1); xpt_done(ccb); return; } case XPT_RESET_BUS: case XPT_RESET_DEV:{ #if HVS_HOST_RESET if ((res = hv_storvsc_host_reset(sc)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_host_reset failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; xpt_done(ccb); return; } ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); return; #else xpt_print(ccb->ccb_h.path, "%s reset not supported.\n", (ccb->ccb_h.func_code == XPT_RESET_BUS)? 
"bus" : "dev"); ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; #endif /* HVS_HOST_RESET */ } case XPT_SCSI_IO: case XPT_IMMED_NOTIFY: { struct hv_storvsc_request *reqp = NULL; bus_dmamap_t dmap_saved; if (ccb->csio.cdb_len == 0) { panic("cdl_len is 0\n"); } if (LIST_EMPTY(&sc->hs_free_list)) { ccb->ccb_h.status = CAM_REQUEUE_REQ; if (sc->hs_frozen == 0) { sc->hs_frozen = 1; xpt_freeze_simq(sim, /* count*/1); } xpt_done(ccb); return; } reqp = LIST_FIRST(&sc->hs_free_list); LIST_REMOVE(reqp, link); /* Save the data_dmap before reset request */ dmap_saved = reqp->data_dmap; /* XXX this is ugly */ bzero(reqp, sizeof(struct hv_storvsc_request)); /* Restore necessary bits */ reqp->data_dmap = dmap_saved; reqp->softc = sc; ccb->ccb_h.status |= CAM_SIM_QUEUED; if ((res = create_storvsc_request(ccb, reqp)) != 0) { ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; } #ifdef notyet if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_init(&reqp->callout, 1); callout_reset_sbt(&reqp->callout, SBT_1MS * ccb->ccb_h.timeout, 0, storvsc_timeout, reqp, 0); #if HVS_TIMEOUT_TEST cv_init(&reqp->event.cv, "storvsc timeout cv"); mtx_init(&reqp->event.mtx, "storvsc timeout mutex", NULL, MTX_DEF); switch (reqp->vstor_packet.vm_srb.cdb[0]) { case MODE_SELECT_10: case SEND_DIAGNOSTIC: /* To have timer send the request. */ return; default: break; } #endif /* HVS_TIMEOUT_TEST */ } #endif if ((res = hv_storvsc_io_request(sc, reqp)) != 0) { xpt_print(ccb->ccb_h.path, "hv_storvsc_io_request failed with %d\n", res); ccb->ccb_h.status = CAM_PROVIDE_FAIL; storvsc_free_request(sc, reqp); xpt_done(ccb); return; } return; } default: ccb->ccb_h.status = CAM_REQ_INVALID; xpt_done(ccb); return; } } /** * @brief destroy bounce buffer * * This function is responsible for destroy a Scatter/Gather list * that create by storvsc_create_bounce_buffer() * * @param sgl- the Scatter/Gather need be destroy * @param sg_count- page count of the SG list. * */ static void storvsc_destroy_bounce_buffer(struct sglist *sgl) { struct hv_sgl_node *sgl_node = NULL; if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) { printf("storvsc error: not enough in use sgl\n"); return; } sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list); LIST_REMOVE(sgl_node, link); sgl_node->sgl_data = sgl; LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); } /** * @brief create bounce buffer * * This function is responsible for create a Scatter/Gather list, * which hold several pages that can be aligned with page size. * * @param seg_count- SG-list segments count * @param write - if WRITE_TYPE, set SG list page used size to 0, * otherwise set used size to page size. * * return NULL if create failed */ static struct sglist * storvsc_create_bounce_buffer(uint16_t seg_count, int write) { int i = 0; struct sglist *bounce_sgl = NULL; unsigned int buf_len = ((write == WRITE_TYPE) ? 
0 : PAGE_SIZE); struct hv_sgl_node *sgl_node = NULL; /* get struct sglist from free_sgl_list */ if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { printf("storvsc error: not enough free sgl\n"); return NULL; } sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); LIST_REMOVE(sgl_node, link); bounce_sgl = sgl_node->sgl_data; LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link); bounce_sgl->sg_maxseg = seg_count; if (write == WRITE_TYPE) bounce_sgl->sg_nseg = 0; else bounce_sgl->sg_nseg = seg_count; for (i = 0; i < seg_count; i++) bounce_sgl->sg_segs[i].ss_len = buf_len; return bounce_sgl; } /** * @brief copy data from SG list to bounce buffer * * This function is responsible for copy data from one SG list's segments * to another SG list which used as bounce buffer. * * @param bounce_sgl - the destination SG list * @param orig_sgl - the segment of the source SG list. * @param orig_sgl_count - the count of segments. * @param orig_sgl_count - indicate which segment need bounce buffer, * set 1 means need. * */ static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl, bus_dma_segment_t *orig_sgl, unsigned int orig_sgl_count, uint64_t seg_bits) { int src_sgl_idx = 0; for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) { if (seg_bits & (1 << src_sgl_idx)) { memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr, (void*)orig_sgl[src_sgl_idx].ds_addr, orig_sgl[src_sgl_idx].ds_len); bounce_sgl->sg_segs[src_sgl_idx].ss_len = orig_sgl[src_sgl_idx].ds_len; } } } /** * @brief copy data from SG list which used as bounce to another SG list * * This function is responsible for copy data from one SG list with bounce * buffer to another SG list's segments. * * @param dest_sgl - the destination SG list's segments * @param dest_sgl_count - the count of destination SG list's segment. * @param src_sgl - the source SG list. * @param seg_bits - indicate which segment used bounce buffer of src SG-list. * */ void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl, unsigned int dest_sgl_count, struct sglist* src_sgl, uint64_t seg_bits) { int sgl_idx = 0; for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) { if (seg_bits & (1 << sgl_idx)) { memcpy((void*)(dest_sgl[sgl_idx].ds_addr), (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr), src_sgl->sg_segs[sgl_idx].ss_len); } } } /** * @brief check SG list with bounce buffer or not * * This function is responsible for check if need bounce buffer for SG list. * * @param sgl - the SG list's segments * @param sg_count - the count of SG list's segment. 
* @param bits - segmengs number that need bounce buffer * * return -1 if SG list needless bounce buffer */ static int storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl, unsigned int sg_count, uint64_t *bits) { int i = 0; int offset = 0; uint64_t phys_addr = 0; uint64_t tmp_bits = 0; boolean_t found_hole = FALSE; boolean_t pre_aligned = TRUE; if (sg_count < 2){ return -1; } *bits = 0; phys_addr = vtophys(sgl[0].ds_addr); offset = phys_addr - trunc_page(phys_addr); if (offset != 0) { pre_aligned = FALSE; tmp_bits |= 1; } for (i = 1; i < sg_count; i++) { phys_addr = vtophys(sgl[i].ds_addr); offset = phys_addr - trunc_page(phys_addr); if (offset == 0) { if (FALSE == pre_aligned){ /* * This segment is aligned, if the previous * one is not aligned, find a hole */ found_hole = TRUE; } pre_aligned = TRUE; } else { tmp_bits |= 1 << i; if (!pre_aligned) { if (phys_addr != vtophys(sgl[i-1].ds_addr + sgl[i-1].ds_len)) { /* * Check whether connect to previous * segment,if not, find the hole */ found_hole = TRUE; } } else { found_hole = TRUE; } pre_aligned = FALSE; } } if (!found_hole) { return (-1); } else { *bits = tmp_bits; return 0; } } /** * Copy bus_dma segments to multiple page buffer, which requires * the pages are compact composed except for the 1st and last pages. */ static void storvsc_xferbuf_prepare(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct hv_storvsc_request *reqp = arg; union ccb *ccb = reqp->ccb; struct ccb_scsiio *csio = &ccb->csio; struct storvsc_gpa_range *prplist; int i; prplist = &reqp->prp_list; prplist->gpa_range.gpa_len = csio->dxfer_len; prplist->gpa_range.gpa_ofs = segs[0].ds_addr & PAGE_MASK; for (i = 0; i < nsegs; i++) { - prplist->gpa_page[i] = atop(segs[i].ds_addr); #ifdef INVARIANTS - if (i != 0 && i != nsegs - 1) { - KASSERT((segs[i].ds_addr & PAGE_MASK) == 0 && - segs[i].ds_len == PAGE_SIZE, ("not a full page")); + if (nsegs > 1) { + if (i == 0) { + KASSERT((segs[i].ds_addr & PAGE_MASK) + + segs[i].ds_len == PAGE_SIZE, + ("invalid 1st page, ofs 0x%jx, len %zu", + (uintmax_t)segs[i].ds_addr, + segs[i].ds_len)); + } else if (i == nsegs - 1) { + KASSERT((segs[i].ds_addr & PAGE_MASK) == 0, + ("invalid last page, ofs 0x%jx", + (uintmax_t)segs[i].ds_addr)); + } else { + KASSERT((segs[i].ds_addr & PAGE_MASK) == 0 && + segs[i].ds_len == PAGE_SIZE, + ("not a full page, ofs 0x%jx, len %zu", + (uintmax_t)segs[i].ds_addr, + segs[i].ds_len)); + } } #endif + prplist->gpa_page[i] = atop(segs[i].ds_addr); } reqp->prp_cnt = nsegs; } /** * @brief Fill in a request structure based on a CAM control block * * Fills in a request structure based on the contents of a CAM control * block. The request structure holds the payload information for * VSCSI protocol request. 
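 * CAM_DATA_BIO and CAM_DATA_VADDR payloads are mapped through
 * bus_dmamap_load_ccb(); CAM_DATA_SG payloads may additionally be staged
 * through the pre-allocated bounce page pool when their segments are not
 * page aligned.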
* * @param ccb pointer to a CAM contorl block * @param reqp pointer to a request structure */ static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp) { struct ccb_scsiio *csio = &ccb->csio; uint64_t phys_addr; uint32_t pfn; uint64_t not_aligned_seg_bits = 0; int error; /* refer to struct vmscsi_req for meanings of these two fields */ reqp->vstor_packet.u.vm_srb.port = cam_sim_unit(xpt_path_sim(ccb->ccb_h.path)); reqp->vstor_packet.u.vm_srb.path_id = cam_sim_bus(xpt_path_sim(ccb->ccb_h.path)); reqp->vstor_packet.u.vm_srb.target_id = ccb->ccb_h.target_id; reqp->vstor_packet.u.vm_srb.lun = ccb->ccb_h.target_lun; reqp->vstor_packet.u.vm_srb.cdb_len = csio->cdb_len; if(ccb->ccb_h.flags & CAM_CDB_POINTER) { memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_ptr, csio->cdb_len); } else { memcpy(&reqp->vstor_packet.u.vm_srb.u.cdb, csio->cdb_io.cdb_bytes, csio->cdb_len); } switch (ccb->ccb_h.flags & CAM_DIR_MASK) { case CAM_DIR_OUT: reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE; break; case CAM_DIR_IN: reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE; break; case CAM_DIR_NONE: reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE; break; default: reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE; break; } reqp->sense_data = &csio->sense_data; reqp->sense_info_len = csio->sense_len; reqp->ccb = ccb; if (0 == csio->dxfer_len) { return (0); } switch (ccb->ccb_h.flags & CAM_DATA_MASK) { case CAM_DATA_BIO: case CAM_DATA_VADDR: error = bus_dmamap_load_ccb(reqp->softc->storvsc_req_dtag, reqp->data_dmap, ccb, storvsc_xferbuf_prepare, reqp, BUS_DMA_NOWAIT); if (error) { xpt_print(ccb->ccb_h.path, "bus_dmamap_load_ccb failed: %d\n", error); return (error); } if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO) reqp->softc->sysctl_data.data_bio_cnt++; else reqp->softc->sysctl_data.data_vaddr_cnt++; break; case CAM_DATA_SG: { struct storvsc_gpa_range *prplist; int i = 0; int offset = 0; int ret; bus_dma_segment_t *storvsc_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr; u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt; prplist = &reqp->prp_list; prplist->gpa_range.gpa_len = csio->dxfer_len; printf("Storvsc: get SG I/O operation, %d\n", reqp->vstor_packet.u.vm_srb.data_in); if (storvsc_sg_count > STORVSC_DATA_SEGCNT_MAX){ printf("Storvsc: %d segments is too much, " "only support %d segments\n", storvsc_sg_count, STORVSC_DATA_SEGCNT_MAX); return (EINVAL); } /* * We create our own bounce buffer function currently. Idealy * we should use BUS_DMA(9) framework. But with current BUS_DMA * code there is no callback API to check the page alignment of * middle segments before busdma can decide if a bounce buffer * is needed for particular segment. There is callback, * "bus_dma_filter_t *filter", but the parrameters are not * sufficient for storvsc driver. * TODO: * Add page alignment check in BUS_DMA(9) callback. Once * this is complete, switch the following code to use * BUS_DMA(9) for storvsc bounce buffer support. 
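	 *
	 * Roughly, the code below does:
	 *
	 *	if (storvsc_check_bounce_buffer_sgl(sgl, cnt, &bits) != -1)
	 *		copy the flagged segments through the bounce page pool
	 *		and build the PRP list from the bounce pages;
	 *	else
	 *		build the PRP list directly from the original segments;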
*/ /* check if we need to create bounce buffer */ ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist, storvsc_sg_count, ¬_aligned_seg_bits); if (ret != -1) { reqp->bounce_sgl = storvsc_create_bounce_buffer(storvsc_sg_count, reqp->vstor_packet.u.vm_srb.data_in); if (NULL == reqp->bounce_sgl) { printf("Storvsc_error: " "create bounce buffer failed.\n"); return (ENOMEM); } reqp->bounce_sgl_count = storvsc_sg_count; reqp->not_aligned_seg_bits = not_aligned_seg_bits; /* * if it is write, we need copy the original data *to bounce buffer */ if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { storvsc_copy_sgl_to_bounce_buf( reqp->bounce_sgl, storvsc_sglist, storvsc_sg_count, reqp->not_aligned_seg_bits); } /* transfer virtual address to physical frame number */ if (reqp->not_aligned_seg_bits & 0x1){ phys_addr = vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr); }else{ phys_addr = vtophys(storvsc_sglist[0].ds_addr); } prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK; pfn = phys_addr >> PAGE_SHIFT; prplist->gpa_page[0] = pfn; for (i = 1; i < storvsc_sg_count; i++) { if (reqp->not_aligned_seg_bits & (1 << i)) { phys_addr = vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr); } else { phys_addr = vtophys(storvsc_sglist[i].ds_addr); } pfn = phys_addr >> PAGE_SHIFT; prplist->gpa_page[i] = pfn; } reqp->prp_cnt = i; } else { phys_addr = vtophys(storvsc_sglist[0].ds_addr); prplist->gpa_range.gpa_ofs = phys_addr & PAGE_MASK; for (i = 0; i < storvsc_sg_count; i++) { phys_addr = vtophys(storvsc_sglist[i].ds_addr); pfn = phys_addr >> PAGE_SHIFT; prplist->gpa_page[i] = pfn; } reqp->prp_cnt = i; /* check the last segment cross boundary or not */ offset = phys_addr & PAGE_MASK; if (offset) { /* Add one more PRP entry */ phys_addr = vtophys(storvsc_sglist[i-1].ds_addr + PAGE_SIZE - offset); pfn = phys_addr >> PAGE_SHIFT; prplist->gpa_page[i] = pfn; reqp->prp_cnt++; } reqp->bounce_sgl_count = 0; } reqp->softc->sysctl_data.data_sg_cnt++; break; } default: printf("Unknow flags: %d\n", ccb->ccb_h.flags); return(EINVAL); } return(0); } /* * SCSI Inquiry checks qualifier and type. * If qualifier is 011b, means the device server is not capable * of supporting a peripheral device on this logical unit, and * the type should be set to 1Fh. * * Return 1 if it is valid, 0 otherwise. */ static inline int is_inquiry_valid(const struct scsi_inquiry_data *inq_data) { uint8_t type; if (SID_QUAL(inq_data) != SID_QUAL_LU_CONNECTED) { return (0); } type = SID_TYPE(inq_data); if (type == T_NODEVICE) { return (0); } return (1); } /** * @brief completion function before returning to CAM * * I/O process has been completed and the result needs * to be passed to the CAM layer. * Free resources related to this request. * * @param reqp pointer to a request structure */ static void storvsc_io_done(struct hv_storvsc_request *reqp) { union ccb *ccb = reqp->ccb; struct ccb_scsiio *csio = &ccb->csio; struct storvsc_softc *sc = reqp->softc; struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb; bus_dma_segment_t *ori_sglist = NULL; int ori_sg_count = 0; /* destroy bounce buffer if it is used */ if (reqp->bounce_sgl_count) { ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr; ori_sg_count = ccb->csio.sglist_cnt; /* * If it is READ operation, we should copy back the data * to original SG list. 
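	 * (Write data was already staged into the bounce pages by
	 * create_storvsc_request(), so only reads need the copy here.)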
*/ if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) { storvsc_copy_from_bounce_buf_to_sgl(ori_sglist, ori_sg_count, reqp->bounce_sgl, reqp->not_aligned_seg_bits); } storvsc_destroy_bounce_buffer(reqp->bounce_sgl); reqp->bounce_sgl_count = 0; } if (reqp->retries > 0) { mtx_lock(&sc->hs_lock); #if HVS_TIMEOUT_TEST xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "waking up timer handler if any.\n", ticks); mtx_lock(&reqp->event.mtx); cv_signal(&reqp->event.cv); mtx_unlock(&reqp->event.mtx); #endif reqp->retries = 0; xpt_print(ccb->ccb_h.path, "%u: IO returned after timeout, " "stopping timer if any.\n", ticks); mtx_unlock(&sc->hs_lock); } #ifdef notyet /* * callout_drain() will wait for the timer handler to finish * if it is running. So we don't need any lock to synchronize * between this routine and the timer handler. * Note that we need to make sure reqp is not freed when timer * handler is using or will use it. */ if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) { callout_drain(&reqp->callout); } #endif ccb->ccb_h.status &= ~CAM_SIM_QUEUED; ccb->ccb_h.status &= ~CAM_STATUS_MASK; if (vm_srb->scsi_status == SCSI_STATUS_OK) { const struct scsi_generic *cmd; /* * Check whether the data for INQUIRY cmd is valid or * not. Windows 10 and Windows 2016 send all zero * inquiry data to VM even for unpopulated slots. */ cmd = (const struct scsi_generic *) ((ccb->ccb_h.flags & CAM_CDB_POINTER) ? csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes); if (cmd->opcode == INQUIRY) { /* * The host of Windows 10 or 2016 server will response * the inquiry request with invalid data for unexisted device: [0x7f 0x0 0x5 0x2 0x1f ... ] * But on windows 2012 R2, the response is: [0x7f 0x0 0x0 0x0 0x0 ] * That is why here wants to validate the inquiry response. * The validation will skip the INQUIRY whose response is short, * which is less than SHORT_INQUIRY_LENGTH (36). * * For more information about INQUIRY, please refer to: * ftp://ftp.avc-pioneer.com/Mtfuji_7/Proposal/Jun09/INQUIRY.pdf */ const struct scsi_inquiry_data *inq_data = (const struct scsi_inquiry_data *)csio->data_ptr; uint8_t* resp_buf = (uint8_t*)csio->data_ptr; /* Get the buffer length reported by host */ int resp_xfer_len = vm_srb->transfer_len; /* Get the available buffer length */ int resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0; int data_len = (resp_buf_len < resp_xfer_len) ? 
resp_buf_len : resp_xfer_len; if (data_len < SHORT_INQUIRY_LENGTH) { ccb->ccb_h.status |= CAM_REQ_CMP; if (bootverbose && data_len >= 5) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc skips the validation for short inquiry (%d)" " [%x %x %x %x %x]\n", data_len,resp_buf[0],resp_buf[1],resp_buf[2], resp_buf[3],resp_buf[4]); mtx_unlock(&sc->hs_lock); } } else if (is_inquiry_valid(inq_data) == 0) { ccb->ccb_h.status |= CAM_DEV_NOT_THERE; if (bootverbose && data_len >= 5) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc uninstalled invalid device" " [%x %x %x %x %x]\n", resp_buf[0],resp_buf[1],resp_buf[2],resp_buf[3],resp_buf[4]); mtx_unlock(&sc->hs_lock); } } else { ccb->ccb_h.status |= CAM_REQ_CMP; if (bootverbose) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc has passed inquiry response (%d) validation\n", data_len); mtx_unlock(&sc->hs_lock); } } } else { ccb->ccb_h.status |= CAM_REQ_CMP; } } else { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, "storvsc scsi_status = %d\n", vm_srb->scsi_status); mtx_unlock(&sc->hs_lock); ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR; } ccb->csio.scsi_status = (vm_srb->scsi_status & 0xFF); ccb->csio.resid = ccb->csio.dxfer_len - vm_srb->transfer_len; if (reqp->sense_info_len != 0) { csio->sense_resid = csio->sense_len - reqp->sense_info_len; ccb->ccb_h.status |= CAM_AUTOSNS_VALID; } mtx_lock(&sc->hs_lock); if (reqp->softc->hs_frozen == 1) { xpt_print(ccb->ccb_h.path, "%u: storvsc unfreezing softc 0x%p.\n", ticks, reqp->softc); ccb->ccb_h.status |= CAM_RELEASE_SIMQ; reqp->softc->hs_frozen = 0; } storvsc_free_request(sc, reqp); mtx_unlock(&sc->hs_lock); xpt_done_direct(ccb); } /** * @brief Free a request structure * * Free a request structure by returning it to the free list * * @param sc pointer to a softc * @param reqp pointer to a request structure */ static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp) { LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link); } /** * @brief Determine type of storage device from GUID * * Using the type GUID, determine if this is a StorVSC (paravirtual * SCSI or BlkVSC (paravirtual IDE) device. * * @param dev a device * returns an enum */ static enum hv_storage_type storvsc_get_storage_type(device_t dev) { device_t parent = device_get_parent(dev); if (VMBUS_PROBE_GUID(parent, dev, &gBlkVscDeviceType) == 0) return DRIVER_BLKVSC; if (VMBUS_PROBE_GUID(parent, dev, &gStorVscDeviceType) == 0) return DRIVER_STORVSC; return DRIVER_UNKNOWN; } Index: user/alc/PQ_LAUNDRY/sys/dev/hyperv/vmbus/vmbus_brvar.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/hyperv/vmbus/vmbus_brvar.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/hyperv/vmbus/vmbus_brvar.h (revision 303642) @@ -1,93 +1,100 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMBUS_BRVAR_H_ #define _VMBUS_BRVAR_H_ #include #include #include #include struct vmbus_br { struct vmbus_bufring *vbr; uint32_t vbr_dsize; /* total data size */ }; #define vbr_windex vbr->br_windex #define vbr_rindex vbr->br_rindex #define vbr_imask vbr->br_imask #define vbr_data vbr->br_data struct vmbus_rxbr { struct mtx rxbr_lock; struct vmbus_br rxbr; }; #define rxbr_windex rxbr.vbr_windex #define rxbr_rindex rxbr.vbr_rindex #define rxbr_imask rxbr.vbr_imask #define rxbr_data rxbr.vbr_data #define rxbr_dsize rxbr.vbr_dsize struct vmbus_txbr { struct mtx txbr_lock; struct vmbus_br txbr; }; #define txbr_windex txbr.vbr_windex #define txbr_rindex txbr.vbr_rindex #define txbr_imask txbr.vbr_imask #define txbr_data txbr.vbr_data #define txbr_dsize txbr.vbr_dsize struct sysctl_ctx_list; struct sysctl_oid; +static __inline int +vmbus_txbr_maxpktsz(const struct vmbus_txbr *tbr) +{ + /* 1/2 data size */ + return (tbr->txbr_dsize / 2); +} + void vmbus_br_sysctl_create(struct sysctl_ctx_list *ctx, struct sysctl_oid *br_tree, struct vmbus_br *br, const char *name); void vmbus_rxbr_init(struct vmbus_rxbr *rbr); void vmbus_rxbr_deinit(struct vmbus_rxbr *rbr); void vmbus_rxbr_setup(struct vmbus_rxbr *rbr, void *buf, int blen); int vmbus_rxbr_peek(struct vmbus_rxbr *rbr, void *data, int dlen); int vmbus_rxbr_read(struct vmbus_rxbr *rbr, void *data, int dlen, uint32_t skip); void vmbus_rxbr_intr_mask(struct vmbus_rxbr *rbr); uint32_t vmbus_rxbr_intr_unmask(struct vmbus_rxbr *rbr); void vmbus_txbr_init(struct vmbus_txbr *tbr); void vmbus_txbr_deinit(struct vmbus_txbr *tbr); void vmbus_txbr_setup(struct vmbus_txbr *tbr, void *buf, int blen); int vmbus_txbr_write(struct vmbus_txbr *tbr, const struct iovec iov[], int iovlen, boolean_t *need_sig); #endif /* _VMBUS_BRVAR_H_ */ Index: user/alc/PQ_LAUNDRY/sys/dev/hyperv/vmbus/vmbus_chan.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/hyperv/vmbus/vmbus_chan.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/hyperv/vmbus/vmbus_chan.c (revision 303642) @@ -1,1413 +1,1413 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * Copyright (c) 2012 Citrix Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void vmbus_chan_update_evtflagcnt( struct vmbus_softc *, const struct vmbus_channel *); static void vmbus_chan_close_internal( struct vmbus_channel *); static int vmbus_chan_sysctl_mnf(SYSCTL_HANDLER_ARGS); static void vmbus_chan_sysctl_create( struct vmbus_channel *); static struct vmbus_channel *vmbus_chan_alloc(struct vmbus_softc *); static void vmbus_chan_free(struct vmbus_channel *); static int vmbus_chan_add(struct vmbus_channel *); static void vmbus_chan_cpu_default(struct vmbus_channel *); static void vmbus_chan_task(void *, int); static void vmbus_chan_task_nobatch(void *, int); static void vmbus_chan_detach_task(void *, int); static void vmbus_chan_msgproc_choffer(struct vmbus_softc *, const struct vmbus_message *); static void vmbus_chan_msgproc_chrescind( struct vmbus_softc *, const struct vmbus_message *); /* * Vmbus channel message processing. */ static const vmbus_chanmsg_proc_t vmbus_chan_msgprocs[VMBUS_CHANMSG_TYPE_MAX] = { VMBUS_CHANMSG_PROC(CHOFFER, vmbus_chan_msgproc_choffer), VMBUS_CHANMSG_PROC(CHRESCIND, vmbus_chan_msgproc_chrescind), VMBUS_CHANMSG_PROC_WAKEUP(CHOPEN_RESP), VMBUS_CHANMSG_PROC_WAKEUP(GPADL_CONNRESP), VMBUS_CHANMSG_PROC_WAKEUP(GPADL_DISCONNRESP) }; /* * Notify host that there are data pending on our TX bufring. */ static __inline void vmbus_chan_signal_tx(const struct vmbus_channel *chan) { atomic_set_long(chan->ch_evtflag, chan->ch_evtflag_mask); if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF) atomic_set_int(chan->ch_montrig, chan->ch_montrig_mask); else hypercall_signal_event(chan->ch_monprm_dma.hv_paddr); } static int vmbus_chan_sysctl_mnf(SYSCTL_HANDLER_ARGS) { struct vmbus_channel *chan = arg1; int mnf = 0; if (chan->ch_txflags & VMBUS_CHAN_TXF_HASMNF) mnf = 1; return sysctl_handle_int(oidp, &mnf, 0, req); } static void vmbus_chan_sysctl_create(struct vmbus_channel *chan) { struct sysctl_oid *ch_tree, *chid_tree, *br_tree; struct sysctl_ctx_list *ctx; uint32_t ch_id; char name[16]; /* * Add sysctl nodes related to this channel to this * channel's sysctl ctx, so that they can be destroyed * independently upon close of this channel, which can * happen even if the device is not detached. */ ctx = &chan->ch_sysctl_ctx; sysctl_ctx_init(ctx); /* * Create dev.NAME.UNIT.channel tree. */ ch_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(chan->ch_dev)), OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (ch_tree == NULL) return; /* * Create dev.NAME.UNIT.channel.CHANID tree. 
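	 * For a sub-channel the id of its primary channel is used here; a
	 * .sub.SUBIDX level naming the sub-channel is added further below.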
*/ if (VMBUS_CHAN_ISPRIMARY(chan)) ch_id = chan->ch_id; else ch_id = chan->ch_prichan->ch_id; snprintf(name, sizeof(name), "%d", ch_id); chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (chid_tree == NULL) return; if (!VMBUS_CHAN_ISPRIMARY(chan)) { /* * Create dev.NAME.UNIT.channel.CHANID.sub tree. */ ch_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (ch_tree == NULL) return; /* * Create dev.NAME.UNIT.channel.CHANID.sub.SUBIDX tree. * * NOTE: * chid_tree is changed to this new sysctl tree. */ snprintf(name, sizeof(name), "%d", chan->ch_subidx); chid_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(ch_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (chid_tree == NULL) return; SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "chanid", CTLFLAG_RD, &chan->ch_id, 0, "channel id"); } SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "cpu", CTLFLAG_RD, &chan->ch_cpuid, 0, "owner CPU id"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "mnf", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, chan, 0, vmbus_chan_sysctl_mnf, "I", "has monitor notification facilities"); br_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(chid_tree), OID_AUTO, "br", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (br_tree != NULL) { /* * Create sysctl tree for RX bufring. */ vmbus_br_sysctl_create(ctx, br_tree, &chan->ch_rxbr.rxbr, "rx"); /* * Create sysctl tree for TX bufring. */ vmbus_br_sysctl_create(ctx, br_tree, &chan->ch_txbr.txbr, "tx"); } } int vmbus_chan_open(struct vmbus_channel *chan, int txbr_size, int rxbr_size, const void *udata, int udlen, vmbus_chan_callback_t cb, void *cbarg) { struct vmbus_softc *sc = chan->ch_vmbus; const struct vmbus_chanmsg_chopen_resp *resp; const struct vmbus_message *msg; struct vmbus_chanmsg_chopen *req; struct vmbus_msghc *mh; uint32_t status; int error; uint8_t *br; if (udlen > VMBUS_CHANMSG_CHOPEN_UDATA_SIZE) { device_printf(sc->vmbus_dev, "invalid udata len %d for chan%u\n", udlen, chan->ch_id); return EINVAL; } KASSERT((txbr_size & PAGE_MASK) == 0, ("send bufring size is not multiple page")); KASSERT((rxbr_size & PAGE_MASK) == 0, ("recv bufring size is not multiple page")); if (atomic_testandset_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED_SHIFT)) panic("double-open chan%u", chan->ch_id); chan->ch_cb = cb; chan->ch_cbarg = cbarg; vmbus_chan_update_evtflagcnt(sc, chan); chan->ch_tq = VMBUS_PCPU_GET(chan->ch_vmbus, event_tq, chan->ch_cpuid); if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) TASK_INIT(&chan->ch_task, 0, vmbus_chan_task, chan); else TASK_INIT(&chan->ch_task, 0, vmbus_chan_task_nobatch, chan); /* * Allocate the TX+RX bufrings. * XXX should use ch_dev dtag */ br = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev), PAGE_SIZE, 0, txbr_size + rxbr_size, &chan->ch_bufring_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (br == NULL) { device_printf(sc->vmbus_dev, "bufring allocation failed\n"); error = ENOMEM; goto failed; } chan->ch_bufring = br; /* TX bufring comes first */ vmbus_txbr_setup(&chan->ch_txbr, br, txbr_size); /* RX bufring immediately follows TX bufring */ vmbus_rxbr_setup(&chan->ch_rxbr, br + txbr_size, rxbr_size); /* Create sysctl tree for this channel */ vmbus_chan_sysctl_create(chan); /* * Connect the bufrings, both RX and TX, to this channel. 
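	 * A single GPADL covers both rings, since they were carved out of one
	 * contiguous DMA allocation above (TX bufring first, RX bufring
	 * immediately after it).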
*/ error = vmbus_chan_gpadl_connect(chan, chan->ch_bufring_dma.hv_paddr, txbr_size + rxbr_size, &chan->ch_bufring_gpadl); if (error) { device_printf(sc->vmbus_dev, "failed to connect bufring GPADL to chan%u\n", chan->ch_id); goto failed; } /* * Open channel w/ the bufring GPADL on the target CPU. */ mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for chopen(chan%u)\n", chan->ch_id); error = ENXIO; goto failed; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHOPEN; req->chm_chanid = chan->ch_id; req->chm_openid = chan->ch_id; req->chm_gpadl = chan->ch_bufring_gpadl; req->chm_vcpuid = chan->ch_vcpuid; req->chm_txbr_pgcnt = txbr_size >> PAGE_SHIFT; if (udlen > 0) memcpy(req->chm_udata, udata, udlen); error = vmbus_msghc_exec(sc, mh); if (error) { device_printf(sc->vmbus_dev, "chopen(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); goto failed; } msg = vmbus_msghc_wait_result(sc, mh); resp = (const struct vmbus_chanmsg_chopen_resp *)msg->msg_data; status = resp->chm_status; vmbus_msghc_put(sc, mh); if (status == 0) { if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u opened\n", chan->ch_id); } return 0; } device_printf(sc->vmbus_dev, "failed to open chan%u\n", chan->ch_id); error = ENXIO; failed: if (chan->ch_bufring_gpadl) { vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl); chan->ch_bufring_gpadl = 0; } if (chan->ch_bufring != NULL) { hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring); chan->ch_bufring = NULL; } atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED); return error; } int vmbus_chan_gpadl_connect(struct vmbus_channel *chan, bus_addr_t paddr, int size, uint32_t *gpadl0) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_gpadl_conn *req; const struct vmbus_message *msg; size_t reqsz; uint32_t gpadl, status; int page_count, range_len, i, cnt, error; uint64_t page_id; /* * Preliminary checks. */ KASSERT((size & PAGE_MASK) == 0, ("invalid GPA size %d, not multiple page size", size)); page_count = size >> PAGE_SHIFT; KASSERT((paddr & PAGE_MASK) == 0, ("GPA is not page aligned %jx", (uintmax_t)paddr)); page_id = paddr >> PAGE_SHIFT; range_len = __offsetof(struct vmbus_gpa_range, gpa_page[page_count]); /* * We don't support multiple GPA ranges. */ if (range_len > UINT16_MAX) { device_printf(sc->vmbus_dev, "GPA too large, %d pages\n", page_count); return EOPNOTSUPP; } /* * Allocate GPADL id. */ gpadl = vmbus_gpadl_alloc(sc); *gpadl0 = gpadl; /* * Connect this GPADL to the target channel. * * NOTE: * Since each message can only hold small set of page * addresses, several messages may be required to * complete the connection. 
*/ if (page_count > VMBUS_CHANMSG_GPADL_CONN_PGMAX) cnt = VMBUS_CHANMSG_GPADL_CONN_PGMAX; else cnt = page_count; page_count -= cnt; reqsz = __offsetof(struct vmbus_chanmsg_gpadl_conn, chm_range.gpa_page[cnt]); mh = vmbus_msghc_get(sc, reqsz); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for gpadl->chan%u\n", chan->ch_id); return EIO; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_CONN; req->chm_chanid = chan->ch_id; req->chm_gpadl = gpadl; req->chm_range_len = range_len; req->chm_range_cnt = 1; req->chm_range.gpa_len = size; req->chm_range.gpa_ofs = 0; for (i = 0; i < cnt; ++i) req->chm_range.gpa_page[i] = page_id++; error = vmbus_msghc_exec(sc, mh); if (error) { device_printf(sc->vmbus_dev, "gpadl->chan%u msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); return error; } while (page_count > 0) { struct vmbus_chanmsg_gpadl_subconn *subreq; if (page_count > VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX) cnt = VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX; else cnt = page_count; page_count -= cnt; reqsz = __offsetof(struct vmbus_chanmsg_gpadl_subconn, chm_gpa_page[cnt]); vmbus_msghc_reset(mh, reqsz); subreq = vmbus_msghc_dataptr(mh); subreq->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_SUBCONN; subreq->chm_gpadl = gpadl; for (i = 0; i < cnt; ++i) subreq->chm_gpa_page[i] = page_id++; vmbus_msghc_exec_noresult(mh); } KASSERT(page_count == 0, ("invalid page count %d", page_count)); msg = vmbus_msghc_wait_result(sc, mh); status = ((const struct vmbus_chanmsg_gpadl_connresp *) msg->msg_data)->chm_status; vmbus_msghc_put(sc, mh); if (status != 0) { device_printf(sc->vmbus_dev, "gpadl->chan%u failed: " "status %u\n", chan->ch_id, status); return EIO; } else { if (bootverbose) { device_printf(sc->vmbus_dev, "gpadl->chan%u " "succeeded\n", chan->ch_id); } } return 0; } /* * Disconnect the GPA from the target channel */ int vmbus_chan_gpadl_disconnect(struct vmbus_channel *chan, uint32_t gpadl) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_gpadl_disconn *req; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for gpa x->chan%u\n", chan->ch_id); return EBUSY; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_GPADL_DISCONN; req->chm_chanid = chan->ch_id; req->chm_gpadl = gpadl; error = vmbus_msghc_exec(sc, mh); if (error) { device_printf(sc->vmbus_dev, "gpa x->chan%u msg hypercall exec failed: %d\n", chan->ch_id, error); vmbus_msghc_put(sc, mh); return error; } vmbus_msghc_wait_result(sc, mh); /* Discard result; no useful information */ vmbus_msghc_put(sc, mh); return 0; } static void vmbus_chan_close_internal(struct vmbus_channel *chan) { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_msghc *mh; struct vmbus_chanmsg_chclose *req; struct taskqueue *tq = chan->ch_tq; int error; /* TODO: stringent check */ atomic_clear_int(&chan->ch_stflags, VMBUS_CHAN_ST_OPENED); /* * Free this channel's sysctl tree attached to its device's * sysctl tree. */ sysctl_ctx_free(&chan->ch_sysctl_ctx); /* * Set ch_tq to NULL to avoid more requests be scheduled. * XXX pretty broken; need rework. */ chan->ch_tq = NULL; taskqueue_drain(tq, &chan->ch_task); chan->ch_cb = NULL; /* * Close this channel. 
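	 * The CHCLOSE message is sent without waiting for a host response
	 * (vmbus_msghc_exec_noresult() below); the bufring GPADL and the ring
	 * memory are then torn down locally.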
*/ mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for chclose(chan%u)\n", chan->ch_id); return; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHCLOSE; req->chm_chanid = chan->ch_id; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { device_printf(sc->vmbus_dev, "chclose(chan%u) msg hypercall exec failed: %d\n", chan->ch_id, error); return; } else if (bootverbose) { device_printf(sc->vmbus_dev, "close chan%u\n", chan->ch_id); } /* * Disconnect the TX+RX bufrings from this channel. */ if (chan->ch_bufring_gpadl) { vmbus_chan_gpadl_disconnect(chan, chan->ch_bufring_gpadl); chan->ch_bufring_gpadl = 0; } /* * Destroy the TX+RX bufrings. */ if (chan->ch_bufring != NULL) { hyperv_dmamem_free(&chan->ch_bufring_dma, chan->ch_bufring); chan->ch_bufring = NULL; } } /* * Caller should make sure that all sub-channels have * been added to 'chan' and all to-be-closed channels * are not being opened. */ void vmbus_chan_close(struct vmbus_channel *chan) { int subchan_cnt; if (!VMBUS_CHAN_ISPRIMARY(chan)) { /* * Sub-channel is closed when its primary channel * is closed; done. */ return; } /* * Close all sub-channels, if any. */ subchan_cnt = chan->ch_subchan_cnt; if (subchan_cnt > 0) { struct vmbus_channel **subchan; int i; subchan = vmbus_subchan_get(chan, subchan_cnt); for (i = 0; i < subchan_cnt; ++i) vmbus_chan_close_internal(subchan[i]); vmbus_subchan_rel(subchan, subchan_cnt); } /* Then close the primary channel. */ vmbus_chan_close_internal(chan); } int vmbus_chan_send(struct vmbus_channel *chan, uint16_t type, uint16_t flags, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt pkt; int pktlen, pad_pktlen, hlen, error; uint64_t pad = 0; struct iovec iov[3]; boolean_t send_evt; hlen = sizeof(pkt); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); + KASSERT(pad_pktlen <= vmbus_txbr_maxpktsz(&chan->ch_txbr), + ("invalid packet size %d", pad_pktlen)); pkt.cp_hdr.cph_type = type; pkt.cp_hdr.cph_flags = flags; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; iov[0].iov_base = &pkt; iov[0].iov_len = hlen; iov[1].iov_base = data; iov[1].iov_len = dlen; iov[2].iov_base = &pad; iov[2].iov_len = pad_pktlen - pktlen; error = vmbus_txbr_write(&chan->ch_txbr, iov, 3, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_send_sglist(struct vmbus_channel *chan, struct vmbus_gpa sg[], int sglen, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt_sglist pkt; int pktlen, pad_pktlen, hlen, error; struct iovec iov[4]; boolean_t send_evt; uint64_t pad = 0; - KASSERT(sglen < VMBUS_CHAN_SGLIST_MAX, - ("invalid sglist len %d", sglen)); - hlen = __offsetof(struct vmbus_chanpkt_sglist, cp_gpa[sglen]); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); + KASSERT(pad_pktlen <= vmbus_txbr_maxpktsz(&chan->ch_txbr), + ("invalid packet size %d", pad_pktlen)); pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA; pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; pkt.cp_rsvd = 0; pkt.cp_gpa_cnt = sglen; iov[0].iov_base = &pkt; iov[0].iov_len = sizeof(pkt); iov[1].iov_base = sg; iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen; iov[2].iov_base = data; iov[2].iov_len = dlen; iov[3].iov_base = &pad; iov[3].iov_len = 
pad_pktlen - pktlen; error = vmbus_txbr_write(&chan->ch_txbr, iov, 4, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_send_prplist(struct vmbus_channel *chan, struct vmbus_gpa_range *prp, int prp_cnt, void *data, int dlen, uint64_t xactid) { struct vmbus_chanpkt_prplist pkt; int pktlen, pad_pktlen, hlen, error; struct iovec iov[4]; boolean_t send_evt; uint64_t pad = 0; - KASSERT(prp_cnt < VMBUS_CHAN_PRPLIST_MAX, - ("invalid prplist entry count %d", prp_cnt)); - hlen = __offsetof(struct vmbus_chanpkt_prplist, cp_range[0].gpa_page[prp_cnt]); pktlen = hlen + dlen; pad_pktlen = VMBUS_CHANPKT_TOTLEN(pktlen); + KASSERT(pad_pktlen <= vmbus_txbr_maxpktsz(&chan->ch_txbr), + ("invalid packet size %d", pad_pktlen)); pkt.cp_hdr.cph_type = VMBUS_CHANPKT_TYPE_GPA; pkt.cp_hdr.cph_flags = VMBUS_CHANPKT_FLAG_RC; VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_hlen, hlen); VMBUS_CHANPKT_SETLEN(pkt.cp_hdr.cph_tlen, pad_pktlen); pkt.cp_hdr.cph_xactid = xactid; pkt.cp_rsvd = 0; pkt.cp_range_cnt = 1; iov[0].iov_base = &pkt; iov[0].iov_len = sizeof(pkt); iov[1].iov_base = prp; iov[1].iov_len = __offsetof(struct vmbus_gpa_range, gpa_page[prp_cnt]); iov[2].iov_base = data; iov[2].iov_len = dlen; iov[3].iov_base = &pad; iov[3].iov_len = pad_pktlen - pktlen; error = vmbus_txbr_write(&chan->ch_txbr, iov, 4, &send_evt); if (!error && send_evt) vmbus_chan_signal_tx(chan); return error; } int vmbus_chan_recv(struct vmbus_channel *chan, void *data, int *dlen0, uint64_t *xactid) { struct vmbus_chanpkt_hdr pkt; int error, dlen, hlen; error = vmbus_rxbr_peek(&chan->ch_rxbr, &pkt, sizeof(pkt)); if (error) return error; hlen = VMBUS_CHANPKT_GETLEN(pkt.cph_hlen); dlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen) - hlen; if (*dlen0 < dlen) { /* Return the size of this packet's data. */ *dlen0 = dlen; return ENOBUFS; } *xactid = pkt.cph_xactid; *dlen0 = dlen; /* Skip packet header */ error = vmbus_rxbr_read(&chan->ch_rxbr, data, dlen, hlen); KASSERT(!error, ("vmbus_rxbr_read failed")); return 0; } int vmbus_chan_recv_pkt(struct vmbus_channel *chan, struct vmbus_chanpkt_hdr *pkt0, int *pktlen0) { struct vmbus_chanpkt_hdr pkt; int error, pktlen; error = vmbus_rxbr_peek(&chan->ch_rxbr, &pkt, sizeof(pkt)); if (error) return error; pktlen = VMBUS_CHANPKT_GETLEN(pkt.cph_tlen); if (*pktlen0 < pktlen) { /* Return the size of this packet. */ *pktlen0 = pktlen; return ENOBUFS; } *pktlen0 = pktlen; /* Include packet header */ error = vmbus_rxbr_read(&chan->ch_rxbr, pkt0, pktlen, 0); KASSERT(!error, ("vmbus_rxbr_read failed")); return 0; } static void vmbus_chan_task(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; vmbus_chan_callback_t cb = chan->ch_cb; void *cbarg = chan->ch_cbarg; /* * Optimize host to guest signaling by ensuring: * 1. While reading the channel, we disable interrupts from * host. * 2. Ensure that we process all posted messages from the host * before returning from this callback. * 3. Once we return, enable signaling from the host. Once this * state is set we check to see if additional packets are * available to read. In this case we repeat the process. * * NOTE: Interrupt has been disabled in the ISR. 
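* The loop below re-checks the RX bufring after unmasking the
* interrupt: if more data arrived while the callback was running,
* the interrupt is masked again and the callback is re-run, so no
* pending packets are missed.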
*/ for (;;) { uint32_t left; cb(chan, cbarg); left = vmbus_rxbr_intr_unmask(&chan->ch_rxbr); if (left == 0) { /* No more data in RX bufring; done */ break; } vmbus_rxbr_intr_mask(&chan->ch_rxbr); } } static void vmbus_chan_task_nobatch(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; chan->ch_cb(chan, chan->ch_cbarg); } static __inline void vmbus_event_flags_proc(struct vmbus_softc *sc, volatile u_long *event_flags, int flag_cnt) { int f; for (f = 0; f < flag_cnt; ++f) { uint32_t chid_base; u_long flags; int chid_ofs; if (event_flags[f] == 0) continue; flags = atomic_swap_long(&event_flags[f], 0); chid_base = f << VMBUS_EVTFLAG_SHIFT; while ((chid_ofs = ffsl(flags)) != 0) { struct vmbus_channel *chan; --chid_ofs; /* NOTE: ffsl is 1-based */ flags &= ~(1UL << chid_ofs); chan = sc->vmbus_chmap[chid_base + chid_ofs]; /* if channel is closed or closing */ if (chan == NULL || chan->ch_tq == NULL) continue; if (chan->ch_flags & VMBUS_CHAN_FLAG_BATCHREAD) vmbus_rxbr_intr_mask(&chan->ch_rxbr); taskqueue_enqueue(chan->ch_tq, &chan->ch_task); } } } void vmbus_event_proc(struct vmbus_softc *sc, int cpu) { struct vmbus_evtflags *eventf; /* * On Host with Win8 or above, the event page can be checked directly * to get the id of the channel that has the pending interrupt. */ eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; vmbus_event_flags_proc(sc, eventf->evt_flags, VMBUS_PCPU_GET(sc, event_flags_cnt, cpu)); } void vmbus_event_proc_compat(struct vmbus_softc *sc, int cpu) { struct vmbus_evtflags *eventf; eventf = VMBUS_PCPU_GET(sc, event_flags, cpu) + VMBUS_SINT_MESSAGE; if (atomic_testandclear_long(&eventf->evt_flags[0], 0)) { vmbus_event_flags_proc(sc, sc->vmbus_rx_evtflags, VMBUS_CHAN_MAX_COMPAT >> VMBUS_EVTFLAG_SHIFT); } } static void vmbus_chan_update_evtflagcnt(struct vmbus_softc *sc, const struct vmbus_channel *chan) { volatile int *flag_cnt_ptr; int flag_cnt; flag_cnt = (chan->ch_id / VMBUS_EVTFLAG_LEN) + 1; flag_cnt_ptr = VMBUS_PCPU_PTR(sc, event_flags_cnt, chan->ch_cpuid); for (;;) { int old_flag_cnt; old_flag_cnt = *flag_cnt_ptr; if (old_flag_cnt >= flag_cnt) break; if (atomic_cmpset_int(flag_cnt_ptr, old_flag_cnt, flag_cnt)) { if (bootverbose) { device_printf(sc->vmbus_dev, "channel%u update cpu%d flag_cnt to %d\n", chan->ch_id, chan->ch_cpuid, flag_cnt); } break; } } } static struct vmbus_channel * vmbus_chan_alloc(struct vmbus_softc *sc) { struct vmbus_channel *chan; chan = malloc(sizeof(*chan), M_DEVBUF, M_WAITOK | M_ZERO); chan->ch_monprm = hyperv_dmamem_alloc(bus_get_dma_tag(sc->vmbus_dev), HYPERCALL_PARAM_ALIGN, 0, sizeof(struct hyperv_mon_param), &chan->ch_monprm_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (chan->ch_monprm == NULL) { device_printf(sc->vmbus_dev, "monprm alloc failed\n"); free(chan, M_DEVBUF); return NULL; } chan->ch_vmbus = sc; mtx_init(&chan->ch_subchan_lock, "vmbus subchan", NULL, MTX_DEF); TAILQ_INIT(&chan->ch_subchans); TASK_INIT(&chan->ch_detach_task, 0, vmbus_chan_detach_task, chan); vmbus_rxbr_init(&chan->ch_rxbr); vmbus_txbr_init(&chan->ch_txbr); return chan; } static void vmbus_chan_free(struct vmbus_channel *chan) { /* TODO: assert sub-channel list is empty */ /* TODO: asset no longer on the primary channel's sub-channel list */ /* TODO: asset no longer on the vmbus channel list */ hyperv_dmamem_free(&chan->ch_monprm_dma, chan->ch_monprm); mtx_destroy(&chan->ch_subchan_lock); vmbus_rxbr_deinit(&chan->ch_rxbr); vmbus_txbr_deinit(&chan->ch_txbr); free(chan, M_DEVBUF); } static int vmbus_chan_add(struct vmbus_channel 
*newchan) { struct vmbus_softc *sc = newchan->ch_vmbus; struct vmbus_channel *prichan; if (newchan->ch_id == 0) { /* * XXX * Chan0 will neither be processed nor should be offered; * skip it. */ device_printf(sc->vmbus_dev, "got chan0 offer, discard\n"); return EINVAL; } else if (newchan->ch_id >= VMBUS_CHAN_MAX) { device_printf(sc->vmbus_dev, "invalid chan%u offer\n", newchan->ch_id); return EINVAL; } sc->vmbus_chmap[newchan->ch_id] = newchan; if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u subidx%u offer\n", newchan->ch_id, newchan->ch_subidx); } mtx_lock(&sc->vmbus_prichan_lock); TAILQ_FOREACH(prichan, &sc->vmbus_prichans, ch_prilink) { /* * Sub-channel will have the same type GUID and instance * GUID as its primary channel. */ if (memcmp(&prichan->ch_guid_type, &newchan->ch_guid_type, sizeof(struct hyperv_guid)) == 0 && memcmp(&prichan->ch_guid_inst, &newchan->ch_guid_inst, sizeof(struct hyperv_guid)) == 0) break; } if (VMBUS_CHAN_ISPRIMARY(newchan)) { if (prichan == NULL) { /* Install the new primary channel */ TAILQ_INSERT_TAIL(&sc->vmbus_prichans, newchan, ch_prilink); mtx_unlock(&sc->vmbus_prichan_lock); return 0; } else { mtx_unlock(&sc->vmbus_prichan_lock); device_printf(sc->vmbus_dev, "duplicated primary " "chan%u\n", newchan->ch_id); return EINVAL; } } else { /* Sub-channel */ if (prichan == NULL) { mtx_unlock(&sc->vmbus_prichan_lock); device_printf(sc->vmbus_dev, "no primary chan for " "chan%u\n", newchan->ch_id); return EINVAL; } /* * Found the primary channel for this sub-channel and * move on. * * XXX refcnt prichan */ } mtx_unlock(&sc->vmbus_prichan_lock); /* * This is a sub-channel; link it with the primary channel. */ KASSERT(!VMBUS_CHAN_ISPRIMARY(newchan), ("new channel is not sub-channel")); KASSERT(prichan != NULL, ("no primary channel")); newchan->ch_prichan = prichan; newchan->ch_dev = prichan->ch_dev; mtx_lock(&prichan->ch_subchan_lock); TAILQ_INSERT_TAIL(&prichan->ch_subchans, newchan, ch_sublink); /* * Bump up sub-channel count and notify anyone that is * interested in this sub-channel, after this sub-channel * is setup. */ prichan->ch_subchan_cnt++; mtx_unlock(&prichan->ch_subchan_lock); wakeup(prichan); return 0; } void vmbus_chan_cpu_set(struct vmbus_channel *chan, int cpu) { KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu)); if (chan->ch_vmbus->vmbus_version == VMBUS_VERSION_WS2008 || chan->ch_vmbus->vmbus_version == VMBUS_VERSION_WIN7) { /* Only cpu0 is supported */ cpu = 0; } chan->ch_cpuid = cpu; chan->ch_vcpuid = VMBUS_PCPU_GET(chan->ch_vmbus, vcpuid, cpu); if (bootverbose) { printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n", chan->ch_id, chan->ch_cpuid, chan->ch_vcpuid); } } void vmbus_chan_cpu_rr(struct vmbus_channel *chan) { static uint32_t vmbus_chan_nextcpu; int cpu; cpu = atomic_fetchadd_int(&vmbus_chan_nextcpu, 1) % mp_ncpus; vmbus_chan_cpu_set(chan, cpu); } static void vmbus_chan_cpu_default(struct vmbus_channel *chan) { /* * By default, pin the channel to cpu0. Devices having * special channel-cpu mapping requirement should call * vmbus_chan_cpu_{set,rr}(). 
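* For example, a multi-queue driver would typically call
* vmbus_chan_cpu_rr() on each sub-channel to spread the interrupt
* load across CPUs instead of relying on this default.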
*/ vmbus_chan_cpu_set(chan, 0); } static void vmbus_chan_msgproc_choffer(struct vmbus_softc *sc, const struct vmbus_message *msg) { const struct vmbus_chanmsg_choffer *offer; struct vmbus_channel *chan; int error; offer = (const struct vmbus_chanmsg_choffer *)msg->msg_data; chan = vmbus_chan_alloc(sc); if (chan == NULL) { device_printf(sc->vmbus_dev, "allocate chan%u failed\n", offer->chm_chanid); return; } chan->ch_id = offer->chm_chanid; chan->ch_subidx = offer->chm_subidx; chan->ch_guid_type = offer->chm_chtype; chan->ch_guid_inst = offer->chm_chinst; /* Batch reading is on by default */ chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD; chan->ch_monprm->mp_connid = VMBUS_CONNID_EVENT; if (sc->vmbus_version != VMBUS_VERSION_WS2008) chan->ch_monprm->mp_connid = offer->chm_connid; if (offer->chm_flags1 & VMBUS_CHOFFER_FLAG1_HASMNF) { int trig_idx; /* * Setup MNF stuffs. */ chan->ch_txflags |= VMBUS_CHAN_TXF_HASMNF; trig_idx = offer->chm_montrig / VMBUS_MONTRIG_LEN; if (trig_idx >= VMBUS_MONTRIGS_MAX) panic("invalid monitor trigger %u", offer->chm_montrig); chan->ch_montrig = &sc->vmbus_mnf2->mnf_trigs[trig_idx].mt_pending; chan->ch_montrig_mask = 1 << (offer->chm_montrig % VMBUS_MONTRIG_LEN); } /* * Setup event flag. */ chan->ch_evtflag = &sc->vmbus_tx_evtflags[chan->ch_id >> VMBUS_EVTFLAG_SHIFT]; chan->ch_evtflag_mask = 1UL << (chan->ch_id & VMBUS_EVTFLAG_MASK); /* Select default cpu for this channel. */ vmbus_chan_cpu_default(chan); error = vmbus_chan_add(chan); if (error) { device_printf(sc->vmbus_dev, "add chan%u failed: %d\n", chan->ch_id, error); vmbus_chan_free(chan); return; } if (VMBUS_CHAN_ISPRIMARY(chan)) { /* * Add device for this primary channel. * * NOTE: * Error is ignored here; don't have much to do if error * really happens. */ vmbus_add_child(chan); } } /* * XXX pretty broken; need rework. */ static void vmbus_chan_msgproc_chrescind(struct vmbus_softc *sc, const struct vmbus_message *msg) { const struct vmbus_chanmsg_chrescind *note; struct vmbus_channel *chan; note = (const struct vmbus_chanmsg_chrescind *)msg->msg_data; if (note->chm_chanid > VMBUS_CHAN_MAX) { device_printf(sc->vmbus_dev, "invalid rescinded chan%u\n", note->chm_chanid); return; } if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u rescinded\n", note->chm_chanid); } chan = sc->vmbus_chmap[note->chm_chanid]; if (chan == NULL) return; sc->vmbus_chmap[note->chm_chanid] = NULL; taskqueue_enqueue(taskqueue_thread, &chan->ch_detach_task); } static void vmbus_chan_detach_task(void *xchan, int pending __unused) { struct vmbus_channel *chan = xchan; if (VMBUS_CHAN_ISPRIMARY(chan)) { /* Only primary channel owns the device */ vmbus_delete_child(chan); /* NOTE: DO NOT free primary channel for now */ } else { struct vmbus_softc *sc = chan->ch_vmbus; struct vmbus_channel *pri_chan = chan->ch_prichan; struct vmbus_chanmsg_chfree *req; struct vmbus_msghc *mh; int error; mh = vmbus_msghc_get(sc, sizeof(*req)); if (mh == NULL) { device_printf(sc->vmbus_dev, "can not get msg hypercall for chfree(chan%u)\n", chan->ch_id); goto remove; } req = vmbus_msghc_dataptr(mh); req->chm_hdr.chm_type = VMBUS_CHANMSG_TYPE_CHFREE; req->chm_chanid = chan->ch_id; error = vmbus_msghc_exec_noresult(mh); vmbus_msghc_put(sc, mh); if (error) { device_printf(sc->vmbus_dev, "chfree(chan%u) failed: %d", chan->ch_id, error); /* NOTE: Move on! 
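* The host has already rescinded this channel, so there is nothing
* more to do about the failure; proceed to remove the sub-channel
* from its primary channel below.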
*/ } else { if (bootverbose) { device_printf(sc->vmbus_dev, "chan%u freed\n", chan->ch_id); } } remove: mtx_lock(&pri_chan->ch_subchan_lock); TAILQ_REMOVE(&pri_chan->ch_subchans, chan, ch_sublink); KASSERT(pri_chan->ch_subchan_cnt > 0, ("invalid subchan_cnt %d", pri_chan->ch_subchan_cnt)); pri_chan->ch_subchan_cnt--; mtx_unlock(&pri_chan->ch_subchan_lock); wakeup(pri_chan); vmbus_chan_free(chan); } } /* * Detach all devices and destroy the corresponding primary channels. */ void vmbus_chan_destroy_all(struct vmbus_softc *sc) { struct vmbus_channel *chan; mtx_lock(&sc->vmbus_prichan_lock); while ((chan = TAILQ_FIRST(&sc->vmbus_prichans)) != NULL) { KASSERT(VMBUS_CHAN_ISPRIMARY(chan), ("not primary channel")); TAILQ_REMOVE(&sc->vmbus_prichans, chan, ch_prilink); mtx_unlock(&sc->vmbus_prichan_lock); vmbus_delete_child(chan); vmbus_chan_free(chan); mtx_lock(&sc->vmbus_prichan_lock); } bzero(sc->vmbus_chmap, sizeof(struct vmbus_channel *) * VMBUS_CHAN_MAX); mtx_unlock(&sc->vmbus_prichan_lock); } /* * The channel whose vcpu binding is closest to the currect vcpu will * be selected. * If no multi-channel, always select primary channel. */ struct vmbus_channel * vmbus_chan_cpu2chan(struct vmbus_channel *prichan, int cpu) { struct vmbus_channel *sel, *chan; uint32_t vcpu, sel_dist; KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpuid %d", cpu)); if (TAILQ_EMPTY(&prichan->ch_subchans)) return prichan; vcpu = VMBUS_PCPU_GET(prichan->ch_vmbus, vcpuid, cpu); #define CHAN_VCPU_DIST(ch, vcpu) \ (((ch)->ch_vcpuid > (vcpu)) ? \ ((ch)->ch_vcpuid - (vcpu)) : ((vcpu) - (ch)->ch_vcpuid)) #define CHAN_SELECT(ch) \ do { \ sel = ch; \ sel_dist = CHAN_VCPU_DIST(ch, vcpu); \ } while (0) CHAN_SELECT(prichan); mtx_lock(&prichan->ch_subchan_lock); TAILQ_FOREACH(chan, &prichan->ch_subchans, ch_sublink) { uint32_t dist; KASSERT(chan->ch_stflags & VMBUS_CHAN_ST_OPENED, ("chan%u is not opened", chan->ch_id)); if (chan->ch_vcpuid == vcpu) { /* Exact match; done */ CHAN_SELECT(chan); break; } dist = CHAN_VCPU_DIST(chan, vcpu); if (sel_dist <= dist) { /* Far or same distance; skip */ continue; } /* Select the closer channel. 
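* "Closer" means a smaller absolute difference between the channel's
* vcpuid and the vcpuid of the requested cpu, as computed by
* CHAN_VCPU_DIST() above.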
*/ CHAN_SELECT(chan); } mtx_unlock(&prichan->ch_subchan_lock); #undef CHAN_SELECT #undef CHAN_VCPU_DIST return sel; } struct vmbus_channel ** vmbus_subchan_get(struct vmbus_channel *pri_chan, int subchan_cnt) { struct vmbus_channel **ret, *chan; int i; ret = malloc(subchan_cnt * sizeof(struct vmbus_channel *), M_TEMP, M_WAITOK); mtx_lock(&pri_chan->ch_subchan_lock); while (pri_chan->ch_subchan_cnt < subchan_cnt) mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "subch", 0); i = 0; TAILQ_FOREACH(chan, &pri_chan->ch_subchans, ch_sublink) { /* TODO: refcnt chan */ ret[i] = chan; ++i; if (i == subchan_cnt) break; } KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d", pri_chan->ch_subchan_cnt, subchan_cnt)); mtx_unlock(&pri_chan->ch_subchan_lock); return ret; } void vmbus_subchan_rel(struct vmbus_channel **subchan, int subchan_cnt __unused) { free(subchan, M_TEMP); } void vmbus_subchan_drain(struct vmbus_channel *pri_chan) { mtx_lock(&pri_chan->ch_subchan_lock); while (pri_chan->ch_subchan_cnt > 0) mtx_sleep(pri_chan, &pri_chan->ch_subchan_lock, 0, "dsubch", 0); mtx_unlock(&pri_chan->ch_subchan_lock); } void vmbus_chan_msgproc(struct vmbus_softc *sc, const struct vmbus_message *msg) { vmbus_chanmsg_proc_t msg_proc; uint32_t msg_type; msg_type = ((const struct vmbus_chanmsg_hdr *)msg->msg_data)->chm_type; KASSERT(msg_type < VMBUS_CHANMSG_TYPE_MAX, ("invalid message type %u", msg_type)); msg_proc = vmbus_chan_msgprocs[msg_type]; if (msg_proc != NULL) msg_proc(sc, msg); } void vmbus_chan_set_readbatch(struct vmbus_channel *chan, bool on) { if (!on) chan->ch_flags &= ~VMBUS_CHAN_FLAG_BATCHREAD; else chan->ch_flags |= VMBUS_CHAN_FLAG_BATCHREAD; } uint32_t vmbus_chan_id(const struct vmbus_channel *chan) { return chan->ch_id; } uint32_t vmbus_chan_subidx(const struct vmbus_channel *chan) { return chan->ch_subidx; } bool vmbus_chan_is_primary(const struct vmbus_channel *chan) { if (VMBUS_CHAN_ISPRIMARY(chan)) return true; else return false; } const struct hyperv_guid * vmbus_chan_guid_inst(const struct vmbus_channel *chan) { return &chan->ch_guid_inst; } Index: user/alc/PQ_LAUNDRY/sys/dev/iwm/if_iwm.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/iwm/if_iwm.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/iwm/if_iwm.c (revision 303642) @@ -1,6189 +1,6189 @@ /* $OpenBSD: if_iwm.c,v 1.42 2015/05/30 02:49:23 deraadt Exp $ */ /* * Copyright (c) 2014 genua mbh * Copyright (c) 2014 Fixup Software Ltd. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /*- * Based on BSD-licensed source modules in the Linux iwlwifi driver, * which were used as the reference documentation for this implementation. 
* * Driver version we are currently based off of is * Linux 3.14.3 (tag id a2df521e42b1d9a23f620ac79dbfe8655a8391dd) * *********************************************************************** * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * Copyright(c) 2007 - 2013 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, * USA * * The full GNU General Public License is included in this distribution * in the file called COPYING. * * Contact Information: * Intel Linux Wireless * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 * * * BSD LICENSE * * Copyright(c) 2005 - 2013 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2007-2010 Damien Bergamini * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include const uint8_t iwm_nvm_channels[] = { /* 2.4 GHz */ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 5 GHz */ 36, 40, 44, 48, 52, 56, 60, 64, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 149, 153, 157, 161, 165 }; _Static_assert(nitems(iwm_nvm_channels) <= IWM_NUM_CHANNELS, "IWM_NUM_CHANNELS is too small"); const uint8_t iwm_nvm_channels_8000[] = { /* 2.4 GHz */ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 5 GHz */ 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 149, 153, 157, 161, 165, 169, 173, 177, 181 }; _Static_assert(nitems(iwm_nvm_channels_8000) <= IWM_NUM_CHANNELS_8000, "IWM_NUM_CHANNELS_8000 is too small"); #define IWM_NUM_2GHZ_CHANNELS 14 #define IWM_N_HW_ADDR_MASK 0xF /* * XXX For now, there's simply a fixed set of rate table entries * that are populated. */ const struct iwm_rate { uint8_t rate; uint8_t plcp; } iwm_rates[] = { { 2, IWM_RATE_1M_PLCP }, { 4, IWM_RATE_2M_PLCP }, { 11, IWM_RATE_5M_PLCP }, { 22, IWM_RATE_11M_PLCP }, { 12, IWM_RATE_6M_PLCP }, { 18, IWM_RATE_9M_PLCP }, { 24, IWM_RATE_12M_PLCP }, { 36, IWM_RATE_18M_PLCP }, { 48, IWM_RATE_24M_PLCP }, { 72, IWM_RATE_36M_PLCP }, { 96, IWM_RATE_48M_PLCP }, { 108, IWM_RATE_54M_PLCP }, }; #define IWM_RIDX_CCK 0 #define IWM_RIDX_OFDM 4 #define IWM_RIDX_MAX (nitems(iwm_rates)-1) #define IWM_RIDX_IS_CCK(_i_) ((_i_) < IWM_RIDX_OFDM) #define IWM_RIDX_IS_OFDM(_i_) ((_i_) >= IWM_RIDX_OFDM) struct iwm_nvm_section { uint16_t length; uint8_t *data; }; static int iwm_store_cscheme(struct iwm_softc *, const uint8_t *, size_t); static int iwm_firmware_store_section(struct iwm_softc *, enum iwm_ucode_type, const uint8_t *, size_t); static int iwm_set_default_calib(struct iwm_softc *, const void *); static void iwm_fw_info_free(struct iwm_fw_info *); static int iwm_read_firmware(struct iwm_softc *, enum iwm_ucode_type); static void iwm_dma_map_addr(void *, bus_dma_segment_t *, int, int); static int iwm_dma_contig_alloc(bus_dma_tag_t, struct iwm_dma_info *, bus_size_t, bus_size_t); static void iwm_dma_contig_free(struct iwm_dma_info *); static int iwm_alloc_fwmem(struct iwm_softc *); static void iwm_free_fwmem(struct iwm_softc *); static int iwm_alloc_sched(struct iwm_softc *); static void iwm_free_sched(struct iwm_softc *); static int iwm_alloc_kw(struct iwm_softc *); static void iwm_free_kw(struct iwm_softc *); static int iwm_alloc_ict(struct iwm_softc *); static void iwm_free_ict(struct iwm_softc *); static int iwm_alloc_rx_ring(struct iwm_softc *, struct iwm_rx_ring *); static void iwm_disable_rx_dma(struct iwm_softc *); static void iwm_reset_rx_ring(struct iwm_softc *, struct iwm_rx_ring *); static void iwm_free_rx_ring(struct 
iwm_softc *, struct iwm_rx_ring *); static int iwm_alloc_tx_ring(struct iwm_softc *, struct iwm_tx_ring *, int); static void iwm_reset_tx_ring(struct iwm_softc *, struct iwm_tx_ring *); static void iwm_free_tx_ring(struct iwm_softc *, struct iwm_tx_ring *); static void iwm_enable_interrupts(struct iwm_softc *); static void iwm_restore_interrupts(struct iwm_softc *); static void iwm_disable_interrupts(struct iwm_softc *); static void iwm_ict_reset(struct iwm_softc *); static int iwm_allow_mcast(struct ieee80211vap *, struct iwm_softc *); static void iwm_stop_device(struct iwm_softc *); static void iwm_mvm_nic_config(struct iwm_softc *); static int iwm_nic_rx_init(struct iwm_softc *); static int iwm_nic_tx_init(struct iwm_softc *); static int iwm_nic_init(struct iwm_softc *); static int iwm_enable_txq(struct iwm_softc *, int, int, int); static int iwm_post_alive(struct iwm_softc *); static int iwm_nvm_read_chunk(struct iwm_softc *, uint16_t, uint16_t, uint16_t, uint8_t *, uint16_t *); static int iwm_nvm_read_section(struct iwm_softc *, uint16_t, uint8_t *, uint16_t *, size_t); static uint32_t iwm_eeprom_channel_flags(uint16_t); static void iwm_add_channel_band(struct iwm_softc *, struct ieee80211_channel[], int, int *, int, size_t, const uint8_t[]); static void iwm_init_channel_map(struct ieee80211com *, int, int *, struct ieee80211_channel[]); static int iwm_parse_nvm_data(struct iwm_softc *, const uint16_t *, const uint16_t *, const uint16_t *, const uint16_t *, const uint16_t *, const uint16_t *); static void iwm_set_hw_address_8000(struct iwm_softc *, struct iwm_nvm_data *, const uint16_t *, const uint16_t *); static int iwm_get_sku(const struct iwm_softc *, const uint16_t *, const uint16_t *); static int iwm_get_nvm_version(const struct iwm_softc *, const uint16_t *); static int iwm_get_radio_cfg(const struct iwm_softc *, const uint16_t *, const uint16_t *); static int iwm_get_n_hw_addrs(const struct iwm_softc *, const uint16_t *); static void iwm_set_radio_cfg(const struct iwm_softc *, struct iwm_nvm_data *, uint32_t); static int iwm_parse_nvm_sections(struct iwm_softc *, struct iwm_nvm_section *); static int iwm_nvm_init(struct iwm_softc *); static int iwm_firmware_load_sect(struct iwm_softc *, uint32_t, const uint8_t *, uint32_t); static int iwm_firmware_load_chunk(struct iwm_softc *, uint32_t, const uint8_t *, uint32_t); static int iwm_load_firmware_7000(struct iwm_softc *, enum iwm_ucode_type); static int iwm_load_cpu_sections_8000(struct iwm_softc *, struct iwm_fw_sects *, int , int *); static int iwm_load_firmware_8000(struct iwm_softc *, enum iwm_ucode_type); static int iwm_load_firmware(struct iwm_softc *, enum iwm_ucode_type); static int iwm_start_fw(struct iwm_softc *, enum iwm_ucode_type); static int iwm_send_tx_ant_cfg(struct iwm_softc *, uint8_t); static int iwm_send_phy_cfg_cmd(struct iwm_softc *); static int iwm_mvm_load_ucode_wait_alive(struct iwm_softc *, enum iwm_ucode_type); static int iwm_run_init_mvm_ucode(struct iwm_softc *, int); static int iwm_rx_addbuf(struct iwm_softc *, int, int); static int iwm_mvm_calc_rssi(struct iwm_softc *, struct iwm_rx_phy_info *); static int iwm_mvm_get_signal_strength(struct iwm_softc *, struct iwm_rx_phy_info *); static void iwm_mvm_rx_rx_phy_cmd(struct iwm_softc *, struct iwm_rx_packet *, struct iwm_rx_data *); static int iwm_get_noise(const struct iwm_mvm_statistics_rx_non_phy *); static void iwm_mvm_rx_rx_mpdu(struct iwm_softc *, struct iwm_rx_packet *, struct iwm_rx_data *); static int iwm_mvm_rx_tx_cmd_single(struct iwm_softc 
*, struct iwm_rx_packet *, struct iwm_node *); static void iwm_mvm_rx_tx_cmd(struct iwm_softc *, struct iwm_rx_packet *, struct iwm_rx_data *); static void iwm_cmd_done(struct iwm_softc *, struct iwm_rx_packet *); #if 0 static void iwm_update_sched(struct iwm_softc *, int, int, uint8_t, uint16_t); #endif static const struct iwm_rate * iwm_tx_fill_cmd(struct iwm_softc *, struct iwm_node *, struct ieee80211_frame *, struct iwm_tx_cmd *); static int iwm_tx(struct iwm_softc *, struct mbuf *, struct ieee80211_node *, int); static int iwm_raw_xmit(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static int iwm_mvm_send_add_sta_cmd_status(struct iwm_softc *, struct iwm_mvm_add_sta_cmd_v7 *, int *); static int iwm_mvm_sta_send_to_fw(struct iwm_softc *, struct iwm_node *, int); static int iwm_mvm_add_sta(struct iwm_softc *, struct iwm_node *); static int iwm_mvm_update_sta(struct iwm_softc *, struct iwm_node *); static int iwm_mvm_add_int_sta_common(struct iwm_softc *, struct iwm_int_sta *, const uint8_t *, uint16_t, uint16_t); static int iwm_mvm_add_aux_sta(struct iwm_softc *); static int iwm_mvm_update_quotas(struct iwm_softc *, struct iwm_node *); static int iwm_auth(struct ieee80211vap *, struct iwm_softc *); static int iwm_assoc(struct ieee80211vap *, struct iwm_softc *); static int iwm_release(struct iwm_softc *, struct iwm_node *); static struct ieee80211_node * iwm_node_alloc(struct ieee80211vap *, const uint8_t[IEEE80211_ADDR_LEN]); static void iwm_setrates(struct iwm_softc *, struct iwm_node *); static int iwm_media_change(struct ifnet *); static int iwm_newstate(struct ieee80211vap *, enum ieee80211_state, int); static void iwm_endscan_cb(void *, int); static void iwm_mvm_fill_sf_command(struct iwm_softc *, struct iwm_sf_cfg_cmd *, struct ieee80211_node *); static int iwm_mvm_sf_config(struct iwm_softc *, enum iwm_sf_state); static int iwm_send_bt_init_conf(struct iwm_softc *); static int iwm_send_update_mcc_cmd(struct iwm_softc *, const char *); static void iwm_mvm_tt_tx_backoff(struct iwm_softc *, uint32_t); static int iwm_init_hw(struct iwm_softc *); static void iwm_init(struct iwm_softc *); static void iwm_start(struct iwm_softc *); static void iwm_stop(struct iwm_softc *); static void iwm_watchdog(void *); static void iwm_parent(struct ieee80211com *); #ifdef IWM_DEBUG static const char * iwm_desc_lookup(uint32_t); static void iwm_nic_error(struct iwm_softc *); static void iwm_nic_umac_error(struct iwm_softc *); #endif static void iwm_notif_intr(struct iwm_softc *); static void iwm_intr(void *); static int iwm_attach(device_t); static int iwm_is_valid_ether_addr(uint8_t *); static void iwm_preinit(void *); static int iwm_detach_local(struct iwm_softc *sc, int); static void iwm_init_task(void *); static void iwm_radiotap_attach(struct iwm_softc *); static struct ieee80211vap * iwm_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void iwm_vap_delete(struct ieee80211vap *); static void iwm_scan_start(struct ieee80211com *); static void iwm_scan_end(struct ieee80211com *); static void iwm_update_mcast(struct ieee80211com *); static void iwm_set_channel(struct ieee80211com *); static void iwm_scan_curchan(struct ieee80211_scan_state *, unsigned long); static void iwm_scan_mindwell(struct ieee80211_scan_state *); static int iwm_detach(device_t); /* * Firmware parser. 
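* The firmware image begins with a struct iwm_tlv_ucode_header and is
* followed by a sequence of (type, length, data) records, each padded
* to a 4-byte boundary.  iwm_read_firmware() walks these records and
* dispatches on the TLV type.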
*/ static int iwm_store_cscheme(struct iwm_softc *sc, const uint8_t *data, size_t dlen) { const struct iwm_fw_cscheme_list *l = (const void *)data; if (dlen < sizeof(*l) || dlen < sizeof(l->size) + l->size * sizeof(*l->cs)) return EINVAL; /* we don't actually store anything for now, always use s/w crypto */ return 0; } static int iwm_firmware_store_section(struct iwm_softc *sc, enum iwm_ucode_type type, const uint8_t *data, size_t dlen) { struct iwm_fw_sects *fws; struct iwm_fw_onesect *fwone; if (type >= IWM_UCODE_TYPE_MAX) return EINVAL; if (dlen < sizeof(uint32_t)) return EINVAL; fws = &sc->sc_fw.fw_sects[type]; if (fws->fw_count >= IWM_UCODE_SECT_MAX) return EINVAL; fwone = &fws->fw_sect[fws->fw_count]; /* first 32bit are device load offset */ memcpy(&fwone->fws_devoff, data, sizeof(uint32_t)); /* rest is data */ fwone->fws_data = data + sizeof(uint32_t); fwone->fws_len = dlen - sizeof(uint32_t); fws->fw_count++; fws->fw_totlen += fwone->fws_len; return 0; } /* iwlwifi: iwl-drv.c */ struct iwm_tlv_calib_data { uint32_t ucode_type; struct iwm_tlv_calib_ctrl calib; } __packed; static int iwm_set_default_calib(struct iwm_softc *sc, const void *data) { const struct iwm_tlv_calib_data *def_calib = data; uint32_t ucode_type = le32toh(def_calib->ucode_type); if (ucode_type >= IWM_UCODE_TYPE_MAX) { device_printf(sc->sc_dev, "Wrong ucode_type %u for default " "calibration.\n", ucode_type); return EINVAL; } sc->sc_default_calib[ucode_type].flow_trigger = def_calib->calib.flow_trigger; sc->sc_default_calib[ucode_type].event_trigger = def_calib->calib.event_trigger; return 0; } static void iwm_fw_info_free(struct iwm_fw_info *fw) { firmware_put(fw->fw_fp, FIRMWARE_UNLOAD); fw->fw_fp = NULL; /* don't touch fw->fw_status */ memset(fw->fw_sects, 0, sizeof(fw->fw_sects)); } static int iwm_read_firmware(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { struct iwm_fw_info *fw = &sc->sc_fw; const struct iwm_tlv_ucode_header *uhdr; struct iwm_ucode_tlv tlv; enum iwm_ucode_tlv_type tlv_type; const struct firmware *fwp; const uint8_t *data; int error = 0; size_t len; if (fw->fw_status == IWM_FW_STATUS_DONE && ucode_type != IWM_UCODE_TYPE_INIT) return 0; while (fw->fw_status == IWM_FW_STATUS_INPROGRESS) msleep(&sc->sc_fw, &sc->sc_mtx, 0, "iwmfwp", 0); fw->fw_status = IWM_FW_STATUS_INPROGRESS; if (fw->fw_fp != NULL) iwm_fw_info_free(fw); /* * Load firmware into driver memory. * fw_fp will be set. */ IWM_UNLOCK(sc); fwp = firmware_get(sc->sc_fwname); IWM_LOCK(sc); if (fwp == NULL) { device_printf(sc->sc_dev, "could not read firmware %s (error %d)\n", sc->sc_fwname, error); goto out; } fw->fw_fp = fwp; /* (Re-)Initialize default values. 
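* These capability fields may still hold values from a previous
* firmware load, so clear them before parsing the new image.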
*/ sc->sc_capaflags = 0; sc->sc_capa_n_scan_channels = IWM_MAX_NUM_SCAN_CHANNELS; memset(sc->sc_enabled_capa, 0, sizeof(sc->sc_enabled_capa)); memset(sc->sc_fw_mcc, 0, sizeof(sc->sc_fw_mcc)); /* * Parse firmware contents */ uhdr = (const void *)fw->fw_fp->data; if (*(const uint32_t *)fw->fw_fp->data != 0 || le32toh(uhdr->magic) != IWM_TLV_UCODE_MAGIC) { device_printf(sc->sc_dev, "invalid firmware %s\n", sc->sc_fwname); error = EINVAL; goto out; } snprintf(sc->sc_fwver, sizeof(sc->sc_fwver), "%d.%d (API ver %d)", IWM_UCODE_MAJOR(le32toh(uhdr->ver)), IWM_UCODE_MINOR(le32toh(uhdr->ver)), IWM_UCODE_API(le32toh(uhdr->ver))); data = uhdr->data; len = fw->fw_fp->datasize - sizeof(*uhdr); while (len >= sizeof(tlv)) { size_t tlv_len; const void *tlv_data; memcpy(&tlv, data, sizeof(tlv)); tlv_len = le32toh(tlv.length); tlv_type = le32toh(tlv.type); len -= sizeof(tlv); data += sizeof(tlv); tlv_data = data; if (len < tlv_len) { device_printf(sc->sc_dev, "firmware too short: %zu bytes\n", len); error = EINVAL; goto parse_out; } switch ((int)tlv_type) { case IWM_UCODE_TLV_PROBE_MAX_LEN: if (tlv_len < sizeof(uint32_t)) { device_printf(sc->sc_dev, "%s: PROBE_MAX_LEN (%d) < sizeof(uint32_t)\n", __func__, (int) tlv_len); error = EINVAL; goto parse_out; } sc->sc_capa_max_probe_len = le32toh(*(const uint32_t *)tlv_data); /* limit it to something sensible */ if (sc->sc_capa_max_probe_len > IWM_SCAN_OFFLOAD_PROBE_REQ_SIZE) { IWM_DPRINTF(sc, IWM_DEBUG_FIRMWARE_TLV, "%s: IWM_UCODE_TLV_PROBE_MAX_LEN " "ridiculous\n", __func__); error = EINVAL; goto parse_out; } break; case IWM_UCODE_TLV_PAN: if (tlv_len) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_PAN: tlv_len (%d) > 0\n", __func__, (int) tlv_len); error = EINVAL; goto parse_out; } sc->sc_capaflags |= IWM_UCODE_TLV_FLAGS_PAN; break; case IWM_UCODE_TLV_FLAGS: if (tlv_len < sizeof(uint32_t)) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_FLAGS: tlv_len (%d) < sizeof(uint32_t)\n", __func__, (int) tlv_len); error = EINVAL; goto parse_out; } /* * Apparently there can be many flags, but Linux driver * parses only the first one, and so do we. * * XXX: why does this override IWM_UCODE_TLV_PAN? * Intentional or a bug? Observations from * current firmware file: * 1) TLV_PAN is parsed first * 2) TLV_FLAGS contains TLV_FLAGS_PAN * ==> this resets TLV_PAN to itself... 
hnnnk */ sc->sc_capaflags = le32toh(*(const uint32_t *)tlv_data); break; case IWM_UCODE_TLV_CSCHEME: if ((error = iwm_store_cscheme(sc, tlv_data, tlv_len)) != 0) { device_printf(sc->sc_dev, "%s: iwm_store_cscheme(): returned %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_NUM_OF_CPU: { uint32_t num_cpu; if (tlv_len != sizeof(uint32_t)) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_NUM_OF_CPU: tlv_len (%d) < sizeof(uint32_t)\n", __func__, (int) tlv_len); error = EINVAL; goto parse_out; } num_cpu = le32toh(*(const uint32_t *)tlv_data); if (num_cpu < 1 || num_cpu > 2) { device_printf(sc->sc_dev, "%s: Driver supports only 1 or 2 CPUs\n", __func__); error = EINVAL; goto parse_out; } break; } case IWM_UCODE_TLV_SEC_RT: if ((error = iwm_firmware_store_section(sc, IWM_UCODE_TYPE_REGULAR, tlv_data, tlv_len)) != 0) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TYPE_REGULAR: iwm_firmware_store_section() failed; %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_SEC_INIT: if ((error = iwm_firmware_store_section(sc, IWM_UCODE_TYPE_INIT, tlv_data, tlv_len)) != 0) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TYPE_INIT: iwm_firmware_store_section() failed; %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_SEC_WOWLAN: if ((error = iwm_firmware_store_section(sc, IWM_UCODE_TYPE_WOW, tlv_data, tlv_len)) != 0) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TYPE_WOW: iwm_firmware_store_section() failed; %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_DEF_CALIB: if (tlv_len != sizeof(struct iwm_tlv_calib_data)) { device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_DEV_CALIB: tlv_len (%d) < sizeof(iwm_tlv_calib_data) (%d)\n", __func__, (int) tlv_len, (int) sizeof(struct iwm_tlv_calib_data)); error = EINVAL; goto parse_out; } if ((error = iwm_set_default_calib(sc, tlv_data)) != 0) { device_printf(sc->sc_dev, "%s: iwm_set_default_calib() failed: %d\n", __func__, error); goto parse_out; } break; case IWM_UCODE_TLV_PHY_SKU: if (tlv_len != sizeof(uint32_t)) { error = EINVAL; device_printf(sc->sc_dev, "%s: IWM_UCODE_TLV_PHY_SKU: tlv_len (%d) < sizeof(uint32_t)\n", __func__, (int) tlv_len); goto parse_out; } sc->sc_fw_phy_config = le32toh(*(const uint32_t *)tlv_data); break; case IWM_UCODE_TLV_API_CHANGES_SET: { const struct iwm_ucode_api *api; if (tlv_len != sizeof(*api)) { error = EINVAL; goto parse_out; } api = (const struct iwm_ucode_api *)tlv_data; /* Flags may exceed 32 bits in future firmware. 
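* Only api_index 0 is handled here; a non-zero index would refer to
* additional 32-bit flag words that this driver does not parse yet.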
*/ if (le32toh(api->api_index) > 0) { device_printf(sc->sc_dev, "unsupported API index %d\n", le32toh(api->api_index)); goto parse_out; } sc->sc_ucode_api = le32toh(api->api_flags); break; } case IWM_UCODE_TLV_ENABLED_CAPABILITIES: { const struct iwm_ucode_capa *capa; int idx, i; if (tlv_len != sizeof(*capa)) { error = EINVAL; goto parse_out; } capa = (const struct iwm_ucode_capa *)tlv_data; idx = le32toh(capa->api_index); if (idx > howmany(IWM_NUM_UCODE_TLV_CAPA, 32)) { device_printf(sc->sc_dev, "unsupported API index %d\n", idx); goto parse_out; } for (i = 0; i < 32; i++) { if ((le32toh(capa->api_capa) & (1U << i)) == 0) continue; setbit(sc->sc_enabled_capa, i + (32 * idx)); } break; } case 48: /* undocumented TLV */ case IWM_UCODE_TLV_SDIO_ADMA_ADDR: case IWM_UCODE_TLV_FW_GSCAN_CAPA: /* ignore, not used by current driver */ break; case IWM_UCODE_TLV_SEC_RT_USNIFFER: if ((error = iwm_firmware_store_section(sc, IWM_UCODE_TYPE_REGULAR_USNIFFER, tlv_data, tlv_len)) != 0) goto parse_out; break; case IWM_UCODE_TLV_N_SCAN_CHANNELS: if (tlv_len != sizeof(uint32_t)) { error = EINVAL; goto parse_out; } sc->sc_capa_n_scan_channels = le32toh(*(const uint32_t *)tlv_data); break; case IWM_UCODE_TLV_FW_VERSION: if (tlv_len != sizeof(uint32_t) * 3) { error = EINVAL; goto parse_out; } snprintf(sc->sc_fwver, sizeof(sc->sc_fwver), "%d.%d.%d", le32toh(((const uint32_t *)tlv_data)[0]), le32toh(((const uint32_t *)tlv_data)[1]), le32toh(((const uint32_t *)tlv_data)[2])); break; default: device_printf(sc->sc_dev, "%s: unknown firmware section %d, abort\n", __func__, tlv_type); error = EINVAL; goto parse_out; } len -= roundup(tlv_len, 4); data += roundup(tlv_len, 4); } KASSERT(error == 0, ("unhandled error")); parse_out: if (error) { device_printf(sc->sc_dev, "firmware parse error %d, " "section type %d\n", error, tlv_type); } if (!(sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_PM_CMD_SUPPORT)) { device_printf(sc->sc_dev, "device uses unsupported power ops\n"); error = ENOTSUP; } out: if (error) { fw->fw_status = IWM_FW_STATUS_NONE; if (fw->fw_fp != NULL) iwm_fw_info_free(fw); } else fw->fw_status = IWM_FW_STATUS_DONE; wakeup(&sc->sc_fw); return error; } /* * DMA resource routines */ static void iwm_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { if (error != 0) return; KASSERT(nsegs == 1, ("too many DMA segments, %d should be 1", nsegs)); *(bus_addr_t *)arg = segs[0].ds_addr; } static int iwm_dma_contig_alloc(bus_dma_tag_t tag, struct iwm_dma_info *dma, bus_size_t size, bus_size_t alignment) { int error; dma->tag = NULL; dma->map = NULL; dma->size = size; dma->vaddr = NULL; error = bus_dma_tag_create(tag, alignment, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size, 0, NULL, NULL, &dma->tag); if (error != 0) goto fail; error = bus_dmamem_alloc(dma->tag, (void **)&dma->vaddr, BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, &dma->map); if (error != 0) goto fail; error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size, iwm_dma_map_addr, &dma->paddr, BUS_DMA_NOWAIT); if (error != 0) { bus_dmamem_free(dma->tag, dma->vaddr, dma->map); dma->vaddr = NULL; goto fail; } bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); return 0; fail: iwm_dma_contig_free(dma); return error; } static void iwm_dma_contig_free(struct iwm_dma_info *dma) { if (dma->vaddr != NULL) { bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->tag, dma->map); bus_dmamem_free(dma->tag, dma->vaddr, dma->map); dma->vaddr = NULL; } if (dma->tag != 
NULL) { bus_dma_tag_destroy(dma->tag); dma->tag = NULL; } } /* fwmem is used to load firmware onto the card */ static int iwm_alloc_fwmem(struct iwm_softc *sc) { /* Must be aligned on a 16-byte boundary. */ return iwm_dma_contig_alloc(sc->sc_dmat, &sc->fw_dma, sc->sc_fwdmasegsz, 16); } static void iwm_free_fwmem(struct iwm_softc *sc) { iwm_dma_contig_free(&sc->fw_dma); } /* tx scheduler rings. not used? */ static int iwm_alloc_sched(struct iwm_softc *sc) { /* TX scheduler rings must be aligned on a 1KB boundary. */ return iwm_dma_contig_alloc(sc->sc_dmat, &sc->sched_dma, nitems(sc->txq) * sizeof(struct iwm_agn_scd_bc_tbl), 1024); } static void iwm_free_sched(struct iwm_softc *sc) { iwm_dma_contig_free(&sc->sched_dma); } /* keep-warm page is used internally by the card. see iwl-fh.h for more info */ static int iwm_alloc_kw(struct iwm_softc *sc) { return iwm_dma_contig_alloc(sc->sc_dmat, &sc->kw_dma, 4096, 4096); } static void iwm_free_kw(struct iwm_softc *sc) { iwm_dma_contig_free(&sc->kw_dma); } /* interrupt cause table */ static int iwm_alloc_ict(struct iwm_softc *sc) { return iwm_dma_contig_alloc(sc->sc_dmat, &sc->ict_dma, IWM_ICT_SIZE, 1<ict_dma); } static int iwm_alloc_rx_ring(struct iwm_softc *sc, struct iwm_rx_ring *ring) { bus_size_t size; int i, error; ring->cur = 0; /* Allocate RX descriptors (256-byte aligned). */ size = IWM_RX_RING_COUNT * sizeof(uint32_t); error = iwm_dma_contig_alloc(sc->sc_dmat, &ring->desc_dma, size, 256); if (error != 0) { device_printf(sc->sc_dev, "could not allocate RX ring DMA memory\n"); goto fail; } ring->desc = ring->desc_dma.vaddr; /* Allocate RX status area (16-byte aligned). */ error = iwm_dma_contig_alloc(sc->sc_dmat, &ring->stat_dma, sizeof(*ring->stat), 16); if (error != 0) { device_printf(sc->sc_dev, "could not allocate RX status DMA memory\n"); goto fail; } ring->stat = ring->stat_dma.vaddr; /* Create RX buffer DMA tag. */ error = bus_dma_tag_create(sc->sc_dmat, 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, IWM_RBUF_SIZE, 1, IWM_RBUF_SIZE, 0, NULL, NULL, &ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create RX buf DMA tag, error %d\n", __func__, error); goto fail; } /* Allocate spare bus_dmamap_t for iwm_rx_addbuf() */ error = bus_dmamap_create(ring->data_dmat, 0, &ring->spare_map); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create RX buf DMA map, error %d\n", __func__, error); goto fail; } /* * Allocate and map RX buffers. */ for (i = 0; i < IWM_RX_RING_COUNT; i++) { struct iwm_rx_data *data = &ring->data[i]; error = bus_dmamap_create(ring->data_dmat, 0, &data->map); if (error != 0) { device_printf(sc->sc_dev, "%s: could not create RX buf DMA map, error %d\n", __func__, error); goto fail; } data->m = NULL; if ((error = iwm_rx_addbuf(sc, IWM_RBUF_SIZE, i)) != 0) { goto fail; } } return 0; fail: iwm_free_rx_ring(sc, ring); return error; } static void iwm_disable_rx_dma(struct iwm_softc *sc) { /* XXX conditional nic locks are stupid */ /* XXX print out if we can't lock the NIC? */ if (iwm_nic_lock(sc)) { /* XXX handle if RX stop doesn't finish? */ (void) iwm_pcie_rx_stop(sc); iwm_nic_unlock(sc); } } static void iwm_reset_rx_ring(struct iwm_softc *sc, struct iwm_rx_ring *ring) { /* Reset the ring state */ ring->cur = 0; /* * The hw rx ring index in shared memory must also be cleared, * otherwise the discrepancy can cause reprocessing chaos. 
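* The memset below clears the entire RX status area, which also
* resets that index.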
*/ memset(sc->rxq.stat, 0, sizeof(*sc->rxq.stat)); } static void iwm_free_rx_ring(struct iwm_softc *sc, struct iwm_rx_ring *ring) { int i; iwm_dma_contig_free(&ring->desc_dma); iwm_dma_contig_free(&ring->stat_dma); for (i = 0; i < IWM_RX_RING_COUNT; i++) { struct iwm_rx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } if (data->map != NULL) { bus_dmamap_destroy(ring->data_dmat, data->map); data->map = NULL; } } if (ring->spare_map != NULL) { bus_dmamap_destroy(ring->data_dmat, ring->spare_map); ring->spare_map = NULL; } if (ring->data_dmat != NULL) { bus_dma_tag_destroy(ring->data_dmat); ring->data_dmat = NULL; } } static int iwm_alloc_tx_ring(struct iwm_softc *sc, struct iwm_tx_ring *ring, int qid) { bus_addr_t paddr; bus_size_t size; size_t maxsize; int nsegments; int i, error; ring->qid = qid; ring->queued = 0; ring->cur = 0; /* Allocate TX descriptors (256-byte aligned). */ size = IWM_TX_RING_COUNT * sizeof (struct iwm_tfd); error = iwm_dma_contig_alloc(sc->sc_dmat, &ring->desc_dma, size, 256); if (error != 0) { device_printf(sc->sc_dev, "could not allocate TX ring DMA memory\n"); goto fail; } ring->desc = ring->desc_dma.vaddr; /* * We only use rings 0 through 9 (4 EDCA + cmd) so there is no need * to allocate commands space for other rings. */ if (qid > IWM_MVM_CMD_QUEUE) return 0; size = IWM_TX_RING_COUNT * sizeof(struct iwm_device_cmd); error = iwm_dma_contig_alloc(sc->sc_dmat, &ring->cmd_dma, size, 4); if (error != 0) { device_printf(sc->sc_dev, "could not allocate TX cmd DMA memory\n"); goto fail; } ring->cmd = ring->cmd_dma.vaddr; /* FW commands may require more mapped space than packets. */ if (qid == IWM_MVM_CMD_QUEUE) { maxsize = IWM_RBUF_SIZE; nsegments = 1; } else { maxsize = MCLBYTES; nsegments = IWM_MAX_SCATTER - 2; } error = bus_dma_tag_create(sc->sc_dmat, 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, maxsize, nsegments, maxsize, 0, NULL, NULL, &ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "could not create TX buf DMA tag\n"); goto fail; } paddr = ring->cmd_dma.paddr; for (i = 0; i < IWM_TX_RING_COUNT; i++) { struct iwm_tx_data *data = &ring->data[i]; data->cmd_paddr = paddr; data->scratch_paddr = paddr + sizeof(struct iwm_cmd_header) + offsetof(struct iwm_tx_cmd, scratch); paddr += sizeof(struct iwm_device_cmd); error = bus_dmamap_create(ring->data_dmat, 0, &data->map); if (error != 0) { device_printf(sc->sc_dev, "could not create TX buf DMA map\n"); goto fail; } } KASSERT(paddr == ring->cmd_dma.paddr + size, ("invalid physical address")); return 0; fail: iwm_free_tx_ring(sc, ring); return error; } static void iwm_reset_tx_ring(struct iwm_softc *sc, struct iwm_tx_ring *ring) { int i; for (i = 0; i < IWM_TX_RING_COUNT; i++) { struct iwm_tx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } } /* Clear TX descriptors. 
*/ memset(ring->desc, 0, ring->desc_dma.size); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); sc->qfullmsk &= ~(1 << ring->qid); ring->queued = 0; ring->cur = 0; } static void iwm_free_tx_ring(struct iwm_softc *sc, struct iwm_tx_ring *ring) { int i; iwm_dma_contig_free(&ring->desc_dma); iwm_dma_contig_free(&ring->cmd_dma); for (i = 0; i < IWM_TX_RING_COUNT; i++) { struct iwm_tx_data *data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } if (data->map != NULL) { bus_dmamap_destroy(ring->data_dmat, data->map); data->map = NULL; } } if (ring->data_dmat != NULL) { bus_dma_tag_destroy(ring->data_dmat); ring->data_dmat = NULL; } } /* * High-level hardware frobbing routines */ static void iwm_enable_interrupts(struct iwm_softc *sc) { sc->sc_intmask = IWM_CSR_INI_SET_MASK; IWM_WRITE(sc, IWM_CSR_INT_MASK, sc->sc_intmask); } static void iwm_restore_interrupts(struct iwm_softc *sc) { IWM_WRITE(sc, IWM_CSR_INT_MASK, sc->sc_intmask); } static void iwm_disable_interrupts(struct iwm_softc *sc) { /* disable interrupts */ IWM_WRITE(sc, IWM_CSR_INT_MASK, 0); /* acknowledge all interrupts */ IWM_WRITE(sc, IWM_CSR_INT, ~0); IWM_WRITE(sc, IWM_CSR_FH_INT_STATUS, ~0); } static void iwm_ict_reset(struct iwm_softc *sc) { iwm_disable_interrupts(sc); /* Reset ICT table. */ memset(sc->ict_dma.vaddr, 0, IWM_ICT_SIZE); sc->ict_cur = 0; /* Set physical address of ICT table (4KB aligned). */ IWM_WRITE(sc, IWM_CSR_DRAM_INT_TBL_REG, IWM_CSR_DRAM_INT_TBL_ENABLE | IWM_CSR_DRAM_INIT_TBL_WRITE_POINTER | IWM_CSR_DRAM_INIT_TBL_WRAP_CHECK | sc->ict_dma.paddr >> IWM_ICT_PADDR_SHIFT); /* Switch to ICT interrupt mode in driver. */ sc->sc_flags |= IWM_FLAG_USE_ICT; /* Re-enable interrupts. */ IWM_WRITE(sc, IWM_CSR_INT, ~0); iwm_enable_interrupts(sc); } /* iwlwifi pcie/trans.c */ /* * Since this .. hard-resets things, it's time to actually * mark the first vap (if any) as having no mac context. * It's annoying, but since the driver is potentially being * stop/start'ed whilst active (thanks openbsd port!) we * have to correctly track this. */ static void iwm_stop_device(struct iwm_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); int chnl, qid; uint32_t mask = 0; /* tell the device to stop sending interrupts */ iwm_disable_interrupts(sc); /* * FreeBSD-local: mark the first vap as not-uploaded, * so the next transition through auth/assoc * will correctly populate the MAC context. */ if (vap) { struct iwm_vap *iv = IWM_VAP(vap); iv->is_uploaded = 0; } /* device going down, Stop using ICT table */ sc->sc_flags &= ~IWM_FLAG_USE_ICT; /* stop tx and rx. tx and rx bits, as usual, are from if_iwn */ iwm_write_prph(sc, IWM_SCD_TXFACT, 0); if (iwm_nic_lock(sc)) { /* Stop each Tx DMA channel */ for (chnl = 0; chnl < IWM_FH_TCSR_CHNL_NUM; chnl++) { IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_CONFIG_REG(chnl), 0); mask |= IWM_FH_TSSR_TX_STATUS_REG_MSK_CHNL_IDLE(chnl); } /* Wait for DMA channels to be idle */ - if (iwm_poll_bit(sc, IWM_FH_TSSR_TX_STATUS_REG, mask, mask, - 5000) < 0) { + if (!iwm_poll_bit(sc, IWM_FH_TSSR_TX_STATUS_REG, mask, mask, + 5000)) { device_printf(sc->sc_dev, "Failing on timeout while stopping DMA channel: [0x%08x]\n", IWM_READ(sc, IWM_FH_TSSR_TX_STATUS_REG)); } iwm_nic_unlock(sc); } iwm_disable_rx_dma(sc); /* Stop RX ring. */ iwm_reset_rx_ring(sc, &sc->rxq); /* Reset all TX rings. 
*/ for (qid = 0; qid < nitems(sc->txq); qid++) iwm_reset_tx_ring(sc, &sc->txq[qid]); /* * Power-down device's busmaster DMA clocks */ iwm_write_prph(sc, IWM_APMG_CLK_DIS_REG, IWM_APMG_CLK_VAL_DMA_CLK_RQT); DELAY(5); /* Make sure (redundant) we've released our request to stay awake */ IWM_CLRBITS(sc, IWM_CSR_GP_CNTRL, IWM_CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); /* Stop the device, and put it in low power state */ iwm_apm_stop(sc); /* Upon stop, the APM issues an interrupt if HW RF kill is set. * Clean again the interrupt here */ iwm_disable_interrupts(sc); /* stop and reset the on-board processor */ IWM_WRITE(sc, IWM_CSR_RESET, IWM_CSR_RESET_REG_FLAG_SW_RESET); /* * Even if we stop the HW, we still want the RF kill * interrupt */ iwm_enable_rfkill_int(sc); iwm_check_rfkill(sc); } /* iwlwifi: mvm/ops.c */ static void iwm_mvm_nic_config(struct iwm_softc *sc) { uint8_t radio_cfg_type, radio_cfg_step, radio_cfg_dash; uint32_t reg_val = 0; radio_cfg_type = (sc->sc_fw_phy_config & IWM_FW_PHY_CFG_RADIO_TYPE) >> IWM_FW_PHY_CFG_RADIO_TYPE_POS; radio_cfg_step = (sc->sc_fw_phy_config & IWM_FW_PHY_CFG_RADIO_STEP) >> IWM_FW_PHY_CFG_RADIO_STEP_POS; radio_cfg_dash = (sc->sc_fw_phy_config & IWM_FW_PHY_CFG_RADIO_DASH) >> IWM_FW_PHY_CFG_RADIO_DASH_POS; /* SKU control */ reg_val |= IWM_CSR_HW_REV_STEP(sc->sc_hw_rev) << IWM_CSR_HW_IF_CONFIG_REG_POS_MAC_STEP; reg_val |= IWM_CSR_HW_REV_DASH(sc->sc_hw_rev) << IWM_CSR_HW_IF_CONFIG_REG_POS_MAC_DASH; /* radio configuration */ reg_val |= radio_cfg_type << IWM_CSR_HW_IF_CONFIG_REG_POS_PHY_TYPE; reg_val |= radio_cfg_step << IWM_CSR_HW_IF_CONFIG_REG_POS_PHY_STEP; reg_val |= radio_cfg_dash << IWM_CSR_HW_IF_CONFIG_REG_POS_PHY_DASH; IWM_WRITE(sc, IWM_CSR_HW_IF_CONFIG_REG, reg_val); IWM_DPRINTF(sc, IWM_DEBUG_RESET, "Radio type=0x%x-0x%x-0x%x\n", radio_cfg_type, radio_cfg_step, radio_cfg_dash); /* * W/A : NIC is stuck in a reset state after Early PCIe power off * (PCIe power is lost before PERST# is asserted), causing ME FW * to lose ownership and not being able to obtain it back. */ if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) { iwm_set_bits_mask_prph(sc, IWM_APMG_PS_CTRL_REG, IWM_APMG_PS_CTRL_EARLY_PWR_OFF_RESET_DIS, ~IWM_APMG_PS_CTRL_EARLY_PWR_OFF_RESET_DIS); } } static int iwm_nic_rx_init(struct iwm_softc *sc) { if (!iwm_nic_lock(sc)) return EBUSY; /* * Initialize RX ring. This is from the iwn driver. */ memset(sc->rxq.stat, 0, sizeof(*sc->rxq.stat)); /* stop DMA */ iwm_disable_rx_dma(sc); IWM_WRITE(sc, IWM_FH_MEM_RCSR_CHNL0_RBDCB_WPTR, 0); IWM_WRITE(sc, IWM_FH_MEM_RCSR_CHNL0_FLUSH_RB_REQ, 0); IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_RDPTR, 0); IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_RBDCB_WPTR_REG, 0); /* Set physical address of RX ring (256-byte aligned). */ IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_RBDCB_BASE_REG, sc->rxq.desc_dma.paddr >> 8); /* Set physical address of RX status (16-byte aligned). */ IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_STTS_WPTR_REG, sc->rxq.stat_dma.paddr >> 4); /* Enable RX. 
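 *
 * (The ">> 8" and ">> 4" in the base-address writes above work because the
 * RX descriptor ring and status area were allocated 256-byte and 16-byte
 * aligned respectively, so the registers can hold the address divided by the
 * alignment without losing bits.  Spelled out, illustrative only:
 *
 *	KASSERT((sc->rxq.desc_dma.paddr & 0xff) == 0,
 *	    ("RX ring not 256-byte aligned"));
 *	IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_RBDCB_BASE_REG,
 *	    sc->rxq.desc_dma.paddr >> 8);
 *
 * and similarly for the 16-byte aligned status area written with ">> 4".)
 *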
*/ IWM_WRITE(sc, IWM_FH_MEM_RCSR_CHNL0_CONFIG_REG, IWM_FH_RCSR_RX_CONFIG_CHNL_EN_ENABLE_VAL | IWM_FH_RCSR_CHNL0_RX_IGNORE_RXF_EMPTY | /* HW bug */ IWM_FH_RCSR_CHNL0_RX_CONFIG_IRQ_DEST_INT_HOST_VAL | IWM_FH_RCSR_CHNL0_RX_CONFIG_SINGLE_FRAME_MSK | (IWM_RX_RB_TIMEOUT << IWM_FH_RCSR_RX_CONFIG_REG_IRQ_RBTH_POS) | IWM_FH_RCSR_RX_CONFIG_REG_VAL_RB_SIZE_4K | IWM_RX_QUEUE_SIZE_LOG << IWM_FH_RCSR_RX_CONFIG_RBDCB_SIZE_POS); IWM_WRITE_1(sc, IWM_CSR_INT_COALESCING, IWM_HOST_INT_TIMEOUT_DEF); /* W/A for interrupt coalescing bug in 7260 and 3160 */ if (sc->host_interrupt_operation_mode) IWM_SETBITS(sc, IWM_CSR_INT_COALESCING, IWM_HOST_INT_OPER_MODE); /* * Thus sayeth el jefe (iwlwifi) via a comment: * * This value should initially be 0 (before preparing any * RBs), should be 8 after preparing the first 8 RBs (for example) */ IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_WPTR, 8); iwm_nic_unlock(sc); return 0; } static int iwm_nic_tx_init(struct iwm_softc *sc) { int qid; if (!iwm_nic_lock(sc)) return EBUSY; /* Deactivate TX scheduler. */ iwm_write_prph(sc, IWM_SCD_TXFACT, 0); /* Set physical address of "keep warm" page (16-byte aligned). */ IWM_WRITE(sc, IWM_FH_KW_MEM_ADDR_REG, sc->kw_dma.paddr >> 4); /* Initialize TX rings. */ for (qid = 0; qid < nitems(sc->txq); qid++) { struct iwm_tx_ring *txq = &sc->txq[qid]; /* Set physical address of TX ring (256-byte aligned). */ IWM_WRITE(sc, IWM_FH_MEM_CBBC_QUEUE(qid), txq->desc_dma.paddr >> 8); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "%s: loading ring %d descriptors (%p) at %lx\n", __func__, qid, txq->desc, (unsigned long) (txq->desc_dma.paddr >> 8)); } iwm_write_prph(sc, IWM_SCD_GP_CTRL, IWM_SCD_GP_CTRL_AUTO_ACTIVE_MODE); iwm_nic_unlock(sc); return 0; } static int iwm_nic_init(struct iwm_softc *sc) { int error; iwm_apm_init(sc); if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) iwm_set_pwr(sc); iwm_mvm_nic_config(sc); if ((error = iwm_nic_rx_init(sc)) != 0) return error; /* * Ditto for TX, from iwn */ if ((error = iwm_nic_tx_init(sc)) != 0) return error; IWM_DPRINTF(sc, IWM_DEBUG_RESET, "%s: shadow registers enabled\n", __func__); IWM_SETBITS(sc, IWM_CSR_MAC_SHADOW_REG_CTRL, 0x800fffff); return 0; } const uint8_t iwm_mvm_ac_to_tx_fifo[] = { IWM_MVM_TX_FIFO_VO, IWM_MVM_TX_FIFO_VI, IWM_MVM_TX_FIFO_BE, IWM_MVM_TX_FIFO_BK, }; static int iwm_enable_txq(struct iwm_softc *sc, int sta_id, int qid, int fifo) { if (!iwm_nic_lock(sc)) { device_printf(sc->sc_dev, "%s: cannot enable txq %d\n", __func__, qid); return EBUSY; } IWM_WRITE(sc, IWM_HBUS_TARG_WRPTR, qid << 8 | 0); if (qid == IWM_MVM_CMD_QUEUE) { /* unactivate before configuration */ iwm_write_prph(sc, IWM_SCD_QUEUE_STATUS_BITS(qid), (0 << IWM_SCD_QUEUE_STTS_REG_POS_ACTIVE) | (1 << IWM_SCD_QUEUE_STTS_REG_POS_SCD_ACT_EN)); iwm_clear_bits_prph(sc, IWM_SCD_AGGR_SEL, (1 << qid)); iwm_write_prph(sc, IWM_SCD_QUEUE_RDPTR(qid), 0); iwm_write_mem32(sc, sc->sched_base + IWM_SCD_CONTEXT_QUEUE_OFFSET(qid), 0); /* Set scheduler window size and frame limit. 
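 *
 * (The iwm_mvm_ac_to_tx_fifo[] table above pairs each WME access category
 * with a hardware TX FIFO.  A data queue for, say, the video AC would be
 * enabled through this same function along the following lines; this is an
 * illustrative call, not a verbatim quote of the driver:
 *
 *	int ac = WME_AC_VI;
 *
 *	error = iwm_enable_txq(sc, IWM_STATION_ID, ac,
 *	    iwm_mvm_ac_to_tx_fifo[ac]);
 *
 * which takes the non-command-queue branch below and configures the queue
 * through an IWM_SCD_QUEUE_CFG command instead of programming the scheduler
 * registers directly.)
 *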
*/ iwm_write_mem32(sc, sc->sched_base + IWM_SCD_CONTEXT_QUEUE_OFFSET(qid) + sizeof(uint32_t), ((IWM_FRAME_LIMIT << IWM_SCD_QUEUE_CTX_REG2_WIN_SIZE_POS) & IWM_SCD_QUEUE_CTX_REG2_WIN_SIZE_MSK) | ((IWM_FRAME_LIMIT << IWM_SCD_QUEUE_CTX_REG2_FRAME_LIMIT_POS) & IWM_SCD_QUEUE_CTX_REG2_FRAME_LIMIT_MSK)); iwm_write_prph(sc, IWM_SCD_QUEUE_STATUS_BITS(qid), (1 << IWM_SCD_QUEUE_STTS_REG_POS_ACTIVE) | (fifo << IWM_SCD_QUEUE_STTS_REG_POS_TXF) | (1 << IWM_SCD_QUEUE_STTS_REG_POS_WSL) | IWM_SCD_QUEUE_STTS_REG_MSK); } else { struct iwm_scd_txq_cfg_cmd cmd; int error; iwm_nic_unlock(sc); memset(&cmd, 0, sizeof(cmd)); cmd.scd_queue = qid; cmd.enable = 1; cmd.sta_id = sta_id; cmd.tx_fifo = fifo; cmd.aggregate = 0; cmd.window = IWM_FRAME_LIMIT; error = iwm_mvm_send_cmd_pdu(sc, IWM_SCD_QUEUE_CFG, IWM_CMD_SYNC, sizeof(cmd), &cmd); if (error) { device_printf(sc->sc_dev, "cannot enable txq %d\n", qid); return error; } if (!iwm_nic_lock(sc)) return EBUSY; } iwm_write_prph(sc, IWM_SCD_EN_CTRL, iwm_read_prph(sc, IWM_SCD_EN_CTRL) | qid); iwm_nic_unlock(sc); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "%s: enabled txq %d FIFO %d\n", __func__, qid, fifo); return 0; } static int iwm_post_alive(struct iwm_softc *sc) { int nwords; int error, chnl; uint32_t base; if (!iwm_nic_lock(sc)) return EBUSY; base = iwm_read_prph(sc, IWM_SCD_SRAM_BASE_ADDR); if (sc->sched_base != base) { device_printf(sc->sc_dev, "%s: sched addr mismatch: alive: 0x%x prph: 0x%x\n", __func__, sc->sched_base, base); } iwm_ict_reset(sc); /* Clear TX scheduler state in SRAM. */ nwords = (IWM_SCD_TRANS_TBL_MEM_UPPER_BOUND - IWM_SCD_CONTEXT_MEM_LOWER_BOUND) / sizeof(uint32_t); error = iwm_write_mem(sc, sc->sched_base + IWM_SCD_CONTEXT_MEM_LOWER_BOUND, NULL, nwords); if (error) goto out; /* Set physical address of TX scheduler rings (1KB aligned). */ iwm_write_prph(sc, IWM_SCD_DRAM_BASE_ADDR, sc->sched_dma.paddr >> 10); iwm_write_prph(sc, IWM_SCD_CHAINEXT_EN, 0); iwm_nic_unlock(sc); /* enable command channel */ error = iwm_enable_txq(sc, 0 /* unused */, IWM_MVM_CMD_QUEUE, 7); if (error) return error; if (!iwm_nic_lock(sc)) return EBUSY; iwm_write_prph(sc, IWM_SCD_TXFACT, 0xff); /* Enable DMA channels. */ for (chnl = 0; chnl < IWM_FH_TCSR_CHNL_NUM; chnl++) { IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_CONFIG_REG(chnl), IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE | IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_ENABLE); } IWM_SETBITS(sc, IWM_FH_TX_CHICKEN_BITS_REG, IWM_FH_TX_CHICKEN_BITS_SCD_AUTO_RETRY_EN); /* Enable L1-Active */ if (sc->sc_device_family != IWM_DEVICE_FAMILY_8000) { iwm_clear_bits_prph(sc, IWM_APMG_PCIDEV_STT_REG, IWM_APMG_PCIDEV_STT_VAL_L1_ACT_DIS); } out: iwm_nic_unlock(sc); return error; } /* * NVM read access and content parsing. We do not support * external NVM or writing NVM. 
* iwlwifi/mvm/nvm.c */ /* list of NVM sections we are allowed/need to read */ const int nvm_to_read[] = { IWM_NVM_SECTION_TYPE_HW, IWM_NVM_SECTION_TYPE_SW, IWM_NVM_SECTION_TYPE_REGULATORY, IWM_NVM_SECTION_TYPE_CALIBRATION, IWM_NVM_SECTION_TYPE_PRODUCTION, IWM_NVM_SECTION_TYPE_HW_8000, IWM_NVM_SECTION_TYPE_MAC_OVERRIDE, IWM_NVM_SECTION_TYPE_PHY_SKU, }; /* Default NVM size to read */ #define IWM_NVM_DEFAULT_CHUNK_SIZE (2*1024) #define IWM_MAX_NVM_SECTION_SIZE 8192 #define IWM_NVM_WRITE_OPCODE 1 #define IWM_NVM_READ_OPCODE 0 /* load nvm chunk response */ #define IWM_READ_NVM_CHUNK_SUCCEED 0 #define IWM_READ_NVM_CHUNK_INVALID_ADDRESS 1 static int iwm_nvm_read_chunk(struct iwm_softc *sc, uint16_t section, uint16_t offset, uint16_t length, uint8_t *data, uint16_t *len) { offset = 0; struct iwm_nvm_access_cmd nvm_access_cmd = { .offset = htole16(offset), .length = htole16(length), .type = htole16(section), .op_code = IWM_NVM_READ_OPCODE, }; struct iwm_nvm_access_resp *nvm_resp; struct iwm_rx_packet *pkt; struct iwm_host_cmd cmd = { .id = IWM_NVM_ACCESS_CMD, .flags = IWM_CMD_SYNC | IWM_CMD_WANT_SKB | IWM_CMD_SEND_IN_RFKILL, .data = { &nvm_access_cmd, }, }; int ret, offset_read; size_t bytes_read; uint8_t *resp_data; cmd.len[0] = sizeof(struct iwm_nvm_access_cmd); ret = iwm_send_cmd(sc, &cmd); if (ret) { device_printf(sc->sc_dev, "Could not send NVM_ACCESS command (error=%d)\n", ret); return ret; } pkt = cmd.resp_pkt; if (pkt->hdr.flags & IWM_CMD_FAILED_MSK) { device_printf(sc->sc_dev, "Bad return from IWM_NVM_ACCES_COMMAND (0x%08X)\n", pkt->hdr.flags); ret = EIO; goto exit; } /* Extract NVM response */ nvm_resp = (void *)pkt->data; ret = le16toh(nvm_resp->status); bytes_read = le16toh(nvm_resp->length); offset_read = le16toh(nvm_resp->offset); resp_data = nvm_resp->data; if (ret) { IWM_DPRINTF(sc, IWM_DEBUG_RESET, "NVM access command failed with status %d\n", ret); ret = EINVAL; goto exit; } if (offset_read != offset) { device_printf(sc->sc_dev, "NVM ACCESS response with invalid offset %d\n", offset_read); ret = EINVAL; goto exit; } if (bytes_read > length) { device_printf(sc->sc_dev, "NVM ACCESS response with too much data " "(%d bytes requested, %zd bytes received)\n", length, bytes_read); ret = EINVAL; goto exit; } memcpy(data + offset, resp_data, bytes_read); *len = bytes_read; exit: iwm_free_resp(sc, &cmd); return ret; } /* * Reads an NVM section completely. * NICs prior to 7000 family don't have a real NVM, but just read * section 0 which is the EEPROM. Because the EEPROM reading is unlimited * by uCode, we need to manually check in this case that we don't * overflow and try to read more than the EEPROM size. * For 7000 family NICs, we supply the maximal size we can read, and * the uCode fills the response with as much data as we can, * without overflowing, so no check is needed. 
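 *
 * As a concrete illustration: with the 2 KiB default chunk size, a
 * hypothetical 5000-byte section is returned as successive reads of
 *
 *	2048, 2048, 904 bytes    (*len: 0 -> 2048 -> 4096 -> 5000)
 *
 * and the short final read (seglen < chunklen) is what terminates the loop
 * in iwm_nvm_read_section() below.
 *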
*/ static int iwm_nvm_read_section(struct iwm_softc *sc, uint16_t section, uint8_t *data, uint16_t *len, size_t max_len) { uint16_t chunklen, seglen; int error = 0; IWM_DPRINTF(sc, IWM_DEBUG_RESET, "reading NVM section %d\n", section); chunklen = seglen = IWM_NVM_DEFAULT_CHUNK_SIZE; *len = 0; /* Read NVM chunks until exhausted (reading less than requested) */ while (seglen == chunklen && *len < max_len) { error = iwm_nvm_read_chunk(sc, section, *len, chunklen, data, &seglen); if (error) { IWM_DPRINTF(sc, IWM_DEBUG_RESET, "Cannot read from NVM section " "%d at offset %d\n", section, *len); return error; } *len += seglen; } IWM_DPRINTF(sc, IWM_DEBUG_RESET, "NVM section %d read completed (%d bytes, error=%d)\n", section, *len, error); return error; } /* * BEGIN IWM_NVM_PARSE */ /* iwlwifi/iwl-nvm-parse.c */ /* NVM offsets (in words) definitions */ enum iwm_nvm_offsets { /* NVM HW-Section offset (in words) definitions */ IWM_HW_ADDR = 0x15, /* NVM SW-Section offset (in words) definitions */ IWM_NVM_SW_SECTION = 0x1C0, IWM_NVM_VERSION = 0, IWM_RADIO_CFG = 1, IWM_SKU = 2, IWM_N_HW_ADDRS = 3, IWM_NVM_CHANNELS = 0x1E0 - IWM_NVM_SW_SECTION, /* NVM calibration section offset (in words) definitions */ IWM_NVM_CALIB_SECTION = 0x2B8, IWM_XTAL_CALIB = 0x316 - IWM_NVM_CALIB_SECTION }; enum iwm_8000_nvm_offsets { /* NVM HW-Section offset (in words) definitions */ IWM_HW_ADDR0_WFPM_8000 = 0x12, IWM_HW_ADDR1_WFPM_8000 = 0x16, IWM_HW_ADDR0_PCIE_8000 = 0x8A, IWM_HW_ADDR1_PCIE_8000 = 0x8E, IWM_MAC_ADDRESS_OVERRIDE_8000 = 1, /* NVM SW-Section offset (in words) definitions */ IWM_NVM_SW_SECTION_8000 = 0x1C0, IWM_NVM_VERSION_8000 = 0, IWM_RADIO_CFG_8000 = 0, IWM_SKU_8000 = 2, IWM_N_HW_ADDRS_8000 = 3, /* NVM REGULATORY -Section offset (in words) definitions */ IWM_NVM_CHANNELS_8000 = 0, IWM_NVM_LAR_OFFSET_8000_OLD = 0x4C7, IWM_NVM_LAR_OFFSET_8000 = 0x507, IWM_NVM_LAR_ENABLED_8000 = 0x7, /* NVM calibration section offset (in words) definitions */ IWM_NVM_CALIB_SECTION_8000 = 0x2B8, IWM_XTAL_CALIB_8000 = 0x316 - IWM_NVM_CALIB_SECTION_8000 }; /* SKU Capabilities (actual values from NVM definition) */ enum nvm_sku_bits { IWM_NVM_SKU_CAP_BAND_24GHZ = (1 << 0), IWM_NVM_SKU_CAP_BAND_52GHZ = (1 << 1), IWM_NVM_SKU_CAP_11N_ENABLE = (1 << 2), IWM_NVM_SKU_CAP_11AC_ENABLE = (1 << 3), }; /* radio config bits (actual values from NVM definition) */ #define IWM_NVM_RF_CFG_DASH_MSK(x) (x & 0x3) /* bits 0-1 */ #define IWM_NVM_RF_CFG_STEP_MSK(x) ((x >> 2) & 0x3) /* bits 2-3 */ #define IWM_NVM_RF_CFG_TYPE_MSK(x) ((x >> 4) & 0x3) /* bits 4-5 */ #define IWM_NVM_RF_CFG_PNUM_MSK(x) ((x >> 6) & 0x3) /* bits 6-7 */ #define IWM_NVM_RF_CFG_TX_ANT_MSK(x) ((x >> 8) & 0xF) /* bits 8-11 */ #define IWM_NVM_RF_CFG_RX_ANT_MSK(x) ((x >> 12) & 0xF) /* bits 12-15 */ #define IWM_NVM_RF_CFG_FLAVOR_MSK_8000(x) (x & 0xF) #define IWM_NVM_RF_CFG_DASH_MSK_8000(x) ((x >> 4) & 0xF) #define IWM_NVM_RF_CFG_STEP_MSK_8000(x) ((x >> 8) & 0xF) #define IWM_NVM_RF_CFG_TYPE_MSK_8000(x) ((x >> 12) & 0xFFF) #define IWM_NVM_RF_CFG_TX_ANT_MSK_8000(x) ((x >> 24) & 0xF) #define IWM_NVM_RF_CFG_RX_ANT_MSK_8000(x) ((x >> 28) & 0xF) #define DEFAULT_MAX_TX_POWER 16 /** * enum iwm_nvm_channel_flags - channel flags in NVM * @IWM_NVM_CHANNEL_VALID: channel is usable for this SKU/geo * @IWM_NVM_CHANNEL_IBSS: usable as an IBSS channel * @IWM_NVM_CHANNEL_ACTIVE: active scanning allowed * @IWM_NVM_CHANNEL_RADAR: radar detection required * XXX cannot find this (DFS) flag in iwl-nvm-parse.c * @IWM_NVM_CHANNEL_DFS: dynamic freq selection candidate * @IWM_NVM_CHANNEL_WIDE: 20 MHz channel 
okay (?) * @IWM_NVM_CHANNEL_40MHZ: 40 MHz channel okay (?) * @IWM_NVM_CHANNEL_80MHZ: 80 MHz channel okay (?) * @IWM_NVM_CHANNEL_160MHZ: 160 MHz channel okay (?) */ enum iwm_nvm_channel_flags { IWM_NVM_CHANNEL_VALID = (1 << 0), IWM_NVM_CHANNEL_IBSS = (1 << 1), IWM_NVM_CHANNEL_ACTIVE = (1 << 3), IWM_NVM_CHANNEL_RADAR = (1 << 4), IWM_NVM_CHANNEL_DFS = (1 << 7), IWM_NVM_CHANNEL_WIDE = (1 << 8), IWM_NVM_CHANNEL_40MHZ = (1 << 9), IWM_NVM_CHANNEL_80MHZ = (1 << 10), IWM_NVM_CHANNEL_160MHZ = (1 << 11), }; /* * Translate EEPROM flags to net80211. */ static uint32_t iwm_eeprom_channel_flags(uint16_t ch_flags) { uint32_t nflags; nflags = 0; if ((ch_flags & IWM_NVM_CHANNEL_ACTIVE) == 0) nflags |= IEEE80211_CHAN_PASSIVE; if ((ch_flags & IWM_NVM_CHANNEL_IBSS) == 0) nflags |= IEEE80211_CHAN_NOADHOC; if (ch_flags & IWM_NVM_CHANNEL_RADAR) { nflags |= IEEE80211_CHAN_DFS; /* Just in case. */ nflags |= IEEE80211_CHAN_NOADHOC; } return (nflags); } static void iwm_add_channel_band(struct iwm_softc *sc, struct ieee80211_channel chans[], int maxchans, int *nchans, int ch_idx, size_t ch_num, const uint8_t bands[]) { const uint16_t * const nvm_ch_flags = sc->sc_nvm.nvm_ch_flags; uint32_t nflags; uint16_t ch_flags; uint8_t ieee; int error; for (; ch_idx < ch_num; ch_idx++) { ch_flags = le16_to_cpup(nvm_ch_flags + ch_idx); if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) ieee = iwm_nvm_channels[ch_idx]; else ieee = iwm_nvm_channels_8000[ch_idx]; if (!(ch_flags & IWM_NVM_CHANNEL_VALID)) { IWM_DPRINTF(sc, IWM_DEBUG_EEPROM, "Ch. %d Flags %x [%sGHz] - No traffic\n", ieee, ch_flags, (ch_idx >= IWM_NUM_2GHZ_CHANNELS) ? "5.2" : "2.4"); continue; } nflags = iwm_eeprom_channel_flags(ch_flags); error = ieee80211_add_channel(chans, maxchans, nchans, ieee, 0, 0, nflags, bands); if (error != 0) break; IWM_DPRINTF(sc, IWM_DEBUG_EEPROM, "Ch. %d Flags %x [%sGHz] - Added\n", ieee, ch_flags, (ch_idx >= IWM_NUM_2GHZ_CHANNELS) ? "5.2" : "2.4"); } } static void iwm_init_channel_map(struct ieee80211com *ic, int maxchans, int *nchans, struct ieee80211_channel chans[]) { struct iwm_softc *sc = ic->ic_softc; struct iwm_nvm_data *data = &sc->sc_nvm; uint8_t bands[IEEE80211_MODE_BYTES]; size_t ch_num; memset(bands, 0, sizeof(bands)); /* 1-13: 11b/g channels. */ setbit(bands, IEEE80211_MODE_11B); setbit(bands, IEEE80211_MODE_11G); iwm_add_channel_band(sc, chans, maxchans, nchans, 0, IWM_NUM_2GHZ_CHANNELS - 1, bands); /* 14: 11b channel only. */ clrbit(bands, IEEE80211_MODE_11G); iwm_add_channel_band(sc, chans, maxchans, nchans, IWM_NUM_2GHZ_CHANNELS - 1, IWM_NUM_2GHZ_CHANNELS, bands); if (data->sku_cap_band_52GHz_enable) { if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) ch_num = nitems(iwm_nvm_channels); else ch_num = nitems(iwm_nvm_channels_8000); memset(bands, 0, sizeof(bands)); setbit(bands, IEEE80211_MODE_11A); iwm_add_channel_band(sc, chans, maxchans, nchans, IWM_NUM_2GHZ_CHANNELS, ch_num, bands); } } static void iwm_set_hw_address_8000(struct iwm_softc *sc, struct iwm_nvm_data *data, const uint16_t *mac_override, const uint16_t *nvm_hw) { const uint8_t *hw_addr; if (mac_override) { static const uint8_t reserved_mac[] = { 0x02, 0xcc, 0xaa, 0xff, 0xee, 0x00 }; hw_addr = (const uint8_t *)(mac_override + IWM_MAC_ADDRESS_OVERRIDE_8000); /* * Store the MAC address from MAO section. * No byte swapping is required in MAO section */ IEEE80211_ADDR_COPY(data->hw_addr, hw_addr); /* * Force the use of the OTP MAC address in case of reserved MAC * address in the NVM, or if address is given but invalid. 
*/ if (!IEEE80211_ADDR_EQ(reserved_mac, hw_addr) && !IEEE80211_ADDR_EQ(ieee80211broadcastaddr, data->hw_addr) && iwm_is_valid_ether_addr(data->hw_addr) && !IEEE80211_IS_MULTICAST(data->hw_addr)) return; IWM_DPRINTF(sc, IWM_DEBUG_RESET, "%s: mac address from nvm override section invalid\n", __func__); } if (nvm_hw) { /* read the mac address from WFMP registers */ uint32_t mac_addr0 = htole32(iwm_read_prph(sc, IWM_WFMP_MAC_ADDR_0)); uint32_t mac_addr1 = htole32(iwm_read_prph(sc, IWM_WFMP_MAC_ADDR_1)); hw_addr = (const uint8_t *)&mac_addr0; data->hw_addr[0] = hw_addr[3]; data->hw_addr[1] = hw_addr[2]; data->hw_addr[2] = hw_addr[1]; data->hw_addr[3] = hw_addr[0]; hw_addr = (const uint8_t *)&mac_addr1; data->hw_addr[4] = hw_addr[1]; data->hw_addr[5] = hw_addr[0]; return; } device_printf(sc->sc_dev, "%s: mac address not found\n", __func__); memset(data->hw_addr, 0, sizeof(data->hw_addr)); } static int iwm_get_sku(const struct iwm_softc *sc, const uint16_t *nvm_sw, const uint16_t *phy_sku) { if (sc->sc_device_family != IWM_DEVICE_FAMILY_8000) return le16_to_cpup(nvm_sw + IWM_SKU); return le32_to_cpup((const uint32_t *)(phy_sku + IWM_SKU_8000)); } static int iwm_get_nvm_version(const struct iwm_softc *sc, const uint16_t *nvm_sw) { if (sc->sc_device_family != IWM_DEVICE_FAMILY_8000) return le16_to_cpup(nvm_sw + IWM_NVM_VERSION); else return le32_to_cpup((const uint32_t *)(nvm_sw + IWM_NVM_VERSION_8000)); } static int iwm_get_radio_cfg(const struct iwm_softc *sc, const uint16_t *nvm_sw, const uint16_t *phy_sku) { if (sc->sc_device_family != IWM_DEVICE_FAMILY_8000) return le16_to_cpup(nvm_sw + IWM_RADIO_CFG); return le32_to_cpup((const uint32_t *)(phy_sku + IWM_RADIO_CFG_8000)); } static int iwm_get_n_hw_addrs(const struct iwm_softc *sc, const uint16_t *nvm_sw) { int n_hw_addr; if (sc->sc_device_family != IWM_DEVICE_FAMILY_8000) return le16_to_cpup(nvm_sw + IWM_N_HW_ADDRS); n_hw_addr = le32_to_cpup((const uint32_t *)(nvm_sw + IWM_N_HW_ADDRS_8000)); return n_hw_addr & IWM_N_HW_ADDR_MASK; } static void iwm_set_radio_cfg(const struct iwm_softc *sc, struct iwm_nvm_data *data, uint32_t radio_cfg) { if (sc->sc_device_family != IWM_DEVICE_FAMILY_8000) { data->radio_cfg_type = IWM_NVM_RF_CFG_TYPE_MSK(radio_cfg); data->radio_cfg_step = IWM_NVM_RF_CFG_STEP_MSK(radio_cfg); data->radio_cfg_dash = IWM_NVM_RF_CFG_DASH_MSK(radio_cfg); data->radio_cfg_pnum = IWM_NVM_RF_CFG_PNUM_MSK(radio_cfg); return; } /* set the radio configuration for family 8000 */ data->radio_cfg_type = IWM_NVM_RF_CFG_TYPE_MSK_8000(radio_cfg); data->radio_cfg_step = IWM_NVM_RF_CFG_STEP_MSK_8000(radio_cfg); data->radio_cfg_dash = IWM_NVM_RF_CFG_DASH_MSK_8000(radio_cfg); data->radio_cfg_pnum = IWM_NVM_RF_CFG_FLAVOR_MSK_8000(radio_cfg); data->valid_tx_ant = IWM_NVM_RF_CFG_TX_ANT_MSK_8000(radio_cfg); data->valid_rx_ant = IWM_NVM_RF_CFG_RX_ANT_MSK_8000(radio_cfg); } static int iwm_parse_nvm_data(struct iwm_softc *sc, const uint16_t *nvm_hw, const uint16_t *nvm_sw, const uint16_t *nvm_calib, const uint16_t *mac_override, const uint16_t *phy_sku, const uint16_t *regulatory) { struct iwm_nvm_data *data = &sc->sc_nvm; uint8_t hw_addr[IEEE80211_ADDR_LEN]; uint32_t sku, radio_cfg; data->nvm_version = iwm_get_nvm_version(sc, nvm_sw); radio_cfg = iwm_get_radio_cfg(sc, nvm_sw, phy_sku); iwm_set_radio_cfg(sc, data, radio_cfg); sku = iwm_get_sku(sc, nvm_sw, phy_sku); data->sku_cap_band_24GHz_enable = sku & IWM_NVM_SKU_CAP_BAND_24GHZ; data->sku_cap_band_52GHz_enable = sku & IWM_NVM_SKU_CAP_BAND_52GHZ; data->sku_cap_11n_enable = 0; data->n_hw_addrs = 
iwm_get_n_hw_addrs(sc, nvm_sw); /* The byte order is little endian 16 bit, meaning 214365 */ if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) { IEEE80211_ADDR_COPY(hw_addr, nvm_hw + IWM_HW_ADDR); data->hw_addr[0] = hw_addr[1]; data->hw_addr[1] = hw_addr[0]; data->hw_addr[2] = hw_addr[3]; data->hw_addr[3] = hw_addr[2]; data->hw_addr[4] = hw_addr[5]; data->hw_addr[5] = hw_addr[4]; } else { iwm_set_hw_address_8000(sc, data, mac_override, nvm_hw); } if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) { memcpy(data->nvm_ch_flags, &nvm_sw[IWM_NVM_CHANNELS], IWM_NUM_CHANNELS * sizeof(uint16_t)); } else { memcpy(data->nvm_ch_flags, ®ulatory[IWM_NVM_CHANNELS_8000], IWM_NUM_CHANNELS_8000 * sizeof(uint16_t)); } data->calib_version = 255; /* TODO: this value will prevent some checks from failing, we need to check if this field is still needed, and if it does, where is it in the NVM */ return 0; } /* * END NVM PARSE */ static int iwm_parse_nvm_sections(struct iwm_softc *sc, struct iwm_nvm_section *sections) { const uint16_t *hw, *sw, *calib, *regulatory, *mac_override, *phy_sku; /* Checking for required sections */ if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) { if (!sections[IWM_NVM_SECTION_TYPE_SW].data || !sections[IWM_NVM_SECTION_TYPE_HW].data) { device_printf(sc->sc_dev, "Can't parse empty OTP/NVM sections\n"); return ENOENT; } hw = (const uint16_t *) sections[IWM_NVM_SECTION_TYPE_HW].data; } else if (sc->sc_device_family == IWM_DEVICE_FAMILY_8000) { /* SW and REGULATORY sections are mandatory */ if (!sections[IWM_NVM_SECTION_TYPE_SW].data || !sections[IWM_NVM_SECTION_TYPE_REGULATORY].data) { device_printf(sc->sc_dev, "Can't parse empty OTP/NVM sections\n"); return ENOENT; } /* MAC_OVERRIDE or at least HW section must exist */ if (!sections[IWM_NVM_SECTION_TYPE_HW_8000].data && !sections[IWM_NVM_SECTION_TYPE_MAC_OVERRIDE].data) { device_printf(sc->sc_dev, "Can't parse mac_address, empty sections\n"); return ENOENT; } /* PHY_SKU section is mandatory in B0 */ if (!sections[IWM_NVM_SECTION_TYPE_PHY_SKU].data) { device_printf(sc->sc_dev, "Can't parse phy_sku in B0, empty sections\n"); return ENOENT; } hw = (const uint16_t *) sections[IWM_NVM_SECTION_TYPE_HW_8000].data; } else { panic("unknown device family %d\n", sc->sc_device_family); } sw = (const uint16_t *)sections[IWM_NVM_SECTION_TYPE_SW].data; calib = (const uint16_t *) sections[IWM_NVM_SECTION_TYPE_CALIBRATION].data; regulatory = (const uint16_t *) sections[IWM_NVM_SECTION_TYPE_REGULATORY].data; mac_override = (const uint16_t *) sections[IWM_NVM_SECTION_TYPE_MAC_OVERRIDE].data; phy_sku = (const uint16_t *)sections[IWM_NVM_SECTION_TYPE_PHY_SKU].data; return iwm_parse_nvm_data(sc, hw, sw, calib, mac_override, phy_sku, regulatory); } static int iwm_nvm_init(struct iwm_softc *sc) { struct iwm_nvm_section nvm_sections[IWM_NVM_NUM_OF_SECTIONS]; int i, section, error; uint16_t len; uint8_t *buf; const size_t bufsz = IWM_MAX_NVM_SECTION_SIZE; memset(nvm_sections, 0 , sizeof(nvm_sections)); buf = malloc(bufsz, M_DEVBUF, M_NOWAIT); if (buf == NULL) return ENOMEM; for (i = 0; i < nitems(nvm_to_read); i++) { section = nvm_to_read[i]; KASSERT(section <= nitems(nvm_sections), ("too many sections")); error = iwm_nvm_read_section(sc, section, buf, &len, bufsz); if (error) { error = 0; continue; } nvm_sections[section].data = malloc(len, M_DEVBUF, M_NOWAIT); if (nvm_sections[section].data == NULL) { error = ENOMEM; break; } memcpy(nvm_sections[section].data, buf, len); nvm_sections[section].length = len; } free(buf, M_DEVBUF); if (error == 0) error = 
iwm_parse_nvm_sections(sc, nvm_sections); for (i = 0; i < IWM_NVM_NUM_OF_SECTIONS; i++) { if (nvm_sections[i].data != NULL) free(nvm_sections[i].data, M_DEVBUF); } return error; } /* * Firmware loading gunk. This is kind of a weird hybrid between the * iwn driver and the Linux iwlwifi driver. */ static int iwm_firmware_load_sect(struct iwm_softc *sc, uint32_t dst_addr, const uint8_t *section, uint32_t byte_cnt) { int error = EINVAL; uint32_t chunk_sz, offset; chunk_sz = MIN(IWM_FH_MEM_TB_MAX_LENGTH, byte_cnt); for (offset = 0; offset < byte_cnt; offset += chunk_sz) { uint32_t addr, len; const uint8_t *data; addr = dst_addr + offset; len = MIN(chunk_sz, byte_cnt - offset); data = section + offset; error = iwm_firmware_load_chunk(sc, addr, data, len); if (error) break; } return error; } static int iwm_firmware_load_chunk(struct iwm_softc *sc, uint32_t dst_addr, const uint8_t *chunk, uint32_t byte_cnt) { struct iwm_dma_info *dma = &sc->fw_dma; int error; /* Copy firmware chunk into pre-allocated DMA-safe memory. */ memcpy(dma->vaddr, chunk, byte_cnt); bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREWRITE); if (dst_addr >= IWM_FW_MEM_EXTENDED_START && dst_addr <= IWM_FW_MEM_EXTENDED_END) { iwm_set_bits_prph(sc, IWM_LMPM_CHICK, IWM_LMPM_CHICK_EXTENDED_ADDR_SPACE); } sc->sc_fw_chunk_done = 0; if (!iwm_nic_lock(sc)) return EBUSY; IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_CONFIG_REG(IWM_FH_SRVC_CHNL), IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_PAUSE); IWM_WRITE(sc, IWM_FH_SRVC_CHNL_SRAM_ADDR_REG(IWM_FH_SRVC_CHNL), dst_addr); IWM_WRITE(sc, IWM_FH_TFDIB_CTRL0_REG(IWM_FH_SRVC_CHNL), dma->paddr & IWM_FH_MEM_TFDIB_DRAM_ADDR_LSB_MSK); IWM_WRITE(sc, IWM_FH_TFDIB_CTRL1_REG(IWM_FH_SRVC_CHNL), (iwm_get_dma_hi_addr(dma->paddr) << IWM_FH_MEM_TFDIB_REG1_ADDR_BITSHIFT) | byte_cnt); IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_BUF_STS_REG(IWM_FH_SRVC_CHNL), 1 << IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_POS_TB_NUM | 1 << IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_POS_TB_IDX | IWM_FH_TCSR_CHNL_TX_BUF_STS_REG_VAL_TFDB_VALID); IWM_WRITE(sc, IWM_FH_TCSR_CHNL_TX_CONFIG_REG(IWM_FH_SRVC_CHNL), IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE | IWM_FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_DISABLE | IWM_FH_TCSR_TX_CONFIG_REG_VAL_CIRQ_HOST_ENDTFD); iwm_nic_unlock(sc); /* wait 1s for this segment to load */ while (!sc->sc_fw_chunk_done) if ((error = msleep(&sc->sc_fw, &sc->sc_mtx, 0, "iwmfw", hz)) != 0) break; if (!sc->sc_fw_chunk_done) { device_printf(sc->sc_dev, "fw chunk addr 0x%x len %d failed to load\n", dst_addr, byte_cnt); } if (dst_addr >= IWM_FW_MEM_EXTENDED_START && dst_addr <= IWM_FW_MEM_EXTENDED_END && iwm_nic_lock(sc)) { iwm_clear_bits_prph(sc, IWM_LMPM_CHICK, IWM_LMPM_CHICK_EXTENDED_ADDR_SPACE); iwm_nic_unlock(sc); } return error; } int iwm_load_cpu_sections_8000(struct iwm_softc *sc, struct iwm_fw_sects *fws, int cpu, int *first_ucode_section) { int shift_param; int i, error = 0, sec_num = 0x1; uint32_t val, last_read_idx = 0; const void *data; uint32_t dlen; uint32_t offset; if (cpu == 1) { shift_param = 0; *first_ucode_section = 0; } else { shift_param = 16; (*first_ucode_section)++; } for (i = *first_ucode_section; i < IWM_UCODE_SECT_MAX; i++) { last_read_idx = i; data = fws->fw_sect[i].fws_data; dlen = fws->fw_sect[i].fws_len; offset = fws->fw_sect[i].fws_devoff; /* * CPU1_CPU2_SEPARATOR_SECTION delimiter - separate between * CPU1 to CPU2. * PAGING_SEPARATOR_SECTION delimiter - separate between * CPU2 non paged to CPU2 paging sec. 
*/ if (!data || offset == IWM_CPU1_CPU2_SEPARATOR_SECTION || offset == IWM_PAGING_SEPARATOR_SECTION) break; IWM_DPRINTF(sc, IWM_DEBUG_RESET, "LOAD FIRMWARE chunk %d offset 0x%x len %d for cpu %d\n", i, offset, dlen, cpu); if (dlen > sc->sc_fwdmasegsz) { IWM_DPRINTF(sc, IWM_DEBUG_RESET, "chunk %d too large (%d bytes)\n", i, dlen); error = EFBIG; } else { error = iwm_firmware_load_sect(sc, offset, data, dlen); } if (error) { device_printf(sc->sc_dev, "could not load firmware chunk %d (error %d)\n", i, error); return error; } /* Notify the ucode of the loaded section number and status */ if (iwm_nic_lock(sc)) { val = IWM_READ(sc, IWM_FH_UCODE_LOAD_STATUS); val = val | (sec_num << shift_param); IWM_WRITE(sc, IWM_FH_UCODE_LOAD_STATUS, val); sec_num = (sec_num << 1) | 0x1; iwm_nic_unlock(sc); /* * The firmware won't load correctly without this delay. */ DELAY(8000); } } *first_ucode_section = last_read_idx; if (iwm_nic_lock(sc)) { if (cpu == 1) IWM_WRITE(sc, IWM_FH_UCODE_LOAD_STATUS, 0xFFFF); else IWM_WRITE(sc, IWM_FH_UCODE_LOAD_STATUS, 0xFFFFFFFF); iwm_nic_unlock(sc); } return 0; } int iwm_load_firmware_8000(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { struct iwm_fw_sects *fws; int error = 0; int first_ucode_section; IWM_DPRINTF(sc, IWM_DEBUG_RESET, "loading ucode type %d\n", ucode_type); fws = &sc->sc_fw.fw_sects[ucode_type]; /* configure the ucode to be ready to get the secured image */ /* release CPU reset */ iwm_write_prph(sc, IWM_RELEASE_CPU_RESET, IWM_RELEASE_CPU_RESET_BIT); /* load to FW the binary Secured sections of CPU1 */ error = iwm_load_cpu_sections_8000(sc, fws, 1, &first_ucode_section); if (error) return error; /* load to FW the binary sections of CPU2 */ return iwm_load_cpu_sections_8000(sc, fws, 2, &first_ucode_section); } static int iwm_load_firmware_7000(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { struct iwm_fw_sects *fws; int error, i; const void *data; uint32_t dlen; uint32_t offset; sc->sc_uc.uc_intr = 0; fws = &sc->sc_fw.fw_sects[ucode_type]; for (i = 0; i < fws->fw_count; i++) { data = fws->fw_sect[i].fws_data; dlen = fws->fw_sect[i].fws_len; offset = fws->fw_sect[i].fws_devoff; IWM_DPRINTF(sc, IWM_DEBUG_FIRMWARE_TLV, "LOAD FIRMWARE type %d offset %u len %d\n", ucode_type, offset, dlen); if (dlen > sc->sc_fwdmasegsz) { IWM_DPRINTF(sc, IWM_DEBUG_FIRMWARE_TLV, "chunk %d too large (%d bytes)\n", i, dlen); error = EFBIG; } else { error = iwm_firmware_load_sect(sc, offset, data, dlen); } if (error) { device_printf(sc->sc_dev, "could not load firmware chunk %u of %u " "(error=%d)\n", i, fws->fw_count, error); return error; } } IWM_WRITE(sc, IWM_CSR_RESET, 0); return 0; } static int iwm_load_firmware(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { int error, w; if (sc->sc_device_family == IWM_DEVICE_FAMILY_8000) error = iwm_load_firmware_8000(sc, ucode_type); else error = iwm_load_firmware_7000(sc, ucode_type); if (error) return error; /* wait for the firmware to load */ for (w = 0; !sc->sc_uc.uc_intr && w < 10; w++) { error = msleep(&sc->sc_uc, &sc->sc_mtx, 0, "iwmuc", hz/10); } if (error || !sc->sc_uc.uc_ok) { device_printf(sc->sc_dev, "could not load firmware\n"); if (sc->sc_device_family == IWM_DEVICE_FAMILY_8000) { device_printf(sc->sc_dev, "cpu1 status: 0x%x\n", iwm_read_prph(sc, IWM_SB_CPU_1_STATUS)); device_printf(sc->sc_dev, "cpu2 status: 0x%x\n", iwm_read_prph(sc, IWM_SB_CPU_2_STATUS)); } } /* * Give the firmware some time to initialize. * Accessing it too early causes errors. 
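 *
 * (For reference, the IWM_FH_UCODE_LOAD_STATUS handshake above builds a
 * simple bitmap: sec_num starts at 0x1 and after every section becomes
 * (sec_num << 1) | 1, so for CPU1 (shift_param == 0) the low half of the
 * register reads 0x0001, 0x0003, 0x0007, ... as sections land, while CPU2
 * (shift_param == 16) builds the same pattern in the high half.  In
 * miniature:
 *
 *	val = 0;
 *	sec_num = 0x1;
 *	for (k = 0; k < 3; k++) {
 *		val |= sec_num << shift_param;
 *		sec_num = (sec_num << 1) | 0x1;
 *	}
 *
 * leaves val at 0x0007 for CPU1 and 0x00070000 for CPU2, before the final
 * 0xFFFF / 0xFFFFFFFF "all done" write.)
 *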
*/ msleep(&w, &sc->sc_mtx, 0, "iwmfwinit", hz); return error; } /* iwlwifi: pcie/trans.c */ static int iwm_start_fw(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { int error; IWM_WRITE(sc, IWM_CSR_INT, ~0); if ((error = iwm_nic_init(sc)) != 0) { device_printf(sc->sc_dev, "unable to init nic\n"); return error; } /* make sure rfkill handshake bits are cleared */ IWM_WRITE(sc, IWM_CSR_UCODE_DRV_GP1_CLR, IWM_CSR_UCODE_SW_BIT_RFKILL); IWM_WRITE(sc, IWM_CSR_UCODE_DRV_GP1_CLR, IWM_CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED); /* clear (again), then enable host interrupts */ IWM_WRITE(sc, IWM_CSR_INT, ~0); iwm_enable_interrupts(sc); /* really make sure rfkill handshake bits are cleared */ /* maybe we should write a few times more? just to make sure */ IWM_WRITE(sc, IWM_CSR_UCODE_DRV_GP1_CLR, IWM_CSR_UCODE_SW_BIT_RFKILL); IWM_WRITE(sc, IWM_CSR_UCODE_DRV_GP1_CLR, IWM_CSR_UCODE_SW_BIT_RFKILL); /* Load the given image to the HW */ return iwm_load_firmware(sc, ucode_type); } static int iwm_send_tx_ant_cfg(struct iwm_softc *sc, uint8_t valid_tx_ant) { struct iwm_tx_ant_cfg_cmd tx_ant_cmd = { .valid = htole32(valid_tx_ant), }; return iwm_mvm_send_cmd_pdu(sc, IWM_TX_ANT_CONFIGURATION_CMD, IWM_CMD_SYNC, sizeof(tx_ant_cmd), &tx_ant_cmd); } /* iwlwifi: mvm/fw.c */ static int iwm_send_phy_cfg_cmd(struct iwm_softc *sc) { struct iwm_phy_cfg_cmd phy_cfg_cmd; enum iwm_ucode_type ucode_type = sc->sc_uc_current; /* Set parameters */ phy_cfg_cmd.phy_cfg = htole32(sc->sc_fw_phy_config); phy_cfg_cmd.calib_control.event_trigger = sc->sc_default_calib[ucode_type].event_trigger; phy_cfg_cmd.calib_control.flow_trigger = sc->sc_default_calib[ucode_type].flow_trigger; IWM_DPRINTF(sc, IWM_DEBUG_CMD | IWM_DEBUG_RESET, "Sending Phy CFG command: 0x%x\n", phy_cfg_cmd.phy_cfg); return iwm_mvm_send_cmd_pdu(sc, IWM_PHY_CONFIGURATION_CMD, IWM_CMD_SYNC, sizeof(phy_cfg_cmd), &phy_cfg_cmd); } static int iwm_mvm_load_ucode_wait_alive(struct iwm_softc *sc, enum iwm_ucode_type ucode_type) { enum iwm_ucode_type old_type = sc->sc_uc_current; int error; if ((error = iwm_read_firmware(sc, ucode_type)) != 0) { device_printf(sc->sc_dev, "iwm_read_firmware: failed %d\n", error); return error; } sc->sc_uc_current = ucode_type; error = iwm_start_fw(sc, ucode_type); if (error) { device_printf(sc->sc_dev, "iwm_start_fw: failed %d\n", error); sc->sc_uc_current = old_type; return error; } error = iwm_post_alive(sc); if (error) { device_printf(sc->sc_dev, "iwm_fw_alive: failed %d\n", error); } return error; } /* * mvm misc bits */ /* * follows iwlwifi/fw.c */ static int iwm_run_init_mvm_ucode(struct iwm_softc *sc, int justnvm) { int error; /* do not operate with rfkill switch turned on */ if ((sc->sc_flags & IWM_FLAG_RFKILL) && !justnvm) { device_printf(sc->sc_dev, "radio is disabled by hardware switch\n"); return EPERM; } sc->sc_init_complete = 0; if ((error = iwm_mvm_load_ucode_wait_alive(sc, IWM_UCODE_TYPE_INIT)) != 0) { device_printf(sc->sc_dev, "failed to load init firmware\n"); return error; } if (justnvm) { if ((error = iwm_nvm_init(sc)) != 0) { device_printf(sc->sc_dev, "failed to read nvm\n"); return error; } IEEE80211_ADDR_COPY(sc->sc_ic.ic_macaddr, sc->sc_nvm.hw_addr); return 0; } if ((error = iwm_send_bt_init_conf(sc)) != 0) { device_printf(sc->sc_dev, "failed to send bt coex configuration: %d\n", error); return error; } /* Init Smart FIFO. 
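 *
 * (At this point the init image is alive; the remainder of the bring-up done
 * by this function, condensed from the code that follows with error handling
 * elided, is:
 *
 *	iwm_mvm_sf_config(sc, IWM_SF_INIT_OFF);
 *	iwm_send_tx_ant_cfg(sc, iwm_fw_valid_tx_ant(sc));
 *	iwm_send_phy_cfg_cmd(sc);
 *	while (!sc->sc_init_complete)
 *		msleep(&sc->sc_init_complete, &sc->sc_mtx, 0, "iwminit", 2*hz);
 *
 * i.e. the phy configuration command kicks off the init image's internal
 * calibrations, and the msleep() loop waits for the resulting "init
 * complete" notification.)
 *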
*/ error = iwm_mvm_sf_config(sc, IWM_SF_INIT_OFF); if (error != 0) return error; /* Send TX valid antennas before triggering calibrations */ if ((error = iwm_send_tx_ant_cfg(sc, iwm_fw_valid_tx_ant(sc))) != 0) { device_printf(sc->sc_dev, "failed to send antennas before calibration: %d\n", error); return error; } /* * Send phy configurations command to init uCode * to start the 16.0 uCode init image internal calibrations. */ if ((error = iwm_send_phy_cfg_cmd(sc)) != 0 ) { device_printf(sc->sc_dev, "%s: failed to run internal calibration: %d\n", __func__, error); return error; } /* * Nothing to do but wait for the init complete notification * from the firmware */ while (!sc->sc_init_complete) { error = msleep(&sc->sc_init_complete, &sc->sc_mtx, 0, "iwminit", 2*hz); if (error) { device_printf(sc->sc_dev, "init complete failed: %d\n", sc->sc_init_complete); break; } } IWM_DPRINTF(sc, IWM_DEBUG_RESET, "init %scomplete\n", sc->sc_init_complete ? "" : "not "); return error; } /* * receive side */ /* (re)stock rx ring, called at init-time and at runtime */ static int iwm_rx_addbuf(struct iwm_softc *sc, int size, int idx) { struct iwm_rx_ring *ring = &sc->rxq; struct iwm_rx_data *data = &ring->data[idx]; struct mbuf *m; bus_dmamap_t dmamap = NULL; bus_dma_segment_t seg; int nsegs, error; m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, IWM_RBUF_SIZE); if (m == NULL) return ENOBUFS; m->m_len = m->m_pkthdr.len = m->m_ext.ext_size; error = bus_dmamap_load_mbuf_sg(ring->data_dmat, ring->spare_map, m, &seg, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "%s: can't map mbuf, error %d\n", __func__, error); goto fail; } if (data->m != NULL) bus_dmamap_unload(ring->data_dmat, data->map); /* Swap ring->spare_map with data->map */ dmamap = data->map; data->map = ring->spare_map; ring->spare_map = dmamap; bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_PREREAD); data->m = m; /* Update RX descriptor. */ KASSERT((seg.ds_addr & 255) == 0, ("seg.ds_addr not aligned")); ring->desc[idx] = htole32(seg.ds_addr >> 8); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); return 0; fail: m_freem(m); return error; } /* iwlwifi: mvm/rx.c */ #define IWM_RSSI_OFFSET 50 static int iwm_mvm_calc_rssi(struct iwm_softc *sc, struct iwm_rx_phy_info *phy_info) { int rssi_a, rssi_b, rssi_a_dbm, rssi_b_dbm, max_rssi_dbm; uint32_t agc_a, agc_b; uint32_t val; val = le32toh(phy_info->non_cfg_phy[IWM_RX_INFO_AGC_IDX]); agc_a = (val & IWM_OFDM_AGC_A_MSK) >> IWM_OFDM_AGC_A_POS; agc_b = (val & IWM_OFDM_AGC_B_MSK) >> IWM_OFDM_AGC_B_POS; val = le32toh(phy_info->non_cfg_phy[IWM_RX_INFO_RSSI_AB_IDX]); rssi_a = (val & IWM_OFDM_RSSI_INBAND_A_MSK) >> IWM_OFDM_RSSI_A_POS; rssi_b = (val & IWM_OFDM_RSSI_INBAND_B_MSK) >> IWM_OFDM_RSSI_B_POS; /* * dBm = rssi dB - agc dB - constant. * Higher AGC (higher radio gain) means lower signal. */ rssi_a_dbm = rssi_a - IWM_RSSI_OFFSET - agc_a; rssi_b_dbm = rssi_b - IWM_RSSI_OFFSET - agc_b; max_rssi_dbm = MAX(rssi_a_dbm, rssi_b_dbm); IWM_DPRINTF(sc, IWM_DEBUG_RECV, "Rssi In A %d B %d Max %d AGCA %d AGCB %d\n", rssi_a_dbm, rssi_b_dbm, max_rssi_dbm, agc_a, agc_b); return max_rssi_dbm; } /* iwlwifi: mvm/rx.c */ /* * iwm_mvm_get_signal_strength - use new rx PHY INFO API * values are reported by the fw as positive values - need to negate * to obtain their dBM. Account for missing antennas by replacing 0 * values by -256dBm: practically 0 power and a non-feasible 8 bit value. 
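 *
 * A quick worked example of that rule, with hypothetical firmware values:
 *
 *	raw_a = 45;  raw_b = 0;
 *	energy_a = raw_a ? -raw_a : -256;	(-45 dBm)
 *	energy_b = raw_b ? -raw_b : -256;	(-256 dBm, antenna not present)
 *	max_energy = MAX(energy_a, energy_b);	(-45 dBm)
 *
 * so a missing antenna can never win the MAX() comparison below.
 *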
*/ static int iwm_mvm_get_signal_strength(struct iwm_softc *sc, struct iwm_rx_phy_info *phy_info) { int energy_a, energy_b, energy_c, max_energy; uint32_t val; val = le32toh(phy_info->non_cfg_phy[IWM_RX_INFO_ENERGY_ANT_ABC_IDX]); energy_a = (val & IWM_RX_INFO_ENERGY_ANT_A_MSK) >> IWM_RX_INFO_ENERGY_ANT_A_POS; energy_a = energy_a ? -energy_a : -256; energy_b = (val & IWM_RX_INFO_ENERGY_ANT_B_MSK) >> IWM_RX_INFO_ENERGY_ANT_B_POS; energy_b = energy_b ? -energy_b : -256; energy_c = (val & IWM_RX_INFO_ENERGY_ANT_C_MSK) >> IWM_RX_INFO_ENERGY_ANT_C_POS; energy_c = energy_c ? -energy_c : -256; max_energy = MAX(energy_a, energy_b); max_energy = MAX(max_energy, energy_c); IWM_DPRINTF(sc, IWM_DEBUG_RECV, "energy In A %d B %d C %d , and max %d\n", energy_a, energy_b, energy_c, max_energy); return max_energy; } static void iwm_mvm_rx_rx_phy_cmd(struct iwm_softc *sc, struct iwm_rx_packet *pkt, struct iwm_rx_data *data) { struct iwm_rx_phy_info *phy_info = (void *)pkt->data; IWM_DPRINTF(sc, IWM_DEBUG_RECV, "received PHY stats\n"); bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); memcpy(&sc->sc_last_phy_info, phy_info, sizeof(sc->sc_last_phy_info)); } /* * Retrieve the average noise (in dBm) among receivers. */ static int iwm_get_noise(const struct iwm_mvm_statistics_rx_non_phy *stats) { int i, total, nbant, noise; total = nbant = noise = 0; for (i = 0; i < 3; i++) { noise = le32toh(stats->beacon_silence_rssi[i]) & 0xff; if (noise) { total += noise; nbant++; } } /* There should be at least one antenna but check anyway. */ return (nbant == 0) ? -127 : (total / nbant) - 107; } /* * iwm_mvm_rx_rx_mpdu - IWM_REPLY_RX_MPDU_CMD handler * * Handles the actual data of the Rx packet from the fw */ static void iwm_mvm_rx_rx_mpdu(struct iwm_softc *sc, struct iwm_rx_packet *pkt, struct iwm_rx_data *data) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct ieee80211_frame *wh; struct ieee80211_node *ni; struct ieee80211_rx_stats rxs; struct mbuf *m; struct iwm_rx_phy_info *phy_info; struct iwm_rx_mpdu_res_start *rx_res; uint32_t len; uint32_t rx_pkt_status; int rssi; bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); phy_info = &sc->sc_last_phy_info; rx_res = (struct iwm_rx_mpdu_res_start *)pkt->data; wh = (struct ieee80211_frame *)(pkt->data + sizeof(*rx_res)); len = le16toh(rx_res->byte_count); rx_pkt_status = le32toh(*(uint32_t *)(pkt->data + sizeof(*rx_res) + len)); m = data->m; m->m_data = pkt->data + sizeof(*rx_res); m->m_pkthdr.len = m->m_len = len; if (__predict_false(phy_info->cfg_phy_cnt > 20)) { device_printf(sc->sc_dev, "dsp size out of range [0,20]: %d\n", phy_info->cfg_phy_cnt); return; } if (!(rx_pkt_status & IWM_RX_MPDU_RES_STATUS_CRC_OK) || !(rx_pkt_status & IWM_RX_MPDU_RES_STATUS_OVERRUN_OK)) { IWM_DPRINTF(sc, IWM_DEBUG_RECV, "Bad CRC or FIFO: 0x%08X.\n", rx_pkt_status); return; /* drop */ } if (sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_RX_ENERGY_API) { rssi = iwm_mvm_get_signal_strength(sc, phy_info); } else { rssi = iwm_mvm_calc_rssi(sc, phy_info); } rssi = (0 - IWM_MIN_DBM) + rssi; /* normalize */ rssi = MIN(rssi, sc->sc_max_rssi); /* clip to max. 
100% */ /* replenish ring for the buffer we're going to feed to the sharks */ if (iwm_rx_addbuf(sc, IWM_RBUF_SIZE, sc->rxq.cur) != 0) { device_printf(sc->sc_dev, "%s: unable to add more buffers\n", __func__); return; } ni = ieee80211_find_rxnode(ic, (struct ieee80211_frame_min *)wh); IWM_DPRINTF(sc, IWM_DEBUG_RECV, "%s: phy_info: channel=%d, flags=0x%08x\n", __func__, le16toh(phy_info->channel), le16toh(phy_info->phy_flags)); /* * Populate an RX state struct with the provided information. */ bzero(&rxs, sizeof(rxs)); rxs.r_flags |= IEEE80211_R_IEEE | IEEE80211_R_FREQ; rxs.r_flags |= IEEE80211_R_NF | IEEE80211_R_RSSI; rxs.c_ieee = le16toh(phy_info->channel); if (le16toh(phy_info->phy_flags & IWM_RX_RES_PHY_FLAGS_BAND_24)) { rxs.c_freq = ieee80211_ieee2mhz(rxs.c_ieee, IEEE80211_CHAN_2GHZ); } else { rxs.c_freq = ieee80211_ieee2mhz(rxs.c_ieee, IEEE80211_CHAN_5GHZ); } rxs.rssi = rssi - sc->sc_noise; rxs.nf = sc->sc_noise; if (ieee80211_radiotap_active_vap(vap)) { struct iwm_rx_radiotap_header *tap = &sc->sc_rxtap; tap->wr_flags = 0; if (phy_info->phy_flags & htole16(IWM_PHY_INFO_FLAG_SHPREAMBLE)) tap->wr_flags |= IEEE80211_RADIOTAP_F_SHORTPRE; tap->wr_chan_freq = htole16(rxs.c_freq); /* XXX only if ic->ic_curchan->ic_ieee == rxs.c_ieee */ tap->wr_chan_flags = htole16(ic->ic_curchan->ic_flags); tap->wr_dbm_antsignal = (int8_t)rssi; tap->wr_dbm_antnoise = (int8_t)sc->sc_noise; tap->wr_tsft = phy_info->system_timestamp; switch (phy_info->rate) { /* CCK rates. */ case 10: tap->wr_rate = 2; break; case 20: tap->wr_rate = 4; break; case 55: tap->wr_rate = 11; break; case 110: tap->wr_rate = 22; break; /* OFDM rates. */ case 0xd: tap->wr_rate = 12; break; case 0xf: tap->wr_rate = 18; break; case 0x5: tap->wr_rate = 24; break; case 0x7: tap->wr_rate = 36; break; case 0x9: tap->wr_rate = 48; break; case 0xb: tap->wr_rate = 72; break; case 0x1: tap->wr_rate = 96; break; case 0x3: tap->wr_rate = 108; break; /* Unknown rate: should not happen. */ default: tap->wr_rate = 0; } } IWM_UNLOCK(sc); if (ni != NULL) { IWM_DPRINTF(sc, IWM_DEBUG_RECV, "input m %p\n", m); ieee80211_input_mimo(ni, m, &rxs); ieee80211_free_node(ni); } else { IWM_DPRINTF(sc, IWM_DEBUG_RECV, "inputall m %p\n", m); ieee80211_input_mimo_all(ic, m, &rxs); } IWM_LOCK(sc); } static int iwm_mvm_rx_tx_cmd_single(struct iwm_softc *sc, struct iwm_rx_packet *pkt, struct iwm_node *in) { struct iwm_mvm_tx_resp *tx_resp = (void *)pkt->data; struct ieee80211_node *ni = &in->in_ni; struct ieee80211vap *vap = ni->ni_vap; int status = le16toh(tx_resp->status.status) & IWM_TX_STATUS_MSK; int failack = tx_resp->failure_frame; KASSERT(tx_resp->frame_count == 1, ("too many frames")); /* Update rate control statistics. 
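 *
 * (Condensed, the handling below is: mask out the status field, treat
 * anything other than SUCCESS/DIRECT_DONE as a failed transmission, and feed
 * the firmware's per-frame failure count to net80211's rate control:
 *
 *	status = le16toh(tx_resp->status.status) & IWM_TX_STATUS_MSK;
 *	ok = (status == IWM_TX_STATUS_SUCCESS ||
 *	      status == IWM_TX_STATUS_DIRECT_DONE);
 *	ieee80211_ratectl_tx_complete(vap, ni,
 *	    ok ? IEEE80211_RATECTL_TX_SUCCESS : IEEE80211_RATECTL_TX_FAILURE,
 *	    &failack, NULL);
 *
 * with failack being tx_resp->failure_frame.)
 *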
*/ IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "%s: status=0x%04x, seq=%d, fc=%d, btc=%d, frts=%d, ff=%d, irate=%08x, wmt=%d\n", __func__, (int) le16toh(tx_resp->status.status), (int) le16toh(tx_resp->status.sequence), tx_resp->frame_count, tx_resp->bt_kill_count, tx_resp->failure_rts, tx_resp->failure_frame, le32toh(tx_resp->initial_rate), (int) le16toh(tx_resp->wireless_media_time)); if (status != IWM_TX_STATUS_SUCCESS && status != IWM_TX_STATUS_DIRECT_DONE) { ieee80211_ratectl_tx_complete(vap, ni, IEEE80211_RATECTL_TX_FAILURE, &failack, NULL); return (1); } else { ieee80211_ratectl_tx_complete(vap, ni, IEEE80211_RATECTL_TX_SUCCESS, &failack, NULL); return (0); } } static void iwm_mvm_rx_tx_cmd(struct iwm_softc *sc, struct iwm_rx_packet *pkt, struct iwm_rx_data *data) { struct iwm_cmd_header *cmd_hdr = &pkt->hdr; int idx = cmd_hdr->idx; int qid = cmd_hdr->qid; struct iwm_tx_ring *ring = &sc->txq[qid]; struct iwm_tx_data *txd = &ring->data[idx]; struct iwm_node *in = txd->in; struct mbuf *m = txd->m; int status; KASSERT(txd->done == 0, ("txd not done")); KASSERT(txd->in != NULL, ("txd without node")); KASSERT(txd->m != NULL, ("txd without mbuf")); bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); sc->sc_tx_timer = 0; status = iwm_mvm_rx_tx_cmd_single(sc, pkt, in); /* Unmap and free mbuf. */ bus_dmamap_sync(ring->data_dmat, txd->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, txd->map); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "free txd %p, in %p\n", txd, txd->in); txd->done = 1; txd->m = NULL; txd->in = NULL; ieee80211_tx_complete(&in->in_ni, m, status); if (--ring->queued < IWM_TX_RING_LOMARK) { sc->qfullmsk &= ~(1 << ring->qid); if (sc->qfullmsk == 0) { /* * Well, we're in interrupt context, but then again * I guess net80211 does all sorts of stunts in * interrupt context, so maybe this is no biggie. */ iwm_start(sc); } } } /* * transmit side */ /* * Process a "command done" firmware notification. This is where we wakeup * processes waiting for a synchronous command completion. * from if_iwn */ static void iwm_cmd_done(struct iwm_softc *sc, struct iwm_rx_packet *pkt) { struct iwm_tx_ring *ring = &sc->txq[IWM_MVM_CMD_QUEUE]; struct iwm_tx_data *data; if (pkt->hdr.qid != IWM_MVM_CMD_QUEUE) { return; /* Not a command ack. */ } data = &ring->data[pkt->hdr.idx]; /* If the command was mapped in an mbuf, free it. */ if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } wakeup(&ring->desc[pkt->hdr.idx]); } #if 0 /* * necessary only for block ack mode */ void iwm_update_sched(struct iwm_softc *sc, int qid, int idx, uint8_t sta_id, uint16_t len) { struct iwm_agn_scd_bc_tbl *scd_bc_tbl; uint16_t w_val; scd_bc_tbl = sc->sched_dma.vaddr; len += 8; /* magic numbers came naturally from paris */ if (sc->sc_capaflags & IWM_UCODE_TLV_FLAGS_DW_BC_TABLE) len = roundup(len, 4) / 4; w_val = htole16(sta_id << 12 | len); /* Update TX scheduler. */ scd_bc_tbl[qid].tfd_offset[idx] = w_val; bus_dmamap_sync(sc->sched_dma.tag, sc->sched_dma.map, BUS_DMASYNC_PREWRITE); /* I really wonder what this is ?!? */ if (idx < IWM_TFD_QUEUE_SIZE_BC_DUP) { scd_bc_tbl[qid].tfd_offset[IWM_TFD_QUEUE_SIZE_MAX + idx] = w_val; bus_dmamap_sync(sc->sched_dma.tag, sc->sched_dma.map, BUS_DMASYNC_PREWRITE); } } #endif /* * Take an 802.11 (non-n) rate, find the relevant rate * table entry. return the index into in_ridx[]. 
* * The caller then uses that index back into in_ridx * to figure out the rate index programmed /into/ * the firmware for this given node. */ static int iwm_tx_rateidx_lookup(struct iwm_softc *sc, struct iwm_node *in, uint8_t rate) { int i; uint8_t r; for (i = 0; i < nitems(in->in_ridx); i++) { r = iwm_rates[in->in_ridx[i]].rate; if (rate == r) return (i); } /* XXX Return the first */ /* XXX TODO: have it return the /lowest/ */ return (0); } /* * Fill in the rate related information for a transmit command. */ static const struct iwm_rate * iwm_tx_fill_cmd(struct iwm_softc *sc, struct iwm_node *in, struct ieee80211_frame *wh, struct iwm_tx_cmd *tx) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_node *ni = &in->in_ni; const struct iwm_rate *rinfo; int type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; int ridx, rate_flags; tx->rts_retry_limit = IWM_RTS_DFAULT_RETRY_LIMIT; tx->data_retry_limit = IWM_DEFAULT_TX_RETRY; /* * XXX TODO: everything about the rate selection here is terrible! */ if (type == IEEE80211_FC0_TYPE_DATA) { int i; /* for data frames, use RS table */ (void) ieee80211_ratectl_rate(ni, NULL, 0); i = iwm_tx_rateidx_lookup(sc, in, ni->ni_txrate); ridx = in->in_ridx[i]; /* This is the index into the programmed table */ tx->initial_rate_index = i; tx->tx_flags |= htole32(IWM_TX_CMD_FLG_STA_RATE); IWM_DPRINTF(sc, IWM_DEBUG_XMIT | IWM_DEBUG_TXRATE, "%s: start with i=%d, txrate %d\n", __func__, i, iwm_rates[ridx].rate); } else { /* * For non-data, use the lowest supported rate for the given * operational mode. * * Note: there may not be any rate control information available. * This driver currently assumes if we're transmitting data * frames, use the rate control table. Grr. * * XXX TODO: use the configured rate for the traffic type! * XXX TODO: this should be per-vap, not curmode; as we later * on we'll want to handle off-channel stuff (eg TDLS). */ if (ic->ic_curmode == IEEE80211_MODE_11A) { /* * XXX this assumes the mode is either 11a or not 11a; * definitely won't work for 11n. */ ridx = IWM_RIDX_OFDM; } else { ridx = IWM_RIDX_CCK; } } rinfo = &iwm_rates[ridx]; IWM_DPRINTF(sc, IWM_DEBUG_TXRATE, "%s: ridx=%d; rate=%d, CCK=%d\n", __func__, ridx, rinfo->rate, !! (IWM_RIDX_IS_CCK(ridx)) ); /* XXX TODO: hard-coded TX antenna? 
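 *
 * For a data frame the selection above boils down to: take net80211's
 * current rate for the node (ni_txrate, in 500 kbit/s units), find the slot
 * in the node's programmed rate table carrying that rate, and hand the
 * firmware that slot index together with IWM_TX_CMD_FLG_STA_RATE.  E.g. with
 * ni_txrate == 36 (18 Mbit/s):
 *
 *	i = iwm_tx_rateidx_lookup(sc, in, 36);	(slot whose rate field is 36)
 *	ridx = in->in_ridx[i];			(index into iwm_rates[])
 *	tx->initial_rate_index = i;
 *	tx->tx_flags |= htole32(IWM_TX_CMD_FLG_STA_RATE);
 *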
*/ rate_flags = 1 << IWM_RATE_MCS_ANT_POS; if (IWM_RIDX_IS_CCK(ridx)) rate_flags |= IWM_RATE_MCS_CCK_MSK; tx->rate_n_flags = htole32(rate_flags | rinfo->plcp); return rinfo; } #define TB0_SIZE 16 static int iwm_tx(struct iwm_softc *sc, struct mbuf *m, struct ieee80211_node *ni, int ac) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct iwm_node *in = IWM_NODE(ni); struct iwm_tx_ring *ring; struct iwm_tx_data *data; struct iwm_tfd *desc; struct iwm_device_cmd *cmd; struct iwm_tx_cmd *tx; struct ieee80211_frame *wh; struct ieee80211_key *k = NULL; struct mbuf *m1; const struct iwm_rate *rinfo; uint32_t flags; u_int hdrlen; bus_dma_segment_t *seg, segs[IWM_MAX_SCATTER]; int nsegs; uint8_t tid, type; int i, totlen, error, pad; wh = mtod(m, struct ieee80211_frame *); hdrlen = ieee80211_anyhdrsize(wh); type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK; tid = 0; ring = &sc->txq[ac]; desc = &ring->desc[ring->cur]; memset(desc, 0, sizeof(*desc)); data = &ring->data[ring->cur]; /* Fill out iwm_tx_cmd to send to the firmware */ cmd = &ring->cmd[ring->cur]; cmd->hdr.code = IWM_TX_CMD; cmd->hdr.flags = 0; cmd->hdr.qid = ring->qid; cmd->hdr.idx = ring->cur; tx = (void *)cmd->data; memset(tx, 0, sizeof(*tx)); rinfo = iwm_tx_fill_cmd(sc, in, wh, tx); /* Encrypt the frame if need be. */ if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { /* Retrieve key for TX && do software encryption. */ k = ieee80211_crypto_encap(ni, m); if (k == NULL) { m_freem(m); return (ENOBUFS); } /* 802.11 header may have moved. */ wh = mtod(m, struct ieee80211_frame *); } if (ieee80211_radiotap_active_vap(vap)) { struct iwm_tx_radiotap_header *tap = &sc->sc_txtap; tap->wt_flags = 0; tap->wt_chan_freq = htole16(ni->ni_chan->ic_freq); tap->wt_chan_flags = htole16(ni->ni_chan->ic_flags); tap->wt_rate = rinfo->rate; if (k != NULL) tap->wt_flags |= IEEE80211_RADIOTAP_F_WEP; ieee80211_radiotap_tx(vap, m); } totlen = m->m_pkthdr.len; flags = 0; if (!IEEE80211_IS_MULTICAST(wh->i_addr1)) { flags |= IWM_TX_CMD_FLG_ACK; } if (type == IEEE80211_FC0_TYPE_DATA && (totlen + IEEE80211_CRC_LEN > vap->iv_rtsthreshold) && !IEEE80211_IS_MULTICAST(wh->i_addr1)) { flags |= IWM_TX_CMD_FLG_PROT_REQUIRE; } if (IEEE80211_IS_MULTICAST(wh->i_addr1) || type != IEEE80211_FC0_TYPE_DATA) tx->sta_id = sc->sc_aux_sta.sta_id; else tx->sta_id = IWM_STATION_ID; if (type == IEEE80211_FC0_TYPE_MGT) { uint8_t subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK; if (subtype == IEEE80211_FC0_SUBTYPE_ASSOC_REQ || subtype == IEEE80211_FC0_SUBTYPE_REASSOC_REQ) { tx->pm_frame_timeout = htole16(IWM_PM_FRAME_ASSOC); } else if (subtype == IEEE80211_FC0_SUBTYPE_ACTION) { tx->pm_frame_timeout = htole16(IWM_PM_FRAME_NONE); } else { tx->pm_frame_timeout = htole16(IWM_PM_FRAME_MGMT); } } else { tx->pm_frame_timeout = htole16(IWM_PM_FRAME_NONE); } if (hdrlen & 3) { /* First segment length must be a multiple of 4. */ flags |= IWM_TX_CMD_FLG_MH_PAD; pad = 4 - (hdrlen & 3); } else pad = 0; tx->driver_txop = 0; tx->next_frame_len = 0; tx->len = htole16(totlen); tx->tid_tspec = tid; tx->life_time = htole32(IWM_TX_CMD_LIFE_TIME_INFINITE); /* Set physical address of "scratch area". */ tx->dram_lsb_ptr = htole32(data->scratch_paddr); tx->dram_msb_ptr = iwm_get_dma_hi_addr(data->scratch_paddr); /* Copy 802.11 header in TX command. */ memcpy(((uint8_t *)tx) + sizeof(*tx), wh, hdrlen); flags |= IWM_TX_CMD_FLG_BT_DIS | IWM_TX_CMD_FLG_SEQ_CTL; tx->sec_ctl = 0; tx->tx_flags |= htole32(flags); /* Trim 802.11 header. 
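 *
 * (On the IWM_TX_CMD_FLG_MH_PAD handling above: the first segment length
 * must be a multiple of 4, so e.g. a 26-byte QoS data header is padded by
 * two bytes while a plain 24-byte data header needs no padding:
 *
 *	hdrlen = 26;
 *	pad = (hdrlen & 3) ? 4 - (hdrlen & 3) : 0;	(pad == 2)
 *
 * and the pad is accounted for in the second TB length further down.)
 *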
*/ m_adj(m, hdrlen); error = bus_dmamap_load_mbuf_sg(ring->data_dmat, data->map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { if (error != EFBIG) { device_printf(sc->sc_dev, "can't map mbuf (error %d)\n", error); m_freem(m); return error; } /* Too many DMA segments, linearize mbuf. */ m1 = m_collapse(m, M_NOWAIT, IWM_MAX_SCATTER - 2); if (m1 == NULL) { device_printf(sc->sc_dev, "%s: could not defrag mbuf\n", __func__); m_freem(m); return (ENOBUFS); } m = m1; error = bus_dmamap_load_mbuf_sg(ring->data_dmat, data->map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "can't map mbuf (error %d)\n", error); m_freem(m); return error; } } data->m = m; data->in = in; data->done = 0; IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "sending txd %p, in %p\n", data, data->in); KASSERT(data->in != NULL, ("node is NULL")); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "sending data: qid=%d idx=%d len=%d nsegs=%d txflags=0x%08x rate_n_flags=0x%08x rateidx=%u\n", ring->qid, ring->cur, totlen, nsegs, le32toh(tx->tx_flags), le32toh(tx->rate_n_flags), tx->initial_rate_index ); /* Fill TX descriptor. */ desc->num_tbs = 2 + nsegs; desc->tbs[0].lo = htole32(data->cmd_paddr); desc->tbs[0].hi_n_len = htole16(iwm_get_dma_hi_addr(data->cmd_paddr)) | (TB0_SIZE << 4); desc->tbs[1].lo = htole32(data->cmd_paddr + TB0_SIZE); desc->tbs[1].hi_n_len = htole16(iwm_get_dma_hi_addr(data->cmd_paddr)) | ((sizeof(struct iwm_cmd_header) + sizeof(*tx) + hdrlen + pad - TB0_SIZE) << 4); /* Other DMA segments are for data payload. */ for (i = 0; i < nsegs; i++) { seg = &segs[i]; desc->tbs[i+2].lo = htole32(seg->ds_addr); desc->tbs[i+2].hi_n_len = \ htole16(iwm_get_dma_hi_addr(seg->ds_addr)) | ((seg->ds_len) << 4); } bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->cmd_dma.tag, ring->cmd_dma.map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ring->desc_dma.tag, ring->desc_dma.map, BUS_DMASYNC_PREWRITE); #if 0 iwm_update_sched(sc, ring->qid, ring->cur, tx->sta_id, le16toh(tx->len)); #endif /* Kick TX ring. */ ring->cur = (ring->cur + 1) % IWM_TX_RING_COUNT; IWM_WRITE(sc, IWM_HBUS_TARG_WRPTR, ring->qid << 8 | ring->cur); /* Mark TX ring as full if we reach a certain threshold. */ if (++ring->queued > IWM_TX_RING_HIMARK) { sc->qfullmsk |= 1 << ring->qid; } return 0; } static int iwm_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { struct ieee80211com *ic = ni->ni_ic; struct iwm_softc *sc = ic->ic_softc; int error = 0; IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "->%s begin\n", __func__); if ((sc->sc_flags & IWM_FLAG_HW_INITED) == 0) { m_freem(m); IWM_DPRINTF(sc, IWM_DEBUG_XMIT, "<-%s not RUNNING\n", __func__); return (ENETDOWN); } IWM_LOCK(sc); /* XXX fix this */ if (params == NULL) { error = iwm_tx(sc, m, ni, 0); } else { error = iwm_tx(sc, m, ni, 0); } sc->sc_tx_timer = 5; IWM_UNLOCK(sc); return (error); } /* * mvm/tx.c */ #if 0 /* * Note that there are transports that buffer frames before they reach * the firmware. This means that after flush_tx_path is called, the * queue might not be empty. The race-free way to handle this is to: * 1) set the station as draining * 2) flush the Tx path * 3) wait for the transport queues to be empty */ int iwm_mvm_flush_tx_path(struct iwm_softc *sc, int tfd_msk, int sync) { struct iwm_tx_path_flush_cmd flush_cmd = { .queues_ctl = htole32(tfd_msk), .flush_ctl = htole16(IWM_DUMP_TX_FIFO_FLUSH), }; int ret; ret = iwm_mvm_send_cmd_pdu(sc, IWM_TXPATH_FLUSH, sync ? 
IWM_CMD_SYNC : IWM_CMD_ASYNC, sizeof(flush_cmd), &flush_cmd); if (ret) device_printf(sc->sc_dev, "Flushing tx queue failed: %d\n", ret); return ret; } #endif /* * BEGIN mvm/sta.c */ static int iwm_mvm_send_add_sta_cmd_status(struct iwm_softc *sc, struct iwm_mvm_add_sta_cmd_v7 *cmd, int *status) { return iwm_mvm_send_cmd_pdu_status(sc, IWM_ADD_STA, sizeof(*cmd), cmd, status); } /* send station add/update command to firmware */ static int iwm_mvm_sta_send_to_fw(struct iwm_softc *sc, struct iwm_node *in, int update) { struct iwm_mvm_add_sta_cmd_v7 add_sta_cmd; int ret; uint32_t status; memset(&add_sta_cmd, 0, sizeof(add_sta_cmd)); add_sta_cmd.sta_id = IWM_STATION_ID; add_sta_cmd.mac_id_n_color = htole32(IWM_FW_CMD_ID_AND_COLOR(IWM_DEFAULT_MACID, IWM_DEFAULT_COLOR)); if (!update) { int ac; for (ac = 0; ac < WME_NUM_AC; ac++) { add_sta_cmd.tfd_queue_msk |= htole32(1 << iwm_mvm_ac_to_tx_fifo[ac]); } IEEE80211_ADDR_COPY(&add_sta_cmd.addr, in->in_ni.ni_bssid); } add_sta_cmd.add_modify = update ? 1 : 0; add_sta_cmd.station_flags_msk |= htole32(IWM_STA_FLG_FAT_EN_MSK | IWM_STA_FLG_MIMO_EN_MSK); add_sta_cmd.tid_disable_tx = htole16(0xffff); if (update) add_sta_cmd.modify_mask |= (IWM_STA_MODIFY_TID_DISABLE_TX); status = IWM_ADD_STA_SUCCESS; ret = iwm_mvm_send_add_sta_cmd_status(sc, &add_sta_cmd, &status); if (ret) return ret; switch (status) { case IWM_ADD_STA_SUCCESS: break; default: ret = EIO; device_printf(sc->sc_dev, "IWM_ADD_STA failed\n"); break; } return ret; } static int iwm_mvm_add_sta(struct iwm_softc *sc, struct iwm_node *in) { return iwm_mvm_sta_send_to_fw(sc, in, 0); } static int iwm_mvm_update_sta(struct iwm_softc *sc, struct iwm_node *in) { return iwm_mvm_sta_send_to_fw(sc, in, 1); } static int iwm_mvm_add_int_sta_common(struct iwm_softc *sc, struct iwm_int_sta *sta, const uint8_t *addr, uint16_t mac_id, uint16_t color) { struct iwm_mvm_add_sta_cmd_v7 cmd; int ret; uint32_t status; memset(&cmd, 0, sizeof(cmd)); cmd.sta_id = sta->sta_id; cmd.mac_id_n_color = htole32(IWM_FW_CMD_ID_AND_COLOR(mac_id, color)); cmd.tfd_queue_msk = htole32(sta->tfd_queue_msk); cmd.tid_disable_tx = htole16(0xffff); if (addr) IEEE80211_ADDR_COPY(cmd.addr, addr); ret = iwm_mvm_send_add_sta_cmd_status(sc, &cmd, &status); if (ret) return ret; switch (status) { case IWM_ADD_STA_SUCCESS: IWM_DPRINTF(sc, IWM_DEBUG_RESET, "%s: Internal station added.\n", __func__); return 0; default: device_printf(sc->sc_dev, "%s: Add internal station failed, status=0x%x\n", __func__, status); ret = EIO; break; } return ret; } static int iwm_mvm_add_aux_sta(struct iwm_softc *sc) { int ret; sc->sc_aux_sta.sta_id = IWM_AUX_STA_ID; sc->sc_aux_sta.tfd_queue_msk = (1 << IWM_MVM_AUX_QUEUE); ret = iwm_enable_txq(sc, 0, IWM_MVM_AUX_QUEUE, IWM_MVM_TX_FIFO_MCAST); if (ret) return ret; ret = iwm_mvm_add_int_sta_common(sc, &sc->sc_aux_sta, NULL, IWM_MAC_INDEX_AUX, 0); if (ret) memset(&sc->sc_aux_sta, 0, sizeof(sc->sc_aux_sta)); return ret; } /* * END mvm/sta.c */ /* * BEGIN mvm/quota.c */ static int iwm_mvm_update_quotas(struct iwm_softc *sc, struct iwm_node *in) { struct iwm_time_quota_cmd cmd; int i, idx, ret, num_active_macs, quota, quota_rem; int colors[IWM_MAX_BINDINGS] = { -1, -1, -1, -1, }; int n_ifs[IWM_MAX_BINDINGS] = {0, }; uint16_t id; memset(&cmd, 0, sizeof(cmd)); /* currently, PHY ID == binding ID */ if (in) { id = in->in_phyctxt->id; KASSERT(id < IWM_MAX_BINDINGS, ("invalid id")); colors[id] = in->in_phyctxt->color; if (1) n_ifs[id] = 1; } /* * The FW's scheduling session consists of * IWM_MVM_MAX_QUOTA fragments. 
Divide these fragments * equally between all the bindings that require quota */ num_active_macs = 0; for (i = 0; i < IWM_MAX_BINDINGS; i++) { cmd.quotas[i].id_and_color = htole32(IWM_FW_CTXT_INVALID); num_active_macs += n_ifs[i]; } quota = 0; quota_rem = 0; if (num_active_macs) { quota = IWM_MVM_MAX_QUOTA / num_active_macs; quota_rem = IWM_MVM_MAX_QUOTA % num_active_macs; } for (idx = 0, i = 0; i < IWM_MAX_BINDINGS; i++) { if (colors[i] < 0) continue; cmd.quotas[idx].id_and_color = htole32(IWM_FW_CMD_ID_AND_COLOR(i, colors[i])); if (n_ifs[i] <= 0) { cmd.quotas[idx].quota = htole32(0); cmd.quotas[idx].max_duration = htole32(0); } else { cmd.quotas[idx].quota = htole32(quota * n_ifs[i]); cmd.quotas[idx].max_duration = htole32(0); } idx++; } /* Give the remainder of the session to the first binding */ cmd.quotas[0].quota = htole32(le32toh(cmd.quotas[0].quota) + quota_rem); ret = iwm_mvm_send_cmd_pdu(sc, IWM_TIME_QUOTA_CMD, IWM_CMD_SYNC, sizeof(cmd), &cmd); if (ret) device_printf(sc->sc_dev, "%s: Failed to send quota: %d\n", __func__, ret); return ret; } /* * END mvm/quota.c */ /* * ieee80211 routines */ /* * Change to AUTH state in 80211 state machine. Roughly matches what * Linux does in bss_info_changed(). */ static int iwm_auth(struct ieee80211vap *vap, struct iwm_softc *sc) { struct ieee80211_node *ni; struct iwm_node *in; struct iwm_vap *iv = IWM_VAP(vap); uint32_t duration; int error; /* * XXX i have a feeling that the vap node is being * freed from underneath us. Grr. */ ni = ieee80211_ref_node(vap->iv_bss); in = IWM_NODE(ni); IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_STATE, "%s: called; vap=%p, bss ni=%p\n", __func__, vap, ni); in->in_assoc = 0; error = iwm_mvm_sf_config(sc, IWM_SF_FULL_ON); if (error != 0) return error; error = iwm_allow_mcast(vap, sc); if (error) { device_printf(sc->sc_dev, "%s: failed to set multicast\n", __func__); goto out; } /* * This is where it deviates from what Linux does. * * Linux iwlwifi doesn't reset the nic each time, nor does it * call ctxt_add() here. Instead, it adds it during vap creation, * and always does a mac_ctx_changed(). * * The openbsd port doesn't attempt to do that - it reset things * at odd states and does the add here. * * So, until the state handling is fixed (ie, we never reset * the NIC except for a firmware failure, which should drag * the NIC back to IDLE, re-setup and re-add all the mac/phy * contexts that are required), let's do a dirty hack here. 
*/ if (iv->is_uploaded) { if ((error = iwm_mvm_mac_ctxt_changed(sc, vap)) != 0) { device_printf(sc->sc_dev, "%s: failed to update MAC\n", __func__); goto out; } if ((error = iwm_mvm_phy_ctxt_changed(sc, &sc->sc_phyctxt[0], in->in_ni.ni_chan, 1, 1)) != 0) { device_printf(sc->sc_dev, "%s: failed update phy ctxt\n", __func__); goto out; } in->in_phyctxt = &sc->sc_phyctxt[0]; if ((error = iwm_mvm_binding_update(sc, in)) != 0) { device_printf(sc->sc_dev, "%s: binding update cmd\n", __func__); goto out; } if ((error = iwm_mvm_update_sta(sc, in)) != 0) { device_printf(sc->sc_dev, "%s: failed to update sta\n", __func__); goto out; } } else { if ((error = iwm_mvm_mac_ctxt_add(sc, vap)) != 0) { device_printf(sc->sc_dev, "%s: failed to add MAC\n", __func__); goto out; } if ((error = iwm_mvm_phy_ctxt_changed(sc, &sc->sc_phyctxt[0], in->in_ni.ni_chan, 1, 1)) != 0) { device_printf(sc->sc_dev, "%s: failed add phy ctxt!\n", __func__); error = ETIMEDOUT; goto out; } in->in_phyctxt = &sc->sc_phyctxt[0]; if ((error = iwm_mvm_binding_add_vif(sc, in)) != 0) { device_printf(sc->sc_dev, "%s: binding add cmd\n", __func__); goto out; } if ((error = iwm_mvm_add_sta(sc, in)) != 0) { device_printf(sc->sc_dev, "%s: failed to add sta\n", __func__); goto out; } } /* * Prevent the FW from wandering off channel during association * by "protecting" the session with a time event. */ /* XXX duration is in units of TU, not MS */ duration = IWM_MVM_TE_SESSION_PROTECTION_MAX_TIME_MS; iwm_mvm_protect_session(sc, in, duration, 500 /* XXX magic number */); DELAY(100); error = 0; out: ieee80211_free_node(ni); return (error); } static int iwm_assoc(struct ieee80211vap *vap, struct iwm_softc *sc) { struct iwm_node *in = IWM_NODE(vap->iv_bss); int error; if ((error = iwm_mvm_update_sta(sc, in)) != 0) { device_printf(sc->sc_dev, "%s: failed to update STA\n", __func__); return error; } in->in_assoc = 1; if ((error = iwm_mvm_mac_ctxt_changed(sc, vap)) != 0) { device_printf(sc->sc_dev, "%s: failed to update MAC\n", __func__); return error; } return 0; } static int iwm_release(struct iwm_softc *sc, struct iwm_node *in) { /* * Ok, so *technically* the proper set of calls for going * from RUN back to SCAN is: * * iwm_mvm_power_mac_disable(sc, in); * iwm_mvm_mac_ctxt_changed(sc, in); * iwm_mvm_rm_sta(sc, in); * iwm_mvm_update_quotas(sc, NULL); * iwm_mvm_mac_ctxt_changed(sc, in); * iwm_mvm_binding_remove_vif(sc, in); * iwm_mvm_mac_ctxt_remove(sc, in); * * However, that freezes the device not matter which permutations * and modifications are attempted. Obviously, this driver is missing * something since it works in the Linux driver, but figuring out what * is missing is a little more complicated. Now, since we're going * back to nothing anyway, we'll just do a complete device reset. * Up your's, device! 
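 *
 * Concretely, the "complete device reset" below is iwm_stop_device()
 * followed by iwm_init_hw(), which reloads the regular firmware and
 * re-runs the whole init sequence (aux station, PHY contexts, TX
 * queues); afterwards the node is simply marked as not associated.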
*/ /* iwm_mvm_flush_tx_path(sc, 0xf, 1); */ iwm_stop_device(sc); iwm_init_hw(sc); if (in) in->in_assoc = 0; return 0; #if 0 int error; iwm_mvm_power_mac_disable(sc, in); if ((error = iwm_mvm_mac_ctxt_changed(sc, in)) != 0) { device_printf(sc->sc_dev, "mac ctxt change fail 1 %d\n", error); return error; } if ((error = iwm_mvm_rm_sta(sc, in)) != 0) { device_printf(sc->sc_dev, "sta remove fail %d\n", error); return error; } error = iwm_mvm_rm_sta(sc, in); in->in_assoc = 0; iwm_mvm_update_quotas(sc, NULL); if ((error = iwm_mvm_mac_ctxt_changed(sc, in)) != 0) { device_printf(sc->sc_dev, "mac ctxt change fail 2 %d\n", error); return error; } iwm_mvm_binding_remove_vif(sc, in); iwm_mvm_mac_ctxt_remove(sc, in); return error; #endif } static struct ieee80211_node * iwm_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { return malloc(sizeof (struct iwm_node), M_80211_NODE, M_NOWAIT | M_ZERO); } static void iwm_setrates(struct iwm_softc *sc, struct iwm_node *in) { struct ieee80211_node *ni = &in->in_ni; struct iwm_lq_cmd *lq = &in->in_lq; int nrates = ni->ni_rates.rs_nrates; int i, ridx, tab = 0; int txant = 0; if (nrates > nitems(lq->rs_table)) { device_printf(sc->sc_dev, "%s: node supports %d rates, driver handles " "only %zu\n", __func__, nrates, nitems(lq->rs_table)); return; } if (nrates == 0) { device_printf(sc->sc_dev, "%s: node supports 0 rates, odd!\n", __func__); return; } /* * XXX .. and most of iwm_node is not initialised explicitly; * it's all just 0x0 passed to the firmware. */ /* first figure out which rates we should support */ /* XXX TODO: this isn't 11n aware /at all/ */ memset(&in->in_ridx, -1, sizeof(in->in_ridx)); IWM_DPRINTF(sc, IWM_DEBUG_TXRATE, "%s: nrates=%d\n", __func__, nrates); /* * Loop over nrates and populate in_ridx from the highest * rate to the lowest rate. Remember, in_ridx[] has * IEEE80211_RATE_MAXSIZE entries! */ for (i = 0; i < min(nrates, IEEE80211_RATE_MAXSIZE); i++) { int rate = ni->ni_rates.rs_rates[(nrates - 1) - i] & IEEE80211_RATE_VAL; /* Map 802.11 rate to HW rate index. */ for (ridx = 0; ridx <= IWM_RIDX_MAX; ridx++) if (iwm_rates[ridx].rate == rate) break; if (ridx > IWM_RIDX_MAX) { device_printf(sc->sc_dev, "%s: WARNING: device rate for %d not found!\n", __func__, rate); } else { IWM_DPRINTF(sc, IWM_DEBUG_TXRATE, "%s: rate: i: %d, rate=%d, ridx=%d\n", __func__, i, rate, ridx); in->in_ridx[i] = ridx; } } /* then construct a lq_cmd based on those */ memset(lq, 0, sizeof(*lq)); lq->sta_id = IWM_STATION_ID; /* For HT, always enable RTS/CTS to avoid excessive retries. */ if (ni->ni_flags & IEEE80211_NODE_HT) lq->flags |= IWM_LQ_FLAG_USE_RTS_MSK; /* * are these used? (we don't do SISO or MIMO) * need to set them to non-zero, though, or we get an error. */ lq->single_stream_ant_msk = 1; lq->dual_stream_ant_msk = 1; /* * Build the actual rate selection table. * The lowest bits are the rates. Additionally, * CCK needs bit 9 to be set. The rest of the bits * we add to the table select the tx antenna * Note that we add the rates in the highest rate first * (opposite of ni_rates). */ /* * XXX TODO: this should be looping over the min of nrates * and LQ_MAX_RETRY_NUM. Sigh. */ for (i = 0; i < nrates; i++) { int nextant; if (txant == 0) txant = iwm_fw_valid_tx_ant(sc); nextant = 1<<(ffs(txant)-1); txant &= ~nextant; /* * Map the rate id into a rate index into * our hardware table containing the * configuration to use for this rate. 
*/ ridx = in->in_ridx[i]; tab = iwm_rates[ridx].plcp; tab |= nextant << IWM_RATE_MCS_ANT_POS; if (IWM_RIDX_IS_CCK(ridx)) tab |= IWM_RATE_MCS_CCK_MSK; IWM_DPRINTF(sc, IWM_DEBUG_TXRATE, "station rate i=%d, rate=%d, hw=%x\n", i, iwm_rates[ridx].rate, tab); lq->rs_table[i] = htole32(tab); } /* then fill the rest with the lowest possible rate */ for (i = nrates; i < nitems(lq->rs_table); i++) { KASSERT(tab != 0, ("invalid tab")); lq->rs_table[i] = htole32(tab); } } static int iwm_media_change(struct ifnet *ifp) { struct ieee80211vap *vap = ifp->if_softc; struct ieee80211com *ic = vap->iv_ic; struct iwm_softc *sc = ic->ic_softc; int error; error = ieee80211_media_change(ifp); if (error != ENETRESET) return error; IWM_LOCK(sc); if (ic->ic_nrunning > 0) { iwm_stop(sc); iwm_init(sc); } IWM_UNLOCK(sc); return error; } static int iwm_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct iwm_vap *ivp = IWM_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct iwm_softc *sc = ic->ic_softc; struct iwm_node *in; int error; IWM_DPRINTF(sc, IWM_DEBUG_STATE, "switching state %s -> %s\n", ieee80211_state_name[vap->iv_state], ieee80211_state_name[nstate]); IEEE80211_UNLOCK(ic); IWM_LOCK(sc); if (vap->iv_state == IEEE80211_S_SCAN && nstate != vap->iv_state) iwm_led_blink_stop(sc); /* disable beacon filtering if we're hopping out of RUN */ if (vap->iv_state == IEEE80211_S_RUN && nstate != vap->iv_state) { iwm_mvm_disable_beacon_filter(sc); if (((in = IWM_NODE(vap->iv_bss)) != NULL)) in->in_assoc = 0; iwm_release(sc, NULL); /* * It's impossible to directly go RUN->SCAN. If we iwm_release() * above then the card will be completely reinitialized, * so the driver must do everything necessary to bring the card * from INIT to SCAN. * * Additionally, upon receiving deauth frame from AP, * OpenBSD 802.11 stack puts the driver in IEEE80211_S_AUTH * state. This will also fail with this driver, so bring the FSM * from IEEE80211_S_RUN to IEEE80211_S_SCAN in this case as well. * * XXX TODO: fix this for FreeBSD! */ if (nstate == IEEE80211_S_SCAN || nstate == IEEE80211_S_AUTH || nstate == IEEE80211_S_ASSOC) { IWM_DPRINTF(sc, IWM_DEBUG_STATE, "Force transition to INIT; MGT=%d\n", arg); IWM_UNLOCK(sc); IEEE80211_LOCK(ic); /* Always pass arg as -1 since we can't Tx right now. */ /* * XXX arg is just ignored anyway when transitioning * to IEEE80211_S_INIT. 
*/ vap->iv_newstate(vap, IEEE80211_S_INIT, -1); IWM_DPRINTF(sc, IWM_DEBUG_STATE, "Going INIT->SCAN\n"); nstate = IEEE80211_S_SCAN; IEEE80211_UNLOCK(ic); IWM_LOCK(sc); } } switch (nstate) { case IEEE80211_S_INIT: break; case IEEE80211_S_AUTH: if ((error = iwm_auth(vap, sc)) != 0) { device_printf(sc->sc_dev, "%s: could not move to auth state: %d\n", __func__, error); break; } break; case IEEE80211_S_ASSOC: if ((error = iwm_assoc(vap, sc)) != 0) { device_printf(sc->sc_dev, "%s: failed to associate: %d\n", __func__, error); break; } break; case IEEE80211_S_RUN: { struct iwm_host_cmd cmd = { .id = IWM_LQ_CMD, .len = { sizeof(in->in_lq), }, .flags = IWM_CMD_SYNC, }; /* Update the association state, now we have it all */ /* (eg associd comes in at this point */ error = iwm_assoc(vap, sc); if (error != 0) { device_printf(sc->sc_dev, "%s: failed to update association state: %d\n", __func__, error); break; } in = IWM_NODE(vap->iv_bss); iwm_mvm_power_mac_update_mode(sc, in); iwm_mvm_enable_beacon_filter(sc, in); iwm_mvm_update_quotas(sc, in); iwm_setrates(sc, in); cmd.data[0] = &in->in_lq; if ((error = iwm_send_cmd(sc, &cmd)) != 0) { device_printf(sc->sc_dev, "%s: IWM_LQ_CMD failed\n", __func__); } iwm_mvm_led_enable(sc); break; } default: break; } IWM_UNLOCK(sc); IEEE80211_LOCK(ic); return (ivp->iv_newstate(vap, nstate, arg)); } void iwm_endscan_cb(void *arg, int pending) { struct iwm_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; IWM_DPRINTF(sc, IWM_DEBUG_SCAN | IWM_DEBUG_TRACE, "%s: scan ended\n", __func__); ieee80211_scan_done(TAILQ_FIRST(&ic->ic_vaps)); } /* * Aging and idle timeouts for the different possible scenarios * in default configuration */ static const uint32_t iwm_sf_full_timeout_def[IWM_SF_NUM_SCENARIO][IWM_SF_NUM_TIMEOUT_TYPES] = { { htole32(IWM_SF_SINGLE_UNICAST_AGING_TIMER_DEF), htole32(IWM_SF_SINGLE_UNICAST_IDLE_TIMER_DEF) }, { htole32(IWM_SF_AGG_UNICAST_AGING_TIMER_DEF), htole32(IWM_SF_AGG_UNICAST_IDLE_TIMER_DEF) }, { htole32(IWM_SF_MCAST_AGING_TIMER_DEF), htole32(IWM_SF_MCAST_IDLE_TIMER_DEF) }, { htole32(IWM_SF_BA_AGING_TIMER_DEF), htole32(IWM_SF_BA_IDLE_TIMER_DEF) }, { htole32(IWM_SF_TX_RE_AGING_TIMER_DEF), htole32(IWM_SF_TX_RE_IDLE_TIMER_DEF) }, }; /* * Aging and idle timeouts for the different possible scenarios * in single BSS MAC configuration. */ static const uint32_t iwm_sf_full_timeout[IWM_SF_NUM_SCENARIO][IWM_SF_NUM_TIMEOUT_TYPES] = { { htole32(IWM_SF_SINGLE_UNICAST_AGING_TIMER), htole32(IWM_SF_SINGLE_UNICAST_IDLE_TIMER) }, { htole32(IWM_SF_AGG_UNICAST_AGING_TIMER), htole32(IWM_SF_AGG_UNICAST_IDLE_TIMER) }, { htole32(IWM_SF_MCAST_AGING_TIMER), htole32(IWM_SF_MCAST_IDLE_TIMER) }, { htole32(IWM_SF_BA_AGING_TIMER), htole32(IWM_SF_BA_IDLE_TIMER) }, { htole32(IWM_SF_TX_RE_AGING_TIMER), htole32(IWM_SF_TX_RE_IDLE_TIMER) }, }; static void iwm_mvm_fill_sf_command(struct iwm_softc *sc, struct iwm_sf_cfg_cmd *sf_cmd, struct ieee80211_node *ni) { int i, j, watermark; sf_cmd->watermark[IWM_SF_LONG_DELAY_ON] = htole32(IWM_SF_W_MARK_SCAN); /* * If we are in association flow - check antenna configuration * capabilities of the AP station, and choose the watermark accordingly. */ if (ni) { if (ni->ni_flags & IEEE80211_NODE_HT) { #ifdef notyet if (ni->ni_rxmcs[2] != 0) watermark = IWM_SF_W_MARK_MIMO3; else if (ni->ni_rxmcs[1] != 0) watermark = IWM_SF_W_MARK_MIMO2; else #endif watermark = IWM_SF_W_MARK_SISO; } else { watermark = IWM_SF_W_MARK_LEGACY; } /* default watermark value for unassociated mode. 
*/ } else { watermark = IWM_SF_W_MARK_MIMO2; } sf_cmd->watermark[IWM_SF_FULL_ON] = htole32(watermark); for (i = 0; i < IWM_SF_NUM_SCENARIO; i++) { for (j = 0; j < IWM_SF_NUM_TIMEOUT_TYPES; j++) { sf_cmd->long_delay_timeouts[i][j] = htole32(IWM_SF_LONG_DELAY_AGING_TIMER); } } if (ni) { memcpy(sf_cmd->full_on_timeouts, iwm_sf_full_timeout, sizeof(iwm_sf_full_timeout)); } else { memcpy(sf_cmd->full_on_timeouts, iwm_sf_full_timeout_def, sizeof(iwm_sf_full_timeout_def)); } } static int iwm_mvm_sf_config(struct iwm_softc *sc, enum iwm_sf_state new_state) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct iwm_sf_cfg_cmd sf_cmd = { .state = htole32(IWM_SF_FULL_ON), }; int ret = 0; if (sc->sc_device_family == IWM_DEVICE_FAMILY_8000) sf_cmd.state |= htole32(IWM_SF_CFG_DUMMY_NOTIF_OFF); switch (new_state) { case IWM_SF_UNINIT: case IWM_SF_INIT_OFF: iwm_mvm_fill_sf_command(sc, &sf_cmd, NULL); break; case IWM_SF_FULL_ON: iwm_mvm_fill_sf_command(sc, &sf_cmd, vap->iv_bss); break; default: IWM_DPRINTF(sc, IWM_DEBUG_PWRSAVE, "Invalid state: %d. not sending Smart Fifo cmd\n", new_state); return EINVAL; } ret = iwm_mvm_send_cmd_pdu(sc, IWM_REPLY_SF_CFG_CMD, IWM_CMD_ASYNC, sizeof(sf_cmd), &sf_cmd); return ret; } static int iwm_send_bt_init_conf(struct iwm_softc *sc) { struct iwm_bt_coex_cmd bt_cmd; bt_cmd.mode = htole32(IWM_BT_COEX_WIFI); bt_cmd.enabled_modules = htole32(IWM_BT_COEX_HIGH_BAND_RET); return iwm_mvm_send_cmd_pdu(sc, IWM_BT_CONFIG, 0, sizeof(bt_cmd), &bt_cmd); } static int iwm_send_update_mcc_cmd(struct iwm_softc *sc, const char *alpha2) { struct iwm_mcc_update_cmd mcc_cmd; struct iwm_host_cmd hcmd = { .id = IWM_MCC_UPDATE_CMD, .flags = (IWM_CMD_SYNC | IWM_CMD_WANT_SKB), .data = { &mcc_cmd }, }; int ret; #ifdef IWM_DEBUG struct iwm_rx_packet *pkt; struct iwm_mcc_update_resp_v1 *mcc_resp_v1 = NULL; struct iwm_mcc_update_resp *mcc_resp; int n_channels; uint16_t mcc; #endif int resp_v2 = isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_LAR_SUPPORT_V2); memset(&mcc_cmd, 0, sizeof(mcc_cmd)); mcc_cmd.mcc = htole16(alpha2[0] << 8 | alpha2[1]); if ((sc->sc_ucode_api & IWM_UCODE_TLV_API_WIFI_MCC_UPDATE) || isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_LAR_MULTI_MCC)) mcc_cmd.source_id = IWM_MCC_SOURCE_GET_CURRENT; else mcc_cmd.source_id = IWM_MCC_SOURCE_OLD_FW; if (resp_v2) hcmd.len[0] = sizeof(struct iwm_mcc_update_cmd); else hcmd.len[0] = sizeof(struct iwm_mcc_update_cmd_v1); IWM_DPRINTF(sc, IWM_DEBUG_NODE, "send MCC update to FW with '%c%c' src = %d\n", alpha2[0], alpha2[1], mcc_cmd.source_id); ret = iwm_send_cmd(sc, &hcmd); if (ret) return ret; #ifdef IWM_DEBUG pkt = hcmd.resp_pkt; /* Extract MCC response */ if (resp_v2) { mcc_resp = (void *)pkt->data; mcc = mcc_resp->mcc; n_channels = le32toh(mcc_resp->n_channels); } else { mcc_resp_v1 = (void *)pkt->data; mcc = mcc_resp_v1->mcc; n_channels = le32toh(mcc_resp_v1->n_channels); } /* W/A for a FW/NVM issue - returns 0x00 for the world domain */ if (mcc == 0) mcc = 0x3030; /* "00" - world */ IWM_DPRINTF(sc, IWM_DEBUG_NODE, "regulatory domain '%c%c' (%d channels available)\n", mcc >> 8, mcc & 0xff, n_channels); #endif iwm_free_resp(sc, &hcmd); return 0; } static void iwm_mvm_tt_tx_backoff(struct iwm_softc *sc, uint32_t backoff) { struct iwm_host_cmd cmd = { .id = IWM_REPLY_THERMAL_MNG_BACKOFF, .len = { sizeof(uint32_t), }, .data = { &backoff, }, }; if (iwm_send_cmd(sc, &cmd) != 0) { device_printf(sc->sc_dev, "failed to change thermal tx backoff\n"); } } static int iwm_init_hw(struct iwm_softc *sc) { struct 
ieee80211com *ic = &sc->sc_ic; int error, i, ac; if ((error = iwm_start_hw(sc)) != 0) { printf("iwm_start_hw: failed %d\n", error); return error; } if ((error = iwm_run_init_mvm_ucode(sc, 0)) != 0) { printf("iwm_run_init_mvm_ucode: failed %d\n", error); return error; } /* * should stop and start HW since that INIT * image just loaded */ iwm_stop_device(sc); if ((error = iwm_start_hw(sc)) != 0) { device_printf(sc->sc_dev, "could not initialize hardware\n"); return error; } /* omstart, this time with the regular firmware */ error = iwm_mvm_load_ucode_wait_alive(sc, IWM_UCODE_TYPE_REGULAR); if (error) { device_printf(sc->sc_dev, "could not load firmware\n"); goto error; } if ((error = iwm_send_bt_init_conf(sc)) != 0) { device_printf(sc->sc_dev, "bt init conf failed\n"); goto error; } if ((error = iwm_send_tx_ant_cfg(sc, iwm_fw_valid_tx_ant(sc))) != 0) { device_printf(sc->sc_dev, "antenna config failed\n"); goto error; } /* Send phy db control command and then phy db calibration*/ if ((error = iwm_send_phy_db_data(sc)) != 0) { device_printf(sc->sc_dev, "phy_db_data failed\n"); goto error; } if ((error = iwm_send_phy_cfg_cmd(sc)) != 0) { device_printf(sc->sc_dev, "phy_cfg_cmd failed\n"); goto error; } /* Add auxiliary station for scanning */ if ((error = iwm_mvm_add_aux_sta(sc)) != 0) { device_printf(sc->sc_dev, "add_aux_sta failed\n"); goto error; } for (i = 0; i < IWM_NUM_PHY_CTX; i++) { /* * The channel used here isn't relevant as it's * going to be overwritten in the other flows. * For now use the first channel we have. */ if ((error = iwm_mvm_phy_ctxt_add(sc, &sc->sc_phyctxt[i], &ic->ic_channels[1], 1, 1)) != 0) goto error; } /* Initialize tx backoffs to the minimum. */ if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) iwm_mvm_tt_tx_backoff(sc, 0); error = iwm_mvm_power_update_device(sc); if (error) goto error; if (isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_LAR_SUPPORT)) { if ((error = iwm_send_update_mcc_cmd(sc, "ZZ")) != 0) goto error; } if (isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_UMAC_SCAN)) { if ((error = iwm_mvm_config_umac_scan(sc)) != 0) goto error; } /* Enable Tx queues. */ for (ac = 0; ac < WME_NUM_AC; ac++) { error = iwm_enable_txq(sc, IWM_STATION_ID, ac, iwm_mvm_ac_to_tx_fifo[ac]); if (error) goto error; } if ((error = iwm_mvm_disable_beacon_filter(sc)) != 0) { device_printf(sc->sc_dev, "failed to disable beacon filter\n"); goto error; } return 0; error: iwm_stop_device(sc); return error; } /* Allow multicast from our BSSID. 
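 *
 * In practice the command built below sets pass_all (with count 0),
 * so no multicast filtering is actually applied; only filter_own and
 * our BSSID are filled in.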
*/ static int iwm_allow_mcast(struct ieee80211vap *vap, struct iwm_softc *sc) { struct ieee80211_node *ni = vap->iv_bss; struct iwm_mcast_filter_cmd *cmd; size_t size; int error; size = roundup(sizeof(*cmd), 4); cmd = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO); if (cmd == NULL) return ENOMEM; cmd->filter_own = 1; cmd->port_id = 0; cmd->count = 0; cmd->pass_all = 1; IEEE80211_ADDR_COPY(cmd->bssid, ni->ni_bssid); error = iwm_mvm_send_cmd_pdu(sc, IWM_MCAST_FILTER_CMD, IWM_CMD_SYNC, size, cmd); free(cmd, M_DEVBUF); return (error); } /* * ifnet interfaces */ static void iwm_init(struct iwm_softc *sc) { int error; if (sc->sc_flags & IWM_FLAG_HW_INITED) { return; } sc->sc_generation++; sc->sc_flags &= ~IWM_FLAG_STOPPED; if ((error = iwm_init_hw(sc)) != 0) { printf("iwm_init_hw failed %d\n", error); iwm_stop(sc); return; } /* * Ok, firmware loaded and we are jogging */ sc->sc_flags |= IWM_FLAG_HW_INITED; callout_reset(&sc->sc_watchdog_to, hz, iwm_watchdog, sc); } static int iwm_transmit(struct ieee80211com *ic, struct mbuf *m) { struct iwm_softc *sc; int error; sc = ic->ic_softc; IWM_LOCK(sc); if ((sc->sc_flags & IWM_FLAG_HW_INITED) == 0) { IWM_UNLOCK(sc); return (ENXIO); } error = mbufq_enqueue(&sc->sc_snd, m); if (error) { IWM_UNLOCK(sc); return (error); } iwm_start(sc); IWM_UNLOCK(sc); return (0); } /* * Dequeue packets from sendq and call send. */ static void iwm_start(struct iwm_softc *sc) { struct ieee80211_node *ni; struct mbuf *m; int ac = 0; IWM_DPRINTF(sc, IWM_DEBUG_XMIT | IWM_DEBUG_TRACE, "->%s\n", __func__); while (sc->qfullmsk == 0 && (m = mbufq_dequeue(&sc->sc_snd)) != NULL) { ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; if (iwm_tx(sc, m, ni, ac) != 0) { if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); ieee80211_free_node(ni); continue; } sc->sc_tx_timer = 15; } IWM_DPRINTF(sc, IWM_DEBUG_XMIT | IWM_DEBUG_TRACE, "<-%s\n", __func__); } static void iwm_stop(struct iwm_softc *sc) { sc->sc_flags &= ~IWM_FLAG_HW_INITED; sc->sc_flags |= IWM_FLAG_STOPPED; sc->sc_generation++; iwm_led_blink_stop(sc); sc->sc_tx_timer = 0; iwm_stop_device(sc); } static void iwm_watchdog(void *arg) { struct iwm_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; if (sc->sc_tx_timer > 0) { if (--sc->sc_tx_timer == 0) { device_printf(sc->sc_dev, "device timeout\n"); #ifdef IWM_DEBUG iwm_nic_error(sc); #endif ieee80211_restart_all(ic); counter_u64_add(sc->sc_ic.ic_oerrors, 1); return; } } callout_reset(&sc->sc_watchdog_to, hz, iwm_watchdog, sc); } static void iwm_parent(struct ieee80211com *ic) { struct iwm_softc *sc = ic->ic_softc; int startall = 0; IWM_LOCK(sc); if (ic->ic_nrunning > 0) { if (!(sc->sc_flags & IWM_FLAG_HW_INITED)) { iwm_init(sc); startall = 1; } } else if (sc->sc_flags & IWM_FLAG_HW_INITED) iwm_stop(sc); IWM_UNLOCK(sc); if (startall) ieee80211_start_all(ic); } /* * The interrupt side of things */ /* * error dumping routines are from iwlwifi/mvm/utils.c */ /* * Note: This structure is read from the device with IO accesses, * and the reading already does the endian conversion. As it is * read with uint32_t-sized accesses, any members with a different size * need to be ordered correctly though! 
*/ struct iwm_error_event_table { uint32_t valid; /* (nonzero) valid, (0) log is empty */ uint32_t error_id; /* type of error */ uint32_t trm_hw_status0; /* TRM HW status */ uint32_t trm_hw_status1; /* TRM HW status */ uint32_t blink2; /* branch link */ uint32_t ilink1; /* interrupt link */ uint32_t ilink2; /* interrupt link */ uint32_t data1; /* error-specific data */ uint32_t data2; /* error-specific data */ uint32_t data3; /* error-specific data */ uint32_t bcon_time; /* beacon timer */ uint32_t tsf_low; /* network timestamp function timer */ uint32_t tsf_hi; /* network timestamp function timer */ uint32_t gp1; /* GP1 timer register */ uint32_t gp2; /* GP2 timer register */ uint32_t fw_rev_type; /* firmware revision type */ uint32_t major; /* uCode version major */ uint32_t minor; /* uCode version minor */ uint32_t hw_ver; /* HW Silicon version */ uint32_t brd_ver; /* HW board version */ uint32_t log_pc; /* log program counter */ uint32_t frame_ptr; /* frame pointer */ uint32_t stack_ptr; /* stack pointer */ uint32_t hcmd; /* last host command header */ uint32_t isr0; /* isr status register LMPM_NIC_ISR0: * rxtx_flag */ uint32_t isr1; /* isr status register LMPM_NIC_ISR1: * host_flag */ uint32_t isr2; /* isr status register LMPM_NIC_ISR2: * enc_flag */ uint32_t isr3; /* isr status register LMPM_NIC_ISR3: * time_flag */ uint32_t isr4; /* isr status register LMPM_NIC_ISR4: * wico interrupt */ uint32_t last_cmd_id; /* last HCMD id handled by the firmware */ uint32_t wait_event; /* wait event() caller address */ uint32_t l2p_control; /* L2pControlField */ uint32_t l2p_duration; /* L2pDurationField */ uint32_t l2p_mhvalid; /* L2pMhValidBits */ uint32_t l2p_addr_match; /* L2pAddrMatchStat */ uint32_t lmpm_pmg_sel; /* indicate which clocks are turned on * (LMPM_PMG_SEL) */ uint32_t u_timestamp; /* indicate when the date and time of the * compilation */ uint32_t flow_handler; /* FH read/write pointers, RX credit */ } __packed /* LOG_ERROR_TABLE_API_S_VER_3 */; /* * UMAC error struct - relevant starting from family 8000 chip. * Note: This structure is read from the device with IO accesses, * and the reading already does the endian conversion. As it is * read with u32-sized accesses, any members with a different size * need to be ordered correctly though! 
*/ struct iwm_umac_error_event_table { uint32_t valid; /* (nonzero) valid, (0) log is empty */ uint32_t error_id; /* type of error */ uint32_t blink1; /* branch link */ uint32_t blink2; /* branch link */ uint32_t ilink1; /* interrupt link */ uint32_t ilink2; /* interrupt link */ uint32_t data1; /* error-specific data */ uint32_t data2; /* error-specific data */ uint32_t data3; /* error-specific data */ uint32_t umac_major; uint32_t umac_minor; uint32_t frame_pointer; /* core register 27*/ uint32_t stack_pointer; /* core register 28 */ uint32_t cmd_header; /* latest host cmd sent to UMAC */ uint32_t nic_isr_pref; /* ISR status register */ } __packed; #define ERROR_START_OFFSET (1 * sizeof(uint32_t)) #define ERROR_ELEM_SIZE (7 * sizeof(uint32_t)) #ifdef IWM_DEBUG struct { const char *name; uint8_t num; } advanced_lookup[] = { { "NMI_INTERRUPT_WDG", 0x34 }, { "SYSASSERT", 0x35 }, { "UCODE_VERSION_MISMATCH", 0x37 }, { "BAD_COMMAND", 0x38 }, { "NMI_INTERRUPT_DATA_ACTION_PT", 0x3C }, { "FATAL_ERROR", 0x3D }, { "NMI_TRM_HW_ERR", 0x46 }, { "NMI_INTERRUPT_TRM", 0x4C }, { "NMI_INTERRUPT_BREAK_POINT", 0x54 }, { "NMI_INTERRUPT_WDG_RXF_FULL", 0x5C }, { "NMI_INTERRUPT_WDG_NO_RBD_RXF_FULL", 0x64 }, { "NMI_INTERRUPT_HOST", 0x66 }, { "NMI_INTERRUPT_ACTION_PT", 0x7C }, { "NMI_INTERRUPT_UNKNOWN", 0x84 }, { "NMI_INTERRUPT_INST_ACTION_PT", 0x86 }, { "ADVANCED_SYSASSERT", 0 }, }; static const char * iwm_desc_lookup(uint32_t num) { int i; for (i = 0; i < nitems(advanced_lookup) - 1; i++) if (advanced_lookup[i].num == num) return advanced_lookup[i].name; /* No entry matches 'num', so it is the last: ADVANCED_SYSASSERT */ return advanced_lookup[i].name; } static void iwm_nic_umac_error(struct iwm_softc *sc) { struct iwm_umac_error_event_table table; uint32_t base; base = sc->sc_uc.uc_umac_error_event_table; if (base < 0x800000) { device_printf(sc->sc_dev, "Invalid error log pointer 0x%08x\n", base); return; } if (iwm_read_mem(sc, base, &table, sizeof(table)/sizeof(uint32_t))) { device_printf(sc->sc_dev, "reading errlog failed\n"); return; } if (ERROR_START_OFFSET <= table.valid * ERROR_ELEM_SIZE) { device_printf(sc->sc_dev, "Start UMAC Error Log Dump:\n"); device_printf(sc->sc_dev, "Status: 0x%x, count: %d\n", sc->sc_flags, table.valid); } device_printf(sc->sc_dev, "0x%08X | %s\n", table.error_id, iwm_desc_lookup(table.error_id)); device_printf(sc->sc_dev, "0x%08X | umac branchlink1\n", table.blink1); device_printf(sc->sc_dev, "0x%08X | umac branchlink2\n", table.blink2); device_printf(sc->sc_dev, "0x%08X | umac interruptlink1\n", table.ilink1); device_printf(sc->sc_dev, "0x%08X | umac interruptlink2\n", table.ilink2); device_printf(sc->sc_dev, "0x%08X | umac data1\n", table.data1); device_printf(sc->sc_dev, "0x%08X | umac data2\n", table.data2); device_printf(sc->sc_dev, "0x%08X | umac data3\n", table.data3); device_printf(sc->sc_dev, "0x%08X | umac major\n", table.umac_major); device_printf(sc->sc_dev, "0x%08X | umac minor\n", table.umac_minor); device_printf(sc->sc_dev, "0x%08X | frame pointer\n", table.frame_pointer); device_printf(sc->sc_dev, "0x%08X | stack pointer\n", table.stack_pointer); device_printf(sc->sc_dev, "0x%08X | last host cmd\n", table.cmd_header); device_printf(sc->sc_dev, "0x%08X | isr status reg\n", table.nic_isr_pref); } /* * Support for dumping the error log seemed like a good idea ... * but it's mostly hex junk and the only sensible thing is the * hw/ucode revision (which we know anyway). Since it's here, * I'll just leave it in, just in case e.g. 
the Intel guys want to * help us decipher some "ADVANCED_SYSASSERT" later. */ static void iwm_nic_error(struct iwm_softc *sc) { struct iwm_error_event_table table; uint32_t base; device_printf(sc->sc_dev, "dumping device error log\n"); base = sc->sc_uc.uc_error_event_table; if (base < 0x800000) { device_printf(sc->sc_dev, "Invalid error log pointer 0x%08x\n", base); return; } if (iwm_read_mem(sc, base, &table, sizeof(table)/sizeof(uint32_t))) { device_printf(sc->sc_dev, "reading errlog failed\n"); return; } if (!table.valid) { device_printf(sc->sc_dev, "errlog not found, skipping\n"); return; } if (ERROR_START_OFFSET <= table.valid * ERROR_ELEM_SIZE) { device_printf(sc->sc_dev, "Start Error Log Dump:\n"); device_printf(sc->sc_dev, "Status: 0x%x, count: %d\n", sc->sc_flags, table.valid); } device_printf(sc->sc_dev, "0x%08X | %-28s\n", table.error_id, iwm_desc_lookup(table.error_id)); device_printf(sc->sc_dev, "%08X | trm_hw_status0\n", table.trm_hw_status0); device_printf(sc->sc_dev, "%08X | trm_hw_status1\n", table.trm_hw_status1); device_printf(sc->sc_dev, "%08X | branchlink2\n", table.blink2); device_printf(sc->sc_dev, "%08X | interruptlink1\n", table.ilink1); device_printf(sc->sc_dev, "%08X | interruptlink2\n", table.ilink2); device_printf(sc->sc_dev, "%08X | data1\n", table.data1); device_printf(sc->sc_dev, "%08X | data2\n", table.data2); device_printf(sc->sc_dev, "%08X | data3\n", table.data3); device_printf(sc->sc_dev, "%08X | beacon time\n", table.bcon_time); device_printf(sc->sc_dev, "%08X | tsf low\n", table.tsf_low); device_printf(sc->sc_dev, "%08X | tsf hi\n", table.tsf_hi); device_printf(sc->sc_dev, "%08X | time gp1\n", table.gp1); device_printf(sc->sc_dev, "%08X | time gp2\n", table.gp2); device_printf(sc->sc_dev, "%08X | uCode revision type\n", table.fw_rev_type); device_printf(sc->sc_dev, "%08X | uCode version major\n", table.major); device_printf(sc->sc_dev, "%08X | uCode version minor\n", table.minor); device_printf(sc->sc_dev, "%08X | hw version\n", table.hw_ver); device_printf(sc->sc_dev, "%08X | board version\n", table.brd_ver); device_printf(sc->sc_dev, "%08X | hcmd\n", table.hcmd); device_printf(sc->sc_dev, "%08X | isr0\n", table.isr0); device_printf(sc->sc_dev, "%08X | isr1\n", table.isr1); device_printf(sc->sc_dev, "%08X | isr2\n", table.isr2); device_printf(sc->sc_dev, "%08X | isr3\n", table.isr3); device_printf(sc->sc_dev, "%08X | isr4\n", table.isr4); device_printf(sc->sc_dev, "%08X | last cmd Id\n", table.last_cmd_id); device_printf(sc->sc_dev, "%08X | wait_event\n", table.wait_event); device_printf(sc->sc_dev, "%08X | l2p_control\n", table.l2p_control); device_printf(sc->sc_dev, "%08X | l2p_duration\n", table.l2p_duration); device_printf(sc->sc_dev, "%08X | l2p_mhvalid\n", table.l2p_mhvalid); device_printf(sc->sc_dev, "%08X | l2p_addr_match\n", table.l2p_addr_match); device_printf(sc->sc_dev, "%08X | lmpm_pmg_sel\n", table.lmpm_pmg_sel); device_printf(sc->sc_dev, "%08X | timestamp\n", table.u_timestamp); device_printf(sc->sc_dev, "%08X | flow_handler\n", table.flow_handler); if (sc->sc_uc.uc_umac_error_event_table) iwm_nic_umac_error(sc); } #endif #define SYNC_RESP_STRUCT(_var_, _pkt_) \ do { \ bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD);\ _var_ = (void *)((_pkt_)+1); \ } while (/*CONSTCOND*/0) #define SYNC_RESP_PTR(_ptr_, _len_, _pkt_) \ do { \ bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD);\ _ptr_ = (void *)((_pkt_)+1); \ } while (/*CONSTCOND*/0) #define ADVANCE_RXQ(sc) (sc->rxq.cur = (sc->rxq.cur + 1) % 
IWM_RX_RING_COUNT); /* * Process an IWM_CSR_INT_BIT_FH_RX or IWM_CSR_INT_BIT_SW_RX interrupt. * Basic structure from if_iwn */ static void iwm_notif_intr(struct iwm_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; uint16_t hw; bus_dmamap_sync(sc->rxq.stat_dma.tag, sc->rxq.stat_dma.map, BUS_DMASYNC_POSTREAD); hw = le16toh(sc->rxq.stat->closed_rb_num) & 0xfff; /* * Process responses */ while (sc->rxq.cur != hw) { struct iwm_rx_ring *ring = &sc->rxq; struct iwm_rx_data *data = &sc->rxq.data[sc->rxq.cur]; struct iwm_rx_packet *pkt; struct iwm_cmd_response *cresp; int qid, idx, code; bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); pkt = mtod(data->m, struct iwm_rx_packet *); qid = pkt->hdr.qid & ~0x80; idx = pkt->hdr.idx; code = IWM_WIDE_ID(pkt->hdr.flags, pkt->hdr.code); IWM_DPRINTF(sc, IWM_DEBUG_INTR, "rx packet qid=%d idx=%d type=%x %d %d\n", pkt->hdr.qid & ~0x80, pkt->hdr.idx, code, sc->rxq.cur, hw); /* * randomly get these from the firmware, no idea why. * they at least seem harmless, so just ignore them for now */ if (__predict_false((pkt->hdr.code == 0 && qid == 0 && idx == 0) || pkt->len_n_flags == htole32(0x55550000))) { ADVANCE_RXQ(sc); continue; } switch (code) { case IWM_REPLY_RX_PHY_CMD: iwm_mvm_rx_rx_phy_cmd(sc, pkt, data); break; case IWM_REPLY_RX_MPDU_CMD: iwm_mvm_rx_rx_mpdu(sc, pkt, data); break; case IWM_TX_CMD: iwm_mvm_rx_tx_cmd(sc, pkt, data); break; case IWM_MISSED_BEACONS_NOTIFICATION: { struct iwm_missed_beacons_notif *resp; int missed; /* XXX look at mac_id to determine interface ID */ struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); SYNC_RESP_STRUCT(resp, pkt); missed = le32toh(resp->consec_missed_beacons); IWM_DPRINTF(sc, IWM_DEBUG_BEACON | IWM_DEBUG_STATE, "%s: MISSED_BEACON: mac_id=%d, " "consec_since_last_rx=%d, consec=%d, num_expect=%d " "num_rx=%d\n", __func__, le32toh(resp->mac_id), le32toh(resp->consec_missed_beacons_since_last_rx), le32toh(resp->consec_missed_beacons), le32toh(resp->num_expected_beacons), le32toh(resp->num_recvd_beacons)); /* Be paranoid */ if (vap == NULL) break; /* XXX no net80211 locking? 
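 *
 * Also note the lock dance just below: ieee80211_beacon_miss() may
 * re-enter the driver, so IWM_LOCK is dropped around the call; the
 * XXX there already suggests deferring this to a task instead.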
*/ if (vap->iv_state == IEEE80211_S_RUN && (ic->ic_flags & IEEE80211_F_SCAN) == 0) { if (missed > vap->iv_bmissthreshold) { /* XXX bad locking; turn into task */ IWM_UNLOCK(sc); ieee80211_beacon_miss(ic); IWM_LOCK(sc); } } break; } case IWM_MFUART_LOAD_NOTIFICATION: break; case IWM_MVM_ALIVE: { struct iwm_mvm_alive_resp_v1 *resp1; struct iwm_mvm_alive_resp_v2 *resp2; struct iwm_mvm_alive_resp_v3 *resp3; if (iwm_rx_packet_payload_len(pkt) == sizeof(*resp1)) { SYNC_RESP_STRUCT(resp1, pkt); sc->sc_uc.uc_error_event_table = le32toh(resp1->error_event_table_ptr); sc->sc_uc.uc_log_event_table = le32toh(resp1->log_event_table_ptr); sc->sched_base = le32toh(resp1->scd_base_ptr); if (resp1->status == IWM_ALIVE_STATUS_OK) sc->sc_uc.uc_ok = 1; else sc->sc_uc.uc_ok = 0; } if (iwm_rx_packet_payload_len(pkt) == sizeof(*resp2)) { SYNC_RESP_STRUCT(resp2, pkt); sc->sc_uc.uc_error_event_table = le32toh(resp2->error_event_table_ptr); sc->sc_uc.uc_log_event_table = le32toh(resp2->log_event_table_ptr); sc->sched_base = le32toh(resp2->scd_base_ptr); sc->sc_uc.uc_umac_error_event_table = le32toh(resp2->error_info_addr); if (resp2->status == IWM_ALIVE_STATUS_OK) sc->sc_uc.uc_ok = 1; else sc->sc_uc.uc_ok = 0; } if (iwm_rx_packet_payload_len(pkt) == sizeof(*resp3)) { SYNC_RESP_STRUCT(resp3, pkt); sc->sc_uc.uc_error_event_table = le32toh(resp3->error_event_table_ptr); sc->sc_uc.uc_log_event_table = le32toh(resp3->log_event_table_ptr); sc->sched_base = le32toh(resp3->scd_base_ptr); sc->sc_uc.uc_umac_error_event_table = le32toh(resp3->error_info_addr); if (resp3->status == IWM_ALIVE_STATUS_OK) sc->sc_uc.uc_ok = 1; else sc->sc_uc.uc_ok = 0; } sc->sc_uc.uc_intr = 1; wakeup(&sc->sc_uc); break; } case IWM_CALIB_RES_NOTIF_PHY_DB: { struct iwm_calib_res_notif_phy_db *phy_db_notif; SYNC_RESP_STRUCT(phy_db_notif, pkt); iwm_phy_db_set_section(sc, phy_db_notif); break; } case IWM_STATISTICS_NOTIFICATION: { struct iwm_notif_statistics *stats; SYNC_RESP_STRUCT(stats, pkt); memcpy(&sc->sc_stats, stats, sizeof(sc->sc_stats)); sc->sc_noise = iwm_get_noise(&stats->rx.general); break; } case IWM_NVM_ACCESS_CMD: case IWM_MCC_UPDATE_CMD: if (sc->sc_wantresp == ((qid << 16) | idx)) { bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); memcpy(sc->sc_cmd_resp, pkt, sizeof(sc->sc_cmd_resp)); } break; case IWM_MCC_CHUB_UPDATE_CMD: { struct iwm_mcc_chub_notif *notif; SYNC_RESP_STRUCT(notif, pkt); sc->sc_fw_mcc[0] = (notif->mcc & 0xff00) >> 8; sc->sc_fw_mcc[1] = notif->mcc & 0xff; sc->sc_fw_mcc[2] = '\0'; IWM_DPRINTF(sc, IWM_DEBUG_RESET, "fw source %d sent CC '%s'\n", notif->source_id, sc->sc_fw_mcc); break; } case IWM_DTS_MEASUREMENT_NOTIFICATION: break; case IWM_PHY_CONFIGURATION_CMD: case IWM_TX_ANT_CONFIGURATION_CMD: case IWM_ADD_STA: case IWM_MAC_CONTEXT_CMD: case IWM_REPLY_SF_CFG_CMD: case IWM_POWER_TABLE_CMD: case IWM_PHY_CONTEXT_CMD: case IWM_BINDING_CONTEXT_CMD: case IWM_TIME_EVENT_CMD: case IWM_SCAN_REQUEST_CMD: case IWM_WIDE_ID(IWM_ALWAYS_LONG_GROUP, IWM_SCAN_CFG_CMD): case IWM_WIDE_ID(IWM_ALWAYS_LONG_GROUP, IWM_SCAN_REQ_UMAC): case IWM_SCAN_OFFLOAD_REQUEST_CMD: case IWM_REPLY_BEACON_FILTERING_CMD: case IWM_MAC_PM_POWER_TABLE: case IWM_TIME_QUOTA_CMD: case IWM_REMOVE_STA: case IWM_TXPATH_FLUSH: case IWM_LQ_CMD: case IWM_BT_CONFIG: case IWM_REPLY_THERMAL_MNG_BACKOFF: SYNC_RESP_STRUCT(cresp, pkt); if (sc->sc_wantresp == ((qid << 16) | idx)) { memcpy(sc->sc_cmd_resp, pkt, sizeof(*pkt)+sizeof(*cresp)); } break; /* ignore */ case 0x6c: /* IWM_PHY_DB_CMD, no idea why it's not in fw-api.h */ break; case 
IWM_INIT_COMPLETE_NOTIF: sc->sc_init_complete = 1; wakeup(&sc->sc_init_complete); break; case IWM_SCAN_OFFLOAD_COMPLETE: { struct iwm_periodic_scan_complete *notif; SYNC_RESP_STRUCT(notif, pkt); break; } case IWM_SCAN_ITERATION_COMPLETE: { struct iwm_lmac_scan_complete_notif *notif; SYNC_RESP_STRUCT(notif, pkt); ieee80211_runtask(&sc->sc_ic, &sc->sc_es_task); break; } case IWM_SCAN_COMPLETE_UMAC: { struct iwm_umac_scan_complete *notif; SYNC_RESP_STRUCT(notif, pkt); IWM_DPRINTF(sc, IWM_DEBUG_SCAN, "UMAC scan complete, status=0x%x\n", notif->status); #if 0 /* XXX This would be a duplicate scan end call */ taskqueue_enqueue(sc->sc_tq, &sc->sc_es_task); #endif break; } case IWM_SCAN_ITERATION_COMPLETE_UMAC: { struct iwm_umac_scan_iter_complete_notif *notif; SYNC_RESP_STRUCT(notif, pkt); IWM_DPRINTF(sc, IWM_DEBUG_SCAN, "UMAC scan iteration " "complete, status=0x%x, %d channels scanned\n", notif->status, notif->scanned_channels); ieee80211_runtask(&sc->sc_ic, &sc->sc_es_task); break; } case IWM_REPLY_ERROR: { struct iwm_error_resp *resp; SYNC_RESP_STRUCT(resp, pkt); device_printf(sc->sc_dev, "firmware error 0x%x, cmd 0x%x\n", le32toh(resp->error_type), resp->cmd_id); break; } case IWM_TIME_EVENT_NOTIFICATION: { struct iwm_time_event_notif *notif; SYNC_RESP_STRUCT(notif, pkt); IWM_DPRINTF(sc, IWM_DEBUG_INTR, "TE notif status = 0x%x action = 0x%x\n", notif->status, notif->action); break; } case IWM_MCAST_FILTER_CMD: break; case IWM_SCD_QUEUE_CFG: { struct iwm_scd_txq_cfg_rsp *rsp; SYNC_RESP_STRUCT(rsp, pkt); IWM_DPRINTF(sc, IWM_DEBUG_CMD, "queue cfg token=0x%x sta_id=%d " "tid=%d scd_queue=%d\n", rsp->token, rsp->sta_id, rsp->tid, rsp->scd_queue); break; } default: device_printf(sc->sc_dev, "frame %d/%d %x UNHANDLED (this should " "not happen)\n", qid, idx, pkt->len_n_flags); break; } /* * Why test bit 0x80? The Linux driver: * * There is one exception: uCode sets bit 15 when it * originates the response/notification, i.e. when the * response/notification is not a direct response to a * command sent by the driver. For example, uCode issues * IWM_REPLY_RX when it sends a received frame to the driver; * it is not a direct response to any driver command. * * Ok, so since when is 7 == 15? Well, the Linux driver * uses a slightly different format for pkt->hdr, and "qid" * is actually the upper byte of a two-byte field. */ if (!(pkt->hdr.qid & (1 << 7))) { iwm_cmd_done(sc, pkt); } ADVANCE_RXQ(sc); } IWM_CLRBITS(sc, IWM_CSR_GP_CNTRL, IWM_CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); /* * Tell the firmware what we have processed. * Seems like the hardware gets upset unless we align * the write by 8?? */ hw = (hw == 0) ? IWM_RX_RING_COUNT - 1 : hw - 1; IWM_WRITE(sc, IWM_FH_RSCSR_CHNL0_WPTR, hw & ~7); } static void iwm_intr(void *arg) { struct iwm_softc *sc = arg; int handled = 0; int r1, r2, rv = 0; int isperiodic = 0; IWM_LOCK(sc); IWM_WRITE(sc, IWM_CSR_INT_MASK, 0); if (sc->sc_flags & IWM_FLAG_USE_ICT) { uint32_t *ict = sc->ict_dma.vaddr; int tmp; tmp = htole32(ict[sc->ict_cur]); if (!tmp) goto out_ena; /* * ok, there was something. keep plowing until we have all. */ r1 = r2 = 0; while (tmp) { r1 |= tmp; ict[sc->ict_cur] = 0; sc->ict_cur = (sc->ict_cur+1) % IWM_ICT_COUNT; tmp = htole32(ict[sc->ict_cur]); } /* this is where the fun begins. don't ask */ if (r1 == 0xffffffff) r1 = 0; /* i am not expected to understand this */ if (r1 & 0xc0000) r1 |= 0x8000; r1 = (0xff & r1) | ((0xff00 & r1) << 16); } else { r1 = IWM_READ(sc, IWM_CSR_INT); /* "hardware gone" (where, fishing?) 
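 *
 * Reads of all-ones (or the 0xa5a5a5a. pattern) from the interrupt
 * register are taken to mean the device has disappeared from the
 * bus, so there is nothing to service and we just bail out.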
*/ if (r1 == 0xffffffff || (r1 & 0xfffffff0) == 0xa5a5a5a0) goto out; r2 = IWM_READ(sc, IWM_CSR_FH_INT_STATUS); } if (r1 == 0 && r2 == 0) { goto out_ena; } IWM_WRITE(sc, IWM_CSR_INT, r1 | ~sc->sc_intmask); /* ignored */ handled |= (r1 & (IWM_CSR_INT_BIT_ALIVE /*| IWM_CSR_INT_BIT_SCD*/)); if (r1 & IWM_CSR_INT_BIT_SW_ERR) { int i; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); #ifdef IWM_DEBUG iwm_nic_error(sc); #endif /* Dump driver status (TX and RX rings) while we're here. */ device_printf(sc->sc_dev, "driver status:\n"); for (i = 0; i < IWM_MVM_MAX_QUEUES; i++) { struct iwm_tx_ring *ring = &sc->txq[i]; device_printf(sc->sc_dev, " tx ring %2d: qid=%-2d cur=%-3d " "queued=%-3d\n", i, ring->qid, ring->cur, ring->queued); } device_printf(sc->sc_dev, " rx ring: cur=%d\n", sc->rxq.cur); device_printf(sc->sc_dev, " 802.11 state %d\n", (vap == NULL) ? -1 : vap->iv_state); /* Don't stop the device; just do a VAP restart */ IWM_UNLOCK(sc); if (vap == NULL) { printf("%s: null vap\n", __func__); return; } device_printf(sc->sc_dev, "%s: controller panicked, iv_state = %d; " "restarting\n", __func__, vap->iv_state); /* XXX TODO: turn this into a callout/taskqueue */ ieee80211_restart_all(ic); return; } if (r1 & IWM_CSR_INT_BIT_HW_ERR) { handled |= IWM_CSR_INT_BIT_HW_ERR; device_printf(sc->sc_dev, "hardware error, stopping device\n"); iwm_stop(sc); rv = 1; goto out; } /* firmware chunk loaded */ if (r1 & IWM_CSR_INT_BIT_FH_TX) { IWM_WRITE(sc, IWM_CSR_FH_INT_STATUS, IWM_CSR_FH_INT_TX_MASK); handled |= IWM_CSR_INT_BIT_FH_TX; sc->sc_fw_chunk_done = 1; wakeup(&sc->sc_fw); } if (r1 & IWM_CSR_INT_BIT_RF_KILL) { handled |= IWM_CSR_INT_BIT_RF_KILL; if (iwm_check_rfkill(sc)) { device_printf(sc->sc_dev, "%s: rfkill switch, disabling interface\n", __func__); iwm_stop(sc); } } /* * The Linux driver uses periodic interrupts to avoid races. * We cargo-cult like it's going out of fashion. 
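 *
 * The idea, as far as it can be reconstructed from iwlwifi: disable
 * the periodic interrupt while RX notifications are being drained
 * and re-enable it afterwards, so a frame that lands while the RX
 * bits are being acknowledged still raises an interrupt rather than
 * sitting in the ring unnoticed.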
*/ if (r1 & IWM_CSR_INT_BIT_RX_PERIODIC) { handled |= IWM_CSR_INT_BIT_RX_PERIODIC; IWM_WRITE(sc, IWM_CSR_INT, IWM_CSR_INT_BIT_RX_PERIODIC); if ((r1 & (IWM_CSR_INT_BIT_FH_RX | IWM_CSR_INT_BIT_SW_RX)) == 0) IWM_WRITE_1(sc, IWM_CSR_INT_PERIODIC_REG, IWM_CSR_INT_PERIODIC_DIS); isperiodic = 1; } if ((r1 & (IWM_CSR_INT_BIT_FH_RX | IWM_CSR_INT_BIT_SW_RX)) || isperiodic) { handled |= (IWM_CSR_INT_BIT_FH_RX | IWM_CSR_INT_BIT_SW_RX); IWM_WRITE(sc, IWM_CSR_FH_INT_STATUS, IWM_CSR_FH_INT_RX_MASK); iwm_notif_intr(sc); /* enable periodic interrupt, see above */ if (r1 & (IWM_CSR_INT_BIT_FH_RX | IWM_CSR_INT_BIT_SW_RX) && !isperiodic) IWM_WRITE_1(sc, IWM_CSR_INT_PERIODIC_REG, IWM_CSR_INT_PERIODIC_ENA); } if (__predict_false(r1 & ~handled)) IWM_DPRINTF(sc, IWM_DEBUG_INTR, "%s: unhandled interrupts: %x\n", __func__, r1); rv = 1; out_ena: iwm_restore_interrupts(sc); out: IWM_UNLOCK(sc); return; } /* * Autoconf glue-sniffing */ #define PCI_VENDOR_INTEL 0x8086 #define PCI_PRODUCT_INTEL_WL_3160_1 0x08b3 #define PCI_PRODUCT_INTEL_WL_3160_2 0x08b4 #define PCI_PRODUCT_INTEL_WL_3165_1 0x3165 #define PCI_PRODUCT_INTEL_WL_3165_2 0x3166 #define PCI_PRODUCT_INTEL_WL_7260_1 0x08b1 #define PCI_PRODUCT_INTEL_WL_7260_2 0x08b2 #define PCI_PRODUCT_INTEL_WL_7265_1 0x095a #define PCI_PRODUCT_INTEL_WL_7265_2 0x095b #define PCI_PRODUCT_INTEL_WL_8260_1 0x24f3 #define PCI_PRODUCT_INTEL_WL_8260_2 0x24f4 static const struct iwm_devices { uint16_t device; const char *name; } iwm_devices[] = { { PCI_PRODUCT_INTEL_WL_3160_1, "Intel Dual Band Wireless AC 3160" }, { PCI_PRODUCT_INTEL_WL_3160_2, "Intel Dual Band Wireless AC 3160" }, { PCI_PRODUCT_INTEL_WL_3165_1, "Intel Dual Band Wireless AC 3165" }, { PCI_PRODUCT_INTEL_WL_3165_2, "Intel Dual Band Wireless AC 3165" }, { PCI_PRODUCT_INTEL_WL_7260_1, "Intel Dual Band Wireless AC 7260" }, { PCI_PRODUCT_INTEL_WL_7260_2, "Intel Dual Band Wireless AC 7260" }, { PCI_PRODUCT_INTEL_WL_7265_1, "Intel Dual Band Wireless AC 7265" }, { PCI_PRODUCT_INTEL_WL_7265_2, "Intel Dual Band Wireless AC 7265" }, { PCI_PRODUCT_INTEL_WL_8260_1, "Intel Dual Band Wireless AC 8260" }, { PCI_PRODUCT_INTEL_WL_8260_2, "Intel Dual Band Wireless AC 8260" }, }; static int iwm_probe(device_t dev) { int i; for (i = 0; i < nitems(iwm_devices); i++) { if (pci_get_vendor(dev) == PCI_VENDOR_INTEL && pci_get_device(dev) == iwm_devices[i].device) { device_set_desc(dev, iwm_devices[i].name); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int iwm_dev_check(device_t dev) { struct iwm_softc *sc; sc = device_get_softc(dev); sc->sc_hw_rev = IWM_READ(sc, IWM_CSR_HW_REV); switch (pci_get_device(dev)) { case PCI_PRODUCT_INTEL_WL_3160_1: case PCI_PRODUCT_INTEL_WL_3160_2: sc->sc_fwname = "iwm3160fw"; sc->host_interrupt_operation_mode = 1; sc->sc_device_family = IWM_DEVICE_FAMILY_7000; sc->sc_fwdmasegsz = IWM_FWDMASEGSZ; return (0); case PCI_PRODUCT_INTEL_WL_3165_1: case PCI_PRODUCT_INTEL_WL_3165_2: sc->sc_fwname = "iwm7265fw"; sc->host_interrupt_operation_mode = 0; sc->sc_device_family = IWM_DEVICE_FAMILY_7000; sc->sc_fwdmasegsz = IWM_FWDMASEGSZ; return (0); case PCI_PRODUCT_INTEL_WL_7260_1: case PCI_PRODUCT_INTEL_WL_7260_2: sc->sc_fwname = "iwm7260fw"; sc->host_interrupt_operation_mode = 1; sc->sc_device_family = IWM_DEVICE_FAMILY_7000; sc->sc_fwdmasegsz = IWM_FWDMASEGSZ; return (0); case PCI_PRODUCT_INTEL_WL_7265_1: case PCI_PRODUCT_INTEL_WL_7265_2: sc->sc_fwname = "iwm7265fw"; sc->host_interrupt_operation_mode = 0; sc->sc_device_family = IWM_DEVICE_FAMILY_7000; sc->sc_fwdmasegsz = IWM_FWDMASEGSZ; return (0); case 
PCI_PRODUCT_INTEL_WL_8260_1: case PCI_PRODUCT_INTEL_WL_8260_2: sc->sc_fwname = "iwm8000Cfw"; sc->host_interrupt_operation_mode = 0; sc->sc_device_family = IWM_DEVICE_FAMILY_8000; sc->sc_fwdmasegsz = IWM_FWDMASEGSZ_8000; return (0); default: device_printf(dev, "unknown adapter type\n"); return ENXIO; } } static int iwm_pci_attach(device_t dev) { struct iwm_softc *sc; int count, error, rid; uint16_t reg; sc = device_get_softc(dev); /* Clear device-specific "PCI retry timeout" register (41h). */ reg = pci_read_config(dev, 0x40, sizeof(reg)); pci_write_config(dev, 0x40, reg & ~0xff00, sizeof(reg)); /* Enable bus-mastering and hardware bug workaround. */ pci_enable_busmaster(dev); reg = pci_read_config(dev, PCIR_STATUS, sizeof(reg)); /* if !MSI */ if (reg & PCIM_STATUS_INTxSTATE) { reg &= ~PCIM_STATUS_INTxSTATE; } pci_write_config(dev, PCIR_STATUS, reg, sizeof(reg)); rid = PCIR_BAR(0); sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->sc_mem == NULL) { device_printf(sc->sc_dev, "can't map mem space\n"); return (ENXIO); } sc->sc_st = rman_get_bustag(sc->sc_mem); sc->sc_sh = rman_get_bushandle(sc->sc_mem); /* Install interrupt handler. */ count = 1; rid = 0; if (pci_alloc_msi(dev, &count) == 0) rid = 1; sc->sc_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE | (rid != 0 ? 0 : RF_SHAREABLE)); if (sc->sc_irq == NULL) { device_printf(dev, "can't map interrupt\n"); return (ENXIO); } error = bus_setup_intr(dev, sc->sc_irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, iwm_intr, sc, &sc->sc_ih); if (sc->sc_ih == NULL) { device_printf(dev, "can't establish interrupt"); return (ENXIO); } sc->sc_dmat = bus_get_dma_tag(sc->sc_dev); return (0); } static void iwm_pci_detach(device_t dev) { struct iwm_softc *sc = device_get_softc(dev); if (sc->sc_irq != NULL) { bus_teardown_intr(dev, sc->sc_irq, sc->sc_ih); bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->sc_irq), sc->sc_irq); pci_release_msi(dev); } if (sc->sc_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->sc_mem), sc->sc_mem); } static int iwm_attach(device_t dev) { struct iwm_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; int error; int txq_i, i; sc->sc_dev = dev; IWM_LOCK_INIT(sc); mbufq_init(&sc->sc_snd, ifqmaxlen); callout_init_mtx(&sc->sc_watchdog_to, &sc->sc_mtx, 0); callout_init_mtx(&sc->sc_led_blink_to, &sc->sc_mtx, 0); TASK_INIT(&sc->sc_es_task, 0, iwm_endscan_cb, sc); /* PCI attach */ error = iwm_pci_attach(dev); if (error != 0) goto fail; sc->sc_wantresp = -1; /* Check device type */ error = iwm_dev_check(dev); if (error != 0) goto fail; /* * We now start fiddling with the hardware */ /* * In the 8000 HW family the format of the 4 bytes of CSR_HW_REV have * changed, and now the revision step also includes bit 0-1 (no more * "dash" value). To keep hw_rev backwards compatible - we'll store it * in the old format. */ if (sc->sc_device_family == IWM_DEVICE_FAMILY_8000) sc->sc_hw_rev = (sc->sc_hw_rev & 0xfff0) | (IWM_CSR_HW_REV_STEP(sc->sc_hw_rev << 2) << 2); if (iwm_prepare_card_hw(sc) != 0) { device_printf(dev, "could not initialize hardware\n"); goto fail; } if (sc->sc_device_family == IWM_DEVICE_FAMILY_8000) { int ret; uint32_t hw_step; /* * In order to recognize C step the driver should read the * chip version id located at the AUX bus MISC address. 
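 *
 * The sequence below: request INIT_DONE, poll for the MAC clock to
 * become ready, then under the NIC lock enable WFPM, read
 * IWM_AUX_MISC_REG through the periphery register interface and
 * extract the step bits; a value of 0x3 means a C-step part, and
 * the stored hw_rev is patched to reflect that.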
*/ IWM_SETBITS(sc, IWM_CSR_GP_CNTRL, IWM_CSR_GP_CNTRL_REG_FLAG_INIT_DONE); DELAY(2); ret = iwm_poll_bit(sc, IWM_CSR_GP_CNTRL, IWM_CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, IWM_CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY, 25000); if (ret < 0) { device_printf(sc->sc_dev, "Failed to wake up the nic\n"); goto fail; } if (iwm_nic_lock(sc)) { hw_step = iwm_read_prph(sc, IWM_WFPM_CTRL_REG); hw_step |= IWM_ENABLE_WFPM; iwm_write_prph(sc, IWM_WFPM_CTRL_REG, hw_step); hw_step = iwm_read_prph(sc, IWM_AUX_MISC_REG); hw_step = (hw_step >> IWM_HW_STEP_LOCATION_BITS) & 0xF; if (hw_step == 0x3) sc->sc_hw_rev = (sc->sc_hw_rev & 0xFFFFFFF3) | (IWM_SILICON_C_STEP << 2); iwm_nic_unlock(sc); } else { device_printf(sc->sc_dev, "Failed to lock the nic\n"); goto fail; } } /* Allocate DMA memory for firmware transfers. */ if ((error = iwm_alloc_fwmem(sc)) != 0) { device_printf(dev, "could not allocate memory for firmware\n"); goto fail; } /* Allocate "Keep Warm" page. */ if ((error = iwm_alloc_kw(sc)) != 0) { device_printf(dev, "could not allocate keep warm page\n"); goto fail; } /* We use ICT interrupts */ if ((error = iwm_alloc_ict(sc)) != 0) { device_printf(dev, "could not allocate ICT table\n"); goto fail; } /* Allocate TX scheduler "rings". */ if ((error = iwm_alloc_sched(sc)) != 0) { device_printf(dev, "could not allocate TX scheduler rings\n"); goto fail; } /* Allocate TX rings */ for (txq_i = 0; txq_i < nitems(sc->txq); txq_i++) { if ((error = iwm_alloc_tx_ring(sc, &sc->txq[txq_i], txq_i)) != 0) { device_printf(dev, "could not allocate TX ring %d\n", txq_i); goto fail; } } /* Allocate RX ring. */ if ((error = iwm_alloc_rx_ring(sc, &sc->rxq)) != 0) { device_printf(dev, "could not allocate RX ring\n"); goto fail; } /* Clear pending interrupts. */ IWM_WRITE(sc, IWM_CSR_INT, 0xffffffff); ic->ic_softc = sc; ic->ic_name = device_get_nameunit(sc->sc_dev); ic->ic_phytype = IEEE80211_T_OFDM; /* not only, but not used */ ic->ic_opmode = IEEE80211_M_STA; /* default to BSS mode */ /* Set device capabilities. */ ic->ic_caps = IEEE80211_C_STA | IEEE80211_C_WPA | /* WPA/RSN */ IEEE80211_C_WME | IEEE80211_C_SHSLOT | /* short slot time supported */ IEEE80211_C_SHPREAMBLE /* short preamble supported */ // IEEE80211_C_BGSCAN /* capable of bg scanning */ ; for (i = 0; i < nitems(sc->sc_phyctxt); i++) { sc->sc_phyctxt[i].id = i; sc->sc_phyctxt[i].color = 0; sc->sc_phyctxt[i].ref = 0; sc->sc_phyctxt[i].channel = NULL; } /* Max RSSI */ sc->sc_max_rssi = IWM_MAX_DBM - IWM_MIN_DBM; sc->sc_preinit_hook.ich_func = iwm_preinit; sc->sc_preinit_hook.ich_arg = sc; if (config_intrhook_establish(&sc->sc_preinit_hook) != 0) { device_printf(dev, "config_intrhook_establish failed\n"); goto fail; } #ifdef IWM_DEBUG SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "debug", CTLFLAG_RW, &sc->sc_debug, 0, "control debugging"); #endif IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "<-%s\n", __func__); return 0; /* Free allocated memory if something failed during attachment. 
*/ fail: iwm_detach_local(sc, 0); return ENXIO; } static int iwm_is_valid_ether_addr(uint8_t *addr) { char zero_addr[IEEE80211_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 }; if ((addr[0] & 1) || IEEE80211_ADDR_EQ(zero_addr, addr)) return (FALSE); return (TRUE); } static int iwm_update_edca(struct ieee80211com *ic) { struct iwm_softc *sc = ic->ic_softc; device_printf(sc->sc_dev, "%s: called\n", __func__); return (0); } static void iwm_preinit(void *arg) { struct iwm_softc *sc = arg; device_t dev = sc->sc_dev; struct ieee80211com *ic = &sc->sc_ic; int error; IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "->%s\n", __func__); IWM_LOCK(sc); if ((error = iwm_start_hw(sc)) != 0) { device_printf(dev, "could not initialize hardware\n"); IWM_UNLOCK(sc); goto fail; } error = iwm_run_init_mvm_ucode(sc, 1); iwm_stop_device(sc); if (error) { IWM_UNLOCK(sc); goto fail; } device_printf(dev, "hw rev 0x%x, fw ver %s, address %s\n", sc->sc_hw_rev & IWM_CSR_HW_REV_TYPE_MSK, sc->sc_fwver, ether_sprintf(sc->sc_nvm.hw_addr)); /* not all hardware can do 5GHz band */ if (!sc->sc_nvm.sku_cap_band_52GHz_enable) memset(&ic->ic_sup_rates[IEEE80211_MODE_11A], 0, sizeof(ic->ic_sup_rates[IEEE80211_MODE_11A])); IWM_UNLOCK(sc); iwm_init_channel_map(ic, IEEE80211_CHAN_MAX, &ic->ic_nchans, ic->ic_channels); /* * At this point we've committed - if we fail to do setup, * we now also have to tear down the net80211 state. */ ieee80211_ifattach(ic); ic->ic_vap_create = iwm_vap_create; ic->ic_vap_delete = iwm_vap_delete; ic->ic_raw_xmit = iwm_raw_xmit; ic->ic_node_alloc = iwm_node_alloc; ic->ic_scan_start = iwm_scan_start; ic->ic_scan_end = iwm_scan_end; ic->ic_update_mcast = iwm_update_mcast; ic->ic_getradiocaps = iwm_init_channel_map; ic->ic_set_channel = iwm_set_channel; ic->ic_scan_curchan = iwm_scan_curchan; ic->ic_scan_mindwell = iwm_scan_mindwell; ic->ic_wme.wme_update = iwm_update_edca; ic->ic_parent = iwm_parent; ic->ic_transmit = iwm_transmit; iwm_radiotap_attach(sc); if (bootverbose) ieee80211_announce(ic); IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "<-%s\n", __func__); config_intrhook_disestablish(&sc->sc_preinit_hook); return; fail: config_intrhook_disestablish(&sc->sc_preinit_hook); iwm_detach_local(sc, 0); } /* * Attach the interface to 802.11 radiotap. */ static void iwm_radiotap_attach(struct iwm_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "->%s begin\n", __func__); ieee80211_radiotap_attach(ic, &sc->sc_txtap.wt_ihdr, sizeof(sc->sc_txtap), IWM_TX_RADIOTAP_PRESENT, &sc->sc_rxtap.wr_ihdr, sizeof(sc->sc_rxtap), IWM_RX_RADIOTAP_PRESENT); IWM_DPRINTF(sc, IWM_DEBUG_RESET | IWM_DEBUG_TRACE, "->%s end\n", __func__); } static struct ieee80211vap * iwm_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct iwm_vap *ivp; struct ieee80211vap *vap; if (!TAILQ_EMPTY(&ic->ic_vaps)) /* only one at a time */ return NULL; ivp = malloc(sizeof(struct iwm_vap), M_80211_VAP, M_WAITOK | M_ZERO); vap = &ivp->iv_vap; ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); vap->iv_bmissthreshold = 10; /* override default */ /* Override with driver methods. */ ivp->iv_newstate = vap->iv_newstate; vap->iv_newstate = iwm_newstate; ieee80211_ratectl_init(vap); /* Complete setup. 
*/ ieee80211_vap_attach(vap, iwm_media_change, ieee80211_media_status, mac); ic->ic_opmode = opmode; return vap; } static void iwm_vap_delete(struct ieee80211vap *vap) { struct iwm_vap *ivp = IWM_VAP(vap); ieee80211_ratectl_deinit(vap); ieee80211_vap_detach(vap); free(ivp, M_80211_VAP); } static void iwm_scan_start(struct ieee80211com *ic) { struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct iwm_softc *sc = ic->ic_softc; int error; IWM_LOCK(sc); if (isset(sc->sc_enabled_capa, IWM_UCODE_TLV_CAPA_UMAC_SCAN)) error = iwm_mvm_umac_scan(sc); else error = iwm_mvm_lmac_scan(sc); if (error != 0) { device_printf(sc->sc_dev, "could not initiate 2 GHz scan\n"); IWM_UNLOCK(sc); ieee80211_cancel_scan(vap); } else { iwm_led_blink_start(sc); IWM_UNLOCK(sc); } } static void iwm_scan_end(struct ieee80211com *ic) { struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct iwm_softc *sc = ic->ic_softc; IWM_LOCK(sc); iwm_led_blink_stop(sc); if (vap->iv_state == IEEE80211_S_RUN) iwm_mvm_led_enable(sc); IWM_UNLOCK(sc); } static void iwm_update_mcast(struct ieee80211com *ic) { } static void iwm_set_channel(struct ieee80211com *ic) { } static void iwm_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell) { } static void iwm_scan_mindwell(struct ieee80211_scan_state *ss) { return; } void iwm_init_task(void *arg1) { struct iwm_softc *sc = arg1; IWM_LOCK(sc); while (sc->sc_flags & IWM_FLAG_BUSY) msleep(&sc->sc_flags, &sc->sc_mtx, 0, "iwmpwr", 0); sc->sc_flags |= IWM_FLAG_BUSY; iwm_stop(sc); if (sc->sc_ic.ic_nrunning > 0) iwm_init(sc); sc->sc_flags &= ~IWM_FLAG_BUSY; wakeup(&sc->sc_flags); IWM_UNLOCK(sc); } static int iwm_resume(device_t dev) { struct iwm_softc *sc = device_get_softc(dev); int do_reinit = 0; uint16_t reg; /* Clear device-specific "PCI retry timeout" register (41h). */ reg = pci_read_config(dev, 0x40, sizeof(reg)); pci_write_config(dev, 0x40, reg & ~0xff00, sizeof(reg)); iwm_init_task(device_get_softc(dev)); IWM_LOCK(sc); if (sc->sc_flags & IWM_FLAG_SCANNING) { sc->sc_flags &= ~IWM_FLAG_SCANNING; do_reinit = 1; } IWM_UNLOCK(sc); if (do_reinit) ieee80211_resume_all(&sc->sc_ic); return 0; } static int iwm_suspend(device_t dev) { int do_stop = 0; struct iwm_softc *sc = device_get_softc(dev); do_stop = !! 
(sc->sc_ic.ic_nrunning > 0); ieee80211_suspend_all(&sc->sc_ic); if (do_stop) { IWM_LOCK(sc); iwm_stop(sc); sc->sc_flags |= IWM_FLAG_SCANNING; IWM_UNLOCK(sc); } return (0); } static int iwm_detach_local(struct iwm_softc *sc, int do_net80211) { struct iwm_fw_info *fw = &sc->sc_fw; device_t dev = sc->sc_dev; int i; ieee80211_draintask(&sc->sc_ic, &sc->sc_es_task); callout_drain(&sc->sc_led_blink_to); callout_drain(&sc->sc_watchdog_to); iwm_stop_device(sc); if (do_net80211) { ieee80211_ifdetach(&sc->sc_ic); } iwm_phy_db_free(sc); /* Free descriptor rings */ iwm_free_rx_ring(sc, &sc->rxq); for (i = 0; i < nitems(sc->txq); i++) iwm_free_tx_ring(sc, &sc->txq[i]); /* Free firmware */ if (fw->fw_fp != NULL) iwm_fw_info_free(fw); /* Free scheduler */ iwm_free_sched(sc); if (sc->ict_dma.vaddr != NULL) iwm_free_ict(sc); if (sc->kw_dma.vaddr != NULL) iwm_free_kw(sc); if (sc->fw_dma.vaddr != NULL) iwm_free_fwmem(sc); /* Finished with the hardware - detach things */ iwm_pci_detach(dev); mbufq_drain(&sc->sc_snd); IWM_LOCK_DESTROY(sc); return (0); } static int iwm_detach(device_t dev) { struct iwm_softc *sc = device_get_softc(dev); return (iwm_detach_local(sc, 1)); } static device_method_t iwm_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, iwm_probe), DEVMETHOD(device_attach, iwm_attach), DEVMETHOD(device_detach, iwm_detach), DEVMETHOD(device_suspend, iwm_suspend), DEVMETHOD(device_resume, iwm_resume), DEVMETHOD_END }; static driver_t iwm_pci_driver = { "iwm", iwm_pci_methods, sizeof (struct iwm_softc) }; static devclass_t iwm_devclass; DRIVER_MODULE(iwm, pci, iwm_pci_driver, iwm_devclass, NULL, NULL); MODULE_DEPEND(iwm, firmware, 1, 1, 1); MODULE_DEPEND(iwm, pci, 1, 1, 1); MODULE_DEPEND(iwm, wlan, 1, 1, 1); Index: user/alc/PQ_LAUNDRY/sys/dev/ntb/if_ntb/if_ntb.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/ntb/if_ntb/if_ntb.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/ntb/if_ntb/if_ntb.c (revision 303642) @@ -1,517 +1,516 @@ /*- * Copyright (c) 2016 Alexander Motin * Copyright (C) 2013 Intel Corporation * Copyright (C) 2015 EMC Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ /* * The Non-Transparent Bridge (NTB) is a device that allows you to connect * two or more systems using a PCI-e links, providing remote memory access. * * This module contains a driver for simulated Ethernet device, using * underlying NTB Transport device. * * NOTE: Much of the code in this module is shared with Linux. Any patches may * be picked up and redistributed in Linux with a dual GPL/BSD license. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../ntb_transport.h" #define KTR_NTB KTR_SPARE3 #define NTB_MEDIATYPE (IFM_ETHER | IFM_AUTO | IFM_FDX) #define NTB_CSUM_FEATURES (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP) #define NTB_CSUM_FEATURES6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6) #define NTB_CSUM_SET (CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \ CSUM_PSEUDO_HDR | \ CSUM_IP_CHECKED | CSUM_IP_VALID | \ CSUM_SCTP_VALID) static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW, 0, "if_ntb"); static unsigned g_if_ntb_num_queues = UINT_MAX; SYSCTL_UINT(_hw_if_ntb, OID_AUTO, num_queues, CTLFLAG_RWTUN, &g_if_ntb_num_queues, 0, "Number of queues per interface"); struct ntb_net_queue { struct ntb_net_ctx *sc; if_t ifp; struct ntb_transport_qp *qp; struct buf_ring *br; struct task tx_task; struct taskqueue *tx_tq; struct mtx tx_lock; struct callout queue_full; }; struct ntb_net_ctx { if_t ifp; struct ifmedia media; u_char eaddr[ETHER_ADDR_LEN]; int num_queues; struct ntb_net_queue *queues; int mtu; }; static int ntb_net_probe(device_t dev); static int ntb_net_attach(device_t dev); static int ntb_net_detach(device_t dev); static void ntb_net_init(void *arg); static int ntb_ifmedia_upd(struct ifnet *); static void ntb_ifmedia_sts(struct ifnet *, struct ifmediareq *); static int ntb_ioctl(if_t ifp, u_long command, caddr_t data); static int ntb_transmit(if_t ifp, struct mbuf *m); static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len); static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len); static void ntb_net_event_handler(void *data, enum ntb_link_event status); static void ntb_handle_tx(void *arg, int pending); static void ntb_qp_full(void *arg); static void ntb_qflush(if_t ifp); static void create_random_local_eui48(u_char *eaddr); static int ntb_net_probe(device_t dev) { device_set_desc(dev, "NTB Network Interface"); return (0); } static int ntb_net_attach(device_t dev) { struct ntb_net_ctx *sc = device_get_softc(dev); struct ntb_net_queue *q; if_t ifp; struct ntb_queue_handlers handlers = { ntb_net_rx_handler, ntb_net_tx_handler, ntb_net_event_handler }; int i; ifp = sc->ifp = if_gethandle(IFT_ETHER); if (ifp == NULL) { printf("ntb: Cannot allocate ifnet structure\n"); return (ENOMEM); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setdev(ifp, dev); sc->num_queues = min(g_if_ntb_num_queues, ntb_transport_queue_count(dev)); sc->queues = malloc(sc->num_queues * sizeof(struct ntb_net_queue), M_DEVBUF, M_WAITOK | M_ZERO); sc->mtu = INT_MAX; for (i = 0; i < sc->num_queues; i++) { q = &sc->queues[i]; q->sc = sc; q->ifp = ifp; q->qp = ntb_transport_create_queue(dev, i, &handlers, q); if (q->qp == NULL) break; sc->mtu = imin(sc->mtu, ntb_transport_max_size(q->qp)); mtx_init(&q->tx_lock, "ntb tx", NULL, MTX_DEF); q->br = buf_ring_alloc(4096, M_DEVBUF, M_WAITOK, &q->tx_lock); TASK_INIT(&q->tx_task, 0, ntb_handle_tx, q); q->tx_tq = 
taskqueue_create_fast("ntb_txq", M_NOWAIT, taskqueue_thread_enqueue, &q->tx_tq); taskqueue_start_threads(&q->tx_tq, 1, PI_NET, "%s txq%d", device_get_nameunit(dev), i); callout_init(&q->queue_full, 1); } sc->num_queues = i; device_printf(dev, "%d queue(s)\n", sc->num_queues); if_setinitfn(ifp, ntb_net_init); if_setsoftc(ifp, sc); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); if_setioctlfn(ifp, ntb_ioctl); if_settransmitfn(ifp, ntb_transmit); if_setqflushfn(ifp, ntb_qflush); create_random_local_eui48(sc->eaddr); ether_ifattach(ifp, sc->eaddr); if_setcapabilities(ifp, IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | IFCAP_JUMBO_MTU | IFCAP_LINKSTATE); if_setcapenable(ifp, IFCAP_JUMBO_MTU | IFCAP_LINKSTATE); if_setmtu(ifp, sc->mtu - ETHER_HDR_LEN); ifmedia_init(&sc->media, IFM_IMASK, ntb_ifmedia_upd, ntb_ifmedia_sts); ifmedia_add(&sc->media, NTB_MEDIATYPE, 0, NULL); ifmedia_set(&sc->media, NTB_MEDIATYPE); for (i = 0; i < sc->num_queues; i++) ntb_transport_link_up(sc->queues[i].qp); return (0); } static int ntb_net_detach(device_t dev) { struct ntb_net_ctx *sc = device_get_softc(dev); struct ntb_net_queue *q; int i; for (i = 0; i < sc->num_queues; i++) ntb_transport_link_down(sc->queues[i].qp); ether_ifdetach(sc->ifp); if_free(sc->ifp); ifmedia_removeall(&sc->media); for (i = 0; i < sc->num_queues; i++) { q = &sc->queues[i]; ntb_transport_free_queue(q->qp); buf_ring_free(q->br, M_DEVBUF); callout_drain(&q->queue_full); taskqueue_drain_all(q->tx_tq); mtx_destroy(&q->tx_lock); } free(sc->queues, M_DEVBUF); return (0); } /* Network device interface */ static void ntb_net_init(void *arg) { struct ntb_net_ctx *sc = arg; if_t ifp = sc->ifp; if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); if_link_state_change(ifp, ntb_transport_link_query(sc->queues[0].qp) ? 
LINK_STATE_UP : LINK_STATE_DOWN); } static int ntb_ioctl(if_t ifp, u_long command, caddr_t data) { struct ntb_net_ctx *sc = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; int error = 0; switch (command) { case SIOCSIFMTU: { if (ifr->ifr_mtu > sc->mtu - ETHER_HDR_LEN) { error = EINVAL; break; } if_setmtu(ifp, ifr->ifr_mtu); break; } case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->media, command); break; case SIOCSIFCAP: if (ifr->ifr_reqcap & IFCAP_RXCSUM) if_setcapenablebit(ifp, IFCAP_RXCSUM, 0); else if_setcapenablebit(ifp, 0, IFCAP_RXCSUM); if (ifr->ifr_reqcap & IFCAP_TXCSUM) { if_setcapenablebit(ifp, IFCAP_TXCSUM, 0); if_sethwassistbits(ifp, NTB_CSUM_FEATURES, 0); } else { if_setcapenablebit(ifp, 0, IFCAP_TXCSUM); if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES); } if (ifr->ifr_reqcap & IFCAP_RXCSUM_IPV6) if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0); else if_setcapenablebit(ifp, 0, IFCAP_RXCSUM_IPV6); if (ifr->ifr_reqcap & IFCAP_TXCSUM_IPV6) { if_setcapenablebit(ifp, IFCAP_TXCSUM_IPV6, 0); if_sethwassistbits(ifp, NTB_CSUM_FEATURES6, 0); } else { if_setcapenablebit(ifp, 0, IFCAP_TXCSUM_IPV6); if_sethwassistbits(ifp, 0, NTB_CSUM_FEATURES6); } break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int ntb_ifmedia_upd(struct ifnet *ifp) { struct ntb_net_ctx *sc = if_getsoftc(ifp); struct ifmedia *ifm = &sc->media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); return (0); } static void ntb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct ntb_net_ctx *sc = if_getsoftc(ifp); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = NTB_MEDIATYPE; if (ntb_transport_link_query(sc->queues[0].qp)) ifmr->ifm_status |= IFM_ACTIVE; } static void ntb_transmit_locked(struct ntb_net_queue *q) { if_t ifp = q->ifp; struct mbuf *m; int rc, len; short mflags; CTR0(KTR_NTB, "TX: ntb_transmit_locked"); while ((m = drbr_peek(ifp, q->br)) != NULL) { CTR1(KTR_NTB, "TX: start mbuf %p", m); if_etherbpfmtap(ifp, m); len = m->m_pkthdr.len; mflags = m->m_flags; rc = ntb_transport_tx_enqueue(q->qp, m, m, len); if (rc != 0) { CTR2(KTR_NTB, "TX: could not tx mbuf %p: %d", m, rc); if (rc == EAGAIN) { drbr_putback(ifp, q->br, m); callout_reset_sbt(&q->queue_full, SBT_1MS / 4, SBT_1MS / 4, ntb_qp_full, q, 0); } else { m_freem(m); drbr_advance(ifp, q->br); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } break; } drbr_advance(ifp, q->br); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if (mflags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } } static int ntb_transmit(if_t ifp, struct mbuf *m) { struct ntb_net_ctx *sc = if_getsoftc(ifp); struct ntb_net_queue *q; int error, i; CTR0(KTR_NTB, "TX: ntb_transmit"); if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % sc->num_queues; else i = curcpu % sc->num_queues; q = &sc->queues[i]; error = drbr_enqueue(ifp, q->br, m); if (error) return (error); if (mtx_trylock(&q->tx_lock)) { ntb_transmit_locked(q); mtx_unlock(&q->tx_lock); } else taskqueue_enqueue(q->tx_tq, &q->tx_task); return (0); } static void ntb_handle_tx(void *arg, int pending) { struct ntb_net_queue *q = arg; mtx_lock(&q->tx_lock); ntb_transmit_locked(q); mtx_unlock(&q->tx_lock); } static void ntb_qp_full(void *arg) { struct ntb_net_queue *q = arg; CTR0(KTR_NTB, "TX: qp_full callout"); if (ntb_transport_tx_free_entry(q->qp) > 0) taskqueue_enqueue(q->tx_tq, &q->tx_task); else callout_schedule_sbt(&q->queue_full, SBT_1MS / 4, SBT_1MS / 4, 0); } static void 
ntb_qflush(if_t ifp) { struct ntb_net_ctx *sc = if_getsoftc(ifp); struct ntb_net_queue *q; struct mbuf *m; int i; for (i = 0; i < sc->num_queues; i++) { q = &sc->queues[i]; mtx_lock(&q->tx_lock); while ((m = buf_ring_dequeue_sc(q->br)) != NULL) m_freem(m); mtx_unlock(&q->tx_lock); } if_qflush(ifp); } /* Network Device Callbacks */ static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len) { m_freem(data); CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data); } static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data, int len) { struct ntb_net_queue *q = qp_data; struct ntb_net_ctx *sc = q->sc; struct mbuf *m = data; if_t ifp = q->ifp; uint16_t proto; CTR1(KTR_NTB, "RX: rx handler (%d)", len); if (len < 0) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } m->m_pkthdr.rcvif = ifp; if (sc->num_queues > 1) { m->m_pkthdr.flowid = q - sc->queues; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); } if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { m_copydata(m, 12, 2, (void *)&proto); switch (ntohs(proto)) { case ETHERTYPE_IP: if (if_getcapenable(ifp) & IFCAP_RXCSUM) { m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = NTB_CSUM_SET; } break; case ETHERTYPE_IPV6: if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) { m->m_pkthdr.csum_data = 0xffff; m->m_pkthdr.csum_flags = NTB_CSUM_SET; } break; } } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_input(ifp, m); } static void ntb_net_event_handler(void *data, enum ntb_link_event status) { struct ntb_net_queue *q = data; int new_state; switch (status) { case NTB_LINK_DOWN: new_state = LINK_STATE_DOWN; break; case NTB_LINK_UP: new_state = LINK_STATE_UP; break; default: new_state = LINK_STATE_UNKNOWN; break; } if_link_state_change(q->ifp, new_state); } /* Helper functions */ /* TODO: This too should really be part of the kernel */ #define EUI48_MULTICAST 1 << 0 #define EUI48_LOCALLY_ADMINISTERED 1 << 1 static void create_random_local_eui48(u_char *eaddr) { static uint8_t counter = 0; - uint32_t seed = ticks; eaddr[0] = EUI48_LOCALLY_ADMINISTERED; - memcpy(&eaddr[1], &seed, sizeof(uint32_t)); + arc4rand(&eaddr[1], 4, 0); eaddr[5] = counter++; } static device_method_t ntb_net_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ntb_net_probe), DEVMETHOD(device_attach, ntb_net_attach), DEVMETHOD(device_detach, ntb_net_detach), DEVMETHOD_END }; devclass_t ntb_net_devclass; static DEFINE_CLASS_0(ntb, ntb_net_driver, ntb_net_methods, sizeof(struct ntb_net_ctx)); DRIVER_MODULE(if_ntb, ntb_transport, ntb_net_driver, ntb_net_devclass, NULL, NULL); MODULE_DEPEND(if_ntb, ntb_transport, 1, 1, 1); MODULE_VERSION(if_ntb, 1); Index: user/alc/PQ_LAUNDRY/sys/dev/ntb/ntb.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/ntb/ntb.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/ntb/ntb.c (revision 303642) @@ -1,462 +1,462 @@ /*- * Copyright (c) 2016 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "ntb.h" devclass_t ntb_hw_devclass; SYSCTL_NODE(_hw, OID_AUTO, ntb, CTLFLAG_RW, 0, "NTB sysctls"); struct ntb_child { device_t dev; int enabled; int mwoff; int mwcnt; int spadoff; int spadcnt; int dboff; int dbmask; void *ctx; const struct ntb_ctx_ops *ctx_ops; struct rmlock ctx_lock; struct ntb_child *next; }; int ntb_register_device(device_t dev) { struct ntb_child **cpp = device_get_softc(dev); struct ntb_child *nc; int i, mw, mwu, mwt, spad, spadu, spadt, db, dbu, dbt; char cfg[128] = ""; char buf[32]; char *n, *np, *c, *p, *name; mwu = 0; mwt = NTB_MW_COUNT(dev); spadu = 0; spadt = NTB_SPAD_COUNT(dev); dbu = 0; dbt = flsll(NTB_DB_VALID_MASK(dev)); device_printf(dev, "%d memory windows, %d scratchpads, " "%d doorbells\n", mwt, spadt, dbt); snprintf(buf, sizeof(buf), "hint.%s.%d.config", device_get_name(dev), device_get_unit(dev)); TUNABLE_STR_FETCH(buf, cfg, sizeof(cfg)); n = cfg; i = 0; while ((c = strsep(&n, ",")) != NULL) { np = c; name = strsep(&np, ":"); if (name != NULL && name[0] == 0) name = NULL; p = strsep(&np, ":"); mw = (p && p[0] != 0) ? strtol(p, NULL, 10) : mwt - mwu; p = strsep(&np, ":"); spad = (p && p[0] != 0) ? strtol(p, NULL, 10) : spadt - spadu; db = (np && np[0] != 0) ? strtol(np, NULL, 10) : dbt - dbu; if (mw > mwt - mwu || spad > spadt - spadu || db > dbt - dbu) { device_printf(dev, "Not enough resources for config\n"); break; } nc = malloc(sizeof(*nc), M_DEVBUF, M_WAITOK | M_ZERO); nc->mwoff = mwu; nc->mwcnt = mw; nc->spadoff = spadu; nc->spadcnt = spad; nc->dboff = dbu; nc->dbmask = (db == 0) ? 
0 : (0xffffffffffffffff >> (64 - db)); rm_init(&nc->ctx_lock, "ntb ctx"); nc->dev = device_add_child(dev, name, -1); if (nc->dev == NULL) { ntb_unregister_device(dev); return (ENOMEM); } device_set_ivars(nc->dev, nc); *cpp = nc; cpp = &nc->next; if (bootverbose) { device_printf(dev, "%d \"%s\":", i, name); if (mw > 0) { printf(" memory windows %d", mwu); if (mw > 1) printf("-%d", mwu + mw - 1); } if (spad > 0) { printf(" scratchpads %d", spadu); if (spad > 1) printf("-%d", spadu + spad - 1); } if (db > 0) { printf(" doorbells %d", dbu); if (db > 1) printf("-%d", dbu + db - 1); } printf("\n"); } mwu += mw; spadu += spad; dbu += db; i++; } bus_generic_attach(dev); return (0); } int ntb_unregister_device(device_t dev) { struct ntb_child **cpp = device_get_softc(dev); struct ntb_child *nc; int error = 0; while ((nc = *cpp) != NULL) { *cpp = (*cpp)->next; error = device_delete_child(dev, nc->dev); if (error) break; rm_destroy(&nc->ctx_lock); free(nc, M_DEVBUF); } return (error); } void ntb_link_event(device_t dev) { struct ntb_child **cpp = device_get_softc(dev); struct ntb_child *nc; struct rm_priotracker ctx_tracker; for (nc = *cpp; nc != NULL; nc = nc->next) { rm_rlock(&nc->ctx_lock, &ctx_tracker); if (nc->ctx_ops != NULL && nc->ctx_ops->link_event != NULL) nc->ctx_ops->link_event(nc->ctx); rm_runlock(&nc->ctx_lock, &ctx_tracker); } } void ntb_db_event(device_t dev, uint32_t vec) { struct ntb_child **cpp = device_get_softc(dev); struct ntb_child *nc; struct rm_priotracker ctx_tracker; for (nc = *cpp; nc != NULL; nc = nc->next) { rm_rlock(&nc->ctx_lock, &ctx_tracker); if (nc->ctx_ops != NULL && nc->ctx_ops->db_event != NULL) nc->ctx_ops->db_event(nc->ctx, vec); rm_runlock(&nc->ctx_lock, &ctx_tracker); } } bool ntb_link_is_up(device_t ntb, enum ntb_speed *speed, enum ntb_width *width) { return (NTB_LINK_IS_UP(device_get_parent(ntb), speed, width)); } int ntb_link_enable(device_t ntb, enum ntb_speed speed, enum ntb_width width) { struct ntb_child *nc = device_get_ivars(ntb); struct ntb_child **cpp = device_get_softc(device_get_parent(nc->dev)); struct ntb_child *nc1; - for (nc1 = *cpp; nc1 != NULL; nc1 = nc->next) { + for (nc1 = *cpp; nc1 != NULL; nc1 = nc1->next) { if (nc1->enabled) { nc->enabled = 1; return (0); } } nc->enabled = 1; return (NTB_LINK_ENABLE(device_get_parent(ntb), speed, width)); } int ntb_link_disable(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); struct ntb_child **cpp = device_get_softc(device_get_parent(nc->dev)); struct ntb_child *nc1; if (!nc->enabled) return (0); nc->enabled = 0; - for (nc1 = *cpp; nc1 != NULL; nc1 = nc->next) { + for (nc1 = *cpp; nc1 != NULL; nc1 = nc1->next) { if (nc1->enabled) return (0); } return (NTB_LINK_DISABLE(device_get_parent(ntb))); } bool ntb_link_enabled(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); return (nc->enabled && NTB_LINK_ENABLED(device_get_parent(ntb))); } int ntb_set_ctx(device_t ntb, void *ctx, const struct ntb_ctx_ops *ctx_ops) { struct ntb_child *nc = device_get_ivars(ntb); if (ctx == NULL || ctx_ops == NULL) return (EINVAL); rm_wlock(&nc->ctx_lock); if (nc->ctx_ops != NULL) { rm_wunlock(&nc->ctx_lock); return (EINVAL); } nc->ctx = ctx; nc->ctx_ops = ctx_ops; rm_wunlock(&nc->ctx_lock); return (0); } void * ntb_get_ctx(device_t ntb, const struct ntb_ctx_ops **ctx_ops) { struct ntb_child *nc = device_get_ivars(ntb); KASSERT(nc->ctx != NULL && nc->ctx_ops != NULL, ("bogus")); if (ctx_ops != NULL) *ctx_ops = nc->ctx_ops; return (nc->ctx); } void ntb_clear_ctx(device_t ntb) { struct ntb_child *nc = 
device_get_ivars(ntb); rm_wlock(&nc->ctx_lock); nc->ctx = NULL; nc->ctx_ops = NULL; rm_wunlock(&nc->ctx_lock); } uint8_t ntb_mw_count(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); return (nc->mwcnt); } int ntb_mw_get_range(device_t ntb, unsigned mw_idx, vm_paddr_t *base, caddr_t *vbase, size_t *size, size_t *align, size_t *align_size, bus_addr_t *plimit) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_MW_GET_RANGE(device_get_parent(ntb), mw_idx + nc->mwoff, base, vbase, size, align, align_size, plimit)); } int ntb_mw_set_trans(device_t ntb, unsigned mw_idx, bus_addr_t addr, size_t size) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_MW_SET_TRANS(device_get_parent(ntb), mw_idx + nc->mwoff, addr, size)); } int ntb_mw_clear_trans(device_t ntb, unsigned mw_idx) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_MW_CLEAR_TRANS(device_get_parent(ntb), mw_idx + nc->mwoff)); } int ntb_mw_get_wc(device_t ntb, unsigned mw_idx, vm_memattr_t *mode) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_MW_GET_WC(device_get_parent(ntb), mw_idx + nc->mwoff, mode)); } int ntb_mw_set_wc(device_t ntb, unsigned mw_idx, vm_memattr_t mode) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_MW_SET_WC(device_get_parent(ntb), mw_idx + nc->mwoff, mode)); } uint8_t ntb_spad_count(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); return (nc->spadcnt); } void ntb_spad_clear(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); unsigned i; for (i = 0; i < nc->spadcnt; i++) NTB_SPAD_WRITE(device_get_parent(ntb), i + nc->spadoff, 0); } int ntb_spad_write(device_t ntb, unsigned int idx, uint32_t val) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_SPAD_WRITE(device_get_parent(ntb), idx + nc->spadoff, val)); } int ntb_spad_read(device_t ntb, unsigned int idx, uint32_t *val) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_SPAD_READ(device_get_parent(ntb), idx + nc->spadoff, val)); } int ntb_peer_spad_write(device_t ntb, unsigned int idx, uint32_t val) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_PEER_SPAD_WRITE(device_get_parent(ntb), idx + nc->spadoff, val)); } int ntb_peer_spad_read(device_t ntb, unsigned int idx, uint32_t *val) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_PEER_SPAD_READ(device_get_parent(ntb), idx + nc->spadoff, val)); } uint64_t ntb_db_valid_mask(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); return (nc->dbmask); } int ntb_db_vector_count(device_t ntb) { return (NTB_DB_VECTOR_COUNT(device_get_parent(ntb))); } uint64_t ntb_db_vector_mask(device_t ntb, uint32_t vector) { struct ntb_child *nc = device_get_ivars(ntb); return ((NTB_DB_VECTOR_MASK(device_get_parent(ntb), vector) >> nc->dboff) & nc->dbmask); } int ntb_peer_db_addr(device_t ntb, bus_addr_t *db_addr, vm_size_t *db_size) { return (NTB_PEER_DB_ADDR(device_get_parent(ntb), db_addr, db_size)); } void ntb_db_clear(device_t ntb, uint64_t bits) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_DB_CLEAR(device_get_parent(ntb), bits << nc->dboff)); } void ntb_db_clear_mask(device_t ntb, uint64_t bits) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_DB_CLEAR_MASK(device_get_parent(ntb), bits << nc->dboff)); } uint64_t ntb_db_read(device_t ntb) { struct ntb_child *nc = device_get_ivars(ntb); return ((NTB_DB_READ(device_get_parent(ntb)) >> nc->dboff) & nc->dbmask); } void ntb_db_set_mask(device_t ntb, uint64_t bits) { struct ntb_child *nc = device_get_ivars(ntb); return 
(NTB_DB_SET_MASK(device_get_parent(ntb), bits << nc->dboff)); } void ntb_peer_db_set(device_t ntb, uint64_t bits) { struct ntb_child *nc = device_get_ivars(ntb); return (NTB_PEER_DB_SET(device_get_parent(ntb), bits << nc->dboff)); } MODULE_VERSION(ntb, 1); Index: user/alc/PQ_LAUNDRY/sys/dev/ntb/ntb_hw/ntb_hw.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/ntb/ntb_hw/ntb_hw.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/ntb/ntb_hw/ntb_hw.c (revision 303642) @@ -1,3095 +1,3114 @@ /*- * Copyright (c) 2016 Alexander Motin * Copyright (C) 2013 Intel Corporation * Copyright (C) 2015 EMC Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * The Non-Transparent Bridge (NTB) is a device that allows you to connect * two or more systems using a PCI-e links, providing remote memory access. * * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs. * * NOTE: Much of the code in this module is shared with Linux. Any patches may * be picked up and redistributed in Linux with a dual GPL/BSD license. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ntb_regs.h" #include "../ntb.h" #define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT) #define NTB_HB_TIMEOUT 1 /* second */ #define ATOM_LINK_RECOVERY_TIME 500 /* ms */ #define BAR_HIGH_MASK (~((1ull << 12) - 1)) #define NTB_MSIX_VER_GUARD 0xaabbccdd #define NTB_MSIX_RECEIVED 0xe0f0e0f0 /* * PCI constants could be somewhere more generic, but aren't defined/used in * pci.c. */ #define PCI_MSIX_ENTRY_SIZE 16 #define PCI_MSIX_ENTRY_LOWER_ADDR 0 #define PCI_MSIX_ENTRY_UPPER_ADDR 4 #define PCI_MSIX_ENTRY_DATA 8 enum ntb_device_type { NTB_XEON, NTB_ATOM }; /* ntb_conn_type are hardware numbers, cannot change. 
*/ enum ntb_conn_type { NTB_CONN_TRANSPARENT = 0, NTB_CONN_B2B = 1, NTB_CONN_RP = 2, }; enum ntb_b2b_direction { NTB_DEV_USD = 0, NTB_DEV_DSD = 1, }; enum ntb_bar { NTB_CONFIG_BAR = 0, NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3, NTB_MAX_BARS }; enum { NTB_MSIX_GUARD = 0, NTB_MSIX_DATA0, NTB_MSIX_DATA1, NTB_MSIX_DATA2, NTB_MSIX_OFS0, NTB_MSIX_OFS1, NTB_MSIX_OFS2, NTB_MSIX_DONE, NTB_MAX_MSIX_SPAD }; /* Device features and workarounds */ #define HAS_FEATURE(ntb, feature) \ (((ntb)->features & (feature)) != 0) struct ntb_hw_info { uint32_t device_id; const char *desc; enum ntb_device_type type; uint32_t features; }; struct ntb_pci_bar_info { bus_space_tag_t pci_bus_tag; bus_space_handle_t pci_bus_handle; int pci_resource_id; struct resource *pci_resource; vm_paddr_t pbase; caddr_t vbase; vm_size_t size; vm_memattr_t map_mode; /* Configuration register offsets */ uint32_t psz_off; uint32_t ssz_off; uint32_t pbarxlat_off; }; struct ntb_int_info { struct resource *res; int rid; void *tag; }; struct ntb_vec { struct ntb_softc *ntb; uint32_t num; unsigned masked; }; struct ntb_reg { uint32_t ntb_ctl; uint32_t lnk_sta; uint8_t db_size; unsigned mw_bar[NTB_MAX_BARS]; }; struct ntb_alt_reg { uint32_t db_bell; uint32_t db_mask; uint32_t spad; }; struct ntb_xlat_reg { uint32_t bar0_base; uint32_t bar2_base; uint32_t bar4_base; uint32_t bar5_base; uint32_t bar2_xlat; uint32_t bar4_xlat; uint32_t bar5_xlat; uint32_t bar2_limit; uint32_t bar4_limit; uint32_t bar5_limit; }; struct ntb_b2b_addr { uint64_t bar0_addr; uint64_t bar2_addr64; uint64_t bar4_addr64; uint64_t bar4_addr32; uint64_t bar5_addr32; }; struct ntb_msix_data { uint32_t nmd_ofs; uint32_t nmd_data; }; struct ntb_softc { /* ntb.c context. Do not move! Must go first! */ void *ntb_store; device_t device; enum ntb_device_type type; uint32_t features; struct ntb_pci_bar_info bar_info[NTB_MAX_BARS]; struct ntb_int_info int_info[MAX_MSIX_INTERRUPTS]; uint32_t allocated_interrupts; struct ntb_msix_data peer_msix_data[XEON_NONLINK_DB_MSIX_BITS]; struct ntb_msix_data msix_data[XEON_NONLINK_DB_MSIX_BITS]; bool peer_msix_good; bool peer_msix_done; struct ntb_pci_bar_info *peer_lapic_bar; struct callout peer_msix_work; struct callout heartbeat_timer; struct callout lr_timer; struct ntb_vec *msix_vec; uint32_t ppd; enum ntb_conn_type conn_type; enum ntb_b2b_direction dev_type; /* Offset of peer bar0 in B2B BAR */ uint64_t b2b_off; /* Memory window used to access peer bar0 */ #define B2B_MW_DISABLED UINT8_MAX uint8_t b2b_mw_idx; uint32_t msix_xlat; uint8_t msix_mw_idx; uint8_t mw_count; uint8_t spad_count; uint8_t db_count; uint8_t db_vec_count; uint8_t db_vec_shift; /* Protects local db_mask. 
*/ #define DB_MASK_LOCK(sc) mtx_lock_spin(&(sc)->db_mask_lock) #define DB_MASK_UNLOCK(sc) mtx_unlock_spin(&(sc)->db_mask_lock) #define DB_MASK_ASSERT(sc,f) mtx_assert(&(sc)->db_mask_lock, (f)) struct mtx db_mask_lock; volatile uint32_t ntb_ctl; volatile uint32_t lnk_sta; uint64_t db_valid_mask; uint64_t db_link_mask; uint64_t db_mask; uint64_t fake_db_bell; /* NTB_SB01BASE_LOCKUP*/ int last_ts; /* ticks @ last irq */ const struct ntb_reg *reg; const struct ntb_alt_reg *self_reg; const struct ntb_alt_reg *peer_reg; const struct ntb_xlat_reg *xlat_reg; }; #ifdef __i386__ static __inline uint64_t bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { return (bus_space_read_4(tag, handle, offset) | ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32); } static __inline void bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset, uint64_t val) { bus_space_write_4(tag, handle, offset, val); bus_space_write_4(tag, handle, offset + 4, val >> 32); } #endif #define intel_ntb_bar_read(SIZE, bar, offset) \ bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \ ntb->bar_info[(bar)].pci_bus_handle, (offset)) #define intel_ntb_bar_write(SIZE, bar, offset, val) \ bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \ ntb->bar_info[(bar)].pci_bus_handle, (offset), (val)) #define intel_ntb_reg_read(SIZE, offset) \ intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset) #define intel_ntb_reg_write(SIZE, offset, val) \ intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val) #define intel_ntb_mw_read(SIZE, offset) \ intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \ offset) #define intel_ntb_mw_write(SIZE, offset, val) \ intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \ offset, val) static int intel_ntb_probe(device_t device); static int intel_ntb_attach(device_t device); static int intel_ntb_detach(device_t device); static uint64_t intel_ntb_db_valid_mask(device_t dev); static void intel_ntb_spad_clear(device_t dev); static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector); static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width); static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed, enum ntb_width width); static int intel_ntb_link_disable(device_t dev); static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val); static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val); static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx); static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw); static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar); static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar, uint32_t *base, uint32_t *xlat, uint32_t *lmt); static int intel_ntb_map_pci_bars(struct ntb_softc *ntb); static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx, vm_memattr_t); static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *, const char *); static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar); static int map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar); static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb); static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail); static int intel_ntb_init_isr(struct ntb_softc *ntb); static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb); static int 
intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors); static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb); static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector); static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec); static void ndev_vec_isr(void *arg); static void ndev_irq_isr(void *arg); static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff); static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t); static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t); static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors); static void intel_ntb_free_msix_vec(struct ntb_softc *ntb); static void intel_ntb_get_msix_info(struct ntb_softc *ntb); static void intel_ntb_exchange_msix(void *); static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id); static void intel_ntb_detect_max_mw(struct ntb_softc *ntb); static int intel_ntb_detect_xeon(struct ntb_softc *ntb); static int intel_ntb_detect_atom(struct ntb_softc *ntb); static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb); static int intel_ntb_atom_init_dev(struct ntb_softc *ntb); static void intel_ntb_teardown_xeon(struct ntb_softc *ntb); static void configure_atom_secondary_side_bars(struct ntb_softc *ntb); static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx, enum ntb_bar regbar); static void xeon_set_sbar_base_and_limit(struct ntb_softc *, uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar); static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr, enum ntb_bar idx); static int xeon_setup_b2b_mw(struct ntb_softc *, const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr); static inline bool link_is_up(struct ntb_softc *ntb); static inline bool _xeon_link_is_up(struct ntb_softc *ntb); static inline bool atom_link_is_err(struct ntb_softc *ntb); static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *); static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *); static void atom_link_hb(void *arg); static void recover_atom_link(void *arg); static bool intel_ntb_poll_link(struct ntb_softc *ntb); static void save_bar_parameters(struct ntb_pci_bar_info *bar); static void intel_ntb_sysctl_init(struct ntb_softc *); static int sysctl_handle_features(SYSCTL_HANDLER_ARGS); static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS); static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS); static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS); static int sysctl_handle_register(SYSCTL_HANDLER_ARGS); static unsigned g_ntb_hw_debug_level; SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN, &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose"); #define intel_ntb_printf(lvl, ...) 
do { \ if ((lvl) <= g_ntb_hw_debug_level) { \ device_printf(ntb->device, __VA_ARGS__); \ } \ } while (0) #define _NTB_PAT_UC 0 #define _NTB_PAT_WC 1 #define _NTB_PAT_WT 4 #define _NTB_PAT_WP 5 #define _NTB_PAT_WB 6 #define _NTB_PAT_UCM 7 static unsigned g_ntb_mw_pat = _NTB_PAT_UC; SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN, &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): " "UC: " __XSTRING(_NTB_PAT_UC) ", " "WC: " __XSTRING(_NTB_PAT_WC) ", " "WT: " __XSTRING(_NTB_PAT_WT) ", " "WP: " __XSTRING(_NTB_PAT_WP) ", " "WB: " __XSTRING(_NTB_PAT_WB) ", " "UC-: " __XSTRING(_NTB_PAT_UCM)); static inline vm_memattr_t intel_ntb_pat_flags(void) { switch (g_ntb_mw_pat) { case _NTB_PAT_WC: return (VM_MEMATTR_WRITE_COMBINING); case _NTB_PAT_WT: return (VM_MEMATTR_WRITE_THROUGH); case _NTB_PAT_WP: return (VM_MEMATTR_WRITE_PROTECTED); case _NTB_PAT_WB: return (VM_MEMATTR_WRITE_BACK); case _NTB_PAT_UCM: return (VM_MEMATTR_WEAK_UNCACHEABLE); case _NTB_PAT_UC: /* FALLTHROUGH */ default: return (VM_MEMATTR_UNCACHEABLE); } } /* * Well, this obviously doesn't belong here, but it doesn't seem to exist * anywhere better yet. */ static inline const char * intel_ntb_vm_memattr_to_str(vm_memattr_t pat) { switch (pat) { case VM_MEMATTR_WRITE_COMBINING: return ("WRITE_COMBINING"); case VM_MEMATTR_WRITE_THROUGH: return ("WRITE_THROUGH"); case VM_MEMATTR_WRITE_PROTECTED: return ("WRITE_PROTECTED"); case VM_MEMATTR_WRITE_BACK: return ("WRITE_BACK"); case VM_MEMATTR_WEAK_UNCACHEABLE: return ("UNCACHED"); case VM_MEMATTR_UNCACHEABLE: return ("UNCACHEABLE"); default: return ("UNKNOWN"); } } static int g_ntb_msix_idx = 1; SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx, 0, "Use this memory window to access the peer MSIX message complex on " "certain Xeon-based NTB systems, as a workaround for a hardware errata. " "Like b2b_mw_idx, negative values index from the last available memory " "window. (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)"); static int g_ntb_mw_idx = -1; SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx, 0, "Use this memory window to access the peer NTB registers. A " "non-negative value starts from the first MW index; a negative value " "starts from the last MW index. The default is -1, i.e., the last " "available memory window. Both sides of the NTB MUST set the same " "value here! (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)"); /* Hardware owns the low 16 bits of features. */ #define NTB_BAR_SIZE_4K (1 << 0) #define NTB_SDOORBELL_LOCKUP (1 << 1) #define NTB_SB01BASE_LOCKUP (1 << 2) #define NTB_B2BDOORBELL_BIT14 (1 << 3) /* Software/configuration owns the top 16 bits. */ #define NTB_SPLIT_BAR (1ull << 16) #define NTB_FEATURES_STR \ "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \ "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K" static struct ntb_hw_info pci_ids[] = { /* XXX: PS/SS IDs left out until they are supported. 
*/ { 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B", NTB_ATOM, 0 }, { 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 }, { 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 }, { 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K }, { 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP }, { 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP }, { 0x00000000, NULL, NTB_ATOM, 0 } }; static const struct ntb_reg atom_reg = { .ntb_ctl = ATOM_NTBCNTL_OFFSET, .lnk_sta = ATOM_LINK_STATUS_OFFSET, .db_size = sizeof(uint64_t), .mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 }, }; static const struct ntb_alt_reg atom_pri_reg = { .db_bell = ATOM_PDOORBELL_OFFSET, .db_mask = ATOM_PDBMSK_OFFSET, .spad = ATOM_SPAD_OFFSET, }; static const struct ntb_alt_reg atom_b2b_reg = { .db_bell = ATOM_B2B_DOORBELL_OFFSET, .spad = ATOM_B2B_SPAD_OFFSET, }; static const struct ntb_xlat_reg atom_sec_xlat = { #if 0 /* "FIXME" says the Linux driver. */ .bar0_base = ATOM_SBAR0BASE_OFFSET, .bar2_base = ATOM_SBAR2BASE_OFFSET, .bar4_base = ATOM_SBAR4BASE_OFFSET, .bar2_limit = ATOM_SBAR2LMT_OFFSET, .bar4_limit = ATOM_SBAR4LMT_OFFSET, #endif .bar2_xlat = ATOM_SBAR2XLAT_OFFSET, .bar4_xlat = ATOM_SBAR4XLAT_OFFSET, }; static const struct ntb_reg xeon_reg = { .ntb_ctl = XEON_NTBCNTL_OFFSET, .lnk_sta = XEON_LINK_STATUS_OFFSET, .db_size = sizeof(uint16_t), .mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 }, }; static const struct ntb_alt_reg xeon_pri_reg = { .db_bell = XEON_PDOORBELL_OFFSET, .db_mask = XEON_PDBMSK_OFFSET, .spad = XEON_SPAD_OFFSET, }; static const struct ntb_alt_reg xeon_b2b_reg = { .db_bell = XEON_B2B_DOORBELL_OFFSET, .spad = XEON_B2B_SPAD_OFFSET, }; static const struct ntb_xlat_reg xeon_sec_xlat = { .bar0_base = XEON_SBAR0BASE_OFFSET, .bar2_base = XEON_SBAR2BASE_OFFSET, .bar4_base = XEON_SBAR4BASE_OFFSET, .bar5_base = XEON_SBAR5BASE_OFFSET, .bar2_limit = XEON_SBAR2LMT_OFFSET, .bar4_limit = XEON_SBAR4LMT_OFFSET, .bar5_limit = XEON_SBAR5LMT_OFFSET, .bar2_xlat = XEON_SBAR2XLAT_OFFSET, .bar4_xlat = XEON_SBAR4XLAT_OFFSET, .bar5_xlat = XEON_SBAR5XLAT_OFFSET, }; static struct ntb_b2b_addr xeon_b2b_usd_addr = { .bar0_addr = XEON_B2B_BAR0_ADDR, .bar2_addr64 = XEON_B2B_BAR2_ADDR64, .bar4_addr64 = XEON_B2B_BAR4_ADDR64, .bar4_addr32 = XEON_B2B_BAR4_ADDR32, .bar5_addr32 = XEON_B2B_BAR5_ADDR32, }; static struct ntb_b2b_addr xeon_b2b_dsd_addr = { .bar0_addr = XEON_B2B_BAR0_ADDR, .bar2_addr64 = XEON_B2B_BAR2_ADDR64, .bar4_addr64 = XEON_B2B_BAR4_ADDR64, .bar4_addr32 = XEON_B2B_BAR4_ADDR32, .bar5_addr32 = XEON_B2B_BAR5_ADDR32, }; SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW, 0, "B2B MW segment overrides -- MUST be the same on both sides"); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN, &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon " "hardware, use this 64-bit address on the bus between the NTB devices for " "the window at BAR2, on the upstream side of the link. 
MUST be the same " "address on both sides."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN, &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN, &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 " "(split-BAR mode)."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN, &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 " "(split-BAR mode)."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN, &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon " "hardware, use this 64-bit address on the bus between the NTB devices for " "the window at BAR2, on the downstream side of the link. MUST be the same" " address on both sides."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN, &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN, &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 " "(split-BAR mode)."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN, &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 " "(split-BAR mode)."); /* * OS <-> Driver interface structures */ MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations"); /* * OS <-> Driver linkage functions */ static int intel_ntb_probe(device_t device) { struct ntb_hw_info *p; p = intel_ntb_get_device_info(pci_get_devid(device)); if (p == NULL) return (ENXIO); device_set_desc(device, p->desc); return (0); } static int intel_ntb_attach(device_t device) { struct ntb_softc *ntb; struct ntb_hw_info *p; int error; ntb = device_get_softc(device); p = intel_ntb_get_device_info(pci_get_devid(device)); ntb->device = device; ntb->type = p->type; ntb->features = p->features; ntb->b2b_mw_idx = B2B_MW_DISABLED; ntb->msix_mw_idx = B2B_MW_DISABLED; /* Heartbeat timer for NTB_ATOM since there is no link interrupt */ callout_init(&ntb->heartbeat_timer, 1); callout_init(&ntb->lr_timer, 1); callout_init(&ntb->peer_msix_work, 1); mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN); if (ntb->type == NTB_ATOM) error = intel_ntb_detect_atom(ntb); else error = intel_ntb_detect_xeon(ntb); if (error != 0) goto out; intel_ntb_detect_max_mw(ntb); pci_enable_busmaster(ntb->device); error = intel_ntb_map_pci_bars(ntb); if (error != 0) goto out; if (ntb->type == NTB_ATOM) error = intel_ntb_atom_init_dev(ntb); else error = intel_ntb_xeon_init_dev(ntb); if (error != 0) goto out; intel_ntb_spad_clear(device); intel_ntb_poll_link(ntb); intel_ntb_sysctl_init(ntb); /* Attach children to this controller */ error = ntb_register_device(device); out: if (error != 0) intel_ntb_detach(device); return (error); } static int intel_ntb_detach(device_t device) { struct ntb_softc *ntb; ntb = device_get_softc(device); /* Detach & delete all children */ ntb_unregister_device(device); if (ntb->self_reg != NULL) { DB_MASK_LOCK(ntb); db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask); DB_MASK_UNLOCK(ntb); } callout_drain(&ntb->heartbeat_timer); callout_drain(&ntb->lr_timer); callout_drain(&ntb->peer_msix_work); pci_disable_busmaster(ntb->device); if (ntb->type == NTB_XEON) intel_ntb_teardown_xeon(ntb); intel_ntb_teardown_interrupts(ntb); mtx_destroy(&ntb->db_mask_lock); intel_ntb_unmap_pci_bar(ntb); return (0); } /* * Driver internal routines */ static inline enum ntb_bar intel_ntb_mw_to_bar(struct 
ntb_softc *ntb, unsigned mw) { KASSERT(mw < ntb->mw_count, ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count)); KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw")); return (ntb->reg->mw_bar[mw]); } static inline bool bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar) { /* XXX This assertion could be stronger. */ KASSERT(bar < NTB_MAX_BARS, ("bogus bar")); return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR)); } static inline void bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base, uint32_t *xlat, uint32_t *lmt) { uint32_t basev, lmtv, xlatv; switch (bar) { case NTB_B2B_BAR_1: basev = ntb->xlat_reg->bar2_base; lmtv = ntb->xlat_reg->bar2_limit; xlatv = ntb->xlat_reg->bar2_xlat; break; case NTB_B2B_BAR_2: basev = ntb->xlat_reg->bar4_base; lmtv = ntb->xlat_reg->bar4_limit; xlatv = ntb->xlat_reg->bar4_xlat; break; case NTB_B2B_BAR_3: basev = ntb->xlat_reg->bar5_base; lmtv = ntb->xlat_reg->bar5_limit; xlatv = ntb->xlat_reg->bar5_xlat; break; default: KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS, ("bad bar")); basev = lmtv = xlatv = 0; break; } if (base != NULL) *base = basev; if (xlat != NULL) *xlat = xlatv; if (lmt != NULL) *lmt = lmtv; } static int intel_ntb_map_pci_bars(struct ntb_softc *ntb) { int rc; ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0); rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]); if (rc != 0) goto out; ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2); rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]); if (rc != 0) goto out; ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET; ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4); rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]); if (rc != 0) goto out; ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET; if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR)) goto out; ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5); rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]); ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET; out: if (rc != 0) device_printf(ntb->device, "unable to allocate pci resource\n"); return (rc); } static void print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar, const char *kind) { device_printf(ntb->device, "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n", PCI_RID2BAR(bar->pci_resource_id), bar->vbase, (char *)bar->vbase + bar->size - 1, (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1), (uintmax_t)bar->size, kind); } static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar) { bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY, &bar->pci_resource_id, RF_ACTIVE); if (bar->pci_resource == NULL) return (ENXIO); save_bar_parameters(bar); bar->map_mode = VM_MEMATTR_UNCACHEABLE; print_map_success(ntb, bar, "mmr"); return (0); } static int map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar) { int rc; vm_memattr_t mapmode; uint8_t bar_size_bits = 0; bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY, &bar->pci_resource_id, RF_ACTIVE); if (bar->pci_resource == NULL) return (ENXIO); 
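/* Record the mapped resource's parameters (base addresses, size) in bar_info; this is redone below if the Ivytown BAR-size workaround resizes the BAR. */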
save_bar_parameters(bar); /* * Ivytown NTB BAR sizes are misreported by the hardware due to a * hardware issue. To work around this, query the size it should be * configured to by the device and modify the resource to correspond to * this new size. The BIOS on systems with this problem is required to * provide enough address space to allow the driver to make this change * safely. * * Ideally I could have just specified the size when I allocated the * resource like: * bus_alloc_resource(ntb->device, * SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul, * 1ul << bar_size_bits, RF_ACTIVE); * but the PCI driver does not honor the size in this call, so we have * to modify it after the fact. */ if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) { if (bar->pci_resource_id == PCIR_BAR(2)) bar_size_bits = pci_read_config(ntb->device, XEON_PBAR23SZ_OFFSET, 1); else bar_size_bits = pci_read_config(ntb->device, XEON_PBAR45SZ_OFFSET, 1); rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY, bar->pci_resource, bar->pbase, bar->pbase + (1ul << bar_size_bits) - 1); if (rc != 0) { device_printf(ntb->device, "unable to resize bar\n"); return (rc); } save_bar_parameters(bar); } bar->map_mode = VM_MEMATTR_UNCACHEABLE; print_map_success(ntb, bar, "mw"); /* * Optionally, mark MW BARs as anything other than UC to improve * performance. */ mapmode = intel_ntb_pat_flags(); if (mapmode == bar->map_mode) return (0); rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode); if (rc == 0) { bar->map_mode = mapmode; device_printf(ntb->device, "Marked BAR%d v:[%p-%p] p:[%p-%p] as " "%s.\n", PCI_RID2BAR(bar->pci_resource_id), bar->vbase, (char *)bar->vbase + bar->size - 1, (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1), intel_ntb_vm_memattr_to_str(mapmode)); } else device_printf(ntb->device, "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as " "%s: %d\n", PCI_RID2BAR(bar->pci_resource_id), bar->vbase, (char *)bar->vbase + bar->size - 1, (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1), intel_ntb_vm_memattr_to_str(mapmode), rc); /* Proceed anyway */ return (0); } static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb) { struct ntb_pci_bar_info *current_bar; int i; for (i = 0; i < NTB_MAX_BARS; i++) { current_bar = &ntb->bar_info[i]; if (current_bar->pci_resource != NULL) bus_release_resource(ntb->device, SYS_RES_MEMORY, current_bar->pci_resource_id, current_bar->pci_resource); } } static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors) { uint32_t i; int rc; for (i = 0; i < num_vectors; i++) { ntb->int_info[i].rid = i + 1; ntb->int_info[i].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE); if (ntb->int_info[i].res == NULL) { device_printf(ntb->device, "bus_alloc_resource failed\n"); return (ENOMEM); } ntb->int_info[i].tag = NULL; ntb->allocated_interrupts++; rc = bus_setup_intr(ntb->device, ntb->int_info[i].res, INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr, &ntb->msix_vec[i], &ntb->int_info[i].tag); if (rc != 0) { device_printf(ntb->device, "bus_setup_intr failed\n"); return (ENXIO); } } return (0); } /* * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector * cannot be allocated for each MSI-X message. JHB seems to think remapping * should be okay. This tunable should enable us to test that hypothesis * when someone gets their hands on some Xeon hardware. 
*/ static int ntb_force_remap_mode; SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN, &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped" " to a smaller number of ithreads, even if the desired number are " "available"); /* * In case it is NOT ok, give consumers an abort button. */ static int ntb_prefer_intx; SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN, &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather " "than remapping MSI-X messages over available slots (match Linux driver " "behavior)"); /* * Remap the desired number of MSI-X messages to available ithreads in a simple * round-robin fashion. */ static int intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail) { u_int *vectors; uint32_t i; int rc; if (ntb_prefer_intx != 0) return (ENXIO); vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK); for (i = 0; i < desired; i++) vectors[i] = (i % avail) + 1; rc = pci_remap_msix(dev, desired, vectors); free(vectors, M_NTB); return (rc); } static int intel_ntb_init_isr(struct ntb_softc *ntb) { uint32_t desired_vectors, num_vectors; int rc; ntb->allocated_interrupts = 0; ntb->last_ts = ticks; /* * Mask all doorbell interrupts. (Except link events!) */ DB_MASK_LOCK(ntb); ntb->db_mask = ntb->db_valid_mask; db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask); DB_MASK_UNLOCK(ntb); num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device), ntb->db_count); if (desired_vectors >= 1) { rc = pci_alloc_msix(ntb->device, &num_vectors); if (ntb_force_remap_mode != 0 && rc == 0 && num_vectors == desired_vectors) num_vectors--; if (rc == 0 && num_vectors < desired_vectors) { rc = intel_ntb_remap_msix(ntb->device, desired_vectors, num_vectors); if (rc == 0) num_vectors = desired_vectors; else pci_release_msi(ntb->device); } if (rc != 0) num_vectors = 1; } else num_vectors = 1; if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) { if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { device_printf(ntb->device, "Errata workaround does not support MSI or INTX\n"); return (EINVAL); } ntb->db_vec_count = 1; ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT; rc = intel_ntb_setup_legacy_interrupt(ntb); } else { if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS && HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { device_printf(ntb->device, "Errata workaround expects %d doorbell bits\n", XEON_NONLINK_DB_MSIX_BITS); return (EINVAL); } intel_ntb_create_msix_vec(ntb, num_vectors); rc = intel_ntb_setup_msix(ntb, num_vectors); } if (rc != 0) { device_printf(ntb->device, "Error allocating interrupts: %d\n", rc); intel_ntb_free_msix_vec(ntb); } return (rc); } static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb) { int rc; ntb->int_info[0].rid = 0; ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ, &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE); if (ntb->int_info[0].res == NULL) { device_printf(ntb->device, "bus_alloc_resource failed\n"); return (ENOMEM); } ntb->int_info[0].tag = NULL; ntb->allocated_interrupts = 1; rc = bus_setup_intr(ntb->device, ntb->int_info[0].res, INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr, ntb, &ntb->int_info[0].tag); if (rc != 0) { device_printf(ntb->device, "bus_setup_intr failed\n"); return (ENXIO); } return (0); } static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb) { struct ntb_int_info *current_int; int i; for (i = 0; i < ntb->allocated_interrupts; i++) { current_int = &ntb->int_info[i]; if (current_int->tag != NULL) bus_teardown_intr(ntb->device, 
current_int->res, current_int->tag); if (current_int->res != NULL) bus_release_resource(ntb->device, SYS_RES_IRQ, rman_get_rid(current_int->res), current_int->res); } intel_ntb_free_msix_vec(ntb); pci_release_msi(ntb->device); } /* * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon. Abstract it * out to make code clearer. */ static inline uint64_t db_ioread(struct ntb_softc *ntb, uint64_t regoff) { if (ntb->type == NTB_ATOM) return (intel_ntb_reg_read(8, regoff)); KASSERT(ntb->type == NTB_XEON, ("bad ntb type")); return (intel_ntb_reg_read(2, regoff)); } static inline void db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val) { KASSERT((val & ~ntb->db_valid_mask) == 0, ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__, (uintmax_t)(val & ~ntb->db_valid_mask), (uintmax_t)ntb->db_valid_mask)); if (regoff == ntb->self_reg->db_mask) DB_MASK_ASSERT(ntb, MA_OWNED); db_iowrite_raw(ntb, regoff, val); } static inline void db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val) { if (ntb->type == NTB_ATOM) { intel_ntb_reg_write(8, regoff, val); return; } KASSERT(ntb->type == NTB_XEON, ("bad ntb type")); intel_ntb_reg_write(2, regoff, (uint16_t)val); } static void intel_ntb_db_set_mask(device_t dev, uint64_t bits) { struct ntb_softc *ntb = device_get_softc(dev); DB_MASK_LOCK(ntb); ntb->db_mask |= bits; if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask); DB_MASK_UNLOCK(ntb); } static void intel_ntb_db_clear_mask(device_t dev, uint64_t bits) { struct ntb_softc *ntb = device_get_softc(dev); uint64_t ibits; int i; KASSERT((bits & ~ntb->db_valid_mask) == 0, ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__, (uintmax_t)(bits & ~ntb->db_valid_mask), (uintmax_t)ntb->db_valid_mask)); DB_MASK_LOCK(ntb); ibits = ntb->fake_db_bell & ntb->db_mask & bits; ntb->db_mask &= ~bits; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { /* Simulate fake interrupts if unmasked DB bits are set. */ for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0) swi_sched(ntb->int_info[i].tag, 0); } } else { db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask); } DB_MASK_UNLOCK(ntb); } static uint64_t intel_ntb_db_read(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) return (ntb->fake_db_bell); return (db_ioread(ntb, ntb->self_reg->db_bell)); } static void intel_ntb_db_clear(device_t dev, uint64_t bits) { struct ntb_softc *ntb = device_get_softc(dev); KASSERT((bits & ~ntb->db_valid_mask) == 0, ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__, (uintmax_t)(bits & ~ntb->db_valid_mask), (uintmax_t)ntb->db_valid_mask)); if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { DB_MASK_LOCK(ntb); ntb->fake_db_bell &= ~bits; DB_MASK_UNLOCK(ntb); return; } db_iowrite(ntb, ntb->self_reg->db_bell, bits); } static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector) { uint64_t shift, mask; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { /* * Remap vectors in a custom way so that at least the first * three doorbells do not generate stray events. * This breaks Linux compatibility (if one existed) * when more than one DB is used (not by if_ntb).
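 * (Editorial illustration, the numbers are hypothetical and not part of the original comment: in the normal case handled below, vector v owns db_vec_shift consecutive doorbell bits, e.g. a shift of 4 would give vector 0 the mask 0x000f and vector 1 the mask 0x00f0; under this remap each of the first vectors owns a single doorbell bit, while the last remapped vector takes the wide mask 0x7ffc covering the remaining bits.)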
*/ if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1) return (1 << db_vector); if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1) return (0x7ffc); } shift = ntb->db_vec_shift; mask = (1ull << shift) - 1; return (mask << (shift * db_vector)); } static void intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec) { uint64_t vec_mask; ntb->last_ts = ticks; vec_mask = intel_ntb_vec_mask(ntb, vec); if ((vec_mask & ntb->db_link_mask) != 0) { if (intel_ntb_poll_link(ntb)) ntb_link_event(ntb->device); } if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) && (vec_mask & ntb->db_link_mask) == 0) { DB_MASK_LOCK(ntb); /* Do not report same DB events again if not cleared yet. */ vec_mask &= ~ntb->fake_db_bell; /* Update our internal doorbell register. */ ntb->fake_db_bell |= vec_mask; /* Do not report masked DB events. */ vec_mask &= ~ntb->db_mask; DB_MASK_UNLOCK(ntb); } if ((vec_mask & ntb->db_valid_mask) != 0) ntb_db_event(ntb->device, vec); } static void ndev_vec_isr(void *arg) { struct ntb_vec *nvec = arg; intel_ntb_interrupt(nvec->ntb, nvec->num); } static void ndev_irq_isr(void *arg) { /* If we couldn't set up MSI-X, we only have the one vector. */ intel_ntb_interrupt(arg, 0); } static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors) { uint32_t i; ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB, M_ZERO | M_WAITOK); for (i = 0; i < num_vectors; i++) { ntb->msix_vec[i].num = i; ntb->msix_vec[i].ntb = ntb; } return (0); } static void intel_ntb_free_msix_vec(struct ntb_softc *ntb) { if (ntb->msix_vec == NULL) return; free(ntb->msix_vec, M_NTB); ntb->msix_vec = NULL; } static void intel_ntb_get_msix_info(struct ntb_softc *ntb) { struct pci_devinfo *dinfo; struct pcicfg_msix *msix; uint32_t laddr, data, i, offset; dinfo = device_get_ivars(ntb->device); msix = &dinfo->cfg.msix; CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data)); for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE; laddr = bus_read_4(msix->msix_table_res, offset + PCI_MSIX_ENTRY_LOWER_ADDR); intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr); KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE, ("local MSIX addr 0x%x not in MSI base 0x%x", laddr, MSI_INTEL_ADDR_BASE)); ntb->msix_data[i].nmd_ofs = laddr; data = bus_read_4(msix->msix_table_res, offset + PCI_MSIX_ENTRY_DATA); intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data); ntb->msix_data[i].nmd_data = data; } } static struct ntb_hw_info * intel_ntb_get_device_info(uint32_t device_id) { struct ntb_hw_info *ep = pci_ids; while (ep->device_id) { if (ep->device_id == device_id) return (ep); ++ep; } return (NULL); } static void intel_ntb_teardown_xeon(struct ntb_softc *ntb) { if (ntb->reg != NULL) intel_ntb_link_disable(ntb->device); } static void intel_ntb_detect_max_mw(struct ntb_softc *ntb) { if (ntb->type == NTB_ATOM) { ntb->mw_count = ATOM_MW_COUNT; return; } if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT; else ntb->mw_count = XEON_SNB_MW_COUNT; } static int intel_ntb_detect_xeon(struct ntb_softc *ntb) { uint8_t ppd, conn_type; ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1); ntb->ppd = ppd; if ((ppd & XEON_PPD_DEV_TYPE) != 0) ntb->dev_type = NTB_DEV_DSD; else ntb->dev_type = NTB_DEV_USD; if ((ppd & XEON_PPD_SPLIT_BAR) != 0) ntb->features |= NTB_SPLIT_BAR; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) && !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { device_printf(ntb->device, "Can not apply SB01BASE_LOCKUP workaround " "with split BARs 
disabled!\n"); device_printf(ntb->device, "Expect system hangs under heavy NTB traffic!\n"); ntb->features &= ~NTB_SB01BASE_LOCKUP; } /* * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP * errata workaround; only do one at a time. */ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) ntb->features &= ~NTB_SDOORBELL_LOCKUP; conn_type = ppd & XEON_PPD_CONN_TYPE; switch (conn_type) { case NTB_CONN_B2B: ntb->conn_type = conn_type; break; case NTB_CONN_RP: case NTB_CONN_TRANSPARENT: default: device_printf(ntb->device, "Unsupported connection type: %u\n", (unsigned)conn_type); return (ENXIO); } return (0); } static int intel_ntb_detect_atom(struct ntb_softc *ntb) { uint32_t ppd, conn_type; ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4); ntb->ppd = ppd; if ((ppd & ATOM_PPD_DEV_TYPE) != 0) ntb->dev_type = NTB_DEV_DSD; else ntb->dev_type = NTB_DEV_USD; conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8; switch (conn_type) { case NTB_CONN_B2B: ntb->conn_type = conn_type; break; default: device_printf(ntb->device, "Unsupported NTB configuration\n"); return (ENXIO); } return (0); } static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb) { int rc; ntb->spad_count = XEON_SPAD_COUNT; ntb->db_count = XEON_DB_COUNT; ntb->db_link_mask = XEON_DB_LINK_BIT; ntb->db_vec_count = XEON_DB_MSIX_VECTOR_COUNT; ntb->db_vec_shift = XEON_DB_MSIX_VECTOR_SHIFT; if (ntb->conn_type != NTB_CONN_B2B) { device_printf(ntb->device, "Connection type %d not supported\n", ntb->conn_type); return (ENXIO); } ntb->reg = &xeon_reg; ntb->self_reg = &xeon_pri_reg; ntb->peer_reg = &xeon_b2b_reg; ntb->xlat_reg = &xeon_sec_xlat; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { ntb->fake_db_bell = 0; ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) % ntb->mw_count; intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n", g_ntb_msix_idx, ntb->msix_mw_idx); rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx, VM_MEMATTR_UNCACHEABLE); KASSERT(rc == 0, ("shouldn't fail")); } else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) { /* * There is a Xeon hardware erratum related to writes to SDOORBELL or * B2BDOORBELL in conjunction with inbound access to NTB MMIO space, * which may hang the system. To work around this, use a memory * window to access the interrupt and scratch pad registers on the * remote system. */ ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) % ntb->mw_count; intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n", g_ntb_mw_idx, ntb->b2b_mw_idx); rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx, VM_MEMATTR_UNCACHEABLE); KASSERT(rc == 0, ("shouldn't fail")); } else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14)) /* * HW erratum on bit 14 of the b2bdoorbell register. Writes will not be * mirrored to the remote system. Shrink the number of bits by one, * since bit 14 is the last bit. * * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register * anyway. Nor for non-B2B connection types. */ ntb->db_count = XEON_DB_COUNT - 1; ntb->db_valid_mask = (1ull << ntb->db_count) - 1; if (ntb->dev_type == NTB_DEV_USD) rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr, &xeon_b2b_usd_addr); else rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr, &xeon_b2b_dsd_addr); if (rc != 0) return (rc); /* Enable Bus Master and Memory Space on the secondary side */ intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET, PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); /* * Mask all doorbell interrupts.
*/ DB_MASK_LOCK(ntb); ntb->db_mask = ntb->db_valid_mask; db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask); DB_MASK_UNLOCK(ntb); rc = intel_ntb_init_isr(ntb); return (rc); } static int intel_ntb_atom_init_dev(struct ntb_softc *ntb) { int error; KASSERT(ntb->conn_type == NTB_CONN_B2B, ("Unsupported NTB configuration (%d)\n", ntb->conn_type)); ntb->spad_count = ATOM_SPAD_COUNT; ntb->db_count = ATOM_DB_COUNT; ntb->db_vec_count = ATOM_DB_MSIX_VECTOR_COUNT; ntb->db_vec_shift = ATOM_DB_MSIX_VECTOR_SHIFT; ntb->db_valid_mask = (1ull << ntb->db_count) - 1; ntb->reg = &atom_reg; ntb->self_reg = &atom_pri_reg; ntb->peer_reg = &atom_b2b_reg; ntb->xlat_reg = &atom_sec_xlat; /* * FIXME - MSI-X bug on early Atom HW, remove once internal issue is * resolved. Mask transaction layer internal parity errors. */ pci_write_config(ntb->device, 0xFC, 0x4, 4); configure_atom_secondary_side_bars(ntb); /* Enable Bus Master and Memory Space on the secondary side */ intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET, PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); error = intel_ntb_init_isr(ntb); if (error != 0) return (error); /* Initiate PCI-E link training */ intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb); return (0); } /* XXX: Linux driver doesn't seem to do any of this for Atom. */ static void configure_atom_secondary_side_bars(struct ntb_softc *ntb) { if (ntb->dev_type == NTB_DEV_USD) { intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET, XEON_B2B_BAR2_ADDR64); intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET, XEON_B2B_BAR4_ADDR64); intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64); intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64); } else { intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET, XEON_B2B_BAR2_ADDR64); intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET, XEON_B2B_BAR4_ADDR64); intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64); intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64); } } /* * When working around Xeon SDOORBELL errata by remapping remote registers in a * MW, limit the B2B MW to half a MW. By sharing a MW, half the shared MW * remains for use by a higher layer. * * Will only be used if working around SDOORBELL errata and the BIOS-configured * MW size is sufficiently large. */ static unsigned int ntb_b2b_mw_share; SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share, 0, "If enabled (non-zero), prefer to share half of the B2B peer register " "MW with higher level consumers. 
Both sides of the NTB MUST set the same " "value here."); static void xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx, enum ntb_bar regbar) { struct ntb_pci_bar_info *bar; uint8_t bar_sz; if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3) return; bar = &ntb->bar_info[idx]; bar_sz = pci_read_config(ntb->device, bar->psz_off, 1); if (idx == regbar) { if (ntb->b2b_off != 0) bar_sz--; else bar_sz = 0; } pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1); bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1); (void)bar_sz; } static void xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr, enum ntb_bar idx, enum ntb_bar regbar) { uint64_t reg_val; uint32_t base_reg, lmt_reg; bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg); if (idx == regbar) { if (ntb->b2b_off) bar_addr += ntb->b2b_off; else bar_addr = 0; } if (!bar_is_64bit(ntb, idx)) { intel_ntb_reg_write(4, base_reg, bar_addr); reg_val = intel_ntb_reg_read(4, base_reg); (void)reg_val; intel_ntb_reg_write(4, lmt_reg, bar_addr); reg_val = intel_ntb_reg_read(4, lmt_reg); (void)reg_val; } else { intel_ntb_reg_write(8, base_reg, bar_addr); reg_val = intel_ntb_reg_read(8, base_reg); (void)reg_val; intel_ntb_reg_write(8, lmt_reg, bar_addr); reg_val = intel_ntb_reg_read(8, lmt_reg); (void)reg_val; } } static void xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx) { struct ntb_pci_bar_info *bar; bar = &ntb->bar_info[idx]; if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) { intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr); base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off); } else { intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr); base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off); } (void)base_addr; } static int xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr) { struct ntb_pci_bar_info *b2b_bar; vm_size_t bar_size; uint64_t bar_addr; enum ntb_bar b2b_bar_num, i; if (ntb->b2b_mw_idx == B2B_MW_DISABLED) { b2b_bar = NULL; b2b_bar_num = NTB_CONFIG_BAR; ntb->b2b_off = 0; } else { b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx); KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS, ("invalid b2b mw bar")); b2b_bar = &ntb->bar_info[b2b_bar_num]; bar_size = b2b_bar->size; if (ntb_b2b_mw_share != 0 && (bar_size >> 1) >= XEON_B2B_MIN_SIZE) ntb->b2b_off = bar_size >> 1; else if (bar_size >= XEON_B2B_MIN_SIZE) { ntb->b2b_off = 0; } else { device_printf(ntb->device, "B2B bar size is too small!\n"); return (EIO); } } /* * Reset the secondary bar sizes to match the primary bar sizes. * (Except, disable or halve the size of the B2B secondary bar.) */ for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++) xeon_reset_sbar_size(ntb, i, b2b_bar_num); bar_addr = 0; if (b2b_bar_num == NTB_CONFIG_BAR) bar_addr = addr->bar0_addr; else if (b2b_bar_num == NTB_B2B_BAR_1) bar_addr = addr->bar2_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) bar_addr = addr->bar4_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2) bar_addr = addr->bar4_addr32; else if (b2b_bar_num == NTB_B2B_BAR_3) bar_addr = addr->bar5_addr32; else KASSERT(false, ("invalid bar")); intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr); /* * Other SBARs are normally hit by the PBAR xlat, except for the b2b * register BAR. The B2B BAR is either disabled above or configured * half-size. It starts at PBAR xlat + offset. * * Also set up incoming BAR limits == base (zero length window). 
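 * (Editorial clarification, not in the original comment: programming a secondary BAR limit equal to its base yields a zero-length window, so incoming accesses are rejected until intel_ntb_mw_set_trans() later installs a real translation address and limit for that memory window.)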
*/ xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1, b2b_bar_num); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32, NTB_B2B_BAR_2, b2b_bar_num); xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32, NTB_B2B_BAR_3, b2b_bar_num); } else xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64, NTB_B2B_BAR_2, b2b_bar_num); /* Zero incoming translation addrs */ intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0); intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0); if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { uint32_t xlat_reg, lmt_reg; enum ntb_bar bar_num; /* * We point the chosen MSIX MW BAR xlat to remote LAPIC for * workaround */ bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx); bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg); if (bar_is_64bit(ntb, bar_num)) { intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE); ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg); intel_ntb_reg_write(8, lmt_reg, 0); } else { intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE); ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg); intel_ntb_reg_write(4, lmt_reg, 0); } ntb->peer_lapic_bar = &ntb->bar_info[bar_num]; } (void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET); (void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET); /* Zero outgoing translation limits (whole bar size windows) */ intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0); intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0); /* Set outgoing translation offsets */ xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2); xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3); } else xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2); /* Set the translation offset for B2B registers */ bar_addr = 0; if (b2b_bar_num == NTB_CONFIG_BAR) bar_addr = peer_addr->bar0_addr; else if (b2b_bar_num == NTB_B2B_BAR_1) bar_addr = peer_addr->bar2_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) bar_addr = peer_addr->bar4_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2) bar_addr = peer_addr->bar4_addr32; else if (b2b_bar_num == NTB_B2B_BAR_3) bar_addr = peer_addr->bar5_addr32; else KASSERT(false, ("invalid bar")); /* * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits * at a time. 
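 * (Editorial illustration with a hypothetical value: a bar_addr of 0x8000000000 would be written as 0x00000000 to XEON_B2B_XLAT_OFFSETL and 0x00000080 to XEON_B2B_XLAT_OFFSETU.)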
*/ intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff); intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32); return (0); } static inline bool _xeon_link_is_up(struct ntb_softc *ntb) { if (ntb->conn_type == NTB_CONN_TRANSPARENT) return (true); return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0); } static inline bool link_is_up(struct ntb_softc *ntb) { if (ntb->type == NTB_XEON) return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good || !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))); KASSERT(ntb->type == NTB_ATOM, ("ntb type")); return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0); } static inline bool atom_link_is_err(struct ntb_softc *ntb) { uint32_t status; KASSERT(ntb->type == NTB_ATOM, ("ntb type")); status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET); if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0) return (true); status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET); return ((status & ATOM_IBIST_ERR_OFLOW) != 0); } /* Atom does not have link status interrupt, poll on that platform */ static void atom_link_hb(void *arg) { struct ntb_softc *ntb = arg; sbintime_t timo, poll_ts; timo = NTB_HB_TIMEOUT * hz; poll_ts = ntb->last_ts + timo; /* * Delay polling the link status if an interrupt was received, unless * the cached link status says the link is down. */ if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) { timo = poll_ts - ticks; goto out; } if (intel_ntb_poll_link(ntb)) ntb_link_event(ntb->device); if (!link_is_up(ntb) && atom_link_is_err(ntb)) { /* Link is down with error, proceed with recovery */ callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb); return; } out: callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb); } static void atom_perform_link_restart(struct ntb_softc *ntb) { uint32_t status; /* Driver resets the NTB ModPhy lanes - magic! 
*/ intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0); intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40); intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60); intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60); /* Driver waits 100ms to allow the NTB ModPhy to settle */ pause("ModPhy", hz / 10); /* Clear AER Errors, write to clear */ status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET); status &= PCIM_AER_COR_REPLAY_ROLLOVER; intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status); /* Clear unexpected electrical idle event in LTSSM, write to clear */ status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET); status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI; intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status); /* Clear DeSkew Buffer error, write to clear */ status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET); status |= ATOM_DESKEWSTS_DBERR; intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status); status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET); status &= ATOM_IBIST_ERR_OFLOW; intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status); /* Releases the NTB state machine to allow the link to retrain */ status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET); status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT; intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status); } static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused, enum ntb_width width __unused) { struct ntb_softc *ntb = device_get_softc(dev); uint32_t cntl; intel_ntb_printf(2, "%s\n", __func__); if (ntb->type == NTB_ATOM) { pci_write_config(ntb->device, NTB_PPD_OFFSET, ntb->ppd | ATOM_PPD_INIT_LINK, 4); return (0); } if (ntb->conn_type == NTB_CONN_TRANSPARENT) { ntb_link_event(dev); return (0); } cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK); cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP; cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP; if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP; intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl); return (0); } static int intel_ntb_link_disable(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); uint32_t cntl; intel_ntb_printf(2, "%s\n", __func__); if (ntb->conn_type == NTB_CONN_TRANSPARENT) { ntb_link_event(dev); return (0); } cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP); cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP); cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK; intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl); return (0); } static bool intel_ntb_link_enabled(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); uint32_t cntl; if (ntb->type == NTB_ATOM) { cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4); return ((cntl & ATOM_PPD_INIT_LINK) != 0); } if (ntb->conn_type == NTB_CONN_TRANSPARENT) return (true); cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); return ((cntl & NTB_CNTL_LINK_DISABLE) == 0); } static void recover_atom_link(void *arg) { struct ntb_softc *ntb = arg; unsigned speed, width, oldspeed, oldwidth; uint32_t status32; atom_perform_link_restart(ntb); /* * There is a potential race between the 2 NTB devices recovering at * the same time. If the times are the same, the link will not recover * and the driver will be stuck in this loop forever. Add a random * interval to the recovery time to prevent this race. 
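 * (Editorial note, assuming ATOM_LINK_RECOVERY_TIME is a millisecond value as the hz / 1000 conversion below suggests: each side then sleeps between one and two recovery intervals, so two peers that restarted at the same moment quickly drift apart on later retries.)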
*/ status32 = arc4random() % ATOM_LINK_RECOVERY_TIME; pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000); if (atom_link_is_err(ntb)) goto retry; status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); if ((status32 & ATOM_CNTL_LINK_DOWN) != 0) goto out; status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta); width = NTB_LNK_STA_WIDTH(status32); speed = status32 & NTB_LINK_SPEED_MASK; oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta); oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK; if (oldwidth != width || oldspeed != speed) goto retry; out: callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb, ntb); return; retry: callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link, ntb); } /* * Polls the HW link status register(s); returns true if something has changed. */ static bool intel_ntb_poll_link(struct ntb_softc *ntb) { uint32_t ntb_cntl; uint16_t reg_val; if (ntb->type == NTB_ATOM) { ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); if (ntb_cntl == ntb->ntb_ctl) return (false); ntb->ntb_ctl = ntb_cntl; ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta); } else { db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask); reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2); if (reg_val == ntb->lnk_sta) return (false); ntb->lnk_sta = reg_val; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { if (_xeon_link_is_up(ntb)) { if (!ntb->peer_msix_good) { callout_reset(&ntb->peer_msix_work, 0, intel_ntb_exchange_msix, ntb); return (false); } } else { ntb->peer_msix_good = false; ntb->peer_msix_done = false; } } } return (true); } static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *ntb) { if (!link_is_up(ntb)) return (NTB_SPEED_NONE); return (ntb->lnk_sta & NTB_LINK_SPEED_MASK); } static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *ntb) { if (!link_is_up(ntb)) return (NTB_WIDTH_NONE); return (NTB_LNK_STA_WIDTH(ntb->lnk_sta)); } SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0, "Driver state, statistics, and HW registers"); #define NTB_REGSZ_MASK (3ul << 30) #define NTB_REG_64 (1ul << 30) #define NTB_REG_32 (2ul << 30) #define NTB_REG_16 (3ul << 30) #define NTB_REG_8 (0ul << 30) #define NTB_DB_READ (1ul << 29) #define NTB_PCI_REG (1ul << 28) #define NTB_REGFLAGS_MASK (NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG) static void intel_ntb_sysctl_init(struct ntb_softc *ntb) { struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree, *tmptree; ctx = device_get_sysctl_ctx(ntb->device); globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device)); SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status", CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_link_status_human, "A", "Link status (human readable)"); SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active", CTLFLAG_RD | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_status, "IU", "Link status (1=active, 0=inactive)"); SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up", CTLFLAG_RW | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_admin, "IU", "Set/get interface status (1=UP, 0=DOWN)"); tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info", CTLFLAG_RD, NULL, "Driver state, statistics, and HW registers"); tree_par = SYSCTL_CHILDREN(tree); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD, &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port"); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD, &ntb->dev_type, 0, "0 - USD; 1 - DSD"); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, 
"ppd", CTLFLAG_RD, &ntb->ppd, 0, "Raw PPD register (cached)"); if (ntb->b2b_mw_idx != B2B_MW_DISABLED) { SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD, &ntb->b2b_mw_idx, 0, "Index of the MW used for B2B remote register access"); SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off", CTLFLAG_RD, &ntb->b2b_off, "If non-zero, offset of B2B register region in shared MW"); } SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features", CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_features, "A", "Features/errata of this NTB device"); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD, __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0, "NTB CTL register (cached)"); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD, __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0, "LNK STA register (cached)"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD, &ntb->mw_count, 0, "MW count"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD, &ntb->spad_count, 0, "Scratchpad count"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD, &ntb->db_count, 0, "Doorbell count"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD, &ntb->db_vec_count, 0, "Doorbell vector count"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD, &ntb->db_vec_shift, 0, "Doorbell vector shift"); SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD, &ntb->db_valid_mask, "Doorbell valid mask"); SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD, &ntb->db_link_mask, "Doorbell link mask"); SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD, &ntb->db_mask, "Doorbell mask (cached)"); tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers", CTLFLAG_RD, NULL, "Raw HW registers (big-endian)"); regpar = SYSCTL_CHILDREN(tmptree); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->reg->ntb_ctl, sysctl_handle_register, "IU", "NTB Control register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | 0x19c, sysctl_handle_register, "IU", "NTB Link Capabilities"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | 0x1a0, sysctl_handle_register, "IU", "NTB Link Control register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask, sysctl_handle_register, "QU", "Doorbell mask register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell, sysctl_handle_register, "QU", "Doorbell register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar2_xlat, sysctl_handle_register, "QU", "Incoming XLAT23 register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar4_xlat, sysctl_handle_register, "IU", "Incoming XLAT4 register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar5_xlat, sysctl_handle_register, "IU", "Incoming XLAT5 register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar4_xlat, sysctl_handle_register, "QU", "Incoming XLAT45 register"); } SYSCTL_ADD_PROC(ctx, regpar, 
OID_AUTO, "incoming_lmt23", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar2_limit, sysctl_handle_register, "QU", "Incoming LMT23 register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar4_limit, sysctl_handle_register, "IU", "Incoming LMT4 register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar5_limit, sysctl_handle_register, "IU", "Incoming LMT5 register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar4_limit, sysctl_handle_register, "QU", "Incoming LMT45 register"); } if (ntb->type == NTB_ATOM) return; tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats", CTLFLAG_RD, NULL, "Xeon HW statistics"); statpar = SYSCTL_CHILDREN(tmptree); SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_16 | XEON_USMEMMISS_OFFSET, sysctl_handle_register, "SU", "Upstream Memory Miss"); tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err", CTLFLAG_RD, NULL, "Xeon HW errors"); errpar = SYSCTL_CHILDREN(tmptree); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET, sysctl_handle_register, "CU", "PPD"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET, sysctl_handle_register, "CU", "PBAR23 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET, sysctl_handle_register, "CU", "PBAR4 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET, sysctl_handle_register, "CU", "PBAR5 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET, sysctl_handle_register, "CU", "SBAR23 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET, sysctl_handle_register, "CU", "SBAR4 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET, sysctl_handle_register, "CU", "SBAR5 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET, sysctl_handle_register, "SU", "DEVSTS"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET, sysctl_handle_register, "SU", "LNKSTS"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET, sysctl_handle_register, "SU", "SLNKSTS"); SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET, sysctl_handle_register, "IU", "UNCERRSTS"); SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET, sysctl_handle_register, "IU", "CORERRSTS"); if (ntb->conn_type != NTB_CONN_B2B) return; SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, 
NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off, sysctl_handle_register, "QU", "Outgoing XLAT23 register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off, sysctl_handle_register, "IU", "Outgoing XLAT4 register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off, sysctl_handle_register, "IU", "Outgoing XLAT5 register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off, sysctl_handle_register, "QU", "Outgoing XLAT45 register"); } SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | XEON_PBAR2LMT_OFFSET, sysctl_handle_register, "QU", "Outgoing LMT23 register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | XEON_PBAR4LMT_OFFSET, sysctl_handle_register, "IU", "Outgoing LMT4 register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | XEON_PBAR5LMT_OFFSET, sysctl_handle_register, "IU", "Outgoing LMT5 register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | XEON_PBAR4LMT_OFFSET, sysctl_handle_register, "QU", "Outgoing LMT45 register"); } SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar0_base, sysctl_handle_register, "QU", "Secondary BAR01 base register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar2_base, sysctl_handle_register, "QU", "Secondary BAR23 base register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar4_base, sysctl_handle_register, "IU", "Secondary BAR4 base register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar5_base, sysctl_handle_register, "IU", "Secondary BAR5 base register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar4_base, sysctl_handle_register, "QU", "Secondary BAR45 base register"); } } static int sysctl_handle_features(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb = arg1; struct sbuf sb; int error; sbuf_new_for_sysctl(&sb, NULL, 256, req); sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR); error = sbuf_finish(&sb); sbuf_delete(&sb); if (error || !req->newptr) return (error); return (EINVAL); } static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb = arg1; unsigned old, new; int error; old = intel_ntb_link_enabled(ntb->device); error = SYSCTL_OUT(req, &old, sizeof(old)); if (error != 0 || req->newptr == NULL) return (error); error = SYSCTL_IN(req, &new, sizeof(new)); if (error != 0) return (error); intel_ntb_printf(0, "Admin set interface state to '%sabled'\n", (new != 0)? 
"en" : "dis"); if (new != 0) error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); else error = intel_ntb_link_disable(ntb->device); return (error); } static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb = arg1; struct sbuf sb; enum ntb_speed speed; enum ntb_width width; int error; sbuf_new_for_sysctl(&sb, NULL, 32, req); if (intel_ntb_link_is_up(ntb->device, &speed, &width)) sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u", (unsigned)speed, (unsigned)width); else sbuf_printf(&sb, "down"); error = sbuf_finish(&sb); sbuf_delete(&sb); if (error || !req->newptr) return (error); return (EINVAL); } static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb = arg1; unsigned res; int error; res = intel_ntb_link_is_up(ntb->device, NULL, NULL); error = SYSCTL_OUT(req, &res, sizeof(res)); if (error || !req->newptr) return (error); return (EINVAL); } static int sysctl_handle_register(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb; const void *outp; uintptr_t sz; uint64_t umv; char be[sizeof(umv)]; size_t outsz; uint32_t reg; bool db, pci; int error; ntb = arg1; reg = arg2 & ~NTB_REGFLAGS_MASK; sz = arg2 & NTB_REGSZ_MASK; db = (arg2 & NTB_DB_READ) != 0; pci = (arg2 & NTB_PCI_REG) != 0; KASSERT(!(db && pci), ("bogus")); if (db) { KASSERT(sz == NTB_REG_64, ("bogus")); umv = db_ioread(ntb, reg); outsz = sizeof(uint64_t); } else { switch (sz) { case NTB_REG_64: if (pci) umv = pci_read_config(ntb->device, reg, 8); else umv = intel_ntb_reg_read(8, reg); outsz = sizeof(uint64_t); break; case NTB_REG_32: if (pci) umv = pci_read_config(ntb->device, reg, 4); else umv = intel_ntb_reg_read(4, reg); outsz = sizeof(uint32_t); break; case NTB_REG_16: if (pci) umv = pci_read_config(ntb->device, reg, 2); else umv = intel_ntb_reg_read(2, reg); outsz = sizeof(uint16_t); break; case NTB_REG_8: if (pci) umv = pci_read_config(ntb->device, reg, 1); else umv = intel_ntb_reg_read(1, reg); outsz = sizeof(uint8_t); break; default: panic("bogus"); break; } } /* Encode bigendian so that sysctl -x is legible. */ be64enc(be, umv); outp = ((char *)be) + sizeof(umv) - outsz; error = SYSCTL_OUT(req, outp, outsz); if (error || !req->newptr) return (error); return (EINVAL); } static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx) { if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 && uidx >= ntb->b2b_mw_idx) || (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx)) uidx++; if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 && uidx >= ntb->b2b_mw_idx) && (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx)) uidx++; return (uidx); } +#ifndef EARLY_AP_STARTUP +static int msix_ready; + static void +intel_ntb_msix_ready(void *arg __unused) +{ + + msix_ready = 1; +} +SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY, + intel_ntb_msix_ready, NULL); +#endif + +static void intel_ntb_exchange_msix(void *ctx) { struct ntb_softc *ntb; uint32_t val; unsigned i; ntb = ctx; if (ntb->peer_msix_good) goto msix_good; if (ntb->peer_msix_done) goto msix_done; + +#ifndef EARLY_AP_STARTUP + /* Block MSIX negotiation until SMP started and IRQ reshuffled. 
*/ + if (!msix_ready) + goto reschedule; +#endif intel_ntb_get_msix_info(ntb); for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i, ntb->msix_data[i].nmd_data); intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i, ntb->msix_data[i].nmd_ofs - ntb->msix_xlat); } intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD); intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val); if (val != NTB_MSIX_VER_GUARD) goto reschedule; for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val); intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val); ntb->peer_msix_data[i].nmd_data = val; intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val); intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val); ntb->peer_msix_data[i].nmd_ofs = val; } ntb->peer_msix_done = true; msix_done: intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE, NTB_MSIX_RECEIVED); intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val); if (val != NTB_MSIX_RECEIVED) goto reschedule; intel_ntb_spad_clear(ntb->device); ntb->peer_msix_good = true; /* Give peer time to see our NTB_MSIX_RECEIVED. */ goto reschedule; msix_good: intel_ntb_poll_link(ntb); ntb_link_event(ntb->device); return; reschedule: ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2); if (_xeon_link_is_up(ntb)) { callout_reset(&ntb->peer_msix_work, hz * (ntb->peer_msix_good ? 2 : 1) / 100, intel_ntb_exchange_msix, ntb); } else intel_ntb_spad_clear(ntb->device); } /* * Public API to the rest of the OS */ static uint8_t intel_ntb_spad_count(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); return (ntb->spad_count); } static uint8_t intel_ntb_mw_count(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); uint8_t res; res = ntb->mw_count; if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0) res--; if (ntb->msix_mw_idx != B2B_MW_DISABLED) res--; return (res); } static int intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val) { struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val); return (0); } /* * Zeros the local scratchpad. 
*/ static void intel_ntb_spad_clear(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); unsigned i; for (i = 0; i < ntb->spad_count; i++) intel_ntb_spad_write(dev, i, 0); } static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val) { struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); *val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4); return (0); } static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val) { struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val); else intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val); return (0); } static int intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val) { struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) *val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4); else *val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4); return (0); } static int intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base, caddr_t *vbase, size_t *size, size_t *align, size_t *align_size, bus_addr_t *plimit) { struct ntb_softc *ntb = device_get_softc(dev); struct ntb_pci_bar_info *bar; bus_addr_t limit; size_t bar_b2b_off; enum ntb_bar bar_num; if (mw_idx >= intel_ntb_mw_count(dev)) return (EINVAL); mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx); bar_num = intel_ntb_mw_to_bar(ntb, mw_idx); bar = &ntb->bar_info[bar_num]; bar_b2b_off = 0; if (mw_idx == ntb->b2b_mw_idx) { KASSERT(ntb->b2b_off != 0, ("user shouldn't get non-shared b2b mw")); bar_b2b_off = ntb->b2b_off; } if (bar_is_64bit(ntb, bar_num)) limit = BUS_SPACE_MAXADDR; else limit = BUS_SPACE_MAXADDR_32BIT; if (base != NULL) *base = bar->pbase + bar_b2b_off; if (vbase != NULL) *vbase = bar->vbase + bar_b2b_off; if (size != NULL) *size = bar->size - bar_b2b_off; if (align != NULL) *align = bar->size; if (align_size != NULL) *align_size = 1; if (plimit != NULL) *plimit = limit; return (0); } static int intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr, size_t size) { struct ntb_softc *ntb = device_get_softc(dev); struct ntb_pci_bar_info *bar; uint64_t base, limit, reg_val; size_t bar_size, mw_size; uint32_t base_reg, xlat_reg, limit_reg; enum ntb_bar bar_num; if (idx >= intel_ntb_mw_count(dev)) return (EINVAL); idx = intel_ntb_user_mw_to_idx(ntb, idx); bar_num = intel_ntb_mw_to_bar(ntb, idx); bar = &ntb->bar_info[bar_num]; bar_size = bar->size; if (idx == ntb->b2b_mw_idx) mw_size = bar_size - ntb->b2b_off; else mw_size = bar_size; /* Hardware requires that addr is aligned to bar size */ if ((addr & (bar_size - 1)) != 0) return (EINVAL); if (size > mw_size) return (EINVAL); bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg); limit = 0; if (bar_is_64bit(ntb, bar_num)) { base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK; if (limit_reg != 0 && size != mw_size) limit = base + size; /* Set and verify translation address */ intel_ntb_reg_write(8, xlat_reg, addr); reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK; if (reg_val != addr) { intel_ntb_reg_write(8, xlat_reg, 0); return (EIO); } /* Set and verify the limit */ intel_ntb_reg_write(8, limit_reg, limit); reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK; if (reg_val != limit) { intel_ntb_reg_write(8, limit_reg, base); intel_ntb_reg_write(8, xlat_reg, 
0); return (EIO); } } else { /* Configure 32-bit (split) BAR MW */ if ((addr & UINT32_MAX) != addr) return (ERANGE); if (((addr + size) & UINT32_MAX) != (addr + size)) return (ERANGE); base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK; if (limit_reg != 0 && size != mw_size) limit = base + size; /* Set and verify translation address */ intel_ntb_reg_write(4, xlat_reg, addr); reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK; if (reg_val != addr) { intel_ntb_reg_write(4, xlat_reg, 0); return (EIO); } /* Set and verify the limit */ intel_ntb_reg_write(4, limit_reg, limit); reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK; if (reg_val != limit) { intel_ntb_reg_write(4, limit_reg, base); intel_ntb_reg_write(4, xlat_reg, 0); return (EIO); } } return (0); } static int intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx) { return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0)); } static int intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode) { struct ntb_softc *ntb = device_get_softc(dev); struct ntb_pci_bar_info *bar; if (idx >= intel_ntb_mw_count(dev)) return (EINVAL); idx = intel_ntb_user_mw_to_idx(ntb, idx); bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)]; *mode = bar->map_mode; return (0); } static int intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode) { struct ntb_softc *ntb = device_get_softc(dev); if (idx >= intel_ntb_mw_count(dev)) return (EINVAL); idx = intel_ntb_user_mw_to_idx(ntb, idx); return (intel_ntb_mw_set_wc_internal(ntb, idx, mode)); } static int intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx, vm_memattr_t mode) { struct ntb_pci_bar_info *bar; int rc; bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)]; if (bar->map_mode == mode) return (0); rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode); if (rc == 0) bar->map_mode = mode; return (rc); } static void intel_ntb_peer_db_set(device_t dev, uint64_t bit) { struct ntb_softc *ntb = device_get_softc(dev); if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { struct ntb_pci_bar_info *lapic; unsigned i; lapic = ntb->peer_lapic_bar; for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { if ((bit & intel_ntb_db_vector_mask(dev, i)) != 0) bus_space_write_4(lapic->pci_bus_tag, lapic->pci_bus_handle, ntb->peer_msix_data[i].nmd_ofs, ntb->peer_msix_data[i].nmd_data); } return; } if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) { intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit); return; } db_iowrite(ntb, ntb->peer_reg->db_bell, bit); } static int intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size) { struct ntb_softc *ntb = device_get_softc(dev); struct ntb_pci_bar_info *bar; uint64_t regoff; KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL")); if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) { bar = &ntb->bar_info[NTB_CONFIG_BAR]; regoff = ntb->peer_reg->db_bell; } else { KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED, ("invalid b2b idx")); bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)]; regoff = XEON_PDOORBELL_OFFSET; } KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh")); /* HACK: Specific to current x86 bus implementation. 
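 * (Editorial note: this computation assumes the current x86 bus_space implementation, in which the resource handle can be treated as an address to which the register offset is simply added; portable consumers should not rely on this, which is why the comment is tagged HACK.)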
*/ *db_addr = ((uint64_t)bar->pci_bus_handle + regoff); *db_size = ntb->reg->db_size; return (0); } static uint64_t intel_ntb_db_valid_mask(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); return (ntb->db_valid_mask); } static int intel_ntb_db_vector_count(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); return (ntb->db_vec_count); } static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector) { struct ntb_softc *ntb = device_get_softc(dev); if (vector > ntb->db_vec_count) return (0); return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector)); } static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width) { struct ntb_softc *ntb = device_get_softc(dev); if (speed != NULL) *speed = intel_ntb_link_sta_speed(ntb); if (width != NULL) *width = intel_ntb_link_sta_width(ntb); return (link_is_up(ntb)); } static void save_bar_parameters(struct ntb_pci_bar_info *bar) { bar->pci_bus_tag = rman_get_bustag(bar->pci_resource); bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource); bar->pbase = rman_get_start(bar->pci_resource); bar->size = rman_get_size(bar->pci_resource); bar->vbase = rman_get_virtual(bar->pci_resource); } static device_method_t ntb_intel_methods[] = { /* Device interface */ DEVMETHOD(device_probe, intel_ntb_probe), DEVMETHOD(device_attach, intel_ntb_attach), DEVMETHOD(device_detach, intel_ntb_detach), /* NTB interface */ DEVMETHOD(ntb_link_is_up, intel_ntb_link_is_up), DEVMETHOD(ntb_link_enable, intel_ntb_link_enable), DEVMETHOD(ntb_link_disable, intel_ntb_link_disable), DEVMETHOD(ntb_link_enabled, intel_ntb_link_enabled), DEVMETHOD(ntb_mw_count, intel_ntb_mw_count), DEVMETHOD(ntb_mw_get_range, intel_ntb_mw_get_range), DEVMETHOD(ntb_mw_set_trans, intel_ntb_mw_set_trans), DEVMETHOD(ntb_mw_clear_trans, intel_ntb_mw_clear_trans), DEVMETHOD(ntb_mw_get_wc, intel_ntb_mw_get_wc), DEVMETHOD(ntb_mw_set_wc, intel_ntb_mw_set_wc), DEVMETHOD(ntb_spad_count, intel_ntb_spad_count), DEVMETHOD(ntb_spad_clear, intel_ntb_spad_clear), DEVMETHOD(ntb_spad_write, intel_ntb_spad_write), DEVMETHOD(ntb_spad_read, intel_ntb_spad_read), DEVMETHOD(ntb_peer_spad_write, intel_ntb_peer_spad_write), DEVMETHOD(ntb_peer_spad_read, intel_ntb_peer_spad_read), DEVMETHOD(ntb_db_valid_mask, intel_ntb_db_valid_mask), DEVMETHOD(ntb_db_vector_count, intel_ntb_db_vector_count), DEVMETHOD(ntb_db_vector_mask, intel_ntb_db_vector_mask), DEVMETHOD(ntb_db_clear, intel_ntb_db_clear), DEVMETHOD(ntb_db_clear_mask, intel_ntb_db_clear_mask), DEVMETHOD(ntb_db_read, intel_ntb_db_read), DEVMETHOD(ntb_db_set_mask, intel_ntb_db_set_mask), DEVMETHOD(ntb_peer_db_addr, intel_ntb_peer_db_addr), DEVMETHOD(ntb_peer_db_set, intel_ntb_peer_db_set), DEVMETHOD_END }; static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods, sizeof(struct ntb_softc)); DRIVER_MODULE(ntb_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL); MODULE_DEPEND(ntb_intel, ntb, 1, 1, 1); MODULE_VERSION(ntb_intel, 1); Index: user/alc/PQ_LAUNDRY/sys/dev/pci/pci_pci.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/dev/pci/pci_pci.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/dev/pci/pci_pci.c (revision 303642) @@ -1,2807 +1,2809 @@ /*- * Copyright (c) 1994,1995 Stefan Esser, Wolfgang StanglMeier * Copyright (c) 2000 Michael Smith * Copyright (c) 2000 BSDi * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * PCI:PCI bridge support. */ #include "opt_pci.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" static int pcib_probe(device_t dev); static int pcib_suspend(device_t dev); static int pcib_resume(device_t dev); static int pcib_power_for_sleep(device_t pcib, device_t dev, int *pstate); static int pcib_ari_get_id(device_t pcib, device_t dev, enum pci_id_type type, uintptr_t *id); static uint32_t pcib_read_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, int width); static void pcib_write_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, uint32_t val, int width); static int pcib_ari_maxslots(device_t dev); static int pcib_ari_maxfuncs(device_t dev); static int pcib_try_enable_ari(device_t pcib, device_t dev); static int pcib_ari_enabled(device_t pcib); static void pcib_ari_decode_rid(device_t pcib, uint16_t rid, int *bus, int *slot, int *func); #ifdef PCI_HP static void pcib_pcie_ab_timeout(void *arg); static void pcib_pcie_cc_timeout(void *arg); static void pcib_pcie_dll_timeout(void *arg); #endif static device_method_t pcib_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pcib_probe), DEVMETHOD(device_attach, pcib_attach), DEVMETHOD(device_detach, pcib_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, pcib_suspend), DEVMETHOD(device_resume, pcib_resume), /* Bus interface */ DEVMETHOD(bus_child_present, pcib_child_present), DEVMETHOD(bus_read_ivar, pcib_read_ivar), DEVMETHOD(bus_write_ivar, pcib_write_ivar), DEVMETHOD(bus_alloc_resource, pcib_alloc_resource), #ifdef NEW_PCIB DEVMETHOD(bus_adjust_resource, pcib_adjust_resource), DEVMETHOD(bus_release_resource, pcib_release_resource), #else DEVMETHOD(bus_adjust_resource, bus_generic_adjust_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), #endif DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), /* pcib interface */ DEVMETHOD(pcib_maxslots, pcib_ari_maxslots), DEVMETHOD(pcib_maxfuncs, pcib_ari_maxfuncs), 
DEVMETHOD(pcib_read_config, pcib_read_config), DEVMETHOD(pcib_write_config, pcib_write_config), DEVMETHOD(pcib_route_interrupt, pcib_route_interrupt), DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), DEVMETHOD(pcib_release_msi, pcib_release_msi), DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), DEVMETHOD(pcib_release_msix, pcib_release_msix), DEVMETHOD(pcib_map_msi, pcib_map_msi), DEVMETHOD(pcib_power_for_sleep, pcib_power_for_sleep), DEVMETHOD(pcib_get_id, pcib_ari_get_id), DEVMETHOD(pcib_try_enable_ari, pcib_try_enable_ari), DEVMETHOD(pcib_ari_enabled, pcib_ari_enabled), DEVMETHOD(pcib_decode_rid, pcib_ari_decode_rid), DEVMETHOD_END }; static devclass_t pcib_devclass; DEFINE_CLASS_0(pcib, pcib_driver, pcib_methods, sizeof(struct pcib_softc)); DRIVER_MODULE(pcib, pci, pcib_driver, pcib_devclass, NULL, NULL); -#ifdef NEW_PCIB +#if defined(NEW_PCIB) || defined(PCI_HP) SYSCTL_DECL(_hw_pci); +#endif +#ifdef NEW_PCIB static int pci_clear_pcib; SYSCTL_INT(_hw_pci, OID_AUTO, clear_pcib, CTLFLAG_RDTUN, &pci_clear_pcib, 0, "Clear firmware-assigned resources for PCI-PCI bridge I/O windows."); /* * Is a resource from a child device sub-allocated from one of our * resource managers? */ static int pcib_is_resource_managed(struct pcib_softc *sc, int type, struct resource *r) { switch (type) { #ifdef PCI_RES_BUS case PCI_RES_BUS: return (rman_is_region_manager(r, &sc->bus.rman)); #endif case SYS_RES_IOPORT: return (rman_is_region_manager(r, &sc->io.rman)); case SYS_RES_MEMORY: /* Prefetchable resources may live in either memory rman. */ if (rman_get_flags(r) & RF_PREFETCHABLE && rman_is_region_manager(r, &sc->pmem.rman)) return (1); return (rman_is_region_manager(r, &sc->mem.rman)); } return (0); } static int pcib_is_window_open(struct pcib_window *pw) { return (pw->valid && pw->base < pw->limit); } /* * XXX: If RF_ACTIVE did not also imply allocating a bus space tag and * handle for the resource, we could pass RF_ACTIVE up to the PCI bus * when allocating the resource windows and rely on the PCI bus driver * to do this for us. */ static void pcib_activate_window(struct pcib_softc *sc, int type) { PCI_ENABLE_IO(device_get_parent(sc->dev), sc->dev, type); } static void pcib_write_windows(struct pcib_softc *sc, int mask) { device_t dev; uint32_t val; dev = sc->dev; if (sc->io.valid && mask & WIN_IO) { val = pci_read_config(dev, PCIR_IOBASEL_1, 1); if ((val & PCIM_BRIO_MASK) == PCIM_BRIO_32) { pci_write_config(dev, PCIR_IOBASEH_1, sc->io.base >> 16, 2); pci_write_config(dev, PCIR_IOLIMITH_1, sc->io.limit >> 16, 2); } pci_write_config(dev, PCIR_IOBASEL_1, sc->io.base >> 8, 1); pci_write_config(dev, PCIR_IOLIMITL_1, sc->io.limit >> 8, 1); } if (mask & WIN_MEM) { pci_write_config(dev, PCIR_MEMBASE_1, sc->mem.base >> 16, 2); pci_write_config(dev, PCIR_MEMLIMIT_1, sc->mem.limit >> 16, 2); } if (sc->pmem.valid && mask & WIN_PMEM) { val = pci_read_config(dev, PCIR_PMBASEL_1, 2); if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) { pci_write_config(dev, PCIR_PMBASEH_1, sc->pmem.base >> 32, 4); pci_write_config(dev, PCIR_PMLIMITH_1, sc->pmem.limit >> 32, 4); } pci_write_config(dev, PCIR_PMBASEL_1, sc->pmem.base >> 16, 2); pci_write_config(dev, PCIR_PMLIMITL_1, sc->pmem.limit >> 16, 2); } } /* * This is used to reject I/O port allocations that conflict with an * ISA alias range. */ static int pcib_is_isa_range(struct pcib_softc *sc, rman_res_t start, rman_res_t end, rman_res_t count) { rman_res_t next_alias; if (!(sc->bridgectl & PCIB_BCR_ISA_ENABLE)) return (0); /* Only check fixed ranges for overlap. 
*/ if (start + count - 1 != end) return (0); /* ISA aliases are only in the lower 64KB of I/O space. */ if (start >= 65536) return (0); /* Check for overlap with 0x000 - 0x0ff as a special case. */ if (start < 0x100) goto alias; /* * If the start address is an alias, the range is an alias. * Otherwise, compute the start of the next alias range and * check if it is before the end of the candidate range. */ if ((start & 0x300) != 0) goto alias; next_alias = (start & ~0x3fful) | 0x100; if (next_alias <= end) goto alias; return (0); alias: if (bootverbose) device_printf(sc->dev, "I/O range %#jx-%#jx overlaps with an ISA alias\n", start, end); return (1); } static void pcib_add_window_resources(struct pcib_window *w, struct resource **res, int count) { struct resource **newarray; int error, i; newarray = malloc(sizeof(struct resource *) * (w->count + count), M_DEVBUF, M_WAITOK); if (w->res != NULL) bcopy(w->res, newarray, sizeof(struct resource *) * w->count); bcopy(res, newarray + w->count, sizeof(struct resource *) * count); free(w->res, M_DEVBUF); w->res = newarray; w->count += count; for (i = 0; i < count; i++) { error = rman_manage_region(&w->rman, rman_get_start(res[i]), rman_get_end(res[i])); if (error) panic("Failed to add resource to rman"); } } typedef void (nonisa_callback)(rman_res_t start, rman_res_t end, void *arg); static void pcib_walk_nonisa_ranges(rman_res_t start, rman_res_t end, nonisa_callback *cb, void *arg) { rman_res_t next_end; /* * If start is within an ISA alias range, move up to the start * of the next non-alias range. As a special case, addresses * in the range 0x000 - 0x0ff should also be skipped since * those are used for various system I/O devices in ISA * systems. */ if (start <= 65535) { if (start < 0x100 || (start & 0x300) != 0) { start &= ~0x3ff; start += 0x400; } } /* ISA aliases are only in the lower 64KB of I/O space. */ while (start <= MIN(end, 65535)) { next_end = MIN(start | 0xff, end); cb(start, next_end, arg); start += 0x400; } if (start <= end) cb(start, end, arg); } static void count_ranges(rman_res_t start, rman_res_t end, void *arg) { int *countp; countp = arg; (*countp)++; } struct alloc_state { struct resource **res; struct pcib_softc *sc; int count, error; }; static void alloc_ranges(rman_res_t start, rman_res_t end, void *arg) { struct alloc_state *as; struct pcib_window *w; int rid; as = arg; if (as->error != 0) return; w = &as->sc->io; rid = w->reg; if (bootverbose) device_printf(as->sc->dev, "allocating non-ISA range %#jx-%#jx\n", start, end); as->res[as->count] = bus_alloc_resource(as->sc->dev, SYS_RES_IOPORT, &rid, start, end, end - start + 1, 0); if (as->res[as->count] == NULL) as->error = ENXIO; else as->count++; } static int pcib_alloc_nonisa_ranges(struct pcib_softc *sc, rman_res_t start, rman_res_t end) { struct alloc_state as; int i, new_count; /* First, see how many ranges we need. */ new_count = 0; pcib_walk_nonisa_ranges(start, end, count_ranges, &new_count); /* Second, allocate the ranges. */ as.res = malloc(sizeof(struct resource *) * new_count, M_DEVBUF, M_WAITOK); as.sc = sc; as.count = 0; as.error = 0; pcib_walk_nonisa_ranges(start, end, alloc_ranges, &as); if (as.error != 0) { for (i = 0; i < as.count; i++) bus_release_resource(sc->dev, SYS_RES_IOPORT, sc->io.reg, as.res[i]); free(as.res, M_DEVBUF); return (as.error); } KASSERT(as.count == new_count, ("%s: count mismatch", __func__)); /* Third, add the ranges to the window. 
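[Editorial aside] The layout that pcib_is_isa_range() and pcib_walk_nonisa_ranges() encode is easy to misread in this flattened form: when the bridge has ISA decode enabled, only the first 0x100 bytes of every 0x400-byte block below 64K are forwarded as ordinary I/O ports (and 0x000-0x0ff is reserved for system devices); the remaining blocks alias legacy ISA ranges. A small userland-style sketch of the same walk, using a hypothetical walk_nonisa() helper rather than driver code:

#include <stdio.h>
#include <stdint.h>

/*
 * Report the usable (non-aliased) pieces of an I/O port range behind a
 * bridge with ISA decode enabled, mirroring pcib_walk_nonisa_ranges().
 */
static void
walk_nonisa(uint32_t start, uint32_t end)
{
        uint32_t next_end;

        /* Skip 0x000-0x0ff and any aliased block the range starts in. */
        if (start <= 0xffff && (start < 0x100 || (start & 0x300) != 0))
                start = (start & ~0x3ffu) + 0x400;
        while (start <= end && start <= 0xffff) {
                next_end = (start | 0xff) < end ? (start | 0xff) : end;
                printf("  usable: 0x%x-0x%x\n", start, next_end);
                start += 0x400;
        }
        if (start <= end)       /* everything at or above 64K is usable */
                printf("  usable: 0x%x-0x%x\n", start, end);
}

int
main(void)
{
        /* A request for 0x200-0xfff yields 0x400-0x4ff, 0x800-0x8ff, 0xc00-0xcff. */
        walk_nonisa(0x200, 0xfff);
        return (0);
}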
*/ pcib_add_window_resources(&sc->io, as.res, as.count); free(as.res, M_DEVBUF); return (0); } static void pcib_alloc_window(struct pcib_softc *sc, struct pcib_window *w, int type, int flags, pci_addr_t max_address) { struct resource *res; char buf[64]; int error, rid; if (max_address != (rman_res_t)max_address) max_address = ~0; w->rman.rm_start = 0; w->rman.rm_end = max_address; w->rman.rm_type = RMAN_ARRAY; snprintf(buf, sizeof(buf), "%s %s window", device_get_nameunit(sc->dev), w->name); w->rman.rm_descr = strdup(buf, M_DEVBUF); error = rman_init(&w->rman); if (error) panic("Failed to initialize %s %s rman", device_get_nameunit(sc->dev), w->name); if (!pcib_is_window_open(w)) return; if (w->base > max_address || w->limit > max_address) { device_printf(sc->dev, "initial %s window has too many bits, ignoring\n", w->name); return; } if (type == SYS_RES_IOPORT && sc->bridgectl & PCIB_BCR_ISA_ENABLE) (void)pcib_alloc_nonisa_ranges(sc, w->base, w->limit); else { rid = w->reg; res = bus_alloc_resource(sc->dev, type, &rid, w->base, w->limit, w->limit - w->base + 1, flags); if (res != NULL) pcib_add_window_resources(w, &res, 1); } if (w->res == NULL) { device_printf(sc->dev, "failed to allocate initial %s window: %#jx-%#jx\n", w->name, (uintmax_t)w->base, (uintmax_t)w->limit); w->base = max_address; w->limit = 0; pcib_write_windows(sc, w->mask); return; } pcib_activate_window(sc, type); } /* * Initialize I/O windows. */ static void pcib_probe_windows(struct pcib_softc *sc) { pci_addr_t max; device_t dev; uint32_t val; dev = sc->dev; if (pci_clear_pcib) { pcib_bridge_init(dev); } /* Determine if the I/O port window is implemented. */ val = pci_read_config(dev, PCIR_IOBASEL_1, 1); if (val == 0) { /* * If 'val' is zero, then only 16-bits of I/O space * are supported. */ pci_write_config(dev, PCIR_IOBASEL_1, 0xff, 1); if (pci_read_config(dev, PCIR_IOBASEL_1, 1) != 0) { sc->io.valid = 1; pci_write_config(dev, PCIR_IOBASEL_1, 0, 1); } } else sc->io.valid = 1; /* Read the existing I/O port window. */ if (sc->io.valid) { sc->io.reg = PCIR_IOBASEL_1; sc->io.step = 12; sc->io.mask = WIN_IO; sc->io.name = "I/O port"; if ((val & PCIM_BRIO_MASK) == PCIM_BRIO_32) { sc->io.base = PCI_PPBIOBASE( pci_read_config(dev, PCIR_IOBASEH_1, 2), val); sc->io.limit = PCI_PPBIOLIMIT( pci_read_config(dev, PCIR_IOLIMITH_1, 2), pci_read_config(dev, PCIR_IOLIMITL_1, 1)); max = 0xffffffff; } else { sc->io.base = PCI_PPBIOBASE(0, val); sc->io.limit = PCI_PPBIOLIMIT(0, pci_read_config(dev, PCIR_IOLIMITL_1, 1)); max = 0xffff; } pcib_alloc_window(sc, &sc->io, SYS_RES_IOPORT, 0, max); } /* Read the existing memory window. */ sc->mem.valid = 1; sc->mem.reg = PCIR_MEMBASE_1; sc->mem.step = 20; sc->mem.mask = WIN_MEM; sc->mem.name = "memory"; sc->mem.base = PCI_PPBMEMBASE(0, pci_read_config(dev, PCIR_MEMBASE_1, 2)); sc->mem.limit = PCI_PPBMEMLIMIT(0, pci_read_config(dev, PCIR_MEMLIMIT_1, 2)); pcib_alloc_window(sc, &sc->mem, SYS_RES_MEMORY, 0, 0xffffffff); /* Determine if the prefetchable memory window is implemented. */ val = pci_read_config(dev, PCIR_PMBASEL_1, 2); if (val == 0) { /* * If 'val' is zero, then only 32-bits of memory space * are supported. */ pci_write_config(dev, PCIR_PMBASEL_1, 0xffff, 2); if (pci_read_config(dev, PCIR_PMBASEL_1, 2) != 0) { sc->pmem.valid = 1; pci_write_config(dev, PCIR_PMBASEL_1, 0, 2); } } else sc->pmem.valid = 1; /* Read the existing prefetchable memory window. 
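[Editorial aside] The window probing below reads base/limit pairs that pack addresses into their upper bits: the 8-bit I/O base and limit registers carry address bits 15:12 (with the low nibble flagging 32-bit capability), and the 16-bit memory registers carry address bits 31:20. A hedged arithmetic sketch of that decode, using local DEMO_* macros rather than the pcireg.h PCI_PPB* macros the driver actually uses:

#include <stdio.h>
#include <stdint.h>

/* Local illustrations of the bridge window decode; values are examples. */
#define DEMO_IOBASE(lo)         (((uint32_t)(lo) & 0xf0) << 8)
#define DEMO_IOLIMIT(lo)        ((((uint32_t)(lo) & 0xf0) << 8) | 0xfff)
#define DEMO_MEMBASE(reg)       (((uint32_t)(reg) & 0xfff0) << 16)
#define DEMO_MEMLIMIT(reg)      ((((uint32_t)(reg) & 0xfff0) << 16) | 0xfffff)

int
main(void)
{
        /* I/O base 0x40, limit 0x50: a 16-bit window decoding 0x4000-0x5fff. */
        printf("I/O window 0x%x-0x%x\n", DEMO_IOBASE(0x40), DEMO_IOLIMIT(0x50));
        /* Memory base 0xc000, limit 0xc010: window 0xc0000000-0xc01fffff. */
        printf("mem window 0x%x-0x%x\n", DEMO_MEMBASE(0xc000), DEMO_MEMLIMIT(0xc010));
        return (0);
}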
*/ if (sc->pmem.valid) { sc->pmem.reg = PCIR_PMBASEL_1; sc->pmem.step = 20; sc->pmem.mask = WIN_PMEM; sc->pmem.name = "prefetch"; if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) { sc->pmem.base = PCI_PPBMEMBASE( pci_read_config(dev, PCIR_PMBASEH_1, 4), val); sc->pmem.limit = PCI_PPBMEMLIMIT( pci_read_config(dev, PCIR_PMLIMITH_1, 4), pci_read_config(dev, PCIR_PMLIMITL_1, 2)); max = 0xffffffffffffffff; } else { sc->pmem.base = PCI_PPBMEMBASE(0, val); sc->pmem.limit = PCI_PPBMEMLIMIT(0, pci_read_config(dev, PCIR_PMLIMITL_1, 2)); max = 0xffffffff; } pcib_alloc_window(sc, &sc->pmem, SYS_RES_MEMORY, RF_PREFETCHABLE, max); } } static void pcib_release_window(struct pcib_softc *sc, struct pcib_window *w, int type) { device_t dev; int error, i; if (!w->valid) return; dev = sc->dev; error = rman_fini(&w->rman); if (error) { device_printf(dev, "failed to release %s rman\n", w->name); return; } free(__DECONST(char *, w->rman.rm_descr), M_DEVBUF); for (i = 0; i < w->count; i++) { error = bus_free_resource(dev, type, w->res[i]); if (error) device_printf(dev, "failed to release %s resource: %d\n", w->name, error); } free(w->res, M_DEVBUF); } static void pcib_free_windows(struct pcib_softc *sc) { pcib_release_window(sc, &sc->pmem, SYS_RES_MEMORY); pcib_release_window(sc, &sc->mem, SYS_RES_MEMORY); pcib_release_window(sc, &sc->io, SYS_RES_IOPORT); } #ifdef PCI_RES_BUS /* * Allocate a suitable secondary bus for this bridge if needed and * initialize the resource manager for the secondary bus range. Note * that the minimum count is a desired value and this may allocate a * smaller range. */ void pcib_setup_secbus(device_t dev, struct pcib_secbus *bus, int min_count) { char buf[64]; int error, rid, sec_reg; switch (pci_read_config(dev, PCIR_HDRTYPE, 1) & PCIM_HDRTYPE) { case PCIM_HDRTYPE_BRIDGE: sec_reg = PCIR_SECBUS_1; bus->sub_reg = PCIR_SUBBUS_1; break; case PCIM_HDRTYPE_CARDBUS: sec_reg = PCIR_SECBUS_2; bus->sub_reg = PCIR_SUBBUS_2; break; default: panic("not a PCI bridge"); } bus->sec = pci_read_config(dev, sec_reg, 1); bus->sub = pci_read_config(dev, bus->sub_reg, 1); bus->dev = dev; bus->rman.rm_start = 0; bus->rman.rm_end = PCI_BUSMAX; bus->rman.rm_type = RMAN_ARRAY; snprintf(buf, sizeof(buf), "%s bus numbers", device_get_nameunit(dev)); bus->rman.rm_descr = strdup(buf, M_DEVBUF); error = rman_init(&bus->rman); if (error) panic("Failed to initialize %s bus number rman", device_get_nameunit(dev)); /* * Allocate a bus range. This will return an existing bus range * if one exists, or a new bus range if one does not. */ rid = 0; bus->res = bus_alloc_resource_anywhere(dev, PCI_RES_BUS, &rid, min_count, 0); if (bus->res == NULL) { /* * Fall back to just allocating a range of a single bus * number. */ bus->res = bus_alloc_resource_anywhere(dev, PCI_RES_BUS, &rid, 1, 0); } else if (rman_get_size(bus->res) < min_count) /* * Attempt to grow the existing range to satisfy the * minimum desired count. */ (void)bus_adjust_resource(dev, PCI_RES_BUS, bus->res, rman_get_start(bus->res), rman_get_start(bus->res) + min_count - 1); /* * Add the initial resource to the rman. 
*/ if (bus->res != NULL) { error = rman_manage_region(&bus->rman, rman_get_start(bus->res), rman_get_end(bus->res)); if (error) panic("Failed to add resource to rman"); bus->sec = rman_get_start(bus->res); bus->sub = rman_get_end(bus->res); } } void pcib_free_secbus(device_t dev, struct pcib_secbus *bus) { int error; error = rman_fini(&bus->rman); if (error) { device_printf(dev, "failed to release bus number rman\n"); return; } free(__DECONST(char *, bus->rman.rm_descr), M_DEVBUF); error = bus_free_resource(dev, PCI_RES_BUS, bus->res); if (error) device_printf(dev, "failed to release bus numbers resource: %d\n", error); } static struct resource * pcib_suballoc_bus(struct pcib_secbus *bus, device_t child, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct resource *res; res = rman_reserve_resource(&bus->rman, start, end, count, flags, child); if (res == NULL) return (NULL); if (bootverbose) device_printf(bus->dev, "allocated bus range (%ju-%ju) for rid %d of %s\n", rman_get_start(res), rman_get_end(res), *rid, pcib_child_name(child)); rman_set_rid(res, *rid); return (res); } /* * Attempt to grow the secondary bus range. This is much simpler than * for I/O windows as the range can only be grown by increasing * subbus. */ static int pcib_grow_subbus(struct pcib_secbus *bus, rman_res_t new_end) { rman_res_t old_end; int error; old_end = rman_get_end(bus->res); KASSERT(new_end > old_end, ("attempt to shrink subbus")); error = bus_adjust_resource(bus->dev, PCI_RES_BUS, bus->res, rman_get_start(bus->res), new_end); if (error) return (error); if (bootverbose) device_printf(bus->dev, "grew bus range to %ju-%ju\n", rman_get_start(bus->res), rman_get_end(bus->res)); error = rman_manage_region(&bus->rman, old_end + 1, rman_get_end(bus->res)); if (error) panic("Failed to add resource to rman"); bus->sub = rman_get_end(bus->res); pci_write_config(bus->dev, bus->sub_reg, bus->sub, 1); return (0); } struct resource * pcib_alloc_subbus(struct pcib_secbus *bus, device_t child, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct resource *res; rman_res_t start_free, end_free, new_end; /* * First, see if the request can be satisified by the existing * bus range. */ res = pcib_suballoc_bus(bus, child, rid, start, end, count, flags); if (res != NULL) return (res); /* * Figure out a range to grow the bus range. First, find the * first bus number after the last allocated bus in the rman and * enforce that as a minimum starting point for the range. */ if (rman_last_free_region(&bus->rman, &start_free, &end_free) != 0 || end_free != bus->sub) start_free = bus->sub + 1; if (start_free < start) start_free = start; new_end = start_free + count - 1; /* * See if this new range would satisfy the request if it * succeeds. */ if (new_end > end) return (NULL); /* Finally, attempt to grow the existing resource. */ if (bootverbose) { device_printf(bus->dev, "attempting to grow bus range for %ju buses\n", count); printf("\tback candidate range: %ju-%ju\n", start_free, new_end); } if (pcib_grow_subbus(bus, new_end) == 0) return (pcib_suballoc_bus(bus, child, rid, start, end, count, flags)); return (NULL); } #endif #else /* * Is the prefetch window open (eg, can we allocate memory in it?) */ static int pcib_is_prefetch_open(struct pcib_softc *sc) { return (sc->pmembase > 0 && sc->pmembase < sc->pmemlimit); } /* * Is the nonprefetch window open (eg, can we allocate memory in it?) 
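[Editorial aside] The secondary bus growth above is intentionally one-directional: the range can only be extended by raising the subordinate bus number, so pcib_alloc_subbus() looks for the first bus number after the last allocated one, clamps it to the caller's window, and gives up if the grown range would exceed the request. A hypothetical numeric walk-through of that arithmetic (not driver code):

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
        /* Bridge currently decodes buses 4-4; a child wants one bus in 0-255. */
        uint64_t sub = 4, start = 0, end = 255, count = 1;
        uint64_t start_free, new_end;

        start_free = sub + 1;           /* first number past the current range */
        if (start_free < start)
                start_free = start;
        new_end = start_free + count - 1;
        if (new_end > end) {
                printf("cannot grow the bus range\n");
                return (1);
        }
        /* Grow the subordinate bus to 5 and hand bus 5 to the child. */
        printf("grow subordinate bus to %ju, allocate bus %ju\n",
            (uintmax_t)new_end, (uintmax_t)start_free);
        return (0);
}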
*/ static int pcib_is_nonprefetch_open(struct pcib_softc *sc) { return (sc->membase > 0 && sc->membase < sc->memlimit); } /* * Is the io window open (eg, can we allocate ports in it?) */ static int pcib_is_io_open(struct pcib_softc *sc) { return (sc->iobase > 0 && sc->iobase < sc->iolimit); } /* * Get current I/O decode. */ static void pcib_get_io_decode(struct pcib_softc *sc) { device_t dev; uint32_t iolow; dev = sc->dev; iolow = pci_read_config(dev, PCIR_IOBASEL_1, 1); if ((iolow & PCIM_BRIO_MASK) == PCIM_BRIO_32) sc->iobase = PCI_PPBIOBASE( pci_read_config(dev, PCIR_IOBASEH_1, 2), iolow); else sc->iobase = PCI_PPBIOBASE(0, iolow); iolow = pci_read_config(dev, PCIR_IOLIMITL_1, 1); if ((iolow & PCIM_BRIO_MASK) == PCIM_BRIO_32) sc->iolimit = PCI_PPBIOLIMIT( pci_read_config(dev, PCIR_IOLIMITH_1, 2), iolow); else sc->iolimit = PCI_PPBIOLIMIT(0, iolow); } /* * Get current memory decode. */ static void pcib_get_mem_decode(struct pcib_softc *sc) { device_t dev; pci_addr_t pmemlow; dev = sc->dev; sc->membase = PCI_PPBMEMBASE(0, pci_read_config(dev, PCIR_MEMBASE_1, 2)); sc->memlimit = PCI_PPBMEMLIMIT(0, pci_read_config(dev, PCIR_MEMLIMIT_1, 2)); pmemlow = pci_read_config(dev, PCIR_PMBASEL_1, 2); if ((pmemlow & PCIM_BRPM_MASK) == PCIM_BRPM_64) sc->pmembase = PCI_PPBMEMBASE( pci_read_config(dev, PCIR_PMBASEH_1, 4), pmemlow); else sc->pmembase = PCI_PPBMEMBASE(0, pmemlow); pmemlow = pci_read_config(dev, PCIR_PMLIMITL_1, 2); if ((pmemlow & PCIM_BRPM_MASK) == PCIM_BRPM_64) sc->pmemlimit = PCI_PPBMEMLIMIT( pci_read_config(dev, PCIR_PMLIMITH_1, 4), pmemlow); else sc->pmemlimit = PCI_PPBMEMLIMIT(0, pmemlow); } /* * Restore previous I/O decode. */ static void pcib_set_io_decode(struct pcib_softc *sc) { device_t dev; uint32_t iohi; dev = sc->dev; iohi = sc->iobase >> 16; if (iohi > 0) pci_write_config(dev, PCIR_IOBASEH_1, iohi, 2); pci_write_config(dev, PCIR_IOBASEL_1, sc->iobase >> 8, 1); iohi = sc->iolimit >> 16; if (iohi > 0) pci_write_config(dev, PCIR_IOLIMITH_1, iohi, 2); pci_write_config(dev, PCIR_IOLIMITL_1, sc->iolimit >> 8, 1); } /* * Restore previous memory decode. */ static void pcib_set_mem_decode(struct pcib_softc *sc) { device_t dev; pci_addr_t pmemhi; dev = sc->dev; pci_write_config(dev, PCIR_MEMBASE_1, sc->membase >> 16, 2); pci_write_config(dev, PCIR_MEMLIMIT_1, sc->memlimit >> 16, 2); pmemhi = sc->pmembase >> 32; if (pmemhi > 0) pci_write_config(dev, PCIR_PMBASEH_1, pmemhi, 4); pci_write_config(dev, PCIR_PMBASEL_1, sc->pmembase >> 16, 2); pmemhi = sc->pmemlimit >> 32; if (pmemhi > 0) pci_write_config(dev, PCIR_PMLIMITH_1, pmemhi, 4); pci_write_config(dev, PCIR_PMLIMITL_1, sc->pmemlimit >> 16, 2); } #endif #ifdef PCI_HP /* * PCI-express HotPlug support. */ static int pci_enable_pcie_hp = 1; SYSCTL_INT(_hw_pci, OID_AUTO, enable_pcie_hp, CTLFLAG_RDTUN, &pci_enable_pcie_hp, 0, "Enable support for native PCI-express HotPlug."); static void pcib_probe_hotplug(struct pcib_softc *sc) { device_t dev; if (!pci_enable_pcie_hp) return; dev = sc->dev; if (pci_find_cap(dev, PCIY_EXPRESS, NULL) != 0) return; if (!(pcie_read_config(dev, PCIER_FLAGS, 2) & PCIEM_FLAGS_SLOT)) return; sc->pcie_link_cap = pcie_read_config(dev, PCIER_LINK_CAP, 4); sc->pcie_slot_cap = pcie_read_config(dev, PCIER_SLOT_CAP, 4); if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_HPC) sc->flags |= PCIB_HOTPLUG; } /* * Send a HotPlug command to the slot control register. If this slot * uses command completion interrupts and a previous command is still * in progress, then the command is dropped. 
Once the previous * command completes or times out, pcib_pcie_hotplug_update() will be * invoked to post a new command based on the slot's state at that * time. */ static void pcib_pcie_hotplug_command(struct pcib_softc *sc, uint16_t val, uint16_t mask) { device_t dev; uint16_t ctl, new; dev = sc->dev; if (sc->flags & PCIB_HOTPLUG_CMD_PENDING) return; ctl = pcie_read_config(dev, PCIER_SLOT_CTL, 2); new = (ctl & ~mask) | val; if (new == ctl) return; pcie_write_config(dev, PCIER_SLOT_CTL, new, 2); if (!(sc->pcie_slot_cap & PCIEM_SLOT_CAP_NCCS) && (ctl & new) & PCIEM_SLOT_CTL_CCIE) { sc->flags |= PCIB_HOTPLUG_CMD_PENDING; if (!cold) callout_reset(&sc->pcie_cc_timer, hz, pcib_pcie_cc_timeout, sc); } } static void pcib_pcie_hotplug_command_completed(struct pcib_softc *sc) { device_t dev; dev = sc->dev; if (bootverbose) device_printf(dev, "Command Completed\n"); if (!(sc->flags & PCIB_HOTPLUG_CMD_PENDING)) return; callout_stop(&sc->pcie_cc_timer); sc->flags &= ~PCIB_HOTPLUG_CMD_PENDING; wakeup(sc); } /* * Returns true if a card is fully inserted from the user's * perspective. It may not yet be ready for access, but the driver * can now start enabling access if necessary. */ static bool pcib_hotplug_inserted(struct pcib_softc *sc) { /* Pretend the card isn't present if a detach is forced. */ if (sc->flags & PCIB_DETACHING) return (false); /* Card must be present in the slot. */ if ((sc->pcie_slot_sta & PCIEM_SLOT_STA_PDS) == 0) return (false); /* A power fault implicitly turns off power to the slot. */ if (sc->pcie_slot_sta & PCIEM_SLOT_STA_PFD) return (false); /* If the MRL is disengaged, the slot is powered off. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_MRLSP && (sc->pcie_slot_sta & PCIEM_SLOT_STA_MRLSS) != 0) return (false); return (true); } /* * Returns -1 if the card is fully inserted, powered, and ready for * access. Otherwise, returns 0. */ static int pcib_hotplug_present(struct pcib_softc *sc) { device_t dev; dev = sc->dev; /* Card must be inserted. */ if (!pcib_hotplug_inserted(sc)) return (0); /* * Require the Electromechanical Interlock to be engaged if * present. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_EIP && (sc->pcie_slot_sta & PCIEM_SLOT_STA_EIS) == 0) return (0); /* Require the Data Link Layer to be active. */ if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) { if (!(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE)) return (0); } return (-1); } static void pcib_pcie_hotplug_update(struct pcib_softc *sc, uint16_t val, uint16_t mask, bool schedule_task) { bool card_inserted; /* Clear DETACHING if Present Detect has cleared. */ if ((sc->pcie_slot_sta & (PCIEM_SLOT_STA_PDC | PCIEM_SLOT_STA_PDS)) == PCIEM_SLOT_STA_PDC) sc->flags &= ~PCIB_DETACHING; card_inserted = pcib_hotplug_inserted(sc); /* Turn the power indicator on if a card is inserted. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_PIP) { mask |= PCIEM_SLOT_CTL_PIC; if (card_inserted) val |= PCIEM_SLOT_CTL_PI_ON; else if (sc->flags & PCIB_DETACH_PENDING) val |= PCIEM_SLOT_CTL_PI_BLINK; else val |= PCIEM_SLOT_CTL_PI_OFF; } /* Turn the power on via the Power Controller if a card is inserted. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_PCP) { mask |= PCIEM_SLOT_CTL_PCC; if (card_inserted) val |= PCIEM_SLOT_CTL_PC_ON; else val |= PCIEM_SLOT_CTL_PC_OFF; } /* * If a card is inserted, enable the Electromechanical * Interlock. If a card is not inserted (or we are in the * process of detaching), disable the Electromechanical * Interlock. 
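[Editorial aside] The (val, mask) pair threaded through pcib_pcie_hotplug_command() and pcib_pcie_hotplug_update() follows a read-modify-write convention: mask names the Slot Control fields being changed and val carries their new contents, so unrelated fields survive the update. A minimal sketch of that convention; the DEMO_* constants mirror the PCIe Slot Control Power Indicator field layout but are local stand-ins, not the pcireg.h definitions:

#include <stdio.h>
#include <stdint.h>

#define DEMO_SLOT_CTL_PIC       0x0300  /* Power Indicator Control field */
#define DEMO_SLOT_CTL_PI_ON     0x0100  /* ... encoded as "on" */

static uint16_t
slot_ctl_apply(uint16_t ctl, uint16_t val, uint16_t mask)
{
        /* Keep everything outside the mask, substitute the new field value. */
        return ((ctl & ~mask) | val);
}

int
main(void)
{
        uint16_t ctl = 0x1028;  /* some existing Slot Control contents */

        /* Turn the Power Indicator on without disturbing other fields. */
        ctl = slot_ctl_apply(ctl, DEMO_SLOT_CTL_PI_ON, DEMO_SLOT_CTL_PIC);
        printf("new slot control: 0x%04x\n", ctl);
        return (0);
}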
*/ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_EIP) { mask |= PCIEM_SLOT_CTL_EIC; if (card_inserted != !(sc->pcie_slot_sta & PCIEM_SLOT_STA_EIS)) val |= PCIEM_SLOT_CTL_EIC; } /* * Start a timer to see if the Data Link Layer times out. * Note that we only start the timer if Presence Detect * changed on this interrupt. Stop any scheduled timer if * the Data Link Layer is active. */ if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) { if (card_inserted && !(sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE) && sc->pcie_slot_sta & PCIEM_SLOT_STA_PDC) { if (cold) device_printf(sc->dev, "Data Link Layer inactive\n"); else callout_reset(&sc->pcie_dll_timer, hz, pcib_pcie_dll_timeout, sc); } else if (sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE) callout_stop(&sc->pcie_dll_timer); } pcib_pcie_hotplug_command(sc, val, mask); /* * During attach the child "pci" device is added sychronously; * otherwise, the task is scheduled to manage the child * device. */ if (schedule_task && (pcib_hotplug_present(sc) != 0) != (sc->child != NULL)) taskqueue_enqueue(taskqueue_thread, &sc->pcie_hp_task); } static void pcib_pcie_intr(void *arg) { struct pcib_softc *sc; device_t dev; sc = arg; dev = sc->dev; sc->pcie_slot_sta = pcie_read_config(dev, PCIER_SLOT_STA, 2); /* Clear the events just reported. */ pcie_write_config(dev, PCIER_SLOT_STA, sc->pcie_slot_sta, 2); if (sc->pcie_slot_sta & PCIEM_SLOT_STA_ABP) { if (sc->flags & PCIB_DETACH_PENDING) { device_printf(dev, "Attention Button Pressed: Detach Cancelled\n"); sc->flags &= ~PCIB_DETACH_PENDING; callout_stop(&sc->pcie_ab_timer); } else { device_printf(dev, "Attention Button Pressed: Detaching in 5 seconds\n"); sc->flags |= PCIB_DETACH_PENDING; callout_reset(&sc->pcie_ab_timer, 5 * hz, pcib_pcie_ab_timeout, sc); } } if (sc->pcie_slot_sta & PCIEM_SLOT_STA_PFD) device_printf(dev, "Power Fault Detected\n"); if (sc->pcie_slot_sta & PCIEM_SLOT_STA_MRLSC) device_printf(dev, "MRL Sensor Changed to %s\n", sc->pcie_slot_sta & PCIEM_SLOT_STA_MRLSS ? "open" : "closed"); if (bootverbose && sc->pcie_slot_sta & PCIEM_SLOT_STA_PDC) device_printf(dev, "Present Detect Changed to %s\n", sc->pcie_slot_sta & PCIEM_SLOT_STA_PDS ? "card present" : "empty"); if (sc->pcie_slot_sta & PCIEM_SLOT_STA_CC) pcib_pcie_hotplug_command_completed(sc); if (sc->pcie_slot_sta & PCIEM_SLOT_STA_DLLSC) { sc->pcie_link_sta = pcie_read_config(dev, PCIER_LINK_STA, 2); if (bootverbose) device_printf(dev, "Data Link Layer State Changed to %s\n", sc->pcie_link_sta & PCIEM_LINK_STA_DL_ACTIVE ? 
"active" : "inactive"); } pcib_pcie_hotplug_update(sc, 0, 0, true); } static void pcib_pcie_hotplug_task(void *context, int pending) { struct pcib_softc *sc; device_t dev; sc = context; mtx_lock(&Giant); dev = sc->dev; if (pcib_hotplug_present(sc) != 0) { if (sc->child == NULL) { sc->child = device_add_child(dev, "pci", -1); bus_generic_attach(dev); } } else { if (sc->child != NULL) { if (device_delete_child(dev, sc->child) == 0) sc->child = NULL; } } mtx_unlock(&Giant); } static void pcib_pcie_ab_timeout(void *arg) { struct pcib_softc *sc; device_t dev; sc = arg; dev = sc->dev; mtx_assert(&Giant, MA_OWNED); if (sc->flags & PCIB_DETACH_PENDING) { sc->flags |= PCIB_DETACHING; sc->flags &= ~PCIB_DETACH_PENDING; pcib_pcie_hotplug_update(sc, 0, 0, true); } } static void pcib_pcie_cc_timeout(void *arg) { struct pcib_softc *sc; device_t dev; uint16_t sta; sc = arg; dev = sc->dev; mtx_assert(&Giant, MA_OWNED); sta = pcie_read_config(dev, PCIER_SLOT_STA, 2); if (!(sta & PCIEM_SLOT_STA_CC)) { device_printf(dev, "Hotplug Command Timed Out - forcing detach\n"); sc->flags &= ~(PCIB_HOTPLUG_CMD_PENDING | PCIB_DETACH_PENDING); sc->flags |= PCIB_DETACHING; pcib_pcie_hotplug_update(sc, 0, 0, true); } else { device_printf(dev, "Missed HotPlug interrupt waiting for Command Completion\n"); pcib_pcie_intr(sc); } } static void pcib_pcie_dll_timeout(void *arg) { struct pcib_softc *sc; device_t dev; uint16_t sta; sc = arg; dev = sc->dev; mtx_assert(&Giant, MA_OWNED); sta = pcie_read_config(dev, PCIER_LINK_STA, 2); if (!(sta & PCIEM_LINK_STA_DL_ACTIVE)) { device_printf(dev, "Timed out waiting for Data Link Layer Active\n"); sc->flags |= PCIB_DETACHING; pcib_pcie_hotplug_update(sc, 0, 0, true); } else if (sta != sc->pcie_link_sta) { device_printf(dev, "Missed HotPlug interrupt waiting for DLL Active\n"); pcib_pcie_intr(sc); } } static int pcib_alloc_pcie_irq(struct pcib_softc *sc) { device_t dev; int count, error, rid; rid = -1; dev = sc->dev; /* * For simplicity, only use MSI-X if there is a single message. * To support a device with multiple messages we would have to * use remap intr if the MSI number is not 0. */ count = pci_msix_count(dev); if (count == 1) { error = pci_alloc_msix(dev, &count); if (error == 0) rid = 1; } if (rid < 0 && pci_msi_count(dev) > 0) { count = 1; error = pci_alloc_msi(dev, &count); if (error == 0) rid = 1; } if (rid < 0) rid = 0; sc->pcie_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->pcie_irq == NULL) { device_printf(dev, "Failed to allocate interrupt for PCI-e events\n"); if (rid > 0) pci_release_msi(dev); return (ENXIO); } error = bus_setup_intr(dev, sc->pcie_irq, INTR_TYPE_MISC, NULL, pcib_pcie_intr, sc, &sc->pcie_ihand); if (error) { device_printf(dev, "Failed to setup PCI-e interrupt handler\n"); bus_release_resource(dev, SYS_RES_IRQ, rid, sc->pcie_irq); if (rid > 0) pci_release_msi(dev); return (error); } return (0); } static int pcib_release_pcie_irq(struct pcib_softc *sc) { device_t dev; int error; dev = sc->dev; error = bus_teardown_intr(dev, sc->pcie_irq, sc->pcie_ihand); if (error) return (error); error = bus_free_resource(dev, SYS_RES_IRQ, sc->pcie_irq); if (error) return (error); return (pci_release_msi(dev)); } static void pcib_setup_hotplug(struct pcib_softc *sc) { device_t dev; uint16_t mask, val; dev = sc->dev; callout_init(&sc->pcie_ab_timer, 0); callout_init(&sc->pcie_cc_timer, 0); callout_init(&sc->pcie_dll_timer, 0); TASK_INIT(&sc->pcie_hp_task, 0, pcib_pcie_hotplug_task, sc); /* Allocate IRQ. 
*/ if (pcib_alloc_pcie_irq(sc) != 0) return; sc->pcie_link_sta = pcie_read_config(dev, PCIER_LINK_STA, 2); sc->pcie_slot_sta = pcie_read_config(dev, PCIER_SLOT_STA, 2); /* Clear any events previously pending. */ pcie_write_config(dev, PCIER_SLOT_STA, sc->pcie_slot_sta, 2); /* Enable HotPlug events. */ mask = PCIEM_SLOT_CTL_DLLSCE | PCIEM_SLOT_CTL_HPIE | PCIEM_SLOT_CTL_CCIE | PCIEM_SLOT_CTL_PDCE | PCIEM_SLOT_CTL_MRLSCE | PCIEM_SLOT_CTL_PFDE | PCIEM_SLOT_CTL_ABPE; val = PCIEM_SLOT_CTL_PDCE | PCIEM_SLOT_CTL_HPIE; if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_APB) val |= PCIEM_SLOT_CTL_ABPE; if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_PCP) val |= PCIEM_SLOT_CTL_PFDE; if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_MRLSP) val |= PCIEM_SLOT_CTL_MRLSCE; if (!(sc->pcie_slot_cap & PCIEM_SLOT_CAP_NCCS)) val |= PCIEM_SLOT_CTL_CCIE; if (sc->pcie_link_cap & PCIEM_LINK_CAP_DL_ACTIVE) val |= PCIEM_SLOT_CTL_DLLSCE; /* Turn the attention indicator off. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_AIP) { mask |= PCIEM_SLOT_CTL_AIC; val |= PCIEM_SLOT_CTL_AI_OFF; } pcib_pcie_hotplug_update(sc, val, mask, false); } static int pcib_detach_hotplug(struct pcib_softc *sc) { uint16_t mask, val; int error; /* Disable the card in the slot and force it to detach. */ if (sc->flags & PCIB_DETACH_PENDING) { sc->flags &= ~PCIB_DETACH_PENDING; callout_stop(&sc->pcie_ab_timer); } sc->flags |= PCIB_DETACHING; if (sc->flags & PCIB_HOTPLUG_CMD_PENDING) { callout_stop(&sc->pcie_cc_timer); tsleep(sc, 0, "hpcmd", hz); sc->flags &= ~PCIB_HOTPLUG_CMD_PENDING; } /* Disable HotPlug events. */ mask = PCIEM_SLOT_CTL_DLLSCE | PCIEM_SLOT_CTL_HPIE | PCIEM_SLOT_CTL_CCIE | PCIEM_SLOT_CTL_PDCE | PCIEM_SLOT_CTL_MRLSCE | PCIEM_SLOT_CTL_PFDE | PCIEM_SLOT_CTL_ABPE; val = 0; /* Turn the attention indicator off. */ if (sc->pcie_slot_cap & PCIEM_SLOT_CAP_AIP) { mask |= PCIEM_SLOT_CTL_AIC; val |= PCIEM_SLOT_CTL_AI_OFF; } pcib_pcie_hotplug_update(sc, val, mask, false); error = pcib_release_pcie_irq(sc); if (error) return (error); taskqueue_drain(taskqueue_thread, &sc->pcie_hp_task); callout_drain(&sc->pcie_ab_timer); callout_drain(&sc->pcie_cc_timer); callout_drain(&sc->pcie_dll_timer); return (0); } #endif /* * Get current bridge configuration. */ static void pcib_cfg_save(struct pcib_softc *sc) { #ifndef NEW_PCIB device_t dev; uint16_t command; dev = sc->dev; command = pci_read_config(dev, PCIR_COMMAND, 2); if (command & PCIM_CMD_PORTEN) pcib_get_io_decode(sc); if (command & PCIM_CMD_MEMEN) pcib_get_mem_decode(sc); #endif } /* * Restore previous bridge configuration. */ static void pcib_cfg_restore(struct pcib_softc *sc) { device_t dev; #ifndef NEW_PCIB uint16_t command; #endif dev = sc->dev; #ifdef NEW_PCIB pcib_write_windows(sc, WIN_IO | WIN_MEM | WIN_PMEM); #else command = pci_read_config(dev, PCIR_COMMAND, 2); if (command & PCIM_CMD_PORTEN) pcib_set_io_decode(sc); if (command & PCIM_CMD_MEMEN) pcib_set_mem_decode(sc); #endif } /* * Generic device interface */ static int pcib_probe(device_t dev) { if ((pci_get_class(dev) == PCIC_BRIDGE) && (pci_get_subclass(dev) == PCIS_BRIDGE_PCI)) { device_set_desc(dev, "PCI-PCI bridge"); return(-10000); } return(ENXIO); } void pcib_attach_common(device_t dev) { struct pcib_softc *sc; struct sysctl_ctx_list *sctx; struct sysctl_oid *soid; int comma; sc = device_get_softc(dev); sc->dev = dev; /* * Get current bridge configuration. 
*/ sc->domain = pci_get_domain(dev); #if !(defined(NEW_PCIB) && defined(PCI_RES_BUS)) sc->bus.sec = pci_read_config(dev, PCIR_SECBUS_1, 1); sc->bus.sub = pci_read_config(dev, PCIR_SUBBUS_1, 1); #endif sc->bridgectl = pci_read_config(dev, PCIR_BRIDGECTL_1, 2); pcib_cfg_save(sc); /* * The primary bus register should always be the bus of the * parent. */ sc->pribus = pci_get_bus(dev); pci_write_config(dev, PCIR_PRIBUS_1, sc->pribus, 1); /* * Setup sysctl reporting nodes */ sctx = device_get_sysctl_ctx(dev); soid = device_get_sysctl_tree(dev); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "domain", CTLFLAG_RD, &sc->domain, 0, "Domain number"); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "pribus", CTLFLAG_RD, &sc->pribus, 0, "Primary bus number"); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "secbus", CTLFLAG_RD, &sc->bus.sec, 0, "Secondary bus number"); SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "subbus", CTLFLAG_RD, &sc->bus.sub, 0, "Subordinate bus number"); /* * Quirk handling. */ switch (pci_get_devid(dev)) { #if !(defined(NEW_PCIB) && defined(PCI_RES_BUS)) case 0x12258086: /* Intel 82454KX/GX (Orion) */ { uint8_t supbus; supbus = pci_read_config(dev, 0x41, 1); if (supbus != 0xff) { sc->bus.sec = supbus + 1; sc->bus.sub = supbus + 1; } break; } #endif /* * The i82380FB mobile docking controller is a PCI-PCI bridge, * and it is a subtractive bridge. However, the ProgIf is wrong * so the normal setting of PCIB_SUBTRACTIVE bit doesn't * happen. There are also Toshiba and Cavium ThunderX bridges * that behave this way. */ case 0xa002177d: /* Cavium ThunderX */ case 0x124b8086: /* Intel 82380FB Mobile */ case 0x060513d7: /* Toshiba ???? */ sc->flags |= PCIB_SUBTRACTIVE; break; #if !(defined(NEW_PCIB) && defined(PCI_RES_BUS)) /* Compaq R3000 BIOS sets wrong subordinate bus number. */ case 0x00dd10de: { char *cp; if ((cp = kern_getenv("smbios.planar.maker")) == NULL) break; if (strncmp(cp, "Compal", 6) != 0) { freeenv(cp); break; } freeenv(cp); if ((cp = kern_getenv("smbios.planar.product")) == NULL) break; if (strncmp(cp, "08A0", 4) != 0) { freeenv(cp); break; } freeenv(cp); if (sc->bus.sub < 0xa) { pci_write_config(dev, PCIR_SUBBUS_1, 0xa, 1); sc->bus.sub = pci_read_config(dev, PCIR_SUBBUS_1, 1); } break; } #endif } if (pci_msi_device_blacklisted(dev)) sc->flags |= PCIB_DISABLE_MSI; if (pci_msix_device_blacklisted(dev)) sc->flags |= PCIB_DISABLE_MSIX; /* * Intel 815, 845 and other chipsets say they are PCI-PCI bridges, * but have a ProgIF of 0x80. The 82801 family (AA, AB, BAM/CAM, * BA/CA/DB and E) PCI bridges are HUB-PCI bridges, in Intelese. * This means they act as if they were subtractively decoding * bridges and pass all transactions. Mark them and real ProgIf 1 * parts as subtractive. 
*/ if ((pci_get_devid(dev) & 0xff00ffff) == 0x24008086 || pci_read_config(dev, PCIR_PROGIF, 1) == PCIP_BRIDGE_PCI_SUBTRACTIVE) sc->flags |= PCIB_SUBTRACTIVE; #ifdef PCI_HP pcib_probe_hotplug(sc); #endif #ifdef NEW_PCIB #ifdef PCI_RES_BUS pcib_setup_secbus(dev, &sc->bus, 1); #endif pcib_probe_windows(sc); #endif #ifdef PCI_HP if (sc->flags & PCIB_HOTPLUG) pcib_setup_hotplug(sc); #endif if (bootverbose) { device_printf(dev, " domain %d\n", sc->domain); device_printf(dev, " secondary bus %d\n", sc->bus.sec); device_printf(dev, " subordinate bus %d\n", sc->bus.sub); #ifdef NEW_PCIB if (pcib_is_window_open(&sc->io)) device_printf(dev, " I/O decode 0x%jx-0x%jx\n", (uintmax_t)sc->io.base, (uintmax_t)sc->io.limit); if (pcib_is_window_open(&sc->mem)) device_printf(dev, " memory decode 0x%jx-0x%jx\n", (uintmax_t)sc->mem.base, (uintmax_t)sc->mem.limit); if (pcib_is_window_open(&sc->pmem)) device_printf(dev, " prefetched decode 0x%jx-0x%jx\n", (uintmax_t)sc->pmem.base, (uintmax_t)sc->pmem.limit); #else if (pcib_is_io_open(sc)) device_printf(dev, " I/O decode 0x%x-0x%x\n", sc->iobase, sc->iolimit); if (pcib_is_nonprefetch_open(sc)) device_printf(dev, " memory decode 0x%jx-0x%jx\n", (uintmax_t)sc->membase, (uintmax_t)sc->memlimit); if (pcib_is_prefetch_open(sc)) device_printf(dev, " prefetched decode 0x%jx-0x%jx\n", (uintmax_t)sc->pmembase, (uintmax_t)sc->pmemlimit); #endif if (sc->bridgectl & (PCIB_BCR_ISA_ENABLE | PCIB_BCR_VGA_ENABLE) || sc->flags & PCIB_SUBTRACTIVE) { device_printf(dev, " special decode "); comma = 0; if (sc->bridgectl & PCIB_BCR_ISA_ENABLE) { printf("ISA"); comma = 1; } if (sc->bridgectl & PCIB_BCR_VGA_ENABLE) { printf("%sVGA", comma ? ", " : ""); comma = 1; } if (sc->flags & PCIB_SUBTRACTIVE) printf("%ssubtractive", comma ? ", " : ""); printf("\n"); } } /* * Always enable busmastering on bridges so that transactions * initiated on the secondary bus are passed through to the * primary bus. */ pci_enable_busmaster(dev); } #ifdef PCI_HP static int pcib_present(struct pcib_softc *sc) { if (sc->flags & PCIB_HOTPLUG) return (pcib_hotplug_present(sc) != 0); return (1); } #endif int pcib_attach_child(device_t dev) { struct pcib_softc *sc; sc = device_get_softc(dev); if (sc->bus.sec == 0) { /* no secondary bus; we should have fixed this */ return(0); } #ifdef PCI_HP if (!pcib_present(sc)) { /* An empty HotPlug slot, so don't add a PCI bus yet. 
*/ return (0); } #endif sc->child = device_add_child(dev, "pci", -1); return (bus_generic_attach(dev)); } int pcib_attach(device_t dev) { pcib_attach_common(dev); return (pcib_attach_child(dev)); } int pcib_detach(device_t dev) { #if defined(PCI_HP) || defined(NEW_PCIB) struct pcib_softc *sc; #endif int error; #if defined(PCI_HP) || defined(NEW_PCIB) sc = device_get_softc(dev); #endif error = bus_generic_detach(dev); if (error) return (error); #ifdef PCI_HP if (sc->flags & PCIB_HOTPLUG) { error = pcib_detach_hotplug(sc); if (error) return (error); } #endif error = device_delete_children(dev); if (error) return (error); #ifdef NEW_PCIB pcib_free_windows(sc); #ifdef PCI_RES_BUS pcib_free_secbus(dev, &sc->bus); #endif #endif return (0); } int pcib_suspend(device_t dev) { pcib_cfg_save(device_get_softc(dev)); return (bus_generic_suspend(dev)); } int pcib_resume(device_t dev) { pcib_cfg_restore(device_get_softc(dev)); return (bus_generic_resume(dev)); } void pcib_bridge_init(device_t dev) { pci_write_config(dev, PCIR_IOBASEL_1, 0xff, 1); pci_write_config(dev, PCIR_IOBASEH_1, 0xffff, 2); pci_write_config(dev, PCIR_IOLIMITL_1, 0, 1); pci_write_config(dev, PCIR_IOLIMITH_1, 0, 2); pci_write_config(dev, PCIR_MEMBASE_1, 0xffff, 2); pci_write_config(dev, PCIR_MEMLIMIT_1, 0, 2); pci_write_config(dev, PCIR_PMBASEL_1, 0xffff, 2); pci_write_config(dev, PCIR_PMBASEH_1, 0xffffffff, 4); pci_write_config(dev, PCIR_PMLIMITL_1, 0, 2); pci_write_config(dev, PCIR_PMLIMITH_1, 0, 4); } int pcib_child_present(device_t dev, device_t child) { #ifdef PCI_HP struct pcib_softc *sc = device_get_softc(dev); int retval; retval = bus_child_present(dev); if (retval != 0 && sc->flags & PCIB_HOTPLUG) retval = pcib_hotplug_present(sc); return (retval); #else return (bus_child_present(dev)); #endif } int pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct pcib_softc *sc = device_get_softc(dev); switch (which) { case PCIB_IVAR_DOMAIN: *result = sc->domain; return(0); case PCIB_IVAR_BUS: *result = sc->bus.sec; return(0); } return(ENOENT); } int pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t value) { switch (which) { case PCIB_IVAR_DOMAIN: return(EINVAL); case PCIB_IVAR_BUS: return(EINVAL); } return(ENOENT); } #ifdef NEW_PCIB /* * Attempt to allocate a resource from the existing resources assigned * to a window. */ static struct resource * pcib_suballoc_resource(struct pcib_softc *sc, struct pcib_window *w, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct resource *res; if (!pcib_is_window_open(w)) return (NULL); res = rman_reserve_resource(&w->rman, start, end, count, flags & ~RF_ACTIVE, child); if (res == NULL) return (NULL); if (bootverbose) device_printf(sc->dev, "allocated %s range (%#jx-%#jx) for rid %x of %s\n", w->name, rman_get_start(res), rman_get_end(res), *rid, pcib_child_name(child)); rman_set_rid(res, *rid); /* * If the resource should be active, pass that request up the * tree. This assumes the parent drivers can handle * activating sub-allocated resources. */ if (flags & RF_ACTIVE) { if (bus_activate_resource(child, type, *rid, res) != 0) { rman_release_resource(res); return (NULL); } } return (res); } /* Allocate a fresh resource range for an unconfigured window. 
*/ static int pcib_alloc_new_window(struct pcib_softc *sc, struct pcib_window *w, int type, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct resource *res; rman_res_t base, limit, wmask; int rid; /* * If this is an I/O window on a bridge with ISA enable set * and the start address is below 64k, then try to allocate an * initial window of 0x1000 bytes long starting at address * 0xf000 and walking down. Note that if the original request * was larger than the non-aliased range size of 0x100 our * caller would have raised the start address up to 64k * already. */ if (type == SYS_RES_IOPORT && sc->bridgectl & PCIB_BCR_ISA_ENABLE && start < 65536) { for (base = 0xf000; (long)base >= 0; base -= 0x1000) { limit = base + 0xfff; /* * Skip ranges that wouldn't work for the * original request. Note that the actual * window that overlaps are the non-alias * ranges within [base, limit], so this isn't * quite a simple comparison. */ if (start + count > limit - 0x400) continue; if (base == 0) { /* * The first open region for the window at * 0 is 0x400-0x4ff. */ if (end - count + 1 < 0x400) continue; } else { if (end - count + 1 < base) continue; } if (pcib_alloc_nonisa_ranges(sc, base, limit) == 0) { w->base = base; w->limit = limit; return (0); } } return (ENOSPC); } wmask = ((rman_res_t)1 << w->step) - 1; if (RF_ALIGNMENT(flags) < w->step) { flags &= ~RF_ALIGNMENT_MASK; flags |= RF_ALIGNMENT_LOG2(w->step); } start &= ~wmask; end |= wmask; count = roundup2(count, (rman_res_t)1 << w->step); rid = w->reg; res = bus_alloc_resource(sc->dev, type, &rid, start, end, count, flags & ~RF_ACTIVE); if (res == NULL) return (ENOSPC); pcib_add_window_resources(w, &res, 1); pcib_activate_window(sc, type); w->base = rman_get_start(res); w->limit = rman_get_end(res); return (0); } /* Try to expand an existing window to the requested base and limit. */ static int pcib_expand_window(struct pcib_softc *sc, struct pcib_window *w, int type, rman_res_t base, rman_res_t limit) { struct resource *res; int error, i, force_64k_base; KASSERT(base <= w->base && limit >= w->limit, ("attempting to shrink window")); /* * XXX: pcib_grow_window() doesn't try to do this anyway and * the error handling for all the edge cases would be tedious. */ KASSERT(limit == w->limit || base == w->base, ("attempting to grow both ends of a window")); /* * Yet more special handling for requests to expand an I/O * window behind an ISA-enabled bridge. Since I/O windows * have to grow in 0x1000 increments and the end of the 0xffff * range is an alias, growing a window below 64k will always * result in allocating new resources and never adjusting an * existing resource. */ if (type == SYS_RES_IOPORT && sc->bridgectl & PCIB_BCR_ISA_ENABLE && (limit <= 65535 || (base <= 65535 && base != w->base))) { KASSERT(limit == w->limit || limit <= 65535, ("attempting to grow both ends across 64k ISA alias")); if (base != w->base) error = pcib_alloc_nonisa_ranges(sc, base, w->base - 1); else error = pcib_alloc_nonisa_ranges(sc, w->limit + 1, limit); if (error == 0) { w->base = base; w->limit = limit; } return (error); } /* * Find the existing resource to adjust. Usually there is only one, * but for an ISA-enabled bridge we might be growing the I/O window * above 64k and need to find the existing resource that maps all * of the area above 64k. 
*/ for (i = 0; i < w->count; i++) { if (rman_get_end(w->res[i]) == w->limit) break; } KASSERT(i != w->count, ("did not find existing resource")); res = w->res[i]; /* * Usually the resource we found should match the window's * existing range. The one exception is the ISA-enabled case * mentioned above in which case the resource should start at * 64k. */ if (type == SYS_RES_IOPORT && sc->bridgectl & PCIB_BCR_ISA_ENABLE && w->base <= 65535) { KASSERT(rman_get_start(res) == 65536, ("existing resource mismatch")); force_64k_base = 1; } else { KASSERT(w->base == rman_get_start(res), ("existing resource mismatch")); force_64k_base = 0; } error = bus_adjust_resource(sc->dev, type, res, force_64k_base ? rman_get_start(res) : base, limit); if (error) return (error); /* Add the newly allocated region to the resource manager. */ if (w->base != base) { error = rman_manage_region(&w->rman, base, w->base - 1); w->base = base; } else { error = rman_manage_region(&w->rman, w->limit + 1, limit); w->limit = limit; } if (error) { if (bootverbose) device_printf(sc->dev, "failed to expand %s resource manager\n", w->name); (void)bus_adjust_resource(sc->dev, type, res, force_64k_base ? rman_get_start(res) : w->base, w->limit); } return (error); } /* * Attempt to grow a window to make room for a given resource request. */ static int pcib_grow_window(struct pcib_softc *sc, struct pcib_window *w, int type, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { rman_res_t align, start_free, end_free, front, back, wmask; int error; /* * Clamp the desired resource range to the maximum address * this window supports. Reject impossible requests. * * For I/O port requests behind a bridge with the ISA enable * bit set, force large allocations to start above 64k. */ if (!w->valid) return (EINVAL); if (sc->bridgectl & PCIB_BCR_ISA_ENABLE && count > 0x100 && start < 65536) start = 65536; if (end > w->rman.rm_end) end = w->rman.rm_end; if (start + count - 1 > end || start + count < start) return (EINVAL); wmask = ((rman_res_t)1 << w->step) - 1; /* * If there is no resource at all, just try to allocate enough * aligned space for this resource. */ if (w->res == NULL) { error = pcib_alloc_new_window(sc, w, type, start, end, count, flags); if (error) { if (bootverbose) device_printf(sc->dev, "failed to allocate initial %s window (%#jx-%#jx,%#jx)\n", w->name, start, end, count); return (error); } if (bootverbose) device_printf(sc->dev, "allocated initial %s window of %#jx-%#jx\n", w->name, (uintmax_t)w->base, (uintmax_t)w->limit); goto updatewin; } /* * See if growing the window would help. Compute the minimum * amount of address space needed on both the front and back * ends of the existing window to satisfy the allocation. * * For each end, build a candidate region adjusting for the * required alignment, etc. If there is a free region at the * edge of the window, grow from the inner edge of the free * region. Otherwise grow from the window boundary. * * Growing an I/O window below 64k for a bridge with the ISA * enable bit doesn't require any special magic as the step * size of an I/O window (1k) always includes multiple * non-alias ranges when it is grown in either direction. * * XXX: Special case: if w->res is completely empty and the * request size is larger than w->res, we should find the * optimal aligned buffer containing w->res and allocate that. 
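[Editorial aside] The "front" and "back" candidates described above are just two bounded growth amounts: how far the window would have to extend downward (front) or upward (back) to fit the request once alignment and the window's step size are applied. A hypothetical worked example of the front computation, using made-up numbers for a 1MB-step memory window rather than driver state:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
        /* Window 0xc0000000-0xc00fffff; request: 2MB, 2MB-aligned, anywhere. */
        uint64_t wbase = 0xc0000000, wstep = 20, count = 0x200000;
        uint64_t align = 0x200000, start = 0, end = 0xffffffff;
        uint64_t wmask = ((uint64_t)1 << wstep) - 1;
        uint64_t end_free, front;

        end_free = wbase;               /* no free space left at the window edge */
        if (end_free > end)
                end_free = end + 1;
        end_free &= ~(align - 1);       /* align the candidate's end ... */
        end_free--;
        front = end_free - (count - 1); /* ... and place the request below it */
        if (front >= start && front <= end_free) {
                front &= ~wmask;        /* the new base must be step-aligned */
                printf("grow front by 0x%jx to new base 0x%jx\n",
                    (uintmax_t)(wbase - front), (uintmax_t)front);
        } else
                printf("cannot grow at the front\n");
        return (0);
}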
*/ if (bootverbose) device_printf(sc->dev, "attempting to grow %s window for (%#jx-%#jx,%#jx)\n", w->name, start, end, count); align = (rman_res_t)1 << RF_ALIGNMENT(flags); if (start < w->base) { if (rman_first_free_region(&w->rman, &start_free, &end_free) != 0 || start_free != w->base) end_free = w->base; if (end_free > end) end_free = end + 1; /* Move end_free down until it is properly aligned. */ end_free &= ~(align - 1); end_free--; front = end_free - (count - 1); /* * The resource would now be allocated at (front, * end_free). Ensure that fits in the (start, end) * bounds. end_free is checked above. If 'front' is * ok, ensure it is properly aligned for this window. * Also check for underflow. */ if (front >= start && front <= end_free) { if (bootverbose) printf("\tfront candidate range: %#jx-%#jx\n", front, end_free); front &= ~wmask; front = w->base - front; } else front = 0; } else front = 0; if (end > w->limit) { if (rman_last_free_region(&w->rman, &start_free, &end_free) != 0 || end_free != w->limit) start_free = w->limit + 1; if (start_free < start) start_free = start; /* Move start_free up until it is properly aligned. */ start_free = roundup2(start_free, align); back = start_free + count - 1; /* * The resource would now be allocated at (start_free, * back). Ensure that fits in the (start, end) * bounds. start_free is checked above. If 'back' is * ok, ensure it is properly aligned for this window. * Also check for overflow. */ if (back <= end && start_free <= back) { if (bootverbose) printf("\tback candidate range: %#jx-%#jx\n", start_free, back); back |= wmask; back -= w->limit; } else back = 0; } else back = 0; /* * Try to allocate the smallest needed region first. * If that fails, fall back to the other region. */ error = ENOSPC; while (front != 0 || back != 0) { if (front != 0 && (front <= back || back == 0)) { error = pcib_expand_window(sc, w, type, w->base - front, w->limit); if (error == 0) break; front = 0; } else { error = pcib_expand_window(sc, w, type, w->base, w->limit + back); if (error == 0) break; back = 0; } } if (error) return (error); if (bootverbose) device_printf(sc->dev, "grew %s window to %#jx-%#jx\n", w->name, (uintmax_t)w->base, (uintmax_t)w->limit); updatewin: /* Write the new window. */ KASSERT((w->base & wmask) == 0, ("start address is not aligned")); KASSERT((w->limit & wmask) == wmask, ("end address is not aligned")); pcib_write_windows(sc, w->mask); return (0); } /* * We have to trap resource allocation requests and ensure that the bridge * is set up to, or capable of handling them. */ struct resource * pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct pcib_softc *sc; struct resource *r; sc = device_get_softc(dev); /* * VGA resources are decoded iff the VGA enable bit is set in * the bridge control register. VGA resources do not fall into * the resource windows and are passed up to the parent. 
*/ if ((type == SYS_RES_IOPORT && pci_is_vga_ioport_range(start, end)) || (type == SYS_RES_MEMORY && pci_is_vga_memory_range(start, end))) { if (sc->bridgectl & PCIB_BCR_VGA_ENABLE) return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); else return (NULL); } switch (type) { #ifdef PCI_RES_BUS case PCI_RES_BUS: return (pcib_alloc_subbus(&sc->bus, child, rid, start, end, count, flags)); #endif case SYS_RES_IOPORT: if (pcib_is_isa_range(sc, start, end, count)) return (NULL); r = pcib_suballoc_resource(sc, &sc->io, child, type, rid, start, end, count, flags); if (r != NULL || (sc->flags & PCIB_SUBTRACTIVE) != 0) break; if (pcib_grow_window(sc, &sc->io, type, start, end, count, flags) == 0) r = pcib_suballoc_resource(sc, &sc->io, child, type, rid, start, end, count, flags); break; case SYS_RES_MEMORY: /* * For prefetchable resources, prefer the prefetchable * memory window, but fall back to the regular memory * window if that fails. Try both windows before * attempting to grow a window in case the firmware * has used a range in the regular memory window to * map a prefetchable BAR. */ if (flags & RF_PREFETCHABLE) { r = pcib_suballoc_resource(sc, &sc->pmem, child, type, rid, start, end, count, flags); if (r != NULL) break; } r = pcib_suballoc_resource(sc, &sc->mem, child, type, rid, start, end, count, flags); if (r != NULL || (sc->flags & PCIB_SUBTRACTIVE) != 0) break; if (flags & RF_PREFETCHABLE) { if (pcib_grow_window(sc, &sc->pmem, type, start, end, count, flags) == 0) { r = pcib_suballoc_resource(sc, &sc->pmem, child, type, rid, start, end, count, flags); if (r != NULL) break; } } if (pcib_grow_window(sc, &sc->mem, type, start, end, count, flags & ~RF_PREFETCHABLE) == 0) r = pcib_suballoc_resource(sc, &sc->mem, child, type, rid, start, end, count, flags); break; default: return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); } /* * If attempts to suballocate from the window fail but this is a * subtractive bridge, pass the request up the tree. */ if (sc->flags & PCIB_SUBTRACTIVE && r == NULL) return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); return (r); } int pcib_adjust_resource(device_t bus, device_t child, int type, struct resource *r, rman_res_t start, rman_res_t end) { struct pcib_softc *sc; sc = device_get_softc(bus); if (pcib_is_resource_managed(sc, type, r)) return (rman_adjust_resource(r, start, end)); return (bus_generic_adjust_resource(bus, child, type, r, start, end)); } int pcib_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { struct pcib_softc *sc; int error; sc = device_get_softc(dev); if (pcib_is_resource_managed(sc, type, r)) { if (rman_get_flags(r) & RF_ACTIVE) { error = bus_deactivate_resource(child, type, rid, r); if (error) return (error); } return (rman_release_resource(r)); } return (bus_generic_release_resource(dev, child, type, rid, r)); } #else /* * We have to trap resource allocation requests and ensure that the bridge * is set up to, or capable of handling them. */ struct resource * pcib_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct pcib_softc *sc = device_get_softc(dev); const char *name, *suffix; int ok; /* * Fail the allocation for this range if it's not supported. 
*/ name = device_get_nameunit(child); if (name == NULL) { name = ""; suffix = ""; } else suffix = " "; switch (type) { case SYS_RES_IOPORT: ok = 0; if (!pcib_is_io_open(sc)) break; ok = (start >= sc->iobase && end <= sc->iolimit); /* * Make sure we allow access to VGA I/O addresses when the * bridge has the "VGA Enable" bit set. */ if (!ok && pci_is_vga_ioport_range(start, end)) ok = (sc->bridgectl & PCIB_BCR_VGA_ENABLE) ? 1 : 0; if ((sc->flags & PCIB_SUBTRACTIVE) == 0) { if (!ok) { if (start < sc->iobase) start = sc->iobase; if (end > sc->iolimit) end = sc->iolimit; if (start < end) ok = 1; } } else { ok = 1; #if 0 /* * If we overlap with the subtractive range, then * pick the upper range to use. */ if (start < sc->iolimit && end > sc->iobase) start = sc->iolimit + 1; #endif } if (end < start) { device_printf(dev, "ioport: end (%jx) < start (%jx)\n", end, start); start = 0; end = 0; ok = 0; } if (!ok) { device_printf(dev, "%s%srequested unsupported I/O " "range 0x%jx-0x%jx (decoding 0x%x-0x%x)\n", name, suffix, start, end, sc->iobase, sc->iolimit); return (NULL); } if (bootverbose) device_printf(dev, "%s%srequested I/O range 0x%jx-0x%jx: in range\n", name, suffix, start, end); break; case SYS_RES_MEMORY: ok = 0; if (pcib_is_nonprefetch_open(sc)) ok = ok || (start >= sc->membase && end <= sc->memlimit); if (pcib_is_prefetch_open(sc)) ok = ok || (start >= sc->pmembase && end <= sc->pmemlimit); /* * Make sure we allow access to VGA memory addresses when the * bridge has the "VGA Enable" bit set. */ if (!ok && pci_is_vga_memory_range(start, end)) ok = (sc->bridgectl & PCIB_BCR_VGA_ENABLE) ? 1 : 0; if ((sc->flags & PCIB_SUBTRACTIVE) == 0) { if (!ok) { ok = 1; if (flags & RF_PREFETCHABLE) { if (pcib_is_prefetch_open(sc)) { if (start < sc->pmembase) start = sc->pmembase; if (end > sc->pmemlimit) end = sc->pmemlimit; } else { ok = 0; } } else { /* non-prefetchable */ if (pcib_is_nonprefetch_open(sc)) { if (start < sc->membase) start = sc->membase; if (end > sc->memlimit) end = sc->memlimit; } else { ok = 0; } } } } else if (!ok) { ok = 1; /* subtractive bridge: always ok */ #if 0 if (pcib_is_nonprefetch_open(sc)) { if (start < sc->memlimit && end > sc->membase) start = sc->memlimit + 1; } if (pcib_is_prefetch_open(sc)) { if (start < sc->pmemlimit && end > sc->pmembase) start = sc->pmemlimit + 1; } #endif } if (end < start) { device_printf(dev, "memory: end (%jx) < start (%jx)\n", end, start); start = 0; end = 0; ok = 0; } if (!ok && bootverbose) device_printf(dev, "%s%srequested unsupported memory range %#jx-%#jx " "(decoding %#jx-%#jx, %#jx-%#jx)\n", name, suffix, start, end, (uintmax_t)sc->membase, (uintmax_t)sc->memlimit, (uintmax_t)sc->pmembase, (uintmax_t)sc->pmemlimit); if (!ok) return (NULL); if (bootverbose) device_printf(dev,"%s%srequested memory range " "0x%jx-0x%jx: good\n", name, suffix, start, end); break; default: break; } /* * Bridge is OK decoding this resource, so pass it up. */ return (bus_generic_alloc_resource(dev, child, type, rid, start, end, count, flags)); } #endif /* * If ARI is enabled on this downstream port, translate the function number * to the non-ARI slot/function. The downstream port will convert it back in * hardware. If ARI is not enabled slot and func are not modified. 
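 * As an illustration, assuming the conventional ARI encoding in which the
 * upper five bits of the 8-bit ARI function number are presented as the slot
 * and the low three bits as the function, an ARI function of 69 (0b01000101)
 * would be handed to the parent as slot 8, function 5.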
*/ static __inline void pcib_xlate_ari(device_t pcib, int bus, int *slot, int *func) { struct pcib_softc *sc; int ari_func; sc = device_get_softc(pcib); ari_func = *func; if (sc->flags & PCIB_ENABLE_ARI) { KASSERT(*slot == 0, ("Non-zero slot number with ARI enabled!")); *slot = PCIE_ARI_SLOT(ari_func); *func = PCIE_ARI_FUNC(ari_func); } } static void pcib_enable_ari(struct pcib_softc *sc, uint32_t pcie_pos) { uint32_t ctl2; ctl2 = pci_read_config(sc->dev, pcie_pos + PCIER_DEVICE_CTL2, 4); ctl2 |= PCIEM_CTL2_ARI; pci_write_config(sc->dev, pcie_pos + PCIER_DEVICE_CTL2, ctl2, 4); sc->flags |= PCIB_ENABLE_ARI; } /* * PCIB interface. */ int pcib_maxslots(device_t dev) { return (PCI_SLOTMAX); } static int pcib_ari_maxslots(device_t dev) { struct pcib_softc *sc; sc = device_get_softc(dev); if (sc->flags & PCIB_ENABLE_ARI) return (PCIE_ARI_SLOTMAX); else return (PCI_SLOTMAX); } static int pcib_ari_maxfuncs(device_t dev) { struct pcib_softc *sc; sc = device_get_softc(dev); if (sc->flags & PCIB_ENABLE_ARI) return (PCIE_ARI_FUNCMAX); else return (PCI_FUNCMAX); } static void pcib_ari_decode_rid(device_t pcib, uint16_t rid, int *bus, int *slot, int *func) { struct pcib_softc *sc; sc = device_get_softc(pcib); *bus = PCI_RID2BUS(rid); if (sc->flags & PCIB_ENABLE_ARI) { *slot = PCIE_ARI_RID2SLOT(rid); *func = PCIE_ARI_RID2FUNC(rid); } else { *slot = PCI_RID2SLOT(rid); *func = PCI_RID2FUNC(rid); } } /* * Since we are a child of a PCI bus, its parent must support the pcib interface. */ static uint32_t pcib_read_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, int width) { #ifdef PCI_HP struct pcib_softc *sc; sc = device_get_softc(dev); if (!pcib_present(sc)) { switch (width) { case 2: return (0xffff); case 1: return (0xff); default: return (0xffffffff); } } #endif pcib_xlate_ari(dev, b, &s, &f); return(PCIB_READ_CONFIG(device_get_parent(device_get_parent(dev)), b, s, f, reg, width)); } static void pcib_write_config(device_t dev, u_int b, u_int s, u_int f, u_int reg, uint32_t val, int width) { #ifdef PCI_HP struct pcib_softc *sc; sc = device_get_softc(dev); if (!pcib_present(sc)) return; #endif pcib_xlate_ari(dev, b, &s, &f); PCIB_WRITE_CONFIG(device_get_parent(device_get_parent(dev)), b, s, f, reg, val, width); } /* * Route an interrupt across a PCI bridge. */ int pcib_route_interrupt(device_t pcib, device_t dev, int pin) { device_t bus; int parent_intpin; int intnum; /* * * The PCI standard defines a swizzle of the child-side device/intpin to * the parent-side intpin as follows. * * device = device on child bus * child_intpin = intpin on child bus slot (0-3) * parent_intpin = intpin on parent bus slot (0-3) * * parent_intpin = (device + child_intpin) % 4 */ parent_intpin = (pci_get_slot(dev) + (pin - 1)) % 4; /* * Our parent is a PCI bus. Its parent must export the pcib interface * which includes the ability to route interrupts. */ bus = device_get_parent(pcib); intnum = PCIB_ROUTE_INTERRUPT(device_get_parent(bus), pcib, parent_intpin + 1); if (PCI_INTERRUPT_VALID(intnum) && bootverbose) { device_printf(pcib, "slot %d INT%c is routed to irq %d\n", pci_get_slot(dev), 'A' + pin - 1, intnum); } return(intnum); } /* Pass request to alloc MSI/MSI-X messages up to the parent bridge. 
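 * The bridge does not allocate the messages itself; it forwards the request to
 * its grandparent's pcib interface, and only refuses (with ENXIO) when MSI has
 * been administratively disabled on this bridge via PCIB_DISABLE_MSI.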
*/ int pcib_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs) { struct pcib_softc *sc = device_get_softc(pcib); device_t bus; if (sc->flags & PCIB_DISABLE_MSI) return (ENXIO); bus = device_get_parent(pcib); return (PCIB_ALLOC_MSI(device_get_parent(bus), dev, count, maxcount, irqs)); } /* Pass request to release MSI/MSI-X messages up to the parent bridge. */ int pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs) { device_t bus; bus = device_get_parent(pcib); return (PCIB_RELEASE_MSI(device_get_parent(bus), dev, count, irqs)); } /* Pass request to alloc an MSI-X message up to the parent bridge. */ int pcib_alloc_msix(device_t pcib, device_t dev, int *irq) { struct pcib_softc *sc = device_get_softc(pcib); device_t bus; if (sc->flags & PCIB_DISABLE_MSIX) return (ENXIO); bus = device_get_parent(pcib); return (PCIB_ALLOC_MSIX(device_get_parent(bus), dev, irq)); } /* Pass request to release an MSI-X message up to the parent bridge. */ int pcib_release_msix(device_t pcib, device_t dev, int irq) { device_t bus; bus = device_get_parent(pcib); return (PCIB_RELEASE_MSIX(device_get_parent(bus), dev, irq)); } /* Pass request to map MSI/MSI-X message up to parent bridge. */ int pcib_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr, uint32_t *data) { device_t bus; int error; bus = device_get_parent(pcib); error = PCIB_MAP_MSI(device_get_parent(bus), dev, irq, addr, data); if (error) return (error); pci_ht_map_msi(pcib, *addr); return (0); } /* Pass request for device power state up to parent bridge. */ int pcib_power_for_sleep(device_t pcib, device_t dev, int *pstate) { device_t bus; bus = device_get_parent(pcib); return (PCIB_POWER_FOR_SLEEP(bus, dev, pstate)); } static int pcib_ari_enabled(device_t pcib) { struct pcib_softc *sc; sc = device_get_softc(pcib); return ((sc->flags & PCIB_ENABLE_ARI) != 0); } static int pcib_ari_get_id(device_t pcib, device_t dev, enum pci_id_type type, uintptr_t *id) { struct pcib_softc *sc; device_t bus_dev; uint8_t bus, slot, func; if (type != PCI_ID_RID) { bus_dev = device_get_parent(pcib); return (PCIB_GET_ID(device_get_parent(bus_dev), dev, type, id)); } sc = device_get_softc(pcib); if (sc->flags & PCIB_ENABLE_ARI) { bus = pci_get_bus(dev); func = pci_get_function(dev); *id = (PCI_ARI_RID(bus, func)); } else { bus = pci_get_bus(dev); slot = pci_get_slot(dev); func = pci_get_function(dev); *id = (PCI_RID(bus, slot, func)); } return (0); } /* * Check that the downstream port (pcib) and the endpoint device (dev) both * support ARI. If so, enable it and return 0, otherwise return an error. */ static int pcib_try_enable_ari(device_t pcib, device_t dev) { struct pcib_softc *sc; int error; uint32_t cap2; int ari_cap_off; uint32_t ari_ver; uint32_t pcie_pos; sc = device_get_softc(pcib); /* * ARI is controlled in a register in the PCIe capability structure. * If the downstream port does not have the PCIe capability structure * then it does not support ARI. */ error = pci_find_cap(pcib, PCIY_EXPRESS, &pcie_pos); if (error != 0) return (ENODEV); /* Check that the PCIe port advertises ARI support. */ cap2 = pci_read_config(pcib, pcie_pos + PCIER_DEVICE_CAP2, 4); if (!(cap2 & PCIEM_CAP2_ARI)) return (ENODEV); /* * Check that the endpoint device advertises ARI support via the ARI * extended capability structure. */ error = pci_find_extcap(dev, PCIZ_ARI, &ari_cap_off); if (error != 0) return (ENODEV); /* * Finally, check that the endpoint device supports the same version * of ARI that we do. 
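 * The version is taken from the header dword of the ARI extended capability;
 * anything other than PCIB_SUPPORTED_ARI_VER is reported (under bootverbose)
 * and the enable attempt is rejected with ENXIO.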
*/ ari_ver = pci_read_config(dev, ari_cap_off, 4); if (PCI_EXTCAP_VER(ari_ver) != PCIB_SUPPORTED_ARI_VER) { if (bootverbose) device_printf(pcib, "Unsupported version of ARI (%d) detected\n", PCI_EXTCAP_VER(ari_ver)); return (ENXIO); } pcib_enable_ari(sc, pcie_pos); return (0); } Index: user/alc/PQ_LAUNDRY/sys/geom/geom_disk.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/geom/geom_disk.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/geom/geom_disk.c (revision 303642) @@ -1,1003 +1,1010 @@ /*- * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_geom.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct g_disk_softc { struct mtx done_mtx; struct disk *dp; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; char led[64]; uint32_t state; struct mtx start_mtx; }; static g_access_t g_disk_access; static g_start_t g_disk_start; static g_ioctl_t g_disk_ioctl; static g_dumpconf_t g_disk_dumpconf; static g_provgone_t g_disk_providergone; static struct g_class g_disk_class = { .name = G_DISK_CLASS_NAME, .version = G_VERSION, .start = g_disk_start, .access = g_disk_access, .ioctl = g_disk_ioctl, .providergone = g_disk_providergone, .dumpconf = g_disk_dumpconf, }; SYSCTL_DECL(_kern_geom); static SYSCTL_NODE(_kern_geom, OID_AUTO, disk, CTLFLAG_RW, 0, "GEOM_DISK stuff"); DECLARE_GEOM_CLASS(g_disk_class, g_disk); static int g_disk_access(struct g_provider *pp, int r, int w, int e) { struct disk *dp; struct g_disk_softc *sc; int error; g_trace(G_T_ACCESS, "g_disk_access(%s, %d, %d, %d)", pp->name, r, w, e); g_topology_assert(); sc = pp->private; if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) { /* * Allow decreasing access count even if disk is not * available anymore. */ if (r <= 0 && w <= 0 && e <= 0) return (0); return (ENXIO); } r += pp->acr; w += pp->acw; e += pp->ace; error = 0; if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) { if (dp->d_open != NULL) { error = dp->d_open(dp); if (bootverbose && error != 0) printf("Opened disk %s -> %d\n", pp->name, error); if (error != 0) return (error); } pp->sectorsize = dp->d_sectorsize; if (dp->d_maxsize == 0) { printf("WARNING: Disk drive %s%d has no d_maxsize\n", dp->d_name, dp->d_unit); dp->d_maxsize = DFLTPHYS; } if (dp->d_delmaxsize == 0) { if (bootverbose && dp->d_flags & DISKFLAG_CANDELETE) { printf("WARNING: Disk drive %s%d has no " "d_delmaxsize\n", dp->d_name, dp->d_unit); } dp->d_delmaxsize = dp->d_maxsize; } pp->stripeoffset = dp->d_stripeoffset; pp->stripesize = dp->d_stripesize; dp->d_flags |= DISKFLAG_OPEN; - g_resize_provider(pp, dp->d_mediasize); + /* + * Do not invoke resize event when initial size was zero. + * Some disks report its size only after first opening. 
+ */ + if (pp->mediasize == 0) + pp->mediasize = dp->d_mediasize; + else + g_resize_provider(pp, dp->d_mediasize); } else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) { if (dp->d_close != NULL) { error = dp->d_close(dp); if (error != 0) printf("Closed disk %s -> %d\n", pp->name, error); } sc->state = G_STATE_ACTIVE; if (sc->led[0] != 0) led_set(sc->led, "0"); dp->d_flags &= ~DISKFLAG_OPEN; } return (error); } static void g_disk_kerneldump(struct bio *bp, struct disk *dp) { struct g_kerneldump *gkd; struct g_geom *gp; gkd = (struct g_kerneldump*)bp->bio_data; gp = bp->bio_to->geom; g_trace(G_T_TOPOLOGY, "g_disk_kerneldump(%s, %jd, %jd)", gp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length); if (dp->d_dump == NULL) { g_io_deliver(bp, ENODEV); return; } gkd->di.dumper = dp->d_dump; gkd->di.priv = dp; gkd->di.blocksize = dp->d_sectorsize; gkd->di.maxiosize = dp->d_maxsize; gkd->di.mediaoffset = gkd->offset; if ((gkd->offset + gkd->length) > dp->d_mediasize) gkd->length = dp->d_mediasize - gkd->offset; gkd->di.mediasize = gkd->length; g_io_deliver(bp, 0); } static void g_disk_setstate(struct bio *bp, struct g_disk_softc *sc) { const char *cmd; memcpy(&sc->state, bp->bio_data, sizeof(sc->state)); if (sc->led[0] != 0) { switch (sc->state) { case G_STATE_FAILED: cmd = "1"; break; case G_STATE_REBUILD: cmd = "f5"; break; case G_STATE_RESYNC: cmd = "f1"; break; default: cmd = "0"; break; } led_set(sc->led, cmd); } g_io_deliver(bp, 0); } static void g_disk_done(struct bio *bp) { struct bintime now; struct bio *bp2; struct g_disk_softc *sc; /* See "notes" for why we need a mutex here */ /* XXX: will witness accept a mix of Giant/unGiant drivers here ? */ bp2 = bp->bio_parent; sc = bp2->bio_to->private; bp->bio_completed = bp->bio_length - bp->bio_resid; binuptime(&now); mtx_lock(&sc->done_mtx); if (bp2->bio_error == 0) bp2->bio_error = bp->bio_error; bp2->bio_completed += bp->bio_completed; switch (bp->bio_cmd) { case BIO_ZONE: bcopy(&bp->bio_zone, &bp2->bio_zone, sizeof(bp->bio_zone)); /*FALLTHROUGH*/ case BIO_READ: case BIO_WRITE: case BIO_DELETE: case BIO_FLUSH: devstat_end_transaction_bio_bt(sc->dp->d_devstat, bp, &now); break; default: break; } bp2->bio_inbed++; if (bp2->bio_children == bp2->bio_inbed) { mtx_unlock(&sc->done_mtx); bp2->bio_resid = bp2->bio_bcount - bp2->bio_completed; g_io_deliver(bp2, bp2->bio_error); } else mtx_unlock(&sc->done_mtx); g_destroy_bio(bp); } static int g_disk_ioctl(struct g_provider *pp, u_long cmd, void * data, int fflag, struct thread *td) { struct disk *dp; struct g_disk_softc *sc; int error; sc = pp->private; dp = sc->dp; if (dp->d_ioctl == NULL) return (ENOIOCTL); error = dp->d_ioctl(dp, cmd, data, fflag, td); return (error); } static off_t g_disk_maxsize(struct disk *dp, struct bio *bp) { if (bp->bio_cmd == BIO_DELETE) return (dp->d_delmaxsize); return (dp->d_maxsize); } static int g_disk_maxsegs(struct disk *dp, struct bio *bp) { return ((g_disk_maxsize(dp, bp) / PAGE_SIZE) + 1); } static void g_disk_advance(struct disk *dp, struct bio *bp, off_t off) { bp->bio_offset += off; bp->bio_length -= off; if ((bp->bio_flags & BIO_VLIST) != 0) { bus_dma_segment_t *seg, *end; seg = (bus_dma_segment_t *)bp->bio_data; end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n; off += bp->bio_ma_offset; while (off >= seg->ds_len) { KASSERT((seg != end), ("vlist request runs off the end")); off -= seg->ds_len; seg++; } bp->bio_ma_offset = off; bp->bio_ma_n = end - seg; bp->bio_data = (void *)seg; } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma 
+= off / PAGE_SIZE; bp->bio_ma_offset += off; bp->bio_ma_offset %= PAGE_SIZE; bp->bio_ma_n -= off / PAGE_SIZE; } else { bp->bio_data += off; } } static void g_disk_seg_limit(bus_dma_segment_t *seg, off_t *poffset, off_t *plength, int *ppages) { uintptr_t seg_page_base; uintptr_t seg_page_end; off_t offset; off_t length; int seg_pages; offset = *poffset; length = *plength; if (length > seg->ds_len - offset) length = seg->ds_len - offset; seg_page_base = trunc_page(seg->ds_addr + offset); seg_page_end = round_page(seg->ds_addr + offset + length); seg_pages = (seg_page_end - seg_page_base) >> PAGE_SHIFT; if (seg_pages > *ppages) { seg_pages = *ppages; length = (seg_page_base + (seg_pages << PAGE_SHIFT)) - (seg->ds_addr + offset); } *poffset = 0; *plength -= length; *ppages -= seg_pages; } static off_t g_disk_vlist_limit(struct disk *dp, struct bio *bp, bus_dma_segment_t **pendseg) { bus_dma_segment_t *seg, *end; off_t residual; off_t offset; int pages; seg = (bus_dma_segment_t *)bp->bio_data; end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n; residual = bp->bio_length; offset = bp->bio_ma_offset; pages = g_disk_maxsegs(dp, bp); while (residual != 0 && pages != 0) { KASSERT((seg != end), ("vlist limit runs off the end")); g_disk_seg_limit(seg, &offset, &residual, &pages); seg++; } if (pendseg != NULL) *pendseg = seg; return (residual); } static bool g_disk_limit(struct disk *dp, struct bio *bp) { bool limited = false; off_t maxsz; maxsz = g_disk_maxsize(dp, bp); /* * XXX: If we have a stripesize we should really use it here. * Care should be taken in the delete case if this is done * as deletes can be very sensitive to size given how they * are processed. */ if (bp->bio_length > maxsz) { bp->bio_length = maxsz; limited = true; } if ((bp->bio_flags & BIO_VLIST) != 0) { bus_dma_segment_t *firstseg, *endseg; off_t residual; firstseg = (bus_dma_segment_t*)bp->bio_data; residual = g_disk_vlist_limit(dp, bp, &endseg); if (residual != 0) { bp->bio_ma_n = endseg - firstseg; bp->bio_length -= residual; limited = true; } } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma_n = howmany(bp->bio_ma_offset + bp->bio_length, PAGE_SIZE); } return (limited); } static void g_disk_start(struct bio *bp) { struct bio *bp2, *bp3; struct disk *dp; struct g_disk_softc *sc; int error; off_t off; sc = bp->bio_to->private; if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) { g_io_deliver(bp, ENXIO); return; } error = EJUSTRETURN; switch(bp->bio_cmd) { case BIO_DELETE: if (!(dp->d_flags & DISKFLAG_CANDELETE)) { error = EOPNOTSUPP; break; } /* fall-through */ case BIO_READ: case BIO_WRITE: KASSERT((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0 || (bp->bio_flags & BIO_UNMAPPED) == 0, ("unmapped bio not supported by disk %s", dp->d_name)); off = 0; bp3 = NULL; bp2 = g_clone_bio(bp); if (bp2 == NULL) { error = ENOMEM; break; } for (;;) { if (g_disk_limit(dp, bp2)) { off += bp2->bio_length; /* * To avoid a race, we need to grab the next bio * before we schedule this one. See "notes". 
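 * Once bp2 has been handed to d_strategy() it can complete at any time; unless
 * the parent's bio_children count has already been bumped by cloning the next
 * chunk, g_disk_done() could see bio_children == bio_inbed and deliver the
 * parent bio before the split is finished.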
*/ bp3 = g_clone_bio(bp); if (bp3 == NULL) bp->bio_error = ENOMEM; } bp2->bio_done = g_disk_done; bp2->bio_pblkno = bp2->bio_offset / dp->d_sectorsize; bp2->bio_bcount = bp2->bio_length; bp2->bio_disk = dp; mtx_lock(&sc->start_mtx); devstat_start_transaction_bio(dp->d_devstat, bp2); mtx_unlock(&sc->start_mtx); dp->d_strategy(bp2); if (bp3 == NULL) break; bp2 = bp3; bp3 = NULL; g_disk_advance(dp, bp2, off); } break; case BIO_GETATTR: /* Give the driver a chance to override */ if (dp->d_getattr != NULL) { if (bp->bio_disk == NULL) bp->bio_disk = dp; error = dp->d_getattr(bp); if (error != -1) break; error = EJUSTRETURN; } if (g_handleattr_int(bp, "GEOM::candelete", (dp->d_flags & DISKFLAG_CANDELETE) != 0)) break; else if (g_handleattr_int(bp, "GEOM::fwsectors", dp->d_fwsectors)) break; else if (g_handleattr_int(bp, "GEOM::fwheads", dp->d_fwheads)) break; else if (g_handleattr_off_t(bp, "GEOM::frontstuff", 0)) break; else if (g_handleattr_str(bp, "GEOM::ident", dp->d_ident)) break; else if (g_handleattr_uint16_t(bp, "GEOM::hba_vendor", dp->d_hba_vendor)) break; else if (g_handleattr_uint16_t(bp, "GEOM::hba_device", dp->d_hba_device)) break; else if (g_handleattr_uint16_t(bp, "GEOM::hba_subvendor", dp->d_hba_subvendor)) break; else if (g_handleattr_uint16_t(bp, "GEOM::hba_subdevice", dp->d_hba_subdevice)) break; else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump")) g_disk_kerneldump(bp, dp); else if (!strcmp(bp->bio_attribute, "GEOM::setstate")) g_disk_setstate(bp, sc); else if (g_handleattr_uint16_t(bp, "GEOM::rotation_rate", dp->d_rotation_rate)) break; else error = ENOIOCTL; break; case BIO_FLUSH: g_trace(G_T_BIO, "g_disk_flushcache(%s)", bp->bio_to->name); if (!(dp->d_flags & DISKFLAG_CANFLUSHCACHE)) { error = EOPNOTSUPP; break; } /*FALLTHROUGH*/ case BIO_ZONE: if (bp->bio_cmd == BIO_ZONE) { if (!(dp->d_flags & DISKFLAG_CANZONE)) { error = EOPNOTSUPP; break; } g_trace(G_T_BIO, "g_disk_zone(%s)", bp->bio_to->name); } bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); return; } bp2->bio_done = g_disk_done; bp2->bio_disk = dp; mtx_lock(&sc->start_mtx); devstat_start_transaction_bio(dp->d_devstat, bp2); mtx_unlock(&sc->start_mtx); dp->d_strategy(bp2); break; default: error = EOPNOTSUPP; break; } if (error != EJUSTRETURN) g_io_deliver(bp, error); return; } static void g_disk_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp) { struct bio *bp; struct disk *dp; struct g_disk_softc *sc; char *buf; int res = 0; sc = gp->softc; if (sc == NULL || (dp = sc->dp) == NULL) return; if (indent == NULL) { sbuf_printf(sb, " hd %u", dp->d_fwheads); sbuf_printf(sb, " sc %u", dp->d_fwsectors); return; } if (pp != NULL) { sbuf_printf(sb, "%s%u\n", indent, dp->d_fwheads); sbuf_printf(sb, "%s%u\n", indent, dp->d_fwsectors); /* * "rotationrate" is a little complicated, because the value * returned by the drive might not be the RPM; 0 and 1 are * special cases, and there's also a valid range. */ sbuf_printf(sb, "%s", indent); if (dp->d_rotation_rate == 0) /* Old drives don't */ sbuf_printf(sb, "unknown"); /* report RPM. */ else if (dp->d_rotation_rate == 1) /* Since 0 is used */ sbuf_printf(sb, "0"); /* above, SSDs use 1. 
*/ else if ((dp->d_rotation_rate >= 0x041) && (dp->d_rotation_rate <= 0xfffe)) sbuf_printf(sb, "%u", dp->d_rotation_rate); else sbuf_printf(sb, "invalid"); sbuf_printf(sb, "\n"); if (dp->d_getattr != NULL) { buf = g_malloc(DISK_IDENT_SIZE, M_WAITOK); bp = g_alloc_bio(); bp->bio_disk = dp; bp->bio_attribute = "GEOM::ident"; bp->bio_length = DISK_IDENT_SIZE; bp->bio_data = buf; res = dp->d_getattr(bp); sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", res == 0 ? buf: dp->d_ident); sbuf_printf(sb, "\n"); bp->bio_attribute = "GEOM::lunid"; bp->bio_length = DISK_IDENT_SIZE; bp->bio_data = buf; if (dp->d_getattr(bp) == 0) { sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", buf); sbuf_printf(sb, "\n"); } bp->bio_attribute = "GEOM::lunname"; bp->bio_length = DISK_IDENT_SIZE; bp->bio_data = buf; if (dp->d_getattr(bp) == 0) { sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", buf); sbuf_printf(sb, "\n"); } g_destroy_bio(bp); g_free(buf); } else { sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", dp->d_ident); sbuf_printf(sb, "\n"); } sbuf_printf(sb, "%s", indent); g_conf_printf_escaped(sb, "%s", dp->d_descr); sbuf_printf(sb, "\n"); } } static void g_disk_resize(void *ptr, int flag) { struct disk *dp; struct g_geom *gp; struct g_provider *pp; if (flag == EV_CANCEL) return; g_topology_assert(); dp = ptr; gp = dp->d_geom; if (dp->d_destroyed || gp == NULL) return; LIST_FOREACH(pp, &gp->provider, provider) { if (pp->sectorsize != 0 && pp->sectorsize != dp->d_sectorsize) g_wither_provider(pp, ENXIO); else g_resize_provider(pp, dp->d_mediasize); } } static void g_disk_create(void *arg, int flag) { struct g_geom *gp; struct g_provider *pp; struct disk *dp; struct g_disk_softc *sc; char tmpstr[80]; if (flag == EV_CANCEL) return; g_topology_assert(); dp = arg; mtx_pool_lock(mtxpool_sleep, dp); dp->d_init_level = DISK_INIT_START; /* * If the disk has already gone away, we can just stop here and * call the user's callback to tell him we've cleaned things up. 
*/ if (dp->d_goneflag != 0) { mtx_pool_unlock(mtxpool_sleep, dp); if (dp->d_gone != NULL) dp->d_gone(dp); return; } mtx_pool_unlock(mtxpool_sleep, dp); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->start_mtx, "g_disk_start", NULL, MTX_DEF); mtx_init(&sc->done_mtx, "g_disk_done", NULL, MTX_DEF); sc->dp = dp; gp = g_new_geomf(&g_disk_class, "%s%d", dp->d_name, dp->d_unit); gp->softc = sc; pp = g_new_providerf(gp, "%s", gp->name); devstat_remove_entry(pp->stat); pp->stat = NULL; dp->d_devstat->id = pp; pp->mediasize = dp->d_mediasize; pp->sectorsize = dp->d_sectorsize; pp->stripeoffset = dp->d_stripeoffset; pp->stripesize = dp->d_stripesize; if ((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0) pp->flags |= G_PF_ACCEPT_UNMAPPED; if ((dp->d_flags & DISKFLAG_DIRECT_COMPLETION) != 0) pp->flags |= G_PF_DIRECT_SEND; pp->flags |= G_PF_DIRECT_RECEIVE; if (bootverbose) printf("GEOM: new disk %s\n", gp->name); sysctl_ctx_init(&sc->sysctl_ctx); snprintf(tmpstr, sizeof(tmpstr), "GEOM disk %s", gp->name); sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_kern_geom_disk), OID_AUTO, gp->name, CTLFLAG_RD, 0, tmpstr); if (sc->sysctl_tree != NULL) { SYSCTL_ADD_STRING(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "led", CTLFLAG_RWTUN, sc->led, sizeof(sc->led), "LED name"); } pp->private = sc; dp->d_geom = gp; g_error_provider(pp, 0); mtx_pool_lock(mtxpool_sleep, dp); dp->d_init_level = DISK_INIT_DONE; /* * If the disk has gone away at this stage, start the withering * process for it. */ if (dp->d_goneflag != 0) { mtx_pool_unlock(mtxpool_sleep, dp); g_wither_provider(pp, ENXIO); return; } mtx_pool_unlock(mtxpool_sleep, dp); } /* * We get this callback after all of the consumers have gone away, and just * before the provider is freed. If the disk driver provided a d_gone * callback, let them know that it is okay to free resources -- they won't * be getting any more accesses from GEOM. */ static void g_disk_providergone(struct g_provider *pp) { struct disk *dp; struct g_disk_softc *sc; sc = (struct g_disk_softc *)pp->private; dp = sc->dp; if (dp != NULL && dp->d_gone != NULL) dp->d_gone(dp); if (sc->sysctl_tree != NULL) { sysctl_ctx_free(&sc->sysctl_ctx); sc->sysctl_tree = NULL; } if (sc->led[0] != 0) { led_set(sc->led, "0"); sc->led[0] = 0; } pp->private = NULL; pp->geom->softc = NULL; mtx_destroy(&sc->done_mtx); mtx_destroy(&sc->start_mtx); g_free(sc); } static void g_disk_destroy(void *ptr, int flag) { struct disk *dp; struct g_geom *gp; struct g_disk_softc *sc; g_topology_assert(); dp = ptr; gp = dp->d_geom; if (gp != NULL) { sc = gp->softc; if (sc != NULL) sc->dp = NULL; dp->d_geom = NULL; g_wither_geom(gp, ENXIO); } g_free(dp); } /* * We only allow printable characters in disk ident, * the rest is converted to 'x'. 
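 * More precisely, each non-printable byte is expanded to an "x" followed by
 * its two-digit hex value (0x07 becomes the three characters "x07"), printable
 * bytes are copied through unchanged, and the result is clamped to the size of
 * the identifier buffer.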
*/ static void g_disk_ident_adjust(char *ident, size_t size) { char *p, tmp[4], newid[DISK_IDENT_SIZE]; newid[0] = '\0'; for (p = ident; *p != '\0'; p++) { if (isprint(*p)) { tmp[0] = *p; tmp[1] = '\0'; } else { snprintf(tmp, sizeof(tmp), "x%02hhx", *(unsigned char *)p); } if (strlcat(newid, tmp, sizeof(newid)) >= sizeof(newid)) break; } bzero(ident, size); strlcpy(ident, newid, size); } struct disk * disk_alloc(void) { return (g_malloc(sizeof(struct disk), M_WAITOK | M_ZERO)); } void disk_create(struct disk *dp, int version) { if (version != DISK_VERSION) { printf("WARNING: Attempt to add disk %s%d %s", dp->d_name, dp->d_unit, " using incompatible ABI version of disk(9)\n"); printf("WARNING: Ignoring disk %s%d\n", dp->d_name, dp->d_unit); return; } if (dp->d_flags & DISKFLAG_RESERVED) { printf("WARNING: Attempt to add non-MPSAFE disk %s%d\n", dp->d_name, dp->d_unit); printf("WARNING: Ignoring disk %s%d\n", dp->d_name, dp->d_unit); return; } KASSERT(dp->d_strategy != NULL, ("disk_create need d_strategy")); KASSERT(dp->d_name != NULL, ("disk_create need d_name")); KASSERT(*dp->d_name != 0, ("disk_create need d_name")); KASSERT(strlen(dp->d_name) < SPECNAMELEN - 4, ("disk name too long")); if (dp->d_devstat == NULL) dp->d_devstat = devstat_new_entry(dp->d_name, dp->d_unit, dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); dp->d_geom = NULL; dp->d_init_level = DISK_INIT_NONE; g_disk_ident_adjust(dp->d_ident, sizeof(dp->d_ident)); g_post_event(g_disk_create, dp, M_WAITOK, dp, NULL); } void disk_destroy(struct disk *dp) { g_cancel_event(dp); dp->d_destroyed = 1; if (dp->d_devstat != NULL) devstat_remove_entry(dp->d_devstat); g_post_event(g_disk_destroy, dp, M_WAITOK, NULL); } void disk_gone(struct disk *dp) { struct g_geom *gp; struct g_provider *pp; mtx_pool_lock(mtxpool_sleep, dp); dp->d_goneflag = 1; /* * If we're still in the process of creating this disk (the * g_disk_create() function is still queued, or is in * progress), the init level will not yet be DISK_INIT_DONE. * * If that is the case, g_disk_create() will see d_goneflag * and take care of cleaning things up. * * If the disk has already been created, we default to * withering the provider as usual below. * * If the caller has not set a d_gone() callback, he will * not be any worse off by returning here, because the geom * has not been fully setup in any case. 
*/ if (dp->d_init_level < DISK_INIT_DONE) { mtx_pool_unlock(mtxpool_sleep, dp); return; } mtx_pool_unlock(mtxpool_sleep, dp); gp = dp->d_geom; if (gp != NULL) { pp = LIST_FIRST(&gp->provider); if (pp != NULL) { KASSERT(LIST_NEXT(pp, provider) == NULL, ("geom %p has more than one provider", gp)); g_wither_provider(pp, ENXIO); } } } void disk_attr_changed(struct disk *dp, const char *attr, int flag) { struct g_geom *gp; struct g_provider *pp; char devnamebuf[128]; gp = dp->d_geom; if (gp != NULL) LIST_FOREACH(pp, &gp->provider, provider) (void)g_attr_changed(pp, attr, flag); snprintf(devnamebuf, sizeof(devnamebuf), "devname=%s%d", dp->d_name, dp->d_unit); devctl_notify("GEOM", "disk", attr, devnamebuf); } void disk_media_changed(struct disk *dp, int flag) { struct g_geom *gp; struct g_provider *pp; gp = dp->d_geom; if (gp != NULL) { pp = LIST_FIRST(&gp->provider); if (pp != NULL) { KASSERT(LIST_NEXT(pp, provider) == NULL, ("geom %p has more than one provider", gp)); g_media_changed(pp, flag); } } } void disk_media_gone(struct disk *dp, int flag) { struct g_geom *gp; struct g_provider *pp; gp = dp->d_geom; if (gp != NULL) { pp = LIST_FIRST(&gp->provider); if (pp != NULL) { KASSERT(LIST_NEXT(pp, provider) == NULL, ("geom %p has more than one provider", gp)); g_media_gone(pp, flag); } } } int disk_resize(struct disk *dp, int flag) { if (dp->d_destroyed || dp->d_geom == NULL) return (0); return (g_post_event(g_disk_resize, dp, flag, NULL)); } static void g_kern_disks(void *p, int flag __unused) { struct sbuf *sb; struct g_geom *gp; char *sp; sb = p; sp = ""; g_topology_assert(); LIST_FOREACH(gp, &g_disk_class.geom, geom) { sbuf_printf(sb, "%s%s", sp, gp->name); sp = " "; } sbuf_finish(sb); } static int sysctl_disks(SYSCTL_HANDLER_ARGS) { int error; struct sbuf *sb; sb = sbuf_new_auto(); g_waitfor_event(g_kern_disks, sb, M_WAITOK, NULL); error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); sbuf_delete(sb); return error; } SYSCTL_PROC(_kern, OID_AUTO, disks, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_disks, "A", "names of available disks"); Index: user/alc/PQ_LAUNDRY/sys/kern/kern_mutex.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/kern/kern_mutex.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/kern/kern_mutex.c (revision 303642) @@ -1,1047 +1,1047 @@ /*- * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Berkeley Software Design Inc's name may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ */ /* * Machine independent bits of mutex implementation. */ #include __FBSDID("$FreeBSD$"); #include "opt_adaptive_mutexes.h" #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES) #define ADAPTIVE_MUTEXES #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , lock, failed); #endif /* * Return the mutex address when the lock cookie address is provided. * This functionality assumes that struct mtx* have a member named mtx_lock. */ #define mtxlock2mtx(c) (__containerof(c, struct mtx, mtx_lock)) /* * Internal utility macros. */ #define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) #define mtx_destroyed(m) ((m)->mtx_lock == MTX_DESTROYED) #define mtx_owner(m) ((struct thread *)((m)->mtx_lock & ~MTX_FLAGMASK)) static void assert_mtx(const struct lock_object *lock, int what); #ifdef DDB static void db_show_mtx(const struct lock_object *lock); #endif static void lock_mtx(struct lock_object *lock, uintptr_t how); static void lock_spin(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_mtx(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_mtx(struct lock_object *lock); static uintptr_t unlock_spin(struct lock_object *lock); /* * Lock classes for sleep and spin mutexes. 
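 * Both classes are marked LC_RECURSABLE; they differ in LC_SLEEPLOCK versus
 * LC_SPINLOCK and in the lock/unlock handlers wired up below (the spin-class
 * handlers simply panic, since spin mutexes may only be used with
 * msleep_spin()).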
*/ struct lock_class lock_class_mtx_sleep = { .lc_name = "sleep mutex", .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE, .lc_assert = assert_mtx, #ifdef DDB .lc_ddb_show = db_show_mtx, #endif .lc_lock = lock_mtx, .lc_unlock = unlock_mtx, #ifdef KDTRACE_HOOKS .lc_owner = owner_mtx, #endif }; struct lock_class lock_class_mtx_spin = { .lc_name = "spin mutex", .lc_flags = LC_SPINLOCK | LC_RECURSABLE, .lc_assert = assert_mtx, #ifdef DDB .lc_ddb_show = db_show_mtx, #endif .lc_lock = lock_spin, .lc_unlock = unlock_spin, #ifdef KDTRACE_HOOKS .lc_owner = owner_mtx, #endif }; /* * System-wide mutexes */ struct mtx blocked_lock; struct mtx Giant; void assert_mtx(const struct lock_object *lock, int what) { mtx_assert((const struct mtx *)lock, what); } void lock_mtx(struct lock_object *lock, uintptr_t how) { mtx_lock((struct mtx *)lock); } void lock_spin(struct lock_object *lock, uintptr_t how) { panic("spin locks can only use msleep_spin"); } uintptr_t unlock_mtx(struct lock_object *lock) { struct mtx *m; m = (struct mtx *)lock; mtx_assert(m, MA_OWNED | MA_NOTRECURSED); mtx_unlock(m); return (0); } uintptr_t unlock_spin(struct lock_object *lock) { panic("spin locks can only use msleep_spin"); } #ifdef KDTRACE_HOOKS int owner_mtx(const struct lock_object *lock, struct thread **owner) { const struct mtx *m = (const struct mtx *)lock; *owner = mtx_owner(m); return (mtx_unowned(m) == 0); } #endif /* * Function versions of the inlined __mtx_* macros. These are used by * modules and can also be called from assembly language if needed. */ void __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("mtx_lock() by idle thread %p on sleep mutex %s @ %s:%d", curthread, m->lock_object.lo_name, file, line)); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_lock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep, ("mtx_lock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); __mtx_lock(m, curthread, opts, file, line); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE, file, line); TD_LOCKS_INC(curthread); } void __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_unlock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep, ("mtx_unlock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file, line); mtx_assert(m, MA_OWNED); __mtx_unlock(m, curthread, opts, file, line); TD_LOCKS_DEC(curthread); } void __mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_lock_spin() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("mtx_lock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); if (mtx_owned(m)) KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & 
MTX_RECURSE) != 0, ("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); opts &= ~MTX_RECURSE; WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); __mtx_lock_spin(m, curthread, opts, file, line); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); } int __mtx_trylock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return (1); m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_trylock_spin() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("mtx_trylock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); KASSERT((opts & MTX_RECURSE) == 0, ("mtx_trylock_spin: unsupp. opt MTX_RECURSE on mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); if (__mtx_trylock_spin(m, curthread, opts, file, line)) { LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 1, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); return (1); } LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 0, file, line); return (0); } void __mtx_unlock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_unlock_spin() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("mtx_unlock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file, line); mtx_assert(m, MA_OWNED); __mtx_unlock_spin(m); } /* * The important part of mtx_trylock{,_flags}() * Tries to acquire lock `m.' If this function is called on a mutex that * is already owned, it will recursively acquire the lock. */ int _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif int rval; if (SCHEDULER_STOPPED()) return (1); m = mtxlock2mtx(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("mtx_trylock() by idle thread %p on sleep mutex %s @ %s:%d", curthread, m->lock_object.lo_name, file, line)); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_trylock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep, ("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); if (mtx_owned(m) && ((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0)) { m->mtx_recurse++; atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); rval = 1; } else rval = _mtx_obtain_lock(m, (uintptr_t)curthread); opts &= ~MTX_RECURSE; LOCK_LOG_TRY("LOCK", &m->lock_object, opts, rval, file, line); if (rval) { WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); TD_LOCKS_INC(curthread); if (m->mtx_recurse == 0) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested, waittime, file, line); } return (rval); } /* * __mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock. * * We call this if the lock is either contested (i.e. we need to go to * sleep waiting for it), or if we need to recurse on it. 
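 * When ADAPTIVE_MUTEXES is enabled, the contested path first spins with
 * cpu_spinwait() for as long as the current owner is running on another CPU,
 * and only queues on a turnstile once the owner has gone off-CPU or the lock
 * state changes.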
*/ void __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, const char *file, int line) { struct mtx *m; struct turnstile *ts; uintptr_t v; #ifdef ADAPTIVE_MUTEXES volatile struct thread *owner; #endif #ifdef KTR int cont_logged = 0; #endif #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #ifdef KDTRACE_HOOKS - uint64_t spin_cnt = 0; - uint64_t sleep_cnt = 0; + u_int spin_cnt = 0; + u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); if (mtx_owned(m)) { KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0, ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); opts &= ~MTX_RECURSE; m->mtx_recurse++; atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m); return; } opts &= ~MTX_RECURSE; #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR4(KTR_LOCK, "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d", m->lock_object.lo_name, (void *)m->mtx_lock, file, line); #ifdef KDTRACE_HOOKS all_time -= lockstat_nsecs(&m->lock_object); #endif for (;;) { if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid)) break; #ifdef KDTRACE_HOOKS spin_cnt++; #endif #ifdef ADAPTIVE_MUTEXES /* * If the owner is running on another CPU, spin until the * owner stops running or the state of the lock changes. */ v = m->mtx_lock; if (v != MTX_UNOWNED) { owner = (struct thread *)(v & ~MTX_FLAGMASK); if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&m->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, m, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "spinning", "lockname:\"%s\"", m->lock_object.lo_name); while (mtx_owner(m) == owner && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "running"); continue; } } #endif ts = turnstile_trywait(&m->lock_object); v = m->mtx_lock; /* * Check if the lock has been released while spinning for * the turnstile chain lock. */ if (v == MTX_UNOWNED) { turnstile_cancel(ts); continue; } #ifdef ADAPTIVE_MUTEXES /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ owner = (struct thread *)(v & ~MTX_FLAGMASK); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } #endif /* * If the mutex isn't already contested and a failure occurs * setting the contested bit, the mutex was either released * or the state of the MTX_RECURSED bit changed. */ if ((v & MTX_CONTESTED) == 0 && !atomic_cmpset_ptr(&m->mtx_lock, v, v | MTX_CONTESTED)) { turnstile_cancel(ts); continue; } /* * We definitely must sleep for this lock. */ mtx_assert(m, MA_NOTOWNED); #ifdef KTR if (!cont_logged) { CTR6(KTR_CONTENTION, "contention: %p at %s:%d wants %s, taken by %s:%d", (void *)tid, file, line, m->lock_object.lo_name, WITNESS_FILE(&m->lock_object), WITNESS_LINE(&m->lock_object)); cont_logged = 1; } #endif /* * Block on the turnstile. 
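 * turnstile_wait() places the thread on the turnstile's exclusive queue, lends
 * its priority to the current owner, and sleeps until the owner releases the
 * mutex and wakes the queue.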
*/ #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&m->lock_object); #endif turnstile_wait(ts, mtx_owner(m), TS_EXCLUSIVE_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&m->lock_object); sleep_cnt++; #endif } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&m->lock_object); #endif #ifdef KTR if (cont_logged) { CTR4(KTR_CONTENTION, "contention end: %s acquired by %p at %s:%d", m->lock_object.lo_name, (void *)tid, file, line); } #endif LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested, waittime, file, line); #ifdef KDTRACE_HOOKS if (sleep_time) LOCKSTAT_RECORD1(adaptive__block, m, sleep_time); /* * Only record the loops spinning and not sleeping. */ if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD1(adaptive__spin, m, all_time - sleep_time); #endif } static void _mtx_lock_spin_failed(struct mtx *m) { struct thread *td; td = mtx_owner(m); /* If the mutex is unlocked, try again. */ if (td == NULL) return; printf( "spin lock %p (%s) held by %p (tid %d) too long\n", m, m->lock_object.lo_name, td, td->td_tid); #ifdef WITNESS witness_display_spinlock(&m->lock_object, td, printf); #endif panic("spin lock held too long"); } #ifdef SMP /* * _mtx_lock_spin_cookie: the tougher part of acquiring an MTX_SPIN lock. * * This is only called if we need to actually spin for the lock. Recursion * is handled inline. */ void _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts, const char *file, int line) { struct mtx *m; int i = 0; #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #ifdef KDTRACE_HOOKS int64_t spin_time = 0; #endif if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m); KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "spinning", "lockname:\"%s\"", m->lock_object.lo_name); #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime); #ifdef KDTRACE_HOOKS spin_time -= lockstat_nsecs(&m->lock_object); #endif for (;;) { if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid)) break; /* Give interrupts a chance while we spin. */ spinlock_exit(); while (m->mtx_lock != MTX_UNOWNED) { if (i++ < 10000000) { cpu_spinwait(); continue; } if (i < 60000000 || kdb_active || panicstr != NULL) DELAY(1); else _mtx_lock_spin_failed(m); cpu_spinwait(); } spinlock_enter(); } #ifdef KDTRACE_HOOKS spin_time += lockstat_nsecs(&m->lock_object); #endif if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m); KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "running"); #ifdef KDTRACE_HOOKS LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m, contested, waittime, file, line); if (spin_time != 0) LOCKSTAT_RECORD1(spin__spin, m, spin_time); #endif } #endif /* SMP */ void thread_lock_flags_(struct thread *td, int opts, const char *file, int line) { struct mtx *m; uintptr_t tid; int i; #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #ifdef KDTRACE_HOOKS int64_t spin_time = 0; #endif i = 0; tid = (uintptr_t)curthread; if (SCHEDULER_STOPPED()) { /* * Ensure that spinlock sections are balanced even when the * scheduler is stopped, since we may otherwise inadvertently * re-enable interrupts while dumping core. 
*/ spinlock_enter(); return; } #ifdef KDTRACE_HOOKS spin_time -= lockstat_nsecs(&td->td_lock->lock_object); #endif for (;;) { retry: spinlock_enter(); m = td->td_lock; KASSERT(m->mtx_lock != MTX_DESTROYED, ("thread_lock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("thread_lock() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); if (mtx_owned(m)) KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0, ("thread_lock: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); for (;;) { if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid)) break; if (m->mtx_lock == tid) { m->mtx_recurse++; break; } #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime); /* Give interrupts a chance while we spin. */ spinlock_exit(); while (m->mtx_lock != MTX_UNOWNED) { if (i++ < 10000000) cpu_spinwait(); else if (i < 60000000 || kdb_active || panicstr != NULL) DELAY(1); else _mtx_lock_spin_failed(m); cpu_spinwait(); if (m != td->td_lock) goto retry; } spinlock_enter(); } if (m == td->td_lock) break; __mtx_unlock_spin(m); /* does spinlock_exit() */ } #ifdef KDTRACE_HOOKS spin_time += lockstat_nsecs(&m->lock_object); #endif if (m->mtx_recurse == 0) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m, contested, waittime, file, line); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); #ifdef KDTRACE_HOOKS if (spin_time != 0) LOCKSTAT_RECORD1(thread__spin, m, spin_time); #endif } struct mtx * thread_lock_block(struct thread *td) { struct mtx *lock; THREAD_LOCK_ASSERT(td, MA_OWNED); lock = td->td_lock; td->td_lock = &blocked_lock; mtx_unlock_spin(lock); return (lock); } void thread_lock_unblock(struct thread *td, struct mtx *new) { mtx_assert(new, MA_OWNED); MPASS(td->td_lock == &blocked_lock); atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new); } void thread_lock_set(struct thread *td, struct mtx *new) { struct mtx *lock; mtx_assert(new, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); lock = td->td_lock; td->td_lock = new; mtx_unlock_spin(lock); } /* * __mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock. * * We are only called here if the lock is recursed or contested (i.e. we * need to wake up a blocked thread). */ void __mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; struct turnstile *ts; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); if (mtx_recursed(m)) { if (--(m->mtx_recurse) == 0) atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m); return; } /* * We have to lock the chain before the turnstile so this turnstile * can be removed from the hash list if it is empty. */ turnstile_chain_lock(&m->lock_object); ts = turnstile_lookup(&m->lock_object); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m); MPASS(ts != NULL); turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE); _mtx_release_lock_quick(m); /* * This turnstile is now no longer associated with the mutex. We can * unlock the chain lock so a new turnstile may take it's place. 
*/ turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); turnstile_chain_unlock(&m->lock_object); } /* * All the unlocking of MTX_SPIN locks is done inline. * See the __mtx_unlock_spin() macro for the details. */ /* * The backing function for the INVARIANTS-enabled mtx_assert() */ #ifdef INVARIANT_SUPPORT void __mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line) { const struct mtx *m; if (panicstr != NULL || dumping) return; m = mtxlock2mtx(c); switch (what) { case MA_OWNED: case MA_OWNED | MA_RECURSED: case MA_OWNED | MA_NOTRECURSED: if (!mtx_owned(m)) panic("mutex %s not owned at %s:%d", m->lock_object.lo_name, file, line); if (mtx_recursed(m)) { if ((what & MA_NOTRECURSED) != 0) panic("mutex %s recursed at %s:%d", m->lock_object.lo_name, file, line); } else if ((what & MA_RECURSED) != 0) { panic("mutex %s unrecursed at %s:%d", m->lock_object.lo_name, file, line); } break; case MA_NOTOWNED: if (mtx_owned(m)) panic("mutex %s owned at %s:%d", m->lock_object.lo_name, file, line); break; default: panic("unknown mtx_assert at %s:%d", file, line); } } #endif /* * General init routine used by the MTX_SYSINIT() macro. */ void mtx_sysinit(void *arg) { struct mtx_args *margs = arg; mtx_init((struct mtx *)margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts); } /* * Mutex initialization routine; initialize lock `m' of type contained in * `opts' with options contained in `opts' and name `name.' The optional * lock type `type' is used as a general lock category name for use with * witness. */ void _mtx_init(volatile uintptr_t *c, const char *name, const char *type, int opts) { struct mtx *m; struct lock_class *class; int flags; m = mtxlock2mtx(c); MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE | MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE | MTX_NEW)) == 0); ASSERT_ATOMIC_LOAD_PTR(m->mtx_lock, ("%s: mtx_lock not aligned for %s: %p", __func__, name, &m->mtx_lock)); /* Determine lock class and lock flags. */ if (opts & MTX_SPIN) class = &lock_class_mtx_spin; else class = &lock_class_mtx_sleep; flags = 0; if (opts & MTX_QUIET) flags |= LO_QUIET; if (opts & MTX_RECURSE) flags |= LO_RECURSABLE; if ((opts & MTX_NOWITNESS) == 0) flags |= LO_WITNESS; if (opts & MTX_DUPOK) flags |= LO_DUPOK; if (opts & MTX_NOPROFILE) flags |= LO_NOPROFILE; if (opts & MTX_NEW) flags |= LO_NEW; /* Initialize mutex. */ lock_init(&m->lock_object, class, name, type, flags); m->mtx_lock = MTX_UNOWNED; m->mtx_recurse = 0; } /* * Remove lock `m' from all_mtx queue. We don't allow MTX_QUIET to be * passed in as a flag here because if the corresponding mtx_init() was * called with MTX_QUIET set, then it will already be set in the mutex's * flags. */ void _mtx_destroy(volatile uintptr_t *c) { struct mtx *m; m = mtxlock2mtx(c); if (!mtx_owned(m)) MPASS(mtx_unowned(m)); else { MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0); /* Perform the non-mtx related part of mtx_unlock_spin(). */ if (LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin) spinlock_exit(); else TD_LOCKS_DEC(curthread); lock_profile_release_lock(&m->lock_object); /* Tell witness this isn't locked to make it happy. */ WITNESS_UNLOCK(&m->lock_object, LOP_EXCLUSIVE, __FILE__, __LINE__); } m->mtx_lock = MTX_DESTROYED; lock_destroy(&m->lock_object); } /* * Intialize the mutex code and system mutexes. This is called from the MD * startup code prior to mi_startup(). The per-CPU data space needs to be * setup before this is called. */ void mutex_init(void) { /* Setup turnstiles so that sleep mutexes work. */ init_turnstiles(); /* * Initialize mutexes. 
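 * Giant is created as a recursable sleep mutex (MTX_DEF | MTX_RECURSE), while
 * blocked_lock is a spin mutex whose lock word is immediately poisoned with
 * 0xdeadc0de so that it always appears owned.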
*/ mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE); mtx_init(&blocked_lock, "blocked lock", NULL, MTX_SPIN); blocked_lock.mtx_lock = 0xdeadc0de; /* Always blocked. */ mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN); mtx_init(&proc0.p_statmtx, "pstatl", NULL, MTX_SPIN); mtx_init(&proc0.p_itimmtx, "pitiml", NULL, MTX_SPIN); mtx_init(&proc0.p_profmtx, "pprofl", NULL, MTX_SPIN); mtx_init(&devmtx, "cdev", NULL, MTX_DEF); mtx_lock(&Giant); } #ifdef DDB void db_show_mtx(const struct lock_object *lock) { struct thread *td; const struct mtx *m; m = (const struct mtx *)lock; db_printf(" flags: {"); if (LOCK_CLASS(lock) == &lock_class_mtx_spin) db_printf("SPIN"); else db_printf("DEF"); if (m->lock_object.lo_flags & LO_RECURSABLE) db_printf(", RECURSE"); if (m->lock_object.lo_flags & LO_DUPOK) db_printf(", DUPOK"); db_printf("}\n"); db_printf(" state: {"); if (mtx_unowned(m)) db_printf("UNOWNED"); else if (mtx_destroyed(m)) db_printf("DESTROYED"); else { db_printf("OWNED"); if (m->mtx_lock & MTX_CONTESTED) db_printf(", CONTESTED"); if (m->mtx_lock & MTX_RECURSED) db_printf(", RECURSED"); } db_printf("}\n"); if (!mtx_unowned(m) && !mtx_destroyed(m)) { td = mtx_owner(m); db_printf(" owner: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (mtx_recursed(m)) db_printf(" recursed: %d\n", m->mtx_recurse); } } #endif Index: user/alc/PQ_LAUNDRY/sys/kern/kern_rwlock.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/kern/kern_rwlock.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/kern/kern_rwlock.c (revision 303642) @@ -1,1277 +1,1277 @@ /*- * Copyright (c) 2006 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Machine independent bits of reader/writer lock implementation. 
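 *
 * Illustrative usage sketch (not part of this change; bar_lock is a
 * hypothetical name).  Readers take the shared side and may run
 * concurrently, while a writer takes the exclusive side:
 *
 *	static struct rwlock bar_lock;
 *
 *	rw_init(&bar_lock, "bar data");
 *
 *	rw_rlock(&bar_lock);
 *	... read shared data ...
 *	rw_runlock(&bar_lock);
 *
 *	rw_wlock(&bar_lock);
 *	... modify shared data ...
 *	rw_wunlock(&bar_lock);
 *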
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_no_adaptive_rwlocks.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS) #define ADAPTIVE_RWLOCKS #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DECLARE( , , lock, failed); #endif /* * Return the rwlock address when the lock cookie address is provided. * This functionality assumes that struct rwlock* have a member named rw_lock. */ #define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock)) #ifdef ADAPTIVE_RWLOCKS static int rowner_retries = 10; static int rowner_loops = 10000; static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL, "rwlock debugging"); SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, ""); SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, ""); #endif #ifdef DDB #include static void db_show_rwlock(const struct lock_object *lock); #endif static void assert_rw(const struct lock_object *lock, int what); static void lock_rw(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_rw(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_rw(struct lock_object *lock); struct lock_class lock_class_rw = { .lc_name = "rw", .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE, .lc_assert = assert_rw, #ifdef DDB .lc_ddb_show = db_show_rwlock, #endif .lc_lock = lock_rw, .lc_unlock = unlock_rw, #ifdef KDTRACE_HOOKS .lc_owner = owner_rw, #endif }; /* * Return a pointer to the owning thread if the lock is write-locked or * NULL if the lock is unlocked or read-locked. */ #define rw_wowner(rw) \ ((rw)->rw_lock & RW_LOCK_READ ? NULL : \ (struct thread *)RW_OWNER((rw)->rw_lock)) /* * Returns if a write owner is recursed. Write ownership is not assured * here and should be previously checked. */ #define rw_recursed(rw) ((rw)->rw_recurse != 0) /* * Return true if curthread helds the lock. */ #define rw_wlocked(rw) (rw_wowner((rw)) == curthread) /* * Return a pointer to the owning thread for this lock who should receive * any priority lent by threads that block on this lock. Currently this * is identical to rw_wowner(). */ #define rw_owner(rw) rw_wowner(rw) #ifndef INVARIANTS #define __rw_assert(c, what, file, line) #endif void assert_rw(const struct lock_object *lock, int what) { rw_assert((const struct rwlock *)lock, what); } void lock_rw(struct lock_object *lock, uintptr_t how) { struct rwlock *rw; rw = (struct rwlock *)lock; if (how) rw_rlock(rw); else rw_wlock(rw); } uintptr_t unlock_rw(struct lock_object *lock) { struct rwlock *rw; rw = (struct rwlock *)lock; rw_assert(rw, RA_LOCKED | LA_NOTRECURSED); if (rw->rw_lock & RW_LOCK_READ) { rw_runlock(rw); return (1); } else { rw_wunlock(rw); return (0); } } #ifdef KDTRACE_HOOKS int owner_rw(const struct lock_object *lock, struct thread **owner) { const struct rwlock *rw = (const struct rwlock *)lock; uintptr_t x = rw->rw_lock; *owner = rw_wowner(rw); return ((x & RW_LOCK_READ) != 0 ? 
(RW_READERS(x) != 0) : (*owner != NULL)); } #endif void _rw_init_flags(volatile uintptr_t *c, const char *name, int opts) { struct rwlock *rw; int flags; rw = rwlock2rw(c); MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET | RW_RECURSE | RW_NEW)) == 0); ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock, ("%s: rw_lock not aligned for %s: %p", __func__, name, &rw->rw_lock)); flags = LO_UPGRADABLE; if (opts & RW_DUPOK) flags |= LO_DUPOK; if (opts & RW_NOPROFILE) flags |= LO_NOPROFILE; if (!(opts & RW_NOWITNESS)) flags |= LO_WITNESS; if (opts & RW_RECURSE) flags |= LO_RECURSABLE; if (opts & RW_QUIET) flags |= LO_QUIET; if (opts & RW_NEW) flags |= LO_NEW; lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags); rw->rw_lock = RW_UNLOCKED; rw->rw_recurse = 0; } void _rw_destroy(volatile uintptr_t *c) { struct rwlock *rw; rw = rwlock2rw(c); KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw)); KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw)); rw->rw_lock = RW_DESTROYED; lock_destroy(&rw->lock_object); } void rw_sysinit(void *arg) { struct rw_args *args = arg; rw_init((struct rwlock *)args->ra_rw, args->ra_desc); } void rw_sysinit_flags(void *arg) { struct rw_args_flags *args = arg; rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc, args->ra_flags); } int _rw_wowned(const volatile uintptr_t *c) { return (rw_wowner(rwlock2rw(c)) == curthread); } void _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_wlock() of destroyed rwlock @ %s:%d", file, line)); WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); __rw_wlock(rw, curthread, file, line); LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_INC(curthread); } int __rw_try_wlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; int rval; if (SCHEDULER_STOPPED()) return (1); rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line)); if (rw_wlocked(rw) && (rw->lock_object.lo_flags & LO_RECURSABLE) != 0) { rw->rw_recurse++; rval = 1; } else rval = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED, (uintptr_t)curthread); LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line); if (rval) { WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); if (!rw_recursed(rw)) LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, 0, 0, file, line, LOCKSTAT_WRITER); TD_LOCKS_INC(curthread); } return (rval); } void _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_WLOCKED, file, line); WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); __rw_wunlock(rw, curthread, file, line); TD_LOCKS_DEC(curthread); } /* * Determines whether a new reader can acquire a lock. 
Succeeds if the * reader already owns a read lock and the lock is locked for read to * prevent deadlock from reader recursion. Also succeeds if the lock * is unlocked and has no writer waiters or spinners. Failing otherwise * prioritizes writers before readers. */ #define RW_CAN_READ(_rw) \ ((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) & \ (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) == \ RW_LOCK_READ) void __rw_rlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; #ifdef ADAPTIVE_RWLOCKS volatile struct thread *owner; int spintries = 0; int i; #endif #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif uintptr_t v; #ifdef KDTRACE_HOOKS uintptr_t state; - uint64_t spin_cnt = 0; - uint64_t sleep_cnt = 0; + u_int spin_cnt = 0; + u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_rlock() of destroyed rwlock @ %s:%d", file, line)); KASSERT(rw_wowner(rw) != curthread, ("rw_rlock: wlock already held for %s @ %s:%d", rw->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL); #ifdef KDTRACE_HOOKS all_time -= lockstat_nsecs(&rw->lock_object); state = rw->rw_lock; #endif for (;;) { /* * Handle the easy case. If no other thread has a write * lock, then try to bump up the count of read locks. Note * that we have to preserve the current state of the * RW_LOCK_WRITE_WAITERS flag. If we fail to acquire a * read lock, then rw_lock must have changed, so restart * the loop. Note that this handles the case of a * completely unlocked rwlock since such a lock is encoded * as a read lock with no waiters. */ v = rw->rw_lock; if (RW_CAN_READ(v)) { /* * The RW_LOCK_READ_WAITERS flag should only be set * if the lock has been unlocked and write waiters * were present. */ if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, v + RW_ONE_READER)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, rw, (void *)v, (void *)(v + RW_ONE_READER)); break; } continue; } #ifdef KDTRACE_HOOKS spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&rw->lock_object, &contested, &waittime); #ifdef ADAPTIVE_RWLOCKS /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. 
*/ if ((v & RW_LOCK_READ) == 0) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } } else if (spintries < rowner_retries) { spintries++; KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); for (i = 0; i < rowner_loops; i++) { v = rw->rw_lock; if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v)) break; cpu_spinwait(); } #ifdef KDTRACE_HOOKS spin_cnt += rowner_loops - i; #endif KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); if (i != rowner_loops) continue; } #endif /* * Okay, now it's the hard case. Some other thread already * has a write lock or there are write waiters present, * acquire the turnstile lock so we can begin the process * of blocking. */ ts = turnstile_trywait(&rw->lock_object); /* * The lock might have been released while we spun, so * recheck its state and restart the loop if needed. */ v = rw->rw_lock; if (RW_CAN_READ(v)) { turnstile_cancel(ts); continue; } #ifdef ADAPTIVE_RWLOCKS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if ((v & RW_LOCK_READ) == 0) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } } #endif /* * The lock is held in write mode or it already has waiters. */ MPASS(!RW_CAN_READ(v)); /* * If the RW_LOCK_READ_WAITERS flag is already set, then * we can go ahead and block. If it is not set then try * to set it. If we fail to set it drop the turnstile * lock and restart the loop. */ if (!(v & RW_LOCK_READ_WAITERS)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_READ_WAITERS)) { turnstile_cancel(ts); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set read waiters flag", __func__, rw); } /* * We were unable to acquire the lock and the read waiters * flag is set, so we must block on the turnstile. */ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&rw->lock_object); #endif turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&rw->lock_object); sleep_cnt++; #endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&rw->lock_object); if (sleep_time) LOCKSTAT_RECORD4(rw__block, rw, sleep_time, LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. */ if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); #endif /* * TODO: acquire "owner of record" here. Here be turnstile dragons * however. turnstiles don't like owners changing between calls to * turnstile_wait() currently. 
*/ LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested, waittime, file, line, LOCKSTAT_READER); LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line); WITNESS_LOCK(&rw->lock_object, 0, file, line); TD_LOCKS_INC(curthread); curthread->td_rw_rlocks++; } int __rw_try_rlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; uintptr_t x; if (SCHEDULER_STOPPED()) return (1); rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); for (;;) { x = rw->rw_lock; KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line)); if (!(x & RW_LOCK_READ)) break; if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) { LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file, line); WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line); LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, 0, 0, file, line, LOCKSTAT_READER); TD_LOCKS_INC(curthread); curthread->td_rw_rlocks++; return (1); } } LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line); return (0); } void _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; uintptr_t x, v, queue; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_runlock() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_RLOCKED, file, line); WITNESS_UNLOCK(&rw->lock_object, 0, file, line); LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line); /* TODO: drop "owner of record" here. */ for (;;) { /* * See if there is more than one read lock held. If so, * just drop one and return. */ x = rw->rw_lock; if (RW_READERS(x) > 1) { if (atomic_cmpset_rel_ptr(&rw->rw_lock, x, x - RW_ONE_READER)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeeded %p -> %p", __func__, rw, (void *)x, (void *)(x - RW_ONE_READER)); break; } continue; } /* * If there aren't any waiters for a write lock, then try * to drop it quickly. */ if (!(x & RW_LOCK_WAITERS)) { MPASS((x & ~RW_LOCK_WRITE_SPINNER) == RW_READERS_LOCK(1)); if (atomic_cmpset_rel_ptr(&rw->rw_lock, x, RW_UNLOCKED)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded", __func__, rw); break; } continue; } /* * Ok, we know we have waiters and we think we are the * last reader, so grab the turnstile lock. */ turnstile_chain_lock(&rw->lock_object); v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER); MPASS(v & RW_LOCK_WAITERS); /* * Try to drop our lock leaving the lock in a unlocked * state. * * If you wanted to do explicit lock handoff you'd have to * do it here. You'd also want to use turnstile_signal() * and you'd have to handle the race where a higher * priority thread blocks on the write lock before the * thread you wakeup actually runs and have the new thread * "steal" the lock. For now it's a lot simpler to just * wakeup all of the waiters. * * As above, if we fail, then another thread might have * acquired a read lock, so drop the turnstile lock and * restart. */ x = RW_UNLOCKED; if (v & RW_LOCK_WRITE_WAITERS) { queue = TS_EXCLUSIVE_QUEUE; x |= (v & RW_LOCK_READ_WAITERS); } else queue = TS_SHARED_QUEUE; if (!atomic_cmpset_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v, x)) { turnstile_chain_unlock(&rw->lock_object); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded with waiters", __func__, rw); /* * Ok. 
The lock is released and all that's left is to * wake up the waiters. Note that the lock might not be * free anymore, but in that case the writers will just * block again if they run before the new lock holder(s) * release the lock. */ ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); turnstile_broadcast(ts, queue); turnstile_unpend(ts, TS_SHARED_LOCK); turnstile_chain_unlock(&rw->lock_object); break; } LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER); TD_LOCKS_DEC(curthread); curthread->td_rw_rlocks--; } /* * This function is called when we are unable to obtain a write lock on the * first try. This means that at least one other thread holds either a * read or write lock. */ void __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; #ifdef ADAPTIVE_RWLOCKS volatile struct thread *owner; int spintries = 0; int i; #endif uintptr_t v, x; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif #ifdef KDTRACE_HOOKS uintptr_t state; - uint64_t spin_cnt = 0; - uint64_t sleep_cnt = 0; + u_int spin_cnt = 0; + u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); if (rw_wlocked(rw)) { KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE, ("%s: recursing but non-recursive rw %s @ %s:%d\n", __func__, rw->lock_object.lo_name, file, line)); rw->rw_recurse++; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw); return; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, rw->lock_object.lo_name, (void *)rw->rw_lock, file, line); #ifdef KDTRACE_HOOKS all_time -= lockstat_nsecs(&rw->lock_object); state = rw->rw_lock; #endif for (;;) { if (rw->rw_lock == RW_UNLOCKED && _rw_write_lock(rw, tid)) break; #ifdef KDTRACE_HOOKS spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&rw->lock_object, &contested, &waittime); #ifdef ADAPTIVE_RWLOCKS /* * If the lock is write locked and the owner is * running on another CPU, spin until the owner stops * running or the state of the lock changes. 
*/ v = rw->rw_lock; owner = (struct thread *)RW_OWNER(v); if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } if ((v & RW_LOCK_READ) && RW_READERS(v) && spintries < rowner_retries) { if (!(v & RW_LOCK_WRITE_SPINNER)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_WRITE_SPINNER)) { continue; } } spintries++; KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); for (i = 0; i < rowner_loops; i++) { if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0) break; cpu_spinwait(); } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); #ifdef KDTRACE_HOOKS spin_cnt += rowner_loops - i; #endif if (i != rowner_loops) continue; } #endif ts = turnstile_trywait(&rw->lock_object); v = rw->rw_lock; #ifdef ADAPTIVE_RWLOCKS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if (!(v & RW_LOCK_READ)) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } } #endif /* * Check for the waiters flags about this rwlock. * If the lock was released, without maintain any pending * waiters queue, simply try to acquire it. * If a pending waiters queue is present, claim the lock * ownership and maintain the pending queue. */ x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER); if ((v & ~x) == RW_UNLOCKED) { x &= ~RW_LOCK_WRITE_SPINNER; if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) { if (x) turnstile_claim(ts); else turnstile_cancel(ts); break; } turnstile_cancel(ts); continue; } /* * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to * set it. If we fail to set it, then loop back and try * again. */ if (!(v & RW_LOCK_WRITE_WAITERS)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_WRITE_WAITERS)) { turnstile_cancel(ts); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set write waiters flag", __func__, rw); } /* * We were unable to acquire the lock and the write waiters * flag is set, so we must block on the turnstile. */ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&rw->lock_object); #endif turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&rw->lock_object); sleep_cnt++; #endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); #ifdef ADAPTIVE_RWLOCKS spintries = 0; #endif } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&rw->lock_object); if (sleep_time) LOCKSTAT_RECORD4(rw__block, rw, sleep_time, LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. 
*/ if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, - LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, + LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); #endif LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested, waittime, file, line, LOCKSTAT_WRITER); } /* * This function is called if the first try at releasing a write lock failed. * This means that one of the 2 waiter bits must be set indicating that at * least one thread is waiting on this lock. */ void __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; uintptr_t v; int queue; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); if (rw_wlocked(rw) && rw_recursed(rw)) { rw->rw_recurse--; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw); return; } KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS), ("%s: neither of the waiter flags are set", __func__)); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p contested", __func__, rw); turnstile_chain_lock(&rw->lock_object); ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); /* * Use the same algo as sx locks for now. Prefer waking up shared * waiters if we have any over writers. This is probably not ideal. * * 'v' is the value we are going to write back to rw_lock. If we * have waiters on both queues, we need to preserve the state of * the waiter flag for the queue we don't wake up. For now this is * hardcoded for the algorithm mentioned above. * * In the case of both readers and writers waiting we wakeup the * readers but leave the RW_LOCK_WRITE_WAITERS flag set. If a * new writer comes in before a reader it will claim the lock up * above. There is probably a potential priority inversion in * there that could be worked around either by waking both queues * of waiters or doing some complicated lock handoff gymnastics. */ v = RW_UNLOCKED; if (rw->rw_lock & RW_LOCK_WRITE_WAITERS) { queue = TS_EXCLUSIVE_QUEUE; v |= (rw->rw_lock & RW_LOCK_READ_WAITERS); } else queue = TS_SHARED_QUEUE; /* Wake up all waiters for the specific queue. */ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw, queue == TS_SHARED_QUEUE ? "read" : "write"); turnstile_broadcast(ts, queue); atomic_store_rel_ptr(&rw->rw_lock, v); turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); turnstile_chain_unlock(&rw->lock_object); } /* * Attempt to do a non-blocking upgrade from a read lock to a write * lock. This will only succeed if this thread holds a single read * lock. Returns true if the upgrade succeeded and false otherwise. */ int __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; uintptr_t v, x, tid; struct turnstile *ts; int success; if (SCHEDULER_STOPPED()) return (1); rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_RLOCKED, file, line); /* * Attempt to switch from one reader to a writer. If there * are any write waiters, then we will have to lock the * turnstile first to prevent races with another writer * calling turnstile_wait() before we have claimed this * turnstile. So, do the simple case of no waiters first. 
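 *
 * Illustrative caller pattern (hypothetical names, not part of this
 * change): because the upgrade can fail, callers must be prepared to drop
 * the read lock, take the write lock and revalidate their state:
 *
 *	rw_rlock(&bar_lock);
 *	if (needs_update(obj) && !rw_try_upgrade(&bar_lock)) {
 *		rw_runlock(&bar_lock);
 *		rw_wlock(&bar_lock);
 *		... re-check obj, it may have changed while unlocked ...
 *	}
 *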
*/ tid = (uintptr_t)curthread; success = 0; for (;;) { v = rw->rw_lock; if (RW_READERS(v) > 1) break; if (!(v & RW_LOCK_WAITERS)) { success = atomic_cmpset_ptr(&rw->rw_lock, v, tid); if (!success) continue; break; } /* * Ok, we think we have waiters, so lock the turnstile. */ ts = turnstile_trywait(&rw->lock_object); v = rw->rw_lock; if (RW_READERS(v) > 1) { turnstile_cancel(ts); break; } /* * Try to switch from one reader to a writer again. This time * we honor the current state of the waiters flags. * If we obtain the lock with the flags set, then claim * ownership of the turnstile. */ x = rw->rw_lock & RW_LOCK_WAITERS; success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x); if (success) { if (x) turnstile_claim(ts); else turnstile_cancel(ts); break; } turnstile_cancel(ts); } LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line); if (success) { curthread->td_rw_rlocks--; WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); LOCKSTAT_RECORD0(rw__upgrade, rw); } return (success); } /* * Downgrade a write lock into a single read lock. */ void __rw_downgrade(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; uintptr_t tid, v; int rwait, wwait; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_WLOCKED | RA_NOTRECURSED, file, line); #ifndef INVARIANTS if (rw_recursed(rw)) panic("downgrade of a recursed lock"); #endif WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line); /* * Convert from a writer to a single reader. First we handle * the easy case with no waiters. If there are any waiters, we * lock the turnstile and "disown" the lock. */ tid = (uintptr_t)curthread; if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1))) goto out; /* * Ok, we think we have waiters, so lock the turnstile so we can * read the waiter flags without any races. */ turnstile_chain_lock(&rw->lock_object); v = rw->rw_lock & RW_LOCK_WAITERS; rwait = v & RW_LOCK_READ_WAITERS; wwait = v & RW_LOCK_WRITE_WAITERS; MPASS(rwait | wwait); /* * Downgrade from a write lock while preserving waiters flag * and give up ownership of the turnstile. */ ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); if (!wwait) v &= ~RW_LOCK_READ_WAITERS; atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v); /* * Wake other readers if there are no writers pending. Otherwise they * won't be able to acquire the lock anyway. */ if (rwait && !wwait) { turnstile_broadcast(ts, TS_SHARED_QUEUE); turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); } else turnstile_disown(ts); turnstile_chain_unlock(&rw->lock_object); out: curthread->td_rw_rlocks++; LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line); LOCKSTAT_RECORD0(rw__downgrade, rw); } #ifdef INVARIANT_SUPPORT #ifndef INVARIANTS #undef __rw_assert #endif /* * In the non-WITNESS case, rw_assert() can only detect that at least * *some* thread owns an rlock, but it cannot guarantee that *this* * thread owns an rlock. 
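 *
 * Illustrative use (hypothetical names, not part of this change): a
 * function that requires its caller to hold the lock simply asserts the
 * expectation at entry:
 *
 *	static void
 *	bar_update(struct bar *b)
 *	{
 *		rw_assert(&bar_lock, RA_WLOCKED);
 *		b->dirty = 1;
 *	}
 *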
*/ void __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line) { const struct rwlock *rw; if (panicstr != NULL) return; rw = rwlock2rw(c); switch (what) { case RA_LOCKED: case RA_LOCKED | RA_RECURSED: case RA_LOCKED | RA_NOTRECURSED: case RA_RLOCKED: case RA_RLOCKED | RA_RECURSED: case RA_RLOCKED | RA_NOTRECURSED: #ifdef WITNESS witness_assert(&rw->lock_object, what, file, line); #else /* * If some other thread has a write lock or we have one * and are asserting a read lock, fail. Also, if no one * has a lock at all, fail. */ if (rw->rw_lock == RW_UNLOCKED || (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED || rw_wowner(rw) != curthread))) panic("Lock %s not %slocked @ %s:%d\n", rw->lock_object.lo_name, (what & RA_RLOCKED) ? "read " : "", file, line); if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) { if (rw_recursed(rw)) { if (what & RA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } else if (what & RA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } #endif break; case RA_WLOCKED: case RA_WLOCKED | RA_RECURSED: case RA_WLOCKED | RA_NOTRECURSED: if (rw_wowner(rw) != curthread) panic("Lock %s not exclusively locked @ %s:%d\n", rw->lock_object.lo_name, file, line); if (rw_recursed(rw)) { if (what & RA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } else if (what & RA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); break; case RA_UNLOCKED: #ifdef WITNESS witness_assert(&rw->lock_object, what, file, line); #else /* * If we hold a write lock fail. We can't reliably check * to see if we hold a read lock or not. */ if (rw_wowner(rw) == curthread) panic("Lock %s exclusively locked @ %s:%d\n", rw->lock_object.lo_name, file, line); #endif break; default: panic("Unknown rw lock assertion: %d @ %s:%d", what, file, line); } } #endif /* INVARIANT_SUPPORT */ #ifdef DDB void db_show_rwlock(const struct lock_object *lock) { const struct rwlock *rw; struct thread *td; rw = (const struct rwlock *)lock; db_printf(" state: "); if (rw->rw_lock == RW_UNLOCKED) db_printf("UNLOCKED\n"); else if (rw->rw_lock == RW_DESTROYED) { db_printf("DESTROYED\n"); return; } else if (rw->rw_lock & RW_LOCK_READ) db_printf("RLOCK: %ju locks\n", (uintmax_t)(RW_READERS(rw->rw_lock))); else { td = rw_wowner(rw); db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (rw_recursed(rw)) db_printf(" recursed: %u\n", rw->rw_recurse); } db_printf(" waiters: "); switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) { case RW_LOCK_READ_WAITERS: db_printf("readers\n"); break; case RW_LOCK_WRITE_WAITERS: db_printf("writers\n"); break; case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS: db_printf("readers and writers\n"); break; default: db_printf("none\n"); break; } } #endif Index: user/alc/PQ_LAUNDRY/sys/kern/kern_sx.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/kern/kern_sx.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/kern/kern_sx.c (revision 303642) @@ -1,1258 +1,1258 @@ /*- * Copyright (c) 2007 Attilio Rao * Copyright (c) 2001 Jason Evans * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice(s), this list of conditions and the following disclaimer as * the first lines of this file unmodified other than the possible * addition of one or more copyright notices. * 2. Redistributions in binary form must reproduce the above copyright * notice(s), this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ /* * Shared/exclusive locks. This implementation attempts to ensure * deterministic lock granting behavior, so that slocks and xlocks are * interleaved. * * Priority propagation will not generally raise the priority of lock holders, * so should not be relied upon in combination with sx locks. */ #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_no_adaptive_sx.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_SX) #include #endif #ifdef DDB #include #endif #if defined(SMP) && !defined(NO_ADAPTIVE_SX) #define ADAPTIVE_SX #endif CTASSERT((SX_NOADAPTIVE & LO_CLASSFLAGS) == SX_NOADAPTIVE); #ifdef HWPMC_HOOKS #include PMC_SOFT_DECLARE( , , lock, failed); #endif /* Handy macros for sleep queues. */ #define SQ_EXCLUSIVE_QUEUE 0 #define SQ_SHARED_QUEUE 1 /* * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file. We * drop Giant anytime we have to sleep or if we adaptively spin. */ #define GIANT_DECLARE \ int _giantcnt = 0; \ WITNESS_SAVE_DECL(Giant) \ #define GIANT_SAVE() do { \ if (mtx_owned(&Giant)) { \ WITNESS_SAVE(&Giant.lock_object, Giant); \ while (mtx_owned(&Giant)) { \ _giantcnt++; \ mtx_unlock(&Giant); \ } \ } \ } while (0) #define GIANT_RESTORE() do { \ if (_giantcnt > 0) { \ mtx_assert(&Giant, MA_NOTOWNED); \ while (_giantcnt--) \ mtx_lock(&Giant); \ WITNESS_RESTORE(&Giant.lock_object, Giant); \ } \ } while (0) /* * Returns true if an exclusive lock is recursed. It assumes * curthread currently has an exclusive lock. 
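 *
 * Illustrative usage sketch (hypothetical names, not part of this change).
 * Unlike mutexes and rwlocks, sx locks are sleepable, so a holder may
 * block, e.g. on a memory allocation:
 *
 *	static struct sx baz_sx;
 *
 *	sx_init(&baz_sx, "baz config");
 *
 *	sx_slock(&baz_sx);
 *	... read the configuration ...
 *	sx_sunlock(&baz_sx);
 *
 *	sx_xlock(&baz_sx);
 *	... replace the configuration, possibly sleeping for memory ...
 *	sx_xunlock(&baz_sx);
 *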
*/ #define sx_recursed(sx) ((sx)->sx_recurse != 0) static void assert_sx(const struct lock_object *lock, int what); #ifdef DDB static void db_show_sx(const struct lock_object *lock); #endif static void lock_sx(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_sx(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_sx(struct lock_object *lock); struct lock_class lock_class_sx = { .lc_name = "sx", .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE, .lc_assert = assert_sx, #ifdef DDB .lc_ddb_show = db_show_sx, #endif .lc_lock = lock_sx, .lc_unlock = unlock_sx, #ifdef KDTRACE_HOOKS .lc_owner = owner_sx, #endif }; #ifndef INVARIANTS #define _sx_assert(sx, what, file, line) #endif #ifdef ADAPTIVE_SX static u_int asx_retries = 10; static u_int asx_loops = 10000; static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging"); SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, ""); SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, ""); #endif void assert_sx(const struct lock_object *lock, int what) { sx_assert((const struct sx *)lock, what); } void lock_sx(struct lock_object *lock, uintptr_t how) { struct sx *sx; sx = (struct sx *)lock; if (how) sx_slock(sx); else sx_xlock(sx); } uintptr_t unlock_sx(struct lock_object *lock) { struct sx *sx; sx = (struct sx *)lock; sx_assert(sx, SA_LOCKED | SA_NOTRECURSED); if (sx_xlocked(sx)) { sx_xunlock(sx); return (0); } else { sx_sunlock(sx); return (1); } } #ifdef KDTRACE_HOOKS int owner_sx(const struct lock_object *lock, struct thread **owner) { const struct sx *sx = (const struct sx *)lock; uintptr_t x = sx->sx_lock; *owner = (struct thread *)SX_OWNER(x); return ((x & SX_LOCK_SHARED) != 0 ? 
(SX_SHARERS(x) != 0) : (*owner != NULL)); } #endif void sx_sysinit(void *arg) { struct sx_args *sargs = arg; sx_init_flags(sargs->sa_sx, sargs->sa_desc, sargs->sa_flags); } void sx_init_flags(struct sx *sx, const char *description, int opts) { int flags; MPASS((opts & ~(SX_QUIET | SX_RECURSE | SX_NOWITNESS | SX_DUPOK | SX_NOPROFILE | SX_NOADAPTIVE | SX_NEW)) == 0); ASSERT_ATOMIC_LOAD_PTR(sx->sx_lock, ("%s: sx_lock not aligned for %s: %p", __func__, description, &sx->sx_lock)); flags = LO_SLEEPABLE | LO_UPGRADABLE; if (opts & SX_DUPOK) flags |= LO_DUPOK; if (opts & SX_NOPROFILE) flags |= LO_NOPROFILE; if (!(opts & SX_NOWITNESS)) flags |= LO_WITNESS; if (opts & SX_RECURSE) flags |= LO_RECURSABLE; if (opts & SX_QUIET) flags |= LO_QUIET; if (opts & SX_NEW) flags |= LO_NEW; flags |= opts & SX_NOADAPTIVE; lock_init(&sx->lock_object, &lock_class_sx, description, NULL, flags); sx->sx_lock = SX_LOCK_UNLOCKED; sx->sx_recurse = 0; } void sx_destroy(struct sx *sx) { KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held")); KASSERT(sx->sx_recurse == 0, ("sx lock still recursed")); sx->sx_lock = SX_LOCK_DESTROYED; lock_destroy(&sx->lock_object); } int _sx_slock(struct sx *sx, int opts, const char *file, int line) { int error = 0; if (SCHEDULER_STOPPED()) return (0); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_slock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_slock() of destroyed sx @ %s:%d", file, line)); WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL); error = __sx_slock(sx, opts, file, line); if (!error) { LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line); WITNESS_LOCK(&sx->lock_object, 0, file, line); TD_LOCKS_INC(curthread); } return (error); } int sx_try_slock_(struct sx *sx, const char *file, int line) { uintptr_t x; if (SCHEDULER_STOPPED()) return (1); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_try_slock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); for (;;) { x = sx->sx_lock; KASSERT(x != SX_LOCK_DESTROYED, ("sx_try_slock() of destroyed sx @ %s:%d", file, line)); if (!(x & SX_LOCK_SHARED)) break; if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) { LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line); WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line); LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, 0, 0, file, line, LOCKSTAT_READER); TD_LOCKS_INC(curthread); return (1); } } LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line); return (0); } int _sx_xlock(struct sx *sx, int opts, const char *file, int line) { int error = 0; if (SCHEDULER_STOPPED()) return (0); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_xlock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_xlock() of destroyed sx @ %s:%d", file, line)); WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); error = __sx_xlock(sx, curthread, opts, file, line); if (!error) { LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse, file, line); WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_INC(curthread); } return (error); } int sx_try_xlock_(struct sx *sx, const char *file, int line) { int rval; if (SCHEDULER_STOPPED()) return (1); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_try_xlock() by idle thread %p on sx %s @ %s:%d", 
curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_try_xlock() of destroyed sx @ %s:%d", file, line)); if (sx_xlocked(sx) && (sx->lock_object.lo_flags & LO_RECURSABLE) != 0) { sx->sx_recurse++; atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED); rval = 1; } else rval = atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, (uintptr_t)curthread); LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line); if (rval) { WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); if (!sx_recursed(sx)) LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, 0, 0, file, line, LOCKSTAT_WRITER); TD_LOCKS_INC(curthread); } return (rval); } void _sx_sunlock(struct sx *sx, const char *file, int line) { if (SCHEDULER_STOPPED()) return; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_sunlock() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_SLOCKED, file, line); WITNESS_UNLOCK(&sx->lock_object, 0, file, line); LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line); __sx_sunlock(sx, file, line); TD_LOCKS_DEC(curthread); } void _sx_xunlock(struct sx *sx, const char *file, int line) { if (SCHEDULER_STOPPED()) return; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_xunlock() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_XLOCKED, file, line); WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file, line); __sx_xunlock(sx, curthread, file, line); TD_LOCKS_DEC(curthread); } /* * Try to do a non-blocking upgrade from a shared lock to an exclusive lock. * This will only succeed if this thread holds a single shared lock. * Return 1 if if the upgrade succeed, 0 otherwise. */ int sx_try_upgrade_(struct sx *sx, const char *file, int line) { uintptr_t x; int success; if (SCHEDULER_STOPPED()) return (1); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_SLOCKED, file, line); /* * Try to switch from one shared lock to an exclusive lock. We need * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that * we will wake up the exclusive waiters when we drop the lock. */ x = sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS; success = atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x, (uintptr_t)curthread | x); LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line); if (success) { WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); LOCKSTAT_RECORD0(sx__upgrade, sx); } return (success); } /* * Downgrade an unrecursed exclusive lock into a single shared lock. */ void sx_downgrade_(struct sx *sx, const char *file, int line) { uintptr_t x; int wakeup_swapper; if (SCHEDULER_STOPPED()) return; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_downgrade() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line); #ifndef INVARIANTS if (sx_recursed(sx)) panic("downgrade of a recursed lock"); #endif WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line); /* * Try to switch from an exclusive lock with no shared waiters * to one sharer with no shared waiters. If there are * exclusive waiters, we don't need to lock the sleep queue so * long as we preserve the flag. We do one quick try and if * that fails we grab the sleepq lock to keep the flags from * changing and do it the slow way. * * We have to lock the sleep queue if there are shared waiters * so we can wake them up. 
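 *
 * Illustrative caller pattern (hypothetical names, not part of this
 * change): a writer that has finished mutating can downgrade so that
 * other readers may proceed while it completes a read-only pass:
 *
 *	sx_xlock(&baz_sx);
 *	... perform the update ...
 *	sx_downgrade(&baz_sx);
 *	... continue read-only, concurrently with other readers ...
 *	sx_sunlock(&baz_sx);
 *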
*/ x = sx->sx_lock; if (!(x & SX_LOCK_SHARED_WAITERS) && atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) | (x & SX_LOCK_EXCLUSIVE_WAITERS))) { LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line); return; } /* * Lock the sleep queue so we can read the waiters bits * without any races and wakeup any shared waiters. */ sleepq_lock(&sx->lock_object); /* * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single * shared lock. If there are any shared waiters, wake them up. */ wakeup_swapper = 0; x = sx->sx_lock; atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | (x & SX_LOCK_EXCLUSIVE_WAITERS)); if (x & SX_LOCK_SHARED_WAITERS) wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, SQ_SHARED_QUEUE); sleepq_release(&sx->lock_object); LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line); LOCKSTAT_RECORD0(sx__downgrade, sx); if (wakeup_swapper) kick_proc0(); } /* * This function represents the so-called 'hard case' for sx_xlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ int _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, int line) { GIANT_DECLARE; #ifdef ADAPTIVE_SX volatile struct thread *owner; u_int i, spintries = 0; #endif uintptr_t x; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif int error = 0; #ifdef KDTRACE_HOOKS uintptr_t state; - uint64_t spin_cnt = 0; - uint64_t sleep_cnt = 0; + u_int spin_cnt = 0; + u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return (0); /* If we already hold an exclusive lock, then recurse. */ if (sx_xlocked(sx)) { KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0, ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n", sx->lock_object.lo_name, file, line)); sx->sx_recurse++; atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx); return (0); } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, sx->lock_object.lo_name, (void *)sx->sx_lock, file, line); #ifdef KDTRACE_HOOKS all_time -= lockstat_nsecs(&sx->lock_object); state = sx->sx_lock; #endif for (;;) { if (sx->sx_lock == SX_LOCK_UNLOCKED && atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) break; #ifdef KDTRACE_HOOKS spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&sx->lock_object, &contested, &waittime); #ifdef ADAPTIVE_SX /* * If the lock is write locked and the owner is * running on another CPU, spin until the owner stops * running or the state of the lock changes. 
*/ x = sx->sx_lock; if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { if ((x & SX_LOCK_SHARED) == 0) { x = SX_OWNER(x); owner = (struct thread *)x; if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, sx, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", sx->lock_object.lo_name); GIANT_SAVE(); while (SX_OWNER(sx->sx_lock) == x && TD_IS_RUNNING(owner)) { cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } } else if (SX_SHARERS(x) && spintries < asx_retries) { KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", sx->lock_object.lo_name); GIANT_SAVE(); spintries++; for (i = 0; i < asx_loops; i++) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: shared spinning on %p with %u and %u", __func__, sx, spintries, i); x = sx->sx_lock; if ((x & SX_LOCK_SHARED) == 0 || SX_SHARERS(x) == 0) break; cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); if (i != asx_loops) continue; } } #endif sleepq_lock(&sx->lock_object); x = sx->sx_lock; /* * If the lock was released while spinning on the * sleep queue chain lock, try again. */ if (x == SX_LOCK_UNLOCKED) { sleepq_release(&sx->lock_object); continue; } #ifdef ADAPTIVE_SX /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the sleep queue * chain lock. If so, drop the sleep queue lock and try * again. */ if (!(x & SX_LOCK_SHARED) && (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { owner = (struct thread *)SX_OWNER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&sx->lock_object); continue; } } #endif /* * If an exclusive lock was released with both shared * and exclusive waiters and a shared waiter hasn't * woken up and acquired the lock yet, sx_lock will be * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS. * If we see that value, try to acquire it once. Note * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS * as there are other exclusive waiters still. If we * fail, restart the loop. */ if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) { if (atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS, tid | SX_LOCK_EXCLUSIVE_WAITERS)) { sleepq_release(&sx->lock_object); CTR2(KTR_LOCK, "%s: %p claimed by new writer", __func__, sx); break; } sleepq_release(&sx->lock_object); continue; } /* * Try to set the SX_LOCK_EXCLUSIVE_WAITERS. If we fail, * than loop back and retry. */ if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) { if (!atomic_cmpset_ptr(&sx->sx_lock, x, x | SX_LOCK_EXCLUSIVE_WAITERS)) { sleepq_release(&sx->lock_object); continue; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set excl waiters flag", __func__, sx); } /* * Since we have been unable to acquire the exclusive * lock and the exclusive waiters flag is set, we have * to sleep. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on sleep queue", __func__, sx); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&sx->lock_object); #endif GIANT_SAVE(); sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? 
SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE); if (!(opts & SX_INTERRUPTIBLE)) sleepq_wait(&sx->lock_object, 0); else error = sleepq_wait_sig(&sx->lock_object, 0); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&sx->lock_object); sleep_cnt++; #endif if (error) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: interruptible sleep by %p suspended by signal", __func__, sx); break; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&sx->lock_object); if (sleep_time) LOCKSTAT_RECORD4(sx__block, sx, sleep_time, LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time, LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); #endif if (!error) LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, contested, waittime, file, line, LOCKSTAT_WRITER); GIANT_RESTORE(); return (error); } /* * This function represents the so-called 'hard case' for sx_xunlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ void _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line) { uintptr_t x; int queue, wakeup_swapper; if (SCHEDULER_STOPPED()) return; MPASS(!(sx->sx_lock & SX_LOCK_SHARED)); /* If the lock is recursed, then unrecurse one level. */ if (sx_xlocked(sx) && sx_recursed(sx)) { if ((--sx->sx_recurse) == 0) atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx); return; } MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p contested", __func__, sx); sleepq_lock(&sx->lock_object); x = SX_LOCK_UNLOCKED; /* * The wake up algorithm here is quite simple and probably not * ideal. It gives precedence to shared waiters if they are * present. For this condition, we have to preserve the * state of the exclusive waiters flag. * If interruptible sleeps left the shared queue empty avoid a * starvation for the threads sleeping on the exclusive queue by giving * them precedence and cleaning up the shared waiters bit anyway. */ if ((sx->sx_lock & SX_LOCK_SHARED_WAITERS) != 0 && sleepq_sleepcnt(&sx->lock_object, SQ_SHARED_QUEUE) != 0) { queue = SQ_SHARED_QUEUE; x |= (sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS); } else queue = SQ_EXCLUSIVE_QUEUE; /* Wake up all the waiters for the specific queue. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue", __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" : "exclusive"); atomic_store_rel_ptr(&sx->sx_lock, x); wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, queue); sleepq_release(&sx->lock_object); if (wakeup_swapper) kick_proc0(); } /* * This function represents the so-called 'hard case' for sx_slock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. 
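 *
 * Illustrative note (not part of this change): the SX_INTERRUPTIBLE
 * behaviour handled below is what the sx_slock_sig()/sx_xlock_sig()
 * wrappers request, so callers using them must check for failure:
 *
 *	error = sx_slock_sig(&baz_sx);
 *	if (error != 0)
 *		return (error);		sleep interrupted; lock not held
 *	... use the shared data ...
 *	sx_sunlock(&baz_sx);
 *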
*/ int _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) { GIANT_DECLARE; #ifdef ADAPTIVE_SX volatile struct thread *owner; #endif #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif uintptr_t x; int error = 0; #ifdef KDTRACE_HOOKS uintptr_t state; - uint64_t spin_cnt = 0; - uint64_t sleep_cnt = 0; + u_int spin_cnt = 0; + u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return (0); #ifdef KDTRACE_HOOKS state = sx->sx_lock; all_time -= lockstat_nsecs(&sx->lock_object); #endif /* * As with rwlocks, we don't make any attempt to try to block * shared locks once there is an exclusive waiter. */ for (;;) { #ifdef KDTRACE_HOOKS spin_cnt++; #endif x = sx->sx_lock; /* * If no other thread has an exclusive lock then try to bump up * the count of sharers. Since we have to preserve the state * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the * shared lock loop back and retry. */ if (x & SX_LOCK_SHARED) { MPASS(!(x & SX_LOCK_SHARED_WAITERS)); if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, sx, (void *)x, (void *)(x + SX_ONE_SHARER)); break; } continue; } #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&sx->lock_object, &contested, &waittime); #ifdef ADAPTIVE_SX /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { x = SX_OWNER(x); owner = (struct thread *)x; if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, sx, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", sx->lock_object.lo_name); GIANT_SAVE(); while (SX_OWNER(sx->sx_lock) == x && TD_IS_RUNNING(owner)) { + cpu_spinwait(); #ifdef KDTRACE_HOOKS spin_cnt++; #endif - cpu_spinwait(); } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } } #endif /* * Some other thread already has an exclusive lock, so * start the process of blocking. */ sleepq_lock(&sx->lock_object); x = sx->sx_lock; /* * The lock could have been released while we spun. * In this case loop back and retry. */ if (x & SX_LOCK_SHARED) { sleepq_release(&sx->lock_object); continue; } #ifdef ADAPTIVE_SX /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ if (!(x & SX_LOCK_SHARED) && (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { owner = (struct thread *)SX_OWNER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&sx->lock_object); continue; } } #endif /* * Try to set the SX_LOCK_SHARED_WAITERS flag. If we * fail to set it drop the sleep queue lock and loop * back. */ if (!(x & SX_LOCK_SHARED_WAITERS)) { if (!atomic_cmpset_ptr(&sx->sx_lock, x, x | SX_LOCK_SHARED_WAITERS)) { sleepq_release(&sx->lock_object); continue; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set shared waiters flag", __func__, sx); } /* * Since we have been unable to acquire the shared lock, * we have to sleep. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on sleep queue", __func__, sx); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&sx->lock_object); #endif GIANT_SAVE(); sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? 
SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE); if (!(opts & SX_INTERRUPTIBLE)) sleepq_wait(&sx->lock_object, 0); else error = sleepq_wait_sig(&sx->lock_object, 0); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&sx->lock_object); sleep_cnt++; #endif if (error) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: interruptible sleep by %p suspended by signal", __func__, sx); break; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&sx->lock_object); if (sleep_time) LOCKSTAT_RECORD4(sx__block, sx, sleep_time, LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); if (spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time, LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); #endif if (error == 0) LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, contested, waittime, file, line, LOCKSTAT_READER); GIANT_RESTORE(); return (error); } /* * This function represents the so-called 'hard case' for sx_sunlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ void _sx_sunlock_hard(struct sx *sx, const char *file, int line) { uintptr_t x; int wakeup_swapper; if (SCHEDULER_STOPPED()) return; for (;;) { x = sx->sx_lock; /* * We should never have sharers while at least one thread * holds a shared lock. */ KASSERT(!(x & SX_LOCK_SHARED_WAITERS), ("%s: waiting sharers", __func__)); /* * See if there is more than one shared lock held. If * so, just drop one and return. */ if (SX_SHARERS(x) > 1) { if (atomic_cmpset_rel_ptr(&sx->sx_lock, x, x - SX_ONE_SHARER)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeeded %p -> %p", __func__, sx, (void *)x, (void *)(x - SX_ONE_SHARER)); break; } continue; } /* * If there aren't any waiters for an exclusive lock, * then try to drop it quickly. */ if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) { MPASS(x == SX_SHARERS_LOCK(1)); if (atomic_cmpset_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1), SX_LOCK_UNLOCKED)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded", __func__, sx); break; } continue; } /* * At this point, there should just be one sharer with * exclusive waiters. */ MPASS(x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS)); sleepq_lock(&sx->lock_object); /* * Wake up semantic here is quite simple: * Just wake up all the exclusive waiters. * Note that the state of the lock could have changed, * so if it fails loop back and retry. */ if (!atomic_cmpset_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS, SX_LOCK_UNLOCKED)) { sleepq_release(&sx->lock_object); continue; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p waking up all thread on" "exclusive queue", __func__, sx); wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, SQ_EXCLUSIVE_QUEUE); sleepq_release(&sx->lock_object); if (wakeup_swapper) kick_proc0(); break; } } #ifdef INVARIANT_SUPPORT #ifndef INVARIANTS #undef _sx_assert #endif /* * In the non-WITNESS case, sx_assert() can only detect that at least * *some* thread owns an slock, but it cannot guarantee that *this* * thread owns an slock. 
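For readers who only know the public sx(9) interface, the slow paths above and the assertion code below sit behind calls like the ones in this minimal sketch; the foo_* names, the lock description string and the SYSINIT hook are illustrative and not taken from the tree:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/sx.h>

static struct sx foo_lock;
static int foo_value;
SX_SYSINIT(foo_lock_init, &foo_lock, "foo lock");

static int
foo_fetch(void)
{
	int v;

	sx_slock(&foo_lock);			/* may enter _sx_slock_hard() */
	sx_assert(&foo_lock, SA_SLOCKED);	/* checked under INVARIANTS/WITNESS */
	v = foo_value;				/* state protected by the lock */
	sx_sunlock(&foo_lock);			/* may enter _sx_sunlock_hard() */
	return (v);
}

The assertion compiles away in kernels built without INVARIANTS, which is why _sx_assert itself is only provided under INVARIANT_SUPPORT.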
*/ void _sx_assert(const struct sx *sx, int what, const char *file, int line) { #ifndef WITNESS int slocked = 0; #endif if (panicstr != NULL) return; switch (what) { case SA_SLOCKED: case SA_SLOCKED | SA_NOTRECURSED: case SA_SLOCKED | SA_RECURSED: #ifndef WITNESS slocked = 1; /* FALLTHROUGH */ #endif case SA_LOCKED: case SA_LOCKED | SA_NOTRECURSED: case SA_LOCKED | SA_RECURSED: #ifdef WITNESS witness_assert(&sx->lock_object, what, file, line); #else /* * If some other thread has an exclusive lock or we * have one and are asserting a shared lock, fail. * Also, if no one has a lock at all, fail. */ if (sx->sx_lock == SX_LOCK_UNLOCKED || (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked || sx_xholder(sx) != curthread))) panic("Lock %s not %slocked @ %s:%d\n", sx->lock_object.lo_name, slocked ? "share " : "", file, line); if (!(sx->sx_lock & SX_LOCK_SHARED)) { if (sx_recursed(sx)) { if (what & SA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } else if (what & SA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } #endif break; case SA_XLOCKED: case SA_XLOCKED | SA_NOTRECURSED: case SA_XLOCKED | SA_RECURSED: if (sx_xholder(sx) != curthread) panic("Lock %s not exclusively locked @ %s:%d\n", sx->lock_object.lo_name, file, line); if (sx_recursed(sx)) { if (what & SA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } else if (what & SA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); break; case SA_UNLOCKED: #ifdef WITNESS witness_assert(&sx->lock_object, what, file, line); #else /* * If we hold an exclusve lock fail. We can't * reliably check to see if we hold a shared lock or * not. */ if (sx_xholder(sx) == curthread) panic("Lock %s exclusively locked @ %s:%d\n", sx->lock_object.lo_name, file, line); #endif break; default: panic("Unknown sx lock assertion: %d @ %s:%d", what, file, line); } } #endif /* INVARIANT_SUPPORT */ #ifdef DDB static void db_show_sx(const struct lock_object *lock) { struct thread *td; const struct sx *sx; sx = (const struct sx *)lock; db_printf(" state: "); if (sx->sx_lock == SX_LOCK_UNLOCKED) db_printf("UNLOCKED\n"); else if (sx->sx_lock == SX_LOCK_DESTROYED) { db_printf("DESTROYED\n"); return; } else if (sx->sx_lock & SX_LOCK_SHARED) db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock)); else { td = sx_xholder(sx); db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (sx_recursed(sx)) db_printf(" recursed: %d\n", sx->sx_recurse); } db_printf(" waiters: "); switch(sx->sx_lock & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) { case SX_LOCK_SHARED_WAITERS: db_printf("shared\n"); break; case SX_LOCK_EXCLUSIVE_WAITERS: db_printf("exclusive\n"); break; case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS: db_printf("exclusive and shared\n"); break; default: db_printf("none\n"); } } /* * Check to see if a thread that is blocked on a sleep queue is actually * blocked on an sx lock. If so, output some details and return true. * If the lock has an exclusive owner, return that in *ownerp. */ int sx_chain(struct thread *td, struct thread **ownerp) { struct sx *sx; /* * Check to see if this thread is blocked on an sx lock. * First, we check the lock class. If that is ok, then we * compare the lock name against the wait message. 
*/ sx = td->td_wchan; if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx || sx->lock_object.lo_name != td->td_wmesg) return (0); /* We think we have an sx lock, so output some details. */ db_printf("blocked on sx \"%s\" ", td->td_wmesg); *ownerp = sx_xholder(sx); if (sx->sx_lock & SX_LOCK_SHARED) db_printf("SLOCK (count %ju)\n", (uintmax_t)SX_SHARERS(sx->sx_lock)); else db_printf("XLOCK\n"); return (1); } #endif Index: user/alc/PQ_LAUNDRY/sys/kern/kern_tc.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/kern/kern_tc.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/kern/kern_tc.c (revision 303642) @@ -1,2168 +1,2166 @@ /*- * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Julien Ridoux at the University * of Melbourne under sponsorship from the FreeBSD Foundation. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ntp.h" #include "opt_ffclock.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * A large step happens on boot. This constant detects such steps. * It is relatively small so that ntp_update_second gets called enough * in the typical 'missed a couple of seconds' case, but doesn't loop * forever when the time step is large. */ #define LARGE_STEP 200 /* * Implement a dummy timecounter which we can use until we get a real one * in the air. This allows the console and other early stuff to use * time services. */ static u_int dummy_get_timecount(struct timecounter *tc) { static u_int now; return (++now); } static struct timecounter dummy_timecounter = { dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000 }; struct timehands { /* These fields must be initialized by the driver. */ struct timecounter *th_counter; int64_t th_adjustment; uint64_t th_scale; u_int th_offset_count; struct bintime th_offset; + struct bintime th_bintime; struct timeval th_microtime; struct timespec th_nanotime; struct bintime th_boottime; /* Fields not to be copied in tc_windup start with th_generation. 
*/ u_int th_generation; struct timehands *th_next; }; static struct timehands th0; static struct timehands th1 = { .th_next = &th0 }; static struct timehands th0 = { .th_counter = &dummy_timecounter, .th_scale = (uint64_t)-1 / 1000000, .th_offset = { .sec = 1 }, .th_generation = 1, .th_next = &th1 }; static struct timehands *volatile timehands = &th0; struct timecounter *timecounter = &dummy_timecounter; static struct timecounter *timecounters = &dummy_timecounter; int tc_min_ticktock_freq = 1; volatile time_t time_second = 1; volatile time_t time_uptime = 1; static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime, CTLTYPE_STRUCT|CTLFLAG_RD, NULL, 0, sysctl_kern_boottime, "S,timeval", "System boottime"); SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, ""); static SYSCTL_NODE(_kern_timecounter, OID_AUTO, tc, CTLFLAG_RW, 0, ""); static int timestepwarnings; SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW, ×tepwarnings, 0, "Log time steps"); struct bintime bt_timethreshold; struct bintime bt_tickthreshold; sbintime_t sbt_timethreshold; sbintime_t sbt_tickthreshold; struct bintime tc_tick_bt; sbintime_t tc_tick_sbt; int tc_precexp; int tc_timepercentage = TC_DEFAULTPERC; static int sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_kern_timecounter, OID_AUTO, alloweddeviation, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_timecounter_adjprecision, "I", "Allowed time interval deviation in percents"); static int tc_chosen; /* Non-zero if a specific tc was chosen via sysctl. */ static void tc_windup(struct bintime *new_boottimebin); static void cpu_tick_calibrate(int); void dtrace_getnanotime(struct timespec *tsp); static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS) { struct timeval boottime; getboottime(&boottime); #ifndef __mips__ #ifdef SCTL_MASK32 int tv[2]; if (req->flags & SCTL_MASK32) { tv[0] = boottime.tv_sec; tv[1] = boottime.tv_usec; return (SYSCTL_OUT(req, tv, sizeof(tv))); } #endif #endif return (SYSCTL_OUT(req, &boottime, sizeof(boottime))); } static int sysctl_kern_timecounter_get(SYSCTL_HANDLER_ARGS) { u_int ncount; struct timecounter *tc = arg1; ncount = tc->tc_get_timecount(tc); return (sysctl_handle_int(oidp, &ncount, 0, req)); } static int sysctl_kern_timecounter_freq(SYSCTL_HANDLER_ARGS) { uint64_t freq; struct timecounter *tc = arg1; freq = tc->tc_frequency; return (sysctl_handle_64(oidp, &freq, 0, req)); } /* * Return the difference between the timehands' counter value now and what * was when we copied it to the timehands' offset_count. */ static __inline u_int tc_delta(struct timehands *th) { struct timecounter *tc; tc = th->th_counter; return ((tc->tc_get_timecount(tc) - th->th_offset_count) & tc->tc_counter_mask); } /* * Functions for reading the time. We have to loop until we are sure that * the timehands that we operated on was not updated under our feet. See * the comment in for a description of these 12 functions. 
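The read loops in the functions that follow all use the same generation (sequence-lock style) protocol as tc_windup(): the writer publishes generation zero while it updates the timehands, and a reader retries until it observes the same non-zero generation before and after copying the data. A stand-alone sketch of the idea, using C11 atomics instead of the kernel's atomic_*_acq/rel primitives and with made-up names, might look like this:

#include <stdatomic.h>
#include <stdint.h>

struct snap {
	_Atomic unsigned gen;	/* zero while an update is in progress */
	uint64_t value;		/* payload guarded by the generation */
};

static uint64_t
snap_read(struct snap *s)
{
	unsigned gen;
	uint64_t v;

	do {
		gen = atomic_load_explicit(&s->gen, memory_order_acquire);
		v = s->value;
		atomic_thread_fence(memory_order_acquire);
	} while (gen == 0 ||
	    gen != atomic_load_explicit(&s->gen, memory_order_relaxed));
	return (v);
}

static void
snap_write(struct snap *s, uint64_t v)
{
	unsigned gen;

	gen = atomic_load_explicit(&s->gen, memory_order_relaxed);
	atomic_store_explicit(&s->gen, 0, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);
	s->value = v;
	if (++gen == 0)
		gen = 1;	/* never publish generation zero */
	atomic_store_explicit(&s->gen, gen, memory_order_release);
}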
*/ #ifdef FFCLOCK void fbclock_binuptime(struct bintime *bt) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_offset; bintime_addx(bt, th->th_scale * tc_delta(th)); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_nanouptime(struct timespec *tsp) { struct bintime bt; fbclock_binuptime(&bt); bintime2timespec(&bt, tsp); } void fbclock_microuptime(struct timeval *tvp) { struct bintime bt; fbclock_binuptime(&bt); bintime2timeval(&bt, tvp); } void fbclock_bintime(struct bintime *bt) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); - *bt = th->th_offset; + *bt = th->th_bintime; bintime_addx(bt, th->th_scale * tc_delta(th)); - bintime_add(bt, &th->th_boottime); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_nanotime(struct timespec *tsp) { struct bintime bt; fbclock_bintime(&bt); bintime2timespec(&bt, tsp); } void fbclock_microtime(struct timeval *tvp) { struct bintime bt; fbclock_bintime(&bt); bintime2timeval(&bt, tvp); } void fbclock_getbinuptime(struct bintime *bt) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_offset; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_getnanouptime(struct timespec *tsp) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); bintime2timespec(&th->th_offset, tsp); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_getmicrouptime(struct timeval *tvp) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); bintime2timeval(&th->th_offset, tvp); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_getbintime(struct bintime *bt) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); - *bt = th->th_offset; - bintime_add(bt, &th->th_boottime); + *bt = th->th_bintime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_getnanotime(struct timespec *tsp) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *tsp = th->th_nanotime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_getmicrotime(struct timeval *tvp) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *tvp = th->th_microtime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } #else /* !FFCLOCK */ void binuptime(struct bintime *bt) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_offset; bintime_addx(bt, th->th_scale * tc_delta(th)); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void nanouptime(struct timespec *tsp) { struct bintime bt; binuptime(&bt); bintime2timespec(&bt, tsp); } void microuptime(struct timeval *tvp) { struct bintime bt; binuptime(&bt); bintime2timeval(&bt, tvp); } void bintime(struct bintime *bt) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); - *bt = th->th_offset; + *bt = th->th_bintime; bintime_addx(bt, th->th_scale * tc_delta(th)); - bintime_add(bt, 
&th->th_boottime); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void nanotime(struct timespec *tsp) { struct bintime bt; bintime(&bt); bintime2timespec(&bt, tsp); } void microtime(struct timeval *tvp) { struct bintime bt; bintime(&bt); bintime2timeval(&bt, tvp); } void getbinuptime(struct bintime *bt) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_offset; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void getnanouptime(struct timespec *tsp) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); bintime2timespec(&th->th_offset, tsp); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void getmicrouptime(struct timeval *tvp) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); bintime2timeval(&th->th_offset, tvp); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void getbintime(struct bintime *bt) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); - *bt = th->th_offset; - bintime_add(bt, &th->th_boottime); + *bt = th->th_bintime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void getnanotime(struct timespec *tsp) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *tsp = th->th_nanotime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void getmicrotime(struct timeval *tvp) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *tvp = th->th_microtime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } #endif /* FFCLOCK */ void getboottime(struct timeval *boottime) { struct bintime boottimebin; getboottimebin(&boottimebin); bintime2timeval(&boottimebin, boottime); } void getboottimebin(struct bintime *boottimebin) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *boottimebin = th->th_boottime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } #ifdef FFCLOCK /* * Support for feed-forward synchronization algorithms. This is heavily inspired * by the timehands mechanism but kept independent from it. *_windup() functions * have some connection to avoid accessing the timecounter hardware more than * necessary. */ /* Feed-forward clock estimates kept updated by the synchronization daemon. */ struct ffclock_estimate ffclock_estimate; struct bintime ffclock_boottime; /* Feed-forward boot time estimate. */ uint32_t ffclock_status; /* Feed-forward clock status. */ int8_t ffclock_updated; /* New estimates are available. */ struct mtx ffclock_mtx; /* Mutex on ffclock_estimate. 
*/ struct fftimehands { struct ffclock_estimate cest; struct bintime tick_time; struct bintime tick_time_lerp; ffcounter tick_ffcount; uint64_t period_lerp; volatile uint8_t gen; struct fftimehands *next; }; #define NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x)) static struct fftimehands ffth[10]; static struct fftimehands *volatile fftimehands = ffth; static void ffclock_init(void) { struct fftimehands *cur; struct fftimehands *last; memset(ffth, 0, sizeof(ffth)); last = ffth + NUM_ELEMENTS(ffth) - 1; for (cur = ffth; cur < last; cur++) cur->next = cur + 1; last->next = ffth; ffclock_updated = 0; ffclock_status = FFCLOCK_STA_UNSYNC; mtx_init(&ffclock_mtx, "ffclock lock", NULL, MTX_DEF); } /* * Reset the feed-forward clock estimates. Called from inittodr() to get things * kick started and uses the timecounter nominal frequency as a first period * estimate. Note: this function may be called several time just after boot. * Note: this is the only function that sets the value of boot time for the * monotonic (i.e. uptime) version of the feed-forward clock. */ void ffclock_reset_clock(struct timespec *ts) { struct timecounter *tc; struct ffclock_estimate cest; tc = timehands->th_counter; memset(&cest, 0, sizeof(struct ffclock_estimate)); timespec2bintime(ts, &ffclock_boottime); timespec2bintime(ts, &(cest.update_time)); ffclock_read_counter(&cest.update_ffcount); cest.leapsec_next = 0; cest.period = ((1ULL << 63) / tc->tc_frequency) << 1; cest.errb_abs = 0; cest.errb_rate = 0; cest.status = FFCLOCK_STA_UNSYNC; cest.leapsec_total = 0; cest.leapsec = 0; mtx_lock(&ffclock_mtx); bcopy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate)); ffclock_updated = INT8_MAX; mtx_unlock(&ffclock_mtx); printf("ffclock reset: %s (%llu Hz), time = %ld.%09lu\n", tc->tc_name, (unsigned long long)tc->tc_frequency, (long)ts->tv_sec, (unsigned long)ts->tv_nsec); } /* * Sub-routine to convert a time interval measured in RAW counter units to time * in seconds stored in bintime format. * NOTE: bintime_mul requires u_int, but the value of the ffcounter may be * larger than the max value of u_int (on 32 bit architecture). Loop to consume * extra cycles. */ static void ffclock_convert_delta(ffcounter ffdelta, uint64_t period, struct bintime *bt) { struct bintime bt2; ffcounter delta, delta_max; delta_max = (1ULL << (8 * sizeof(unsigned int))) - 1; bintime_clear(bt); do { if (ffdelta > delta_max) delta = delta_max; else delta = ffdelta; bt2.sec = 0; bt2.frac = period; bintime_mul(&bt2, (unsigned int)delta); bintime_add(bt, &bt2); ffdelta -= delta; } while (ffdelta > 0); } /* * Update the fftimehands. * Push the tick ffcount and time(s) forward based on current clock estimate. * The conversion from ffcounter to bintime relies on the difference clock * principle, whose accuracy relies on computing small time intervals. If a new * clock estimate has been passed by the synchronisation daemon, make it * current, and compute the linear interpolation for monotonic time if needed. */ static void ffclock_windup(unsigned int delta) { struct ffclock_estimate *cest; struct fftimehands *ffth; struct bintime bt, gap_lerp; ffcounter ffdelta; uint64_t frac; unsigned int polling; uint8_t forward_jump, ogen; /* * Pick the next timehand, copy current ffclock estimates and move tick * times and counter forward. 
*/ forward_jump = 0; ffth = fftimehands->next; ogen = ffth->gen; ffth->gen = 0; cest = &ffth->cest; bcopy(&fftimehands->cest, cest, sizeof(struct ffclock_estimate)); ffdelta = (ffcounter)delta; ffth->period_lerp = fftimehands->period_lerp; ffth->tick_time = fftimehands->tick_time; ffclock_convert_delta(ffdelta, cest->period, &bt); bintime_add(&ffth->tick_time, &bt); ffth->tick_time_lerp = fftimehands->tick_time_lerp; ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt); bintime_add(&ffth->tick_time_lerp, &bt); ffth->tick_ffcount = fftimehands->tick_ffcount + ffdelta; /* * Assess the status of the clock, if the last update is too old, it is * likely the synchronisation daemon is dead and the clock is free * running. */ if (ffclock_updated == 0) { ffdelta = ffth->tick_ffcount - cest->update_ffcount; ffclock_convert_delta(ffdelta, cest->period, &bt); if (bt.sec > 2 * FFCLOCK_SKM_SCALE) ffclock_status |= FFCLOCK_STA_UNSYNC; } /* * If available, grab updated clock estimates and make them current. * Recompute time at this tick using the updated estimates. The clock * estimates passed the feed-forward synchronisation daemon may result * in time conversion that is not monotonically increasing (just after * the update). time_lerp is a particular linear interpolation over the * synchronisation algo polling period that ensures monotonicity for the * clock ids requesting it. */ if (ffclock_updated > 0) { bcopy(&ffclock_estimate, cest, sizeof(struct ffclock_estimate)); ffdelta = ffth->tick_ffcount - cest->update_ffcount; ffth->tick_time = cest->update_time; ffclock_convert_delta(ffdelta, cest->period, &bt); bintime_add(&ffth->tick_time, &bt); /* ffclock_reset sets ffclock_updated to INT8_MAX */ if (ffclock_updated == INT8_MAX) ffth->tick_time_lerp = ffth->tick_time; if (bintime_cmp(&ffth->tick_time, &ffth->tick_time_lerp, >)) forward_jump = 1; else forward_jump = 0; bintime_clear(&gap_lerp); if (forward_jump) { gap_lerp = ffth->tick_time; bintime_sub(&gap_lerp, &ffth->tick_time_lerp); } else { gap_lerp = ffth->tick_time_lerp; bintime_sub(&gap_lerp, &ffth->tick_time); } /* * The reset from the RTC clock may be far from accurate, and * reducing the gap between real time and interpolated time * could take a very long time if the interpolated clock insists * on strict monotonicity. The clock is reset under very strict * conditions (kernel time is known to be wrong and * synchronization daemon has been restarted recently. * ffclock_boottime absorbs the jump to ensure boot time is * correct and uptime functions stay consistent. */ if (((ffclock_status & FFCLOCK_STA_UNSYNC) == FFCLOCK_STA_UNSYNC) && ((cest->status & FFCLOCK_STA_UNSYNC) == 0) && ((cest->status & FFCLOCK_STA_WARMUP) == FFCLOCK_STA_WARMUP)) { if (forward_jump) bintime_add(&ffclock_boottime, &gap_lerp); else bintime_sub(&ffclock_boottime, &gap_lerp); ffth->tick_time_lerp = ffth->tick_time; bintime_clear(&gap_lerp); } ffclock_status = cest->status; ffth->period_lerp = cest->period; /* * Compute corrected period used for the linear interpolation of * time. The rate of linear interpolation is capped to 5000PPM * (5ms/s). 
*/ if (bintime_isset(&gap_lerp)) { ffdelta = cest->update_ffcount; ffdelta -= fftimehands->cest.update_ffcount; ffclock_convert_delta(ffdelta, cest->period, &bt); polling = bt.sec; bt.sec = 0; bt.frac = 5000000 * (uint64_t)18446744073LL; bintime_mul(&bt, polling); if (bintime_cmp(&gap_lerp, &bt, >)) gap_lerp = bt; /* Approximate 1 sec by 1-(1/2^64) to ease arithmetic */ frac = 0; if (gap_lerp.sec > 0) { frac -= 1; frac /= ffdelta / gap_lerp.sec; } frac += gap_lerp.frac / ffdelta; if (forward_jump) ffth->period_lerp += frac; else ffth->period_lerp -= frac; } ffclock_updated = 0; } if (++ogen == 0) ogen = 1; ffth->gen = ogen; fftimehands = ffth; } /* * Adjust the fftimehands when the timecounter is changed. Stating the obvious, * the old and new hardware counter cannot be read simultaneously. tc_windup() * does read the two counters 'back to back', but a few cycles are effectively * lost, and not accumulated in tick_ffcount. This is a fairly radical * operation for a feed-forward synchronization daemon, and it is its job to not * pushing irrelevant data to the kernel. Because there is no locking here, * simply force to ignore pending or next update to give daemon a chance to * realize the counter has changed. */ static void ffclock_change_tc(struct timehands *th) { struct fftimehands *ffth; struct ffclock_estimate *cest; struct timecounter *tc; uint8_t ogen; tc = th->th_counter; ffth = fftimehands->next; ogen = ffth->gen; ffth->gen = 0; cest = &ffth->cest; bcopy(&(fftimehands->cest), cest, sizeof(struct ffclock_estimate)); cest->period = ((1ULL << 63) / tc->tc_frequency ) << 1; cest->errb_abs = 0; cest->errb_rate = 0; cest->status |= FFCLOCK_STA_UNSYNC; ffth->tick_ffcount = fftimehands->tick_ffcount; ffth->tick_time_lerp = fftimehands->tick_time_lerp; ffth->tick_time = fftimehands->tick_time; ffth->period_lerp = cest->period; /* Do not lock but ignore next update from synchronization daemon. */ ffclock_updated--; if (++ogen == 0) ogen = 1; ffth->gen = ogen; fftimehands = ffth; } /* * Retrieve feed-forward counter and time of last kernel tick. */ void ffclock_last_tick(ffcounter *ffcount, struct bintime *bt, uint32_t flags) { struct fftimehands *ffth; uint8_t gen; /* * No locking but check generation has not changed. Also need to make * sure ffdelta is positive, i.e. ffcount > tick_ffcount. */ do { ffth = fftimehands; gen = ffth->gen; if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP) *bt = ffth->tick_time_lerp; else *bt = ffth->tick_time; *ffcount = ffth->tick_ffcount; } while (gen == 0 || gen != ffth->gen); } /* * Absolute clock conversion. Low level function to convert ffcounter to * bintime. The ffcounter is converted using the current ffclock period estimate * or the "interpolated period" to ensure monotonicity. * NOTE: this conversion may have been deferred, and the clock updated since the * hardware counter has been read. */ void ffclock_convert_abs(ffcounter ffcount, struct bintime *bt, uint32_t flags) { struct fftimehands *ffth; struct bintime bt2; ffcounter ffdelta; uint8_t gen; /* * No locking but check generation has not changed. Also need to make * sure ffdelta is positive, i.e. ffcount > tick_ffcount. 
*/ do { ffth = fftimehands; gen = ffth->gen; if (ffcount > ffth->tick_ffcount) ffdelta = ffcount - ffth->tick_ffcount; else ffdelta = ffth->tick_ffcount - ffcount; if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP) { *bt = ffth->tick_time_lerp; ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt2); } else { *bt = ffth->tick_time; ffclock_convert_delta(ffdelta, ffth->cest.period, &bt2); } if (ffcount > ffth->tick_ffcount) bintime_add(bt, &bt2); else bintime_sub(bt, &bt2); } while (gen == 0 || gen != ffth->gen); } /* * Difference clock conversion. * Low level function to Convert a time interval measured in RAW counter units * into bintime. The difference clock allows measuring small intervals much more * reliably than the absolute clock. */ void ffclock_convert_diff(ffcounter ffdelta, struct bintime *bt) { struct fftimehands *ffth; uint8_t gen; /* No locking but check generation has not changed. */ do { ffth = fftimehands; gen = ffth->gen; ffclock_convert_delta(ffdelta, ffth->cest.period, bt); } while (gen == 0 || gen != ffth->gen); } /* * Access to current ffcounter value. */ void ffclock_read_counter(ffcounter *ffcount) { struct timehands *th; struct fftimehands *ffth; unsigned int gen, delta; /* * ffclock_windup() called from tc_windup(), safe to rely on * th->th_generation only, for correct delta and ffcounter. */ do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); ffth = fftimehands; delta = tc_delta(th); *ffcount = ffth->tick_ffcount; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); *ffcount += delta; } void binuptime(struct bintime *bt) { binuptime_fromclock(bt, sysclock_active); } void nanouptime(struct timespec *tsp) { nanouptime_fromclock(tsp, sysclock_active); } void microuptime(struct timeval *tvp) { microuptime_fromclock(tvp, sysclock_active); } void bintime(struct bintime *bt) { bintime_fromclock(bt, sysclock_active); } void nanotime(struct timespec *tsp) { nanotime_fromclock(tsp, sysclock_active); } void microtime(struct timeval *tvp) { microtime_fromclock(tvp, sysclock_active); } void getbinuptime(struct bintime *bt) { getbinuptime_fromclock(bt, sysclock_active); } void getnanouptime(struct timespec *tsp) { getnanouptime_fromclock(tsp, sysclock_active); } void getmicrouptime(struct timeval *tvp) { getmicrouptime_fromclock(tvp, sysclock_active); } void getbintime(struct bintime *bt) { getbintime_fromclock(bt, sysclock_active); } void getnanotime(struct timespec *tsp) { getnanotime_fromclock(tsp, sysclock_active); } void getmicrotime(struct timeval *tvp) { getmicrouptime_fromclock(tvp, sysclock_active); } #endif /* FFCLOCK */ /* * This is a clone of getnanotime and used for walltimestamps. * The dtrace_ prefix prevents fbt from creating probes for * it so walltimestamp can be safely used in all fbt probes. */ void dtrace_getnanotime(struct timespec *tsp) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *tsp = th->th_nanotime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } /* * System clock currently providing time to the system. Modifiable via sysctl * when the FFCLOCK option is defined. */ int sysclock_active = SYSCLOCK_FBCK; /* Internal NTP status and error estimates. */ extern int time_status; extern long time_esterror; /* * Take a snapshot of sysclock data which can be used to compare system clocks * and generate timestamps after the fact. 
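Before the implementation, here is a sketch of how a consumer is expected to use the snapshot interface, for example a packet-timestamping path that wants to defer the conversion out of its hot path. The stamp_packet() name is made up, and the sketch assumes the declarations from sys/timeffc.h:

static void
stamp_packet(struct bintime *btp)
{
	struct sysclock_snap cs;

	/* Hot path: latch clock state and a counter delta (fast == 0). */
	sysclock_getsnapshot(&cs, 0);

	/*
	 * Possibly much later: convert the snapshot using whichever system
	 * clock was active when it was taken.  Flags such as FFCLOCK_LERP
	 * or FBCLOCK_UPTIME select the flavour of timestamp.
	 */
	(void)sysclock_snap2bintime(&cs, btp, cs.sysclock_active, 0);
}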
*/ void sysclock_getsnapshot(struct sysclock_snap *clock_snap, int fast) { struct fbclock_info *fbi; struct timehands *th; struct bintime bt; unsigned int delta, gen; #ifdef FFCLOCK ffcounter ffcount; struct fftimehands *ffth; struct ffclock_info *ffi; struct ffclock_estimate cest; ffi = &clock_snap->ff_info; #endif fbi = &clock_snap->fb_info; delta = 0; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); fbi->th_scale = th->th_scale; fbi->tick_time = th->th_offset; #ifdef FFCLOCK ffth = fftimehands; ffi->tick_time = ffth->tick_time_lerp; ffi->tick_time_lerp = ffth->tick_time_lerp; ffi->period = ffth->cest.period; ffi->period_lerp = ffth->period_lerp; clock_snap->ffcount = ffth->tick_ffcount; cest = ffth->cest; #endif if (!fast) delta = tc_delta(th); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); clock_snap->delta = delta; clock_snap->sysclock_active = sysclock_active; /* Record feedback clock status and error. */ clock_snap->fb_info.status = time_status; /* XXX: Very crude estimate of feedback clock error. */ bt.sec = time_esterror / 1000000; bt.frac = ((time_esterror - bt.sec) * 1000000) * (uint64_t)18446744073709ULL; clock_snap->fb_info.error = bt; #ifdef FFCLOCK if (!fast) clock_snap->ffcount += delta; /* Record feed-forward clock leap second adjustment. */ ffi->leapsec_adjustment = cest.leapsec_total; if (clock_snap->ffcount > cest.leapsec_next) ffi->leapsec_adjustment -= cest.leapsec; /* Record feed-forward clock status and error. */ clock_snap->ff_info.status = cest.status; ffcount = clock_snap->ffcount - cest.update_ffcount; ffclock_convert_delta(ffcount, cest.period, &bt); /* 18446744073709 = int(2^64/1e12), err_bound_rate in [ps/s]. */ bintime_mul(&bt, cest.errb_rate * (uint64_t)18446744073709ULL); /* 18446744073 = int(2^64 / 1e9), since err_abs in [ns]. */ bintime_addx(&bt, cest.errb_abs * (uint64_t)18446744073ULL); clock_snap->ff_info.error = bt; #endif } /* * Convert a sysclock snapshot into a struct bintime based on the specified * clock source and flags. */ int sysclock_snap2bintime(struct sysclock_snap *cs, struct bintime *bt, int whichclock, uint32_t flags) { struct bintime boottimebin; #ifdef FFCLOCK struct bintime bt2; uint64_t period; #endif switch (whichclock) { case SYSCLOCK_FBCK: *bt = cs->fb_info.tick_time; /* If snapshot was created with !fast, delta will be >0. */ if (cs->delta > 0) bintime_addx(bt, cs->fb_info.th_scale * cs->delta); if ((flags & FBCLOCK_UPTIME) == 0) { getboottimebin(&boottimebin); bintime_add(bt, &boottimebin); } break; #ifdef FFCLOCK case SYSCLOCK_FFWD: if (flags & FFCLOCK_LERP) { *bt = cs->ff_info.tick_time_lerp; period = cs->ff_info.period_lerp; } else { *bt = cs->ff_info.tick_time; period = cs->ff_info.period; } /* If snapshot was created with !fast, delta will be >0. */ if (cs->delta > 0) { ffclock_convert_delta(cs->delta, period, &bt2); bintime_add(bt, &bt2); } /* Leap second adjustment. */ if (flags & FFCLOCK_LEAPSEC) bt->sec -= cs->ff_info.leapsec_adjustment; /* Boot time adjustment, for uptime/monotonic clocks. */ if (flags & FFCLOCK_UPTIME) bintime_sub(bt, &ffclock_boottime); break; #endif default: return (EINVAL); break; } return (0); } /* * Initialize a new timecounter and possibly use it. 
*/ void tc_init(struct timecounter *tc) { u_int u; struct sysctl_oid *tc_root; u = tc->tc_frequency / tc->tc_counter_mask; /* XXX: We need some margin here, 10% is a guess */ u *= 11; u /= 10; if (u > hz && tc->tc_quality >= 0) { tc->tc_quality = -2000; if (bootverbose) { printf("Timecounter \"%s\" frequency %ju Hz", tc->tc_name, (uintmax_t)tc->tc_frequency); printf(" -- Insufficient hz, needs at least %u\n", u); } } else if (tc->tc_quality >= 0 || bootverbose) { printf("Timecounter \"%s\" frequency %ju Hz quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency, tc->tc_quality); } tc->tc_next = timecounters; timecounters = tc; /* * Set up sysctl tree for this counter. */ tc_root = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_kern_timecounter_tc), OID_AUTO, tc->tc_name, CTLFLAG_RW, 0, "timecounter description"); SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, "mask", CTLFLAG_RD, &(tc->tc_counter_mask), 0, "mask for implemented bits"); SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, "counter", CTLTYPE_UINT | CTLFLAG_RD, tc, sizeof(*tc), sysctl_kern_timecounter_get, "IU", "current timecounter value"); SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, "frequency", CTLTYPE_U64 | CTLFLAG_RD, tc, sizeof(*tc), sysctl_kern_timecounter_freq, "QU", "timecounter frequency"); SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, "quality", CTLFLAG_RD, &(tc->tc_quality), 0, "goodness of time counter"); /* * Do not automatically switch if the current tc was specifically * chosen. Never automatically use a timecounter with negative quality. * Even though we run on the dummy counter, switching here may be * worse since this timecounter may not be monotonic. */ if (tc_chosen) return; if (tc->tc_quality < 0) return; if (tc->tc_quality < timecounter->tc_quality) return; if (tc->tc_quality == timecounter->tc_quality && tc->tc_frequency < timecounter->tc_frequency) return; (void)tc->tc_get_timecount(tc); (void)tc->tc_get_timecount(tc); timecounter = tc; } /* Report the frequency of the current timecounter. */ uint64_t tc_getfrequency(void) { return (timehands->th_counter->tc_frequency); } static struct mtx tc_setclock_mtx; MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN); /* * Step our concept of UTC. This is done by modifying our estimate of * when we booted. */ void tc_setclock(struct timespec *ts) { struct timespec tbef, taft; struct bintime bt, bt2; timespec2bintime(ts, &bt); nanotime(&tbef); mtx_lock_spin(&tc_setclock_mtx); cpu_tick_calibrate(1); binuptime(&bt2); bintime_sub(&bt, &bt2); /* XXX fiddle all the little crinkly bits around the fiords... */ tc_windup(&bt); mtx_unlock_spin(&tc_setclock_mtx); if (timestepwarnings) { nanotime(&taft); log(LOG_INFO, "Time stepped from %jd.%09ld to %jd.%09ld (%jd.%09ld)\n", (intmax_t)tbef.tv_sec, tbef.tv_nsec, (intmax_t)taft.tv_sec, taft.tv_nsec, (intmax_t)ts->tv_sec, ts->tv_nsec); } } /* * Initialize the next struct timehands in the ring and make * it the active timehands. Along the way we might switch to a different * timecounter and/or do seconds processing in NTP. Slightly magic. */ static void tc_windup(struct bintime *new_boottimebin) { struct bintime bt; struct timehands *th, *tho; uint64_t scale; u_int delta, ncount, ogen; int i; time_t t; /* * Make the next timehands a copy of the current one, but do * not overwrite the generation or next pointer. While we * update the contents, the generation must be zero. 
We need * to ensure that the zero generation is visible before the * data updates become visible, which requires release fence. * For similar reasons, re-reading of the generation after the * data is read should use acquire fence. */ tho = timehands; th = tho->th_next; ogen = th->th_generation; th->th_generation = 0; atomic_thread_fence_rel(); bcopy(tho, th, offsetof(struct timehands, th_generation)); if (new_boottimebin != NULL) th->th_boottime = *new_boottimebin; /* * Capture a timecounter delta on the current timecounter and if * changing timecounters, a counter value from the new timecounter. * Update the offset fields accordingly. */ delta = tc_delta(th); if (th->th_counter != timecounter) ncount = timecounter->tc_get_timecount(timecounter); else ncount = 0; #ifdef FFCLOCK ffclock_windup(delta); #endif th->th_offset_count += delta; th->th_offset_count &= th->th_counter->tc_counter_mask; while (delta > th->th_counter->tc_frequency) { /* Eat complete unadjusted seconds. */ delta -= th->th_counter->tc_frequency; th->th_offset.sec++; } if ((delta > th->th_counter->tc_frequency / 2) && (th->th_scale * delta < ((uint64_t)1 << 63))) { /* The product th_scale * delta just barely overflows. */ th->th_offset.sec++; } bintime_addx(&th->th_offset, th->th_scale * delta); /* * Hardware latching timecounters may not generate interrupts on * PPS events, so instead we poll them. There is a finite risk that * the hardware might capture a count which is later than the one we * got above, and therefore possibly in the next NTP second which might * have a different rate than the current NTP second. It doesn't * matter in practice. */ if (tho->th_counter->tc_poll_pps) tho->th_counter->tc_poll_pps(tho->th_counter); /* * Deal with NTP second processing. The for loop normally * iterates at most once, but in extreme situations it might * keep NTP sane if timeouts are not run for several seconds. * At boot, the time step can be large when the TOD hardware * has been read, so on really large steps, we call * ntp_update_second only twice. We need to call it twice in * case we missed a leap second. */ bt = th->th_offset; bintime_add(&bt, &th->th_boottime); i = bt.sec - tho->th_microtime.tv_sec; if (i > LARGE_STEP) i = 2; for (; i > 0; i--) { t = bt.sec; ntp_update_second(&th->th_adjustment, &bt.sec); if (bt.sec != t) th->th_boottime.sec += bt.sec - t; } + th->th_bintime = th->th_offset; + bintime_add(&th->th_bintime, &th->th_boottime); /* Update the UTC timestamps used by the get*() functions. */ /* XXX shouldn't do this here. Should force non-`get' versions. */ bintime2timeval(&bt, &th->th_microtime); bintime2timespec(&bt, &th->th_nanotime); /* Now is a good time to change timecounters. */ if (th->th_counter != timecounter) { #ifndef __arm__ if ((timecounter->tc_flags & TC_FLAGS_C2STOP) != 0) cpu_disable_c2_sleep++; if ((th->th_counter->tc_flags & TC_FLAGS_C2STOP) != 0) cpu_disable_c2_sleep--; #endif th->th_counter = timecounter; th->th_offset_count = ncount; tc_min_ticktock_freq = max(1, timecounter->tc_frequency / (((uint64_t)timecounter->tc_counter_mask + 1) / 3)); #ifdef FFCLOCK ffclock_change_tc(th); #endif } /*- * Recalculate the scaling factor. We want the number of 1/2^64 * fractions of a second per period of the hardware counter, taking * into account the th_adjustment factor which the NTP PLL/adjtime(2) * processing provides us with. 
* * The th_adjustment is nanoseconds per second with 32 bit binary * fraction and we want 64 bit binary fraction of second: * * x = a * 2^32 / 10^9 = a * 4.294967296 * * The range of th_adjustment is +/- 5000PPM so inside a 64bit int * we can only multiply by about 850 without overflowing, that * leaves no suitably precise fractions for multiply before divide. * * Divide before multiply with a fraction of 2199/512 results in a * systematic undercompensation of 10PPM of th_adjustment. On a * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. * * We happily sacrifice the lowest of the 64 bits of our result * to the goddess of code clarity. * */ scale = (uint64_t)1 << 63; scale += (th->th_adjustment / 1024) * 2199; scale /= th->th_counter->tc_frequency; th->th_scale = scale * 2; /* * Now that the struct timehands is again consistent, set the new * generation number, making sure to not make it zero. */ if (++ogen == 0) ogen = 1; atomic_store_rel_int(&th->th_generation, ogen); /* Go live with the new struct timehands. */ #ifdef FFCLOCK switch (sysclock_active) { case SYSCLOCK_FBCK: #endif time_second = th->th_microtime.tv_sec; time_uptime = th->th_offset.sec; #ifdef FFCLOCK break; case SYSCLOCK_FFWD: time_second = fftimehands->tick_time_lerp.sec; time_uptime = fftimehands->tick_time_lerp.sec - ffclock_boottime.sec; break; } #endif timehands = th; timekeep_push_vdso(); } /* Report or change the active timecounter hardware. */ static int sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS) { char newname[32]; struct timecounter *newtc, *tc; int error; tc = timecounter; strlcpy(newname, tc->tc_name, sizeof(newname)); error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req); if (error != 0 || req->newptr == NULL) return (error); /* Record that the tc in use now was specifically chosen. */ tc_chosen = 1; if (strcmp(newname, tc->tc_name) == 0) return (0); for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) { if (strcmp(newname, newtc->tc_name) != 0) continue; /* Warm up new timecounter. */ (void)newtc->tc_get_timecount(newtc); (void)newtc->tc_get_timecount(newtc); timecounter = newtc; /* * The vdso timehands update is deferred until the next * 'tc_windup()'. * * This is prudent given that 'timekeep_push_vdso()' does not * use any locking and that it can be called in hard interrupt * context via 'tc_windup()'. */ return (0); } return (EINVAL); } SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, sysctl_kern_timecounter_hardware, "A", "Timecounter hardware selected"); /* Report the available timecounter hardware. */ static int sysctl_kern_timecounter_choice(SYSCTL_HANDLER_ARGS) { struct sbuf sb; struct timecounter *tc; int error; sbuf_new_for_sysctl(&sb, NULL, 0, req); for (tc = timecounters; tc != NULL; tc = tc->tc_next) { if (tc != timecounters) sbuf_putc(&sb, ' '); sbuf_printf(&sb, "%s(%d)", tc->tc_name, tc->tc_quality); } error = sbuf_finish(&sb); sbuf_delete(&sb); return (error); } SYSCTL_PROC(_kern_timecounter, OID_AUTO, choice, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, sysctl_kern_timecounter_choice, "A", "Timecounter hardware detected"); /* * RFC 2783 PPS-API implementation. */ /* * Return true if the driver is aware of the abi version extensions in the * pps_state structure, and it supports at least the given abi version number. 
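For orientation, the driver-facing half of this PPS API is small: a driver exposing a PPS signal initializes a pps_state at attach time, then latches and processes each pulse from its interrupt path. The sc_pps variable and the mydev_* functions below are placeholders; real drivers embed the pps_state in their softc:

static struct pps_state sc_pps;

static void
mydev_pps_attach(void)
{

	sc_pps.ppscap = PPS_CAPTUREASSERT;	/* edges the hardware can latch */
	pps_init(&sc_pps);		/* or pps_init_abi() for new drivers */
}

static void
mydev_pps_intr(void)
{

	pps_capture(&sc_pps);			/* latch the timecounter ASAP */
	pps_event(&sc_pps, PPS_CAPTUREASSERT);	/* convert and publish the event */
}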
*/ static inline int abi_aware(struct pps_state *pps, int vers) { return ((pps->kcmode & KCMODE_ABIFLAG) && pps->driver_abi >= vers); } static int pps_fetch(struct pps_fetch_args *fapi, struct pps_state *pps) { int err, timo; pps_seq_t aseq, cseq; struct timeval tv; if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC) return (EINVAL); /* * If no timeout is requested, immediately return whatever values were * most recently captured. If timeout seconds is -1, that's a request * to block without a timeout. WITNESS won't let us sleep forever * without a lock (we really don't need a lock), so just repeatedly * sleep a long time. */ if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) { if (fapi->timeout.tv_sec == -1) timo = 0x7fffffff; else { tv.tv_sec = fapi->timeout.tv_sec; tv.tv_usec = fapi->timeout.tv_nsec / 1000; timo = tvtohz(&tv); } aseq = pps->ppsinfo.assert_sequence; cseq = pps->ppsinfo.clear_sequence; while (aseq == pps->ppsinfo.assert_sequence && cseq == pps->ppsinfo.clear_sequence) { if (abi_aware(pps, 1) && pps->driver_mtx != NULL) { if (pps->flags & PPSFLAG_MTX_SPIN) { err = msleep_spin(pps, pps->driver_mtx, "ppsfch", timo); } else { err = msleep(pps, pps->driver_mtx, PCATCH, "ppsfch", timo); } } else { err = tsleep(pps, PCATCH, "ppsfch", timo); } if (err == EWOULDBLOCK) { if (fapi->timeout.tv_sec == -1) { continue; } else { return (ETIMEDOUT); } } else if (err != 0) { return (err); } } } pps->ppsinfo.current_mode = pps->ppsparam.mode; fapi->pps_info_buf = pps->ppsinfo; return (0); } int pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps) { pps_params_t *app; struct pps_fetch_args *fapi; #ifdef FFCLOCK struct pps_fetch_ffc_args *fapi_ffc; #endif #ifdef PPS_SYNC struct pps_kcbind_args *kapi; #endif KASSERT(pps != NULL, ("NULL pps pointer in pps_ioctl")); switch (cmd) { case PPS_IOC_CREATE: return (0); case PPS_IOC_DESTROY: return (0); case PPS_IOC_SETPARAMS: app = (pps_params_t *)data; if (app->mode & ~pps->ppscap) return (EINVAL); #ifdef FFCLOCK /* Ensure only a single clock is selected for ffc timestamp. */ if ((app->mode & PPS_TSCLK_MASK) == PPS_TSCLK_MASK) return (EINVAL); #endif pps->ppsparam = *app; return (0); case PPS_IOC_GETPARAMS: app = (pps_params_t *)data; *app = pps->ppsparam; app->api_version = PPS_API_VERS_1; return (0); case PPS_IOC_GETCAP: *(int*)data = pps->ppscap; return (0); case PPS_IOC_FETCH: fapi = (struct pps_fetch_args *)data; return (pps_fetch(fapi, pps)); #ifdef FFCLOCK case PPS_IOC_FETCH_FFCOUNTER: fapi_ffc = (struct pps_fetch_ffc_args *)data; if (fapi_ffc->tsformat && fapi_ffc->tsformat != PPS_TSFMT_TSPEC) return (EINVAL); if (fapi_ffc->timeout.tv_sec || fapi_ffc->timeout.tv_nsec) return (EOPNOTSUPP); pps->ppsinfo_ffc.current_mode = pps->ppsparam.mode; fapi_ffc->pps_info_buf_ffc = pps->ppsinfo_ffc; /* Overwrite timestamps if feedback clock selected. 
*/ switch (pps->ppsparam.mode & PPS_TSCLK_MASK) { case PPS_TSCLK_FBCK: fapi_ffc->pps_info_buf_ffc.assert_timestamp = pps->ppsinfo.assert_timestamp; fapi_ffc->pps_info_buf_ffc.clear_timestamp = pps->ppsinfo.clear_timestamp; break; case PPS_TSCLK_FFWD: break; default: break; } return (0); #endif /* FFCLOCK */ case PPS_IOC_KCBIND: #ifdef PPS_SYNC kapi = (struct pps_kcbind_args *)data; /* XXX Only root should be able to do this */ if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC) return (EINVAL); if (kapi->kernel_consumer != PPS_KC_HARDPPS) return (EINVAL); if (kapi->edge & ~pps->ppscap) return (EINVAL); pps->kcmode = (kapi->edge & KCMODE_EDGEMASK) | (pps->kcmode & KCMODE_ABIFLAG); return (0); #else return (EOPNOTSUPP); #endif default: return (ENOIOCTL); } } void pps_init(struct pps_state *pps) { pps->ppscap |= PPS_TSFMT_TSPEC | PPS_CANWAIT; if (pps->ppscap & PPS_CAPTUREASSERT) pps->ppscap |= PPS_OFFSETASSERT; if (pps->ppscap & PPS_CAPTURECLEAR) pps->ppscap |= PPS_OFFSETCLEAR; #ifdef FFCLOCK pps->ppscap |= PPS_TSCLK_MASK; #endif pps->kcmode &= ~KCMODE_ABIFLAG; } void pps_init_abi(struct pps_state *pps) { pps_init(pps); if (pps->driver_abi > 0) { pps->kcmode |= KCMODE_ABIFLAG; pps->kernel_abi = PPS_ABI_VERSION; } } void pps_capture(struct pps_state *pps) { struct timehands *th; KASSERT(pps != NULL, ("NULL pps pointer in pps_capture")); th = timehands; pps->capgen = atomic_load_acq_int(&th->th_generation); pps->capth = th; #ifdef FFCLOCK pps->capffth = fftimehands; #endif pps->capcount = th->th_counter->tc_get_timecount(th->th_counter); atomic_thread_fence_acq(); if (pps->capgen != th->th_generation) pps->capgen = 0; } void pps_event(struct pps_state *pps, int event) { struct bintime bt; struct timespec ts, *tsp, *osp; u_int tcount, *pcount; int foff; pps_seq_t *pseq; #ifdef FFCLOCK struct timespec *tsp_ffc; pps_seq_t *pseq_ffc; ffcounter *ffcount; #endif #ifdef PPS_SYNC int fhard; #endif KASSERT(pps != NULL, ("NULL pps pointer in pps_event")); /* Nothing to do if not currently set to capture this event type. */ if ((event & pps->ppsparam.mode) == 0) return; /* If the timecounter was wound up underneath us, bail out. */ if (pps->capgen == 0 || pps->capgen != atomic_load_acq_int(&pps->capth->th_generation)) return; /* Things would be easier with arrays. */ if (event == PPS_CAPTUREASSERT) { tsp = &pps->ppsinfo.assert_timestamp; osp = &pps->ppsparam.assert_offset; foff = pps->ppsparam.mode & PPS_OFFSETASSERT; #ifdef PPS_SYNC fhard = pps->kcmode & PPS_CAPTUREASSERT; #endif pcount = &pps->ppscount[0]; pseq = &pps->ppsinfo.assert_sequence; #ifdef FFCLOCK ffcount = &pps->ppsinfo_ffc.assert_ffcount; tsp_ffc = &pps->ppsinfo_ffc.assert_timestamp; pseq_ffc = &pps->ppsinfo_ffc.assert_sequence; #endif } else { tsp = &pps->ppsinfo.clear_timestamp; osp = &pps->ppsparam.clear_offset; foff = pps->ppsparam.mode & PPS_OFFSETCLEAR; #ifdef PPS_SYNC fhard = pps->kcmode & PPS_CAPTURECLEAR; #endif pcount = &pps->ppscount[1]; pseq = &pps->ppsinfo.clear_sequence; #ifdef FFCLOCK ffcount = &pps->ppsinfo_ffc.clear_ffcount; tsp_ffc = &pps->ppsinfo_ffc.clear_timestamp; pseq_ffc = &pps->ppsinfo_ffc.clear_sequence; #endif } /* * If the timecounter changed, we cannot compare the count values, so * we have to drop the rest of the PPS-stuff until the next event. */ if (pps->ppstc != pps->capth->th_counter) { pps->ppstc = pps->capth->th_counter; *pcount = pps->capcount; pps->ppscount[2] = pps->capcount; return; } /* Convert the count to a timespec. 
*/ tcount = pps->capcount - pps->capth->th_offset_count; tcount &= pps->capth->th_counter->tc_counter_mask; - bt = pps->capth->th_offset; + bt = pps->capth->th_bintime; bintime_addx(&bt, pps->capth->th_scale * tcount); - bintime_add(&bt, &pps->capth->th_boottime); bintime2timespec(&bt, &ts); /* If the timecounter was wound up underneath us, bail out. */ atomic_thread_fence_acq(); if (pps->capgen != pps->capth->th_generation) return; *pcount = pps->capcount; (*pseq)++; *tsp = ts; if (foff) { timespecadd(tsp, osp); if (tsp->tv_nsec < 0) { tsp->tv_nsec += 1000000000; tsp->tv_sec -= 1; } } #ifdef FFCLOCK *ffcount = pps->capffth->tick_ffcount + tcount; bt = pps->capffth->tick_time; ffclock_convert_delta(tcount, pps->capffth->cest.period, &bt); bintime_add(&bt, &pps->capffth->tick_time); bintime2timespec(&bt, &ts); (*pseq_ffc)++; *tsp_ffc = ts; #endif #ifdef PPS_SYNC if (fhard) { uint64_t scale; /* * Feed the NTP PLL/FLL. * The FLL wants to know how many (hardware) nanoseconds * elapsed since the previous event. */ tcount = pps->capcount - pps->ppscount[2]; pps->ppscount[2] = pps->capcount; tcount &= pps->capth->th_counter->tc_counter_mask; scale = (uint64_t)1 << 63; scale /= pps->capth->th_counter->tc_frequency; scale *= 2; bt.sec = 0; bt.frac = 0; bintime_addx(&bt, scale * tcount); bintime2timespec(&bt, &ts); hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec); } #endif /* Wakeup anyone sleeping in pps_fetch(). */ wakeup(pps); } /* * Timecounters need to be updated every so often to prevent the hardware * counter from overflowing. Updating also recalculates the cached values * used by the get*() family of functions, so their precision depends on * the update frequency. */ static int tc_tick; SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tc_tick, 0, "Approximate number of hardclock ticks in a millisecond"); void tc_ticktock(int cnt) { static int count; if (mtx_trylock_spin(&tc_setclock_mtx)) { count += cnt; if (count >= tc_tick) { count = 0; tc_windup(NULL); } mtx_unlock_spin(&tc_setclock_mtx); } } static void __inline tc_adjprecision(void) { int t; if (tc_timepercentage > 0) { t = (99 + tc_timepercentage) / tc_timepercentage; tc_precexp = fls(t + (t >> 1)) - 1; FREQ2BT(hz / tc_tick, &bt_timethreshold); FREQ2BT(hz, &bt_tickthreshold); bintime_shift(&bt_timethreshold, tc_precexp); bintime_shift(&bt_tickthreshold, tc_precexp); } else { tc_precexp = 31; bt_timethreshold.sec = INT_MAX; bt_timethreshold.frac = ~(uint64_t)0; bt_tickthreshold = bt_timethreshold; } sbt_timethreshold = bttosbt(bt_timethreshold); sbt_tickthreshold = bttosbt(bt_tickthreshold); } static int sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS) { int error, val; val = tc_timepercentage; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); tc_timepercentage = val; if (cold) goto done; tc_adjprecision(); done: return (0); } static void inittimecounter(void *dummy) { u_int p; int tick_rate; /* * Set the initial timeout to * max(1, ). * People should probably not use the sysctl to set the timeout * to smaller than its initial value, since that value is the * smallest reasonable one. If they want better timestamps they * should use the non-"get"* functions. 
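In caller terms, the trade-off described here is between the cached get*() interfaces, refreshed by tc_windup(), and the precise interfaces that read the timecounter hardware on every call; for example:

	struct timespec ts;

	getnanouptime(&ts);	/* cached: last tc_windup() snapshot, at most ~1 ms stale by default */
	nanouptime(&ts);	/* precise: reads the hardware counter on every call */
	getnanotime(&ts);	/* cached UTC wall clock */
	nanotime(&ts);		/* precise UTC wall clock */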
*/ if (hz > 1000) tc_tick = (hz + 500) / 1000; else tc_tick = 1; tc_adjprecision(); FREQ2BT(hz, &tick_bt); tick_sbt = bttosbt(tick_bt); tick_rate = hz / tc_tick; FREQ2BT(tick_rate, &tc_tick_bt); tc_tick_sbt = bttosbt(tc_tick_bt); p = (tc_tick * 1000000) / hz; printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000); #ifdef FFCLOCK ffclock_init(); #endif /* warm up new timecounter (again) and get rolling. */ (void)timecounter->tc_get_timecount(timecounter); (void)timecounter->tc_get_timecount(timecounter); mtx_lock_spin(&tc_setclock_mtx); tc_windup(NULL); mtx_unlock_spin(&tc_setclock_mtx); } SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL); /* Cpu tick handling -------------------------------------------------*/ static int cpu_tick_variable; static uint64_t cpu_tick_frequency; static DPCPU_DEFINE(uint64_t, tc_cpu_ticks_base); static DPCPU_DEFINE(unsigned, tc_cpu_ticks_last); static uint64_t tc_cpu_ticks(void) { struct timecounter *tc; uint64_t res, *base; unsigned u, *last; critical_enter(); base = DPCPU_PTR(tc_cpu_ticks_base); last = DPCPU_PTR(tc_cpu_ticks_last); tc = timehands->th_counter; u = tc->tc_get_timecount(tc) & tc->tc_counter_mask; if (u < *last) *base += (uint64_t)tc->tc_counter_mask + 1; *last = u; res = u + *base; critical_exit(); return (res); } void cpu_tick_calibration(void) { static time_t last_calib; if (time_uptime != last_calib && !(time_uptime & 0xf)) { cpu_tick_calibrate(0); last_calib = time_uptime; } } /* * This function gets called every 16 seconds on only one designated * CPU in the system from hardclock() via cpu_tick_calibration()(). * * Whenever the real time clock is stepped we get called with reset=1 * to make sure we handle suspend/resume and similar events correctly. */ static void cpu_tick_calibrate(int reset) { static uint64_t c_last; uint64_t c_this, c_delta; static struct bintime t_last; struct bintime t_this, t_delta; uint32_t divi; if (reset) { /* The clock was stepped, abort & reset */ t_last.sec = 0; return; } /* we don't calibrate fixed rate cputicks */ if (!cpu_tick_variable) return; getbinuptime(&t_this); c_this = cpu_ticks(); if (t_last.sec != 0) { c_delta = c_this - c_last; t_delta = t_this; bintime_sub(&t_delta, &t_last); /* * Headroom: * 2^(64-20) / 16[s] = * 2^(44) / 16[s] = * 17.592.186.044.416 / 16 = * 1.099.511.627.776 [Hz] */ divi = t_delta.sec << 20; divi |= t_delta.frac >> (64 - 20); c_delta <<= 20; c_delta /= divi; if (c_delta > cpu_tick_frequency) { if (0 && bootverbose) printf("cpu_tick increased to %ju Hz\n", c_delta); cpu_tick_frequency = c_delta; } } c_last = c_this; t_last = t_this; } void set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var) { if (func == NULL) { cpu_ticks = tc_cpu_ticks; } else { cpu_tick_frequency = freq; cpu_tick_variable = var; cpu_ticks = func; } } uint64_t cpu_tickrate(void) { if (cpu_ticks == tc_cpu_ticks) return (tc_getfrequency()); return (cpu_tick_frequency); } /* * We need to be slightly careful converting cputicks to microseconds. * There is plenty of margin in 64 bits of microseconds (half a million * years) and in 64 bits at 4 GHz (146 years), but if we do a multiply * before divide conversion (to retain precision) we find that the * margin shrinks to 1.5 hours (one millionth of 146y). * With a three prong approach we never lose significant bits, no * matter what the cputick rate and length of timeinterval is. 
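To put numbers on that margin: at a 4 GHz cputick rate, a naive multiply-before-divide (tick * 1000000 / rate) overflows 64 bits once roughly 2^64 / 10^6, about 1.8 * 10^13, ticks have accumulated, which is only a bit over an hour of CPU time. Callers therefore just use the helper defined below, for instance:

	uint64_t ticks, usecs;

	ticks = cpu_ticks();		/* current per-CPU tick counter */
	usecs = cputick2usec(ticks);	/* safe across the whole 64-bit range */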
*/ uint64_t cputick2usec(uint64_t tick) { if (tick > 18446744073709551LL) /* floor(2^64 / 1000) */ return (tick / (cpu_tickrate() / 1000000LL)); else if (tick > 18446744073709LL) /* floor(2^64 / 1000000) */ return ((tick * 1000LL) / (cpu_tickrate() / 1000LL)); else return ((tick * 1000000LL) / cpu_tickrate()); } cpu_tick_f *cpu_ticks = tc_cpu_ticks; static int vdso_th_enable = 1; static int sysctl_fast_gettime(SYSCTL_HANDLER_ARGS) { int old_vdso_th_enable, error; old_vdso_th_enable = vdso_th_enable; error = sysctl_handle_int(oidp, &old_vdso_th_enable, 0, req); if (error != 0) return (error); vdso_th_enable = old_vdso_th_enable; return (0); } SYSCTL_PROC(_kern_timecounter, OID_AUTO, fast_gettime, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_fast_gettime, "I", "Enable fast time of day"); uint32_t tc_fill_vdso_timehands(struct vdso_timehands *vdso_th) { struct timehands *th; uint32_t enabled; th = timehands; vdso_th->th_algo = VDSO_TH_ALGO_1; vdso_th->th_scale = th->th_scale; vdso_th->th_offset_count = th->th_offset_count; vdso_th->th_counter_mask = th->th_counter->tc_counter_mask; vdso_th->th_offset = th->th_offset; vdso_th->th_boottime = th->th_boottime; enabled = cpu_fill_vdso_timehands(vdso_th, th->th_counter); if (!vdso_th_enable) enabled = 0; return (enabled); } #ifdef COMPAT_FREEBSD32 uint32_t tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32) { struct timehands *th; uint32_t enabled; th = timehands; vdso_th32->th_algo = VDSO_TH_ALGO_1; *(uint64_t *)&vdso_th32->th_scale[0] = th->th_scale; vdso_th32->th_offset_count = th->th_offset_count; vdso_th32->th_counter_mask = th->th_counter->tc_counter_mask; vdso_th32->th_offset.sec = th->th_offset.sec; *(uint64_t *)&vdso_th32->th_offset.frac[0] = th->th_offset.frac; vdso_th32->th_boottime.sec = th->th_boottime.sec; *(uint64_t *)&vdso_th32->th_boottime.frac[0] = th->th_boottime.frac; enabled = cpu_fill_vdso_timehands32(vdso_th32, th->th_counter); if (!vdso_th_enable) enabled = 0; return (enabled); } #endif Index: user/alc/PQ_LAUNDRY/sys/mips/atheros/ar71xx_gpio.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/mips/atheros/ar71xx_gpio.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/mips/atheros/ar71xx_gpio.c (revision 303642) @@ -1,589 +1,637 @@ /*- * Copyright (c) 2009, Oleksandr Tymoshenko * Copyright (c) 2009, Luiz Otavio O Souza. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * GPIO driver for AR71xx */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "gpio_if.h" #define DEFAULT_CAPS (GPIO_PIN_INPUT | GPIO_PIN_OUTPUT) /* * Helpers */ static void ar71xx_gpio_function_enable(struct ar71xx_gpio_softc *sc, uint32_t mask); static void ar71xx_gpio_function_disable(struct ar71xx_gpio_softc *sc, uint32_t mask); static void ar71xx_gpio_pin_configure(struct ar71xx_gpio_softc *sc, struct gpio_pin *pin, uint32_t flags); /* * Driver stuff */ static int ar71xx_gpio_probe(device_t dev); static int ar71xx_gpio_attach(device_t dev); static int ar71xx_gpio_detach(device_t dev); static int ar71xx_gpio_filter(void *arg); static void ar71xx_gpio_intr(void *arg); /* * GPIO interface */ static device_t ar71xx_gpio_get_bus(device_t); static int ar71xx_gpio_pin_max(device_t dev, int *maxpin); static int ar71xx_gpio_pin_getcaps(device_t dev, uint32_t pin, uint32_t *caps); static int ar71xx_gpio_pin_getflags(device_t dev, uint32_t pin, uint32_t *flags); static int ar71xx_gpio_pin_getname(device_t dev, uint32_t pin, char *name); static int ar71xx_gpio_pin_setflags(device_t dev, uint32_t pin, uint32_t flags); static int ar71xx_gpio_pin_set(device_t dev, uint32_t pin, unsigned int value); static int ar71xx_gpio_pin_get(device_t dev, uint32_t pin, unsigned int *val); static int ar71xx_gpio_pin_toggle(device_t dev, uint32_t pin); /* * Enable/disable the GPIO function control space. * * This is primarily for the AR71xx, which has SPI CS1/CS2, UART, SLIC, I2S * as GPIO pin options. */ static void ar71xx_gpio_function_enable(struct ar71xx_gpio_softc *sc, uint32_t mask) { /* * XXX TODO: refactor this out into a per-chipset method. */ if (ar71xx_soc == AR71XX_SOC_AR9341 || ar71xx_soc == AR71XX_SOC_AR9342 || ar71xx_soc == AR71XX_SOC_AR9344 || ar71xx_soc == AR71XX_SOC_QCA9533 || ar71xx_soc == AR71XX_SOC_QCA9533_V2 || ar71xx_soc == AR71XX_SOC_QCA9556 || ar71xx_soc == AR71XX_SOC_QCA9558) GPIO_SET_BITS(sc, AR934X_GPIO_REG_FUNC, mask); else GPIO_SET_BITS(sc, AR71XX_GPIO_FUNCTION, mask); } static void ar71xx_gpio_function_disable(struct ar71xx_gpio_softc *sc, uint32_t mask) { /* * XXX TODO: refactor this out into a per-chipset method. */ if (ar71xx_soc == AR71XX_SOC_AR9341 || ar71xx_soc == AR71XX_SOC_AR9342 || ar71xx_soc == AR71XX_SOC_AR9344 || ar71xx_soc == AR71XX_SOC_QCA9533 || ar71xx_soc == AR71XX_SOC_QCA9533_V2 || ar71xx_soc == AR71XX_SOC_QCA9556 || ar71xx_soc == AR71XX_SOC_QCA9558) GPIO_CLEAR_BITS(sc, AR934X_GPIO_REG_FUNC, mask); else GPIO_CLEAR_BITS(sc, AR71XX_GPIO_FUNCTION, mask); } +/* + * On most platforms, GPIO_OE is a bitmap where the bit set + * means "enable output." + * + * On AR934x and QCA953x, it's the opposite - the bit set means + * "input enable". 
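+ *
+ * For example, to drive pin N as an output most SoCs need bit N set
+ * in GPIO_OE, while the AR934x/QCA953x parts need it cleared.  The
+ * ar71xx_gpio_oe_set_output() / ar71xx_gpio_oe_set_input() helpers
+ * below hide that polarity difference from the rest of the driver.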
+ */ +static int +ar71xx_gpio_oe_is_high(void) +{ + switch (ar71xx_soc) { + case AR71XX_SOC_AR9344: + case AR71XX_SOC_QCA9533: + case AR71XX_SOC_QCA9533_V2: + return 0; + default: + return 1; + } +} + static void -ar71xx_gpio_pin_configure(struct ar71xx_gpio_softc *sc, struct gpio_pin *pin, - unsigned int flags) +ar71xx_gpio_oe_set_output(struct ar71xx_gpio_softc *sc, int b) { uint32_t mask; - mask = 1 << pin->gp_pin; + mask = 1 << b; + if (ar71xx_gpio_oe_is_high()) + GPIO_SET_BITS(sc, AR71XX_GPIO_OE, mask); + else + GPIO_CLEAR_BITS(sc, AR71XX_GPIO_OE, mask); +} + +static void +ar71xx_gpio_oe_set_input(struct ar71xx_gpio_softc *sc, int b) +{ + uint32_t mask; + + mask = 1 << b; + + if (ar71xx_gpio_oe_is_high()) + GPIO_CLEAR_BITS(sc, AR71XX_GPIO_OE, mask); + else + GPIO_SET_BITS(sc, AR71XX_GPIO_OE, mask); +} + +static void +ar71xx_gpio_pin_configure(struct ar71xx_gpio_softc *sc, struct gpio_pin *pin, + unsigned int flags) +{ + /* * Manage input/output */ if (flags & (GPIO_PIN_INPUT|GPIO_PIN_OUTPUT)) { pin->gp_flags &= ~(GPIO_PIN_INPUT|GPIO_PIN_OUTPUT); if (flags & GPIO_PIN_OUTPUT) { pin->gp_flags |= GPIO_PIN_OUTPUT; - GPIO_SET_BITS(sc, AR71XX_GPIO_OE, mask); - } - else { + ar71xx_gpio_oe_set_output(sc, pin->gp_pin); + } else { pin->gp_flags |= GPIO_PIN_INPUT; - GPIO_CLEAR_BITS(sc, AR71XX_GPIO_OE, mask); + ar71xx_gpio_oe_set_input(sc, pin->gp_pin); } } } static device_t ar71xx_gpio_get_bus(device_t dev) { struct ar71xx_gpio_softc *sc; sc = device_get_softc(dev); return (sc->busdev); } static int ar71xx_gpio_pin_max(device_t dev, int *maxpin) { switch (ar71xx_soc) { case AR71XX_SOC_AR9130: case AR71XX_SOC_AR9132: *maxpin = AR91XX_GPIO_PINS - 1; break; case AR71XX_SOC_AR7240: case AR71XX_SOC_AR7241: case AR71XX_SOC_AR7242: *maxpin = AR724X_GPIO_PINS - 1; break; case AR71XX_SOC_AR9330: case AR71XX_SOC_AR9331: *maxpin = AR933X_GPIO_COUNT - 1; break; case AR71XX_SOC_AR9341: case AR71XX_SOC_AR9342: case AR71XX_SOC_AR9344: *maxpin = AR934X_GPIO_COUNT - 1; break; case AR71XX_SOC_QCA9533: case AR71XX_SOC_QCA9533_V2: *maxpin = QCA953X_GPIO_COUNT - 1; break; case AR71XX_SOC_QCA9556: case AR71XX_SOC_QCA9558: *maxpin = QCA955X_GPIO_COUNT - 1; break; default: *maxpin = AR71XX_GPIO_PINS - 1; } return (0); } static int ar71xx_gpio_pin_getcaps(device_t dev, uint32_t pin, uint32_t *caps) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); GPIO_LOCK(sc); *caps = sc->gpio_pins[i].gp_caps; GPIO_UNLOCK(sc); return (0); } static int ar71xx_gpio_pin_getflags(device_t dev, uint32_t pin, uint32_t *flags) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); GPIO_LOCK(sc); *flags = sc->gpio_pins[i].gp_flags; GPIO_UNLOCK(sc); return (0); } static int ar71xx_gpio_pin_getname(device_t dev, uint32_t pin, char *name) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); GPIO_LOCK(sc); memcpy(name, sc->gpio_pins[i].gp_name, GPIOMAXNAME); GPIO_UNLOCK(sc); return (0); } static int ar71xx_gpio_pin_setflags(device_t dev, uint32_t pin, uint32_t flags) { int i; struct ar71xx_gpio_softc *sc = device_get_softc(dev); for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return 
(EINVAL); ar71xx_gpio_pin_configure(sc, &sc->gpio_pins[i], flags); return (0); } static int ar71xx_gpio_pin_set(device_t dev, uint32_t pin, unsigned int value) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); if (value) GPIO_WRITE(sc, AR71XX_GPIO_SET, (1 << pin)); else GPIO_WRITE(sc, AR71XX_GPIO_CLEAR, (1 << pin)); return (0); } static int ar71xx_gpio_pin_get(device_t dev, uint32_t pin, unsigned int *val) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); *val = (GPIO_READ(sc, AR71XX_GPIO_IN) & (1 << pin)) ? 1 : 0; return (0); } static int ar71xx_gpio_pin_toggle(device_t dev, uint32_t pin) { int res, i; struct ar71xx_gpio_softc *sc = device_get_softc(dev); for (i = 0; i < sc->gpio_npins; i++) { if (sc->gpio_pins[i].gp_pin == pin) break; } if (i >= sc->gpio_npins) return (EINVAL); res = (GPIO_READ(sc, AR71XX_GPIO_IN) & (1 << pin)) ? 1 : 0; if (res) GPIO_WRITE(sc, AR71XX_GPIO_CLEAR, (1 << pin)); else GPIO_WRITE(sc, AR71XX_GPIO_SET, (1 << pin)); return (0); } static int ar71xx_gpio_filter(void *arg) { /* TODO: something useful */ return (FILTER_STRAY); } static void ar71xx_gpio_intr(void *arg) { struct ar71xx_gpio_softc *sc = arg; GPIO_LOCK(sc); /* TODO: something useful */ GPIO_UNLOCK(sc); } static int ar71xx_gpio_probe(device_t dev) { device_set_desc(dev, "Atheros AR71XX GPIO driver"); return (0); } static int ar71xx_gpio_attach(device_t dev) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); int i, j, maxpin; int mask, pinon; uint32_t oe; KASSERT((device_get_unit(dev) == 0), ("ar71xx_gpio: Only one gpio module supported")); mtx_init(&sc->gpio_mtx, device_get_nameunit(dev), NULL, MTX_DEF); /* Map control/status registers. */ sc->gpio_mem_rid = 0; sc->gpio_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->gpio_mem_rid, RF_ACTIVE); if (sc->gpio_mem_res == NULL) { device_printf(dev, "couldn't map memory\n"); ar71xx_gpio_detach(dev); return (ENXIO); } if ((sc->gpio_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->gpio_irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) { device_printf(dev, "unable to allocate IRQ resource\n"); ar71xx_gpio_detach(dev); return (ENXIO); } if ((bus_setup_intr(dev, sc->gpio_irq_res, INTR_TYPE_MISC, ar71xx_gpio_filter, ar71xx_gpio_intr, sc, &sc->gpio_ih))) { device_printf(dev, "WARNING: unable to register interrupt handler\n"); ar71xx_gpio_detach(dev); return (ENXIO); } sc->dev = dev; /* Enable function bits that are required */ if (resource_int_value(device_get_name(dev), device_get_unit(dev), "function_set", &mask) == 0) { device_printf(dev, "function_set: 0x%x\n", mask); ar71xx_gpio_function_enable(sc, mask); } /* Disable function bits that are required */ if (resource_int_value(device_get_name(dev), device_get_unit(dev), "function_clear", &mask) == 0) { device_printf(dev, "function_clear: 0x%x\n", mask); ar71xx_gpio_function_disable(sc, mask); } /* Disable interrupts for all pins. 
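 * The filter and interrupt handlers in this driver are still stubs
 * (ar71xx_gpio_filter() just returns FILTER_STRAY), so all pins stay
 * masked in AR71XX_GPIO_INT_MASK here.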
*/ GPIO_WRITE(sc, AR71XX_GPIO_INT_MASK, 0); /* Initialise all pins specified in the mask, up to the pin count */ (void) ar71xx_gpio_pin_max(dev, &maxpin); if (resource_int_value(device_get_name(dev), device_get_unit(dev), "pinmask", &mask) != 0) mask = 0; if (resource_int_value(device_get_name(dev), device_get_unit(dev), "pinon", &pinon) != 0) pinon = 0; device_printf(dev, "gpio pinmask=0x%x\n", mask); for (j = 0; j <= maxpin; j++) { if ((mask & (1 << j)) == 0) continue; sc->gpio_npins++; } /* Iniatilize the GPIO pins, keep the loader settings. */ oe = GPIO_READ(sc, AR71XX_GPIO_OE); + /* + * For AR934x and QCA953x, the meaning of oe is inverted; + * so flip it the right way around so we can parse the GPIO + * state. + */ + if (!ar71xx_gpio_oe_is_high()) + oe = ~oe; + sc->gpio_pins = malloc(sizeof(*sc->gpio_pins) * sc->gpio_npins, M_DEVBUF, M_WAITOK | M_ZERO); for (i = 0, j = 0; j <= maxpin; j++) { if ((mask & (1 << j)) == 0) continue; snprintf(sc->gpio_pins[i].gp_name, GPIOMAXNAME, "pin %d", j); sc->gpio_pins[i].gp_pin = j; sc->gpio_pins[i].gp_caps = DEFAULT_CAPS; if (oe & (1 << j)) sc->gpio_pins[i].gp_flags = GPIO_PIN_OUTPUT; else sc->gpio_pins[i].gp_flags = GPIO_PIN_INPUT; i++; } /* Turn on the hinted pins. */ for (i = 0; i < sc->gpio_npins; i++) { j = sc->gpio_pins[i].gp_pin; if ((pinon & (1 << j)) != 0) { ar71xx_gpio_pin_setflags(dev, j, GPIO_PIN_OUTPUT); ar71xx_gpio_pin_set(dev, j, 1); } } /* * Search through the function hints, in case there's some * overrides such as LNA control. * * hint.gpio.X.func..gpiofunc= * hint.gpio.X.func..gpiomode=1 (for output, default low) */ for (i = 0; i <= maxpin; i++) { char buf[32]; int gpiofunc, gpiomode; snprintf(buf, 32, "func.%d.gpiofunc", i); if (resource_int_value(device_get_name(dev), device_get_unit(dev), buf, &gpiofunc) != 0) continue; /* Get the mode too */ snprintf(buf, 32, "func.%d.gpiomode", i); if (resource_int_value(device_get_name(dev), device_get_unit(dev), buf, &gpiomode) != 0) continue; /* We only handle mode=1 for now */ if (gpiomode != 1) continue; device_printf(dev, "%s: GPIO %d: func=%d, mode=%d\n", __func__, i, gpiofunc, gpiomode); - /* Set output (bit == 0) */ - oe = GPIO_READ(sc, AR71XX_GPIO_OE); - oe &= ~ (1 << i); - GPIO_WRITE(sc, AR71XX_GPIO_OE, oe); - /* Set pin value = 0, so it stays low by default */ oe = GPIO_READ(sc, AR71XX_GPIO_OUT); oe &= ~ (1 << i); GPIO_WRITE(sc, AR71XX_GPIO_OUT, oe); + + /* Set output */ + ar71xx_gpio_oe_set_output(sc, i); /* Finally: Set the output config */ ar71xx_gpio_ouput_configure(i, gpiofunc); } sc->busdev = gpiobus_attach_bus(dev); if (sc->busdev == NULL) { ar71xx_gpio_detach(dev); return (ENXIO); } return (0); } static int ar71xx_gpio_detach(device_t dev) { struct ar71xx_gpio_softc *sc = device_get_softc(dev); KASSERT(mtx_initialized(&sc->gpio_mtx), ("gpio mutex not initialized")); gpiobus_detach_bus(dev); if (sc->gpio_ih) bus_teardown_intr(dev, sc->gpio_irq_res, sc->gpio_ih); if (sc->gpio_irq_res) bus_release_resource(dev, SYS_RES_IRQ, sc->gpio_irq_rid, sc->gpio_irq_res); if (sc->gpio_mem_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->gpio_mem_rid, sc->gpio_mem_res); if (sc->gpio_pins) free(sc->gpio_pins, M_DEVBUF); mtx_destroy(&sc->gpio_mtx); return(0); } static device_method_t ar71xx_gpio_methods[] = { DEVMETHOD(device_probe, ar71xx_gpio_probe), DEVMETHOD(device_attach, ar71xx_gpio_attach), DEVMETHOD(device_detach, ar71xx_gpio_detach), /* GPIO protocol */ DEVMETHOD(gpio_get_bus, ar71xx_gpio_get_bus), DEVMETHOD(gpio_pin_max, ar71xx_gpio_pin_max), DEVMETHOD(gpio_pin_getname, 
ar71xx_gpio_pin_getname), DEVMETHOD(gpio_pin_getflags, ar71xx_gpio_pin_getflags), DEVMETHOD(gpio_pin_getcaps, ar71xx_gpio_pin_getcaps), DEVMETHOD(gpio_pin_setflags, ar71xx_gpio_pin_setflags), DEVMETHOD(gpio_pin_get, ar71xx_gpio_pin_get), DEVMETHOD(gpio_pin_set, ar71xx_gpio_pin_set), DEVMETHOD(gpio_pin_toggle, ar71xx_gpio_pin_toggle), {0, 0}, }; static driver_t ar71xx_gpio_driver = { "gpio", ar71xx_gpio_methods, sizeof(struct ar71xx_gpio_softc), }; static devclass_t ar71xx_gpio_devclass; DRIVER_MODULE(ar71xx_gpio, apb, ar71xx_gpio_driver, ar71xx_gpio_devclass, 0, 0); Index: user/alc/PQ_LAUNDRY/sys/mips/broadcom/uart_cpu_chipc.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/mips/broadcom/uart_cpu_chipc.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/mips/broadcom/uart_cpu_chipc.c (revision 303642) @@ -1,123 +1,173 @@ /*- * Copyright (c) 2016 Michael Zhilin * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_uart.h" #include #include #include #include #include #include #include #include #include #include #include #include "bcm_socinfo.h" +#ifdef CFE +#include +#include +#include +#endif + bus_space_tag_t uart_bus_space_io; bus_space_tag_t uart_bus_space_mem; static struct uart_class *chipc_uart_class = &uart_ns8250_class; #define CHIPC_UART_BAUDRATE 115200 int uart_cpu_eqres(struct uart_bas *b1, struct uart_bas *b2) { return ((b1->bsh == b2->bsh && b1->bst == b2->bst) ? 
1 : 0); } static int -uart_cpu_init(struct uart_devinfo *di, int uart, int baudrate) +uart_cpu_init(struct uart_devinfo *di, u_int uart, int baudrate) { struct bcm_socinfo *socinfo; if (uart >= CHIPC_UART_MAX) return (EINVAL); socinfo = bcm_get_socinfo(); di->ops = uart_getops(chipc_uart_class); di->bas.chan = 0; di->bas.bst = uart_bus_space_mem; di->bas.bsh = (bus_space_handle_t) BCM_SOCREG(CHIPC_UART(uart)); di->bas.regshft = 0; di->bas.rclk = socinfo->uartrate; /* in Hz */ di->baudrate = baudrate; di->databits = 8; di->stopbits = 1; di->parity = UART_PARITY_NONE; return (0); } +#ifdef CFE +static int +uart_getenv_cfe(int devtype, struct uart_devinfo *di) +{ + char device[sizeof("uartXX")]; + int baud, fd, len; + int ret; + u_int uart; + + /* CFE only vends console configuration */ + if (devtype != UART_DEV_CONSOLE) + return (ENODEV); + + /* Fetch console device */ + ret = cfe_getenv("BOOT_CONSOLE", device, sizeof(device)); + if (ret != CFE_OK) + return (ENXIO); + + /* Parse serial console unit. Fails on non-uart devices. */ + if (sscanf(device, "uart%u", &uart) != 1) + return (ENXIO); + + /* Fetch device handle */ + fd = cfe_getstdhandle(CFE_STDHANDLE_CONSOLE); + if (fd < 0) + return (ENXIO); + + /* Fetch serial configuration */ + ret = cfe_ioctl(fd, IOCTL_SERIAL_GETSPEED, (unsigned char *)&baud, + sizeof(baud), &len, 0); + if (ret != CFE_OK) + baud = CHIPC_UART_BAUDRATE; + + /* Initialize device info */ + return (uart_cpu_init(di, uart, baud)); +} +#endif /* CFE */ + int uart_cpu_getdev(int devtype, struct uart_devinfo *di) { int ivar; uart_bus_space_io = NULL; uart_bus_space_mem = mips_bus_space_generic; - /* Check the environment. */ +#ifdef CFE + /* Check the CFE environment */ + if (uart_getenv_cfe(devtype, di) == 0) + return (0); +#endif /* CFE */ + + /* Check the kernel environment. */ if (uart_getenv(devtype, di, chipc_uart_class) == 0) return (0); /* Scan the device hints for the first matching device */ - for (int i = 0; i < CHIPC_UART_MAX; i++) { + for (u_int i = 0; i < CHIPC_UART_MAX; i++) { if (resource_int_value("uart", i, "flags", &ivar)) continue; /* Check usability */ if (devtype == UART_DEV_CONSOLE && !UART_FLAGS_CONSOLE(ivar)) continue; if (devtype == UART_DEV_DBGPORT && !UART_FLAGS_DBGPORT(ivar)) continue; if (resource_int_value("uart", i, "disabled", &ivar) == 0 && ivar == 0) continue; /* Found */ if (resource_int_value("uart", i, "baud", &ivar) != 0) ivar = CHIPC_UART_BAUDRATE; return (uart_cpu_init(di, i, ivar)); } /* Default to uart0/115200 */ return (uart_cpu_init(di, 0, CHIPC_UART_BAUDRATE)); } Index: user/alc/PQ_LAUNDRY/sys/mips/conf/SENTRY5.hints =================================================================== --- user/alc/PQ_LAUNDRY/sys/mips/conf/SENTRY5.hints (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/mips/conf/SENTRY5.hints (revision 303642) @@ -1,7 +1,4 @@ # $FreeBSD$ hint.bhnd.0.at="nexus0" hint.bhnd.0.maddr="0x18000000" hint.bhnd.0.msize="0x00100000" - -# console on uart1 -hint.uart.1.flags="0x10" Index: user/alc/PQ_LAUNDRY/sys/mips/conf/TL-WDR4300.hints =================================================================== --- user/alc/PQ_LAUNDRY/sys/mips/conf/TL-WDR4300.hints (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/mips/conf/TL-WDR4300.hints (revision 303642) @@ -1,226 +1,231 @@ # $FreeBSD$ # MAC/ART ? - they're 00:02:03:04:05:06 :( # ath0 chain0 EXTERNAL_LNA0: 18 # ath0 chain1 EXTERNAL_LNA1: 19 # These are configured as GPIO output, init low, then # set the GPIO 'type' AR934X_GPIO_OUT_EXT_LNA0/AR934X_GPIO_OUT_EXT_LNA1. 
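# (See the hint.gpio.0.func.18 and hint.gpio.0.func.19 entries further
# down, which select gpiofunc 46/47 and gpiomode=1 - output, default
# low - for exactly these two pins.)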
# XXX There's no arge1 on this! # XXX RFKILL? # mdiobus0 on arge0 hint.argemdio.0.at="nexus0" hint.argemdio.0.maddr=0x19000000 hint.argemdio.0.msize=0x1000 hint.argemdio.0.order=0 # DB120 GMAC configuration # + AR934X_ETH_CFG_RGMII_GMAC0 (1 << 0) hint.ar934x_gmac.0.gmac_cfg=0x1 # Board mac address is at 0x1f01fc00. # ath0: offset 0 # ath1: offset -1 # arge0: offset -2 # arge1: not hooked up; doesn't matter hint.ar71xx.0.eeprom_mac_addr=0x1f01fc00 hint.ar71xx.0.eeprom_mac_isascii=0 hint.ar71xx_mac_map.0.devid=ath hint.ar71xx_mac_map.0.unitid=0 hint.ar71xx_mac_map.0.offset=0 hint.ar71xx_mac_map.0.is_local=0 hint.ar71xx_mac_map.1.devid=ath hint.ar71xx_mac_map.1.unitid=1 hint.ar71xx_mac_map.1.offset=-1 hint.ar71xx_mac_map.1.is_local=0 hint.ar71xx_mac_map.2.devid=arge hint.ar71xx_mac_map.2.unitid=0 hint.ar71xx_mac_map.2.offset=-2 hint.ar71xx_mac_map.2.is_local=0 # GMAC0 here - connected to an AR8327 hint.arswitch.0.at="mdio0" hint.arswitch.0.is_7240=0 hint.arswitch.0.is_9340=0 # not the internal switch! hint.arswitch.0.numphys=5 hint.arswitch.0.phy4cpu=0 hint.arswitch.0.is_rgmii=0 hint.arswitch.0.is_gmii=0 # Other AR8327 configuration parameters # AR8327_PAD_MAC_RGMII hint.arswitch.0.pad.0.mode=6 hint.arswitch.0.pad.0.txclk_delay_en=1 hint.arswitch.0.pad.0.rxclk_delay_en=1 # AR8327_CLK_DELAY_SEL1 hint.arswitch.0.pad.0.txclk_delay_sel=1 # AR8327_CLK_DELAY_SEL2 hint.arswitch.0.pad.0.rxclk_delay_sel=2 # XXX there's no LED management just yet! hint.arswitch.0.led.ctrl0=0xc737c737 hint.arswitch.0.led.ctrl1=0x00000000 hint.arswitch.0.led.ctrl2=0x00000000 hint.arswitch.0.led.ctrl3=0x0030c300 hint.arswitch.0.led.open_drain=0 # force_link=1 is required for the rest of the parameters # to be configured. hint.arswitch.0.port.0.force_link=1 hint.arswitch.0.port.0.speed=1000 hint.arswitch.0.port.0.duplex=1 hint.arswitch.0.port.0.txpause=1 hint.arswitch.0.port.0.rxpause=1 # XXX OpenWRT DB120 BSP doesn't have media/duplex set? hint.arge.0.phymask=0x0 hint.arge.0.media=1000 hint.arge.0.fduplex=1 hint.arge.0.miimode=3 # RGMII hint.arge.0.pll_1000=0x06000000 # mdiobus1 on arge1 hint.argemdio.1.at="nexus0" hint.argemdio.1.maddr=0x1a000000 hint.argemdio.1.msize=0x1000 hint.argemdio.1.order=0 # Embedded switch on the AR9344 # mdio1 is actually created as the AR8327 internal bus; so # this pops up as mdio2. # # XXX TODO: there's no need for AR9344 internal switch; it isn't exposed hint.arswitch.1.at="mdio2" hint.arswitch.1.is_7240=0 hint.arswitch.1.is_9340=1 hint.arswitch.1.numphys=5 hint.arswitch.1.phy4cpu=0 # phy 4 is not a "CPU port" PHY here hint.arswitch.1.is_rgmii=0 hint.arswitch.1.is_gmii=1 # arge1 <-> switch PHY is GMII # arge1 - lock up to 1000/full hint.arge.1.phymask=0x0 # Nothing attached here (XXX?) hint.arge.1.media=1000 hint.arge.1.fduplex=1 hint.arge.1.miimode=1 # GMII # MAC for arge1 is the second 6 bytes of the ART # hint.arge.1.eeprommac=0x1f7f0006 # ath0: Where the ART is - last 64k in the flash hint.ath.0.eepromaddr=0x1fff0000 hint.ath.0.eepromsize=16384 # ath1: it's different; it's a PCIe attached device, so # we instead need to teach the PCIe bridge code about it # (ie, the 'early pci fixup' stuff that programs the PCIe # host registers on the NIC) and then we teach ath where # to find it. 
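# (The hint.pcib.0.bus.0.0.0.ath_fixup_addr/ath_fixup_size entries below
# point the PCIe bridge at the calibration data in flash, and
# hint.ath.1.eeprom_firmware then tells ath(4) to pull its EEPROM image
# from that same bus location.)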
# ath1 hint - pcie slot 0 hint.pcib.0.bus.0.0.0.ath_fixup_addr=0x1fff4000 hint.pcib.0.bus.0.0.0.ath_fixup_size=16384 # ath0 - eeprom comes from here hint.ath.1.eeprom_firmware="pcib.0.bus.0.0.0.eeprom_firmware" # flash layout: # # bootargs=console=ttyS0,115200 root=31:02 rootfstype=jffs2 init=/sbin/init mtdparts=ath-nor0:256k(u-boot),64k(u-boot-env),6336k(rootfs),1408k(uImage),64k(mib0),64k(ART) # 128KiB uboot hint.map.0.at="flash/spi0" hint.map.0.start=0x00000000 hint.map.0.end=0x00020000 # 128k u-boot hint.map.0.name="u-boot" hint.map.0.readonly=1 # kernel hint.map.2.at="flash/spi0" hint.map.2.start=0x00020000 hint.map.2.end="search:0x00020000:0x10000:.!/bin/sh" hint.map.2.name="kernel" hint.map.2.readonly=1 # 1344KiB uImage hint.map.3.at="flash/spi0" hint.map.3.start="search:0x00020000:0x10000:.!/bin/sh" hint.map.3.end=0x007d0000 hint.map.3.name="rootfs" hint.map.3.readonly=1 # 64KiB cfg hint.map.4.at="flash/spi0" hint.map.4.start=0x007d0000 hint.map.4.end=0x007e0000 hint.map.4.name="cfg" hint.map.4.readonly=0 # 64KiB mib0 hint.map.5.at="flash/spi0" hint.map.5.start=0x007e0000 hint.map.5.end=0x007f0000 # 64k mib0 hint.map.5.name="mib0" hint.map.5.readonly=1 # 64KiB ART hint.map.6.at="flash/spi0" hint.map.6.start=0x007f0000 hint.map.6.end=0x00800000 # 64k ART hint.map.6.name="ART" hint.map.6.readonly=1 # GPIO configuration # GPIO21 and GPIO22 - USB1 and USB2 power # ath0 chain0 EXTERNAL_LNA0: 18, output # ath0 chain1 EXTERNAL_LNA1: 19, output # These are the GPIO LEDs and buttons which can be software controlled. hint.gpio.0.pinmask=0x0063f800 # Enable GPIO21, GPIO22 output and high - for USB power hint.gpio.0.pinon=0x00600000 hint.gpio.0.func.18.gpiofunc=46 hint.gpio.0.func.18.gpiomode=1 # output, default low hint.gpio.0.func.19.gpiofunc=47 hint.gpio.0.func.19.gpiomode=1 # output, default low # LED QSS - 15 # LED SYSTEM - 14 # LED USB1 - 11 # LED USB2 - 12 # LED WLAN2G - 13 # SWITCH WPS - 16 # SWITCH RFKILL - 17 hint.gpioled.0.at="gpiobus0" hint.gpioled.0.name="USB1" hint.gpioled.0.pins=0x0800 +hint.gpioled.0.invert=1 hint.gpioled.1.at="gpiobus0" hint.gpioled.1.name="USB2" hint.gpioled.1.pins=0x1000 +hint.gpioled.1.invert=1 hint.gpioled.2.at="gpiobus0" hint.gpioled.2.name="WLAN2G" hint.gpioled.2.pins=0x2000 +hint.gpioled.2.invert=1 hint.gpioled.3.at="gpiobus0" hint.gpioled.3.name="SYSTEM" hint.gpioled.3.pins=0x4000 +hint.gpioled.3.invert=1 hint.gpioled.4.at="gpiobus0" hint.gpioled.4.name="QSS" hint.gpioled.4.pins=0x8000 +hint.gpioled.4.invert=1 # XXX TODO: WPS/RFKILL switch Index: user/alc/PQ_LAUNDRY/sys/modules/netgraph/checksum/Makefile =================================================================== --- user/alc/PQ_LAUNDRY/sys/modules/netgraph/checksum/Makefile (nonexistent) +++ user/alc/PQ_LAUNDRY/sys/modules/netgraph/checksum/Makefile (revision 303642) @@ -0,0 +1,20 @@ +# $FreeBSD$ + +.include + +KMOD= ng_checksum +SRCS= ng_checksum.c opt_inet.h opt_inet6.h + +#.if !defined(KERNBUILDDIR) +# +#.if ${MK_INET_SUPPORT} != "no" +#opt_inet.h: +# echo "#define INET 1" > ${.TARGET} +#.endif +#.if ${MK_INET6_SUPPORT} != "no" +#opt_inet6.h: +# echo "#define INET6 1" > ${.TARGET} +#.endif +#.endif + +.include Property changes on: user/alc/PQ_LAUNDRY/sys/modules/netgraph/checksum/Makefile ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: 
user/alc/PQ_LAUNDRY/sys/netgraph/ng_checksum.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/netgraph/ng_checksum.c (nonexistent) +++ user/alc/PQ_LAUNDRY/sys/netgraph/ng_checksum.c (revision 303642) @@ -0,0 +1,729 @@ +/*- + * Copyright (c) 2015 Dmitry Vagin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +/* private data */ +struct ng_checksum_priv { + hook_p in; + hook_p out; + uint8_t dlt; /* DLT_XXX from bpf.h */ + struct ng_checksum_config *conf; + struct ng_checksum_stats stats; +}; + +typedef struct ng_checksum_priv *priv_p; + +/* Netgraph methods */ +static ng_constructor_t ng_checksum_constructor; +static ng_rcvmsg_t ng_checksum_rcvmsg; +static ng_shutdown_t ng_checksum_shutdown; +static ng_newhook_t ng_checksum_newhook; +static ng_rcvdata_t ng_checksum_rcvdata; +static ng_disconnect_t ng_checksum_disconnect; + +#define ERROUT(x) { error = (x); goto done; } + +static const struct ng_parse_struct_field ng_checksum_config_type_fields[] + = NG_CHECKSUM_CONFIG_TYPE; +static const struct ng_parse_type ng_checksum_config_type = { + &ng_parse_struct_type, + &ng_checksum_config_type_fields +}; + +static const struct ng_parse_struct_field ng_checksum_stats_fields[] + = NG_CHECKSUM_STATS_TYPE; +static const struct ng_parse_type ng_checksum_stats_type = { + &ng_parse_struct_type, + &ng_checksum_stats_fields +}; + +static const struct ng_cmdlist ng_checksum_cmdlist[] = { + { + NGM_CHECKSUM_COOKIE, + NGM_CHECKSUM_GETDLT, + "getdlt", + NULL, + &ng_parse_uint8_type + }, + { + NGM_CHECKSUM_COOKIE, + NGM_CHECKSUM_SETDLT, + "setdlt", + &ng_parse_uint8_type, + NULL + }, + { + NGM_CHECKSUM_COOKIE, + NGM_CHECKSUM_GETCONFIG, + "getconfig", + NULL, + &ng_checksum_config_type + }, + { + NGM_CHECKSUM_COOKIE, + NGM_CHECKSUM_SETCONFIG, + "setconfig", + &ng_checksum_config_type, + NULL + }, + { + NGM_CHECKSUM_COOKIE, + NGM_CHECKSUM_GET_STATS, + "getstats", + NULL, + &ng_checksum_stats_type + }, + { + 
NGM_CHECKSUM_COOKIE, + NGM_CHECKSUM_CLR_STATS, + "clrstats", + NULL, + NULL + }, + { + NGM_CHECKSUM_COOKIE, + NGM_CHECKSUM_GETCLR_STATS, + "getclrstats", + NULL, + &ng_checksum_stats_type + }, + { 0 } +}; + +static struct ng_type typestruct = { + .version = NG_ABI_VERSION, + .name = NG_CHECKSUM_NODE_TYPE, + .constructor = ng_checksum_constructor, + .rcvmsg = ng_checksum_rcvmsg, + .shutdown = ng_checksum_shutdown, + .newhook = ng_checksum_newhook, + .rcvdata = ng_checksum_rcvdata, + .disconnect = ng_checksum_disconnect, + .cmdlist = ng_checksum_cmdlist, +}; + +NETGRAPH_INIT(checksum, &typestruct); + +static int +ng_checksum_constructor(node_p node) +{ + priv_p priv; + + priv = malloc(sizeof(*priv), M_NETGRAPH, M_WAITOK|M_ZERO); + priv->dlt = DLT_RAW; + + NG_NODE_SET_PRIVATE(node, priv); + + return (0); +} + +static int +ng_checksum_newhook(node_p node, hook_p hook, const char *name) +{ + const priv_p priv = NG_NODE_PRIVATE(node); + + if (strncmp(name, NG_CHECKSUM_HOOK_IN, strlen(NG_CHECKSUM_HOOK_IN)) == 0) { + priv->in = hook; + } else if (strncmp(name, NG_CHECKSUM_HOOK_OUT, strlen(NG_CHECKSUM_HOOK_OUT)) == 0) { + priv->out = hook; + } else + return (EINVAL); + + return (0); +} + +static int +ng_checksum_rcvmsg(node_p node, item_p item, hook_p lasthook) +{ + const priv_p priv = NG_NODE_PRIVATE(node); + struct ng_checksum_config *conf, *newconf; + struct ng_mesg *msg; + struct ng_mesg *resp = NULL; + int error = 0; + + NGI_GET_MSG(item, msg); + + if (msg->header.typecookie != NGM_CHECKSUM_COOKIE) + ERROUT(EINVAL); + + switch (msg->header.cmd) + { + case NGM_CHECKSUM_GETDLT: + NG_MKRESPONSE(resp, msg, sizeof(uint8_t), M_WAITOK); + + if (resp == NULL) + ERROUT(ENOMEM); + + *((uint8_t *) resp->data) = priv->dlt; + + break; + + case NGM_CHECKSUM_SETDLT: + if (msg->header.arglen != sizeof(uint8_t)) + ERROUT(EINVAL); + + switch (*(uint8_t *) msg->data) + { + case DLT_EN10MB: + case DLT_RAW: + priv->dlt = *(uint8_t *) msg->data; + break; + + default: + ERROUT(EINVAL); + } + + break; + + case NGM_CHECKSUM_GETCONFIG: + if (priv->conf == NULL) + ERROUT(0); + + NG_MKRESPONSE(resp, msg, sizeof(struct ng_checksum_config), M_WAITOK); + + if (resp == NULL) + ERROUT(ENOMEM); + + bcopy(priv->conf, resp->data, sizeof(struct ng_checksum_config)); + + break; + + case NGM_CHECKSUM_SETCONFIG: + conf = (struct ng_checksum_config *) msg->data; + + if (msg->header.arglen != sizeof(struct ng_checksum_config)) + ERROUT(EINVAL); + + conf->csum_flags &= NG_CHECKSUM_CSUM_IPV4|NG_CHECKSUM_CSUM_IPV6; + conf->csum_offload &= NG_CHECKSUM_CSUM_IPV4|NG_CHECKSUM_CSUM_IPV6; + + newconf = malloc(sizeof(struct ng_checksum_config), M_NETGRAPH, M_WAITOK|M_ZERO); + + bcopy(conf, newconf, sizeof(struct ng_checksum_config)); + + if (priv->conf) + free(priv->conf, M_NETGRAPH); + + priv->conf = newconf; + + break; + + case NGM_CHECKSUM_GET_STATS: + case NGM_CHECKSUM_CLR_STATS: + case NGM_CHECKSUM_GETCLR_STATS: + if (msg->header.cmd != NGM_CHECKSUM_CLR_STATS) { + NG_MKRESPONSE(resp, msg, sizeof(struct ng_checksum_stats), M_WAITOK); + + if (resp == NULL) + ERROUT(ENOMEM); + + bcopy(&(priv->stats), resp->data, sizeof(struct ng_checksum_stats)); + } + + if (msg->header.cmd != NGM_CHECKSUM_GET_STATS) + bzero(&(priv->stats), sizeof(struct ng_checksum_stats)); + + break; + + default: + ERROUT(EINVAL); + } + +done: + NG_RESPOND_MSG(error, node, item, resp); + NG_FREE_MSG(msg); + + return (error); +} + +#define PULLUP_CHECK(mbuf, length) do { \ + pullup_len += length; \ + if (((mbuf)->m_pkthdr.len < pullup_len) || \ + (pullup_len > MHLEN)) { \ + 
return (EINVAL); \ + } \ + if ((mbuf)->m_len < pullup_len && \ + (((mbuf) = m_pullup((mbuf), pullup_len)) == NULL)) { \ + return (ENOBUFS); \ + } \ +} while (0) + +#ifdef INET +static int +checksum_ipv4(priv_p priv, struct mbuf *m, int l3_offset) +{ + struct ip *ip4; + int pullup_len; + int hlen, plen; + int processed = 0; + + pullup_len = l3_offset; + + PULLUP_CHECK(m, sizeof(struct ip)); + ip4 = (struct ip *) mtodo(m, l3_offset); + + if (ip4->ip_v != IPVERSION) + return (EOPNOTSUPP); + + hlen = ip4->ip_hl << 2; + plen = ntohs(ip4->ip_len); + + if (hlen < sizeof(struct ip) || m->m_pkthdr.len < l3_offset + plen) + return (EINVAL); + + if (m->m_pkthdr.csum_flags & CSUM_IP) { + ip4->ip_sum = 0; + + if ((priv->conf->csum_offload & CSUM_IP) == 0) { + if (hlen == sizeof(struct ip)) + ip4->ip_sum = in_cksum_hdr(ip4); + else + ip4->ip_sum = in_cksum_skip(m, l3_offset + hlen, l3_offset); + + m->m_pkthdr.csum_flags &= ~CSUM_IP; + } + + processed = 1; + } + + pullup_len = l3_offset + hlen; + + /* We can not calculate a checksum fragmented packets */ + if (ip4->ip_off & htons(IP_MF|IP_OFFMASK)) { + m->m_pkthdr.csum_flags &= ~(CSUM_TCP|CSUM_UDP); + return (0); + } + + switch (ip4->ip_p) + { + case IPPROTO_TCP: + if (m->m_pkthdr.csum_flags & CSUM_TCP) { + struct tcphdr *th; + + PULLUP_CHECK(m, sizeof(struct tcphdr)); + th = (struct tcphdr *) mtodo(m, l3_offset + hlen); + + th->th_sum = in_pseudo(ip4->ip_src.s_addr, + ip4->ip_dst.s_addr, htons(ip4->ip_p + plen - hlen)); + + if ((priv->conf->csum_offload & CSUM_TCP) == 0) { + th->th_sum = in_cksum_skip(m, l3_offset + plen, l3_offset + hlen); + m->m_pkthdr.csum_flags &= ~CSUM_TCP; + } + + processed = 1; + } + + m->m_pkthdr.csum_flags &= ~CSUM_UDP; + break; + + case IPPROTO_UDP: + if (m->m_pkthdr.csum_flags & CSUM_UDP) { + struct udphdr *uh; + + PULLUP_CHECK(m, sizeof(struct udphdr)); + uh = (struct udphdr *) mtodo(m, l3_offset + hlen); + + uh->uh_sum = in_pseudo(ip4->ip_src.s_addr, + ip4->ip_dst.s_addr, htons(ip4->ip_p + plen - hlen)); + + if ((priv->conf->csum_offload & CSUM_UDP) == 0) { + uh->uh_sum = in_cksum_skip(m, + l3_offset + plen, l3_offset + hlen); + + if (uh->uh_sum == 0) + uh->uh_sum = 0xffff; + + m->m_pkthdr.csum_flags &= ~CSUM_UDP; + } + + processed = 1; + } + + m->m_pkthdr.csum_flags &= ~CSUM_TCP; + break; + + default: + m->m_pkthdr.csum_flags &= ~(CSUM_TCP|CSUM_UDP); + break; + } + + m->m_pkthdr.csum_flags &= ~NG_CHECKSUM_CSUM_IPV6; + + if (processed) + priv->stats.processed++; + + return (0); +} +#endif /* INET */ + +#ifdef INET6 +static int +checksum_ipv6(priv_p priv, struct mbuf *m, int l3_offset) +{ + struct ip6_hdr *ip6; + struct ip6_ext *ip6e = NULL; + int pullup_len; + int hlen, plen; + int nxt; + int processed = 0; + + pullup_len = l3_offset; + + PULLUP_CHECK(m, sizeof(struct ip6_hdr)); + ip6 = (struct ip6_hdr *) mtodo(m, l3_offset); + + if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) + return (EOPNOTSUPP); + + hlen = sizeof(struct ip6_hdr); + plen = ntohs(ip6->ip6_plen) + hlen; + + if (m->m_pkthdr.len < l3_offset + plen) + return (EINVAL); + + nxt = ip6->ip6_nxt; + + for (;;) { + switch (nxt) + { + case IPPROTO_DSTOPTS: + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + PULLUP_CHECK(m, sizeof(struct ip6_ext)); + ip6e = (struct ip6_ext *) mtodo(m, l3_offset + hlen); + nxt = ip6e->ip6e_nxt; + hlen += (ip6e->ip6e_len + 1) << 3; + pullup_len = l3_offset + hlen; + break; + + case IPPROTO_AH: + PULLUP_CHECK(m, sizeof(struct ip6_ext)); + ip6e = (struct ip6_ext *) mtodo(m, l3_offset + hlen); + nxt = ip6e->ip6e_nxt; + hlen += 
(ip6e->ip6e_len + 2) << 2; + pullup_len = l3_offset + hlen; + break; + + case IPPROTO_FRAGMENT: + /* We can not calculate a checksum fragmented packets */ + m->m_pkthdr.csum_flags &= ~(CSUM_TCP_IPV6|CSUM_UDP_IPV6); + return (0); + + default: + goto loopend; + } + + if (nxt == 0) + return (EINVAL); + } + +loopend: + + switch (nxt) + { + case IPPROTO_TCP: + if (m->m_pkthdr.csum_flags & CSUM_TCP_IPV6) { + struct tcphdr *th; + + PULLUP_CHECK(m, sizeof(struct tcphdr)); + th = (struct tcphdr *) mtodo(m, l3_offset + hlen); + + th->th_sum = in6_cksum_pseudo(ip6, plen - hlen, nxt, 0); + + if ((priv->conf->csum_offload & CSUM_TCP_IPV6) == 0) { + th->th_sum = in_cksum_skip(m, l3_offset + plen, l3_offset + hlen); + m->m_pkthdr.csum_flags &= ~CSUM_TCP_IPV6; + } + + processed = 1; + } + + m->m_pkthdr.csum_flags &= ~CSUM_UDP_IPV6; + break; + + case IPPROTO_UDP: + if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6) { + struct udphdr *uh; + + PULLUP_CHECK(m, sizeof(struct udphdr)); + uh = (struct udphdr *) mtodo(m, l3_offset + hlen); + + uh->uh_sum = in6_cksum_pseudo(ip6, plen - hlen, nxt, 0); + + if ((priv->conf->csum_offload & CSUM_UDP_IPV6) == 0) { + uh->uh_sum = in_cksum_skip(m, + l3_offset + plen, l3_offset + hlen); + + if (uh->uh_sum == 0) + uh->uh_sum = 0xffff; + + m->m_pkthdr.csum_flags &= ~CSUM_UDP_IPV6; + } + + processed = 1; + } + + m->m_pkthdr.csum_flags &= ~CSUM_TCP_IPV6; + break; + + default: + m->m_pkthdr.csum_flags &= ~(CSUM_TCP_IPV6|CSUM_UDP_IPV6); + break; + } + + m->m_pkthdr.csum_flags &= ~NG_CHECKSUM_CSUM_IPV4; + + if (processed) + priv->stats.processed++; + + return (0); +} +#endif /* INET6 */ + +#undef PULLUP_CHECK + +static int +ng_checksum_rcvdata(hook_p hook, item_p item) +{ + const priv_p priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); + struct mbuf *m; + hook_p out; + int error = 0; + + priv->stats.received++; + + NGI_GET_M(item, m); + +#define PULLUP_CHECK(mbuf, length) do { \ + pullup_len += length; \ + if (((mbuf)->m_pkthdr.len < pullup_len) || \ + (pullup_len > MHLEN)) { \ + error = EINVAL; \ + goto bypass; \ + } \ + if ((mbuf)->m_len < pullup_len && \ + (((mbuf) = m_pullup((mbuf), pullup_len)) == NULL)) { \ + error = ENOBUFS; \ + goto drop; \ + } \ +} while (0) + + if (!(priv->conf && hook == priv->in && m && (m->m_flags & M_PKTHDR))) + goto bypass; + + m->m_pkthdr.csum_flags |= priv->conf->csum_flags; + + if (m->m_pkthdr.csum_flags & (NG_CHECKSUM_CSUM_IPV4|NG_CHECKSUM_CSUM_IPV6)) + { + struct ether_header *eh; + struct ng_checksum_vlan_header *vh; + int pullup_len = 0; + uint16_t etype; + + m = m_unshare(m, M_NOWAIT); + + if (m == NULL) + ERROUT(ENOMEM); + + switch (priv->dlt) + { + case DLT_EN10MB: + PULLUP_CHECK(m, sizeof(struct ether_header)); + eh = mtod(m, struct ether_header *); + etype = ntohs(eh->ether_type); + + for (;;) { /* QinQ support */ + switch (etype) + { + case 0x8100: + case 0x88A8: + case 0x9100: + PULLUP_CHECK(m, sizeof(struct ng_checksum_vlan_header)); + vh = (struct ng_checksum_vlan_header *) mtodo(m, + pullup_len - sizeof(struct ng_checksum_vlan_header)); + etype = ntohs(vh->etype); + break; + + default: + goto loopend; + } + } +loopend: +#ifdef INET + if (etype == ETHERTYPE_IP && + (m->m_pkthdr.csum_flags & NG_CHECKSUM_CSUM_IPV4)) { + error = checksum_ipv4(priv, m, pullup_len); + if (error == ENOBUFS) + goto drop; + } else +#endif +#ifdef INET6 + if (etype == ETHERTYPE_IPV6 && + (m->m_pkthdr.csum_flags & NG_CHECKSUM_CSUM_IPV6)) { + error = checksum_ipv6(priv, m, pullup_len); + if (error == ENOBUFS) + goto drop; + } else +#endif + { + m->m_pkthdr.csum_flags &= + 
~(NG_CHECKSUM_CSUM_IPV4|NG_CHECKSUM_CSUM_IPV6); + } + + break; + + case DLT_RAW: +#ifdef INET + if (m->m_pkthdr.csum_flags & NG_CHECKSUM_CSUM_IPV4) + { + error = checksum_ipv4(priv, m, pullup_len); + + if (error == 0) + goto bypass; + else if (error == ENOBUFS) + goto drop; + } +#endif +#ifdef INET6 + if (m->m_pkthdr.csum_flags & NG_CHECKSUM_CSUM_IPV6) + { + error = checksum_ipv6(priv, m, pullup_len); + + if (error == 0) + goto bypass; + else if (error == ENOBUFS) + goto drop; + } +#endif + if (error) + m->m_pkthdr.csum_flags &= + ~(NG_CHECKSUM_CSUM_IPV4|NG_CHECKSUM_CSUM_IPV6); + + break; + + default: + ERROUT(EINVAL); + } + } + +#undef PULLUP_CHECK + +bypass: + out = NULL; + + if (hook == priv->in) { + /* return frames on 'in' hook if 'out' not connected */ + out = priv->out ? priv->out : priv->in; + } else if (hook == priv->out && priv->in) { + /* pass frames on 'out' hook if 'in' connected */ + out = priv->in; + } + + if (out == NULL) + ERROUT(0); + + NG_FWD_NEW_DATA(error, item, out, m); + + return (error); + +done: +drop: + NG_FREE_ITEM(item); + NG_FREE_M(m); + + priv->stats.dropped++; + + return (error); +} + +static int +ng_checksum_shutdown(node_p node) +{ + const priv_p priv = NG_NODE_PRIVATE(node); + + NG_NODE_SET_PRIVATE(node, NULL); + NG_NODE_UNREF(node); + + if (priv->conf) + free(priv->conf, M_NETGRAPH); + + free(priv, M_NETGRAPH); + + return (0); +} + +static int +ng_checksum_disconnect(hook_p hook) +{ + priv_p priv; + + priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); + + if (hook == priv->in) + priv->in = NULL; + + if (hook == priv->out) + priv->out = NULL; + + if (NG_NODE_NUMHOOKS(NG_HOOK_NODE(hook)) == 0 && + NG_NODE_IS_VALID(NG_HOOK_NODE(hook))) /* already shutting down? */ + ng_rmnode_self(NG_HOOK_NODE(hook)); + + return (0); +} Property changes on: user/alc/PQ_LAUNDRY/sys/netgraph/ng_checksum.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: user/alc/PQ_LAUNDRY/sys/netgraph/ng_checksum.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/netgraph/ng_checksum.h (nonexistent) +++ user/alc/PQ_LAUNDRY/sys/netgraph/ng_checksum.h (revision 303642) @@ -0,0 +1,88 @@ +/*- + * Copyright (c) 2015 Dmitry Vagin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NETGRAPH_NG_CHECKSUM_H_ +#define _NETGRAPH_NG_CHECKSUM_H_ + +/* Node type name. */ +#define NG_CHECKSUM_NODE_TYPE "checksum" + +/* Node type cookie. */ +#define NGM_CHECKSUM_COOKIE 439419912 + +/* Hook names */ +#define NG_CHECKSUM_HOOK_IN "in" +#define NG_CHECKSUM_HOOK_OUT "out" + +/* Checksum flags */ +#define NG_CHECKSUM_CSUM_IPV4 (CSUM_IP|CSUM_TCP|CSUM_UDP) +#define NG_CHECKSUM_CSUM_IPV6 (CSUM_TCP_IPV6|CSUM_UDP_IPV6) + +/* Netgraph commands understood by this node type */ +enum { + NGM_CHECKSUM_GETDLT = 1, + NGM_CHECKSUM_SETDLT, + NGM_CHECKSUM_GETCONFIG, + NGM_CHECKSUM_SETCONFIG, + NGM_CHECKSUM_GETCLR_STATS, + NGM_CHECKSUM_GET_STATS, + NGM_CHECKSUM_CLR_STATS, +}; + +/* Parsing declarations */ + +#define NG_CHECKSUM_CONFIG_TYPE { \ + { "csum_flags", &ng_parse_uint64_type }, \ + { "csum_offload", &ng_parse_uint64_type }, \ + { NULL } \ +} + +#define NG_CHECKSUM_STATS_TYPE { \ + { "Received", &ng_parse_uint64_type }, \ + { "Processed", &ng_parse_uint64_type }, \ + { "Dropped", &ng_parse_uint64_type }, \ + { NULL } \ +} + +struct ng_checksum_config { + uint64_t csum_flags; + uint64_t csum_offload; +}; + +struct ng_checksum_stats { + uint64_t received; + uint64_t processed; + uint64_t dropped; +}; + +struct ng_checksum_vlan_header { + u_int16_t tag; + u_int16_t etype; +}; + +#endif /* _NETGRAPH_NG_CHECKSUM_H_ */ Property changes on: user/alc/PQ_LAUNDRY/sys/netgraph/ng_checksum.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: user/alc/PQ_LAUNDRY/sys/netgraph/ng_patch.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/netgraph/ng_patch.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/netgraph/ng_patch.c (revision 303642) @@ -1,576 +1,579 @@ /*- - * Copyright (C) 2010 by Maxim Ignatenko + * Copyright (c) 2010 Maxim Ignatenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include static ng_constructor_t ng_patch_constructor; static ng_rcvmsg_t ng_patch_rcvmsg; static ng_shutdown_t ng_patch_shutdown; static ng_newhook_t ng_patch_newhook; static ng_rcvdata_t ng_patch_rcvdata; static ng_disconnect_t ng_patch_disconnect; static int ng_patch_config_getlen(const struct ng_parse_type *type, const u_char *start, const u_char *buf) { - const struct ng_patch_config *p; + const struct ng_patch_config *conf; - p = (const struct ng_patch_config *)(buf - + conf = (const struct ng_patch_config *)(buf - offsetof(struct ng_patch_config, ops)); - return (p->count); + + return (conf->count); } static const struct ng_parse_struct_field ng_patch_op_type_fields[] = NG_PATCH_OP_TYPE_INFO; static const struct ng_parse_type ng_patch_op_type = { &ng_parse_struct_type, &ng_patch_op_type_fields }; -static const struct ng_parse_array_info ng_patch_confarr_info = { +static const struct ng_parse_array_info ng_patch_ops_array_info = { &ng_patch_op_type, &ng_patch_config_getlen }; -static const struct ng_parse_type ng_patch_confarr_type = { +static const struct ng_parse_type ng_patch_ops_array_type = { &ng_parse_array_type, - &ng_patch_confarr_info + &ng_patch_ops_array_info }; static const struct ng_parse_struct_field ng_patch_config_type_fields[] = NG_PATCH_CONFIG_TYPE_INFO; static const struct ng_parse_type ng_patch_config_type = { &ng_parse_struct_type, &ng_patch_config_type_fields }; static const struct ng_parse_struct_field ng_patch_stats_fields[] = NG_PATCH_STATS_TYPE_INFO; static const struct ng_parse_type ng_patch_stats_type = { &ng_parse_struct_type, &ng_patch_stats_fields }; static const struct ng_cmdlist ng_patch_cmdlist[] = { { NGM_PATCH_COOKIE, NGM_PATCH_GETCONFIG, "getconfig", NULL, &ng_patch_config_type }, { NGM_PATCH_COOKIE, NGM_PATCH_SETCONFIG, "setconfig", &ng_patch_config_type, NULL }, { NGM_PATCH_COOKIE, NGM_PATCH_GET_STATS, "getstats", NULL, &ng_patch_stats_type }, { NGM_PATCH_COOKIE, NGM_PATCH_CLR_STATS, "clrstats", NULL, NULL }, { NGM_PATCH_COOKIE, NGM_PATCH_GETCLR_STATS, "getclrstats", NULL, &ng_patch_stats_type }, { 0 } }; static struct ng_type typestruct = { .version = NG_ABI_VERSION, .name = NG_PATCH_NODE_TYPE, .constructor = ng_patch_constructor, .rcvmsg = ng_patch_rcvmsg, .shutdown = ng_patch_shutdown, .newhook = ng_patch_newhook, .rcvdata = ng_patch_rcvdata, .disconnect = ng_patch_disconnect, .cmdlist = ng_patch_cmdlist, }; + NETGRAPH_INIT(patch, &typestruct); union patch_val { uint8_t v1; uint16_t v2; uint32_t v4; uint64_t v8; }; +/* private data */ struct ng_patch_priv { hook_p in; hook_p out; struct ng_patch_config *config; union patch_val *val; struct ng_patch_stats stats; }; typedef struct ng_patch_priv *priv_p; #define NG_PATCH_CONF_SIZE(count) (sizeof(struct ng_patch_config) + \ (count) * sizeof(struct ng_patch_op)) static void do_patch(priv_p conf, struct mbuf *m); static int ng_patch_constructor(node_p node) { priv_p 
privdata; privdata = malloc(sizeof(*privdata), M_NETGRAPH, M_WAITOK | M_ZERO); NG_NODE_SET_PRIVATE(node, privdata); privdata->in = NULL; privdata->out = NULL; privdata->config = NULL; return (0); } static int ng_patch_newhook(node_p node, hook_p hook, const char *name) { const priv_p privp = NG_NODE_PRIVATE(node); if (strncmp(name, NG_PATCH_HOOK_IN, strlen(NG_PATCH_HOOK_IN)) == 0) { privp->in = hook; } else if (strncmp(name, NG_PATCH_HOOK_OUT, strlen(NG_PATCH_HOOK_OUT)) == 0) { privp->out = hook; } else return (EINVAL); return(0); } static int ng_patch_rcvmsg(node_p node, item_p item, hook_p lasthook) { const priv_p privp = NG_NODE_PRIVATE(node); struct ng_patch_config *conf, *newconf; union patch_val *newval; struct ng_mesg *msg; struct ng_mesg *resp; int i, clear, error; clear = error = 0; resp = NULL; NGI_GET_MSG(item, msg); switch (msg->header.typecookie) { case NGM_PATCH_COOKIE: switch (msg->header.cmd) { case NGM_PATCH_GETCONFIG: if (privp->config == NULL) break; NG_MKRESPONSE(resp, msg, NG_PATCH_CONF_SIZE(privp->config->count), M_WAITOK); bcopy(privp->config, resp->data, NG_PATCH_CONF_SIZE(privp->config->count)); break; case NGM_PATCH_SETCONFIG: { if (msg->header.arglen < sizeof(struct ng_patch_config)) { error = EINVAL; break; } conf = (struct ng_patch_config *)msg->data; if (msg->header.arglen < NG_PATCH_CONF_SIZE(conf->count)) { error = EINVAL; break; } for(i = 0; i < conf->count; i++) { switch(conf->ops[i].length) { case 1: case 2: case 4: case 8: break; default: error = EINVAL; break; } if (error != 0) break; } conf->csum_flags &= CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP; if (error == 0) { newconf = malloc( NG_PATCH_CONF_SIZE(conf->count), M_NETGRAPH, M_WAITOK); newval = malloc(conf->count * sizeof(union patch_val), M_NETGRAPH, M_WAITOK); for(i = 0; i < conf->count; i++) { switch (conf->ops[i].length) { case 1: newval[i].v1 = conf->ops[i].value; break; case 2: newval[i].v2 = conf->ops[i].value; break; case 4: newval[i].v4 = conf->ops[i].value; break; case 8: newval[i].v8 = conf->ops[i].value; break; } } bcopy(conf, newconf, NG_PATCH_CONF_SIZE(conf->count)); if (privp->val != NULL) free(privp->val, M_NETGRAPH); privp->val = newval; if (privp->config != NULL) free(privp->config, M_NETGRAPH); privp->config = newconf; } break; } case NGM_PATCH_GETCLR_STATS: clear = 1; /* FALLTHROUGH */ case NGM_PATCH_GET_STATS: NG_MKRESPONSE(resp, msg, sizeof(struct ng_patch_stats), M_WAITOK); bcopy(&(privp->stats), resp->data, sizeof(struct ng_patch_stats)); if (clear == 0) break; /* else FALLTHROUGH */ case NGM_PATCH_CLR_STATS: bzero(&(privp->stats), sizeof(struct ng_patch_stats)); break; default: error = EINVAL; break; } break; default: error = EINVAL; break; } NG_RESPOND_MSG(error, node, item, resp); NG_FREE_MSG(msg); return(error); } static void do_patch(priv_p privp, struct mbuf *m) { struct ng_patch_config *conf; uint64_t buf; int i, patched; conf = privp->config; patched = 0; for(i = 0; i < conf->count; i++) { if (conf->ops[i].offset + conf->ops[i].length > m->m_pkthdr.len) continue; /* for "=" operation we don't need to copy data from mbuf */ if (conf->ops[i].mode != NG_PATCH_MODE_SET) { m_copydata(m, conf->ops[i].offset, conf->ops[i].length, (caddr_t)&buf); } switch (conf->ops[i].length) { case 1: switch (conf->ops[i].mode) { case NG_PATCH_MODE_SET: *((uint8_t *)&buf) = privp->val[i].v1; break; case NG_PATCH_MODE_ADD: *((uint8_t *)&buf) += privp->val[i].v1; break; case NG_PATCH_MODE_SUB: *((uint8_t *)&buf) -= privp->val[i].v1; break; case NG_PATCH_MODE_MUL: *((uint8_t *)&buf) *= 
privp->val[i].v1; break; case NG_PATCH_MODE_DIV: *((uint8_t *)&buf) /= privp->val[i].v1; break; case NG_PATCH_MODE_NEG: *((int8_t *)&buf) = - *((int8_t *)&buf); break; case NG_PATCH_MODE_AND: *((uint8_t *)&buf) &= privp->val[i].v1; break; case NG_PATCH_MODE_OR: *((uint8_t *)&buf) |= privp->val[i].v1; break; case NG_PATCH_MODE_XOR: *((uint8_t *)&buf) ^= privp->val[i].v1; break; case NG_PATCH_MODE_SHL: *((uint8_t *)&buf) <<= privp->val[i].v1; break; case NG_PATCH_MODE_SHR: *((uint8_t *)&buf) >>= privp->val[i].v1; break; } break; case 2: *((int16_t *)&buf) = ntohs(*((int16_t *)&buf)); switch (conf->ops[i].mode) { case NG_PATCH_MODE_SET: *((uint16_t *)&buf) = privp->val[i].v2; break; case NG_PATCH_MODE_ADD: *((uint16_t *)&buf) += privp->val[i].v2; break; case NG_PATCH_MODE_SUB: *((uint16_t *)&buf) -= privp->val[i].v2; break; case NG_PATCH_MODE_MUL: *((uint16_t *)&buf) *= privp->val[i].v2; break; case NG_PATCH_MODE_DIV: *((uint16_t *)&buf) /= privp->val[i].v2; break; case NG_PATCH_MODE_NEG: *((int16_t *)&buf) = - *((int16_t *)&buf); break; case NG_PATCH_MODE_AND: *((uint16_t *)&buf) &= privp->val[i].v2; break; case NG_PATCH_MODE_OR: *((uint16_t *)&buf) |= privp->val[i].v2; break; case NG_PATCH_MODE_XOR: *((uint16_t *)&buf) ^= privp->val[i].v2; break; case NG_PATCH_MODE_SHL: *((uint16_t *)&buf) <<= privp->val[i].v2; break; case NG_PATCH_MODE_SHR: *((uint16_t *)&buf) >>= privp->val[i].v2; break; } *((int16_t *)&buf) = htons(*((int16_t *)&buf)); break; case 4: *((int32_t *)&buf) = ntohl(*((int32_t *)&buf)); switch (conf->ops[i].mode) { case NG_PATCH_MODE_SET: *((uint32_t *)&buf) = privp->val[i].v4; break; case NG_PATCH_MODE_ADD: *((uint32_t *)&buf) += privp->val[i].v4; break; case NG_PATCH_MODE_SUB: *((uint32_t *)&buf) -= privp->val[i].v4; break; case NG_PATCH_MODE_MUL: *((uint32_t *)&buf) *= privp->val[i].v4; break; case NG_PATCH_MODE_DIV: *((uint32_t *)&buf) /= privp->val[i].v4; break; case NG_PATCH_MODE_NEG: *((int32_t *)&buf) = - *((int32_t *)&buf); break; case NG_PATCH_MODE_AND: *((uint32_t *)&buf) &= privp->val[i].v4; break; case NG_PATCH_MODE_OR: *((uint32_t *)&buf) |= privp->val[i].v4; break; case NG_PATCH_MODE_XOR: *((uint32_t *)&buf) ^= privp->val[i].v4; break; case NG_PATCH_MODE_SHL: *((uint32_t *)&buf) <<= privp->val[i].v4; break; case NG_PATCH_MODE_SHR: *((uint32_t *)&buf) >>= privp->val[i].v4; break; } *((int32_t *)&buf) = htonl(*((int32_t *)&buf)); break; case 8: *((int64_t *)&buf) = be64toh(*((int64_t *)&buf)); switch (conf->ops[i].mode) { case NG_PATCH_MODE_SET: *((uint64_t *)&buf) = privp->val[i].v8; break; case NG_PATCH_MODE_ADD: *((uint64_t *)&buf) += privp->val[i].v8; break; case NG_PATCH_MODE_SUB: *((uint64_t *)&buf) -= privp->val[i].v8; break; case NG_PATCH_MODE_MUL: *((uint64_t *)&buf) *= privp->val[i].v8; break; case NG_PATCH_MODE_DIV: *((uint64_t *)&buf) /= privp->val[i].v8; break; case NG_PATCH_MODE_NEG: *((int64_t *)&buf) = - *((int64_t *)&buf); break; case NG_PATCH_MODE_AND: *((uint64_t *)&buf) &= privp->val[i].v8; break; case NG_PATCH_MODE_OR: *((uint64_t *)&buf) |= privp->val[i].v8; break; case NG_PATCH_MODE_XOR: *((uint64_t *)&buf) ^= privp->val[i].v8; break; case NG_PATCH_MODE_SHL: *((uint64_t *)&buf) <<= privp->val[i].v8; break; case NG_PATCH_MODE_SHR: *((uint64_t *)&buf) >>= privp->val[i].v8; break; } *((int64_t *)&buf) = htobe64(*((int64_t *)&buf)); break; } m_copyback(m, conf->ops[i].offset, conf->ops[i].length, (caddr_t)&buf); patched = 1; } if (patched > 0) privp->stats.patched++; } static int ng_patch_rcvdata(hook_p hook, item_p item) { const priv_p priv = 
NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); struct mbuf *m; hook_p target; int error; priv->stats.received++; NGI_GET_M(item, m); if (priv->config != NULL && hook == priv->in && (m->m_flags & M_PKTHDR) != 0) { m = m_unshare(m,M_NOWAIT); if (m == NULL) { priv->stats.dropped++; NG_FREE_ITEM(item); return (ENOMEM); } do_patch(priv, m); m->m_pkthdr.csum_flags |= priv->config->csum_flags; } target = NULL; if (hook == priv->in) { /* return frames on 'in' hook if 'out' not connected */ if (priv->out != NULL) target = priv->out; else target = priv->in; } if (hook == priv->out && priv->in != NULL) target = priv->in; if (target == NULL) { priv->stats.dropped++; NG_FREE_ITEM(item); NG_FREE_M(m); return (0); } NG_FWD_NEW_DATA(error, item, target, m); return (error); } static int ng_patch_shutdown(node_p node) { const priv_p privdata = NG_NODE_PRIVATE(node); if (privdata->val != NULL) free(privdata->val, M_NETGRAPH); if (privdata->config != NULL) free(privdata->config, M_NETGRAPH); NG_NODE_SET_PRIVATE(node, NULL); NG_NODE_UNREF(node); free(privdata, M_NETGRAPH); return (0); } static int ng_patch_disconnect(hook_p hook) { priv_p priv; priv = NG_NODE_PRIVATE(NG_HOOK_NODE(hook)); if (hook == priv->in) { priv->in = NULL; } if (hook == priv->out) { priv->out = NULL; } if (NG_NODE_NUMHOOKS(NG_HOOK_NODE(hook)) == 0 && NG_NODE_IS_VALID(NG_HOOK_NODE(hook))) /* already shutting down? */ ng_rmnode_self(NG_HOOK_NODE(hook)); return (0); } Index: user/alc/PQ_LAUNDRY/sys/netgraph/ng_patch.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/netgraph/ng_patch.h (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/netgraph/ng_patch.h (revision 303642) @@ -1,107 +1,107 @@ /*- * Copyright (C) 2010 by Maxim Ignatenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NETGRAPH_NG_PATCH_H_ #define _NETGRAPH_NG_PATCH_H_ /* Node type name. */ #define NG_PATCH_NODE_TYPE "patch" /* Node type cookie. 
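A minimal user-space sketch of configuring the node implemented above, assuming libnetgraph; the control-socket descriptor, node path, patch offset and value are invented for illustration and error handling is abbreviated:

#include <sys/types.h>
#include <stdint.h>
#include <stdlib.h>
#include <netgraph.h>
#include <netgraph/ng_patch.h>

/* Build and send a one-operation NGM_PATCH_SETCONFIG message. */
static int
example_setconfig(int cs, const char *path)
{
	struct ng_patch_config *cfg;
	size_t len;

	len = sizeof(*cfg) + 1 * sizeof(struct ng_patch_op);
	cfg = calloc(1, len);
	if (cfg == NULL)
		return (-1);
	cfg->count = 1;
	cfg->csum_flags = 0;			/* no checksum recomputation */
	cfg->ops[0].offset = 8;			/* patch one byte at offset 8 */
	cfg->ops[0].length = 1;			/* must be 1, 2, 4 or 8 */
	cfg->ops[0].mode = NG_PATCH_MODE_SET;
	cfg->ops[0].value = 0x40;
	if (NgSendMsg(cs, path, NGM_PATCH_COOKIE, NGM_PATCH_SETCONFIG,
	    cfg, len) < 0) {
		free(cfg);
		return (-1);
	}
	free(cfg);
	return (0);
}

The same message can also be written in ASCII through ngctl(8); the NG_PATCH_*_TYPE_INFO descriptions in the header exist to support that conversion.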
*/ #define NGM_PATCH_COOKIE 1262445509 /* Hook names */ #define NG_PATCH_HOOK_IN "in" #define NG_PATCH_HOOK_OUT "out" /* Netgraph commands understood by this node type */ enum { NGM_PATCH_SETCONFIG = 1, NGM_PATCH_GETCONFIG, NGM_PATCH_GET_STATS, NGM_PATCH_CLR_STATS, NGM_PATCH_GETCLR_STATS }; /* Patching modes */ enum { NG_PATCH_MODE_SET = 1, NG_PATCH_MODE_ADD = 2, NG_PATCH_MODE_SUB = 3, NG_PATCH_MODE_MUL = 4, NG_PATCH_MODE_DIV = 5, NG_PATCH_MODE_NEG = 6, NG_PATCH_MODE_AND = 7, - NG_PATCH_MODE_OR = 8, + NG_PATCH_MODE_OR = 8, NG_PATCH_MODE_XOR = 9, NG_PATCH_MODE_SHL = 10, NG_PATCH_MODE_SHR = 11 }; struct ng_patch_op { uint64_t value; uint32_t offset; - uint16_t length; /* 1,2,4 or 8 (bytes) */ + uint16_t length; /* 1, 2, 4 or 8 (bytes) */ uint16_t mode; }; -#define NG_PATCH_OP_TYPE_INFO { \ - { "value", &ng_parse_uint64_type }, \ - { "offset", &ng_parse_uint32_type }, \ - { "length", &ng_parse_uint16_type }, \ - { "mode", &ng_parse_uint16_type }, \ - { NULL } \ +#define NG_PATCH_OP_TYPE_INFO { \ + { "value", &ng_parse_uint64_type }, \ + { "offset", &ng_parse_uint32_type }, \ + { "length", &ng_parse_uint16_type }, \ + { "mode", &ng_parse_uint16_type }, \ + { NULL } \ } struct ng_patch_config { uint32_t count; uint32_t csum_flags; struct ng_patch_op ops[]; }; -#define NG_PATCH_CONFIG_TYPE_INFO { \ - { "count", &ng_parse_uint32_type }, \ - { "csum_flags", &ng_parse_uint32_type }, \ - { "ops", &ng_patch_confarr_type }, \ - { NULL } \ +#define NG_PATCH_CONFIG_TYPE_INFO { \ + { "count", &ng_parse_uint32_type }, \ + { "csum_flags", &ng_parse_uint32_type }, \ + { "ops", &ng_patch_ops_array_type }, \ + { NULL } \ } struct ng_patch_stats { uint64_t received; uint64_t patched; uint64_t dropped; }; -#define NG_PATCH_STATS_TYPE_INFO { \ - { "received", &ng_parse_uint64_type }, \ - { "patched", &ng_parse_uint64_type }, \ - { "dropped", &ng_parse_uint64_type }, \ - { NULL } \ +#define NG_PATCH_STATS_TYPE_INFO { \ + { "received", &ng_parse_uint64_type }, \ + { "patched", &ng_parse_uint64_type }, \ + { "dropped", &ng_parse_uint64_type }, \ + { NULL } \ } #endif /* _NETGRAPH_NG_PATCH_H_ */ Index: user/alc/PQ_LAUNDRY/sys/netinet/tcp_subr.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/netinet/tcp_subr.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/netinet/tcp_subr.c (revision 303642) @@ -1,3018 +1,3093 @@ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_tcpdebug.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 +#include #include #include #include #include #include #include #endif #ifdef TCP_RFC7413 #include #endif #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #ifdef TCPPCAP #include #endif #ifdef TCPDEBUG #include #endif #ifdef INET6 #include #endif #ifdef TCP_OFFLOAD #include #endif #ifdef IPSEC #include #include #ifdef INET6 #include #endif #include #include #endif /*IPSEC*/ #include #include #include VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS; #ifdef INET6 VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS; #endif struct rwlock tcp_function_lock; static int sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS) { int error, new; new = V_tcp_mssdflt; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr) { if (new < TCP_MINMSS) error = EINVAL; else V_tcp_mssdflt = new; } return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_mssdflt), 0, &sysctl_net_inet_tcp_mss_check, "I", "Default TCP Maximum Segment Size"); #ifdef INET6 static int sysctl_net_inet_tcp_mss_v6_check(SYSCTL_HANDLER_ARGS) { int error, new; new = V_tcp_v6mssdflt; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr) { if (new < TCP_MINMSS) error = EINVAL; else V_tcp_v6mssdflt = new; } return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_v6mssdflt), 0, &sysctl_net_inet_tcp_mss_v6_check, "I", "Default TCP Maximum Segment Size for IPv6"); #endif /* INET6 */ /* * Minimum MSS we accept and use. This prevents DoS attacks where * we are forced to a ridiculous low MSS like 20 and send hundreds * of packets instead of one. The effect scales with the available * bandwidth and quickly saturates the CPU and network interface * with packet generation and sending. Set to zero to disable MINMSS * checking. This setting prevents us from sending too small packets. 
*/ VNET_DEFINE(int, tcp_minmss) = TCP_MINMSS; SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_minmss), 0, "Minimum TCP Maximum Segment Size"); VNET_DEFINE(int, tcp_do_rfc1323) = 1; SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_rfc1323), 0, "Enable rfc1323 (high performance TCP) extensions"); static int tcp_log_debug = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW, &tcp_log_debug, 0, "Log errors caused by incoming TCP segments"); static int tcp_tcbhashsize; SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable"); static int do_tcpdrain = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, "Enable tcp_drain routine for extra help when low on mbufs"); SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcbinfo.ipi_count), 0, "Number of active PCBs"); static VNET_DEFINE(int, icmp_may_rst) = 1; #define V_icmp_may_rst VNET(icmp_may_rst) SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp_may_rst), 0, "Certain ICMP unreachable messages may abort connections in SYN_SENT"); static VNET_DEFINE(int, tcp_isn_reseed_interval) = 0; #define V_tcp_isn_reseed_interval VNET(tcp_isn_reseed_interval) SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_isn_reseed_interval), 0, "Seconds between reseeding of ISN secret"); static int tcp_soreceive_stream; SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN, &tcp_soreceive_stream, 0, "Using soreceive_stream for TCP sockets"); #ifdef TCP_SIGNATURE static int tcp_sig_checksigs = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, signature_verify_input, CTLFLAG_RW, &tcp_sig_checksigs, 0, "Verify RFC2385 digests on inbound traffic"); #endif VNET_DEFINE(uma_zone_t, sack_hole_zone); #define V_sack_hole_zone VNET(sack_hole_zone) VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]); static struct inpcb *tcp_notify(struct inpcb *, int); static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int); static void tcp_mtudisc(struct inpcb *, int); static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr); static struct tcp_function_block tcp_def_funcblk = { "default", tcp_output, tcp_do_segment, tcp_default_ctloutput, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0 }; int t_functions_inited = 0; struct tcp_funchead t_functions; static struct tcp_function_block *tcp_func_set_ptr = &tcp_def_funcblk; static void init_tcp_functions(void) { if (t_functions_inited == 0) { TAILQ_INIT(&t_functions); rw_init_flags(&tcp_function_lock, "tcp_func_lock" , 0); t_functions_inited = 1; } } static struct tcp_function_block * find_tcp_functions_locked(struct tcp_function_set *fs) { struct tcp_function *f; struct tcp_function_block *blk=NULL; TAILQ_FOREACH(f, &t_functions, tf_next) { if (strcmp(f->tf_fb->tfb_tcp_block_name, fs->function_set_name) == 0) { blk = f->tf_fb; break; } } return(blk); } static struct tcp_function_block * find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s) { struct tcp_function_block *rblk=NULL; struct tcp_function *f; TAILQ_FOREACH(f, &t_functions, tf_next) { if (f->tf_fb == blk) { rblk = blk; if (s) { *s = f; } break; } } return (rblk); } struct tcp_function_block * find_and_ref_tcp_functions(struct tcp_function_set *fs) { struct tcp_function_block *blk; 
rw_rlock(&tcp_function_lock); blk = find_tcp_functions_locked(fs); if (blk) refcount_acquire(&blk->tfb_refcnt); rw_runlock(&tcp_function_lock); return(blk); } struct tcp_function_block * find_and_ref_tcp_fb(struct tcp_function_block *blk) { struct tcp_function_block *rblk; rw_rlock(&tcp_function_lock); rblk = find_tcp_fb_locked(blk, NULL); if (rblk) refcount_acquire(&rblk->tfb_refcnt); rw_runlock(&tcp_function_lock); return(rblk); } static int sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS) { int error=ENOENT; struct tcp_function_set fs; struct tcp_function_block *blk; memset(&fs, 0, sizeof(fs)); rw_rlock(&tcp_function_lock); blk = find_tcp_fb_locked(tcp_func_set_ptr, NULL); if (blk) { /* Found him */ strcpy(fs.function_set_name, blk->tfb_tcp_block_name); fs.pcbcnt = blk->tfb_refcnt; } rw_runlock(&tcp_function_lock); error = sysctl_handle_string(oidp, fs.function_set_name, sizeof(fs.function_set_name), req); /* Check for error or no change */ if (error != 0 || req->newptr == NULL) return(error); rw_wlock(&tcp_function_lock); blk = find_tcp_functions_locked(&fs); if ((blk == NULL) || (blk->tfb_flags & TCP_FUNC_BEING_REMOVED)) { error = ENOENT; goto done; } tcp_func_set_ptr = blk; done: rw_wunlock(&tcp_function_lock); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_default, CTLTYPE_STRING | CTLFLAG_RW, NULL, 0, sysctl_net_inet_default_tcp_functions, "A", "Set/get the default TCP functions"); static int sysctl_net_inet_list_available(SYSCTL_HANDLER_ARGS) { int error, cnt, linesz; struct tcp_function *f; char *buffer, *cp; size_t bufsz, outsz; cnt = 0; rw_rlock(&tcp_function_lock); TAILQ_FOREACH(f, &t_functions, tf_next) { cnt++; } rw_runlock(&tcp_function_lock); bufsz = (cnt+2) * (TCP_FUNCTION_NAME_LEN_MAX + 12) + 1; buffer = malloc(bufsz, M_TEMP, M_WAITOK); error = 0; cp = buffer; linesz = snprintf(cp, bufsz, "\n%-32s%c %s\n", "Stack", 'D', "PCB count"); cp += linesz; bufsz -= linesz; outsz = linesz; rw_rlock(&tcp_function_lock); TAILQ_FOREACH(f, &t_functions, tf_next) { linesz = snprintf(cp, bufsz, "%-32s%c %u\n", f->tf_fb->tfb_tcp_block_name, (f->tf_fb == tcp_func_set_ptr) ? '*' : ' ', f->tf_fb->tfb_refcnt); if (linesz >= bufsz) { error = EOVERFLOW; break; } cp += linesz; bufsz -= linesz; outsz += linesz; } rw_runlock(&tcp_function_lock); if (error == 0) error = sysctl_handle_string(oidp, buffer, outsz + 1, req); free(buffer, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available, CTLTYPE_STRING|CTLFLAG_RD, NULL, 0, sysctl_net_inet_list_available, "A", "list available TCP Function sets"); /* * Target size of TCP PCB hash tables. Must be a power of two. * * Note that this can be overridden by the kernel environment * variable net.inet.tcp.tcbhashsize */ #ifndef TCBHASHSIZE #define TCBHASHSIZE 0 #endif /* * XXX * Callouts should be moved into struct tcp directly. They are currently * separate because the tcpcb structure is exported to userland for sysctl * parsing purposes, which do not know about callouts. */ struct tcpcb_mem { struct tcpcb tcb; struct tcp_timer tt; struct cc_var ccv; struct osd osd; }; static VNET_DEFINE(uma_zone_t, tcpcb_zone); #define V_tcpcb_zone VNET(tcpcb_zone) MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers"); MALLOC_DEFINE(M_TCPFUNCTIONS, "tcpfunc", "TCP function set memory"); static struct mtx isn_mtx; #define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF) #define ISN_LOCK() mtx_lock(&isn_mtx) #define ISN_UNLOCK() mtx_unlock(&isn_mtx) /* * TCP initialization. 
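The functions_default and functions_available sysctls above operate on stacks that have registered a struct tcp_function_block. As a hedged sketch of that registration path (the "example" stack and its module glue are invented for illustration; for brevity it reuses the default handlers, where a real alternate stack would supply its own):

/* Illustrative only: re-exports the default handlers under a new name. */
static struct tcp_function_block example_funcblk = {
	.tfb_tcp_block_name = "example",
	.tfb_tcp_output = tcp_output,
	.tfb_tcp_do_segment = tcp_do_segment,
	.tfb_tcp_ctloutput = tcp_default_ctloutput,
};

static int
example_stack_modevent(module_t mod, int type, void *data)
{
	switch (type) {
	case MOD_LOAD:
		/* Fails with EALREADY if the name is already registered. */
		return (register_tcp_functions(&example_funcblk, M_WAITOK));
	case MOD_UNLOAD:
		/* Fails with EBUSY while connections still reference it. */
		return (deregister_tcp_functions(&example_funcblk));
	default:
		return (EOPNOTSUPP);
	}
}

Once registered, the new name appears in net.inet.tcp.functions_available and can be selected through net.inet.tcp.functions_default.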
*/ static void tcp_zone_change(void *tag) { uma_zone_set_max(V_tcbinfo.ipi_zone, maxsockets); uma_zone_set_max(V_tcpcb_zone, maxsockets); tcp_tw_zone_change(); } static int tcp_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp = mem; INP_LOCK_INIT(inp, "inp", "tcpinp"); return (0); } /* * Take a value and get the next power of 2 that doesn't overflow. * Used to size the tcp_inpcb hash buckets. */ static int maketcp_hashsize(int size) { int hashsize; /* * auto tune. * get the next power of 2 higher than maxsockets. */ hashsize = 1 << fls(size); /* catch overflow, and just go one power of 2 smaller */ if (hashsize < size) { hashsize = 1 << (fls(size) - 1); } return (hashsize); } int register_tcp_functions(struct tcp_function_block *blk, int wait) { struct tcp_function_block *lblk; struct tcp_function *n; struct tcp_function_set fs; if (t_functions_inited == 0) { init_tcp_functions(); } if ((blk->tfb_tcp_output == NULL) || (blk->tfb_tcp_do_segment == NULL) || (blk->tfb_tcp_ctloutput == NULL) || (strlen(blk->tfb_tcp_block_name) == 0)) { /* * These functions are required and you * need a name. */ return (EINVAL); } if (blk->tfb_tcp_timer_stop_all || blk->tfb_tcp_timer_activate || blk->tfb_tcp_timer_active || blk->tfb_tcp_timer_stop) { /* * If you define one timer function you * must have them all. */ if ((blk->tfb_tcp_timer_stop_all == NULL) || (blk->tfb_tcp_timer_activate == NULL) || (blk->tfb_tcp_timer_active == NULL) || (blk->tfb_tcp_timer_stop == NULL)) { return (EINVAL); } } n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait); if (n == NULL) { return (ENOMEM); } n->tf_fb = blk; strcpy(fs.function_set_name, blk->tfb_tcp_block_name); rw_wlock(&tcp_function_lock); lblk = find_tcp_functions_locked(&fs); if (lblk) { /* Duplicate name space not allowed */ rw_wunlock(&tcp_function_lock); free(n, M_TCPFUNCTIONS); return (EALREADY); } refcount_init(&blk->tfb_refcnt, 0); blk->tfb_flags = 0; TAILQ_INSERT_TAIL(&t_functions, n, tf_next); rw_wunlock(&tcp_function_lock); return(0); } int deregister_tcp_functions(struct tcp_function_block *blk) { struct tcp_function_block *lblk; struct tcp_function *f; int error=ENOENT; if (strcmp(blk->tfb_tcp_block_name, "default") == 0) { /* You can't un-register the default */ return (EPERM); } rw_wlock(&tcp_function_lock); if (blk == tcp_func_set_ptr) { /* You can't free the current default */ rw_wunlock(&tcp_function_lock); return (EBUSY); } if (blk->tfb_refcnt) { /* Still tcb attached, mark it. */ blk->tfb_flags |= TCP_FUNC_BEING_REMOVED; rw_wunlock(&tcp_function_lock); return (EBUSY); } lblk = find_tcp_fb_locked(blk, &f); if (lblk) { /* Found */ TAILQ_REMOVE(&t_functions, f, tf_next); f->tf_fb = NULL; free(f, M_TCPFUNCTIONS); error = 0; } rw_wunlock(&tcp_function_lock); return (error); } void tcp_init(void) { const char *tcbhash_tuneable; int hashsize; tcbhash_tuneable = "net.inet.tcp.tcbhashsize"; if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN, &V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register helper hook\n", __func__); if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, &V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register helper hook\n", __func__); hashsize = TCBHASHSIZE; TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize); if (hashsize == 0) { /* * Auto tune the hash size based on maxsockets. 
* A perfect hash would have a 1:1 mapping * (hashsize = maxsockets) however it's been * suggested that O(2) average is better. */ hashsize = maketcp_hashsize(maxsockets / 4); /* * Our historical default is 512, * do not autotune lower than this. */ if (hashsize < 512) hashsize = 512; if (bootverbose && IS_DEFAULT_VNET(curvnet)) printf("%s: %s auto tuned to %d\n", __func__, tcbhash_tuneable, hashsize); } /* * We require a hashsize to be a power of two. * Previously if it was not a power of two we would just reset it * back to 512, which could be a nasty surprise if you did not notice * the error message. * Instead what we do is clip it to the closest power of two lower * than the specified hash value. */ if (!powerof2(hashsize)) { int oldhashsize = hashsize; hashsize = maketcp_hashsize(hashsize); /* prevent absurdly low value */ if (hashsize < 16) hashsize = 16; printf("%s: WARNING: TCB hash size not a power of 2, " "clipped from %d to %d.\n", __func__, oldhashsize, hashsize); } in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize, "tcp_inpcb", tcp_inpcb_init, NULL, 0, IPI_HASHFIELDS_4TUPLE); /* * These have to be type stable for the benefit of the timers. */ V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_zone_set_max(V_tcpcb_zone, maxsockets); uma_zone_set_warning(V_tcpcb_zone, "kern.ipc.maxsockets limit reached"); tcp_tw_init(); syncache_init(); tcp_hc_init(); TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack); V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* Skip initialization of globals for non-default instances. */ if (!IS_DEFAULT_VNET(curvnet)) return; tcp_reass_global_init(); /* XXX virtualize those bellow? */ tcp_delacktime = TCPTV_DELACK; tcp_keepinit = TCPTV_KEEP_INIT; tcp_keepidle = TCPTV_KEEP_IDLE; tcp_keepintvl = TCPTV_KEEPINTVL; tcp_maxpersistidle = TCPTV_KEEP_IDLE; tcp_msl = TCPTV_MSL; tcp_rexmit_min = TCPTV_MIN; if (tcp_rexmit_min < 1) tcp_rexmit_min = 1; tcp_persmin = TCPTV_PERSMIN; tcp_persmax = TCPTV_PERSMAX; tcp_rexmit_slop = TCPTV_CPU_VAR; tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT; tcp_tcbhashsize = hashsize; /* Setup the tcp function block list */ init_tcp_functions(); register_tcp_functions(&tcp_def_funcblk, M_WAITOK); if (tcp_soreceive_stream) { #ifdef INET tcp_usrreqs.pru_soreceive = soreceive_stream; #endif #ifdef INET6 tcp6_usrreqs.pru_soreceive = soreceive_stream; #endif /* INET6 */ } #ifdef INET6 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) #else /* INET6 */ #define TCP_MINPROTOHDR (sizeof(struct tcpiphdr)) #endif /* INET6 */ if (max_protohdr < TCP_MINPROTOHDR) max_protohdr = TCP_MINPROTOHDR; if (max_linkhdr + TCP_MINPROTOHDR > MHLEN) panic("tcp_init"); #undef TCP_MINPROTOHDR ISN_LOCK_INIT(); EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL, SHUTDOWN_PRI_DEFAULT); EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL, EVENTHANDLER_PRI_ANY); #ifdef TCPPCAP tcp_pcap_init(); #endif #ifdef TCP_RFC7413 tcp_fastopen_init(); #endif } #ifdef VIMAGE static void tcp_destroy(void *unused __unused) { int error, n; /* * All our processes are gone, all our sockets should be cleaned * up, which means, we should be past the tcp_discardcb() calls. * Sleep to let all tcpcb timers really disappear and cleanup. 
*/ for (;;) { INP_LIST_RLOCK(&V_tcbinfo); n = V_tcbinfo.ipi_count; INP_LIST_RUNLOCK(&V_tcbinfo); if (n == 0) break; pause("tcpdes", hz / 10); } tcp_hc_destroy(); syncache_destroy(); tcp_tw_destroy(); in_pcbinfo_destroy(&V_tcbinfo); /* tcp_discardcb() clears the sack_holes up. */ uma_zdestroy(V_sack_hole_zone); uma_zdestroy(V_tcpcb_zone); #ifdef TCP_RFC7413 /* * Cannot free the zone until all tcpcbs are released as we attach * the allocations to them. */ tcp_fastopen_destroy(); #endif error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_IN]); if (error != 0) { printf("%s: WARNING: unable to deregister helper hook " "type=%d, id=%d: error %d returned\n", __func__, HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN, error); } error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_OUT]); if (error != 0) { printf("%s: WARNING: unable to deregister helper hook " "type=%d, id=%d: error %d returned\n", __func__, HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, error); } } VNET_SYSUNINIT(tcp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, tcp_destroy, NULL); #endif void tcp_fini(void *xtp) { } /* * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb. * tcp_template used to store this data in mbufs, but we now recopy it out * of the tcpcb each time to conserve mbufs. */ void tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr) { struct tcphdr *th = (struct tcphdr *)tcp_ptr; INP_WLOCK_ASSERT(inp); #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { struct ip6_hdr *ip6; ip6 = (struct ip6_hdr *)ip_ptr; ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (inp->inp_flow & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_plen = htons(sizeof(struct tcphdr)); ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = inp->in6p_faddr; } #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET { struct ip *ip; ip = (struct ip *)ip_ptr; ip->ip_v = IPVERSION; ip->ip_hl = 5; ip->ip_tos = inp->inp_ip_tos; ip->ip_len = 0; ip->ip_id = 0; ip->ip_off = 0; ip->ip_ttl = inp->inp_ip_ttl; ip->ip_sum = 0; ip->ip_p = IPPROTO_TCP; ip->ip_src = inp->inp_laddr; ip->ip_dst = inp->inp_faddr; } #endif /* INET */ th->th_sport = inp->inp_lport; th->th_dport = inp->inp_fport; th->th_seq = 0; th->th_ack = 0; th->th_x2 = 0; th->th_off = 5; th->th_flags = 0; th->th_win = 0; th->th_urp = 0; th->th_sum = 0; /* in_pseudo() is called later for ipv4 */ } /* * Create template to be used to send tcp packets on a connection. * Allocates an mbuf and fills in a skeletal tcp/ip header. The only * use for this function is in keepalives, which use tcp_respond. */ struct tcptemp * tcpip_maketemplate(struct inpcb *inp) { struct tcptemp *t; t = malloc(sizeof(*t), M_TEMP, M_NOWAIT); if (t == NULL) return (NULL); tcpip_fillheaders(inp, (void *)&t->tt_ipgen, (void *)&t->tt_t); return (t); } /* * Send a single message to the TCP at address specified by * the given TCP/IP header. If m == NULL, then we make a copy * of the tcpiphdr at th and send directly to the addressed host. * This is used to force keep alive messages out using the TCP * template for a connection. If flags are given then we send * a message back to the TCP which originated the segment th, * and discard the mbuf containing it and any other attached mbufs. * * In any case the ack and sequence number of the transmitted * segment are as specified by the parameters. * * NOTE: If m != NULL, then th must point to *inside* the mbuf. 
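A usage sketch of the template/respond pair described above, roughly what the keepalive path mentioned in the comment does (locking and error handling omitted):

	struct tcptemp *t;

	t = tcpip_maketemplate(inp);
	if (t != NULL) {
		/* ACK-only probe: a stale sequence number makes the peer
		 * answer with its current state. */
		tcp_respond(tp, t->tt_ipgen, &t->tt_t, (struct mbuf *)NULL,
		    tp->rcv_nxt, tp->snd_una - 1, 0);
		free(t, M_TEMP);
	}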
*/ void tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, tcp_seq ack, tcp_seq seq, int flags) { struct tcpopt to; struct inpcb *inp; struct ip *ip; struct mbuf *optm; struct tcphdr *nth; u_char *optp; #ifdef INET6 struct ip6_hdr *ip6; int isipv6; #endif /* INET6 */ int optlen, tlen, win; bool incl_opts; KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL")); #ifdef INET6 isipv6 = ((struct ip *)ipgen)->ip_v == (IPV6_VERSION >> 4); ip6 = ipgen; #endif /* INET6 */ ip = ipgen; if (tp != NULL) { inp = tp->t_inpcb; KASSERT(inp != NULL, ("tcp control block w/o inpcb")); INP_WLOCK_ASSERT(inp); } else inp = NULL; incl_opts = false; win = 0; if (tp != NULL) { if (!(flags & TH_RST)) { win = sbspace(&inp->inp_socket->so_rcv); if (win > (long)TCP_MAXWIN << tp->rcv_scale) win = (long)TCP_MAXWIN << tp->rcv_scale; } if ((tp->t_flags & TF_NOOPT) == 0) incl_opts = true; } if (m == NULL) { m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) return; m->m_data += max_linkhdr; #ifdef INET6 if (isipv6) { bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(struct ip6_hdr)); ip6 = mtod(m, struct ip6_hdr *); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); ip = mtod(m, struct ip *); nth = (struct tcphdr *)(ip + 1); } bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); flags = TH_ACK; } else if (!M_WRITABLE(m)) { struct mbuf *n; /* Can't reuse 'm', allocate a new mbuf. */ n = m_gethdr(M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return; } if (!m_dup_pkthdr(n, m, M_NOWAIT)) { m_freem(m); m_freem(n); return; } n->m_data += max_linkhdr; /* m_len is set later */ #define xchg(a,b,type) { type t; t=a; a=b; b=t; } #ifdef INET6 if (isipv6) { bcopy((caddr_t)ip6, mtod(n, caddr_t), sizeof(struct ip6_hdr)); ip6 = mtod(n, struct ip6_hdr *); xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { bcopy((caddr_t)ip, mtod(n, caddr_t), sizeof(struct ip)); ip = mtod(n, struct ip *); xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t); nth = (struct tcphdr *)(ip + 1); } bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); xchg(nth->th_dport, nth->th_sport, uint16_t); th = nth; m_freem(m); m = n; } else { /* * reuse the mbuf. * XXX MRT We inherit the FIB, which is lucky. */ m_freem(m->m_next); m->m_next = NULL; m->m_data = (caddr_t)ipgen; /* m_len is set later */ #ifdef INET6 if (isipv6) { xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr); nth = (struct tcphdr *)(ip6 + 1); } else #endif /* INET6 */ { xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t); nth = (struct tcphdr *)(ip + 1); } if (th != nth) { /* * this is usually a case when an extension header * exists between the IPv6 header and the * TCP header. */ nth->th_sport = th->th_sport; nth->th_dport = th->th_dport; } xchg(nth->th_dport, nth->th_sport, uint16_t); #undef xchg } tlen = 0; #ifdef INET6 if (isipv6) tlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr); #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET tlen = sizeof (struct tcpiphdr); #endif #ifdef INVARIANTS m->m_len = 0; KASSERT(M_TRAILINGSPACE(m) >= tlen, ("Not enough trailing space for message (m=%p, need=%d, have=%ld)", m, tlen, (long)M_TRAILINGSPACE(m))); #endif m->m_len = tlen; to.to_flags = 0; if (incl_opts) { /* Make sure we have room. 
*/ if (M_TRAILINGSPACE(m) < TCP_MAXOLEN) { m->m_next = m_get(M_NOWAIT, MT_DATA); if (m->m_next) { optp = mtod(m->m_next, u_char *); optm = m->m_next; } else incl_opts = false; } else { optp = (u_char *) (nth + 1); optm = m; } } if (incl_opts) { /* Timestamps. */ if (tp->t_flags & TF_RCVD_TSTMP) { to.to_tsval = tcp_ts_getticks() + tp->ts_offset; to.to_tsecr = tp->ts_recent; to.to_flags |= TOF_TS; } #ifdef TCP_SIGNATURE /* TCP-MD5 (RFC2385). */ if (tp->t_flags & TF_SIGNATURE) to.to_flags |= TOF_SIGNATURE; #endif /* Add the options. */ tlen += optlen = tcp_addoptions(&to, optp); /* Update m_len in the correct mbuf. */ optm->m_len += optlen; } else optlen = 0; #ifdef INET6 if (isipv6) { ip6->ip6_flow = 0; ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_plen = htons(tlen - sizeof(*ip6)); } #endif #if defined(INET) && defined(INET6) else #endif #ifdef INET { ip->ip_len = htons(tlen); ip->ip_ttl = V_ip_defttl; if (V_path_mtu_discovery) ip->ip_off |= htons(IP_DF); } #endif m->m_pkthdr.len = tlen; m->m_pkthdr.rcvif = NULL; #ifdef MAC if (inp != NULL) { /* * Packet is associated with a socket, so allow the * label of the response to reflect the socket label. */ INP_WLOCK_ASSERT(inp); mac_inpcb_create_mbuf(inp, m); } else { /* * Packet is not associated with a socket, so possibly * update the label in place. */ mac_netinet_tcp_reply(m); } #endif nth->th_seq = htonl(seq); nth->th_ack = htonl(ack); nth->th_x2 = 0; nth->th_off = (sizeof (struct tcphdr) + optlen) >> 2; nth->th_flags = flags; if (tp != NULL) nth->th_win = htons((u_short) (win >> tp->rcv_scale)); else nth->th_win = htons((u_short)win); nth->th_urp = 0; #ifdef TCP_SIGNATURE if (to.to_flags & TOF_SIGNATURE) { tcp_signature_compute(m, 0, 0, optlen, to.to_signature, IPSEC_DIR_OUTBOUND); } #endif m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (isipv6) { m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; nth->th_sum = in6_cksum_pseudo(ip6, tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0); ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb : NULL, NULL); } #endif /* INET6 */ #if defined(INET6) && defined(INET) else #endif #ifdef INET { m->m_pkthdr.csum_flags = CSUM_TCP; nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); } #endif /* INET */ #ifdef TCPDEBUG if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0); #endif TCP_PROBE3(debug__output, tp, th, mtod(m, const char *)); if (flags & TH_RST) TCP_PROBE5(accept__refused, NULL, NULL, mtod(m, const char *), tp, nth); TCP_PROBE5(send, NULL, tp, mtod(m, const char *), tp, nth); #ifdef INET6 if (isipv6) (void) ip6_output(m, NULL, NULL, 0, NULL, NULL, inp); #endif /* INET6 */ #if defined(INET) && defined(INET6) else #endif #ifdef INET (void) ip_output(m, NULL, NULL, 0, NULL, inp); #endif } /* * Create a new TCP control block, making an * empty reassembly queue and hooking it to the argument * protocol control block. The `inp' parameter must have * come from the zone allocator set up in tcp_init(). */ struct tcpcb * tcp_newtcpcb(struct inpcb *inp) { struct tcpcb_mem *tm; struct tcpcb *tp; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ tm = uma_zalloc(V_tcpcb_zone, M_NOWAIT | M_ZERO); if (tm == NULL) return (NULL); tp = &tm->tcb; /* Initialise cc_var struct for this tcpcb. 
*/ tp->ccv = &tm->ccv; tp->ccv->type = IPPROTO_TCP; tp->ccv->ccvc.tcp = tp; rw_rlock(&tcp_function_lock); tp->t_fb = tcp_func_set_ptr; refcount_acquire(&tp->t_fb->tfb_refcnt); rw_runlock(&tcp_function_lock); if (tp->t_fb->tfb_tcp_fb_init) { (*tp->t_fb->tfb_tcp_fb_init)(tp); } /* * Use the current system default CC algorithm. */ CC_LIST_RLOCK(); KASSERT(!STAILQ_EMPTY(&cc_list), ("cc_list is empty!")); CC_ALGO(tp) = CC_DEFAULT(); CC_LIST_RUNLOCK(); if (CC_ALGO(tp)->cb_init != NULL) if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) { if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp); refcount_release(&tp->t_fb->tfb_refcnt); uma_zfree(V_tcpcb_zone, tm); return (NULL); } tp->osd = &tm->osd; if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) { if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp); refcount_release(&tp->t_fb->tfb_refcnt); uma_zfree(V_tcpcb_zone, tm); return (NULL); } #ifdef VIMAGE tp->t_vnet = inp->inp_vnet; #endif tp->t_timers = &tm->tt; /* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */ tp->t_maxseg = #ifdef INET6 isipv6 ? V_tcp_v6mssdflt : #endif /* INET6 */ V_tcp_mssdflt; /* Set up our timeouts. */ callout_init(&tp->t_timers->tt_rexmt, 1); callout_init(&tp->t_timers->tt_persist, 1); callout_init(&tp->t_timers->tt_keep, 1); callout_init(&tp->t_timers->tt_2msl, 1); callout_init(&tp->t_timers->tt_delack, 1); if (V_tcp_do_rfc1323) tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP); if (V_tcp_do_sack) tp->t_flags |= TF_SACK_PERMIT; TAILQ_INIT(&tp->snd_holes); /* * The tcpcb will hold a reference on its inpcb until tcp_discardcb() * is called. */ in_pcbref(inp); /* Reference for tcpcb */ tp->t_inpcb = inp; /* * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives * reasonable initial retransmit time. */ tp->t_srtt = TCPTV_SRTTBASE; tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4; tp->t_rttmin = tcp_rexmit_min; tp->t_rxtcur = TCPTV_RTOBASE; tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; tp->t_rcvtime = ticks; /* * IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = V_ip_defttl; inp->inp_ppcb = tp; #ifdef TCPPCAP /* * Init the TCP PCAP queues. */ tcp_pcap_tcpcb_init(tp); #endif return (tp); /* XXX */ } /* * Switch the congestion control algorithm back to NewReno for any active * control blocks using an algorithm which is about to go away. * This ensures the CC framework can allow the unload to proceed without leaving * any dangling pointers which would trigger a panic. * Returning non-zero would inform the CC framework that something went wrong * and it would be unsafe to allow the unload to proceed. However, there is no * way for this to occur with this implementation so we always return zero. */ int tcp_ccalgounload(struct cc_algo *unload_algo) { struct cc_algo *tmpalgo; struct inpcb *inp; struct tcpcb *tp; VNET_ITERATOR_DECL(vnet_iter); /* * Check all active control blocks across all network stacks and change * any that are using "unload_algo" back to NewReno. If "unload_algo" * requires cleanup code to be run, call it. 
*/ VNET_LIST_RLOCK(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); INP_INFO_WLOCK(&V_tcbinfo); /* * New connections already part way through being initialised * with the CC algo we're removing will not race with this code * because the INP_INFO_WLOCK is held during initialisation. We * therefore don't enter the loop below until the connection * list has stabilised. */ LIST_FOREACH(inp, &V_tcb, inp_list) { INP_WLOCK(inp); /* Important to skip tcptw structs. */ if (!(inp->inp_flags & INP_TIMEWAIT) && (tp = intotcpcb(inp)) != NULL) { /* * By holding INP_WLOCK here, we are assured * that the connection is not currently * executing inside the CC module's functions * i.e. it is safe to make the switch back to * NewReno. */ if (CC_ALGO(tp) == unload_algo) { tmpalgo = CC_ALGO(tp); /* NewReno does not require any init. */ CC_ALGO(tp) = &newreno_cc_algo; if (tmpalgo->cb_destroy != NULL) tmpalgo->cb_destroy(tp->ccv); } } INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK(); return (0); } /* * Drop a TCP connection, reporting * the specified error. If connection is synchronized, * then send a RST to peer. */ struct tcpcb * tcp_drop(struct tcpcb *tp, int errno) { struct socket *so = tp->t_inpcb->inp_socket; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(tp->t_inpcb); if (TCPS_HAVERCVDSYN(tp->t_state)) { tcp_state_change(tp, TCPS_CLOSED); (void) tp->t_fb->tfb_tcp_output(tp); TCPSTAT_INC(tcps_drops); } else TCPSTAT_INC(tcps_conndrops); if (errno == ETIMEDOUT && tp->t_softerror) errno = tp->t_softerror; so->so_error = errno; return (tcp_close(tp)); } void tcp_discardcb(struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; #ifdef INET6 int isipv6 = (inp->inp_vflag & INP_IPV6) != 0; #endif /* INET6 */ int released; INP_WLOCK_ASSERT(inp); /* * Make sure that all of our timers are stopped before we delete the * PCB. * * If stopping a timer fails, we schedule a discard function in same * callout, and the last discard function called will take care of * deleting the tcpcb. */ tp->t_timers->tt_draincnt = 0; tcp_timer_stop(tp, TT_REXMT); tcp_timer_stop(tp, TT_PERSIST); tcp_timer_stop(tp, TT_KEEP); tcp_timer_stop(tp, TT_2MSL); tcp_timer_stop(tp, TT_DELACK); if (tp->t_fb->tfb_tcp_timer_stop_all) { /* * Call the stop-all function of the methods, * this function should call the tcp_timer_stop() * method with each of the function specific timeouts. * That stop will be called via the tfb_tcp_timer_stop() * which should use the async drain function of the * callout system (see tcp_var.h). */ tp->t_fb->tfb_tcp_timer_stop_all(tp); } /* * If we got enough samples through the srtt filter, * save the rtt and rttvar in the routing entry. * 'Enough' is arbitrarily defined as 4 rtt samples. * 4 samples is enough for the srtt filter to converge * to within enough % of the correct value; fewer samples * and we could save a bogus rtt. The danger is not high * as tcp quickly recovers from everything. * XXX: Works very well but needs some more statistics! */ if (tp->t_rttupdated >= 4) { struct hc_metrics_lite metrics; u_long ssthresh; bzero(&metrics, sizeof(metrics)); /* * Update the ssthresh always when the conditions below * are satisfied. This gives us better new start value * for the congestion avoidance for new connections. * ssthresh is only set if packet loss occurred on a session. * * XXXRW: 'so' may be NULL here, and/or socket buffer may be * being torn down. Ideally this code would not use 'so'. 
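For a concrete sense of the conversion performed below: with ssthresh = 32768 bytes and t_maxseg = 1460, the rounded division gives (32768 + 730) / 1460 = 22 segments, which is scaled back to 22 * (1460 + 40) = 33000 bytes of packet data (IPv4 header sizes assumed) before being written to the host cache via tcp_hc_update().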
*/ ssthresh = tp->snd_ssthresh; if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) { /* * convert the limit from user data bytes to * packets then to packet data bytes. */ ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg; if (ssthresh < 2) ssthresh = 2; ssthresh *= (u_long)(tp->t_maxseg + #ifdef INET6 (isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : #endif sizeof (struct tcpiphdr) #ifdef INET6 ) #endif ); } else ssthresh = 0; metrics.rmx_ssthresh = ssthresh; metrics.rmx_rtt = tp->t_srtt; metrics.rmx_rttvar = tp->t_rttvar; metrics.rmx_cwnd = tp->snd_cwnd; metrics.rmx_sendpipe = 0; metrics.rmx_recvpipe = 0; tcp_hc_update(&inp->inp_inc, &metrics); } /* free the reassembly queue, if any */ tcp_reass_flush(tp); #ifdef TCP_OFFLOAD /* Disconnect offload device, if any. */ if (tp->t_flags & TF_TOE) tcp_offload_detach(tp); #endif tcp_free_sackholes(tp); #ifdef TCPPCAP /* Free the TCP PCAP queues. */ tcp_pcap_drain(&(tp->t_inpkts)); tcp_pcap_drain(&(tp->t_outpkts)); #endif /* Allow the CC algorithm to clean up after itself. */ if (CC_ALGO(tp)->cb_destroy != NULL) CC_ALGO(tp)->cb_destroy(tp->ccv); khelp_destroy_osd(tp->osd); CC_ALGO(tp) = NULL; inp->inp_ppcb = NULL; if (tp->t_timers->tt_draincnt == 0) { /* We own the last reference on tcpcb, let's free it. */ if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp); refcount_release(&tp->t_fb->tfb_refcnt); tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp); released = in_pcbrele_wlocked(inp); KASSERT(!released, ("%s: inp %p should not have been released " "here", __func__, inp)); } } void tcp_timer_discard(void *ptp) { struct inpcb *inp; struct tcpcb *tp; tp = (struct tcpcb *)ptp; CURVNET_SET(tp->t_vnet); INP_INFO_RLOCK(&V_tcbinfo); inp = tp->t_inpcb; KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); INP_WLOCK(inp); KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0, ("%s: tcpcb has to be stopped here", __func__)); tp->t_timers->tt_draincnt--; if (tp->t_timers->tt_draincnt == 0) { /* We own the last reference on this tcpcb, let's free it. */ if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp); refcount_release(&tp->t_fb->tfb_refcnt); tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp); if (in_pcbrele_wlocked(inp)) { INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } } INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } /* * Attempt to close a TCP control block, marking it as dropped, and freeing * the socket if we hold the only reference. */ struct tcpcb * tcp_close(struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; struct socket *so; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); #ifdef TCP_OFFLOAD if (tp->t_state == TCPS_LISTEN) tcp_offload_listen_stop(tp); #endif #ifdef TCP_RFC7413 /* * This releases the TFO pending counter resource for TFO listen * sockets as well as passively-created TFO sockets that transition * from SYN_RECEIVED to CLOSED. 
*/ if (tp->t_tfo_pending) { tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; } #endif in_pcbdrop(inp); TCPSTAT_INC(tcps_closed); TCPSTATES_DEC(tp->t_state); KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL")); so = inp->inp_socket; soisdisconnected(so); if (inp->inp_flags & INP_SOCKREF) { KASSERT(so->so_state & SS_PROTOREF, ("tcp_close: !SS_PROTOREF")); inp->inp_flags &= ~INP_SOCKREF; INP_WUNLOCK(inp); ACCEPT_LOCK(); SOCK_LOCK(so); so->so_state &= ~SS_PROTOREF; sofree(so); return (NULL); } return (tp); } void tcp_drain(void) { VNET_ITERATOR_DECL(vnet_iter); if (!do_tcpdrain) return; VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); struct inpcb *inpb; struct tcpcb *tcpb; /* * Walk the tcpbs, if existing, and flush the reassembly queue, * if there is one... * XXX: The "Net/3" implementation doesn't imply that the TCP * reassembly queue should be flushed, but in a situation * where we're really low on mbufs, this is potentially * useful. */ INP_INFO_WLOCK(&V_tcbinfo); LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) { if (inpb->inp_flags & INP_TIMEWAIT) continue; INP_WLOCK(inpb); if ((tcpb = intotcpcb(inpb)) != NULL) { tcp_reass_flush(tcpb); tcp_clean_sackreport(tcpb); #ifdef TCPPCAP if (tcp_pcap_aggressive_free) { /* Free the TCP PCAP queues. */ tcp_pcap_drain(&(tcpb->t_inpkts)); tcp_pcap_drain(&(tcpb->t_outpkts)); } #endif } INP_WUNLOCK(inpb); } INP_INFO_WUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } /* * Notify a tcp user of an asynchronous error; * store error as soft error, but wake up user * (for now, won't do anything until can select for soft error). * * Do not wake up user since there currently is no mechanism for * reporting soft errors (yet - a kqueue filter may be added). */ static struct inpcb * tcp_notify(struct inpcb *inp, int error) { struct tcpcb *tp; INP_INFO_LOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || (inp->inp_flags & INP_DROPPED)) return (inp); tp = intotcpcb(inp); KASSERT(tp != NULL, ("tcp_notify: tp == NULL")); /* * Ignore some errors if we are hooked up. * If connection hasn't completed, has retransmitted several times, * and receives a second error, give up now. This is better * than waiting a long time to establish a connection that * can never complete. */ if (tp->t_state == TCPS_ESTABLISHED && (error == EHOSTUNREACH || error == ENETUNREACH || error == EHOSTDOWN)) { if (inp->inp_route.ro_rt) { RTFREE(inp->inp_route.ro_rt); inp->inp_route.ro_rt = (struct rtentry *)NULL; } return (inp); } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && tp->t_softerror) { tp = tcp_drop(tp, error); if (tp != NULL) return (inp); else return (NULL); } else { tp->t_softerror = error; return (inp); } #if 0 wakeup( &so->so_timeo); sorwakeup(so); sowwakeup(so); #endif } static int tcp_pcblist(SYSCTL_HANDLER_ARGS) { int error, i, m, n, pcb_count; struct inpcb *inp, **inp_list; inp_gen_t gencnt; struct xinpgen xig; /* * The process of preparing the TCB list is too time-consuming and * resource-intensive to repeat twice on every request. */ if (req->oldptr == NULL) { n = V_tcbinfo.ipi_count + counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]); n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb); return (0); } if (req->newptr != NULL) return (EPERM); /* * OK, now we're committed to doing something. 
*/ INP_LIST_RLOCK(&V_tcbinfo); gencnt = V_tcbinfo.ipi_gencnt; n = V_tcbinfo.ipi_count; INP_LIST_RUNLOCK(&V_tcbinfo); m = counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]); error = sysctl_wire_old_buffer(req, 2 * (sizeof xig) + (n + m) * sizeof(struct xtcpcb)); if (error != 0) return (error); xig.xig_len = sizeof xig; xig.xig_count = n + m; xig.xig_gen = gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); error = syncache_pcblist(req, m, &pcb_count); if (error) return (error); inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK); INP_INFO_WLOCK(&V_tcbinfo); for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0; inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) { INP_WLOCK(inp); if (inp->inp_gencnt <= gencnt) { /* * XXX: This use of cr_cansee(), introduced with * TCP state changes, is not quite right, but for * now, better than nothing. */ if (inp->inp_flags & INP_TIMEWAIT) { if (intotw(inp) != NULL) error = cr_cansee(req->td->td_ucred, intotw(inp)->tw_cred); else error = EINVAL; /* Skip this inp. */ } else error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) { in_pcbref(inp); inp_list[i++] = inp; } } INP_WUNLOCK(inp); } INP_INFO_WUNLOCK(&V_tcbinfo); n = i; error = 0; for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (inp->inp_gencnt <= gencnt) { struct xtcpcb xt; void *inp_ppcb; bzero(&xt, sizeof(xt)); xt.xt_len = sizeof xt; /* XXX should avoid extra copy */ bcopy(inp, &xt.xt_inp, sizeof *inp); inp_ppcb = inp->inp_ppcb; if (inp_ppcb == NULL) bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); else if (inp->inp_flags & INP_TIMEWAIT) { bzero((char *) &xt.xt_tp, sizeof xt.xt_tp); xt.xt_tp.t_state = TCPS_TIME_WAIT; } else { bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp); if (xt.xt_tp.t_timers) tcp_timer_to_xtimer(&xt.xt_tp, xt.xt_tp.t_timers, &xt.xt_timer); } if (inp->inp_socket != NULL) sotoxsocket(inp->inp_socket, &xt.xt_socket); else { bzero(&xt.xt_socket, sizeof xt.xt_socket); xt.xt_socket.xso_protocol = IPPROTO_TCP; } xt.xt_inp.inp_gencnt = inp->inp_gencnt; INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xt, sizeof xt); } else INP_RUNLOCK(inp); } INP_INFO_RLOCK(&V_tcbinfo); for (i = 0; i < n; i++) { inp = inp_list[i]; INP_RLOCK(inp); if (!in_pcbrele_rlocked(inp)) INP_RUNLOCK(inp); } INP_INFO_RUNLOCK(&V_tcbinfo); if (!error) { /* * Give the user an updated idea of our state. * If the generation differs from what we told * her before, she knows that something happened * while we were processing this request, and it * might be necessary to retry. 
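Consumers of the pcblist sysctl implemented here follow the usual probe-then-fetch pattern; a hedged user-space sketch (parsing of the individual xtcpcb records is omitted):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdlib.h>

/* Fetch the raw net.inet.tcp.pcblist blob produced by the handler above. */
static void *
fetch_tcp_pcblist(size_t *lenp)
{
	void *buf;
	size_t len;

	/* First call: oldptr == NULL, so the kernel returns a size estimate. */
	if (sysctlbyname("net.inet.tcp.pcblist", NULL, &len, NULL, 0) < 0)
		return (NULL);
	if ((buf = malloc(len)) == NULL)
		return (NULL);
	/* Second call: real copy-out; may still fail if the list grew. */
	if (sysctlbyname("net.inet.tcp.pcblist", buf, &len, NULL, 0) < 0) {
		free(buf);
		return (NULL);
	}
	*lenp = len;
	return (buf);
}

Comparing the xig_gen values at the head and tail of the returned blob tells the caller whether the list changed while it was being copied, which is what the comment above is pointing out.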
*/ INP_LIST_RLOCK(&V_tcbinfo); xig.xig_gen = V_tcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_tcbinfo.ipi_count + pcb_count; INP_LIST_RUNLOCK(&V_tcbinfo); error = SYSCTL_OUT(req, &xig, sizeof xig); } free(inp_list, M_TEMP); return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, tcp_pcblist, "S,xtcpcb", "List of active TCP connections"); #ifdef INET static int tcp_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in addrs[2]; struct inpcb *inp; int error; error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); inp = in_pcblookup(&V_tcbinfo, addrs[1].sin_addr, addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, tcp_getcred, "S,xucred", "Get the xucred of a TCP connection"); #endif /* INET */ #ifdef INET6 static int tcp6_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in6 addrs[2]; struct inpcb *inp; int error; #ifdef INET int mapped = 0; #endif error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 || (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) { return (error); } if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) { #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr)) mapped = 1; else #endif return (EINVAL); } #ifdef INET if (mapped == 1) inp = in_pcblookup(&V_tcbinfo, *(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12], addrs[1].sin6_port, *(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12], addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL); else #endif inp = in6_pcblookup(&V_tcbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseeinpcb(req->td->td_ucred, inp); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, tcp6_getcred, "S,xucred", "Get the xucred of a TCP6 connection"); #endif /* INET6 */ #ifdef INET void tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct ip *ip = vip; struct tcphdr *th; struct in_addr faddr; struct inpcb *inp; struct tcpcb *tp; struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; struct icmp *icp; struct in_conninfo inc; tcp_seq icmp_tcp_seq; int mtu; faddr = ((struct sockaddr_in *)sa)->sin_addr; if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; if (cmd == PRC_MSGSIZE) notify = tcp_mtudisc_notify; else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && ip) notify = tcp_drop_syn_sent; /* * Hostdead is ugly because it goes linearly through all PCBs. 
* XXX: We never get this from ICMP, otherwise it makes an * excellent DoS attack on machines with many connections. */ else if (cmd == PRC_HOSTDEAD) ip = NULL; else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) return; if (ip == NULL) { in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify); return; } icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip)); th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); INP_INFO_RLOCK(&V_tcbinfo); inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL); if (inp != NULL && PRC_IS_REDIRECT(cmd)) { /* signal EHOSTDOWN, as it flushes the cached route */ inp = (*notify)(inp, EHOSTDOWN); if (inp != NULL) INP_WUNLOCK(inp); } else if (inp != NULL) { if (!(inp->inp_flags & INP_TIMEWAIT) && !(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket == NULL)) { icmp_tcp_seq = ntohl(th->th_seq); tp = intotcpcb(inp); if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) && SEQ_LT(icmp_tcp_seq, tp->snd_max)) { if (cmd == PRC_MSGSIZE) { /* * MTU discovery: * If we got a needfrag set the MTU * in the route to the suggested new * value (if given) and then notify. */ mtu = ntohs(icp->icmp_nextmtu); /* * If no alternative MTU was * proposed, try the next smaller * one. */ if (!mtu) mtu = ip_next_mtu( ntohs(ip->ip_len), 1); if (mtu < V_tcp_minmss + sizeof(struct tcpiphdr)) mtu = V_tcp_minmss + sizeof(struct tcpiphdr); /* * Only process the offered MTU if it * is smaller than the current one. */ if (mtu < tp->t_maxseg + sizeof(struct tcpiphdr)) { bzero(&inc, sizeof(inc)); inc.inc_faddr = faddr; inc.inc_fibnum = inp->inp_inc.inc_fibnum; tcp_hc_updatemtu(&inc, mtu); tcp_mtudisc(inp, mtu); } } else inp = (*notify)(inp, inetctlerrmap[cmd]); } } if (inp != NULL) INP_WUNLOCK(inp); } else { bzero(&inc, sizeof(inc)); inc.inc_fport = th->th_dport; inc.inc_lport = th->th_sport; inc.inc_faddr = faddr; inc.inc_laddr = ip->ip_src; syncache_unreach(&inc, th); } INP_INFO_RUNLOCK(&V_tcbinfo); } #endif /* INET */ #ifdef INET6 void tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) { - struct tcphdr th; + struct in6_addr *dst; + struct tcphdr *th; struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; struct ip6_hdr *ip6; struct mbuf *m; + struct inpcb *inp; + struct tcpcb *tp; + struct icmp6_hdr *icmp6; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; - int off; - struct tcp_portonly { - u_int16_t th_sport; - u_int16_t th_dport; - } *thp; + struct in_conninfo inc; + tcp_seq icmp_tcp_seq; + unsigned int mtu; + unsigned int off; + if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; - if (cmd == PRC_MSGSIZE) - notify = tcp_mtudisc_notify; - else if (!PRC_IS_REDIRECT(cmd) && - ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0)) - return; - /* if the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; + icmp6 = ip6cp->ip6c_icmp6; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; sa6_src = ip6cp->ip6c_src; + dst = ip6cp->ip6c_finaldst; } else { m = NULL; ip6 = NULL; off = 0; /* fool gcc */ sa6_src = &sa6_any; + dst = NULL; } - if (ip6 != NULL) { - struct in_conninfo inc; - /* - * XXX: We assume that when IPV6 is non NULL, - * M and OFF are valid. 
- */ + if (cmd == PRC_MSGSIZE) + notify = tcp_mtudisc_notify; + else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || + cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && + ip6 != NULL) + notify = tcp_drop_syn_sent; - /* check if we can safely examine src and dst ports */ - if (m->m_pkthdr.len < off + sizeof(*thp)) - return; + /* + * Hostdead is ugly because it goes linearly through all PCBs. + * XXX: We never get this from ICMP, otherwise it makes an + * excellent DoS attack on machines with many connections. + */ + else if (cmd == PRC_HOSTDEAD) + ip6 = NULL; + else if ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0) + return; - bzero(&th, sizeof(th)); - m_copydata(m, off, sizeof(*thp), (caddr_t)&th); + if (ip6 == NULL) { + in6_pcbnotify(&V_tcbinfo, sa, 0, + (const struct sockaddr *)sa6_src, + 0, cmd, NULL, notify); + return; + } - in6_pcbnotify(&V_tcbinfo, sa, th.th_dport, - (struct sockaddr *)ip6cp->ip6c_src, - th.th_sport, cmd, NULL, notify); + /* Check if we can safely get the ports from the tcp hdr */ + if (m == NULL || + (m->m_pkthdr.len < + (int32_t) (off + offsetof(struct tcphdr, th_seq)))) { + return; + } + th = (struct tcphdr *) mtodo(ip6cp->ip6c_m, ip6cp->ip6c_off); + INP_INFO_RLOCK(&V_tcbinfo); + inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, th->th_dport, + &ip6->ip6_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL); + if (inp != NULL && PRC_IS_REDIRECT(cmd)) { + /* signal EHOSTDOWN, as it flushes the cached route */ + inp = (*notify)(inp, EHOSTDOWN); + if (inp != NULL) + INP_WUNLOCK(inp); + } else if (inp != NULL) { + if (!(inp->inp_flags & INP_TIMEWAIT) && + !(inp->inp_flags & INP_DROPPED) && + !(inp->inp_socket == NULL)) { + icmp_tcp_seq = ntohl(th->th_seq); + tp = intotcpcb(inp); + if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) && + SEQ_LT(icmp_tcp_seq, tp->snd_max)) { + if (cmd == PRC_MSGSIZE) { + /* + * MTU discovery: + * If we got a needfrag set the MTU + * in the route to the suggested new + * value (if given) and then notify. + */ + mtu = ntohl(icmp6->icmp6_mtu); + /* + * If no alternative MTU was + * proposed, or the proposed + * MTU was too small, set to + * the min. + */ + if (mtu < IPV6_MMTU) + mtu = IPV6_MMTU - 8; + + + bzero(&inc, sizeof(inc)); + inc.inc_fibnum = M_GETFIB(m); + inc.inc_flags |= INC_ISIPV6; + inc.inc6_faddr = *dst; + if (in6_setscope(&inc.inc6_faddr, + m->m_pkthdr.rcvif, NULL)) + goto unlock_inp; + + /* + * Only process the offered MTU if it + * is smaller than the current one. + */ + if (mtu < tp->t_maxseg + + (sizeof (*th) + sizeof (*ip6))) { + tcp_hc_updatemtu(&inc, mtu); + tcp_mtudisc(inp, mtu); + ICMP6STAT_INC(icp6s_pmtuchg); + } + } else + inp = (*notify)(inp, + inet6ctlerrmap[cmd]); + } + } +unlock_inp: + if (inp != NULL) + INP_WUNLOCK(inp); + } else { bzero(&inc, sizeof(inc)); - inc.inc_fport = th.th_dport; - inc.inc_lport = th.th_sport; - inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr; - inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr; + inc.inc_fibnum = M_GETFIB(m); inc.inc_flags |= INC_ISIPV6; - INP_INFO_RLOCK(&V_tcbinfo); - syncache_unreach(&inc, &th); - INP_INFO_RUNLOCK(&V_tcbinfo); - } else - in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src, - 0, cmd, NULL, notify); + inc.inc_fport = th->th_dport; + inc.inc_lport = th->th_sport; + inc.inc6_faddr = *dst; + inc.inc6_laddr = ip6->ip6_src; + syncache_unreach(&inc, th); + } + INP_INFO_RUNLOCK(&V_tcbinfo); } #endif /* INET6 */ /* * Following is where TCP initial sequence number generation occurs. 
* * There are two places where we must use initial sequence numbers: * 1. In SYN-ACK packets. * 2. In SYN packets. * * All ISNs for SYN-ACK packets are generated by the syncache. See * tcp_syncache.c for details. * * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling * depends on this property. In addition, these ISNs should be * unguessable so as to prevent connection hijacking. To satisfy * the requirements of this situation, the algorithm outlined in * RFC 1948 is used, with only small modifications. * * Implementation details: * * Time is based off the system timer, and is corrected so that it * increases by one megabyte per second. This allows for proper * recycling on high speed LANs while still leaving over an hour * before rollover. * * As reading the *exact* system time is too expensive to be done * whenever setting up a TCP connection, we increment the time * offset in two ways. First, a small random positive increment * is added to isn_offset for each connection that is set up. * Second, the function tcp_isn_tick fires once per clock tick * and increments isn_offset as necessary so that sequence numbers * are incremented at approximately ISN_BYTES_PER_SECOND. The * random positive increments serve only to ensure that the same * exact sequence number is never sent out twice (as could otherwise * happen when a port is recycled in less than the system tick * interval.) * * net.inet.tcp.isn_reseed_interval controls the number of seconds * between seeding of isn_secret. This is normally set to zero, * as reseeding should not be necessary. * * Locking of the global variables isn_secret, isn_last_reseed, isn_offset, * isn_offset_old, and isn_ctx is performed using the TCP pcbinfo lock. In * general, this means holding an exclusive (write) lock. */ #define ISN_BYTES_PER_SECOND 1048576 #define ISN_STATIC_INCREMENT 4096 #define ISN_RANDOM_INCREMENT (4096 - 1) static VNET_DEFINE(u_char, isn_secret[32]); static VNET_DEFINE(int, isn_last); static VNET_DEFINE(int, isn_last_reseed); static VNET_DEFINE(u_int32_t, isn_offset); static VNET_DEFINE(u_int32_t, isn_offset_old); #define V_isn_secret VNET(isn_secret) #define V_isn_last VNET(isn_last) #define V_isn_last_reseed VNET(isn_last_reseed) #define V_isn_offset VNET(isn_offset) #define V_isn_offset_old VNET(isn_offset_old) tcp_seq tcp_new_isn(struct tcpcb *tp) { MD5_CTX isn_ctx; u_int32_t md5_buffer[4]; tcp_seq new_isn; u_int32_t projected_offset; INP_WLOCK_ASSERT(tp->t_inpcb); ISN_LOCK(); /* Seed if this is the first use, reseed if requested. */ if ((V_isn_last_reseed == 0) || ((V_tcp_isn_reseed_interval > 0) && (((u_int)V_isn_last_reseed + (u_int)V_tcp_isn_reseed_interval*hz) < (u_int)ticks))) { read_random(&V_isn_secret, sizeof(V_isn_secret)); V_isn_last_reseed = ticks; } /* Compute the md5 hash and return the ISN. 
*/ MD5Init(&isn_ctx); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_fport, sizeof(u_short)); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_lport, sizeof(u_short)); #ifdef INET6 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) { MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_faddr, sizeof(struct in6_addr)); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_laddr, sizeof(struct in6_addr)); } else #endif { MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_faddr, sizeof(struct in_addr)); MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_laddr, sizeof(struct in_addr)); } MD5Update(&isn_ctx, (u_char *) &V_isn_secret, sizeof(V_isn_secret)); MD5Final((u_char *) &md5_buffer, &isn_ctx); new_isn = (tcp_seq) md5_buffer[0]; V_isn_offset += ISN_STATIC_INCREMENT + (arc4random() & ISN_RANDOM_INCREMENT); if (ticks != V_isn_last) { projected_offset = V_isn_offset_old + ISN_BYTES_PER_SECOND / hz * (ticks - V_isn_last); if (SEQ_GT(projected_offset, V_isn_offset)) V_isn_offset = projected_offset; V_isn_offset_old = V_isn_offset; V_isn_last = ticks; } new_isn += V_isn_offset; ISN_UNLOCK(); return (new_isn); } /* * When a specific ICMP unreachable message is received and the * connection state is SYN-SENT, drop the connection. This behavior * is controlled by the icmp_may_rst sysctl. */ struct inpcb * tcp_drop_syn_sent(struct inpcb *inp, int errno) { struct tcpcb *tp; INP_INFO_RLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || (inp->inp_flags & INP_DROPPED)) return (inp); tp = intotcpcb(inp); if (tp->t_state != TCPS_SYN_SENT) return (inp); tp = tcp_drop(tp, errno); if (tp != NULL) return (inp); else return (NULL); } /* * When `need fragmentation' ICMP is received, update our idea of the MSS * based on the new value. Also nudge TCP to send something, since we * know the packet we just sent was dropped. * This duplicates some code in the tcp_mss() function in tcp_input.c. */ static struct inpcb * tcp_mtudisc_notify(struct inpcb *inp, int error) { tcp_mtudisc(inp, -1); return (inp); } static void tcp_mtudisc(struct inpcb *inp, int mtuoffer) { struct tcpcb *tp; struct socket *so; INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || (inp->inp_flags & INP_DROPPED)) return; tp = intotcpcb(inp); KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL")); tcp_mss_update(tp, -1, mtuoffer, NULL, NULL); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_snd); /* If the mss is larger than the socket buffer, decrease the mss. */ if (so->so_snd.sb_hiwat < tp->t_maxseg) tp->t_maxseg = so->so_snd.sb_hiwat; SOCKBUF_UNLOCK(&so->so_snd); TCPSTAT_INC(tcps_mturesent); tp->t_rtttime = 0; tp->snd_nxt = tp->snd_una; tcp_free_sackholes(tp); tp->snd_recover = tp->snd_max; if (tp->t_flags & TF_SACK_PERMIT) EXIT_FASTRECOVERY(tp->t_flags); tp->t_fb->tfb_tcp_output(tp); } #ifdef INET /* * Look-up the routing entry to the peer of this inpcb. If no route * is found and it cannot be allocated, then return 0. This routine * is called by TCP routines that access the rmx structure and by * tcp_mss_update to get the peer/interface MTU. */ u_long tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap) { struct nhop4_extended nh4; struct ifnet *ifp; u_long maxmtu = 0; KASSERT(inc != NULL, ("tcp_maxmtu with NULL in_conninfo pointer")); if (inc->inc_faddr.s_addr != INADDR_ANY) { if (fib4_lookup_nh_ext(inc->inc_fibnum, inc->inc_faddr, NHR_REF, 0, &nh4) != 0) return (0); ifp = nh4.nh_ifp; maxmtu = nh4.nh_mtu; /* Report additional interface capabilities. 
*/ if (cap != NULL) { if (ifp->if_capenable & IFCAP_TSO4 && ifp->if_hwassist & CSUM_TSO) { cap->ifcap |= CSUM_TSO; cap->tsomax = ifp->if_hw_tsomax; cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } fib4_free_nh_ext(inc->inc_fibnum, &nh4); } return (maxmtu); } #endif /* INET */ #ifdef INET6 u_long tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap) { struct nhop6_extended nh6; struct in6_addr dst6; uint32_t scopeid; struct ifnet *ifp; u_long maxmtu = 0; KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer")); if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) { in6_splitscope(&inc->inc6_faddr, &dst6, &scopeid); if (fib6_lookup_nh_ext(inc->inc_fibnum, &dst6, scopeid, 0, 0, &nh6) != 0) return (0); ifp = nh6.nh_ifp; maxmtu = nh6.nh_mtu; /* Report additional interface capabilities. */ if (cap != NULL) { if (ifp->if_capenable & IFCAP_TSO6 && ifp->if_hwassist & CSUM_TSO) { cap->ifcap |= CSUM_TSO; cap->tsomax = ifp->if_hw_tsomax; cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } fib6_free_nh_ext(inc->inc_fibnum, &nh6); } return (maxmtu); } #endif /* INET6 */ /* * Calculate effective SMSS per RFC5681 definition for a given TCP * connection at its current state, taking into account SACK and etc. */ u_int tcp_maxseg(const struct tcpcb *tp) { u_int optlen; if (tp->t_flags & TF_NOOPT) return (tp->t_maxseg); /* * Here we have a simplified code from tcp_addoptions(), * without a proper loop, and having most of paddings hardcoded. * We might make mistakes with padding here in some edge cases, * but this is harmless, since result of tcp_maxseg() is used * only in cwnd and ssthresh estimations. */ #define PAD(len) ((((len) / 4) + !!((len) % 4)) * 4) if (TCPS_HAVEESTABLISHED(tp->t_state)) { if (tp->t_flags & TF_RCVD_TSTMP) optlen = TCPOLEN_TSTAMP_APPA; else optlen = 0; #ifdef TCP_SIGNATURE if (tp->t_flags & TF_SIGNATURE) optlen += PAD(TCPOLEN_SIGNATURE); #endif if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) { optlen += TCPOLEN_SACKHDR; optlen += tp->rcv_numsacks * TCPOLEN_SACK; optlen = PAD(optlen); } } else { if (tp->t_flags & TF_REQ_TSTMP) optlen = TCPOLEN_TSTAMP_APPA; else optlen = PAD(TCPOLEN_MAXSEG); if (tp->t_flags & TF_REQ_SCALE) optlen += PAD(TCPOLEN_WINDOW); #ifdef TCP_SIGNATURE if (tp->t_flags & TF_SIGNATURE) optlen += PAD(TCPOLEN_SIGNATURE); #endif if (tp->t_flags & TF_SACK_PERMIT) optlen += PAD(TCPOLEN_SACK_PERMITTED); } #undef PAD optlen = min(optlen, TCP_MAXOLEN); return (tp->t_maxseg - optlen); } #ifdef IPSEC /* compute ESP/AH header size for TCP, including outer IP header. 
*/ size_t ipsec_hdrsiz_tcp(struct tcpcb *tp) { struct inpcb *inp; struct mbuf *m; size_t hdrsiz; struct ip *ip; #ifdef INET6 struct ip6_hdr *ip6; #endif struct tcphdr *th; if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL) || (!key_havesp(IPSEC_DIR_OUTBOUND))) return (0); m = m_gethdr(M_NOWAIT, MT_DATA); if (!m) return (0); #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)(ip6 + 1); m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); tcpip_fillheaders(inp, ip6, th); hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); } else #endif /* INET6 */ { ip = mtod(m, struct ip *); th = (struct tcphdr *)(ip + 1); m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr); tcpip_fillheaders(inp, ip, th); hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); } m_free(m); return (hdrsiz); } #endif /* IPSEC */ #ifdef TCP_SIGNATURE /* * Callback function invoked by m_apply() to digest TCP segment data * contained within an mbuf chain. */ static int tcp_signature_apply(void *fstate, void *data, u_int len) { MD5Update(fstate, (u_char *)data, len); return (0); } /* * XXX The key is retrieved from the system's PF_KEY SADB, by keying a * search with the destination IP address, and a 'magic SPI' to be * determined by the application. This is hardcoded elsewhere to 1179 */ struct secasvar * tcp_get_sav(struct mbuf *m, u_int direction) { union sockaddr_union dst; struct secasvar *sav; struct ip *ip; #ifdef INET6 struct ip6_hdr *ip6; char ip6buf[INET6_ADDRSTRLEN]; #endif /* Extract the destination from the IP header in the mbuf. */ bzero(&dst, sizeof(union sockaddr_union)); ip = mtod(m, struct ip *); #ifdef INET6 ip6 = NULL; /* Make the compiler happy. */ #endif switch (ip->ip_v) { #ifdef INET case IPVERSION: dst.sa.sa_len = sizeof(struct sockaddr_in); dst.sa.sa_family = AF_INET; dst.sin.sin_addr = (direction == IPSEC_DIR_INBOUND) ? ip->ip_src : ip->ip_dst; break; #endif #ifdef INET6 case (IPV6_VERSION >> 4): ip6 = mtod(m, struct ip6_hdr *); dst.sa.sa_len = sizeof(struct sockaddr_in6); dst.sa.sa_family = AF_INET6; dst.sin6.sin6_addr = (direction == IPSEC_DIR_INBOUND) ? ip6->ip6_src : ip6->ip6_dst; break; #endif default: return (NULL); /* NOTREACHED */ break; } /* Look up an SADB entry which matches the address of the peer. */ sav = KEY_ALLOCSA(&dst, IPPROTO_TCP, htonl(TCP_SIG_SPI)); if (sav == NULL) { ipseclog((LOG_ERR, "%s: SADB lookup failed for %s\n", __func__, (ip->ip_v == IPVERSION) ? inet_ntoa(dst.sin.sin_addr) : #ifdef INET6 (ip->ip_v == (IPV6_VERSION >> 4)) ? ip6_sprintf(ip6buf, &dst.sin6.sin6_addr) : #endif "(unsupported)")); } return (sav); } /* * Compute TCP-MD5 hash of a TCP segment. (RFC2385) * * Parameters: * m pointer to head of mbuf chain * len length of TCP segment data, excluding options * optlen length of TCP segment options * buf pointer to storage for computed MD5 digest * sav pointer to security assosiation * * We do this over ip, tcphdr, segment data, and the key in the SADB. * When called from tcp_input(), we can be sure that th_sum has been * zeroed out and verified already. * * Releases reference to SADB key before return. * * Return 0 if successful, otherwise return -1. 
* */ int tcp_signature_do_compute(struct mbuf *m, int len, int optlen, u_char *buf, struct secasvar *sav) { #ifdef INET struct ippseudo ippseudo; #endif MD5_CTX ctx; int doff; struct ip *ip; #ifdef INET struct ipovly *ipovly; #endif struct tcphdr *th; #ifdef INET6 struct ip6_hdr *ip6; struct in6_addr in6; uint32_t plen; uint16_t nhdr; #endif u_short savecsum; KASSERT(m != NULL, ("NULL mbuf chain")); KASSERT(buf != NULL, ("NULL signature pointer")); /* Extract the destination from the IP header in the mbuf. */ ip = mtod(m, struct ip *); #ifdef INET6 ip6 = NULL; /* Make the compiler happy. */ #endif MD5Init(&ctx); /* * Step 1: Update MD5 hash with IP(v6) pseudo-header. * * XXX The ippseudo header MUST be digested in network byte order, * or else we'll fail the regression test. Assume all fields we've * been doing arithmetic on have been in host byte order. * XXX One cannot depend on ipovly->ih_len here. When called from * tcp_output(), the underlying ip_len member has not yet been set. */ switch (ip->ip_v) { #ifdef INET case IPVERSION: ipovly = (struct ipovly *)ip; ippseudo.ippseudo_src = ipovly->ih_src; ippseudo.ippseudo_dst = ipovly->ih_dst; ippseudo.ippseudo_pad = 0; ippseudo.ippseudo_p = IPPROTO_TCP; ippseudo.ippseudo_len = htons(len + sizeof(struct tcphdr) + optlen); MD5Update(&ctx, (char *)&ippseudo, sizeof(struct ippseudo)); th = (struct tcphdr *)((u_char *)ip + sizeof(struct ip)); doff = sizeof(struct ip) + sizeof(struct tcphdr) + optlen; break; #endif #ifdef INET6 /* * RFC 2385, 2.0 Proposal * For IPv6, the pseudo-header is as described in RFC 2460, namely the * 128-bit source IPv6 address, 128-bit destination IPv6 address, zero- * extended next header value (to form 32 bits), and 32-bit segment * length. * Note: Upper-Layer Packet Length comes before Next Header. */ case (IPV6_VERSION >> 4): in6 = ip6->ip6_src; in6_clearscope(&in6); MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr)); in6 = ip6->ip6_dst; in6_clearscope(&in6); MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr)); plen = htonl(len + sizeof(struct tcphdr) + optlen); MD5Update(&ctx, (char *)&plen, sizeof(uint32_t)); nhdr = 0; MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); nhdr = IPPROTO_TCP; MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); th = (struct tcphdr *)((u_char *)ip6 + sizeof(struct ip6_hdr)); doff = sizeof(struct ip6_hdr) + sizeof(struct tcphdr) + optlen; break; #endif default: KEY_FREESAV(&sav); return (-1); /* NOTREACHED */ break; } /* * Step 2: Update MD5 hash with TCP header, excluding options. * The TCP checksum must be set to zero. */ savecsum = th->th_sum; th->th_sum = 0; MD5Update(&ctx, (char *)th, sizeof(struct tcphdr)); th->th_sum = savecsum; /* * Step 3: Update MD5 hash with TCP segment data. * Use m_apply() to avoid an early m_pullup(). */ if (len > 0) m_apply(m, doff, len, tcp_signature_apply, &ctx); /* * Step 4: Update MD5 hash with shared secret. */ MD5Update(&ctx, sav->key_auth->key_data, _KEYLEN(sav->key_auth)); MD5Final(buf, &ctx); key_sa_recordxfer(sav, m); KEY_FREESAV(&sav); return (0); } /* * Compute TCP-MD5 hash of a TCP segment. (RFC2385) * * Return 0 if successful, otherwise return -1. */ int tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen, u_char *buf, u_int direction) { struct secasvar *sav; if ((sav = tcp_get_sav(m, direction)) == NULL) return (-1); return (tcp_signature_do_compute(m, len, optlen, buf, sav)); } /* * Verify the TCP-MD5 hash of a TCP segment. 
(RFC2385) * * Parameters: * m pointer to head of mbuf chain * len length of TCP segment data, excluding options * optlen length of TCP segment options * buf pointer to storage for computed MD5 digest * direction direction of flow (IPSEC_DIR_INBOUND or OUTBOUND) * * Return 1 if successful, otherwise return 0. */ int tcp_signature_verify(struct mbuf *m, int off0, int tlen, int optlen, struct tcpopt *to, struct tcphdr *th, u_int tcpbflag) { char tmpdigest[TCP_SIGLEN]; if (tcp_sig_checksigs == 0) return (1); if ((tcpbflag & TF_SIGNATURE) == 0) { if ((to->to_flags & TOF_SIGNATURE) != 0) { /* * If this socket is not expecting signature but * the segment contains signature just fail. */ TCPSTAT_INC(tcps_sig_err_sigopt); TCPSTAT_INC(tcps_sig_rcvbadsig); return (0); } /* Signature is not expected, and not present in segment. */ return (1); } /* * If this socket is expecting signature but the segment does not * contain any just fail. */ if ((to->to_flags & TOF_SIGNATURE) == 0) { TCPSTAT_INC(tcps_sig_err_nosigopt); TCPSTAT_INC(tcps_sig_rcvbadsig); return (0); } if (tcp_signature_compute(m, off0, tlen, optlen, &tmpdigest[0], IPSEC_DIR_INBOUND) == -1) { TCPSTAT_INC(tcps_sig_err_buildsig); TCPSTAT_INC(tcps_sig_rcvbadsig); return (0); } if (bcmp(to->to_signature, &tmpdigest[0], TCP_SIGLEN) != 0) { TCPSTAT_INC(tcps_sig_rcvbadsig); return (0); } TCPSTAT_INC(tcps_sig_rcvgoodsig); return (1); } #endif /* TCP_SIGNATURE */ static int sysctl_drop(SYSCTL_HANDLER_ARGS) { /* addrs[0] is a foreign socket, addrs[1] is a local one. */ struct sockaddr_storage addrs[2]; struct inpcb *inp; struct tcpcb *tp; struct tcptw *tw; struct sockaddr_in *fin, *lin; #ifdef INET6 struct sockaddr_in6 *fin6, *lin6; #endif int error; inp = NULL; fin = lin = NULL; #ifdef INET6 fin6 = lin6 = NULL; #endif error = 0; if (req->oldptr != NULL || req->oldlen != 0) return (EINVAL); if (req->newptr == NULL) return (EPERM); if (req->newlen < sizeof(addrs)) return (ENOMEM); error = SYSCTL_IN(req, &addrs, sizeof(addrs)); if (error) return (error); switch (addrs[0].ss_family) { #ifdef INET6 case AF_INET6: fin6 = (struct sockaddr_in6 *)&addrs[0]; lin6 = (struct sockaddr_in6 *)&addrs[1]; if (fin6->sin6_len != sizeof(struct sockaddr_in6) || lin6->sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) { if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr)) return (EINVAL); in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[0]); in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[1]); fin = (struct sockaddr_in *)&addrs[0]; lin = (struct sockaddr_in *)&addrs[1]; break; } error = sa6_embedscope(fin6, V_ip6_use_defzone); if (error) return (error); error = sa6_embedscope(lin6, V_ip6_use_defzone); if (error) return (error); break; #endif #ifdef INET case AF_INET: fin = (struct sockaddr_in *)&addrs[0]; lin = (struct sockaddr_in *)&addrs[1]; if (fin->sin_len != sizeof(struct sockaddr_in) || lin->sin_len != sizeof(struct sockaddr_in)) return (EINVAL); break; #endif default: return (EINVAL); } INP_INFO_RLOCK(&V_tcbinfo); switch (addrs[0].ss_family) { #ifdef INET6 case AF_INET6: inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr, fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port, INPLOOKUP_WLOCKPCB, NULL); break; #endif #ifdef INET case AF_INET: inp = in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port, lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL); break; #endif } if (inp != NULL) { if (inp->inp_flags & INP_TIMEWAIT) { /* * XXXRW: There currently exists a state where an * inpcb is present, but its timewait state has 
been * discarded. For now, don't allow dropping of this * type of inpcb. */ tw = intotw(inp); if (tw != NULL) tcp_twclose(tw, 0); else INP_WUNLOCK(inp); } else if (!(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket->so_options & SO_ACCEPTCONN)) { tp = intotcpcb(inp); tp = tcp_drop(tp, ECONNABORTED); if (tp != NULL) INP_WUNLOCK(inp); } else INP_WUNLOCK(inp); } else error = ESRCH; INP_INFO_RUNLOCK(&V_tcbinfo); return (error); } SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP, NULL, 0, sysctl_drop, "", "Drop TCP connection"); /* * Generate a standardized TCP log line for use throughout the * tcp subsystem. Memory allocation is done with M_NOWAIT to * allow use in the interrupt context. * * NB: The caller MUST free(s, M_TCPLOG) the returned string. * NB: The function may return NULL if memory allocation failed. * * Due to header inclusion and ordering limitations the struct ip * and ip6_hdr pointers have to be passed as void pointers. */ char * tcp_log_vain(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr) { /* Is logging enabled? */ if (tcp_log_in_vain == 0) return (NULL); return (tcp_log_addr(inc, th, ip4hdr, ip6hdr)); } char * tcp_log_addrs(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr) { /* Is logging enabled? */ if (tcp_log_debug == 0) return (NULL); return (tcp_log_addr(inc, th, ip4hdr, ip6hdr)); } static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr) { char *s, *sp; size_t size; struct ip *ip; #ifdef INET6 const struct ip6_hdr *ip6; ip6 = (const struct ip6_hdr *)ip6hdr; #endif /* INET6 */ ip = (struct ip *)ip4hdr; /* * The log line looks like this: * "TCP: [1.2.3.4]:50332 to [1.2.3.4]:80 tcpflags 0x2" */ size = sizeof("TCP: []:12345 to []:12345 tcpflags 0x2<>") + sizeof(PRINT_TH_FLAGS) + 1 + #ifdef INET6 2 * INET6_ADDRSTRLEN; #else 2 * INET_ADDRSTRLEN; #endif /* INET6 */ s = malloc(size, M_TCPLOG, M_ZERO|M_NOWAIT); if (s == NULL) return (NULL); strcat(s, "TCP: ["); sp = s + strlen(s); if (inc && ((inc->inc_flags & INC_ISIPV6) == 0)) { inet_ntoa_r(inc->inc_faddr, sp); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(inc->inc_fport)); sp = s + strlen(s); inet_ntoa_r(inc->inc_laddr, sp); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(inc->inc_lport)); #ifdef INET6 } else if (inc) { ip6_sprintf(sp, &inc->inc6_faddr); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(inc->inc_fport)); sp = s + strlen(s); ip6_sprintf(sp, &inc->inc6_laddr); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(inc->inc_lport)); } else if (ip6 && th) { ip6_sprintf(sp, &ip6->ip6_src); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(th->th_sport)); sp = s + strlen(s); ip6_sprintf(sp, &ip6->ip6_dst); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(th->th_dport)); #endif /* INET6 */ #ifdef INET } else if (ip && th) { inet_ntoa_r(ip->ip_src, sp); sp = s + strlen(s); sprintf(sp, "]:%i to [", ntohs(th->th_sport)); sp = s + strlen(s); inet_ntoa_r(ip->ip_dst, sp); sp = s + strlen(s); sprintf(sp, "]:%i", ntohs(th->th_dport)); #endif /* INET */ } else { free(s, M_TCPLOG); return (NULL); } sp = s + strlen(s); if (th) sprintf(sp, " tcpflags 0x%b", th->th_flags, PRINT_TH_FLAGS); if (*(s + size - 1) != '\0') panic("%s: string too long", __func__); return (s); } /* * A subroutine which makes it easy to track TCP state changes with DTrace. * This function shouldn't be called for t_state initializations that don't * correspond to actual TCP state transitions. 
*/ void tcp_state_change(struct tcpcb *tp, int newstate) { #if defined(KDTRACE_HOOKS) int pstate = tp->t_state; #endif TCPSTATES_DEC(tp->t_state); TCPSTATES_INC(newstate); tp->t_state = newstate; TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, pstate); } Index: user/alc/PQ_LAUNDRY/sys/netinet6/icmp6.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/netinet6/icmp6.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/netinet6/icmp6.c (revision 303642) @@ -1,2872 +1,2870 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #define MBUF_PRIVATE /* XXXRW: Optimisation tries to avoid M_EXT mbufs */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern struct domain inet6domain; VNET_PCPUSTAT_DEFINE(struct icmp6stat, icmp6stat); VNET_PCPUSTAT_SYSINIT(icmp6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(icmp6stat); #endif /* VIMAGE */ VNET_DECLARE(struct inpcbinfo, ripcbinfo); VNET_DECLARE(struct inpcbhead, ripcb); VNET_DECLARE(int, icmp6errppslim); static VNET_DEFINE(int, icmp6errpps_count) = 0; static VNET_DEFINE(struct timeval, icmp6errppslim_last); VNET_DECLARE(int, icmp6_nodeinfo); #define V_ripcbinfo VNET(ripcbinfo) #define V_ripcb VNET(ripcb) #define V_icmp6errppslim VNET(icmp6errppslim) #define V_icmp6errpps_count VNET(icmp6errpps_count) #define V_icmp6errppslim_last VNET(icmp6errppslim_last) #define V_icmp6_nodeinfo VNET(icmp6_nodeinfo) static void icmp6_errcount(int, int); static int icmp6_rip6_input(struct mbuf **, int); static int icmp6_ratelimit(const struct in6_addr *, const int, const int); static const char *icmp6_redirect_diag(struct in6_addr *, struct in6_addr *, struct in6_addr *); static struct mbuf *ni6_input(struct mbuf *, int); static struct mbuf *ni6_nametodns(const char *, int, int); static int ni6_dnsmatch(const char *, int, const char *, int); static int ni6_addrs(struct icmp6_nodeinfo *, struct mbuf *, struct ifnet **, struct in6_addr *); static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, struct ifnet *, int); static int icmp6_notify_error(struct mbuf **, int, int, int); /* * Kernel module interface for updating icmp6stat. The argument is an index * into icmp6stat treated as an array of u_quad_t. While this encodes the * general layout of icmp6stat into the caller, it doesn't encode its * location, so that future changes to add, for example, per-CPU stats * support won't cause binary compatibility problems for kernel modules. 
*/ void kmod_icmp6stat_inc(int statnum) { counter_u64_add(VNET(icmp6stat)[statnum], 1); } static void icmp6_errcount(int type, int code) { switch (type) { case ICMP6_DST_UNREACH: switch (code) { case ICMP6_DST_UNREACH_NOROUTE: ICMP6STAT_INC(icp6s_odst_unreach_noroute); return; case ICMP6_DST_UNREACH_ADMIN: ICMP6STAT_INC(icp6s_odst_unreach_admin); return; case ICMP6_DST_UNREACH_BEYONDSCOPE: ICMP6STAT_INC(icp6s_odst_unreach_beyondscope); return; case ICMP6_DST_UNREACH_ADDR: ICMP6STAT_INC(icp6s_odst_unreach_addr); return; case ICMP6_DST_UNREACH_NOPORT: ICMP6STAT_INC(icp6s_odst_unreach_noport); return; } break; case ICMP6_PACKET_TOO_BIG: ICMP6STAT_INC(icp6s_opacket_too_big); return; case ICMP6_TIME_EXCEEDED: switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: ICMP6STAT_INC(icp6s_otime_exceed_transit); return; case ICMP6_TIME_EXCEED_REASSEMBLY: ICMP6STAT_INC(icp6s_otime_exceed_reassembly); return; } break; case ICMP6_PARAM_PROB: switch (code) { case ICMP6_PARAMPROB_HEADER: ICMP6STAT_INC(icp6s_oparamprob_header); return; case ICMP6_PARAMPROB_NEXTHEADER: ICMP6STAT_INC(icp6s_oparamprob_nextheader); return; case ICMP6_PARAMPROB_OPTION: ICMP6STAT_INC(icp6s_oparamprob_option); return; } break; case ND_REDIRECT: ICMP6STAT_INC(icp6s_oredirect); return; } ICMP6STAT_INC(icp6s_ounknown); } /* * A wrapper function for icmp6_error() necessary when the erroneous packet * may not contain enough scope zone information. */ void icmp6_error2(struct mbuf *m, int type, int code, int param, struct ifnet *ifp) { struct ip6_hdr *ip6; if (ifp == NULL) return; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif ip6 = mtod(m, struct ip6_hdr *); if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0) return; if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0) return; icmp6_error(m, type, code, param); } /* * Generate an error packet of type error in response to bad IP6 packet. */ void icmp6_error(struct mbuf *m, int type, int code, int param) { struct ip6_hdr *oip6, *nip6; struct icmp6_hdr *icmp6; u_int preplen; int off; int nxt; ICMP6STAT_INC(icp6s_error); /* count per-type-code statistics */ icmp6_errcount(type, code); #ifdef M_DECRYPTED /*not openbsd*/ if (m->m_flags & M_DECRYPTED) { ICMP6STAT_INC(icp6s_canterror); goto freeit; } #endif #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif oip6 = mtod(m, struct ip6_hdr *); /* * If the destination address of the erroneous packet is a multicast * address, or the packet was sent using link-layer multicast, * we should basically suppress sending an error (RFC 2463, Section * 2.4). * We have two exceptions (the item e.2 in that section): * - the Packet Too Big message can be sent for path MTU discovery. * - the Parameter Problem Message that can be allowed an icmp6 error * in the option type field. This check has been done in * ip6_unknown_opt(), so we can just check the type and code. */ if ((m->m_flags & (M_BCAST|M_MCAST) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) && (type != ICMP6_PACKET_TOO_BIG && (type != ICMP6_PARAM_PROB || code != ICMP6_PARAMPROB_OPTION))) goto freeit; /* * RFC 2463, 2.4 (e.5): source address check. * XXX: the case of anycast source? 
*/ if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_src)) goto freeit; /* * If we are about to send ICMPv6 against ICMPv6 error/redirect, * don't do it. */ nxt = -1; off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); if (off >= 0 && nxt == IPPROTO_ICMPV6) { struct icmp6_hdr *icp; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), ); icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off, sizeof(*icp)); if (icp == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif if (icp->icmp6_type < ICMP6_ECHO_REQUEST || icp->icmp6_type == ND_REDIRECT) { /* * ICMPv6 error * Special case: for redirect (which is * informational) we must not send icmp6 error. */ ICMP6STAT_INC(icp6s_canterror); goto freeit; } else { /* ICMPv6 informational - send the error */ } } else { /* non-ICMPv6 - send the error */ } oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */ /* Finally, do rate limitation check. */ if (icmp6_ratelimit(&oip6->ip6_src, type, code)) { ICMP6STAT_INC(icp6s_toofreq); goto freeit; } /* * OK, ICMP6 can be generated. */ if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN) m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len); preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); M_PREPEND(m, preplen, M_NOWAIT); /* FIB is also copied over. */ if (m == NULL) { nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__)); return; } nip6 = mtod(m, struct ip6_hdr *); nip6->ip6_src = oip6->ip6_src; nip6->ip6_dst = oip6->ip6_dst; in6_clearscope(&oip6->ip6_src); in6_clearscope(&oip6->ip6_dst); icmp6 = (struct icmp6_hdr *)(nip6 + 1); icmp6->icmp6_type = type; icmp6->icmp6_code = code; icmp6->icmp6_pptr = htonl((u_int32_t)param); /* * icmp6_reflect() is designed to be in the input path. * icmp6_error() can be called from both input and output path, * and if we are in output path rcvif could contain bogus value. * clear m->m_pkthdr.rcvif for safety, we should have enough scope * information in ip header (nip6). */ m->m_pkthdr.rcvif = NULL; ICMP6STAT_INC(icp6s_outhist[type]); icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */ return; freeit: /* * If we can't tell whether or not we can generate ICMP6, free it. */ m_freem(m); } /* * Process a received ICMP6 message. */ int icmp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp, *n; struct ifnet *ifp; struct ip6_hdr *ip6, *nip6; struct icmp6_hdr *icmp6, *nicmp6; int off = *offp; int icmp6len = m->m_pkthdr.len - *offp; int code, sum, noff; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; int ip6len, error; ifp = m->m_pkthdr.rcvif; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE); /* m might change if M_LOOP. So, call mtod after this */ #endif /* * Locate icmp6 structure in mbuf, and check * that not corrupted and of at least minimum length */ ip6 = mtod(m, struct ip6_hdr *); ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); if (icmp6len < sizeof(struct icmp6_hdr)) { ICMP6STAT_INC(icp6s_tooshort); goto freeit; } /* * Check multicast group membership. * Note: SSM filters are not applied for ICMPv6 traffic. 
*/ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct in6_multi *inm; inm = in6m_lookup(ifp, &ip6->ip6_dst); if (inm == NULL) { IP6STAT_INC(ip6s_notmember); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); goto freeit; } } /* * calculate the checksum */ #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return IPPROTO_DONE; } #endif code = icmp6->icmp6_code; if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) { nd6log((LOG_ERR, "ICMP6 checksum error(%d|%x) %s\n", icmp6->icmp6_type, sum, ip6_sprintf(ip6bufs, &ip6->ip6_src))); ICMP6STAT_INC(icp6s_checksum); goto freeit; } ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]); icmp6_ifstat_inc(ifp, ifs6_in_msg); if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK) icmp6_ifstat_inc(ifp, ifs6_in_error); switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: icmp6_ifstat_inc(ifp, ifs6_in_dstunreach); switch (code) { case ICMP6_DST_UNREACH_NOROUTE: + case ICMP6_DST_UNREACH_ADDR: /* PRC_HOSTDEAD is a DOS */ code = PRC_UNREACH_NET; break; case ICMP6_DST_UNREACH_ADMIN: icmp6_ifstat_inc(ifp, ifs6_in_adminprohib); code = PRC_UNREACH_PROTOCOL; /* is this a good code? */ - break; - case ICMP6_DST_UNREACH_ADDR: - code = PRC_HOSTDEAD; break; case ICMP6_DST_UNREACH_BEYONDSCOPE: /* I mean "source address was incorrect." */ code = PRC_PARAMPROB; break; case ICMP6_DST_UNREACH_NOPORT: code = PRC_UNREACH_PORT; break; default: goto badcode; } goto deliver; break; case ICMP6_PACKET_TOO_BIG: icmp6_ifstat_inc(ifp, ifs6_in_pkttoobig); /* validation is made in icmp6_mtudisc_update */ code = PRC_MSGSIZE; /* * Updating the path MTU will be done after examining * intermediate extension headers. */ goto deliver; break; case ICMP6_TIME_EXCEEDED: icmp6_ifstat_inc(ifp, ifs6_in_timeexceed); switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: code = PRC_TIMXCEED_INTRANS; break; case ICMP6_TIME_EXCEED_REASSEMBLY: code = PRC_TIMXCEED_REASS; break; default: goto badcode; } goto deliver; break; case ICMP6_PARAM_PROB: icmp6_ifstat_inc(ifp, ifs6_in_paramprob); switch (code) { case ICMP6_PARAMPROB_NEXTHEADER: code = PRC_UNREACH_PROTOCOL; break; case ICMP6_PARAMPROB_HEADER: case ICMP6_PARAMPROB_OPTION: code = PRC_PARAMPROB; break; default: goto badcode; } goto deliver; break; case ICMP6_ECHO_REQUEST: icmp6_ifstat_inc(ifp, ifs6_in_echo); if (code != 0) goto badcode; if ((n = m_copy(m, 0, M_COPYALL)) == NULL) { /* Give up remote */ break; } if (!M_WRITABLE(n) || n->m_len < off + sizeof(struct icmp6_hdr)) { struct mbuf *n0 = n; int n0len; CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) <= MHLEN); n = m_gethdr(M_NOWAIT, n0->m_type); if (n == NULL) { /* Give up remote */ m_freem(n0); break; } m_move_pkthdr(n, n0); /* FIB copied. */ n0len = n0->m_pkthdr.len; /* save for use below */ /* * Copy IPv6 and ICMPv6 only. */ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); noff = sizeof(struct ip6_hdr); /* new mbuf contains only ipv6+icmpv6 headers */ n->m_len = noff + sizeof(struct icmp6_hdr); /* * Adjust mbuf. ip6_plen will be adjusted in * ip6_output(). 
*/ m_adj(n0, off + sizeof(struct icmp6_hdr)); /* recalculate complete packet size */ n->m_pkthdr.len = n0len + (noff - off); n->m_next = n0; } else { nip6 = mtod(n, struct ip6_hdr *); IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off, sizeof(*nicmp6)); noff = off; } nicmp6->icmp6_type = ICMP6_ECHO_REPLY; nicmp6->icmp6_code = 0; if (n) { ICMP6STAT_INC(icp6s_reflect); ICMP6STAT_INC(icp6s_outhist[ICMP6_ECHO_REPLY]); icmp6_reflect(n, noff); } break; case ICMP6_ECHO_REPLY: icmp6_ifstat_inc(ifp, ifs6_in_echoreply); if (code != 0) goto badcode; break; case MLD_LISTENER_QUERY: case MLD_LISTENER_REPORT: case MLD_LISTENER_DONE: case MLDV2_LISTENER_REPORT: /* * Drop MLD traffic which is not link-local, has a hop limit * of greater than 1 hop, or which does not have the * IPv6 HBH Router Alert option. * As IPv6 HBH options are stripped in ip6_input() we must * check an mbuf header flag. * XXX Should we also sanity check that these messages * were directed to a link-local multicast prefix? */ if ((ip6->ip6_hlim != 1) || (m->m_flags & M_RTALERT_MLD) == 0) goto freeit; if (mld_input(m, off, icmp6len) != 0) return (IPPROTO_DONE); /* m stays. */ break; case ICMP6_WRUREQUEST: /* ICMP6_FQDN_QUERY */ { enum { WRU, FQDN } mode; if (!V_icmp6_nodeinfo) break; if (icmp6len == sizeof(struct icmp6_hdr) + 4) mode = WRU; else if (icmp6len >= sizeof(struct icmp6_nodeinfo)) mode = FQDN; else goto badlen; if (mode == FQDN) { #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo), IPPROTO_DONE); #endif n = m_copy(m, 0, M_COPYALL); if (n) n = ni6_input(n, off); /* XXX meaningless if n == NULL */ noff = sizeof(struct ip6_hdr); } else { struct prison *pr; u_char *p; int maxhlen, hlen; /* * XXX: this combination of flags is pointless, * but should we keep this for compatibility? */ if ((V_icmp6_nodeinfo & (ICMP6_NODEINFO_FQDNOK | ICMP6_NODEINFO_TMPADDROK)) != (ICMP6_NODEINFO_FQDNOK | ICMP6_NODEINFO_TMPADDROK)) break; if (code != 0) goto badcode; CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) + 4 <= MHLEN); n = m_gethdr(M_NOWAIT, m->m_type); if (n == NULL) { /* Give up remote */ break; } if (!m_dup_pkthdr(n, m, M_NOWAIT)) { /* * Previous code did a blind M_COPY_PKTHDR * and said "just for rcvif". If true, then * we could tolerate the dup failing (due to * the deep copy of the tag chain). For now * be conservative and just fail. */ m_free(n); n = NULL; } maxhlen = M_TRAILINGSPACE(n) - (sizeof(*nip6) + sizeof(*nicmp6) + 4); pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); hlen = strlen(pr->pr_hostname); if (maxhlen > hlen) maxhlen = hlen; /* * Copy IPv6 and ICMPv6 only. */ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); p = (u_char *)(nicmp6 + 1); bzero(p, 4); /* meaningless TTL */ bcopy(pr->pr_hostname, p + 4, maxhlen); mtx_unlock(&pr->pr_mtx); noff = sizeof(struct ip6_hdr); n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + 4 + maxhlen; nicmp6->icmp6_type = ICMP6_WRUREPLY; nicmp6->icmp6_code = 0; } if (n) { ICMP6STAT_INC(icp6s_reflect); ICMP6STAT_INC(icp6s_outhist[ICMP6_WRUREPLY]); icmp6_reflect(n, noff); } break; } case ICMP6_WRUREPLY: if (code != 0) goto badcode; break; case ND_ROUTER_SOLICIT: icmp6_ifstat_inc(ifp, ifs6_in_routersolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* give up local */ /* Send incoming SeND packet to user space. 
*/ if (send_sendso_input_hook != NULL) { IP6_EXTHDR_CHECK(m, off, icmp6len, IPPROTO_DONE); error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); /* -1 == no app on SEND socket */ if (error == 0) return (IPPROTO_DONE); nd6_rs_input(m, off, icmp6len); } else nd6_rs_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { IP6_EXTHDR_CHECK(n, off, icmp6len, IPPROTO_DONE); error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; /* -1 == no app on SEND socket */ nd6_rs_input(n, off, icmp6len); } else nd6_rs_input(n, off, icmp6len); /* m stays. */ break; case ND_ROUTER_ADVERT: icmp6_ifstat_inc(ifp, ifs6_in_routeradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* Send incoming SeND-protected/ND packet to user space. */ if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); nd6_ra_input(m, off, icmp6len); } else nd6_ra_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; nd6_ra_input(n, off, icmp6len); } else nd6_ra_input(n, off, icmp6len); /* m stays. */ break; case ND_NEIGHBOR_SOLICIT: icmp6_ifstat_inc(ifp, ifs6_in_neighborsolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); nd6_ns_input(m, off, icmp6len); } else nd6_ns_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; nd6_ns_input(n, off, icmp6len); } else nd6_ns_input(n, off, icmp6len); /* m stays. */ break; case ND_NEIGHBOR_ADVERT: icmp6_ifstat_inc(ifp, ifs6_in_neighboradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* Send incoming SeND-protected/ND packet to user space. */ if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); nd6_na_input(m, off, icmp6len); } else nd6_na_input(m, off, icmp6len); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; nd6_na_input(n, off, icmp6len); } else nd6_na_input(n, off, icmp6len); /* m stays. */ break; case ND_REDIRECT: icmp6_ifstat_inc(ifp, ifs6_in_redirect); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_redirect)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) return (IPPROTO_DONE); icmp6_redirect_input(m, off); } else icmp6_redirect_input(m, off); m = NULL; goto freeit; } if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(n, ifp, SND_IN, ip6len); if (error == 0) goto freeit; icmp6_redirect_input(n, off); } else icmp6_redirect_input(n, off); /* m stays. 
*/ break; case ICMP6_ROUTER_RENUMBERING: if (code != ICMP6_ROUTER_RENUMBERING_COMMAND && code != ICMP6_ROUTER_RENUMBERING_RESULT) goto badcode; if (icmp6len < sizeof(struct icmp6_router_renum)) goto badlen; break; default: nd6log((LOG_DEBUG, "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n", icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ifp ? ifp->if_index : 0)); if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) { /* ICMPv6 error: MUST deliver it by spec... */ code = PRC_NCMDS; /* deliver */ } else { /* ICMPv6 informational: MUST not deliver */ break; } deliver: if (icmp6_notify_error(&m, off, icmp6len, code) != 0) { /* In this case, m should've been freed. */ return (IPPROTO_DONE); } break; badcode: ICMP6STAT_INC(icp6s_badcode); break; badlen: ICMP6STAT_INC(icp6s_badlen); break; } /* deliver the packet to appropriate sockets */ icmp6_rip6_input(&m, *offp); return IPPROTO_DONE; freeit: m_freem(m); return IPPROTO_DONE; } static int icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code) { struct mbuf *m = *mp; struct icmp6_hdr *icmp6; struct ip6_hdr *eip6; u_int32_t notifymtu; struct sockaddr_in6 icmp6src, icmp6dst; if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) { ICMP6STAT_INC(icp6s_tooshort); goto freeit; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1); icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif eip6 = (struct ip6_hdr *)(icmp6 + 1); /* Detect the upper level protocol */ { void (*ctlfunc)(int, struct sockaddr *, void *); u_int8_t nxt = eip6->ip6_nxt; int eoff = off + sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr); struct ip6ctlparam ip6cp; struct in6_addr *finaldst = NULL; int icmp6type = icmp6->icmp6_type; struct ip6_frag *fh; struct ip6_rthdr *rth; struct ip6_rthdr0 *rth0; int rthlen; while (1) { /* XXX: should avoid infinite loop explicitly? */ struct ip6_ext *eh; switch (nxt) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: case IPPROTO_AH: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_ext), -1); eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(eh, struct ip6_ext *, m, eoff, sizeof(*eh)); if (eh == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif if (nxt == IPPROTO_AH) eoff += (eh->ip6e_len + 2) << 2; else eoff += (eh->ip6e_len + 1) << 3; nxt = eh->ip6e_nxt; break; case IPPROTO_ROUTING: /* * When the erroneous packet contains a * routing header, we should examine the * header to determine the final destination. * Otherwise, we can't properly update * information that depends on the final * destination (e.g. path MTU). */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1); rth = (struct ip6_rthdr *) (mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m, eoff, sizeof(*rth)); if (rth == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif rthlen = (rth->ip6r_len + 1) << 3; /* * XXX: currently there is no * officially defined type other * than type-0. * Note that if the segment left field * is 0, all intermediate hops must * have been passed. 
*/ if (rth->ip6r_segleft && rth->ip6r_type == IPV6_RTHDR_TYPE_0) { int hops; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1); rth0 = (struct ip6_rthdr0 *) (mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth0, struct ip6_rthdr0 *, m, eoff, rthlen); if (rth0 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* just ignore a bogus header */ if ((rth0->ip6r0_len % 2) == 0 && (hops = rth0->ip6r0_len/2)) finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1); } eoff += rthlen; nxt = rth->ip6r_nxt; break; case IPPROTO_FRAGMENT: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_frag), -1); fh = (struct ip6_frag *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(fh, struct ip6_frag *, m, eoff, sizeof(*fh)); if (fh == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* * Data after a fragment header is meaningless * unless it is the first fragment, but * we'll go to the notify label for path MTU * discovery. */ if (fh->ip6f_offlg & IP6F_OFF_MASK) goto notify; eoff += sizeof(struct ip6_frag); nxt = fh->ip6f_nxt; break; default: /* * This case includes ESP and the No Next * Header. In such cases going to the notify * label does not have any meaning * (i.e. ctlfunc will be NULL), but we go * anyway since we might have to update * path MTU information. */ goto notify; } } notify: #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* * retrieve parameters from the inner IPv6 header, and convert * them into sockaddr structures. * XXX: there is no guarantee that the source or destination * addresses of the inner packet are in the same scope as * the addresses of the icmp packet. But there is no other * way to determine the zone. */ eip6 = (struct ip6_hdr *)(icmp6 + 1); bzero(&icmp6dst, sizeof(icmp6dst)); icmp6dst.sin6_len = sizeof(struct sockaddr_in6); icmp6dst.sin6_family = AF_INET6; if (finaldst == NULL) icmp6dst.sin6_addr = eip6->ip6_dst; else icmp6dst.sin6_addr = *finaldst; if (in6_setscope(&icmp6dst.sin6_addr, m->m_pkthdr.rcvif, NULL)) goto freeit; bzero(&icmp6src, sizeof(icmp6src)); icmp6src.sin6_len = sizeof(struct sockaddr_in6); icmp6src.sin6_family = AF_INET6; icmp6src.sin6_addr = eip6->ip6_src; if (in6_setscope(&icmp6src.sin6_addr, m->m_pkthdr.rcvif, NULL)) goto freeit; icmp6src.sin6_flowinfo = (eip6->ip6_flow & IPV6_FLOWLABEL_MASK); if (finaldst == NULL) finaldst = &eip6->ip6_dst; ip6cp.ip6c_m = m; ip6cp.ip6c_icmp6 = icmp6; ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1); ip6cp.ip6c_off = eoff; ip6cp.ip6c_finaldst = finaldst; ip6cp.ip6c_src = &icmp6src; ip6cp.ip6c_nxt = nxt; if (icmp6type == ICMP6_PACKET_TOO_BIG) { notifymtu = ntohl(icmp6->icmp6_mtu); ip6cp.ip6c_cmdarg = (void *)¬ifymtu; icmp6_mtudisc_update(&ip6cp, 1); /*XXX*/ } ctlfunc = (void (*)(int, struct sockaddr *, void *)) (inet6sw[ip6_protox[nxt]].pr_ctlinput); if (ctlfunc) { (void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst, &ip6cp); } } *mp = m; return (0); freeit: m_freem(m); return (-1); } void icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated) { struct in6_addr *dst = ip6cp->ip6c_finaldst; struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6; struct mbuf *m = ip6cp->ip6c_m; /* will be necessary for scope issue */ u_int mtu = ntohl(icmp6->icmp6_mtu); struct in_conninfo inc; #if 0 /* * RFC2460 section 5, last paragraph. 
* even though minimum link MTU for IPv6 is IPV6_MMTU, * we may see ICMPv6 too big with mtu < IPV6_MMTU * due to packet translator in the middle. * see ip6_output() and ip6_getpmtu() "alwaysfrag" case for * special handling. */ if (mtu < IPV6_MMTU) return; #endif /* * we reject ICMPv6 too big with abnormally small value. * XXX what is the good definition of "abnormally small"? */ if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8) return; if (!validated) return; /* * In case the suggested mtu is less than IPV6_MMTU, we * only need to remember that it was for above mentioned * "alwaysfrag" case. * Try to be as close to the spec as possible. */ if (mtu < IPV6_MMTU) mtu = IPV6_MMTU - 8; bzero(&inc, sizeof(inc)); inc.inc_fibnum = M_GETFIB(m); inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL)) return; if (mtu < tcp_maxmtu6(&inc, NULL)) { tcp_hc_updatemtu(&inc, mtu); ICMP6STAT_INC(icp6s_pmtuchg); } } /* * Process a Node Information Query packet, based on * draft-ietf-ipngwg-icmp-name-lookups-07. * * Spec incompatibilities: * - IPv6 Subject address handling * - IPv4 Subject address handling support missing * - Proxy reply (answer even if it's not for me) * - joins NI group address at in6_ifattach() time only, does not cope * with hostname changes by sethostname(3) */ static struct mbuf * ni6_input(struct mbuf *m, int off) { struct icmp6_nodeinfo *ni6, *nni6; struct mbuf *n = NULL; struct prison *pr; u_int16_t qtype; int subjlen; int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); struct ni_reply_fqdn *fqdn; int addrs; /* for NI_QTYPE_NODEADDR */ struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */ struct in6_addr in6_subj; /* subject address */ struct ip6_hdr *ip6; int oldfqdn = 0; /* if 1, return pascal string (03 draft) */ char *subj = NULL; struct in6_ifaddr *ia6 = NULL; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6)); if (ni6 == NULL) { /* m is already reclaimed */ return (NULL); } #endif /* * Validate IPv6 source address. * The default configuration MUST be to refuse answering queries from * global-scope addresses according to RFC4602. * Notes: * - it's not very clear what "refuse" means; this implementation * simply drops it. * - it's not very easy to identify global-scope (unicast) addresses * since there are many prefixes for them. It should be safer * and in practice sufficient to check "all" but loopback and * link-local (note that site-local unicast was deprecated and * ULA is defined as global scope-wise) */ if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_GLOBALOK) == 0 && !IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && !IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) goto bad; /* * Validate IPv6 destination address. * * The Responder must discard the Query without further processing * unless it is one of the Responder's unicast or anycast addresses, or * a link-local scope multicast address which the Responder has joined. * [RFC4602, Section 5.] 
*/ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) goto bad; /* else it's a link-local multicast, fine */ } else { /* unicast or anycast */ ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia6 == NULL) goto bad; /* XXX impossible */ if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) && !(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) { ifa_free(&ia6->ia_ifa); nd6log((LOG_DEBUG, "ni6_input: ignore node info to " "a temporary address in %s:%d", __FILE__, __LINE__)); goto bad; } ifa_free(&ia6->ia_ifa); } /* validate query Subject field. */ qtype = ntohs(ni6->ni_qtype); subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo); switch (qtype) { case NI_QTYPE_NOOP: case NI_QTYPE_SUPTYPES: /* 07 draft */ if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0) break; /* FALLTHROUGH */ case NI_QTYPE_FQDN: case NI_QTYPE_NODEADDR: case NI_QTYPE_IPV4ADDR: switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: #if ICMP6_NI_SUBJ_IPV6 != 0 case 0: #endif /* * backward compatibility - try to accept 03 draft * format, where no Subject is present. */ if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 && subjlen == 0) { oldfqdn++; break; } #if ICMP6_NI_SUBJ_IPV6 != 0 if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6) goto bad; #endif if (subjlen != sizeof(struct in6_addr)) goto bad; /* * Validate Subject address. * * Not sure what exactly "address belongs to the node" * means in the spec, is it just unicast, or what? * * At this moment we consider Subject address as * "belong to the node" if the Subject address equals * to the IPv6 destination address; validation for * IPv6 destination address should have done enough * check for us. * * We do not do proxy at this moment. */ /* m_pulldown instead of copy? */ m_copydata(m, off + sizeof(struct icmp6_nodeinfo), subjlen, (caddr_t)&in6_subj); if (in6_setscope(&in6_subj, m->m_pkthdr.rcvif, NULL)) goto bad; subj = (char *)&in6_subj; if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj)) break; /* * XXX if we are to allow other cases, we should really * be careful about scope here. * basically, we should disallow queries toward IPv6 * destination X with subject Y, * if scope(X) > scope(Y). * if we allow scope(X) > scope(Y), it will result in * information leakage across scope boundary. */ goto bad; case ICMP6_NI_SUBJ_FQDN: /* * Validate Subject name with gethostname(3). * * The behavior may need some debate, since: * - we are not sure if the node has FQDN as * hostname (returned by gethostname(3)). * - the code does wildcard match for truncated names. * however, we are not sure if we want to perform * wildcard match, if gethostname(3) side has * truncated hostname. */ pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); n = ni6_nametodns(pr->pr_hostname, strlen(pr->pr_hostname), 0); mtx_unlock(&pr->pr_mtx); if (!n || n->m_next || n->m_len == 0) goto bad; IP6_EXTHDR_GET(subj, char *, m, off + sizeof(struct icmp6_nodeinfo), subjlen); if (subj == NULL) goto bad; if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *), n->m_len)) { goto bad; } m_freem(n); n = NULL; break; case ICMP6_NI_SUBJ_IPV4: /* XXX: to be implemented? */ default: goto bad; } break; } /* refuse based on configuration. XXX ICMP6_NI_REFUSED? 
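 * (For instance, an FQDN query is answered only when the
 * ICMP6_NODEINFO_FQDNOK bit is set in V_icmp6_nodeinfo, and node
 * address queries only when ICMP6_NODEINFO_NODEADDROK is set; a
 * refused query is silently dropped rather than answered with an
 * ICMP6_NI_REFUSED code.)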
*/ switch (qtype) { case NI_QTYPE_FQDN: if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_FQDNOK) == 0) goto bad; break; case NI_QTYPE_NODEADDR: case NI_QTYPE_IPV4ADDR: if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_NODEADDROK) == 0) goto bad; break; } /* guess reply length */ switch (qtype) { case NI_QTYPE_NOOP: break; /* no reply data */ case NI_QTYPE_SUPTYPES: replylen += sizeof(u_int32_t); break; case NI_QTYPE_FQDN: /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); break; case NI_QTYPE_NODEADDR: addrs = ni6_addrs(ni6, m, &ifp, (struct in6_addr *)subj); if ((replylen += addrs * (sizeof(struct in6_addr) + sizeof(u_int32_t))) > MCLBYTES) replylen = MCLBYTES; /* XXX: will truncate pkt later */ break; case NI_QTYPE_IPV4ADDR: /* unsupported - should respond with unknown Qtype? */ break; default: /* * XXX: We must return a reply with the ICMP6 code * `unknown Qtype' in this case. However we regard the case * as an FQDN query for backward compatibility. * Older versions set a random value to this field, * so it rarely varies in the defined qtypes. * But the mechanism is not reliable... * maybe we should obsolete older versions. */ qtype = NI_QTYPE_FQDN; /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); oldfqdn++; break; } /* Allocate an mbuf to reply. */ if (replylen > MCLBYTES) { /* * XXX: should we try to allocate more? But MCLBYTES * is probably much larger than IPV6_MMTU... */ goto bad; } if (replylen > MHLEN) n = m_getcl(M_NOWAIT, m->m_type, M_PKTHDR); else n = m_gethdr(M_NOWAIT, m->m_type); if (n == NULL) { m_freem(m); return (NULL); } m_move_pkthdr(n, m); /* just for recvif and FIB */ n->m_pkthdr.len = n->m_len = replylen; /* copy mbuf header and IPv6 + Node Information base headers */ bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr)); nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1); bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo)); /* qtype dependent procedure */ switch (qtype) { case NI_QTYPE_NOOP: nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = 0; break; case NI_QTYPE_SUPTYPES: { u_int32_t v; nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = htons(0x0000); /* raw bitmap */ /* supports NOOP, SUPTYPES, FQDN, and NODEADDR */ v = (u_int32_t)htonl(0x0000000f); bcopy(&v, nni6 + 1, sizeof(u_int32_t)); break; } case NI_QTYPE_FQDN: nni6->ni_code = ICMP6_NI_SUCCESS; fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) + sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo)); nni6->ni_flags = 0; /* XXX: meaningless TTL */ fqdn->ni_fqdn_ttl = 0; /* ditto. */ /* * XXX do we really have FQDN in hostname? */ pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); n->m_next = ni6_nametodns(pr->pr_hostname, strlen(pr->pr_hostname), oldfqdn); mtx_unlock(&pr->pr_mtx); if (n->m_next == NULL) goto bad; /* XXX we assume that n->m_next is not a chain */ if (n->m_next->m_next != NULL) goto bad; n->m_pkthdr.len += n->m_next->m_len; break; case NI_QTYPE_NODEADDR: { int lenlim, copied; nni6->ni_code = ICMP6_NI_SUCCESS; n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); lenlim = M_TRAILINGSPACE(n); copied = ni6_store_addrs(ni6, nni6, ifp, lenlim); /* XXX: reset mbuf length */ n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo) + copied; break; } default: break; /* XXX impossible! 
*/ } nni6->ni_type = ICMP6_NI_REPLY; m_freem(m); return (n); bad: m_freem(m); if (n) m_freem(n); return (NULL); } /* * make a mbuf with DNS-encoded string. no compression support. * * XXX names with less than 2 dots (like "foo" or "foo.section") will be * treated as truncated name (two \0 at the end). this is a wild guess. * * old - return pascal string if non-zero */ static struct mbuf * ni6_nametodns(const char *name, int namelen, int old) { struct mbuf *m; char *cp, *ep; const char *p, *q; int i, len, nterm; if (old) len = namelen + 1; else len = MCLBYTES; /* Because MAXHOSTNAMELEN is usually 256, we use cluster mbuf. */ if (len > MLEN) m = m_getcl(M_NOWAIT, MT_DATA, 0); else m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) goto fail; if (old) { m->m_len = len; *mtod(m, char *) = namelen; bcopy(name, mtod(m, char *) + 1, namelen); return m; } else { m->m_len = 0; cp = mtod(m, char *); ep = mtod(m, char *) + M_TRAILINGSPACE(m); /* if not certain about my name, return empty buffer */ if (namelen == 0) return m; /* * guess if it looks like shortened hostname, or FQDN. * shortened hostname needs two trailing "\0". */ i = 0; for (p = name; p < name + namelen; p++) { if (*p && *p == '.') i++; } if (i < 2) nterm = 2; else nterm = 1; p = name; while (cp < ep && p < name + namelen) { i = 0; for (q = p; q < name + namelen && *q && *q != '.'; q++) i++; /* result does not fit into mbuf */ if (cp + i + 1 >= ep) goto fail; /* * DNS label length restriction, RFC1035 page 8. * "i == 0" case is included here to avoid returning * 0-length label on "foo..bar". */ if (i <= 0 || i >= 64) goto fail; *cp++ = i; bcopy(p, cp, i); cp += i; p = q; if (p < name + namelen && *p == '.') p++; } /* termination */ if (cp + nterm >= ep) goto fail; while (nterm-- > 0) *cp++ = '\0'; m->m_len = cp - mtod(m, char *); return m; } panic("should not reach here"); /* NOTREACHED */ fail: if (m) m_freem(m); return NULL; } /* * check if two DNS-encoded string matches. takes care of truncated * form (with \0\0 at the end). no compression support. * XXX upper/lowercase match (see RFC2065) */ static int ni6_dnsmatch(const char *a, int alen, const char *b, int blen) { const char *a0, *b0; int l; /* simplest case - need validation? */ if (alen == blen && bcmp(a, b, alen) == 0) return 1; a0 = a; b0 = b; /* termination is mandatory */ if (alen < 2 || blen < 2) return 0; if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0') return 0; alen--; blen--; while (a - a0 < alen && b - b0 < blen) { if (a - a0 + 1 > alen || b - b0 + 1 > blen) return 0; if ((signed char)a[0] < 0 || (signed char)b[0] < 0) return 0; /* we don't support compression yet */ if (a[0] >= 64 || b[0] >= 64) return 0; /* truncated case */ if (a[0] == 0 && a - a0 == alen - 1) return 1; if (b[0] == 0 && b - b0 == blen - 1) return 1; if (a[0] == 0 || b[0] == 0) return 0; if (a[0] != b[0]) return 0; l = a[0]; if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen) return 0; if (bcmp(a + 1, b + 1, l) != 0) return 0; a += 1 + l; b += 1 + l; } if (a - a0 == alen && b - b0 == blen) return 1; else return 0; } /* * calculate the number of addresses to be returned in the node info reply. */ static int ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp, struct in6_addr *subj) { struct ifnet *ifp; struct in6_ifaddr *ifa6; struct ifaddr *ifa; int addrs = 0, addrsofif, iffound = 0; int niflags = ni6->ni_flags; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) { switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: if (subj == NULL) /* must be impossible... 
*/ return (0); break; default: /* * XXX: we only support IPv6 subject address for * this Qtype. */ return (0); } } IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { addrsofif = 0; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 && IN6_ARE_ADDR_EQUAL(subj, &ifa6->ia_addr.sin6_addr)) iffound = 1; /* * IPv4-mapped addresses can only be returned by a * Node Information proxy, since they represent * addresses of IPv4-only nodes, which perforce do * not implement this protocol. * [icmp-name-lookups-07, Section 5.4] * So we don't support NI_NODEADDR_FLAG_COMPAT in * this function at this moment. */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec. */ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; /* we need only unicast addresses */ if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { continue; } addrsofif++; /* count the address */ } IF_ADDR_RUNLOCK(ifp); if (iffound) { *ifpp = ifp; IFNET_RUNLOCK_NOSLEEP(); return (addrsofif); } addrs += addrsofif; } IFNET_RUNLOCK_NOSLEEP(); return (addrs); } static int ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, struct ifnet *ifp0, int resid) { struct ifnet *ifp; struct in6_ifaddr *ifa6; struct ifaddr *ifa; struct ifnet *ifp_dep = NULL; int copied = 0, allow_deprecated = 0; u_char *cp = (u_char *)(nni6 + 1); int niflags = ni6->ni_flags; u_int32_t ltime; if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL)) return (0); /* needless to copy */ IFNET_RLOCK_NOSLEEP(); ifp = ifp0 ? ifp0 : TAILQ_FIRST(&V_ifnet); again: for (; ifp; ifp = TAILQ_NEXT(ifp, if_link)) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 && allow_deprecated == 0) { /* * prefererred address should be put before * deprecated addresses. */ /* record the interface for later search */ if (ifp_dep == NULL) ifp_dep = ifp; continue; } else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 && allow_deprecated != 0) continue; /* we now collect deprecated addrs */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec. 
*/ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { continue; } /* now we can copy the address */ if (resid < sizeof(struct in6_addr) + sizeof(u_int32_t)) { IF_ADDR_RUNLOCK(ifp); /* * We give up much more copy. * Set the truncate flag and return. */ nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE; IFNET_RUNLOCK_NOSLEEP(); return (copied); } /* * Set the TTL of the address. * The TTL value should be one of the following * according to the specification: * * 1. The remaining lifetime of a DHCP lease on the * address, or * 2. The remaining Valid Lifetime of a prefix from * which the address was derived through Stateless * Autoconfiguration. * * Note that we currently do not support stateful * address configuration by DHCPv6, so the former * case can't happen. */ if (ifa6->ia6_lifetime.ia6t_expire == 0) ltime = ND6_INFINITE_LIFETIME; else { if (ifa6->ia6_lifetime.ia6t_expire > time_uptime) ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_uptime); else ltime = 0; } bcopy(<ime, cp, sizeof(u_int32_t)); cp += sizeof(u_int32_t); /* copy the address itself */ bcopy(&ifa6->ia_addr.sin6_addr, cp, sizeof(struct in6_addr)); in6_clearscope((struct in6_addr *)cp); /* XXX */ cp += sizeof(struct in6_addr); resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t)); copied += (sizeof(struct in6_addr) + sizeof(u_int32_t)); } IF_ADDR_RUNLOCK(ifp); if (ifp0) /* we need search only on the specified IF */ break; } if (allow_deprecated == 0 && ifp_dep != NULL) { ifp = ifp_dep; allow_deprecated = 1; goto again; } IFNET_RUNLOCK_NOSLEEP(); return (copied); } /* * XXX almost dup'ed code with rip6_input. */ static int icmp6_rip6_input(struct mbuf **mp, int off) { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct inpcb *in6p; struct inpcb *last = NULL; struct sockaddr_in6 fromsa; struct icmp6_hdr *icmp6; struct mbuf *opts = NULL; #ifndef PULLDOWN_TEST /* this is assumed to be safe. */ icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { /* m is already reclaimed */ return (IPPROTO_DONE); } #endif /* * XXX: the address may have embedded scope zone ID, which should be * hidden from applications. */ bzero(&fromsa, sizeof(fromsa)); fromsa.sin6_family = AF_INET6; fromsa.sin6_len = sizeof(struct sockaddr_in6); fromsa.sin6_addr = ip6->ip6_src; if (sa6_recoverscope(&fromsa)) { m_freem(m); return (IPPROTO_DONE); } INP_INFO_RLOCK(&V_ripcbinfo); LIST_FOREACH(in6p, &V_ripcb, inp_list) { if ((in6p->inp_vflag & INP_IPV6) == 0) continue; if (in6p->inp_ip_p != IPPROTO_ICMPV6) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; INP_RLOCK(in6p); if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type, in6p->in6p_icmp6filt)) { INP_RUNLOCK(in6p); continue; } if (last != NULL) { struct mbuf *n = NULL; /* * Recent network drivers tend to allocate a single * mbuf cluster, rather than to make a couple of * mbufs without clusters. Also, since the IPv6 code * path tries to avoid m_pullup(), it is highly * probable that we still have an mbuf cluster here * even though the necessary length can be stored in an * mbuf's internal buffer. 
* Meanwhile, the default size of the receive socket * buffer for raw sockets is not so large. This means * the possibility of packet loss is relatively higher * than before. To avoid this scenario, we copy the * received data to a separate mbuf that does not use * a cluster, if possible. * XXX: it is better to copy the data after stripping * intermediate headers. */ if ((m->m_flags & M_EXT) && m->m_next == NULL && m->m_len <= MHLEN) { n = m_get(M_NOWAIT, m->m_type); if (n != NULL) { if (m_dup_pkthdr(n, m, M_NOWAIT)) { bcopy(m->m_data, n->m_data, m->m_len); n->m_len = m->m_len; } else { m_free(n); n = NULL; } } } if (n != NULL || (n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { if (last->inp_flags & INP_CONTROLOPTS) ip6_savecontrol(last, n, &opts); /* strip intermediate headers */ m_adj(n, off); SOCKBUF_LOCK(&last->inp_socket->so_rcv); if (sbappendaddr_locked( &last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, n, opts) == 0) { /* should notify about lost packet */ m_freem(n); if (opts) { m_freem(opts); } SOCKBUF_UNLOCK( &last->inp_socket->so_rcv); } else sorwakeup_locked(last->inp_socket); opts = NULL; } INP_RUNLOCK(last); } last = in6p; } INP_INFO_RUNLOCK(&V_ripcbinfo); if (last != NULL) { if (last->inp_flags & INP_CONTROLOPTS) ip6_savecontrol(last, m, &opts); /* strip intermediate headers */ m_adj(m, off); /* avoid using mbuf clusters if possible (see above) */ if ((m->m_flags & M_EXT) && m->m_next == NULL && m->m_len <= MHLEN) { struct mbuf *n; n = m_get(M_NOWAIT, m->m_type); if (n != NULL) { if (m_dup_pkthdr(n, m, M_NOWAIT)) { bcopy(m->m_data, n->m_data, m->m_len); n->m_len = m->m_len; m_freem(m); m = n; } else { m_freem(n); n = NULL; } } } SOCKBUF_LOCK(&last->inp_socket->so_rcv); if (sbappendaddr_locked(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { m_freem(m); if (opts) m_freem(opts); SOCKBUF_UNLOCK(&last->inp_socket->so_rcv); } else sorwakeup_locked(last->inp_socket); INP_RUNLOCK(last); } else { m_freem(m); IP6STAT_DEC(ip6s_delivered); } return IPPROTO_DONE; } /* * Reflect the ip6 packet back to the source. * OFF points to the icmp6 header, counted from the top of the mbuf. */ void icmp6_reflect(struct mbuf *m, size_t off) { struct in6_addr src6, *srcp; struct ip6_hdr *ip6; struct icmp6_hdr *icmp6; struct in6_ifaddr *ia = NULL; struct ifnet *outif = NULL; int plen; int type, code, hlim; /* too short to reflect */ if (off < sizeof(struct ip6_hdr)) { nd6log((LOG_DEBUG, "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n", (u_long)off, (u_long)sizeof(struct ip6_hdr), __FILE__, __LINE__)); goto bad; } /* * If there are extra headers between IPv6 and ICMPv6, strip * off that header first. */ #ifdef DIAGNOSTIC if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN) panic("assumption failed in icmp6_reflect"); #endif if (off > sizeof(struct ip6_hdr)) { size_t l; struct ip6_hdr nip6; l = off - sizeof(struct ip6_hdr); m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6); m_adj(m, l); l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) return; } bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6)); } else /* off == sizeof(struct ip6_hdr) */ { size_t l; l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) return; } } plen = m->m_pkthdr.len - sizeof(struct ip6_hdr); ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_nxt = IPPROTO_ICMPV6; icmp6 = (struct icmp6_hdr *)(ip6 + 1); type = icmp6->icmp6_type; /* keep type for statistics */ code = icmp6->icmp6_code; /* ditto. 
*/ hlim = 0; srcp = NULL; /* * If the incoming packet was addressed directly to us (i.e. unicast), * use dst as the src for the reply. * The IN6_IFF_NOTREADY case should be VERY rare, but is possible * (for example) when we encounter an error while forwarding procedure * destined to a duplicated address of ours. */ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia != NULL && !(ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) { src6 = ia->ia_addr.sin6_addr; srcp = &src6; if (m->m_pkthdr.rcvif != NULL) { /* XXX: This may not be the outgoing interface */ hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim; } else hlim = V_ip6_defhlim; } if (ia != NULL) ifa_free(&ia->ia_ifa); } if (srcp == NULL) { int error; struct in6_addr dst6; uint32_t scopeid; /* * This case matches to multicasts, our anycast, or unicasts * that we do not own. Select a source address based on the * source address of the erroneous packet. */ in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid); error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6, scopeid, NULL, &src6, &hlim); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "icmp6_reflect: source can't be determined: " "dst=%s, error=%d\n", ip6_sprintf(ip6buf, &ip6->ip6_dst), error)); goto bad; } srcp = &src6; } /* * ip6_input() drops a packet if its src is multicast. * So, the src is never multicast. */ ip6->ip6_dst = ip6->ip6_src; ip6->ip6_src = *srcp; ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = hlim; icmp6->icmp6_cksum = 0; icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), plen); /* * XXX option handling */ m->m_flags &= ~(M_BCAST|M_MCAST); ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) icmp6_ifoutstat_inc(outif, type, code); return; bad: m_freem(m); return; } void icmp6_fasttimo(void) { mld_fasttimo(); } void icmp6_slowtimo(void) { mld_slowtimo(); } static const char * icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6, struct in6_addr *tgt6) { static char buf[1024]; char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; char ip6buft[INET6_ADDRSTRLEN]; snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)", ip6_sprintf(ip6bufs, src6), ip6_sprintf(ip6bufd, dst6), ip6_sprintf(ip6buft, tgt6)); return buf; } void icmp6_redirect_input(struct mbuf *m, int off) { struct ifnet *ifp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_redirect *nd_rd; int icmp6len = ntohs(ip6->ip6_plen); char *lladdr = NULL; int lladdrlen = 0; int is_router; int is_onlink; struct in6_addr src6 = ip6->ip6_src; struct in6_addr redtgt6; struct in6_addr reddst6; union nd_opts ndopts; char ip6buf[INET6_ADDRSTRLEN]; M_ASSERTPKTHDR(m); KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: no rcvif", __func__)); ifp = m->m_pkthdr.rcvif; /* XXX if we are router, we don't update route by icmp6 redirect */ if (V_ip6_forwarding) goto freeit; if (!V_icmp6_rediraccept) goto freeit; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len); if (nd_rd == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif redtgt6 = nd_rd->nd_rd_target; reddst6 = nd_rd->nd_rd_dst; if (in6_setscope(&redtgt6, m->m_pkthdr.rcvif, NULL) || in6_setscope(&reddst6, m->m_pkthdr.rcvif, NULL)) { goto freeit; } /* validation */ if (!IN6_IS_ADDR_LINKLOCAL(&src6)) { nd6log((LOG_ERR, "ICMP6 redirect sent from %s rejected; " "must 
be from linklocal\n", ip6_sprintf(ip6buf, &src6))); goto bad; } if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "ICMP6 redirect sent from %s rejected; " "hlim=%d (must be 255)\n", ip6_sprintf(ip6buf, &src6), ip6->ip6_hlim)); goto bad; } { /* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */ struct nhop6_basic nh6; struct in6_addr kdst; uint32_t scopeid; in6_splitscope(&reddst6, &kdst, &scopeid); if (fib6_lookup_nh_basic(RT_DEFAULT_FIB, &kdst, scopeid, 0, 0,&nh6)==0){ if ((nh6.nh_flags & NHF_GATEWAY) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; no route " "with inet6 gateway found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } if (IN6_ARE_ADDR_EQUAL(&src6, &nh6.nh_addr) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "not equal to gw-for-src=%s (must be same): " "%s\n", ip6_sprintf(ip6buf, &nh6.nh_addr), icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } } else { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "no route found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } } if (IN6_IS_ADDR_MULTICAST(&reddst6)) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "redirect dst must be unicast: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } is_router = is_onlink = 0; if (IN6_IS_ADDR_LINKLOCAL(&redtgt6)) is_router = 1; /* router case */ if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0) is_onlink = 1; /* on-link destination case */ if (!is_router && !is_onlink) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "neither router case nor onlink case: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } icmp6len -= sizeof(*nd_rd); nd6_option_init(nd_rd + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "%s: invalid ND option, rejected: %s\n", __func__, icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_tgt_lladdr) { lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1); lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "%s: lladdrlen mismatch for %s " "(if %d, icmp6 packet %d): %s\n", __func__, ip6_sprintf(ip6buf, &redtgt6), ifp->if_addrlen, lladdrlen - 2, icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } /* Validation passed. */ /* RFC 2461 8.3 */ nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT, is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER); /* * Install a gateway route in the better-router case or an interface * route in the on-link-destination case. 
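 * For example, when the redirect target equals the redirect
 * destination (the on-link case), an RTF_HOST route through the
 * receiving interface's own address is installed; when the target is
 * a link-local router address, an RTF_HOST|RTF_GATEWAY route via that
 * router is installed instead.  The route is added to every FIB via
 * the rt_numfibs loop below.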
*/ { struct sockaddr_in6 sdst; struct sockaddr_in6 sgw; struct sockaddr_in6 ssrc; struct sockaddr *gw; int rt_flags; u_int fibnum; bzero(&sdst, sizeof(sdst)); bzero(&ssrc, sizeof(ssrc)); sdst.sin6_family = ssrc.sin6_family = AF_INET6; sdst.sin6_len = ssrc.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr)); rt_flags = RTF_HOST; if (is_router) { bzero(&sgw, sizeof(sgw)); sgw.sin6_family = AF_INET6; sgw.sin6_len = sizeof(struct sockaddr_in6); bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr)); gw = (struct sockaddr *)&sgw; rt_flags |= RTF_GATEWAY; } else gw = ifp->if_addr->ifa_addr; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) in6_rtredirect((struct sockaddr *)&sdst, gw, (struct sockaddr *)NULL, rt_flags, (struct sockaddr *)&ssrc, fibnum); } /* finally update cached route in each socket via pfctlinput */ { struct sockaddr_in6 sdst; bzero(&sdst, sizeof(sdst)); sdst.sin6_family = AF_INET6; sdst.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst); } freeit: m_freem(m); return; bad: ICMP6STAT_INC(icp6s_badredirect); m_freem(m); } void icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) { struct ifnet *ifp; /* my outgoing interface */ struct in6_addr *ifp_ll6; struct in6_addr *router_ll6; struct ip6_hdr *sip6; /* m0 as struct ip6_hdr */ struct mbuf *m = NULL; /* newly allocated one */ struct m_tag *mtag; struct ip6_hdr *ip6; /* m as struct ip6_hdr */ struct nd_redirect *nd_rd; struct llentry *ln = NULL; size_t maxlen; u_char *p; struct ifnet *outif = NULL; struct sockaddr_in6 src_sa; icmp6_errcount(ND_REDIRECT, 0); /* if we are not router, we don't send icmp6 redirect */ if (!V_ip6_forwarding) goto fail; /* sanity check */ if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp)) goto fail; /* * Address check: * the source address must identify a neighbor, and * the destination address must not be a multicast address * [RFC 2461, sec 8.2] */ sip6 = mtod(m0, struct ip6_hdr *); bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = sip6->ip6_src; if (nd6_is_addr_neighbor(&src_sa, ifp) == 0) goto fail; if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst)) goto fail; /* what should we do here? */ /* rate limit */ if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0)) goto fail; /* * Since we are going to append up to 1280 bytes (= IPV6_MMTU), * we almost always ask for an mbuf cluster for simplicity. * (MHLEN < IPV6_MMTU is almost always true) */ #if IPV6_MMTU >= MCLBYTES # error assumption failed about IPV6_MMTU and MCLBYTES #endif m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) goto fail; M_SETFIB(m, rt->rt_fibnum); maxlen = M_TRAILINGSPACE(m); maxlen = min(IPV6_MMTU, maxlen); /* just for safety */ if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) { goto fail; } { /* get ip6 linklocal address for ifp(my outgoing interface). */ struct in6_ifaddr *ia; if ((ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY| IN6_IFF_ANYCAST)) == NULL) goto fail; ifp_ll6 = &ia->ia_addr.sin6_addr; /* XXXRW: reference released prematurely. */ ifa_free(&ia->ia_ifa); } /* get ip6 linklocal address for the router. 
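 * (Only a link-local address may be advertised as the redirect target
 * in the better-router case; if the gateway's address is not
 * link-local, router_ll6 is cleared below and the better-router
 * redirect is abandoned.)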
*/ if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)rt->rt_gateway; router_ll6 = &sin6->sin6_addr; if (!IN6_IS_ADDR_LINKLOCAL(router_ll6)) router_ll6 = (struct in6_addr *)NULL; } else router_ll6 = (struct in6_addr *)NULL; /* ip6 */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; /* ip6->ip6_plen will be set later */ ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; /* ip6->ip6_src must be linklocal addr for my outgoing if. */ bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr)); bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr)); /* ND Redirect */ nd_rd = (struct nd_redirect *)(ip6 + 1); nd_rd->nd_rd_type = ND_REDIRECT; nd_rd->nd_rd_code = 0; nd_rd->nd_rd_reserved = 0; if (rt->rt_flags & RTF_GATEWAY) { /* * nd_rd->nd_rd_target must be a link-local address in * better router cases. */ if (!router_ll6) goto fail; bcopy(router_ll6, &nd_rd->nd_rd_target, sizeof(nd_rd->nd_rd_target)); bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } else { /* make sure redtgt == reddst */ bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target, sizeof(nd_rd->nd_rd_target)); bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } p = (u_char *)(nd_rd + 1); if (!router_ll6) goto nolladdropt; { /* target lladdr option */ int len; struct nd_opt_hdr *nd_opt; char *lladdr; IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(router_ll6, 0, ifp); IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) goto nolladdropt; len = sizeof(*nd_opt) + ifp->if_addrlen; len = (len + 7) & ~7; /* round by 8 */ /* safety check */ if (len + (p - (u_char *)ip6) > maxlen) goto nolladdropt; if (ln->la_flags & LLE_VALID) { nd_opt = (struct nd_opt_hdr *)p; nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = len >> 3; lladdr = (char *)(nd_opt + 1); bcopy(ln->ll_addr, lladdr, ifp->if_addrlen); p += len; } } nolladdropt: if (ln != NULL) LLE_RUNLOCK(ln); m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; /* just to be safe */ #ifdef M_DECRYPTED /*not openbsd*/ if (m0->m_flags & M_DECRYPTED) goto noredhdropt; #endif if (p - (u_char *)ip6 > maxlen) goto noredhdropt; { /* redirected header option */ int len; struct nd_opt_rd_hdr *nd_opt_rh; /* * compute the maximum size for icmp6 redirect header option. * XXX room for auth header? */ len = maxlen - (p - (u_char *)ip6); len &= ~7; /* This is just for simplicity. */ if (m0->m_pkthdr.len != m0->m_len) { if (m0->m_next) { m_freem(m0->m_next); m0->m_next = NULL; } m0->m_pkthdr.len = m0->m_len; } /* * Redirected header option spec (RFC2461 4.6.3) talks nothing * about padding/truncate rule for the original IP packet. * From the discussion on IPv6imp in Feb 1999, * the consensus was: * - "attach as much as possible" is the goal * - pad if not aligned (original size can be guessed by * original ip6 header) * Following code adds the padding if it is simple enough, * and truncates if not. 
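 * A rough example: a 41-byte original packet is padded to 48 bytes
 * when at least 7 bytes of trailing space are available in m0, and
 * truncated to 40 bytes otherwise, so the redirected header option
 * remains a multiple of 8 bytes.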
*/ if (m0->m_next || m0->m_pkthdr.len != m0->m_len) panic("assumption failed in %s:%d", __FILE__, __LINE__); if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) { /* not enough room, truncate */ m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh); } else { /* enough room, pad or truncate */ size_t extra; extra = m0->m_pkthdr.len % 8; if (extra) { /* pad if easy enough, truncate if not */ if (8 - extra <= M_TRAILINGSPACE(m0)) { /* pad */ m0->m_len += (8 - extra); m0->m_pkthdr.len += (8 - extra); } else { /* truncate */ m0->m_pkthdr.len -= extra; m0->m_len -= extra; } } len = m0->m_pkthdr.len + sizeof(*nd_opt_rh); m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh); } nd_opt_rh = (struct nd_opt_rd_hdr *)p; bzero(nd_opt_rh, sizeof(*nd_opt_rh)); nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER; nd_opt_rh->nd_opt_rh_len = len >> 3; p += sizeof(*nd_opt_rh); m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; /* connect m0 to m */ m_tag_delete_chain(m0, NULL); m0->m_flags &= ~M_PKTHDR; m->m_next = m0; m->m_pkthdr.len = m->m_len + m0->m_len; m0 = NULL; } noredhdropt:; if (m0) { m_freem(m0); m0 = NULL; } /* XXX: clear embedded link IDs in the inner header */ in6_clearscope(&sip6->ip6_src); in6_clearscope(&sip6->ip6_dst); in6_clearscope(&nd_rd->nd_rd_target); in6_clearscope(&nd_rd->nd_rd_dst); ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); nd_rd->nd_rd_cksum = 0; nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen)); if (send_sendso_input_hook != NULL) { mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto fail; *(unsigned short *)(mtag + 1) = nd_rd->nd_rd_type; m_tag_prepend(m, mtag); } /* send the packet to outside... */ ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) { icmp6_ifstat_inc(outif, ifs6_out_msg); icmp6_ifstat_inc(outif, ifs6_out_redirect); } ICMP6STAT_INC(icp6s_outhist[ND_REDIRECT]); return; fail: if (m) m_freem(m); if (m0) m_freem(m0); } /* * ICMPv6 socket option processing. */ int icmp6_ctloutput(struct socket *so, struct sockopt *sopt) { int error = 0; int optlen; struct inpcb *inp = sotoinpcb(so); int level, op, optname; if (sopt) { level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; } else level = op = optname = optlen = 0; if (level != IPPROTO_ICMPV6) { return EINVAL; } switch (op) { case PRCO_SETOPT: switch (optname) { case ICMP6_FILTER: { struct icmp6_filter ic6f; if (optlen != sizeof(ic6f)) { error = EMSGSIZE; break; } error = sooptcopyin(sopt, &ic6f, optlen, optlen); if (error == 0) { INP_WLOCK(inp); *inp->in6p_icmp6filt = ic6f; INP_WUNLOCK(inp); } break; } default: error = ENOPROTOOPT; break; } break; case PRCO_GETOPT: switch (optname) { case ICMP6_FILTER: { struct icmp6_filter ic6f; INP_RLOCK(inp); ic6f = *inp->in6p_icmp6filt; INP_RUNLOCK(inp); error = sooptcopyout(sopt, &ic6f, sizeof(ic6f)); break; } default: error = ENOPROTOOPT; break; } break; } return (error); } /* * Perform rate limit check. * Returns 0 if it is okay to send the icmp6 packet. * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate * limitation. * * XXX per-destination/type check necessary? 
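 * For example, ppsratecheck() admits at most V_icmp6errppslim error
 * packets per second; once that rate is exceeded this function
 * returns non-zero and the caller refrains from sending the error.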
* * dst - not used at this moment * type - not used at this moment * code - not used at this moment */ static int icmp6_ratelimit(const struct in6_addr *dst, const int type, const int code) { int ret; ret = 0; /* okay to send */ /* PPS limit */ if (!ppsratecheck(&V_icmp6errppslim_last, &V_icmp6errpps_count, V_icmp6errppslim)) { /* The packet is subject to rate limit */ ret++; } return ret; } Index: user/alc/PQ_LAUNDRY/sys/netinet6/ip6_output.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/netinet6/ip6_output.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/netinet6/ip6_output.c (revision 303642) @@ -1,3072 +1,3077 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_sctp.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #include #include #endif /* IPSEC */ #ifdef SCTP #include #include #endif #include #include #ifdef FLOWTABLE #include #endif extern int in6_mcast_loop; struct ip6_exthdrs { struct mbuf *ip6e_ip6; struct mbuf *ip6e_hbh; struct mbuf *ip6e_dest1; struct mbuf *ip6e_rthdr; struct mbuf *ip6e_dest2; }; static MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options"); static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, struct ucred *, int); static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *, struct sockopt *); static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *); static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, struct ucred *, int, int, int); static int ip6_copyexthdr(struct mbuf **, caddr_t, int); static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, struct ip6_frag **); static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); static int ip6_getpmtu(struct route_in6 *, int, - struct ifnet *, const struct in6_addr *, u_long *, int *, u_int); + struct ifnet *, const struct in6_addr *, u_long *, int *, u_int, + u_int); static int ip6_calcmtu(struct ifnet *, const struct in6_addr *, u_long, - u_long *, int *); + u_long *, int *, u_int); static int ip6_getpmtu_ctl(u_int, const struct in6_addr *, u_long *); static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); /* * Make an extension header from option data. hp is the source, and * mp is the destination. */ #define MAKE_EXTHDR(hp, mp) \ do { \ if (hp) { \ struct ip6_ext *eh = (struct ip6_ext *)(hp); \ error = ip6_copyexthdr((mp), (caddr_t)(hp), \ ((eh)->ip6e_len + 1) << 3); \ if (error) \ goto freehdrs; \ } \ } while (/*CONSTCOND*/ 0) /* * Form a chain of extension headers. * m is the extension header mbuf * mp is the previous mbuf in the chain * p is the next header * i is the type of option. 
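 * For instance, chaining the routing header with i == IPPROTO_ROUTING
 * copies the current *p into the new header's next-header byte, stores
 * IPPROTO_ROUTING through p, points p at the new header, and splices
 * the mbuf into the chain immediately after mp.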
*/ #define MAKE_CHAIN(m, mp, p, i)\ do {\ if (m) {\ if (!hdrsplit) \ panic("assumption failed: hdr not split"); \ *mtod((m), u_char *) = *(p);\ *(p) = (i);\ p = mtod((m), u_char *);\ (m)->m_next = (mp)->m_next;\ (mp)->m_next = (m);\ (mp) = (m);\ }\ } while (/*CONSTCOND*/ 0) void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) { u_short csum; csum = in_cksum_skip(m, offset + plen, offset); if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0) csum = 0xffff; offset += m->m_pkthdr.csum_data; /* checksum offset */ if (offset + sizeof(u_short) > m->m_len) { printf("%s: delayed m_pullup, m->len: %d plen %u off %u " "csum_flags=%b\n", __func__, m->m_len, plen, offset, (int)m->m_pkthdr.csum_flags, CSUM_BITS); /* * XXX this should not happen, but if it does, the correct * behavior may be to insert the checksum in the appropriate * next mbuf in the chain. */ return; } *(u_short *)(m->m_data + offset) = csum; } int ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto, int mtu, uint32_t id) { struct mbuf *m, **mnext, *m_frgpart; struct ip6_hdr *ip6, *mhip6; struct ip6_frag *ip6f; int off; int error; int tlen = m0->m_pkthdr.len; m = m0; ip6 = mtod(m, struct ip6_hdr *); mnext = &m->m_nextpkt; for (off = hlen; off < tlen; off += mtu) { m = m_gethdr(M_NOWAIT, MT_DATA); if (!m) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m->m_flags = m0->m_flags & M_COPYFLAGS; *mnext = m; mnext = &m->m_nextpkt; m->m_data += max_linkhdr; mhip6 = mtod(m, struct ip6_hdr *); *mhip6 = *ip6; m->m_len = sizeof(*mhip6); error = ip6_insertfraghdr(m0, m, hlen, &ip6f); if (error) { IP6STAT_INC(ip6s_odropped); return (error); } ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); if (off + mtu >= tlen) mtu = tlen - off; else ip6f->ip6f_offlg |= IP6F_MORE_FRAG; mhip6->ip6_plen = htons((u_short)(mtu + hlen + sizeof(*ip6f) - sizeof(struct ip6_hdr))); if ((m_frgpart = m_copy(m0, off, mtu)) == NULL) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m_cat(m, m_frgpart); m->m_pkthdr.len = mtu + hlen + sizeof(*ip6f); m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum; m->m_pkthdr.rcvif = NULL; ip6f->ip6f_reserved = 0; ip6f->ip6f_ident = id; ip6f->ip6f_nxt = nextproto; IP6STAT_INC(ip6s_ofragments); in6_ifstat_inc(ifp, ifs6_out_fragcreat); } return (0); } /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). * This function may modify ver and hlim only. * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * If route_in6 ro is present and has ro_rt initialized, route lookup would be * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, * then result of route lookup is stored in ro->ro_rt. * * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, * which is rt_mtu. * * ifpp - XXX: just for statistics */ /* * XXX TODO: no flowid is assigned for outbound flows? 
*/ int ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, struct inpcb *inp) { struct ip6_hdr *ip6; struct ifnet *ifp, *origifp; struct mbuf *m = m0; struct mbuf *mprev = NULL; int hlen, tlen, len; struct route_in6 ip6route; struct rtentry *rt = NULL; struct sockaddr_in6 *dst, src_sa, dst_sa; struct in6_addr odst; int error = 0; struct in6_ifaddr *ia = NULL; u_long mtu; int alwaysfrag, dontfrag; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; struct in6_addr src0, dst0; u_int32_t zone; struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; int sw_csum, tso; int needfiblookup; uint32_t fibnum; struct m_tag *fwd_tag = NULL; uint32_t id; if (inp != NULL) { M_SETFIB(m, inp->inp_inc.inc_fibnum); if ((flags & IP_NODEFAULTFLOWID) == 0) { /* unconditionally set flowid */ m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } } bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); /* Destination options header(1st part) */ if (opt->ip6po_rthdr) { /* * Destination options header(1st part) * This only makes sense with a routing header. * See Section 9.2 of RFC 3542. * Disabling this part just for MIP6 convenience is * a bad idea. We need to think carefully about a * way to make the advanced API coexist with MIP6 * options, which might automatically be inserted in * the kernel. */ MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); } /* Routing header */ MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); /* Destination options header(2nd part) */ MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); } #ifdef IPSEC /* * IPSec checking which handles several cases. * FAST IPSEC: We re-injected the packet. * XXX: need scope argument. */ switch(ip6_ipsec_output(&m, inp, &error)) { case 1: /* Bad packet */ goto freehdrs; case -1: /* IPSec done */ goto done; case 0: /* No IPSec */ default: break; } #endif /* IPSEC */ /* * Calculate the total length of the extension header chain. * Keep the length of the unfragmentable part for fragmentation. */ optlen = 0; if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len; if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len; if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len; unfragpartlen = optlen + sizeof(struct ip6_hdr); /* NOTE: we don't add AH/ESP length here (done in ip6_ipsec_output) */ if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len; /* * If there is at least one extension header, * separate IP6 header from the payload. */ if (optlen && !hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } ip6 = mtod(m, struct ip6_hdr *); /* adjust mbuf packet header length */ m->m_pkthdr.len += optlen; plen = m->m_pkthdr.len - sizeof(*ip6); /* If this is a jumbo payload, insert a jumbo payload option. */ if (plen > IPV6_MAXPACKET) { if (!hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) goto freehdrs; ip6->ip6_plen = 0; } else ip6->ip6_plen = htons(plen); /* * Concatenate headers and fill in next header fields. * Here we have, on "m" * IPv6 payload * and we insert headers accordingly. 
Finally, we should be getting: * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] * * during the header composing process, "m" points to IPv6 header. * "mprev" points to an extension header prior to esp. */ u_char *nexthdrp = &ip6->ip6_nxt; mprev = m; /* * we treat dest2 specially. this makes IPsec processing * much easier. the goal here is to make mprev point the * mbuf prior to dest2. * * result: IPv6 dest2 payload * m and mprev will point to IPv6 header. */ if (exthdrs.ip6e_dest2) { if (!hdrsplit) panic("assumption failed: hdr not split"); exthdrs.ip6e_dest2->m_next = m->m_next; m->m_next = exthdrs.ip6e_dest2; *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_DSTOPTS; } /* * result: IPv6 hbh dest1 rthdr dest2 payload * m will point to IPv6 header. mprev will point to the * extension header prior to dest2 (rthdr in the above case). */ MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS); MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); /* * If there is a routing header, discard the packet. */ if (exthdrs.ip6e_rthdr) { error = EINVAL; goto bad; } /* Source address validation */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && (flags & IPV6_UNSPECSRC) == 0) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } IP6STAT_INC(ip6s_localout); /* * Route packet. */ if (ro == NULL) { ro = &ip6route; bzero((caddr_t)ro, sizeof(*ro)); } else ro->ro_flags |= RT_LLE_CACHE; ro_pmtu = ro; if (opt && opt->ip6po_rthdr) ro = &opt->ip6po_route; dst = (struct sockaddr_in6 *)&ro->ro_dst; #ifdef FLOWTABLE if (ro->ro_rt == NULL) (void )flowtable_lookup(AF_INET6, m, (struct route *)ro); #endif fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); again: /* * if specified, try to fill in the traffic class field. * do not override if a non-zero value is already set. * we check the diffserv field and the ecn field separately. */ if (opt && opt->ip6po_tclass >= 0) { int mask = 0; if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) mask |= 0xfc; if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) mask |= 0x03; if (mask != 0) ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20); } /* fill in or override the hop limit field, if necessary. */ if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (im6o != NULL) ip6->ip6_hlim = im6o->im6o_multicast_hlim; else ip6->ip6_hlim = V_ip6_defmcasthlim; } /* * Validate route against routing table additions; * a better/more specific route might have been added. * Make sure address family is set in route. */ if (inp) { ro->ro_dst.sin6_family = AF_INET6; RT_VALIDATE((struct route *)ro, &inp->inp_rt_cookie, fibnum); } if (ro->ro_rt && fwd_tag == NULL && (ro->ro_rt->rt_flags & RTF_UP) && ro->ro_dst.sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) { rt = ro->ro_rt; ifp = ro->ro_rt->rt_ifp; } else { if (fwd_tag == NULL) { bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; } error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp, &rt, fibnum); if (error != 0) { if (ifp != NULL) in6_ifstat_inc(ifp, ifs6_out_discard); goto bad; } } if (rt == NULL) { /* * If in6_selectroute() does not return a route entry, * dst may not have been updated. 
*/ *dst = dst_sa; /* XXX */ } /* * then rt (for unicast) and ifp must be non-NULL valid values. */ if ((flags & IPV6_FORWARDING) == 0) { /* XXX: the FORWARDING flag can be set for mrouting. */ in6_ifstat_inc(ifp, ifs6_out_request); } if (rt != NULL) { ia = (struct in6_ifaddr *)(rt->rt_ifa); counter_u64_add(rt->rt_pksent, 1); } /* * The outgoing interface must be in the zone of source and * destination addresses. */ origifp = ifp; src0 = ip6->ip6_src; if (in6_setscope(&src0, origifp, &zone)) goto badscope; bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = ip6->ip6_src; if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id) goto badscope; dst0 = ip6->ip6_dst; if (in6_setscope(&dst0, origifp, &zone)) goto badscope; /* re-initialize to be sure */ bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) { goto badscope; } /* We should use ia_ifp to support the case of * sending packets to an address of our own. */ if (ia != NULL && ia->ia_ifp) ifp = ia->ia_ifp; /* scope check is done. */ goto routefound; badscope: IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(origifp, ifs6_out_discard); if (error == 0) error = EHOSTUNREACH; /* XXX */ goto bad; routefound: if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (opt && opt->ip6po_nextroute.ro_rt) { /* * The nexthop is explicitly specified by the * application. We assume the next hop is an IPv6 * address. */ dst = (struct sockaddr_in6 *)opt->ip6po_nexthop; } else if ((rt->rt_flags & RTF_GATEWAY)) dst = (struct sockaddr_in6 *)rt->rt_gateway; } if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ } else { m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; in6_ifstat_inc(ifp, ifs6_out_mcast); /* * Confirm that the outgoing interface supports multicast. */ if (!(ifp->if_flags & IFF_MULTICAST)) { IP6STAT_INC(ip6s_noroute); in6_ifstat_inc(ifp, ifs6_out_discard); error = ENETUNREACH; goto bad; } if ((im6o == NULL && in6_mcast_loop) || (im6o && im6o->im6o_multicast_loop)) { /* * Loop back multicast datagram if not expressly * forbidden to do so, even if we have not joined * the address; protocols will filter it later, * thus deferring a hash lookup and lock acquisition * at the expense of an m_copym(). */ ip6_mloopback(ifp, m); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. The multicast forwarding function * recursively calls this function, using the * IPV6_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip6_mloopback(), * above, will be forwarded by the ip6_input() routine, * if necessary. */ if (V_ip6_mrouter && (flags & IPV6_FORWARDING) == 0) { /* * XXX: ip6_mforward expects that rcvif is NULL * when it is called from the originating path. * However, it may not always be the case. */ m->m_pkthdr.rcvif = NULL; if (ip6_mforward(ip6, ifp, m) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a hoplimit of zero may be looped back, * above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip6_mloopback() will * loop back a copy if this host actually belongs to the * destination group on the loopback interface. 
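 * For example, a datagram addressed to an interface-local group such
 * as ff01::1 may be looped back by ip6_mloopback() above, but is
 * freed here rather than handed to the output interface.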
*/ if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) { m_freem(m); goto done; } } /* * Fill the outgoing inteface to tell the upper layer * to increment per-interface statistics. */ if (ifpp) *ifpp = ifp; /* Determine path MTU. */ if ((error = ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &ip6->ip6_dst, - &mtu, &alwaysfrag, fibnum)) != 0) + &mtu, &alwaysfrag, fibnum, *nexthdrp)) != 0) goto bad; /* * The caller of this function may specify to use the minimum MTU * in some cases. * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU * setting. The logic is a bit complicated; by default, unicast * packets will follow path MTU while multicast packets will be sent at * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets * including unicast ones will be sent at the minimum MTU. Multicast * packets will always be sent at the minimum MTU unless * IP6PO_MINMTU_DISABLE is explicitly specified. * See RFC 3542 for more details. */ if (mtu > IPV6_MMTU) { if ((flags & IPV6_MINMTU)) mtu = IPV6_MMTU; else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) mtu = IPV6_MMTU; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && (opt == NULL || opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) { mtu = IPV6_MMTU; } } /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); /* * If the outgoing packet contains a hop-by-hop options header, * it must be examined and processed even by the source node. * (RFC 2460, section 4.) */ if (exthdrs.ip6e_hbh) { struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); u_int32_t dummy; /* XXX unused */ u_int32_t plen = 0; /* XXX: ip6_process will check the value */ #ifdef DIAGNOSTIC if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) panic("ip6e_hbh is not contiguous"); #endif /* * XXX: if we have to send an ICMPv6 error to the sender, * we need the M_LOOP flag since icmp6_error() expects * the IPv6 and the hop-by-hop options header are * contiguous unless the flag is set. */ m->m_flags |= M_LOOP; m->m_pkthdr.rcvif = ifp; if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), &dummy, &plen) < 0) { /* m was already freed at this point */ error = EINVAL;/* better error? */ goto done; } m->m_flags &= ~M_LOOP; /* XXX */ m->m_pkthdr.rcvif = NULL; } /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet6_pfil_hook)) goto passout; odst = ip6->ip6_dst; /* Run through list of hooks for output packets. */ error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp); if (error != 0 || m == NULL) goto done; /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); needfiblookup = 0; /* See if destination IP address was changed by packet filter. */ if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) { m->m_flags |= M_SKIP_FIREWALL; /* If destination is now ourself drop to ip6_input(). */ if (in6_localip(&ip6->ip6_dst)) { m->m_flags |= M_FASTFWD_OURS; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } else { RO_RTFREE(ro); needfiblookup = 1; /* Redo the routing table lookup. 
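 * (The packet filter may have rewritten ip6_dst, so the cached route
 * no longer matches; the "again" label re-runs route and interface
 * selection for the new destination.)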
*/ } } /* See if fib was changed by packet filter. */ if (fibnum != M_GETFIB(m)) { m->m_flags |= M_SKIP_FIREWALL; fibnum = M_GETFIB(m); RO_RTFREE(ro); needfiblookup = 1; } if (needfiblookup) goto again; /* See if local, if yes, send it to netisr. */ if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } /* Or forward to some other address? */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { dst = (struct sockaddr_in6 *)&ro->ro_dst; bcopy((fwd_tag+1), &dst_sa, sizeof(struct sockaddr_in6)); m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP6_NEXTHOP; m_tag_delete(m, fwd_tag); goto again; } passout: /* * Send the packet to the outgoing interface. * If necessary, do IPv6 fragmentation before sending. * * the logic here is rather complex: * 1: normal case (dontfrag == 0, alwaysfrag == 0) * 1-a: send as is if tlen <= path mtu * 1-b: fragment if tlen > path mtu * * 2: if user asks us not to fragment (dontfrag == 1) * 2-a: send as is if tlen <= interface mtu * 2-b: error if tlen > interface mtu * * 3: if we always need to attach fragment header (alwaysfrag == 1) * always fragment * * 4: if dontfrag == 1 && alwaysfrag == 1 * error, as we cannot handle this conflicting request */ sw_csum = m->m_pkthdr.csum_flags; if (!hdrsplit) { tso = ((sw_csum & ifp->if_hwassist & CSUM_TSO) != 0) ? 1 : 0; sw_csum &= ~ifp->if_hwassist; } else tso = 0; /* * If we added extension headers, we will not do TSO and calculate the * checksums ourselves for now. * XXX-BZ Need a framework to know when the NIC can handle it, even * with ext. hdrs. */ if (sw_csum & CSUM_DELAY_DATA_IPV6) { sw_csum &= ~CSUM_DELAY_DATA_IPV6; in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr)); } #ifdef SCTP if (sw_csum & CSUM_SCTP_IPV6) { sw_csum &= ~CSUM_SCTP_IPV6; sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); } #endif m->m_pkthdr.csum_flags &= ifp->if_hwassist; tlen = m->m_pkthdr.len; if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso) dontfrag = 1; else dontfrag = 0; if (dontfrag && alwaysfrag) { /* case 4 */ /* conflicting request - can't transmit */ error = EMSGSIZE; goto bad; } if (dontfrag && tlen > IN6_LINKMTU(ifp) && !tso) { /* case 2-b */ /* * Even if the DONTFRAG option is specified, we cannot send the * packet when the data length is larger than the MTU of the * outgoing interface. * Notify the error by sending IPV6_PATHMTU ancillary data if * application wanted to know the MTU value. Also return an * error code (this is not described in the API spec). */ if (inp != NULL) ip6_notify_pmtu(inp, &dst_sa, (u_int32_t)mtu); error = EMSGSIZE; goto bad; } /* * transmit packet without fragmentation */ if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */ struct in6_ifaddr *ia6; ip6 = mtod(m, struct ip6_hdr *); ia6 = in6_ifawithifp(ifp, &ip6->ip6_src); if (ia6) { /* Record statistics for this interface address. */ counter_u64_add(ia6->ia_ifa.ifa_opackets, 1); counter_u64_add(ia6->ia_ifa.ifa_obytes, m->m_pkthdr.len); ifa_free(&ia6->ia_ifa); } error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); goto done; } /* * try to fragment the packet. 
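* Each fragment carries a payload that is a multiple of 8 bytes; for example, with a 1500-byte path MTU and only the 40-byte IPv6 header in the unfragmentable part, len = (1500 - 40 - 8) & ~7 = 1448 bytes per fragment (the final fragment may be shorter).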
case 1-b and 3 */ if (mtu < IPV6_MMTU) { /* path MTU cannot be less than IPV6_MMTU */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else { u_char nextproto; /* * Too large for the destination or interface; * fragment if possible. * Must be able to put at least 8 bytes per fragment. */ hlen = unfragpartlen; if (mtu > IPV6_MAXPACKET) mtu = IPV6_MAXPACKET; len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; if (len < 8) { error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } /* * If the interface will not calculate checksums on * fragmented packets, then do it here. * XXX-BZ handle the hw offloading case. Need flags. */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { in6_delayed_cksum(m, plen, hlen); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { sctp_delayed_cksum(m, hlen); m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; } #endif /* * Change the next header field of the last header in the * unfragmentable part. */ if (exthdrs.ip6e_rthdr) { nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_dest1) { nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_hbh) { nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; } else { nextproto = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_FRAGMENT; } /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto * chain. */ m0 = m; id = htonl(ip6_randomid()); if ((error = ip6_fragment(ifp, m, hlen, nextproto, len, id))) goto sendorfree; in6_ifstat_inc(ifp, ifs6_out_fragok); } /* * Remove leading garbages. */ sendorfree: m = m0->m_nextpkt; m0->m_nextpkt = 0; m_freem(m0); for (m0 = m; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { /* Record statistics for this interface address. */ if (ia) { counter_u64_add(ia->ia_ifa.ifa_opackets, 1); counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); } else m_freem(m); } if (error == 0) IP6STAT_INC(ip6s_fragmented); done: /* * Release the route if using our private route, or if * (with flowtable) we don't have our own reference. */ if (ro == &ip6route || ro->ro_flags & RT_NORTREF) RO_RTFREE(ro); return (error); freehdrs: m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */ m_freem(exthdrs.ip6e_dest1); m_freem(exthdrs.ip6e_rthdr); m_freem(exthdrs.ip6e_dest2); /* FALLTHROUGH */ bad: if (m) m_freem(m); goto done; } static int ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen) { struct mbuf *m; if (hlen > MCLBYTES) return (ENOBUFS); /* XXX */ if (hlen > MLEN) m = m_getcl(M_NOWAIT, MT_DATA, 0); else m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_len = hlen; if (hdr) bcopy(hdr, mtod(m, caddr_t), hlen); *mp = m; return (0); } /* * Insert jumbo payload option. */ static int ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen) { struct mbuf *mopt; u_char *optbuf; u_int32_t v; #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ /* * If there is no hop-by-hop options header, allocate new one. 
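* (A single mbuf of JUMBOOPTLEN bytes is sufficient in that case.)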
* If there is one but it doesn't have enough space to store the * jumbo payload option, allocate a cluster to store the whole options. * Otherwise, use it to store the options. */ if (exthdrs->ip6e_hbh == NULL) { mopt = m_get(M_NOWAIT, MT_DATA); if (mopt == NULL) return (ENOBUFS); mopt->m_len = JUMBOOPTLEN; optbuf = mtod(mopt, u_char *); optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */ exthdrs->ip6e_hbh = mopt; } else { struct ip6_hbh *hbh; mopt = exthdrs->ip6e_hbh; if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) { /* * XXX assumption: * - exthdrs->ip6e_hbh is not referenced from places * other than exthdrs. * - exthdrs->ip6e_hbh is not an mbuf chain. */ int oldoptlen = mopt->m_len; struct mbuf *n; /* * XXX: give up if the whole (new) hbh header does * not fit even in an mbuf cluster. */ if (oldoptlen + JUMBOOPTLEN > MCLBYTES) return (ENOBUFS); /* * As a consequence, we must always prepare a cluster * at this point. */ n = m_getcl(M_NOWAIT, MT_DATA, 0); if (n == NULL) return (ENOBUFS); n->m_len = oldoptlen + JUMBOOPTLEN; bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), oldoptlen); optbuf = mtod(n, caddr_t) + oldoptlen; m_freem(mopt); mopt = exthdrs->ip6e_hbh = n; } else { optbuf = mtod(mopt, u_char *) + mopt->m_len; mopt->m_len += JUMBOOPTLEN; } optbuf[0] = IP6OPT_PADN; optbuf[1] = 1; /* * Adjust the header length according to the pad and * the jumbo payload option. */ hbh = mtod(mopt, struct ip6_hbh *); hbh->ip6h_len += (JUMBOOPTLEN >> 3); } /* fill in the option. */ optbuf[2] = IP6OPT_JUMBO; optbuf[3] = 4; v = (u_int32_t)htonl(plen + JUMBOOPTLEN); bcopy(&v, &optbuf[4], sizeof(u_int32_t)); /* finally, adjust the packet header length */ exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; return (0); #undef JUMBOOPTLEN } /* * Insert fragment header and copy unfragmentable header portions. */ static int ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, struct ip6_frag **frghdrp) { struct mbuf *n, *mlast; if (hlen > sizeof(struct ip6_hdr)) { n = m_copym(m0, sizeof(struct ip6_hdr), hlen - sizeof(struct ip6_hdr), M_NOWAIT); if (n == NULL) return (ENOBUFS); m->m_next = n; } else n = m; /* Search for the last mbuf of unfragmentable part. */ for (mlast = n; mlast->m_next; mlast = mlast->m_next) ; if (M_WRITABLE(mlast) && M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) { /* use the trailing space of the last mbuf for the fragment hdr */ *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len); mlast->m_len += sizeof(struct ip6_frag); m->m_pkthdr.len += sizeof(struct ip6_frag); } else { /* allocate a new mbuf for the fragment header */ struct mbuf *mfrg; mfrg = m_get(M_NOWAIT, MT_DATA); if (mfrg == NULL) return (ENOBUFS); mfrg->m_len = sizeof(struct ip6_frag); *frghdrp = mtod(mfrg, struct ip6_frag *); mlast->m_next = mfrg; } return (0); } /* * Calculates IPv6 path mtu for destination @dst. * Resulting MTU is stored in @mtup. * * Returns 0 on success. */ static int ip6_getpmtu_ctl(u_int fibnum, const struct in6_addr *dst, u_long *mtup) { struct nhop6_extended nh6; struct in6_addr kdst; uint32_t scopeid; struct ifnet *ifp; u_long mtu; int error; in6_splitscope(dst, &kdst, &scopeid); if (fib6_lookup_nh_ext(fibnum, &kdst, scopeid, NHR_REF, 0, &nh6) != 0) return (EHOSTUNREACH); ifp = nh6.nh_ifp; mtu = nh6.nh_mtu; - error = ip6_calcmtu(ifp, dst, mtu, mtup, NULL); + error = ip6_calcmtu(ifp, dst, mtu, mtup, NULL, 0); fib6_free_nh_ext(fibnum, &nh6); return (error); } /* * Calculates IPv6 path MTU for @dst based on transmit @ifp, * and cached data in @ro_pmtu. 
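* The @proto argument carries the upper-layer protocol; ip6_calcmtu() uses it to skip the TCP hostcache MTU lookup for TCP traffic, since TCP reacts to path-MTU changes on its own.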
* MTU from (successful) route lookup is saved (along with dst) * inside @ro_pmtu to avoid subsequent route lookups after packet * filter processing. * * Stores mtu and always-frag value into @mtup and @alwaysfragp. * Returns 0 on success. */ static int ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup, struct ifnet *ifp, const struct in6_addr *dst, u_long *mtup, - int *alwaysfragp, u_int fibnum) + int *alwaysfragp, u_int fibnum, u_int proto) { struct nhop6_basic nh6; struct in6_addr kdst; uint32_t scopeid; struct sockaddr_in6 *sa6_dst; u_long mtu; mtu = 0; if (do_lookup) { /* * Here ro_pmtu has final destination address, while * ro might represent immediate destination. * Use ro_pmtu destination since mtu might differ. */ sa6_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst; if (!IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst)) ro_pmtu->ro_mtu = 0; if (ro_pmtu->ro_mtu == 0) { bzero(sa6_dst, sizeof(*sa6_dst)); sa6_dst->sin6_family = AF_INET6; sa6_dst->sin6_len = sizeof(struct sockaddr_in6); sa6_dst->sin6_addr = *dst; in6_splitscope(dst, &kdst, &scopeid); if (fib6_lookup_nh_basic(fibnum, &kdst, scopeid, 0, 0, &nh6) == 0) ro_pmtu->ro_mtu = nh6.nh_mtu; } mtu = ro_pmtu->ro_mtu; } if (ro_pmtu->ro_rt) mtu = ro_pmtu->ro_rt->rt_mtu; - return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp)); + return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp, proto)); } /* * Calculate MTU based on transmit @ifp, route mtu @rt_mtu and * hostcache data for @dst. * Stores mtu and always-frag value into @mtup and @alwaysfragp. * * Returns 0 on success. */ static int ip6_calcmtu(struct ifnet *ifp, const struct in6_addr *dst, u_long rt_mtu, - u_long *mtup, int *alwaysfragp) + u_long *mtup, int *alwaysfragp, u_int proto) { u_long mtu = 0; int alwaysfrag = 0; int error = 0; if (rt_mtu > 0) { u_int32_t ifmtu; struct in_conninfo inc; bzero(&inc, sizeof(inc)); inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; ifmtu = IN6_LINKMTU(ifp); - mtu = tcp_hc_getmtu(&inc); + + /* TCP is known to react to pmtu changes so skip hc */ + if (proto != IPPROTO_TCP) + mtu = tcp_hc_getmtu(&inc); + if (mtu) mtu = min(mtu, rt_mtu); else mtu = rt_mtu; if (mtu == 0) mtu = ifmtu; else if (mtu < IPV6_MMTU) { /* * RFC2460 section 5, last paragraph: * if we record ICMPv6 too big message with * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU * or smaller, with framgent header attached. * (fragment header is needed regardless from the * packet size, for translators to identify packets) */ alwaysfrag = 1; mtu = IPV6_MMTU; } } else if (ifp) { mtu = IN6_LINKMTU(ifp); } else error = EHOSTUNREACH; /* XXX */ *mtup = mtu; if (alwaysfragp) *alwaysfragp = alwaysfrag; return (error); } /* * IP6 socket option processing. 
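* Reached via setsockopt(2)/getsockopt(2) on an IPv6 socket; a hypothetical caller would look like: int on = 1; setsockopt(s, IPPROTO_IPV6, IPV6_RECVPKTINFO, &on, sizeof(on));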
*/ int ip6_ctloutput(struct socket *so, struct sockopt *sopt) { int optdatalen, uproto; void *optdata; struct inpcb *in6p = sotoinpcb(so); int error, optval; int level, op, optname; int optlen; struct thread *td; #ifdef RSS uint32_t rss_bucket; int retval; #endif level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; td = sopt->sopt_td; error = 0; optval = 0; uproto = (int)so->so_proto->pr_protocol; if (level != IPPROTO_IPV6) { error = EINVAL; if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_dir == SOPT_SET) { switch (sopt->sopt_name) { case SO_REUSEADDR: INP_WLOCK(in6p); if ((so->so_options & SO_REUSEADDR) != 0) in6p->inp_flags2 |= INP_REUSEADDR; else in6p->inp_flags2 &= ~INP_REUSEADDR; INP_WUNLOCK(in6p); error = 0; break; case SO_REUSEPORT: INP_WLOCK(in6p); if ((so->so_options & SO_REUSEPORT) != 0) in6p->inp_flags2 |= INP_REUSEPORT; else in6p->inp_flags2 &= ~INP_REUSEPORT; INP_WUNLOCK(in6p); error = 0; break; case SO_SETFIB: INP_WLOCK(in6p); in6p->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(in6p); error = 0; break; default: break; } } } else { /* level == IPPROTO_IPV6 */ switch (op) { case SOPT_SET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif { struct mbuf *m; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; error = ip6_pcbopts(&in6p->in6p_outputopts, m, so, sopt); m_freem(m); /* XXX */ break; } /* * Use of some Hop-by-Hop options or some * Destination options, might require special * privilege. That is, normal applications * (without special privilege) might be forbidden * from setting certain options in outgoing packets, * and might never see certain options in received * packets. [RFC 2292 Section 6] * KAME specific note: * KAME prevents non-privileged users from sending or * receiving ANY hbh/dst options in order to avoid * overhead of parsing options in the kernel. */ case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) break; } /* FALLTHROUGH */ case IPV6_UNICAST_HOPS: case IPV6_HOPLIMIT: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_RECVTCLASS: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RECVRSSBUCKETID: #endif case IPV6_V6ONLY: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_BINDMULTI: #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: #endif if (optname == IPV6_BINDANY && td != NULL) { error = priv_check(td, PRIV_NETINET_BINDANY); if (error) break; } if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_UNICAST_HOPS: if (optval < -1 || optval >= 256) error = EINVAL; else { /* -1 = kernel default */ in6p->in6p_hops = optval; if ((in6p->inp_vflag & INP_IPV4) != 0) in6p->inp_ip_ttl = optval; } break; #define OPTSET(bit) \ do { \ INP_WLOCK(in6p); \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTSET2292(bit) \ do { \ INP_WLOCK(in6p); \ in6p->inp_flags |= IN6P_RFC2292; \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 
1 : 0) #define OPTSET2(bit, val) do { \ INP_WLOCK(in6p); \ if (val) \ in6p->inp_flags2 |= bit; \ else \ in6p->inp_flags2 &= ~bit; \ INP_WUNLOCK(in6p); \ } while (0) #define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 1 : 0) case IPV6_RECVPKTINFO: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: { struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } optp = &in6p->in6p_outputopts; error = ip6_pcbopt(IPV6_HOPLIMIT, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_RECVHOPLIMIT: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPLIMIT); break; case IPV6_RECVHOPOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDRDSTOPTS); break; case IPV6_RECVRTHDR: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: /* * We ignore this option for TCP * sockets. * (RFC3542 leaves this case * unspecified.) */ if (uproto != IPPROTO_TCP) OPTSET(IN6P_MTU); break; case IPV6_RECVFLOWID: OPTSET2(INP_RECVFLOWID, optval); break; #ifdef RSS case IPV6_RECVRSSBUCKETID: OPTSET2(INP_RECVRSSBUCKETID, optval); break; #endif case IPV6_V6ONLY: /* * make setsockopt(IPV6_V6ONLY) * available only prior to bind(2). * see ipng mailing list, Jun 22 2001. */ if (in6p->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { error = EINVAL; break; } OPTSET(IN6P_IPV6_V6ONLY); if (optval) in6p->inp_vflag &= ~INP_IPV4; else in6p->inp_vflag |= INP_IPV4; break; case IPV6_RECVTCLASS: /* cannot mix with RFC2292 XXX */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: OPTSET(IN6P_AUTOFLOWLABEL); break; case IPV6_BINDANY: OPTSET(INP_BINDANY); break; case IPV6_BINDMULTI: OPTSET2(INP_BINDMULTI, optval); break; #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: if ((optval >= 0) && (optval < rss_getnumbuckets())) { in6p->inp_rss_listen_bucket = optval; OPTSET2(INP_RSS_BUCKET_SET, 1); } else { error = EINVAL; } break; #endif } break; case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: if (optlen != sizeof(optval)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; { struct ip6_pktopts **optp; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: /* RFC 2292 */ if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_2292PKTINFO: OPTSET2292(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: OPTSET2292(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: /* * Check super-user privilege. * See comments for IPV6_RECVHOPOPTS. 
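* (PRIV_NETINET_SETHDROPTS is required here, just as in the RFC3542-style cases above.)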
*/ if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_HOPOPTS); break; case IPV6_2292DSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ break; case IPV6_2292RTHDR: OPTSET2292(IN6P_RTHDR); break; } break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: { /* new advanced API (RFC3542) */ u_char *optbuf; u_char optbuf_storage[MCLBYTES]; int optlen; struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } /* * We only ensure valsize is not too large * here. Further validation will be done * later. */ error = sooptcopyin(sopt, optbuf_storage, sizeof(optbuf_storage), 0); if (error) break; optlen = sopt->sopt_valsize; optbuf = optbuf_storage; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, optbuf, optlen, optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } #undef OPTSET case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: case IPV6_MSFILTER: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: error = ip6_setmoptions(in6p, sopt); break; case IPV6_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; INP_WLOCK(in6p); switch (optval) { case IPV6_PORTRANGE_DEFAULT: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags &= ~(INP_HIGHPORT); break; case IPV6_PORTRANGE_HIGH: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags |= INP_HIGHPORT; break; case IPV6_PORTRANGE_LOW: in6p->inp_flags &= ~(INP_HIGHPORT); in6p->inp_flags |= INP_LOWPORT; break; default: error = EINVAL; break; } INP_WUNLOCK(in6p); break; #ifdef IPSEC case IPV6_IPSEC_POLICY: { caddr_t req; struct mbuf *m; if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ break; req = mtod(m, caddr_t); error = ipsec_set_policy(in6p, optname, req, m->m_len, (sopt->sopt_td != NULL) ? sopt->sopt_td->td_ucred : NULL); m_freem(m); break; } #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif /* * RFC3542 (effectively) deprecated the * semantics of the 2292-style pktoptions. * Since it was not reliable in nature (i.e., * applications had to expect the lack of some * information after all), it would make sense * to simplify this part by always returning * empty data. 
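* Hence the get path below simply reports a zero-length result.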
*/ sopt->sopt_valsize = 0; break; case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: case IPV6_UNICAST_HOPS: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_V6ONLY: case IPV6_PORTRANGE: case IPV6_RECVTCLASS: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_FLOWID: case IPV6_FLOWTYPE: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RSSBUCKETID: case IPV6_RECVRSSBUCKETID: #endif case IPV6_BINDMULTI: switch (optname) { case IPV6_RECVHOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: optval = OPTBIT(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: optval = OPTBIT(IN6P_RTHDRDSTOPTS); break; case IPV6_UNICAST_HOPS: optval = in6p->in6p_hops; break; case IPV6_RECVPKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_RECVHOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_RECVRTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: optval = OPTBIT(IN6P_MTU); break; case IPV6_V6ONLY: optval = OPTBIT(IN6P_IPV6_V6ONLY); break; case IPV6_PORTRANGE: { int flags; flags = in6p->inp_flags; if (flags & INP_HIGHPORT) optval = IPV6_PORTRANGE_HIGH; else if (flags & INP_LOWPORT) optval = IPV6_PORTRANGE_LOW; else optval = 0; break; } case IPV6_RECVTCLASS: optval = OPTBIT(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: optval = OPTBIT(IN6P_AUTOFLOWLABEL); break; case IPV6_BINDANY: optval = OPTBIT(INP_BINDANY); break; case IPV6_FLOWID: optval = in6p->inp_flowid; break; case IPV6_FLOWTYPE: optval = in6p->inp_flowtype; break; case IPV6_RECVFLOWID: optval = OPTBIT2(INP_RECVFLOWID); break; #ifdef RSS case IPV6_RSSBUCKETID: retval = rss_hash2bucket(in6p->inp_flowid, in6p->inp_flowtype, &rss_bucket); if (retval == 0) optval = rss_bucket; else error = EINVAL; break; case IPV6_RECVRSSBUCKETID: optval = OPTBIT2(INP_RECVRSSBUCKETID); break; #endif case IPV6_BINDMULTI: optval = OPTBIT2(INP_BINDMULTI); break; } if (error) break; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PATHMTU: { u_long pmtu = 0; struct ip6_mtuinfo mtuinfo; if (!(so->so_state & SS_ISCONNECTED)) return (ENOTCONN); /* * XXX: we dot not consider the case of source * routing, or optional information to specify * the outgoing interface. 
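* The value reported to the application (e.g. via getsockopt(s, IPPROTO_IPV6, IPV6_PATHMTU, &mtuinfo, &len) on a connected socket) is clamped to IPV6_MAXPACKET below.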
*/ error = ip6_getpmtu_ctl(so->so_fibnum, &in6p->in6p_faddr, &pmtu); if (error) break; if (pmtu > IPV6_MAXPACKET) pmtu = IPV6_MAXPACKET; bzero(&mtuinfo, sizeof(mtuinfo)); mtuinfo.ip6m_mtu = (u_int32_t)pmtu; optdata = (void *)&mtuinfo; optdatalen = sizeof(mtuinfo); error = sooptcopyout(sopt, optdata, optdatalen); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292RTHDR: case IPV6_2292DSTOPTS: switch (optname) { case IPV6_2292PKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_2292RTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_2292DSTOPTS: optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); break; } error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: error = ip6_getpcbopt(in6p->in6p_outputopts, optname, sopt); break; case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_MSFILTER: error = ip6_getmoptions(in6p, sopt); break; #ifdef IPSEC case IPV6_IPSEC_POLICY: { caddr_t req = NULL; size_t len = 0; struct mbuf *m = NULL; struct mbuf **mp = &m; size_t ovalsize = sopt->sopt_valsize; caddr_t oval = (caddr_t)sopt->sopt_val; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; sopt->sopt_valsize = ovalsize; sopt->sopt_val = oval; if (m) { req = mtod(m, caddr_t); len = m->m_len; } error = ipsec_get_policy(in6p, req, len, mp); if (error == 0) error = soopt_mcopyout(sopt, m); /* XXX */ if (error == 0 && m) m_freem(m); break; } #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; } } return (error); } int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt) { int error = 0, optval, optlen; const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum); struct inpcb *in6p = sotoinpcb(so); int level, op, optname; level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; if (level != IPPROTO_IPV6) { return (EINVAL); } switch (optname) { case IPV6_CHECKSUM: /* * For ICMPv6 sockets, no modification allowed for checksum * offset, permit "no change" values to help existing apps. * * RFC3542 says: "An attempt to set IPV6_CHECKSUM * for an ICMPv6 socket will fail." * The current behavior does not meet RFC3542. */ switch (op) { case SOPT_SET: if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error) break; if ((optval % 2) != 0) { /* the API assumes even offset values */ error = EINVAL; } else if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (optval != icmp6off) error = EINVAL; } else in6p->in6p_cksum = optval; break; case SOPT_GET: if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) optval = icmp6off; else optval = in6p->in6p_cksum; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: error = EINVAL; break; } break; default: error = ENOPROTOOPT; break; } return (error); } /* * Set up IP6 options in pcb for insertion in output packets or * specifying behavior of outgoing packets. 
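* Called for the legacy IPV6_2292PKTOPTIONS socket option; passing an empty mbuf simply clears any previously installed options.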
*/ static int ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so, struct sockopt *sopt) { struct ip6_pktopts *opt = *pktopt; int error = 0; struct thread *td = sopt->sopt_td; /* turn off any old options. */ if (opt) { #ifdef DIAGNOSTIC if (opt->ip6po_pktinfo || opt->ip6po_nexthop || opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 || opt->ip6po_rhinfo.ip6po_rhi_rthdr) printf("ip6_pcbopts: all specified options are cleared.\n"); #endif ip6_clearpktopts(opt, -1); } else opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK); *pktopt = NULL; if (!m || m->m_len == 0) { /* * Only turning off any previous options, regardless of * whether the opt is just created or given. */ free(opt, M_IP6OPT); return (0); } /* set options specified by user. */ if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ? td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) { ip6_clearpktopts(opt, -1); /* XXX: discard all options */ free(opt, M_IP6OPT); return (error); } *pktopt = opt; return (0); } /* * initialize ip6_pktopts. beware that there are non-zero default values in * the struct. */ void ip6_initpktopts(struct ip6_pktopts *opt) { bzero(opt, sizeof(*opt)); opt->ip6po_hlim = -1; /* -1 means default hop limit */ opt->ip6po_tclass = -1; /* -1 means default traffic class */ opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY; opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM; } static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, struct ucred *cred, int uproto) { struct ip6_pktopts *opt; if (*pktopt == NULL) { *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT, M_WAITOK); ip6_initpktopts(*pktopt); } opt = *pktopt; return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto)); } static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) { void *optdata = NULL; int optdatalen = 0; struct ip6_ext *ip6e; int error = 0; struct in6_pktinfo null_pktinfo; int deftclass = 0, on; int defminmtu = IP6PO_MINMTU_MCASTONLY; int defpreftemp = IP6PO_TEMPADDR_SYSTEM; switch (optname) { case IPV6_PKTINFO: optdata = (void *)&null_pktinfo; if (pktopt && pktopt->ip6po_pktinfo) { bcopy(pktopt->ip6po_pktinfo, &null_pktinfo, sizeof(null_pktinfo)); in6_clearscope(&null_pktinfo.ipi6_addr); } else { /* XXX: we don't have to do this every time... 
*/ bzero(&null_pktinfo, sizeof(null_pktinfo)); } optdatalen = sizeof(struct in6_pktinfo); break; case IPV6_TCLASS: if (pktopt && pktopt->ip6po_tclass >= 0) optdata = (void *)&pktopt->ip6po_tclass; else optdata = (void *)&deftclass; optdatalen = sizeof(int); break; case IPV6_HOPOPTS: if (pktopt && pktopt->ip6po_hbh) { optdata = (void *)pktopt->ip6po_hbh; ip6e = (struct ip6_ext *)pktopt->ip6po_hbh; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDR: if (pktopt && pktopt->ip6po_rthdr) { optdata = (void *)pktopt->ip6po_rthdr; ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDRDSTOPTS: if (pktopt && pktopt->ip6po_dest1) { optdata = (void *)pktopt->ip6po_dest1; ip6e = (struct ip6_ext *)pktopt->ip6po_dest1; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_DSTOPTS: if (pktopt && pktopt->ip6po_dest2) { optdata = (void *)pktopt->ip6po_dest2; ip6e = (struct ip6_ext *)pktopt->ip6po_dest2; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_NEXTHOP: if (pktopt && pktopt->ip6po_nexthop) { optdata = (void *)pktopt->ip6po_nexthop; optdatalen = pktopt->ip6po_nexthop->sa_len; } break; case IPV6_USE_MIN_MTU: if (pktopt) optdata = (void *)&pktopt->ip6po_minmtu; else optdata = (void *)&defminmtu; optdatalen = sizeof(int); break; case IPV6_DONTFRAG: if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) on = 1; else on = 0; optdata = (void *)&on; optdatalen = sizeof(on); break; case IPV6_PREFER_TEMPADDR: if (pktopt) optdata = (void *)&pktopt->ip6po_prefer_tempaddr; else optdata = (void *)&defpreftemp; optdatalen = sizeof(int); break; default: /* should not happen */ #ifdef DIAGNOSTIC panic("ip6_getpcbopt: unexpected option\n"); #endif return (ENOPROTOOPT); } error = sooptcopyout(sopt, optdata, optdatalen); return (error); } void ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) { if (pktopt == NULL) return; if (optname == -1 || optname == IPV6_PKTINFO) { if (pktopt->ip6po_pktinfo) free(pktopt->ip6po_pktinfo, M_IP6OPT); pktopt->ip6po_pktinfo = NULL; } if (optname == -1 || optname == IPV6_HOPLIMIT) pktopt->ip6po_hlim = -1; if (optname == -1 || optname == IPV6_TCLASS) pktopt->ip6po_tclass = -1; if (optname == -1 || optname == IPV6_NEXTHOP) { if (pktopt->ip6po_nextroute.ro_rt) { RTFREE(pktopt->ip6po_nextroute.ro_rt); pktopt->ip6po_nextroute.ro_rt = NULL; } if (pktopt->ip6po_nexthop) free(pktopt->ip6po_nexthop, M_IP6OPT); pktopt->ip6po_nexthop = NULL; } if (optname == -1 || optname == IPV6_HOPOPTS) { if (pktopt->ip6po_hbh) free(pktopt->ip6po_hbh, M_IP6OPT); pktopt->ip6po_hbh = NULL; } if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) { if (pktopt->ip6po_dest1) free(pktopt->ip6po_dest1, M_IP6OPT); pktopt->ip6po_dest1 = NULL; } if (optname == -1 || optname == IPV6_RTHDR) { if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; if (pktopt->ip6po_route.ro_rt) { RTFREE(pktopt->ip6po_route.ro_rt); pktopt->ip6po_route.ro_rt = NULL; } } if (optname == -1 || optname == IPV6_DSTOPTS) { if (pktopt->ip6po_dest2) free(pktopt->ip6po_dest2, M_IP6OPT); pktopt->ip6po_dest2 = NULL; } } #define PKTOPT_EXTHDRCPY(type) \ do {\ if (src->type) {\ int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ dst->type = malloc(hlen, M_IP6OPT, canwait);\ if (dst->type == NULL && canwait == M_NOWAIT)\ goto bad;\ bcopy(src->type, dst->type, hlen);\ }\ } while (/*CONSTCOND*/ 0) static int copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int 
canwait) { if (dst == NULL || src == NULL) { printf("ip6_clearpktopts: invalid argument\n"); return (EINVAL); } dst->ip6po_hlim = src->ip6po_hlim; dst->ip6po_tclass = src->ip6po_tclass; dst->ip6po_flags = src->ip6po_flags; dst->ip6po_minmtu = src->ip6po_minmtu; dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr; if (src->ip6po_pktinfo) { dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo), M_IP6OPT, canwait); if (dst->ip6po_pktinfo == NULL) goto bad; *dst->ip6po_pktinfo = *src->ip6po_pktinfo; } if (src->ip6po_nexthop) { dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len, M_IP6OPT, canwait); if (dst->ip6po_nexthop == NULL) goto bad; bcopy(src->ip6po_nexthop, dst->ip6po_nexthop, src->ip6po_nexthop->sa_len); } PKTOPT_EXTHDRCPY(ip6po_hbh); PKTOPT_EXTHDRCPY(ip6po_dest1); PKTOPT_EXTHDRCPY(ip6po_dest2); PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */ return (0); bad: ip6_clearpktopts(dst, -1); return (ENOBUFS); } #undef PKTOPT_EXTHDRCPY struct ip6_pktopts * ip6_copypktopts(struct ip6_pktopts *src, int canwait) { int error; struct ip6_pktopts *dst; dst = malloc(sizeof(*dst), M_IP6OPT, canwait); if (dst == NULL) return (NULL); ip6_initpktopts(dst); if ((error = copypktopts(dst, src, canwait)) != 0) { free(dst, M_IP6OPT); return (NULL); } return (dst); } void ip6_freepcbopts(struct ip6_pktopts *pktopt) { if (pktopt == NULL) return; ip6_clearpktopts(pktopt, -1); free(pktopt, M_IP6OPT); } /* * Set IPv6 outgoing packet options based on advanced API. */ int ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto) { struct cmsghdr *cm = NULL; if (control == NULL || opt == NULL) return (EINVAL); ip6_initpktopts(opt); if (stickyopt) { int error; /* * If stickyopt is provided, make a local copy of the options * for this particular packet, then override them by ancillary * objects. * XXX: copypktopts() does not copy the cached route to a next * hop (if any). This is not very good in terms of efficiency, * but we can allow this since this option should be rarely * used. */ if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0) return (error); } /* * XXX: Currently, we assume all the optional information is stored * in a single mbuf. */ if (control->m_next) return (EINVAL); for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { int error; if (control->m_len < CMSG_LEN(0)) return (EINVAL); cm = mtod(control, struct cmsghdr *); if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) return (EINVAL); if (cm->cmsg_level != IPPROTO_IPV6) continue; error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm), cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto); if (error) return (error); } return (0); } /* * Set a particular packet option, as a sticky option or an ancillary data * item. "len" can be 0 only when it's a sticky option. * We have 4 cases of combination of "sticky" and "cmsg": * "sticky=0, cmsg=0": impossible * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data * "sticky=1, cmsg=0": RFC3542 socket option * "sticky=1, cmsg=1": RFC2292 socket option */ static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, struct ucred *cred, int sticky, int cmsg, int uproto) { int minmtupolicy, preftemp; int error; if (!sticky && !cmsg) { #ifdef DIAGNOSTIC printf("ip6_setpktopt: impossible case\n"); #endif return (EINVAL); } /* * IPV6_2292xxx is for backward compatibility to RFC2292, and should * not be specified in the context of RFC3542. 
Conversely, * RFC3542 types should not be specified in the context of RFC2292. */ if (!cmsg) { switch (optname) { case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292NEXTHOP: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: case IPV6_2292PKTOPTIONS: return (ENOPROTOOPT); } } if (sticky && cmsg) { switch (optname) { case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_NEXTHOP: case IPV6_HOPOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_USE_MIN_MTU: case IPV6_DONTFRAG: case IPV6_TCLASS: case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */ return (ENOPROTOOPT); } } switch (optname) { case IPV6_2292PKTINFO: case IPV6_PKTINFO: { struct ifnet *ifp = NULL; struct in6_pktinfo *pktinfo; if (len != sizeof(struct in6_pktinfo)) return (EINVAL); pktinfo = (struct in6_pktinfo *)buf; /* * An application can clear any sticky IPV6_PKTINFO option by * doing a "regular" setsockopt with ipi6_addr being * in6addr_any and ipi6_ifindex being zero. * [RFC 3542, Section 6] */ if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo && pktinfo->ipi6_ifindex == 0 && IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { ip6_clearpktopts(opt, optname); break; } if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO && sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { return (EINVAL); } if (IN6_IS_ADDR_MULTICAST(&pktinfo->ipi6_addr)) return (EINVAL); /* validate the interface index if specified. */ if (pktinfo->ipi6_ifindex > V_if_index) return (ENXIO); if (pktinfo->ipi6_ifindex) { ifp = ifnet_byindex(pktinfo->ipi6_ifindex); if (ifp == NULL) return (ENXIO); } if (ifp != NULL && (ifp->if_afdata[AF_INET6] == NULL || (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) != 0)) return (ENETDOWN); if (ifp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { struct in6_ifaddr *ia; in6_setscope(&pktinfo->ipi6_addr, ifp, NULL); ia = in6ifa_ifpwithaddr(ifp, &pktinfo->ipi6_addr); if (ia == NULL) return (EADDRNOTAVAIL); ifa_free(&ia->ia_ifa); } /* * We store the address anyway, and let in6_selectsrc() * validate the specified address. This is because ipi6_addr * may not have enough information about its scope zone, and * we may need additional information (such as outgoing * interface or the scope zone of a destination address) to * disambiguate the scope. * XXX: the delay of the validation may confuse the * application when it is used as a sticky option. */ if (opt->ip6po_pktinfo == NULL) { opt->ip6po_pktinfo = malloc(sizeof(*pktinfo), M_IP6OPT, M_NOWAIT); if (opt->ip6po_pktinfo == NULL) return (ENOBUFS); } bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo)); break; } case IPV6_2292HOPLIMIT: case IPV6_HOPLIMIT: { int *hlimp; /* * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT * to simplify the ordering among hoplimit options. 
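* Such a sticky setting is therefore rejected with ENOPROTOOPT below.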
*/ if (optname == IPV6_HOPLIMIT && sticky) return (ENOPROTOOPT); if (len != sizeof(int)) return (EINVAL); hlimp = (int *)buf; if (*hlimp < -1 || *hlimp > 255) return (EINVAL); opt->ip6po_hlim = *hlimp; break; } case IPV6_TCLASS: { int tclass; if (len != sizeof(int)) return (EINVAL); tclass = *(int *)buf; if (tclass < -1 || tclass > 255) return (EINVAL); opt->ip6po_tclass = tclass; break; } case IPV6_2292NEXTHOP: case IPV6_NEXTHOP: if (cred != NULL) { error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { /* just remove the option */ ip6_clearpktopts(opt, IPV6_NEXTHOP); break; } /* check if cmsg_len is large enough for sa_len */ if (len < sizeof(struct sockaddr) || len < *buf) return (EINVAL); switch (((struct sockaddr *)buf)->sa_family) { case AF_INET6: { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf; int error; if (sa6->sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) || IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) { return (EINVAL); } if ((error = sa6_embedscope(sa6, V_ip6_use_defzone)) != 0) { return (error); } break; } case AF_LINK: /* should eventually be supported */ default: return (EAFNOSUPPORT); } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, IPV6_NEXTHOP); opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT); if (opt->ip6po_nexthop == NULL) return (ENOBUFS); bcopy(buf, opt->ip6po_nexthop, *buf); break; case IPV6_2292HOPOPTS: case IPV6_HOPOPTS: { struct ip6_hbh *hbh; int hbhlen; /* * XXX: We don't allow a non-privileged user to set ANY HbH * options, since per-option restriction has too much * overhead. */ if (cred != NULL) { error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { ip6_clearpktopts(opt, IPV6_HOPOPTS); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_hbh)) return (EINVAL); hbh = (struct ip6_hbh *)buf; hbhlen = (hbh->ip6h_len + 1) << 3; if (len != hbhlen) return (EINVAL); /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, IPV6_HOPOPTS); opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_hbh == NULL) return (ENOBUFS); bcopy(hbh, opt->ip6po_hbh, hbhlen); break; } case IPV6_2292DSTOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: { struct ip6_dest *dest, **newdest = NULL; int destlen; if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */ error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { ip6_clearpktopts(opt, optname); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_dest)) return (EINVAL); dest = (struct ip6_dest *)buf; destlen = (dest->ip6d_len + 1) << 3; if (len != destlen) return (EINVAL); /* * Determine the position that the destination options header * should be inserted; before or after the routing header. */ switch (optname) { case IPV6_2292DSTOPTS: /* * The old advacned API is ambiguous on this point. * Our approach is to determine the position based * according to the existence of a routing header. * Note, however, that this depends on the order of the * extension headers in the ancillary data; the 1st * part of the destination options header must appear * before the routing header in the ancillary data, * too. * RFC3542 solved the ambiguity by introducing * separate ancillary data or option types. 
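* For the 2292 case the code below therefore uses ip6po_dest1 when no routing header has been set yet, and ip6po_dest2 otherwise.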
*/ if (opt->ip6po_rthdr == NULL) newdest = &opt->ip6po_dest1; else newdest = &opt->ip6po_dest2; break; case IPV6_RTHDRDSTOPTS: newdest = &opt->ip6po_dest1; break; case IPV6_DSTOPTS: newdest = &opt->ip6po_dest2; break; } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, optname); *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT); if (*newdest == NULL) return (ENOBUFS); bcopy(dest, *newdest, destlen); break; } case IPV6_2292RTHDR: case IPV6_RTHDR: { struct ip6_rthdr *rth; int rthlen; if (len == 0) { ip6_clearpktopts(opt, IPV6_RTHDR); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_rthdr)) return (EINVAL); rth = (struct ip6_rthdr *)buf; rthlen = (rth->ip6r_len + 1) << 3; if (len != rthlen) return (EINVAL); switch (rth->ip6r_type) { case IPV6_RTHDR_TYPE_0: if (rth->ip6r_len == 0) /* must contain one addr */ return (EINVAL); if (rth->ip6r_len % 2) /* length must be even */ return (EINVAL); if (rth->ip6r_len / 2 != rth->ip6r_segleft) return (EINVAL); break; default: return (EINVAL); /* not supported */ } /* turn off the previous option */ ip6_clearpktopts(opt, IPV6_RTHDR); opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_rthdr == NULL) return (ENOBUFS); bcopy(rth, opt->ip6po_rthdr, rthlen); break; } case IPV6_USE_MIN_MTU: if (len != sizeof(int)) return (EINVAL); minmtupolicy = *(int *)buf; if (minmtupolicy != IP6PO_MINMTU_MCASTONLY && minmtupolicy != IP6PO_MINMTU_DISABLE && minmtupolicy != IP6PO_MINMTU_ALL) { return (EINVAL); } opt->ip6po_minmtu = minmtupolicy; break; case IPV6_DONTFRAG: if (len != sizeof(int)) return (EINVAL); if (uproto == IPPROTO_TCP || *(int *)buf == 0) { /* * we ignore this option for TCP sockets. * (RFC3542 leaves this case unspecified.) */ opt->ip6po_flags &= ~IP6PO_DONTFRAG; } else opt->ip6po_flags |= IP6PO_DONTFRAG; break; case IPV6_PREFER_TEMPADDR: if (len != sizeof(int)) return (EINVAL); preftemp = *(int *)buf; if (preftemp != IP6PO_TEMPADDR_SYSTEM && preftemp != IP6PO_TEMPADDR_NOTPREFER && preftemp != IP6PO_TEMPADDR_PREFER) { return (EINVAL); } opt->ip6po_prefer_tempaddr = preftemp; break; default: return (ENOPROTOOPT); } /* end of switch */ return (0); } /* * Routine called from ip6_output() to loop back a copy of an IP6 multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be &loif -- easier than replicating that code here. */ void ip6_mloopback(struct ifnet *ifp, struct mbuf *m) { struct mbuf *copym; struct ip6_hdr *ip6; copym = m_copy(m, 0, M_COPYALL); if (copym == NULL) return; /* * Make sure to deep-copy IPv6 header portion in case the data * is in an mbuf cluster, so that we can safely override the IPv6 * header portion later. */ if (!M_WRITABLE(copym) || copym->m_len < sizeof(struct ip6_hdr)) { copym = m_pullup(copym, sizeof(struct ip6_hdr)); if (copym == NULL) return; } ip6 = mtod(copym, struct ip6_hdr *); /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; copym->m_pkthdr.csum_data = 0xffff; } if_simloop(ifp, copym, AF_INET6, 0); } /* * Chop IPv6 header off from the payload. 
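* If the leading mbuf holds more than the IPv6 header, a separate header-only mbuf is prepended; exthdrs->ip6e_ip6 then points at that mbuf with the payload chained behind it.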
*/ static int ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs) { struct mbuf *mh; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); if (m->m_len > sizeof(*ip6)) { mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) { m_freem(m); return ENOBUFS; } m_move_pkthdr(mh, m); M_ALIGN(mh, sizeof(*ip6)); m->m_len -= sizeof(*ip6); m->m_data += sizeof(*ip6); mh->m_next = m; m = mh; m->m_len = sizeof(*ip6); bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); } exthdrs->ip6e_ip6 = m; return 0; } /* * Compute IPv6 extension header length. */ int ip6_optlen(struct inpcb *in6p) { int len; if (!in6p->in6p_outputopts) return 0; len = 0; #define elen(x) \ (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) len += elen(in6p->in6p_outputopts->ip6po_hbh); if (in6p->in6p_outputopts->ip6po_rthdr) /* dest1 is valid with rthdr only */ len += elen(in6p->in6p_outputopts->ip6po_dest1); len += elen(in6p->in6p_outputopts->ip6po_rthdr); len += elen(in6p->in6p_outputopts->ip6po_dest2); return len; #undef elen } Index: user/alc/PQ_LAUNDRY/sys/ofed/drivers/infiniband/core/cma.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/ofed/drivers/infiniband/core/cma.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/ofed/drivers/infiniband/core/cma.c (revision 303642) @@ -1,3865 +1,3888 @@ /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. * Copyright (c) 2016 Chelsio Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #define LINUXKPI_PARAM_PREFIX ibcore_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("Generic RDMA CM Agent"); MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) #define CMA_IBOE_PACKET_LIFETIME 18 static int cma_response_timeout = CMA_CM_RESPONSE_TIMEOUT; module_param_named(cma_response_timeout, cma_response_timeout, int, 0644); MODULE_PARM_DESC(cma_response_timeout, "CMA_CM_RESPONSE_TIMEOUT (default=20)"); static int def_prec2sl = 3; module_param_named(def_prec2sl, def_prec2sl, int, 0644); MODULE_PARM_DESC(def_prec2sl, "Default value for SL priority with RoCE. Valid values 0 - 7"); static int unify_tcp_port_space = 1; module_param(unify_tcp_port_space, int, 0644); MODULE_PARM_DESC(unify_tcp_port_space, "Unify the host TCP and RDMA port " "space allocation (default=1)"); static int debug_level = 0; #define cma_pr(level, priv, format, arg...) \ printk(level "CMA: %p: %s: " format, ((struct rdma_id_priv *) priv) , __func__, ## arg) #define cma_dbg(priv, format, arg...) \ do { if (debug_level) cma_pr(KERN_DEBUG, priv, format, ## arg); } while (0) #define cma_warn(priv, format, arg...) \ cma_pr(KERN_WARNING, priv, format, ## arg) #define CMA_GID_FMT "%2.2x%2.2x:%2.2x%2.2x" #define CMA_GID_RAW_ARG(gid) ((u8 *)(gid))[12],\ ((u8 *)(gid))[13],\ ((u8 *)(gid))[14],\ ((u8 *)(gid))[15] #define CMA_GID_ARG(gid) CMA_GID_RAW_ARG((gid).raw) #define cma_debug_path(priv, pfx, p) \ cma_dbg(priv, pfx "sgid=" CMA_GID_FMT ",dgid=" \ CMA_GID_FMT "\n", CMA_GID_ARG(p.sgid), \ CMA_GID_ARG(p.dgid)) #define cma_debug_gid(priv, g) \ cma_dbg(priv, "gid=" CMA_GID_FMT "\n", CMA_GID_ARG(g) module_param_named(debug_level, debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "debug level default=0"); static void cma_add_one(struct ib_device *device); static void cma_remove_one(struct ib_device *device); static struct ib_client cma_client = { .name = "cma", .add = cma_add_one, .remove = cma_remove_one }; static struct ib_sa_client sa_client; static struct rdma_addr_client addr_client; static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); static struct workqueue_struct *cma_wq; static struct workqueue_struct *cma_free_wq; static DEFINE_IDR(sdp_ps); static DEFINE_IDR(tcp_ps); static DEFINE_IDR(udp_ps); static DEFINE_IDR(ipoib_ps); static DEFINE_IDR(ib_ps); struct cma_device { struct list_head list; struct ib_device *device; struct completion comp; atomic_t refcount; struct list_head id_list; }; struct rdma_bind_list { struct idr *ps; struct hlist_head owners; unsigned short port; }; enum { CMA_OPTION_AFONLY, }; /* * Device removal can occur at anytime, so we need extra handling to * serialize notifying the user of device removal with other callbacks. * We do this by disabling removal notification while a callback is in process, * and reporting it after the callback completes. 
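* (The handler_mutex in struct rdma_id_private below provides that serialization; see cma_disable_callback().)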
*/ struct rdma_id_private { struct rdma_cm_id id; struct rdma_bind_list *bind_list; struct socket *sock; struct hlist_node node; struct list_head list; /* listen_any_list or cma_device.list */ struct list_head listen_list; /* per device listens */ struct cma_device *cma_dev; struct list_head mc_list; int internal_id; enum rdma_cm_state state; spinlock_t lock; spinlock_t cm_lock; struct mutex qp_mutex; struct completion comp; atomic_t refcount; struct mutex handler_mutex; struct work_struct work; /* garbage coll */ int backlog; int timeout_ms; struct ib_sa_query *query; int query_id; union { struct ib_cm_id *ib; struct iw_cm_id *iw; } cm_id; u32 seq_num; u32 qkey; u32 qp_num; pid_t owner; u32 options; u8 srq; u8 tos; u8 reuseaddr; u8 afonly; int qp_timeout; /* cache for mc record params */ struct ib_sa_mcmember_rec rec; int is_valid_rec; }; struct cma_multicast { struct rdma_id_private *id_priv; union { struct ib_sa_multicast *ib; } multicast; struct list_head list; void *context; struct sockaddr_storage addr; struct kref mcref; }; struct cma_work { struct work_struct work; struct rdma_id_private *id; enum rdma_cm_state old_state; enum rdma_cm_state new_state; struct rdma_cm_event event; }; struct cma_ndev_work { struct work_struct work; struct rdma_id_private *id; struct rdma_cm_event event; }; struct iboe_mcast_work { struct work_struct work; struct rdma_id_private *id; struct cma_multicast *mc; }; union cma_ip_addr { struct in6_addr ip6; struct { __be32 pad[3]; __be32 addr; } ip4; }; struct cma_hdr { u8 cma_version; u8 ip_version; /* IP version: 7:4 */ __be16 port; union cma_ip_addr src_addr; union cma_ip_addr dst_addr; }; struct sdp_hh { u8 bsdh[16]; u8 sdp_version; /* Major version: 7:4 */ u8 ip_version; /* IP version: 7:4 */ u8 sdp_specific1[10]; __be16 port; __be16 sdp_specific2; union cma_ip_addr src_addr; union cma_ip_addr dst_addr; }; struct sdp_hah { u8 bsdh[16]; u8 sdp_version; }; #define CMA_VERSION 0x00 #define SDP_MAJ_VERSION 0x2 static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) { unsigned long flags; int ret; spin_lock_irqsave(&id_priv->lock, flags); ret = (id_priv->state == comp); spin_unlock_irqrestore(&id_priv->lock, flags); return ret; } static int cma_comp_exch(struct rdma_id_private *id_priv, enum rdma_cm_state comp, enum rdma_cm_state exch) { unsigned long flags; int ret; spin_lock_irqsave(&id_priv->lock, flags); if ((ret = (id_priv->state == comp))) id_priv->state = exch; spin_unlock_irqrestore(&id_priv->lock, flags); return ret; } static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, enum rdma_cm_state exch) { unsigned long flags; enum rdma_cm_state old; spin_lock_irqsave(&id_priv->lock, flags); old = id_priv->state; id_priv->state = exch; spin_unlock_irqrestore(&id_priv->lock, flags); return old; } static inline u8 cma_get_ip_ver(struct cma_hdr *hdr) { return hdr->ip_version >> 4; } static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) { hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); } static inline u8 sdp_get_majv(u8 sdp_version) { return sdp_version >> 4; } static inline u8 sdp_get_ip_ver(struct sdp_hh *hh) { return hh->ip_version >> 4; } static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver) { hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF); } static void cma_attach_to_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { atomic_inc(&cma_dev->refcount); id_priv->cma_dev = cma_dev; id_priv->id.device = cma_dev->device; id_priv->id.route.addr.dev_addr.transport = 
rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); } static inline void cma_deref_dev(struct cma_device *cma_dev) { if (atomic_dec_and_test(&cma_dev->refcount)) complete(&cma_dev->comp); } static inline void release_mc(struct kref *kref) { struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref); kfree(mc->multicast.ib); kfree(mc); } static void cma_release_dev(struct rdma_id_private *id_priv) { mutex_lock(&lock); list_del(&id_priv->list); cma_deref_dev(id_priv->cma_dev); id_priv->cma_dev = NULL; mutex_unlock(&lock); } static int cma_set_qkey(struct rdma_id_private *id_priv) { struct ib_sa_mcmember_rec rec; int ret = 0; if (id_priv->qkey) return 0; switch (id_priv->id.ps) { case RDMA_PS_UDP: id_priv->qkey = RDMA_UDP_QKEY; break; case RDMA_PS_IPOIB: ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid); ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, &rec.mgid, &rec); if (!ret) id_priv->qkey = be32_to_cpu(rec.qkey); break; default: break; } return ret; } static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num) { int i; int err; struct ib_port_attr props; union ib_gid tmp; err = ib_query_port(device, port_num, &props); if (err) return 1; for (i = 0; i < props.gid_tbl_len; ++i) { err = ib_query_gid(device, port_num, i, &tmp); if (err) return 1; if (!memcmp(&tmp, gid, sizeof tmp)) return 0; } return -EAGAIN; } int rdma_find_cmid_laddr(struct sockaddr_in *local_addr, unsigned short dev_type, void **cm_id) { int ret; u8 port; int found_dev = 0, found_cmid = 0; struct rdma_id_private *id_priv; struct rdma_id_private *dev_id_priv; struct cma_device *cma_dev; struct rdma_dev_addr dev_addr; union ib_gid gid; enum rdma_link_layer dev_ll = dev_type == ARPHRD_INFINIBAND ? IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; memset(&dev_addr, 0, sizeof(dev_addr)); ret = rdma_translate_ip((struct sockaddr *)local_addr, &dev_addr, NULL); if (ret) goto err; /* find rdma device based on MAC address/gid */ mutex_lock(&lock); memcpy(&gid, dev_addr.src_dev_addr + rdma_addr_gid_offset(&dev_addr), sizeof(gid)); list_for_each_entry(cma_dev, &dev_list, list) for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) if ((rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) && (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IWARP)) { ret = find_gid_port(cma_dev->device, &gid, port); if (!ret) { found_dev = 1; goto out; } else if (ret == 1) { mutex_unlock(&lock); goto err; } } out: mutex_unlock(&lock); if (!found_dev) goto err; /* Traverse through the list of listening cm_id's to find the * desired cm_id based on rdma device & port number. */ list_for_each_entry(id_priv, &listen_any_list, list) list_for_each_entry(dev_id_priv, &id_priv->listen_list, listen_list) if (dev_id_priv->cma_dev == cma_dev) if (dev_id_priv->cm_id.iw->local_addr.sin_port == local_addr->sin_port) { *cm_id = (void *)dev_id_priv->cm_id.iw; found_cmid = 1; } return found_cmid ? 0 : -ENODEV; err: return -ENODEV; } EXPORT_SYMBOL(rdma_find_cmid_laddr); -static int cma_acquire_dev(struct rdma_id_private *id_priv) +static int cma_acquire_dev(struct rdma_id_private *id_priv, + struct rdma_id_private *listen_id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct cma_device *cma_dev; union ib_gid gid, iboe_gid; int ret = -ENODEV; - u8 port; + u8 port, found_port; enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ? 
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; if (dev_ll != IB_LINK_LAYER_INFINIBAND && id_priv->id.ps == RDMA_PS_IPOIB) return -EINVAL; mutex_lock(&lock); rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &iboe_gid); memcpy(&gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof gid); + if (listen_id_priv && + rdma_port_get_link_layer(listen_id_priv->id.device, + listen_id_priv->id.port_num) == dev_ll) { + cma_dev = listen_id_priv->cma_dev; + port = listen_id_priv->id.port_num; + if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && + rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) + ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, + &found_port, NULL); + else + ret = ib_find_cached_gid(cma_dev->device, &gid, + &found_port, NULL); + + if (!ret && (port == found_port)) { + id_priv->id.port_num = found_port; + goto out; + } + } list_for_each_entry(cma_dev, &dev_list, list) { for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { + if (listen_id_priv && + listen_id_priv->cma_dev == cma_dev && + listen_id_priv->id.port_num == port) + continue; if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) { if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) - ret = find_gid_port(cma_dev->device, &iboe_gid, port); + ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL); else - ret = find_gid_port(cma_dev->device, &gid, port); + ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL); - if (!ret) { + if (!ret && (port == found_port)) { id_priv->id.port_num = port; goto out; } else if (ret == 1) - break; - } + break; + } } } out: if (!ret) cma_attach_to_dev(id_priv, cma_dev); mutex_unlock(&lock); return ret; } static void cma_deref_id(struct rdma_id_private *id_priv) { if (atomic_dec_and_test(&id_priv->refcount)) complete(&id_priv->comp); } static int cma_disable_callback(struct rdma_id_private *id_priv, enum rdma_cm_state state) { mutex_lock(&id_priv->handler_mutex); if (id_priv->state != state) { mutex_unlock(&id_priv->handler_mutex); return -EINVAL; } return 0; } struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps, enum ib_qp_type qp_type) { struct rdma_id_private *id_priv; id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL); if (!id_priv) return ERR_PTR(-ENOMEM); id_priv->owner = curthread->td_proc->p_pid; id_priv->state = RDMA_CM_IDLE; id_priv->id.context = context; id_priv->id.event_handler = event_handler; id_priv->id.ps = ps; id_priv->id.qp_type = qp_type; spin_lock_init(&id_priv->lock); spin_lock_init(&id_priv->cm_lock); mutex_init(&id_priv->qp_mutex); init_completion(&id_priv->comp); atomic_set(&id_priv->refcount, 1); mutex_init(&id_priv->handler_mutex); INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); return &id_priv->id; } EXPORT_SYMBOL(rdma_create_id); static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; qp_attr.qp_state = IB_QPS_INIT; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) return ret; ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); if (ret) return ret; qp_attr.qp_state = IB_QPS_RTR; ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); if (ret) return ret; qp_attr.qp_state = IB_QPS_RTS; qp_attr.sq_psn = 0; ret = 
ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); return ret; } static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; qp_attr.qp_state = IB_QPS_INIT; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) return ret; return ib_modify_qp(qp, &qp_attr, qp_attr_mask); } int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { struct rdma_id_private *id_priv; struct ib_qp *qp; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (id->device != pd->device) return -EINVAL; qp = ib_create_qp(pd, qp_init_attr); if (IS_ERR(qp)) return PTR_ERR(qp); if (id->qp_type == IB_QPT_UD) ret = cma_init_ud_qp(id_priv, qp); else ret = cma_init_conn_qp(id_priv, qp); if (ret) goto err; id->qp = qp; id_priv->qp_num = qp->qp_num; id_priv->srq = (qp->srq != NULL); return 0; err: ib_destroy_qp(qp); return ret; } EXPORT_SYMBOL(rdma_create_qp); void rdma_destroy_qp(struct rdma_cm_id *id) { struct rdma_id_private *id_priv; id_priv = container_of(id, struct rdma_id_private, id); mutex_lock(&id_priv->qp_mutex); ib_destroy_qp(id_priv->id.qp); id_priv->id.qp = NULL; mutex_unlock(&id_priv->qp_mutex); } EXPORT_SYMBOL(rdma_destroy_qp); static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; union ib_gid sgid; mutex_lock(&id_priv->qp_mutex); if (!id_priv->id.qp) { ret = 0; goto out; } /* Need to update QP attributes from default values. */ qp_attr.qp_state = IB_QPS_INIT; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) goto out; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); if (ret) goto out; qp_attr.qp_state = IB_QPS_RTR; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) goto out; ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, qp_attr.ah_attr.grh.sgid_index, &sgid); if (ret) goto out; if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB && rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) == IB_LINK_LAYER_ETHERNET) { u32 scope_id = rdma_get_ipv6_scope_id(id_priv->id.device, id_priv->id.port_num); ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL, scope_id); if (ret) goto out; } if (conn_param) qp_attr.max_dest_rd_atomic = conn_param->responder_resources; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); out: mutex_unlock(&id_priv->qp_mutex); return ret; } static int cma_modify_qp_rts(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; mutex_lock(&id_priv->qp_mutex); if (!id_priv->id.qp) { ret = 0; goto out; } qp_attr.qp_state = IB_QPS_RTS; ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) goto out; if (conn_param) qp_attr.max_rd_atomic = conn_param->initiator_depth; if (id_priv->qp_timeout && id_priv->id.qp->qp_type == IB_QPT_RC) { qp_attr.timeout = id_priv->qp_timeout; qp_attr_mask |= IB_QP_TIMEOUT; } ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); out: mutex_unlock(&id_priv->qp_mutex); return ret; } static int cma_modify_qp_err(struct rdma_id_private *id_priv) { struct ib_qp_attr qp_attr; int ret; mutex_lock(&id_priv->qp_mutex); if (!id_priv->id.qp) { ret = 0; goto out; } qp_attr.qp_state = IB_QPS_ERR; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE); out: mutex_unlock(&id_priv->qp_mutex); return ret; } static int 
cma_ib_init_qp_attr(struct rdma_id_private *id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int ret; u16 pkey; if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) == IB_LINK_LAYER_INFINIBAND) pkey = ib_addr_get_pkey(dev_addr); else pkey = 0xffff; ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, pkey, &qp_attr->pkey_index); if (ret) return ret; qp_attr->port_num = id_priv->id.port_num; *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; if (id_priv->id.qp_type == IB_QPT_UD) { ret = cma_set_qkey(id_priv); if (ret) return ret; qp_attr->qkey = id_priv->qkey; *qp_attr_mask |= IB_QP_QKEY; } else { qp_attr->qp_access_flags = 0; *qp_attr_mask |= IB_QP_ACCESS_FLAGS; } return 0; } int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct rdma_id_private *id_priv; int ret = 0; id_priv = container_of(id, struct rdma_id_private, id); switch (rdma_node_get_transport(id_priv->id.device->node_type)) { case RDMA_TRANSPORT_IB: if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); else ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, qp_attr_mask); if (qp_attr->qp_state == IB_QPS_RTR) qp_attr->rq_psn = id_priv->seq_num; break; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_SCIF: if (!id_priv->cm_id.iw) { qp_attr->qp_access_flags = 0; *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; } else ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, qp_attr_mask); break; default: ret = -ENOSYS; break; } return ret; } EXPORT_SYMBOL(rdma_init_qp_attr); static inline int cma_zero_addr(struct sockaddr *addr) { struct in6_addr *ip6; if (addr->sa_family == AF_INET) return ipv4_is_zeronet( ((struct sockaddr_in *)addr)->sin_addr.s_addr); else { ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr; return (ip6->s6_addr32[0] | ip6->s6_addr32[1] | ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0; } } static inline int cma_loopback_addr(struct sockaddr *addr) { if (addr->sa_family == AF_INET) return ipv4_is_loopback( ((struct sockaddr_in *) addr)->sin_addr.s_addr); else return ipv6_addr_loopback( &((struct sockaddr_in6 *) addr)->sin6_addr); } static inline int cma_any_addr(struct sockaddr *addr) { return cma_zero_addr(addr) || cma_loopback_addr(addr); } int rdma_cma_any_addr(struct sockaddr *addr) { return cma_any_addr(addr); } EXPORT_SYMBOL(rdma_cma_any_addr); static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) { if (src->sa_family != dst->sa_family) return -1; switch (src->sa_family) { case AF_INET: return ((struct sockaddr_in *) src)->sin_addr.s_addr != ((struct sockaddr_in *) dst)->sin_addr.s_addr; default: return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, &((struct sockaddr_in6 *) dst)->sin6_addr); } } static inline __be16 cma_port(struct sockaddr *addr) { if (addr->sa_family == AF_INET) return ((struct sockaddr_in *) addr)->sin_port; else return ((struct sockaddr_in6 *) addr)->sin6_port; } static inline int cma_any_port(struct sockaddr *addr) { return !cma_port(addr); } static int cma_get_net_info(void *hdr, enum rdma_port_space ps, u8 *ip_ver, __be16 *port, union cma_ip_addr **src, union cma_ip_addr **dst) { switch (ps) { case RDMA_PS_SDP: if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) != SDP_MAJ_VERSION) return -EINVAL; *ip_ver = sdp_get_ip_ver(hdr); *port = ((struct sdp_hh *) hdr)->port; *src = &((struct sdp_hh *) hdr)->src_addr; *dst = &((struct sdp_hh *) 
hdr)->dst_addr; break; default: if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION) return -EINVAL; *ip_ver = cma_get_ip_ver(hdr); *port = ((struct cma_hdr *) hdr)->port; *src = &((struct cma_hdr *) hdr)->src_addr; *dst = &((struct cma_hdr *) hdr)->dst_addr; break; } if (*ip_ver != 4 && *ip_ver != 6) return -EINVAL; return 0; } static void cma_save_net_info(struct rdma_addr *addr, struct rdma_addr *listen_addr, u8 ip_ver, __be16 port, union cma_ip_addr *src, union cma_ip_addr *dst) { struct sockaddr_in *listen4, *ip4; struct sockaddr_in6 *listen6, *ip6; switch (ip_ver) { case 4: listen4 = (struct sockaddr_in *) &listen_addr->src_addr; ip4 = (struct sockaddr_in *) &addr->src_addr; ip4->sin_family = listen4->sin_family; ip4->sin_addr.s_addr = dst->ip4.addr; ip4->sin_port = listen4->sin_port; ip4->sin_len = sizeof(struct sockaddr_in); ip4 = (struct sockaddr_in *) &addr->dst_addr; ip4->sin_family = listen4->sin_family; ip4->sin_addr.s_addr = src->ip4.addr; ip4->sin_port = port; ip4->sin_len = sizeof(struct sockaddr_in); break; case 6: listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr; ip6 = (struct sockaddr_in6 *) &addr->src_addr; ip6->sin6_family = listen6->sin6_family; ip6->sin6_addr = dst->ip6; ip6->sin6_port = listen6->sin6_port; ip6->sin6_len = sizeof(struct sockaddr_in6); ip6->sin6_scope_id = listen6->sin6_scope_id; ip6 = (struct sockaddr_in6 *) &addr->dst_addr; ip6->sin6_family = listen6->sin6_family; ip6->sin6_addr = src->ip6; ip6->sin6_port = port; ip6->sin6_len = sizeof(struct sockaddr_in6); ip6->sin6_scope_id = listen6->sin6_scope_id; break; default: break; } } static inline int cma_user_data_offset(enum rdma_port_space ps) { switch (ps) { case RDMA_PS_SDP: return 0; default: return sizeof(struct cma_hdr); } } static void cma_cancel_route(struct rdma_id_private *id_priv) { switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) { case IB_LINK_LAYER_INFINIBAND: if (id_priv->query) ib_sa_cancel_query(id_priv->query_id, id_priv->query); break; default: break; } } static void cma_cancel_listens(struct rdma_id_private *id_priv) { struct rdma_id_private *dev_id_priv; /* * Remove from listen_any_list to prevent added devices from spawning * additional listen requests. 
*/ mutex_lock(&lock); list_del(&id_priv->list); while (!list_empty(&id_priv->listen_list)) { dev_id_priv = list_entry(id_priv->listen_list.next, struct rdma_id_private, listen_list); /* sync with device removal to avoid duplicate destruction */ list_del_init(&dev_id_priv->list); list_del(&dev_id_priv->listen_list); mutex_unlock(&lock); rdma_destroy_id(&dev_id_priv->id); mutex_lock(&lock); } mutex_unlock(&lock); } static void cma_cancel_operation(struct rdma_id_private *id_priv, enum rdma_cm_state state) { switch (state) { case RDMA_CM_ADDR_QUERY: rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); break; case RDMA_CM_ROUTE_QUERY: cma_cancel_route(id_priv); break; case RDMA_CM_LISTEN: if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr) && !id_priv->cma_dev) cma_cancel_listens(id_priv); break; default: break; } } static void cma_release_port(struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list; mutex_lock(&lock); bind_list = id_priv->bind_list; if (!bind_list) { mutex_unlock(&lock); return; } hlist_del(&id_priv->node); id_priv->bind_list = NULL; if (hlist_empty(&bind_list->owners)) { idr_remove(bind_list->ps, bind_list->port); kfree(bind_list); } mutex_unlock(&lock); if (id_priv->sock) sock_release(id_priv->sock); } static void cma_leave_mc_groups(struct rdma_id_private *id_priv) { struct cma_multicast *mc; while (!list_empty(&id_priv->mc_list)) { mc = container_of(id_priv->mc_list.next, struct cma_multicast, list); list_del(&mc->list); switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) { case IB_LINK_LAYER_INFINIBAND: ib_sa_free_multicast(mc->multicast.ib); kfree(mc); break; case IB_LINK_LAYER_ETHERNET: kref_put(&mc->mcref, release_mc); break; default: break; } } } static void __rdma_free(struct work_struct *work) { struct rdma_id_private *id_priv; id_priv = container_of(work, struct rdma_id_private, work); wait_for_completion(&id_priv->comp); if (id_priv->internal_id) cma_deref_id(id_priv->id.context); kfree(id_priv->id.route.path_rec); kfree(id_priv); } void rdma_destroy_id(struct rdma_cm_id *id) { struct rdma_id_private *id_priv; enum rdma_cm_state state; unsigned long flags; struct ib_cm_id *ib; id_priv = container_of(id, struct rdma_id_private, id); state = cma_exch(id_priv, RDMA_CM_DESTROYING); cma_cancel_operation(id_priv, state); /* * Wait for any active callback to finish. New callbacks will find * the id_priv state set to destroying and abort. 
*/ mutex_lock(&id_priv->handler_mutex); mutex_unlock(&id_priv->handler_mutex); if (id_priv->cma_dev) { switch (rdma_node_get_transport(id_priv->id.device->node_type)) { case RDMA_TRANSPORT_IB: spin_lock_irqsave(&id_priv->cm_lock, flags); if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) { ib = id_priv->cm_id.ib; id_priv->cm_id.ib = NULL; spin_unlock_irqrestore(&id_priv->cm_lock, flags); ib_destroy_cm_id(ib); } else spin_unlock_irqrestore(&id_priv->cm_lock, flags); break; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_SCIF: if (id_priv->cm_id.iw) iw_destroy_cm_id(id_priv->cm_id.iw); break; default: break; } cma_leave_mc_groups(id_priv); cma_release_dev(id_priv); } cma_release_port(id_priv); cma_deref_id(id_priv); INIT_WORK(&id_priv->work, __rdma_free); queue_work(cma_free_wq, &id_priv->work); } EXPORT_SYMBOL(rdma_destroy_id); static int cma_rep_recv(struct rdma_id_private *id_priv) { int ret; ret = cma_modify_qp_rtr(id_priv, NULL); if (ret) goto reject; ret = cma_modify_qp_rts(id_priv, NULL); if (ret) goto reject; cma_dbg(id_priv, "sending RTU\n"); ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); if (ret) goto reject; return 0; reject: cma_modify_qp_err(id_priv); cma_dbg(id_priv, "sending REJ\n"); ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); return ret; } static int cma_verify_rep(struct rdma_id_private *id_priv, void *data) { if (id_priv->id.ps == RDMA_PS_SDP && sdp_get_majv(((struct sdp_hah *) data)->sdp_version) != SDP_MAJ_VERSION) return -EINVAL; return 0; } static void cma_set_rep_event_data(struct rdma_cm_event *event, struct ib_cm_rep_event_param *rep_data, void *private_data) { event->param.conn.private_data = private_data; event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; event->param.conn.responder_resources = rep_data->responder_resources; event->param.conn.initiator_depth = rep_data->initiator_depth; event->param.conn.flow_control = rep_data->flow_control; event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; event->param.conn.srq = rep_data->srq; event->param.conn.qp_num = rep_data->remote_qpn; } static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv = cm_id->context; struct rdma_cm_event event; int ret = 0; if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && cma_disable_callback(id_priv, RDMA_CM_CONNECT)) || (ib_event->event == IB_CM_TIMEWAIT_EXIT && cma_disable_callback(id_priv, RDMA_CM_DISCONNECT))) return 0; memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_REQ_ERROR: case IB_CM_REP_ERROR: event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = -ETIMEDOUT; break; case IB_CM_REP_RECEIVED: event.status = cma_verify_rep(id_priv, ib_event->private_data); if (event.status) event.event = RDMA_CM_EVENT_CONNECT_ERROR; else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) { event.status = cma_rep_recv(id_priv); event.event = event.status ? 
RDMA_CM_EVENT_CONNECT_ERROR : RDMA_CM_EVENT_ESTABLISHED; } else event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, ib_event->private_data); break; case IB_CM_RTU_RECEIVED: case IB_CM_USER_ESTABLISHED: event.event = RDMA_CM_EVENT_ESTABLISHED; break; case IB_CM_DREQ_ERROR: event.status = -ETIMEDOUT; /* fall through */ case IB_CM_DREQ_RECEIVED: case IB_CM_DREP_RECEIVED: if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_DISCONNECT)) goto out; event.event = RDMA_CM_EVENT_DISCONNECTED; break; case IB_CM_TIMEWAIT_EXIT: event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; break; case IB_CM_MRA_RECEIVED: /* ignore event */ goto out; case IB_CM_REJ_RECEIVED: cma_modify_qp_err(id_priv); event.status = ib_event->param.rej_rcvd.reason; event.event = RDMA_CM_EVENT_REJECTED; event.param.conn.private_data = ib_event->private_data; event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; break; default: printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n", ib_event->event); goto out; } ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } out: mutex_unlock(&id_priv->handler_mutex); return ret; } static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv; struct rdma_cm_id *id; struct rdma_route *rt; union cma_ip_addr *src, *dst; __be16 port; u8 ip_ver; int ret; if (cma_get_net_info(ib_event->private_data, listen_id->ps, &ip_ver, &port, &src, &dst)) return NULL; id = rdma_create_id(listen_id->event_handler, listen_id->context, listen_id->ps, ib_event->param.req_rcvd.qp_type); if (IS_ERR(id)) return NULL; cma_save_net_info(&id->route.addr, &listen_id->route.addr, ip_ver, port, src, dst); rt = &id->route; rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 
2 : 1; rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, GFP_KERNEL); if (!rt->path_rec) goto err; rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; if (rt->num_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) { rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); } else { ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr, &rt->addr.dev_addr, NULL); if (ret) goto err; } rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = RDMA_CM_CONNECT; return id_priv; err: rdma_destroy_id(id); return NULL; } static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv; struct rdma_cm_id *id; union cma_ip_addr *src, *dst; __be16 port; u8 ip_ver; int ret; id = rdma_create_id(listen_id->event_handler, listen_id->context, listen_id->ps, IB_QPT_UD); if (IS_ERR(id)) return NULL; if (cma_get_net_info(ib_event->private_data, listen_id->ps, &ip_ver, &port, &src, &dst)) goto err; cma_save_net_info(&id->route.addr, &listen_id->route.addr, ip_ver, port, src, dst); if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) { ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, &id->route.addr.dev_addr, NULL); if (ret) goto err; } id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = RDMA_CM_CONNECT; return id_priv; err: rdma_destroy_id(id); return NULL; } static void cma_set_req_event_data(struct rdma_cm_event *event, struct ib_cm_req_event_param *req_data, void *private_data, int offset) { event->param.conn.private_data = private_data + offset; event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset; event->param.conn.responder_resources = req_data->responder_resources; event->param.conn.initiator_depth = req_data->initiator_depth; event->param.conn.flow_control = req_data->flow_control; event->param.conn.retry_count = req_data->retry_count; event->param.conn.rnr_retry_count = req_data->rnr_retry_count; event->param.conn.srq = req_data->srq; event->param.conn.qp_num = req_data->remote_qpn; } static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) { return (((ib_event->event == IB_CM_REQ_RECEIVED) && (ib_event->param.req_rcvd.qp_type == id->qp_type)) || ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && (id->qp_type == IB_QPT_UD)) || (!id->qp_type)); } static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *listen_id, *conn_id; struct rdma_cm_event event; int offset, ret; u8 smac[ETH_ALEN]; u8 alt_smac[ETH_ALEN]; u8 *psmac = smac; u8 *palt_smac = alt_smac; int is_iboe = ((rdma_node_get_transport(cm_id->device->node_type) == RDMA_TRANSPORT_IB) && (rdma_port_get_link_layer(cm_id->device, ib_event->param.req_rcvd.port) == IB_LINK_LAYER_ETHERNET)); int is_sidr = 0; listen_id = cm_id->context; if (!cma_check_req_qp_type(&listen_id->id, ib_event)) return -EINVAL; if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) return -ECONNABORTED; memset(&event, 0, sizeof event); offset = cma_user_data_offset(listen_id->id.ps); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { is_sidr = 1; conn_id = cma_new_udp_id(&listen_id->id, ib_event); 
event.param.ud.private_data = ib_event->private_data + offset; event.param.ud.private_data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; } else { conn_id = cma_new_conn_id(&listen_id->id, ib_event); cma_set_req_event_data(&event, &ib_event->param.req_rcvd, ib_event->private_data, offset); } if (!conn_id) { ret = -ENOMEM; goto err1; } mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); - ret = cma_acquire_dev(conn_id); + ret = cma_acquire_dev(conn_id, listen_id); if (ret) goto err2; conn_id->cm_id.ib = cm_id; cm_id->context = conn_id; cm_id->cm_handler = cma_ib_handler; /* * Protect against the user destroying conn_id from another thread * until we're done accessing it. */ atomic_inc(&conn_id->refcount); ret = conn_id->id.event_handler(&conn_id->id, &event); if (ret) goto err3; if (is_iboe && !is_sidr) { u32 scope_id = rdma_get_ipv6_scope_id(cm_id->device, ib_event->param.req_rcvd.port); if (ib_event->param.req_rcvd.primary_path != NULL) rdma_addr_find_smac_by_sgid( &ib_event->param.req_rcvd.primary_path->sgid, psmac, NULL, scope_id); else psmac = NULL; if (ib_event->param.req_rcvd.alternate_path != NULL) rdma_addr_find_smac_by_sgid( &ib_event->param.req_rcvd.alternate_path->sgid, palt_smac, NULL, scope_id); else palt_smac = NULL; } /* * Acquire mutex to prevent user executing rdma_destroy_id() * while we're accessing the cm_id. */ mutex_lock(&lock); if (is_iboe && !is_sidr) ib_update_cm_av(cm_id, psmac, palt_smac); if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) { cma_dbg(container_of(&conn_id->id, struct rdma_id_private, id), "sending MRA\n"); ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); } mutex_unlock(&lock); mutex_unlock(&conn_id->handler_mutex); mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); return 0; err3: cma_deref_id(conn_id); /* Destroy the CM ID by returning a non-zero value. 
*/ conn_id->cm_id.ib = NULL; err2: cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); err1: mutex_unlock(&listen_id->handler_mutex); if (conn_id) rdma_destroy_id(&conn_id->id); return ret; } static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr) { return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr))); } static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, struct ib_cm_compare_data *compare) { struct cma_hdr *cma_data, *cma_mask; struct sdp_hh *sdp_data, *sdp_mask; __be32 ip4_addr; struct in6_addr ip6_addr; memset(compare, 0, sizeof *compare); cma_data = (void *) compare->data; cma_mask = (void *) compare->mask; sdp_data = (void *) compare->data; sdp_mask = (void *) compare->mask; switch (addr->sa_family) { case AF_INET: ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr; if (ps == RDMA_PS_SDP) { sdp_set_ip_ver(sdp_data, 4); sdp_set_ip_ver(sdp_mask, 0xF); if (!cma_any_addr(addr)) { sdp_data->dst_addr.ip4.addr = ip4_addr; sdp_mask->dst_addr.ip4.addr = htonl(~0); } } else { cma_set_ip_ver(cma_data, 4); cma_set_ip_ver(cma_mask, 0xF); if (!cma_any_addr(addr)) { cma_data->dst_addr.ip4.addr = ip4_addr; cma_mask->dst_addr.ip4.addr = htonl(~0); } } break; case AF_INET6: ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr; if (ps == RDMA_PS_SDP) { sdp_set_ip_ver(sdp_data, 6); sdp_set_ip_ver(sdp_mask, 0xF); if (!cma_any_addr(addr)) { sdp_data->dst_addr.ip6 = ip6_addr; memset(&sdp_mask->dst_addr.ip6, 0xFF, sizeof(sdp_mask->dst_addr.ip6)); } } else { cma_set_ip_ver(cma_data, 6); cma_set_ip_ver(cma_mask, 0xF); if (!cma_any_addr(addr)) { cma_data->dst_addr.ip6 = ip6_addr; memset(&cma_mask->dst_addr.ip6, 0xFF, sizeof(cma_mask->dst_addr.ip6)); } } break; default: break; } } static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) { struct rdma_id_private *id_priv = iw_id->context; struct rdma_cm_event event; struct sockaddr_in *sin; int ret = 0; if (cma_disable_callback(id_priv, RDMA_CM_CONNECT)) return 0; memset(&event, 0, sizeof event); switch (iw_event->event) { case IW_CM_EVENT_CLOSE: event.event = RDMA_CM_EVENT_DISCONNECTED; break; case IW_CM_EVENT_CONNECT_REPLY: sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; *sin = iw_event->local_addr; sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr; *sin = iw_event->remote_addr; switch ((int)iw_event->status) { case 0: event.event = RDMA_CM_EVENT_ESTABLISHED; event.param.conn.initiator_depth = iw_event->ird; event.param.conn.responder_resources = iw_event->ord; break; case -ECONNRESET: case -ECONNREFUSED: event.event = RDMA_CM_EVENT_REJECTED; break; case -ETIMEDOUT: event.event = RDMA_CM_EVENT_UNREACHABLE; break; default: event.event = RDMA_CM_EVENT_CONNECT_ERROR; break; } break; case IW_CM_EVENT_ESTABLISHED: event.event = RDMA_CM_EVENT_ESTABLISHED; event.param.conn.initiator_depth = iw_event->ird; event.param.conn.responder_resources = iw_event->ord; break; default: BUG_ON(1); } event.status = iw_event->status; event.param.conn.private_data = iw_event->private_data; event.param.conn.private_data_len = iw_event->private_data_len; ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. 
*/ id_priv->cm_id.iw = NULL; cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } mutex_unlock(&id_priv->handler_mutex); return ret; } static int iw_conn_req_handler(struct iw_cm_id *cm_id, struct iw_cm_event *iw_event) { struct rdma_cm_id *new_cm_id; struct rdma_id_private *listen_id, *conn_id; struct sockaddr_in *sin; struct net_device *dev = NULL; struct rdma_cm_event event; int ret; struct ib_device_attr attr; listen_id = cm_id->context; if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) return -ECONNABORTED; /* Create a new RDMA id for the new IW CM ID */ new_cm_id = rdma_create_id(listen_id->id.event_handler, listen_id->id.context, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(new_cm_id)) { ret = -ENOMEM; goto out; } conn_id = container_of(new_cm_id, struct rdma_id_private, id); mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); conn_id->state = RDMA_CM_CONNECT; dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr); if (!dev) { ret = -EADDRNOTAVAIL; mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL); if (ret) { mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } - ret = cma_acquire_dev(conn_id); + ret = cma_acquire_dev(conn_id, listen_id); if (ret) { mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } conn_id->cm_id.iw = cm_id; cm_id->context = conn_id; cm_id->cm_handler = cma_iw_handler; sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr; *sin = iw_event->local_addr; sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr; *sin = iw_event->remote_addr; ret = ib_query_device(conn_id->id.device, &attr); if (ret) { mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; event.param.conn.private_data = iw_event->private_data; event.param.conn.private_data_len = iw_event->private_data_len; event.param.conn.initiator_depth = iw_event->ird; event.param.conn.responder_resources = iw_event->ord; /* * Protect against the user destroying conn_id from another thread * until we're done accessing it. 
*/ atomic_inc(&conn_id->refcount); ret = conn_id->id.event_handler(&conn_id->id, &event); if (ret) { /* User wants to destroy the CM ID */ conn_id->cm_id.iw = NULL; cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); cma_deref_id(conn_id); rdma_destroy_id(&conn_id->id); goto out; } mutex_unlock(&conn_id->handler_mutex); cma_deref_id(conn_id); out: if (dev) dev_put(dev); mutex_unlock(&listen_id->handler_mutex); return ret; } static int cma_ib_listen(struct rdma_id_private *id_priv) { struct ib_cm_compare_data compare_data; struct sockaddr *addr; struct ib_cm_id *id; __be64 svc_id; int ret; id = ib_create_cm_id(id_priv->id.device, cma_req_handler, id_priv); if (IS_ERR(id)) return PTR_ERR(id); id_priv->cm_id.ib = id; addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; svc_id = cma_get_service_id(id_priv->id.ps, addr); if (cma_any_addr(addr) && !id_priv->afonly) ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL); else { cma_set_compare_data(id_priv->id.ps, addr, &compare_data); ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data); } if (ret) { ib_destroy_cm_id(id_priv->cm_id.ib); id_priv->cm_id.ib = NULL; } return ret; } static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) { int ret; struct sockaddr_in *sin; struct iw_cm_id *id; id = iw_create_cm_id(id_priv->id.device, id_priv->sock, iw_conn_req_handler, id_priv); if (IS_ERR(id)) return PTR_ERR(id); id_priv->cm_id.iw = id; sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; id_priv->cm_id.iw->local_addr = *sin; ret = iw_cm_listen(id_priv->cm_id.iw, backlog); if (ret) { iw_destroy_cm_id(id_priv->cm_id.iw); id_priv->cm_id.iw = NULL; } return ret; } static int cma_listen_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { struct rdma_id_private *id_priv = id->context; id->context = id_priv->id.context; id->event_handler = id_priv->id.event_handler; return id_priv->id.event_handler(id, event); } static void cma_listen_on_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { struct rdma_id_private *dev_id_priv; struct rdma_cm_id *id; int ret; id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps, id_priv->id.qp_type); if (IS_ERR(id)) return; dev_id_priv = container_of(id, struct rdma_id_private, id); dev_id_priv->state = RDMA_CM_ADDR_BOUND; dev_id_priv->sock = id_priv->sock; memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr, ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr)); cma_attach_to_dev(dev_id_priv, cma_dev); list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); atomic_inc(&id_priv->refcount); dev_id_priv->internal_id = 1; dev_id_priv->afonly = id_priv->afonly; ret = rdma_listen(id, id_priv->backlog); if (ret) cma_warn(id_priv, "cma_listen_on_dev, error %d, listening on device %s\n", ret, cma_dev->device->name); } static void cma_listen_on_all(struct rdma_id_private *id_priv) { struct cma_device *cma_dev; mutex_lock(&lock); list_add_tail(&id_priv->list, &listen_any_list); list_for_each_entry(cma_dev, &dev_list, list) cma_listen_on_dev(id_priv, cma_dev); mutex_unlock(&lock); } void rdma_set_service_type(struct rdma_cm_id *id, int tos) { struct rdma_id_private *id_priv; id_priv = container_of(id, struct rdma_id_private, id); id_priv->tos = (u8) tos; } EXPORT_SYMBOL(rdma_set_service_type); void rdma_set_timeout(struct rdma_cm_id *id, int timeout) { struct rdma_id_private *id_priv; id_priv = container_of(id, struct rdma_id_private, id); id_priv->qp_timeout = (u8) timeout; } 
EXPORT_SYMBOL(rdma_set_timeout); static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, void *context) { struct cma_work *work = context; struct rdma_route *route; route = &work->id->id.route; if (!status) { route->num_paths = 1; *route->path_rec = *path_rec; } else { work->old_state = RDMA_CM_ROUTE_QUERY; work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; work->event.status = status; } queue_work(cma_wq, &work->work); } static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, struct cma_work *work) { struct rdma_addr *addr = &id_priv->id.route.addr; struct ib_sa_path_rec path_rec; ib_sa_comp_mask comp_mask; struct sockaddr_in6 *sin6; memset(&path_rec, 0, sizeof path_rec); rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid); rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid); path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr)); path_rec.numb_path = 1; path_rec.reversible = 1; path_rec.service_id = cma_get_service_id(id_priv->id.ps, (struct sockaddr *) &addr->dst_addr); comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; if (addr->src_addr.ss_family == AF_INET) { path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); comp_mask |= IB_SA_PATH_REC_QOS_CLASS; } else { sin6 = (struct sockaddr_in6 *) &addr->src_addr; path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; } id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, id_priv->id.port_num, &path_rec, comp_mask, timeout_ms, GFP_KERNEL, cma_query_handler, work, &id_priv->query); return (id_priv->query_id < 0) ? id_priv->query_id : 0; } static void cma_work_handler(struct work_struct *_work) { struct cma_work *work = container_of(_work, struct cma_work, work); struct rdma_id_private *id_priv = work->id; int destroy = 0; mutex_lock(&id_priv->handler_mutex); if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) goto out; if (id_priv->id.event_handler(&id_priv->id, &work->event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); destroy = 1; } out: mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); if (destroy) rdma_destroy_id(&id_priv->id); kfree(work); } static void cma_ndev_work_handler(struct work_struct *_work) { struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work); struct rdma_id_private *id_priv = work->id; int destroy = 0; mutex_lock(&id_priv->handler_mutex); if (id_priv->state == RDMA_CM_DESTROYING || id_priv->state == RDMA_CM_DEVICE_REMOVAL) goto out; if (id_priv->id.event_handler(&id_priv->id, &work->event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); destroy = 1; } out: mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); if (destroy) rdma_destroy_id(&id_priv->id); kfree(work); } static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) { struct rdma_route *route = &id_priv->id.route; struct cma_work *work; int ret; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ROUTE_QUERY; work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); if (!route->path_rec) { ret = -ENOMEM; goto err1; } ret = cma_query_ib_route(id_priv, timeout_ms, work); if (ret) goto err2; return 0; err2: kfree(route->path_rec); 
route->path_rec = NULL; err1: kfree(work); return ret; } int rdma_set_ib_paths(struct rdma_cm_id *id, struct ib_sa_path_rec *path_rec, int num_paths) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_RESOLVED)) return -EINVAL; id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths, GFP_KERNEL); if (!id->route.path_rec) { ret = -ENOMEM; goto err; } id->route.num_paths = num_paths; return 0; err: cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); return ret; } EXPORT_SYMBOL(rdma_set_ib_paths); static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) { struct cma_work *work; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ROUTE_QUERY; work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; queue_work(cma_wq, &work->work); return 0; } static u8 tos_to_sl(u8 tos) { return def_prec2sl & 7; } static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) { struct rdma_route *route = &id_priv->id.route; struct rdma_addr *addr = &route->addr; struct cma_work *work; int ret; struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr; struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr; struct net_device *ndev = NULL; if (src_addr->sin_family != dst_addr->sin_family) return -EINVAL; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); if (!route->path_rec) { ret = -ENOMEM; goto err1; } route->num_paths = 1; if (addr->dev_addr.bound_dev_if) ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); if (!ndev) { ret = -ENODEV; goto err2; } route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev); memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); memcpy(route->path_rec->smac, IF_LLADDR(ndev), ndev->if_addrlen); rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &route->path_rec->sgid); rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, &route->path_rec->dgid); route->path_rec->hop_limit = 1; route->path_rec->reversible = 1; route->path_rec->pkey = cpu_to_be16(0xffff); route->path_rec->mtu_selector = IB_SA_EQ; route->path_rec->sl = tos_to_sl(id_priv->tos); route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu); route->path_rec->rate_selector = IB_SA_EQ; route->path_rec->rate = iboe_get_rate(ndev); dev_put(ndev); route->path_rec->packet_life_time_selector = IB_SA_EQ; route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; if (!route->path_rec->mtu) { ret = -EINVAL; goto err2; } work->old_state = RDMA_CM_ROUTE_QUERY; work->new_state = RDMA_CM_ROUTE_RESOLVED; work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; work->event.status = 0; queue_work(cma_wq, &work->work); return 0; err2: kfree(route->path_rec); route->path_rec = NULL; err1: kfree(work); return ret; } int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) return -EINVAL; atomic_inc(&id_priv->refcount); switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: switch (rdma_port_get_link_layer(id->device, 
id->port_num)) { case IB_LINK_LAYER_INFINIBAND: ret = cma_resolve_ib_route(id_priv, timeout_ms); break; case IB_LINK_LAYER_ETHERNET: ret = cma_resolve_iboe_route(id_priv); break; default: ret = -ENOSYS; } break; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_SCIF: ret = cma_resolve_iw_route(id_priv, timeout_ms); break; default: ret = -ENOSYS; break; } if (ret) goto err; return 0; err: cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); cma_deref_id(id_priv); return ret; } EXPORT_SYMBOL(rdma_resolve_route); int rdma_enable_apm(struct rdma_cm_id *id, enum alt_path_type alt_type) { /* APM is not supported yet */ return -EINVAL; } EXPORT_SYMBOL(rdma_enable_apm); static int cma_bind_loopback(struct rdma_id_private *id_priv) { struct cma_device *cma_dev; struct ib_port_attr port_attr; union ib_gid gid; u16 pkey; int ret; u8 p; mutex_lock(&lock); if (list_empty(&dev_list)) { ret = -ENODEV; goto out; } list_for_each_entry(cma_dev, &dev_list, list) for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p) if (!ib_query_port(cma_dev->device, p, &port_attr) && port_attr.state == IB_PORT_ACTIVE) goto port_found; p = 1; cma_dev = list_entry(dev_list.next, struct cma_device, list); port_found: ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid); if (ret) goto out; ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); if (ret) goto out; id_priv->id.route.addr.dev_addr.dev_type = (rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ? ARPHRD_INFINIBAND : ARPHRD_ETHER; rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); id_priv->id.port_num = p; cma_attach_to_dev(id_priv, cma_dev); out: mutex_unlock(&lock); return ret; } static void addr_handler(int status, struct sockaddr *src_addr, struct rdma_dev_addr *dev_addr, void *context) { struct rdma_id_private *id_priv = context; struct rdma_cm_event event; memset(&event, 0, sizeof event); mutex_lock(&id_priv->handler_mutex); if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_RESOLVED)) goto out; memcpy(&id_priv->id.route.addr.src_addr, src_addr, ip_addr_size(src_addr)); if (!status && !id_priv->cma_dev) - status = cma_acquire_dev(id_priv); + status = cma_acquire_dev(id_priv, NULL); if (status) { if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ADDR_BOUND)) goto out; event.event = RDMA_CM_EVENT_ADDR_ERROR; event.status = status; } else event.event = RDMA_CM_EVENT_ADDR_RESOLVED; if (id_priv->id.event_handler(&id_priv->id, &event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); rdma_destroy_id(&id_priv->id); return; } out: mutex_unlock(&id_priv->handler_mutex); cma_deref_id(id_priv); } static int cma_resolve_loopback(struct rdma_id_private *id_priv) { struct cma_work *work; struct sockaddr *src, *dst; union ib_gid gid; int ret; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; if (!id_priv->cma_dev) { ret = cma_bind_loopback(id_priv); if (ret) goto err; } rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); src = (struct sockaddr *) &id_priv->id.route.addr.src_addr; if (cma_zero_addr(src)) { dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr; if ((src->sa_family = dst->sa_family) == AF_INET) { ((struct sockaddr_in *)src)->sin_addr = ((struct sockaddr_in *)dst)->sin_addr; } else { ((struct sockaddr_in6 *)src)->sin6_addr = ((struct sockaddr_in6 *)dst)->sin6_addr; } } work->id = id_priv; 
INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ADDR_QUERY; work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; queue_work(cma_wq, &work->work); return 0; err: kfree(work); return ret; } static int cma_resolve_scif(struct rdma_id_private *id_priv) { struct cma_work *work; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; /* we probably can leave it empty here */ work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ADDR_QUERY; work->new_state = RDMA_CM_ADDR_RESOLVED; work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; queue_work(cma_wq, &work->work); return 0; } static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, struct sockaddr *dst_addr) { if (!src_addr || !src_addr->sa_family) { src_addr = (struct sockaddr *) &id->route.addr.src_addr; src_addr->sa_family = dst_addr->sa_family; #ifdef INET6 if (dst_addr->sa_family == AF_INET6) { ((struct sockaddr_in6 *) src_addr)->sin6_scope_id = ((struct sockaddr_in6 *) dst_addr)->sin6_scope_id; } #endif } if (!cma_any_addr(src_addr)) return rdma_bind_addr(id, src_addr); else { #if defined(INET6) || defined(INET) union { #ifdef INET struct sockaddr_in in; #endif #ifdef INET6 struct sockaddr_in6 in6; #endif } addr; #endif switch(dst_addr->sa_family) { #ifdef INET case AF_INET: memset(&addr.in, 0, sizeof(addr.in)); addr.in.sin_family = dst_addr->sa_family; addr.in.sin_len = sizeof(addr.in); return rdma_bind_addr(id, (struct sockaddr *)&addr.in); #endif #ifdef INET6 case AF_INET6: memset(&addr.in6, 0, sizeof(addr.in6)); addr.in6.sin6_family = dst_addr->sa_family; addr.in6.sin6_len = sizeof(addr.in6); addr.in6.sin6_scope_id = ((struct sockaddr_in6 *)dst_addr)->sin6_scope_id; return rdma_bind_addr(id, (struct sockaddr *)&addr.in6); #endif default: return -EINVAL; } } } int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, struct sockaddr *dst_addr, int timeout_ms) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (id_priv->state == RDMA_CM_IDLE) { ret = cma_bind_addr(id, src_addr, dst_addr); if (ret) return ret; } if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) return -EINVAL; atomic_inc(&id_priv->refcount); memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr)); if (cma_any_addr(dst_addr)) ret = cma_resolve_loopback(id_priv); else if (id_priv->id.device && rdma_node_get_transport(id_priv->id.device->node_type) == RDMA_TRANSPORT_SCIF) ret = cma_resolve_scif(id_priv); else ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr, dst_addr, &id->route.addr.dev_addr, timeout_ms, addr_handler, id_priv); if (ret) goto err; return 0; err: cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); cma_deref_id(id_priv); return ret; } EXPORT_SYMBOL(rdma_resolve_addr); int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) { struct rdma_id_private *id_priv; unsigned long flags; int ret; id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irqsave(&id_priv->lock, flags); if (id_priv->state == RDMA_CM_IDLE) { id_priv->reuseaddr = reuse; ret = 0; } else { ret = -EINVAL; } spin_unlock_irqrestore(&id_priv->lock, flags); return ret; } EXPORT_SYMBOL(rdma_set_reuseaddr); int rdma_set_afonly(struct rdma_cm_id *id, int afonly) { struct rdma_id_private *id_priv; unsigned long flags; int ret; id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irqsave(&id_priv->lock, flags); if 
(id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { id_priv->options |= (1 << CMA_OPTION_AFONLY); id_priv->afonly = afonly; ret = 0; } else { ret = -EINVAL; } spin_unlock_irqrestore(&id_priv->lock, flags); return ret; } EXPORT_SYMBOL(rdma_set_afonly); static void cma_bind_port(struct rdma_bind_list *bind_list, struct rdma_id_private *id_priv) { struct sockaddr_in *sin; sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; sin->sin_port = htons(bind_list->port); id_priv->bind_list = bind_list; hlist_add_head(&id_priv->node, &bind_list->owners); } static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv, unsigned short snum) { struct rdma_bind_list *bind_list; int port, ret; bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); if (!bind_list) return -ENOMEM; do { ret = idr_get_new_above(ps, bind_list, snum, &port); } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL)); if (ret) goto err1; if (port != snum) { ret = -EADDRNOTAVAIL; goto err2; } bind_list->ps = ps; bind_list->port = (unsigned short) port; cma_bind_port(bind_list, id_priv); return 0; err2: idr_remove(ps, port); err1: kfree(bind_list); return ret; } static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv) { static unsigned int last_used_port; int low, high, remaining; unsigned int rover; inet_get_local_port_range(&low, &high); remaining = (high - low) + 1; rover = random() % remaining + low; retry: if (last_used_port != rover && !idr_find(ps, (unsigned short) rover)) { int ret = cma_alloc_port(ps, id_priv, rover); /* * Remember previously used port number in order to avoid * re-using same port immediately after it is closed. */ if (!ret) last_used_port = rover; if (ret != -EADDRNOTAVAIL) return ret; } if (--remaining) { rover++; if ((rover < low) || (rover > high)) rover = low; goto retry; } return -EADDRNOTAVAIL; } /* * Check that the requested port is available. This is called when trying to * bind to a specific port, or when trying to listen on a bound port. In * the latter case, the provided id_priv may already be on the bind_list, but * we still need to check that it's okay to start listening. 
*/ static int cma_check_port(struct rdma_bind_list *bind_list, struct rdma_id_private *id_priv, uint8_t reuseaddr) { struct rdma_id_private *cur_id; struct sockaddr *addr, *cur_addr; addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; hlist_for_each_entry(cur_id, &bind_list->owners, node) { if (id_priv == cur_id) continue; if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && cur_id->reuseaddr) continue; cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr; if (id_priv->afonly && cur_id->afonly && (addr->sa_family != cur_addr->sa_family)) continue; if (cma_any_addr(addr) || cma_any_addr(cur_addr)) return -EADDRNOTAVAIL; if (!cma_addr_cmp(addr, cur_addr)) return -EADDRINUSE; } return 0; } static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list; unsigned short snum; int ret; snum = ntohs(cma_port((struct sockaddr *) &id_priv->id.route.addr.src_addr)); bind_list = idr_find(ps, snum); if (!bind_list) { ret = cma_alloc_port(ps, id_priv, snum); } else { ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); if (!ret) cma_bind_port(bind_list, id_priv); } return ret; } static int cma_bind_listen(struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list = id_priv->bind_list; int ret = 0; mutex_lock(&lock); if (bind_list->owners.first->next) ret = cma_check_port(bind_list, id_priv, 0); mutex_unlock(&lock); return ret; } static int cma_get_tcp_port(struct rdma_id_private *id_priv) { int ret; int size; struct socket *sock; ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret) return ret; #ifdef __linux__ ret = sock->ops->bind(sock, (struct sockaddr *) &id_priv->id.route.addr.src_addr, ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr)); #else ret = -sobind(sock, (struct sockaddr *)&id_priv->id.route.addr.src_addr, curthread); #endif if (ret) { sock_release(sock); return ret; } size = ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr); ret = sock_getname(sock, (struct sockaddr *) &id_priv->id.route.addr.src_addr, &size, 0); if (ret) { sock_release(sock); return ret; } id_priv->sock = sock; return 0; } static int cma_get_port(struct rdma_id_private *id_priv) { struct idr *ps; int ret; switch (id_priv->id.ps) { case RDMA_PS_SDP: ps = &sdp_ps; break; case RDMA_PS_TCP: ps = &tcp_ps; if (unify_tcp_port_space) { ret = cma_get_tcp_port(id_priv); if (ret) goto out; } break; case RDMA_PS_UDP: ps = &udp_ps; break; case RDMA_PS_IPOIB: ps = &ipoib_ps; break; case RDMA_PS_IB: ps = &ib_ps; break; default: return -EPROTONOSUPPORT; } mutex_lock(&lock); if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr)) ret = cma_alloc_any_port(ps, id_priv); else ret = cma_use_port(ps, id_priv); mutex_unlock(&lock); out: return ret; } static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, struct sockaddr *addr) { #if defined(INET6) struct sockaddr_in6 *sin6; if (addr->sa_family != AF_INET6) return 0; sin6 = (struct sockaddr_in6 *) addr; if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) && !sin6->sin6_scope_id) return -EINVAL; dev_addr->bound_dev_if = sin6->sin6_scope_id; #endif return 0; } int rdma_listen(struct rdma_cm_id *id, int backlog) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (id_priv->state == RDMA_CM_IDLE) { ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET; ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr); if (ret) return ret; } if (!cma_comp_exch(id_priv, 
RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) return -EINVAL; if (id_priv->reuseaddr) { ret = cma_bind_listen(id_priv); if (ret) goto err; } id_priv->backlog = backlog; if (id->device) { switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: ret = cma_ib_listen(id_priv); if (ret) goto err; break; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_SCIF: ret = cma_iw_listen(id_priv, backlog); if (ret) goto err; break; default: ret = -ENOSYS; goto err; } } else cma_listen_on_all(id_priv); return 0; err: id_priv->backlog = 0; cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); return ret; } EXPORT_SYMBOL(rdma_listen); int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) { struct rdma_id_private *id_priv; int ret; #if defined(INET6) int ipv6only; size_t var_size = sizeof(int); #endif if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) return -EAFNOSUPPORT; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) return -EINVAL; ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); if (ret) goto err1; memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr)); if (!cma_any_addr(addr)) { ret = rdma_translate_ip(addr, &id->route.addr.dev_addr, NULL); if (ret) goto err1; - ret = cma_acquire_dev(id_priv); + ret = cma_acquire_dev(id_priv, NULL); if (ret) goto err1; } if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { if (addr->sa_family == AF_INET) id_priv->afonly = 1; #if defined(INET6) else if (addr->sa_family == AF_INET6) id_priv->afonly = kernel_sysctlbyname(&thread0, "net.inet6.ip6.v6only", &ipv6only, &var_size, NULL, 0, NULL, 0); #endif } ret = cma_get_port(id_priv); if (ret) goto err2; return 0; err2: if (id_priv->cma_dev) cma_release_dev(id_priv); err1: cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); return ret; } EXPORT_SYMBOL(rdma_bind_addr); static int cma_format_hdr(void *hdr, enum rdma_port_space ps, struct rdma_route *route) { struct cma_hdr *cma_hdr; struct sdp_hh *sdp_hdr; if (route->addr.src_addr.ss_family == AF_INET) { struct sockaddr_in *src4, *dst4; src4 = (struct sockaddr_in *) &route->addr.src_addr; dst4 = (struct sockaddr_in *) &route->addr.dst_addr; switch (ps) { case RDMA_PS_SDP: sdp_hdr = hdr; if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) return -EINVAL; sdp_set_ip_ver(sdp_hdr, 4); sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; sdp_hdr->port = src4->sin_port; break; default: cma_hdr = hdr; cma_hdr->cma_version = CMA_VERSION; cma_set_ip_ver(cma_hdr, 4); cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; cma_hdr->port = src4->sin_port; break; } } else { struct sockaddr_in6 *src6, *dst6; src6 = (struct sockaddr_in6 *) &route->addr.src_addr; dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr; switch (ps) { case RDMA_PS_SDP: sdp_hdr = hdr; if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) return -EINVAL; sdp_set_ip_ver(sdp_hdr, 6); sdp_hdr->src_addr.ip6 = src6->sin6_addr; sdp_hdr->dst_addr.ip6 = dst6->sin6_addr; sdp_hdr->port = src6->sin6_port; break; default: cma_hdr = hdr; cma_hdr->cma_version = CMA_VERSION; cma_set_ip_ver(cma_hdr, 6); cma_hdr->src_addr.ip6 = src6->sin6_addr; cma_hdr->dst_addr.ip6 = dst6->sin6_addr; cma_hdr->port = src6->sin6_port; break; } } return 0; } static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { struct rdma_id_private *id_priv = cm_id->context; struct 
rdma_cm_event event; struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; int ret = 0; if (cma_disable_callback(id_priv, RDMA_CM_CONNECT)) return 0; memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_SIDR_REQ_ERROR: event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = -ETIMEDOUT; break; case IB_CM_SIDR_REP_RECEIVED: event.param.ud.private_data = ib_event->private_data; event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; if (rep->status != IB_SIDR_SUCCESS) { event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = ib_event->param.sidr_rep_rcvd.status; break; } ret = cma_set_qkey(id_priv); if (ret) { event.event = RDMA_CM_EVENT_ADDR_ERROR; event.status = -EINVAL; break; } if (id_priv->qkey != rep->qkey) { event.event = RDMA_CM_EVENT_UNREACHABLE; event.status = -EINVAL; break; } ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num, id_priv->id.route.path_rec, &event.param.ud.ah_attr); event.param.ud.qp_num = rep->qpn; event.param.ud.qkey = rep->qkey; event.event = RDMA_CM_EVENT_ESTABLISHED; event.status = 0; break; default: printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n", ib_event->event); goto out; } ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return ret; } out: mutex_unlock(&id_priv->handler_mutex); return ret; } static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_sidr_req_param req; struct rdma_route *route; struct ib_cm_id *id; int ret; req.private_data_len = sizeof(struct cma_hdr) + conn_param->private_data_len; if (req.private_data_len < conn_param->private_data_len) return -EINVAL; req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC); if (!req.private_data) return -ENOMEM; if (conn_param->private_data && conn_param->private_data_len) memcpy((void *) req.private_data + sizeof(struct cma_hdr), conn_param->private_data, conn_param->private_data_len); route = &id_priv->id.route; ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route); if (ret) goto out; id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, id_priv); if (IS_ERR(id)) { ret = PTR_ERR(id); goto out; } id_priv->cm_id.ib = id; req.path = route->path_rec; req.service_id = cma_get_service_id(id_priv->id.ps, (struct sockaddr *) &route->addr.dst_addr); req.timeout_ms = 1 << (cma_response_timeout - 8); req.max_cm_retries = CMA_MAX_CM_RETRIES; cma_dbg(id_priv, "sending SIDR\n"); ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); if (ret) { ib_destroy_cm_id(id_priv->cm_id.ib); id_priv->cm_id.ib = NULL; } out: kfree(req.private_data); return ret; } static int cma_connect_ib(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_req_param req; struct rdma_route *route; void *private_data; struct ib_cm_id *id; int offset, ret; memset(&req, 0, sizeof req); offset = cma_user_data_offset(id_priv->id.ps); req.private_data_len = offset + conn_param->private_data_len; if (req.private_data_len < conn_param->private_data_len) return -EINVAL; private_data = kzalloc(req.private_data_len, GFP_ATOMIC); if (!private_data) return -ENOMEM; if (conn_param->private_data && conn_param->private_data_len) memcpy(private_data + offset, conn_param->private_data, conn_param->private_data_len); id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, 
id_priv); if (IS_ERR(id)) { ret = PTR_ERR(id); goto out; } id_priv->cm_id.ib = id; route = &id_priv->id.route; ret = cma_format_hdr(private_data, id_priv->id.ps, route); if (ret) goto out; req.private_data = private_data; req.primary_path = &route->path_rec[0]; if (route->num_paths == 2) req.alternate_path = &route->path_rec[1]; req.service_id = cma_get_service_id(id_priv->id.ps, (struct sockaddr *) &route->addr.dst_addr); req.qp_num = id_priv->qp_num; req.qp_type = id_priv->id.qp_type; req.starting_psn = id_priv->seq_num; req.responder_resources = conn_param->responder_resources; req.initiator_depth = conn_param->initiator_depth; req.flow_control = conn_param->flow_control; req.retry_count = min_t(u8, 7, conn_param->retry_count); req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); req.remote_cm_response_timeout = cma_response_timeout; req.local_cm_response_timeout = cma_response_timeout; req.max_cm_retries = CMA_MAX_CM_RETRIES; req.srq = id_priv->srq ? 1 : 0; cma_dbg(id_priv, "sending REQ\n"); ret = ib_send_cm_req(id_priv->cm_id.ib, &req); out: if (ret && !IS_ERR(id)) { ib_destroy_cm_id(id); id_priv->cm_id.ib = NULL; } kfree(private_data); return ret; } static int cma_connect_iw(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct iw_cm_id *cm_id; struct sockaddr_in* sin; int ret; struct iw_cm_conn_param iw_param; cm_id = iw_create_cm_id(id_priv->id.device, id_priv->sock, cma_iw_handler, id_priv); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); id_priv->cm_id.iw = cm_id; sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr; cm_id->local_addr = *sin; sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; cm_id->remote_addr = *sin; ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) goto out; if (conn_param) { iw_param.ord = conn_param->initiator_depth; iw_param.ird = conn_param->responder_resources; iw_param.private_data = conn_param->private_data; iw_param.private_data_len = conn_param->private_data_len; iw_param.qpn = id_priv->id.qp ? 
id_priv->qp_num : conn_param->qp_num; } else { memset(&iw_param, 0, sizeof iw_param); iw_param.qpn = id_priv->qp_num; } ret = iw_cm_connect(cm_id, &iw_param); out: if (ret) { iw_destroy_cm_id(cm_id); id_priv->cm_id.iw = NULL; } return ret; } int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) return -EINVAL; if (!id->qp) { id_priv->qp_num = conn_param->qp_num; id_priv->srq = conn_param->srq; } switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: if (id->qp_type == IB_QPT_UD) ret = cma_resolve_ib_udp(id_priv, conn_param); else ret = cma_connect_ib(id_priv, conn_param); break; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_SCIF: ret = cma_connect_iw(id_priv, conn_param); break; default: ret = -ENOSYS; break; } if (ret) goto err; return 0; err: cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); return ret; } EXPORT_SYMBOL(rdma_connect); static int cma_accept_ib(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_rep_param rep; int ret; ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) goto out; ret = cma_modify_qp_rts(id_priv, conn_param); if (ret) goto out; memset(&rep, 0, sizeof rep); rep.qp_num = id_priv->qp_num; rep.starting_psn = id_priv->seq_num; rep.private_data = conn_param->private_data; rep.private_data_len = conn_param->private_data_len; rep.responder_resources = conn_param->responder_resources; rep.initiator_depth = conn_param->initiator_depth; rep.failover_accepted = 0; rep.flow_control = conn_param->flow_control; rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); rep.srq = id_priv->srq ? 
1 : 0; cma_dbg(id_priv, "sending REP\n"); ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); out: return ret; } static int cma_accept_iw(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct iw_cm_conn_param iw_param; int ret; if (!conn_param) return -EINVAL; ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) return ret; iw_param.ord = conn_param->initiator_depth; iw_param.ird = conn_param->responder_resources; iw_param.private_data = conn_param->private_data; iw_param.private_data_len = conn_param->private_data_len; if (id_priv->id.qp) { iw_param.qpn = id_priv->qp_num; } else iw_param.qpn = conn_param->qp_num; return iw_cm_accept(id_priv->cm_id.iw, &iw_param); } static int cma_send_sidr_rep(struct rdma_id_private *id_priv, enum ib_cm_sidr_status status, const void *private_data, int private_data_len) { struct ib_cm_sidr_rep_param rep; int ret; memset(&rep, 0, sizeof rep); rep.status = status; if (status == IB_SIDR_SUCCESS) { ret = cma_set_qkey(id_priv); if (ret) return ret; rep.qp_num = id_priv->qp_num; rep.qkey = id_priv->qkey; } rep.private_data = private_data; rep.private_data_len = private_data_len; cma_dbg(id_priv, "sending SIDR\n"); return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); } int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); id_priv->owner = curthread->td_proc->p_pid; if (!cma_comp(id_priv, RDMA_CM_CONNECT)) return -EINVAL; if (!id->qp && conn_param) { id_priv->qp_num = conn_param->qp_num; id_priv->srq = conn_param->srq; } switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: if (id->qp_type == IB_QPT_UD) { if (conn_param) ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, conn_param->private_data, conn_param->private_data_len); else ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, NULL, 0); } else { if (conn_param) ret = cma_accept_ib(id_priv, conn_param); else ret = cma_rep_recv(id_priv); } break; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_SCIF: ret = cma_accept_iw(id_priv, conn_param); break; default: ret = -ENOSYS; break; } if (ret) goto reject; return 0; reject: cma_modify_qp_err(id_priv); rdma_reject(id, NULL, 0); return ret; } EXPORT_SYMBOL(rdma_accept); int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!id_priv->cm_id.ib) return -EINVAL; switch (id->device->node_type) { case RDMA_NODE_IB_CA: ret = ib_cm_notify(id_priv->cm_id.ib, event); break; default: ret = 0; break; } return ret; } EXPORT_SYMBOL(rdma_notify); int rdma_reject(struct rdma_cm_id *id, const void *private_data, u8 private_data_len) { struct rdma_id_private *id_priv; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!id_priv->cm_id.ib) return -EINVAL; switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: if (id->qp_type == IB_QPT_UD) ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, private_data, private_data_len); else { cma_dbg(id_priv, "sending REJ\n"); ret = ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, private_data, private_data_len); } break; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_SCIF: ret = iw_cm_reject(id_priv->cm_id.iw, private_data, private_data_len); break; default: ret = -ENOSYS; break; } return ret; } EXPORT_SYMBOL(rdma_reject); int rdma_disconnect(struct rdma_cm_id *id) { struct rdma_id_private *id_priv; int ret; 
id_priv = container_of(id, struct rdma_id_private, id); if (!id_priv->cm_id.ib) return -EINVAL; switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: ret = cma_modify_qp_err(id_priv); if (ret) goto out; /* Initiate or respond to a disconnect. */ cma_dbg(id_priv, "sending DREQ\n"); if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) { cma_dbg(id_priv, "sending DREP\n"); ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); } break; case RDMA_TRANSPORT_IWARP: case RDMA_TRANSPORT_SCIF: ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); break; default: ret = -EINVAL; break; } out: return ret; } EXPORT_SYMBOL(rdma_disconnect); static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) { struct rdma_id_private *id_priv; struct cma_multicast *mc = multicast->context; struct rdma_cm_event event; struct rdma_dev_addr *dev_addr; int ret; struct net_device *ndev = NULL; u16 vlan; id_priv = mc->id_priv; dev_addr = &id_priv->id.route.addr.dev_addr; if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) && cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED)) return 0; mutex_lock(&id_priv->qp_mutex); if (!status && id_priv->id.qp) status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, be16_to_cpu(multicast->rec.mlid)); mutex_unlock(&id_priv->qp_mutex); memset(&event, 0, sizeof event); event.status = status; event.param.ud.private_data = mc->context; ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); if (!ndev) { status = -ENODEV; } else { vlan = rdma_vlan_dev_vlan_id(ndev); dev_put(ndev); } if (!status) { event.event = RDMA_CM_EVENT_MULTICAST_JOIN; ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num, &multicast->rec, &event.param.ud.ah_attr); event.param.ud.ah_attr.vlan_id = vlan; event.param.ud.qp_num = 0xFFFFFF; event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); } else { event.event = RDMA_CM_EVENT_MULTICAST_ERROR; /* mark that the cached record is no longer valid */ if (status != -ENETRESET && status != -EAGAIN) { spin_lock(&id_priv->lock); id_priv->is_valid_rec = 0; spin_unlock(&id_priv->lock); } } ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { cma_exch(id_priv, RDMA_CM_DESTROYING); mutex_unlock(&id_priv->handler_mutex); rdma_destroy_id(&id_priv->id); return 0; } mutex_unlock(&id_priv->handler_mutex); return 0; } static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr, union ib_gid *mgid) { unsigned char mc_map[MAX_ADDR_LEN]; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct sockaddr_in *sin = (struct sockaddr_in *) addr; #if defined(INET6) struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; #endif if (cma_any_addr(addr)) { memset(mgid, 0, sizeof *mgid); #if defined(INET6) } else if ((addr->sa_family == AF_INET6) && ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == 0xFF10A01B)) { /* IPv6 address is an SA assigned MGID. 
*/ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); } else if (addr->sa_family == AF_INET6) { ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) mc_map[7] = 0x01; /* Use RDMA CM signature */ *mgid = *(union ib_gid *) (mc_map + 4); #endif } else { ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) mc_map[7] = 0x01; /* Use RDMA CM signature */ *mgid = *(union ib_gid *) (mc_map + 4); } } static int cma_join_ib_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { struct ib_sa_mcmember_rec rec; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; ib_sa_comp_mask comp_mask; int ret = 0; ib_addr_get_mgid(dev_addr, &id_priv->rec.mgid); /* cache ipoib bc record */ spin_lock(&id_priv->lock); if (!id_priv->is_valid_rec) ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, &id_priv->rec.mgid, &id_priv->rec); if (ret) { id_priv->is_valid_rec = 0; spin_unlock(&id_priv->lock); return ret; } else { rec = id_priv->rec; id_priv->is_valid_rec = 1; } spin_unlock(&id_priv->lock); cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); if (id_priv->id.ps == RDMA_PS_UDP) rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); rdma_addr_get_sgid(dev_addr, &rec.port_gid); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = 1; comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | IB_SA_MCMEMBER_REC_FLOW_LABEL | IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; if (id_priv->id.ps == RDMA_PS_IPOIB) comp_mask |= IB_SA_MCMEMBER_REC_RATE | IB_SA_MCMEMBER_REC_RATE_SELECTOR | IB_SA_MCMEMBER_REC_MTU_SELECTOR | IB_SA_MCMEMBER_REC_MTU | IB_SA_MCMEMBER_REC_HOP_LIMIT; mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, id_priv->id.port_num, &rec, comp_mask, GFP_KERNEL, cma_ib_mc_handler, mc); return PTR_RET(mc->multicast.ib); } static void iboe_mcast_work_handler(struct work_struct *work) { struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work); struct cma_multicast *mc = mw->mc; struct ib_sa_multicast *m = mc->multicast.ib; mc->multicast.ib->context = mc; cma_ib_mc_handler(0, m); kref_put(&mc->mcref, release_mc); kfree(mw); } static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid) { struct sockaddr_in *sin = (struct sockaddr_in *)addr; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; if (cma_any_addr(addr)) { memset(mgid, 0, sizeof *mgid); } else if (addr->sa_family == AF_INET6) { memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); } else { mgid->raw[0] = 0xff; mgid->raw[1] = 0x0e; mgid->raw[2] = 0; mgid->raw[3] = 0; mgid->raw[4] = 0; mgid->raw[5] = 0; mgid->raw[6] = 0; mgid->raw[7] = 0; mgid->raw[8] = 0; mgid->raw[9] = 0; mgid->raw[10] = 0xff; mgid->raw[11] = 0xff; *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr; } } static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { struct iboe_mcast_work *work; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int err; struct sockaddr *addr = (struct sockaddr *)&mc->addr; struct net_device *ndev = NULL; if (cma_zero_addr((struct sockaddr *)&mc->addr)) return -EINVAL; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL); if (!mc->multicast.ib) { err = -ENOMEM; goto out1; } cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid); 
mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff); if (id_priv->id.ps == RDMA_PS_UDP) mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); if (dev_addr->bound_dev_if) ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); if (!ndev) { err = -ENODEV; goto out2; } mc->multicast.ib->rec.rate = iboe_get_rate(ndev); mc->multicast.ib->rec.hop_limit = 1; mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu); dev_put(ndev); if (!mc->multicast.ib->rec.mtu) { err = -EINVAL; goto out2; } rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &mc->multicast.ib->rec.port_gid); work->id = id_priv; work->mc = mc; INIT_WORK(&work->work, iboe_mcast_work_handler); kref_get(&mc->mcref); queue_work(cma_wq, &work->work); return 0; out2: kfree(mc->multicast.ib); out1: kfree(work); return err; } int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, void *context) { struct rdma_id_private *id_priv; struct cma_multicast *mc; int ret; id_priv = container_of(id, struct rdma_id_private, id); if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) && !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED)) return -EINVAL; mc = kmalloc(sizeof *mc, GFP_KERNEL); if (!mc) return -ENOMEM; memcpy(&mc->addr, addr, ip_addr_size(addr)); mc->context = context; mc->id_priv = id_priv; spin_lock(&id_priv->lock); list_add(&mc->list, &id_priv->mc_list); spin_unlock(&id_priv->lock); switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: switch (rdma_port_get_link_layer(id->device, id->port_num)) { case IB_LINK_LAYER_INFINIBAND: ret = cma_join_ib_multicast(id_priv, mc); break; case IB_LINK_LAYER_ETHERNET: kref_init(&mc->mcref); ret = cma_iboe_join_multicast(id_priv, mc); break; default: ret = -EINVAL; } break; default: ret = -ENOSYS; break; } if (ret) { spin_lock_irq(&id_priv->lock); list_del(&mc->list); spin_unlock_irq(&id_priv->lock); kfree(mc); } return ret; } EXPORT_SYMBOL(rdma_join_multicast); void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) { struct rdma_id_private *id_priv; struct cma_multicast *mc; id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irq(&id_priv->lock); list_for_each_entry(mc, &id_priv->mc_list, list) { if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) { list_del(&mc->list); spin_unlock_irq(&id_priv->lock); if (id->qp) ib_detach_mcast(id->qp, &mc->multicast.ib->rec.mgid, be16_to_cpu(mc->multicast.ib->rec.mlid)); if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) { switch (rdma_port_get_link_layer(id->device, id->port_num)) { case IB_LINK_LAYER_INFINIBAND: ib_sa_free_multicast(mc->multicast.ib); kfree(mc); break; case IB_LINK_LAYER_ETHERNET: kref_put(&mc->mcref, release_mc); break; default: break; } } return; } } spin_unlock_irq(&id_priv->lock); } EXPORT_SYMBOL(rdma_leave_multicast); static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr; struct cma_ndev_work *work; dev_addr = &id_priv->id.route.addr.dev_addr; if ((dev_addr->bound_dev_if == ndev->if_index) && memcmp(dev_addr->src_dev_addr, IF_LLADDR(ndev), ndev->if_addrlen)) { printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", ndev->if_xname, &id_priv->id); work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; INIT_WORK(&work->work, cma_ndev_work_handler); work->id = id_priv; work->event.event = RDMA_CM_EVENT_ADDR_CHANGE; atomic_inc(&id_priv->refcount); queue_work(cma_wq, &work->work); } return 0; } static int cma_netdev_callback(struct notifier_block *self, 
unsigned long event, void *ctx) { struct net_device *ndev = (struct net_device *)ctx; struct cma_device *cma_dev; struct rdma_id_private *id_priv; int ret = NOTIFY_DONE; /* BONDING related, commented out until the bonding is resolved */ #if 0 if (dev_net(ndev) != &init_net) return NOTIFY_DONE; if (event != NETDEV_BONDING_FAILOVER) return NOTIFY_DONE; if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING)) return NOTIFY_DONE; #endif if (event != NETDEV_DOWN && event != NETDEV_UNREGISTER) return NOTIFY_DONE; mutex_lock(&lock); list_for_each_entry(cma_dev, &dev_list, list) list_for_each_entry(id_priv, &cma_dev->id_list, list) { ret = cma_netdev_change(ndev, id_priv); if (ret) goto out; } out: mutex_unlock(&lock); return ret; } static struct notifier_block cma_nb = { .notifier_call = cma_netdev_callback }; static void cma_add_one(struct ib_device *device) { struct cma_device *cma_dev; struct rdma_id_private *id_priv; cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); if (!cma_dev) return; cma_dev->device = device; init_completion(&cma_dev->comp); atomic_set(&cma_dev->refcount, 1); INIT_LIST_HEAD(&cma_dev->id_list); ib_set_client_data(device, &cma_client, cma_dev); mutex_lock(&lock); list_add_tail(&cma_dev->list, &dev_list); list_for_each_entry(id_priv, &listen_any_list, list) cma_listen_on_dev(id_priv, cma_dev); mutex_unlock(&lock); } static int cma_remove_id_dev(struct rdma_id_private *id_priv) { struct rdma_cm_event event; enum rdma_cm_state state; int ret = 0; /* Record that we want to remove the device */ state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL); if (state == RDMA_CM_DESTROYING) return 0; cma_cancel_operation(id_priv, state); mutex_lock(&id_priv->handler_mutex); /* Check for destruction from another callback. */ if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) goto out; memset(&event, 0, sizeof event); event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; ret = id_priv->id.event_handler(&id_priv->id, &event); out: mutex_unlock(&id_priv->handler_mutex); return ret; } static void cma_process_remove(struct cma_device *cma_dev) { struct rdma_id_private *id_priv; int ret; mutex_lock(&lock); while (!list_empty(&cma_dev->id_list)) { id_priv = list_entry(cma_dev->id_list.next, struct rdma_id_private, list); list_del(&id_priv->listen_list); list_del_init(&id_priv->list); atomic_inc(&id_priv->refcount); mutex_unlock(&lock); ret = id_priv->internal_id ? 
1 : cma_remove_id_dev(id_priv); cma_deref_id(id_priv); if (ret) rdma_destroy_id(&id_priv->id); mutex_lock(&lock); } mutex_unlock(&lock); cma_deref_dev(cma_dev); wait_for_completion(&cma_dev->comp); } static void cma_remove_one(struct ib_device *device) { struct cma_device *cma_dev; cma_dev = ib_get_client_data(device, &cma_client); if (!cma_dev) return; mutex_lock(&lock); list_del(&cma_dev->list); mutex_unlock(&lock); cma_process_remove(cma_dev); kfree(cma_dev); } static int __init cma_init(void) { int ret = -ENOMEM; cma_wq = create_singlethread_workqueue("rdma_cm"); if (!cma_wq) return -ENOMEM; cma_free_wq = create_singlethread_workqueue("rdma_cm_fr"); if (!cma_free_wq) goto err1; ib_sa_register_client(&sa_client); rdma_addr_register_client(&addr_client); register_netdevice_notifier(&cma_nb); ret = ib_register_client(&cma_client); if (ret) goto err; return 0; err: unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_free_wq); err1: destroy_workqueue(cma_wq); return ret; } static void __exit cma_cleanup(void) { ib_unregister_client(&cma_client); unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); flush_workqueue(cma_free_wq); destroy_workqueue(cma_free_wq); destroy_workqueue(cma_wq); idr_destroy(&sdp_ps); idr_destroy(&tcp_ps); idr_destroy(&udp_ps); idr_destroy(&ipoib_ps); idr_destroy(&ib_ps); } module_init(cma_init); module_exit(cma_cleanup); Index: user/alc/PQ_LAUNDRY/sys/vm/vm_pageout.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/vm/vm_pageout.c (revision 303641) +++ user/alc/PQ_LAUNDRY/sys/vm/vm_pageout.c (revision 303642) @@ -1,2177 +1,2178 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2005 Yahoo! Technologies Norway AS * All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * The proverbial page-out daemon. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * System initialization */ /* the kernel process "vm_pageout"*/ static void vm_pageout(void); static void vm_pageout_init(void); static int vm_pageout_clean(vm_page_t m, int *numpagedout); static int vm_pageout_cluster(vm_page_t m); static void vm_pageout_scan(struct vm_domain *vmd, int pass); static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage, int starting_page_shortage); SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init, NULL); struct proc *pageproc; static struct kproc_desc page_kp = { "pagedaemon", vm_pageout, &pageproc }; SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, &page_kp); SDT_PROVIDER_DEFINE(vm); SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan); #if !defined(NO_SWAPPING) /* the kernel process "vm_daemon"*/ static void vm_daemon(void); static struct proc *vmproc; static struct kproc_desc vm_kp = { "vmdaemon", vm_daemon, &vmproc }; SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp); #endif /* Sleep intervals for pagedaemon threads, in subdivisions of one second. */ #define VM_LAUNDER_INTERVAL 10 #define VM_INACT_SCAN_INTERVAL 2 #define VM_LAUNDER_RATE (VM_LAUNDER_INTERVAL / VM_INACT_SCAN_INTERVAL) int vm_pageout_deficit; /* Estimated number of pages deficit */ u_int vm_pageout_wakeup_thresh; static int vm_pageout_oom_seq = 12; bool vm_pageout_wanted; /* Event on which pageout daemon sleeps */ bool vm_pages_needed; /* Are threads waiting for free pages? 
*/ #if !defined(NO_SWAPPING) static int vm_pageout_req_swapout; /* XXX */ static int vm_daemon_needed; static struct mtx vm_daemon_mtx; /* Allow for use by vm_pageout before vm_daemon is initialized. */ MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF); #endif static int vm_pageout_update_period; static int disable_swap_pageouts; static int lowmem_period = 10; static time_t lowmem_uptime; #if defined(NO_SWAPPING) static int vm_swap_enabled = 0; static int vm_swap_idle_enabled = 0; #else static int vm_swap_enabled = 1; static int vm_swap_idle_enabled = 0; #endif static int vm_panic_on_oom = 0; SYSCTL_INT(_vm, OID_AUTO, panic_on_oom, CTLFLAG_RWTUN, &vm_panic_on_oom, 0, "panic on out of memory instead of killing the largest process"); SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh, CTLFLAG_RW, &vm_pageout_wakeup_thresh, 0, "free page threshold for waking up the pageout daemon"); SYSCTL_INT(_vm, OID_AUTO, pageout_update_period, CTLFLAG_RW, &vm_pageout_update_period, 0, "Maximum active LRU update period"); SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RW, &lowmem_period, 0, "Low memory callback period"); #if defined(NO_SWAPPING) SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RD, &vm_swap_enabled, 0, "Enable entire process swapout"); SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RD, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); #else SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout"); SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); #endif SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts, CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages"); static int pageout_lock_miss; SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss, CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout"); SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq, CTLFLAG_RW, &vm_pageout_oom_seq, 0, "back-to-back calls to oom detector to start OOM"); static int act_scan_laundry_weight = 3; SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RW, &act_scan_laundry_weight, 0, "weight given to clean vs. dirty pages in active queue scans"); static u_int bkgrd_launder_ratio = 50; SYSCTL_UINT(_vm, OID_AUTO, bkgrd_launder_ratio, CTLFLAG_RW, &bkgrd_launder_ratio, 0, "ratio of clean to dirty inactive pages needed to trigger laundering"); static u_int bkgrd_launder_max = 2048; SYSCTL_UINT(_vm, OID_AUTO, bkgrd_launder_max, CTLFLAG_RW, &bkgrd_launder_max, 0, "maximum background laundering rate, in pages per second"); #define VM_PAGEOUT_PAGE_COUNT 16 int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT; int vm_page_max_wired; /* XXX max # of wired pages system-wide */ SYSCTL_INT(_vm, OID_AUTO, max_wired, CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count"); static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *); static int vm_pageout_launder(struct vm_domain *vmd, int launder); static void vm_pageout_laundry_worker(void *arg); #if !defined(NO_SWAPPING) static void vm_pageout_map_deactivate_pages(vm_map_t, long); static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long); static void vm_req_vmdaemon(int req); #endif static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *); /* * Initialize a dummy page for marking the caller's place in the specified * paging queue. In principle, this function only needs to set the flag * PG_MARKER. 
Nonetheless, it write busies and initializes the hold count * to one as safety precautions. */ static void vm_pageout_init_marker(vm_page_t marker, u_short queue) { bzero(marker, sizeof(*marker)); marker->flags = PG_MARKER; marker->busy_lock = VPB_SINGLE_EXCLUSIVER; marker->queue = queue; marker->hold_count = 1; } /* * vm_pageout_fallback_object_lock: * * Lock vm object currently associated with `m'. VM_OBJECT_TRYWLOCK is * known to have failed and page queue must be either PQ_ACTIVE or * PQ_INACTIVE. To avoid lock order violation, unlock the page queue * while locking the vm object. Use marker page to detect page queue * changes and maintain notion of next page on page queue. Return * TRUE if no changes were detected, FALSE otherwise. vm object is * locked on return. * * This function depends on both the lock portion of struct vm_object * and normal struct vm_page being type stable. */ static boolean_t vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next) { struct vm_page marker; struct vm_pagequeue *pq; boolean_t unchanged; u_short queue; vm_object_t object; queue = m->queue; vm_pageout_init_marker(&marker, queue); pq = vm_page_pagequeue(m); object = m->object; TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q); vm_pagequeue_unlock(pq); vm_page_unlock(m); VM_OBJECT_WLOCK(object); vm_page_lock(m); vm_pagequeue_lock(pq); /* * The page's object might have changed, and/or the page might * have moved from its original position in the queue. If the * page's object has changed, then the caller should abandon * processing the page because the wrong object lock was * acquired. Use the marker's plinks.q, not the page's, to * determine if the page has been moved. The state of the * page's plinks.q can be indeterminate; whereas, the marker's * plinks.q must be valid. */ *next = TAILQ_NEXT(&marker, plinks.q); unchanged = m->object == object && m == TAILQ_PREV(&marker, pglist, plinks.q); KASSERT(!unchanged || m->queue == queue, ("page %p queue %d %d", m, queue, m->queue)); TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q); return (unchanged); } /* * Lock the page while holding the page queue lock. Use marker page * to detect page queue changes and maintain notion of next page on * page queue. Return TRUE if no changes were detected, FALSE * otherwise. The page is locked on return. The page queue lock might * be dropped and reacquired. * * This function depends on normal struct vm_page being type stable. */ static boolean_t vm_pageout_page_lock(vm_page_t m, vm_page_t *next) { struct vm_page marker; struct vm_pagequeue *pq; boolean_t unchanged; u_short queue; vm_page_lock_assert(m, MA_NOTOWNED); if (vm_page_trylock(m)) return (TRUE); queue = m->queue; vm_pageout_init_marker(&marker, queue); pq = vm_page_pagequeue(m); TAILQ_INSERT_AFTER(&pq->pq_pl, m, &marker, plinks.q); vm_pagequeue_unlock(pq); vm_page_lock(m); vm_pagequeue_lock(pq); /* Page queue might have changed. */ *next = TAILQ_NEXT(&marker, plinks.q); unchanged = m == TAILQ_PREV(&marker, pglist, plinks.q); KASSERT(!unchanged || m->queue == queue, ("page %p queue %d %d", m, queue, m->queue)); TAILQ_REMOVE(&pq->pq_pl, &marker, plinks.q); return (unchanged); } /* * vm_pageout_clean: * * Clean the page and remove it from the laundry. * * We set the busy bit to cause potential page faults on this page to * block. Note the careful timing, however, the busy bit isn't set till * late and we cannot do anything that will mess with the page.
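Both helpers above lean on the same trick: a PG_MARKER dummy page is inserted after the page of interest so that the page queue lock can be dropped and retaken without losing the scan position, and TAILQ_PREV() on the marker tells the caller whether the page moved in the meantime. A self-contained sketch of that pattern on a plain TAILQ follows; struct item, process_with_marker() and the printf are illustrative inventions, not kernel types.

#include <sys/queue.h>
#include <stdbool.h>
#include <stdio.h>

struct item {
	TAILQ_ENTRY(item) link;
	int	value;
	bool	marker;		/* plays the role of PG_MARKER */
};

TAILQ_HEAD(itemq, item);

/*
 * Process "cur" while the queue lock must be dropped: park a marker right
 * after it, drop the lock, do the work, retake the lock, then resume the
 * scan from the marker.  Returns the next element to visit, much like the
 * *next out-parameter of vm_pageout_fallback_object_lock().
 */
static struct item *
process_with_marker(struct itemq *q, struct item *cur, struct item *marker)
{
	struct item *next;
	bool unchanged;

	TAILQ_INSERT_AFTER(q, cur, marker, link);
	/* ... queue lock dropped here; other threads may requeue "cur" ... */
	/* ... queue lock reacquired here ... */
	next = TAILQ_NEXT(marker, link);
	unchanged = (cur == TAILQ_PREV(marker, itemq, link));
	TAILQ_REMOVE(q, marker, link);
	if (!unchanged)
		printf("item %d moved while unlocked\n", cur->value);
	return (next);
}

int
main(void)
{
	struct itemq q = TAILQ_HEAD_INITIALIZER(q);
	struct item a = { .value = 1 }, b = { .value = 2 }, m = { .marker = true };
	struct item *it, *next;

	TAILQ_INSERT_TAIL(&q, &a, link);
	TAILQ_INSERT_TAIL(&q, &b, link);
	for (it = TAILQ_FIRST(&q); it != NULL; it = next)
		next = process_with_marker(&q, it, &m);
	return (0);
}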
*/ static int vm_pageout_cluster(vm_page_t m) { vm_object_t object; vm_page_t mc[2*vm_pageout_page_count], pb, ps; int pageout_count; int ib, is, page_base; vm_pindex_t pindex = m->pindex; vm_page_lock_assert(m, MA_OWNED); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); /* * It doesn't cost us anything to pageout OBJT_DEFAULT or OBJT_SWAP * with the new swapper, but we could have serious problems paging * out other object types if there is insufficient memory. * * Unfortunately, checking free memory here is far too late, so the * check has been moved up a procedural level. */ /* * Can't clean the page if it's busy or held. */ vm_page_assert_unbusied(m); KASSERT(m->hold_count == 0, ("vm_pageout_clean: page %p is held", m)); vm_page_dequeue(m); vm_page_unlock(m); mc[vm_pageout_page_count] = pb = ps = m; pageout_count = 1; page_base = vm_pageout_page_count; ib = 1; is = 1; /* * Scan object for clusterable pages. * * We can cluster ONLY if: ->> the page is NOT * clean, wired, busy, held, or mapped into a * buffer, and one of the following: * 1) The page is in the laundry. * -or- * 2) we force the issue. * * During heavy mmap/modification loads the pageout * daemon can really fragment the underlying file * due to flushing pages out of order and not trying * align the clusters (which leave sporatic out-of-order * holes). To solve this problem we do the reverse scan * first and attempt to align our cluster, then do a * forward scan if room remains. */ more: while (ib && pageout_count < vm_pageout_page_count) { vm_page_t p; if (ib > pindex) { ib = 0; break; } if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) { ib = 0; break; } vm_page_test_dirty(p); if (p->dirty == 0) { ib = 0; break; } vm_page_lock(p); if (!vm_page_in_laundry(p) || p->hold_count != 0) { /* may be undergoing I/O */ vm_page_unlock(p); ib = 0; break; } vm_page_dequeue(p); vm_page_unlock(p); mc[--page_base] = pb = p; ++pageout_count; ++ib; /* * alignment boundary, stop here and switch directions. Do * not clear ib. */ if ((pindex - (ib - 1)) % vm_pageout_page_count == 0) break; } while (pageout_count < vm_pageout_page_count && pindex + is < object->size) { vm_page_t p; if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p)) break; vm_page_test_dirty(p); if (p->dirty == 0) break; vm_page_lock(p); if (!vm_page_in_laundry(p) || p->hold_count != 0) { /* may be undergoing I/O */ vm_page_unlock(p); break; } vm_page_dequeue(p); vm_page_unlock(p); mc[page_base + pageout_count] = ps = p; ++pageout_count; ++is; } /* * If we exhausted our forward scan, continue with the reverse scan * when possible, even past a page boundary. This catches boundary * conditions. */ if (ib && pageout_count < vm_pageout_page_count) goto more; /* * we allow reads during pageouts... */ return (vm_pageout_flush(&mc[page_base], pageout_count, 0, 0, NULL, NULL)); } /* * vm_pageout_flush() - launder the given pages * * The given pages are laundered. Note that we setup for the start of * I/O ( i.e. busy the page ), mark it read-only, and bump the object * reference count all in here rather then in the parent. If we want * the parent to do more sophisticated things we may have to change * the ordering. * * Returned runlen is the count of pages between mreq and first * page after mreq with status VM_PAGER_AGAIN. * *eio is set to TRUE if pager returned VM_PAGER_ERROR or VM_PAGER_FAIL * for any page in runlen set. 
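The runlen/eio contract spelled out above is small enough to state on its own: runlen counts the pages from mreq up to, but not including, the first VM_PAGER_AGAIN result, and *eio records whether a hard pager error landed inside that run. The sketch below mirrors that bookkeeping with made-up status constants rather than the real VM_PAGER_* values.

#include <stdbool.h>
#include <stdio.h>

enum pager_status { PAGER_OK, PAGER_PEND, PAGER_AGAIN, PAGER_ERROR };

/*
 * Mirror the runlen/eio accounting in vm_pageout_flush(): runlen is the
 * number of pages starting at mreq before the first AGAIN status, and
 * *eio is set if an ERROR lands inside that run.
 */
static int
compute_runlen(const enum pager_status *status, int count, int mreq, bool *eio)
{
	int i, runlen = count - mreq;

	*eio = false;
	for (i = 0; i < count; i++) {
		if (status[i] == PAGER_AGAIN && i >= mreq && i - mreq < runlen)
			runlen = i - mreq;
		else if (status[i] == PAGER_ERROR && i >= mreq && i - mreq < runlen)
			*eio = true;
	}
	return (runlen);
}

int
main(void)
{
	enum pager_status st[] =
	    { PAGER_OK, PAGER_ERROR, PAGER_OK, PAGER_AGAIN, PAGER_OK };
	bool eio;
	int runlen = compute_runlen(st, 5, 1, &eio);

	printf("runlen=%d eio=%d\n", runlen, eio);	/* prints runlen=2 eio=1 */
	return (0);
}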
*/ int vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen, boolean_t *eio) { vm_object_t object = mc[0]->object; int pageout_status[count]; int numpagedout = 0; int i, runlen; VM_OBJECT_ASSERT_WLOCKED(object); /* * Initiate I/O. Bump the vm_page_t->busy counter and * mark the pages read-only. * * We do not have to fixup the clean/dirty bits here... we can * allow the pager to do it after the I/O completes. * * NOTE! mc[i]->dirty may be partial or fragmented due to an * edge case with file fragments. */ for (i = 0; i < count; i++) { KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, ("vm_pageout_flush: partially invalid page %p index %d/%d", mc[i], i, count)); vm_page_sbusy(mc[i]); pmap_remove_write(mc[i]); } vm_object_pip_add(object, count); vm_pager_put_pages(object, mc, count, flags, pageout_status); runlen = count - mreq; if (eio != NULL) *eio = FALSE; for (i = 0; i < count; i++) { vm_page_t mt = mc[i]; KASSERT(pageout_status[i] == VM_PAGER_PEND || !pmap_page_is_write_mapped(mt), ("vm_pageout_flush: page %p is not write protected", mt)); switch (pageout_status[i]) { case VM_PAGER_OK: case VM_PAGER_PEND: numpagedout++; break; case VM_PAGER_BAD: /* * Page outside of range of object. Right now we * essentially lose the changes by pretending it * worked. */ vm_page_undirty(mt); vm_page_lock(mt); vm_page_deactivate(mt); vm_page_unlock(mt); break; case VM_PAGER_ERROR: case VM_PAGER_FAIL: /* * If the page couldn't be paged out, then reactivate * it so that it doesn't clog the laundry and inactive * queues. (We will try paging it out again later). */ vm_page_lock(mt); vm_page_activate(mt); vm_page_unlock(mt); if (eio != NULL && i >= mreq && i - mreq < runlen) *eio = TRUE; break; case VM_PAGER_AGAIN: if (i >= mreq && i - mreq < runlen) runlen = i - mreq; break; } /* * If the operation is still going, leave the page busy to * block all other accesses. Also, leave the paging in * progress indicator set so that we don't attempt an object * collapse. */ if (pageout_status[i] != VM_PAGER_PEND) { vm_object_pip_wakeup(object); vm_page_sunbusy(mt); } } if (prunlen != NULL) *prunlen = runlen; return (numpagedout); } #if !defined(NO_SWAPPING) /* * vm_pageout_object_deactivate_pages * * Deactivate enough pages to satisfy the inactive target * requirements. * * The object and map must be locked. */ static void vm_pageout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object, long desired) { vm_object_t backing_object, object; vm_page_t p; int act_delta, remove_mode; VM_OBJECT_ASSERT_LOCKED(first_object); if ((first_object->flags & OBJ_FICTITIOUS) != 0) return; for (object = first_object;; object = backing_object) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; VM_OBJECT_ASSERT_LOCKED(object); if ((object->flags & OBJ_UNMANAGED) != 0 || object->paging_in_progress != 0) goto unlock_return; remove_mode = 0; if (object->shadow_count > 1) remove_mode = 1; /* * Scan the object's entire memory queue. 
*/ TAILQ_FOREACH(p, &object->memq, listq) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; if (vm_page_busied(p)) continue; PCPU_INC(cnt.v_pdpages); vm_page_lock(p); if (p->wire_count != 0 || p->hold_count != 0 || !pmap_page_exists_quick(pmap, p)) { vm_page_unlock(p); continue; } act_delta = pmap_ts_referenced(p); if ((p->aflags & PGA_REFERENCED) != 0) { if (act_delta == 0) act_delta = 1; vm_page_aflag_clear(p, PGA_REFERENCED); } if (!vm_page_active(p) && act_delta != 0) { vm_page_activate(p); p->act_count += act_delta; } else if (vm_page_active(p)) { if (act_delta == 0) { p->act_count -= min(p->act_count, ACT_DECLINE); if (!remove_mode && p->act_count == 0) { pmap_remove_all(p); vm_page_deactivate(p); } else vm_page_requeue(p); } else { vm_page_activate(p); if (p->act_count < ACT_MAX - ACT_ADVANCE) p->act_count += ACT_ADVANCE; vm_page_requeue(p); } } else if (vm_page_inactive(p)) pmap_remove_all(p); vm_page_unlock(p); } if ((backing_object = object->backing_object) == NULL) goto unlock_return; VM_OBJECT_RLOCK(backing_object); if (object != first_object) VM_OBJECT_RUNLOCK(object); } unlock_return: if (object != first_object) VM_OBJECT_RUNLOCK(object); } /* * deactivate some number of pages in a map, try to do it fairly, but * that is really hard to do. */ static void vm_pageout_map_deactivate_pages(map, desired) vm_map_t map; long desired; { vm_map_entry_t tmpe; vm_object_t obj, bigobj; int nothingwired; if (!vm_map_trylock(map)) return; bigobj = NULL; nothingwired = TRUE; /* * first, search out the biggest object, and try to free pages from * that. */ tmpe = map->header.next; while (tmpe != &map->header) { if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { obj = tmpe->object.vm_object; if (obj != NULL && VM_OBJECT_TRYRLOCK(obj)) { if (obj->shadow_count <= 1 && (bigobj == NULL || bigobj->resident_page_count < obj->resident_page_count)) { if (bigobj != NULL) VM_OBJECT_RUNLOCK(bigobj); bigobj = obj; } else VM_OBJECT_RUNLOCK(obj); } } if (tmpe->wired_count > 0) nothingwired = FALSE; tmpe = tmpe->next; } if (bigobj != NULL) { vm_pageout_object_deactivate_pages(map->pmap, bigobj, desired); VM_OBJECT_RUNLOCK(bigobj); } /* * Next, hunt around for other pages to deactivate. We actually * do this search sort of wrong -- .text first is not the best idea. */ tmpe = map->header.next; while (tmpe != &map->header) { if (pmap_resident_count(vm_map_pmap(map)) <= desired) break; if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { obj = tmpe->object.vm_object; if (obj != NULL) { VM_OBJECT_RLOCK(obj); vm_pageout_object_deactivate_pages(map->pmap, obj, desired); VM_OBJECT_RUNLOCK(obj); } } tmpe = tmpe->next; } /* * Remove all mappings if a process is swapped out, this will free page * table pages. */ if (desired == 0 && nothingwired) { pmap_remove(vm_map_pmap(map), vm_map_min(map), vm_map_max(map)); } vm_map_unlock(map); } #endif /* !defined(NO_SWAPPING) */ /* * Attempt to acquire all of the necessary locks to launder a page and * then call through the clustering layer to PUTPAGES. Wait a short * time for a vnode lock. * * Requires the page and object lock on entry, releases both before return. * Returns 0 on success and an errno otherwise. */ static int vm_pageout_clean(vm_page_t m, int *numpagedout) { struct vnode *vp; struct mount *mp; vm_object_t object; vm_pindex_t pindex; int error, lockmode; vm_page_assert_locked(m); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); error = 0; vp = NULL; mp = NULL; /* * The object is already known NOT to be dead. 
It * is possible for the vget() to block the whole * pageout daemon, but the new low-memory handling * code should prevent it. * * We can't wait forever for the vnode lock, we might * deadlock due to a vn_read() getting stuck in * vm_wait while holding this vnode. We skip the * vnode if we can't get it in a reasonable amount * of time. */ if (object->type == OBJT_VNODE) { vm_page_unlock(m); vp = object->handle; if (vp->v_type == VREG && vn_start_write(vp, &mp, V_NOWAIT) != 0) { mp = NULL; error = EDEADLK; goto unlock_all; } KASSERT(mp != NULL, ("vp %p with NULL v_mount", vp)); vm_object_reference_locked(object); pindex = m->pindex; VM_OBJECT_WUNLOCK(object); lockmode = MNT_SHARED_WRITES(vp->v_mount) ? LK_SHARED : LK_EXCLUSIVE; if (vget(vp, lockmode | LK_TIMELOCK, curthread)) { vp = NULL; error = EDEADLK; goto unlock_mp; } VM_OBJECT_WLOCK(object); vm_page_lock(m); /* * While the object and page were unlocked, the page * may have been: * (1) moved to a different queue, * (2) reallocated to a different object, * (3) reallocated to a different offset, or * (4) cleaned. */ if (!vm_page_in_laundry(m) || m->object != object || m->pindex != pindex || m->dirty == 0) { vm_page_unlock(m); error = ENXIO; goto unlock_all; } /* * The page may have been busied or held while the object * and page locks were released. */ if (vm_page_busied(m) || m->hold_count != 0) { vm_page_unlock(m); error = EBUSY; goto unlock_all; } } /* * If a page is dirty, then it is either being washed * (but not yet cleaned) or it is still in the * laundry. If it is still in the laundry, then we * start the cleaning operation. */ if ((*numpagedout = vm_pageout_cluster(m)) == 0) error = EIO; unlock_all: VM_OBJECT_WUNLOCK(object); unlock_mp: vm_page_lock_assert(m, MA_NOTOWNED); if (mp != NULL) { if (vp != NULL) vput(vp); vm_object_deallocate(object); vn_finished_write(mp); } return (error); } /* * Attempt to launder the specified number of pages. * * Returns the number of pages successfully laundered. */ static int vm_pageout_launder(struct vm_domain *vmd, int launder) { vm_page_t m, next; struct vm_pagequeue *pq; vm_object_t object; int act_delta, error, maxscan, numpagedout, starting_target; int vnodes_skipped; boolean_t pageout_ok, queue_locked, shortfall; starting_target = launder; vnodes_skipped = 0; /* * Scan the laundry queue for pages eligible to be laundered. We stop * once the target number of dirty pages have been laundered, or once * we've reached the end of the queue. A single iteration of this loop * may cause more than one page to be laundered because of clustering. * * maxscan ensures that we don't re-examine requeued pages. Any * additional pages written as part of a cluster are subtracted from * maxscan since they must be taken from the laundry queue. 
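Put differently, the scan budget and the laundering target are separate counters: maxscan caps how many queue entries are examined, launder counts down as pages are actually written, and a cluster of N pages charges N against the target but only N - 1 extra against the scan budget, since the cluster head was already counted by the loop itself. A stripped-down sketch of that accounting, with do_launder_one() as a hypothetical stand-in for vm_pageout_clean(), matching the loop that follows:

#include <stdio.h>

/* Hypothetical: launder one page, return how many pages its cluster wrote. */
static int
do_launder_one(int page)
{
	(void)page;
	return (3);		/* pretend every attempt clusters three pages */
}

static int
launder_queue(int queue_len, int target)
{
	int maxscan = queue_len;	/* don't re-examine requeued pages */
	int launder = target;
	int page = 0;

	while (maxscan-- > 0 && launder > 0) {
		int numpagedout = do_launder_one(page++);

		launder -= numpagedout;
		maxscan -= numpagedout - 1;	/* cluster members came off this queue too */
	}
	return (target - launder);	/* pages laundered; clustering may overshoot */
}

int
main(void)
{
	printf("laundered %d pages\n", launder_queue(100, 10));
	return (0);
}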
*/ pq = &vmd->vmd_pagequeues[PQ_LAUNDRY]; maxscan = pq->pq_cnt; shortfall = vm_laundry_target() > 0; vm_pagequeue_lock(pq); queue_locked = TRUE; for (m = TAILQ_FIRST(&pq->pq_pl); m != NULL && maxscan-- > 0 && launder > 0; m = next) { vm_pagequeue_assert_locked(pq); KASSERT(queue_locked, ("unlocked laundry queue")); KASSERT(vm_page_in_laundry(m), ("page %p has an inconsistent queue", m)); next = TAILQ_NEXT(m, plinks.q); if ((m->flags & PG_MARKER) != 0) continue; KASSERT((m->flags & PG_FICTITIOUS) == 0, ("PG_FICTITIOUS page %p cannot be in laundry queue", m)); KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("VPO_UNMANAGED page %p cannot be in laundry queue", m)); if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) { vm_page_unlock(m); continue; } object = m->object; if ((!VM_OBJECT_TRYWLOCK(object) && (!vm_pageout_fallback_object_lock(m, &next) || m->hold_count != 0)) || vm_page_busied(m)) { VM_OBJECT_WUNLOCK(object); vm_page_unlock(m); continue; } /* * We unlock the laundry queue, invalidating the * 'next' pointer. Use our marker to remember our * place. */ TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_laundry_marker, plinks.q); vm_pagequeue_unlock(pq); queue_locked = FALSE; /* * Invalid pages can be easily freed. They cannot be * mapped; vm_page_free() asserts this. */ if (m->valid == 0) goto free_page; /* * If the page has been referenced and the object is not dead, * reactivate or requeue the page depending on whether the * object is mapped. */ if ((m->aflags & PGA_REFERENCED) != 0) { vm_page_aflag_clear(m, PGA_REFERENCED); act_delta = 1; } else act_delta = 0; if (object->ref_count != 0) act_delta += pmap_ts_referenced(m); else { KASSERT(!pmap_page_is_mapped(m), ("page %p is mapped", m)); } if (act_delta != 0) { if (object->ref_count != 0) { vm_page_activate(m); /* * Increase the activation count if the page * was referenced while in the laundry queue. * This makes it less likely that the page will * be returned prematurely to the inactive * queue. */ m->act_count += act_delta + ACT_ADVANCE; /* * If this was a background laundering, count * activated pages towards our target. The * purpose of background laundering is to ensure * that pages are eventually cycled through the * laundry queue, and an activation is a valid * way out. */ if (!shortfall) launder--; goto drop_page; } else if ((object->flags & OBJ_DEAD) == 0) goto requeue_page; } /* * If the page appears to be clean at the machine-independent * layer, then remove all of its mappings from the pmap in * anticipation of freeing it. If, however, any of the page's * mappings allow write access, then the page may still be * modified until the last of those mappings are removed. */ if (object->ref_count != 0) { vm_page_test_dirty(m); if (m->dirty == 0) pmap_remove_all(m); } /* * Clean pages are freed, and dirty pages are paged out unless * they belong to a dead object. Requeueing dirty pages from * dead objects is pointless, as they are being paged out and * freed by the thread that destroyed the object. 
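The disposition just described reduces to a small decision table: free clean pages, skip dirty pages of dead objects, requeue dirty swap-backed pages when swap pageouts are administratively disabled, and launder everything else. A hypothetical helper expressing it (classify() and the action names are invented; disable_swap_pageouts corresponds to the vm.disable_swapspace_pageouts tunable defined earlier):

#include <stdbool.h>
#include <stdio.h>

enum action { FREE_PAGE, SKIP_PAGE, REQUEUE_PAGE, LAUNDER_PAGE };

/*
 * Disposition of a page found during the laundry scan, mirroring the
 * logic that follows this comment.
 */
static enum action
classify(bool dirty, bool object_dead, bool swap_backed, bool disable_swap_pageouts)
{
	if (!dirty)
		return (FREE_PAGE);
	if (object_dead)
		return (SKIP_PAGE);
	if (swap_backed && disable_swap_pageouts)
		return (REQUEUE_PAGE);
	return (LAUNDER_PAGE);
}

int
main(void)
{
	printf("%d\n", classify(true, false, true, true));	/* REQUEUE_PAGE */
	return (0);
}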
*/ if (m->dirty == 0) { free_page: vm_page_free(m); PCPU_INC(cnt.v_dfree); } else if ((object->flags & OBJ_DEAD) == 0) { if (object->type != OBJT_SWAP && object->type != OBJT_DEFAULT) pageout_ok = TRUE; else if (disable_swap_pageouts) pageout_ok = FALSE; else pageout_ok = TRUE; if (!pageout_ok) { requeue_page: vm_pagequeue_lock(pq); queue_locked = TRUE; vm_page_requeue_locked(m); goto drop_page; } error = vm_pageout_clean(m, &numpagedout); if (error == 0) { launder -= numpagedout; maxscan -= numpagedout - 1; } else if (error == EDEADLK) { pageout_lock_miss++; vnodes_skipped++; } goto relock_queue; } drop_page: vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); relock_queue: if (!queue_locked) { vm_pagequeue_lock(pq); queue_locked = TRUE; } next = TAILQ_NEXT(&vmd->vmd_laundry_marker, plinks.q); TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q); } vm_pagequeue_unlock(pq); /* * Wakeup the sync daemon if we skipped a vnode in a writeable object * and we didn't launder enough pages. */ if (vnodes_skipped > 0 && launder > 0) (void)speedup_syncer(); return (starting_target - launder); } /* * Perform the work of the laundry thread: periodically wake up and determine * whether any pages need to be laundered. If so, determine the number of pages * that need to be laundered, and launder them. */ static void vm_pageout_laundry_worker(void *arg) { struct vm_domain *domain; uint64_t ninact, nlaundry; u_int wakeups, gen; int cycle, domidx, launder; int shortfall, prev_shortfall, target; domidx = (uintptr_t)arg; domain = &vm_dom[domidx]; KASSERT(domain->vmd_segs != 0, ("domain without segments")); vm_pageout_init_marker(&domain->vmd_laundry_marker, PQ_LAUNDRY); cycle = 0; gen = 0; shortfall = prev_shortfall = 0; target = 0; /* * The pageout laundry worker is never done, so loop forever. */ for (;;) { KASSERT(target >= 0, ("negative target %d", target)); launder = 0; /* * First determine whether we need to launder pages to meet a * shortage of free pages. */ if (vm_laundering_needed()) { shortfall = vm_laundry_target() + vm_pageout_deficit; /* * If we're in shortfall and we haven't yet started a * laundering cycle to get us out of it, begin a run. * If we're still in shortfall despite a previous * laundering run, start a new one. */ if (prev_shortfall == 0 || cycle == 0) { target = shortfall; cycle = VM_LAUNDER_RATE; } prev_shortfall = shortfall; } if (prev_shortfall > 0) { /* * We entered shortfall at some point in the recent * past. If we have reached our target, or the * laundering run is finished and we're not currently in * shortfall, we have no immediate need to launder * pages. Otherwise keep laundering. */ if (vm_laundry_target() <= 0 || cycle == 0) { shortfall = prev_shortfall = target = 0; } else { launder = target / cycle; goto dolaundry; } } /* * There's no immediate need to launder any pages; see if we * meet the conditions to perform background laundering: * * 1. The ratio of dirty to clean inactive pages exceeds the * background laundering threshold and the pagedaemon has * recently been woken up, or * 2. we haven't yet reached the target of the current * background laundering run. */ ninact = vm_cnt.v_inactive_count; nlaundry = vm_cnt.v_laundry_count; wakeups = VM_METER_PCPU_CNT(v_pdwakeups); if (target == 0 && ninact > 0 && wakeups != gen && nlaundry * bkgrd_launder_ratio >= ninact) { gen = wakeups; /* * The pagedaemon has woken up at least once since the * last background laundering run and we're above the * dirty page threshold. 
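To make the trigger and the target arithmetic concrete with hypothetical numbers: with bkgrd_launder_ratio = 50 and 40,000 inactive pages, 700 laundry pages do not start a background run (700 * 50 < 40,000) while 900 do, and the per-run target is then scaled from the pagedaemon's own target exactly as in the code that follows this comment. A small sketch of that arithmetic (all figures invented):

#include <stdio.h>

#define	VM_LAUNDER_INTERVAL	10	/* laundry-thread wakeups per second, from above */

int
main(void)
{
	unsigned int v_free_target = 60000, pageout_wakeup_thresh = 20000;
	unsigned int ninact = 40000, nlaundry = 900;
	unsigned int bkgrd_launder_ratio = 50, bkgrd_launder_max = 2048;
	unsigned int target, launder;

	if (nlaundry * bkgrd_launder_ratio < ninact) {
		printf("no background laundering needed\n");
		return (0);
	}

	/* Scale the pagedaemon's target by the queue-length ratio, /10 fudge. */
	target = v_free_target - pageout_wakeup_thresh;
	target = nlaundry * target / ninact / 10;
	if (target == 0)
		target = 1;
	if (target > bkgrd_launder_max)
		target = bkgrd_launder_max;

	launder = target / VM_LAUNDER_INTERVAL;	/* pages per wakeup over one second */
	printf("target %u pages, %u per wakeup\n", target, launder);
	return (0);
}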
Launder some pages to balance * the inactive and laundry queues. We attempt to * finish within one second. */ cycle = VM_LAUNDER_INTERVAL; /* * Set our target to that of the pagedaemon, scaled by * the relative lengths of the inactive and laundry * queues. Divide by a fudge factor as well: we don't * want to reclaim dirty pages at the same rate as clean * pages. */ target = vm_cnt.v_free_target - vm_pageout_wakeup_thresh; target = nlaundry * (u_int)target / ninact / 10; if (target == 0) target = 1; /* * Make sure we don't exceed the background laundering * threshold. */ target = min(target, bkgrd_launder_max); } if (target > 0 && cycle != 0) launder = target / cycle; dolaundry: if (launder > 0) target -= min(vm_pageout_launder(domain, launder), target); tsleep(&vm_cnt.v_laundry_count, PVM, "laundr", hz / VM_LAUNDER_INTERVAL); cycle--; } } /* * vm_pageout_scan does the dirty work for the pageout daemon. * * pass 0 - Update active LRU/deactivate pages * pass 1 - Free inactive pages */ static void vm_pageout_scan(struct vm_domain *vmd, int pass) { vm_page_t m, next; struct vm_pagequeue *pq; vm_object_t object; long min_scan; int act_delta, addl_page_shortage, deficit, maxscan; int page_shortage, scan_tick, scanned, starting_page_shortage; boolean_t queue_locked; /* * If we need to reclaim memory ask kernel caches to return * some. We rate limit to avoid thrashing. */ if (vmd == &vm_dom[0] && pass > 0 && (time_uptime - lowmem_uptime) >= lowmem_period) { /* * Decrease registered cache sizes. */ SDT_PROBE0(vm, , , vm__lowmem_scan); EVENTHANDLER_INVOKE(vm_lowmem, 0); /* * We do this explicitly after the caches have been * drained above. */ uma_reclaim(); lowmem_uptime = time_uptime; } /* * The addl_page_shortage is the number of temporarily * stuck pages in the inactive queue. In other words, the * number of pages from the inactive count that should be * discounted in setting the target for the active queue scan. */ addl_page_shortage = 0; /* * Calculate the number of pages that we want to free. */ if (pass > 0) { deficit = atomic_readandclear_int(&vm_pageout_deficit); page_shortage = vm_paging_target() + deficit; } else page_shortage = deficit = 0; starting_page_shortage = page_shortage; /* * Start scanning the inactive queue for pages that we can free. The * scan will stop when we reach the target or we have scanned the * entire queue. (Note that m->act_count is not used to make * decisions for the inactive queue, only for the active queue.) */ pq = &vmd->vmd_pagequeues[PQ_INACTIVE]; maxscan = pq->pq_cnt; vm_pagequeue_lock(pq); queue_locked = TRUE; for (m = TAILQ_FIRST(&pq->pq_pl); m != NULL && maxscan-- > 0 && page_shortage > 0; m = next) { vm_pagequeue_assert_locked(pq); KASSERT(queue_locked, ("unlocked inactive queue")); KASSERT(vm_page_inactive(m), ("Inactive queue %p", m)); PCPU_INC(cnt.v_pdpages); next = TAILQ_NEXT(m, plinks.q); /* * skip marker pages */ if (m->flags & PG_MARKER) continue; KASSERT((m->flags & PG_FICTITIOUS) == 0, ("Fictitious page %p cannot be in inactive queue", m)); KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("Unmanaged page %p cannot be in inactive queue", m)); /* * The page or object lock acquisitions fail if the * page was removed from the queue or moved to a * different position within the queue. In either * case, addl_page_shortage should not be incremented. */ if (!vm_pageout_page_lock(m, &next)) goto unlock_page; else if (m->hold_count != 0) { /* * Held pages are essentially stuck in the * queue. So, they ought to be discounted * from the inactive count. 
See the * calculation of the page_shortage for the * loop over the active queue below. */ addl_page_shortage++; goto unlock_page; } object = m->object; if (!VM_OBJECT_TRYWLOCK(object)) { if (!vm_pageout_fallback_object_lock(m, &next)) goto unlock_object; else if (m->hold_count != 0) { addl_page_shortage++; goto unlock_object; } } if (vm_page_busied(m)) { /* * Don't mess with busy pages. Leave them at * the front of the queue. Most likely, they * are being paged out and will leave the * queue shortly after the scan finishes. So, * they ought to be discounted from the * inactive count. */ addl_page_shortage++; unlock_object: VM_OBJECT_WUNLOCK(object); unlock_page: vm_page_unlock(m); continue; } KASSERT(m->hold_count == 0, ("Held page %p", m)); /* * We unlock the inactive page queue, invalidating the * 'next' pointer. Use our marker to remember our * place. */ TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, plinks.q); vm_page_dequeue_locked(m); vm_pagequeue_unlock(pq); queue_locked = FALSE; /* * Invalid pages can be easily freed. They cannot be * mapped, vm_page_free() asserts this. */ if (m->valid == 0) goto free_page; /* * If the page has been referenced and the object is not dead, * reactivate or requeue the page depending on whether the * object is mapped. */ if ((m->aflags & PGA_REFERENCED) != 0) { vm_page_aflag_clear(m, PGA_REFERENCED); act_delta = 1; } else act_delta = 0; if (object->ref_count != 0) { act_delta += pmap_ts_referenced(m); } else { KASSERT(!pmap_page_is_mapped(m), ("vm_pageout_scan: page %p is mapped", m)); } if (act_delta != 0) { if (object->ref_count != 0) { vm_page_activate(m); /* * Increase the activation count if the page * was referenced while in the inactive queue. * This makes it less likely that the page will * be returned prematurely to the inactive * queue. */ m->act_count += act_delta + ACT_ADVANCE; goto drop_page; } else if ((object->flags & OBJ_DEAD) == 0) { vm_pagequeue_lock(pq); queue_locked = TRUE; m->queue = PQ_INACTIVE; TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_inc(pq); goto drop_page; } } /* * If the page appears to be clean at the machine-independent * layer, then remove all of its mappings from the pmap in * anticipation of freeing it. If, however, any of the page's * mappings allow write access, then the page may still be * modified until the last of those mappings are removed. */ if (object->ref_count != 0) { vm_page_test_dirty(m); if (m->dirty == 0) pmap_remove_all(m); } /* * Clean pages can be freed, but dirty pages must be sent back * to the laundry, unless they belong to a dead object. * Requeueing dirty pages from dead objects is pointless, as * they are being paged out and freed by the thread that * destroyed the object. */ if (m->dirty == 0) { free_page: vm_page_free(m); PCPU_INC(cnt.v_dfree); --page_shortage; } else if ((object->flags & OBJ_DEAD) == 0) vm_page_launder(m); drop_page: vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); if (!queue_locked) { vm_pagequeue_lock(pq); queue_locked = TRUE; } next = TAILQ_NEXT(&vmd->vmd_marker, plinks.q); TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q); } vm_pagequeue_unlock(pq); /* * Wakeup the laundry thread(s) if we didn't free the targeted number * of pages. */ if (page_shortage > 0) wakeup(&vm_cnt.v_laundry_count); #if !defined(NO_SWAPPING) /* * Wakeup the swapout daemon if we didn't free the targeted number of * pages. 
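* The request goes through vm_req_vmdaemon(VM_SWAP_NORMAL), which only sets a flag and, at most about once per second, wakes vm_daemon; the actual swapout_procs() call happens in that thread (see vm_daemon below).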
*/ if (vm_swap_enabled && page_shortage > 0) vm_req_vmdaemon(VM_SWAP_NORMAL); #endif /* * If the inactive queue scan fails repeatedly to meet its * target, kill the largest process. */ vm_pageout_mightbe_oom(vmd, page_shortage, starting_page_shortage); /* * Compute the number of pages we want to try to move from the * active queue to either the inactive or laundry queue. * * When scanning active pages, we make clean pages count more heavily * towards the page shortage than dirty pages. This is because dirty * pages must be laundered before they can be reused and thus have less * utility when attempting to quickly alleviate a shortage. However, * this weighting also causes the scan to deactivate dirty pages more * aggressively, improving the effectiveness of clustering and * ensuring that they can eventually be reused. */ page_shortage = vm_cnt.v_inactive_target - (vm_cnt.v_inactive_count + vm_cnt.v_laundry_count / act_scan_laundry_weight) + vm_paging_target() + deficit + addl_page_shortage; page_shortage *= act_scan_laundry_weight; pq = &vmd->vmd_pagequeues[PQ_ACTIVE]; vm_pagequeue_lock(pq); maxscan = pq->pq_cnt; /* * If we're just idle polling, attempt to visit every * active page within 'update_period' seconds. */ scan_tick = ticks; if (vm_pageout_update_period != 0) { min_scan = pq->pq_cnt; min_scan *= scan_tick - vmd->vmd_last_active_scan; min_scan /= hz * vm_pageout_update_period; } else min_scan = 0; if (min_scan > 0 || (page_shortage > 0 && maxscan > 0)) vmd->vmd_last_active_scan = scan_tick; /* * Scan the active queue for pages that can be deactivated. Update * the per-page activity counter and use it to identify deactivation * candidates. */ for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned < min_scan || (page_shortage > 0 && scanned < maxscan)); m = next, scanned++) { KASSERT(m->queue == PQ_ACTIVE, ("vm_pageout_scan: page %p isn't active", m)); next = TAILQ_NEXT(m, plinks.q); if ((m->flags & PG_MARKER) != 0) continue; KASSERT((m->flags & PG_FICTITIOUS) == 0, ("Fictitious page %p cannot be in active queue", m)); KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("Unmanaged page %p cannot be in active queue", m)); if (!vm_pageout_page_lock(m, &next)) { vm_page_unlock(m); continue; } /* * The count for pagedaemon pages is done after checking the * page for eligibility... */ PCPU_INC(cnt.v_pdpages); /* * Check to see "how much" the page has been used. */ if ((m->aflags & PGA_REFERENCED) != 0) { vm_page_aflag_clear(m, PGA_REFERENCED); act_delta = 1; } else act_delta = 0; /* * Unlocked object ref count check. Two races are possible. * 1) The ref was transitioning to zero and we saw non-zero, * the pmap bits will be checked unnecessarily. * 2) The ref was transitioning to one and we saw zero. * The page lock prevents a new reference to this page so * we need not check the reference bits. */ if (m->object->ref_count != 0) act_delta += pmap_ts_referenced(m); /* * Advance or decay the act_count based on recent usage. */ if (act_delta != 0) { m->act_count += ACT_ADVANCE + act_delta; if (m->act_count > ACT_MAX) m->act_count = ACT_MAX; } else m->act_count -= min(m->act_count, ACT_DECLINE); /* * Move this page to the tail of the active, inactive or laundry * queue depending on usage. */ if (m->act_count == 0) { /* Dequeue to avoid later lock recursion. */ vm_page_dequeue_locked(m); #if 0 /* * This requires the object write lock. It might be a * good idea during a page shortage, but might also * cause contention with a concurrent attempt to launder * pages from this object.
*/ if (m->object->ref_count != 0) vm_page_test_dirty(m); #endif /* * When not short for inactive pages, let dirty pages go * through the inactive queue before moving to the * laundry queues. This gives them some extra time to * be reactivated, potentially avoiding an expensive * pageout. During a page shortage, the inactive queue * is necessarily small, so we may move dirty pages * directly to the laundry queue. */ if (page_shortage <= 0) vm_page_deactivate(m); else { if (m->dirty == 0) { vm_page_deactivate(m); page_shortage -= act_scan_laundry_weight; } else { vm_page_launder(m); page_shortage--; } } } else vm_page_requeue_locked(m); vm_page_unlock(m); } vm_pagequeue_unlock(pq); #if !defined(NO_SWAPPING) /* - * Idle process swapout -- run once per second. + * Idle process swapout -- run once per second when we are reclaiming + * pages. */ - if (vm_swap_idle_enabled) { + if (vm_swap_idle_enabled && pass > 0) { static long lsec; if (time_second != lsec) { vm_req_vmdaemon(VM_SWAP_IDLE); lsec = time_second; } } #endif } static int vm_pageout_oom_vote; /* * The pagedaemon threads randomly select one to perform the * OOM. Trying to kill processes before all pagedaemons * have failed to reach the free page target is premature. */ static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage, int starting_page_shortage) { int old_vote; if (starting_page_shortage <= 0 || starting_page_shortage != page_shortage) vmd->vmd_oom_seq = 0; else vmd->vmd_oom_seq++; if (vmd->vmd_oom_seq < vm_pageout_oom_seq) { if (vmd->vmd_oom) { vmd->vmd_oom = FALSE; atomic_subtract_int(&vm_pageout_oom_vote, 1); } return; } /* * Do not follow the call sequence until OOM condition is * cleared. */ vmd->vmd_oom_seq = 0; if (vmd->vmd_oom) return; vmd->vmd_oom = TRUE; old_vote = atomic_fetchadd_int(&vm_pageout_oom_vote, 1); if (old_vote != vm_ndomains - 1) return; /* * The current pagedaemon thread is the last in the quorum to * start OOM. Initiate the selection and signaling of the * victim. */ vm_pageout_oom(VM_OOM_MEM); /* * After one round of OOM terror, recall our vote. On the * next pass, current pagedaemon would vote again if the low * memory condition is still there, due to vmd_oom being * false. */ vmd->vmd_oom = FALSE; atomic_subtract_int(&vm_pageout_oom_vote, 1); } /* * The OOM killer is the page daemon's action of last resort when * memory allocation requests have been stalled for a prolonged period * of time because it cannot reclaim memory. This function computes * the approximate number of physical pages that could be reclaimed if * the specified address space is destroyed. * * Private, anonymous memory owned by the address space is the * principal resource that we expect to recover after an OOM kill. * Since the physical pages mapped by the address space's COW entries * are typically shared pages, they are unlikely to be released and so * they are not counted. * * To get to the point where the page daemon runs the OOM killer, its * efforts to write back vnode-backed pages may have stalled. This * could be caused by a memory allocation deadlock in the write path * that might be resolved by an OOM kill. Therefore, physical pages * belonging to vnode-backed objects are counted, because they might * be freed without being written out first if the address space holds * the last reference to an unlinked vnode. * * Similarly, physical pages belonging to OBJT_PHYS objects are * counted because the address space might hold the last reference to * the object.
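* As a concrete consequence of the loop below, a copy-on-write entry (MAP_ENTRY_NEEDS_COPY) whose backing object is still shared (ref_count != 1) contributes nothing to the estimate, since killing this process would not free those pages.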
*/ static long vm_pageout_oom_pagecount(struct vmspace *vmspace) { vm_map_t map; vm_map_entry_t entry; vm_object_t obj; long res; map = &vmspace->vm_map; KASSERT(!map->system_map, ("system map")); sx_assert(&map->lock, SA_LOCKED); res = 0; for (entry = map->header.next; entry != &map->header; entry = entry->next) { if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) continue; obj = entry->object.vm_object; if (obj == NULL) continue; if ((entry->eflags & MAP_ENTRY_NEEDS_COPY) != 0 && obj->ref_count != 1) continue; switch (obj->type) { case OBJT_DEFAULT: case OBJT_SWAP: case OBJT_PHYS: case OBJT_VNODE: res += obj->resident_page_count; break; } } return (res); } void vm_pageout_oom(int shortage) { struct proc *p, *bigproc; vm_offset_t size, bigsize; struct thread *td; struct vmspace *vm; /* * We keep the process bigproc locked once we find it to keep anyone * from messing with it; however, there is a possibility of * deadlock if process B is bigproc and one of its child processes * attempts to propagate a signal to B while we are waiting for A's * lock while walking this list. To avoid this, we don't block on * the process lock but just skip a process if it is already locked. */ bigproc = NULL; bigsize = 0; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { int breakout; PROC_LOCK(p); /* * If this is a system, protected or killed process, skip it. */ if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC | P_PROTECTED | P_SYSTEM | P_WEXIT)) != 0 || p->p_pid == 1 || P_KILLED(p) || (p->p_pid < 48 && swap_pager_avail != 0)) { PROC_UNLOCK(p); continue; } /* * If the process is in a non-running type state, * don't touch it. Check all the threads individually. */ breakout = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (!TD_ON_RUNQ(td) && !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td) && !TD_IS_SUSPENDED(td) && !TD_IS_SWAPPED(td)) { thread_unlock(td); breakout = 1; break; } thread_unlock(td); } if (breakout) { PROC_UNLOCK(p); continue; } /* * get the process size */ vm = vmspace_acquire_ref(p); if (vm == NULL) { PROC_UNLOCK(p); continue; } _PHOLD_LITE(p); PROC_UNLOCK(p); sx_sunlock(&allproc_lock); if (!vm_map_trylock_read(&vm->vm_map)) { vmspace_free(vm); sx_slock(&allproc_lock); PRELE(p); continue; } size = vmspace_swap_count(vm); if (shortage == VM_OOM_MEM) size += vm_pageout_oom_pagecount(vm); vm_map_unlock_read(&vm->vm_map); vmspace_free(vm); sx_slock(&allproc_lock); /* * If this process is bigger than the biggest one, * remember it. */ if (size > bigsize) { if (bigproc != NULL) PRELE(bigproc); bigproc = p; bigsize = size; } else { PRELE(p); } } sx_sunlock(&allproc_lock); if (bigproc != NULL) { if (vm_panic_on_oom != 0) panic("out of swap space"); PROC_LOCK(bigproc); killproc(bigproc, "out of swap space"); sched_nice(bigproc, PRIO_MIN); _PRELE(bigproc); PROC_UNLOCK(bigproc); wakeup(&vm_cnt.v_free_count); } } static void vm_pageout_worker(void *arg) { struct vm_domain *domain; int domidx; domidx = (uintptr_t)arg; domain = &vm_dom[domidx]; /* * XXXKIB It could be useful to bind pageout daemon threads to * the cores belonging to the domain, from which vm_page_array * is allocated. */ KASSERT(domain->vmd_segs != 0, ("domain without segments")); domain->vmd_last_active_scan = ticks; vm_pageout_init_marker(&domain->vmd_marker, PQ_INACTIVE); vm_pageout_init_marker(&domain->vmd_inacthead, PQ_INACTIVE); TAILQ_INSERT_HEAD(&domain->vmd_pagequeues[PQ_INACTIVE].pq_pl, &domain->vmd_inacthead, plinks.q); /* * The pageout daemon worker is never done, so loop forever.
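* Each iteration picks a pass level for vm_pageout_scan(): pass 0 only updates the active LRU, while pass >= 1 also tries to free inactive pages, escalating when an earlier scan failed to meet its target.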
*/ while (TRUE) { mtx_lock(&vm_page_queue_free_mtx); /* * Generally, after a level >= 1 scan, if there are enough * free pages to wakeup the waiters, then they are already * awake. A call to vm_page_free() during the scan awakened * them. However, in the following case, this wakeup serves * to bound the amount of time that a thread might wait. * Suppose a thread's call to vm_page_alloc() fails, but * before that thread calls VM_WAIT, enough pages are freed by * other threads to alleviate the free page shortage. The * thread will, nonetheless, wait until another page is freed * or this wakeup is performed. */ if (vm_pages_needed && !vm_page_count_min()) { vm_pages_needed = false; wakeup(&vm_cnt.v_free_count); } /* * Do not clear vm_pageout_wanted until we reach our target. * Otherwise, we may be awakened over and over again, wasting * CPU time. */ if (vm_pageout_wanted && !vm_paging_needed()) vm_pageout_wanted = false; /* * Might the page daemon receive a wakeup call? */ if (vm_pageout_wanted) { /* * No. Either vm_pageout_wanted was set by another * thread during the previous scan, which must have * been a level 0 scan, or vm_pageout_wanted was * already set and the scan failed to free enough * pages. If we haven't yet performed a level >= 2 * scan (unlimited dirty cleaning), then upgrade the * level and scan again now. Otherwise, sleep a bit * and try again later. */ mtx_unlock(&vm_page_queue_free_mtx); if (domain->vmd_pass > 1) pause("psleep", hz / 2); domain->vmd_pass++; } else { /* * Yes. Sleep until pages need to be reclaimed or * have their reference stats updated. */ if (mtx_sleep(&vm_pageout_wanted, &vm_page_queue_free_mtx, PDROP | PVM, "psleep", hz) == 0) { PCPU_INC(cnt.v_pdwakeups); domain->vmd_pass = 1; } else domain->vmd_pass = 0; } vm_pageout_scan(domain, domain->vmd_pass); } } /* * vm_pageout_init initialises basic pageout daemon settings. */ static void vm_pageout_init(void) { /* * Initialize some paging parameters. */ vm_cnt.v_interrupt_free_min = 2; if (vm_cnt.v_page_count < 2000) vm_pageout_page_count = 8; /* * v_free_reserved needs to include enough for the largest * swap pager structures plus enough for any pv_entry structs * when paging. */ if (vm_cnt.v_page_count > 1024) vm_cnt.v_free_min = 4 + (vm_cnt.v_page_count - 1024) / 200; else vm_cnt.v_free_min = 4; vm_cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE + vm_cnt.v_interrupt_free_min; vm_cnt.v_free_reserved = vm_pageout_page_count + vm_cnt.v_pageout_free_min + (vm_cnt.v_page_count / 768); vm_cnt.v_free_severe = vm_cnt.v_free_min / 2; vm_cnt.v_free_target = 4 * vm_cnt.v_free_min + vm_cnt.v_free_reserved; vm_cnt.v_free_min += vm_cnt.v_free_reserved; vm_cnt.v_free_severe += vm_cnt.v_free_reserved; vm_cnt.v_inactive_target = (3 * vm_cnt.v_free_target) / 2; if (vm_cnt.v_inactive_target > vm_cnt.v_free_count / 3) vm_cnt.v_inactive_target = vm_cnt.v_free_count / 3; /* * Set the default wakeup threshold to be 10% above the minimum * page limit. This keeps the steady state out of shortfall. */ vm_pageout_wakeup_thresh = (vm_cnt.v_free_min / 10) * 11; /* * Set interval in seconds for active scan. We want to visit each * page at least once every ten minutes. This is to prevent worst * case paging behaviors with stale active LRU. */ if (vm_pageout_update_period == 0) vm_pageout_update_period = 600; /* XXX does not really belong here */ if (vm_page_max_wired == 0) vm_page_max_wired = vm_cnt.v_free_count / 3; } /* * vm_pageout is the high level pageout daemon. 
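* It creates the laundry thread for domain 0, a pageout worker for each additional NUMA domain, and the uma_reclaim helper thread, and then runs the domain-0 pageout worker itself.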
*/ static void vm_pageout(void) { int error; #ifdef VM_NUMA_ALLOC int i; #endif swap_pager_swap_init(); error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL, 0, 0, "laundry: dom0"); if (error != 0) panic("starting laundry for domain 0, error %d", error); #ifdef VM_NUMA_ALLOC for (i = 1; i < vm_ndomains; i++) { error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i, curproc, NULL, 0, 0, "dom%d", i); if (error != 0) { panic("starting pageout for domain %d, error %d\n", i, error); } } #endif error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL, 0, 0, "uma"); if (error != 0) panic("starting uma_reclaim helper, error %d\n", error); vm_pageout_worker((void *)(uintptr_t)0); } /* * Unless the free page queue lock is held by the caller, this function * should be regarded as advisory. Specifically, the caller should * not msleep() on &vm_cnt.v_free_count following this function unless * the free page queue lock is held until the msleep() is performed. */ void pagedaemon_wakeup(void) { if (!vm_pageout_wanted && curthread->td_proc != pageproc) { vm_pageout_wanted = true; wakeup(&vm_pageout_wanted); } } #if !defined(NO_SWAPPING) static void vm_req_vmdaemon(int req) { static int lastrun = 0; mtx_lock(&vm_daemon_mtx); vm_pageout_req_swapout |= req; if ((ticks > (lastrun + hz)) || (ticks < lastrun)) { wakeup(&vm_daemon_needed); lastrun = ticks; } mtx_unlock(&vm_daemon_mtx); } static void vm_daemon(void) { struct rlimit rsslim; struct proc *p; struct thread *td; struct vmspace *vm; int breakout, swapout_flags, tryagain, attempts; #ifdef RACCT uint64_t rsize, ravailable; #endif while (TRUE) { mtx_lock(&vm_daemon_mtx); msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep", #ifdef RACCT racct_enable ? hz : 0 #else 0 #endif ); swapout_flags = vm_pageout_req_swapout; vm_pageout_req_swapout = 0; mtx_unlock(&vm_daemon_mtx); if (swapout_flags) swapout_procs(swapout_flags); /* * scan the processes for exceeding their rlimits or if * process is swapped out -- deactivate pages */ tryagain = 0; attempts = 0; again: attempts++; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { vm_pindex_t limit, size; /* * if this is a system process or if we have already * looked at this process, skip it. */ PROC_LOCK(p); if (p->p_state != PRS_NORMAL || p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) { PROC_UNLOCK(p); continue; } /* * if the process is in a non-running type state, * don't touch it. */ breakout = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (!TD_ON_RUNQ(td) && !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td) && !TD_IS_SUSPENDED(td)) { thread_unlock(td); breakout = 1; break; } thread_unlock(td); } if (breakout) { PROC_UNLOCK(p); continue; } /* * get a limit */ lim_rlimit_proc(p, RLIMIT_RSS, &rsslim); limit = OFF_TO_IDX( qmin(rsslim.rlim_cur, rsslim.rlim_max)); /* * let processes that are swapped out really be * swapped out set the limit to nothing (will force a * swap-out.) */ if ((p->p_flag & P_INMEM) == 0) limit = 0; /* XXX */ vm = vmspace_acquire_ref(p); _PHOLD_LITE(p); PROC_UNLOCK(p); if (vm == NULL) { PRELE(p); continue; } sx_sunlock(&allproc_lock); size = vmspace_resident_count(vm); if (size >= limit) { vm_pageout_map_deactivate_pages( &vm->vm_map, limit); } #ifdef RACCT if (racct_enable) { rsize = IDX_TO_OFF(size); PROC_LOCK(p); racct_set(p, RACCT_RSS, rsize); ravailable = racct_get_available(p, RACCT_RSS); PROC_UNLOCK(p); if (rsize > ravailable) { /* * Don't be overly aggressive; this * might be an innocent process, * and the limit could've been exceeded * by some memory hog. 
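* (With purely illustrative numbers: a 400 MB resident set whose RACCT limit leaves only 100 MB available is trimmed to at most 300 MB below, i.e. no more than a quarter of it is deactivated in one pass.)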
Don't try * to deactivate more than 1/4th * of process' resident set size. */ if (attempts <= 8) { if (ravailable < rsize - (rsize / 4)) { ravailable = rsize - (rsize / 4); } } vm_pageout_map_deactivate_pages( &vm->vm_map, OFF_TO_IDX(ravailable)); /* Update RSS usage after paging out. */ size = vmspace_resident_count(vm); rsize = IDX_TO_OFF(size); PROC_LOCK(p); racct_set(p, RACCT_RSS, rsize); PROC_UNLOCK(p); if (rsize > ravailable) tryagain = 1; } } #endif vmspace_free(vm); sx_slock(&allproc_lock); PRELE(p); } sx_sunlock(&allproc_lock); if (tryagain != 0 && attempts <= 10) goto again; } } #endif /* !defined(NO_SWAPPING) */ Index: user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/add/option-f_size_mismatch.t =================================================================== --- user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/add/option-f_size_mismatch.t (revision 303641) +++ user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/add/option-f_size_mismatch.t (revision 303642) @@ -1,436 +1,434 @@ #!/bin/sh # $FreeBSD$ dir=`dirname $0` . ${dir}/../../misc.sh -[ "${os}" = "FreeBSD" ] && die "panics FreeBSD; see bug # 194586" - echo "1..100" disks_create 7 disks_create 1 64M files_create 7 files_create 1 64M names_create 1 expect_ok ${ZPOOL} create ${name0} mirror ${disk0} ${disk1} expect_fl ${ZPOOL} add ${name0} mirror ${disk7} ${disk2} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${disk0} ${disk1} expect_ok ${ZPOOL} add -f ${name0} mirror ${disk7} ${disk2} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk7} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${file0} ${file1} expect_fl ${ZPOOL} add ${name0} mirror ${file7} ${file2} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${file0} ${file1} expect_ok ${ZPOOL} add -f ${name0} mirror ${file7} ${file2} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file7} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${disk0} 
${disk1} ${disk2} expect_fl ${ZPOOL} add ${name0} raidz1 ${disk3} ${disk7} ${disk4} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${disk0} ${disk1} ${disk2} expect_ok ${ZPOOL} add -f ${name0} raidz1 ${disk3} ${disk7} ${disk4} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${disk3} ONLINE 0 0 0" echo " ${disk7} ONLINE 0 0 0" echo " ${disk4} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${file0} ${file1} ${file2} expect_fl ${ZPOOL} add ${name0} raidz1 ${file3} ${file7} ${file4} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${file0} ${file1} ${file2} expect_ok ${ZPOOL} add -f ${name0} raidz1 ${file3} ${file7} ${file4} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo " ${file7} ONLINE 0 0 0" echo " ${file4} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${disk0} ${disk1} ${disk2} ${disk3} expect_fl ${ZPOOL} add ${name0} raidz2 ${disk4} ${disk5} ${disk6} ${disk7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo " ${disk3} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${disk0} ${disk1} ${disk2} ${disk3} expect_ok ${ZPOOL} add -f ${name0} raidz2 ${disk4} ${disk5} ${disk6} ${disk7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo " ${disk3} ONLINE 0 0 
0" echo " raidz2 ONLINE 0 0 0" echo " ${disk4} ONLINE 0 0 0" echo " ${disk5} ONLINE 0 0 0" echo " ${disk6} ONLINE 0 0 0" echo " ${disk7} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${file0} ${file1} ${file2} ${file3} expect_fl ${ZPOOL} add ${name0} raidz2 ${file4} ${file5} ${file6} ${file7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${file0} ${file1} ${file2} ${file3} expect_ok ${ZPOOL} add -f ${name0} raidz2 ${file4} ${file5} ${file6} ${file7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${file4} ONLINE 0 0 0" echo " ${file5} ONLINE 0 0 0" echo " ${file6} ONLINE 0 0 0" echo " ${file7} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${disk0} add_msg="# TODO Sun CR 6726091, Lustre bug 16873" expect_fl ${ZPOOL} add ${name0} log mirror ${disk1} ${disk7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} add_msg="" expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${disk0} expect_ok ${ZPOOL} add -f ${name0} log mirror ${disk1} ${disk7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk7} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} add_msg="# TODO Sun CR 6726091, Lustre bug 16873" expect_fl ${ZPOOL} add ${name0} log mirror ${file1} ${file7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} add_msg="" expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} expect_ok ${ZPOOL} add -f ${name0} log mirror ${file1} ${file7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 
0" echo " ${file0} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file7} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${disk0} log mirror ${disk1} ${disk2} add_msg="# TODO Sun CR 6726091, Lustre bug 16873" expect_fl ${ZPOOL} add ${name0} log mirror ${disk3} ${disk7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} add_msg="" expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${disk0} log mirror ${disk1} ${disk2} expect_ok ${ZPOOL} add -f ${name0} log mirror ${disk3} ${disk7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk3} ONLINE 0 0 0" echo " ${disk7} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} log mirror ${file1} ${file2} add_msg="# TODO Sun CR 6726091, Lustre bug 16873" expect_fl ${ZPOOL} add ${name0} log mirror ${file3} ${file7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} add_msg="" expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} log mirror ${file1} ${file2} expect_ok ${ZPOOL} add -f ${name0} log mirror ${file3} ${file7} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo " ${file7} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} disks_destroy files_destroy Index: user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/add/option-f_type_mismatch.t =================================================================== --- user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/add/option-f_type_mismatch.t (revision 303641) +++ user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/add/option-f_type_mismatch.t (revision 303642) @@ -1,410 +1,408 @@ #!/bin/sh # $FreeBSD$ dir=`dirname $0` . 
${dir}/../../misc.sh -[ "${os}" = "FreeBSD" ] && die "panics FreeBSD; see bug # 194587" - echo "1..100" disks_create 7 files_create 7 names_create 1 expect_ok ${ZPOOL} create ${name0} ${disk0} expect_fl ${ZPOOL} add ${name0} ${file0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${disk0} expect_ok ${ZPOOL} add -f ${name0} ${file0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} expect_fl ${ZPOOL} add ${name0} ${disk0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} expect_ok ${ZPOOL} add -f ${name0} ${disk0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${disk0} ${disk1} expect_fl ${ZPOOL} add ${name0} mirror ${disk2} ${file0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${disk0} ${disk1} expect_ok ${ZPOOL} add -f ${name0} mirror ${disk2} ${file0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${file0} ${file1} expect_fl ${ZPOOL} add ${name0} mirror ${disk0} ${file2} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy 
${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${file0} ${file1} expect_ok ${ZPOOL} add -f ${name0} mirror ${disk0} ${file2} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${disk0} ${disk1} ${disk2} expect_fl ${ZPOOL} add ${name0} raidz1 ${disk3} ${file0} ${disk4} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${disk0} ${disk1} ${disk2} expect_ok ${ZPOOL} add -f ${name0} raidz1 ${disk3} ${file0} ${disk4} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${disk3} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${disk4} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${file0} ${file1} ${file2} expect_fl ${ZPOOL} add ${name0} raidz1 ${file3} ${disk0} ${file4} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${file0} ${file1} ${file2} expect_ok ${ZPOOL} add -f ${name0} raidz1 ${file3} ${disk0} ${file4} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${file4} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${disk0} ${disk1} ${disk2} ${disk3} expect_fl ${ZPOOL} add ${name0} raidz2 ${disk4} ${file0} ${disk5} ${disk6} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 
0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo " ${disk3} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${disk0} ${disk1} ${disk2} ${disk3} expect_ok ${ZPOOL} add -f ${name0} raidz2 ${disk4} ${file0} ${disk5} ${disk6} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${disk2} ONLINE 0 0 0" echo " ${disk3} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${disk4} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${disk5} ONLINE 0 0 0" echo " ${disk6} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${file0} ${file1} ${file2} ${file3} expect_fl ${ZPOOL} add ${name0} raidz2 ${file4} ${disk0} ${file5} ${file6} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${file0} ${file1} ${file2} ${file3} expect_ok ${ZPOOL} add -f ${name0} raidz2 ${file4} ${disk0} ${file5} ${file6} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${file4} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " ${file5} ONLINE 0 0 0" echo " ${file6} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${disk0} add_msg="# TODO Sun CR 6726091, Lustre bug 16873" expect_fl ${ZPOOL} add ${name0} log mirror ${disk1} ${file0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} add_msg="" expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${disk0} expect_ok ${ZPOOL} add -f ${name0} log mirror ${disk1} ${file0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${disk1} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} add_msg="# TODO Sun CR 
6726091, Lustre bug 16873" expect_fl ${ZPOOL} add ${name0} log mirror ${file1} ${disk0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} add_msg="" expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} expect_ok ${ZPOOL} add -f ${name0} log mirror ${file1} ${disk0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${disk0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} disks_destroy files_destroy Index: user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/create/files.t =================================================================== --- user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/create/files.t (revision 303641) +++ user/alc/PQ_LAUNDRY/tools/regression/zfs/zpool/create/files.t (revision 303642) @@ -1,191 +1,189 @@ #!/bin/sh # $FreeBSD$ dir=`dirname $0` . ${dir}/../../misc.sh -[ "${os}" = "FreeBSD" ] && die "panics FreeBSD; see bug # 194589" - echo "1..59" files_create 5 names_create 1 expect_ok ${ZPOOL} create ${name0} ${file0} expect_ok ${ZPOOL} status -x ${name0} expect "pool '${name0}' is healthy" ${ZPOOL} status -x ${name0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} expect_ok ${ZPOOL} create ${name0} ${file0} ${file1} ${file2} ${file3} ${file4} expect_ok ${ZPOOL} status -x ${name0} expect "pool '${name0}' is healthy" ${ZPOOL} status -x ${name0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo " ${file4} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${file0} ${file1} expect_ok ${ZPOOL} status -x ${name0} expect "pool '${name0}' is healthy" ${ZPOOL} status -x ${name0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} expect_ok ${ZPOOL} create ${name0} raidz1 ${file0} ${file1} ${file2} expect_ok ${ZPOOL} status -x ${name0} expect "pool '${name0}' is healthy" ${ZPOOL} status -x ${name0} exp=`( echo " pool: 
${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz1 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} expect_ok ${ZPOOL} create ${name0} raidz2 ${file0} ${file1} ${file2} ${file3} expect_ok ${ZPOOL} status -x ${name0} expect "pool '${name0}' is healthy" ${ZPOOL} status -x ${name0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " raidz2 ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${file0} ${file1} spare ${file2} ${file3} expect_ok ${ZPOOL} status -x ${name0} expect "pool '${name0}' is healthy" ${ZPOOL} status -x ${name0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " spares" echo " ${file2} AVAIL " echo " ${file3} AVAIL " echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${file0} ${file1} log ${file2} ${file3} expect_ok ${ZPOOL} status -x ${name0} expect "pool '${name0}' is healthy" ${ZPOOL} status -x ${name0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} expect_ok ${ZPOOL} create ${name0} mirror ${file0} ${file1} log mirror ${file2} ${file3} expect_ok ${ZPOOL} status -x ${name0} expect "pool '${name0}' is healthy" ${ZPOOL} status -x ${name0} exp=`( echo " pool: ${name0}" echo " state: ONLINE" echo " scrub: none requested" echo "config:" echo " NAME STATE READ WRITE CKSUM" echo " ${name0} ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file0} ONLINE 0 0 0" echo " ${file1} ONLINE 0 0 0" echo " logs ONLINE 0 0 0" echo " mirror ONLINE 0 0 0" echo " ${file2} ONLINE 0 0 0" echo " ${file3} ONLINE 0 0 0" echo "errors: No known data errors" )` expect "${exp}" ${ZPOOL} status ${name0} expect_ok ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} expect_fl ${ZPOOL} create ${name0} mirror ${file0} ${file1} cache ${file2} ${file3} expect_fl ${ZPOOL} status -x ${name0} expect_fl ${ZPOOL} destroy ${name0} files_destroy Index: user/alc/PQ_LAUNDRY/usr.bin/at/at.c 
=================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/at/at.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/at/at.c (revision 303642) @@ -1,913 +1,913 @@ /* * at.c : Put file into atrun queue * Copyright (C) 1993, 1994 Thomas Koenig * * Atrun & Atq modifications * Copyright (C) 1993 David Parsons * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the author(s) may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define _USE_BSD 1 /* System Headers */ #include #include #include #include #include #include #include #include #include #ifndef __FreeBSD__ #include #endif #ifdef __FreeBSD__ #include #endif #include #include #include #include #include #include #include #include /* Local headers */ #include "at.h" #include "panic.h" #include "parsetime.h" #include "perm.h" #define MAIN #include "privs.h" /* Macros */ #ifndef ATJOB_DIR #define ATJOB_DIR "/usr/spool/atjobs/" #endif #ifndef LFILE #define LFILE ATJOB_DIR ".lockfile" #endif #ifndef ATJOB_MX #define ATJOB_MX 255 #endif #define ALARMC 10 /* Number of seconds to wait for timeout */ #define SIZE 255 #define TIMESIZE 50 enum { ATQ, ATRM, AT, BATCH, CAT }; /* what program we want to run */ /* File scope variables */ static const char *no_export[] = { "TERM", "TERMCAP", "DISPLAY", "_" }; static int send_mail = 0; static char *atinput = NULL; /* where to get input from */ static char atqueue = 0; /* which queue to examine for jobs (atq) */ /* External variables */ extern char **environ; int fcreated; char atfile[] = ATJOB_DIR "12345678901234"; char atverify = 0; /* verify time instead of queuing job */ char *namep; /* Function declarations */ static void sigc(int signo); static void alarmc(int signo); static char *cwdname(void); static void writefile(time_t runtimer, char queue); static void list_jobs(long *, int); static long nextjob(void); static time_t ttime(const char *arg); static int in_job_list(long, long *, int); static long *get_job_list(int, char *[], int *); /* Signal catching functions */ static void sigc(int signo __unused) { /* If the user presses ^C, remove the spool file and exit */ if (fcreated) { PRIV_START unlink(atfile); PRIV_END } _exit(EXIT_FAILURE); } static void alarmc(int signo __unused) { char buf[1024]; /* Time out after some seconds. 
*/ strlcpy(buf, namep, sizeof(buf)); strlcat(buf, ": file locking timed out\n", sizeof(buf)); write(STDERR_FILENO, buf, strlen(buf)); sigc(0); } /* Local functions */ static char *cwdname(void) { /* Read in the current directory; the name will be overwritten on * subsequent calls. */ static char *ptr = NULL; static size_t size = SIZE; if (ptr == NULL) if ((ptr = malloc(size)) == NULL) errx(EXIT_FAILURE, "virtual memory exhausted"); while (1) { if (ptr == NULL) panic("out of memory"); if (getcwd(ptr, size-1) != NULL) return ptr; if (errno != ERANGE) perr("cannot get directory"); free (ptr); size += SIZE; if ((ptr = malloc(size)) == NULL) errx(EXIT_FAILURE, "virtual memory exhausted"); } } static long nextjob(void) { long jobno; FILE *fid; if ((fid = fopen(ATJOB_DIR ".SEQ", "r+")) != NULL) { if (fscanf(fid, "%5lx", &jobno) == 1) { rewind(fid); jobno = (1+jobno) % 0xfffff; /* 2^20 jobs enough? */ fprintf(fid, "%05lx\n", jobno); } else jobno = EOF; fclose(fid); return jobno; } else if ((fid = fopen(ATJOB_DIR ".SEQ", "w")) != NULL) { fprintf(fid, "%05lx\n", jobno = 1); fclose(fid); return 1; } return EOF; } static void writefile(time_t runtimer, char queue) { /* This does most of the work if at or batch are invoked for writing a job. */ long jobno; char *ap, *ppos, *mailname; struct passwd *pass_entry; struct stat statbuf; int fdes, lockdes, fd2; FILE *fp, *fpin; struct sigaction act; char **atenv; int ch; mode_t cmask; struct flock lock; #ifdef __FreeBSD__ (void) setlocale(LC_TIME, ""); #endif /* Install the signal handler for SIGINT; terminate after removing the * spool file if necessary */ act.sa_handler = sigc; sigemptyset(&(act.sa_mask)); act.sa_flags = 0; sigaction(SIGINT, &act, NULL); ppos = atfile + strlen(ATJOB_DIR); /* Loop over all possible file names for running something at this * particular time, see if a file is there; the first empty slot at any * particular time is used. Lock the file LFILE first to make sure * we're alone when doing this. */ PRIV_START if ((lockdes = open(LFILE, O_WRONLY | O_CREAT, S_IWUSR | S_IRUSR)) < 0) perr("cannot open lockfile " LFILE); lock.l_type = F_WRLCK; lock.l_whence = SEEK_SET; lock.l_start = 0; lock.l_len = 0; act.sa_handler = alarmc; sigemptyset(&(act.sa_mask)); act.sa_flags = 0; /* Set an alarm so a timeout occurs after ALARMC seconds, in case * something is seriously broken. */ sigaction(SIGALRM, &act, NULL); alarm(ALARMC); fcntl(lockdes, F_SETLKW, &lock); alarm(0); if ((jobno = nextjob()) == EOF) perr("cannot generate job number"); sprintf(ppos, "%c%5lx%8lx", queue, jobno, (unsigned long) (runtimer/60)); for(ap=ppos; *ap != '\0'; ap ++) if (*ap == ' ') *ap = '0'; if (stat(atfile, &statbuf) != 0) if (errno != ENOENT) perr("cannot access " ATJOB_DIR); /* Create the file. The x bit is only going to be set after it has * been completely written out, to make sure it is not executed in the * meantime. To make sure they do not get deleted, turn off their r * bit. Yes, this is a kluge. */ cmask = umask(S_IRUSR | S_IWUSR | S_IXUSR); if ((fdes = creat(atfile, O_WRONLY)) == -1) perr("cannot create atjob file"); if ((fd2 = dup(fdes)) <0) perr("error in dup() of job file"); if(fchown(fd2, real_uid, real_gid) != 0) perr("cannot give away file"); PRIV_END /* We no longer need suid root; now we just need to be able to write * to the directory, if necessary. */ REDUCE_PRIV(DAEMON_UID, DAEMON_GID) /* We've successfully created the file; let's set the flag so it * gets removed in case of an interrupt or error. 
*/ fcreated = 1; /* Now we can release the lock, so other people can access it */ lock.l_type = F_UNLCK; lock.l_whence = SEEK_SET; lock.l_start = 0; lock.l_len = 0; fcntl(lockdes, F_SETLKW, &lock); close(lockdes); if((fp = fdopen(fdes, "w")) == NULL) panic("cannot reopen atjob file"); /* Get the userid to mail to, first by trying getlogin(), * then from LOGNAME, finally from getpwuid(). */ mailname = getlogin(); if (mailname == NULL) mailname = getenv("LOGNAME"); if ((mailname == NULL) || (mailname[0] == '\0') || (strlen(mailname) >= MAXLOGNAME) || (getpwnam(mailname)==NULL)) { pass_entry = getpwuid(real_uid); if (pass_entry != NULL) mailname = pass_entry->pw_name; } if (atinput != (char *) NULL) { fpin = freopen(atinput, "r", stdin); if (fpin == NULL) perr("cannot open input file"); } fprintf(fp, "#!/bin/sh\n# atrun uid=%ld gid=%ld\n# mail %*s %d\n", (long) real_uid, (long) real_gid, MAXLOGNAME - 1, mailname, send_mail); /* Write out the umask at the time of invocation */ fprintf(fp, "umask %lo\n", (unsigned long) cmask); /* Write out the environment. Anything that may look like a * special character to the shell is quoted, except for \n, which is * done with a pair of "'s. Don't export the no_export list (such * as TERM or DISPLAY) because we don't want these. */ for (atenv= environ; *atenv != NULL; atenv++) { int export = 1; char *eqp; eqp = strchr(*atenv, '='); if (eqp == NULL) eqp = *atenv; else { size_t i; - for (i=0; i&2\n\t exit 1\n}\n"); while((ch = getchar()) != EOF) fputc(ch, fp); fprintf(fp, "\n"); if (ferror(fp)) panic("output error"); if (ferror(stdin)) panic("input error"); fclose(fp); /* Set the x bit so that we're ready to start executing */ if (fchmod(fd2, S_IRUSR | S_IWUSR | S_IXUSR) < 0) perr("cannot give away file"); close(fd2); fprintf(stderr, "Job %ld will be executed using /bin/sh\n", jobno); } static int in_job_list(long job, long *joblist, int len) { int i; for (i = 0; i < len; i++) if (job == joblist[i]) return 1; return 0; } static void list_jobs(long *joblist, int len) { /* List all a user's jobs in the queue, by looping through ATJOB_DIR, * or everybody's if we are root */ struct passwd *pw; DIR *spool; struct dirent *dirent; struct stat buf; struct tm runtime; unsigned long ctm; char queue; long jobno; time_t runtimer; char timestr[TIMESIZE]; int first=1; #ifdef __FreeBSD__ (void) setlocale(LC_TIME, ""); #endif PRIV_START if (chdir(ATJOB_DIR) != 0) perr("cannot change to " ATJOB_DIR); if ((spool = opendir(".")) == NULL) perr("cannot open " ATJOB_DIR); /* Loop over every file in the directory */ while((dirent = readdir(spool)) != NULL) { if (stat(dirent->d_name, &buf) != 0) perr("cannot stat in " ATJOB_DIR); /* See it's a regular file and has its x bit turned on and * is the user's */ if (!S_ISREG(buf.st_mode) || ((buf.st_uid != real_uid) && ! (real_uid == 0)) || !(S_IXUSR & buf.st_mode || atverify)) continue; if(sscanf(dirent->d_name, "%c%5lx%8lx", &queue, &jobno, &ctm)!=3) continue; /* If jobs are given, only list those jobs */ if (joblist && !in_job_list(jobno, joblist, len)) continue; if (atqueue && (queue != atqueue)) continue; runtimer = 60*(time_t) ctm; runtime = *localtime(&runtimer); strftime(timestr, TIMESIZE, "%+", &runtime); if (first) { printf("Date\t\t\t\tOwner\t\tQueue\tJob#\n"); first=0; } pw = getpwuid(buf.st_uid); printf("%s\t%-16s%c%s\t%ld\n", timestr, pw ? pw->pw_name : "???", queue, (S_IXUSR & buf.st_mode) ? 
"":"(done)", jobno); } PRIV_END closedir(spool); } static void process_jobs(int argc, char **argv, int what) { /* Delete every argument (job - ID) given */ int i; int rc; int nofJobs; int nofDone; int statErrno; struct stat buf; DIR *spool; struct dirent *dirent; unsigned long ctm; char queue; long jobno; nofJobs = argc - optind; nofDone = 0; PRIV_START if (chdir(ATJOB_DIR) != 0) perr("cannot change to " ATJOB_DIR); if ((spool = opendir(".")) == NULL) perr("cannot open " ATJOB_DIR); PRIV_END /* Loop over every file in the directory */ while((dirent = readdir(spool)) != NULL) { PRIV_START rc = stat(dirent->d_name, &buf); statErrno = errno; PRIV_END /* There's a race condition between readdir above and stat here: * another atrm process could have removed the file from the spool * directory under our nose. If this happens, stat will set errno to * ENOENT, which we shouldn't treat as fatal. */ if (rc != 0) { if (statErrno == ENOENT) continue; else perr("cannot stat in " ATJOB_DIR); } if(sscanf(dirent->d_name, "%c%5lx%8lx", &queue, &jobno, &ctm)!=3) continue; for (i=optind; i < argc; i++) { if (atoi(argv[i]) == jobno) { if ((buf.st_uid != real_uid) && !(real_uid == 0)) errx(EXIT_FAILURE, "%s: not owner", argv[i]); switch (what) { case ATRM: PRIV_START if (unlink(dirent->d_name) != 0) perr(dirent->d_name); PRIV_END break; case CAT: { FILE *fp; int ch; PRIV_START fp = fopen(dirent->d_name,"r"); PRIV_END if (!fp) { perr("cannot open file"); } while((ch = getc(fp)) != EOF) { putchar(ch); } fclose(fp); } break; default: errx(EXIT_FAILURE, "internal error, process_jobs = %d", what); } /* All arguments have been processed */ if (++nofDone == nofJobs) goto end; } } } end: closedir(spool); } /* delete_jobs */ #define ATOI2(ar) ((ar)[0] - '0') * 10 + ((ar)[1] - '0'); (ar) += 2; static time_t ttime(const char *arg) { /* * This is pretty much a copy of stime_arg1() from touch.c. I changed * the return value and the argument list because it's more convenient * (IMO) to do everything in one place. - Joe Halpin */ struct timeval tv[2]; time_t now; struct tm *t; int yearset; char *p; if (gettimeofday(&tv[0], NULL)) panic("Cannot get current time"); /* Start with the current time. */ now = tv[0].tv_sec; if ((t = localtime(&now)) == NULL) panic("localtime"); /* [[CC]YY]MMDDhhmm[.SS] */ if ((p = strchr(arg, '.')) == NULL) t->tm_sec = 0; /* Seconds defaults to 0. */ else { if (strlen(p + 1) != 2) goto terr; *p++ = '\0'; t->tm_sec = ATOI2(p); } yearset = 0; switch(strlen(arg)) { case 12: /* CCYYMMDDhhmm */ t->tm_year = ATOI2(arg); t->tm_year *= 100; yearset = 1; /* FALLTHROUGH */ case 10: /* YYMMDDhhmm */ if (yearset) { yearset = ATOI2(arg); t->tm_year += yearset; } else { yearset = ATOI2(arg); t->tm_year = yearset + 2000; } t->tm_year -= 1900; /* Convert to UNIX time. */ /* FALLTHROUGH */ case 8: /* MMDDhhmm */ t->tm_mon = ATOI2(arg); --t->tm_mon; /* Convert from 01-12 to 00-11 */ t->tm_mday = ATOI2(arg); t->tm_hour = ATOI2(arg); t->tm_min = ATOI2(arg); break; default: goto terr; } t->tm_isdst = -1; /* Figure out DST. 
*/ tv[0].tv_sec = tv[1].tv_sec = mktime(t); if (tv[0].tv_sec != -1) return tv[0].tv_sec; else terr: panic( "out of range or illegal time specification: [[CC]YY]MMDDhhmm[.SS]"); } static long * get_job_list(int argc, char *argv[], int *joblen) { int i, len; long *joblist; char *ep; joblist = NULL; len = argc; if (len > 0) { if ((joblist = malloc(len * sizeof(*joblist))) == NULL) panic("out of memory"); for (i = 0; i < argc; i++) { errno = 0; if ((joblist[i] = strtol(argv[i], &ep, 10)) < 0 || ep == argv[i] || *ep != '\0' || errno) panic("invalid job number"); } } *joblen = len; return joblist; } int main(int argc, char **argv) { int c; char queue = DEFAULT_AT_QUEUE; char queue_set = 0; char *pgm; int program = AT; /* our default program */ const char *options = "q:f:t:rmvldbc"; /* default options for at */ time_t timer; long *joblist; int joblen; joblist = NULL; joblen = 0; timer = -1; RELINQUISH_PRIVS /* Eat any leading paths */ if ((pgm = strrchr(argv[0], '/')) == NULL) pgm = argv[0]; else pgm++; namep = pgm; /* find out what this program is supposed to do */ if (strcmp(pgm, "atq") == 0) { program = ATQ; options = "q:v"; } else if (strcmp(pgm, "atrm") == 0) { program = ATRM; options = ""; } else if (strcmp(pgm, "batch") == 0) { program = BATCH; options = "f:q:mv"; } /* process whatever options we can process */ opterr=1; while ((c=getopt(argc, argv, options)) != -1) switch (c) { case 'v': /* verify time settings */ atverify = 1; break; case 'm': /* send mail when job is complete */ send_mail = 1; break; case 'f': atinput = optarg; break; case 'q': /* specify queue */ if (strlen(optarg) > 1) usage(); atqueue = queue = *optarg; if (!(islower(queue)||isupper(queue))) usage(); queue_set = 1; break; case 'd': warnx("-d is deprecated; use -r instead"); /* fall through to 'r' */ case 'r': if (program != AT) usage(); program = ATRM; options = ""; break; case 't': if (program != AT) usage(); timer = ttime(optarg); break; case 'l': if (program != AT) usage(); program = ATQ; options = "q:"; break; case 'b': if (program != AT) usage(); program = BATCH; options = "f:q:mv"; break; case 'c': program = CAT; options = ""; break; default: usage(); break; } /* end of options eating */ /* select our program */ if(!check_permission()) errx(EXIT_FAILURE, "you do not have permission to use this program"); switch (program) { case ATQ: REDUCE_PRIV(DAEMON_UID, DAEMON_GID) if (queue_set == 0) joblist = get_job_list(argc - optind, argv + optind, &joblen); list_jobs(joblist, joblen); break; case ATRM: REDUCE_PRIV(DAEMON_UID, DAEMON_GID) process_jobs(argc, argv, ATRM); break; case CAT: process_jobs(argc, argv, CAT); break; case AT: /* * If timer is > -1, then the user gave the time with -t. In that * case, it's already been set. If not, set it now. 
*/ if (timer == -1) timer = parsetime(argc, argv); if (atverify) { struct tm *tm = localtime(&timer); fprintf(stderr, "%s\n", asctime(tm)); } writefile(timer, queue); break; case BATCH: if (queue_set) queue = toupper(queue); else queue = DEFAULT_BATCH_QUEUE; if (argc > optind) timer = parsetime(argc, argv); else timer = time(NULL); if (atverify) { struct tm *tm = localtime(&timer); fprintf(stderr, "%s\n", asctime(tm)); } writefile(timer, queue); break; default: panic("internal error"); break; } exit(EXIT_SUCCESS); } Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.common =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.common (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.common (revision 303642) @@ -1,105 +1,105 @@ /* * òÏÓÓÉÊÓËÉÅ ÐÒÁÚÄÎÉËÉ * * $FreeBSD$ */ #ifndef _ru_RU_KOI8_R_common_ #define _ru_RU_KOI8_R_common_ LANG=ru_RU.KOI8-R -12 ÑÎ× äÅÎØ ÒÁÂÏÔÎÉËÁ ÐÒÏËÕÒÁÔÕÒÙ -13 ÑÎ× äÅÎØ ÒÏÓÓÉÊÓËÏÊ ÐÅÞÁÔÉ -14 ÑÎ× óÔÁÒÙÊ îÏ×ÙÊ ÇÏÄ -21 ÑÎ× äÅÎØ ÉÎÖÅÎÅÒÎÙÈ ×ÏÊÓË -25 ÑÎ× ôÁÔØÑÎÉÎ ÄÅÎØ. óÔÕÄÅÎÞÅÓËÉÊ ÐÒÁÚÄÎÉË - 8 ÆÅ× äÅÎØ ÒÏÓÓÉÊÓËÏÊ ÎÁÕËÉ -10 ÆÅ× äÅÎØ ÄÉÐÌÏÍÁÔÉÞÅÓËÏÇÏ ÒÁÂÏÔÎÉËÁ - 1 ÍÁÒ ÷ÓÅÍÉÒÎÙÊ ÄÅÎØ ÇÒÁÖÄÁÎÓËÏÊ ÏÂÏÒÏÎÙ +12 ÑÎ×. äÅÎØ ÒÁÂÏÔÎÉËÁ ÐÒÏËÕÒÁÔÕÒÙ +13 ÑÎ×. äÅÎØ ÒÏÓÓÉÊÓËÏÊ ÐÅÞÁÔÉ +14 ÑÎ×. óÔÁÒÙÊ îÏ×ÙÊ ÇÏÄ +21 ÑÎ×. äÅÎØ ÉÎÖÅÎÅÒÎÙÈ ×ÏÊÓË +25 ÑÎ×. ôÁÔØÑÎÉÎ ÄÅÎØ. óÔÕÄÅÎÞÅÓËÉÊ ÐÒÁÚÄÎÉË + 8 ÆÅ×Ò. äÅÎØ ÒÏÓÓÉÊÓËÏÊ ÎÁÕËÉ +10 ÆÅ×Ò. äÅÎØ ÄÉÐÌÏÍÁÔÉÞÅÓËÏÇÏ ÒÁÂÏÔÎÉËÁ + 1 ÍÁÒÔÁ ÷ÓÅÍÉÒÎÙÊ ÄÅÎØ ÇÒÁÖÄÁÎÓËÏÊ ÏÂÏÒÏÎÙ 03/SunSecond äÅÎØ ÒÁÂÏÔÎÉËÏ× ÇÅÏÄÅÚÉÉ É ËÁÒÔÏÇÒÁÆÉÉ -11 ÍÁÒ äÅÎØ ÒÁÂÏÔÎÉËÁ ÏÒÇÁÎÏ× ÎÁÒËÏËÏÎÔÒÏÌÑ -18 ÍÁÒ äÅÎØ ÎÁÌÏÇÏ×ÏÊ ÐÏÌÉÃÉÉ +11 ÍÁÒÔÁ äÅÎØ ÒÁÂÏÔÎÉËÁ ÏÒÇÁÎÏ× ÎÁÒËÏËÏÎÔÒÏÌÑ +18 ÍÁÒÔÁ äÅÎØ ÎÁÌÏÇÏ×ÏÊ ÐÏÌÉÃÉÉ 03/SunThird äÅÎØ ÒÁÂÏÔÎÉËÏ× ÔÏÒÇÏ×ÌÉ, ÂÙÔÏ×ÏÇÏ ÏÂÓÌÕÖÉ×ÁÎÉÑ ÎÁÓÅÌÅÎÉÑ É ÖÉÌÉÝÎÏ-ËÏÍÍÕÎÁÌØÎÏÇÏ ÈÏÚÑÊÓÔ×Á -27 ÍÁÒ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÔÅÁÔÒÁ -27 ÍÁÒ äÅÎØ ×ÎÕÔÒÅÎÎÉÈ ×ÏÊÓË - 1 ÁÐÒ äÅÎØ ÓÍÅÈÁ - 2 ÁÐÒ äÅÎØ ÅÄÉÎÅÎÉÑ ÎÁÒÏÄÏ× +27 ÍÁÒÔÁ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÔÅÁÔÒÁ +27 ÍÁÒÔÁ äÅÎØ ×ÎÕÔÒÅÎÎÉÈ ×ÏÊÓË + 1 ÁÐÒ. äÅÎØ ÓÍÅÈÁ + 2 ÁÐÒ. äÅÎØ ÅÄÉÎÅÎÉÑ ÎÁÒÏÄÏ× 04/SunFirst äÅÎØ ÇÅÏÌÏÇÁ -12 ÁÐÒ äÅÎØ ËÏÓÍÏÎÁ×ÔÉËÉ +12 ÁÐÒ. äÅÎØ ËÏÓÍÏÎÁ×ÔÉËÉ 04/SunSecond äÅÎØ ×ÏÊÓË ÐÒÏÔÉ×Ï×ÏÚÄÕÛÎÏÊ ÏÂÏÒÏÎÙ -26 ÁÐÒ äÅÎØ ÐÁÍÑÔÉ ÐÏÇÉÂÛÉÈ × ÒÁÄÉÁÃÉÏÎÎÙÈ Á×ÁÒÉÑÈ É ËÁÔÁÓÔÒÏÆÁÈ -30 ÁÐÒ äÅÎØ ÐÏÖÁÒÎÏÊ ÏÈÒÁÎÙ - 7 ÍÁÊ äÅÎØ ÒÁÄÉÏ -17 ÍÁÊ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÔÅÌÅËÏÍÍÕÎÉËÁÃÉÊ -18 ÍÁÊ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÍÕÚÅÅ× -24 ÍÁÊ äÅÎØ ÓÌÁ×ÑÎÓËÏÊ ÐÉÓØÍÅÎÎÏÓÔÉ É ËÕÌØÔÕÒÙ -26 ÍÁÊ äÅÎØ ÒÏÓÓÉÊÓËÏÇÏ ÐÒÅÄÐÒÉÎÉÍÁÔÅÌØÓÔ×Á -27 ÍÁÊ ïÂÝÅÒÏÓÓÉÊÓËÉÊ ÄÅÎØ ÂÉÂÌÉÏÔÅË -28 ÍÁÊ äÅÎØ ÐÏÇÒÁÎÉÞÎÉËÁ -30 ÍÁÊ äÅÎØ ÐÏÖÁÒÎÏÊ ÏÈÒÁÎÙ -31 ÍÁÊ äÅÎØ òÏÓÓÉÊÓËÏÊ áÄ×ÏËÁÔÕÒÙ +26 ÁÐÒ. äÅÎØ ÐÁÍÑÔÉ ÐÏÇÉÂÛÉÈ × ÒÁÄÉÁÃÉÏÎÎÙÈ Á×ÁÒÉÑÈ É ËÁÔÁÓÔÒÏÆÁÈ +30 ÁÐÒ. 
äÅÎØ ÐÏÖÁÒÎÏÊ ÏÈÒÁÎÙ + 7 ÍÁÑ äÅÎØ ÒÁÄÉÏ +17 ÍÁÑ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÔÅÌÅËÏÍÍÕÎÉËÁÃÉÊ +18 ÍÁÑ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÍÕÚÅÅ× +24 ÍÁÑ äÅÎØ ÓÌÁ×ÑÎÓËÏÊ ÐÉÓØÍÅÎÎÏÓÔÉ É ËÕÌØÔÕÒÙ +26 ÍÁÑ äÅÎØ ÒÏÓÓÉÊÓËÏÇÏ ÐÒÅÄÐÒÉÎÉÍÁÔÅÌØÓÔ×Á +27 ÍÁÑ ïÂÝÅÒÏÓÓÉÊÓËÉÊ ÄÅÎØ ÂÉÂÌÉÏÔÅË +28 ÍÁÑ äÅÎØ ÐÏÇÒÁÎÉÞÎÉËÁ +30 ÍÁÑ äÅÎØ ÐÏÖÁÒÎÏÊ ÏÈÒÁÎÙ +31 ÍÁÑ äÅÎØ òÏÓÓÉÊÓËÏÊ áÄ×ÏËÁÔÕÒÙ 05/SunLast äÅÎØ ÈÉÍÉËÁ - 1 ÉÀÎ äÅÎØ ÚÁÝÉÔÙ ÄÅÔÅÊ - 5 ÉÀÎ äÅÎØ ÜËÏÌÏÇÁ - 6 ÉÀÎ ðÕÛËÉÎÓËÉÊ ÄÅÎØ - 8 ÉÀÎ äÅÎØ ÓÏÃÉÁÌØÎÏÇÏ ÒÁÂÏÔÎÉËÁ + 1 ÉÀÎÑ äÅÎØ ÚÁÝÉÔÙ ÄÅÔÅÊ + 5 ÉÀÎÑ äÅÎØ ÜËÏÌÏÇÁ + 6 ÉÀÎÑ ðÕÛËÉÎÓËÉÊ ÄÅÎØ + 8 ÉÀÎÑ äÅÎØ ÓÏÃÉÁÌØÎÏÇÏ ÒÁÂÏÔÎÉËÁ 06/SunSecond äÅÎØ ÒÁÂÏÔÎÉËÏ× ÌÅÇËÏÊ ÐÒÏÍÙÛÌÅÎÎÏÓÔÉ 06/SunThird äÅÎØ ÍÅÄÉÃÉÎÓËÏÇÏ ÒÁÂÏÔÎÉËÁ -22 ÉÀÎ äÅÎØ ÐÁÍÑÔÉ É ÓËÏÒÂÉ (îÁÞÁÌÏ ÷ÅÌÉËÏÊ ïÔÅÞÅÓÔ×ÅÎÎÏÊ ÷ÏÊÎÙ, 1941 ÇÏÄ) -27 ÉÀÎ äÅÎØ ÍÏÌÏÄÅÖÉ -29 ÉÀÎ äÅÎØ ÐÁÒÔÉÚÁÎ É ÐÏÄÐÏÌØÝÉËÏ× +22 ÉÀÎÑ äÅÎØ ÐÁÍÑÔÉ É ÓËÏÒÂÉ (îÁÞÁÌÏ ÷ÅÌÉËÏÊ ïÔÅÞÅÓÔ×ÅÎÎÏÊ ÷ÏÊÎÙ, 1941 ÇÏÄ) +27 ÉÀÎÑ äÅÎØ ÍÏÌÏÄÅÖÉ +29 ÉÀÎÑ äÅÎØ ÐÁÒÔÉÚÁÎ É ÐÏÄÐÏÌØÝÉËÏ× 06/SatLast äÅÎØ ÉÚÏÂÒÅÔÁÔÅÌÑ É ÒÁÃÉÏÎÁÌÉÚÁÔÏÒÁ 07/SunFirst äÅÎØ ÒÁÂÏÔÎÉËÏ× ÍÏÒÓËÏÇÏ É ÒÅÞÎÏÇÏ ÆÌÏÔÁ 07/SunSecond äÅÎØ ÒÙÂÁËÁ 07/SunSecond äÅÎØ ÒÏÓÓÉÊÓËÏÊ ÐÏÞÔÙ 07/SunThird äÅÎØ ÍÅÔÁÌÌÕÒÇÁ 07/SunLast äÅÎØ ÷ÏÅÎÎÏ-íÏÒÓËÏÇÏ æÌÏÔÁ -28 ÉÀÌ äÅÎØ ËÒÅÝÅÎÉÑ òÕÓÉ - 6 Á×Ç äÅÎØ ÖÅÌÅÚÎÏÄÏÒÏÖÎÙÈ ×ÏÊÓË +28 ÉÀÌÑ äÅÎØ ËÒÅÝÅÎÉÑ òÕÓÉ + 6 Á×Ç. äÅÎØ ÖÅÌÅÚÎÏÄÏÒÏÖÎÙÈ ×ÏÊÓË 08/SunFirst äÅÎØ ÖÅÌÅÚÎÏÄÏÒÏÖÎÉËÁ -12 Á×Ç äÅÎØ ×ÏÅÎÎÏ-×ÏÚÄÕÛÎÙÈ ÓÉÌ +12 Á×Ç. äÅÎØ ×ÏÅÎÎÏ-×ÏÚÄÕÛÎÙÈ ÓÉÌ 08/SunSecond äÅÎØ ÓÔÒÏÉÔÅÌÑ 08/SunThird äÅÎØ ÷ÏÚÄÕÛÎÏÇÏ æÌÏÔÁ -22 Á×Ç äÅÎØ ÇÏÓÕÄÁÒÓÔ×ÅÎÎÏÇÏ ÆÌÁÇÁ -27 Á×Ç äÅÎØ ËÉÎÏ +22 Á×Ç. äÅÎØ ÇÏÓÕÄÁÒÓÔ×ÅÎÎÏÇÏ ÆÌÁÇÁ +27 Á×Ç. äÅÎØ ËÉÎÏ 08/SunLast äÅÎØ ÛÁÈÔÅÒÁ - 1 ÓÅÎ äÅÎØ ÚÎÁÎÉÊ - 2 ÓÅÎ äÅÎØ ÒÏÓÓÉÊÓËÏÊ Ç×ÁÒÄÉÉ - 3 ÓÅÎ äÅÎØ ÓÏÌÉÄÁÒÎÏÓÔÉ × ÂÏÒØÂÅ Ó ÔÅÒÒÏÒÉÚÍÏÍ - 4 ÓÅÎ äÅÎØ ÓÐÅÃÉÁÌÉÓÔÁ ÐÏ ÑÄÅÒÎÏÍÕ ÏÂÅÓÐÅÞÅÎÉÀ + 1 ÓÅÎÔ. äÅÎØ ÚÎÁÎÉÊ + 2 ÓÅÎÔ. äÅÎØ ÒÏÓÓÉÊÓËÏÊ Ç×ÁÒÄÉÉ + 3 ÓÅÎÔ. äÅÎØ ÓÏÌÉÄÁÒÎÏÓÔÉ × ÂÏÒØÂÅ Ó ÔÅÒÒÏÒÉÚÍÏÍ + 4 ÓÅÎÔ. äÅÎØ ÓÐÅÃÉÁÌÉÓÔÁ ÐÏ ÑÄÅÒÎÏÍÕ ÏÂÅÓÐÅÞÅÎÉÀ 09/SunFirst äÅÎØ ÒÁÂÏÔÎÉËÏ× ÎÅÆÔÑÎÏÊ É ÇÁÚÏ×ÏÊ ÐÒÏÍÙÛÌÅÎÎÏÓÔÉ 09/SunSecond äÅÎØ ÔÁÎËÉÓÔÁ 09/SunThird äÅÎØ ÒÁÂÏÔÎÉËÏ× ÌÅÓÁ -28 ÓÅÎ äÅÎØ ÒÁÂÏÔÎÉËÁ ÁÔÏÍÎÏÊ ÐÒÏÍÙÛÌÅÎÎÏÓÔÉ +28 ÓÅÎÔ. äÅÎØ ÒÁÂÏÔÎÉËÁ ÁÔÏÍÎÏÊ ÐÒÏÍÙÛÌÅÎÎÏÓÔÉ 09/SunLast äÅÎØ ÍÁÛÉÎÏÓÔÒÏÉÔÅÌÑ - 1 ÏËÔ äÅÎØ ÐÏÖÉÌÙÈ ÌÀÄÅÊ - 1 ÏËÔ äÅÎØ ÓÕÈÏÐÕÔÎÙÈ ×ÏÊÓË - 4 ÏËÔ äÅÎØ ËÏÓÍÉÞÅÓËÉÈ ×ÏÊÓË - 5 ÏËÔ äÅÎØ ÕÞÉÔÅÌÑ -14 ÏËÔ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÓÔÁÎÄÁÒÔÉÚÁÃÉÉ + 1 ÏËÔ. äÅÎØ ÐÏÖÉÌÙÈ ÌÀÄÅÊ + 1 ÏËÔ. äÅÎØ ÓÕÈÏÐÕÔÎÙÈ ×ÏÊÓË + 4 ÏËÔ. äÅÎØ ËÏÓÍÉÞÅÓËÉÈ ×ÏÊÓË + 5 ÏËÔ. äÅÎØ ÕÞÉÔÅÌÑ +14 ÏËÔ. íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÓÔÁÎÄÁÒÔÉÚÁÃÉÉ 10/SunSecond äÅÎØ ÒÁÂÏÔÎÉËÏ× ÓÅÌØÓËÏÇÏ ÈÏÚÑÊÓÔ×Á É ÐÅÒÅÒÁÂÁÔÙ×ÁÀÝÅÊ ÐÒÏÍÙÛÌÅÎÎÏÓÔÉ 10/SunThird äÅÎØ ÒÁÂÏÔÎÉËÏ× ÄÏÒÏÖÎÏÇÏ ÈÏÚÑÊÓÔ×Á -24 ÏËÔ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ïïî -25 ÏËÔ äÅÎØ ÔÁÍÏÖÅÎÎÉËÁ -30 ÏËÔ äÅÎØ ÐÁÍÑÔÉ ÖÅÒÔ× ÐÏÌÉÔÉÞÅÓËÉÈ ÒÅÐÒÅÓÓÉÊ +24 ÏËÔ. íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ïïî +25 ÏËÔ. äÅÎØ ÔÁÍÏÖÅÎÎÉËÁ +30 ÏËÔ. äÅÎØ ÐÁÍÑÔÉ ÖÅÒÔ× ÐÏÌÉÔÉÞÅÓËÉÈ ÒÅÐÒÅÓÓÉÊ 10/SunLast äÅÎØ ÒÁÂÏÔÎÉËÏ× Á×ÔÏÍÏÂÉÌØÎÏÇÏ ÔÒÁÎÓÐÏÒÔÁ - 7 ÎÏÑ äÅÎØ ÏËÔÑÂÒØÓËÏÊ ÒÅ×ÏÌÀÃÉÉ 1917 ÇÏÄÁ - 9 ÎÏÑ ÷ÓÅÍÉÒÎÙÊ ÄÅÎØ ËÁÞÅÓÔ×Á -10 ÎÏÑ äÅÎØ ÍÉÌÉÃÉÉ -16 ÎÏÑ äÅÎØ ÍÏÒÓËÏÊ ÐÅÈÏÔÙ -17 ÎÏÑ íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÓÔÕÄÅÎÔÏ× -19 ÎÏÑ äÅÎØ ÒÁËÅÔÎÙÈ ×ÏÊÓË É ÁÒÔÉÌÌÅÒÉÉ -21 ÎÏÑ äÅÎØ ÒÁÂÏÔÎÉËÏ× ÎÁÌÏÇÏ×ÙÈ ÏÒÇÁÎÏ× -26 ÎÏÑ ÷ÓÅÍÉÒÎÙÊ ÄÅÎØ ÉÎÆÏÒÍÁÃÉÉ + 7 ÎÏÑÂ. äÅÎØ ÏËÔÑÂÒØÓËÏÊ ÒÅ×ÏÌÀÃÉÉ 1917 ÇÏÄÁ + 9 ÎÏÑÂ. ÷ÓÅÍÉÒÎÙÊ ÄÅÎØ ËÁÞÅÓÔ×Á +10 ÎÏÑÂ. äÅÎØ ÍÉÌÉÃÉÉ +16 ÎÏÑÂ. äÅÎØ ÍÏÒÓËÏÊ ÐÅÈÏÔÙ +17 ÎÏÑÂ. íÅÖÄÕÎÁÒÏÄÎÙÊ ÄÅÎØ ÓÔÕÄÅÎÔÏ× +19 ÎÏÑÂ. äÅÎØ ÒÁËÅÔÎÙÈ ×ÏÊÓË É ÁÒÔÉÌÌÅÒÉÉ +21 ÎÏÑÂ. äÅÎØ ÒÁÂÏÔÎÉËÏ× ÎÁÌÏÇÏ×ÙÈ ÏÒÇÁÎÏ× +26 ÎÏÑÂ. 
÷ÓÅÍÉÒÎÙÊ ÄÅÎØ ÉÎÆÏÒÍÁÃÉÉ 11/SunLast äÅÎØ ÍÁÔÅÒÉ - 1 ÄÅË ÷ÓÅÍÉÒÎÙÊ ÄÅÎØ ÂÏÒØÂÙ ÓÏ óðéäÏÍ - 3 ÄÅË äÅÎØ ÀÒÉÓÔÁ - 9 ÄÅË äÅÎØ çÅÒÏÅ× ïÔÅÞÅÓÔ×Á -12 ÄÅË äÅÎØ ëÏÎÓÔÉÔÕÃÉÉ -17 ÄÅË äÅÎØ ÒÁËÅÔÎÙÈ ×ÏÊÓË ÓÔÒÁÔÅÇÉÞÅÓËÏÇÏ ÎÁÚÎÁÞÅÎÉÑ -20 ÄÅË äÅÎØ ÒÁÂÏÔÎÉËÁ ÏÒÇÁÎÏ× ÂÅÚÏÐÁÓÎÏÓÔÉ -22 ÄÅË äÅÎØ ÜÎÅÒÇÅÔÉËÁ -27 ÄÅË äÅÎØ ÓÐÁÓÁÔÅÌÑ + 1 ÄÅË. ÷ÓÅÍÉÒÎÙÊ ÄÅÎØ ÂÏÒØÂÙ ÓÏ óðéäÏÍ + 3 ÄÅË. äÅÎØ ÀÒÉÓÔÁ + 9 ÄÅË. äÅÎØ çÅÒÏÅ× ïÔÅÞÅÓÔ×Á +12 ÄÅË. äÅÎØ ëÏÎÓÔÉÔÕÃÉÉ +17 ÄÅË. äÅÎØ ÒÁËÅÔÎÙÈ ×ÏÊÓË ÓÔÒÁÔÅÇÉÞÅÓËÏÇÏ ÎÁÚÎÁÞÅÎÉÑ +20 ÄÅË. äÅÎØ ÒÁÂÏÔÎÉËÁ ÏÒÇÁÎÏ× ÂÅÚÏÐÁÓÎÏÓÔÉ +22 ÄÅË. äÅÎØ ÜÎÅÒÇÅÔÉËÁ +27 ÄÅË. äÅÎØ ÓÐÁÓÁÔÅÌÑ #endif /* !_ru_RU_KOI8_R_common_ */ Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.holiday =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.holiday (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.holiday (revision 303642) @@ -1,25 +1,25 @@ /* * òÏÓÓÉÊÓËÉÅ ÐÒÁÚÄÎÉËÉ (ÎÅÒÁÂÏÞÉÅ "ËÒÁÓÎÙÅ" ÄÎÉ) * * $FreeBSD$ */ #ifndef _ru_RU_KOI8_R_holiday_ #define _ru_RU_KOI8_R_holiday_ LANG=ru_RU.KOI8-R - 1 ÑÎ× îÏ×ÙÊ ÇÏÄ - 2 ÑÎ× îÏ×ÏÇÏÄÎÉÅ ËÁÎÉËÕÌÙ - 3 ÑÎ× îÏ×ÏÇÏÄÎÉÅ ËÁÎÉËÕÌÙ - 4 ÑÎ× îÏ×ÏÇÏÄÎÉÅ ËÁÎÉËÕÌÙ - 5 ÑÎ× îÏ×ÏÇÏÄÎÉÅ ËÁÎÉËÕÌÙ - 7 ÑÎ× òÏÖÄÅÓÔ×Ï èÒÉÓÔÏ×Ï -23 ÆÅ× äÅÎØ ÚÁÝÉÔÎÉËÁ ïÔÅÞÅÓÔ×Á - 8 ÍÁÒ íÅÖÄÕÎÁÒÏÄÎÙÊ ÖÅÎÓËÉÊ ÄÅÎØ - 1 ÍÁÊ ðÒÁÚÄÎÉË ÷ÅÓÎÙ É ôÒÕÄÁ - 9 ÍÁÊ äÅÎØ ðÏÂÅÄÙ -12 ÉÀÎ äÅÎØ òÏÓÓÉÉ - 4 ÎÏÑ äÅÎØ ÎÁÒÏÄÎÏÇÏ ÅÄÉÎÓÔ×Á + 1 ÑÎ×. îÏ×ÙÊ ÇÏÄ + 2 ÑÎ×. îÏ×ÏÇÏÄÎÉÅ ËÁÎÉËÕÌÙ + 3 ÑÎ×. îÏ×ÏÇÏÄÎÉÅ ËÁÎÉËÕÌÙ + 4 ÑÎ×. îÏ×ÏÇÏÄÎÉÅ ËÁÎÉËÕÌÙ + 5 ÑÎ×. îÏ×ÏÇÏÄÎÉÅ ËÁÎÉËÕÌÙ + 7 ÑÎ×. òÏÖÄÅÓÔ×Ï èÒÉÓÔÏ×Ï +23 ÆÅ×Ò. äÅÎØ ÚÁÝÉÔÎÉËÁ ïÔÅÞÅÓÔ×Á + 8 ÍÁÒÔÁ íÅÖÄÕÎÁÒÏÄÎÙÊ ÖÅÎÓËÉÊ ÄÅÎØ + 1 ÍÁÑ ðÒÁÚÄÎÉË ÷ÅÓÎÙ É ôÒÕÄÁ + 9 ÍÁÑ äÅÎØ ðÏÂÅÄÙ +12 ÉÀÎÑ äÅÎØ òÏÓÓÉÉ + 4 ÎÏÑÂ. äÅÎØ ÎÁÒÏÄÎÏÇÏ ÅÄÉÎÓÔ×Á #endif /* !_ru_RU_KOI8_R_holiday_ */ Property changes on: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.holiday ___________________________________________________________________ Added: fbsd:notbinary ## -0,0 +1 ## +yes \ No newline at end of property Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.military =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.military (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.military (revision 303642) @@ -1,28 +1,28 @@ /* * äÎÉ ×ÏÉÎÓËÏÊ ÓÌÁ×Ù òÏÓÓÉÉ * * $FreeBSD$ */ #ifndef _ru_RU_KOI8_R_military_ #define _ru_RU_KOI8_R_military_ LANG=ru_RU.KOI8-R -27 ÑÎ× äÅÎØ ÓÎÑÔÉÑ ÂÌÏËÁÄÙ ÇÏÒÏÄÁ ìÅÎÉÎÇÒÁÄÁ (1944 ÇÏÄ) - 2 ÆÅ× äÅÎØ ÒÁÚÇÒÏÍÁ ÓÏ×ÅÔÓËÉÍÉ ×ÏÊÓËÁÍÉ ÎÅÍÅÃËÏ-ÆÁÛÉÓÔÓËÉÈ ×ÏÊÓË × óÔÁÌÉÎÇÒÁÄÓËÏÊ ÂÉÔ×Å (1943 ÇÏÄ) -23 ÆÅ× äÅÎØ ÐÏÂÅÄÙ ëÒÁÓÎÏÊ áÒÍÉÉ ÎÁÄ ËÁÊÚÅÒÏ×ÓËÉÍÉ ×ÏÊÓËÁÍÉ çÅÒÍÁÎÉÉ (1918 ÇÏÄ) -18 ÁÐÒ äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÉÈ ×ÏÉÎÏ× ËÎÑÚÑ áÌÅËÓÁÎÄÒÁ îÅ×ÓËÏÇÏ ÎÁÄ ÎÅÍÅÃËÉÍÉ ÒÙÃÁÒÑÍÉ ÎÁ þÕÄÓËÏÍ ÏÚÅÒÅ (ìÅÄÏ×ÏÅ ÐÏÂÏÉÝÅ, 1242 ÇÏÄ) -10 ÉÀÌ äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÏÊ ÁÒÍÉÉ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ ðÅÔÒÁ ðÅÒ×ÏÇÏ ÎÁÄ Û×ÅÄÁÍÉ × ðÏÌÔÁ×ÓËÏÍ ÓÒÁÖÅÎÉÉ (1709 ÇÏÄ) - 9 Á×Ç äÅÎØ ÐÅÒ×ÏÊ × ÒÏÓÓÉÊÓËÏÊ ÉÓÔÏÒÉÉ ÍÏÒÓËÏÊ ÐÏÂÅÄÙ ÒÕÓÓËÏÇÏ ÆÌÏÔÁ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ ðÅÔÒÁ ðÅÒ×ÏÇÏ ÎÁÄ Û×ÅÄÁÍÉ Õ ÍÙÓÁ çÁÎÇÕÔ (1714 ÇÏÄ) -23 Á×Ç äÅÎØ ÒÁÚÇÒÏÍÁ ÓÏ×ÅÔÓËÉÍÉ ×ÏÊÓËÁÍÉ ÎÅÍÅÃËÏ-ÆÁÛÉÓÔÓËÉÈ ×ÏÊÓË × ëÕÒÓËÏÊ ÂÉÔ×Å (1943 ÇÏÄ) - 2 ÓÅÎ äÅÎØ ÏËÏÎÞÁÎÉÑ ÷ÔÏÒÏÊ ÍÉÒÏ×ÏÊ ×ÏÊÎÙ (1945 ÇÏÄ) - 8 ÓÅÎ äÅÎØ âÏÒÏÄÉÎÓËÏÇÏ ÓÒÁÖÅÎÉÑ ÒÕÓÓËÏÊ ÁÒÍÉÉ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ í.é. ëÕÔÕÚÏ×Á Ó ÆÒÁÎÃÕÚÓËÏÊ ÁÒÍÉÅÊ (1812 ÇÏÄ) -11 ÓÅÎ äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÏÊ ÜÓËÁÄÒÙ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ æ.æ. 
õÛÁËÏ×Á ÎÁÄ ÔÕÒÅÃËÏÊ ÜÓËÁÄÒÏÊ Õ ÍÙÓÁ ôÅÎÄÒÁ (1790 ÇÏÄ) -21 ÓÅÎ äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÉÈ ÐÏÌËÏ× ×Ï ÇÌÁ×Å Ó ×ÅÌÉËÉÍ ËÎÑÚÅÍ äÍÉÔÒÉÅÍ äÏÎÓËÉÍ ÎÁÄ ÍÏÎÇÏÌÏ-ÔÁÔÁÒÓËÉÍÉ ×ÏÊÓËÁÍÉ × ëÕÌÉËÏ×ÓËÏÊ ÂÉÔ×Å (1380 ÇÏÄ) - 7 ÎÏÑ äÅÎØ ÏÓ×ÏÂÏÖÄÅÎÉÑ íÏÓË×Ù ÓÉÌÁÍÉ ÎÁÒÏÄÎÏÇÏ ÏÐÏÌÞÅÎÉÑ ÐÏÄ ÒÕËÏ×ÏÄÓÔ×ÏÍ ëÕÚØÍÙ íÉÎÉÎÁ É äÍÉÔÒÉÑ ðÏÖÁÒÓËÏÇÏ ÏÔ ÐÏÌØÓËÉÈ ÉÎÔÅÒ×ÅÎÔÏ× (1612 ÇÏÄ) - 1 ÄÅË äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÏÊ ÜÓËÁÄÒÙ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ ð.ó. îÁÈÉÍÏ×Á ÎÁÄ ÔÕÒÅÃËÏÊ ÜÓËÁÄÒÏÊ Õ ÍÙÓÁ óÉÎÏÐ (1853 ÇÏÄ) - 5 ÄÅË äÅÎØ ÎÁÞÁÌÁ ËÏÎÔÒÎÁÓÔÕÐÌÅÎÉÑ ÓÏ×ÅÔÓËÉÈ ×ÏÊÓË ÐÒÏÔÉ× ÎÅÍÅÃËÏ-ÆÁÛÉÓÔÓËÉÈ ×ÏÊÓË × ÂÉÔ×Å ÐÏÄ íÏÓË×ÏÊ (1941 ÇÏÄ) -24 ÄÅË äÅÎØ ×ÚÑÔÉÑ ÔÕÒÅÃËÏÊ ËÒÅÐÏÓÔÉ éÚÍÁÉÌ ÒÕÓÓËÉÍÉ ×ÏÊÓËÁÍÉ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ á.÷. óÕ×ÏÒÏ×Á (1790 ÇÏÄ) +27 ÑÎ×. äÅÎØ ÓÎÑÔÉÑ ÂÌÏËÁÄÙ ÇÏÒÏÄÁ ìÅÎÉÎÇÒÁÄÁ (1944 ÇÏÄ) + 2 ÆÅ×Ò. äÅÎØ ÒÁÚÇÒÏÍÁ ÓÏ×ÅÔÓËÉÍÉ ×ÏÊÓËÁÍÉ ÎÅÍÅÃËÏ-ÆÁÛÉÓÔÓËÉÈ ×ÏÊÓË × óÔÁÌÉÎÇÒÁÄÓËÏÊ ÂÉÔ×Å (1943 ÇÏÄ) +23 ÆÅ×Ò. äÅÎØ ÐÏÂÅÄÙ ëÒÁÓÎÏÊ áÒÍÉÉ ÎÁÄ ËÁÊÚÅÒÏ×ÓËÉÍÉ ×ÏÊÓËÁÍÉ çÅÒÍÁÎÉÉ (1918 ÇÏÄ) +18 ÁÐÒ. äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÉÈ ×ÏÉÎÏ× ËÎÑÚÑ áÌÅËÓÁÎÄÒÁ îÅ×ÓËÏÇÏ ÎÁÄ ÎÅÍÅÃËÉÍÉ ÒÙÃÁÒÑÍÉ ÎÁ þÕÄÓËÏÍ ÏÚÅÒÅ (ìÅÄÏ×ÏÅ ÐÏÂÏÉÝÅ, 1242 ÇÏÄ) +10 ÉÀÌÑ äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÏÊ ÁÒÍÉÉ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ ðÅÔÒÁ ðÅÒ×ÏÇÏ ÎÁÄ Û×ÅÄÁÍÉ × ðÏÌÔÁ×ÓËÏÍ ÓÒÁÖÅÎÉÉ (1709 ÇÏÄ) + 9 Á×Ç. äÅÎØ ÐÅÒ×ÏÊ × ÒÏÓÓÉÊÓËÏÊ ÉÓÔÏÒÉÉ ÍÏÒÓËÏÊ ÐÏÂÅÄÙ ÒÕÓÓËÏÇÏ ÆÌÏÔÁ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ ðÅÔÒÁ ðÅÒ×ÏÇÏ ÎÁÄ Û×ÅÄÁÍÉ Õ ÍÙÓÁ çÁÎÇÕÔ (1714 ÇÏÄ) +23 Á×Ç. äÅÎØ ÒÁÚÇÒÏÍÁ ÓÏ×ÅÔÓËÉÍÉ ×ÏÊÓËÁÍÉ ÎÅÍÅÃËÏ-ÆÁÛÉÓÔÓËÉÈ ×ÏÊÓË × ëÕÒÓËÏÊ ÂÉÔ×Å (1943 ÇÏÄ) + 2 ÓÅÎÔ. äÅÎØ ÏËÏÎÞÁÎÉÑ ÷ÔÏÒÏÊ ÍÉÒÏ×ÏÊ ×ÏÊÎÙ (1945 ÇÏÄ) + 8 ÓÅÎÔ. äÅÎØ âÏÒÏÄÉÎÓËÏÇÏ ÓÒÁÖÅÎÉÑ ÒÕÓÓËÏÊ ÁÒÍÉÉ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ í.é. ëÕÔÕÚÏ×Á Ó ÆÒÁÎÃÕÚÓËÏÊ ÁÒÍÉÅÊ (1812 ÇÏÄ) +11 ÓÅÎÔ. äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÏÊ ÜÓËÁÄÒÙ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ æ.æ. õÛÁËÏ×Á ÎÁÄ ÔÕÒÅÃËÏÊ ÜÓËÁÄÒÏÊ Õ ÍÙÓÁ ôÅÎÄÒÁ (1790 ÇÏÄ) +21 ÓÅÎÔ. äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÉÈ ÐÏÌËÏ× ×Ï ÇÌÁ×Å Ó ×ÅÌÉËÉÍ ËÎÑÚÅÍ äÍÉÔÒÉÅÍ äÏÎÓËÉÍ ÎÁÄ ÍÏÎÇÏÌÏ-ÔÁÔÁÒÓËÉÍÉ ×ÏÊÓËÁÍÉ × ëÕÌÉËÏ×ÓËÏÊ ÂÉÔ×Å (1380 ÇÏÄ) + 4 ÎÏÑÂ. äÅÎØ ÏÓ×ÏÂÏÖÄÅÎÉÑ íÏÓË×Ù ÓÉÌÁÍÉ ÎÁÒÏÄÎÏÇÏ ÏÐÏÌÞÅÎÉÑ ÐÏÄ ÒÕËÏ×ÏÄÓÔ×ÏÍ ëÕÚØÍÙ íÉÎÉÎÁ É äÍÉÔÒÉÑ ðÏÖÁÒÓËÏÇÏ ÏÔ ÐÏÌØÓËÉÈ ÉÎÔÅÒ×ÅÎÔÏ× (1612 ÇÏÄ) + 1 ÄÅË. äÅÎØ ÐÏÂÅÄÙ ÒÕÓÓËÏÊ ÜÓËÁÄÒÙ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ ð.ó. îÁÈÉÍÏ×Á ÎÁÄ ÔÕÒÅÃËÏÊ ÜÓËÁÄÒÏÊ Õ ÍÙÓÁ óÉÎÏÐ (1853 ÇÏÄ) + 5 ÄÅË. äÅÎØ ÎÁÞÁÌÁ ËÏÎÔÒÎÁÓÔÕÐÌÅÎÉÑ ÓÏ×ÅÔÓËÉÈ ×ÏÊÓË ÐÒÏÔÉ× ÎÅÍÅÃËÏ-ÆÁÛÉÓÔÓËÉÈ ×ÏÊÓË × ÂÉÔ×Å ÐÏÄ íÏÓË×ÏÊ (1941 ÇÏÄ) +24 ÄÅË. äÅÎØ ×ÚÑÔÉÑ ÔÕÒÅÃËÏÊ ËÒÅÐÏÓÔÉ éÚÍÁÉÌ ÒÕÓÓËÉÍÉ ×ÏÊÓËÁÍÉ ÐÏÄ ËÏÍÁÎÄÏ×ÁÎÉÅÍ á.÷. óÕ×ÏÒÏ×Á (1790 ÇÏÄ) #endif /* !_ru_RU_KOI8_R_military_ */ Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.orthodox =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.orthodox (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.orthodox (revision 303642) @@ -1,36 +1,36 @@ /* * ðÒÁ×ÏÓÌÁ×ÎÙÅ ÐÒÁÚÄÎÉËÉ * * $FreeBSD$ */ #ifndef _ru_RU_KOI8_R_orthodox_ #define _ru_RU_KOI8_R_orthodox_ LANG=ru_RU.KOI8-R Paskha=ðÁÓÈÁ -21 ÓÅÎ òÏÖÄÅÓÔ×Ï ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ -27 ÓÅÎ ÷ÏÚÄ×ÉÖÅÎÉÅ ëÒÅÓÔÁ çÏÓÐÏÄÎÑ -14 ÏËÔ ðÏËÒÏ× ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ - 4 ÄÅË ÷×ÅÄÅÎÉÅ ×Ï ÈÒÁÍ ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ - 7 ÑÎ× òÏÖÄÅÓÔ×Ï èÒÉÓÔÏ×Ï -14 ÑÎ× ïÂÒÅÚÁÎÉÅ çÏÓÐÏÄÎÅ -19 ÑÎ× âÏÇÏÑ×ÌÅÎÉÅ ÉÌÉ ëÒÅÝÅÎÉÅ çÏÓÐÏÄÎÅ -15 ÆÅ× óÒÅÔÅÎÉÅ çÏÓÐÏÄÎÅ +21 ÓÅÎÔ. òÏÖÄÅÓÔ×Ï ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ +27 ÓÅÎÔ. ÷ÏÚÄ×ÉÖÅÎÉÅ ëÒÅÓÔÁ çÏÓÐÏÄÎÑ +14 ÏËÔ. ðÏËÒÏ× ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ + 4 ÄÅË. ÷×ÅÄÅÎÉÅ ×Ï ÈÒÁÍ ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ + 7 ÑÎ×. òÏÖÄÅÓÔ×Ï èÒÉÓÔÏ×Ï +14 ÑÎ×. ïÂÒÅÚÁÎÉÅ çÏÓÐÏÄÎÅ +19 ÑÎ×. âÏÇÏÑ×ÌÅÎÉÅ ÉÌÉ ëÒÅÝÅÎÉÅ çÏÓÐÏÄÎÅ +15 ÆÅ×Ò. óÒÅÔÅÎÉÅ çÏÓÐÏÄÎÅ ðÁÓÈÁ-48 ÷ÅÌÉËÉÊ ðÏÓÔ ðÁÓÈÁ-7 ÷ÈÏÄ çÏÓÐÏÄÅÎØ × éÅÒÕÓÁÌÉÍ. 
÷ÅÒÂÎÏÅ ÷ÏÓËÒÅÓÅÎØÅ ðÁÓÈÁ-3 ÷ÅÌÉËÉÊ þÅÔ×ÅÒÇ ðÁÓÈÁ-2 óÔÒÁÓÔÎÁÑ ðÑÔÎÉÃÁ ðÁÓÈÁ ÷ÏÓËÒÅÓÅÎÉÅ èÒÉÓÔÏ×Ï ðÁÓÈÁ+39 ÷ÏÚÎÅÓÅÎÉÅ ðÁÓÈÁ+49 äÅÎØ ó×ÑÔÏÊ ôÒÏÉÃÙ. ðÑÔÉÄÅÓÑÔÎÉÃÁ - 7 ÁÐÒ âÌÁÇÏ×ÅÝÅÎÉÅ ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ - 7 ÉÀÌ òÏÖÄÅÓÔ×Ï éÏÁÎÎÁ ðÒÅÄÔÅÞÉ -12 ÉÀÌ äÅÎØ Ó×ÑÔÙÈ ÐÅÒ×Ï×ÅÒÈÏ×ÎÙÈ ÁÐÏÓÔÏÌÏ× ðÅÔÒÁ É ðÁ×ÌÁ -19 Á×Ç ðÒÅÏÂÒÁÖÅÎÉÅ çÏÓÐÏÄÎÅ -28 Á×Ç õÓÐÅÎÉÅ ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ -11 ÓÅÎ õÓÅËÎÏ×ÅÎÉÅ ÇÌÁ×Ù éÏÁÎÎÁ ðÒÅÄÔÅÞÉ + 7 ÁÐÒ. âÌÁÇÏ×ÅÝÅÎÉÅ ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ + 7 ÉÀÌÑ òÏÖÄÅÓÔ×Ï éÏÁÎÎÁ ðÒÅÄÔÅÞÉ +12 ÉÀÌÑ äÅÎØ Ó×ÑÔÙÈ ÐÅÒ×Ï×ÅÒÈÏ×ÎÙÈ ÁÐÏÓÔÏÌÏ× ðÅÔÒÁ É ðÁ×ÌÁ +19 Á×Ç. ðÒÅÏÂÒÁÖÅÎÉÅ çÏÓÐÏÄÎÅ +28 Á×Ç. õÓÐÅÎÉÅ ðÒÅÓ×ÑÔÏÊ âÏÇÏÒÏÄÉÃÙ +11 ÓÅÎÔ. õÓÅËÎÏ×ÅÎÉÅ ÇÌÁ×Ù éÏÁÎÎÁ ðÒÅÄÔÅÞÉ #endif /* !_ru_RU_KOI8_R_orthodox_ */ Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.pagan =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.pagan (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.KOI8-R/calendar.pagan (revision 303642) @@ -1,42 +1,42 @@ /* * ñÚÙÞÅÓËÉÅ ÐÒÁÚÄÎÉËÉ * * $FreeBSD$ */ #ifndef _ru_RU_KOI8_R_pagan_ #define _ru_RU_KOI8_R_pagan_ LANG=ru_RU.KOI8-R Paskha=ðÁÓÈÁ -21 ÄÅË* úÉÍÎÅÅ ÓÏÌÎÃÅÓÔÏÑÎÉÅ -25 ÄÅË ëÏÌÑÄÁ (ÓÄ×ÉÎÕÔÏÅ ÚÉÍÎÅÅ ÓÏÌÎÃÅÓÔÏÑÎÉÅ) - 6 ÑÎ× äÅÎØ ëÁÝÅÑ É ÷ÅÌÅÓÁ -24 ÆÅ× äÅÎØ ÷ÅÌÅÓÁ -29 ÆÅ× äÅÎØ ëÁÝÅÑ - 1 ÍÁÒ äÅÎØ íÁÒÅÎÙ -14 ÍÁÒ îÏ×ÙÊ çÏÄ, ï×ÓÅÎØ ÍÁÌÙÊ +21 ÄÅË.* úÉÍÎÅÅ ÓÏÌÎÃÅÓÔÏÑÎÉÅ +25 ÄÅË. ëÏÌÑÄÁ (ÓÄ×ÉÎÕÔÏÅ ÚÉÍÎÅÅ ÓÏÌÎÃÅÓÔÏÑÎÉÅ) + 6 ÑÎ×. äÅÎØ ëÁÝÅÑ É ÷ÅÌÅÓÁ +24 ÆÅ×Ò. äÅÎØ ÷ÅÌÅÓÁ +29 ÆÅ×Ò. äÅÎØ ëÁÝÅÑ + 1 ÍÁÒÔÁ äÅÎØ íÁÒÅÎÙ +14 ÍÁÒÔÁ îÏ×ÙÊ çÏÄ, ï×ÓÅÎØ ÍÁÌÙÊ ðÁÓÈÁ-55 íÁÓÌÅÎÉÃÁ ðÁÓÈÁ+7 ëÒÁÓÎÁÑ çÏÒËÁ ðÁÓÈÁ+16 òÁÄÕÎÉÃÁ -20 ÍÁÒ* ÷ÅÓÅÎÎÉÅ ÒÁ×ÎÏÄÅÎÓÔ×ÉÅ - 7 ÁÐÒ äÅÎØ íÁÒÅÎÙ (ÓÄ×ÉÎÕÔÏÅ ×ÅÓÅÎÎÅÅ ÒÁ×ÎÏÄÅÎÓÔ×ÉÅ) - 6 ÍÁÊ äÅÎØ äÁÖØÂÏÇÁ, ï×ÓÅÎØ ÂÏÌØÛÏÊ -22 ÍÁÊ ñÒÉÌÉÎ äÅÎØ -15 ÉÀÎ äÅÎØ ôÒÉÇÌÁ×Á -21 ÉÀÎ* ìÅÔÎÅÅ ÓÏÌÎÃÅÓÔÏÑÎÉÅ - 1 ÉÀÌ òÕÓÁÌØÎÁÑ îÅÄÅÌÑ - 7 ÉÀÌ ëÕÐÁÌÁ (ÓÄ×ÉÎÕÔÏÅ ÌÅÔÎÅÅ ÓÏÌÎÃÅÓÔÏÑÎÉÅ) -27 ÉÀÌ ïÔÂÏÒ ÖÅÒÔ× ðÅÒÕÎÕ, ÒÕÓÁÌÉÉ - 2 Á×Ç ðÅÒÕÎÏ× äÅÎØ -21 Á×Ç äÅÎØ óÔÒÉÂÏÇÁ -28 Á×Ç õÓÐÅÎÉÅ úÌÁÔÏÇÏÒËÉ -14 ÓÅÎ äÅÎØ ÷ÏÌÈÁ úÍÅÅ×ÉÞÁ -22 ÓÅÎ* ðÏ×ÏÒÏÔ Ë ÚÉÍÅ (ÏÓÅÎÎÅÅ ÒÁ×ÎÏÄÅÎÓÔ×ÉÅ) -10 ÎÏÑ äÅÎØ íÁËÏÛÉ -21 ÎÏÑ äÅÎØ ó×ÁÒÏÇÁ É óÅÍÁÒÇÌÁ - 9 ÄÅË äÅÎØ äÁÖØÂÏÇÁ É íÁÒÅÎÙ +20 ÍÁÒÔÁ* ÷ÅÓÅÎÎÅÅ ÒÁ×ÎÏÄÅÎÓÔ×ÉÅ + 7 ÁÐÒ. äÅÎØ íÁÒÅÎÙ (ÓÄ×ÉÎÕÔÏÅ ×ÅÓÅÎÎÅÅ ÒÁ×ÎÏÄÅÎÓÔ×ÉÅ) + 6 ÍÁÑ äÅÎØ äÁÖØÂÏÇÁ, ï×ÓÅÎØ ÂÏÌØÛÏÊ +22 ÍÁÑ ñÒÉÌÉÎ äÅÎØ +15 ÉÀÎÑ äÅÎØ ôÒÉÇÌÁ×Á +21 ÉÀÎÑ* ìÅÔÎÅÅ ÓÏÌÎÃÅÓÔÏÑÎÉÅ + 1 ÉÀÌÑ òÕÓÁÌØÎÁÑ îÅÄÅÌÑ + 7 ÉÀÌÑ ëÕÐÁÌÁ (ÓÄ×ÉÎÕÔÏÅ ÌÅÔÎÅÅ ÓÏÌÎÃÅÓÔÏÑÎÉÅ) +27 ÉÀÌÑ ïÔÂÏÒ ÖÅÒÔ× ðÅÒÕÎÕ, ÒÕÓÁÌÉÉ + 2 Á×Ç. ðÅÒÕÎÏ× äÅÎØ +21 Á×Ç. äÅÎØ óÔÒÉÂÏÇÁ +28 Á×Ç. õÓÐÅÎÉÅ úÌÁÔÏÇÏÒËÉ +14 ÓÅÎÔ. äÅÎØ ÷ÏÌÈÁ úÍÅÅ×ÉÞÁ +22 ÓÅÎÔ.* ðÏ×ÏÒÏÔ Ë ÚÉÍÅ (ÏÓÅÎÎÅÅ ÒÁ×ÎÏÄÅÎÓÔ×ÉÅ) +10 ÎÏÑÂ. äÅÎØ íÁËÏÛÉ +21 ÎÏÑÂ. äÅÎØ ó×ÁÒÏÇÁ É óÅÍÁÒÇÌÁ + 9 ÄÅË. äÅÎØ äÁÖØÂÏÇÁ É íÁÒÅÎÙ #endif /* !_ru_RU_KOI8_R_pagan_ */ Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.common =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.common (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.common (revision 303642) @@ -1,105 +1,105 @@ /* * РоÑÑийÑкие праздники * * $FreeBSD$ */ #ifndef _ru_RU_UTF_8_common_ #define _ru_RU_UTF_8_common_ LANG=ru_RU.UTF-8 12 Ñнв. День работника прокуратуры 13 Ñнв. День роÑÑийÑкой печати 14 Ñнв. Старый Ðовый год 21 Ñнв. День инженерных войÑк 25 Ñнв. ТатьÑнин день. СтуденчеÑкий праздник 8 февр. День роÑÑийÑкой науки 10 февр. 
День дипломатичеÑкого работника 1 марта Ð’Ñемирный день гражданÑкой обороны 03/SunSecond День работников геодезии и картографии 11 марта День работника органов Ð½Ð°Ñ€ÐºÐ¾ÐºÐ¾Ð½Ñ‚Ñ€Ð¾Ð»Ñ 18 марта День налоговой полиции 03/SunThird День работников торговли, бытового обÑÐ»ÑƒÐ¶Ð¸Ð²Ð°Ð½Ð¸Ñ Ð½Ð°ÑÐµÐ»ÐµÐ½Ð¸Ñ Ð¸ жилищно-коммунального хозÑйÑтва 27 марта Международный день театра 27 марта День внутренних войÑк 1 апр. День Ñмеха 2 апр. День ÐµÐ´Ð¸Ð½ÐµÐ½Ð¸Ñ Ð½Ð°Ñ€Ð¾Ð´Ð¾Ð² 04/SunFirst День геолога 12 апр. День коÑмонавтики 04/SunSecond День войÑк противовоздушной обороны 26 апр. День памÑти погибших в радиационных авариÑÑ… и катаÑтрофах 30 апр. День пожарной охраны 7 Ð¼Ð°Ñ Ð”ÐµÐ½ÑŒ радио 17 Ð¼Ð°Ñ ÐœÐµÐ¶Ð´ÑƒÐ½Ð°Ñ€Ð¾Ð´Ð½Ñ‹Ð¹ день телекоммуникаций 18 Ð¼Ð°Ñ ÐœÐµÐ¶Ð´ÑƒÐ½Ð°Ñ€Ð¾Ð´Ð½Ñ‹Ð¹ день музеев 24 Ð¼Ð°Ñ Ð”ÐµÐ½ÑŒ ÑлавÑнÑкой пиÑьменноÑти и культуры 26 Ð¼Ð°Ñ Ð”ÐµÐ½ÑŒ роÑÑийÑкого предпринимательÑтва 27 Ð¼Ð°Ñ ÐžÐ±Ñ‰ÐµÑ€Ð¾ÑÑийÑкий день библиотек 28 Ð¼Ð°Ñ Ð”ÐµÐ½ÑŒ пограничника 30 Ð¼Ð°Ñ Ð”ÐµÐ½ÑŒ пожарной охраны 31 Ð¼Ð°Ñ Ð”ÐµÐ½ÑŒ РоÑÑийÑкой Ðдвокатуры 05/SunLast День химика 1 Ð¸ÑŽÐ½Ñ Ð”ÐµÐ½ÑŒ защиты детей 5 Ð¸ÑŽÐ½Ñ Ð”ÐµÐ½ÑŒ Ñколога 6 Ð¸ÑŽÐ½Ñ ÐŸÑƒÑˆÐºÐ¸Ð½Ñкий день 8 Ð¸ÑŽÐ½Ñ Ð”ÐµÐ½ÑŒ Ñоциального работника 06/SunSecond День работников легкой промышленноÑти 06/SunThird День медицинÑкого работника 22 Ð¸ÑŽÐ½Ñ Ð”ÐµÐ½ÑŒ памÑти и Ñкорби (Ðачало Великой ОтечеÑтвенной Войны, 1941 год) 27 Ð¸ÑŽÐ½Ñ Ð”ÐµÐ½ÑŒ молодежи 29 Ð¸ÑŽÐ½Ñ Ð”ÐµÐ½ÑŒ партизан и подпольщиков 06/SatLast День Ð¸Ð·Ð¾Ð±Ñ€ÐµÑ‚Ð°Ñ‚ÐµÐ»Ñ Ð¸ рационализатора 07/SunFirst День работников морÑкого и речного флота 07/SunSecond День рыбака 07/SunSecond День роÑÑийÑкой почты 07/SunThird День металлурга 07/SunLast День Военно-МорÑкого Флота 28 Ð¸ÑŽÐ»Ñ Ð”ÐµÐ½ÑŒ ÐºÑ€ÐµÑ‰ÐµÐ½Ð¸Ñ Ð ÑƒÑи 6 авг. День железнодорожных войÑк 08/SunFirst День железнодорожника 12 авг. День военно-воздушных Ñил 08/SunSecond День ÑÑ‚Ñ€Ð¾Ð¸Ñ‚ÐµÐ»Ñ 08/SunThird День Воздушного Флота 22 авг. День гоÑударÑтвенного флага 27 авг. День кино 08/SunLast День шахтера 1 Ñент. День знаний 2 Ñент. День роÑÑийÑкой гвардии 3 Ñент. День ÑолидарноÑти в борьбе Ñ Ñ‚ÐµÑ€Ñ€Ð¾Ñ€Ð¸Ð·Ð¼Ð¾Ð¼ 4 Ñент. День ÑпециалиÑта по Ñдерному обеÑпечению 09/SunFirst День работников нефтÑной и газовой промышленноÑти 09/SunSecond День танкиÑта 09/SunThird День работников леÑа 28 Ñент. День работника атомной промышленноÑти 09/SunLast День машиноÑÑ‚Ñ€Ð¾Ð¸Ñ‚ÐµÐ»Ñ 1 окт. День пожилых людей 1 окт. День Ñухопутных войÑк 4 окт. День коÑмичеÑких войÑк 5 окт. День ÑƒÑ‡Ð¸Ñ‚ÐµÐ»Ñ 14 окт. Международный день Ñтандартизации 10/SunSecond День работников ÑельÑкого хозÑйÑтва и перерабатывающей промышленноÑти 10/SunThird День работников дорожного хозÑйÑтва 24 окт. Международный день ООР25 окт. День таможенника 30 окт. День памÑти жертв политичеÑких репреÑÑий 10/SunLast День работников автомобильного транÑпорта - 7 ноÑб. День окт.ÑбрьÑкой революции 1917 года + 7 ноÑб. День октÑбрьÑкой революции 1917 года 9 ноÑб. Ð’Ñемирный день качеÑтва 10 ноÑб. День милиции 16 ноÑб. День морÑкой пехоты 17 ноÑб. Международный день Ñтудентов 19 ноÑб. День ракетных войÑк и артиллерии 21 ноÑб. День работников налоговых органов 26 ноÑб. Ð’Ñемирный день информации 11/SunLast День матери 1 дек. Ð’Ñемирный день борьбы Ñо СПИДом 3 дек. День юриÑта 9 дек. День Героев ОтечеÑтва 12 дек. День КонÑтитуции 17 дек. День ракетных войÑк ÑтратегичеÑкого Ð½Ð°Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ 20 дек. День работника органов безопаÑноÑти 22 дек. День Ñнергетика 27 дек. 
День ÑпаÑÐ°Ñ‚ÐµÐ»Ñ #endif /* !_ru_RU_UTF_8_common_ */ Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.military =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.military (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.military (revision 303642) @@ -1,28 +1,28 @@ /* * Дни воинÑкой Ñлавы РоÑÑии * * $FreeBSD$ */ #ifndef _ru_RU_UTF_8_military_ #define _ru_RU_UTF_8_military_ LANG=ru_RU.UTF-8 27 Ñнв. День ÑнÑÑ‚Ð¸Ñ Ð±Ð»Ð¾ÐºÐ°Ð´Ñ‹ города Ленинграда (1944 год) 2 февр. День разгрома ÑоветÑкими войÑками немецко-фашиÑÑ‚Ñких войÑк в СталинградÑкой битве (1943 год) 23 февр. День победы КраÑной Ðрмии над кайзеровÑкими войÑками Германии (1918 год) 18 апр. День победы руÑÑких воинов кнÑÐ·Ñ ÐлекÑандра ÐевÑкого над немецкими рыцарÑми на ЧудÑком озере (Ледовое побоище, 1242 год) 10 Ð¸ÑŽÐ»Ñ Ð”ÐµÐ½ÑŒ победы руÑÑкой армии под командованием Петра Первого над шведами в ПолтавÑком Ñражении (1709 год) 9 авг. День первой в роÑÑийÑкой иÑтории морÑкой победы руÑÑкого флота под командованием Петра Первого над шведами у мыÑа Гангут (1714 год) 23 авг. День разгрома ÑоветÑкими войÑками немецко-фашиÑÑ‚Ñких войÑк в КурÑкой битве (1943 год) 2 Ñент. День Ð¾ÐºÐ¾Ð½Ñ‡Ð°Ð½Ð¸Ñ Ð’Ñ‚Ð¾Ñ€Ð¾Ð¹ мировой войны (1945 год) 8 Ñент. День БородинÑкого ÑÑ€Ð°Ð¶ÐµÐ½Ð¸Ñ Ñ€ÑƒÑÑкой армии под командованием Ðœ.И. Кутузова Ñ Ñ„Ñ€Ð°Ð½Ñ†ÑƒÐ·Ñкой армией (1812 год) 11 Ñент. День победы руÑÑкой ÑÑкадры под командованием Ф.Ф. Ушакова над турецкой ÑÑкадрой у мыÑа Тендра (1790 год) 21 Ñент. День победы руÑÑких полков во главе Ñ Ð²ÐµÐ»Ð¸ÐºÐ¸Ð¼ кнÑзем Дмитрием ДонÑким над монголо-татарÑкими войÑками в КуликовÑкой битве (1380 год) - 7 ноÑб. День оÑÐ²Ð¾Ð±Ð¾Ð¶Ð´ÐµÐ½Ð¸Ñ ÐœÐ¾Ñквы Ñилами народного Ð¾Ð¿Ð¾Ð»Ñ‡ÐµÐ½Ð¸Ñ Ð¿Ð¾Ð´ руководÑтвом Кузьмы Минина и Ð”Ð¼Ð¸Ñ‚Ñ€Ð¸Ñ ÐŸÐ¾Ð¶Ð°Ñ€Ñкого от польÑких интервентов (1612 год) + 4 ноÑб. День оÑÐ²Ð¾Ð±Ð¾Ð¶Ð´ÐµÐ½Ð¸Ñ ÐœÐ¾Ñквы Ñилами народного Ð¾Ð¿Ð¾Ð»Ñ‡ÐµÐ½Ð¸Ñ Ð¿Ð¾Ð´ руководÑтвом Кузьмы Минина и Ð”Ð¼Ð¸Ñ‚Ñ€Ð¸Ñ ÐŸÐ¾Ð¶Ð°Ñ€Ñкого от польÑких интервентов (1612 год) 1 дек. День победы руÑÑкой ÑÑкадры под командованием П.С. Ðахимова над турецкой ÑÑкадрой у мыÑа Синоп (1853 год) 5 дек. День начала контрнаÑÑ‚ÑƒÐ¿Ð»ÐµÐ½Ð¸Ñ ÑоветÑких войÑк против немецко-фашиÑÑ‚Ñких войÑк в битве под МоÑквой (1941 год) 24 дек. День взÑÑ‚Ð¸Ñ Ñ‚ÑƒÑ€ÐµÑ†ÐºÐ¾Ð¹ крепоÑти Измаил руÑÑкими войÑками под командованием Ð.Ð’. Суворова (1790 год) #endif /* !_ru_RU_UTF_8_military_ */ Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.pagan =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.pagan (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/calendars/ru_RU.UTF-8/calendar.pagan (revision 303642) @@ -1,42 +1,42 @@ /* * ЯзычеÑкие праздники * * $FreeBSD$ */ #ifndef _ru_RU_UTF_8_pagan_ #define _ru_RU_UTF_8_pagan_ LANG=ru_RU.UTF-8 Paskha=ПаÑха 21 дек.* Зимнее ÑолнцеÑтоÑние 25 дек. КолÑда (Ñдвинутое зимнее ÑолнцеÑтоÑние) 6 Ñнв. День ÐšÐ°Ñ‰ÐµÑ Ð¸ ВелеÑа 24 февр. День ВелеÑа 29 февр. День ÐšÐ°Ñ‰ÐµÑ 1 марта День Марены 14 марта Ðовый Год, ОвÑень малый ПаÑха-55 МаÑленица ПаÑха+7 КраÑÐ½Ð°Ñ Ð“Ð¾Ñ€ÐºÐ° ПаÑха+16 Радуница 20 марта* ВеÑеннее равноденÑтвие 7 апр. 
День Марены (Ñдвинутое веÑеннее равноденÑтвие) 6 Ð¼Ð°Ñ Ð”ÐµÐ½ÑŒ Дажьбога, ОвÑень большой 22 Ð¼Ð°Ñ Ð¯Ñ€Ð¸Ð»Ð¸Ð½ День 15 Ð¸ÑŽÐ½Ñ Ð”ÐµÐ½ÑŒ Триглава 21 июнÑ* Летнее ÑолнцеÑтоÑние 1 Ð¸ÑŽÐ»Ñ Ð ÑƒÑÐ°Ð»ÑŒÐ½Ð°Ñ ÐÐµÐ´ÐµÐ»Ñ 7 Ð¸ÑŽÐ»Ñ ÐšÑƒÐ¿Ð°Ð»Ð° (Ñдвинутое летнее ÑолнцеÑтоÑние) 27 Ð¸ÑŽÐ»Ñ ÐžÑ‚Ð±Ð¾Ñ€ жертв Перуну, руÑалии 2 авг. Перунов День 21 авг. День Стрибога 28 авг. УÑпение Златогорки 14 Ñент. День Волха Змеевича 22 Ñент.* Поворот к зиме (оÑеннее равноденÑтвие) 10 ноÑб. День Макоши -21 ноÑб. День Сварога и Семартагла +21 ноÑб. День Сварога и Семаргла 9 дек. День Дажьбога и Марены #endif /* !_ru_RU_UTF_8_pagan_ */ Index: user/alc/PQ_LAUNDRY/usr.bin/calendar/io.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/calendar/io.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/calendar/io.c (revision 303642) @@ -1,503 +1,502 @@ /*- * Copyright (c) 1989, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1989, 1993\n\ The Regents of the University of California. 
All rights reserved.\n"; #endif #if 0 #ifndef lint static char sccsid[] = "@(#)calendar.c 8.3 (Berkeley) 3/25/94"; #endif #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include #include "pathnames.h" #include "calendar.h" enum { T_OK = 0, T_ERR, T_PROCESS, }; const char *calendarFile = "calendar"; /* default calendar file */ static const char *calendarHomes[] = {".calendar", _PATH_INCLUDE}; /* HOME */ static const char *calendarNoMail = "nomail";/* don't send mail if file exists */ static char path[MAXPATHLEN]; struct fixs neaster, npaskha, ncny, nfullmoon, nnewmoon; struct fixs nmarequinox, nsepequinox, njunsolstice, ndecsolstice; static int cal_parse(FILE *in, FILE *out); static StringList *definitions = NULL; static struct event *events[MAXCOUNT]; static char *extradata[MAXCOUNT]; static void trimlr(char **buf) { char *walk = *buf; char *last; while (isspace(*walk)) walk++; if (*walk != '\0') { last = walk + strlen(walk) - 1; while (last > walk && isspace(*last)) last--; *(last+1) = 0; } *buf = walk; } static FILE * cal_fopen(const char *file) { FILE *fp; char *home = getenv("HOME"); unsigned int i; if (home == NULL || *home == '\0') { warnx("Cannot get home directory"); return (NULL); } if (chdir(home) != 0) { warnx("Cannot enter home directory"); return (NULL); } - for (i = 0; i < sizeof(calendarHomes)/sizeof(calendarHomes[0]) ; i++) { + for (i = 0; i < nitems(calendarHomes); i++) { if (chdir(calendarHomes[i]) != 0) continue; if ((fp = fopen(file, "r")) != NULL) return (fp); } warnx("can't open calendar file \"%s\"", file); return (NULL); } static int token(char *line, FILE *out, bool *skip) { char *walk, c, a; if (strncmp(line, "endif", 5) == 0) { *skip = false; return (T_OK); } if (*skip) return (T_OK); if (strncmp(line, "include", 7) == 0) { walk = line + 7; trimlr(&walk); if (*walk == '\0') { warnx("Expecting arguments after #include"); return (T_ERR); } if (*walk != '<' && *walk != '\"') { warnx("Expecting '<' or '\"' after #include"); return (T_ERR); } a = *walk; walk++; c = walk[strlen(walk) - 1]; switch(c) { case '>': if (a != '<') { warnx("Unterminated include expecting '\"'"); return (T_ERR); } break; case '\"': if (a != '\"') { warnx("Unterminated include expecting '>'"); return (T_ERR); } break; default: warnx("Unterminated include expecting '%c'", a == '<' ?
'>' : '\"' ); return (T_ERR); } walk[strlen(walk) - 1] = '\0'; if (cal_parse(cal_fopen(walk), out)) return (T_ERR); return (T_OK); } if (strncmp(line, "define", 6) == 0) { if (definitions == NULL) definitions = sl_init(); walk = line + 6; trimlr(&walk); if (*walk == '\0') { warnx("Expecting arguments after #define"); return (T_ERR); } sl_add(definitions, strdup(walk)); return (T_OK); } if (strncmp(line, "ifndef", 6) == 0) { walk = line + 6; trimlr(&walk); if (*walk == '\0') { warnx("Expecting arguments after #ifndef"); return (T_ERR); } if (definitions != NULL && sl_find(definitions, walk) != NULL) *skip = true; return (T_OK); } return (T_PROCESS); } #define REPLACE(string, slen, struct_) \ if (strncasecmp(buf, (string), (slen)) == 0 && buf[(slen)]) { \ if (struct_.name != NULL) \ free(struct_.name); \ if ((struct_.name = strdup(buf + (slen))) == NULL) \ errx(1, "cannot allocate memory"); \ struct_.len = strlen(buf + (slen)); \ continue; \ } static int cal_parse(FILE *in, FILE *out) { char *line = NULL; char *buf; size_t linecap = 0; ssize_t linelen; ssize_t l; static int d_first = -1; static int count = 0; int i; int month[MAXCOUNT]; int day[MAXCOUNT]; int year[MAXCOUNT]; bool skip = false; char dbuf[80]; char *pp, p; struct tm tm; int flags; /* Unused */ tm.tm_sec = 0; tm.tm_min = 0; tm.tm_hour = 0; tm.tm_wday = 0; if (in == NULL) return (1); while ((linelen = getline(&line, &linecap, in)) > 0) { if (*line == '#') { switch (token(line+1, out, &skip)) { case T_ERR: free(line); return (1); case T_OK: continue; case T_PROCESS: break; default: break; } } if (skip) continue; buf = line; for (l = linelen; l > 0 && isspace((unsigned char)buf[l - 1]); l--) ; buf[l] = '\0'; if (buf[0] == '\0') continue; /* Parse special definitions: LANG, Easter, Paskha etc */ if (strncmp(buf, "LANG=", 5) == 0) { (void)setlocale(LC_ALL, buf + 5); d_first = (*nl_langinfo(D_MD_ORDER) == 'd'); setnnames(); continue; } REPLACE("Easter=", 7, neaster); REPLACE("Paskha=", 7, npaskha); REPLACE("ChineseNewYear=", 15, ncny); REPLACE("NewMoon=", 8, nnewmoon); REPLACE("FullMoon=", 9, nfullmoon); REPLACE("MarEquinox=", 11, nmarequinox); REPLACE("SepEquinox=", 11, nsepequinox); REPLACE("JunSolstice=", 12, njunsolstice); REPLACE("DecSolstice=", 12, ndecsolstice); if (strncmp(buf, "SEQUENCE=", 9) == 0) { setnsequences(buf + 9); continue; } /* * If the line starts with a tab, the data has to be * added to the previous line */ if (buf[0] == '\t') { for (i = 0; i < count; i++) event_continue(events[i], buf); continue; } /* Get rid of leading spaces (non-standard) */ while (isspace((unsigned char)buf[0])) memcpy(buf, buf + 1, strlen(buf)); /* No tab in the line, then not a valid line */ if ((pp = strchr(buf, '\t')) == NULL) continue; /* Trim spaces in front of the tab */ while (isspace((unsigned char)pp[-1])) pp--; p = *pp; *pp = '\0'; if ((count = parsedaymonth(buf, year, month, day, &flags, extradata)) == 0) continue; *pp = p; if (count < 0) { /* Show error status based on return value */ if (debug) fprintf(stderr, "Ignored: %s\n", buf); if (count == -1) continue; count = -count + 1; } /* Find the last tab */ while (pp[1] == '\t') pp++; if (d_first < 0) d_first = (*nl_langinfo(D_MD_ORDER) == 'd'); for (i = 0; i < count; i++) { tm.tm_mon = month[i] - 1; tm.tm_mday = day[i]; tm.tm_year = year[i] - 1900; (void)strftime(dbuf, sizeof(dbuf), d_first ? "%e %b" : "%b %e", &tm); if (debug) fprintf(stderr, "got %s\n", pp); events[i] = event_add(year[i], month[i], day[i], dbuf, ((flags &= F_VARIABLE) != 0) ? 
1 : 0, pp, extradata[i]); } } free(line); fclose(in); return (0); } void cal(void) { FILE *fpin; FILE *fpout; int i; for (i = 0; i < MAXCOUNT; i++) extradata[i] = (char *)calloc(1, 20); if ((fpin = opencalin()) == NULL) return; if ((fpout = opencalout()) == NULL) { fclose(fpin); return; } if (cal_parse(fpin, fpout)) return; event_print_all(fpout); closecal(fpout); } FILE * opencalin(void) { struct stat sbuf; FILE *fpin; /* open up calendar file */ if ((fpin = fopen(calendarFile, "r")) == NULL) { if (doall) { if (chdir(calendarHomes[0]) != 0) return (NULL); if (stat(calendarNoMail, &sbuf) == 0) return (NULL); if ((fpin = fopen(calendarFile, "r")) == NULL) return (NULL); } else { fpin = cal_fopen(calendarFile); } } return (fpin); } FILE * opencalout(void) { int fd; /* not reading all calendar files, just set output to stdout */ if (!doall) return (stdout); /* set output to a temporary file, so if no output don't send mail */ snprintf(path, sizeof(path), "%s/_calXXXXXX", _PATH_TMP); if ((fd = mkstemp(path)) < 0) return (NULL); return (fdopen(fd, "w+")); } void closecal(FILE *fp) { uid_t uid; struct stat sbuf; int nread, pdes[2], status; char buf[1024]; if (!doall) return; rewind(fp); if (fstat(fileno(fp), &sbuf) || !sbuf.st_size) goto done; if (pipe(pdes) < 0) goto done; switch (fork()) { case -1: /* error */ (void)close(pdes[0]); (void)close(pdes[1]); goto done; case 0: /* child -- set stdin to pipe output */ if (pdes[0] != STDIN_FILENO) { (void)dup2(pdes[0], STDIN_FILENO); (void)close(pdes[0]); } (void)close(pdes[1]); uid = geteuid(); if (setuid(getuid()) < 0) { warnx("setuid failed"); _exit(1); } if (setgid(getegid()) < 0) { warnx("setgid failed"); _exit(1); } if (setuid(uid) < 0) { warnx("setuid failed"); _exit(1); } execl(_PATH_SENDMAIL, "sendmail", "-i", "-t", "-F", "\"Reminder Service\"", (char *)NULL); warn(_PATH_SENDMAIL); _exit(1); } /* parent -- write to pipe input */ (void)close(pdes[0]); write(pdes[1], "From: \"Reminder Service\" <", 26); write(pdes[1], pw->pw_name, strlen(pw->pw_name)); write(pdes[1], ">\nTo: <", 7); write(pdes[1], pw->pw_name, strlen(pw->pw_name)); write(pdes[1], ">\nSubject: ", 11); write(pdes[1], dayname, strlen(dayname)); write(pdes[1], "'s Calendar\nPrecedence: bulk\n\n", 30); while ((nread = read(fileno(fp), buf, sizeof(buf))) > 0) (void)write(pdes[1], buf, nread); (void)close(pdes[1]); done: (void)fclose(fp); (void)unlink(path); while (wait(&status) >= 0); } Index: user/alc/PQ_LAUNDRY/usr.bin/checknr/checknr.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/checknr/checknr.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/checknr/checknr.c (revision 303642) @@ -1,647 +1,646 @@ /* * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #if 0 #ifndef lint static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); /* * checknr: check an nroff/troff input file for matching macro calls. * we also attempt to match size and font changes, but only the embedded * kind. These must end in \s0 and \fP resp. Maybe more sophistication * later but for now think of these restrictions as contributions to * structured typesetting. */ #include -#define _WITH_GETLINE #include #include #include #include #define MAXSTK 100 /* Stack size */ #define MAXBR 100 /* Max number of bracket pairs known */ #define MAXCMDS 600 /* Max number of commands known */ static void addcmd(char *); static void addmac(const char *); static int binsrch(const char *); static void checkknown(const char *); static void chkcmd(const char *, const char *); static void complain(int); static int eq(const char *, const char *); static void nomatch(const char *); static void pe(int); static void process(FILE *); static void prop(int); static void usage(void); /* * The stack on which we remember what we've seen so far. */ static struct stkstr { int opno; /* number of opening bracket */ int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */ int parm; /* parm to size, font, etc */ int lno; /* line number */ } stk[MAXSTK]; static int stktop; /* * The kinds of opening and closing brackets. */ static struct brstr { const char *opbr; const char *clbr; } br[MAXBR] = { /* A few bare bones troff commands */ #define SZ 0 {"sz", "sz"}, /* also \s */ #define FT 1 {"ft", "ft"}, /* also \f */ /* the -mm package */ {"AL", "LE"}, {"AS", "AE"}, {"BL", "LE"}, {"BS", "BE"}, {"DF", "DE"}, {"DL", "LE"}, {"DS", "DE"}, {"FS", "FE"}, {"ML", "LE"}, {"NS", "NE"}, {"RL", "LE"}, {"VL", "LE"}, /* the -ms package */ {"AB", "AE"}, {"BD", "DE"}, {"CD", "DE"}, {"DS", "DE"}, {"FS", "FE"}, {"ID", "DE"}, {"KF", "KE"}, {"KS", "KE"}, {"LD", "DE"}, {"LG", "NL"}, {"QS", "QE"}, {"RS", "RE"}, {"SM", "NL"}, {"XA", "XE"}, {"XS", "XE"}, /* The -me package */ {"(b", ")b"}, {"(c", ")c"}, {"(d", ")d"}, {"(f", ")f"}, {"(l", ")l"}, {"(q", ")q"}, {"(x", ")x"}, {"(z", ")z"}, /* The -mdoc package */ {"Ao", "Ac"}, {"Bd", "Ed"}, {"Bk", "Ek"}, {"Bo", "Bc"}, {"Do", "Dc"}, {"Fo", "Fc"}, {"Oo", "Oc"}, {"Po", "Pc"}, {"Qo", "Qc"}, {"Rs", "Re"}, {"So", "Sc"}, {"Xo", "Xc"}, /* Things needed by preprocessors */ {"EQ", "EN"}, {"TS", "TE"}, /* Refer */ {"[", "]"}, {0, 0} }; /* * All commands known to nroff, plus macro packages. * Used so we can complain about unrecognized commands. 
*/ static const char *knowncmds[MAXCMDS] = { "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O", "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q", "(t", "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++", "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M", "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At", "B", "B" , "B1", "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd", "Bf", "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT", "Cd", "Cm", "D", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc", "Dd", "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM", "EN", "EQ", "EX", "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er", "Ev", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl", "Fn", "Fo", "Ft", "Fx", "H", "H" , "HC", "HD", "HM", "HO", "HU", "I", "I" , "ID", "IE", "IH", "IM", "IP", "IX", "IZ", "Ic", "In", "It", "KD", "KE", "KF", "KQ", "KS", "LB", "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME", "MF", "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd", "Nm", "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op", "Os", "Ot", "Ox", "P", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY", "Pa", "Pc", "Pf", "Po", "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql", "Qo", "Qq", "R", "R" , "RA", "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "Re", "Rs", "S", "S" , "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss", "St", "Sx", "Sy", "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt", "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo", "Xr", "[", "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "\\{", "\\}", "]", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "chop", "cs", "ct", "cu", "da", "de", "di", "dl", "dn", "do", "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el", "em", "eo", "ep", "ev", "evc", "ex", "fallback", "fc", "feature", "fi", "fl", "flig", "fo", "fp", "ft", "ftr", "fz", "fzoom", "hc", "he", "hidechar", "hl", "hp", "ht", "hw", "hx", "hy", "hylang", "i", "i" , "ie", "if", "ig", "in", "ip", "it", "ix", "kern", "kernafter", "kernbefore", "kernpair", "lc", "lc_ctype", "lg", "lhang", "li", "ll", "ln", "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx", "of", "oh", "os", "pa", "papersize", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps", "q", "q" , "r", "r" , "rb", "rd", "re", "recursionlimit", "return", "rhang", "rm", "rn", "ro", "rr", "rs", "rt", "sb", "sc", "sh", "shift", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp", "tr", "track", "u", "uf", "uh", "ul", "vs", "wh", "xflag", "xp", "yr", 0 }; static int lineno; /* current line number in input file */ static const char *cfilename; /* name of current file */ static int nfiles; /* number of files to process */ static int fflag; /* -f: ignore \f */ static int sflag; /* -s: ignore \s */ static int ncmds; /* size of knowncmds */ static int slot; /* slot in knowncmds found by binsrch */ int main(int argc, char **argv) { FILE *f; int i; char *cp; 
char b1[4]; /* Figure out how many known commands there are */ while (knowncmds[ncmds]) ncmds++; while (argc > 1 && argv[1][0] == '-') { switch(argv[1][1]) { /* -a: add pairs of macros */ case 'a': i = strlen(argv[1]) - 2; if (i % 6 != 0) usage(); /* look for empty macro slots */ for (i=0; br[i].opbr; i++) ; for (cp=argv[1]+3; cp[-1]; cp += 6) { char *tmp; if (i >= MAXBR) errx(1, "too many pairs"); if ((tmp = malloc(3)) == NULL) err(1, "malloc"); strlcpy(tmp, cp, 3); br[i].opbr = tmp; if ((tmp = malloc(3)) == NULL) err(1, "malloc"); strlcpy(tmp, cp+3, 3); br[i].clbr = tmp; addmac(br[i].opbr); /* knows pairs are also known cmds */ addmac(br[i].clbr); i++; } break; /* -c: add known commands */ case 'c': i = strlen(argv[1]) - 2; if (i % 3 != 0) usage(); for (cp=argv[1]+3; cp[-1]; cp += 3) { if (cp[2] && cp[2] != '.') usage(); strncpy(b1, cp, 2); b1[2] = '\0'; addmac(b1); } break; /* -f: ignore font changes */ case 'f': fflag = 1; break; /* -s: ignore size changes */ case 's': sflag = 1; break; default: usage(); } argc--; argv++; } nfiles = argc - 1; if (nfiles > 0) { for (i = 1; i < argc; i++) { cfilename = argv[i]; f = fopen(cfilename, "r"); if (f == NULL) warn("%s", cfilename); else { process(f); fclose(f); } } } else { cfilename = "stdin"; process(stdin); } exit(0); } static void usage(void) { fprintf(stderr, "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n"); exit(1); } static void process(FILE *f) { int i, n; char mac[64]; /* The current macro or nroff command */ char *line; size_t linecap; int pl; line = NULL; linecap = 0; stktop = -1; for (lineno = 1; getline(&line, &linecap, f) > 0; lineno++) { if (line[0] == '.') { /* * find and isolate the macro/command name. */ strncpy(mac, line+1, 4); if (isspace(mac[0])) { pe(lineno); printf("Empty command\n"); } else if (isspace(mac[1])) { mac[1] = 0; } else if (isspace(mac[2])) { mac[2] = 0; } else if (mac[0] != '\\' || mac[1] != '\"') { pe(lineno); printf("Command too long\n"); } /* * Is it a known command? */ checkknown(mac); /* * Should we add it? */ if (eq(mac, "de")) addcmd(line); chkcmd(line, mac); } /* * At this point we process the line looking * for \s and \f. */ for (i = 0; line[i]; i++) if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) { if (!sflag && line[++i] == 's') { pl = line[++i]; if (isdigit(pl)) { n = pl - '0'; pl = ' '; } else n = 0; while (isdigit(line[++i])) n = 10 * n + line[i] - '0'; i--; if (n == 0) { if (stktop >= 0 && stk[stktop].opno == SZ) { stktop--; } else { pe(lineno); printf("unmatched \\s0\n"); } } else { stk[++stktop].opno = SZ; stk[stktop].pl = pl; stk[stktop].parm = n; stk[stktop].lno = lineno; } } else if (!fflag && line[i] == 'f') { n = line[++i]; if (n == 'P') { if (stktop >= 0 && stk[stktop].opno == FT) { stktop--; } else { pe(lineno); printf("unmatched \\fP\n"); } } else { stk[++stktop].opno = FT; stk[stktop].pl = 1; stk[stktop].parm = n; stk[stktop].lno = lineno; } } } } free(line); /* * We've hit the end and look at all this stuff that hasn't been * matched yet! Complain, complain. 
*/ for (i = stktop; i >= 0; i--) { complain(i); } } static void complain(int i) { pe(stk[i].lno); printf("Unmatched "); prop(i); printf("\n"); } static void prop(int i) { if (stk[i].pl == 0) printf(".%s", br[stk[i].opno].opbr); else switch(stk[i].opno) { case SZ: printf("\\s%c%d", stk[i].pl, stk[i].parm); break; case FT: printf("\\f%c", stk[i].parm); break; default: printf("Bug: stk[%d].opno = %d = .%s, .%s", i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr); } } static void chkcmd(const char *line __unused, const char *mac) { int i; /* * Check to see if it matches top of stack. */ if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) stktop--; /* OK. Pop & forget */ else { /* No. Maybe it's an opener */ for (i=0; br[i].opbr; i++) { if (eq(mac, br[i].opbr)) { /* Found. Push it. */ stktop++; stk[stktop].opno = i; stk[stktop].pl = 0; stk[stktop].parm = 0; stk[stktop].lno = lineno; break; } /* * Maybe it's an unmatched closer. * NOTE: this depends on the fact * that none of the closers can be * openers too. */ if (eq(mac, br[i].clbr)) { nomatch(mac); break; } } } } static void nomatch(const char *mac) { int i, j; /* * Look for a match further down on stack * If we find one, it suggests that the stuff in * between is supposed to match itself. */ for (j=stktop; j>=0; j--) if (eq(mac,br[stk[j].opno].clbr)) { /* Found. Make a good diagnostic. */ if (j == stktop-2) { /* * Check for special case \fx..\fR and don't * complain. */ if (stk[j+1].opno==FT && stk[j+1].parm!='R' && stk[j+2].opno==FT && stk[j+2].parm=='R') { stktop = j -1; return; } /* * We have two unmatched frobs. Chances are * they were intended to match, so we mention * them together. */ pe(stk[j+1].lno); prop(j+1); printf(" does not match %d: ", stk[j+2].lno); prop(j+2); printf("\n"); } else for (i=j+1; i <= stktop; i++) { complain(i); } stktop = j-1; return; } /* Didn't find one. Throw this away. */ pe(lineno); printf("Unmatched .%s\n", mac); } /* eq: are two strings equal? */ static int eq(const char *s1, const char *s2) { return (strcmp(s1, s2) == 0); } /* print the first part of an error message, given the line number */ static void pe(int linen) { if (nfiles > 1) printf("%s: ", cfilename); printf("%d: ", linen); } static void checkknown(const char *mac) { if (eq(mac, ".")) return; if (binsrch(mac) >= 0) return; if (mac[0] == '\\' && mac[1] == '"') /* comments */ return; pe(lineno); printf("Unknown command: .%s\n", mac); } /* * We have a .de xx line in "line". Add xx to the list of known commands. */ static void addcmd(char *line) { char *mac; /* grab the macro being defined */ mac = line+4; while (isspace(*mac)) mac++; if (*mac == 0) { pe(lineno); printf("illegal define: %s\n", line); return; } mac[2] = 0; if (isspace(mac[1]) || mac[1] == '\\') mac[1] = 0; if (ncmds >= MAXCMDS) { printf("Only %d known commands allowed\n", MAXCMDS); exit(1); } addmac(mac); } /* * Add mac to the list. We should really have some kind of tree * structure here but this is a quick-and-dirty job and I just don't * have time to mess with it. (I wonder if this will come back to haunt * me someday?) Anyway, I claim that .de is fairly rare in user * nroff programs, and the register loop below is pretty fast. 
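 * addmac() keeps knowncmds sorted: binsrch() leaves the insertion point in
 * 'slot', the entries from slot through ncmds-1 are shifted up one place,
 * and a strdup()'d copy of the new name is dropped into the hole, so the
 * binary search keeps working as macros are added.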
*/ static void addmac(const char *mac) { const char **src, **dest, **loc; if (binsrch(mac) >= 0){ /* it's OK to redefine something */ #ifdef DEBUG printf("binsrch(%s) -> already in table\n", mac); #endif return; } /* binsrch sets slot as a side effect */ #ifdef DEBUG printf("binsrch(%s) -> %d\n", mac, slot); #endif loc = &knowncmds[slot]; src = &knowncmds[ncmds-1]; dest = src+1; while (dest > loc) *dest-- = *src--; if ((*loc = strdup(mac)) == NULL) err(1, "strdup"); ncmds++; #ifdef DEBUG printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds); #endif } /* * Do a binary search in knowncmds for mac. * If found, return the index. If not, return -1. */ static int binsrch(const char *mac) { const char *p; /* pointer to current cmd in list */ int d; /* difference if any */ int mid; /* mid point in binary search */ int top, bot; /* boundaries of bin search, inclusive */ top = ncmds-1; bot = 0; while (top >= bot) { mid = (top+bot)/2; p = knowncmds[mid]; d = p[0] - mac[0]; if (d == 0) d = p[1] - mac[1]; if (d == 0) return (mid); if (d < 0) bot = mid + 1; else top = mid - 1; } slot = bot; /* place it would have gone */ return (-1); } Index: user/alc/PQ_LAUNDRY/usr.bin/comm/comm.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/comm/comm.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/comm/comm.c (revision 303642) @@ -1,249 +1,248 @@ /* * Copyright (c) 1989, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Case Larsen. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1989, 1993, 1994\n\ The Regents of the University of California. 
All rights reserved.\n"; #endif #if 0 #ifndef lint static char sccsid[] = "From: @(#)comm.c 8.4 (Berkeley) 5/4/95"; #endif #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include #include static int iflag; static const char *tabs[] = { "", "\t", "\t\t" }; static FILE *file(const char *); static wchar_t *convert(const char *); static void show(FILE *, const char *, const char *, char **, size_t *); static void usage(void); int main(int argc, char *argv[]) { int comp, read1, read2; int ch, flag1, flag2, flag3; FILE *fp1, *fp2; const char *col1, *col2, *col3; size_t line1len, line2len; char *line1, *line2; ssize_t n1, n2; wchar_t *tline1, *tline2; const char **p; (void) setlocale(LC_ALL, ""); flag1 = flag2 = flag3 = 1; while ((ch = getopt(argc, argv, "123i")) != -1) switch(ch) { case '1': flag1 = 0; break; case '2': flag2 = 0; break; case '3': flag3 = 0; break; case 'i': iflag = 1; break; case '?': default: usage(); } argc -= optind; argv += optind; if (argc != 2) usage(); fp1 = file(argv[0]); fp2 = file(argv[1]); /* for each column printed, add another tab offset */ p = tabs; col1 = col2 = col3 = NULL; if (flag1) col1 = *p++; if (flag2) col2 = *p++; if (flag3) col3 = *p; line1len = line2len = 0; line1 = line2 = NULL; n1 = n2 = -1; for (read1 = read2 = 1;;) { /* read next line, check for EOF */ if (read1) { n1 = getline(&line1, &line1len, fp1); if (n1 < 0 && ferror(fp1)) err(1, "%s", argv[0]); if (n1 > 0 && line1[n1 - 1] == '\n') line1[n1 - 1] = '\0'; } if (read2) { n2 = getline(&line2, &line2len, fp2); if (n2 < 0 && ferror(fp2)) err(1, "%s", argv[1]); if (n2 > 0 && line2[n2 - 1] == '\n') line2[n2 - 1] = '\0'; } /* if one file done, display the rest of the other file */ if (n1 < 0) { if (n2 >= 0 && col2 != NULL) show(fp2, argv[1], col2, &line2, &line2len); break; } if (n2 < 0) { if (n1 >= 0 && col1 != NULL) show(fp1, argv[0], col1, &line1, &line1len); break; } tline2 = NULL; if ((tline1 = convert(line1)) != NULL) tline2 = convert(line2); if (tline1 == NULL || tline2 == NULL) comp = strcmp(line1, line2); else comp = wcscoll(tline1, tline2); if (tline1 != NULL) free(tline1); if (tline2 != NULL) free(tline2); /* lines are the same */ if (!comp) { read1 = read2 = 1; if (col3 != NULL) (void)printf("%s%s\n", col3, line1); continue; } /* lines are different */ if (comp < 0) { read1 = 1; read2 = 0; if (col1 != NULL) (void)printf("%s%s\n", col1, line1); } else { read1 = 0; read2 = 1; if (col2 != NULL) (void)printf("%s%s\n", col2, line2); } } exit(0); } static wchar_t * convert(const char *str) { size_t n; wchar_t *buf, *p; if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1) return (NULL); if (SIZE_MAX / sizeof(*buf) < n + 1) errx(1, "conversion buffer length overflow"); if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL) err(1, "malloc"); if (mbstowcs(buf, str, n + 1) != n) errx(1, "internal mbstowcs() error"); if (iflag) { for (p = buf; *p != L'\0'; p++) *p = towlower(*p); } return (buf); } static void show(FILE *fp, const char *fn, const char *offset, char **bufp, size_t *buflenp) { ssize_t n; do { (void)printf("%s%s\n", offset, *bufp); if ((n = getline(bufp, buflenp, fp)) < 0) break; if (n > 0 && (*bufp)[n - 1] == '\n') (*bufp)[n - 1] = '\0'; } while (1); if (ferror(fp)) err(1, "%s", fn); } static FILE * file(const char *name) { FILE *fp; if (!strcmp(name, "-")) return (stdin); if ((fp = fopen(name, "r")) == NULL) { err(1, "%s", name); } return (fp); } static void usage(void) { (void)fprintf(stderr, "usage: comm 
[-123i] file1 file2\n"); exit(1); } Index: user/alc/PQ_LAUNDRY/usr.bin/cpuset/cpuset.1 =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/cpuset/cpuset.1 (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/cpuset/cpuset.1 (revision 303642) @@ -1,197 +1,197 @@ .\" Copyright (c) 2008 Christian Brueffer .\" Copyright (c) 2008 Jeffrey Roberson .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd January 8, 2015 +.Dd July 29, 2016 .Dt CPUSET 1 .Os .Sh NAME .Nm cpuset .Nd "configure processor sets" .Sh SYNOPSIS .Nm .Op Fl l Ar cpu-list .Op Fl s Ar setid .Ar cmd ... .Nm .Op Fl l Ar cpu-list .Op Fl s Ar setid .Fl p Ar pid .Nm .Op Fl c .Op Fl l Ar cpu-list .Fl C .Fl p Ar pid .Nm .Op Fl c .Op Fl l Ar cpu-list .Op Fl j Ar jailid | Fl p Ar pid | Fl t Ar tid | Fl s Ar setid | Fl x Ar irq .Nm .Fl g .Op Fl cir -.Op Fl d Ar domain | j Ar jailid | Fl p Ar pid | Fl t Ar tid | Fl s Ar setid | Fl x Ar irq +.Op Fl d Ar domain | Fl j Ar jailid | Fl p Ar pid | Fl t Ar tid | Fl s Ar setid | Fl x Ar irq .Sh DESCRIPTION The .Nm command can be used to assign processor sets to processes, run commands constrained to a given set or list of processors, and query information about processor binding, sets, and available processors in the system. .Pp .Nm requires a target to modify or query. The target may be specified as a command, process id, thread id, a cpuset id, an irq, a jail id, or a NUMA domain. Using .Fl g the target's set id or mask may be queried. Using .Fl l or .Fl s the target's CPU mask or set id may be set. If no target is specified, .Nm operates on itself. Not all combinations of operations and targets are supported. For example, you may not set the id of an existing set or query and launch a command at the same time. .Pp There are two sets applicable to each process and one private mask per thread. Every process in the system belongs to a cpuset. By default processes are started in set 1. The mask or id may be queried using .Fl c . Each thread also has a private mask of CPUs it is allowed to run on that must be a subset of the assigned set. And finally, there is a root set, numbered 0, that is immutable. 
This last set is the list of all possible CPUs in the system and is queried using .Fl r . .Pp When running a command it may join a set specified with .Fl s otherwise a new set is created. In addition, a mask for the command may be specified using .Fl l . When used in conjunction with .Fl c the mask modifies the supplied or created set rather than the private mask for the thread. .Pp The options are as follows: .Bl -tag -width ".Fl l Ar cpu-list" .It Fl C Create a new cpuset and assign the target process to that set. .It Fl c The requested operation should reference the cpuset available via the target specifier. .It Fl d Ar domain Specifies a NUMA domain id as the target of the operation. .It Fl g Causes .Nm to print either a list of valid CPUs or, using .Fl i , the id of the target. .It Fl i When used with the .Fl g option print the id rather than the valid mask of the target. .It Fl j Ar jailid Specifies a jail id as the target of the operation. .It Fl l Ar cpu-list Specifies a list of CPUs to apply to a target. Specification may include numbers separated by '-' for ranges and commas separating individual numbers. A special list of .Dq all may be specified in which case the list includes all CPUs from the root set. .It Fl p Ar pid Specifies a pid as the target of the operation. .It Fl s Ar setid Specifies a set id as the target of the operation. .It Fl r The requested operation should reference the root set available via the target specifier. .It Fl t Ar tid Specifies a thread id as the target of the operation. .It Fl x Ar irq Specifies an irq as the target of the operation. .El .Sh EXIT STATUS .Ex -std .Sh EXAMPLES Create a new group with CPUs 0-4 inclusive and run .Pa /bin/sh on it: .Dl cpuset -c -l 0-4 /bin/sh .Pp Query the mask of CPUs the .Aq sh pid is allowed to run on: .Dl cpuset -g -p .Pp Restrict .Pa /bin/sh to run on CPUs 0 and 2 while its group is still allowed to run on CPUs 0-4: .Dl cpuset -l 0,2 -p .Pp Modify the cpuset .Pa /bin/sh belongs to restricting it to CPUs 0 and 2: .Dl cpuset -l 0,2 -c -p .Pp Modify the cpuset all threads are in by default to contain only the first 4 CPUs, leaving the rest idle: .Dl cpuset -l 0-3 -s 1 .Pp Print the id of the cpuset .Pa /bin/sh is in: .Dl cpuset -g -i -p .Pp Move the .Ar pid into the specified cpuset .Ar setid so it may be managed with other pids in that set: .Dl cpuset -s -p .Pp Create a new cpuset that is restricted to CPUs 0 and 2 and move .Ar pid into the new set: .Dl cpuset -C -c -l 0,2 -p .Sh SEE ALSO .Xr cpuset 2 .Sh HISTORY The .Nm command first appeared in .Fx 7.1 . .Sh AUTHORS .An Jeffrey Roberson Aq Mt jeff@FreeBSD.org Index: user/alc/PQ_LAUNDRY/usr.bin/grep/grep.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/grep/grep.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/grep/grep.c (revision 303642) @@ -1,748 +1,747 @@ /* $NetBSD: grep.c,v 1.6 2011/04/18 03:48:23 joerg Exp $ */ /* $FreeBSD$ */ /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */ /*- * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav * Copyright (C) 2008-2009 Gabor Kovesdan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include "fastmatch.h" #include "grep.h" #ifndef WITHOUT_NLS #include nl_catd catalog; #endif /* * Default messags to use when NLS is disabled or no catalogue * is found. */ const char *errstr[] = { "", /* 1*/ "(standard input)", /* 2*/ "cannot read bzip2 compressed file", /* 3*/ "unknown %s option", /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n", /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n", /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n", /* 7*/ "\t[--null] [pattern] [file ...]\n", /* 8*/ "Binary file %s matches\n", /* 9*/ "%s (BSD grep) %s\n", }; /* Flags passed to regcomp() and regexec() */ int cflags = REG_NOSUB; int eflags = REG_STARTEND; /* Shortcut for matching all cases like empty regex */ bool matchall; /* Searching patterns */ unsigned int patterns; static unsigned int pattern_sz; struct pat *pattern; regex_t *r_pattern; fastmatch_t *fg_pattern; /* Filename exclusion/inclusion patterns */ unsigned int fpatterns, dpatterns; static unsigned int fpattern_sz, dpattern_sz; struct epat *dpattern, *fpattern; /* For regex errors */ char re_error[RE_ERROR_BUF + 1]; /* Command-line flags */ unsigned long long Aflag; /* -A x: print x lines trailing each match */ unsigned long long Bflag; /* -B x: print x lines leading each match */ bool Hflag; /* -H: always print file name */ bool Lflag; /* -L: only show names of files with no matches */ bool bflag; /* -b: show block numbers for each match */ bool cflag; /* -c: only show a count of matching lines */ bool hflag; /* -h: don't print filename headers */ bool iflag; /* -i: ignore case */ bool lflag; /* -l: only show names of files with matches */ bool mflag; /* -m x: stop reading the files after x matches */ long long mcount; /* count for -m */ long long mlimit; /* requested value for -m */ bool nflag; /* -n: show line numbers in front of matching lines */ bool oflag; /* -o: print only matching part */ bool qflag; /* -q: quiet mode (don't output anything) */ bool sflag; /* -s: silent mode (ignore errors) */ bool vflag; /* -v: only show non-matching lines */ bool wflag; /* -w: pattern must start and end on word boundaries */ bool xflag; /* -x: pattern must match entire line */ bool lbflag; /* --line-buffered */ bool nullflag; /* --null */ char *label; /* --label */ const char *color; /* --color */ int 
grepbehave = GREP_BASIC; /* -EFGP: type of the regex */ int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */ int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */ int devbehave = DEV_READ; /* -D: handling of devices */ int dirbehave = DIR_READ; /* -dRr: handling of directories */ int linkbehave = LINK_READ; /* -OpS: handling of symlinks */ bool dexclude, dinclude; /* --exclude-dir and --include-dir */ bool fexclude, finclude; /* --exclude and --include */ enum { BIN_OPT = CHAR_MAX + 1, COLOR_OPT, HELP_OPT, MMAP_OPT, LINEBUF_OPT, LABEL_OPT, NULL_OPT, R_EXCLUDE_OPT, R_INCLUDE_OPT, R_DEXCLUDE_OPT, R_DINCLUDE_OPT }; static inline const char *init_color(const char *); /* Housekeeping */ bool first = true; /* flag whether we are processing the first match */ bool prev; /* flag whether or not the previous line matched */ int tail; /* lines left to print */ bool file_err; /* file reading error */ /* * Prints usage information and returns 2. */ static void usage(void) { fprintf(stderr, getstr(4), getprogname()); fprintf(stderr, "%s", getstr(5)); fprintf(stderr, "%s", getstr(6)); fprintf(stderr, "%s", getstr(7)); exit(2); } static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXy"; static const struct option long_options[] = { {"binary-files", required_argument, NULL, BIN_OPT}, {"help", no_argument, NULL, HELP_OPT}, {"mmap", no_argument, NULL, MMAP_OPT}, {"line-buffered", no_argument, NULL, LINEBUF_OPT}, {"label", required_argument, NULL, LABEL_OPT}, {"null", no_argument, NULL, NULL_OPT}, {"color", optional_argument, NULL, COLOR_OPT}, {"colour", optional_argument, NULL, COLOR_OPT}, {"exclude", required_argument, NULL, R_EXCLUDE_OPT}, {"include", required_argument, NULL, R_INCLUDE_OPT}, {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT}, {"include-dir", required_argument, NULL, R_DINCLUDE_OPT}, {"after-context", required_argument, NULL, 'A'}, {"text", no_argument, NULL, 'a'}, {"before-context", required_argument, NULL, 'B'}, {"byte-offset", no_argument, NULL, 'b'}, {"context", optional_argument, NULL, 'C'}, {"count", no_argument, NULL, 'c'}, {"devices", required_argument, NULL, 'D'}, {"directories", required_argument, NULL, 'd'}, {"extended-regexp", no_argument, NULL, 'E'}, {"regexp", required_argument, NULL, 'e'}, {"fixed-strings", no_argument, NULL, 'F'}, {"file", required_argument, NULL, 'f'}, {"basic-regexp", no_argument, NULL, 'G'}, {"no-filename", no_argument, NULL, 'h'}, {"with-filename", no_argument, NULL, 'H'}, {"ignore-case", no_argument, NULL, 'i'}, {"bz2decompress", no_argument, NULL, 'J'}, {"files-with-matches", no_argument, NULL, 'l'}, {"files-without-match", no_argument, NULL, 'L'}, {"max-count", required_argument, NULL, 'm'}, {"lzma", no_argument, NULL, 'M'}, {"line-number", no_argument, NULL, 'n'}, {"only-matching", no_argument, NULL, 'o'}, {"quiet", no_argument, NULL, 'q'}, {"silent", no_argument, NULL, 'q'}, {"recursive", no_argument, NULL, 'r'}, {"no-messages", no_argument, NULL, 's'}, {"binary", no_argument, NULL, 'U'}, {"unix-byte-offsets", no_argument, NULL, 'u'}, {"invert-match", no_argument, NULL, 'v'}, {"version", no_argument, NULL, 'V'}, {"word-regexp", no_argument, NULL, 'w'}, {"line-regexp", no_argument, NULL, 'x'}, {"xz", no_argument, NULL, 'X'}, {"decompress", no_argument, NULL, 'Z'}, {NULL, no_argument, NULL, 0} }; /* * Adds a searching pattern to the internal array. 
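 * An empty pattern turns on the matchall shortcut and discards any patterns
 * collected so far.  Otherwise the array grows geometrically (doubled, plus
 * one so the very first allocation works while pattern_sz is still 0), a
 * trailing newline is stripped, and the pattern is copied and
 * NUL-terminated because 'pat' itself may not be.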
*/ static void add_pattern(char *pat, size_t len) { /* Do not add further pattern is we already match everything */ if (matchall) return; /* Check if we can do a shortcut */ if (len == 0) { matchall = true; for (unsigned int i = 0; i < patterns; i++) { free(pattern[i].pat); } pattern = grep_realloc(pattern, sizeof(struct pat)); pattern[0].pat = NULL; pattern[0].len = 0; patterns = 1; return; } /* Increase size if necessary */ if (patterns == pattern_sz) { pattern_sz *= 2; pattern = grep_realloc(pattern, ++pattern_sz * sizeof(struct pat)); } if (len > 0 && pat[len - 1] == '\n') --len; /* pat may not be NUL-terminated */ pattern[patterns].pat = grep_malloc(len + 1); memcpy(pattern[patterns].pat, pat, len); pattern[patterns].len = len; pattern[patterns].pat[len] = '\0'; ++patterns; } /* * Adds a file include/exclude pattern to the internal array. */ static void add_fpattern(const char *pat, int mode) { /* Increase size if necessary */ if (fpatterns == fpattern_sz) { fpattern_sz *= 2; fpattern = grep_realloc(fpattern, ++fpattern_sz * sizeof(struct epat)); } fpattern[fpatterns].pat = grep_strdup(pat); fpattern[fpatterns].mode = mode; ++fpatterns; } /* * Adds a directory include/exclude pattern to the internal array. */ static void add_dpattern(const char *pat, int mode) { /* Increase size if necessary */ if (dpatterns == dpattern_sz) { dpattern_sz *= 2; dpattern = grep_realloc(dpattern, ++dpattern_sz * sizeof(struct epat)); } dpattern[dpatterns].pat = grep_strdup(pat); dpattern[dpatterns].mode = mode; ++dpatterns; } /* * Reads searching patterns from a file and adds them with add_pattern(). */ static void read_patterns(const char *fn) { struct stat st; FILE *f; char *line; size_t len; ssize_t rlen; if ((f = fopen(fn, "r")) == NULL) err(2, "%s", fn); if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) { fclose(f); return; } len = 0; line = NULL; while ((rlen = getline(&line, &len, f)) != -1) add_pattern(line, line[0] == '\n' ? 0 : (size_t)rlen); free(line); if (ferror(f)) err(2, "%s", fn); fclose(f); } static inline const char * init_color(const char *d) { char *c; c = getenv("GREP_COLOR"); return (c != NULL && c[0] != '\0' ? c : d); } int main(int argc, char *argv[]) { char **aargv, **eargv, *eopts; char *ep; const char *pn; unsigned long long l; unsigned int aargc, eargc, i; int c, lastc, needpattern, newarg, prevoptind; setlocale(LC_ALL, ""); #ifndef WITHOUT_NLS catalog = catopen("grep", NL_CAT_LOCALE); #endif /* Check what is the program name of the binary. In this way we can have all the funcionalities in one binary without the need of scripting and using ugly hacks. 
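   A leading "bz", "xz", "lz" or "z" in the program name selects the matching
   decompression filter, and a remaining leading 'e' or 'f' selects extended
   or fixed-string matching, so for example "bzegrep" means bzip2 input with
   -E semantics.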
*/ pn = getprogname(); if (pn[0] == 'b' && pn[1] == 'z') { filebehave = FILE_BZIP; pn += 2; } else if (pn[0] == 'x' && pn[1] == 'z') { filebehave = FILE_XZ; pn += 2; } else if (pn[0] == 'l' && pn[1] == 'z') { filebehave = FILE_LZMA; pn += 2; } else if (pn[0] == 'z') { filebehave = FILE_GZIP; pn += 1; } switch (pn[0]) { case 'e': grepbehave = GREP_EXTENDED; break; case 'f': grepbehave = GREP_FIXED; break; } lastc = '\0'; newarg = 1; prevoptind = 1; needpattern = 1; eopts = getenv("GREP_OPTIONS"); /* support for extra arguments in GREP_OPTIONS */ eargc = 0; if (eopts != NULL && eopts[0] != '\0') { char *str; /* make an estimation of how many extra arguments we have */ for (unsigned int j = 0; j < strlen(eopts); j++) if (eopts[j] == ' ') eargc++; eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1)); eargc = 0; /* parse extra arguments */ while ((str = strsep(&eopts, " ")) != NULL) if (str[0] != '\0') eargv[eargc++] = grep_strdup(str); aargv = (char **)grep_calloc(eargc + argc + 1, sizeof(char *)); aargv[0] = argv[0]; for (i = 0; i < eargc; i++) aargv[i + 1] = eargv[i]; for (int j = 1; j < argc; j++, i++) aargv[i + 1] = argv[j]; aargc = eargc + argc; } else { aargv = argv; aargc = argc; } while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) != -1)) { switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (newarg || !isdigit(lastc)) Aflag = 0; else if (Aflag > LLONG_MAX / 10) { errno = ERANGE; err(2, NULL); } Aflag = Bflag = (Aflag * 10) + (c - '0'); break; case 'C': if (optarg == NULL) { Aflag = Bflag = 2; break; } /* FALLTHROUGH */ case 'A': /* FALLTHROUGH */ case 'B': errno = 0; l = strtoull(optarg, &ep, 10); if (((errno == ERANGE) && (l == ULLONG_MAX)) || ((errno == EINVAL) && (l == 0))) err(2, NULL); else if (ep[0] != '\0') { errno = EINVAL; err(2, NULL); } if (c == 'A') Aflag = l; else if (c == 'B') Bflag = l; else Aflag = Bflag = l; break; case 'a': binbehave = BINFILE_TEXT; break; case 'b': bflag = true; break; case 'c': cflag = true; break; case 'D': if (strcasecmp(optarg, "skip") == 0) devbehave = DEV_SKIP; else if (strcasecmp(optarg, "read") == 0) devbehave = DEV_READ; else errx(2, getstr(3), "--devices"); break; case 'd': if (strcasecmp("recurse", optarg) == 0) { Hflag = true; dirbehave = DIR_RECURSE; } else if (strcasecmp("skip", optarg) == 0) dirbehave = DIR_SKIP; else if (strcasecmp("read", optarg) == 0) dirbehave = DIR_READ; else errx(2, getstr(3), "--directories"); break; case 'E': grepbehave = GREP_EXTENDED; break; case 'e': { char *token; char *string = optarg; while ((token = strsep(&string, "\n")) != NULL) add_pattern(token, strlen(token)); } needpattern = 0; break; case 'F': grepbehave = GREP_FIXED; break; case 'f': read_patterns(optarg); needpattern = 0; break; case 'G': grepbehave = GREP_BASIC; break; case 'H': Hflag = true; break; case 'h': Hflag = false; hflag = true; break; case 'I': binbehave = BINFILE_SKIP; break; case 'i': case 'y': iflag = true; cflags |= REG_ICASE; break; case 'J': #ifdef WITHOUT_BZIP2 errno = EOPNOTSUPP; err(2, "bzip2 support was disabled at compile-time"); #endif filebehave = FILE_BZIP; break; case 'L': lflag = false; Lflag = true; break; case 'l': Lflag = false; lflag = true; break; case 'm': mflag = true; errno = 0; mlimit = mcount = strtoll(optarg, &ep, 10); if (((errno == ERANGE) && (mcount == LLONG_MAX)) || ((errno == EINVAL) && (mcount == 0))) err(2, NULL); else if (ep[0] != '\0') { errno = EINVAL; err(2, NULL); } break; case 'M': filebehave = FILE_LZMA; 
break; case 'n': nflag = true; break; case 'O': linkbehave = LINK_EXPLICIT; break; case 'o': oflag = true; cflags &= ~REG_NOSUB; break; case 'p': linkbehave = LINK_SKIP; break; case 'q': qflag = true; break; case 'S': linkbehave = LINK_READ; break; case 'R': case 'r': dirbehave = DIR_RECURSE; Hflag = true; break; case 's': sflag = true; break; case 'U': binbehave = BINFILE_BIN; break; case 'u': case MMAP_OPT: filebehave = FILE_MMAP; break; case 'V': printf(getstr(9), getprogname(), VERSION); exit(0); case 'v': vflag = true; break; case 'w': wflag = true; cflags &= ~REG_NOSUB; break; case 'x': xflag = true; cflags &= ~REG_NOSUB; break; case 'X': filebehave = FILE_XZ; break; case 'Z': filebehave = FILE_GZIP; break; case BIN_OPT: if (strcasecmp("binary", optarg) == 0) binbehave = BINFILE_BIN; else if (strcasecmp("without-match", optarg) == 0) binbehave = BINFILE_SKIP; else if (strcasecmp("text", optarg) == 0) binbehave = BINFILE_TEXT; else errx(2, getstr(3), "--binary-files"); break; case COLOR_OPT: color = NULL; if (optarg == NULL || strcasecmp("auto", optarg) == 0 || strcasecmp("tty", optarg) == 0 || strcasecmp("if-tty", optarg) == 0) { char *term; term = getenv("TERM"); if (isatty(STDOUT_FILENO) && term != NULL && strcasecmp(term, "dumb") != 0) color = init_color("01;31"); } else if (strcasecmp("always", optarg) == 0 || strcasecmp("yes", optarg) == 0 || strcasecmp("force", optarg) == 0) { color = init_color("01;31"); } else if (strcasecmp("never", optarg) != 0 && strcasecmp("none", optarg) != 0 && strcasecmp("no", optarg) != 0) errx(2, getstr(3), "--color"); cflags &= ~REG_NOSUB; break; case LABEL_OPT: label = optarg; break; case LINEBUF_OPT: lbflag = true; break; case NULL_OPT: nullflag = true; break; case R_INCLUDE_OPT: finclude = true; add_fpattern(optarg, INCL_PAT); break; case R_EXCLUDE_OPT: fexclude = true; add_fpattern(optarg, EXCL_PAT); break; case R_DINCLUDE_OPT: dinclude = true; add_dpattern(optarg, INCL_PAT); break; case R_DEXCLUDE_OPT: dexclude = true; add_dpattern(optarg, EXCL_PAT); break; case HELP_OPT: default: usage(); } lastc = c; newarg = optind != prevoptind; prevoptind = optind; } aargc -= optind; aargv += optind; /* Empty pattern file matches nothing */ if (!needpattern && (patterns == 0)) exit(1); /* Fail if we don't have any pattern */ if (aargc == 0 && needpattern) usage(); /* Process patterns from command line */ if (aargc != 0 && needpattern) { char *token; char *string = *aargv; while ((token = strsep(&string, "\n")) != NULL) add_pattern(token, strlen(token)); --aargc; ++aargv; } switch (grepbehave) { case GREP_BASIC: break; case GREP_FIXED: /* XXX: header mess, REG_LITERAL not defined in gnu/regex.h */ cflags |= 0020; break; case GREP_EXTENDED: cflags |= REG_EXTENDED; break; default: /* NOTREACHED */ usage(); } fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); /* Check if cheating is allowed (always is for fgrep). 
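	   Each pattern is offered to the fastmatch engine first; only the ones
	   fastncomp() rejects are compiled with regcomp(), so plain literal
	   strings never pay for the full regex machinery.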
*/ for (i = 0; i < patterns; ++i) { if (fastncomp(&fg_pattern[i], pattern[i].pat, pattern[i].len, cflags) != 0) { /* Fall back to full regex library */ c = regcomp(&r_pattern[i], pattern[i].pat, cflags); if (c != 0) { regerror(c, &r_pattern[i], re_error, RE_ERROR_BUF); errx(2, "%s", re_error); } } } if (lbflag) setlinebuf(stdout); if ((aargc == 0 || aargc == 1) && !Hflag) hflag = true; if (aargc == 0) exit(!procfile("-")); if (dirbehave == DIR_RECURSE) c = grep_tree(aargv); else for (c = 0; aargc--; ++aargv) { if ((finclude || fexclude) && !file_matching(*aargv)) continue; c+= procfile(*aargv); } #ifndef WITHOUT_NLS catclose(catalog); #endif /* Find out the correct return value according to the results and the command line option. */ exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1)); } Index: user/alc/PQ_LAUNDRY/usr.bin/gzip/gzip.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/gzip/gzip.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/gzip/gzip.c (revision 303642) @@ -1,2174 +1,2174 @@ /* $NetBSD: gzip.c,v 1.109 2015/10/27 07:36:18 mrg Exp $ */ /*- * Copyright (c) 1997, 1998, 2003, 2004, 2006 Matthew R. Green * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include #ifndef lint __COPYRIGHT("@(#) Copyright (c) 1997, 1998, 2003, 2004, 2006\ Matthew R. Green. All rights reserved."); __FBSDID("$FreeBSD$"); #endif /* not lint */ /* * gzip.c -- GPL free gzip using zlib. 
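 * (zlib supplies only the raw deflate/inflate engine; the gzip framing,
 * i.e. member headers, CRC/length trailers and concatenated members, is
 * produced and parsed by this file itself.)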
* * RFC 1950 covers the zlib format * RFC 1951 covers the deflate format * RFC 1952 covers the gzip format * * TODO: * - use mmap where possible * - make bzip2/compress -v/-t/-l support work as well as possible */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* what type of file are we dealing with */ enum filetype { FT_GZIP, #ifndef NO_BZIP2_SUPPORT FT_BZIP2, #endif #ifndef NO_COMPRESS_SUPPORT FT_Z, #endif #ifndef NO_PACK_SUPPORT FT_PACK, #endif #ifndef NO_XZ_SUPPORT FT_XZ, #endif FT_LAST, FT_UNKNOWN }; #ifndef NO_BZIP2_SUPPORT #include #define BZ2_SUFFIX ".bz2" #define BZIP2_MAGIC "\102\132\150" #endif #ifndef NO_COMPRESS_SUPPORT #define Z_SUFFIX ".Z" #define Z_MAGIC "\037\235" #endif #ifndef NO_PACK_SUPPORT #define PACK_MAGIC "\037\036" #endif #ifndef NO_XZ_SUPPORT #include #define XZ_SUFFIX ".xz" #define XZ_MAGIC "\3757zXZ" #endif #define GZ_SUFFIX ".gz" #define BUFLEN (64 * 1024) #define GZIP_MAGIC0 0x1F #define GZIP_MAGIC1 0x8B #define GZIP_OMAGIC1 0x9E #define GZIP_TIMESTAMP (off_t)4 #define GZIP_ORIGNAME (off_t)10 #define HEAD_CRC 0x02 #define EXTRA_FIELD 0x04 #define ORIG_NAME 0x08 #define COMMENT 0x10 #define OS_CODE 3 /* Unix */ typedef struct { const char *zipped; int ziplen; const char *normal; /* for unzip - must not be longer than zipped */ } suffixes_t; static suffixes_t suffixes[] = { #define SUFFIX(Z, N) {Z, sizeof Z - 1, N} SUFFIX(GZ_SUFFIX, ""), /* Overwritten by -S .xxx */ #ifndef SMALL SUFFIX(GZ_SUFFIX, ""), SUFFIX(".z", ""), SUFFIX("-gz", ""), SUFFIX("-z", ""), SUFFIX("_z", ""), SUFFIX(".taz", ".tar"), SUFFIX(".tgz", ".tar"), #ifndef NO_BZIP2_SUPPORT SUFFIX(BZ2_SUFFIX, ""), SUFFIX(".tbz", ".tar"), SUFFIX(".tbz2", ".tar"), #endif #ifndef NO_COMPRESS_SUPPORT SUFFIX(Z_SUFFIX, ""), #endif #ifndef NO_XZ_SUPPORT SUFFIX(XZ_SUFFIX, ""), #endif SUFFIX(GZ_SUFFIX, ""), /* Overwritten by -S "" */ #endif /* SMALL */ #undef SUFFIX }; -#define NUM_SUFFIXES (sizeof suffixes / sizeof suffixes[0]) +#define NUM_SUFFIXES (nitems(suffixes)) #define SUFFIX_MAXLEN 30 static const char gzip_version[] = "FreeBSD gzip 20150413"; #ifndef SMALL static const char gzip_copyright[] = \ " Copyright (c) 1997, 1998, 2003, 2004, 2006 Matthew R. Green\n" " All rights reserved.\n" "\n" " Redistribution and use in source and binary forms, with or without\n" " modification, are permitted provided that the following conditions\n" " are met:\n" " 1. Redistributions of source code must retain the above copyright\n" " notice, this list of conditions and the following disclaimer.\n" " 2. 
Redistributions in binary form must reproduce the above copyright\n" " notice, this list of conditions and the following disclaimer in the\n" " documentation and/or other materials provided with the distribution.\n" "\n" " THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\n" " IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\n" " OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\n" " IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\n" " INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,\n" " BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n" " LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED\n" " AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n" " OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\n" " OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\n" " SUCH DAMAGE."; #endif static int cflag; /* stdout mode */ static int dflag; /* decompress mode */ static int lflag; /* list mode */ static int numflag = 6; /* gzip -1..-9 value */ #ifndef SMALL static int fflag; /* force mode */ static int kflag; /* don't delete input files */ static int nflag; /* don't save name/timestamp */ static int Nflag; /* don't restore name/timestamp */ static int qflag; /* quiet mode */ static int rflag; /* recursive mode */ static int tflag; /* test */ static int vflag; /* verbose mode */ static const char *remove_file = NULL; /* file to be removed upon SIGINT */ #else #define qflag 0 #define tflag 0 #endif static int exit_value = 0; /* exit value */ static char *infile; /* name of file coming in */ static void maybe_err(const char *fmt, ...) __printflike(1, 2) __dead2; #if !defined(NO_BZIP2_SUPPORT) || !defined(NO_PACK_SUPPORT) || \ !defined(NO_XZ_SUPPORT) static void maybe_errx(const char *fmt, ...) __printflike(1, 2) __dead2; #endif static void maybe_warn(const char *fmt, ...) __printflike(1, 2); static void maybe_warnx(const char *fmt, ...) 
__printflike(1, 2); static enum filetype file_gettype(u_char *); #ifdef SMALL #define gz_compress(if, of, sz, fn, tm) gz_compress(if, of, sz) #endif static off_t gz_compress(int, int, off_t *, const char *, uint32_t); static off_t gz_uncompress(int, int, char *, size_t, off_t *, const char *); static off_t file_compress(char *, char *, size_t); static off_t file_uncompress(char *, char *, size_t); static void handle_pathname(char *); static void handle_file(char *, struct stat *); static void handle_stdin(void); static void handle_stdout(void); static void print_ratio(off_t, off_t, FILE *); static void print_list(int fd, off_t, const char *, time_t); static void usage(void) __dead2; static void display_version(void) __dead2; #ifndef SMALL static void display_license(void); static void sigint_handler(int); #endif static const suffixes_t *check_suffix(char *, int); static ssize_t read_retry(int, void *, size_t); #ifdef SMALL #define unlink_input(f, sb) unlink(f) #else static off_t cat_fd(unsigned char *, size_t, off_t *, int fd); static void prepend_gzip(char *, int *, char ***); static void handle_dir(char *); static void print_verbage(const char *, const char *, off_t, off_t); static void print_test(const char *, int); static void copymodes(int fd, const struct stat *, const char *file); static int check_outfile(const char *outfile); #endif #ifndef NO_BZIP2_SUPPORT static off_t unbzip2(int, int, char *, size_t, off_t *); #endif #ifndef NO_COMPRESS_SUPPORT static FILE *zdopen(int); static off_t zuncompress(FILE *, FILE *, char *, size_t, off_t *); #endif #ifndef NO_PACK_SUPPORT static off_t unpack(int, int, char *, size_t, off_t *); #endif #ifndef NO_XZ_SUPPORT static off_t unxz(int, int, char *, size_t, off_t *); #endif #ifdef SMALL #define getopt_long(a,b,c,d,e) getopt(a,b,c) #else static const struct option longopts[] = { { "stdout", no_argument, 0, 'c' }, { "to-stdout", no_argument, 0, 'c' }, { "decompress", no_argument, 0, 'd' }, { "uncompress", no_argument, 0, 'd' }, { "force", no_argument, 0, 'f' }, { "help", no_argument, 0, 'h' }, { "keep", no_argument, 0, 'k' }, { "list", no_argument, 0, 'l' }, { "no-name", no_argument, 0, 'n' }, { "name", no_argument, 0, 'N' }, { "quiet", no_argument, 0, 'q' }, { "recursive", no_argument, 0, 'r' }, { "suffix", required_argument, 0, 'S' }, { "test", no_argument, 0, 't' }, { "verbose", no_argument, 0, 'v' }, { "version", no_argument, 0, 'V' }, { "fast", no_argument, 0, '1' }, { "best", no_argument, 0, '9' }, { "ascii", no_argument, 0, 'a' }, { "license", no_argument, 0, 'L' }, { NULL, no_argument, 0, 0 }, }; #endif int main(int argc, char **argv) { const char *progname = getprogname(); #ifndef SMALL char *gzip; int len; #endif int ch; #ifndef SMALL if ((gzip = getenv("GZIP")) != NULL) prepend_gzip(gzip, &argc, &argv); signal(SIGINT, sigint_handler); #endif /* * XXX * handle being called `gunzip', `zcat' and `gzcat' */ if (strcmp(progname, "gunzip") == 0) dflag = 1; else if (strcmp(progname, "zcat") == 0 || strcmp(progname, "gzcat") == 0) dflag = cflag = 1; #ifdef SMALL #define OPT_LIST "123456789cdhlV" #else #define OPT_LIST "123456789acdfhklLNnqrS:tVv" #endif while ((ch = getopt_long(argc, argv, OPT_LIST, longopts, NULL)) != -1) { switch (ch) { case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': numflag = ch - '0'; break; case 'c': cflag = 1; break; case 'd': dflag = 1; break; case 'l': lflag = 1; dflag = 1; break; case 'V': display_version(); /* NOTREACHED */ #ifndef SMALL case 'a': fprintf(stderr, "%s: option 
--ascii ignored on this system\n", progname); break; case 'f': fflag = 1; break; case 'k': kflag = 1; break; case 'L': display_license(); /* NOT REACHED */ case 'N': nflag = 0; Nflag = 1; break; case 'n': nflag = 1; Nflag = 0; break; case 'q': qflag = 1; break; case 'r': rflag = 1; break; case 'S': len = strlen(optarg); if (len != 0) { if (len > SUFFIX_MAXLEN) errx(1, "incorrect suffix: '%s': too long", optarg); suffixes[0].zipped = optarg; suffixes[0].ziplen = len; } else { suffixes[NUM_SUFFIXES - 1].zipped = ""; suffixes[NUM_SUFFIXES - 1].ziplen = 0; } break; case 't': cflag = 1; tflag = 1; dflag = 1; break; case 'v': vflag = 1; break; #endif default: usage(); /* NOTREACHED */ } } argv += optind; argc -= optind; if (argc == 0) { if (dflag) /* stdin mode */ handle_stdin(); else /* stdout mode */ handle_stdout(); } else { do { handle_pathname(argv[0]); } while (*++argv); } #ifndef SMALL if (qflag == 0 && lflag && argc > 1) print_list(-1, 0, "(totals)", 0); #endif exit(exit_value); } /* maybe print a warning */ void maybe_warn(const char *fmt, ...) { va_list ap; if (qflag == 0) { va_start(ap, fmt); vwarn(fmt, ap); va_end(ap); } if (exit_value == 0) exit_value = 1; } /* ... without an errno. */ void maybe_warnx(const char *fmt, ...) { va_list ap; if (qflag == 0) { va_start(ap, fmt); vwarnx(fmt, ap); va_end(ap); } if (exit_value == 0) exit_value = 1; } /* maybe print an error */ void maybe_err(const char *fmt, ...) { va_list ap; if (qflag == 0) { va_start(ap, fmt); vwarn(fmt, ap); va_end(ap); } exit(2); } #if !defined(NO_BZIP2_SUPPORT) || !defined(NO_PACK_SUPPORT) || \ !defined(NO_XZ_SUPPORT) /* ... without an errno. */ void maybe_errx(const char *fmt, ...) { va_list ap; if (qflag == 0) { va_start(ap, fmt); vwarnx(fmt, ap); va_end(ap); } exit(2); } #endif #ifndef SMALL /* split up $GZIP and prepend it to the argument list */ static void prepend_gzip(char *gzip, int *argc, char ***argv) { char *s, **nargv, **ac; int nenvarg = 0, i; /* scan how many arguments there are */ for (s = gzip;;) { while (*s == ' ' || *s == '\t') s++; if (*s == 0) goto count_done; nenvarg++; while (*s != ' ' && *s != '\t') if (*s++ == 0) goto count_done; } count_done: /* punt early */ if (nenvarg == 0) return; *argc += nenvarg; ac = *argv; nargv = (char **)malloc((*argc + 1) * sizeof(char *)); if (nargv == NULL) maybe_err("malloc"); /* stash this away */ *argv = nargv; /* copy the program name first */ i = 0; nargv[i++] = *(ac++); /* take a copy of $GZIP and add it to the array */ s = strdup(gzip); if (s == NULL) maybe_err("strdup"); for (;;) { /* Skip whitespaces. */ while (*s == ' ' || *s == '\t') s++; if (*s == 0) goto copy_done; nargv[i++] = s; /* Find the end of this argument. */ while (*s != ' ' && *s != '\t') if (*s++ == 0) /* Argument followed by NUL. */ goto copy_done; /* Terminate by overwriting ' ' or '\t' with NUL. */ *s++ = 0; } copy_done: /* copy the original arguments and a NULL */ while (*ac) nargv[i++] = *(ac++); nargv[i] = NULL; } #endif /* compress input to output. 
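   The 10-byte gzip member header (magic bytes, deflate method, flags, the
   mtime in little-endian order, the XFL hint and the OS code) plus the
   optional NUL-terminated original name is written by hand, the payload is
   raw-deflated via deflateInit2(..., -MAX_WBITS, ...), and an 8-byte trailer
   carrying the CRC-32 and the input length mod 2^32 is appended.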
Return bytes read, -1 on error */ static off_t gz_compress(int in, int out, off_t *gsizep, const char *origname, uint32_t mtime) { z_stream z; char *outbufp, *inbufp; off_t in_tot = 0, out_tot = 0; ssize_t in_size; int i, error; uLong crc; #ifdef SMALL static char header[] = { GZIP_MAGIC0, GZIP_MAGIC1, Z_DEFLATED, 0, 0, 0, 0, 0, 0, OS_CODE }; #endif outbufp = malloc(BUFLEN); inbufp = malloc(BUFLEN); if (outbufp == NULL || inbufp == NULL) { maybe_err("malloc failed"); goto out; } memset(&z, 0, sizeof z); z.zalloc = Z_NULL; z.zfree = Z_NULL; z.opaque = 0; #ifdef SMALL memcpy(outbufp, header, sizeof header); i = sizeof header; #else if (nflag != 0) { mtime = 0; origname = ""; } i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c%c%c%c%c%s", GZIP_MAGIC0, GZIP_MAGIC1, Z_DEFLATED, *origname ? ORIG_NAME : 0, mtime & 0xff, (mtime >> 8) & 0xff, (mtime >> 16) & 0xff, (mtime >> 24) & 0xff, numflag == 1 ? 4 : numflag == 9 ? 2 : 0, OS_CODE, origname); if (i >= BUFLEN) /* this need PATH_MAX > BUFLEN ... */ maybe_err("snprintf"); if (*origname) i++; #endif z.next_out = (unsigned char *)outbufp + i; z.avail_out = BUFLEN - i; error = deflateInit2(&z, numflag, Z_DEFLATED, (-MAX_WBITS), 8, Z_DEFAULT_STRATEGY); if (error != Z_OK) { maybe_warnx("deflateInit2 failed"); in_tot = -1; goto out; } crc = crc32(0L, Z_NULL, 0); for (;;) { if (z.avail_out == 0) { if (write(out, outbufp, BUFLEN) != BUFLEN) { maybe_warn("write"); out_tot = -1; goto out; } out_tot += BUFLEN; z.next_out = (unsigned char *)outbufp; z.avail_out = BUFLEN; } if (z.avail_in == 0) { in_size = read(in, inbufp, BUFLEN); if (in_size < 0) { maybe_warn("read"); in_tot = -1; goto out; } if (in_size == 0) break; crc = crc32(crc, (const Bytef *)inbufp, (unsigned)in_size); in_tot += in_size; z.next_in = (unsigned char *)inbufp; z.avail_in = in_size; } error = deflate(&z, Z_NO_FLUSH); if (error != Z_OK && error != Z_STREAM_END) { maybe_warnx("deflate failed"); in_tot = -1; goto out; } } /* clean up */ for (;;) { size_t len; ssize_t w; error = deflate(&z, Z_FINISH); if (error != Z_OK && error != Z_STREAM_END) { maybe_warnx("deflate failed"); in_tot = -1; goto out; } len = (char *)z.next_out - outbufp; w = write(out, outbufp, len); if (w == -1 || (size_t)w != len) { maybe_warn("write"); out_tot = -1; goto out; } out_tot += len; z.next_out = (unsigned char *)outbufp; z.avail_out = BUFLEN; if (error == Z_STREAM_END) break; } if (deflateEnd(&z) != Z_OK) { maybe_warnx("deflateEnd failed"); in_tot = -1; goto out; } i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c%c%c", (int)crc & 0xff, (int)(crc >> 8) & 0xff, (int)(crc >> 16) & 0xff, (int)(crc >> 24) & 0xff, (int)in_tot & 0xff, (int)(in_tot >> 8) & 0xff, (int)(in_tot >> 16) & 0xff, (int)(in_tot >> 24) & 0xff); if (i != 8) maybe_err("snprintf"); if (write(out, outbufp, i) != i) { maybe_warn("write"); in_tot = -1; } else out_tot += i; out: if (inbufp != NULL) free(inbufp); if (outbufp != NULL) free(outbufp); if (gsizep) *gsizep = out_tot; return in_tot; } /* * uncompress input to output then close the input. return the * uncompressed size written, and put the compressed sized read * into `*gsizep'. 
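 * The member header is parsed one byte at a time through the GZSTATE_*
 * state machine, so input arriving in arbitrary chunks (e.g. from a pipe)
 * and multiple concatenated gzip members are handled without seeking; the
 * CRC-32 and length stored in each member's trailer are checked against
 * the running totals.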
*/ static off_t gz_uncompress(int in, int out, char *pre, size_t prelen, off_t *gsizep, const char *filename) { z_stream z; char *outbufp, *inbufp; off_t out_tot = -1, in_tot = 0; uint32_t out_sub_tot = 0; enum { GZSTATE_MAGIC0, GZSTATE_MAGIC1, GZSTATE_METHOD, GZSTATE_FLAGS, GZSTATE_SKIPPING, GZSTATE_EXTRA, GZSTATE_EXTRA2, GZSTATE_EXTRA3, GZSTATE_ORIGNAME, GZSTATE_COMMENT, GZSTATE_HEAD_CRC1, GZSTATE_HEAD_CRC2, GZSTATE_INIT, GZSTATE_READ, GZSTATE_CRC, GZSTATE_LEN, } state = GZSTATE_MAGIC0; int flags = 0, skip_count = 0; int error = Z_STREAM_ERROR, done_reading = 0; uLong crc = 0; ssize_t wr; int needmore = 0; #define ADVANCE() { z.next_in++; z.avail_in--; } if ((outbufp = malloc(BUFLEN)) == NULL) { maybe_err("malloc failed"); goto out2; } if ((inbufp = malloc(BUFLEN)) == NULL) { maybe_err("malloc failed"); goto out1; } memset(&z, 0, sizeof z); z.avail_in = prelen; z.next_in = (unsigned char *)pre; z.avail_out = BUFLEN; z.next_out = (unsigned char *)outbufp; z.zalloc = NULL; z.zfree = NULL; z.opaque = 0; in_tot = prelen; out_tot = 0; for (;;) { if ((z.avail_in == 0 || needmore) && done_reading == 0) { ssize_t in_size; if (z.avail_in > 0) { memmove(inbufp, z.next_in, z.avail_in); } z.next_in = (unsigned char *)inbufp; in_size = read(in, z.next_in + z.avail_in, BUFLEN - z.avail_in); if (in_size == -1) { maybe_warn("failed to read stdin"); goto stop_and_fail; } else if (in_size == 0) { done_reading = 1; } z.avail_in += in_size; needmore = 0; in_tot += in_size; } if (z.avail_in == 0) { if (done_reading && state != GZSTATE_MAGIC0) { maybe_warnx("%s: unexpected end of file", filename); goto stop_and_fail; } goto stop; } switch (state) { case GZSTATE_MAGIC0: if (*z.next_in != GZIP_MAGIC0) { if (in_tot > 0) { maybe_warnx("%s: trailing garbage " "ignored", filename); exit_value = 2; goto stop; } maybe_warnx("input not gziped (MAGIC0)"); goto stop_and_fail; } ADVANCE(); state++; out_sub_tot = 0; crc = crc32(0L, Z_NULL, 0); break; case GZSTATE_MAGIC1: if (*z.next_in != GZIP_MAGIC1 && *z.next_in != GZIP_OMAGIC1) { maybe_warnx("input not gziped (MAGIC1)"); goto stop_and_fail; } ADVANCE(); state++; break; case GZSTATE_METHOD: if (*z.next_in != Z_DEFLATED) { maybe_warnx("unknown compression method"); goto stop_and_fail; } ADVANCE(); state++; break; case GZSTATE_FLAGS: flags = *z.next_in; ADVANCE(); skip_count = 6; state++; break; case GZSTATE_SKIPPING: if (skip_count > 0) { skip_count--; ADVANCE(); } else state++; break; case GZSTATE_EXTRA: if ((flags & EXTRA_FIELD) == 0) { state = GZSTATE_ORIGNAME; break; } skip_count = *z.next_in; ADVANCE(); state++; break; case GZSTATE_EXTRA2: skip_count |= ((*z.next_in) << 8); ADVANCE(); state++; break; case GZSTATE_EXTRA3: if (skip_count > 0) { skip_count--; ADVANCE(); } else state++; break; case GZSTATE_ORIGNAME: if ((flags & ORIG_NAME) == 0) { state++; break; } if (*z.next_in == 0) state++; ADVANCE(); break; case GZSTATE_COMMENT: if ((flags & COMMENT) == 0) { state++; break; } if (*z.next_in == 0) state++; ADVANCE(); break; case GZSTATE_HEAD_CRC1: if (flags & HEAD_CRC) skip_count = 2; else skip_count = 0; state++; break; case GZSTATE_HEAD_CRC2: if (skip_count > 0) { skip_count--; ADVANCE(); } else state++; break; case GZSTATE_INIT: if (inflateInit2(&z, -MAX_WBITS) != Z_OK) { maybe_warnx("failed to inflateInit"); goto stop_and_fail; } state++; break; case GZSTATE_READ: error = inflate(&z, Z_FINISH); switch (error) { /* Z_BUF_ERROR goes with Z_FINISH... 
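			   zlib reports it whenever it cannot make progress,
			   which under Z_FINISH usually just means the input
			   buffer ran dry; as long as there is still output
			   space and end of input has not been reached we loop
			   around and read more, and only otherwise fall
			   through and treat it like Z_OK.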
*/ case Z_BUF_ERROR: if (z.avail_out > 0 && !done_reading) continue; case Z_STREAM_END: case Z_OK: break; case Z_NEED_DICT: maybe_warnx("Z_NEED_DICT error"); goto stop_and_fail; case Z_DATA_ERROR: maybe_warnx("data stream error"); goto stop_and_fail; case Z_STREAM_ERROR: maybe_warnx("internal stream error"); goto stop_and_fail; case Z_MEM_ERROR: maybe_warnx("memory allocation error"); goto stop_and_fail; default: maybe_warn("unknown error from inflate(): %d", error); } wr = BUFLEN - z.avail_out; if (wr != 0) { crc = crc32(crc, (const Bytef *)outbufp, (unsigned)wr); if ( #ifndef SMALL /* don't write anything with -t */ tflag == 0 && #endif write(out, outbufp, wr) != wr) { maybe_warn("error writing to output"); goto stop_and_fail; } out_tot += wr; out_sub_tot += wr; } if (error == Z_STREAM_END) { inflateEnd(&z); state++; } z.next_out = (unsigned char *)outbufp; z.avail_out = BUFLEN; break; case GZSTATE_CRC: { uLong origcrc; if (z.avail_in < 4) { if (!done_reading) { needmore = 1; continue; } maybe_warnx("truncated input"); goto stop_and_fail; } origcrc = ((unsigned)z.next_in[0] & 0xff) | ((unsigned)z.next_in[1] & 0xff) << 8 | ((unsigned)z.next_in[2] & 0xff) << 16 | ((unsigned)z.next_in[3] & 0xff) << 24; if (origcrc != crc) { maybe_warnx("invalid compressed" " data--crc error"); goto stop_and_fail; } } z.avail_in -= 4; z.next_in += 4; if (!z.avail_in && done_reading) { goto stop; } state++; break; case GZSTATE_LEN: { uLong origlen; if (z.avail_in < 4) { if (!done_reading) { needmore = 1; continue; } maybe_warnx("truncated input"); goto stop_and_fail; } origlen = ((unsigned)z.next_in[0] & 0xff) | ((unsigned)z.next_in[1] & 0xff) << 8 | ((unsigned)z.next_in[2] & 0xff) << 16 | ((unsigned)z.next_in[3] & 0xff) << 24; if (origlen != out_sub_tot) { maybe_warnx("invalid compressed" " data--length error"); goto stop_and_fail; } } z.avail_in -= 4; z.next_in += 4; if (error < 0) { maybe_warnx("decompression error"); goto stop_and_fail; } state = GZSTATE_MAGIC0; break; } continue; stop_and_fail: out_tot = -1; stop: break; } if (state > GZSTATE_INIT) inflateEnd(&z); free(inbufp); out1: free(outbufp); out2: if (gsizep) *gsizep = in_tot; return (out_tot); } #ifndef SMALL /* * set the owner, mode, flags & utimes using the given file descriptor. * file is only used in possible warning messages. */ static void copymodes(int fd, const struct stat *sbp, const char *file) { struct timespec times[2]; struct stat sb; /* * If we have no info on the input, give this file some * default values and return.. */ if (sbp == NULL) { mode_t mask = umask(022); (void)fchmod(fd, DEFFILEMODE & ~mask); (void)umask(mask); return; } sb = *sbp; /* if the chown fails, remove set-id bits as-per compress(1) */ if (fchown(fd, sb.st_uid, sb.st_gid) < 0) { if (errno != EPERM) maybe_warn("couldn't fchown: %s", file); sb.st_mode &= ~(S_ISUID|S_ISGID); } /* we only allow set-id and the 9 normal permission bits */ sb.st_mode &= S_ISUID | S_ISGID | S_IRWXU | S_IRWXG | S_IRWXO; if (fchmod(fd, sb.st_mode) < 0) maybe_warn("couldn't fchmod: %s", file); times[0] = sb.st_atim; times[1] = sb.st_mtim; if (futimens(fd, times) < 0) maybe_warn("couldn't futimens: %s", file); /* only try flags if they exist already */ if (sb.st_flags != 0 && fchflags(fd, sb.st_flags) < 0) maybe_warn("couldn't fchflags: %s", file); } #endif /* what sort of file is this? 
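   Only the first four bytes of the file have been read, so the decision is
   made by comparing them against each known magic number: 1f 8b (or the old
   1f 9e) for gzip, "BZh" plus a digit for bzip2, 1f 9d for compress, 1f 1e
   for pack, and the first four bytes of the xz magic.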
*/ static enum filetype file_gettype(u_char *buf) { if (buf[0] == GZIP_MAGIC0 && (buf[1] == GZIP_MAGIC1 || buf[1] == GZIP_OMAGIC1)) return FT_GZIP; else #ifndef NO_BZIP2_SUPPORT if (memcmp(buf, BZIP2_MAGIC, 3) == 0 && buf[3] >= '0' && buf[3] <= '9') return FT_BZIP2; else #endif #ifndef NO_COMPRESS_SUPPORT if (memcmp(buf, Z_MAGIC, 2) == 0) return FT_Z; else #endif #ifndef NO_PACK_SUPPORT if (memcmp(buf, PACK_MAGIC, 2) == 0) return FT_PACK; else #endif #ifndef NO_XZ_SUPPORT if (memcmp(buf, XZ_MAGIC, 4) == 0) /* XXX: We only have 4 bytes */ return FT_XZ; else #endif return FT_UNKNOWN; } #ifndef SMALL /* check the outfile is OK. */ static int check_outfile(const char *outfile) { struct stat sb; int ok = 1; if (lflag == 0 && stat(outfile, &sb) == 0) { if (fflag) unlink(outfile); else if (isatty(STDIN_FILENO)) { char ans[10] = { 'n', '\0' }; /* default */ fprintf(stderr, "%s already exists -- do you wish to " "overwrite (y or n)? " , outfile); (void)fgets(ans, sizeof(ans) - 1, stdin); if (ans[0] != 'y' && ans[0] != 'Y') { fprintf(stderr, "\tnot overwriting\n"); ok = 0; } else unlink(outfile); } else { maybe_warnx("%s already exists -- skipping", outfile); ok = 0; } } return ok; } static void unlink_input(const char *file, const struct stat *sb) { struct stat nsb; if (kflag) return; if (stat(file, &nsb) != 0) /* Must be gone already */ return; if (nsb.st_dev != sb->st_dev || nsb.st_ino != sb->st_ino) /* Definitely a different file */ return; unlink(file); } static void sigint_handler(int signo __unused) { if (remove_file != NULL) unlink(remove_file); _exit(2); } #endif static const suffixes_t * check_suffix(char *file, int xlate) { const suffixes_t *s; int len = strlen(file); char *sp; for (s = suffixes; s != suffixes + NUM_SUFFIXES; s++) { /* if it doesn't fit in "a.suf", don't bother */ if (s->ziplen >= len) continue; sp = file + len - s->ziplen; if (strcmp(s->zipped, sp) != 0) continue; if (xlate) strcpy(sp, s->normal); return s; } return NULL; } /* * compress the given file: create a corresponding .gz file and remove the * original. */ static off_t file_compress(char *file, char *outfile, size_t outsize) { int in; int out; off_t size, insize; #ifndef SMALL struct stat isb, osb; const suffixes_t *suff; #endif in = open(file, O_RDONLY); if (in == -1) { maybe_warn("can't open %s", file); return (-1); } #ifndef SMALL if (fstat(in, &isb) != 0) { maybe_warn("couldn't stat: %s", file); close(in); return (-1); } #endif if (cflag == 0) { #ifndef SMALL if (isb.st_nlink > 1 && fflag == 0) { maybe_warnx("%s has %d other link%s -- skipping", file, isb.st_nlink - 1, (isb.st_nlink - 1) == 1 ? "" : "s"); close(in); return (-1); } if (fflag == 0 && (suff = check_suffix(file, 0)) && suff->zipped[0] != 0) { maybe_warnx("%s already has %s suffix -- unchanged", file, suff->zipped); close(in); return (-1); } #endif /* Add (usually) .gz to filename */ if ((size_t)snprintf(outfile, outsize, "%s%s", file, suffixes[0].zipped) >= outsize) memcpy(outfile + outsize - suffixes[0].ziplen - 1, suffixes[0].zipped, suffixes[0].ziplen + 1); #ifndef SMALL if (check_outfile(outfile) == 0) { close(in); return (-1); } #endif } if (cflag == 0) { out = open(outfile, O_WRONLY | O_CREAT | O_EXCL, 0600); if (out == -1) { maybe_warn("could not create output: %s", outfile); fclose(stdin); return (-1); } #ifndef SMALL remove_file = outfile; #endif } else out = STDOUT_FILENO; insize = gz_compress(in, out, &size, basename(file), (uint32_t)isb.st_mtime); (void)close(in); /* * If there was an error, insize will be -1. 
* If we compressed to stdout, just return the size. * Otherwise stat the file and check it is the correct size. * We only blow away the file if we can stat the output and it * has the expected size. */ if (cflag != 0) return (insize == -1 ? -1 : size); #ifndef SMALL if (fstat(out, &osb) != 0) { maybe_warn("couldn't stat: %s", outfile); goto bad_outfile; } if (osb.st_size != size) { maybe_warnx("output file: %s wrong size (%ju != %ju), deleting", outfile, (uintmax_t)osb.st_size, (uintmax_t)size); goto bad_outfile; } copymodes(out, &isb, outfile); remove_file = NULL; #endif if (close(out) == -1) maybe_warn("couldn't close output"); /* output is good, ok to delete input */ unlink_input(file, &isb); return (size); #ifndef SMALL bad_outfile: if (close(out) == -1) maybe_warn("couldn't close output"); maybe_warnx("leaving original %s", file); unlink(outfile); return (size); #endif } /* uncompress the given file and remove the original */ static off_t file_uncompress(char *file, char *outfile, size_t outsize) { struct stat isb, osb; off_t size; ssize_t rbytes; unsigned char header1[4]; enum filetype method; int fd, ofd, zfd = -1; #ifndef SMALL ssize_t rv; time_t timestamp = 0; char name[PATH_MAX + 1]; #endif /* gather the old name info */ fd = open(file, O_RDONLY); if (fd < 0) { maybe_warn("can't open %s", file); goto lose; } strlcpy(outfile, file, outsize); if (check_suffix(outfile, 1) == NULL && !(cflag || lflag)) { maybe_warnx("%s: unknown suffix -- ignored", file); goto lose; } rbytes = read(fd, header1, sizeof header1); if (rbytes != sizeof header1) { /* we don't want to fail here. */ #ifndef SMALL if (fflag) goto lose; #endif if (rbytes == -1) maybe_warn("can't read %s", file); else goto unexpected_EOF; goto lose; } method = file_gettype(header1); #ifndef SMALL if (fflag == 0 && method == FT_UNKNOWN) { maybe_warnx("%s: not in gzip format", file); goto lose; } #endif #ifndef SMALL if (method == FT_GZIP && Nflag) { unsigned char ts[4]; /* timestamp */ rv = pread(fd, ts, sizeof ts, GZIP_TIMESTAMP); if (rv >= 0 && rv < (ssize_t)(sizeof ts)) goto unexpected_EOF; if (rv == -1) { if (!fflag) maybe_warn("can't read %s", file); goto lose; } timestamp = ts[3] << 24 | ts[2] << 16 | ts[1] << 8 | ts[0]; if (header1[3] & ORIG_NAME) { rbytes = pread(fd, name, sizeof(name) - 1, GZIP_ORIGNAME); if (rbytes < 0) { maybe_warn("can't read %s", file); goto lose; } if (name[0] != '\0') { char *dp, *nf; /* Make sure that name is NUL-terminated */ name[rbytes] = '\0'; /* strip saved directory name */ nf = strrchr(name, '/'); if (nf == NULL) nf = name; else nf++; /* preserve original directory name */ dp = strrchr(file, '/'); if (dp == NULL) dp = file; else dp++; snprintf(outfile, outsize, "%.*s%.*s", (int) (dp - file), file, (int) rbytes, nf); } } } #endif lseek(fd, 0, SEEK_SET); if (cflag == 0 || lflag) { if (fstat(fd, &isb) != 0) goto lose; #ifndef SMALL if (isb.st_nlink > 1 && lflag == 0 && fflag == 0) { maybe_warnx("%s has %d other links -- skipping", file, isb.st_nlink - 1); goto lose; } if (nflag == 0 && timestamp) isb.st_mtime = timestamp; if (check_outfile(outfile) == 0) goto lose; #endif } if (cflag == 0 && lflag == 0) { zfd = open(outfile, O_WRONLY|O_CREAT|O_EXCL, 0600); if (zfd == STDOUT_FILENO) { /* We won't close STDOUT_FILENO later... 
*/ zfd = dup(zfd); close(STDOUT_FILENO); } if (zfd == -1) { maybe_warn("can't open %s", outfile); goto lose; } #ifndef SMALL remove_file = outfile; #endif } else zfd = STDOUT_FILENO; switch (method) { #ifndef NO_BZIP2_SUPPORT case FT_BZIP2: /* XXX */ if (lflag) { maybe_warnx("no -l with bzip2 files"); goto lose; } size = unbzip2(fd, zfd, NULL, 0, NULL); break; #endif #ifndef NO_COMPRESS_SUPPORT case FT_Z: { FILE *in, *out; /* XXX */ if (lflag) { maybe_warnx("no -l with Lempel-Ziv files"); goto lose; } if ((in = zdopen(fd)) == NULL) { maybe_warn("zdopen for read: %s", file); goto lose; } out = fdopen(dup(zfd), "w"); if (out == NULL) { maybe_warn("fdopen for write: %s", outfile); fclose(in); goto lose; } size = zuncompress(in, out, NULL, 0, NULL); /* need to fclose() if ferror() is true... */ if (ferror(in) | fclose(in)) { maybe_warn("failed infile fclose"); unlink(outfile); (void)fclose(out); } if (fclose(out) != 0) { maybe_warn("failed outfile fclose"); unlink(outfile); goto lose; } break; } #endif #ifndef NO_PACK_SUPPORT case FT_PACK: if (lflag) { maybe_warnx("no -l with packed files"); goto lose; } size = unpack(fd, zfd, NULL, 0, NULL); break; #endif #ifndef NO_XZ_SUPPORT case FT_XZ: if (lflag) { maybe_warnx("no -l with xz files"); goto lose; } size = unxz(fd, zfd, NULL, 0, NULL); break; #endif #ifndef SMALL case FT_UNKNOWN: if (lflag) { maybe_warnx("no -l for unknown filetypes"); goto lose; } size = cat_fd(NULL, 0, NULL, fd); break; #endif default: if (lflag) { print_list(fd, isb.st_size, outfile, isb.st_mtime); close(fd); return -1; /* XXX */ } size = gz_uncompress(fd, zfd, NULL, 0, NULL, file); break; } if (close(fd) != 0) maybe_warn("couldn't close input"); if (zfd != STDOUT_FILENO && close(zfd) != 0) maybe_warn("couldn't close output"); if (size == -1) { if (cflag == 0) unlink(outfile); maybe_warnx("%s: uncompress failed", file); return -1; } /* if testing, or we uncompressed to stdout, this is all we need */ #ifndef SMALL if (tflag) return size; #endif /* if we are uncompressing to stdin, don't remove the file. */ if (cflag) return size; /* * if we create a file... */ /* * if we can't stat the file don't remove the file. 
 */
	ofd = open(outfile, O_RDWR, 0);
	if (ofd == -1) {
		maybe_warn("couldn't open (leaving original): %s", outfile);
		return -1;
	}
	if (fstat(ofd, &osb) != 0) {
		maybe_warn("couldn't stat (leaving original): %s", outfile);
		close(ofd);
		return -1;
	}
	if (osb.st_size != size) {
		maybe_warnx("stat gave different size: %ju != %ju (leaving original)",
		    (uintmax_t)size, (uintmax_t)osb.st_size);
		close(ofd);
		unlink(outfile);
		return -1;
	}
#ifndef SMALL
	copymodes(ofd, &isb, outfile);
	remove_file = NULL;
#endif
	close(ofd);
	unlink_input(file, &isb);
	return size;

unexpected_EOF:
	maybe_warnx("%s: unexpected end of file", file);
lose:
	/* release whichever descriptors are still open */
	if (fd != -1)
		close(fd);
	if (zfd != -1 && zfd != STDOUT_FILENO)
		close(zfd);
	return -1;
}

#ifndef SMALL
static off_t
cat_fd(unsigned char * prepend, size_t count, off_t *gsizep, int fd)
{
	char buf[BUFLEN];
	off_t in_tot;
	ssize_t w;

	in_tot = count;
	w = write(STDOUT_FILENO, prepend, count);
	if (w == -1 || (size_t)w != count) {
		maybe_warn("write to stdout");
		return -1;
	}
	for (;;) {
		ssize_t rv;

		rv = read(fd, buf, sizeof buf);
		if (rv == 0)
			break;
		if (rv < 0) {
			maybe_warn("read from fd %d", fd);
			break;
		}
		if (write(STDOUT_FILENO, buf, rv) != rv) {
			maybe_warn("write to stdout");
			break;
		}
		in_tot += rv;
	}

	if (gsizep)
		*gsizep = in_tot;
	return (in_tot);
}
#endif

static void
handle_stdin(void)
{
	unsigned char header1[4];
	off_t usize, gsize;
	enum filetype method;
	ssize_t bytes_read;
#ifndef NO_COMPRESS_SUPPORT
	FILE *in;
#endif

#ifndef SMALL
	if (fflag == 0 && lflag == 0 && isatty(STDIN_FILENO)) {
		maybe_warnx("standard input is a terminal -- ignoring");
		return;
	}
#endif

	if (lflag) {
		struct stat isb;

		/* XXX could read the whole file, etc. */
		if (fstat(STDIN_FILENO, &isb) < 0) {
			maybe_warn("fstat");
			return;
		}
		print_list(STDIN_FILENO, isb.st_size, "stdout", isb.st_mtime);
		return;
	}

	bytes_read = read_retry(STDIN_FILENO, header1, sizeof header1);
	if (bytes_read == -1) {
		maybe_warn("can't read stdin");
		return;
	} else if (bytes_read != sizeof(header1)) {
		maybe_warnx("(stdin): unexpected end of file");
		return;
	}

	method = file_gettype(header1);
	switch (method) {
	default:
#ifndef SMALL
		if (fflag == 0) {
			maybe_warnx("unknown compression format");
			return;
		}
		usize = cat_fd(header1, sizeof header1, &gsize, STDIN_FILENO);
		break;
#endif
	case FT_GZIP:
		usize = gz_uncompress(STDIN_FILENO, STDOUT_FILENO,
		    (char *)header1, sizeof header1, &gsize, "(stdin)");
		break;
#ifndef NO_BZIP2_SUPPORT
	case FT_BZIP2:
		usize = unbzip2(STDIN_FILENO, STDOUT_FILENO,
		    (char *)header1, sizeof header1, &gsize);
		break;
#endif
#ifndef NO_COMPRESS_SUPPORT
	case FT_Z:
		if ((in = zdopen(STDIN_FILENO)) == NULL) {
			maybe_warnx("zopen of stdin");
			return;
		}
		usize = zuncompress(in, stdout, (char *)header1,
		    sizeof header1, &gsize);
		fclose(in);
		break;
#endif
#ifndef NO_PACK_SUPPORT
	case FT_PACK:
		usize = unpack(STDIN_FILENO, STDOUT_FILENO,
		    (char *)header1, sizeof header1, &gsize);
		break;
#endif
#ifndef NO_XZ_SUPPORT
	case FT_XZ:
		usize = unxz(STDIN_FILENO, STDOUT_FILENO,
		    (char *)header1, sizeof header1, &gsize);
		break;
#endif
	}

#ifndef SMALL
	if (vflag && !tflag && usize != -1 && gsize != -1)
		print_verbage(NULL, NULL, usize, gsize);
	if (vflag && tflag)
		print_test("(stdin)", usize != -1);
#endif
}

static void
handle_stdout(void)
{
	off_t gsize, usize;
	struct stat sb;
	time_t systime;
	uint32_t mtime;
	int ret;

#ifndef SMALL
	if (fflag == 0 && isatty(STDOUT_FILENO)) {
		maybe_warnx("standard output is a terminal -- ignoring");
		return;
	}
#endif
	/* If stdin is a file use its mtime, otherwise use current time */
	ret = fstat(STDIN_FILENO, &sb);

#ifndef SMALL
	if (ret < 0) {
maybe_warn("Can't stat stdin"); return; } #endif if (S_ISREG(sb.st_mode)) mtime = (uint32_t)sb.st_mtime; else { systime = time(NULL); #ifndef SMALL if (systime == -1) { maybe_warn("time"); return; } #endif mtime = (uint32_t)systime; } usize = gz_compress(STDIN_FILENO, STDOUT_FILENO, &gsize, "", mtime); #ifndef SMALL if (vflag && !tflag && usize != -1 && gsize != -1) print_verbage(NULL, NULL, usize, gsize); #endif } /* do what is asked for, for the path name */ static void handle_pathname(char *path) { char *opath = path, *s = NULL; ssize_t len; int slen; struct stat sb; /* check for stdout/stdin */ if (path[0] == '-' && path[1] == '\0') { if (dflag) handle_stdin(); else handle_stdout(); return; } retry: if (stat(path, &sb) != 0 || (fflag == 0 && cflag == 0 && lstat(path, &sb) != 0)) { /* lets try .gz if we're decompressing */ if (dflag && s == NULL && errno == ENOENT) { len = strlen(path); slen = suffixes[0].ziplen; s = malloc(len + slen + 1); if (s == NULL) maybe_err("malloc"); memcpy(s, path, len); memcpy(s + len, suffixes[0].zipped, slen + 1); path = s; goto retry; } maybe_warn("can't stat: %s", opath); goto out; } if (S_ISDIR(sb.st_mode)) { #ifndef SMALL if (rflag) handle_dir(path); else #endif maybe_warnx("%s is a directory", path); goto out; } if (S_ISREG(sb.st_mode)) handle_file(path, &sb); else maybe_warnx("%s is not a regular file", path); out: if (s) free(s); } /* compress/decompress a file */ static void handle_file(char *file, struct stat *sbp) { off_t usize, gsize; char outfile[PATH_MAX]; infile = file; if (dflag) { usize = file_uncompress(file, outfile, sizeof(outfile)); #ifndef SMALL if (vflag && tflag) print_test(file, usize != -1); #endif if (usize == -1) return; gsize = sbp->st_size; } else { gsize = file_compress(file, outfile, sizeof(outfile)); if (gsize == -1) return; usize = sbp->st_size; } #ifndef SMALL if (vflag && !tflag) print_verbage(file, (cflag) ? NULL : outfile, usize, gsize); #endif } #ifndef SMALL /* this is used with -r to recursively descend directories */ static void handle_dir(char *dir) { char *path_argv[2]; FTS *fts; FTSENT *entry; path_argv[0] = dir; path_argv[1] = 0; fts = fts_open(path_argv, FTS_PHYSICAL | FTS_NOCHDIR, NULL); if (fts == NULL) { warn("couldn't fts_open %s", dir); return; } while ((entry = fts_read(fts))) { switch(entry->fts_info) { case FTS_D: case FTS_DP: continue; case FTS_DNR: case FTS_ERR: case FTS_NS: maybe_warn("%s", entry->fts_path); continue; case FTS_F: handle_file(entry->fts_path, entry->fts_statp); } } (void)fts_close(fts); } #endif /* print a ratio - size reduction as a fraction of uncompressed size */ static void print_ratio(off_t in, off_t out, FILE *where) { int percent10; /* 10 * percent */ off_t diff; char buff[8]; int len; diff = in - out/2; if (diff <= 0) /* * Output is more than double size of input! print -99.9% * Quite possibly we've failed to get the original size. */ percent10 = -999; else { /* * We only need 12 bits of result from the final division, * so reduce the values until a 32bit division will suffice. */ while (in > 0x100000) { diff >>= 1; in >>= 1; } if (in != 0) percent10 = ((u_int)diff * 2000) / (u_int)in - 1000; else percent10 = 0; } len = snprintf(buff, sizeof buff, "%2.2d.", percent10); /* Move the '.' to before the last digit */ buff[len - 1] = buff[len - 2]; buff[len - 2] = '.'; fprintf(where, "%5s%%", buff); } #ifndef SMALL /* print compression statistics, and the new name (if there is one!) 
*/ static void print_verbage(const char *file, const char *nfile, off_t usize, off_t gsize) { if (file) fprintf(stderr, "%s:%s ", file, strlen(file) < 7 ? "\t\t" : "\t"); print_ratio(usize, gsize, stderr); if (nfile) fprintf(stderr, " -- replaced with %s", nfile); fprintf(stderr, "\n"); fflush(stderr); } /* print test results */ static void print_test(const char *file, int ok) { if (exit_value == 0 && ok == 0) exit_value = 1; fprintf(stderr, "%s:%s %s\n", file, strlen(file) < 7 ? "\t\t" : "\t", ok ? "OK" : "NOT OK"); fflush(stderr); } #endif /* print a file's info ala --list */ /* eg: compressed uncompressed ratio uncompressed_name 354841 1679360 78.8% /usr/pkgsrc/distfiles/libglade-2.0.1.tar */ static void print_list(int fd, off_t out, const char *outfile, time_t ts) { static int first = 1; #ifndef SMALL static off_t in_tot, out_tot; uint32_t crc = 0; #endif off_t in = 0, rv; if (first) { #ifndef SMALL if (vflag) printf("method crc date time "); #endif if (qflag == 0) printf(" compressed uncompressed " "ratio uncompressed_name\n"); } first = 0; /* print totals? */ #ifndef SMALL if (fd == -1) { in = in_tot; out = out_tot; } else #endif { /* read the last 4 bytes - this is the uncompressed size */ rv = lseek(fd, (off_t)(-8), SEEK_END); if (rv != -1) { unsigned char buf[8]; uint32_t usize; rv = read(fd, (char *)buf, sizeof(buf)); if (rv == -1) maybe_warn("read of uncompressed size"); else if (rv != sizeof(buf)) maybe_warnx("read of uncompressed size"); else { usize = buf[4] | buf[5] << 8 | buf[6] << 16 | buf[7] << 24; in = (off_t)usize; #ifndef SMALL crc = buf[0] | buf[1] << 8 | buf[2] << 16 | buf[3] << 24; #endif } } } #ifndef SMALL if (vflag && fd == -1) printf(" "); else if (vflag) { char *date = ctime(&ts); /* skip the day, 1/100th second, and year */ date += 4; date[12] = 0; printf("%5s %08x %11s ", "defla"/*XXX*/, crc, date); } in_tot += in; out_tot += out; #else (void)&ts; /* XXX */ #endif printf("%12llu %12llu ", (unsigned long long)out, (unsigned long long)in); print_ratio(in, out, stdout); printf(" %s\n", outfile); } /* display the usage of NetBSD gzip */ static void usage(void) { fprintf(stderr, "%s\n", gzip_version); fprintf(stderr, #ifdef SMALL "usage: %s [-" OPT_LIST "] [ [ ...]]\n", #else "usage: %s [-123456789acdfhklLNnqrtVv] [-S .suffix] [ [ ...]]\n" " -1 --fast fastest (worst) compression\n" " -2 .. 
-8 set compression level\n" " -9 --best best (slowest) compression\n" " -c --stdout write to stdout, keep original files\n" " --to-stdout\n" " -d --decompress uncompress files\n" " --uncompress\n" " -f --force force overwriting & compress links\n" " -h --help display this help\n" " -k --keep don't delete input files during operation\n" " -l --list list compressed file contents\n" " -N --name save or restore original file name and time stamp\n" " -n --no-name don't save original file name or time stamp\n" " -q --quiet output no warnings\n" " -r --recursive recursively compress files in directories\n" " -S .suf use suffix .suf instead of .gz\n" " --suffix .suf\n" " -t --test test compressed file\n" " -V --version display program version\n" " -v --verbose print extra statistics\n", #endif getprogname()); exit(0); } #ifndef SMALL /* display the license information of FreeBSD gzip */ static void display_license(void) { fprintf(stderr, "%s (based on NetBSD gzip 20150113)\n", gzip_version); fprintf(stderr, "%s\n", gzip_copyright); exit(0); } #endif /* display the version of NetBSD gzip */ static void display_version(void) { fprintf(stderr, "%s\n", gzip_version); exit(0); } #ifndef NO_BZIP2_SUPPORT #include "unbzip2.c" #endif #ifndef NO_COMPRESS_SUPPORT #include "zuncompress.c" #endif #ifndef NO_PACK_SUPPORT #include "unpack.c" #endif #ifndef NO_XZ_SUPPORT #include "unxz.c" #endif static ssize_t read_retry(int fd, void *buf, size_t sz) { char *cp = buf; size_t left = MIN(sz, (size_t) SSIZE_MAX); while (left > 0) { ssize_t ret; ret = read(fd, cp, left); if (ret == -1) { return ret; } else if (ret == 0) { break; /* EOF */ } cp += ret; left -= ret; } return sz - left; } Index: user/alc/PQ_LAUNDRY/usr.bin/indent/args.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/indent/args.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/indent/args.c (revision 303642) @@ -1,327 +1,325 @@ -/* +/*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)args.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); /* * Argument scanning and profile reading code. Default parameters are set * here as well. */ #include #include #include #include #include #include #include "indent_globs.h" #include "indent.h" /* profile types */ #define PRO_SPECIAL 1 /* special case */ #define PRO_BOOL 2 /* boolean */ #define PRO_INT 3 /* integer */ #define PRO_FONT 4 /* troff font */ /* profile specials for booleans */ #define ON 1 /* turn it on */ #define OFF 0 /* turn it off */ /* profile specials for specials */ #define IGN 1 /* ignore it */ #define CLI 2 /* case label indent (float) */ #define STDIN 3 /* use stdin */ #define KEY 4 /* type (keyword) */ static void scan_profile(FILE *); const char *option_source = "?"; /* * N.B.: because of the way the table here is scanned, options whose names are * substrings of other options must occur later; that is, with -lp vs -l, -lp * must be first. Also, while (most) booleans occur more than once, the last * default value is the one actually assigned. */ struct pro { const char *p_name; /* name, e.g. -bl, -cli */ int p_type; /* type (int, bool, special) */ int p_default; /* the default value (if int) */ int p_special; /* depends on type */ int *p_obj; /* the associated variable */ } pro[] = { {"T", PRO_SPECIAL, 0, KEY, 0}, {"bacc", PRO_BOOL, false, ON, &blanklines_around_conditional_compilation}, {"badp", PRO_BOOL, false, ON, &blanklines_after_declarations_at_proctop}, {"bad", PRO_BOOL, false, ON, &blanklines_after_declarations}, {"bap", PRO_BOOL, false, ON, &blanklines_after_procs}, {"bbb", PRO_BOOL, false, ON, &blanklines_before_blockcomments}, {"bc", PRO_BOOL, true, OFF, &ps.leave_comma}, {"bl", PRO_BOOL, true, OFF, &btype_2}, {"br", PRO_BOOL, true, ON, &btype_2}, {"bs", PRO_BOOL, false, ON, &Bill_Shannon}, {"cdb", PRO_BOOL, true, ON, &comment_delimiter_on_blankline}, {"cd", PRO_INT, 0, 0, &ps.decl_com_ind}, {"ce", PRO_BOOL, true, ON, &cuddle_else}, {"ci", PRO_INT, 0, 0, &continuation_indent}, {"cli", PRO_SPECIAL, 0, CLI, 0}, {"c", PRO_INT, 33, 0, &ps.com_ind}, {"di", PRO_INT, 16, 0, &ps.decl_indent}, {"dj", PRO_BOOL, false, ON, &ps.ljust_decl}, {"d", PRO_INT, 0, 0, &ps.unindent_displace}, {"eei", PRO_BOOL, false, ON, &extra_expression_indent}, {"ei", PRO_BOOL, true, ON, &ps.else_if}, {"fbc", PRO_FONT, 0, 0, (int *) &blkcomf}, {"fbs", PRO_BOOL, true, ON, &function_brace_split}, {"fbx", PRO_FONT, 0, 0, (int *) &boxcomf}, {"fb", PRO_FONT, 0, 0, (int *) &bodyf}, {"fc1", PRO_BOOL, true, ON, &format_col1_comments}, {"fcb", PRO_BOOL, true, ON, &format_block_comments}, {"fc", PRO_FONT, 0, 0, (int *) &scomf}, {"fk", PRO_FONT, 0, 0, (int *) &keywordf}, {"fs", PRO_FONT, 0, 0, (int *) &stringf}, {"ip", PRO_BOOL, true, ON, &ps.indent_parameters}, {"i", PRO_INT, 8, 0, &ps.ind_size}, {"lc", PRO_INT, 0, 0, &block_comment_max_col}, {"ldi", PRO_INT, -1, 0, &ps.local_decl_indent}, {"lp", PRO_BOOL, true, ON, &lineup_to_parens}, {"l", 
PRO_INT, 78, 0, &max_col}, {"nbacc", PRO_BOOL, false, OFF, &blanklines_around_conditional_compilation}, {"nbadp", PRO_BOOL, false, OFF, &blanklines_after_declarations_at_proctop}, {"nbad", PRO_BOOL, false, OFF, &blanklines_after_declarations}, {"nbap", PRO_BOOL, false, OFF, &blanklines_after_procs}, {"nbbb", PRO_BOOL, false, OFF, &blanklines_before_blockcomments}, {"nbc", PRO_BOOL, true, ON, &ps.leave_comma}, {"nbs", PRO_BOOL, false, OFF, &Bill_Shannon}, {"ncdb", PRO_BOOL, true, OFF, &comment_delimiter_on_blankline}, {"nce", PRO_BOOL, true, OFF, &cuddle_else}, {"ndj", PRO_BOOL, false, OFF, &ps.ljust_decl}, {"neei", PRO_BOOL, false, OFF, &extra_expression_indent}, {"nei", PRO_BOOL, true, OFF, &ps.else_if}, {"nfbs", PRO_BOOL, true, OFF, &function_brace_split}, {"nfc1", PRO_BOOL, true, OFF, &format_col1_comments}, {"nfcb", PRO_BOOL, true, OFF, &format_block_comments}, {"nip", PRO_BOOL, true, OFF, &ps.indent_parameters}, {"nlp", PRO_BOOL, true, OFF, &lineup_to_parens}, {"npcs", PRO_BOOL, false, OFF, &proc_calls_space}, {"npro", PRO_SPECIAL, 0, IGN, 0}, {"npsl", PRO_BOOL, true, OFF, &procnames_start_line}, {"nps", PRO_BOOL, false, OFF, &pointer_as_binop}, {"nsc", PRO_BOOL, true, OFF, &star_comment_cont}, {"nsob", PRO_BOOL, false, OFF, &swallow_optional_blanklines}, {"nut", PRO_BOOL, true, OFF, &use_tabs}, {"nv", PRO_BOOL, false, OFF, &verbose}, {"pcs", PRO_BOOL, false, ON, &proc_calls_space}, {"psl", PRO_BOOL, true, ON, &procnames_start_line}, {"ps", PRO_BOOL, false, ON, &pointer_as_binop}, {"sc", PRO_BOOL, true, ON, &star_comment_cont}, {"sob", PRO_BOOL, false, ON, &swallow_optional_blanklines}, {"st", PRO_SPECIAL, 0, STDIN, 0}, {"ta", PRO_BOOL, false, ON, &auto_typedefs}, {"troff", PRO_BOOL, false, ON, &troff}, {"ut", PRO_BOOL, true, ON, &use_tabs}, {"v", PRO_BOOL, false, ON, &verbose}, /* whew! */ {0, 0, 0, 0, 0} }; /* * set_profile reads $HOME/.indent.pro and ./.indent.pro and handles arguments * given in these files. */ void set_profile(void) { FILE *f; char fname[PATH_MAX]; static char prof[] = ".indent.pro"; snprintf(fname, sizeof(fname), "%s/%s", getenv("HOME"), prof); if ((f = fopen(option_source = fname, "r")) != NULL) { scan_profile(f); (void) fclose(f); } if ((f = fopen(option_source = prof, "r")) != NULL) { scan_profile(f); (void) fclose(f); } option_source = "Command line"; } static void scan_profile(FILE *f) { int comment, i; char *p; char buf[BUFSIZ]; while (1) { p = buf; comment = 0; while ((i = getc(f)) != EOF) { if (i == '*' && !comment && p > buf && p[-1] == '/') { comment = p - buf; *p++ = i; } else if (i == '/' && comment && p > buf && p[-1] == '*') { p = buf + comment - 1; comment = 0; } else if (isspace(i)) { if (p > buf && !comment) break; } else { *p++ = i; } } if (p != buf) { *p++ = 0; if (verbose) printf("profile: %s\n", buf); set_option(buf); } else if (i == EOF) return; } } -const char *param_start; - -static int +static const char * eqin(const char *s1, const char *s2) { while (*s1) { if (*s1++ != *s2++) - return (false); + return (NULL); } - param_start = s2; - return (true); + return (s2); } /* * Set the defaults. 
*/ void set_defaults(void) { struct pro *p; /* * Because ps.case_indent is a float, we can't initialize it from the * table: */ ps.case_indent = 0.0; /* -cli0.0 */ for (p = pro; p->p_name; p++) if (p->p_type != PRO_SPECIAL && p->p_type != PRO_FONT) *p->p_obj = p->p_default; } void set_option(char *arg) { - struct pro *p; + struct pro *p; + const char *param_start; arg++; /* ignore leading "-" */ for (p = pro; p->p_name; p++) - if (*p->p_name == *arg && eqin(p->p_name, arg)) + if (*p->p_name == *arg && (param_start = eqin(p->p_name, arg)) != NULL) goto found; errx(1, "%s: unknown parameter \"%s\"", option_source, arg - 1); found: switch (p->p_type) { case PRO_SPECIAL: switch (p->p_special) { case IGN: break; case CLI: if (*param_start == 0) goto need_param; ps.case_indent = atof(param_start); break; case STDIN: if (input == NULL) input = stdin; if (output == NULL) output = stdout; break; case KEY: if (*param_start == 0) goto need_param; { char *str = strdup(param_start); if (str == NULL) err(1, NULL); addkey(str, 4); } break; default: errx(1, "set_option: internal error: p_special %d", p->p_special); } break; case PRO_BOOL: if (p->p_special == OFF) *p->p_obj = false; else *p->p_obj = true; break; case PRO_INT: if (!isdigit(*param_start)) { need_param: errx(1, "%s: ``%s'' requires a parameter", option_source, arg - 1); } *p->p_obj = atoi(param_start); break; case PRO_FONT: parsefont((struct fstate *) p->p_obj, param_start); break; default: errx(1, "set_option: internal error: p_type %d", p->p_type); } } Index: user/alc/PQ_LAUNDRY/usr.bin/indent/indent.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/indent/indent.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/indent/indent.c (revision 303642) @@ -1,1240 +1,1247 @@ -/* +/*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1976 Board of Trustees of the University of Illinois. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1985 Sun Microsystems, Inc.\n\ @(#) Copyright (c) 1976 Board of Trustees of the University of Illinois.\n\ @(#) Copyright (c) 1980, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #if 0 #ifndef lint static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" static void bakcopy(void); +static void indent_declaration(int, int); const char *in_name = "Standard Input"; /* will always point to name of input * file */ const char *out_name = "Standard Output"; /* will always point to name * of output file */ char bakfile[MAXPATHLEN] = ""; int main(int argc, char **argv) { int dec_ind; /* current indentation for declarations */ int di_stack[20]; /* a stack of structure indentation levels */ int flushed_nl; /* used when buffering up comments to remember * that a newline was passed over */ int force_nl; /* when true, code must be broken */ int hd_type = 0; /* used to store type of stmt for if (...), * for (...), etc */ int i; /* local loop counter */ int scase; /* set to true when we see a case, so we will * know what to do with the following colon */ int sp_sw; /* when true, we are in the expression of * if(...), while(...), etc. */ int squest; /* when this is positive, we have seen a ? 
* without the matching : in a ?: * construct */ const char *t_ptr; /* used for copying tokens */ int tabs_to_var; /* true if using tabs to indent to var name */ int type_code; /* the type of token, returned by lexi */ int last_else = 0; /* true iff last keyword was an else */ /*-----------------------------------------------*\ | INITIALIZATION | \*-----------------------------------------------*/ found_err = 0; ps.p_stack[0] = stmt; /* this is the parser's stack */ ps.last_nl = true; /* this is true if the last thing scanned was * a newline */ ps.last_token = semicolon; combuf = (char *) malloc(bufsize); if (combuf == NULL) err(1, NULL); labbuf = (char *) malloc(bufsize); if (labbuf == NULL) err(1, NULL); codebuf = (char *) malloc(bufsize); if (codebuf == NULL) err(1, NULL); tokenbuf = (char *) malloc(bufsize); if (tokenbuf == NULL) err(1, NULL); l_com = combuf + bufsize - 5; l_lab = labbuf + bufsize - 5; l_code = codebuf + bufsize - 5; l_token = tokenbuf + bufsize - 5; combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and * comment buffers */ combuf[1] = codebuf[1] = labbuf[1] = '\0'; ps.else_if = 1; /* Default else-if special processing to on */ s_lab = e_lab = labbuf + 1; s_code = e_code = codebuf + 1; s_com = e_com = combuf + 1; s_token = e_token = tokenbuf + 1; in_buffer = (char *) malloc(10); if (in_buffer == NULL) err(1, NULL); in_buffer_limit = in_buffer + 8; buf_ptr = buf_end = in_buffer; line_no = 1; had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; sp_sw = force_nl = false; ps.in_or_st = false; ps.bl_line = true; dec_ind = 0; di_stack[ps.dec_nest = 0] = 0; ps.want_blank = ps.in_stmt = ps.ind_stmt = false; scase = ps.pcase = false; squest = 0; sc_end = NULL; bp_save = NULL; be_save = NULL; output = NULL; tabs_to_var = 0; /*--------------------------------------------------*\ | COMMAND LINE SCAN | \*--------------------------------------------------*/ #ifdef undef max_col = 78; /* -l78 */ lineup_to_parens = 1; /* -lp */ ps.ljust_decl = 0; /* -ndj */ ps.com_ind = 33; /* -c33 */ star_comment_cont = 1; /* -sc */ ps.ind_size = 8; /* -i8 */ verbose = 0; ps.decl_indent = 16; /* -di16 */ ps.local_decl_indent = -1; /* if this is not set to some nonnegative value * by an arg, we will set this equal to * ps.decl_ind */ ps.indent_parameters = 1; /* -ip */ ps.decl_com_ind = 0; /* if this is not set to some positive value * by an arg, we will set this equal to * ps.com_ind */ btype_2 = 1; /* -br */ cuddle_else = 1; /* -ce */ ps.unindent_displace = 0; /* -d0 */ ps.case_indent = 0; /* -cli0 */ format_block_comments = 1; /* -fcb */ format_col1_comments = 1; /* -fc1 */ procnames_start_line = 1; /* -psl */ proc_calls_space = 0; /* -npcs */ comment_delimiter_on_blankline = 1; /* -cdb */ ps.leave_comma = 1; /* -nbc */ #endif for (i = 1; i < argc; ++i) if (strcmp(argv[i], "-npro") == 0) break; set_defaults(); if (i >= argc) set_profile(); for (i = 1; i < argc; ++i) { /* * look thru args (if any) for changes to defaults */ if (argv[i][0] != '-') {/* no flag on parameter */ if (input == NULL) { /* we must have the input file */ in_name = argv[i]; /* remember name of input file */ input = fopen(in_name, "r"); if (input == NULL) /* check for open error */ err(1, "%s", in_name); continue; } else if (output == NULL) { /* we have the output file */ out_name = argv[i]; /* remember name of output file */ if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite * the file */ errx(1, "input and output files must be different"); } output = fopen(out_name, "w"); if (output == NULL) 
/* check for create error */ err(1, "%s", out_name); continue; } errx(1, "unknown parameter: %s", argv[i]); } else set_option(argv[i]); } /* end of for */ if (input == NULL) input = stdin; if (output == NULL) { if (troff || input == stdin) output = stdout; else { out_name = in_name; bakcopy(); } } if (ps.com_ind <= 1) ps.com_ind = 2; /* dont put normal comments before column 2 */ if (troff) { if (bodyf.font[0] == 0) parsefont(&bodyf, "R"); if (scomf.font[0] == 0) parsefont(&scomf, "I"); if (blkcomf.font[0] == 0) blkcomf = scomf, blkcomf.size += 2; if (boxcomf.font[0] == 0) boxcomf = blkcomf; if (stringf.font[0] == 0) parsefont(&stringf, "L"); if (keywordf.font[0] == 0) parsefont(&keywordf, "B"); writefdef(&bodyf, 'B'); writefdef(&scomf, 'C'); writefdef(&blkcomf, 'L'); writefdef(&boxcomf, 'X'); writefdef(&stringf, 'S'); writefdef(&keywordf, 'K'); } if (block_comment_max_col <= 0) block_comment_max_col = max_col; if (ps.local_decl_indent < 0) /* if not specified by user, set this */ ps.local_decl_indent = ps.decl_indent; if (ps.decl_com_ind <= 0) /* if not specified by user, set this */ ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind; if (continuation_indent == 0) continuation_indent = ps.ind_size; fill_buffer(); /* get first batch of stuff into input buffer */ parse(semicolon); { char *p = buf_ptr; int col = 1; while (1) { if (*p == ' ') col++; else if (*p == '\t') col = ((col - 1) & ~7) + 9; else break; p++; } if (col > ps.ind_size) ps.ind_level = ps.i_l_follow = col / ps.ind_size; } if (troff) { const char *p = in_name, *beg = in_name; while (*p) if (*p++ == '/') beg = p; fprintf(output, ".Fn \"%s\"\n", beg); } /* * START OF MAIN LOOP */ while (1) { /* this is the main loop. it will go until we * reach eof */ int is_procname; type_code = lexi(); /* lexi reads one token. The actual * characters read are stored in "token". lexi * returns a code indicating the type of token */ is_procname = ps.procname[0]; /* * The following code moves everything following an if (), while (), * else, etc. up to the start of the following stmt to a buffer. This * allows proper handling of both kinds of brace placement. 
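	 *
	 * (Editorial sketch, not part of the original source: if the input
	 * has a comment or a newline between "if (cond)" and the "{" that
	 * follows it, those tokens are stashed in save_com; when -br
	 * (btype_2) is in effect the "{" can then be emitted right after
	 * "if (cond)" and the saved comment is replayed afterwards from the
	 * buffer instead of blocking the cuddled brace.)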
*/ flushed_nl = false; while (ps.search_brace) { /* if we scanned an if(), while(), * etc., we might need to copy stuff * into a buffer we must loop, copying * stuff into save_com, until we find * the start of the stmt which follows * the if, or whatever */ switch (type_code) { case newline: ++line_no; if (sc_end != NULL) goto sw_buffer; /* dump comment, if any */ flushed_nl = true; case form_feed: break; /* form feeds and newlines found here will be * ignored */ case lbrace: /* this is a brace that starts the compound * stmt */ if (sc_end == NULL) { /* ignore buffering if a comment wasn't * stored up */ ps.search_brace = false; goto check_type; } if (btype_2) { save_com[0] = '{'; /* we either want to put the brace * right after the if */ goto sw_buffer; /* go to common code to get out of * this loop */ } case comment: /* we have a comment, so we must copy it into * the buffer */ if (!flushed_nl || sc_end != NULL) { - if (sc_end == NULL) { /* if this is the first comment, we - * must set up the buffer */ + if (sc_end == NULL) { /* if this is the first comment, we + * must set up the buffer */ save_com[0] = save_com[1] = ' '; sc_end = &(save_com[2]); } else { *sc_end++ = '\n'; /* add newline between * comments */ *sc_end++ = ' '; --line_no; } *sc_end++ = '/'; /* copy in start of comment */ *sc_end++ = '*'; for (;;) { /* loop until we get to the end of the comment */ *sc_end = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); if (*sc_end++ == '*' && *buf_ptr == '/') break; /* we are at end of comment */ if (sc_end >= &(save_com[sc_size])) { /* check for temp buffer * overflow */ diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever"); fflush(output); exit(1); } } *sc_end++ = '/'; /* add ending slash */ if (++buf_ptr >= buf_end) /* get past / in buffer */ fill_buffer(); break; } default: /* it is the start of a normal statement */ if (flushed_nl) /* if we flushed a newline, make sure it is * put back */ force_nl = true; if ((type_code == sp_paren && *token == 'i' && last_else && ps.else_if) || (type_code == sp_nparen && *token == 'e' && e_code != s_code && e_code[-1] == '}')) force_nl = false; if (sc_end == NULL) { /* ignore buffering if comment wasn't * saved up */ ps.search_brace = false; goto check_type; } if (force_nl) { /* if we should insert a nl here, put it into * the buffer */ force_nl = false; --line_no; /* this will be re-increased when the nl is * read from the buffer */ *sc_end++ = '\n'; *sc_end++ = ' '; if (verbose && !flushed_nl) /* print error msg if the line * was not already broken */ diag2(0, "Line broken"); flushed_nl = false; } for (t_ptr = token; *t_ptr; ++t_ptr) *sc_end++ = *t_ptr; /* copy token into temp buffer */ ps.procname[0] = 0; sw_buffer: ps.search_brace = false; /* stop looking for start of * stmt */ bp_save = buf_ptr; /* save current input buffer */ be_save = buf_end; buf_ptr = save_com; /* fix so that subsequent calls to * lexi will take tokens out of * save_com */ *sc_end++ = ' ';/* add trailing blank, just in case */ buf_end = sc_end; sc_end = NULL; break; } /* end of switch */ if (type_code != 0) /* we must make this check, just in case there * was an unexpected EOF */ type_code = lexi(); /* read another token */ /* if (ps.search_brace) ps.procname[0] = 0; */ if ((is_procname = ps.procname[0]) && flushed_nl && !procnames_start_line && ps.in_decl && type_code == ident) flushed_nl = 0; } /* end of while (search_brace) */ last_else = 0; check_type: if (type_code == 0) { /* we got eof */ if (s_lab != e_lab || s_code != 
e_code || s_com != e_com) /* must dump end of line */ dump_line(); if (ps.tos > 1) /* check for balanced braces */ diag2(1, "Stuff missing from end of file"); if (verbose) { printf("There were %d output lines and %d comments\n", ps.out_lines, ps.out_coms); printf("(Lines with comments)/(Lines with code): %6.3f\n", (1.0 * ps.com_lines) / code_lines); } fflush(output); exit(found_err); } if ( (type_code != comment) && (type_code != newline) && (type_code != preesc) && (type_code != form_feed)) { if (force_nl && (type_code != semicolon) && (type_code != lbrace || !btype_2)) { /* we should force a broken line here */ if (verbose && !flushed_nl) diag2(0, "Line broken"); flushed_nl = false; dump_line(); ps.want_blank = false; /* dont insert blank at line start */ force_nl = false; } ps.in_stmt = true; /* turn on flag which causes an extra level of * indentation. this is turned off by a ; or * '}' */ if (s_com != e_com) { /* the turkey has embedded a comment * in a line. fix it */ *e_code++ = ' '; for (t_ptr = s_com; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; } *e_code++ = ' '; *e_code = '\0'; /* null terminate code sect */ ps.want_blank = false; e_com = s_com; } } else if (type_code != comment) /* preserve force_nl thru a comment */ force_nl = false; /* cancel forced newline after newline, form * feed, etc */ /*-----------------------------------------------------*\ | do switch on type of token scanned | \*-----------------------------------------------------*/ CHECK_SIZE_CODE; switch (type_code) { /* now, decide what to do with the token */ case form_feed: /* found a form feed in line */ ps.use_ff = true; /* a form feed is treated much like a newline */ dump_line(); ps.want_blank = false; break; case newline: if (ps.last_token != comma || ps.p_l_follow > 0 || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) { dump_line(); ps.want_blank = false; } ++line_no; /* keep track of input line number */ break; case lparen: /* got a '(' or '[' */ ++ps.p_l_follow; /* count parens to make Healy happy */ if (ps.want_blank && *token != '[' && (ps.last_token != ident || proc_calls_space || (ps.its_a_keyword && (!ps.sizeof_keyword || Bill_Shannon)))) *e_code++ = ' '; - if (ps.in_decl && !ps.block_init) - if (troff && !ps.dumped_decl_indent && !is_procname && ps.last_token == decl) { - ps.dumped_decl_indent = 1; + ps.want_blank = false; + if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent && + !is_procname) { + /* function pointer declarations */ + if (troff) { sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); e_code += strlen(e_code); } else { - while ((e_code - s_code) < dec_ind) { - CHECK_SIZE_CODE; - *e_code++ = ' '; - } - *e_code++ = token[0]; + indent_declaration(dec_ind, tabs_to_var); } - else + ps.dumped_decl_indent = true; + } + if (!troff) *e_code++ = token[0]; ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code; if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent && ps.paren_indents[0] < 2 * ps.ind_size) ps.paren_indents[0] = 2 * ps.ind_size; - ps.want_blank = false; if (ps.in_or_st && *token == '(' && ps.tos <= 2) { /* * this is a kluge to make sure that declarations will be * aligned right if proc decl has an explicit type on it, i.e. * "int a(x) {..." */ parse(semicolon); /* I said this was a kluge... 
*/ ps.in_or_st = false; /* turn off flag for structure decl or * initialization */ } if (ps.sizeof_keyword) ps.sizeof_mask |= 1 << ps.p_l_follow; break; case rparen: /* got a ')' or ']' */ rparen_count--; if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.sizeof_mask) { ps.last_u_d = true; ps.cast_mask &= (1 << ps.p_l_follow) - 1; ps.want_blank = false; } else ps.want_blank = true; ps.sizeof_mask &= (1 << ps.p_l_follow) - 1; if (--ps.p_l_follow < 0) { ps.p_l_follow = 0; diag3(0, "Extra %c", *token); } if (e_code == s_code) /* if the paren starts the line */ ps.paren_level = ps.p_l_follow; /* then indent it */ *e_code++ = token[0]; if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if * (...), or some such */ sp_sw = false; force_nl = true;/* must force newline after if */ ps.last_u_d = true; /* inform lexi that a following * operator is unary */ ps.in_stmt = false; /* dont use stmt continuation * indentation */ parse(hd_type); /* let parser worry about if, or whatever */ } ps.search_brace = btype_2; /* this should insure that constructs * such as main(){...} and int[]{...} * have their braces put in the right * place */ break; case unary_op: /* this could be any unary operation */ - if (ps.want_blank) - *e_code++ = ' '; - - if (troff && !ps.dumped_decl_indent && ps.in_decl && !is_procname) { - sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token); - ps.dumped_decl_indent = 1; - e_code += strlen(e_code); + if (!ps.dumped_decl_indent && ps.in_decl && !is_procname && + !ps.block_init) { + /* pointer declarations */ + if (troff) { + if (ps.want_blank) + *e_code++ = ' '; + sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, + token); + e_code += strlen(e_code); + } + else { + /* if this is a unary op in a declaration, we should + * indent this token */ + for (i = 0; token[i]; ++i) + /* find length of token */; + indent_declaration(dec_ind - i, tabs_to_var); + } + ps.dumped_decl_indent = true; } - else { + else if (ps.want_blank) + *e_code++ = ' '; + { const char *res = token; - if (ps.in_decl && !ps.block_init) { /* if this is a unary op - * in a declaration, we - * should indent this - * token */ - for (i = 0; token[i]; ++i); /* find length of token */ - while ((e_code - s_code) < (dec_ind - i)) { - CHECK_SIZE_CODE; - *e_code++ = ' '; /* pad it */ - } - } if (troff && token[0] == '-' && token[1] == '>') res = "\\(->"; for (t_ptr = res; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; } } ps.want_blank = false; break; case binary_op: /* any binary operation */ if (ps.want_blank) *e_code++ = ' '; { const char *res = token; if (troff) switch (token[0]) { case '<': if (token[1] == '=') res = "\\(<="; break; case '>': if (token[1] == '=') res = "\\(>="; break; case '!': if (token[1] == '=') res = "\\(!="; break; case '|': if (token[1] == '|') res = "\\(br\\(br"; else if (token[1] == 0) res = "\\(br"; break; } for (t_ptr = res; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; /* move the operator */ } } ps.want_blank = true; break; case postop: /* got a trailing ++ or -- */ *e_code++ = token[0]; *e_code++ = token[1]; ps.want_blank = true; break; case question: /* got a ? 
*/ squest++; /* this will be used when a later colon * appears so we can distinguish the * ?: construct */ if (ps.want_blank) *e_code++ = ' '; *e_code++ = '?'; ps.want_blank = true; break; case casestmt: /* got word 'case' or 'default' */ scase = true; /* so we can process the later colon properly */ goto copy_id; case colon: /* got a ':' */ if (squest > 0) { /* it is part of the ?: construct */ --squest; if (ps.want_blank) *e_code++ = ' '; *e_code++ = ':'; ps.want_blank = true; break; } if (ps.in_or_st) { *e_code++ = ':'; ps.want_blank = false; break; } ps.in_stmt = false; /* seeing a label does not imply we are in a * stmt */ for (t_ptr = s_code; *t_ptr; ++t_ptr) *e_lab++ = *t_ptr; /* turn everything so far into a label */ e_code = s_code; *e_lab++ = ':'; *e_lab++ = ' '; *e_lab = '\0'; force_nl = ps.pcase = scase; /* ps.pcase will be used by * dump_line to decide how to * indent the label. force_nl * will force a case n: to be * on a line by itself */ scase = false; ps.want_blank = false; break; case semicolon: /* got a ';' */ - if (ps.dec_nest == 0) { - /* we are not in an initialization or structure declaration */ - ps.in_or_st = false; - } + if (ps.dec_nest == 0) + ps.in_or_st = false;/* we are not in an initialization or + * structure declaration */ scase = false; /* these will only need resetting in an error */ squest = 0; if (ps.last_token == rparen && rparen_count == 0) ps.in_parameter_declaration = 0; ps.cast_mask = 0; ps.sizeof_mask = 0; ps.block_init = 0; ps.block_init_level = 0; ps.just_saw_decl--; - if (ps.in_decl && s_code == e_code && !ps.block_init) - while ((e_code - s_code) < (dec_ind - 1)) { - CHECK_SIZE_CODE; - *e_code++ = ' '; - } + if (ps.in_decl && s_code == e_code && !ps.block_init && + !ps.dumped_decl_indent) { + /* indent stray semicolons in declarations */ + indent_declaration(dec_ind - 1, tabs_to_var); + ps.dumped_decl_indent = true; + } ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level * structure declaration, we * arent any more */ if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { /* * This should be true iff there were unbalanced parens in the * stmt. It is a bit complicated, because the semicolon might * be in a for stmt */ diag2(1, "Unbalanced parens"); ps.p_l_follow = 0; if (sp_sw) { /* this is a check for an if, while, etc. with * unbalanced parens */ sp_sw = false; parse(hd_type); /* dont lose the if, or whatever */ } } *e_code++ = ';'; ps.want_blank = true; ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the * middle of a stmt */ if (!sp_sw) { /* if not if for (;;) */ parse(semicolon); /* let parser know about end of stmt */ force_nl = true;/* force newline after an end of stmt */ } break; case lbrace: /* got a '{' */ ps.in_stmt = false; /* dont indent the {} */ if (!ps.block_init) force_nl = true;/* force other stuff on same line as '{' onto * new line */ else if (ps.block_init_level <= 0) ps.block_init_level = 1; else ps.block_init_level++; if (s_code != e_code && !ps.block_init) { if (!btype_2) { dump_line(); ps.want_blank = false; } else if (ps.in_parameter_declaration && !ps.in_or_st) { ps.i_l_follow = 0; if (function_brace_split) { /* dump the line prior to the * brace ... 
*/ dump_line(); ps.want_blank = false; } else /* add a space between the decl and brace */ ps.want_blank = true; } } if (ps.in_parameter_declaration) prefix_blankline_requested = 0; if (ps.p_l_follow > 0) { /* check for preceding unbalanced * parens */ diag2(1, "Unbalanced parens"); ps.p_l_follow = 0; if (sp_sw) { /* check for unclosed if, for, etc. */ sp_sw = false; parse(hd_type); ps.ind_level = ps.i_l_follow; } } if (s_code == e_code) ps.ind_stmt = false; /* dont put extra indentation on line * with '{' */ if (ps.in_decl && ps.in_or_st) { /* this is either a structure * declaration or an init */ di_stack[ps.dec_nest++] = dec_ind; /* ? dec_ind = 0; */ } else { ps.decl_on_line = false; /* we can't be in the middle of * a declaration, so don't do * special indentation of * comments */ if (blanklines_after_declarations_at_proctop && ps.in_parameter_declaration) postfix_blankline_requested = 1; ps.in_parameter_declaration = 0; } dec_ind = 0; parse(lbrace); /* let parser know about this */ if (ps.want_blank) /* put a blank before '{' if '{' is not at * start of line */ *e_code++ = ' '; ps.want_blank = false; *e_code++ = '{'; ps.just_saw_decl = 0; break; case rbrace: /* got a '}' */ if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be * omitted in * declarations */ parse(semicolon); if (ps.p_l_follow) {/* check for unclosed if, for, else. */ diag2(1, "Unbalanced parens"); ps.p_l_follow = 0; sp_sw = false; } ps.just_saw_decl = 0; ps.block_init_level--; if (s_code != e_code && !ps.block_init) { /* '}' must be first on * line */ if (verbose) diag2(0, "Line broken"); dump_line(); } *e_code++ = '}'; ps.want_blank = true; ps.in_stmt = ps.ind_stmt = false; if (ps.dec_nest > 0) { /* we are in multi-level structure * declaration */ dec_ind = di_stack[--ps.dec_nest]; if (ps.dec_nest == 0 && !ps.in_parameter_declaration) ps.just_saw_decl = 2; ps.in_decl = true; } prefix_blankline_requested = 0; parse(rbrace); /* let parser know about this */ ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead && ps.il[ps.tos] >= ps.ind_level; if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0) postfix_blankline_requested = 1; break; case swstmt: /* got keyword "switch" */ sp_sw = true; hd_type = swstmt; /* keep this for when we have seen the * expression */ goto copy_id; /* go move the token into buffer */ case sp_paren: /* token is if, while, for */ sp_sw = true; /* the interesting stuff is done after the * expression is scanned */ hd_type = (*token == 'i' ? ifstmt : (*token == 'w' ? whilestmt : forstmt)); /* * remember the type of header for later use by parser */ goto copy_id; /* copy the token into line */ case sp_nparen: /* got else, do */ ps.in_stmt = false; if (*token == 'e') { if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) { if (verbose) diag2(0, "Line broken"); dump_line();/* make sure this starts a line */ ps.want_blank = false; } force_nl = true;/* also, following stuff must go onto new line */ last_else = 1; parse(elselit); } else { if (e_code != s_code) { /* make sure this starts a line */ if (verbose) diag2(0, "Line broken"); dump_line(); ps.want_blank = false; } force_nl = true;/* also, following stuff must go onto new line */ last_else = 0; parse(dolit); } goto copy_id; /* move the token into line */ case decl: /* we have a declaration type (int, register, * etc.) 
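				 *
				 * (Editorial note, not part of the original
				 * source: this case is where dec_ind is
				 * chosen -- ps.decl_indent (-di, default 16)
				 * for globals and struct members,
				 * ps.local_decl_indent (-ldi) for locals --
				 * and tabs_to_var records whether
				 * indent_declaration() may use tabs to reach
				 * that column, so the variable names in a
				 * block of declarations line up.)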
*/ parse(decl); /* let parser worry about indentation */ if (ps.last_token == rparen && ps.tos <= 1) { ps.in_parameter_declaration = 1; if (s_code != e_code) { dump_line(); ps.want_blank = 0; } } if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) { ps.ind_level = ps.i_l_follow = 1; ps.ind_stmt = 0; } ps.in_or_st = true; /* this might be a structure or initialization * declaration */ ps.in_decl = ps.decl_on_line = true; if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) ps.just_saw_decl = 2; prefix_blankline_requested = 0; for (i = 0; token[i++];); /* get length of token */ if (ps.ind_level == 0 || ps.dec_nest > 0) { /* global variable or struct member in local variable */ dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i; tabs_to_var = (use_tabs ? ps.decl_indent > 0 : 0); } else { /* local variable */ dec_ind = ps.local_decl_indent > 0 ? ps.local_decl_indent : i; tabs_to_var = (use_tabs ? ps.local_decl_indent > 0 : 0); } goto copy_id; case ident: /* got an identifier or constant */ if (ps.in_decl) { /* if we are in a declaration, we must indent * identifier */ if (is_procname == 0 || !procnames_start_line) { - if (!ps.block_init) { - if (troff && !ps.dumped_decl_indent) { + if (!ps.block_init && !ps.dumped_decl_indent) { + if (troff) { if (ps.want_blank) *e_code++ = ' '; - ps.want_blank = false; sprintf(e_code, "\n.De %dp+\200p\n", dec_ind * 7); - ps.dumped_decl_indent = 1; e_code += strlen(e_code); - } else { - int cur_dec_ind; - int pos, startpos; - - /* - * in order to get the tab math right for - * indentations that are not multiples of 8 we - * need to modify both startpos and dec_ind - * (cur_dec_ind) here by eight minus the - * remainder of the current starting column - * divided by eight. This seems to be a - * properly working fix - */ - startpos = e_code - s_code; - cur_dec_ind = dec_ind; - pos = startpos; - if ((ps.ind_level * ps.ind_size) % 8 != 0) { - pos += (ps.ind_level * ps.ind_size) % 8; - cur_dec_ind += (ps.ind_level * ps.ind_size) % 8; - } - - if (tabs_to_var) { - while ((pos & ~7) + 8 <= cur_dec_ind) { - CHECK_SIZE_CODE; - *e_code++ = '\t'; - pos = (pos & ~7) + 8; - } - } - while (pos < cur_dec_ind) { - CHECK_SIZE_CODE; - *e_code++ = ' '; - pos++; - } - if (ps.want_blank && e_code - s_code == startpos) - *e_code++ = ' '; - ps.want_blank = false; - } + } else + indent_declaration(dec_ind, tabs_to_var); + ps.dumped_decl_indent = true; + ps.want_blank = false; } } else { if (ps.want_blank) *e_code++ = ' '; ps.want_blank = false; if (dec_ind && s_code != e_code) { *e_code = '\0'; dump_line(); } dec_ind = 0; } } else if (sp_sw && ps.p_l_follow == 0) { sp_sw = false; force_nl = true; ps.last_u_d = true; ps.in_stmt = false; parse(hd_type); } copy_id: if (ps.want_blank) *e_code++ = ' '; if (troff && ps.its_a_keyword) { e_code = chfont(&bodyf, &keywordf, e_code); for (t_ptr = token; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = keywordf.allcaps && islower(*t_ptr) ? 
toupper(*t_ptr) : *t_ptr; } e_code = chfont(&keywordf, &bodyf, e_code); } else for (t_ptr = token; *t_ptr; ++t_ptr) { CHECK_SIZE_CODE; *e_code++ = *t_ptr; } ps.want_blank = true; break; case period: /* treat a period kind of like a binary * operation */ *e_code++ = '.'; /* move the period into line */ ps.want_blank = false; /* dont put a blank after a period */ break; case comma: ps.want_blank = (s_code != e_code); /* only put blank after comma * if comma does not start the * line */ - if (ps.in_decl && is_procname == 0 && !ps.block_init) - while ((e_code - s_code) < (dec_ind - 1)) { - CHECK_SIZE_CODE; - *e_code++ = ' '; - } - + if (ps.in_decl && is_procname == 0 && !ps.block_init && + !ps.dumped_decl_indent) { + /* indent leading commas and not the actual identifiers */ + indent_declaration(dec_ind - 1, tabs_to_var); + ps.dumped_decl_indent = true; + } *e_code++ = ','; if (ps.p_l_follow == 0) { if (ps.block_init_level <= 0) ps.block_init = 0; if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8)) force_nl = true; } break; case preesc: /* got the character '#' */ if ((s_com != e_com) || (s_lab != e_lab) || (s_code != e_code)) dump_line(); *e_lab++ = '#'; /* move whole line to 'label' buffer */ { int in_comment = 0; int com_start = 0; char quote = 0; int com_end = 0; while (*buf_ptr == ' ' || *buf_ptr == '\t') { buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } while (*buf_ptr != '\n' || (in_comment && !had_eof)) { CHECK_SIZE_LAB; *e_lab = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); switch (*e_lab++) { case BACKSLASH: if (troff) *e_lab++ = BACKSLASH; if (!in_comment) { *e_lab++ = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } break; case '/': if (*buf_ptr == '*' && !in_comment && !quote) { in_comment = 1; *e_lab++ = *buf_ptr++; com_start = e_lab - s_lab - 2; } break; case '"': if (quote == '"') quote = 0; break; case '\'': if (quote == '\'') quote = 0; break; case '*': if (*buf_ptr == '/' && in_comment) { in_comment = 0; *e_lab++ = *buf_ptr++; com_end = e_lab - s_lab; } break; } } while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) e_lab--; - /* comment on preprocessor line */ if (e_lab - s_lab == com_end && bp_save == NULL) { + /* comment on preprocessor line */ if (sc_end == NULL) /* if this is the first comment, we * must set up the buffer */ sc_end = &(save_com[0]); else { *sc_end++ = '\n'; /* add newline between * comments */ *sc_end++ = ' '; --line_no; } bcopy(s_lab + com_start, sc_end, com_end - com_start); sc_end += com_end - com_start; if (sc_end >= &save_com[sc_size]) abort(); e_lab = s_lab + com_start; while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) e_lab--; bp_save = buf_ptr; /* save current input buffer */ be_save = buf_end; buf_ptr = save_com; /* fix so that subsequent calls to * lexi will take tokens out of * save_com */ *sc_end++ = ' '; /* add trailing blank, just in case */ buf_end = sc_end; sc_end = NULL; } *e_lab = '\0'; /* null terminate line */ ps.pcase = false; } - if (strncmp(s_lab, "#if", 3) == 0) { - if (blanklines_around_conditional_compilation) { - int c; - prefix_blankline_requested++; - while ((c = getc(input)) == '\n'); - ungetc(c, input); - } - if ((size_t)ifdef_level < sizeof(state_stack)/sizeof(state_stack[0])) { + if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */ + if ((size_t)ifdef_level < nitems(state_stack)) { match_state[ifdef_level].tos = -1; state_stack[ifdef_level++] = ps; } else diag2(1, "#if stack overflow"); } - else if (strncmp(s_lab, "#else", 5) == 
0) + else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */ if (ifdef_level <= 0) - diag2(1, "Unmatched #else"); + diag2(1, s_lab[3] == 'i' ? "Unmatched #elif" : "Unmatched #else"); else { match_state[ifdef_level - 1] = ps; ps = state_stack[ifdef_level - 1]; } + } else if (strncmp(s_lab, "#endif", 6) == 0) { if (ifdef_level <= 0) diag2(1, "Unmatched #endif"); - else { + else ifdef_level--; - -#ifdef undef - /* - * This match needs to be more intelligent before the - * message is useful - */ - if (match_state[ifdef_level].tos >= 0 - && bcmp(&ps, &match_state[ifdef_level], sizeof ps)) - diag2(0, "Syntactically inconsistent #ifdef alternatives"); -#endif + } else { + struct directives { + int size; + const char *string; } - if (blanklines_around_conditional_compilation) { - postfix_blankline_requested++; - n_real_blanklines = 0; + recognized[] = { + {7, "include"}, + {6, "define"}, + {5, "undef"}, + {4, "line"}, + {5, "error"}, + {6, "pragma"} + }; + int d = nitems(recognized); + while (--d >= 0) + if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0) + break; + if (d < 0) { + diag2(1, "Unrecognized cpp directive"); + break; } } + if (blanklines_around_conditional_compilation) { + postfix_blankline_requested++; + n_real_blanklines = 0; + } + else { + postfix_blankline_requested = 0; + prefix_blankline_requested = 0; + } break; /* subsequent processing of the newline * character will cause the line to be printed */ case comment: /* we have gotten a / followed by * this is a biggie */ if (flushed_nl) { /* we should force a broken line here */ flushed_nl = false; dump_line(); ps.want_blank = false; /* dont insert blank at line start */ force_nl = false; } pr_comment(); break; } /* end of big switch stmt */ *e_code = '\0'; /* make sure code section is null terminated */ if (type_code != comment && type_code != newline && type_code != preesc) ps.last_token = type_code; } /* end of main while (1) loop */ } /* * copy input file to backup file if in_name is /blah/blah/blah/file, then * backup file will be ".Bfile" then make the backup file the input and * original input file the output */ static void bakcopy(void) { int n, bakchn; char buff[8 * 1024]; const char *p; /* construct file name .Bfile */ for (p = in_name; *p; p++); /* skip to end of string */ while (p > in_name && *p != '/') /* find last '/' */ p--; if (*p == '/') p++; sprintf(bakfile, "%s.BAK", p); /* copy in_name to backup file */ bakchn = creat(bakfile, 0600); if (bakchn < 0) err(1, "%s", bakfile); while ((n = read(fileno(input), buff, sizeof(buff))) > 0) if (write(bakchn, buff, n) != n) err(1, "%s", bakfile); if (n < 0) err(1, "%s", in_name); close(bakchn); fclose(input); /* re-open backup file as the input file */ input = fopen(bakfile, "r"); if (input == NULL) err(1, "%s", bakfile); /* now the original input file will be the output */ output = fopen(in_name, "w"); if (output == NULL) { unlink(bakfile); err(1, "%s", in_name); + } +} + +static void +indent_declaration(int cur_dec_ind, int tabs_to_var) +{ + int pos = e_code - s_code; + char *startpos = e_code; + + /* + * get the tab math right for indentations that are not multiples of 8 + */ + if ((ps.ind_level * ps.ind_size) % 8 != 0) { + pos += (ps.ind_level * ps.ind_size) % 8; + cur_dec_ind += (ps.ind_level * ps.ind_size) % 8; + } + if (tabs_to_var) + while ((pos & ~7) + 8 <= cur_dec_ind) { + CHECK_SIZE_CODE; + *e_code++ = '\t'; + pos = (pos & ~7) + 8; + } + while (pos < cur_dec_ind) { + CHECK_SIZE_CODE; + *e_code++ = ' '; + pos++; + } + if (e_code == startpos 
&& ps.want_blank) {
+        *e_code++ = ' ';
+        ps.want_blank = false;
+    }
+}
Index: user/alc/PQ_LAUNDRY/usr.bin/indent/indent.h
===================================================================
--- user/alc/PQ_LAUNDRY/usr.bin/indent/indent.h (revision 303641)
+++ user/alc/PQ_LAUNDRY/usr.bin/indent/indent.h (revision 303642)
@@ -1,47 +1,48 @@
-/*
+/*-
  * Copyright (c) 2001 Jens Schweikhardt
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if 0
 __FBSDID("$FreeBSD$");
 #endif
 
 void addkey(char *, int);
 int compute_code_target(void);
 int compute_label_target(void);
 int count_spaces(int, char *);
+int count_spaces_until(int, char *, char *);
 int lexi(void);
 void diag2(int, const char *);
 void diag3(int, const char *, int);
 void diag4(int, const char *, int, int);
 void dump_line(void);
 void fill_buffer(void);
 void parse(int);
 void parsefont(struct fstate *, const char *);
 void pr_comment(void);
 void set_defaults(void);
 void set_option(char *);
 void set_profile(void);
 void writefdef(struct fstate *f, int);
Index: user/alc/PQ_LAUNDRY/usr.bin/indent/indent_codes.h
===================================================================
--- user/alc/PQ_LAUNDRY/usr.bin/indent/indent_codes.h (revision 303641)
+++ user/alc/PQ_LAUNDRY/usr.bin/indent/indent_codes.h (revision 303642)
@@ -1,70 +1,70 @@
-/*
+/*-
  * Copyright (c) 1985 Sun Microsystems, Inc.
  * Copyright (c) 1980, 1993
  *    The Regents of the University of California. All rights reserved.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *    This product includes software developed by the University of
  *    California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * @(#)indent_codes.h 8.1 (Berkeley) 6/6/93
  * $FreeBSD$
  */
 
 #define newline 1
 #define lparen 2
 #define rparen 3
 #define unary_op 4
 #define binary_op 5
 #define postop 6
 #define question 7
 #define casestmt 8
 #define colon 9
 #define semicolon 10
 #define lbrace 11
 #define rbrace 12
 #define ident 13
 #define comma 14
 #define comment 15
 #define swstmt 16
 #define preesc 17
 #define form_feed 18
 #define decl 19
 #define sp_paren 20
 #define sp_nparen 21
 #define ifstmt 22
 #define whilestmt 23
 #define forstmt 24
 #define stmt 25
 #define stmtl 26
 #define elselit 27
 #define dolit 28
 #define dohead 29
 #define ifhead 30
 #define elsehead 31
 #define period 32
Index: user/alc/PQ_LAUNDRY/usr.bin/indent/indent_globs.h
===================================================================
--- user/alc/PQ_LAUNDRY/usr.bin/indent/indent_globs.h (revision 303641)
+++ user/alc/PQ_LAUNDRY/usr.bin/indent/indent_globs.h (revision 303642)
@@ -1,330 +1,335 @@
-/*
+/*-
  * Copyright (c) 1985 Sun Microsystems, Inc.
  * Copyright (c) 1980, 1993
  *    The Regents of the University of California. All rights reserved.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *    This product includes software developed by the University of
  *    California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * @(#)indent_globs.h 8.1 (Berkeley) 6/6/93
  * $FreeBSD$
  */
 
 #define BACKSLASH '\\'
 #define bufsize 200		/* size of internal buffers */
 #define sc_size 5000		/* size of save_com buffer */
 #define label_offset 2		/* number of levels a label is placed to left
				 * of code */
 #define tabsize 8		/* the size of a tab */
 #define tabmask 0177770	/* mask used when figuring length of lines
				 * with tabs */
 
 #define false 0
 #define true 1
 
 FILE *input;			/* the fid for the input file */
 FILE *output;			/* the output file */
 
 #define CHECK_SIZE_CODE \
 	if (e_code >= l_code) { \
 	    int nsize = l_code-s_code+400; \
+	    int code_len = e_code-s_code; \
 	    codebuf = (char *) realloc(codebuf, nsize); \
 	    if (codebuf == NULL) \
 		err(1, NULL); \
-	    e_code = codebuf + (e_code-s_code) + 1; \
+	    e_code = codebuf + code_len + 1; \
 	    l_code = codebuf + nsize - 5; \
 	    s_code = codebuf + 1; \
 	}
 
 #define CHECK_SIZE_COM \
 	if (e_com >= l_com) { \
 	    int nsize = l_com-s_com+400; \
+	    int com_len = e_com - s_com; \
+	    int blank_pos = last_bl - s_com; \
 	    combuf = (char *) realloc(combuf, nsize); \
 	    if (combuf == NULL) \
 		err(1, NULL); \
-	    e_com = combuf + (e_com-s_com) + 1; \
-	    last_bl = combuf + (last_bl-s_com) + 1; \
+	    e_com = combuf + com_len + 1; \
+	    last_bl = combuf + blank_pos + 1; \
 	    l_com = combuf + nsize - 5; \
 	    s_com = combuf + 1; \
 	}
 
 #define CHECK_SIZE_LAB \
 	if (e_lab >= l_lab) { \
 	    int nsize = l_lab-s_lab+400; \
+	    int label_len = e_lab - s_lab; \
 	    labbuf = (char *) realloc(labbuf, nsize); \
 	    if (labbuf == NULL) \
 		err(1, NULL); \
-	    e_lab = labbuf + (e_lab-s_lab) + 1; \
+	    e_lab = labbuf + label_len + 1; \
 	    l_lab = labbuf + nsize - 5; \
 	    s_lab = labbuf + 1; \
 	}
 
 #define CHECK_SIZE_TOKEN \
 	if (e_token >= l_token) { \
 	    int nsize = l_token-s_token+400; \
+	    int token_len = e_token - s_token; \
 	    tokenbuf = (char *) realloc(tokenbuf, nsize); \
 	    if (tokenbuf == NULL) \
 		err(1, NULL); \
-	    e_token = tokenbuf + (e_token-s_token) + 1; \
+	    e_token = tokenbuf + token_len + 1; \
 	    l_token = tokenbuf + nsize - 5; \
 	    s_token = tokenbuf + 1; \
 	}
 
 char *labbuf;			/* buffer for label */
 char *s_lab;			/* start ... */
 char *e_lab;			/* .. and end of stored label */
 char *l_lab;			/* limit of label buffer */
 
 char *codebuf;			/* buffer for code section */
 char *s_code;			/* start ... */
 char *e_code;			/* .. and end of stored code */
 char *l_code;			/* limit of code section */
 
 char *combuf;			/* buffer for comments */
 char *s_com;			/* start ... */
 char *e_com;			/* ...
and end of stored comments */ char *l_com; /* limit of comment buffer */ #define token s_token char *tokenbuf; /* the last token scanned */ char *s_token; char *e_token; char *l_token; char *in_buffer; /* input buffer */ char *in_buffer_limit; /* the end of the input buffer */ char *buf_ptr; /* ptr to next character to be taken from * in_buffer */ char *buf_end; /* ptr to first after last char in in_buffer */ char save_com[sc_size]; /* input text is saved here when looking for * the brace after an if, while, etc */ char *sc_end; /* pointer into save_com buffer */ char *bp_save; /* saved value of buf_ptr when taking input * from save_com */ char *be_save; /* similarly saved value of buf_end */ int found_err; int pointer_as_binop; int blanklines_after_declarations; int blanklines_before_blockcomments; int blanklines_after_procs; int blanklines_around_conditional_compilation; int swallow_optional_blanklines; int n_real_blanklines; int prefix_blankline_requested; int postfix_blankline_requested; int break_comma; /* when true and not in parens, break after a * comma */ int btype_2; /* when true, brace should be on same line as * if, while, etc */ float case_ind; /* indentation level to be used for a "case * n:" */ int code_lines; /* count of lines with code */ int had_eof; /* set to true when input is exhausted */ int line_no; /* the current line number. */ int max_col; /* the maximum allowable line length */ int verbose; /* when true, non-essential error messages are * printed */ int cuddle_else; /* true if else should cuddle up to '}' */ int star_comment_cont; /* true iff comment continuation lines should * have stars at the beginning of each line. */ int comment_delimiter_on_blankline; int troff; /* true iff were generating troff input */ int procnames_start_line; /* if true, the names of procedures * being defined get placed in column * 1 (ie. 
a newline is placed between * the type of the procedure and its * name) */ int proc_calls_space; /* If true, procedure calls look like: * foo(bar) rather than foo (bar) */ int format_block_comments; /* true if comments beginning with * `/ * \n' are to be reformatted */ int format_col1_comments; /* If comments which start in column 1 * are to be magically reformatted * (just like comments that begin in * later columns) */ int inhibit_formatting; /* true if INDENT OFF is in effect */ int suppress_blanklines;/* set iff following blanklines should be * suppressed */ int continuation_indent;/* set to the indentation between the edge of * code and continuation lines */ int lineup_to_parens; /* if true, continued code within parens will * be lined up to the open paren */ int Bill_Shannon; /* true iff a blank should always be inserted * after sizeof */ int blanklines_after_declarations_at_proctop; /* This is vaguely * similar to * blanklines_after_decla * rations except that * it only applies to * the first set of * declarations in a * procedure (just after * the first '{') and it * causes a blank line * to be generated even * if there are no * declarations */ int block_comment_max_col; int extra_expression_indent; /* true if continuation lines from the * expression part of "if(e)", * "while(e)", "for(e;e;e)" should be * indented an extra tab stop so that * they don't conflict with the code * that follows */ int function_brace_split; /* split function declaration and * brace onto separate lines */ int use_tabs; /* set true to use tabs for spacing, * false uses all spaces */ int auto_typedefs; /* set true to recognize identifiers * ending in "_t" like typedefs */ /* -troff font state information */ struct fstate { char font[4]; char size; int allcaps:1; } __aligned(sizeof(int)); char *chfont(struct fstate *, struct fstate *, char *); struct fstate keywordf, /* keyword font */ stringf, /* string font */ boxcomf, /* Box comment font */ blkcomf, /* Block comment font */ scomf, /* Same line comment font */ bodyf; /* major body font */ -#define STACKSIZE 150 +#define STACKSIZE 256 struct parser_state { int last_token; struct fstate cfont; /* Current font */ int p_stack[STACKSIZE]; /* this is the parsers stack */ int il[STACKSIZE]; /* this stack stores indentation levels */ float cstk[STACKSIZE];/* used to store case stmt indentation levels */ int box_com; /* set to true when we are in a "boxed" * comment. In that case, the first non-blank * char should be lined up with the / in / followed by * */ int comment_delta, n_comment_delta; int cast_mask; /* indicates which close parens close off * casts */ int sizeof_mask; /* indicates which close parens close off * sizeof''s */ int block_init; /* true iff inside a block initialization */ int block_init_level; /* The level of brace nesting in an * initialization */ int last_nl; /* this is true if the last thing scanned was * a newline */ int in_or_st; /* Will be true iff there has been a * declarator (e.g. int or char) and no left * paren since the last semicolon. 
When true, * a '{' is starting a structure definition or * an initialization list */ int bl_line; /* set to 1 by dump_line if the line is blank */ int col_1; /* set to true if the last token started in * column 1 */ int com_col; /* this is the column in which the current * comment should start */ int com_ind; /* the column in which comments to the right * of code should start */ int com_lines; /* the number of lines with comments, set by * dump_line */ int dec_nest; /* current nesting level for structure or init */ int decl_com_ind; /* the column in which comments after * declarations should be put */ int decl_on_line; /* set to true if this line of code has part * of a declaration on it */ int i_l_follow; /* the level to which ind_level should be set * after the current line is printed */ int in_decl; /* set to true when we are in a declaration * stmt. The processing of braces is then * slightly different */ int in_stmt; /* set to 1 while in a stmt */ int ind_level; /* the current indentation level */ int ind_size; /* the size of one indentation level */ int ind_stmt; /* set to 1 if next line should have an extra * indentation level because we are in the * middle of a stmt */ int last_u_d; /* set to true after scanning a token which * forces a following operator to be unary */ int leave_comma; /* if true, never break declarations after * commas */ int ljust_decl; /* true if declarations should be left * justified */ int out_coms; /* the number of comments processed, set by * pr_comment */ int out_lines; /* the number of lines written, set by * dump_line */ int p_l_follow; /* used to remember how to indent following * statement */ int paren_level; /* parenthesization level. used to indent * within statements */ short paren_indents[20]; /* column positions of each paren */ int pcase; /* set to 1 if the current line label is a * case. It is printed differently from a * regular label */ int search_brace; /* set to true by parse when it is necessary * to buffer up all info up to the start of a * stmt after an if, while, etc */ int unindent_displace; /* comments not to the right of code * will be placed this many * indentation levels to the left of * code */ int use_ff; /* set to one if the current line should be * terminated with a form feed */ int want_blank; /* set to true when the following token should * be prefixed by a blank. (Said prefixing is * ignored in some cases.) */ int else_if; /* True iff else if pairs should be handled * specially */ int decl_indent; /* column to indent declared identifiers to */ int local_decl_indent; /* like decl_indent but for locals */ int its_a_keyword; int sizeof_keyword; int dumped_decl_indent; float case_indent; /* The distance to indent case labels from the * switch statement */ int in_parameter_declaration; int indent_parameters; int tos; /* pointer to top of stack */ char procname[100]; /* The name of the current procedure */ int just_saw_decl; } ps; int ifdef_level; int rparen_count; struct parser_state state_stack[5]; struct parser_state match_state[5]; Index: user/alc/PQ_LAUNDRY/usr.bin/indent/io.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/indent/io.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/indent/io.c (revision 303642) @@ -1,668 +1,662 @@ -/* +/*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)io.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "indent_globs.h" #include "indent.h" int comment_open; static int paren_target; static int pad_output(int current, int target); void dump_line(void) { /* dump_line is the routine that actually * effects the printing of the new source. It * prints the label section, followed by the * code section with the appropriate nesting * level, followed by any comments */ int cur_col, target_col = 1; static int not_first_line; if (ps.procname[0]) { if (troff) { if (comment_open) { comment_open = 0; fprintf(output, ".*/\n"); } fprintf(output, ".Pr \"%s\"\n", ps.procname); } ps.ind_level = 0; ps.procname[0] = 0; } if (s_code == e_code && s_lab == e_lab && s_com == e_com) { if (suppress_blanklines > 0) suppress_blanklines--; else { ps.bl_line = true; n_real_blanklines++; } } else if (!inhibit_formatting) { suppress_blanklines = 0; ps.bl_line = false; if (prefix_blankline_requested && not_first_line) { if (swallow_optional_blanklines) { if (n_real_blanklines == 1) n_real_blanklines = 0; } else { if (n_real_blanklines == 0) n_real_blanklines = 1; } } while (--n_real_blanklines >= 0) putc('\n', output); n_real_blanklines = 0; if (ps.ind_level == 0) ps.ind_stmt = 0; /* this is a class A kludge. 
dont do * additional statement indentation if we are * at bracket level 0 */ if (e_lab != s_lab || e_code != s_code) ++code_lines; /* keep count of lines with code */ if (e_lab != s_lab) { /* print lab, if any */ if (comment_open) { comment_open = 0; fprintf(output, ".*/\n"); } while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) e_lab--; *e_lab = '\0'; cur_col = pad_output(1, compute_label_target()); if (s_lab[0] == '#' && (strncmp(s_lab, "#else", 5) == 0 || strncmp(s_lab, "#endif", 6) == 0)) { char *s = s_lab; if (e_lab[-1] == '\n') e_lab--; do putc(*s++, output); while (s < e_lab && 'a' <= *s && *s<='z'); while ((*s == ' ' || *s == '\t') && s < e_lab) s++; if (s < e_lab) fprintf(output, s[0]=='/' && s[1]=='*' ? "\t%.*s" : "\t/* %.*s */", (int)(e_lab - s), s); } else fprintf(output, "%.*s", (int)(e_lab - s_lab), s_lab); cur_col = count_spaces(cur_col, s_lab); } else cur_col = 1; /* there is no label section */ ps.pcase = false; if (s_code != e_code) { /* print code section, if any */ char *p; if (comment_open) { comment_open = 0; fprintf(output, ".*/\n"); } target_col = compute_code_target(); { int i; for (i = 0; i < ps.p_l_follow; i++) if (ps.paren_indents[i] >= 0) ps.paren_indents[i] = -(ps.paren_indents[i] + target_col); } cur_col = pad_output(cur_col, target_col); for (p = s_code; p < e_code; p++) if (*p == (char) 0200) fprintf(output, "%d", target_col * 7); else putc(*p, output); cur_col = count_spaces(cur_col, s_code); } if (s_com != e_com) { if (troff) { int all_here = 0; char *p; if (e_com[-1] == '/' && e_com[-2] == '*') e_com -= 2, all_here++; while (e_com > s_com && e_com[-1] == ' ') e_com--; *e_com = 0; p = s_com; while (*p == ' ') p++; if (p[0] == '/' && p[1] == '*') p += 2, all_here++; else if (p[0] == '*') p += p[1] == '/' ? 2 : 1; while (*p == ' ') p++; if (*p == 0) goto inhibit_newline; if (comment_open < 2 && ps.box_com) { comment_open = 0; fprintf(output, ".*/\n"); } if (comment_open == 0) { if ('a' <= *p && *p <= 'z') *p = *p + 'A' - 'a'; if (e_com - p < 50 && all_here == 2) { char *follow = p; fprintf(output, "\n.nr C! \\w\1"); while (follow < e_com) { switch (*follow) { case '\n': putc(' ', output); case 1: break; case '\\': putc('\\', output); default: putc(*follow, output); } follow++; } putc(1, output); } fprintf(output, "\n./* %dp %d %dp\n", ps.com_col * 7, (s_code != e_code || s_lab != e_lab) - ps.box_com, target_col * 7); } comment_open = 1 + ps.box_com; while (*p) { if (*p == BACKSLASH) putc(BACKSLASH, output); putc(*p++, output); } } else { /* print comment, if any */ int target = ps.com_col; char *com_st = s_com; target += ps.comment_delta; while (*com_st == '\t') com_st++, target += 8; /* ? */ while (target <= 0) if (*com_st == ' ') target++, com_st++; else if (*com_st == '\t') target = ((target - 1) & ~7) + 9, com_st++; else target = 1; if (cur_col > target) { /* if comment can't fit on this line, * put it on next line */ putc('\n', output); cur_col = 1; ++ps.out_lines; } while (e_com > com_st && isspace(e_com[-1])) e_com--; cur_col = pad_output(cur_col, target); - if (!ps.box_com) { - if (star_comment_cont && (com_st[1] != '*' || e_com <= com_st + 1)) { - if (com_st[1] == ' ' && com_st[0] == ' ' && e_com > com_st + 1) - com_st[1] = '*'; - else - fwrite(" * ", com_st[0] == '\t' ? 2 : com_st[0] == '*' ? 
1 : 3, 1, output); - } - } fwrite(com_st, e_com - com_st, 1, output); ps.comment_delta = ps.n_comment_delta; - cur_col = count_spaces(cur_col, com_st); ++ps.com_lines; /* count lines with comments */ } } if (ps.use_ff) putc('\014', output); else putc('\n', output); inhibit_newline: ++ps.out_lines; if (ps.just_saw_decl == 1 && blanklines_after_declarations) { prefix_blankline_requested = 1; ps.just_saw_decl = 0; } else prefix_blankline_requested = postfix_blankline_requested; postfix_blankline_requested = 0; } ps.decl_on_line = ps.in_decl; /* if we are in the middle of a * declaration, remember that fact for * proper comment indentation */ ps.ind_stmt = ps.in_stmt & ~ps.in_decl; /* next line should be * indented if we have not * completed this stmt and if * we are not in the middle of * a declaration */ ps.use_ff = false; ps.dumped_decl_indent = 0; *(e_lab = s_lab) = '\0'; /* reset buffers */ *(e_code = s_code) = '\0'; - *(e_com = s_com) = '\0'; + *(e_com = s_com = combuf + 1) = '\0'; ps.ind_level = ps.i_l_follow; ps.paren_level = ps.p_l_follow; paren_target = -ps.paren_indents[ps.paren_level - 1]; not_first_line = 1; } int compute_code_target(void) { int target_col = ps.ind_size * ps.ind_level + 1; if (ps.paren_level) if (!lineup_to_parens) target_col += continuation_indent * (2 * continuation_indent == ps.ind_size ? 1 : ps.paren_level); else { int w; int t = paren_target; if ((w = count_spaces(t, s_code) - max_col) > 0 && count_spaces(target_col, s_code) <= max_col) { t -= w + 1; if (t > target_col) target_col = t; } else target_col = t; } else if (ps.ind_stmt) target_col += continuation_indent; return target_col; } int compute_label_target(void) { return ps.pcase ? (int) (case_ind * ps.ind_size) + 1 : *s_lab == '#' ? 1 : ps.ind_size * (ps.ind_level - label_offset) + 1; } /* * Copyright (C) 1976 by the Board of Trustees of the University of Illinois * * All rights reserved * * * NAME: fill_buffer * * FUNCTION: Reads one block of input into input_buffer * * HISTORY: initial coding November 1976 D A Willcox of CAC 1/7/77 A * Willcox of CAC Added check for switch back to partly full input * buffer from temporary buffer * */ void fill_buffer(void) { /* this routine reads stuff from the input */ char *p; int i; FILE *f = input; - if (bp_save != NULL) { /* there is a partly filled input buffer left */ - buf_ptr = bp_save; /* dont read anything, just switch buffers */ + if (bp_save != NULL) { /* there is a partly filled input buffer left */ + buf_ptr = bp_save; /* do not read anything, just switch buffers */ buf_end = be_save; bp_save = be_save = NULL; if (buf_ptr < buf_end) return; /* only return if there is really something in * this buffer */ } for (p = in_buffer;;) { if (p >= in_buffer_limit) { int size = (in_buffer_limit - in_buffer) * 2 + 10; int offset = p - in_buffer; in_buffer = realloc(in_buffer, size); if (in_buffer == NULL) errx(1, "input line too long"); p = in_buffer + offset; in_buffer_limit = in_buffer + size - 2; } if ((i = getc(f)) == EOF) { *p++ = ' '; *p++ = '\n'; had_eof = true; break; } *p++ = i; if (i == '\n') break; } buf_ptr = in_buffer; buf_end = p; if (p[-2] == '/' && p[-3] == '*') { if (in_buffer[3] == 'I' && strncmp(in_buffer, "/**INDENT**", 11) == 0) fill_buffer(); /* flush indent error message */ else { int com = 0; p = in_buffer; while (*p == ' ' || *p == '\t') p++; if (*p == '/' && p[1] == '*') { p += 2; while (*p == ' ' || *p == '\t') p++; if (p[0] == 'I' && p[1] == 'N' && p[2] == 'D' && p[3] == 'E' && p[4] == 'N' && p[5] == 'T') { p += 6; while (*p == ' ' || *p 
== '\t') p++; if (*p == '*') com = 1; else if (*p == 'O') { if (*++p == 'N') p++, com = 1; else if (*p == 'F' && *++p == 'F') p++, com = 2; } while (*p == ' ' || *p == '\t') p++; if (p[0] == '*' && p[1] == '/' && p[2] == '\n' && com) { if (s_com != e_com || s_lab != e_lab || s_code != e_code) dump_line(); if (!(inhibit_formatting = com - 1)) { n_real_blanklines = 0; postfix_blankline_requested = 0; prefix_blankline_requested = 0; suppress_blanklines = 1; } } } } } } if (inhibit_formatting) { p = in_buffer; do putc(*p, output); while (*p++ != '\n'); } } /* * Copyright (C) 1976 by the Board of Trustees of the University of Illinois * * All rights reserved * * * NAME: pad_output * * FUNCTION: Writes tabs and spaces to move the current column up to the desired * position. * * ALGORITHM: Put tabs and/or blanks into pobuf, then write pobuf. * * PARAMETERS: current integer The current column target * nteger The desired column * * RETURNS: Integer value of the new column. (If current >= target, no action is * taken, and current is returned. * * GLOBALS: None * * CALLS: write (sys) * * CALLED BY: dump_line * * HISTORY: initial coding November 1976 D A Willcox of CAC * */ static int pad_output(int current, int target) /* writes tabs and blanks (if necessary) to * get the current output position up to the * target column */ /* current: the current column value */ /* target: position we want it at */ { int curr; /* internal column pointer */ int tcur; if (troff) fprintf(output, "\\h'|%dp'", (target - 1) * 7); else { if (current >= target) return (current); /* line is already long enough */ curr = current; if (use_tabs) { while ((tcur = ((curr - 1) & tabmask) + tabsize + 1) <= target) { putc('\t', output); curr = tcur; } } while (curr++ < target) putc(' ', output); /* pad with final blanks */ } return (target); } /* * Copyright (C) 1976 by the Board of Trustees of the University of Illinois * * All rights reserved * * * NAME: count_spaces * * FUNCTION: Find out where printing of a given string will leave the current * character position on output. * * ALGORITHM: Run thru input string and add appropriate values to current * position. * * RETURNS: Integer value of position after printing "buffer" starting in column * "current". * * HISTORY: initial coding November 1976 D A Willcox of CAC * */ int -count_spaces(int current, char *buffer) +count_spaces_until(int cur, char *buffer, char *end) /* * this routine figures out where the character position will be after * printing the text in buffer starting at column "current" */ { char *buf; /* used to look thru buffer */ - int cur; /* current character counter */ - cur = current; - - for (buf = buffer; *buf != '\0'; ++buf) { + for (buf = buffer; *buf != '\0' && buf != end; ++buf) { switch (*buf) { case '\n': case 014: /* form feed */ cur = 1; break; case '\t': cur = ((cur - 1) & tabmask) + tabsize + 1; break; case 010: /* backspace */ --cur; break; default: ++cur; break; } /* end of switch */ } /* end of for loop */ return (cur); } +int +count_spaces(int cur, char *buffer) +{ + return (count_spaces_until(cur, buffer, NULL)); +} + void diag4(int level, const char *msg, int a, int b) { if (level) found_err = 1; if (output == stdout) { fprintf(stdout, "/**INDENT** %s@%d: ", level == 0 ? "Warning" : "Error", line_no); fprintf(stdout, msg, a, b); fprintf(stdout, " */\n"); } else { fprintf(stderr, "%s@%d: ", level == 0 ? 
"Warning" : "Error", line_no); fprintf(stderr, msg, a, b); fprintf(stderr, "\n"); } } void diag3(int level, const char *msg, int a) { if (level) found_err = 1; if (output == stdout) { fprintf(stdout, "/**INDENT** %s@%d: ", level == 0 ? "Warning" : "Error", line_no); fprintf(stdout, msg, a); fprintf(stdout, " */\n"); } else { fprintf(stderr, "%s@%d: ", level == 0 ? "Warning" : "Error", line_no); fprintf(stderr, msg, a); fprintf(stderr, "\n"); } } void diag2(int level, const char *msg) { if (level) found_err = 1; if (output == stdout) { fprintf(stdout, "/**INDENT** %s@%d: ", level == 0 ? "Warning" : "Error", line_no); fprintf(stdout, "%s", msg); fprintf(stdout, " */\n"); } else { fprintf(stderr, "%s@%d: ", level == 0 ? "Warning" : "Error", line_no); fprintf(stderr, "%s", msg); fprintf(stderr, "\n"); } } void writefdef(struct fstate *f, int nm) { fprintf(output, ".ds f%c %s\n.nr s%c %d\n", nm, f->font, nm, f->size); } char * chfont(struct fstate *of, struct fstate *nf, char *s) { if (of->font[0] != nf->font[0] || of->font[1] != nf->font[1]) { *s++ = '\\'; *s++ = 'f'; if (nf->font[1]) { *s++ = '('; *s++ = nf->font[0]; *s++ = nf->font[1]; } else *s++ = nf->font[0]; } if (nf->size != of->size) { *s++ = '\\'; *s++ = 's'; if (nf->size < of->size) { *s++ = '-'; *s++ = '0' + of->size - nf->size; } else { *s++ = '+'; *s++ = '0' + nf->size - of->size; } } return s; } void parsefont(struct fstate *f, const char *s0) { const char *s = s0; int sizedelta = 0; - bzero(f, sizeof *f); + memset(f, 0, sizeof(struct fstate)); while (*s) { if (isdigit(*s)) f->size = f->size * 10 + *s - '0'; else if (isupper(*s)) if (f->font[0]) f->font[1] = *s; else f->font[0] = *s; else if (*s == 'c') f->allcaps = 1; else if (*s == '+') sizedelta++; else if (*s == '-') sizedelta--; else { errx(1, "bad font specification: %s", s0); } s++; } if (f->font[0] == 0) f->font[0] = 'R'; if (bodyf.size == 0) bodyf.size = 11; if (f->size == 0) f->size = bodyf.size + sizedelta; else if (sizedelta > 0) f->size += bodyf.size; else f->size = bodyf.size - f->size; } Index: user/alc/PQ_LAUNDRY/usr.bin/indent/lexi.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/indent/lexi.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/indent/lexi.c (revision 303642) @@ -1,606 +1,606 @@ -/* +/*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); /* * Here we have the token scanner for indent. It scans off one token and puts * it in the global variable "token". It returns a code, indicating the type * of token scanned. */ #include #include #include #include #include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" #define alphanum 1 #define opchar 3 struct templ { const char *rwd; int rwcode; }; struct templ specials[1000] = { {"switch", 1}, {"case", 2}, {"break", 0}, {"struct", 3}, {"union", 3}, {"enum", 3}, {"default", 2}, {"int", 4}, {"char", 4}, {"float", 4}, {"double", 4}, {"long", 4}, {"short", 4}, {"typedef", 4}, {"unsigned", 4}, {"register", 4}, {"static", 4}, {"global", 4}, {"extern", 4}, {"void", 4}, {"const", 4}, {"volatile", 4}, {"goto", 0}, {"return", 0}, {"if", 5}, {"while", 5}, {"for", 5}, {"else", 6}, {"do", 6}, {"sizeof", 7}, {0, 0} }; char chartype[128] = { /* this is used to facilitate the decision of * what type (alphanumeric, operator) each * character is */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0 }; int lexi(void) { int unary_delim; /* this is set to 1 if the current token * forces a following operator to be unary */ static int last_code; /* the last token type returned */ static int l_struct; /* set to 1 if the last token was 'struct' */ int code; /* internal code to be returned */ char qchar; /* the delimiter character for a string */ e_token = s_token; /* point to start of place to save token */ unary_delim = false; ps.col_1 = ps.last_nl; /* tell world that this token started in * column 1 iff the last thing scanned was nl */ ps.last_nl = false; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ ps.col_1 = false; /* leading blanks imply token is not in column * 1 */ if (++buf_ptr >= buf_end) fill_buffer(); } /* Scan an alphanumeric token */ if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { /* * we have a character or number */ const char *j; /* used for searching thru list of * * reserved words */ struct templ *p; if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' 
&& isdigit(buf_ptr[1]))) { int seendot = 0, seenexp = 0, seensfx = 0; if (*buf_ptr == '0' && (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { *e_token++ = *buf_ptr++; *e_token++ = *buf_ptr++; while (isxdigit(*buf_ptr)) { CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; } } else while (1) { if (*buf_ptr == '.') { if (seendot) break; else seendot++; } CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; if (!isdigit(*buf_ptr) && *buf_ptr != '.') { if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) break; else { seenexp++; seendot++; CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; if (*buf_ptr == '+' || *buf_ptr == '-') *e_token++ = *buf_ptr++; } } } while (1) { if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) { CHECK_SIZE_TOKEN; *e_token++ = *buf_ptr++; seensfx |= 1; continue; } - if (!(seensfx & 2) && strchr("fFlL", *buf_ptr)) { + if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) { CHECK_SIZE_TOKEN; if (buf_ptr[1] == buf_ptr[0]) *e_token++ = *buf_ptr++; *e_token++ = *buf_ptr++; seensfx |= 2; continue; } break; } } else while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { /* fill_buffer() terminates buffer with newline */ if (*buf_ptr == BACKSLASH) { if (*(buf_ptr + 1) == '\n') { buf_ptr += 2; if (buf_ptr >= buf_end) fill_buffer(); } else break; } CHECK_SIZE_TOKEN; /* copy it over */ *e_token++ = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } *e_token++ = '\0'; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ if (++buf_ptr >= buf_end) fill_buffer(); } ps.its_a_keyword = false; ps.sizeof_keyword = false; if (l_struct && !ps.p_l_follow) { /* if last token was 'struct' and we're not * in parentheses, then this token * should be treated as a declaration */ l_struct = false; last_code = ident; ps.last_u_d = true; return (decl); } ps.last_u_d = l_struct; /* Operator after identifier is binary * unless last token was 'struct' */ l_struct = false; last_code = ident; /* Remember that this is the code we will * return */ if (auto_typedefs) { const char *q = s_token; size_t q_len = strlen(q); /* Check if we have an "_t" in the end */ if (q_len > 2 && (strcmp(q + q_len - 2, "_t") == 0)) { ps.its_a_keyword = true; ps.last_u_d = true; goto found_auto_typedef; } } /* * This loop will check if the token is a keyword. */ for (p = specials; (j = p->rwd) != NULL; p++) { const char *q = s_token; /* point at scanned token */ if (*j++ != *q++ || *j++ != *q++) continue; /* This test depends on the fact that * identifiers are always at least 1 character * long (ie. the first two bytes of the * identifier are always meaningful) */ if (q[-1] == 0) break; /* If its a one-character identifier */ while (*q++ == *j) if (*j++ == 0) goto found_keyword; /* I wish that C had a multi-level * break... 
*/ } if (p->rwd) { /* we have a keyword */ found_keyword: ps.its_a_keyword = true; ps.last_u_d = true; switch (p->rwcode) { case 1: /* it is a switch */ return (swstmt); case 2: /* a case or default */ return (casestmt); case 3: /* a "struct" */ /* * Next time around, we will want to know that we have had a * 'struct' */ l_struct = true; /* FALLTHROUGH */ case 4: /* one of the declaration keywords */ found_auto_typedef: if (ps.p_l_follow) { ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.sizeof_mask; break; /* inside parens: cast, param list or sizeof */ } last_code = decl; return (decl); case 5: /* if, while, for */ return (sp_paren); case 6: /* do, else */ return (sp_nparen); case 7: ps.sizeof_keyword = true; default: /* all others are treated like any other * identifier */ return (ident); } /* end of switch */ } /* end of if (found_it) */ if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { char *tp = buf_ptr; while (tp < buf_end) if (*tp++ == ')' && (*tp == ';' || *tp == ',')) goto not_proc; strncpy(ps.procname, token, sizeof ps.procname - 1); ps.in_parameter_declaration = 1; rparen_count = 1; not_proc:; } /* * The following hack attempts to guess whether or not the current * token is in fact a declaration keyword -- one that has been * typedefd */ if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') && !ps.p_l_follow && !ps.block_init && (ps.last_token == rparen || ps.last_token == semicolon || ps.last_token == decl || ps.last_token == lbrace || ps.last_token == rbrace)) { ps.its_a_keyword = true; ps.last_u_d = true; last_code = decl; return decl; } if (last_code == decl) /* if this is a declared variable, then * following sign is unary */ ps.last_u_d = true; /* will make "int a -1" work */ last_code = ident; return (ident); /* the ident is not in the list */ } /* end of procesing for alpanum character */ /* Scan a non-alphanumeric token */ *e_token++ = *buf_ptr; /* if it is only a one-character token, it is * moved here */ *e_token = '\0'; if (++buf_ptr >= buf_end) fill_buffer(); switch (*token) { case '\n': unary_delim = ps.last_u_d; ps.last_nl = true; /* remember that we just had a newline */ code = (had_eof ? 
0 : newline); /* * if data has been exhausted, the newline is a dummy, and we should * return code to stop */ break; case '\'': /* start of quoted character */ case '"': /* start of string */ qchar = *token; if (troff) { e_token[-1] = '`'; if (qchar == '"') *e_token++ = '`'; e_token = chfont(&bodyf, &stringf, e_token); } do { /* copy the string */ while (1) { /* move one character or [/] */ if (*buf_ptr == '\n') { diag2(1, "Unterminated literal"); goto stop_lit; } CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, * since CHECK_SIZE guarantees that there * are at least 5 entries left */ *e_token = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); if (*e_token == BACKSLASH) { /* if escape, copy extra char */ if (*buf_ptr == '\n') /* check for escaped newline */ ++line_no; if (troff) { *++e_token = BACKSLASH; if (*buf_ptr == BACKSLASH) *++e_token = BACKSLASH; } *++e_token = *buf_ptr++; ++e_token; /* we must increment this again because we * copied two chars */ if (buf_ptr >= buf_end) fill_buffer(); } else break; /* we copied one character */ } /* end of while (1) */ } while (*e_token++ != qchar); if (troff) { e_token = chfont(&stringf, &bodyf, e_token - 1); if (qchar == '"') *e_token++ = '\''; } stop_lit: code = ident; break; case ('('): case ('['): unary_delim = true; code = lparen; break; case (')'): case (']'): code = rparen; break; case '#': unary_delim = ps.last_u_d; code = preesc; break; case '?': unary_delim = true; code = question; break; case (':'): code = colon; unary_delim = true; break; case (';'): unary_delim = true; code = semicolon; break; case ('{'): unary_delim = true; /* * if (ps.in_or_st) ps.block_init = 1; */ /* ? code = ps.block_init ? lparen : lbrace; */ code = lbrace; break; case ('}'): unary_delim = true; /* ? code = ps.block_init ? rparen : rbrace; */ code = rbrace; break; case 014: /* a form feed */ unary_delim = ps.last_u_d; ps.last_nl = true; /* remember this so we can set 'ps.col_1' * right */ code = form_feed; break; case (','): unary_delim = true; code = comma; break; case '.': unary_delim = false; code = period; break; case '-': case '+': /* check for -, +, --, ++ */ code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; if (*buf_ptr == token[0]) { /* check for doubled character */ *e_token++ = *buf_ptr++; /* buffer overflow will be checked at end of loop */ if (last_code == ident || last_code == rparen) { code = (ps.last_u_d ? unary_op : postop); /* check for following ++ or -- */ unary_delim = false; } } else if (*buf_ptr == '=') /* check for operator += */ *e_token++ = *buf_ptr++; else if (*buf_ptr == '>') { /* check for operator -> */ *e_token++ = *buf_ptr++; if (!pointer_as_binop) { unary_delim = false; code = unary_op; ps.want_blank = false; } } break; /* buffer overflow will be checked at end of * switch */ case '=': if (ps.in_or_st) ps.block_init = 1; #ifdef undef if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ e_token[-1] = *buf_ptr++; if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) *e_token++ = *buf_ptr++; *e_token++ = '='; /* Flip =+ to += */ *e_token = 0; } #else if (*buf_ptr == '=') {/* == */ *e_token++ = '='; /* Flip =+ to += */ buf_ptr++; *e_token = 0; } #endif code = binary_op; unary_delim = true; break; /* can drop thru!!! 
*/ case '>': case '<': case '!': /* ops like <, <<, <=, !=, etc */ if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { *e_token++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer(); } if (*buf_ptr == '=') *e_token++ = *buf_ptr++; code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; break; default: if (token[0] == '/' && *buf_ptr == '*') { /* it is start of comment */ *e_token++ = '*'; if (++buf_ptr >= buf_end) fill_buffer(); code = comment; unary_delim = ps.last_u_d; break; } while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { /* * handle ||, &&, etc, and also things as in int *****i */ *e_token++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer(); } code = (ps.last_u_d ? unary_op : binary_op); unary_delim = true; } /* end of switch */ if (code != newline) { l_struct = false; last_code = code; } if (buf_ptr >= buf_end) /* check for input buffer empty */ fill_buffer(); ps.last_u_d = unary_delim; *e_token = '\0'; /* null terminate the token */ return (code); } /* * Add the given keyword to the keyword table, using val as the keyword type */ void addkey(char *key, int val) { struct templ *p = specials; while (p->rwd) if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) return; else p++; if (p >= specials + sizeof specials / sizeof specials[0]) return; /* For now, table overflows are silently * ignored */ p->rwd = key; p->rwcode = val; p[1].rwd = NULL; p[1].rwcode = 0; } Index: user/alc/PQ_LAUNDRY/usr.bin/indent/parse.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/indent/parse.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/indent/parse.c (revision 303642) @@ -1,332 +1,336 @@ -/* +/*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #if 0 #ifndef lint static char sccsid[] = "@(#)parse.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); +#include #include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" static void reduce(void); void parse(int tk) /* tk: the code for the construct scanned */ { int i; #ifdef debug printf("%2d - %s\n", tk, token); #endif while (ps.p_stack[ps.tos] == ifhead && tk != elselit) { /* true if we have an if without an else */ ps.p_stack[ps.tos] = stmt; /* apply the if(..) stmt ::= stmt * reduction */ reduce(); /* see if this allows any reduction */ } switch (tk) { /* go on and figure out what to do with the * input */ case decl: /* scanned a declaration word */ ps.search_brace = btype_2; /* indicate that following brace should be on same line */ if (ps.p_stack[ps.tos] != decl) { /* only put one declaration * onto stack */ break_comma = true; /* while in declaration, newline should be * forced after comma */ ps.p_stack[++ps.tos] = decl; ps.il[ps.tos] = ps.i_l_follow; if (ps.ljust_decl) {/* only do if we want left justified * declarations */ ps.ind_level = 0; for (i = ps.tos - 1; i > 0; --i) if (ps.p_stack[i] == decl) ++ps.ind_level; /* indentation is number of * declaration levels deep we are */ ps.i_l_follow = ps.ind_level; } } break; case ifstmt: /* scanned if (...) */ if (ps.p_stack[ps.tos] == elsehead && ps.else_if) /* "else if ..." */ ps.i_l_follow = ps.il[ps.tos]; case dolit: /* 'do' */ case forstmt: /* for (...) */ ps.p_stack[++ps.tos] = tk; ps.il[ps.tos] = ps.ind_level = ps.i_l_follow; ++ps.i_l_follow; /* subsequent statements should be indented 1 */ ps.search_brace = btype_2; break; case lbrace: /* scanned { */ break_comma = false; /* don't break comma in an initial list */ if (ps.p_stack[ps.tos] == stmt || ps.p_stack[ps.tos] == decl || ps.p_stack[ps.tos] == stmtl) ++ps.i_l_follow; /* it is a random, isolated stmt group or a * declaration */ else { if (s_code == e_code) { /* * only do this if there is nothing on the line */ --ps.ind_level; /* * it is a group as part of a while, for, etc. */ if (ps.p_stack[ps.tos] == swstmt && ps.case_indent >= 1) --ps.ind_level; /* * for a switch, brace should be two levels out from the code */ } } ps.p_stack[++ps.tos] = lbrace; ps.il[ps.tos] = ps.ind_level; ps.p_stack[++ps.tos] = stmt; /* allow null stmt between braces */ ps.il[ps.tos] = ps.i_l_follow; break; case whilestmt: /* scanned while (...) */ if (ps.p_stack[ps.tos] == dohead) { /* it is matched with do stmt */ ps.ind_level = ps.i_l_follow = ps.il[ps.tos]; ps.p_stack[++ps.tos] = whilestmt; ps.il[ps.tos] = ps.ind_level = ps.i_l_follow; } else { /* it is a while loop */ ps.p_stack[++ps.tos] = whilestmt; ps.il[ps.tos] = ps.i_l_follow; ++ps.i_l_follow; ps.search_brace = btype_2; } break; case elselit: /* scanned an else */ if (ps.p_stack[ps.tos] != ifhead) diag2(1, "Unmatched 'else'"); else { ps.ind_level = ps.il[ps.tos]; /* indentation for else should * be same as for if */ ps.i_l_follow = ps.ind_level + 1; /* everything following should * be in 1 level */ ps.p_stack[ps.tos] = elsehead; /* remember if with else */ ps.search_brace = btype_2 | ps.else_if; } break; case rbrace: /* scanned a } */ /* stack should have or */ if (ps.p_stack[ps.tos - 1] == lbrace) { ps.ind_level = ps.i_l_follow = ps.il[--ps.tos]; ps.p_stack[ps.tos] = stmt; } else diag2(1, "Statement nesting error"); break; case swstmt: /* had switch (...) 
*/ ps.p_stack[++ps.tos] = swstmt; ps.cstk[ps.tos] = case_ind; /* save current case indent level */ ps.il[ps.tos] = ps.i_l_follow; case_ind = ps.i_l_follow + ps.case_indent; /* cases should be one * level down from * switch */ ps.i_l_follow += ps.case_indent + 1; /* statements should be two * levels in */ ps.search_brace = btype_2; break; case semicolon: /* this indicates a simple stmt */ break_comma = false; /* turn off flag to break after commas in a * declaration */ ps.p_stack[++ps.tos] = stmt; ps.il[ps.tos] = ps.ind_level; break; default: /* this is an error */ diag2(1, "Unknown code to parser"); return; } /* end of switch */ + + if (ps.tos >= STACKSIZE) + errx(1, "Parser stack overflow"); reduce(); /* see if any reduction can be done */ #ifdef debug for (i = 1; i <= ps.tos; ++i) printf("(%d %d)", ps.p_stack[i], ps.il[i]); printf("\n"); #endif return; } /* * NAME: reduce * * FUNCTION: Implements the reduce part of the parsing algorithm * * ALGORITHM: The following reductions are done. Reductions are repeated * until no more are possible. * * Old TOS New TOS * * * do "dostmt" * if "ifstmt" * switch * decl * "ifelse" * for * while * "dostmt" while * * On each reduction, ps.i_l_follow (the indentation for the following line) * is set to the indentation level associated with the old TOS. * * PARAMETERS: None * * RETURNS: Nothing * * GLOBALS: ps.cstk ps.i_l_follow = ps.il ps.p_stack = ps.tos = * * CALLS: None * * CALLED BY: parse * * HISTORY: initial coding November 1976 D A Willcox of CAC * */ /*----------------------------------------------*\ | REDUCTION PHASE | \*----------------------------------------------*/ static void reduce(void) { int i; for (;;) { /* keep looping until there is nothing left to * reduce */ switch (ps.p_stack[ps.tos]) { case stmt: switch (ps.p_stack[ps.tos - 1]) { case stmt: case stmtl: /* stmtl stmt or stmt stmt */ ps.p_stack[--ps.tos] = stmtl; break; case dolit: /* */ ps.p_stack[--ps.tos] = dohead; ps.i_l_follow = ps.il[ps.tos]; break; case ifstmt: /* */ ps.p_stack[--ps.tos] = ifhead; for (i = ps.tos - 1; ( ps.p_stack[i] != stmt && ps.p_stack[i] != stmtl && ps.p_stack[i] != lbrace ); --i); ps.i_l_follow = ps.il[i]; /* * for the time being, we will assume that there is no else on * this if, and set the indentation level accordingly. If an * else is scanned, it will be fixed up later */ break; case swstmt: /* */ case_ind = ps.cstk[ps.tos - 1]; case decl: /* finish of a declaration */ case elsehead: /* < else> */ case forstmt: /* */ case whilestmt: /* */ ps.p_stack[--ps.tos] = stmt; ps.i_l_follow = ps.il[ps.tos]; break; default: /* */ return; } /* end of section for on top of stack */ break; case whilestmt: /* while (...) on top */ if (ps.p_stack[ps.tos - 1] == dohead) { /* it is termination of a do while */ ps.tos -= 2; break; } else return; default: /* anything else on top */ return; } } } Index: user/alc/PQ_LAUNDRY/usr.bin/indent/pr_comment.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/indent/pr_comment.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/indent/pr_comment.c (revision 303642) @@ -1,429 +1,335 @@ -/* +/*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)pr_comment.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include +#include #include "indent_globs.h" #include "indent.h" /* * NAME: * pr_comment * * FUNCTION: * This routine takes care of scanning and printing comments. * * ALGORITHM: * 1) Decide where the comment should be aligned, and if lines should * be broken. * 2) If lines should not be broken and filled, just copy up to end of * comment. * 3) If lines should be filled, then scan thru input_buffer copying * characters to com_buf. Remember where the last blank, tab, or * newline was. When line is filled, print up to last blank and * continue copying. * * HISTORY: * November 1976 D A Willcox of CAC Initial coding * 12/6/76 D A Willcox of CAC Modification to handle * UNIX-style comments * */ /* * this routine processes comments. It makes an attempt to keep comments from * going over the max line length. If a line is too long, it moves everything * from the last blank to the next comment line. Blanks and tabs from the * beginning of the input line are removed */ void pr_comment(void) { int now_col; /* column we are in now */ int adj_max_col; /* Adjusted max_col for when we decide to * spill comments over the right margin */ char *last_bl; /* points to the last blank in the output * buffer */ char *t_ptr; /* used for moving string */ - int unix_comment; /* tri-state variable used to decide if it is - * a unix-style comment. 
0 means only blanks - * since /+*, 1 means regular style comment, 2 - * means unix style comment */ int break_delim = comment_delimiter_on_blankline; int l_just_saw_decl = ps.just_saw_decl; - /* - * int ps.last_nl = 0; true iff the last significant thing - * weve seen is a newline - */ - int one_liner = 1; /* true iff this comment is a one-liner */ adj_max_col = max_col; ps.just_saw_decl = 0; last_bl = NULL; /* no blanks found so far */ ps.box_com = false; /* at first, assume that we are not in * a boxed comment or some other * comment that should not be touched */ ++ps.out_coms; /* keep track of number of comments */ - unix_comment = 1; /* set flag to let us figure out if there is a - * unix-style comment ** DISABLED: use 0 to - * reenable this hack! */ /* Figure where to align and how to treat the comment */ if (ps.col_1 && !format_col1_comments) { /* if comment starts in column * 1 it should not be touched */ ps.box_com = true; + break_delim = false; ps.com_col = 1; } else { if (*buf_ptr == '-' || *buf_ptr == '*' || (*buf_ptr == '\n' && !format_block_comments)) { ps.box_com = true; /* A comment with a '-' or '*' immediately * after the /+* is assumed to be a boxed * comment. A comment with a newline * immediately after the /+* is assumed to * be a block comment and is treated as a * box comment unless format_block_comments * is nonzero (the default). */ - break_delim = 0; + break_delim = false; } if ( /* ps.bl_line && */ (s_lab == e_lab) && (s_code == e_code)) { /* klg: check only if this line is blank */ /* * If this (*and previous lines are*) blank, dont put comment way * out at left */ ps.com_col = (ps.ind_level - ps.unindent_displace) * ps.ind_size + 1; adj_max_col = block_comment_max_col; if (ps.com_col <= 1) ps.com_col = 1 + !format_col1_comments; } else { int target_col; - break_delim = 0; + break_delim = false; if (s_code != e_code) target_col = count_spaces(compute_code_target(), s_code); else { target_col = 1; if (s_lab != e_lab) target_col = count_spaces(compute_label_target(), s_lab); } ps.com_col = ps.decl_on_line || ps.ind_level == 0 ? 
ps.decl_com_ind : ps.com_ind; if (ps.com_col < target_col) ps.com_col = ((target_col + 7) & ~7) + 1; if (ps.com_col + 24 > adj_max_col) adj_max_col = ps.com_col + 24; } } if (ps.box_com) { buf_ptr[-2] = 0; ps.n_comment_delta = 1 - count_spaces(1, in_buffer); buf_ptr[-2] = '/'; } else { ps.n_comment_delta = 0; while (*buf_ptr == ' ' || *buf_ptr == '\t') buf_ptr++; } ps.comment_delta = 0; *e_com++ = '/'; /* put '/' followed by '*' into buffer */ *e_com++ = '*'; if (*buf_ptr != ' ' && !ps.box_com) *e_com++ = ' '; - *e_com = '\0'; - if (troff) { - now_col = 1; - adj_max_col = 80; + /* Don't put a break delimiter if this comment is a one-liner */ + for (t_ptr = buf_ptr; *t_ptr != '\0' && *t_ptr != '\n'; t_ptr++) { + if (t_ptr >= buf_end) + fill_buffer(); + if (t_ptr[0] == '*' && t_ptr[1] == '/') { + break_delim = false; + break; + } } - else - now_col = count_spaces(ps.com_col, s_com); /* figure what column we - * would be in if we - * printed the comment - * now */ + if (break_delim) { + char *t = e_com; + e_com = s_com + 2; + *e_com = 0; + if (blanklines_before_blockcomments) + prefix_blankline_requested = 1; + dump_line(); + e_com = s_com = t; + if (!ps.box_com && star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; + } + + if (troff) + adj_max_col = 80; + /* Start to copy the comment */ while (1) { /* this loop will go until the comment is * copied */ - if (*buf_ptr > 040 && *buf_ptr != '*') - ps.last_nl = 0; CHECK_SIZE_COM; switch (*buf_ptr) { /* this checks for various spcl cases */ case 014: /* check for a form feed */ if (!ps.box_com) { /* in a text comment, break the line here */ ps.use_ff = true; /* fix so dump_line uses a form feed */ dump_line(); last_bl = NULL; - *e_com++ = ' '; - *e_com++ = '*'; - *e_com++ = ' '; - while (*++buf_ptr == ' ' || *buf_ptr == '\t'); + if (!ps.box_com && star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; + while (*++buf_ptr == ' ' || *buf_ptr == '\t') + ; } else { if (++buf_ptr >= buf_end) fill_buffer(); *e_com++ = 014; } break; case '\n': if (had_eof) { /* check for unexpected eof */ printf("Unterminated comment\n"); - *e_com = '\0'; dump_line(); return; } - one_liner = 0; + last_bl = NULL; if (ps.box_com || ps.last_nl) { /* if this is a boxed comment, * we dont ignore the newline */ - if (s_com == e_com) { + if (s_com == e_com) *e_com++ = ' '; - *e_com++ = ' '; - } - *e_com = '\0'; if (!ps.box_com && e_com - s_com > 3) { - if (break_delim == 1 && s_com[0] == '/' - && s_com[1] == '*' && s_com[2] == ' ') { - char *t = e_com; - break_delim = 2; - e_com = s_com + 2; - *e_com = 0; - if (blanklines_before_blockcomments) - prefix_blankline_requested = 1; - dump_line(); - e_com = t; - s_com[0] = s_com[1] = s_com[2] = ' '; - } dump_line(); - CHECK_SIZE_COM; - *e_com++ = ' '; - *e_com++ = ' '; + if (star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; } dump_line(); - now_col = ps.com_col; + if (!ps.box_com && star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; } else { ps.last_nl = 1; - if (unix_comment != 1) { /* we not are in unix_style - * comment */ - if (unix_comment == 0 && s_code == e_code) { - /* - * if it is a UNIX-style comment, ignore the - * requirement that previous line be blank for - * unindention - */ - ps.com_col = (ps.ind_level - ps.unindent_displace) * ps.ind_size + 1; - if (ps.com_col <= 1) - ps.com_col = 2; - } - unix_comment = 2; /* permanently remember that we are in - * this type of comment */ - dump_line(); - ++line_no; - now_col = ps.com_col; - *e_com++ = ' '; - /* - * 
fix so that the star at the start of the line will line - * up - */ - do /* flush leading white space */ - if (++buf_ptr >= buf_end) - fill_buffer(); - while (*buf_ptr == ' ' || *buf_ptr == '\t'); - break; - } if (*(e_com - 1) == ' ' || *(e_com - 1) == '\t') last_bl = e_com - 1; /* * if there was a space at the end of the last line, remember * where it was */ else { /* otherwise, insert one */ last_bl = e_com; CHECK_SIZE_COM; *e_com++ = ' '; - ++now_col; } } ++line_no; /* keep track of input line number */ if (!ps.box_com) { int nstar = 1; do { /* flush any blanks and/or tabs at start of * next line */ if (++buf_ptr >= buf_end) fill_buffer(); if (*buf_ptr == '*' && --nstar >= 0) { if (++buf_ptr >= buf_end) fill_buffer(); if (*buf_ptr == '/') goto end_of_comment; } } while (*buf_ptr == ' ' || *buf_ptr == '\t'); } else if (++buf_ptr >= buf_end) fill_buffer(); break; /* end of case for newline */ case '*': /* must check for possibility of being at end * of comment */ if (++buf_ptr >= buf_end) /* get to next char after * */ fill_buffer(); - if (unix_comment == 0) /* set flag to show we are not in - * unix-style comment */ - unix_comment = 1; - if (*buf_ptr == '/') { /* it is the end!!! */ end_of_comment: if (++buf_ptr >= buf_end) fill_buffer(); - - if (*(e_com - 1) != ' ' && !ps.box_com) { /* insure blank before - * end */ + CHECK_SIZE_COM; + if (break_delim) { + if (e_com > s_com + 3) { + dump_line(); + } + else + s_com = e_com; *e_com++ = ' '; - ++now_col; } - if (break_delim == 1 && !one_liner && s_com[0] == '/' - && s_com[1] == '*' && s_com[2] == ' ') { - char *t = e_com; - break_delim = 2; - e_com = s_com + 2; - *e_com = 0; - if (blanklines_before_blockcomments) - prefix_blankline_requested = 1; - dump_line(); - e_com = t; - s_com[0] = s_com[1] = s_com[2] = ' '; - } - if (break_delim == 2 && e_com > s_com + 3 - /* now_col > adj_max_col - 2 && !ps.box_com */ ) { - *e_com = '\0'; - dump_line(); - now_col = ps.com_col; - } - CHECK_SIZE_COM; - *e_com++ = '*'; - *e_com++ = '/'; - *e_com = '\0'; + if (e_com[-1] != ' ' && !ps.box_com) + *e_com++ = ' '; /* ensure blank before end */ + *e_com++ = '*', *e_com++ = '/', *e_com = '\0'; ps.just_saw_decl = l_just_saw_decl; return; } - else { /* handle isolated '*' */ + else /* handle isolated '*' */ *e_com++ = '*'; - ++now_col; - } break; default: /* we have a random char */ - if (unix_comment == 0 && *buf_ptr != ' ' && *buf_ptr != '\t') - unix_comment = 1; /* we are not in unix-style comment */ - - *e_com = *buf_ptr++; - if (buf_ptr >= buf_end) - fill_buffer(); - - if (*e_com == '\t') /* keep track of column */ - now_col = ((now_col - 1) & tabmask) + tabsize + 1; - else if (*e_com == '\b') /* this is a backspace */ - --now_col; - else - ++now_col; - - if (*e_com == ' ' || *e_com == '\t') - last_bl = e_com; - /* remember we saw a blank */ - - ++e_com; - if (now_col > adj_max_col && !ps.box_com && unix_comment == 1 && e_com[-1] > ' ') { + now_col = count_spaces_until(ps.com_col, s_com, e_com); + do { + *e_com = *buf_ptr++; + if (buf_ptr >= buf_end) + fill_buffer(); + if (*e_com == ' ' || *e_com == '\t') + last_bl = e_com; /* remember we saw a blank */ + ++e_com; + now_col++; + } while (!memchr("*\n\r\b\t", *buf_ptr, 6) && + (now_col <= adj_max_col || !last_bl)); + ps.last_nl = false; + if (now_col > adj_max_col && !ps.box_com && e_com[-1] > ' ') { /* * the comment is too long, it must be broken up */ - if (break_delim == 1 && s_com[0] == '/' - && s_com[1] == '*' && s_com[2] == ' ') { - char *t = e_com; - break_delim = 2; - e_com = s_com + 2; - *e_com = 
0; - if (blanklines_before_blockcomments) - prefix_blankline_requested = 1; + if (last_bl == NULL) { dump_line(); - e_com = t; - s_com[0] = s_com[1] = s_com[2] = ' '; + if (!ps.box_com && star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; + break; } - if (last_bl == NULL) { /* we have seen no blanks */ - last_bl = e_com; /* fake it */ - *e_com++ = ' '; - } - *e_com = '\0'; /* print what we have */ - *last_bl = '\0'; - while (last_bl > s_com && last_bl[-1] < 040) - *--last_bl = 0; + *e_com = '\0'; e_com = last_bl; dump_line(); - - *e_com++ = ' '; /* add blanks for continuation */ - *e_com++ = ' '; - *e_com++ = ' '; - - t_ptr = last_bl + 1; + if (!ps.box_com && star_comment_cont) + *e_com++ = ' ', *e_com++ = '*', *e_com++ = ' '; + for (t_ptr = last_bl + 1; *t_ptr == ' ' || *t_ptr == '\t'; + t_ptr++) + ; last_bl = NULL; - if (t_ptr >= e_com) { - while (*t_ptr == ' ' || *t_ptr == '\t') - t_ptr++; - while (*t_ptr != '\0') { /* move unprinted part of - * comment down in buffer */ - if (*t_ptr == ' ' || *t_ptr == '\t') - last_bl = e_com; - *e_com++ = *t_ptr++; - } - } - *e_com = '\0'; - now_col = count_spaces(ps.com_col, s_com); /* recompute current - * position */ + while (*t_ptr != '\0') { + if (*t_ptr == ' ' || *t_ptr == '\t') + last_bl = e_com; + *e_com++ = *t_ptr++; + } } break; } } } Index: user/alc/PQ_LAUNDRY/usr.bin/locale/locale.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/locale/locale.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/locale/locale.c (revision 303642) @@ -1,690 +1,690 @@ /*- * Copyright (c) 2002, 2003 Alexey Zelkin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * XXX: implement missing era_* (LC_TIME) keywords (require libc & * nl_langinfo(3) extensions) * * XXX: correctly handle reserved 'charmap' keyword and '-m' option (require * localedef(1) implementation). Currently it's handled via * nl_langinfo(CODESET). 
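
The pr_comment.c rewrite above collapses the old unix-comment special cases into one copy loop that remembers the last blank seen and, once the output passes the adjusted right margin, breaks the comment at that blank (or mid-word if none was seen). The fragment below is a stand-alone sketch of that fill strategy only; wrap_comment() and the hard-coded margin are illustrative choices, not indent's actual interfaces.

#include <stdio.h>
#include <string.h>

/*
 * Print "text" as a block-comment body, prefixing each continuation
 * line with " * " and breaking at the last blank before "maxcol".
 */
static void
wrap_comment(const char *text, size_t maxcol)
{
	size_t len = strlen(text);
	size_t start = 0, brk;

	while (start < len) {
		if (len - start <= maxcol) {
			printf(" * %s\n", text + start);
			break;
		}
		brk = start + maxcol;		/* candidate break point */
		while (brk > start && text[brk] != ' ')
			brk--;			/* back up to the last blank */
		if (brk == start)
			brk = start + maxcol;	/* no blank seen: hard break */
		printf(" * %.*s\n", (int)(brk - start), text + start);
		for (start = brk; text[start] == ' '; start++)
			;			/* skip the blank(s) we broke at */
	}
}

int
main(void)
{
	wrap_comment("this routine processes comments and tries to keep "
	    "them from running past the right margin of the output", 40);
	return (0);
}
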
*/ #include #include #include #include #include #include #include #include #include #include #include #include "setlocale.h" /* Local prototypes */ void init_locales_list(void); void list_charmaps(void); void list_locales(void); const char *lookup_localecat(int); char *kwval_lconv(int); int kwval_lookup(char *, char **, int *, int *); void showdetails(char *); void showkeywordslist(char *substring); void showlocale(void); void usage(void); /* Global variables */ static StringList *locales = NULL; int all_locales = 0; int all_charmaps = 0; int prt_categories = 0; int prt_keywords = 0; int more_params = 0; struct _lcinfo { const char *name; int id; } lcinfo [] = { { "LC_CTYPE", LC_CTYPE }, { "LC_COLLATE", LC_COLLATE }, { "LC_TIME", LC_TIME }, { "LC_NUMERIC", LC_NUMERIC }, { "LC_MONETARY", LC_MONETARY }, { "LC_MESSAGES", LC_MESSAGES } }; #define NLCINFO nitems(lcinfo) /* ids for values not referenced by nl_langinfo() */ #define KW_ZERO 10000 #define KW_GROUPING (KW_ZERO+1) #define KW_INT_CURR_SYMBOL (KW_ZERO+2) #define KW_CURRENCY_SYMBOL (KW_ZERO+3) #define KW_MON_DECIMAL_POINT (KW_ZERO+4) #define KW_MON_THOUSANDS_SEP (KW_ZERO+5) #define KW_MON_GROUPING (KW_ZERO+6) #define KW_POSITIVE_SIGN (KW_ZERO+7) #define KW_NEGATIVE_SIGN (KW_ZERO+8) #define KW_INT_FRAC_DIGITS (KW_ZERO+9) #define KW_FRAC_DIGITS (KW_ZERO+10) #define KW_P_CS_PRECEDES (KW_ZERO+11) #define KW_P_SEP_BY_SPACE (KW_ZERO+12) #define KW_N_CS_PRECEDES (KW_ZERO+13) #define KW_N_SEP_BY_SPACE (KW_ZERO+14) #define KW_P_SIGN_POSN (KW_ZERO+15) #define KW_N_SIGN_POSN (KW_ZERO+16) #define KW_INT_P_CS_PRECEDES (KW_ZERO+17) #define KW_INT_P_SEP_BY_SPACE (KW_ZERO+18) #define KW_INT_N_CS_PRECEDES (KW_ZERO+19) #define KW_INT_N_SEP_BY_SPACE (KW_ZERO+20) #define KW_INT_P_SIGN_POSN (KW_ZERO+21) #define KW_INT_N_SIGN_POSN (KW_ZERO+22) struct _kwinfo { const char *name; int isstr; /* true - string, false - number */ int catid; /* LC_* */ int value_ref; const char *comment; } kwinfo [] = { { "charmap", 1, LC_CTYPE, CODESET, "" }, /* hack */ { "decimal_point", 1, LC_NUMERIC, RADIXCHAR, "" }, { "thousands_sep", 1, LC_NUMERIC, THOUSEP, "" }, { "grouping", 1, LC_NUMERIC, KW_GROUPING, "" }, { "radixchar", 1, LC_NUMERIC, RADIXCHAR, "Same as decimal_point (FreeBSD only)" }, /* compat */ { "thousep", 1, LC_NUMERIC, THOUSEP, "Same as thousands_sep (FreeBSD only)" }, /* compat */ { "int_curr_symbol", 1, LC_MONETARY, KW_INT_CURR_SYMBOL, "" }, { "currency_symbol", 1, LC_MONETARY, KW_CURRENCY_SYMBOL, "" }, { "mon_decimal_point", 1, LC_MONETARY, KW_MON_DECIMAL_POINT, "" }, { "mon_thousands_sep", 1, LC_MONETARY, KW_MON_THOUSANDS_SEP, "" }, { "mon_grouping", 1, LC_MONETARY, KW_MON_GROUPING, "" }, { "positive_sign", 1, LC_MONETARY, KW_POSITIVE_SIGN, "" }, { "negative_sign", 1, LC_MONETARY, KW_NEGATIVE_SIGN, "" }, { "int_frac_digits", 0, LC_MONETARY, KW_INT_FRAC_DIGITS, "" }, { "frac_digits", 0, LC_MONETARY, KW_FRAC_DIGITS, "" }, { "p_cs_precedes", 0, LC_MONETARY, KW_P_CS_PRECEDES, "" }, { "p_sep_by_space", 0, LC_MONETARY, KW_P_SEP_BY_SPACE, "" }, { "n_cs_precedes", 0, LC_MONETARY, KW_N_CS_PRECEDES, "" }, { "n_sep_by_space", 0, LC_MONETARY, KW_N_SEP_BY_SPACE, "" }, { "p_sign_posn", 0, LC_MONETARY, KW_P_SIGN_POSN, "" }, { "n_sign_posn", 0, LC_MONETARY, KW_N_SIGN_POSN, "" }, { "int_p_cs_precedes", 0, LC_MONETARY, KW_INT_P_CS_PRECEDES, "" }, { "int_p_sep_by_space", 0, LC_MONETARY, KW_INT_P_SEP_BY_SPACE, "" }, { "int_n_cs_precedes", 0, LC_MONETARY, KW_INT_N_CS_PRECEDES, "" }, { "int_n_sep_by_space", 0, LC_MONETARY, KW_INT_N_SEP_BY_SPACE, "" }, { "int_p_sign_posn", 0, 
LC_MONETARY, KW_INT_P_SIGN_POSN, "" }, { "int_n_sign_posn", 0, LC_MONETARY, KW_INT_N_SIGN_POSN, "" }, { "d_t_fmt", 1, LC_TIME, D_T_FMT, "" }, { "d_fmt", 1, LC_TIME, D_FMT, "" }, { "t_fmt", 1, LC_TIME, T_FMT, "" }, { "am_str", 1, LC_TIME, AM_STR, "" }, { "pm_str", 1, LC_TIME, PM_STR, "" }, { "t_fmt_ampm", 1, LC_TIME, T_FMT_AMPM, "" }, { "day_1", 1, LC_TIME, DAY_1, "" }, { "day_2", 1, LC_TIME, DAY_2, "" }, { "day_3", 1, LC_TIME, DAY_3, "" }, { "day_4", 1, LC_TIME, DAY_4, "" }, { "day_5", 1, LC_TIME, DAY_5, "" }, { "day_6", 1, LC_TIME, DAY_6, "" }, { "day_7", 1, LC_TIME, DAY_7, "" }, { "abday_1", 1, LC_TIME, ABDAY_1, "" }, { "abday_2", 1, LC_TIME, ABDAY_2, "" }, { "abday_3", 1, LC_TIME, ABDAY_3, "" }, { "abday_4", 1, LC_TIME, ABDAY_4, "" }, { "abday_5", 1, LC_TIME, ABDAY_5, "" }, { "abday_6", 1, LC_TIME, ABDAY_6, "" }, { "abday_7", 1, LC_TIME, ABDAY_7, "" }, { "mon_1", 1, LC_TIME, MON_1, "" }, { "mon_2", 1, LC_TIME, MON_2, "" }, { "mon_3", 1, LC_TIME, MON_3, "" }, { "mon_4", 1, LC_TIME, MON_4, "" }, { "mon_5", 1, LC_TIME, MON_5, "" }, { "mon_6", 1, LC_TIME, MON_6, "" }, { "mon_7", 1, LC_TIME, MON_7, "" }, { "mon_8", 1, LC_TIME, MON_8, "" }, { "mon_9", 1, LC_TIME, MON_9, "" }, { "mon_10", 1, LC_TIME, MON_10, "" }, { "mon_11", 1, LC_TIME, MON_11, "" }, { "mon_12", 1, LC_TIME, MON_12, "" }, { "abmon_1", 1, LC_TIME, ABMON_1, "" }, { "abmon_2", 1, LC_TIME, ABMON_2, "" }, { "abmon_3", 1, LC_TIME, ABMON_3, "" }, { "abmon_4", 1, LC_TIME, ABMON_4, "" }, { "abmon_5", 1, LC_TIME, ABMON_5, "" }, { "abmon_6", 1, LC_TIME, ABMON_6, "" }, { "abmon_7", 1, LC_TIME, ABMON_7, "" }, { "abmon_8", 1, LC_TIME, ABMON_8, "" }, { "abmon_9", 1, LC_TIME, ABMON_9, "" }, { "abmon_10", 1, LC_TIME, ABMON_10, "" }, { "abmon_11", 1, LC_TIME, ABMON_11, "" }, { "abmon_12", 1, LC_TIME, ABMON_12, "" }, { "altmon_1", 1, LC_TIME, ALTMON_1, "(FreeBSD only)" }, { "altmon_2", 1, LC_TIME, ALTMON_2, "(FreeBSD only)" }, { "altmon_3", 1, LC_TIME, ALTMON_3, "(FreeBSD only)" }, { "altmon_4", 1, LC_TIME, ALTMON_4, "(FreeBSD only)" }, { "altmon_5", 1, LC_TIME, ALTMON_5, "(FreeBSD only)" }, { "altmon_6", 1, LC_TIME, ALTMON_6, "(FreeBSD only)" }, { "altmon_7", 1, LC_TIME, ALTMON_7, "(FreeBSD only)" }, { "altmon_8", 1, LC_TIME, ALTMON_8, "(FreeBSD only)" }, { "altmon_9", 1, LC_TIME, ALTMON_9, "(FreeBSD only)" }, { "altmon_10", 1, LC_TIME, ALTMON_10, "(FreeBSD only)" }, { "altmon_11", 1, LC_TIME, ALTMON_11, "(FreeBSD only)" }, { "altmon_12", 1, LC_TIME, ALTMON_12, "(FreeBSD only)" }, { "era", 1, LC_TIME, ERA, "(unavailable)" }, { "era_d_fmt", 1, LC_TIME, ERA_D_FMT, "(unavailable)" }, { "era_d_t_fmt", 1, LC_TIME, ERA_D_T_FMT, "(unavailable)" }, { "era_t_fmt", 1, LC_TIME, ERA_T_FMT, "(unavailable)" }, { "alt_digits", 1, LC_TIME, ALT_DIGITS, "" }, { "d_md_order", 1, LC_TIME, D_MD_ORDER, "(FreeBSD only)" }, /* local */ { "yesexpr", 1, LC_MESSAGES, YESEXPR, "" }, { "noexpr", 1, LC_MESSAGES, NOEXPR, "" }, { "yesstr", 1, LC_MESSAGES, YESSTR, "(POSIX legacy)" }, /* compat */ { "nostr", 1, LC_MESSAGES, NOSTR, "(POSIX legacy)" } /* compat */ }; -#define NKWINFO (sizeof(kwinfo)/sizeof(kwinfo[0])) +#define NKWINFO (nitems(kwinfo)) const char *boguslocales[] = { "UTF-8" }; -#define NBOGUS (sizeof(boguslocales)/sizeof(boguslocales[0])) +#define NBOGUS (nitems(boguslocales)) int main(int argc, char *argv[]) { int ch; int tmp; while ((ch = getopt(argc, argv, "ackms:")) != -1) { switch (ch) { case 'a': all_locales = 1; break; case 'c': prt_categories = 1; break; case 'k': prt_keywords = 1; break; case 'm': all_charmaps = 1; break; default: usage(); } } argc -= 
optind; argv += optind; /* validate arguments */ if (all_locales && all_charmaps) usage(); if ((all_locales || all_charmaps) && argc > 0) usage(); if ((all_locales || all_charmaps) && (prt_categories || prt_keywords)) usage(); /* process '-a' */ if (all_locales) { list_locales(); exit(0); } /* process '-m' */ if (all_charmaps) { list_charmaps(); exit(0); } /* check for special case '-k list' */ tmp = 0; if (prt_keywords && argc > 0) while (tmp < argc) if (strcasecmp(argv[tmp++], "list") == 0) { showkeywordslist(argv[tmp]); exit(0); } /* process '-c', '-k', or command line arguments. */ if (prt_categories || prt_keywords || argc > 0) { if (argc > 0) { setlocale(LC_ALL, ""); while (argc > 0) { showdetails(*argv); argv++; argc--; } } else { uint i; for (i = 0; i < nitems(kwinfo); i++) showdetails ((char *)kwinfo [i].name); } exit(0); } /* no arguments, show current locale state */ showlocale(); return (0); } void usage(void) { printf("Usage: locale [ -a | -m ]\n" " locale -k list [prefix]\n" " locale [ -ck ] [keyword ...]\n"); exit(1); } /* * Output information about all available locales * * XXX actually output of this function does not guarantee that locale * is really available to application, since it can be broken or * inconsistent thus setlocale() will fail. Maybe add '-V' function to * also validate these locales? */ void list_locales(void) { size_t i; init_locales_list(); for (i = 0; i < locales->sl_cur; i++) { printf("%s\n", locales->sl_str[i]); } } /* * qsort() helper function */ static int scmp(const void *s1, const void *s2) { return strcmp(*(const char **)s1, *(const char **)s2); } /* * Output information about all available charmaps * * XXX this function is doing a task in hackish way, i.e. by scaning * list of locales, spliting their codeset part and building list of * them. */ void list_charmaps(void) { size_t i; char *s, *cs; StringList *charmaps; /* initialize StringList */ charmaps = sl_init(); if (charmaps == NULL) err(1, "could not allocate memory"); /* fetch locales list */ init_locales_list(); /* split codesets and build their list */ for (i = 0; i < locales->sl_cur; i++) { s = locales->sl_str[i]; if ((cs = strchr(s, '.')) != NULL) { cs++; if (sl_find(charmaps, cs) == NULL) sl_add(charmaps, cs); } } /* add US-ASCII, if not yet added */ if (sl_find(charmaps, "US-ASCII") == NULL) sl_add(charmaps, "US-ASCII"); /* sort the list */ qsort(charmaps->sl_str, charmaps->sl_cur, sizeof(char *), scmp); /* print results */ for (i = 0; i < charmaps->sl_cur; i++) { printf("%s\n", charmaps->sl_str[i]); } } /* * Retrieve sorted list of system locales (or user locales, if PATH_LOCALE * environment variable is set) */ void init_locales_list(void) { DIR *dirp; struct dirent *dp; size_t i; int bogus; /* why call this function twice ? */ if (locales != NULL) return; /* initialize StringList */ locales = sl_init(); if (locales == NULL) err(1, "could not allocate memory"); /* get actual locales directory name */ if (__detect_path_locale() != 0) err(1, "unable to find locales storage"); /* open locales directory */ dirp = opendir(_PathLocale); if (dirp == NULL) err(1, "could not open directory '%s'", _PathLocale); /* scan directory and store its contents except "." and ".." */ while ((dp = readdir(dirp)) != NULL) { if (*(dp->d_name) == '.') continue; /* exclude "." and ".." 
*/ for (bogus = i = 0; i < NBOGUS; i++) if (strncmp(dp->d_name, boguslocales[i], strlen(boguslocales[i])) == 0) bogus = 1; if (!bogus) sl_add(locales, strdup(dp->d_name)); } closedir(dirp); /* make sure that 'POSIX' and 'C' locales are present in the list. * POSIX 1003.1-2001 requires presence of 'POSIX' name only here, but * we also list 'C' for constistency */ if (sl_find(locales, "POSIX") == NULL) sl_add(locales, "POSIX"); if (sl_find(locales, "C") == NULL) sl_add(locales, "C"); /* make output nicer, sort the list */ qsort(locales->sl_str, locales->sl_cur, sizeof(char *), scmp); } /* * Show current locale status, depending on environment variables */ void showlocale(void) { size_t i; const char *lang, *vval, *eval; setlocale(LC_ALL, ""); lang = getenv("LANG"); if (lang == NULL) { lang = ""; } printf("LANG=%s\n", lang); /* XXX: if LANG is null, then set it to "C" to get implied values? */ for (i = 0; i < NLCINFO; i++) { vval = setlocale(lcinfo[i].id, NULL); eval = getenv(lcinfo[i].name); if (eval != NULL && !strcmp(eval, vval) && strcmp(lang, vval)) { /* * Appropriate environment variable set, its value * is valid and not overridden by LC_ALL * * XXX: possible side effect: if both LANG and * overridden environment variable are set into same * value, then it'll be assumed as 'implied' */ printf("%s=%s\n", lcinfo[i].name, vval); } else { printf("%s=\"%s\"\n", lcinfo[i].name, vval); } } vval = getenv("LC_ALL"); if (vval == NULL) { vval = ""; } printf("LC_ALL=%s\n", vval); } /* * keyword value lookup helper (via localeconv()) */ char * kwval_lconv(int id) { struct lconv *lc; char *rval; rval = NULL; lc = localeconv(); switch (id) { case KW_GROUPING: rval = lc->grouping; break; case KW_INT_CURR_SYMBOL: rval = lc->int_curr_symbol; break; case KW_CURRENCY_SYMBOL: rval = lc->currency_symbol; break; case KW_MON_DECIMAL_POINT: rval = lc->mon_decimal_point; break; case KW_MON_THOUSANDS_SEP: rval = lc->mon_thousands_sep; break; case KW_MON_GROUPING: rval = lc->mon_grouping; break; case KW_POSITIVE_SIGN: rval = lc->positive_sign; break; case KW_NEGATIVE_SIGN: rval = lc->negative_sign; break; case KW_INT_FRAC_DIGITS: rval = &(lc->int_frac_digits); break; case KW_FRAC_DIGITS: rval = &(lc->frac_digits); break; case KW_P_CS_PRECEDES: rval = &(lc->p_cs_precedes); break; case KW_P_SEP_BY_SPACE: rval = &(lc->p_sep_by_space); break; case KW_N_CS_PRECEDES: rval = &(lc->n_cs_precedes); break; case KW_N_SEP_BY_SPACE: rval = &(lc->n_sep_by_space); break; case KW_P_SIGN_POSN: rval = &(lc->p_sign_posn); break; case KW_N_SIGN_POSN: rval = &(lc->n_sign_posn); break; case KW_INT_P_CS_PRECEDES: rval = &(lc->int_p_cs_precedes); break; case KW_INT_P_SEP_BY_SPACE: rval = &(lc->int_p_sep_by_space); break; case KW_INT_N_CS_PRECEDES: rval = &(lc->int_n_cs_precedes); break; case KW_INT_N_SEP_BY_SPACE: rval = &(lc->int_n_sep_by_space); break; case KW_INT_P_SIGN_POSN: rval = &(lc->int_p_sign_posn); break; case KW_INT_N_SIGN_POSN: rval = &(lc->int_n_sign_posn); break; default: break; } return (rval); } /* * keyword value and properties lookup */ int kwval_lookup(char *kwname, char **kwval, int *cat, int *isstr) { int rval; size_t i; rval = 0; for (i = 0; i < NKWINFO; i++) { if (strcasecmp(kwname, kwinfo[i].name) == 0) { rval = 1; *cat = kwinfo[i].catid; *isstr = kwinfo[i].isstr; if (kwinfo[i].value_ref < KW_ZERO) { *kwval = nl_langinfo(kwinfo[i].value_ref); } else { *kwval = kwval_lconv(kwinfo[i].value_ref); } break; } } return (rval); } /* * Show details about requested keyword according to '-k' and/or '-c' * command line 
options specified. */ void showdetails(char *kw) { int isstr, cat, tmpval; char *kwval; if (kwval_lookup(kw, &kwval, &cat, &isstr) == 0) { /* * invalid keyword specified. * XXX: any actions? */ fprintf(stderr, "Unknown keyword: `%s'\n", kw); return; } if (prt_categories) { if (prt_keywords) printf("%-20s ", lookup_localecat(cat)); else printf("%-20s\t%s\n", kw, lookup_localecat(cat)); } if (prt_keywords) { if (isstr) { printf("%s=\"%s\"\n", kw, kwval); } else { tmpval = (char) *kwval; printf("%s=%d\n", kw, tmpval); } } if (!prt_categories && !prt_keywords) { if (isstr) { printf("%s\n", kwval); } else { tmpval = (char) *kwval; printf("%d\n", tmpval); } } } /* * Convert locale category id into string */ const char * lookup_localecat(int cat) { size_t i; for (i = 0; i < NLCINFO; i++) if (lcinfo[i].id == cat) { return (lcinfo[i].name); } return ("UNKNOWN"); } /* * Show list of keywords */ void showkeywordslist(char *substring) { size_t i; #define FMT "%-20s %-12s %-7s %-20s\n" if (substring == NULL) printf("List of available keywords\n\n"); else printf("List of available keywords starting with '%s'\n\n", substring); printf(FMT, "Keyword", "Category", "Type", "Comment"); printf("-------------------- ------------ ------- --------------------\n"); for (i = 0; i < NKWINFO; i++) { if (substring != NULL) { if (strncmp(kwinfo[i].name, substring, strlen(substring)) != 0) continue; } printf(FMT, kwinfo[i].name, lookup_localecat(kwinfo[i].catid), (kwinfo[i].isstr == 0) ? "number" : "string", kwinfo[i].comment); } } Index: user/alc/PQ_LAUNDRY/usr.bin/mandoc/Makefile =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/mandoc/Makefile (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/mandoc/Makefile (revision 303642) @@ -1,91 +1,90 @@ # $FreeBSD$ .include MDOCMLDIR= ${.CURDIR}/../../contrib/mdocml .PATH: ${MDOCMLDIR} PROG= mandoc MAN= mandoc.1 eqn.7 mandoc_char.7 tbl.7 man.7 mdoc.7 # roff.7 MLINKS= mandoc.1 mdocml.1 .if ${MK_MANDOCDB} != no && ${MK_MAN_UTILS} != no MAN+= apropos.1 makewhatis.8 MLINKS+= apropos.1 whatis.1 LINKS= ${BINDIR}/mandoc ${BINDIR}/whatis \ ${BINDIR}/mandoc ${BINDIR}/makewhatis \ ${BINDIR}/mandoc ${BINDIR}/apropos .endif LIBMAN_SRCS= man.c \ man_hash.c \ man_macro.c \ man_validate.c LIBMDOC_SRCS= att.c \ lib.c \ mdoc.c \ mdoc_argv.c \ mdoc_hash.c \ mdoc_macro.c \ mdoc_state.c \ mdoc_validate.c \ st.c \ LIBROFF_SRCS= eqn.c \ roff.c \ tbl.c \ tbl_data.c \ tbl_layout.c \ tbl_opts.c \ LIB_SRCS= ${LIBMAN_SRCS} \ ${LIBMDOC_SRCS} \ ${LIBROFF_SRCS} \ chars.c \ mandoc.c \ mandoc_aux.c \ mandoc_ohash.c \ msec.c \ preconv.c \ read.c HTML_SRCS= eqn_html.c \ html.c \ man_html.c \ mdoc_html.c \ tbl_html.c MAN_SRCS= mdoc_man.c TERM_SRCS= eqn_term.c \ man_term.c \ mdoc_term.c \ term.c \ term_ascii.c \ term_ps.c \ tbl_term.c DB_SRCS= mandocdb.c \ mansearch.c \ mansearch_const.c \ tag.c \ manpath.c SRCS= ${LIB_SRCS} \ ${HTML_SRCS} \ ${MAN_SRCS} \ ${TERM_SRCS} \ main.c \ out.c \ tree.c SRCS+= ${DB_SRCS} WARNS?= 2 CFLAGS+= -DHAVE_CONFIG_H \ - -D_WITH_GETLINE \ -I${.CURDIR}/../../lib/libopenbsd/ \ -I${.CURDIR}/../../contrib/sqlite3 LIBADD= openbsd sqlite3 z .include Index: user/alc/PQ_LAUNDRY/usr.bin/netstat/pfkey.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/netstat/pfkey.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/netstat/pfkey.c (revision 303642) @@ -1,208 +1,207 @@ /* $NetBSD: inet.c,v 1.35.2.1 1999/04/29 14:57:08 perry Exp $ */ /* $KAME: ipsec.c,v 1.25 2001/03/12 09:04:39 itojun Exp $ */ 
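
locale(1) above resolves most keywords through nl_langinfo(3) and reaches the remaining lconv-only members through localeconv(3). A minimal sketch of both lookups, assuming only the standard C/POSIX locale interfaces and the caller's environment for the locale name:

#include <langinfo.h>
#include <locale.h>
#include <stdio.h>

int
main(void)
{
	struct lconv *lc;

	(void)setlocale(LC_ALL, "");		/* honor LANG/LC_* */

	/* keywords that map directly to nl_langinfo() items */
	printf("charmap=%s\n", nl_langinfo(CODESET));
	printf("d_fmt=\"%s\"\n", nl_langinfo(D_FMT));

	/* keywords only reachable through localeconv() */
	lc = localeconv();
	printf("decimal_point=\"%s\"\n", lc->decimal_point);
	printf("frac_digits=%d\n", (int)lc->frac_digits);
	return (0);
}
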
/*- * Copyright (C) 1995, 1996, 1997, 1998, and 1999 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 1983, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
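
Several hunks in this revision (NKWINFO and NBOGUS in locale.c, the pfkey message-name table below, cap_desc_count in procstat) replace open-coded sizeof(a)/sizeof(a[0]) with the nitems() macro from <sys/param.h>. A small sketch of the idiom; the fallback #define is only there so the example also builds where <sys/param.h> does not provide nitems().

#include <sys/param.h>
#include <stdio.h>

#ifndef nitems				/* FreeBSD defines this in <sys/param.h> */
#define	nitems(x)	(sizeof((x)) / sizeof((x)[0]))
#endif

static const char *msgtypenames[] = {
	"reserved", "getspi", "update", "add", "delete",
};

int
main(void)
{
	unsigned int i;

	for (i = 0; i < nitems(msgtypenames); i++)
		printf("%u %s\n", i, msgtypenames[i]);
	return (0);
}
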
*/ #if 0 #ifndef lint static char sccsid[] = "@(#)inet.c 8.5 (Berkeley) 5/24/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #ifdef IPSEC #include #endif #include #include #include #include #include #include #include "netstat.h" #ifdef IPSEC static const char *pfkey_msgtypenames[] = { "reserved", "getspi", "update", "add", "delete", "get", "acquire", "register", "expire", "flush", "dump", "x_promisc", "x_pchange", "x_spdupdate", "x_spdadd", "x_spddelete", "x_spdget", "x_spdacquire", "x_spddump", "x_spdflush", "x_spdsetidx", "x_spdexpire", "x_spddelete2" }; static const char *pfkey_msgtype_names (int); static const char * pfkey_msgtype_names(int x) { - const int max = - sizeof(pfkey_msgtypenames)/sizeof(pfkey_msgtypenames[0]); + const int max = nitems(pfkey_msgtypenames); static char buf[20]; if (x < max && pfkey_msgtypenames[x]) return pfkey_msgtypenames[x]; snprintf(buf, sizeof(buf), "#%d", x); return buf; } void pfkey_stats(u_long off, const char *name, int family __unused, int proto __unused) { struct pfkeystat pfkeystat; unsigned first, type; if (off == 0) return; xo_emit("{T:/%s}:\n", name); xo_open_container(name); kread_counters(off, (char *)&pfkeystat, sizeof(pfkeystat)); #define p(f, m) if (pfkeystat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)pfkeystat.f, plural(pfkeystat.f)) /* userland -> kernel */ p(out_total, "\t{:sent-requests/%ju} " "{N:/request%s sent from userland}\n"); p(out_bytes, "\t{:sent-bytes/%ju} " "{N:/byte%s sent from userland}\n"); for (first = 1, type = 0; type userland */ p(in_total, "\t{:received-requests/%ju} " "{N:/request%s sent to userland}\n"); p(in_bytes, "\t{:received-bytes/%ju} " "{N:/byte%s sent to userland}\n"); for (first = 1, type = 0; type < sizeof(pfkeystat.in_msgtype)/sizeof(pfkeystat.in_msgtype[0]); type++) { if (pfkeystat.in_msgtype[type] <= 0) continue; if (first) { xo_open_list("input-histogram"); xo_emit("\t{T:histogram by message type}:\n"); first = 0; } xo_open_instance("input-histogram"); xo_emit("\t\t{k:type/%s}: {:count/%ju}\n", pfkey_msgtype_names(type), (uintmax_t)pfkeystat.in_msgtype[type]); xo_close_instance("input-histogram"); } if (!first) xo_close_list("input-histogram"); p(in_msgtarget[KEY_SENDUP_ONE], "\t{:received-one-socket/%ju} " "{N:/message%s toward single socket}\n"); p(in_msgtarget[KEY_SENDUP_ALL], "\t{:received-all-sockets/%ju} " "{N:/message%s toward all sockets}\n"); p(in_msgtarget[KEY_SENDUP_REGISTERED], "\t{:received-registered-sockets/%ju} " "{N:/message%s toward registered sockets}\n"); p(in_nomem, "\t{:discarded-no-memory/%ju} " "{N:/message%s with memory allocation failure}\n"); #undef p xo_close_container(name); } #endif /* IPSEC */ Index: user/alc/PQ_LAUNDRY/usr.bin/nl/nl.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/nl/nl.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/nl/nl.c (revision 303642) @@ -1,410 +1,409 @@ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Klaus Klein. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #ifndef lint __COPYRIGHT( "@(#) Copyright (c) 1999\ The NetBSD Foundation, Inc. All rights reserved."); __RCSID("$FreeBSD$"); #endif -#define _WITH_GETLINE #include #include #include #include #include #include #include #include #include #include #include typedef enum { number_all, /* number all lines */ number_nonempty, /* number non-empty lines */ number_none, /* no line numbering */ number_regex /* number lines matching regular expression */ } numbering_type; struct numbering_property { const char * const name; /* for diagnostics */ numbering_type type; /* numbering type */ regex_t expr; /* for type == number_regex */ }; /* line numbering formats */ #define FORMAT_LN "%-*d" /* left justified, leading zeros suppressed */ #define FORMAT_RN "%*d" /* right justified, leading zeros suppressed */ #define FORMAT_RZ "%0*d" /* right justified, leading zeros kept */ #define FOOTER 0 #define BODY 1 #define HEADER 2 #define NP_LAST HEADER static struct numbering_property numbering_properties[NP_LAST + 1] = { { .name = "footer", .type = number_none }, { .name = "body", .type = number_nonempty }, { .name = "header", .type = number_none } }; #define max(a, b) ((a) > (b) ? (a) : (b)) /* * Maximum number of characters required for a decimal representation of a * (signed) int; courtesy of tzcode. */ #define INT_STRLEN_MAXIMUM \ ((sizeof (int) * CHAR_BIT - 1) * 302 / 1000 + 2) static void filter(void); static void parse_numbering(const char *, int); static void usage(void); /* * Dynamically allocated buffer suitable for string representation of ints. */ static char *intbuffer; /* delimiter characters that indicate the start of a logical page section */ static char delim[2 * MB_LEN_MAX]; static int delimlen; /* * Configurable parameters. */ /* line numbering format */ static const char *format = FORMAT_RN; /* increment value used to number logical page lines */ static int incr = 1; /* number of adjacent blank lines to be considered (and numbered) as one */ static unsigned int nblank = 1; /* whether to restart numbering at logical page delimiters */ static int restart = 1; /* characters used in separating the line number and the corrsp. 
text line */ static const char *sep = "\t"; /* initial value used to number logical page lines */ static int startnum = 1; /* number of characters to be used for the line number */ /* should be unsigned but required signed by `*' precision conversion */ static int width = 6; int main(int argc, char *argv[]) { int c; long val; unsigned long uval; char *ep; size_t intbuffersize, clen; char delim1[MB_LEN_MAX] = { '\\' }, delim2[MB_LEN_MAX] = { ':' }; size_t delim1len = 1, delim2len = 1; (void)setlocale(LC_ALL, ""); while ((c = getopt(argc, argv, "pb:d:f:h:i:l:n:s:v:w:")) != -1) { switch (c) { case 'p': restart = 0; break; case 'b': parse_numbering(optarg, BODY); break; case 'd': clen = mbrlen(optarg, MB_CUR_MAX, NULL); if (clen == (size_t)-1 || clen == (size_t)-2) errc(EXIT_FAILURE, EILSEQ, NULL); if (clen != 0) { memcpy(delim1, optarg, delim1len = clen); clen = mbrlen(optarg + delim1len, MB_CUR_MAX, NULL); if (clen == (size_t)-1 || clen == (size_t)-2) errc(EXIT_FAILURE, EILSEQ, NULL); if (clen != 0) { memcpy(delim2, optarg + delim1len, delim2len = clen); if (optarg[delim1len + clen] != '\0') errx(EXIT_FAILURE, "invalid delim argument -- %s", optarg); } } break; case 'f': parse_numbering(optarg, FOOTER); break; case 'h': parse_numbering(optarg, HEADER); break; case 'i': errno = 0; val = strtol(optarg, &ep, 10); if ((ep != NULL && *ep != '\0') || ((val == LONG_MIN || val == LONG_MAX) && errno != 0)) errx(EXIT_FAILURE, "invalid incr argument -- %s", optarg); incr = (int)val; break; case 'l': errno = 0; uval = strtoul(optarg, &ep, 10); if ((ep != NULL && *ep != '\0') || (uval == ULONG_MAX && errno != 0)) errx(EXIT_FAILURE, "invalid num argument -- %s", optarg); nblank = (unsigned int)uval; break; case 'n': if (strcmp(optarg, "ln") == 0) { format = FORMAT_LN; } else if (strcmp(optarg, "rn") == 0) { format = FORMAT_RN; } else if (strcmp(optarg, "rz") == 0) { format = FORMAT_RZ; } else errx(EXIT_FAILURE, "illegal format -- %s", optarg); break; case 's': sep = optarg; break; case 'v': errno = 0; val = strtol(optarg, &ep, 10); if ((ep != NULL && *ep != '\0') || ((val == LONG_MIN || val == LONG_MAX) && errno != 0)) errx(EXIT_FAILURE, "invalid startnum value -- %s", optarg); startnum = (int)val; break; case 'w': errno = 0; val = strtol(optarg, &ep, 10); if ((ep != NULL && *ep != '\0') || ((val == LONG_MIN || val == LONG_MAX) && errno != 0)) errx(EXIT_FAILURE, "invalid width value -- %s", optarg); width = (int)val; if (!(width > 0)) errx(EXIT_FAILURE, "width argument must be > 0 -- %d", width); break; case '?': default: usage(); /* NOTREACHED */ } } argc -= optind; argv += optind; switch (argc) { case 0: break; case 1: if (strcmp(argv[0], "-") != 0 && freopen(argv[0], "r", stdin) == NULL) err(EXIT_FAILURE, "%s", argv[0]); break; default: usage(); /* NOTREACHED */ } /* Generate the delimiter sequence */ memcpy(delim, delim1, delim1len); memcpy(delim + delim1len, delim2, delim2len); delimlen = delim1len + delim2len; /* Allocate a buffer suitable for preformatting line number. */ intbuffersize = max((int)INT_STRLEN_MAXIMUM, width) + 1; /* NUL */ if ((intbuffer = malloc(intbuffersize)) == NULL) err(EXIT_FAILURE, "cannot allocate preformatting buffer"); /* Do the work. 
*/ filter(); exit(EXIT_SUCCESS); /* NOTREACHED */ } static void filter(void) { char *buffer; size_t buffersize; ssize_t linelen; int line; /* logical line number */ int section; /* logical page section */ unsigned int adjblank; /* adjacent blank lines */ int consumed; /* intbuffer measurement */ int donumber = 0, idx; adjblank = 0; line = startnum; section = BODY; buffer = NULL; buffersize = 0; while ((linelen = getline(&buffer, &buffersize, stdin)) > 0) { for (idx = FOOTER; idx <= NP_LAST; idx++) { /* Does it look like a delimiter? */ if (delimlen * (idx + 1) > linelen) break; if (memcmp(buffer + delimlen * idx, delim, delimlen) != 0) break; /* Was this the whole line? */ if (buffer[delimlen * (idx + 1)] == '\n') { section = idx; adjblank = 0; if (restart) line = startnum; goto nextline; } } switch (numbering_properties[section].type) { case number_all: /* * Doing this for number_all only is disputable, but * the standard expresses an explicit dependency on * `-b a' etc. */ if (buffer[0] == '\n' && ++adjblank < nblank) donumber = 0; else donumber = 1, adjblank = 0; break; case number_nonempty: donumber = (buffer[0] != '\n'); break; case number_none: donumber = 0; break; case number_regex: donumber = (regexec(&numbering_properties[section].expr, buffer, 0, NULL, 0) == 0); break; } if (donumber) { /* Note: sprintf() is safe here. */ consumed = sprintf(intbuffer, format, width, line); (void)printf("%s", intbuffer + max(0, consumed - width)); line += incr; } else { (void)printf("%*s", width, ""); } (void)fputs(sep, stdout); (void)fwrite(buffer, linelen, 1, stdout); if (ferror(stdout)) err(EXIT_FAILURE, "output error"); nextline: ; } if (ferror(stdin)) err(EXIT_FAILURE, "input error"); free(buffer); } /* * Various support functions. */ static void parse_numbering(const char *argstr, int section) { int error; char errorbuf[NL_TEXTMAX]; switch (argstr[0]) { case 'a': numbering_properties[section].type = number_all; break; case 'n': numbering_properties[section].type = number_none; break; case 't': numbering_properties[section].type = number_nonempty; break; case 'p': /* If there was a previous expression, throw it away. */ if (numbering_properties[section].type == number_regex) regfree(&numbering_properties[section].expr); else numbering_properties[section].type = number_regex; /* Compile/validate the supplied regular expression. */ if ((error = regcomp(&numbering_properties[section].expr, &argstr[1], REG_NEWLINE|REG_NOSUB)) != 0) { (void)regerror(error, &numbering_properties[section].expr, errorbuf, sizeof (errorbuf)); errx(EXIT_FAILURE, "%s expr: %s -- %s", numbering_properties[section].name, errorbuf, &argstr[1]); } break; default: errx(EXIT_FAILURE, "illegal %s line numbering type -- %s", numbering_properties[section].name, argstr); } } static void usage(void) { (void)fprintf(stderr, "usage: nl [-p] [-b type] [-d delim] [-f type] [-h type] [-i incr] [-l num]\n" " [-n format] [-s sep] [-v startnum] [-w width] [file]\n"); exit(EXIT_FAILURE); } Index: user/alc/PQ_LAUNDRY/usr.bin/procstat/procstat_files.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/procstat/procstat_files.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/procstat/procstat_files.c (revision 303642) @@ -1,576 +1,575 @@ /*- * Copyright (c) 2007-2011 Robert N. M. Watson * Copyright (c) 2015 Allan Jude * All rights reserved. 
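
nl.c above, like sdiff.c and the mandoc Makefile elsewhere in this revision, drops the _WITH_GETLINE guard and calls getline(3) directly, letting it allocate and grow the line buffer as needed. A self-contained sketch of that read loop, assuming stdin as the input stream:

#include <err.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	char *buffer = NULL;		/* getline() allocates and grows this */
	size_t buffersize = 0;
	ssize_t linelen;
	long lineno = 0;

	while ((linelen = getline(&buffer, &buffersize, stdin)) > 0)
		printf("%6ld\t%.*s", ++lineno, (int)linelen, buffer);
	if (ferror(stdin))
		err(EXIT_FAILURE, "input error");
	free(buffer);
	return (0);
}
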
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "procstat.h" static const char * protocol_to_string(int domain, int type, int protocol) { switch (domain) { case AF_INET: case AF_INET6: switch (protocol) { case IPPROTO_TCP: return ("TCP"); case IPPROTO_UDP: return ("UDP"); case IPPROTO_ICMP: return ("ICM"); case IPPROTO_RAW: return ("RAW"); case IPPROTO_SCTP: return ("SCT"); case IPPROTO_DIVERT: return ("IPD"); default: return ("IP?"); } case AF_LOCAL: switch (type) { case SOCK_STREAM: return ("UDS"); case SOCK_DGRAM: return ("UDD"); default: return ("UD?"); } default: return ("?"); } } static void addr_to_string(struct sockaddr_storage *ss, char *buffer, int buflen) { char buffer2[INET6_ADDRSTRLEN]; struct sockaddr_in6 *sin6; struct sockaddr_in *sin; struct sockaddr_un *sun; switch (ss->ss_family) { case AF_LOCAL: sun = (struct sockaddr_un *)ss; if (strlen(sun->sun_path) == 0) strlcpy(buffer, "-", buflen); else strlcpy(buffer, sun->sun_path, buflen); break; case AF_INET: sin = (struct sockaddr_in *)ss; snprintf(buffer, buflen, "%s:%d", inet_ntoa(sin->sin_addr), ntohs(sin->sin_port)); break; case AF_INET6: sin6 = (struct sockaddr_in6 *)ss; if (inet_ntop(AF_INET6, &sin6->sin6_addr, buffer2, sizeof(buffer2)) != NULL) snprintf(buffer, buflen, "%s.%d", buffer2, ntohs(sin6->sin6_port)); else strlcpy(buffer, "-", buflen); break; default: strlcpy(buffer, "", buflen); break; } } static struct cap_desc { uint64_t cd_right; const char *cd_desc; } cap_desc[] = { /* General file I/O. */ { CAP_READ, "rd" }, { CAP_WRITE, "wr" }, { CAP_SEEK, "se" }, { CAP_MMAP, "mm" }, { CAP_CREATE, "cr" }, { CAP_FEXECVE, "fe" }, { CAP_FSYNC, "fy" }, { CAP_FTRUNCATE, "ft" }, /* VFS methods. 
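
addr_to_string() above prints AF_INET6 endpoints with inet_ntop(3), appending the port after a dot, and falls back to "-" when conversion fails. The sketch below shows the same conversion in isolation, using the IPv6 loopback address as sample input:

#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct sockaddr_in6 sin6;
	char buf[INET6_ADDRSTRLEN];

	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_port = htons(22);
	sin6.sin6_addr = in6addr_loopback;

	if (inet_ntop(AF_INET6, &sin6.sin6_addr, buf, sizeof(buf)) != NULL)
		printf("%s.%d\n", buf, ntohs(sin6.sin6_port));
	else
		printf("-\n");
	return (0);
}
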
*/ { CAP_FCHDIR, "cd" }, { CAP_FCHFLAGS, "cf" }, { CAP_FCHMOD, "cm" }, { CAP_FCHOWN, "cn" }, { CAP_FCNTL, "fc" }, { CAP_FLOCK, "fl" }, { CAP_FPATHCONF, "fp" }, { CAP_FSCK, "fk" }, { CAP_FSTAT, "fs" }, { CAP_FSTATFS, "sf" }, { CAP_FUTIMES, "fu" }, { CAP_LINKAT_SOURCE, "ls" }, { CAP_LINKAT_TARGET, "lt" }, { CAP_MKDIRAT, "md" }, { CAP_MKFIFOAT, "mf" }, { CAP_MKNODAT, "mn" }, { CAP_RENAMEAT_SOURCE, "rs" }, { CAP_RENAMEAT_TARGET, "rt" }, { CAP_SYMLINKAT, "sl" }, { CAP_UNLINKAT, "un" }, /* Lookups - used to constrain *at() calls. */ { CAP_LOOKUP, "lo" }, /* Extended attributes. */ { CAP_EXTATTR_GET, "eg" }, { CAP_EXTATTR_SET, "es" }, { CAP_EXTATTR_DELETE, "ed" }, { CAP_EXTATTR_LIST, "el" }, /* Access Control Lists. */ { CAP_ACL_GET, "ag" }, { CAP_ACL_SET, "as" }, { CAP_ACL_DELETE, "ad" }, { CAP_ACL_CHECK, "ac" }, /* Socket operations. */ { CAP_ACCEPT, "at" }, { CAP_BIND, "bd" }, { CAP_CONNECT, "co" }, { CAP_GETPEERNAME, "pn" }, { CAP_GETSOCKNAME, "sn" }, { CAP_GETSOCKOPT, "gs" }, { CAP_LISTEN, "ln" }, { CAP_PEELOFF, "pf" }, { CAP_SETSOCKOPT, "ss" }, { CAP_SHUTDOWN, "sh" }, /* Mandatory Access Control. */ { CAP_MAC_GET, "mg" }, { CAP_MAC_SET, "ms" }, /* Methods on semaphores. */ { CAP_SEM_GETVALUE, "sg" }, { CAP_SEM_POST, "sp" }, { CAP_SEM_WAIT, "sw" }, /* Event monitoring and posting. */ { CAP_EVENT, "ev" }, { CAP_KQUEUE_EVENT, "ke" }, { CAP_KQUEUE_CHANGE, "kc" }, /* Strange and powerful rights that should not be given lightly. */ { CAP_IOCTL, "io" }, { CAP_TTYHOOK, "ty" }, /* Process management via process descriptors. */ { CAP_PDGETPID, "pg" }, { CAP_PDWAIT, "pw" }, { CAP_PDKILL, "pk" }, /* * Rights that allow to use bindat(2) and connectat(2) syscalls on a * directory descriptor. */ { CAP_BINDAT, "ba" }, { CAP_CONNECTAT, "ca" }, /* Aliases and defines that combine multiple rights. */ { CAP_PREAD, "prd" }, { CAP_PWRITE, "pwr" }, { CAP_MMAP_R, "mmr" }, { CAP_MMAP_W, "mmw" }, { CAP_MMAP_X, "mmx" }, { CAP_MMAP_RW, "mrw" }, { CAP_MMAP_RX, "mrx" }, { CAP_MMAP_WX, "mwx" }, { CAP_MMAP_RWX, "mma" }, { CAP_RECV, "re" }, { CAP_SEND, "sd" }, { CAP_SOCK_CLIENT, "scl" }, { CAP_SOCK_SERVER, "ssr" }, }; -static const u_int cap_desc_count = sizeof(cap_desc) / - sizeof(cap_desc[0]); +static const u_int cap_desc_count = nitems(cap_desc); static u_int width_capability(cap_rights_t *rightsp) { u_int count, i, width; count = 0; width = 0; for (i = 0; i < cap_desc_count; i++) { if (cap_rights_is_set(rightsp, cap_desc[i].cd_right)) { width += strlen(cap_desc[i].cd_desc); if (count) width++; count++; } } return (width); } static void print_capability(cap_rights_t *rightsp, u_int capwidth) { u_int count, i, width; count = 0; width = 0; for (i = width_capability(rightsp); i < capwidth; i++) { if (i != 0) xo_emit(" "); else xo_emit("-"); } xo_open_list("capabilities"); for (i = 0; i < cap_desc_count; i++) { if (cap_rights_is_set(rightsp, cap_desc[i].cd_right)) { xo_emit("{D:/%s}{l:capabilities/%s}", count ? "," : "", cap_desc[i].cd_desc); width += strlen(cap_desc[i].cd_desc); if (count) width++; count++; } } xo_close_list("capabilities"); } void procstat_files(struct procstat *procstat, struct kinfo_proc *kipp) { struct sockstat sock; struct filestat_list *head; struct filestat *fst; const char *str; struct vnstat vn; u_int capwidth, width; int error; char src_addr[PATH_MAX]; char dst_addr[PATH_MAX]; /* * To print the header in capability mode, we need to know the width * of the widest capability string. 
Even if we get no processes * back, we will print the header, so we defer aborting due to a lack * of processes until after the header logic. */ capwidth = 0; head = procstat_getfiles(procstat, kipp, 0); if (head != NULL && Cflag) { STAILQ_FOREACH(fst, head, next) { width = width_capability(&fst->fs_cap_rights); if (width > capwidth) capwidth = width; } if (capwidth < strlen("CAPABILITIES")) capwidth = strlen("CAPABILITIES"); } if (!hflag) { if (Cflag) xo_emit("{T:/%5s %-16s %5s %1s %-8s %-*s " "%-3s %-12s}\n", "PID", "COMM", "FD", "T", "FLAGS", capwidth, "CAPABILITIES", "PRO", "NAME"); else xo_emit("{T:/%5s %-16s %5s %1s %1s %-8s " "%3s %7s %-3s %-12s}\n", "PID", "COMM", "FD", "T", "V", "FLAGS", "REF", "OFFSET", "PRO", "NAME"); } if (head == NULL) return; xo_emit("{ek:process_id/%5d/%d}", kipp->ki_pid); xo_emit("{e:command/%-16s/%s}", kipp->ki_comm); xo_open_list("files"); STAILQ_FOREACH(fst, head, next) { xo_open_instance("files"); xo_emit("{dk:process_id/%5d/%d} ", kipp->ki_pid); xo_emit("{d:command/%-16s/%s} ", kipp->ki_comm); if (fst->fs_uflags & PS_FST_UFLAG_CTTY) xo_emit("{P: }{:fd/%s} ", "ctty"); else if (fst->fs_uflags & PS_FST_UFLAG_CDIR) xo_emit("{P: }{:fd/%s} ", "cwd"); else if (fst->fs_uflags & PS_FST_UFLAG_JAIL) xo_emit("{P: }{:fd/%s} ", "jail"); else if (fst->fs_uflags & PS_FST_UFLAG_RDIR) xo_emit("{P: }{:fd/%s} ", "root"); else if (fst->fs_uflags & PS_FST_UFLAG_TEXT) xo_emit("{P: }{:fd/%s} ", "text"); else if (fst->fs_uflags & PS_FST_UFLAG_TRACE) xo_emit("{:fd/%s} ", "trace"); else xo_emit("{:fd/%5d} ", fst->fs_fd); switch (fst->fs_type) { case PS_FST_TYPE_VNODE: str = "v"; xo_emit("{eq:fd_type/vnode}"); break; case PS_FST_TYPE_SOCKET: str = "s"; xo_emit("{eq:fd_type/socket}"); break; case PS_FST_TYPE_PIPE: str = "p"; xo_emit("{eq:fd_type/pipe}"); break; case PS_FST_TYPE_FIFO: str = "f"; xo_emit("{eq:fd_type/fifo}"); break; case PS_FST_TYPE_KQUEUE: str = "k"; xo_emit("{eq:fd_type/kqueue}"); break; case PS_FST_TYPE_CRYPTO: str = "c"; xo_emit("{eq:fd_type/crypto}"); break; case PS_FST_TYPE_MQUEUE: str = "m"; xo_emit("{eq:fd_type/mqueue}"); break; case PS_FST_TYPE_SHM: str = "h"; xo_emit("{eq:fd_type/shm}"); break; case PS_FST_TYPE_PTS: str = "t"; xo_emit("{eq:fd_type/pts}"); break; case PS_FST_TYPE_SEM: str = "e"; xo_emit("{eq:fd_type/sem}"); break; case PS_FST_TYPE_NONE: str = "?"; xo_emit("{eq:fd_type/none}"); break; case PS_FST_TYPE_UNKNOWN: default: str = "?"; xo_emit("{eq:fd_type/unknown}"); break; } xo_emit("{d:fd_type/%1s/%s} ", str); if (!Cflag) { str = "-"; if (fst->fs_type == PS_FST_TYPE_VNODE) { error = procstat_get_vnode_info(procstat, fst, &vn, NULL); switch (vn.vn_type) { case PS_FST_VTYPE_VREG: str = "r"; xo_emit("{eq:vode_type/regular}"); break; case PS_FST_VTYPE_VDIR: str = "d"; xo_emit("{eq:vode_type/directory}"); break; case PS_FST_VTYPE_VBLK: str = "b"; xo_emit("{eq:vode_type/block}"); break; case PS_FST_VTYPE_VCHR: str = "c"; xo_emit("{eq:vode_type/character}"); break; case PS_FST_VTYPE_VLNK: str = "l"; xo_emit("{eq:vode_type/link}"); break; case PS_FST_VTYPE_VSOCK: str = "s"; xo_emit("{eq:vode_type/socket}"); break; case PS_FST_VTYPE_VFIFO: str = "f"; xo_emit("{eq:vode_type/fifo}"); break; case PS_FST_VTYPE_VBAD: str = "x"; xo_emit("{eq:vode_type/revoked_device}"); break; case PS_FST_VTYPE_VNON: str = "?"; xo_emit("{eq:vode_type/non}"); break; case PS_FST_VTYPE_UNKNOWN: default: str = "?"; xo_emit("{eq:vode_type/unknown}"); break; } } xo_emit("{d:vnode_type/%1s/%s} ", str); } xo_emit("{d:/%s}", fst->fs_fflags & PS_FST_FFLAG_READ ? 
"r" : "-"); xo_emit("{d:/%s}", fst->fs_fflags & PS_FST_FFLAG_WRITE ? "w" : "-"); xo_emit("{d:/%s}", fst->fs_fflags & PS_FST_FFLAG_APPEND ? "a" : "-"); xo_emit("{d:/%s}", fst->fs_fflags & PS_FST_FFLAG_ASYNC ? "s" : "-"); xo_emit("{d:/%s}", fst->fs_fflags & PS_FST_FFLAG_SYNC ? "f" : "-"); xo_emit("{d:/%s}", fst->fs_fflags & PS_FST_FFLAG_NONBLOCK ? "n" : "-"); xo_emit("{d:/%s}", fst->fs_fflags & PS_FST_FFLAG_DIRECT ? "d" : "-"); xo_emit("{d:/%s}", fst->fs_fflags & PS_FST_FFLAG_HASLOCK ? "l" : "-"); xo_emit(" "); xo_open_list("fd_flags"); if (fst->fs_fflags & PS_FST_FFLAG_READ) xo_emit("{elq:fd_flags/read}"); if (fst->fs_fflags & PS_FST_FFLAG_WRITE) xo_emit("{elq:fd_flags/write}"); if (fst->fs_fflags & PS_FST_FFLAG_APPEND) xo_emit("{elq:fd_flags/append}"); if (fst->fs_fflags & PS_FST_FFLAG_ASYNC) xo_emit("{elq:fd_flags/async}"); if (fst->fs_fflags & PS_FST_FFLAG_SYNC) xo_emit("{elq:fd_flags/fsync}"); if (fst->fs_fflags & PS_FST_FFLAG_NONBLOCK) xo_emit("{elq:fd_flags/nonblocking}"); if (fst->fs_fflags & PS_FST_FFLAG_DIRECT) xo_emit("{elq:fd_flags/direct_io}"); if (fst->fs_fflags & PS_FST_FFLAG_HASLOCK) xo_emit("{elq:fd_flags/lock_held}"); xo_close_list("fd_flags"); if (!Cflag) { if (fst->fs_ref_count > -1) xo_emit("{:ref_count/%3d/%d} ", fst->fs_ref_count); else xo_emit("{q:ref_count/%3c/%c} ", '-'); if (fst->fs_offset > -1) xo_emit("{:offset/%7jd/%jd} ", (intmax_t)fst->fs_offset); else xo_emit("{q:offset/%7c/%c} ", '-'); } if (Cflag) { print_capability(&fst->fs_cap_rights, capwidth); xo_emit(" "); } switch (fst->fs_type) { case PS_FST_TYPE_SOCKET: error = procstat_get_socket_info(procstat, fst, &sock, NULL); if (error != 0) break; xo_emit("{:protocol/%-3s/%s} ", protocol_to_string(sock.dom_family, sock.type, sock.proto)); /* * While generally we like to print two addresses, * local and peer, for sockets, it turns out to be * more useful to print the first non-nul address for * local sockets, as typically they aren't bound and * connected, and the path strings can get long. */ if (sock.dom_family == AF_LOCAL) { struct sockaddr_un *sun = (struct sockaddr_un *)&sock.sa_local; if (sun->sun_path[0] != 0) addr_to_string(&sock.sa_local, src_addr, sizeof(src_addr)); else addr_to_string(&sock.sa_peer, src_addr, sizeof(src_addr)); xo_emit("{:path/%s}", src_addr); } else { addr_to_string(&sock.sa_local, src_addr, sizeof(src_addr)); addr_to_string(&sock.sa_peer, dst_addr, sizeof(dst_addr)); xo_emit("{:path/%s %s}", src_addr, dst_addr); } break; default: xo_emit("{:protocol/%-3s/%s} ", "-"); xo_emit("{:path/%-18s/%s}", fst->fs_path != NULL ? fst->fs_path : "-"); } xo_emit("\n"); xo_close_instance("files"); } xo_close_list("files"); procstat_freefiles(procstat, head); } Index: user/alc/PQ_LAUNDRY/usr.bin/sdiff/sdiff.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/sdiff/sdiff.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/sdiff/sdiff.c (revision 303642) @@ -1,1189 +1,1188 @@ /* $OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */ /* * Written by Raymond Lai . * Public domain. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include "common.h" #include "extern.h" #define DIFF_PATH "/usr/bin/diff" #define WIDTH 126 /* * Each column must be at least one character wide, plus three * characters between the columns (space, [<|>], space). 
*/ #define WIDTH_MIN 5 /* 3 kilobytes of chars */ #define MAX_CHECK 768 /* A single diff line. */ struct diffline { STAILQ_ENTRY(diffline) diffentries; char *left; char div; char *right; }; static void astrcat(char **, const char *); static void enqueue(char *, char, char *); static char *mktmpcpy(const char *); static int istextfile(FILE *); static void binexec(char *, char *, char *) __dead2; static void freediff(struct diffline *); static void int_usage(void); static int parsecmd(FILE *, FILE *, FILE *); static void printa(FILE *, size_t); static void printc(FILE *, size_t, FILE *, size_t); static void printcol(const char *, size_t *, const size_t); static void printd(FILE *, size_t); static void println(const char *, const char, const char *); static void processq(void); static void prompt(const char *, const char *); static void usage(void) __dead2; static char *xfgets(FILE *); static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead); static size_t line_width; /* width of a line (two columns and divider) */ static size_t width; /* width of each column */ static size_t file1ln, file2ln; /* line number of file1 and file2 */ static int Iflag = 0; /* ignore sets matching regexp */ static int lflag; /* print only left column for identical lines */ static int sflag; /* skip identical lines */ FILE *outfp; /* file to save changes to */ const char *tmpdir; /* TMPDIR or /tmp */ enum { HELP_OPT = CHAR_MAX + 1, NORMAL_OPT, FCASE_SENSITIVE_OPT, FCASE_IGNORE_OPT, FROMFILE_OPT, TOFILE_OPT, UNIDIR_OPT, STRIPCR_OPT, HORIZ_OPT, LEFTC_OPT, SUPCL_OPT, LF_OPT, /* the following groupings must be in sequence */ OLDGF_OPT, NEWGF_OPT, UNCGF_OPT, CHGF_OPT, OLDLF_OPT, NEWLF_OPT, UNCLF_OPT, /* end order-sensitive enums */ TSIZE_OPT, HLINES_OPT, LFILES_OPT, DIFFPROG_OPT, PIPE_FD, /* pid from the diff parent (if applicable) */ DIFF_PID, NOOP_OPT, }; static struct option longopts[] = { /* options only processed in sdiff */ { "left-column", no_argument, NULL, LEFTC_OPT }, { "suppress-common-lines", no_argument, NULL, 's' }, { "width", required_argument, NULL, 'w' }, { "output", required_argument, NULL, 'o' }, { "diff-program", required_argument, NULL, DIFFPROG_OPT }, { "pipe-fd", required_argument, NULL, PIPE_FD }, { "diff-pid", required_argument, NULL, DIFF_PID }, /* Options processed by diff. 
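 *
 * (The short diff switches below are re-packed onto the child diff
 * command line, while the options above are consumed by sdiff itself.
 * As a hypothetical example, "sdiff -w 80 -i old.c new.c" keeps -w,
 * giving each column (80 - 3) / 2 = 38 characters with 3 characters
 * left for the gutter, and merges -i into the combined switch string
 * handed to diff.)
 *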
*/ { "ignore-file-name-case", no_argument, NULL, FCASE_IGNORE_OPT }, { "no-ignore-file-name-case", no_argument, NULL, FCASE_SENSITIVE_OPT }, { "strip-trailing-cr", no_argument, NULL, STRIPCR_OPT }, { "tabsize", required_argument, NULL, TSIZE_OPT }, { "help", no_argument, NULL, HELP_OPT }, { "text", no_argument, NULL, 'a' }, { "ignore-blank-lines", no_argument, NULL, 'B' }, { "ignore-space-change", no_argument, NULL, 'b' }, { "minimal", no_argument, NULL, 'd' }, { "ignore-tab-expansion", no_argument, NULL, 'E' }, { "ignore-matching-lines", required_argument, NULL, 'I' }, { "ignore-case", no_argument, NULL, 'i' }, { "expand-tabs", no_argument, NULL, 't' }, { "speed-large-files", no_argument, NULL, 'H' }, { "ignore-all-space", no_argument, NULL, 'W' }, { NULL, 0, NULL, '\0'} }; static const char *help_msg[] = { "\nusage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n", "\t-l, --left-column, Only print the left column for identical lines.", "\t-o OUTFILE, --output=OUTFILE, nteractively merge file1 and file2 into outfile.", "\t-s, --suppress-common-lines, Skip identical lines.", "\t-w WIDTH, --width=WIDTH, Print a maximum of WIDTH characters on each line.", "\tOptions passed to diff(1) are:", "\t\t-a, --text, Treat file1 and file2 as text files.", "\t\t-b, --ignore-trailing-cr, Ignore trailing blank spaces.", "\t\t-d, --minimal, Minimize diff size.", "\t\t-I RE, --ignore-matching-lines=RE, Ignore changes whose line matches RE.", "\t\t-i, --ignore-case, Do a case-insensitive comparison.", "\t\t-t, --expand-tabs Expand tabs to spaces.", "\t\t-W, --ignore-all-spaces, Ignore all spaces.", "\t\t--speed-large-files, Assume large file with scattered changes.", "\t\t--strip-trailing-cr, Strip trailing carriage return.", "\t\t--ignore-file-name-case, Ignore case of file names.", "\t\t--no-ignore-file-name-case, Do not ignore file name case", "\t\t--tabsize NUM, Change size of tabs (default 8.)", NULL, }; /* * Create temporary file if source_file is not a regular file. * Returns temporary file name if one was malloced, NULL if unnecessary. */ static char * mktmpcpy(const char *source_file) { struct stat sb; ssize_t rcount; int ifd, ofd; u_char buf[BUFSIZ]; char *target_file; /* Open input and output. */ ifd = open(source_file, O_RDONLY, 0); /* File was opened successfully. */ if (ifd != -1) { if (fstat(ifd, &sb) == -1) err(2, "error getting file status from %s", source_file); /* Regular file. */ if (S_ISREG(sb.st_mode)) { close(ifd); return (NULL); } } else { /* If ``-'' does not exist the user meant stdin. */ if (errno == ENOENT && strcmp(source_file, "-") == 0) ifd = STDIN_FILENO; else err(2, "error opening %s", source_file); } /* Not a regular file, so copy input into temporary file. 
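 *
 * The copy matters because both this process and the diff(1) child it
 * forks must read the same bytes, and a pipe or stdin can be read only
 * once.  For instance, in a hypothetical invocation such as
 *
 *	cat old.conf | sdiff - new.conf
 *
 * the data arriving on stdin is drained into ${TMPDIR}/sdiff.XXXXXXXXXX
 * (mkstemp fills in the suffix) and that regular file is what both
 * programs end up reading.
 *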
*/ if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1) err(2, "asprintf"); if ((ofd = mkstemp(target_file)) == -1) { warn("error opening %s", target_file); goto FAIL; } while ((rcount = read(ifd, buf, sizeof(buf))) != -1 && rcount != 0) { ssize_t wcount; wcount = write(ofd, buf, (size_t)rcount); if (-1 == wcount || rcount != wcount) { warn("error writing to %s", target_file); goto FAIL; } } if (rcount == -1) { warn("error reading from %s", source_file); goto FAIL; } close(ifd); close(ofd); return (target_file); FAIL: unlink(target_file); exit(2); } int main(int argc, char **argv) { FILE *diffpipe=NULL, *file1, *file2; size_t diffargc = 0, wflag = WIDTH; int ch, fd[2] = {-1}, status; pid_t pid=0; pid_t ppid =-1; const char *outfile = NULL; struct option *popt; char **diffargv, *diffprog = DIFF_PATH, *filename1, *filename2, *tmp1, *tmp2, *s1, *s2; int i; /* * Process diff flags. */ /* * Allocate memory for diff arguments and NULL. * Each flag has at most one argument, so doubling argc gives an * upper limit of how many diff args can be passed. argv[0], * file1, and file2 won't have arguments so doubling them will * waste some memory; however we need an extra space for the * NULL at the end, so it sort of works out. */ if (!(diffargv = calloc(argc, sizeof(char **) * 2))) err(2, "main"); /* Add first argument, the program name. */ diffargv[diffargc++] = diffprog; /* create a dynamic string for merging single-switch options */ if ( asprintf(&diffargv[diffargc++], "-") < 0 ) err(2, "main"); while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:", longopts, NULL)) != -1) { const char *errstr; switch (ch) { /* only compatible --long-name-form with diff */ case FCASE_IGNORE_OPT: case FCASE_SENSITIVE_OPT: case STRIPCR_OPT: case TSIZE_OPT: case 'S': break; /* combine no-arg single switches */ case 'a': case 'B': case 'b': case 'd': case 'E': case 'i': case 't': case 'H': case 'W': for(popt = longopts; ch != popt->val && popt->name != NULL; popt++); diffargv[1] = realloc(diffargv[1], sizeof(char) * strlen(diffargv[1]) + 2); /* * In diff, the 'W' option is 'w' and the 'w' is 'W'. */ if (ch == 'W') sprintf(diffargv[1], "%sw", diffargv[1]); else sprintf(diffargv[1], "%s%c", diffargv[1], ch); break; case DIFFPROG_OPT: diffargv[0] = diffprog = optarg; break; case 'I': Iflag = 1; diffargv[diffargc++] = "-I"; diffargv[diffargc++] = optarg; break; case 'l': lflag = 1; break; case 'o': outfile = optarg; break; case 's': sflag = 1; break; case 'w': wflag = strtonum(optarg, WIDTH_MIN, INT_MAX, &errstr); if (errstr) errx(2, "width is %s: %s", errstr, optarg); break; case DIFF_PID: ppid = strtonum(optarg, 0, INT_MAX, &errstr); if (errstr) errx(2, "diff pid value is %s: %s", errstr, optarg); break; case HELP_OPT: for (i = 0; help_msg[i] != NULL; i++) printf("%s\n", help_msg[i]); exit(0); break; default: usage(); break; } } /* no single switches were used */ if (strcmp(diffargv[1], "-") == 0 ) { for ( i = 1; i < argc-1; i++) { diffargv[i] = diffargv[i+1]; } diffargv[diffargc-1] = NULL; diffargc--; } argc -= optind; argv += optind; if (argc != 2) usage(); if (outfile && (outfp = fopen(outfile, "w")) == NULL) err(2, "could not open: %s", optarg); if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') tmpdir = _PATH_TMP; filename1 = argv[0]; filename2 = argv[1]; /* * Create temporary files for diff and sdiff to share if file1 * or file2 are not regular files. This allows sdiff and diff * to read the same inputs if one or both inputs are stdin. 
* * If any temporary files were created, their names would be * saved in tmp1 or tmp2. tmp1 should never equal tmp2. */ tmp1 = tmp2 = NULL; /* file1 and file2 are the same, so copy to same temp file. */ if (strcmp(filename1, filename2) == 0) { if ((tmp1 = mktmpcpy(filename1))) filename1 = filename2 = tmp1; /* Copy file1 and file2 into separate temp files. */ } else { if ((tmp1 = mktmpcpy(filename1))) filename1 = tmp1; if ((tmp2 = mktmpcpy(filename2))) filename2 = tmp2; } diffargv[diffargc++] = filename1; diffargv[diffargc++] = filename2; /* Add NULL to end of array to indicate end of array. */ diffargv[diffargc++] = NULL; /* Subtract column divider and divide by two. */ width = (wflag - 3) / 2; /* Make sure line_width can fit in size_t. */ if (width > (SIZE_MAX - 3) / 2) errx(2, "width is too large: %zu", width); line_width = width * 2 + 3; if (ppid == -1 ) { if (pipe(fd)) err(2, "pipe"); switch (pid = fork()) { case 0: /* child */ /* We don't read from the pipe. */ close(fd[0]); if (dup2(fd[1], STDOUT_FILENO) == -1) err(2, "child could not duplicate descriptor"); /* Free unused descriptor. */ close(fd[1]); execvp(diffprog, diffargv); err(2, "could not execute diff: %s", diffprog); break; case -1: err(2, "could not fork"); break; } /* parent */ /* We don't write to the pipe. */ close(fd[1]); /* Open pipe to diff command. */ if ((diffpipe = fdopen(fd[0], "r")) == NULL) err(2, "could not open diff pipe"); } if ((file1 = fopen(filename1, "r")) == NULL) err(2, "could not open %s", filename1); if ((file2 = fopen(filename2, "r")) == NULL) err(2, "could not open %s", filename2); if (!istextfile(file1) || !istextfile(file2)) { /* Close open files and pipe, delete temps */ fclose(file1); fclose(file2); if (diffpipe != NULL) fclose(diffpipe); if (tmp1) if (unlink(tmp1)) warn("Error deleting %s.", tmp1); if (tmp2) if (unlink(tmp2)) warn("Error deleting %s.", tmp2); free(tmp1); free(tmp2); binexec(diffprog, filename1, filename2); } /* Line numbers start at one. */ file1ln = file2ln = 1; /* Read and parse diff output. */ while (parsecmd(diffpipe, file1, file2) != EOF) ; fclose(diffpipe); /* Wait for diff to exit. */ if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) || WEXITSTATUS(status) >= 2) err(2, "diff exited abnormally."); /* Delete and free unneeded temporary files. */ if (tmp1) if (unlink(tmp1)) warn("Error deleting %s.", tmp1); if (tmp2) if (unlink(tmp2)) warn("Error deleting %s.", tmp2); free(tmp1); free(tmp2); filename1 = filename2 = tmp1 = tmp2 = NULL; /* No more diffs, so print common lines. */ if (lflag) while ((s1 = xfgets(file1))) enqueue(s1, ' ', NULL); else for (;;) { s1 = xfgets(file1); s2 = xfgets(file2); if (s1 || s2) enqueue(s1, ' ', s2); else break; } fclose(file1); fclose(file2); /* Process unmodified lines. */ processq(); /* Return diff exit status. */ return (WEXITSTATUS(status)); } /* * When sdiff/zsdiff detects a binary file as input, executes them with * diff/zdiff to maintain the same behavior as GNU sdiff with binary input. */ static void binexec(char *diffprog, char *f1, char *f2) { char *args[] = {diffprog, f1, f2, (char *) 0}; execv(diffprog, args); /* If execv() fails, sdiff's execution will continue below. */ errx(1, "Could not execute diff process.\n"); } /* * Checks whether a file appears to be a text file. 
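 *
 * The check is a simple heuristic: a NUL byte anywhere in the first
 * MAX_CHECK characters marks the stream as binary.  When either input
 * fails it, sdiff abandons the side-by-side display and re-execs plain
 * diff via binexec(), so that, for example, a hypothetical
 *
 *	sdiff old.o new.o
 *
 * produces diff's usual "Binary files ... differ" style of output
 * rather than two garbled columns.
 *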
*/ static int istextfile(FILE *f) { int ch, i; if (f == NULL) return (1); rewind(f); for (i = 0; i <= MAX_CHECK; i++) { ch = fgetc(f); if (ch == '\0') { rewind(f); return (0); } if (ch == EOF) break; } rewind(f); return (1); } /* * Prints an individual column (left or right), taking into account * that tabs are variable-width. Takes a string, the current column * the cursor is on the screen, and the maximum value of the column. * The column value is updated as we go along. */ static void printcol(const char *s, size_t *col, const size_t col_max) { for (; *s && *col < col_max; ++s) { size_t new_col; switch (*s) { case '\t': /* * If rounding to next multiple of eight causes * an integer overflow, just return. */ if (*col > SIZE_MAX - 8) return; /* Round to next multiple of eight. */ new_col = (*col / 8 + 1) * 8; /* * If printing the tab goes past the column * width, don't print it and just quit. */ if (new_col > col_max) return; *col = new_col; break; default: ++(*col); } putchar(*s); } } /* * Prompts user to either choose between two strings or edit one, both, * or neither. */ static void prompt(const char *s1, const char *s2) { char *cmd; /* Print command prompt. */ putchar('%'); /* Get user input. */ for (; (cmd = xfgets(stdin)); free(cmd)) { const char *p; /* Skip leading whitespace. */ for (p = cmd; isspace(*p); ++p) ; switch (*p) { case 'e': /* Skip `e'. */ ++p; if (eparse(p, s1, s2) == -1) goto USAGE; break; case 'l': case '1': /* Choose left column as-is. */ if (s1 != NULL) fprintf(outfp, "%s\n", s1); /* End of command parsing. */ break; case 'q': goto QUIT; case 'r': case '2': /* Choose right column as-is. */ if (s2 != NULL) fprintf(outfp, "%s\n", s2); /* End of command parsing. */ break; case 's': sflag = 1; goto PROMPT; case 'v': sflag = 0; /* FALLTHROUGH */ default: /* Interactive usage help. */ USAGE: int_usage(); PROMPT: putchar('%'); /* Prompt user again. */ continue; } free(cmd); return; } /* * If there was no error, we received an EOF from stdin, so we * should quit. */ QUIT: fclose(outfp); exit(0); } /* * Takes two strings, separated by a column divider. NULL strings are * treated as empty columns. If the divider is the ` ' character, the * second column is not printed (-l flag). In this case, the second * string must be NULL. When the second column is NULL, the divider * does not print the trailing space following the divider character. * * Takes into account that tabs can take multiple columns. */ static void println(const char *s1, const char div, const char *s2) { size_t col; /* Print first column. Skips if s1 == NULL. */ col = 0; if (s1) { /* Skip angle bracket and space. */ printcol(s1, &col, width); } /* Otherwise, we pad this column up to width. */ for (; col < width; ++col) putchar(' '); /* Only print left column. */ if (div == ' ' && !s2) { printf(" (\n"); return; } /* * Print column divider. If there is no second column, we don't * need to add the space for padding. */ if (!s2) { printf(" %c\n", div); return; } printf(" %c ", div); col += 3; /* Skip angle bracket and space. */ printcol(s2, &col, line_width); putchar('\n'); } /* * Reads a line from file and returns as a string. If EOF is reached, * NULL is returned. The returned string must be freed afterwards. 
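 *
 * A minimal sketch of the calling convention (a hypothetical caller,
 * not code from this file):
 *
 *	char *s;
 *
 *	while ((s = xfgets(fp)) != NULL) {
 *		process(s);	(s has its trailing newline removed)
 *		free(s);
 *	}
 *
 * On a read error xfgets() exits via err(), so a NULL return can be
 * treated purely as end of file.
 *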
*/ static char * xfgets(FILE *file) { size_t linecap; ssize_t l; char *s; clearerr(file); linecap = 0; s = NULL; if ((l = getline(&s, &linecap, file)) == -1) { if (ferror(file)) err(2, "error reading file"); return (NULL); } if (s[l-1] == '\n') s[l-1] = '\0'; return (s); } /* * Parse ed commands from diffpipe and print lines from file1 (lines * to change or delete) or file2 (lines to add or change). * Returns EOF or 0. */ static int parsecmd(FILE *diffpipe, FILE *file1, FILE *file2) { size_t file1start, file1end, file2start, file2end, n; /* ed command line and pointer to characters in line */ char *line, *p, *q; const char *errstr; char c, cmd; /* Read ed command. */ if (!(line = xfgets(diffpipe))) return (EOF); p = line; /* Go to character after line number. */ while (isdigit(*p)) ++p; c = *p; *p++ = 0; file1start = strtonum(line, 0, INT_MAX, &errstr); if (errstr) errx(2, "file1 start is %s: %s", errstr, line); /* A range is specified for file1. */ if (c == ',') { q = p; /* Go to character after file2end. */ while (isdigit(*p)) ++p; c = *p; *p++ = 0; file1end = strtonum(q, 0, INT_MAX, &errstr); if (errstr) errx(2, "file1 end is %s: %s", errstr, line); if (file1start > file1end) errx(2, "invalid line range in file1: %s", line); } else file1end = file1start; cmd = c; /* Check that cmd is valid. */ if (!(cmd == 'a' || cmd == 'c' || cmd == 'd')) errx(2, "ed command not recognized: %c: %s", cmd, line); q = p; /* Go to character after line number. */ while (isdigit(*p)) ++p; c = *p; *p++ = 0; file2start = strtonum(q, 0, INT_MAX, &errstr); if (errstr) errx(2, "file2 start is %s: %s", errstr, line); /* * There should either be a comma signifying a second line * number or the line should just end here. */ if (c != ',' && c != '\0') errx(2, "invalid line range in file2: %c: %s", c, line); if (c == ',') { file2end = strtonum(p, 0, INT_MAX, &errstr); if (errstr) errx(2, "file2 end is %s: %s", errstr, line); if (file2start >= file2end) errx(2, "invalid line range in file2: %s", line); } else file2end = file2start; /* Appends happen _after_ stated line. */ if (cmd == 'a') { if (file1start != file1end) errx(2, "append cannot have a file1 range: %s", line); if (file1start == SIZE_MAX) errx(2, "file1 line range too high: %s", line); file1start = ++file1end; } /* * I'm not sure what the deal is with the line numbers for * deletes, though. */ else if (cmd == 'd') { if (file2start != file2end) errx(2, "delete cannot have a file2 range: %s", line); if (file2start == SIZE_MAX) errx(2, "file2 line range too high: %s", line); file2start = ++file2end; } /* * Continue reading file1 and file2 until we reach line numbers * specified by diff. Should only happen with -I flag. */ for (; file1ln < file1start && file2ln < file2start; ++file1ln, ++file2ln) { char *s1, *s2; if (!(s1 = xfgets(file1))) errx(2, "file1 shorter than expected"); if (!(s2 = xfgets(file2))) errx(2, "file2 shorter than expected"); /* If the -l flag was specified, print only left column. */ if (lflag) { free(s2); /* * XXX - If -l and -I are both specified, all * unchanged or ignored lines are shown with a * `(' divider. This matches GNU sdiff, but I * believe it is a bug. Just check out: * gsdiff -l -I '^$' samefile samefile. */ if (Iflag) enqueue(s1, '(', NULL); else enqueue(s1, ' ', NULL); } else enqueue(s1, ' ', s2); } /* Ignore deleted lines. */ for (; file1ln < file1start; ++file1ln) { char *s; if (!(s = xfgets(file1))) errx(2, "file1 shorter than expected"); enqueue(s, '(', NULL); } /* Ignore added lines. 
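 *
 * (The ranges being caught up with here come straight from the
 * normal-format hunk headers parsed above; for an illustrative hunk
 * such as "12,15d11", file1start is 12, file1end 15 and both file2
 * numbers are 11, while "8a9,10" appends file2 lines 9-10 after file1
 * line 8 and "5,7c5,9" replaces file1 lines 5-7 with file2 lines 5-9.)
 *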
*/ for (; file2ln < file2start; ++file2ln) { char *s; if (!(s = xfgets(file2))) errx(2, "file2 shorter than expected"); /* If -l flag was given, don't print right column. */ if (lflag) free(s); else enqueue(NULL, ')', s); } /* Process unmodified or skipped lines. */ processq(); switch (cmd) { case 'a': printa(file2, file2end); n = file2end - file2start + 1; break; case 'c': printc(file1, file1end, file2, file2end); n = file1end - file1start + 1 + 1 + file2end - file2start + 1; break; case 'd': printd(file1, file1end); n = file1end - file1start + 1; break; default: errx(2, "invalid diff command: %c: %s", cmd, line); } free(line); /* Skip to next ed line. */ while (n--) { if (!(line = xfgets(diffpipe))) errx(2, "diff ended early"); free(line); } return (0); } /* * Queues up a diff line. */ static void enqueue(char *left, char div, char *right) { struct diffline *diffp; if (!(diffp = malloc(sizeof(struct diffline)))) err(2, "enqueue"); diffp->left = left; diffp->div = div; diffp->right = right; STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries); } /* * Free a diffline structure and its elements. */ static void freediff(struct diffline *diffp) { free(diffp->left); free(diffp->right); free(diffp); } /* * Append second string into first. Repeated appends to the same string * are cached, making this an O(n) function, where n = strlen(append). */ static void astrcat(char **s, const char *append) { /* Length of string in previous run. */ static size_t offset = 0; size_t newsiz; /* * String from previous run. Compared to *s to see if we are * dealing with the same string. If so, we can use offset. */ static const char *oldstr = NULL; char *newstr; /* * First string is NULL, so just copy append. */ if (!*s) { if (!(*s = strdup(append))) err(2, "astrcat"); /* Keep track of string. */ offset = strlen(*s); oldstr = *s; return; } /* * *s is a string so concatenate. */ /* Did we process the same string in the last run? */ /* * If this is a different string from the one we just processed * cache new string. */ if (oldstr != *s) { offset = strlen(*s); oldstr = *s; } /* Size = strlen(*s) + \n + strlen(append) + '\0'. */ newsiz = offset + 1 + strlen(append) + 1; /* Resize *s to fit new string. */ newstr = realloc(*s, newsiz); if (newstr == NULL) err(2, "astrcat"); *s = newstr; /* *s + offset should be end of string. */ /* Concatenate. */ strlcpy(*s + offset, "\n", newsiz - offset); strlcat(*s + offset, append, newsiz - offset); /* New string length should be exactly newsiz - 1 characters. */ /* Store generated string's values. */ offset = newsiz - 1; oldstr = *s; } /* * Process diff set queue, printing, prompting, and saving each diff * line stored in queue. */ static void processq(void) { struct diffline *diffp; char divc, *left, *right; /* Don't process empty queue. */ if (STAILQ_EMPTY(&diffhead)) return; /* Remember the divider. */ divc = STAILQ_FIRST(&diffhead)->div; left = NULL; right = NULL; /* * Go through set of diffs, concatenating each line in left or * right column into two long strings, `left' and `right'. */ STAILQ_FOREACH(diffp, &diffhead, diffentries) { /* * Print changed lines if -s was given, * print all lines if -s was not given. */ if (!sflag || diffp->div == '|' || diffp->div == '<' || diffp->div == '>') println(diffp->left, diffp->div, diffp->right); /* Append new lines to diff set. */ if (diffp->left) astrcat(&left, diffp->left); if (diffp->right) astrcat(&right, diffp->right); } /* Empty queue and free each diff line and its elements. 
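 *
 * (At this point `left' and `right' hold every line of the set joined
 * by embedded newlines: a toy sequence such as astrcat(&s, "foo")
 * followed by astrcat(&s, "bar") leaves s as "foo\nbar", and the static
 * offset cache in astrcat() avoids rescanning the growing string on
 * each append.)
 *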
*/ while (!STAILQ_EMPTY(&diffhead)) { diffp = STAILQ_FIRST(&diffhead); STAILQ_REMOVE_HEAD(&diffhead, diffentries); freediff(diffp); } /* Write to outfp, prompting user if lines are different. */ if (outfp) switch (divc) { case ' ': case '(': case ')': fprintf(outfp, "%s\n", left); break; case '|': case '<': case '>': prompt(left, right); break; default: errx(2, "invalid divider: %c", divc); } /* Free left and right. */ free(left); free(right); } /* * Print lines following an (a)ppend command. */ static void printa(FILE *file, size_t line2) { char *line; for (; file2ln <= line2; ++file2ln) { if (!(line = xfgets(file))) errx(2, "append ended early"); enqueue(NULL, '>', line); } processq(); } /* * Print lines following a (c)hange command, from file1ln to file1end * and from file2ln to file2end. */ static void printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end) { struct fileline { STAILQ_ENTRY(fileline) fileentries; char *line; }; STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead); /* Read lines to be deleted. */ for (; file1ln <= file1end; ++file1ln) { struct fileline *linep; char *line1; /* Read lines from both. */ if (!(line1 = xfgets(file1))) errx(2, "error reading file1 in delete in change"); /* Add to delete queue. */ if (!(linep = malloc(sizeof(struct fileline)))) err(2, "printc"); linep->line = line1; STAILQ_INSERT_TAIL(&delqhead, linep, fileentries); } /* Process changed lines.. */ for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end; ++file2ln) { struct fileline *del; char *add; /* Get add line. */ if (!(add = xfgets(file2))) errx(2, "error reading add in change"); del = STAILQ_FIRST(&delqhead); enqueue(del->line, '|', add); STAILQ_REMOVE_HEAD(&delqhead, fileentries); /* * Free fileline structure but not its elements since * they are queued up. */ free(del); } processq(); /* Process remaining lines to add. */ for (; file2ln <= file2end; ++file2ln) { char *add; /* Get add line. */ if (!(add = xfgets(file2))) errx(2, "error reading add in change"); enqueue(NULL, '>', add); } processq(); /* Process remaining lines to delete. */ while (!STAILQ_EMPTY(&delqhead)) { struct fileline *filep; filep = STAILQ_FIRST(&delqhead); enqueue(filep->line, '<', NULL); STAILQ_REMOVE_HEAD(&delqhead, fileentries); free(filep); } processq(); } /* * Print deleted lines from file, from file1ln to file1end. */ static void printd(FILE *file1, size_t file1end) { char *line1; /* Print out lines file1ln to line2. */ for (; file1ln <= file1end; ++file1ln) { if (!(line1 = xfgets(file1))) errx(2, "file1 ended early in delete"); enqueue(line1, '<', NULL); } processq(); } /* * Interactive mode usage. */ static void int_usage(void) { puts("e:\tedit blank diff\n" "eb:\tedit both diffs concatenated\n" "el:\tedit left diff\n" "er:\tedit right diff\n" "l | 1:\tchoose left diff\n" "r | 2:\tchoose right diff\n" "s:\tsilent mode--don't print identical lines\n" "v:\tverbose mode--print identical lines\n" "q:\tquit"); } static void usage(void) { fprintf(stderr, "usage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1" " file2\n"); exit(2); } Index: user/alc/PQ_LAUNDRY/usr.bin/sed/compile.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/sed/compile.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/sed/compile.c (revision 303642) @@ -1,945 +1,945 @@ /*- * Copyright (c) 1992 Diomidis Spinellis. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. 
* * This code is derived from software contributed to Berkeley by * Diomidis Spinellis of Imperial College, University of London. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #ifndef lint static const char sccsid[] = "@(#)compile.c 8.1 (Berkeley) 6/6/93"; #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "defs.h" #include "extern.h" #define LHSZ 128 #define LHMASK (LHSZ - 1) static struct labhash { struct labhash *lh_next; u_int lh_hash; struct s_command *lh_cmd; int lh_ref; } *labels[LHSZ]; static const char *compile_addr(const char *, struct s_addr *); static char *compile_ccl(const char **, char *); static const char *compile_delimited(const char *, char *, int); static const char *compile_flags(const char *, struct s_subst *); static const regex_t *compile_re(const char *, int); static const char *compile_subst(const char *, struct s_subst *); static char *compile_text(size_t *); static const char *compile_tr(const char *, struct s_tr **); static struct s_command **compile_stream(struct s_command **); static char *duptoeol(const char *, const char *, size_t *); static void enterlabel(struct s_command *); static struct s_command *findlabel(const char *); static void fixuplabel(struct s_command *, const struct s_command *); static void uselabel(void); /* * Command specification. This is used to drive the command parser. */ struct s_format { char code; /* Command code */ int naddr; /* Number of address args */ enum e_args args; /* Argument type */ }; static struct s_format cmd_fmts[] = { {'{', 2, GROUP}, {'}', 0, ENDGROUP}, {'a', 1, TEXT}, {'b', 2, BRANCH}, {'c', 2, TEXT}, {'d', 2, EMPTY}, {'D', 2, EMPTY}, {'g', 2, EMPTY}, {'G', 2, EMPTY}, {'h', 2, EMPTY}, {'H', 2, EMPTY}, {'i', 1, TEXT}, {'l', 2, EMPTY}, {'n', 2, EMPTY}, {'N', 2, EMPTY}, {'p', 2, EMPTY}, {'P', 2, EMPTY}, {'q', 1, EMPTY}, {'r', 1, RFILE}, {'s', 2, SUBST}, {'t', 2, BRANCH}, {'w', 2, WFILE}, {'x', 2, EMPTY}, {'y', 2, TR}, {'!', 2, NONSEL}, {':', 0, LABEL}, {'#', 0, COMMENT}, {'=', 1, EMPTY}, {'\0', 0, COMMENT}, }; /* The compiled program. 
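 *
 * prog is filled in by compile_stream() below, driven by the cmd_fmts
 * table above.  For instance, the entry {'s', 2, SUBST} says an s
 * command accepts up to two addresses and carries a substitution
 * argument, so an illustrative script line such as "1,10s/foo/bar/g"
 * becomes one s_command whose a1 and a2 are line addresses 1 and 10.
 *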
*/ struct s_command *prog; /* * Compile the program into prog. * Initialise appends. */ void compile(void) { *compile_stream(&prog) = NULL; fixuplabel(prog, NULL); uselabel(); if (appendnum == 0) appends = NULL; else if ((appends = malloc(sizeof(struct s_appends) * appendnum)) == NULL) err(1, "malloc"); if ((match = malloc((maxnsub + 1) * sizeof(regmatch_t))) == NULL) err(1, "malloc"); } #define EATSPACE() do { \ while (*p && isspace((unsigned char)*p)) \ p++; \ } while (0) #define EATSPACEN() do { \ while (*p && *p != '\n' && isspace((unsigned char)*p)) \ p++; \ } while (0) static struct s_command ** compile_stream(struct s_command **link) { const char *p; struct s_command *cmd, *cmd2, *stack; struct s_format *fp; char re[_POSIX2_LINE_MAX + 1]; int naddr; /* Number of addresses */ stack = NULL; for (;;) { if ((p = cu_fgets(NULL)) == NULL) { if (stack != NULL) errx(1, "%lu: %s: unexpected EOF (pending }'s)", linenum, fname); return (link); } semicolon: EATSPACEN(); switch (*p) { case '#': case '\0': case '\n': continue; /* to next command-unit */ case ';': p++; goto semicolon; } if ((*link = cmd = malloc(sizeof(struct s_command))) == NULL) err(1, "malloc"); link = &cmd->next; cmd->startline = cmd->nonsel = 0; /* First parse the addresses */ naddr = 0; /* Valid characters to start an address */ #define addrchar(c) (strchr("0123456789/\\$", (c))) if (addrchar(*p)) { naddr++; if ((cmd->a1 = malloc(sizeof(struct s_addr))) == NULL) err(1, "malloc"); p = compile_addr(p, cmd->a1); EATSPACE(); /* EXTENSION */ if (*p == ',') { p++; EATSPACE(); /* EXTENSION */ naddr++; if ((cmd->a2 = malloc(sizeof(struct s_addr))) == NULL) err(1, "malloc"); p = compile_addr(p, cmd->a2); EATSPACE(); } else cmd->a2 = NULL; } else cmd->a1 = cmd->a2 = NULL; nonsel: /* Now parse the command */ if (*p == '\0' || *p == '\n') errx(1, "%lu: %s: command expected", linenum, fname); cmd->code = *p; for (fp = cmd_fmts; fp->code; fp++) if (fp->code == *p) break; if (!fp->code) errx(1, "%lu: %s: invalid command code %c (%s)", linenum, fname, *p, p); if (naddr > fp->naddr) errx(1, "%lu: %s: command %c expects up to %d address(es), found %d", linenum, fname, *p, fp->naddr, naddr); switch (fp->args) { case NONSEL: /* ! */ p++; EATSPACE(); cmd->nonsel = 1; goto nonsel; case GROUP: /* { */ p++; EATSPACEN(); cmd->next = stack; stack = cmd; link = &cmd->u.c; if (*p != '\0' && *p != '\n') goto semicolon; break; case ENDGROUP: /* * Short-circuit command processing, since end of * group is really just a noop. 
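 *
 * As an illustrative script (not something shipped here),
 *
 *	sed -n '/BEGIN/,/END/{ s/foo/bar/; p; }' file
 *
 * pushes the '{' command onto `stack' when it is parsed; when the
 * matching '}' arrives here, the '{' is popped and its next pointer is
 * made to point at this '}' node, which is itself marked nonsel and
 * otherwise does nothing.
 *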
*/ cmd->nonsel = 1; if (stack == NULL) errx(1, "%lu: %s: unexpected }", linenum, fname); cmd2 = stack; stack = cmd2->next; cmd2->next = cmd; /*FALLTHROUGH*/ case EMPTY: /* d D g G h H l n N p P q x = \0 */ p++; EATSPACEN(); if (*p == ';') { p++; link = &cmd->next; goto semicolon; } if (*p != '\0' && *p != '\n') errx(1, "%lu: %s: extra characters at the end of %c command", linenum, fname, cmd->code); break; case TEXT: /* a c i */ p++; EATSPACE(); if (*p != '\\') errx(1, "%lu: %s: command %c expects \\ followed by text", linenum, fname, cmd->code); p++; EATSPACEN(); if (*p != '\n') errx(1, "%lu: %s: extra characters (%c) after \\ at the end of %c command", linenum, fname, *p, cmd->code); cmd->t = compile_text(&cmd->tlen); break; case COMMENT: /* \0 # */ break; case WFILE: /* w */ p++; EATSPACE(); if (*p == '\0') errx(1, "%lu: %s: filename expected", linenum, fname); cmd->t = duptoeol(p, "w command", &cmd->tlen); if (aflag) cmd->u.fd = -1; else if ((cmd->u.fd = open(cmd->t, O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) err(1, "%s", p); break; case RFILE: /* r */ p++; EATSPACE(); if (*p == '\0') errx(1, "%lu: %s: filename expected", linenum, fname); else cmd->t = duptoeol(p, "read command", &cmd->tlen); break; case BRANCH: /* b t */ p++; EATSPACEN(); if (*p == '\0' || *p == '\n') cmd->t = NULL; else cmd->t = duptoeol(p, "branch", &cmd->tlen); break; case LABEL: /* : */ p++; EATSPACE(); cmd->t = duptoeol(p, "label", &cmd->tlen); if (cmd->t[0] == '\0') errx(1, "%lu: %s: empty label", linenum, fname); enterlabel(cmd); break; case SUBST: /* s */ p++; if (*p == '\0' || *p == '\\' || *p == '\n') errx(1, "%lu: %s: substitute pattern can not be delimited by newline or backslash", linenum, fname); if ((cmd->u.s = calloc(1, sizeof(struct s_subst))) == NULL) err(1, "malloc"); p = compile_delimited(p, re, 0); if (p == NULL) errx(1, "%lu: %s: unterminated substitute pattern", linenum, fname); --p; p = compile_subst(p, cmd->u.s); p = compile_flags(p, cmd->u.s); if (*re != '\0') cmd->u.s->re = compile_re(re, cmd->u.s->icase); EATSPACE(); if (*p == ';') { p++; link = &cmd->next; goto semicolon; } break; case TR: /* y */ p++; p = compile_tr(p, &cmd->u.y); EATSPACE(); if (*p == ';') { p++; link = &cmd->next; goto semicolon; } if (*p) errx(1, "%lu: %s: extra text at the end of a transform command", linenum, fname); break; } } } /* * Get a delimited string. P points to the delimiter of the string; d points * to a buffer area. Newline and delimiter escapes are processed; other * escapes are ignored. * * Returns a pointer to the first character after the final delimiter or NULL * in the case of a non-terminated string. The character array d is filled * with the processed string. 
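 *
 * For instance (hypothetical script fragments): in "s|/usr/local|/opt|g"
 * the delimiter is '|', so the slashes need no escaping; writing "\|"
 * embeds a literal delimiter character instead of ending the string,
 * and a "\n" sequence in the pattern is converted into a real newline
 * here.
 *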
*/ static const char * compile_delimited(const char *p, char *d, int is_tr) { char c; c = *p++; if (c == '\0') return (NULL); else if (c == '\\') errx(1, "%lu: %s: \\ can not be used as a string delimiter", linenum, fname); else if (c == '\n') errx(1, "%lu: %s: newline can not be used as a string delimiter", linenum, fname); while (*p) { if (*p == '[' && *p != c) { if ((d = compile_ccl(&p, d)) == NULL) errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname); continue; } else if (*p == '\\' && p[1] == '[') { *d++ = *p++; } else if (*p == '\\' && p[1] == c) p++; else if (*p == '\\' && p[1] == 'n') { *d++ = '\n'; p += 2; continue; } else if (*p == '\\' && p[1] == '\\') { if (is_tr) p++; else *d++ = *p++; } else if (*p == c) { *d = '\0'; return (p + 1); } *d++ = *p++; } return (NULL); } /* compile_ccl: expand a POSIX character class */ static char * compile_ccl(const char **sp, char *t) { int c, d; const char *s = *sp; *t++ = *s++; if (*s == '^') *t++ = *s++; if (*s == ']') *t++ = *s++; for (; *s && (*t = *s) != ']'; s++, t++) if (*s == '[' && ((d = *(s+1)) == '.' || d == ':' || d == '=')) { *++t = *++s, t++, s++; for (c = *s; (*t = *s) != ']' || c != d; s++, t++) if ((c = *s) == '\0') return NULL; } return (*s == ']') ? *sp = ++s, ++t : NULL; } /* * Compiles the regular expression in RE and returns a pointer to the compiled * regular expression. * Cflags are passed to regcomp. */ static const regex_t * compile_re(const char *re, int case_insensitive) { regex_t *rep; int eval, flags; flags = rflags; if (case_insensitive) flags |= REG_ICASE; if ((rep = malloc(sizeof(regex_t))) == NULL) err(1, "malloc"); if ((eval = regcomp(rep, re, flags)) != 0) errx(1, "%lu: %s: RE error: %s", linenum, fname, strregerror(eval, rep)); if (maxnsub < rep->re_nsub) maxnsub = rep->re_nsub; return (rep); } /* * Compile the substitution string of a regular expression and set res to * point to a saved copy of it. Nsub is the number of parenthesized regular * expressions. */ static const char * compile_subst(const char *p, struct s_subst *s) { int asize, size; u_char ref; char c, *text, *op, *sp; int more = 0, sawesc = 0; c = *p++; /* Terminator character */ if (c == '\0') return (NULL); s->maxbref = 0; s->linenum = linenum; asize = 2 * _POSIX2_LINE_MAX + 1; if ((text = malloc(asize)) == NULL) err(1, "malloc"); size = 0; do { op = sp = text + size; for (; *p != '\0' && *p != '\n'; p++) { if (*p == '\\' || sawesc) { /* * If this is a continuation from the last * buffer, we won't have a character to * skip over. */ if (sawesc) sawesc = 0; else p++; if (*p == '\0') { /* * This escaped character is continued * in the next part of the line. Note * this fact, then cause the loop to * exit w/ normal EOL case and reenter * above with the new buffer. 
*/ sawesc = 1; p--; break; } else if (*p == '\n') { *sp++ = '\n'; break; } else if (strchr("123456789", *p) != NULL) { *sp++ = '\\'; ref = *p - '0'; if (s->re != NULL && ref > s->re->re_nsub) errx(1, "%lu: %s: \\%c not defined in the RE", linenum, fname, *p); if (s->maxbref < ref) s->maxbref = ref; } else if (*p == '&' || *p == '\\') *sp++ = '\\'; } else if (*p == c) { if (*++p == '\0' && more) { const char *nextp; nextp = cu_fgets(&more); if (nextp != NULL) p = nextp; } *sp++ = '\0'; size += sp - op; if ((s->new = realloc(text, size)) == NULL) err(1, "realloc"); return (p); } else if (*p == '\n') { errx(1, "%lu: %s: unescaped newline inside substitute pattern", linenum, fname); /* NOTREACHED */ } *sp++ = *p; } size += sp - op; if (asize - size < _POSIX2_LINE_MAX + 1) { asize *= 2; if ((text = realloc(text, asize)) == NULL) err(1, "realloc"); } - } while ((p = cu_fgets(&more))); + } while ((p = cu_fgets(&more)) != NULL); errx(1, "%lu: %s: unterminated substitute in regular expression", linenum, fname); /* NOTREACHED */ } /* * Compile the flags of the s command */ static const char * compile_flags(const char *p, struct s_subst *s) { int gn; /* True if we have seen g or n */ unsigned long nval; char *q; s->n = 1; /* Default */ s->p = 0; s->wfile = NULL; s->wfd = -1; s->icase = 0; for (gn = 0;;) { EATSPACEN(); /* EXTENSION */ switch (*p) { case 'g': if (gn) errx(1, "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); gn = 1; s->n = 0; break; case '\0': case '\n': case ';': return (p); case 'p': s->p = 1; break; case 'i': case 'I': s->icase = 1; break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (gn) errx(1, "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); gn = 1; errno = 0; nval = strtol(p, &q, 10); if (errno == ERANGE || nval > INT_MAX) errx(1, "%lu: %s: overflow in the 'N' substitute flag", linenum, fname); s->n = nval; p = q; continue; case 'w': p++; #ifdef HISTORIC_PRACTICE if (*p != ' ') { warnx("%lu: %s: space missing before w wfile", linenum, fname); return (p); } #endif EATSPACE(); s->wfile = duptoeol(p, "w flag", NULL); if (!aflag && (s->wfd = open(s->wfile, O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) err(1, "%s", s->wfile); return (p); default: errx(1, "%lu: %s: bad flag in substitute command: '%c' (%.10s)", linenum, fname, *p, p); break; } p++; } } /* * Compile a translation set of strings into a lookup table. 
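 *
 * For example, a transform such as "y/abc/xyz/" (an illustrative
 * script, not anything from this file) maps every 'a' to 'x', 'b' to
 * 'y' and 'c' to 'z' through the bytetab array; the source and target
 * strings must contain the same number of characters, and in a
 * multibyte locale any character wider than one byte is diverted to
 * the auxiliary `multis' vector that do_tr() searches at run time.
 *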
*/ static const char * compile_tr(const char *p, struct s_tr **py) { struct s_tr *y; int i; const char *op, *np; char old[_POSIX2_LINE_MAX + 1]; char new[_POSIX2_LINE_MAX + 1]; size_t oclen, oldlen, nclen, newlen; mbstate_t mbs1, mbs2; if ((*py = y = malloc(sizeof(*y))) == NULL) err(1, "malloc"); y->multis = NULL; y->nmultis = 0; if (*p == '\0' || *p == '\\') errx(1, "%lu: %s: transform pattern can not be delimited by newline or backslash", linenum, fname); p = compile_delimited(p, old, 1); if (p == NULL) errx(1, "%lu: %s: unterminated transform source string", linenum, fname); p = compile_delimited(p - 1, new, 1); if (p == NULL) errx(1, "%lu: %s: unterminated transform target string", linenum, fname); EATSPACE(); op = old; oldlen = mbsrtowcs(NULL, &op, 0, NULL); if (oldlen == (size_t)-1) err(1, "mbsrtowcs"); np = new; newlen = mbsrtowcs(NULL, &np, 0, NULL); if (newlen == (size_t)-1) err(1, "mbsrtowcs"); if (newlen != oldlen) errx(1, "%lu: %s: transform strings are not the same length", linenum, fname); if (MB_CUR_MAX == 1) { /* * The single-byte encoding case is easy: generate a * lookup table. */ for (i = 0; i <= UCHAR_MAX; i++) y->bytetab[i] = (char)i; for (; *op; op++, np++) y->bytetab[(u_char)*op] = *np; } else { /* * Multi-byte encoding case: generate a lookup table as * above, but only for single-byte characters. The first * bytes of multi-byte characters have their lookup table * entries set to 0, which causes do_tr() to search through * an auxiliary vector of multi-byte mappings. */ memset(&mbs1, 0, sizeof(mbs1)); memset(&mbs2, 0, sizeof(mbs2)); for (i = 0; i <= UCHAR_MAX; i++) y->bytetab[i] = (btowc(i) != WEOF) ? i : 0; while (*op != '\0') { oclen = mbrlen(op, MB_LEN_MAX, &mbs1); if (oclen == (size_t)-1 || oclen == (size_t)-2) errc(1, EILSEQ, NULL); nclen = mbrlen(np, MB_LEN_MAX, &mbs2); if (nclen == (size_t)-1 || nclen == (size_t)-2) errc(1, EILSEQ, NULL); if (oclen == 1 && nclen == 1) y->bytetab[(u_char)*op] = *np; else { y->bytetab[(u_char)*op] = 0; y->multis = realloc(y->multis, (y->nmultis + 1) * sizeof(*y->multis)); if (y->multis == NULL) err(1, "realloc"); i = y->nmultis++; y->multis[i].fromlen = oclen; memcpy(y->multis[i].from, op, oclen); y->multis[i].tolen = nclen; memcpy(y->multis[i].to, np, nclen); } op += oclen; np += nclen; } } return (p); } /* * Compile the text following an a, c, or i command. */ static char * compile_text(size_t *ptlen) { int asize, esc_nl, size; char *text, *s; const char *p, *op; asize = 2 * _POSIX2_LINE_MAX + 1; if ((text = malloc(asize)) == NULL) err(1, "malloc"); size = 0; - while ((p = cu_fgets(NULL))) { + while ((p = cu_fgets(NULL)) != NULL) { op = s = text + size; for (esc_nl = 0; *p != '\0'; p++) { if (*p == '\\' && p[1] != '\0' && *++p == '\n') esc_nl = 1; *s++ = *p; if (*p == '\n') break; } size += s - op; if (!esc_nl) { *s = '\0'; break; } if (asize - size < _POSIX2_LINE_MAX + 1) { asize *= 2; if ((text = realloc(text, asize)) == NULL) err(1, "realloc"); } } text[size] = '\0'; if ((text = realloc(text, size + 1)) == NULL) err(1, "realloc"); *ptlen = size; return (text); } /* * Get an address and return a pointer to the first character after * it. Fill the structure pointed to according to the address. 
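 *
 * A few illustrative address forms (not taken from this file): '$'
 * selects the last line (AT_LAST), a bare number such as "42" is an
 * absolute line (AT_LINE), "+3" is a relative offset (AT_RELLINE),
 * typically used as the second address of a range as in "/start/,+3",
 * and "/error/I" is a context address whose trailing 'I' compiles the
 * regular expression with REG_ICASE (AT_RE).
 *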
*/ static const char * compile_addr(const char *p, struct s_addr *a) { char *end, re[_POSIX2_LINE_MAX + 1]; int icase; icase = 0; a->type = 0; switch (*p) { case '\\': /* Context address */ ++p; /* FALLTHROUGH */ case '/': /* Context address */ p = compile_delimited(p, re, 0); if (p == NULL) errx(1, "%lu: %s: unterminated regular expression", linenum, fname); /* Check for case insensitive regexp flag */ if (*p == 'I') { icase = 1; p++; } if (*re == '\0') a->u.r = NULL; else a->u.r = compile_re(re, icase); a->type = AT_RE; return (p); case '$': /* Last line */ a->type = AT_LAST; return (p + 1); case '+': /* Relative line number */ a->type = AT_RELLINE; p++; /* FALLTHROUGH */ /* Line number */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (a->type == 0) a->type = AT_LINE; a->u.l = strtol(p, &end, 10); return (end); default: errx(1, "%lu: %s: expected context address", linenum, fname); return (NULL); } } /* * duptoeol -- * Return a copy of all the characters up to \n or \0. */ static char * duptoeol(const char *s, const char *ctype, size_t *ptlen) { size_t len; int ws; char *p; const char *start; ws = 0; for (start = s; *s != '\0' && *s != '\n'; ++s) ws = isspace((unsigned char)*s); if (ws) warnx("%lu: %s: whitespace after %s", linenum, fname, ctype); len = s - start; if ((p = malloc(len + 1)) == NULL) err(1, "malloc"); memmove(p, start, len); p[len] = '\0'; if (ptlen != NULL) *ptlen = len; return p; } /* * Convert goto label names to addresses, and count a and r commands, in * the given subset of the script. Free the memory used by labels in b * and t commands (but not by :). * * TODO: Remove } nodes */ static void fixuplabel(struct s_command *cp, const struct s_command *end) { for (; cp != end; cp = cp->next) switch (cp->code) { case 'a': case 'r': appendnum++; break; case 'b': case 't': /* Resolve branch target. */ if (cp->t == NULL) { cp->u.c = NULL; break; } if ((cp->u.c = findlabel(cp->t)) == NULL) errx(1, "%lu: %s: %c: undefined label '%s'", linenum, fname, cp->code, cp->t); free(cp->t); break; case '{': /* Do interior commands. */ fixuplabel(cp->u.c, cp->next); break; } } /* * Associate the given command label for later lookup. */ static void enterlabel(struct s_command *cp) { struct labhash **lhp, *lh; u_char *p; u_int h, c; for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++) h = (h << 5) + h + c; lhp = &labels[h & LHMASK]; for (lh = *lhp; lh != NULL; lh = lh->lh_next) if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0) errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t); if ((lh = malloc(sizeof *lh)) == NULL) err(1, "malloc"); lh->lh_next = *lhp; lh->lh_hash = h; lh->lh_cmd = cp; lh->lh_ref = 0; *lhp = lh; } /* * Find the label contained in the command l in the command linked * list cp. L is excluded from the search. Return NULL if not found. */ static struct s_command * findlabel(const char *name) { struct labhash *lh; const u_char *p; u_int h, c; for (h = 0, p = (const u_char *)name; (c = *p) != 0; p++) h = (h << 5) + h + c; for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) { if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) { lh->lh_ref = 1; return (lh->lh_cmd); } } return (NULL); } /* * Warn about any unused labels. As a side effect, release the label hash * table space. 
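 *
 * As a sketch of how labels are consumed (an illustrative one-liner,
 * not shipped here),
 *
 *	sed -e :again -e 's/  / /' -e 't again' file
 *
 * has enterlabel() record "again" when the ':' command is parsed,
 * fixuplabel() later points the 't' command at it, and uselabel()
 * would only warn if the label were never referenced.
 *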
*/ static void uselabel(void) { struct labhash *lh, *next; int i; for (i = 0; i < LHSZ; i++) { for (lh = labels[i]; lh != NULL; lh = next) { next = lh->lh_next; if (!lh->lh_ref) warnx("%lu: %s: unused label '%s'", linenum, fname, lh->lh_cmd->t); free(lh); } } } Index: user/alc/PQ_LAUNDRY/usr.bin/sed/main.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/sed/main.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/sed/main.c (revision 303642) @@ -1,529 +1,532 @@ /*- * Copyright (c) 2013 Johann 'Myrkraverk' Oskarsson. * Copyright (c) 1992 Diomidis Spinellis. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Diomidis Spinellis of Imperial College, University of London. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1992, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif #ifndef lint static const char sccsid[] = "@(#)main.c 8.2 (Berkeley) 1/3/94"; #endif #include #include #include #include #include #include #include #include #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include "defs.h" #include "extern.h" /* * Linked list of units (strings and files) to be compiled */ struct s_compunit { struct s_compunit *next; enum e_cut {CU_FILE, CU_STRING} type; const char *s; /* Pointer to string or fname */ }; /* * Linked list pointer to compilation units and pointer to current * next pointer. */ static struct s_compunit *script, **cu_nextp = &script; /* * Linked list of files to be processed */ struct s_flist { const char *fname; struct s_flist *next; }; /* * Linked list pointer to files and pointer to current * next pointer. 
*/ static struct s_flist *files, **fl_nextp = &files; FILE *infile; /* Current input file */ FILE *outfile; /* Current output file */ int aflag, eflag, nflag; int rflags = 0; static int rval; /* Exit status */ static int ispan; /* Whether inplace editing spans across files */ /* * Current file and line number; line numbers restart across compilation * units, but span across input files. The latter is optional if editing * in place. */ const char *fname; /* File name. */ const char *outfname; /* Output file name */ static char oldfname[PATH_MAX]; /* Old file name (for in-place editing) */ static char tmpfname[PATH_MAX]; /* Temporary file name (for in-place editing) */ static const char *inplace; /* Inplace edit file extension. */ u_long linenum; static void add_compunit(enum e_cut, const char *); static void add_file(const char *); static void usage(void); int main(int argc, char *argv[]) { + char *temp_arg; int c, fflag; (void) setlocale(LC_ALL, ""); fflag = 0; inplace = NULL; while ((c = getopt(argc, argv, "EI:ae:f:i:lnru")) != -1) switch (c) { case 'r': /* Gnu sed compat */ case 'E': rflags = REG_EXTENDED; break; case 'I': inplace = optarg; ispan = 1; /* span across input files */ break; case 'a': aflag = 1; break; case 'e': eflag = 1; - add_compunit(CU_STRING, optarg); + asprintf(&temp_arg, "%s\n", optarg); + if (temp_arg == NULL) + errx(1, "Couldn't allocate temporary buffer"); + add_compunit(CU_STRING, temp_arg); break; case 'f': fflag = 1; add_compunit(CU_FILE, optarg); break; case 'i': inplace = optarg; ispan = 0; /* don't span across input files */ break; case 'l': if(setvbuf(stdout, NULL, _IOLBF, 0) != 0) warnx("setting line buffered output failed"); break; case 'n': nflag = 1; break; case 'u': if(setvbuf(stdout, NULL, _IONBF, 0) != 0) warnx("setting unbuffered output failed"); break; default: case '?': usage(); } argc -= optind; argv += optind; /* First usage case; script is the first arg */ if (!eflag && !fflag && *argv) { add_compunit(CU_STRING, *argv); argv++; } compile(); /* Continue with first and start second usage */ if (*argv) for (; *argv; argv++) add_file(*argv); else add_file(NULL); process(); cfclose(prog, NULL); if (fclose(stdout)) err(1, "stdout"); exit(rval); } static void usage(void) { (void)fprintf(stderr, "usage: %s script [-Ealnru] [-i extension] [file ...]\n" "\t%s [-Ealnu] [-i extension] [-e script] ... [-f script_file]" " ... [file ...]\n", getprogname(), getprogname()); exit(1); } /* * Like fgets, but go through the chain of compilation units chaining them * together. Empty strings and files are ignored. 
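 *
 * For instance, a hypothetical invocation
 *
 *	sed -e '1d' -f fixups.sed input.txt
 *
 * queues two compilation units: the -e string (which main() now gives
 * a trailing newline) and the file fixups.sed.  cu_fgets() hands
 * compile() the string's single line first and then every line of the
 * file, resetting linenum at each unit boundary so error messages
 * point into the right unit.
 *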
*/ const char * cu_fgets(int *more) { static enum {ST_EOF, ST_FILE, ST_STRING} state = ST_EOF; static FILE *f; /* Current open file */ static const char *s; /* Current pointer inside string */ static char string_ident[30], *lastresult; static size_t lastsize; char *p; const char *start; again: switch (state) { case ST_EOF: if (script == NULL) { if (more != NULL) *more = 0; return (NULL); } linenum = 0; switch (script->type) { case CU_FILE: if ((f = fopen(script->s, "r")) == NULL) err(1, "%s", script->s); fname = script->s; state = ST_FILE; goto again; case CU_STRING: if (((size_t)snprintf(string_ident, sizeof(string_ident), "\"%s\"", script->s)) >= sizeof(string_ident) - 1) (void)strcpy(string_ident + sizeof(string_ident) - 6, " ...\""); fname = string_ident; s = script->s; state = ST_STRING; goto again; } case ST_FILE: p = lastresult; if (getline(&p, &lastsize, f) != -1) { linenum++; if (linenum == 1 && p[0] == '#' && p[1] == 'n') nflag = 1; if (more != NULL) *more = !feof(f); return (lastresult = p); } else if (ferror(f)) err(1, "%s", script->s); script = script->next; (void)fclose(f); state = ST_EOF; goto again; case ST_STRING: if (linenum == 0 && s[0] == '#' && s[1] == 'n') nflag = 1; else if (s[0] == '\0') { state = ST_EOF; script = script->next; goto again; } start = s; for (;;) { switch (*s) { case '\0': state = ST_EOF; script = script->next; /* FALLTHROUGH */ case '\n': s++; linenum++; if (more != NULL) *more = 0; return (start); default: s++; } } } /* NOTREACHED */ return (NULL); } /* * Like fgets, but go through the list of files chaining them together. * Set len to the length of the line. */ int mf_fgets(SPACE *sp, enum e_spflag spflag) { struct stat sb; ssize_t len; char *dirbuf, *basebuf; static char *p = NULL; static size_t plen = 0; int c; static int firstfile; if (infile == NULL) { /* stdin? */ if (files->fname == NULL) { if (inplace != NULL) errx(1, "-I or -i may not be used with stdin"); infile = stdin; fname = "stdin"; outfile = stdout; outfname = "stdout"; } firstfile = 1; } for (;;) { if (infile != NULL && (c = getc(infile)) != EOF) { (void)ungetc(c, infile); break; } /* If we are here then either eof or no files are open yet */ if (infile == stdin) { sp->len = 0; return (0); } if (infile != NULL) { fclose(infile); if (*oldfname != '\0') { /* if there was a backup file, remove it */ unlink(oldfname); /* * Backup the original. Note that hard links * are not supported on all filesystems. */ if ((link(fname, oldfname) != 0) && (rename(fname, oldfname) != 0)) { warn("rename()"); if (*tmpfname) unlink(tmpfname); exit(1); } *oldfname = '\0'; } if (*tmpfname != '\0') { if (outfile != NULL && outfile != stdout) if (fclose(outfile) != 0) { warn("fclose()"); unlink(tmpfname); exit(1); } outfile = NULL; if (rename(tmpfname, fname) != 0) { /* this should not happen really! 
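 *
 * (The rename that can fail here is the final step of in-place
 * editing: output was written to a dot-file named ".!pid!name" in the
 * same directory and is now moved over the original.  From the user's
 * side, a hypothetical "sed -i .orig 's/foo/bar/g' config" edits
 * config in place and leaves the untouched copy as config.orig, while
 * an empty extension, -i '', skips the backup entirely.)
 *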
*/ warn("rename()"); unlink(tmpfname); exit(1); } *tmpfname = '\0'; } outfname = NULL; } if (firstfile == 0) files = files->next; else firstfile = 0; if (files == NULL) { sp->len = 0; return (0); } fname = files->fname; if (inplace != NULL) { if (lstat(fname, &sb) != 0) err(1, "%s", fname); if (!(sb.st_mode & S_IFREG)) errx(1, "%s: %s %s", fname, "in-place editing only", "works for regular files"); if (*inplace != '\0') { strlcpy(oldfname, fname, sizeof(oldfname)); len = strlcat(oldfname, inplace, sizeof(oldfname)); if ((size_t)len > sizeof(oldfname)) errx(1, "%s: name too long", fname); } if ((dirbuf = strdup(fname)) == NULL || (basebuf = strdup(fname)) == NULL) err(1, "strdup"); len = snprintf(tmpfname, sizeof(tmpfname), "%s/.!%ld!%s", dirname(dirbuf), (long)getpid(), basename(basebuf)); free(dirbuf); free(basebuf); if ((size_t)len >= sizeof(tmpfname)) errx(1, "%s: name too long", fname); unlink(tmpfname); if (outfile != NULL && outfile != stdout) fclose(outfile); if ((outfile = fopen(tmpfname, "w")) == NULL) err(1, "%s", fname); fchown(fileno(outfile), sb.st_uid, sb.st_gid); fchmod(fileno(outfile), sb.st_mode & ALLPERMS); outfname = tmpfname; if (!ispan) { linenum = 0; resetstate(); } } else { outfile = stdout; outfname = "stdout"; } if ((infile = fopen(fname, "r")) == NULL) { warn("%s", fname); rval = 1; continue; } } /* * We are here only when infile is open and we still have something * to read from it. * * Use getline() so that we can handle essentially infinite input * data. The p and plen are static so each invocation gives * getline() the same buffer which is expanded as needed. */ len = getline(&p, &plen, infile); if (len == -1) err(1, "%s", fname); if (len != 0 && p[len - 1] == '\n') { sp->append_newline = 1; len--; } else if (!lastline()) { sp->append_newline = 1; } else { sp->append_newline = 0; } cspace(sp, p, len, spflag); linenum++; return (1); } /* * Add a compilation unit to the linked list */ static void add_compunit(enum e_cut type, const char *s) { struct s_compunit *cu; if ((cu = malloc(sizeof(struct s_compunit))) == NULL) err(1, "malloc"); cu->type = type; cu->s = s; cu->next = NULL; *cu_nextp = cu; cu_nextp = &cu->next; } /* * Add a file to the linked list */ static void add_file(const char *s) { struct s_flist *fp; if ((fp = malloc(sizeof(struct s_flist))) == NULL) err(1, "malloc"); fp->next = NULL; *fl_nextp = fp; fp->fname = s; fl_nextp = &fp->next; } static int next_files_have_lines(void) { struct s_flist *file; FILE *file_fd; int ch; file = files; while ((file = file->next) != NULL) { if ((file_fd = fopen(file->fname, "r")) == NULL) continue; if ((ch = getc(file_fd)) != EOF) { /* * This next file has content, therefore current * file doesn't contains the last line. */ ungetc(ch, file_fd); fclose(file_fd); return (1); } fclose(file_fd); } return (0); } int lastline(void) { int ch; if (feof(infile)) { return !( (inplace == NULL || ispan) && next_files_have_lines()); } if ((ch = getc(infile)) == EOF) { return !( (inplace == NULL || ispan) && next_files_have_lines()); } ungetc(ch, infile); return (0); } Index: user/alc/PQ_LAUNDRY/usr.bin/soelim/soelim.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/soelim/soelim.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/soelim/soelim.c (revision 303642) @@ -1,178 +1,177 @@ /*- * Copyright (c) 2014 Baptiste Daroussin * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include #define C_OPTION 0x1 static StringList *includes; static void usage(void) { fprintf(stderr, "usage: soelim [-Crtv] [-I dir] [files]\n"); exit(EXIT_FAILURE); } static FILE * soelim_fopen(const char *name) { FILE *f; char path[PATH_MAX]; size_t i; if (strcmp(name, "-") == 0) return (stdin); if ((f = fopen(name, "r")) != NULL) return (f); if (*name == '/') { warn("can't open '%s'", name); return (NULL); } for (i = 0; i < includes->sl_cur; i++) { snprintf(path, sizeof(path), "%s/%s", includes->sl_str[i], name); if ((f = fopen(path, "r")) != NULL) return (f); } warn("can't open '%s'", name); return (f); } static int soelim_file(FILE *f, int flag) { char *line = NULL; char *walk, *cp; size_t linecap = 0; ssize_t linelen; if (f == NULL) return (1); while ((linelen = getline(&line, &linecap, f)) > 0) { if (strncmp(line, ".so", 3) != 0) { printf("%s", line); continue; } walk = line + 3; if (!isspace(*walk) && ((flag & C_OPTION) == 0)) { printf("%s", line); continue; } while (isspace(*walk)) walk++; cp = walk; while (*cp != '\0' && !isspace(*cp)) cp++; *cp = 0; if (cp < line + linelen) cp++; if (*walk == '\0') { printf("%s", line); continue; } if (soelim_file(soelim_fopen(walk), flag) == 1) { free(line); return (1); } if (*cp != '\0') printf("%s", cp); } free(line); fclose(f); return (0); } int main(int argc, char **argv) { int ch, i; int ret = 0; int flags = 0; includes = sl_init(); if (includes == NULL) err(EXIT_FAILURE, "sl_init()"); while ((ch = getopt(argc, argv, "CrtvI:")) != -1) { switch (ch) { case 'C': flags |= C_OPTION; break; case 'r': case 'v': case 't': /* stub compatibility with groff's soelim */ break; case 'I': sl_add(includes, optarg); break; default: sl_free(includes, 0); usage(); } } argc -= optind; argv += optind; if (argc == 0) ret = soelim_file(stdin, flags); for (i = 0; i < argc; i++) ret = soelim_file(soelim_fopen(argv[i]), flags); sl_free(includes, 0); return (ret); } Index: user/alc/PQ_LAUNDRY/usr.bin/uniq/uniq.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/uniq/uniq.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.bin/uniq/uniq.c (revision 303642) 
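The -#define _WITH_GETLINE removal in the soelim.c hunk above, and the
identical removals in uniq.c, autofs/common.c and pkg.c further down, reflect
that getline(3) is a standard POSIX.1-2008 function and, on current FreeBSD,
is declared by <stdio.h> unconditionally, so the old compatibility macro no
longer does anything.  A minimal, self-contained sketch of plain getline(3)
usage (the program itself is illustrative and not part of the diff):

	#include <sys/types.h>
	#include <stdio.h>
	#include <stdlib.h>

	int
	main(void)
	{
		char *line = NULL;	/* getline() allocates and grows this buffer */
		size_t cap = 0;
		ssize_t len;

		/* Echo stdin line by line; -1 means EOF or a read error. */
		while ((len = getline(&line, &cap, stdin)) != -1)
			fwrite(line, 1, (size_t)len, stdout);
		free(line);
		return (ferror(stdin) ? 1 : 0);
	}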
@@ -1,357 +1,356 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Case Larsen. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1989, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95"; #endif static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include #include #include #include #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include #include #include static int cflag, dflag, uflag, iflag; static int numchars, numfields, repeats; static FILE *file(const char *, const char *); static wchar_t *convert(const char *); static int inlcmp(const char *, const char *); static void show(FILE *, const char *); static wchar_t *skip(wchar_t *); static void obsolete(char *[]); static void usage(void); static void strerror_init(void) { /* * Cache NLS data before entering capability mode. * XXXPJD: There should be strerror_init() and strsignal_init() in libc. */ (void)catopen("libc", NL_CAT_LOCALE); } int main (int argc, char *argv[]) { wchar_t *tprev, *tthis; FILE *ifp, *ofp; int ch, comp; size_t prevbuflen, thisbuflen, b1; char *prevline, *thisline, *p; const char *ifn; cap_rights_t rights; (void) setlocale(LC_ALL, ""); obsolete(argv); while ((ch = getopt(argc, argv, "cdif:s:u")) != -1) switch (ch) { case 'c': cflag = 1; break; case 'd': dflag = 1; break; case 'i': iflag = 1; break; case 'f': numfields = strtol(optarg, &p, 10); if (numfields < 0 || *p) errx(1, "illegal field skip value: %s", optarg); break; case 's': numchars = strtol(optarg, &p, 10); if (numchars < 0 || *p) errx(1, "illegal character skip value: %s", optarg); break; case 'u': uflag = 1; break; case '?': default: usage(); } argc -= optind; argv += optind; /* If no flags are set, default is -d -u. 
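 * That is, with no options every input line is written exactly once per
 * run of identical adjacent lines (the historic uniq behaviour); -d
 * restricts output to lines that were repeated, -u to lines that were
 * not, and -c prefixes each output line with its repeat count.  For the
 * adjacent input lines "a", "a", "b":
 *
 *	uniq     ->  a, b
 *	uniq -c  ->  "2 a", "1 b"
 *	uniq -d  ->  a
 *	uniq -u  ->  b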
*/ if (cflag) { if (dflag || uflag) usage(); } else if (!dflag && !uflag) dflag = uflag = 1; if (argc > 2) usage(); ifp = stdin; ifn = "stdin"; ofp = stdout; if (argc > 0 && strcmp(argv[0], "-") != 0) ifp = file(ifn = argv[0], "r"); cap_rights_init(&rights, CAP_FSTAT, CAP_READ); if (cap_rights_limit(fileno(ifp), &rights) < 0 && errno != ENOSYS) err(1, "unable to limit rights for %s", ifn); cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE); if (argc > 1) ofp = file(argv[1], "w"); else cap_rights_set(&rights, CAP_IOCTL); if (cap_rights_limit(fileno(ofp), &rights) < 0 && errno != ENOSYS) { err(1, "unable to limit rights for %s", argc > 1 ? argv[1] : "stdout"); } if (cap_rights_is_set(&rights, CAP_IOCTL)) { unsigned long cmd; cmd = TIOCGETA; /* required by isatty(3) in printf(3) */ if (cap_ioctls_limit(fileno(ofp), &cmd, 1) < 0 && errno != ENOSYS) { err(1, "unable to limit ioctls for %s", argc > 1 ? argv[1] : "stdout"); } } strerror_init(); if (cap_enter() < 0 && errno != ENOSYS) err(1, "unable to enter capability mode"); prevbuflen = thisbuflen = 0; prevline = thisline = NULL; if (getline(&prevline, &prevbuflen, ifp) < 0) { if (ferror(ifp)) err(1, "%s", ifn); exit(0); } tprev = convert(prevline); if (!cflag && uflag && dflag) show(ofp, prevline); tthis = NULL; while (getline(&thisline, &thisbuflen, ifp) >= 0) { if (tthis != NULL) free(tthis); tthis = convert(thisline); if (tthis == NULL && tprev == NULL) comp = inlcmp(thisline, prevline); else if (tthis == NULL || tprev == NULL) comp = 1; else comp = wcscoll(tthis, tprev); if (comp) { /* If different, print; set previous to new value. */ if (cflag || !dflag || !uflag) show(ofp, prevline); p = prevline; b1 = prevbuflen; prevline = thisline; prevbuflen = thisbuflen; if (tprev != NULL) free(tprev); tprev = tthis; if (!cflag && uflag && dflag) show(ofp, prevline); thisline = p; thisbuflen = b1; tthis = NULL; repeats = 0; } else ++repeats; } if (ferror(ifp)) err(1, "%s", ifn); if (cflag || !dflag || !uflag) show(ofp, prevline); exit(0); } static wchar_t * convert(const char *str) { size_t n; wchar_t *buf, *ret, *p; if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1) return (NULL); if (SIZE_MAX / sizeof(*buf) < n + 1) errx(1, "conversion buffer length overflow"); if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL) err(1, "malloc"); if (mbstowcs(buf, str, n + 1) != n) errx(1, "internal mbstowcs() error"); /* The last line may not end with \n. */ if (n > 0 && buf[n - 1] == L'\n') buf[n - 1] = L'\0'; /* If requested get the chosen fields + character offsets. */ if (numfields || numchars) { if ((ret = wcsdup(skip(buf))) == NULL) err(1, "wcsdup"); free(buf); } else ret = buf; if (iflag) { for (p = ret; *p != L'\0'; p++) *p = towlower(*p); } return (ret); } static int inlcmp(const char *s1, const char *s2) { int c1, c2; while (*s1 == *s2++) if (*s1++ == '\0') return (0); c1 = (unsigned char)*s1; c2 = (unsigned char)*(s2 - 1); /* The last line may not end with \n. */ if (c1 == '\n') c1 = '\0'; if (c2 == '\n') c2 = '\0'; return (c1 - c2); } /* * show -- * Output a line depending on the flags and number of repetitions * of the line. 
*/ static void show(FILE *ofp, const char *str) { if (cflag) (void)fprintf(ofp, "%4d %s", repeats + 1, str); if ((dflag && repeats) || (uflag && !repeats)) (void)fprintf(ofp, "%s", str); } static wchar_t * skip(wchar_t *str) { int nchars, nfields; for (nfields = 0; *str != L'\0' && nfields++ != numfields; ) { while (iswblank(*str)) str++; while (*str != L'\0' && !iswblank(*str)) str++; } for (nchars = numchars; nchars-- && *str != L'\0'; ++str) ; return(str); } static FILE * file(const char *name, const char *mode) { FILE *fp; if ((fp = fopen(name, mode)) == NULL) err(1, "%s", name); return(fp); } static void obsolete(char *argv[]) { int len; char *ap, *p, *start; while ((ap = *++argv)) { /* Return if "--" or not an option of any form. */ if (ap[0] != '-') { if (ap[0] != '+') return; } else if (ap[1] == '-') return; if (!isdigit((unsigned char)ap[1])) continue; /* * Digit signifies an old-style option. Malloc space for dash, * new option and argument. */ len = strlen(ap); if ((start = p = malloc(len + 3)) == NULL) err(1, "malloc"); *p++ = '-'; *p++ = ap[0] == '+' ? 's' : 'f'; (void)strcpy(p, ap + 1); *argv = start; } } static void usage(void) { (void)fprintf(stderr, "usage: uniq [-c | -d | -u] [-i] [-f fields] [-s chars] [input [output]]\n"); exit(1); } Index: user/alc/PQ_LAUNDRY/usr.sbin/autofs/common.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.sbin/autofs/common.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.sbin/autofs/common.c (revision 303642) @@ -1,1224 +1,1223 @@ /*- * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * * This software was developed by Edward Tomasz Napierala under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include "autofs_ioctl.h" #include "common.h" extern FILE *yyin; extern char *yytext; extern int yylex(void); static void parse_master_yyin(struct node *root, const char *master); static void parse_map_yyin(struct node *parent, const char *map, const char *executable_key); char * checked_strdup(const char *s) { char *c; assert(s != NULL); c = strdup(s); if (c == NULL) log_err(1, "strdup"); return (c); } /* * Concatenate two strings, inserting separator between them, unless not needed. */ char * concat(const char *s1, char separator, const char *s2) { char *result; char s1last, s2first; int ret; if (s1 == NULL) s1 = ""; if (s2 == NULL) s2 = ""; if (s1[0] == '\0') s1last = '\0'; else s1last = s1[strlen(s1) - 1]; s2first = s2[0]; if (s1last == separator && s2first == separator) { /* * If s1 ends with the separator and s2 begins with * it - skip the latter; otherwise concatenating "/" * and "/foo" would end up returning "//foo". */ ret = asprintf(&result, "%s%s", s1, s2 + 1); } else if (s1last == separator || s2first == separator || s1[0] == '\0' || s2[0] == '\0') { ret = asprintf(&result, "%s%s", s1, s2); } else { ret = asprintf(&result, "%s%c%s", s1, separator, s2); } if (ret < 0) log_err(1, "asprintf"); //log_debugx("%s: got %s and %s, returning %s", __func__, s1, s2, result); return (result); } void create_directory(const char *path) { char *component, *copy, *tofree, *partial, *tmp; int error; assert(path[0] == '/'); /* * +1 to skip the leading slash. */ copy = tofree = checked_strdup(path + 1); partial = checked_strdup(""); for (;;) { component = strsep(©, "/"); if (component == NULL) break; tmp = concat(partial, '/', component); free(partial); partial = tmp; //log_debugx("creating \"%s\"", partial); error = mkdir(partial, 0755); if (error != 0 && errno != EEXIST) { log_warn("cannot create %s", partial); return; } } free(tofree); } struct node * node_new_root(void) { struct node *n; n = calloc(1, sizeof(*n)); if (n == NULL) log_err(1, "calloc"); // XXX n->n_key = checked_strdup("/"); n->n_options = checked_strdup(""); TAILQ_INIT(&n->n_children); return (n); } struct node * node_new(struct node *parent, char *key, char *options, char *location, const char *config_file, int config_line) { struct node *n; n = calloc(1, sizeof(*n)); if (n == NULL) log_err(1, "calloc"); TAILQ_INIT(&n->n_children); assert(key != NULL); assert(key[0] != '\0'); n->n_key = key; if (options != NULL) n->n_options = options; else n->n_options = strdup(""); n->n_location = location; assert(config_file != NULL); n->n_config_file = config_file; assert(config_line >= 0); n->n_config_line = config_line; assert(parent != NULL); n->n_parent = parent; TAILQ_INSERT_TAIL(&parent->n_children, n, n_next); return (n); } struct node * node_new_map(struct node *parent, char *key, char *options, char *map, const char *config_file, int config_line) { struct node *n; n = calloc(1, sizeof(*n)); if (n == NULL) log_err(1, "calloc"); TAILQ_INIT(&n->n_children); assert(key != NULL); assert(key[0] != '\0'); n->n_key = key; if (options != NULL) n->n_options = options; else n->n_options = strdup(""); n->n_map = map; assert(config_file != NULL); n->n_config_file = config_file; assert(config_line >= 0); n->n_config_line = config_line; assert(parent != 
NULL); n->n_parent = parent; TAILQ_INSERT_TAIL(&parent->n_children, n, n_next); return (n); } static struct node * node_duplicate(const struct node *o, struct node *parent) { const struct node *child; struct node *n; if (parent == NULL) parent = o->n_parent; n = node_new(parent, o->n_key, o->n_options, o->n_location, o->n_config_file, o->n_config_line); TAILQ_FOREACH(child, &o->n_children, n_next) node_duplicate(child, n); return (n); } static void node_delete(struct node *n) { struct node *child, *tmp; assert (n != NULL); TAILQ_FOREACH_SAFE(child, &n->n_children, n_next, tmp) node_delete(child); if (n->n_parent != NULL) TAILQ_REMOVE(&n->n_parent->n_children, n, n_next); free(n); } /* * Move (reparent) node 'n' to make it sibling of 'previous', placed * just after it. */ static void node_move_after(struct node *n, struct node *previous) { TAILQ_REMOVE(&n->n_parent->n_children, n, n_next); n->n_parent = previous->n_parent; TAILQ_INSERT_AFTER(&previous->n_parent->n_children, previous, n, n_next); } static void node_expand_includes(struct node *root, bool is_master) { struct node *n, *n2, *tmp, *tmp2, *tmproot; int error; TAILQ_FOREACH_SAFE(n, &root->n_children, n_next, tmp) { if (n->n_key[0] != '+') continue; error = access(AUTO_INCLUDE_PATH, F_OK); if (error != 0) { log_errx(1, "directory services not configured; " "%s does not exist", AUTO_INCLUDE_PATH); } /* * "+1" to skip leading "+". */ yyin = auto_popen(AUTO_INCLUDE_PATH, n->n_key + 1, NULL); assert(yyin != NULL); tmproot = node_new_root(); if (is_master) parse_master_yyin(tmproot, n->n_key); else parse_map_yyin(tmproot, n->n_key, NULL); error = auto_pclose(yyin); yyin = NULL; if (error != 0) { log_errx(1, "failed to handle include \"%s\"", n->n_key); } /* * Entries to be included are now in tmproot. We need to merge * them with the rest, preserving their place and ordering. */ TAILQ_FOREACH_REVERSE_SAFE(n2, &tmproot->n_children, nodehead, n_next, tmp2) { node_move_after(n2, n); } node_delete(n); node_delete(tmproot); } } static char * expand_ampersand(char *string, const char *key) { char c, *expanded; int i, ret, before_len = 0; bool backslashed = false; assert(key[0] != '\0'); expanded = checked_strdup(string); for (i = 0; string[i] != '\0'; i++) { c = string[i]; if (c == '\\' && backslashed == false) { backslashed = true; continue; } if (backslashed) { backslashed = false; continue; } backslashed = false; if (c != '&') continue; /* * The 'before_len' variable contains the number * of characters before the '&'. */ before_len = i; //assert(i + 1 < (int)strlen(string)); ret = asprintf(&expanded, "%.*s%s%s", before_len, string, key, string + before_len + 1); if (ret < 0) log_err(1, "asprintf"); //log_debugx("\"%s\" expanded with key \"%s\" to \"%s\"", // string, key, expanded); /* * Figure out where to start searching for next variable. */ string = expanded; i = before_len + strlen(key); backslashed = false; //assert(i < (int)strlen(string)); } return (expanded); } /* * Expand "&" in n_location. If the key is NULL, try to use * key from map entries themselves. Keep in mind that maps * consist of tho levels of node structures, the key is one * level up. * * Variant with NULL key is for "automount -LL". 
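 *
 * For example, a hypothetical wildcard map entry whose location reads
 *
 *	server:/exports/&
 *
 * looked up with the key "music" expands to "server:/exports/music".
 * An ampersand preceded by a backslash is skipped and left unexpanded,
 * as handled in expand_ampersand() above.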
*/ void node_expand_ampersand(struct node *n, const char *key) { struct node *child; if (n->n_location != NULL) { if (key == NULL) { if (n->n_parent != NULL && strcmp(n->n_parent->n_key, "*") != 0) { n->n_location = expand_ampersand(n->n_location, n->n_parent->n_key); } } else { n->n_location = expand_ampersand(n->n_location, key); } } TAILQ_FOREACH(child, &n->n_children, n_next) node_expand_ampersand(child, key); } /* * Expand "*" in n_key. */ void node_expand_wildcard(struct node *n, const char *key) { struct node *child, *expanded; assert(key != NULL); if (strcmp(n->n_key, "*") == 0) { expanded = node_duplicate(n, NULL); expanded->n_key = checked_strdup(key); node_move_after(expanded, n); } TAILQ_FOREACH(child, &n->n_children, n_next) node_expand_wildcard(child, key); } int node_expand_defined(struct node *n) { struct node *child; int error, cumulated_error = 0; if (n->n_location != NULL) { n->n_location = defined_expand(n->n_location); if (n->n_location == NULL) { log_warnx("failed to expand location for %s", node_path(n)); return (EINVAL); } } TAILQ_FOREACH(child, &n->n_children, n_next) { error = node_expand_defined(child); if (error != 0 && cumulated_error == 0) cumulated_error = error; } return (cumulated_error); } static bool node_is_direct_key(const struct node *n) { if (n->n_parent != NULL && n->n_parent->n_parent == NULL && strcmp(n->n_key, "/-") == 0) { return (true); } return (false); } bool node_is_direct_map(const struct node *n) { for (;;) { assert(n->n_parent != NULL); if (n->n_parent->n_parent == NULL) break; n = n->n_parent; } return (node_is_direct_key(n)); } bool node_has_wildcards(const struct node *n) { const struct node *child; TAILQ_FOREACH(child, &n->n_children, n_next) { if (strcmp(child->n_key, "*") == 0) return (true); } return (false); } static void node_expand_maps(struct node *n, bool indirect) { struct node *child, *tmp; TAILQ_FOREACH_SAFE(child, &n->n_children, n_next, tmp) { if (node_is_direct_map(child)) { if (indirect) continue; } else { if (indirect == false) continue; } /* * This is the first-level map node; the one that contains * the key and subnodes with mountpoints and actual map names. */ if (child->n_map == NULL) continue; if (indirect) { log_debugx("map \"%s\" is an indirect map, parsing", child->n_map); } else { log_debugx("map \"%s\" is a direct map, parsing", child->n_map); } parse_map(child, child->n_map, NULL, NULL); } } static void node_expand_direct_maps(struct node *n) { node_expand_maps(n, false); } void node_expand_indirect_maps(struct node *n) { node_expand_maps(n, true); } static char * node_path_x(const struct node *n, char *x) { char *path; if (n->n_parent == NULL) return (x); /* * Return "/-" for direct maps only if we were asked for path * to the "/-" node itself, not to any of its subnodes. */ if (node_is_direct_key(n) && x[0] != '\0') return (x); assert(n->n_key[0] != '\0'); path = concat(n->n_key, '/', x); free(x); return (node_path_x(n->n_parent, path)); } /* * Return full path for node, consisting of concatenated * paths of node itself and all its parents, up to the root. */ char * node_path(const struct node *n) { char *path; size_t len; path = node_path_x(n, checked_strdup("")); /* * Strip trailing slash, unless the whole path is "/". 
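 * For a map entry keyed "music" under an auto_master mount point of
 * "/media", for example, the assembled path is "/media/music".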
*/ len = strlen(path); if (len > 1 && path[len - 1] == '/') path[len - 1] = '\0'; return (path); } static char * node_options_x(const struct node *n, char *x) { char *options; if (n == NULL) return (x); options = concat(x, ',', n->n_options); free(x); return (node_options_x(n->n_parent, options)); } /* * Return options for node, consisting of concatenated * options from the node itself and all its parents, * up to the root. */ char * node_options(const struct node *n) { return (node_options_x(n, checked_strdup(""))); } static void node_print_indent(const struct node *n, const char *cmdline_options, int indent) { const struct node *child, *first_child; char *path, *options, *tmp; path = node_path(n); tmp = node_options(n); options = concat(cmdline_options, ',', tmp); free(tmp); /* * Do not show both parent and child node if they have the same * mountpoint; only show the child node. This means the typical, * "key location", map entries are shown in a single line; * the "key mountpoint1 location2 mountpoint2 location2" entries * take multiple lines. */ first_child = TAILQ_FIRST(&n->n_children); if (first_child == NULL || TAILQ_NEXT(first_child, n_next) != NULL || strcmp(path, node_path(first_child)) != 0) { assert(n->n_location == NULL || n->n_map == NULL); printf("%*.s%-*s %s%-*s %-*s # %s map %s at %s:%d\n", indent, "", 25 - indent, path, options[0] != '\0' ? "-" : " ", 20, options[0] != '\0' ? options : "", 20, n->n_location != NULL ? n->n_location : n->n_map != NULL ? n->n_map : "", node_is_direct_map(n) ? "direct" : "indirect", indent == 0 ? "referenced" : "defined", n->n_config_file, n->n_config_line); } free(path); free(options); TAILQ_FOREACH(child, &n->n_children, n_next) node_print_indent(child, cmdline_options, indent + 2); } /* * Recursively print node with all its children. The cmdline_options * argument is used for additional options to be prepended to all the * others - usually those are the options passed by command line. */ void node_print(const struct node *n, const char *cmdline_options) { const struct node *child; TAILQ_FOREACH(child, &n->n_children, n_next) node_print_indent(child, cmdline_options, 0); } static struct node * node_find_x(struct node *node, const char *path) { struct node *child, *found; char *tmp; size_t tmplen; //log_debugx("looking up %s in %s", path, node_path(node)); if (!node_is_direct_key(node)) { tmp = node_path(node); tmplen = strlen(tmp); if (strncmp(tmp, path, tmplen) != 0) { free(tmp); return (NULL); } if (path[tmplen] != '/' && path[tmplen] != '\0') { /* * If we have two map entries like 'foo' and 'foobar', make * sure the search for 'foobar' won't match 'foo' instead. */ free(tmp); return (NULL); } free(tmp); } TAILQ_FOREACH(child, &node->n_children, n_next) { found = node_find_x(child, path); if (found != NULL) return (found); } if (node->n_parent == NULL || node_is_direct_key(node)) return (NULL); return (node); } struct node * node_find(struct node *root, const char *path) { struct node *node; assert(root->n_parent == NULL); node = node_find_x(root, path); if (node != NULL) assert(node != root); return (node); } /* * Canonical form of a map entry looks like this: * * key [-options] [ [/mountpoint] [-options2] location ... ] * * Entries for executable maps are slightly different, as they * lack the 'key' field and are always single-line; the key field * for those maps is taken from 'executable_key' argument. * * We parse it in such a way that a map always has two levels - first * for key, and the second, for the mountpoint. 
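 *
 * For example (host and paths purely illustrative), a single-location
 * entry
 *
 *	doc	-ro,nosuid	server:/exports/doc
 *
 * becomes a key node "doc" with one child mounted at "/", while a
 * multi-mountpoint entry such as
 *
 *	src	/head server:/exports/head /stable server:/exports/stable
 *
 * becomes a key node "src" with one child per mountpoint.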
*/ static void parse_map_yyin(struct node *parent, const char *map, const char *executable_key) { char *key = NULL, *options = NULL, *mountpoint = NULL, *options2 = NULL, *location = NULL; int ret; struct node *node; lineno = 1; if (executable_key != NULL) key = checked_strdup(executable_key); for (;;) { ret = yylex(); if (ret == 0 || ret == NEWLINE) { /* * In case of executable map, the key is always * non-NULL, even if the map is empty. So, make sure * we don't fail empty maps here. */ if ((key != NULL && executable_key == NULL) || options != NULL) { log_errx(1, "truncated entry at %s, line %d", map, lineno); } if (ret == 0 || executable_key != NULL) { /* * End of file. */ break; } else { key = options = NULL; continue; } } if (key == NULL) { key = checked_strdup(yytext); if (key[0] == '+') { node_new(parent, key, NULL, NULL, map, lineno); key = options = NULL; continue; } continue; } else if (yytext[0] == '-') { if (options != NULL) { log_errx(1, "duplicated options at %s, line %d", map, lineno); } /* * +1 to skip leading "-". */ options = checked_strdup(yytext + 1); continue; } /* * We cannot properly handle a situation where the map key * is "/". Ignore such entries. * * XXX: According to Piete Brooks, Linux automounter uses * "/" as a wildcard character in LDAP maps. Perhaps * we should work around this braindamage by substituting * "*" for "/"? */ if (strcmp(key, "/") == 0) { log_warnx("nonsensical map key \"/\" at %s, line %d; " "ignoring map entry ", map, lineno); /* * Skip the rest of the entry. */ do { ret = yylex(); } while (ret != 0 && ret != NEWLINE); key = options = NULL; continue; } //log_debugx("adding map node, %s", key); node = node_new(parent, key, options, NULL, map, lineno); key = options = NULL; for (;;) { if (yytext[0] == '/') { if (mountpoint != NULL) { log_errx(1, "duplicated mountpoint " "in %s, line %d", map, lineno); } if (options2 != NULL || location != NULL) { log_errx(1, "mountpoint out of order " "in %s, line %d", map, lineno); } mountpoint = checked_strdup(yytext); goto again; } if (yytext[0] == '-') { if (options2 != NULL) { log_errx(1, "duplicated options " "in %s, line %d", map, lineno); } if (location != NULL) { log_errx(1, "options out of order " "in %s, line %d", map, lineno); } options2 = checked_strdup(yytext + 1); goto again; } if (location != NULL) { log_errx(1, "too many arguments " "in %s, line %d", map, lineno); } /* * If location field starts with colon, e.g. ":/dev/cd0", * then strip it. */ if (yytext[0] == ':') { location = checked_strdup(yytext + 1); if (location[0] == '\0') { log_errx(1, "empty location in %s, " "line %d", map, lineno); } } else { location = checked_strdup(yytext); } if (mountpoint == NULL) mountpoint = checked_strdup("/"); if (options2 == NULL) options2 = checked_strdup(""); #if 0 log_debugx("adding map node, %s %s %s", mountpoint, options2, location); #endif node_new(node, mountpoint, options2, location, map, lineno); mountpoint = options2 = location = NULL; again: ret = yylex(); if (ret == 0 || ret == NEWLINE) { if (mountpoint != NULL || options2 != NULL || location != NULL) { log_errx(1, "truncated entry " "in %s, line %d", map, lineno); } break; } } } } /* * Parse output of a special map called without argument. It is a list * of keys, separated by newlines. They can contain whitespace, so use * getline(3) instead of lexer used for maps. 
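 * The -hosts special map is the typical case: invoked without a key,
 * the corresponding special_hosts script prints one key (host name) per
 * line, and each such line becomes a key node here; see
 * parse_special_map() below for how that output ends up in yyin.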
*/ static void parse_map_keys_yyin(struct node *parent, const char *map) { char *line = NULL, *key; size_t linecap = 0; ssize_t linelen; lineno = 1; for (;;) { linelen = getline(&line, &linecap, yyin); if (linelen < 0) { /* * End of file. */ break; } if (linelen <= 1) { /* * Empty line, consisting of just the newline. */ continue; } /* * "-1" to strip the trailing newline. */ key = strndup(line, linelen - 1); log_debugx("adding key \"%s\"", key); node_new(parent, key, NULL, NULL, map, lineno); lineno++; } free(line); } static bool file_is_executable(const char *path) { struct stat sb; int error; error = stat(path, &sb); if (error != 0) log_err(1, "cannot stat %s", path); if ((sb.st_mode & S_IXUSR) || (sb.st_mode & S_IXGRP) || (sb.st_mode & S_IXOTH)) return (true); return (false); } /* * Parse a special map, e.g. "-hosts". */ static void parse_special_map(struct node *parent, const char *map, const char *key) { char *path; int error, ret; assert(map[0] == '-'); /* * +1 to skip leading "-" in map name. */ ret = asprintf(&path, "%s/special_%s", AUTO_SPECIAL_PREFIX, map + 1); if (ret < 0) log_err(1, "asprintf"); yyin = auto_popen(path, key, NULL); assert(yyin != NULL); if (key == NULL) { parse_map_keys_yyin(parent, map); } else { parse_map_yyin(parent, map, key); } error = auto_pclose(yyin); yyin = NULL; if (error != 0) log_errx(1, "failed to handle special map \"%s\"", map); node_expand_includes(parent, false); node_expand_direct_maps(parent); free(path); } /* * Retrieve and parse map from directory services, e.g. LDAP. * Note that it is different from executable maps, in that * the include script outputs the whole map to standard output * (as opposed to executable maps that only output a single * entry, without the key), and it takes the map name as an * argument, instead of key. */ static void parse_included_map(struct node *parent, const char *map) { int error; assert(map[0] != '-'); assert(map[0] != '/'); error = access(AUTO_INCLUDE_PATH, F_OK); if (error != 0) { log_errx(1, "directory services not configured;" " %s does not exist", AUTO_INCLUDE_PATH); } yyin = auto_popen(AUTO_INCLUDE_PATH, map, NULL); assert(yyin != NULL); parse_map_yyin(parent, map, NULL); error = auto_pclose(yyin); yyin = NULL; if (error != 0) log_errx(1, "failed to handle remote map \"%s\"", map); node_expand_includes(parent, false); node_expand_direct_maps(parent); } void parse_map(struct node *parent, const char *map, const char *key, bool *wildcards) { char *path = NULL; int error, ret; bool executable; assert(map != NULL); assert(map[0] != '\0'); log_debugx("parsing map \"%s\"", map); if (wildcards != NULL) *wildcards = false; if (map[0] == '-') { if (wildcards != NULL) *wildcards = true; return (parse_special_map(parent, map, key)); } if (map[0] == '/') { path = checked_strdup(map); } else { ret = asprintf(&path, "%s/%s", AUTO_MAP_PREFIX, map); if (ret < 0) log_err(1, "asprintf"); log_debugx("map \"%s\" maps to \"%s\"", map, path); /* * See if the file exists. If not, try to obtain the map * from directory services. 
*/ error = access(path, F_OK); if (error != 0) { log_debugx("map file \"%s\" does not exist; falling " "back to directory services", path); return (parse_included_map(parent, map)); } } executable = file_is_executable(path); if (executable) { log_debugx("map \"%s\" is executable", map); if (wildcards != NULL) *wildcards = true; if (key != NULL) { yyin = auto_popen(path, key, NULL); } else { yyin = auto_popen(path, NULL); } assert(yyin != NULL); } else { yyin = fopen(path, "r"); if (yyin == NULL) log_err(1, "unable to open \"%s\"", path); } free(path); path = NULL; parse_map_yyin(parent, map, executable ? key : NULL); if (executable) { error = auto_pclose(yyin); yyin = NULL; if (error != 0) { log_errx(1, "failed to handle executable map \"%s\"", map); } } else { fclose(yyin); } yyin = NULL; log_debugx("done parsing map \"%s\"", map); node_expand_includes(parent, false); node_expand_direct_maps(parent); } static void parse_master_yyin(struct node *root, const char *master) { char *mountpoint = NULL, *map = NULL, *options = NULL; int ret; /* * XXX: 1 gives incorrect values; wtf? */ lineno = 0; for (;;) { ret = yylex(); if (ret == 0 || ret == NEWLINE) { if (mountpoint != NULL) { //log_debugx("adding map for %s", mountpoint); node_new_map(root, mountpoint, options, map, master, lineno); } if (ret == 0) { break; } else { mountpoint = map = options = NULL; continue; } } if (mountpoint == NULL) { mountpoint = checked_strdup(yytext); } else if (map == NULL) { map = checked_strdup(yytext); } else if (options == NULL) { /* * +1 to skip leading "-". */ options = checked_strdup(yytext + 1); } else { log_errx(1, "too many arguments at %s, line %d", master, lineno); } } } void parse_master(struct node *root, const char *master) { log_debugx("parsing auto_master file at \"%s\"", master); yyin = fopen(master, "r"); if (yyin == NULL) err(1, "unable to open %s", master); parse_master_yyin(root, master); fclose(yyin); yyin = NULL; log_debugx("done parsing \"%s\"", master); node_expand_includes(root, true); node_expand_direct_maps(root); } /* * Two things daemon(3) does, that we actually also want to do * when running in foreground, is closing the stdin and chdiring * to "/". This is what we do here. */ void lesser_daemon(void) { int error, fd; error = chdir("/"); if (error != 0) log_warn("chdir"); fd = open(_PATH_DEVNULL, O_RDWR, 0); if (fd < 0) { log_warn("cannot open %s", _PATH_DEVNULL); return; } error = dup2(fd, STDIN_FILENO); if (error != 0) log_warn("dup2"); error = close(fd); if (error != 0) { /* Bloody hell. */ log_warn("close"); } } int main(int argc, char **argv) { char *cmdname; if (argv[0] == NULL) log_errx(1, "NULL command name"); cmdname = basename(argv[0]); if (strcmp(cmdname, "automount") == 0) return (main_automount(argc, argv)); else if (strcmp(cmdname, "automountd") == 0) return (main_automountd(argc, argv)); else if (strcmp(cmdname, "autounmountd") == 0) return (main_autounmountd(argc, argv)); else log_errx(1, "binary name should be either \"automount\", " "\"automountd\", or \"autounmountd\""); } Index: user/alc/PQ_LAUNDRY/usr.sbin/bsdinstall/scripts/hardening =================================================================== --- user/alc/PQ_LAUNDRY/usr.sbin/bsdinstall/scripts/hardening (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.sbin/bsdinstall/scripts/hardening (revision 303642) @@ -1,79 +1,79 @@ #!/bin/sh #- # Copyright (c) 2016 Bartek Rutkowski # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ : ${DIALOG_OK=0} -echo -n > $BSDINSTALL_TMPETC/rc.conf.services +echo -n > $BSDINSTALL_TMPETC/rc.conf.hardening exec 3>&1 FEATURES=$( dialog --backtitle "FreeBSD Installer" \ --title "System Hardening" --nocancel --notags --separate-output \ --checklist "Choose system security hardening options:" \ 0 0 0 \ "hide_uids" "Hide processes running as other users" ${hide_uids:-off} \ "hide_gids" "Hide processes running as other groups" ${hide_gids:-off} \ "read_msgbuf" "Disable reading kernel message buffer for unprivileged users" ${read_msgbuf:-off} \ "proc_debug" "Disable process debugging facilities for unprivileged users" ${proc_debug:-off} \ "random_pid" "Randomize the PID of newly created processes" ${random_id:-off} \ "stack_guard" "Insert stack guard page ahead of the growable segments" ${stack_guard:-off} \ "clear_tmp" "Clean the /tmp filesystem on system startup" ${clear_tmp:-off} \ "disable_syslogd" "Disable opening Syslogd network socket (disables remote logging)" ${disable_syslogd:-off} \ "disable_sendmail" "Disable Sendmail service" ${disable_sendmail:-off} \ 2>&1 1>&3 ) exec 3>&- for feature in $FEATURES; do if [ "$feature" = "hide_uids" ]; then echo security.bsd.see_other_uids=0 >> $BSDINSTALL_TMPETC/sysctl.conf.hardening fi if [ "$feature" = "hide_gids" ]; then echo security.bsd.see_other_gids=0 >> $BSDINSTALL_TMPETC/sysctl.conf.hardening fi if [ "$feature" = "read_msgbuf" ]; then echo security.bsd.unprivileged_read_msgbuf=0 >> $BSDINSTALL_TMPETC/sysctl.conf.hardening fi if [ "$feature" = "proc_debug" ]; then echo security.bsd.unprivileged_proc_debug=0 >> $BSDINSTALL_TMPETC/sysctl.conf.hardening fi if [ "$feature" = "random_id" ]; then echo kern.randompid=$(jot -r 1 9999) >> $BSDINSTALL_TMPETC/sysctl.conf.hardening fi if [ "$feature" = "stack_guard" ]; then echo security.bsd.stack_guard_page=1 >> $BSDINSTALL_TMPETC/sysctl.conf.hardening fi if [ "$feature" = "clear_tmp" ]; then echo 'clear_tmp_enable="YES"' >> $BSDINSTALL_TMPETC/rc.conf.hardening fi if [ "$feature" = "disable_syslogd" ]; then echo 'syslogd_flags="-ss"' >> $BSDINSTALL_TMPETC/rc.conf.hardening fi if [ "$feature" = "disable_sendmail" ]; then echo 'sendmail_enable="NONE"' >> $BSDINSTALL_TMPETC/rc.conf.hardening fi done Index: user/alc/PQ_LAUNDRY/usr.sbin/pkg/pkg.c 
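In the bsdinstall hardening script above, the one-line change makes it
truncate $BSDINSTALL_TMPETC/rc.conf.hardening instead of rc.conf.services, so
the file emptied at startup is the same rc.conf fragment that the feature loop
appends clear_tmp_enable, syslogd_flags and sendmail_enable to, and anything
another installer step may have written to rc.conf.services is left untouched.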
=================================================================== --- user/alc/PQ_LAUNDRY/usr.sbin/pkg/pkg.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.sbin/pkg/pkg.c (revision 303642) @@ -1,1109 +1,1108 @@ /*- * Copyright (c) 2012-2014 Baptiste Daroussin * Copyright (c) 2013 Bryan Drewery * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "dns_utils.h" #include "config.h" struct sig_cert { char *name; unsigned char *sig; int siglen; unsigned char *cert; int certlen; bool trusted; }; struct pubkey { unsigned char *sig; int siglen; }; typedef enum { HASH_UNKNOWN, HASH_SHA256, } hash_t; struct fingerprint { hash_t type; char *name; char hash[BUFSIZ]; STAILQ_ENTRY(fingerprint) next; }; STAILQ_HEAD(fingerprint_list, fingerprint); static int extract_pkg_static(int fd, char *p, int sz) { struct archive *a; struct archive_entry *ae; char *end; int ret, r; ret = -1; a = archive_read_new(); if (a == NULL) { warn("archive_read_new"); return (ret); } archive_read_support_filter_all(a); archive_read_support_format_tar(a); if (lseek(fd, 0, 0) == -1) { warn("lseek"); goto cleanup; } if (archive_read_open_fd(a, fd, 4096) != ARCHIVE_OK) { warnx("archive_read_open_fd: %s", archive_error_string(a)); goto cleanup; } ae = NULL; while ((r = archive_read_next_header(a, &ae)) == ARCHIVE_OK) { end = strrchr(archive_entry_pathname(ae), '/'); if (end == NULL) continue; if (strcmp(end, "/pkg-static") == 0) { r = archive_read_extract(a, ae, ARCHIVE_EXTRACT_OWNER | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_TIME | ARCHIVE_EXTRACT_ACL | ARCHIVE_EXTRACT_FFLAGS | ARCHIVE_EXTRACT_XATTR); strlcpy(p, archive_entry_pathname(ae), sz); break; } } if (r == ARCHIVE_OK) ret = 0; else warnx("failed to extract pkg-static: %s", archive_error_string(a)); cleanup: archive_read_free(a); return (ret); } static int install_pkg_static(const char *path, const char *pkgpath, bool force) { int pstat; pid_t pid; switch ((pid = fork())) { case -1: return (-1); case 0: if (force) execl(path, "pkg-static", "add", "-f", pkgpath, (char *)NULL); else execl(path, "pkg-static", "add", pkgpath, 
(char *)NULL); _exit(1); default: break; } while (waitpid(pid, &pstat, 0) == -1) if (errno != EINTR) return (-1); if (WEXITSTATUS(pstat)) return (WEXITSTATUS(pstat)); else if (WIFSIGNALED(pstat)) return (128 & (WTERMSIG(pstat))); return (pstat); } static int fetch_to_fd(const char *url, char *path) { struct url *u; struct dns_srvinfo *mirrors, *current; struct url_stat st; FILE *remote; /* To store _https._tcp. + hostname + \0 */ int fd; int retry, max_retry; ssize_t r; char buf[10240]; char zone[MAXHOSTNAMELEN + 13]; static const char *mirror_type = NULL; max_retry = 3; current = mirrors = NULL; remote = NULL; if (mirror_type == NULL && config_string(MIRROR_TYPE, &mirror_type) != 0) { warnx("No MIRROR_TYPE defined"); return (-1); } if ((fd = mkstemp(path)) == -1) { warn("mkstemp()"); return (-1); } retry = max_retry; if ((u = fetchParseURL(url)) == NULL) { warn("fetchParseURL('%s')", url); return (-1); } while (remote == NULL) { if (retry == max_retry) { if (strcmp(u->scheme, "file") != 0 && strcasecmp(mirror_type, "srv") == 0) { snprintf(zone, sizeof(zone), "_%s._tcp.%s", u->scheme, u->host); mirrors = dns_getsrvinfo(zone); current = mirrors; } } if (mirrors != NULL) { strlcpy(u->host, current->host, sizeof(u->host)); u->port = current->port; } remote = fetchXGet(u, &st, ""); if (remote == NULL) { --retry; if (retry <= 0) goto fetchfail; if (mirrors == NULL) { sleep(1); } else { current = current->next; if (current == NULL) current = mirrors; } } } while ((r = fread(buf, 1, sizeof(buf), remote)) > 0) { if (write(fd, buf, r) != r) { warn("write()"); goto fetchfail; } } if (r != 0) { warn("An error occurred while fetching pkg(8)"); goto fetchfail; } if (ferror(remote)) goto fetchfail; goto cleanup; fetchfail: if (fd != -1) { close(fd); fd = -1; unlink(path); } cleanup: if (remote != NULL) fclose(remote); return fd; } static struct fingerprint * parse_fingerprint(ucl_object_t *obj) { const ucl_object_t *cur; ucl_object_iter_t it = NULL; const char *function, *fp, *key; struct fingerprint *f; hash_t fct = HASH_UNKNOWN; function = fp = NULL; while ((cur = ucl_iterate_object(obj, &it, true))) { key = ucl_object_key(cur); if (cur->type != UCL_STRING) continue; if (strcasecmp(key, "function") == 0) { function = ucl_object_tostring(cur); continue; } if (strcasecmp(key, "fingerprint") == 0) { fp = ucl_object_tostring(cur); continue; } } if (fp == NULL || function == NULL) return (NULL); if (strcasecmp(function, "sha256") == 0) fct = HASH_SHA256; if (fct == HASH_UNKNOWN) { warnx("Unsupported hashing function: %s", function); return (NULL); } f = calloc(1, sizeof(struct fingerprint)); f->type = fct; strlcpy(f->hash, fp, sizeof(f->hash)); return (f); } static void free_fingerprint_list(struct fingerprint_list* list) { struct fingerprint *fingerprint, *tmp; STAILQ_FOREACH_SAFE(fingerprint, list, next, tmp) { free(fingerprint->name); free(fingerprint); } free(list); } static struct fingerprint * load_fingerprint(const char *dir, const char *filename) { ucl_object_t *obj = NULL; struct ucl_parser *p = NULL; struct fingerprint *f; char path[MAXPATHLEN]; f = NULL; snprintf(path, MAXPATHLEN, "%s/%s", dir, filename); p = ucl_parser_new(0); if (!ucl_parser_add_file(p, path)) { warnx("%s: %s", path, ucl_parser_get_error(p)); ucl_parser_free(p); return (NULL); } obj = ucl_parser_get_object(p); if (obj->type == UCL_OBJECT) f = parse_fingerprint(obj); if (f != NULL) f->name = strdup(filename); ucl_object_unref(obj); ucl_parser_free(p); return (f); } static struct fingerprint_list * load_fingerprints(const char 
*path, int *count) { DIR *d; struct dirent *ent; struct fingerprint *finger; struct fingerprint_list *fingerprints; *count = 0; fingerprints = calloc(1, sizeof(struct fingerprint_list)); if (fingerprints == NULL) return (NULL); STAILQ_INIT(fingerprints); if ((d = opendir(path)) == NULL) { free(fingerprints); return (NULL); } while ((ent = readdir(d))) { if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) continue; finger = load_fingerprint(path, ent->d_name); if (finger != NULL) { STAILQ_INSERT_TAIL(fingerprints, finger, next); ++(*count); } } closedir(d); return (fingerprints); } static void sha256_hash(unsigned char hash[SHA256_DIGEST_LENGTH], char out[SHA256_DIGEST_LENGTH * 2 + 1]) { int i; for (i = 0; i < SHA256_DIGEST_LENGTH; i++) sprintf(out + (i * 2), "%02x", hash[i]); out[SHA256_DIGEST_LENGTH * 2] = '\0'; } static void sha256_buf(char *buf, size_t len, char out[SHA256_DIGEST_LENGTH * 2 + 1]) { unsigned char hash[SHA256_DIGEST_LENGTH]; SHA256_CTX sha256; out[0] = '\0'; SHA256_Init(&sha256); SHA256_Update(&sha256, buf, len); SHA256_Final(hash, &sha256); sha256_hash(hash, out); } static int sha256_fd(int fd, char out[SHA256_DIGEST_LENGTH * 2 + 1]) { int my_fd; FILE *fp; char buffer[BUFSIZ]; unsigned char hash[SHA256_DIGEST_LENGTH]; size_t r; int ret; SHA256_CTX sha256; my_fd = -1; fp = NULL; r = 0; ret = 1; out[0] = '\0'; /* Duplicate the fd so that fclose(3) does not close it. */ if ((my_fd = dup(fd)) == -1) { warnx("dup"); goto cleanup; } if ((fp = fdopen(my_fd, "rb")) == NULL) { warnx("fdopen"); goto cleanup; } SHA256_Init(&sha256); while ((r = fread(buffer, 1, BUFSIZ, fp)) > 0) SHA256_Update(&sha256, buffer, r); if (ferror(fp) != 0) { warnx("fread"); goto cleanup; } SHA256_Final(hash, &sha256); sha256_hash(hash, out); ret = 0; cleanup: if (fp != NULL) fclose(fp); else if (my_fd != -1) close(my_fd); (void)lseek(fd, 0, SEEK_SET); return (ret); } static EVP_PKEY * load_public_key_file(const char *file) { EVP_PKEY *pkey; BIO *bp; char errbuf[1024]; bp = BIO_new_file(file, "r"); if (!bp) errx(EXIT_FAILURE, "Unable to read %s", file); if ((pkey = PEM_read_bio_PUBKEY(bp, NULL, NULL, NULL)) == NULL) warnx("ici: %s", ERR_error_string(ERR_get_error(), errbuf)); BIO_free(bp); return (pkey); } static EVP_PKEY * load_public_key_buf(const unsigned char *cert, int certlen) { EVP_PKEY *pkey; BIO *bp; char errbuf[1024]; bp = BIO_new_mem_buf(__DECONST(void *, cert), certlen); if ((pkey = PEM_read_bio_PUBKEY(bp, NULL, NULL, NULL)) == NULL) warnx("%s", ERR_error_string(ERR_get_error(), errbuf)); BIO_free(bp); return (pkey); } static bool rsa_verify_cert(int fd, const char *sigfile, const unsigned char *key, int keylen, unsigned char *sig, int siglen) { EVP_MD_CTX *mdctx; EVP_PKEY *pkey; char sha256[(SHA256_DIGEST_LENGTH * 2) + 2]; char errbuf[1024]; bool ret; pkey = NULL; mdctx = NULL; ret = false; SSL_load_error_strings(); /* Compute SHA256 of the package. */ if (lseek(fd, 0, 0) == -1) { warn("lseek"); goto cleanup; } if ((sha256_fd(fd, sha256)) == -1) { warnx("Error creating SHA256 hash for package"); goto cleanup; } if (sigfile != NULL) { if ((pkey = load_public_key_file(sigfile)) == NULL) { warnx("Error reading public key"); goto cleanup; } } else { if ((pkey = load_public_key_buf(key, keylen)) == NULL) { warnx("Error reading public key"); goto cleanup; } } /* Verify signature of the SHA256(pkg) is valid. 
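 * Note that what is verified here is the ASCII hex digest computed by
 * sha256_fd() above: EVP_DigestVerifyUpdate() is fed that string, so
 * the RSA signature is checked against SHA-256 of the hex digest rather
 * than against the raw package bytes.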
*/ if ((mdctx = EVP_MD_CTX_create()) == NULL) { warnx("%s", ERR_error_string(ERR_get_error(), errbuf)); goto error; } if (EVP_DigestVerifyInit(mdctx, NULL, EVP_sha256(), NULL, pkey) != 1) { warnx("%s", ERR_error_string(ERR_get_error(), errbuf)); goto error; } if (EVP_DigestVerifyUpdate(mdctx, sha256, strlen(sha256)) != 1) { warnx("%s", ERR_error_string(ERR_get_error(), errbuf)); goto error; } if (EVP_DigestVerifyFinal(mdctx, sig, siglen) != 1) { warnx("%s", ERR_error_string(ERR_get_error(), errbuf)); goto error; } ret = true; printf("done\n"); goto cleanup; error: printf("failed\n"); cleanup: if (pkey) EVP_PKEY_free(pkey); if (mdctx) EVP_MD_CTX_destroy(mdctx); ERR_free_strings(); return (ret); } static struct pubkey * read_pubkey(int fd) { struct pubkey *pk; struct sbuf *sig; char buf[4096]; int r; if (lseek(fd, 0, 0) == -1) { warn("lseek"); return (NULL); } sig = sbuf_new_auto(); while ((r = read(fd, buf, sizeof(buf))) >0) { sbuf_bcat(sig, buf, r); } sbuf_finish(sig); pk = calloc(1, sizeof(struct pubkey)); pk->siglen = sbuf_len(sig); pk->sig = calloc(1, pk->siglen); memcpy(pk->sig, sbuf_data(sig), pk->siglen); sbuf_delete(sig); return (pk); } static struct sig_cert * parse_cert(int fd) { int my_fd; struct sig_cert *sc; FILE *fp; struct sbuf *buf, *sig, *cert; char *line; size_t linecap; ssize_t linelen; buf = NULL; my_fd = -1; sc = NULL; line = NULL; linecap = 0; if (lseek(fd, 0, 0) == -1) { warn("lseek"); return (NULL); } /* Duplicate the fd so that fclose(3) does not close it. */ if ((my_fd = dup(fd)) == -1) { warnx("dup"); return (NULL); } if ((fp = fdopen(my_fd, "rb")) == NULL) { warn("fdopen"); close(my_fd); return (NULL); } sig = sbuf_new_auto(); cert = sbuf_new_auto(); while ((linelen = getline(&line, &linecap, fp)) > 0) { if (strcmp(line, "SIGNATURE\n") == 0) { buf = sig; continue; } else if (strcmp(line, "CERT\n") == 0) { buf = cert; continue; } else if (strcmp(line, "END\n") == 0) { break; } if (buf != NULL) sbuf_bcat(buf, line, linelen); } fclose(fp); /* Trim out unrelated trailing newline */ sbuf_setpos(sig, sbuf_len(sig) - 1); sbuf_finish(sig); sbuf_finish(cert); sc = calloc(1, sizeof(struct sig_cert)); sc->siglen = sbuf_len(sig); sc->sig = calloc(1, sc->siglen); memcpy(sc->sig, sbuf_data(sig), sc->siglen); sc->certlen = sbuf_len(cert); sc->cert = strdup(sbuf_data(cert)); sbuf_delete(sig); sbuf_delete(cert); return (sc); } static bool verify_pubsignature(int fd_pkg, int fd_sig) { struct pubkey *pk; const char *pubkey; bool ret; pk = NULL; pubkey = NULL; ret = false; if (config_string(PUBKEY, &pubkey) != 0) { warnx("No CONFIG_PUBKEY defined"); goto cleanup; } if ((pk = read_pubkey(fd_sig)) == NULL) { warnx("Error reading signature"); goto cleanup; } /* Verify the signature. */ printf("Verifying signature with public key %s... ", pubkey); if (rsa_verify_cert(fd_pkg, pubkey, NULL, 0, pk->sig, pk->siglen) == false) { fprintf(stderr, "Signature is not valid\n"); goto cleanup; } ret = true; cleanup: if (pk) { free(pk->sig); free(pk); } return (ret); } static bool verify_signature(int fd_pkg, int fd_sig) { struct fingerprint_list *trusted, *revoked; struct fingerprint *fingerprint; struct sig_cert *sc; bool ret; int trusted_count, revoked_count; const char *fingerprints; char path[MAXPATHLEN]; char hash[SHA256_DIGEST_LENGTH * 2 + 1]; sc = NULL; trusted = revoked = NULL; ret = false; /* Read and parse fingerprints. 
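 * FINGERPRINTS names a directory with "trusted" and "revoked"
 * subdirectories; every file in them is a small UCL document whose
 * "function" (only "sha256" is accepted) and "fingerprint" keys are
 * read by load_fingerprint()/parse_fingerprint() above.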
*/ if (config_string(FINGERPRINTS, &fingerprints) != 0) { warnx("No CONFIG_FINGERPRINTS defined"); goto cleanup; } snprintf(path, MAXPATHLEN, "%s/trusted", fingerprints); if ((trusted = load_fingerprints(path, &trusted_count)) == NULL) { warnx("Error loading trusted certificates"); goto cleanup; } if (trusted_count == 0 || trusted == NULL) { fprintf(stderr, "No trusted certificates found.\n"); goto cleanup; } snprintf(path, MAXPATHLEN, "%s/revoked", fingerprints); if ((revoked = load_fingerprints(path, &revoked_count)) == NULL) { warnx("Error loading revoked certificates"); goto cleanup; } /* Read certificate and signature in. */ if ((sc = parse_cert(fd_sig)) == NULL) { warnx("Error parsing certificate"); goto cleanup; } /* Explicitly mark as non-trusted until proven otherwise. */ sc->trusted = false; /* Parse signature and pubkey out of the certificate */ sha256_buf(sc->cert, sc->certlen, hash); /* Check if this hash is revoked */ if (revoked != NULL) { STAILQ_FOREACH(fingerprint, revoked, next) { if (strcasecmp(fingerprint->hash, hash) == 0) { fprintf(stderr, "The package was signed with " "revoked certificate %s\n", fingerprint->name); goto cleanup; } } } STAILQ_FOREACH(fingerprint, trusted, next) { if (strcasecmp(fingerprint->hash, hash) == 0) { sc->trusted = true; sc->name = strdup(fingerprint->name); break; } } if (sc->trusted == false) { fprintf(stderr, "No trusted fingerprint found matching " "package's certificate\n"); goto cleanup; } /* Verify the signature. */ printf("Verifying signature with trusted certificate %s... ", sc->name); if (rsa_verify_cert(fd_pkg, NULL, sc->cert, sc->certlen, sc->sig, sc->siglen) == false) { fprintf(stderr, "Signature is not valid\n"); goto cleanup; } ret = true; cleanup: if (trusted) free_fingerprint_list(trusted); if (revoked) free_fingerprint_list(revoked); if (sc) { free(sc->cert); free(sc->sig); free(sc->name); free(sc); } return (ret); } static int bootstrap_pkg(bool force) { int fd_pkg, fd_sig; int ret; char url[MAXPATHLEN]; char tmppkg[MAXPATHLEN]; char tmpsig[MAXPATHLEN]; const char *packagesite; const char *signature_type; char pkgstatic[MAXPATHLEN]; fd_sig = -1; ret = -1; if (config_string(PACKAGESITE, &packagesite) != 0) { warnx("No PACKAGESITE defined"); return (-1); } if (config_string(SIGNATURE_TYPE, &signature_type) != 0) { warnx("Error looking up SIGNATURE_TYPE"); return (-1); } printf("Bootstrapping pkg from %s, please wait...\n", packagesite); /* Support pkg+http:// for PACKAGESITE which is the new format in 1.2 to avoid confusion on why http://pkg.FreeBSD.org has no A record. */ if (strncmp(URL_SCHEME_PREFIX, packagesite, strlen(URL_SCHEME_PREFIX)) == 0) packagesite += strlen(URL_SCHEME_PREFIX); snprintf(url, MAXPATHLEN, "%s/Latest/pkg.txz", packagesite); snprintf(tmppkg, MAXPATHLEN, "%s/pkg.txz.XXXXXX", getenv("TMPDIR") ? getenv("TMPDIR") : _PATH_TMP); if ((fd_pkg = fetch_to_fd(url, tmppkg)) == -1) goto fetchfail; if (signature_type != NULL && strcasecmp(signature_type, "NONE") != 0) { if (strcasecmp(signature_type, "FINGERPRINTS") == 0) { snprintf(tmpsig, MAXPATHLEN, "%s/pkg.txz.sig.XXXXXX", getenv("TMPDIR") ? getenv("TMPDIR") : _PATH_TMP); snprintf(url, MAXPATHLEN, "%s/Latest/pkg.txz.sig", packagesite); if ((fd_sig = fetch_to_fd(url, tmpsig)) == -1) { fprintf(stderr, "Signature for pkg not " "available.\n"); goto fetchfail; } if (verify_signature(fd_pkg, fd_sig) == false) goto cleanup; } else if (strcasecmp(signature_type, "PUBKEY") == 0) { snprintf(tmpsig, MAXPATHLEN, "%s/pkg.txz.pubkeysig.XXXXXX", getenv("TMPDIR") ? 
getenv("TMPDIR") : _PATH_TMP); snprintf(url, MAXPATHLEN, "%s/Latest/pkg.txz.pubkeysig", packagesite); if ((fd_sig = fetch_to_fd(url, tmpsig)) == -1) { fprintf(stderr, "Signature for pkg not " "available.\n"); goto fetchfail; } if (verify_pubsignature(fd_pkg, fd_sig) == false) goto cleanup; } else { warnx("Signature type %s is not supported for " "bootstrapping.", signature_type); goto cleanup; } } if ((ret = extract_pkg_static(fd_pkg, pkgstatic, MAXPATHLEN)) == 0) ret = install_pkg_static(pkgstatic, tmppkg, force); goto cleanup; fetchfail: warnx("Error fetching %s: %s", url, fetchLastErrString); fprintf(stderr, "A pre-built version of pkg could not be found for " "your system.\n"); fprintf(stderr, "Consider changing PACKAGESITE or installing it from " "ports: 'ports-mgmt/pkg'.\n"); cleanup: if (fd_sig != -1) { close(fd_sig); unlink(tmpsig); } if (fd_pkg != -1) { close(fd_pkg); unlink(tmppkg); } return (ret); } static const char confirmation_message[] = "The package management tool is not yet installed on your system.\n" "Do you want to fetch and install it now? [y/N]: "; static const char non_interactive_message[] = "The package management tool is not yet installed on your system.\n" "Please set ASSUME_ALWAYS_YES=yes environment variable to be able to bootstrap " "in non-interactive (stdin not being a tty)\n"; static int pkg_query_yes_no(void) { int ret, c; c = getchar(); if (c == 'y' || c == 'Y') ret = 1; else ret = 0; while (c != '\n' && c != EOF) c = getchar(); return (ret); } static int bootstrap_pkg_local(const char *pkgpath, bool force) { char path[MAXPATHLEN]; char pkgstatic[MAXPATHLEN]; const char *signature_type; int fd_pkg, fd_sig, ret; fd_sig = -1; ret = -1; fd_pkg = open(pkgpath, O_RDONLY); if (fd_pkg == -1) err(EXIT_FAILURE, "Unable to open %s", pkgpath); if (config_string(SIGNATURE_TYPE, &signature_type) != 0) { warnx("Error looking up SIGNATURE_TYPE"); goto cleanup; } if (signature_type != NULL && strcasecmp(signature_type, "NONE") != 0) { if (strcasecmp(signature_type, "FINGERPRINTS") == 0) { snprintf(path, sizeof(path), "%s.sig", pkgpath); if ((fd_sig = open(path, O_RDONLY)) == -1) { fprintf(stderr, "Signature for pkg not " "available.\n"); goto cleanup; } if (verify_signature(fd_pkg, fd_sig) == false) goto cleanup; } else if (strcasecmp(signature_type, "PUBKEY") == 0) { snprintf(path, sizeof(path), "%s.pubkeysig", pkgpath); if ((fd_sig = open(path, O_RDONLY)) == -1) { fprintf(stderr, "Signature for pkg not " "available.\n"); goto cleanup; } if (verify_pubsignature(fd_pkg, fd_sig) == false) goto cleanup; } else { warnx("Signature type %s is not supported for " "bootstrapping.", signature_type); goto cleanup; } } if ((ret = extract_pkg_static(fd_pkg, pkgstatic, MAXPATHLEN)) == 0) ret = install_pkg_static(pkgstatic, pkgpath, force); cleanup: close(fd_pkg); if (fd_sig != -1) close(fd_sig); return (ret); } int main(int argc, char *argv[]) { char pkgpath[MAXPATHLEN]; const char *pkgarg; bool bootstrap_only, force, yes; bootstrap_only = false; force = false; pkgarg = NULL; yes = false; snprintf(pkgpath, MAXPATHLEN, "%s/sbin/pkg", getenv("LOCALBASE") ? getenv("LOCALBASE") : _LOCALBASE); if (argc > 1 && strcmp(argv[1], "bootstrap") == 0) { bootstrap_only = true; if (argc == 3 && strcmp(argv[2], "-f") == 0) force = true; } if ((bootstrap_only && force) || access(pkgpath, X_OK) == -1) { /* * To allow 'pkg -N' to be used as a reliable test for whether * a system is configured to use pkg, don't bootstrap pkg * when that argument is given as argv[1]. 
*/ if (argv[1] != NULL && strcmp(argv[1], "-N") == 0) errx(EXIT_FAILURE, "pkg is not installed"); config_init(); if (argc > 1 && strcmp(argv[1], "add") == 0) { if (argc > 2 && strcmp(argv[2], "-f") == 0) { force = true; pkgarg = argv[3]; } else pkgarg = argv[2]; if (pkgarg == NULL) { fprintf(stderr, "Path to pkg.txz required\n"); exit(EXIT_FAILURE); } if (access(pkgarg, R_OK) == -1) { fprintf(stderr, "No such file: %s\n", pkgarg); exit(EXIT_FAILURE); } if (bootstrap_pkg_local(pkgarg, force) != 0) exit(EXIT_FAILURE); exit(EXIT_SUCCESS); } /* * Do not ask for confirmation if either of stdin or stdout is * not tty. Check the environment to see if user has answer * tucked in there already. */ config_bool(ASSUME_ALWAYS_YES, &yes); if (!yes) { if (!isatty(fileno(stdin))) { fprintf(stderr, non_interactive_message); exit(EXIT_FAILURE); } printf("%s", confirmation_message); if (pkg_query_yes_no() == 0) exit(EXIT_FAILURE); } if (bootstrap_pkg(force) != 0) exit(EXIT_FAILURE); config_finish(); if (bootstrap_only) exit(EXIT_SUCCESS); } else if (bootstrap_only) { printf("pkg already bootstrapped at %s\n", pkgpath); exit(EXIT_SUCCESS); } execv(pkgpath, argv); /* NOT REACHED */ return (EXIT_FAILURE); } Index: user/alc/PQ_LAUNDRY/usr.sbin/pw/pw.h =================================================================== --- user/alc/PQ_LAUNDRY/usr.sbin/pw/pw.h (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.sbin/pw/pw.h (revision 303642) @@ -1,107 +1,106 @@ /*- * Copyright (C) 1996 * David L. Nugent. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY DAVID L. NUGENT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL DAVID L. NUGENT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include -#define _WITH_GETLINE #include #include #include #include "pwupd.h" enum _mode { M_ADD, M_DELETE, M_UPDATE, M_PRINT, M_NEXT, M_LOCK, M_UNLOCK, M_NUM }; enum _which { W_USER, W_GROUP, W_NUM }; #define _DEF_DIRMODE (S_IRWXU | S_IRWXG | S_IRWXO) #define _PATH_PW_CONF "/etc/pw.conf" #define _UC_MAXLINE 1024 #define _UC_MAXSHELLS 32 struct userconf *get_userconfig(const char *cfg); struct userconf *read_userconfig(char const * file); int write_userconfig(struct userconf *cnf, char const * file); int pw_group_add(int argc, char **argv, char *name); int pw_group_del(int argc, char **argv, char *name); int pw_group_mod(int argc, char **argv, char *name); int pw_group_next(int argc, char **argv, char *name); int pw_group_show(int argc, char **argv, char *name); int pw_user_add(int argc, char **argv, char *name); int pw_user_del(int argc, char **argv, char *name); int pw_user_lock(int argc, char **argv, char *name); int pw_user_mod(int argc, char **argv, char *name); int pw_user_next(int argc, char **argv, char *name); int pw_user_show(int argc, char **argv, char *name); int pw_user_unlock(int argc, char **argv, char *name); int pw_groupnext(struct userconf *cnf, bool quiet); char *pw_checkname(char *name, int gecos); uintmax_t pw_checkid(char *nptr, uintmax_t maxval); int pw_checkfd(char *nptr); int addnispwent(const char *path, struct passwd *pwd); int delnispwent(const char *path, const char *login); int chgnispwent(const char *path, const char *login, struct passwd *pwd); int groupadd(struct userconf *, char *name, gid_t id, char *members, int fd, bool dryrun, bool pretty, bool precrypted); int nis_update(void); int boolean_val(char const * str, int dflt); int passwd_val(char const * str, int dflt); char const *boolean_str(int val); char *newstr(char const * p); void pw_log(struct userconf * cnf, int mode, int which, char const * fmt,...) __printflike(4, 5); char *pw_pwcrypt(char *password); extern const char *Modes[]; extern const char *Which[]; uintmax_t strtounum(const char * __restrict, uintmax_t, uintmax_t, const char ** __restrict); Index: user/alc/PQ_LAUNDRY/usr.sbin/pw/pw_vpw.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.sbin/pw/pw_vpw.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.sbin/pw/pw_vpw.c (revision 303642) @@ -1,204 +1,200 @@ /*- * Copyright (C) 1996 * David L. Nugent. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY DAVID L. NUGENT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL DAVID L.
NUGENT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #ifndef lint static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include #include #include -#define _WITH_GETLINE #include #include #include #include #include "pwupd.h" static FILE * pwd_fp = NULL; void vendpwent(void) { if (pwd_fp != NULL) { fclose(pwd_fp); pwd_fp = NULL; } } void vsetpwent(void) { vendpwent(); } static struct passwd * vnextpwent(char const *nam, uid_t uid, int doclose) { struct passwd *pw; char *line; size_t linecap; ssize_t linelen; pw = NULL; line = NULL; linecap = 0; if (pwd_fp != NULL || (pwd_fp = fopen(getpwpath(_MASTERPASSWD), "r")) != NULL) { while ((linelen = getline(&line, &linecap, pwd_fp)) > 0) { /* Skip comments and empty lines */ if (*line == '\n' || *line == '#') continue; /* trim latest \n */ if (line[linelen - 1 ] == '\n') line[linelen - 1] = '\0'; pw = pw_scan(line, PWSCAN_MASTER); if (pw == NULL) errx(EXIT_FAILURE, "Invalid user entry in '%s':" " '%s'", getpwpath(_MASTERPASSWD), line); if (uid != (uid_t)-1) { if (uid == pw->pw_uid) break; } else if (nam != NULL) { if (strcmp(nam, pw->pw_name) == 0) break; } else break; free(pw); pw = NULL; } if (doclose) vendpwent(); } free(line); return (pw); } struct passwd * vgetpwent(void) { return vnextpwent(NULL, -1, 0); } struct passwd * vgetpwuid(uid_t uid) { return vnextpwent(NULL, uid, 1); } struct passwd * vgetpwnam(const char * nam) { return vnextpwent(nam, -1, 1); } static FILE * grp_fp = NULL; void vendgrent(void) { if (grp_fp != NULL) { fclose(grp_fp); grp_fp = NULL; } } -RET_SETGRENT +void vsetgrent(void) { vendgrent(); -#if defined(__FreeBSD__) - return 0; -#endif } static struct group * vnextgrent(char const *nam, gid_t gid, int doclose) { struct group *gr; char *line; size_t linecap; ssize_t linelen; gr = NULL; line = NULL; linecap = 0; if (grp_fp != NULL || (grp_fp = fopen(getgrpath(_GROUP), "r")) != NULL) { while ((linelen = getline(&line, &linecap, grp_fp)) > 0) { /* Skip comments and empty lines */ if (*line == '\n' || *line == '#') continue; /* trim latest \n */ if (line[linelen - 1 ] == '\n') line[linelen - 1] = '\0'; gr = gr_scan(line); if (gr == NULL) errx(EXIT_FAILURE, "Invalid group entry in '%s':" " '%s'", getgrpath(_GROUP), line); if (gid != (gid_t)-1) { if (gid == gr->gr_gid) break; } else if (nam != NULL) { if (strcmp(nam, gr->gr_name) == 0) break; } else break; free(gr); gr = NULL; } if (doclose) vendgrent(); } free(line); return (gr); } struct group * vgetgrent(void) { return vnextgrent(NULL, -1, 0); } struct group * vgetgrgid(gid_t gid) { return vnextgrent(NULL, gid, 1); } struct group * vgetgrnam(const char * nam) { return vnextgrent(nam, -1, 1); } Index: user/alc/PQ_LAUNDRY/usr.sbin/pw/pwupd.h =================================================================== --- user/alc/PQ_LAUNDRY/usr.sbin/pw/pwupd.h (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.sbin/pw/pwupd.h (revision 303642) @@ -1,152 +1,146 @@ /*- * Copyright (C) 1996 * David L. Nugent. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY DAVID L. NUGENT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL DAVID L. NUGENT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _PWUPD_H_ #define _PWUPD_H_ #include #include #include #include #include #include #include -#if defined(__FreeBSD__) -#define RET_SETGRENT int -#else -#define RET_SETGRENT void -#endif - struct pwf { int _altdir; void (*_setpwent)(void); void (*_endpwent)(void); struct passwd * (*_getpwent)(void); struct passwd * (*_getpwuid)(uid_t uid); struct passwd * (*_getpwnam)(const char * nam); - RET_SETGRENT (*_setgrent)(void); + void (*_setgrent)(void); void (*_endgrent)(void); struct group * (*_getgrent)(void); struct group * (*_getgrgid)(gid_t gid); struct group * (*_getgrnam)(const char * nam); }; struct userconf { int default_password; /* Default password for new users? */ int reuse_uids; /* Reuse uids? */ int reuse_gids; /* Reuse gids? 
*/ char *nispasswd; /* Path to NIS version of the passwd file */ char *dotdir; /* Where to obtain skeleton files */ char *newmail; /* Mail to send to new accounts */ char *logfile; /* Where to log changes */ char *home; /* Where to create home directory */ mode_t homemode; /* Home directory permissions */ char *shelldir; /* Where shells are located */ char **shells; /* List of shells */ char *shell_default; /* Default shell */ char *default_group; /* Default group number */ StringList *groups; /* Default (additional) groups */ char *default_class; /* Default user class */ uid_t min_uid, max_uid; /* Allowed range of uids */ gid_t min_gid, max_gid; /* Allowed range of gids */ time_t expire_days; /* Days to expiry */ time_t password_days; /* Days to password expiry */ }; struct pwconf { char rootdir[MAXPATHLEN]; char etcpath[MAXPATHLEN]; int fd; int rootfd; bool checkduplicate; }; extern struct pwf PWF; extern struct pwf VPWF; extern struct pwconf conf; #define SETPWENT() PWF._setpwent() #define ENDPWENT() PWF._endpwent() #define GETPWENT() PWF._getpwent() #define GETPWUID(uid) PWF._getpwuid(uid) #define GETPWNAM(nam) PWF._getpwnam(nam) #define SETGRENT() PWF._setgrent() #define ENDGRENT() PWF._endgrent() #define GETGRENT() PWF._getgrent() #define GETGRGID(gid) PWF._getgrgid(gid) #define GETGRNAM(nam) PWF._getgrnam(nam) #define PWF_REGULAR 0 #define PWF_ALT 1 #define PWF_ROOTDIR 2 #define PWALTDIR() PWF._altdir #ifndef _PATH_PWD #define _PATH_PWD "/etc" #endif #ifndef _GROUP #define _GROUP "group" #endif #ifndef _MASTERPASSWD #define _MASTERPASSWD "master.passwd" #endif __BEGIN_DECLS int addpwent(struct passwd * pwd); int delpwent(struct passwd * pwd); int chgpwent(char const * login, struct passwd * pwd); char * getpwpath(char const * file); int addgrent(struct group * grp); int delgrent(struct group * grp); int chggrent(char const * name, struct group * grp); char * getgrpath(const char *file); void vsetpwent(void); void vendpwent(void); struct passwd * vgetpwent(void); struct passwd * vgetpwuid(uid_t uid); struct passwd * vgetpwnam(const char * nam); struct group * vgetgrent(void); struct group * vgetgrgid(gid_t gid); struct group * vgetgrnam(const char * nam); -RET_SETGRENT vsetgrent(void); +void vsetgrent(void); void vendgrent(void); void copymkdir(int rootfd, char const * dir, int skelfd, mode_t mode, uid_t uid, gid_t gid, int flags); void rm_r(int rootfd, char const * dir, uid_t uid); __END_DECLS #endif /* !_PWUPD_H */ Index: user/alc/PQ_LAUNDRY/usr.sbin/services_mkdb/services_mkdb.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.sbin/services_mkdb/services_mkdb.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.sbin/services_mkdb/services_mkdb.c (revision 303642) @@ -1,463 +1,462 @@ /* $NetBSD: services_mkdb.c,v 1.14 2008/04/28 20:24:17 martin Exp $ */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Luke Mewburn and Christos Zoulas. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include -#define _WITH_GETLINE #include #include #include #include #include #include #include #include #include "extern.h" static char tname[MAXPATHLEN]; #define PMASK 0xffff #define PROTOMAX 5 static void add(DB *, StringList *, size_t, const char *, size_t *, int); static StringList ***parseservices(const char *, StringList *); static void cleanup(void); static void store(DB *, DBT *, DBT *, int); static void killproto(DBT *); static char *getstring(const char *, size_t, char **, const char *); static size_t getprotoindex(StringList *, const char *); static const char *getprotostr(StringList *, size_t); static const char *mkaliases(StringList *, char *, size_t); static void usage(void); HASHINFO hinfo = { .bsize = 256, .ffactor = 4, .nelem = 32768, .cachesize = 1024, .hash = NULL, .lorder = 0 }; int main(int argc, char *argv[]) { DB *db; int ch; const char *fname = _PATH_SERVICES; const char *dbname = _PATH_SERVICES_DB; int warndup = 1; int unique = 0; int otherflag = 0; int byteorder = 0; size_t cnt = 0; StringList *sl, ***svc; size_t port, proto; char *dbname_dir, *dbname_dirbuf; int dbname_dir_fd = -1; setprogname(argv[0]); while ((ch = getopt(argc, argv, "blo:qu")) != -1) switch (ch) { case 'b': case 'l': if (byteorder != 0) usage(); byteorder = ch == 'b' ? 4321 : 1234; break; case 'q': otherflag = 1; warndup = 0; break; case 'o': otherflag = 1; dbname = optarg; break; case 'u': unique++; break; case '?': default: usage(); } argc -= optind; argv += optind; if (argc > 1 || (unique && otherflag)) usage(); if (argc == 1) fname = argv[0]; /* Set byte order. */ hinfo.lorder = byteorder; if (unique) uniq(fname); svc = parseservices(fname, sl = sl_init()); if (atexit(cleanup)) err(1, "Cannot install exit handler"); (void)snprintf(tname, sizeof(tname), "%s.tmp", dbname); db = dbopen(tname, O_RDWR | O_CREAT | O_EXCL, (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH), DB_HASH, &hinfo); if (!db) err(1, "Error opening temporary database `%s'", tname); for (port = 0; port < PMASK + 1; port++) { if (svc[port] == NULL) continue; for (proto = 0; proto < PROTOMAX; proto++) { StringList *s; if ((s = svc[port][proto]) == NULL) continue; add(db, s, port, getprotostr(sl, proto), &cnt, warndup); } free(svc[port]); } free(svc); sl_free(sl, 1); if ((db->close)(db)) err(1, "Error closing temporary database `%s'", tname); /* * Make sure file is safe on disk. 
To improve performance we will call * fsync() to the directory where file lies */ if (rename(tname, dbname) == -1 || (dbname_dirbuf = strdup(dbname)) == NULL || (dbname_dir = dirname(dbname_dirbuf)) == NULL || (dbname_dir_fd = open(dbname_dir, O_RDONLY|O_DIRECTORY)) == -1 || fsync(dbname_dir_fd) != 0) { if (dbname_dir_fd != -1) close(dbname_dir_fd); err(1, "Cannot rename `%s' to `%s'", tname, dbname); } if (dbname_dir_fd != -1) close(dbname_dir_fd); return 0; } static void add(DB *db, StringList *sl, size_t port, const char *proto, size_t *cnt, int warndup) { size_t i; char keyb[BUFSIZ], datab[BUFSIZ], abuf[BUFSIZ]; DBT data, key; key.data = keyb; data.data = datab; #ifdef DEBUG (void)printf("add %s %zu %s [ ", sl->sl_str[0], port, proto); for (i = 1; i < sl->sl_cur; i++) (void)printf("%s ", sl->sl_str[i]); (void)printf("]\n"); #endif /* key `indirect key', data `full line' */ data.size = snprintf(datab, sizeof(datab), "%zu", (*cnt)++) + 1; key.size = snprintf(keyb, sizeof(keyb), "%s %zu/%s %s", sl->sl_str[0], port, proto, mkaliases(sl, abuf, sizeof(abuf))) + 1; store(db, &data, &key, warndup); /* key `\377port/proto', data = `indirect key' */ key.size = snprintf(keyb, sizeof(keyb), "\377%zu/%s", port, proto) + 1; store(db, &key, &data, warndup); /* key `\377port', data = `indirect key' */ killproto(&key); store(db, &key, &data, warndup); /* add references for service and all aliases */ for (i = 0; i < sl->sl_cur; i++) { /* key `\376service/proto', data = `indirect key' */ key.size = snprintf(keyb, sizeof(keyb), "\376%s/%s", sl->sl_str[i], proto) + 1; store(db, &key, &data, warndup); /* key `\376service', data = `indirect key' */ killproto(&key); store(db, &key, &data, warndup); } sl_free(sl, 1); } static StringList *** parseservices(const char *fname, StringList *sl) { ssize_t len; size_t linecap, line, pindex; FILE *fp; StringList ***svc, *s; char *p, *ep; if ((fp = fopen(fname, "r")) == NULL) err(1, "Cannot open `%s'", fname); line = linecap = 0; if ((svc = calloc(PMASK + 1, sizeof(StringList **))) == NULL) err(1, "Cannot allocate %zu bytes", (size_t)(PMASK + 1)); p = NULL; while ((len = getline(&p, &linecap, fp)) != -1) { char *name, *port, *proto, *aliases, *cp, *alias; unsigned long pnum; line++; if (len == 0) continue; if (p[len - 1] == '\n') p[len - 1] = '\0'; for (cp = p; *cp && isspace((unsigned char)*cp); cp++) continue; if (*cp == '\0' || *cp == '#') continue; if ((name = getstring(fname, line, &cp, "name")) == NULL) continue; if ((port = getstring(fname, line, &cp, "port")) == NULL) continue; if (cp) { for (aliases = cp; *cp && *cp != '#'; cp++) continue; if (*cp) *cp = '\0'; } else aliases = NULL; proto = strchr(port, '/'); if (proto == NULL || proto[1] == '\0') { warnx("%s, %zu: no protocol found", fname, line); continue; } *proto++ = '\0'; errno = 0; pnum = strtoul(port, &ep, 0); if (*port == '\0' || *ep != '\0') { warnx("%s, %zu: invalid port `%s'", fname, line, port); continue; } if ((errno == ERANGE && pnum == ULONG_MAX) || pnum > PMASK) { warnx("%s, %zu: port too big `%s'", fname, line, port); continue; } if (svc[pnum] == NULL) { svc[pnum] = calloc(PROTOMAX, sizeof(StringList *)); if (svc[pnum] == NULL) err(1, "Cannot allocate %zu bytes", (size_t)PROTOMAX); } pindex = getprotoindex(sl, proto); if (svc[pnum][pindex] == NULL) s = svc[pnum][pindex] = sl_init(); else s = svc[pnum][pindex]; /* build list of aliases */ if (sl_find(s, name) == NULL) { char *p2; if ((p2 = strdup(name)) == NULL) err(1, "Cannot copy string"); (void)sl_add(s, p2); } if (aliases) { while ((alias = 
strsep(&aliases, " \t")) != NULL) { if (alias[0] == '\0') continue; if (sl_find(s, alias) == NULL) { char *p2; if ((p2 = strdup(alias)) == NULL) err(1, "Cannot copy string"); (void)sl_add(s, p2); } } } } (void)fclose(fp); return svc; } /* * cleanup(): Remove temporary files upon exit */ static void cleanup(void) { if (tname[0]) (void)unlink(tname); } static char * getstring(const char *fname, size_t line, char **cp, const char *tag) { char *str; while ((str = strsep(cp, " \t")) != NULL && *str == '\0') continue; if (str == NULL) warnx("%s, %zu: no %s found", fname, line, tag); return str; } static void killproto(DBT *key) { char *p, *d = key->data; if ((p = strchr(d, '/')) == NULL) abort(); *p++ = '\0'; key->size = p - d; } static void store(DB *db, DBT *key, DBT *data, int warndup) { #ifdef DEBUG int k = key->size - 1; int d = data->size - 1; (void)printf("store [%*.*s] [%*.*s]\n", k, k, (char *)key->data + 1, d, d, (char *)data->data + 1); #endif switch ((db->put)(db, key, data, R_NOOVERWRITE)) { case 0: break; case 1: if (warndup) warnx("duplicate service `%s'", &((char *)key->data)[1]); break; case -1: err(1, "put"); break; default: abort(); break; } } static size_t getprotoindex(StringList *sl, const char *str) { size_t i; char *p; for (i= 0; i < sl->sl_cur; i++) if (strcmp(sl->sl_str[i], str) == 0) return i; if (i == PROTOMAX) errx(1, "Ran out of protocols adding `%s';" " recompile with larger PROTOMAX", str); if ((p = strdup(str)) == NULL) err(1, "Cannot copy string"); (void)sl_add(sl, p); return i; } static const char * getprotostr(StringList *sl, size_t i) { assert(i < sl->sl_cur); return sl->sl_str[i]; } static const char * mkaliases(StringList *sl, char *buf, size_t len) { size_t nc, i, pos; buf[0] = 0; for (i = 1, pos = 0; i < sl->sl_cur; i++) { nc = strlcpy(buf + pos, sl->sl_str[i], len); if (nc >= len) goto out; pos += nc; len -= nc; nc = strlcpy(buf + pos, " ", len); if (nc >= len) goto out; pos += nc; len -= nc; } return buf; out: warn("aliases for `%s' truncated", sl->sl_str[0]); return buf; } static void usage(void) { (void)fprintf(stderr, "Usage:\t%s [-b | -l] [-q] [-o ] []\n" "\t%s -u []\n", getprogname(), getprogname()); exit(1); } Index: user/alc/PQ_LAUNDRY/usr.sbin/uathload/uathload.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.sbin/uathload/uathload.c (revision 303641) +++ user/alc/PQ_LAUNDRY/usr.sbin/uathload/uathload.c (revision 303642) @@ -1,241 +1,241 @@ /*- * Copyright (c) 2006 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. * * $FreeBSD$ */ /* * Atheros AR5523 USB Station Firmware downloader. * * uathload -d ugen-device [firmware-file] * * Intended to be called from devd on device discovery. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* all fields are big endian */ struct uath_fwmsg { uint32_t flags; #define UATH_WRITE_BLOCK (1 << 4) uint32_t len; #define UATH_MAX_FWBLOCK_SIZE 2048 uint32_t total; uint32_t remain; uint32_t rxtotal; uint32_t pad[123]; } __packed; #define UATH_DATA_TIMEOUT 10000 #define UATH_CMD_TIMEOUT 1000 #define VERBOSE(_fmt, ...) do { \ if (verbose) { \ printf(_fmt, __VA_ARGS__); \ fflush(stdout); \ } \ } while (0) extern uint8_t _binary_ar5523_bin_start; extern uint8_t _binary_ar5523_bin_end; static int -getdevname(const char *devname, char *msgdev, char *datadev) +getdevname(const char *udevname, char *msgdev, char *datadev) { char *bn, *bnbuf, *dn, *dnbuf; - dnbuf = strdup(devname); + dnbuf = strdup(udevname); if (dnbuf == NULL) return (-1); dn = dirname(dnbuf); - bnbuf = strdup(devname); + bnbuf = strdup(udevname); if (bnbuf == NULL) { free(dnbuf); return (-1); } bn = basename(bnbuf); if (strncmp(bn, "ugen", 4) != 0) { free(dnbuf); free(bnbuf); return (-1); } bn += 4; /* NB: pipes are hardcoded */ snprintf(msgdev, 256, "%s/usb/%s.1", dn, bn); snprintf(datadev, 256, "%s/usb/%s.2", dn, bn); free(dnbuf); free(bnbuf); return (0); } static void usage(void) { errx(-1, "usage: uathload [-v] -d devname [firmware]"); } int main(int argc, char *argv[]) { - const char *fwname, *devname; + const char *fwname, *udevname; char msgdev[256], datadev[256]; struct uath_fwmsg txmsg, rxmsg; char *txdata; struct stat sb; int msg, data, fw, timeout, b, c; int bufsize = 512, verbose = 0; ssize_t len; - devname = NULL; + udevname = NULL; while ((c = getopt(argc, argv, "d:v")) != -1) { switch (c) { case 'd': - devname = optarg; + udevname = optarg; break; case 'v': verbose = 1; break; default: usage(); /*NOTREACHED*/ } } argc -= optind; argv += optind; - if (devname == NULL) + if (udevname == NULL) errx(-1, "No device name; use -d to specify the ugen device"); if (argc > 1) usage(); if (argc == 1) fwname = argv[0]; else fwname = _PATH_FIRMWARE "/ar5523.bin"; fw = open(fwname, O_RDONLY, 0); if (fw < 0) err(-1, "open(%s)", fwname); if (fstat(fw, &sb) < 0) err(-1, "fstat(%s)", fwname); txdata = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fw, 0); if (txdata == MAP_FAILED) err(-1, "mmap(%s)", fwname); len = sb.st_size; /* XXX verify device is an AR5005 part */ - if (getdevname(devname, msgdev, datadev)) + if (getdevname(udevname, msgdev, datadev)) err(-1, "getdevname error"); msg = open(msgdev, O_RDWR, 0); if (msg < 0) err(-1, "open(%s)", msgdev); timeout = UATH_DATA_TIMEOUT; if (ioctl(msg, USB_SET_RX_TIMEOUT, &timeout) < 0) err(-1, "%s: USB_SET_RX_TIMEOUT(%u)", msgdev, UATH_DATA_TIMEOUT); if (ioctl(msg, USB_SET_RX_BUFFER_SIZE, &bufsize) < 0) err(-1, "%s: USB_SET_RX_BUFFER_SIZE(%u)", msgdev, bufsize); data = open(datadev, 
O_WRONLY, 0); if (data < 0) err(-1, "open(%s)", datadev); timeout = UATH_DATA_TIMEOUT; if (ioctl(data, USB_SET_TX_TIMEOUT, &timeout) < 0) err(-1, "%s: USB_SET_TX_TIMEOUT(%u)", datadev, UATH_DATA_TIMEOUT); - VERBOSE("Load firmware %s to %s\n", fwname, devname); + VERBOSE("Load firmware %s to %s\n", fwname, udevname); bzero(&txmsg, sizeof (struct uath_fwmsg)); txmsg.flags = htobe32(UATH_WRITE_BLOCK); txmsg.total = htobe32(len); b = 0; while (len > 0) { int mlen; mlen = len; if (mlen > UATH_MAX_FWBLOCK_SIZE) mlen = UATH_MAX_FWBLOCK_SIZE; txmsg.remain = htobe32(len - mlen); txmsg.len = htobe32(mlen); /* send firmware block meta-data */ VERBOSE("send block %2u: %zd bytes remaining", b, len - mlen); if (write(msg, &txmsg, sizeof(txmsg)) != sizeof(txmsg)) { VERBOSE("%s", "\n"); err(-1, "error sending msg (%s)", msgdev); break; } /* send firmware block data */ VERBOSE("%s", "\n : data..."); if (write(data, txdata, mlen) != mlen) { VERBOSE("%s", "\n"); err(-1, "error sending data (%s)", datadev); break; } /* wait for ack from firmware */ VERBOSE("%s", "\n : wait for ack..."); bzero(&rxmsg, sizeof(rxmsg)); if (read(msg, &rxmsg, sizeof(rxmsg)) != sizeof(rxmsg)) { VERBOSE("%s", "\n"); err(-1, "error reading msg (%s)", msgdev); break; } VERBOSE("flags=0x%x total=%d\n", be32toh(rxmsg.flags), be32toh(rxmsg.rxtotal)); len -= mlen; txdata += mlen; b++; } sleep(1); close(fw); close(msg); close(data); return 0; } Index: user/alc/PQ_LAUNDRY =================================================================== --- user/alc/PQ_LAUNDRY (revision 303641) +++ user/alc/PQ_LAUNDRY (revision 303642) Property changes on: user/alc/PQ_LAUNDRY ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r303517-303641