Index: user/ngie/more-tests/Makefile
===================================================================
--- user/ngie/more-tests/Makefile	(revision 281675)
+++ user/ngie/more-tests/Makefile	(revision 281676)
@@ -1,514 +1,525 @@
 #
 # $FreeBSD$
 #
 # The user-driven targets are:
 #
 # universe            - *Really* build *everything* (buildworld and
 #                       all kernels on all architectures).
 # tinderbox           - Same as universe, but presents a list of failed build
 #                       targets and exits with an error if there were any.
 # buildworld          - Rebuild *everything*, including glue to help do
 #                       upgrades.
 # installworld        - Install everything built by "buildworld".
 # world               - buildworld + installworld, no kernel.
 # buildkernel         - Rebuild the kernel and the kernel-modules.
 # installkernel       - Install the kernel and the kernel-modules.
 # installkernel.debug
 # reinstallkernel     - Reinstall the kernel and the kernel-modules.
 # reinstallkernel.debug
 # kernel              - buildkernel + installkernel.
 # kernel-toolchain    - Builds the subset of world necessary to build a kernel
 # kernel-toolchains   - Build kernel-toolchain for all universe targets.
 # doxygen             - Build API documentation of the kernel, needs doxygen.
 # update              - Convenient way to update your source tree(s).
 # check-old           - List obsolete directories/files/libraries.
 # check-old-dirs      - List obsolete directories.
 # check-old-files     - List obsolete files.
 # check-old-libs      - List obsolete libraries.
 # delete-old          - Delete obsolete directories/files.
 # delete-old-dirs     - Delete obsolete directories.
 # delete-old-files    - Delete obsolete files.
 # delete-old-libs     - Delete obsolete libraries.
 # targets             - Print a list of supported TARGET/TARGET_ARCH pairs
 #                       for world and kernel targets.
 # toolchains          - Build a toolchain for all world and kernel targets.
 # xdev                - xdev-build + xdev-install for the architecture
 #                       specified with XDEV and XDEV_ARCH.
 # xdev-build          - Build cross-development tools.
 # xdev-install        - Install cross-development tools.
 # xdev-links          - Create traditional links in /usr/bin for cc, etc
 # native-xtools       - Create host binaries that produce target objects
 #                       for use in qemu user-mode jails.
 # 
 # "quick" way to test all kernel builds:
 # 	_jflag=`sysctl -n hw.ncpu`
 # 	_jflag=$(($_jflag * 2))
 # 	[ $_jflag -gt 12 ] && _jflag=12
 # 	make universe -DMAKE_JUST_KERNELS JFLAG=-j${_jflag}
 #
 # This makefile is simple by design. The FreeBSD make automatically reads
 # the /usr/share/mk/sys.mk unless the -m argument is specified on the
 # command line. By keeping this makefile simple, it doesn't matter too
 # much how different the installed mk files are from those in the source
 # tree. This makefile executes a child make process, forcing it to use
 # the mk files from the source tree which are supposed to DTRT.
 #
 # Most of the user-driven targets (as listed above) are implemented in
 # Makefile.inc1.  The exceptions are universe, tinderbox and targets.
 #
 # If you want to build your system from source be sure that /usr/obj has
 # at least 1GB of disk space available.  A complete 'universe' build requires
 # about 15GB of space.
 #
 # For individuals wanting to build from the sources currently on their
 # system, the simple instructions are:
 #
 # 1.  `cd /usr/src'  (or to the directory containing your source tree).
 # 2.  Define `HISTORICAL_MAKE_WORLD' variable (see README).
 # 3.  `make world'
 #
 # For individuals wanting to upgrade their sources (even if only a
 # delta of a few days):
 #
 #  1.  `cd /usr/src'       (or to the directory containing your source tree).
 #  2.  `make buildworld'
 #  3.  `make buildkernel KERNCONF=YOUR_KERNEL_HERE'     (default is GENERIC).
 #  4.  `make installkernel KERNCONF=YOUR_KERNEL_HERE'   (default is GENERIC).
 #       [steps 3. & 4. can be combined by using the "kernel" target]
 #  5.  `reboot'        (in single user mode: boot -s from the loader prompt).
 #  6.  `mergemaster -p'
 #  7.  `make installworld'
 #  8.  `mergemaster'		(you may wish to use -i, along with -U or -F).
 #  9.  `make delete-old'
 # 10.  `reboot'
 # 11.  `make delete-old-libs' (in case no 3rd party program uses them anymore)
 #
 # See src/UPDATING `COMMON ITEMS' for more complete information.
 #
 # If TARGET=machine (e.g. powerpc, sparc64, ...) is specified you can
 # cross build world for other machine types using the buildworld target,
 # and once the world is built you can cross build a kernel using the
 # buildkernel target.
 #
 # Define the user-driven targets. These are listed here in alphabetical
 # order, but that's not important.
 #
 # Targets that begin with underscore are internal targets intended for
 # developer convenience only.  They are intentionally not documented and
 # completely subject to change without notice.
 #
 # For more information, see the build(7) manual page.
 #
 TGTS=	all all-man buildenv buildenvvars buildkernel buildworld \
 	check-old check-old-dirs check-old-files check-old-libs \
 	checkdpadd clean cleandepend cleandir \
 	delete-old delete-old-dirs delete-old-files delete-old-libs \
 	depend distribute distributekernel distributekernel.debug \
 	distributeworld distrib-dirs distribution doxygen \
 	everything hier hierarchy install installcheck installkernel \
 	installkernel.debug packagekernel packageworld \
 	reinstallkernel reinstallkernel.debug \
 	installworld kernel-toolchain libraries lint maninstall \
 	obj objlink regress rerelease showconfig tags toolchain update \
 	_worldtmp _legacy _bootstrap-tools _cleanobj _obj \
 	_build-tools _cross-tools _includes _libraries _depend \
 	build32 builddtb distribute32 install32 xdev xdev-build xdev-install \
 	xdev-links native-xtools \
 
 TGTS+=	${SUBDIR_TARGETS}
 
 BITGTS=	files includes
 BITGTS:=${BITGTS} ${BITGTS:S/^/build/} ${BITGTS:S/^/install/}
 TGTS+=	${BITGTS}
 
 .ORDER: buildworld installworld
 .ORDER: buildworld distributeworld
 .ORDER: buildworld buildkernel
 .ORDER: buildkernel installkernel
 .ORDER: buildkernel installkernel.debug
 .ORDER: buildkernel reinstallkernel
 .ORDER: buildkernel reinstallkernel.debug
 
 PATH=	/sbin:/bin:/usr/sbin:/usr/bin
 MAKEOBJDIRPREFIX?=	/usr/obj
 _MAKEOBJDIRPREFIX!= /usr/bin/env -i PATH=${PATH} ${MAKE} \
     ${.MAKEFLAGS:MMAKEOBJDIRPREFIX=*} __MAKE_CONF=${__MAKE_CONF} \
     -f /dev/null -V MAKEOBJDIRPREFIX dummy
 .if !empty(_MAKEOBJDIRPREFIX)
 .error MAKEOBJDIRPREFIX can only be set in environment, not as a global\
 	(in make.conf(5)) or command-line variable.
 .endif
 
 # We often need to use the tree's version of make to build it.
 # Choices add to complexity though.
 # We cannot blindly use a make which may not be the one we want
 # so be explicit - until all choice is removed.
 WANT_MAKE=	bmake
 MYMAKE=		${MAKEOBJDIRPREFIX}${.CURDIR}/make.${MACHINE}/${WANT_MAKE}
 .if defined(.PARSEDIR)
 HAVE_MAKE=	bmake
 .else
 HAVE_MAKE=	fmake
 .endif
 .if exists(${MYMAKE})
 SUB_MAKE:= ${MYMAKE} -m ${.CURDIR}/share/mk
 .elif ${WANT_MAKE} != ${HAVE_MAKE}
 # It may not exist yet but we may cause it to.
 # In the case of fmake, upgrade_checks may cause a newer version to be built.
 SUB_MAKE= `test -x ${MYMAKE} && echo ${MYMAKE} || echo ${MAKE}` \
 	-m ${.CURDIR}/share/mk
 .else
 SUB_MAKE= ${MAKE} -m ${.CURDIR}/share/mk
 .endif
 
 _MAKE=	PATH=${PATH} ${SUB_MAKE} -f Makefile.inc1 TARGET=${_TARGET} TARGET_ARCH=${_TARGET_ARCH}
 
 # Guess machine architecture from machine type, and vice versa.
 .if !defined(TARGET_ARCH) && defined(TARGET)
 _TARGET_ARCH=	${TARGET:S/pc98/i386/:S/arm64/aarch64/}
 .elif !defined(TARGET) && defined(TARGET_ARCH) && \
     ${TARGET_ARCH} != ${MACHINE_ARCH}
 _TARGET=		${TARGET_ARCH:C/mips(n32|64)?(el)?/mips/:C/arm(v6)?(eb|hf)?/arm/:C/aarch64/arm64/:C/powerpc64/powerpc/}
 .endif
 .if defined(TARGET) && !defined(_TARGET)
 _TARGET=${TARGET}
 .endif
 .if defined(TARGET_ARCH) && !defined(_TARGET_ARCH)
 _TARGET_ARCH=${TARGET_ARCH}
 .endif
 # for historical compatibility for xdev targets
 .if defined(XDEV)
 _TARGET=	${XDEV}
 .endif
 .if defined(XDEV_ARCH)
 _TARGET_ARCH=	${XDEV_ARCH}
 .endif
 # Otherwise, default to current machine type and architecture.
 _TARGET?=	${MACHINE}
 _TARGET_ARCH?=	${MACHINE_ARCH}
 
 #
 # Make sure we have an up-to-date make(1). Only world and buildworld
 # should do this as those are the initial targets used for upgrades.
 # The user can define ALWAYS_CHECK_MAKE to have this check performed
 # for all targets.
 #
 .if defined(ALWAYS_CHECK_MAKE)
 ${TGTS}: upgrade_checks
 .else
 buildworld: upgrade_checks
 .endif
 
 #
 # This 'cleanworld' target is not included in TGTS, because it is not a
 # recursive target.  All of the work for it is done right here.   It is
 # expected that BW_CANONICALOBJDIR == the CANONICALOBJDIR as would be
 # created by bsd.obj.mk, except that we don't want to .include that file
 # in this makefile.  
 #
 # In the following, the first 'rm' in a series will usually remove all
 # files and directories.  If it does not, then there are probably some
 # files with file flags set, so this unsets them and tries the 'rm' a
 # second time.  There are situations where this target will be cleaning
 # some directories via more than one method, but that duplication is
 # needed to correctly handle all the possible situations.  Removing all
 # files without file flags set in the first 'rm' instance saves time,
 # because 'chflags' will need to operate on fewer files afterwards.
 #
 BW_CANONICALOBJDIR:=${MAKEOBJDIRPREFIX}${.CURDIR}
 cleanworld:
 .if ${.CURDIR} == ${.OBJDIR} || ${.CURDIR}/obj == ${.OBJDIR}
 .if exists(${BW_CANONICALOBJDIR}/)
 	-rm -rf ${BW_CANONICALOBJDIR}/*
 	-chflags -R 0 ${BW_CANONICALOBJDIR}
 	rm -rf ${BW_CANONICALOBJDIR}/*
 .endif
 	#   To be safe in this case, fall back to a 'make cleandir'
 	${_+_}@cd ${.CURDIR}; ${_MAKE} cleandir
 .else
 	-rm -rf ${.OBJDIR}/*
 	-chflags -R 0 ${.OBJDIR}
 	rm -rf ${.OBJDIR}/*
 .endif
 
 #
 # Handle the user-driven targets, using the source relative mk files.
 #
 
 .if empty(.MAKEFLAGS:M-n)
 # skip this for -n to avoid changing previous behavior of 
 # 'make -n buildworld' etc.
 ${TGTS}: .MAKE
 tinderbox toolchains kernel-toolchains: .MAKE
 .endif
 
 ${TGTS}:
 	${_+_}@cd ${.CURDIR}; ${_MAKE} ${.TARGET}
 
 # The historic default "all" target creates files which may cause stale
 # or (in the cross build case) unlinkable results. Fail with an error
 # when no target is given. The users can explicitly specify "all"
 # if they want the historic behavior.
 .MAIN:	_guard
 
 _guard:
 	@echo
 	@echo "Explicit target required (use \"all\" for historic behavior)"
 	@echo
 	@false
 
 STARTTIME!= LC_ALL=C date
 CHECK_TIME!= find ${.CURDIR}/sys/sys/param.h -mtime -0s ; echo
 .if !empty(CHECK_TIME)
 .error check your date/time: ${STARTTIME}
 .endif
 
 .if defined(HISTORICAL_MAKE_WORLD) || defined(DESTDIR)
 #
 # world
 #
 # Attempt to rebuild and reinstall everything. This target is not to be
 # used for upgrading an existing FreeBSD system, because the kernel is
 # not included. One can argue that this target doesn't build everything
 # then.
 #
 world: upgrade_checks
 	@echo "--------------------------------------------------------------"
 	@echo ">>> make world started on ${STARTTIME}"
 	@echo "--------------------------------------------------------------"
 .if target(pre-world)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Making 'pre-world' target"
 	@echo "--------------------------------------------------------------"
 	${_+_}@cd ${.CURDIR}; ${_MAKE} pre-world
 .endif
 	${_+_}@cd ${.CURDIR}; ${_MAKE} buildworld
 	${_+_}@cd ${.CURDIR}; ${_MAKE} -B installworld
 .if target(post-world)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Making 'post-world' target"
 	@echo "--------------------------------------------------------------"
 	${_+_}@cd ${.CURDIR}; ${_MAKE} post-world
 .endif
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> make world completed on `LC_ALL=C date`"
 	@echo "                   (started ${STARTTIME})"
 	@echo "--------------------------------------------------------------"
 .else
 world:
 	@echo "WARNING: make world will overwrite your existing FreeBSD"
 	@echo "installation without also building and installing a new"
 	@echo "kernel.  This can be dangerous.  Please read the handbook,"
 	@echo "'Rebuilding world', for how to upgrade your system."
 	@echo "Define DESTDIR to where you want to install FreeBSD,"
 	@echo "including /, to override this warning and proceed as usual."
 	@echo ""
 	@echo "Bailing out now..."
 	@false
 .endif
 
 #
 # kernel
 #
 # Short hand for `make buildkernel installkernel'
 #
 kernel: buildkernel installkernel
 
 #
 # Perform a few tests to determine if the installed tools are adequate
 # for building the world.
 #
 # Note: if we ever need to care about the version of bmake, simply testing
 # MAKE_VERSION against a required version should suffice.
 #
 upgrade_checks:
 .if ${HAVE_MAKE} != ${WANT_MAKE}
 	@(cd ${.CURDIR} && ${MAKE} ${WANT_MAKE:S,^f,,})
 .endif
 
 #
 # Upgrade make(1) to the current version using the installed
 # headers, libraries and tools.  Also, allow the location of
 # the system bsdmake-like utility to be overridden.
 #
 MMAKEENV=	MAKEOBJDIRPREFIX=${MYMAKE:H} \
 		DESTDIR= \
 		INSTALL="sh ${.CURDIR}/tools/install.sh"
 MMAKE=		${MMAKEENV} ${MAKE} \
 		-DNO_MAN -DNO_SHARED \
 		-DNO_CPU_CFLAGS -DNO_WERROR \
 		MK_TESTS=no \
 		DESTDIR= PROGNAME=${MYMAKE:T}
 
 bmake: .PHONY
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Building an up-to-date ${.TARGET}(1)"
 	@echo "--------------------------------------------------------------"
 	${_+_}@cd ${.CURDIR}/usr.bin/${.TARGET}; \
 		${MMAKE} obj && \
 		${MMAKE} depend && \
 		${MMAKE} all && \
 		${MMAKE} install DESTDIR=${MYMAKE:H} BINDIR=
 
 tinderbox toolchains kernel-toolchains: upgrade_checks
 
 tinderbox:
 	@cd ${.CURDIR} && ${SUB_MAKE} DOING_TINDERBOX=YES universe
 
 toolchains:
 	@cd ${.CURDIR} && ${SUB_MAKE} UNIVERSE_TARGET=toolchain universe
 
 kernel-toolchains:
 	@cd ${.CURDIR} && ${SUB_MAKE} UNIVERSE_TARGET=kernel-toolchain universe
 
 #
 # universe
 #
 # Attempt to rebuild *everything* for all supported architectures,
 # with a reasonable chance of success, regardless of how old your
 # existing system is.
 #
 .if make(universe) || make(universe_kernels) || make(tinderbox) || make(targets)
 TARGETS?=amd64 arm i386 mips pc98 powerpc sparc64
+# XXX Add arm64 to universe only if we have an external binutils installed.
+# It does not build with the in-tree linker.
+.if exists(/usr/local/aarch64-freebsd/bin/ld)
+TARGETS+=arm64
+TARGET_ARCHES_arm64?=	aarch64
+.else
+universe: universe_arm64_skip
+universe_epilogue: universe_arm64_skip
+universe_arm64_skip: universe_prologue
+	@echo ">> arm64 skipped - install aarch64-binutils port or package to build"
+.endif
 TARGET_ARCHES_arm?=	arm armeb armv6 armv6hf
 TARGET_ARCHES_mips?=	mipsel mips mips64el mips64 mipsn32
 TARGET_ARCHES_powerpc?=	powerpc powerpc64
 TARGET_ARCHES_pc98?=	i386
 .for target in ${TARGETS}
 TARGET_ARCHES_${target}?= ${target}
 .endfor
 
 .if defined(UNIVERSE_TARGET)
 MAKE_JUST_WORLDS=	YES
 .else
 UNIVERSE_TARGET?=	buildworld
 .endif
 KERNSRCDIR?=		${.CURDIR}/sys
 
 targets:
 	@echo "Supported TARGET/TARGET_ARCH pairs for world and kernel targets"
 .for target in ${TARGETS}
 .for target_arch in ${TARGET_ARCHES_${target}}
 	@echo "    ${target}/${target_arch}"
 .endfor
 .endfor
 
 .if defined(DOING_TINDERBOX)
 FAILFILE=${.CURDIR}/_.tinderbox.failed
 MAKEFAIL=tee -a ${FAILFILE}
 .else
 MAKEFAIL=cat
 .endif
 
 universe_prologue:  upgrade_checks
 universe: universe_prologue
 universe_prologue:
 	@echo "--------------------------------------------------------------"
 	@echo ">>> make universe started on ${STARTTIME}"
 	@echo "--------------------------------------------------------------"
 .if defined(DOING_TINDERBOX)
 	@rm -f ${FAILFILE}
 .endif
 .for target in ${TARGETS}
 universe: universe_${target}
 universe_epilogue: universe_${target}
 universe_${target}: universe_${target}_prologue
 universe_${target}_prologue: universe_prologue
 	@echo ">> ${target} started on `LC_ALL=C date`"
 .if !defined(MAKE_JUST_KERNELS)
 .for target_arch in ${TARGET_ARCHES_${target}}
 universe_${target}: universe_${target}_${target_arch}
 universe_${target}_${target_arch}: universe_${target}_prologue .MAKE
 	@echo ">> ${target}.${target_arch} ${UNIVERSE_TARGET} started on `LC_ALL=C date`"
 	@(cd ${.CURDIR} && env __MAKE_CONF=/dev/null \
 	    ${SUB_MAKE} ${JFLAG} ${UNIVERSE_TARGET} \
 	    TARGET=${target} \
 	    TARGET_ARCH=${target_arch} \
 	    > _.${target}.${target_arch}.${UNIVERSE_TARGET} 2>&1 || \
 	    (echo "${target}.${target_arch} ${UNIVERSE_TARGET} failed," \
 	    "check _.${target}.${target_arch}.${UNIVERSE_TARGET} for details" | \
 	    ${MAKEFAIL}))
 	@echo ">> ${target}.${target_arch} ${UNIVERSE_TARGET} completed on `LC_ALL=C date`"
 .endfor
 .endif
 .if !defined(MAKE_JUST_WORLDS)
 # If we are building world and kernels, wait for the required worlds to finish
 .if !defined(MAKE_JUST_KERNELS)
 .for target_arch in ${TARGET_ARCHES_${target}}
 universe_${target}_kernels: universe_${target}_${target_arch}
 .endfor
 .endif
 universe_${target}: universe_${target}_kernels
 universe_${target}_kernels: universe_${target}_prologue .MAKE
 .if exists(${KERNSRCDIR}/${target}/conf/NOTES)
 	@(cd ${KERNSRCDIR}/${target}/conf && env __MAKE_CONF=/dev/null \
 	    ${SUB_MAKE} LINT > ${.CURDIR}/_.${target}.makeLINT 2>&1 || \
 	    (echo "${target} 'make LINT' failed," \
 	    "check _.${target}.makeLINT for details"| ${MAKEFAIL}))
 .endif
 	@cd ${.CURDIR} && ${SUB_MAKE} ${.MAKEFLAGS} TARGET=${target} \
 	    universe_kernels
 .endif
 	@echo ">> ${target} completed on `LC_ALL=C date`"
 .endfor
 universe_kernels: universe_kernconfs
 .if !defined(TARGET)
 TARGET!=	uname -m
 .endif
 .if defined(MAKE_ALL_KERNELS)
 _THINNER=cat
 .else
 _THINNER=xargs grep -L "^.NO_UNIVERSE" || true
 .endif
 KERNCONFS!=	cd ${KERNSRCDIR}/${TARGET}/conf && \
 		find [A-Z0-9]*[A-Z0-9] -type f -maxdepth 0 \
 		! -name DEFAULTS ! -name NOTES | \
 		${_THINNER}
 universe_kernconfs:
 .for kernel in ${KERNCONFS}
 TARGET_ARCH_${kernel}!=	cd ${KERNSRCDIR}/${TARGET}/conf && \
 	config -m ${KERNSRCDIR}/${TARGET}/conf/${kernel} 2> /dev/null | \
 	grep -v WARNING: | cut -f 2
 .if empty(TARGET_ARCH_${kernel})
 .error "Target architecture for ${TARGET}/conf/${kernel} unknown.  config(8) likely too old."
 .endif
 universe_kernconfs: universe_kernconf_${TARGET}_${kernel}
 universe_kernconf_${TARGET}_${kernel}: .MAKE
 	@(cd ${.CURDIR} && env __MAKE_CONF=/dev/null \
 	    ${SUB_MAKE} ${JFLAG} buildkernel \
 	    TARGET=${TARGET} \
 	    TARGET_ARCH=${TARGET_ARCH_${kernel}} \
 	    KERNCONF=${kernel} \
 	    > _.${TARGET}.${kernel} 2>&1 || \
 	    (echo "${TARGET} ${kernel} kernel failed," \
 	    "check _.${TARGET}.${kernel} for details"| ${MAKEFAIL}))
 .endfor
 universe: universe_epilogue
 universe_epilogue:
 	@echo "--------------------------------------------------------------"
 	@echo ">>> make universe completed on `LC_ALL=C date`"
 	@echo "                      (started ${STARTTIME})"
 	@echo "--------------------------------------------------------------"
 .if defined(DOING_TINDERBOX)
 	@if [ -e ${FAILFILE} ] ; then \
 		echo "Tinderbox failed:" ;\
 		cat ${FAILFILE} ;\
 		exit 1 ;\
 	fi
 .endif
 .endif
 
 buildLINT:
 	${MAKE} -C ${.CURDIR}/sys/${_TARGET}/conf LINT
 
 .if defined(.PARSEDIR)
 .if make(universe)
 # We do not want a failure in one branch to abort all the others.
 MAKE_JOB_ERROR_TOKEN= no
 .export MAKE_JOB_ERROR_TOKEN
 .endif
 .endif
Index: user/ngie/more-tests/Makefile.inc1
===================================================================
--- user/ngie/more-tests/Makefile.inc1	(revision 281675)
+++ user/ngie/more-tests/Makefile.inc1	(revision 281676)
@@ -1,2206 +1,2202 @@
 #
 # $FreeBSD$
 #
 # Make command line options:
 #	-DNO_CLEANDIR run ${MAKE} clean, instead of ${MAKE} cleandir
 #	-DNO_CLEAN do not clean at all
 #	-DDB_FROM_SRC use the user/group databases in src/etc instead of
 #	    the system database when installing.
 #	-DNO_SHARE do not go into share subdir
 #	-DKERNFAST define NO_KERNEL{CONFIG,CLEAN,DEPEND,OBJ}
 #	-DNO_KERNELCONFIG do not run config in ${MAKE} buildkernel
 #	-DNO_KERNELCLEAN do not run ${MAKE} clean in ${MAKE} buildkernel
 #	-DNO_KERNELDEPEND do not run ${MAKE} depend in ${MAKE} buildkernel
 #	-DNO_KERNELOBJ do not run ${MAKE} obj in ${MAKE} buildkernel
 #	-DNO_PORTSUPDATE do not update ports in ${MAKE} update
 #	-DNO_ROOT install without using root privilege
 #	-DNO_DOCUPDATE do not update doc in ${MAKE} update
 #	-DWITHOUT_CTF do not run the DTrace CTF conversion tools on built objects
 #	LOCAL_DIRS="list of dirs" to add additional dirs to the SUBDIR list
 #	LOCAL_ITOOLS="list of tools" to add additional tools to the ITOOLS list
 #	LOCAL_LIB_DIRS="list of dirs" to add additional dirs to libraries target
 #	LOCAL_MTREE="list of mtree files" to process to allow local directories
 #	    to be created before files are installed
 #	LOCAL_TOOL_DIRS="list of dirs" to add additional dirs to the build-tools
 #	    list
 #	METALOG="path to metadata log" to write permission and ownership
 #	    when NO_ROOT is set.  (default: ${DESTDIR}/METALOG)
 #	TARGET="machine" to crossbuild world for a different machine type
 #	TARGET_ARCH= may be required when a TARGET supports multiple endians
 #	BUILDENV_SHELL= shell to launch for the buildenv target (def:/bin/sh)
 #	WORLD_FLAGS= additional flags to pass to make(1) during buildworld
 #	KERNEL_FLAGS= additional flags to pass to make(1) during buildkernel
 
 #
 # The intended user-driven targets are:
 # buildworld  - rebuild *everything*, including glue to help do upgrades
 # installworld- install everything built by "buildworld"
 # doxygen     - build API documentation of the kernel
 # update      - convenient way to update your source tree (eg: svn/svnup)
 #
 # Standard targets (not defined here) are documented in the makefiles in
 # /usr/share/mk.  These include:
 #		obj depend all install clean cleandepend cleanobj
 
 .if !defined(TARGET) || !defined(TARGET_ARCH)
 .error "Both TARGET and TARGET_ARCH must be defined."
 .endif
 
 .include "share/mk/src.opts.mk"
 .include <bsd.arch.inc.mk>
 .include <bsd.compiler.mk>
 
 # We must do lib/ and libexec/ before bin/, because if installworld
 # installs a new /bin/sh, the 'make' command will *immediately*
 # use that new version.  And the new (dynamically-linked) /bin/sh
 # will expect to find appropriate libraries in /lib and /libexec.
 #
 SRCDIR?=	${.CURDIR}
 .if defined(SUBDIR_OVERRIDE)
 SUBDIR=	${SUBDIR_OVERRIDE}
 .else
 SUBDIR=	lib libexec
 SUBDIR+=bin
 .if ${MK_GAMES} != "no"
 SUBDIR+=games
 .endif
 .if ${MK_CDDL} != "no"
 SUBDIR+=cddl
 .endif
 SUBDIR+=gnu include
 .if ${MK_KERBEROS} != "no"
 SUBDIR+=kerberos5
 .endif
 .if ${MK_RESCUE} != "no"
 SUBDIR+=rescue
 .endif
 SUBDIR+=sbin
 .if ${MK_CRYPT} != "no"
 SUBDIR+=secure
 .endif
 .if !defined(NO_SHARE)
 SUBDIR+=share
 .endif
 SUBDIR+=sys usr.bin usr.sbin
 .if ${MK_TESTS} != "no"
 SUBDIR+=	tests
 .endif
 .if ${MK_OFED} != "no"
 SUBDIR+=contrib/ofed
 .endif
 #
 # We must do etc/ last for install/distribute to work.
 #
 SUBDIR+=etc
 
 # Local directories are last, since it is nice to at least get the base
 # system rebuilt before you do them.
 .for _DIR in ${LOCAL_DIRS}
 .if exists(${.CURDIR}/${_DIR}/Makefile)
 SUBDIR+=	${_DIR}
 .endif
 .endfor
 # Add LOCAL_LIB_DIRS, but only if they will not be picked up as a SUBDIR
 # of a LOCAL_DIRS directory.  This allows LOCAL_DIRS=foo and
 # LOCAL_LIB_DIRS=foo/lib to behave as expected.
 .for _DIR in ${LOCAL_DIRS:M*/} ${LOCAL_DIRS:N*/:S|$|/|}
 _REDUNDENT_LIB_DIRS+=    ${LOCAL_LIB_DIRS:M${_DIR}*}
 .endfor
 .for _DIR in ${LOCAL_LIB_DIRS}
 .if empty(_REDUNDENT_LIB_DIRS:M${_DIR}) && exists(${.CURDIR}/${_DIR}/Makefile)
 SUBDIR+=	${_DIR}
 .else
 .warning ${_DIR} not added to SUBDIR list.  See UPDATING 20141121.
 .endif
 .endfor
 .endif
 
 .if defined(NOCLEAN)
 NO_CLEAN=	${NOCLEAN}
 .endif
 .if defined(NO_CLEANDIR)
 CLEANDIR=	clean cleandepend
 .else
 CLEANDIR=	cleandir
 .endif
 
 LOCAL_TOOL_DIRS?=
 
 BUILDENV_SHELL?=/bin/sh
 
 SVN?=		/usr/local/bin/svn
 SVNFLAGS?=	-r HEAD
 
 MAKEOBJDIRPREFIX?=	/usr/obj
 .if !defined(OSRELDATE)
 .if exists(/usr/include/osreldate.h)
 OSRELDATE!=	awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \
 		/usr/include/osreldate.h
 .else
 OSRELDATE=	0
 .endif
 .endif
 
 .if !defined(VERSION)
 REVISION!=	${MAKE} -C ${SRCDIR}/release -V REVISION
 BRANCH!=	${MAKE} -C ${SRCDIR}/release -V BRANCH
 SRCRELDATE!=	awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \
 		${SRCDIR}/sys/sys/param.h
 VERSION=	FreeBSD ${REVISION}-${BRANCH:C/-p[0-9]+$//} ${TARGET_ARCH} ${SRCRELDATE}
 .endif
 
 KNOWN_ARCHES?=	aarch64/arm64 amd64 arm armeb/arm armv6/arm armv6hf/arm i386 i386/pc98 mips mipsel/mips mips64el/mips mips64/mips mipsn32el/mips mipsn32/mips powerpc powerpc64/powerpc sparc64
 .if ${TARGET} == ${TARGET_ARCH}
 _t=		${TARGET}
 .else
 _t=		${TARGET_ARCH}/${TARGET}
 .endif
 .for _t in ${_t}
 .if empty(KNOWN_ARCHES:M${_t})
 .error Unknown target ${TARGET_ARCH}:${TARGET}.
 .endif
 .endfor
 
 .if ${TARGET} == ${MACHINE}
 TARGET_CPUTYPE?=${CPUTYPE}
 .else
 TARGET_CPUTYPE?=
 .endif
 
 .if !empty(TARGET_CPUTYPE)
 _TARGET_CPUTYPE=${TARGET_CPUTYPE}
 .else
 _TARGET_CPUTYPE=dummy
 .endif
 _CPUTYPE!=	MAKEFLAGS= CPUTYPE=${_TARGET_CPUTYPE} ${MAKE} \
 		-f /dev/null -m ${.CURDIR}/share/mk -V CPUTYPE
 .if ${_CPUTYPE} != ${_TARGET_CPUTYPE}
 .error CPUTYPE global should be set with ?=.
 .endif
 .if make(buildworld)
 BUILD_ARCH!=	uname -p
 .if ${MACHINE_ARCH} != ${BUILD_ARCH}
 .error To cross-build, set TARGET_ARCH.
 .endif
 .endif
 .if ${MACHINE} == ${TARGET} && ${MACHINE_ARCH} == ${TARGET_ARCH} && !defined(CROSS_BUILD_TESTING)
 OBJTREE=	${MAKEOBJDIRPREFIX}
 .else
 OBJTREE=	${MAKEOBJDIRPREFIX}/${TARGET}.${TARGET_ARCH}
 .endif
 WORLDTMP=	${OBJTREE}${.CURDIR}/tmp
 BPATH=		${WORLDTMP}/legacy/usr/sbin:${WORLDTMP}/legacy/usr/bin:${WORLDTMP}/legacy/bin
 XPATH=		${WORLDTMP}/usr/sbin:${WORLDTMP}/usr/bin
 STRICTTMPPATH=	${BPATH}:${XPATH}
 TMPPATH=	${STRICTTMPPATH}:${PATH}
 
 #
 # Avoid running mktemp(1) unless actually needed.
 # It may not be functional, e.g., due to new ABI
 # when in the middle of installing over this system.
 #
 .if make(distributeworld) || make(installworld)
 INSTALLTMP!=	/usr/bin/mktemp -d -u -t install
 .endif
 
 #
 # Building a world goes through the following stages
 #
 # 1. legacy stage [BMAKE]
 #	This stage is responsible for creating compatibility
 #	shims that are needed by the bootstrap-tools,
 #	build-tools and cross-tools stages. These are generally
 #	APIs that tools from one of those three stages need to
 #	build that aren't present on the host.
 # 2. bootstrap-tools stage [BMAKE]
 #	This stage is responsible for creating programs that
 #	are needed for backward compatibility reasons. They
 #	are not built as cross-tools.
 # 3. build-tools stage [TMAKE]
 #	This stage is responsible for creating the object
 #	tree and building any tools that are needed during
 #	the build process.
 # 4. cross-tools stage [XMAKE]
 #	This stage is responsible for creating any tools that
 #	are needed for building the system. A cross-compiler is one
 #	of them.
 # 5. world stage [WMAKE]
 #	This stage actually builds the world.
 # 6. install stage (optional) [IMAKE]
 #	This stage installs a previously built world.
 #
 
 BOOTSTRAPPING?=	0
 
 # Common environment for world related stages
 CROSSENV=	MAKEOBJDIRPREFIX=${OBJTREE} \
 		MACHINE_ARCH=${TARGET_ARCH} \
 		MACHINE=${TARGET} \
 		CPUTYPE=${TARGET_CPUTYPE}
 .if ${MK_GROFF} != "no"
 CROSSENV+=	GROFF_BIN_PATH=${WORLDTMP}/legacy/usr/bin \
 		GROFF_FONT_PATH=${WORLDTMP}/legacy/usr/share/groff_font \
 		GROFF_TMAC_PATH=${WORLDTMP}/legacy/usr/share/tmac
 .endif
 .if defined(TARGET_CFLAGS)
 CROSSENV+=	${TARGET_CFLAGS}
 .endif
 
 # bootstrap-tools stage
 BMAKEENV=	INSTALL="sh ${.CURDIR}/tools/install.sh" \
 		PATH=${BPATH}:${PATH} \
 		WORLDTMP=${WORLDTMP} \
 		VERSION="${VERSION}" \
 		MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}"
 BMAKE=		MAKEOBJDIRPREFIX=${WORLDTMP} \
 		${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \
 		DESTDIR= \
 		BOOTSTRAPPING=${OSRELDATE} \
 		SSP_CFLAGS= \
 		MK_HTML=no NO_LINT=yes MK_MAN=no \
 		-DNO_PIC MK_PROFILE=no -DNO_SHARED \
 		-DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \
 		MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \
 		MK_LLDB=no MK_TESTS=no \
 		MK_INCLUDES=yes 
 
 # build-tools stage
 TMAKE=		MAKEOBJDIRPREFIX=${OBJTREE} \
 		${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \
 		TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \
 		DESTDIR= \
 		BOOTSTRAPPING=${OSRELDATE} \
 		SSP_CFLAGS= \
 		-DNO_LINT \
 		-DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \
 		MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \
 		MK_LLDB=no MK_TESTS=no
 
 # cross-tools stage
 XMAKE=		TOOLS_PREFIX=${WORLDTMP} ${BMAKE} \
 		TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \
 		MK_GDB=no MK_TESTS=no
 
 # kernel-tools stage
 KTMAKEENV=	INSTALL="sh ${.CURDIR}/tools/install.sh" \
 		PATH=${BPATH}:${PATH} \
 		WORLDTMP=${WORLDTMP} \
 		VERSION="${VERSION}"
 KTMAKE=		TOOLS_PREFIX=${WORLDTMP} MAKEOBJDIRPREFIX=${WORLDTMP} \
 		${KTMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \
 		DESTDIR= \
 		BOOTSTRAPPING=${OSRELDATE} \
 		SSP_CFLAGS= \
 		MK_HTML=no -DNO_LINT MK_MAN=no \
 		-DNO_PIC MK_PROFILE=no -DNO_SHARED \
 		-DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no
 
 # world stage
 WMAKEENV=	${CROSSENV} \
 		_SHLIBDIRPREFIX=${WORLDTMP} \
 		_LDSCRIPTROOT= \
 		VERSION="${VERSION}" \
 		INSTALL="sh ${.CURDIR}/tools/install.sh" \
 		PATH=${TMPPATH}
 
 # make hierarchy
 HMAKE=		PATH=${TMPPATH} ${MAKE} LOCAL_MTREE=${LOCAL_MTREE:Q}
 .if defined(NO_ROOT)
 HMAKE+=		PATH=${TMPPATH} METALOG=${METALOG} -DNO_ROOT
 .endif
 
 .if ${MK_CDDL} == "no"
 WMAKEENV+=	MK_CTF=no
 .endif
 
 .if defined(CROSS_TOOLCHAIN)
 LOCALBASE?=	/usr/local
 .include "${LOCALBASE}/share/toolchains/${CROSS_TOOLCHAIN}.mk"
 .endif
 .if defined(CROSS_TOOLCHAIN_PREFIX)
 CROSS_COMPILER_PREFIX?=${CROSS_TOOLCHAIN_PREFIX}
 CROSS_BINUTILS_PREFIX?=${CROSS_TOOLCHAIN_PREFIX}
 .endif
 
 # If we do not have a bootstrap binutils (because the in-tree one does not
 # support the target architecture), provide a default cross-binutils prefix.
 # This allows aarch64 builds, for example, to automatically use the
 # aarch64-binutils port or package.
 .if !empty(BROKEN_OPTIONS:MBINUTILS_BOOTSTRAP) && \
     !defined(CROSS_BINUTILS_PREFIX)
 CROSS_BINUTILS_PREFIX=/usr/local/${TARGET_ARCH}-freebsd/bin/
 .if !exists(${CROSS_BINUTILS_PREFIX})
 .error In-tree binutils does not support the ${TARGET_ARCH} architecture. Install the ${TARGET_ARCH}-binutils port or package or set CROSS_BINUTILS_PREFIX.
 .endif
 .endif
 
 XCOMPILERS=	CC CXX CPP
 .for COMPILER in ${XCOMPILERS}
 .if defined(CROSS_COMPILER_PREFIX)
 X${COMPILER}?=	${CROSS_COMPILER_PREFIX}${${COMPILER}}
 .else
 X${COMPILER}?=	${${COMPILER}}
 .endif
 .endfor
 XBINUTILS=	AS AR LD NM OBJCOPY OBJDUMP RANLIB SIZE STRINGS
 .for BINUTIL in ${XBINUTILS}
 .if defined(CROSS_BINUTILS_PREFIX) && \
     exists(${CROSS_BINUTILS_PREFIX}${${BINUTIL}})
 X${BINUTIL}?=	${CROSS_BINUTILS_PREFIX}${${BINUTIL}}
 .else
 X${BINUTIL}?=	${${BINUTIL}}
 .endif
 .endfor
 WMAKEENV+=	CC="${XCC} ${XCFLAGS}" CXX="${XCXX} ${XCFLAGS} ${XCXXFLAGS}" \
 		DEPFLAGS="${DEPFLAGS}" \
 		CPP="${XCPP} ${XCFLAGS}" \
 		AS="${XAS}" AR="${XAR}" LD="${XLD}" NM=${XNM} \
 		OBJDUMP=${XOBJDUMP} OBJCOPY="${XOBJCOPY}" \
 		RANLIB=${XRANLIB} STRINGS=${XSTRINGS} \
 		SIZE="${XSIZE}"
 
 # XCC is an absolute path: tell the compiler explicitly where the tools
 # (-B) and the temporary world (--sysroot) are and, for non-gcc compilers,
 # which target triple to generate code for (-target).
 .if ${XCC:M/*}
 .if defined(CROSS_BINUTILS_PREFIX)
 # In the case of xdev-build tools, CROSS_BINUTILS_PREFIX won't be a
 # directory, but the compiler will look in the right place for its
 # tools so we don't need to tell it where to look.
 .if exists(${CROSS_BINUTILS_PREFIX})
 BFLAGS+=	-B${CROSS_BINUTILS_PREFIX}
 .endif
 .else
 BFLAGS+=	-B${WORLDTMP}/usr/bin
 .endif
 # arm carries the float ABI in the target triple: architectures whose
 # TARGET_ARCH contains "hf" use gnueabihf, the others gnueabi.
 .if ${TARGET} == "arm"
 .if ${TARGET_ARCH:M*hf*} != ""
 TARGET_ABI=	gnueabihf
 .else
 TARGET_ABI=	gnueabi
 .endif
 .endif
 # gcc is given explicit header and library search paths under WORLDTMP;
 # any other compiler type gets a -target triple instead (amd64 is spelled
 # x86_64 in the triple).
 .if defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc
 XCFLAGS+=	-isystem ${WORLDTMP}/usr/include -L${WORLDTMP}/usr/lib
 XCXXFLAGS+=	-I${WORLDTMP}/usr/include/c++/v1 -std=gnu++11 -L${WORLDTMP}/../lib/libc++
 DEPFLAGS+=	-I${WORLDTMP}/usr/include/c++/v1
 .else
 TARGET_ABI?=	unknown
 TARGET_TRIPLE?=	${TARGET_ARCH:C/amd64/x86_64/}-${TARGET_ABI}-freebsd11.0
 XCFLAGS+=	-target ${TARGET_TRIPLE}
 .endif
 XCFLAGS+=	--sysroot=${WORLDTMP} ${BFLAGS}
 XCXXFLAGS+=	--sysroot=${WORLDTMP} ${BFLAGS}
 .else
 # XCC is not an absolute path: only add -B, and only when the cross
 # binutils directory exists.
 .if defined(CROSS_BINUTILS_PREFIX) && exists(${CROSS_BINUTILS_PREFIX})
 BFLAGS+=	-B${CROSS_BINUTILS_PREFIX}
 XCFLAGS+=	${BFLAGS}
 XCXXFLAGS+=	${BFLAGS}
 .endif
 .endif # ${XCC:M/*}
 
 WMAKE=		${WMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 DESTDIR=${WORLDTMP}
 
 .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64"
 # 32 bit world
 LIB32_OBJTREE=	${OBJTREE}${.CURDIR}/world32
 LIB32TMP=	${OBJTREE}${.CURDIR}/lib32
 
 .if ${TARGET_ARCH} == "amd64"
 .if empty(TARGET_CPUTYPE)
 LIB32CPUFLAGS=	-march=i686 -mmmx -msse -msse2
 .else
 LIB32CPUFLAGS=	-march=${TARGET_CPUTYPE}
 .endif
 LIB32WMAKEENV=	MACHINE=i386 MACHINE_ARCH=i386 \
 		MACHINE_CPU="i686 mmx sse sse2"
 LIB32WMAKEFLAGS=	\
 		AS="${XAS} --32" \
 		LD="${XLD} -m elf_i386_fbsd -Y P,${LIB32TMP}/usr/lib32" \
 		OBJCOPY="${XOBJCOPY}"
 
 .elif ${TARGET_ARCH} == "powerpc64"
 .if empty(TARGET_CPUTYPE)
 LIB32CPUFLAGS=	-mcpu=powerpc
 .else
 LIB32CPUFLAGS=	-mcpu=${TARGET_CPUTYPE}
 .endif
 LIB32WMAKEENV=	MACHINE=powerpc MACHINE_ARCH=powerpc
 LIB32WMAKEFLAGS=	\
 		LD="${XLD} -m elf32ppc_fbsd" \
 		OBJCOPY="${XOBJCOPY}"
 .endif
 
 
 LIB32FLAGS=	-m32 ${LIB32CPUFLAGS} -DCOMPAT_32BIT \
 		-isystem ${LIB32TMP}/usr/include/ \
 		-L${LIB32TMP}/usr/lib32 \
 		-B${LIB32TMP}/usr/lib32
 .if ${XCC:M/*}
 LIB32FLAGS+=		--sysroot=${WORLDTMP}
 .endif
 
 # Yes, the flags are redundant.
 LIB32WMAKEENV+=	MAKEOBJDIRPREFIX=${LIB32_OBJTREE} \
 		_SHLIBDIRPREFIX=${LIB32TMP} \
 		_LDSCRIPTROOT=${LIB32TMP} \
 		VERSION="${VERSION}" \
 		INSTALL="sh ${.CURDIR}/tools/install.sh" \
 		PATH=${TMPPATH} \
 		LIBDIR=/usr/lib32 \
 		SHLIBDIR=/usr/lib32 \
 		LIBPRIVATEDIR=/usr/lib32/private \
 		DTRACE="${DTRACE} -32"
 LIB32WMAKEFLAGS+= CC="${XCC} ${LIB32FLAGS}" \
 		CXX="${XCXX} ${LIB32FLAGS}" \
 		DESTDIR=${LIB32TMP} \
 		-DCOMPAT_32BIT \
 		-DLIBRARIES_ONLY \
 		-DNO_CPU_CFLAGS \
 		MK_CTF=no \
 		-DNO_LINT \
 		MK_TESTS=no
 
 # Command used to build the 32-bit compat libraries; manual pages and HTML
 # documentation are switched off.
 LIB32WMAKE=	${LIB32WMAKEENV} ${MAKE} ${LIB32WMAKEFLAGS} \
 		MK_MAN=no MK_HTML=no
 # Install-time variant: LIB32WMAKE without its INSTALL=, DESTDIR= and
 # _LDSCRIPTROOT= words, with the toolchain disabled and IMAKE_INSTALL
 # appended.
 LIB32IMAKE=	${LIB32WMAKE:NINSTALL=*:NDESTDIR=*:N_LDSCRIPTROOT=*} \
 		MK_TOOLCHAIN=no ${IMAKE_INSTALL}
 .endif
 
 # install stage: IMAKE is the make invocation used by the install targets.
 # Its environment is CROSSENV without the _LDSCRIPTROOT= word.
 IMAKEENV=	${CROSSENV:N_LDSCRIPTROOT=*}
 IMAKE=		${IMAKEENV} ${MAKE} -f Makefile.inc1 \
 		${IMAKE_INSTALL} ${IMAKE_MTREE}
 # Unless make was invoked with -n, run the install with PATH,
 # LD_LIBRARY_PATH and PATH_LOCALE pointing at INSTALLTMP and with
 # ${INSTALLTMP}/sh as make's shell.  With -n the regular TMPPATH is kept.
 .if empty(.MAKEFLAGS:M-n)
 IMAKEENV+=	PATH=${STRICTTMPPATH}:${INSTALLTMP} \
 		LD_LIBRARY_PATH=${INSTALLTMP} \
 		PATH_LOCALE=${INSTALLTMP}/locale
 IMAKE+=		__MAKE_SHELL=${INSTALLTMP}/sh
 .else
 IMAKEENV+=	PATH=${TMPPATH}:${INSTALLTMP}
 .endif
 # DB_FROM_SRC: pass -N ${.CURDIR}/etc to both install and mtree.
 .if defined(DB_FROM_SRC)
 INSTALLFLAGS+=	-N ${.CURDIR}/etc
 MTREEFLAGS+=	-N ${.CURDIR}/etc
 .endif
 # INSTALL_DDIR is ${DESTDIR}/${DISTDIR} with repeated slashes collapsed
 # and a trailing slash removed.
 _INSTALL_DDIR=	${DESTDIR}/${DISTDIR}
 INSTALL_DDIR=	${_INSTALL_DDIR:S://:/:g:C:/$::}
 # NO_ROOT: METALOG defaults to ${DESTDIR}/${DISTDIR}/METALOG and is passed
 # to the sub-make and to install (-U -M ... -D ...); mtree gets -W.  See
 # install(1) and mtree(8) for the flag semantics.
 .if defined(NO_ROOT)
 METALOG?=	${DESTDIR}/${DISTDIR}/METALOG
 IMAKE+=		-DNO_ROOT METALOG=${METALOG}
 INSTALLFLAGS+=	-U -M ${METALOG} -D ${INSTALL_DDIR}
 MTREEFLAGS+=	-W
 .endif
 # The INSTALL and MTREE_CMD overrides are only defined when one of the two
 # options above added flags.
 .if defined(DB_FROM_SRC) || defined(NO_ROOT)
 IMAKE_INSTALL=	INSTALL="install ${INSTALLFLAGS}"
 IMAKE_MTREE=	MTREE_CMD="mtree ${MTREEFLAGS}"
 .endif
 
 # kernel stage
 KMAKEENV=	${WMAKEENV}
 KMAKE=		${KMAKEENV} ${MAKE} ${.MAKEFLAGS} ${KERNEL_FLAGS} KERNEL=${INSTKERNNAME}
 
 #
 # buildworld
 #
 # Attempt to rebuild the entire system, with reasonable chance of
 # success, regardless of how old your existing system is.
 #
 # _worldtmp: (re)create the temporary build tree ${WORLDTMP} and lay out
 # its directory hierarchy from the mtree specifications in etc/mtree.
 _worldtmp:
 .if ${.CURDIR:C/[^,]//g} != ""
 #	The m4 build of sendmail files doesn't like it if ',' is used
 #	anywhere in the path of its files.
 	@echo
 	@echo "*** Error: path to source tree contains a comma ','"
 	@echo
 	false
 .endif
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Rebuilding the temporary build tree"
 	@echo "--------------------------------------------------------------"
 #	A clean build removes the whole temporary tree (and the 32-bit one
 #	when LIB32TMP is defined); with NO_CLEAN only the legacy headers and
 #	a few generated sources are removed.
 .if !defined(NO_CLEAN)
 	rm -rf ${WORLDTMP}
 .if defined(LIB32TMP)
 	rm -rf ${LIB32TMP}
 .endif
 .else
 	rm -rf ${WORLDTMP}/legacy/usr/include
 #	XXX - These three can depend on any header file.
 	rm -f ${OBJTREE}${.CURDIR}/usr.bin/kdump/ioctl.c
 	rm -f ${OBJTREE}${.CURDIR}/usr.bin/kdump/kdump_subr.c
 	rm -f ${OBJTREE}${.CURDIR}/usr.bin/truss/ioctl.c
 .endif
 .for _dir in \
     lib usr legacy/bin legacy/usr
 	mkdir -p ${WORLDTMP}/${_dir}
 .endfor
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${WORLDTMP}/legacy/usr >/dev/null
 .if ${MK_GROFF} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.groff.dist \
 	    -p ${WORLDTMP}/legacy/usr >/dev/null
 .endif
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${WORLDTMP}/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \
 	    -p ${WORLDTMP}/usr/include >/dev/null
 #	Make the kernel sources reachable as ${WORLDTMP}/sys.
 	ln -sf ${.CURDIR}/sys ${WORLDTMP}
 .if ${MK_DEBUG_FILES} != "no"
 	# We could instead disable debug files for these build stages
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \
 	    -p ${WORLDTMP}/legacy/usr/lib >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \
 	    -p ${WORLDTMP}/usr/lib >/dev/null
 .endif
 .if ${MK_TESTS} != "no"
 	mkdir -p ${WORLDTMP}${TESTSBASE}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \
 	    -p ${WORLDTMP}${TESTSBASE} >/dev/null
 .endif
 #	Extra hierarchies named in LOCAL_MTREE are relative to the source tree.
 .for _mtree in ${LOCAL_MTREE}
 	mtree -deU -f ${.CURDIR}/${_mtree} -p ${WORLDTMP} > /dev/null
 .endfor
 _legacy:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 1.1: legacy release compatibility shims"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${BMAKE} legacy
 _bootstrap-tools:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 1.2: bootstrap tools"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${BMAKE} bootstrap-tools
 # _cleanobj: does nothing when NO_CLEAN is defined.  Otherwise runs the
 # par-<target> form of every CLEANDIR target for the world and, when
 # LIB32TMP is defined, for the 32-bit compat build as well.
 _cleanobj:
 .if !defined(NO_CLEAN)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.1: cleaning up the object tree"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${WMAKE} ${CLEANDIR:S/^/par-/}
 .if defined(LIB32TMP)
 	${_+_}cd ${.CURDIR}; ${LIB32WMAKE} -f Makefile.inc1 ${CLEANDIR:S/^/par-/}
 .endif
 .endif
 _obj:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.2: rebuilding the object tree"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${WMAKE} par-obj
 _build-tools:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.3: build tools"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${TMAKE} build-tools
 _cross-tools:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 3: cross tools"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${XMAKE} cross-tools
 	${_+_}cd ${.CURDIR}; ${XMAKE} kernel-tools
 _includes:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 4.1: building includes"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${WMAKE} SHARED=symlinks par-includes
 # _libraries: build the libraries with HTML docs, manual pages, profiled
 # libraries and tests switched off.  The caller's MK_TESTS value is passed
 # down as MK_TESTS_SUPPORT.
 _libraries:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 4.2: building libraries"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; \
 	    ${WMAKE} -DNO_FSCHG MK_HTML=no -DNO_LINT MK_MAN=no \
 	    MK_PROFILE=no MK_TESTS=no MK_TESTS_SUPPORT=${MK_TESTS} libraries
 _depend:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 4.3: make dependencies"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${WMAKE} par-depend
 everything:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 4.4: building everything"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${WMAKE} par-all
 .if defined(LIB32TMP)
 # build32: build the 32-bit compat libraries, runtime linker and ldd into
 # ${LIB32TMP}.  Every DIRPRFX value ends in '/' because it is prepended
 # verbatim to subdirectory names in the "===>" progress lines.
 build32:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 5.1: building 32 bit shim libraries"
 	@echo "--------------------------------------------------------------"
 	mkdir -p ${LIB32TMP}/usr/include
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${LIB32TMP}/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \
 	    -p ${LIB32TMP}/usr/include >/dev/null
 .if ${MK_DEBUG_FILES} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \
 	    -p ${LIB32TMP}/usr/lib >/dev/null
 .endif
 	mkdir -p ${WORLDTMP}
 	ln -sf ${.CURDIR}/sys ${WORLDTMP}
 #	Create the object directories and install the headers first.
 .for _t in obj includes
 	cd ${.CURDIR}/include; ${LIB32WMAKE} DIRPRFX=include/ ${_t}
 	cd ${.CURDIR}/lib; ${LIB32WMAKE} DIRPRFX=lib/ ${_t}
 .if ${MK_CDDL} != "no"
 	cd ${.CURDIR}/cddl/lib; ${LIB32WMAKE} DIRPRFX=cddl/lib/ ${_t}
 .endif
 	cd ${.CURDIR}/gnu/lib; ${LIB32WMAKE} DIRPRFX=gnu/lib/ ${_t}
 .if ${MK_CRYPT} != "no"
 	cd ${.CURDIR}/secure/lib; ${LIB32WMAKE} DIRPRFX=secure/lib/ ${_t}
 .endif
 .if ${MK_KERBEROS} != "no"
 	cd ${.CURDIR}/kerberos5/lib; ${LIB32WMAKE} DIRPRFX=kerberos5/lib/ ${_t}
 .endif
 .endfor
 .for _dir in usr.bin/lex/lib
 	cd ${.CURDIR}/${_dir}; ${LIB32WMAKE} DIRPRFX=${_dir}/ obj
 .endfor
 #	The build-tools of these libraries are built with plain ${MAKE} and
 #	an empty DESTDIR rather than with LIB32WMAKE.
 .for _dir in lib/ncurses/ncurses lib/ncurses/ncursesw lib/libmagic
 	cd ${.CURDIR}/${_dir}; \
 	    WORLDTMP=${WORLDTMP} \
 	    MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}" \
 	    MAKEOBJDIRPREFIX=${LIB32_OBJTREE} ${MAKE} SSP_CFLAGS= DESTDIR= \
 	    DIRPRFX=${_dir}/ -DNO_LINT -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \
 	    build-tools
 .endfor
 	cd ${.CURDIR}; \
 	    ${LIB32WMAKE} -f Makefile.inc1 libraries
 #	The 32-bit runtime linker and ldd are built under their own names.
 .for _t in obj depend all
 	cd ${.CURDIR}/libexec/rtld-elf; PROG=ld-elf32.so.1 ${LIB32WMAKE} \
 	    DIRPRFX=libexec/rtld-elf/ ${_t}
 	cd ${.CURDIR}/usr.bin/ldd; PROG=ldd32 ${LIB32WMAKE} \
 	    DIRPRFX=usr.bin/ldd/ ${_t}
 .endfor
 
 distribute32 install32:
 	cd ${.CURDIR}/lib; ${LIB32IMAKE} ${.TARGET:S/32$//}
 .if ${MK_CDDL} != "no"
 	cd ${.CURDIR}/cddl/lib; ${LIB32IMAKE} ${.TARGET:S/32$//}
 .endif
 	cd ${.CURDIR}/gnu/lib; ${LIB32IMAKE} ${.TARGET:S/32$//}
 .if ${MK_CRYPT} != "no"
 	cd ${.CURDIR}/secure/lib; ${LIB32IMAKE} ${.TARGET:S/32$//}
 .endif
 .if ${MK_KERBEROS} != "no"
 	cd ${.CURDIR}/kerberos5/lib; ${LIB32IMAKE} ${.TARGET:S/32$//}
 .endif
 	cd ${.CURDIR}/libexec/rtld-elf; \
 	    PROG=ld-elf32.so.1 ${LIB32IMAKE} ${.TARGET:S/32$//}
 	cd ${.CURDIR}/usr.bin/ldd; PROG=ldd32 ${LIB32IMAKE} ${.TARGET:S/32$//}
 .endif
 
 # Ordered list of the stage targets that make up buildworld.  When
 # SUBDIR_OVERRIDE is defined, the temporary-tree, legacy, bootstrap-tools
 # and cross-tools stages are left out.  build32 is appended only when
 # LIB32TMP is defined and MK_LIB32 is not "no".
 WMAKE_TGTS=
 .if !defined(SUBDIR_OVERRIDE)
 WMAKE_TGTS+=	_worldtmp _legacy _bootstrap-tools
 .endif
 WMAKE_TGTS+=	_cleanobj _obj _build-tools
 .if !defined(SUBDIR_OVERRIDE)
 WMAKE_TGTS+=	_cross-tools
 .endif
 WMAKE_TGTS+=	_includes _libraries _depend everything
 .if defined(LIB32TMP) && ${MK_LIB32} != "no"
 WMAKE_TGTS+=	build32
 .endif
 
 # .ORDER keeps the stages in sequence even in a parallel (-j) build.
 buildworld: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue
 .ORDER: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue
 
 buildworld_prologue:
 	@echo "--------------------------------------------------------------"
 	@echo ">>> World build started on `LC_ALL=C date`"
 	@echo "--------------------------------------------------------------"
 
 buildworld_epilogue:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> World build completed on `LC_ALL=C date`"
 	@echo "--------------------------------------------------------------"
 
 #
 # We need to have this as a target because the indirection between Makefile
 # and Makefile.inc1 causes the correct PATH to be used, rather than a
 # modification of the current environment's PATH.  In addition, we need
 # to quote multiword values.
 #
 buildenvvars:
 	@echo ${WMAKEENV:Q}
 
 # buildenv: run ${BUILDENV_SHELL} from the source directory with WMAKEENV
 # in its environment.  The request is rejected at parse time when -j is
 # among the make flags.
 .if ${.TARGETS:Mbuildenv}
 .if ${.MAKEFLAGS:M-j}
 .error The buildenv target is incompatible with -j
 .endif
 .endif
 buildenv:
 	@echo Entering world for ${TARGET_ARCH}:${TARGET}
 #	"|| true" keeps the shell's exit status from failing the target.
 	@cd ${.CURDIR} && env ${WMAKEENV} ${BUILDENV_SHELL} || true
 
 # toolchain runs the buildworld stages minus _depend, everything and
 # build32; kernel-toolchain additionally leaves out _includes and
 # _libraries.
 TOOLCHAIN_TGTS=	${WMAKE_TGTS:N_depend:Neverything:Nbuild32}
 toolchain: ${TOOLCHAIN_TGTS}
 kernel-toolchain: ${TOOLCHAIN_TGTS:N_includes:N_libraries}
 
 #
 # installcheck
 #
 # Checks to be sure system is ready for installworld/installkernel.
 #
 installcheck: _installcheck_world _installcheck_kernel
 _installcheck_world:
 _installcheck_kernel:
 
 #
 # Require DESTDIR to be set if installing for a different architecture or
 # using the user/group database in the source tree.
 #
 .if ${TARGET_ARCH} != ${MACHINE_ARCH} || ${TARGET} != ${MACHINE} || \
     defined(DB_FROM_SRC)
 .if !make(distributeworld)
 _installcheck_world: __installcheck_DESTDIR
 _installcheck_kernel: __installcheck_DESTDIR
 __installcheck_DESTDIR:
 .if !defined(DESTDIR) || empty(DESTDIR)
 	@echo "ERROR: Please set DESTDIR!"; \
 	false
 .endif
 .endif
 .endif
 
 .if !defined(DB_FROM_SRC)
 #
 # Check for missing UIDs/GIDs.
 #
 CHECK_UIDS=	auditdistd
 CHECK_GIDS=	audit
 .if ${MK_SENDMAIL} != "no"
 CHECK_UIDS+=	smmsp
 CHECK_GIDS+=	smmsp
 .endif
 .if ${MK_PF} != "no"
 CHECK_UIDS+=	proxy
 CHECK_GIDS+=	proxy authpf
 .endif
 .if ${MK_UNBOUND} != "no"
 CHECK_UIDS+=	unbound
 CHECK_GIDS+=	unbound
 .endif
 _installcheck_world: __installcheck_UGID
 # __installcheck_UGID: fail with a pointer to UPDATING when a user from
 # CHECK_UIDS or a group from CHECK_GIDS cannot be resolved on the running
 # system.  The exit status of id(1) / find(1) is tested directly; the group
 # check relies on find(1) failing for an unknown group name.
 __installcheck_UGID:
 .for uid in ${CHECK_UIDS}
 	@if ! id -u ${uid} >/dev/null 2>&1; then \
 		echo "ERROR: Required ${uid} user is missing, see /usr/src/UPDATING."; \
 		false; \
 	fi
 .endfor
 .for gid in ${CHECK_GIDS}
 	@if ! find / -prune -group ${gid} >/dev/null 2>&1; then \
 		echo "ERROR: Required ${gid} group is missing, see /usr/src/UPDATING."; \
 		false; \
 	fi
 .endfor
 .endif
 
 #
 # Required install tools to be saved in a scratch dir for safety.
 #
 .if ${MK_ZONEINFO} != "no"
 _zoneinfo=	zic tzsetup
 .endif
 
 ITOOLS=	[ awk cap_mkdb cat chflags chmod chown \
 	date echo egrep find grep id install ${_install-info} \
 	ln lockf make mkdir mtree mv pwd_mkdb \
 	rm sed services_mkdb sh strip sysctl test true uname wc ${_zoneinfo} \
 	${LOCAL_ITOOLS}
 
 # Needed for share/man
 .if ${MK_MAN} != "no"
 ITOOLS+=makewhatis
 .endif
 
 #
 # distributeworld
 #
 # Distributes everything compiled by a `buildworld'.
 #
 # installworld
 #
 # Installs everything compiled by a 'buildworld'.
 #
 
 # Non-base distributions produced by the base system
 EXTRA_DISTRIBUTIONS=	doc
 .if defined(LIB32TMP) && ${MK_LIB32} != "no"
 EXTRA_DISTRIBUTIONS+=	lib32
 .endif
 .if ${MK_TESTS} != "no"
 EXTRA_DISTRIBUTIONS+=	tests
 .endif
 
 DEBUG_DISTRIBUTIONS=
 .if ${MK_DEBUG_FILES} != "no"
 DEBUG_DISTRIBUTIONS+=	base ${EXTRA_DISTRIBUTIONS:S,doc,,:S,tests,,}
 .endif
 
 MTREE_MAGIC?=	mtree 2.0
 
 distributeworld installworld: _installcheck_world
 	mkdir -p ${INSTALLTMP}
 	progs=$$(for prog in ${ITOOLS}; do \
 		if progpath=`which $$prog`; then \
 			echo $$progpath; \
 		else \
 			echo "Required tool $$prog not found in PATH." >&2; \
 			exit 1; \
 		fi; \
 	    done); \
 	libs=$$(ldd -f "%o %p\n" -f "%o %p\n" $$progs 2>/dev/null | sort -u | \
 	    while read line; do \
 		set -- $$line; \
 		if [ "$$2 $$3" != "not found" ]; then \
 			echo $$2; \
 		else \
 			echo "Required library $$1 not found." >&2; \
 			exit 1; \
 		fi; \
 	    done); \
 	cp $$libs $$progs ${INSTALLTMP}
 	cp -R $${PATH_LOCALE:-"/usr/share/locale"} ${INSTALLTMP}/locale
 .if defined(NO_ROOT)
 	echo "#${MTREE_MAGIC}" > ${METALOG}
 .endif
 .if make(distributeworld)
 .for dist in ${EXTRA_DISTRIBUTIONS}
 	-mkdir ${DESTDIR}/${DISTDIR}/${dist}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist} >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}/usr/include >/dev/null
 .if ${MK_DEBUG_FILES} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib >/dev/null
 .endif
 .if ${MK_TESTS} != "no" && ${dist} == "tests"
 	-mkdir -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} >/dev/null
 .endif
 .if defined(NO_ROOT)
 	${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.root.dist | \
 	    sed -e 's#^\./#./${dist}/#' >> ${METALOG}
 	${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.usr.dist | \
 	    sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG}
 	${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.include.dist | \
 	    sed -e 's#^\./#./${dist}/usr/include/#' >> ${METALOG}
 .endif
 .endfor
 	-mkdir ${DESTDIR}/${DISTDIR}/base
 	cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \
 	    METALOG=${METALOG} ${IMAKE_INSTALL} ${IMAKE_MTREE} \
 	    DISTBASE=/base DESTDIR=${DESTDIR}/${DISTDIR}/base \
 	    LOCAL_MTREE=${LOCAL_MTREE:Q} distrib-dirs
 .endif
 	${_+_}cd ${.CURDIR}; ${IMAKE} re${.TARGET:S/world$//}; \
 	    ${IMAKEENV} rm -rf ${INSTALLTMP}
 .if make(distributeworld)
 .for dist in ${EXTRA_DISTRIBUTIONS}
 	find ${DESTDIR}/${DISTDIR}/${dist} -mindepth 1 -empty -delete
 .endfor
 .if defined(NO_ROOT)
 .for dist in base ${EXTRA_DISTRIBUTIONS}
 	@# For each file that exists in this dist, print the corresponding
 	@# line from the METALOG.  This relies on the fact that
 	@# a line containing only the filename will sort immediately before
 	@# the relevant mtree line.
 	cd ${DESTDIR}/${DISTDIR}; \
 	find ./${dist} | sort -u ${METALOG} - | \
 	awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \
 	${DESTDIR}/${DISTDIR}/${dist}.meta
 .endfor
 .for dist in ${DEBUG_DISTRIBUTIONS}
 	@# For each file that exists in this dist, print the corresponding
 	@# line from the METALOG.  This relies on the fact that
 	@# a line containing only the filename will sort immediately before
 	@# the relevant mtree line.
 	cd ${DESTDIR}/${DISTDIR}; \
 	find ./${dist}/usr/lib/debug | sort -u ${METALOG} - | \
 	awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \
 	${DESTDIR}/${DISTDIR}/${dist}.debug.meta
 .endfor
 .endif
 .endif
 
 # packageworld: create ${dist}.txz for base and every extra distribution
 # from the trees under ${DESTDIR}/${DISTDIR}, leaving out usr/lib/debug.
 # With NO_ROOT the archive members are taken from ${dist}.meta (passed to
 # tar as @file) instead of from the directory itself.
 packageworld:
 .for dist in base ${EXTRA_DISTRIBUTIONS}
 .if defined(NO_ROOT)
 	${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \
 	    tar cvf - --exclude usr/lib/debug \
 	    @${DESTDIR}/${DISTDIR}/${dist}.meta | \
 	    ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/${dist}.txz
 .else
 	${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \
 	    tar cvf - --exclude usr/lib/debug . | \
 	    ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/${dist}.txz
 .endif
 .endfor
 
 #	Debug files go into separate ${dist}-dbg.txz archives.
 .for dist in ${DEBUG_DISTRIBUTIONS}
 . if defined(NO_ROOT)
 	${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \
 	    tar cvf - @${DESTDIR}/${DISTDIR}/${dist}.debug.meta | \
 	    ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/${dist}-dbg.txz
 . else
 	${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \
 	    tar cvLf - usr/lib/debug | \
 	    ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/${dist}-dbg.txz
 . endif
 .endfor
 
 #
 # reinstall
 #
 # If you have a build server, you can NFS mount the source and obj directories
 # and do a 'make reinstall' on the *client* to install new binaries from the
 # most recent server build.
 #
 reinstall: .MAKE
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Making hierarchy"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \
 	    LOCAL_MTREE=${LOCAL_MTREE:Q} hierarchy
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Installing everything"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install
 .if defined(LIB32TMP) && ${MK_LIB32} != "no"
 	${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install32
 .endif
 
 redistribute: .MAKE
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Distributing everything"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute
 .if defined(LIB32TMP) && ${MK_LIB32} != "no"
 	${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute32 \
 	    DISTRIBUTION=lib32
 .endif
 
 distrib-dirs distribution: .MAKE
 	cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \
 	    ${IMAKE_INSTALL} ${IMAKE_MTREE} METALOG=${METALOG} ${.TARGET}
 
 #
 # buildkernel and installkernel
 #
 # Which kernels to build and/or install is specified by setting
 # KERNCONF. If not defined a GENERIC kernel is built/installed.
 # Only the existing (depending on TARGET) config files are used
 # for building kernels and only the first of these is designated
 # as the one being installed.
 #
 # Note that we have to use TARGET instead of TARGET_ARCH when
 # we're in kernel-land. Since only TARGET_ARCH is expected to
 # be set for a cross-build, we have to make sure TARGET is set
 # properly.
 
 .if defined(KERNFAST)
 NO_KERNELCLEAN=	t
 NO_KERNELCONFIG=	t
 NO_KERNELDEPEND=	t
 NO_KERNELOBJ=		t
 # Shortcut for KERNCONF=Blah -DKERNFAST is now KERNFAST=Blah
 .if !defined(KERNCONF) && ${KERNFAST} != "1"
 KERNCONF=${KERNFAST}
 .endif
 .endif
 .if ${TARGET_ARCH} == "powerpc64"
 KERNCONF?=	GENERIC64
 .else
 KERNCONF?=	GENERIC
 .endif
 INSTKERNNAME?=	kernel
 
 KERNSRCDIR?=	${.CURDIR}/sys
 KRNLCONFDIR=	${KERNSRCDIR}/${TARGET}/conf
 KRNLOBJDIR=	${OBJTREE}${KERNSRCDIR}
 KERNCONFDIR?=	${KRNLCONFDIR}
 
 # BUILDKERNELS collects every KERNCONF entry whose config file exists in
 # KERNCONFDIR; INSTALLKERNEL is the first such entry.  Both stay empty
 # when no listed config file exists.
 BUILDKERNELS=
 INSTALLKERNEL=
 .for _kernel in ${KERNCONF}
 .if exists(${KERNCONFDIR}/${_kernel})
 BUILDKERNELS+=	${_kernel}
 .if empty(INSTALLKERNEL)
 INSTALLKERNEL= ${_kernel}
 .endif
 .endif
 .endfor
 
 buildkernel ${WMAKE_TGTS} ${.ALLTARGETS:M_*}: .MAKE
 
 #
 # buildkernel
 #
 # Builds all kernels defined by BUILDKERNELS.
 #
 buildkernel:
 .if empty(BUILDKERNELS)
 	@echo "ERROR: Missing kernel configuration file(s) (${KERNCONF})."; \
 	false
 .endif
 	@echo
 .for _kernel in ${BUILDKERNELS}
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Kernel build for ${_kernel} started on `LC_ALL=C date`"
 	@echo "--------------------------------------------------------------"
 	@echo "===> ${_kernel}"
 	mkdir -p ${KRNLOBJDIR}
 .if !defined(NO_KERNELCONFIG)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 1: configuring the kernel"
 	@echo "--------------------------------------------------------------"
 	cd ${KRNLCONFDIR}; \
 		PATH=${TMPPATH} \
 		    config ${CONFIGARGS} -d ${KRNLOBJDIR}/${_kernel} \
 			-I '${KERNCONFDIR}' '${KERNCONFDIR}/${_kernel}'
 .endif
 .if !defined(NO_CLEAN) && !defined(NO_KERNELCLEAN)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.1: cleaning up the object tree"
 	@echo "--------------------------------------------------------------"
 	cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} ${CLEANDIR}
 .endif
 .if !defined(NO_KERNELOBJ)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.2: rebuilding the object tree"
 	@echo "--------------------------------------------------------------"
 	cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} obj
 .endif
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.3: build tools"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${KTMAKE} kernel-tools
 .if !defined(NO_KERNELDEPEND)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 3.1: making dependencies"
 	@echo "--------------------------------------------------------------"
 	cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} depend -DNO_MODULES_OBJ
 .endif
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 3.2: building everything"
 	@echo "--------------------------------------------------------------"
 	cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} all -DNO_MODULES_OBJ
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Kernel build for ${_kernel} completed on `LC_ALL=C date`"
 	@echo "--------------------------------------------------------------"
 .endfor
 
 #
 # installkernel, etc.
 #
 # Install the kernel defined by INSTALLKERNEL
 #
 installkernel installkernel.debug \
 reinstallkernel reinstallkernel.debug: _installcheck_kernel
 #	INSTALLKERNEL is empty when none of the KERNCONF config files exist.
 .if empty(INSTALLKERNEL)
 	@echo "ERROR: No kernel \"${KERNCONF}\" to install."; \
 	false
 .endif
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Installing kernel ${INSTALLKERNEL}"
 	@echo "--------------------------------------------------------------"
 #	The sub-make target is the requested target with "kernel" removed,
 #	e.g. installkernel.debug runs install.debug in the kernel object dir.
 	cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \
 	    ${CROSSENV} PATH=${TMPPATH} \
 	    ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME} ${.TARGET:S/kernel//}
 
 # distributekernel: install INSTALLKERNEL into ${INSTALL_DDIR}/kernel and
 # every other built kernel into ${INSTALL_DDIR}/kernel.<config>.  With
 # NO_ROOT, a metalog is written per kernel and its "./kernel" path prefix
 # is rewritten to "." to produce the matching .meta file.
 #
 # The extra kernels are selected with :N${INSTALLKERNEL}, which drops only
 # the word equal to INSTALLKERNEL.  A substring substitution would also
 # mangle configs whose name merely contains it (GENERIC-NODEBUG would
 # become -NODEBUG).
 distributekernel distributekernel.debug:
 .if empty(INSTALLKERNEL)
 	@echo "ERROR: No kernel \"${KERNCONF}\" to install."; \
 	false
 .endif
 	mkdir -p ${DESTDIR}/${DISTDIR}
 .if defined(NO_ROOT)
 	echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.premeta
 .endif
 	cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \
 	    ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.premeta/} \
 	    ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} KERNEL=${INSTKERNNAME} \
 	    DESTDIR=${INSTALL_DDIR}/kernel \
 	    ${.TARGET:S/distributekernel/install/}
 .if defined(NO_ROOT)
 	sed -e 's|^./kernel|.|' ${DESTDIR}/${DISTDIR}/kernel.premeta > \
 	    ${DESTDIR}/${DISTDIR}/kernel.meta
 .endif
 .for _kernel in ${BUILDKERNELS:N${INSTALLKERNEL}}
 .if defined(NO_ROOT)
 	echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta
 .endif
 	cd ${KRNLOBJDIR}/${_kernel}; \
 	    ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.${_kernel}.premeta/} \
 	    ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} \
 	    KERNEL=${INSTKERNNAME}.${_kernel} \
 	    DESTDIR=${INSTALL_DDIR}/kernel.${_kernel} \
 	    ${.TARGET:S/distributekernel/install/}
 .if defined(NO_ROOT)
 	sed -e 's|^./kernel|.|' \
 	    ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta > \
 	    ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta
 .endif
 .endfor
 
 # packagekernel: archive the trees created by distributekernel into
 # kernel.txz and kernel.<config>.txz.  The list of extra kernels is derived
 # exactly as in distributekernel so both targets agree on directory names.
 packagekernel:
 .if defined(NO_ROOT)
 	cd ${DESTDIR}/${DISTDIR}/kernel; \
 	    tar cvf - @${DESTDIR}/${DISTDIR}/kernel.meta | \
 	    ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.txz
 .for _kernel in ${BUILDKERNELS:N${INSTALLKERNEL}}
 	cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \
 	    tar cvf - @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \
 	    ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.txz
 .endfor
 .else
 	cd ${DESTDIR}/${DISTDIR}/kernel; \
 	    tar cvf - . | \
 	    ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.txz
 .for _kernel in ${BUILDKERNELS:N${INSTALLKERNEL}}
 	cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \
 	    tar cvf - . | \
 	    ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.txz
 .endfor
 .endif
 
 #
 # doxygen
 #
 # Build the API documentation with doxygen
 #
 doxygen:
 #	The command substitution is quoted on purpose: when which(1) prints
 #	nothing, an unquoted "[ ! -x ]" is a two-argument test that evaluates
 #	to false, and the missing-doxygen error would never be reported.
 	@if [ ! -x "`/usr/bin/which doxygen`" ]; then \
 		echo "You need doxygen (devel/doxygen) to generate the API documentation of the kernel." | /usr/bin/fmt; \
 		exit 1; \
 	fi
 	cd ${.CURDIR}/tools/kerneldoc/subsys && ${MAKE} obj all
 
 #
 # update
 #
 # Update the source tree(s), by running svn/svnup to update to the
 # latest copy.
 #
 update:
 .if (defined(CVS_UPDATE) || defined(SUP_UPDATE)) && !defined(SVN_UPDATE)
 	@echo "--------------------------------------------------------------"
 	@echo "CVS_UPDATE and SUP_UPDATE are no longer supported."
 	@echo "Please see: https://wiki.freebsd.org/CvsIsDeprecated"
 	@echo "--------------------------------------------------------------"
 	@exit 1
 .endif
 .if defined(SVN_UPDATE)
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Updating ${.CURDIR} using Subversion"
 	@echo "--------------------------------------------------------------"
 	@(cd ${.CURDIR} && ${SVN} update ${SVNFLAGS})
 .endif
 
 #
 # ------------------------------------------------------------------------
 #
 # From here onwards are utility targets used by the 'make world' and
 # related targets.  If your 'world' breaks, you may like to try to fix
 # the problem and manually run the following targets to attempt to
 # complete the build.  Beware, this is *not* guaranteed to work, you
 # need to have a pretty good grip on the current state of the system
 # to attempt to manually finish it.  If in doubt, 'make world' again.
 #
 
 #
 # legacy: Build compatibility shims for the next three targets. This is a minimal
 # set of tools and shims necessary to compensate for older systems which don't have
 # the APIs required by the targets built in bootstrap-tools, build-tools or cross-tools.
 #
 legacy:
 #	Refuse to bootstrap from a host whose BOOTSTRAPPING value is below
 #	800107; a value of 0 is exempt from the check.
 .if ${BOOTSTRAPPING} < 800107 && ${BOOTSTRAPPING} != 0
 	@echo "ERROR: Source upgrades from versions prior to 8.0 not supported."; \
 	false
 .endif
 #	Run obj, includes, depend, all and install in tools/build; the
 #	includes and install steps use ${MAKEOBJDIRPREFIX}/legacy as DESTDIR.
 .for _tool in tools/build
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,includes,depend,all,install)"; \
 	    cd ${.CURDIR}/${_tool} && \
 	    ${MAKE} DIRPRFX=${_tool}/ obj && \
 	    ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy includes && \
 	    ${MAKE} DIRPRFX=${_tool}/ depend && \
 	    ${MAKE} DIRPRFX=${_tool}/ all && \
 	    ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install
 .endfor
 
 #
 # bootstrap-tools: Build tools needed for compatibility. These are binaries that
 # are built to build other binaries in the system. However, the focus of these
 # binaries is usually quite narrow. Bootstrap tools use the host's compiler and
 # libraries, augmented by -legacy.
 #
 _bt=		_bootstrap-tools	
 
 .if ${MK_GAMES} != "no"
 _strfile=	games/fortune/strfile
 .endif
 
 .if ${MK_CXX} != "no"
 _gperf=		gnu/usr.bin/gperf
 .endif
 
 .if ${MK_GROFF} != "no"
 _groff=		gnu/usr.bin/groff
 .endif
 
 .if ${MK_VT} != "no"
 _vtfontcvt=	usr.bin/vtfontcvt
 .endif
 
 .if ${BOOTSTRAPPING} < 900002
 _sed=		usr.bin/sed
 .endif
 
 .if ${BOOTSTRAPPING} < 1000002
 _m4=		lib/libohash \
 		usr.bin/m4
 
 ${_bt}-usr.bin/m4: ${_bt}-lib/libohash
 .endif
 
-.if ${BOOTSTRAPPING} < 1000014
-_crunch=	usr.sbin/crunch
-.endif
-
 .if ${BOOTSTRAPPING} < 1000026
 _nmtree=	lib/libnetbsd \
 		usr.sbin/nmtree
 
 ${_bt}-usr.sbin/nmtree: ${_bt}-lib/libnetbsd
 .endif
 
 .if ${BOOTSTRAPPING} < 1000027
 _cat=		bin/cat
 .endif
 
 .if ${BOOTSTRAPPING} < 1000033
 _lex=		usr.bin/lex
 .endif
 
+.if ${BOOTSTRAPPING} < 1001507
+_crunch=	usr.sbin/crunch
+.endif
+
 .if ${BOOTSTRAPPING} >= 900040 && ${BOOTSTRAPPING} < 900041
 _awk=		usr.bin/awk
 .endif
 
 _yacc=		lib/liby \
 		usr.bin/yacc
 
 ${_bt}-usr.bin/yacc: ${_bt}-lib/liby
 
 .if ${MK_BSNMP} != "no"
 _gensnmptree=	usr.sbin/bsnmpd/gensnmptree
 .endif
 
 # We need to build tblgen when we're building clang either as
 # the bootstrap compiler, or as the part of the normal build.
 .if ${MK_CLANG_BOOTSTRAP} != "no" || ${MK_CLANG} != "no"
 _clang_tblgen= \
 	lib/clang/libllvmsupport \
 	lib/clang/libllvmtablegen \
 	usr.bin/clang/tblgen \
 	usr.bin/clang/clang-tblgen
 
 # Both tblgen programs wait for the two LLVM libraries listed above.
 ${_bt}-usr.bin/clang/clang-tblgen: ${_bt}-lib/clang/libllvmtablegen ${_bt}-lib/clang/libllvmsupport
 ${_bt}-usr.bin/clang/tblgen: ${_bt}-lib/clang/libllvmtablegen ${_bt}-lib/clang/libllvmsupport
 .endif
 
 # ELF Tool Chain libraries are needed for ELF tools and dtrace tools.
 # dtrace tools are required for older bootstrap env and cross-build
 # pre libdwarf
 #
 # The condition holds when BOOTSTRAPPING is below 1100006, or when either
 # MACHINE/TARGET or MACHINE_ARCH/TARGET_ARCH differ.
 .if ${BOOTSTRAPPING} < 1100006 || (${MACHINE} != ${TARGET} || \
     ${MACHINE_ARCH} != ${TARGET_ARCH})
 _elftoolchain_libs= lib/libelf lib/libdwarf 
 .if ${MK_CDDL} != "no"
 _dtrace_tools= cddl/usr.bin/sgsmsg cddl/lib/libctf cddl/usr.bin/ctfconvert \
     cddl/usr.bin/ctfmerge
 
 # ctfconvert and ctfmerge wait for libelf, libdwarf and libctf.
 ${_bt}-cddl/usr.bin/ctfconvert: ${_bt}-lib/libelf ${_bt}-lib/libdwarf ${_bt}-cddl/lib/libctf
 ${_bt}-cddl/usr.bin/ctfmerge: ${_bt}-lib/libelf ${_bt}-lib/libdwarf ${_bt}-cddl/lib/libctf
 .endif
 .endif
 
 # Pick exactly one device tree compiler: the GPL one unless MK_GPL_DTC is
 # "no", in which case the BSDL one is built instead.
 .if ${MK_GPL_DTC} != "no"
 _dtc= gnu/usr.bin/dtc
 .else
 _dtc= usr.bin/dtc
 .endif
 
 # Kerberos bootstrap tools, selected only when MK_KERBEROS is not "no".
 .if ${MK_KERBEROS} != "no"
 _kerberos5_bootstrap_tools= \
 	kerberos5/tools/make-roken \
 	kerberos5/lib/libroken \
 	kerberos5/lib/libvers \
 	kerberos5/tools/asn1_compile \
 	kerberos5/tools/slc \
 	usr.bin/compile_et
 
 # Ordering: make-roken before libroken; libroken and libvers before
 # asn1_compile, slc and compile_et.
 ${_bt}-kerberos5/lib/libroken: ${_bt}-kerberos5/tools/make-roken
 ${_bt}-kerberos5/tools/asn1_compile: \
     ${_bt}-kerberos5/lib/libroken ${_bt}-kerberos5/lib/libvers
 ${_bt}-kerberos5/tools/slc: \
     ${_bt}-kerberos5/lib/libroken ${_bt}-kerberos5/lib/libvers
 ${_bt}-usr.bin/compile_et: \
     ${_bt}-kerberos5/lib/libroken ${_bt}-kerberos5/lib/libvers
 .endif
 
 # Aggregate target; the loop below attaches one ${_bt}-<tool> prerequisite
 # for every tool in the list.
 bootstrap-tools: .PHONY
 
 #	Please document (add comment) why something is in 'bootstrap-tools'.
 #	Try to bound the building of the bootstrap-tool to just the
 #	FreeBSD versions that need the tool built at this stage of the build.
 #
 #	Selection variables that were not set above expand to nothing, so
 #	the corresponding tools drop out of the list.
 .for _tool in \
     ${_clang_tblgen} \
     ${_kerberos5_bootstrap_tools} \
     ${_elftoolchain_libs} \
     ${_dtrace_tools} \
     ${_strfile} \
     ${_gperf} \
     ${_groff} \
     ${_dtc} \
     ${_awk} \
     ${_cat} \
     usr.bin/lorder \
     usr.bin/makewhatis \
     usr.bin/rpcgen \
     ${_sed} \
     ${_yacc} \
     ${_m4} \
     ${_lex} \
     lib/libmd \
     usr.bin/xinstall \
     ${_gensnmptree} \
     usr.sbin/config \
     ${_crunch} \
     ${_nmtree} \
     ${_vtfontcvt}
 # Per-tool target: run obj, depend, all and install in the tool's directory,
 # installing under ${MAKEOBJDIRPREFIX}/legacy.  The steps are chained with
 # &&, so the first failure stops the sequence.
 ${_bt}-${_tool}: .PHONY .MAKE
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \
 		cd ${.CURDIR}/${_tool} && \
 		${MAKE} DIRPRFX=${_tool}/ obj && \
 		${MAKE} DIRPRFX=${_tool}/ depend && \
 		${MAKE} DIRPRFX=${_tool}/ all && \
 		${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install
 
 bootstrap-tools: ${_bt}-${_tool}
 .endfor
 
 #
 # build-tools: Build special purpose build tools
 #
 # The three optional entries below are selected by NO_SHARE, MK_GCC and
 # MK_RESCUE respectively.
 .if !defined(NO_SHARE)
 _share=	share/syscons/scrnmaps
 .endif
 
 .if ${MK_GCC} != "no"
 _gcc_tools= gnu/usr.bin/cc/cc_tools
 .endif
 
 .if ${MK_RESCUE} != "no"
 _rescue= rescue/rescue
 .endif
 
 build-tools: .MAKE
 # First group: only the obj and build-tools targets are run in each directory.
 .for _tool in \
     bin/csh \
     bin/sh \
     ${_rescue} \
     ${LOCAL_TOOL_DIRS} \
     lib/ncurses/ncurses \
     lib/ncurses/ncursesw \
     ${_share} \
     usr.bin/awk \
     lib/libmagic \
     usr.bin/mkesdb_static \
     usr.bin/mkcsmapper_static \
     usr.bin/vi/catalog
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,build-tools)"; \
 		cd ${.CURDIR}/${_tool} && \
 		${MAKE} DIRPRFX=${_tool}/ obj && \
 		${MAKE} DIRPRFX=${_tool}/ build-tools
 .endfor
 # Second group (${_gcc_tools}): obj, depend and all are run instead.
 .for _tool in \
     ${_gcc_tools}
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all)"; \
 		cd ${.CURDIR}/${_tool} && \
 		${MAKE} DIRPRFX=${_tool}/ obj && \
 		${MAKE} DIRPRFX=${_tool}/ depend && \
 		${MAKE} DIRPRFX=${_tool}/ all
 .endfor
 
 #
 # kernel-tools: Build kernel-building tools
 #
 # The recipe itself only creates ${MAKEOBJDIRPREFIX}/usr and populates its
 # directory layout from BSD.usr.dist with mtree; mtree's output is discarded.
 kernel-tools: .MAKE
 	mkdir -p ${MAKEOBJDIRPREFIX}/usr
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${MAKEOBJDIRPREFIX}/usr >/dev/null
 
 #
 # cross-tools: All the tools needed to build the rest of the system after
 # we get done with the earlier stages. It is the last set of tools needed
 # to begin building the target binaries.
 #
 # btxld is selected only when TARGET_ARCH differs from MACHINE_ARCH and the
 # target is amd64 or i386.
 .if ${TARGET_ARCH} != ${MACHINE_ARCH}
 .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386"
 _btxld=		usr.sbin/btxld
 .endif
 .endif
 .if ${TARGET_ARCH} != ${MACHINE_ARCH}
-.if ${MK_RESCUE} != "no" || defined(RELEASEDIR)
+.if ${MK_RESCUE} != "no"
 _crunchide=	usr.sbin/crunch/crunchide
 .endif
-.if ${TARGET_ARCH} == "i386" && defined(RELEASEDIR)
-_kgzip=		usr.sbin/kgzip
 .endif
-.endif
 
 # If we're given an XAS, don't build binutils.
 # (${XAS:M/*} is empty unless XAS holds a word starting with "/".)
 .if ${XAS:M/*} == ""
 .if ${MK_BINUTILS_BOOTSTRAP} != "no"
 _binutils=	gnu/usr.bin/binutils
 .endif
 .if ${MK_ELFTOOLCHAIN_TOOLS} != "no"
 _elftctools=	lib/libelftc \
 		usr.bin/elfcopy \
 		usr.bin/nm \
 		usr.bin/size \
 		usr.bin/strings
 # These are not required by the build, but can be useful for developers who
 # cross-build on a FreeBSD 10 host:
 _elftctools+=	usr.bin/addr2line
 .endif
 .elif ${TARGET_ARCH} != ${MACHINE_ARCH} && ${MK_ELFTOOLCHAIN_TOOLS} != "no"
 # If cross-building with an external binutils we still need to build strip for
 # the target (for at least crunchide).
 _elftctools=	lib/libelftc \
 		usr.bin/elfcopy
 .endif
 
 # If a full path to an external cross compiler is given, don't build
 # a cross compiler.
 .if ${XCC:M/*} == "" && ${MK_CROSS_COMPILER} != "no"
 .if ${MK_CLANG_BOOTSTRAP} != "no"
 _clang=		usr.bin/clang
 _clang_libs=	lib/clang
 .endif
 .if ${MK_GCC_BOOTSTRAP} != "no"
 _cc=		gnu/usr.bin/cc
 .endif
 .endif
 
 # Build and install each selected cross tool in turn.  Unlike the
 # bootstrap tools, these are installed with DESTDIR=${MAKEOBJDIRPREFIX}
 # (no /legacy suffix).  sys/boot/usb/tools is always part of the list.
 cross-tools: .MAKE
 .for _tool in \
     ${_clang_libs} \
     ${_clang} \
     ${_binutils} \
     ${_elftctools} \
     ${_cc} \
     ${_btxld} \
     ${_crunchide} \
-    ${_kgzip} \
     sys/boot/usb/tools
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \
 		cd ${.CURDIR}/${_tool} && \
 		${MAKE} DIRPRFX=${_tool}/ obj && \
 		${MAKE} DIRPRFX=${_tool}/ depend && \
 		${MAKE} DIRPRFX=${_tool}/ all && \
 		${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX} install
 .endfor
 
 # Environment and make command line used by the native-xtools target.
 # Objects go under ${OBJTREE}/nxb, installs go through tools/install.sh,
 # MACHINE/MACHINE_ARCH are overridden with the TARGET values, the tblgen
 # programs are taken from ${OBJTREE}/nxb-bin, and NO_PIC/NO_SHARED are
 # defined along with a set of MK_* options forced to "no".
 NXBENV=		MAKEOBJDIRPREFIX=${OBJTREE}/nxb \
 		INSTALL="sh ${.CURDIR}/tools/install.sh" \
 		VERSION="${VERSION}"
 NXBMAKE=	${NXBENV} ${MAKE} \
 		TBLGEN=${OBJTREE}/nxb-bin/usr/bin/tblgen \
 		CLANG_TBLGEN=${OBJTREE}/nxb-bin/usr/bin/clang-tblgen \
 		MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} \
 		MK_GDB=no MK_TESTS=no \
 		SSP_CFLAGS= \
 		MK_HTML=no NO_LINT=yes MK_MAN=no \
 		-DNO_PIC MK_PROFILE=no -DNO_SHARED \
 		-DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \
 		MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \
 		MK_LLDB=no
 
 # native-xtools: create the ${OBJTREE}/nxb-bin directory skeleton (bin, sbin,
 # usr and usr/include, the latter two populated via mtree), then build every
 # listed tool with ${NXBMAKE} and install it into ${OBJTREE}/nxb-bin.
 # The optional entries (${_clang_tblgen}, ${_binutils}, ...) are the same
 # selection variables the bootstrap/cross tool stages use.
 native-xtools: .MAKE
 	mkdir -p ${OBJTREE}/nxb-bin/bin
 	mkdir -p ${OBJTREE}/nxb-bin/sbin
 	mkdir -p ${OBJTREE}/nxb-bin/usr
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${OBJTREE}/nxb-bin/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \
 	    -p ${OBJTREE}/nxb-bin/usr/include >/dev/null
 .for _tool in \
     bin/cat \
     bin/chmod \
     bin/cp \
     bin/csh \
     bin/echo \
     bin/expr \
     bin/hostname \
     bin/ln \
     bin/ls \
     bin/mkdir \
     bin/mv \
     bin/ps \
     bin/realpath \
     bin/rm \
     bin/rmdir \
     bin/sh \
     bin/sleep \
     ${_clang_tblgen} \
     usr.bin/ar \
     ${_binutils} \
     ${_elftctools} \
     ${_cc} \
     ${_gcc_tools} \
     ${_clang_libs} \
     ${_clang} \
     sbin/md5 \
     sbin/sysctl \
     gnu/usr.bin/diff \
     usr.bin/awk \
     usr.bin/basename \
     usr.bin/bmake \
     usr.bin/bzip2 \
     usr.bin/cmp \
     usr.bin/dirname \
     usr.bin/env \
     usr.bin/fetch \
     usr.bin/find \
     usr.bin/grep \
     usr.bin/gzip \
     usr.bin/id \
     usr.bin/lex \
     usr.bin/lorder \
     usr.bin/mktemp \
     usr.bin/mt \
     usr.bin/patch \
     usr.bin/sed \
     usr.bin/sort \
     usr.bin/tar \
     usr.bin/touch \
     usr.bin/tr \
     usr.bin/true \
     usr.bin/uniq \
     usr.bin/unzip \
     usr.bin/xargs \
     usr.bin/xinstall \
     usr.bin/xz \
     usr.bin/yacc \
     usr.sbin/chown
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \
 		cd ${.CURDIR}/${_tool} && \
 		${NXBMAKE} DIRPRFX=${_tool}/ obj && \
 		${NXBMAKE} DIRPRFX=${_tool}/ depend && \
 		${NXBMAKE} DIRPRFX=${_tool}/ all && \
 		${NXBMAKE} DIRPRFX=${_tool}/ DESTDIR=${OBJTREE}/nxb-bin install
 .endfor
 
 #
 # hierarchy - ensure that all the needed directories are present
 #
 # "hier" is an alias for the same recipe, which runs the distrib-dirs target
 # of the etc directory through ${HMAKE}.
 hierarchy hier: .MAKE
 	cd ${.CURDIR}/etc && ${HMAKE} distrib-dirs
 
 #
 # libraries - build all libraries, and install them under ${DESTDIR}.
 #
 # The list of libraries with dependents (${_prebuild_libs}) and their
 # interdependencies (__L) are built automatically by the
 # ${.CURDIR}/tools/make_libdeps.sh script.
 #
 # The four stages run as separate sub-makes in a fixed order; each stage
 # starts only if the previous one succeeded.
 #
 libraries: .MAKE
 	cd ${.CURDIR} && \
 	    ${MAKE} -f Makefile.inc1 _prereq_libs && \
 	    ${MAKE} -f Makefile.inc1 _startup_libs && \
 	    ${MAKE} -f Makefile.inc1 _prebuild_libs && \
 	    ${MAKE} -f Makefile.inc1 _generic_libs
 
 #
 # static libgcc.a prerequisite for shared libc
 #
 _prereq_libs= gnu/lib/libssp/libssp_nonshared gnu/lib/libgcc lib/libcompiler_rt
 
 # These dependencies are not automatically generated:
 #
 # gnu/lib/csu, gnu/lib/libgcc, lib/csu and lib/libc must be built before
 # all shared libraries for ELF.
 #
 _startup_libs=	gnu/lib/csu
 # Pick the most specific lib/csu directory that exists:
 # ${MACHINE_ARCH}-elf, then ${MACHINE_ARCH}, else ${MACHINE_CPUARCH}.
 .if exists(${.CURDIR}/lib/csu/${MACHINE_ARCH}-elf)
 _startup_libs+=	lib/csu/${MACHINE_ARCH}-elf
 .elif exists(${.CURDIR}/lib/csu/${MACHINE_ARCH})
 _startup_libs+=	lib/csu/${MACHINE_ARCH}
 .else
 _startup_libs+=	lib/csu/${MACHINE_CPUARCH}
 .endif
 _startup_libs+=	gnu/lib/libgcc
 _startup_libs+=	lib/libcompiler_rt
 _startup_libs+=	lib/libc
 _startup_libs+=	lib/libc_nonshared
 .if ${MK_LIBCPLUSPLUS} != "no"
 _startup_libs+=	lib/libcxxrt
 .endif
 
 # Within the startup stage, libgcc waits for libc and libc_nonshared, and
 # libcxxrt (when enabled) waits for libgcc.
 gnu/lib/libgcc__L: lib/libc__L
 gnu/lib/libgcc__L: lib/libc_nonshared__L
 .if ${MK_LIBCPLUSPLUS} != "no"
 lib/libcxxrt__L: gnu/lib/libgcc__L
 .endif
 
 # Libraries that other libraries depend on.  Many entries are ${_...}
 # selection variables that are assigned further down in this file; that
 # works because "=" and "+=" assignments are expanded lazily, when
 # _prebuild_libs is finally used.
 _prebuild_libs=	${_kerberos5_lib_libasn1} \
 		${_kerberos5_lib_libhdb} \
 		${_kerberos5_lib_libheimbase} \
 		${_kerberos5_lib_libheimntlm} \
 		${_kerberos5_lib_libheimsqlite} \
 		${_kerberos5_lib_libheimipcc} \
 		${_kerberos5_lib_libhx509} ${_kerberos5_lib_libkrb5} \
 		${_kerberos5_lib_libroken} \
 		${_kerberos5_lib_libwind} \
 		lib/libbz2 ${_libcom_err} lib/libcrypt \
 		lib/libelf lib/libexpat \
 		lib/libfigpar \
 		${_lib_libgssapi} \
 		lib/libkiconv lib/libkvm lib/liblzma lib/libmd lib/libnv \
 		${_lib_libcapsicum} \
 		lib/ncurses/ncurses lib/ncurses/ncursesw \
 		lib/libopie lib/libpam ${_lib_libthr} \
 		${_lib_libradius} lib/libsbuf lib/libtacplus \
 		lib/libgeom \
 		${_cddl_lib_libumem} ${_cddl_lib_libnvpair} \
 		${_cddl_lib_libuutil} \
 		${_cddl_lib_libavl} \
 		${_cddl_lib_libzfs_core} \
 		${_cddl_lib_libctf} \
 		lib/libutil lib/libpjdlog ${_lib_libypclnt} lib/libz lib/msun \
 		${_secure_lib_libcrypto} ${_lib_libldns} \
 		${_secure_lib_libssh} ${_secure_lib_libssl} \
 		gnu/lib/libdialog
 # GNU C++ runtime: libstdc++ waits for msun, libsupc++ waits for libstdc++.
 .if ${MK_GNUCXX} != "no"
 _prebuild_libs+= gnu/lib/libstdc++ gnu/lib/libsupc++
 gnu/lib/libstdc++__L: lib/msun__L
 gnu/lib/libsupc++__L: gnu/lib/libstdc++__L
 .endif
 
 .if ${MK_LIBCPLUSPLUS} != "no"
 _prebuild_libs+= lib/libc++
 .endif
 
 # libgeom waits for libexpat.
 lib/libgeom__L: lib/libexpat__L
 
 # Optional libraries, each selected by its MK_* knob.
 .if ${MK_LIBTHR} != "no"
 _lib_libthr=	lib/libthr
 .endif
 
 .if ${MK_RADIUS_SUPPORT} != "no"
 _lib_libradius=	lib/libradius
 .endif
 
 .if ${MK_OFED} != "no"
 _ofed_lib=	contrib/ofed/usr.lib/
 .endif
 
 .if ${MK_CASPER} != "no"
 _lib_libcapsicum=lib/libcapsicum
 .endif
 
 # These ordering rules are declared regardless of the knobs above.
 lib/libcapsicum__L: lib/libnv__L
 lib/libpjdlog__L: lib/libutil__L
 lib/liblzma__L: lib/libthr__L
 
 # Last stage: whole library directories, plus every ${LOCAL_LIB_DIRS} entry
 # that actually contains a Makefile.
 _generic_libs=	${_cddl_lib} gnu/lib ${_kerberos5_lib} lib ${_secure_lib} usr.bin/lex/lib ${_ofed_lib}
 .for _DIR in ${LOCAL_LIB_DIRS}
 .if exists(${.CURDIR}/${_DIR}/Makefile)
 _generic_libs+= ${_DIR}
 .endif
 .endfor
 
 # Both libopie and libtacplus wait for libmd.
 lib/libopie__L lib/libtacplus__L: lib/libmd__L
 
 # CDDL libraries and their ordering, selected when MK_CDDL is not "no".
 .if ${MK_CDDL} != "no"
 _cddl_lib_libumem= cddl/lib/libumem
 _cddl_lib_libnvpair= cddl/lib/libnvpair
 _cddl_lib_libavl= cddl/lib/libavl
 _cddl_lib_libuutil= cddl/lib/libuutil
 _cddl_lib_libzfs_core= cddl/lib/libzfs_core
 _cddl_lib_libctf= cddl/lib/libctf
 _cddl_lib= cddl/lib
 cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L
 cddl/lib/libzfs__L: lib/libgeom__L
 cddl/lib/libctf__L: lib/libz__L
 .endif
 
 # Crypto libraries.  Nesting matters: libcrypto/libssl need both MK_CRYPT
 # and MK_OPENSSL; libldns and libssh additionally need MK_LDNS / MK_OPENSSH.
 # _secure_lib itself only depends on MK_CRYPT.
 .if ${MK_CRYPT} != "no"
 .if ${MK_OPENSSL} != "no"
 _secure_lib_libcrypto= secure/lib/libcrypto
 _secure_lib_libssl= secure/lib/libssl
 lib/libradius__L secure/lib/libssl__L: secure/lib/libcrypto__L
 .if ${MK_LDNS} != "no"
 _lib_libldns= lib/libldns
 lib/libldns__L: secure/lib/libcrypto__L
 .endif
 .if ${MK_OPENSSH} != "no"
 _secure_lib_libssh= secure/lib/libssh
 secure/lib/libssh__L: lib/libz__L secure/lib/libcrypto__L lib/libcrypt__L
 .if ${MK_LDNS} != "no"
 secure/lib/libssh__L: lib/libldns__L
 .endif
 .if ${MK_KERBEROS_SUPPORT} != "no"
 secure/lib/libssh__L: lib/libgssapi__L kerberos5/lib/libkrb5__L \
     kerberos5/lib/libhx509__L kerberos5/lib/libasn1__L lib/libcom_err__L \
     lib/libmd__L kerberos5/lib/libroken__L
 .endif
 .endif
 .endif
 _secure_lib=	secure/lib
 .endif
 
 # Build ordering among the Kerberos libraries (only with MK_KERBEROS).
 .if ${MK_KERBEROS} != "no"
 kerberos5/lib/libasn1__L: lib/libcom_err__L kerberos5/lib/libroken__L
 kerberos5/lib/libhdb__L: kerberos5/lib/libasn1__L lib/libcom_err__L \
     kerberos5/lib/libkrb5__L kerberos5/lib/libroken__L \
     kerberos5/lib/libwind__L kerberos5/lib/libheimsqlite__L 
 kerberos5/lib/libheimntlm__L: secure/lib/libcrypto__L kerberos5/lib/libkrb5__L \
     kerberos5/lib/libroken__L lib/libcom_err__L
 kerberos5/lib/libhx509__L: kerberos5/lib/libasn1__L lib/libcom_err__L \
     secure/lib/libcrypto__L kerberos5/lib/libroken__L kerberos5/lib/libwind__L
 kerberos5/lib/libkrb5__L: kerberos5/lib/libasn1__L lib/libcom_err__L \
     lib/libcrypt__L secure/lib/libcrypto__L kerberos5/lib/libhx509__L \
     kerberos5/lib/libroken__L kerberos5/lib/libwind__L \
     kerberos5/lib/libheimbase__L kerberos5/lib/libheimipcc__L
 kerberos5/lib/libroken__L: lib/libcrypt__L
 kerberos5/lib/libwind__L: kerberos5/lib/libroken__L lib/libcom_err__L
 kerberos5/lib/libheimbase__L: lib/libthr__L
 kerberos5/lib/libheimipcc__L: kerberos5/lib/libroken__L kerberos5/lib/libheimbase__L lib/libthr__L
 kerberos5/lib/libheimsqlite__L: lib/libthr__L
 .endif
 
 .if ${MK_GSSAPI} != "no"
 _lib_libgssapi=	lib/libgssapi
 .endif
 
 # Selection variables consumed by _prebuild_libs and _generic_libs.
 # libcom_err is selected together with the Kerberos libraries.
 .if ${MK_KERBEROS} != "no"
 _kerberos5_lib=	kerberos5/lib
 _kerberos5_lib_libasn1= kerberos5/lib/libasn1
 _kerberos5_lib_libhdb= kerberos5/lib/libhdb
 _kerberos5_lib_libheimbase= kerberos5/lib/libheimbase
 _kerberos5_lib_libkrb5= kerberos5/lib/libkrb5
 _kerberos5_lib_libhx509= kerberos5/lib/libhx509
 _kerberos5_lib_libroken= kerberos5/lib/libroken
 _kerberos5_lib_libheimntlm= kerberos5/lib/libheimntlm
 _kerberos5_lib_libheimsqlite= kerberos5/lib/libheimsqlite
 _kerberos5_lib_libheimipcc= kerberos5/lib/libheimipcc
 _kerberos5_lib_libwind= kerberos5/lib/libwind
 _libcom_err= lib/libcom_err
 .endif
 
 .if ${MK_NIS} != "no"
 _lib_libypclnt=	lib/libypclnt
 .endif
 
 # Without OpenSSL, libradius waits for libmd instead of libcrypto.
 .if ${MK_OPENSSL} == "no"
 lib/libradius__L: lib/libmd__L
 .endif
 
 gnu/lib/libdialog__L: lib/msun__L lib/ncurses/ncursesw__L
 
 # <lib>__PL: build and install one prerequisite library.  The recipe is only
 # emitted when the directory exists.  Tests are disabled for every step;
 # the all and install steps additionally disable profiling and define NO_PIC.
 .for _lib in ${_prereq_libs}
 ${_lib}__PL: .PHONY .MAKE
 .if exists(${.CURDIR}/${_lib})
 	${_+_}@${ECHODIR} "===> ${_lib} (obj,depend,all,install)"; \
 		cd ${.CURDIR}/${_lib} && \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj && \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ depend && \
 		${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \
 		    DIRPRFX=${_lib}/ all && \
 		${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \
 		    DIRPRFX=${_lib}/ install
 .endif
 .endfor
 
 # <lib>__L: build and install one library from the startup, prebuild or
 # generic lists, with tests disabled.  lib/libpam is filtered out of the
 # prebuild list here (:Nlib/libpam) because it has its own rule.  The recipe
 # is only emitted when the directory exists.
 .for _lib in ${_startup_libs} ${_prebuild_libs:Nlib/libpam} ${_generic_libs}
 ${_lib}__L: .PHONY .MAKE
 .if exists(${.CURDIR}/${_lib})
 	${_+_}@${ECHODIR} "===> ${_lib} (obj,depend,all,install)"; \
 		cd ${.CURDIR}/${_lib} && \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj && \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ depend && \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ all && \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ install
 .endif
 .endfor
 
 # libpam is special: we need to build static PAM modules before
 # static PAM library, and dynamic PAM library before dynamic PAM
 # modules.
 #
 # This rule therefore runs the all and install steps with
 # _NO_LIBPAM_SO_YET defined; obj and depend run without it.
 lib/libpam__L: .PHONY .MAKE
 	${_+_}@${ECHODIR} "===> lib/libpam (obj,depend,all,install)"; \
 		cd ${.CURDIR}/lib/libpam && \
 		${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ obj && \
 		${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ depend && \
 		${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ \
 		    -D_NO_LIBPAM_SO_YET all && \
 		${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ \
 		    -D_NO_LIBPAM_SO_YET install
 
 # Stage targets invoked by "libraries".  Each one depends on the per-library
 # targets of its list: :S/$/__PL/ or :S/$/__L/ appends the suffix to every
 # word.  Note that a variable and a target share each of these names.
 _prereq_libs: ${_prereq_libs:S/$/__PL/}
 _startup_libs: ${_startup_libs:S/$/__L/}
 _prebuild_libs: ${_prebuild_libs:S/$/__L/}
 _generic_libs: ${_generic_libs:S/$/__L/}
 
 # For every listed target and every ${SUBDIR} entry, define
 # <entry>.<target>__D.  Its recipe runs <target> in
 # <entry>.${MACHINE_ARCH} when that directory exists and in <entry>
 # otherwise; "set -e" makes the shell stop at the first failing command.
 # par-<target> then depends on all of the <entry>.<target>__D targets.
 .for __target in all clean cleandepend cleandir depend includes obj
 .for entry in ${SUBDIR}
 ${entry}.${__target}__D: .PHONY .MAKE
 	${_+_}@set -e; if test -d ${.CURDIR}/${entry}.${MACHINE_ARCH}; then \
 		${ECHODIR} "===> ${DIRPRFX}${entry}.${MACHINE_ARCH} (${__target})"; \
 		edir=${entry}.${MACHINE_ARCH}; \
 		cd ${.CURDIR}/$${edir}; \
 	else \
 		${ECHODIR} "===> ${DIRPRFX}${entry} (${__target})"; \
 		edir=${entry}; \
 		cd ${.CURDIR}/$${edir}; \
 	fi; \
 	${MAKE} ${__target} DIRPRFX=${DIRPRFX}$${edir}/
 .endfor
 par-${__target}: ${SUBDIR:S/$/.${__target}__D/}
 .endfor
 
 .include <bsd.subdir.mk>
 
 .if make(check-old) || make(check-old-dirs) || \
     make(check-old-files) || make(check-old-libs) || \
     make(delete-old) || make(delete-old-dirs) || \
     make(delete-old-files) || make(delete-old-libs)
 
 #
 # check for / delete old files section
 #
 
 .include "ObsoleteFiles.inc"
 
 # Warning text printed (through fmt) by delete-old-libs.
 OLD_LIBS_MESSAGE="Please be sure no application still uses those libraries, \
 else you can not start such an application. Consult UPDATING for more \
 information regarding how to cope with the removal/revision bump of a \
 specific library."
 
 # Flag passed to rm by the delete-old-* targets: -i (ask before each
 # removal) by default, -v (just report) when BATCH_DELETE_OLD_FILES is set.
 .if !defined(BATCH_DELETE_OLD_FILES)
 RM_I=-i
 .else
 RM_I=-v
 .endif
 
 # delete-old-files: remove every OLD_FILES entry that still exists under
 # ${DESTDIR}, then remove catpages that have no matching manpage.
 delete-old-files:
 	@echo ">>> Removing old files (only deletes safe to delete libs)"
 # Ask for every old file if the user really wants to remove it.
 # It's annoying, but better safe than sorry.
 # NB: We cannot pass the list of OLD_FILES as a parameter because the
 # argument list will get too long. Using .for/.endfor make "loops" will make
 # the Makefile parser segfault.
 #
 # The loop's stdin is the pipe carrying the file list, so the original stdin
 # is saved as descriptor 3 and handed to rm for its interactive prompt.
 # The second -V also emits usr/share/*.gz entries with the suffix removed.
 	@exec 3<&0; \
 	cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \
 	while read file; do \
 		if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \
 			chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \
 			rm ${RM_I} "${DESTDIR}/$${file}" <&3; \
 		fi; \
 	done
 # Remove catpages without corresponding manpages.
 # sed prints every catpage path twice, the second time rewritten to the
 # manpage location, so the loop consumes the paths in pairs.  The catpage
 # path is quoted so that it is never word-split or glob-expanded.
 	@exec 3<&0; \
 	find ${DESTDIR}/usr/share/man/cat* ! -type d | \
 	sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \
 	while read catpage; do \
 		read manpage; \
 		if [ ! -e "$${manpage}" ]; then \
 			rm ${RM_I} "$${catpage}" <&3; \
 		fi; \
 	done
 	@echo ">>> Old files removed"
 
 # check-old-files: list (without removing) every OLD_FILES entry that still
 # exists under ${DESTDIR}, then every catpage that has no matching manpage.
 check-old-files:
 	@echo ">>> Checking for old files"
 	@cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \
 	while read file; do \
 		if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \
 			echo "${DESTDIR}/$${file}"; \
 		fi; \
 	done
 # Check for catpages without corresponding manpages.
 # sed prints every catpage path twice, the second time rewritten to the
 # manpage location, so the loop consumes the paths in pairs.  The catpage
 # path is quoted so that it is printed exactly as found.
 	@find ${DESTDIR}/usr/share/man/cat* ! -type d | \
 	sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \
 	while read catpage; do \
 		read manpage; \
 		if [ ! -e "$${manpage}" ]; then \
 			echo "$${catpage}"; \
 		fi; \
 	done
 
 # delete-old-libs: print the OLD_LIBS_MESSAGE warning, then remove every
 # OLD_LIBS entry that still exists under ${DESTDIR}.  Descriptor 3 keeps the
 # original stdin available to rm, since the loop reads the list from a pipe.
 # A library's .debug/.symbols file under ${DEBUGDIR} is removed only once
 # the library itself is gone.
 delete-old-libs:
 	@echo ">>> Removing old libraries"
 	@echo "${OLD_LIBS_MESSAGE}" | fmt
 	@exec 3<&0; \
 	cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_LIBS | xargs -n1 | \
 	while read file; do \
 		if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \
 			chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \
 			rm ${RM_I} "${DESTDIR}/$${file}" <&3; \
 		fi; \
 		for ext in debug symbols; do \
 		  if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \
 		      "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \
 			  rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \
 			      <&3; \
 		  fi; \
 		done; \
 	done
 	@echo ">>> Old libraries removed"
 
 # check-old-libs: list every OLD_LIBS entry that still exists under
 # ${DESTDIR}, plus any matching .debug/.symbols file under ${DEBUGDIR}.
 # Unlike delete-old-libs, the debug files are reported whether or not the
 # library itself is still present.
 check-old-libs:
 	@echo ">>> Checking for old libraries"
 	@cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_LIBS | xargs -n1 | \
 	while read file; do \
 		if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \
 			echo "${DESTDIR}/$${file}"; \
 		fi; \
 		for ext in debug symbols; do \
 		  if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \
 			  echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \
 		  fi; \
 		done; \
 	done
 
 # delete-old-dirs: rmdir every OLD_DIRS entry that exists under ${DESTDIR}.
 # The list is reverse-sorted, which puts a subdirectory ahead of its parent.
 # rmdir failures are ignored (|| true), and symbolic links are only
 # reported, never removed.
 delete-old-dirs:
 	@echo ">>> Removing old directories"
 	@cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_DIRS | xargs -n1 | sort -r | \
 	while read dir; do \
 		if [ -d "${DESTDIR}/$${dir}" ]; then \
 			rmdir -v "${DESTDIR}/$${dir}" || true; \
 		elif [ -L "${DESTDIR}/$${dir}" ]; then \
 			echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \
 		fi; \
 	done
 	@echo ">>> Old directories removed"
 
 # check-old-dirs: list every OLD_DIRS entry that exists under ${DESTDIR};
 # entries that are symbolic links get a "remove manually" note instead.
 check-old-dirs:
 	@echo ">>> Checking for old directories"
 	@cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_DIRS | xargs -n1 | \
 	while read dir; do \
 		if [ -d "${DESTDIR}/$${dir}" ]; then \
 			echo "${DESTDIR}/$${dir}"; \
 		elif [ -L "${DESTDIR}/$${dir}" ]; then \
 			echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \
 		fi; \
 	done
 
 # delete-old removes files and directories but deliberately not libraries;
 # it only prints how to remove those.
 delete-old: delete-old-files delete-old-dirs
 	@echo "To remove old libraries run '${MAKE} delete-old-libs'."
 
 # check-old runs all three checks and prints the follow-up commands.
 check-old: check-old-files check-old-libs check-old-dirs
 	@echo "To remove old files and directories run '${MAKE} delete-old'."
 	@echo "To remove old libraries run '${MAKE} delete-old-libs'."
 
 .endif
 
 #
 # showconfig - show build configuration.
 #
 # Runs make on kern.opts.mk and src.opts.mk with -dg1 debugging output,
 # keeps only the lines starting with MK_, and prints them sorted and
 # de-duplicated.
 showconfig:
 	@(${MAKE} -n -f ${.CURDIR}/sys/conf/kern.opts.mk -V dummy -dg1; \
 	  ${MAKE} -n -f ${.CURDIR}/share/mk/src.opts.mk -V dummy -dg1) 2>&1 | grep ^MK_ | sort -u
 
 # When both KRNLOBJDIR and KERNCONF are non-empty, the DTB goes into the
 # kernel object directory.  If FDT_DTS_FILE was not supplied, it is read
 # from the "makeoptions FDT_DTS_FILE=..." line of the kernel configuration
 # file, provided that file exists.
 # NOTE(review): the trailing "; echo" presumably guarantees the != command
 # succeeds and yields output even when awk finds nothing -- confirm.
 .if !empty(KRNLOBJDIR) && !empty(KERNCONF)
 DTBOUTPUTPATH= ${KRNLOBJDIR}/${KERNCONF}/
 
 .if !defined(FDT_DTS_FILE) || empty(FDT_DTS_FILE)
 .if exists(${KERNCONFDIR}/${KERNCONF})
 FDT_DTS_FILE!= awk 'BEGIN {FS="="} /^makeoptions[[:space:]]+FDT_DTS_FILE/ {print $$2}' \
 	'${KERNCONFDIR}/${KERNCONF}' ; echo
 .endif
 .endif
 
 .endif
 
 # Fall back to the source directory when no output path was chosen above or
 # the chosen directory does not exist.
 .if !defined(DTBOUTPUTPATH) || !exists(${DTBOUTPUTPATH})
 DTBOUTPUTPATH= ${.CURDIR}
 .endif
 
 #
 # Build 'standalone' Device Tree Blob
 #
 # Runs sys/tools/fdt/make_dtb.sh with PATH=${TMPPATH} and MACHINE=${TARGET},
 # passing the sys directory, ${FDT_DTS_FILE} and ${DTBOUTPUTPATH}.
 builddtb:
 	@PATH=${TMPPATH} MACHINE=${TARGET} \
 	${.CURDIR}/sys/tools/fdt/make_dtb.sh ${.CURDIR}/sys \
 	    "${FDT_DTS_FILE}" ${DTBOUTPUTPATH}
 
 ###############
 
 .if defined(TARGET) && defined(TARGET_ARCH)
 
 # CPU type for the xdev build: the host's CPUTYPE for a native build,
 # TARGET_CPUTYPE otherwise (either can be overridden via XDEV_CPUTYPE).
 .if ${TARGET} == ${MACHINE} && ${TARGET_ARCH} == ${MACHINE_ARCH}
 XDEV_CPUTYPE?=${CPUTYPE}
 .else
 XDEV_CPUTYPE?=${TARGET_CPUTYPE}
 .endif
 
 # Options handed to every xdev sub-make: a set of features switched off,
 # plus the target identity and CPU type.
 NOFUN=-DNO_FSCHG MK_HTML=no -DNO_LINT \
 	MK_MAN=no MK_NLS=no MK_PROFILE=no \
 	MK_KERBEROS=no MK_RESCUE=no MK_TESTS=no MK_WARNS=no \
 	TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \
 	CPUTYPE=${XDEV_CPUTYPE}
 
 XDDIR=${TARGET_ARCH}-freebsd
 XDTP?=/usr/${XDDIR}
 # ${XDTP:N/*} drops every word that starts with "/"; anything left over
 # means XDTP is not an absolute path.
 .if ${XDTP:N/*}
 .error XDTP variable should be an absolute path
 .endif
 
 # CDBENV/CDENV: environment for building the cross tools themselves.
 # CD2ENV: environment for building target code against ${XDDESTDIR}, with
 # the compiler drivers pointed at that tree through CD2CFLAGS.
 # XDDESTDIR is assigned below its first use; that is fine because these
 # assignments are expanded lazily.
 CDBENV=MAKEOBJDIRPREFIX=${MAKEOBJDIRPREFIX}/${XDDIR} \
 	INSTALL="sh ${.CURDIR}/tools/install.sh"
 CDENV= ${CDBENV} \
 	_SHLIBDIRPREFIX=${XDDESTDIR} \
 	TOOLS_PREFIX=${XDTP}
 CD2CFLAGS=-isystem ${XDDESTDIR}/usr/include -L${XDDESTDIR}/usr/lib \
 	--sysroot=${XDDESTDIR}/ -B${XDDESTDIR}/usr/libexec \
 	-B${XDDESTDIR}/usr/bin -B${XDDESTDIR}/usr/lib
 CD2ENV=${CDENV} CC="${CC} ${CD2CFLAGS}" CXX="${CXX} ${CD2CFLAGS}" \
 	CPP="${CPP} ${CD2CFLAGS}" \
 	MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH}
 
 CDTMP=	${MAKEOBJDIRPREFIX}/${XDDIR}/${.CURDIR}/tmp
 CDMAKE=${CDENV} PATH=${CDTMP}/usr/bin:${PATH} ${MAKE} ${NOFUN}
 CD2MAKE=${CD2ENV} PATH=${CDTMP}/usr/bin:${XDDESTDIR}/usr/bin:${PATH} ${MAKE} ${NOFUN}
 XDDESTDIR=${DESTDIR}/${XDTP}
 # OSREL defaults to "uname -r" with everything from the first "-" or "("
 # onwards removed.
 .if !defined(OSREL)
 OSREL!= uname -r | sed -e 's/[-(].*//'
 .endif
 
 # xdev = build + install.  xdev-links is ordered after them but is not a
 # prerequisite of xdev; it has to be requested explicitly.
 .ORDER: xdev-build xdev-install xdev-links
 xdev: xdev-build xdev-install
 
 # The four build stages run strictly in this order.
 .ORDER: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools
 xdev-build: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools
 
 # Stage 1: create the temporary tree ${CDTMP}/usr and its directory layout.
 _xb-worldtmp:
 	mkdir -p ${CDTMP}/usr
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${CDTMP}/usr >/dev/null
 
 # Stage 2: build the tblgen programs and gperf (when selected) and install
 # them into ${CDTMP}.
 _xb-bootstrap-tools:
 .for _tool in \
     ${_clang_tblgen} \
     ${_gperf}
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \
 	cd ${.CURDIR}/${_tool} && \
 	${CDMAKE} DIRPRFX=${_tool}/ obj && \
 	${CDMAKE} DIRPRFX=${_tool}/ depend && \
 	${CDMAKE} DIRPRFX=${_tool}/ all && \
 	${CDMAKE} DIRPRFX=${_tool}/ DESTDIR=${CDTMP} install
 .endfor
 
 # Stage 3: run the regular build-tools target under the xdev environment.
 _xb-build-tools:
 	${_+_}@cd ${.CURDIR}; \
 	${CDBENV} ${MAKE} -f Makefile.inc1 ${NOFUN} build-tools
 
 # Stage 4: build (but do not install) the cross toolchain; installation is
 # done later by _xi-cross-tools.
 _xb-cross-tools:
 .for _tool in \
     ${_binutils} \
     ${_elftctools} \
     usr.bin/ar \
     ${_clang_libs} \
     ${_clang} \
     ${_cc}
 	${_+_}@${ECHODIR} "===> xdev ${_tool} (obj,depend,all)"; \
 	cd ${.CURDIR}/${_tool} && \
 	${CDMAKE} DIRPRFX=${_tool}/ obj && \
 	${CDMAKE} DIRPRFX=${_tool}/ depend && \
 	${CDMAKE} DIRPRFX=${_tool}/ all
 .endfor
 
 # Create ${XDDESTDIR} and populate its directory layout from the root, usr
 # and include mtree specifications; the tests layout is added only when
 # MK_TESTS is not "no".
 _xi-mtree:
 	${_+_}@${ECHODIR} "mtree populating ${XDDESTDIR}"
 	mkdir -p ${XDDESTDIR}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \
 	    -p ${XDDESTDIR} >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${XDDESTDIR}/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \
 	    -p ${XDDESTDIR}/usr/include >/dev/null
 .if ${MK_TESTS} != "no"
 	mkdir -p ${XDDESTDIR}${TESTSBASE}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \
 	    -p ${XDDESTDIR}${TESTSBASE} >/dev/null
 .endif
 
 # xdev-install: after the build, create the destination tree, then install
 # the cross tools, the headers and the libraries, strictly in this order.
 .ORDER: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries
 xdev-install: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries
 
 # Install every cross tool built by _xb-cross-tools into ${XDDESTDIR}.
 # cd and the install are chained with &&, like the other per-tool loops, so
 # the install can never run from the wrong directory.
 _xi-cross-tools:
 	@echo "_xi-cross-tools"
 .for _tool in \
     ${_binutils} \
     ${_elftctools} \
     usr.bin/ar \
     ${_clang_libs} \
     ${_clang} \
     ${_cc}
 	${_+_}@${ECHODIR} "===> xdev ${_tool} (install)"; \
 	cd ${.CURDIR}/${_tool} && \
 	${CDMAKE} DIRPRFX=${_tool}/ install DESTDIR=${XDDESTDIR}
 .endfor
 
 # Install the headers into ${XDDESTDIR} by running par-includes under the
 # target environment (${CD2MAKE}).
 _xi-includes:
 	${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 par-includes \
 		DESTDIR=${XDDESTDIR}
 
 # Build and install the libraries into ${XDDESTDIR} the same way.
 _xi-libraries:
 	${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 libraries \
 		DESTDIR=${XDDESTDIR}
 
 # xdev-links: for every program in ${XDDESTDIR}/usr/bin, create two symbolic
 # links in the usr/bin directory four levels up: ${XDDIR}-<name> and
 # ${XDDIR}${OSREL}-<name>.
 # All paths are relative to ${XDDESTDIR}/usr/bin, so the steps are chained
 # with &&: if the cd (or the mkdir) fails, no link is created relative to
 # some other directory.
 xdev-links:
 	${_+_}cd ${XDDESTDIR}/usr/bin && \
 	mkdir -p ../../../../usr/bin && \
 		for i in *; do \
 			ln -sf ../../${XDTP}/usr/bin/$$i \
 			    ../../../../usr/bin/${XDDIR}-$$i; \
 			ln -sf ../../${XDTP}/usr/bin/$$i \
 			    ../../../../usr/bin/${XDDIR}${OSREL}-$$i; \
 		done
 .else
 # TARGET and TARGET_ARCH were not both supplied: report the problem and fail
 # the target, so callers cannot mistake the no-op for a completed xdev run.
 xdev xdev-build xdev-install xdev-links:
 	@echo "*** Error: Both TARGET and TARGET_ARCH must be defined for \"${.TARGET}\" target"; \
 	false
 .endif
Index: user/ngie/more-tests/sys/amd64/vmm/vmm_lapic.c
===================================================================
--- user/ngie/more-tests/sys/amd64/vmm/vmm_lapic.c	(revision 281675)
+++ user/ngie/more-tests/sys/amd64/vmm/vmm_lapic.c	(revision 281676)
@@ -1,243 +1,247 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/smp.h>
 
 #include <x86/specialreg.h>
 #include <x86/apicreg.h>
 
 #include <machine/vmm.h>
 #include "vmm_ktr.h"
 #include "vmm_lapic.h"
 #include "vlapic.h"
 
 /*
  * Some MSI message definitions
  *
  * lapic_intr_msi() accepts an MSI address only when the bits selected by
  * MSI_X86_ADDR_MASK equal MSI_X86_ADDR_BASE, and treats the destination as
  * logical only when both the RH and LOG bits are set.
  */
 #define	MSI_X86_ADDR_MASK	0xfff00000
 #define	MSI_X86_ADDR_BASE	0xfee00000
 #define	MSI_X86_ADDR_RH		0x00000008	/* Redirection Hint */
 #define	MSI_X86_ADDR_LOG	0x00000004	/* Destination Mode */
 
 /*
  * Mark 'vector' as ready on the local APIC of vcpu 'cpu' in 'vm'.
  *
  * Returns EINVAL when 'cpu' is outside [0, VM_MAXCPU) or 'vector' is outside
  * the accepted range, and 0 otherwise.  The vcpu is notified through
  * vcpu_notify_event() only when vlapic_set_intr_ready() returns non-zero.
  */
 int
 lapic_set_intr(struct vm *vm, int cpu, int vector, bool level)
 {
 	struct vlapic *vlapic;
 
 	if (cpu < 0 || cpu >= VM_MAXCPU)
 		return (EINVAL);
 
-	if (vector < 32 || vector > 255)
+	/*
+	 * According to section "Maskable Hardware Interrupts" in Intel SDM
+	 * vectors 16 through 255 can be delivered through the local APIC.
+	 */
+	if (vector < 16 || vector > 255)
 		return (EINVAL);
 
 	vlapic = vm_lapic(vm, cpu);
 	if (vlapic_set_intr_ready(vlapic, vector, level))
 		vcpu_notify_event(vm, cpu, true);
 	return (0);
 }
 
 /*
  * Pass 'vector' to vlapic_trigger_lvt() for one vcpu, or, when 'cpu' is -1,
  * for every vcpu in the set returned by vm_active_cpus().
  *
  * Returns EINVAL when 'cpu' is outside [-1, VM_MAXCPU).  Otherwise returns
  * the first non-zero result of vlapic_trigger_lvt(), at which point the
  * remaining vcpus are skipped, or 0 when every call succeeded.
  */
 int
 lapic_set_local_intr(struct vm *vm, int cpu, int vector)
 {
 	cpuset_t targets;
 	int bit, rc, target;
 
 	if (cpu < -1 || cpu >= VM_MAXCPU)
 		return (EINVAL);
 
 	if (cpu != -1)
 		CPU_SETOF(cpu, &targets);
 	else
 		targets = vm_active_cpus(vm);
 
 	rc = 0;
 	for (;;) {
 		/* CPU_FFS() is 1-based and yields 0 once the set is empty. */
 		bit = CPU_FFS(&targets);
 		if (bit == 0)
 			break;
 		target = bit - 1;
 		CPU_CLR(target, &targets);
 		rc = vlapic_trigger_lvt(vm_lapic(vm, target), vector);
 		if (rc != 0)
 			break;
 	}
 
 	return (rc);
 }
 
 /*
  * Decode an MSI address/data pair and hand the resulting edge-triggered
  * interrupt to vlapic_deliver_intr().
  *
  * Returns -1 when the address does not fall in the MSI window
  * (MSI_X86_ADDR_BASE under MSI_X86_ADDR_MASK), and 0 otherwise.
  */
 int
 lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg)
 {
 	uint32_t dest;
 	int delmode, vec;
 	bool phys;
 
 	VM_CTR2(vm, "lapic MSI addr: %#lx msg: %#lx", addr, msg);
 
 	if ((addr & MSI_X86_ADDR_MASK) != MSI_X86_ADDR_BASE) {
 		VM_CTR1(vm, "lapic MSI invalid addr %#lx", addr);
 		return (-1);
 	}
 
 	/*
 	 * Field layout follows the Intel Arch spec, Vol3 Ch 10.
 	 *
 	 * The trigger level carried in 'msg' is ignored: the PCI
 	 * specification does not support level triggered MSI/MSI-X.
 	 */
 	vec = msg & 0xff;
 	delmode = msg & APIC_DELMODE_MASK;
 	dest = (addr >> 12) & 0xff;
 
 	/*
 	 * 'dest' is a logical APIC ID only when the Redirection Hint and
 	 * the Destination Mode bits are both '1'; it is physical otherwise.
 	 */
 	if ((addr & MSI_X86_ADDR_RH) != 0 && (addr & MSI_X86_ADDR_LOG) != 0)
 		phys = false;
 	else
 		phys = true;
 
 	VM_CTR3(vm, "lapic MSI %s dest %#x, vec %d",
 	    phys ? "physical" : "logical", dest, vec);
 
 	vlapic_deliver_intr(vm, LAPIC_TRIG_EDGE, dest, phys, delmode, vec);
 	return (0);
 }
 
 /*
  * Report whether 'msr' lies in the range 0x800..0xBFF (inclusive), which
  * this file treats as the x2APIC MSR range.
  */
 static boolean_t
 x2apic_msr(u_int msr)
 {
 
 	return ((msr >= 0x800 && msr <= 0xBFF) ? TRUE : FALSE);
 }
 
 /*
  * Convert an MSR number to a register offset: the distance from MSR 0x800,
  * shifted left by four bits (one 16-byte slot per MSR).
  */
 static u_int
 x2apic_msr_to_regoff(u_int msr)
 {
 	u_int slot;
 
 	slot = msr - 0x800;
 	return (slot << 4);
 }
 
 /*
  * Report whether 'msr' is handled by the lapic MSR accessors: either
  * MSR_APICBASE or any MSR accepted by x2apic_msr().
  */
 boolean_t
 lapic_msr(u_int msr)
 {
 
 	if (msr == MSR_APICBASE)
 		return (TRUE);
 	return (x2apic_msr(msr) ? TRUE : FALSE);
 }
 
 /*
  * Read a local APIC MSR of vcpu 'cpu' into '*rval'.
  *
  * MSR_APICBASE is answered from vlapic_get_apicbase() and always returns 0.
  * Any other MSR is converted to a register offset and read through
  * vlapic_read(), whose result is returned.
  */
 int
 lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval, bool *retu)
 {
 	struct vlapic *vlapic;
 
 	vlapic = vm_lapic(vm, cpu);
 
 	if (msr != MSR_APICBASE) {
 		return (vlapic_read(vlapic, 0, x2apic_msr_to_regoff(msr),
 		    rval, retu));
 	}
 
 	*rval = vlapic_get_apicbase(vlapic);
 	return (0);
 }
 
 /*
  * Write 'val' to a local APIC MSR of vcpu 'cpu'.
  *
  * MSR_APICBASE goes through vlapic_set_apicbase(); any other MSR is
  * converted to a register offset and written through vlapic_write().
  * The result of whichever call was made is returned.
  */
 int
 lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val, bool *retu)
 {
 	struct vlapic *vlapic;
 
 	vlapic = vm_lapic(vm, cpu);
 
 	if (msr == MSR_APICBASE)
 		return (vlapic_set_apicbase(vlapic, val));
 
 	return (vlapic_write(vlapic, 0, x2apic_msr_to_regoff(msr), val,
 	    retu));
 }
 
 /*
  * Handle a memory-mapped write to the local APIC page of vcpu 'cpu'.
  *
  * The register offset is 'gpa' relative to DEFAULT_APIC_BASE.  Returns
  * EINVAL unless the access is exactly 4 bytes wide and the offset is a
  * multiple of 16; otherwise returns the result of vlapic_write().
  */
 int
 lapic_mmio_write(void *vm, int cpu, uint64_t gpa, uint64_t wval, int size,
 		 void *arg)
 {
 	uint64_t off;
 
 	off = gpa - DEFAULT_APIC_BASE;
 
 	/* Reject anything but a 4-byte access on a 16-byte boundary. */
 	if (size != 4)
 		return (EINVAL);
 	if ((off & 0xf) != 0)
 		return (EINVAL);
 
 	return (vlapic_write(vm_lapic(vm, cpu), 1, off, wval, arg));
 }
 
 /*
  * Handle a memory-mapped read from the local APIC page of vcpu 'cpu'.
  *
  * The register offset is 'gpa' relative to DEFAULT_APIC_BASE with its two
  * low bits cleared.  Returns EINVAL when that offset is not a multiple of
  * 16; otherwise returns the result of vlapic_read().  'size' is not
  * checked.
  */
 int
 lapic_mmio_read(void *vm, int cpu, uint64_t gpa, uint64_t *rval, int size,
 		void *arg)
 {
 	uint64_t off;
 
 	/*
 	 * Accesses are suggested to be 4 bytes wide, but not every OS
 	 * complies, so the offset is rounded down to a 4-byte boundary
 	 * before the 16-byte alignment check.
 	 */
 	off = (gpa - DEFAULT_APIC_BASE) & ~(uint64_t)3;
 	if ((off & 0xf) != 0)
 		return (EINVAL);
 
 	return (vlapic_read(vm_lapic(vm, cpu), 1, off, rval, arg));
 }
Index: user/ngie/more-tests/sys/amd64/vmm
===================================================================
--- user/ngie/more-tests/sys/amd64/vmm	(revision 281675)
+++ user/ngie/more-tests/sys/amd64/vmm	(revision 281676)

Property changes on: user/ngie/more-tests/sys/amd64/vmm
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/amd64/vmm:r281621-281675
Index: user/ngie/more-tests/sys/arm/allwinner/if_emac.c
===================================================================
--- user/ngie/more-tests/sys/arm/allwinner/if_emac.c	(revision 281675)
+++ user/ngie/more-tests/sys/arm/allwinner/if_emac.c	(revision 281676)
@@ -1,1152 +1,1166 @@
 /*-
  * Copyright (c) 2013 Ganbold Tsagaankhuu <ganbold@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /* A10/A20 EMAC driver */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/bus.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/rman.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/gpio.h>
 
 #include <machine/bus.h>
 #include <machine/resource.h>
 #include <machine/intr.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 #include <net/if_mib.h>
 #include <net/ethernet.h>
 #include <net/if_vlan_var.h>
 
 #ifdef INET
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #endif
 
 #include <net/bpf.h>
 #include <net/bpfdesc.h>
 
 #include <dev/fdt/fdt_common.h>
 #include <dev/ofw/ofw_bus.h>
 #include <dev/ofw/ofw_bus_subr.h>
 
 #include <dev/mii/mii.h>
 #include <dev/mii/miivar.h>
 
 #include <arm/allwinner/if_emacreg.h>
 
 #include "miibus_if.h"
 
 #include "gpio_if.h"
 
 #include "a10_clk.h"
 #include "a10_sramc.h"
 #include "a10_gpio.h"
 
 /*
  * Per-device driver state (softc) for the EMAC controller: the ifnet and
  * miibus handles, the bus-space tag/handle and the memory/IRQ resources
  * used for register access and interrupts, the driver mutex, the periodic
  * callout, the TX watchdog counter, the RX processing limit and the link
  * state.  emac_fifo_mask (added by this revision) records which TX FIFOs
  * are currently in use.
  */
 struct emac_softc {
 	struct ifnet		*emac_ifp;
 	device_t		emac_dev;
 	device_t		emac_miibus;
 	bus_space_handle_t	emac_handle;
 	bus_space_tag_t		emac_tag;
 	struct resource		*emac_res;
 	struct resource		*emac_irq;
 	void			*emac_intrhand;
 	int			emac_if_flags;
 	struct mtx		emac_mtx;
 	struct callout		emac_tick_ch;
 	int			emac_watchdog_timer;
 	int			emac_rx_process_limit;
 	int			emac_link;
+	uint32_t		emac_fifo_mask;
 };
 
 static int	emac_probe(device_t);
 static int	emac_attach(device_t);
 static int	emac_detach(device_t);
 static int	emac_shutdown(device_t);
 static int	emac_suspend(device_t);
 static int	emac_resume(device_t);
 
 static void	emac_sys_setup(void);
 static void	emac_reset(struct emac_softc *);
 
 static void	emac_init_locked(struct emac_softc *);
 static void	emac_start_locked(struct ifnet *);
 static void	emac_init(void *);
 static void	emac_stop_locked(struct emac_softc *);
 static void	emac_intr(void *);
 static int	emac_ioctl(struct ifnet *, u_long, caddr_t);
 
 static void	emac_rxeof(struct emac_softc *, int);
-static void	emac_txeof(struct emac_softc *);
+static void	emac_txeof(struct emac_softc *, uint32_t);
 
 static int	emac_miibus_readreg(device_t, int, int);
 static int	emac_miibus_writereg(device_t, int, int, int);
 static void	emac_miibus_statchg(device_t);
 
 static int	emac_ifmedia_upd(struct ifnet *);
 static void	emac_ifmedia_sts(struct ifnet *, struct ifmediareq *);
 
 static int	sysctl_int_range(SYSCTL_HANDLER_ARGS, int, int);
 static int	sysctl_hw_emac_proc_limit(SYSCTL_HANDLER_ARGS);
 
 #define	EMAC_READ_REG(sc, reg)		\
     bus_space_read_4(sc->emac_tag, sc->emac_handle, reg)
 #define	EMAC_WRITE_REG(sc, reg, val)	\
     bus_space_write_4(sc->emac_tag, sc->emac_handle, reg, val)
 
 /*
  * One-time SoC-level preparation for the EMAC: activate its clock, switch
  * GPIO pins PA0 through PA17 to their MII function and map the SRAM to the
  * EMAC.
  */
 static void
 emac_sys_setup(void)
 {
 	int pin;
 
 	a10_clk_emac_activate();
 
 	/* Configure pin mux settings for MII on pins PA0..PA17. */
 	pin = 0;
 	while (pin < 18) {
 		a10_emac_gpio_config(pin);
 		pin++;
 	}
 
 	/* Map sram */
 	a10_map_to_emac();
 }
 
 /*
  * Fill hwaddr[0..5] with the station address to use.
  *
  * The address currently held in EMAC_MAC_A1/EMAC_MAC_A0 is used when the
  * two registers ORed together are neither 0 nor 0xffffff.  Otherwise a
  * locally administered address is made up: 'b' 's' 'd' followed by 24
  * random bits ('b' is 0x62, which has the locally assigned bit set and the
  * broadcast/multicast bit clear).
  */
 static void
 emac_get_hwaddr(struct emac_softc *sc, uint8_t *hwaddr)
 {
 	uint32_t lo24, hi24, merged, rnd;
 
 	lo24 = EMAC_READ_REG(sc, EMAC_MAC_A0);
 	hi24 = EMAC_READ_REG(sc, EMAC_MAC_A1);
 	merged = lo24 | hi24;
 
 	if (merged == 0 || merged == 0xffffff) {
 		/* Nothing usable in the hardware; invent an address. */
 		rnd = arc4random() & 0x00ffffff;
 		hwaddr[0] = 'b';
 		hwaddr[1] = 's';
 		hwaddr[2] = 'd';
 		hwaddr[3] = (rnd >> 16) & 0xff;
 		hwaddr[4] = (rnd >> 8) & 0xff;
 		hwaddr[5] = rnd & 0xff;
 	} else {
 		/* A1 holds the first three bytes, A0 the last three. */
 		hwaddr[0] = (hi24 >> 16) & 0xff;
 		hwaddr[1] = (hi24 >> 8) & 0xff;
 		hwaddr[2] = hi24 & 0xff;
 		hwaddr[3] = (lo24 >> 16) & 0xff;
 		hwaddr[4] = (lo24 >> 8) & 0xff;
 		hwaddr[5] = lo24 & 0xff;
 	}
 
 	if (bootverbose)
 		printf("MAC address: %s\n", ether_sprintf(hwaddr));
 }
 
 /*
  * Program the receive filter from the interface flags and multicast list.
  * Must be called with the driver lock held.
  *
  * EMAC_RX_CTL is read, updated and written back.  EMAC_RX_UCAD, EMAC_RX_DAF,
  * EMAC_RX_MCO and EMAC_RX_MHF are always set.  The two hash registers are
  * filled with all ones for IFF_ALLMULTI, otherwise with one bit per
  * link-level multicast address, selected by the top six bits of the
  * big-endian CRC32 of the address.  EMAC_RX_BCO is set for IFF_BROADCAST,
  * and EMAC_RX_PA is set or cleared to follow IFF_PROMISC.
  */
 static void
 emac_set_rx_mode(struct emac_softc *sc)
 {
 	struct ifnet *ifp;
 	struct ifmultiaddr *ifma;
 	uint32_t h, hashes[2];
 	uint32_t rcr;
 
 	EMAC_ASSERT_LOCKED(sc);
 
 	ifp = sc->emac_ifp;
 
 	rcr = EMAC_READ_REG(sc, EMAC_RX_CTL);
 
 	/* Unicast packet and DA filtering */
 	rcr |= EMAC_RX_UCAD;
 	rcr |= EMAC_RX_DAF;
 
 	hashes[0] = 0;
 	hashes[1] = 0;
 	if (ifp->if_flags & IFF_ALLMULTI) {
 		hashes[0] = 0xffffffff;
 		hashes[1] = 0xffffffff;
 	} else {
 		if_maddr_rlock(ifp);
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			if (ifma->ifma_addr->sa_family != AF_LINK)
 				continue;
 			h = ether_crc32_be(LLADDR((struct sockaddr_dl *)
 			    ifma->ifma_addr), ETHER_ADDR_LEN) >> 26;
 			/*
 			 * (h & 0x1f) may be 31, so shift an unsigned one;
 			 * shifting a signed 1 into the sign bit is undefined.
 			 */
 			hashes[h >> 5] |= 1U << (h & 0x1f);
 		}
 		if_maddr_runlock(ifp);
 	}
 	rcr |= EMAC_RX_MCO;
 	rcr |= EMAC_RX_MHF;
 	EMAC_WRITE_REG(sc, EMAC_RX_HASH0, hashes[0]);
 	EMAC_WRITE_REG(sc, EMAC_RX_HASH1, hashes[1]);
 
 	if (ifp->if_flags & IFF_BROADCAST)
 		rcr |= EMAC_RX_BCO;
 
 	/*
 	 * rcr started out as the current register contents, so the bit set
 	 * for promiscuous mode has to be cleared explicitly once the
 	 * interface leaves promiscuous mode; ORing alone would leave it set
 	 * forever.
 	 */
 	if (ifp->if_flags & IFF_PROMISC)
 		rcr |= EMAC_RX_PA;
 	else
 		rcr &= ~EMAC_RX_PA;
 
 	EMAC_WRITE_REG(sc, EMAC_RX_CTL, rcr);
 }
 
 /*
  * Reset the controller by writing 0 and then 1 to EMAC_CTL, waiting
  * DELAY(200) after each write.
  */
 static void
 emac_reset(struct emac_softc *sc)
 {
 	const int settle = 200;	/* DELAY() argument used after each write */
 
 	EMAC_WRITE_REG(sc, EMAC_CTL, 0);
 	DELAY(settle);
 	EMAC_WRITE_REG(sc, EMAC_CTL, 1);
 	DELAY(settle);
 }
 
 /*
  * Transmit-completion handling.  In the added (+) revision this hunk also
  * introduces emac_drain_rxfifo(), which reads and discards EMAC_RX_IO_DATA
  * for as long as EMAC_RX_FBC reads back non-zero, and emac_txeof() gains a
  * 'status' argument: its EMAC_TX_FIFO0/EMAC_TX_FIFO1 bits are cleared from
  * sc->emac_fifo_mask and decide whether one or two output packets are
  * counted.  Both revisions clear IFF_DRV_OACTIVE and disarm the watchdog.
  */
 static void
-emac_txeof(struct emac_softc *sc)
+emac_drain_rxfifo(struct emac_softc *sc)
 {
+	uint32_t data;
+
+	while (EMAC_READ_REG(sc, EMAC_RX_FBC) > 0)
+		data = EMAC_READ_REG(sc, EMAC_RX_IO_DATA);
+}
+
+static void
+emac_txeof(struct emac_softc *sc, uint32_t status)
+{
 	struct ifnet *ifp;
 
 	EMAC_ASSERT_LOCKED(sc);
 
 	ifp = sc->emac_ifp;
-	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+	status &= (EMAC_TX_FIFO0 | EMAC_TX_FIFO1);
+	sc->emac_fifo_mask &= ~status;
+	if (status == (EMAC_TX_FIFO0 | EMAC_TX_FIFO1))
+		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 2);
+	else
+		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 
 	/* Unarm watchdog timer if no TX */
 	sc->emac_watchdog_timer = 0;
 }
 
 /*
  * Receive up to 'count' frames from the RX FIFO while the interface is
  * marked running.
  *
  * Each frame is expected to start with EMAC_PACKET_HEADER, followed by a
  * word carrying the length in its low 16 bits and the status in its high
  * 16 bits.  A wrong header makes the routine disable RX, flush the FIFO
  * (reinitializing the controller if the flush times out), re-enable RX and
  * return.  Accepted frames are copied into an mbuf cluster, realigned, and
  * handed to if_input with the driver lock dropped around the call.
  */
 static void
 emac_rxeof(struct emac_softc *sc, int count)
 {
 	struct ifnet *ifp;
 	struct mbuf *m, *m0;
 	uint32_t reg_val, rxcount;
 	int16_t len;
 	uint16_t status;
-	int good_packet, i;
+	int i;
 
 	ifp = sc->emac_ifp;
 	for (; count > 0 &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0; count--) {
 		/*
 		 * Race warning: The first packet might arrive with
 		 * the interrupts disabled, but the second will fix
 		 */
 		rxcount = EMAC_READ_REG(sc, EMAC_RX_FBC);
 		if (!rxcount) {
 			/* Had one stuck? */
 			rxcount = EMAC_READ_REG(sc, EMAC_RX_FBC);
 			if (!rxcount)
 				return;
 		}
 		/* Check packet header */
 		reg_val = EMAC_READ_REG(sc, EMAC_RX_IO_DATA);
 		if (reg_val != EMAC_PACKET_HEADER) {
 			/* Packet header is wrong */
 			if (bootverbose)
 				if_printf(ifp, "wrong packet header\n");
 			/* Disable RX */
 			reg_val = EMAC_READ_REG(sc, EMAC_CTL);
 			reg_val &= ~EMAC_CTL_RX_EN;
 			EMAC_WRITE_REG(sc, EMAC_CTL, reg_val);
 
 			/* Flush RX FIFO */
 			reg_val = EMAC_READ_REG(sc, EMAC_RX_CTL);
 			reg_val |= EMAC_RX_FLUSH_FIFO;
 			EMAC_WRITE_REG(sc, EMAC_RX_CTL, reg_val);
 			for (i = 100; i > 0; i--) {
 				DELAY(100);
 				if ((EMAC_READ_REG(sc, EMAC_RX_CTL) &
 				    EMAC_RX_FLUSH_FIFO) == 0)
 					break;
 			}
 			if (i == 0) {
 				device_printf(sc->emac_dev,
 				    "flush FIFO timeout\n");
 				/* Reinitialize controller */
 				emac_init_locked(sc);
 				return;
 			}
 			/* Enable RX */
 			reg_val = EMAC_READ_REG(sc, EMAC_CTL);
 			reg_val |= EMAC_CTL_RX_EN;
 			EMAC_WRITE_REG(sc, EMAC_CTL, reg_val);
 
 			return;
 		}
 
-		good_packet = 1;
-
 		/* Get packet size and status */
 		reg_val = EMAC_READ_REG(sc, EMAC_RX_IO_DATA);
 		len = reg_val & 0xffff;
 		status = (reg_val >> 16) & 0xffff;
 
-		if (len < 64) {
-			good_packet = 0;
+		if (len < 64 || (status & EMAC_PKT_OK) == 0) {
 			if (bootverbose)
 				if_printf(ifp,
 				    "bad packet: len = %i status = %i\n",
 				    len, status);
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+			emac_drain_rxfifo(sc);
+			continue;
 		}
 #if 0
 		if (status & (EMAC_CRCERR | EMAC_LENERR)) {
 			good_packet = 0;
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 			if (status & EMAC_CRCERR)
 				if_printf(ifp, "crc error\n");
 			if (status & EMAC_LENERR)
 				if_printf(ifp, "length error\n");
 		}
 #endif
-		if (good_packet) {
-			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
-			if (m == NULL)
-				return;
-			m->m_len = m->m_pkthdr.len = MCLBYTES;
+		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+		if (m == NULL) {
+			emac_drain_rxfifo(sc);
+			return;
+		}
+		m->m_len = m->m_pkthdr.len = MCLBYTES;
 
-			len -= ETHER_CRC_LEN;
+		/* Copy entire frame to mbuf first. */
+		bus_space_read_multi_4(sc->emac_tag, sc->emac_handle,
+		    EMAC_RX_IO_DATA, mtod(m, uint32_t *), roundup2(len, 4) / 4);
 
-			/* Copy entire frame to mbuf first. */
-			bus_space_read_multi_4(sc->emac_tag, sc->emac_handle,
-			    EMAC_RX_IO_DATA, mtod(m, uint32_t *),
-			    roundup2(len, 4) / 4);
+		m->m_pkthdr.rcvif = ifp;
+		m->m_len = m->m_pkthdr.len = len - ETHER_CRC_LEN;
 
-			m->m_pkthdr.rcvif = ifp;
-			m->m_len = m->m_pkthdr.len = len;
-
-			/*
-			 * Emac controller needs strict aligment, so to avoid
-			 * copying over an entire frame to align, we allocate
-			 * a new mbuf and copy ethernet header + IP header to
-			 * the new mbuf. The new mbuf is prepended into the
-			 * existing mbuf chain.
-			 */
-			if (m->m_len <= (MHLEN - ETHER_HDR_LEN)) {
-				bcopy(m->m_data, m->m_data + ETHER_HDR_LEN,
-				    m->m_len);
-				m->m_data += ETHER_HDR_LEN;
-			} else if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN) &&
-			    m->m_len > (MHLEN - ETHER_HDR_LEN)) {
-				MGETHDR(m0, M_NOWAIT, MT_DATA);
-				if (m0 != NULL) {
-					len = ETHER_HDR_LEN +
-					    m->m_pkthdr.l2hlen;
-					bcopy(m->m_data, m0->m_data, len);
-					m->m_data += len;
-					m->m_len -= len;
-					m0->m_len = len;
-					M_MOVE_PKTHDR(m0, m);
-					m0->m_next = m;
-					m = m0;
-				} else {
-					if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
-					m_freem(m);
-					m = NULL;
-					continue;
-				}
-			} else if (m->m_len > EMAC_MAC_MAXF) {
+		/*
+		 * Emac controller needs strict aligment, so to avoid
+		 * copying over an entire frame to align, we allocate
+		 * a new mbuf and copy ethernet header + IP header to
+		 * the new mbuf. The new mbuf is prepended into the
+		 * existing mbuf chain.
+		 */
+		if (m->m_len <= (MHLEN - ETHER_HDR_LEN)) {
+			bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
+			m->m_data += ETHER_HDR_LEN;
+		} else if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN) &&
+		    m->m_len > (MHLEN - ETHER_HDR_LEN)) {
+			MGETHDR(m0, M_NOWAIT, MT_DATA);
+			if (m0 != NULL) {
+				len = ETHER_HDR_LEN + m->m_pkthdr.l2hlen;
+				bcopy(m->m_data, m0->m_data, len);
+				m->m_data += len;
+				m->m_len -= len;
+				m0->m_len = len;
+				M_MOVE_PKTHDR(m0, m);
+				m0->m_next = m;
+				m = m0;
+			} else {
 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 				m_freem(m);
 				m = NULL;
 				continue;
 			}
-			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
-			EMAC_UNLOCK(sc);
-			(*ifp->if_input)(ifp, m);
-			EMAC_LOCK(sc);
+		} else if (m->m_len > EMAC_MAC_MAXF) {
+			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+			m_freem(m);
+			m = NULL;
+			continue;
 		}
+		if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+		EMAC_UNLOCK(sc);
+		(*ifp->if_input)(ifp, m);
+		EMAC_LOCK(sc);
 	}
 }
 
 /*
  * Transmit watchdog, called with the driver lock held.
  *
  * Does nothing while the timer is disarmed (zero) or has not yet counted
  * down to zero.  On expiry it logs the timeout, counts an output error,
  * reinitializes the controller and restarts transmission if packets are
  * still queued.
  */
 static void
 emac_watchdog(struct emac_softc *sc)
 {
 	struct ifnet *ifp;
 
 	EMAC_ASSERT_LOCKED(sc);
 
 	/* Disarmed. */
 	if (sc->emac_watchdog_timer == 0)
 		return;
 	/* Armed, but not expired yet. */
 	sc->emac_watchdog_timer--;
 	if (sc->emac_watchdog_timer != 0)
 		return;
 
 	ifp = sc->emac_ifp;
 
 	if (sc->emac_link != 0)
 		if_printf(ifp, "watchdog timeout -- resetting\n");
 	else if (bootverbose)
 		if_printf(ifp, "watchdog timeout (missed link)\n");
 
 	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 
 	/* emac_init_locked() returns early while the running flag is set. */
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	emac_init_locked(sc);
 
 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 		emac_start_locked(ifp);
 }
 
 /*
  * Periodic callout: run the MII tick, run the transmit watchdog, then
  * reschedule itself hz ticks later.
  */
 static void
 emac_tick(void *arg)
 {
 	struct emac_softc *sc = arg;
 	struct mii_data *mii = device_get_softc(sc->emac_miibus);
 
 	mii_tick(mii);
 	emac_watchdog(sc);
 
 	callout_reset(&sc->emac_tick_ch, hz, emac_tick, sc);
 }
 
 /*
  * if_init entry point: takes the driver lock and runs emac_init_locked().
  * 'xcs' is the softc pointer.
  */
 static void
 emac_init(void *xcs)
 {
 	struct emac_softc *sc = xcs;
 
 	EMAC_LOCK(sc);
 	emac_init_locked(sc);
 	EMAC_UNLOCK(sc);
 }
 
 /*
  * Bring the controller up; the driver lock must be held.
  *
  * Returns immediately if the interface is already marked running, so
  * callers that want a full reinitialization clear IFF_DRV_RUNNING first.
  * Otherwise the RX FIFO is flushed, the MAC registers are programmed
  * (MII clock, TX/RX modes, MAC control, inter-packet gaps, collision
  * window, maximum frame length, station address, RX filter), interrupts
  * are enabled, the interface is marked running, the media is (re)selected
  * and the periodic tick callout is scheduled.
  */
 static void
 emac_init_locked(struct emac_softc *sc)
 {
 	struct ifnet *ifp;
 	struct mii_data *mii;
 	uint32_t reg_val;
 	uint8_t *eaddr;
 
 	EMAC_ASSERT_LOCKED(sc);
 
 	ifp = sc->emac_ifp;
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 		return;
 
 	/* Flush RX FIFO */
 	reg_val = EMAC_READ_REG(sc, EMAC_RX_CTL);
 	reg_val |= EMAC_RX_FLUSH_FIFO;
 	EMAC_WRITE_REG(sc, EMAC_RX_CTL, reg_val);
 	DELAY(1);
 
 	/* Soft reset MAC */
 	reg_val = EMAC_READ_REG(sc, EMAC_MAC_CTL0);
 	reg_val &= (~EMAC_MAC_CTL0_SOFT_RST);
 	EMAC_WRITE_REG(sc, EMAC_MAC_CTL0, reg_val);
 
 	/* Set MII clock */
 	reg_val = EMAC_READ_REG(sc, EMAC_MAC_MCFG);
 	reg_val &= (~(0xf << 2));
 	reg_val |= (0xd << 2);
 	EMAC_WRITE_REG(sc, EMAC_MAC_MCFG, reg_val);
 
 	/* Clear RX counter */
 	EMAC_WRITE_REG(sc, EMAC_RX_FBC, 0);
 
 	/* Disable all interrupt and clear interrupt status */
 	EMAC_WRITE_REG(sc, EMAC_INT_CTL, 0);
 	reg_val = EMAC_READ_REG(sc, EMAC_INT_STA);
 	EMAC_WRITE_REG(sc, EMAC_INT_STA, reg_val);
 	DELAY(1);
 
 	/*
 	 * Set up TX.
 	 * NOTE(review): EMAC_TX_TM is ANDed in directly, so it is presumably
 	 * defined as an already-inverted mask -- confirm in if_emacreg.h.
 	 */
 	reg_val = EMAC_READ_REG(sc, EMAC_TX_MODE);
 	reg_val |= EMAC_TX_AB_M;
 	reg_val &= EMAC_TX_TM;
 	EMAC_WRITE_REG(sc, EMAC_TX_MODE, reg_val);
 
 	/*
 	 * Set up RX.
 	 * NOTE(review): as with EMAC_TX_TM above, EMAC_RX_TM is used as an
 	 * AND mask -- confirm its definition.
 	 */
 	reg_val = EMAC_READ_REG(sc, EMAC_RX_CTL);
 	reg_val |= EMAC_RX_SETUP;
 	reg_val &= EMAC_RX_TM;
 	EMAC_WRITE_REG(sc, EMAC_RX_CTL, reg_val);
 
 	/* Set up MAC CTL0. */
 	reg_val = EMAC_READ_REG(sc, EMAC_MAC_CTL0);
 	reg_val |= EMAC_MAC_CTL0_SETUP;
 	EMAC_WRITE_REG(sc, EMAC_MAC_CTL0, reg_val);
 
 	/* Set up MAC CTL1. */
 	reg_val = EMAC_READ_REG(sc, EMAC_MAC_CTL1);
 	reg_val |= EMAC_MAC_CTL1_SETUP;
 	EMAC_WRITE_REG(sc, EMAC_MAC_CTL1, reg_val);
 
 	/* Set up IPGT */
 	EMAC_WRITE_REG(sc, EMAC_MAC_IPGT, EMAC_MAC_IPGT_FD);
 
 	/* Set up IPGR */
 	EMAC_WRITE_REG(sc, EMAC_MAC_IPGR, EMAC_MAC_NBTB_IPG2 |
 	    (EMAC_MAC_NBTB_IPG1 << 8));
 
 	/* Set up Collision window */
 	EMAC_WRITE_REG(sc, EMAC_MAC_CLRT, EMAC_MAC_RM | (EMAC_MAC_CW << 8));
 
 	/* Set up Max Frame Length */
 	EMAC_WRITE_REG(sc, EMAC_MAC_MAXF, EMAC_MAC_MFL);
 
 	/* Setup ethernet address: bytes 0-2 go to A1, bytes 3-5 to A0 */
 	eaddr = IF_LLADDR(ifp);
 	EMAC_WRITE_REG(sc, EMAC_MAC_A1, eaddr[0] << 16 |
 	    eaddr[1] << 8 | eaddr[2]);
 	EMAC_WRITE_REG(sc, EMAC_MAC_A0, eaddr[3] << 16 |
 	    eaddr[4] << 8 | eaddr[5]);
 
 	/* Setup rx filter */
 	emac_set_rx_mode(sc);
 
 	/* Enable RX/TX0/RX Hlevel interrupt */
 	reg_val = EMAC_READ_REG(sc, EMAC_INT_CTL);
 	reg_val |= EMAC_INT_EN;
 	EMAC_WRITE_REG(sc, EMAC_INT_CTL, reg_val);
 
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 
 	/* Link state is re-established by emac_miibus_statchg(). */
 	sc->emac_link = 0;
 
 	/* Switch to the current media. */
 	mii = device_get_softc(sc->emac_miibus);
 	mii_mediachg(mii);
 
 	callout_reset(&sc->emac_tick_ch, hz, emac_tick, sc);
 }
 
 
 /*
  * if_start entry point: takes the driver lock and runs
  * emac_start_locked().
  */
 static void
 emac_start(struct ifnet *ifp)
 {
 	struct emac_softc *sc = ifp->if_softc;
 
 	EMAC_LOCK(sc);
 	emac_start_locked(ifp);
 	EMAC_UNLOCK(sc);
 }
 
 /*
  * Transmit one queued packet.
  *
  * Returns without doing anything when the interface is marked OACTIVE,
  * when there is no link, or when the send queue is empty.  Otherwise one
  * packet is dequeued, copied into the controller through EMAC_TX_IO_DATA
  * (after defragmenting/realigning it if needed), its length is written,
  * transmission is started, the watchdog is armed and the mbuf is freed.
  * The added (+) revision selects between the two TX FIFOs using
  * sc->emac_fifo_mask and only sets OACTIVE once both are in use.
  */
 static void
 emac_start_locked(struct ifnet *ifp)
 {
 	struct emac_softc *sc;
 	struct mbuf *m, *m0;
-	uint32_t reg_val;
+	uint32_t fifo, reg;
 
 	sc = ifp->if_softc;
 	if (ifp->if_drv_flags & IFF_DRV_OACTIVE)
 		return;
+	if (sc->emac_fifo_mask == (EMAC_TX_FIFO0 | EMAC_TX_FIFO1))
+		return;
 	if (sc->emac_link == 0)
 		return;
 	IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
 	if (m == NULL)
 		return;
 
 	/* Select channel */
-	EMAC_WRITE_REG(sc, EMAC_TX_INS, 0);
+	if (sc->emac_fifo_mask & EMAC_TX_FIFO0)
+		fifo = 1;
+	else
+		fifo = 0;
+	sc->emac_fifo_mask |= (1 << fifo);
+	if (sc->emac_fifo_mask == (EMAC_TX_FIFO0 | EMAC_TX_FIFO1))
+		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+	EMAC_WRITE_REG(sc, EMAC_TX_INS, fifo);
 
 	/*
 	 * Emac controller wants 4 byte aligned TX buffers.
 	 * We have to copy pretty much all the time.
 	 */
 	if (m->m_next != NULL || (mtod(m, uintptr_t) & 3) != 0) {
 		m0 = m_defrag(m, M_NOWAIT);
 		if (m0 == NULL) {
 			/*
 			 * NOTE(review): in the added (+) revision the FIFO
 			 * bit reserved in emac_fifo_mask above (and possibly
 			 * IFF_DRV_OACTIVE) is not released on this failure
 			 * path -- confirm whether that can stall TX.
 			 */
 			m_freem(m);
 			m = NULL;
 			return;
 		}
 		m = m0;
 	}
 	/* Write data */
 	bus_space_write_multi_4(sc->emac_tag, sc->emac_handle,
 	    EMAC_TX_IO_DATA, mtod(m, uint32_t *),
 	    roundup2(m->m_len, 4) / 4);
 
 	/* Send the data length. */
-	EMAC_WRITE_REG(sc, EMAC_TX_PL0, m->m_len);
+	reg = (fifo == 0) ? EMAC_TX_PL0 : EMAC_TX_PL1;
+	EMAC_WRITE_REG(sc, reg, m->m_len);
 
 	/* Start translate from fifo to phy. */
-	reg_val = EMAC_READ_REG(sc, EMAC_TX_CTL0);
-	reg_val |= 1;
-	EMAC_WRITE_REG(sc, EMAC_TX_CTL0, reg_val);
+	reg = (fifo == 0) ? EMAC_TX_CTL0 : EMAC_TX_CTL1;
+	EMAC_WRITE_REG(sc, reg, EMAC_READ_REG(sc, reg) | 1);
 
 	/* Set timeout */
 	sc->emac_watchdog_timer = 5;
 
-	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+	/* Data have been sent to hardware, it is okay to free the mbuf now. */
 	BPF_MTAP(ifp, m);
 	m_freem(m);
 }
 
 /*
  * Stop the interface; the driver lock must be held.
  *
  * Clears the running/active flags and the link state, masks and
  * acknowledges all interrupts, clears the RST/TX_EN/RX_EN bits in EMAC_CTL
  * and stops the tick callout.
  */
 static void
 emac_stop_locked(struct emac_softc *sc)
 {
 	struct ifnet *ifp;
 	uint32_t ctl;
 
 	EMAC_ASSERT_LOCKED(sc);
 
 	ifp = sc->emac_ifp;
 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 	sc->emac_link = 0;
 
 	/* Mask every interrupt, then write back whatever is pending. */
 	EMAC_WRITE_REG(sc, EMAC_INT_CTL, 0);
 	EMAC_WRITE_REG(sc, EMAC_INT_STA, EMAC_READ_REG(sc, EMAC_INT_STA));
 
 	/* Disable RX/TX */
 	ctl = EMAC_READ_REG(sc, EMAC_CTL);
 	ctl &= ~(EMAC_CTL_RST | EMAC_CTL_TX_EN | EMAC_CTL_RX_EN);
 	EMAC_WRITE_REG(sc, EMAC_CTL, ctl);
 
 	callout_stop(&sc->emac_tick_ch);
 }
 
 /*
  * Interrupt handler.  'arg' is the softc.
  *
  * With the driver lock held it masks all interrupts, reads EMAC_INT_STA
  * and writes the value back to acknowledge it, processes received packets
  * (bounded by emac_rx_process_limit) and transmit completions, restarts
  * transmission if packets are queued, and re-enables interrupts.
  *
  * The removed (-) revision returned early when the interface was not
  * running, after EMAC_LOCK() had already been taken and without a matching
  * EMAC_UNLOCK(); the added (+) revision drops that early return.
  */
 static void
 emac_intr(void *arg)
 {
 	struct emac_softc *sc;
 	struct ifnet *ifp;
 	uint32_t reg_val;
 
 	sc = (struct emac_softc *)arg;
 	EMAC_LOCK(sc);
-	ifp = sc->emac_ifp;
-	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
-		return;
 
 	/* Disable all interrupts */
 	EMAC_WRITE_REG(sc, EMAC_INT_CTL, 0);
 	/* Get EMAC interrupt status */
 	reg_val = EMAC_READ_REG(sc, EMAC_INT_STA);
 	/* Clear ISR status */
 	EMAC_WRITE_REG(sc, EMAC_INT_STA, reg_val);
 
 	/* Received incoming packet */
 	if (reg_val & EMAC_INT_STA_RX)
 		emac_rxeof(sc, sc->emac_rx_process_limit);
 
 	/* Transmit Interrupt check */
-	if (reg_val & EMAC_INT_STA_TX){
-		emac_txeof(sc);
+	if (reg_val & EMAC_INT_STA_TX) {
+		emac_txeof(sc, reg_val);
+		ifp = sc->emac_ifp;
 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 			emac_start_locked(ifp);
 	}
 
-	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
-		/* Re-enable interrupt mask */
-		reg_val = EMAC_READ_REG(sc, EMAC_INT_CTL);
-		reg_val |= EMAC_INT_EN;
-		EMAC_WRITE_REG(sc, EMAC_INT_CTL, reg_val);
-	}
+	/* Re-enable interrupt mask */
+	reg_val = EMAC_READ_REG(sc, EMAC_INT_CTL);
+	reg_val |= EMAC_INT_EN;
+	EMAC_WRITE_REG(sc, EMAC_INT_CTL, reg_val);
 	EMAC_UNLOCK(sc);
 }
 
 /*
  * Interface ioctl handler.
  *
  * SIOCSIFFLAGS starts/stops the interface or reprograms the RX filter when
  * the promiscuous/allmulti flags changed; SIOCADDMULTI/SIOCDELMULTI
  * reprogram the RX filter while running; media requests go to
  * ifmedia_ioctl(); everything else goes to ether_ioctl().  Returns 0 or
  * the error reported by the delegated handler.
  */
 static int
 emac_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct emac_softc *sc = ifp->if_softc;
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct mii_data *mii;
 	int error = 0;
 	int running;
 
 	switch (command) {
 	case SIOCSIFFLAGS:
 		EMAC_LOCK(sc);
 		running = (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0;
 		if ((ifp->if_flags & IFF_UP) == 0) {
 			/* Going down: stop only if we were running. */
 			if (running)
 				emac_stop_locked(sc);
 		} else if (!running) {
 			emac_init_locked(sc);
 		} else if (((ifp->if_flags ^ sc->emac_if_flags) &
 		    (IFF_PROMISC | IFF_ALLMULTI)) != 0) {
 			/* Already up; only the filter flags changed. */
 			emac_set_rx_mode(sc);
 		}
 		sc->emac_if_flags = ifp->if_flags;
 		EMAC_UNLOCK(sc);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		EMAC_LOCK(sc);
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 			emac_set_rx_mode(sc);
 		EMAC_UNLOCK(sc);
 		break;
 	case SIOCGIFMEDIA:
 	case SIOCSIFMEDIA:
 		mii = device_get_softc(sc->emac_miibus);
 		error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command);
 		break;
 	default:
 		error = ether_ioctl(ifp, command, data);
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Probe: match device tree nodes compatible with "allwinner,sun4i-emac".
  * Returns BUS_PROBE_DEFAULT on a match and ENXIO otherwise.
  */
 static int
 emac_probe(device_t dev)
 {
 
 	if (ofw_bus_is_compatible(dev, "allwinner,sun4i-emac")) {
 		device_set_desc(dev, "A10/A20 EMAC ethernet controller");
 		return (BUS_PROBE_DEFAULT);
 	}
 
 	return (ENXIO);
 }
 
 /*
  * Detach the device and release everything emac_attach() acquired.
  *
  * This routine is also the error-unwind path of emac_attach(), which can
  * call it before the ifnet, interrupt handler, miibus child or bus
  * resources exist, so every step checks that its object is present.
  * Always returns 0.
  */
 static int
 emac_detach(device_t dev)
 {
 	struct emac_softc *sc;
 
 	sc = device_get_softc(dev);
 
 	/*
 	 * emac_attach() jumps here before if_alloc() when resource
 	 * allocation fails, so emac_ifp may still be NULL.
 	 */
 	if (sc->emac_ifp != NULL)
 		sc->emac_ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 
 	if (device_is_attached(dev)) {
 		ether_ifdetach(sc->emac_ifp);
 		EMAC_LOCK(sc);
 		emac_stop_locked(sc);
 		EMAC_UNLOCK(sc);
 		callout_drain(&sc->emac_tick_ch);
 	}
 
 	if (sc->emac_intrhand != NULL)
 		bus_teardown_intr(sc->emac_dev, sc->emac_irq,
 		    sc->emac_intrhand);
 
 	if (sc->emac_miibus != NULL) {
 		device_delete_child(sc->emac_dev, sc->emac_miibus);
 		bus_generic_detach(sc->emac_dev);
 	}
 
 	if (sc->emac_res != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->emac_res);
 
 	if (sc->emac_irq != NULL)
 		bus_release_resource(dev, SYS_RES_IRQ, 0, sc->emac_irq);
 
 	if (sc->emac_ifp != NULL)
 		if_free(sc->emac_ifp);
 
 	if (mtx_initialized(&sc->emac_mtx))
 		mtx_destroy(&sc->emac_mtx);
 
 	return (0);
 }
 
 /*
  * Shutdown is handled exactly like suspend; returns emac_suspend()'s
  * result.
  */
 static int
 emac_shutdown(device_t dev)
 {
 	int rv;
 
 	rv = emac_suspend(dev);
 	return (rv);
 }
 
 /*
  * Suspend: stop the interface if it is currently running.  Always
  * returns 0.
  */
 static int
 emac_suspend(device_t dev)
 {
 	struct emac_softc *sc = device_get_softc(dev);
 
 	EMAC_LOCK(sc);
 	if ((sc->emac_ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 		emac_stop_locked(sc);
 	EMAC_UNLOCK(sc);
 
 	return (0);
 }
 
 /*
  * Resume: if the interface is administratively up, force a full
  * reinitialization of the controller.  Always returns 0.
  */
 static int
 emac_resume(device_t dev)
 {
 	struct emac_softc *sc = device_get_softc(dev);
 	struct ifnet *ifp;
 
 	EMAC_LOCK(sc);
 	ifp = sc->emac_ifp;
 	if ((ifp->if_flags & IFF_UP) != 0) {
 		/* emac_init_locked() is a no-op while RUNNING is set. */
 		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 		emac_init_locked(sc);
 	}
 	EMAC_UNLOCK(sc);
 
 	return (0);
 }
 
 /*
  * Attach the device.
  *
  * Initializes the driver mutex and tick callout, allocates the register
  * window and the IRQ, creates the "process_limit" sysctl and loads its
  * initial value, performs SoC-level setup and a controller reset,
  * allocates and configures the ifnet, attaches the PHY(s) through miibus,
  * attaches the Ethernet interface and finally installs the interrupt
  * handler.  On any failure emac_detach() is called to undo what was done
  * and the error is returned; returns 0 on success.
  */
 static int
 emac_attach(device_t dev)
 {
 	struct emac_softc *sc;
 	struct ifnet *ifp;
 	int error, rid;
 	uint8_t eaddr[ETHER_ADDR_LEN];
 
 	sc = device_get_softc(dev);
 	sc->emac_dev = dev;
 
 	error = 0;
 	mtx_init(&sc->emac_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
 	    MTX_DEF);
 	callout_init_mtx(&sc->emac_tick_ch, &sc->emac_mtx, 0);
 
 	rid = 0;
 	sc->emac_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
 	    RF_ACTIVE);
 	if (sc->emac_res == NULL) {
 		device_printf(dev, "unable to map memory\n");
 		error = ENXIO;
 		goto fail;
 	}
 
 	/* Tag/handle pair used by EMAC_READ_REG()/EMAC_WRITE_REG(). */
 	sc->emac_tag = rman_get_bustag(sc->emac_res);
 	sc->emac_handle = rman_get_bushandle(sc->emac_res);
 
 	rid = 0;
 	sc->emac_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
 	    RF_SHAREABLE | RF_ACTIVE);
 	if (sc->emac_irq == NULL) {
 		device_printf(dev, "cannot allocate IRQ resources.\n");
 		error = ENXIO;
 		goto fail;
 	}
 	/* Create device sysctl node. */
 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 	    OID_AUTO, "process_limit", CTLTYPE_INT | CTLFLAG_RW,
 	    &sc->emac_rx_process_limit, 0, sysctl_hw_emac_proc_limit, "I",
 	    "max number of Rx events to process");
 
 	/*
 	 * Start from the default and let a "process_limit" device hint
 	 * override it.  A non-zero return (no hint) is not an attach
 	 * failure: error is reassigned before it is next tested.
 	 */
 	sc->emac_rx_process_limit = EMAC_PROC_DEFAULT;
 	error = resource_int_value(device_get_name(dev), device_get_unit(dev),
 	    "process_limit", &sc->emac_rx_process_limit);
 	if (error == 0) {
 		if (sc->emac_rx_process_limit < EMAC_PROC_MIN ||
 		    sc->emac_rx_process_limit > EMAC_PROC_MAX) {
 			device_printf(dev, "process_limit value out of range; "
 			    "using default: %d\n", EMAC_PROC_DEFAULT);
 			sc->emac_rx_process_limit = EMAC_PROC_DEFAULT;
 		}
 	}
 	/* Setup EMAC */
 	emac_sys_setup();
 	emac_reset(sc);
 
 	ifp = sc->emac_ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL) {
 		device_printf(dev, "unable to allocate ifp\n");
 		error = ENOSPC;
 		goto fail;
 	}
 	ifp->if_softc = sc;
 
 	/* Setup MII */
 	error = mii_attach(dev, &sc->emac_miibus, ifp, emac_ifmedia_upd,
 	    emac_ifmedia_sts, BMSR_DEFCAPMASK, MII_PHY_ANY, MII_OFFSET_ANY, 0);
 	if (error != 0) {
 		device_printf(dev, "PHY probe failed\n");
 		goto fail;
 	}
 
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_start = emac_start;
 	ifp->if_ioctl = emac_ioctl;
 	ifp->if_init = emac_init;
 	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
 
 	/* Get MAC address */
 	emac_get_hwaddr(sc, eaddr);
 	ether_ifattach(ifp, eaddr);
 
 	/* VLAN capability setup. */
 	ifp->if_capabilities |= IFCAP_VLAN_MTU;
 	ifp->if_capenable = ifp->if_capabilities;
 	/* Tell the upper layer we support VLAN over-sized frames. */
 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
 
 	error = bus_setup_intr(dev, sc->emac_irq, INTR_TYPE_NET | INTR_MPSAFE,
 	    NULL, emac_intr, sc, &sc->emac_intrhand);
 	if (error != 0) {
 		device_printf(dev, "could not set up interrupt handler.\n");
 		ether_ifdetach(ifp);
 		goto fail;
 	}
 
 	/* Reached on success as well; only a non-zero error unwinds. */
 fail:
 	if (error != 0)
 		emac_detach(dev);
 	return (error);
 }
 
 /*
  * Wait for an MII management operation to finish.
  *
  * Polls bit 0 of EMAC_MAC_MIND up to 100 times with DELAY(100) before each
  * poll.  Returns true as soon as the bit reads clear, false if it is still
  * set after the last poll.
  */
 static boolean_t
 emac_miibus_iowait(struct emac_softc *sc)
 {
 	uint32_t tries = 100;
 
 	while (tries-- != 0) {
 		DELAY(100);
 		if ((EMAC_READ_REG(sc, EMAC_MAC_MIND) & 0x1) == 0)
 			return (true);
 	}
 
 	return (false);
 }
 
 /*
  * The MII bus interface
  */
 /*
  * miibus read method: read PHY register 'reg' of PHY 'phy'.
  * Returns the value of EMAC_MAC_MRDD, or 0 if the operation timed out.
  */
 static int
 emac_miibus_readreg(device_t dev, int phy, int reg)
 {
 	struct emac_softc *sc = device_get_softc(dev);
 
 	/* Select the PHY and register, then start the operation. */
 	EMAC_WRITE_REG(sc, EMAC_MAC_MADR, (phy << 8) | reg);
 	EMAC_WRITE_REG(sc, EMAC_MAC_MCMD, 0x1);
 
 	if (!emac_miibus_iowait(sc)) {
 		device_printf(dev, "timeout waiting for mii read\n");
 		return (0);
 	}
 
 	/* Operation finished: drop the command bit and fetch the data. */
 	EMAC_WRITE_REG(sc, EMAC_MAC_MCMD, 0x0);
 	return (EMAC_READ_REG(sc, EMAC_MAC_MRDD));
 }
 
 /*
  * miibus write method: write 'data' to PHY register 'reg' of PHY 'phy'.
  * Always returns 0; a timeout is only reported on the console.
  */
 static int
 emac_miibus_writereg(device_t dev, int phy, int reg, int data)
 {
 	struct emac_softc *sc = device_get_softc(dev);
 
 	/* Select the PHY and register, load the data, start the operation. */
 	EMAC_WRITE_REG(sc, EMAC_MAC_MADR, (phy << 8) | reg);
 	EMAC_WRITE_REG(sc, EMAC_MAC_MWTD, data);
 	EMAC_WRITE_REG(sc, EMAC_MAC_MCMD, 0x1);
 
 	if (emac_miibus_iowait(sc)) {
 		/* Operation finished: drop the command bit again. */
 		EMAC_WRITE_REG(sc, EMAC_MAC_MCMD, 0x0);
 	} else {
 		device_printf(dev, "timeout waiting for mii write\n");
 	}
 
 	return (0);
 }
 
 /*
  * miibus status-change method.
  *
  * Ignored unless the interface exists and is running.  The link is
  * considered up only when the media status is both valid and active and
  * the active media is 10baseT or 100baseTX.  With the link up the MAC is
  * programmed for the resolved duplex and RX/TX are enabled; with the link
  * down RX/TX are disabled.
  */
 static void
 emac_miibus_statchg(device_t dev)
 {
 	struct emac_softc *sc = device_get_softc(dev);
 	struct mii_data *mii = device_get_softc(sc->emac_miibus);
 	struct ifnet *ifp = sc->emac_ifp;
 	uint32_t ctl, ipgt;
 
 	if (mii == NULL || ifp == NULL)
 		return;
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return;
 
 	sc->emac_link = 0;
 	if ((mii->mii_media_status & (IFM_ACTIVE | IFM_AVALID)) ==
 	    (IFM_ACTIVE | IFM_AVALID)) {
 		switch (IFM_SUBTYPE(mii->mii_media_active)) {
 		case IFM_10_T:
 		case IFM_100_TX:
 			sc->emac_link = 1;
 			break;
 		default:
 			break;
 		}
 	}
 
 	if (sc->emac_link == 0) {
 		/* Disable RX/TX */
 		ctl = EMAC_READ_REG(sc, EMAC_CTL);
 		ctl &= ~(EMAC_CTL_RST | EMAC_CTL_TX_EN | EMAC_CTL_RX_EN);
 		EMAC_WRITE_REG(sc, EMAC_CTL, ctl);
 		return;
 	}
 
 	/* Program MACs with resolved speed/duplex. */
 	ipgt = EMAC_READ_REG(sc, EMAC_MAC_IPGT);
 	if ((IFM_OPTIONS(mii->mii_media_active) & IFM_FDX) != 0) {
 		ipgt &= ~EMAC_MAC_IPGT_HD;
 		ipgt |= EMAC_MAC_IPGT_FD;
 	} else {
 		ipgt &= ~EMAC_MAC_IPGT_FD;
 		ipgt |= EMAC_MAC_IPGT_HD;
 	}
 	EMAC_WRITE_REG(sc, EMAC_MAC_IPGT, ipgt);
 
 	/* Enable RX/TX */
 	ctl = EMAC_READ_REG(sc, EMAC_CTL);
 	ctl |= EMAC_CTL_RST | EMAC_CTL_TX_EN | EMAC_CTL_RX_EN;
 	EMAC_WRITE_REG(sc, EMAC_CTL, ctl);
 }
 
 static int
 emac_ifmedia_upd(struct ifnet *ifp)
 {
 	struct emac_softc *sc;
 	struct mii_data *mii;
 	struct mii_softc *miisc;
 	int error;
 
 	sc = ifp->if_softc;
 	mii = device_get_softc(sc->emac_miibus);
 	EMAC_LOCK(sc);
 	LIST_FOREACH(miisc, &mii->mii_phys, mii_list)
 		PHY_RESET(miisc);
 	error = mii_mediachg(mii);
 	EMAC_UNLOCK(sc);
 
 	return (error);
 }
 
 /*
  * ifmedia status callback: poll the PHY and report the active media and
  * media status through 'ifmr', under the driver lock.
  */
 static void
 emac_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
 	struct emac_softc *sc = ifp->if_softc;
 	struct mii_data *mii = device_get_softc(sc->emac_miibus);
 
 	EMAC_LOCK(sc);
 	mii_pollstat(mii);
 	ifmr->ifm_active = mii->mii_media_active;
 	ifmr->ifm_status = mii->mii_media_status;
 	EMAC_UNLOCK(sc);
 }
 
 /*
  * Method table binding the driver's entry points to the device, bus and
  * miibus interfaces.  Bus print/driver-added requests are forwarded to the
  * generic bus implementations.
  */
 static device_method_t emac_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		emac_probe),
 	DEVMETHOD(device_attach,	emac_attach),
 	DEVMETHOD(device_detach,	emac_detach),
 	DEVMETHOD(device_shutdown,	emac_shutdown),
 	DEVMETHOD(device_suspend,	emac_suspend),
 	DEVMETHOD(device_resume,	emac_resume),
 
 	/* bus interface, for miibus */
 	DEVMETHOD(bus_print_child,	bus_generic_print_child),
 	DEVMETHOD(bus_driver_added,	bus_generic_driver_added),
 
 	/* MII interface */
 	DEVMETHOD(miibus_readreg,	emac_miibus_readreg),
 	DEVMETHOD(miibus_writereg,	emac_miibus_writereg),
 	DEVMETHOD(miibus_statchg,	emac_miibus_statchg),
 
 	DEVMETHOD_END
 };
 
 /*
  * Driver description: name "emac", the method table above, and the size
  * of the per-device softc.
  */
 static driver_t emac_driver = {
 	"emac",
 	emac_methods,
 	sizeof(struct emac_softc)
 };
 
 static devclass_t emac_devclass;
 
 DRIVER_MODULE(emac, simplebus, emac_driver, emac_devclass, 0, 0);
 DRIVER_MODULE(miibus, emac, miibus_driver, miibus_devclass, 0, 0);
 MODULE_DEPEND(emac, miibus, 1, 1, 1);
 MODULE_DEPEND(emac, ether, 1, 1, 1);
 
 /*
  * Sysctl helper for an int variable restricted to [low, high].
  *
  * arg1 points at the variable.  Reads are served through
  * sysctl_handle_int(); a new value is stored only if it lies within the
  * range.  Returns EINVAL for a NULL arg1 or an out-of-range value, the
  * error from sysctl_handle_int() if any, and 0 otherwise.
  */
 static int
 sysctl_int_range(SYSCTL_HANDLER_ARGS, int low, int high)
 {
 	int *target, candidate, error;
 
 	target = (int *)arg1;
 	if (target == NULL)
 		return (EINVAL);
 
 	/* Work on a copy so a rejected value never reaches the variable. */
 	candidate = *target;
 	error = sysctl_handle_int(oidp, &candidate, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	if (candidate < low || candidate > high)
 		return (EINVAL);
 
 	*target = candidate;
 	return (0);
 }
 
 /*
  * Sysctl handler for "process_limit": accepts values between
  * EMAC_PROC_MIN and EMAC_PROC_MAX inclusive.
  */
 static int
 sysctl_hw_emac_proc_limit(SYSCTL_HANDLER_ARGS)
 {
 	int rv;
 
 	rv = sysctl_int_range(oidp, arg1, arg2, req, EMAC_PROC_MIN,
 	    EMAC_PROC_MAX);
 	return (rv);
 }
Index: user/ngie/more-tests/sys/arm/allwinner/if_emacreg.h
===================================================================
--- user/ngie/more-tests/sys/arm/allwinner/if_emacreg.h	(revision 281675)
+++ user/ngie/more-tests/sys/arm/allwinner/if_emacreg.h	(revision 281676)
@@ -1,239 +1,242 @@
 /*
  * Copyright (C) 2013 Ganbold Tsagaankhuu <ganbold@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef	__IF_EMACREG_H__
 #define	__IF_EMACREG_H__
 
 /*
  * EMAC register definitions
  */
 #define	EMAC_CTL		0x00
 #define	EMAC_CTL_RST		(1 << 0)
 #define	EMAC_CTL_TX_EN		(1 << 1)
 #define	EMAC_CTL_RX_EN		(1 << 2)
 
 #define	EMAC_TX_MODE		0x04
 #define	EMAC_TX_FLOW		0x08
 #define	EMAC_TX_CTL0		0x0C
 #define	EMAC_TX_CTL1		0x10
 #define	EMAC_TX_INS		0x14
 #define	EMAC_TX_PL0		0x18
 #define	EMAC_TX_PL1		0x1C
 #define	EMAC_TX_STA		0x20
 #define	EMAC_TX_IO_DATA		0x24
 #define	EMAC_TX_IO_DATA1	0x28
 #define	EMAC_TX_TSVL0		0x2C
 #define	EMAC_TX_TSVH0		0x30
 #define	EMAC_TX_TSVL1		0x34
 #define	EMAC_TX_TSVH1		0x38
+#define	EMAC_TX_FIFO0		(1 << 0)
+#define	EMAC_TX_FIFO1		(1 << 1)
 
 #define	EMAC_RX_CTL		0x3C
 #define	EMAC_RX_HASH0		0x40
 #define	EMAC_RX_HASH1		0x44
 #define	EMAC_RX_STA		0x48
 #define	EMAC_RX_IO_DATA		0x4C
 #define	EMAC_RX_FBC		0x50
 
 #define	EMAC_INT_CTL		0x54
 #define	EMAC_INT_STA		0x58
-#define	EMAC_INT_STA_TX		(0x01 | 0x02)
+#define	EMAC_INT_STA_TX		(EMAC_TX_FIFO0 | EMAC_TX_FIFO1)
 #define	EMAC_INT_STA_RX		0x100
 /*
  * Interrupt enable mask: bits 0-3 and bit 8.  The whole expansion is
  * parenthesized so it stays one operand when used with ~, & or similar.
  */
 #define	EMAC_INT_EN		((0xf << 0) | (1 << 8))
 
 #define	EMAC_MAC_CTL0		0x5C
 #define	EMAC_MAC_CTL1		0x60
 #define	EMAC_MAC_IPGT		0x64
 #define	EMAC_MAC_IPGR		0x68
 #define	EMAC_MAC_CLRT		0x6C
 #define	EMAC_MAC_MAXF		0x70
 #define	EMAC_MAC_SUPP		0x74
 #define	EMAC_MAC_TEST		0x78
 #define	EMAC_MAC_MCFG		0x7C
 #define	EMAC_MAC_MCMD		0x80
 #define	EMAC_MAC_MADR		0x84
 #define	EMAC_MAC_MWTD		0x88
 #define	EMAC_MAC_MRDD		0x8C
 #define	EMAC_MAC_MIND		0x90
 #define	EMAC_MAC_SSRR		0x94
 #define	EMAC_MAC_A0		0x98
 #define	EMAC_MAC_A1		0x9C
 #define	EMAC_MAC_A2		0xA0
 
 #define	EMAC_SAFX_L0		0xA4
 #define	EMAC_SAFX_H0		0xA8
 #define	EMAC_SAFX_L1		0xAC
 #define	EMAC_SAFX_H1		0xB0
 #define	EMAC_SAFX_L2		0xB4
 #define	EMAC_SAFX_H2		0xB8
 #define	EMAC_SAFX_L3		0xBC
 #define	EMAC_SAFX_H3		0xC0
 
 #define	EMAC_PHY_DUPLEX		(1 << 8)
 
 /*
  * Each received packet has 8 bytes header:
  * Byte 0: Packet valid flag: 0x01 valid, 0x00 not valid
  * Byte 1: 0x43 -> Ascii code 'C'
  * Byte 2: 0x41 -> Ascii code 'A'
  * Byte 3: 0x4d -> Ascii code 'M'
  * Byte 4: High byte of received packet's status
  * Byte 5: Low byte of received packet's status
  * Byte 6: High byte of packet size
  * Byte 7: Low byte of packet size
  */
 #define	EMAC_PACKET_HEADER	(0x0143414d)
 
 /* Aborted frame enable */
 #define	EMAC_TX_AB_M		(1 << 0)
 
 /* 0: Enable CPU mode for TX, 1: DMA */
 #define	EMAC_TX_TM		~(1 << 1)
 
 /* 0: DRQ asserted, 1: DRQ automatically */
 #define	EMAC_RX_DRQ_MODE	(1 << 1)
 
 /* 0: Enable CPU mode for RX, 1: DMA */
 #define	EMAC_RX_TM		~(1 << 2)
 
 /* Pass all Frames */
 #define	EMAC_RX_PA		(1 << 4)
 
 /* Pass Control Frames */
 #define	EMAC_RX_PCF		(1 << 5)
 
 /* Pass Frames with CRC Error */
 #define	EMAC_RX_PCRCE		(1 << 6)
 
 /* Pass Frames with Length Error */
 #define	EMAC_RX_PLE		(1 << 7)
 
 /* Pass Frames length out of range */
 #define	EMAC_RX_POR		(1 << 8)
 
 /* Accept unicast Packets */
 #define	EMAC_RX_UCAD		(1 << 16)
 
 /* Enable DA Filtering */
 #define	EMAC_RX_DAF		(1 << 17)
 
 /* Accept multicast Packets */
 #define	EMAC_RX_MCO		(1 << 20)
 
 /* Enable Hash filter */
 #define	EMAC_RX_MHF		(1 << 21)
 
 /* Accept Broadcast Packets */
 #define	EMAC_RX_BCO		(1 << 22)
 
 /* Enable SA Filtering */
 #define	EMAC_RX_SAF		(1 << 24)
 
 /* Inverse Filtering */
 #define	EMAC_RX_SAIF		(1 << 25)
 
 #define	EMAC_RX_SETUP		(EMAC_RX_POR | EMAC_RX_UCAD | \
     EMAC_RX_DAF | EMAC_RX_MCO | EMAC_RX_BCO)
 
 /* Enable Receive Flow Control */
 #define	EMAC_MAC_CTL0_RFC	(1 << 2)
 
 /* Enable Transmit Flow Control */
 #define	EMAC_MAC_CTL0_TFC	(1 << 3)
 
 /* Enable soft reset */
 #define	EMAC_MAC_CTL0_SOFT_RST	(1 << 15)
 
 #define	EMAC_MAC_CTL0_SETUP	(EMAC_MAC_CTL0_RFC | EMAC_MAC_CTL0_TFC)
 
 /* Enable duplex */
 #define	EMAC_MAC_CTL1_DUP	(1 << 0)
 
 /* Enable MAC Frame Length Checking */
 #define	EMAC_MAC_CTL1_FLC	(1 << 1)
 
 /* Enable Huge Frame */
 #define	EMAC_MAC_CTL1_HF	(1 << 2)
 
 /* Enable MAC Delayed CRC */
 #define	EMAC_MAC_CTL1_DCRC	(1 << 3)
 
 /* Enable MAC CRC */
 #define	EMAC_MAC_CTL1_CRC	(1 << 4)
 
 /* Enable MAC PAD Short frames */
 #define	EMAC_MAC_CTL1_PC	(1 << 5)
 
 /* Enable MAC PAD Short frames and append CRC */
 #define	EMAC_MAC_CTL1_VC	(1 << 6)
 
 /* Enable MAC auto detect Short frames */
 #define	EMAC_MAC_CTL1_ADP	(1 << 7)
 
 #define	EMAC_MAC_CTL1_PRE	(1 << 8)
 #define	EMAC_MAC_CTL1_LPE	(1 << 9)
 
 /* Enable no back off */
 #define	EMAC_MAC_CTL1_NB	(1 << 12)
 
 #define	EMAC_MAC_CTL1_BNB	(1 << 13)
 #define	EMAC_MAC_CTL1_ED	(1 << 14)
 
 #define	EMAC_MAC_CTL1_SETUP	(EMAC_MAC_CTL1_FLC | EMAC_MAC_CTL1_CRC | \
     EMAC_MAC_CTL1_PC)
 
 /* half duplex */
 #define	EMAC_MAC_IPGT_HD	0x12
 
 /* full duplex */
 #define	EMAC_MAC_IPGT_FD	0x15
 
 #define	EMAC_MAC_NBTB_IPG1	0xC
 #define	EMAC_MAC_NBTB_IPG2	0x12
 
 #define	EMAC_MAC_CW		0x37
 #define	EMAC_MAC_RM		0xF
 
 #define	EMAC_MAC_MFL		0x0600
 
 /* Receive status */
 #define	EMAC_CRCERR		(1 << 4)
 #define	EMAC_LENERR		(3 << 5)
+#define	EMAC_PKT_OK		(1 << 7)
 
 #define	EMAC_RX_FLUSH_FIFO	(1 << 3)
 #define	EMAC_PHY_RESET		(1 << 15)
 #define	EMAC_PHY_PWRDOWN	(1 << 11)
 
 #define	EMAC_PROC_MIN		16
 #define	EMAC_PROC_MAX		255
 #define	EMAC_PROC_DEFAULT	64
 
 /*
  * Softc mutex helpers.  The macro argument is the softc pointer whose
  * emac_mtx is locked, unlocked or asserted as owned; it is now actually
  * used in the expansion instead of silently relying on a caller variable
  * that happens to be named "sc".
  */
 #define	EMAC_LOCK(sc)		mtx_lock(&(sc)->emac_mtx)
 #define	EMAC_UNLOCK(sc)		mtx_unlock(&(sc)->emac_mtx)
 /* NOTE(review): trailing ';' kept so existing call sites are unaffected. */
 #define	EMAC_ASSERT_LOCKED(sc)	mtx_assert(&(sc)->emac_mtx, MA_OWNED);
 
 #endif	/* __IF_EMACREG_H__ */
Index: user/ngie/more-tests/sys/arm/arm/trap-v6.c
===================================================================
--- user/ngie/more-tests/sys/arm/arm/trap-v6.c	(revision 281675)
+++ user/ngie/more-tests/sys/arm/arm/trap-v6.c	(revision 281676)
@@ -1,663 +1,668 @@
 /*-
  * Copyright 2014 Olivier Houchard <cognet@FreeBSD.org>
  * Copyright 2014 Svatopluk Kraus <onwahe@gmail.com>
  * Copyright 2014 Michal Meloun <meloun@miracle.cz>
  * Copyright 2014 Andrew Turner <andrew@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include "opt_ktrace.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/signalvar.h>
 #include <sys/ktr.h>
 #ifdef KTRACE
 #include <sys/uio.h>
 #include <sys/ktrace.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_param.h>
 
+#include <machine/acle-compat.h>
 #include <machine/cpu.h>
 #include <machine/cpu-v6.h>
 #include <machine/frame.h>
 #include <machine/machdep.h>
 #include <machine/pcb.h>
 #include <machine/vmparam.h>
 
 #ifdef KDB
 #include <sys/kdb.h>
 #include <machine/db_machdep.h>
 #endif
 
 extern char fusubailout[];
 
 #ifdef DEBUG
 int last_fault_code;	/* For the benefit of pmap_fault_fixup() */
 #endif
 
 /*
  * Signal description filled in by abort handlers and later passed,
  * field by field, to call_trapsignal().
  */
 struct ksig {
 	int sig;		/* signal number */
 	u_long code;		/* signal code */
 	vm_offset_t	addr;	/* address reported with the signal */
 };
 
 /*
  * Signature of a per-fault handler.  As called from abort_handler() the
  * arguments are: trapframe, fault index, FSR, FAR, prefetch flag, thread
  * and a ksig to fill in.  A non-zero return makes abort_handler() deliver
  * the signal described by the ksig; zero skips signal delivery.
  */
 typedef int abort_func_t(struct trapframe *, u_int, u_int, u_int, u_int,
     struct thread *, struct ksig *);
 
 static abort_func_t abort_fatal;
 static abort_func_t abort_align;
 static abort_func_t abort_icache;
 
 /*
  * One entry of the fault dispatch table: an optional dedicated handler
  * (NULL selects the generic path in abort_handler()) and a description
  * printed by abort_fatal().
  */
 struct abort {
 	abort_func_t	*func;
 	const char	*desc;
 };
 
 /*
  * How are the aborts handled?
  *
  * Undefined Code:
  *  - Always fatal as we do not know what does it mean.
  * Imprecise External Abort:
  *  - Always fatal, but can be handled somehow in the future.
  *    Now, due to PCIe buggy hardware, ignored.
  * Precise External Abort:
  *  - Always fatal, but who knows in the future???
  * Debug Event:
  *  - Special handling.
  * External Translation Abort (L1 & L2)
  *  - Always fatal as something is screwed up in page tables or hardware.
  * Domain Fault (L1 & L2):
  *  - Always fatal as we do not play game with domains.
  * Alignment Fault:
  *  - Everything should be aligned in kernel including user to kernel and
  *    vice versa data copying, so we ignore pcb_onfault, and it's always fatal.
  *    We generate signal in case of abort from user mode.
  * Instruction cache maintenance:
  *  - According to manual, this is translation fault during cache maintenance
  *    operation. So, it could be really complex in SMP case and fuzzy too
  *    for cache operations working on virtual addresses. For now, we will
  *    consider this abort as fatal. In fact, no cache maintenance on
  *    not mapped virtual addresses should be called. As cache maintenance
  *    operation (except DMB, DSB, and Flush Prefetch Buffer) are privileged,
  *    the abort is fatal for user mode as well for now. (This is good place to
  *    note that cache maintenance on virtual address fill TLB.)
  * Access Bit (L1 & L2):
  *  - Fast hardware emulation for kernel and user mode.
  * Translation Fault (L1 & L2):
  *  - Standard fault mechanism is held including vm_fault().
  * Permission Fault (L1 & L2):
  *  - Fast hardware emulation of modify bits and in other cases, standard
  *    fault mechanism is held including vm_fault().
  */
 
 /*
  * Fault dispatch table, indexed by the value FSR_TO_FAULT() yields in
  * abort_handler().  Entries with a NULL handler are processed by the
  * generic path of abort_handler(); the description string is printed by
  * abort_fatal().
  */
 static const struct abort aborts[] = {
 	{abort_fatal,	"Undefined Code (0x000)"},
 	{abort_align,	"Alignment Fault"},
 	{abort_fatal,	"Debug Event"},
 	{NULL,		"Access Bit (L1)"},
 	{abort_icache,	"Instruction cache maintenance"},
 	{NULL,		"Translation Fault (L1)"},
 	{NULL,		"Access Bit (L2)"},
 	{NULL,		"Translation Fault (L2)"},
 
 	{abort_fatal,	"External Abort"},
 	{abort_fatal,	"Domain Fault (L1)"},
 	{abort_fatal,	"Undefined Code (0x00A)"},
 	{abort_fatal,	"Domain Fault (L2)"},
 	{abort_fatal,	"External Translation Abort (L1)"},
 	{NULL,		"Permission Fault (L1)"},
 	{abort_fatal,	"External Translation Abort (L2)"},
 	{NULL,		"Permission Fault (L2)"},
 
 	{abort_fatal,	"TLB Conflict Abort"},
 	{abort_fatal,	"Undefined Code (0x401)"},
 	{abort_fatal,	"Undefined Code (0x402)"},
 	{abort_fatal,	"Undefined Code (0x403)"},
 	{abort_fatal,	"Undefined Code (0x404)"},
 	{abort_fatal,	"Undefined Code (0x405)"},
 	{abort_fatal,	"Asynchronous External Abort"},
 	{abort_fatal,	"Undefined Code (0x407)"},
 
 	{abort_fatal,	"Asynchronous Parity Error on Memory Access"},
 	{abort_fatal,	"Parity Error on Memory Access"},
 	{abort_fatal,	"Undefined Code (0x40A)"},
 	{abort_fatal,	"Undefined Code (0x40B)"},
 	{abort_fatal,	"Parity Error on Translation (L1)"},
 	{abort_fatal,	"Undefined Code (0x40D)"},
 	{abort_fatal,	"Parity Error on Translation (L2)"},
 	{abort_fatal,	"Undefined Code (0x40F)"}
 };
 
 
 /*
  * Build a trap ksiginfo from the given signal number, signal code and
  * address, and post it to thread td with trapsignal().
  */
 static __inline void
 call_trapsignal(struct thread *td, int sig, int code, vm_offset_t addr)
 {
 	ksiginfo_t info;
 
 	CTR4(KTR_TRAP, "%s: addr: %#x, sig: %d, code: %d",
 	   __func__, addr, sig, code);
 
 	/*
 	 * TODO: it would be useful to also record whether a data abort
 	 * or a prefetch abort is being served.
 	 */
 	ksiginfo_init_trap(&info);
 	info.ksi_addr = (void *)addr;
 	info.ksi_code = code;
 	info.ksi_signo = sig;
 
 	trapsignal(td, &info);
 }
 
 /*
  * abort_imprecise() handles the following abort:
  *
  *  FAULT_EA_IMPREC - Imprecise External Abort
  *
  * Imprecise means that we don't know where the abort happened,
  * thus FAR is undefined. The abort should never fire, but hot
  * plugging or an accidental hardware failure can be the cause of it.
  * If the abort happens, it can even be in a different (thread) context.
  * Without any additional support, the abort is fatal, as we do not
  * know what really happened.
  *
  * QQQ: Some additional functionality, like pcb_onfault but global,
  *      can be implemented. Imprecise handlers could be registered
  *      which tell us if the abort is caused by something they know
  *      about. They should return one of three codes like:
  *		FAULT_IS_MINE,
  *		FAULT_CAN_BE_MINE,
  *		FAULT_IS_NOT_MINE.
  *      The handlers should be called until some of them returns
  *      FAULT_IS_MINE value or all was called. If all handlers return
  *	FAULT_IS_NOT_MINE value, then the abort is fatal.
  */
 /*
  * tf, fsr and prefetch are forwarded to abort_fatal() together with the
  * FAULT_EA_IMPREC index, a FAR of 0 and curthread.  usermode is only
  * referenced by the disabled (#if 0) "ignore the abort" path.
  */
 static __inline void
 abort_imprecise(struct trapframe *tf, u_int fsr, u_int prefetch, u_int usermode)
 {
 	/*
 	 * XXXX An imprecise abort can be raised as a result of an access
 	 * to not-present PCI/PCIe configuration space.
 	 */
 #if 0
 	goto out;
 #endif
 	abort_fatal(tf, FAULT_EA_IMPREC, fsr, 0, prefetch, curthread, NULL);
 
 	/*
 	 * Returning from this function means that we ignore
 	 * the abort for good reason. Note that imprecise abort
 	 * could fire any time even in user mode.
 	 */
 
 #if 0
 out:
 	if (usermode)
 		userret(curthread, tf);
 #endif
 }
 
 /*
  * abort_debug() handles the following abort:
  *
  *  FAULT_DEBUG - Debug Event
  *
  * From user mode the current thread is sent SIGTRAP/TRAP_BRKPT with the
  * fault address and then returned to user mode.  From kernel mode the
  * event is passed to kdb_trap() when KDB is compiled in; otherwise only a
  * message is printed.
  */
 static __inline void
 abort_debug(struct trapframe *tf, u_int fsr, u_int prefetch, u_int usermode,
     u_int far)
 {
 	struct thread *cur;
 
 	if (!usermode) {
 #ifdef KDB
 		kdb_trap(T_BREAKPOINT, 0, tf);
 #else
 		printf("No debugger in kernel.\n");
 #endif
 		return;
 	}
 
 	cur = curthread;
 	call_trapsignal(cur, SIGTRAP, TRAP_BRKPT, far);
 	userret(cur, tf);
 }
 
 /*
  * Abort handler.
  *
  * FAR, FSR, and everything that can be lost after enabling
  * interrupts must be grabbed before the interrupts are
  * enabled. Note that once interrupts are enabled, we
  * could even migrate to another CPU ...
  *
  * tf is the trapframe of the aborted context; prefetch is non-zero for a
  * prefetch abort and zero for a data abort.
  *
  * Flow: read FSR/FAR, handle imprecise and debug aborts, filter spurious
  * and non-sleepable cases, dispatch through aborts[], and for the
  * remaining translation/permission faults call vm_fault().  On failure a
  * kernel fault either resumes at pcb_onfault or is fatal; a user fault is
  * turned into SIGBUS/SIGSEGV.
  *
  * TODO: move quick cases to ASM
  */
 void
 abort_handler(struct trapframe *tf, int prefetch)
 {
 	struct thread *td;
 	vm_offset_t far, va;
 	int idx, usermode;
 	uint32_t fsr;
 	struct ksig ksig;
 	struct proc *p;
 	struct pcb *pcb;
 	struct vm_map *map;
 	struct vmspace *vm;
 	vm_prot_t ftype;
 	int rv;
 #ifdef INVARIANTS
 	void *onfault;
 #endif
 	td = curthread;
 	fsr = (prefetch) ? cp15_ifsr_get(): cp15_dfsr_get();
+#if __ARM_ARCH >= 7
+	far = (prefetch) ? cp15_ifar_get() : cp15_dfar_get();
+#else
 	far = (prefetch) ? TRAPF_PC(tf) : cp15_dfar_get();
+#endif
 
 	idx = FSR_TO_FAULT(fsr);
 	usermode = TRAPF_USERMODE(tf);	/* Abort came from user mode? */
 	if (usermode)
 		td->td_frame = tf;
 
 	CTR4(KTR_TRAP, "abort_handler: fsr %#x (idx %u) far %#x prefetch %u",
 	    fsr, idx, far, prefetch);
 
 	/*
 	 * Firstly, handle aborts that are not directly related to mapping.
 	 */
 	if (__predict_false(idx == FAULT_EA_IMPREC)) {
 		abort_imprecise(tf, fsr, prefetch, usermode);
 		return;
 	}
 
 	if (__predict_false(idx == FAULT_DEBUG)) {
 		abort_debug(tf, fsr, prefetch, usermode, far);
 		return;
 	}
 
 #ifdef ARM_NEW_PMAP
 	rv = pmap_fault(PCPU_GET(curpmap), far, fsr, idx, usermode);
 	if (rv == 0) {
 		return;
 	} else if (rv == EFAULT) {
 
 		call_trapsignal(td, SIGSEGV, SEGV_MAPERR, far);
 		userret(td, tf);
 		return;
 	}
 #endif
 	/*
 	 * Now, when we handled imprecise and debug aborts, the rest of
 	 * aborts should be really related to mapping.
 	 *
 	 */
 
 	PCPU_INC(cnt.v_trap);
 
 #ifdef KDB
 	if (kdb_active) {
 		kdb_reenter();
 		goto out;
 	}
 #endif
 	if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
 		/*
 		 * Due to both processor errata and lazy TLB invalidation when
 		 * access restrictions are removed from virtual pages, memory
 		 * accesses that are allowed by the physical mapping layer may
 		 * nonetheless cause one spurious page fault per virtual page.
 		 * When the thread is executing a "no faulting" section that
 		 * is bracketed by vm_fault_{disable,enable}_pagefaults(),
 		 * every page fault is treated as a spurious page fault,
 		 * unless it accesses the same virtual address as the most
 		 * recent page fault within the same "no faulting" section.
 		 */
 		if (td->td_md.md_spurflt_addr != far ||
 		    (td->td_pflags & TDP_RESETSPUR) != 0) {
 			td->td_md.md_spurflt_addr = far;
 			td->td_pflags &= ~TDP_RESETSPUR;
 
 			tlb_flush_local(far & ~PAGE_MASK);
 			return;
 		}
 	} else {
 		/*
 		 * If we get a page fault while in a critical section, then
 		 * it is most likely a fatal kernel page fault.  The kernel
 		 * is already going to panic trying to get a sleep lock to
 		 * do the VM lookup, so just consider it a fatal trap so the
 		 * kernel can print out a useful trap message and even get
 		 * to the debugger.
 		 *
 		 * If we get a page fault while holding a non-sleepable
 		 * lock, then it is most likely a fatal kernel page fault.
 		 * If WITNESS is enabled, then it's going to whine about
 		 * bogus LORs with various VM locks, so just skip to the
 		 * fatal trap handling directly.
 		 */
 		if (td->td_critnest != 0 ||
 		    WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
 		    "Kernel page fault") != 0) {
 			abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig);
 			return;
 		}
 	}
 
 	/*
 	 * Re-enable interrupts if they were enabled previously.  The
 	 * branch hint wraps the whole comparison; the tested condition is
 	 * unchanged.
 	 */
 	if (td->td_md.md_spinlock_count == 0) {
 		if (__predict_true((tf->tf_spsr & PSR_I) == 0))
 			enable_interrupts(PSR_I);
 		if (__predict_true((tf->tf_spsr & PSR_F) == 0))
 			enable_interrupts(PSR_F);
 	}
 
 	p = td->td_proc;
 	if (usermode) {
 		td->td_pticks = 0;
 		if (td->td_ucred != p->p_ucred)
 			cred_update_thread(td);
 	}
 
 	/* Invoke the appropriate handler, if necessary. */
 	if (__predict_false(aborts[idx].func != NULL)) {
 		if ((aborts[idx].func)(tf, idx, fsr, far, prefetch, td, &ksig))
 			goto do_trapsignal;
 		goto out;
 	}
 
 	/*
 	 * At this point, we're dealing with one of the following aborts:
 	 *
 	 *  FAULT_TRAN_xx  - Translation
 	 *  FAULT_PERM_xx  - Permission
 	 *
 	 * These are the main virtual memory-related faults signalled by
 	 * the MMU.
 	 */
 
 	/* fusubailout is used by [fs]uswintr to avoid page faulting */
 	pcb = td->td_pcb;
 	if (__predict_false(pcb->pcb_onfault == fusubailout)) {
 		tf->tf_r0 = EFAULT;
 		tf->tf_pc = (register_t)pcb->pcb_onfault;
 		return;
 	}
 
 	/*
 	 * QQQ: ARM has a set of unprivileged load and store instructions
 	 *      (LDRT/LDRBT/STRT/STRBT ...) which are supposed to be used
 	 *      in other than user mode and OS should recognize their
 	 *      aborts and behaved appropriately. However, there is no way
 	 *      how to do that reasonably in general unless we restrict
 	 *      the handling somehow. One way is to limit the handling for
 	 *      aborts which come from undefined mode only.
 	 *
 	 *      Anyhow, we do not use these instructions and do not implement
 	 *      any special handling for them.
 	 */
 
 	va = trunc_page(far);
 	if (va >= KERNBASE) {
 		/*
 		 * Don't allow user-mode faults in kernel address space.
 		 */
 		if (usermode) {
 			/*
 			 * vm_fault() was never called, so give rv a defined
 			 * value before the nogo path reads it.
 			 */
 			rv = KERN_INVALID_ADDRESS;
 			goto nogo;
 		}
 
 		map = kernel_map;
 	} else {
 		/*
 		 * This is a fault on non-kernel virtual memory. If curproc
 		 * is NULL or curproc->p_vmspace is NULL the fault is fatal.
 		 */
 		vm = (p != NULL) ? p->p_vmspace : NULL;
 		if (vm == NULL) {
 			/* As above: rv must be defined on the nogo path. */
 			rv = KERN_INVALID_ADDRESS;
 			goto nogo;
 		}
 
 		map = &vm->vm_map;
 		if (!usermode && (td->td_intr_nesting_level != 0 ||
 		    pcb->pcb_onfault == NULL)) {
 			abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig);
 			return;
 		}
 	}
 
 	ftype = (fsr & FSR_WNR) ? VM_PROT_WRITE : VM_PROT_READ;
 	if (prefetch)
 		ftype |= VM_PROT_EXECUTE;
 
 #ifdef DEBUG
 	last_fault_code = fsr;
 #endif
 
 #ifndef ARM_NEW_PMAP
 	if (pmap_fault_fixup(vmspace_pmap(td->td_proc->p_vmspace), va, ftype,
 	    usermode)) {
 		goto out;
 	}
 #endif
 
 #ifdef INVARIANTS
 	/* Clear pcb_onfault around vm_fault(); it is restored below. */
 	onfault = pcb->pcb_onfault;
 	pcb->pcb_onfault = NULL;
 #endif
 	if (map != kernel_map) {
 		/*
 		 * Keep swapout from messing with us during this
 		 *	critical time.
 		 */
 		PROC_LOCK(p);
 		++p->p_lock;
 		PROC_UNLOCK(p);
 
 		/* Fault in the user page: */
 		rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
 
 		PROC_LOCK(p);
 		--p->p_lock;
 		PROC_UNLOCK(p);
 	} else {
 		/*
 		 * Don't have to worry about process locking or stacks in the
 		 * kernel.
 		 */
 		rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
 	}
 
 #ifdef INVARIANTS
 	pcb->pcb_onfault = onfault;
 #endif
 
 	if (__predict_true(rv == KERN_SUCCESS))
 		goto out;
 nogo:
 	if (!usermode) {
 		/*
 		 * Kernel-mode failure: resume at the registered onfault
 		 * handler with rv in r0, otherwise the abort is fatal.
 		 */
 		if (td->td_intr_nesting_level == 0 &&
 		    pcb->pcb_onfault != NULL) {
 			tf->tf_r0 = rv;
 			tf->tf_pc = (int)pcb->pcb_onfault;
 			return;
 		}
 		CTR2(KTR_TRAP, "%s: vm_fault() failed with %d", __func__, rv);
 		abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig);
 		return;
 	}
 
 	/* User-mode failure: choose the signal from the failure code. */
 	ksig.sig = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV;
 	ksig.code = 0;
 	ksig.addr = far;
 
 do_trapsignal:
 	call_trapsignal(td, ksig.sig, ksig.code, ksig.addr);
 out:
 	if (usermode)
 		userret(td, tf);
 }
 
 /*
  * abort_fatal() handles the following data aborts:
  *
  *  FAULT_DEBUG		- Debug Event
  *  FAULT_ACCESS_xx	- Access Bit
  *  FAULT_EA_PREC	- Precise External Abort
  *  FAULT_DOMAIN_xx	- Domain Fault
  *  FAULT_EA_TRAN_xx	- External Translation Abort
  *  FAULT_EA_IMPREC	- Imprecise External Abort
  *  + all undefined codes for ABORT
  *
  * We should never see these on a properly functioning system.
  *
  * This function is also called by the other handlers if they
  * detect a fatal problem.
  *
  * Note: If 'td' is NULL, we assume we're dealing with a prefetch abort.
  */
 static int
 abort_fatal(struct trapframe *tf, u_int idx, u_int fsr, u_int far, u_int prefetch,
     struct thread *td, struct ksig *ksig)
 {
 	const char *access, *origin;
 	u_int from_user;
 
 	from_user = TRAPF_USERMODE(tf);
 	origin = from_user ? "user" : "kernel";
 	access = (fsr & FSR_WNR) ? "write" : "read";
 	disable_interrupts(PSR_I|PSR_F);
 
 	/* Headline: a NULL td selects the prefetch abort wording. */
 	if (td == NULL) {
 		printf("Fatal %s mode prefetch abort at 0x%08x\n",
 		    origin, tf->tf_pc);
 		printf("trapframe: %p, spsr=%08x\n", tf, tf->tf_spsr);
 	} else {
 		printf("Fatal %s mode data abort: '%s' on %s\n", origin,
 		    aborts[idx].desc, access);
 		printf("trapframe: %p\nFSR=%08x, FAR=", tf, fsr);
 		/* FAR is not printed for an imprecise external abort. */
 		if (idx == FAULT_EA_IMPREC)
 			printf("Invalid,  ");
 		else
 			printf("%08x, ", far);
 		printf("spsr=%08x\n", tf->tf_spsr);
 	}
 
 	/* Dump the general purpose registers saved in the trapframe. */
 	printf("r0 =%08x, r1 =%08x, r2 =%08x, r3 =%08x\n",
 	    tf->tf_r0, tf->tf_r1, tf->tf_r2, tf->tf_r3);
 	printf("r4 =%08x, r5 =%08x, r6 =%08x, r7 =%08x\n",
 	    tf->tf_r4, tf->tf_r5, tf->tf_r6, tf->tf_r7);
 	printf("r8 =%08x, r9 =%08x, r10=%08x, r11=%08x\n",
 	    tf->tf_r8, tf->tf_r9, tf->tf_r10, tf->tf_r11);
 	printf("r12=%08x, ", tf->tf_r12);
 
 	/* Stack pointer and link register of the mode that faulted. */
 	if (!from_user)
 		printf("ssp=%08x, slr=%08x",
 		    tf->tf_svc_sp, tf->tf_svc_lr);
 	else
 		printf("usp=%08x, ulr=%08x",
 		    tf->tf_usr_sp, tf->tf_usr_lr);
 	printf(", pc =%08x\n\n", tf->tf_pc);
 
 #ifdef KDB
 	if (debugger_on_panic || kdb_active)
 		kdb_trap(fsr, 0, tf);
 #endif
 	panic("Fatal abort");
 	/*NOTREACHED*/
 }
 
 /*
  * abort_align() handles the following data abort:
  *
  *  FAULT_ALIGN - Alignment fault
  *
  * Every memory access should be correctly aligned in kernel including
  * user to kernel and vice versa data copying.  A kernel-mode alignment
  * fault is therefore fatal, except that a registered pcb_onfault handler
  * is still honoured (with a console message) when not in interrupt
  * context.  For an abort from user mode a SIGBUS is generated.
  *
  * Returns 1 with *ksig filled in when a signal should be delivered, or 0
  * when execution was redirected to pcb_onfault.
  */
 static int
 abort_align(struct trapframe *tf, u_int idx, u_int fsr, u_int far, u_int prefetch,
     struct thread *td, struct ksig *ksig)
 {
 	u_int usermode;
 
 	usermode = TRAPF_USERMODE(tf);
 
 	/*
 	 * Alignment faults are always fatal if they occur in any but user mode.
 	 *
 	 * XXX The old trap code handles pcb fault even for alignment traps.
 	 * Unfortunately, we don't know why, or whether this is needed.
 	 */
 	if (!usermode) {
 		/* td must be checked for NULL before it is dereferenced. */
 		if (td != NULL && td->td_intr_nesting_level == 0 &&
 		    td->td_pcb->pcb_onfault != NULL) {
 			printf("%s: Got alignment fault with pcb_onfault set"
 			    ", please report this issue\n", __func__);
 			tf->tf_r0 = EFAULT;
 			tf->tf_pc = (int)td->td_pcb->pcb_onfault;
 			return (0);
 		}
 		abort_fatal(tf, idx, fsr, far, prefetch, td, ksig);
 	}
 	/* Deliver a bus error signal to the process */
 	ksig->code = 0;
 	ksig->sig = SIGBUS;
 	ksig->addr = far;
 	return (1);
 }
 
 /*
  * abort_icache() handles the following data abort:
  *
  * FAULT_ICACHE - Instruction cache maintenance
  *
  * According to manual, FAULT_ICACHE is translation fault during cache
  * maintenance operation. In fact, no cache maintenance operation on
  * not mapped virtual addresses should be called. As cache maintenance
  * operation (except DMB, DSB, and Flush Prefetch Buffer) are privileged,
  * the abort is considered as fatal for now. However, all the matter with
  * cache maintenance operation on virtual addresses could be really complex
  * and fuzzy in SMP case, so maybe in future standard fault mechanism
  * should be held here including vm_fault() calling.
  */
 static int
 abort_icache(struct trapframe *tf, u_int idx, u_int fsr, u_int far, u_int prefetch,
     struct thread *td, struct ksig *ksig)
 {
 	/* Treated as fatal for now; abort_fatal() ends in panic(). */
 	(void)abort_fatal(tf, idx, fsr, far, prefetch, td, ksig);
 
 	return (0);
 }
Index: user/ngie/more-tests/sys/arm/arm/vm_machdep.c
===================================================================
--- user/ngie/more-tests/sys/arm/arm/vm_machdep.c	(revision 281675)
+++ user/ngie/more-tests/sys/arm/arm/vm_machdep.c	(revision 281676)
@@ -1,346 +1,342 @@
 /*-
  * Copyright (c) 1982, 1986 The Regents of the University of California.
  * Copyright (c) 1989, 1990 William Jolitz
  * Copyright (c) 1994 John Dyson
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department, and William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
  *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>
 #include <sys/socketvar.h>
 #include <sys/syscall.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/unistd.h>
 #include <machine/cpu.h>
 #include <machine/frame.h>
 #include <machine/pcb.h>
 #include <machine/sysarch.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_param.h>
 #include <vm/vm_pageout.h>
 #include <vm/uma.h>
 #include <vm/uma_int.h>
 
 #include <machine/md_var.h>
 #include <machine/vfp.h>
 
 /*
  * struct switchframe and trapframe must both be a multiple of 8
  * for correct stack alignment.
  */
 CTASSERT(sizeof(struct switchframe) == 48);
 CTASSERT(sizeof(struct trapframe) == 80);
 
 /*
  * Finish a fork operation, with process p2 nearly set up.
  * Copy and update the pcb, set up the stack so that the child is
  * ready to run and return to user mode.
  */
 void
 cpu_fork(register struct thread *td1, register struct proc *p2,
     struct thread *td2, int flags)
 {
 	struct pcb *pcb2;
 	struct trapframe *tf;
 	struct mdproc *mdp2;
 
 	/* Only the RFPROC case needs any machine-dependent setup here. */
 	if ((flags & RFPROC) == 0)
 		return;
 
 	/* Point the pcb to the top of the stack */
 	pcb2 = (struct pcb *)
 	    (td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1;
 #ifdef __XSCALE__
 #ifndef CPU_XSCALE_CORE3
 	pmap_use_minicache(td2->td_kstack, td2->td_kstack_pages * PAGE_SIZE);
 #endif
 #endif
 	td2->td_pcb = pcb2;
 	
 	/* Clone td1's pcb */
 	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
 	
 	/* Point to mdproc and then copy over td1's contents */
 	mdp2 = &p2->p_md;
 	bcopy(&td1->td_proc->p_md, mdp2, sizeof(*mdp2));
 
 	/* Point the frame to the stack in front of pcb and copy td1's frame */
 	td2->td_frame = (struct trapframe *)pcb2 - 1;
 	*td2->td_frame = *td1->td_frame;
 
 	/*
 	 * Create a new fresh stack for the new process.
 	 * Copy the trap frame for the return to user mode as if from a
 	 * syscall.  This copies most of the user mode register values.
 	 */
 	pmap_set_pcb_pagedir(vmspace_pmap(p2->p_vmspace), pcb2);
 	/*
 	 * Seed the saved switch frame: r4/r5 carry fork_return and td2,
 	 * lr points at fork_trampoline and sp at the (aligned) trap frame.
 	 */
 	pcb2->pcb_regs.sf_r4 = (register_t)fork_return;
 	pcb2->pcb_regs.sf_r5 = (register_t)td2;
 	pcb2->pcb_regs.sf_lr = (register_t)fork_trampoline;
 	pcb2->pcb_regs.sf_sp = STACKALIGN(td2->td_frame);
 
 	/* Reset the VFP bookkeeping copied from the parent's pcb. */
 	pcb2->pcb_vfpcpu = -1;
 	pcb2->pcb_vfpstate.fpscr = VFPSCR_DN | VFPSCR_FZ;
 	
 	/* In the child's frame: clear the carry bit and zero r0 and r1. */
 	tf = td2->td_frame;
 	tf->tf_spsr &= ~PSR_C;
 	tf->tf_r0 = 0;
 	tf->tf_r1 = 0;
 
 
 	/* Setup to release spin count in fork_exit(). */
 	td2->td_md.md_spinlock_count = 1;
 	td2->td_md.md_saved_cspr = PSR_SVC32_MODE;
 #ifdef ARM_TP_ADDRESS
 	td2->td_md.md_tp = *(register_t *)ARM_TP_ADDRESS;
 #else
 	td2->td_md.md_tp = td1->td_md.md_tp;
 #endif
 }
 				
 void
 cpu_thread_swapin(struct thread *td)
 {
 	/* Intentionally empty: no work is done here. */
 }
 
 void
 cpu_thread_swapout(struct thread *td)
 {
 	/* Intentionally empty: no work is done here. */
 }
 
 /*
  * Store the outcome of a system call in the thread's trap frame.
  *
  * On success r0/r1 receive td_retval[0]/td_retval[1] and the carry bit in
  * spsr is cleared.  ERESTART rewinds pc by one instruction, EJUSTRETURN
  * leaves the frame untouched, and any other error is placed in r0 with
  * the carry bit set.
  */
 void
 cpu_set_syscall_retval(struct thread *td, int error)
 {
 	struct trapframe *frame;
 	int fixup;
 #ifdef __ARMEB__
 	u_int call;
 #endif
 
 	frame = td->td_frame;
 	fixup = 0;
 
 #ifdef __ARMEB__
 	/*
 	 * __syscall returns an off_t while most other syscalls return an
 	 * int. As an off_t is 64-bits and an int is 32-bits we need to
 	 * place the returned data into r1. As the lseek and freebsd6_lseek
 	 * syscalls also return an off_t they do not need this fixup.
 	 */
-#ifdef __ARM_EABI__
 	call = frame->tf_r7;
-#else
-	call = *(u_int32_t *)(frame->tf_pc - INSN_SIZE) & 0x000fffff;
-#endif
 	if (call == SYS___syscall) {
 		register_t *ap = &frame->tf_r0;
 		register_t code = ap[_QUAD_LOWWORD];
 		if (td->td_proc->p_sysent->sv_mask)
 			code &= td->td_proc->p_sysent->sv_mask;
 		fixup = (code != SYS_freebsd6_lseek && code != SYS_lseek)
 		    ? 1 : 0;
 	}
 #endif
 
 	switch (error) {
 	case 0:
 		if (fixup) {
 			/* Fixup case: r0 is zeroed and the value goes in r1. */
 			frame->tf_r0 = 0;
 			frame->tf_r1 = td->td_retval[0];
 		} else {
 			frame->tf_r0 = td->td_retval[0];
 			frame->tf_r1 = td->td_retval[1];
 		}
 		frame->tf_spsr &= ~PSR_C;   /* carry bit */
 		break;
 	case ERESTART:
 		/*
 		 * Reconstruct the pc to point at the swi.
 		 */
 		frame->tf_pc -= INSN_SIZE;
 		break;
 	case EJUSTRETURN:
 		/* nothing to do */
 		break;
 	default:
 		frame->tf_r0 = error;
 		frame->tf_spsr |= PSR_C;    /* carry bit */
 		break;
 	}
 }
 
 /*
  * Initialize machine state (pcb and trap frame) for a new thread about to
  * upcall. Put enough state in the new thread's PCB to get it to go back to
  * userret(), where we can intercept it again to set the return (upcall)
  * address and stack, along with those from upcalls that are from other
  * sources such as those generated in thread_userret() itself.
  */
 void
 cpu_set_upcall(struct thread *td, struct thread *td0)
 {
 
 	/* Start from a copy of td0's trap frame and pcb. */
 	bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe));
 	bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb));
 
 	/*
 	 * Seed the saved switch frame: r4/r5 carry fork_return and td,
 	 * lr points at fork_trampoline and sp at the (aligned) trap frame.
 	 */
 	td->td_pcb->pcb_regs.sf_r4 = (register_t)fork_return;
 	td->td_pcb->pcb_regs.sf_r5 = (register_t)td;
 	td->td_pcb->pcb_regs.sf_lr = (register_t)fork_trampoline;
 	td->td_pcb->pcb_regs.sf_sp = STACKALIGN(td->td_frame);
 
 	/* Clear the carry bit and zero r0 in the new thread's frame. */
 	td->td_frame->tf_spsr &= ~PSR_C;
 	td->td_frame->tf_r0 = 0;
 
 	/* Setup to release spin count in fork_exit(). */
 	td->td_md.md_spinlock_count = 1;
 	td->td_md.md_saved_cspr = PSR_SVC32_MODE;
 }
 
 /*
  * Set that machine state for performing an upcall that has to
  * be done in thread_userret() so that those upcalls generated
  * in thread_userret() itself can be done as well.
  */
 void
 cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
 	stack_t *stack)
 {
 	struct trapframe *frame;
 
 	frame = td->td_frame;
 
 	/* Enter at 'entry' with 'arg' in r0, in user (USR32) mode... */
 	frame->tf_pc = (int)entry;
 	frame->tf_r0 = (int)arg;
 	frame->tf_spsr = PSR_USR32_MODE;
 
 	/* ...running on the aligned top of the supplied stack. */
 	frame->tf_usr_sp = STACKALIGN((int)stack->ss_sp + stack->ss_size);
 }
 
 int
 cpu_set_user_tls(struct thread *td, void *tls_base)
 {
 
 	/* Always remember the new TLS base in the thread's MD state. */
 	td->td_md.md_tp = (register_t)tls_base;
 
 	/* Only the running thread needs the live value updated as well. */
 	if (td != curthread)
 		return (0);
 
 	critical_enter();
 #ifdef ARM_TP_ADDRESS
 	*(register_t *)ARM_TP_ADDRESS = (register_t)tls_base;
 #else
 	set_tls(tls_base);
 #endif
 	critical_exit();
 
 	return (0);
 }
 
 void
 cpu_thread_exit(struct thread *td)
 {
 	/* Intentionally empty: no work is done here. */
 }
 
 /*
  * Set up td_pcb and td_frame for a thread: the pcb is the last
  * struct pcb-sized slot of the kernel stack and the trap frame sits
  * immediately below it.
  */
 void
 cpu_thread_alloc(struct thread *td)
 {
 	td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages *
 	    PAGE_SIZE) - 1;
 	/*
 	 * Ensure td_frame is aligned to an 8 byte boundary as it will be
 	 * placed into the stack pointer which must be 8 byte aligned in
 	 * the ARM EABI.
 	 */
 	td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb) - 1;
 
 #ifdef __XSCALE__
 #ifndef CPU_XSCALE_CORE3
 	pmap_use_minicache(td->td_kstack, td->td_kstack_pages * PAGE_SIZE);
 #endif
 #endif
 }
 
 void
 cpu_thread_free(struct thread *td)
 {
 	/* Intentionally empty: no work is done here. */
 }
 
 void
 cpu_thread_clean(struct thread *td)
 {
 	/* Intentionally empty: no work is done here. */
 }
 
 /*
  * Intercept the return address from a freshly forked process that has NOT
  * been scheduled yet.
  *
  * This is needed to make kernel threads stay in kernel mode.
  */
 void
 cpu_set_fork_handler(struct thread *td, void (*func)(void *), void *arg)
 {
 	td->td_pcb->pcb_regs.sf_r4 = (register_t)func;	/* function */
 	td->td_pcb->pcb_regs.sf_r5 = (register_t)arg;	/* first arg */
 }
 
 /*
  * Software interrupt handler for queued VM system processing.
  */
 void
 swi_vm(void *dummy)
 {
 
 	/* Nothing is queued unless busdma flagged pending work. */
 	if (!busdma_swi_pending)
 		return;
 
 	busdma_swi();
 }
 
 void
 cpu_exit(struct thread *td)
 {
 	/* Intentionally empty: no work is done here. */
 }
 
Index: user/ngie/more-tests/sys/arm64/arm64/trap.c
===================================================================
--- user/ngie/more-tests/sys/arm64/arm64/trap.c	(revision 281675)
+++ user/ngie/more-tests/sys/arm64/arm64/trap.c	(revision 281676)
@@ -1,311 +1,311 @@
 /*-
  * Copyright (c) 2014 Andrew Turner
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/pioctl.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/syscall.h>
 #include <sys/sysent.h>
 #ifdef KDB
 #include <sys/kdb.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 
 #include <machine/frame.h>
 #include <machine/pcb.h>
 #include <machine/pcpu.h>
 #include <machine/vmparam.h>
 
 #ifdef VFP
 #include <machine/vfp.h>
 #endif
 
 #ifdef KDB
 #include <machine/db_machdep.h>
 #endif
 
 #ifdef DDB
 #include <ddb/db_output.h>
 #endif
 
-extern uintptr_t fsu_intr_fault;
+extern register_t fsu_intr_fault;
 
 /* Called from exception.S */
 void do_el1h_sync(struct trapframe *);
 void do_el0_sync(struct trapframe *);
 void do_el0_error(struct trapframe *);
 
 /*
  * Build a trap ksiginfo carrying signal 'sig' and 'code' (narrowed to
  * int) and post it to 'td' through trapsignal().
  */
 static __inline void
 call_trapsignal(struct thread *td, int sig, u_long code)
 {
 	ksiginfo_t info;
 
 	ksiginfo_init_trap(&info);
 
 	info.ksi_code = (int)code;
 	info.ksi_signo = sig;
 
 	trapsignal(td, &info);
 }
 
 /*
  * Decode the system call number and arguments from the thread's trap
  * frame into 'sa'.  The number is read from x8 and the arguments from x0
  * onwards; for SYS_syscall/SYS___syscall the real number is taken from the
  * first argument slot instead.  Always returns 0.
  */
 int
 cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
 {
 	struct proc *p;
 	register_t *ap;
 	int nap;
 
 	/* Number of argument registers available for copying. */
 	nap = 8;
 	p = td->td_proc;
 	ap = td->td_frame->tf_x;
 
 	sa->code = td->td_frame->tf_x[8];
 
 	if (sa->code == SYS_syscall || sa->code == SYS___syscall) {
 		/* Indirect call: consume the first slot as the number. */
 		sa->code = *ap++;
 		nap--;
 	}
 
 	if (p->p_sysent->sv_mask)
 		sa->code &= p->p_sysent->sv_mask;
 	/* Out-of-range numbers are routed to table entry 0. */
 	if (sa->code >= p->p_sysent->sv_size)
 		sa->callp = &p->p_sysent->sv_table[0];
 	else
 		sa->callp = &p->p_sysent->sv_table[sa->code];
 
 	sa->narg = sa->callp->sy_narg;
 	/* All 'nap' remaining register slots are copied, whatever narg is. */
 	memcpy(sa->args, ap, nap * sizeof(register_t));
 	if (sa->narg > nap)
 		panic("TODO: Could we have more then 8 args?");
 
 	td->td_retval[0] = 0;
 	td->td_retval[1] = 0;
 
 	return (0);
 }
 
 #include "../../kern/subr_syscall.c"
 
 /*
  * Service a system call: record the trap frame on the current thread,
  * then run syscallenter() followed by syscallret().
  */
 static void
 svc_handler(struct trapframe *frame)
 {
 	struct syscall_args sa;
 	struct thread *td;
 	int rv;
 
 	td = curthread;
 	td->td_frame = frame;
 
 	rv = syscallenter(td, &sa);
 	syscallret(td, rv, &sa);
 }
 
 /*
  * Handle a data (or, from EL0, instruction) abort by asking vm_fault() to
  * resolve the faulting page.
  *
  * 'esr' is the syndrome value read by the caller.  'lower' is 1 when called
  * from do_el0_sync() and 0 when called from do_el1h_sync().  If the fault
  * cannot be resolved, a lower-EL fault gets a signal; otherwise the
  * pcb_onfault handler is used when one is set, else the kernel panics.
  */
 static void
 data_abort(struct trapframe *frame, uint64_t esr, int lower)
 {
 	struct vm_map *map;
 	struct thread *td;
 	struct proc *p;
 	struct pcb *pcb;
 	vm_prot_t ftype;
 	vm_offset_t va;
 	uint64_t far;
 	int error, sig;
 
 	td = curthread;
 	pcb = td->td_pcb;
 
 	/*
 	 * Special case for fuswintr and suswintr. These can't sleep so
 	 * handle them early on in the trap handler.
 	 */
-	if (__predict_false(pcb->pcb_onfault == fsu_intr_fault)) {
+	if (__predict_false(pcb->pcb_onfault == (vm_offset_t)&fsu_intr_fault)) {
 		frame->tf_elr = pcb->pcb_onfault;
 		return;
 	}
 
 	far = READ_SPECIALREG(far_el1);
 	p = td->td_proc;
 
 	if (lower)
 		map = &td->td_proc->p_vmspace->vm_map;
 	else {
 		/* The top bit tells us which range to use */
 		if ((far >> 63) == 1)
 			map = kernel_map;
 		else
 			map = &td->td_proc->p_vmspace->vm_map;
 	}
 
 	va = trunc_page(far);
 	/* Bit 6 of esr set: request read+write access, else read only. */
 	ftype = ((esr >> 6) & 1) ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ;
 
 	if (map != kernel_map) {
 		/*
 		 * Keep swapout from messing with us during this
 		 *	critical time.
 		 */
 		PROC_LOCK(p);
 		++p->p_lock;
 		PROC_UNLOCK(p);
 
 		/* Fault in the user page: */
 		error = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
 
 		PROC_LOCK(p);
 		--p->p_lock;
 		PROC_UNLOCK(p);
 	} else {
 		/*
 		 * Don't have to worry about process locking or stacks in the
 		 * kernel.
 		 */
 		error = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
 	}
 
 	if (error != 0) {
 		if (lower) {
 			/* SIGKILL for ENOMEM, SIGSEGV for anything else. */
 			if (error == ENOMEM)
 				sig = SIGKILL;
 			else
 				sig = SIGSEGV;
 			call_trapsignal(td, sig, 0);
 		} else {
 			/*
 			 * Outside interrupt nesting, with an onfault handler
 			 * set: return the error in x0 and resume there.
 			 */
 			if (td->td_intr_nesting_level == 0 &&
 			    pcb->pcb_onfault != 0) {
 				frame->tf_x[0] = error;
 				frame->tf_elr = pcb->pcb_onfault;
 				return;
 			}
 			panic("vm_fault failed: %lx", frame->tf_elr);
 		}
 	}
 
 	if (lower)
 		userret(td, frame);
 }
 
 /*
  * Dispatch a synchronous exception based on the exception class decoded
  * from esr_el1: data aborts go to data_abort() with lower == 0,
  * breakpoint/watchpoint/single-step go to kdb_trap() when KDB is
  * configured, and everything else panics.
  */
 void
 do_el1h_sync(struct trapframe *frame)
 {
 	uint32_t exception;
 	uint64_t esr;
 
 	/* Read the esr register to get the exception details */
 	esr = READ_SPECIALREG(esr_el1);
 	exception = ESR_ELx_EXCEPTION(esr);
 
 	/*
 	 * Sanity check we are in an exception we can handle. The IL bit
 	 * is used to indicate the instruction length, except in a few
 	 * exceptions described in the ARMv8 ARM.
 	 *
 	 * It is unclear in some cases if the bit is implementation defined.
 	 * The Foundation Model and QEMU disagree on if the IL bit should
 	 * be set when we are in a data fault from the same EL and the ISV
 	 * bit (bit 24) is also set.
 	 */
 	KASSERT((esr & ESR_ELx_IL) == ESR_ELx_IL ||
 	    (exception == EXCP_DATA_ABORT && ((esr & ISS_DATA_ISV) == 0)),
 	    ("Invalid instruction length in exception"));
 
 	switch(exception) {
 	case EXCP_FP_SIMD:
 	case EXCP_TRAP_FP:
 		panic("VFP exception in the kernel");
 	case EXCP_DATA_ABORT:
 		data_abort(frame, esr, 0);
 		break;
 	case EXCP_BRK:
 	case EXCP_WATCHPT_EL1:
 	case EXCP_SOFTSTP_EL1:
 #ifdef KDB
 		kdb_trap(exception, 0, frame);
 #else
 		panic("No debugger in kernel.\n");
 #endif
 		break;
 	default:
 		panic("Unknown kernel exception %x esr_el1 %lx\n", exception,
 		    esr);
 	}
 }
 
 /*
  * Dispatch a synchronous exception taken from userland based on the
  * exception class decoded from esr_el1: FP/SIMD traps restore VFP state
  * (when VFP is configured), SVC goes to svc_handler(), instruction and
  * data aborts go to data_abort() with lower == 1, and anything else
  * panics.
  */
 void
 do_el0_sync(struct trapframe *frame)
 {
 	uint32_t exception;
 	uint64_t esr;
 
 	/* Check we have a sane environment when entering from userland */
 	KASSERT((uintptr_t)get_pcpu() >= VM_MIN_KERNEL_ADDRESS,
 	    ("Invalid pcpu address from userland: %p (tpidr %lx)",
 	     get_pcpu(), READ_SPECIALREG(tpidr_el1)));
 
 	esr = READ_SPECIALREG(esr_el1);
 	exception = ESR_ELx_EXCEPTION(esr);
 
 	switch(exception) {
 	case EXCP_FP_SIMD:
 	case EXCP_TRAP_FP:
 #ifdef VFP
 		vfp_restore_state();
 #else
 		panic("VFP exception in userland");
 #endif
 		break;
 	case EXCP_SVC:
 		svc_handler(frame);
 		break;
 	case EXCP_INSN_ABORT_L:
 	case EXCP_DATA_ABORT_L:
 		data_abort(frame, esr, 1);
 		break;
 	default:
 		panic("Unknown userland exception %x esr_el1 %lx\n", exception,
 		    esr);
 	}
 }
 
 /* No handling is implemented: this unconditionally panics. */
 void
 do_el0_error(struct trapframe *frame)
 {
 
 	panic("do_el0_error");
 }
 
Index: user/ngie/more-tests/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
===================================================================
--- user/ngie/more-tests/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c	(revision 281675)
+++ user/ngie/more-tests/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c	(revision 281676)
@@ -1,6556 +1,6551 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011-2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
  * All rights reserved.
  * Copyright 2013 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
  * Copyright 2014 Xin Li <delphij@FreeBSD.org>. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  * Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
  */
 
 /*
  * ZFS ioctls.
  *
  * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
  * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
  *
  * There are two ways that we handle ioctls: the legacy way where almost
  * all of the logic is in the ioctl callback, and the new way where most
  * of the marshalling is handled in the common entry point, zfsdev_ioctl().
  *
  * Non-legacy ioctls should be registered by calling
  * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
  * from userland by lzc_ioctl().
  *
  * The registration arguments are as follows:
  *
  * const char *name
  *   The name of the ioctl.  This is used for history logging.  If the
  *   ioctl returns successfully (the callback returns 0), and allow_log
  *   is true, then a history log entry will be recorded with the input &
  *   output nvlists.  The log entry can be printed with "zpool history -i".
  *
  * zfs_ioc_t ioc
  *   The ioctl request number, which userland will pass to ioctl(2).
  *   The ioctl numbers can change from release to release, because
  *   the caller (libzfs) must be matched to the kernel.
  *
  * zfs_secpolicy_func_t *secpolicy
  *   This function will be called before the zfs_ioc_func_t, to
  *   determine if this operation is permitted.  It should return EPERM
  *   on failure, and 0 on success.  Checks include determining if the
  *   dataset is visible in this zone, and if the user has either all
  *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
  *   to do this operation on this dataset with "zfs allow".
  *
  * zfs_ioc_namecheck_t namecheck
  *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
  *   name, a dataset name, or nothing.  If the name is not well-formed,
  *   the ioctl will fail and the callback will not be called.
  *   Therefore, the callback can assume that the name is well-formed
  *   (e.g. is null-terminated, doesn't have more than one '@' character,
  *   doesn't have invalid characters).
  *
  * zfs_ioc_poolcheck_t pool_check
  *   This specifies requirements on the pool state.  If the pool does
  *   not meet them (is suspended or is readonly), the ioctl will fail
  *   and the callback will not be called.  If any checks are specified
  *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
  *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
  *   POOL_CHECK_READONLY).
  *
  * boolean_t smush_outnvlist
  *   If smush_outnvlist is true, then the output is presumed to be a
  *   list of errors, and it will be "smushed" down to fit into the
  *   caller's buffer, by removing some entries and replacing them with a
  *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
  *   nvlist_smush() for details.  If smush_outnvlist is false, and the
  *   outnvlist does not fit into the userland-provided buffer, then the
  *   ioctl will fail with ENOMEM.
  *
  * zfs_ioc_func_t *func
  *   The callback function that will perform the operation.
  *
  *   The callback should return 0 on success, or an error number on
  *   failure.  If the function fails, the userland ioctl will return -1,
  *   and errno will be set to the callback's return value.  The callback
  *   will be called with the following arguments:
  *
  *   const char *name
  *     The name of the pool or dataset to operate on, from
  *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
  *     expected type (pool, dataset, or none).
  *
  *   nvlist_t *innvl
  *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
  *     NULL if no input nvlist was provided.  Changes to this nvlist are
  *     ignored.  If the input nvlist could not be deserialized, the
  *     ioctl will fail and the callback will not be called.
  *
  *   nvlist_t *outnvl
  *     The output nvlist, initially empty.  The callback can fill it in,
  *     and it will be returned to userland by serializing it into
  *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
  *     fails (e.g. because the caller didn't supply a large enough
  *     buffer), then the overall ioctl will fail.  See the
  *     'smush_outnvlist' argument above for additional behaviors.
  *
  *     There are two typical uses of the output nvlist:
  *       - To return state, e.g. property values.  In this case,
  *         smush_outnvlist should be false.  If the buffer was not large
  *         enough, the caller will reallocate a larger buffer and try
  *         the ioctl again.
  *
  *       - To return multiple errors from an ioctl which makes on-disk
  *         changes.  In this case, smush_outnvlist should be true.
  *         Ioctls which make on-disk modifications should generally not
  *         use the outnvl if they succeed, because the caller can not
  *         distinguish between the operation failing, and
  *         deserialization failing.
  */
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/errno.h>
 #include <sys/uio.h>
 #include <sys/buf.h>
 #include <sys/file.h>
 #include <sys/kmem.h>
 #include <sys/conf.h>
 #include <sys/cmn_err.h>
 #include <sys/stat.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zfs_vfsops.h>
 #include <sys/zfs_znode.h>
 #include <sys/zap.h>
 #include <sys/spa.h>
 #include <sys/spa_impl.h>
 #include <sys/vdev.h>
 #include <sys/dmu.h>
 #include <sys/dsl_dir.h>
 #include <sys/dsl_dataset.h>
 #include <sys/dsl_prop.h>
 #include <sys/dsl_deleg.h>
 #include <sys/dmu_objset.h>
 #include <sys/dmu_impl.h>
 #include <sys/dmu_tx.h>
 #include <sys/sunddi.h>
 #include <sys/policy.h>
 #include <sys/zone.h>
 #include <sys/nvpair.h>
 #include <sys/mount.h>
 #include <sys/taskqueue.h>
 #include <sys/sdt.h>
 #include <sys/varargs.h>
 #include <sys/fs/zfs.h>
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_dir.h>
 #include <sys/zfs_onexit.h>
 #include <sys/zvol.h>
 #include <sys/dsl_scan.h>
 #include <sys/dmu_objset.h>
 #include <sys/dmu_send.h>
 #include <sys/dsl_destroy.h>
 #include <sys/dsl_bookmark.h>
 #include <sys/dsl_userhold.h>
 #include <sys/zfeature.h>
 
 #include "zfs_namecheck.h"
 #include "zfs_prop.h"
 #include "zfs_deleg.h"
 #include "zfs_comutil.h"
 #include "zfs_ioctl_compat.h"
 
 CTASSERT(sizeof(zfs_cmd_t) < IOCPARM_MAX);
 
-static int snapshot_list_prefetch;
-SYSCTL_DECL(_vfs_zfs);
-SYSCTL_INT(_vfs_zfs, OID_AUTO, snapshot_list_prefetch, CTLFLAG_RWTUN,
-    &snapshot_list_prefetch, 0, "Prefetch data when listing snapshots");
-
 static struct cdev *zfsdev;
 
 extern void zfs_init(void);
 extern void zfs_fini(void);
 
 uint_t zfs_fsyncer_key;
 extern uint_t rrw_tsd_key;
 static uint_t zfs_allow_log_key;
 
 typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
 typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
 typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
 
 /*
  * What an ioctl expects in zfs_cmd_t:zc_name (see the "namecheck"
  * description in the comment at the top of this file).
  */
 typedef enum {
 	NO_NAME,
 	POOL_NAME,
 	DATASET_NAME
 } zfs_ioc_namecheck_t;
 
 /*
  * Requirements on the pool state for an ioctl.  Each value is a distinct
  * bit, so checks can be or-ed together (see the "pool_check" description
  * in the comment at the top of this file).
  */
 typedef enum {
 	POOL_CHECK_NONE		= 1 << 0,
 	POOL_CHECK_SUSPENDED	= 1 << 1,
 	POOL_CHECK_READONLY	= 1 << 2,
 } zfs_ioc_poolcheck_t;
 
 /*
  * Per-ioctl descriptor.  NOTE(review): the fields appear to mirror the
  * registration arguments described in the comment at the top of this
  * file (name, secpolicy, namecheck, pool_check, smush_outnvlist, func),
  * with separate slots for legacy and non-legacy callbacks -- confirm
  * against zfs_ioctl_register().
  */
 typedef struct zfs_ioc_vec {
 	zfs_ioc_legacy_func_t	*zvec_legacy_func;
 	zfs_ioc_func_t		*zvec_func;
 	zfs_secpolicy_func_t	*zvec_secpolicy;
 	zfs_ioc_namecheck_t	zvec_namecheck;
 	boolean_t		zvec_allow_log;
 	zfs_ioc_poolcheck_t	zvec_pool_check;
 	boolean_t		zvec_smush_outnvlist;
 	const char		*zvec_name;
 } zfs_ioc_vec_t;
 
 /*
  * This array is indexed by zfs_userquota_prop_t, so the entries must stay
  * in the same order as that type's enumerators.
  */
 static const char *userquota_perms[] = {
 	ZFS_DELEG_PERM_USERUSED,
 	ZFS_DELEG_PERM_USERQUOTA,
 	ZFS_DELEG_PERM_GROUPUSED,
 	ZFS_DELEG_PERM_GROUPQUOTA,
 };
 
 static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
 static int zfs_check_settable(const char *name, nvpair_t *property,
     cred_t *cr);
 static int zfs_check_clearable(char *dataset, nvlist_t *props,
     nvlist_t **errors);
 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
     boolean_t *);
 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
 static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
  
 static void zfsdev_close(void *data);
 
 static int zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature);
 
 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
 /*
  * Format a debug message into a local 512-byte buffer (longer output is
  * truncated by vsnprintf()) and fire the zfs-dprintf DTrace probe with
  * the file's base name, the function name, the line number and the
  * formatted text.
  */
 void
 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
 {
 	const char *newfile;
 	const char *slash;
 	char buf[512];
 	va_list adx;
 
 	/*
 	 * Strip any directory part (e.g. the "../common/" prefix): keep only
 	 * what follows the last '/', or the whole string if there is none.
 	 */
 	slash = strrchr(file, '/');
 	newfile = (slash != NULL) ? slash + 1 : file;
 
 	va_start(adx, fmt);
 	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
 	va_end(adx);
 
 	/*
 	 * To get this data, use the zfs-dprintf probe as so:
 	 * dtrace -q -n 'zfs-dprintf \
 	 *	/stringof(arg0) == "dbuf.c"/ \
 	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
 	 * arg0 = file name
 	 * arg1 = function name
 	 * arg2 = line number
 	 * arg3 = message
 	 */
 	DTRACE_PROBE4(zfs__dprintf,
 	    char *, newfile, char *, func, int, line, char *, buf);
 }
 
 /*
  * Release a buffer of HIS_MAX_RECORD_LEN bytes, the size that
  * history_str_get() allocates.
  */
 static void
 history_str_free(char *buf)
 {
 	kmem_free(buf, HIS_MAX_RECORD_LEN);
 }
 
 /*
  * Copy the history string referenced by zc->zc_history into a freshly
  * allocated HIS_MAX_RECORD_LEN-byte buffer.
  *
  * Returns NULL when zc_history is 0 or the copy-in fails; otherwise the
  * caller owns the buffer and releases it with history_str_free().
  */
 static char *
 history_str_get(zfs_cmd_t *zc)
 {
 	char *buf;
 
 	if (zc->zc_history == 0)
 		return (NULL);
 
 	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
 	if (copyinstr((void *)(uintptr_t)zc->zc_history,
 	    buf, HIS_MAX_RECORD_LEN, NULL) == 0) {
 		/* Force termination regardless of what was copied in. */
 		buf[HIS_MAX_RECORD_LEN -1] = '\0';
 		return (buf);
 	}
 
 	/* Copy-in failed: give the buffer back. */
 	history_str_free(buf);
 	return (NULL);
 }
 
 /*
  * Check to see if the named dataset is currently defined as bootable
  */
 static boolean_t
 zfs_is_bootfs(const char *name)
 {
 	objset_t *os;
 	boolean_t is_boot;
 
 	/* A dataset that cannot be held is reported as not bootable. */
 	if (dmu_objset_hold(name, FTAG, &os) != 0)
 		return (B_FALSE);
 
 	/* Bootable means its object id matches the pool's bootfs id. */
 	is_boot = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
 	dmu_objset_rele(os, FTAG);
 
 	return (is_boot);
 }
 
 /*
  * Return non-zero if the spa version is less than requested version.
  */
 static int
 zfs_earlier_version(const char *name, int version)
 {
 	spa_t *spa;
 	int older;
 
 	/* A pool that cannot be opened is reported as "not earlier". */
 	if (spa_open(name, &spa, FTAG) != 0)
 		return (0);
 
 	older = (spa_version(spa) < version) ? 1 : 0;
 	spa_close(spa, FTAG);
 
 	return (older);
 }
 
 /*
  * Return TRUE if the ZPL version is less than requested version.
  */
 static boolean_t
 zpl_earlier_version(const char *name, int version)
 {
 	objset_t *os;
 	uint64_t zplversion;
 	boolean_t rc = B_TRUE;
 
 	/* B_TRUE is the answer whenever the version cannot be determined. */
 	if (dmu_objset_hold(name, FTAG, &os) != 0)
 		return (rc);
 
 	/* Only DMU_OST_ZFS objsets are queried; others keep B_TRUE. */
 	if (dmu_objset_type(os) == DMU_OST_ZFS) {
 		/* XXX reading from non-owned objset */
 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
 			rc = zplversion < version;
 	}
 
 	dmu_objset_rele(os, FTAG);
 	return (rc);
 }
 
 /*
  * Log the history string supplied with 'zc', if any, to the pool named by
  * zc->zc_name, provided the pool can be opened and its version is at
  * least SPA_VERSION_ZPOOL_HISTORY.
  */
 static void
 zfs_log_history(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	char *buf;
 
 	buf = history_str_get(zc);
 	if (buf == NULL)
 		return;
 
 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
 			(void) spa_history_log(spa, buf);
 		spa_close(spa, FTAG);
 	}
 
 	history_str_free(buf);
 }
 
 /*
  * Policy for top-level read operations (list pools).  Requires no privileges,
  * and can be used in the local zone, as there is no associated dataset.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	/* Always permitted: none of the arguments are examined. */
 	return (0);
 }
 
 /*
  * Policy for dataset read operations (list children, get statistics).  Requires
  * no privileges, but must be visible in the local zone.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	/* Outside the global zone the dataset must be visible to us. */
 	if (!INGLOBALZONE(curthread) &&
 	    !zone_dataset_visible(zc->zc_name, NULL))
 		return (SET_ERROR(ENOENT));
 
 	return (0);
 }
 
 /*
  * Zone check for 'dataset' given its 'zoned' property value.
  *
  * Returns ENOENT if we are not in the global zone and the dataset is not
  * visible, EPERM if one of the zone rules below is violated, and 0
  * otherwise.
  */
 static int
 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
 {
 	/* May be updated by zone_dataset_visible() below. */
 	int writable = 1;
 
 	/*
 	 * The dataset must be visible by this zone -- check this first
 	 * so they don't see EPERM on something they shouldn't know about.
 	 */
 	if (!INGLOBALZONE(curthread) &&
 	    !zone_dataset_visible(dataset, &writable))
 		return (SET_ERROR(ENOENT));
 
 	if (INGLOBALZONE(curthread)) {
 		/*
 		 * If the fs is zoned, only root can access it from the
 		 * global zone.
 		 */
 		if (secpolicy_zfs(cr) && zoned)
 			return (SET_ERROR(EPERM));
 	} else {
 		/*
 		 * If we are in a local zone, the 'zoned' property must be set.
 		 */
 		if (!zoned)
 			return (SET_ERROR(EPERM));
 
 		/* must be writable by this zone */
 		if (!writable)
 			return (SET_ERROR(EPERM));
 	}
 	return (0);
 }
 
 /*
  * Zone check for a dataset identified by name.  Looks up the dataset's
  * "jailed" property and hands the value to zfs_dozonecheck_impl().
  *
  * Returns ENOENT when the property cannot be read.
  */
 static int
 zfs_dozonecheck(const char *dataset, cred_t *cr)
 {
 	uint64_t zoned_val;
 
 	if (dsl_prop_get_integer(dataset, "jailed", &zoned_val, NULL) != 0)
 		return (SET_ERROR(ENOENT));
 
 	return (zfs_dozonecheck_impl(dataset, zoned_val, cr));
 }
 
 /*
  * Zone check for a dataset the caller already holds.  The "jailed"
  * property is read through 'ds'; 'dataset' is the name passed on to
  * zfs_dozonecheck_impl().
  *
  * Returns ENOENT when the property cannot be read.
  */
 static int
 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
 {
 	uint64_t zoned_val;
 
 	if (dsl_prop_get_int_ds(ds, "jailed", &zoned_val) != 0)
 		return (SET_ERROR(ENOENT));
 
 	return (zfs_dozonecheck_impl(dataset, zoned_val, cr));
 }
 
 /*
  * Write-permission check for a held dataset.  The zone check must pass
  * first; after that the caller is allowed if secpolicy_zfs() succeeds,
  * and otherwise the result of the delegated-permission check for 'perm'
  * is returned.
  */
 static int
 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
     const char *perm, cred_t *cr)
 {
 	int error = zfs_dozonecheck_ds(name, ds, cr);
 
 	if (error != 0)
 		return (error);
 
 	/* Sufficiently privileged callers need no delegation. */
 	if (secpolicy_zfs(cr) == 0)
 		return (0);
 
 	return (dsl_deleg_access_impl(ds, perm, cr));
 }
 
 /*
  * Write-permission check for a dataset identified by name.  Holds the
  * pool and the dataset for the duration of the check and releases both
  * before returning.  Errors from taking either hold are returned as-is.
  */
 static int
 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
 {
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	int error;
 
 	error = dsl_pool_hold(name, FTAG, &dp);
 	if (error != 0)
 		return (error);
 
 	error = dsl_dataset_hold(dp, name, FTAG, &ds);
 	if (error == 0) {
 		error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
 		dsl_dataset_rele(ds, FTAG);
 	}
 
 	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
 #ifdef SECLABEL
 /*
  * Policy for setting the security label property.
  *
  * name:   dataset whose label is being changed
  * strval: new label as a hex string, or ZFS_MLSLABEL_DEFAULT
  * cr:     credentials checked for the up/downgrade privilege
  *
  * Returns 0 for success, non-zero for access and other errors.
  */
 static int
 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
 {
 	char		ds_hexsl[MAXNAMELEN];
 	bslabel_t	ds_sl, new_sl;
 	boolean_t	new_default = FALSE;
 	uint64_t	zoned;
 	int		needed_priv = -1;
 	int		error;
 
 	/* First get the existing dataset label. */
 	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
 	if (error != 0)
 		return (SET_ERROR(EPERM));
 
 	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
 		new_default = TRUE;
 
 	/* The label must be translatable */
 	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
 		return (SET_ERROR(EINVAL));
 
 	/*
 	 * In a non-global zone, disallow attempts to set a label that
 	 * doesn't match that of the zone; otherwise no other checks
 	 * are needed.
 	 */
 	if (!INGLOBALZONE(curproc)) {
 		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
 			return (SET_ERROR(EPERM));
 		return (0);
 	}
 
 	/*
 	 * For global-zone datasets (i.e., those whose zoned property is
 	 * "off"), verify that the specified new label is valid for the
 	 * global zone.
 	 */
 	if (dsl_prop_get_integer(name,
 	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
 		return (SET_ERROR(EPERM));
 	if (!zoned) {
 		if (zfs_check_global_label(name, strval) != 0)
 			return (SET_ERROR(EPERM));
 	}
 
 	/*
 	 * If the existing dataset label is nondefault, check if the
 	 * dataset is mounted (label cannot be changed while mounted).
 	 * Get the zfsvfs; if there isn't one, then the dataset isn't
 	 * mounted (or isn't a dataset, doesn't exist, ...).
 	 */
 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
 		objset_t *os;
 		static char *setsl_tag = "setsl_tag";
 
 		/*
 		 * Try to own the dataset; abort if there is any error,
 		 * (e.g., already mounted, in use, or other error).
 		 */
 		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
 		    setsl_tag, &os);
 		if (error != 0)
 			return (SET_ERROR(EPERM));
 
 		dmu_objset_disown(os, setsl_tag);
 
 		if (new_default) {
 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
 			goto out_check;
 		}
 
 		if (hexstr_to_label(strval, &new_sl) != 0)
 			return (SET_ERROR(EPERM));
 
 		/*
 		 * Translate the existing hex label into ds_sl so that the
 		 * dominance comparisons below operate on the dataset's
 		 * current label rather than on uninitialized stack data.
 		 */
 		if (hexstr_to_label(ds_hexsl, &ds_sl) != 0)
 			return (SET_ERROR(EPERM));
 
 		if (blstrictdom(&ds_sl, &new_sl))
 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
 		else if (blstrictdom(&new_sl, &ds_sl))
 			needed_priv = PRIV_FILE_UPGRADE_SL;
 	} else {
 		/* dataset currently has a default label */
 		if (!new_default)
 			needed_priv = PRIV_FILE_UPGRADE_SL;
 	}
 
 out_check:
 	/* needed_priv stays -1 when the label change needs no privilege. */
 	if (needed_priv != -1)
 		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
 	return (0);
 }
 #endif	/* SECLABEL */
 
 /*
  * Permission check for setting property 'prop' on dataset 'dsname'.
  * A few properties get extra checks first; after those pass, the caller
  * must hold write permission for the property's name.
  */
 static int
 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
     cred_t *cr)
 {
 	char *strval;
 
 	/* Extra checks for special properties. */
 	switch (prop) {
 	case ZFS_PROP_ZONED:
 		/* 'zoned' may not be set from within a local zone. */
 		if (!INGLOBALZONE(curthread))
 			return (SET_ERROR(EPERM));
 		break;
 
 	case ZFS_PROP_QUOTA:
 	case ZFS_PROP_FILESYSTEM_LIMIT:
 	case ZFS_PROP_SNAPSHOT_LIMIT:
 		if (!INGLOBALZONE(curthread)) {
 			char setpoint[MAXNAMELEN];
 			uint64_t zoned;
 
 			/*
 			 * Unprivileged users are allowed to modify the
 			 * limit on things *under* (ie. contained by)
 			 * the thing they own: the property lookup must
 			 * succeed, the value must be set, and the dataset
 			 * name must be longer than the setpoint's.
 			 */
 			if (dsl_prop_get_integer(dsname, "jailed", &zoned,
 			    setpoint) != 0 ||
 			    !zoned ||
 			    strlen(dsname) <= strlen(setpoint))
 				return (SET_ERROR(EPERM));
 		}
 		break;
 
 	case ZFS_PROP_MLSLABEL:
 #ifdef SECLABEL
 		if (!is_system_labeled())
 			return (SET_ERROR(EPERM));
 
 		if (nvpair_value_string(propval, &strval) == 0) {
 			int err;
 
 			err = zfs_set_slabel_policy(dsname, strval, CRED());
 			if (err != 0)
 				return (err);
 		}
 #else
 		return (EOPNOTSUPP);
 #endif
 		break;
 
 	default:
 		/* No extra checks for any other property. */
 		break;
 	}
 
 	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
 }
 
 /*
  * Policy for setting delegated permissions on zc->zc_name.  Only the zone
  * check is made here; permission to set permissions will be evaluated
  * later in dsl_deleg_can_allow().
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	return (zfs_dozonecheck(zc->zc_name, cr));
 }
 
 /*
  * Policy for rollback: requires the rollback permission on zc->zc_name.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	const char *dsname = zc->zc_name;
 
 	return (zfs_secpolicy_write_perms(dsname, ZFS_DELEG_PERM_ROLLBACK,
 	    cr));
 }
 
 /*
  * Policy for send.  zc->zc_name must contain an '@' (EINVAL otherwise).
  * The current snapshot name is generated from zc->zc_sendobj and written
  * back into zc->zc_name, and that name is then used for the
  * secpolicy/zone checks requiring the send permission.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	int error;
 
 	if (strchr(zc->zc_name, '@') == NULL)
 		return (SET_ERROR(EINVAL));
 
 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 	if (error != 0)
 		return (error);
 
 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
 	if (error == 0) {
 		/* Overwrite the supplied name with the object's name. */
 		dsl_dataset_name(ds, zc->zc_name);
 
 		error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
 		    ZFS_DELEG_PERM_SEND, cr);
 		dsl_dataset_rele(ds, FTAG);
 	}
 
 	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
 /*
  * Policy for the name-based send ioctl: requires the send permission on
  * zc->zc_name.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	const char *dsname = zc->zc_name;
 
 	return (zfs_secpolicy_write_perms(dsname, ZFS_DELEG_PERM_SEND, cr));
 }
 
 /*
  * Delegated share check.  The path in zc->zc_value must resolve to a
  * vnode on a "zfs" filesystem whose resource name equals zc->zc_name;
  * otherwise EPERM is returned.  When it does, the result of the delegated
  * share-permission check on zc->zc_name is returned.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	vnode_t *vp;
 	int error;
 	int mismatch;
 
 	error = lookupname(zc->zc_value, UIO_SYSSPACE, NO_FOLLOW, NULL, &vp);
 	if (error != 0)
 		return (error);
 
 	/* Now make sure mntpnt and dataset are ZFS */
 	mismatch = (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
 	    zc->zc_name) != 0));
 
 	/* The vnode is only needed for the comparison above. */
 	VN_RELE(vp);
 
 	if (mismatch)
 		return (SET_ERROR(EPERM));
 
 	return (dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_SHARE, cr));
 }
 
 /*
  * Policy for sharing.  Refused with EPERM outside the global zone.
  * Callers passing secpolicy_nfs() are allowed outright; everyone else
  * falls back to the delegated share check.
  */
 int
 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	if (!INGLOBALZONE(curthread))
 		return (SET_ERROR(EPERM));
 
 	if (secpolicy_nfs(cr) == 0)
 		return (0);
 
 	return (zfs_secpolicy_deleg_share(zc, innvl, cr));
 }
 
 /*
  * Policy for SMB ACL operations.  Refused with EPERM outside the global
  * zone.  Callers passing secpolicy_smb() are allowed outright; everyone
  * else falls back to the delegated share check.
  */
 int
 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	if (!INGLOBALZONE(curthread))
 		return (SET_ERROR(EPERM));
 
 	if (secpolicy_smb(cr) == 0)
 		return (0);
 
 	return (zfs_secpolicy_deleg_share(zc, innvl, cr));
 }
 
 /*
  * Compute the parent of 'datasetname' by stripping the trailing "@bla"
  * or "/bla" component, and store it in 'parent'.
  *
  * datasetname: dataset or snapshot name
  * parent:      output buffer
  * parentsize:  size of 'parent' in bytes
  *
  * Returns 0 on success, EINVAL when 'parentsize' is not positive, and
  * ENOENT when the name contains neither '@' nor '/'.  A name that does
  * not fit is truncated to parentsize - 1 bytes before the parent is
  * derived.
  */
 static int
 zfs_get_parent(const char *datasetname, char *parent, int parentsize)
 {
 	char *cp;
 
 	if (parentsize <= 0)
 		return (SET_ERROR(EINVAL));
 
 	/*
 	 * strncpy() leaves the destination unterminated when the source
 	 * fills it, so terminate explicitly before searching the copy.
 	 */
 	(void) strncpy(parent, datasetname, parentsize);
 	parent[parentsize - 1] = '\0';
 
 	/*
 	 * Remove the @bla or /bla from the end of the name to get the parent.
 	 */
 	cp = strrchr(parent, '@');
 	if (cp != NULL) {
 		cp[0] = '\0';
 	} else {
 		cp = strrchr(parent, '/');
 		if (cp == NULL)
 			return (SET_ERROR(ENOENT));
 		cp[0] = '\0';
 	}
 
 	return (0);
 }
 
 /*
  * Permission check for destroying 'name': the caller needs the mount
  * permission and then the destroy permission.  The first failure is
  * returned.
  */
 int
 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
 {
 	int error;
 
 	error = zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_MOUNT, cr);
 	if (error == 0) {
 		error = zfs_secpolicy_write_perms(name,
 		    ZFS_DELEG_PERM_DESTROY, cr);
 	}
 
 	return (error);
 }
 
 /*
  * Policy for destroy: applies zfs_secpolicy_destroy_perms() to
  * zc->zc_name.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	const char *dsname = zc->zc_name;
 
 	return (zfs_secpolicy_destroy_perms(dsname, cr));
 }
 
 /*
  * Destroying snapshots with delegated permissions requires
  * descendant mount and destroy permissions.
  *
  * Every name in the "snaps" nvlist of 'innvl' is checked; EINVAL is
  * returned when that nvlist is missing.  Names that do not exist are
  * removed from the list instead of failing the request.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	nvlist_t *snaps;
 	nvpair_t *pair;
 	int error = 0;
 
 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
 		return (SET_ERROR(EINVAL));
 
 	pair = nvlist_next_nvpair(snaps, NULL);
 	while (pair != NULL) {
 		/* Fetch the successor first; 'pair' may be removed below. */
 		nvpair_t *nextpair = nvlist_next_nvpair(snaps, pair);
 
 		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
 		if (error == ENOENT) {
 			/*
 			 * Ignore any snapshots that don't exist (we consider
 			 * them "already destroyed").  Remove the name from the
 			 * nvl here in case the snapshot is created between
 			 * now and when we try to destroy it (in which case
 			 * we don't want to destroy it since we haven't
 			 * checked for permission).
 			 */
 			fnvlist_remove_nvpair(snaps, pair);
 			error = 0;
 		}
 		if (error != 0)
 			break;
 
 		pair = nextpair;
 	}
 
 	return (error);
 }
 
 /*
  * Permission check for renaming 'from' to 'to'.  The caller needs the
  * rename and mount permissions on 'from', and the create and mount
  * permissions on the parent of 'to'.  The first failing step determines
  * the return value.
  */
 int
 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
 {
 	char	parentname[MAXNAMELEN];
 	int	error;
 
 	error = zfs_secpolicy_write_perms(from, ZFS_DELEG_PERM_RENAME, cr);
 	if (error == 0) {
 		error = zfs_secpolicy_write_perms(from,
 		    ZFS_DELEG_PERM_MOUNT, cr);
 	}
 	if (error == 0)
 		error = zfs_get_parent(to, parentname, sizeof (parentname));
 	if (error == 0) {
 		error = zfs_secpolicy_write_perms(parentname,
 		    ZFS_DELEG_PERM_CREATE, cr);
 	}
 	if (error == 0) {
 		error = zfs_secpolicy_write_perms(parentname,
 		    ZFS_DELEG_PERM_MOUNT, cr);
 	}
 
 	return (error);
 }
 
 /*
  * Policy for rename of zc->zc_name to zc->zc_value.  When bit 0 of
  * zc->zc_cookie is set the rename is recursive; the snapshot part of
  * zc->zc_name is then cut off for the duration of the check and restored
  * afterwards.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	int recursive = ((zc->zc_cookie & 1) != 0);
 	char *at = NULL;
 	int error;
 
 	if (recursive) {
 		/*
 		 * This is recursive rename, so the starting snapshot might
 		 * not exist. Check file system or volume permission instead.
 		 */
 		at = strchr(zc->zc_name, '@');
 		if (at == NULL)
 			return (EINVAL);
 		*at = '\0';
 	}
 
 	error = zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr);
 
 	/* Put back the '@' that was overwritten above, if any. */
 	if (at != NULL)
 		*at = '@';
 
 	return (error);
 }
 
 /*
  * Policy for promote.  The caller needs the promote permission on
  * zc->zc_name, the mount permission on it, and the promote permission on
  * the clone's origin dataset.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	char parentname[MAXNAMELEN];
 	dsl_pool_t *dp;
 	dsl_dataset_t *clone;
 	dsl_dataset_t *origin = NULL;
 	dsl_dir_t *dd;
 	int error;
 
 	error = zfs_secpolicy_write_perms(zc->zc_name,
 	    ZFS_DELEG_PERM_PROMOTE, cr);
 	if (error != 0)
 		return (error);
 
 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 	if (error != 0)
 		return (error);
 
 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
 	if (error != 0) {
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 
 	/* The origin is located through the clone's dsl_dir. */
 	dd = clone->ds_dir;
 	error = dsl_dataset_hold_obj(dd->dd_pool,
 	    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
 	if (error != 0) {
 		dsl_dataset_rele(clone, FTAG);
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 
 	error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
 	    ZFS_DELEG_PERM_MOUNT, cr);
 
 	dsl_dataset_name(origin, parentname);
 	if (error == 0) {
 		error = zfs_secpolicy_write_perms_ds(parentname, origin,
 		    ZFS_DELEG_PERM_PROMOTE, cr);
 	}
 
 	dsl_dataset_rele(clone, FTAG);
 	dsl_dataset_rele(origin, FTAG);
 	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
 /*
  * Policy for receive: the caller needs the receive, mount and create
  * permissions on zc->zc_name, checked in that order.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	int error;
 
 	error = zfs_secpolicy_write_perms(zc->zc_name,
 	    ZFS_DELEG_PERM_RECEIVE, cr);
 	if (error == 0) {
 		error = zfs_secpolicy_write_perms(zc->zc_name,
 		    ZFS_DELEG_PERM_MOUNT, cr);
 	}
 	if (error == 0) {
 		error = zfs_secpolicy_write_perms(zc->zc_name,
 		    ZFS_DELEG_PERM_CREATE, cr);
 	}
 
 	return (error);
 }
 
 /*
  * Permission check for taking a snapshot of 'name': requires the
  * snapshot permission.
  */
 int
 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
 {
 	int error;
 
 	error = zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_SNAPSHOT, cr);
 	return (error);
 }
 
 /*
  * Check for permission to create each snapshot in the nvlist.
  *
  * The names are taken from the "snaps" nvlist of 'innvl'; each must
  * contain an '@', and the snapshot permission is checked on the part
  * before it.
  *
  * Returns 0 when every entry passes (including when the list is empty),
  * EINVAL when "snaps" is missing or a name has no '@', or the first
  * permission error encountered.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	nvlist_t *snaps;
 	/* Must start at 0: the loop body never runs for an empty list. */
 	int error = 0;
 	nvpair_t *pair;
 
 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
 		return (SET_ERROR(EINVAL));
 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
 	    pair = nvlist_next_nvpair(snaps, pair)) {
 		char *name = nvpair_name(pair);
 		char *atp = strchr(name, '@');
 
 		if (atp == NULL) {
 			error = SET_ERROR(EINVAL);
 			break;
 		}
 		/* Temporarily cut the name at '@' to check the dataset. */
 		*atp = '\0';
 		error = zfs_secpolicy_snapshot_perms(name, cr);
 		*atp = '@';
 		if (error != 0)
 			break;
 	}
 	return (error);
 }
 
 /*
  * Check for permission to create each bookmark in the nvlist.
  *
  * Every pair name in 'innvl' must contain a '#' (EINVAL otherwise); the
  * bookmark permission is checked on the part of the name before it.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	nvpair_t *pair;
 	int error = 0;
 
 	pair = nvlist_next_nvpair(innvl, NULL);
 	while (pair != NULL) {
 		char *name = nvpair_name(pair);
 		char *hashp = strchr(name, '#');
 
 		if (hashp == NULL) {
 			error = SET_ERROR(EINVAL);
 			break;
 		}
 
 		/* Temporarily cut the name at '#' to check the dataset. */
 		*hashp = '\0';
 		error = zfs_secpolicy_write_perms(name,
 		    ZFS_DELEG_PERM_BOOKMARK, cr);
 		*hashp = '#';
 		if (error != 0)
 			break;
 
 		pair = nvlist_next_nvpair(innvl, pair);
 	}
 
 	return (error);
 }
 
 /*
  * Check for permission to destroy each bookmark named in 'innvl'.
  *
  * Every pair name must contain a '#' (EINVAL otherwise); the destroy
  * permission is checked on the part of the name before it.  Entries
  * whose filesystem does not exist are removed from the list instead of
  * failing the request.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	nvpair_t *pair;
 	int error = 0;
 
 	pair = nvlist_next_nvpair(innvl, NULL);
 	while (pair != NULL) {
 		char *name = nvpair_name(pair);
 		char *hashp = strchr(name, '#');
 		/* Fetch the successor first; 'pair' may be removed below. */
 		nvpair_t *nextpair = nvlist_next_nvpair(innvl, pair);
 
 		if (hashp == NULL) {
 			error = SET_ERROR(EINVAL);
 			break;
 		}
 
 		*hashp = '\0';
 		error = zfs_secpolicy_write_perms(name,
 		    ZFS_DELEG_PERM_DESTROY, cr);
 		*hashp = '#';
 
 		if (error == ENOENT) {
 			/*
 			 * Ignore any filesystems that don't exist (we consider
 			 * their bookmarks "already destroyed").  Remove
 			 * the name from the nvl here in case the filesystem
 			 * is created between now and when we try to destroy
 			 * the bookmark (in which case we don't want to
 			 * destroy it since we haven't checked for permission).
 			 */
 			fnvlist_remove_nvpair(innvl, pair);
 			error = 0;
 		}
 		if (error != 0)
 			break;
 
 		pair = nextpair;
 	}
 
 	return (error);
 }
 
 /*
  * Policy for logging history.  Even root must have a proper TSD so that
  * we know what pool to log to; without one the request fails with EPERM.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	if (tsd_get(zfs_allow_log_key) != NULL)
 		return (0);
 
 	return (SET_ERROR(EPERM));
 }
 
 /*
  * Policy for create and clone of zc->zc_name.  When 'innvl' carries an
  * "origin" string the caller needs the clone permission on it.  In all
  * cases the caller needs the create and mount permissions on the parent
  * of the new dataset.
  */
 static int
 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	char	parentname[MAXNAMELEN];
 	char	*origin;
 	int	error;
 
 	error = zfs_get_parent(zc->zc_name, parentname, sizeof (parentname));
 	if (error != 0)
 		return (error);
 
 	if (nvlist_lookup_string(innvl, "origin", &origin) == 0) {
 		error = zfs_secpolicy_write_perms(origin,
 		    ZFS_DELEG_PERM_CLONE, cr);
 		if (error != 0)
 			return (error);
 	}
 
 	error = zfs_secpolicy_write_perms(parentname,
 	    ZFS_DELEG_PERM_CREATE, cr);
 	if (error != 0)
 		return (error);
 
 	return (zfs_secpolicy_write_perms(parentname,
 	    ZFS_DELEG_PERM_MOUNT, cr));
 }
 
 /*
  * Policy for pool operations - create/destroy pools, add vdevs, etc.
  * Requires SYS_CONFIG privilege, which is not available in a local zone.
  * Any failure of the privilege check is reported as EPERM.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	if (secpolicy_sys_config(cr, B_FALSE) == 0)
 		return (0);
 
 	return (SET_ERROR(EPERM));
 }
 
 /*
  * Policy for object to name lookups.  Callers that pass
  * secpolicy_sys_config() are allowed outright; otherwise the diff
  * permission on zc->zc_name is required.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	if (secpolicy_sys_config(cr, B_FALSE) == 0)
 		return (0);
 
 	return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF,
 	    cr));
 }
 
 /*
  * Policy for fault injection.  Requires all privileges; the decision is
  * delegated entirely to secpolicy_zinject().
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	int error = secpolicy_zinject(cr);
 
 	return (error);
 }
 
 /*
  * Policy for inheriting the property named by zc->zc_value on
  * zc->zc_name.  Native properties go through zfs_secpolicy_setprop();
  * user properties need the userprop permission; any other name is
  * rejected with EINVAL.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
 
 	/* A recognized native property. */
 	if (prop != ZPROP_INVAL)
 		return (zfs_secpolicy_setprop(zc->zc_name, prop, NULL, cr));
 
 	if (!zfs_prop_user(zc->zc_value))
 		return (SET_ERROR(EINVAL));
 
 	return (zfs_secpolicy_write_perms(zc->zc_name,
 	    ZFS_DELEG_PERM_USERPROP, cr));
 }
 
 /*
  * Policy for querying a single user/group space entry.  The dataset must
  * pass the read policy and zc->zc_objset_type must index a userquota
  * property (EINVAL otherwise).  Callers asking about their own uid, or a
  * group they belong to, are allowed; everyone else needs the delegated
  * permission matching the property type.
  */
 static int
 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	int err;
 
 	err = zfs_secpolicy_read(zc, innvl, cr);
 	if (err != 0)
 		return (err);
 
 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 		return (SET_ERROR(EINVAL));
 
 	if (zc->zc_value[0] == 0) {
 		/*
 		 * They are asking about a posix uid/gid.  If it's
 		 * themself, allow it.
 		 */
 		int is_user = (zc->zc_objset_type == ZFS_PROP_USERUSED ||
 		    zc->zc_objset_type == ZFS_PROP_USERQUOTA);
 
 		if (is_user && zc->zc_guid == crgetuid(cr))
 			return (0);
 		if (!is_user && groupmember(zc->zc_guid, cr))
 			return (0);
 	}
 
 	return (zfs_secpolicy_write_perms(zc->zc_name,
 	    userquota_perms[zc->zc_objset_type], cr));
 }
 
 /*
  * Policy for listing user/group space entries.  The dataset must pass
  * the read policy, zc->zc_objset_type must index a userquota property
  * (EINVAL otherwise), and the caller needs the delegated permission
  * matching the property type.
  */
 static int
 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	int err;
 
 	err = zfs_secpolicy_read(zc, innvl, cr);
 	if (err != 0)
 		return (err);
 
 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 		return (SET_ERROR(EINVAL));
 
 	return (zfs_secpolicy_write_perms(zc->zc_name,
 	    userquota_perms[zc->zc_objset_type], cr));
 }
 
 /*
  * Policy for the userspace upgrade: same check as setting the version
  * property on zc->zc_name, with no property value supplied.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	const char *dsname = zc->zc_name;
 
 	return (zfs_secpolicy_setprop(dsname, ZFS_PROP_VERSION, NULL, cr));
 }
 
 /*
  * Policy for placing holds.  For every name in the "holds" nvlist of
  * 'innvl' the caller needs the hold permission on the name's filesystem
  * as computed by dmu_fsname().  EINVAL is returned when "holds" is
  * missing; otherwise the first error stops the walk.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	nvlist_t *holds;
 	nvpair_t *pair;
 
 	if (nvlist_lookup_nvlist(innvl, "holds", &holds) != 0)
 		return (SET_ERROR(EINVAL));
 
 	pair = nvlist_next_nvpair(holds, NULL);
 	while (pair != NULL) {
 		char fsname[MAXNAMELEN];
 		int error;
 
 		error = dmu_fsname(nvpair_name(pair), fsname);
 		if (error == 0) {
 			error = zfs_secpolicy_write_perms(fsname,
 			    ZFS_DELEG_PERM_HOLD, cr);
 		}
 		if (error != 0)
 			return (error);
 
 		pair = nvlist_next_nvpair(holds, pair);
 	}
 
 	return (0);
 }
 
 /*
  * Policy for releasing holds.  For every pair name in 'innvl' the caller
  * needs the release permission on the name's filesystem as computed by
  * dmu_fsname().  The first error stops the walk.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	nvpair_t *pair;
 
 	pair = nvlist_next_nvpair(innvl, NULL);
 	while (pair != NULL) {
 		char fsname[MAXNAMELEN];
 		int error;
 
 		error = dmu_fsname(nvpair_name(pair), fsname);
 		if (error == 0) {
 			error = zfs_secpolicy_write_perms(fsname,
 			    ZFS_DELEG_PERM_RELEASE, cr);
 		}
 		if (error != 0)
 			return (error);
 
 		pair = nvlist_next_nvpair(innvl, pair);
 	}
 
 	return (0);
 }
 
 /*
  * Policy for allowing temporary snapshots to be taken or released.
  *
  * A temporary snapshot is the same as a snapshot, hold, destroy and
  * release all rolled into one.  Delegated diff alone is sufficient that
  * we allow this; without it, each of the individual checks must pass.
  */
 static int
 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	int error;
 
 	/* The diff permission short-circuits everything else. */
 	if (zfs_secpolicy_write_perms(zc->zc_name,
 	    ZFS_DELEG_PERM_DIFF, cr) == 0)
 		return (0);
 
 	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
 	if (error != 0)
 		return (error);
 
 	error = zfs_secpolicy_hold(zc, innvl, cr);
 	if (error != 0)
 		return (error);
 
 	error = zfs_secpolicy_release(zc, innvl, cr);
 	if (error != 0)
 		return (error);
 
 	return (zfs_secpolicy_destroy(zc, innvl, cr));
 }
 
 /*
  * Returns the nvlist as specified by the user in the zfs_cmd_t.
  *
  * nvl:   address of the packed nvlist, passed to ddi_copyin()
  * size:  size of the packed nvlist in bytes; 0 is rejected with EINVAL
  * iflag: flags forwarded to ddi_copyin()
  * nvp:   receives the unpacked nvlist; only written on success
  *
  * Errors from ddi_copyin() and nvlist_unpack() are returned unchanged.
  */
 static int
 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
 {
 	nvlist_t *list = NULL;
 	char *packed;
 	int error;
 
 	if (size == 0)
 		return (SET_ERROR(EINVAL));
 
 	/*
 	 * Read in and unpack the user-supplied nvlist.  The packed copy is
 	 * only needed until unpacking is done.
 	 */
 	packed = kmem_alloc(size, KM_SLEEP);
 
 	error = ddi_copyin((void *)(uintptr_t)nvl, packed, size, iflag);
 	if (error == 0)
 		error = nvlist_unpack(packed, size, &list, 0);
 
 	kmem_free(packed, size);
 
 	if (error != 0)
 		return (error);
 
 	*nvp = list;
 	return (0);
 }
 
 /*
  * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
  * Entries will be removed from the end of the nvlist, and one int32 entry
  * named by ZPROP_N_MORE_ERRORS will be added indicating how many entries
  * were removed.
  *
  * Returns 0 on success (including when the list already fits), or ENOMEM
  * when trimming is needed but 'max' is smaller than 1024 bytes.
  */
 static int
 nvlist_smush(nvlist_t *errors, size_t max)
 {
 	size_t size;
 
 	size = fnvlist_size(errors);
 
 	if (size > max) {
 		nvpair_t *more_errors;
 		int n = 0;
 
 		/* Refuse to trim into a buffer smaller than 1024 bytes. */
 		if (max < 1024)
 			return (SET_ERROR(ENOMEM));
 
 		/*
 		 * Add the counter entry up front with a placeholder value so
 		 * that its size is accounted for while trimming, and remember
 		 * it (presumably it is now the last pair in the list).
 		 */
 		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
 		more_errors = nvlist_prev_nvpair(errors, NULL);
 
 		/*
 		 * Repeatedly drop the pair just before the counter entry,
 		 * counting removals, until the list fits in 'max' bytes.
 		 */
 		do {
 			nvpair_t *pair = nvlist_prev_nvpair(errors,
 			    more_errors);
 			fnvlist_remove_nvpair(errors, pair);
 			n++;
 			size = fnvlist_size(errors);
 		} while (size > max);
 
 		/* Replace the placeholder with the real removal count. */
 		fnvlist_remove_nvpair(errors, more_errors);
 		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
 		ASSERT3U(fnvlist_size(errors), <=, max);
 	}
 
 	return (0);
 }
 
 /*
  * Pack 'nvl' and copy it out to the destination buffer described by
  * zc->zc_nvlist_dst and zc->zc_nvlist_dst_size.
  *
  * On return zc->zc_nvlist_dst_size holds the size of the nvlist and
  * zc->zc_nvlist_dst_filled is set, whether or not the data fit.
  * Returns 0, or EFAULT when the copy-out fails.
  */
 static int
 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
 {
 	size_t size = fnvlist_size(nvl);
 	int error = 0;
 
 	/*
 	 * When the buffer is too small nothing is copied, but success is
 	 * still returned.  Solaris returns ENOMEM here, because even if an
 	 * error is returned from an ioctl(2), new zc_nvlist_dst_size will be
 	 * passed to the userland. This is not the case for FreeBSD.
 	 * We need to return 0, so the kernel will copy the
 	 * zc_nvlist_dst_size back and the userland can discover that a
 	 * bigger buffer is needed.
 	 */
 	if (size <= zc->zc_nvlist_dst_size) {
 		char *packed = fnvlist_pack(nvl, &size);
 
 		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
 		    size, zc->zc_iflags) != 0)
 			error = SET_ERROR(EFAULT);
 		fnvlist_pack_free(packed, size);
 	}
 
 	zc->zc_nvlist_dst_size = size;
 	zc->zc_nvlist_dst_filled = B_TRUE;
 	return (error);
 }
 
 /*
  * Look up the zfsvfs_t attached to the objset named 'dsname' and take a
  * VFS hold on it.
  *
  * Returns 0 with *zfvp set and held, EINVAL when the objset is not of
  * type DMU_OST_ZFS, ESRCH when the objset has no user pointer (in which
  * case *zfvp is NULL), or the error from dmu_objset_hold().
  */
 static int
 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
 {
 	objset_t *os;
 	zfsvfs_t *found;
 	int error;
 
 	error = dmu_objset_hold(dsname, FTAG, &os);
 	if (error != 0)
 		return (error);
 
 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
 		dmu_objset_rele(os, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/* The user pointer is read under os_user_ptr_lock. */
 	mutex_enter(&os->os_user_ptr_lock);
 	found = dmu_objset_get_user(os);
 	*zfvp = found;
 	if (found != NULL) {
 		VFS_HOLD(found->z_vfs);
 	} else {
 		error = SET_ERROR(ESRCH);
 	}
 	mutex_exit(&os->os_user_ptr_lock);
 
 	dmu_objset_rele(os, FTAG);
 	return (error);
 }
 
 /*
  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
  * case its z_vfs will be NULL, and it will be opened as the owner.
  * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
  * which prevents all vnode ops from running.
  *
  * name:   dataset name to look up
  * tag:    tag passed to rrm_enter()/rrm_exit() for the teardown lock
  * zfvp:   receives the zfsvfs_t
  * writer: selects RW_WRITER instead of RW_READER for the teardown lock
  *
  * Returns 0 with z_teardown_lock held, EBUSY when the filesystem is
  * marked unmounted (lock dropped again), or the zfsvfs_create() error.
  */
 static int
 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
 {
 	int error = 0;
 
 	/* Any getzfsvfs() failure falls back to creating a private zfsvfs. */
 	if (getzfsvfs(name, zfvp) != 0)
 		error = zfsvfs_create(name, zfvp);
 	if (error == 0) {
 		rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
 		    RW_READER, tag);
 		if ((*zfvp)->z_unmounted) {
 			/*
 			 * XXX we could probably try again, since the unmounting
 			 * thread should be just about to disassociate the
 			 * objset from the zfsvfs.
 			 *
 			 * NOTE(review): only the teardown lock is dropped
 			 * here; the reference obtained above does not appear
 			 * to be released on this path -- confirm whether
 			 * that is accounted for elsewhere.
 			 */
 			rrm_exit(&(*zfvp)->z_teardown_lock, tag);
 			return (SET_ERROR(EBUSY));
 		}
 	}
 	return (error);
 }
 
 /*
  * Drop the teardown lock taken with 'tag' and give up the zfsvfs_t.
  * A zfsvfs with a z_vfs has its VFS hold released; one without a z_vfs
  * has its objset disowned and is then freed.
  */
 static void
 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
 {
 	rrm_exit(&zfsvfs->z_teardown_lock, tag);
 
 	if (zfsvfs->z_vfs == NULL) {
 		/* No VFS attached: disown the objset and free. */
 		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
 		zfsvfs_free(zfsvfs);
 		return;
 	}
 
 	VFS_RELE(zfsvfs->z_vfs);
 }
 
 /*
  * Create the pool named zc->zc_name.
  *
  * The vdev configuration is read from zc_nvlist_conf and optional pool
  * properties from zc_nvlist_src.  Root filesystem properties found under
  * ZPOOL_ROOTFS_PROPS are split off from the pool properties and applied
  * after the pool exists; if applying them fails the new pool is
  * destroyed again.
  */
 static int
 zfs_ioc_pool_create(zfs_cmd_t *zc)
 {
 	nvlist_t *config;
 	nvlist_t *props = NULL;
 	nvlist_t *rootprops = NULL;
 	nvlist_t *zplprops = NULL;
 	int error;
 
 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 	    zc->zc_iflags, &config);
 	if (error != 0)
 		return (error);
 
 	if (zc->zc_nvlist_src_size != 0) {
 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 		    zc->zc_iflags, &props);
 		if (error != 0) {
 			nvlist_free(config);
 			return (error);
 		}
 	}
 
 	if (props != NULL) {
 		uint64_t version = SPA_VERSION;
 		nvlist_t *nvl = NULL;
 
 		/* A missing version property leaves the default in place. */
 		(void) nvlist_lookup_uint64(props,
 		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
 		if (!SPA_VERSION_IS_SUPPORTED(version)) {
 			error = SET_ERROR(EINVAL);
 			goto pool_props_bad;
 		}
 
 		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
 		if (nvl != NULL) {
 			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
 			if (error != 0) {
 				nvlist_free(config);
 				nvlist_free(props);
 				return (error);
 			}
 			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
 		}
 
 		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 		error = zfs_fill_zplprops_root(version, rootprops,
 		    zplprops, NULL);
 		if (error != 0)
 			goto pool_props_bad;
 	}
 
 	error = spa_create(zc->zc_name, config, props, zplprops);
 
 	/*
 	 * Set the remaining root properties
 	 */
 	if (error == 0) {
 		error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL,
 		    rootprops, NULL);
 		if (error != 0)
 			(void) spa_destroy(zc->zc_name);
 	}
 
 pool_props_bad:
 	nvlist_free(rootprops);
 	nvlist_free(zplprops);
 	nvlist_free(config);
 	nvlist_free(props);
 
 	return (error);
 }
 
 /*
  * Destroy the pool named zc->zc_name.  The command is logged to the pool
  * history first; the pool's zvol minors are removed only when the
  * destroy succeeds.
  */
 static int
 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
 {
 	int error;
 
 	zfs_log_history(zc);
 
 	error = spa_destroy(zc->zc_name);
 	if (error != 0)
 		return (error);
 
 	zvol_remove_minors(zc->zc_name);
 	return (0);
 }
 
 /*
  * Import the pool named zc->zc_name.
  *
  * The configuration is read from zc_nvlist_conf and optional properties
  * from zc_nvlist_src.  The pool GUID in the configuration must match
  * zc->zc_guid (EINVAL otherwise).  When a destination buffer is
  * supplied, the configuration is copied back out through put_nvlist(),
  * and a failure there replaces the import result.
  */
 static int
 zfs_ioc_pool_import(zfs_cmd_t *zc)
 {
 	nvlist_t *config;
 	nvlist_t *props = NULL;
 	uint64_t guid;
 	int error;
 
 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 	    zc->zc_iflags, &config);
 	if (error != 0)
 		return (error);
 
 	if (zc->zc_nvlist_src_size != 0) {
 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 		    zc->zc_iflags, &props);
 		if (error != 0) {
 			nvlist_free(config);
 			return (error);
 		}
 	}
 
 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) == 0 &&
 	    guid == zc->zc_guid)
 		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
 	else
 		error = SET_ERROR(EINVAL);
 
 	if (zc->zc_nvlist_dst != 0) {
 		int put_err = put_nvlist(zc, config);
 
 		if (put_err != 0)
 			error = put_err;
 	}
 
 	nvlist_free(config);
 
 	if (props != NULL)
 		nvlist_free(props);
 
 	return (error);
 }
 
 /*
  * Export the pool named zc->zc_name.  zc->zc_cookie carries the 'force'
  * flag and zc->zc_guid the 'hardforce' flag.  The command is logged to
  * the pool history first; the pool's zvol minors are removed only when
  * the export succeeds.
  */
 static int
 zfs_ioc_pool_export(zfs_cmd_t *zc)
 {
 	boolean_t force = (boolean_t)zc->zc_cookie;
 	boolean_t hardforce = (boolean_t)zc->zc_guid;
 	int error;
 
 	zfs_log_history(zc);
 
 	error = spa_export(zc->zc_name, NULL, force, hardforce);
 	if (error != 0)
 		return (error);
 
 	zvol_remove_minors(zc->zc_name);
 	return (0);
 }
 
 /*
  * Return the nvlist produced by spa_all_configs() through the
  * destination buffer of 'zc'.  zc->zc_cookie is passed to
  * spa_all_configs() by reference.  EEXIST is returned when that call
  * yields no nvlist.
  */
 static int
 zfs_ioc_pool_configs(zfs_cmd_t *zc)
 {
 	nvlist_t *configs = spa_all_configs(&zc->zc_cookie);
 	int error;
 
 	if (configs == NULL)
 		return (SET_ERROR(EEXIST));
 
 	error = put_nvlist(zc, configs);
 	nvlist_free(configs);
 
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of the pool
  *
  * outputs:
  * zc_cookie		real errno
  * zc_nvlist_dst	config nvlist
  * zc_nvlist_dst_size	size of config nvlist
  */
 static int
 zfs_ioc_pool_stats(zfs_cmd_t *zc)
 {
 	nvlist_t *config;
 	int error;
 	int ret;
 
 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
 	    sizeof (zc->zc_value));
 
 	/* Without a config there is nothing to hand back but the error. */
 	if (config == NULL)
 		return (error);
 
 	ret = put_nvlist(zc, config);
 	nvlist_free(config);
 
 	/*
 	 * The config may be present even if 'error' is non-zero.
 	 * In this case we return success, and preserve the real errno
 	 * in 'zc_cookie'.
 	 */
 	zc->zc_cookie = error;
 
 	return (ret);
 }
 
 /*
  * Try to import the given pool, returning pool stats as appropriate so that
  * user land knows which devices are available and overall pool health.
  *
  * The candidate configuration is read from zc_nvlist_conf; the result of
  * spa_tryimport() is written to the destination buffer.  EINVAL is
  * returned when spa_tryimport() produces no configuration.
  */
 static int
 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
 {
 	nvlist_t *tryconfig;
 	nvlist_t *config;
 	int error;
 
 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 	    zc->zc_iflags, &tryconfig);
 	if (error != 0)
 		return (error);
 
 	/* The input nvlist is no longer needed once the attempt is made. */
 	config = spa_tryimport(tryconfig);
 	nvlist_free(tryconfig);
 
 	if (config == NULL)
 		return (SET_ERROR(EINVAL));
 
 	error = put_nvlist(zc, config);
 	nvlist_free(config);
 
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name              name of the pool
  * zc_cookie            scan func (pool_scan_func_t)
  *
  * POOL_SCAN_NONE stops the scan; any other value is passed to
  * spa_scan().
  */
 static int
 zfs_ioc_pool_scan(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	error = (zc->zc_cookie == POOL_SCAN_NONE) ?
 	    spa_scan_stop(spa) : spa_scan(spa, zc->zc_cookie);
 
 	spa_close(spa, FTAG);
 
 	return (error);
 }
 
 /*
  * Freeze the pool named zc->zc_name.  Returns the spa_open() result;
  * spa_freeze() itself reports no status.
  */
 static int
 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	spa_freeze(spa);
 	spa_close(spa, FTAG);
 
 	return (error);
 }
 
 /*
  * Upgrade the pool named zc->zc_name to the version in zc->zc_cookie.
  * EINVAL is returned when that version is lower than the pool's current
  * version or is not a supported SPA version.
  */
 static int
 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	if (zc->zc_cookie >= spa_version(spa) &&
 	    SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
 		spa_upgrade(spa, zc->zc_cookie);
 	} else {
 		error = SET_ERROR(EINVAL);
 	}
 
 	spa_close(spa, FTAG);
 
 	return (error);
 }
 
 /*
  * Copy a portion of the history of the pool named zc->zc_name out to the
  * buffer at zc->zc_history.
  *
  * zc->zc_history_len gives the buffer size on input (0 is rejected with
  * EINVAL); it and zc->zc_history_offset are passed to spa_history_get()
  * by reference.  ENOTSUP is returned for pools older than
  * SPA_VERSION_ZPOOL_HISTORY.
  */
 static int
 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
 {
 	uint64_t size = zc->zc_history_len;
 	char *hist_buf;
 	spa_t *spa;
 	int error;
 
 	if (size == 0)
 		return (SET_ERROR(EINVAL));
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
 		spa_close(spa, FTAG);
 		return (SET_ERROR(ENOTSUP));
 	}
 
 	/*
 	 * The bounce buffer keeps the originally requested size even if
 	 * zc_history_len is changed by spa_history_get().
 	 */
 	hist_buf = kmem_alloc(size, KM_SLEEP);
 
 	error = spa_history_get(spa, &zc->zc_history_offset,
 	    &zc->zc_history_len, hist_buf);
 	if (error == 0) {
 		error = ddi_copyout(hist_buf,
 		    (void *)(uintptr_t)zc->zc_history,
 		    zc->zc_history_len, zc->zc_iflags);
 	}
 
 	spa_close(spa, FTAG);
 	kmem_free(hist_buf, size);
 	return (error);
 }
 
 /*
  * Change the GUID of the pool named zc->zc_name.  Returns the
  * spa_open() error, or otherwise the result of spa_change_guid().
  */
 static int
 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	error = spa_change_guid(spa);
 	spa_close(spa, FTAG);
 
 	return (error);
 }
 
 /*
  * Thin wrapper around dsl_dsobj_to_dsname(): passes zc->zc_name,
  * zc->zc_obj and the zc->zc_value buffer and returns its result.
  */
 static int
 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
 {
 	int error;
 
 	error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value);
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of filesystem
  * zc_obj		object to find
  *
  * outputs:
  * zc_value		name of object
  *
  * Returns EINVAL when the objset is not of type DMU_OST_ZFS.
  */
 static int
 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
 {
 	objset_t *os;
 	int error;
 
 	/* XXX reading from objset not owned */
 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
 	if (error != 0)
 		return (error);
 
 	if (dmu_objset_type(os) == DMU_OST_ZFS) {
 		error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
 		    sizeof (zc->zc_value));
 	} else {
 		error = SET_ERROR(EINVAL);
 	}
 
 	dmu_objset_rele(os, FTAG);
 
 	return (error);
 }
 
 /*
  * Look up both the stats and the path of an object inside a ZPL
  * filesystem.
  *
  * inputs:
  * zc_name		name of filesystem
  * zc_obj		object to find
  *
  * outputs:
  * zc_stat		stats on object
  * zc_value		path to object
  *
  * Fails with EINVAL when the objset is not of type DMU_OST_ZFS.
  */
 static int
 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
 {
 	objset_t *objset;
 	int rc;
 
 	/* XXX reading from objset not owned */
 	rc = dmu_objset_hold(zc->zc_name, FTAG, &objset);
 	if (rc != 0)
 		return (rc);
 
 	if (dmu_objset_type(objset) == DMU_OST_ZFS) {
 		rc = zfs_obj_to_stats(objset, zc->zc_obj, &zc->zc_stat,
 		    zc->zc_value, sizeof (zc->zc_value));
 		dmu_objset_rele(objset, FTAG);
 		return (rc);
 	}
 
 	dmu_objset_rele(objset, FTAG);
 	return (SET_ERROR(EINVAL));
 }
 
 /*
  * Add the vdevs described by a user-supplied config nvlist to a pool.
  *
  * inputs:
  * zc_name		name of the pool
  * zc_nvlist_conf{_size}	nvlist describing the devices to add
  *
  * Returns 0 on success, otherwise the error from spa_open(), get_nvlist()
  * or spa_vdev_add() (or EDOM on illumos for a root pool, see below).
  */
 static int
 zfs_ioc_vdev_add(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	int error;
 	nvlist_t *config = NULL, **l2cache, **spares;
 	uint_t nl2cache = 0, nspares = 0;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Stop here if the config could not be fetched: nothing visible
 	 * guarantees that get_nvlist() leaves config valid on failure, so
 	 * it must not be handed to the nvlist lookups (or nvlist_free())
 	 * below.
 	 */
 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 	    zc->zc_iflags, &config);
 	if (error != 0) {
 		spa_close(spa, FTAG);
 		return (error);
 	}
 
 	/* Only the counts are used; a failed lookup leaves them at zero. */
 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
 	    &l2cache, &nl2cache);
 
 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
 	    &spares, &nspares);
 
 #ifdef illumos
 	/*
 	 * A root pool with concatenated devices is not supported.
 	 * Thus, can not add a device to a root pool.
 	 *
 	 * Intent log device can not be added to a rootpool because
 	 * during mountroot, zil is replayed, a separated log device
 	 * can not be accessed during the mountroot time.
 	 *
 	 * l2cache and spare devices are ok to be added to a rootpool.
 	 */
 	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
 		nvlist_free(config);
 		spa_close(spa, FTAG);
 		return (SET_ERROR(EDOM));
 	}
 #endif /* illumos */
 
 	error = spa_vdev_add(spa, config);
 	nvlist_free(config);
 	spa_close(spa, FTAG);
 	return (error);
 }
 
 /*
  * Remove a vdev from a pool.
  *
  * inputs:
  * zc_name		name of the pool
  * zc_guid		guid of the vdev, passed to spa_vdev_remove()
  *
  * Returns the error from spa_open() or spa_vdev_remove().
  */
 static int
 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
 {
 	spa_t *pool;
 	int rc;
 
 	if ((rc = spa_open(zc->zc_name, &pool, FTAG)) != 0)
 		return (rc);
 
 	rc = spa_vdev_remove(pool, zc->zc_guid, B_FALSE);
 	spa_close(pool, FTAG);
 	return (rc);
 }
 
 /*
  * Change the state of a vdev.
  *
  * inputs:
  * zc_name		name of the pool
  * zc_guid		guid of the vdev
  * zc_cookie		requested state (VDEV_STATE_ONLINE, _OFFLINE,
  *			_FAULTED or _DEGRADED; anything else is EINVAL)
  * zc_obj		extra argument: passed as-is for online/offline, the
  *			aux reason for fault/degrade
  *
  * outputs:
  * zc_cookie		state reported by vdev_online(); VDEV_STATE_UNKNOWN
  *			for every other request
  */
 static int
 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
 {
 	spa_t *pool;
 	vdev_state_t resulting = VDEV_STATE_UNKNOWN;
 	int rc;
 
 	rc = spa_open(zc->zc_name, &pool, FTAG);
 	if (rc != 0)
 		return (rc);
 
 	switch (zc->zc_cookie) {
 	case VDEV_STATE_ONLINE:
 		rc = vdev_online(pool, zc->zc_guid, zc->zc_obj, &resulting);
 		break;
 
 	case VDEV_STATE_OFFLINE:
 		rc = vdev_offline(pool, zc->zc_guid, zc->zc_obj);
 		break;
 
 	case VDEV_STATE_FAULTED:
 	case VDEV_STATE_DEGRADED:
 		/*
 		 * Only two aux reasons are accepted; any other value is
 		 * coerced to VDEV_AUX_ERR_EXCEEDED.
 		 */
 		if (!(zc->zc_obj == VDEV_AUX_ERR_EXCEEDED ||
 		    zc->zc_obj == VDEV_AUX_EXTERNAL))
 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
 
 		if (zc->zc_cookie == VDEV_STATE_FAULTED)
 			rc = vdev_fault(pool, zc->zc_guid, zc->zc_obj);
 		else
 			rc = vdev_degrade(pool, zc->zc_guid, zc->zc_obj);
 		break;
 
 	default:
 		rc = SET_ERROR(EINVAL);
 		break;
 	}
 
 	zc->zc_cookie = resulting;
 	spa_close(pool, FTAG);
 	return (rc);
 }
 
 /*
  * Attach the device described by a user-supplied nvlist to an existing
  * vdev.
  *
  * inputs:
  * zc_name		name of the pool
  * zc_guid		guid of the existing vdev
  * zc_cookie		"replacing" flag handed to spa_vdev_attach()
  * zc_nvlist_conf{_size}	nvlist describing the new device
  */
 static int
 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
 {
 	spa_t *pool;
 	nvlist_t *newdev;
 	int is_replace = zc->zc_cookie;
 	int rc;
 
 	rc = spa_open(zc->zc_name, &pool, FTAG);
 	if (rc != 0)
 		return (rc);
 
 	rc = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 	    zc->zc_iflags, &newdev);
 	if (rc == 0) {
 		rc = spa_vdev_attach(pool, zc->zc_guid, newdev, is_replace);
 		nvlist_free(newdev);
 	}
 
 	spa_close(pool, FTAG);
 	return (rc);
 }
 
 /*
  * Detach the vdev identified by zc_guid from the pool named by zc_name.
  * Returns the error from spa_open() or spa_vdev_detach().
  */
 static int
 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
 {
 	spa_t *pool;
 	int rc;
 
 	rc = spa_open(zc->zc_name, &pool, FTAG);
 	if (rc != 0)
 		return (rc);
 
 	rc = spa_vdev_detach(pool, zc->zc_guid, 0, B_FALSE);
 	spa_close(pool, FTAG);
 	return (rc);
 }
 
 /*
  * Split a mirrored pool via spa_vdev_split_mirror().
  *
  * inputs:
  * zc_name		name of the existing pool
  * zc_string		passed to spa_vdev_split_mirror() (presumably the
  *			name of the new pool -- confirm against callee)
  * zc_cookie		ZPOOL_EXPORT_AFTER_SPLIT flag
  * zc_nvlist_conf{_size}	config nvlist (required)
  * zc_nvlist_src{_size}	property nvlist (optional; skipped when size is 0)
  */
 static int
 zfs_ioc_vdev_split(zfs_cmd_t *zc)
 {
 	spa_t *pool;
 	nvlist_t *newcfg;
 	nvlist_t *newprops = NULL;
 	boolean_t export_after;
 	int rc;
 
 	export_after = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
 
 	rc = spa_open(zc->zc_name, &pool, FTAG);
 	if (rc != 0)
 		return (rc);
 
 	rc = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 	    zc->zc_iflags, &newcfg);
 	if (rc != 0) {
 		spa_close(pool, FTAG);
 		return (rc);
 	}
 
 	/* properties are optional; newprops stays NULL without them */
 	if (zc->zc_nvlist_src_size != 0) {
 		rc = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 		    zc->zc_iflags, &newprops);
 		if (rc != 0) {
 			spa_close(pool, FTAG);
 			nvlist_free(newcfg);
 			return (rc);
 		}
 	}
 
 	rc = spa_vdev_split_mirror(pool, zc->zc_string, newcfg, newprops,
 	    export_after);
 
 	spa_close(pool, FTAG);
 
 	nvlist_free(newcfg);
 	nvlist_free(newprops);
 
 	return (rc);
 }
 
 /*
  * Set the path of the vdev identified by zc_guid to the string in
  * zc_value, via spa_vdev_setpath().
  */
 static int
 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
 {
 	spa_t *pool;
 	int rc;
 
 	if ((rc = spa_open(zc->zc_name, &pool, FTAG)) != 0)
 		return (rc);
 
 	rc = spa_vdev_setpath(pool, zc->zc_guid, zc->zc_value);
 	spa_close(pool, FTAG);
 	return (rc);
 }
 
 /*
  * Set the FRU of the vdev identified by zc_guid to the string in
  * zc_value, via spa_vdev_setfru().
  */
 static int
 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
 {
 	spa_t *pool;
 	int rc;
 
 	if ((rc = spa_open(zc->zc_name, &pool, FTAG)) != 0)
 		return (rc);
 
 	rc = spa_vdev_setfru(pool, zc->zc_guid, zc->zc_value);
 	spa_close(pool, FTAG);
 	return (rc);
 }
 
 /*
  * Fill in zc_objset_stats for the given objset and, when the caller
  * supplied a destination buffer (zc_nvlist_dst != 0), gather the
  * objset's properties and stats into an nvlist and hand it back with
  * put_nvlist().
  *
  * Returns 0 on success, EIO if zvol_get_stats() reports it, or the error
  * from dsl_prop_get_all() / put_nvlist().  Any other zvol_get_stats()
  * failure trips VERIFY0().
  */
 static int
 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
 {
 	int error = 0;
 	nvlist_t *nv;
 
 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
 
 	if (zc->zc_nvlist_dst != 0 &&
 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
 		dmu_objset_stats(os, nv);
 		/*
 		 * NB: zvol_get_stats() will read the objset contents,
 		 * which we aren't supposed to do with a
 		 * DS_MODE_USER hold, because it could be
 		 * inconsistent.  So this is a bit of a workaround...
 		 * XXX reading with out owning
 		 */
 		if (!zc->zc_objset_stats.dds_inconsistent &&
 		    dmu_objset_type(os) == DMU_OST_ZVOL) {
 			error = zvol_get_stats(os, nv);
 			if (error == EIO) {
 				/* don't leak the property list on bail-out */
 				nvlist_free(nv);
 				return (error);
 			}
 			VERIFY0(error);
 		}
 		error = put_nvlist(zc, nv);
 		nvlist_free(nv);
 	}
 
 	return (error);
 }
 
 /*
  * Hold the named objset and report its stats and properties.
  *
  * inputs:
  * zc_name		name of filesystem
  * zc_nvlist_dst_size	size of buffer for property nvlist
  *
  * outputs:
  * zc_objset_stats	stats
  * zc_nvlist_dst	property nvlist
  * zc_nvlist_dst_size	size of property nvlist
  *
  * An ENOMEM result, from either the hold or the stats gathering, is
  * reported to the caller as success.
  */
 static int
 zfs_ioc_objset_stats(zfs_cmd_t *zc)
 {
 	objset_t *objset;
 	int rc;
 
 	rc = dmu_objset_hold(zc->zc_name, FTAG, &objset);
 	if (rc != 0)
 		return (rc == ENOMEM ? 0 : rc);
 
 	rc = zfs_ioc_objset_stats_impl(zc, objset);
 	dmu_objset_rele(objset, FTAG);
 
 	return (rc == ENOMEM ? 0 : rc);
 }
 
 /*
  * inputs:
  * zc_name		name of filesystem
  * zc_nvlist_dst_size	size of buffer for property nvlist
  *
  * outputs:
  * zc_nvlist_dst	received property nvlist
  * zc_nvlist_dst_size	size of received property nvlist
  *
  * Returns the received properties of a dataset, as opposed to its local
  * properties, for callers that need to tell the two apart.  Received
  * properties are only distinct on or after SPA_VERSION_RECVD_PROPS.
  */
 static int
 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
 {
 	nvlist_t *received;
 	int rc;
 
 	/*
 	 * A dataset that has never received properties on or after
 	 * SPA_VERSION_RECVD_PROPS would otherwise yield its local values.
 	 */
 	if (!dsl_prop_get_hasrecvd(zc->zc_name))
 		return (SET_ERROR(ENOTSUP));
 
 	/* no destination buffer: nothing to report */
 	if (zc->zc_nvlist_dst == 0)
 		return (0);
 
 	rc = dsl_prop_get_received(zc->zc_name, &received);
 	if (rc != 0)
 		return (rc);
 
 	rc = put_nvlist(zc, received);
 	nvlist_free(received);
 	return (rc);
 }
 
 /*
  * Fetch one ZPL property of the objset and add it to props as a uint64
  * keyed by the property's name.  Returns the zfs_get_zplprop() error, or
  * 0 once the value has been added.
  */
 static int
 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
 {
 	uint64_t val;
 	int rc;
 
 	/*
 	 * zfs_get_zplprop() yields either the stored value or the
 	 * property's default (if there is one).
 	 */
 	rc = zfs_get_zplprop(os, prop, &val);
 	if (rc == 0) {
 		VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop),
 		    val) == 0);
 	}
 	return (rc);
 }
 
 /*
  * Report the ZPL properties (version, normalization, utf8only, case) of
  * a filesystem.
  *
  * inputs:
  * zc_name		name of filesystem
  * zc_nvlist_dst_size	size of buffer for zpl property nvlist
  *
  * outputs:
  * zc_nvlist_dst	zpl property nvlist
  * zc_nvlist_dst_size	size of zpl property nvlist
  *
  * Fails with ENOENT when no destination buffer was supplied, when the
  * dataset is flagged inconsistent, or when the objset is not a
  * DMU_OST_ZFS objset.
  */
 static int
 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
 {
 	objset_t *objset;
 	nvlist_t *zpl;
 	int rc;
 
 	/* XXX reading without owning */
 	rc = dmu_objset_hold(zc->zc_name, FTAG, &objset);
 	if (rc != 0)
 		return (rc);
 
 	dmu_objset_fast_stat(objset, &zc->zc_objset_stats);
 
 	/*
 	 * NB: nvl_add_zplprop() will read the objset contents,
 	 * which we aren't supposed to do with a DS_MODE_USER
 	 * hold, because it could be inconsistent.
 	 */
 	if (zc->zc_nvlist_dst == 0 ||
 	    zc->zc_objset_stats.dds_inconsistent ||
 	    dmu_objset_type(objset) != DMU_OST_ZFS) {
 		rc = SET_ERROR(ENOENT);
 		dmu_objset_rele(objset, FTAG);
 		return (rc);
 	}
 
 	VERIFY(nvlist_alloc(&zpl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
 	/* stop at the first property that cannot be fetched */
 	rc = nvl_add_zplprop(objset, zpl, ZFS_PROP_VERSION);
 	if (rc == 0)
 		rc = nvl_add_zplprop(objset, zpl, ZFS_PROP_NORMALIZE);
 	if (rc == 0)
 		rc = nvl_add_zplprop(objset, zpl, ZFS_PROP_UTF8ONLY);
 	if (rc == 0)
 		rc = nvl_add_zplprop(objset, zpl, ZFS_PROP_CASE);
 	if (rc == 0)
 		rc = put_nvlist(zc, zpl);
 
 	nvlist_free(zpl);
 	dmu_objset_rele(objset, FTAG);
 	return (rc);
 }
 
 /*
  * Decide whether a dataset should be left out of listings.  Returns
  * B_TRUE for internal datasets (a '$' in the name), temporary datasets
  * (a '%' in the name), and datasets that are not visible in the current
  * zone; B_FALSE otherwise.
  */
 boolean_t
 dataset_name_hidden(const char *name)
 {
 	if (strchr(name, '$') != NULL || strchr(name, '%') != NULL)
 		return (B_TRUE);
 
 	/* zone visibility only matters outside the global zone */
 	if (INGLOBALZONE(curthread))
 		return (B_FALSE);
 
 	return (zone_dataset_visible(name, NULL) ? B_FALSE : B_TRUE);
 }
 
 /*
  * Step the child-dataset iteration of zc_name forward by one visible
  * entry and fill in that entry's stats.
  *
  * inputs:
  * zc_name		name of filesystem
  * zc_cookie		zap cursor
  * zc_nvlist_dst_size	size of buffer for property nvlist
  *
  * outputs:
  * zc_name		name of next filesystem
  * zc_cookie		zap cursor
  * zc_objset_stats	stats
  * zc_nvlist_dst	property nvlist
  * zc_nvlist_dst_size	size of property nvlist
  *
  * ENOENT from holding the parent or from the directory listing is
  * reported as ESRCH.  ENOENT from the stats lookup restarts the search
  * with the original parent name and the already-advanced cursor.
  */
 static int
 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
 {
 	objset_t *os;
 	int error;
 	char *p;
 	/* length of the parent name, so it can be restored before a retry */
 	size_t orig_len = strlen(zc->zc_name);
 
 top:
 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
 		if (error == ENOENT)
 			error = SET_ERROR(ESRCH);
 		return (error);
 	}
 
 	/* make sure the name ends in '/'; the child name is written after it */
 	p = strrchr(zc->zc_name, '/');
 	if (p == NULL || p[1] != '\0')
 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
 	p = zc->zc_name + strlen(zc->zc_name);
 
 	/*
 	 * Write the next child's name at p, using the space left in
 	 * zc_name, and keep going while the resulting name is hidden.
 	 */
 	do {
 		error = dmu_dir_list_next(os,
 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
 		    NULL, &zc->zc_cookie);
 		if (error == ENOENT)
 			error = SET_ERROR(ESRCH);
 	} while (error == 0 && dataset_name_hidden(zc->zc_name));
 	dmu_objset_rele(os, FTAG);
 
 	/*
 	 * If it's an internal dataset (ie. with a '$' in its name),
 	 * don't try to get stats for it, otherwise we'll return ENOENT.
 	 */
 	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
 		if (error == ENOENT) {
 			/* We lost a race with destroy, get the next one. */
 			zc->zc_name[orig_len] = '\0';
 			goto top;
 		}
 	}
 	return (error);
 }
 
 /*
  * Step the snapshot iteration of zc_name forward by one entry.
  *
  * inputs:
  * zc_name		name of filesystem
  * zc_cookie		zap cursor
  * zc_nvlist_dst_size	size of buffer for property nvlist
  * zc_simple		when set, only name is requested
  *
  * outputs:
  * zc_name		name of next snapshot
  * zc_objset_stats	stats
  * zc_nvlist_dst	property nvlist
  * zc_nvlist_dst_size	size of property nvlist
  *
  * ENOENT, from the initial hold or from the listing, is reported as
  * ESRCH.  On any failure after the '@' was appended, zc_name is cut back
  * at its first '@'.
  */
 static int
 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
 {
 	objset_t *parent;
 	size_t used;
 	int rc;
 
 	rc = dmu_objset_hold(zc->zc_name, FTAG, &parent);
 	if (rc != 0) {
 		if (rc == ENOENT)
 			return (ESRCH);
 		return (rc);
 	}
 
 	/*
 	 * A dataset name of maximum length cannot have any snapshots,
 	 * so exit immediately.
 	 */
 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
 		dmu_objset_rele(parent, FTAG);
 		return (SET_ERROR(ESRCH));
 	}
 
 	/* the snapshot component is written right behind the '@' */
 	used = strlen(zc->zc_name);
 	rc = dmu_snapshot_list_next(parent, sizeof (zc->zc_name) - used,
 	    zc->zc_name + used, &zc->zc_obj, &zc->zc_cookie, NULL);
 
 	if (rc == ENOENT) {
 		rc = SET_ERROR(ESRCH);
 	} else if (rc == 0 && !zc->zc_simple) {
 		dsl_pool_t *dp = parent->os_dsl_dataset->ds_dir->dd_pool;
 		dsl_dataset_t *snap;
 
 		rc = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &snap);
 		if (rc == 0) {
 			objset_t *snapos;
 
 			rc = dmu_objset_from_ds(snap, &snapos);
 			if (rc == 0)
 				rc = zfs_ioc_objset_stats_impl(zc, snapos);
 			dsl_dataset_rele(snap, FTAG);
 		}
 	}
 
 	dmu_objset_rele(parent, FTAG);
 
 	/* if we failed, undo the @ that we tacked on to zc_name */
 	if (rc != 0)
 		*strchr(zc->zc_name, '@') = '\0';
 	return (rc);
 }
 
 /*
  * Apply a userquota-style property to a mounted filesystem.
  *
  * The pair's name must contain a '-' (the text after it is the domain),
  * and its value -- possibly wrapped in an nvlist under ZPROP_VALUE --
  * must be a uint64 array of exactly three elements: { type, rid, quota }.
  * Anything else yields EINVAL.
  */
 static int
 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
 {
 	/* taken from the outer pair, before it may be unwrapped below */
 	const char *name = nvpair_name(pair);
 	uint64_t *vals;
 	unsigned int nvals;
 	const char *domain;
 	char *sep;
 	zfs_userquota_prop_t type;
 	uint64_t rid;
 	uint64_t quota;
 	zfsvfs_t *zfsvfs;
 	int rc;
 
 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 		nvlist_t *attrs;
 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
 		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair) != 0)
 			return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * A correctly constructed propname is encoded as
 	 * userquota@<rid>-<domain>.
 	 */
 	sep = strchr(name, '-');
 	if (sep == NULL)
 		return (SET_ERROR(EINVAL));
 	if (nvpair_value_uint64_array(pair, &vals, &nvals) != 0)
 		return (SET_ERROR(EINVAL));
 	if (nvals != 3)
 		return (SET_ERROR(EINVAL));
 
 	domain = sep + 1;
 	type = vals[0];
 	rid = vals[1];
 	quota = vals[2];
 
 	rc = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
 	if (rc != 0)
 		return (rc);
 
 	rc = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
 	zfsvfs_rele(zfsvfs, FTAG);
 	return (rc);
 }
 
 /*
  * If the named property is one that has a special function to set its value,
  * return 0 on success and a positive error code on failure; otherwise if it is
  * not one of the special properties handled by this function, return -1.
  *
  * Userquota-style names are routed to zfs_prop_set_userquota().  String
  * typed properties are never special.  For the filesystem/snapshot limit
  * properties the limit feature is activated here and -1 is still returned
  * so that the caller stores the value through the generic path.
  *
  * XXX: It would be better for callers of the property interface if we handled
  * these special cases in dsl_prop.c (in the dsl layer).
  */
 static int
 zfs_prop_set_special(const char *dsname, zprop_source_t source,
     nvpair_t *pair)
 {
 	const char *propname = nvpair_name(pair);
 	zfs_prop_t prop = zfs_name_to_prop(propname);
 	uint64_t intval;
 	int err = -1;
 
 	if (prop == ZPROP_INVAL) {
 		if (zfs_prop_userquota(propname))
 			return (zfs_prop_set_userquota(dsname, pair));
 		return (-1);
 	}
 
 	/* unwrap a value that arrives inside an attribute nvlist */
 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 		nvlist_t *attrs;
 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
 		    &pair) == 0);
 	}
 
 	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
 		return (-1);
 
 	VERIFY(0 == nvpair_value_uint64(pair, &intval));
 
 	switch (prop) {
 	case ZFS_PROP_QUOTA:
 		err = dsl_dir_set_quota(dsname, source, intval);
 		break;
 	case ZFS_PROP_REFQUOTA:
 		err = dsl_dataset_set_refquota(dsname, source, intval);
 		break;
 	case ZFS_PROP_FILESYSTEM_LIMIT:
 	case ZFS_PROP_SNAPSHOT_LIMIT:
 		if (intval == UINT64_MAX) {
 			/* clearing the limit, just do it */
 			err = 0;
 		} else {
 			err = dsl_dir_activate_fs_ss_limit(dsname);
 		}
 		/*
 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
 		 * default path to set the value in the nvlist.
 		 */
 		if (err == 0)
 			err = -1;
 		break;
 	case ZFS_PROP_RESERVATION:
 		err = dsl_dir_set_reservation(dsname, source, intval);
 		break;
 	case ZFS_PROP_REFRESERVATION:
 		err = dsl_dataset_set_refreservation(dsname, source, intval);
 		break;
 	case ZFS_PROP_VOLSIZE:
 		err = zvol_set_volsize(dsname, intval);
 		break;
 	case ZFS_PROP_VERSION:
 	{
 		zfsvfs_t *zfsvfs;
 
 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
 			break;
 
 		err = zfs_set_version(zfsvfs, intval);
 		zfsvfs_rele(zfsvfs, FTAG);
 
 		/*
 		 * After a successful bump to a version with userspace
 		 * accounting, kick off the userspace upgrade (best effort,
 		 * its result is ignored).
 		 */
 		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
 			zfs_cmd_t *zc;
 
 			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
 			/* bounded copy: never write past zc_name */
 			(void) strlcpy(zc->zc_name, dsname,
 			    sizeof (zc->zc_name));
 			(void) zfs_ioc_userspace_upgrade(zc);
 			kmem_free(zc, sizeof (zfs_cmd_t));
 		}
 		break;
 	}
 	default:
 		err = -1;
 	}
 
 	return (err);
 }
 
 /*
  * This function is best effort. If it fails to set any of the given properties,
  * it continues to set as many as it can and returns the last error
  * encountered. If the caller provides a non-NULL errlist, it will be filled in
  * with the list of names of all the properties that failed along with the
  * corresponding error numbers.
  *
  * If every property is set successfully, zero is returned and errlist is not
  * modified.
  *
  * Processing happens in up to three stages:
  *  1. every pair in nvl is validated (value type, then settability) and
  *     offered to zfs_prop_set_special(); pairs it does not handle are
  *     collected in genericnvl, pairs it fails on are collected in retrynvl;
  *  2. if retrynvl is non-empty, stage 1 is run once more over retrynvl;
  *  3. genericnvl is applied with a single dsl_props_set() call, falling
  *     back to one call per property if that fails.
  */
 int
 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
     nvlist_t *errlist)
 {
 	nvpair_t *pair;
 	nvpair_t *propval;
 	int rv = 0;
 	uint64_t intval;
 	char *strval;
 	/* properties to be set together through dsl_props_set() */
 	nvlist_t *genericnvl = fnvlist_alloc();
 	/* special properties that failed on the first pass */
 	nvlist_t *retrynvl = fnvlist_alloc();
 
 retry:
 	pair = NULL;
 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
 		const char *propname = nvpair_name(pair);
 		zfs_prop_t prop = zfs_name_to_prop(propname);
 		int err = 0;
 
 		/* decode the property value */
 		propval = pair;
 		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 			nvlist_t *attrs;
 			attrs = fnvpair_value_nvlist(pair);
 			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
 			    &propval) != 0)
 				err = SET_ERROR(EINVAL);
 		}
 
 		/* Validate value type */
 		if (err == 0 && prop == ZPROP_INVAL) {
 			/*
 			 * Not a native property: only user properties
 			 * (string) and userquota properties (uint64 array)
 			 * are accepted.
 			 */
 			if (zfs_prop_user(propname)) {
 				if (nvpair_type(propval) != DATA_TYPE_STRING)
 					err = SET_ERROR(EINVAL);
 			} else if (zfs_prop_userquota(propname)) {
 				if (nvpair_type(propval) !=
 				    DATA_TYPE_UINT64_ARRAY)
 					err = SET_ERROR(EINVAL);
 			} else {
 				err = SET_ERROR(EINVAL);
 			}
 		} else if (err == 0) {
 			/*
 			 * Native property: the nvpair type must agree with
 			 * the property's declared type.
 			 */
 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
 				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
 					err = SET_ERROR(EINVAL);
 			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
 				const char *unused;
 
 				intval = fnvpair_value_uint64(propval);
 
 				switch (zfs_prop_get_type(prop)) {
 				case PROP_TYPE_NUMBER:
 					break;
 				case PROP_TYPE_STRING:
 					err = SET_ERROR(EINVAL);
 					break;
 				case PROP_TYPE_INDEX:
 					/* the index must map to a known name */
 					if (zfs_prop_index_to_string(prop,
 					    intval, &unused) != 0)
 						err = SET_ERROR(EINVAL);
 					break;
 				default:
 					cmn_err(CE_PANIC,
 					    "unknown property type");
 				}
 			} else {
 				err = SET_ERROR(EINVAL);
 			}
 		}
 
 		/* Validate permissions */
 		if (err == 0)
 			err = zfs_check_settable(dsname, pair, CRED());
 
 		if (err == 0) {
 			err = zfs_prop_set_special(dsname, source, pair);
 			if (err == -1) {
 				/*
 				 * For better performance we build up a list of
 				 * properties to set in a single transaction.
 				 */
 				err = nvlist_add_nvpair(genericnvl, pair);
 			} else if (err != 0 && nvl != retrynvl) {
 				/*
 				 * This may be a spurious error caused by
 				 * receiving quota and reservation out of order.
 				 * Try again in a second pass.
 				 */
 				err = nvlist_add_nvpair(retrynvl, pair);
 			}
 		}
 
 		/* record the failure; the last one seen becomes the result */
 		if (err != 0) {
 			if (errlist != NULL)
 				fnvlist_add_int32(errlist, propname, err);
 			rv = err;
 		}
 	}
 
 	/* second pass, taken at most once: nvl == retrynvl afterwards */
 	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
 		nvl = retrynvl;
 		goto retry;
 	}
 
 	if (!nvlist_empty(genericnvl) &&
 	    dsl_props_set(dsname, source, genericnvl) != 0) {
 		/*
 		 * If this fails, we still want to set as many properties as we
 		 * can, so try setting them individually.
 		 */
 		pair = NULL;
 		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
 			const char *propname = nvpair_name(pair);
 			int err = 0;
 
 			propval = pair;
 			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 				nvlist_t *attrs;
 				attrs = fnvpair_value_nvlist(pair);
 				propval = fnvlist_lookup_nvpair(attrs,
 				    ZPROP_VALUE);
 			}
 
 			/* anything that is not a string is set as an integer */
 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
 				strval = fnvpair_value_string(propval);
 				err = dsl_prop_set_string(dsname, propname,
 				    source, strval);
 			} else {
 				intval = fnvpair_value_uint64(propval);
 				err = dsl_prop_set_int(dsname, propname, source,
 				    intval);
 			}
 
 			if (err != 0) {
 				if (errlist != NULL) {
 					fnvlist_add_int32(errlist, propname,
 					    err);
 				}
 				rv = err;
 			}
 		}
 	}
 	nvlist_free(genericnvl);
 	nvlist_free(retrynvl);
 
 	return (rv);
 }
 
 /*
  * Check that every entry of nvl is a valid user property: a user property
  * name with a string value, which the caller is permitted to set, and
  * whose name and value fit within the ZAP limits.  Returns 0 when all
  * entries pass, otherwise the error for the first offending entry.
  */
 static int
 zfs_check_userprops(const char *fsname, nvlist_t *nvl)
 {
 	nvpair_t *elem;
 	int rc;
 
 	for (elem = nvlist_next_nvpair(nvl, NULL); elem != NULL;
 	    elem = nvlist_next_nvpair(nvl, elem)) {
 		const char *name = nvpair_name(elem);
 
 		if (!zfs_prop_user(name))
 			return (SET_ERROR(EINVAL));
 		if (nvpair_type(elem) != DATA_TYPE_STRING)
 			return (SET_ERROR(EINVAL));
 
 		rc = zfs_secpolicy_write_perms(fsname,
 		    ZFS_DELEG_PERM_USERPROP, CRED());
 		if (rc != 0)
 			return (rc);
 
 		if (strlen(name) >= ZAP_MAXNAMELEN)
 			return (SET_ERROR(ENAMETOOLONG));
 
 		if (strlen(fnvpair_value_string(elem)) >= ZAP_MAXVALUELEN)
 			return (E2BIG);
 	}
 
 	return (0);
 }
 
 /*
  * Allocate *newprops and fill it with every pair of props whose name
  * does not appear in skipped.  The caller owns the new list.
  */
 static void
 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
 {
 	nvpair_t *elem;
 
 	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
 	for (elem = nvlist_next_nvpair(props, NULL); elem != NULL;
 	    elem = nvlist_next_nvpair(props, elem)) {
 		if (!nvlist_exists(skipped, nvpair_name(elem)))
 			VERIFY(nvlist_add_nvpair(*newprops, elem) == 0);
 	}
 }
 
 /*
  * Re-set, with a ZPROP_SRC_NONE based source, every property of props
  * that is not named in skipped.  Returns the zfs_set_prop_nvlist()
  * result, or 0 when nothing was left to process.
  */
 static int
 clear_received_props(const char *dsname, nvlist_t *props,
     nvlist_t *skipped)
 {
 	nvlist_t *to_clear = NULL;
 	int rc = 0;
 
 	props_skip(props, skipped, &to_clear);
 
 	if (!nvlist_empty(to_clear)) {
 		/*
 		 * Acts on local properties until the dataset has received
 		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
 		 */
 		zprop_source_t src = (ZPROP_SRC_NONE |
 		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
 
 		rc = zfs_set_prop_nvlist(dsname, src, to_clear, NULL);
 	}
 
 	nvlist_free(to_clear);
 	return (rc);
 }
 
 /*
  * Set a list of properties on a dataset.
  *
  * inputs:
  * zc_name		name of filesystem
  * zc_value		name of property to set
  * zc_nvlist_src{_size}	nvlist of properties to apply
  * zc_cookie		received properties flag
  *
  * outputs:
  * zc_nvlist_dst{_size} error for each unapplied received property
  *
  * When the received flag is set, previously received properties that are
  * not part of the new list are cleared first (best effort) and the
  * dataset is marked as having received properties.
  */
 static int
 zfs_ioc_set_prop(zfs_cmd_t *zc)
 {
 	boolean_t is_recv = zc->zc_cookie;
 	zprop_source_t src;
 	nvlist_t *props;
 	nvlist_t *errlist;
 	int rc;
 
 	src = is_recv ? ZPROP_SRC_RECEIVED : ZPROP_SRC_LOCAL;
 
 	rc = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 	    zc->zc_iflags, &props);
 	if (rc != 0)
 		return (rc);
 
 	if (is_recv) {
 		nvlist_t *prev;
 
 		if (dsl_prop_get_received(zc->zc_name, &prev) == 0) {
 			(void) clear_received_props(zc->zc_name, prev, props);
 			nvlist_free(prev);
 		}
 
 		rc = dsl_prop_set_hasrecvd(zc->zc_name);
 	}
 
 	errlist = fnvlist_alloc();
 	if (rc == 0)
 		rc = zfs_set_prop_nvlist(zc->zc_name, src, props, errlist);
 
 	/* the per-property errors are reported even if rc is non-zero */
 	if (errlist != NULL && zc->zc_nvlist_dst != 0)
 		(void) put_nvlist(zc, errlist);
 
 	nvlist_free(errlist);
 	nvlist_free(props);
 	return (rc);
 }
 
 /*
  * Make a property inherit its value, or revert it to its received value.
  *
  * inputs:
  * zc_name		name of filesystem
  * zc_value		name of property to inherit
  * zc_cookie		revert to received value if TRUE
  *
  * outputs:		none
  */
 static int
 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
 {
 	const char *name = zc->zc_value;
 	zfs_prop_t prop = zfs_name_to_prop(name);
 	boolean_t revert = zc->zc_cookie;
 	nvlist_t *holder;
 	zprop_type_t kind;
 	int rc;
 
 	if (!revert) {
 		/*
 		 * Only check this in the non-received case. We want to allow
 		 * 'inherit -S' to revert non-inheritable properties like quota
 		 * and reservation to the received or default values even though
 		 * they are not considered inheritable.
 		 */
 		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
 			return (SET_ERROR(EINVAL));
 
 		/* explicitly inherit */
 		return (dsl_prop_inherit(zc->zc_name, zc->zc_value,
 		    ZPROP_SRC_INHERITED));
 	}
 
 	/*
 	 * Revert to the received value, if any.  zfs_prop_set_special()
 	 * expects properties in the form of an nvpair with type info, so
 	 * work out the type and build a throw-away pair of that type.
 	 */
 	if (prop == ZPROP_INVAL) {
 		if (!zfs_prop_user(name))
 			return (SET_ERROR(EINVAL));
 		kind = PROP_TYPE_STRING;
 	} else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) {
 		return (SET_ERROR(EINVAL));
 	} else {
 		kind = zfs_prop_get_type(prop);
 	}
 
 	VERIFY(nvlist_alloc(&holder, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
 	switch (kind) {
 	case PROP_TYPE_STRING:
 		VERIFY(0 == nvlist_add_string(holder, name, ""));
 		break;
 	case PROP_TYPE_NUMBER:
 	case PROP_TYPE_INDEX:
 		VERIFY(0 == nvlist_add_uint64(holder, name, 0));
 		break;
 	default:
 		nvlist_free(holder);
 		return (SET_ERROR(EINVAL));
 	}
 
 	rc = zfs_prop_set_special(zc->zc_name, ZPROP_SRC_NONE,
 	    nvlist_next_nvpair(holder, NULL));
 	nvlist_free(holder);
 	if (rc != -1)
 		return (rc); /* special property already handled */
 
 	/* property name has been validated by zfs_secpolicy_inherit_prop() */
 	return (dsl_prop_inherit(zc->zc_name, zc->zc_value, ZPROP_SRC_NONE));
 }
 
 /*
  * Set pool properties from the nvlist in zc_nvlist_src{_size} on the pool
  * named by zc_name.  A request that consists solely of the cachefile
  * property is served through spa_lookup() so that it also works on a
  * faulted pool; everything else goes through spa_open()/spa_prop_set().
  */
 static int
 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
 {
 	nvlist_t *props;
 	nvpair_t *first;
 	spa_t *pool;
 	boolean_t only_cachefile;
 	int rc;
 
 	rc = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 	    zc->zc_iflags, &props);
 	if (rc != 0)
 		return (rc);
 
 	/*
 	 * If the only property is the configfile, then just do a spa_lookup()
 	 * to handle the faulted case.
 	 */
 	first = nvlist_next_nvpair(props, NULL);
 	only_cachefile = (first != NULL &&
 	    strcmp(nvpair_name(first),
 	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
 	    nvlist_next_nvpair(props, first) == NULL);
 
 	if (only_cachefile) {
 		mutex_enter(&spa_namespace_lock);
 		pool = spa_lookup(zc->zc_name);
 		if (pool != NULL) {
 			spa_configfile_set(pool, props, B_FALSE);
 			spa_config_sync(pool, B_FALSE, B_TRUE);
 		}
 		mutex_exit(&spa_namespace_lock);
 
 		if (pool != NULL) {
 			nvlist_free(props);
 			return (0);
 		}
 		/* pool not found by lookup: fall through to spa_open() */
 	}
 
 	rc = spa_open(zc->zc_name, &pool, FTAG);
 	if (rc != 0) {
 		nvlist_free(props);
 		return (rc);
 	}
 
 	rc = spa_prop_set(pool, props);
 
 	nvlist_free(props);
 	spa_close(pool, FTAG);
 
 	return (rc);
 }
 
 /*
  * Fetch the properties of the pool named by zc_name and hand them back
  * with put_nvlist().  If any step before that fails, or if the caller
  * supplied no destination buffer (zc_nvlist_dst == 0), the result is
  * EFAULT.
  */
 static int
 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
 {
 	spa_t *pool;
 	nvlist_t *props = NULL;
 	int rc;
 
 	rc = spa_open(zc->zc_name, &pool, FTAG);
 	if (rc == 0) {
 		rc = spa_prop_get(pool, &props);
 		spa_close(pool, FTAG);
 	} else {
 		/*
 		 * If the pool is faulted, there may be properties we can still
 		 * get (such as altroot and cachefile), so attempt to get them
 		 * anyway.
 		 */
 		mutex_enter(&spa_namespace_lock);
 		pool = spa_lookup(zc->zc_name);
 		if (pool != NULL)
 			rc = spa_prop_get(pool, &props);
 		mutex_exit(&spa_namespace_lock);
 	}
 
 	if (rc != 0 || zc->zc_nvlist_dst == 0)
 		rc = SET_ERROR(EFAULT);
 	else
 		rc = put_nvlist(zc, props);
 
 	nvlist_free(props);
 	return (rc);
 }
 
 /*
  * Apply a set of delegated permissions to a dataset.
  *
  * inputs:
  * zc_name		name of filesystem
  * zc_nvlist_src{_size}	nvlist of delegated permissions
  * zc_perm_action	allow/unallow flag
  *
  * outputs:		none
  *
  * A malformed permission nvlist yields EINVAL.
  */
 static int
 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
 {
 	nvlist_t *acl = NULL;
 	int rc;
 
 	rc = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 	    zc->zc_iflags, &acl);
 	if (rc != 0)
 		return (rc);
 
 	/*
 	 * Verify nvlist is constructed correctly
 	 */
 	rc = zfs_deleg_verify_nvlist(acl);
 	if (rc != 0) {
 		nvlist_free(acl);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * If we don't have PRIV_SYS_MOUNT, then validate
 	 * that user is allowed to hand out each permission in
 	 * the nvlist(s)
 	 */
 	rc = secpolicy_zfs(CRED());
 	if (rc != 0) {
 		if (zc->zc_perm_action == B_FALSE)
 			rc = dsl_deleg_can_allow(zc->zc_name, acl, CRED());
 		else
 			rc = dsl_deleg_can_unallow(zc->zc_name, acl, CRED());
 	}
 
 	if (rc == 0)
 		rc = dsl_deleg_set(zc->zc_name, acl, zc->zc_perm_action);
 
 	nvlist_free(acl);
 	return (rc);
 }
 
 /*
  * Fetch the delegated permissions of a dataset.
  *
  * inputs:
  * zc_name		name of filesystem
  *
  * outputs:
  * nvlist of delegated permissions, handed back through put_nvlist()
  * NOTE(review): other handlers in this file document put_nvlist() output
  * as zc_nvlist_dst{_size} rather than zc_nvlist_src{_size} -- confirm.
  */
 static int
 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
 {
 	nvlist_t *acl;
 	int rc;
 
 	rc = dsl_deleg_get(zc->zc_name, &acl);
 	if (rc != 0)
 		return (rc);
 
 	rc = put_nvlist(zc, acl);
 	nvlist_free(acl);
 	return (rc);
 }
 
 /*
  * Walk the mount list, under mountlist_mtx, looking for the entry whose
  * resource string equals the one given.  The entry found is returned with
  * a hold taken on it (VFS_HOLD); the caller is responsible for releasing
  * it.  Returns NULL if no entry matches.
  */
 static vfs_t *
 zfs_get_vfs(const char *resource)
 {
 	vfs_t *mp;
 
 	mtx_lock(&mountlist_mtx);
 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 		if (strcmp(refstr_value(mp->vfs_resource), resource) != 0)
 			continue;
 
 		VFS_HOLD(mp);
 		break;
 	}
 	mtx_unlock(&mountlist_mtx);
 
 	return (mp);
 }
 
 /*
  * Creation callback: arg is a zfs_creat_t whose zct_zplprops are passed
  * on to zfs_create_fs() for the new objset.
  */
 /* ARGSUSED */
 static void
 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
 {
 	zfs_create_fs(os, cr, ((zfs_creat_t *)arg)->zct_zplprops, tx);
 }
 
 #define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
 
 /*
  * inputs:
  * os			parent objset pointer (NULL if root fs)
  * zplver		default zpl version (a "version" entry in createprops
  *			overrides it)
  * fuids_ok		fuids allowed in this version of the spa?
  * sa_ok		SAs allowed in this version of the spa?
  * createprops		list of properties requested by creator
  *
  * outputs:
  * zplprops	values for the zplprops we attach to the master node object
  * is_ci	true if requested file system will be purely case-insensitive
  *
  * Work out the utf8only, normalization and casesensitivity settings of a
  * file system that is about to be created.  Each value is taken from the
  * creator's request when present and otherwise obtained from
  * zfs_get_zplprop() on os.  A file system of too early a vintage may not
  * have any of these requested at all, not even with the default value.
  * The three properties must not become dsl properties, so they are
  * removed from createprops once they have been read.
  */
 static int
 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
     nvlist_t *zplprops, boolean_t *is_ci)
 {
 	uint64_t casesense = ZFS_PROP_UNDEFINED;
 	uint64_t normalize = ZFS_PROP_UNDEFINED;
 	uint64_t utf8only = ZFS_PROP_UNDEFINED;
 
 	ASSERT(zplprops != NULL);
 
 	/*
 	 * Pull out creator prop choices, if any.  The version stays in
 	 * createprops; the other three are consumed.
 	 */
 	if (createprops != NULL) {
 		const char *norm_name = zfs_prop_to_name(ZFS_PROP_NORMALIZE);
 		const char *utf8_name = zfs_prop_to_name(ZFS_PROP_UTF8ONLY);
 		const char *case_name = zfs_prop_to_name(ZFS_PROP_CASE);
 
 		(void) nvlist_lookup_uint64(createprops,
 		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
 
 		(void) nvlist_lookup_uint64(createprops, norm_name,
 		    &normalize);
 		(void) nvlist_remove_all(createprops, norm_name);
 
 		(void) nvlist_lookup_uint64(createprops, utf8_name,
 		    &utf8only);
 		(void) nvlist_remove_all(createprops, utf8_name);
 
 		(void) nvlist_lookup_uint64(createprops, case_name,
 		    &casesense);
 		(void) nvlist_remove_all(createprops, case_name);
 	}
 
 	/*
 	 * Error out if the requested zpl version is out of range, if it
 	 * needs FUIDs or SAs that the pool cannot provide, or if it is too
 	 * "young" to support normalization and the creator nevertheless
 	 * tried to set one of the three props.
 	 */
 	if (zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION)
 		return (SET_ERROR(ENOTSUP));
 	if (zplver >= ZPL_VERSION_FUID && !fuids_ok)
 		return (SET_ERROR(ENOTSUP));
 	if (zplver >= ZPL_VERSION_SA && !sa_ok)
 		return (SET_ERROR(ENOTSUP));
 	if (zplver < ZPL_VERSION_NORMALIZATION &&
 	    (normalize != ZFS_PROP_UNDEFINED ||
 	    utf8only != ZFS_PROP_UNDEFINED ||
 	    casesense != ZFS_PROP_UNDEFINED))
 		return (SET_ERROR(ENOTSUP));
 
 	/*
 	 * Put the version in the zplprops
 	 */
 	VERIFY(nvlist_add_uint64(zplprops,
 	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
 
 	if (normalize == ZFS_PROP_UNDEFINED) {
 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE,
 		    &normalize) == 0);
 	}
 	VERIFY(nvlist_add_uint64(zplprops,
 	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), normalize) == 0);
 
 	/*
 	 * If we're normalizing, names must always be valid UTF-8 strings.
 	 */
 	if (normalize != 0)
 		utf8only = 1;
 	if (utf8only == ZFS_PROP_UNDEFINED) {
 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY,
 		    &utf8only) == 0);
 	}
 	VERIFY(nvlist_add_uint64(zplprops,
 	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), utf8only) == 0);
 
 	if (casesense == ZFS_PROP_UNDEFINED) {
 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE,
 		    &casesense) == 0);
 	}
 	VERIFY(nvlist_add_uint64(zplprops,
 	    zfs_prop_to_name(ZFS_PROP_CASE), casesense) == 0);
 
 	if (is_ci != NULL)
 		*is_ci = (casesense == ZFS_CASE_INSENSITIVE);
 
 	return (0);
 }
 
 /*
  * Fill in zplprops for a new file system named 'dataset'.  The ZPL version
  * is derived from the pool's SPA version, and the parent dataset's objset
  * is held while zfs_fill_zplprops_impl() runs so that it can supply values
  * the creator did not give.
  */
 static int
 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
     nvlist_t *zplprops, boolean_t *is_ci)
 {
 	char parentname[MAXNAMELEN];
 	objset_t *parent_os = NULL;
 	boolean_t fuids_ok, sa_ok;
 	uint64_t pool_vers;
 	uint64_t zplver;
 	char *slash;
 	spa_t *spa;
 	int error;
 
 	/* The parent's name is everything before the last '/'. */
 	(void) strlcpy(parentname, dataset, sizeof (parentname));
 	slash = strrchr(parentname, '/');
 	ASSERT(slash != NULL);
 	*slash = '\0';
 
 	/* Only the pool version is needed, so drop the spa right away. */
 	error = spa_open(dataset, &spa, FTAG);
 	if (error != 0)
 		return (error);
 	pool_vers = spa_version(spa);
 	spa_close(spa, FTAG);
 
 	zplver = zfs_zpl_version_map(pool_vers);
 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
 	sa_ok = (zplver >= ZPL_VERSION_SA);
 
 	/*
 	 * Hold the parent object set for the duration of the fill so that
 	 * zplprop values can be inherited from it.
 	 */
 	error = dmu_objset_hold(parentname, FTAG, &parent_os);
 	if (error != 0)
 		return (error);
 
 	error = zfs_fill_zplprops_impl(parent_os, zplver, fuids_ok, sa_ok,
 	    createprops, zplprops, is_ci);
 	dmu_objset_rele(parent_os, FTAG);
 
 	return (error);
 }
 
 /*
  * Variant of zfs_fill_zplprops() that takes the SPA version directly and
  * passes a NULL objset to zfs_fill_zplprops_impl().
  */
 static int
 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
     nvlist_t *zplprops, boolean_t *is_ci)
 {
 	uint64_t zplver = zfs_zpl_version_map(spa_vers);
 	boolean_t fuids_ok = (zplver >= ZPL_VERSION_FUID);
 	boolean_t sa_ok = (zplver >= ZPL_VERSION_SA);
 
 	return (zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
 	    createprops, zplprops, is_ci));
 }
 
 /*
  * Create the filesystem or volume 'fsname', then apply "props" to it.
  *
  * innvl: {
  *     "type" -> dmu_objset_type_t (int32)
  *     (optional) "props" -> { prop -> value }
  * }
  *
  * outnvl: propname -> error code (int32)
  */
 static int
 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
 	zfs_creat_t zct = { 0 };
 	nvlist_t *nvprops = NULL;
 	boolean_t is_insensitive = B_FALSE;
 	dmu_objset_type_t type;
 	int32_t type32;
 	int error;
 
 	if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
 		return (SET_ERROR(EINVAL));
 	type = type32;
 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
 
 	/* Only filesystems and volumes have a creation callback. */
 	if (type == DMU_OST_ZFS)
 		cbfunc = zfs_create_cb;
 	else if (type == DMU_OST_ZVOL)
 		cbfunc = zvol_create_cb;
 	else
 		cbfunc = NULL;
 
 	/* The new name may contain neither '@' nor '%'. */
 	if (strchr(fsname, '@') != NULL || strchr(fsname, '%') != NULL)
 		return (SET_ERROR(EINVAL));
 
 	zct.zct_props = nvprops;
 
 	if (cbfunc == NULL)
 		return (SET_ERROR(EINVAL));
 
 	if (type == DMU_OST_ZVOL) {
 		uint64_t volsize, volblocksize;
 		int lookup_err;
 
 		/* A volume must be given at least its size. */
 		if (nvprops == NULL)
 			return (SET_ERROR(EINVAL));
 		if (nvlist_lookup_uint64(nvprops,
 		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
 			return (SET_ERROR(EINVAL));
 
 		/* The block size is optional; absent means the default. */
 		lookup_err = nvlist_lookup_uint64(nvprops,
 		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize);
 		if (lookup_err != 0 && lookup_err != ENOENT)
 			return (SET_ERROR(EINVAL));
 		if (lookup_err != 0) {
 			volblocksize = zfs_prop_default_numeric(
 			    ZFS_PROP_VOLBLOCKSIZE);
 		}
 
 		error = zvol_check_volblocksize(volblocksize);
 		if (error == 0)
 			error = zvol_check_volsize(volsize, volblocksize);
 		if (error != 0)
 			return (error);
 	} else if (type == DMU_OST_ZFS) {
 		/*
 		 * The normalization and case-folding flags have to be
 		 * correct at file system creation time, so work them out
 		 * before creating anything.
 		 */
 		VERIFY(nvlist_alloc(&zct.zct_zplprops,
 		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
 		error = zfs_fill_zplprops(fsname, nvprops,
 		    zct.zct_zplprops, &is_insensitive);
 		if (error != 0) {
 			nvlist_free(zct.zct_zplprops);
 			return (error);
 		}
 	}
 
 	error = dmu_objset_create(fsname, type,
 	    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
 	nvlist_free(zct.zct_zplprops);
 
 	/*
 	 * It would be nice to do this atomically.  As it stands, a failure
 	 * to set the properties destroys the dataset that was just created.
 	 */
 	if (error == 0) {
 		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
 		    nvprops, outnvl);
 		if (error != 0)
 			(void) dsl_destroy_head(fsname);
 	}
 #ifdef __FreeBSD__
 	if (error == 0 && type == DMU_OST_ZVOL)
 		zvol_create_minors(fsname);
 #endif
 	return (error);
 }
 
 /*
  * Clone the snapshot "origin" into a new dataset 'fsname', then apply
  * "props" to the clone.
  *
  * innvl: {
  *     "origin" -> name of origin snapshot
  *     (optional) "props" -> { prop -> value }
  * }
  *
  * outnvl: propname -> error code (int32)
  */
 static int
 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	nvlist_t *nvprops = NULL;
 	char *origin_name;
 	int error;
 
 	if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
 		return (SET_ERROR(EINVAL));
 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
 
 	/* The clone's name may contain neither '@' nor '%'. */
 	if (strchr(fsname, '@') != NULL || strchr(fsname, '%') != NULL)
 		return (SET_ERROR(EINVAL));
 
 	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
 		return (SET_ERROR(EINVAL));
 
 	error = dmu_objset_clone(fsname, origin_name);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * It would be nice to do this atomically.  As it stands, a failure
 	 * to set the properties destroys the clone that was just created.
 	 */
 	error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, nvprops, outnvl);
 	if (error != 0) {
 		(void) dsl_destroy_head(fsname);
 		return (error);
 	}
 #ifdef __FreeBSD__
 	zvol_create_minors(fsname);
 #endif
 	return (0);
 }
 
 /*
  * Validate the requested snapshot names and hand them, together with any
  * properties, to dsl_dataset_snapshot().
  *
  * innvl: {
  *     "snaps" -> { snapshot1, snapshot2 }
  *     (optional) "props" -> { prop -> value (string) }
  * }
  *
  * outnvl: snapshot -> error code (int32)
  */
 static int
 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	nvlist_t *snaps;
 	nvlist_t *props = NULL;
 	nvpair_t *cur;
 	int error, poollen;
 
 	(void) nvlist_lookup_nvlist(innvl, "props", &props);
 	error = zfs_check_userprops(poolname, props);
 	if (error != 0)
 		return (error);
 
 	/* A non-empty property list is refused on pools that are too old. */
 	if (!nvlist_empty(props) &&
 	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
 		return (SET_ERROR(ENOTSUP));
 
 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
 		return (SET_ERROR(EINVAL));
 
 	poollen = strlen(poolname);
 	cur = nvlist_next_nvpair(snaps, NULL);
 	while (cur != NULL) {
 		const char *snapname = nvpair_name(cur);
 		const char *at = strchr(snapname, '@');
 		nvpair_t *other;
 
 		/*
 		 * There has to be an '@', followed by a valid component
 		 * name.
 		 */
 		if (at == NULL)
 			return (SET_ERROR(EINVAL));
 		if (zfs_component_namecheck(at + 1, NULL, NULL) != 0)
 			return (SET_ERROR(EINVAL));
 
 		/*
 		 * The name has to start with the pool name, immediately
 		 * followed by '/' or '@'.
 		 */
 		if (strncmp(snapname, poolname, poollen) != 0)
 			return (SET_ERROR(EXDEV));
 		if (snapname[poollen] != '/' && snapname[poollen] != '@')
 			return (SET_ERROR(EXDEV));
 
 		/*
 		 * No later entry may share this one's "<fs>@" prefix, i.e.
 		 * at most one snapshot per file system.
 		 */
 		other = nvlist_next_nvpair(snaps, cur);
 		while (other != NULL) {
 			if (strncmp(snapname, nvpair_name(other),
 			    at - snapname + 1) == 0)
 				return (SET_ERROR(EXDEV));
 			other = nvlist_next_nvpair(snaps, other);
 		}
 
 		cur = nvlist_next_nvpair(snaps, cur);
 	}
 
 	return (dsl_dataset_snapshot(snaps, props, outnvl));
 }
 
 /*
  * Append "message" to the history of the pool whose name was stashed in
  * this thread's zfs_allow_log_key TSD slot.
  *
  * innvl: "message" -> string
  *
  * Returns 0 on success; EINVAL if no pool name is stashed or "message" is
  * missing; ENOTSUP if the pool predates SPA_VERSION_ZPOOL_HISTORY;
  * otherwise the error from spa_open() or spa_history_log().
  */
 /* ARGSUSED */
 static int
 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	char *message;
 	spa_t *spa;
 	int error;
 	char *poolname;
 
 	/*
 	 * The poolname in the ioctl is not set, we get it from the TSD,
 	 * which was set at the end of the last successful ioctl that allows
 	 * logging.  The secpolicy func is expected to have checked that it
 	 * is set, but a NULL here would be handed straight to spa_open()
 	 * and strfree(), so fail cleanly instead of relying on that.
 	 */
 	poolname = tsd_get(zfs_allow_log_key);
 	if (poolname == NULL)
 		return (SET_ERROR(EINVAL));
 
 	/*
 	 * Only one log ioctl is allowed after each successful ioctl, so
 	 * clear the TSD and release the stashed name once the pool has been
 	 * looked up.
 	 */
 	(void) tsd_set(zfs_allow_log_key, NULL);
 	error = spa_open(poolname, &spa, FTAG);
 	strfree(poolname);
 	if (error != 0)
 		return (error);
 
 	if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
 		spa_close(spa, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
 		spa_close(spa, FTAG);
 		return (SET_ERROR(ENOTSUP));
 	}
 
 	error = spa_history_log(spa, message);
 	spa_close(spa, FTAG);
 	return (error);
 }
 
 /*
  * The dp_config_rwlock must not be held when calling this, because the
  * unmount may need to write out data.
  *
  * This function is best-effort.  Callers must deal gracefully if it
  * remains mounted (or is remounted after this call).
  *
  * Returns 0 if the argument is not a snapshot, or it is not currently a
  * filesystem, or we were able to unmount it.  Returns error code otherwise.
  */
 int
 zfs_unmount_snap(const char *snapname)
 {
 	vfs_t *vfsp;
 	zfsvfs_t *zfsvfs;
 	int err;
 
 	/* Names without an '@' are not snapshots: nothing to do. */
 	if (strchr(snapname, '@') == NULL)
 		return (0);
 
 	/* No vfs found for this name: treated as success. */
 	vfsp = zfs_get_vfs(snapname);
 	if (vfsp == NULL)
 		return (0);
 
 	zfsvfs = vfsp->vfs_data;
 	ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
 
 	/*
 	 * NOTE(review): VFS_RELE() presumably drops the reference obtained
 	 * by zfs_get_vfs() -- confirm against zfs_get_vfs().  It runs
 	 * whether or not vn_vfswlock() succeeded.
 	 */
 	err = vn_vfswlock(vfsp->vfs_vnodecovered);
 	VFS_RELE(vfsp);
 	if (err != 0)
 		return (SET_ERROR(err));
 
 	/*
 	 * Always force the unmount for snapshots.
 	 */
 
 	/* The dounmount() result is deliberately ignored (best effort). */
 #ifdef illumos
 	(void) dounmount(vfsp, MS_FORCE, kcred);
 #else
 	mtx_lock(&Giant);	/* dounmount() */
 	(void) dounmount(vfsp, MS_FORCE, curthread);
 	mtx_unlock(&Giant);	/* dounmount() */
 #endif
 	return (0);
 }
 
 /*
  * Callback-shaped wrapper around zfs_unmount_snap(); 'arg' is ignored.
  */
 /* ARGSUSED */
 static int
 zfs_unmount_snap_cb(const char *snapname, void *arg)
 {
 	int error = zfs_unmount_snap(snapname);
 
 	return (error);
 }
 
 /*
  * When a clone is destroyed, its origin may also need to be destroyed,
  * in which case it must be unmounted.  This routine will do that unmount
  * if necessary.
  */
 void
 zfs_destroy_unmount_origin(const char *fsname)
 {
 	int error;
 	objset_t *os;
 	dsl_dataset_t *ds;
 
 	error = dmu_objset_hold(fsname, FTAG, &os);
 	if (error != 0)
 		return;
 	ds = dmu_objset_ds(os);
 	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
 		char originname[MAXNAMELEN];
 		dsl_dataset_name(ds->ds_prev, originname);
 		dmu_objset_rele(os, FTAG);
 		(void) zfs_unmount_snap(originname);
 	} else {
 		dmu_objset_rele(os, FTAG);
 	}
 }
 
 /*
  * Unmount each listed snapshot (and, on FreeBSD, remove its zvol minors),
  * then destroy them all via dsl_destroy_snapshots_nvl().
  *
  * innvl: {
  *     "snaps" -> { snapshot1, snapshot2 }
  *     (optional boolean) "defer"
  * }
  *
  * outnvl: snapshot -> error code (int32)
  *
  */
 /* ARGSUSED */
 static int
 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	boolean_t defer;
 	nvlist_t *snaps;
 	nvpair_t *cur;
 	int error, poollen;
 
 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
 		return (SET_ERROR(EINVAL));
 	defer = nvlist_exists(innvl, "defer");
 
 	poollen = strlen(poolname);
 	cur = nvlist_next_nvpair(snaps, NULL);
 	while (cur != NULL) {
 		const char *snapname = nvpair_name(cur);
 
 		/*
 		 * Every snapshot has to live in the named pool; otherwise
 		 * the zvol minor removal below could hit the wrong device.
 		 */
 		if (strncmp(snapname, poolname, poollen) != 0)
 			return (SET_ERROR(EXDEV));
 		if (snapname[poollen] != '/' && snapname[poollen] != '@')
 			return (SET_ERROR(EXDEV));
 
 		error = zfs_unmount_snap(snapname);
 		if (error != 0)
 			return (error);
 #if defined(__FreeBSD__)
 		zvol_remove_minors(snapname);
 #endif
 		cur = nvlist_next_nvpair(snaps, cur);
 	}
 
 	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
 }
 
 /*
  * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
  * All bookmarks must be in the same pool.
  *
  * Every value in innvl must be a string and every key must be unique;
  * otherwise EINVAL is returned before anything is created.
  *
  * innvl: {
  *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
  * }
  *
  * outnvl: bookmark -> error code (int32)
  *
  */
 /* ARGSUSED */
 static int
 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	nvpair_t *bmark = nvlist_next_nvpair(innvl, NULL);
 
 	while (bmark != NULL) {
 		nvpair_t *later;
 		char *snap_name;
 
 		/* The snapshot argument has to be a string. */
 		if (nvpair_value_string(bmark, &snap_name) != 0)
 			return (SET_ERROR(EINVAL));
 
 		/* No later entry may reuse this bookmark's name. */
 		later = nvlist_next_nvpair(innvl, bmark);
 		while (later != NULL) {
 			if (strcmp(nvpair_name(bmark),
 			    nvpair_name(later)) == 0)
 				return (SET_ERROR(EINVAL));
 			later = nvlist_next_nvpair(innvl, later);
 		}
 
 		bmark = nvlist_next_nvpair(innvl, bmark);
 	}
 
 	return (dsl_bookmark_create(innvl, outnvl));
 }
 
 /*
  * List the bookmarks of 'fsname'; the work is done by dsl_get_bookmarks().
  *
  * innvl: {
  *     property 1, property 2, ...
  * }
  *
  * outnvl: {
  *     bookmark name 1 -> { property 1, property 2, ... },
  *     bookmark name 2 -> { property 1, property 2, ... }
  * }
  *
  */
 static int
 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	int error = dsl_get_bookmarks(fsname, innvl, outnvl);
 
 	return (error);
 }
 
 /*
  * Validate the listed bookmark names and destroy them via
  * dsl_bookmark_destroy().
  *
  * innvl: {
  *     bookmark name 1, bookmark name 2
  * }
  *
  * outnvl: bookmark -> error code (int32)
  *
  */
 static int
 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
     nvlist_t *outnvl)
 {
 	nvpair_t *cur;
 	int poollen;
 
 	poollen = strlen(poolname);
 	cur = nvlist_next_nvpair(innvl, NULL);
 	while (cur != NULL) {
 		const char *bmname = nvpair_name(cur);
 		const char *hash = strchr(bmname, '#');
 
 		/*
 		 * There has to be a '#', followed by a valid component
 		 * name.
 		 */
 		if (hash == NULL)
 			return (SET_ERROR(EINVAL));
 		if (zfs_component_namecheck(hash + 1, NULL, NULL) != 0)
 			return (SET_ERROR(EINVAL));
 
 		/*
 		 * The name has to start with the pool name, immediately
 		 * followed by '/' or '#'.
 		 */
 		if (strncmp(bmname, poolname, poollen) != 0)
 			return (SET_ERROR(EXDEV));
 		if (bmname[poollen] != '/' && bmname[poollen] != '#')
 			return (SET_ERROR(EXDEV));
 
 		cur = nvlist_next_nvpair(innvl, cur);
 	}
 
 	return (dsl_bookmark_destroy(innvl, outnvl));
 }
 
 /*
  * Destroy a single dataset or snapshot.
  *
  * inputs:
  * zc_name		name of dataset to destroy
  * zc_objset_type	type of objset
  * zc_defer_destroy	mark for deferred destroy
  *
  * outputs:		none
  */
 static int
 zfs_ioc_destroy(zfs_cmd_t *zc)
 {
 	boolean_t is_snapshot;
 	int err;
 
 	/* zfs_unmount_snap() is a no-op unless zc_name names a snapshot. */
 	if (zc->zc_objset_type == DMU_OST_ZFS) {
 		err = zfs_unmount_snap(zc->zc_name);
 		if (err != 0)
 			return (err);
 	}
 
 	is_snapshot = (strchr(zc->zc_name, '@') != NULL);
 	if (is_snapshot)
 		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
 	else
 		err = dsl_destroy_head(zc->zc_name);
 
 	/* Device minors are removed only after a volume destroy succeeds. */
 	if (err != 0 || zc->zc_objset_type != DMU_OST_ZVOL)
 		return (err);
 
 #ifdef __FreeBSD__
 	zvol_remove_minors(zc->zc_name);
 #else
 	(void) zvol_remove_minor(zc->zc_name);
 #endif
 	return (err);
 }
 
 /*
  * fsname is name of dataset to rollback (to most recent snapshot)
  *
  * innvl is not used.
  *
  * outnvl: "target" -> name of most recent snapshot
  *
  * If getzfsvfs() finds a zfsvfs for fsname, the file system is suspended
  * around the rollback and resumed afterwards; otherwise the rollback is
  * done directly.
  */
 /* ARGSUSED */
 static int
 zfs_ioc_rollback(const char *fsname, nvlist_t *args, nvlist_t *outnvl)
 {
 	zfsvfs_t *zfsvfs;
 	int error;
 
 	if (getzfsvfs(fsname, &zfsvfs) != 0)
 		return (dsl_dataset_rollback(fsname, NULL, outnvl));
 
 	error = zfs_suspend_fs(zfsvfs);
 	if (error == 0) {
 		int resume_err;
 
 		/*
 		 * Resume is attempted even if the rollback failed; the
 		 * rollback's error wins when both fail.
 		 */
 		error = dsl_dataset_rollback(fsname, zfsvfs, outnvl);
 		resume_err = zfs_resume_fs(zfsvfs, fsname);
 		if (error == 0)
 			error = resume_err;
 	}
 	VFS_RELE(zfsvfs->z_vfs);
 
 	return (error);
 }
 
 /*
  * Dataset-iteration callback: unmount the snapshot "<fsname>@<arg>", where
  * 'arg' is the snapshot name as a C string.
  */
 static int
 recursive_unmount(const char *fsname, void *arg)
 {
 	char snap_fullname[MAXNAMELEN];
 	const char *shortname = arg;
 
 	(void) snprintf(snap_fullname, sizeof (snap_fullname), "%s@%s",
 	    fsname, shortname);
 
 	return (zfs_unmount_snap(snap_fullname));
 }
 
 /*
  * inputs:
  * zc_name	old name of dataset
  * zc_value	new name of dataset
  * zc_cookie	recursive flag (only valid for snapshots)
  *
  * outputs:	none
  */
 static int
 zfs_ioc_rename(zfs_cmd_t *zc)
 {
 	/* Bit 0 of zc_cookie is the recursive flag. */
 	boolean_t recursive = zc->zc_cookie & 1;
 	char *at;
 	boolean_t allow_mounted = B_TRUE;
 
 #ifdef __FreeBSD__
 	/* On FreeBSD, bit 1 of zc_cookie drives allow_mounted. */
 	allow_mounted = (zc->zc_cookie & 2) != 0;
 #endif
 
 	/* Force termination of the new name before validating it. */
 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
 	    strchr(zc->zc_value, '%'))
 		return (SET_ERROR(EINVAL));
 
 	at = strchr(zc->zc_name, '@');
 	if (at != NULL) {
 		/* snaps must be in same fs */
 		int error;
 
 		/*
 		 * Compare everything up to and including the '@'; a match
 		 * also guarantees that zc_value contains an '@', which the
 		 * strchr() on zc_value below relies on.
 		 */
 		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
 			return (SET_ERROR(EXDEV));
 		/*
 		 * Temporarily split zc_name into "<fs>" and "<snap>"; the
 		 * '@' is put back on every path out of this branch.
 		 */
 		*at = '\0';
 		/*
 		 * NOTE(review): the unmount pass runs when allow_mounted is
 		 * set, which reads as the opposite of the variable's name --
 		 * confirm the polarity is intended.
 		 */
 		if (zc->zc_objset_type == DMU_OST_ZFS && allow_mounted) {
 			error = dmu_objset_find(zc->zc_name,
 			    recursive_unmount, at + 1,
 			    recursive ? DS_FIND_CHILDREN : 0);
 			if (error != 0) {
 				*at = '@';
 				return (error);
 			}
 		}
 		error = dsl_dataset_rename_snapshot(zc->zc_name,
 		    at + 1, strchr(zc->zc_value, '@') + 1, recursive);
 		*at = '@';
 
 		return (error);
 	} else {
 		/* Not a snapshot: rename the dataset itself. */
 #ifdef illumos
 		if (zc->zc_objset_type == DMU_OST_ZVOL)
 			(void) zvol_remove_minor(zc->zc_name);
 #endif
 		return (dsl_dir_rename(zc->zc_name, zc->zc_value));
 	}
 }
 
 /*
  * Decide whether the property carried by 'pair' may be set on 'dsname'.
  *
  * Names that are not native properties are accepted only if they are user
  * properties or (on non-snapshots) user/group quota properties, and then
  * only if 'cr' passes the matching zfs_secpolicy_write_perms() check.
  * Native properties are refused on snapshots, checked against the pool
  * version, enabled features or ZPL version where the switch below has a
  * case for them, and finally handed to zfs_secpolicy_setprop().
  *
  * Returns 0 if the property may be set, otherwise an errno.  EINVAL,
  * ENOTSUP, ERANGE and EDOM are produced here; anything else comes from the
  * helpers that are called.
  */
 static int
 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
 {
 	const char *propname = nvpair_name(pair);
 	boolean_t issnap = (strchr(dsname, '@') != NULL);
 	zfs_prop_t prop = zfs_name_to_prop(propname);
 	uint64_t intval;
 	int err;
 
 	if (prop == ZPROP_INVAL) {
 		if (zfs_prop_user(propname)) {
 			return (zfs_secpolicy_write_perms(dsname,
 			    ZFS_DELEG_PERM_USERPROP, cr));
 		}
 
 		if (!issnap && zfs_prop_userquota(propname)) {
 			const char *perm = NULL;
 			const char *uq_prefix =
 			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
 			const char *gq_prefix =
 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
 
 			if (strncmp(propname, uq_prefix,
 			    strlen(uq_prefix)) == 0) {
 				perm = ZFS_DELEG_PERM_USERQUOTA;
 			} else if (strncmp(propname, gq_prefix,
 			    strlen(gq_prefix)) == 0) {
 				perm = ZFS_DELEG_PERM_GROUPQUOTA;
 			} else {
 				/* USERUSED and GROUPUSED are read-only */
 				return (SET_ERROR(EINVAL));
 			}
 
 			return (zfs_secpolicy_write_perms(dsname, perm, cr));
 		}
 
 		return (SET_ERROR(EINVAL));
 	}
 
 	/* Native properties cannot be set on snapshots. */
 	if (issnap)
 		return (SET_ERROR(EINVAL));
 
 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 		/*
 		 * dsl_prop_get_all_impl() returns properties in this
 		 * format; from here on 'pair' is the inner value pair.
 		 */
 		nvlist_t *attrs;
 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
 		    &pair) == 0);
 	}
 
 	/*
 	 * Check that this value is valid for this pool version
 	 */
 	switch (prop) {
 	case ZFS_PROP_COMPRESSION:
 		/*
 		 * If the user specified gzip compression, make sure
 		 * the SPA supports it. We ignore any errors here since
 		 * we'll catch them later.
 		 */
 		if (nvpair_value_uint64(pair, &intval) == 0) {
 			if (intval >= ZIO_COMPRESS_GZIP_1 &&
 			    intval <= ZIO_COMPRESS_GZIP_9 &&
 			    zfs_earlier_version(dsname,
 			    SPA_VERSION_GZIP_COMPRESSION)) {
 				return (SET_ERROR(ENOTSUP));
 			}
 
 			if (intval == ZIO_COMPRESS_ZLE &&
 			    zfs_earlier_version(dsname,
 			    SPA_VERSION_ZLE_COMPRESSION))
 				return (SET_ERROR(ENOTSUP));
 
 			if (intval == ZIO_COMPRESS_LZ4) {
 				spa_t *spa;
 
 				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 					return (err);
 
 				if (!spa_feature_is_enabled(spa,
 				    SPA_FEATURE_LZ4_COMPRESS)) {
 					spa_close(spa, FTAG);
 					return (SET_ERROR(ENOTSUP));
 				}
 				spa_close(spa, FTAG);
 			}
 
 			/*
 			 * If this is a bootable dataset then
 			 * verify that the compression algorithm
 			 * is supported for booting. We must return
 			 * something other than ENOTSUP since it
 			 * implies a downrev pool version.
 			 */
 			if (zfs_is_bootfs(dsname) &&
 			    !BOOTFS_COMPRESS_VALID(intval)) {
 				return (SET_ERROR(ERANGE));
 			}
 		}
 		break;
 
 	case ZFS_PROP_COPIES:
 		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
 			return (SET_ERROR(ENOTSUP));
 		break;
 
 	case ZFS_PROP_DEDUP:
 		if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
 			return (SET_ERROR(ENOTSUP));
 		break;
 
 	case ZFS_PROP_RECORDSIZE:
 		/* Record sizes above 128k need the feature to be enabled */
 		if (nvpair_value_uint64(pair, &intval) == 0 &&
 		    intval > SPA_OLD_MAXBLOCKSIZE) {
 			spa_t *spa;
 
 			/*
 			 * If this is a bootable dataset then we don't
 			 * allow large (>128K) blocks, because GRUB doesn't
 			 * support them.  (The size test is already
 			 * established by the enclosing condition.)
 			 */
 			if (zfs_is_bootfs(dsname))
 				return (SET_ERROR(EDOM));
 
 			/*
 			 * We don't allow setting the property above 1MB,
 			 * unless the tunable has been changed.
 			 */
 			if (intval > zfs_max_recordsize ||
 			    intval > SPA_MAXBLOCKSIZE)
 				return (SET_ERROR(EDOM));
 
 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 				return (err);
 
 			if (!spa_feature_is_enabled(spa,
 			    SPA_FEATURE_LARGE_BLOCKS)) {
 				spa_close(spa, FTAG);
 				return (SET_ERROR(ENOTSUP));
 			}
 			spa_close(spa, FTAG);
 		}
 		break;
 
 	case ZFS_PROP_SHARESMB:
 		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
 			return (SET_ERROR(ENOTSUP));
 		break;
 
 	case ZFS_PROP_ACLINHERIT:
 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
 		    nvpair_value_uint64(pair, &intval) == 0) {
 			if (intval == ZFS_ACL_PASSTHROUGH_X &&
 			    zfs_earlier_version(dsname,
 			    SPA_VERSION_PASSTHROUGH_X))
 				return (SET_ERROR(ENOTSUP));
 		}
 		break;
 
 	default:
 		/* No version-specific restriction for other properties. */
 		break;
 	}
 
 	/*
 	 * NOTE(review): this passes CRED() rather than the 'cr' argument
 	 * used for the permission checks above; confirm that is intended.
 	 */
 	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
 }
 
 /*
  * Check half of the feature-activation sync task.  Guards against a race
  * so that a feature flag is not incremented more than once: if the feature
  * is already active the task is refused with EBUSY.  'arg' points at the
  * spa_feature_t being activated.
  */
 static int
 zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
 {
 	spa_feature_t *featurep = arg;
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 
 	if (spa_feature_is_active(spa, *featurep))
 		return (SET_ERROR(EBUSY));
 
 	return (0);
 }
 
 /*
  * Sync half of the feature-activation sync task started by
  * zfs_prop_activate_feature(): increments the feature that 'arg' points
  * at, within transaction 'tx'.
  */
 static void
 zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
 {
 	spa_feature_t *featurep = arg;
 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
 
 	spa_feature_incr(spa, *featurep, tx);
 }
 
 /*
  * Activates a feature on a pool in response to a property setting, by
  * running a sync task that marks the feature active.  Finding the feature
  * already active is not an error.
  */
 static int
 zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature)
 {
 	int err = dsl_sync_task(spa_name(spa),
 	    zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
 	    &feature, 2, ZFS_SPACE_CHECK_RESERVED);
 
 	/* The check callback reports "already active" as EBUSY. */
 	return ((err == EBUSY) ? 0 : err);
 }
 
 /*
  * Removes properties from the given props list that fail permission checks
  * needed to clear them and to restore them in case of a receive error. For each
  * property, make sure we have both set and inherit permissions.
  *
  * Returns the first error encountered if any permission checks fail. If the
  * caller provides a non-NULL errlist, it also gives the complete list of names
  * of all the properties that failed a permission check along with the
  * corresponding error numbers. The caller is responsible for freeing the
  * returned errlist.
  *
  * If every property checks out successfully, zero is returned and the list
  * pointed at by errlist is NULL.
  */
 static int
 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
 {
 	zfs_cmd_t *zc;
 	nvpair_t *pair, *next_pair;
 	nvlist_t *errors;
 	int err, rv = 0;
 
 	if (props == NULL)
 		return (0);
 
 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
 	/*
 	 * A scratch zfs_cmd_t is built only to satisfy the signature of
 	 * zfs_secpolicy_inherit_prop().  zc_name and zc_value are fixed-size
 	 * buffers, so the copies into them are bounded.
 	 */
 	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
 	(void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name));
 	pair = nvlist_next_nvpair(props, NULL);
 	while (pair != NULL) {
 		/* Fetch the successor first: 'pair' may be removed below. */
 		next_pair = nvlist_next_nvpair(props, pair);
 
 		(void) strlcpy(zc->zc_value, nvpair_name(pair),
 		    sizeof (zc->zc_value));
 		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
 		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
 			/*
 			 * The error is recorded under the copy of the name
 			 * in zc_value, since the pair itself has just been
 			 * removed from props.
 			 */
 			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
 			VERIFY(nvlist_add_int32(errors,
 			    zc->zc_value, err) == 0);
 		}
 		pair = next_pair;
 	}
 	kmem_free(zc, sizeof (zfs_cmd_t));
 
 	/* The return value is the first recorded error, if any. */
 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
 		nvlist_free(errors);
 		errors = NULL;
 	} else {
 		VERIFY(nvpair_value_int32(pair, &rv) == 0);
 	}
 
 	/* Hand the list to the caller if wanted, otherwise discard it. */
 	if (errlist == NULL)
 		nvlist_free(errors);
 	else
 		*errlist = errors;
 
 	return (rv);
 }
 
 /*
  * Report whether two property nvpairs carry the same value.  Either pair
  * may be in the dsl_prop_get_all_impl() format (an nvlist holding the real
  * value under ZPROP_VALUE), in which case the inner pair is compared.
  * Pairs of different types are unequal; strings are compared with
  * strcmp() and every other type is read as a uint64.
  */
 static boolean_t
 propval_equals(nvpair_t *p1, nvpair_t *p2)
 {
 	nvlist_t *attrs;
 	uint64_t num1, num2;
 	char *str1, *str2;
 
 	/* Unwrap the dsl_prop_get_all_impl() format where present. */
 	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
 		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p1) == 0);
 	}
 	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
 		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p2) == 0);
 	}
 
 	if (nvpair_type(p1) != nvpair_type(p2))
 		return (B_FALSE);
 
 	if (nvpair_type(p1) != DATA_TYPE_STRING) {
 		VERIFY(nvpair_value_uint64(p1, &num1) == 0);
 		VERIFY(nvpair_value_uint64(p2, &num2) == 0);
 		return (num1 == num2);
 	}
 
 	VERIFY(nvpair_value_string(p1, &str1) == 0);
 	VERIFY(nvpair_value_string(p2, &str2) == 0);
 	return (strcmp(str1, str2) == 0);
 }
 
 /*
  * Drop from props every property whose value matches the one already in
  * origprops, and drop the matching entry from origprops too: a property
  * that is not going to change needs to be neither received nor cleared or
  * restored.
  */
 static void
 props_reduce(nvlist_t *props, nvlist_t *origprops)
 {
 	nvpair_t *cur;
 
 	/* Nothing to compare against: all props need to be received. */
 	if (origprops == NULL)
 		return;
 
 	cur = nvlist_next_nvpair(props, NULL);
 	while (cur != NULL) {
 		/* Fetch the successor first: 'cur' may be removed below. */
 		nvpair_t *following = nvlist_next_nvpair(props, cur);
 		nvpair_t *match;
 
 		if (nvlist_lookup_nvpair(origprops, nvpair_name(cur),
 		    &match) == 0 && propval_equals(cur, match)) {
 			/* don't clear the existing received value */
 			(void) nvlist_remove_nvpair(origprops, match);
 			/* don't bother receiving the property */
 			(void) nvlist_remove_nvpair(props, cur);
 		}
 		/* Otherwise the received value still needs to be set. */
 
 		cur = following;
 	}
 }
 
 #ifdef	DEBUG
 /*
  * Error-injection switch for DEBUG builds: when set, zfs_ioc_recv() clears
  * it and forces its error code to 1 after the stream has been processed.
  */
 static boolean_t zfs_ioc_recv_inject_err;
 #endif
 
 /*
  * inputs:
  * zc_name		name of containing filesystem
  * zc_nvlist_src{_size}	nvlist of properties to apply
  * zc_value		name of snapshot to create
  * zc_string		name of clone origin (if DRR_FLAG_CLONE)
  * zc_cookie		file descriptor to recv from
  * zc_begin_record	the BEGIN record of the stream (not byteswapped)
  * zc_guid		force flag
  * zc_cleanup_fd	cleanup-on-exit file descriptor
  * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
  *
  * outputs:
  * zc_cookie		number of bytes read
  * zc_nvlist_dst{_size} error for each unapplied received property
  * zc_obj		zprop_errflags_t
  * zc_action_handle	handle for this guid/ds mapping
  */
 static int
 zfs_ioc_recv(zfs_cmd_t *zc)
 {
 	file_t *fp;
 	dmu_recv_cookie_t drc;
 	boolean_t force = (boolean_t)zc->zc_guid;
 	int fd;
 	int error = 0;
 	int props_error = 0;
 	nvlist_t *errors;
 	offset_t off;
 	nvlist_t *props = NULL; /* sent properties */
 	nvlist_t *origprops = NULL; /* existing properties */
 	char *origin = NULL;
 	char *tosnap;
 	char tofs[ZFS_MAXNAMELEN];
 	cap_rights_t rights;
 	boolean_t first_recvd_props = B_FALSE;
 
 	/* zc_value must be a valid "<fs>@<snap>" name containing no '%'. */
 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
 	    strchr(zc->zc_value, '@') == NULL ||
 	    strchr(zc->zc_value, '%'))
 		return (SET_ERROR(EINVAL));
 
 	/* Split the name at the '@' into tofs and tosnap. */
 	(void) strcpy(tofs, zc->zc_value);
 	tosnap = strchr(tofs, '@');
 	*tosnap++ = '\0';
 
 	/* Fetch the sent properties, if the caller supplied any. */
 	if (zc->zc_nvlist_src != 0 &&
 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 	    zc->zc_iflags, &props)) != 0)
 		return (error);
 
 	/* zc_cookie carries the descriptor the stream is read from. */
 	fd = zc->zc_cookie;
 	fp = getf(fd, cap_rights_init(&rights, CAP_PREAD));
 	if (fp == NULL) {
 		nvlist_free(props);
 		return (SET_ERROR(EBADF));
 	}
 
 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
 	/* An empty zc_string means no clone origin was given. */
 	if (zc->zc_string[0])
 		origin = zc->zc_string;
 
 	error = dmu_recv_begin(tofs, tosnap,
 	    &zc->zc_begin_record, force, origin, &drc);
 	if (error != 0)
 		goto out;
 
 	/*
 	 * Set properties before we receive the stream so that they are applied
 	 * to the new data. Note that we must call dmu_recv_stream() if
 	 * dmu_recv_begin() succeeds.
 	 */
 	if (props != NULL && !drc.drc_newfs) {
 		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
 		    SPA_VERSION_RECVD_PROPS &&
 		    !dsl_prop_get_hasrecvd(tofs))
 			first_recvd_props = B_TRUE;
 
 		/*
 		 * If new received properties are supplied, they are to
 		 * completely replace the existing received properties, so stash
 		 * away the existing ones.
 		 */
 		if (dsl_prop_get_received(tofs, &origprops) == 0) {
 			nvlist_t *errlist = NULL;
 			/*
 			 * Don't bother writing a property if its value won't
 			 * change (and avoid the unnecessary security checks).
 			 *
 			 * The first receive after SPA_VERSION_RECVD_PROPS is a
 			 * special case where we blow away all local properties
 			 * regardless.
 			 */
 			if (!first_recvd_props)
 				props_reduce(props, origprops);
 			/* Per-property permission failures go into 'errors'. */
 			if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
 				(void) nvlist_merge(errors, errlist, 0);
 			nvlist_free(errlist);
 
 			if (clear_received_props(tofs, origprops,
 			    first_recvd_props ? NULL : props) != 0)
 				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
 		} else {
 			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
 		}
 	}
 
 	if (props != NULL) {
 		props_error = dsl_prop_set_hasrecvd(tofs);
 
 		if (props_error == 0) {
 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
 			    props, errors);
 		}
 	}
 
 	/* Copy the per-property errors out if the caller gave a buffer. */
 	if (zc->zc_nvlist_dst_size != 0 &&
 	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
 	    put_nvlist(zc, errors) != 0)) {
 		/*
 		 * Caller made zc->zc_nvlist_dst less than the minimum expected
 		 * size or supplied an invalid address.
 		 */
 		props_error = SET_ERROR(EINVAL);
 	}
 
 	/* Receive the stream, tracking the file offset in 'off'. */
 	off = fp->f_offset;
 	error = dmu_recv_stream(&drc, fp, &off, zc->zc_cleanup_fd,
 	    &zc->zc_action_handle);
 
 	if (error == 0) {
 		zfsvfs_t *zfsvfs = NULL;
 
 		if (getzfsvfs(tofs, &zfsvfs) == 0) {
 			/* online recv */
 			int end_err;
 
 			error = zfs_suspend_fs(zfsvfs);
 			/*
 			 * If the suspend fails, then the recv_end will
 			 * likely also fail, and clean up after itself.
 			 */
 			end_err = dmu_recv_end(&drc, zfsvfs);
 			if (error == 0)
 				error = zfs_resume_fs(zfsvfs, tofs);
 			/* A suspend/resume error wins over end_err. */
 			error = error ? error : end_err;
 			VFS_RELE(zfsvfs->z_vfs);
 		} else {
 			error = dmu_recv_end(&drc, NULL);
 		}
 	}
 
 	/* Report how far the stream advanced, then update the file offset. */
 	zc->zc_cookie = off - fp->f_offset;
 	if (off >= 0 && off <= MAXOFFSET_T)
 		fp->f_offset = off;
 
 #ifdef	DEBUG
 	/* One-shot error injection for testing the restore path below. */
 	if (zfs_ioc_recv_inject_err) {
 		zfs_ioc_recv_inject_err = B_FALSE;
 		error = 1;
 	}
 #endif
 
 #ifdef __FreeBSD__
 	if (error == 0)
 		zvol_create_minors(tofs);
 #endif
 
 	/*
 	 * On error, restore the original props.
 	 */
 	if (error != 0 && props != NULL && !drc.drc_newfs) {
 		if (clear_received_props(tofs, props, NULL) != 0) {
 			/*
 			 * We failed to clear the received properties.
 			 * Since we may have left a $recvd value on the
 			 * system, we can't clear the $hasrecvd flag.
 			 */
 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
 		} else if (first_recvd_props) {
 			dsl_prop_unset_hasrecvd(tofs);
 		}
 
 		if (origprops == NULL && !drc.drc_newfs) {
 			/* We failed to stash the original properties. */
 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
 		}
 
 		/*
 		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
 		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
 		 * explicitly if we're restoring local properties cleared in the
 		 * first new-style receive.
 		 */
 		if (origprops != NULL &&
 		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
 		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
 		    origprops, NULL) != 0) {
 			/*
 			 * We stashed the original properties but failed to
 			 * restore them.
 			 */
 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
 		}
 	}
 out:
 	/* Common exit: free the nvlists and release the descriptor. */
 	nvlist_free(props);
 	nvlist_free(origprops);
 	nvlist_free(errors);
 	releasef(fd);
 
 	/* A property error is reported only if the receive itself worked. */
 	if (error == 0)
 		error = props_error;
 
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name	name of snapshot to send
  * zc_cookie	file descriptor to send stream to
  * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
  * zc_sendobj	objsetid of snapshot to send
  * zc_fromobj	objsetid of incremental fromsnap (may be zero)
  * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
  *		output size in zc_objset_type.
  * zc_flags	lzc_send_flags
  *
  * outputs:
  * zc_objset_type	estimated size, if zc_guid is set
  */
 static int
 zfs_ioc_send(zfs_cmd_t *zc)
 {
 	int error;
 	offset_t off;
 	dsl_pool_t *dp;
 	dsl_dataset_t *tosnap;
 	dsl_dataset_t *fromsnap = NULL;
 	boolean_t estimate = (zc->zc_guid != 0);
 	boolean_t embedok = (zc->zc_flags & 0x1);
 	boolean_t large_block_ok = (zc->zc_flags & 0x2);
 
 	/*
 	 * zc_obj is the "fromorigin" flag.  When it is set and the snapshot
 	 * being sent lives in a clone, the clone's origin object replaces
 	 * zc_fromobj as the incremental source.
 	 */
 	if (zc->zc_obj != 0) {
 		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 		if (error != 0)
 			return (error);
 
 		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
 		if (error != 0) {
 			dsl_pool_rele(dp, FTAG);
 			return (error);
 		}
 
 		if (dsl_dir_is_clone(tosnap->ds_dir)) {
 			zc->zc_fromobj =
 			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
 		}
 		dsl_dataset_rele(tosnap, FTAG);
 		dsl_pool_rele(dp, FTAG);
 	}
 
 	if (!estimate) {
 		/* Real send: write the stream to the fd in zc_cookie. */
 		file_t *fp;
 		cap_rights_t rights;
 
 		fp = getf(zc->zc_cookie,
 		    cap_rights_init(&rights, CAP_WRITE));
 		if (fp == NULL)
 			return (SET_ERROR(EBADF));
 
 		off = fp->f_offset;
 		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
 		    zc->zc_fromobj, embedok, large_block_ok,
 #ifdef illumos
 		    zc->zc_cookie, fp->f_vnode, &off);
 #else
 		    zc->zc_cookie, fp, &off);
 #endif
 
 		/* Store the offset back only if it is in the valid range. */
 		if (off >= 0 && off <= MAXOFFSET_T)
 			fp->f_offset = off;
 		releasef(zc->zc_cookie);
 		return (error);
 	}
 
 	/* Estimate only: the result is reported through zc_objset_type. */
 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 	if (error != 0)
 		return (error);
 
 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
 	if (error == 0) {
 		if (zc->zc_fromobj != 0) {
 			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
 			    FTAG, &fromsnap);
 		}
 		if (error == 0) {
 			error = dmu_send_estimate(tosnap, fromsnap,
 			    &zc->zc_objset_type);
 			if (fromsnap != NULL)
 				dsl_dataset_rele(fromsnap, FTAG);
 		}
 		dsl_dataset_rele(tosnap, FTAG);
 	}
 	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name	name of snapshot on which to report progress
  * zc_cookie	file descriptor of send stream
  *
  * outputs:
  * zc_cookie	number of bytes written in send stream thus far
  */
 static int
 zfs_ioc_send_progress(zfs_cmd_t *zc)
 {
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	dmu_sendarg_t *stream;
 	int error;
 
 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 	if (error != 0)
 		return (error);
 
 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
 	if (error != 0) {
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 
 	mutex_enter(&ds->ds_sendstream_lock);
 
 	/*
 	 * Walk the dataset's active send streams looking for the one that
 	 * writes to the caller's file descriptor _and_ was started by the
 	 * current process.
 	 */
 	stream = list_head(&ds->ds_sendstreams);
 	while (stream != NULL &&
 	    !(stream->dsa_outfd == zc->zc_cookie &&
 	    stream->dsa_proc == curproc)) {
 		stream = list_next(&ds->ds_sendstreams, stream);
 	}
 
 	/* Report that stream's current offset, or ENOENT if none matched. */
 	if (stream == NULL)
 		error = SET_ERROR(ENOENT);
 	else
 		zc->zc_cookie = *(stream->dsa_off);
 
 	mutex_exit(&ds->ds_sendstream_lock);
 	dsl_dataset_rele(ds, FTAG);
 	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
 /*
  * Hand zc_name, zc_guid (narrowed to int) and zc_inject_record to
  * zio_inject_fault().  On success the id it produced is returned to the
  * caller in zc_guid; on failure zc_guid is left untouched.
  */
 static int
 zfs_ioc_inject_fault(zfs_cmd_t *zc)
 {
 	int inject_id;
 	int error;
 
 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &inject_id,
 	    &zc->zc_inject_record);
 	if (error != 0)
 		return (error);
 
 	zc->zc_guid = (uint64_t)inject_id;
 	return (error);
 }
 
 /*
  * Thin wrapper: forwards zc_guid (narrowed to int) to zio_clear_fault()
  * and returns its result unchanged.
  */
 static int
 zfs_ioc_clear_fault(zfs_cmd_t *zc)
 {
 	return (zio_clear_fault((int)zc->zc_guid));
 }
 
 /*
  * Step through the injection list.  zc_guid (narrowed to int) seeds the
  * cursor passed to zio_inject_list_next(), which fills in zc_name and
  * zc_inject_record.  The possibly updated cursor is always written back
  * to zc_guid, whether or not the call succeeded.
  */
 static int
 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
 {
 	int cursor = (int)zc->zc_guid;
 	int error = zio_inject_list_next(&cursor, zc->zc_name,
 	    sizeof (zc->zc_name), &zc->zc_inject_record);
 
 	zc->zc_guid = cursor;
 	return (error);
 }
 
 /*
  * Copy the pool's error log into the user buffer described by
  * zc_nvlist_dst / zc_nvlist_dst_size.
  *
  * On success zc_nvlist_dst_size is replaced by the count returned from
  * spa_get_errlog(); on failure it is replaced by spa_get_errlog_size().
  */
 static int
 zfs_ioc_error_log(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	size_t count = (size_t)zc->zc_nvlist_dst_size;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
 	    &count);
 	if (error != 0)
 		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
 	else
 		zc->zc_nvlist_dst_size = count;
 
 	spa_close(spa, FTAG);
 
 	return (error);
 }
 
 /*
  * Clear error state on a pool, or on a single vdev of that pool.
  *
  * inputs:
  * zc_name		name of the pool
  * zc_guid		guid of the vdev to clear; 0 passes a NULL vdev to
  *			vdev_clear()
  * zc_cookie		if ZPOOL_NO_REWIND is set the pool is opened with
  *			spa_open(), otherwise with spa_open_rewind()
  * zc_nvlist_src[_size]	rewind policy nvlist (required unless
  *			ZPOOL_NO_REWIND is set)
  *
  * outputs:
  * zc_nvlist_dst	config nvlist produced by spa_open_rewind(), if any
  *
  * Returns 0 on success, EIO if the pool is unknown or zio_resume() fails,
  * EINVAL if the rewind policy is missing, ENODEV if zc_guid matches no
  * vdev, or the error from opening the pool / copying out the config.
  */
 static int
 zfs_ioc_clear(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	vdev_t *vd;
 	int error;
 
 	/*
 	 * On zpool clear we also fix up missing slogs
 	 */
 	mutex_enter(&spa_namespace_lock);
 	spa = spa_lookup(zc->zc_name);
 	if (spa == NULL) {
 		mutex_exit(&spa_namespace_lock);
 		return (SET_ERROR(EIO));
 	}
 	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
 		/* we need to let spa_open/spa_load clear the chains */
 		spa_set_log_state(spa, SPA_LOG_CLEAR);
 	}
 	spa->spa_last_open_failed = 0;
 	mutex_exit(&spa_namespace_lock);
 
 	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
 		error = spa_open(zc->zc_name, &spa, FTAG);
 	} else {
 		nvlist_t *policy;
 		nvlist_t *config = NULL;
 
 		if (zc->zc_nvlist_src == 0)
 			return (SET_ERROR(EINVAL));
 
 		if ((error = get_nvlist(zc->zc_nvlist_src,
 		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
 			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
 			    policy, &config);
 			if (config != NULL) {
 				int err;
 
 				if ((err = put_nvlist(zc, config)) != 0) {
 					/*
 					 * The open may have succeeded.  We
 					 * are about to fail the ioctl, so
 					 * drop the hold it took; otherwise
 					 * the early return below leaks it.
 					 */
 					if (error == 0)
 						spa_close(spa, FTAG);
 					error = err;
 				}
 				nvlist_free(config);
 			}
 			nvlist_free(policy);
 		}
 	}
 
 	if (error != 0)
 		return (error);
 
 	spa_vdev_state_enter(spa, SCL_NONE);
 
 	if (zc->zc_guid == 0) {
 		vd = NULL;
 	} else {
 		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
 		if (vd == NULL) {
 			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
 			spa_close(spa, FTAG);
 			return (SET_ERROR(ENODEV));
 		}
 	}
 
 	vdev_clear(spa, vd);
 
 	(void) spa_vdev_state_exit(spa, NULL, 0);
 
 	/*
 	 * Resume any suspended I/Os.
 	 */
 	if (zio_resume(spa) != 0)
 		error = SET_ERROR(EIO);
 
 	spa_close(spa, FTAG);
 
 	return (error);
 }
 
 /*
  * Reopen every vdev of the pool named by zc_name.
  *
  * Returns the spa_open() error if the pool cannot be opened, else 0.
  */
 static int
 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	int error = spa_open(zc->zc_name, &spa, FTAG);
 
 	if (error != 0)
 		return (error);
 
 	spa_vdev_state_enter(spa, SCL_NONE);
 
 	/*
 	 * While a resilver is already running, spa_scrub_reopen is held at
 	 * B_TRUE for the duration of the reopen so that the scan is not
 	 * restarted as a side effect.  Otherwise vdev_open() decides
 	 * whether a resilver is required.
 	 */
 	spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
 	vdev_reopen(spa->spa_root_vdev);
 	spa->spa_scrub_reopen = B_FALSE;
 
 	(void) spa_vdev_state_exit(spa, NULL, 0);
 	spa_close(spa, FTAG);
 	return (0);
 }
 /*
  * inputs:
  * zc_name	name of filesystem
  * zc_value	name of origin snapshot
  *
  * outputs:
  * zc_string	name of conflicting snapshot, if there is one
  */
 static int
 zfs_ioc_promote(zfs_cmd_t *zc)
 {
 	char *at = strchr(zc->zc_value, '@');
 
 	/*
 	 * Strip the snapshot component so zc_value names the origin
 	 * filesystem.  Unmounting *all* of its snapshots is more than is
 	 * strictly needed, but it's easier.
 	 */
 	if (at != NULL)
 		*at = '\0';
 
 	/* Best effort: the result of the unmount walk is ignored. */
 	(void) dmu_objset_find(zc->zc_value,
 	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
 
 	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
 }
 
 /*
  * Retrieve a single {user|group}{used|quota}@... property.
  *
  * inputs:
  * zc_name	name of filesystem
  * zc_objset_type zfs_userquota_prop_t
  * zc_value	domain name (eg. "S-1-234-567-89")
  * zc_guid	RID/UID/GID
  *
  * outputs:
  * zc_cookie	property value
  */
 static int
 zfs_ioc_userspace_one(zfs_cmd_t *zc)
 {
 	zfsvfs_t *zfsvfs;
 	int error;
 
 	/* Refuse property types outside the userquota property range. */
 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 		return (SET_ERROR(EINVAL));
 
 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
 	if (error == 0) {
 		/* The looked-up value is returned through zc_cookie. */
 		error = zfs_userspace_one(zfsvfs, zc->zc_objset_type,
 		    zc->zc_value, zc->zc_guid, &zc->zc_cookie);
 		zfsvfs_rele(zfsvfs, FTAG);
 	}
 
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of filesystem
  * zc_cookie		zap cursor
  * zc_objset_type	zfs_userquota_prop_t
  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
  *
  * outputs:
  * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
  * zc_cookie	zap cursor
  */
 static int
 zfs_ioc_userspace_many(zfs_cmd_t *zc)
 {
 	zfsvfs_t *zfsvfs;
 	int bufsize = zc->zc_nvlist_dst_size;
 
 	/*
 	 * The kernel buffer is allocated with the int-sized 'bufsize', but
 	 * zfs_userspace_many() is given &zc->zc_nvlist_dst_size as the
 	 * buffer size and that same field is used as the copyout length.
 	 * If the user-supplied size does not survive the narrowing to int
 	 * the two disagree and the buffer could be overrun, so treat any
 	 * such size like the other unusable sizes.
 	 */
 	if (bufsize <= 0 || (uint64_t)bufsize != zc->zc_nvlist_dst_size)
 		return (SET_ERROR(ENOMEM));
 
 	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
 	if (error != 0)
 		return (error);
 
 	void *buf = kmem_alloc(bufsize, KM_SLEEP);
 
 	/* zc_cookie and zc_nvlist_dst_size are updated in place. */
 	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
 	    buf, &zc->zc_nvlist_dst_size);
 
 	if (error == 0) {
 		error = ddi_copyout(buf,
 		    (void *)(uintptr_t)zc->zc_nvlist_dst,
 		    zc->zc_nvlist_dst_size, zc->zc_iflags);
 	}
 	/* Free with the size that was allocated, not the updated one. */
 	kmem_free(buf, bufsize);
 	zfsvfs_rele(zfsvfs, FTAG);
 
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of filesystem
  *
  * outputs:
  * none
  */
 static int
 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
 {
 	objset_t *os;
 	int error = 0;
 	zfsvfs_t *zfsvfs;
 
 	if (getzfsvfs(zc->zc_name, &zfsvfs) != 0) {
 		/*
 		 * No zfsvfs could be obtained for this name, so work on a
 		 * plain objset hold instead.
 		 */
 		/* XXX kind of reading contents without owning */
 		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
 		if (error != 0)
 			return (error);
 
 		error = dmu_objset_userspace_upgrade(os);
 		dmu_objset_rele(os, FTAG);
 		return (error);
 	}
 
 	if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
 		/*
 		 * If userused is not enabled, it may be because the
 		 * objset needs to be closed & reopened (to grow the
 		 * objset_phys_t).  Suspending and resuming the fs does
 		 * that.
 		 */
 		error = zfs_suspend_fs(zfsvfs);
 		if (error == 0) {
 			dmu_objset_refresh_ownership(zfsvfs->z_os, zfsvfs);
 			error = zfs_resume_fs(zfsvfs, zc->zc_name);
 		}
 	}
 	if (error == 0)
 		error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
 	VFS_RELE(zfsvfs->z_vfs);
 
 	return (error);
 }
 
 #ifdef illumos
 /*
  * We don't want to have a hard dependency
  * on some special symbols in sharefs,
  * nfs, and smbsrv.  Look them up when
  * the first file system is shared.
  * None of sharefs, nfs or smbsrv are unloadable modules.
  */
 int (*znfsexport_fs)(void *arg);
 int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
 int (*zsmbexport_fs)(void *arg, boolean_t add_share);
 
 /* Set to 1 once the corresponding entry points have been resolved. */
 int zfs_nfsshare_inited;
 int zfs_smbshare_inited;
 
 /* Module handles returned by ddi_modopen(); NULL until first use. */
 ddi_modhandle_t nfs_mod;
 ddi_modhandle_t sharefs_mod;
 ddi_modhandle_t smbsrv_mod;
 #endif	/* illumos */
 /*
  * Held around the module/symbol lookups in zfs_ioc_share() (illumos
  * builds); declared unconditionally.
  */
 kmutex_t zfs_share_lock;
 
 #ifdef illumos
 /*
  * Resolve the sharetab entry point, which both NFS and SMB shares need.
  * Opens "fs/sharefs" and looks up "sharefs_impl" unless that was already
  * done.  The caller must hold zfs_share_lock.
  *
  * Returns 0 on success, or ENOSYS if the module or symbol is unavailable.
  */
 static int
 zfs_init_sharefs()
 {
 	int error;
 
 	ASSERT(MUTEX_HELD(&zfs_share_lock));
 
 	if (sharefs_mod == NULL) {
 		sharefs_mod = ddi_modopen("fs/sharefs",
 		    KRTLD_MODE_FIRST, &error);
 		if (sharefs_mod == NULL)
 			return (SET_ERROR(ENOSYS));
 	}
 
 	if (zshare_fs == NULL) {
 		zshare_fs = (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
 		    ddi_modsym(sharefs_mod, "sharefs_impl", &error);
 		if (zshare_fs == NULL)
 			return (SET_ERROR(ENOSYS));
 	}
 
 	return (0);
 }
 #endif	/* illumos */
 
 /*
  * Share or unshare a file system over NFS or SMB (illumos only).
  *
  * zc_share.z_sharetype selects the operation, z_exportdata is handed to
  * the protocol's export function, and z_sharedata / z_sharemax are handed
  * to the sharetab function.
  *
  * Returns EINVAL for an unknown share type, ENOSYS if a required module
  * or symbol cannot be resolved, or the error from the export / sharetab
  * call.  On non-illumos builds this always returns ENOSYS.
  */
 static int
 zfs_ioc_share(zfs_cmd_t *zc)
 {
 #ifdef illumos
 	int error;
 	int opcode;
 
 	/*
 	 * First switch: make sure the entry points needed for this share
 	 * type are resolved.  The *_inited flag is tested without the lock;
 	 * the lookups themselves run under zfs_share_lock and each one is
 	 * skipped if its pointer is already set.
 	 */
 	switch (zc->zc_share.z_sharetype) {
 	case ZFS_SHARE_NFS:
 	case ZFS_UNSHARE_NFS:
 		if (zfs_nfsshare_inited == 0) {
 			mutex_enter(&zfs_share_lock);
 			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
 				mutex_exit(&zfs_share_lock);
 				return (SET_ERROR(ENOSYS));
 			}
 			if (znfsexport_fs == NULL &&
 			    ((znfsexport_fs = (int (*)(void *))
 			    ddi_modsym(nfs_mod,
 			    "nfs_export", &error)) == NULL)) {
 				mutex_exit(&zfs_share_lock);
 				return (SET_ERROR(ENOSYS));
 			}
 			error = zfs_init_sharefs();
 			if (error != 0) {
 				mutex_exit(&zfs_share_lock);
 				return (SET_ERROR(ENOSYS));
 			}
 			zfs_nfsshare_inited = 1;
 			mutex_exit(&zfs_share_lock);
 		}
 		break;
 	case ZFS_SHARE_SMB:
 	case ZFS_UNSHARE_SMB:
 		if (zfs_smbshare_inited == 0) {
 			mutex_enter(&zfs_share_lock);
 			if (smbsrv_mod == NULL && ((smbsrv_mod =
 			    ddi_modopen("drv/smbsrv",
 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
 				mutex_exit(&zfs_share_lock);
 				return (SET_ERROR(ENOSYS));
 			}
 			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
 			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
 			    "smb_server_share", &error)) == NULL)) {
 				mutex_exit(&zfs_share_lock);
 				return (SET_ERROR(ENOSYS));
 			}
 			error = zfs_init_sharefs();
 			if (error != 0) {
 				mutex_exit(&zfs_share_lock);
 				return (SET_ERROR(ENOSYS));
 			}
 			zfs_smbshare_inited = 1;
 			mutex_exit(&zfs_share_lock);
 		}
 		break;
 	default:
 		return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * Second switch: call the protocol's export function.  A non-zero
 	 * result is returned immediately and the sharetab is not touched.
 	 */
 	switch (zc->zc_share.z_sharetype) {
 	case ZFS_SHARE_NFS:
 	case ZFS_UNSHARE_NFS:
 		if (error =
 		    znfsexport_fs((void *)
 		    (uintptr_t)zc->zc_share.z_exportdata))
 			return (error);
 		break;
 	case ZFS_SHARE_SMB:
 	case ZFS_UNSHARE_SMB:
 		/* The second argument tells smbsrv whether to add the share. */
 		if (error = zsmbexport_fs((void *)
 		    (uintptr_t)zc->zc_share.z_exportdata,
 		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
 		    B_TRUE: B_FALSE)) {
 			return (error);
 		}
 		break;
 	}
 
 	/* Both SHARE types add to the sharetab; both UNSHARE types remove. */
 	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
 	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
 	    SHAREFS_ADD : SHAREFS_REMOVE;
 
 	/*
 	 * Add or remove share from sharetab
 	 */
 	error = zshare_fs(opcode,
 	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
 	    zc->zc_share.z_sharemax);
 
 	return (error);
 
 #else	/* !illumos */
 	return (ENOSYS);
 #endif	/* illumos */
 }
 
 /*
  * One-entry ACL built from ACE_ALL_PERMS and ACE_EVERYONE.  It is the
  * ACL given to new share resource files by the ZFS_SMB_ACL_ADD case of
  * zfs_ioc_smb_acl().
  * NOTE(review): field order of ace_t is not visible here -- confirm which
  * initializer maps to which member.
  */
 ace_t full_access[] = {
 	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
 };
 
 /*
  * inputs:
  * zc_name		name of containing filesystem
  * zc_obj		object # beyond which we want next in-use object #
  *
  * outputs:
  * zc_obj		next in-use object #
  */
 static int
 zfs_ioc_next_obj(zfs_cmd_t *zc)
 {
 	objset_t *os = NULL;
 	int error = dmu_objset_hold(zc->zc_name, FTAG, &os);
 
 	if (error == 0) {
 		/*
 		 * zc_obj is advanced in place; the dataset's previous
 		 * snapshot txg is passed as the final argument.
 		 */
 		error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
 		    dsl_dataset_phys(os->os_dsl_dataset)->ds_prev_snap_txg);
 		dmu_objset_rele(os, FTAG);
 	}
 
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of filesystem
  * zc_value		prefix name for snapshot
  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
  *
  * outputs:
  * zc_value		short name of new snapshot
  */
 static int
 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
 {
 	char *snapname;
 	char *holdtag;
 	minor_t minor;
 	int error;
 
 	error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
 	if (error != 0)
 		return (error);
 
 	/* Snapshot name: "<prefix>-<lbolt as 16 hex digits>". */
 	snapname = kmem_asprintf("%s-%016llx", zc->zc_value,
 	    (u_longlong_t)ddi_get_lbolt64());
 	/* Hold tag: "%<prefix>". */
 	holdtag = kmem_asprintf("%%%s", zc->zc_value);
 
 	error = dsl_dataset_snapshot_tmp(zc->zc_name, snapname, minor,
 	    holdtag);
 
 	/* Report the generated short name back only on success. */
 	if (error == 0)
 		(void) strcpy(zc->zc_value, snapname);
 
 	strfree(snapname);
 	strfree(holdtag);
 	zfs_onexit_fd_rele(zc->zc_cleanup_fd);
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of "to" snapshot
  * zc_value		name of "from" snapshot
  * zc_cookie		file descriptor to write diff data on
  *
  * outputs:
  * dmu_diff_record_t's to the file descriptor
  */
 static int
 zfs_ioc_diff(zfs_cmd_t *zc)
 {
 	cap_rights_t rights;
 	offset_t off;
 	int error;
 	file_t *fp = getf(zc->zc_cookie,
 	    cap_rights_init(&rights, CAP_WRITE));
 
 	if (fp == NULL)
 		return (SET_ERROR(EBADF));
 
 	/* Start at the descriptor's current offset. */
 	off = fp->f_offset;
 
 #ifdef illumos
 	error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
 #else
 	error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
 #endif
 
 	/* Store the offset back only if it is in the valid range. */
 	if (off >= 0 && off <= MAXOFFSET_T)
 		fp->f_offset = off;
 	releasef(zc->zc_cookie);
 
 	return (error);
 }
 
 #ifdef illumos
 /*
  * Remove all ACL files in shares dir.
  *
  * Walks the ZAP entries of directory 'dzp' and removes each name via
  * VOP_REMOVE().  The walk stops at the first error from either the cursor
  * or the remove, and that error is what gets returned.
  */
 static int
 zfs_smb_acl_purge(znode_t *dzp)
 {
 	zap_cursor_t	cursor;
 	zap_attribute_t	entry;
 	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
 	int error;
 
 	zap_cursor_init(&cursor, zfsvfs->z_os, dzp->z_id);
 	while ((error = zap_cursor_retrieve(&cursor, &entry)) == 0) {
 		error = VOP_REMOVE(ZTOV(dzp), entry.za_name, kcred,
 		    NULL, 0);
 		if (error != 0)
 			break;
 		zap_cursor_advance(&cursor);
 	}
 	zap_cursor_fini(&cursor);
 	return (error);
 }
 #endif	/* illumos */
 
 /*
  * Manage the per-share ACL resource files kept in a file system's shares
  * directory (illumos only).
  *
  * inputs:
  * zc_name		dataset name; must match the resource mounted at
  *			zc_value
  * zc_value		path that is looked up to find the file system
  * zc_cookie		operation: ZFS_SMB_ACL_ADD, _REMOVE, _RENAME, _PURGE
  * zc_string		resource name (ADD and REMOVE)
  * zc_nvlist_src[_size]	nvlist with ZFS_SMB_ACL_SRC and ZFS_SMB_ACL_TARGET
  *			strings (RENAME)
  *
  * Returns 0 on success, EINVAL for a non-ZFS or mismatched mount, an
  * unknown operation or a RENAME nvlist lacking either name, or the error
  * from the failing lookup / transaction / vnode operation.  On
  * non-illumos builds this always returns EOPNOTSUPP.
  */
 static int
 zfs_ioc_smb_acl(zfs_cmd_t *zc)
 {
 #ifdef illumos
 	vnode_t *vp;
 	znode_t *dzp;
 	vnode_t *resourcevp = NULL;
 	znode_t *sharedir;
 	zfsvfs_t *zfsvfs;
 	nvlist_t *nvlist;
 	char *src, *target;
 	vattr_t vattr;
 	vsecattr_t vsec;
 	int error = 0;
 
 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
 	    NO_FOLLOW, NULL, &vp)) != 0)
 		return (error);
 
 	/* Now make sure mntpnt and dataset are ZFS */
 
 	if (strcmp(vp->v_vfsp->mnt_stat.f_fstypename, "zfs") != 0 ||
 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
 	    zc->zc_name) != 0)) {
 		VN_RELE(vp);
 		return (SET_ERROR(EINVAL));
 	}
 
 	dzp = VTOZ(vp);
 	zfsvfs = dzp->z_zfsvfs;
 	ZFS_ENTER(zfsvfs);
 
 	/*
 	 * Create share dir if it's missing.
 	 */
 	mutex_enter(&zfsvfs->z_lock);
 	if (zfsvfs->z_shares_dir == 0) {
 		dmu_tx_t *tx;
 
 		tx = dmu_tx_create(zfsvfs->z_os);
 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
 		    ZFS_SHARES_DIR);
 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
 		error = dmu_tx_assign(tx, TXG_WAIT);
 		if (error != 0) {
 			dmu_tx_abort(tx);
 		} else {
 			error = zfs_create_share_dir(zfsvfs, tx);
 			dmu_tx_commit(tx);
 		}
 		if (error != 0) {
 			mutex_exit(&zfsvfs->z_lock);
 			VN_RELE(vp);
 			ZFS_EXIT(zfsvfs);
 			return (error);
 		}
 	}
 	mutex_exit(&zfsvfs->z_lock);
 
 	ASSERT(zfsvfs->z_shares_dir);
 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
 		VN_RELE(vp);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	/*
 	 * From here on both 'vp' and 'sharedir' are held.  Every case below
 	 * leaves the switch with 'error' set and lets the common code after
 	 * it drop both holds, so no path can forget one of them.
 	 */
 	switch (zc->zc_cookie) {
 	case ZFS_SMB_ACL_ADD:
 		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
 		vattr.va_type = VREG;
 		vattr.va_mode = S_IFREG|0777;
 		vattr.va_uid = 0;
 		vattr.va_gid = 0;
 
 		vsec.vsa_mask = VSA_ACE;
 		vsec.vsa_aclentp = &full_access;
 		vsec.vsa_aclentsz = sizeof (full_access);
 		vsec.vsa_aclcnt = 1;
 
 		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
 		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
 		if (resourcevp)
 			VN_RELE(resourcevp);
 		break;
 
 	case ZFS_SMB_ACL_REMOVE:
 		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
 		    NULL, 0);
 		break;
 
 	case ZFS_SMB_ACL_RENAME:
 		error = get_nvlist(zc->zc_nvlist_src,
 		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist);
 		if (error != 0) {
 			/* Common exit also releases sharedir. */
 			break;
 		}
 		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
 		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
 		    &target)) {
 			/*
 			 * 'error' is still 0 from get_nvlist(); report the
 			 * malformed request instead of claiming success.
 			 */
 			nvlist_free(nvlist);
 			error = SET_ERROR(EINVAL);
 			break;
 		}
 		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
 		    kcred, NULL, 0);
 		nvlist_free(nvlist);
 		break;
 
 	case ZFS_SMB_ACL_PURGE:
 		error = zfs_smb_acl_purge(sharedir);
 		break;
 
 	default:
 		error = SET_ERROR(EINVAL);
 		break;
 	}
 
 	VN_RELE(vp);
 	VN_RELE(ZTOV(sharedir));
 
 	ZFS_EXIT(zfsvfs);
 
 	return (error);
 #else	/* !illumos */
 	return (EOPNOTSUPP);
 #endif	/* illumos */
 }
 
 /*
  * innvl: {
  *     "holds" -> { snapname -> holdname (string), ... }
  *     (optional) "cleanup_fd" -> fd (int32)
  * }
  *
  * outnvl: {
  *     snapname -> error value (int32)
  *     ...
  * }
  */
 /* ARGSUSED */
 static int
 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
 {
 	nvlist_t *holds;
 	minor_t minor = 0;
 	int cleanup_fd = -1;
 	int error;
 
 	/* "holds" is mandatory. */
 	if (nvlist_lookup_nvlist(args, "holds", &holds) != 0)
 		return (SET_ERROR(EINVAL));
 
 	/* "cleanup_fd" is optional; without it 'minor' stays 0. */
 	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
 		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
 		if (error != 0)
 			return (error);
 	}
 
 	error = dsl_dataset_user_hold(holds, minor, errlist);
 
 	/* A non-zero minor means the cleanup fd was held above. */
 	if (minor != 0)
 		zfs_onexit_fd_rele(cleanup_fd);
 	return (error);
 }
 
 /*
  * innvl is not used.
  *
  * outnvl: {
  *    holdname -> time added (uint64 seconds since epoch)
  *    ...
  * }
  */
 /* ARGSUSED */
 static int
 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
 {
 	/* 'args' is ignored; the holds are written into outnvl. */
 	return (dsl_dataset_get_holds(snapname, outnvl));
 }
 
 /*
  * innvl: {
  *     snapname -> { holdname, ... }
  *     ...
  * }
  *
  * outnvl: {
  *     snapname -> error value (int32)
  *     ...
  * }
  */
 /* ARGSUSED */
 static int
 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
 {
 	/* 'pool' is ignored; the whole input nvlist is the holds list. */
 	return (dsl_dataset_user_release(holds, errlist));
 }
 
 /*
  * inputs:
  * zc_name		name of new filesystem or snapshot
  * zc_value		full name of old snapshot
  *
  * outputs:
  * zc_cookie		space in bytes
  * zc_objset_type	compressed space in bytes
  * zc_perm_action	uncompressed space in bytes
  */
 static int
 zfs_ioc_space_written(zfs_cmd_t *zc)
 {
 	dsl_pool_t *dp;
 	dsl_dataset_t *newds, *oldds;
 	int error;
 
 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 	if (error != 0)
 		return (error);
 
 	/* Hold the newer dataset (zc_name), then the older (zc_value). */
 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &newds);
 	if (error == 0) {
 		error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &oldds);
 		if (error == 0) {
 			error = dsl_dataset_space_written(oldds, newds,
 			    &zc->zc_cookie, &zc->zc_objset_type,
 			    &zc->zc_perm_action);
 			dsl_dataset_rele(oldds, FTAG);
 		}
 		dsl_dataset_rele(newds, FTAG);
 	}
 	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
 /*
  * innvl: {
  *     "firstsnap" -> snapshot name
  * }
  *
  * outnvl: {
  *     "used" -> space in bytes
  *     "compressed" -> compressed space in bytes
  *     "uncompressed" -> uncompressed space in bytes
  * }
  */
 static int
 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	int error;
 	dsl_pool_t *dp;
 	dsl_dataset_t *new, *old;
 	char *firstsnap;
 	/*
 	 * These are added to outnvl even when the computation below fails,
 	 * so give them defined values rather than exporting whatever was
 	 * on the stack.
 	 */
 	uint64_t used = 0, comp = 0, uncomp = 0;
 
 	if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
 		return (SET_ERROR(EINVAL));
 
 	error = dsl_pool_hold(lastsnap, FTAG, &dp);
 	if (error != 0)
 		return (error);
 
 	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
 	if (error != 0) {
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
 	if (error != 0) {
 		dsl_dataset_rele(new, FTAG);
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 
 	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
 	dsl_dataset_rele(old, FTAG);
 	dsl_dataset_rele(new, FTAG);
 	dsl_pool_rele(dp, FTAG);
 	fnvlist_add_uint64(outnvl, "used", used);
 	fnvlist_add_uint64(outnvl, "compressed", comp);
 	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
 	return (error);
 }
 
 /*
  * Thin wrapper: calls zone_dataset_attach() with the calling thread's
  * credentials, the dataset name in zc_name and zc_jailid narrowed to int.
  */
 static int
 zfs_ioc_jail(zfs_cmd_t *zc)
 {
 
 	return (zone_dataset_attach(curthread->td_ucred, zc->zc_name,
 	    (int)zc->zc_jailid));
 }
 
 /*
  * Thin wrapper: calls zone_dataset_detach() with the calling thread's
  * credentials, the dataset name in zc_name and zc_jailid narrowed to int.
  */
 static int
 zfs_ioc_unjail(zfs_cmd_t *zc)
 {
 
 	return (zone_dataset_detach(curthread->td_ucred, zc->zc_name,
 	    (int)zc->zc_jailid));
 }
 
 /*
  * innvl: {
  *     "fd" -> file descriptor to write stream to (int32)
  *     (optional) "fromsnap" -> full snap name to send an incremental from
  *     (optional) "largeblockok" -> (value ignored)
  *         indicates that blocks > 128KB are permitted
  *     (optional) "embedok" -> (value ignored)
  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
  * }
  *
  * outnvl is unused
  */
 /* ARGSUSED */
 static int
 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	cap_rights_t rights;
 	int error;
 	offset_t off;
 	char *fromname = NULL;
 	int fd;
 	boolean_t largeblockok;
 	boolean_t embedok;
 
 	/* "fd" is the only mandatory argument. */
 	error = nvlist_lookup_int32(innvl, "fd", &fd);
 	if (error != 0)
 		return (SET_ERROR(EINVAL));
 
 	/* Optional: fromname stays NULL when "fromsnap" is absent. */
 	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
 
 	/* For both flags only presence matters, not the value. */
 	largeblockok = nvlist_exists(innvl, "largeblockok");
 	embedok = nvlist_exists(innvl, "embedok");
 
 	/*
 	 * The stream is written to this descriptor, so the right that must
 	 * be present on it is CAP_WRITE -- the same right zfs_ioc_send() and
 	 * zfs_ioc_diff() ask for on their output descriptors.
 	 */
 	file_t *fp = getf(fd, cap_rights_init(&rights, CAP_WRITE));
 	if (fp == NULL)
 		return (SET_ERROR(EBADF));
 
 	off = fp->f_offset;
 	error = dmu_send(snapname, fromname, embedok, largeblockok,
 #ifdef illumos
 	    fd, fp->f_vnode, &off);
 #else
 	    fd, fp, &off);
 #endif
 
 	/* Store the resulting offset back into the descriptor. */
 #ifdef illumos
 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
 		fp->f_offset = off;
 #else
 	fp->f_offset = off;
 #endif
 
 	releasef(fd);
 	return (error);
 }
 
 /*
  * Determine approximately how large a zfs send stream will be -- the number
  * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
  *
  * innvl: {
  *     (optional) "fromsnap" -> full snap name to send an incremental from
  * }
  *
  * outnvl: {
  *     "space" -> bytes of space (uint64)
  * }
  */
 static int
 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	dsl_pool_t *dp;
 	dsl_dataset_t *fromsnap = NULL;
 	dsl_dataset_t *tosnap;
 	int error;
 	char *fromname;
 	/*
 	 * "space" is added to outnvl even when the estimate fails, so start
 	 * from a defined value rather than exporting stack contents.
 	 */
 	uint64_t space = 0;
 
 	error = dsl_pool_hold(snapname, FTAG, &dp);
 	if (error != 0)
 		return (error);
 
 	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
 	if (error != 0) {
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 
 	/* "fromsnap" is optional; without it fromsnap stays NULL. */
 	error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
 	if (error == 0) {
 		error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
 		if (error != 0) {
 			dsl_dataset_rele(tosnap, FTAG);
 			dsl_pool_rele(dp, FTAG);
 			return (error);
 		}
 	}
 
 	error = dmu_send_estimate(tosnap, fromsnap, &space);
 	fnvlist_add_uint64(outnvl, "space", space);
 
 	if (fromsnap != NULL)
 		dsl_dataset_rele(fromsnap, FTAG);
 	dsl_dataset_rele(tosnap, FTAG);
 	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
 
 /*
  * Table of ioctl descriptors with one slot per ioctl number in
  * [ZFS_IOC_FIRST, ZFS_IOC_LAST); the slot for 'ioc' is
  * zfs_ioc_vec[ioc - ZFS_IOC_FIRST].  Slots are filled in by
  * zfs_ioctl_register() and zfs_ioctl_register_legacy().
  */
 static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
 
 /*
  * Fill in the zfs_ioc_vec slot for 'ioc' with a handler that uses the
  * legacy (zfs_cmd_t based) signature.  The slot must not already have
  * either kind of handler registered.
  */
 static void
 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
     boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
 {
 	zfs_ioc_vec_t *slot;
 
 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
 
 	slot = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
 	ASSERT3P(slot->zvec_legacy_func, ==, NULL);
 	ASSERT3P(slot->zvec_func, ==, NULL);
 
 	slot->zvec_legacy_func = func;
 	slot->zvec_secpolicy = secpolicy;
 	slot->zvec_namecheck = namecheck;
 	slot->zvec_allow_log = log_history;
 	slot->zvec_pool_check = pool_check;
 }
 
 /*
  * See the block comment at the beginning of this file for details on
  * each argument to this function.
  */
 static void
 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
     zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
     boolean_t allow_log)
 {
 	zfs_ioc_vec_t *slot;
 
 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
 
 	/* The slot must not already hold a handler of either kind. */
 	slot = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
 	ASSERT3P(slot->zvec_legacy_func, ==, NULL);
 	ASSERT3P(slot->zvec_func, ==, NULL);
 
 	/* if we are logging, the name must be valid */
 	ASSERT(!allow_log || namecheck != NO_NAME);
 
 	slot->zvec_name = name;
 	slot->zvec_func = func;
 	slot->zvec_secpolicy = secpolicy;
 	slot->zvec_namecheck = namecheck;
 	slot->zvec_pool_check = pool_check;
 	slot->zvec_smush_outnvlist = smush_outnvlist;
 	slot->zvec_allow_log = allow_log;
 }
 
 /*
  * Register a legacy-signature ioctl whose name is checked as POOL_NAME;
  * the caller chooses history logging and the pool checks.
  */
 static void
 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
     zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
     zfs_ioc_poolcheck_t pool_check)
 {
 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
 	    POOL_NAME, log_history, pool_check);
 }
 
 /*
  * Register a legacy-signature ioctl whose name is checked as
  * DATASET_NAME, with history logging disabled (B_FALSE); the caller
  * chooses the pool checks.
  */
 static void
 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
     zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
 {
 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
 	    DATASET_NAME, B_FALSE, pool_check);
 }
 
 static void
 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
 {
 	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
 	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
 }
 
 /*
  * Register a legacy-signature ioctl with NO_NAME name checking, history
  * logging off and POOL_CHECK_NONE; only the security policy varies.
  */
 static void
 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
     zfs_secpolicy_func_t *secpolicy)
 {
 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
 	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
 }
 
 /*
  * Register a legacy-signature, DATASET_NAME ioctl with history logging
  * off and only POOL_CHECK_SUSPENDED, using the caller's security policy.
  */
 static void
 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
     zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
 {
 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
 	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
 }
 
 /*
  * Same as zfs_ioctl_register_dataset_read_secpolicy(), with
  * zfs_secpolicy_read as the security policy.
  */
 static void
 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
 {
 	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
 	    zfs_secpolicy_read);
 }
 
 /*
  * Register a legacy-signature, DATASET_NAME ioctl with history logging
  * on and both POOL_CHECK_SUSPENDED and POOL_CHECK_READONLY, using the
  * caller's security policy.
  */
 static void
 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
 	zfs_secpolicy_func_t *secpolicy)
 {
 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
 	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
 }
 
 /*
  * Register every ZFS ioctl.
  *
  * Each call below hands one ioctl number to a registration helper together
  * with its handler, its security-policy callback and (directly or through
  * the helper's defaults) the name type and pool-state checks to apply.
  * The registration helpers are defined elsewhere in this file.
  * NOTE(review): presumably they fill in zfs_ioc_vec[], the table that
  * zfsdev_ioctl() indexes by command number -- confirm against the helpers.
  *
  * Called once during module initialization, before the control device is
  * created.
  */
 static void
 zfs_ioctl_init(void)
 {
 	/* IOCTLs registered by name through zfs_ioctl_register() */
 	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
 	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
 
 	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
 	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
 
 	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
 	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
 
 	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
 	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
 
 	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
 	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
 
 	zfs_ioctl_register("create", ZFS_IOC_CREATE,
 	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
 
 	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
 	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
 
 	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
 	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
 
 	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
 	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
 	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
 	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
 
 	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
 	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
 
 	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
 	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
 
 	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
 	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
 
 	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
 	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
 
 	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
 	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
 	    POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
 
 	/* IOCTLS that use the legacy function signature */
 
 	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
 
 	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
 	    zfs_ioc_pool_scan);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
 	    zfs_ioc_pool_upgrade);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
 	    zfs_ioc_vdev_add);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
 	    zfs_ioc_vdev_remove);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
 	    zfs_ioc_vdev_set_state);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
 	    zfs_ioc_vdev_attach);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
 	    zfs_ioc_vdev_detach);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
 	    zfs_ioc_vdev_setpath);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
 	    zfs_ioc_vdev_setfru);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
 	    zfs_ioc_pool_set_props);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
 	    zfs_ioc_vdev_split);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
 	    zfs_ioc_pool_reguid);
 
 	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
 	    zfs_ioc_pool_configs, zfs_secpolicy_none);
 	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
 	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
 	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
 	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
 	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
 	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
 	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
 	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);
 
 	/*
 	 * Pool destroy and export do not log history as part of
 	 * zfsdev_ioctl(); instead zfs_ioc_pool_export() does the
 	 * logging of those commands.
 	 */
 	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
 	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
 
 	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
 	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
 	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
 	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
 
 	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
 	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_NONE);
 	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
 	    zfs_ioc_dsobj_to_dsname,
 	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_NONE);
 	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
 	    zfs_ioc_pool_get_history,
 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
 
 	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
 
 	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
 	zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
 
 	/* Read-only dataset ioctls using the helper's default policy */
 	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
 	    zfs_ioc_space_written);
 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
 	    zfs_ioc_objset_recvd_props);
 	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
 	    zfs_ioc_next_obj);
 	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
 	    zfs_ioc_get_fsacl);
 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
 	    zfs_ioc_objset_stats);
 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
 	    zfs_ioc_objset_zplprops);
 	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
 	    zfs_ioc_dataset_list_next);
 	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
 	    zfs_ioc_snapshot_list_next);
 	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
 	    zfs_ioc_send_progress);
 
 	/* Read-only dataset ioctls that supply their own security policy */
 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
 	    zfs_ioc_diff, zfs_secpolicy_diff);
 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
 	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
 	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
 	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
 	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
 	    zfs_ioc_send, zfs_secpolicy_send);
 
 	/* Dataset ioctls registered through the "modify" helper */
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
 	    zfs_secpolicy_none);
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
 	    zfs_secpolicy_destroy);
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
 	    zfs_secpolicy_rename);
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
 	    zfs_secpolicy_recv);
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
 	    zfs_secpolicy_promote);
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
 	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
 	    zfs_secpolicy_set_fsacl);
 
 	/* Dataset ioctls registered through the "nolog" helper */
 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
 	    zfs_secpolicy_share, POOL_CHECK_NONE);
 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
 	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
 	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
 	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
 
 #ifdef __FreeBSD__
 	/* FreeBSD-only: attach a dataset to, or detach it from, a jail */
 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_JAIL, zfs_ioc_jail,
 	    zfs_secpolicy_config, POOL_CHECK_NONE);
 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_UNJAIL, zfs_ioc_unjail,
 	    zfs_secpolicy_config, POOL_CHECK_NONE);
 #endif
 }
 
 /*
  * Verify that the pool backing `name' is in a state that satisfies `check'.
  *
  * Returns 0 when POOL_CHECK_NONE is requested or all requested checks pass,
  * the error from spa_open() if the pool cannot be opened, EAGAIN if
  * POOL_CHECK_SUSPENDED is requested and the pool is suspended, or EROFS if
  * POOL_CHECK_READONLY is requested and the pool is not writeable.  The
  * suspended check takes precedence over the read-only check.
  */
 int
 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
     zfs_ioc_poolcheck_t check)
 {
 	spa_t *pool;
 	int rv;
 
 	ASSERT(type == POOL_NAME || type == DATASET_NAME);
 
 	/* nothing to verify, so do not even open the pool */
 	if ((check & POOL_CHECK_NONE) != 0)
 		return (0);
 
 	rv = spa_open(name, &pool, FTAG);
 	if (rv != 0)
 		return (rv);
 
 	if ((check & POOL_CHECK_SUSPENDED) != 0 && spa_suspended(pool)) {
 		rv = SET_ERROR(EAGAIN);
 	} else if ((check & POOL_CHECK_READONLY) != 0 &&
 	    !spa_writeable(pool)) {
 		rv = SET_ERROR(EROFS);
 	}
 	spa_close(pool, FTAG);
 
 	return (rv);
 }
 
 /*
  * Pick an unused minor number for a new open of the control device.
  *
  * The search starts just after the most recently handed-out minor and wraps
  * from ZFSDEV_MAX_MINOR back to 1, so minor 0 is never returned as a valid
  * allocation.  Returns the free minor, or 0 if the scan comes back around
  * to the starting point without finding one.  The caller must hold
  * spa_namespace_lock.
  */
 minor_t
 zfsdev_minor_alloc(void)
 {
 	static minor_t last_minor;
 	minor_t candidate;
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
 	candidate = last_minor + 1;
 	while (candidate != last_minor) {
 		if (candidate > ZFSDEV_MAX_MINOR)
 			candidate = 1;
 		if (ddi_get_soft_state(zfsdev_state, candidate) == NULL) {
 			/* remember where to resume the next search */
 			last_minor = candidate;
 			return (candidate);
 		}
 		candidate++;
 	}
 
 	return (0);
 }
 
 static int
 zfs_ctldev_init(struct cdev *devp)
 {
 	minor_t minor;
 	zfs_soft_state_t *zs;
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
 	minor = zfsdev_minor_alloc();
 	if (minor == 0)
 		return (SET_ERROR(ENXIO));
 
 	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
 		return (SET_ERROR(EAGAIN));
 
 	devfs_set_cdevpriv((void *)(uintptr_t)minor, zfsdev_close);
 
 	zs = ddi_get_soft_state(zfsdev_state, minor);
 	zs->zss_type = ZSST_CTLDEV;
 	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
 
 	return (0);
 }
 
 /*
  * Tear down the per-open state created by zfs_ctldev_init(): destroy the
  * on-exit list `zo' and free the soft-state slot for `minor'.
  * The caller must hold spa_namespace_lock.
  */
 static void
 zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
 {
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
 	zfs_onexit_destroy(zo);
 	ddi_soft_state_free(zfsdev_state, minor);
 }
 
 /*
  * Look up the data pointer stored in the soft-state slot for `minor'.
  * Returns NULL when the slot does not exist or is not of type `which'.
  */
 void *
 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
 {
 	zfs_soft_state_t *state = ddi_get_soft_state(zfsdev_state, minor);
 
 	if (state != NULL && state->zss_type == which)
 		return (state->zss_data);
 
 	return (NULL);
 }
 
 /*
  * Open entry point for the control device.  Only opens that pass FEXCL get
  * per-open state (a private minor); every other open succeeds without
  * allocating anything.
  */
 static int
 zfsdev_open(struct cdev *devp, int flag, int mode, struct thread *td)
 {
 	int rv;
 
 #ifdef illumos
 	if (getminor(*devp) != 0)
 		return (zvol_open(devp, flag, otyp, cr));
 #endif
 
 	/* This is the control device. Nothing to do unless a minor is requested. */
 	if ((flag & FEXCL) == 0)
 		return (0);
 
 	mutex_enter(&spa_namespace_lock);
 	rv = zfs_ctldev_init(devp);
 	mutex_exit(&spa_namespace_lock);
 
 	return (rv);
 }
 
 /*
  * cdevpriv destructor for the control device.  `data' carries the minor
  * number that zfs_ctldev_init() attached to the open file; minor 0 means
  * there is no per-open state to release.
  */
 static void
 zfsdev_close(void *data)
 {
 	minor_t minor = (minor_t)(uintptr_t)data;
 	zfs_onexit_t *onexit;
 
 	if (minor == 0)
 		return;
 
 	mutex_enter(&spa_namespace_lock);
 	onexit = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
 	/* only tear down when the lookup returned control-device state */
 	if (onexit != NULL)
 		zfs_ctldev_destroy(onexit, minor);
 	mutex_exit(&spa_namespace_lock);
 }
 
 /*
  * ioctl entry point for the ZFS control device.
  *
  * Decodes the command number from `zcmd', obtains the zfs_cmd_t from the
  * caller (translating from an older layout when the parameter length or the
  * ioctl version in zfs_iocparm_t identifies a legacy binary), validates the
  * pool/dataset name in zc_name, runs the registered security policy and
  * dispatches to the handler found in zfs_ioc_vec[].  The (possibly
  * translated) zfs_cmd_t is copied back out before returning.
  *
  * Returns 0 on success or an errno value.
  */
 static int
 zfsdev_ioctl(struct cdev *dev, u_long zcmd, caddr_t arg, int flag,
     struct thread *td)
 {
 	zfs_cmd_t *zc;
 	uint_t vecnum;
 	int error, rc, len;
 #ifdef illumos
 	minor_t minor = getminor(dev);
 #else
 	zfs_iocparm_t *zc_iocparm;
 	int cflag, cmd, oldvecnum;
 	boolean_t newioc, compat;
 	void *compat_zc = NULL;
 	cred_t *cr = td->td_ucred;
 #endif
 	const zfs_ioc_vec_t *vec;
 	char *saved_poolname = NULL;
 	nvlist_t *innvl = NULL;
 
 	cflag = ZFS_CMD_COMPAT_NONE;
 	compat = B_FALSE;
 	newioc = B_TRUE;	/* "new" style (zfs_iocparm_t) ioctl */
 
 	len = IOCPARM_LEN(zcmd);
 	vecnum = cmd = zcmd & 0xff;
 
 	/*
 	 * Check if we are talking to supported older binaries
 	 * and translate zfs_cmd if necessary
 	 */
 	if (len != sizeof(zfs_iocparm_t)) {
 		newioc = B_FALSE;
 		compat = B_TRUE;
 
 		vecnum = cmd;
 
 		switch (len) {
 		case sizeof(zfs_cmd_zcmd_t):
 			cflag = ZFS_CMD_COMPAT_LZC;
 			break;
 		case sizeof(zfs_cmd_deadman_t):
 			cflag = ZFS_CMD_COMPAT_DEADMAN;
 			break;
 		case sizeof(zfs_cmd_v28_t):
 			cflag = ZFS_CMD_COMPAT_V28;
 			break;
 		case sizeof(zfs_cmd_v15_t):
 			/*
 			 * cmd comes straight from the caller-supplied
 			 * ioctl number; make sure it is a valid index
 			 * before using it to look up the translation.
 			 */
 			if (cmd >= sizeof (zfs_ioctl_v15_to_v28) /
 			    sizeof (zfs_ioctl_v15_to_v28[0]))
 				return (EINVAL);
 
 			cflag = ZFS_CMD_COMPAT_V15;
 			vecnum = zfs_ioctl_v15_to_v28[cmd];
 
 			/*
 			 * Return without further handling
 			 * if the command is blacklisted.
 			 */
 			if (vecnum == ZFS_IOC_COMPAT_PASS)
 				return (0);
 			else if (vecnum == ZFS_IOC_COMPAT_FAIL)
 				return (ENOTSUP);
 			break;
 		default:
 			return (EINVAL);
 		}
 	}
 
 #ifdef illumos
 	vecnum = cmd - ZFS_IOC_FIRST;
 	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
 #endif
 
 	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
 		return (SET_ERROR(EINVAL));
 	vec = &zfs_ioc_vec[vecnum];
 
 	zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);
 
 #ifdef illumos
 	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
 	if (error != 0) {
 		error = SET_ERROR(EFAULT);
 		goto out;
 	}
 #else	/* !illumos */
 	bzero(zc, sizeof(zfs_cmd_t));
 
 	if (newioc) {
 		/*
 		 * New-style callers pass a small zfs_iocparm_t that points
 		 * at the real zfs_cmd_t and states its version and size.
 		 */
 		zc_iocparm = (void *)arg;
 
 		switch (zc_iocparm->zfs_ioctl_version) {
 		case ZFS_IOCVER_CURRENT:
 			if (zc_iocparm->zfs_cmd_size != sizeof(zfs_cmd_t)) {
 				error = SET_ERROR(EINVAL);
 				goto out;
 			}
 			break;
 		case ZFS_IOCVER_ZCMD:
 			if (zc_iocparm->zfs_cmd_size > sizeof(zfs_cmd_t) ||
 			    zc_iocparm->zfs_cmd_size < sizeof(zfs_cmd_zcmd_t)) {
 				error = SET_ERROR(EFAULT);
 				goto out;
 			}
 			compat = B_TRUE;
 			cflag = ZFS_CMD_COMPAT_ZCMD;
 			break;
 		default:
 			error = SET_ERROR(EINVAL);
 			goto out;
 			/* NOTREACHED */
 		}
 
 		if (compat) {
 			ASSERT(sizeof(zfs_cmd_t) >= zc_iocparm->zfs_cmd_size);
 			compat_zc = kmem_zalloc(sizeof(zfs_cmd_t), KM_SLEEP);
 			bzero(compat_zc, sizeof(zfs_cmd_t));
 
 			error = ddi_copyin((void *)(uintptr_t)zc_iocparm->zfs_cmd,
 			    compat_zc, zc_iocparm->zfs_cmd_size, flag);
 			if (error != 0) {
 				error = SET_ERROR(EFAULT);
 				goto out;
 			}
 		} else {
 			error = ddi_copyin((void *)(uintptr_t)zc_iocparm->zfs_cmd,
 			    zc, zc_iocparm->zfs_cmd_size, flag);
 			if (error != 0) {
 				error = SET_ERROR(EFAULT);
 				goto out;
 			}
 		}
 	}
 
 	if (compat) {
 		/* convert the legacy structure into the current zfs_cmd_t */
 		if (newioc) {
 			ASSERT(compat_zc != NULL);
 			zfs_cmd_compat_get(zc, compat_zc, cflag);
 		} else {
 			ASSERT(compat_zc == NULL);
 			zfs_cmd_compat_get(zc, arg, cflag);
 		}
 		oldvecnum = vecnum;
 		error = zfs_ioctl_compat_pre(zc, &vecnum, cflag);
 		if (error != 0)
 			goto out;
 		/* the compat layer may have remapped the command */
 		if (oldvecnum != vecnum)
 			vec = &zfs_ioc_vec[vecnum];
 	}
 #endif	/* !illumos */
 
 	zc->zc_iflags = flag & FKIOCTL;
 	if (zc->zc_nvlist_src_size != 0) {
 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 		    zc->zc_iflags, &innvl);
 		if (error != 0)
 			goto out;
 	}
 
 	/* rewrite innvl for backwards compatibility */
 	if (compat)
 		innvl = zfs_ioctl_compat_innvl(zc, innvl, vecnum, cflag);
 
 	/*
 	 * Ensure that all pool/dataset names are valid before we pass down to
 	 * the lower layers.
 	 */
 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
 	switch (vec->zvec_namecheck) {
 	case POOL_NAME:
 		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
 			error = SET_ERROR(EINVAL);
 		else
 			error = pool_status_check(zc->zc_name,
 			    vec->zvec_namecheck, vec->zvec_pool_check);
 		break;
 
 	case DATASET_NAME:
 		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
 			error = SET_ERROR(EINVAL);
 		else
 			error = pool_status_check(zc->zc_name,
 			    vec->zvec_namecheck, vec->zvec_pool_check);
 		break;
 
 	case NO_NAME:
 		break;
 	}
 
 	/* the security policy is skipped when FKIOCTL is set in flag */
 	if (error == 0 && !(flag & FKIOCTL))
 		error = vec->zvec_secpolicy(zc, innvl, cr);
 
 	if (error != 0)
 		goto out;
 
 	/* legacy ioctls can modify zc_name */
 	len = strcspn(zc->zc_name, "/@#") + 1;
 	saved_poolname = kmem_alloc(len, KM_SLEEP);
 	(void) strlcpy(saved_poolname, zc->zc_name, len);
 
 	if (vec->zvec_func != NULL) {
 		nvlist_t *outnvl;
 		int puterror = 0;
 		spa_t *spa;
 		nvlist_t *lognv = NULL;
 
 		ASSERT(vec->zvec_legacy_func == NULL);
 
 		/*
 		 * Add the innvl to the lognv before calling the func,
 		 * in case the func changes the innvl.
 		 */
 		if (vec->zvec_allow_log) {
 			lognv = fnvlist_alloc();
 			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
 			    vec->zvec_name);
 			if (!nvlist_empty(innvl)) {
 				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
 				    innvl);
 			}
 		}
 
 		outnvl = fnvlist_alloc();
 		error = vec->zvec_func(zc->zc_name, innvl, outnvl);
 
 		/* record the successful operation in the pool history */
 		if (error == 0 && vec->zvec_allow_log &&
 		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
 			if (!nvlist_empty(outnvl)) {
 				fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
 				    outnvl);
 			}
 			(void) spa_history_log_nvl(spa, lognv);
 			spa_close(spa, FTAG);
 		}
 		fnvlist_free(lognv);
 
 		/* rewrite outnvl for backwards compatibility */
 		if (compat)
 			outnvl = zfs_ioctl_compat_outnvl(zc, outnvl, vecnum,
 			    cflag);
 
 		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
 			int smusherror = 0;
 			if (vec->zvec_smush_outnvlist) {
 				smusherror = nvlist_smush(outnvl,
 				    zc->zc_nvlist_dst_size);
 			}
 			if (smusherror == 0)
 				puterror = put_nvlist(zc, outnvl);
 		}
 
 		/* a failure to return the output overrides the handler result */
 		if (puterror != 0)
 			error = puterror;
 
 		nvlist_free(outnvl);
 	} else {
 		error = vec->zvec_legacy_func(zc);
 	}
 
 out:
 	nvlist_free(innvl);
 
 #ifdef illumos
 	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
 	if (error == 0 && rc != 0)
 		error = SET_ERROR(EFAULT);
 #else
 	if (compat) {
 		/* translate the result back into the caller's layout */
 		zfs_ioctl_compat_post(zc, cmd, cflag);
 		if (newioc) {
 			ASSERT(compat_zc != NULL);
 			ASSERT(sizeof(zfs_cmd_t) >= zc_iocparm->zfs_cmd_size);
 
 			zfs_cmd_compat_put(zc, compat_zc, vecnum, cflag);
 			rc = ddi_copyout(compat_zc,
 			    (void *)(uintptr_t)zc_iocparm->zfs_cmd,
 			    zc_iocparm->zfs_cmd_size, flag);
 			if (error == 0 && rc != 0)
 				error = SET_ERROR(EFAULT);
 			kmem_free(compat_zc, sizeof (zfs_cmd_t));
 		} else {
 			zfs_cmd_compat_put(zc, arg, vecnum, cflag);
 		}
 	} else {
 		ASSERT(newioc);
 
 		rc = ddi_copyout(zc, (void *)(uintptr_t)zc_iocparm->zfs_cmd,
 		    sizeof (zfs_cmd_t), flag);
 		if (error == 0 && rc != 0)
 			error = SET_ERROR(EFAULT);
 	}
 #endif
 	/*
 	 * On success of a loggable ioctl, stash the pool name in
 	 * thread-specific data (replacing any previous value); otherwise
 	 * the saved name is no longer needed.
 	 */
 	if (error == 0 && vec->zvec_allow_log) {
 		char *s = tsd_get(zfs_allow_log_key);
 		if (s != NULL)
 			strfree(s);
 		(void) tsd_set(zfs_allow_log_key, saved_poolname);
 	} else {
 		if (saved_poolname != NULL)
 			strfree(saved_poolname);
 	}
 
 	kmem_free(zc, sizeof (zfs_cmd_t));
 	return (error);
 }
 
 #ifdef illumos
 /*
  * illumos attach(9E) entry point: on DDI_ATTACH, create the "zfs" minor
  * node and remember the dev_info pointer in zfs_dip.  Any other command,
  * or a failure to create the node, yields DDI_FAILURE.
  */
 static int
 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 {
 	/* short-circuit keeps the node from being created for other cmds */
 	if (cmd != DDI_ATTACH ||
 	    ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
 	    DDI_PSEUDO, 0) == DDI_FAILURE) {
 		return (DDI_FAILURE);
 	}
 
 	zfs_dip = dip;
 	ddi_report_dev(dip);
 
 	return (DDI_SUCCESS);
 }
 
 /*
  * illumos detach(9E) entry point: refuses while spa_busy(), zfs_busy() or
  * zvol_busy() report activity, or for any command other than DDI_DETACH;
  * otherwise clears zfs_dip and removes the node's properties and minors.
  */
 static int
 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 {
 	/* busy checks are evaluated first, exactly as before */
 	if (spa_busy() || zfs_busy() || zvol_busy() || cmd != DDI_DETACH)
 		return (DDI_FAILURE);
 
 	zfs_dip = NULL;
 
 	ddi_prop_remove_all(dip);
 	ddi_remove_minor_node(dip, NULL);
 
 	return (DDI_SUCCESS);
 }
 
 /*
  * illumos getinfo(9E) entry point: answers DDI_INFO_DEVT2DEVINFO with
  * zfs_dip and DDI_INFO_DEVT2INSTANCE with instance 0; every other query
  * fails.
  */
 /*ARGSUSED*/
 static int
 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
 {
 	if (infocmd == DDI_INFO_DEVT2DEVINFO) {
 		*result = zfs_dip;
 		return (DDI_SUCCESS);
 	}
 
 	if (infocmd == DDI_INFO_DEVT2INSTANCE) {
 		*result = (void *)0;
 		return (DDI_SUCCESS);
 	}
 
 	return (DDI_FAILURE);
 }
 #endif	/* illumos */
 
 /*
  * OK, so this is a little weird.
  *
  * /dev/zfs is the control node, i.e. minor 0.
  * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
  *
  * /dev/zfs has basically nothing to do except serve up ioctls,
  * so most of the standard driver entry points are in zvol.c.
  */
 #ifdef illumos
 /*
  * illumos character/block entry points: open, close and ioctl are handled
  * by the zfsdev_* routines in this file, the data path by the zvol_*
  * routines.
  */
 static struct cb_ops zfs_cb_ops = {
 	zfsdev_open,	/* open */
 	zfsdev_close,	/* close */
 	zvol_strategy,	/* strategy */
 	nodev,		/* print */
 	zvol_dump,	/* dump */
 	zvol_read,	/* read */
 	zvol_write,	/* write */
 	zfsdev_ioctl,	/* ioctl */
 	nodev,		/* devmap */
 	nodev,		/* mmap */
 	nodev,		/* segmap */
 	nochpoll,	/* poll */
 	ddi_prop_op,	/* prop_op */
 	NULL,		/* streamtab */
 	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
 	CB_REV,		/* version */
 	nodev,		/* async read */
 	nodev,		/* async write */
 };
 
 /* illumos device operations; ties attach/detach/info to zfs_cb_ops. */
 static struct dev_ops zfs_dev_ops = {
 	DEVO_REV,	/* version */
 	0,		/* refcnt */
 	zfs_info,	/* info */
 	nulldev,	/* identify */
 	nulldev,	/* probe */
 	zfs_attach,	/* attach */
 	zfs_detach,	/* detach */
 	nodev,		/* reset */
 	&zfs_cb_ops,	/* driver operations */
 	NULL,		/* no bus operations */
 	NULL,		/* power */
 	ddi_quiesce_not_needed,	/* quiesce */
 };
 
 /* illumos driver linkage record wrapping zfs_dev_ops. */
 static struct modldrv zfs_modldrv = {
 	&mod_driverops,
 	"ZFS storage pool",
 	&zfs_dev_ops
 };
 
 /*
  * illumos module linkage: lists zfs_modlfs and zfs_modldrv, so the module
  * carries both linkage records.
  */
 static struct modlinkage modlinkage = {
 	MODREV_1,
 	(void *)&zfs_modlfs,
 	(void *)&zfs_modldrv,
 	NULL
 };
 #endif	/* illumos */
 
 /*
  * FreeBSD character-device switch for the control device.  Only open and
  * ioctl are provided; per-open cleanup is done by the cdevpriv destructor
  * installed in zfs_ctldev_init() rather than by a d_close method.
  */
 static struct cdevsw zfs_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_open =	zfsdev_open,
 	.d_ioctl =	zfsdev_ioctl,
 	.d_name =	ZFS_DEV_NAME
 };
 
 /*
  * Destructor for zfs_allow_log_key thread-specific data: frees the pool
  * name string stored under that key.
  */
 static void
 zfs_allow_log_destroy(void *arg)
 {
 	strfree((char *)arg);
 }
 
 /*
  * Create the control device node named ZFS_DEV_NAME (unit 0), owned by
  * root:operator with mode 0666, and remember it in zfsdev.
  */
 static void
 zfsdev_init(void)
 {
 	zfsdev = make_dev(&zfs_cdevsw, 0x0, UID_ROOT, GID_OPERATOR, 0666,
 	    ZFS_DEV_NAME);
 }
 
 /*
  * Remove the control device node, if zfsdev_init() created one.
  */
 static void
 zfsdev_fini(void)
 {
 	if (zfsdev == NULL)
 		return;
 
 	destroy_dev(zfsdev);
 }
 
 static struct root_hold_token *zfs_root_token;
 struct proc *zfsproc;
 
 #ifdef illumos
 /*
  * illumos module load entry point.  Initializes the SPA, ZFS, zvol and
  * ioctl layers, then installs the module; if installation fails the zvol,
  * ZFS and SPA layers are torn down again and the error is returned.
  * On success the thread-specific-data keys, the LDI identity and
  * zfs_share_lock are set up.
  */
 int
 _init(void)
 {
 	int error;
 
 	spa_init(FREAD | FWRITE);
 	zfs_init();
 	zvol_init();
 	zfs_ioctl_init();
 
 	if ((error = mod_install(&modlinkage)) != 0) {
 		/* undo the layer initialization in reverse order */
 		zvol_fini();
 		zfs_fini();
 		spa_fini();
 		return (error);
 	}
 
 	tsd_create(&zfs_fsyncer_key, NULL);
 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
 	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
 
 	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
 	ASSERT(error == 0);
 	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
 
 	return (0);
 }
 
 /*
  * illumos module unload entry point.  Returns EBUSY while spa_busy(),
  * zfs_busy() or zvol_busy() report activity or zio_injection_enabled is
  * set, and returns the mod_remove() error if removal fails.  Otherwise
  * tears down the zvol, ZFS and SPA layers, closes any sharing modules that
  * were opened, and releases the fsyncer TSD key, the LDI identity and
  * zfs_share_lock.
  */
 int
 _fini(void)
 {
 	int error;
 
 	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
 		return (SET_ERROR(EBUSY));
 
 	if ((error = mod_remove(&modlinkage)) != 0)
 		return (error);
 
 	zvol_fini();
 	zfs_fini();
 	spa_fini();
 	/* close only the helper modules whose "inited" flag is set */
 	if (zfs_nfsshare_inited)
 		(void) ddi_modclose(nfs_mod);
 	if (zfs_smbshare_inited)
 		(void) ddi_modclose(smbsrv_mod);
 	if (zfs_nfsshare_inited || zfs_smbshare_inited)
 		(void) ddi_modclose(sharefs_mod);
 
 	tsd_destroy(&zfs_fsyncer_key);
 	ldi_ident_release(zfs_li);
 	zfs_li = NULL;
 	mutex_destroy(&zfs_share_lock);
 
 	/* error is 0 here: mod_remove() succeeded above */
 	return (error);
 }
 
 /*
  * illumos module info entry point: reports on this module's linkage via
  * mod_info().
  */
 int
 _info(struct modinfo *modinfop)
 {
 	return (mod_info(&modlinkage, modinfop));
 }
 #endif	/* illumos */
 
 static int zfs__init(void);
 static int zfs__fini(void);
 static void zfs_shutdown(void *, int);
 
 static eventhandler_tag zfs_shutdown_event_tag;
 
 /*
  * FreeBSD module initialization.  Takes a root-mount hold for the duration
  * of the setup, initializes zfs_share_lock, the SPA, ZFS, zvol and ioctl
  * layers and the thread-specific-data keys, prints the version banner,
  * releases the root-mount hold and finally creates the control device.
  * Always returns 0.
  */
 int
 zfs__init(void)
 {
 
 	zfs_root_token = root_mount_hold("ZFS");
 
 	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
 
 	spa_init(FREAD | FWRITE);
 	zfs_init();
 	zvol_init();
 	zfs_ioctl_init();
 
 	tsd_create(&zfs_fsyncer_key, NULL);
 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
 	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
 
 	printf("ZFS storage pool version: features support (" SPA_VERSION_STRING ")\n");
 	root_mount_rel(zfs_root_token);
 
 	/* the device node is created last, after the ioctl table is ready */
 	zfsdev_init();
 
 	return (0);
 }
 
 /*
  * FreeBSD module teardown.  Returns EBUSY, leaving everything in place,
  * while spa_busy(), zfs_busy() or zvol_busy() report activity or
  * zio_injection_enabled is set.  Otherwise removes the control device,
  * shuts down the zvol, ZFS and SPA layers, destroys the
  * thread-specific-data keys and zfs_share_lock, and returns 0.
  */
 int
 zfs__fini(void)
 {
 	if (spa_busy() || zfs_busy() || zvol_busy() ||
 	    zio_injection_enabled) {
 		return (EBUSY);
 	}
 
 	/* remove the device node first, before the layers are torn down */
 	zfsdev_fini();
 	zvol_fini();
 	zfs_fini();
 	spa_fini();
 
 	tsd_destroy(&zfs_fsyncer_key);
 	tsd_destroy(&rrw_tsd_key);
 	tsd_destroy(&zfs_allow_log_key);
 
 	mutex_destroy(&zfs_share_lock);
 
 	return (0);
 }
 
 /*
  * shutdown_post_sync event handler: run the normal ZFS teardown, unless
  * the system is going down because of a panic.
  */
 static void
 zfs_shutdown(void *arg __unused, int howto __unused)
 {
 
 	/*
 	 * ZFS fini routines can not properly work in a panic-ed system.
 	 */
 	if (panicstr != NULL)
 		return;
 
 	(void)zfs__fini();
 }
 
 
 /*
  * Module event handler for "zfsctrl".
  *
  * MOD_LOAD initializes ZFS and, on success, registers zfs_shutdown() as a
  * shutdown_post_sync handler.  MOD_UNLOAD tears ZFS down and, on success,
  * deregisters that handler if one was registered.  MOD_SHUTDOWN is accepted
  * as a no-op.  Any other event type yields EOPNOTSUPP.
  */
 static int
 zfs_modevent(module_t mod, int type, void *unused __unused)
 {
 	int rv = EOPNOTSUPP;
 
 	switch (type) {
 	case MOD_LOAD:
 		rv = zfs__init();
 		if (rv == 0) {
 			zfs_shutdown_event_tag = EVENTHANDLER_REGISTER(
 			    shutdown_post_sync, zfs_shutdown, NULL,
 			    SHUTDOWN_PRI_FIRST);
 		}
 		break;
 	case MOD_UNLOAD:
 		rv = zfs__fini();
 		if (rv == 0 && zfs_shutdown_event_tag != NULL) {
 			EVENTHANDLER_DEREGISTER(shutdown_post_sync,
 			    zfs_shutdown_event_tag);
 		}
 		break;
 	case MOD_SHUTDOWN:
 		rv = 0;
 		break;
 	default:
 		break;
 	}
 
 	return (rv);
 }
 
 /*
  * Kernel module glue: declares the "zfsctrl" module with zfs_modevent() as
  * its event handler, at version 1, and records its dependencies on the
  * opensolaris, krpc and acl_nfs4 modules.
  */
 static moduledata_t zfs_mod = {
 	"zfsctrl",
 	zfs_modevent,
 	0
 };
 DECLARE_MODULE(zfsctrl, zfs_mod, SI_SUB_VFS, SI_ORDER_ANY);
 MODULE_VERSION(zfsctrl, 1);
 MODULE_DEPEND(zfsctrl, opensolaris, 1, 1, 1);
 MODULE_DEPEND(zfsctrl, krpc, 1, 1, 1);
 MODULE_DEPEND(zfsctrl, acl_nfs4, 1, 1, 1);
Index: user/ngie/more-tests/sys/cddl/contrib/opensolaris
===================================================================
--- user/ngie/more-tests/sys/cddl/contrib/opensolaris	(revision 281675)
+++ user/ngie/more-tests/sys/cddl/contrib/opensolaris	(revision 281676)

Property changes on: user/ngie/more-tests/sys/cddl/contrib/opensolaris
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/cddl/contrib/opensolaris:r281414-281675
Index: user/ngie/more-tests/sys/conf/Makefile.pc98
===================================================================
--- user/ngie/more-tests/sys/conf/Makefile.pc98	(revision 281675)
+++ user/ngie/more-tests/sys/conf/Makefile.pc98	(revision 281676)
@@ -1,49 +1,52 @@
 # Makefile for FreeBSD(98) after:
 #
 # Makefile.i386 -- with config changes.
 # Copyright 1990 W. Jolitz
 #	from: @(#)Makefile.i386	7.1 5/10/91
 # $FreeBSD$
 #
 # Makefile for FreeBSD
 #
 # This makefile is constructed from a machine description:
 #	config machineid
 # Most changes should be made in the machine description
 #	/sys/pc98/conf/``machineid''
 # after which you should do
 #	 config machineid
 # Generic makefile changes should be made in
 #	/sys/conf/Makefile.pc98
 # after which config should be rerun for all machines.
 #
 
 # Which version of config(8) is required.
 %VERSREQ=	600012
 
 .if !defined(S)
 .if exists(./@/.)
 S=	./@
 .else
 S=	../../..
 .endif
 .endif
+
+LDSCRIPT_NAME?= ldscript.${MACHINE_ARCH}
+
 .include "$S/conf/kern.pre.mk"
 
 ASM_CFLAGS.mpboot.s=		${CLANG_NO_IAS34}
 
 %BEFORE_DEPEND
 
 %OBJS
 
 %FILES.c
 
 %FILES.s
 
 %FILES.m
 
 %CLEAN
 
 %RULES
 
 .include "$S/conf/kern.post.mk"
Index: user/ngie/more-tests/sys/conf/kern.pre.mk
===================================================================
--- user/ngie/more-tests/sys/conf/kern.pre.mk	(revision 281675)
+++ user/ngie/more-tests/sys/conf/kern.pre.mk	(revision 281676)
@@ -1,200 +1,200 @@
 # $FreeBSD$
 
 # Part of a unified Makefile for building kernels.  This part contains all
 # of the definitions that need to be before %BEFORE_DEPEND.
 
 # Allow user to configure things that only affect src tree builds.
 # Note: This is duplicated from src.sys.mk to ensure that we include
 # /etc/src.conf when building the kernel. Kernels can be built without
 # the rest of /usr/src, but they still always process SRCCONF even though
 # the normal mechanisms to prevent that (compiling out of tree) won't
 # work. To ensure they do work, we have to duplicate these few lines here.
 SRCCONF?=	/etc/src.conf
 .if (exists(${SRCCONF}) || ${SRCCONF} != "/etc/src.conf") && !target(_srcconf_included_)
 .include "${SRCCONF}"
 _srcconf_included_:
 .endif
 
 .include <bsd.own.mk>
 .include <bsd.compiler.mk>
 .include "kern.opts.mk"
 
 # Can be overridden by makeoptions or /etc/make.conf
 KERNEL_KO?=	kernel
 KERNEL?=	kernel
 KODIR?=		/boot/${KERNEL}
 LDSCRIPT_NAME?=	ldscript.$M
 LDSCRIPT?=	$S/conf/${LDSCRIPT_NAME}
 
-M=		${MACHINE_CPUARCH}
+M=		${MACHINE}
 
 AWK?=		awk
 CP?=		cp
 LINT?=		lint
 NM?=		nm
 OBJCOPY?=	objcopy
 SIZE?=		size
 
 .if defined(DEBUG)
 _MINUS_O=	-O
 CTFFLAGS+=	-g
 .else
 .if ${MACHINE_CPUARCH} == "powerpc"
 _MINUS_O=	-O	# gcc miscompiles some code at -O2
 .else
 _MINUS_O=	-O2
 .endif
 .endif
 .if ${MACHINE_CPUARCH} == "amd64"
 .if ${COMPILER_TYPE} == "clang"
 COPTFLAGS?=-O2 -pipe
 .else
 COPTFLAGS?=-O2 -frename-registers -pipe
 .endif
 .else
 COPTFLAGS?=${_MINUS_O} -pipe
 .endif
 .if !empty(COPTFLAGS:M-O[23s]) && empty(COPTFLAGS:M-fno-strict-aliasing)
 COPTFLAGS+= -fno-strict-aliasing
 .endif
 .if !defined(NO_CPU_COPTFLAGS)
 COPTFLAGS+= ${_CPUCFLAGS}
 .endif
 NOSTDINC= -nostdinc
 
 INCLUDES= ${NOSTDINC} ${INCLMAGIC} -I. -I$S
 
 .if make(depend) || make(kernel-depend)
 
 # This hack lets us use the ipfilter code without spamming a new
 # include path into contrib'ed source files.
 INCLUDES+= -I$S/contrib/ipfilter
 
 # ... and the same for ath
 INCLUDES+= -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal
 
 # ... and the same for the NgATM stuff
 INCLUDES+= -I$S/contrib/ngatm
 
 # ... and the same for vchiq
 INCLUDES+= -I$S/contrib/vchiq
 
 # ... and the same for twa
 INCLUDES+= -I$S/dev/twa
 
 # ... and the same for cxgb and cxgbe
 INCLUDES+= -I$S/dev/cxgb -I$S/dev/cxgbe
 
 .endif
 
 CFLAGS=	${COPTFLAGS} ${DEBUG} ${CWARNFLAGS}
 CFLAGS+= ${INCLUDES} -D_KERNEL -DHAVE_KERNEL_OPTION_HEADERS -include opt_global.h
 CFLAGS_PARAM_INLINE_UNIT_GROWTH?=100
 CFLAGS_PARAM_LARGE_FUNCTION_GROWTH?=1000
 .if ${MACHINE_CPUARCH} == "mips"
 CFLAGS_ARCH_PARAMS?=--param max-inline-insns-single=1000
 .endif
 CFLAGS.gcc+= -fno-common -fms-extensions -finline-limit=${INLINE_LIMIT}
 CFLAGS.gcc+= --param inline-unit-growth=${CFLAGS_PARAM_INLINE_UNIT_GROWTH}
 CFLAGS.gcc+= --param large-function-growth=${CFLAGS_PARAM_LARGE_FUNCTION_GROWTH}
 .if defined(CFLAGS_ARCH_PARAMS)
 CFLAGS.gcc+=${CFLAGS_ARCH_PARAMS}
 .endif
 WERROR?= -Werror
 
 # XXX LOCORE means "don't declare C stuff" not "for locore.s".
 ASM_CFLAGS= -x assembler-with-cpp -DLOCORE ${CFLAGS} ${ASM_CFLAGS.${.IMPSRC:T}} 
 
 .if defined(PROFLEVEL) && ${PROFLEVEL} >= 1
 CFLAGS+=	-DGPROF
 CFLAGS.gcc+=	-falign-functions=16
 .if ${PROFLEVEL} >= 2
 CFLAGS+=	-DGPROF4 -DGUPROF
 PROF=		-pg
 .if ${COMPILER_TYPE} == "gcc"
 PROF+=		-mprofiler-epilogue
 .endif
 .else
 PROF=		-pg
 .endif
 .endif
 DEFINED_PROF=	${PROF}
 
 # Put configuration-specific C flags last (except for ${PROF}) so that they
 # can override the others.
 CFLAGS+=	${CONF_CFLAGS}
 
 # Optional linting. This can be overridden in /etc/make.conf.
 LINTFLAGS=	${LINTOBJKERNFLAGS}
 
 NORMAL_C= ${CC} -c ${CFLAGS} ${WERROR} ${PROF} ${.IMPSRC}
 NORMAL_S= ${CC} -c ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}
 PROFILE_C= ${CC} -c ${CFLAGS} ${WERROR} ${.IMPSRC}
 NORMAL_C_NOWERROR= ${CC} -c ${CFLAGS} ${PROF} ${.IMPSRC}
 
 NORMAL_M= ${AWK} -f $S/tools/makeobjops.awk ${.IMPSRC} -c ; \
 	  ${CC} -c ${CFLAGS} ${WERROR} ${PROF} ${.PREFIX}.c
 
 NORMAL_FW= uudecode -o ${.TARGET} ${.ALLSRC}
 NORMAL_FWO= ${LD} -b binary --no-warn-mismatch -d -warn-common -r \
 	-o ${.TARGET} ${.ALLSRC:M*.fw}
 
 # Special flags for managing the compat compiles for ZFS
 ZFS_CFLAGS=	-DFREEBSD_NAMECACHE -DBUILDING_ZFS -nostdinc -I$S/cddl/compat/opensolaris -I$S/cddl/contrib/opensolaris/uts/common/fs/zfs -I$S/cddl/contrib/opensolaris/uts/common/zmod -I$S/cddl/contrib/opensolaris/uts/common -I$S -I$S/cddl/contrib/opensolaris/common/zfs -I$S/cddl/contrib/opensolaris/common ${CFLAGS} -Wno-unknown-pragmas -Wno-missing-prototypes -Wno-undef -Wno-strict-prototypes -Wno-cast-qual -Wno-parentheses -Wno-redundant-decls -Wno-missing-braces -Wno-uninitialized -Wno-unused -Wno-inline -Wno-switch -Wno-pointer-arith -Wno-unknown-pragmas
 ZFS_CFLAGS+=	-include $S/cddl/compat/opensolaris/sys/debug_compat.h
 ZFS_ASM_CFLAGS= -x assembler-with-cpp -DLOCORE ${ZFS_CFLAGS}
 ZFS_C=		${CC} -c ${ZFS_CFLAGS} ${WERROR} ${PROF} ${.IMPSRC}
 ZFS_S=		${CC} -c ${ZFS_ASM_CFLAGS} ${WERROR} ${.IMPSRC}
 
 .if ${MK_CTF} != "no"
 NORMAL_CTFCONVERT=	${CTFCONVERT} ${CTFFLAGS} ${.TARGET}
 .elif ${MAKE_VERSION} >= 5201111300
 NORMAL_CTFCONVERT=
 .else
 NORMAL_CTFCONVERT=	@:
 .endif
 
 NORMAL_LINT=	${LINT} ${LINTFLAGS} ${CFLAGS:M-[DIU]*} ${.IMPSRC}
 
 # Infiniband C flags.  Correct include paths and omit errors that linux
 # does not honor.
 OFEDINCLUDES=	-I$S/ofed/include/
 OFEDNOERR=	-Wno-cast-qual -Wno-pointer-arith
 OFEDCFLAGS=	${CFLAGS:N-I*} ${OFEDINCLUDES} ${CFLAGS:M-I*} ${OFEDNOERR}
 OFED_C_NOIMP=	${CC} -c -o ${.TARGET} ${OFEDCFLAGS} ${WERROR} ${PROF}
 OFED_C=		${OFED_C_NOIMP} ${.IMPSRC}
 
 GEN_CFILES= $S/$M/$M/genassym.c ${MFILES:T:S/.m$/.c/}
 SYSTEM_CFILES= config.c env.c hints.c vnode_if.c
 SYSTEM_DEP= Makefile ${SYSTEM_OBJS}
 SYSTEM_OBJS= locore.o ${MDOBJS} ${OBJS}
 SYSTEM_OBJS+= ${SYSTEM_CFILES:.c=.o}
 SYSTEM_OBJS+= hack.So
 SYSTEM_LD= @${LD} -Bdynamic -T ${LDSCRIPT} ${_LDFLAGS} --no-warn-mismatch \
 	--warn-common --export-dynamic --dynamic-linker /red/herring \
 	-o ${.TARGET} -X ${SYSTEM_OBJS} vers.o
 SYSTEM_LD_TAIL= @${OBJCOPY} --strip-symbol gcc2_compiled. ${.TARGET} ; \
 	${SIZE} ${.TARGET} ; chmod 755 ${.TARGET}
 SYSTEM_DEP+= ${LDSCRIPT}
 
 # MKMODULESENV is set here so that port makefiles can augment
 # them.
 
 MKMODULESENV+=	MAKEOBJDIRPREFIX=${.OBJDIR}/modules KMODDIR=${KODIR}
 MKMODULESENV+=	MACHINE_CPUARCH=${MACHINE_CPUARCH}
 MKMODULESENV+=	MACHINE=${MACHINE} MACHINE_ARCH=${MACHINE_ARCH}
 MKMODULESENV+=	MODULES_EXTRA="${MODULES_EXTRA}" WITHOUT_MODULES="${WITHOUT_MODULES}"
 .if (${KERN_IDENT} == LINT)
 MKMODULESENV+=	ALL_MODULES=LINT
 .endif
 .if defined(MODULES_OVERRIDE)
 MKMODULESENV+=	MODULES_OVERRIDE="${MODULES_OVERRIDE}"
 .endif
 .if defined(DEBUG)
 MKMODULESENV+=	DEBUG_FLAGS="${DEBUG}"
 .endif
 
 # Detect kernel config options that force stack frames to be turned on.
 DDB_ENABLED!=	grep DDB opt_ddb.h || true ; echo
 DTR_ENABLED!=	grep KDTRACE_FRAME opt_kdtrace.h || true ; echo
 HWPMC_ENABLED!=	grep HWPMC opt_hwpmc_hooks.h || true ; echo
Index: user/ngie/more-tests/sys/conf/kmod.mk
===================================================================
--- user/ngie/more-tests/sys/conf/kmod.mk	(revision 281675)
+++ user/ngie/more-tests/sys/conf/kmod.mk	(revision 281676)
@@ -1,453 +1,453 @@
 #	From: @(#)bsd.prog.mk	5.26 (Berkeley) 6/25/91
 # $FreeBSD$
 #
 # The include file <bsd.kmod.mk> handles building and installing loadable
 # kernel modules.
 #
 #
 # +++ variables +++
 #
 # CLEANFILES	Additional files to remove for the clean and cleandir targets.
 #
 # EXPORT_SYMS	A list of symbols that should be exported from the module,
 #		or the name of a file containing a list of symbols, or YES
 #		to export all symbols.  If not defined, no symbols are
 #		exported.
 #
 # KMOD		The name of the kernel module to build.
 #
 # KMODDIR	Base path for kernel modules (see kld(4)). [/boot/kernel]
 #
 # KMODOWN	Module file owner. [${BINOWN}]
 #
 # KMODGRP	Module file group. [${BINGRP}]
 #
 # KMODMODE	Module file mode. [${BINMODE}]
 #
 # KMODLOAD	Command to load a kernel module [/sbin/kldload]
 #
 # KMODUNLOAD	Command to unload a kernel module [/sbin/kldunload]
 #
 # MFILES	Optionally a list of interfaces used by the module.
 #		This file contains a default list of interfaces.
 #
 # PROG		The name of the kernel module to build.
 #		If not supplied, ${KMOD}.ko is used.
 #
 # SRCS		List of source files.
 #
 # FIRMWS	List of firmware images in format filename:shortname:version
 #
 # FIRMWARE_LICENSE
 #		Set to the name of the license the user has to agree on in
 #		order to use this firmware. See /usr/share/doc/legal
 #
 # DESTDIR	The tree where the module gets installed. [not set]
 #
 # +++ targets +++
 #
 # 	install:
 #               install the kernel module; if the Makefile
 #               does not itself define the target install, the targets
 #               beforeinstall and afterinstall may also be used to cause
 #               actions immediately before and after the install target
 #		is executed.
 #
 # 	load:
 #		Load a module.
 #
 # 	unload:
 #		Unload a module.
 #
 
 AWK?=		awk
 KMODLOAD?=	/sbin/kldload
 KMODUNLOAD?=	/sbin/kldunload
 OBJCOPY?=	objcopy
 
 .include <bsd.init.mk>
 # Grab all the options for a kernel build. For backwards compat, we need to
 # do this after bsd.own.mk.
 .include "kern.opts.mk"
 .include <bsd.compiler.mk>
 .include "config.mk"
 
 .SUFFIXES: .out .o .c .cc .cxx .C .y .l .s .S
 
 # amd64 and mips use direct linking for kmod, all others use shared binaries
 .if ${MACHINE_CPUARCH} != amd64 && ${MACHINE_CPUARCH} != mips
 __KLD_SHARED=yes
 .else
 __KLD_SHARED=no
 .endif
 
 .if !empty(CFLAGS:M-O[23s]) && empty(CFLAGS:M-fno-strict-aliasing)
 CFLAGS+=	-fno-strict-aliasing
 .endif
 WERROR?=	-Werror
 CFLAGS+=	${WERROR}
 CFLAGS+=	-D_KERNEL
 CFLAGS+=	-DKLD_MODULE
 
 # Don't use any standard or source-relative include directories.
 NOSTDINC=	-nostdinc
 CFLAGS:=	${CFLAGS:N-I*} ${NOSTDINC} ${INCLMAGIC} ${CFLAGS:M-I*}
 .if defined(KERNBUILDDIR)
 CFLAGS+=	-DHAVE_KERNEL_OPTION_HEADERS -include ${KERNBUILDDIR}/opt_global.h
 .endif
 
 # Add -I paths for system headers.  Individual module makefiles don't
 # need any -I paths for this.  Similar defaults for .PATH can't be
 # set because there are no standard paths for non-headers.
 CFLAGS+=	-I. -I${SYSDIR}
 
 CFLAGS.gcc+=	-finline-limit=${INLINE_LIMIT}
 CFLAGS.gcc+=	-fms-extensions
 CFLAGS.gcc+= --param inline-unit-growth=100
 CFLAGS.gcc+= --param large-function-growth=1000
 
 # Disallow common variables, and if we end up with commons from
 # somewhere unexpected, allocate storage for them in the module itself.
 CFLAGS+=	-fno-common
 LDFLAGS+=	-d -warn-common
 
 CFLAGS+=	${DEBUG_FLAGS}
 .if ${MACHINE_CPUARCH} == amd64
 CFLAGS+=	-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer
 .endif
 
 # Temporary workaround for PR 196407, which contains the fascinating details.
 # Don't allow clang to use fpu instructions or registers in kernel modules.
 .if ${MACHINE_CPUARCH} == arm
 CFLAGS.clang+=	-mllvm -arm-use-movt=0
 CFLAGS.clang+=	-mfpu=none
 CFLAGS+=	-funwind-tables
 .endif
 
 .if ${MACHINE_CPUARCH} == powerpc
 CFLAGS+=	-mlongcall -fno-omit-frame-pointer
 .endif
 
 .if ${MACHINE_CPUARCH} == mips
 CFLAGS+=	-G0 -fno-pic -mno-abicalls -mlong-calls
 .endif
 
 .if defined(DEBUG) || defined(DEBUG_FLAGS)
 CTFFLAGS+=	-g
 .endif
 
 .if defined(FIRMWS)
 ${KMOD:S/$/.c/}: ${SYSDIR}/tools/fw_stub.awk
 	${AWK} -f ${SYSDIR}/tools/fw_stub.awk ${FIRMWS} -m${KMOD} -c${KMOD:S/$/.c/g} \
 	    ${FIRMWARE_LICENSE:C/.+/-l/}${FIRMWARE_LICENSE}
 
 SRCS+=	${KMOD:S/$/.c/}
 CLEANFILES+=	${KMOD:S/$/.c/}
 
 .for _firmw in ${FIRMWS}
 ${_firmw:C/\:.*$/.fwo/}:	${_firmw:C/\:.*$//}
 	@${ECHO} ${_firmw:C/\:.*$//} ${.ALLSRC:M*${_firmw:C/\:.*$//}}
 	@if [ -e ${_firmw:C/\:.*$//} ]; then			\
 		${LD} -b binary --no-warn-mismatch ${_LDFLAGS}	\
 		    -r -d -o ${.TARGET}	${_firmw:C/\:.*$//};	\
 	else							\
 		ln -s ${.ALLSRC:M*${_firmw:C/\:.*$//}} ${_firmw:C/\:.*$//}; \
 		${LD} -b binary --no-warn-mismatch ${_LDFLAGS}	\
 		    -r -d -o ${.TARGET}	${_firmw:C/\:.*$//};	\
 		rm ${_firmw:C/\:.*$//};				\
 	fi
 
 OBJS+=	${_firmw:C/\:.*$/.fwo/}
 .endfor
 .endif
 
 # Conditionally include SRCS based on kernel config options.
 .for _o in ${KERN_OPTS}
 SRCS+=${SRCS.${_o}}
 .endfor
 
 OBJS+=	${SRCS:N*.h:R:S/$/.o/g}
 
 .if !defined(PROG)
 PROG=	${KMOD}.ko
 .endif
 
 # Without DEBUG_FLAGS the link output is the final ${PROG}.  With
 # DEBUG_FLAGS the link output is ${PROG}.debug instead: the debug info is
 # copied into ${PROG}.symbols and ${PROG} becomes the stripped copy that
 # carries a gnu-debuglink to that symbols file.
 .if !defined(DEBUG_FLAGS)
 FULLPROG=	${PROG}
 .else
 FULLPROG=	${PROG}.debug
 ${PROG}: ${FULLPROG} ${PROG}.symbols
 	${OBJCOPY} --strip-debug --add-gnu-debuglink=${PROG}.symbols\
 	    ${FULLPROG} ${.TARGET}
 ${PROG}.symbols: ${FULLPROG}
 	${OBJCOPY} --only-keep-debug ${FULLPROG} ${.TARGET}
 .endif
 
 # Shared-object modules: link the relocatable ${KMOD}.kld into the final
 # shared object, and strip its debug info unless DEBUG_FLAGS is set.
 .if ${__KLD_SHARED} == yes
 ${FULLPROG}: ${KMOD}.kld
 	${LD} -Bshareable ${_LDFLAGS} -o ${.TARGET} ${KMOD}.kld
 .if !defined(DEBUG_FLAGS)
 	${OBJCOPY} --strip-debug ${.TARGET}
 .endif
 .endif
 
 EXPORT_SYMS?=	NO
 .if ${EXPORT_SYMS} != YES
 CLEANFILES+=	export_syms
 .endif
 
 # Relocatable link of all module objects.  With shared-object modules the
 # result is the intermediate ${KMOD}.kld, otherwise it is ${FULLPROG} itself.
 # After linking (and the optional CTF merge) the symbol table is trimmed to
 # what EXPORT_SYMS asks for: export_syms is generated from EXPORT_SYMS (empty
 # for NO, the literal list, or the named file minus '#' lines) and fed to
 # kmod_syms.awk, whose output is passed to objcopy as arguments.
 # ${AWK} is used rather than a hard-coded awk so that an AWK override
 # applies here as it does to the other awk-driven rules in this file.
 .if ${__KLD_SHARED} == yes
 ${KMOD}.kld: ${OBJS}
 .else
 ${FULLPROG}: ${OBJS}
 .endif
 	${LD} ${_LDFLAGS} -r -d -o ${.TARGET} ${OBJS}
 .if ${MK_CTF} != "no"
 	${CTFMERGE} ${CTFFLAGS} -o ${.TARGET} ${OBJS}
 .endif
 .if defined(EXPORT_SYMS)
 .if ${EXPORT_SYMS} != YES
 .if ${EXPORT_SYMS} == NO
 	:> export_syms
 .elif !exists(${.CURDIR}/${EXPORT_SYMS})
 	echo ${EXPORT_SYMS} > export_syms
 .else
 	grep -v '^#' < ${EXPORT_SYMS} > export_syms
 .endif
 	${AWK} -f ${SYSDIR}/conf/kmod_syms.awk ${.TARGET} \
 	    export_syms | xargs -J% ${OBJCOPY} % ${.TARGET}
 .endif
 .endif
 .if !defined(DEBUG_FLAGS) && ${__KLD_SHARED} == no
 	${OBJCOPY} --strip-debug ${.TARGET}
 .endif
 
 _ILINKS=machine
 .if ${MACHINE} != ${MACHINE_CPUARCH}
 _ILINKS+=${MACHINE_CPUARCH}
 .endif
 .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64"
 _ILINKS+=x86
 .endif
 CLEANFILES+=${_ILINKS}
 
 all: objwarn ${PROG}
 
 beforedepend: ${_ILINKS}
 
 # Ensure that the links exist without depending on it when it exists which
 # causes all the modules to be rebuilt when the directory pointed to changes.
 .for _link in ${_ILINKS}
 .if !exists(${.OBJDIR}/${_link})
 ${OBJS}: ${.OBJDIR}/${_link}
 .endif
 .endfor
 
 # Search for kernel source tree in standard places.
 .for _dir in ${.CURDIR}/../.. ${.CURDIR}/../../.. /sys /usr/src/sys
 .if !defined(SYSDIR) && exists(${_dir}/kern/)
 SYSDIR=	${_dir}
 .endif
 .endfor
 .if !defined(SYSDIR) || !exists(${SYSDIR}/kern/)
 .error "can't find kernel source tree"
 .endif
 
 # Create each include symlink in the object directory.  The link named
 # "machine" points at ${SYSDIR}/${MACHINE}/include; any other link name
 # points at ${SYSDIR}/<name>/include.  The destination is canonicalized
 # with /bin/pwd before the link is (re)created with ln -sf.
 .for _link in ${_ILINKS}
 .PHONY: ${_link}
 ${_link}: ${.OBJDIR}/${_link}
 
 ${.OBJDIR}/${_link}:
 	@case ${.TARGET:T} in \
 	machine) \
 		path=${SYSDIR}/${MACHINE}/include ;; \
 	*) \
 		path=${SYSDIR}/${.TARGET:T}/include ;; \
 	esac ; \
 	path=`(cd $$path && /bin/pwd)` ; \
 	${ECHO} ${.TARGET:T} "->" $$path ; \
 	ln -sf $$path ${.TARGET:T}
 .endfor
 
 CLEANFILES+= ${PROG} ${KMOD}.kld ${OBJS}
 
 .if defined(DEBUG_FLAGS)
 CLEANFILES+= ${FULLPROG} ${PROG}.symbols
 .endif
 
 .if !target(install)
 
 _INSTALLFLAGS:=	${INSTALLFLAGS}
 .for ie in ${INSTALLFLAGS_EDIT}
 _INSTALLFLAGS:=	${_INSTALLFLAGS${ie}}
 .endfor
 
 .if !target(realinstall)
 realinstall: _kmodinstall
 .ORDER: beforeinstall _kmodinstall
 _kmodinstall:
 	${INSTALL} -o ${KMODOWN} -g ${KMODGRP} -m ${KMODMODE} \
 	    ${_INSTALLFLAGS} ${PROG} ${DESTDIR}${KMODDIR}
 .if defined(DEBUG_FLAGS) && !defined(INSTALL_NODEBUG) && ${MK_KERNEL_SYMBOLS} != "no"
 	${INSTALL} -o ${KMODOWN} -g ${KMODGRP} -m ${KMODMODE} \
 	    ${_INSTALLFLAGS} ${PROG}.symbols ${DESTDIR}${KMODDIR}
 .endif
 
 .include <bsd.links.mk>
 
 .if !defined(NO_XREF)
 afterinstall: _kldxref
 .ORDER: realinstall _kldxref
 .ORDER: _installlinks _kldxref
 _kldxref:
 	@if type kldxref >/dev/null 2>&1; then \
 		${ECHO} kldxref ${DESTDIR}${KMODDIR}; \
 		kldxref ${DESTDIR}${KMODDIR}; \
 	fi
 .endif
 .endif # !target(realinstall)
 
 .endif # !target(install)
 
 .if !target(load)
 load: ${PROG}
 	${KMODLOAD} -v ${.OBJDIR}/${PROG}
 .endif
 
 .if !target(unload)
 unload:
 	${KMODUNLOAD} -v ${PROG}
 .endif
 
 .if defined(KERNBUILDDIR)
 .PATH: ${KERNBUILDDIR}
 CFLAGS+=	-I${KERNBUILDDIR}
 .for _src in ${SRCS:Mopt_*.h}
 CLEANFILES+=	${_src}
 .if !target(${_src})
 ${_src}:
 	ln -sf ${KERNBUILDDIR}/${_src} ${.TARGET}
 .endif
 .endfor
 .else
 .for _src in ${SRCS:Mopt_*.h}
 CLEANFILES+=	${_src}
 .if !target(${_src})
 ${_src}:
 	:> ${.TARGET}
 .endif
 .endfor
 .endif
 
 # Respect configuration-specific C flags.
 CFLAGS+=	${CONF_CFLAGS}
 
 MFILES?= dev/acpica/acpi_if.m dev/acpi_support/acpi_wmi_if.m \
 	dev/agp/agp_if.m dev/ata/ata_if.m dev/eisa/eisa_if.m \
 	dev/fb/fb_if.m dev/gpio/gpio_if.m dev/gpio/gpiobus_if.m \
 	dev/iicbus/iicbb_if.m dev/iicbus/iicbus_if.m \
 	dev/mbox/mbox_if.m dev/mmc/mmcbr_if.m dev/mmc/mmcbus_if.m \
 	dev/mii/miibus_if.m dev/mvs/mvs_if.m dev/ofw/ofw_bus_if.m \
 	dev/pccard/card_if.m dev/pccard/power_if.m dev/pci/pci_if.m \
 	dev/pci/pcib_if.m dev/ppbus/ppbus_if.m \
 	dev/sdhci/sdhci_if.m dev/smbus/smbus_if.m dev/spibus/spibus_if.m \
 	dev/sound/pci/hda/hdac_if.m \
 	dev/sound/pcm/ac97_if.m dev/sound/pcm/channel_if.m \
 	dev/sound/pcm/feeder_if.m dev/sound/pcm/mixer_if.m \
 	dev/sound/midi/mpu_if.m dev/sound/midi/mpufoi_if.m \
 	dev/sound/midi/synth_if.m dev/usb/usb_if.m isa/isa_if.m \
 	kern/bus_if.m kern/clock_if.m \
 	kern/cpufreq_if.m kern/device_if.m kern/serdev_if.m \
 	libkern/iconv_converter_if.m opencrypto/cryptodev_if.m \
 	pc98/pc98/canbus_if.m dev/etherswitch/mdio_if.m
 
 .for _srcsrc in ${MFILES}
 .for _ext in c h
 .for _src in ${SRCS:M${_srcsrc:T:R}.${_ext}}
 CLEANFILES+=	${_src}
 .if !target(${_src})
 ${_src}: ${SYSDIR}/tools/makeobjops.awk ${SYSDIR}/${_srcsrc}
 	${AWK} -f ${SYSDIR}/tools/makeobjops.awk ${SYSDIR}/${_srcsrc} -${_ext}
 .endif
 .endfor # _src
 .endfor # _ext
 .endfor # _srcsrc
 
 .if !empty(SRCS:Mvnode_if.c)
 CLEANFILES+=	vnode_if.c
 vnode_if.c: ${SYSDIR}/tools/vnode_if.awk ${SYSDIR}/kern/vnode_if.src
 	${AWK} -f ${SYSDIR}/tools/vnode_if.awk ${SYSDIR}/kern/vnode_if.src -c
 .endif
 
 .if !empty(SRCS:Mvnode_if.h)
 CLEANFILES+=	vnode_if.h vnode_if_newproto.h vnode_if_typedef.h
 vnode_if.h vnode_if_newproto.h vnode_if_typedef.h: ${SYSDIR}/tools/vnode_if.awk \
     ${SYSDIR}/kern/vnode_if.src
 vnode_if.h: vnode_if_newproto.h vnode_if_typedef.h
 	${AWK} -f ${SYSDIR}/tools/vnode_if.awk ${SYSDIR}/kern/vnode_if.src -h
 vnode_if_newproto.h:
 	${AWK} -f ${SYSDIR}/tools/vnode_if.awk ${SYSDIR}/kern/vnode_if.src -p
 vnode_if_typedef.h:
 	${AWK} -f ${SYSDIR}/tools/vnode_if.awk ${SYSDIR}/kern/vnode_if.src -q
 .endif
 
 .for _i in mii pccard
 .if !empty(SRCS:M${_i}devs.h)
 CLEANFILES+=	${_i}devs.h
 ${_i}devs.h: ${SYSDIR}/tools/${_i}devs2h.awk ${SYSDIR}/dev/${_i}/${_i}devs
 	${AWK} -f ${SYSDIR}/tools/${_i}devs2h.awk ${SYSDIR}/dev/${_i}/${_i}devs
 .endif
 .endfor # _i
 
 .if !empty(SRCS:Musbdevs.h)
 CLEANFILES+=	usbdevs.h
 usbdevs.h: ${SYSDIR}/tools/usbdevs2h.awk ${SYSDIR}/dev/usb/usbdevs
 	${AWK} -f ${SYSDIR}/tools/usbdevs2h.awk ${SYSDIR}/dev/usb/usbdevs -h
 .endif
 
 .if !empty(SRCS:Musbdevs_data.h)
 CLEANFILES+=	usbdevs_data.h
 usbdevs_data.h: ${SYSDIR}/tools/usbdevs2h.awk ${SYSDIR}/dev/usb/usbdevs
 	${AWK} -f ${SYSDIR}/tools/usbdevs2h.awk ${SYSDIR}/dev/usb/usbdevs -d
 .endif
 
 .if !empty(SRCS:Macpi_quirks.h)
 CLEANFILES+=	acpi_quirks.h
 acpi_quirks.h: ${SYSDIR}/tools/acpi_quirks2h.awk ${SYSDIR}/dev/acpica/acpi_quirks
 	${AWK} -f ${SYSDIR}/tools/acpi_quirks2h.awk ${SYSDIR}/dev/acpica/acpi_quirks
 .endif
 
 .if !empty(SRCS:Massym.s)
 CLEANFILES+=	assym.s genassym.o
 assym.s: genassym.o
 .if defined(KERNBUILDDIR)
 genassym.o: opt_global.h
 .endif
 assym.s: ${SYSDIR}/kern/genassym.sh
 	sh ${SYSDIR}/kern/genassym.sh genassym.o > ${.TARGET}
-genassym.o: ${SYSDIR}/${MACHINE_CPUARCH}/${MACHINE_CPUARCH}/genassym.c
+genassym.o: ${SYSDIR}/${MACHINE}/${MACHINE}/genassym.c
 genassym.o: ${SRCS:Mopt_*.h}
 	${CC} -c ${CFLAGS:N-fno-common} \
-	    ${SYSDIR}/${MACHINE_CPUARCH}/${MACHINE_CPUARCH}/genassym.c
+	    ${SYSDIR}/${MACHINE}/${MACHINE}/genassym.c
 .endif
 
 lint: ${SRCS}
 	${LINT} ${LINTKERNFLAGS} ${CFLAGS:M-[DILU]*} ${.ALLSRC:M*.c}
 
 .if defined(KERNBUILDDIR)
 ${OBJS}: opt_global.h
 .endif
 
 .include <bsd.dep.mk>
 
 cleandepend: cleanilinks
 # .depend needs include links so we remove them only together.
 cleanilinks:
 	rm -f ${_ILINKS}
 
 .if !exists(${.OBJDIR}/${DEPENDFILE})
 ${OBJS}: ${SRCS:M*.h}
 .endif
 
 .include <bsd.obj.mk>
 .include "kern.mk"
Index: user/ngie/more-tests/sys/conf
===================================================================
--- user/ngie/more-tests/sys/conf	(revision 281675)
+++ user/ngie/more-tests/sys/conf	(revision 281676)

Property changes on: user/ngie/more-tests/sys/conf
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/conf:r281621-281675
Index: user/ngie/more-tests/sys/dev/cxgbe/tom/t4_listen.c
===================================================================
--- user/ngie/more-tests/sys/dev/cxgbe/tom/t4_listen.c	(revision 281675)
+++ user/ngie/more-tests/sys/dev/cxgbe/tom/t4_listen.c	(revision 281676)
@@ -1,1627 +1,1575 @@
 /*-
  * Copyright (c) 2012 Chelsio Communications, Inc.
  * All rights reserved.
  * Written by: Navdeep Parhar <np@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/module.h>
 #include <sys/protosw.h>
 #include <sys/refcount.h>
 #include <sys/domain.h>
 #include <sys/fnv_hash.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet6/scope6_var.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #define TCPSTATES
 #include <netinet/tcp_fsm.h>
 #include <netinet/toecore.h>
 
 #include "common/common.h"
 #include "common/t4_msg.h"
 #include "common/t4_regs.h"
 #include "tom/t4_tom_l2t.h"
 #include "tom/t4_tom.h"
 
 /* stid services */
 static int alloc_stid(struct adapter *, struct listen_ctx *, int);
 static struct listen_ctx *lookup_stid(struct adapter *, int);
 static void free_stid(struct adapter *, struct listen_ctx *);
 
 /* lctx services */
 static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *,
     struct port_info *);
 static int free_lctx(struct adapter *, struct listen_ctx *);
 static void hold_lctx(struct listen_ctx *);
 static void listen_hash_add(struct adapter *, struct listen_ctx *);
 static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *);
 static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *);
 static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *);
 
 static inline void save_qids_in_mbuf(struct mbuf *, struct port_info *);
 static inline void get_qids_from_mbuf(struct mbuf *m, int *, int *);
 static void send_reset_synqe(struct toedev *, struct synq_entry *);
 
 /*
  * Reserve stids for a listener and record lctx in the stid table.
  *
  * One stid is reserved for IPv4 and two naturally aligned stids for IPv6
  * (isipv6 != 0).  The bookkeeping lives in lctx->stid_region, which gets
  * linked into t->stids.  Returns the stid offset plus t->stid_base on
  * success, or -1 if no suitable free range exists.
  */
 static int
 alloc_stid(struct adapter *sc, struct listen_ctx *lctx, int isipv6)
 {
 	struct tid_info *t = &sc->tids;
 	u_int stid, n, f, mask;
 	struct stid_region *sr = &lctx->stid_region;
 
 	/*
 	 * An IPv6 server needs 2 naturally aligned stids (1 stid = 4 cells) in
 	 * the TCAM.  The start of the stid region is properly aligned (the chip
 	 * requires each region to be 128-cell aligned).
 	 */
 	n = isipv6 ? 2 : 1;
 	mask = n - 1;
 	KASSERT((t->stid_base & mask) == 0 && (t->nstids & mask) == 0,
 	    ("%s: stid region (%u, %u) not properly aligned.  n = %u",
 	    __func__, t->stid_base, t->nstids, n));
 
 	mtx_lock(&t->stid_lock);
 	if (n > t->nstids - t->stids_in_use) {
 		mtx_unlock(&t->stid_lock);
 		return (-1);
 	}
 
 	if (t->nstids_free_head >= n) {
 		/*
 		 * This allocation will definitely succeed because the region
 		 * starts at a good alignment and we just checked we have enough
 		 * stids free.
 		 */
 		/* f = slack left free after the new region to keep it aligned. */
 		f = t->nstids_free_head & mask;
 		t->nstids_free_head -= n + f;
 		stid = t->nstids_free_head;
 		TAILQ_INSERT_HEAD(&t->stids, sr, link);
 	} else {
 		struct stid_region *s;
 
 		/*
 		 * Walk the regions; stid tracks the offset just past each
 		 * region (its used stids plus its trailing free stids).  The
 		 * new region is carved from the tail of the first region whose
 		 * free space fits n stids plus the alignment slack f.
 		 */
 		stid = t->nstids_free_head;
 		TAILQ_FOREACH(s, &t->stids, link) {
 			stid += s->used + s->free;
 			f = stid & mask;
 			if (s->free >= n + f) {
 				stid -= n + f;
 				s->free -= n + f;
 				TAILQ_INSERT_AFTER(&t->stids, s, sr, link);
 				goto allocated;
 			}
 		}
 
 		/* The walk covered every region, so it must end at nstids. */
 		if (__predict_false(stid != t->nstids)) {
 			panic("%s: stids TAILQ (%p) corrupt."
 			    "  At %d instead of %d at the end of the queue.",
 			    __func__, &t->stids, stid, t->nstids);
 		}
 
 		mtx_unlock(&t->stid_lock);
 		return (-1);
 	}
 
 allocated:
 	sr->used = n;
 	sr->free = f;
 	t->stids_in_use += n;
 	t->stid_tab[stid] = lctx;
 	mtx_unlock(&t->stid_lock);
 
 	KASSERT(((stid + t->stid_base) & mask) == 0,
 	    ("%s: EDOOFUS.", __func__));
 	return (stid + t->stid_base);
 }
 
 /*
  * Map a stid to the listen context recorded for it.  The stid is rebased
  * against stid_base before it is used as an index into stid_tab.
  */
 static struct listen_ctx *
 lookup_stid(struct adapter *sc, int stid)
 {
 
 	return (sc->tids.stid_tab[stid - sc->tids.stid_base]);
 }
 
 /*
  * Give back the stids held by lctx's stid region.  The region's whole span
  * (used stids plus its trailing free stids) is added to the free count of
  * the preceding region, or to nstids_free_head when this region is the
  * first one in the queue, and the region is then unlinked.
  */
 static void
 free_stid(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct tid_info *tids = &sc->tids;
 	struct stid_region *region = &lctx->stid_region;
 	struct stid_region *prev;
 
 	KASSERT(region->used > 0,
 	    ("%s: nonsense free (%d)", __func__, region->used));
 
 	mtx_lock(&tids->stid_lock);
 	prev = TAILQ_PREV(region, stid_head, link);
 	if (prev == NULL)
 		tids->nstids_free_head += region->used + region->free;
 	else
 		prev->free += region->used + region->free;
 	KASSERT(tids->stids_in_use >= region->used,
 	    ("%s: stids_in_use (%u) < stids being freed (%u)", __func__,
 	    tids->stids_in_use, region->used));
 	tids->stids_in_use -= region->used;
 	TAILQ_REMOVE(&tids->stids, region, link);
 	mtx_unlock(&tids->stid_lock);
 }
 
 /*
  * Allocate and initialize a listen context for inp, using port pi's control
  * queue and first offload rx queue.
  *
  * The context gets a stid, an initial refcount of 1 and, for an IPv6
  * listener bound to a specific address, a hold on that local address.
  * A reference on inp is taken on success.  Returns NULL if the memory, the
  * stid, or the address hold cannot be obtained; nothing stays allocated in
  * that case.  Must be called with inp write-locked.
  */
 static struct listen_ctx *
 alloc_lctx(struct adapter *sc, struct inpcb *inp, struct port_info *pi)
 {
 	struct listen_ctx *lctx;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/* M_ZERO: fields not set below (e.g. ce, flags) start out as 0. */
 	lctx = malloc(sizeof(struct listen_ctx), M_CXGBE, M_NOWAIT | M_ZERO);
 	if (lctx == NULL)
 		return (NULL);
 
 	lctx->stid = alloc_stid(sc, lctx, inp->inp_vflag & INP_IPV6);
 	if (lctx->stid < 0) {
 		free(lctx, M_CXGBE);
 		return (NULL);
 	}
 
 	if (inp->inp_vflag & INP_IPV6 &&
 	    !IN6_ARE_ADDR_EQUAL(&in6addr_any, &inp->in6p_laddr)) {
 		struct tom_data *td = sc->tom_softc;
 
 		lctx->ce = hold_lip(td, &inp->in6p_laddr);
 		if (lctx->ce == NULL) {
 			/*
 			 * alloc_stid() linked lctx's stid region into the
 			 * stid queue and charged it to stids_in_use.  Undo
 			 * that before freeing lctx, otherwise the stid leaks
 			 * and the queue keeps a pointer into freed memory.
 			 */
 			free_stid(sc, lctx);
 			free(lctx, M_CXGBE);
 			return (NULL);
 		}
 	}
 
 	lctx->ctrlq = &sc->sge.ctrlq[pi->port_id];
 	lctx->ofld_rxq = &sc->sge.ofld_rxq[pi->first_ofld_rxq];
 	refcount_init(&lctx->refcount, 1);
 	TAILQ_INIT(&lctx->synq);
 
 	lctx->inp = inp;
 	in_pcbref(inp);
 
 	return (lctx);
 }
 
 /* Don't call this directly, use release_lctx instead */
 static int
 free_lctx(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct inpcb *inp = lctx->inp;
 	struct tom_data *td = sc->tom_softc;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(lctx->refcount == 0,
 	    ("%s: refcount %d", __func__, lctx->refcount));
 	KASSERT(TAILQ_EMPTY(&lctx->synq),
 	    ("%s: synq not empty.", __func__));
 	KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid));
 
 	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p",
 	    __func__, lctx->stid, lctx, lctx->inp);
 
 	if (lctx->ce)
 		release_lip(td, lctx->ce);
 	free_stid(sc, lctx);
 	free(lctx, M_CXGBE);
 
 	return (in_pcbrele_wlocked(inp));
 }
 
 /*
  * Take an additional reference on the listen context.  The matching drop is
  * done with release_lctx.
  */
 static void
 hold_lctx(struct listen_ctx *lctx)
 {
 
 	refcount_acquire(&lctx->refcount);
 }
 
 /*
  * Hash a pointer into a listen hash bucket index.  The bytes of the pointer
  * value itself are hashed (not the object it points to) and the result is
  * reduced with mask.
  */
 static inline uint32_t
 listen_hashfn(void *key, u_long mask)
 {
 	uint32_t hash;
 
 	hash = fnv_32_buf(&key, sizeof(key), FNV1_32_INIT);
 
 	return (hash & mask);
 }
 
 /*
  * Insert lctx at the head of its listen hash bucket (keyed by lctx->inp)
  * and bump the count of hashed contexts.
  */
 static void
 listen_hash_add(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct tom_data *td = sc->tom_softc;
 	int bucket;
 
 	bucket = listen_hashfn(lctx->inp, td->listen_mask);
 
 	mtx_lock(&td->lctx_hash_lock);
 	LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link);
 	td->lctx_count++;
 	mtx_unlock(&td->lctx_hash_lock);
 }
 
 /*
  * Look for the listening socket's context entry in the hash and return it.
  */
 static struct listen_ctx *
 listen_hash_find(struct adapter *sc, struct inpcb *inp)
 {
 	struct tom_data *td = sc->tom_softc;
 	int bucket = listen_hashfn(inp, td->listen_mask);
 	struct listen_ctx *lctx;
 
 	mtx_lock(&td->lctx_hash_lock);
 	LIST_FOREACH(lctx, &td->listen_hash[bucket], link) {
 		if (lctx->inp == inp)
 			break;
 	}
 	mtx_unlock(&td->lctx_hash_lock);
 
 	return (lctx);
 }
 
 /*
  * Removes the listen_ctx structure for inp from the hash and returns it.
  */
 static struct listen_ctx *
 listen_hash_del(struct adapter *sc, struct inpcb *inp)
 {
 	struct tom_data *td = sc->tom_softc;
 	int bucket = listen_hashfn(inp, td->listen_mask);
 	struct listen_ctx *lctx, *l;
 
 	mtx_lock(&td->lctx_hash_lock);
 	LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) {
 		if (lctx->inp == inp) {
 			LIST_REMOVE(lctx, link);
 			td->lctx_count--;
 			break;
 		}
 	}
 	mtx_unlock(&td->lctx_hash_lock);
 
 	return (lctx);
 }
 
 /*
  * Drop one reference on the lctx; the listening socket's inp must be
  * write-locked by the caller.  Dropping the last reference destroys the
  * lctx, which may in turn free the inp.  Returns NULL if the inp was freed
  * and the inp otherwise.
  */
 static struct inpcb *
 release_lctx(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct inpcb *inp = lctx->inp;
 
 	INP_WLOCK_ASSERT(inp);
 
 	if (!refcount_release(&lctx->refcount))
 		return (inp);		/* other references remain */
 
 	/* That was the last reference; free_lctx reports if the inp went too. */
 	if (free_lctx(sc, lctx))
 		return (NULL);
 
 	return (inp);
 }
 
 /*
  * Abort the embryonic connection tracked by synqe by sending an abort
  * request (with a reset) to the hardware.
  *
  * A single work request entry carries two work requests: a flowc WR
  * followed by a CPL_ABORT_REQ.  It does nothing if an abort is already in
  * progress for this synqe (TPF_ABORT_SHUTDOWN set).  The inp of the
  * listener that owns the synqe must be write-locked.  Panics if the work
  * request cannot be allocated.
  */
 static void
 send_reset_synqe(struct toedev *tod, struct synq_entry *synqe)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct mbuf *m = synqe->syn;
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct port_info *pi = ifp->if_softc;
 	struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx];
 	struct wrqe *wr;
 	struct fw_flowc_wr *flowc;
 	struct cpl_abort_req *req;
 	int txqid, rxqid, flowclen;
 	struct sge_wrq *ofld_txq;
 	struct sge_ofld_rxq *ofld_rxq;
 	const int nparams = 6;	/* number of mnemval[] entries filled below */
 	unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN;
 
 	INP_WLOCK_ASSERT(synqe->lctx->inp);
 
 	CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s",
 	    __func__, synqe, synqe->flags, synqe->tid,
 	    synqe->flags & TPF_ABORT_SHUTDOWN ?
 	    " (abort already in progress)" : "");
 	if (synqe->flags & TPF_ABORT_SHUTDOWN)
 		return;	/* abort already in progress */
 	synqe->flags |= TPF_ABORT_SHUTDOWN;
 
 	/* The queue ids to use are recovered from the saved SYN mbuf. */
 	get_qids_from_mbuf(m, &txqid, &rxqid);
 	ofld_txq = &sc->sge.ofld_txq[txqid];
 	ofld_rxq = &sc->sge.ofld_rxq[rxqid];
 
 	/* The wrqe will have two WRs - a flowc followed by an abort_req */
 	flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
 
 	wr = alloc_wrqe(roundup2(flowclen, EQ_ESIZE) + sizeof(*req), ofld_txq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 	flowc = wrtod(wr);
 	/* The abort request starts at the EQ_ESIZE boundary after the flowc. */
 	req = (void *)((caddr_t)flowc + roundup2(flowclen, EQ_ESIZE));
 
 	/* First the flowc ... */
 	memset(flowc, 0, wr->wr_len);
 	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
 	    V_FW_FLOWC_WR_NPARAMS(nparams));
 	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
 	    V_FW_WR_FLOWID(synqe->tid));
 	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
 	flowc->mnemval[0].val = htobe32(pfvf);
 	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
 	flowc->mnemval[1].val = htobe32(pi->tx_chan);
 	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
 	flowc->mnemval[2].val = htobe32(pi->tx_chan);
 	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
 	flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id);
 	/* NOTE(review): 512 looks like a placeholder for both values - confirm. */
  	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
  	flowc->mnemval[4].val = htobe32(512);
  	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
  	flowc->mnemval[5].val = htobe32(512);
 	synqe->flags |= TPF_FLOWC_WR_SENT;
 
 	/* ... then ABORT request */
 	INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid);
 	req->rsvd0 = 0;	/* don't have a snd_nxt */
 	req->rsvd1 = 1;	/* no data sent yet */
 	req->cmd = CPL_ABORT_SEND_RST;
 
 	/* Sent via the L2 table entry recorded in the synqe. */
 	t4_l2t_send(sc, wr, e);
 }
 
 /*
  * Send a CPL_PASS_OPEN_REQ for lctx's stid on the context's control queue,
  * using the local port and IPv4 address of the listening inp and a wildcard
  * (zero) peer.  Returns ENOMEM if the work request cannot be allocated and
  * 0 once the request has been handed to the tx queue.
  */
 static int
 create_server(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct inpcb *inp = lctx->inp;
 	struct cpl_pass_open_req *req;
 	struct wrqe *wrq;
 
 	wrq = alloc_wrqe(sizeof(*req), lctx->ctrlq);
 	if (wrq == NULL) {
 		log(LOG_ERR, "%s: allocation failure", __func__);
 		return (ENOMEM);
 	}
 
 	req = wrtod(wrq);
 	INIT_TP_WR(req, 0);
 	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
 
 	/* Local endpoint comes from the inp; the peer is left as a wildcard. */
 	req->local_port = inp->inp_lport;
 	req->local_ip = inp->inp_laddr.s_addr;
 	req->peer_port = 0;
 	req->peer_ip = 0;
 
 	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
 	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
 	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
 
 	t4_wrq_tx(sc, wrq);
 
 	return (0);
 }
 
 /*
  * IPv6 counterpart of the passive open: send a CPL_PASS_OPEN_REQ6 for
  * lctx's stid on the context's control queue, using the local port and
  * IPv6 address of the listening inp and a wildcard (zero) peer.  Returns
  * ENOMEM if the work request cannot be allocated and 0 once the request
  * has been handed to the tx queue.
  */
 static int
 create_server6(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct inpcb *inp = lctx->inp;
 	struct cpl_pass_open_req6 *req;
 	struct wrqe *wrq;
 
 	wrq = alloc_wrqe(sizeof(*req), lctx->ctrlq);
 	if (wrq == NULL) {
 		log(LOG_ERR, "%s: allocation failure", __func__);
 		return (ENOMEM);
 	}
 
 	req = wrtod(wrq);
 	INIT_TP_WR(req, 0);
 	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid));
 
 	/* The 16-byte local address is copied as two 8-byte halves. */
 	req->local_port = inp->inp_lport;
 	req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
 	req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
 	req->peer_port = 0;
 	req->peer_ip_hi = 0;
 	req->peer_ip_lo = 0;
 
 	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
 	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
 	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
 
 	t4_wrq_tx(sc, wrq);
 
 	return (0);
 }
 
 /*
  * Ask the hardware to close the listener identified by lctx's stid by
  * sending a CPL_CLOSE_LISTSRV_REQ on the context's control queue.  The
  * reply is directed to the context's offload rx queue (reply_ctrl).
  *
  * Always returns 0; a failure to allocate the work request panics instead
  * of being reported to the caller.
  */
 static int
 destroy_server(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct wrqe *wr;
 	struct cpl_close_listsvr_req *req;
 
 	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 	req = wrtod(wr);
 
 	INIT_TP_WR(req, 0);
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
 	    lctx->stid));
 	req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id);
 	req->rsvd = htobe16(0);
 
 	t4_wrq_tx(sc, wr);
 	return (0);
 }
 
 /*
  * Start a listening server by sending a passive open request to HW.
  *
  * The adapter lock can't be taken here, so the accesses to sc->flags,
  * sc->open_device_map, sc->offload_map, and if_capenable are all race prone.
  *
  * Must be called with the inp write-locked.  Every path returns 0: loopback
  * listeners, adapters with no TOE-enabled port that is up, listeners that
  * are already set up, and allocation or hardware request failures are all
  * silently (or with a log message) treated as "no hardware listener".
  */
 int
 t4_listen_start(struct toedev *tod, struct tcpcb *tp)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct port_info *pi;
 	struct inpcb *inp = tp->t_inpcb;
 	struct listen_ctx *lctx;
 	int i, rc;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/* Don't start a hardware listener for any loopback address. */
 	if (inp->inp_vflag & INP_IPV6 && IN6_IS_ADDR_LOOPBACK(&inp->in6p_laddr))
 		return (0);
 	if (!(inp->inp_vflag & INP_IPV6) &&
 	    IN_LOOPBACK(ntohl(inp->inp_laddr.s_addr)))
 		return (0);
 #if 0
 	ADAPTER_LOCK(sc);
 	if (IS_BUSY(sc)) {
 		log(LOG_ERR, "%s: listen request ignored, %s is busy",
 		    __func__, device_get_nameunit(sc->dev));
 		goto done;
 	}
 
 	KASSERT(uld_active(sc, ULD_TOM),
 	    ("%s: TOM not initialized", __func__));
 #endif
 
 	if ((sc->open_device_map & sc->offload_map) == 0)
 		goto done;	/* no port that's UP with IFCAP_TOE enabled */
 
 	/*
 	 * Find a running port with IFCAP_TOE (4 or 6).  We'll use the first
 	 * such port's queues to send the passive open and receive the reply to
 	 * it.
 	 *
 	 * XXX: need a way to mark a port in use by offload.  if_cxgbe should
 	 * then reject any attempt to bring down such a port (and maybe reject
 	 * attempts to disable IFCAP_TOE on that port too?).
 	 */
 	for_each_port(sc, i) {
 		if (isset(&sc->open_device_map, i) &&
 		    sc->port[i]->ifp->if_capenable & IFCAP_TOE)
 				break;
 	}
 	/*
 	 * NOTE(review): if no port matched, i is presumably sc->params.nports
 	 * here; only this assertion guards the sc->port[i] access below -
 	 * confirm this cannot happen when assertions are compiled out.
 	 */
 	KASSERT(i < sc->params.nports,
 	    ("%s: no running port with TOE capability enabled.", __func__));
 	pi = sc->port[i];
 
 	if (listen_hash_find(sc, inp) != NULL)
 		goto done;	/* already setup */
 
 	lctx = alloc_lctx(sc, inp, pi);
 	if (lctx == NULL) {
 		log(LOG_ERR,
 		    "%s: listen request ignored, %s couldn't allocate lctx\n",
 		    __func__, device_get_nameunit(sc->dev));
 		goto done;
 	}
 	listen_hash_add(sc, lctx);
 
 	CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x",
 	    __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp,
 	    inp->inp_vflag);
 
 	if (inp->inp_vflag & INP_IPV6)
 		rc = create_server6(sc, lctx);
 	else
 		rc = create_server(sc, lctx);
 	if (rc != 0) {
 		/* Undo the hash insertion and drop the only lctx reference. */
 		log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n",
 		    __func__, device_get_nameunit(sc->dev), rc);
 		(void) listen_hash_del(sc, inp);
 		inp = release_lctx(sc, lctx);
 		/* can't be freed, host stack has a reference */
 		KASSERT(inp != NULL, ("%s: inp freed", __func__));
 		goto done;
 	}
 	/* The request is on its way; the reply to it is still outstanding. */
 	lctx->flags |= LCTX_RPL_PENDING;
 done:
 #if 0
 	ADAPTER_UNLOCK(sc);
 #endif
 	return (0);
 }
 
 /*
  * Stop the hardware listener associated with tp's inp, which must be
  * write-locked.
  *
  * Returns ENOENT if the inp has no hardware listener, EINPROGRESS if the
  * reply to the passive open is still outstanding (cleanup is deferred until
  * it arrives), and 0 after the embryonic connections have been reset and
  * the close request has been sent to the hardware.
  */
 int
 t4_listen_stop(struct toedev *tod, struct tcpcb *tp)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct inpcb *inp = tp->t_inpcb;
 	struct listen_ctx *lctx;
 	struct synq_entry *entry;
 
 	INP_WLOCK_ASSERT(inp);
 
 	lctx = listen_hash_del(sc, inp);
 	if (lctx == NULL) {
 		/* no hardware listener for this inp */
 		return (ENOENT);
 	}
 
 	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid,
 	    lctx, lctx->flags);
 
 	/*
 	 * If the reply to the PASS_OPEN is still pending we'll wait for it to
 	 * arrive and clean up when it does.
 	 */
 	if (lctx->flags & LCTX_RPL_PENDING) {
 		KASSERT(TAILQ_EMPTY(&lctx->synq),
 		    ("%s: synq not empty.", __func__));
 		return (EINPROGRESS);
 	}
 
 	/*
 	 * The host stack will abort all the connections on the listening
 	 * socket's so_comp.  It doesn't know about the connections on the synq
 	 * so we need to take care of those.
 	 */
 	TAILQ_FOREACH(entry, &lctx->synq, link) {
 		if (!(entry->flags & TPF_SYNQE_HAS_L2TE))
 			continue;
 		send_reset_synqe(tod, entry);
 	}
 
 	destroy_server(sc, lctx);
 
 	return (0);
 }
 
 /* Take one more reference on the synq entry. */
 static inline void
 hold_synqe(struct synq_entry *synqe)
 {
 	refcount_acquire(&synqe->refcnt);
 }
 
 /*
  * Drop a reference on the synq entry.  When the last reference goes away the
  * SYN mbuf is freed, and so is the entry itself if it was malloc'ed.
  */
 static inline void
 release_synqe(struct synq_entry *synqe)
 {
 	int heap_allocated;
 
 	if (!refcount_release(&synqe->refcnt))
 		return;
 
 	/*
 	 * Sample the flag before freeing the mbuf: mbuf_to_synqe may have
 	 * placed the entry inside that mbuf's trailing space.
 	 */
 	heap_allocated = synqe->flags & TPF_SYNQE_NEEDFREE;
 
 	m_freem(synqe->syn);
 	if (heap_allocated)
 		free(synqe, M_CXGBE);
 }
 
 /* The syncache now refers to the synq entry passed in arg: hold it. */
 void
 t4_syncache_added(struct toedev *tod __unused, void *arg)
 {
 	struct synq_entry *entry;
 
 	entry = arg;
 	hold_synqe(entry);
 }
 
 /* The syncache is done with the synq entry passed in arg: release it. */
 void
 t4_syncache_removed(struct toedev *tod __unused, void *arg)
 {
 	struct synq_entry *entry;
 
 	entry = arg;
 	release_synqe(entry);
 }
 
 /* XXX */
 extern void tcp_dooptions(struct tcpopt *, u_char *, int, int);
 
 /*
  * Send the reply work request that was stashed in the synq entry (arg).
  *
  * m starts with an IPv4 or IPv6 header followed by a TCP header; it is only
  * read for the sequence number and TCP options and is always freed here.
  * Returns EALREADY if synqe->wr had already been taken or cleared, 0 after the
  * work request has been handed to t4_l2t_send.
  */
 int
 t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct synq_entry *synqe = arg;
 	struct wrqe *wr;
 	struct l2t_entry *e;
 	struct tcpopt to;
 	struct ip *ip = mtod(m, struct ip *);
 	struct tcphdr *th;
 
 	/* Atomically take ownership of the work request; only one taker wins. */
 	wr = (struct wrqe *)atomic_readandclear_ptr(&synqe->wr);
 	if (wr == NULL) {
 		m_freem(m);
 		return (EALREADY);
 	}
 
 	/* The TCP header is taken to start right after a fixed-size IP header. */
 	if (ip->ip_v == IPVERSION)
 		th = (void *)(ip + 1);
 	else
 		th = (void *)((struct ip6_hdr *)ip + 1);
 	bzero(&to, sizeof(to));
 	tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th),
 	    TO_SYN);
 
 	/* save these for later */
 	synqe->iss = be32toh(th->th_seq);
 	synqe->ts = to.to_tsval;
 
 	/* On T5 the reply carries the initial send sequence number as well. */
 	if (is_t5(sc)) {
 		struct cpl_t5_pass_accept_rpl *rpl5 = wrtod(wr);
 
 		rpl5->iss = th->th_seq;
 	}
 
 	e = &sc->l2t->l2tab[synqe->l2e_idx];
 	t4_l2t_send(sc, wr, e);
 
 	m_freem(m);	/* don't need this any more */
 	return (0);
 }
 
 /*
  * Handler for CPL_PASS_OPEN_RPL.
  *
  * Looks up the listen context by stid, clears LCTX_RPL_PENDING and then acts
  * on the combination of the reply status and whether the inp has been
  * dropped.  Always returns the status carried in the CPL.
  */
 static int
 do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1);
 	int stid = GET_TID(cpl);
 	unsigned int status = cpl->status;
 	struct listen_ctx *lctx = lookup_stid(sc, stid);
 	struct inpcb *inp = lctx->inp;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_PASS_OPEN_RPL,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
 
 	INP_WLOCK(inp);
 
 	CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x",
 	    __func__, stid, status, lctx->flags);
 
 	lctx->flags &= ~LCTX_RPL_PENDING;
 
 	if (status != CPL_ERR_NONE)
 		log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status);
 
 #ifdef INVARIANTS
 	/*
 	 * If the inp has been dropped (listening socket closed) then
 	 * listen_stop must have run and taken the inp out of the hash.
 	 */
 	if (inp->inp_flags & INP_DROPPED) {
 		KASSERT(listen_hash_del(sc, inp) == NULL,
 		    ("%s: inp %p still in listen hash", __func__, inp));
 	}
 #endif
 
 	/*
 	 * Socket already closed and the hardware listener never started:
 	 * just drop the lctx.  The inp is unlocked only when release_lctx
 	 * hands it back (non-NULL return).
 	 */
 	if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) {
 		if (release_lctx(sc, lctx) != NULL)
 			INP_WUNLOCK(inp);
 		return (status);
 	}
 
 	/*
 	 * Listening socket stopped listening earlier and now the chip tells us
 	 * it has started the hardware listener.  Stop it; the lctx will be
 	 * released in do_close_server_rpl.
 	 */
 	if (inp->inp_flags & INP_DROPPED) {
 		destroy_server(sc, lctx);
 		INP_WUNLOCK(inp);
 		return (status);
 	}
 
 	/*
 	 * Failed to start hardware listener.  Take inp out of the hash and
 	 * release our reference on it.  An error message has been logged
 	 * already.
 	 */
 	if (status != CPL_ERR_NONE) {
 		listen_hash_del(sc, inp);
 		if (release_lctx(sc, lctx) != NULL)
 			INP_WUNLOCK(inp);
 		return (status);
 	}
 
 	/* hardware listener open for business */
 
 	INP_WUNLOCK(inp);
 	return (status);
 }
 
 /*
  * Handler for CPL_CLOSE_LISTSRV_RPL.
  *
  * On success the listen context looked up by stid is released.  On failure
  * the error is logged and the lctx is left alone.  Returns the CPL status in
  * both cases.
  */
 static int
 do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1);
 	int stid = GET_TID(cpl);
 	unsigned int status = cpl->status;
 	struct listen_ctx *lctx = lookup_stid(sc, stid);
 	struct inpcb *inp = lctx->inp;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
 
 	CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status);
 
 	if (status != CPL_ERR_NONE) {
 		log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n",
 		    __func__, status, stid);
 		return (status);
 	}
 
 	/* release_lctx returns the inp if it is still around, NULL otherwise. */
 	INP_WLOCK(inp);
 	inp = release_lctx(sc, lctx);
 	if (inp != NULL)
 		INP_WUNLOCK(inp);
 
 	return (status);
 }
 
 static void
 done_with_synqe(struct adapter *sc, struct synq_entry *synqe)
 {
 	struct listen_ctx *lctx = synqe->lctx;
 	struct inpcb *inp = lctx->inp;
 	struct port_info *pi = synqe->syn->m_pkthdr.rcvif->if_softc;
 	struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx];
 
 	INP_WLOCK_ASSERT(inp);
 
 	TAILQ_REMOVE(&lctx->synq, synqe, link);
 	inp = release_lctx(sc, lctx);
 	if (inp)
 		INP_WUNLOCK(inp);
 	remove_tid(sc, synqe->tid);
 	release_tid(sc, synqe->tid, &sc->sge.ctrlq[pi->port_id]);
 	t4_l2t_release(e);
 	release_synqe(synqe);	/* removed from synq list */
 }
 
 /*
  * Handler for CPL_ABORT_REQ_RSS on a tid that still maps to a synq entry.
  *
  * Negative advice is ignored.  Otherwise a CPL_ABORT_RPL is always sent, and
  * the synq entry is torn down here unless an abort of our own is already in
  * flight (TPF_ABORT_SHUTDOWN).  Always returns 0.
  */
 int
 do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct synq_entry *synqe = lookup_tid(sc, tid);
 	struct listen_ctx *lctx = synqe->lctx;
 	struct inpcb *inp = lctx->inp;
 	int txqid;
 	struct sge_wrq *ofld_txq;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_ABORT_REQ_RSS,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
 	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
 
 	if (negative_advice(cpl->status))
 		return (0);	/* Ignore negative advice */
 
 	INP_WLOCK(inp);
 
 	/*
 	 * Pick the tx queue now, while synqe->syn is still valid;
 	 * done_with_synqe below may release the entry and its mbuf.
 	 */
 	get_qids_from_mbuf(synqe->syn, &txqid, NULL);
 	ofld_txq = &sc->sge.ofld_txq[txqid];
 
 	/*
 	 * If we'd initiated an abort earlier the reply to it is responsible for
 	 * cleaning up resources.  Otherwise we tear everything down right here
 	 * right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
 	 */
 	if (synqe->flags & TPF_ABORT_SHUTDOWN) {
 		INP_WUNLOCK(inp);
 		goto done;
 	}
 
 	done_with_synqe(sc, synqe);
 	/* inp lock released by done_with_synqe */
 done:
 	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
 	return (0);
 }
 
 /*
  * Handler for CPL_ABORT_RPL_RSS on a tid that still maps to a synq entry.
  *
  * This is the reply to an abort that we initiated (TPF_ABORT_SHUTDOWN is
  * asserted), so it performs the teardown of the synq entry.  Always returns 0.
  */
 int
 do_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct synq_entry *synqe = lookup_tid(sc, tid);
 	struct listen_ctx *lctx = synqe->lctx;
 	struct inpcb *inp = lctx->inp;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_ABORT_RPL_RSS,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
 	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
 
 	INP_WLOCK(inp);
 	KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
 	    ("%s: wasn't expecting abort reply for synqe %p (0x%x)",
 	    __func__, synqe, synqe->flags));
 
 	done_with_synqe(sc, synqe);
 	/* inp lock released by done_with_synqe */
 
 	return (0);
 }
 
 /*
  * Attach offload state to a newly created socket.
  *
  * arg is the synq entry.  Its SYN mbuf is expected to hold a
  * cpl_pass_establish immediately followed by a toepcb pointer, which is how
  * do_pass_establish leaves it.  The tid is switched from the synq entry to
  * the toepcb and the entry is marked TPF_SYNQE_EXPANDED.
  */
 void
 t4_offload_socket(struct toedev *tod, void *arg, struct socket *so)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct synq_entry *synqe = arg;
 #ifdef INVARIANTS
 	struct inpcb *inp = sotoinpcb(so);
 #endif
 	struct cpl_pass_establish *cpl = mtod(synqe->syn, void *);
 	struct toepcb *toep = *(struct toepcb **)(cpl + 1);
 
 	INP_INFO_LOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(synqe->flags & TPF_SYNQE,
 	    ("%s: %p not a synq_entry?", __func__, arg));
 
 	offload_socket(so, toep);
 	make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
 	toep->flags |= TPF_CPL_PENDING;
 	/* From here on the tid resolves to the toepcb, not the synq entry. */
 	update_tid(sc, synqe->tid, toep);
 	synqe->flags |= TPF_SYNQE_EXPANDED;
 }
 
 /*
  * Pick a random offload tx queue and rx queue from the port's ranges and
  * record both in the mbuf's flowid: tx queue id in the upper 16 bits, rx
  * queue id in the lower 16.
  */
 static inline void
 save_qids_in_mbuf(struct mbuf *m, struct port_info *pi)
 {
 	uint32_t tx, rx;
 
 	tx = arc4random() % pi->nofldtxq;
 	tx += pi->first_ofld_txq;
 	rx = arc4random() % pi->nofldrxq;
 	rx += pi->first_ofld_rxq;
 
 	m->m_pkthdr.flowid = (tx << 16) | (rx & 0xffff);
 }
 
 static inline void
 get_qids_from_mbuf(struct mbuf *m, int *txqid, int *rxqid)
 {
 
 	if (txqid)
 		*txqid = m->m_pkthdr.flowid >> 16;
 	if (rxqid)
 		*rxqid = m->m_pkthdr.flowid & 0xffff;
 }
 
 /*
  * Get a synq_entry for the PASS_ACCEPT_REQ that arrived in m.  The entry is
  * carved out of the very end of the mbuf's trailing space when that is big
  * enough; otherwise it is malloc'ed and flagged TPF_SYNQE_NEEDFREE.  Returns
  * NULL if the allocation fails.
  */
 static struct synq_entry *
 mbuf_to_synqe(struct mbuf *m)
 {
 	int needed = roundup2(sizeof (struct synq_entry), 8);
 	int avail = M_TRAILINGSPACE(m);
 	struct synq_entry *synqe;
 
 	if (avail >= needed) {
 		/* Lives inside the mbuf; freed along with it. */
 		synqe = (void *)(m->m_data + m->m_len + avail - needed);
 		synqe->flags = TPF_SYNQE;
 		return (synqe);
 	}
 
 	synqe = malloc(sizeof(*synqe), M_CXGBE, M_NOWAIT);
 	if (synqe != NULL)
 		synqe->flags = TPF_SYNQE | TPF_SYNQE_NEEDFREE;
 
 	return (synqe);
 }
 
 /*
  * Translate the TCP options reported by the chip into a struct tcpopt.  *to
  * is cleared first; only the MSS, window scale, timestamp and SACK-permitted
  * flags (plus the MSS and scale values) are filled in.
  */
 static void
 t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to)
 {
 
 	bzero(to, sizeof(*to));
 
 	if (t4opt->mss != 0) {
 		to->to_mss = be16toh(t4opt->mss);
 		to->to_flags |= TOF_MSS;
 	}
 
 	if (t4opt->wsf != 0) {
 		to->to_wscale = t4opt->wsf;
 		to->to_flags |= TOF_SCALE;
 	}
 
 	if (t4opt->tstamp != 0)
 		to->to_flags |= TOF_TS;
 
 	if (t4opt->sack != 0)
 		to->to_flags |= TOF_SACKPERM;
 }
 
 /*
  * Build the opt2 word for the reply to a passive open.  The value is returned
  * already converted to big-endian.
  */
 static uint32_t
 calc_opt2p(struct adapter *sc, struct port_info *pi, int rxqid,
     const struct tcp_options *tcpopt, struct tcphdr *th, int ulp_mode)
 {
 	struct sge_ofld_rxq *rxq = &sc->sge.ofld_rxq[rxqid];
 	uint32_t word;
 
 	/* Queue selection: tx modulation queue and the rx (RSS) queue. */
 	word = V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]);
 	word |= F_RSS_QUEUE_VALID;
 	word |= V_RSS_QUEUE(rxq->iq.abs_id);
 
 	/* TCP options from the SYN, honored only if RFC 1323 is enabled. */
 	if (V_tcp_do_rfc1323) {
 		if (tcpopt->tstamp)
 			word |= F_TSTAMPS_EN;
 		if (tcpopt->sack)
 			word |= F_SACK_EN;
 		if (tcpopt->wsf <= 14)
 			word |= F_WND_SCALE_EN;
 	}
 
 	if (V_tcp_do_ecn && (th->th_flags & (TH_ECE | TH_CWR)))
 		word |= F_CCTRL_ECN;
 
 	/* RX_COALESCE is always a valid value (0 or M_RX_COALESCE). */
 	if (!is_t4(sc)) {
 		/* F_CONG_CNTRL_VALID is really OPT_2_ISS on T5. */
 		word |= F_T5_OPT_2_VALID | F_CONG_CNTRL_VALID;
 	} else
 		word |= F_RX_COALESCE_VALID;
 	if (sc->tt.rx_coalesce)
 		word |= V_RX_COALESCE(M_RX_COALESCE);
 
 #ifdef USE_DDP_RX_FLOW_CONTROL
 	if (ulp_mode == ULP_MODE_TCPDDP)
 		word |= F_RX_FC_VALID | F_RX_FC_DDP;
 #endif
 
 	return (htobe32(word));
 }
 
 static void
 pass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc,
     struct tcphdr *th)
 {
 	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
 	const struct ether_header *eh;
 	unsigned int hlen = be32toh(cpl->hdr_len);
 	uintptr_t l3hdr;
 	const struct tcphdr *tcp;
 
 	eh = (const void *)(cpl + 1);
 	l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
 	tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen));
 
 	if (inc) {
 		bzero(inc, sizeof(*inc));
 		inc->inc_fport = tcp->th_sport;
 		inc->inc_lport = tcp->th_dport;
 		if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
 			const struct ip *ip = (const void *)l3hdr;
 
 			inc->inc_faddr = ip->ip_src;
 			inc->inc_laddr = ip->ip_dst;
 		} else {
 			const struct ip6_hdr *ip6 = (const void *)l3hdr;
 
 			inc->inc_flags |= INC_ISIPV6;
 			inc->inc6_faddr = ip6->ip6_src;
 			inc->inc6_laddr = ip6->ip6_dst;
 		}
 	}
 
 	if (th) {
 		bcopy(tcp, th, sizeof(*th));
 		tcp_fields_to_host(th);		/* just like tcp_input */
 	}
 }
 
-static int
-ifnet_has_ip6(struct ifnet *ifp, struct in6_addr *ip6)
-{
-	struct ifaddr *ifa;
-	struct sockaddr_in6 *sin6;
-	int found = 0;
-	struct in6_addr in6 = *ip6;
-
-	/* Just as in ip6_input */
-	if (in6_clearscope(&in6) || in6_clearscope(&in6))
-		return (0);
-	in6_setscope(&in6, ifp, NULL);
-
-	if_addr_rlock(ifp);
-	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
-		sin6 = (void *)ifa->ifa_addr;
-		if (sin6->sin6_family != AF_INET6)
-			continue;
-
-		if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &in6)) {
-			found = 1;
-			break;
-		}
-	}
-	if_addr_runlock(ifp);
-
-	return (found);
-}
-
 /*
  * Get the L2 table entry to use for reaching the peer described by inc.
  *
  * For an IPv6 connection whose local address is link-local the peer's address
  * is used directly, without a route lookup.  Otherwise the route to the peer
  * is looked up and the entry is requested for the gateway (if the route has
  * RTF_GATEWAY) or for the peer itself.
  *
  * Returns NULL if there is no route, if the route does not go out of ifp, or
  * if t4_l2t_get returns NULL.
  */
 static struct l2t_entry *
 get_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp,
     struct in_conninfo *inc)
 {
 	struct rtentry *rt;
 	struct l2t_entry *e;
 	struct sockaddr_in6 sin6;
 	struct sockaddr *dst = (void *)&sin6;
 
 	/*
 	 * Start from an all-zero sockaddr.  Only the length, family and
 	 * address are filled in below; the remaining fields (port, flowinfo,
 	 * scope id, sin_zero) must not be left as stack garbage because the
 	 * whole sockaddr is handed to the route lookup and to t4_l2t_get.
 	 */
 	bzero(&sin6, sizeof(sin6));
 
 	if (inc->inc_flags & INC_ISIPV6) {
 		dst->sa_len = sizeof(struct sockaddr_in6);
 		dst->sa_family = AF_INET6;
 		((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr;
 
 		if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) {
 			/* no need for route lookup */
 			e = t4_l2t_get(pi, ifp, dst);
 			return (e);
 		}
 	} else {
 		dst->sa_len = sizeof(struct sockaddr_in);
 		dst->sa_family = AF_INET;
 		((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr;
 	}
 
 	rt = rtalloc1(dst, 0, 0);
 	if (rt == NULL)
 		return (NULL);
 	else {
 		struct sockaddr *nexthop;
 
 		RT_UNLOCK(rt);
 		if (rt->rt_ifp != ifp)
 			e = NULL;	/* route leaves via another ifnet */
 		else {
 			if (rt->rt_flags & RTF_GATEWAY)
 				nexthop = rt->rt_gateway;
 			else
 				nexthop = dst;
 			e = t4_l2t_get(pi, ifp, nexthop);
 		}
 		RTFREE(rt);
 	}
 
 	return (e);
 }
 
-static int
-ifnet_has_ip(struct ifnet *ifp, struct in_addr in)
-{
-	struct ifaddr *ifa;
-	struct sockaddr_in *sin;
-	int found = 0;
-
-	if_addr_rlock(ifp);
-	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
-		sin = (void *)ifa->ifa_addr;
-		if (sin->sin_family != AF_INET)
-			continue;
-
-		if (sin->sin_addr.s_addr == in.s_addr) {
-			found = 1;
-			break;
-		}
-	}
-	if_addr_runlock(ifp);
-
-	return (found);
-}
-
 /*
  * Bail out of do_pass_accept_req: record the source line of the rejection in
  * the local 'reject_reason' and jump to the local 'reject' label.  Both must
  * exist in the function that uses this macro.
  */
 #define REJECT_PASS_ACCEPT()	do { \
 	reject_reason = __LINE__; \
 	goto reject; \
 } while (0)
 
 /*
  * The context associated with a tid entry via insert_tid could be a synq_entry
  * or a toepcb.  The only way CPL handlers can tell is via a bit in these flags.
  */
 CTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags));
 
 /*
  * Incoming SYN on a listening socket.
  *
  * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe,
  * etc.
  */
 static int
 do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	struct toedev *tod;
 	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
 	struct cpl_pass_accept_rpl *rpl;
 	struct wrqe *wr;
 	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
 	unsigned int tid = GET_TID(cpl);
 	struct listen_ctx *lctx = lookup_stid(sc, stid);
 	struct inpcb *inp;
 	struct socket *so;
 	struct in_conninfo inc;
 	struct tcphdr th;
 	struct tcpopt to;
 	struct port_info *pi;
 	struct ifnet *hw_ifp, *ifp;
 	struct l2t_entry *e = NULL;
 	int rscale, mtu_idx, rx_credits, rxqid, ulp_mode;
 	struct synq_entry *synqe = NULL;
 	int reject_reason;
 	uint16_t vid;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_PASS_ACCEPT_REQ,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
 
 	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
 	    lctx);
 
 	pass_accept_req_to_protohdrs(m, &inc, &th);
 	t4opt_to_tcpopt(&cpl->tcpopt, &to);
 
 	pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))];
 	hw_ifp = pi->ifp;	/* the cxgbeX ifnet */
 	m->m_pkthdr.rcvif = hw_ifp;
 	tod = TOEDEV(hw_ifp);
 
 	/*
 	 * Figure out if there is a pseudo interface (vlan, lagg, etc.)
 	 * involved.  Don't offload if the SYN had a VLAN tag and the vid
 	 * doesn't match anything on this interface.
 	 *
 	 * XXX: lagg support, lagg + vlan support.
 	 */
 	vid = EVL_VLANOFTAG(be16toh(cpl->vlan));
 	if (vid != 0xfff) {
 		ifp = VLAN_DEVAT(hw_ifp, vid);
 		if (ifp == NULL)
 			REJECT_PASS_ACCEPT();
 	} else
 		ifp = hw_ifp;
 
 	/*
 	 * Don't offload if the peer requested a TCP option that's not known to
 	 * the silicon.
 	 */
 	if (cpl->tcpopt.unknown)
 		REJECT_PASS_ACCEPT();
 
 	if (inc.inc_flags & INC_ISIPV6) {
 
 		/* Don't offload if the ifcap isn't enabled */
 		if ((ifp->if_capenable & IFCAP_TOE6) == 0)
 			REJECT_PASS_ACCEPT();
 
 		/*
 		 * SYN must be directed to an IP6 address on this ifnet.  This
 		 * is more restrictive than in6_localip.
 		 */
-		if (!ifnet_has_ip6(ifp, &inc.inc6_laddr))
+		if (!in6_ifhasaddr(ifp, &inc.inc6_laddr))
 			REJECT_PASS_ACCEPT();
 	} else {
 
 		/* Don't offload if the ifcap isn't enabled */
 		if ((ifp->if_capenable & IFCAP_TOE4) == 0)
 			REJECT_PASS_ACCEPT();
 
 		/*
 		 * SYN must be directed to an IP address on this ifnet.  This
 		 * is more restrictive than in_localip.
 		 */
-		if (!ifnet_has_ip(ifp, inc.inc_laddr))
+		if (!in_ifhasaddr(ifp, inc.inc_laddr))
 			REJECT_PASS_ACCEPT();
 	}
 
 	e = get_l2te_for_nexthop(pi, ifp, &inc);
 	if (e == NULL)
 		REJECT_PASS_ACCEPT();
 
 	/*
 	 * NOTE(review): mbuf_to_synqe may malloc the entry
 	 * (TPF_SYNQE_NEEDFREE).  The REJECT_PASS_ACCEPT() calls between here
 	 * and refcount_init() below reach 'reject', which never frees synqe.
 	 * That looks like a leak when the entry did not fit in the mbuf --
 	 * confirm.
 	 */
 	synqe = mbuf_to_synqe(m);
 	if (synqe == NULL)
 		REJECT_PASS_ACCEPT();
 
 	wr = alloc_wrqe(is_t4(sc) ? sizeof(struct cpl_pass_accept_rpl) :
 	    sizeof(struct cpl_t5_pass_accept_rpl), &sc->sge.ctrlq[pi->port_id]);
 	if (wr == NULL)
 		REJECT_PASS_ACCEPT();
 	rpl = wrtod(wr);
 
 	INP_INFO_WLOCK(&V_tcbinfo);	/* for 4-tuple check */
 
 	/* Don't offload if the 4-tuple is already in use */
 	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 		free(wr, M_CXGBE);
 		REJECT_PASS_ACCEPT();
 	}
 	INP_INFO_WUNLOCK(&V_tcbinfo);
 
 	inp = lctx->inp;		/* listening socket, not owned by TOE */
 	INP_WLOCK(inp);
 
 	/* Don't offload if the listening socket has closed */
 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
 		/*
 		 * The listening socket has closed.  The reply from the TOE to
 		 * our CPL_CLOSE_LISTSRV_REQ will ultimately release all
 		 * resources tied to this listen context.
 		 */
 		INP_WUNLOCK(inp);
 		free(wr, M_CXGBE);
 		REJECT_PASS_ACCEPT();
 	}
 	so = inp->inp_socket;
 
 	mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss));
 	rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0;
 	SOCKBUF_LOCK(&so->so_rcv);
 	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
 	rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	/* Choose the offload queues and remember them in the SYN's flowid. */
 	save_qids_in_mbuf(m, pi);
 	get_qids_from_mbuf(m, NULL, &rxqid);
 
 	if (is_t4(sc))
 		INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
 	else {
 		struct cpl_t5_pass_accept_rpl *rpl5 = (void *)rpl;
 
 		INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid);
 	}
 	if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) {
 		ulp_mode = ULP_MODE_TCPDDP;
 		synqe->flags |= TPF_SYNQE_TCPDDP;
 	} else
 		ulp_mode = ULP_MODE_NONE;
 	rpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, rx_credits, ulp_mode);
 	rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode);
 
 	/* The synq entry takes over the SYN mbuf; m is cleared to reflect that. */
 	synqe->tid = tid;
 	synqe->lctx = lctx;
 	synqe->syn = m;
 	m = NULL;
 	refcount_init(&synqe->refcnt, 1);	/* 1 means extra hold */
 	synqe->l2e_idx = e->idx;
 	synqe->rcv_bufsize = rx_credits;
 	atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr);
 
 	insert_tid(sc, tid, synqe);
 	TAILQ_INSERT_TAIL(&lctx->synq, synqe, link);
 	hold_synqe(synqe);	/* hold for the duration it's in the synq */
 	hold_lctx(lctx);	/* A synqe on the list has a ref on its lctx */
 
 	/*
 	 * If all goes well t4_syncache_respond will get called during
 	 * syncache_add.  Note that syncache_add releases the pcb lock.
 	 */
 	toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
 	INP_UNLOCK_ASSERT(inp);	/* ok to assert, we have a ref on the inp */
 
 	/*
 	 * If we replied during syncache_add (synqe->wr has been consumed),
 	 * good.  Otherwise, set it to 0 so that further syncache_respond
 	 * attempts by the kernel will be ignored.
 	 */
 	if (atomic_cmpset_ptr(&synqe->wr, (uintptr_t)wr, 0)) {
 
 		/*
 		 * syncache may or may not have a hold on the synqe, which may
 		 * or may not be stashed in the original SYN mbuf passed to us.
 		 * Just copy it over instead of dealing with all possibilities.
 		 */
 		m = m_dup(synqe->syn, M_NOWAIT);
 		if (m)
 			m->m_pkthdr.rcvif = hw_ifp;
 
 		remove_tid(sc, synqe->tid);
 		free(wr, M_CXGBE);
 
 		/* Yank the synqe out of the lctx synq. */
 		INP_WLOCK(inp);
 		TAILQ_REMOVE(&lctx->synq, synqe, link);
 		release_synqe(synqe);	/* removed from synq list */
 		inp = release_lctx(sc, lctx);
 		if (inp)
 			INP_WUNLOCK(inp);
 
 		release_synqe(synqe);	/* extra hold */
 		REJECT_PASS_ACCEPT();
 	}
 
 	CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, SYNACK",
 	    __func__, stid, tid, lctx, synqe);
 
 	INP_WLOCK(inp);
 	synqe->flags |= TPF_SYNQE_HAS_L2TE;
 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
 		/*
 		 * Listening socket closed but tod_listen_stop did not abort
 		 * this tid because there was no L2T entry for the tid at that
 		 * time.  Abort it now.  The reply to the abort will clean up.
 		 */
 		CTR6(KTR_CXGBE,
 		    "%s: stid %u, tid %u, lctx %p, synqe %p (0x%x), ABORT",
 		    __func__, stid, tid, lctx, synqe, synqe->flags);
 		if (!(synqe->flags & TPF_SYNQE_EXPANDED))
 			send_reset_synqe(tod, synqe);
 		INP_WUNLOCK(inp);
 
 		release_synqe(synqe);	/* extra hold */
 		return (__LINE__);
 	}
 	INP_WUNLOCK(inp);
 
 	release_synqe(synqe);	/* extra hold */
 	return (0);
 reject:
 	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
 	    reject_reason);
 
 	if (e)
 		t4_l2t_release(e);
 	release_tid(sc, tid, lctx->ctrlq);
 
 	/*
 	 * Not offloading: strip the CPL, mark the checksums as verified and
 	 * pass the SYN to the ifnet's input routine.  m can be NULL only if
 	 * the m_dup above failed.
 	 */
 	if (__predict_true(m != NULL)) {
 		m_adj(m, sizeof(*cpl));
 		m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
 		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 		m->m_pkthdr.csum_data = 0xffff;
 		hw_ifp->if_input(hw_ifp, m);
 	}
 
 	return (reject_reason);
 }
 
 /*
  * Produce the connection info, TCP header and TCP options for a segment that
  * looks like the peer's ACK of our SYN|ACK, built from the saved SYN and the
  * CPL_PASS_ESTABLISH.
  */
 static void
 synqe_to_protohdrs(struct synq_entry *synqe,
     const struct cpl_pass_establish *cpl, struct in_conninfo *inc,
     struct tcphdr *th, struct tcpopt *to)
 {
 	uint16_t opts = be16toh(cpl->tcp_opt);
 
 	/* Begin with the headers of the SYN that started it all ... */
 	pass_accept_req_to_protohdrs(synqe->syn, inc, th);
 
 	/* ... then rewrite the TCP header into an ACK of our SYN|ACK. */
 	th->th_seq = be32toh(cpl->rcv_isn);
 	th->th_ack = synqe->iss + 1;
 	th->th_flags = TH_ACK;
 
 	/* Only the timestamp echo is carried over in the options. */
 	bzero(to, sizeof(*to));
 	if (G_TCPOPT_TSTAMP(opts)) {
 		to->to_tsecr = synqe->ts;
 		to->to_flags |= TOF_TS;
 	}
 }
 
 /*
  * Handler for CPL_PASS_ESTABLISH: the 3-way handshake for an offloaded
  * passive open has completed.
  *
  * Allocates a toepcb for the tid, expands the syncache entry into a socket
  * and hooks the socket up to the toepcb.  If the listening socket has been
  * dropped nothing is done; if the toepcb allocation or the syncache expand
  * fails the connection is reset and the reply to that abort cleans up.
  * Always returns 0.
  */
 static int
 do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	struct port_info *pi;
 	struct ifnet *ifp;
 	const struct cpl_pass_establish *cpl = (const void *)(rss + 1);
 #if defined(KTR) || defined(INVARIANTS)
 	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
 #endif
 	unsigned int tid = GET_TID(cpl);
 	struct synq_entry *synqe = lookup_tid(sc, tid);
 	struct listen_ctx *lctx = synqe->lctx;
 	struct inpcb *inp = lctx->inp;
 	struct socket *so;
 	struct tcphdr th;
 	struct tcpopt to;
 	struct in_conninfo inc;
 	struct toepcb *toep;
 	/*
 	 * NOTE(review): these are u_int but get_qids_from_mbuf takes int *,
 	 * and the KASSERT below prints rxqid with %d -- confirm the intended
 	 * type.
 	 */
 	u_int txqid, rxqid;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_PASS_ESTABLISH,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
 	KASSERT(synqe->flags & TPF_SYNQE,
 	    ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
 
 	INP_INFO_WLOCK(&V_tcbinfo);	/* for syncache_expand */
 	INP_WLOCK(inp);
 
 	CTR6(KTR_CXGBE,
 	    "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x",
 	    __func__, stid, tid, synqe, synqe->flags, inp->inp_flags);
 
 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
 
 		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
 			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
 			    ("%s: listen socket closed but tid %u not aborted.",
 			    __func__, tid));
 		}
 
 		INP_WUNLOCK(inp);
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 		return (0);
 	}
 
 	ifp = synqe->syn->m_pkthdr.rcvif;
 	pi = ifp->if_softc;
 	KASSERT(pi->adapter == sc,
 	    ("%s: pi %p, sc %p mismatch", __func__, pi, sc));
 
 	get_qids_from_mbuf(synqe->syn, &txqid, &rxqid);
 	KASSERT(rxqid == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0],
 	    ("%s: CPL arrived on unexpected rxq.  %d %d", __func__, rxqid,
 	    (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0])));
 
 	toep = alloc_toepcb(pi, txqid, rxqid, M_NOWAIT);
 	if (toep == NULL) {
 reset:
 		/*
 		 * The reply to this abort will perform final cleanup.  There is
 		 * no need to check for HAS_L2TE here.  We can be here only if
 		 * we responded to the PASS_ACCEPT_REQ, and our response had the
 		 * L2T idx.
 		 */
 		send_reset_synqe(TOEDEV(ifp), synqe);
 		INP_WUNLOCK(inp);
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 		return (0);
 	}
 	toep->tid = tid;
 	toep->l2te = &sc->l2t->l2tab[synqe->l2e_idx];
 	if (synqe->flags & TPF_SYNQE_TCPDDP)
 		set_tcpddp_ulp_mode(toep);
 	else
 		toep->ulp_mode = ULP_MODE_NONE;
 	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
 	toep->rx_credits = synqe->rcv_bufsize;
 
 	so = inp->inp_socket;
 	KASSERT(so != NULL, ("%s: socket is NULL", __func__));
 
 	/* Come up with something that syncache_expand should be ok with. */
 	synqe_to_protohdrs(synqe, cpl, &inc, &th, &to);
 
 	/*
 	 * No more need for anything in the mbuf that carried the
 	 * CPL_PASS_ACCEPT_REQ.  Drop the CPL_PASS_ESTABLISH and toep pointer
 	 * there.  XXX: bad form but I don't want to increase the size of synqe.
 	 */
 	m = synqe->syn;
 	KASSERT(sizeof(*cpl) + sizeof(toep) <= m->m_len,
 	    ("%s: no room in mbuf %p (m_len %d)", __func__, m, m->m_len));
 	bcopy(cpl, mtod(m, void *), sizeof(*cpl));
 	*(struct toepcb **)(mtod(m, struct cpl_pass_establish *) + 1) = toep;
 
 	/* On success 'so' is replaced by the new connection's socket. */
 	if (!toe_syncache_expand(&inc, &to, &th, &so) || so == NULL) {
 		free_toepcb(toep);
 		goto reset;
 	}
 
 	/*
 	 * This is for the unlikely case where the syncache entry that we added
 	 * has been evicted from the syncache, but the syncache_expand above
 	 * works because of syncookies.
 	 *
 	 * XXX: we've held the tcbinfo lock throughout so there's no risk of
 	 * anyone accept'ing a connection before we've installed our hooks, but
 	 * this somewhat defeats the purpose of having a tod_offload_socket :-(
 	 */
 	if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
 		struct inpcb *new_inp = sotoinpcb(so);
 
 		INP_WLOCK(new_inp);
 		tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
 		t4_offload_socket(TOEDEV(ifp), synqe, so);
 		INP_WUNLOCK(new_inp);
 	}
 
 	/* Done with the synqe */
 	TAILQ_REMOVE(&lctx->synq, synqe, link);
 	/* The listening inp is unlocked only if release_lctx hands it back. */
 	inp = release_lctx(sc, lctx);
 	if (inp != NULL)
 		INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(&V_tcbinfo);
 	release_synqe(synqe);
 
 	return (0);
 }
 
 /*
  * Register the CPL handlers for the passive-open side of the TOE: replies to
  * listener open/close requests, incoming SYNs, and completed handshakes.
  */
 void
 t4_init_listen_cpl_handlers(struct adapter *sc)
 {
 	/* Listener setup and teardown replies. */
 	t4_register_cpl_handler(sc, CPL_PASS_OPEN_RPL, do_pass_open_rpl);
 	t4_register_cpl_handler(sc, CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
 
 	/* Per-connection events on a listener. */
 	t4_register_cpl_handler(sc, CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
 	t4_register_cpl_handler(sc, CPL_PASS_ESTABLISH, do_pass_establish);
 }
 #endif
Index: user/ngie/more-tests/sys/dev/usb/video/udl.c
===================================================================
--- user/ngie/more-tests/sys/dev/usb/video/udl.c	(revision 281675)
+++ user/ngie/more-tests/sys/dev/usb/video/udl.c	(revision 281676)
@@ -1,1097 +1,1154 @@
 /*	$OpenBSD: udl.c,v 1.81 2014/12/09 07:05:06 doug Exp $ */
 /*	$FreeBSD$ */
 
 /*-
  * Copyright (c) 2015 Hans Petter Selasky <hselasky@freebsd.org>
  * Copyright (c) 2009 Marcus Glocker <mglocker@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
  * copyright notice and this permission notice appear in all copies.
  *
  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
 /*
  * Driver for the "DisplayLink DL-120 / DL-160" graphic chips based on
  * the reversed engineered specifications of Florian Echtler
  * <floe@butterbrot.org>:
  *
  * 	http://floe.butterbrot.org/displaylink/doku.php
  */
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/callout.h>
 #include <sys/conf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/condvar.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/consio.h>
 #include <sys/fbio.h>
 
 #include <dev/fb/fbreg.h>
 #include <dev/syscons/syscons.h>
 
 #include <dev/videomode/videomode.h>
 #include <dev/videomode/edidvar.h>
 
 #include <dev/usb/usb.h>
 #include <dev/usb/usbdi.h>
 #include <dev/usb/usbdi_util.h>
 #include "usbdevs.h"
 
 #include <dev/usb/video/udl.h>
 
 #include "fb_if.h"
 
 #undef DPRINTF
 #undef DPRINTFN
 #define	USB_DEBUG_VAR udl_debug
 #include <dev/usb/usb_debug.h>
 
 static	SYSCTL_NODE(_hw_usb, OID_AUTO, udl, CTLFLAG_RW, 0, "USB UDL");
 
 #ifdef USB_DEBUG
 static int udl_debug = 0;
 
 SYSCTL_INT(_hw_usb_udl, OID_AUTO, debug, CTLFLAG_RWTUN,
     &udl_debug, 0, "Debug level");
 #endif
 
 #define	UDL_FPS_MAX	60
 #define	UDL_FPS_MIN	1
 
 static int udl_fps = 25;
 SYSCTL_INT(_hw_usb_udl, OID_AUTO, fps, CTLFLAG_RWTUN,
     &udl_fps, 0, "Frames Per Second, 1-60");
 
+static struct mtx udl_buffer_mtx;		/* protects udl_buffer_head */
+static struct udl_buffer_head udl_buffer_head;	/* recycled display buffers */
+
+MALLOC_DEFINE(M_USB_DL, "USB", "USB DisplayLink");
+
 /*
  * Prototypes.
  */
 static usb_callback_t udl_bulk_write_callback;
 
 static device_probe_t udl_probe;
 static device_attach_t udl_attach;
 static device_detach_t udl_detach;
 static fb_getinfo_t udl_fb_getinfo;
 static fb_setblankmode_t udl_fb_setblankmode;
 
 static void udl_select_chip(struct udl_softc *, struct usb_attach_arg *);
 static int udl_init_chip(struct udl_softc *);
 static void udl_select_mode(struct udl_softc *);
 static int udl_init_resolution(struct udl_softc *);
 static void udl_fbmem_alloc(struct udl_softc *);
 static int udl_cmd_write_buf_le16(struct udl_softc *, const uint8_t *, uint32_t, uint8_t, int);
 static int udl_cmd_buf_copy_le16(struct udl_softc *, uint32_t, uint32_t, uint8_t, int);
 static void udl_cmd_insert_int_1(struct udl_cmd_buf *, uint8_t);
 static void udl_cmd_insert_int_3(struct udl_cmd_buf *, uint32_t);
 static void udl_cmd_insert_buf_le16(struct udl_cmd_buf *, const uint8_t *, uint32_t);
 static void udl_cmd_write_reg_1(struct udl_cmd_buf *, uint8_t, uint8_t);
 static void udl_cmd_write_reg_3(struct udl_cmd_buf *, uint8_t, uint32_t);
 static int udl_power_save(struct udl_softc *, int, int);
 
 static const struct usb_config udl_config[UDL_N_TRANSFER] = {
 	[UDL_BULK_WRITE_0] = {
 		.type = UE_BULK,
 		.endpoint = UE_ADDR_ANY,
 		.direction = UE_DIR_TX,
 		.flags = {.pipe_bof = 1,.force_short_xfer = 1,.ext_buffer = 1,},
 		.bufsize = UDL_CMD_MAX_DATA_SIZE * UDL_CMD_MAX_FRAMES,
 		.callback = &udl_bulk_write_callback,
 		.frames = UDL_CMD_MAX_FRAMES,
 		.timeout = 5000,	/* 5 seconds */
 	},
 	[UDL_BULK_WRITE_1] = {
 		.type = UE_BULK,
 		.endpoint = UE_ADDR_ANY,
 		.direction = UE_DIR_TX,
 		.flags = {.pipe_bof = 1,.force_short_xfer = 1,.ext_buffer = 1,},
 		.bufsize = UDL_CMD_MAX_DATA_SIZE * UDL_CMD_MAX_FRAMES,
 		.callback = &udl_bulk_write_callback,
 		.frames = UDL_CMD_MAX_FRAMES,
 		.timeout = 5000,	/* 5 seconds */
 	},
 };
 
 /*
  * Driver glue.
  */
 static devclass_t udl_devclass;
 
 static device_method_t udl_methods[] = {
 	DEVMETHOD(device_probe, udl_probe),
 	DEVMETHOD(device_attach, udl_attach),
 	DEVMETHOD(device_detach, udl_detach),
 	DEVMETHOD(fb_getinfo, udl_fb_getinfo),
 	DEVMETHOD_END
 };
 
 static driver_t udl_driver = {
 	.name = "udl",
 	.methods = udl_methods,
 	.size = sizeof(struct udl_softc),
 };
 
 DRIVER_MODULE(udl, uhub, udl_driver, udl_devclass, NULL, NULL);
 MODULE_DEPEND(udl, usb, 1, 1, 1);
 MODULE_DEPEND(udl, fbd, 1, 1, 1);
 MODULE_DEPEND(udl, videomode, 1, 1, 1);
 MODULE_VERSION(udl, 1);
 
 /*
  * Matching devices.
  */
 static const STRUCT_USB_HOST_ID udl_devs[] = {
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LCD4300U, DL120)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LCD8000U, DL120)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_GUC2020, DL160)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LD220, DL165)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_VCUD60, DL160)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_DLDVI, DL160)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_VGA10, DL120)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_WSDVI, DLUNK)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_EC008, DL160)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_HPDOCK, DL160)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_NL571, DL160)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_M01061, DL195)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_NBDOCK, DL165)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_SWDVI, DLUNK)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_UM7X0, DL120)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_CONV, DL160)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_PLUGABLE, DL160)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LUM70, DL125)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_POLARIS2, DLUNK)},
 	{USB_VPI(USB_VENDOR_DISPLAYLINK, USB_PRODUCT_DISPLAYLINK_LT1421, DLUNK)}
 };
 
+static void
+udl_buffer_init(void *arg)
+{
+	TAILQ_INIT(&udl_buffer_head);	/* recycle list starts out empty */
+	mtx_init(&udl_buffer_mtx, "USB", "UDL", MTX_DEF);	/* list lock */
+}
+SYSINIT(udl_buffer_init, SI_SUB_LOCK, SI_ORDER_FIRST, udl_buffer_init, NULL);
+
+CTASSERT(sizeof(struct udl_buffer) < PAGE_SIZE);
+
+static void *
+udl_buffer_alloc(uint32_t size)
+{
+	struct udl_buffer *buf;
+
+	/* look for a recycled buffer of exactly the requested size */
+	mtx_lock(&udl_buffer_mtx);
+	TAILQ_FOREACH(buf, &udl_buffer_head, entry) {
+		if (buf->size == size) {
+			TAILQ_REMOVE(&udl_buffer_head, buf, entry);
+			break;
+		}
+	}
+	mtx_unlock(&udl_buffer_mtx);
+	if (buf == NULL)	/* no match: allocate new buffer */
+		return (malloc(size, M_USB_DL, M_WAITOK | M_ZERO));
+	/* wipe and recycle buffer */
+	memset(buf, 0, size);
+	return (buf);
+}
+
+static void
+udl_buffer_free(void *_buf, uint32_t size)
+{
+	struct udl_buffer *buf = _buf;
+
+	/* no buffer was handed in, nothing to recycle */
+	if (buf == NULL)
+		return;
+
+	/*
+	 * Memory mapped buffers should never be freed.
+	 * Put display buffer into a recycle list.
+	 */
+	mtx_lock(&udl_buffer_mtx);
+	buf->size = size;
+	TAILQ_INSERT_TAIL(&udl_buffer_head, buf, entry);
+	mtx_unlock(&udl_buffer_mtx);
+}
+
 static uint32_t
 udl_get_fb_size(struct udl_softc *sc)
 {
 	unsigned i = sc->sc_cur_mode;
 
 	return ((uint32_t)udl_modes[i].hdisplay *
 	    (uint32_t)udl_modes[i].vdisplay * 2);
 }
 
 static uint32_t
 udl_get_fb_width(struct udl_softc *sc)
 {
 	unsigned i = sc->sc_cur_mode;
 
 	return (udl_modes[i].hdisplay);
 }
 
 static uint32_t
 udl_get_fb_height(struct udl_softc *sc)
 {
 	unsigned i = sc->sc_cur_mode;
 
 	return (udl_modes[i].vdisplay);
 }
 
 static uint32_t
 udl_get_fb_hz(struct udl_softc *sc)
 {
 	unsigned i = sc->sc_cur_mode;
 
 	return (udl_modes[i].hz);
 }
 
 static void
 udl_callout(void *arg)
 {
 	struct udl_softc *sc = arg;
 	const uint32_t max = udl_get_fb_size(sc);
 	int fps;
 
 	if (sc->sc_power_save == 0) {
 		fps = udl_fps;
 
 	  	/* figure out number of frames per second */
 		if (fps < UDL_FPS_MIN)
 			fps = UDL_FPS_MIN;
 		else if (fps > UDL_FPS_MAX)
 			fps = UDL_FPS_MAX;
 
 		if (sc->sc_sync_off >= max)
 			sc->sc_sync_off = 0;
 		usbd_transfer_start(sc->sc_xfer[UDL_BULK_WRITE_0]);
 		usbd_transfer_start(sc->sc_xfer[UDL_BULK_WRITE_1]);
 	} else {
 		fps = 1;
 	}
 	callout_reset(&sc->sc_callout, hz / fps, &udl_callout, sc);
 }
 
 static int
 udl_probe(device_t dev)
 {
 	struct usb_attach_arg *uaa = device_get_ivars(dev);
 
 	if (uaa->usb_mode != USB_MODE_HOST)
 		return (ENXIO);
 	if (uaa->info.bConfigIndex != 0)
 		return (ENXIO);
 	if (uaa->info.bIfaceIndex != 0)
 		return (ENXIO);
 
 	return (usbd_lookup_id_by_uaa(udl_devs, sizeof(udl_devs), uaa));
 }
 
 static int
 udl_attach(device_t dev)
 {
 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
 	struct udl_softc *sc = device_get_softc(dev);
 	struct usb_attach_arg *uaa = device_get_ivars(dev);
 	int error;
 	int i;
 
 	device_set_usb_desc(dev);
 
 	mtx_init(&sc->sc_mtx, "UDL lock", NULL, MTX_DEF);
 	cv_init(&sc->sc_cv, "UDLCV");
 	callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0);
 	sc->sc_udev = uaa->device;
 
 	error = usbd_transfer_setup(uaa->device, &uaa->info.bIfaceIndex,
 	    sc->sc_xfer, udl_config, UDL_N_TRANSFER, sc, &sc->sc_mtx);
 
 	if (error) {
 		DPRINTF("usbd_transfer_setup error=%s\n", usbd_errstr(error));
 		goto detach;
 	}
 	usbd_xfer_set_priv(sc->sc_xfer[UDL_BULK_WRITE_0], &sc->sc_xfer_head[0]);
 	usbd_xfer_set_priv(sc->sc_xfer[UDL_BULK_WRITE_1], &sc->sc_xfer_head[1]);
 
 	TAILQ_INIT(&sc->sc_xfer_head[0]);
 	TAILQ_INIT(&sc->sc_xfer_head[1]);
 	TAILQ_INIT(&sc->sc_cmd_buf_free);
 	TAILQ_INIT(&sc->sc_cmd_buf_pending);
 
 	sc->sc_def_chip = -1;
 	sc->sc_chip = USB_GET_DRIVER_INFO(uaa);
 	sc->sc_def_mode = -1;
 	sc->sc_cur_mode = UDL_MAX_MODES;
 
 	/* Allow chip ID to be overwritten */
 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "chipid_force",
 	    CTLFLAG_RWTUN, &sc->sc_def_chip, 0, "chip ID");
 
 	/* Export current chip ID */
 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "chipid",
 	    CTLFLAG_RD, &sc->sc_chip, 0, "chip ID");
 
 	if (sc->sc_def_chip > -1 && sc->sc_def_chip <= DLMAX) {
 		device_printf(dev, "Forcing chip ID to 0x%04x\n", sc->sc_def_chip);
 		sc->sc_chip = sc->sc_def_chip;
 	}
 	/*
 	 * The product might have more than one chip
 	 */
 	if (sc->sc_chip == DLUNK)
 		udl_select_chip(sc, uaa);
 
 	for (i = 0; i != UDL_CMD_MAX_BUFFERS; i++) {
 		struct udl_cmd_buf *cb = &sc->sc_cmd_buf_temp[i];
 
 		TAILQ_INSERT_TAIL(&sc->sc_cmd_buf_free, cb, entry);
 	}
 
 	/*
 	 * Initialize chip.
 	 */
 	error = udl_init_chip(sc);
 	if (error != USB_ERR_NORMAL_COMPLETION)
 		goto detach;
 
 	/*
 	 * Select edid mode.
 	 */
 	udl_select_mode(sc);
 
 	/* Allow default mode to be overwritten */
 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "mode_force",
 	    CTLFLAG_RWTUN, &sc->sc_def_mode, 0, "mode");
 
 	/* Export current mode */
 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "mode",
 	    CTLFLAG_RD, &sc->sc_cur_mode, 0, "mode");
 
 	i = sc->sc_def_mode;
 	if (i > -1 && i < UDL_MAX_MODES) {
 		if (udl_modes[i].chip <= sc->sc_chip) {
 			device_printf(dev, "Forcing mode to %d\n", i);
 			sc->sc_cur_mode = i;
 		}
 	}
 	/* Printout current mode */
 	device_printf(dev, "Mode selected %dx%d @ %dHz\n",
 	    (int)udl_get_fb_width(sc),
 	    (int)udl_get_fb_height(sc),
 	    (int)udl_get_fb_hz(sc));
 
 	udl_init_resolution(sc);
 
 	/* Allocate frame buffer */
 	udl_fbmem_alloc(sc);
 
 	UDL_LOCK(sc);
 	udl_callout(sc);
 	UDL_UNLOCK(sc);
 
 	sc->sc_fb_info.fb_name = device_get_nameunit(dev);
 	sc->sc_fb_info.fb_size = sc->sc_fb_size;
 	sc->sc_fb_info.fb_bpp = 16;
 	sc->sc_fb_info.fb_depth = 16;
 	sc->sc_fb_info.fb_width = udl_get_fb_width(sc);
 	sc->sc_fb_info.fb_height = udl_get_fb_height(sc);
 	sc->sc_fb_info.fb_stride = sc->sc_fb_info.fb_width * 2;
 	sc->sc_fb_info.fb_pbase = 0;
 	sc->sc_fb_info.fb_vbase = (uintptr_t)sc->sc_fb_addr;
 	sc->sc_fb_info.fb_priv = sc;
 	sc->sc_fb_info.setblankmode = &udl_fb_setblankmode;
 
 	sc->sc_fbdev = device_add_child(dev, "fbd", -1);
 	if (sc->sc_fbdev == NULL)
 		goto detach;
 	if (device_probe_and_attach(sc->sc_fbdev) != 0)
 		goto detach;
 
 	return (0);
 
 detach:
 	udl_detach(dev);
 
 	return (ENXIO);
 }
 
 static int
 udl_detach(device_t dev)
 {
 	struct udl_softc *sc = device_get_softc(dev);
 
 	if (sc->sc_fbdev != NULL) {
 		device_t bdev;
 
 		bdev = sc->sc_fbdev;
 		sc->sc_fbdev = NULL;
 		device_detach(bdev);
 		device_delete_child(dev, bdev);
 	}
 	UDL_LOCK(sc);
 	sc->sc_gone = 1;
 	callout_stop(&sc->sc_callout);
 	UDL_UNLOCK(sc);
 
 	usbd_transfer_unsetup(sc->sc_xfer, UDL_N_TRANSFER);
 
 	callout_drain(&sc->sc_callout);
 
 	mtx_destroy(&sc->sc_mtx);
 	cv_destroy(&sc->sc_cv);
 
-	/*
-	 * Free framebuffer memory, if any.
-	 */
-	free(sc->sc_fb_addr, M_DEVBUF);
-	free(sc->sc_fb_copy, M_DEVBUF);
+	/* put main framebuffer into a recycle list, if any */
+	udl_buffer_free(sc->sc_fb_addr, sc->sc_fb_size);
 
+	/* free shadow framebuffer memory, if any */
+	free(sc->sc_fb_copy, M_USB_DL);
+
 	return (0);
 }
 
 static struct fb_info *
 udl_fb_getinfo(device_t dev)
 {
 	struct udl_softc *sc = device_get_softc(dev);
 
 	return (&sc->sc_fb_info);
 }
 
 static int
 udl_fb_setblankmode(void *arg, int mode)
 {
 	struct udl_softc *sc = arg;
 
 	switch (mode) {
 	case V_DISPLAY_ON:
 		udl_power_save(sc, 1, M_WAITOK);
 		break;
 	case V_DISPLAY_BLANK:
 		udl_power_save(sc, 1, M_WAITOK);
 		if (sc->sc_fb_addr != 0) {
 			const uint32_t max = udl_get_fb_size(sc);
 
 			memset((void *)sc->sc_fb_addr, 0, max);
 		}
 		break;
 	case V_DISPLAY_STAND_BY:
 	case V_DISPLAY_SUSPEND:
 		udl_power_save(sc, 0, M_WAITOK);
 		break;
 	}
 	return (0);
 }
 
 static struct udl_cmd_buf *
 udl_cmd_buf_alloc_locked(struct udl_softc *sc, int flags)
 {
 	struct udl_cmd_buf *cb;
 
 	while ((cb = TAILQ_FIRST(&sc->sc_cmd_buf_free)) == NULL) {
 		if (flags != M_WAITOK)
 			break;
 		cv_wait(&sc->sc_cv, &sc->sc_mtx);
 	}
 	if (cb != NULL) {
 		TAILQ_REMOVE(&sc->sc_cmd_buf_free, cb, entry);
 		cb->off = 0;
 	}
 	return (cb);
 }
 
 static struct udl_cmd_buf *
 udl_cmd_buf_alloc(struct udl_softc *sc, int flags)
 {
 	struct udl_cmd_buf *cb;
 
 	UDL_LOCK(sc);
 	cb = udl_cmd_buf_alloc_locked(sc, flags);
 	UDL_UNLOCK(sc);
 	return (cb);
 }
 
 static void
 udl_cmd_buf_send(struct udl_softc *sc, struct udl_cmd_buf *cb)
 {
 	UDL_LOCK(sc);
 	if (sc->sc_gone) {
 		TAILQ_INSERT_TAIL(&sc->sc_cmd_buf_free, cb, entry);
 	} else {
 		/* mark end of command stack */
 		udl_cmd_insert_int_1(cb, UDL_BULK_SOC);
 		udl_cmd_insert_int_1(cb, UDL_BULK_CMD_EOC);
 
 		TAILQ_INSERT_TAIL(&sc->sc_cmd_buf_pending, cb, entry);
 		usbd_transfer_start(sc->sc_xfer[UDL_BULK_WRITE_0]);
 		usbd_transfer_start(sc->sc_xfer[UDL_BULK_WRITE_1]);
 	}
 	UDL_UNLOCK(sc);
 }
 
 static struct udl_cmd_buf *
 udl_fb_synchronize_locked(struct udl_softc *sc)
 {
 	const uint32_t max = udl_get_fb_size(sc);
 
 	/* check if framebuffer is not ready */
 	if (sc->sc_fb_addr == NULL ||
 	    sc->sc_fb_copy == NULL)
 		return (NULL);
 
 	while (sc->sc_sync_off < max) {
 		uint32_t delta = max - sc->sc_sync_off;
 
 		if (delta > UDL_CMD_MAX_PIXEL_COUNT * 2)
 			delta = UDL_CMD_MAX_PIXEL_COUNT * 2;
 		if (bcmp(sc->sc_fb_addr + sc->sc_sync_off, sc->sc_fb_copy + sc->sc_sync_off, delta) != 0) {
 			struct udl_cmd_buf *cb;
 
 			cb = udl_cmd_buf_alloc_locked(sc, M_NOWAIT);
 			if (cb == NULL)
 				goto done;
 			memcpy(sc->sc_fb_copy + sc->sc_sync_off,
 			    sc->sc_fb_addr + sc->sc_sync_off, delta);
 			udl_cmd_insert_int_1(cb, UDL_BULK_SOC);
 			udl_cmd_insert_int_1(cb, UDL_BULK_CMD_FB_WRITE | UDL_BULK_CMD_FB_WORD);
 			udl_cmd_insert_int_3(cb, sc->sc_sync_off);
 			udl_cmd_insert_int_1(cb, delta / 2);
 			udl_cmd_insert_buf_le16(cb, sc->sc_fb_copy + sc->sc_sync_off, delta);
 			sc->sc_sync_off += delta;
 			return (cb);
 		} else {
 			sc->sc_sync_off += delta;
 		}
 	}
 done:
 	return (NULL);
 }
 
 static void
 udl_bulk_write_callback(struct usb_xfer *xfer, usb_error_t error)
 {
 	struct udl_softc *sc = usbd_xfer_softc(xfer);
 	struct udl_cmd_head *phead = usbd_xfer_get_priv(xfer);
 	struct udl_cmd_buf *cb;
 	unsigned i;
 
 	switch (USB_GET_STATE(xfer)) {
 	case USB_ST_TRANSFERRED:
 		TAILQ_CONCAT(&sc->sc_cmd_buf_free, phead, entry);
 	case USB_ST_SETUP:
 tr_setup:
 		for (i = 0; i != UDL_CMD_MAX_FRAMES; i++) {
 			cb = TAILQ_FIRST(&sc->sc_cmd_buf_pending);
 			if (cb == NULL) {
 				cb = udl_fb_synchronize_locked(sc);
 				if (cb == NULL)
 					break;
 			} else {
 				TAILQ_REMOVE(&sc->sc_cmd_buf_pending, cb, entry);
 			}
 			TAILQ_INSERT_TAIL(phead, cb, entry);
 			usbd_xfer_set_frame_data(xfer, i, cb->buf, cb->off);
 		}
 		if (i != 0) {
 			usbd_xfer_set_frames(xfer, i);
 			usbd_transfer_submit(xfer);
 		}
 		break;
 	default:
 		TAILQ_CONCAT(&sc->sc_cmd_buf_free, phead, entry);
 		if (error != USB_ERR_CANCELLED) {
 			/* try clear stall first */
 			usbd_xfer_set_stall(xfer);
 			goto tr_setup;
 		}
 		break;
 	}
 	/* wakeup any waiters */
 	cv_signal(&sc->sc_cv);
 }
 
 static int
 udl_power_save(struct udl_softc *sc, int on, int flags)
 {
 	struct udl_cmd_buf *cb;
 
 	/* get new buffer */
 	cb = udl_cmd_buf_alloc(sc, flags);
 	if (cb == NULL)
 		return (EAGAIN);
 
 	DPRINTF("screen %s\n", on ? "ON" : "OFF");
 
 	sc->sc_power_save = on ? 0 : 1;
 
 	if (on)
 		udl_cmd_write_reg_1(cb, UDL_REG_SCREEN, UDL_REG_SCREEN_ON);
 	else
 		udl_cmd_write_reg_1(cb, UDL_REG_SCREEN, UDL_REG_SCREEN_OFF);
 
 	udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0xff);
 	udl_cmd_buf_send(sc, cb);
 	return (0);
 }
 
 static int
 udl_ctrl_msg(struct udl_softc *sc, uint8_t rt, uint8_t r,
     uint16_t index, uint16_t value, uint8_t *buf, size_t len)
 {
 	usb_device_request_t req;
 	int error;
 
 	req.bmRequestType = rt;
 	req.bRequest = r;
 	USETW(req.wIndex, index);
 	USETW(req.wValue, value);
 	USETW(req.wLength, len);
 
 	error = usbd_do_request_flags(sc->sc_udev, NULL,
 	    &req, buf, 0, NULL, USB_DEFAULT_TIMEOUT);
 
 	DPRINTF("%s\n", usbd_errstr(error));
 
 	return (error);
 }
 
 static int
 udl_poll(struct udl_softc *sc, uint32_t *buf)
 {
 	uint32_t lbuf;
 	int error;
 
 	error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE,
 	    UDL_CTRL_CMD_POLL, 0x0000, 0x0000, (uint8_t *)&lbuf, sizeof(lbuf));
 	if (error == USB_ERR_NORMAL_COMPLETION)
 		*buf = le32toh(lbuf);
 	return (error);
 }
 
 static int
 udl_read_1(struct udl_softc *sc, uint16_t addr, uint8_t *buf)
 {
 	uint8_t lbuf[1];
 	int error;
 
 	error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE,
 	    UDL_CTRL_CMD_READ_1, addr, 0x0000, lbuf, 1);
 	if (error == USB_ERR_NORMAL_COMPLETION)
 		*buf = *(uint8_t *)lbuf;
 	return (error);
 }
 
 static int
 udl_write_1(struct udl_softc *sc, uint16_t addr, uint8_t buf)
 {
 	int error;
 
 	error = udl_ctrl_msg(sc, UT_WRITE_VENDOR_DEVICE,
 	    UDL_CTRL_CMD_WRITE_1, addr, 0x0000, &buf, 1);
 	return (error);
 }
 
 static int
 udl_read_edid(struct udl_softc *sc, uint8_t *buf)
 {
 	uint8_t lbuf[64];
 	uint16_t offset;
 	int error;
 
 	offset = 0;
 
 	error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE,
 	    UDL_CTRL_CMD_READ_EDID, 0x00a1, (offset << 8), lbuf, 64);
 	if (error != USB_ERR_NORMAL_COMPLETION)
 		goto fail;
 	bcopy(lbuf + 1, buf + offset, 63);
 	offset += 63;
 
 	error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE,
 	    UDL_CTRL_CMD_READ_EDID, 0x00a1, (offset << 8), lbuf, 64);
 	if (error != USB_ERR_NORMAL_COMPLETION)
 		goto fail;
 	bcopy(lbuf + 1, buf + offset, 63);
 	offset += 63;
 
 	error = udl_ctrl_msg(sc, UT_READ_VENDOR_DEVICE,
 	    UDL_CTRL_CMD_READ_EDID, 0x00a1, (offset << 8), lbuf, 3);
 	if (error != USB_ERR_NORMAL_COMPLETION)
 		goto fail;
 	bcopy(lbuf + 1, buf + offset, 2);
 fail:
 	return (error);
 }
 
 static uint8_t
 udl_lookup_mode(uint16_t hdisplay, uint16_t vdisplay, uint8_t hz,
     uint16_t chip, uint32_t clock)
 {
 	uint8_t idx;
 
 	/*
 	 * Check first if we have a matching mode with pixelclock
 	 */
 	for (idx = 0; idx != UDL_MAX_MODES; idx++) {
 		if ((udl_modes[idx].hdisplay == hdisplay) &&
 		    (udl_modes[idx].vdisplay == vdisplay) &&
 		    (udl_modes[idx].clock == clock) &&
 		    (udl_modes[idx].chip <= chip)) {
 			return (idx);
 		}
 	}
 
 	/*
 	 * If not, check for matching mode with update frequency
 	 */
 	for (idx = 0; idx != UDL_MAX_MODES; idx++) {
 		if ((udl_modes[idx].hdisplay == hdisplay) &&
 		    (udl_modes[idx].vdisplay == vdisplay) &&
 		    (udl_modes[idx].hz == hz) &&
 		    (udl_modes[idx].chip <= chip)) {
 			return (idx);
 		}
 	}
 	return (idx);
 }
 
 static void
 udl_select_chip(struct udl_softc *sc, struct usb_attach_arg *uaa)
 {
 	const char *pserial;
 
 	pserial = usb_get_serial(uaa->device);
 
 	sc->sc_chip = DL120;
 
 	if ((uaa->info.idVendor == USB_VENDOR_DISPLAYLINK) &&
 	    (uaa->info.idProduct == USB_PRODUCT_DISPLAYLINK_WSDVI)) {
 
 		/*
 		 * WS Tech DVI is DL120 or DL160. All deviced uses the
 		 * same revision (0.04) so iSerialNumber must be used
 		 * to determin which chip it is.
 		 */
 
 		if (strlen(pserial) > 7) {
 			if (strncmp(pserial, "0198-13", 7) == 0)
 				sc->sc_chip = DL160;
 		}
 		DPRINTF("iSerialNumber (%s) used to select chip (%d)\n",
 		    pserial, sc->sc_chip);
 	}
 	if ((uaa->info.idVendor == USB_VENDOR_DISPLAYLINK) &&
 	    (uaa->info.idProduct == USB_PRODUCT_DISPLAYLINK_SWDVI)) {
 
 		/*
 		 * SUNWEIT DVI is DL160, DL125, DL165 or DL195. Major revision
 		 * can be used to differ between DL1x0 and DL1x5. Minor to
 		 * differ between DL1x5. iSerialNumber seems not to be uniqe.
 		 */
 
 		sc->sc_chip = DL160;
 
 		if (uaa->info.bcdDevice >= 0x100) {
 			sc->sc_chip = DL165;
 			if (uaa->info.bcdDevice == 0x104)
 				sc->sc_chip = DL195;
 			if (uaa->info.bcdDevice == 0x108)
 				sc->sc_chip = DL125;
 		}
 		DPRINTF("bcdDevice (%02x) used to select chip (%d)\n",
 		    uaa->info.bcdDevice, sc->sc_chip);
 	}
 }
 
 static int
 udl_set_enc_key(struct udl_softc *sc, uint8_t *buf, uint8_t len)
 {
 	int error;
 
 	error = udl_ctrl_msg(sc, UT_WRITE_VENDOR_DEVICE,
 	    UDL_CTRL_CMD_SET_KEY, 0x0000, 0x0000, buf, len);
 	return (error);
 }
 
 static void
 udl_fbmem_alloc(struct udl_softc *sc)
 {
 	uint32_t size;
 
 	size = udl_get_fb_size(sc);
 	size = round_page(size);
-
+	/* check for zero size */
+	if (size == 0)
+		size = PAGE_SIZE;
 	/*
 	 * It is assumed that allocations above PAGE_SIZE bytes will
 	 * be PAGE_SIZE aligned for use with mmap()
 	 */
-	sc->sc_fb_addr = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
-	sc->sc_fb_copy = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
+	sc->sc_fb_addr = udl_buffer_alloc(size);
+	sc->sc_fb_copy = malloc(size, M_USB_DL, M_WAITOK | M_ZERO);
 	sc->sc_fb_size = size;
 }
 
 static void
 udl_cmd_insert_int_1(struct udl_cmd_buf *cb, uint8_t value)
 {
 
 	cb->buf[cb->off] = value;
 	cb->off += 1;
 }
 
 #if 0
 static void
 udl_cmd_insert_int_2(struct udl_cmd_buf *cb, uint16_t value)
 {
 	uint16_t lvalue;
 
 	lvalue = htobe16(value);
 	bcopy(&lvalue, cb->buf + cb->off, 2);
 
 	cb->off += 2;
 }
 
 #endif
 
 static void
 udl_cmd_insert_int_3(struct udl_cmd_buf *cb, uint32_t value)
 {
 	uint32_t lvalue;
 
 #if BYTE_ORDER == BIG_ENDIAN
 	lvalue = htobe32(value) << 8;
 #else
 	lvalue = htobe32(value) >> 8;
 #endif
 	bcopy(&lvalue, cb->buf + cb->off, 3);
 
 	cb->off += 3;
 }
 
 #if 0
 static void
 udl_cmd_insert_int_4(struct udl_cmd_buf *cb, uint32_t value)
 {
 	uint32_t lvalue;
 
 	lvalue = htobe32(value);
 	bcopy(&lvalue, cb->buf + cb->off, 4);
 
 	cb->off += 4;
 }
 
 #endif
 
 static void
 udl_cmd_insert_buf_le16(struct udl_cmd_buf *cb, const uint8_t *buf, uint32_t len)
 {
 	uint32_t x;
 
 	for (x = 0; x != len; x += 2) {
 		/* byte swap from little endian to big endian */
 		cb->buf[cb->off + x + 0] = buf[x + 1];
 		cb->buf[cb->off + x + 1] = buf[x + 0];
 	}
 	cb->off += len;
 }
 
 static void
 udl_cmd_write_reg_1(struct udl_cmd_buf *cb, uint8_t reg, uint8_t val)
 {
 
 	udl_cmd_insert_int_1(cb, UDL_BULK_SOC);
 	udl_cmd_insert_int_1(cb, UDL_BULK_CMD_REG_WRITE_1);
 	udl_cmd_insert_int_1(cb, reg);
 	udl_cmd_insert_int_1(cb, val);
 }
 
 static void
 udl_cmd_write_reg_3(struct udl_cmd_buf *cb, uint8_t reg, uint32_t val)
 {
 
 	udl_cmd_write_reg_1(cb, reg + 0, (val >> 16) & 0xff);
 	udl_cmd_write_reg_1(cb, reg + 1, (val >> 8) & 0xff);
 	udl_cmd_write_reg_1(cb, reg + 2, (val >> 0) & 0xff);
 }
 
 static int
 udl_init_chip(struct udl_softc *sc)
 {
 	uint32_t ui32;
 	uint8_t ui8;
 	int error;
 
 	error = udl_poll(sc, &ui32);
 	if (error != USB_ERR_NORMAL_COMPLETION)
 		return (error);
 	DPRINTF("poll=0x%08x\n", ui32);
 
 	/* Some products may use later chip too */
 	switch (ui32 & 0xff) {
 	case 0xf1:			/* DL1x5 */
 		switch (sc->sc_chip) {
 		case DL120:
 			sc->sc_chip = DL125;
 			break;
 		case DL160:
 			sc->sc_chip = DL165;
 			break;
 		}
 		break;
 	}
 	DPRINTF("chip 0x%04x\n", sc->sc_chip);
 
 	error = udl_read_1(sc, 0xc484, &ui8);
 	if (error != USB_ERR_NORMAL_COMPLETION)
 		return (error);
 	DPRINTF("read 0x%02x from 0xc484\n", ui8);
 
 	error = udl_write_1(sc, 0xc41f, 0x01);
 	if (error != USB_ERR_NORMAL_COMPLETION)
 		return (error);
 	DPRINTF("write 0x01 to 0xc41f\n");
 
 	error = udl_read_edid(sc, sc->sc_edid);
 	if (error != USB_ERR_NORMAL_COMPLETION)
 		return (error);
 	DPRINTF("read EDID\n");
 
 	error = udl_set_enc_key(sc, __DECONST(void *, udl_null_key_1),
 	    sizeof(udl_null_key_1));
 	if (error != USB_ERR_NORMAL_COMPLETION)
 		return (error);
 	DPRINTF("set encryption key\n");
 
 	error = udl_write_1(sc, 0xc40b, 0x00);
 	if (error != USB_ERR_NORMAL_COMPLETION)
 		return (error);
 	DPRINTF("write 0x00 to 0xc40b\n");
 
 	return (USB_ERR_NORMAL_COMPLETION);
 }
 
 static void
 udl_init_fb_offsets(struct udl_cmd_buf *cb, uint32_t start16, uint32_t stride16,
     uint32_t start8, uint32_t stride8)
 {
 	udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0x00);
 	udl_cmd_write_reg_3(cb, UDL_REG_ADDR_START16, start16);
 	udl_cmd_write_reg_3(cb, UDL_REG_ADDR_STRIDE16, stride16);
 	udl_cmd_write_reg_3(cb, UDL_REG_ADDR_START8, start8);
 	udl_cmd_write_reg_3(cb, UDL_REG_ADDR_STRIDE8, stride8);
 	udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0xff);
 }
 
 static int
 udl_init_resolution(struct udl_softc *sc)
 {
 	const uint32_t max = udl_get_fb_size(sc);
 	const uint8_t *buf = udl_modes[sc->sc_cur_mode].mode;
 	struct udl_cmd_buf *cb;
 	uint32_t delta;
 	uint32_t i;
 	int error;
 
 	/* get new buffer */
 	cb = udl_cmd_buf_alloc(sc, M_WAITOK);
 	if (cb == NULL)
 		return (EAGAIN);
 
 	/* write resolution values and set video memory offsets */
 	udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0x00);
 	for (i = 0; i < UDL_MODE_SIZE; i++)
 		udl_cmd_write_reg_1(cb, i, buf[i]);
 	udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0xff);
 
 	udl_init_fb_offsets(cb, 0x000000, 0x000a00, 0x555555, 0x000500);
 	udl_cmd_buf_send(sc, cb);
 
 	/* fill screen with black color */
 	for (i = 0; i < max; i += delta) {
 		static const uint8_t udl_black[UDL_CMD_MAX_PIXEL_COUNT * 2] __aligned(4);
 
 		delta = max - i;
 		if (delta > UDL_CMD_MAX_PIXEL_COUNT * 2)
 			delta = UDL_CMD_MAX_PIXEL_COUNT * 2;
 		if (i == 0)
 			error = udl_cmd_write_buf_le16(sc, udl_black, i, delta / 2, M_WAITOK);
 		else
 			error = udl_cmd_buf_copy_le16(sc, 0, i, delta / 2, M_WAITOK);
 		if (error)
 			return (error);
 	}
 
 	/* get new buffer */
 	cb = udl_cmd_buf_alloc(sc, M_WAITOK);
 	if (cb == NULL)
 		return (EAGAIN);
 
 	/* show framebuffer content */
 	udl_cmd_write_reg_1(cb, UDL_REG_SCREEN, UDL_REG_SCREEN_ON);
 	udl_cmd_write_reg_1(cb, UDL_REG_SYNC, 0xff);
 	udl_cmd_buf_send(sc, cb);
 	return (0);
 }
 
 static void
 udl_select_mode(struct udl_softc *sc)
 {
 	struct udl_mode mode;
 	int index = UDL_MAX_MODES;
 	int i;
 
 	/* try to get the preferred mode from EDID */
 	edid_parse(sc->sc_edid, &sc->sc_edid_info);
 #ifdef USB_DEBUG
 	edid_print(&sc->sc_edid_info);
 #endif
 	if (sc->sc_edid_info.edid_preferred_mode != NULL) {
 		mode.hz =
 		    (sc->sc_edid_info.edid_preferred_mode->dot_clock * 1000) /
 		    (sc->sc_edid_info.edid_preferred_mode->htotal *
 		    sc->sc_edid_info.edid_preferred_mode->vtotal);
 		mode.clock =
 		    sc->sc_edid_info.edid_preferred_mode->dot_clock / 10;
 		mode.hdisplay =
 		    sc->sc_edid_info.edid_preferred_mode->hdisplay;
 		mode.vdisplay =
 		    sc->sc_edid_info.edid_preferred_mode->vdisplay;
 		index = udl_lookup_mode(mode.hdisplay, mode.vdisplay, mode.hz,
 		    sc->sc_chip, mode.clock);
 		sc->sc_cur_mode = index;
 	} else {
 		DPRINTF("no preferred mode found!\n");
 	}
 
 	if (index == UDL_MAX_MODES) {
 		DPRINTF("no mode line found for %dx%d @ %dHz!\n",
 		    mode.hdisplay, mode.vdisplay, mode.hz);
 
 		i = 0;
 		while (i < sc->sc_edid_info.edid_nmodes) {
 			mode.hz =
 			    (sc->sc_edid_info.edid_modes[i].dot_clock * 1000) /
 			    (sc->sc_edid_info.edid_modes[i].htotal *
 			    sc->sc_edid_info.edid_modes[i].vtotal);
 			mode.clock =
 			    sc->sc_edid_info.edid_modes[i].dot_clock / 10;
 			mode.hdisplay =
 			    sc->sc_edid_info.edid_modes[i].hdisplay;
 			mode.vdisplay =
 			    sc->sc_edid_info.edid_modes[i].vdisplay;
 			index = udl_lookup_mode(mode.hdisplay, mode.vdisplay,
 			    mode.hz, sc->sc_chip, mode.clock);
 			if (index < UDL_MAX_MODES)
 				if ((sc->sc_cur_mode == UDL_MAX_MODES) ||
 				    (index > sc->sc_cur_mode))
 					sc->sc_cur_mode = index;
 			i++;
 		}
 	}
 	/*
 	 * If no mode found use default.
 	 */
 	if (sc->sc_cur_mode == UDL_MAX_MODES)
 		sc->sc_cur_mode = udl_lookup_mode(800, 600, 60, sc->sc_chip, 0);
 }
 
 static int
 udl_cmd_write_buf_le16(struct udl_softc *sc, const uint8_t *buf, uint32_t off,
     uint8_t pixels, int flags)
 {
 	struct udl_cmd_buf *cb;
 
 	cb = udl_cmd_buf_alloc(sc, flags);
 	if (cb == NULL)
 		return (EAGAIN);
 
 	udl_cmd_insert_int_1(cb, UDL_BULK_SOC);
 	udl_cmd_insert_int_1(cb, UDL_BULK_CMD_FB_WRITE | UDL_BULK_CMD_FB_WORD);
 	udl_cmd_insert_int_3(cb, off);
 	udl_cmd_insert_int_1(cb, pixels);
 	udl_cmd_insert_buf_le16(cb, buf, 2 * pixels);
 	udl_cmd_buf_send(sc, cb);
 
 	return (0);
 }
 
 static int
 udl_cmd_buf_copy_le16(struct udl_softc *sc, uint32_t src, uint32_t dst,
     uint8_t pixels, int flags)
 {
 	struct udl_cmd_buf *cb;
 
 	cb = udl_cmd_buf_alloc(sc, flags);
 	if (cb == NULL)
 		return (EAGAIN);
 
 	udl_cmd_insert_int_1(cb, UDL_BULK_SOC);
 	udl_cmd_insert_int_1(cb, UDL_BULK_CMD_FB_COPY | UDL_BULK_CMD_FB_WORD);
 	udl_cmd_insert_int_3(cb, dst);
 	udl_cmd_insert_int_1(cb, pixels);
 	udl_cmd_insert_int_3(cb, src);
 	udl_cmd_buf_send(sc, cb);
 
 	return (0);
 }
Index: user/ngie/more-tests/sys/dev/usb/video/udl.h
===================================================================
--- user/ngie/more-tests/sys/dev/usb/video/udl.h	(revision 281675)
+++ user/ngie/more-tests/sys/dev/usb/video/udl.h	(revision 281676)
@@ -1,311 +1,320 @@
 /*	$OpenBSD: udl.h,v 1.21 2013/04/15 09:23:02 mglocker Exp $ */
 /*	$FreeBSD$	*/
 
 /*
  * Copyright (c) 2009 Marcus Glocker <mglocker@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
  * copyright notice and this permission notice appear in all copies.
  *
  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
 #ifndef _UDL_H_
 #define	_UDL_H_
 
 #include <sys/types.h>
 #include <sys/queue.h>
 
 /*
  * BULK command transfer structure.
  */
 #define	UDL_CMD_MAX_FRAMES	64	/* units */
 #define	UDL_CMD_MAX_DATA_SIZE	512	/* bytes */
 #define	UDL_CMD_MAX_HEAD_SIZE	16	/* bytes */
 #define	UDL_CMD_MAX_PIXEL_COUNT	((UDL_CMD_MAX_DATA_SIZE - UDL_CMD_MAX_HEAD_SIZE) / 2)
 #define	UDL_CMD_MAX_BUFFERS	(3 * UDL_CMD_MAX_FRAMES)
 #define	UDL_FONT_HEIGHT		16	/* pixels */
 #define	UDL_MAX_MODES		25	/* units */
 
+MALLOC_DECLARE(M_USB_DL);
+
+struct udl_buffer {
+	TAILQ_ENTRY(udl_buffer) entry;
+	uint32_t size;
+};
+
+TAILQ_HEAD(udl_buffer_head, udl_buffer);
+
 struct udl_cmd_buf {
 	TAILQ_ENTRY(udl_cmd_buf) entry;
 	uint32_t off;
 	uint8_t	buf[UDL_CMD_MAX_DATA_SIZE] __aligned(4);
 };
 
 TAILQ_HEAD(udl_cmd_head, udl_cmd_buf);
 
 enum {
 	UDL_BULK_WRITE_0,
 	UDL_BULK_WRITE_1,
 	UDL_N_TRANSFER,
 };
 
 /*
  * Our per device structure.
  */
 struct udl_softc {
 	struct mtx sc_mtx;
 	struct cv sc_cv;
 	struct callout sc_callout;
 	struct usb_xfer *sc_xfer[UDL_N_TRANSFER];
 	struct usb_device *sc_udev;
 	device_t sc_fbdev;
 	struct fb_info sc_fb_info;
 	uint8_t	sc_edid[128];
 	struct edid_info sc_edid_info;
 	struct udl_cmd_head sc_xfer_head[2];
 	struct udl_cmd_head sc_cmd_buf_free;
 	struct udl_cmd_head sc_cmd_buf_pending;
 	struct udl_cmd_buf sc_cmd_buf_temp[UDL_CMD_MAX_BUFFERS];
 	uint32_t sc_sync_off;
 	uint32_t sc_fb_size;
 	uint8_t *sc_fb_addr;
 	uint8_t *sc_fb_copy;
 	int	sc_def_chip;		/* default chip version */
 	int	sc_chip;
 #define	DLALL	0x0000
 #define	DL125	0x0000			/* max 1280x1024, 1440x900 */
 #define	DL120	0x0001			/* max 1280x1024, 1440x1050 */
 #define	DL160	0x0002			/* max 1600x1200, 1680x1050 */
 #define	DL165	0x0003			/* max 1600x1200, 1920x1080 */
 #define	DL195	0x0004			/* max 1920x1200, 2048x1152 */
 #define	DLMAX	0x0004
 #define	DLUNK	0x00ff			/* unknown */
 	int	sc_def_mode;		/* default mode */
 	int	sc_cur_mode;
 	uint8_t	sc_power_save;		/* set if power save is enabled */
 	uint8_t	sc_gone;
 };
 
 #define	UDL_LOCK(sc)	mtx_lock(&(sc)->sc_mtx)
 #define	UDL_UNLOCK(sc)	mtx_unlock(&(sc)->sc_mtx)
 
 /*
  * Chip commands.
  */
 #define	UDL_CTRL_CMD_READ_EDID		0x02
 #define	UDL_CTRL_CMD_WRITE_1		0x03
 #define	UDL_CTRL_CMD_READ_1		0x04
 #define	UDL_CTRL_CMD_POLL		0x06
 #define	UDL_CTRL_CMD_SET_KEY		0x12
 
 #define	UDL_BULK_SOC			0xaf	/* start of command token */
 
 #define	UDL_BULK_CMD_REG_WRITE_1	0x20	/* write 1 byte to register */
 #define	UDL_BULK_CMD_EOC		0xa0	/* end of command stack */
 #define	UDL_BULK_CMD_DECOMP		0xe0	/* send decompression table */
 
 #define	UDL_BULK_CMD_FB_BASE		0x60
 #define	UDL_BULK_CMD_FB_WORD		0x08
 #define	UDL_BULK_CMD_FB_COMP		0x10
 #define	UDL_BULK_CMD_FB_WRITE		(UDL_BULK_CMD_FB_BASE | 0x00)
 #define	UDL_BULK_CMD_FB_COPY		(UDL_BULK_CMD_FB_BASE | 0x02)
 
 /*
  * Chip registers.
  */
 #define	UDL_REG_ADDR_START16		0x20
 #define	UDL_REG_ADDR_STRIDE16		0x23
 #define	UDL_REG_ADDR_START8		0x26
 #define	UDL_REG_ADDR_STRIDE8		0x29
 
 #define	UDL_REG_SCREEN			0x1f
 #define	UDL_REG_SCREEN_ON		0x00
 #define	UDL_REG_SCREEN_OFF		0x01
 #define	UDL_REG_SYNC			0xff
 
 #define	UDL_MODE_SIZE 29
 
 /*
  * Register values for screen resolution initialization.
  */
 static const uint8_t udl_reg_vals_640x480_60[UDL_MODE_SIZE] = {	/* 25.17 Mhz 59.9 Hz
 								 * VESA std */
 	0x00, 0x99, 0x30, 0x26, 0x94, 0x60, 0xa9, 0xce, 0x60, 0x07, 0xb3, 0x0f,
 	0x79, 0xff, 0xff, 0x02, 0x80, 0x83, 0xbc, 0xff, 0xfc, 0xff, 0xff, 0x01,
 	0xe0, 0x01, 0x02, 0xab, 0x13
 };
 static const uint8_t udl_reg_vals_640x480_67[UDL_MODE_SIZE] = {	/* 30.25 MHz 66.6 Hz MAC
 								 * std */
 	0x00, 0x1d, 0x33, 0x07, 0xb3, 0x60, 0xa9, 0xce, 0x60, 0xb6, 0xa8, 0xff,
 	0xff, 0xbf, 0x70, 0x02, 0x80, 0x83, 0xbc, 0xff, 0xff, 0xff, 0xf9, 0x01,
 	0xe0, 0x01, 0x02, 0xa2, 0x17
 };
 static const uint8_t udl_reg_vals_640x480_72[UDL_MODE_SIZE] = {	/* 31.50 Mhz 72.8 Hz
 								 * VESA std */
 	0x00, 0x2b, 0xeb, 0x35, 0xd3, 0x0a, 0x95, 0xe6, 0x0e, 0x0f, 0xb5, 0x15,
 	0x2a, 0xff, 0xff, 0x02, 0x80, 0xcc, 0x1d, 0xff, 0xf9, 0xff, 0xff, 0x01,
 	0xe0, 0x01, 0x02, 0x9c, 0x18
 };
 static const uint8_t udl_reg_vals_640x480_75[UDL_MODE_SIZE] = {	/* 31.50 Mhz 75.7 Hz
 								 * VESA std */
 	0x00, 0xeb, 0xf7, 0xd3, 0x0f, 0x4f, 0x93, 0xfa, 0x47, 0xb5, 0x58, 0xff,
 	0xff, 0xbf, 0x70, 0x02, 0x80, 0xf4, 0x8f, 0xff, 0xff, 0xff, 0xf9, 0x01,
 	0xe0, 0x01, 0x02, 0x9c, 0x18
 };
 static const uint8_t udl_reg_vals_800x480_61[UDL_MODE_SIZE] = {	/* 33.00 MHz 61.9 Hz */
 	0x00, 0x20, 0x3c, 0x7a, 0xc9, 0xf2, 0x6c, 0x48, 0xf9, 0x70, 0x53, 0xff,
 	0xff, 0x21, 0x27, 0x03, 0x20, 0x91, 0xf3, 0xff, 0xff, 0xff, 0xf9, 0x01,
 	0xe0, 0x01, 0x02, 0xc8, 0x19
 };
 static const uint8_t udl_reg_vals_800x600_56[UDL_MODE_SIZE] = {	/* 36.00 MHz 56.2 Hz
 								 * VESA std */
 	0x00, 0x65, 0x35, 0x48, 0xf4, 0xf2, 0x6c, 0x19, 0x18, 0xc9, 0x4b, 0xff,
 	0xff, 0x70, 0x35, 0x03, 0x20, 0x32, 0x31, 0xff, 0xff, 0xff, 0xfc, 0x02,
 	0x58, 0x01, 0x02, 0x20, 0x1c
 };
 static const uint8_t udl_reg_vals_800x600_60[UDL_MODE_SIZE] = {	/* 40.00 MHz 60.3 Hz
 								 * VESA std */
 	0x00, 0x20, 0x3c, 0x7a, 0xc9, 0x93, 0x60, 0xc8, 0xc7, 0x70, 0x53, 0xff,
 	0xff, 0x21, 0x27, 0x03, 0x20, 0x91, 0x8f, 0xff, 0xff, 0xff, 0xf2, 0x02,
 	0x58, 0x01, 0x02, 0x40, 0x1f
 };
 static const uint8_t udl_reg_vals_800x600_72[UDL_MODE_SIZE] = {	/* 50.00 MHz 72.1 Hz
 								 * VESA std */
 	0x00, 0xeb, 0xf7, 0xd1, 0x90, 0x4d, 0x82, 0x23, 0x1f, 0x39, 0xcf, 0xff,
 	0xff, 0x43, 0x21, 0x03, 0x20, 0x62, 0xc5, 0xff, 0xff, 0xff, 0xca, 0x02,
 	0x58, 0x01, 0x02, 0x10, 0x27
 };
 static const uint8_t udl_reg_vals_800x600_74[UDL_MODE_SIZE] = {	/* 50.00 MHz 74.4 Hz */
 	0x00, 0xb3, 0x76, 0x39, 0xcf, 0x60, 0xa9, 0xc7, 0xf4, 0x70, 0x53, 0xff,
 	0xff, 0x35, 0x33, 0x03, 0x20, 0x8f, 0xe9, 0xff, 0xff, 0xff, 0xf9, 0x02,
 	0x58, 0x01, 0x02, 0x10, 0x27
 };
 static const uint8_t udl_reg_vals_800x600_75[UDL_MODE_SIZE] = {	/* 49.50 MHz 75.0 Hz
 								 * VESA std */
 	0x00, 0xb3, 0x76, 0x39, 0xcf, 0xf2, 0x6c, 0x19, 0x18, 0x70, 0x53, 0xff,
 	0xff, 0x35, 0x33, 0x03, 0x20, 0x32, 0x31, 0xff, 0xff, 0xff, 0xf9, 0x02,
 	0x58, 0x01, 0x02, 0xac, 0x26
 };
 static const uint8_t udl_reg_vals_1024x768_60[UDL_MODE_SIZE] = {	/* 65.00 MHz 60.0 Hz
 									 * VESA std */
 	0x00, 0x36, 0x18, 0xd5, 0x10, 0x60, 0xa9, 0x7b, 0x33, 0xa1, 0x2b, 0x27,
 	0x32, 0xff, 0xff, 0x04, 0x00, 0xd9, 0x9a, 0xff, 0xca, 0xff, 0xff, 0x03,
 	0x00, 0x04, 0x03, 0xc8, 0x32
 };
 static const uint8_t udl_reg_vals_1024x768_70[UDL_MODE_SIZE] = {	/* 75.00 MHz 70.0 Hz
 									 * VESA std */
 	0x00, 0xb4, 0xed, 0x4c, 0x5e, 0x60, 0xa9, 0x7b, 0x33, 0x10, 0x4d, 0xff,
 	0xff, 0x27, 0x32, 0x04, 0x00, 0xd9, 0x9a, 0xff, 0xff, 0xff, 0xca, 0x03,
 	0x00, 0x04, 0x02, 0x98, 0x3a
 };
 static const uint8_t udl_reg_vals_1024x768_75[UDL_MODE_SIZE] = {	/* 78.75 MHz 75.0 Hz
 									 * VESA std */
 	0x00, 0xec, 0xb4, 0xa0, 0x4c, 0x36, 0x0a, 0x07, 0xb3, 0x5e, 0xd5, 0xff,
 	0xff, 0x0f, 0x79, 0x04, 0x00, 0x0f, 0x66, 0xff, 0xff, 0xff, 0xf9, 0x03,
 	0x00, 0x04, 0x02, 0x86, 0x3d
 };
 static const uint8_t udl_reg_vals_1280x800_60[UDL_MODE_SIZE] = {	/* 83.46 MHz 59.9 MHz */
 	0x00, 0xb2, 0x19, 0x34, 0xdf, 0x93, 0x60, 0x30, 0xfb, 0x9f, 0xca, 0xff,
 	0xff, 0x27, 0x32, 0x05, 0x00, 0x61, 0xf6, 0xff, 0xff, 0xff, 0xf9, 0x03,
 	0x20, 0x04, 0x02, 0x34, 0x41
 };
 static const uint8_t udl_reg_vals_1280x960_60[UDL_MODE_SIZE] = {	/* 108.00 MHz 60.0 Hz
 									 * VESA std */
 	0x00, 0xa6, 0x03, 0x5c, 0x7e, 0x0a, 0x95, 0x48, 0xf4, 0x61, 0xbd, 0xff,
 	0xff, 0x94, 0x43, 0x05, 0x00, 0x91, 0xe8, 0xff, 0xff, 0xff, 0xf9, 0x03,
 	0xc0, 0x04, 0x02, 0x60, 0x54
 };
 static const uint8_t udl_reg_vals_1280x1024_60[UDL_MODE_SIZE] = {	/* 108.00 MHz 60.0 Hz
 									 * VESA std */
 	0x00, 0x98, 0xf8, 0x0d, 0x57, 0x2a, 0x55, 0x4d, 0x54, 0xca, 0x0d, 0xff,
 	0xff, 0x94, 0x43, 0x05, 0x00, 0x9a, 0xa8, 0xff, 0xff, 0xff, 0xf9, 0x04,
 	0x00, 0x04, 0x02, 0x60, 0x54
 };
 static const uint8_t udl_reg_vals_1280x1024_75[UDL_MODE_SIZE] = {	/* 135.00 MHz 75.0 Hz
 									 * VESA std */
 	0x00, 0xce, 0x12, 0x3f, 0x9f, 0x2a, 0x55, 0x4d, 0x54, 0xca, 0x0d, 0xff,
 	0xff, 0x32, 0x60, 0x05, 0x00, 0x9a, 0xa8, 0xff, 0xff, 0xff, 0xf9, 0x04,
 	0x00, 0x04, 0x02, 0x78, 0x69
 };
 static const uint8_t udl_reg_vals_1366x768_60[UDL_MODE_SIZE] = {	/* 90 MHz 60.0 Hz */
 	0x01, 0x19, 0x1e, 0x1f, 0xb0, 0x93, 0x60, 0x40, 0x7b, 0x36, 0xe8, 0x27,
 	0x32, 0xff, 0xff, 0x05, 0x56, 0x03, 0xd9, 0xff, 0xff, 0xfc, 0xa7, 0x03,
 	0x00, 0x04, 0x02, 0x9a, 0x42
 };
 static const uint8_t udl_reg_vals_1440x900_60[UDL_MODE_SIZE] = {	/* 106.47 MHz 59.9 Hz */
 	0x00, 0x24, 0xce, 0xe7, 0x72, 0x36, 0x0a, 0x86, 0xca, 0x1c, 0x10, 0xff,
 	0xff, 0x60, 0x3a, 0x05, 0xa0, 0x0d, 0x94, 0xff, 0xff, 0xff, 0xf9, 0x03,
 	0x84, 0x04, 0x02, 0x2e, 0x53
 };
 static const uint8_t udl_reg_vals_1440x900_59[UDL_MODE_SIZE] = {	/* 106.50 MHz 59.8 Hz */
 	0x00, 0x24, 0xce, 0xe7, 0x72, 0xd8, 0x2a, 0x1b, 0x28, 0x1c, 0x10, 0xff,
 	0xff, 0x60, 0x3a, 0x05, 0xa0, 0x36, 0x50, 0xff, 0xff, 0xff, 0xf9, 0x03,
 	0x84, 0x04, 0x02, 0x34, 0x53
 };
 static const uint8_t udl_reg_vals_1440x900_75[UDL_MODE_SIZE] = {	/* 136.49 MHz 75.0 Hz */
 	0x00, 0x73, 0xa6, 0x14, 0xea, 0x0a, 0x95, 0xca, 0x10, 0x7f, 0x46, 0xff,
 	0xff, 0x60, 0x3a, 0x05, 0xa0, 0x94, 0x20, 0xff, 0xff, 0xff, 0xf9, 0x03,
 	0x84, 0x04, 0x02, 0xa2, 0x6a
 };
 static const uint8_t udl_reg_vals_1680x1050_60[UDL_MODE_SIZE] = {	/* 147.14 MHz 60.0 Hz */
 	0x00, 0x53, 0x43, 0xa6, 0x71, 0xc1, 0x52, 0xd9, 0x29, 0x69, 0x9f, 0xff,
 	0xff, 0xd7, 0xee, 0x06, 0x90, 0xb2, 0x53, 0xff, 0xff, 0xff, 0xf9, 0x04,
 	0x1a, 0x04, 0x02, 0xf4, 0x72
 };
 static const uint8_t udl_reg_vals_1600x1200_60[UDL_MODE_SIZE] = {	/* 162.00 MHz 60.0 Hz
 									 * VESA std */
 	0x00, 0xcf, 0xa4, 0x3c, 0x4e, 0x55, 0x73, 0x71, 0x2b, 0x71, 0x52, 0xff,
 	0xff, 0xee, 0xca, 0x06, 0x40, 0xe2, 0x57, 0xff, 0xff, 0xff, 0xf9, 0x04,
 	0xb0, 0x04, 0x02, 0x90, 0x7e
 };
 static const uint8_t udl_reg_vals_1920x1080_60[UDL_MODE_SIZE] = {	/* 138.50 MHz 59.9 Hz */
 	0x00, 0x73, 0xa6, 0x28, 0xb3, 0x54, 0xaa, 0x41, 0x5d, 0x0d, 0x9f, 0x32,
 	0x60, 0xff, 0xff, 0x07, 0x80, 0x0a, 0xea, 0xff, 0xf9, 0xff, 0xff, 0x04,
 	0x38, 0x04, 0x02, 0xe0, 0x7c
 };
 
 struct udl_mode {
 	uint16_t hdisplay;
 	uint16_t vdisplay;
 	uint8_t	hz;
 	uint16_t chip;
 	uint32_t clock;
 	const uint8_t *mode;
 };
 
 static const struct udl_mode udl_modes[UDL_MAX_MODES] = {
 	{640, 480, 60, DLALL, 2520, udl_reg_vals_640x480_60},
 	{640, 480, 67, DLALL, 3025, udl_reg_vals_640x480_67},
 	{640, 480, 72, DLALL, 3150, udl_reg_vals_640x480_72},
 	{640, 480, 75, DLALL, 3150, udl_reg_vals_640x480_75},
 	{800, 480, 59, DLALL, 5000, udl_reg_vals_800x480_61},
 	{800, 480, 61, DLALL, 3300, udl_reg_vals_800x480_61},
 	{800, 600, 56, DLALL, 3600, udl_reg_vals_800x600_56},
 	{800, 600, 60, DLALL, 4000, udl_reg_vals_800x600_60},
 	{800, 600, 72, DLALL, 5000, udl_reg_vals_800x600_72},
 	{800, 600, 74, DLALL, 5000, udl_reg_vals_800x600_74},
 	{800, 600, 75, DLALL, 4950, udl_reg_vals_800x600_75},
 	{1024, 768, 60, DLALL, 6500, udl_reg_vals_1024x768_60},
 	{1024, 768, 70, DLALL, 7500, udl_reg_vals_1024x768_70},
 	{1024, 768, 75, DLALL, 7850, udl_reg_vals_1024x768_75},
 	{1280, 800, 60, DLALL, 8346, udl_reg_vals_1280x800_60},
 	{1280, 960, 60, DLALL, 10800, udl_reg_vals_1280x960_60},
 	{1280, 1024, 60, DLALL, 10800, udl_reg_vals_1280x1024_60},
 	{1280, 1024, 75, DLALL, 13500, udl_reg_vals_1280x1024_75},
 	{1366, 768, 60, DLALL, 9000, udl_reg_vals_1366x768_60},
 	{1440, 900, 59, DL125, 10650, udl_reg_vals_1440x900_59},
 	{1440, 900, 60, DL125, 10647, udl_reg_vals_1440x900_60},
 	{1440, 900, 75, DL125, 13649, udl_reg_vals_1440x900_75},
 	{1680, 1050, 60, DL160, 14714, udl_reg_vals_1680x1050_60},
 	{1600, 1200, 60, DL160, 16200, udl_reg_vals_1600x1200_60},
 	{1920, 1080, 60, DL165, 13850, udl_reg_vals_1920x1080_60}
 };
 
 /*
  * Encryption.
  */
 static const uint8_t udl_null_key_1[] = {
 	0x57, 0xcd, 0xdc, 0xa7, 0x1c, 0x88, 0x5e, 0x15, 0x60, 0xfe, 0xc6, 0x97,
 	0x16, 0x3d, 0x47, 0xf2
 };
 
 #endif					/* _UDL_H_ */
Index: user/ngie/more-tests/sys/fs/ext2fs/ext2_hash.c
===================================================================
--- user/ngie/more-tests/sys/fs/ext2fs/ext2_hash.c	(revision 281675)
+++ user/ngie/more-tests/sys/fs/ext2fs/ext2_hash.c	(nonexistent)
@@ -1,316 +0,0 @@
-/*-
- * Copyright (c) 2010, 2013 Zheng Liu <lz@freebsd.org>
- * Copyright (c) 2012, Vyacheslav Matyushin
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-/*
- * The following notice applies to the code in ext2_half_md4():
- *
- * Copyright (C) 1990-2, RSA Data Security, Inc. All rights reserved.
- *
- * License to copy and use this software is granted provided that it
- * is identified as the "RSA Data Security, Inc. MD4 Message-Digest
- * Algorithm" in all material mentioning or referencing this software
- * or this function.
- *
- * License is also granted to make and use derivative works provided
- * that such works are identified as "derived from the RSA Data
- * Security, Inc. MD4 Message-Digest Algorithm" in all material
- * mentioning or referencing the derived work.
- *
- * RSA Data Security, Inc. makes no representations concerning either
- * the merchantability of this software or the suitability of this
- * software for any particular purpose. It is provided "as is"
- * without express or implied warranty of any kind.
- *
- * These notices must be retained in any copies of any part of this
- * documentation and/or software.
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/conf.h>
-#include <sys/vnode.h>
-#include <sys/stat.h>
-#include <sys/mount.h>
-
-#include <fs/ext2fs/htree.h>
-#include <fs/ext2fs/inode.h>
-#include <fs/ext2fs/ext2_mount.h>
-#include <fs/ext2fs/ext2_extern.h>
-
-/* F, G, and H are MD4 functions */
-#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
-#define G(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z)))
-#define H(x, y, z) ((x) ^ (y) ^ (z))
-
-/* ROTATE_LEFT rotates x left n bits */
-#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
-
-/*
- * FF, GG, and HH are transformations for rounds 1, 2, and 3.
- * Rotation is separated from addition to prevent recomputation.
- */
-#define FF(a, b, c, d, x, s) { \
-	(a) += F ((b), (c), (d)) + (x); \
-	(a) = ROTATE_LEFT ((a), (s)); \
-}
-
-#define GG(a, b, c, d, x, s) { \
-	(a) += G ((b), (c), (d)) + (x) + (uint32_t)0x5A827999; \
-	(a) = ROTATE_LEFT ((a), (s)); \
-}
-
-#define HH(a, b, c, d, x, s) { \
-	(a) += H ((b), (c), (d)) + (x) + (uint32_t)0x6ED9EBA1; \
-	(a) = ROTATE_LEFT ((a), (s)); \
-}
-
-/*
- * MD4 basic transformation.  It transforms state based on block.
- *
- * This is a half md4 algorithm since Linux uses this algorithm for dir
- * index.  This function is derived from the RSA Data Security, Inc. MD4
- * Message-Digest Algorithm and was modified as necessary.
- *
- * The return value of this function is uint32_t in Linux, but actually we don't
- * need to check this value, so in our version this function doesn't return any
- * value.
- */
-static void
-ext2_half_md4(uint32_t hash[4], uint32_t data[8])
-{
-	uint32_t a = hash[0], b = hash[1], c = hash[2], d = hash[3];
-
-	/* Round 1 */
-	FF(a, b, c, d, data[0],  3);
-	FF(d, a, b, c, data[1],  7);
-	FF(c, d, a, b, data[2], 11);
-	FF(b, c, d, a, data[3], 19);
-	FF(a, b, c, d, data[4],  3);
-	FF(d, a, b, c, data[5],  7);
-	FF(c, d, a, b, data[6], 11);
-	FF(b, c, d, a, data[7], 19);
-
-	/* Round 2 */
-	GG(a, b, c, d, data[1],  3);
-	GG(d, a, b, c, data[3],  5);
-	GG(c, d, a, b, data[5],  9);
-	GG(b, c, d, a, data[7], 13);
-	GG(a, b, c, d, data[0],  3);
-	GG(d, a, b, c, data[2],  5);
-	GG(c, d, a, b, data[4],  9);
-	GG(b, c, d, a, data[6], 13);
-
-	/* Round 3 */
-	HH(a, b, c, d, data[3],  3);
-	HH(d, a, b, c, data[7],  9);
-	HH(c, d, a, b, data[2], 11);
-	HH(b, c, d, a, data[6], 15);
-	HH(a, b, c, d, data[1],  3);
-	HH(d, a, b, c, data[5],  9);
-	HH(c, d, a, b, data[0], 11);
-	HH(b, c, d, a, data[4], 15);
-
-	hash[0] += a;
-	hash[1] += b;
-	hash[2] += c;
-	hash[3] += d;
-}
-
-/*
- * Tiny Encryption Algorithm.
- */
-static void
-ext2_tea(uint32_t hash[4], uint32_t data[8])
-{
-	uint32_t tea_delta = 0x9E3779B9;
-	uint32_t sum;
-	uint32_t x = hash[0], y = hash[1];
-	int n = 16;
-	int i = 1;
-
-	while (n-- > 0) {
-		sum = i * tea_delta;
-		x += ((y << 4) + data[0]) ^ (y + sum) ^ ((y >> 5) + data[1]);
-		y += ((x << 4) + data[2]) ^ (x + sum) ^ ((x >> 5) + data[3]);
-		i++;
-	}
-
-	hash[0] += x;
-	hash[1] += y;
-}
-
-static uint32_t
-ext2_legacy_hash(const char *name, int len, int unsigned_char)
-{
-	uint32_t h0, h1 = 0x12A3FE2D, h2 = 0x37ABE8F9;
-	uint32_t multi = 0x6D22F5;
-	const unsigned char *uname = (const unsigned char *)name;
-	const signed char *sname = (const signed char *)name;
-	int val, i;
-
-	for (i = 0; i < len; i++) {
-		if (unsigned_char)
-			val = (u_int)*uname++;
-		else
-			val = (int)*sname++;
-
-		h0 = h2 + (h1 ^ (val * multi));
-		if (h0 & 0x80000000)
-			h0 -= 0x7FFFFFFF;
-		h2 = h1;
-		h1 = h0;
-	}
-
-	return (h1 << 1);
-}
-
-static void
-ext2_prep_hashbuf(const char *src, int slen, uint32_t *dst, int dlen,
-	     int unsigned_char)
-{
-	uint32_t padding = slen | (slen << 8) | (slen << 16) | (slen << 24);
-	uint32_t buf_val;
-	const unsigned char *ubuf = (const unsigned char *)src;
-	const signed char *sbuf = (const signed char *)src;
-	int len, i;
-	int buf_byte;
-
-	if (slen > dlen)
-		len = dlen;
-	else
-		len = slen;
-
-	buf_val = padding;
-
-	for (i = 0; i < len; i++) {
-		if (unsigned_char)
-			buf_byte = (u_int)ubuf[i];
-		else
-			buf_byte = (int)sbuf[i];
-
-		if ((i % 4) == 0)
-			buf_val = padding;
-
-		buf_val <<= 8;
-		buf_val += buf_byte;
-
-		if ((i % 4) == 3) {
-			*dst++ = buf_val;
-			dlen -= sizeof(uint32_t);
-			buf_val = padding;
-		}
-	}
-
-	dlen -= sizeof(uint32_t);
-	if (dlen >= 0)
-		*dst++ = buf_val;
-
-	dlen -= sizeof(uint32_t);
-	while (dlen >= 0) {
-		*dst++ = padding;
-		dlen -= sizeof(uint32_t);
-	}
-}
-
-int
-ext2_htree_hash(const char *name, int len,
-		uint32_t *hash_seed, int hash_version,
-		uint32_t *hash_major, uint32_t *hash_minor)
-{
-	uint32_t hash[4];
-	uint32_t data[8];
-	uint32_t major = 0, minor = 0;
-	int unsigned_char = 0;
-
-	if (!name || !hash_major)
-		return (-1);
-
-	if (len < 1 || len > 255)
-		goto error;
-
-	hash[0] = 0x67452301;
-	hash[1] = 0xEFCDAB89;
-	hash[2] = 0x98BADCFE;
-	hash[3] = 0x10325476;
-
-	if (hash_seed)
-		memcpy(hash, hash_seed, sizeof(hash));
-
-	switch (hash_version) {
-	case EXT2_HTREE_TEA_UNSIGNED:
-		unsigned_char = 1;
-		/* FALLTHROUGH */
-	case EXT2_HTREE_TEA:
-		while (len > 0) {
-			ext2_prep_hashbuf(name, len, data, 16, unsigned_char);
-			ext2_tea(hash, data);
-			len -= 16;
-			name += 16;
-		}
-		major = hash[0];
-		minor = hash[1];
-		break;
-	case EXT2_HTREE_LEGACY_UNSIGNED:
-		unsigned_char = 1;
-		/* FALLTHROUGH */
-	case EXT2_HTREE_LEGACY:
-		major = ext2_legacy_hash(name, len, unsigned_char);
-		break;
-	case EXT2_HTREE_HALF_MD4_UNSIGNED:
-		unsigned_char = 1;
-		/* FALLTHROUGH */
-	case EXT2_HTREE_HALF_MD4:
-		while (len > 0) {
-			ext2_prep_hashbuf(name, len, data, 32, unsigned_char);
-			ext2_half_md4(hash, data);
-			len -= 32;
-			name += 32;
-		}
-		major = hash[1];
-		minor = hash[2];
-		break;
-	default:
-		goto error;
-	}
-
-	major &= ~1;
-	if (major == (EXT2_HTREE_EOF << 1))
-		major = (EXT2_HTREE_EOF - 1) << 1;
-	*hash_major = major;
-	if (hash_minor)
-		*hash_minor = minor;
-
-	return (0);
-
-error:
-	*hash_major = 0;
-	if (hash_minor)
-		*hash_minor = 0;
-	return (-1);
-}

Property changes on: user/ngie/more-tests/sys/fs/ext2fs/ext2_hash.c
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: user/ngie/more-tests/sys/fs/ext2fs/ext2_htree.c
===================================================================
--- user/ngie/more-tests/sys/fs/ext2fs/ext2_htree.c	(revision 281675)
+++ user/ngie/more-tests/sys/fs/ext2fs/ext2_htree.c	(nonexistent)
@@ -1,899 +0,0 @@
-/*-
- * Copyright (c) 2010, 2012 Zheng Liu <lz@freebsd.org>
- * Copyright (c) 2012, Vyacheslav Matyushin
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#include <sys/param.h>
-#include <sys/endian.h>
-#include <sys/systm.h>
-#include <sys/namei.h>
-#include <sys/bio.h>
-#include <sys/buf.h>
-#include <sys/endian.h>
-#include <sys/mount.h>
-#include <sys/vnode.h>
-#include <sys/malloc.h>
-#include <sys/dirent.h>
-#include <sys/sysctl.h>
-
-#include <ufs/ufs/dir.h>
-
-#include <fs/ext2fs/inode.h>
-#include <fs/ext2fs/ext2_mount.h>
-#include <fs/ext2fs/ext2fs.h>
-#include <fs/ext2fs/fs.h>
-#include <fs/ext2fs/ext2_extern.h>
-#include <fs/ext2fs/ext2_dinode.h>
-#include <fs/ext2fs/ext2_dir.h>
-#include <fs/ext2fs/htree.h>
-
-static void	ext2_append_entry(char *block, uint32_t blksize,
-		    struct ext2fs_direct_2 *last_entry,
-		    struct ext2fs_direct_2 *new_entry);
-static int	ext2_htree_append_block(struct vnode *vp, char *data,
-		    struct componentname *cnp, uint32_t blksize);
-static int	ext2_htree_check_next(struct inode *ip, uint32_t hash,
-		    const char *name, struct ext2fs_htree_lookup_info *info);
-static int	ext2_htree_cmp_sort_entry(const void *e1, const void *e2);
-static int	ext2_htree_find_leaf(struct inode *ip, const char *name,
-		    int namelen, uint32_t *hash, uint8_t *hash_version,
-		    struct ext2fs_htree_lookup_info *info);
-static uint32_t ext2_htree_get_block(struct ext2fs_htree_entry *ep);
-static uint16_t	ext2_htree_get_count(struct ext2fs_htree_entry *ep);
-static uint32_t ext2_htree_get_hash(struct ext2fs_htree_entry *ep);
-static uint16_t	ext2_htree_get_limit(struct ext2fs_htree_entry *ep);
-static void	ext2_htree_insert_entry_to_level(struct ext2fs_htree_lookup_level *level,
-		    uint32_t hash, uint32_t blk);
-static void	ext2_htree_insert_entry(struct ext2fs_htree_lookup_info *info,
-		    uint32_t hash, uint32_t blk);
-static uint32_t	ext2_htree_node_limit(struct inode *ip);
-static void	ext2_htree_set_block(struct ext2fs_htree_entry *ep,
-		    uint32_t blk);
-static void	ext2_htree_set_count(struct ext2fs_htree_entry *ep,
-		    uint16_t cnt);
-static void	ext2_htree_set_hash(struct ext2fs_htree_entry *ep,
-		    uint32_t hash);
-static void	ext2_htree_set_limit(struct ext2fs_htree_entry *ep,
-		    uint16_t limit);
-static int	ext2_htree_split_dirblock(char *block1, char *block2,
-		    uint32_t blksize, uint32_t *hash_seed, uint8_t hash_version,
-		    uint32_t *split_hash, struct  ext2fs_direct_2 *entry);
-static void	ext2_htree_release(struct ext2fs_htree_lookup_info *info);
-static uint32_t	ext2_htree_root_limit(struct inode *ip, int len);
-static int	ext2_htree_writebuf(struct ext2fs_htree_lookup_info *info);
-
-int
-ext2_htree_has_idx(struct inode *ip)
-{
-	if (EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_DIRHASHINDEX) &&
-	    ip->i_flag & IN_E4INDEX)
-		return (1);
-	else
-		return (0);
-}
-
-static int
-ext2_htree_check_next(struct inode *ip, uint32_t hash, const char *name,
-		struct ext2fs_htree_lookup_info *info)
-{
-	struct vnode *vp = ITOV(ip);
-	struct ext2fs_htree_lookup_level *level;
-	struct buf *bp;
-	uint32_t next_hash;
-	int idx = info->h_levels_num - 1;
-	int levels = 0;
-
-	do {
-		level = &info->h_levels[idx];
-		level->h_entry++;
-		if (level->h_entry < level->h_entries +
-		    ext2_htree_get_count(level->h_entries))
-			break;
-		if (idx == 0)
-			return (0);
-		idx--;
-		levels++;
-	} while (1);
-
-	next_hash = ext2_htree_get_hash(level->h_entry);
-	if ((hash & 1) == 0) {
-		if (hash != (next_hash & ~1))
-			return (0);
-	}
-
-	while (levels > 0) {
-		levels--;
-		if (ext2_blkatoff(vp, ext2_htree_get_block(level->h_entry) *
-		    ip->i_e2fs->e2fs_bsize, NULL, &bp) != 0)
-			return (0);
-		level = &info->h_levels[idx + 1];
-		brelse(level->h_bp);
-		level->h_bp = bp;
-		level->h_entry = level->h_entries =
-		    ((struct ext2fs_htree_node *)bp->b_data)->h_entries;
-	}
-
-	return (1);
-}
-
-static uint32_t
-ext2_htree_get_block(struct ext2fs_htree_entry *ep)
-{
-	return (ep->h_blk & 0x00FFFFFF);
-}
-
-static void
-ext2_htree_set_block(struct ext2fs_htree_entry *ep, uint32_t blk)
-{
-	ep->h_blk = blk;
-}
-
-static uint16_t
-ext2_htree_get_count(struct ext2fs_htree_entry *ep)
-{
-	return (((struct ext2fs_htree_count *)(ep))->h_entries_num);
-}
-
-static void
-ext2_htree_set_count(struct ext2fs_htree_entry *ep, uint16_t cnt)
-{
-	((struct ext2fs_htree_count *)(ep))->h_entries_num = cnt;
-}
-
-static uint32_t
-ext2_htree_get_hash(struct ext2fs_htree_entry *ep)
-{
-	return (ep->h_hash);
-}
-
-static uint16_t
-ext2_htree_get_limit(struct ext2fs_htree_entry *ep)
-{
-	return (((struct ext2fs_htree_count *)(ep))->h_entries_max);
-}
-
-static void
-ext2_htree_set_hash(struct ext2fs_htree_entry *ep, uint32_t hash)
-{
-	ep->h_hash = hash;
-}
-
-static void
-ext2_htree_set_limit(struct ext2fs_htree_entry *ep, uint16_t limit)
-{
-	((struct ext2fs_htree_count *)(ep))->h_entries_max = limit;
-}
-
-static void
-ext2_htree_release(struct ext2fs_htree_lookup_info *info)
-{
-	int i;
-
-	for (i = 0; i < info->h_levels_num; i++) {
-		struct buf *bp = info->h_levels[i].h_bp;
-		if (bp != NULL)
-			brelse(bp);
-	}
-}
-
-static uint32_t
-ext2_htree_root_limit(struct inode *ip, int len)
-{
-	uint32_t space;
-
-	space = ip->i_e2fs->e2fs_bsize - EXT2_DIR_REC_LEN(1) -
-	    EXT2_DIR_REC_LEN(2) - len;
-	return (space / sizeof(struct ext2fs_htree_entry));
-}
-
-static uint32_t
-ext2_htree_node_limit(struct inode *ip)
-{
-	struct m_ext2fs *fs;
-	uint32_t space;
-
-	fs = ip->i_e2fs;
-	space = fs->e2fs_bsize - EXT2_DIR_REC_LEN(0);
-
-	return (space / sizeof(struct ext2fs_htree_entry));
-}
-
-static int
-ext2_htree_find_leaf(struct inode *ip, const char *name, int namelen,
-		     uint32_t *hash, uint8_t *hash_ver,
-		     struct ext2fs_htree_lookup_info *info)
-{
-	struct vnode *vp;
-	struct ext2fs *fs;
-	struct m_ext2fs *m_fs;
-	struct buf *bp = NULL;
-	struct ext2fs_htree_root *rootp;
-	struct ext2fs_htree_entry *entp, *start, *end, *middle, *found;
-	struct ext2fs_htree_lookup_level *level_info;
-	uint32_t hash_major = 0, hash_minor = 0;
-	uint32_t levels, cnt;
-	uint8_t hash_version;
-
-	if (name == NULL || info == NULL)
-		return (-1);
-
-	vp = ITOV(ip);
-	fs = ip->i_e2fs->e2fs;
-	m_fs = ip->i_e2fs;
-
-	if (ext2_blkatoff(vp, 0, NULL, &bp) != 0)
-		return (-1);
-
-	info->h_levels_num = 1;
-	info->h_levels[0].h_bp = bp;
-	rootp = (struct ext2fs_htree_root *)bp->b_data;
-	if (rootp->h_info.h_hash_version != EXT2_HTREE_LEGACY &&
-	    rootp->h_info.h_hash_version != EXT2_HTREE_HALF_MD4 &&
-	    rootp->h_info.h_hash_version != EXT2_HTREE_TEA)
-		goto error;
-
-	hash_version = rootp->h_info.h_hash_version;
-	if (hash_version <= EXT2_HTREE_TEA)
-		hash_version += m_fs->e2fs_uhash;
-	*hash_ver = hash_version;
-
-	ext2_htree_hash(name, namelen, fs->e3fs_hash_seed,
-	    hash_version, &hash_major, &hash_minor);
-	*hash = hash_major;
-
-	if ((levels = rootp->h_info.h_ind_levels) > 1)
-		goto error;
-
-	entp = (struct ext2fs_htree_entry *)(((char *)&rootp->h_info) +
-	    rootp->h_info.h_info_len);
-
-	if (ext2_htree_get_limit(entp) !=
-	    ext2_htree_root_limit(ip, rootp->h_info.h_info_len))
-		goto error;
-
-	while (1) {
-		cnt = ext2_htree_get_count(entp);
-		if (cnt == 0 || cnt > ext2_htree_get_limit(entp))
-			goto error;
-
-		start = entp + 1;
-		end = entp + cnt - 1;
-		while (start <= end) {
-			middle = start + (end - start) / 2;
-			if (ext2_htree_get_hash(middle) > hash_major)
-				end = middle - 1;
-			else
-				start = middle + 1;
-		}
-		found = start - 1;
-
-		level_info = &(info->h_levels[info->h_levels_num - 1]);
-		level_info->h_bp = bp;
-		level_info->h_entries = entp;
-		level_info->h_entry = found;
-		if (levels == 0)
-			return (0);
-		levels--;
-		if (ext2_blkatoff(vp,
-		    ext2_htree_get_block(found) * m_fs->e2fs_bsize,
-		    NULL, &bp) != 0)
-			goto error;
-		entp = ((struct ext2fs_htree_node *)bp->b_data)->h_entries;
-		info->h_levels_num++;
-		info->h_levels[info->h_levels_num - 1].h_bp = bp;
-	}
-
-error:
-	ext2_htree_release(info);
-	return (-1);
-}
-
-/*
- * Try to lookup a directory entry in HTree index
- */
-int
-ext2_htree_lookup(struct inode *ip, const char *name, int namelen,
-		  struct buf **bpp, int *entryoffp, doff_t *offp,
-		  doff_t *prevoffp, doff_t *endusefulp,
-		  struct ext2fs_searchslot *ss)
-{
-	struct vnode *vp;
-	struct ext2fs_htree_lookup_info info;
-	struct ext2fs_htree_entry *leaf_node;
-	struct m_ext2fs *m_fs;
-	struct buf *bp;
-	uint32_t blk;
-	uint32_t dirhash;
-	uint32_t bsize;
-	uint8_t hash_version;
-	int search_next;
-	int found = 0;
-
-	m_fs = ip->i_e2fs;
-	bsize = m_fs->e2fs_bsize;
-	vp = ITOV(ip);
-
-	/* TODO: print error msg because we don't lookup '.' and '..' */
-
-	memset(&info, 0, sizeof(info));
-	if (ext2_htree_find_leaf(ip, name, namelen, &dirhash,
-	    &hash_version, &info))
-		return (-1);
-
-	do {
-		leaf_node = info.h_levels[info.h_levels_num - 1].h_entry;
-		blk = ext2_htree_get_block(leaf_node);
-		if (ext2_blkatoff(vp, blk * bsize, NULL, &bp) != 0) {
-			ext2_htree_release(&info);
-			return (-1);
-		}
-
-		*offp = blk * bsize;
-		*entryoffp = 0;
-		*prevoffp = blk * bsize;
-		*endusefulp = blk * bsize;
-
-		if (ss->slotstatus == NONE) {
-			ss->slotoffset = -1;
-			ss->slotfreespace = 0;
-		}
-
-		if (ext2_search_dirblock(ip, bp->b_data, &found,
-		    name, namelen, entryoffp, offp, prevoffp,
-		    endusefulp, ss) != 0) {
-			brelse(bp);
-			ext2_htree_release(&info);
-			return (-1);
-		}
-
-		if (found) {
-			*bpp = bp;
-			ext2_htree_release(&info);
-			return (0);
-		}
-
-		brelse(bp);
-		search_next = ext2_htree_check_next(ip, dirhash, name, &info);
-	} while (search_next);
-
-	ext2_htree_release(&info);
-	return (ENOENT);
-}
-
-static int
-ext2_htree_append_block(struct vnode *vp, char *data,
-			struct componentname *cnp, uint32_t blksize)
-{
-	struct iovec aiov;
-	struct uio auio;
-	struct inode *dp = VTOI(vp);
-	uint64_t cursize, newsize;
-	int error;
-
-	cursize = roundup(dp->i_size, blksize);
-	newsize = cursize + blksize;
-
-	auio.uio_offset = cursize;
-	auio.uio_resid = blksize;
-	aiov.iov_len = blksize;
-	aiov.iov_base = data;
-	auio.uio_iov = &aiov;
-	auio.uio_iovcnt = 1;
-	auio.uio_rw = UIO_WRITE;
-	auio.uio_segflg = UIO_SYSSPACE;
-	error = VOP_WRITE(vp, &auio, IO_SYNC, cnp->cn_cred);
-	if (!error)
-		dp->i_size = newsize;
-
-	return (error);
-}
-
-static int
-ext2_htree_writebuf(struct ext2fs_htree_lookup_info *info)
-{
-	int i, error;
-
-	for (i = 0; i < info->h_levels_num; i++) {
-		struct buf *bp = info->h_levels[i].h_bp;
-		error = bwrite(bp);
-		if (error)
-			return (error);
-	}
-
-	return (0);
-}
-
-static void
-ext2_htree_insert_entry_to_level(struct ext2fs_htree_lookup_level *level,
-				 uint32_t hash, uint32_t blk)
-{
-	struct ext2fs_htree_entry *target;
-	int entries_num;
-
-	target = level->h_entry + 1;
-	entries_num = ext2_htree_get_count(level->h_entries);
-
-	memmove(target + 1, target, (char *)(level->h_entries + entries_num) -
-	    (char *)target);
-	ext2_htree_set_block(target, blk);
-	ext2_htree_set_hash(target, hash);
-	ext2_htree_set_count(level->h_entries, entries_num + 1);
-}
-
-/*
- * Insert an index entry to the index node.
- */
-static void
-ext2_htree_insert_entry(struct ext2fs_htree_lookup_info *info,
-			uint32_t hash, uint32_t blk)
-{
-	struct ext2fs_htree_lookup_level *level;
-
-	level = &info->h_levels[info->h_levels_num - 1];
-	ext2_htree_insert_entry_to_level(level, hash, blk);
-}
-
-/*
- * Compare two entry sort descriptors by name hash value.
- * This is used together with qsort.
- */
-static int
-ext2_htree_cmp_sort_entry(const void *e1, const void *e2)
-{
-	const struct ext2fs_htree_sort_entry *entry1, *entry2;
-
-	entry1 = (const struct ext2fs_htree_sort_entry *)e1;
-	entry2 = (const struct ext2fs_htree_sort_entry *)e2;
-
-	if (entry1->h_hash < entry2->h_hash)
-		return (-1);
-	if (entry1->h_hash > entry2->h_hash)
-		return (1);
-	return (0);
-}
-
-/*
- * Append an entry to the end of the directory block.
- */
-static void
-ext2_append_entry(char *block, uint32_t blksize,
-		  struct ext2fs_direct_2 *last_entry,
-		  struct ext2fs_direct_2 *new_entry)
-{
-	uint16_t entry_len;
-
-	entry_len = EXT2_DIR_REC_LEN(last_entry->e2d_namlen);
-	last_entry->e2d_reclen = entry_len;
-	last_entry = (struct ext2fs_direct_2 *)((char *)last_entry + entry_len);
-	new_entry->e2d_reclen = block + blksize - (char *)last_entry;
-	memcpy(last_entry, new_entry, EXT2_DIR_REC_LEN(new_entry->e2d_namlen));
-}
-
-/*
- * Move half of entries from the old directory block to the new one.
- */
-static int
-ext2_htree_split_dirblock(char *block1, char *block2, uint32_t blksize,
-			  uint32_t *hash_seed, uint8_t hash_version,
-			  uint32_t *split_hash, struct ext2fs_direct_2 *entry)
-{
-	int entry_cnt = 0;
-	int size = 0;
-	int i, k;
-	uint32_t offset;
-	uint16_t entry_len = 0;
-	uint32_t entry_hash;
-	struct ext2fs_direct_2 *ep, *last;
-	char *dest;
-	struct ext2fs_htree_sort_entry *sort_info;
-
-	ep = (struct ext2fs_direct_2 *)block1;
-	dest = block2;
-	sort_info = (struct ext2fs_htree_sort_entry *)
-	    ((char *)block2 + blksize);
-
-	/*
-	 * Calculate name hash value for the entry which is to be added.
-	 */
-	ext2_htree_hash(entry->e2d_name, entry->e2d_namlen, hash_seed,
-	    hash_version, &entry_hash, NULL);
-
-	/*
-	 * Fill in directory entry sort descriptors.
-	 */
-	while ((char *)ep < block1 + blksize) {
-		if (ep->e2d_ino && ep->e2d_namlen) {
-			entry_cnt++;
-			sort_info--;
-			sort_info->h_size = ep->e2d_reclen;
-			sort_info->h_offset = (char *)ep - block1;
-			ext2_htree_hash(ep->e2d_name, ep->e2d_namlen,
-			    hash_seed, hash_version,
-			    &sort_info->h_hash, NULL);
-		}
-		ep = (struct ext2fs_direct_2 *)
-		    ((char *)ep + ep->e2d_reclen);
-	}
-
-	/*
-	 * Sort directory entry descriptors by name hash value.
-	 */
-	qsort(sort_info, entry_cnt, sizeof(struct ext2fs_htree_sort_entry),
-	    ext2_htree_cmp_sort_entry);
-
-	/*
-	 * Count the number of entries to move to directory block 2.
-	 */
-	for (i = entry_cnt - 1; i >= 0; i--) {
-		if (sort_info[i].h_size + size > blksize / 2)
-			break;
-		size += sort_info[i].h_size;
-	}
-
-	*split_hash = sort_info[i + 1].h_hash;
-
-	/*
-	 * Set collision bit.
-	 */
-	if (*split_hash == sort_info[i].h_hash)
-		*split_hash += 1;
-
-	/*
-	 * Move half of directory entries from block 1 to block 2.
-	 */
-	for (k = i + 1; k < entry_cnt; k++) {
-		ep = (struct ext2fs_direct_2 *)((char *)block1 +
-		    sort_info[k].h_offset);
-		entry_len = EXT2_DIR_REC_LEN(ep->e2d_namlen);
-		memcpy(dest, ep, entry_len);
-		((struct ext2fs_direct_2 *)dest)->e2d_reclen = entry_len;
-		/* Mark directory entry as unused. */
-		ep->e2d_ino = 0;
-		dest += entry_len;
-	}
-	dest -= entry_len;
-
-	/* Shrink directory entries in block 1. */
-	last = (struct ext2fs_direct_2 *)block1;
-	entry_len = EXT2_DIR_REC_LEN(last->e2d_namlen);
-	for (offset = last->e2d_reclen; offset < blksize; ) {
-		ep = (struct ext2fs_direct_2 *)(block1 + offset);
-		offset += ep->e2d_reclen;
-		if (last->e2d_ino) {
-			/* Trim the existing slot */
-			last->e2d_reclen = entry_len;
-			last = (struct ext2fs_direct_2 *)
-			   ((char *)last + entry_len);
-		}
-		entry_len = EXT2_DIR_REC_LEN(ep->e2d_namlen);
-		memcpy((void *)last, (void *)ep, entry_len);
-	}
-
-	if (entry_hash >= *split_hash) {
-		/* Add entry to block 2. */
-		ext2_append_entry(block2, blksize,
-		    (struct ext2fs_direct_2 *)dest, entry);
-
-		/* Adjust length field of last entry of block 1. */
-		last->e2d_reclen = block1 + blksize - (char *)last;
-	} else {
-		/* Add entry to block 1. */
-		ext2_append_entry(block1, blksize, last, entry);
-
-		/* Adjust length field of last entry of block 2. */
-		((struct ext2fs_direct_2 *)dest)->e2d_reclen =
-		    block2 + blksize - dest;
-	}
-
-	return (0);
-}
-
-/*
- * Create an HTree index for a directory
- */
-int
-ext2_htree_create_index(struct vnode *vp, struct componentname *cnp,
-			struct ext2fs_direct_2 *new_entry)
-{
-	struct buf *bp = NULL;
-	struct inode *dp;
-	struct ext2fs *fs;
-	struct m_ext2fs *m_fs;
-	struct ext2fs_direct_2 *ep, *dotdot;
-	struct ext2fs_htree_root *root;
-	struct ext2fs_htree_lookup_info info;
-	uint32_t blksize, dirlen, split_hash;
-	uint8_t hash_version;
-	char *buf1 = NULL;
-	char *buf2 = NULL;
-	int error = 0;
-
-	dp = VTOI(vp);
-	fs = dp->i_e2fs->e2fs;
-	m_fs = dp->i_e2fs;
-	blksize = m_fs->e2fs_bsize;
-
-	buf1 = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
-	buf2 = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
-
-	if ((error = ext2_blkatoff(vp, 0, NULL, &bp)) != 0)
-		goto out;
-
-	root = (struct ext2fs_htree_root *)bp->b_data;
-	dotdot = (struct ext2fs_direct_2 *)((char *)&(root->h_dotdot));
-	ep = (struct ext2fs_direct_2 *)((char *)dotdot + dotdot->e2d_reclen);
-	dirlen = (char *)root + blksize - (char *)ep;
-	memcpy(buf1, ep, dirlen);
-	ep = (struct ext2fs_direct_2 *)buf1;
-	while ((char *)ep < buf1 + dirlen)
-		ep = (struct ext2fs_direct_2 *)
-		    ((char *)ep + ep->e2d_reclen);
-	ep->e2d_reclen = buf1 + blksize - (char *)ep;
-
-	dp->i_flag |= IN_E4INDEX;
-
-	/*
-	 * Initialize index root.
-	 */
-	dotdot->e2d_reclen = blksize - EXT2_DIR_REC_LEN(1);
-	memset(&root->h_info, 0, sizeof(root->h_info));
-	root->h_info.h_hash_version = fs->e3fs_def_hash_version;
-	root->h_info.h_info_len = sizeof(root->h_info);
-	ext2_htree_set_block(root->h_entries, 1);
-	ext2_htree_set_count(root->h_entries, 1);
-	ext2_htree_set_limit(root->h_entries,
-	    ext2_htree_root_limit(dp, sizeof(root->h_info)));
-
-	memset(&info, 0, sizeof(info));
-	info.h_levels_num = 1;
-	info.h_levels[0].h_entries = root->h_entries;
-	info.h_levels[0].h_entry = root->h_entries;
-
-	hash_version = root->h_info.h_hash_version;
-	if (hash_version <= EXT2_HTREE_TEA)
-		hash_version += m_fs->e2fs_uhash;
-	ext2_htree_split_dirblock(buf1, buf2, blksize, fs->e3fs_hash_seed,
-	    hash_version, &split_hash, new_entry);
-	ext2_htree_insert_entry(&info, split_hash, 2);
-
-	/*
-	 * Write directory block 0.
-	 */
-	if (DOINGASYNC(vp)) {
-		bdwrite(bp);
-		error = 0;
-	} else {
-		error = bwrite(bp);
-	}
-	dp->i_flag |= IN_CHANGE | IN_UPDATE;
-	if (error)
-		goto out;
-
-	/*
-	 * Write directory block 1.
-	 */
-	error = ext2_htree_append_block(vp, buf1, cnp, blksize);
-	if (error)
-		goto out1;
-
-	/*
-	 * Write directory block 2.
-	 */
-	error = ext2_htree_append_block(vp, buf2, cnp, blksize);
-
-	free(buf1, M_TEMP);
-	free(buf2, M_TEMP);
-	return (error);
-out:
-	if (bp != NULL)
-		brelse(bp);
-out1:
-	free(buf1, M_TEMP);
-	free(buf2, M_TEMP);
-	return (error);
-}
-
-/*
- * Add an entry to the directory using htree index.
- */
-int
-ext2_htree_add_entry(struct vnode *dvp, struct ext2fs_direct_2 *entry,
-		     struct componentname *cnp)
-{
-	struct ext2fs_htree_entry *entries, *leaf_node;
-	struct ext2fs_htree_lookup_info info;
-	struct buf *bp = NULL;
-	struct ext2fs *fs;
-	struct m_ext2fs *m_fs;
-	struct inode *ip;
-	uint16_t ent_num;
-	uint32_t dirhash, split_hash;
-	uint32_t blksize, blknum;
-	uint64_t cursize, dirsize;
-	uint8_t hash_version;
-	char *newdirblock = NULL;
-	char *newidxblock = NULL;
-	struct ext2fs_htree_node *dst_node;
-	struct ext2fs_htree_entry *dst_entries;
-	struct ext2fs_htree_entry *root_entires;
-	struct buf *dst_bp = NULL;
-	int error, write_bp = 0, write_dst_bp = 0, write_info = 0;
-
-	ip = VTOI(dvp);
-	m_fs = ip->i_e2fs;
-	fs = m_fs->e2fs;
-	blksize = m_fs->e2fs_bsize;
-
-	if (ip->i_count != 0) 
-		return ext2_add_entry(dvp, entry);
-
-	/* Target directory block is full, split it */
-	memset(&info, 0, sizeof(info));
-	error = ext2_htree_find_leaf(ip, entry->e2d_name, entry->e2d_namlen,
-	    &dirhash, &hash_version, &info);
-	if (error)
-		return (error);
-
-	entries = info.h_levels[info.h_levels_num - 1].h_entries;
-	ent_num = ext2_htree_get_count(entries);
-	if (ent_num == ext2_htree_get_limit(entries)) {
-		/* Split the index node. */
-		root_entires = info.h_levels[0].h_entries;
-		newidxblock = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
-		dst_node = (struct ext2fs_htree_node *)newidxblock;
-		dst_entries = dst_node->h_entries;
-		memset(&dst_node->h_fake_dirent, 0,
-		    sizeof(dst_node->h_fake_dirent));
-		dst_node->h_fake_dirent.e2d_reclen = blksize;
-
-		cursize = roundup(ip->i_size, blksize);
-		dirsize = cursize + blksize;
-		blknum = dirsize / blksize - 1;
-
-		error = ext2_htree_append_block(dvp, newidxblock,
-		    cnp, blksize);
-		if (error)
-			goto finish;
-		error = ext2_blkatoff(dvp, cursize, NULL, &dst_bp);
-		if (error)
-			goto finish;
-		dst_node = (struct ext2fs_htree_node *)dst_bp->b_data;
-		dst_entries = dst_node->h_entries;
-
-		if (info.h_levels_num == 2) {
-			uint16_t src_ent_num, dst_ent_num;
-
-			if (ext2_htree_get_count(root_entires) ==
-			    ext2_htree_get_limit(root_entires)) {
-				/* Directory index is full */
-				error = EIO;
-				goto finish;
-			}
-
-			src_ent_num = ent_num / 2;
-			dst_ent_num = ent_num - src_ent_num;
-			split_hash = ext2_htree_get_hash(entries + src_ent_num);
-
-			/* Move half of index entries to the new index node */
-			memcpy(dst_entries, entries + src_ent_num,
-			    dst_ent_num * sizeof(struct ext2fs_htree_entry));
-			ext2_htree_set_count(entries, src_ent_num);
-			ext2_htree_set_count(dst_entries, dst_ent_num);
-			ext2_htree_set_limit(dst_entries,
-			    ext2_htree_node_limit(ip));
-
-			if (info.h_levels[1].h_entry >= entries + src_ent_num) {
-				struct buf *tmp = info.h_levels[1].h_bp;
-				info.h_levels[1].h_bp = dst_bp;
-				dst_bp = tmp;
-
-				info.h_levels[1].h_entry =
-				    info.h_levels[1].h_entry -
-				    (entries + src_ent_num) +
-				    dst_entries;
-				info.h_levels[1].h_entries = dst_entries;
-			}
-			ext2_htree_insert_entry_to_level(&info.h_levels[0],
-			    split_hash, blknum);
-
-			/* Write new index node to disk */
-			error = bwrite(dst_bp);
-			ip->i_flag |= IN_CHANGE | IN_UPDATE;
-			if (error)
-				goto finish;
-			write_dst_bp = 1;
-		} else {
-			/* Create second level for htree index */
-			struct ext2fs_htree_root *idx_root;
-
-			memcpy(dst_entries, entries,
-			    ent_num * sizeof(struct ext2fs_htree_entry));
-			ext2_htree_set_limit(dst_entries,
-			    ext2_htree_node_limit(ip));
-
-			idx_root = (struct ext2fs_htree_root *)
-			    info.h_levels[0].h_bp->b_data;
-			idx_root->h_info.h_ind_levels = 1;
-
-			ext2_htree_set_count(entries, 1);
-			ext2_htree_set_block(entries, blknum);
-
-			info.h_levels_num = 2;
-			info.h_levels[1].h_entries = dst_entries;
-			info.h_levels[1].h_entry = info.h_levels[0].h_entry -
-			    info.h_levels[0].h_entries + dst_entries;
-			info.h_levels[1].h_bp = dst_bp;
-		}
-	}
-
-	leaf_node = info.h_levels[info.h_levels_num - 1].h_entry;
-	blknum = ext2_htree_get_block(leaf_node);
-	error = ext2_blkatoff(dvp, blknum * blksize, NULL, &bp);
-	if (error)
-		goto finish;
-
-	/* Split target directory block */
-	newdirblock = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
-	ext2_htree_split_dirblock((char *)bp->b_data, newdirblock, blksize,
-	    fs->e3fs_hash_seed, hash_version, &split_hash, entry);
-	cursize = roundup(ip->i_size, blksize);
-	dirsize = cursize + blksize;
-	blknum = dirsize / blksize - 1;
-
-	/* Add index entry for the new directory block */
-	ext2_htree_insert_entry(&info, split_hash, blknum);
-
-	/* Write the new directory block to the end of the directory */
-	error = ext2_htree_append_block(dvp, newdirblock, cnp, blksize);
-	if (error)
-		goto finish;
-
-	/* Write the target directory block */
-	error = bwrite(bp);
-	ip->i_flag |= IN_CHANGE | IN_UPDATE;
-	if (error)
-		goto finish;
-	write_bp = 1;
-
-	/* Write the index block */
-	error = ext2_htree_writebuf(&info);
-	if (!error)
-		write_info = 1;
-
-finish:
-	if (dst_bp != NULL && !write_dst_bp)
-		brelse(dst_bp);
-	if (bp != NULL && !write_bp)
-		brelse(bp);
-	if (newdirblock != NULL)
-		free(newdirblock, M_TEMP);
-	if (newidxblock != NULL)
-		free(newidxblock, M_TEMP);
-	if (!write_info)
-		ext2_htree_release(&info);
-	return (error);
-}

Property changes on: user/ngie/more-tests/sys/fs/ext2fs/ext2_htree.c
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: user/ngie/more-tests/sys/fs/ext2fs/ext2_dir.h
===================================================================
--- user/ngie/more-tests/sys/fs/ext2fs/ext2_dir.h	(revision 281675)
+++ user/ngie/more-tests/sys/fs/ext2fs/ext2_dir.h	(revision 281676)
@@ -1,101 +1,86 @@
 /*-
  * Copyright (c) 2009 Aditya Sarawgi
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _FS_EXT2FS_EXT2_DIR_H_
 #define	_FS_EXT2FS_EXT2_DIR_H_
 
 /*
  * Structure of a directory entry
  */
 #define	EXT2FS_MAXNAMLEN	255
 
 struct	ext2fs_direct {
 	uint32_t e2d_ino;		/* inode number of entry */
 	uint16_t e2d_reclen;		/* length of this record */
 	uint16_t e2d_namlen;		/* length of string in e2d_name */
 	char e2d_name[EXT2FS_MAXNAMLEN];/* name with length<=EXT2FS_MAXNAMLEN */
 };
-
-enum slotstatus {
-	NONE,
-	COMPACT,
-	FOUND
-};
-
-struct ext2fs_searchslot {
-	enum slotstatus slotstatus;
-	doff_t slotoffset;	/* offset of area with free space */
-	int slotsize;		/* size of area at slotoffset */
-	int slotfreespace;	/* amount of space free in slot */
-	int slotneeded;		/* sizeof the entry we are seeking */
-};
-
 /*
  * The new version of the directory entry.  Since EXT2 structures are
  * stored in intel byte order, and the name_len field could never be
  * bigger than 255 chars, it's safe to reclaim the extra byte for the
  * file_type field.
  */
 struct	ext2fs_direct_2 {
 	uint32_t e2d_ino;		/* inode number of entry */
 	uint16_t e2d_reclen;		/* length of this record */
 	uint8_t e2d_namlen;		/* length of string in e2d_name */
 	uint8_t e2d_type;		/* file type */
 	char e2d_name[EXT2FS_MAXNAMLEN];/* name with length<=EXT2FS_MAXNAMLEN */
 };
 
 /*
  * Maximal count of links to a file
  */
 #define	EXT2_LINK_MAX	32000
 
 /*
  * Ext2 directory file types.  Only the low 3 bits are used.  The
  * other bits are reserved for now.
  */
 #define	EXT2_FT_UNKNOWN		0
 #define	EXT2_FT_REG_FILE	1
 #define	EXT2_FT_DIR		2
 #define	EXT2_FT_CHRDEV		3
 #define	EXT2_FT_BLKDEV 		4
 #define	EXT2_FT_FIFO		5
 #define	EXT2_FT_SOCK		6
 #define	EXT2_FT_SYMLINK		7
 #define	EXT2_FT_MAX		8
 
 /*
  * EXT2_DIR_PAD defines the directory entries boundaries
  *
  * NOTE: It must be a multiple of 4
  */
 #define	EXT2_DIR_PAD		 	4
 #define	EXT2_DIR_ROUND			(EXT2_DIR_PAD - 1)
 #define	EXT2_DIR_REC_LEN(name_len)	(((name_len) + 8 + EXT2_DIR_ROUND) & \
 					 ~EXT2_DIR_ROUND)
 #endif /* !_FS_EXT2FS_EXT2_DIR_H_ */
 
Index: user/ngie/more-tests/sys/fs/ext2fs/ext2_extern.h
===================================================================
--- user/ngie/more-tests/sys/fs/ext2fs/ext2_extern.h	(revision 281675)
+++ user/ngie/more-tests/sys/fs/ext2fs/ext2_extern.h	(revision 281676)
@@ -1,113 +1,98 @@
 /*-
  *  modified for EXT2FS support in Lites 1.1
  *
  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
  *  University of Utah, Department of Computer Science
  */
 /*-
  * Copyright (c) 1991, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ffs_extern.h	8.3 (Berkeley) 4/16/94
  * $FreeBSD$
  */
 
 #ifndef _FS_EXT2FS_EXT2_EXTERN_H_
 #define	_FS_EXT2FS_EXT2_EXTERN_H_
 
 struct ext2fs_dinode;
-struct ext2fs_direct_2;
-struct ext2fs_searchslot;
 struct indir;
 struct inode;
 struct mount;
 struct vfsconf;
 struct vnode;
 
-int	ext2_add_entry(struct vnode *, struct ext2fs_direct_2 *);
 int	ext2_alloc(struct inode *, daddr_t, e4fs_daddr_t, int,
 	    struct ucred *, e4fs_daddr_t *);
 int	ext2_balloc(struct inode *,
 	    e2fs_lbn_t, int, struct ucred *, struct buf **, int);
 int	ext2_blkatoff(struct vnode *, off_t, char **, struct buf **);
 void	ext2_blkfree(struct inode *,  e4fs_daddr_t, long);
 e4fs_daddr_t	ext2_blkpref(struct inode *, e2fs_lbn_t, int, e2fs_daddr_t *,
 	    e2fs_daddr_t);
 int	ext2_bmap(struct vop_bmap_args *);
 int	ext2_bmaparray(struct vnode *, daddr_t, daddr_t *, int *, int *);
 void	ext2_clusteracct(struct m_ext2fs *, char *, int, daddr_t, int);
 void	ext2_dirbad(struct inode *ip, doff_t offset, char *how);
 void	ext2_ei2i(struct ext2fs_dinode *, struct inode *);
 int	ext2_getlbns(struct vnode *, daddr_t, struct indir *, int *);
 void	ext2_i2ei(struct inode *, struct ext2fs_dinode *);
 void	ext2_itimes(struct vnode *vp);
 int	ext2_reallocblks(struct vop_reallocblks_args *);
 int	ext2_reclaim(struct vop_reclaim_args *);
 int	ext2_truncate(struct vnode *, off_t, int, struct ucred *, struct thread *);
 int	ext2_update(struct vnode *, int);
 int	ext2_valloc(struct vnode *, int, struct ucred *, struct vnode **);
 int	ext2_vfree(struct vnode *, ino_t, int);
 int	ext2_vinit(struct mount *, struct vop_vector *, struct vnode **vpp);
 int	ext2_lookup(struct vop_cachedlookup_args *);
 int	ext2_readdir(struct vop_readdir_args *);
 #ifdef EXT2FS_DEBUG
 void	ext2_print_inode(struct inode *);
 #endif
 int	ext2_direnter(struct inode *, 
 		struct vnode *, struct componentname *);
 int	ext2_dirremove(struct vnode *, struct componentname *);
 int	ext2_dirrewrite(struct inode *,
 		struct inode *, struct componentname *);
 int	ext2_dirempty(struct inode *, ino_t, struct ucred *);
 int	ext2_checkpath(struct inode *, struct inode *, struct ucred *);
 int	cg_has_sb(int i);
 int	ext2_inactive(struct vop_inactive_args *);
-int	ext2_htree_add_entry(struct vnode *, struct ext2fs_direct_2 *,
-	    struct componentname *);
-int	ext2_htree_create_index(struct vnode *, struct componentname *,
-	    struct ext2fs_direct_2 *);
-int	ext2_htree_has_idx(struct inode *);
-int	ext2_htree_hash(const char *, int, uint32_t *, int, uint32_t *,
-	    uint32_t *);
-int	ext2_htree_lookup(struct inode *, const char *, int, struct buf **,
-	    int *, doff_t *, doff_t *, doff_t *, struct ext2fs_searchslot *);
-int	ext2_search_dirblock(struct inode *, void *, int *, const char *, int,
-	    int *, doff_t *, doff_t *, doff_t *, struct ext2fs_searchslot *);
-
 
 /* Flags to low-level allocation routines.
  * The low 16-bits are reserved for IO_ flags from vnode.h.
  */
 #define	BA_CLRBUF	0x00010000	/* Clear invalid areas of buffer. */
 #define	BA_SEQMASK	0x7F000000	/* Bits holding seq heuristic. */
 #define	BA_SEQSHIFT	24
 #define	BA_SEQMAX	0x7F
 
 extern struct vop_vector ext2_vnodeops;
 extern struct vop_vector ext2_fifoops;
 
 #endif /* !_FS_EXT2FS_EXT2_EXTERN_H_ */
Index: user/ngie/more-tests/sys/fs/ext2fs/ext2_lookup.c
===================================================================
--- user/ngie/more-tests/sys/fs/ext2fs/ext2_lookup.c	(revision 281675)
+++ user/ngie/more-tests/sys/fs/ext2fs/ext2_lookup.c	(revision 281676)
@@ -1,1236 +1,1126 @@
 /*-
  *  modified for Lites 1.1
  *
  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
  *  University of Utah, Department of Computer Science
  */
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ufs_lookup.c	8.6 (Berkeley) 4/1/94
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/namei.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/endian.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
 #include <sys/malloc.h>
 #include <sys/dirent.h>
 #include <sys/sysctl.h>
 
 #include <ufs/ufs/dir.h>
 
 #include <fs/ext2fs/inode.h>
 #include <fs/ext2fs/ext2_mount.h>
 #include <fs/ext2fs/ext2fs.h>
 #include <fs/ext2fs/ext2_dinode.h>
 #include <fs/ext2fs/ext2_dir.h>
 #include <fs/ext2fs/ext2_extern.h>
 
 #ifdef INVARIANTS
 static int dirchk = 1;
 #else
 static int dirchk = 0;
 #endif
 
 static SYSCTL_NODE(_vfs, OID_AUTO, e2fs, CTLFLAG_RD, 0, "EXT2FS filesystem");
 SYSCTL_INT(_vfs_e2fs, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, "");
 
 /*
    DIRBLKSIZE in ffs is DEV_BSIZE (in most cases 512)
    while it is the native blocksize in ext2fs - thus, a #define
    is no longer appropriate
 */
 #undef  DIRBLKSIZ
 
 static u_char ext2_ft_to_dt[] = {
 	DT_UNKNOWN,		/* EXT2_FT_UNKNOWN */
 	DT_REG,			/* EXT2_FT_REG_FILE */
 	DT_DIR,			/* EXT2_FT_DIR */
 	DT_CHR,			/* EXT2_FT_CHRDEV */
 	DT_BLK,			/* EXT2_FT_BLKDEV */
 	DT_FIFO,		/* EXT2_FT_FIFO */
 	DT_SOCK,		/* EXT2_FT_SOCK */
 	DT_LNK,			/* EXT2_FT_SYMLINK */
 };
 #define	FTTODT(ft) \
     ((ft) < nitems(ext2_ft_to_dt) ? ext2_ft_to_dt[(ft)] : DT_UNKNOWN)
 
 static u_char dt_to_ext2_ft[] = {
 	EXT2_FT_UNKNOWN,	/* DT_UNKNOWN */
 	EXT2_FT_FIFO,		/* DT_FIFO */
 	EXT2_FT_CHRDEV,		/* DT_CHR */
 	EXT2_FT_UNKNOWN,	/* unused */
 	EXT2_FT_DIR,		/* DT_DIR */
 	EXT2_FT_UNKNOWN,	/* unused */
 	EXT2_FT_BLKDEV,		/* DT_BLK */
 	EXT2_FT_UNKNOWN,	/* unused */
 	EXT2_FT_REG_FILE,	/* DT_REG */
 	EXT2_FT_UNKNOWN,	/* unused */
 	EXT2_FT_SYMLINK,	/* DT_LNK */
 	EXT2_FT_UNKNOWN,	/* unused */
 	EXT2_FT_SOCK,		/* DT_SOCK */
 	EXT2_FT_UNKNOWN,	/* unused */
 	EXT2_FT_UNKNOWN,	/* DT_WHT */
 };
 #define	DTTOFT(dt) \
     ((dt) < nitems(dt_to_ext2_ft) ? dt_to_ext2_ft[(dt)] : EXT2_FT_UNKNOWN)
 
 static int	ext2_dirbadentry(struct vnode *dp, struct ext2fs_direct_2 *de,
 		    int entryoffsetinblock);
-static int	ext2_is_dot_entry(struct componentname *cnp);
 static int	ext2_lookup_ino(struct vnode *vdp, struct vnode **vpp,
 		    struct componentname *cnp, ino_t *dd_ino);
 
-static int
-ext2_is_dot_entry(struct componentname *cnp)
-{
-	if (cnp->cn_namelen <= 2 && cnp->cn_nameptr[0] == '.' &&
-	    (cnp->cn_nameptr[1] == '.' || cnp->cn_nameptr[1] == '0'))
-		return (1);
-	return (0);
-}
-
 /*
  * Vnode op for reading directories.
  */
 int
 ext2_readdir(struct vop_readdir_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct buf *bp;
 	struct inode *ip;
 	struct ext2fs_direct_2 *dp, *edp;
 	u_long *cookies;
 	struct dirent dstdp;
 	off_t offset, startoffset;
 	size_t readcnt, skipcnt;
 	ssize_t startresid;
 	int ncookies;
 	int DIRBLKSIZ = VTOI(ap->a_vp)->i_e2fs->e2fs_bsize;
 	int error;
 
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 	ip = VTOI(vp);
 	if (ap->a_ncookies != NULL) {
 		ncookies = uio->uio_resid;
 		if (uio->uio_offset >= ip->i_size)
 			ncookies = 0;
 		else if (ip->i_size - uio->uio_offset < ncookies)
 			ncookies = ip->i_size - uio->uio_offset;
 		ncookies = ncookies / (offsetof(struct ext2fs_direct_2,
 		    e2d_namlen) + 4) + 1;
 		cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK);
 		*ap->a_ncookies = ncookies;
 		*ap->a_cookies = cookies;
 	} else {
 		ncookies = 0;
 		cookies = NULL;
 	}
 	offset = startoffset = uio->uio_offset;
 	startresid = uio->uio_resid;
 	error = 0;
 	while (error == 0 && uio->uio_resid > 0 &&
 	    uio->uio_offset < ip->i_size) {
 		error = ext2_blkatoff(vp, uio->uio_offset, NULL, &bp);
 		if (error)
 			break;
 		if (bp->b_offset + bp->b_bcount > ip->i_size)
 			readcnt = ip->i_size - bp->b_offset;
 		else
 			readcnt = bp->b_bcount;
 		skipcnt = (size_t)(uio->uio_offset - bp->b_offset) &
 		    ~(size_t)(DIRBLKSIZ - 1);
 		offset = bp->b_offset + skipcnt;
 		dp = (struct ext2fs_direct_2 *)&bp->b_data[skipcnt];
 		edp = (struct ext2fs_direct_2 *)&bp->b_data[readcnt];
 		while (error == 0 && uio->uio_resid > 0 && dp < edp) {
 			if (dp->e2d_reclen <= offsetof(struct ext2fs_direct_2,
 			    e2d_namlen) || (caddr_t)dp + dp->e2d_reclen >
 			    (caddr_t)edp) {
 				error = EIO;
 				break;
 			}
 			/*-
 			 * "New" ext2fs directory entries differ in 3 ways
 			 * from ufs on-disk ones:
 			 * - the name is not necessarily NUL-terminated.
 			 * - the file type field always exists and always
 			 *   follows the name length field.
 			 * - the file type is encoded in a different way.
 			 *
 			 * "Old" ext2fs directory entries need no special
 			 * conversions, since they are binary compatible
 			 * with "new" entries having a file type of 0 (i.e.,
 			 * EXT2_FT_UNKNOWN).  Splitting the old name length
 			 * field didn't make a mess like it did in ufs,
 			 * because ext2fs uses a machine-independent disk
 			 * layout.
 			 */
 			dstdp.d_namlen = dp->e2d_namlen;
 			dstdp.d_type = FTTODT(dp->e2d_type);
 			if (offsetof(struct ext2fs_direct_2, e2d_namlen) +
 			    dstdp.d_namlen > dp->e2d_reclen) {
 				error = EIO;
 				break;
 			}
 			if (offset < startoffset || dp->e2d_ino == 0)
 				goto nextentry;
 			dstdp.d_fileno = dp->e2d_ino;
 			dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp);
 			bcopy(dp->e2d_name, dstdp.d_name, dstdp.d_namlen);
 			dstdp.d_name[dstdp.d_namlen] = '\0';
 			if (dstdp.d_reclen > uio->uio_resid) {
 				if (uio->uio_resid == startresid)
 					error = EINVAL;
 				else
 					error = EJUSTRETURN;
 				break;
 			}
 			/* Advance dp. */
 			error = uiomove((caddr_t)&dstdp, dstdp.d_reclen, uio);
 			if (error)
 				break;
 			if (cookies != NULL) {
 				KASSERT(ncookies > 0,
 				    ("ext2_readdir: cookies buffer too small"));
 				*cookies = offset + dp->e2d_reclen;
 				cookies++;
 				ncookies--;
 			}
 nextentry:
 			offset += dp->e2d_reclen;
 			dp = (struct ext2fs_direct_2 *)((caddr_t)dp +
 			   dp->e2d_reclen);
 		}
 		bqrelse(bp);
 		uio->uio_offset = offset;
 	}
 	/* We need to correct uio_offset. */
 	uio->uio_offset = offset;
 	if (error == EJUSTRETURN)
 		error = 0;
 	if (ap->a_ncookies != NULL) {
 		if (error == 0) {
-			ap->a_ncookies -= ncookies;
+			*ap->a_ncookies -= ncookies;
 		} else {
 			free(*ap->a_cookies, M_TEMP);
 			*ap->a_ncookies = 0;
 			*ap->a_cookies = NULL;
 		}
 	}
 	if (error == 0 && ap->a_eofflag)
 		*ap->a_eofflag = ip->i_size <= uio->uio_offset;
 	return (error);
 }
 
 /*
  * Convert a component of a pathname into a pointer to a locked inode.
  * This is a very central and rather complicated routine.
  * If the file system is not maintained in a strict tree hierarchy,
  * this can result in a deadlock situation (see comments in code below).
  *
  * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
  * on whether the name is to be looked up, created, renamed, or deleted.
  * When CREATE, RENAME, or DELETE is specified, information usable in
  * creating, renaming, or deleting a directory entry may be calculated.
  * If flag has LOCKPARENT or'ed into it and the target of the pathname
  * exists, lookup returns both the target and its parent directory locked.
  * When creating or renaming and LOCKPARENT is specified, the target may
  * not be ".".  When deleting and LOCKPARENT is specified, the target may
- * be "."., but the caller must check to ensure it does an vrele and vput
+ * be ".", but the caller must check to ensure it does a vrele and vput
  * instead of two vputs.
  *
  * Overall outline of ext2_lookup:
  *
  *	search for name in directory, to found or notfound
  * notfound:
  *	if creating, return locked directory, leaving info on available slots
  *	else return error
  * found:
  *	if at end of path and deleting, return information to allow delete
  *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
  *	  inode and return info to allow rewrite
  *	if not at end, add name to cache; if at end and neither creating
  *	  nor deleting, add name to cache
  */
 int
 ext2_lookup(struct vop_cachedlookup_args *ap)
 {
 
 	return (ext2_lookup_ino(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL));
 }
 
 static int
 ext2_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp,
     ino_t *dd_ino)
 {
 	struct inode *dp;		/* inode for directory being searched */
 	struct buf *bp;			/* a buffer of directory entries */
 	struct ext2fs_direct_2 *ep;	/* the current directory entry */
 	int entryoffsetinblock;		/* offset of ep in bp's buffer */
-	struct ext2fs_searchslot ss;
+	enum {NONE, COMPACT, FOUND} slotstatus;
+	doff_t slotoffset;		/* offset of area with free space */
 	doff_t i_diroff;		/* cached i_diroff value */
 	doff_t i_offset;		/* cached i_offset value */
+	int slotsize;			/* size of area at slotoffset */
+	int slotfreespace;		/* amount of space free in slot */
+	int slotneeded;			/* size of the entry we're seeking */
 	int numdirpasses;		/* strategy for directory search */
 	doff_t endsearch;		/* offset to end directory search */
 	doff_t prevoff;			/* prev entry dp->i_offset */
 	struct vnode *pdp;		/* saved dp during symlink work */
 	struct vnode *tdp;		/* returned by VFS_VGET */
 	doff_t enduseful;		/* pointer past last used dir slot */
 	u_long bmask;			/* block offset mask */
-	int error;
+	int namlen, error;
 	struct ucred *cred = cnp->cn_cred;
 	int flags = cnp->cn_flags;
 	int nameiop = cnp->cn_nameiop;
 	ino_t ino, ino1;
 	int ltype;
-	int entry_found = 0;
 
 	int	DIRBLKSIZ = VTOI(vdp)->i_e2fs->e2fs_bsize;
 
 	if (vpp != NULL)
 		*vpp = NULL;
 
 	dp = VTOI(vdp);
 	bmask = VFSTOEXT2(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
 restart:
 	bp = NULL;
-	ss.slotoffset = -1;
+	slotoffset = -1;
 
 	/*
 	 * We now have a segment name to search for, and a directory to search.
-	 *
+	 */
+
+	/*
 	 * Suppress search for slots unless creating
 	 * file and at end of pathname, in which case
 	 * we watch for a place to put the new file in
 	 * case it doesn't already exist.
 	 */
 	i_diroff = dp->i_diroff;
-	ss.slotstatus = FOUND;
-	ss.slotfreespace = ss.slotsize = ss.slotneeded = 0;
+	slotstatus = FOUND;
+	slotfreespace = slotsize = slotneeded = 0;
 	if ((nameiop == CREATE || nameiop == RENAME) &&
 	    (flags & ISLASTCN)) {
-		ss.slotstatus = NONE;
-		ss.slotneeded = EXT2_DIR_REC_LEN(cnp->cn_namelen);
+		slotstatus = NONE;
+		slotneeded = EXT2_DIR_REC_LEN(cnp->cn_namelen);
 		/* was
-		ss.slotneeded = (sizeof(struct direct) - MAXNAMLEN +
+		slotneeded = (sizeof(struct direct) - MAXNAMLEN +
 			cnp->cn_namelen + 3) &~ 3; */
 	}
 
 	/*
-	 * Try to lookup dir entry using htree directory index.
-	 *
-	 * If we got an error or we want to find '.' or '..' entry,
-	 * we will fall back to linear search.
-	 */
-	if (!ext2_is_dot_entry(cnp) && ext2_htree_has_idx(dp)) {
-		numdirpasses = 1;
-		entryoffsetinblock = 0;
-		switch (ext2_htree_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen,
-				&bp, &entryoffsetinblock, &i_offset, &prevoff,
-				&enduseful, &ss)) {
-		case 0:
-			ep = (struct ext2fs_direct_2 *)((char *)bp->b_data +
-				(i_offset & bmask));
-			goto foundentry;
-		case ENOENT:
-			i_offset = roundup2(dp->i_size, DIRBLKSIZ);
-			goto notfound;
-		default:
-			/*
-			 * Something failed; just fallback to do a linear
-			 * search.
-			 */
-			break;
-		}
-	}
-
-	/*
 	 * If there is cached information on a previous search of
 	 * this directory, pick up where we last left off.
 	 * We cache only lookups as these are the most common
 	 * and have the greatest payoff. Caching CREATE has little
 	 * benefit as it usually must search the entire directory
 	 * to determine that the entry does not exist. Caching the
 	 * location of the last DELETE or RENAME has not reduced
 	 * profiling time and hence has been removed in the interest
 	 * of simplicity.
 	 */
 	if (nameiop != LOOKUP || i_diroff == 0 ||
 	    i_diroff > dp->i_size) {
 		entryoffsetinblock = 0;
 		i_offset = 0;
 		numdirpasses = 1;
 	} else {
 		i_offset = i_diroff;
 		if ((entryoffsetinblock = i_offset & bmask) &&
 		    (error = ext2_blkatoff(vdp, (off_t)i_offset, NULL,
 		    &bp)))
 			return (error);
 		numdirpasses = 2;
 		nchstats.ncs_2passes++;
 	}
 	prevoff = i_offset;
 	endsearch = roundup2(dp->i_size, DIRBLKSIZ);
 	enduseful = 0;
 
 searchloop:
 	while (i_offset < endsearch) {
 		/*
 		 * If necessary, get the next directory block.
 		 */
-		if (bp != NULL)
-			brelse(bp);
-		error = ext2_blkatoff(vdp, (off_t)i_offset, NULL, &bp);
-		if (error != 0)
-			return (error);
-		entryoffsetinblock = 0;
+		if ((i_offset & bmask) == 0) {
+			if (bp != NULL)
+				brelse(bp);
+			if ((error =
+			    ext2_blkatoff(vdp, (off_t)i_offset, NULL,
+			    &bp)) != 0)
+				return (error);
+			entryoffsetinblock = 0;
+		}
 		/*
 		 * If still looking for a slot, and at a DIRBLKSIZE
 		 * boundary, have to start looking for free space again.
 		 */
-		if (ss.slotstatus == NONE &&
+		if (slotstatus == NONE &&
 		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
-			ss.slotoffset = -1;
-			ss.slotfreespace = 0;
+			slotoffset = -1;
+			slotfreespace = 0;
 		}
-		error = ext2_search_dirblock(dp, bp->b_data, &entry_found,
-				cnp->cn_nameptr, cnp->cn_namelen,
-				&entryoffsetinblock, &i_offset, &prevoff,
-				&enduseful, &ss);
-		if (error != 0) {
-			brelse(bp);
-			return (error);
+		/*
+		 * Get pointer to next entry.
+		 * Full validation checks are slow, so we only check
+		 * enough to ensure forward progress through the
+		 * directory. Complete checks can be run by setting
+		 * "vfs.e2fs.dircheck" to be true.
+		 */
+		ep = (struct ext2fs_direct_2 *)
+			((char *)bp->b_data + entryoffsetinblock);
+		if (ep->e2d_reclen == 0 ||
+		    (dirchk && ext2_dirbadentry(vdp, ep, entryoffsetinblock))) {
+			int i;
+			ext2_dirbad(dp, i_offset, "mangled entry");
+			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
+			i_offset += i;
+			entryoffsetinblock += i;
+			continue;
 		}
-		if (entry_found) {
-			ep = (struct ext2fs_direct_2 *)((char *)bp->b_data +
-				(entryoffsetinblock & bmask));
-foundentry:
-			ino = ep->e2d_ino;
-			goto found;
+
+		/*
+		 * If an appropriate sized slot has not yet been found,
+		 * check to see if one is available. Also accumulate space
+		 * in the current block so that we can determine if
+		 * compaction is viable.
+		 */
+		if (slotstatus != FOUND) {
+			int size = ep->e2d_reclen;
+
+			if (ep->e2d_ino != 0)
+				size -= EXT2_DIR_REC_LEN(ep->e2d_namlen);
+			if (size > 0) {
+				if (size >= slotneeded) {
+					slotstatus = FOUND;
+					slotoffset = i_offset;
+					slotsize = ep->e2d_reclen;
+				} else if (slotstatus == NONE) {
+					slotfreespace += size;
+					if (slotoffset == -1)
+						slotoffset = i_offset;
+					if (slotfreespace >= slotneeded) {
+						slotstatus = COMPACT;
+						slotsize = i_offset +
+						      ep->e2d_reclen - slotoffset;
+					}
+				}
+			}
 		}
+
+		/*
+		 * Check for a name match.
+		 */
+		if (ep->e2d_ino) {
+			namlen = ep->e2d_namlen;
+			if (namlen == cnp->cn_namelen &&
+			    !bcmp(cnp->cn_nameptr, ep->e2d_name,
+				(unsigned)namlen)) {
+				/*
+				 * Name matched: remember the entry's inode
+				 * number.  The directory buffer is released
+				 * after the "found" label.
+				 */
+				ino = ep->e2d_ino;
+				goto found;
+			}
+		}
+		prevoff = i_offset;
+		i_offset += ep->e2d_reclen;
+		entryoffsetinblock += ep->e2d_reclen;
+		if (ep->e2d_ino)
+			enduseful = i_offset;
 	}
-notfound:
+/* notfound: */
 	/*
 	 * If we started in the middle of the directory and failed
 	 * to find our target, we must check the beginning as well.
 	 */
 	if (numdirpasses == 2) {
 		numdirpasses--;
 		i_offset = 0;
 		endsearch = i_diroff;
 		goto searchloop;
 	}
 	if (bp != NULL)
 		brelse(bp);
 	/*
 	 * If creating, and at end of pathname and current
 	 * directory has not been removed, then can consider
 	 * allowing file to be created.
 	 */
 	if ((nameiop == CREATE || nameiop == RENAME) &&
 	    (flags & ISLASTCN) && dp->i_nlink != 0) {
 		/*
 		 * Access for write is interpreted as allowing
 		 * creation of files in the directory.
 		 */
 		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread)) != 0)
 			return (error);
 		/*
 		 * Return an indication of where the new directory
 		 * entry should be put.  If we didn't find a slot,
 		 * then set dp->i_count to 0 indicating
 		 * that the new slot belongs at the end of the
 		 * directory. If we found a slot, then the new entry
 		 * can be put in the range from dp->i_offset to
 		 * dp->i_offset + dp->i_count.
 		 */
-		if (ss.slotstatus == NONE) {
+		if (slotstatus == NONE) {
 			dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ);
 			dp->i_count = 0;
 			enduseful = dp->i_offset;
 		} else {
-			dp->i_offset = ss.slotoffset;
-			dp->i_count = ss.slotsize;
-			if (enduseful < ss.slotoffset + ss.slotsize)
-				enduseful = ss.slotoffset + ss.slotsize;
+			dp->i_offset = slotoffset;
+			dp->i_count = slotsize;
+			if (enduseful < slotoffset + slotsize)
+				enduseful = slotoffset + slotsize;
 		}
 		dp->i_endoff = roundup2(enduseful, DIRBLKSIZ);
 		/*
 		 * We return with the directory locked, so that
 		 * the parameters we set up above will still be
 		 * valid if we actually decide to do a direnter().
 		 * We return ni_vp == NULL to indicate that the entry
 		 * does not currently exist; we leave a pointer to
 		 * the (locked) directory inode in ndp->ni_dvp.
 		 * The pathname buffer is saved so that the name
 		 * can be obtained later.
 		 *
 		 * NB - if the directory is unlocked, then this
 		 * information cannot be used.
 		 */
 		cnp->cn_flags |= SAVENAME;
 		return (EJUSTRETURN);
 	}
 	/*
 	 * Insert name into cache (as non-existent) if appropriate.
 	 */
 	if ((cnp->cn_flags & MAKEENTRY) != 0)
 		cache_enter(vdp, NULL, cnp);
 	return (ENOENT);
 
 found:
 	if (dd_ino != NULL)
 		*dd_ino = ino;
 	if (numdirpasses == 2)
 		nchstats.ncs_pass2++;
 	/*
 	 * Check that directory length properly reflects presence
 	 * of this entry.
 	 */
 	if (entryoffsetinblock + EXT2_DIR_REC_LEN(ep->e2d_namlen)
 		> dp->i_size) {
 		ext2_dirbad(dp, i_offset, "i_size too small");
 		dp->i_size = entryoffsetinblock+EXT2_DIR_REC_LEN(ep->e2d_namlen);
 		dp->i_flag |= IN_CHANGE | IN_UPDATE;
 	}
 	brelse(bp);
 
 	/*
 	 * Found component in pathname.
 	 * If the final component of path name, save information
 	 * in the cache as to where the entry was found.
 	 */
 	if ((flags & ISLASTCN) && nameiop == LOOKUP)
 		dp->i_diroff = i_offset &~ (DIRBLKSIZ - 1);
 	/*
 	 * If deleting, and at end of pathname, return
 	 * parameters which can be used to remove file.
 	 */
 	if (nameiop == DELETE && (flags & ISLASTCN)) {
 		if (flags & LOCKPARENT)
 			ASSERT_VOP_ELOCKED(vdp, __FUNCTION__);
 		/*
 		 * Write access to directory required to delete files.
 		 */
 		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread)) != 0)
 			return (error);
 		/*
 		 * Return pointer to current entry in dp->i_offset,
 		 * and distance past previous entry (if there
 		 * is a previous entry in this block) in dp->i_count.
 		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
 		 *
 		 * Technically we shouldn't be setting these in the
 		 * WANTPARENT case (first lookup in rename()), but any
 		 * lookups that will result in directory changes will
 		 * overwrite these.
 		 */
 		dp->i_offset = i_offset;
 		if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
 			dp->i_count = 0;
 		else
 			dp->i_count = dp->i_offset - prevoff;
 		if (dd_ino != NULL)
 			return (0);
 		if (dp->i_number == ino) {
 			VREF(vdp);
 			*vpp = vdp;
 			return (0);
 		}
 		if ((error = VFS_VGET(vdp->v_mount, ino, LK_EXCLUSIVE,
 		    &tdp)) != 0)
 			return (error);
 		/*
 		 * If directory is "sticky", then user must own
 		 * the directory, or the file in it, else she
 		 * may not delete it (unless she's root). This
 		 * implements append-only directories.
 		 */
 		if ((dp->i_mode & ISVTX) &&
 		    cred->cr_uid != 0 &&
 		    cred->cr_uid != dp->i_uid &&
 		    VTOI(tdp)->i_uid != cred->cr_uid) {
 			vput(tdp);
 			return (EPERM);
 		}
 		*vpp = tdp;
 		return (0);
 	}
 
 	/*
 	 * If rewriting (RENAME), return the inode and the
 	 * information required to rewrite the present directory
 	 * Must get inode of directory entry to verify it's a
 	 * regular file, or empty directory.
 	 */
 	if (nameiop == RENAME && (flags & ISLASTCN)) {
 		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread)) != 0)
 			return (error);
 		/*
 		 * Careful about locking second inode.
 		 * This can only occur if the target is ".".
 		 */
 		dp->i_offset = i_offset;
 		if (dp->i_number == ino)
 			return (EISDIR);
 		if (dd_ino != NULL)
 			return (0);
 		if ((error = VFS_VGET(vdp->v_mount, ino, LK_EXCLUSIVE,
 		    &tdp)) != 0)
 			return (error);
 		*vpp = tdp;
 		cnp->cn_flags |= SAVENAME;
 		return (0);
 	}
 	if (dd_ino != NULL)
 		return (0);
 
 	/*
 	 * Step through the translation in the name.  We do not `vput' the
 	 * directory because we may need it again if a symbolic link
 	 * is relative to the current directory.  Instead we save it
 	 * unlocked as "pdp".  We must get the target inode before unlocking
 	 * the directory to insure that the inode will not be removed
 	 * before we get it.  We prevent deadlock by always fetching
 	 * inodes from the root, moving down the directory tree. Thus
 	 * when following backward pointers ".." we must unlock the
 	 * parent directory before getting the requested directory.
 	 * There is a potential race condition here if both the current
 	 * and parent directories are removed before the VFS_VGET for the
 	 * inode associated with ".." returns.  We hope that this occurs
 	 * infrequently since we cannot avoid this race condition without
 	 * implementing a sophisticated deadlock detection algorithm.
 	 * Note also that this simple deadlock detection scheme will not
 	 * work if the file system has any hard links other than ".."
 	 * that point backwards in the directory structure.
 	 */
 	pdp = vdp;
 	if (flags & ISDOTDOT) {
 		error = vn_vget_ino(pdp, ino, cnp->cn_lkflags, &tdp);
 		if (pdp->v_iflag & VI_DOOMED) {
 			if (error == 0)
 				vput(tdp);
 			error = ENOENT;
 		}
 		if (error)
 			return (error);
 		/*
 		 * Recheck that ".." entry in the vdp directory points
 		 * to the inode we looked up before vdp lock was
 		 * dropped.
 		 */
 		error = ext2_lookup_ino(pdp, NULL, cnp, &ino1);
 		if (error) {
 			vput(tdp);
 			return (error);
 		}
 		if (ino1 != ino) {
 			vput(tdp);
 			goto restart;
 		}
 		*vpp = tdp;
 	} else if (dp->i_number == ino) {
 		VREF(vdp);	/* we want ourself, ie "." */
 		/*
 		 * When we lookup "." we still can be asked to lock it
 		 * differently.
 		 */
 		ltype = cnp->cn_lkflags & LK_TYPE_MASK;
 		if (ltype != VOP_ISLOCKED(vdp)) {
 			if (ltype == LK_EXCLUSIVE)
 				vn_lock(vdp, LK_UPGRADE | LK_RETRY);
 			else /* if (ltype == LK_SHARED) */
 				vn_lock(vdp, LK_DOWNGRADE | LK_RETRY);
 		}
 		*vpp = vdp;
 	} else {
 		if ((error = VFS_VGET(vdp->v_mount, ino, cnp->cn_lkflags,
 		    &tdp)) != 0)
 			return (error);
 		*vpp = tdp;
 	}
 
 	/*
 	 * Insert name into cache if appropriate.
 	 */
 	if (cnp->cn_flags & MAKEENTRY)
 		cache_enter(vdp, *vpp, cnp);
 	return (0);
 }
 
-int
-ext2_search_dirblock(struct inode *ip, void *data, int *foundp,
-	const char *name, int namelen, int *entryoffsetinblockp,
-	doff_t *offp, doff_t *prevoffp, doff_t *endusefulp,
-	struct ext2fs_searchslot *ssp)
-{
-	struct vnode *vdp;
-	struct ext2fs_direct_2 *ep, *top;
-	uint32_t bsize = ip->i_e2fs->e2fs_bsize;
-	int offset = *entryoffsetinblockp;
-	int namlen;
-
-	vdp = ITOV(ip);
-
-	ep = (struct ext2fs_direct_2 *)((char *)data + offset);
-	top = (struct ext2fs_direct_2 *)((char *)data +
-		bsize - EXT2_DIR_REC_LEN(0));
-
-	while (ep < top) {
-		/*
-		 * Full validation checks are slow, so we only check
-		 * enough to insure forward progress through the
-		 * directory. Complete checks can be run by setting
-		 * "vfs.e2fs.dirchk" to be true.
-		 */
-		if (ep->e2d_reclen == 0 ||
-		    (dirchk && ext2_dirbadentry(vdp, ep, offset))) {
-			int i;
-			ext2_dirbad(ip, *offp, "mangled entry");
-			i = bsize - (offset & (bsize - 1));
-			*offp += i;
-			offset += i;
-			continue;
-		}
-
-		/*
-		 * If an appropriate sized slot has not yet been found,
-		 * check to see if one is available. Also accumulate space
-		 * in the current block so that we can determine if
-		 * compaction is viable.
-		 */
-		if (ssp->slotstatus != FOUND) {
-			int size = ep->e2d_reclen;
-
-			if (ep->e2d_ino != 0)
-				size -= EXT2_DIR_REC_LEN(ep->e2d_namlen);
-			if (size > 0) {
-				if (size >= ssp->slotneeded) {
-					ssp->slotstatus = FOUND;
-					ssp->slotoffset = *offp;
-					ssp->slotsize = ep->e2d_reclen;
-				} else if (ssp->slotstatus == NONE) {
-					ssp->slotfreespace += size;
-					if (ssp->slotoffset == -1)
-						ssp->slotoffset = *offp;
-					if (ssp->slotfreespace >= ssp->slotneeded) {
-						ssp->slotstatus = COMPACT;
-						ssp->slotsize = *offp +
-							ep->e2d_reclen -
-							ssp->slotoffset;
-					}
-				}
-			}
-		}
-
-		/*
-		 * Check for a name match.
-		 */
-		if (ep->e2d_ino) {
-			namlen = ep->e2d_namlen;
-			if (namlen == namelen &&
-			    !bcmp(name, ep->e2d_name, (unsigned)namlen)) {
-				/*
-				 * Save directory entry's inode number and
-				 * reclen in ndp->ni_ufs area, and release
-				 * directory buffer.
-				 */
-				*foundp = 1;
-				return (0);
-			}
-		}
-		*prevoffp = *offp;
-		*offp += ep->e2d_reclen;
-		offset += ep->e2d_reclen;
-		*entryoffsetinblockp = offset;
-		if (ep->e2d_ino)
-			*endusefulp = *offp;
-		/*
-		 * Get pointer to the next entry.
-		 */
-		ep = (struct ext2fs_direct_2 *)((char *)data + offset);
-	}
-
-	return (0);
-}
-
 void
 ext2_dirbad(struct inode *ip, doff_t offset, char *how)
 {
 	struct mount *mp;
 
 	mp = ITOV(ip)->v_mount;
 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
 		panic("ext2_dirbad: %s: bad dir ino %ju at offset %ld: %s\n",
 		    mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number,
 		    (long)offset, how);
 	else
 		(void)printf("%s: bad dir ino %ju at offset %ld: %s\n",
 		    mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number,
 		    (long)offset, how);
 
 }
 
 /*
  * Do consistency checking on a directory entry:
  *	record length must be multiple of 4
  *	entry must fit in rest of its DIRBLKSIZ block
  *	record must be large enough to contain entry
  *	name is not longer than MAXNAMLEN
  *	name must be as long as advertised, and null terminated
  */
 /*
- *	changed so that it confirms to ext2_check_dir_entry
+ *	changed so that it conforms to ext2_check_dir_entry
  */
 static int
 ext2_dirbadentry(struct vnode *dp, struct ext2fs_direct_2 *de,
     int entryoffsetinblock)
 {
 	int	DIRBLKSIZ = VTOI(dp)->i_e2fs->e2fs_bsize;
 
 	char * error_msg = NULL;
 
 	if (de->e2d_reclen < EXT2_DIR_REC_LEN(1))
 		error_msg = "rec_len is smaller than minimal";
 	else if (de->e2d_reclen % 4 != 0)
 		error_msg = "rec_len % 4 != 0";
 	else if (de->e2d_reclen < EXT2_DIR_REC_LEN(de->e2d_namlen))
 		error_msg = "reclen is too small for name_len";
 	else if (entryoffsetinblock + de->e2d_reclen > DIRBLKSIZ)
 		error_msg = "directory entry across blocks";
 	/* else LATER
 	     if (de->inode > dir->i_sb->u.ext2_sb.s_es->s_inodes_count)
 		error_msg = "inode out of bounds";
 	*/
 
 	if (error_msg != NULL) {
 		printf("bad directory entry: %s\n", error_msg);
 		printf("offset=%d, inode=%lu, rec_len=%u, name_len=%u\n",
 			entryoffsetinblock, (unsigned long)de->e2d_ino,
 			de->e2d_reclen, de->e2d_namlen);
 	}
 	return error_msg == NULL ? 0 : 1;
 }
 
 /*
  * Write a directory entry after a call to namei, using the parameters
  * that it left in nameidata.  The argument ip is the inode which the new
  * directory entry will refer to.  Dvp is a pointer to the directory to
  * be written, which was left locked by namei. Remaining parameters
  * (dp->i_offset, dp->i_count) indicate how the space for the new
  * entry is to be obtained.
  */
 int
 ext2_direnter(struct inode *ip, struct vnode *dvp, struct componentname *cnp)
 {
+	struct ext2fs_direct_2 *ep, *nep;
 	struct inode *dp;
+	struct buf *bp;
 	struct ext2fs_direct_2 newdir;
 	struct iovec aiov;
 	struct uio auio;
-	int error, newentrysize;
-	int DIRBLKSIZ = ip->i_e2fs->e2fs_bsize;
+	u_int dsize;
+	int error, loc, newentrysize, spacefree;
+	char *dirbuf;
+	int     DIRBLKSIZ = ip->i_e2fs->e2fs_bsize;
 
 
 #ifdef INVARIANTS
 	if ((cnp->cn_flags & SAVENAME) == 0)
 		panic("ext2_direnter: missing name");
 #endif
 	dp = VTOI(dvp);
 	newdir.e2d_ino = ip->i_number;
 	newdir.e2d_namlen = cnp->cn_namelen;
 	if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs,
 	    EXT2F_INCOMPAT_FTYPE))
 		newdir.e2d_type = DTTOFT(IFTODT(ip->i_mode));
 	else
 		newdir.e2d_type = EXT2_FT_UNKNOWN;
 	bcopy(cnp->cn_nameptr, newdir.e2d_name, (unsigned)cnp->cn_namelen + 1);
 	newentrysize = EXT2_DIR_REC_LEN(newdir.e2d_namlen);
-
-	if (ext2_htree_has_idx(dp)) {
-		error = ext2_htree_add_entry(dvp, &newdir, cnp);
-		if (error) {
-			dp->i_flag &= ~IN_E4INDEX;
-			dp->i_flag |= IN_CHANGE | IN_UPDATE;
-		}
-		return (error);
-	}
-
-	if (EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_DIRHASHINDEX) &&
-	    !ext2_htree_has_idx(dp)) {
-		if ((dp->i_size / DIRBLKSIZ) == 1 &&
-		    dp->i_offset == DIRBLKSIZ) {
-			/*
-			 * Making indexed directory when one block is not
-			 * enough to save all entries.
-			 */
-			return ext2_htree_create_index(dvp, cnp, &newdir);
-		}
-	}
-
 	if (dp->i_count == 0) {
 		/*
 		 * If dp->i_count is 0, then namei could find no
 		 * space in the directory. Here, dp->i_offset will
 		 * be on a directory block boundary and we will write the
 		 * new entry into a fresh block.
 		 */
 		if (dp->i_offset & (DIRBLKSIZ - 1))
 			panic("ext2_direnter: newblk");
 		auio.uio_offset = dp->i_offset;
 		newdir.e2d_reclen = DIRBLKSIZ;
 		auio.uio_resid = newentrysize;
 		aiov.iov_len = newentrysize;
 		aiov.iov_base = (caddr_t)&newdir;
 		auio.uio_iov = &aiov;
 		auio.uio_iovcnt = 1;
 		auio.uio_rw = UIO_WRITE;
 		auio.uio_segflg = UIO_SYSSPACE;
 		auio.uio_td = (struct thread *)0;
 		error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred);
 		if (DIRBLKSIZ >
 		    VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
 			/* XXX should grow with balloc() */
 			panic("ext2_direnter: frag size");
 		else if (!error) {
 			dp->i_size = roundup2(dp->i_size, DIRBLKSIZ);
 			dp->i_flag |= IN_CHANGE;
 		}
 		return (error);
 	}
 
-	error = ext2_add_entry(dvp, &newdir);
-	if (!error && dp->i_endoff && dp->i_endoff < dp->i_size)
-		error = ext2_truncate(dvp, (off_t)dp->i_endoff, IO_SYNC,
-		    cnp->cn_cred, cnp->cn_thread);
-	return (error);
-}
-
-/*
- * Insert an entry into the directory block.
- * Compact the contents.
- */
-int
-ext2_add_entry(struct vnode *dvp, struct ext2fs_direct_2 *entry)
-{
-	struct ext2fs_direct_2 *ep, *nep;
-	struct inode *dp;
-	struct buf *bp;
-	u_int dsize;
-	int error, loc, newentrysize, spacefree;
-	char *dirbuf;
-
-	dp = VTOI(dvp);
-
 	/*
 	 * If dp->i_count is non-zero, then namei found space
 	 * for the new entry in the range dp->i_offset to
 	 * dp->i_offset + dp->i_count in the directory.
 	 * To use this space, we may have to compact the entries located
 	 * there, by copying them together towards the beginning of the
 	 * block, leaving the free space in one usable chunk at the end.
 	 */
 
 	/*
 	 * Increase size of directory if entry eats into new space.
 	 * This should never push the size past a new multiple of
 	 * DIRBLKSIZE.
 	 *
 	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
 	 */
 	if (dp->i_offset + dp->i_count > dp->i_size)
 		dp->i_size = dp->i_offset + dp->i_count;
 	/*
 	 * Get the block containing the space for the new directory entry.
 	 */
 	if ((error = ext2_blkatoff(dvp, (off_t)dp->i_offset, &dirbuf,
 	    &bp)) != 0)
 		return (error);
 	/*
 	 * Find space for the new entry. In the simple case, the entry at
 	 * offset base will have the space. If it does not, then namei
 	 * arranged that compacting the region dp->i_offset to
 	 * dp->i_offset + dp->i_count would yield the
 	 * space.
 	 */
-	newentrysize = EXT2_DIR_REC_LEN(entry->e2d_namlen);
 	ep = (struct ext2fs_direct_2 *)dirbuf;
 	dsize = EXT2_DIR_REC_LEN(ep->e2d_namlen);
 	spacefree = ep->e2d_reclen - dsize;
 	for (loc = ep->e2d_reclen; loc < dp->i_count; ) {
 		nep = (struct ext2fs_direct_2 *)(dirbuf + loc);
 		if (ep->e2d_ino) {
 			/* trim the existing slot */
 			ep->e2d_reclen = dsize;
 			ep = (struct ext2fs_direct_2 *)((char *)ep + dsize);
 		} else {
 			/* overwrite; nothing there; header is ours */
 			spacefree += dsize;
 		}
 		dsize = EXT2_DIR_REC_LEN(nep->e2d_namlen);
 		spacefree += nep->e2d_reclen - dsize;
 		loc += nep->e2d_reclen;
 		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
 	}
 	/*
 	 * Update the pointer fields in the previous entry (if any),
 	 * copy in the new entry, and write out the block.
 	 */
 	if (ep->e2d_ino == 0) {
 		if (spacefree + dsize < newentrysize)
 			panic("ext2_direnter: compact1");
-		entry->e2d_reclen = spacefree + dsize;
+		newdir.e2d_reclen = spacefree + dsize;
 	} else {
 		if (spacefree < newentrysize)
 			panic("ext2_direnter: compact2");
-		entry->e2d_reclen = spacefree;
+		newdir.e2d_reclen = spacefree;
 		ep->e2d_reclen = dsize;
 		ep = (struct ext2fs_direct_2 *)((char *)ep + dsize);
 	}
-	bcopy((caddr_t)entry, (caddr_t)ep, (u_int)newentrysize);
+	bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize);
 	if (DOINGASYNC(dvp)) {
 		bdwrite(bp);
 		error = 0;
 	} else {
 		error = bwrite(bp);
 	}
 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
+	if (!error && dp->i_endoff && dp->i_endoff < dp->i_size)
+		error = ext2_truncate(dvp, (off_t)dp->i_endoff, IO_SYNC,
+		    cnp->cn_cred, cnp->cn_thread);
 	return (error);
 }
 
 /*
  * Remove a directory entry after a call to namei, using
  * the parameters which it left in nameidata. The entry
  * dp->i_offset contains the offset into the directory of the
  * entry to be eliminated.  The dp->i_count field contains the
  * size of the previous record in the directory.  If this
  * is 0, the first entry is being deleted, so we need only
  * zero the inode number to mark the entry as free.  If the
  * entry is not the first in the directory, we must reclaim
  * the space of the now empty record by adding the record size
  * to the size of the previous entry.
  *
  * dvp is the directory vnode; cnp is not used here.
  * Returns 0 on success or the error from ext2_blkatoff()/bwrite().
  */
 int
 ext2_dirremove(struct vnode *dvp, struct componentname *cnp)
 {
 	struct inode *dp;
 	struct ext2fs_direct_2 *ep, *rep;
 	struct buf *bp;
 	int error;
 
 	dp = VTOI(dvp);
 	if (dp->i_count == 0) {
 		/*
 		 * First entry in block: set d_ino to zero.
 		 * This path always writes synchronously.
 		 */
 		if ((error =
 		    ext2_blkatoff(dvp, (off_t)dp->i_offset, (char **)&ep,
 		    &bp)) != 0)
 			return (error);
 		ep->e2d_ino = 0;
 		error = bwrite(bp);
 		dp->i_flag |= IN_CHANGE | IN_UPDATE;
 		return (error);
 	}
 	/*
 	 * Collapse new free space into previous entry.
 	 */
 	if ((error = ext2_blkatoff(dvp, (off_t)(dp->i_offset - dp->i_count),
 	    (char **)&ep, &bp)) != 0)
 		return (error);
 
 	/*
 	 * dp->i_count is known to be non-zero here (the zero case returned
 	 * above), so 'rep', the entry being removed, directly follows 'ep'.
 	 */
 	rep = (struct ext2fs_direct_2 *)((char *)ep + ep->e2d_reclen);
 	ep->e2d_reclen += rep->e2d_reclen;
 	if (DOINGASYNC(dvp)) {
 		/* Delayed write: nothing can fail yet. */
 		bdwrite(bp);
 		error = 0;
 	} else {
 		error = bwrite(bp);
 	}
 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
 	return (error);
 }
 
 /*
  * Point an existing directory entry at a different inode.
  *
  * dp is the directory inode and dp->i_offset locates the entry to
  * rewrite (the offset is expected to have been left there by a prior
  * lookup).  ip is the inode the entry must refer to afterwards.  cnp is
  * not used.  The entry's inode number and file type are overwritten and
  * the block is written back with bwrite().
  *
  * Returns 0 on success or the error from ext2_blkatoff()/bwrite().
  */
 int
 ext2_dirrewrite(struct inode *dp, struct inode *ip, struct componentname *cnp)
 {
 	struct ext2fs_direct_2 *entry;
 	struct buf *blk;
 	int rc;
 
 	rc = ext2_blkatoff(ITOV(dp), (off_t)dp->i_offset, (char **)&entry,
 	    &blk);
 	if (rc != 0)
 		return (rc);
 
 	entry->e2d_ino = ip->i_number;
 	/* Only record a file type when the filesystem has the feature. */
 	if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs, EXT2F_INCOMPAT_FTYPE)) {
 		entry->e2d_type = DTTOFT(IFTODT(ip->i_mode));
 	} else {
 		entry->e2d_type = EXT2_FT_UNKNOWN;
 	}
 
 	rc = bwrite(blk);
 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
 	return (rc);
 }
 
 /*
  * Decide whether a directory holds nothing but "." and "..".
  * The inode supplied must be locked.
  *
  * Only the first MINDIRSIZ bytes of every entry are read, into a
  * struct dirtemplate that is then viewed as a directory entry header.
  * That is not precisely the right type, but it is large enough.
  *
  * Returns 1 when the directory is empty, 0 otherwise (including on any
  * read error or short read).  ".." only counts when it refers to
  * parentino.
  *
  * NB: does not handle corrupted directories.
  */
 int
 ext2_dirempty(struct inode *ip, ino_t parentino, struct ucred *cred)
 {
 	struct dirtemplate tmpl;
 	struct ext2fs_direct_2 *de;
 	off_t pos;
 	ssize_t resid;
 	int namlen, rc;
 #define	MINDIRSIZ (sizeof(struct dirtemplate) / 2)
 
 	de = (struct ext2fs_direct_2 *)&tmpl;
 	for (pos = 0; pos < ip->i_size; pos += de->e2d_reclen) {
 		rc = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)de, MINDIRSIZ,
 		    pos, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cred,
 		    NOCRED, &resid, (struct thread *)0);
 		/*
 		 * MINDIRSIZ bytes were requested, so anything left over
 		 * means we ran into the end of the file.
 		 */
 		if (rc != 0 || resid != 0)
 			return (0);
 		/* a zero record length would never advance 'pos' */
 		if (de->e2d_reclen == 0)
 			return (0);
 		/* unused slot: nothing to check */
 		if (de->e2d_ino == 0)
 			continue;
 		/* anything other than "." or ".." means "not empty" */
 		namlen = de->e2d_namlen;
 		if (namlen > 2 || de->e2d_name[0] != '.')
 			return (0);
 		/* a one-character name starting with '.' is "." itself */
 		if (namlen == 1)
 			continue;
 		/* otherwise it must be ".." pointing at the parent */
 		if (de->e2d_name[1] != '.' || de->e2d_ino != parentino)
 			return (0);
 	}
 	return (1);
 }
 
 /*
  * Check if source directory is in the path of the target directory.
  * Target is supplied locked, source is unlocked.
  * The target is always vput before returning.
  */
 int
 ext2_checkpath(struct inode *source, struct inode *target, struct ucred *cred)
 {
 	struct vnode *vp;
 	int error, namlen;
 	struct dirtemplate dirbuf;
 
 	vp = ITOV(target);
 	if (target->i_number == source->i_number) {
 		error = EEXIST;
 		goto out;
 	}
 	if (target->i_number == EXT2_ROOTINO) {
 		error = 0;
 		goto out;
 	}
 
 	for (;;) {
 		if (vp->v_type != VDIR) {
 			error = ENOTDIR;
 			break;
 		}
 		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
 			sizeof(struct dirtemplate), (off_t)0, UIO_SYSSPACE,
 			IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, NULL,
 			NULL);
 		if (error != 0)
 			break;
 		namlen = dirbuf.dotdot_type;	/* like ufs little-endian */
 		if (namlen != 2 ||
 		    dirbuf.dotdot_name[0] != '.' ||
 		    dirbuf.dotdot_name[1] != '.') {
 			error = ENOTDIR;
 			break;
 		}
 		if (dirbuf.dotdot_ino == source->i_number) {
 			error = EINVAL;
 			break;
 		}
 		if (dirbuf.dotdot_ino == EXT2_ROOTINO)
 			break;
 		vput(vp);
 		if ((error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino,
 		    LK_EXCLUSIVE, &vp)) != 0) {
 			vp = NULL;
 			break;
 		}
 	}
 
 out:
 	if (error == ENOTDIR)
 		printf("checkpath: .. not a directory\n");
 	if (vp != NULL)
 		vput(vp);
 	return (error);
 }
Index: user/ngie/more-tests/sys/fs/ext2fs/ext2_vfsops.c
===================================================================
--- user/ngie/more-tests/sys/fs/ext2fs/ext2_vfsops.c	(revision 281675)
+++ user/ngie/more-tests/sys/fs/ext2fs/ext2_vfsops.c	(revision 281676)
@@ -1,1115 +1,1101 @@
 /*-
  *  modified for EXT2FS support in Lites 1.1
  *
  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
  *  University of Utah, Department of Computer Science
  */
 /*-
  * Copyright (c) 1989, 1991, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ffs_vfsops.c	8.8 (Berkeley) 4/18/94
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
 #include <sys/endian.h>
 #include <sys/fcntl.h>
 #include <sys/malloc.h>
 #include <sys/stat.h>
 #include <sys/mutex.h>
 
 #include <geom/geom.h>
 #include <geom/geom_vfs.h>
 
 #include <fs/ext2fs/ext2_mount.h>
 #include <fs/ext2fs/inode.h>
 
 #include <fs/ext2fs/fs.h>
 #include <fs/ext2fs/ext2fs.h>
 #include <fs/ext2fs/ext2_dinode.h>
 #include <fs/ext2fs/ext2_extern.h>
 
 /* Forward declarations of the file-local helpers and VFS entry points. */
 static int	ext2_flushfiles(struct mount *mp, int flags, struct thread *td);
 static int	ext2_mountfs(struct vnode *, struct mount *);
 static int	ext2_reload(struct mount *mp, struct thread *td);
 static int	ext2_sbupdate(struct ext2mount *, int);
 static int	ext2_cgupdate(struct ext2mount *, int);
 static vfs_unmount_t		ext2_unmount;
 static vfs_root_t		ext2_root;
 static vfs_statfs_t		ext2_statfs;
 static vfs_sync_t		ext2_sync;
 static vfs_vget_t		ext2_vget;
 static vfs_fhtovp_t		ext2_fhtovp;
 static vfs_mount_t		ext2_mount;
 
 /*
  * Malloc types.  M_EXT2NODE is not static, so it is visible outside this
  * file; M_EXT2MNT is private and used for the per-mount allocations below.
  */
 MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part");
 static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure");
 
 /* VFS operations vector, registered under the name "ext2fs" by VFS_SET(). */
 static struct vfsops ext2fs_vfsops = {
 	.vfs_fhtovp =		ext2_fhtovp,
 	.vfs_mount =		ext2_mount,
 	.vfs_root =		ext2_root,	/* root inode via vget */
 	.vfs_statfs =		ext2_statfs,
 	.vfs_sync =		ext2_sync,
 	.vfs_unmount =		ext2_unmount,
 	.vfs_vget =		ext2_vget,
 };
 
 VFS_SET(ext2fs_vfsops, ext2fs, 0);
 
 static int	ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev,
 		    int ronly);
 static int	compute_sb_data(struct vnode * devvp,
 		    struct ext2fs * es, struct m_ext2fs * fs);
 
 /*
  * NULL-terminated list of option names handed to vfs_filteropt() in
  * ext2_mount(); the mount is rejected with EINVAL when that check fails.
  */
 static const char *ext2_opts[] = { "acls", "async", "noatime", "noclusterr", 
     "noclusterw", "noexec", "export", "force", "from", "multilabel",
     "suiddir", "nosymfollow", "sync", "union", NULL };
 
 /*
  * VFS Operations.
  *
  * mount system call
  *
  * Handles both a fresh mount and an update (MNT_UPDATE) of an existing
  * one.  The options are taken from mp->mnt_optnew:
  *	"fspath"	mount point path, copied into fs->e2fs_fsmnt
  *	"from"		name of the device to mount
  *	"ro"		read-only request
  *
  * For an update the function handles the read-write -> read-only and
  * read-only -> read-write transitions and an optional MNT_RELOAD.  For a
  * new mount the device is looked up, checked for access and handed to
  * ext2_mountfs().
  *
  * Returns 0 on success or an errno value.
  */
 static int
 ext2_mount(struct mount *mp)
 {
 	struct vfsoptlist *opts;
 	struct vnode *devvp;
 	struct thread *td;
 	struct ext2mount *ump = NULL;
 	struct m_ext2fs *fs;
 	struct nameidata nd, *ndp = &nd;
 	accmode_t accmode;
 	char *path, *fspec;
 	int error, flags, len;
 
 	td = curthread;
 	opts = mp->mnt_optnew;
 
 	/* Reject option names that are not listed in ext2_opts. */
 	if (vfs_filteropt(opts, ext2_opts))
 		return (EINVAL);
 
 	/*
 	 * NOTE(review): the return value of this vfs_getopt() is not
 	 * checked, so 'path' is only valid if "fspath" is always supplied
 	 * by the caller -- confirm.
 	 */
 	vfs_getopt(opts, "fspath", (void **)&path, NULL);
 	/* Double-check the length of path.. */
 	if (strlen(path) >= MAXMNTLEN)
 		return (ENAMETOOLONG);
 
 	/* "from" is optional here; when present it must be NUL-terminated. */
 	fspec = NULL;
 	error = vfs_getopt(opts, "from", (void **)&fspec, &len);
 	if (!error && fspec[len - 1] != '\0')
 		return (EINVAL);
 
 	/*
 	 * If updating, check whether changing from read-only to
 	 * read/write; if there is no device name, that's all we do.
 	 */
 	if (mp->mnt_flag & MNT_UPDATE) {
 		ump = VFSTOEXT2(mp);
 		fs = ump->um_e2fs; 
 		error = 0;
 		/* Downgrade: currently read-write, "ro" requested. */
 		if (fs->e2fs_ronly == 0 &&
 		    vfs_flagopt(opts, "ro", NULL, 0)) {
 			error = VFS_SYNC(mp, MNT_WAIT);
 			if (error)
 				return (error);
 			flags = WRITECLOSE;
 			if (mp->mnt_flag & MNT_FORCE)
 				flags |= FORCECLOSE;
 			error = ext2_flushfiles(mp, flags, td);
 			/*
 			 * Mark the filesystem clean again only if it was
 			 * clean when mounted and everything was flushed.
 			 */
 			if ( error == 0 && fs->e2fs_wasvalid && ext2_cgupdate(ump, MNT_WAIT) == 0) {
 				fs->e2fs->e2fs_state |= E2FS_ISCLEAN;
 				ext2_sbupdate(ump, MNT_WAIT);
 			}
 			/*
 			 * NOTE(review): the state below is switched to
 			 * read-only and the GEOM write count is dropped even
 			 * when ext2_flushfiles() failed; the error is only
 			 * returned further down -- confirm this is intended.
 			 */
 			fs->e2fs_ronly = 1;
 			vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY);
 			DROP_GIANT();
 			g_topology_lock();
 			g_access(ump->um_cp, 0, -1, 0);
 			g_topology_unlock();
 			PICKUP_GIANT();
 		}
 		if (!error && (mp->mnt_flag & MNT_RELOAD))
 			error = ext2_reload(mp, td);
 		if (error)
 			return (error);
 		devvp = ump->um_devvp;
 		/* Upgrade: currently read-only, "ro" not requested. */
 		if (fs->e2fs_ronly && !vfs_flagopt(opts, "ro", NULL, 0)) {
 			if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0))
 				return (EPERM);
 
 			/*
 			 * If upgrade to read-write by non-root, then verify
 			 * that user has necessary permissions on the device.
 			 */
 			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 			error = VOP_ACCESS(devvp, VREAD | VWRITE,
 			    td->td_ucred, td);
 			if (error)
 				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 			if (error) {
 				VOP_UNLOCK(devvp, 0);
 				return (error);
 			}
 			VOP_UNLOCK(devvp, 0);
 			/* Ask GEOM for one additional write reference. */
 			DROP_GIANT();
 			g_topology_lock();
 			error = g_access(ump->um_cp, 0, 1, 0);
 			g_topology_unlock();
 			PICKUP_GIANT();
 			if (error)
 				return (error);
 
 			/*
 			 * An unclean filesystem (or one with recorded errors)
 			 * may only go read-write when MNT_FORCE is given.
 			 * NOTE(review): the EPERM return below does not undo
 			 * the g_access() write reference taken just above --
 			 * confirm whether it should.
 			 */
 			if ((fs->e2fs->e2fs_state & E2FS_ISCLEAN) == 0 ||
 			    (fs->e2fs->e2fs_state & E2FS_ERRORS)) {
 				if (mp->mnt_flag & MNT_FORCE) {
 					printf(
 "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt);
 				} else {
 					printf(
 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
 					    fs->e2fs_fsmnt);
 					return (EPERM);
 				}
 			}
 			/* Mounted read-write: clear the clean flag. */
 			fs->e2fs->e2fs_state &= ~E2FS_ISCLEAN;
 			(void)ext2_cgupdate(ump, MNT_WAIT);
 			fs->e2fs_ronly = 0;
 			MNT_ILOCK(mp);
 			mp->mnt_flag &= ~MNT_RDONLY;
 			MNT_IUNLOCK(mp);
 		}
 		if (vfs_flagopt(opts, "export", NULL, 0)) {
 			/* Process export requests in vfs_mount.c. */
 			return (error);
 		}
 	}
 
 	/*
 	 * Not an update, or updating the name: look up the name
 	 * and verify that it refers to a sensible disk device.
 	 */
 	if (fspec == NULL)
 		return (EINVAL);
 	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
 	if ((error = namei(ndp)) != 0)
 		return (error);
 	NDFREE(ndp, NDF_ONLY_PNBUF);
 	devvp = ndp->ni_vp;
 
 	if (!vn_isdisk(devvp, &error)) {
 		vput(devvp);
 		return (error);
 	}
 
 	/*
 	 * If mount by non-root, then verify that user has necessary
 	 * permissions on the device.
 	 *
 	 * XXXRW: VOP_ACCESS() enough?
 	 */
 	accmode = VREAD;
 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
 		accmode |= VWRITE;
 	error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
 	if (error)
 		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 	if (error) {
 		vput(devvp);
 		return (error);
 	}
 
 	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
 		/* New mount; ext2_mountfs() unlocks devvp itself. */
 		error = ext2_mountfs(devvp, mp);
 	} else {
 		/* An update must name the device that is already mounted. */
 		if (devvp != ump->um_devvp) {
 			vput(devvp);
 			return (EINVAL);	/* needs translation */
 		} else
 			vput(devvp);
 	}
 	if (error) {
 		vrele(devvp);
 		return (error);
 	}
 	ump = VFSTOEXT2(mp);
 	fs = ump->um_e2fs;
 
 	/*
 	 * Note that this strncpy() is ok because of a check at the start
 	 * of ext2_mount().
 	 */
 	strncpy(fs->e2fs_fsmnt, path, MAXMNTLEN);
 	fs->e2fs_fsmnt[MAXMNTLEN - 1] = '\0';
 	vfs_mountedfrom(mp, fspec);
 	return (0);
 }
 
 /*
  * Decide whether the on-disk superblock 'es' can be mounted.
  *
  * The magic number is always verified.  For revisions newer than
  * E2FS_REV0 the incompatible-feature mask is checked against what is
  * supported, and, unless 'ronly' is set, the read-only-compatible mask
  * as well.  'dev' is only used to name the device in the messages.
  *
  * Returns 0 when the superblock is acceptable, 1 otherwise.
  */
 static int
 ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly)
 {
 
 	if (es->e2fs_magic != E2FS_MAGIC) {
 		printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n",
 		    devtoname(dev), es->e2fs_magic, E2FS_MAGIC);
 		return (1);
 	}
 
 	/* Feature masks are only examined past revision 0. */
 	if (es->e2fs_rev <= E2FS_REV0)
 		return (0);
 
 	if ((es->e2fs_features_incompat &
 	    ~(EXT2F_INCOMPAT_SUPP | EXT4F_RO_INCOMPAT_SUPP)) != 0) {
 		printf(
 "WARNING: mount of %s denied due to unsupported optional features\n",
 		    devtoname(dev));
 		return (1);
 	}
 
 	if (ronly == 0 &&
 	    (es->e2fs_features_rocompat & ~EXT2F_ROCOMPAT_SUPP) != 0) {
 		printf("WARNING: R/W mount of %s denied due to "
 		    "unsupported optional features\n", devtoname(dev));
 		return (1);
 	}
 
 	return (0);
 }
 
 /*
  * This computes the fields of the in-memory superblock (struct m_ext2fs)
  * from the data in the on-disk superblock (struct ext2fs) read in, and
  * loads the group descriptors from devvp.
  *
  * On success fs->e2fs_gd and fs->e2fs_contigdirs point at freshly
  * allocated M_EXT2MNT memory and 0 is returned.
  *
  * On failure nothing allocated here is left behind:
  *	EIO	invalid inode size in the superblock (nothing allocated yet)
  *	EINVAL	extra-isize feature set but inodes too small (ditto)
  *	other	bread() error; both arrays have already been freed, but
  *		the pointers in 'fs' are NOT reset and must not be freed
  *		again by the caller.
  */
 static int
 compute_sb_data(struct vnode *devvp, struct ext2fs *es,
     struct m_ext2fs *fs)
 {
 	int db_count, error;
 	int i;
 	int logic_sb_block = 1;	/* XXX for now */
 	struct buf *bp;
 	uint32_t e2fs_descpb;
 
 	/* Block/fragment geometry derived from the log2 fields. */
 	fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->e2fs_log_bsize;
 	fs->e2fs_bsize = 1U << fs->e2fs_bshift;
 	fs->e2fs_fsbtodb = es->e2fs_log_bsize + 1;
 	fs->e2fs_qbmask = fs->e2fs_bsize - 1;
 	fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << es->e2fs_log_fsize;
 	if (fs->e2fs_fsize)
 		fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize;
 	fs->e2fs_bpg = es->e2fs_bpg;
 	fs->e2fs_fpg = es->e2fs_fpg;
 	fs->e2fs_ipg = es->e2fs_ipg;
 	if (es->e2fs_rev == E2FS_REV0) {
 		fs->e2fs_isize = E2FS_REV0_INODE_SIZE ;
 	} else {
 		fs->e2fs_isize = es->e2fs_inode_size;
 
 		/*
 		 * Simple sanity check for superblock inode size value:
 		 * at least the revision 0 size, at most one block, and a
 		 * power of two.
 		 */
 		if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE ||
 		    EXT2_INODE_SIZE(fs) > fs->e2fs_bsize ||
 		    (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) {
 			printf("ext2fs: invalid inode size %d\n",
 			    fs->e2fs_isize);
 			return (EIO);
 		}
 	}
 	/* Check for extra isize in big inodes. */
 	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) &&
 	    EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) {
 		printf("ext2fs: no space for extra inode timestamps\n");
 		return (EINVAL);
 	}
 
 	fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs);
 	fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb;
 	/* s_resuid / s_resgid ? */
 	/* Number of block groups, rounded up. */
 	fs->e2fs_gcount = (es->e2fs_bcount - es->e2fs_first_dblock +
 	    EXT2_BLOCKS_PER_GROUP(fs) - 1) / EXT2_BLOCKS_PER_GROUP(fs);
 	/* Number of blocks needed to hold all group descriptors. */
 	e2fs_descpb = fs->e2fs_bsize / sizeof(struct ext2_gd);
 	db_count = (fs->e2fs_gcount + e2fs_descpb - 1) / e2fs_descpb;
 	fs->e2fs_gdbcount = db_count;
 	fs->e2fs_gd = malloc(db_count * fs->e2fs_bsize,
 	    M_EXT2MNT, M_WAITOK);
 	fs->e2fs_contigdirs = malloc(fs->e2fs_gcount *
 	    sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO);
 
 	/*
 	 * Adjust logic_sb_block.
 	 * Godmar thinks: if the blocksize is greater than 1024, then
 	 * the superblock is logically part of block zero.
 	 */
 	if(fs->e2fs_bsize > SBSIZE)
 		logic_sb_block = 0;
 	/* Read the descriptor blocks that follow the superblock. */
 	for (i = 0; i < db_count; i++) {
 		error = bread(devvp ,
 			 fsbtodb(fs, logic_sb_block + i + 1 ),
 			fs->e2fs_bsize, NOCRED, &bp);
 		if (error) {
 			free(fs->e2fs_contigdirs, M_EXT2MNT);
 			free(fs->e2fs_gd, M_EXT2MNT);
 			brelse(bp);
 			return (error);
 		}
 		e2fs_cgload((struct ext2_gd *)bp->b_data,
 		    &fs->e2fs_gd[
 			i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
 		    fs->e2fs_bsize);
 		brelse(bp);
 		bp = NULL;
 	}
 	/* Initialization for the ext2 Orlov allocator variant. */
 	fs->e2fs_total_dir = 0;
 	for (i = 0; i < fs->e2fs_gcount; i++)
 		fs->e2fs_total_dir += fs->e2fs_gd[i].ext2bgd_ndirs;
 
 	if (es->e2fs_rev == E2FS_REV0 ||
 	    !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE))
 		fs->e2fs_maxfilesize = 0x7fffffff;
-	else {
-		fs->e2fs_maxfilesize = 0xffffffffffff;
-		if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE))
-			fs->e2fs_maxfilesize = 0x7fffffffffffffff;
-	}
-	if (es->e4fs_flags & E2FS_UNSIGNED_HASH) {
-		fs->e2fs_uhash = 3;
-	} else if ((es->e4fs_flags & E2FS_SIGNED_HASH) == 0) {
-#ifdef __CHAR_UNSIGNED__
-		es->e4fs_flags |= E2FS_UNSIGNED_HASH;
-		fs->e2fs_uhash = 3;
-#else
-		es->e4fs_flags |= E2FS_SIGNED_HASH;
-#endif
-	}
-
+	else
+		fs->e2fs_maxfilesize = 0x7fffffffffffffff;
 	return (0);
 }
 
 /*
  * Reload all incore data for a filesystem (used after running fsck on
  * the root filesystem and finding things to fix). The filesystem must
  * be mounted read-only; EINVAL is returned otherwise.
  *
  * Things to do to update the mount:
  *	1) invalidate all cached meta-data.
  *	2) re-read superblock from disk.
  *	3) invalidate all cluster summary information.
  *	4) invalidate all inactive vnodes.
  *	5) invalidate all cached file data.
  *	6) re-read inode data for all active vnodes.
  * XXX this list was inherited; not every step has a direct counterpart
  * in the code below, so this has to be reviewed.
  *
  * Returns 0 on success, EIO if the re-read superblock is not usable, or
  * the error from bread()/compute_sb_data().
  */
 static int
 ext2_reload(struct mount *mp, struct thread *td)
 {
 	struct vnode *vp, *mvp, *devvp;
 	struct inode *ip;
 	struct buf *bp;
 	struct ext2fs *es;
 	struct m_ext2fs *fs;
 	struct csum *sump;
 	int error, i;
 	int32_t *lp;
 
 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
 		return (EINVAL);
 	/*
 	 * Step 1: invalidate all cached meta-data.
 	 */
 	devvp = VFSTOEXT2(mp)->um_devvp;
 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 	if (vinvalbuf(devvp, 0, 0, 0) != 0)
 		panic("ext2_reload: dirty1");
 	VOP_UNLOCK(devvp, 0);
 
 	/*
 	 * Step 2: re-read superblock from disk.
 	 * constants have been adjusted for ext2
 	 */
 	if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
 		return (error);
 	es = (struct ext2fs *)bp->b_data;
 	if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) {
 		brelse(bp);
 		return (EIO);		/* XXX needs translation */
 	}
 	fs = VFSTOEXT2(mp)->um_e2fs;
 	bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs));
 
 	/*
 	 * NOTE(review): compute_sb_data() allocates new e2fs_gd and
 	 * e2fs_contigdirs arrays without releasing the ones already
 	 * attached to 'fs', so every reload appears to leak the previous
 	 * arrays.  Freeing them here would need synchronization against
 	 * concurrent readers -- confirm and fix separately.
 	 */
 	if((error = compute_sb_data(devvp, es, fs)) != 0) {
 		brelse(bp);
 		return (error);
 	}
 #ifdef UNKLAR
 	if (fs->fs_sbsize < SBSIZE)
 		bp->b_flags |= B_INVAL;
 #endif
 	brelse(bp);
 
 	/*
 	 * Step 3: invalidate all cluster summary information.
 	 */
 	if (fs->e2fs_contigsumsize > 0) {
 		lp = fs->e2fs_maxcluster;
 		sump = fs->e2fs_clustersum;
 		for (i = 0; i < fs->e2fs_gcount; i++, sump++) {
 			*lp++ = fs->e2fs_contigsumsize;
 			sump->cs_init = 0;
 			/*
 			 * cs_sum holds e2fs_contigsumsize + 1 elements
 			 * (see the allocation in ext2_mountfs()), so the
 			 * length must be scaled by the element size to
 			 * clear the whole array, not just its first bytes.
 			 */
 			bzero(sump->cs_sum, (fs->e2fs_contigsumsize + 1) *
 			    sizeof(*sump->cs_sum));
 		}
 	}
 
 loop:
 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 		/*
 		 * Step 4: invalidate all cached file data.
 		 * If the vnode cannot be acquired, restart the scan.
 		 */
 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 			goto loop;
 		}
 		if (vinvalbuf(vp, 0, 0, 0))
 			panic("ext2_reload: dirty2");
 
 		/*
 		 * Step 5: re-read inode data for all active vnodes.
 		 */
 		ip = VTOI(vp);
 		error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
 		    (int)fs->e2fs_bsize, NOCRED, &bp);
 		if (error) {
 			VOP_UNLOCK(vp, 0);
 			vrele(vp);
 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 			return (error);
 		}
 		/* Convert the on-disk inode inside the block into 'ip'. */
 		ext2_ei2i((struct ext2fs_dinode *) ((char *)bp->b_data +
 		    EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip);
 		brelse(bp);
 		VOP_UNLOCK(vp, 0);
 		vrele(vp);
 	}
 	return (0);
 }
 
 /*
  * Common code for mount and mountroot.
  *
  * Opens the device through GEOM, reads and validates the superblock,
  * builds the in-memory mount structures and attaches them to 'mp'.
  * devvp is unlocked here once the GEOM consumer has been opened.
  *
  * Returns 0 on success.  On failure the GEOM consumer is closed, every
  * allocation made here is released, mp->mnt_data is cleared when the
  * mount structure had been allocated, and an errno value is returned:
  *	EINVAL	unsuitable sector size or unsupported superblock
  *	EPERM	read-write mount of an unclean filesystem without MNT_FORCE
  *	other	error from g_vfs_open(), bread() or compute_sb_data()
  */
 static int
 ext2_mountfs(struct vnode *devvp, struct mount *mp)
 {
 	struct ext2mount *ump;
 	struct buf *bp;
 	struct m_ext2fs *fs;
 	struct ext2fs *es;
 	struct cdev *dev = devvp->v_rdev;
 	struct g_consumer *cp;
 	struct bufobj *bo;
 	struct csum *sump;
 	int error;
 	int ronly;
 	int i, size;
 	int32_t *lp;
 	int32_t e2fs_maxcontig;
 
 	ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0);
 	/* XXX: use VOP_ACCESS to check FS perms */
 	DROP_GIANT();
 	g_topology_lock();
 	error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1);
 	g_topology_unlock();
 	PICKUP_GIANT();
 	VOP_UNLOCK(devvp, 0);
 	if (error)
 		return (error);
 
 	/* XXX: should we check for some sectorsize or 512 instead? */
 	if (((SBSIZE % cp->provider->sectorsize) != 0) ||
 	    (SBSIZE < cp->provider->sectorsize)) {
 		DROP_GIANT();
 		g_topology_lock();
 		g_vfs_close(cp);
 		g_topology_unlock();
 		PICKUP_GIANT();
 		return (EINVAL);
 	}
 
 	bo = &devvp->v_bufobj;
 	bo->bo_private = cp;
 	bo->bo_ops = g_vfs_bufops;
 	if (devvp->v_rdev->si_iosize_max != 0)
 		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
 	if (mp->mnt_iosize_max > MAXPHYS)
 		mp->mnt_iosize_max = MAXPHYS;
 
 	bp = NULL;
 	ump = NULL;
 	if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
 		goto out;
 	es = (struct ext2fs *)bp->b_data;
 	if (ext2_check_sb_compat(es, dev, ronly) != 0) {
 		error = EINVAL;		/* XXX needs translation */
 		goto out;
 	}
 	if ((es->e2fs_state & E2FS_ISCLEAN) == 0 ||
 	    (es->e2fs_state & E2FS_ERRORS)) {
 		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
 			printf(
 "WARNING: Filesystem was not properly dismounted\n");
 		} else {
 			printf(
 "WARNING: R/W mount denied.  Filesystem is not clean - run fsck\n");
 			error = EPERM;
 			goto out;
 		}
 	}
 	ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO);
 
 	/*
 	 * I don't know whether this is the right strategy. Note that
 	 * we dynamically allocate both an ext2_sb_info and an ext2_super_block
 	 * while Linux keeps the super block in a locked buffer.
 	 */
 	ump->um_e2fs = malloc(sizeof(struct m_ext2fs),
 		M_EXT2MNT, M_WAITOK);
 	ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs),
 		M_EXT2MNT, M_WAITOK);
 	mtx_init(EXT2_MTX(ump), "EXT2FS", "EXT2FS Lock", MTX_DEF);
 	bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs));
 	if ((error = compute_sb_data(devvp, ump->um_e2fs->e2fs,
 	    ump->um_e2fs)) != 0) {
 		/*
 		 * When compute_sb_data() fails it has either not allocated
 		 * the group descriptor and contigdirs arrays yet, or has
 		 * already freed them.  um_e2fs is not zero-filled, so clear
 		 * both pointers to keep the cleanup code below from freeing
 		 * garbage or freeing the arrays a second time.
 		 */
 		ump->um_e2fs->e2fs_gd = NULL;
 		ump->um_e2fs->e2fs_contigdirs = NULL;
 		goto out;
 	}
 
 	/*
 	 * Calculate the maximum contiguous blocks and size of cluster summary
 	 * array.  In FFS this is done by newfs; however, the superblock
 	 * in ext2fs doesn't have these variables, so we can calculate
 	 * them here.
 	 */
 	e2fs_maxcontig = MAX(1, MAXPHYS / ump->um_e2fs->e2fs_bsize);
 	ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG);
 	if (ump->um_e2fs->e2fs_contigsumsize > 0) {
 		size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t);
 		ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK);
 		size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum);
 		ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK);
 		lp = ump->um_e2fs->e2fs_maxcluster;
 		sump = ump->um_e2fs->e2fs_clustersum;
 		/* One zeroed summary array per block group. */
 		for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) {
 			*lp++ = ump->um_e2fs->e2fs_contigsumsize;
 			sump->cs_init = 0;
 			sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) *
 			    sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO);
 		}
 	}
 
 	brelse(bp);
 	bp = NULL;
 	fs = ump->um_e2fs;
 	fs->e2fs_ronly = ronly;	/* ronly is set according to mnt_flags */
 
 	/*
 	 * If the fs is not mounted read-only, make sure the super block is
 	 * always written back on a sync().
 	 */
 	fs->e2fs_wasvalid = fs->e2fs->e2fs_state & E2FS_ISCLEAN ? 1 : 0;
 	if (ronly == 0) {
 		fs->e2fs_fmod = 1;		/* mark it modified */
 		fs->e2fs->e2fs_state &= ~E2FS_ISCLEAN;	/* set fs invalid */
 	}
 	mp->mnt_data = ump;
 	mp->mnt_stat.f_fsid.val[0] = dev2udev(dev);
 	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
 	mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN;
 	MNT_ILOCK(mp);
 	mp->mnt_flag |= MNT_LOCAL;
 	MNT_IUNLOCK(mp);
 	ump->um_mountp = mp;
 	ump->um_dev = dev;
 	ump->um_devvp = devvp;
 	ump->um_bo = &devvp->v_bufobj;
 	ump->um_cp = cp;
 
 	/*
 	 * Setting those two parameters allowed us to use
 	 * ufs_bmap w/o changes!
 	 */
 	ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs);
 	ump->um_bptrtodb = fs->e2fs->e2fs_log_bsize + 1;
 	ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs);
 	if (ronly == 0)
 		ext2_sbupdate(ump, MNT_WAIT);
 	/*
 	 * Initialize filesystem stat information in mount struct.
 	 */
 	MNT_ILOCK(mp);
 	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
 	    MNTK_USES_BCACHE;
 	MNT_IUNLOCK(mp);
 	return (0);
 out:
 	/* Failure: undo everything in reverse order of acquisition. */
 	if (bp)
 		brelse(bp);
 	if (cp != NULL) {
 		DROP_GIANT();
 		g_topology_lock();
 		g_vfs_close(cp);
 		g_topology_unlock();
 		PICKUP_GIANT();
 	}
 	if (ump) {
 		mtx_destroy(EXT2_MTX(ump));
 		free(ump->um_e2fs->e2fs_gd, M_EXT2MNT);
 		free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT);
 		free(ump->um_e2fs->e2fs, M_EXT2MNT);
 		free(ump->um_e2fs, M_EXT2MNT);
 		free(ump, M_EXT2MNT);
 		mp->mnt_data = NULL;
 	}
 	return (error);
 }
 
 /*
  * Unmount system call.
  */
 static int
 ext2_unmount(struct mount *mp, int mntflags)
 {
 	struct ext2mount *ump;
 	struct m_ext2fs *fs;
 	struct csum *sump;
 	int error, flags, i, ronly;
 
 	flags = 0;
 	if (mntflags & MNT_FORCE) {
 		if (mp->mnt_flag & MNT_ROOTFS)
 			return (EINVAL);
 		flags |= FORCECLOSE;
 	}
 	if ((error = ext2_flushfiles(mp, flags, curthread)) != 0)
 		return (error);
 	ump = VFSTOEXT2(mp);
 	fs = ump->um_e2fs;
 	ronly = fs->e2fs_ronly;
 	if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) {
 		if (fs->e2fs_wasvalid)
 			fs->e2fs->e2fs_state |= E2FS_ISCLEAN;
 		ext2_sbupdate(ump, MNT_WAIT);
 	}
 
 	DROP_GIANT();
 	g_topology_lock();
 	g_vfs_close(ump->um_cp);
 	g_topology_unlock();
 	PICKUP_GIANT();
 	vrele(ump->um_devvp);
 	sump = fs->e2fs_clustersum;
 	for (i = 0; i < fs->e2fs_gcount; i++, sump++)
 		free(sump->cs_sum, M_EXT2MNT);
 	free(fs->e2fs_clustersum, M_EXT2MNT);
 	free(fs->e2fs_maxcluster, M_EXT2MNT);
 	free(fs->e2fs_gd, M_EXT2MNT);
 	free(fs->e2fs_contigdirs, M_EXT2MNT);
 	free(fs->e2fs, M_EXT2MNT);
 	free(fs, M_EXT2MNT);
 	free(ump, M_EXT2MNT);
 	mp->mnt_data = NULL;
 	MNT_ILOCK(mp);
 	mp->mnt_flag &= ~MNT_LOCAL;
 	MNT_IUNLOCK(mp);
 	return (error);
 }
 
 /*
  * Flush out all the files in a filesystem.
  *
  * Thin wrapper that hands 'flags' and 'td' to vflush() with no root
  * vnode references to skip, and returns its result.
  */
 static int
 ext2_flushfiles(struct mount *mp, int flags, struct thread *td)
 {
 
 	return (vflush(mp, 0, flags, td));
 }
 /*
  * Get filesystem statistics.
  */
 int
 ext2_statfs(struct mount *mp, struct statfs *sbp)
 {
 	struct ext2mount *ump;
 	struct m_ext2fs *fs;
 	uint32_t overhead, overhead_per_group, ngdb;
 	int i, ngroups;
 
 	ump = VFSTOEXT2(mp);
 	fs = ump->um_e2fs;
 	if (fs->e2fs->e2fs_magic != E2FS_MAGIC)
 		panic("ext2_statfs");
 
 	/*
 	 * Compute the overhead (FS structures)
 	 */
 	overhead_per_group =
 	    1 /* block bitmap */ +
 	    1 /* inode bitmap */ +
 	    fs->e2fs_itpg;
 	overhead = fs->e2fs->e2fs_first_dblock +
 	    fs->e2fs_gcount * overhead_per_group;
 	if (fs->e2fs->e2fs_rev > E2FS_REV0 &&
 	    fs->e2fs->e2fs_features_rocompat & EXT2F_ROCOMPAT_SPARSESUPER) {
 		for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) {
 			if (cg_has_sb(i))
 				ngroups++;
 		}
 	} else {
 		ngroups = fs->e2fs_gcount;
 	}
 	ngdb = fs->e2fs_gdbcount;
 	if (fs->e2fs->e2fs_rev > E2FS_REV0 &&
 	    fs->e2fs->e2fs_features_compat & EXT2F_COMPAT_RESIZE)
 		ngdb += fs->e2fs->e2fs_reserved_ngdb;
 	overhead += ngroups * (1 /* superblock */ + ngdb);
 
 	sbp->f_bsize = EXT2_FRAG_SIZE(fs);
 	sbp->f_iosize = EXT2_BLOCK_SIZE(fs);
 	sbp->f_blocks = fs->e2fs->e2fs_bcount - overhead;
 	sbp->f_bfree = fs->e2fs->e2fs_fbcount;
 	sbp->f_bavail = sbp->f_bfree - fs->e2fs->e2fs_rbcount;
 	sbp->f_files = fs->e2fs->e2fs_icount;
 	sbp->f_ffree = fs->e2fs->e2fs_ficount;
 	return (0);
 }
 
 /*
  * Go through the disk queues to initiate sandbagged IO;
  * go through the inodes to write those that have been modified;
  * initiate the writing of the super block if it has been modified.
  *
  * Note: we are always called with the filesystem marked `MPBUSY'.
  *
  * Returns 0 when every step succeeded, otherwise the most recent error
  * reported by VOP_FSYNC() or ext2_cgupdate() (later errors overwrite
  * earlier ones).
  */
 static int
 ext2_sync(struct mount *mp, int waitfor)
 {
 	struct vnode *mvp, *vp;
 	struct thread *td;
 	struct inode *ip;
 	struct ext2mount *ump = VFSTOEXT2(mp);
 	struct m_ext2fs *fs;
 	int error, allerror = 0;
 
 	td = curthread;
 	fs = ump->um_e2fs;
 	/* A read-only filesystem must never have a modified superblock. */
 	if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) {		/* XXX */
 		printf("fs = %s\n", fs->e2fs_fsmnt);
 		panic("ext2_sync: rofs mod");
 	}
 
 	/*
 	 * Write back each (modified) inode.
 	 */
 loop:
 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 		/* Skip typeless vnodes; drop the interlock before moving on. */
 		if (vp->v_type == VNON) {
 			VI_UNLOCK(vp);
 			continue;
 		}
 		ip = VTOI(vp);
 		/*
 		 * Nothing to do when no inode flag is pending and either
 		 * there are no dirty buffers or this is a lazy sync.
 		 */
 		if ((ip->i_flag &
 		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
 		    (vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
 		    waitfor == MNT_LAZY)) {
 			VI_UNLOCK(vp);
 			continue;
 		}
 		/*
 		 * Take the vnode lock without sleeping (LK_NOWAIT), handing
 		 * over the interlock.  ENOENT aborts the iteration and
 		 * restarts the scan from the top; any other failure just
 		 * skips this vnode.
 		 */
 		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td);
 		if (error) {
 			if (error == ENOENT) {
 				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 				goto loop;
 			}
 			continue;
 		}
 		if ((error = VOP_FSYNC(vp, waitfor, td)) != 0)
 			allerror = error;
 		VOP_UNLOCK(vp, 0);
 		vrele(vp);
 	}
 
 	/*
 	 * Force stale filesystem control information to be flushed.
 	 * This step is skipped entirely for a lazy sync.
 	 */
 	if (waitfor != MNT_LAZY) {
 		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
 		if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0)
 			allerror = error;
 		VOP_UNLOCK(ump->um_devvp, 0);
 	}
 
 	/*
 	 * Write back modified superblock.
 	 * The modified flag is cleared and the write time stamped before
 	 * ext2_cgupdate() is called; the flag is not restored on failure.
 	 */
 	if (fs->e2fs_fmod != 0) {
 		fs->e2fs_fmod = 0;
 		fs->e2fs->e2fs_wtime = time_second;
 		if ((error = ext2_cgupdate(ump, waitfor)) != 0)
 			allerror = error;
 	}
 	return (allerror);
 }
 
 /*
  * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it
  * in from disk.  If it is in core, wait for the lock bit to clear, then
  * return the inode locked.  Detection and handling of mount points must be
  * done by the calling routine.
  *
  * On success 0 is returned and *vpp holds the vnode.  On every failure
  * path handled in this function *vpp is set to NULL and the error from
  * the failing call is returned.
  */
 static int
 ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
 {
 	struct m_ext2fs *fs;
 	struct inode *ip;
 	struct ext2mount *ump;
 	struct buf *bp;
 	struct vnode *vp;
 	struct thread *td;
 	int i, error;
 	int used_blocks;
 
 	td = curthread;
 	/* Fast path: the hash lookup failed or already produced a vnode. */
 	error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL);
 	if (error || *vpp != NULL)
 		return (error);
 
 	ump = VFSTOEXT2(mp);
 	/* Zero-filled in-core inode; M_WAITOK means the result is not checked. */
 	ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO);
 
 	/* Allocate a new vnode/inode. */
 	if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) {
 		*vpp = NULL;
 		free(ip, M_EXT2NODE);
 		return (error);
 	}
 	/* Cross-link vnode and inode and record where the inode lives. */
 	vp->v_data = ip;
 	ip->i_vnode = vp;
 	ip->i_e2fs = fs = ump->um_e2fs;
 	ip->i_ump  = ump;
 	ip->i_number = ino;
 
 	/* Lock the new vnode exclusively before attaching it to the mount. */
 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
 	error = insmntque(vp, mp);
 	if (error != 0) {
 		/*
 		 * NOTE(review): only ip is released here; presumably
 		 * insmntque() disposes of vp itself on failure - confirm.
 		 */
 		free(ip, M_EXT2NODE);
 		*vpp = NULL;
 		return (error);
 	}
 	/*
 	 * Publish the vnode in the hash.  If the insert fails or hands
 	 * back a vnode in *vpp, return that result instead - presumably
 	 * another thread instantiated the same inode first; confirm
 	 * against vfs_hash_insert().
 	 */
 	error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL);
 	if (error || *vpp != NULL)
 		return (error);
 
 	/* Read in the disk contents for the inode, copy into the inode. */
 	if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
 	    (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) {
 		/*
 		 * The inode does not contain anything useful, so it would
 		 * be misleading to leave it on its hash chain. With mode
 		 * still zero, it will be unlinked and returned to the free
 		 * list by vput().
 		 */
 		brelse(bp);
 		vput(vp);
 		*vpp = NULL;
 		return (error);
 	}
 	/*
 	 * convert ext2 inode to dinode
 	 * The on-disk inode sits inside the block just read, at byte
 	 * offset EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino).
 	 */
 	ext2_ei2i((struct ext2fs_dinode *) ((char *)bp->b_data + EXT2_INODE_SIZE(fs) *
 			ino_to_fsbo(fs, ino)), ip);
 	ip->i_block_group = ino_to_cg(fs, ino);
 	ip->i_next_alloc_block = 0;
 	ip->i_next_alloc_goal = 0;
 
 	/*
 	 * Now we want to make sure that block pointers for unused
 	 * blocks are zeroed out - ext2_balloc depends on this
 	 * although for regular files and directories only
 	 *
 	 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed
 	 * out because we could corrupt the extent tree.
 	 */
 	if (!(ip->i_flag & IN_E4EXTENTS) &&
 	    (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) {
 		/* Blocks covered by i_size, rounded up to a whole block. */
 		used_blocks = (ip->i_size+fs->e2fs_bsize-1) / fs->e2fs_bsize;
 		for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++)
 			ip->i_db[i] = 0;
 	}
 #ifdef EXT2FS_DEBUG
 	ext2_print_inode(ip);
 #endif
 	/* The inode has been copied out; the buffer is no longer needed. */
 	bqrelse(bp);
 
 	/*
 	 * Initialize the vnode from the inode, check for aliases.
 	 * Note that the underlying vnode may have changed.
 	 */
 	if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) {
 		vput(vp);
 		*vpp = NULL;
 		return (error);
 	}
 
 	/*
 	 * Finish inode initialization.
 	 */
 
 	/*
 	 * Set up a generation number for this inode if it does not
 	 * already have one. This should only happen on old filesystems.
 	 * The inode is only marked modified when the mount is writable.
 	 */
 	if (ip->i_gen == 0) {
 		ip->i_gen = random() + 1;
 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
 			ip->i_flag |= IN_MODIFIED;
 	}
 	*vpp = vp;
 	return (0);
 }
 
 /*
  * File handle to vnode
  *
  * Have to be really careful about stale file handles:
  * - check that the inode number is valid
  * - call ext2_vget() to get the locked inode
  * - check for an unallocated inode (i_mode == 0)
  * - check that the given client host has export rights and return
  *   those rights via. exflagsp and credanonp
  */
 static int
 ext2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
 {
 	struct inode *ip;
 	struct ufid *ufhp;
 	struct vnode *nvp;
 	struct m_ext2fs *fs;
 	int error;
 
 	ufhp = (struct ufid *)fhp;
 	fs = VFSTOEXT2(mp)->um_e2fs;
 	if (ufhp->ufid_ino < EXT2_ROOTINO ||
 	    ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs->e2fs_ipg)
 		return (ESTALE);
 
 	error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp);
 	if (error) {
 		*vpp = NULLVP;
 		return (error);
 	}
 	ip = VTOI(nvp);
 	if (ip->i_mode == 0 ||
 	    ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) {
 		vput(nvp);
 		*vpp = NULLVP;
 		return (ESTALE);
 	}
 	*vpp = nvp;
 	vnode_create_vobject(*vpp, 0, curthread);
 	return (0);
 }
 
 /*
  * Copy the in-core superblock into the buffer for SBLOCK on the device
  * vnode and write that buffer out.
  *
  * With waitfor == MNT_WAIT the write is synchronous and the bwrite()
  * result is returned; otherwise the write is started asynchronously
  * and 0 is returned.
  */
 static int
 ext2_sbupdate(struct ext2mount *mp, int waitfor)
 {
 	struct ext2fs *es = mp->um_e2fs->e2fs;
 	struct buf *bp;
 
 	bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0);
 	bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs));
 
 	/*
 	 * The buffers for group descriptors, inode bitmaps and block bitmaps
 	 * are not busy at this point and are (hopefully) written by the
 	 * usual sync mechanism. No need to write them here.
 	 */
 	if (waitfor != MNT_WAIT) {
 		bawrite(bp);
 		return (0);
 	}
 	return (bwrite(bp));
 }
 /*
  * Write the superblock and every group descriptor block back to disk.
  *
  * The superblock goes out first through ext2_sbupdate().  Each of the
  * e2fs_gdbcount descriptor blocks, located right after the superblock
  * block, is then refilled from the in-core descriptor array and
  * written.  With waitfor == MNT_WAIT the writes are synchronous,
  * otherwise they are only started.
  *
  * Returns 0 on success, otherwise the first error seen: the superblock
  * error takes precedence, followed by the first failed descriptor
  * write.  A failed write is never masked by a later successful one, so
  * callers that test the result before marking the filesystem clean get
  * a reliable answer.
  */
 int
 ext2_cgupdate(struct ext2mount *mp, int waitfor)
 {
 	struct m_ext2fs *fs = mp->um_e2fs;
 	struct buf *bp;
 	int i, error, allerror;
 
 	allerror = ext2_sbupdate(mp, waitfor);
 	for (i = 0; i < fs->e2fs_gdbcount; i++) {
 		bp = getblk(mp->um_devvp, fsbtodb(fs,
 		    fs->e2fs->e2fs_first_dblock +
 		    1 /* superblock */ + i), fs->e2fs_bsize, 0, 0, 0);
 		/* Block i holds the descriptors starting at this index. */
 		e2fs_cgsave(&fs->e2fs_gd[
 		    i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
 		    (struct ext2_gd *)bp->b_data, fs->e2fs_bsize);
 		if (waitfor == MNT_WAIT) {
 			/* Latch the first failure; keep writing the rest. */
 			error = bwrite(bp);
 			if (error != 0 && allerror == 0)
 				allerror = error;
 		} else {
 			bawrite(bp);
 		}
 	}
 
 	return (allerror);
 }
 /*
  * Return the root of a filesystem.
  */
 static int
 ext2_root(struct mount *mp, int flags, struct vnode **vpp)
 {
 	struct vnode *nvp;
 	int error;
 
 	error = VFS_VGET(mp, EXT2_ROOTINO, LK_EXCLUSIVE, &nvp);
 	if (error)
 		return (error);
 	*vpp = nvp;
 	return (0);
 }
Index: user/ngie/more-tests/sys/fs/ext2fs/ext2fs.h
===================================================================
--- user/ngie/more-tests/sys/fs/ext2fs/ext2fs.h	(revision 281675)
+++ user/ngie/more-tests/sys/fs/ext2fs/ext2fs.h	(revision 281676)
@@ -1,299 +1,291 @@
 /*-
  *  modified for EXT2FS support in Lites 1.1
  *
  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
  *  University of Utah, Department of Computer Science
  *
  * $FreeBSD$
  */
 /*-
  * Copyright (c) 2009 Aditya Sarawgi
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * 
  */
 
 #ifndef _FS_EXT2FS_EXT2FS_H_
 #define	_FS_EXT2FS_EXT2FS_H_
 
 #include <sys/types.h>
 
 /*
  * Super block for an ext2fs file system.
  *
  * ext2_sbupdate() copies this structure verbatim (sizeof(struct ext2fs)
  * bytes) into the superblock buffer, so the order and width of the
  * fields below define what is written to disk; do not reorder or
  * resize them.
  *
  * NOTE(review): the e3fs_ and e4fs_ prefixes presumably mark fields
  * introduced by ext3 and ext4, and the *_hi fields presumably hold the
  * upper 32 bits of the matching 32-bit counters - confirm against the
  * on-disk format documentation.
  */
 struct ext2fs {
 	uint32_t  e2fs_icount;		/* Inode count */
 	uint32_t  e2fs_bcount;		/* blocks count */
 	uint32_t  e2fs_rbcount;		/* reserved blocks count */
 	uint32_t  e2fs_fbcount;		/* free blocks count */
 	uint32_t  e2fs_ficount;		/* free inodes count */
 	uint32_t  e2fs_first_dblock;	/* first data block */
 	uint32_t  e2fs_log_bsize;	/* block size = 1024*(2^e2fs_log_bsize) */
 	uint32_t  e2fs_log_fsize;	/* fragment size */
 	uint32_t  e2fs_bpg;		/* blocks per group */
 	uint32_t  e2fs_fpg;		/* frags per group */
 	uint32_t  e2fs_ipg;		/* inodes per group */
 	uint32_t  e2fs_mtime;		/* mount time */
 	uint32_t  e2fs_wtime;		/* write time */
 	uint16_t  e2fs_mnt_count;	/* mount count */
 	uint16_t  e2fs_max_mnt_count;	/* max mount count */
 	uint16_t  e2fs_magic;		/* magic number */
 	uint16_t  e2fs_state;		/* file system state */
 	uint16_t  e2fs_beh;		/* behavior on errors */
 	uint16_t  e2fs_minrev;		/* minor revision level */
 	uint32_t  e2fs_lastfsck;	/* time of last fsck */
 	uint32_t  e2fs_fsckintv;	/* max time between fscks */
 	uint32_t  e2fs_creator;		/* creator OS */
 	uint32_t  e2fs_rev;		/* revision level */
 	uint16_t  e2fs_ruid;		/* default uid for reserved blocks */
 	uint16_t  e2fs_rgid;		/* default gid for reserved blocks */
 	/* EXT2_DYNAMIC_REV superblocks */
 	uint32_t  e2fs_first_ino;	/* first non-reserved inode */
 	uint16_t  e2fs_inode_size;	/* size of inode structure */
 	uint16_t  e2fs_block_group_nr;	/* block grp number of this sblk*/
 	uint32_t  e2fs_features_compat; /*  compatible feature set */
 	uint32_t  e2fs_features_incompat; /* incompatible feature set */
 	uint32_t  e2fs_features_rocompat; /* RO-compatible feature set */
 	uint8_t	  e2fs_uuid[16];	/* 128-bit uuid for volume */
 	char      e2fs_vname[16];	/* volume name */
 	char      e2fs_fsmnt[64];	/* name mounted on */
 	uint32_t  e2fs_algo;		/* For compression */
 	uint8_t   e2fs_prealloc;	/* # of blocks for old prealloc */
 	uint8_t   e2fs_dir_prealloc;	/* # of blocks for old prealloc dirs */
 	uint16_t  e2fs_reserved_ngdb;	/* # of reserved gd blocks for resize */
 	char      e3fs_journal_uuid[16]; /* uuid of journal superblock */
 	uint32_t  e3fs_journal_inum;	/* inode number of journal file */
 	uint32_t  e3fs_journal_dev;	/* device number of journal file */
 	uint32_t  e3fs_last_orphan;	/* start of list of inodes to delete */
 	uint32_t  e3fs_hash_seed[4];	/* HTREE hash seed */
 	char      e3fs_def_hash_version; /* Default hash version to use */
 	char      e3fs_reserved_char_pad;
 	uint32_t  e3fs_default_mount_opts;
 	uint32_t  e3fs_first_meta_bg;	/* First metablock block group */
 	uint32_t  e3fs_mkfs_time;      /* when the fs was created */
 	uint32_t  e3fs_jnl_blks[17];   /* backup of the journal inode */
 	uint32_t  e4fs_bcount_hi;      /* block count */
 	uint32_t  e4fs_rbcount_hi;     /* reserved blocks count */
 	uint32_t  e4fs_fbcount_hi;     /* free blocks count */
 	uint16_t  e4fs_min_extra_isize;/* all inodes have at least some bytes */
 	uint16_t  e4fs_want_extra_isize; /* inodes must reserve some bytes */
 	uint32_t  e4fs_flags;	  /* miscellaneous flags */
 	uint16_t  e4fs_raid_stride;    /* RAID stride */
 	uint16_t  e4fs_mmpintv;	/* number of seconds to wait in MMP checking */
 	uint64_t  e4fs_mmpblk;	 /* block for multi-mount protection */
 	uint32_t  e4fs_raid_stripe_wid;/* blocks on all data disks (N * stride) */
 	uint8_t   e4fs_log_gpf;	/* FLEX_BG group size */
 	uint8_t   e4fs_char_pad2;
 	uint16_t  e4fs_pad;
 	uint32_t  reserved2[162];	/* Padding to the end of the block */
 };
 
 /*
  * The path name on which the file system is mounted is maintained
  * in e2fs_fsmnt. MAXMNTLEN defines the amount of space allocated in
  * the in-memory super block (struct m_ext2fs) for this name.
  */
 #define	MAXMNTLEN 512
 
 /*
  * In-Memory Superblock
  *
  * Per-mount state kept alongside the on-disk superblock image, which is
  * reached through the e2fs pointer.  The pointer members (e2fs,
  * e2fs_contigdirs, e2fs_gd, e2fs_maxcluster, e2fs_clustersum) are
  * separate allocations that ext2_unmount() frees one by one.
  */
 
 struct m_ext2fs {
 	struct ext2fs * e2fs;	  /* on-disk superblock image */
 	char     e2fs_fsmnt[MAXMNTLEN];/* name mounted on */
 	char     e2fs_ronly;	  /* mounted read-only flag */
 	char     e2fs_fmod;	  /* super block modified flag */
 	uint32_t e2fs_bsize;	  /* Block size */
 	uint32_t e2fs_bshift;	  /* calc of logical block no */
 	uint32_t e2fs_bpg;	  /* Number of blocks per group */
 	int64_t  e2fs_qbmask;	  /* = s_blocksize -1 */
 	uint32_t e2fs_fsbtodb;	  /* Shift to get disk block */
 	uint32_t e2fs_ipg;	  /* Number of inodes per group */
 	uint32_t e2fs_ipb;	  /* Number of inodes per block */
 	uint32_t e2fs_itpg;	  /* Number of inode table per group */
 	uint32_t e2fs_fsize;	  /* Size of fragments per block */
 	uint32_t e2fs_fpb;	  /* Number of fragments per block */
 	uint32_t e2fs_fpg;	  /* Number of fragments per group */
 	uint32_t e2fs_gdbcount;	  /* Number of group descriptor blocks */
 	uint32_t e2fs_gcount;	  /* Number of groups */
 	uint32_t e2fs_isize;	  /* Size of inode */
 	uint32_t e2fs_total_dir;  /* Total number of directories */
 	uint8_t	*e2fs_contigdirs; /* (u) # of contig. allocated dirs */
 	char     e2fs_wasvalid;	  /* valid at mount time */
 	off_t    e2fs_maxfilesize;
 	struct   ext2_gd *e2fs_gd; /* Group Descriptors */
 	int32_t  e2fs_contigsumsize;    /* size of cluster summary array */
 	int32_t *e2fs_maxcluster;       /* max cluster in each cyl group */
 	struct   csum *e2fs_clustersum; /* cluster summary in each cyl group */
-	int32_t  e2fs_uhash;	  /* 3 if hash should be signed, 0 if not */
 };
 
 /*
  * cluster summary information
  *
  * m_ext2fs.e2fs_clustersum points at an array of these, one entry per
  * block group; ext2_unmount() frees each entry's cs_sum array before
  * freeing the array itself.
  */
 
 struct csum {
 	int8_t   cs_init; /* cluster summary has been initialized */
 	int32_t *cs_sum;  /* cluster summary array */
 };
 
 /*
  * The second extended file system magic number
  */
 #define	E2FS_MAGIC		0xEF53
 
 /*
  * Revision levels
  */
 #define	E2FS_REV0		0	/* The good old (original) format */
 #define	E2FS_REV1		1	/* V2 format w/ dynamic inode sizes */
 
 #define	E2FS_REV0_INODE_SIZE 128
 
 /*
  * compatible/incompatible features
  */
 #define	EXT2F_COMPAT_PREALLOC		0x0001
 #define	EXT2F_COMPAT_HASJOURNAL		0x0004
 #define	EXT2F_COMPAT_RESIZE		0x0010
 #define	EXT2F_COMPAT_DIRHASHINDEX	0x0020
 
 #define	EXT2F_ROCOMPAT_SPARSESUPER	0x0001
 #define	EXT2F_ROCOMPAT_LARGEFILE	0x0002
 #define	EXT2F_ROCOMPAT_BTREE_DIR	0x0004
 #define	EXT2F_ROCOMPAT_HUGE_FILE	0x0008
 #define	EXT2F_ROCOMPAT_GDT_CSUM		0x0010
 #define	EXT2F_ROCOMPAT_DIR_NLINK	0x0020
 #define	EXT2F_ROCOMPAT_EXTRA_ISIZE	0x0040
 
 #define	EXT2F_INCOMPAT_COMP		0x0001
 #define	EXT2F_INCOMPAT_FTYPE		0x0002
 #define	EXT2F_INCOMPAT_META_BG		0x0010
 #define	EXT2F_INCOMPAT_EXTENTS		0x0040
 #define	EXT2F_INCOMPAT_64BIT		0x0080
 #define	EXT2F_INCOMPAT_MMP		0x0100
 #define	EXT2F_INCOMPAT_FLEX_BG		0x0200
 
 /*
  * Features supported in this implementation
  *
  * We support the following REV1 features:
  * - EXT2F_ROCOMPAT_SPARSESUPER
  * - EXT2F_ROCOMPAT_LARGEFILE
  * - EXT2F_ROCOMPAT_EXTRA_ISIZE
  * - EXT2F_INCOMPAT_FTYPE
  *
  * We partially (read-only) support the following EXT4 features:
  * - EXT2F_ROCOMPAT_HUGE_FILE
  * - EXT2F_INCOMPAT_EXTENTS
  *
  * We do not support these EXT4 features but they are irrelevant
  * for read-only support:
  * - EXT2F_INCOMPAT_FLEX_BG
  * - EXT2F_INCOMPAT_META_BG
  */
-#define	EXT2F_COMPAT_SUPP		EXT2F_COMPAT_DIRHASHINDEX
 #define	EXT2F_ROCOMPAT_SUPP		(EXT2F_ROCOMPAT_SPARSESUPER | \
 					 EXT2F_ROCOMPAT_LARGEFILE | \
 					 EXT2F_ROCOMPAT_EXTRA_ISIZE)
 #define	EXT2F_INCOMPAT_SUPP		EXT2F_INCOMPAT_FTYPE
 #define	EXT4F_RO_INCOMPAT_SUPP		(EXT2F_INCOMPAT_EXTENTS | \
 					 EXT2F_INCOMPAT_FLEX_BG | \
 					 EXT2F_INCOMPAT_META_BG )
 
 /* Assume that user mode programs are passing in an ext2fs superblock, not
  * a kernel struct super_block.  This will allow us to call the feature-test
  * macros from user land. */
 #define	EXT2_SB(sb)	(sb)
 
 /*
  * Feature set definitions
  */
 #define	EXT2_HAS_COMPAT_FEATURE(sb,mask)			\
 	( EXT2_SB(sb)->e2fs->e2fs_features_compat & htole32(mask) )
 #define	EXT2_HAS_RO_COMPAT_FEATURE(sb,mask)			\
 	( EXT2_SB(sb)->e2fs->e2fs_features_rocompat & htole32(mask) )
 #define	EXT2_HAS_INCOMPAT_FEATURE(sb,mask)			\
 	( EXT2_SB(sb)->e2fs->e2fs_features_incompat & htole32(mask) )
 
 /*
  * File clean flags
  */
 #define	E2FS_ISCLEAN			0x0001	/* Unmounted cleanly */
 #define	E2FS_ERRORS			0x0002	/* Errors detected */
-
-/*
- * Filesystem miscellaneous flags
- */
-#define	E2FS_SIGNED_HASH	0x0001
-#define	E2FS_UNSIGNED_HASH	0x0002
 
 /*
  * ext2 file system block group descriptor
  *
  * ext2_cgupdate() copies runs of these structures straight into the
  * group descriptor block buffers with e2fs_cgsave(), so the field
  * order and widths below are what ends up on disk.
  */
 
 struct ext2_gd {
 	uint32_t ext2bgd_b_bitmap;	/* blocks bitmap block */
 	uint32_t ext2bgd_i_bitmap;	/* inodes bitmap block */
 	uint32_t ext2bgd_i_tables;	/* inodes table block  */
 	uint16_t ext2bgd_nbfree;	/* number of free blocks */
 	uint16_t ext2bgd_nifree;	/* number of free inodes */
 	uint16_t ext2bgd_ndirs;		/* number of directories */
 	uint16_t ext4bgd_flags;		/* block group flags */
 	uint32_t ext4bgd_x_bitmap;	/* snapshot exclusion bitmap loc. */
 	uint16_t ext4bgd_b_bmap_csum;	/* block bitmap checksum */
 	uint16_t ext4bgd_i_bmap_csum;	/* inode bitmap checksum */
 	uint16_t ext4bgd_i_unused;	/* unused inode count */
 	uint16_t ext4bgd_csum;		/* group descriptor checksum */
 };
 
 
 /*
  * EXT2FS metadata is stored in little-endian byte order.  These macros
  * copy group descriptors from a disk buffer into memory (e2fs_cgload)
  * and back (e2fs_cgsave); both copy `size' bytes from `old' to `new'
  * with memcpy() and do no byte swapping.
  *
  * The expansions carry no trailing semicolon, so an invocation acts as
  * a single expression statement and stays valid in an unbraced
  * if/else.  Callers supply the terminating `;' themselves.
  */
 
 #define	e2fs_cgload(old, new, size) memcpy((new), (old), (size))
 #define	e2fs_cgsave(old, new, size) memcpy((new), (old), (size))
 
 /*
  * Macro-instructions used to manage several block sizes
  */
 #define	EXT2_MAX_BLOCK_SIZE		4096
 #define	EXT2_MIN_BLOCK_LOG_SIZE		  10
 #define	EXT2_BLOCK_SIZE(s)		((s)->e2fs_bsize)
 #define	EXT2_ADDR_PER_BLOCK(s)		(EXT2_BLOCK_SIZE(s) / sizeof(uint32_t))
 #define	EXT2_INODE_SIZE(s)		(EXT2_SB(s)->e2fs_isize)
 
 /*
  * Macro-instructions used to manage fragments
  */
 #define	EXT2_MIN_FRAG_SIZE		1024
 #define	EXT2_MAX_FRAG_SIZE		4096
 #define	EXT2_MIN_FRAG_LOG_SIZE		  10
 #define	EXT2_FRAG_SIZE(s)		(EXT2_SB(s)->e2fs_fsize)
 #define	EXT2_FRAGS_PER_BLOCK(s)		(EXT2_SB(s)->e2fs_fpb)
 
 /*
  * Macro-instructions used to manage group descriptors
  */
 #define	EXT2_BLOCKS_PER_GROUP(s)	(EXT2_SB(s)->e2fs_bpg)
 
 #endif	/* !_FS_EXT2FS_EXT2FS_H_ */
Index: user/ngie/more-tests/sys/fs/nfs/nfs.h
===================================================================
--- user/ngie/more-tests/sys/fs/nfs/nfs.h	(revision 281675)
+++ user/ngie/more-tests/sys/fs/nfs/nfs.h	(revision 281676)
@@ -1,730 +1,730 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _NFS_NFS_H_
 #define	_NFS_NFS_H_
 /*
  * Tunable constants for nfs
  */
 
 #define	NFS_MAXIOVEC	34
 #define	NFS_TICKINTVL	500		/* Desired time for a tick (msec) */
 #define	NFS_HZ		(hz / nfscl_ticks) /* Ticks/sec */
 #define	NFS_TIMEO	(1 * NFS_HZ)	/* Default timeout = 1 second */
 #define	NFS_MINTIMEO	(1 * NFS_HZ)	/* Min timeout to use */
 #define	NFS_MAXTIMEO	(60 * NFS_HZ)	/* Max timeout to backoff to */
 #define	NFS_TCPTIMEO	300		/* TCP timeout */
 #define	NFS_MAXRCVTIMEO	60		/* 1 minute in seconds */
 #define	NFS_MINIDEMTIMEO (5 * NFS_HZ)	/* Min timeout for non-idempotent ops*/
 #define	NFS_MAXREXMIT	100		/* Stop counting after this many */
 #define	NFSV4_CALLBACKTIMEO (2 * NFS_HZ) /* Timeout in ticks */
 #define	NFSV4_CALLBACKRETRY 5		/* Number of retries before failure */
 #define	NFSV4_SLOTS	64		/* Number of slots, fore channel */
 #define	NFSV4_CBSLOTS	8		/* Number of slots, back channel */
 #define	NFSV4_CBRETRYCNT 4		/* # of CBRecall retries upon err */
 #define	NFSV4_UPCALLTIMEO (15 * NFS_HZ)	/* Timeout in ticks for upcalls */
 					/* to gssd or nfsuserd */
 #define	NFSV4_UPCALLRETRY 4		/* Number of retries before failure */
 #define	NFS_MAXWINDOW	1024		/* Max number of outstanding requests */
 #define	NFS_RETRANS	10		/* Num of retrans for soft mounts */
 #define	NFS_RETRANS_TCP	2		/* Num of retrans for TCP soft mounts */
 #define	NFS_MAXGRPS	16		/* Max. size of groups list */
 #define	NFS_TRYLATERDEL	15		/* Maximum delay timeout (sec) */
 #ifndef NFS_REMOVETIMEO
 #define	NFS_REMOVETIMEO 15  /* # sec to wait for delegret in local syscall */
 #endif
 #ifndef NFS_MINATTRTIMO
 #define	NFS_MINATTRTIMO 5		/* Attribute cache timeout in sec */
 #endif
 #ifndef NFS_MAXATTRTIMO
 #define	NFS_MAXATTRTIMO 60
 #endif
 #define	NFS_WSIZE	8192		/* Def. write data size <= 8192 */
 #define	NFS_RSIZE	8192		/* Def. read data size <= 8192 */
 #define	NFS_READDIRSIZE	8192		/* Def. readdir size */
 #define	NFS_DEFRAHEAD	1		/* Def. read ahead # blocks */
 #define	NFS_MAXRAHEAD	16		/* Max. read ahead # blocks */
 #define	NFS_MAXASYNCDAEMON 	64	/* Max. number async_daemons runnable */
 #define	NFS_MAXUIDHASH	64		/* Max. # of hashed uid entries/mp */
 #ifndef	NFSRV_LEASE
 #define	NFSRV_LEASE		120	/* Lease time in seconds for V4 */
 #endif					/* assigned to nfsrv_lease */
 #ifndef NFSRV_STALELEASE
 #define	NFSRV_STALELEASE	(5 * nfsrv_lease)
 #endif
 #ifndef NFSRV_MOULDYLEASE
 #define	NFSRV_MOULDYLEASE	604800	/* One week (in sec) */
 #endif
 #ifndef NFSCLIENTHASHSIZE
 #define	NFSCLIENTHASHSIZE	20	/* Size of server client hash table */
 #endif
 #ifndef NFSLOCKHASHSIZE
 #define	NFSLOCKHASHSIZE		20	/* Size of server nfslock hash table */
 #endif
 #ifndef NFSSESSIONHASHSIZE
 #define	NFSSESSIONHASHSIZE	20	/* Size of server session hash table */
 #endif
 #define	NFSSTATEHASHSIZE	10	/* Size of server stateid hash table */
 #ifndef NFSUSERHASHSIZE
 #define	NFSUSERHASHSIZE		30	/* Size of user id hash table */
 #endif
 #ifndef NFSGROUPHASHSIZE
 #define	NFSGROUPHASHSIZE	5	/* Size of group id hash table */
 #endif
 #ifndef	NFSCLDELEGHIGHWATER
 #define	NFSCLDELEGHIGHWATER	10000	/* limit for client delegations */
 #endif
 #ifndef	NFSCLLAYOUTHIGHWATER
 #define	NFSCLLAYOUTHIGHWATER	10000	/* limit for client pNFS layouts */
 #endif
 #ifndef NFSNOOPEN			/* Inactive open owner (sec) */
 #define	NFSNOOPEN		120
 #endif
 #define	NFSRV_LEASEDELTA	15	/* # of seconds to delay beyond lease */
 #define	NFS_IDMAXSIZE		4	/* max sizeof (in_addr_t) */
 #ifndef NFSRVCACHE_UDPTIMEOUT
 #define	NFSRVCACHE_UDPTIMEOUT	30	/* # of sec to hold cached rpcs(udp) */
 #endif
 #ifndef NFSRVCACHE_UDPHIGHWATER
 #define	NFSRVCACHE_UDPHIGHWATER	500	/* Max # of udp cache entries */
 #endif
 #ifndef NFSRVCACHE_TCPTIMEOUT
 #define	NFSRVCACHE_TCPTIMEOUT	(3600*12) /*#of sec to hold cached rpcs(tcp) */
 #endif
 #ifndef	NFSRVCACHE_FLOODLEVEL
 #define	NFSRVCACHE_FLOODLEVEL	16384	/* Very high water mark for cache */
 #endif
 #ifndef	NFSRV_CLIENTHIGHWATER
 #define	NFSRV_CLIENTHIGHWATER	1000
 #endif
 #ifndef	NFSRV_MAXDUMPLIST
 #define	NFSRV_MAXDUMPLIST	10000
 #endif
 #ifndef NFS_ACCESSCACHESIZE
 #define	NFS_ACCESSCACHESIZE	8
 #endif
 #define	NFSV4_CBPORT	7745		/* Callback port for testing */
 
 /*
  * This macro defines the high water mark for issuing V4 delegations.
  * (It is currently set at a conservative 20% of NFSRV_V4STATELIMIT. This
  *  may want to increase when clients can make more effective use of
  *  delegations.)
  */
 #define	NFSRV_V4DELEGLIMIT(c) (((c) * 5) > NFSRV_V4STATELIMIT)
 
 #define	NFS_READDIRBLKSIZ	DIRBLKSIZ	/* Minimal nm_readdirsize */
 
 /*
  * Oddballs
  */
 #define	NFS_CMPFH(n, f, s) 						\
     ((n)->n_fhp->nfh_len == (s) && !NFSBCMP((n)->n_fhp->nfh_fh, (caddr_t)(f), (s)))
 #define	NFSRV_CMPFH(nf, ns, f, s) 					\
 	((ns) == (s) && !NFSBCMP((caddr_t)(nf), (caddr_t)(f), (s)))
 #define	NFS_CMPTIME(t1, t2) 						\
 	((t1).tv_sec == (t2).tv_sec && (t1).tv_nsec == (t2).tv_nsec)
 #define	NFS_SETTIME(t) do { 						\
 	(t).tv_sec = time.tv_sec; (t).tv_nsec = 1000 * time.tv_usec; } while (0)
 #define	NFS_SRVMAXDATA(n) 						\
 		(((n)->nd_flag & (ND_NFSV3 | ND_NFSV4)) ? 		\
-		 NFS_MAXDATA : NFS_V2MAXDATA)
+		 NFS_SRVMAXIO : NFS_V2MAXDATA)
 #define	NFS64BITSSET	0xffffffffffffffffull
 #define	NFS64BITSMINUS1	0xfffffffffffffffeull
 
 /*
  * Structures for the nfssvc(2) syscall. Not that anyone but nfsd, mount_nfs
  * and nfsloaduser should ever try and use it.
  */
 struct nfsd_addsock_args {
 	int	sock;		/* Socket to serve */
 	caddr_t	name;		/* Client addr for connection based sockets */
 	int	namelen;	/* Length of name */
 };
 
 /*
  * nfsd argument for new krpc.
  */
 struct nfsd_nfsd_args {
 	const char *principal;	/* GSS-API service principal name */
 	int	minthreads;	/* minimum service thread count */
 	int	maxthreads;	/* maximum service thread count */
 };
 
 /*
  * Arguments for use by the callback daemon.
  */
 struct nfsd_nfscbd_args {
 	const char *principal;	/* GSS-API service principal name */
 };
 
 struct nfscbd_args {
 	int	sock;		/* Socket to serve */
 	caddr_t	name;		/* Client addr for connection based sockets */
 	int	namelen;	/* Length of name */
 	u_short	port;		/* Port# for callbacks */
 };
 
 struct nfsd_idargs {
 	int		nid_flag;	/* Flags (see below) */
 	uid_t		nid_uid;	/* user/group id */
 	gid_t		nid_gid;
 	int		nid_usermax;	/* Upper bound on user name cache */
 	int		nid_usertimeout;/* User name timeout (minutes) */
 	u_char		*nid_name;	/* Name */
 	int		nid_namelen;	/* and its length */
 };
 
 struct nfsd_clid {
 	int		nclid_idlen;	/* Length of client id */
 	u_char		nclid_id[NFSV4_OPAQUELIMIT]; /* and name */
 };
 
 struct nfsd_dumplist {
 	int		ndl_size;	/* Number of elements */
 	void		*ndl_list;	/* and the list of elements */
 };
 
 struct nfsd_dumpclients {
 	u_int32_t	ndcl_flags;		/* LCL_xxx flags */
 	u_int32_t	ndcl_nopenowners;	/* Number of openowners */
 	u_int32_t	ndcl_nopens;		/* and opens */
 	u_int32_t	ndcl_nlockowners;	/* and of lockowners */
 	u_int32_t	ndcl_nlocks;		/* and of locks */
 	u_int32_t	ndcl_ndelegs;		/* and of delegations */
 	u_int32_t	ndcl_nolddelegs;	/* and old delegations */
 	sa_family_t	ndcl_addrfam;		/* Callback address */
 	union {
 		struct in_addr sin_addr;
 		struct in6_addr sin6_addr;
 	} ndcl_cbaddr;
 	struct nfsd_clid ndcl_clid;	/* and client id */
 };
 
 struct nfsd_dumplocklist {
 	char		*ndllck_fname;	/* File Name */
 	int		ndllck_size;	/* Number of elements */
 	void		*ndllck_list;	/* and the list of elements */
 };
 
 struct nfsd_dumplocks {
 	u_int32_t	ndlck_flags;		/* state flags NFSLCK_xxx */
 	nfsv4stateid_t	ndlck_stateid;		/* stateid */
 	u_int64_t	ndlck_first;		/* lock byte range */
 	u_int64_t	ndlck_end;
 	struct nfsd_clid ndlck_owner;		/* Owner of open/lock */
 	sa_family_t	ndlck_addrfam;		/* Callback address */
 	union {
 		struct in_addr sin_addr;
 		struct in6_addr sin6_addr;
 	} ndlck_cbaddr;
 	struct nfsd_clid ndlck_clid;	/* and client id */
 };
 
 /*
  * Structure for referral information.
  */
 struct nfsreferral {
 	u_char		*nfr_srvlist;	/* List of servers */
 	int		nfr_srvcnt;	/* number of servers */
 	vnode_t		nfr_vp;	/* vnode for referral */
 	u_int32_t	nfr_dfileno;	/* assigned dir inode# */
 };
 
 /*
  * Flags for lc_flags and opsflags for nfsrv_getclient().
  */
 #define	LCL_NEEDSCONFIRM	0x00000001
 #define	LCL_DONTCLEAN		0x00000002
 #define	LCL_WAKEUPWANTED	0x00000004
 #define	LCL_TCPCALLBACK		0x00000008
 #define	LCL_CALLBACKSON		0x00000010
 #define	LCL_INDEXNOTOK		0x00000020
 #define	LCL_STAMPEDSTABLE	0x00000040
 #define	LCL_EXPIREIT		0x00000080
 #define	LCL_CBDOWN		0x00000100
 #define	LCL_KERBV		0x00000400
 #define	LCL_NAME		0x00000800
 #define	LCL_NEEDSCBNULL		0x00001000
 #define	LCL_GSSINTEGRITY	0x00002000
 #define	LCL_GSSPRIVACY		0x00004000
 #define	LCL_ADMINREVOKED	0x00008000
 #define	LCL_RECLAIMCOMPLETE	0x00010000
 #define	LCL_NFSV41		0x00020000
 
 #define	LCL_GSS		LCL_KERBV	/* Or of all mechs */
 
 /*
  * Bits for flags in nfslock and nfsstate.
  * The access, deny, NFSLCK_READ and NFSLCK_WRITE bits must be defined as
  * below, in the correct order, so the shifts work for tests.
  */
 #define	NFSLCK_READACCESS	0x00000001
 #define	NFSLCK_WRITEACCESS	0x00000002
 #define	NFSLCK_ACCESSBITS	(NFSLCK_READACCESS | NFSLCK_WRITEACCESS)
 #define	NFSLCK_SHIFT		2
 #define	NFSLCK_READDENY		0x00000004
 #define	NFSLCK_WRITEDENY	0x00000008
 #define	NFSLCK_DENYBITS		(NFSLCK_READDENY | NFSLCK_WRITEDENY)
 #define	NFSLCK_SHAREBITS 						\
     (NFSLCK_READACCESS|NFSLCK_WRITEACCESS|NFSLCK_READDENY|NFSLCK_WRITEDENY)
 #define	NFSLCK_LOCKSHIFT	4
 #define	NFSLCK_READ		0x00000010
 #define	NFSLCK_WRITE		0x00000020
 #define	NFSLCK_BLOCKING		0x00000040
 #define	NFSLCK_RECLAIM		0x00000080
 #define	NFSLCK_OPENTOLOCK	0x00000100
 #define	NFSLCK_TEST		0x00000200
 #define	NFSLCK_LOCK		0x00000400
 #define	NFSLCK_UNLOCK		0x00000800
 #define	NFSLCK_OPEN		0x00001000
 #define	NFSLCK_CLOSE		0x00002000
 #define	NFSLCK_CHECK		0x00004000
 #define	NFSLCK_RELEASE		0x00008000
 #define	NFSLCK_NEEDSCONFIRM	0x00010000
 #define	NFSLCK_CONFIRM		0x00020000
 #define	NFSLCK_DOWNGRADE	0x00040000
 #define	NFSLCK_DELEGREAD	0x00080000
 #define	NFSLCK_DELEGWRITE	0x00100000
 #define	NFSLCK_DELEGCUR		0x00200000
 #define	NFSLCK_DELEGPREV	0x00400000
 #define	NFSLCK_OLDDELEG		0x00800000
 #define	NFSLCK_DELEGRECALL	0x01000000
 #define	NFSLCK_SETATTR		0x02000000
 #define	NFSLCK_DELEGPURGE	0x04000000
 #define	NFSLCK_DELEGRETURN	0x08000000
 #define	NFSLCK_WANTWDELEG	0x10000000
 #define	NFSLCK_WANTRDELEG	0x20000000
 #define	NFSLCK_WANTNODELEG	0x40000000
 #define	NFSLCK_WANTBITS							\
     (NFSLCK_WANTWDELEG | NFSLCK_WANTRDELEG | NFSLCK_WANTNODELEG)
 
 /* And bits for nid_flag */
 #define	NFSID_INITIALIZE	0x0001
 #define	NFSID_ADDUID		0x0002
 #define	NFSID_DELUID		0x0004
 #define	NFSID_ADDUSERNAME	0x0008
 #define	NFSID_DELUSERNAME	0x0010
 #define	NFSID_ADDGID		0x0020
 #define	NFSID_DELGID		0x0040
 #define	NFSID_ADDGROUPNAME	0x0080
 #define	NFSID_DELGROUPNAME	0x0100
 
 /*
  * fs.nfs sysctl(3) identifiers
  */
 #define	NFS_NFSSTATS	1		/* struct: struct nfsstats */
 
 /*
  * Here is the definition of the attribute bits array and macros that
  * manipulate it.
  * THE MACROS MUST BE MANUALLY MODIFIED IF NFSATTRBIT_MAXWORDS CHANGES!!
  * It is (NFSATTRBIT_MAX + 31) / 32.
  */
 #define	NFSATTRBIT_MAXWORDS	3
 
 typedef struct {
 	u_int32_t bits[NFSATTRBIT_MAXWORDS];
 } nfsattrbit_t;
 
 #define	NFSZERO_ATTRBIT(b) do {						\
 	(b)->bits[0] = 0;						\
 	(b)->bits[1] = 0;						\
 	(b)->bits[2] = 0;						\
 } while (0)
 
 #define	NFSSET_ATTRBIT(t, f) do {					\
 	(t)->bits[0] = (f)->bits[0];			 		\
 	(t)->bits[1] = (f)->bits[1];					\
 	(t)->bits[2] = (f)->bits[2];					\
 } while (0)
 
 #define	NFSSETSUPP_ATTRBIT(b) do { 					\
 	(b)->bits[0] = NFSATTRBIT_SUPP0; 				\
 	(b)->bits[1] = (NFSATTRBIT_SUPP1 | NFSATTRBIT_SUPPSETONLY);	\
 	(b)->bits[2] = NFSATTRBIT_SUPP2;				\
 } while (0)
 
 /*
  * Test, set and clear the single attribute bit "p" in the bit array "b".
  * Word (p) / 32 holds the bit and (p) % 32 is its position in that word.
  * The mask is built from an unsigned constant so that position 31 can be
  * selected without left-shifting a signed 1 into the sign bit, which is
  * undefined behavior in C.
  * Both arguments are evaluated more than once; do not pass expressions
  * with side effects.
  */
 #define	NFSISSET_ATTRBIT(b, p)	((b)->bits[(p) / 32] & (1U << ((p) % 32)))
 #define	NFSSETBIT_ATTRBIT(b, p)	((b)->bits[(p) / 32] |= (1U << ((p) % 32)))
 #define	NFSCLRBIT_ATTRBIT(b, p)	((b)->bits[(p) / 32] &= ~(1U << ((p) % 32)))
 
 #define	NFSCLRALL_ATTRBIT(b, a)	do { 					\
 	(b)->bits[0] &= ~((a)->bits[0]);	 			\
 	(b)->bits[1] &= ~((a)->bits[1]);	 			\
 	(b)->bits[2] &= ~((a)->bits[2]);				\
 } while (0)
 
 #define	NFSCLRNOT_ATTRBIT(b, a)	do { 					\
 	(b)->bits[0] &= ((a)->bits[0]);		 			\
 	(b)->bits[1] &= ((a)->bits[1]);		 			\
 	(b)->bits[2] &= ((a)->bits[2]);		 			\
 } while (0)
 
 #define	NFSCLRNOTFILLABLE_ATTRBIT(b) do { 				\
 	(b)->bits[0] &= NFSATTRBIT_SUPP0;	 			\
 	(b)->bits[1] &= NFSATTRBIT_SUPP1;				\
 	(b)->bits[2] &= NFSATTRBIT_SUPP2;				\
 } while (0)
 
 #define	NFSCLRNOTSETABLE_ATTRBIT(b) do { 				\
 	(b)->bits[0] &= NFSATTRBIT_SETABLE0;	 			\
 	(b)->bits[1] &= NFSATTRBIT_SETABLE1;				\
 	(b)->bits[2] &= NFSATTRBIT_SETABLE2;				\
 } while (0)
 
 #define	NFSNONZERO_ATTRBIT(b)	((b)->bits[0] || (b)->bits[1] || (b)->bits[2])
 #define	NFSEQUAL_ATTRBIT(b, p)	((b)->bits[0] == (p)->bits[0] &&	\
 	(b)->bits[1] == (p)->bits[1] && (b)->bits[2] == (p)->bits[2])
 
 #define	NFSGETATTR_ATTRBIT(b) do { 					\
 	(b)->bits[0] = NFSATTRBIT_GETATTR0;	 			\
 	(b)->bits[1] = NFSATTRBIT_GETATTR1;				\
 	(b)->bits[2] = NFSATTRBIT_GETATTR2;				\
 } while (0)
 
 #define	NFSWCCATTR_ATTRBIT(b) do { 					\
 	(b)->bits[0] = NFSATTRBIT_WCCATTR0;	 			\
 	(b)->bits[1] = NFSATTRBIT_WCCATTR1;				\
 	(b)->bits[2] = NFSATTRBIT_WCCATTR2;				\
 } while (0)
 
 #define	NFSWRITEGETATTR_ATTRBIT(b) do { 				\
 	(b)->bits[0] = NFSATTRBIT_WRITEGETATTR0;			\
 	(b)->bits[1] = NFSATTRBIT_WRITEGETATTR1;			\
 	(b)->bits[2] = NFSATTRBIT_WRITEGETATTR2;			\
 } while (0)
 
 #define	NFSCBGETATTR_ATTRBIT(b, c) do { 				\
 	(c)->bits[0] = ((b)->bits[0] & NFSATTRBIT_CBGETATTR0);		\
 	(c)->bits[1] = ((b)->bits[1] & NFSATTRBIT_CBGETATTR1);		\
 	(c)->bits[2] = ((b)->bits[2] & NFSATTRBIT_CBGETATTR2);		\
 } while (0)
 
 #define	NFSPATHCONF_GETATTRBIT(b) do { 					\
 	(b)->bits[0] = NFSGETATTRBIT_PATHCONF0;		 		\
 	(b)->bits[1] = NFSGETATTRBIT_PATHCONF1;				\
 	(b)->bits[2] = NFSGETATTRBIT_PATHCONF2;				\
 } while (0)
 
 #define	NFSSTATFS_GETATTRBIT(b)	do { 					\
 	(b)->bits[0] = NFSGETATTRBIT_STATFS0;	 			\
 	(b)->bits[1] = NFSGETATTRBIT_STATFS1;				\
 	(b)->bits[2] = NFSGETATTRBIT_STATFS2;				\
 } while (0)
 
 #define	NFSISSETSTATFS_ATTRBIT(b) 					\
 		(((b)->bits[0] & NFSATTRBIT_STATFS0) || 		\
 		 ((b)->bits[1] & NFSATTRBIT_STATFS1) ||			\
 		 ((b)->bits[2] & NFSATTRBIT_STATFS2))
 
 #define	NFSCLRSTATFS_ATTRBIT(b)	do { 					\
 	(b)->bits[0] &= ~NFSATTRBIT_STATFS0;	 			\
 	(b)->bits[1] &= ~NFSATTRBIT_STATFS1;				\
 	(b)->bits[2] &= ~NFSATTRBIT_STATFS2;				\
 } while (0)
 
 #define	NFSREADDIRPLUS_ATTRBIT(b) do { 					\
 	(b)->bits[0] = NFSATTRBIT_READDIRPLUS0;		 		\
 	(b)->bits[1] = NFSATTRBIT_READDIRPLUS1;				\
 	(b)->bits[2] = NFSATTRBIT_READDIRPLUS2;				\
 } while (0)
 
 #define	NFSREFERRAL_ATTRBIT(b) do { 					\
 	(b)->bits[0] = NFSATTRBIT_REFERRAL0;		 		\
 	(b)->bits[1] = NFSATTRBIT_REFERRAL1;				\
 	(b)->bits[2] = NFSATTRBIT_REFERRAL2;				\
 } while (0)
 
 /*
  * Store uid, gid creds that were used when the stateid was acquired.
  * The RPC layer allows NFS_MAXGRPS + 1 groups to go out on the wire,
  * so that's how many gets stored here.
  */
 struct nfscred {
 	uid_t 		nfsc_uid;
 	gid_t		nfsc_groups[NFS_MAXGRPS + 1];
 	int		nfsc_ngroups;
 };
 
 /*
  * Constants that define the file handle for the V4 root directory.
  * (The FSID must never be used by other file systems that are exported.)
  */
 #define	NFSV4ROOT_FSID0		((int32_t) -1)
 #define	NFSV4ROOT_FSID1		((int32_t) -1)
 #define	NFSV4ROOT_REFERRAL	((int32_t) -2)
 #define	NFSV4ROOT_INO		2	/* It's traditional */
 #define	NFSV4ROOT_GEN		1
 
 /*
  * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
  * What should be in this set is open to debate, but I believe that since
  * I/O system calls on ufs are never interrupted by signals the set should
  * be minimal. My reasoning is that many current programs that use signals
  * such as SIGALRM will not expect file I/O system calls to be interrupted
  * by them and break.
  */
 #if defined(_KERNEL) || defined(KERNEL)
 
 struct uio; struct buf; struct vattr; struct nameidata;	/* XXX */
 
 /*
  * Socket errors ignored for connectionless sockets?
  * For now, ignore them all
  */
 #define	NFSIGNORE_SOERROR(s, e) 					\
 		((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
 		((s) & PR_CONNREQUIRED) == 0)
 
 
 /*
  * This structure holds socket information for a connection. Used by the
  * client and the server for callbacks.
  */
 struct nfssockreq {
 	NFSSOCKADDR_T	nr_nam;
 	int		nr_sotype;
 	int		nr_soproto;
 	int		nr_soflags;
 	struct ucred	*nr_cred;
 	int		nr_lock;
 	NFSMUTEX_T	nr_mtx;
 	u_int32_t	nr_prog;
 	u_int32_t	nr_vers;
 	struct __rpc_client *nr_client;
 	AUTH		*nr_auth;
 };
 
 /*
  * And associated nr_lock bits.
  */
 #define	NFSR_SNDLOCK		0x01
 #define	NFSR_WANTSND		0x02
 #define	NFSR_RCVLOCK		0x04
 #define	NFSR_WANTRCV		0x08
 #define	NFSR_RESERVEDPORT	0x10
 #define	NFSR_LOCALHOST		0x20
 
 /*
  * Queue head for nfsreq's
  */
 TAILQ_HEAD(nfsreqhead, nfsreq);
 
 /* This is the only nfsreq R_xxx flag still used. */
 #define	R_DONTRECOVER	0x00000100	/* don't initiate recovery when this
 					   rpc gets a stale state reply */
 
 /*
  * Network address hash list element
  */
 union nethostaddr {
 	struct in_addr	had_inet;
 	struct in6_addr had_inet6;
 };
 
 /*
  * Structure of list of mechanisms.
  */
 struct nfsgss_mechlist {
 	int	len;
 	const u_char	*str;
 	int	totlen;
 };
 #define	KERBV_MECH	0	/* position in list */
 
 /*
  * This structure is used by the server for describing each request.
  */
 struct nfsrv_descript {
 	mbuf_t			nd_mrep;	/* Request mbuf list */
 	mbuf_t			nd_md;		/* Current dissect mbuf */
 	mbuf_t			nd_mreq;	/* Reply mbuf list */
 	mbuf_t			nd_mb;		/* Current build mbuf */
 	NFSSOCKADDR_T		nd_nam;		/* and socket addr */
 	NFSSOCKADDR_T		nd_nam2;	/* return socket addr */
 	caddr_t			nd_dpos;	/* Current dissect pos */
 	caddr_t			nd_bpos;	/* Current build pos */
 	u_int16_t		nd_procnum;	/* RPC # */
 	u_int32_t		nd_flag;	/* nd_flag */
 	u_int32_t		nd_repstat;	/* Reply status */
 	int			*nd_errp;	/* Pointer to ret status */
 	u_int32_t		nd_retxid;	/* Reply xid */
 	struct nfsrvcache	*nd_rp;		/* Assoc. cache entry */
 	fhandle_t		nd_fh;		/* File handle */
 	struct ucred		*nd_cred;	/* Credentials */
 	uid_t			nd_saveduid;	/* Saved uid */
 	u_int64_t		nd_sockref;	/* Rcv socket ref# */
 	u_int64_t		nd_compref;	/* Compound RPC ref# */
 	time_t			nd_tcpconntime;	/* Time TCP connection est. */
 	nfsquad_t		nd_clientid;	/* Implied clientid */
 	int			nd_gssnamelen;	/* principal name length */
 	char			*nd_gssname;	/* principal name */
 	uint32_t		*nd_slotseq;	/* ptr to slot seq# in req */
 	uint8_t			nd_sessionid[NFSX_V4SESSIONID];	/* Session id */
 	uint32_t		nd_slotid;	/* Slotid for this RPC */
 	SVCXPRT			*nd_xprt;	/* Server RPC handle */
 };
 
 #define	nd_princlen	nd_gssnamelen
 #define	nd_principal	nd_gssname
 
 /* Bits for "nd_flag" */
 #define	ND_DONTSAVEREPLY 	0x00000001
 #define	ND_SAVEREPLY		0x00000002
 #define	ND_NFSV2		0x00000004
 #define	ND_NFSV3		0x00000008
 #define	ND_NFSV4		0x00000010
 #define	ND_KERBV		0x00000020
 #define	ND_GSSINTEGRITY		0x00000040
 #define	ND_GSSPRIVACY		0x00000080
 #define	ND_WINDOWVERF		0x00000100
 #define	ND_GSSINITREPLY		0x00000200
 #define	ND_STREAMSOCK		0x00000400
 #define	ND_PUBLOOKUP		0x00000800
 #define	ND_USEGSSNAME		0x00001000
 #define	ND_SAMETCPCONN		0x00002000
 #define	ND_IMPLIEDCLID		0x00004000
 #define	ND_NOMOREDATA		0x00008000
 #define	ND_V4WCCATTR		0x00010000
 #define	ND_NFSCB		0x00020000
 #define	ND_AUTHNONE		0x00040000
 #define	ND_EXAUTHSYS		0x00080000
 #define	ND_EXGSS		0x00100000
 #define	ND_EXGSSINTEGRITY	0x00200000
 #define	ND_EXGSSPRIVACY		0x00400000
 #define	ND_INCRSEQID		0x00800000
 #define	ND_NFSCL		0x01000000
 #define	ND_NFSV41		0x02000000
 #define	ND_HASSEQUENCE		0x04000000
 #define	ND_CACHETHIS		0x08000000
 #define	ND_LASTOP		0x10000000
 
 /*
  * ND_GSS should be the "or" of all GSS type authentications.
  */
 #define	ND_GSS		(ND_KERBV)
 
 struct nfsv4_opflag {
 	int	retfh;
 	int	needscfh;
 	int	savereply;
 	int	modifyfs;
 	int	lktype;
 	int	needsseq;
 };
 
 /*
  * Flags used to indicate what to do w.r.t. seqid checking.
  */
 #define	NFSRVSEQID_FIRST	0x01
 #define	NFSRVSEQID_LAST		0x02
 #define	NFSRVSEQID_OPEN		0x04
 
 /*
  * assign a doubly linked list to a new head
  * and prepend one list into another.
  *
  * LIST_NEWHEAD(nhead, ohead, field)
  *	Moves every element of the list headed by ohead to nhead: nhead
  *	takes over ohead's first element, that element's back pointer is
  *	redirected to nhead (when the list is not empty) and ohead is left
  *	empty. Whatever nhead pointed at before is overwritten.
  *
  * LIST_PREPEND(head, phead, lelm, field)
  *	Puts the elements of phead's list in front of those already on head.
  *	When head is not empty, its current first element is linked in
  *	after lelm. Afterwards head points at phead's first element.
  *	NOTE(review): lelm is presumably the last element of phead's list;
  *	the macro does not check this - confirm against the callers.
  *	The new first element is dereferenced without a NULL check, so phead
  *	must not be empty. phead->lh_first is not cleared by this macro.
  *
  * All arguments are evaluated more than once.
  */
 #define	LIST_NEWHEAD(nhead, ohead, field) do { 				\
 	if (((nhead)->lh_first = (ohead)->lh_first) != NULL) 		\
 		(ohead)->lh_first->field.le_prev = &(nhead)->lh_first; 	\
 	(ohead)->lh_first = NULL; 					\
     } while (0)
 
 #define	LIST_PREPEND(head, phead, lelm, field) do {			\
 	if ((head)->lh_first != NULL) {					\
 		(lelm)->field.le_next = (head)->lh_first;		\
 		(lelm)->field.le_next->field.le_prev =			\
 		    &(lelm)->field.le_next;				\
 	}								\
 	(head)->lh_first = (phead)->lh_first;				\
 	(head)->lh_first->field.le_prev = &(head)->lh_first;		\
     } while (0)
 
 /*
  * File handle structure for client. Malloc'd to the correct length with
  * malloc type M_NFSFH.
  */
 struct nfsfh {
 	u_int16_t	nfh_len;	/* Length of file handle */
 	u_int8_t	nfh_fh[1];	/* and the file handle */
 };
 
 /*
  * File handle structure for server. The NFSRV_MAXFH constant is
  * set in nfsdport.h. I use a 32bit length, so that alignment is
  * preserved.
  */
 struct nfsrvfh {
 	u_int32_t	nfsrvfh_len;
 	u_int8_t	nfsrvfh_data[NFSRV_MAXFH];
 };
 
 /*
  * This structure is used for sleep locks on the NFSv4 nfsd threads and
  * NFSv4 client data structures.
  */
 struct nfsv4lock {
 	u_int32_t	nfslock_usecnt;
 	u_int8_t	nfslock_lock;
 };
 #define	NFSV4LOCK_LOCK		0x01
 #define	NFSV4LOCK_LOCKWANTED	0x02
 #define	NFSV4LOCK_WANTED	0x04
 
 /*
  * Values for the override argument for nfsvno_accchk().
  */
 #define	NFSACCCHK_NOOVERRIDE		0
 #define	NFSACCCHK_ALLOWROOT		1
 #define	NFSACCCHK_ALLOWOWNER		2
 
 /*
  * and values for the vpislocked argument for nfsvno_accchk().
  */
 #define	NFSACCCHK_VPNOTLOCKED		0
 #define	NFSACCCHK_VPISLOCKED		1
 
 /*
  * Slot for the NFSv4.1 Sequence Op.
  */
 struct nfsslot {
 	int		nfssl_inprog;
 	uint32_t	nfssl_seq;
 	struct mbuf	*nfssl_reply;
 };
 
 #endif	/* _KERNEL */
 
 #endif	/* _NFS_NFS_H */
Index: user/ngie/more-tests/sys/fs/nfs/nfs_commonport.c
===================================================================
--- user/ngie/more-tests/sys/fs/nfs/nfs_commonport.c	(revision 281675)
+++ user/ngie/more-tests/sys/fs/nfs/nfs_commonport.c	(revision 281676)
@@ -1,637 +1,637 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Functions that need to be different for different versions of BSD
  * kernel should be kept here, along with any global storage specific
  * to this BSD variant.
  */
 #include <fs/nfs/nfsport.h>
 #include <sys/sysctl.h>
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_param.h>
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 
 extern int nfscl_ticks;
 extern int nfsrv_nfsuserd;
 extern struct nfssockreq nfsrv_nfsuserdsock;
 extern void (*nfsd_call_recall)(struct vnode *, int, struct ucred *,
     struct thread *);
 extern int nfsrv_useacl;
 struct mount nfsv4root_mnt;
 int newnfs_numnfsd = 0;
 struct nfsstats newnfsstats;
 int nfs_numnfscbd = 0;
 int nfscl_debuglevel = 0;
 char nfsv4_callbackaddr[INET6_ADDRSTRLEN];
 struct callout newnfsd_callout;
 void (*nfsd_call_servertimer)(void) = NULL;
 void (*ncl_call_invalcaches)(struct vnode *) = NULL;
 
 static int nfs_realign_test;
 static int nfs_realign_count;
 
 SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "New NFS filesystem");
 SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RW, &nfs_realign_test,
     0, "Number of realign tests done");
 SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RW, &nfs_realign_count,
     0, "Number of mbuf realignments done");
 SYSCTL_STRING(_vfs_nfs, OID_AUTO, callback_addr, CTLFLAG_RW,
     nfsv4_callbackaddr, sizeof(nfsv4_callbackaddr),
     "NFSv4 callback addr for server to use");
 SYSCTL_INT(_vfs_nfs, OID_AUTO, debuglevel, CTLFLAG_RW, &nfscl_debuglevel,
     0, "Debug level for new nfs client");
 
 /*
  * Defines for malloc
  * (Here for FreeBSD, since they allocate storage.)
  */
 MALLOC_DEFINE(M_NEWNFSRVCACHE, "NFSD srvcache", "NFSD Server Request Cache");
 MALLOC_DEFINE(M_NEWNFSDCLIENT, "NFSD V4client", "NFSD V4 Client Id");
 MALLOC_DEFINE(M_NEWNFSDSTATE, "NFSD V4state",
     "NFSD V4 State (Openowner, Open, Lockowner, Delegation");
 MALLOC_DEFINE(M_NEWNFSDLOCK, "NFSD V4lock", "NFSD V4 byte range lock");
 MALLOC_DEFINE(M_NEWNFSDLOCKFILE, "NFSD lckfile", "NFSD Open/Lock file");
 MALLOC_DEFINE(M_NEWNFSSTRING, "NFSD string", "NFSD V4 long string");
 MALLOC_DEFINE(M_NEWNFSUSERGROUP, "NFSD usrgroup", "NFSD V4 User/group map");
 MALLOC_DEFINE(M_NEWNFSDREQ, "NFS req", "NFS request header");
 MALLOC_DEFINE(M_NEWNFSFH, "NFS fh", "NFS file handle");
 MALLOC_DEFINE(M_NEWNFSCLOWNER, "NFSCL owner", "NFSCL Open Owner");
 MALLOC_DEFINE(M_NEWNFSCLOPEN, "NFSCL open", "NFSCL Open");
 MALLOC_DEFINE(M_NEWNFSCLDELEG, "NFSCL deleg", "NFSCL Delegation");
 MALLOC_DEFINE(M_NEWNFSCLCLIENT, "NFSCL client", "NFSCL Client");
 MALLOC_DEFINE(M_NEWNFSCLLOCKOWNER, "NFSCL lckown", "NFSCL Lock Owner");
 MALLOC_DEFINE(M_NEWNFSCLLOCK, "NFSCL lck", "NFSCL Lock");
 MALLOC_DEFINE(M_NEWNFSV4NODE, "NEWNFSnode", "New nfs vnode");
 MALLOC_DEFINE(M_NEWNFSDIRECTIO, "NEWdirectio", "New nfs Direct IO buffer");
 MALLOC_DEFINE(M_NEWNFSDIROFF, "NFSCL diroffdiroff",
     "New NFS directory offset data");
 MALLOC_DEFINE(M_NEWNFSDROLLBACK, "NFSD rollback",
     "New NFS local lock rollback");
 MALLOC_DEFINE(M_NEWNFSLAYOUT, "NFSCL layout", "NFSv4.1 Layout");
 MALLOC_DEFINE(M_NEWNFSFLAYOUT, "NFSCL flayout", "NFSv4.1 File Layout");
 MALLOC_DEFINE(M_NEWNFSDEVINFO, "NFSCL devinfo", "NFSv4.1 Device Info");
 MALLOC_DEFINE(M_NEWNFSSOCKREQ, "NFSCL sockreq", "NFS Sock Req");
 MALLOC_DEFINE(M_NEWNFSCLDS, "NFSCL session", "NFSv4.1 Session");
 MALLOC_DEFINE(M_NEWNFSLAYRECALL, "NFSCL layrecall", "NFSv4.1 Layout Recall");
 MALLOC_DEFINE(M_NEWNFSDSESSION, "NFSD session", "NFSD Session for a client");
 
 /*
  * Definition of mutex locks.
  * newnfsd_mtx is used in nfsrvd_nfsd() to protect the nfs socket list
  * and assorted other nfsd structures.
  */
 struct mtx newnfsd_mtx;
 struct mtx nfs_sockl_mutex;
 struct mtx nfs_state_mutex;
 struct mtx nfs_nameid_mutex;
 struct mtx nfs_req_mutex;
 struct mtx nfs_slock_mutex;
 
 /* local functions */
 static int nfssvc_call(struct thread *, struct nfssvc_args *, struct ucred *);
 
 #ifdef __NO_STRICT_ALIGNMENT
 /*
  * These architectures don't need re-alignment, so just return.
  * Neither argument is used and the mbuf chain is left untouched.
  */
 int
 newnfs_realign(struct mbuf **pm, int how)
 {
 
 	return (0);
 }
 #else	/* !__NO_STRICT_ALIGNMENT */
 /*
  *	newnfs_realign:
  *
  *	Check for badly aligned mbuf data and realign by copying the unaligned
  *	portion of the data into a new mbuf chain and freeing the portions
  *	of the old chain that were replaced.
  *
  *	We cannot simply realign the data within the existing mbuf chain
  *	because the underlying buffers may contain other rpc commands and
  *	we cannot afford to overwrite them.
  *
  *	We would prefer to avoid this situation entirely.  The situation does
  *	not occur with NFS/UDP and is supposed to only occasionally occur
  *	with TCP.  Use vfs.nfs.realign_count and realign_test to check this.
  *
  *	pm points at the pointer to the head of the chain; when a copy is
  *	made, the link that referenced the first misaligned mbuf is
  *	rewritten to reference the new chain.  "how" is handed unchanged to
  *	the mbuf allocator.
  *
  *	Returns 0 on success or ENOMEM when the replacement mbuf cannot be
  *	allocated, in which case the chain has not been modified.
  *
  */
 int
 newnfs_realign(struct mbuf **pm, int how)
 {
 	struct mbuf *m, *n;
 	int off, space;
 
 	/* Counts every call, whether or not a realignment is needed. */
 	++nfs_realign_test;
 	while ((m = *pm) != NULL) {
 		/*
 		 * An mbuf is considered misaligned when either its length
 		 * or its data address is not a multiple of 4.
 		 */
 		if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
 			/*
 			 * NB: we can't depend on m_pkthdr.len to help us
 			 * decide what to do here.  May not be worth doing
 			 * the m_length calculation as m_copyback will
 			 * expand the mbuf chain below as needed.
 			 */
 			space = m_length(m, NULL);
 			if (space >= MINCLSIZE) {
 				/* NB: m_copyback handles space > MCLBYTES */
 				n = m_getcl(how, MT_DATA, 0);
 			} else
 				n = m_get(how, MT_DATA);
 			if (n == NULL)
 				return (ENOMEM);
 			/*
 			 * Align the remainder of the mbuf chain.
 			 * Everything from the first misaligned mbuf to the
 			 * end of the chain is copied into n.
 			 */
 			n->m_len = 0;
 			off = 0;
 			while (m != NULL) {
 				m_copyback(n, off, m->m_len, mtod(m, caddr_t));
 				off += m->m_len;
 				m = m->m_next;
 			}
 			/* Free the replaced tail and splice in the copy. */
 			m_freem(*pm);
 			*pm = n;
 			++nfs_realign_count;
 			break;
 		}
 		/* This mbuf is fine; advance to the link to the next one. */
 		pm = &m->m_next;
 	}
 
 	return (0);
 }
 #endif	/* __NO_STRICT_ALIGNMENT */
 
 #ifdef notdef
 static void
 nfsrv_object_create(struct vnode *vp, struct thread *td)
 {
 
 	if (vp == NULL || vp->v_type != VREG)
 		return;
 	(void) vfs_object_create(vp, td, td->td_ucred);
 }
 #endif
 
 /*
  * Resolve the path name "fname" by handing it to namei().
  * The nameidata is prepared for a LOOKUP with FOLLOW and LOCKLEAF set and
  * with fname treated as a user space address. When the lookup succeeds,
  * only the path name buffer is released; a namei() failure is passed
  * back to the caller unchanged.
  */
 int
 nfsrv_lookupfilename(struct nameidata *ndp, char *fname, NFSPROC_T *p)
 {
 	int error;
 
 	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, fname, p);
 	error = namei(ndp);
 	if (error != 0)
 		return (error);
 	NDFREE(ndp, NDF_ONLY_PNBUF);
 	return (0);
 }
 
 /*
  * Transfer the uid and the group list held in an nfscred into a ucred.
  * The group count must not be negative (asserted).
  */
 void
 newnfs_copycred(struct nfscred *nfscr, struct ucred *cr)
 {
 	int ngrp;
 
 	ngrp = nfscr->nfsc_ngroups;
 	KASSERT(ngrp >= 0, ("newnfs_copycred: negative nfsc_ngroups"));
 	cr->cr_uid = nfscr->nfsc_uid;
 	crsetgroups(cr, ngrp, nfscr->nfsc_groups);
 }
 
 /*
  * Map args from nfsmsleep() to msleep().
  */
 int
 nfsmsleep(void *chan, void *mutex, int prio, const char *wmesg,
     struct timespec *ts)
 {
 	u_int64_t nsecval;
 	int error, timeo;
 
 	if (ts) {
 		timeo = hz * ts->tv_sec;
 		nsecval = (u_int64_t)ts->tv_nsec;
 		nsecval = ((nsecval * ((u_int64_t)hz)) + 500000000) /
 		    1000000000;
 		timeo += (int)nsecval;
 	} else {
 		timeo = 0;
 	}
 	error = msleep(chan, (struct mtx *)mutex, prio, wmesg, timeo);
 	return (error);
 }
 
 /*
  * Get the file system info for the server. For now, just assume FFS.
  *
  * Fills *sip with fixed values; no file system is consulted.
  * A non-zero isdgram selects NFS_MAXDGRAMDATA as the preferred transfer
  * size; otherwise the preferred size equals the advertised maximum.
  * NOTE(review): the field names look like the NFSv3 FSINFO reply
  * (rtmax/rtpref/rtmult, wtmax/wtpref/wtmult, dtpref, ...) - confirm
  * against the definition of struct nfsfsinfo.
  */
 void
 nfsvno_getfs(struct nfsfsinfo *sip, int isdgram)
 {
 	int pref;
 
 	/*
 	 * XXX
 	 * There should be file system VFS OP(s) to get this information.
 	 * For now, assume ufs.
 	 */
 	if (isdgram)
 		pref = NFS_MAXDGRAMDATA;
 	else
-		pref = NFS_MAXDATA;
-	sip->fs_rtmax = NFS_MAXDATA;
+		pref = NFS_SRVMAXIO;
+	sip->fs_rtmax = NFS_SRVMAXIO;
 	sip->fs_rtpref = pref;
 	sip->fs_rtmult = NFS_FABLKSIZE;
-	sip->fs_wtmax = NFS_MAXDATA;
+	sip->fs_wtmax = NFS_SRVMAXIO;
 	sip->fs_wtpref = pref;
 	sip->fs_wtmult = NFS_FABLKSIZE;
 	sip->fs_dtpref = pref;
 	/* Largest 64-bit value. */
 	sip->fs_maxfilesize = 0xffffffffffffffffull;
 	/* Time delta of 0 seconds and 1 nanosecond. */
 	sip->fs_timedelta.tv_sec = 0;
 	sip->fs_timedelta.tv_nsec = 1;
 	sip->fs_properties = (NFSV3FSINFO_LINK |
 	    NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
 	    NFSV3FSINFO_CANSETTIME);
 }
 
 /*
  * Issue VOP_PATHCONF() for "flag" on vp, storing the answer in *retf.
  * File systems differ in how they reject a name they do not implement
  * (some return EINVAL, others EOPNOTSUPP). Either of those is turned
  * into success with a faked value, so that the NFSv3 Pathconf RPC does
  * not fail. Any other result is returned as is.
  */
 int
 nfsvno_pathconf(struct vnode *vp, int flag, register_t *retf,
     struct ucred *cred, struct thread *p)
 {
 	int error;
 
 	error = VOP_PATHCONF(vp, flag, retf);
 	if (error != EOPNOTSUPP && error != EINVAL)
 		goto out;
 
 	/* Substitute a default for the name the file system rejected. */
 	switch (flag) {
 	case _PC_LINK_MAX:
 		*retf = LINK_MAX;
 		break;
 	case _PC_NAME_MAX:
 		*retf = NAME_MAX;
 		break;
 	case _PC_CHOWN_RESTRICTED:
 	case _PC_NO_TRUNC:
 		*retf = 1;
 		break;
 	default:
 		/*
 		 * Reached only when the server starts asking for a new
 		 * _PC_xxx name without this table being extended.
 		 */
 		*retf = 0;
 		printf("nfsrvd pathconf flag=%d not supp\n", flag);
 		break;
 	}
 	error = 0;
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Fake nfsrv_atroot. Just return 0
  * Neither vp nor retp is examined and *retp is never written.
  */
 int
 nfsrv_atroot(struct vnode *vp, long *retp)
 {
 
 	return (0);
 }
 
 /*
  * Turn the given credential into a root credential: uid 0 and a group
  * list that holds the single gid 0.
  * (The BSDs disagree on whether the primary gid lives in a cr_gid field
  * of its own or in cr_groups[0]; here it is cr_groups[0].)
  */
 void
 newnfs_setroot(struct ucred *cred)
 {
 
 	cred->cr_ngroups = 1;
 	cred->cr_groups[0] = 0;
 	cred->cr_uid = 0;
 }
 
 /*
  * Get the client credential. Used for Renew and recovery.
  */
 struct ucred *
 newnfs_getcred(void)
 {
 	struct ucred *cred;
 	struct thread *td = curthread;
 
 	cred = crdup(td->td_ucred);
 	newnfs_setroot(cred);
 	return (cred);
 }
 
 /*
  * Nfs timer routine
  * Call the nfsd's timer function once/sec.
  * The routine re-arms itself on newnfsd_callout with a timeout of
  * nfscl_ticks each time it runs. "arg" is not used.
  */
 void
 newnfs_timer(void *arg)
 {
 	static time_t lasttime = 0;
 	/*
 	 * Call the server timer, if set up.
 	 * The hook is only invoked when NFSD_MONOSEC differs from the value
 	 * seen on the previous invocation, i.e. at most once per distinct
 	 * NFSD_MONOSEC value. No argument is passed to the hook.
 	 */
 	if (lasttime != NFSD_MONOSEC) {
 		lasttime = NFSD_MONOSEC;
 		if (nfsd_call_servertimer != NULL)
 			(*nfsd_call_servertimer)();
 	}
 	callout_reset(&newnfsd_callout, nfscl_ticks, newnfs_timer, NULL);
 }
 
 
 /*
  * Take a short nap: 5 seconds when errval is NFSERR_GRACE, a single tick
  * for anything else.
  * FreeBSD-CURRENT has no lbolt, so the sleep is a timed tsleep() on a
  * private channel that nobody ever wakes up.
  * Returns EINTR if tsleep() returned EINTR and 0 in every other case.
  */
 int
 nfs_catnap(int prio, int errval, const char *wmesg)
 {
 	static int non_event;
 	int ret, sleepticks;
 
 	sleepticks = (errval == NFSERR_GRACE) ? 5 * hz : 1;
 	ret = tsleep(&non_event, prio, wmesg, sleepticks);
 	return (ret == EINTR ? EINTR : 0);
 }
 
 /*
  * Get referral. For now, just fail.
  * None of the arguments is examined; the result is always NULL.
  */
 struct nfsreferral *
 nfsv4root_getreferral(struct vnode *vp, struct vnode *dvp, u_int32_t fileno)
 {
 
 	return (NULL);
 }
 
 /*
  * Entry point installed in the nfsd_call_nfscommon hook by
  * nfscommon_modevent(). It forwards the request to nfssvc_call() using
  * the calling thread's credential and returns that function's result.
  */
 static int
 nfssvc_nfscommon(struct thread *td, struct nfssvc_args *uap)
 {
 	int error;
 
 	error = nfssvc_call(td, uap, td->td_ucred);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 static int
 nfssvc_call(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
 {
 	int error = EINVAL;
 	struct nfsd_idargs nid;
 
 	if (uap->flag & NFSSVC_IDNAME) {
 		error = copyin(uap->argp, (caddr_t)&nid, sizeof (nid));
 		if (error)
 			goto out;
 		error = nfssvc_idname(&nid);
 		goto out;
 	} else if (uap->flag & NFSSVC_GETSTATS) {
 		error = copyout(&newnfsstats,
 		    CAST_USER_ADDR_T(uap->argp), sizeof (newnfsstats));
 		if (error == 0) {
 			if ((uap->flag & NFSSVC_ZEROCLTSTATS) != 0) {
 				newnfsstats.attrcache_hits = 0;
 				newnfsstats.attrcache_misses = 0;
 				newnfsstats.lookupcache_hits = 0;
 				newnfsstats.lookupcache_misses = 0;
 				newnfsstats.direofcache_hits = 0;
 				newnfsstats.direofcache_misses = 0;
 				newnfsstats.accesscache_hits = 0;
 				newnfsstats.accesscache_misses = 0;
 				newnfsstats.biocache_reads = 0;
 				newnfsstats.read_bios = 0;
 				newnfsstats.read_physios = 0;
 				newnfsstats.biocache_writes = 0;
 				newnfsstats.write_bios = 0;
 				newnfsstats.write_physios = 0;
 				newnfsstats.biocache_readlinks = 0;
 				newnfsstats.readlink_bios = 0;
 				newnfsstats.biocache_readdirs = 0;
 				newnfsstats.readdir_bios = 0;
 				newnfsstats.rpcretries = 0;
 				newnfsstats.rpcrequests = 0;
 				newnfsstats.rpctimeouts = 0;
 				newnfsstats.rpcunexpected = 0;
 				newnfsstats.rpcinvalid = 0;
 				bzero(newnfsstats.rpccnt,
 				    sizeof(newnfsstats.rpccnt));
 			}
 			if ((uap->flag & NFSSVC_ZEROSRVSTATS) != 0) {
 				newnfsstats.srvrpc_errs = 0;
 				newnfsstats.srv_errs = 0;
 				newnfsstats.srvcache_inproghits = 0;
 				newnfsstats.srvcache_idemdonehits = 0;
 				newnfsstats.srvcache_nonidemdonehits = 0;
 				newnfsstats.srvcache_misses = 0;
 				newnfsstats.srvcache_tcppeak = 0;
 				newnfsstats.srvclients = 0;
 				newnfsstats.srvopenowners = 0;
 				newnfsstats.srvopens = 0;
 				newnfsstats.srvlockowners = 0;
 				newnfsstats.srvlocks = 0;
 				newnfsstats.srvdelegates = 0;
 				newnfsstats.clopenowners = 0;
 				newnfsstats.clopens = 0;
 				newnfsstats.cllockowners = 0;
 				newnfsstats.cllocks = 0;
 				newnfsstats.cldelegates = 0;
 				newnfsstats.cllocalopenowners = 0;
 				newnfsstats.cllocalopens = 0;
 				newnfsstats.cllocallockowners = 0;
 				newnfsstats.cllocallocks = 0;
 				bzero(newnfsstats.srvrpccnt,
 				    sizeof(newnfsstats.srvrpccnt));
 				bzero(newnfsstats.cbrpccnt,
 				    sizeof(newnfsstats.cbrpccnt));
 			}
 		}
 		goto out;
 	} else if (uap->flag & NFSSVC_NFSUSERDPORT) {
 		u_short sockport;
 
 		error = copyin(uap->argp, (caddr_t)&sockport,
 		    sizeof (u_short));
 		if (!error)
 			error = nfsrv_nfsuserdport(sockport, p);
 	} else if (uap->flag & NFSSVC_NFSUSERDDELPORT) {
 		nfsrv_nfsuserddelport();
 		error = 0;
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * One-time set up of the locks shared by client and server.
  * All three modevent routines call this so that the locks exist early
  * enough; every call after the first one does nothing.
  */
 void
 newnfs_portinit(void)
 {
 	static int inited = 0;
 
 	if (inited == 0) {
 		inited = 1;
 		/* Initialize SMP locks used by both client and server. */
 		mtx_init(&newnfsd_mtx, "newnfsd_mtx", NULL, MTX_DEF);
 		mtx_init(&nfs_state_mutex, "nfs_state_mutex", NULL, MTX_DEF);
 	}
 }
 
 /*
  * Report whether the file system holding vp supports NFSv4 ACLs.
  * The vnode must be locked by the caller (asserted).
  * Returns 1 when ACL use is enabled (nfsrv_useacl != 0) and
  * VOP_PATHCONF(_PC_ACL_NFS4) succeeds with a non-zero value,
  * 0 in every other case.
  */
 int
 nfs_supportsnfsv4acls(struct vnode *vp)
 {
 	register_t retval;
 	int error;
 
 	ASSERT_VOP_LOCKED(vp, "nfs supports nfsv4acls");
 
 	if (nfsrv_useacl == 0)
 		return (0);
 	error = VOP_PATHCONF(vp, _PC_ACL_NFS4, &retval);
 	return ((error == 0 && retval != 0) ? 1 : 0);
 }
 
 extern int (*nfsd_call_nfscommon)(struct thread *, struct nfssvc_args *);
 
 /*
  * Called once to initialize data structures...
  *
  * Module event handler for nfscommon.
  *   MOD_LOAD   - sets up the mutexes and the callout, calls newnfs_init()
  *                and installs nfssvc_nfscommon in the nfsd_call_nfscommon
  *                hook. A repeated load while already loaded is a no-op.
  *   MOD_UNLOAD - refused with EBUSY while newnfs_numnfsd, nfsrv_nfsuserd
  *                or nfs_numnfscbd is non-zero; otherwise removes the hook,
  *                drains the callout and destroys the mutexes.
  *   other      - EOPNOTSUPP.
  * Returns 0 on success or one of the errors above.
  */
 static int
 nfscommon_modevent(module_t mod, int type, void *data)
 {
 	int error = 0;
 	static int loaded = 0;
 
 	switch (type) {
 	case MOD_LOAD:
 		if (loaded)
 			goto out;
 		/* Sets up newnfsd_mtx and nfs_state_mutex (first call only). */
 		newnfs_portinit();
 		mtx_init(&nfs_nameid_mutex, "nfs_nameid_mutex", NULL, MTX_DEF);
 		mtx_init(&nfs_sockl_mutex, "nfs_sockl_mutex", NULL, MTX_DEF);
 		mtx_init(&nfs_slock_mutex, "nfs_slock_mutex", NULL, MTX_DEF);
 		mtx_init(&nfs_req_mutex, "nfs_req_mutex", NULL, MTX_DEF);
 		mtx_init(&nfsrv_nfsuserdsock.nr_mtx, "nfsuserd", NULL,
 		    MTX_DEF);
 		callout_init(&newnfsd_callout, CALLOUT_MPSAFE);
 		newnfs_init();
 		/* Publish the entry point only after everything is set up. */
 		nfsd_call_nfscommon = nfssvc_nfscommon;
 		loaded = 1;
 		break;
 
 	case MOD_UNLOAD:
 		/* Refuse to unload while any of these counts is non-zero. */
 		if (newnfs_numnfsd != 0 || nfsrv_nfsuserd != 0 ||
 		    nfs_numnfscbd != 0) {
 			error = EBUSY;
 			break;
 		}
 
 		/* Remove the hook first, then stop the timer callout. */
 		nfsd_call_nfscommon = NULL;
 		callout_drain(&newnfsd_callout);
 		/* and get rid of the mutexes */
 		mtx_destroy(&nfs_nameid_mutex);
 		mtx_destroy(&newnfsd_mtx);
 		mtx_destroy(&nfs_state_mutex);
 		mtx_destroy(&nfs_sockl_mutex);
 		mtx_destroy(&nfs_slock_mutex);
 		mtx_destroy(&nfs_req_mutex);
 		mtx_destroy(&nfsrv_nfsuserdsock.nr_mtx);
 		loaded = 0;
 		break;
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 out:
 	NFSEXITCODE(error);
 	return error;
 }
 static moduledata_t nfscommon_mod = {
 	"nfscommon",
 	nfscommon_modevent,
 	NULL,
 };
 DECLARE_MODULE(nfscommon, nfscommon_mod, SI_SUB_VFS, SI_ORDER_ANY);
 
 /* So that loader and kldload(2) can find us, wherever we are.. */
 MODULE_VERSION(nfscommon, 1);
 MODULE_DEPEND(nfscommon, nfssvc, 1, 1, 1);
 MODULE_DEPEND(nfscommon, krpc, 1, 1, 1);
 
Index: user/ngie/more-tests/sys/fs/nfs/nfsproto.h
===================================================================
--- user/ngie/more-tests/sys/fs/nfs/nfsproto.h	(revision 281675)
+++ user/ngie/more-tests/sys/fs/nfs/nfsproto.h	(revision 281676)
@@ -1,1335 +1,1346 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _NFS_NFSPROTO_H_
 #define	_NFS_NFSPROTO_H_
 
 /*
  * nfs definitions as per the Version 2, 3 and 4 specs
  */
 
 /*
  * Constants as defined in the NFS Version 2, 3 and 4 specs.
  * "NFS: Network File System Protocol Specification" RFC1094
  * and in the "NFS: Network File System Version 3 Protocol
  * Specification"
  */
 
 #define	NFS_PORT	2049
 #define	NFS_PROG	100003
 #define	NFS_CALLBCKPROG	0x40000000	/* V4 only */
 #define	NFS_VER2	2
 #define	NFS_VER3	3
 #define	NFS_VER4	4
 #define	NFS_V2MAXDATA	8192
 #define	NFS_MAXDGRAMDATA 16384
-#define	NFS_MAXDATA	NFS_MAXBSIZE
 #define	NFS_MAXPATHLEN	1024
 #define	NFS_MAXNAMLEN	255
 #define	NFS_MAXPKTHDR	404
-#define	NFS_MAXPACKET	(NFS_MAXDATA + 2048)
+#define	NFS_MAXPACKET	(NFS_SRVMAXIO + 2048)
 #define	NFS_MINPACKET	20
 #define	NFS_FABLKSIZE	512	/* Size in bytes of a block wrt fa_blocks */
 #define	NFSV4_MINORVERSION	0	/* V4 Minor version */
 #define	NFSV41_MINORVERSION	1	/* V4 Minor version */
 #define	NFSV4_CBVERS		1	/* V4 CB Version */
 #define	NFSV41_CBVERS		4	/* V4.1 CB Version */
 #define	NFSV4_SMALLSTR	50		/* Strings small enough for stack */
+
+/*
+ * This value is not fixed by the RFCs.
+ * It is the maximum data size the server supports for NFSv3 or NFSv4 over
+ * TCP.  It should be set to the I/O size preferred by ZFS or MAXBSIZE,
+ * whichever is greater.
+ * ZFS currently prefers 128K.
+ * It used to be called NFS_MAXDATA, but was renamed to make it clear that
+ * it applies to the server side only and to avoid a conflict with the
+ * NFS_MAXDATA defined in rpcsvc/nfs_prot.h for userland.
+ */
+#define	NFS_SRVMAXIO	(128 * 1024)
 
 /* Stat numbers for rpc returns (version 2, 3 and 4) */
 /*
  * These numbers are hard-wired in the RFCs, so they can't be changed.
  * The code currently assumes that the ones < 10000 are the same as
  * sys/errno.h and that sys/errno.h will never go as high as 10000.
  * If the value in sys/errno.h of any entry listed below is changed,
  * the NFS code must be modified to do the mapping between them.
  * (You can ignore NFSERR_WFLUSH, since it is never actually used.)
  */
 #define	NFSERR_OK		0
 #define	NFSERR_PERM		1
 #define	NFSERR_NOENT		2
 #define	NFSERR_IO		5
 #define	NFSERR_NXIO		6
 #define	NFSERR_ACCES		13
 #define	NFSERR_EXIST		17
 #define	NFSERR_XDEV		18	/* Version 3, 4 only */
 #define	NFSERR_NODEV		19
 #define	NFSERR_NOTDIR		20
 #define	NFSERR_ISDIR		21
 #define	NFSERR_INVAL		22	/* Version 3, 4 only */
 #define	NFSERR_FBIG		27
 #define	NFSERR_NOSPC		28
 #define	NFSERR_ROFS		30
 #define	NFSERR_MLINK		31	/* Version 3, 4 only */
 #define	NFSERR_NAMETOL		63
 #define	NFSERR_NOTEMPTY		66
 #define	NFSERR_DQUOT		69
 #define	NFSERR_STALE		70
 #define	NFSERR_REMOTE		71	/* Version 3 only */
 #define	NFSERR_WFLUSH		99	/* Version 2 only */
 #define	NFSERR_BADHANDLE	10001	/* These are Version 3, 4 only */
 #define	NFSERR_NOT_SYNC		10002	/* Version 3 Only */
 #define	NFSERR_BAD_COOKIE	10003
 #define	NFSERR_NOTSUPP		10004
 #define	NFSERR_TOOSMALL		10005
 #define	NFSERR_SERVERFAULT	10006
 #define	NFSERR_BADTYPE		10007
 #define	NFSERR_DELAY		10008	/* Called NFSERR_JUKEBOX for V3 */
 #define	NFSERR_SAME		10009	/* These are Version 4 only */
 #define	NFSERR_DENIED		10010
 #define	NFSERR_EXPIRED		10011
 #define	NFSERR_LOCKED		10012
 #define	NFSERR_GRACE		10013
 #define	NFSERR_FHEXPIRED	10014
 #define	NFSERR_SHAREDENIED	10015
 #define	NFSERR_WRONGSEC		10016
 #define	NFSERR_CLIDINUSE	10017
 #define	NFSERR_RESOURCE		10018
 #define	NFSERR_MOVED		10019
 #define	NFSERR_NOFILEHANDLE	10020
 #define	NFSERR_MINORVERMISMATCH	10021
 #define	NFSERR_STALECLIENTID	10022
 #define	NFSERR_STALESTATEID	10023
 #define	NFSERR_OLDSTATEID	10024
 #define	NFSERR_BADSTATEID	10025
 #define	NFSERR_BADSEQID		10026
 #define	NFSERR_NOTSAME		10027
 #define	NFSERR_LOCKRANGE	10028
 #define	NFSERR_SYMLINK		10029
 #define	NFSERR_RESTOREFH	10030
 #define	NFSERR_LEASEMOVED	10031
 #define	NFSERR_ATTRNOTSUPP	10032
 #define	NFSERR_NOGRACE		10033
 #define	NFSERR_RECLAIMBAD	10034
 #define	NFSERR_RECLAIMCONFLICT	10035
 #define	NFSERR_BADXDR		10036
 #define	NFSERR_LOCKSHELD	10037
 #define	NFSERR_OPENMODE		10038
 #define	NFSERR_BADOWNER		10039
 #define	NFSERR_BADCHAR		10040
 #define	NFSERR_BADNAME		10041
 #define	NFSERR_BADRANGE		10042
 #define	NFSERR_LOCKNOTSUPP	10043
 #define	NFSERR_OPILLEGAL	10044
 #define	NFSERR_DEADLOCK		10045
 #define	NFSERR_FILEOPEN		10046
 #define	NFSERR_ADMINREVOKED	10047
 #define	NFSERR_CBPATHDOWN	10048
 
 /* NFSv4.1 specific errors. */
 #define	NFSERR_BADIOMODE	10049
 #define	NFSERR_BADLAYOUT	10050
 #define	NFSERR_BADSESSIONDIGEST	10051
 #define	NFSERR_BADSESSION	10052
 #define	NFSERR_BADSLOT		10053
 #define	NFSERR_COMPLETEALREADY	10054
 #define	NFSERR_NOTBNDTOSESS	10055
 #define	NFSERR_DELEGALREADYWANT	10056
 #define	NFSERR_BACKCHANBUSY	10057
 #define	NFSERR_LAYOUTTRYLATER	10058
 #define	NFSERR_LAYOUTUNAVAIL	10059
 #define	NFSERR_NOMATCHLAYOUT	10060
 #define	NFSERR_RECALLCONFLICT	10061
 #define	NFSERR_UNKNLAYOUTTYPE	10062
 #define	NFSERR_SEQMISORDERED	10063
 #define	NFSERR_SEQUENCEPOS	10064
 #define	NFSERR_REQTOOBIG	10065
 #define	NFSERR_REPTOOBIG	10066
 #define	NFSERR_REPTOOBIGTOCACHE	10067
 #define	NFSERR_RETRYUNCACHEDREP	10068
 #define	NFSERR_UNSAFECOMPOUND	10069
 #define	NFSERR_TOOMANYOPS	10070
 #define	NFSERR_OPNOTINSESS	10071
 #define	NFSERR_HASHALGUNSUPP	10072
 #define	NFSERR_CLIENTIDBUSY	10074
 #define	NFSERR_PNFSIOHOLE	10075
 #define	NFSERR_SEQFALSERETRY	10076
 #define	NFSERR_BADHIGHSLOT	10077
 #define	NFSERR_DEADSESSION	10078
 #define	NFSERR_ENCRALGUNSUPP	10079
 #define	NFSERR_PNFSNOLAYOUT	10080
 #define	NFSERR_NOTONLYOP	10081
 #define	NFSERR_WRONGCRED	10082
 #define	NFSERR_WRONGTYPE	10083
 #define	NFSERR_DIRDELEGUNAVAIL	10084
 #define	NFSERR_REJECTDELEG	10085
 #define	NFSERR_RETURNCONFLICT	10086
 #define	NFSERR_DELEGREVOKED	10087
 
 #define	NFSERR_STALEWRITEVERF	30001	/* Fake return for nfs_commit() */
 #define	NFSERR_DONTREPLY	30003	/* Don't process request */
 #define	NFSERR_RETVOID		30004	/* Return void, not error */
 #define	NFSERR_REPLYFROMCACHE	30005	/* Reply from recent request cache */
 #define	NFSERR_STALEDONTRECOVER	30006	/* Don't initiate recovery */
 
 #define	NFSERR_RPCERR		0x40000000 /* Mark an RPC layer error */
 #define	NFSERR_AUTHERR		0x80000000 /* Mark an authentication error */
 
 #define	NFSERR_RPCMISMATCH	(NFSERR_RPCERR | RPC_MISMATCH)
 #define	NFSERR_PROGUNAVAIL	(NFSERR_RPCERR | RPC_PROGUNAVAIL)
 #define	NFSERR_PROGMISMATCH	(NFSERR_RPCERR | RPC_PROGMISMATCH)
 #define	NFSERR_PROGNOTV4	(NFSERR_RPCERR | 0xffff)
 #define	NFSERR_PROCUNAVAIL	(NFSERR_RPCERR | RPC_PROCUNAVAIL)
 #define	NFSERR_GARBAGE		(NFSERR_RPCERR | RPC_GARBAGE)
 
 /* Sizes in bytes of various nfs rpc components */
 #define	NFSX_UNSIGNED	4
 #define	NFSX_HYPER	(2 * NFSX_UNSIGNED)
 
 /* specific to NFS Version 2 */
 #define	NFSX_V2FH	32
 #define	NFSX_V2FATTR	68
 #define	NFSX_V2SATTR	32
 #define	NFSX_V2COOKIE	4
 #define	NFSX_V2STATFS	20
 
 /* specific to NFS Version 3 */
 #define	NFSX_V3FHMAX		64	/* max. allowed by protocol */
 #define	NFSX_V3FATTR		84
 #define	NFSX_V3SATTR		60	/* max. all fields filled in */
 #define	NFSX_V3SRVSATTR		(sizeof (struct nfsv3_sattr))
 #define	NFSX_V3POSTOPATTR	(NFSX_V3FATTR + NFSX_UNSIGNED)
 #define	NFSX_V3WCCDATA		(NFSX_V3POSTOPATTR + 8 * NFSX_UNSIGNED)
 #define	NFSX_V3STATFS		52
 #define	NFSX_V3FSINFO		48
 #define	NFSX_V3PATHCONF		24
 
 /* specific to NFS Version 4 */
 #define	NFSX_V4FHMAX		128
 #define	NFSX_V4FSID		(2 * NFSX_HYPER)
 #define	NFSX_V4SPECDATA		(2 * NFSX_UNSIGNED)
 #define	NFSX_V4TIME		(NFSX_HYPER + NFSX_UNSIGNED)
 #define	NFSX_V4SETTIME		(NFSX_UNSIGNED + NFSX_V4TIME)
 #define	NFSX_V4SESSIONID	16
 #define	NFSX_V4DEVICEID		16
 
 /* sizes common to multiple NFS versions */
 #define	NFSX_FHMAX		(NFSX_V4FHMAX)
 #define	NFSX_MYFH		(sizeof (fhandle_t)) /* size this server uses */
 #define	NFSX_VERF 		8
 #define	NFSX_STATEIDOTHER	12
 #define	NFSX_STATEID		(NFSX_UNSIGNED + NFSX_STATEIDOTHER)
 #define	NFSX_GSSH		12
 
 /* variants for multiple versions */
 #define	NFSX_STATFS(v3)		((v3) ? NFSX_V3STATFS : NFSX_V2STATFS)
 
 /* nfs rpc procedure numbers (before version mapping) */
 #define	NFSPROC_NULL		0
 #define	NFSPROC_GETATTR		1
 #define	NFSPROC_SETATTR		2
 #define	NFSPROC_LOOKUP		3
 #define	NFSPROC_ACCESS		4
 #define	NFSPROC_READLINK	5
 #define	NFSPROC_READ		6
 #define	NFSPROC_WRITE		7
 #define	NFSPROC_CREATE		8
 #define	NFSPROC_MKDIR		9
 #define	NFSPROC_SYMLINK		10
 #define	NFSPROC_MKNOD		11
 #define	NFSPROC_REMOVE		12
 #define	NFSPROC_RMDIR		13
 #define	NFSPROC_RENAME		14
 #define	NFSPROC_LINK		15
 #define	NFSPROC_READDIR		16
 #define	NFSPROC_READDIRPLUS	17
 #define	NFSPROC_FSSTAT		18
 #define	NFSPROC_FSINFO		19
 #define	NFSPROC_PATHCONF	20
 #define	NFSPROC_COMMIT		21
 
 /*
  * The lower numbers -> 21 are used by NFSv2 and v3. These define higher
  * numbers used by NFSv4.
  * NFS_V3NPROCS is one greater than the last V3 op and NFS_NPROCS is
  * one greater than the last number.
  */
 #ifndef	NFS_V3NPROCS
 #define	NFS_V3NPROCS		22
 
 #define	NFSPROC_LOOKUPP		22
 #define	NFSPROC_SETCLIENTID	23
 #define	NFSPROC_SETCLIENTIDCFRM	24
 #define	NFSPROC_LOCK		25
 #define	NFSPROC_LOCKU		26
 #define	NFSPROC_OPEN		27
 #define	NFSPROC_CLOSE		28
 #define	NFSPROC_OPENCONFIRM	29
 #define	NFSPROC_LOCKT		30
 #define	NFSPROC_OPENDOWNGRADE	31
 #define	NFSPROC_RENEW		32
 #define	NFSPROC_PUTROOTFH	33
 #define	NFSPROC_RELEASELCKOWN	34
 #define	NFSPROC_DELEGRETURN	35
 #define	NFSPROC_RETDELEGREMOVE	36
 #define	NFSPROC_RETDELEGRENAME1	37
 #define	NFSPROC_RETDELEGRENAME2	38
 #define	NFSPROC_GETACL		39
 #define	NFSPROC_SETACL		40
 
 /*
  * Must be defined as one higher than the last Proc# above.
  */
 #define	NFSV4_NPROCS		41
 
 /* Additional procedures for NFSv4.1. */
 #define	NFSPROC_EXCHANGEID	41
 #define	NFSPROC_CREATESESSION	42
 #define	NFSPROC_DESTROYSESSION	43
 #define	NFSPROC_DESTROYCLIENT	44
 #define	NFSPROC_FREESTATEID	45
 #define	NFSPROC_LAYOUTGET	46
 #define	NFSPROC_GETDEVICEINFO	47
 #define	NFSPROC_LAYOUTCOMMIT	48
 #define	NFSPROC_LAYOUTRETURN	49
 #define	NFSPROC_RECLAIMCOMPL	50
 #define	NFSPROC_WRITEDS		51
 #define	NFSPROC_READDS		52
 #define	NFSPROC_COMMITDS	53
 
 /*
  * Must be defined as one higher than the last NFSv4.1 Proc# above.
  */
 #define	NFSV41_NPROCS		54
 
 #endif	/* NFS_V3NPROCS */
 
 /*
  * Define NFS_NPROCS as NFSV4_NPROCS for the experimental kernel code.
  */
 #ifndef	NFS_NPROCS
 #define	NFS_NPROCS		NFSV4_NPROCS
 #endif
 
 /*
  * NFSPROC_NOOP is a fake op# that can't be the same as any V2/3/4 Procedure
  * or Operation#. Since the NFS V4 Op #s go higher, use NFSV41_NOPS, which
  * is one greater than the highest Op#.
  */
 #define	NFSPROC_NOOP		NFSV41_NOPS
 
 /* Actual Version 2 procedure numbers */
 #define	NFSV2PROC_NULL		0
 #define	NFSV2PROC_GETATTR	1
 #define	NFSV2PROC_SETATTR	2
 #define	NFSV2PROC_NOOP		3
 #define	NFSV2PROC_ROOT		NFSV2PROC_NOOP	/* Obsolete */
 #define	NFSV2PROC_LOOKUP	4
 #define	NFSV2PROC_READLINK	5
 #define	NFSV2PROC_READ		6
 #define	NFSV2PROC_WRITECACHE	NFSV2PROC_NOOP	/* Obsolete */
 #define	NFSV2PROC_WRITE		8
 #define	NFSV2PROC_CREATE	9
 #define	NFSV2PROC_REMOVE	10
 #define	NFSV2PROC_RENAME	11
 #define	NFSV2PROC_LINK		12
 #define	NFSV2PROC_SYMLINK	13
 #define	NFSV2PROC_MKDIR		14
 #define	NFSV2PROC_RMDIR		15
 #define	NFSV2PROC_READDIR	16
 #define	NFSV2PROC_STATFS	17
 
 /*
  * V4 Procedure numbers
  */
 #define	NFSV4PROC_COMPOUND	1
 #define	NFSV4PROC_CBNULL	0
 #define	NFSV4PROC_CBCOMPOUND	1
 
 /*
  * Constants used by the Version 3 and 4 protocols for various RPCs
  */
 #define	NFSV3SATTRTIME_DONTCHANGE	0
 #define	NFSV3SATTRTIME_TOSERVER		1
 #define	NFSV3SATTRTIME_TOCLIENT		2
 
 #define	NFSV4SATTRTIME_TOSERVER		0
 #define	NFSV4SATTRTIME_TOCLIENT		1
 
 #define	NFSV4LOCKT_READ			1
 #define	NFSV4LOCKT_WRITE		2
 #define	NFSV4LOCKT_READW		3
 #define	NFSV4LOCKT_WRITEW		4
 #define	NFSV4LOCKT_RELEASE		5
 
 #define	NFSV4OPEN_NOCREATE		0
 #define	NFSV4OPEN_CREATE		1
 #define	NFSV4OPEN_CLAIMNULL		0
 #define	NFSV4OPEN_CLAIMPREVIOUS		1
 #define	NFSV4OPEN_CLAIMDELEGATECUR	2
 #define	NFSV4OPEN_CLAIMDELEGATEPREV	3
 #define	NFSV4OPEN_CLAIMFH		4
 #define	NFSV4OPEN_CLAIMDELEGATECURFH	5
 #define	NFSV4OPEN_CLAIMDELEGATEPREVFH	6
 #define	NFSV4OPEN_DELEGATENONE		0
 #define	NFSV4OPEN_DELEGATEREAD		1
 #define	NFSV4OPEN_DELEGATEWRITE		2
 #define	NFSV4OPEN_DELEGATENONEEXT	3
 #define	NFSV4OPEN_LIMITSIZE		1
 #define	NFSV4OPEN_LIMITBLOCKS		2
 
 /*
  * Nfs V4 ACE stuff
  */
 #define	NFSV4ACE_ALLOWEDTYPE		0x00000000
 #define	NFSV4ACE_DENIEDTYPE		0x00000001
 #define	NFSV4ACE_AUDITTYPE		0x00000002
 #define	NFSV4ACE_ALARMTYPE		0x00000003
 
 #define	NFSV4ACE_SUPALLOWED		0x00000001
 #define	NFSV4ACE_SUPDENIED		0x00000002
 #define	NFSV4ACE_SUPAUDIT		0x00000004
 #define	NFSV4ACE_SUPALARM		0x00000008
 
 #define	NFSV4ACE_SUPTYPES	(NFSV4ACE_SUPALLOWED | NFSV4ACE_SUPDENIED)
 
 #define	NFSV4ACE_FILEINHERIT		0x00000001
 #define	NFSV4ACE_DIRECTORYINHERIT	0x00000002
 #define	NFSV4ACE_NOPROPAGATEINHERIT	0x00000004
 #define	NFSV4ACE_INHERITONLY		0x00000008
 #define	NFSV4ACE_SUCCESSFULACCESS	0x00000010
 #define	NFSV4ACE_FAILEDACCESS		0x00000020
 #define	NFSV4ACE_IDENTIFIERGROUP	0x00000040
 
 #define	NFSV4ACE_READDATA		0x00000001
 #define	NFSV4ACE_LISTDIRECTORY		0x00000001
 #define	NFSV4ACE_WRITEDATA		0x00000002
 #define	NFSV4ACE_ADDFILE		0x00000002
 #define	NFSV4ACE_APPENDDATA		0x00000004
 #define	NFSV4ACE_ADDSUBDIRECTORY	0x00000004
 #define	NFSV4ACE_READNAMEDATTR		0x00000008
 #define	NFSV4ACE_WRITENAMEDATTR		0x00000010
 #define	NFSV4ACE_EXECUTE		0x00000020
 #define	NFSV4ACE_SEARCH			0x00000020
 #define	NFSV4ACE_DELETECHILD		0x00000040
 #define	NFSV4ACE_READATTRIBUTES		0x00000080
 #define	NFSV4ACE_WRITEATTRIBUTES	0x00000100
 #define	NFSV4ACE_DELETE			0x00010000
 #define	NFSV4ACE_READACL		0x00020000
 #define	NFSV4ACE_WRITEACL		0x00040000
 #define	NFSV4ACE_WRITEOWNER		0x00080000
 #define	NFSV4ACE_SYNCHRONIZE		0x00100000
 
 /*
  * Here are the mappings between mode bits and acl mask bits for
  * directories and other files.
  * (Named attributes have not been included, since named attributes are
  *  not yet supported.)
  * The mailing list seems to indicate that NFSV4ACE_EXECUTE refers to
  * searching a directory, although I can't find a statement of that in
  * the RFC.
  */
 #define	NFSV4ACE_ALLFILESMASK	(NFSV4ACE_READATTRIBUTES | NFSV4ACE_READACL)
 #define	NFSV4ACE_OWNERMASK	(NFSV4ACE_WRITEATTRIBUTES | NFSV4ACE_WRITEACL)
 #define	NFSV4ACE_DIRREADMASK	NFSV4ACE_LISTDIRECTORY
 #define	NFSV4ACE_DIREXECUTEMASK	NFSV4ACE_EXECUTE
 #define	NFSV4ACE_DIRWRITEMASK	(NFSV4ACE_ADDFILE | 			\
 		NFSV4ACE_ADDSUBDIRECTORY | NFSV4ACE_DELETECHILD)
 #define	NFSV4ACE_READMASK	NFSV4ACE_READDATA
 #define	NFSV4ACE_WRITEMASK	(NFSV4ACE_WRITEDATA | NFSV4ACE_APPENDDATA)
 #define	NFSV4ACE_EXECUTEMASK	NFSV4ACE_EXECUTE
 #define	NFSV4ACE_ALLFILEBITS	(NFSV4ACE_READMASK | NFSV4ACE_WRITEMASK | \
 	NFSV4ACE_EXECUTEMASK | NFSV4ACE_SYNCHRONIZE)
 #define	NFSV4ACE_ALLDIRBITS	(NFSV4ACE_DIRREADMASK | 		\
 	NFSV4ACE_DIRWRITEMASK | NFSV4ACE_DIREXECUTEMASK)
 #define	NFSV4ACE_AUDITMASK	0x0
 
 /*
  * These GENERIC masks are not used and are no longer believed to be useful.
  */
 #define	NFSV4ACE_GENERICREAD		0x00120081
 #define	NFSV4ACE_GENERICWRITE		0x00160106
 #define	NFSV4ACE_GENERICEXECUTE		0x001200a0
 
 #define	NFSSTATEID_PUTALLZERO		0
 #define	NFSSTATEID_PUTALLONE		1
 #define	NFSSTATEID_PUTSTATEID		2
 #define	NFSSTATEID_PUTSEQIDZERO		3
 
 /*
  * Bits for share access and deny.
  */
 #define	NFSV4OPEN_ACCESSREAD		0x00000001
 #define	NFSV4OPEN_ACCESSWRITE		0x00000002
 #define	NFSV4OPEN_ACCESSBOTH		0x00000003
 #define	NFSV4OPEN_WANTDELEGMASK		0x0000ff00
 #define	NFSV4OPEN_WANTREADDELEG		0x00000100
 #define	NFSV4OPEN_WANTWRITEDELEG	0x00000200
 #define	NFSV4OPEN_WANTANYDELEG		0x00000300
 #define	NFSV4OPEN_WANTNODELEG		0x00000400
 #define	NFSV4OPEN_WANTCANCEL		0x00000500
 #define	NFSV4OPEN_WANTSIGNALDELEG	0x00010000
 #define	NFSV4OPEN_WANTPUSHDELEG		0x00020000
 
 #define	NFSV4OPEN_DENYNONE		0x00000000
 #define	NFSV4OPEN_DENYREAD		0x00000001
 #define	NFSV4OPEN_DENYWRITE		0x00000002
 #define	NFSV4OPEN_DENYBOTH		0x00000003
 
 /*
  * Delegate_none_ext reply values.
  */
 #define	NFSV4OPEN_NOTWANTED		0
 #define	NFSV4OPEN_CONTENTION		1
 #define	NFSV4OPEN_RESOURCE		2
 #define	NFSV4OPEN_NOTSUPPFTYPE		3
 #define	NFSV4OPEN_NOTSUPPWRITEFTYPE	4
 #define	NFSV4OPEN_NOTSUPPUPGRADE	5
 #define	NFSV4OPEN_NOTSUPPDOWNGRADE	6
 #define	NFSV4OPEN_CANCELLED		7
 #define	NFSV4OPEN_ISDIR			8
 
 /*
  * Open result flags
  * (The first four are in the spec. The rest are used internally.)
  */
 #define	NFSV4OPEN_RESULTCONFIRM		0x00000002
 #define	NFSV4OPEN_LOCKTYPEPOSIX		0x00000004
 #define	NFSV4OPEN_PRESERVEUNLINKED	0x00000008
 #define	NFSV4OPEN_MAYNOTIFYLOCK		0x00000020
 #define	NFSV4OPEN_RFLAGS 						\
     (NFSV4OPEN_RESULTCONFIRM | NFSV4OPEN_LOCKTYPEPOSIX |		\
     NFSV4OPEN_PRESERVEUNLINKED | NFSV4OPEN_MAYNOTIFYLOCK)
 #define	NFSV4OPEN_RECALL		0x00010000
 #define	NFSV4OPEN_READDELEGATE		0x00020000
 #define	NFSV4OPEN_WRITEDELEGATE		0x00040000
 #define	NFSV4OPEN_WDRESOURCE		0x00080000
 #define	NFSV4OPEN_WDCONTENTION		0x00100000
 #define	NFSV4OPEN_WDNOTWANTED		0x00200000
 
 /*
  * NFS V4 File Handle types
  */
 #define	NFSV4FHTYPE_PERSISTENT		0x0
 #define	NFSV4FHTYPE_NOEXPIREWITHOPEN	0x1
 #define	NFSV4FHTYPE_VOLATILEANY		0x2
 #define	NFSV4FHTYPE_VOLATILEMIGRATE	0x4
 #define	NFSV4FHTYPE_VOLATILERENAME	0x8
 
 /*
  * Maximum size of V4 opaque strings.
  */
 #define	NFSV4_OPAQUELIMIT	1024
 
 /*
  * These are the same for V3 and V4.
  */
 #define	NFSACCESS_READ			0x01
 #define	NFSACCESS_LOOKUP		0x02
 #define	NFSACCESS_MODIFY		0x04
 #define	NFSACCESS_EXTEND		0x08
 #define	NFSACCESS_DELETE		0x10
 #define	NFSACCESS_EXECUTE		0x20
 
 #define	NFSWRITE_UNSTABLE		0
 #define	NFSWRITE_DATASYNC		1
 #define	NFSWRITE_FILESYNC		2
 
 #define	NFSCREATE_UNCHECKED		0
 #define	NFSCREATE_GUARDED		1
 #define	NFSCREATE_EXCLUSIVE		2
 #define	NFSCREATE_EXCLUSIVE41		3
 
 #define	NFSV3FSINFO_LINK		0x01
 #define	NFSV3FSINFO_SYMLINK		0x02
 #define	NFSV3FSINFO_HOMOGENEOUS		0x08
 #define	NFSV3FSINFO_CANSETTIME		0x10
 
 /* Flags for Exchange ID */
 #define	NFSV4EXCH_SUPPMOVEDREFER	0x00000001
 #define	NFSV4EXCH_SUPPMOVEDMIGR	0x00000002
 #define	NFSV4EXCH_BINDPRINCSTATEID	0x00000100
 #define	NFSV4EXCH_USENONPNFS		0x00010000
 #define	NFSV4EXCH_USEPNFSMDS		0x00020000
 #define	NFSV4EXCH_USEPNFSDS		0x00040000
 #define	NFSV4EXCH_MASKPNFS		0x00070000
 #define	NFSV4EXCH_UPDCONFIRMEDRECA	0x40000000
 #define	NFSV4EXCH_CONFIRMEDR		0x80000000
 
 /* State Protects */
 #define	NFSV4EXCH_SP4NONE		0
 #define	NFSV4EXCH_SP4MACHCRED		1
 #define	NFSV4EXCH_SP4SSV		2
 
 /* Flags for Create Session */
 #define	NFSV4CRSESS_PERSIST		0x00000001
 #define	NFSV4CRSESS_CONNBACKCHAN	0x00000002
 #define	NFSV4CRSESS_CONNRDMA		0x00000004
 
 /* Flags for Sequence */
 #define	NFSV4SEQ_CBPATHDOWN		0x00000001
 #define	NFSV4SEQ_CBGSSCONTEXPIRING	0x00000002
 #define	NFSV4SEQ_CBGSSCONTEXPIRED	0x00000004
 #define	NFSV4SEQ_EXPIREDALLSTATEREVOKED	0x00000008
 #define	NFSV4SEQ_EXPIREDSOMESTATEREVOKED 0x00000010
 #define	NFSV4SEQ_ADMINSTATEREVOKED	0x00000020
 #define	NFSV4SEQ_RECALLABLESTATEREVOKED	0x00000040
 #define	NFSV4SEQ_LEASEMOVED		0x00000080
 #define	NFSV4SEQ_RESTARTRECLAIMNEEDED	0x00000100
 #define	NFSV4SEQ_CBPATHDOWNSESSION	0x00000200
 #define	NFSV4SEQ_BACKCHANNELFAULT	0x00000400
 #define	NFSV4SEQ_DEVIDCHANGED		0x00000800
 #define	NFSV4SEQ_DEVIDDELETED		0x00001000
 
 /* Flags for Layout. */
 #define	NFSLAYOUTRETURN_FILE		1
 #define	NFSLAYOUTRETURN_FSID		2
 #define	NFSLAYOUTRETURN_ALL		3
 
 #define	NFSLAYOUT_NFSV4_1_FILES		0x1
 #define	NFSLAYOUT_OSD2_OBJECTS		0x2
 #define	NFSLAYOUT_BLOCK_VOLUME		0x3
 
 #define	NFSLAYOUTIOMODE_READ		1
 #define	NFSLAYOUTIOMODE_RW		2
 #define	NFSLAYOUTIOMODE_ANY		3
 
 /* Flags for Get Device Info. */
 #define	NFSDEVICEIDNOTIFY_CHANGEBIT	0x1
 #define	NFSDEVICEIDNOTIFY_DELETEBIT	0x2
 
 /* Flags for File Layout. */
 #define	NFSFLAYUTIL_DENSE		0x1
 #define	NFSFLAYUTIL_COMMIT_THRU_MDS	0x2
 
 /* Conversion macros */
 #define	vtonfsv2_mode(t,m) 						\
 		txdr_unsigned(((t) == VFIFO) ? MAKEIMODE(VCHR, (m)) : 	\
 				MAKEIMODE((t), (m)))
 #define	vtonfsv34_mode(m)	txdr_unsigned((m) & 07777)
 #define	nfstov_mode(a)		(fxdr_unsigned(u_int16_t, (a))&07777)
 #define	vtonfsv2_type(a)  (((u_int32_t)(a)) >= 9 ? txdr_unsigned(NFNON) : \
 		txdr_unsigned(newnfsv2_type[((u_int32_t)(a))]))
 #define	vtonfsv34_type(a)  (((u_int32_t)(a)) >= 9 ? txdr_unsigned(NFNON) : \
 		txdr_unsigned(nfsv34_type[((u_int32_t)(a))]))
 #define	nfsv2tov_type(a)	newnv2tov_type[fxdr_unsigned(u_int32_t,(a))&0x7]
 #define	nfsv34tov_type(a)	nv34tov_type[fxdr_unsigned(u_int32_t,(a))&0x7]
 #define	vtonfs_dtype(a)	(((u_int32_t)(a)) >= 9 ? IFTODT(VTTOIF(VNON)) : \
 			 IFTODT(VTTOIF(a)))
 
 /* File types */
 typedef enum { NFNON=0, NFREG=1, NFDIR=2, NFBLK=3, NFCHR=4, NFLNK=5,
 	NFSOCK=6, NFFIFO=7, NFATTRDIR=8, NFNAMEDATTR=9 } nfstype;
 
 /* Structs for common parts of the rpc's */
 
 struct nfsv2_time {
 	u_int32_t nfsv2_sec;
 	u_int32_t nfsv2_usec;
 };
 typedef struct nfsv2_time	nfstime2;
 
 struct nfsv3_time {
 	u_int32_t nfsv3_sec;
 	u_int32_t nfsv3_nsec;
 };
 typedef struct nfsv3_time	nfstime3;
 
 struct nfsv4_time {
 	u_int32_t nfsv4_highsec;
 	u_int32_t nfsv4_sec;
 	u_int32_t nfsv4_nsec;
 };
 typedef struct nfsv4_time	nfstime4;
 
 /*
  * Quads are defined as arrays of 2 longs to ensure dense packing for the
  * protocol and to facilitate xdr conversion.
  */
 struct nfs_uquad {
 	u_int32_t nfsuquad[2];
 };
 typedef	struct nfs_uquad	nfsuint64;
 
 /*
  * Used to convert between two u_longs and a u_quad_t.
  */
 union nfs_quadconvert {
 	u_int32_t lval[2];
 	u_quad_t  qval;
 };
 typedef union nfs_quadconvert	nfsquad_t;
 
 /*
  * NFS Version 3 special file number.
  */
 struct nfsv3_spec {
 	u_int32_t specdata1;
 	u_int32_t specdata2;
 };
 typedef	struct nfsv3_spec	nfsv3spec;
 
 /*
  * File attributes and setable attributes. These structures cover both
  * NFS version 2 and the version 3 protocol. Note that the union is only
  * used so that one pointer can refer to both variants. These structures
  * go out on the wire and must be densely packed, so no quad data types
  * are used. (all fields are longs or u_longs or structures of same)
  * NB: You can't do sizeof(struct nfs_fattr), you must use the
  *     NFSX_FATTR(v3) macro.
  */
 struct nfs_fattr {
 	u_int32_t fa_type;
 	u_int32_t fa_mode;
 	u_int32_t fa_nlink;
 	u_int32_t fa_uid;
 	u_int32_t fa_gid;
 	union {
 		struct {
 			u_int32_t nfsv2fa_size;
 			u_int32_t nfsv2fa_blocksize;
 			u_int32_t nfsv2fa_rdev;
 			u_int32_t nfsv2fa_blocks;
 			u_int32_t nfsv2fa_fsid;
 			u_int32_t nfsv2fa_fileid;
 			nfstime2  nfsv2fa_atime;
 			nfstime2  nfsv2fa_mtime;
 			nfstime2  nfsv2fa_ctime;
 		} fa_nfsv2;
 		struct {
 			nfsuint64 nfsv3fa_size;
 			nfsuint64 nfsv3fa_used;
 			nfsv3spec nfsv3fa_rdev;
 			nfsuint64 nfsv3fa_fsid;
 			nfsuint64 nfsv3fa_fileid;
 			nfstime3  nfsv3fa_atime;
 			nfstime3  nfsv3fa_mtime;
 			nfstime3  nfsv3fa_ctime;
 		} fa_nfsv3;
 	} fa_un;
 };
 
 /* and some ugly defines for accessing union components */
 #define	fa2_size		fa_un.fa_nfsv2.nfsv2fa_size
 #define	fa2_blocksize		fa_un.fa_nfsv2.nfsv2fa_blocksize
 #define	fa2_rdev		fa_un.fa_nfsv2.nfsv2fa_rdev
 #define	fa2_blocks		fa_un.fa_nfsv2.nfsv2fa_blocks
 #define	fa2_fsid		fa_un.fa_nfsv2.nfsv2fa_fsid
 #define	fa2_fileid		fa_un.fa_nfsv2.nfsv2fa_fileid
 #define	fa2_atime		fa_un.fa_nfsv2.nfsv2fa_atime
 #define	fa2_mtime		fa_un.fa_nfsv2.nfsv2fa_mtime
 #define	fa2_ctime		fa_un.fa_nfsv2.nfsv2fa_ctime
 #define	fa3_size		fa_un.fa_nfsv3.nfsv3fa_size
 #define	fa3_used		fa_un.fa_nfsv3.nfsv3fa_used
 #define	fa3_rdev		fa_un.fa_nfsv3.nfsv3fa_rdev
 #define	fa3_fsid		fa_un.fa_nfsv3.nfsv3fa_fsid
 #define	fa3_fileid		fa_un.fa_nfsv3.nfsv3fa_fileid
 #define	fa3_atime		fa_un.fa_nfsv3.nfsv3fa_atime
 #define	fa3_mtime		fa_un.fa_nfsv3.nfsv3fa_mtime
 #define	fa3_ctime		fa_un.fa_nfsv3.nfsv3fa_ctime
 
 struct nfsv2_sattr {
 	u_int32_t sa_mode;
 	u_int32_t sa_uid;
 	u_int32_t sa_gid;
 	u_int32_t sa_size;
 	nfstime2  sa_atime;
 	nfstime2  sa_mtime;
 };
 
 /*
  * NFS Version 3 sattr structure for the new node creation case.
  */
 struct nfsv3_sattr {
 	u_int32_t sa_modetrue;
 	u_int32_t sa_mode;
 	u_int32_t sa_uidfalse;
 	u_int32_t sa_gidfalse;
 	u_int32_t sa_sizefalse;
 	u_int32_t sa_atimetype;
 	nfstime3  sa_atime;
 	u_int32_t sa_mtimetype;
 	nfstime3  sa_mtime;
 };
 
 /*
  * The attribute bits used for V4.
  * NFSATTRBIT_xxx defines the attribute# (and its bit position)
  * NFSATTRBM_xxx is a 32bit mask with the correct bit set within the
  *	appropriate 32bit word.
  * NFSATTRBIT_MAX is one greater than the largest NFSATTRBIT_xxx
  */
 #define	NFSATTRBIT_SUPPORTEDATTRS	0
 #define	NFSATTRBIT_TYPE			1
 #define	NFSATTRBIT_FHEXPIRETYPE		2
 #define	NFSATTRBIT_CHANGE		3
 #define	NFSATTRBIT_SIZE			4
 #define	NFSATTRBIT_LINKSUPPORT		5
 #define	NFSATTRBIT_SYMLINKSUPPORT	6
 #define	NFSATTRBIT_NAMEDATTR		7
 #define	NFSATTRBIT_FSID			8
 #define	NFSATTRBIT_UNIQUEHANDLES	9
 #define	NFSATTRBIT_LEASETIME		10
 #define	NFSATTRBIT_RDATTRERROR		11
 #define	NFSATTRBIT_ACL			12
 #define	NFSATTRBIT_ACLSUPPORT		13
 #define	NFSATTRBIT_ARCHIVE		14
 #define	NFSATTRBIT_CANSETTIME		15
 #define	NFSATTRBIT_CASEINSENSITIVE	16
 #define	NFSATTRBIT_CASEPRESERVING	17
 #define	NFSATTRBIT_CHOWNRESTRICTED	18
 #define	NFSATTRBIT_FILEHANDLE		19
 #define	NFSATTRBIT_FILEID		20
 #define	NFSATTRBIT_FILESAVAIL		21
 #define	NFSATTRBIT_FILESFREE		22
 #define	NFSATTRBIT_FILESTOTAL		23
 #define	NFSATTRBIT_FSLOCATIONS		24
 #define	NFSATTRBIT_HIDDEN		25
 #define	NFSATTRBIT_HOMOGENEOUS		26
 #define	NFSATTRBIT_MAXFILESIZE		27
 #define	NFSATTRBIT_MAXLINK		28
 #define	NFSATTRBIT_MAXNAME		29
 #define	NFSATTRBIT_MAXREAD		30
 #define	NFSATTRBIT_MAXWRITE		31
 #define	NFSATTRBIT_MIMETYPE		32
 #define	NFSATTRBIT_MODE			33
 #define	NFSATTRBIT_NOTRUNC		34
 #define	NFSATTRBIT_NUMLINKS		35
 #define	NFSATTRBIT_OWNER		36
 #define	NFSATTRBIT_OWNERGROUP		37
 #define	NFSATTRBIT_QUOTAHARD		38
 #define	NFSATTRBIT_QUOTASOFT		39
 #define	NFSATTRBIT_QUOTAUSED		40
 #define	NFSATTRBIT_RAWDEV		41
 #define	NFSATTRBIT_SPACEAVAIL		42
 #define	NFSATTRBIT_SPACEFREE		43
 #define	NFSATTRBIT_SPACETOTAL		44
 #define	NFSATTRBIT_SPACEUSED		45
 #define	NFSATTRBIT_SYSTEM		46
 #define	NFSATTRBIT_TIMEACCESS		47
 #define	NFSATTRBIT_TIMEACCESSSET	48
 #define	NFSATTRBIT_TIMEBACKUP		49
 #define	NFSATTRBIT_TIMECREATE		50
 #define	NFSATTRBIT_TIMEDELTA		51
 #define	NFSATTRBIT_TIMEMETADATA		52
 #define	NFSATTRBIT_TIMEMODIFY		53
 #define	NFSATTRBIT_TIMEMODIFYSET	54
 #define	NFSATTRBIT_MOUNTEDONFILEID	55
 #define	NFSATTRBIT_DIRNOTIFDELAY	56
 #define	NFSATTRBIT_DIRENTNOTIFDELAY	57
 #define	NFSATTRBIT_DACL			58
 #define	NFSATTRBIT_SACL			59
 #define	NFSATTRBIT_CHANGEPOLICY		60
 #define	NFSATTRBIT_FSSTATUS		61
 #define	NFSATTRBIT_FSLAYOUTTYPE		62
 #define	NFSATTRBIT_LAYOUTHINT		63
 #define	NFSATTRBIT_LAYOUTTYPE		64
 #define	NFSATTRBIT_LAYOUTBLKSIZE	65
 #define	NFSATTRBIT_LAYOUTALIGNMENT	66
 #define	NFSATTRBIT_FSLOCATIONSINFO	67
 #define	NFSATTRBIT_MDSTHRESHOLD		68
 #define	NFSATTRBIT_RETENTIONGET		69
 #define	NFSATTRBIT_RETENTIONSET		70
 #define	NFSATTRBIT_RETENTEVTGET		71
 #define	NFSATTRBIT_RETENTEVTSET		72
 #define	NFSATTRBIT_RETENTIONHOLD	73
 #define	NFSATTRBIT_MODESETMASKED	74
 #define	NFSATTRBIT_SUPPATTREXCLCREAT	75
 #define	NFSATTRBIT_FSCHARSETCAP		76
 
 #define	NFSATTRBM_SUPPORTEDATTRS	0x00000001
 #define	NFSATTRBM_TYPE			0x00000002
 #define	NFSATTRBM_FHEXPIRETYPE		0x00000004
 #define	NFSATTRBM_CHANGE		0x00000008
 #define	NFSATTRBM_SIZE			0x00000010
 #define	NFSATTRBM_LINKSUPPORT		0x00000020
 #define	NFSATTRBM_SYMLINKSUPPORT	0x00000040
 #define	NFSATTRBM_NAMEDATTR		0x00000080
 #define	NFSATTRBM_FSID			0x00000100
 #define	NFSATTRBM_UNIQUEHANDLES		0x00000200
 #define	NFSATTRBM_LEASETIME		0x00000400
 #define	NFSATTRBM_RDATTRERROR		0x00000800
 #define	NFSATTRBM_ACL			0x00001000
 #define	NFSATTRBM_ACLSUPPORT		0x00002000
 #define	NFSATTRBM_ARCHIVE		0x00004000
 #define	NFSATTRBM_CANSETTIME		0x00008000
 #define	NFSATTRBM_CASEINSENSITIVE	0x00010000
 #define	NFSATTRBM_CASEPRESERVING	0x00020000
 #define	NFSATTRBM_CHOWNRESTRICTED	0x00040000
 #define	NFSATTRBM_FILEHANDLE		0x00080000
 #define	NFSATTRBM_FILEID		0x00100000
 #define	NFSATTRBM_FILESAVAIL		0x00200000
 #define	NFSATTRBM_FILESFREE		0x00400000
 #define	NFSATTRBM_FILESTOTAL		0x00800000
 #define	NFSATTRBM_FSLOCATIONS		0x01000000
 #define	NFSATTRBM_HIDDEN		0x02000000
 #define	NFSATTRBM_HOMOGENEOUS		0x04000000
 #define	NFSATTRBM_MAXFILESIZE		0x08000000
 #define	NFSATTRBM_MAXLINK		0x10000000
 #define	NFSATTRBM_MAXNAME		0x20000000
 #define	NFSATTRBM_MAXREAD		0x40000000
 #define	NFSATTRBM_MAXWRITE		0x80000000
 #define	NFSATTRBM_MIMETYPE		0x00000001
 #define	NFSATTRBM_MODE			0x00000002
 #define	NFSATTRBM_NOTRUNC		0x00000004
 #define	NFSATTRBM_NUMLINKS		0x00000008
 #define	NFSATTRBM_OWNER			0x00000010
 #define	NFSATTRBM_OWNERGROUP		0x00000020
 #define	NFSATTRBM_QUOTAHARD		0x00000040
 #define	NFSATTRBM_QUOTASOFT		0x00000080
 #define	NFSATTRBM_QUOTAUSED		0x00000100
 #define	NFSATTRBM_RAWDEV		0x00000200
 #define	NFSATTRBM_SPACEAVAIL		0x00000400
 #define	NFSATTRBM_SPACEFREE		0x00000800
 #define	NFSATTRBM_SPACETOTAL		0x00001000
 #define	NFSATTRBM_SPACEUSED		0x00002000
 #define	NFSATTRBM_SYSTEM		0x00004000
 #define	NFSATTRBM_TIMEACCESS		0x00008000
 #define	NFSATTRBM_TIMEACCESSSET		0x00010000
 #define	NFSATTRBM_TIMEBACKUP		0x00020000
 #define	NFSATTRBM_TIMECREATE		0x00040000
 #define	NFSATTRBM_TIMEDELTA		0x00080000
 #define	NFSATTRBM_TIMEMETADATA		0x00100000
 #define	NFSATTRBM_TIMEMODIFY		0x00200000
 #define	NFSATTRBM_TIMEMODIFYSET		0x00400000
 #define	NFSATTRBM_MOUNTEDONFILEID	0x00800000
 #define	NFSATTRBM_DIRNOTIFDELAY		0x01000000
 #define	NFSATTRBM_DIRENTNOTIFDELAY	0x02000000
 #define	NFSATTRBM_DACL			0x04000000
 #define	NFSATTRBM_SACL			0x08000000
 #define	NFSATTRBM_CHANGEPOLICY		0x10000000
 #define	NFSATTRBM_FSSTATUS		0x20000000
 #define	NFSATTRBM_FSLAYOUTTYPE		0x40000000
 #define	NFSATTRBM_LAYOUTHINT		0x80000000
 #define	NFSATTRBM_LAYOUTTYPE		0x00000001
 #define	NFSATTRBM_LAYOUTBLKSIZE		0x00000002
 #define	NFSATTRBM_LAYOUTALIGNMENT	0x00000004
 #define	NFSATTRBM_FSLOCATIONSINFO	0x00000008
 #define	NFSATTRBM_MDSTHRESHOLD		0x00000010
 #define	NFSATTRBM_RETENTIONGET		0x00000020
 #define	NFSATTRBM_RETENTIONSET		0x00000040
 #define	NFSATTRBM_RETENTEVTGET		0x00000080
 #define	NFSATTRBM_RETENTEVTSET		0x00000100
 #define	NFSATTRBM_RETENTIONHOLD		0x00000200
 #define	NFSATTRBM_MODESETMASKED		0x00000400
 #define	NFSATTRBM_SUPPATTREXCLCREAT	0x00000800
 #define	NFSATTRBM_FSCHARSETCAP		0x00001000
 
 #define	NFSATTRBIT_MAX			77
 
 /*
  * Sets of attributes that are supported, by words in the bitmap.
  */
 /*
  * NFSATTRBIT_SUPPORTED - SUPP0 - bits 0<->31
  *			  SUPP1 - bits 32<->63
  *			  SUPP2 - bits 64<->95
  */
 #define	NFSATTRBIT_SUPP0						\
  	(NFSATTRBM_SUPPORTEDATTRS |					\
  	NFSATTRBM_TYPE |						\
  	NFSATTRBM_FHEXPIRETYPE |					\
  	NFSATTRBM_CHANGE |						\
  	NFSATTRBM_SIZE |						\
  	NFSATTRBM_LINKSUPPORT |						\
  	NFSATTRBM_SYMLINKSUPPORT |					\
  	NFSATTRBM_NAMEDATTR |						\
  	NFSATTRBM_FSID |						\
  	NFSATTRBM_UNIQUEHANDLES |					\
  	NFSATTRBM_LEASETIME |						\
  	NFSATTRBM_RDATTRERROR |						\
  	NFSATTRBM_ACL |							\
  	NFSATTRBM_ACLSUPPORT |						\
  	NFSATTRBM_CANSETTIME |						\
  	NFSATTRBM_CASEINSENSITIVE |					\
  	NFSATTRBM_CASEPRESERVING |					\
  	NFSATTRBM_CHOWNRESTRICTED |					\
  	NFSATTRBM_FILEHANDLE |						\
  	NFSATTRBM_FILEID |						\
  	NFSATTRBM_FILESAVAIL |						\
  	NFSATTRBM_FILESFREE |						\
  	NFSATTRBM_FILESTOTAL |						\
 	NFSATTRBM_FSLOCATIONS |						\
  	NFSATTRBM_HOMOGENEOUS |						\
  	NFSATTRBM_MAXFILESIZE |						\
  	NFSATTRBM_MAXLINK |						\
  	NFSATTRBM_MAXNAME |						\
  	NFSATTRBM_MAXREAD |						\
  	NFSATTRBM_MAXWRITE)
 
 /*
  * NFSATTRBIT_S1 - subset of SUPP1 - OR of the following bits:
  * (The three quota masks are part of this set unconditionally, i.e. they
  *  do not depend on the QUOTA kernel option.)
  */
 #define	NFSATTRBIT_S1							\
  	(NFSATTRBM_MODE |						\
  	NFSATTRBM_NOTRUNC |						\
  	NFSATTRBM_NUMLINKS |						\
  	NFSATTRBM_OWNER |						\
  	NFSATTRBM_OWNERGROUP |						\
  	NFSATTRBM_RAWDEV |						\
  	NFSATTRBM_SPACEAVAIL |						\
  	NFSATTRBM_SPACEFREE |						\
  	NFSATTRBM_SPACETOTAL |						\
  	NFSATTRBM_SPACEUSED |						\
  	NFSATTRBM_TIMEACCESS |						\
  	NFSATTRBM_TIMEDELTA |						\
  	NFSATTRBM_TIMEMETADATA |					\
  	NFSATTRBM_TIMEMODIFY |						\
  	NFSATTRBM_MOUNTEDONFILEID |					\
  	NFSATTRBM_QUOTAHARD |						\
  	NFSATTRBM_QUOTASOFT |						\
  	NFSATTRBM_QUOTAUSED)
 
 /*
  * NFSATTRBIT_SUPP1 - bits 32<->63
  * NFSATTRBIT_S1 already ORs in NFSATTRBM_QUOTAHARD, NFSATTRBM_QUOTASOFT and
  * NFSATTRBM_QUOTAUSED, so a separate "#ifdef QUOTA" variant would expand to
  * the same value and is not needed.
  */
 #define	NFSATTRBIT_SUPP1	NFSATTRBIT_S1
 
 #define	NFSATTRBIT_SUPP2	NFSATTRBM_SUPPATTREXCLCREAT
 
 /*
  * NFSATTRBIT_SUPPSETONLY is the OR of NFSATTRBIT_TIMEACCESSSET and
  * NFSATTRBIT_TIMEMODIFYSET.
  */
 #define	NFSATTRBIT_SUPPSETONLY	 (NFSATTRBM_TIMEACCESSSET |		\
 				 NFSATTRBM_TIMEMODIFYSET)
 
 /*
  * NFSATTRBIT_SETABLE - SETABLE0 - bits 0<->31
  *			SETABLE1 - bits 32<->63
  *			SETABLE2 - bits 64<->95
  */
 #define	NFSATTRBIT_SETABLE0						\
 	(NFSATTRBM_SIZE |						\
 	NFSATTRBM_ACL)
 #define	NFSATTRBIT_SETABLE1						\
  	(NFSATTRBM_MODE |						\
  	NFSATTRBM_OWNER |						\
  	NFSATTRBM_OWNERGROUP |						\
  	NFSATTRBM_TIMEACCESSSET |					\
  	NFSATTRBM_TIMEMODIFYSET)
 #define	NFSATTRBIT_SETABLE2		0
 
 /*
  * Set of attributes that the getattr vnode op needs.
  * OR of the following bits.
  * NFSATTRBIT_GETATTR0 - bits 0<->31
  */
 #define	NFSATTRBIT_GETATTR0						\
  	(NFSATTRBM_SUPPORTEDATTRS |					\
  	NFSATTRBM_TYPE |						\
  	NFSATTRBM_CHANGE |						\
  	NFSATTRBM_SIZE |						\
  	NFSATTRBM_FSID |						\
  	NFSATTRBM_FILEID |						\
  	NFSATTRBM_MAXREAD)
 
 /*
  * NFSATTRBIT_GETATTR1 - bits 32<->63
  */
 #define	NFSATTRBIT_GETATTR1						\
  	(NFSATTRBM_MODE |						\
  	NFSATTRBM_NUMLINKS |						\
  	NFSATTRBM_OWNER |						\
  	NFSATTRBM_OWNERGROUP |						\
  	NFSATTRBM_RAWDEV |						\
  	NFSATTRBM_SPACEUSED |						\
  	NFSATTRBM_TIMEACCESS |						\
  	NFSATTRBM_TIMEMETADATA |					\
  	NFSATTRBM_TIMEMODIFY)
 
 /*
  * NFSATTRBIT_GETATTR2 - bits 64<->95
  */
 #define	NFSATTRBIT_GETATTR2		0
 
 /*
  * Subset of the above that the Write RPC gets.
  * OR of the following bits.
  * NFSATTRBIT_WRITEGETATTR0 - bits 0<->31
  */
 #define	NFSATTRBIT_WRITEGETATTR0					\
  	(NFSATTRBM_SUPPORTEDATTRS |					\
  	NFSATTRBM_TYPE |						\
  	NFSATTRBM_CHANGE |						\
  	NFSATTRBM_SIZE |						\
  	NFSATTRBM_FSID |						\
  	NFSATTRBM_FILEID |						\
  	NFSATTRBM_MAXREAD)
 
 /*
  * NFSATTRBIT_WRITEGETATTR1 - bits 32<->63
  */
 #define	NFSATTRBIT_WRITEGETATTR1					\
  	(NFSATTRBM_MODE |						\
  	NFSATTRBM_NUMLINKS |						\
  	NFSATTRBM_RAWDEV |						\
  	NFSATTRBM_SPACEUSED |						\
  	NFSATTRBM_TIMEACCESS |						\
  	NFSATTRBM_TIMEMETADATA |					\
  	NFSATTRBM_TIMEMODIFY)
 
 /*
  * NFSATTRBIT_WRITEGETATTR2 - bits 64<->95
  */
 #define	NFSATTRBIT_WRITEGETATTR2	0
 
 /*
  * Set of attributes that the wccattr operation needs.
  * OR of the following bits.
  * NFSATTRBIT_WCCATTR0 - bits 0<->31
  */
 #define	NFSATTRBIT_WCCATTR0	0
 
 /*
  * NFSATTRBIT_WCCATTR1 - bits 32<->63
  */
 #define	NFSATTRBIT_WCCATTR1						\
  	(NFSATTRBM_TIMEMODIFY)
 
 /*
  * NFSATTRBIT_WCCATTR2 - bits 64<->95
  */
 #define	NFSATTRBIT_WCCATTR2		0
 
 /*
  * NFSATTRBIT_CBGETATTR0 - bits 0<->31
  */
 #define	NFSATTRBIT_CBGETATTR0	(NFSATTRBM_CHANGE | NFSATTRBM_SIZE)
 
 /*
  * NFSATTRBIT_CBGETATTR1 - bits 32<->63
  */
 #define	NFSATTRBIT_CBGETATTR1		0x0
 
 /*
  * NFSATTRBIT_CBGETATTR2 - bits 64<->95
  */
 #define	NFSATTRBIT_CBGETATTR2		0x0
 
 /*
  * Sets of attributes that require a VFS_STATFS() call to get the
  * values of.
  * NFSATTRBIT_STATFS0 - bits 0<->31
  */
 #define	NFSATTRBIT_STATFS0						\
 	(NFSATTRBM_LINKSUPPORT |					\
 	NFSATTRBM_SYMLINKSUPPORT |					\
 	NFSATTRBM_CANSETTIME |						\
  	NFSATTRBM_FILESAVAIL |						\
  	NFSATTRBM_FILESFREE |						\
  	NFSATTRBM_FILESTOTAL |						\
  	NFSATTRBM_HOMOGENEOUS |						\
  	NFSATTRBM_MAXFILESIZE |						\
 	NFSATTRBM_MAXNAME |						\
 	NFSATTRBM_MAXREAD |						\
 	NFSATTRBM_MAXWRITE)
 
 /*
  * NFSATTRBIT_STATFS1 - bits 32<->63
  */
 #define	NFSATTRBIT_STATFS1						\
  	(NFSATTRBM_QUOTAHARD |						\
  	NFSATTRBM_QUOTASOFT |						\
  	NFSATTRBM_QUOTAUSED |						\
  	NFSATTRBM_SPACEAVAIL |						\
  	NFSATTRBM_SPACEFREE |						\
  	NFSATTRBM_SPACETOTAL |						\
  	NFSATTRBM_SPACEUSED |						\
 	NFSATTRBM_TIMEDELTA)
 
 /*
  * NFSATTRBIT_STATFS2 - bits 64<->95
  */
 #define	NFSATTRBIT_STATFS2		0
 
 /*
  * These are the bits that are needed by the nfs_statfs() call.
  * (The regular getattr bits are or'd in so the vnode gets the correct
  *  type, etc.)
  * NFSGETATTRBIT_STATFS0 - bits 0<->31
  */
 #define	NFSGETATTRBIT_STATFS0	(NFSATTRBIT_GETATTR0 |			\
 				NFSATTRBM_LINKSUPPORT |			\
 				NFSATTRBM_SYMLINKSUPPORT |		\
 				NFSATTRBM_CANSETTIME |			\
 				NFSATTRBM_FILESFREE |			\
 				NFSATTRBM_FILESTOTAL |			\
 				NFSATTRBM_HOMOGENEOUS |			\
 				NFSATTRBM_MAXFILESIZE |			\
 				NFSATTRBM_MAXNAME |			\
 				NFSATTRBM_MAXREAD |			\
 				NFSATTRBM_MAXWRITE)
 
 /*
  * NFSGETATTRBIT_STATFS1 - bits 32<->63
  */
 #define	NFSGETATTRBIT_STATFS1	(NFSATTRBIT_GETATTR1 |			\
 				NFSATTRBM_SPACEAVAIL |			\
 				NFSATTRBM_SPACEFREE |			\
 				NFSATTRBM_SPACETOTAL |			\
 				NFSATTRBM_TIMEDELTA)
 
 /*
  * NFSGETATTRBIT_STATFS2 - bits 64<->95
  */
 #define	NFSGETATTRBIT_STATFS2		0
 
 /*
  * Set of attributes for the equivalent of an nfsv3 pathconf rpc.
  * NFSGETATTRBIT_PATHCONF0 - bits 0<->31
  */
 #define	NFSGETATTRBIT_PATHCONF0	(NFSATTRBIT_GETATTR0 |			\
 			 	NFSATTRBM_CASEINSENSITIVE |		\
 			 	NFSATTRBM_CASEPRESERVING |		\
 			 	NFSATTRBM_CHOWNRESTRICTED |		\
 			 	NFSATTRBM_MAXLINK |			\
 			 	NFSATTRBM_MAXNAME)
 
 /*
  * NFSGETATTRBIT_PATHCONF1 - bits 32<->63
  */
 #define	NFSGETATTRBIT_PATHCONF1	(NFSATTRBIT_GETATTR1 |			\
 				NFSATTRBM_NOTRUNC)
 
 /*
  * NFSGETATTRBIT_PATHCONF2 - bits 64<->95
  */
 #define	NFSGETATTRBIT_PATHCONF2		0
 
 /*
  * Sets of attributes required by readdir and readdirplus.
  * NFSATTRBIT_READDIRPLUS0	(NFSATTRBIT_GETATTR0 | NFSATTRBIT_FILEHANDLE |
  *				 NFSATTRBIT_RDATTRERROR)
  */
 #define	NFSATTRBIT_READDIRPLUS0	(NFSATTRBIT_GETATTR0 | NFSATTRBM_FILEHANDLE | \
 				NFSATTRBM_RDATTRERROR)
 #define	NFSATTRBIT_READDIRPLUS1	NFSATTRBIT_GETATTR1
 #define	NFSATTRBIT_READDIRPLUS2		0
 
 /*
  * Set of attributes supported by Referral vnodes.
  */
 #define	NFSATTRBIT_REFERRAL0	(NFSATTRBM_TYPE | NFSATTRBM_FSID |	\
 	NFSATTRBM_RDATTRERROR | NFSATTRBM_FSLOCATIONS)
 #define	NFSATTRBIT_REFERRAL1	NFSATTRBM_MOUNTEDONFILEID
 #define	NFSATTRBIT_REFERRAL2		0
 
 /*
  * Structure for data handled by the statfs rpc. Since some fields are
  * u_int64_t, this cannot be used for copying data on/off the wire, due
  * to alignment concerns.
  */
 struct nfsstatfs {
 	/* The NFSv2 and NFSv3 field sets share storage in this union. */
 	union {
 		/* Five 32-bit fields, reached through the sf_* names below. */
 		struct {
 			u_int32_t nfsv2sf_tsize;
 			u_int32_t nfsv2sf_bsize;
 			u_int32_t nfsv2sf_blocks;
 			u_int32_t nfsv2sf_bfree;
 			u_int32_t nfsv2sf_bavail;
 		} sf_nfsv2;
 		/* Six 64-bit byte/file counts followed by one 32-bit field. */
 		struct {
 			u_int64_t nfsv3sf_tbytes;
 			u_int64_t nfsv3sf_fbytes;
 			u_int64_t nfsv3sf_abytes;
 			u_int64_t nfsv3sf_tfiles;
 			u_int64_t nfsv3sf_ffiles;
 			u_int64_t nfsv3sf_afiles;
 			u_int32_t nfsv3sf_invarsec;
 		} sf_nfsv3;
 	} sf_un;
 };
 
 /*
  * Shorthand member names, so that code can write sfp->sf_tbytes instead of
  * spelling out the path through sf_un.
  */
 #define	sf_tsize	sf_un.sf_nfsv2.nfsv2sf_tsize
 #define	sf_bsize	sf_un.sf_nfsv2.nfsv2sf_bsize
 #define	sf_blocks	sf_un.sf_nfsv2.nfsv2sf_blocks
 #define	sf_bfree	sf_un.sf_nfsv2.nfsv2sf_bfree
 #define	sf_bavail	sf_un.sf_nfsv2.nfsv2sf_bavail
 #define	sf_tbytes	sf_un.sf_nfsv3.nfsv3sf_tbytes
 #define	sf_fbytes	sf_un.sf_nfsv3.nfsv3sf_fbytes
 #define	sf_abytes	sf_un.sf_nfsv3.nfsv3sf_abytes
 #define	sf_tfiles	sf_un.sf_nfsv3.nfsv3sf_tfiles
 #define	sf_ffiles	sf_un.sf_nfsv3.nfsv3sf_ffiles
 #define	sf_afiles	sf_un.sf_nfsv3.nfsv3sf_afiles
 #define	sf_invarsec	sf_un.sf_nfsv3.nfsv3sf_invarsec
 
 /*
  * Now defined using u_int64_t for the 64 bit field(s).
  * (Cannot be used to move data on/off the wire, due to alignment concerns.)
  */
 struct nfsfsinfo {
 	u_int32_t fs_rtmax;
 	u_int32_t fs_rtpref;
 	u_int32_t fs_rtmult;		/* doubles as fs_maxname for V4 */
 	u_int32_t fs_wtmax;
 	u_int32_t fs_wtpref;
 	u_int32_t fs_wtmult;
 	u_int32_t fs_dtpref;
 	u_int64_t fs_maxfilesize;	/* the 64 bit field */
 	struct timespec fs_timedelta;
 	u_int32_t fs_properties;	/* OR of the NFSV3_FSF* bits */
 };
 
 /*
  * Bits for fs_properties
  */
 #define	NFSV3_FSFLINK		0x1
 #define	NFSV3_FSFSYMLINK	0x2
 #define	NFSV3_FSFHOMOGENEOUS	0x4
 #define	NFSV3_FSFCANSETTIME	0x8
 
 /*
  * Yikes, overload fs_rtmult as fs_maxname for V4.
  */
 #define	fs_maxname	fs_rtmult
 
 /*
  * Pathconf values, each held as a 32 bit quantity.
  * NOTE(review): presumably these mirror the fields of the NFSv3 PATHCONF
  * reply and are filled from the NFSGETATTRBIT_PATHCONF attributes for V4;
  * confirm against the callers.
  */
 struct nfsv3_pathconf {
 	u_int32_t pc_linkmax;
 	u_int32_t pc_namemax;
 	u_int32_t pc_notrunc;
 	u_int32_t pc_chownrestricted;
 	u_int32_t pc_caseinsensitive;
 	u_int32_t pc_casepreserving;
 };
 
 /*
  * NFS V4 data structures.
  */
 /*
  * A stateid: a 32 bit seqid followed by NFSX_STATEIDOTHER bytes of "other"
  * data, stored here as an array of NFSX_UNSIGNED sized words.
  */
 struct nfsv4stateid {
 	u_int32_t	seqid;
 	u_int32_t	other[NFSX_STATEIDOTHER / NFSX_UNSIGNED];
 };
 typedef struct nfsv4stateid nfsv4stateid_t;
 
 #endif	/* _NFS_NFSPROTO_H_ */
Index: user/ngie/more-tests/sys/fs/nfsserver/nfs_nfsdserv.c
===================================================================
--- user/ngie/more-tests/sys/fs/nfsserver/nfs_nfsdserv.c	(revision 281675)
+++ user/ngie/more-tests/sys/fs/nfsserver/nfs_nfsdserv.c	(revision 281676)
@@ -1,4090 +1,4090 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * nfs version 2, 3 and 4 server calls to vnode ops
  * - these routines generally have 3 phases
  *   1 - break down and validate rpc request in mbuf list
  *   2 - do the vnode ops for the request, usually by calling a nfsvno_XXX()
  *       function in nfsd_port.c
  *   3 - build the rpc reply in an mbuf list
  * For nfsv4, these functions are called for each Op within the Compound RPC.
  */
 
 #ifndef APPLEKEXT
 #include <fs/nfs/nfsport.h>
 
 /* Global vars */
 extern u_int32_t newnfs_false, newnfs_true;
 extern enum vtype nv34tov_type[8];
 extern struct timeval nfsboottime;
 extern int nfs_rootfhset;
 extern int nfsrv_enable_crossmntpt;
 #endif	/* !APPLEKEXT */
 
 /*
  * Knob exported read/write as the "async" integer under the _vfs_nfsd
  * sysctl node; it defaults to 0 (off).
  */
 static int	nfs_async = 0;
 SYSCTL_DECL(_vfs_nfsd);
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0,
     "Tell client that writes were synced even though they were not");
 
 /*
  * This list defines the GSS mechanisms supported.
  * (Don't ask me how you get these strings from the RFC stuff like
  *  iso(1), org(3)... but someone did it, so I don't need to know.)
  */
 static struct nfsgss_mechlist nfsgss_mechlist[] = {
 	{ 9, "\052\206\110\206\367\022\001\002\002", 11 },
 	{ 0, "", 0 },
 };
 
 /* local functions */
 static void nfsrvd_symlinksub(struct nfsrv_descript *nd, struct nameidata *ndp,
     struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp,
     vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp,
     int *diraft_retp, nfsattrbit_t *attrbitp,
     NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp, char *pathcp,
     int pathlen);
 static void nfsrvd_mkdirsub(struct nfsrv_descript *nd, struct nameidata *ndp,
     struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp,
     vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp,
     int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp,
     NFSPROC_T *p, struct nfsexstuff *exp);
 
 /*
  * nfs access service (not a part of NFS V2)
  *
  * Every access bit the client asked about is tested against vp with
  * nfsvno_accchk(); a bit whose check fails is removed from the answer.
  * An NFSv3 reply carries post-op attributes, an NFSv4 reply carries the
  * supported mask ahead of the access mask.
  */
 APPLESTATIC int
 nfsrvd_access(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp)
 {
 	struct nfsvattr nva;
 	accmode_t delmode;
 	u_int32_t *tl;
 	u_int32_t access, supp = 0, execbit;
 	int attr_ret, error = 0;
 
 	if (nd->nd_repstat != 0) {
 		nfsrv_postopattr(nd, 1, &nva);
 		goto out;
 	}
 
 	/* Pull the requested access mask off the wire. */
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	access = fxdr_unsigned(u_int32_t, *tl);
 
 	/* NFSv4 only: any bit outside the six known ones is an error. */
 	if ((nd->nd_flag & ND_NFSV4) != 0 &&
 	    (access & ~(NFSACCESS_READ | NFSACCESS_LOOKUP |
 	     NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_DELETE |
 	     NFSACCESS_EXECUTE)) != 0) {
 		nd->nd_repstat = NFSERR_INVAL;
 		vput(vp);
 		goto out;
 	}
 
 	/* Read access. */
 	if ((access & NFSACCESS_READ) != 0) {
 		supp |= NFSACCESS_READ;
 		if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p,
 		    NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supp) != 0)
 			access &= ~NFSACCESS_READ;
 	}
 
 	/* Modify maps to plain write access. */
 	if ((access & NFSACCESS_MODIFY) != 0) {
 		supp |= NFSACCESS_MODIFY;
 		if (nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p,
 		    NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supp) != 0)
 			access &= ~NFSACCESS_MODIFY;
 	}
 
 	/* Extend maps to write plus append. */
 	if ((access & NFSACCESS_EXTEND) != 0) {
 		supp |= NFSACCESS_EXTEND;
 		if (nfsvno_accchk(vp, VWRITE | VAPPEND, nd->nd_cred, exp, p,
 		    NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supp) != 0)
 			access &= ~NFSACCESS_EXTEND;
 	}
 
 	/* Delete: a directory is checked for VDELETE_CHILD instead. */
 	if ((access & NFSACCESS_DELETE) != 0) {
 		supp |= NFSACCESS_DELETE;
 		delmode = (vp->v_type == VDIR) ? VDELETE_CHILD : VDELETE;
 		if (nfsvno_accchk(vp, delmode, nd->nd_cred, exp, p,
 		    NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supp) != 0)
 			access &= ~NFSACCESS_DELETE;
 	}
 
 	/* VEXEC answers LOOKUP for a directory, EXECUTE for anything else. */
 	execbit = (vnode_vtype(vp) == VDIR) ? NFSACCESS_LOOKUP :
 	    NFSACCESS_EXECUTE;
 	if ((access & execbit) != 0) {
 		supp |= (access & execbit);
 		if (nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p,
 		    NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supp) != 0)
 			access &= ~execbit;
 	}
 
 	/* Never report a bit that is not in the supported mask. */
 	access &= supp;
 
 	if ((nd->nd_flag & ND_NFSV3) != 0) {
 		attr_ret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1);
 		nfsrv_postopattr(nd, attr_ret, &nva);
 	}
 	vput(vp);
 
 	/* NFSv4 sends two words (supported, access); the others send one. */
 	if ((nd->nd_flag & ND_NFSV4) != 0) {
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(supp);
 	} else {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	}
 	*tl = txdr_unsigned(access);
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	/* Reached when dissecting the request fails; vp is still held. */
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs getattr service
  *
  * For NFSv4 the requested attribute bitmap is parsed first; a referral is
  * answered directly by nfsrv_putreferralattr(), a request that names either
  * of the set-only time attributes is rejected, and the caller's access is
  * checked before the attributes are fetched and encoded by
  * nfsvno_fillattr().  For the other versions the reply is built by
  * nfsrv_fillattr().
  *
  * vp is released on every path except the initial nd_repstat bail-out.
  * Returns the value left in error (0 unless nfsrv_getattrbits() failed or
  * a set-only attribute was requested); all other failures are reported
  * through nd->nd_repstat.
  *
  * NOTE(review): exp is declared __unused but is passed to nfsvno_accchk()
  * in the NFSv4 path below.
  */
 APPLESTATIC int
 nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram,
     vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	struct nfsvattr nva;
 	fhandle_t fh;
 	int at_root = 0, error = 0, supports_nfsv4acls;
 	struct nfsreferral *refp;
 	nfsattrbit_t attrbits, tmpbits;
 	struct mount *mp;
 	struct vnode *tvp = NULL;
 	struct vattr va;
 	uint64_t mounted_on_fileno = 0;
 	accmode_t accmode;
 
 	if (nd->nd_repstat)
 		goto out;
 	if (nd->nd_flag & ND_NFSV4) {
 		/* Parse the bitmap of attributes the client wants. */
 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
 		if (error) {
 			vput(vp);
 			goto out;
 		}
 
 		/*
 		 * Check for a referral.
 		 */
 		refp = nfsv4root_getreferral(vp, NULL, 0);
 		if (refp != NULL) {
 			(void) nfsrv_putreferralattr(nd, &attrbits, refp, 1,
 			    &nd->nd_repstat);
 			vput(vp);
 			goto out;
 		}
 		if (nd->nd_repstat == 0) {
 			accmode = 0;
 			/* Work on a copy so attrbits stays intact. */
 			NFSSET_ATTRBIT(&tmpbits, &attrbits);
 	
 			/*
 			 * GETATTR with write-only attr time_access_set and time_modify_set
 			 * should return NFS4ERR_INVAL.
 			 */
 			if (NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_TIMEACCESSSET) ||
 					NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_TIMEMODIFYSET)){
 				/* Reported via the return value, not nd_repstat. */
 				error = NFSERR_INVAL;
 				vput(vp);
 				goto out;
 			}
 			/*
 			 * The ACL needs VREAD_ACL; any other requested
 			 * attribute needs VREAD_ATTRIBUTES.
 			 */
 			if (NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_ACL)) {
 				NFSCLRBIT_ATTRBIT(&tmpbits, NFSATTRBIT_ACL);
 				accmode |= VREAD_ACL;
 			}
 			if (NFSNONZERO_ATTRBIT(&tmpbits))
 				accmode |= VREAD_ATTRIBUTES;
 			if (accmode != 0)
 				nd->nd_repstat = nfsvno_accchk(vp, accmode,
 				    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
 				    NFSACCCHK_VPISLOCKED, NULL);
 		}
 	}
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1);
 	if (!nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV4) {
 			/* The file handle is only fetched when asked for. */
 			if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_FILEHANDLE))
 				nd->nd_repstat = nfsvno_getfh(vp, &fh, p);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = nfsrv_checkgetattr(nd, vp,
 				    &nva, &attrbits, nd->nd_cred, p);
 			if (nd->nd_repstat == 0) {
 				supports_nfsv4acls = nfs_supportsnfsv4acls(vp);
 				mp = vp->v_mount;
 				/*
 				 * With mount point crossing enabled, when vp
 				 * is the root directory of a mount (other than
 				 * rootvnode), reference the covered vnode so
 				 * that its fileid can be passed on as
 				 * mounted_on_fileno.
 				 */
 				if (nfsrv_enable_crossmntpt != 0 &&
 				    vp->v_type == VDIR &&
 				    (vp->v_vflag & VV_ROOT) != 0 &&
 				    vp != rootvnode) {
 					tvp = mp->mnt_vnodecovered;
 					VREF(tvp);
 					at_root = 1;
 				} else
 					at_root = 0;
 				/*
 				 * Reference the mount and unlock vp before
 				 * locking tvp and calling vfs_busy(); vp stays
 				 * referenced until the vrele() below.
 				 */
 				vfs_ref(mp);
 				NFSVOPUNLOCK(vp, 0);
 				if (at_root != 0) {
 					if ((nd->nd_repstat =
 					     NFSVOPLOCK(tvp, LK_SHARED)) == 0) {
 						nd->nd_repstat = VOP_GETATTR(
 						    tvp, &va, nd->nd_cred);
 						vput(tvp);
 					} else
 						vrele(tvp);
 					if (nd->nd_repstat == 0)
 						mounted_on_fileno = (uint64_t)
 						    va.va_fileid;
 					else
 						at_root = 0;
 				}
 				if (nd->nd_repstat == 0)
 					nd->nd_repstat = vfs_busy(mp, 0);
 				vfs_rel(mp);
 				if (nd->nd_repstat == 0) {
 					(void)nfsvno_fillattr(nd, mp, vp, &nva,
 					    &fh, 0, &attrbits, nd->nd_cred, p,
 					    isdgram, 1, supports_nfsv4acls,
 					    at_root, mounted_on_fileno);
 					vfs_unbusy(mp);
 				}
 				/* vp was unlocked above, so vrele(), not vput(). */
 				vrele(vp);
 			} else
 				vput(vp);
 		} else {
 			nfsrv_fillattr(nd, &nva);
 			vput(vp);
 		}
 	} else {
 		vput(vp);
 	}
 
 out:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs setattr service
  *
  * Parses the new attribute values with nfsrv_sattr(), fetches the
  * pre-operation attributes, applies the NFSv3 guard (ctime) check or the
  * NFSv4 nfsrv_checkuidgid()/nfsrv_checksetattr() checks, and then sets the
  * attributes.  For NFSv4 the attributes are applied in groups and retbits
  * records which ones were set, so the reply bitmap is right even when a
  * later group fails.
  *
  * Returns 0 on every path except nfsmout, which is reached when parsing
  * the request fails and returns error; all other failures are reported
  * through nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp)
 {
 	struct nfsvattr nva, nva2;
 	u_int32_t *tl;
 	int preat_ret = 1, postat_ret = 1, gcheck = 0, error = 0;
 	struct timespec guard = { 0, 0 };
 	nfsattrbit_t attrbits, retbits;
 	nfsv4stateid_t stateid;
 	NFSACL_T *aclp = NULL;
 
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva);
 		goto out;
 	}
 #ifdef NFS4_ACL_EXTATTR_NAME
 	/* Freed with acl_free() on each exit path below this point. */
 	aclp = acl_alloc(M_WAITOK);
 	aclp->acl_cnt = 0;
 #endif
 	NFSVNO_ATTRINIT(&nva);
 	NFSZERO_ATTRBIT(&retbits);
 	if (nd->nd_flag & ND_NFSV4) {
 		/* The NFSv4 request starts with a stateid. */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
 		stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 		NFSBCOPY((caddr_t)tl,(caddr_t)stateid.other,NFSX_STATEIDOTHER);
 	}
 	error = nfsrv_sattr(nd, vp, &nva, &attrbits, aclp, p);
 	if (error)
 		goto nfsmout;
 	/* nva2 holds the pre-operation attributes from here on. */
 	preat_ret = nfsvno_getattr(vp, &nva2, nd->nd_cred, p, 1);
 	if (!nd->nd_repstat)
 		nd->nd_repstat = preat_ret;
 	if (nd->nd_flag & ND_NFSV3) {
 		/*
 		 * Optional guard: when present, the ctime it carries must
 		 * match the current ctime or the request is refused.
 		 */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		gcheck = fxdr_unsigned(int, *tl);
 		if (gcheck) {
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			fxdr_nfsv3time(tl, &guard);
 		}
 		if (!nd->nd_repstat && gcheck &&
 		    (nva2.na_ctime.tv_sec != guard.tv_sec ||
 		     nva2.na_ctime.tv_nsec != guard.tv_nsec))
 			nd->nd_repstat = NFSERR_NOT_SYNC;
 		if (nd->nd_repstat) {
 			vput(vp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 			acl_free(aclp);
 #endif
 			nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva);
 			goto out;
 		}
 	} else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4))
 		nd->nd_repstat = nfsrv_checkuidgid(nd, &nva);
 
 	/*
 	 * Now that we have all the fields, lets do it.
 	 * If the size is being changed write access is required, otherwise
 	 * just check for a read only file system.
 	 */
 	if (!nd->nd_repstat) {
 		if (NFSVNO_NOTSETSIZE(&nva)) {
 			if (NFSVNO_EXRDONLY(exp) ||
 			    (vfs_flags(vnode_mount(vp)) & MNT_RDONLY))
 				nd->nd_repstat = EROFS;
 		} else {
 			/* A size change is only accepted on a regular file. */
 			if (vnode_vtype(vp) != VREG)
 				nd->nd_repstat = EINVAL;
 			else if (nva2.na_uid != nd->nd_cred->cr_uid ||
 			    NFSVNO_EXSTRICTACCESS(exp))
 				nd->nd_repstat = nfsvno_accchk(vp,
 				    VWRITE, nd->nd_cred, exp, p,
 				    NFSACCCHK_NOOVERRIDE,
 				    NFSACCCHK_VPISLOCKED, NULL);
 		}
 	}
 	if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4))
 		nd->nd_repstat = nfsrv_checksetattr(vp, nd, &stateid,
 		    &nva, &attrbits, exp, p);
 
 	if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) {
 	    /*
 	     * For V4, try setting the attributes in sets, so that the
 	     * reply bitmap will be correct for an error case.
 	     */
 	    /* Set 1: owner and owner group. */
 	    if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNER) ||
 		NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP)) {
 		NFSVNO_ATTRINIT(&nva2);
 		NFSVNO_SETATTRVAL(&nva2, uid, nva.na_uid);
 		NFSVNO_SETATTRVAL(&nva2, gid, nva.na_gid);
 		nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p,
 		    exp);
 		if (!nd->nd_repstat) {
 		    if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNER))
 			NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNER);
 		    if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP))
 			NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNERGROUP);
 		}
 	    }
 	    /* Set 2: size. */
 	    if (!nd->nd_repstat &&
 		NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SIZE)) {
 		NFSVNO_ATTRINIT(&nva2);
 		NFSVNO_SETATTRVAL(&nva2, size, nva.na_size);
 		nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p,
 		    exp);
 		if (!nd->nd_repstat)
 		    NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_SIZE);
 	    }
 	    /* Set 3: access and modify times. */
 	    if (!nd->nd_repstat &&
 		(NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET) ||
 		 NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET))) {
 		NFSVNO_ATTRINIT(&nva2);
 		NFSVNO_SETATTRVAL(&nva2, atime, nva.na_atime);
 		NFSVNO_SETATTRVAL(&nva2, mtime, nva.na_mtime);
 		/* Carry VA_UTIMES_NULL over from the parsed attributes. */
 		if (nva.na_vaflags & VA_UTIMES_NULL) {
 			nva2.na_vaflags |= VA_UTIMES_NULL;
 			NFSVNO_SETACTIVE(&nva2, vaflags);
 		}
 		nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p,
 		    exp);
 		if (!nd->nd_repstat) {
 		    if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET))
 			NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_TIMEACCESSSET);
 		    if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET))
 			NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_TIMEMODIFYSET);
 		}
 	    }
 	    /* Set 4: mode. */
 	    if (!nd->nd_repstat &&
 		NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_MODE)) {
 		NFSVNO_ATTRINIT(&nva2);
 		NFSVNO_SETATTRVAL(&nva2, mode, nva.na_mode);
 		nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p,
 		    exp);
 		if (!nd->nd_repstat)
 		    NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_MODE);
 	    }
 
 #ifdef NFS4_ACL_EXTATTR_NAME
 	    /* Set 5: the ACL, only when at least one entry was parsed. */
 	    if (!nd->nd_repstat && aclp->acl_cnt > 0 &&
 		NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_ACL)) {
 		nd->nd_repstat = nfsrv_setacl(vp, aclp, nd->nd_cred, p);
 		if (!nd->nd_repstat) 
 		    NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_ACL);
 	    }
 #endif
 	} else if (!nd->nd_repstat) {
 		/* Not NFSv4: everything is set with a single call. */
 		nd->nd_repstat = nfsvno_setattr(vp, &nva, nd->nd_cred, p,
 		    exp);
 	}
 	if (nd->nd_flag & (ND_NFSV2 | ND_NFSV3)) {
 		/* nva is reused here for the post-operation attributes. */
 		postat_ret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1);
 		if (!nd->nd_repstat)
 			nd->nd_repstat = postat_ret;
 	}
 	vput(vp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 	acl_free(aclp);
 #endif
 	/* Reply body: wcc data (V3), result bitmap (V4) or attributes. */
 	if (nd->nd_flag & ND_NFSV3)
 		nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva);
 	else if (nd->nd_flag & ND_NFSV4)
 		(void) nfsrv_putattrbit(nd, &retbits);
 	else if (!nd->nd_repstat)
 		nfsrv_fillattr(nd, &nva);
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 	acl_free(aclp);
 #endif
 	if (nd->nd_flag & ND_NFSV4) {
 		/*
 		 * For all nd_repstat, the V4 reply includes a bitmap,
 		 * even NFSERR_BADXDR, which is what this will end up
 		 * returning.
 		 */
 		(void) nfsrv_putattrbit(nd, &retbits);
 	}
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs lookup rpc
  * (Also performs lookup parent for v4)
  *
  * Parses the name from the request, looks it up relative to dp with
  * nfsvno_namei() and fills *fhp with the file handle of the result.
  * When vpp is not NULL and the lookup succeeded, the resulting vnode is
  * handed back in *vpp; otherwise it is released here with vput().
  * NFSv2/NFSv3 replies are built here (file handle plus attributes, and
  * for NFSv3 the directory's post-op attributes as well).
  *
  * Returns the error from nfsrv_parsename(), 0 otherwise; lookup failures
  * are reported through nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_lookup(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p,
     struct nfsexstuff *exp)
 {
 	struct nameidata named;
 	vnode_t vp, dirp = NULL;
 	int error = 0, dattr_ret = 1;
 	struct nfsvattr nva, dattr;
 	char *bufp;
 	u_long *hashp;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, dattr_ret, &dattr);
 		goto out;
 	}
 
 	/*
 	 * For some reason, if dp is a symlink, the error
 	 * returned is supposed to be NFSERR_SYMLINK and not NFSERR_NOTDIR.
 	 */
 	if (dp->v_type == VLNK && (nd->nd_flag & ND_NFSV4)) {
 		nd->nd_repstat = NFSERR_SYMLINK;
 		vrele(dp);
 		goto out;
 	}
 
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP,
 	    LOCKLEAF | SAVESTART);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (error) {
 		vrele(dp);
 		nfsvno_relpathbuf(&named);
 		goto out;
 	}
 	/*
 	 * dp and the path buffer are released here only when the lookup is
 	 * not attempted.
 	 * NOTE(review): presumably nfsvno_namei() takes care of both
 	 * otherwise; confirm against its definition.
 	 */
 	if (!nd->nd_repstat) {
 		nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp);
 	} else {
 		vrele(dp);
 		nfsvno_relpathbuf(&named);
 	}
 	if (nd->nd_repstat) {
 		/* Failure: NFSv3 still reports the directory attributes. */
 		if (dirp) {
 			if (nd->nd_flag & ND_NFSV3)
 				dattr_ret = nfsvno_getattr(dirp, &dattr,
 				    nd->nd_cred, p, 0);
 			vrele(dirp);
 		}
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, dattr_ret, &dattr);
 		goto out;
 	}
 	if (named.ni_startdir)
 		vrele(named.ni_startdir);
 	nfsvno_relpathbuf(&named);
 	vp = named.ni_vp;
 	if ((nd->nd_flag & ND_NFSV4) != 0 && !NFSVNO_EXPORTED(exp) &&
 	    vp->v_type != VDIR && vp->v_type != VLNK)
 		/*
 		 * Only allow lookup of VDIR and VLNK for traversal of
 		 * non-exported volumes during NFSv4 mounting.
 		 */
 		nd->nd_repstat = ENOENT;
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = nfsvno_getfh(vp, fhp, p);
 	/* NFSv2/NFSv3 replies also carry the attributes of the result. */
 	if (!(nd->nd_flag & ND_NFSV4) && !nd->nd_repstat)
 		nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1);
 	/* Hand vp to the caller on success when asked to, else drop it. */
 	if (vpp != NULL && nd->nd_repstat == 0)
 		*vpp = vp;
 	else
 		vput(vp);
 	if (dirp) {
 		if (nd->nd_flag & ND_NFSV3)
 			dattr_ret = nfsvno_getattr(dirp, &dattr, nd->nd_cred,
 			    p, 0);
 		vrele(dirp);
 	}
 	if (nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, dattr_ret, &dattr);
 		goto out;
 	}
 	/* Nothing further is added to the reply here for NFSv4. */
 	if (nd->nd_flag & ND_NFSV2) {
 		(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0);
 		nfsrv_fillattr(nd, &nva);
 	} else if (nd->nd_flag & ND_NFSV3) {
 		(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0);
 		nfsrv_postopattr(nd, 0, &nva);
 		nfsrv_postopattr(nd, dattr_ret, &dattr);
 	}
 
 out:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs readlink service
  *
  * Reads the target of the symbolic link vp into an mbuf chain and appends
  * that chain, preceded by its length, to the reply.  A vnode that is not
  * a symbolic link is refused with ENXIO (NFSv2) or EINVAL (otherwise).
  * vp is released here; the function always returns 0 and reports
  * failures through nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_readlink(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	struct nfsvattr nva;
 	mbuf_t linkm = NULL, linkend = NULL;
 	u_int32_t *tl;
 	int attr_ret = 1, linklen;
 
 	if (nd->nd_repstat != 0) {
 		nfsrv_postopattr(nd, attr_ret, &nva);
 		goto out;
 	}
 
 	/* Only a symbolic link can be read. */
 	if (vnode_vtype(vp) != VLNK)
 		nd->nd_repstat = (nd->nd_flag & ND_NFSV2) ? ENXIO : EINVAL;
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = nfsvno_readlink(vp, nd->nd_cred, p,
 		    &linkm, &linkend, &linklen);
 
 	/* NFSv3: fetch the attributes while vp is still held. */
 	if ((nd->nd_flag & ND_NFSV3) != 0)
 		attr_ret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1);
 	vput(vp);
 	if ((nd->nd_flag & ND_NFSV3) != 0)
 		nfsrv_postopattr(nd, attr_ret, &nva);
 
 	if (nd->nd_repstat == 0) {
 		/* Length word first, then splice the link data onto the reply. */
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(linklen);
 		mbuf_setnext(nd->nd_mb, linkm);
 		nd->nd_mb = linkend;
 		nd->nd_bpos = NFSMTOD(linkend, caddr_t) + mbuf_len(linkend);
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
 
 /*
  * nfs read service
  *
  * Decodes the offset and byte count of a READ request (NFSv2, NFSv3 or
  * NFSv4; the NFSv4 form also carries a stateid), clamps the count to
  * NFS_SRVMAXDATA(nd), validates the vnode type, access rights and, for
  * NFSv4, the lock/open state, then reads the data with nfsvno_read() and
  * links the returned mbuf chain onto the reply.
  *
  * vp is released with vput() on every path visible here except the early
  * exit taken when nd->nd_repstat is already set on entry.
  *
  * Returns 0 once a reply has been built (the RPC status is left in
  * nd->nd_repstat), or a non-zero error through the nfsmout label when the
  * request is rejected as malformed.
  */
 APPLESTATIC int
 nfsrvd_read(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int error = 0, cnt, getret = 1, reqlen, eof = 0;
 	mbuf_t m2, m3;
 	struct nfsvattr nva;
 	off_t off = 0x0;
 	struct nfsstate st, *stp = &st;
 	struct nfslock lo, *lop = &lo;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 
 	/* An earlier failure was already recorded: just emit the attributes. */
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, getret, &nva);
 		goto out;
 	}
 	if (nd->nd_flag & ND_NFSV2) {
 		/* NFSv2: 32 bit offset followed by the count. */
 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		off = (off_t)fxdr_unsigned(u_int32_t, *tl++);
 		reqlen = fxdr_unsigned(int, *tl);
 	} else if (nd->nd_flag & ND_NFSV3) {
 		/* NFSv3: 64 bit offset followed by the count. */
 		NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 		off = fxdr_hyper(tl);
 		tl += 2;
 		reqlen = fxdr_unsigned(int, *tl);
 	} else {
 		/*
 		 * NFSv4: only the count is fetched here; it is the 7th word,
 		 * after the four stateid words and the two offset words that
 		 * are decoded further down.
 		 */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 3*NFSX_UNSIGNED);
 		reqlen = fxdr_unsigned(int, *(tl + 6));
 	}
 	/* Oversized requests are clamped; negative counts are rejected. */
 	if (reqlen > NFS_SRVMAXDATA(nd)) {
 		reqlen = NFS_SRVMAXDATA(nd);
 	} else if (reqlen < 0) {
 		error = EBADRPC;
 		goto nfsmout;
 	}
 	if (nd->nd_flag & ND_NFSV4) {
 		/* Build the state/lock descriptors checked by nfsrv_lockctrl(). */
 		stp->ls_flags = (NFSLCK_CHECK | NFSLCK_READACCESS);
 		lop->lo_flags = NFSLCK_READ;
 		stp->ls_ownerlen = 0;
 		stp->ls_op = NULL;
 		stp->ls_uid = nd->nd_cred->cr_uid;
 		stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 		clientid.lval[0] = stp->ls_stateid.other[0] = *tl++;
 		clientid.lval[1] = stp->ls_stateid.other[1] = *tl++;
 		/*
 		 * Reconcile the clientid taken from the stateid with the one
 		 * already attached to the request, or record it if none is.
 		 */
 		if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 			if ((nd->nd_flag & ND_NFSV41) != 0)
 				clientid.qval = nd->nd_clientid.qval;
 			else if (nd->nd_clientid.qval != clientid.qval)
 				printf("EEK1 multiple clids\n");
 		} else {
 			if ((nd->nd_flag & ND_NFSV41) != 0)
 				printf("EEK! no clientid from session\n");
 			nd->nd_flag |= ND_IMPLIEDCLID;
 			nd->nd_clientid.qval = clientid.qval;
 		}
 		stp->ls_stateid.other[2] = *tl++;
 		off = fxdr_hyper(tl);
 		lop->lo_first = off;
 		tl += 2;
 		lop->lo_end = off + reqlen;
 		/*
 		 * Paranoia, just in case it wraps around.
 		 */
 		if (lop->lo_end < off)
 			lop->lo_end = NFS64BITSSET;
 	}
 	/* Only regular files can be read. */
 	if (vnode_vtype(vp) != VREG) {
 		if (nd->nd_flag & ND_NFSV3)
 			nd->nd_repstat = EINVAL;
 		else
 			nd->nd_repstat = (vnode_vtype(vp) == VDIR) ? EISDIR :
 			    EINVAL;
 	}
 	getret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1);
 	if (!nd->nd_repstat)
 		nd->nd_repstat = getret;
 	/*
 	 * The access check is skipped for the file's owner unless
 	 * NFSVNO_EXSTRICTACCESS(exp) is set.  If VREAD is refused, VEXEC
 	 * access is tried as a fallback.
 	 */
 	if (!nd->nd_repstat &&
 	    (nva.na_uid != nd->nd_cred->cr_uid ||
 	     NFSVNO_EXSTRICTACCESS(exp))) {
 		nd->nd_repstat = nfsvno_accchk(vp, VREAD,
 		    nd->nd_cred, exp, p,
 		    NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL);
 		if (nd->nd_repstat)
 			nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
 			    nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER,
 			    NFSACCCHK_VPISLOCKED, NULL);
 	}
 	if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat)
 		nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
 		    &stateid, exp, nd, p);
 	if (nd->nd_repstat) {
 		vput(vp);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &nva);
 		goto out;
 	}
 	/*
 	 * Work out how many bytes can actually be returned and whether the
 	 * read reaches the end of the file.
 	 */
 	if (off >= nva.na_size) {
 		cnt = 0;
 		eof = 1;
 	} else if (reqlen == 0)
 		cnt = 0;
 	else if ((off + reqlen) >= nva.na_size) {
 		cnt = nva.na_size - off;
 		eof = 1;
 	} else
 		cnt = reqlen;
 	m3 = NULL;
 	if (cnt > 0) {
 		/* m3 receives the head of the data chain, m2 its last mbuf. */
 		nd->nd_repstat = nfsvno_read(vp, off, cnt, nd->nd_cred, p,
 		    &m3, &m2);
 		/* NFSv2/v3 replies carry attributes fetched after the read. */
 		if (!(nd->nd_flag & ND_NFSV4)) {
 			getret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = getret;
 		}
 		if (nd->nd_repstat) {
 			vput(vp);
 			if (m3)
 				mbuf_freem(m3);
 			if (nd->nd_flag & ND_NFSV3)
 				nfsrv_postopattr(nd, getret, &nva);
 			goto out;
 		}
 	}
 	vput(vp);
 	/*
 	 * Reply layout built below:
 	 *   NFSv2: attributes, data length
 	 *   NFSv3: post-op attributes, count, eof flag, data length
 	 *   NFSv4: eof flag, data length
 	 */
 	if (nd->nd_flag & ND_NFSV2) {
 		nfsrv_fillattr(nd, &nva);
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	} else {
 		if (nd->nd_flag & ND_NFSV3) {
 			nfsrv_postopattr(nd, getret, &nva);
 			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(cnt);
 		} else
 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		if (eof)
 			*tl++ = newnfs_true;
 		else
 			*tl++ = newnfs_false;
 	}
 	*tl = txdr_unsigned(cnt);
 	/* Append the data chain and move the build position to its end. */
 	if (m3) {
 		mbuf_setnext(nd->nd_mb, m3);
 		nd->nd_mb = m2;
 		nd->nd_bpos = NFSMTOD(m2, caddr_t) + mbuf_len(m2);
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs write service
  *
  * Decodes the offset, stability level and length of a WRITE request
  * (NFSv2, NFSv3 or NFSv4; the NFSv4 form also carries a stateid), counts
  * the mbufs that hold the data, validates vnode type, access rights and,
  * for NFSv4, lock/open state, then writes the data with nfsvno_write()
  * and builds the reply.
  *
  * vp is released with vput() on every path visible here except the early
  * exit taken when nd->nd_repstat is already set on entry.
  *
  * Returns 0 once a reply has been built (the RPC status is left in
  * nd->nd_repstat), or a non-zero error through the nfsmout label when the
  * request is rejected as malformed.
  */
 APPLESTATIC int
 nfsrvd_write(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp)
 {
 	int i, cnt;
 	u_int32_t *tl;
 	mbuf_t mp;
 	struct nfsvattr nva, forat;
 	int aftat_ret = 1, retlen, len, error = 0, forat_ret = 1;
 	int stable = NFSWRITE_FILESYNC;
 	off_t off;
 	struct nfsstate st, *stp = &st;
 	struct nfslock lo, *lop = &lo;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 
 	/* An earlier failure was already recorded: just emit the wcc data. */
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva);
 		goto out;
 	}
 	if (nd->nd_flag & ND_NFSV2) {
 		/*
 		 * NFSv2: of the four words, the second is used as the offset
 		 * and the fourth as the length; the other two are skipped.
 		 */
 		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 		off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
 		tl += 2;
 		retlen = len = fxdr_unsigned(int32_t, *tl);
 	} else if (nd->nd_flag & ND_NFSV3) {
 		/*
 		 * NFSv3: 64 bit offset, one skipped word, the stability
 		 * level and the data length.
 		 */
 		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 		off = fxdr_hyper(tl);
 		tl += 3;
 		stable = fxdr_unsigned(int, *tl++);
 		retlen = len = fxdr_unsigned(int32_t, *tl);
 	} else {
 		/*
 		 * NFSv4: stateid, 64 bit offset, stability level and data
 		 * length.  The state/lock descriptors filled in here are
 		 * checked by nfsrv_lockctrl() below.
 		 */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 4 * NFSX_UNSIGNED);
 		stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
 		lop->lo_flags = NFSLCK_WRITE;
 		stp->ls_ownerlen = 0;
 		stp->ls_op = NULL;
 		stp->ls_uid = nd->nd_cred->cr_uid;
 		stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 		clientid.lval[0] = stp->ls_stateid.other[0] = *tl++;
 		clientid.lval[1] = stp->ls_stateid.other[1] = *tl++;
 		/*
 		 * Reconcile the clientid taken from the stateid with the one
 		 * already attached to the request, or record it if none is.
 		 */
 		if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 			if ((nd->nd_flag & ND_NFSV41) != 0)
 				clientid.qval = nd->nd_clientid.qval;
 			else if (nd->nd_clientid.qval != clientid.qval)
 				printf("EEK2 multiple clids\n");
 		} else {
 			if ((nd->nd_flag & ND_NFSV41) != 0)
 				printf("EEK! no clientid from session\n");
 			nd->nd_flag |= ND_IMPLIEDCLID;
 			nd->nd_clientid.qval = clientid.qval;
 		}
 		stp->ls_stateid.other[2] = *tl++;
 		off = fxdr_hyper(tl);
 		lop->lo_first = off;
 		tl += 2;
 		stable = fxdr_unsigned(int, *tl++);
 		retlen = len = fxdr_unsigned(int32_t, *tl);
 		lop->lo_end = off + len;
 		/*
 		 * Paranoia, just in case it wraps around, which shouldn't
 		 * ever happen anyhow.
 		 */
 		if (lop->lo_end < lop->lo_first)
 			lop->lo_end = NFS64BITSSET;
 	}
 
 	/*
 	 * Loop through the mbuf chain, counting how many mbufs are a
 	 * part of this write operation, so the iovec size is known.
 	 * If the chain ends before len bytes have been accounted for,
 	 * the request is rejected with EBADRPC.
 	 */
 	cnt = 0;
 	mp = nd->nd_md;
 	/* Bytes remaining in the current mbuf past the parse position. */
 	i = NFSMTOD(mp, caddr_t) + mbuf_len(mp) - nd->nd_dpos;
 	while (len > 0) {
 		if (i > 0) {
 			len -= i;
 			cnt++;
 		}
 		mp = mbuf_next(mp);
 		if (!mp) {
 			if (len > 0) {
 				error = EBADRPC;
 				goto nfsmout;
 			}
 		} else
 			i = mbuf_len(mp);
 	}
 
-	if (retlen > NFS_MAXDATA || retlen < 0)
+	if (retlen > NFS_SRVMAXIO || retlen < 0)
 		nd->nd_repstat = EIO;
 	/* Only regular files can be written. */
 	if (vnode_vtype(vp) != VREG && !nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV3)
 			nd->nd_repstat = EINVAL;
 		else
 			nd->nd_repstat = (vnode_vtype(vp) == VDIR) ? EISDIR :
 			    EINVAL;
 	}
 	/* Attributes before the write, used for the NFSv3 wcc data. */
 	forat_ret = nfsvno_getattr(vp, &forat, nd->nd_cred, p, 1);
 	if (!nd->nd_repstat)
 		nd->nd_repstat = forat_ret;
 	/*
 	 * The access check is skipped for the file's owner unless
 	 * NFSVNO_EXSTRICTACCESS(exp) is set.
 	 */
 	if (!nd->nd_repstat &&
 	    (forat.na_uid != nd->nd_cred->cr_uid ||
 	     NFSVNO_EXSTRICTACCESS(exp)))
 		nd->nd_repstat = nfsvno_accchk(vp, VWRITE,
 		    nd->nd_cred, exp, p,
 		    NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL);
 	if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) {
 		nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
 		    &stateid, exp, nd, p);
 	}
 	if (nd->nd_repstat) {
 		vput(vp);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva);
 		goto out;
 	}
 
 	/*
 	 * For NFS Version 2, it is not obvious what a write of zero length
 	 * should do, but I might as well be consistent with Version 3,
 	 * which is to return ok so long as there are no permission problems.
 	 */
 	if (retlen > 0) {
 		nd->nd_repstat = nfsvno_write(vp, off, retlen, cnt, stable,
 		    nd->nd_md, nd->nd_dpos, nd->nd_cred, p);
 		/*
 		 * Step the parse position past the (rounded up) data.  The
 		 * counting loop above already established that the data is
 		 * present, so a failure here is treated as fatal.
 		 */
 		error = nfsm_advance(nd, NFSM_RNDUP(retlen), -1);
 		if (error)
 			panic("nfsrv_write mbuf");
 	}
 	/* NFSv4 replies carry no attributes, so none are fetched. */
 	if (nd->nd_flag & ND_NFSV4)
 		aftat_ret = 0;
 	else
 		aftat_ret = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1);
 	vput(vp);
 	if (!nd->nd_repstat)
 		nd->nd_repstat = aftat_ret;
 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva);
 		if (nd->nd_repstat)
 			goto out;
 		/* Count written, committed level and a two word verifier. */
 		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(retlen);
 		/*
 		 * If nfs_async is set, then pretend the write was FILESYNC.
 		 * Warning: Doing this violates RFC1813 and runs a risk
 		 * of data written by a client being lost when the server
 		 * crashes/reboots.
 		 */
 		if (stable == NFSWRITE_UNSTABLE && nfs_async == 0)
 			*tl++ = txdr_unsigned(stable);
 		else
 			*tl++ = txdr_unsigned(NFSWRITE_FILESYNC);
 		/*
 		 * Actually, there is no need to txdr these fields,
 		 * but it may make the values more human readable,
 		 * for debugging purposes.
 		 */
 		*tl++ = txdr_unsigned(nfsboottime.tv_sec);
 		*tl = txdr_unsigned(nfsboottime.tv_usec);
 	} else if (!nd->nd_repstat)
 		nfsrv_fillattr(nd, &nva);
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs create service (creates regular files for V2 and V3. Spec. files for V2.)
  * now does a truncate to 0 length via. setattr if it already exists
  * The core creation routine has been extracted out into nfsrv_creatsub(),
  * so it can also be used by nfsrv_open() for V4.
  *
  * dp is the directory in which the name is created.  The request supplies
  * the name followed by either an NFSv2 sattr structure or, otherwise, a
  * "how" word (unchecked, guarded or exclusive) with attributes or an
  * exclusive create verifier.
  *
  * Returns 0 once a reply has been built (the RPC status is left in
  * nd->nd_repstat), or a non-zero error through the nfsmout label, which
  * releases dp and the pathname buffer.
  */
 APPLESTATIC int
 nfsrvd_create(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t dp, NFSPROC_T *p, struct nfsexstuff *exp)
 {
 	struct nfsvattr nva, dirfor, diraft;
 	struct nfsv2_sattr *sp;
 	struct nameidata named;
 	u_int32_t *tl;
 	int error = 0, tsize, dirfor_ret = 1, diraft_ret = 1;
 	int how = NFSCREATE_UNCHECKED, exclusive_flag = 0;
 	NFSDEV_T rdev = 0;
 	vnode_t vp = NULL, dirp = NULL;
 	fhandle_t fh;
 	char *bufp;
 	u_long *hashp;
 	enum vtype vtyp;
 	int32_t cverf[2], tverf[2] = { 0, 0 };
 
 	/* An earlier failure was already recorded: just emit the wcc data. */
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 		goto out;
 	}
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
 	    LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (error)
 		goto nfsmout;
 	if (!nd->nd_repstat) {
 		NFSVNO_ATTRINIT(&nva);
 		if (nd->nd_flag & ND_NFSV2) {
 			/*
 			 * NFSv2: the file type is derived from the mode bits
 			 * (defaulting to a regular file), and sa_size carries
 			 * either the size or, for special files, the device.
 			 */
 			NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 			vtyp = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
 			if (vtyp == VNON)
 				vtyp = VREG;
 			NFSVNO_SETATTRVAL(&nva, type, vtyp);
 			NFSVNO_SETATTRVAL(&nva, mode,
 			    nfstov_mode(sp->sa_mode));
 			switch (nva.na_type) {
 			case VREG:
 				/* A size of -1 means no size was supplied. */
 				tsize = fxdr_unsigned(int32_t, sp->sa_size);
 				if (tsize != -1)
 					NFSVNO_SETATTRVAL(&nva, size,
 					    (u_quad_t)tsize);
 				break;
 			case VCHR:
 			case VBLK:
 			case VFIFO:
 				rdev = fxdr_unsigned(NFSDEV_T, sp->sa_size);
 				break;
 			default:
 				break;
 			};
 		} else {
 			/*
 			 * Otherwise a "how" word selects between settable
 			 * attributes and an exclusive create verifier.
 			 */
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			how = fxdr_unsigned(int, *tl);
 			switch (how) {
 			case NFSCREATE_GUARDED:
 			case NFSCREATE_UNCHECKED:
 				error = nfsrv_sattr(nd, NULL, &nva, NULL, NULL, p);
 				if (error)
 					goto nfsmout;
 				break;
 			case NFSCREATE_EXCLUSIVE:
 				NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
 				cverf[0] = *tl++;
 				cverf[1] = *tl;
 				exclusive_flag = 1;
 				break;
 			};
 			NFSVNO_SETATTRVAL(&nva, type, VREG);
 		}
 	}
 	/* Parsing recorded a failure status: release dp and the path buffer. */
 	if (nd->nd_repstat) {
 		nfsvno_relpathbuf(&named);
 		if (nd->nd_flag & ND_NFSV3) {
 			dirfor_ret = nfsvno_getattr(dp, &dirfor, nd->nd_cred,
 			    p, 1);
 			nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret,
 			    &diraft);
 		}
 		vput(dp);
 		goto out;
 	}
 
 	nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp);
 	/*
 	 * dirp is only needed for the directory attributes, which an NFSv2
 	 * reply does not carry.
 	 */
 	if (dirp) {
 		if (nd->nd_flag & ND_NFSV2) {
 			vrele(dirp);
 			dirp = NULL;
 		} else {
 			dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred,
 			    p, 0);
 		}
 	}
 	if (nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret,
 			    &diraft);
 		if (dirp)
 			vrele(dirp);
 		goto out;
 	}
 
 	if (!(nd->nd_flag & ND_NFSV2)) {
 		switch (how) {
 		case NFSCREATE_GUARDED:
 			/* A guarded create fails if the name already exists. */
 			if (named.ni_vp)
 				nd->nd_repstat = EEXIST;
 			break;
 		case NFSCREATE_UNCHECKED:
 			break;
 		case NFSCREATE_EXCLUSIVE:
 			/* A new exclusively created file starts with mode 0. */
 			if (named.ni_vp == NULL)
 				NFSVNO_SETATTRVAL(&nva, mode, 0);
 			break;
 		};
 	}
 
 	/*
 	 * Iff doesn't exist, create it
 	 * otherwise just truncate to 0 length
 	 *   should I set the mode too ?
 	 */
 	nd->nd_repstat = nfsvno_createsub(nd, &named, &vp, &nva,
 	    &exclusive_flag, cverf, rdev, p, exp);
 
 	if (!nd->nd_repstat) {
 		nd->nd_repstat = nfsvno_getfh(vp, &fh, p);
 		if (!nd->nd_repstat)
 			nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred,
 			    p, 1);
 		vput(vp);
 		/* The access time is compared against the verifier below. */
 		if (!nd->nd_repstat) {
 			tverf[0] = nva.na_atime.tv_sec;
 			tverf[1] = nva.na_atime.tv_nsec;
 		}
 	}
 	if (nd->nd_flag & ND_NFSV2) {
 		if (!nd->nd_repstat) {
 			(void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 0);
 			nfsrv_fillattr(nd, &nva);
 		}
 	} else {
 		/*
 		 * If exclusive_flag is still set and the file's access time
 		 * does not match the client's verifier, report EEXIST.
 		 * NOTE(review): presumably nfsvno_createsub() stores the
 		 * verifier in the access time of a file it creates and
 		 * adjusts exclusive_flag -- confirm against that routine.
 		 */
 		if (exclusive_flag && !nd->nd_repstat && (cverf[0] != tverf[0]
 		    || cverf[1] != tverf[1]))
 			nd->nd_repstat = EEXIST;
 		diraft_ret = nfsvno_getattr(dirp, &diraft, nd->nd_cred, p, 0);
 		vrele(dirp);
 		if (!nd->nd_repstat) {
 			(void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 1);
 			nfsrv_postopattr(nd, 0, &nva);
 		}
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(dp);
 	nfsvno_relpathbuf(&named);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs v3 mknod service (and v4 create)
  *
  * For NFSv3 the request holds the name, the object type, settable
  * attributes and, for character/block devices, the device numbers.
  * For NFSv4 the object type (and symlink target or device numbers)
  * comes first, followed by the name and attributes; NFSv4 directories
  * and symbolic links are handed to nfsrvd_mkdirsub() and
  * nfsrvd_symlinksub() respectively.
  *
  * dp is the parent directory.  When vpp is not NULL and the object was
  * created successfully, the new vnode is returned unlocked in *vpp and its
  * file handle in *fhp.
  *
  * The ACL (when NFS4_ACL_EXTATTR_NAME is defined) and the NFSv4 symlink
  * target buffer (pathcp) are released on every exit path once they have
  * been allocated.
  *
  * Returns 0 once a reply has been built (the RPC status is left in
  * nd->nd_repstat), or a non-zero error through the nfsmout label.
  */
 APPLESTATIC int
 nfsrvd_mknod(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p,
     struct nfsexstuff *exp)
 {
 	struct nfsvattr nva, dirfor, diraft;
 	u_int32_t *tl;
 	struct nameidata named;
 	int error = 0, dirfor_ret = 1, diraft_ret = 1, pathlen;
 	u_int32_t major, minor;
 	enum vtype vtyp = VNON;
 	nfstype nfs4type = NFNON;
 	vnode_t vp, dirp = NULL;
 	nfsattrbit_t attrbits;
 	char *bufp = NULL, *pathcp = NULL;
 	u_long *hashp, cnflags;
 	NFSACL_T *aclp = NULL;
 
 	NFSVNO_ATTRINIT(&nva);
 	cnflags = (LOCKPARENT | SAVESTART);
 	/* An earlier failure was already recorded: just emit the wcc data. */
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 		goto out;
 	}
 #ifdef NFS4_ACL_EXTATTR_NAME
 	aclp = acl_alloc(M_WAITOK);
 	aclp->acl_cnt = 0;
 #endif
 
 	/*
 	 * For V4, the creation stuff is here, Yuck!
 	 */
 	if (nd->nd_flag & ND_NFSV4) {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		vtyp = nfsv34tov_type(*tl);
 		nfs4type = fxdr_unsigned(nfstype, *tl);
 		switch (nfs4type) {
 		case NFLNK:
 			/* Allocates pathcp; it must be freed on every exit. */
 			error = nfsvno_getsymlink(nd, &nva, p, &pathcp,
 			    &pathlen);
 			if (error)
 				goto nfsmout;
 			break;
 		case NFCHR:
 		case NFBLK:
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			major = fxdr_unsigned(u_int32_t, *tl++);
 			minor = fxdr_unsigned(u_int32_t, *tl);
 			nva.na_rdev = NFSMAKEDEV(major, minor);
 			break;
 		case NFSOCK:
 		case NFFIFO:
 			break;
 		case NFDIR:
 			cnflags = (LOCKPARENT | SAVENAME);
 			break;
 		default:
 			nd->nd_repstat = NFSERR_BADTYPE;
 			vrele(dp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 			acl_free(aclp);
 #endif
 			goto out;
 		}
 	}
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, cnflags | NOCACHE);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (error)
 		goto nfsmout;
 	if (!nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV3) {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			vtyp = nfsv34tov_type(*tl);
 		}
 		error = nfsrv_sattr(nd, NULL, &nva, &attrbits, aclp, p);
 		if (error)
 			goto nfsmout;
 		nva.na_type = vtyp;
 		/* For NFSv3 the device numbers follow the attributes. */
 		if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV3) &&
 		    (vtyp == VCHR || vtyp == VBLK)) {
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			major = fxdr_unsigned(u_int32_t, *tl++);
 			minor = fxdr_unsigned(u_int32_t, *tl);
 			nva.na_rdev = NFSMAKEDEV(major, minor);
 		}
 	}
 
 	dirfor_ret = nfsvno_getattr(dp, &dirfor, nd->nd_cred, p, 0);
 	if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) {
 		/*
 		 * A requested group equal to the directory's group is
 		 * dropped from the attributes before the uid/gid check.
 		 */
 		if (!dirfor_ret && NFSVNO_ISSETGID(&nva) &&
 		    dirfor.na_gid == nva.na_gid)
 			NFSVNO_UNSET(&nva, gid);
 		nd->nd_repstat = nfsrv_checkuidgid(nd, &nva);
 	}
 	if (nd->nd_repstat) {
 		vrele(dp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 		acl_free(aclp);
 #endif
 		nfsvno_relpathbuf(&named);
 		if (pathcp)
 			FREE(pathcp, M_TEMP);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret,
 			    &diraft);
 		goto out;
 	}
 
 	/*
 	 * Yuck! For V4, mkdir and link are here and some V4 clients don't fill
 	 * in va_mode, so we'll have to set a default here.
 	 */
 	if (NFSVNO_NOTSETMODE(&nva)) {
 		if (vtyp == VLNK)
 			nva.na_mode = 0755;
 		else
 			nva.na_mode = 0400;
 	}
 
 	if (vtyp == VDIR)
 		named.ni_cnd.cn_flags |= WILLBEDIR;
 	nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp);
 	if (nd->nd_repstat) {
 		if (dirp) {
 			if (nd->nd_flag & ND_NFSV3)
 				dirfor_ret = nfsvno_getattr(dirp, &dirfor,
 				    nd->nd_cred, p, 0);
 			vrele(dirp);
 		}
 #ifdef NFS4_ACL_EXTATTR_NAME
 		acl_free(aclp);
 #endif
 		/*
 		 * The NFSv4 symlink target is not handed to anyone on this
 		 * path, so release it here to avoid leaking it.
 		 */
 		if (pathcp)
 			FREE(pathcp, M_TEMP);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret,
 			    &diraft);
 		goto out;
 	}
 	if (dirp)
 		dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, p, 0);
 
 	/* NFSv4 directories and symbolic links use the shared helpers. */
 	if ((nd->nd_flag & ND_NFSV4) && (vtyp == VDIR || vtyp == VLNK)) {
 		if (vtyp == VDIR) {
 			nfsrvd_mkdirsub(nd, &named, &nva, fhp, vpp, dirp,
 			    &dirfor, &diraft, &diraft_ret, &attrbits, aclp, p,
 			    exp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 			acl_free(aclp);
 #endif
 			goto out;
 		} else if (vtyp == VLNK) {
 			nfsrvd_symlinksub(nd, &named, &nva, fhp, vpp, dirp,
 			    &dirfor, &diraft, &diraft_ret, &attrbits,
 			    aclp, p, exp, pathcp, pathlen);
 #ifdef NFS4_ACL_EXTATTR_NAME
 			acl_free(aclp);
 #endif
 			FREE(pathcp, M_TEMP);
 			goto out;
 		}
 	}
 
 	nd->nd_repstat = nfsvno_mknod(&named, &nva, nd->nd_cred, p);
 	if (!nd->nd_repstat) {
 		vp = named.ni_vp;
 		nfsrv_fixattr(nd, vp, &nva, aclp, p, &attrbits, exp);
 		nd->nd_repstat = nfsvno_getfh(vp, fhp, p);
 		if ((nd->nd_flag & ND_NFSV3) && !nd->nd_repstat)
 			nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred,
 			    p, 1);
 		/* Hand the new vnode back unlocked, or drop it entirely. */
 		if (vpp != NULL && nd->nd_repstat == 0) {
 			NFSVOPUNLOCK(vp, 0);
 			*vpp = vp;
 		} else
 			vput(vp);
 	}
 
 	diraft_ret = nfsvno_getattr(dirp, &diraft, nd->nd_cred, p, 0);
 	vrele(dirp);
 	if (!nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV3) {
 			(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1);
 			nfsrv_postopattr(nd, 0, &nva);
 		} else {
 			/*
 			 * NFSv4: a false flag, the directory's na_filerev
 			 * before and after, then the attribute bits.
 			 */
 			NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 			*tl++ = newnfs_false;
 			txdr_hyper(dirfor.na_filerev, tl);
 			tl += 2;
 			txdr_hyper(diraft.na_filerev, tl);
 			(void) nfsrv_putattrbit(nd, &attrbits);
 		}
 	}
 	if (nd->nd_flag & ND_NFSV3)
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 #ifdef NFS4_ACL_EXTATTR_NAME
 	acl_free(aclp);
 #endif
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vrele(dp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 	acl_free(aclp);
 #endif
 	if (bufp)
 		nfsvno_relpathbuf(&named);
 	if (pathcp)
 		FREE(pathcp, M_TEMP);
 
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs remove service
  *
  * Parses the name to remove, looks it up under dp and removes it.  For
  * NFSv4 the choice between directory and file removal is made from the
  * type of the vnode found; for NFSv2/v3 it is made from the procedure
  * number (NFSPROC_RMDIR or not).
  *
  * Returns the parse error, if any, after releasing dp and the pathname
  * buffer; otherwise returns 0 with the RPC status left in nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_remove(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t dp, NFSPROC_T *p, struct nfsexstuff *exp)
 {
 	struct nameidata named;
 	u_int32_t *tl;
 	int error = 0, dirfor_ret = 1, diraft_ret = 1;
 	vnode_t dirp = NULL;
 	struct nfsvattr dirfor, diraft;
 	char *bufp;
 	u_long *hashp;
 
 	/* An earlier failure was already recorded: just emit the wcc data. */
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 		goto out;
 	}
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, DELETE,
 	    LOCKPARENT | LOCKLEAF);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (error) {
 		vput(dp);
 		nfsvno_relpathbuf(&named);
 		goto out;
 	}
 	/* Either do the lookup, or release what the lookup would have used. */
 	if (!nd->nd_repstat) {
 		nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp);
 	} else {
 		vput(dp);
 		nfsvno_relpathbuf(&named);
 	}
 	/*
 	 * dirp is only needed for the directory attributes, which an NFSv2
 	 * reply does not carry.
 	 */
 	if (dirp) {
 		if (!(nd->nd_flag & ND_NFSV2)) {
 			dirfor_ret = nfsvno_getattr(dirp, &dirfor,
 			    nd->nd_cred, p, 0);
 		} else {
 			vrele(dirp);
 			dirp = NULL;
 		}
 	}
 	if (!nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV4) {
 			if (vnode_vtype(named.ni_vp) == VDIR)
 				nd->nd_repstat = nfsvno_rmdirsub(&named, 1,
 				    nd->nd_cred, p, exp);
 			else
 				nd->nd_repstat = nfsvno_removesub(&named, 1,
 				    nd->nd_cred, p, exp);
 		} else if (nd->nd_procnum == NFSPROC_RMDIR) {
 			nd->nd_repstat = nfsvno_rmdirsub(&named, 0,
 			    nd->nd_cred, p, exp);
 		} else {
 			nd->nd_repstat = nfsvno_removesub(&named, 0,
 			    nd->nd_cred, p, exp);
 		}
 	}
 	if (!(nd->nd_flag & ND_NFSV2)) {
 		if (dirp) {
 			diraft_ret = nfsvno_getattr(dirp, &diraft, nd->nd_cred,
 			    p, 0);
 			vrele(dirp);
 		}
 		if (nd->nd_flag & ND_NFSV3) {
 			nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret,
 			    &diraft);
 		} else if (!nd->nd_repstat) {
 			/*
 			 * NFSv4: a false flag followed by the directory's
 			 * na_filerev before and after the removal.
 			 */
 			NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 			*tl++ = newnfs_false;
 			txdr_hyper(dirfor.na_filerev, tl);
 			tl += 2;
 			txdr_hyper(diraft.na_filerev, tl);
 		}
 	}
 
 out:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs rename service
  *
  * dp is the source directory.  For NFSv4 the target directory is passed
  * in as todp (with its export information in toexp); for NFSv2/v3 the
  * target directory file handle is parsed from the request and converted
  * to a vnode here.  Both names are parsed, looked up and handed to
  * nfsvno_rename().
  *
  * Returns the parse error, if any, after releasing the vnodes and
  * pathname buffers held at that point; otherwise returns 0 with the RPC
  * status left in nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_rename(struct nfsrv_descript *nd, int isdgram,
     vnode_t dp, vnode_t todp, NFSPROC_T *p, struct nfsexstuff *exp,
     struct nfsexstuff *toexp)
 {
 	u_int32_t *tl;
 	int error = 0, fdirfor_ret = 1, fdiraft_ret = 1;
 	int tdirfor_ret = 1, tdiraft_ret = 1;
 	struct nameidata fromnd, tond;
 	vnode_t fdirp = NULL, tdirp = NULL, tdp = NULL;
 	struct nfsvattr fdirfor, fdiraft, tdirfor, tdiraft;
 	struct nfsexstuff tnes;
 	struct nfsrvfh tfh;
 	char *bufp, *tbufp = NULL;
 	u_long *hashp;
 	fhandle_t fh;
 
 	/* An earlier failure was already recorded: emit both wcc blocks. */
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
 		nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
 		goto out;
 	}
 	if (!(nd->nd_flag & ND_NFSV2))
 		fdirfor_ret = nfsvno_getattr(dp, &fdirfor, nd->nd_cred, p, 1);
 	tond.ni_cnd.cn_nameiop = 0;
 	tond.ni_startdir = NULL;
 	NFSNAMEICNDSET(&fromnd.ni_cnd, nd->nd_cred, DELETE, WANTPARENT | SAVESTART);
 	nfsvno_setpathbuf(&fromnd, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &fromnd.ni_pathlen);
 	if (error) {
 		vput(dp);
 		if (todp)
 			vrele(todp);
 		nfsvno_relpathbuf(&fromnd);
 		goto out;
 	}
 	/*
 	 * Unlock dp in this code section, so it is unlocked before
 	 * tdp gets locked. This avoids a potential LOR if tdp is the
 	 * parent directory of dp.
 	 */
 	if (nd->nd_flag & ND_NFSV4) {
 		tdp = todp;
 		tnes = *toexp;
 		if (dp != tdp) {
 			NFSVOPUNLOCK(dp, 0);
 			tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd->nd_cred,
 			    p, 0);	/* Might lock tdp. */
 		} else {
 			/* Same directory: fetch attributes while still locked. */
 			tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd->nd_cred,
 			    p, 1);
 			NFSVOPUNLOCK(dp, 0);
 		}
 	} else {
 		/* NFSv2/v3: the target directory arrives as a file handle. */
 		tfh.nfsrvfh_len = 0;
 		error = nfsrv_mtofh(nd, &tfh);
 		if (error == 0)
 			error = nfsvno_getfh(dp, &fh, p);
 		if (error) {
 			vput(dp);
 			/* todp is always NULL except NFSv4 */
 			nfsvno_relpathbuf(&fromnd);
 			goto out;
 		}
 
 		/* If this is the same file handle, just VREF() the vnode. */
 		if (tfh.nfsrvfh_len == NFSX_MYFH &&
 		    !NFSBCMP(tfh.nfsrvfh_data, &fh, NFSX_MYFH)) {
 			VREF(dp);
 			tdp = dp;
 			tnes = *exp;
 			tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd->nd_cred,
 			    p, 1);
 			NFSVOPUNLOCK(dp, 0);
 		} else {
 			NFSVOPUNLOCK(dp, 0);
 			/* The saved uid is restored before the handle lookup. */
 			nd->nd_cred->cr_uid = nd->nd_saveduid;
 			nfsd_fhtovp(nd, &tfh, LK_EXCLUSIVE, &tdp, &tnes, NULL,
 			    0, p);	/* Locks tdp. */
 			if (tdp) {
 				tdirfor_ret = nfsvno_getattr(tdp, &tdirfor,
 				    nd->nd_cred, p, 1);
 				NFSVOPUNLOCK(tdp, 0);
 			}
 		}
 	}
 	NFSNAMEICNDSET(&tond.ni_cnd, nd->nd_cred, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART);
 	nfsvno_setpathbuf(&tond, &tbufp, &hashp);
 	if (!nd->nd_repstat) {
 		error = nfsrv_parsename(nd, tbufp, hashp, &tond.ni_pathlen);
 		if (error) {
 			if (tdp)
 				vrele(tdp);
 			vrele(dp);
 			nfsvno_relpathbuf(&fromnd);
 			nfsvno_relpathbuf(&tond);
 			goto out;
 		}
 	}
 	/* A failure status was recorded while parsing: release and reply. */
 	if (nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV3) {
 			nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret,
 			    &fdiraft);
 			nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret,
 			    &tdiraft);
 		}
 		if (tdp)
 			vrele(tdp);
 		vrele(dp);
 		nfsvno_relpathbuf(&fromnd);
 		nfsvno_relpathbuf(&tond);
 		goto out;
 	}
 
 	/*
 	 * Done parsing, now down to business.
 	 */
 	nd->nd_repstat = nfsvno_namei(nd, &fromnd, dp, 0, exp, p, &fdirp);
 	if (nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV3) {
 			nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret,
 			    &fdiraft);
 			nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret,
 			    &tdiraft);
 		}
 		if (fdirp)
 			vrele(fdirp);
 		if (tdp)
 			vrele(tdp);
 		nfsvno_relpathbuf(&tond);
 		goto out;
 	}
 	if (vnode_vtype(fromnd.ni_vp) == VDIR)
 		tond.ni_cnd.cn_flags |= WILLBEDIR;
 	/*
 	 * The status of the target lookup is passed into nfsvno_rename()
 	 * rather than being checked here.
 	 */
 	nd->nd_repstat = nfsvno_namei(nd, &tond, tdp, 0, &tnes, p, &tdirp);
 	nd->nd_repstat = nfsvno_rename(&fromnd, &tond, nd->nd_repstat,
 	    nd->nd_flag, nd->nd_cred, p);
 	if (fdirp)
 		fdiraft_ret = nfsvno_getattr(fdirp, &fdiraft, nd->nd_cred, p,
 		    0);
 	if (tdirp)
 		tdiraft_ret = nfsvno_getattr(tdirp, &tdiraft, nd->nd_cred, p,
 		    0);
 	if (fdirp)
 		vrele(fdirp);
 	if (tdirp)
 		vrele(tdirp);
 	if (nd->nd_flag & ND_NFSV3) {
 		nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
 		nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
 	} else if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) {
 		/*
 		 * NFSv4: for the source and then the target directory, a
 		 * false flag followed by na_filerev before and after.
 		 */
 		NFSM_BUILD(tl, u_int32_t *, 10 * NFSX_UNSIGNED);
 		*tl++ = newnfs_false;
 		txdr_hyper(fdirfor.na_filerev, tl);
 		tl += 2;
 		txdr_hyper(fdiraft.na_filerev, tl);
 		tl += 2;
 		*tl++ = newnfs_false;
 		txdr_hyper(tdirfor.na_filerev, tl);
 		tl += 2;
 		txdr_hyper(tdiraft.na_filerev, tl);
 	}
 
 out:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs link service
  *
  * vp is the existing object to link to; it is unlocked on entry to the
  * main path and released with vrele() before returning.  For NFSv4 the
  * target directory is passed in as tovp (with its export information in
  * toexp); for NFSv2/v3 the directory file handle is parsed from the
  * request and converted to a vnode here.  Linking to a directory is
  * refused.
  *
  * Returns the parse error, if any, after releasing the vnodes held at
  * that point; otherwise returns 0 with the RPC status left in
  * nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_link(struct nfsrv_descript *nd, int isdgram,
     vnode_t vp, vnode_t tovp, NFSPROC_T *p, struct nfsexstuff *exp,
     struct nfsexstuff *toexp)
 {
 	struct nameidata named;
 	u_int32_t *tl;
 	int error = 0, dirfor_ret = 1, diraft_ret = 1, getret = 1;
 	vnode_t dirp = NULL, dp = NULL;
 	struct nfsvattr dirfor, diraft, at;
 	struct nfsexstuff tnes;
 	struct nfsrvfh dfh;
 	char *bufp;
 	u_long *hashp;
 
 	/* An earlier failure was already recorded: emit attributes and wcc. */
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, getret, &at);
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 		goto out;
 	}
 	NFSVOPUNLOCK(vp, 0);
 	if (vnode_vtype(vp) == VDIR) {
 		if (nd->nd_flag & ND_NFSV4)
 			nd->nd_repstat = NFSERR_ISDIR;
 		else
 			nd->nd_repstat = NFSERR_INVAL;
 		if (tovp)
 			vrele(tovp);
 	}
 	if (!nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV4) {
 			dp = tovp;
 			tnes = *toexp;
 		} else {
 			/* NFSv2/v3: the directory arrives as a file handle. */
 			error = nfsrv_mtofh(nd, &dfh);
 			if (error) {
 				vrele(vp);
 				/* tovp is always NULL unless NFSv4 */
 				goto out;
 			}
 			nfsd_fhtovp(nd, &dfh, LK_EXCLUSIVE, &dp, &tnes, NULL, 0,
 			    p);
 			if (dp)
 				NFSVOPUNLOCK(dp, 0);
 		}
 	}
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
 	    LOCKPARENT | SAVENAME | NOCACHE);
 	if (!nd->nd_repstat) {
 		nfsvno_setpathbuf(&named, &bufp, &hashp);
 		error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 		if (error) {
 			vrele(vp);
 			if (dp)
 				vrele(dp);
 			nfsvno_relpathbuf(&named);
 			goto out;
 		}
 		/* Either do the lookup, or release what it would have used. */
 		if (!nd->nd_repstat) {
 			nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, &tnes,
 			    p, &dirp);
 		} else {
 			if (dp)
 				vrele(dp);
 			nfsvno_relpathbuf(&named);
 		}
 	}
 	/*
 	 * dirp is only needed for the directory attributes, which an NFSv2
 	 * reply does not carry.
 	 */
 	if (dirp) {
 		if (nd->nd_flag & ND_NFSV2) {
 			vrele(dirp);
 			dirp = NULL;
 		} else {
 			dirfor_ret = nfsvno_getattr(dirp, &dirfor,
 			    nd->nd_cred, p, 0);
 		}
 	}
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_link(&named, vp, nd->nd_cred, p, exp);
 	if (nd->nd_flag & ND_NFSV3)
 		getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 0);
 	if (dirp) {
 		diraft_ret = nfsvno_getattr(dirp, &diraft, nd->nd_cred, p, 0);
 		vrele(dirp);
 	}
 	vrele(vp);
 	if (nd->nd_flag & ND_NFSV3) {
 		nfsrv_postopattr(nd, getret, &at);
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 	} else if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) {
 		/*
 		 * NFSv4: a false flag followed by the directory's
 		 * na_filerev before and after the link.
 		 */
 		NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 		*tl++ = newnfs_false;
 		txdr_hyper(dirfor.na_filerev, tl);
 		tl += 2;
 		txdr_hyper(diraft.na_filerev, tl);
 	}
 
 out:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs symbolic link service
  *
  * Parses the link name and the link target, looks the name up under dp
  * and lets nfsrvd_symlinksub() create the link.  When vpp is not NULL it
  * is cleared here before anything is parsed.
  *
  * Returns the parse error, if any, after releasing dp and the pathname
  * buffer; otherwise returns 0 with the RPC status left in nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_symlink(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p,
     struct nfsexstuff *exp)
 {
 	struct nfsvattr nva, dirfor, diraft;
 	struct nameidata named;
 	int error = 0, dirfor_ret = 1, diraft_ret = 1, pathlen;
 	vnode_t dirp = NULL;
 	char *bufp, *pathcp = NULL;
 	u_long *hashp;
 
 	/* An earlier failure was already recorded: just emit the wcc data. */
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 		goto out;
 	}
 	if (vpp)
 		*vpp = NULL;
 	NFSVNO_ATTRINIT(&nva);
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
 	    LOCKPARENT | SAVESTART | NOCACHE);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	/* The link target is only parsed if the name was accepted. */
 	if (!error && !nd->nd_repstat)
 		error = nfsvno_getsymlink(nd, &nva, p, &pathcp, &pathlen);
 	if (error) {
 		vrele(dp);
 		nfsvno_relpathbuf(&named);
 		goto out;
 	}
 	/* Either do the lookup, or release what the lookup would have used. */
 	if (!nd->nd_repstat) {
 		nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp);
 	} else {
 		vrele(dp);
 		nfsvno_relpathbuf(&named);
 	}
 	/* Only an NFSv3 reply carries the directory attributes. */
 	if (dirp != NULL && !(nd->nd_flag & ND_NFSV3)) {
 		vrele(dirp);
 		dirp = NULL;
 	}
 
 	/*
 	 * And call nfsrvd_symlinksub() to do the common code. It will
 	 * return EBADRPC upon a parsing error, 0 otherwise.
 	 */
 	if (!nd->nd_repstat) {
 		if (dirp != NULL)
 			dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred,
 			    p, 0);
 		nfsrvd_symlinksub(nd, &named, &nva, fhp, vpp, dirp,
 		    &dirfor, &diraft, &diraft_ret, NULL, NULL, p, exp,
 		    pathcp, pathlen);
 	} else if (dirp != NULL) {
 		dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, p, 0);
 		vrele(dirp);
 	}
 	if (pathcp)
 		FREE(pathcp, M_TEMP);
 
 	if (nd->nd_flag & ND_NFSV3) {
 		if (!nd->nd_repstat) {
 			(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1);
 			nfsrv_postopattr(nd, 0, &nva);
 		}
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 	}
 
 out:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Back end shared by the symbolic link creation paths.
  *
  * Creates the link described by ndp/pathcp, leaving the result in
  * nd->nd_repstat.  For anything but NFSv2 the attributes of the new
  * vnode are fixed up, and for NFSv3 its file handle and attributes are
  * fetched as well.  The new vnode is handed back unlocked through *vpp
  * when vpp is supplied and no error occurred; otherwise it is released.
  * If dirp is supplied its attributes after the operation are stored
  * through diraftp/diraft_retp and dirp is released.  For NFSv4 the
  * change information and attribute bits are appended to the reply.
  */
 static void
 nfsrvd_symlinksub(struct nfsrv_descript *nd, struct nameidata *ndp,
     struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp,
     vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp,
     int *diraft_retp, nfsattrbit_t *attrbitp,
     NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp, char *pathcp,
     int pathlen)
 {
 	u_int32_t *wp;
 	vnode_t newvp;
 	int notv2;
 
 	notv2 = !(nd->nd_flag & ND_NFSV2);
 	nd->nd_repstat = nfsvno_symlink(ndp, nvap, pathcp, pathlen,
 	    notv2, nd->nd_saveduid, nd->nd_cred, p, exp);
 	if (nd->nd_repstat == 0 && notv2) {
 		newvp = ndp->ni_vp;
 		nfsrv_fixattr(nd, newvp, nvap, aclp, p, attrbitp, exp);
 		if ((nd->nd_flag & ND_NFSV3) != 0) {
 			nd->nd_repstat = nfsvno_getfh(newvp, fhp, p);
 			if (nd->nd_repstat == 0)
 				nd->nd_repstat = nfsvno_getattr(newvp,
 				    nvap, nd->nd_cred, p, 1);
 		}
 		/* Either pass the vnode to the caller or drop it here. */
 		if (vpp == NULL || nd->nd_repstat != 0) {
 			vput(newvp);
 		} else {
 			NFSVOPUNLOCK(newvp, 0);
 			*vpp = newvp;
 		}
 	}
 	if (dirp != NULL) {
 		*diraft_retp = nfsvno_getattr(dirp, diraftp, nd->nd_cred, p, 0);
 		vrele(dirp);
 	}
 	if ((nd->nd_flag & ND_NFSV4) != 0 && nd->nd_repstat == 0) {
 		/* A false flag, then na_filerev before and after. */
 		NFSM_BUILD(wp, u_int32_t *, 5 * NFSX_UNSIGNED);
 		wp[0] = newnfs_false;
 		txdr_hyper(dirforp->na_filerev, &wp[1]);
 		txdr_hyper(diraftp->na_filerev, &wp[3]);
 		(void) nfsrv_putattrbit(nd, attrbitp);
 	}
 
 	NFSEXITCODE2(0, nd);
 }
 
 /*
  * nfs mkdir service
  *
  * Parses the directory name and its attributes (a full settable
  * attribute set for NFSv3, otherwise just a mode word), looks the name up
  * under dp and lets nfsrvd_mkdirsub() create the directory.
  *
  * Returns 0 once a reply has been built (the RPC status is left in
  * nd->nd_repstat), or a non-zero error through the nfsmout label, which
  * releases dp and the pathname buffer.
  */
 APPLESTATIC int
 nfsrvd_mkdir(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t dp, vnode_t *vpp, fhandle_t *fhp, NFSPROC_T *p,
     struct nfsexstuff *exp)
 {
 	struct nfsvattr nva, dirfor, diraft;
 	struct nameidata named;
 	u_int32_t *tl;
 	int error = 0, dirfor_ret = 1, diraft_ret = 1;
 	vnode_t dirp = NULL;
 	char *bufp;
 	u_long *hashp;
 
 	/* An earlier failure was already recorded: just emit the wcc data. */
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 		goto out;
 	}
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
 	    LOCKPARENT | SAVENAME | NOCACHE);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (error)
 		goto nfsmout;
 	if (!nd->nd_repstat) {
 		NFSVNO_ATTRINIT(&nva);
 		if (nd->nd_flag & ND_NFSV3) {
 			error = nfsrv_sattr(nd, NULL, &nva, NULL, NULL, p);
 			if (error)
 				goto nfsmout;
 		} else {
 			/* Only the mode word is taken from the request. */
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			nva.na_mode = nfstov_mode(*tl++);
 		}
 	}
 	/* Either do the lookup, or release what the lookup would have used. */
 	if (!nd->nd_repstat) {
 		nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, p, &dirp);
 	} else {
 		vrele(dp);
 		nfsvno_relpathbuf(&named);
 	}
 	/* Only an NFSv3 reply carries the directory attributes. */
 	if (dirp != NULL && !(nd->nd_flag & ND_NFSV3)) {
 		vrele(dirp);
 		dirp = NULL;
 	}
 	if (nd->nd_repstat) {
 		if (dirp != NULL) {
 			dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred,
 			    p, 0);
 			vrele(dirp);
 		}
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret,
 			    &diraft);
 		goto out;
 	}
 	if (dirp != NULL)
 		dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd->nd_cred, p, 0);
 
 	/*
 	 * Call nfsrvd_mkdirsub() for the code common to V4 as well.
 	 */
 	nfsrvd_mkdirsub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft,
 	    &diraft_ret, NULL, NULL, p, exp);
 
 	if (nd->nd_flag & ND_NFSV3) {
 		if (!nd->nd_repstat) {
 			(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1);
 			nfsrv_postopattr(nd, 0, &nva);
 		}
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 	} else if (!nd->nd_repstat) {
 		(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0);
 		nfsrv_fillattr(nd, &nva);
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vrele(dp);
 	nfsvno_relpathbuf(&named);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Code common to mkdir for V2,3 and 4.
  *
  * Forces the attribute type to VDIR and calls nfsvno_mkdir().  When that
  * succeeds, the new vnode's attributes are fixed up, its file handle is
  * stored in *fhp and (for anything but NFSv4) its attributes are re-read
  * into *nvap.  The new vnode is handed back unlocked through *vpp when
  * vpp is non-NULL and nothing failed; otherwise it is vput().
  * If dirp is non-NULL its attributes are read into *diraftp (result in
  * *diraft_retp) and dirp is released.  For a successful NFSv4 request the
  * change information and attribute bitmap are appended to the reply.
  * The outcome is reported through nd->nd_repstat.
  */
 static void
 nfsrvd_mkdirsub(struct nfsrv_descript *nd, struct nameidata *ndp,
     struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp,
     vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp,
     int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp,
     NFSPROC_T *p, struct nfsexstuff *exp)
 {
 	vnode_t newvp;
 	u_int32_t *xdrp;
 
 	NFSVNO_SETATTRVAL(nvap, type, VDIR);
 	nd->nd_repstat = nfsvno_mkdir(ndp, nvap, nd->nd_saveduid, nd->nd_cred,
 	    p, exp);
 	if (nd->nd_repstat == 0) {
 		newvp = ndp->ni_vp;
 		nfsrv_fixattr(nd, newvp, nvap, aclp, p, attrbitp, exp);
 		nd->nd_repstat = nfsvno_getfh(newvp, fhp, p);
 		if ((nd->nd_flag & ND_NFSV4) == 0 && nd->nd_repstat == 0)
 			nd->nd_repstat = nfsvno_getattr(newvp, nvap,
 			    nd->nd_cred, p, 1);
 		if (vpp == NULL || nd->nd_repstat != 0) {
 			/* Nobody wants the vnode (or we failed): drop it. */
 			vput(newvp);
 		} else {
 			/* Hand the vnode back to the caller, unlocked. */
 			NFSVOPUNLOCK(newvp, 0);
 			*vpp = newvp;
 		}
 	}
 
 	/* Directory attributes after the create attempt. */
 	if (dirp != NULL) {
 		*diraft_retp = nfsvno_getattr(dirp, diraftp, nd->nd_cred, p, 0);
 		vrele(dirp);
 	}
 
 	if ((nd->nd_flag & ND_NFSV4) != 0 && nd->nd_repstat == 0) {
 		/* One flag word followed by the before/after filerev hypers. */
 		NFSM_BUILD(xdrp, u_int32_t *, 5 * NFSX_UNSIGNED);
 		xdrp[0] = newnfs_false;
 		txdr_hyper(dirforp->na_filerev, &xdrp[1]);
 		txdr_hyper(diraftp->na_filerev, &xdrp[3]);
 		(void) nfsrv_putattrbit(nd, attrbitp);
 	}
 
 	NFSEXITCODE2(0, nd);
 }
 
 /*
  * nfs commit service
  *
  * Decodes an offset and count from the request and calls nfsvno_fsync()
  * on vp.  For NFSv3 the attributes before and after the sync are passed
  * to nfsrv_wcc().  On success the reply carries a write verifier built
  * from nfsboottime.  vp is vput() on every path except when nd_repstat
  * was already set on entry.
  *
  * Returns 0 with the status in nd->nd_repstat, or a non-zero error when
  * vp is not a regular file or the arguments cannot be decoded.
  */
 APPLESTATIC int
 nfsrvd_commit(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	struct nfsvattr preattr, postattr;
 	u_int32_t *argp, *verfp;
 	u_int64_t off;
 	int error = 0, pre_ret = 1, post_ret = 1, cnt;
 
 	if (nd->nd_repstat != 0) {
 		nfsrv_wcc(nd, pre_ret, &preattr, post_ret, &postattr);
 		goto out;
 	}
 
 	/*
 	 * Only regular files can be committed.  NFSv3 gets NFSERR_NOTSUPP;
 	 * otherwise a directory yields NFSERR_ISDIR and anything else
 	 * NFSERR_INVAL.
 	 */
 	if (vp->v_type != VREG) {
 		if ((nd->nd_flag & ND_NFSV3) != 0)
 			error = NFSERR_NOTSUPP;
 		else if (vp->v_type == VDIR)
 			error = NFSERR_ISDIR;
 		else
 			error = NFSERR_INVAL;
 		goto nfsmout;
 	}
 
 	/*
 	 * The arguments are a 64 bit offset followed by a 32 bit count.
 	 * XXX At this time VOP_FSYNC() does not accept offset and byte
 	 * count parameters, so these arguments are useless (someday maybe).
 	 */
 	NFSM_DISSECT(argp, u_int32_t *, 3 * NFSX_UNSIGNED);
 	off = fxdr_hyper(argp);
 	cnt = fxdr_unsigned(int, argp[2]);
 
 	if ((nd->nd_flag & ND_NFSV3) != 0)
 		pre_ret = nfsvno_getattr(vp, &preattr, nd->nd_cred, p, 1);
 	nd->nd_repstat = nfsvno_fsync(vp, off, cnt, nd->nd_cred, p);
 	if ((nd->nd_flag & ND_NFSV3) != 0) {
 		post_ret = nfsvno_getattr(vp, &postattr, nd->nd_cred, p, 1);
 		nfsrv_wcc(nd, pre_ret, &preattr, post_ret, &postattr);
 	}
 	vput(vp);
 
 	if (nd->nd_repstat == 0) {
 		NFSM_BUILD(verfp, u_int32_t *, NFSX_VERF);
 		verfp[0] = txdr_unsigned(nfsboottime.tv_sec);
 		verfp[1] = txdr_unsigned(nfsboottime.tv_usec);
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs statfs service
  *
  * Calls nfsvno_statfs() and nfsvno_getattr() on vp, releases vp, and
  * builds the reply: the NFSv2 layout (five 32 bit words) when ND_NFSV2
  * is set, otherwise six 64 bit values followed by one zero word.  For
  * NFSv3 the post-op attributes precede the data.  Always returns 0; the
  * status is left in nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_statfs(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	struct statfs sfs;
 	struct nfsvattr at;
 	u_int32_t *wp;
 	u_quad_t tval;
 	int getret = 1;
 
 	if (nd->nd_repstat != 0) {
 		nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 
 	nd->nd_repstat = nfsvno_statfs(vp, &sfs);
 	getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
 	vput(vp);
 	if ((nd->nd_flag & ND_NFSV3) != 0)
 		nfsrv_postopattr(nd, getret, &at);
 	if (nd->nd_repstat != 0)
 		goto out;
 
 	if ((nd->nd_flag & ND_NFSV2) != 0) {
 		NFSM_BUILD(wp, u_int32_t *, NFSX_V2STATFS);
 		wp[0] = txdr_unsigned(NFS_V2MAXDATA);
 		wp[1] = txdr_unsigned(sfs.f_bsize);
 		wp[2] = txdr_unsigned(sfs.f_blocks);
 		wp[3] = txdr_unsigned(sfs.f_bfree);
 		wp[4] = txdr_unsigned(sfs.f_bavail);
 	} else {
 		NFSM_BUILD(wp, u_int32_t *, NFSX_V3STATFS);
 		/* Three byte counts: block count times block size. */
 		tval = (u_quad_t)sfs.f_blocks * (u_quad_t)sfs.f_bsize;
 		txdr_hyper(tval, &wp[0]);
 		tval = (u_quad_t)sfs.f_bfree * (u_quad_t)sfs.f_bsize;
 		txdr_hyper(tval, &wp[2]);
 		tval = (u_quad_t)sfs.f_bavail * (u_quad_t)sfs.f_bsize;
 		txdr_hyper(tval, &wp[4]);
 		/* Three file counts; f_ffree is sent for both of the last two. */
 		tval = (u_quad_t)sfs.f_files;
 		txdr_hyper(tval, &wp[6]);
 		tval = (u_quad_t)sfs.f_ffree;
 		txdr_hyper(tval, &wp[8]);
 		tval = (u_quad_t)sfs.f_ffree;
 		txdr_hyper(tval, &wp[10]);
 		wp[12] = 0;
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
 
 /*
  * nfs fsinfo service
  *
  * Reads the attributes of vp, obtains the server's transfer parameters
  * from nfsvno_getfs(), releases vp and builds the reply: post-op
  * attributes followed by the fields of struct nfsfsinfo.  Always returns
  * 0; when nd_repstat is already set on entry only the post-op attributes
  * are emitted.
  */
 APPLESTATIC int
 nfsrvd_fsinfo(struct nfsrv_descript *nd, int isdgram,
     vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	struct nfsfsinfo fs;
 	struct nfsvattr at;
 	u_int32_t *wp;
 	int getret = 1;
 
 	if (nd->nd_repstat != 0) {
 		nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 
 	getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
 	nfsvno_getfs(&fs, isdgram);
 	vput(vp);
 	nfsrv_postopattr(nd, getret, &at);
 
 	NFSM_BUILD(wp, u_int32_t *, NFSX_V3FSINFO);
 	/* Seven 32 bit sizes ... */
 	wp[0] = txdr_unsigned(fs.fs_rtmax);
 	wp[1] = txdr_unsigned(fs.fs_rtpref);
 	wp[2] = txdr_unsigned(fs.fs_rtmult);
 	wp[3] = txdr_unsigned(fs.fs_wtmax);
 	wp[4] = txdr_unsigned(fs.fs_wtpref);
 	wp[5] = txdr_unsigned(fs.fs_wtmult);
 	wp[6] = txdr_unsigned(fs.fs_dtpref);
 	/* ... then two two-word values and the properties word. */
 	txdr_hyper(fs.fs_maxfilesize, &wp[7]);
 	txdr_nfsv3time(&fs.fs_timedelta, &wp[9]);
 	wp[11] = txdr_unsigned(fs.fs_properties);
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
 
 /*
  * nfs pathconf service
  *
  * Queries _PC_LINK_MAX, _PC_NAME_MAX, _PC_CHOWN_RESTRICTED and
  * _PC_NO_TRUNC on vp through nfsvno_pathconf(), stopping at the first
  * failure, then reads the attributes of vp and releases it.  The reply
  * is the post-op attributes followed, on success, by a struct
  * nfsv3_pathconf.  Always returns 0; the status is in nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_pathconf(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	struct nfsv3_pathconf *reply;
 	struct nfsvattr at;
 	register_t linkmax, namemax, chownres, notrunc;
 	int getret = 1;
 
 	if (nd->nd_repstat != 0) {
 		nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 
 	/* Each query is only attempted while the previous ones succeeded. */
 	nd->nd_repstat = nfsvno_pathconf(vp, _PC_LINK_MAX, &linkmax,
 	    nd->nd_cred, p);
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = nfsvno_pathconf(vp, _PC_NAME_MAX, &namemax,
 		    nd->nd_cred, p);
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = nfsvno_pathconf(vp, _PC_CHOWN_RESTRICTED,
 		    &chownres, nd->nd_cred, p);
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = nfsvno_pathconf(vp, _PC_NO_TRUNC, &notrunc,
 		    nd->nd_cred, p);
 
 	getret = nfsvno_getattr(vp, &at, nd->nd_cred, p, 1);
 	vput(vp);
 	nfsrv_postopattr(nd, getret, &at);
 
 	if (nd->nd_repstat == 0) {
 		NFSM_BUILD(reply, struct nfsv3_pathconf *, NFSX_V3PATHCONF);
 		reply->pc_linkmax = txdr_unsigned(linkmax);
 		reply->pc_namemax = txdr_unsigned(namemax);
 		reply->pc_notrunc = txdr_unsigned(notrunc);
 		reply->pc_chownrestricted = txdr_unsigned(chownres);
 
 		/*
 		 * These should probably be supported by VOP_PATHCONF(), but
 		 * until msdosfs is exportable (why would you want to?), the
 		 * Unix defaults should be ok.
 		 */
 		reply->pc_caseinsensitive = newnfs_false;
 		reply->pc_casepreserving = newnfs_true;
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
 
 /*
  * nfsv4 lock service
  *
  * Decodes the lock type, reclaim flag, byte range and either an
  * open-to-lock owner or an existing lock stateid from the request,
  * allocates a struct nfsstate (stp) and a struct nfslock (lop)
  * describing it, performs basic access checks on vp and calls
  * nfsrv_lockctrl().
  *
  * On success the reply carries the resulting stateid; on NFSERR_DENIED
  * it carries the conflicting range, lock type and owner taken from cf.
  * vp is vput() on every path.  Returns 0 with the status in
  * nd->nd_repstat, or the decode error via nfsmout.
  */
 APPLESTATIC int
 nfsrvd_lock(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int i;
 	struct nfsstate *stp = NULL;
 	struct nfslock *lop;
 	struct nfslockconflict cf;
 	int error = 0;
 	u_short flags = NFSLCK_LOCK, lflags;
 	u_int64_t offset, len;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 
 	NFSM_DISSECT(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
 	i = fxdr_unsigned(int, *tl++);
 	/* The "W" variants only add NFSLCK_BLOCKING and then fall through. */
 	switch (i) {
 	case NFSV4LOCKT_READW:
 		flags |= NFSLCK_BLOCKING;
 	case NFSV4LOCKT_READ:
 		lflags = NFSLCK_READ;
 		break;
 	case NFSV4LOCKT_WRITEW:
 		flags |= NFSLCK_BLOCKING;
 	case NFSV4LOCKT_WRITE:
 		lflags = NFSLCK_WRITE;
 		break;
 	default:
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	};
 	if (*tl++ == newnfs_true)
 		flags |= NFSLCK_RECLAIM;
 	offset = fxdr_hyper(tl);
 	tl += 2;
 	len = fxdr_hyper(tl);
 	tl += 2;
 	if (*tl == newnfs_true)
 		flags |= NFSLCK_OPENTOLOCK;
 	if (flags & NFSLCK_OPENTOLOCK) {
 		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED + NFSX_STATEID);
 		/*
 		 * The owner length is the last word of the part just
 		 * dissected; read it first so stp can be sized to hold the
 		 * owner string.
 		 */
 		i = fxdr_unsigned(int, *(tl+4+(NFSX_STATEID / NFSX_UNSIGNED)));
 		if (i <= 0 || i > NFSV4_OPAQUELIMIT) {
 			nd->nd_repstat = NFSERR_BADXDR;
 			goto nfsmout;
 		}
 		MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate) + i,
 			M_NFSDSTATE, M_WAITOK);
 		stp->ls_ownerlen = i;
 		stp->ls_op = nd->nd_rp;
 		stp->ls_seq = fxdr_unsigned(int, *tl++);
 		stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 		NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other,
 			NFSX_STATEIDOTHER);
 		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 		stp->ls_opentolockseq = fxdr_unsigned(int, *tl++);
 		clientid.lval[0] = *tl++;
 		clientid.lval[1] = *tl++;
 		/*
 		 * Reconcile the clientid from the request with the one
 		 * already recorded in nd, or record it if none was.
 		 */
 		if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 			if ((nd->nd_flag & ND_NFSV41) != 0)
 				clientid.qval = nd->nd_clientid.qval;
 			else if (nd->nd_clientid.qval != clientid.qval)
 				printf("EEK3 multiple clids\n");
 		} else {
 			if ((nd->nd_flag & ND_NFSV41) != 0)
 				printf("EEK! no clientid from session\n");
 			nd->nd_flag |= ND_IMPLIEDCLID;
 			nd->nd_clientid.qval = clientid.qval;
 		}
 		error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen);
 		if (error)
 			goto nfsmout;
 	} else {
 		/* Existing lock owner: only a stateid and a seqid follow. */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
 		MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate),
 			M_NFSDSTATE, M_WAITOK);
 		stp->ls_ownerlen = 0;
 		stp->ls_op = nd->nd_rp;
 		stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 		NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other,
 			NFSX_STATEIDOTHER);
 		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 		stp->ls_seq = fxdr_unsigned(int, *tl);
 		/* The clientid is taken from the first two words of the stateid. */
 		clientid.lval[0] = stp->ls_stateid.other[0];
 		clientid.lval[1] = stp->ls_stateid.other[1];
 		if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 			if ((nd->nd_flag & ND_NFSV41) != 0)
 				clientid.qval = nd->nd_clientid.qval;
 			else if (nd->nd_clientid.qval != clientid.qval)
 				printf("EEK4 multiple clids\n");
 		} else {
 			if ((nd->nd_flag & ND_NFSV41) != 0)
 				printf("EEK! no clientid from session\n");
 			nd->nd_flag |= ND_IMPLIEDCLID;
 			nd->nd_clientid.qval = clientid.qval;
 		}
 	}
 	MALLOC(lop, struct nfslock *, sizeof (struct nfslock),
 		M_NFSDLOCK, M_WAITOK);
 	lop->lo_first = offset;
 	if (len == NFS64BITSSET) {
 		/* An all-ones length is mapped to an all-ones end offset. */
 		lop->lo_end = NFS64BITSSET;
 	} else {
 		lop->lo_end = offset + len;
 		/* A zero length or a wrapped sum is rejected. */
 		if (lop->lo_end <= lop->lo_first)
 			nd->nd_repstat = NFSERR_INVAL;
 	}
 	lop->lo_flags = lflags;
 	stp->ls_flags = flags;
 	stp->ls_uid = nd->nd_cred->cr_uid;
 
 	/*
 	 * Do basic access checking.
 	 */
 	if (!nd->nd_repstat && vnode_vtype(vp) != VREG) {
 	    if (vnode_vtype(vp) == VDIR)
 		nd->nd_repstat = NFSERR_ISDIR;
 	    else
 		nd->nd_repstat = NFSERR_INVAL;
 	}
 	if (!nd->nd_repstat) {
 	    if (lflags & NFSLCK_WRITE) {
 		nd->nd_repstat = nfsvno_accchk(vp, VWRITE,
 		    nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER,
 		    NFSACCCHK_VPISLOCKED, NULL);
 	    } else {
 		nd->nd_repstat = nfsvno_accchk(vp, VREAD,
 		    nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER,
 		    NFSACCCHK_VPISLOCKED, NULL);
 		/* If the VREAD check fails, VEXEC access is tried instead. */
 		if (nd->nd_repstat)
 		    nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
 			nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER,
 			NFSACCCHK_VPISLOCKED, NULL);
 	    }
 	}
 
 	/*
 	 * We call nfsrv_lockctrl() even if nd_repstat set, so that the
 	 * seqid# gets updated. nfsrv_lockctrl() will return the value
 	 * of nd_repstat, if it gets that far.
 	 */
 	nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, &cf, clientid, 
 		&stateid, exp, nd, p);
 	/*
 	 * stp and lop were passed by reference; free whatever is still
 	 * non-NULL afterwards.
 	 */
 	if (lop)
 		FREE((caddr_t)lop, M_NFSDLOCK);
 	if (stp)
 		FREE((caddr_t)stp, M_NFSDSTATE);
 	if (!nd->nd_repstat) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
 		*tl++ = txdr_unsigned(stateid.seqid);
 		NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER);
 	} else if (nd->nd_repstat == NFSERR_DENIED) {
 		/* Describe the conflicting lock: offset, length, type, owner. */
 		NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
 		txdr_hyper(cf.cl_first, tl);
 		tl += 2;
 		if (cf.cl_end == NFS64BITSSET)
 			len = NFS64BITSSET;
 		else
 			len = cf.cl_end - cf.cl_first;
 		txdr_hyper(len, tl);
 		tl += 2;
 		if (cf.cl_flags == NFSLCK_WRITE)
 			*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
 		else
 			*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
 		*tl++ = stateid.other[0];
 		*tl = stateid.other[1];
 		(void) nfsm_strtom(nd, cf.cl_owner, cf.cl_ownerlen);
 	}
 	vput(vp);
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	/* Every jump here happens before lop is allocated; only stp may exist. */
 	vput(vp);
 	if (stp)
 		free((caddr_t)stp, M_NFSDSTATE);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 lock test service
  *
  * Decodes the lock type, byte range, clientid and owner from the
  * request into a struct nfsstate (stp, flagged NFSLCK_TEST) and an
  * on-stack struct nfslock, then calls nfsrv_lockctrl() unless an error
  * was already found.  When the result is NFSERR_DENIED the reply
  * carries the conflicting range, lock type and owner from cf.
  *
  * vp is vput() and stp is freed on every path.  Returns 0 with the
  * status in nd->nd_repstat, or the decode error via nfsmout.
  */
 APPLESTATIC int
 nfsrvd_lockt(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int i;
 	struct nfsstate *stp = NULL;
 	struct nfslock lo, *lop = &lo;
 	struct nfslockconflict cf;
 	int error = 0;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 	u_int64_t len;
 
 	NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
 	/*
 	 * The owner length is the last of the eight words; read it first
 	 * so stp can be sized to hold the owner string.
 	 */
 	i = fxdr_unsigned(int, *(tl + 7));
 	if (i <= 0 || i > NFSV4_OPAQUELIMIT) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate) + i,
 	    M_NFSDSTATE, M_WAITOK);
 	stp->ls_ownerlen = i;
 	stp->ls_op = NULL;
 	stp->ls_flags = NFSLCK_TEST;
 	stp->ls_uid = nd->nd_cred->cr_uid;
 	i = fxdr_unsigned(int, *tl++);
 	switch (i) {
 	case NFSV4LOCKT_READW:
 		stp->ls_flags |= NFSLCK_BLOCKING;
 		/* FALLTHROUGH */
 	case NFSV4LOCKT_READ:
 		lo.lo_flags = NFSLCK_READ;
 		break;
 	case NFSV4LOCKT_WRITEW:
 		stp->ls_flags |= NFSLCK_BLOCKING;
 		/* FALLTHROUGH */
 	case NFSV4LOCKT_WRITE:
 		lo.lo_flags = NFSLCK_WRITE;
 		break;
 	default:
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	};
 	lo.lo_first = fxdr_hyper(tl);
 	tl += 2;
 	len = fxdr_hyper(tl);
 	if (len == NFS64BITSSET) {
 		/* An all-ones length is mapped to an all-ones end offset. */
 		lo.lo_end = NFS64BITSSET;
 	} else {
 		lo.lo_end = lo.lo_first + len;
 		/* A zero length or a wrapped sum is rejected. */
 		if (lo.lo_end <= lo.lo_first)
 			nd->nd_repstat = NFSERR_INVAL;
 	}
 	tl += 2;
 	clientid.lval[0] = *tl++;
 	clientid.lval[1] = *tl;
 	/*
 	 * Reconcile the clientid from the request with the one already
 	 * recorded in nd, or record it if none was.
 	 */
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			clientid.qval = nd->nd_clientid.qval;
 		else if (nd->nd_clientid.qval != clientid.qval)
 			printf("EEK5 multiple clids\n");
 	} else {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen);
 	if (error)
 		goto nfsmout;
 	if (!nd->nd_repstat && vnode_vtype(vp) != VREG) {
 		if (vnode_vtype(vp) == VDIR)
 			nd->nd_repstat = NFSERR_ISDIR;
 		else
 			nd->nd_repstat = NFSERR_INVAL;
 	}
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, &cf, clientid,
 		    &stateid, exp, nd, p);
 	if (nd->nd_repstat == NFSERR_DENIED) {
 		/* Describe the conflicting lock: offset, length, type, owner. */
 		NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
 		txdr_hyper(cf.cl_first, tl);
 		tl += 2;
 		if (cf.cl_end == NFS64BITSSET)
 			len = NFS64BITSSET;
 		else
 			len = cf.cl_end - cf.cl_first;
 		txdr_hyper(len, tl);
 		tl += 2;
 		if (cf.cl_flags == NFSLCK_WRITE)
 			*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
 		else
 			*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
 		/* stp must still be allocated here: its stateid is read. */
 		*tl++ = stp->ls_stateid.other[0];
 		*tl = stp->ls_stateid.other[1];
 		(void) nfsm_strtom(nd, cf.cl_owner, cf.cl_ownerlen);
 	}
 	vput(vp);
 	/*
 	 * Free stp only now that the reply has been built; freeing it
 	 * before the NFSERR_DENIED case above would leave that code
 	 * reading freed memory.
 	 */
 	if (stp)
 		FREE((caddr_t)stp, M_NFSDSTATE);
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	if (stp)
 		free((caddr_t)stp, M_NFSDSTATE);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 unlock service
  *
  * Decodes the lock type, seqid, lock stateid and byte range from the
  * request into a freshly allocated struct nfsstate (stp) and struct
  * nfslock (lop), both flagged NFSLCK_UNLOCK, and calls
  * nfsrv_lockctrl().  On success the reply carries the resulting
  * stateid.  vp is vput() on every path; the return value is the decode
  * error, or 0 with the status in nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_locku(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, NFSPROC_T *p, struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int i;
 	struct nfsstate *stp;
 	struct nfslock *lop;
 	int error = 0;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 	u_int64_t len;
 
 	/* Dissect first: a failure jumps to nfsmout before anything is allocated. */
 	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED + NFSX_STATEID);
 	MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate),
 	    M_NFSDSTATE, M_WAITOK);
 	MALLOC(lop, struct nfslock *, sizeof (struct nfslock),
 	    M_NFSDLOCK, M_WAITOK);
 	stp->ls_flags = NFSLCK_UNLOCK;
 	lop->lo_flags = NFSLCK_UNLOCK;
 	stp->ls_op = nd->nd_rp;
 	i = fxdr_unsigned(int, *tl++);
 	/* The "W" variants only add NFSLCK_BLOCKING and then fall through. */
 	switch (i) {
 	case NFSV4LOCKT_READW:
 		stp->ls_flags |= NFSLCK_BLOCKING;
 	case NFSV4LOCKT_READ:
 		break;
 	case NFSV4LOCKT_WRITEW:
 		stp->ls_flags |= NFSLCK_BLOCKING;
 	case NFSV4LOCKT_WRITE:
 		break;
 	default:
 		/* nfsmout does not free stp/lop, so release them here. */
 		nd->nd_repstat = NFSERR_BADXDR;
 		free(stp, M_NFSDSTATE);
 		free(lop, M_NFSDLOCK);
 		goto nfsmout;
 	};
 	stp->ls_ownerlen = 0;
 	stp->ls_uid = nd->nd_cred->cr_uid;
 	stp->ls_seq = fxdr_unsigned(int, *tl++);
 	stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 	NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other,
 	    NFSX_STATEIDOTHER);
 	tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 	lop->lo_first = fxdr_hyper(tl);
 	tl += 2;
 	len = fxdr_hyper(tl);
 	if (len == NFS64BITSSET) {
 		/* An all-ones length is mapped to an all-ones end offset. */
 		lop->lo_end = NFS64BITSSET;
 	} else {
 		lop->lo_end = lop->lo_first + len;
 		/* A zero length or a wrapped sum is rejected. */
 		if (lop->lo_end <= lop->lo_first)
 			nd->nd_repstat = NFSERR_INVAL;
 	}
 	/* The clientid is taken from the first two words of the stateid. */
 	clientid.lval[0] = stp->ls_stateid.other[0];
 	clientid.lval[1] = stp->ls_stateid.other[1];
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			clientid.qval = nd->nd_clientid.qval;
 		else if (nd->nd_clientid.qval != clientid.qval)
 			printf("EEK6 multiple clids\n");
 	} else {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	if (!nd->nd_repstat && vnode_vtype(vp) != VREG) {
 	    if (vnode_vtype(vp) == VDIR)
 		nd->nd_repstat = NFSERR_ISDIR;
 	    else
 		nd->nd_repstat = NFSERR_INVAL;
 	}
 	/*
 	 * Call nfsrv_lockctrl() even if nd_repstat is set, so that the
 	 * seqid# gets incremented. nfsrv_lockctrl() will return the
 	 * value of nd_repstat, if it gets that far.
 	 */
 	nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
 	    &stateid, exp, nd, p);
 	/*
 	 * stp and lop were passed by reference; free whatever is still
 	 * non-NULL afterwards.
 	 */
 	if (stp)
 		FREE((caddr_t)stp, M_NFSDSTATE);
 	if (lop)
 		free((caddr_t)lop, M_NFSDLOCK);
 	if (!nd->nd_repstat) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
 		*tl++ = txdr_unsigned(stateid.seqid);
 		NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER);
 	}
 	/* Success and failure share this exit; error is 0 unless decoding failed. */
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 open service
  *
  * Decodes the OPEN arguments (seqid, share access and deny bits,
  * clientid, owner, create mode with its attributes or verifier, and
  * the claim type) into a struct nfsstate (stp).  The target vnode is
  * then found in one of two ways:
  *  - CLAIM_NULL / CLAIM_DELEGATE_CUR / CLAIM_DELEGATE_PREV: the name is
  *    parsed and looked up under dp with nfsvno_namei(), and
  *    nfsvno_open() yields vp;
  *  - CLAIM_PREVIOUS / CLAIM_FH: dp itself is locked and used as vp.
  * After access checks nfsrv_openctrl() is called and, on success, the
  * reply (stateid, change info, result flags, attribute bitmap and any
  * delegation) is built and *vpp is set to vp.
  *
  * Returns 0 with the status in nd->nd_repstat, or a decode error after
  * releasing dp, the ACL and stp.
  */
 APPLESTATIC int
 nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t dp, vnode_t *vpp, __unused fhandle_t *fhp, NFSPROC_T *p,
     struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int i, retext;
 	struct nfsstate *stp = NULL;
 	int error = 0, create, claim, exclusive_flag = 0;
 	u_int32_t rflags = NFSV4OPEN_LOCKTYPEPOSIX, acemask;
 	int how = NFSCREATE_UNCHECKED;
 	/* cverf is only assigned for the exclusive create modes. */
 	int32_t cverf[2], tverf[2] = { 0, 0 };
 	vnode_t vp = NULL, dirp = NULL;
 	struct nfsvattr nva, dirfor, diraft;
 	struct nameidata named;
 	nfsv4stateid_t stateid, delegstateid;
 	nfsattrbit_t attrbits;
 	nfsquad_t clientid;
 	char *bufp = NULL;
 	u_long *hashp;
 	NFSACL_T *aclp = NULL;
 
 #ifdef NFS4_ACL_EXTATTR_NAME
 	aclp = acl_alloc(M_WAITOK);
 	aclp->acl_cnt = 0;
 #endif
 	NFSZERO_ATTRBIT(&attrbits);
 	named.ni_startdir = NULL;
 	named.ni_cnd.cn_nameiop = 0;
 	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
 	/*
 	 * The owner length is the last of the six words; read it first so
 	 * stp can be sized to hold the owner string.
 	 */
 	i = fxdr_unsigned(int, *(tl + 5));
 	if (i <= 0 || i > NFSV4_OPAQUELIMIT) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate) + i,
 	    M_NFSDSTATE, M_WAITOK);
 	stp->ls_ownerlen = i;
 	stp->ls_op = nd->nd_rp;
 	stp->ls_flags = NFSLCK_OPEN;
 	stp->ls_uid = nd->nd_cred->cr_uid;
 	stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++);
 	i = fxdr_unsigned(int, *tl++);
 	retext = 0;
 	/*
 	 * For NFSv4.1 the share access word may also carry "want
 	 * delegation" bits.  Translate the recognized ones into ls_flags
 	 * and strip them so that only the access bits remain in i.
 	 */
 	if ((i & (NFSV4OPEN_WANTDELEGMASK | NFSV4OPEN_WANTSIGNALDELEG |
 	    NFSV4OPEN_WANTPUSHDELEG)) != 0 && (nd->nd_flag & ND_NFSV41) != 0) {
 		retext = 1;
 		/* For now, ignore these. */
 		i &= ~(NFSV4OPEN_WANTPUSHDELEG | NFSV4OPEN_WANTSIGNALDELEG);
 		switch (i & NFSV4OPEN_WANTDELEGMASK) {
 		case NFSV4OPEN_WANTANYDELEG:
 			stp->ls_flags |= (NFSLCK_WANTRDELEG |
 			    NFSLCK_WANTWDELEG);
 			i &= ~NFSV4OPEN_WANTDELEGMASK;
 			break;
 		case NFSV4OPEN_WANTREADDELEG:
 			stp->ls_flags |= NFSLCK_WANTRDELEG;
 			i &= ~NFSV4OPEN_WANTDELEGMASK;
 			break;
 		case NFSV4OPEN_WANTWRITEDELEG:
 			stp->ls_flags |= NFSLCK_WANTWDELEG;
 			i &= ~NFSV4OPEN_WANTDELEGMASK;
 			break;
 		case NFSV4OPEN_WANTNODELEG:
 			stp->ls_flags |= NFSLCK_WANTNODELEG;
 			i &= ~NFSV4OPEN_WANTDELEGMASK;
 			break;
 		case NFSV4OPEN_WANTCANCEL:
 			printf("NFSv4: ignore Open WantCancel\n");
 			i &= ~NFSV4OPEN_WANTDELEGMASK;
 			break;
 		default:
 			/* nd_repstat will be set to NFSERR_INVAL below. */
 			break;
 		};
 	}
 	/* Share access bits. */
 	switch (i) {
 	case NFSV4OPEN_ACCESSREAD:
 		stp->ls_flags |= NFSLCK_READACCESS;
 		break;
 	case NFSV4OPEN_ACCESSWRITE:
 		stp->ls_flags |= NFSLCK_WRITEACCESS;
 		break;
 	case NFSV4OPEN_ACCESSBOTH:
 		stp->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
 		break;
 	default:
 		nd->nd_repstat = NFSERR_INVAL;
 	};
 	/* Share deny bits. */
 	i = fxdr_unsigned(int, *tl++);
 	switch (i) {
 	case NFSV4OPEN_DENYNONE:
 		break;
 	case NFSV4OPEN_DENYREAD:
 		stp->ls_flags |= NFSLCK_READDENY;
 		break;
 	case NFSV4OPEN_DENYWRITE:
 		stp->ls_flags |= NFSLCK_WRITEDENY;
 		break;
 	case NFSV4OPEN_DENYBOTH:
 		stp->ls_flags |= (NFSLCK_READDENY | NFSLCK_WRITEDENY);
 		break;
 	default:
 		nd->nd_repstat = NFSERR_INVAL;
 	};
 	clientid.lval[0] = *tl++;
 	clientid.lval[1] = *tl;
 	/*
 	 * Reconcile the clientid from the request with the one already
 	 * recorded in nd, or record it if none was.
 	 */
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			clientid.qval = nd->nd_clientid.qval;
 		else if (nd->nd_clientid.qval != clientid.qval)
 			printf("EEK7 multiple clids\n");
 	} else {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen);
 	if (error)
 		goto nfsmout;
 	NFSVNO_ATTRINIT(&nva);
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	create = fxdr_unsigned(int, *tl);
 	/* Directory attributes before the open; dirfor.na_gid is used below. */
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_getattr(dp, &dirfor, nd->nd_cred, p, 0);
 	if (create == NFSV4OPEN_CREATE) {
 		nva.na_type = VREG;
 		nva.na_mode = 0;
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		how = fxdr_unsigned(int, *tl);
 		switch (how) {
 		case NFSCREATE_UNCHECKED:
 		case NFSCREATE_GUARDED:
 			error = nfsv4_sattr(nd, NULL, &nva, &attrbits, aclp, p);
 			if (error)
 				goto nfsmout;
 			/*
 			 * If the na_gid being set is the same as that of
 			 * the directory it is going in, clear it, since
 			 * that is what will be set by default. This allows
 			 * a user that isn't in that group to do the create.
 			 */
 			if (!nd->nd_repstat && NFSVNO_ISSETGID(&nva) &&
 			    nva.na_gid == dirfor.na_gid)
 				NFSVNO_UNSET(&nva, gid);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = nfsrv_checkuidgid(nd, &nva);
 			break;
 		case NFSCREATE_EXCLUSIVE:
 			/* Only a create verifier follows. */
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
 			cverf[0] = *tl++;
 			cverf[1] = *tl;
 			break;
 		case NFSCREATE_EXCLUSIVE41:
 			/* A create verifier followed by attributes. */
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
 			cverf[0] = *tl++;
 			cverf[1] = *tl;
 			error = nfsv4_sattr(nd, vp, &nva, &attrbits, aclp, p);
 			if (error != 0)
 				goto nfsmout;
 			if (NFSISSET_ATTRBIT(&attrbits,
 			    NFSATTRBIT_TIMEACCESSSET))
 				nd->nd_repstat = NFSERR_INVAL;
 			/*
 			 * If the na_gid being set is the same as that of
 			 * the directory it is going in, clear it, since
 			 * that is what will be set by default. This allows
 			 * a user that isn't in that group to do the create.
 			 */
 			if (nd->nd_repstat == 0 && NFSVNO_ISSETGID(&nva) &&
 			    nva.na_gid == dirfor.na_gid)
 				NFSVNO_UNSET(&nva, gid);
 			if (nd->nd_repstat == 0)
 				nd->nd_repstat = nfsrv_checkuidgid(nd, &nva);
 			break;
 		default:
 			nd->nd_repstat = NFSERR_BADXDR;
 			goto nfsmout;
 		};
 	} else if (create != NFSV4OPEN_NOCREATE) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 
 	/*
 	 * Now, handle the claim, which usually includes looking up a
 	 * name in the directory referenced by dp. The exception is
 	 * NFSV4OPEN_CLAIMPREVIOUS.
 	 */
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	claim = fxdr_unsigned(int, *tl);
 	if (claim == NFSV4OPEN_CLAIMDELEGATECUR) {
 		/* The delegation stateid precedes the name. */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
 		stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 		NFSBCOPY((caddr_t)tl,(caddr_t)stateid.other,NFSX_STATEIDOTHER);
 		stp->ls_flags |= NFSLCK_DELEGCUR;
 	} else if (claim == NFSV4OPEN_CLAIMDELEGATEPREV) {
 		stp->ls_flags |= NFSLCK_DELEGPREV;
 	}
 	if (claim == NFSV4OPEN_CLAIMNULL || claim == NFSV4OPEN_CLAIMDELEGATECUR
 	    || claim == NFSV4OPEN_CLAIMDELEGATEPREV) {
 		/* A create is only accepted together with CLAIM_NULL. */
 		if (!nd->nd_repstat && create == NFSV4OPEN_CREATE &&
 		    claim != NFSV4OPEN_CLAIMNULL)
 			nd->nd_repstat = NFSERR_INVAL;
 		if (nd->nd_repstat) {
 			/*
 			 * Already failed: call nfsrv_opencheck() with a NULL
 			 * vnode and the current status, then leave through
 			 * the common cleanup (error is still 0 here).
 			 */
 			nd->nd_repstat = nfsrv_opencheck(clientid,
 			    &stateid, stp, NULL, nd, p, nd->nd_repstat);
 			goto nfsmout;
 		}
 		if (create == NFSV4OPEN_CREATE)
 		    NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
 			LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE);
 		else
 		    NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP,
 			LOCKLEAF | SAVESTART);
 		nfsvno_setpathbuf(&named, &bufp, &hashp);
 		error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 		if (error) {
 			/*
 			 * Same cleanup as nfsmout, plus the path buffer that
 			 * was just set up.
 			 */
 			vrele(dp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 			acl_free(aclp);
 #endif
 			FREE((caddr_t)stp, M_NFSDSTATE);
 			nfsvno_relpathbuf(&named);
 			NFSEXITCODE2(error, nd);
 			return (error);
 		}
 		if (!nd->nd_repstat) {
 			nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp,
 			    p, &dirp);
 		} else {
 			vrele(dp);
 			nfsvno_relpathbuf(&named);
 		}
 		if (create == NFSV4OPEN_CREATE) {
 		    switch (how) {
 		    case NFSCREATE_UNCHECKED:
 			if (named.ni_vp) {
 				/*
 				 * Clear the setable attribute bits, except
 				 * for Size, if it is being truncated.
 				 */
 				NFSZERO_ATTRBIT(&attrbits);
 				if (NFSVNO_ISSETSIZE(&nva))
 					NFSSETBIT_ATTRBIT(&attrbits,
 					    NFSATTRBIT_SIZE);
 			}
 			break;
 		    case NFSCREATE_GUARDED:
 			/* A guarded create fails if the name already exists. */
 			if (named.ni_vp && !nd->nd_repstat)
 				nd->nd_repstat = EEXIST;
 			break;
 		    case NFSCREATE_EXCLUSIVE:
 			exclusive_flag = 1;
 			if (!named.ni_vp)
 				nva.na_mode = 0;
 			break;
 		    case NFSCREATE_EXCLUSIVE41:
 			exclusive_flag = 1;
 			break;
 		    };
 		}
 		nfsvno_open(nd, &named, clientid, &stateid, stp,
 		    &exclusive_flag, &nva, cverf, create, aclp, &attrbits,
 		    nd->nd_cred, p, exp, &vp);
 	} else if (claim == NFSV4OPEN_CLAIMPREVIOUS || claim ==
 	    NFSV4OPEN_CLAIMFH) {
 		if (claim == NFSV4OPEN_CLAIMPREVIOUS) {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			i = fxdr_unsigned(int, *tl);
 			switch (i) {
 			case NFSV4OPEN_DELEGATEREAD:
 				stp->ls_flags |= NFSLCK_DELEGREAD;
 				break;
 			case NFSV4OPEN_DELEGATEWRITE:
 				stp->ls_flags |= NFSLCK_DELEGWRITE;
 			case NFSV4OPEN_DELEGATENONE:
 				break;
 			default:
 				nd->nd_repstat = NFSERR_BADXDR;
 				goto nfsmout;
 			};
 			stp->ls_flags |= NFSLCK_RECLAIM;
 		} else {
 			/* CLAIM_NULL_FH */
 			if (nd->nd_repstat == 0 && create == NFSV4OPEN_CREATE)
 				nd->nd_repstat = NFSERR_INVAL;
 		}
 		/* No name to look up: dp itself is the file being opened. */
 		vp = dp;
 		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 		if ((vp->v_iflag & VI_DOOMED) == 0)
 			nd->nd_repstat = nfsrv_opencheck(clientid, &stateid,
 			    stp, vp, nd, p, nd->nd_repstat);
 		else
 			nd->nd_repstat = NFSERR_PERM;
 	} else {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 
 	/*
 	 * Do basic access checking.
 	 */
 	if (!nd->nd_repstat && vnode_vtype(vp) != VREG) {
 		/*
 		 * The IETF working group decided that this is the correct
 		 * error return for all non-regular files.
 		 */
 		nd->nd_repstat = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_SYMLINK;
 	}
 	if (!nd->nd_repstat && (stp->ls_flags & NFSLCK_WRITEACCESS))
 	    nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred,
 	        exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL);
 	if (!nd->nd_repstat && (stp->ls_flags & NFSLCK_READACCESS)) {
 	    nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred,
 	        exp, p, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL);
 	    /* If the VREAD check fails, VEXEC access is tried instead. */
 	    if (nd->nd_repstat)
 		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
 		    nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER,
 		    NFSACCCHK_VPISLOCKED, NULL);
 	}
 
 	if (!nd->nd_repstat) {
 		nd->nd_repstat = nfsvno_getattr(vp, &nva, nd->nd_cred, p, 1);
 		if (!nd->nd_repstat) {
 			tverf[0] = nva.na_atime.tv_sec;
 			tverf[1] = nva.na_atime.tv_nsec;
 		}
 	}
 	/*
 	 * For an exclusive create the file's access time is compared with
 	 * the client's verifier; a mismatch is reported as EEXIST.
 	 */
 	if (!nd->nd_repstat && exclusive_flag && (cverf[0] != tverf[0] ||
 	    cverf[1] != tverf[1]))
 		nd->nd_repstat = EEXIST;
 	/*
 	 * Do the open locking/delegation stuff.
 	 */
 	if (!nd->nd_repstat)
 	    nd->nd_repstat = nfsrv_openctrl(nd, vp, &stp, clientid, &stateid,
 		&delegstateid, &rflags, exp, p, nva.na_filerev);
 
 	/*
 	 * vp must be unlocked before the call to nfsvno_getattr(dirp,...)
 	 * below, to avoid a deadlock with the lookup in nfsvno_namei() above.
 	 * (ie: Leave the NFSVOPUNLOCK() about here.)
 	 */
 	if (vp)
 		NFSVOPUNLOCK(vp, 0);
 	/* stp was passed by reference; free it if it is still non-NULL. */
 	if (stp)
 		FREE((caddr_t)stp, M_NFSDSTATE);
 	if (!nd->nd_repstat && dirp)
 		nd->nd_repstat = nfsvno_getattr(dirp, &diraft, nd->nd_cred, p,
 		    0);
 	if (!nd->nd_repstat) {
 		/* Open stateid, change info (5 words) and result flags. */
 		NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(stateid.seqid);
 		NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER);
 		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 		if (claim == NFSV4OPEN_CLAIMPREVIOUS) {
 			*tl++ = newnfs_true;
 			*tl++ = 0;
 			*tl++ = 0;
 			*tl++ = 0;
 			*tl++ = 0;
 		} else {
 			*tl++ = newnfs_false;	/* Since dirp is not locked */
 			txdr_hyper(dirfor.na_filerev, tl);
 			tl += 2;
 			txdr_hyper(diraft.na_filerev, tl);
 			tl += 2;
 		}
 		*tl = txdr_unsigned(rflags & NFSV4OPEN_RFLAGS);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 		/* Delegation type, chosen from the bits set in rflags. */
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (rflags & NFSV4OPEN_READDELEGATE)
 			*tl = txdr_unsigned(NFSV4OPEN_DELEGATEREAD);
 		else if (rflags & NFSV4OPEN_WRITEDELEGATE)
 			*tl = txdr_unsigned(NFSV4OPEN_DELEGATEWRITE);
 		else if (retext != 0) {
 			/*
 			 * The client sent "want" bits: use the extended
 			 * "none" reply with a reason.
 			 */
 			*tl = txdr_unsigned(NFSV4OPEN_DELEGATENONEEXT);
 			if ((rflags & NFSV4OPEN_WDCONTENTION) != 0) {
 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 				*tl++ = txdr_unsigned(NFSV4OPEN_CONTENTION);
 				*tl = newnfs_false;
 			} else if ((rflags & NFSV4OPEN_WDRESOURCE) != 0) {
 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 				*tl++ = txdr_unsigned(NFSV4OPEN_RESOURCE);
 				*tl = newnfs_false;
 			} else {
 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 				*tl = txdr_unsigned(NFSV4OPEN_NOTWANTED);
 			}
 		} else
 			*tl = txdr_unsigned(NFSV4OPEN_DELEGATENONE);
 		if (rflags & (NFSV4OPEN_READDELEGATE|NFSV4OPEN_WRITEDELEGATE)) {
 			/* Delegation stateid and recall flag. */
 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID+NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(delegstateid.seqid);
 			NFSBCOPY((caddr_t)delegstateid.other, (caddr_t)tl,
 			    NFSX_STATEIDOTHER);
 			tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 			if (rflags & NFSV4OPEN_RECALL)
 				*tl = newnfs_true;
 			else
 				*tl = newnfs_false;
 			if (rflags & NFSV4OPEN_WRITEDELEGATE) {
 				/* Size limit for a write delegation. */
 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 				*tl++ = txdr_unsigned(NFSV4OPEN_LIMITSIZE);
 				txdr_hyper(nva.na_size, tl);
 			}
 			/*
 			 * One ACE for "OWNER@" whose mask is derived from the
 			 * owner permission bits in nva.na_mode.
 			 */
 			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(NFSV4ACE_ALLOWEDTYPE);
 			*tl++ = txdr_unsigned(0x0);
 			acemask = NFSV4ACE_ALLFILESMASK;
 			if (nva.na_mode & S_IRUSR)
 			    acemask |= NFSV4ACE_READMASK;
 			if (nva.na_mode & S_IWUSR)
 			    acemask |= NFSV4ACE_WRITEMASK;
 			if (nva.na_mode & S_IXUSR)
 			    acemask |= NFSV4ACE_EXECUTEMASK;
 			*tl = txdr_unsigned(acemask);
 			(void) nfsm_strtom(nd, "OWNER@", 6);
 		}
 		/* Success: the (unlocked) vnode is handed to the caller. */
 		*vpp = vp;
 	} else if (vp) {
 		vrele(vp);
 	}
 	if (dirp)
 		vrele(dirp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 	acl_free(aclp);
 #endif
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	/* Reached before any lookup consumed dp, so dp is released here. */
 	vrele(dp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 	acl_free(aclp);
 #endif
 	if (stp)
 		FREE((caddr_t)stp, M_NFSDSTATE);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 close service
  *
  * Passes the sequence number and stateid from the request to
  * nfsrv_openupdate() with NFSLCK_CLOSE set and, when that succeeds,
  * replies with the stateid it filled in.  vp is vput() on every path.
  * The operation status is reported through nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_close(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	struct nfsstate closest;
 	nfsv4stateid_t newstateid;
 	nfsquad_t clientid;
 	u_int32_t *tl;
 	int error = 0, is41;
 
 	/* Request words: sequence number, stateid.seqid, stateid.other. */
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
 	closest.ls_seq = fxdr_unsigned(u_int32_t, tl[0]);
 	closest.ls_stateid.seqid = fxdr_unsigned(u_int32_t, tl[1]);
 	NFSBCOPY((caddr_t)&tl[2], (caddr_t)closest.ls_stateid.other,
 	    NFSX_STATEIDOTHER);
 	closest.ls_ownerlen = 0;
 	closest.ls_op = nd->nd_rp;
 	closest.ls_uid = nd->nd_cred->cr_uid;
 	closest.ls_flags = NFSLCK_CLOSE;
 
 	/* The clientid is the first two words of stateid.other. */
 	clientid.lval[0] = closest.ls_stateid.other[0];
 	clientid.lval[1] = closest.ls_stateid.other[1];
 	is41 = (nd->nd_flag & ND_NFSV41) != 0;
 	if ((nd->nd_flag & ND_IMPLIEDCLID) == 0) {
 		if (is41)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	} else if (is41) {
 		clientid.qval = nd->nd_clientid.qval;
 	} else if (nd->nd_clientid.qval != clientid.qval) {
 		printf("EEK8 multiple clids\n");
 	}
 
 	nd->nd_repstat = nfsrv_openupdate(vp, &closest, clientid,
 	    &newstateid, nd, p);
 	vput(vp);
 	if (nd->nd_repstat == 0) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
 		tl[0] = txdr_unsigned(newstateid.seqid);
 		NFSBCOPY((caddr_t)newstateid.other, (caddr_t)&tl[1],
 		    NFSX_STATEIDOTHER);
 	}
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 delegpurge service
  *
  * Reads the clientid from the request and calls nfsrv_delegupdate()
  * with NFSV4OP_DELEGPURGE.  The outcome is left in nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_delegpurge(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	nfsquad_t clientid;
 	u_int32_t *tl;
 	int error = 0, is41;
 
 	/* Both root-export preconditions must hold, else WRONGSEC. */
 	if (!(nfs_rootfhset != 0 && nfsd_checkrootexp(nd) == 0)) {
 		nd->nd_repstat = NFSERR_WRONGSEC;
 		goto nfsmout;
 	}
 	NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 	clientid.lval[0] = tl[0];
 	clientid.lval[1] = tl[1];
 
 	is41 = (nd->nd_flag & ND_NFSV41) != 0;
 	if ((nd->nd_flag & ND_IMPLIEDCLID) == 0) {
 		if (is41)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	} else if (is41) {
 		clientid.qval = nd->nd_clientid.qval;
 	} else if (nd->nd_clientid.qval != clientid.qval) {
 		printf("EEK9 multiple clids\n");
 	}
 
 	nd->nd_repstat = nfsrv_delegupdate(nd, clientid, NULL, NULL,
 	    NFSV4OP_DELEGPURGE, nd->nd_cred, p);
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 delegreturn service
  *
  * Reads the delegation stateid from the request and calls
  * nfsrv_delegupdate() with NFSV4OP_DELEGRETURN for vp.  vp is vput()
  * on every path, including a failed dissect.
  */
 APPLESTATIC int
 nfsrvd_delegreturn(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 	u_int32_t *tl;
 	int error = 0, is41;
 
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
 	stateid.seqid = fxdr_unsigned(u_int32_t, tl[0]);
 	NFSBCOPY((caddr_t)&tl[1], (caddr_t)stateid.other, NFSX_STATEIDOTHER);
 
 	/* The clientid is the first two words of stateid.other. */
 	clientid.lval[0] = stateid.other[0];
 	clientid.lval[1] = stateid.other[1];
 	is41 = (nd->nd_flag & ND_NFSV41) != 0;
 	if ((nd->nd_flag & ND_IMPLIEDCLID) == 0) {
 		if (is41)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	} else if (is41) {
 		clientid.qval = nd->nd_clientid.qval;
 	} else if (nd->nd_clientid.qval != clientid.qval) {
 		printf("EEK10 multiple clids\n");
 	}
 
 	nd->nd_repstat = nfsrv_delegupdate(nd, clientid, &stateid, vp,
 	    NFSV4OP_DELEGRETURN, nd->nd_cred, p);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 get file handle service
  *
  * Fetches the file handle for vp, releases vp, and appends the handle
  * to the reply when the fetch succeeded.  Always returns 0; the status
  * is carried in nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_getfh(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	fhandle_t handle;
 
 	nd->nd_repstat = nfsvno_getfh(vp, &handle, p);
 	vput(vp);
 	if (nd->nd_repstat == 0) {
 		(void) nfsm_fhtom(nd, (u_int8_t *)&handle, 0, 0);
 	}
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
 
 /*
  * nfsv4 open confirm service
  *
  * Rejected with NFSERR_NOTSUPP when ND_NFSV41 is set.  Otherwise the
  * stateid and sequence number from the request are passed to
  * nfsrv_openupdate() with NFSLCK_CONFIRM and the resulting stateid is
  * returned.  vp is vput() on every path.
  */
 APPLESTATIC int
 nfsrvd_openconfirm(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	struct nfsstate confst;
 	nfsv4stateid_t newstateid;
 	nfsquad_t clientid;
 	u_int32_t *tl;
 	int error = 0, is41;
 
 	if ((nd->nd_flag & ND_NFSV41) != 0) {
 		nd->nd_repstat = NFSERR_NOTSUPP;
 		goto nfsmout;
 	}
 
 	/* Request words: stateid.seqid, stateid.other, sequence number. */
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
 	confst.ls_stateid.seqid = fxdr_unsigned(u_int32_t, tl[0]);
 	NFSBCOPY((caddr_t)&tl[1], (caddr_t)confst.ls_stateid.other,
 	    NFSX_STATEIDOTHER);
 	confst.ls_seq = fxdr_unsigned(u_int32_t,
 	    tl[1 + (NFSX_STATEIDOTHER / NFSX_UNSIGNED)]);
 	confst.ls_ownerlen = 0;
 	confst.ls_op = nd->nd_rp;
 	confst.ls_uid = nd->nd_cred->cr_uid;
 	confst.ls_flags = NFSLCK_CONFIRM;
 
 	clientid.lval[0] = confst.ls_stateid.other[0];
 	clientid.lval[1] = confst.ls_stateid.other[1];
 	is41 = (nd->nd_flag & ND_NFSV41) != 0;
 	if ((nd->nd_flag & ND_IMPLIEDCLID) == 0) {
 		if (is41)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	} else if (is41) {
 		clientid.qval = nd->nd_clientid.qval;
 	} else if (nd->nd_clientid.qval != clientid.qval) {
 		printf("EEK11 multiple clids\n");
 	}
 
 	nd->nd_repstat = nfsrv_openupdate(vp, &confst, clientid,
 	    &newstateid, nd, p);
 	if (nd->nd_repstat == 0) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
 		tl[0] = txdr_unsigned(newstateid.seqid);
 		NFSBCOPY((caddr_t)newstateid.other, (caddr_t)&tl[1],
 		    NFSX_STATEIDOTHER);
 	}
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 open downgrade service
  *
  * Parses the stateid, sequence number, share access and share deny
  * words from the request, translates the last two into NFSLCK_* flags
  * and calls nfsrv_openupdate() with NFSLCK_DOWNGRADE set.  On success
  * the stateid filled in by nfsrv_openupdate() is appended to the reply.
  *
  * An unrecognized access or deny value sets nd->nd_repstat to
  * NFSERR_BADXDR and nfsrv_openupdate() is not called.  A non-regular
  * file is reported by returning NFSERR_INVAL through "error" rather
  * than through nd->nd_repstat.  vp is vput() on every path.
  */
 APPLESTATIC int
 nfsrvd_opendowngrade(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int i;
 	struct nfsstate st, *stp = &st;
 	int error = 0;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 
 	/* opendowngrade can only work on a file object.*/
 	if (vp->v_type != VREG) {
 		error = NFSERR_INVAL;
 		goto nfsmout;
 	}
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
 	stp->ls_ownerlen = 0;
 	stp->ls_op = nd->nd_rp;
 	stp->ls_uid = nd->nd_cred->cr_uid;
 	stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 	NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other,
 	    NFSX_STATEIDOTHER);
 	tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 	stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++);
 
 	/*
 	 * Start from a defined value: the deny switch below ORs into
 	 * ls_flags even when the access value was not recognized.
 	 */
 	stp->ls_flags = 0;
 
 	/* Share access. */
 	i = fxdr_unsigned(int, *tl++);
 	switch (i) {
 	case NFSV4OPEN_ACCESSREAD:
 		stp->ls_flags = (NFSLCK_READACCESS | NFSLCK_DOWNGRADE);
 		break;
 	case NFSV4OPEN_ACCESSWRITE:
 		stp->ls_flags = (NFSLCK_WRITEACCESS | NFSLCK_DOWNGRADE);
 		break;
 	case NFSV4OPEN_ACCESSBOTH:
 		stp->ls_flags = (NFSLCK_READACCESS | NFSLCK_WRITEACCESS |
 		    NFSLCK_DOWNGRADE);
 		break;
 	default:
 		nd->nd_repstat = NFSERR_BADXDR;
 	}
 
 	/* Share deny. */
 	i = fxdr_unsigned(int, *tl);
 	switch (i) {
 	case NFSV4OPEN_DENYNONE:
 		break;
 	case NFSV4OPEN_DENYREAD:
 		stp->ls_flags |= NFSLCK_READDENY;
 		break;
 	case NFSV4OPEN_DENYWRITE:
 		stp->ls_flags |= NFSLCK_WRITEDENY;
 		break;
 	case NFSV4OPEN_DENYBOTH:
 		stp->ls_flags |= (NFSLCK_READDENY | NFSLCK_WRITEDENY);
 		break;
 	default:
 		nd->nd_repstat = NFSERR_BADXDR;
 	}
 
 	/* The clientid is the first two words of stateid.other. */
 	clientid.lval[0] = stp->ls_stateid.other[0];
 	clientid.lval[1] = stp->ls_stateid.other[1];
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			clientid.qval = nd->nd_clientid.qval;
 		else if (nd->nd_clientid.qval != clientid.qval)
 			printf("EEK12 multiple clids\n");
 	} else {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	/* Skip the state update if either switch flagged a bad value. */
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid,
 		    nd, p);
 	if (!nd->nd_repstat) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
 		*tl++ = txdr_unsigned(stateid.seqid);
 		NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER);
 	}
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 renew lease service
  *
  * Rejected with NFSERR_NOTSUPP when ND_NFSV41 is set.  Otherwise reads
  * the clientid and calls nfsrv_getclient() with CLOPS_RENEWOP and
  * CLOPS_RENEW; the result is left in nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_renew(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	nfsquad_t clientid;
 	u_int32_t *tl;
 	int error = 0, is41;
 
 	if ((nd->nd_flag & ND_NFSV41) != 0) {
 		nd->nd_repstat = NFSERR_NOTSUPP;
 		goto nfsmout;
 	}
 	/* Both root-export preconditions must hold, else WRONGSEC. */
 	if (!(nfs_rootfhset != 0 && nfsd_checkrootexp(nd) == 0)) {
 		nd->nd_repstat = NFSERR_WRONGSEC;
 		goto nfsmout;
 	}
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
 	clientid.lval[0] = tl[0];
 	clientid.lval[1] = tl[1];
 
 	is41 = (nd->nd_flag & ND_NFSV41) != 0;
 	if ((nd->nd_flag & ND_IMPLIEDCLID) == 0) {
 		if (is41)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	} else if (is41) {
 		clientid.qval = nd->nd_clientid.qval;
 	} else if (nd->nd_clientid.qval != clientid.qval) {
 		printf("EEK13 multiple clids\n");
 	}
 
 	nd->nd_repstat = nfsrv_getclient(clientid, (CLOPS_RENEWOP|CLOPS_RENEW),
 	    NULL, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 security info service
  *
  * Looks up the name in the request relative to dp, obtains the export
  * information (retnes) for the resulting file handle, and replies with
  * a counted list of the security flavors found there.
  *
  * Returns the error from nfsrv_parsename() if parsing the name fails;
  * otherwise returns 0 and reports the operation status through
  * nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_secinfo(struct nfsrv_descript *nd, int isdgram,
     vnode_t dp, NFSPROC_T *p, struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int len;
 	struct nameidata named;
 	vnode_t dirp = NULL, vp;
 	struct nfsrvfh fh;
 	struct nfsexstuff retnes;
 	u_int32_t *sizp;
 	int error = 0, savflag, i;
 	char *bufp;
 	u_long *hashp;
 
 	/*
 	 * All this just to get the export flags for the name.
 	 */
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP,
 	    LOCKLEAF | SAVESTART);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (error) {
 		/* Parse failure: drop dp and the path buffer ourselves. */
 		vput(dp);
 		nfsvno_relpathbuf(&named);
 		goto out;
 	}
 	if (!nd->nd_repstat) {
 		nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, p, &dirp);
 	} else {
 		/* Lookup skipped, so dp and the path buffer are released here. */
 		vput(dp);
 		nfsvno_relpathbuf(&named);
 	}
 	if (dirp)
 		vrele(dirp);
 	if (nd->nd_repstat)
 		goto out;
 	/* NOTE(review): ni_startdir reference presumably comes from SAVESTART. */
 	vrele(named.ni_startdir);
 	nfsvno_relpathbuf(&named);
 	fh.nfsrvfh_len = NFSX_MYFH;
 	vp = named.ni_vp;
 	nd->nd_repstat = nfsvno_getfh(vp, (fhandle_t *)fh.nfsrvfh_data, p);
 	vput(vp);
 	/* nd_flag is saved and restored around the nfsd_fhtovp() call. */
 	savflag = nd->nd_flag;
 	if (!nd->nd_repstat) {
 		/* Called to fill in retnes; the vnode itself is not needed. */
 		nfsd_fhtovp(nd, &fh, LK_SHARED, &vp, &retnes, NULL, 0, p);
 		if (vp)
 			vput(vp);
 	}
 	nd->nd_flag = savflag;
 	if (nd->nd_repstat)
 		goto out;
 
 	/*
 	 * Finally have the export flags for name, so we can create
 	 * the security info.
 	 */
 	len = 0;
 	/* Reserve the entry-count word now; it is filled in after the loop. */
 	NFSM_BUILD(sizp, u_int32_t *, NFSX_UNSIGNED);
 	for (i = 0; i < retnes.nes_numsecflavor; i++) {
 		/* Flavors not matched below are skipped and not counted. */
 		if (retnes.nes_secflavors[i] == AUTH_SYS) {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl = txdr_unsigned(RPCAUTH_UNIX);
 			len++;
 		} else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5) {
 			/* GSS entry: flavor, mechanism string, QOP, service. */
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(RPCAUTH_GSS);
 			(void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str,
 			    nfsgss_mechlist[KERBV_MECH].len);
 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(GSS_KERBV_QOP);
 			*tl = txdr_unsigned(RPCAUTHGSS_SVCNONE);
 			len++;
 		} else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5I) {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(RPCAUTH_GSS);
 			(void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str,
 			    nfsgss_mechlist[KERBV_MECH].len);
 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(GSS_KERBV_QOP);
 			*tl = txdr_unsigned(RPCAUTHGSS_SVCINTEGRITY);
 			len++;
 		} else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5P) {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(RPCAUTH_GSS);
 			(void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str,
 			    nfsgss_mechlist[KERBV_MECH].len);
 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(GSS_KERBV_QOP);
 			*tl = txdr_unsigned(RPCAUTHGSS_SVCPRIVACY);
 			len++;
 		}
 	}
 	*sizp = txdr_unsigned(len);
 
 out:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 set client id service
  *
  * Rejected with NFSERR_NOTSUPP when ND_NFSV41 is set.  Otherwise builds
  * a struct nfsclient from the verifier, client id string, callback
  * program, callback address and callback ident in the request, and
  * hands it to nfsrv_setclient().  On success the clientid and confirm
  * values are returned; on NFSERR_CLIDINUSE the callback netid and
  * address of the client record are returned instead.
  *
  * The nfsclient is freed here whenever clp is still non-NULL after
  * nfsrv_setclient() or on any parse failure.  Returns 0, or the parse
  * error when dissecting the request fails.
  */
 APPLESTATIC int
 nfsrvd_setclientid(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int i;
 	int error = 0, idlen;
 	struct nfsclient *clp = NULL;
 	struct sockaddr_in *rad;
 	u_char *verf, *ucp, *ucp2, addrbuf[24];
 	nfsquad_t clientid, confirm;
 
 	if ((nd->nd_flag & ND_NFSV41) != 0) {
 		nd->nd_repstat = NFSERR_NOTSUPP;
 		goto nfsmout;
 	}
 	if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) {
 		nd->nd_repstat = NFSERR_WRONGSEC;
 		goto out;
 	}
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF + NFSX_UNSIGNED);
 	verf = (u_char *)tl;
 	tl += (NFSX_VERF / NFSX_UNSIGNED);
 	i = fxdr_unsigned(int, *tl);
 	if (i > NFSV4_OPAQUELIMIT || i <= 0) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	idlen = i;
 	/* For GSS, the principal name is stored right after the id string. */
 	if (nd->nd_flag & ND_GSS)
 		i += nd->nd_princlen;
 	MALLOC(clp, struct nfsclient *, sizeof (struct nfsclient) + i,
 	    M_NFSDCLIENT, M_WAITOK);
 	NFSBZERO((caddr_t)clp, sizeof (struct nfsclient) + i);
 	NFSINITSOCKMUTEX(&clp->lc_req.nr_mtx);
 	NFSSOCKADDRALLOC(clp->lc_req.nr_nam);
 	NFSSOCKADDRSIZE(clp->lc_req.nr_nam, sizeof (struct sockaddr_in));
 	clp->lc_req.nr_cred = NULL;
 	NFSBCOPY(verf, clp->lc_verf, NFSX_VERF);
 	clp->lc_idlen = idlen;
 	error = nfsrv_mtostr(nd, clp->lc_id, idlen);
 	if (error)
 		goto nfsmout;
 	if (nd->nd_flag & ND_GSS) {
 		clp->lc_flags = LCL_GSS;
 		if (nd->nd_flag & ND_GSSINTEGRITY)
 			clp->lc_flags |= LCL_GSSINTEGRITY;
 		else if (nd->nd_flag & ND_GSSPRIVACY)
 			clp->lc_flags |= LCL_GSSPRIVACY;
 	} else {
 		clp->lc_flags = 0;
 	}
 	if ((nd->nd_flag & ND_GSS) && nd->nd_princlen > 0) {
 		clp->lc_flags |= LCL_NAME;
 		clp->lc_namelen = nd->nd_princlen;
 		clp->lc_name = &clp->lc_id[idlen];
 		NFSBCOPY(nd->nd_principal, clp->lc_name, clp->lc_namelen);
 	} else {
 		clp->lc_uid = nd->nd_cred->cr_uid;
 		clp->lc_gid = nd->nd_cred->cr_gid;
 	}
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	clp->lc_program = fxdr_unsigned(u_int32_t, *tl);
 	error = nfsrv_getclientipaddr(nd, clp);
 	if (error)
 		goto nfsmout;
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	clp->lc_callback = fxdr_unsigned(u_int32_t, *tl);
 
 	/*
 	 * nfsrv_setclient() does the actual work of adding it to the
 	 * client list. If there is no error, the structure has been
 	 * linked into the client list and clp should no longer be used
 	 * here. When an error is returned, it has not been linked in,
 	 * so it should be free'd.
 	 */
 	nd->nd_repstat = nfsrv_setclient(nd, &clp, &clientid, &confirm, p);
 	if (nd->nd_repstat == NFSERR_CLIDINUSE) {
 		if (clp->lc_flags & LCL_TCPCALLBACK)
 			(void) nfsm_strtom(nd, "tcp", 3);
 		else
 			(void) nfsm_strtom(nd, "udp", 3);
 		rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *);
 		ucp = (u_char *)&rad->sin_addr.s_addr;
 		ucp2 = (u_char *)&rad->sin_port;
 		/*
 		 * Four address bytes and two port bytes in dotted decimal.
 		 * The longest result is 23 characters plus the NUL, which
 		 * exactly fills addrbuf, so bound the write explicitly.
 		 */
 		snprintf(addrbuf, sizeof(addrbuf), "%d.%d.%d.%d.%d.%d",
 		    ucp[0] & 0xff, ucp[1] & 0xff, ucp[2] & 0xff,
 		    ucp[3] & 0xff, ucp2[0] & 0xff, ucp2[1] & 0xff);
 		(void) nfsm_strtom(nd, addrbuf, strlen(addrbuf));
 	}
 	if (clp) {
 		NFSSOCKADDRFREE(clp->lc_req.nr_nam);
 		NFSFREEMUTEX(&clp->lc_req.nr_mtx);
 		free((caddr_t)clp, M_NFSDCLIENT);
 	}
 	if (!nd->nd_repstat) {
 		/* clientid and confirm are copied without byte swapping. */
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER);
 		*tl++ = clientid.lval[0];
 		*tl++ = clientid.lval[1];
 		*tl++ = confirm.lval[0];
 		*tl = confirm.lval[1];
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	if (clp) {
 		NFSSOCKADDRFREE(clp->lc_req.nr_nam);
 		NFSFREEMUTEX(&clp->lc_req.nr_mtx);
 		free((caddr_t)clp, M_NFSDCLIENT);
 	}
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 set client id confirm service
  *
  * Rejected with NFSERR_NOTSUPP when ND_NFSV41 is set.  Otherwise reads
  * the clientid and confirm values and lets nfsrv_getclient() decide the
  * reply status.
  */
 APPLESTATIC int
 nfsrvd_setclientidcfrm(struct nfsrv_descript *nd,
     __unused int isdgram, __unused vnode_t vp, NFSPROC_T *p,
     __unused struct nfsexstuff *exp)
 {
 	nfsquad_t clientid, confirm;
 	u_int32_t *tl;
 	int error = 0;
 
 	if ((nd->nd_flag & ND_NFSV41) != 0) {
 		nd->nd_repstat = NFSERR_NOTSUPP;
 		goto nfsmout;
 	}
 	/* Both root-export preconditions must hold, else WRONGSEC. */
 	if (!(nfs_rootfhset != 0 && nfsd_checkrootexp(nd) == 0)) {
 		nd->nd_repstat = NFSERR_WRONGSEC;
 		goto nfsmout;
 	}
 
 	/* Two words of clientid followed by two words of confirm. */
 	NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER);
 	clientid.lval[0] = tl[0];
 	clientid.lval[1] = tl[1];
 	confirm.lval[0] = tl[2];
 	confirm.lval[1] = tl[3];
 
 	/*
 	 * The client list search is done by nfsrv_getclient(), which
 	 * hands back the NFSERR status to reply with.
 	 */
 	nd->nd_repstat = nfsrv_getclient(clientid, (CLOPS_CONFIRM|CLOPS_RENEW),
 	    NULL, NULL, confirm, 0, nd, p);
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 verify service
  *
  * Gathers the attributes, statfs data and file handle for vp, then has
  * nfsv4_loadattr() compare them against the attributes in the request.
  * For NFSV4OP_NVERIFY a match yields NFSERR_SAME and NFSERR_NOTSAME is
  * treated as success; for any other op a non-zero compare result is
  * the status.  vp is vput() before returning.
  */
 APPLESTATIC int
 nfsrvd_verify(struct nfsrv_descript *nd, int isdgram,
     vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	struct nfsfsinfo fsinfo;
 	struct nfsvattr attrs;
 	struct statfs fsstat;
 	fhandle_t fh;
 	int error = 0, cmp, fhsize = NFSX_MYFH;
 
 	/* Stop at the first lookup that fails. */
 	nd->nd_repstat = nfsvno_getattr(vp, &attrs, nd->nd_cred, p, 1);
 	if (nd->nd_repstat != 0)
 		goto done;
 	nd->nd_repstat = nfsvno_statfs(vp, &fsstat);
 	if (nd->nd_repstat != 0)
 		goto done;
 	nd->nd_repstat = nfsvno_getfh(vp, &fh, p);
 	if (nd->nd_repstat != 0)
 		goto done;
 
 	nfsvno_getfs(&fsinfo, isdgram);
 	error = nfsv4_loadattr(nd, vp, &attrs, NULL, &fh, fhsize, NULL,
 	    &fsstat, NULL, &fsinfo, NULL, 1, &cmp, NULL, NULL, p, nd->nd_cred);
 	if (error != 0)
 		goto done;
 
 	if (nd->nd_procnum != NFSV4OP_NVERIFY) {
 		if (cmp != 0)
 			nd->nd_repstat = cmp;
 	} else if (cmp == 0) {
 		nd->nd_repstat = NFSERR_SAME;
 	} else if (cmp != NFSERR_NOTSAME) {
 		nd->nd_repstat = cmp;
 	}
 done:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs openattr rpc
  *
  * Not supported: the single argument word is consumed from the request
  * and NFSERR_NOTSUPP is returned in nd->nd_repstat.  dp is vrele()d on
  * every path.
  */
 APPLESTATIC int
 nfsrvd_openattr(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t dp, __unused vnode_t *vpp, __unused fhandle_t *fhp,
     __unused NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int error = 0;
 
 	/* Step over the argument word; its value is not used. */
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	nd->nd_repstat = NFSERR_NOTSUPP;
 nfsmout:
 	vrele(dp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 release lock owner service
  *
  * Rejected with NFSERR_NOTSUPP when ND_NFSV41 is set.  Otherwise reads
  * the clientid and lock owner from the request into a temporary
  * nfsstate flagged NFSLCK_RELEASE and passes it to
  * nfsrv_releaselckown().  The temporary nfsstate is freed on all paths.
  */
 APPLESTATIC int
 nfsrvd_releaselckown(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	struct nfsstate *stp = NULL;
 	nfsquad_t clientid;
 	u_int32_t *tl;
 	int error = 0, is41, len;
 
 	if ((nd->nd_flag & ND_NFSV41) != 0) {
 		nd->nd_repstat = NFSERR_NOTSUPP;
 		goto nfsmout;
 	}
 	/* Both root-export preconditions must hold, else WRONGSEC. */
 	if (!(nfs_rootfhset != 0 && nfsd_checkrootexp(nd) == 0)) {
 		nd->nd_repstat = NFSERR_WRONGSEC;
 		goto nfsmout;
 	}
 
 	/* Request words: clientid (two words), then the owner length. */
 	NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 	len = fxdr_unsigned(int, tl[2]);
 	if (!(len > 0 && len <= NFSV4_OPAQUELIMIT)) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	MALLOC(stp, struct nfsstate *, sizeof (struct nfsstate) + len,
 	    M_NFSDSTATE, M_WAITOK);
 	stp->ls_flags = NFSLCK_RELEASE;
 	stp->ls_op = NULL;
 	stp->ls_ownerlen = len;
 	stp->ls_uid = nd->nd_cred->cr_uid;
 	clientid.lval[0] = tl[0];
 	clientid.lval[1] = tl[1];
 
 	is41 = (nd->nd_flag & ND_NFSV41) != 0;
 	if ((nd->nd_flag & ND_IMPLIEDCLID) == 0) {
 		if (is41)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	} else if (is41) {
 		clientid.qval = nd->nd_clientid.qval;
 	} else if (nd->nd_clientid.qval != clientid.qval) {
 		printf("EEK14 multiple clids\n");
 	}
 
 	/* The owner bytes are stored in the space allocated past *stp. */
 	error = nfsrv_mtostr(nd, stp->ls_owner, len);
 	if (error != 0)
 		goto nfsmout;
 	nd->nd_repstat = nfsrv_releaselckown(stp, clientid, p);
 	FREE((caddr_t)stp, M_NFSDSTATE);
 
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	if (stp != NULL)
 		free((caddr_t)stp, M_NFSDSTATE);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 exchange_id service
  *
  * Builds a struct nfsclient (flagged LCL_NFSV41) from the verifier and
  * client id string in the request, validates the exchange flags and
  * state protection type, and hands the client to nfsrv_setclient().
  * On success the reply carries the clientid, sequence id, exchange
  * flags, server owner, server scope and one implementation id entry.
  *
  * The nfsclient is freed here whenever clp is still non-NULL after
  * nfsrv_setclient() or when the function bails out to nfsmout.
  */
 APPLESTATIC int
 nfsrvd_exchangeid(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	int error = 0, i, idlen;
 	struct nfsclient *clp = NULL;
 	nfsquad_t clientid, confirm;
 	uint8_t *verf;
 	uint32_t sp4type, v41flags;
 	uint64_t owner_minor;
 	struct timespec verstime;
 
 	if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) {
 		nd->nd_repstat = NFSERR_WRONGSEC;
 		goto nfsmout;
 	}
 	/* Verifier followed by the length of the client id string. */
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF + NFSX_UNSIGNED);
 	verf = (uint8_t *)tl;
 	tl += (NFSX_VERF / NFSX_UNSIGNED);
 	i = fxdr_unsigned(int, *tl);
 	if (i > NFSV4_OPAQUELIMIT || i <= 0) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	idlen = i;
 	/* For GSS, the principal name is stored right after the id string. */
 	if (nd->nd_flag & ND_GSS)
 		i += nd->nd_princlen;
 	clp = (struct nfsclient *)malloc(sizeof(struct nfsclient) + i,
 	    M_NFSDCLIENT, M_WAITOK | M_ZERO);
 	NFSINITSOCKMUTEX(&clp->lc_req.nr_mtx);
 	NFSSOCKADDRALLOC(clp->lc_req.nr_nam);
 	NFSSOCKADDRSIZE(clp->lc_req.nr_nam, sizeof (struct sockaddr_in));
 	clp->lc_req.nr_cred = NULL;
 	NFSBCOPY(verf, clp->lc_verf, NFSX_VERF);
 	clp->lc_idlen = idlen;
 	error = nfsrv_mtostr(nd, clp->lc_id, idlen);
 	if (error != 0)
 		goto nfsmout;
 	if ((nd->nd_flag & ND_GSS) != 0) {
 		clp->lc_flags = LCL_GSS | LCL_NFSV41;
 		if ((nd->nd_flag & ND_GSSINTEGRITY) != 0)
 			clp->lc_flags |= LCL_GSSINTEGRITY;
 		else if ((nd->nd_flag & ND_GSSPRIVACY) != 0)
 			clp->lc_flags |= LCL_GSSPRIVACY;
 	} else
 		clp->lc_flags = LCL_NFSV41;
 	if ((nd->nd_flag & ND_GSS) != 0 && nd->nd_princlen > 0) {
 		clp->lc_flags |= LCL_NAME;
 		clp->lc_namelen = nd->nd_princlen;
 		clp->lc_name = &clp->lc_id[idlen];
 		NFSBCOPY(nd->nd_principal, clp->lc_name, clp->lc_namelen);
 	} else {
 		clp->lc_uid = nd->nd_cred->cr_uid;
 		clp->lc_gid = nd->nd_cred->cr_gid;
 	}
 	/* Exchange flags followed by the state protection type. */
 	NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 	v41flags = fxdr_unsigned(uint32_t, *tl++);
 	/* Any flag bit outside the accepted set is an error. */
 	if ((v41flags & ~(NFSV4EXCH_SUPPMOVEDREFER | NFSV4EXCH_SUPPMOVEDMIGR |
 	    NFSV4EXCH_BINDPRINCSTATEID | NFSV4EXCH_MASKPNFS |
 	    NFSV4EXCH_UPDCONFIRMEDRECA)) != 0) {
 		nd->nd_repstat = NFSERR_INVAL;
 		goto nfsmout;
 	}
 	/* confirm.lval[1] carries the "update confirmed record" request. */
 	if ((v41flags & NFSV4EXCH_UPDCONFIRMEDRECA) != 0)
 		confirm.lval[1] = 1;
 	else
 		confirm.lval[1] = 0;
 	/* From here on v41flags holds the flags for the reply. */
 	v41flags = NFSV4EXCH_USENONPNFS;
 	sp4type = fxdr_unsigned(uint32_t, *tl);
 	/* Only NFSV4EXCH_SP4NONE state protection is accepted. */
 	if (sp4type != NFSV4EXCH_SP4NONE) {
 		nd->nd_repstat = NFSERR_NOTSUPP;
 		goto nfsmout;
 	}
 
 	/*
 	 * nfsrv_setclient() does the actual work of adding it to the
 	 * client list. If there is no error, the structure has been
 	 * linked into the client list and clp should no longer be used
 	 * here. When an error is returned, it has not been linked in,
 	 * so it should be free'd.
 	 */
 	nd->nd_repstat = nfsrv_setclient(nd, &clp, &clientid, &confirm, p);
 	if (clp != NULL) {
 		NFSSOCKADDRFREE(clp->lc_req.nr_nam);
 		NFSFREEMUTEX(&clp->lc_req.nr_mtx);
 		free(clp, M_NFSDCLIENT);
 	}
 	if (nd->nd_repstat == 0) {
 		if (confirm.lval[1] != 0)
 			v41flags |= NFSV4EXCH_CONFIRMEDR;
 		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + 3 * NFSX_UNSIGNED);
 		*tl++ = clientid.lval[0];			/* ClientID */
 		*tl++ = clientid.lval[1];
 		*tl++ = txdr_unsigned(confirm.lval[0]);		/* SequenceID */
 		*tl++ = txdr_unsigned(v41flags);		/* Exch flags */
 		*tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);	/* No SSV */
 		owner_minor = 0;				/* Owner */
 		txdr_hyper(owner_minor, tl);			/* Minor */
 		(void)nfsm_strtom(nd, nd->nd_cred->cr_prison->pr_hostuuid,
 		    strlen(nd->nd_cred->cr_prison->pr_hostuuid)); /* Major */
 		/* Scope length, scope value, then the implementation id count. */
 		NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(NFSX_UNSIGNED);
 		*tl++ = time_uptime;		/* Make scope a unique value. */
 		*tl = txdr_unsigned(1);
 		/* Implementation id: domain string, name string, build time. */
 		(void)nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
 		(void)nfsm_strtom(nd, version, strlen(version));
 		NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
 		verstime.tv_sec = 1293840000;		/* Jan 1, 2011 */
 		verstime.tv_nsec = 0;
 		txdr_nfsv4time(&verstime, tl);
 	}
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	/* Reached with clp == NULL when bailing out before the allocation. */
 	if (clp != NULL) {
 		NFSSOCKADDRFREE(clp->lc_req.nr_nam);
 		NFSFREEMUTEX(&clp->lc_req.nr_mtx);
 		free(clp, M_NFSDCLIENT);
 	}
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 create session service
  *
  * Allocates a struct nfsdsession, fills it from the clientid, sequence
  * id, flags, fore/back channel attributes and callback program in the
  * request, and passes it to nfsrv_getclient().  On success the reply
  * carries the session id, sequence id, flags and both sets of channel
  * attributes.
  *
  * The session is freed here when the request could not be parsed
  * (error != 0) or the operation failed (nd->nd_repstat != 0).
  */
 APPLESTATIC int
 nfsrvd_createsession(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	int error = 0;
 	nfsquad_t clientid, confirm;
 	struct nfsdsession *sep = NULL;
 	uint32_t rdmacnt;
 
 	if (nfs_rootfhset == 0 || nfsd_checkrootexp(nd) != 0) {
 		nd->nd_repstat = NFSERR_WRONGSEC;
 		goto nfsmout;
 	}
 	sep = (struct nfsdsession *)malloc(sizeof(struct nfsdsession),
 	    M_NFSDSESSION, M_WAITOK | M_ZERO);
 	sep->sess_refcnt = 1;
 	mtx_init(&sep->sess_cbsess.nfsess_mtx, "nfscbsession", NULL, MTX_DEF);
 	NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
 	clientid.lval[0] = *tl++;
 	clientid.lval[1] = *tl++;
 	confirm.lval[0] = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_crflags = fxdr_unsigned(uint32_t, *tl);
 	/* Persistent sessions and RDMA are not supported. */
 	sep->sess_crflags &= NFSV4CRSESS_CONNBACKCHAN;
 
 	/* Fore channel attributes. */
 	NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
 	tl++;					/* Header pad always 0. */
 	sep->sess_maxreq = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_maxresp = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_maxrespcached = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_maxops = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_maxslots = fxdr_unsigned(uint32_t, *tl++);
 	/* Clamp the slot count to what this server supports. */
 	if (sep->sess_maxslots > NFSV4_SLOTS)
 		sep->sess_maxslots = NFSV4_SLOTS;
 	/* At most one RDMA word may follow; it is read and ignored. */
 	rdmacnt = fxdr_unsigned(uint32_t, *tl);
 	if (rdmacnt > 1) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	} else if (rdmacnt == 1)
 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 
 	/* Back channel attributes. */
 	NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
 	tl++;					/* Header pad always 0. */
 	sep->sess_cbmaxreq = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_cbmaxresp = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_cbmaxrespcached = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_cbmaxops = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_cbsess.nfsess_foreslots = fxdr_unsigned(uint32_t, *tl++);
 	rdmacnt = fxdr_unsigned(uint32_t, *tl);
 	if (rdmacnt > 1) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	} else if (rdmacnt == 1)
 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 
 	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 	sep->sess_cbprogram = fxdr_unsigned(uint32_t, *tl);
 
 	/*
 	 * nfsrv_getclient() searches the client list for a match and
 	 * returns the appropriate NFSERR status.
 	 */
 	nd->nd_repstat = nfsrv_getclient(clientid, CLOPS_CONFIRM | CLOPS_RENEW,
 	    NULL, sep, confirm, sep->sess_cbprogram, nd, p);
 	if (nd->nd_repstat == 0) {
 		NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
 		NFSBCOPY(sep->sess_sessionid, tl, NFSX_V4SESSIONID);
 		NFSM_BUILD(tl, uint32_t *, 18 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(confirm.lval[0]);	/* sequenceid */
 		*tl++ = txdr_unsigned(sep->sess_crflags);
 
 		/* Fore channel attributes. */
 		*tl++ = 0;
 		*tl++ = txdr_unsigned(sep->sess_maxreq);
 		*tl++ = txdr_unsigned(sep->sess_maxresp);
 		*tl++ = txdr_unsigned(sep->sess_maxrespcached);
 		*tl++ = txdr_unsigned(sep->sess_maxops);
 		*tl++ = txdr_unsigned(sep->sess_maxslots);
 		*tl++ = txdr_unsigned(1);
 		*tl++ = txdr_unsigned(0);			/* No RDMA. */
 
 		/* Back channel attributes. */
 		*tl++ = 0;
 		*tl++ = txdr_unsigned(sep->sess_cbmaxreq);
 		*tl++ = txdr_unsigned(sep->sess_cbmaxresp);
 		*tl++ = txdr_unsigned(sep->sess_cbmaxrespcached);
 		*tl++ = txdr_unsigned(sep->sess_cbmaxops);
 		*tl++ = txdr_unsigned(sep->sess_cbsess.nfsess_foreslots);
 		*tl++ = txdr_unsigned(1);
 		*tl = txdr_unsigned(0);			/* No RDMA. */
 	}
 nfsmout:
 	/*
 	 * Free the session on a parse failure as well as on an operation
 	 * failure.  "error" is only assigned by the dissect macros, all of
 	 * which run before nfsrv_getclient() is given the session, so a
 	 * non-zero error means the session was never handed off.
 	 * NOTE(review): the mutex set up with mtx_init() above is not
 	 * mtx_destroy()ed before the free - confirm whether it should be.
 	 */
 	if ((error != 0 || nd->nd_repstat != 0) && sep != NULL)
 		free(sep, M_NFSDSESSION);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 sequence service
  *
  * Records the session id and slot id from the request in nd, sets
  * ND_HASSEQUENCE, and has nfsrv_checksequence() validate the sequence.
  * On success the session id, sequence id, slot ids and status flags
  * are returned.
  */
 APPLESTATIC int
 nfsrvd_sequence(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	uint32_t hislot, seqid, statusflags, targetslot;
 	uint32_t *tl;
 	int error = 0, docache;
 
 	/* Both root-export preconditions must hold, else WRONGSEC. */
 	if (!(nfs_rootfhset != 0 && nfsd_checkrootexp(nd) == 0)) {
 		nd->nd_repstat = NFSERR_WRONGSEC;
 		goto nfsmout;
 	}
 	NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID);
 	NFSBCOPY(tl, nd->nd_sessionid, NFSX_V4SESSIONID);
 
 	/* Sequence id, slot id, highest slot id, cache-this boolean. */
 	NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
 	seqid = fxdr_unsigned(uint32_t, tl[0]);
 	nd->nd_slotid = fxdr_unsigned(uint32_t, tl[1]);
 	hislot = fxdr_unsigned(uint32_t, tl[2]);
 	docache = (tl[3] == newnfs_true) ? 1 : 0;
 
 	nd->nd_flag |= ND_HASSEQUENCE;
 	nd->nd_repstat = nfsrv_checksequence(nd, seqid, &hislot,
 	    &targetslot, docache, &statusflags, p);
 	if (nd->nd_repstat == 0) {
 		NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
 		NFSBCOPY(nd->nd_sessionid, tl, NFSX_V4SESSIONID);
 		NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
 		tl[0] = txdr_unsigned(seqid);
 		tl[1] = txdr_unsigned(nd->nd_slotid);
 		tl[2] = txdr_unsigned(hislot);
 		tl[3] = txdr_unsigned(targetslot);
 		tl[4] = txdr_unsigned(statusflags);
 	}
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 reclaim complete service
  *
  * Reads the single boolean argument.  When it is true the request is
  * answered with NFSERR_NOTSUPP; otherwise the status comes from
  * nfsrv_checkreclaimcomplete().
  */
 APPLESTATIC int
 nfsrvd_reclaimcomplete(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	int error = 0;
 
 	/* Both root-export preconditions must hold, else WRONGSEC. */
 	if (!(nfs_rootfhset != 0 && nfsd_checkrootexp(nd) == 0)) {
 		nd->nd_repstat = NFSERR_WRONGSEC;
 		goto nfsmout;
 	}
 	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 	if (*tl != newnfs_true) {
 		nd->nd_repstat = nfsrv_checkreclaimcomplete(nd);
 	} else {
 		nd->nd_repstat = NFSERR_NOTSUPP;
 	}
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 destroy clientid service
  *
  * Reads the two-word clientid from the request and passes it to
  * nfsrv_destroyclient(); the result becomes nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_destroyclientid(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	nfsquad_t clientid;
 	uint32_t *tl;
 	int error = 0;
 
 	/* Both root-export preconditions must hold, else WRONGSEC. */
 	if (!(nfs_rootfhset != 0 && nfsd_checkrootexp(nd) == 0)) {
 		nd->nd_repstat = NFSERR_WRONGSEC;
 		goto nfsmout;
 	}
 	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 	clientid.lval[0] = tl[0];
 	clientid.lval[1] = tl[1];
 	nd->nd_repstat = nfsrv_destroyclient(clientid, p);
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 destroy session service
  *
  * Copies the session id out of the request and passes it to
  * nfsrv_destroysession(); the result becomes nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_destroysession(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	uint8_t sid[NFSX_V4SESSIONID];
 	uint8_t *sidp;
 	int error = 0;
 
 	/* Both root-export preconditions must hold, else WRONGSEC. */
 	if (!(nfs_rootfhset != 0 && nfsd_checkrootexp(nd) == 0)) {
 		nd->nd_repstat = NFSERR_WRONGSEC;
 		goto nfsmout;
 	}
 	NFSM_DISSECT(sidp, uint8_t *, NFSX_V4SESSIONID);
 	NFSBCOPY(sidp, sid, NFSX_V4SESSIONID);
 	nd->nd_repstat = nfsrv_destroysession(nd, sid);
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 free stateid service
  *
  * Reads a stateid from the request and passes it to
  * nfsrv_freestateid(); the result becomes nd->nd_repstat.
  */
 APPLESTATIC int
 nfsrvd_freestateid(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	nfsv4stateid_t sid;
 	uint32_t *tl;
 	int error = 0;
 
 	/* Both root-export preconditions must hold, else WRONGSEC. */
 	if (!(nfs_rootfhset != 0 && nfsd_checkrootexp(nd) == 0)) {
 		nd->nd_repstat = NFSERR_WRONGSEC;
 		goto nfsmout;
 	}
 	NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
 	sid.seqid = fxdr_unsigned(uint32_t, tl[0]);
 	NFSBCOPY(&tl[1], sid.other, NFSX_STATEIDOTHER);
 	nd->nd_repstat = nfsrv_freestateid(nd, &sid, p);
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 service not supported
  *
  * Handler for operations that are not implemented: sets
  * nd->nd_repstat to NFSERR_NOTSUPP and returns 0 without reading any
  * arguments.
  */
 APPLESTATIC int
 nfsrvd_notsupp(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused NFSPROC_T *p, __unused struct nfsexstuff *exp)
 {
 	/* Every argument other than nd is ignored. */
 	nd->nd_repstat = NFSERR_NOTSUPP;
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
 
Index: user/ngie/more-tests/sys/modules/ext2fs/Makefile
===================================================================
--- user/ngie/more-tests/sys/modules/ext2fs/Makefile	(revision 281675)
+++ user/ngie/more-tests/sys/modules/ext2fs/Makefile	(revision 281676)
@@ -1,10 +1,10 @@
 # $FreeBSD$
 
 .PATH:	${.CURDIR}/../../fs/ext2fs
 KMOD=	ext2fs
 SRCS=	opt_ddb.h opt_directio.h opt_quota.h opt_suiddir.h vnode_if.h \
-	ext2_alloc.c ext2_balloc.c ext2_bmap.c ext2_extents.c ext2_hash.c \
-	ext2_htree.c ext2_inode.c ext2_inode_cnv.c ext2_lookup.c ext2_subr.c \
+	ext2_alloc.c ext2_balloc.c ext2_bmap.c ext2_extents.c \
+	ext2_inode.c ext2_inode_cnv.c ext2_lookup.c ext2_subr.c \
 	ext2_vfsops.c ext2_vnops.c
 
 .include <bsd.kmod.mk>
Index: user/ngie/more-tests/sys/modules/usb/Makefile
===================================================================
--- user/ngie/more-tests/sys/modules/usb/Makefile	(revision 281675)
+++ user/ngie/more-tests/sys/modules/usb/Makefile	(revision 281676)
@@ -1,109 +1,110 @@
 #
 # $FreeBSD$
 #
 # Copyright (c) 2008 Hans Petter Selasky. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
 # 1. Redistributions of source code must retain the above copyright
 #    notice, this list of conditions and the following disclaimer.
 # 2. Redistributions in binary form must reproduce the above copyright
 #    notice, this list of conditions and the following disclaimer in the
 #    documentation and/or other materials provided with the distribution.
 #
 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 # ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.
 #
 
 SYSDIR?=${.CURDIR}/../..
 .include "${SYSDIR}/conf/kern.opts.mk"
 
 #
 # Allow USB modules to be built in parallel
 #
 SUBDIR_PARALLEL=
 
 #
 # Check for common USB debug flags to pass when building the USB
 # modules in this directory:
 #
 .if defined(USB_DEBUG)
 MAKE+=" DEBUG_FLAGS+=-DUSB_DEBUG"
 .endif
 
 .if defined(USB_DEBUG) && defined(USB_REQ_DEBUG)
 MAKE+=" DEBUG_FLAGS+=-DUSB_REQ_DEBUG"
 .endif
 
 # Modules that include binary-only blobs of microcode should be selectable by
 # MK_SOURCELESS_UCODE option (see below).
 
 SUBDIR = usb
 SUBDIR += ${_dwc_otg} ehci ${_musb} ohci uhci xhci ${_uss820dci} ${_at91dci} \
 	  ${_atmegadci} ${_avr32dci} ${_rsu} ${_rsufw} ${_saf1761otg}
 SUBDIR += ${_rum} ${_run} ${_runfw} ${_uath} upgt usie ural ${_zyd} ${_urtw} 
 SUBDIR += ${_urtwn} ${_urtwnfw}
 SUBDIR += atp uhid ukbd ums udbp ufm uep wsp uled
 SUBDIR += ucom u3g uark ubsa ubser uchcom ucycom ufoma uftdi ugensa uipaq ulpt \
 	  umct umcs umodem umoscom uplcom uslcom uvisor uvscom
+SUBDIR += udl
 SUBDIR += uether aue axe axge cdce cue ${_kue} mos rue smsc udav uhso ipheth
 SUBDIR += urndis
 SUBDIR += usfs umass urio
 SUBDIR += quirk template
 SUBDIR += ${_g_audio} ${_g_keyboard} ${_g_modem} ${_g_mouse}
 
 .if ${MK_USB_GADGET_EXAMPLES} == "yes"
 _g_audio=	g_audio
 _g_keyboard=	g_keyboard
 _g_modem=	g_modem
 _g_mouse=	g_mouse
 .endif
 
 .if ${MK_SOURCELESS_UCODE} != "no"
 _rum=		rum
 _uath=		uath
 _zyd=		zyd
 _kue=		kue
 _urtwn=		urtwn
 _urtwnfw=	urtwnfw
 _run=		run
 _runfw=		runfw
 _rsu=		rsu
 _rsufw=		rsufw
 .endif
 
 .if ${MACHINE_CPUARCH} == "amd64"
 _urtw=		urtw
 .endif
 
 .if ${MACHINE_CPUARCH} == "arm"
 _at91dci=	at91dci
 _atmegadci=	atmegadci
 _dwc_otg=	dwc_otg
 _musb=		musb
 _uss820dci=	uss820dci
 .endif
 
 .if ${MACHINE_CPUARCH} == "i386"
 _urtw=		urtw
 .endif
 
 .if ${MACHINE_CPUARCH} == "avr32"
 _avr32dci=	avr32dci
 .endif
 
 .if ${MACHINE_CPUARCH} == "mips"
 _saf1761otg=	saf1761otg
 .endif
 
 .include <bsd.subdir.mk>
Index: user/ngie/more-tests/sys/net/altq/altqconf.h
===================================================================
--- user/ngie/more-tests/sys/net/altq/altqconf.h	(revision 281675)
+++ user/ngie/more-tests/sys/net/altq/altqconf.h	(nonexistent)
@@ -1,29 +0,0 @@
-/*	$OpenBSD: altqconf.h,v 1.1 2001/06/27 05:28:36 kjc Exp $	*/
-/*	$NetBSD: altqconf.h,v 1.2 2001/05/30 11:57:16 mrg Exp $	*/
-
-#if defined(_KERNEL_OPT) || defined(__OpenBSD__)
-
-#if defined(_KERNEL_OPT)
-#include "opt_altq_enabled.h"
-#endif
-
-#include <sys/conf.h>
-
-#ifdef ALTQ
-#define	NALTQ	1
-#else
-#define	NALTQ	0
-#endif
-
-cdev_decl(altq);
-
-#ifdef __OpenBSD__
-#define cdev_altq_init(c,n) { \
-	dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) enodev, \
-	(dev_type_write((*))) enodev, dev_init(c,n,ioctl), \
-	(dev_type_stop((*))) enodev, 0, (dev_type_select((*))) enodev, \
-	(dev_type_mmap((*))) enodev }
-#else
-#define	cdev_altq_init(x,y)	cdev__oci_init(x,y)
-#endif
-#endif /* defined(_KERNEL_OPT) || defined(__OpenBSD__) */

Property changes on: user/ngie/more-tests/sys/net/altq/altqconf.h
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: user/ngie/more-tests/sys/net/altq/altq.h
===================================================================
--- user/ngie/more-tests/sys/net/altq/altq.h	(revision 281675)
+++ user/ngie/more-tests/sys/net/altq/altq.h	(revision 281676)
@@ -1,204 +1,204 @@
-/*	$FreeBSD$	*/
-/*	$KAME: altq.h,v 1.10 2003/07/10 12:07:47 kjc Exp $	*/
-
-/*
+/*-
  * Copyright (C) 1998-2003
  *	Sony Computer Science Laboratories Inc.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
+ *
+ * $KAME: altq.h,v 1.10 2003/07/10 12:07:47 kjc Exp $
+ * $FreeBSD$
  */
 #ifndef _ALTQ_ALTQ_H_
 #define	_ALTQ_ALTQ_H_
 
 #if 0
 /*
  * allow altq-3 (altqd(8) and /dev/altq) to coexist with the new pf-based altq.
  * altq3 is mainly for research experiments. pf-based altq is for daily use.
  */
 #define ALTQ3_COMPAT		/* for compatibility with altq-3 */
 #define ALTQ3_CLFIER_COMPAT	/* for compatibility with altq-3 classifier */
 #endif
 
 #ifdef ALTQ3_COMPAT
 #include <sys/param.h>
 #include <sys/ioccom.h>
 #include <sys/queue.h>
 #include <netinet/in.h>
 
 #ifndef IFNAMSIZ
 #define	IFNAMSIZ	16
 #endif
 #endif /* ALTQ3_COMPAT */
 
 /* altq discipline type */
 #define	ALTQT_NONE		0	/* reserved */
 #define	ALTQT_CBQ		1	/* cbq */
 #define	ALTQT_WFQ		2	/* wfq */
 #define	ALTQT_AFMAP		3	/* afmap */
 #define	ALTQT_FIFOQ		4	/* fifoq */
 #define	ALTQT_RED		5	/* red */
 #define	ALTQT_RIO		6	/* rio */
 #define	ALTQT_LOCALQ		7	/* local use */
 #define	ALTQT_HFSC		8	/* hfsc */
 #define	ALTQT_CDNR		9	/* traffic conditioner */
 #define	ALTQT_BLUE		10	/* blue */
 #define	ALTQT_PRIQ		11	/* priority queue */
 #define	ALTQT_JOBS		12	/* JoBS */
 #define	ALTQT_MAX		13	/* should be max discipline type + 1 */
 
 #ifdef ALTQ3_COMPAT
 /*
  * argument of the generic altq ioctls (ALTQGTYPE and the disabled ones
  * below): an interface name plus one request-specific value.
  */
 struct	altqreq {
 	char	ifname[IFNAMSIZ];	/* if name, e.g. "en0" */
 	u_long	arg;			/* request-specific argument */
 };
 #endif
 
 /* simple token bucket meter profile */
 struct	tb_profile {
 	u_int	rate;	/* rate in bit-per-sec */
 	u_int	depth;	/* depth in bytes */
 };
 
 #ifdef ALTQ3_COMPAT
 /* argument of ALTQTBRSET/ALTQTBRGET: interface name plus its profile */
 struct	tbrreq {
 	char	ifname[IFNAMSIZ];	/* if name, e.g. "en0" */
 	struct	tb_profile tb_prof;	/* token bucket profile */
 };
 
 #ifdef ALTQ3_CLFIER_COMPAT
 /*
  * common network flow info structure
  *
  * Family-independent view: struct flowinfo_in and struct flowinfo_in6
  * begin with the same two one-byte length and family members.
  */
 struct flowinfo {
 	u_char		fi_len;		/* total length */
 	u_char		fi_family;	/* address family */
 	u_int8_t	fi_data[46];	/* actually longer; address family
 					   specific flow info. */
 };
 
 /*
  * flow info structure for internet protocol family.
  * (currently this is the only protocol family supported)
  *
  * The trailing _pad keeps this structure the same size as
  * struct flowinfo_in6.
  */
 struct flowinfo_in {
 	u_char		fi_len;		/* sizeof(struct flowinfo_in) */
 	u_char		fi_family;	/* AF_INET */
 	u_int8_t	fi_proto;	/* IPPROTO_XXX */
 	u_int8_t	fi_tos;		/* type-of-service */
 	struct in_addr	fi_dst;		/* dest address */
 	struct in_addr	fi_src;		/* src address */
 	u_int16_t	fi_dport;	/* dest port */
 	u_int16_t	fi_sport;	/* src port */
 	u_int32_t	fi_gpi;		/* generalized port id for ipsec */
 	u_int8_t	_pad[28];	/* make the size equal to
 					   flowinfo_in6 */
 };
 
 #ifdef SIN6_LEN
 /*
  * flow info structure for IPv6; only declared when SIN6_LEN is defined.
  */
 struct flowinfo_in6 {
 	u_char		fi6_len;	/* sizeof(struct flowinfo_in6) */
 	u_char		fi6_family;	/* AF_INET6 */
 	u_int8_t	fi6_proto;	/* IPPROTO_XXX */
 	u_int8_t	fi6_tclass;	/* traffic class */
 	u_int32_t	fi6_flowlabel;	/* ipv6 flowlabel */
 	u_int16_t	fi6_dport;	/* dest port */
 	u_int16_t	fi6_sport;	/* src port */
 	u_int32_t	fi6_gpi;	/* generalized port id */
 	struct in6_addr fi6_dst;	/* dest address */
 	struct in6_addr fi6_src;	/* src address */
 };
 #endif /* SIN6_LEN */
 
 /*
  * flow filters for AF_INET and AF_INET6
  *
  * A filter is a rule number, a flow to match and the masks applied to
  * that flow's addresses and tos.
  */
 struct flow_filter {
 	int			ff_ruleno;
 	struct flowinfo_in	ff_flow;
 	struct {
 		struct in_addr	mask_dst;
 		struct in_addr	mask_src;
 		u_int8_t	mask_tos;
 		u_int8_t	_pad[3];
 	} ff_mask;
 	u_int8_t _pad2[24];	/* make the size equal to flow_filter6 */
 };
 
 #ifdef SIN6_LEN
 /* IPv6 counterpart of struct flow_filter; needs struct flowinfo_in6 */
 struct flow_filter6 {
 	int			ff_ruleno;
 	struct flowinfo_in6	ff_flow6;
 	struct {
 		struct in6_addr	mask6_dst;
 		struct in6_addr	mask6_src;
 		u_int8_t	mask6_tclass;
 		u_int8_t	_pad[3];
 	} ff_mask6;
 };
 #endif /* SIN6_LEN */
 #endif /* ALTQ3_CLFIER_COMPAT */
 #endif /* ALTQ3_COMPAT */
 
 /*
  * generic packet counter
  */
 struct pktcntr {
 	u_int64_t	packets;	/* number of packets counted */
 	u_int64_t	bytes;		/* sum of the lengths counted */
 };
 
 /*
  * Count one packet of "len" bytes in the struct pktcntr that "cntr"
  * points to.  "cntr" is evaluated twice, so it must not have side
  * effects; "len" is evaluated once and is parenthesized so that any
  * expression can be passed.
  */
 #define	PKTCNTR_ADD(cntr, len)	\
 	do { (cntr)->packets++; (cntr)->bytes += (len); } while (/*CONSTCOND*/ 0)
 
 #ifdef ALTQ3_COMPAT
 /*
  * altq related ioctls
  */
 #define	ALTQGTYPE	_IOWR('q', 0, struct altqreq)	/* get queue type */
 #if 0
 /*
  * these ioctls are currently discipline-specific but could be shared
  * in the future.
  */
 #define	ALTQATTACH	_IOW('q', 1, struct altqreq)	/* attach discipline */
 #define	ALTQDETACH	_IOW('q', 2, struct altqreq)	/* detach discipline */
 #define	ALTQENABLE	_IOW('q', 3, struct altqreq)	/* enable discipline */
 #define	ALTQDISABLE	_IOW('q', 4, struct altqreq)	/* disable discipline*/
 #define	ALTQCLEAR	_IOW('q', 5, struct altqreq)	/* (re)initialize */
 #define	ALTQCONFIG	_IOWR('q', 6, struct altqreq)	/* set config params */
 #define	ALTQADDCLASS	_IOWR('q', 7, struct altqreq)	/* add a class */
 #define	ALTQMODCLASS	_IOWR('q', 8, struct altqreq)	/* modify a class */
 #define	ALTQDELCLASS	_IOWR('q', 9, struct altqreq)	/* delete a class */
 #define	ALTQADDFILTER	_IOWR('q', 10, struct altqreq)	/* add a filter */
 #define	ALTQDELFILTER	_IOWR('q', 11, struct altqreq)	/* delete a filter */
 #define	ALTQGETSTATS	_IOWR('q', 12, struct altqreq)	/* get statistics */
 #define	ALTQGETCNTR	_IOWR('q', 13, struct altqreq)	/* get a pkt counter */
 #endif /* 0 */
 #define	ALTQTBRSET	_IOW('q', 14, struct tbrreq)	/* set tb regulator */
 #define	ALTQTBRGET	_IOWR('q', 15, struct tbrreq)	/* get tb regulator */
 #endif /* ALTQ3_COMPAT */
 
 #ifdef _KERNEL
 #include <net/altq/altq_var.h>
 #endif
 
 #endif /* _ALTQ_ALTQ_H_ */
Index: user/ngie/more-tests/sys/net/altq/altq_cbq.c
===================================================================
--- user/ngie/more-tests/sys/net/altq/altq_cbq.c	(revision 281675)
+++ user/ngie/more-tests/sys/net/altq/altq_cbq.c	(revision 281676)
@@ -1,1173 +1,1165 @@
-/*	$FreeBSD$	*/
-/*	$KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $	*/
-
-/*
+/*-
  * Copyright (c) Sun Microsystems, Inc. 1993-1998 All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by the SMCC Technology
  *      Development Group at Sun Microsystems, Inc.
  *
  * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
  *      promote products derived from this software without specific prior
  *      written permission.
  *
  * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
  * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE.  The software is
  * provided "as is" without express or implied warranty of any kind.
  *
  * These notices must be retained in any copies of any part of this software.
+ *
+ * $KAME: altq_cbq.c,v 1.19 2003/09/17 14:23:25 kjc Exp $
+ * $FreeBSD$
  */
 
-#if defined(__FreeBSD__) || defined(__NetBSD__)
 #include "opt_altq.h"
 #include "opt_inet.h"
-#ifdef __FreeBSD__
 #include "opt_inet6.h"
-#endif
-#endif /* __FreeBSD__ || __NetBSD__ */
 #ifdef ALTQ_CBQ	/* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #ifdef ALTQ3_COMPAT
 #include <sys/uio.h>
 #include <sys/kernel.h>
 #endif
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <netinet/in.h>
 
 #include <netpfil/pf/pf.h>
 #include <netpfil/pf/pf_altq.h>
 #include <netpfil/pf/pf_mtag.h>
 #include <net/altq/altq.h>
 #include <net/altq/altq_cbq.h>
 #ifdef ALTQ3_COMPAT
 #include <net/altq/altq_conf.h>
 #endif
 
 #ifdef ALTQ3_COMPAT
 /*
  * Local Data structures.
  */
 static cbq_state_t *cbq_list = NULL;
 #endif
 
 /*
  * Forward Declarations.
  */
 static int		 cbq_class_destroy(cbq_state_t *, struct rm_class *);
 static struct rm_class  *clh_to_clp(cbq_state_t *, u_int32_t);
 static int		 cbq_clear_interface(cbq_state_t *);
 static int		 cbq_request(struct ifaltq *, int, void *);
 static int		 cbq_enqueue(struct ifaltq *, struct mbuf *,
 			     struct altq_pktattr *);
 static struct mbuf	*cbq_dequeue(struct ifaltq *, int);
 static void		 cbqrestart(struct ifaltq *);
 static void		 get_class_stats(class_stats_t *, struct rm_class *);
 static void		 cbq_purge(cbq_state_t *);
 #ifdef ALTQ3_COMPAT
 static int	cbq_add_class(struct cbq_add_class *);
 static int	cbq_delete_class(struct cbq_delete_class *);
 static int	cbq_modify_class(struct cbq_modify_class *);
 static int 	cbq_class_create(cbq_state_t *, struct cbq_add_class *,
 				 struct rm_class *, struct rm_class *);
 static int	cbq_clear_hierarchy(struct cbq_interface *);
 static int	cbq_set_enable(struct cbq_interface *, int);
 static int	cbq_ifattach(struct cbq_interface *);
 static int	cbq_ifdetach(struct cbq_interface *);
 static int 	cbq_getstats(struct cbq_getstats *);
 
 static int	cbq_add_filter(struct cbq_add_filter *);
 static int	cbq_delete_filter(struct cbq_delete_filter *);
 #endif /* ALTQ3_COMPAT */
 
 /*
  * int
  * cbq_class_destroy(cbq_state_t *cbqp, struct rm_class *cl)
  *
  *	Destroy one traffic class: pass it to rmc_delete_class(), then
  *	clear every reference cbqp still holds to it (class table slots,
  *	the root and default shortcuts and, with ALTQ3_COMPAT, the ctl
  *	shortcut).  Always returns 0.
  */
 static int
 cbq_class_destroy(cbq_state_t *cbqp, struct rm_class *cl)
 {
 	int	slot;
 
 	/* delete the class */
 	rmc_delete_class(&cbqp->ifnp, cl);
 
 	/* from here on cl is only compared against, never dereferenced */
 	for (slot = 0; slot < CBQ_MAX_CLASSES; slot++) {
 		if (cbqp->cbq_class_tbl[slot] == cl)
 			cbqp->cbq_class_tbl[slot] = NULL;
 	}
 
 	if (cbqp->ifnp.root_ == cl)
 		cbqp->ifnp.root_ = NULL;
 	if (cbqp->ifnp.default_ == cl)
 		cbqp->ifnp.default_ = NULL;
 #ifdef ALTQ3_COMPAT
 	if (cbqp->ifnp.ctl_ == cl)
 		cbqp->ifnp.ctl_ = NULL;
 #endif
 	return (0);
 }
 
 /*
  * Map a class handle to its class pointer.  Handle 0 never names a class.
  * Returns NULL when no class in cbqp's table carries the handle.
  */
 static struct rm_class *
 clh_to_clp(cbq_state_t *cbqp, u_int32_t chandle)
 {
 	struct rm_class *cand;
 	int slot;
 
 	if (chandle == 0)
 		return (NULL);
 
 	/* fast path: the slot selected by the handle modulo the table size */
 	slot = chandle % CBQ_MAX_CLASSES;
 	cand = cbqp->cbq_class_tbl[slot];
 	if (cand != NULL && cand->stats_.handle == chandle)
 		return (cand);
 
 	/* slow path: scan the whole table */
 	for (slot = 0; slot < CBQ_MAX_CLASSES; slot++) {
 		cand = cbqp->cbq_class_tbl[slot];
 		if (cand != NULL && cand->stats_.handle == chandle)
 			return (cand);
 	}
 	return (NULL);
 }
 
 /*
  * Destroy every class in cbqp's class table.  Classes that still are a
  * parent are skipped and the table is rescanned until a pass finds none,
  * so children always go before their parents.  Always returns 0.
  */
 static int
 cbq_clear_interface(cbq_state_t *cbqp)
 {
 	struct rm_class	*cl;
 	int		 slot, parents_left;
 
 #ifdef ALTQ3_CLFIER_COMPAT
 	/* free the filters for this interface */
 	acc_discard_filters(&cbqp->cbq_classifier, NULL, 1);
 #endif
 
 	/* clear out the classes now */
 	do {
 		parents_left = 0;
 		for (slot = 0; slot < CBQ_MAX_CLASSES; slot++) {
 			cl = cbqp->cbq_class_tbl[slot];
 			if (cl == NULL)
 				continue;
 			if (is_a_parent_class(cl)) {
 				/* has children; retry on the next pass */
 				parents_left++;
 				continue;
 			}
 			cbq_class_destroy(cbqp, cl);
 			cbqp->cbq_class_tbl[slot] = NULL;
 			if (cbqp->ifnp.root_ == cl)
 				cbqp->ifnp.root_ = NULL;
 			if (cbqp->ifnp.default_ == cl)
 				cbqp->ifnp.default_ = NULL;
 #ifdef ALTQ3_COMPAT
 			if (cbqp->ifnp.ctl_ == cl)
 				cbqp->ifnp.ctl_ = NULL;
 #endif
 		}
 	} while (parents_left);
 
 	return (0);
 }
 
 /*
  * Request hook handed to altq_attach().  Only ALTRQ_PURGE is acted on
  * (it purges all classes); any other request is ignored.  arg is unused.
  * Always returns 0.
  */
 static int
 cbq_request(struct ifaltq *ifq, int req, void *arg)
 {
 	cbq_state_t	*cbqp = (cbq_state_t *)ifq->altq_disc;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (req == ALTRQ_PURGE)
 		cbq_purge(cbqp);
 	return (0);
 }
 
 /*
  * Copy the stats info in rm_class to class_stats_t.
  *
  * *statsp is zeroed first and then filled from the counters and static
  * parameters of cl.  The red member is only filled when the class queue
  * is a RED (ALTQ_RED) or RIO (ALTQ_RIO) queue.
  */
 static void
 get_class_stats(class_stats_t *statsp, struct rm_class *cl)
 {
 	/*
 	 * Start from all zeroes.  cbq_getqstats() copies the complete
 	 * structure from an on-stack variable out to user space, so any
 	 * member or padding byte not assigned below would otherwise
 	 * disclose stale kernel stack contents.
 	 */
 	memset(statsp, 0, sizeof(*statsp));
 
 	/* running counters */
 	statsp->xmit_cnt	= cl->stats_.xmit_cnt;
 	statsp->drop_cnt	= cl->stats_.drop_cnt;
 	statsp->over		= cl->stats_.over;
 	statsp->borrows		= cl->stats_.borrows;
 	statsp->overactions	= cl->stats_.overactions;
 	statsp->delays		= cl->stats_.delays;
 
 	/* class parameters and current queue state */
 	statsp->depth		= cl->depth_;
 	statsp->priority	= cl->pri_;
 	statsp->maxidle		= cl->maxidle_;
 	statsp->minidle		= cl->minidle_;
 	statsp->offtime		= cl->offtime_;
 	statsp->qmax		= qlimit(cl->q_);
 	statsp->ns_per_byte	= cl->ns_per_byte_;
 	statsp->wrr_allot	= cl->w_allotment_;
 	statsp->qcnt		= qlen(cl->q_);
 	statsp->avgidle		= cl->avgidle_;
 
 	statsp->qtype		= qtype(cl->q_);
 #ifdef ALTQ_RED
 	if (q_is_red(cl->q_))
 		red_getstats(cl->red_, &statsp->red[0]);
 #endif
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->q_))
 		rio_getstats((rio_t *)cl->red_, &statsp->red[0]);
 #endif
 }
 
 int
 cbq_pfattach(struct pf_altq *a)
 {
 	struct ifnet	*ifp;
 	int		 s, error;
 
 	if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
 		return (EINVAL);
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	error = altq_attach(&ifp->if_snd, ALTQT_CBQ, a->altq_disc,
 	    cbq_enqueue, cbq_dequeue, cbq_request, NULL, NULL);
 	splx(s);
 	return (error);
 }
 
 /*
  * Allocate the CBQ state for the interface named a->ifname and store it
  * in a->altq_disc.
  *
  * Returns 0 on success, EINVAL if ifunit() does not find the interface,
  * ENODEV if its send queue is not ALTQ-ready, ENOMEM if the allocation
  * fails.
  */
 int
 cbq_add_altq(struct pf_altq *a)
 {
 	struct ifnet	*ifp;
 	cbq_state_t	*state;
 
 	ifp = ifunit(a->ifname);
 	if (ifp == NULL)
 		return (EINVAL);
 	if (!ALTQ_IS_READY(&ifp->if_snd))
 		return (ENODEV);
 
 	/* allocate and initialize cbq_state_t */
 	state = malloc(sizeof(*state), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (state == NULL)
 		return (ENOMEM);
 	CALLOUT_INIT(&state->cbq_callout);
 	state->cbq_qlen = 0;
 	/* keep the ifq */
 	state->ifnp.ifq_ = &ifp->if_snd;
 
 	/* keep the state in pf_altq */
 	a->altq_disc = state;
 
 	return (0);
 }
 
 /*
  * Detach the CBQ state from a, destroy all of its classes and free it.
  * Returns EINVAL if a carries no CBQ state, otherwise 0.
  */
 int
 cbq_remove_altq(struct pf_altq *a)
 {
 	cbq_state_t	*state;
 
 	state = a->altq_disc;
 	if (state == NULL)
 		return (EINVAL);
 	a->altq_disc = NULL;
 
 	cbq_clear_interface(state);
 
 	/* destroy whatever default/root class is still recorded */
 	if (state->ifnp.default_)
 		cbq_class_destroy(state, state->ifnp.default_);
 	if (state->ifnp.root_)
 		cbq_class_destroy(state, state->ifnp.root_);
 
 	/* deallocate cbq_state_t */
 	free(state, M_DEVBUF);
 
 	return (0);
 }
 
 int
 cbq_add_queue(struct pf_altq *a)
 {
 	struct rm_class	*borrow, *parent;
 	cbq_state_t	*cbqp;
 	struct rm_class	*cl;
 	struct cbq_opts	*opts;
 	int		i;
 
 	if ((cbqp = a->altq_disc) == NULL)
 		return (EINVAL);
 	if (a->qid == 0)
 		return (EINVAL);
 
 	/*
 	 * find a free slot in the class table.  if the slot matching
 	 * the lower bits of qid is free, use this slot.  otherwise,
 	 * use the first free slot.
 	 */
 	i = a->qid % CBQ_MAX_CLASSES;
 	if (cbqp->cbq_class_tbl[i] != NULL) {
 		for (i = 0; i < CBQ_MAX_CLASSES; i++)
 			if (cbqp->cbq_class_tbl[i] == NULL)
 				break;
 		if (i == CBQ_MAX_CLASSES)
 			return (EINVAL);
 	}
 
 	opts = &a->pq_u.cbq_opts;
 	/* check parameters */
 	if (a->priority >= CBQ_MAXPRI)
 		return (EINVAL);
 
 	/* Get pointers to parent and borrow classes.  */
 	parent = clh_to_clp(cbqp, a->parent_qid);
 	if (opts->flags & CBQCLF_BORROW)
 		borrow = parent;
 	else
 		borrow = NULL;
 
 	/*
 	 * A class must borrow from it's parent or it can not
 	 * borrow at all.  Hence, borrow can be null.
 	 */
 	if (parent == NULL && (opts->flags & CBQCLF_ROOTCLASS) == 0) {
 		printf("cbq_add_queue: no parent class!\n");
 		return (EINVAL);
 	}
 
 	if ((borrow != parent)  && (borrow != NULL)) {
 		printf("cbq_add_class: borrow class != parent\n");
 		return (EINVAL);
 	}
 
 	/*
 	 * check parameters
 	 */
 	switch (opts->flags & CBQCLF_CLASSMASK) {
 	case CBQCLF_ROOTCLASS:
 		if (parent != NULL)
 			return (EINVAL);
 		if (cbqp->ifnp.root_)
 			return (EINVAL);
 		break;
 	case CBQCLF_DEFCLASS:
 		if (cbqp->ifnp.default_)
 			return (EINVAL);
 		break;
 	case 0:
 		if (a->qid == 0)
 			return (EINVAL);
 		break;
 	default:
 		/* more than two flags bits set */
 		return (EINVAL);
 	}
 
 	/*
 	 * create a class.  if this is a root class, initialize the
 	 * interface.
 	 */
 	if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) {
 		rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, opts->ns_per_byte,
 		    cbqrestart, a->qlimit, RM_MAXQUEUED,
 		    opts->maxidle, opts->minidle, opts->offtime,
 		    opts->flags);
 		cl = cbqp->ifnp.root_;
 	} else {
 		cl = rmc_newclass(a->priority,
 				  &cbqp->ifnp, opts->ns_per_byte,
 				  rmc_delay_action, a->qlimit, parent, borrow,
 				  opts->maxidle, opts->minidle, opts->offtime,
 				  opts->pktsize, opts->flags);
 	}
 	if (cl == NULL)
 		return (ENOMEM);
 
 	/* return handle to user space. */
 	cl->stats_.handle = a->qid;
 	cl->stats_.depth = cl->depth_;
 
 	/* save the allocated class */
 	cbqp->cbq_class_tbl[i] = cl;
 
 	if ((opts->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS)
 		cbqp->ifnp.default_ = cl;
 
 	return (0);
 }
 
 /*
  * Delete the class with handle a->qid from the CBQ state stored in
  * a->altq_disc.
  *
  * Returns 0 on success; EINVAL if there is no state, the handle is
  * unknown, or the class still is a parent of other classes.
  */
 int
 cbq_remove_queue(struct pf_altq *a)
 {
 	cbq_state_t	*cbqp;
 	struct rm_class	*cl;
 	int		 slot;
 
 	cbqp = a->altq_disc;
 	if (cbqp == NULL)
 		return (EINVAL);
 
 	cl = clh_to_clp(cbqp, a->qid);
 	if (cl == NULL)
 		return (EINVAL);
 
 	/* if we are a parent class, then return an error. */
 	if (is_a_parent_class(cl))
 		return (EINVAL);
 
 	/* delete the class */
 	rmc_delete_class(&cbqp->ifnp, cl);
 
 	/*
 	 * free the class handle: only the first matching slot is cleared,
 	 * together with the root/default shortcuts.
 	 */
 	for (slot = 0; slot < CBQ_MAX_CLASSES; slot++) {
 		if (cbqp->cbq_class_tbl[slot] != cl)
 			continue;
 		cbqp->cbq_class_tbl[slot] = NULL;
 		if (cbqp->ifnp.root_ == cl)
 			cbqp->ifnp.root_ = NULL;
 		if (cbqp->ifnp.default_ == cl)
 			cbqp->ifnp.default_ = NULL;
 		break;
 	}
 
 	return (0);
 }
 
 /*
  * Copy the statistics of class a->qid on interface a->ifname out to the
  * user buffer ubuf.
  *
  * On entry *nbytes is the size of ubuf and must be at least
  * sizeof(class_stats_t); on success it is set to the number of bytes
  * copied out.
  *
  * Returns 0 on success, EBADF if altq_lookup() finds no CBQ state for
  * the interface, EINVAL if the class handle is unknown or the buffer
  * size is negative or too small, or the error from copyout().
  */
 int
 cbq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
 {
 	cbq_state_t	*cbqp;
 	struct rm_class	*cl;
 	class_stats_t	 stats;
 	int		 error = 0;
 
 	if ((cbqp = altq_lookup(a->ifname, ALTQT_CBQ)) == NULL)
 		return (EBADF);
 
 	if ((cl = clh_to_clp(cbqp, a->qid)) == NULL)
 		return (EINVAL);
 
 	/*
 	 * Test the sign first: comparing a negative int directly with the
 	 * size_t result of sizeof converts it to a huge unsigned value, so
 	 * it would pass the size check.
 	 */
 	if (*nbytes < 0 || (size_t)*nbytes < sizeof(stats))
 		return (EINVAL);
 
 	get_class_stats(&stats, cl);
 
 	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
 		return (error);
 	*nbytes = sizeof(stats);
 	return (0);
 }
 
 /*
  * int
  * cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
  *		- Queue a data packet on its class.
  *
  *	The class is looked up from the qid in the packet's pf tag; with
  *	ALTQ3_COMPAT an untagged packet may instead take the class from
  *	pktattr.  If that yields no class the default class is used.
  *
  *	Assumptions:	the ifq lock is held (IFQ_LOCK_ASSERT).
  *	Returns:	0 if the queueing is successful.
  *			ENOBUFS if the packet was dropped: it has no packet
  *			header, no class could be found, or
  *			rmc_queue_packet() failed.
  */
 
 static int
 cbq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
 {
 	cbq_state_t	*cbqp = (cbq_state_t *)ifq->altq_disc;
 	struct rm_class	*cl;
 	struct pf_mtag	*tag;
 	int		 pktlen;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if ((m->m_flags & M_PKTHDR) == 0) {
 		/* should not happen */
 		printf("altq: packet for %s does not have pkthdr\n",
 		    ifq->altq_ifp->if_xname);
 		m_freem(m);
 		return (ENOBUFS);
 	}
 
 	/* grab class set by classifier */
 	tag = pf_find_mtag(m);
 	if (tag != NULL) {
 		cl = clh_to_clp(cbqp, tag->qid);
 	} else {
 		cl = NULL;
 #ifdef ALTQ3_COMPAT
 		if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
 			cl = pktattr->pattr_class;
 #endif
 	}
 
 	/* fall back to the default class; without one the packet is lost */
 	if (cl == NULL)
 		cl = cbqp->ifnp.default_;
 	if (cl == NULL) {
 		m_freem(m);
 		return (ENOBUFS);
 	}
 
 #ifdef ALTQ3_COMPAT
 	/* save proto hdr used by ECN; a NULL pktattr stores NULL */
 	cl->pktattr_ = pktattr;
 #else
 	cl->pktattr_ = NULL;
 #endif
 
 	/* take the length now: m belongs to the queue after the call */
 	pktlen = m_pktlen(m);
 	if (rmc_queue_packet(cl, m) != 0) {
 		/* drop occurred.  some mbuf was freed in rmc_queue_packet. */
 		PKTCNTR_ADD(&cl->stats_.drop_cnt, pktlen);
 		return (ENOBUFS);
 	}
 
 	/* successfully queued. */
 	++cbqp->cbq_qlen;
 	IFQ_INC_LEN(ifq);
 	return (0);
 }
 
 static struct mbuf *
 cbq_dequeue(struct ifaltq *ifq, int op)
 {
 	cbq_state_t	*cbqp = (cbq_state_t *)ifq->altq_disc;
 	struct mbuf	*m;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	m = rmc_dequeue_next(&cbqp->ifnp, op);
 
 	if (m && op == ALTDQ_REMOVE) {
 		--cbqp->cbq_qlen;  /* decrement # of packets in cbq */
 		IFQ_DEC_LEN(ifq);
 
 		/* Update the class. */
 		rmc_update_class_util(&cbqp->ifnp);
 	}
 	return (m);
 }
 
 /*
  * void
  * cbqrestart(struct ifaltq *) - Restart sending of data.
  *
  *	Passed to rmc_init() as the restart callback.  Calls the interface
  *	start routine, with the ifq lock dropped around the call, when the
  *	discipline is still enabled, packets are queued in cbq and the
  *	driver is not marked IFF_DRV_OACTIVE.
  *	Returns:	NONE
  */
 
 static void
 cbqrestart(struct ifaltq *ifq)
 {
 	cbq_state_t	*cbqp;
 	struct ifnet	*ifp;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	/* cbq must have been detached */
 	if (!ALTQ_IS_ENABLED(ifq))
 		return;
 
 	/* should not happen */
 	cbqp = (cbq_state_t *)ifq->altq_disc;
 	if (cbqp == NULL)
 		return;
 
 	ifp = ifq->altq_ifp;
 	if (ifp->if_start == NULL)
 		return;
 	if (cbqp->cbq_qlen <= 0)
 		return;
 	if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) != 0)
 		return;
 
 	IFQ_UNLOCK(ifq);
 	(*ifp->if_start)(ifp);
 	IFQ_LOCK(ifq);
 }
 
 /*
  * Call rmc_dropall() on every class in the table and, if the discipline
  * is enabled on the interface queue, reset that queue's length to 0.
  */
 static void
 cbq_purge(cbq_state_t *cbqp)
 {
 	int	slot;
 
 	for (slot = 0; slot < CBQ_MAX_CLASSES; slot++) {
 		struct rm_class *cl = cbqp->cbq_class_tbl[slot];
 
 		if (cl != NULL)
 			rmc_dropall(cl);
 	}
 	if (ALTQ_IS_ENABLED(cbqp->ifnp.ifq_))
 		cbqp->ifnp.ifq_->ifq_len = 0;
 }
 #ifdef ALTQ3_COMPAT
 
 /*
  * Add the class described by *acp to the CBQ state of the interface
  * named in acp->cbq_iface.
  *
  * Returns EBADF if altq_lookup() finds no CBQ state for the interface;
  * EINVAL if the priority or queue size is out of range, a non-root class
  * names no existing parent, or the borrow class is neither NULL nor the
  * parent; otherwise the result of cbq_class_create().
  */
 static int
 cbq_add_class(struct cbq_add_class *acp)
 {
 	char		*ifacename;
 	struct rm_class	*borrow, *parent;
 	cbq_state_t	*cbqp;
 
 	ifacename = acp->cbq_iface.cbq_ifacename;
 	if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
 		return (EBADF);
 
 	/* check parameters */
 	if (acp->cbq_class.priority >= CBQ_MAXPRI ||
 	    acp->cbq_class.maxq > CBQ_MAXQSIZE)
 		return (EINVAL);
 
 	/* Get pointers to parent and borrow classes.  */
 	parent = clh_to_clp(cbqp, acp->cbq_class.parent_class_handle);
 	borrow = clh_to_clp(cbqp, acp->cbq_class.borrow_class_handle);
 
 	/*
 	 * A class must borrow from its parent or it can not
 	 * borrow at all.  Hence, borrow can be null.
 	 */
 	if (parent == NULL && (acp->cbq_class.flags & CBQCLF_ROOTCLASS) == 0) {
 		printf("cbq_add_class: no parent class!\n");
 		return (EINVAL);
 	}
 
 	if ((borrow != parent)  && (borrow != NULL)) {
 		printf("cbq_add_class: borrow class != parent\n");
 		return (EINVAL);
 	}
 
 	return cbq_class_create(cbqp, acp, parent, borrow);
 }
 
 /*
  * Delete the class with handle dcp->cbq_class_handle from the CBQ state
  * of the interface named in dcp->cbq_iface, after discarding any filter
  * that refers to it.
  *
  * Returns EBADF if altq_lookup() finds no CBQ state for the interface;
  * EINVAL if the handle is unknown or the class still is a parent;
  * otherwise the result of cbq_class_destroy().
  */
 static int
 cbq_delete_class(struct cbq_delete_class *dcp)
 {
 	char		*ifacename;
 	struct rm_class	*cl;
 	cbq_state_t	*cbqp;
 
 	ifacename = dcp->cbq_iface.cbq_ifacename;
 	if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
 		return (EBADF);
 
 	if ((cl = clh_to_clp(cbqp, dcp->cbq_class_handle)) == NULL)
 		return (EINVAL);
 
 	/* if we are a parent class, then return an error. */
 	if (is_a_parent_class(cl))
 		return (EINVAL);
 
 	/* if a filter has a reference to this class delete the filter */
 	acc_discard_filters(&cbqp->cbq_classifier, cl, 0);
 
 	return cbq_class_destroy(cbqp, cl);
 }
 
 /*
  * Apply the parameters in acp->cbq_class to the existing class with
  * handle acp->cbq_class_handle via rmc_modclass().
  *
  * Returns 0 on success; EBADF if altq_lookup() finds no CBQ state for
  * the interface; EINVAL if the handle is unknown or rmc_modclass()
  * returns a negative value.
  */
 static int
 cbq_modify_class(struct cbq_modify_class *acp)
 {
 	char		*ifacename;
 	struct rm_class	*cl;
 	cbq_state_t	*cbqp;
 
 	ifacename = acp->cbq_iface.cbq_ifacename;
 	if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
 		return (EBADF);
 
 	/* Get pointer to this class */
 	if ((cl = clh_to_clp(cbqp, acp->cbq_class_handle)) == NULL)
 		return (EINVAL);
 
 	if (rmc_modclass(cl, acp->cbq_class.nano_sec_per_byte,
 			 acp->cbq_class.maxq, acp->cbq_class.maxidle,
 			 acp->cbq_class.minidle, acp->cbq_class.offtime,
 			 acp->cbq_class.pktsize) < 0)
 		return (EINVAL);
 	return (0);
 }
 
 /*
  * int
  * cbq_class_create(cbq_state_t *cbqp, struct cbq_add_class *acp,
  *		struct rm_class *parent, struct rm_class *borrow)
  *
  * This function creates a new traffic class in the CBQ class hierarchy
  * from the given parameters.  A class flagged CBQCLF_ROOTCLASS is created
  * by initializing the interface with rmc_init(); any other class is
  * created with rmc_newclass().  The class handle is the index of the
  * first free class table slot, counting from 1, and is returned to the
  * caller in acp->cbq_class_handle.
  *
  * Returns 0 on success, EINVAL if the class table is full, ENOMEM if the
  * class could not be created.
  */
 static int
 cbq_class_create(cbq_state_t *cbqp, struct cbq_add_class *acp,
     struct rm_class *parent, struct rm_class *borrow)
 {
 	struct rm_class	*cl;
 	cbq_class_spec_t *spec = &acp->cbq_class;
 	u_int32_t	chandle;
 	int		i;
 
 	/*
 	 * allocate class handle
 	 */
 	for (i = 1; i < CBQ_MAX_CLASSES; i++)
 		if (cbqp->cbq_class_tbl[i] == NULL)
 			break;
 	if (i == CBQ_MAX_CLASSES)
 		return (EINVAL);
 	chandle = i;	/* use the slot number as class handle */
 
 	/*
 	 * create a class.  if this is a root class, initialize the
 	 * interface.
 	 */
 	if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_ROOTCLASS) {
 		rmc_init(cbqp->ifnp.ifq_, &cbqp->ifnp, spec->nano_sec_per_byte,
 			 cbqrestart, spec->maxq, RM_MAXQUEUED,
 			 spec->maxidle, spec->minidle, spec->offtime,
 			 spec->flags);
 		cl = cbqp->ifnp.root_;
 	} else {
 		cl = rmc_newclass(spec->priority,
 				  &cbqp->ifnp, spec->nano_sec_per_byte,
 				  rmc_delay_action, spec->maxq, parent, borrow,
 				  spec->maxidle, spec->minidle, spec->offtime,
 				  spec->pktsize, spec->flags);
 	}
 	if (cl == NULL)
 		return (ENOMEM);
 
 	/* return handle to user space. */
 	acp->cbq_class_handle = chandle;
 
 	cl->stats_.handle = chandle;
 	cl->stats_.depth = cl->depth_;
 
 	/* save the allocated class */
 	cbqp->cbq_class_tbl[i] = cl;
 
 	/* remember the default and control classes for direct access */
 	if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_DEFCLASS)
 		cbqp->ifnp.default_ = cl;
 	if ((spec->flags & CBQCLF_CLASSMASK) == CBQCLF_CTLCLASS)
 		cbqp->ifnp.ctl_ = cl;
 
 	return (0);
 }
 
 /*
  * Add the filter described by afp to the classifier of the CBQ instance
  * attached to the named interface, bound to the class given by
  * afp->cbq_class_handle.
  *
  *	Returns:	EBADF, for no CBQ state found for the interface.
  *			EINVAL, for a class handle that maps to no class.
  *			Otherwise the result of acc_add_filter().
  */
 static int
 cbq_add_filter(afp)
 	struct cbq_add_filter *afp;
 {
 	cbq_state_t	*state;
 	struct rm_class	*target;
 
 	state = altq_lookup(afp->cbq_iface.cbq_ifacename, ALTQT_CBQ);
 	if (state == NULL)
 		return (EBADF);
 
 	/* translate the class handle into a class pointer */
 	target = clh_to_clp(state, afp->cbq_class_handle);
 	if (target == NULL)
 		return (EINVAL);
 
 	return acc_add_filter(&state->cbq_classifier, &afp->cbq_filter,
 			      target, &afp->cbq_filter_handle);
 }
 
 /*
  * Remove the filter identified by dfp->cbq_filter_handle from the
  * classifier of the CBQ instance attached to the named interface.
  *
  *	Returns:	EBADF, for no CBQ state found for the interface.
  *			Otherwise the result of acc_delete_filter().
  */
 static int
 cbq_delete_filter(dfp)
 	struct cbq_delete_filter *dfp;
 {
 	cbq_state_t	*state;
 
 	state = altq_lookup(dfp->cbq_iface.cbq_ifacename, ALTQT_CBQ);
 	if (state == NULL)
 		return (EBADF);
 
 	return acc_delete_filter(&state->cbq_classifier,
 				 dfp->cbq_filter_handle);
 }
 
 /*
  * cbq_clear_hierarchy looks up the CBQ state of the given interface and
  * hands it to cbq_clear_interface().
  *
  *	Returns:	EBADF, for no CBQ state found for the interface.
  *			Otherwise the result of cbq_clear_interface().
  */
 static int
 cbq_clear_hierarchy(ifacep)
 	struct cbq_interface *ifacep;
 {
 	cbq_state_t	*state;
 
 	state = altq_lookup(ifacep->cbq_ifacename, ALTQT_CBQ);
 	if (state == NULL)
 		return (EBADF);
 
 	return cbq_clear_interface(state);
 }
 
 /*
  * static int
  * cbq_set_enable(struct cbq_enable *ep) - this function processed the
  *	ioctl request to enable class based queueing.  It searches the list
  *	of interfaces for the specified interface and then enables CBQ on
  *	that interface.
  *
  *	Returns:	0, for no error.
  *			EBADF, for specified inteface not found.
  */
 
 static int
 cbq_set_enable(ep, enable)
 	struct cbq_interface *ep;
 	int enable;
 {
 	int 	error = 0;
 	cbq_state_t	*cbqp;
 	char 	*ifacename;
 
 	ifacename = ep->cbq_ifacename;
 	if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
 		return (EBADF);
 
 	switch (enable) {
 	case ENABLE:
 		if (cbqp->ifnp.root_ == NULL || cbqp->ifnp.default_ == NULL ||
 		    cbqp->ifnp.ctl_ == NULL) {
 			if (cbqp->ifnp.root_ == NULL)
 				printf("No Root Class for %s\n", ifacename);
 			if (cbqp->ifnp.default_ == NULL)
 				printf("No Default Class for %s\n", ifacename);
 			if (cbqp->ifnp.ctl_ == NULL)
 				printf("No Control Class for %s\n", ifacename);
 			error = EINVAL;
 		} else if ((error = altq_enable(cbqp->ifnp.ifq_)) == 0) {
 			cbqp->cbq_qlen = 0;
 		}
 		break;
 
 	case DISABLE:
 		error = altq_disable(cbqp->ifnp.ifq_);
 		break;
 	}
 	return (error);
 }
 
 static int
 cbq_getstats(gsp)
 	struct cbq_getstats *gsp;
 {
 	char		*ifacename;
 	int		i, n, nclasses;
 	cbq_state_t	*cbqp;
 	struct rm_class	*cl;
 	class_stats_t	stats, *usp;
 	int error = 0;
 
 	ifacename = gsp->iface.cbq_ifacename;
 	nclasses = gsp->nclasses;
 	usp = gsp->stats;
 
 	if ((cbqp = altq_lookup(ifacename, ALTQT_CBQ)) == NULL)
 		return (EBADF);
 	if (nclasses <= 0)
 		return (EINVAL);
 
 	for (n = 0, i = 0; n < nclasses && i < CBQ_MAX_CLASSES; n++, i++) {
 		while ((cl = cbqp->cbq_class_tbl[i]) == NULL)
 			if (++i >= CBQ_MAX_CLASSES)
 				goto out;
 
 		get_class_stats(&stats, cl);
 		stats.handle = cl->stats_.handle;
 
 		if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
 		    sizeof(stats))) != 0)
 			return (error);
 	}
 
  out:
 	gsp->nclasses = n;
 	return (error);
 }
 
 static int
 cbq_ifattach(ifacep)
 	struct cbq_interface *ifacep;
 {
 	int		error = 0;
 	char		*ifacename;
 	cbq_state_t	*new_cbqp;
 	struct ifnet 	*ifp;
 
 	ifacename = ifacep->cbq_ifacename;
 	if ((ifp = ifunit(ifacename)) == NULL)
 		return (ENXIO);
 	if (!ALTQ_IS_READY(&ifp->if_snd))
 		return (ENXIO);
 
 	/* allocate and initialize cbq_state_t */
 	new_cbqp = malloc(sizeof(cbq_state_t), M_DEVBUF, M_WAITOK);
 	if (new_cbqp == NULL)
 		return (ENOMEM);
 	bzero(new_cbqp, sizeof(cbq_state_t));
  	CALLOUT_INIT(&new_cbqp->cbq_callout);
 
 	new_cbqp->cbq_qlen = 0;
 	new_cbqp->ifnp.ifq_ = &ifp->if_snd;	    /* keep the ifq */
 
 	/*
 	 * set CBQ to this ifnet structure.
 	 */
 	error = altq_attach(&ifp->if_snd, ALTQT_CBQ, new_cbqp,
 			    cbq_enqueue, cbq_dequeue, cbq_request,
 			    &new_cbqp->cbq_classifier, acc_classify);
 	if (error) {
 		free(new_cbqp, M_DEVBUF);
 		return (error);
 	}
 
 	/* prepend to the list of cbq_state_t's. */
 	new_cbqp->cbq_next = cbq_list;
 	cbq_list = new_cbqp;
 
 	return (0);
 }
 
 /*
  * Detach CBQ from the interface named in ifacep: disable it, clear the
  * interface with cbq_clear_interface(), call altq_detach() on the queue,
  * unlink the state from cbq_list and free it.  The results of the
  * disable, clear and detach steps are not checked.
  *
  *	Returns:	0, for no error.
  *			EBADF, for no CBQ state found for the interface.
  */
 static int
 cbq_ifdetach(ifacep)
 	struct cbq_interface *ifacep;
 {
 	cbq_state_t 	*cbqp;
 
 	cbqp = altq_lookup(ifacep->cbq_ifacename, ALTQT_CBQ);
 	if (cbqp == NULL)
 		return (EBADF);
 
 	(void)cbq_set_enable(ifacep, DISABLE);
 
 	cbq_clear_interface(cbqp);
 
 	/* take CBQ off the ifnet's send queue */
 	(void)altq_detach(cbqp->ifnp.ifq_);
 
 	/* unlink the state from cbq_list */
 	if (cbq_list != cbqp) {
 		cbq_state_t *cp = cbq_list;
 
 		/* stop on the predecessor of cbqp, or at the list end */
 		while (cp != NULL && cp->cbq_next != cbqp)
 			cp = cp->cbq_next;
 		if (cp != NULL)
 			cp->cbq_next = cbqp->cbq_next;
 		ASSERT(cp != NULL);
 	} else
 		cbq_list = cbqp->cbq_next;
 
 	/* release the state itself */
 	free(cbqp, M_DEVBUF);
 
 	return (0);
 }
 
 /*
  * cbq device interface
  */
 
 altqdev_decl(cbq);
 
 /*
  * Device open entry point for cbq.  It does no work and always returns 0;
  * none of the arguments are used.  The type of p is selected by
  * __FreeBSD_version.
  */
 int
 cbqopen(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	return (0);
 }
 
 /*
  * Device close entry point for cbq.  Every CBQ state still on cbq_list is
  * detached by interface name through cbq_ifdetach().  None of the
  * arguments are used.
  *
  *	Returns:	0, for no error; otherwise the first error
  *			reported by cbq_ifdetach().
  */
 int
 cbqclose(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	struct ifnet *ifp;
 	struct cbq_interface iface;
 	void *head;
 	int err, error = 0;
 
 	while (cbq_list) {
 		head = cbq_list;
 		ifp = cbq_list->ifnp.ifq_->altq_ifp;
 		/* bounded copy: never write past the name buffer */
 		snprintf(iface.cbq_ifacename, sizeof(iface.cbq_ifacename),
 		    "%s", ifp->if_xname);
 		err = cbq_ifdetach(&iface);
 		if (err != 0) {
 			if (error == 0)
 				error = err;
 			/*
 			 * A failed detach that left the same entry at the
 			 * head would be retried forever; give up instead.
 			 */
 			if ((void *)cbq_list == head)
 				break;
 		}
 	}
 
 	return (error);
 }
 
 /*
  * Device ioctl entry point for cbq.  Every command except CBQ_GETSTATS
  * first passes a privilege check; the command is then dispatched to the
  * matching handler with addr cast to that handler's request structure.
  *
  *	Returns:	0, for no error.
  *			The privilege-check error, when the check fails.
  *			EINVAL, for an unknown command.
  *			Otherwise the result of the command handler.
  */
 int
 cbqioctl(dev, cmd, addr, flag, p)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	caddr_t addr;
 	int flag;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	int	error = 0;
 
 	/* check cmd for superuser only */
 	switch (cmd) {
 	case CBQ_GETSTATS:
 		/* currently only command that an ordinary user can call */
 		break;
 	default:
 		/*
 		 * "#elif" is the valid spelling; an unknown directive such
 		 * as "#elsif" is an error whenever the first branch is the
 		 * one being compiled.
 		 */
 #if (__FreeBSD_version > 700000)
 		error = priv_check(p, PRIV_ALTQ_MANAGE);
 #elif (__FreeBSD_version > 400000)
 		error = suser(p);
 #else
 		error = suser(p->p_ucred, &p->p_acflag);
 #endif
 		if (error)
 			return (error);
 		break;
 	}
 
 	switch (cmd) {
 
 	case CBQ_ENABLE:
 		error = cbq_set_enable((struct cbq_interface *)addr, ENABLE);
 		break;
 
 	case CBQ_DISABLE:
 		error = cbq_set_enable((struct cbq_interface *)addr, DISABLE);
 		break;
 
 	case CBQ_ADD_FILTER:
 		error = cbq_add_filter((struct cbq_add_filter *)addr);
 		break;
 
 	case CBQ_DEL_FILTER:
 		error = cbq_delete_filter((struct cbq_delete_filter *)addr);
 		break;
 
 	case CBQ_ADD_CLASS:
 		error = cbq_add_class((struct cbq_add_class *)addr);
 		break;
 
 	case CBQ_DEL_CLASS:
 		error = cbq_delete_class((struct cbq_delete_class *)addr);
 		break;
 
 	case CBQ_MODIFY_CLASS:
 		error = cbq_modify_class((struct cbq_modify_class *)addr);
 		break;
 
 	case CBQ_CLEAR_HIERARCHY:
 		error = cbq_clear_hierarchy((struct cbq_interface *)addr);
 		break;
 
 	case CBQ_IF_ATTACH:
 		error = cbq_ifattach((struct cbq_interface *)addr);
 		break;
 
 	case CBQ_IF_DETACH:
 		error = cbq_ifdetach((struct cbq_interface *)addr);
 		break;
 
 	case CBQ_GETSTATS:
 		error = cbq_getstats((struct cbq_getstats *)addr);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	return error;
 }
 
 #if 0
 /* for debug */
 static void cbq_class_dump(int);
 
 /*
  * Debug helper: print the fields of the class stored in slot i of the
  * class table of the first entry on cbq_list, together with its
  * statistics and queue.  Prints a notice and returns when cbq_list is
  * empty; prints only the slot header when the slot holds no class.
  * The index i is not range-checked.
  */
 static void cbq_class_dump(i)
 	int i;
 {
 	struct rm_class *cl;
 	rm_class_stats_t *s;
 	struct _class_queue_ *q;
 
 	if (cbq_list == NULL) {
 		printf("cbq_class_dump: no cbq_state found\n");
 		return;
 	}
 	cl = cbq_list->cbq_class_tbl[i];
 
 	printf("class %d cl=%p\n", i, cl);
 	if (cl != NULL) {
 		s = &cl->stats_;
 		q = cl->q_;
 
 		printf("pri=%d, depth=%d, maxrate=%d, allotment=%d\n",
 		       cl->pri_, cl->depth_, cl->maxrate_, cl->allotment_);
 		printf("w_allotment=%d, bytes_alloc=%d, avgidle=%d, maxidle=%d\n",
 		       cl->w_allotment_, cl->bytes_alloc_, cl->avgidle_,
 		       cl->maxidle_);
 		printf("minidle=%d, offtime=%d, sleeping=%d, leaf=%d\n",
 		       cl->minidle_, cl->offtime_, cl->sleeping_, cl->leaf_);
 		printf("handle=%d, depth=%d, packets=%d, bytes=%d\n",
 		       s->handle, s->depth,
 		       (int)s->xmit_cnt.packets, (int)s->xmit_cnt.bytes);
 		/* one record per line: no newline in the middle */
 		printf("over=%d, borrows=%d, drops=%d, overactions=%d, delays=%d\n",
 		       s->over, s->borrows, (int)s->drop_cnt.packets,
 		       s->overactions, s->delays);
 		printf("tail=%p, head=%p, qlen=%d, qlim=%d, qthresh=%d,qtype=%d\n",
 		       q->tail_, q->head_, q->qlen_, q->qlim_,
 		       q->qthresh_, q->qtype_);
 	}
 }
 #endif /* 0 */
 
 #ifdef KLD_MODULE
 
 /*
  * Loadable-module glue.  cbq_sw bundles the name "cbq" with the open,
  * close and ioctl entry points defined in this file and is handed to
  * ALTQ_MODULE() together with the ALTQT_CBQ type.
  */
 static struct altqsw cbq_sw =
 	{"cbq", cbqopen, cbqclose, cbqioctl};
 
 ALTQ_MODULE(altq_cbq, ALTQT_CBQ, &cbq_sw);
 /* altq_cbq is declared to depend on the altq_red and altq_rio modules */
 MODULE_DEPEND(altq_cbq, altq_red, 1, 1, 1);
 MODULE_DEPEND(altq_cbq, altq_rio, 1, 1, 1);
 
 #endif /* KLD_MODULE */
 #endif /* ALTQ3_COMPAT */
 
 #endif /* ALTQ_CBQ */
Index: user/ngie/more-tests/sys/net/altq/altq_cdnr.c
===================================================================
--- user/ngie/more-tests/sys/net/altq/altq_cdnr.c	(revision 281675)
+++ user/ngie/more-tests/sys/net/altq/altq_cdnr.c	(revision 281676)
@@ -1,1390 +1,1382 @@
-/*	$FreeBSD$	*/
-/*	$KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $	*/
-
-/*
+/*-
  * Copyright (C) 1999-2002
  *	Sony Computer Science Laboratories Inc.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
+ *
+ * $KAME: altq_cdnr.c,v 1.15 2005/04/13 03:44:24 suz Exp $
+ * $FreeBSD$
  */
 
-#if defined(__FreeBSD__) || defined(__NetBSD__)
 #include "opt_altq.h"
 #include "opt_inet.h"
-#ifdef __FreeBSD__
 #include "opt_inet6.h"
-#endif
-#endif /* __FreeBSD__ || __NetBSD__ */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 
 #include <net/altq/if_altq.h>
 #include <net/altq/altq.h>
 #ifdef ALTQ3_COMPAT
 #include <net/altq/altq_conf.h>
 #endif
 #include <net/altq/altq_cdnr.h>
 
 #ifdef ALTQ3_COMPAT
 /*
  * diffserv traffic conditioning module
  */
 
 int altq_cdnr_enabled = 0;
 
 /* traffic conditioner is enabled by ALTQ_CDNR option in opt_altq.h */
 #ifdef ALTQ_CDNR
 
 /* cdnr_list keeps all cdnr's allocated. */
 static LIST_HEAD(, top_cdnr) tcb_list;
 
 static int altq_cdnr_input(struct mbuf *, int);
 static struct top_cdnr *tcb_lookup(char *ifname);
 static struct cdnr_block *cdnr_handle2cb(u_long);
 static u_long cdnr_cb2handle(struct cdnr_block *);
 static void *cdnr_cballoc(struct top_cdnr *, int,
        struct tc_action *(*)(struct cdnr_block *, struct cdnr_pktinfo *));
 static void cdnr_cbdestroy(void *);
 static int tca_verify_action(struct tc_action *);
 static void tca_import_action(struct tc_action *, struct tc_action *);
 static void tca_invalidate_action(struct tc_action *);
 
 static int generic_element_destroy(struct cdnr_block *);
 static struct top_cdnr *top_create(struct ifaltq *);
 static int top_destroy(struct top_cdnr *);
 static struct cdnr_block *element_create(struct top_cdnr *, struct tc_action *);
 static int element_destroy(struct cdnr_block *);
 static void tb_import_profile(struct tbe *, struct tb_profile *);
 static struct tbmeter *tbm_create(struct top_cdnr *, struct tb_profile *,
 				  struct tc_action *, struct tc_action *);
 static int tbm_destroy(struct tbmeter *);
 static struct tc_action *tbm_input(struct cdnr_block *, struct cdnr_pktinfo *);
 static struct trtcm *trtcm_create(struct top_cdnr *,
 		  struct tb_profile *, struct tb_profile *,
 		  struct tc_action *, struct tc_action *, struct tc_action *,
 		  int);
 static int trtcm_destroy(struct trtcm *);
 static struct tc_action *trtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
 static struct tswtcm *tswtcm_create(struct top_cdnr *,
 		  u_int32_t, u_int32_t, u_int32_t,
 		  struct tc_action *, struct tc_action *, struct tc_action *);
 static int tswtcm_destroy(struct tswtcm *);
 static struct tc_action *tswtcm_input(struct cdnr_block *, struct cdnr_pktinfo *);
 
 static int cdnrcmd_if_attach(char *);
 static int cdnrcmd_if_detach(char *);
 static int cdnrcmd_add_element(struct cdnr_add_element *);
 static int cdnrcmd_delete_element(struct cdnr_delete_element *);
 static int cdnrcmd_add_filter(struct cdnr_add_filter *);
 static int cdnrcmd_delete_filter(struct cdnr_delete_filter *);
 static int cdnrcmd_add_tbm(struct cdnr_add_tbmeter *);
 static int cdnrcmd_modify_tbm(struct cdnr_modify_tbmeter *);
 static int cdnrcmd_tbm_stats(struct cdnr_tbmeter_stats *);
 static int cdnrcmd_add_trtcm(struct cdnr_add_trtcm *);
 static int cdnrcmd_modify_trtcm(struct cdnr_modify_trtcm *);
 static int cdnrcmd_tcm_stats(struct cdnr_tcm_stats *);
 static int cdnrcmd_add_tswtcm(struct cdnr_add_tswtcm *);
 static int cdnrcmd_modify_tswtcm(struct cdnr_modify_tswtcm *);
 static int cdnrcmd_get_stats(struct cdnr_get_stats *);
 
 altqdev_decl(cdnr);
 
 /*
  * top level input function called from ip_input.
  * should be called before converting header fields to host-byte-order.
  *
  * The packet is classified against the conditioner attached to its
  * receive interface and the resulting chain of actions is followed
  * until one of them ends the walk.
  *
  *	Returns:	1, when conditioning is not enabled on the
  *			interface, or the final action is PASS, MARK,
  *			NONE or an unknown code.
  *			0, when the final action is DROP (the mbuf is
  *			freed here) or RETURN (the mbuf is not freed here).
  */
 int
 altq_cdnr_input(m, af)
 	struct mbuf	*m;
 	int		af;	/* address family */
 {
 	struct ifnet		*ifp;
 	struct ip		*ip;
 	struct top_cdnr		*top;
 	struct tc_action	*tca;
 	struct cdnr_block	*cb;
 	struct cdnr_pktinfo	pktinfo;
 
 	ifp = m->m_pkthdr.rcvif;
 	if (!ALTQ_IS_CNDTNING(&ifp->if_snd))
 		/* traffic conditioner is not enabled on this interface */
 		return (1);
 
 	top = ifp->if_snd.altq_cdnr;
 
 	/* extract the DSCP: from the flow word for IPv6, else from ip_tos */
 	ip = mtod(m, struct ip *);
 #ifdef INET6
 	if (af == AF_INET6) {
 		u_int32_t flowlabel;
 
 		flowlabel = ((struct ip6_hdr *)ip)->ip6_flow;
 		pktinfo.pkt_dscp = (ntohl(flowlabel) >> 20) & DSCP_MASK;
 	} else
 #endif
 		pktinfo.pkt_dscp = ip->ip_tos & DSCP_MASK;
 	pktinfo.pkt_len = m_pktlen(m);
 
 	tca = NULL;
 
 	/* start from the action of the matching block, if any ... */
 	cb = acc_classify(&top->tc_classifier, m, af);
 	if (cb != NULL)
 		tca = &cb->cb_action;
 
 	/* ... otherwise from the default action of the top conditioner */
 	if (tca == NULL)
 		tca = &top->tc_block.cb_action;
 
 	/* follow the action chain; every visited action code is counted */
 	while (1) {
 		PKTCNTR_ADD(&top->tc_cnts[tca->tca_code], pktinfo.pkt_len);
 
 		switch (tca->tca_code) {
 		case TCACODE_PASS:
 			return (1);
 		case TCACODE_DROP:
 			m_freem(m);
 			return (0);
 		case TCACODE_RETURN:
 			return (0);
 		case TCACODE_MARK:
 			/* rewrite only the DSCP bits, keep the others */
 #ifdef INET6
 			if (af == AF_INET6) {
 				struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;
 				u_int32_t flowlabel;
 
 				flowlabel = ntohl(ip6->ip6_flow);
 				flowlabel = (tca->tca_dscp << 20) |
 					(flowlabel & ~(DSCP_MASK << 20));
 				ip6->ip6_flow = htonl(flowlabel);
 			} else
 #endif
 				ip->ip_tos = tca->tca_dscp |
 					(ip->ip_tos & DSCP_CUMASK);
 			return (1);
 		case TCACODE_NEXT:
 			/*
 			 * hand the packet to the next block; its input
 			 * function returns the action to continue with.
 			 * NOTE(review): cb_input is called unchecked, and
 			 * cdnr_cballoc() leaves it zeroed for blocks made
 			 * without an input function -- confirm such a
 			 * block can never be a tca_next target.
 			 */
 			cb = tca->tca_next;
 			tca = (*cb->cb_input)(cb, &pktinfo);
 			break;
 		case TCACODE_NONE:
 		default:
 			return (1);
 		}
 	}
 }
 
 static struct top_cdnr *
 tcb_lookup(ifname)
 	char *ifname;
 {
 	struct top_cdnr *top;
 	struct ifnet *ifp;
 
 	if ((ifp = ifunit(ifname)) != NULL)
 		LIST_FOREACH(top, &tcb_list, tc_next)
 			if (top->tc_ifq->altq_ifp == ifp)
 				return (top);
 	return (NULL);
 }
 
 /*
  * Convert a handle back into a conditioner block pointer.  The handle is
  * the block address itself: it is accepted only when it is aligned per
  * ALIGN(), non-NULL, and the block's own cb_handle field holds the same
  * value.  Returns NULL otherwise.
  *
  * NOTE(review): the cb_handle comparison reads through the caller-given
  * value after only the alignment and NULL checks -- confirm callers can
  * only pass values that are mapped kernel addresses.
  */
 static struct cdnr_block *
 cdnr_handle2cb(handle)
 	u_long handle;
 {
 	struct cdnr_block *cb = (struct cdnr_block *)handle;
 
 	if (handle != ALIGN(cb) || cb == NULL || cb->cb_handle != handle)
 		return (NULL);
 	return (cb);
 }
 
 /*
  * Return the handle stored in the block's cb_handle field.  cb is
  * dereferenced without a NULL check.
  */
 static u_long
 cdnr_cb2handle(cb)
 	struct cdnr_block *cb;
 {
 	return (cb->cb_handle);
 }
 
 static void *
 cdnr_cballoc(top, type, input_func)
 	struct top_cdnr *top;
 	int type;
 	struct tc_action *(*input_func)(struct cdnr_block *,
 					struct cdnr_pktinfo *);
 {
 	struct cdnr_block *cb;
 	int size;
 
 	switch (type) {
 	case TCETYPE_TOP:
 		size = sizeof(struct top_cdnr);
 		break;
 	case TCETYPE_ELEMENT:
 		size = sizeof(struct cdnr_block);
 		break;
 	case TCETYPE_TBMETER:
 		size = sizeof(struct tbmeter);
 		break;
 	case TCETYPE_TRTCM:
 		size = sizeof(struct trtcm);
 		break;
 	case TCETYPE_TSWTCM:
 		size = sizeof(struct tswtcm);
 		break;
 	default:
 		return (NULL);
 	}
 
 	cb = malloc(size, M_DEVBUF, M_WAITOK);
 	if (cb == NULL)
 		return (NULL);
 	bzero(cb, size);
 
 	cb->cb_len = size;
 	cb->cb_type = type;
 	cb->cb_ref = 0;
 	cb->cb_handle = (u_long)cb;
 	if (top == NULL)
 		cb->cb_top = (struct top_cdnr *)cb;
 	else
 		cb->cb_top = top;
 
 	if (input_func != NULL) {
 		/*
 		 * if this cdnr has an action function,
 		 * make tc_action to call itself.
 		 */
 		cb->cb_action.tca_code = TCACODE_NEXT;
 		cb->cb_action.tca_next = cb;
 		cb->cb_input = input_func;
 	} else
 		cb->cb_action.tca_code = TCACODE_NONE;
 
 	/* if this isn't top, register the element to the top level cdnr */
 	if (top != NULL)
 		LIST_INSERT_HEAD(&top->tc_elements, cb, cb_next);
 
 	return ((void *)cb);
 }
 
 /*
  * Release a conditioner block: discard the filters that refer to it from
  * the classifier of its top conditioner, take it off the element list
  * unless the block is its own top, and free it.
  */
 static void
 cdnr_cbdestroy(cblock)
 	void *cblock;
 {
 	struct cdnr_block *cb = cblock;
 	int is_top;
 
 	/* drop the filters attached to this block */
 	acc_discard_filters(&cb->cb_top->tc_classifier, cb, 0);
 
 	/* only non-top blocks sit on an element list */
 	is_top = (cb->cb_top == cblock);
 	if (!is_top)
 		LIST_REMOVE(cb, cb_next);
 
 	free(cb, M_DEVBUF);
 }
 
 /*
  * Destroy a conditioner block of any type by dispatching on cb_type to
  * the matching destroy routine.
  *
  *	Returns:	the result of the type-specific destroy routine.
  *			EINVAL, for an unknown block type.
  */
 static int
 generic_element_destroy(cb)
 	struct cdnr_block *cb;
 {
 	switch (cb->cb_type) {
 	case TCETYPE_TOP:
 		return (top_destroy((struct top_cdnr *)cb));
 	case TCETYPE_ELEMENT:
 		return (element_destroy(cb));
 	case TCETYPE_TBMETER:
 		return (tbm_destroy((struct tbmeter *)cb));
 	case TCETYPE_TRTCM:
 		return (trtcm_destroy((struct trtcm *)cb));
 	case TCETYPE_TSWTCM:
 		return (tswtcm_destroy((struct tswtcm *)cb));
 	default:
 		break;
 	}
 	return (EINVAL);
 }
 
 /*
  * Check an action supplied by a caller.  PASS, DROP and MARK are always
  * accepted; HANDLE is accepted when cdnr_handle2cb() resolves the handle;
  * every other code (including NONE, RETURN and NEXT) is rejected.
  *
  *	Returns:	0, for an acceptable action.
  *			-1, otherwise.
  */
 static int
 tca_verify_action(utca)
 	struct tc_action *utca;
 {
 	switch (utca->tca_code) {
 	case TCACODE_PASS:
 	case TCACODE_DROP:
 	case TCACODE_MARK:
 		return (0);
 
 	case TCACODE_HANDLE:
 		/* the handle has to name an existing block */
 		return ((cdnr_handle2cb(utca->tca_handle) == NULL) ? -1 : 0);
 
 	default:
 		/* codes that must not come from a user */
 		return (-1);
 	}
 }
 
 /*
  * Copy the action *utca into *ktca and convert it for internal use:
  * a HANDLE action becomes a NEXT action that points at the referenced
  * block (whose reference count is incremented), or a NONE action when
  * the handle does not resolve; a MARK action has its dscp value masked
  * with DSCP_MASK.  Other codes are copied unchanged.
  */
 static void
 tca_import_action(ktca, utca)
 	struct tc_action *ktca, *utca;
 {
 	struct cdnr_block *target;
 
 	*ktca = *utca;
 
 	switch (ktca->tca_code) {
 	case TCACODE_HANDLE:
 		target = cdnr_handle2cb(ktca->tca_handle);
 		if (target == NULL) {
 			ktca->tca_code = TCACODE_NONE;
 			break;
 		}
 		ktca->tca_code = TCACODE_NEXT;
 		ktca->tca_next = target;
 		target->cb_ref++;
 		break;
 	case TCACODE_MARK:
 		ktca->tca_dscp &= DSCP_MASK;
 		break;
 	default:
 		break;
 	}
 }
 
 /*
  * Reset an action to TCACODE_NONE.  A NEXT action first gives up its
  * reference on the block it points at; a NEXT action whose tca_next is
  * NULL is left completely untouched.
  */
 static void
 tca_invalidate_action(tca)
 	struct tc_action *tca;
 {
 	if (tca->tca_code == TCACODE_NEXT) {
 		if (tca->tca_next == NULL)
 			return;
 		tca->tca_next->cb_ref--;
 	}
 	tca->tca_code = TCACODE_NONE;
 }
 
 /*
  * Create the top level traffic conditioner for the queue ifq.  The new
  * conditioner gets TCACODE_PASS as its default action, is put at the
  * head of tcb_list and is recorded in ifq->altq_cdnr.  Returns NULL when
  * the block cannot be allocated.
  */
 static struct top_cdnr *
 top_create(ifq)
 	struct ifaltq *ifq;
 {
 	struct top_cdnr *top;
 
 	top = cdnr_cballoc(NULL, TCETYPE_TOP, NULL);
 	if (top == NULL)
 		return (NULL);
 
 	/* the top conditioner passes packets unless told otherwise */
 	top->tc_block.cb_action.tca_code = TCACODE_PASS;
 	top->tc_ifq = ifq;
 	ifq->altq_cdnr = top;
 
 	LIST_INSERT_HEAD(&tcb_list, top, tc_next);
 
 	return (top);
 }
 
 /*
  * Tear down a top level conditioner: clear the conditioning state of its
  * queue, detach it from the queue, destroy its elements, unlink it from
  * tcb_list and free it.  When no remaining conditioner on tcb_list is
  * still conditioning, the altq_input hook is cleared.  Always returns 0.
  */
 static int
 top_destroy(top)
 	struct top_cdnr *top;
 {
 	struct cdnr_block *cb;
 
 	if (ALTQ_IS_CNDTNING(top->tc_ifq))
 		ALTQ_CLEAR_CNDTNING(top->tc_ifq);
 	top->tc_ifq->altq_cdnr = NULL;
 
 	/*
 	 * destroy all the conditioner elements belonging to this interface
 	 */
 	/*
 	 * Each pass restarts from the list head and destroys the first
 	 * element whose reference count is zero.
 	 * NOTE(review): if every remaining element has cb_ref > 0, or the
 	 * destroy call does not remove the element, the outer loop makes
 	 * no progress -- confirm that cannot happen.
 	 */
 	while ((cb = LIST_FIRST(&top->tc_elements)) != NULL) {
 		while (cb != NULL && cb->cb_ref > 0)
 			cb = LIST_NEXT(cb, cb_next);
 		if (cb != NULL)
 			generic_element_destroy(cb);
 	}
 
 	LIST_REMOVE(top, tc_next);
 
 	cdnr_cbdestroy(top);
 
 	/* if there is no active conditioner, remove the input hook */
 	if (altq_input != NULL) {
 		/* top is reused as the list cursor from here on */
 		LIST_FOREACH(top, &tcb_list, tc_next)
 			if (ALTQ_IS_CNDTNING(top->tc_ifq))
 				break;
 		if (top == NULL)
 			altq_input = NULL;
 	}
 
 	return (0);
 }
 
 /*
  * simple tc elements without input function (e.g., droppers and markers).
  *
  * element_create verifies the given action, allocates a plain element
  * under top and imports the action into it.  Returns NULL when the
  * action is rejected or the allocation fails.
  */
 static struct cdnr_block *
 element_create(top, action)
 	struct top_cdnr *top;
 	struct tc_action *action;
 {
 	struct cdnr_block *elem;
 
 	if (tca_verify_action(action) < 0)
 		return (NULL);
 
 	elem = cdnr_cballoc(top, TCETYPE_ELEMENT, NULL);
 	if (elem != NULL)
 		tca_import_action(&elem->cb_action, action);
 
 	return (elem);
 }
 
 /*
  * Destroy a plain element.  An element that is still referenced
  * (cb_ref > 0) is left alone.
  *
  *	Returns:	0, for no error.
  *			EBUSY, for an element still referenced.
  */
 static int
 element_destroy(cb)
 	struct cdnr_block *cb;
 {
 	if (cb->cb_ref <= 0) {
 		tca_invalidate_action(&cb->cb_action);
 		cdnr_cbdestroy(cb);
 		return (0);
 	}
 	return (EBUSY);
 }
 
 /*
  * internal representation of token bucket parameters
  *	rate: 	byte_per_unittime << 32
  *		(((bits_per_sec) / 8) << 32) / machclk_freq
  *	depth:	byte << 32
  *
  */
 #define	TB_SHIFT	32
 #define	TB_SCALE(x)	((u_int64_t)(x) << TB_SHIFT)
 #define	TB_UNSCALE(x)	((x) >> TB_SHIFT)
 
 /*
  * Load a token bucket from a profile: the rate is profile->rate divided
  * by 8, scaled by TB_SCALE() and divided by machclk_freq; the depth is
  * profile->depth scaled by TB_SCALE().  filluptime is depth / rate, or
  * all ones when the resulting rate is not positive.  The bucket starts
  * full and is stamped with the current machine clock.
  */
 static void
 tb_import_profile(tb, profile)
 	struct tbe *tb;
 	struct tb_profile *profile;
 {
 	tb->rate = TB_SCALE(profile->rate / 8) / machclk_freq;
 	tb->depth = TB_SCALE(profile->depth);
 
 	if (tb->rate <= 0)
 		tb->filluptime = 0xffffffffffffffffLL;
 	else
 		tb->filluptime = tb->depth / tb->rate;
 
 	tb->last = read_machclk();
 	tb->token = tb->depth;
 }
 
 /*
  * simple token bucket meter
  *
  * tbm_create verifies both actions, allocates a meter under top with
  * tbm_input() as its input function, loads the bucket from profile and
  * imports the in-profile and out-of-profile actions.  Returns NULL when
  * an action is rejected or the allocation fails.
  */
 static struct tbmeter *
 tbm_create(top, profile, in_action, out_action)
 	struct top_cdnr *top;
 	struct tb_profile *profile;
 	struct tc_action *in_action, *out_action;
 {
 	struct tbmeter *meter;
 
 	if (tca_verify_action(in_action) < 0)
 		return (NULL);
 	if (tca_verify_action(out_action) < 0)
 		return (NULL);
 
 	meter = cdnr_cballoc(top, TCETYPE_TBMETER, tbm_input);
 	if (meter == NULL)
 		return (NULL);
 
 	tb_import_profile(&meter->tb, profile);
 
 	tca_import_action(&meter->in_action, in_action);
 	tca_import_action(&meter->out_action, out_action);
 
 	return (meter);
 }
 
 /*
  * Destroy a token bucket meter.  A meter that is still referenced
  * (cb_ref > 0) is left alone.
  *
  *	Returns:	0, for no error.
  *			EBUSY, for a meter still referenced.
  */
 static int
 tbm_destroy(tbm)
 	struct tbmeter *tbm;
 {
 	if (tbm->cdnrblk.cb_ref <= 0) {
 		tca_invalidate_action(&tbm->in_action);
 		tca_invalidate_action(&tbm->out_action);
 		cdnr_cbdestroy(tbm);
 		return (0);
 	}
 	return (EBUSY);
 }
 
 /*
  * Input function of the token bucket meter.  The bucket is refilled only
  * when it holds fewer tokens than the scaled packet length.  A packet
  * that fits consumes its tokens, is counted in in_cnt and gets the in
  * action; otherwise it is counted in out_cnt and gets the out action.
  */
 static struct tc_action *
 tbm_input(cb, pktinfo)
 	struct cdnr_block *cb;
 	struct cdnr_pktinfo *pktinfo;
 {
 	struct tbmeter *tbm = (struct tbmeter *)cb;
 	u_int64_t	need;
 	u_int64_t	elapsed, now;
 
 	need = TB_SCALE(pktinfo->pkt_len);
 
 	if (tbm->tb.token < need) {
 		/* not enough tokens: credit the time since the last fill */
 		now = read_machclk();
 		elapsed = now - tbm->tb.last;
 		if (elapsed < tbm->tb.filluptime) {
 			tbm->tb.token += elapsed * tbm->tb.rate;
 			if (tbm->tb.token > tbm->tb.depth)
 				tbm->tb.token = tbm->tb.depth;
 		} else
 			tbm->tb.token = tbm->tb.depth;
 		tbm->tb.last = now;
 	}
 
 	if (tbm->tb.token >= need) {
 		/* in profile */
 		tbm->tb.token -= need;
 		PKTCNTR_ADD(&tbm->in_cnt, pktinfo->pkt_len);
 		return (&tbm->in_action);
 	}
 
 	/* out of profile */
 	PKTCNTR_ADD(&tbm->out_cnt, pktinfo->pkt_len);
 	return (&tbm->out_action);
 }
 
 /*
  * two rate three color marker
  * as described in draft-heinanen-diffserv-trtcm-01.txt
  *
  * trtcm_create verifies the three actions, allocates a marker under top
  * with trtcm_input() as its input function, loads the committed and peak
  * buckets and imports the actions.  The dscp of each color is the masked
  * dscp of its action when that action is a MARK action, and the matching
  * DSCP_AF1x default otherwise.  Returns NULL when an action is rejected
  * or the allocation fails.
  */
 static struct trtcm *
 trtcm_create(top, cmtd_profile, peak_profile,
 	     green_action, yellow_action, red_action, coloraware)
 	struct top_cdnr *top;
 	struct tb_profile *cmtd_profile, *peak_profile;
 	struct tc_action *green_action, *yellow_action, *red_action;
 	int	coloraware;
 {
 	struct trtcm *tcm;
 
 	if (tca_verify_action(green_action) < 0)
 		return (NULL);
 	if (tca_verify_action(yellow_action) < 0)
 		return (NULL);
 	if (tca_verify_action(red_action) < 0)
 		return (NULL);
 
 	tcm = cdnr_cballoc(top, TCETYPE_TRTCM, trtcm_input);
 	if (tcm == NULL)
 		return (NULL);
 
 	tb_import_profile(&tcm->cmtd_tb, cmtd_profile);
 	tb_import_profile(&tcm->peak_tb, peak_profile);
 
 	tca_import_action(&tcm->green_action, green_action);
 	tca_import_action(&tcm->yellow_action, yellow_action);
 	tca_import_action(&tcm->red_action, red_action);
 
 	/* choose the dscp that stands for each color */
 	tcm->green_dscp = (tcm->green_action.tca_code == TCACODE_MARK) ?
 	    (tcm->green_action.tca_dscp & DSCP_MASK) : DSCP_AF11;
 	tcm->yellow_dscp = (tcm->yellow_action.tca_code == TCACODE_MARK) ?
 	    (tcm->yellow_action.tca_dscp & DSCP_MASK) : DSCP_AF12;
 	tcm->red_dscp = (tcm->red_action.tca_code == TCACODE_MARK) ?
 	    (tcm->red_action.tca_dscp & DSCP_MASK) : DSCP_AF13;
 
 	tcm->coloraware = coloraware;
 
 	return (tcm);
 }
 
 /*
  * Destroy a two rate three color marker.  A marker that is still
  * referenced (cb_ref > 0) is left alone.
  *
  *	Returns:	0, for no error.
  *			EBUSY, for a marker still referenced.
  */
 static int
 trtcm_destroy(tcm)
 	struct trtcm *tcm;
 {
 	if (tcm->cdnrblk.cb_ref <= 0) {
 		tca_invalidate_action(&tcm->green_action);
 		tca_invalidate_action(&tcm->yellow_action);
 		tca_invalidate_action(&tcm->red_action);
 		cdnr_cbdestroy(tcm);
 		return (0);
 	}
 	return (EBUSY);
 }
 
 /*
  * Input function of the two rate three color marker.  The packet gets a
  * starting color (its own dscp when color-aware and that dscp is the
  * yellow or red one, green otherwise), both buckets are refilled when
  * they are short of tokens, and the packet is then marked red, yellow or
  * green.  pktinfo->pkt_dscp is overwritten with the chosen color's dscp,
  * the matching counter is updated and the matching action is returned.
  */
 static struct tc_action *
 trtcm_input(cb, pktinfo)
 	struct cdnr_block *cb;
 	struct cdnr_pktinfo *pktinfo;
 {
 	struct trtcm *tcm = (struct trtcm *)cb;
 	u_int64_t	len;
 	u_int64_t	interval, now;
 	u_int8_t	color;
 
 	len = TB_SCALE(pktinfo->pkt_len);
 	if (tcm->coloraware) {
 		color = pktinfo->pkt_dscp;
 		/* anything that is not yellow or red counts as green */
 		if (color != tcm->yellow_dscp && color != tcm->red_dscp)
 			color = tcm->green_dscp;
 	} else {
 		/* if color-blind, precolor it as green */
 		color = tcm->green_dscp;
 	}
 
 	now = read_machclk();
 	/* refill the committed bucket only when it is short of tokens */
 	if (tcm->cmtd_tb.token < len) {
 		interval = now - tcm->cmtd_tb.last;
 		if (interval >= tcm->cmtd_tb.filluptime)
 			tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
 		else {
 			tcm->cmtd_tb.token += interval * tcm->cmtd_tb.rate;
 			if (tcm->cmtd_tb.token > tcm->cmtd_tb.depth)
 				tcm->cmtd_tb.token = tcm->cmtd_tb.depth;
 		}
 		tcm->cmtd_tb.last = now;
 	}
 	/* same for the peak bucket */
 	if (tcm->peak_tb.token < len) {
 		interval = now - tcm->peak_tb.last;
 		if (interval >= tcm->peak_tb.filluptime)
 			tcm->peak_tb.token = tcm->peak_tb.depth;
 		else {
 			tcm->peak_tb.token += interval * tcm->peak_tb.rate;
 			if (tcm->peak_tb.token > tcm->peak_tb.depth)
 				tcm->peak_tb.token = tcm->peak_tb.depth;
 		}
 		tcm->peak_tb.last = now;
 	}
 
 	/* red: precolored red, or over the peak bucket; no tokens taken */
 	if (color == tcm->red_dscp || tcm->peak_tb.token < len) {
 		pktinfo->pkt_dscp = tcm->red_dscp;
 		PKTCNTR_ADD(&tcm->red_cnt, pktinfo->pkt_len);
 		return (&tcm->red_action);
 	}
 
 	/* yellow: precolored yellow, or over the committed bucket */
 	if (color == tcm->yellow_dscp || tcm->cmtd_tb.token < len) {
 		pktinfo->pkt_dscp = tcm->yellow_dscp;
 		tcm->peak_tb.token -= len;
 		PKTCNTR_ADD(&tcm->yellow_cnt, pktinfo->pkt_len);
 		return (&tcm->yellow_action);
 	}
 
 	/* green: tokens are taken from both buckets */
 	pktinfo->pkt_dscp = tcm->green_dscp;
 	tcm->cmtd_tb.token -= len;
 	tcm->peak_tb.token -= len;
 	PKTCNTR_ADD(&tcm->green_cnt, pktinfo->pkt_len);
 	return (&tcm->green_action);
 }
 
 /*
  * time sliding window three color marker
  * as described in draft-fang-diffserv-tc-tswtcm-00.txt
  *
  * tswtcm_create verifies the three actions, allocates a marker under top
  * with tswtcm_input() as its input function and imports the actions.
  * The dscp of each color is the masked dscp of its action when that
  * action is a MARK action, and the matching DSCP_AF1x default otherwise.
  * cmtd_rate and peak_rate are given in bits/sec, avg_interval in msec.
  * Returns NULL when an action is rejected or the allocation fails.
  */
 static struct tswtcm *
 tswtcm_create(top, cmtd_rate, peak_rate, avg_interval,
 	      green_action, yellow_action, red_action)
 	struct top_cdnr *top;
 	u_int32_t	cmtd_rate, peak_rate, avg_interval;
 	struct tc_action *green_action, *yellow_action, *red_action;
 {
 	struct tswtcm *tsw;
 
 	if (tca_verify_action(green_action) < 0)
 		return (NULL);
 	if (tca_verify_action(yellow_action) < 0)
 		return (NULL);
 	if (tca_verify_action(red_action) < 0)
 		return (NULL);
 
 	tsw = cdnr_cballoc(top, TCETYPE_TSWTCM, tswtcm_input);
 	if (tsw == NULL)
 		return (NULL);
 
 	tca_import_action(&tsw->green_action, green_action);
 	tca_import_action(&tsw->yellow_action, yellow_action);
 	tca_import_action(&tsw->red_action, red_action);
 
 	/* choose the dscp that stands for each color */
 	tsw->green_dscp = (tsw->green_action.tca_code == TCACODE_MARK) ?
 	    (tsw->green_action.tca_dscp & DSCP_MASK) : DSCP_AF11;
 	tsw->yellow_dscp = (tsw->yellow_action.tca_code == TCACODE_MARK) ?
 	    (tsw->yellow_action.tca_dscp & DSCP_MASK) : DSCP_AF12;
 	tsw->red_dscp = (tsw->red_action.tca_code == TCACODE_MARK) ?
 	    (tsw->red_action.tca_dscp & DSCP_MASK) : DSCP_AF13;
 
 	/* rates are kept in bytes/sec, the estimate starts at zero */
 	tsw->cmtd_rate = cmtd_rate / 8;
 	tsw->peak_rate = peak_rate / 8;
 	tsw->avg_rate = 0;
 
 	/* the averaging window is kept in machine clock units */
 	tsw->timewin = (u_int64_t)machclk_freq * avg_interval / 1000;
 
 	return (tsw);
 }
 
 /*
  * Destroy a time sliding window three color marker.  A marker that is
  * still referenced (cb_ref > 0) is left alone.
  *
  *	Returns:	0, for no error.
  *			EBUSY, for a marker still referenced.
  */
 static int
 tswtcm_destroy(tsw)
 	struct tswtcm *tsw;
 {
 	if (tsw->cdnrblk.cb_ref <= 0) {
 		tca_invalidate_action(&tsw->green_action);
 		tca_invalidate_action(&tsw->yellow_action);
 		tca_invalidate_action(&tsw->red_action);
 		cdnr_cbdestroy(tsw);
 		return (0);
 	}
 	return (EBUSY);
 }
 
 /*
  * Input function of the time sliding window three color marker.  The
  * average rate estimate is updated with the packet, and the packet is
  * then marked red, yellow or green by comparing a random value below the
  * average rate with the excess over the peak and committed rates.
  * pktinfo->pkt_dscp is overwritten with the chosen color's dscp, the
  * matching counter is updated and the matching action is returned.
  */
 static struct tc_action *
 tswtcm_input(cb, pktinfo)
 	struct cdnr_block *cb;
 	struct cdnr_pktinfo *pktinfo;
 {
 	struct tswtcm	*tsw = (struct tswtcm *)cb;
 	int		len;
 	u_int32_t	avg_rate;
 	u_int64_t	interval, now, tmp;
 
 	/*
 	 * rate estimator
 	 */
 	len = pktinfo->pkt_len;
 	now = read_machclk();
 
 	interval = now - tsw->t_front;
 	/*
 	 * calculate average rate:
 	 *	avg = (avg * timewin + pkt_len)/(timewin + interval)
 	 * pkt_len needs to be multiplied by machclk_freq in order to
 	 * get (bytes/sec).
 	 * note: when avg_rate (bytes/sec) and timewin (machclk unit) are
 	 * less than 32 bits, the following 64-bit operation has enough
 	 * precision.
 	 */
 	/*
 	 * NOTE(review): the divisor is zero when timewin is 0 and now
 	 * equals t_front -- confirm a zero averaging interval cannot be
 	 * configured.
 	 */
 	tmp = ((u_int64_t)tsw->avg_rate * tsw->timewin
 	       + (u_int64_t)len * machclk_freq) / (tsw->timewin + interval);
 	tsw->avg_rate = avg_rate = (u_int32_t)tmp;
 	tsw->t_front = now;
 
 	/*
 	 * marker
 	 */
 	if (avg_rate > tsw->cmtd_rate) {
 		/* avg_rate is nonzero here, so the modulus is safe */
 		u_int32_t randval = arc4random() % avg_rate;
 
 		if (avg_rate > tsw->peak_rate) {
 			if (randval < avg_rate - tsw->peak_rate) {
 				/* mark red */
 				pktinfo->pkt_dscp = tsw->red_dscp;
 				PKTCNTR_ADD(&tsw->red_cnt, len);
 				return (&tsw->red_action);
 			} else if (randval < avg_rate - tsw->cmtd_rate)
 				goto mark_yellow;
 		} else {
 			/* peak_rate >= avg_rate > cmtd_rate */
 			if (randval < avg_rate - tsw->cmtd_rate) {
 			mark_yellow:
 				pktinfo->pkt_dscp = tsw->yellow_dscp;
 				PKTCNTR_ADD(&tsw->yellow_cnt, len);
 				return (&tsw->yellow_action);
 			}
 		}
 	}
 
 	/* mark green */
 	pktinfo->pkt_dscp = tsw->green_dscp;
 	PKTCNTR_ADD(&tsw->green_cnt, len);
 	return (&tsw->green_action);
 }
 
 /*
  * ioctl requests
  */
 /*
  * Attach a top level conditioner to the interface named ifname.
  *
  *	Returns:	0, for no error.
  *			EBADF, for an unknown interface.
  *			EBUSY, for an interface that already has one.
  *			ENOMEM, for a failed top_create().
  */
 static int
 cdnrcmd_if_attach(ifname)
 	char *ifname;
 {
 	struct ifnet *ifp;
 
 	ifp = ifunit(ifname);
 	if (ifp == NULL)
 		return (EBADF);
 
 	/* only one conditioner per interface */
 	if (ifp->if_snd.altq_cdnr != NULL)
 		return (EBUSY);
 
 	if (top_create(&ifp->if_snd) == NULL)
 		return (ENOMEM);
 	return (0);
 }
 
 static int
 cdnrcmd_if_detach(ifname)
 	char *ifname;
 {
 	struct top_cdnr *top;
 
 	if ((top = tcb_lookup(ifname)) == NULL)
 		return (EBADF);
 
 	return top_destroy(top);
 }
 
 static int
 cdnrcmd_add_element(ap)
 	struct cdnr_add_element *ap;
 {
 	struct top_cdnr *top;
 	struct cdnr_block *cb;
 
 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
 		return (EBADF);
 
 	cb = element_create(top, &ap->action);
 	if (cb == NULL)
 		return (EINVAL);
 	/* return a class handle to the user */
 	ap->cdnr_handle = cdnr_cb2handle(cb);
 	return (0);
 }
 
 static int
 cdnrcmd_delete_element(ap)
 	struct cdnr_delete_element *ap;
 {
 	struct top_cdnr *top;
 	struct cdnr_block *cb;
 
 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
 		return (EBADF);
 
 	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
 		return (EINVAL);
 
 	if (cb->cb_type != TCETYPE_ELEMENT)
 		return generic_element_destroy(cb);
 
 	return element_destroy(cb);
 }
 
 static int
 cdnrcmd_add_filter(ap)
 	struct cdnr_add_filter *ap;
 {
 	struct top_cdnr *top;
 	struct cdnr_block *cb;
 
 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
 		return (EBADF);
 
 	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
 		return (EINVAL);
 
 	return acc_add_filter(&top->tc_classifier, &ap->filter,
 			      cb, &ap->filter_handle);
 }
 
 static int
 cdnrcmd_delete_filter(ap)
 	struct cdnr_delete_filter *ap;
 {
 	struct top_cdnr *top;
 
 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
 		return (EBADF);
 
 	return acc_delete_filter(&top->tc_classifier, ap->filter_handle);
 }
 
 static int
 cdnrcmd_add_tbm(ap)
 	struct cdnr_add_tbmeter *ap;
 {
 	struct top_cdnr *top;
 	struct tbmeter *tbm;
 
 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
 		return (EBADF);
 
 	tbm = tbm_create(top, &ap->profile, &ap->in_action, &ap->out_action);
 	if (tbm == NULL)
 		return (EINVAL);
 	/* return a class handle to the user */
 	ap->cdnr_handle = cdnr_cb2handle(&tbm->cdnrblk);
 	return (0);
 }
 
 static int
 cdnrcmd_modify_tbm(ap)
 	struct cdnr_modify_tbmeter *ap;
 {
 	struct tbmeter *tbm;
 
 	if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
 		return (EINVAL);
 
 	tb_import_profile(&tbm->tb, &ap->profile);
 
 	return (0);
 }
 
 static int
 cdnrcmd_tbm_stats(ap)
 	struct cdnr_tbmeter_stats *ap;
 {
 	struct tbmeter *tbm;
 
 	if ((tbm = (struct tbmeter *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
 		return (EINVAL);
 
 	ap->in_cnt = tbm->in_cnt;
 	ap->out_cnt = tbm->out_cnt;
 
 	return (0);
 }
 
 static int
 cdnrcmd_add_trtcm(ap)
 	struct cdnr_add_trtcm *ap;
 {
 	struct top_cdnr *top;
 	struct trtcm *tcm;
 
 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
 		return (EBADF);
 
 	tcm = trtcm_create(top, &ap->cmtd_profile, &ap->peak_profile,
 			   &ap->green_action, &ap->yellow_action,
 			   &ap->red_action, ap->coloraware);
 	if (tcm == NULL)
 		return (EINVAL);
 
 	/* return a class handle to the user */
 	ap->cdnr_handle = cdnr_cb2handle(&tcm->cdnrblk);
 	return (0);
 }
 
 static int
 cdnrcmd_modify_trtcm(ap)
 	struct cdnr_modify_trtcm *ap;
 {
 	struct trtcm *tcm;
 
 	if ((tcm = (struct trtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
 		return (EINVAL);
 
 	tb_import_profile(&tcm->cmtd_tb, &ap->cmtd_profile);
 	tb_import_profile(&tcm->peak_tb, &ap->peak_profile);
 
 	return (0);
 }
 
 static int
 cdnrcmd_tcm_stats(ap)
 	struct cdnr_tcm_stats *ap;
 {
 	struct cdnr_block *cb;
 
 	if ((cb = cdnr_handle2cb(ap->cdnr_handle)) == NULL)
 		return (EINVAL);
 
 	if (cb->cb_type == TCETYPE_TRTCM) {
 	    struct trtcm *tcm = (struct trtcm *)cb;
 
 	    ap->green_cnt = tcm->green_cnt;
 	    ap->yellow_cnt = tcm->yellow_cnt;
 	    ap->red_cnt = tcm->red_cnt;
 	} else if (cb->cb_type == TCETYPE_TSWTCM) {
 	    struct tswtcm *tsw = (struct tswtcm *)cb;
 
 	    ap->green_cnt = tsw->green_cnt;
 	    ap->yellow_cnt = tsw->yellow_cnt;
 	    ap->red_cnt = tsw->red_cnt;
 	} else
 	    return (EINVAL);
 
 	return (0);
 }
 
 static int
 cdnrcmd_add_tswtcm(ap)
 	struct cdnr_add_tswtcm *ap;
 {
 	struct top_cdnr *top;
 	struct tswtcm *tsw;
 
 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
 		return (EBADF);
 
 	if (ap->cmtd_rate > ap->peak_rate)
 		return (EINVAL);
 
 	tsw = tswtcm_create(top, ap->cmtd_rate, ap->peak_rate,
 			    ap->avg_interval, &ap->green_action,
 			    &ap->yellow_action, &ap->red_action);
 	if (tsw == NULL)
 	    return (EINVAL);
 
 	/* return a class handle to the user */
 	ap->cdnr_handle = cdnr_cb2handle(&tsw->cdnrblk);
 	return (0);
 }
 
 static int
 cdnrcmd_modify_tswtcm(ap)
 	struct cdnr_modify_tswtcm *ap;
 {
 	struct tswtcm *tsw;
 
 	if ((tsw = (struct tswtcm *)cdnr_handle2cb(ap->cdnr_handle)) == NULL)
 		return (EINVAL);
 
 	if (ap->cmtd_rate > ap->peak_rate)
 		return (EINVAL);
 
 	/* convert rates from bits/sec to bytes/sec */
 	tsw->cmtd_rate = ap->cmtd_rate / 8;
 	tsw->peak_rate = ap->peak_rate / 8;
 	tsw->avg_rate = 0;
 
 	/* timewin is converted from msec to machine clock unit */
 	tsw->timewin = (u_int64_t)machclk_freq * ap->avg_interval / 1000;
 
 	return (0);
 }
 
 static int
 cdnrcmd_get_stats(ap)
 	struct cdnr_get_stats *ap;
 {
 	struct top_cdnr *top;
 	struct cdnr_block *cb;
 	struct tbmeter *tbm;
 	struct trtcm *tcm;
 	struct tswtcm *tsw;
 	struct tce_stats tce, *usp;
 	int error, n, nskip, nelements;
 
 	if ((top = tcb_lookup(ap->iface.cdnr_ifname)) == NULL)
 		return (EBADF);
 
 	/* copy action stats */
 	bcopy(top->tc_cnts, ap->cnts, sizeof(ap->cnts));
 
 	/* stats for each element */
 	nelements = ap->nelements;
 	usp = ap->tce_stats;
 	if (nelements <= 0 || usp == NULL)
 		return (0);
 
 	nskip = ap->nskip;
 	n = 0;
 	LIST_FOREACH(cb, &top->tc_elements, cb_next) {
 		if (nskip > 0) {
 			nskip--;
 			continue;
 		}
 
 		bzero(&tce, sizeof(tce));
 		tce.tce_handle = cb->cb_handle;
 		tce.tce_type = cb->cb_type;
 		switch (cb->cb_type) {
 		case TCETYPE_TBMETER:
 			tbm = (struct tbmeter *)cb;
 			tce.tce_cnts[0] = tbm->in_cnt;
 			tce.tce_cnts[1] = tbm->out_cnt;
 			break;
 		case TCETYPE_TRTCM:
 			tcm = (struct trtcm *)cb;
 			tce.tce_cnts[0] = tcm->green_cnt;
 			tce.tce_cnts[1] = tcm->yellow_cnt;
 			tce.tce_cnts[2] = tcm->red_cnt;
 			break;
 		case TCETYPE_TSWTCM:
 			tsw = (struct tswtcm *)cb;
 			tce.tce_cnts[0] = tsw->green_cnt;
 			tce.tce_cnts[1] = tsw->yellow_cnt;
 			tce.tce_cnts[2] = tsw->red_cnt;
 			break;
 		default:
 			continue;
 		}
 
 		if ((error = copyout((caddr_t)&tce, (caddr_t)usp++,
 				     sizeof(tce))) != 0)
 			return (error);
 
 		if (++n == nelements)
 			break;
 	}
 	ap->nelements = n;
 
 	return (0);
 }
 
 /*
  * conditioner device interface
  */
 int
 cdnropen(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	if (machclk_freq == 0)
 		init_machclk();
 
 	if (machclk_freq == 0) {
 		printf("cdnr: no cpu clock available!\n");
 		return (ENXIO);
 	}
 
 	/* everything will be done when the queueing scheme is attached. */
 	return 0;
 }
 
 int
 cdnrclose(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	struct top_cdnr *top;
 	int err, error = 0;
 
 	while ((top = LIST_FIRST(&tcb_list)) != NULL) {
 		/* destroy all */
 		err = top_destroy(top);
 		if (err != 0 && error == 0)
 			error = err;
 	}
 	altq_input = NULL;
 
 	return (error);
 }
 
 int
 cdnrioctl(dev, cmd, addr, flag, p)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	caddr_t addr;
 	int flag;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	struct top_cdnr *top;
 	struct cdnr_interface *ifacep;
 	int	s, error = 0;
 
 	/* check super-user privilege */
 	switch (cmd) {
 	case CDNR_GETSTATS:
 		break;
 	default:
 #if (__FreeBSD_version > 700000)
 		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
 #elsif (__FreeBSD_version > 400000)
 		if ((error = suser(p)) != 0)
 #else
 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
 #endif
 			return (error);
 		break;
 	}
 
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	switch (cmd) {
 
 	case CDNR_IF_ATTACH:
 		ifacep = (struct cdnr_interface *)addr;
 		error = cdnrcmd_if_attach(ifacep->cdnr_ifname);
 		break;
 
 	case CDNR_IF_DETACH:
 		ifacep = (struct cdnr_interface *)addr;
 		error = cdnrcmd_if_detach(ifacep->cdnr_ifname);
 		break;
 
 	case CDNR_ENABLE:
 	case CDNR_DISABLE:
 		ifacep = (struct cdnr_interface *)addr;
 		if ((top = tcb_lookup(ifacep->cdnr_ifname)) == NULL) {
 			error = EBADF;
 			break;
 		}
 
 		switch (cmd) {
 
 		case CDNR_ENABLE:
 			ALTQ_SET_CNDTNING(top->tc_ifq);
 			if (altq_input == NULL)
 				altq_input = altq_cdnr_input;
 			break;
 
 		case CDNR_DISABLE:
 			ALTQ_CLEAR_CNDTNING(top->tc_ifq);
 			LIST_FOREACH(top, &tcb_list, tc_next)
 				if (ALTQ_IS_CNDTNING(top->tc_ifq))
 					break;
 			if (top == NULL)
 				altq_input = NULL;
 			break;
 		}
 		break;
 
 	case CDNR_ADD_ELEM:
 		error = cdnrcmd_add_element((struct cdnr_add_element *)addr);
 		break;
 
 	case CDNR_DEL_ELEM:
 		error = cdnrcmd_delete_element((struct cdnr_delete_element *)addr);
 		break;
 
 	case CDNR_ADD_TBM:
 		error = cdnrcmd_add_tbm((struct cdnr_add_tbmeter *)addr);
 		break;
 
 	case CDNR_MOD_TBM:
 		error = cdnrcmd_modify_tbm((struct cdnr_modify_tbmeter *)addr);
 		break;
 
 	case CDNR_TBM_STATS:
 		error = cdnrcmd_tbm_stats((struct cdnr_tbmeter_stats *)addr);
 		break;
 
 	case CDNR_ADD_TCM:
 		error = cdnrcmd_add_trtcm((struct cdnr_add_trtcm *)addr);
 		break;
 
 	case CDNR_MOD_TCM:
 		error = cdnrcmd_modify_trtcm((struct cdnr_modify_trtcm *)addr);
 		break;
 
 	case CDNR_TCM_STATS:
 		error = cdnrcmd_tcm_stats((struct cdnr_tcm_stats *)addr);
 		break;
 
 	case CDNR_ADD_FILTER:
 		error = cdnrcmd_add_filter((struct cdnr_add_filter *)addr);
 		break;
 
 	case CDNR_DEL_FILTER:
 		error = cdnrcmd_delete_filter((struct cdnr_delete_filter *)addr);
 		break;
 
 	case CDNR_GETSTATS:
 		error = cdnrcmd_get_stats((struct cdnr_get_stats *)addr);
 		break;
 
 	case CDNR_ADD_TSW:
 		error = cdnrcmd_add_tswtcm((struct cdnr_add_tswtcm *)addr);
 		break;
 
 	case CDNR_MOD_TSW:
 		error = cdnrcmd_modify_tswtcm((struct cdnr_modify_tswtcm *)addr);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 	splx(s);
 
 	return error;
 }
 
 #ifdef KLD_MODULE
 
 static struct altqsw cdnr_sw =
 	{"cdnr", cdnropen, cdnrclose, cdnrioctl};
 
 ALTQ_MODULE(altq_cdnr, ALTQT_CDNR, &cdnr_sw);
 
 #endif /* KLD_MODULE */
 
 #endif /* ALTQ3_COMPAT */
 #endif /* ALTQ_CDNR */
Index: user/ngie/more-tests/sys/net/altq/altq_classq.h
===================================================================
--- user/ngie/more-tests/sys/net/altq/altq_classq.h	(revision 281675)
+++ user/ngie/more-tests/sys/net/altq/altq_classq.h	(revision 281676)
@@ -1,206 +1,207 @@
-/*	$KAME: altq_classq.h,v 1.6 2003/01/07 07:33:38 kjc Exp $	*/
-
-/*
+/*-
  * Copyright (c) 1991-1997 Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the Network Research
  *	Group at Lawrence Berkeley Laboratory.
  * 4. Neither the name of the University nor of the Laboratory may be used
  *    to endorse or promote products derived from this software without
  *    specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
+ *
+ * $KAME: altq_classq.h,v 1.6 2003/01/07 07:33:38 kjc Exp $
+ * $FreeBSD$
  */
 /*
  * class queue definitions extracted from rm_class.h.
  */
 #ifndef _ALTQ_ALTQ_CLASSQ_H_
 #define	_ALTQ_ALTQ_CLASSQ_H_
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 /*
  * Packet Queue types: RED or DROPHEAD.
  */
 #define	Q_DROPHEAD	0x00
 #define	Q_RED		0x01
 #define	Q_RIO		0x02
 #define	Q_DROPTAIL	0x03
 
 #ifdef _KERNEL
 
 /*
  * Packet Queue structures and macros to manipulate them.
  */
 struct _class_queue_ {
 	struct mbuf	*tail_;	/* Tail of packet queue */
 	int	qlen_;		/* Queue length (in number of packets) */
 	int	qlim_;		/* Queue limit (in number of packets*) */
 	int	qtype_;		/* Queue type */
 };
 
 typedef struct _class_queue_	class_queue_t;
 
 #define	qtype(q)	(q)->qtype_		/* Get queue type */
 #define	qlimit(q)	(q)->qlim_		/* Max packets to be queued */
 #define	qlen(q)		(q)->qlen_		/* Current queue length. */
 #define	qtail(q)	(q)->tail_		/* Tail of the queue */
 #define	qhead(q)	((q)->tail_ ? (q)->tail_->m_nextpkt : NULL)
 
 #define	qempty(q)	((q)->qlen_ == 0)	/* Is the queue empty?? */
 #define	q_is_red(q)	((q)->qtype_ == Q_RED)	/* Is the queue a red queue */
 #define	q_is_rio(q)	((q)->qtype_ == Q_RIO)	/* Is the queue a rio queue */
 #define	q_is_red_or_rio(q)	((q)->qtype_ == Q_RED || (q)->qtype_ == Q_RIO)
 
 #if !defined(__GNUC__) || defined(ALTQ_DEBUG)
 
 extern void		_addq(class_queue_t *, struct mbuf *);
 extern struct mbuf	*_getq(class_queue_t *);
 extern struct mbuf	*_getq_tail(class_queue_t *);
 extern struct mbuf	*_getq_random(class_queue_t *);
 extern void		_removeq(class_queue_t *, struct mbuf *);
 extern void		_flushq(class_queue_t *);
 
 #else /* __GNUC__ && !ALTQ_DEBUG */
 /*
  * inlined versions
  */
 static __inline void
 _addq(class_queue_t *q, struct mbuf *m)
 {
         struct mbuf *m0;
 
 	if ((m0 = qtail(q)) != NULL)
 		m->m_nextpkt = m0->m_nextpkt;
 	else
 		m0 = m;
 	m0->m_nextpkt = m;
 	qtail(q) = m;
 	qlen(q)++;
 }
 
 static __inline struct mbuf *
 _getq(class_queue_t *q)
 {
 	struct mbuf  *m, *m0;
 
 	if ((m = qtail(q)) == NULL)
 		return (NULL);
 	if ((m0 = m->m_nextpkt) != m)
 		m->m_nextpkt = m0->m_nextpkt;
 	else
 		qtail(q) = NULL;
 	qlen(q)--;
 	m0->m_nextpkt = NULL;
 	return (m0);
 }
 
 /* drop a packet at the tail of the queue */
 static __inline struct mbuf *
 _getq_tail(class_queue_t *q)
 {
 	struct mbuf *m, *m0, *prev;
 
 	if ((m = m0 = qtail(q)) == NULL)
 		return NULL;
 	do {
 		prev = m0;
 		m0 = m0->m_nextpkt;
 	} while (m0 != m);
 	prev->m_nextpkt = m->m_nextpkt;
 	if (prev == m)
 		qtail(q) = NULL;
 	else
 		qtail(q) = prev;
 	qlen(q)--;
 	m->m_nextpkt = NULL;
 	return (m);
 }
 
 /* randomly select a packet in the queue */
 static __inline struct mbuf *
 _getq_random(class_queue_t *q)
 {
 	struct mbuf *m;
 	int i, n;
 
 	if ((m = qtail(q)) == NULL)
 		return NULL;
 	if (m->m_nextpkt == m)
 		qtail(q) = NULL;
 	else {
 		struct mbuf *prev = NULL;
 
 		n = random() % qlen(q) + 1;
 		for (i = 0; i < n; i++) {
 			prev = m;
 			m = m->m_nextpkt;
 		}
 		prev->m_nextpkt = m->m_nextpkt;
 		if (m == qtail(q))
 			qtail(q) = prev;
 	}
 	qlen(q)--;
 	m->m_nextpkt = NULL;
 	return (m);
 }
 
 static __inline void
 _removeq(class_queue_t *q, struct mbuf *m)
 {
 	struct mbuf *m0, *prev;
 
 	m0 = qtail(q);
 	do {
 		prev = m0;
 		m0 = m0->m_nextpkt;
 	} while (m0 != m);
 	prev->m_nextpkt = m->m_nextpkt;
 	if (prev == m)
 		qtail(q) = NULL;
 	else if (qtail(q) == m)
 		qtail(q) = prev;
 	qlen(q)--;
 }
 
 static __inline void
 _flushq(class_queue_t *q)
 {
 	struct mbuf *m;
 
 	while ((m = _getq(q)) != NULL)
 		m_freem(m);
 }
 
 #endif /* __GNUC__ && !ALTQ_DEBUG */
 
 #endif /* _KERNEL */
 
 #ifdef __cplusplus
 }
 #endif
 
 #endif /* _ALTQ_ALTQ_CLASSQ_H_ */
Index: user/ngie/more-tests/sys/net/altq/altq_hfsc.c
===================================================================
--- user/ngie/more-tests/sys/net/altq/altq_hfsc.c	(revision 281675)
+++ user/ngie/more-tests/sys/net/altq/altq_hfsc.c	(revision 281676)
@@ -1,2222 +1,2202 @@
-/*	$FreeBSD$	*/
-/*	$KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $	*/
-
-/*
+/*-
  * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation is hereby granted (including for commercial or
  * for-profit use), provided that both the copyright notice and this
  * permission notice appear in all copies of the software, derivative
  * works, or modified versions, and any portions thereof.
  *
  * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
  * WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON PROVIDES THIS
  * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  * DAMAGE.
  *
  * Carnegie Mellon encourages (but does not require) users of this
  * software to return any improvements or extensions that they make,
  * and to grant Carnegie Mellon the rights to redistribute these
  * changes without encumbrance.
+ *
+ * $KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $
+ * $FreeBSD$
  */
 /*
  * H-FSC is described in Proceedings of SIGCOMM'97,
  * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
  * Real-Time and Priority Service"
  * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
  *
  * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
  * when a class has an upperlimit, the fit-time is computed from the
  * upperlimit service curve.  the link-sharing scheduler does not schedule
  * a class whose fit-time exceeds the current time.
  */
 
-#if defined(__FreeBSD__) || defined(__NetBSD__)
 #include "opt_altq.h"
 #include "opt_inet.h"
-#ifdef __FreeBSD__
 #include "opt_inet6.h"
-#endif
-#endif /* __FreeBSD__ || __NetBSD__ */
 
 #ifdef ALTQ_HFSC  /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #include <sys/queue.h>
 #if 1 /* ALTQ3_COMPAT */
 #include <sys/sockio.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
 #endif /* ALTQ3_COMPAT */
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <netinet/in.h>
 
 #include <netpfil/pf/pf.h>
 #include <netpfil/pf/pf_altq.h>
 #include <netpfil/pf/pf_mtag.h>
 #include <net/altq/altq.h>
 #include <net/altq/altq_hfsc.h>
 #ifdef ALTQ3_COMPAT
 #include <net/altq/altq_conf.h>
 #endif
 
 /*
  * function prototypes
  */
 static int			 hfsc_clear_interface(struct hfsc_if *);
 static int			 hfsc_request(struct ifaltq *, int, void *);
 static void			 hfsc_purge(struct hfsc_if *);
 static struct hfsc_class	*hfsc_class_create(struct hfsc_if *,
     struct service_curve *, struct service_curve *, struct service_curve *,
     struct hfsc_class *, int, int, int);
 static int			 hfsc_class_destroy(struct hfsc_class *);
 static struct hfsc_class	*hfsc_nextclass(struct hfsc_class *);
 static int			 hfsc_enqueue(struct ifaltq *, struct mbuf *,
 				    struct altq_pktattr *);
 static struct mbuf		*hfsc_dequeue(struct ifaltq *, int);
 
 static int		 hfsc_addq(struct hfsc_class *, struct mbuf *);
 static struct mbuf	*hfsc_getq(struct hfsc_class *);
 static struct mbuf	*hfsc_pollq(struct hfsc_class *);
 static void		 hfsc_purgeq(struct hfsc_class *);
 
 static void		 update_cfmin(struct hfsc_class *);
 static void		 set_active(struct hfsc_class *, int);
 static void		 set_passive(struct hfsc_class *);
 
 static void		 init_ed(struct hfsc_class *, int);
 static void		 update_ed(struct hfsc_class *, int);
 static void		 update_d(struct hfsc_class *, int);
 static void		 init_vf(struct hfsc_class *, int);
 static void		 update_vf(struct hfsc_class *, int, u_int64_t);
 static void		 ellist_insert(struct hfsc_class *);
 static void		 ellist_remove(struct hfsc_class *);
 static void		 ellist_update(struct hfsc_class *);
 struct hfsc_class	*hfsc_get_mindl(struct hfsc_if *, u_int64_t);
 static void		 actlist_insert(struct hfsc_class *);
 static void		 actlist_remove(struct hfsc_class *);
 static void		 actlist_update(struct hfsc_class *);
 
 static struct hfsc_class	*actlist_firstfit(struct hfsc_class *,
 				    u_int64_t);
 
 static __inline u_int64_t	seg_x2y(u_int64_t, u_int64_t);
 static __inline u_int64_t	seg_y2x(u_int64_t, u_int64_t);
 static __inline u_int64_t	m2sm(u_int);
 static __inline u_int64_t	m2ism(u_int);
 static __inline u_int64_t	d2dx(u_int);
 static u_int			sm2m(u_int64_t);
 static u_int			dx2d(u_int64_t);
 
 static void		sc2isc(struct service_curve *, struct internal_sc *);
 static void		rtsc_init(struct runtime_sc *, struct internal_sc *,
 			    u_int64_t, u_int64_t);
 static u_int64_t	rtsc_y2x(struct runtime_sc *, u_int64_t);
 static u_int64_t	rtsc_x2y(struct runtime_sc *, u_int64_t);
 static void		rtsc_min(struct runtime_sc *, struct internal_sc *,
 			    u_int64_t, u_int64_t);
 
 static void			 get_class_stats(struct hfsc_classstats *,
 				    struct hfsc_class *);
 static struct hfsc_class	*clh_to_clp(struct hfsc_if *, u_int32_t);
 
 
 #ifdef ALTQ3_COMPAT
 static struct hfsc_if *hfsc_attach(struct ifaltq *, u_int);
 static int hfsc_detach(struct hfsc_if *);
 static int hfsc_class_modify(struct hfsc_class *, struct service_curve *,
     struct service_curve *, struct service_curve *);
 
 static int hfsccmd_if_attach(struct hfsc_attach *);
 static int hfsccmd_if_detach(struct hfsc_interface *);
 static int hfsccmd_add_class(struct hfsc_add_class *);
 static int hfsccmd_delete_class(struct hfsc_delete_class *);
 static int hfsccmd_modify_class(struct hfsc_modify_class *);
 static int hfsccmd_add_filter(struct hfsc_add_filter *);
 static int hfsccmd_delete_filter(struct hfsc_delete_filter *);
 static int hfsccmd_class_stats(struct hfsc_class_stats *);
 
 altqdev_decl(hfsc);
 #endif /* ALTQ3_COMPAT */
 
 /*
  * macros
  */
 #define	is_a_parent_class(cl)	((cl)->cl_children != NULL)
 
 #define	HT_INFINITY	0xffffffffffffffffLL	/* infinite time value */
 
 #ifdef ALTQ3_COMPAT
 /* hif_list keeps all hfsc_if's allocated. */
 static struct hfsc_if *hif_list = NULL;
 #endif /* ALTQ3_COMPAT */
 
 int
 hfsc_pfattach(struct pf_altq *a)
 {
 	struct ifnet *ifp;
 	int s, error;
 
 	if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
 		return (EINVAL);
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	error = altq_attach(&ifp->if_snd, ALTQT_HFSC, a->altq_disc,
 	    hfsc_enqueue, hfsc_dequeue, hfsc_request, NULL, NULL);
 	splx(s);
 	return (error);
 }
 
 int
 hfsc_add_altq(struct pf_altq *a)
 {
 	struct hfsc_if *hif;
 	struct ifnet *ifp;
 
 	if ((ifp = ifunit(a->ifname)) == NULL)
 		return (EINVAL);
 	if (!ALTQ_IS_READY(&ifp->if_snd))
 		return (ENODEV);
 
 	hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (hif == NULL)
 		return (ENOMEM);
 
 	TAILQ_INIT(&hif->hif_eligible);
 	hif->hif_ifq = &ifp->if_snd;
 
 	/* keep the state in pf_altq */
 	a->altq_disc = hif;
 
 	return (0);
 }
 
 int
 hfsc_remove_altq(struct pf_altq *a)
 {
 	struct hfsc_if *hif;
 
 	if ((hif = a->altq_disc) == NULL)
 		return (EINVAL);
 	a->altq_disc = NULL;
 
 	(void)hfsc_clear_interface(hif);
 	(void)hfsc_class_destroy(hif->hif_rootclass);
 
 	free(hif, M_DEVBUF);
 
 	return (0);
 }
 
 int
 hfsc_add_queue(struct pf_altq *a)
 {
 	struct hfsc_if *hif;
 	struct hfsc_class *cl, *parent;
 	struct hfsc_opts *opts;
 	struct service_curve rtsc, lssc, ulsc;
 
 	if ((hif = a->altq_disc) == NULL)
 		return (EINVAL);
 
 	opts = &a->pq_u.hfsc_opts;
 
 	if (a->parent_qid == HFSC_NULLCLASS_HANDLE &&
 	    hif->hif_rootclass == NULL)
 		parent = NULL;
 	else if ((parent = clh_to_clp(hif, a->parent_qid)) == NULL)
 		return (EINVAL);
 
 	if (a->qid == 0)
 		return (EINVAL);
 
 	if (clh_to_clp(hif, a->qid) != NULL)
 		return (EBUSY);
 
 	rtsc.m1 = opts->rtsc_m1;
 	rtsc.d  = opts->rtsc_d;
 	rtsc.m2 = opts->rtsc_m2;
 	lssc.m1 = opts->lssc_m1;
 	lssc.d  = opts->lssc_d;
 	lssc.m2 = opts->lssc_m2;
 	ulsc.m1 = opts->ulsc_m1;
 	ulsc.d  = opts->ulsc_d;
 	ulsc.m2 = opts->ulsc_m2;
 
 	cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc,
 	    parent, a->qlimit, opts->flags, a->qid);
 	if (cl == NULL)
 		return (ENOMEM);
 
 	return (0);
 }
 
 int
 hfsc_remove_queue(struct pf_altq *a)
 {
 	struct hfsc_if *hif;
 	struct hfsc_class *cl;
 
 	if ((hif = a->altq_disc) == NULL)
 		return (EINVAL);
 
 	if ((cl = clh_to_clp(hif, a->qid)) == NULL)
 		return (EINVAL);
 
 	return (hfsc_class_destroy(cl));
 }
 
 int
 hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
 {
 	struct hfsc_if *hif;
 	struct hfsc_class *cl;
 	struct hfsc_classstats stats;
 	int error = 0;
 
 	if ((hif = altq_lookup(a->ifname, ALTQT_HFSC)) == NULL)
 		return (EBADF);
 
 	if ((cl = clh_to_clp(hif, a->qid)) == NULL)
 		return (EINVAL);
 
 	if (*nbytes < sizeof(stats))
 		return (EINVAL);
 
 	get_class_stats(&stats, cl);
 
 	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
 		return (error);
 	*nbytes = sizeof(stats);
 	return (0);
 }
 
 /*
  * bring the interface back to the initial state by discarding
  * all the filters and classes except the root class.
  */
 static int
 hfsc_clear_interface(struct hfsc_if *hif)
 {
 	struct hfsc_class	*cl;
 
 #ifdef ALTQ3_COMPAT
 	/* free the filters for this interface */
 	acc_discard_filters(&hif->hif_classifier, NULL, 1);
 #endif
 
 	/* clear out the classes */
 	while (hif->hif_rootclass != NULL &&
 	    (cl = hif->hif_rootclass->cl_children) != NULL) {
 		/*
 		 * remove the first leaf class found in the hierarchy
 		 * then start over
 		 */
 		for (; cl != NULL; cl = hfsc_nextclass(cl)) {
 			if (!is_a_parent_class(cl)) {
 				(void)hfsc_class_destroy(cl);
 				break;
 			}
 		}
 	}
 
 	return (0);
 }
 
 static int
 hfsc_request(struct ifaltq *ifq, int req, void *arg)
 {
 	struct hfsc_if	*hif = (struct hfsc_if *)ifq->altq_disc;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	switch (req) {
 	case ALTRQ_PURGE:
 		hfsc_purge(hif);
 		break;
 	}
 	return (0);
 }
 
 /* discard all the queued packets on the interface */
 static void
 hfsc_purge(struct hfsc_if *hif)
 {
 	struct hfsc_class *cl;
 
 	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
 		if (!qempty(cl->cl_q))
 			hfsc_purgeq(cl);
 	if (ALTQ_IS_ENABLED(hif->hif_ifq))
 		hif->hif_ifq->ifq_len = 0;
 }
 
 struct hfsc_class *
 hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
     struct service_curve *fsc, struct service_curve *usc,
     struct hfsc_class *parent, int qlimit, int flags, int qid)
 {
 	struct hfsc_class *cl, *p;
 	int i, s;
 
 	if (hif->hif_classes >= HFSC_MAX_CLASSES)
 		return (NULL);
 
 #ifndef ALTQ_RED
 	if (flags & HFCF_RED) {
 #ifdef ALTQ_DEBUG
 		printf("hfsc_class_create: RED not configured for HFSC!\n");
 #endif
 		return (NULL);
 	}
 #endif
 
 	cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (cl == NULL)
 		return (NULL);
 
 	cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (cl->cl_q == NULL)
 		goto err_ret;
 
 	TAILQ_INIT(&cl->cl_actc);
 
 	if (qlimit == 0)
 		qlimit = 50;  /* use default */
 	qlimit(cl->cl_q) = qlimit;
 	qtype(cl->cl_q) = Q_DROPTAIL;
 	qlen(cl->cl_q) = 0;
 	cl->cl_flags = flags;
 #ifdef ALTQ_RED
 	if (flags & (HFCF_RED|HFCF_RIO)) {
 		int red_flags, red_pkttime;
 		u_int m2;
 
 		m2 = 0;
 		if (rsc != NULL && rsc->m2 > m2)
 			m2 = rsc->m2;
 		if (fsc != NULL && fsc->m2 > m2)
 			m2 = fsc->m2;
 		if (usc != NULL && usc->m2 > m2)
 			m2 = usc->m2;
 
 		red_flags = 0;
 		if (flags & HFCF_ECN)
 			red_flags |= REDF_ECN;
 #ifdef ALTQ_RIO
 		if (flags & HFCF_CLEARDSCP)
 			red_flags |= RIOF_CLEARDSCP;
 #endif
 		if (m2 < 8)
 			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
 		else
 			red_pkttime = (int64_t)hif->hif_ifq->altq_ifp->if_mtu
 				* 1000 * 1000 * 1000 / (m2 / 8);
 		if (flags & HFCF_RED) {
 			cl->cl_red = red_alloc(0, 0,
 			    qlimit(cl->cl_q) * 10/100,
 			    qlimit(cl->cl_q) * 30/100,
 			    red_flags, red_pkttime);
 			if (cl->cl_red != NULL)
 				qtype(cl->cl_q) = Q_RED;
 		}
 #ifdef ALTQ_RIO
 		else {
 			cl->cl_red = (red_t *)rio_alloc(0, NULL,
 			    red_flags, red_pkttime);
 			if (cl->cl_red != NULL)
 				qtype(cl->cl_q) = Q_RIO;
 		}
 #endif
 	}
 #endif /* ALTQ_RED */
 
 	if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) {
 		cl->cl_rsc = malloc(sizeof(struct internal_sc),
 		    M_DEVBUF, M_NOWAIT);
 		if (cl->cl_rsc == NULL)
 			goto err_ret;
 		sc2isc(rsc, cl->cl_rsc);
 		rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0);
 		rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0);
 	}
 	if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) {
 		cl->cl_fsc = malloc(sizeof(struct internal_sc),
 		    M_DEVBUF, M_NOWAIT);
 		if (cl->cl_fsc == NULL)
 			goto err_ret;
 		sc2isc(fsc, cl->cl_fsc);
 		rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0);
 	}
 	if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) {
 		cl->cl_usc = malloc(sizeof(struct internal_sc),
 		    M_DEVBUF, M_NOWAIT);
 		if (cl->cl_usc == NULL)
 			goto err_ret;
 		sc2isc(usc, cl->cl_usc);
 		rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0);
 	}
 
 	cl->cl_id = hif->hif_classid++;
 	cl->cl_handle = qid;
 	cl->cl_hif = hif;
 	cl->cl_parent = parent;
 
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	IFQ_LOCK(hif->hif_ifq);
 	hif->hif_classes++;
 
 	/*
 	 * find a free slot in the class table.  if the slot matching
 	 * the lower bits of qid is free, use this slot.  otherwise,
 	 * use the first free slot.
 	 */
 	i = qid % HFSC_MAX_CLASSES;
 	if (hif->hif_class_tbl[i] == NULL)
 		hif->hif_class_tbl[i] = cl;
 	else {
 		for (i = 0; i < HFSC_MAX_CLASSES; i++)
 			if (hif->hif_class_tbl[i] == NULL) {
 				hif->hif_class_tbl[i] = cl;
 				break;
 			}
 		if (i == HFSC_MAX_CLASSES) {
 			IFQ_UNLOCK(hif->hif_ifq);
 			splx(s);
 			goto err_ret;
 		}
 	}
 
 	if (flags & HFCF_DEFAULTCLASS)
 		hif->hif_defaultclass = cl;
 
 	if (parent == NULL) {
 		/* this is root class */
 		hif->hif_rootclass = cl;
 	} else {
 		/* add this class to the children list of the parent */
 		if ((p = parent->cl_children) == NULL)
 			parent->cl_children = cl;
 		else {
 			while (p->cl_siblings != NULL)
 				p = p->cl_siblings;
 			p->cl_siblings = cl;
 		}
 	}
 	IFQ_UNLOCK(hif->hif_ifq);
 	splx(s);
 
 	return (cl);
 
  err_ret:
 	if (cl->cl_red != NULL) {
 #ifdef ALTQ_RIO
 		if (q_is_rio(cl->cl_q))
 			rio_destroy((rio_t *)cl->cl_red);
 #endif
 #ifdef ALTQ_RED
 		if (q_is_red(cl->cl_q))
 			red_destroy(cl->cl_red);
 #endif
 	}
 	if (cl->cl_fsc != NULL)
 		free(cl->cl_fsc, M_DEVBUF);
 	if (cl->cl_rsc != NULL)
 		free(cl->cl_rsc, M_DEVBUF);
 	if (cl->cl_usc != NULL)
 		free(cl->cl_usc, M_DEVBUF);
 	if (cl->cl_q != NULL)
 		free(cl->cl_q, M_DEVBUF);
 	free(cl, M_DEVBUF);
 	return (NULL);
 }
 
 /*
  * hfsc_class_destroy detaches a class from its interface and frees it.
  *
  * returns 0 on success (a NULL class is accepted and is a no-op) and
  * EBUSY when is_a_parent_class(cl) is true.
  */
 static int
 hfsc_class_destroy(struct hfsc_class *cl)
 {
 	int i, s;
 
 	if (cl == NULL)
 		return (0);
 
 	if (is_a_parent_class(cl))
 		return (EBUSY);
 
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	IFQ_LOCK(cl->cl_hif->hif_ifq);
 
 #ifdef ALTQ3_COMPAT
 	/* delete filters referencing to this class */
 	acc_discard_filters(&cl->cl_hif->hif_classifier, cl, 0);
 #endif /* ALTQ3_COMPAT */
 
 	/* drop whatever is still queued on this class */
 	if (!qempty(cl->cl_q))
 		hfsc_purgeq(cl);
 
 	if (cl->cl_parent == NULL) {
 		/* this is root class */
 	} else {
 		/* unlink cl from the parent's singly linked children list */
 		struct hfsc_class *p = cl->cl_parent->cl_children;
 
 		if (p == cl)
 			cl->cl_parent->cl_children = cl->cl_siblings;
 		else do {
 			if (p->cl_siblings == cl) {
 				p->cl_siblings = cl->cl_siblings;
 				break;
 			}
 		} while ((p = p->cl_siblings) != NULL);
 		/* p is NULL only if cl was not found among the children */
 		ASSERT(p != NULL);
 	}
 
 	/* clear the class table slot that points at cl */
 	for (i = 0; i < HFSC_MAX_CLASSES; i++)
 		if (cl->cl_hif->hif_class_tbl[i] == cl) {
 			cl->cl_hif->hif_class_tbl[i] = NULL;
 			break;
 		}
 
 	cl->cl_hif->hif_classes--;
 	IFQ_UNLOCK(cl->cl_hif->hif_ifq);
 	splx(s);
 
 	/* the red/rio state is destroyed with the queue lock released */
 	if (cl->cl_red != NULL) {
 #ifdef ALTQ_RIO
 		if (q_is_rio(cl->cl_q))
 			rio_destroy((rio_t *)cl->cl_red);
 #endif
 #ifdef ALTQ_RED
 		if (q_is_red(cl->cl_q))
 			red_destroy(cl->cl_red);
 #endif
 	}
 
 	/*
 	 * second locked section: forget cl if the interface still
 	 * references it as its root or default class.
 	 */
 	IFQ_LOCK(cl->cl_hif->hif_ifq);
 	if (cl == cl->cl_hif->hif_rootclass)
 		cl->cl_hif->hif_rootclass = NULL;
 	if (cl == cl->cl_hif->hif_defaultclass)
 		cl->cl_hif->hif_defaultclass = NULL;
 	IFQ_UNLOCK(cl->cl_hif->hif_ifq);
 
 	/* release the service curves, the packet queue and the class itself */
 	if (cl->cl_usc != NULL)
 		free(cl->cl_usc, M_DEVBUF);
 	if (cl->cl_fsc != NULL)
 		free(cl->cl_fsc, M_DEVBUF);
 	if (cl->cl_rsc != NULL)
 		free(cl->cl_rsc, M_DEVBUF);
 	free(cl->cl_q, M_DEVBUF);
 	free(cl, M_DEVBUF);
 
 	return (0);
 }
 
 /*
  * hfsc_nextclass returns the class that follows cl in a depth-first
  * walk of the tree: the first child if there is one, otherwise the next
  * sibling, otherwise the next sibling of the closest ancestor that has
  * one.  NULL is returned when no such class exists.
  *   usage:
  *	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
  *		do_something;
  */
 static struct hfsc_class *
 hfsc_nextclass(struct hfsc_class *cl)
 {
 	/* go down first */
 	if (cl->cl_children != NULL)
 		return (cl->cl_children);
 
 	/* then sideways, climbing one level each time there is no sibling */
 	for (; cl != NULL; cl = cl->cl_parent) {
 		if (cl->cl_siblings != NULL)
 			return (cl->cl_siblings);
 	}
 
 	return (NULL);
 }
 
 /*
  * hfsc_enqueue is an enqueue function to be registered to
  * (*altq_enqueue) in struct ifaltq.
  *
  * the caller must hold the ifq lock (see IFQ_LOCK_ASSERT below).
  * returns 0 when the packet was queued, ENOBUFS otherwise.  on every
  * ENOBUFS path the mbuf has already been freed: here via m_freem(), or
  * inside hfsc_addq() when the class queue rejected it.
  */
 static int
 hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
 {
 	struct hfsc_if	*hif = (struct hfsc_if *)ifq->altq_disc;
 	struct hfsc_class *cl;
 	struct pf_mtag *t;
 	int len;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	/* grab class set by classifier */
 	if ((m->m_flags & M_PKTHDR) == 0) {
 		/* should not happen */
 		printf("altq: packet for %s does not have pkthdr\n",
 		    ifq->altq_ifp->if_xname);
 		m_freem(m);
 		return (ENOBUFS);
 	}
 	cl = NULL;
 	/* a pf tag, when present, names the class by its handle (qid) */
 	if ((t = pf_find_mtag(m)) != NULL)
 		cl = clh_to_clp(hif, t->qid);
 #ifdef ALTQ3_COMPAT
 	else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
 		cl = pktattr->pattr_class;
 #endif
 	/* unknown or non-leaf class: fall back to the default class */
 	if (cl == NULL || is_a_parent_class(cl)) {
 		cl = hif->hif_defaultclass;
 		if (cl == NULL) {
 			m_freem(m);
 			return (ENOBUFS);
 		}
 	}
 #ifdef ALTQ3_COMPAT
 	if (pktattr != NULL)
 		cl->cl_pktattr = pktattr;  /* save proto hdr used by ECN */
 	else
 #endif
 		cl->cl_pktattr = NULL;
 	/* take the length now: m must not be touched if hfsc_addq() fails */
 	len = m_pktlen(m);
 	if (hfsc_addq(cl, m) != 0) {
 		/* drop occurred.  mbuf was freed in hfsc_addq. */
 		PKTCNTR_ADD(&cl->cl_stats.drop_cnt, len);
 		return (ENOBUFS);
 	}
 	IFQ_INC_LEN(ifq);
 	cl->cl_hif->hif_packets++;
 
 	/* successfully queued. */
 	/* the queue went from empty to one packet: the class becomes active */
 	if (qlen(cl->cl_q) == 1)
 		set_active(cl, m_pktlen(m));
 
 	return (0);
 }
 
 /*
  * hfsc_dequeue is a dequeue function to be registered to
  * (*altq_dequeue) in struct ifaltq.
  *
  * note: ALTDQ_POLL returns the next packet without removing the packet
  *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
  *	ALTDQ_REMOVE must return the same packet if called immediately
  *	after ALTDQ_POLL.
  *
  * the caller must hold the ifq lock (see IFQ_LOCK_ASSERT below).
  * returns NULL when the tree holds no packet or when no class fits
  * under the link-sharing criteria.
  */
 static struct mbuf *
 hfsc_dequeue(struct ifaltq *ifq, int op)
 {
 	struct hfsc_if	*hif = (struct hfsc_if *)ifq->altq_disc;
 	struct hfsc_class *cl;
 	struct mbuf *m;
 	int len, next_len;
 	int realtime = 0;	/* set when cl was picked by real-time criteria */
 	u_int64_t cur_time;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (hif->hif_packets == 0)
 		/* no packet in the tree */
 		return (NULL);
 
 	cur_time = read_machclk();
 
 	if (op == ALTDQ_REMOVE && hif->hif_pollcache != NULL) {
 
 		/* reuse the class chosen by the preceding ALTDQ_POLL */
 		cl = hif->hif_pollcache;
 		hif->hif_pollcache = NULL;
 		/* check if the class was scheduled by real-time criteria */
 		if (cl->cl_rsc != NULL)
 			realtime = (cl->cl_e <= cur_time);
 	} else {
 		/*
 		 * if there are eligible classes, use real-time criteria.
 		 * find the class with the minimum deadline among
 		 * the eligible classes.
 		 */
 		if ((cl = hfsc_get_mindl(hif, cur_time))
 		    != NULL) {
 			realtime = 1;
 		} else {
 #ifdef ALTQ_DEBUG
 			int fits = 0;
 #endif
 			/*
 			 * use link-sharing criteria
 			 * get the class with the minimum vt in the hierarchy
 			 */
 			cl = hif->hif_rootclass;
 			/* descend one level per iteration until a leaf is reached */
 			while (is_a_parent_class(cl)) {
 
 				cl = actlist_firstfit(cl, cur_time);
 				if (cl == NULL) {
 #ifdef ALTQ_DEBUG
 					if (fits > 0)
 						printf("%d fit but none found\n",fits);
 #endif
 					return (NULL);
 				}
 				/*
 				 * update parent's cl_cvtmin.
 				 * don't update if the new vt is smaller.
 				 */
 				if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
 					cl->cl_parent->cl_cvtmin = cl->cl_vt;
 #ifdef ALTQ_DEBUG
 				fits++;
 #endif
 			}
 		}
 
 		if (op == ALTDQ_POLL) {
 			/* remember the choice so the next REMOVE returns it */
 			hif->hif_pollcache = cl;
 			m = hfsc_pollq(cl);
 			return (m);
 		}
 	}
 
 	m = hfsc_getq(cl);
 	if (m == NULL)
 		panic("hfsc_dequeue:");
 	len = m_pktlen(m);
 	cl->cl_hif->hif_packets--;
 	IFQ_DEC_LEN(ifq);
 	PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, len);
 
 	update_vf(cl, len, cur_time);
 	/* cl_cumul only counts bytes served under the real-time criteria */
 	if (realtime)
 		cl->cl_cumul += len;
 
 	if (!qempty(cl->cl_q)) {
 		if (cl->cl_rsc != NULL) {
 			/* update ed */
 			next_len = m_pktlen(qhead(cl->cl_q));
 
 			if (realtime)
 				update_ed(cl, next_len);
 			else
 				update_d(cl, next_len);
 		}
 	} else {
 		/* the class becomes passive */
 		set_passive(cl);
 	}
 
 	return (m);
 }
 
 /*
  * hfsc_addq appends m to the queue of cl.
  * rio and red queues are handed to their own enqueue routines and the
  * result of that routine is returned unchanged.  for a plain queue the
  * packet is freed and -1 returned once the queue limit is reached;
  * otherwise the packet is queued and 0 returned.
  */
 static int
 hfsc_addq(struct hfsc_class *cl, struct mbuf *m)
 {
 
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->cl_q))
 		return rio_addq((rio_t *)cl->cl_red, cl->cl_q,
 				m, cl->cl_pktattr);
 #endif
 #ifdef ALTQ_RED
 	if (q_is_red(cl->cl_q))
 		return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
 #endif
 	if (qlen(cl->cl_q) < qlimit(cl->cl_q)) {
 		/* room left: optionally clear the ds field, then queue */
 		if (cl->cl_flags & HFCF_CLEARDSCP)
 			write_dsfield(m, cl->cl_pktattr, 0);
 
 		_addq(cl->cl_q, m);
 
 		return (0);
 	}
 
 	/* queue is full */
 	m_freem(m);
 	return (-1);
 }
 
 /*
  * hfsc_getq removes and returns the next packet of cl, going through
  * the rio or red dequeue routine when the queue is of that type.
  */
 static struct mbuf *
 hfsc_getq(struct hfsc_class *cl)
 {
 	struct mbuf *pkt;
 
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->cl_q)) {
 		pkt = rio_getq((rio_t *)cl->cl_red, cl->cl_q);
 		return pkt;
 	}
 #endif
 #ifdef ALTQ_RED
 	if (q_is_red(cl->cl_q)) {
 		pkt = red_getq(cl->cl_red, cl->cl_q);
 		return pkt;
 	}
 #endif
 	pkt = _getq(cl->cl_q);
 	return pkt;
 }
 
 /*
  * hfsc_pollq returns the packet at the head of cl's queue without
  * removing it.
  */
 static struct mbuf *
 hfsc_pollq(struct hfsc_class *cl)
 {
 	struct mbuf *head;
 
 	head = qhead(cl->cl_q);
 	return head;
 }
 
 /*
  * hfsc_purgeq drops every packet queued on cl, accounting each one as
  * a drop, and then takes the class out of the active and eligible lists.
  * nothing is done when the queue is already empty.
  */
 static void
 hfsc_purgeq(struct hfsc_class *cl)
 {
 	struct mbuf *pkt;
 
 	if (qempty(cl->cl_q))
 		return;
 
 	for (pkt = _getq(cl->cl_q); pkt != NULL; pkt = _getq(cl->cl_q)) {
 		PKTCNTR_ADD(&cl->cl_stats.drop_cnt, m_pktlen(pkt));
 		m_freem(pkt);
 		cl->cl_hif->hif_packets--;
 		IFQ_DEC_LEN(cl->cl_hif->hif_ifq);
 	}
 	ASSERT(qlen(cl->cl_q) == 0);
 
 	/* update_vf() with an empty queue removes cl from the actlist */
 	update_vf(cl, 0, 0);
 	set_passive(cl);
 }
 
 /*
  * set_active is called when cl gets its first packet; len is the
  * length of that packet.  the eligible/deadline state is initialized
  * when the class has a real-time curve, the virtual time state when
  * it has a link-sharing curve, and the period counter is bumped.
  */
 static void
 set_active(struct hfsc_class *cl, int len)
 {
 	if (cl->cl_rsc != NULL) {
 		init_ed(cl, len);
 	}
 	if (cl->cl_fsc != NULL) {
 		init_vf(cl, len);
 	}
 
 	cl->cl_stats.period += 1;
 }
 
 /*
  * set_passive takes cl off the eligible list when it has a real-time
  * curve.
  *
  * the actlist is not touched here: it is maintained by update_vf(),
  * so update_vf(cl, 0, 0) must be called explicitly to remove a class
  * from the actlist.
  */
 static void
 set_passive(struct hfsc_class *cl)
 {
 	if (cl->cl_rsc == NULL)
 		return;
 
 	ellist_remove(cl);
 }
 
 static void
 init_ed(struct hfsc_class *cl, int next_len)
 {
 	u_int64_t cur_time;
 
 	cur_time = read_machclk();
 
 	/* update the deadline curve */
 	rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul);
 
 	/*
 	 * update the eligible curve.
 	 * for concave, it is equal to the deadline curve.
 	 * for convex, it is a linear curve with slope m2.
 	 */
 	cl->cl_eligible = cl->cl_deadline;
 	if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
 		cl->cl_eligible.dx = 0;
 		cl->cl_eligible.dy = 0;
 	}
 
 	/* compute e and d */
 	cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
 	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
 
 	ellist_insert(cl);
 }
 
 /*
  * update_ed recomputes both the eligible time and the deadline of cl
  * for a head packet of next_len bytes and repositions cl in the
  * eligible list.
  */
 static void
 update_ed(struct hfsc_class *cl, int next_len)
 {
 	u_int64_t eligible_at, deadline_at;
 
 	eligible_at = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
 	cl->cl_e = eligible_at;
 
 	deadline_at = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
 	cl->cl_d = deadline_at;
 
 	ellist_update(cl);
 }
 
 /*
  * update_d recomputes only the deadline of cl for a head packet of
  * next_len bytes; the eligible time and the eligible list are left alone.
  */
 static void
 update_d(struct hfsc_class *cl, int next_len)
 {
 	u_int64_t deadline_at;
 
 	deadline_at = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
 	cl->cl_d = deadline_at;
 }
 
 /*
  * init_vf initializes the virtual time state when a class becomes
  * backlogged.  it walks from cl up to (but not including) the root and,
  * for every level that turns active, picks a starting vt, resets the
  * virtual curve, inserts the class into its parent's actlist and, when
  * an upper limit curve is present, sets up myf.  the fit time cl_f is
  * refreshed at every level of the walk.
  *
  * len is not referenced in the body.
  */
 static void
 init_vf(struct hfsc_class *cl, int len)
 {
 	struct hfsc_class *max_cl, *p;
 	u_int64_t vt, f, cur_time;
 	int go_active;
 
 	/* 0 means "clock not read yet"; it is read lazily below */
 	cur_time = 0;
 	go_active = 1;
 	for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) {
 
 		/*
 		 * this level goes active only if the level below did and
 		 * this class had no active child before.  cl_nactive is
 		 * incremented only while go_active is still set.
 		 */
 		if (go_active && cl->cl_nactive++ == 0)
 			go_active = 1;
 		else
 			go_active = 0;
 
 		if (go_active) {
 			/* the tail of the parent's actlist has the largest vt */
 			max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead);
 			if (max_cl != NULL) {
 				/*
 				 * set vt to the average of the min and max
 				 * classes.  if the parent's period didn't
 				 * change, don't decrease vt of the class.
 				 */
 				vt = max_cl->cl_vt;
 				if (cl->cl_parent->cl_cvtmin != 0)
 					vt = (cl->cl_parent->cl_cvtmin + vt)/2;
 
 				if (cl->cl_parent->cl_vtperiod !=
 				    cl->cl_parentperiod || vt > cl->cl_vt)
 					cl->cl_vt = vt;
 			} else {
 				/*
 				 * first child for a new parent backlog period.
 				 * add parent's cvtmax to vtoff of children
 				 * to make a new vt (vtoff + vt) larger than
 				 * the vt in the last period for all children.
 				 */
 				vt = cl->cl_parent->cl_cvtmax;
 				for (p = cl->cl_parent->cl_children; p != NULL;
 				     p = p->cl_siblings)
 					p->cl_vtoff += vt;
 				cl->cl_vt = 0;
 				cl->cl_parent->cl_cvtmax = 0;
 				cl->cl_parent->cl_cvtmin = 0;
 			}
 			cl->cl_initvt = cl->cl_vt;
 
 			/* update the virtual curve */
 			vt = cl->cl_vt + cl->cl_vtoff;
 			rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt, cl->cl_total);
 			/* the curve was re-anchored at vt: fold vtoff into it */
 			if (cl->cl_virtual.x == vt) {
 				cl->cl_virtual.x -= cl->cl_vtoff;
 				cl->cl_vtoff = 0;
 			}
 			cl->cl_vtadj = 0;
 
 			cl->cl_vtperiod++;  /* increment vt period */
 			cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
 			if (cl->cl_parent->cl_nactive == 0)
 				cl->cl_parentperiod++;
 			cl->cl_f = 0;
 
 			actlist_insert(cl);
 
 			if (cl->cl_usc != NULL) {
 				/* class has upper limit curve */
 				if (cur_time == 0)
 					cur_time = read_machclk();
 
 				/* update the ulimit curve */
 				rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time,
 				    cl->cl_total);
 				/* compute myf */
 				cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
 				    cl->cl_total);
 				cl->cl_myfadj = 0;
 			}
 		}
 
 		/* cl_f is max(cl_myf, cl_cfmin); tell the parent if it moved */
 		if (cl->cl_myf > cl->cl_cfmin)
 			f = cl->cl_myf;
 		else
 			f = cl->cl_cfmin;
 		if (f != cl->cl_f) {
 			cl->cl_f = f;
 			update_cfmin(cl->cl_parent);
 		}
 	}
 }
 
 /*
  * update_vf accounts len bytes of service to cl and to each of its
  * ancestors below the root, then refreshes vt, myf and f at every level
  * that stays active.  a level whose last active child has just drained
  * is removed from its parent's actlist instead.
  *
  * hfsc_purgeq() calls this as update_vf(cl, 0, 0) on an empty queue to
  * take a class off the actlist.
  * NOTE(review): with cur_time == 0 the myf_bound subtraction below
  * wraps around for any ancestor that stays active and has an upper
  * limit curve -- confirm that this is intended.
  */
 static void
 update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time)
 {
 	u_int64_t f, myf_bound, delta;
 	int go_passive;
 
 	/* only a leaf whose queue just drained can start going passive */
 	go_passive = qempty(cl->cl_q);
 
 	for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
 
 		cl->cl_total += len;
 
 		/* no link-sharing curve or not active: only count the bytes */
 		if (cl->cl_fsc == NULL || cl->cl_nactive == 0)
 			continue;
 
 		/*
 		 * this level goes passive only if the level below did and
 		 * that was its last active child.  cl_nactive is
 		 * decremented only while go_passive is still set.
 		 */
 		if (go_passive && --cl->cl_nactive == 0)
 			go_passive = 1;
 		else
 			go_passive = 0;
 
 		if (go_passive) {
 			/* no more active child, going passive */
 
 			/* update cvtmax of the parent class */
 			if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
 				cl->cl_parent->cl_cvtmax = cl->cl_vt;
 
 			/* remove this class from the vt list */
 			actlist_remove(cl);
 
 			update_cfmin(cl->cl_parent);
 
 			continue;
 		}
 
 		/*
 		 * update vt and f
 		 */
 		cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
 		    - cl->cl_vtoff + cl->cl_vtadj;
 
 		/*
 		 * if vt of the class is smaller than cvtmin,
 		 * the class was skipped in the past due to non-fit.
 		 * if so, we need to adjust vtadj.
 		 */
 		if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
 			cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
 			cl->cl_vt = cl->cl_parent->cl_cvtmin;
 		}
 
 		/* update the vt list */
 		actlist_update(cl);
 
 		if (cl->cl_usc != NULL) {
 			cl->cl_myf = cl->cl_myfadj
 			    + rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
 
 			/*
 			 * if myf lags behind by more than one clock tick
 			 * from the current time, adjust myfadj to prevent
 			 * a rate-limited class from going greedy.
 			 * in a steady state under rate-limiting, myf
 			 * fluctuates within one clock tick.
 			 */
 			myf_bound = cur_time - machclk_per_tick;
 			if (cl->cl_myf < myf_bound) {
 				delta = cur_time - cl->cl_myf;
 				cl->cl_myfadj += delta;
 				cl->cl_myf += delta;
 			}
 		}
 
 		/* cl_f is max(cl_myf, cl_cfmin) */
 		if (cl->cl_myf > cl->cl_cfmin)
 			f = cl->cl_myf;
 		else
 			f = cl->cl_cfmin;
 		if (f != cl->cl_f) {
 			cl->cl_f = f;
 			update_cfmin(cl->cl_parent);
 		}
 	}
 }
 
 /*
  * update_cfmin recomputes cl_cfmin of cl as the smallest cl_f among
  * its active children.  the result is 0 when there is no active child
  * or when any active child has cl_f == 0.
  */
 static void
 update_cfmin(struct hfsc_class *cl)
 {
 	struct hfsc_class *child;
 	u_int64_t lowest;
 
 	if (TAILQ_EMPTY(&cl->cl_actc)) {
 		cl->cl_cfmin = 0;
 		return;
 	}
 
 	lowest = HT_INFINITY;
 	TAILQ_FOREACH(child, &cl->cl_actc, cl_actlist) {
 		if (child->cl_f == 0) {
 			/* cannot get any lower, stop scanning */
 			lowest = 0;
 			break;
 		}
 		if (child->cl_f < lowest)
 			lowest = child->cl_f;
 	}
 	cl->cl_cfmin = lowest;
 }
 
 /*
  * TAILQ based ellist and actlist implementation
  * (ion wanted to make a calendar queue based implementation)
  */
 /*
  * eligible list holds backlogged classes being sorted by their eligible times.
  * there is one eligible list per interface.
  */
 
 /*
  * ellist_insert places cl into the per-interface eligible list, which
  * is kept in ascending cl_e order.  a class is inserted behind every
  * entry whose eligible time is equal to its own.
  */
 static void
 ellist_insert(struct hfsc_class *cl)
 {
 	struct hfsc_if	*hif = cl->cl_hif;
 	struct hfsc_class *tail, *it;
 
 	/* common case: cl belongs at the end */
 	tail = TAILQ_LAST(&hif->hif_eligible, elighead);
 	if (tail == NULL || tail->cl_e <= cl->cl_e) {
 		TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
 		return;
 	}
 
 	/* otherwise go in front of the first entry with a larger cl_e */
 	TAILQ_FOREACH(it, &hif->hif_eligible, cl_ellist) {
 		if (it->cl_e > cl->cl_e) {
 			TAILQ_INSERT_BEFORE(it, cl, cl_ellist);
 			return;
 		}
 	}
 	ASSERT(0); /* should not reach here */
 }
 
 /*
  * ellist_remove unlinks cl from the eligible list of its interface.
  */
 static void
 ellist_remove(struct hfsc_class *cl)
 {
 	TAILQ_REMOVE(&cl->cl_hif->hif_eligible, cl, cl_ellist);
 }
 
 /*
  * ellist_update moves cl towards the tail of the eligible list after
  * its cl_e has changed, so that the list stays in ascending cl_e order.
  * cl is only ever moved backwards: the eligible time of a class
  * increases monotonically.
  */
 static void
 ellist_update(struct hfsc_class *cl)
 {
 	struct hfsc_if	*hif = cl->cl_hif;
 	struct hfsc_class *succ, *tail, *it;
 
 	/* still in order with respect to the successor: nothing to do */
 	succ = TAILQ_NEXT(cl, cl_ellist);
 	if (succ == NULL || cl->cl_e <= succ->cl_e)
 		return;
 
 	/* not smaller than the last entry: move straight to the tail */
 	tail = TAILQ_LAST(&hif->hif_eligible, elighead);
 	ASSERT(tail != NULL);
 	if (tail->cl_e <= cl->cl_e) {
 		TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
 		TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
 		return;
 	}
 
 	/*
 	 * the new slot lies somewhere after the successor and before
 	 * the tail: stop at the first entry with a larger cl_e.
 	 */
 	for (it = TAILQ_NEXT(succ, cl_ellist); it != NULL;
 	     it = TAILQ_NEXT(it, cl_ellist)) {
 		if (cl->cl_e < it->cl_e) {
 			TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
 			TAILQ_INSERT_BEFORE(it, cl, cl_ellist);
 			return;
 		}
 	}
 	ASSERT(0); /* should not reach here */
 }
 
 /*
  * return the class with the smallest deadline (cl_d) among the classes
  * whose eligible time (cl_e) is not later than cur_time, or NULL when
  * there is none.  the scan stops at the first class that is not yet
  * eligible.
  */
 struct hfsc_class *
 hfsc_get_mindl(struct hfsc_if *hif, u_int64_t cur_time)
 {
 	struct hfsc_class *it, *best = NULL;
 
 	TAILQ_FOREACH(it, &hif->hif_eligible, cl_ellist) {
 		if (it->cl_e > cur_time)
 			break;
 		if (best != NULL && best->cl_d <= it->cl_d)
 			continue;
 		best = it;
 	}
 	return (best);
 }
 
 /*
  * active children list holds backlogged child classes being sorted
  * by their virtual time.
  * each intermediate class has one active children list.
  */
 
 /*
  * actlist_insert places cl into the active children list of its
  * parent, which is kept in ascending cl_vt order.  a class is inserted
  * behind every entry whose virtual time is equal to its own.
  */
 static void
 actlist_insert(struct hfsc_class *cl)
 {
 	struct hfsc_class *tail, *it;
 
 	/* common case: cl belongs at the end */
 	tail = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead);
 	if (tail == NULL || tail->cl_vt <= cl->cl_vt) {
 		TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
 		return;
 	}
 
 	/* otherwise go in front of the first entry with a larger cl_vt */
 	TAILQ_FOREACH(it, &cl->cl_parent->cl_actc, cl_actlist) {
 		if (it->cl_vt > cl->cl_vt) {
 			TAILQ_INSERT_BEFORE(it, cl, cl_actlist);
 			return;
 		}
 	}
 	ASSERT(0); /* should not reach here */
 }
 
 /*
  * actlist_remove unlinks cl from the active children list of its parent.
  */
 static void
 actlist_remove(struct hfsc_class *cl)
 {
 	struct hfsc_class *parent = cl->cl_parent;
 
 	TAILQ_REMOVE(&parent->cl_actc, cl, cl_actlist);
 }
 
 /*
  * actlist_update moves cl towards the tail of its parent's active
  * children list after its cl_vt has changed, so that the list stays
  * sorted.  cl is only ever moved backwards: the virtual time of a class
  * increases monotonically during its backlogged period.
  */
 static void
 actlist_update(struct hfsc_class *cl)
 {
 	struct hfsc_class *succ, *tail, *it;
 
 	/* the successor is strictly larger: nothing to do */
 	succ = TAILQ_NEXT(cl, cl_actlist);
 	if (succ == NULL || cl->cl_vt < succ->cl_vt)
 		return;
 
 	/* not smaller than the last entry: move straight to the tail */
 	tail = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead);
 	ASSERT(tail != NULL);
 	if (tail->cl_vt <= cl->cl_vt) {
 		TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
 		TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
 		return;
 	}
 
 	/*
 	 * the new slot lies somewhere after the successor and before
 	 * the tail: stop at the first entry with a larger cl_vt.
 	 */
 	for (it = TAILQ_NEXT(succ, cl_actlist); it != NULL;
 	     it = TAILQ_NEXT(it, cl_actlist)) {
 		if (cl->cl_vt < it->cl_vt) {
 			TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
 			TAILQ_INSERT_BEFORE(it, cl, cl_actlist);
 			return;
 		}
 	}
 	ASSERT(0); /* should not reach here */
 }
 
 /*
  * actlist_firstfit returns the first active child of cl, in list
  * order, whose fit time cl_f is not later than cur_time, or NULL when
  * no child fits.
  */
 static struct hfsc_class *
 actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
 {
 	struct hfsc_class *child;
 
 	for (child = TAILQ_FIRST(&cl->cl_actc); child != NULL;
 	     child = TAILQ_NEXT(child, cl_actlist)) {
 		if (child->cl_f <= cur_time)
 			break;
 	}
 	/* NULL here means the list was exhausted without a fit */
 	return (child);
 }
 
 /*
  * service curve support functions
  *
  *  external service curve parameters
  *	m: bits/sec
  *	d: msec
  *  internal service curve parameters
  *	sm: (bytes/tsc_interval) << SM_SHIFT
  *	ism: (tsc_count/byte) << ISM_SHIFT
  *	dx: tsc_count
  *
  * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
  * we should be able to handle 100K-1Gbps linkspeed with 200Hz-1GHz CPU
  * speed.  SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
  * digits in decimal using the following table.
  *
  *  bits/sec    100Kbps     1Mbps     10Mbps     100Mbps    1Gbps
  *  ----------+-------------------------------------------------------
  *  bytes/nsec  12.5e-6    125e-6     1250e-6    12500e-6   125000e-6
  *  sm(500MHz)  25.0e-6    250e-6     2500e-6    25000e-6   250000e-6
  *  sm(200MHz)  62.5e-6    625e-6     6250e-6    62500e-6   625000e-6
  *
  *  nsec/byte   80000      8000       800        80         8
  *  ism(500MHz) 40000      4000       400        40         4
  *  ism(200MHz) 16000      1600       160        16         1.6
  */
 /* fixed-point shift applied to sm and ism values respectively */
 #define	SM_SHIFT	24
 #define	ISM_SHIFT	10
 
 /* masks selecting the bits below each shift */
 #define	SM_MASK		((1LL << SM_SHIFT) - 1)
 #define	ISM_MASK	((1LL << ISM_SHIFT) - 1)
 
 /*
  * seg_x2y maps a distance x along a linear segment to the amount y,
  * given the slope sm scaled by SM_SHIFT:
  *	y = x * sm >> SM_SHIFT
  */
 static __inline u_int64_t
 seg_x2y(u_int64_t x, u_int64_t sm)
 {
 	u_int64_t upper, lower;
 
 	/*
 	 * the product is formed separately for the bits of x above and
 	 * below SM_SHIFT so that the intermediate value does not overflow.
 	 */
 	upper = (x >> SM_SHIFT) * sm;
 	lower = ((x & SM_MASK) * sm) >> SM_SHIFT;
 
 	return (upper + lower);
 }
 
 /*
  * seg_y2x maps an amount y to the distance x along a linear segment,
  * given the inverse slope ism scaled by ISM_SHIFT:
  *	x = y * ism >> ISM_SHIFT
  * y == 0 always yields 0; otherwise an infinite inverse slope yields
  * HT_INFINITY.
  */
 static __inline u_int64_t
 seg_y2x(u_int64_t y, u_int64_t ism)
 {
 	if (y == 0)
 		return (0);
 	if (ism == HT_INFINITY)
 		return (HT_INFINITY);
 
 	/* same split into upper and lower bits as seg_x2y() */
 	return ((y >> ISM_SHIFT) * ism
 	    + (((y & ISM_MASK) * ism) >> ISM_SHIFT));
 }
 
 /*
  * m2sm converts an external slope m (bits/sec) into the internal
  * scaled slope: (m << SM_SHIFT) / 8 / machclk_freq.
  */
 static __inline u_int64_t
 m2sm(u_int m)
 {
 	return (((u_int64_t)m << SM_SHIFT) / 8 / machclk_freq);
 }
 
 /*
  * m2ism converts an external slope m (bits/sec) into the internal
  * scaled inverse slope: (machclk_freq << ISM_SHIFT) * 8 / m.
  * a zero slope maps to HT_INFINITY.
  */
 static __inline u_int64_t
 m2ism(u_int m)
 {
 	if (m == 0)
 		return (HT_INFINITY);
 
 	return (((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m);
 }
 
 /*
  * d2dx converts an external delay d (msec) into machine clock counts:
  * d * machclk_freq / 1000.
  */
 static __inline u_int64_t
 d2dx(u_int d)
 {
 	return (((u_int64_t)d * machclk_freq) / 1000);
 }
 
 /*
  * sm2m is the reverse of m2sm(): it converts an internal scaled slope
  * back into an external slope, truncated to u_int.
  */
 static u_int
 sm2m(u_int64_t sm)
 {
 	u_int64_t scaled = sm * 8 * machclk_freq;
 
 	return ((u_int)(scaled >> SM_SHIFT));
 }
 
 static u_int
 dx2d(u_int64_t dx)
 {
 	u_int64_t d;
 
 	d = dx * 1000 / machclk_freq;
 	return ((u_int)d);
 }
 
 /*
  * sc2isc fills the internal service curve isc from the external
  * service curve sc (m1, d, m2).
  */
 static void
 sc2isc(struct service_curve *sc, struct internal_sc *isc)
 {
 	/* second segment: slope m2 and its inverse */
 	isc->sm2 = m2sm(sc->m2);
 	isc->ism2 = m2ism(sc->m2);
 
 	/* first segment: slope m1 and its inverse */
 	isc->sm1 = m2sm(sc->m1);
 	isc->ism1 = m2ism(sc->m1);
 
 	/* extent of the first segment; dy follows from dx and sm1 */
 	isc->dx = d2dx(sc->d);
 	isc->dy = seg_x2y(isc->dx, isc->sm1);
 }
 
 /*
  * rtsc_init sets the runtime service curve rtsc to the internal
  * service curve isc with its origin placed at (x, y).
  */
 static void
 rtsc_init(struct runtime_sc *rtsc, struct internal_sc * isc, u_int64_t x,
     u_int64_t y)
 {
 	/* origin */
 	rtsc->x = x;
 	rtsc->y = y;
 
 	/* first segment */
 	rtsc->sm1 = isc->sm1;
 	rtsc->ism1 = isc->ism1;
 	rtsc->dx = isc->dx;
 	rtsc->dy = isc->dy;
 
 	/* second segment */
 	rtsc->sm2 = isc->sm2;
 	rtsc->ism2 = isc->ism2;
 }
 
 /*
  * rtsc_y2x calculates the x-projection of the runtime service curve
  * for the given y value.  a y below the origin of the curve maps to
  * the x of the origin.
  */
 static u_int64_t
 rtsc_y2x(struct runtime_sc *rtsc, u_int64_t y)
 {
 	/* below the curve origin */
 	if (y < rtsc->y)
 		return (rtsc->x);
 
 	/* beyond the 1st segment: x belongs to the 2nd segment */
 	if (y > rtsc->y + rtsc->dy)
 		return (rtsc->x + rtsc->dx
 		    + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2));
 
 	/* x belongs to the 1st segment; a flat one maps to its far end */
 	if (rtsc->dy == 0)
 		return (rtsc->x + rtsc->dx);
 
 	return (rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1));
 }
 
 /*
  * rtsc_x2y calculates the y-projection of the runtime service curve
  * for the given x value.  an x at or before the origin of the curve
  * maps to the y of the origin.
  */
 static u_int64_t
 rtsc_x2y(struct runtime_sc *rtsc, u_int64_t x)
 {
 	/* at or before the curve origin */
 	if (x <= rtsc->x)
 		return (rtsc->y);
 
 	/* y belongs to the 1st segment */
 	if (x <= rtsc->x + rtsc->dx)
 		return (rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1));
 
 	/* y belongs to the 2nd segment */
 	return (rtsc->y + rtsc->dy
 	    + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2));
 }
 
 /*
  * rtsc_min updates the runtime service curve rtsc to the minimum of
  * its current value and the service curve isc starting at (x, y).
  */
 static void
 rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x,
     u_int64_t y)
 {
 	u_int64_t	cur_y, far_y, dx, dy;
 
 	/* value of the current rtsc at x; needed for either curve shape */
 	cur_y = rtsc_x2y(rtsc, x);
 
 	if (isc->sm1 <= isc->sm2) {
 		/*
 		 * service curve is convex.
 		 * unless the current rtsc is smaller at x, move the
 		 * origin to (x, y); the segments stay as they are.
 		 */
 		if (cur_y >= y) {
 			rtsc->x = x;
 			rtsc->y = y;
 		}
 		return;
 	}
 
 	/*
 	 * service curve is concave.
 	 * if rtsc is not above isc at x, rtsc stays unchanged.
 	 */
 	if (cur_y <= y)
 		return;
 
 	/* value of the current rtsc at the end of isc's 1st segment */
 	far_y = rtsc_x2y(rtsc, x + isc->dx);
 	if (far_y >= y + isc->dy) {
 		/* rtsc is above isc, replace rtsc by isc */
 		rtsc->x = x;
 		rtsc->y = y;
 		rtsc->dx = isc->dx;
 		rtsc->dy = isc->dy;
 		return;
 	}
 
 	/*
 	 * the two curves intersect.
 	 * compute the offsets (dx, dy) using the reverse
 	 * function of seg_x2y()
 	 *	seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (cur_y - y)
 	 */
 	dx = ((cur_y - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
 	/*
 	 * if (x, cur_y) still lies on the 1st segment of rtsc, the
 	 * remainder of that segment is added to the offset.
 	 */
 	if (rtsc->x + rtsc->dx > x)
 		dx += rtsc->x + rtsc->dx - x;
 	dy = seg_x2y(dx, isc->sm1);
 
 	rtsc->x = x;
 	rtsc->y = y;
 	rtsc->dx = dx;
 	rtsc->dy = dy;
 }
 
 static void
 get_class_stats(struct hfsc_classstats *sp, struct hfsc_class *cl)
 {
 	sp->class_id = cl->cl_id;
 	sp->class_handle = cl->cl_handle;
 
 	if (cl->cl_rsc != NULL) {
 		sp->rsc.m1 = sm2m(cl->cl_rsc->sm1);
 		sp->rsc.d = dx2d(cl->cl_rsc->dx);
 		sp->rsc.m2 = sm2m(cl->cl_rsc->sm2);
 	} else {
 		sp->rsc.m1 = 0;
 		sp->rsc.d = 0;
 		sp->rsc.m2 = 0;
 	}
 	if (cl->cl_fsc != NULL) {
 		sp->fsc.m1 = sm2m(cl->cl_fsc->sm1);
 		sp->fsc.d = dx2d(cl->cl_fsc->dx);
 		sp->fsc.m2 = sm2m(cl->cl_fsc->sm2);
 	} else {
 		sp->fsc.m1 = 0;
 		sp->fsc.d = 0;
 		sp->fsc.m2 = 0;
 	}
 	if (cl->cl_usc != NULL) {
 		sp->usc.m1 = sm2m(cl->cl_usc->sm1);
 		sp->usc.d = dx2d(cl->cl_usc->dx);
 		sp->usc.m2 = sm2m(cl->cl_usc->sm2);
 	} else {
 		sp->usc.m1 = 0;
 		sp->usc.d = 0;
 		sp->usc.m2 = 0;
 	}
 
 	sp->total = cl->cl_total;
 	sp->cumul = cl->cl_cumul;
 
 	sp->d = cl->cl_d;
 	sp->e = cl->cl_e;
 	sp->vt = cl->cl_vt;
 	sp->f = cl->cl_f;
 
 	sp->initvt = cl->cl_initvt;
 	sp->vtperiod = cl->cl_vtperiod;
 	sp->parentperiod = cl->cl_parentperiod;
 	sp->nactive = cl->cl_nactive;
 	sp->vtoff = cl->cl_vtoff;
 	sp->cvtmax = cl->cl_cvtmax;
 	sp->myf = cl->cl_myf;
 	sp->cfmin = cl->cl_cfmin;
 	sp->cvtmin = cl->cl_cvtmin;
 	sp->myfadj = cl->cl_myfadj;
 	sp->vtadj = cl->cl_vtadj;
 
 	sp->cur_time = read_machclk();
 	sp->machclk_freq = machclk_freq;
 
 	sp->qlength = qlen(cl->cl_q);
 	sp->qlimit = qlimit(cl->cl_q);
 	sp->xmit_cnt = cl->cl_stats.xmit_cnt;
 	sp->drop_cnt = cl->cl_stats.drop_cnt;
 	sp->period = cl->cl_stats.period;
 
 	sp->qtype = qtype(cl->cl_q);
 #ifdef ALTQ_RED
 	if (q_is_red(cl->cl_q))
 		red_getstats(cl->cl_red, &sp->red[0]);
 #endif
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->cl_q))
 		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
 #endif
 }
 
 /*
  * convert a class handle to the corresponding class pointer.
  * returns NULL for handle 0 and for a handle no class in the table
  * carries.
  */
 static struct hfsc_class *
 clh_to_clp(struct hfsc_if *hif, u_int32_t chandle)
 {
 	struct hfsc_class *cl;
 	int slot;
 
 	if (chandle == 0)
 		return (NULL);
 
 	/*
 	 * optimistic lookup: the slot matching the lower bits of the
 	 * handle is tried first.
 	 */
 	cl = hif->hif_class_tbl[chandle % HFSC_MAX_CLASSES];
 	if (cl != NULL && cl->cl_handle == chandle)
 		return (cl);
 
 	/* fall back to a linear search of the whole table */
 	for (slot = 0; slot < HFSC_MAX_CLASSES; slot++) {
 		cl = hif->hif_class_tbl[slot];
 		if (cl != NULL && cl->cl_handle == chandle)
 			return (cl);
 	}
 	return (NULL);
 }
 
 #ifdef ALTQ3_COMPAT
 /*
  * hfsc_attach allocates and zeroes the per-interface hfsc state for
  * ifq, initializes its eligible list and links the state onto the
  * global hif_list.
  *
  * bandwidth is not referenced in the body.
  * returns the new state, or NULL if the allocation failed.
  */
 static struct hfsc_if *
 hfsc_attach(ifq, bandwidth)
 	struct ifaltq *ifq;
 	u_int bandwidth;
 {
 	struct hfsc_if *hif;
 
 	hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK);
 	if (hif == NULL)
 		return (NULL);
 	bzero(hif, sizeof(struct hfsc_if));
 
 	/*
 	 * the eligible list is a TAILQ head embedded in hif (the
 	 * ellist_*() routines and hfsc_get_mindl() all hand
 	 * &hif->hif_eligible to the TAILQ macros), so it is initialized
 	 * in place.  it cannot be assigned from an allocator or compared
 	 * against NULL.
 	 */
 	TAILQ_INIT(&hif->hif_eligible);
 
 	hif->hif_ifq = ifq;
 
 	/* add this state to the hfsc list */
 	hif->hif_next = hif_list;
 	hif_list = hif;
 
 	return (hif);
 }
 
 /*
  * hfsc_detach tears down the per-interface hfsc state hif: the class
  * hierarchy is cleared, the root class destroyed, hif is unlinked from
  * the global hif_list and finally freed.  hif must not be used after
  * this returns.
  *
  * always returns 0.
  */
 static int
 hfsc_detach(hif)
 	struct hfsc_if *hif;
 {
 	(void)hfsc_clear_interface(hif);
 	(void)hfsc_class_destroy(hif->hif_rootclass);
 
 	/* remove this interface from the hif list */
 	if (hif_list == hif)
 		hif_list = hif->hif_next;
 	else {
 		struct hfsc_if *h;
 
 		for (h = hif_list; h != NULL; h = h->hif_next)
 			if (h->hif_next == hif) {
 				h->hif_next = hif->hif_next;
 				break;
 			}
 		/* h is NULL only if hif was not on the list */
 		ASSERT(h != NULL);
 	}
 
 	/*
 	 * the eligible list is a TAILQ head embedded in hif (the
 	 * ellist_*() routines hand &hif->hif_eligible to the TAILQ
 	 * macros), so it has no storage of its own to release; it goes
 	 * away together with hif.
 	 */
 	free(hif, M_DEVBUF);
 
 	return (0);
 }
 
 static int
 hfsc_class_modify(cl, rsc, fsc, usc)
 	struct hfsc_class *cl;
 	struct service_curve *rsc, *fsc, *usc;
 {
 	struct internal_sc *rsc_tmp, *fsc_tmp, *usc_tmp;
 	u_int64_t cur_time;
 	int s;
 
 	rsc_tmp = fsc_tmp = usc_tmp = NULL;
 	if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0) &&
 	    cl->cl_rsc == NULL) {
 		rsc_tmp = malloc(sizeof(struct internal_sc),
 		    M_DEVBUF, M_WAITOK);
 		if (rsc_tmp == NULL)
 			return (ENOMEM);
 	}
 	if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0) &&
 	    cl->cl_fsc == NULL) {
 		fsc_tmp = malloc(sizeof(struct internal_sc),
 		    M_DEVBUF, M_WAITOK);
 		if (fsc_tmp == NULL) {
 			free(rsc_tmp);
 			return (ENOMEM);
 		}
 	}
 	if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0) &&
 	    cl->cl_usc == NULL) {
 		usc_tmp = malloc(sizeof(struct internal_sc),
 		    M_DEVBUF, M_WAITOK);
 		if (usc_tmp == NULL) {
 			free(rsc_tmp);
 			free(fsc_tmp);
 			return (ENOMEM);
 		}
 	}
 
 	cur_time = read_machclk();
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	IFQ_LOCK(cl->cl_hif->hif_ifq);
 
 	if (rsc != NULL) {
 		if (rsc->m1 == 0 && rsc->m2 == 0) {
 			if (cl->cl_rsc != NULL) {
 				if (!qempty(cl->cl_q))
 					hfsc_purgeq(cl);
 				free(cl->cl_rsc, M_DEVBUF);
 				cl->cl_rsc = NULL;
 			}
 		} else {
 			if (cl->cl_rsc == NULL)
 				cl->cl_rsc = rsc_tmp;
 			sc2isc(rsc, cl->cl_rsc);
 			rtsc_init(&cl->cl_deadline, cl->cl_rsc, cur_time,
 			    cl->cl_cumul);
 			cl->cl_eligible = cl->cl_deadline;
 			if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
 				cl->cl_eligible.dx = 0;
 				cl->cl_eligible.dy = 0;
 			}
 		}
 	}
 
 	if (fsc != NULL) {
 		if (fsc->m1 == 0 && fsc->m2 == 0) {
 			if (cl->cl_fsc != NULL) {
 				if (!qempty(cl->cl_q))
 					hfsc_purgeq(cl);
 				free(cl->cl_fsc, M_DEVBUF);
 				cl->cl_fsc = NULL;
 			}
 		} else {
 			if (cl->cl_fsc == NULL)
 				cl->cl_fsc = fsc_tmp;
 			sc2isc(fsc, cl->cl_fsc);
 			rtsc_init(&cl->cl_virtual, cl->cl_fsc, cl->cl_vt,
 			    cl->cl_total);
 		}
 	}
 
 	if (usc != NULL) {
 		if (usc->m1 == 0 && usc->m2 == 0) {
 			if (cl->cl_usc != NULL) {
 				free(cl->cl_usc, M_DEVBUF);
 				cl->cl_usc = NULL;
 				cl->cl_myf = 0;
 			}
 		} else {
 			if (cl->cl_usc == NULL)
 				cl->cl_usc = usc_tmp;
 			sc2isc(usc, cl->cl_usc);
 			rtsc_init(&cl->cl_ulimit, cl->cl_usc, cur_time,
 			    cl->cl_total);
 		}
 	}
 
 	if (!qempty(cl->cl_q)) {
 		if (cl->cl_rsc != NULL)
 			update_ed(cl, m_pktlen(qhead(cl->cl_q)));
 		if (cl->cl_fsc != NULL)
 			update_vf(cl, 0, cur_time);
 		/* is this enough? */
 	}
 
 	IFQ_UNLOCK(cl->cl_hif->hif_ifq);
 	splx(s);
 
 	return (0);
 }
 
 /*
  * hfsc device interface
  */
 /*
  * hfscopen is the open entry point of the hfsc device.  it only makes
  * sure that the machine clock has been initialized; none of the
  * arguments is referenced.
  *
  * returns 0 on success, or ENXIO when machclk_freq is still 0 after
  * init_machclk().
  */
 int
 hfscopen(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	if (machclk_freq == 0) {
 		/* clock not set up yet: try once */
 		init_machclk();
 
 		if (machclk_freq == 0) {
 			printf("hfsc: no cpu clock available!\n");
 			return (ENXIO);
 		}
 	}
 
 	/* everything will be done when the queueing scheme is attached. */
 	return 0;
 }
 
 /*
  * hfscclose is the close entry point of the hfsc device.  it disables
  * and detaches hfsc on every interface found on hif_list; none of the
  * arguments is referenced.
  *
  * returns 0 when every interface was detached, otherwise the first
  * error that was encountered.  an interface whose altq_detach() fails
  * is left on hif_list and the walk continues with the next one.
  */
 int
 hfscclose(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	struct hfsc_if *hif, *next;
 	int err, error = 0;
 
 	/*
 	 * walk the list with a saved successor: hfsc_detach() unlinks
 	 * and frees hif, and an entry that fails to detach stays on the
 	 * list.  re-reading the list head every round would look at the
 	 * same failing entry forever.
 	 */
 	for (hif = hif_list; hif != NULL; hif = next) {
 		next = hif->hif_next;
 
 		/* destroy all */
 		if (ALTQ_IS_ENABLED(hif->hif_ifq))
 			altq_disable(hif->hif_ifq);
 
 		err = altq_detach(hif->hif_ifq);
 		if (err == 0)
 			err = hfsc_detach(hif);
 		if (err != 0 && error == 0)
 			error = err;
 	}
 
 	return error;
 }
 
 /*
  * hfscioctl is the ioctl entry point of the hfsc device.
  *
  * every command except HFSC_GETSTATS requires privilege; the check is
  * done first and its error is returned as is.  cmd is then dispatched
  * to the matching hfsccmd_*() handler with addr cast to the argument
  * structure of that command.  HFSC_ENABLE, HFSC_DISABLE and
  * HFSC_CLEAR_HIERARCHY are handled inline after looking up the
  * interface named in addr.
  *
  * returns 0 on success, EBADF when the named interface has no hfsc
  * state, EINVAL for an unknown command or for HFSC_ENABLE without a
  * default class, or the error of the handler that was called.
  * dev and flag are not referenced.
  */
 int
 hfscioctl(dev, cmd, addr, flag, p)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	caddr_t addr;
 	int flag;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	struct hfsc_if *hif;
 	struct hfsc_interface *ifacep;
 	int	error = 0;
 
 	/* check super-user privilege */
 	switch (cmd) {
 	case HFSC_GETSTATS:
 		break;
 	default:
 #if (__FreeBSD_version > 700000)
 		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
 			return (error);
 #elif (__FreeBSD_version > 400000)
 		if ((error = suser(p)) != 0)
 			return (error);
 #else
 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
 			return (error);
 #endif
 		break;
 	}
 
 	switch (cmd) {
 
 	case HFSC_IF_ATTACH:
 		error = hfsccmd_if_attach((struct hfsc_attach *)addr);
 		break;
 
 	case HFSC_IF_DETACH:
 		error = hfsccmd_if_detach((struct hfsc_interface *)addr);
 		break;
 
 	case HFSC_ENABLE:
 	case HFSC_DISABLE:
 	case HFSC_CLEAR_HIERARCHY:
 		/* these three share the interface lookup */
 		ifacep = (struct hfsc_interface *)addr;
 		if ((hif = altq_lookup(ifacep->hfsc_ifname,
 				       ALTQT_HFSC)) == NULL) {
 			error = EBADF;
 			break;
 		}
 
 		switch (cmd) {
 
 		case HFSC_ENABLE:
 			/* enabling requires a default class */
 			if (hif->hif_defaultclass == NULL) {
 #ifdef ALTQ_DEBUG
 				printf("hfsc: no default class\n");
 #endif
 				error = EINVAL;
 				break;
 			}
 			error = altq_enable(hif->hif_ifq);
 			break;
 
 		case HFSC_DISABLE:
 			error = altq_disable(hif->hif_ifq);
 			break;
 
 		case HFSC_CLEAR_HIERARCHY:
 			hfsc_clear_interface(hif);
 			break;
 		}
 		break;
 
 	case HFSC_ADD_CLASS:
 		error = hfsccmd_add_class((struct hfsc_add_class *)addr);
 		break;
 
 	case HFSC_DEL_CLASS:
 		error = hfsccmd_delete_class((struct hfsc_delete_class *)addr);
 		break;
 
 	case HFSC_MOD_CLASS:
 		error = hfsccmd_modify_class((struct hfsc_modify_class *)addr);
 		break;
 
 	case HFSC_ADD_FILTER:
 		error = hfsccmd_add_filter((struct hfsc_add_filter *)addr);
 		break;
 
 	case HFSC_DEL_FILTER:
 		error = hfsccmd_delete_filter((struct hfsc_delete_filter *)addr);
 		break;
 
 	case HFSC_GETSTATS:
 		error = hfsccmd_class_stats((struct hfsc_class_stats *)addr);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 	return error;
 }
 
 /*
  * HFSC_IF_ATTACH handler: look up the interface named in ap, create
  * hfsc state for its send queue and register hfsc as the discipline of
  * that queue.  The state is torn down again if registration fails.
  *
  * Returns ENXIO when the interface is not found, ENOMEM when
  * hfsc_attach() returns NULL, otherwise the result of altq_attach().
  */
 static int
 hfsccmd_if_attach(ap)
 	struct hfsc_attach *ap;
 {
 	struct ifnet *ifp;
 	struct hfsc_if *state;
 	int rv;
 
 	ifp = ifunit(ap->iface.hfsc_ifname);
 	if (ifp == NULL)
 		return (ENXIO);
 
 	state = hfsc_attach(&ifp->if_snd, ap->bandwidth);
 	if (state == NULL)
 		return (ENOMEM);
 
 	/* hook the hfsc handlers and classifier into this ifnet's queue */
 	rv = altq_attach(&ifp->if_snd, ALTQT_HFSC, state,
 	    hfsc_enqueue, hfsc_dequeue, hfsc_request,
 	    &state->hif_classifier, acc_classify);
 	if (rv != 0)
 		(void)hfsc_detach(state);
 
 	return (rv);
 }
 
 /*
  * HFSC_IF_DETACH handler: disable the queue if it is enabled, detach
  * altq from it and then release the hfsc state.
  *
  * Returns EBADF when no hfsc state is found for the interface, the
  * altq_detach() error if that step fails, otherwise the result of
  * hfsc_detach().
  */
 static int
 hfsccmd_if_detach(ap)
 	struct hfsc_interface *ap;
 {
 	struct hfsc_if *state;
 	int rv;
 
 	state = altq_lookup(ap->hfsc_ifname, ALTQT_HFSC);
 	if (state == NULL)
 		return (EBADF);
 
 	if (ALTQ_IS_ENABLED(state->hif_ifq))
 		altq_disable(state->hif_ifq);
 
 	rv = altq_detach(state->hif_ifq);
 	if (rv != 0)
 		return (rv);
 
 	return hfsc_detach(state);
 }
 
 /*
  * HFSC_ADD_CLASS handler: create a class under the requested parent and
  * hand its handle back in ap->class_handle.
  *
  * The parent may be left NULL only when ap->parent_handle is
  * HFSC_NULLCLASS_HANDLE and the interface has no root class yet;
  * otherwise the parent handle must resolve to an existing class.
  * The handle is the first free index of hif_class_tbl, searching from 1.
  *
  * Returns EBADF (no hfsc state for the interface), EINVAL (parent not
  * found), EBUSY (no free slot), ENOMEM (class creation failed) or 0.
  */
 static int
 hfsccmd_add_class(ap)
 	struct hfsc_add_class *ap;
 {
 	struct hfsc_if *state;
 	struct hfsc_class *parent = NULL;
 	int slot;
 
 	state = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC);
 	if (state == NULL)
 		return (EBADF);
 
 	if (ap->parent_handle != HFSC_NULLCLASS_HANDLE ||
 	    state->hif_rootclass != NULL) {
 		parent = clh_to_clp(state, ap->parent_handle);
 		if (parent == NULL)
 			return (EINVAL);
 	}
 
 	/* assign a class handle (use a free slot number for now) */
 	slot = 1;
 	while (slot < HFSC_MAX_CLASSES && state->hif_class_tbl[slot] != NULL)
 		slot++;
 	if (slot == HFSC_MAX_CLASSES)
 		return (EBUSY);
 
 	if (hfsc_class_create(state, &ap->service_curve, NULL, NULL,
 	    parent, ap->qlimit, ap->flags, slot) == NULL)
 		return (ENOMEM);
 
 	/* return a class handle to the user */
 	ap->class_handle = slot;
 
 	return (0);
 }
 
 /*
  * HFSC_DEL_CLASS handler: destroy the class identified by
  * ap->class_handle on the interface named in ap.
  *
  * Returns EBADF if no hfsc state is found for the interface, EINVAL if
  * the handle does not resolve to a class, otherwise the result of
  * hfsc_class_destroy().
  */
 static int
 hfsccmd_delete_class(ap)
 	struct hfsc_delete_class *ap;
 {
 	struct hfsc_if *state;
 	struct hfsc_class *victim;
 
 	state = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC);
 	if (state == NULL)
 		return (EBADF);
 
 	victim = clh_to_clp(state, ap->class_handle);
 	if (victim == NULL)
 		return (EINVAL);
 
 	return hfsc_class_destroy(victim);
 }
 
 /*
  * HFSC_MOD_CLASS handler: apply ap->service_curve to the curve types
  * selected by the bits of ap->sctype.  A curve whose bit is clear is
  * passed to hfsc_class_modify() as NULL.
  *
  * Returns EBADF if no hfsc state is found for the interface, EINVAL if
  * the handle does not resolve to a class, otherwise the result of
  * hfsc_class_modify().
  */
 static int
 hfsccmd_modify_class(ap)
 	struct hfsc_modify_class *ap;
 {
 	struct hfsc_if *state;
 	struct hfsc_class *target;
 	struct service_curve *realtime, *linkshare, *upperlimit;
 
 	state = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC);
 	if (state == NULL)
 		return (EBADF);
 
 	target = clh_to_clp(state, ap->class_handle);
 	if (target == NULL)
 		return (EINVAL);
 
 	realtime = (ap->sctype & HFSC_REALTIMESC) ?
 	    &ap->service_curve : NULL;
 	linkshare = (ap->sctype & HFSC_LINKSHARINGSC) ?
 	    &ap->service_curve : NULL;
 	upperlimit = (ap->sctype & HFSC_UPPERLIMITSC) ?
 	    &ap->service_curve : NULL;
 
 	return hfsc_class_modify(target, realtime, linkshare, upperlimit);
 }
 
 /*
  * HFSC_ADD_FILTER handler: attach ap->filter to the class identified by
  * ap->class_handle.  Classes for which is_a_parent_class() is true are
  * rejected.
  *
  * Returns EBADF if no hfsc state is found for the interface, EINVAL if
  * the handle does not resolve to a class or the class is a parent,
  * otherwise the result of acc_add_filter().
  */
 static int
 hfsccmd_add_filter(ap)
 	struct hfsc_add_filter *ap;
 {
 	struct hfsc_if *state;
 	struct hfsc_class *target;
 
 	state = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC);
 	if (state == NULL)
 		return (EBADF);
 
 	target = clh_to_clp(state, ap->class_handle);
 	if (target == NULL)
 		return (EINVAL);
 
 	if (!is_a_parent_class(target))
 		return acc_add_filter(&state->hif_classifier, &ap->filter,
 		    target, &ap->filter_handle);
 
 #ifdef ALTQ_DEBUG
 	printf("hfsccmd_add_filter: not a leaf class!\n");
 #endif
 	return (EINVAL);
 }
 
 /*
  * HFSC_DEL_FILTER handler: remove the filter identified by
  * ap->filter_handle from the interface's classifier.
  *
  * Returns EBADF if no hfsc state is found for the interface, otherwise
  * the result of acc_delete_filter().
  */
 static int
 hfsccmd_delete_filter(ap)
 	struct hfsc_delete_filter *ap;
 {
 	struct hfsc_if *state;
 
 	state = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC);
 	if (state == NULL)
 		return (EBADF);
 
 	return acc_delete_filter(&state->hif_classifier, ap->filter_handle);
 }
 
 static int
 hfsccmd_class_stats(ap)
 	struct hfsc_class_stats *ap;
 {
 	struct hfsc_if *hif;
 	struct hfsc_class *cl;
 	struct hfsc_classstats stats, *usp;
 	int	n, nclasses, error;
 
 	if ((hif = altq_lookup(ap->iface.hfsc_ifname, ALTQT_HFSC)) == NULL)
 		return (EBADF);
 
 	ap->cur_time = read_machclk();
 	ap->machclk_freq = machclk_freq;
 	ap->hif_classes = hif->hif_classes;
 	ap->hif_packets = hif->hif_packets;
 
 	/* skip the first N classes in the tree */
 	nclasses = ap->nskip;
 	for (cl = hif->hif_rootclass, n = 0; cl != NULL && n < nclasses;
 	     cl = hfsc_nextclass(cl), n++)
 		;
 	if (n != nclasses)
 		return (EINVAL);
 
 	/* then, read the next N classes in the tree */
 	nclasses = ap->nclasses;
 	usp = ap->stats;
 	for (n = 0; cl != NULL && n < nclasses; cl = hfsc_nextclass(cl), n++) {
 
 		get_class_stats(&stats, cl);
 
 		if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
 				     sizeof(stats))) != 0)
 			return (error);
 	}
 
 	ap->nclasses = n;
 
 	return (0);
 }
 
 #ifdef KLD_MODULE
 
 /*
  * Switch entry handed to ALTQ_MODULE() below: the discipline name
  * followed by the open, close and ioctl routines defined in this file.
  */
 static struct altqsw hfsc_sw =
 	{"hfsc", hfscopen, hfscclose, hfscioctl};
 
 /* Declare the altq_hfsc module and its dependencies on altq_red and altq_rio. */
 ALTQ_MODULE(altq_hfsc, ALTQT_HFSC, &hfsc_sw);
 MODULE_DEPEND(altq_hfsc, altq_red, 1, 1, 1);
 MODULE_DEPEND(altq_hfsc, altq_rio, 1, 1, 1);
 
 #endif /* KLD_MODULE */
 #endif /* ALTQ3_COMPAT */
 
 #endif /* ALTQ_HFSC */
Index: user/ngie/more-tests/sys/net/altq/altq_priq.c
===================================================================
--- user/ngie/more-tests/sys/net/altq/altq_priq.c	(revision 281675)
+++ user/ngie/more-tests/sys/net/altq/altq_priq.c	(revision 281676)
@@ -1,1046 +1,1031 @@
-/*	$FreeBSD$	*/
-/*	$KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $	*/
-/*
+/*-
  * Copyright (C) 2000-2003
  *	Sony Computer Science Laboratories Inc.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
+ *
+ * $KAME: altq_priq.c,v 1.11 2003/09/17 14:23:25 kjc Exp $
+ * $FreeBSD$
  */
 /*
  * priority queue
  */
 
-#if defined(__FreeBSD__) || defined(__NetBSD__)
 #include "opt_altq.h"
 #include "opt_inet.h"
-#ifdef __FreeBSD__
 #include "opt_inet6.h"
-#endif
-#endif /* __FreeBSD__ || __NetBSD__ */
 
 #ifdef ALTQ_PRIQ  /* priq is enabled by ALTQ_PRIQ option in opt_altq.h */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <netinet/in.h>
 
 #include <netpfil/pf/pf.h>
 #include <netpfil/pf/pf_altq.h>
 #include <netpfil/pf/pf_mtag.h>
 #include <net/altq/altq.h>
 #ifdef ALTQ3_COMPAT
 #include <net/altq/altq_conf.h>
 #endif
 #include <net/altq/altq_priq.h>
 
 /*
  * function prototypes
  */
 #ifdef ALTQ3_COMPAT
 static struct priq_if *priq_attach(struct ifaltq *, u_int);
 static int priq_detach(struct priq_if *);
 #endif
 static int priq_clear_interface(struct priq_if *);
 static int priq_request(struct ifaltq *, int, void *);
 static void priq_purge(struct priq_if *);
 static struct priq_class *priq_class_create(struct priq_if *, int, int, int,
     int);
 static int priq_class_destroy(struct priq_class *);
 static int priq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
 static struct mbuf *priq_dequeue(struct ifaltq *, int);
 
 static int priq_addq(struct priq_class *, struct mbuf *);
 static struct mbuf *priq_getq(struct priq_class *);
 static struct mbuf *priq_pollq(struct priq_class *);
 static void priq_purgeq(struct priq_class *);
 
 #ifdef ALTQ3_COMPAT
 static int priqcmd_if_attach(struct priq_interface *);
 static int priqcmd_if_detach(struct priq_interface *);
 static int priqcmd_add_class(struct priq_add_class *);
 static int priqcmd_delete_class(struct priq_delete_class *);
 static int priqcmd_modify_class(struct priq_modify_class *);
 static int priqcmd_add_filter(struct priq_add_filter *);
 static int priqcmd_delete_filter(struct priq_delete_filter *);
 static int priqcmd_class_stats(struct priq_class_stats *);
 #endif /* ALTQ3_COMPAT */
 
 static void get_class_stats(struct priq_classstats *, struct priq_class *);
 static struct priq_class *clh_to_clp(struct priq_if *, u_int32_t);
 
 #ifdef ALTQ3_COMPAT
 altqdev_decl(priq);
 
 /* pif_list keeps all priq_if's allocated. */
 static struct priq_if *pif_list = NULL;
 #endif /* ALTQ3_COMPAT */
 
 /*
  * Attach priq to the send queue of the interface named a->ifname, using
  * the state already stored in a->altq_disc.
  *
  * Returns EINVAL if the interface is not found or a->altq_disc is NULL,
  * otherwise the result of altq_attach().
  */
 int
 priq_pfattach(struct pf_altq *a)
 {
 	struct ifnet *ifp;
 	int s, error;
 
 	if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
 		return (EINVAL);
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	/* no classifier is registered here: the last two arguments are NULL */
 	error = altq_attach(&ifp->if_snd, ALTQT_PRIQ, a->altq_disc,
 	    priq_enqueue, priq_dequeue, priq_request, NULL, NULL);
 	splx(s);
 	return (error);
 }
 
 /*
  * Allocate zeroed priq state for the interface named a->ifname,
  * initialise it from a->ifbandwidth and store it in a->altq_disc.
  *
  * Returns EINVAL if the interface is not found, ENODEV if its send
  * queue fails ALTQ_IS_READY(), ENOMEM if the allocation fails, and 0
  * otherwise.
  */
 int
 priq_add_altq(struct pf_altq *a)
 {
 	struct ifnet	*ifp;
 	struct priq_if	*state;
 
 	ifp = ifunit(a->ifname);
 	if (ifp == NULL)
 		return (EINVAL);
 	if (!ALTQ_IS_READY(&ifp->if_snd))
 		return (ENODEV);
 
 	state = malloc(sizeof(*state), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (state == NULL)
 		return (ENOMEM);
 
 	/* pif_maxpri of -1 means no class has been created yet */
 	state->pif_ifq = &ifp->if_snd;
 	state->pif_maxpri = -1;
 	state->pif_bandwidth = a->ifbandwidth;
 
 	/* keep the state in pf_altq */
 	a->altq_disc = state;
 
 	return (0);
 }
 
 /*
  * Release the priq state stored in a->altq_disc: the pointer is
  * cleared, all classes are destroyed and the state is freed.
  *
  * Returns EINVAL if a->altq_disc is NULL, and 0 otherwise.
  */
 int
 priq_remove_altq(struct pf_altq *a)
 {
 	struct priq_if *state = a->altq_disc;
 
 	if (state == NULL)
 		return (EINVAL);
 
 	a->altq_disc = NULL;
 	(void)priq_clear_interface(state);
 	free(state, M_DEVBUF);
 
 	return (0);
 }
 
 /*
  * Create a priq class at priority a->priority with handle a->qid on the
  * state stored in a->altq_disc.
  *
  * Returns EINVAL if there is no state, the priority is not below
  * PRIQ_MAXPRI or the handle is 0; EBUSY if the priority slot or the
  * handle is already in use; ENOMEM if priq_class_create() fails; and 0
  * otherwise.
  */
 int
 priq_add_queue(struct pf_altq *a)
 {
 	struct priq_if *state = a->altq_disc;
 
 	if (state == NULL)
 		return (EINVAL);
 
 	/* check parameters */
 	if (a->priority >= PRIQ_MAXPRI || a->qid == 0)
 		return (EINVAL);
 	if (state->pif_classes[a->priority] != NULL ||
 	    clh_to_clp(state, a->qid) != NULL)
 		return (EBUSY);
 
 	if (priq_class_create(state, a->priority, a->qlimit,
 	    a->pq_u.priq_opts.flags, a->qid) == NULL)
 		return (ENOMEM);
 
 	return (0);
 }
 
 /*
  * Destroy the class with handle a->qid on the state stored in
  * a->altq_disc.
  *
  * Returns EINVAL if there is no state or no class has that handle,
  * otherwise the result of priq_class_destroy().
  */
 int
 priq_remove_queue(struct pf_altq *a)
 {
 	struct priq_if *state = a->altq_disc;
 	struct priq_class *victim;
 
 	if (state == NULL)
 		return (EINVAL);
 
 	victim = clh_to_clp(state, a->qid);
 	if (victim == NULL)
 		return (EINVAL);
 
 	return (priq_class_destroy(victim));
 }
 
 /*
  * Copy the statistics of the class with handle a->qid on interface
  * a->ifname out to ubuf with copyout().
  *
  * On entry *nbytes is the size of the buffer at ubuf; on success it is
  * set to the number of bytes written, sizeof(struct priq_classstats).
  *
  * Returns EBADF if altq_lookup() finds no priq state for the interface,
  * EINVAL if the handle matches no class or the buffer is too small
  * (a negative size included), the copyout() error if the copy fails,
  * and 0 otherwise.
  */
 int
 priq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
 {
 	struct priq_if *pif;
 	struct priq_class *cl;
 	struct priq_classstats stats;
 	int error = 0;
 
 	if ((pif = altq_lookup(a->ifname, ALTQT_PRIQ)) == NULL)
 		return (EBADF);
 
 	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
 		return (EINVAL);
 
 	/*
 	 * *nbytes is a signed int: compare as int so that a negative size
 	 * is rejected instead of being converted to a huge unsigned value
 	 * that would pass the check.
 	 */
 	if (*nbytes < (int)sizeof(stats))
 		return (EINVAL);
 
 	get_class_stats(&stats, cl);
 
 	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
 		return (error);
 	*nbytes = sizeof(stats);
 	return (0);
 }
 
 /*
  * Reset an interface to its initial state: drop every filter (when
  * ALTQ3_CLFIER_COMPAT is defined) and destroy every class.
  * Always returns 0.
  */
 static int
 priq_clear_interface(struct priq_if *pif)
 {
 	struct priq_class	*cl;
 	int pri;
 
 #ifdef ALTQ3_CLFIER_COMPAT
 	/* free the filters for this interface */
 	acc_discard_filters(&pif->pif_classifier, NULL, 1);
 #endif
 
 	/*
 	 * clear out the classes; pif_maxpri is re-read on every pass
 	 * because priq_class_destroy() updates it.
 	 */
 	pri = 0;
 	while (pri <= pif->pif_maxpri) {
 		cl = pif->pif_classes[pri];
 		if (cl != NULL)
 			priq_class_destroy(cl);
 		pri++;
 	}
 
 	return (0);
 }
 
 /*
  * Request handler registered with altq_attach().  Only ALTRQ_PURGE is
  * acted upon, by purging every class queue; any other request is
  * ignored.  The queue lock is asserted on entry.  Always returns 0;
  * arg is not used.
  */
 static int
 priq_request(struct ifaltq *ifq, int req, void *arg)
 {
 	struct priq_if	*state = (struct priq_if *)ifq->altq_disc;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (req == ALTRQ_PURGE)
 		priq_purge(state);
 
 	return (0);
 }
 
 /*
  * Drop every packet queued on the interface: each non-empty class queue
  * is purged and, if altq is enabled on the queue, its length is reset
  * to 0.
  */
 static void
 priq_purge(struct priq_if *pif)
 {
 	struct priq_class *cl;
 	int pri;
 
 	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
 		cl = pif->pif_classes[pri];
 		if (cl == NULL || qempty(cl->cl_q))
 			continue;
 		priq_purgeq(cl);
 	}
 
 	if (ALTQ_IS_ENABLED(pif->pif_ifq))
 		pif->pif_ifq->ifq_len = 0;
 }
 
 /*
  * Create the class at priority pri, or re-initialise the class already
  * stored in that slot.
  *
  * An existing class has its queue purged and its RED/RIO state
  * destroyed; otherwise a zeroed class and queue are allocated.  The
  * class is then set up as a drop-tail queue with the given limit (50
  * when qlimit is 0), flags and handle, and switched to RED or RIO when
  * the flags ask for it and ALTQ_RED is compiled in.
  *
  * Returns the class, or NULL if RED was requested without ALTQ_RED or
  * an allocation failed.
  */
 static struct priq_class *
 priq_class_create(struct priq_if *pif, int pri, int qlimit, int flags, int qid)
 {
 	struct priq_class *cl;
 	int s;
 
 #ifndef ALTQ_RED
 	if (flags & PRCF_RED) {
 #ifdef ALTQ_DEBUG
 		printf("priq_class_create: RED not configured for PRIQ!\n");
 #endif
 		return (NULL);
 	}
 #endif
 
 	if ((cl = pif->pif_classes[pri]) != NULL) {
 		/* modify the class instead of creating a new one */
-#ifdef __NetBSD__
 		s = splnet();
-#else
-		s = splimp();
-#endif
 		IFQ_LOCK(cl->cl_pif->pif_ifq);
 		if (!qempty(cl->cl_q))
 			priq_purgeq(cl);
 		IFQ_UNLOCK(cl->cl_pif->pif_ifq);
 		splx(s);
 		/*
 		 * NOTE(review): cl->cl_red is not reset to NULL after the
 		 * destroy calls below.
 		 */
 #ifdef ALTQ_RIO
 		if (q_is_rio(cl->cl_q))
 			rio_destroy((rio_t *)cl->cl_red);
 #endif
 #ifdef ALTQ_RED
 		if (q_is_red(cl->cl_q))
 			red_destroy(cl->cl_red);
 #endif
 	} else {
 		cl = malloc(sizeof(struct priq_class), M_DEVBUF,
 		    M_NOWAIT | M_ZERO);
 		if (cl == NULL)
 			return (NULL);
 
 		cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF,
 		    M_NOWAIT | M_ZERO);
 		if (cl->cl_q == NULL)
 			goto err_ret;
 	}
 
 	/* publish the class and (re)initialise it as a drop-tail queue */
 	pif->pif_classes[pri] = cl;
 	if (flags & PRCF_DEFAULTCLASS)
 		pif->pif_default = cl;
 	if (qlimit == 0)
 		qlimit = 50;  /* use default */
 	qlimit(cl->cl_q) = qlimit;
 	qtype(cl->cl_q) = Q_DROPTAIL;
 	qlen(cl->cl_q) = 0;
 	cl->cl_flags = flags;
 	cl->cl_pri = pri;
 	if (pri > pif->pif_maxpri)
 		pif->pif_maxpri = pri;
 	cl->cl_pif = pif;
 	cl->cl_handle = qid;
 
 #ifdef ALTQ_RED
 	if (flags & (PRCF_RED|PRCF_RIO)) {
 		int red_flags, red_pkttime;
 
 		red_flags = 0;
 		if (flags & PRCF_ECN)
 			red_flags |= REDF_ECN;
 #ifdef ALTQ_RIO
 		if (flags & PRCF_CLEARDSCP)
 			red_flags |= RIOF_CLEARDSCP;
 #endif
 		/* packet time derived from the interface MTU and bandwidth */
 		if (pif->pif_bandwidth < 8)
 			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
 		else
 			red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
 			  * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
 #ifdef ALTQ_RIO
 		if (flags & PRCF_RIO) {
 			cl->cl_red = (red_t *)rio_alloc(0, NULL,
 						red_flags, red_pkttime);
 			if (cl->cl_red == NULL)
 				goto err_ret;
 			qtype(cl->cl_q) = Q_RIO;
 		} else
 #endif
 		if (flags & PRCF_RED) {
 			/* thresholds at 10% and 30% of the queue limit */
 			cl->cl_red = red_alloc(0, 0,
 			    qlimit(cl->cl_q) * 10/100,
 			    qlimit(cl->cl_q) * 30/100,
 			    red_flags, red_pkttime);
 			if (cl->cl_red == NULL)
 				goto err_ret;
 			qtype(cl->cl_q) = Q_RED;
 		}
 	}
 #endif /* ALTQ_RED */
 
 	return (cl);
 
 	/*
 	 * NOTE(review): when a RED/RIO allocation fails, cl has already
 	 * been stored in pif->pif_classes[pri] (and possibly
 	 * pif->pif_default) above; this path frees cl without clearing
 	 * those pointers.
 	 */
  err_ret:
 	if (cl->cl_red != NULL) {
 #ifdef ALTQ_RIO
 		if (q_is_rio(cl->cl_q))
 			rio_destroy((rio_t *)cl->cl_red);
 #endif
 #ifdef ALTQ_RED
 		if (q_is_red(cl->cl_q))
 			red_destroy(cl->cl_red);
 #endif
 	}
 	if (cl->cl_q != NULL)
 		free(cl->cl_q, M_DEVBUF);
 	free(cl, M_DEVBUF);
 	return (NULL);
 }
 
 /*
  * Destroy a class: discard filters that reference it (when
  * ALTQ3_CLFIER_COMPAT is defined), purge its queue, remove it from the
  * interface's class table, recompute pif_maxpri if needed, then release
  * its RED/RIO state, its queue and the class itself.
  * Always returns 0.
  */
 static int
 priq_class_destroy(struct priq_class *cl)
 {
 	struct priq_if *pif;
 	int s, pri;
 
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	IFQ_LOCK(cl->cl_pif->pif_ifq);
 
 #ifdef ALTQ3_CLFIER_COMPAT
 	/* delete filters referencing to this class */
 	acc_discard_filters(&cl->cl_pif->pif_classifier, cl, 0);
 #endif
 
 	if (!qempty(cl->cl_q))
 		priq_purgeq(cl);
 
 	pif = cl->cl_pif;
 	pif->pif_classes[cl->cl_pri] = NULL;
 	/*
 	 * if this was the highest priority in use, search downwards for
 	 * the next occupied slot; -1 means no class is left.
 	 */
 	if (pif->pif_maxpri == cl->cl_pri) {
 		for (pri = cl->cl_pri; pri >= 0; pri--)
 			if (pif->pif_classes[pri] != NULL) {
 				pif->pif_maxpri = pri;
 				break;
 			}
 		if (pri < 0)
 			pif->pif_maxpri = -1;
 	}
 	/* NOTE(review): pif->pif_default is not cleared if it points at cl */
 	IFQ_UNLOCK(cl->cl_pif->pif_ifq);
 	splx(s);
 
 	if (cl->cl_red != NULL) {
 #ifdef ALTQ_RIO
 		if (q_is_rio(cl->cl_q))
 			rio_destroy((rio_t *)cl->cl_red);
 #endif
 #ifdef ALTQ_RED
 		if (q_is_red(cl->cl_q))
 			red_destroy(cl->cl_red);
 #endif
 	}
 	free(cl->cl_q, M_DEVBUF);
 	free(cl, M_DEVBUF);
 	return (0);
 }
 
 /*
  * priq_enqueue is an enqueue function to be registered to
  * (*altq_enqueue) in struct ifaltq.
  *
  * The class is taken from the packet's pf tag when one is present, else
  * (ALTQ3_COMPAT only) from pktattr when classification is enabled, and
  * falls back to the interface's default class.
  *
  * Returns 0 when the packet was queued.  Returns ENOBUFS when the mbuf
  * has no packet header, when no class could be chosen, or when
  * priq_addq() dropped the packet; the mbuf has been freed in each case.
  * The queue lock is asserted on entry.
  */
 static int
 priq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
 {
 	struct priq_if	*pif = (struct priq_if *)ifq->altq_disc;
 	struct priq_class *cl;
 	struct pf_mtag *t;
 	int len;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	/* grab class set by classifier */
 	if ((m->m_flags & M_PKTHDR) == 0) {
 		/* should not happen */
 		printf("altq: packet for %s does not have pkthdr\n",
 		    ifq->altq_ifp->if_xname);
 		m_freem(m);
 		return (ENOBUFS);
 	}
 	cl = NULL;
 	if ((t = pf_find_mtag(m)) != NULL)
 		cl = clh_to_clp(pif, t->qid);
 #ifdef ALTQ3_COMPAT
 	else if ((ifq->altq_flags & ALTQF_CLASSIFY) && pktattr != NULL)
 		cl = pktattr->pattr_class;
 #endif
 	if (cl == NULL) {
 		/* no class matched: use the default class, or drop */
 		cl = pif->pif_default;
 		if (cl == NULL) {
 			m_freem(m);
 			return (ENOBUFS);
 		}
 	}
 #ifdef ALTQ3_COMPAT
 	if (pktattr != NULL)
 		cl->cl_pktattr = pktattr;  /* save proto hdr used by ECN */
 	else
 #endif
 		cl->cl_pktattr = NULL;
 	/* read the length first: priq_addq() frees the mbuf on a drop */
 	len = m_pktlen(m);
 	if (priq_addq(cl, m) != 0) {
 		/* drop occurred.  mbuf was freed in priq_addq. */
 		PKTCNTR_ADD(&cl->cl_dropcnt, len);
 		return (ENOBUFS);
 	}
 	IFQ_INC_LEN(ifq);
 
 	/* successfully queued. */
 	return (0);
 }
 
 /*
  * priq_dequeue is the dequeue function registered to (*altq_dequeue)
  * in struct ifaltq.  It serves the highest-priority class whose queue
  * is not empty.
  *
  * note: ALTDQ_POLL returns the next packet without removing the packet
  *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
  *	ALTDQ_REMOVE must return the same packet if called immediately
  *	after ALTDQ_POLL.
  *
  * Returns NULL when the interface queue is empty or no class yields a
  * packet.  The queue lock is asserted on entry.
  */
 static struct mbuf *
 priq_dequeue(struct ifaltq *ifq, int op)
 {
 	struct priq_if	*state = (struct priq_if *)ifq->altq_disc;
 	struct priq_class *cl;
 	struct mbuf *pkt;
 	int pri;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	/* no packet in the queue */
 	if (IFQ_IS_EMPTY(ifq))
 		return (NULL);
 
 	for (pri = state->pif_maxpri; pri >= 0; pri--) {
 		cl = state->pif_classes[pri];
 		if (cl == NULL || qempty(cl->cl_q))
 			continue;
 
 		if (op == ALTDQ_POLL)
 			return (priq_pollq(cl));
 
 		pkt = priq_getq(cl);
 		if (pkt == NULL)
 			return (NULL);
 
 		IFQ_DEC_LEN(ifq);
 		/* the class queue just drained: count one more period */
 		if (qempty(cl->cl_q))
 			cl->cl_period++;
 		PKTCNTR_ADD(&cl->cl_xmitcnt, m_pktlen(pkt));
 		return (pkt);
 	}
 
 	return (NULL);
 }
 
 /*
  * Add a packet to a class queue.  RIO and RED queues are delegated to
  * rio_addq()/red_addq(); a drop-tail queue frees the packet and returns
  * -1 once its length has reached its limit, otherwise the packet is
  * appended (after clearing its DS field when PRCF_CLEARDSCP is set) and
  * 0 is returned.
  */
 static int
 priq_addq(struct priq_class *cl, struct mbuf *m)
 {
 
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->cl_q))
 		return rio_addq((rio_t *)cl->cl_red, cl->cl_q, m,
 				cl->cl_pktattr);
 #endif
 #ifdef ALTQ_RED
 	if (q_is_red(cl->cl_q))
 		return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
 #endif
 	if (qlen(cl->cl_q) < qlimit(cl->cl_q)) {
 		if (cl->cl_flags & PRCF_CLEARDSCP)
 			write_dsfield(m, cl->cl_pktattr, 0);
 
 		_addq(cl->cl_q, m);
 		return (0);
 	}
 
 	/* queue is full: tail drop */
 	m_freem(m);
 	return (-1);
 }
 
 static struct mbuf *
 priq_getq(struct priq_class *cl)
 {
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->cl_q))
 		return rio_getq((rio_t *)cl->cl_red, cl->cl_q);
 #endif
 #ifdef ALTQ_RED
 	if (q_is_red(cl->cl_q))
 		return red_getq(cl->cl_red, cl->cl_q);
 #endif
 	return _getq(cl->cl_q);
 }
 
 /*
  * Return the packet at the head of a class queue without removing it.
  */
 static struct mbuf *
 priq_pollq(struct priq_class *cl)
 {
 	return qhead(cl->cl_q);
 }
 
 /*
  * Empty a class queue: every packet is removed with _getq(), added to
  * the class drop counter and freed.
  */
 static void
 priq_purgeq(struct priq_class *cl)
 {
 	struct mbuf *pkt;
 
 	if (!qempty(cl->cl_q)) {
 		for (pkt = _getq(cl->cl_q); pkt != NULL;
 		    pkt = _getq(cl->cl_q)) {
 			PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(pkt));
 			m_freem(pkt);
 		}
 		ASSERT(qlen(cl->cl_q) == 0);
 	}
 }
 
 static void
 get_class_stats(struct priq_classstats *sp, struct priq_class *cl)
 {
 	sp->class_handle = cl->cl_handle;
 	sp->qlength = qlen(cl->cl_q);
 	sp->qlimit = qlimit(cl->cl_q);
 	sp->period = cl->cl_period;
 	sp->xmitcnt = cl->cl_xmitcnt;
 	sp->dropcnt = cl->cl_dropcnt;
 
 	sp->qtype = qtype(cl->cl_q);
 #ifdef ALTQ_RED
 	if (q_is_red(cl->cl_q))
 		red_getstats(cl->cl_red, &sp->red[0]);
 #endif
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->cl_q))
 		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
 #endif
 
 }
 
 /*
  * Map a class handle to its class by scanning the class table from
  * pif_maxpri down to 0.  Returns NULL for handle 0 or when no class
  * carries the handle.
  */
 static struct priq_class *
 clh_to_clp(struct priq_if *pif, u_int32_t chandle)
 {
 	struct priq_class *cl;
 	int pri;
 
 	if (chandle == 0)
 		return (NULL);
 
 	pri = pif->pif_maxpri;
 	while (pri >= 0) {
 		cl = pif->pif_classes[pri];
 		if (cl != NULL && cl->cl_handle == chandle)
 			return (cl);
 		pri--;
 	}
 
 	return (NULL);
 }
 
 
 #ifdef ALTQ3_COMPAT
 
 static struct priq_if *
 priq_attach(ifq, bandwidth)
 	struct ifaltq *ifq;
 	u_int bandwidth;
 {
 	struct priq_if *pif;
 
 	pif = malloc(sizeof(struct priq_if),
 	       M_DEVBUF, M_WAITOK);
 	if (pif == NULL)
 		return (NULL);
 	bzero(pif, sizeof(struct priq_if));
 	pif->pif_bandwidth = bandwidth;
 	pif->pif_maxpri = -1;
 	pif->pif_ifq = ifq;
 
 	/* add this state to the priq list */
 	pif->pif_next = pif_list;
 	pif_list = pif;
 
 	return (pif);
 }
 
 /*
  * Release priq state: destroy its classes, unlink it from pif_list and
  * free it.  The state is asserted to be on the list.
  * Always returns 0.
  */
 static int
 priq_detach(pif)
 	struct priq_if *pif;
 {
 	struct priq_if *prev;
 
 	(void)priq_clear_interface(pif);
 
 	/* remove this interface from the pif list */
 	if (pif_list == pif) {
 		pif_list = pif->pif_next;
 	} else {
 		prev = pif_list;
 		while (prev != NULL && prev->pif_next != pif)
 			prev = prev->pif_next;
 		ASSERT(prev != NULL);
 		if (prev != NULL)
 			prev->pif_next = pif->pif_next;
 	}
 
 	free(pif, M_DEVBUF);
 	return (0);
 }
 
 /*
  * priq device interface
  */
 
 /*
  * priqopen: open entry point of the priq device.  Nothing is set up
  * here; none of the arguments is used.  Always returns 0.
  */
 int
 priqopen(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	/* the real work happens when the queueing scheme is attached */
 	return (0);
 }
 
 /*
  * priqclose: close entry point of the priq device.
  *
  * Tears down every interface on pif_list: the queue is disabled if it
  * is enabled, altq_detach() is called on it and, when that succeeds,
  * priq_detach() unlinks and frees the state.  An entry whose detach
  * fails is left on the list and the walk moves on to the next one.
  *
  * Returns the first non-zero error seen, or 0 if every step succeeded.
  * dev, flag, fmt and p are not used.
  */
 int
 priqclose(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	struct priq_if *pif, *next;
 	int err, error = 0;
 
 	/*
 	 * Walk with a saved link rather than re-reading the list head:
 	 * re-reading would retry the same entry forever if its detach
 	 * failed.
 	 */
 	for (pif = pif_list; pif != NULL; pif = next) {
 		/* priq_detach() frees pif, so fetch the link first */
 		next = pif->pif_next;
 
 		/* destroy all */
 		if (ALTQ_IS_ENABLED(pif->pif_ifq))
 			altq_disable(pif->pif_ifq);
 
 		err = altq_detach(pif->pif_ifq);
 		if (err == 0)
 			err = priq_detach(pif);
 		if (err != 0 && error == 0)
 			error = err;
 	}
 
 	return error;
 }
 
 /*
  * priqioctl: ioctl entry point of the priq device.
  *
  * Every command except PRIQ_GETSTATS must first pass the privilege
  * check.  The command is then dispatched to the matching priqcmd_*
  * handler, with addr cast to that command's argument structure.
  * PRIQ_ENABLE, PRIQ_DISABLE and PRIQ_CLEAR are handled inline after
  * looking up the priq state of the named interface.
  *
  * Returns the privilege-check error, EBADF when the interface lookup
  * fails, EINVAL for an unknown command or when enabling without a
  * default class, otherwise the handler's result.
  * dev and flag are not used.
  */
 int
 priqioctl(dev, cmd, addr, flag, p)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	caddr_t addr;
 	int flag;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	struct priq_if *pif;
 	struct priq_interface *ifacep;
 	int	error = 0;
 
 	/* check super-user privilege */
 	switch (cmd) {
 	case PRIQ_GETSTATS:
 		break;
 	default:
 #if (__FreeBSD_version > 700000)
 		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
 			return (error);
 #elif (__FreeBSD_version > 400000)
 		if ((error = suser(p)) != 0)
 			return (error);
 #else
 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
 			return (error);
 #endif
 		break;
 	}
 
 	switch (cmd) {
 
 	case PRIQ_IF_ATTACH:
 		error = priqcmd_if_attach((struct priq_interface *)addr);
 		break;
 
 	case PRIQ_IF_DETACH:
 		error = priqcmd_if_detach((struct priq_interface *)addr);
 		break;
 
 	case PRIQ_ENABLE:
 	case PRIQ_DISABLE:
 	case PRIQ_CLEAR:
 		/* these three share the interface lookup */
 		ifacep = (struct priq_interface *)addr;
 		if ((pif = altq_lookup(ifacep->ifname,
 				       ALTQT_PRIQ)) == NULL) {
 			error = EBADF;
 			break;
 		}
 
 		switch (cmd) {
 		case PRIQ_ENABLE:
 			/* refuse to enable without a default class */
 			if (pif->pif_default == NULL) {
 #ifdef ALTQ_DEBUG
 				printf("priq: no default class\n");
 #endif
 				error = EINVAL;
 				break;
 			}
 			error = altq_enable(pif->pif_ifq);
 			break;
 
 		case PRIQ_DISABLE:
 			error = altq_disable(pif->pif_ifq);
 			break;
 
 		case PRIQ_CLEAR:
 			/* the result is not propagated; error stays 0 */
 			priq_clear_interface(pif);
 			break;
 		}
 		break;
 
 	case PRIQ_ADD_CLASS:
 		error = priqcmd_add_class((struct priq_add_class *)addr);
 		break;
 
 	case PRIQ_DEL_CLASS:
 		error = priqcmd_delete_class((struct priq_delete_class *)addr);
 		break;
 
 	case PRIQ_MOD_CLASS:
 		error = priqcmd_modify_class((struct priq_modify_class *)addr);
 		break;
 
 	case PRIQ_ADD_FILTER:
 		error = priqcmd_add_filter((struct priq_add_filter *)addr);
 		break;
 
 	case PRIQ_DEL_FILTER:
 		error = priqcmd_delete_filter((struct priq_delete_filter *)addr);
 		break;
 
 	case PRIQ_GETSTATS:
 		error = priqcmd_class_stats((struct priq_class_stats *)addr);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 	return error;
 }
 
 /*
  * PRIQ_IF_ATTACH handler: look up the interface named in ap, create
  * priq state for its send queue (ap->arg is passed as the bandwidth)
  * and register priq as the discipline of that queue.  The state is
  * torn down again if registration fails.
  *
  * Returns ENXIO when the interface is not found, ENOMEM when
  * priq_attach() returns NULL, otherwise the result of altq_attach().
  */
 static int
 priqcmd_if_attach(ap)
 	struct priq_interface *ap;
 {
 	struct ifnet *ifp;
 	struct priq_if *state;
 	int rv;
 
 	ifp = ifunit(ap->ifname);
 	if (ifp == NULL)
 		return (ENXIO);
 
 	state = priq_attach(&ifp->if_snd, ap->arg);
 	if (state == NULL)
 		return (ENOMEM);
 
 	/* hook the priq handlers and classifier into this ifnet's queue */
 	rv = altq_attach(&ifp->if_snd, ALTQT_PRIQ, state,
 	    priq_enqueue, priq_dequeue, priq_request,
 	    &state->pif_classifier, acc_classify);
 	if (rv != 0)
 		(void)priq_detach(state);
 
 	return (rv);
 }
 
 /*
  * PRIQ_IF_DETACH handler: disable the queue if it is enabled, detach
  * altq from it and then release the priq state.
  *
  * Returns EBADF when no priq state is found for the interface, the
  * altq_detach() error if that step fails, otherwise the result of
  * priq_detach().
  */
 static int
 priqcmd_if_detach(ap)
 	struct priq_interface *ap;
 {
 	struct priq_if *state;
 	int rv;
 
 	state = altq_lookup(ap->ifname, ALTQT_PRIQ);
 	if (state == NULL)
 		return (EBADF);
 
 	if (ALTQ_IS_ENABLED(state->pif_ifq))
 		altq_disable(state->pif_ifq);
 
 	rv = altq_detach(state->pif_ifq);
 	if (rv != 0)
 		return (rv);
 
 	return priq_detach(state);
 }
 
 /*
  * PRIQ_ADD_CLASS handler: create a class at priority ap->pri, using
  * ap->pri + 1 as its handle, and hand the handle back in
  * ap->class_handle.
  *
  * Returns EBADF (no priq state for the interface), EINVAL (priority
  * outside 0..PRIQ_MAXPRI-1), EBUSY (priority slot already taken),
  * ENOMEM (class creation failed) or 0.
  */
 static int
 priqcmd_add_class(ap)
 	struct priq_add_class *ap;
 {
 	struct priq_if *state;
 	struct priq_class *created;
 
 	state = altq_lookup(ap->iface.ifname, ALTQT_PRIQ);
 	if (state == NULL)
 		return (EBADF);
 
 	if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI)
 		return (EINVAL);
 	if (state->pif_classes[ap->pri] != NULL)
 		return (EBUSY);
 
 	created = priq_class_create(state, ap->pri, ap->qlimit, ap->flags,
 	    ap->pri + 1);
 	if (created == NULL)
 		return (ENOMEM);
 
 	/* return a class handle to the user */
 	ap->class_handle = created->cl_handle;
 
 	return (0);
 }
 
 /*
  * PRIQ_DEL_CLASS handler: destroy the class identified by
  * ap->class_handle on the interface named in ap.
  *
  * Returns EBADF if no priq state is found for the interface, EINVAL if
  * the handle does not resolve to a class, otherwise the result of
  * priq_class_destroy().
  */
 static int
 priqcmd_delete_class(ap)
 	struct priq_delete_class *ap;
 {
 	struct priq_if *state;
 	struct priq_class *victim;
 
 	state = altq_lookup(ap->iface.ifname, ALTQT_PRIQ);
 	if (state == NULL)
 		return (EBADF);
 
 	victim = clh_to_clp(state, ap->class_handle);
 	if (victim == NULL)
 		return (EINVAL);
 
 	return priq_class_destroy(victim);
 }
 
 /*
  * PRIQ_MOD_CLASS handler: optionally move the class identified by
  * ap->class_handle to priority ap->pri, then re-initialise it through
  * priq_class_create() with the new limit and flags.
  *
  * Returns EBADF (no priq state for the interface), EINVAL (priority
  * out of range or unknown handle), EEXIST (another class already sits
  * at the target priority), ENOMEM (priq_class_create() failed) or 0.
  */
 static int
 priqcmd_modify_class(ap)
 	struct priq_modify_class *ap;
 {
 	struct priq_if *state;
 	struct priq_class *cl, *occupant;
 
 	state = altq_lookup(ap->iface.ifname, ALTQT_PRIQ);
 	if (state == NULL)
 		return (EBADF);
 
 	if (ap->pri < 0 || ap->pri >= PRIQ_MAXPRI)
 		return (EINVAL);
 
 	cl = clh_to_clp(state, ap->class_handle);
 	if (cl == NULL)
 		return (EINVAL);
 
 	/*
 	 * if priority is changed, move the class to the new priority
 	 */
 	occupant = state->pif_classes[ap->pri];
 	if (occupant != cl) {
 		if (occupant != NULL)
 			return (EEXIST);
 		state->pif_classes[cl->cl_pri] = NULL;
 		state->pif_classes[ap->pri] = cl;
 		cl->cl_pri = ap->pri;
 	}
 
 	/* call priq_class_create to change class parameters */
 	cl = priq_class_create(state, ap->pri, ap->qlimit, ap->flags,
 	    ap->class_handle);
 	if (cl == NULL)
 		return (ENOMEM);
 
 	return 0;
 }
 
 /*
  * PRIQ_ADD_FILTER handler: attach ap->filter to the class identified by
  * ap->class_handle.
  *
  * Returns EBADF if no priq state is found for the interface, EINVAL if
  * the handle does not resolve to a class, otherwise the result of
  * acc_add_filter().
  */
 static int
 priqcmd_add_filter(ap)
 	struct priq_add_filter *ap;
 {
 	struct priq_if *state;
 	struct priq_class *target;
 
 	state = altq_lookup(ap->iface.ifname, ALTQT_PRIQ);
 	if (state == NULL)
 		return (EBADF);
 
 	target = clh_to_clp(state, ap->class_handle);
 	if (target == NULL)
 		return (EINVAL);
 
 	return acc_add_filter(&state->pif_classifier, &ap->filter,
 	    target, &ap->filter_handle);
 }
 
 /*
  * PRIQ_DEL_FILTER handler: remove the filter identified by
  * ap->filter_handle from the interface's classifier.
  *
  * Returns EBADF if no priq state is found for the interface, otherwise
  * the result of acc_delete_filter().
  */
 static int
 priqcmd_delete_filter(ap)
 	struct priq_delete_filter *ap;
 {
 	struct priq_if *state;
 
 	state = altq_lookup(ap->iface.ifname, ALTQT_PRIQ);
 	if (state == NULL)
 		return (EBADF);
 
 	return acc_delete_filter(&state->pif_classifier, ap->filter_handle);
 }
 
 static int
 priqcmd_class_stats(ap)
 	struct priq_class_stats *ap;
 {
 	struct priq_if *pif;
 	struct priq_class *cl;
 	struct priq_classstats stats, *usp;
 	int	pri, error;
 
 	if ((pif = altq_lookup(ap->iface.ifname, ALTQT_PRIQ)) == NULL)
 		return (EBADF);
 
 	ap->maxpri = pif->pif_maxpri;
 
 	/* then, read the next N classes in the tree */
 	usp = ap->stats;
 	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
 		cl = pif->pif_classes[pri];
 		if (cl != NULL)
 			get_class_stats(&stats, cl);
 		else
 			bzero(&stats, sizeof(stats));
 		if ((error = copyout((caddr_t)&stats, (caddr_t)usp++,
 				     sizeof(stats))) != 0)
 			return (error);
 	}
 	return (0);
 }
 
 #ifdef KLD_MODULE
 
 /*
  * Switch entry handed to ALTQ_MODULE() below: the discipline name
  * followed by the open, close and ioctl routines defined in this file.
  */
 static struct altqsw priq_sw =
 	{"priq", priqopen, priqclose, priqioctl};
 
 /* Declare the altq_priq module and its dependencies on altq_red and altq_rio. */
 ALTQ_MODULE(altq_priq, ALTQT_PRIQ, &priq_sw);
 MODULE_DEPEND(altq_priq, altq_red, 1, 1, 1);
 MODULE_DEPEND(altq_priq, altq_rio, 1, 1, 1);
 
 #endif /* KLD_MODULE */
 
 #endif /* ALTQ3_COMPAT */
 #endif /* ALTQ_PRIQ */
Index: user/ngie/more-tests/sys/net/altq/altq_red.c
===================================================================
--- user/ngie/more-tests/sys/net/altq/altq_red.c	(revision 281675)
+++ user/ngie/more-tests/sys/net/altq/altq_red.c	(revision 281676)
@@ -1,1500 +1,1492 @@
-/*	$FreeBSD$	*/
-/*	$KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $	*/
-
-/*
+/*-
  * Copyright (C) 1997-2003
  *	Sony Computer Science Laboratories Inc.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
-/*
+/*-
  * Copyright (c) 1990-1994 Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the Computer Systems
  *	Engineering Group at Lawrence Berkeley Laboratory.
  * 4. Neither the name of the University nor of the Laboratory may be used
  *    to endorse or promote products derived from this software without
  *    specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
+ *
+ * $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $
+ * $FreeBSD$	
  */
 
-#if defined(__FreeBSD__) || defined(__NetBSD__)
 #include "opt_altq.h"
 #include "opt_inet.h"
-#ifdef __FreeBSD__
 #include "opt_inet6.h"
-#endif
-#endif /* __FreeBSD__ || __NetBSD__ */
 #ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #if 1 /* ALTQ3_COMPAT */
 #include <sys/sockio.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
 #ifdef ALTQ_FLOWVALVE
 #include <sys/queue.h>
 #include <sys/time.h>
 #endif
 #endif /* ALTQ3_COMPAT */
 
 #include <net/if.h>
 #include <net/if_var.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 
 #include <netpfil/pf/pf.h>
 #include <netpfil/pf/pf_altq.h>
 #include <netpfil/pf/pf_mtag.h>
 #include <net/altq/altq.h>
 #include <net/altq/altq_red.h>
 #ifdef ALTQ3_COMPAT
 #include <net/altq/altq_conf.h>
 #ifdef ALTQ_FLOWVALVE
 #include <net/altq/altq_flowvalve.h>
 #endif
 #endif
 
 /*
  * ALTQ/RED (Random Early Detection) implementation using 32-bit
  * fixed-point calculation.
  *
  * written by kjc using the ns code as a reference.
  * you can learn more about red and ns from Sally's home page at
  * http://www-nrg.ee.lbl.gov/floyd/
  *
  * most of the red parameter values are fixed in this implementation
  * to prevent fixed-point overflow/underflow.
  * if you change the parameters, watch out for overflow/underflow!
  *
  * the parameters used are recommended values by Sally.
  * the corresponding ns config looks:
  *	q_weight=0.00195
  *	minthresh=5 maxthresh=15 queue-size=60
  *	linterm=30
  *	dropmech=drop-tail
  *	bytes=false (can't be handled by 32-bit fixed-point)
  *	doubleq=false dqthresh=false
  *	wait=true
  */
 /*
  * alternative red parameters for a slow link.
  *
  * assume the queue length becomes from zero to L and keeps L, it takes
  * N packets for q_avg to reach 63% of L.
  * when q_weight is 0.002, N is about 500 packets.
  * for a slow link like dial-up, 500 packets takes more than 1 minute!
  * when q_weight is 0.008, N is about 127 packets.
  * when q_weight is 0.016, N is about 63 packets.
  * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
  * are allowed for 0.016.
  * see Sally's paper for more details.
  */
 /* normal red parameters */
 #define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
 				/* q_weight = 0.00195 */
 
 /* red parameters for a slow link */
 #define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
 				/* q_weight = 0.0078125 */
 
 /* red parameters for a very slow link (e.g., dialup) */
 #define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
 				/* q_weight = 0.015625 */
 
 /* fixed-point uses 12-bit decimal places */
 #define	FP_SHIFT	12	/* fixed-point shift */
 
 /* red parameters for drop probability */
 #define	INV_P_MAX	10	/* inverse of max drop probability */
 #define	TH_MIN		5	/* min threshold */
 #define	TH_MAX		15	/* max threshold */
 
 #define	RED_LIMIT	60	/* default max queue length */
 #define	RED_STATS		/* collect statistics */
 
 /*
  * our default policy for forced-drop is drop-tail.
  * (in altq-1.1.2 or earlier, the default was random-drop.
  * but it makes more sense to punish the cause of the surge.)
  * to switch to the random-drop policy, define "RED_RANDOM_DROP".
  */
 
 #ifdef ALTQ3_COMPAT
 #ifdef ALTQ_FLOWVALVE
 /*
  * flow-valve is an extension to protect red from unresponsive flows
  * and to promote end-to-end congestion control.
  * flow-valve observes the average drop rates of the flows that have
  * experienced packet drops in the recent past.
  * when the average drop rate exceeds the threshold, the flow is
  * blocked by the flow-valve.  the trapped flow should back off
  * exponentially to escape from the flow-valve.
  */
 #ifdef RED_RANDOM_DROP
 #error "random-drop can't be used with flow-valve!"
 #endif
 #endif /* ALTQ_FLOWVALVE */
 
 /* red_list keeps all red_queue_t's allocated. */
 static red_queue_t *red_list = NULL;
 
 #endif /* ALTQ3_COMPAT */
 
 /* default red parameter values */
 static int default_th_min = TH_MIN;
 static int default_th_max = TH_MAX;
 static int default_inv_pmax = INV_P_MAX;
 
 #ifdef ALTQ3_COMPAT
 /* internal function prototypes */
 static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
 static struct mbuf *red_dequeue(struct ifaltq *, int);
 static int red_request(struct ifaltq *, int, void *);
 static void red_purgeq(red_queue_t *);
 static int red_detach(red_queue_t *);
 #ifdef ALTQ_FLOWVALVE
 static __inline struct fve *flowlist_lookup(struct flowvalve *,
 			 struct altq_pktattr *, struct timeval *);
 static __inline struct fve *flowlist_reclaim(struct flowvalve *,
 					     struct altq_pktattr *);
 static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
 static __inline int fv_p2f(struct flowvalve *, int);
 #if 0 /* XXX: make the compiler happy (fv_alloc unused) */
 static struct flowvalve *fv_alloc(struct red *);
 #endif
 static void fv_destroy(struct flowvalve *);
 static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
 			struct fve **);
 static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
 			 struct fve *);
 #endif
 #endif /* ALTQ3_COMPAT */
 
 /*
  * red support routines
  */
 /*
  * Allocate and initialize a RED state block.
  *
  * A zero argument selects the default for that parameter:
  *   weight   - inverse EWMA weight; must be a power of 2.  0 selects
  *              W_WEIGHT, lowered to W_WEIGHT_1 / W_WEIGHT_2 for slow links
  *              (judged from pkttime).  A non power-of-2 value is rounded
  *              down to one and a message is printed.
  *   inv_pmax - inverse of the maximum drop probability.
  *   th_min, th_max - queue length thresholds.
  *   flags    - stored verbatim in red_flags.
  *   pkttime  - average packet time in usec; 0 selects 800.
  *
  * Returns the new state, or NULL when memory (for the state or for the
  * weight table) cannot be obtained without sleeping.
  */
 red_t *
 red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
    int pkttime)
 {
 	red_t	*rp;
 	int	 w, i;
 	int	 npkts_per_sec;
 
 	rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (rp == NULL)
 		return (NULL);
 
 	if (weight == 0)
 		rp->red_weight = W_WEIGHT;
 	else
 		rp->red_weight = weight;
 
 	rp->red_avg = 0;
 	rp->red_idle = 1;
 
 	if (inv_pmax == 0)
 		rp->red_inv_pmax = default_inv_pmax;
 	else
 		rp->red_inv_pmax = inv_pmax;
 	if (th_min == 0)
 		rp->red_thmin = default_th_min;
 	else
 		rp->red_thmin = th_min;
 	if (th_max == 0)
 		rp->red_thmax = default_th_max;
 	else
 		rp->red_thmax = th_max;
 
 	rp->red_flags = flags;
 
 	if (pkttime == 0)
 		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
 		rp->red_pkttime = 800;
 	else
 		rp->red_pkttime = pkttime;
 
 	if (weight == 0) {
 		/* when the link is very slow, adjust red parameters */
 		npkts_per_sec = 1000000 / rp->red_pkttime;
 		if (npkts_per_sec < 50) {
 			/* up to about 400Kbps */
 			rp->red_weight = W_WEIGHT_2;
 		} else if (npkts_per_sec < 300) {
 			/* up to about 2.4Mbps */
 			rp->red_weight = W_WEIGHT_1;
 		}
 	}
 
 	/* calculate wshift.  weight must be power of 2 */
 	w = rp->red_weight;
 	for (i = 0; w > 1; i++)
 		w = w >> 1;
 	rp->red_wshift = i;
 	w = 1 << rp->red_wshift;
 	if (w != rp->red_weight) {
 		printf("invalid weight value %d for red! use %d\n",
 		       rp->red_weight, w);
 		rp->red_weight = w;
 	}
 
 	/*
 	 * allocate weight table.  this must happen only after red_weight
 	 * has reached its final value (slow-link adjustment and power-of-2
 	 * rounding above); otherwise pow_w() would decay the average with
 	 * a table built for a different weight than red_wshift.
 	 */
 	rp->red_wtab = wtab_alloc(rp->red_weight);
 	if (rp->red_wtab == NULL) {
 		free(rp, M_DEVBUF);
 		return (NULL);
 	}
 
 	/*
 	 * thmin_s and thmax_s are scaled versions of th_min and th_max
 	 * to be compared with avg.
 	 */
 	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
 	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
 
 	/*
 	 * precompute probability denominator
 	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
 	 */
 	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
 			 * rp->red_inv_pmax) << FP_SHIFT;
 
 	microtime(&rp->red_last);
 	return (rp);
 }
 
 /*
  * Release a RED state block obtained from red_alloc(): tear down the
  * flow-valve (only when both ALTQ3_COMPAT and ALTQ_FLOWVALVE are
  * configured and one is attached), drop the reference on the weight
  * table, then free the block itself.
  */
 void
 red_destroy(red_t *rp)
 {
 #if defined(ALTQ3_COMPAT) && defined(ALTQ_FLOWVALVE)
 	if (rp->red_flowvalve != NULL)
 		fv_destroy(rp->red_flowvalve);
 #endif /* ALTQ3_COMPAT && ALTQ_FLOWVALVE */
 
 	wtab_destroy(rp->red_wtab);
 	free(rp, M_DEVBUF);
 }
 
 /*
  * Copy the counters kept in rp->red_stats into *sp and report the
  * current average queue length with the weight scaling (red_wshift)
  * shifted out.
  */
 void
 red_getstats(red_t *rp, struct redstats *sp)
 {
 	/* counters are copied verbatim */
 	sp->marked_packets = rp->red_stats.marked_packets;
 	sp->drop_unforced = rp->red_stats.drop_unforced;
 	sp->drop_forced = rp->red_stats.drop_forced;
 	sp->drop_cnt = rp->red_stats.drop_cnt;
 	sp->xmit_cnt = rp->red_stats.xmit_cnt;
 
 	/* red_avg is kept scaled by the weight; undo that for reporting */
 	sp->q_avg = rp->red_avg >> rp->red_wshift;
 }
 
 /*
  * Run the RED algorithm for an arriving packet m and either append it
  * to q or drop it.
  *
  * Returns 0 when the packet was queued.  Returns -1 when a packet was
  * dropped; the dropped mbuf has been freed with m_freem() and the
  * caller must not touch m afterwards.
  *
  * With ALTQ3_COMPAT and ALTQ_FLOWVALVE the flow-valve is consulted first
  * and may reject the packet before RED itself runs.
  */
 int
 red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
     struct altq_pktattr *pktattr)
 {
 	int avg, droptype;
 	int n;
 #ifdef ALTQ3_COMPAT
 #ifdef ALTQ_FLOWVALVE
 	struct fve *fve = NULL;
 
 	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
 		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
 			m_freem(m);
 			return (-1);
 		}
 #endif
 #endif /* ALTQ3_COMPAT */
 
 	avg = rp->red_avg;
 
 	/*
 	 * if we were idle, we pretend that n packets arrived during
 	 * the idle period.
 	 */
 	if (rp->red_idle) {
 		struct timeval now;
 		int t;
 
 		rp->red_idle = 0;
 		microtime(&now);
 		t = (now.tv_sec - rp->red_last.tv_sec);
 		if (t > 60) {
 			/*
 			 * being idle for more than 1 minute, set avg to zero.
 			 * this prevents t from overflow.
 			 */
 			avg = 0;
 		} else {
 			/* idle time in usec, then in units of packet times */
 			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
 			n = t / rp->red_pkttime - 1;
 
 			/* the following line does (avg = (1 - Wq)^n * avg) */
 			if (n > 0)
 				avg = (avg >> FP_SHIFT) *
 				    pow_w(rp->red_wtab, n);
 		}
 	}
 
 	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
 	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
 	rp->red_avg = avg;		/* save the new value */
 
 	/*
 	 * red_count keeps a tally of arriving traffic that has not
 	 * been dropped.
 	 */
 	rp->red_count++;
 
 	/* see if we drop early */
 	droptype = DTYPE_NODROP;
 	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
 		if (avg >= rp->red_thmax_s) {
 			/* avg >= th_max: forced drop */
 			droptype = DTYPE_FORCED;
 		} else if (rp->red_old == 0) {
 			/* first exceeds th_min */
 			rp->red_count = 1;
 			rp->red_old = 1;
 		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
 				      rp->red_probd, rp->red_count)) {
 			/* mark or drop by red */
 			if ((rp->red_flags & REDF_ECN) &&
 			    mark_ecn(m, pktattr, rp->red_flags)) {
 				/* successfully marked.  do not drop. */
 				rp->red_count = 0;
 #ifdef RED_STATS
 				rp->red_stats.marked_packets++;
 #endif
 			} else {
 				/* unforced drop by red */
 				droptype = DTYPE_EARLY;
 			}
 		}
 	} else {
 		/* avg < th_min */
 		rp->red_old = 0;
 	}
 
 	/*
 	 * if the queue length hits the hard limit, it's a forced drop.
 	 */
 	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
 		droptype = DTYPE_FORCED;
 
 #ifdef RED_RANDOM_DROP
 	/* if successful or forced drop, enqueue this packet. */
 	if (droptype != DTYPE_EARLY)
 		_addq(q, m);
 #else
 	/* if successful, enqueue this packet. */
 	if (droptype == DTYPE_NODROP)
 		_addq(q, m);
 #endif
 	if (droptype != DTYPE_NODROP) {
 		if (droptype == DTYPE_EARLY) {
 			/* drop the incoming packet */
 #ifdef RED_STATS
 			rp->red_stats.drop_unforced++;
 #endif
 		} else {
 			/* forced drop, select a victim packet in the queue. */
 #ifdef RED_RANDOM_DROP
 			m = _getq_random(q);
 #endif
 #ifdef RED_STATS
 			rp->red_stats.drop_forced++;
 #endif
 		}
 		/* from here on m is the packet being discarded */
 #ifdef RED_STATS
 		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
 #endif
 		rp->red_count = 0;
 #ifdef ALTQ3_COMPAT
 #ifdef ALTQ_FLOWVALVE
 		if (rp->red_flowvalve != NULL)
 			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
 #endif
 #endif /* ALTQ3_COMPAT */
 		m_freem(m);
 		return (-1);
 	}
 	/* successfully queued */
 #ifdef RED_STATS
 	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
 #endif
 	return (0);
 }
 
 /*
  * early-drop probability is calculated as follows:
  *   prob = p_max * (avg - th_min) / (th_max - th_min)
  *   prob_a = prob / (2 - count*prob)
  *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
  * here prob_a increases as successive undrop count increases.
  * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
  * becomes 1 when (count >= (2 / prob))).
  */
 int
 drop_early(int fp_len, int fp_probd, int count)
 {
 	int	denom;		/* denominator of drop-probability */
 
 	/*
 	 * a non-positive denominator means count has exceeded the hard
 	 * limit: always drop or mark.
 	 */
 	denom = fp_probd - count * fp_len;
 	if (denom <= 0)
 		return (1);
 
 	/*
 	 * now the range of denom is [1..600] in fixed-point. (when
 	 * th_max-th_min=10 and p_max=1/30)
 	 * drop probability = (avg - TH_MIN) / denom
 	 *
 	 * returns 1 for drop/mark, 0 for no drop/mark.
 	 */
 	return (((arc4random() % denom) < fp_len) ? 1 : 0);
 }
 
 /*
  * try to mark CE bit to the packet.
  *    returns 1 if successfully marked (or if the packet already carries
  *    CE), 0 otherwise.
  *
  * The IP header is taken from the pf mbuf tag attached to m; when there
  * is no tag and ALTQ3_COMPAT is configured, pktattr->pattr_hdr is used
  * instead.  The header pointer is verified to lie inside one of the mbufs
  * of the chain before it is dereferenced.  Marking is only attempted for
  * the address families enabled in flags (REDF_ECN4 / REDF_ECN6).
  */
 int
 mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
 {
 	struct mbuf	*m0;
 	struct pf_mtag	*at;
 	void		*hdr;
 
 	at = pf_find_mtag(m);
 	if (at != NULL) {
 		hdr = at->hdr;
 #ifdef ALTQ3_COMPAT
 	} else if (pktattr != NULL) {
 		/*
 		 * pattr_af is not consulted: the IP version is read from
 		 * the header itself below.
 		 */
 		hdr = pktattr->pattr_hdr;
 #endif /* ALTQ3_COMPAT */
 	} else
 		return (0);
 
 	/* verify that pattr_hdr is within the mbuf data */
 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
 		if (((caddr_t)hdr >= m0->m_data) &&
 		    ((caddr_t)hdr < m0->m_data + m0->m_len))
 			break;
 	if (m0 == NULL) {
 		/* ick, tag info is stale */
 		return (0);
 	}
 
 	switch (((struct ip *)hdr)->ip_v) {
 	case IPVERSION:
 		if (flags & REDF_ECN4) {
 			struct ip *ip = hdr;
 			u_int8_t otos;
 			int sum;
 
 			if (ip->ip_v != 4)
 				return (0);	/* version mismatch! */
 
 			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
 				return (0);	/* not-ECT */
 			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
 				return (1);	/* already marked */
 
 			/*
 			 * ecn-capable but not marked,
 			 * mark CE and update checksum
 			 */
 			otos = ip->ip_tos;
 			ip->ip_tos |= IPTOS_ECN_CE;
 			/*
 			 * update checksum (from RFC1624)
 			 *	   HC' = ~(~HC + ~m + m')
 			 */
 			sum = ~ntohs(ip->ip_sum) & 0xffff;
 			sum += (~otos & 0xffff) + ip->ip_tos;
 			sum = (sum >> 16) + (sum & 0xffff);
 			sum += (sum >> 16);  /* add carry */
 			ip->ip_sum = htons(~sum & 0xffff);
 			return (1);
 		}
 		break;
 #ifdef INET6
 	case (IPV6_VERSION >> 4):
 		if (flags & REDF_ECN6) {
 			struct ip6_hdr *ip6 = hdr;
 			u_int32_t flowlabel;
 
 			flowlabel = ntohl(ip6->ip6_flow);
 			if ((flowlabel >> 28) != 6)
 				return (0);	/* version mismatch! */
 			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
 			    (IPTOS_ECN_NOTECT << 20))
 				return (0);	/* not-ECT */
 			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
 			    (IPTOS_ECN_CE << 20))
 				return (1);	/* already marked */
 			/*
 			 * ecn-capable but not marked,  mark CE
 			 */
 			flowlabel |= (IPTOS_ECN_CE << 20);
 			ip6->ip6_flow = htonl(flowlabel);
 			return (1);
 		}
 		break;
 #endif  /* INET6 */
 	}
 
 	/* not marked */
 	return (0);
 }
 
 /*
  * Take the next packet off q via _getq().
  *
  * When the queue turns out to be empty and RED was not already idle,
  * the idle flag is set and the current time is recorded in red_last so
  * that red_addq() can age the average later.  Returns the packet, or
  * NULL when the queue is empty.
  */
 struct mbuf *
 red_getq(red_t *rp, class_queue_t *q)
 {
 	struct mbuf *pkt;
 
 	pkt = _getq(q);
 	if (pkt != NULL) {
 		rp->red_idle = 0;
 		return (pkt);
 	}
 
 	/* queue drained: start the idle period once */
 	if (rp->red_idle == 0) {
 		rp->red_idle = 1;
 		microtime(&rp->red_last);
 	}
 	return (NULL);
 }
 
 /*
  * helper routine to calibrate avg during idle.
  * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
  * here Wq = 1/weight and the code assumes Wq is close to zero.
  *
  * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
  */
 static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
 
 /*
  * Get the weight table for the given weight.
  *
  * Tables are shared: if wtab_list already holds one for this weight its
  * reference count is bumped and it is returned.  Otherwise a new table
  * is allocated (without sleeping), linked at the head of wtab_list and
  * filled in.  Returns NULL when the allocation fails.
  */
 struct wtab *
 wtab_alloc(int weight)
 {
 	struct wtab	*tab;
 	int		 k;
 
 	/* reuse an existing table built for the same weight */
 	tab = wtab_list;
 	while (tab != NULL) {
 		if (tab->w_weight == weight) {
 			tab->w_refcount++;
 			return (tab);
 		}
 		tab = tab->w_next;
 	}
 
 	tab = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (tab == NULL)
 		return (NULL);
 	tab->w_weight = weight;
 	tab->w_refcount = 1;
 	tab->w_next = wtab_list;
 	wtab_list = tab;
 
 	/*
 	 * w_tab[k] = w_tab[k-1]^2 in fixed-point, starting from
 	 * (weight-1)/weight.  w_param_max records 2^k for the first entry
 	 * that underflows to zero.
 	 */
 	tab->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
 	for (k = 1; k < 32; k++) {
 		tab->w_tab[k] =
 		    (tab->w_tab[k-1] * tab->w_tab[k-1]) >> FP_SHIFT;
 		if (tab->w_param_max == 0 && tab->w_tab[k] == 0)
 			tab->w_param_max = 1 << k;
 	}
 
 	return (tab);
 }
 
 /*
  * Drop one reference on a weight table.  When the last reference goes
  * away the table is unlinked from wtab_list and freed.  Always
  * returns 0.
  */
 int
 wtab_destroy(struct wtab *w)
 {
 	struct wtab	*cur;
 
 	w->w_refcount--;
 	if (w->w_refcount > 0)
 		return (0);
 
 	if (wtab_list == w) {
 		wtab_list = w->w_next;
 	} else {
 		/* find the predecessor of w and splice w out */
 		cur = wtab_list;
 		while (cur->w_next != NULL) {
 			if (cur->w_next == w) {
 				cur->w_next = w->w_next;
 				break;
 			}
 			cur = cur->w_next;
 		}
 	}
 
 	free(w, M_DEVBUF);
 	return (0);
 }
 
 /*
  * Compute (1 - Wq)^n in fixed-point from the precomputed table, by
  * multiplying together the w_tab[] entries selected by the set bits
  * of n.
  *
  * Returns 0 when n >= w_param_max, and 1.0 in fixed-point
  * (1 << FP_SHIFT) when n <= 0.
  */
 int32_t
 pow_w(struct wtab *w, int n)
 {
 	int32_t	acc;
 	int	idx;
 
 	if (n >= w->w_param_max)
 		return (0);
 
 	acc = 1 << FP_SHIFT;
 	if (n <= 0)
 		return (acc);
 
 	/* consume n one bit at a time, lowest bit first */
 	for (idx = 0; n != 0; idx++, n >>= 1) {
 		if (n & 1)
 			acc = (acc * w->w_tab[idx]) >> FP_SHIFT;
 	}
 	return (acc);
 }
 
 #ifdef ALTQ3_COMPAT
 /*
  * red device interface
  */
 altqdev_decl(red);
 
 /*
  * Device open entry point.  Nothing is set up here; all state is
  * created when the queueing scheme is attached (RED_IF_ATTACH).
  * Always returns 0.
  */
 int
 redopen(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	return (0);
 }
 
 int
 redclose(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	red_queue_t *rqp;
 	int err, error = 0;
 
 	while ((rqp = red_list) != NULL) {
 		/* destroy all */
 		err = red_detach(rqp);
 		if (err != 0 && error == 0)
 			error = err;
 	}
 
 	return error;
 }
 
 /*
  * Device ioctl entry point for RED.
  *
  * Every command except RED_GETSTATS requires privilege.  addr points to
  * the command-specific argument structure:
  *   RED_ENABLE / RED_DISABLE - struct red_interface; enable or disable
  *                              ALTQ on the named interface.
  *   RED_IF_ATTACH            - struct red_interface; create RED state with
  *                              default parameters and attach it to the
  *                              interface's send queue.
  *   RED_IF_DETACH            - struct red_interface; detach and free.
  *   RED_GETSTATS             - struct red_stats; filled in.
  *   RED_CONFIG               - struct red_conf; replace the RED parameters
  *                              (the queue is purged) and write back the
  *                              values actually in effect.
  *   RED_SETDEFAULTS          - struct redparams; set the defaults used by
  *                              red_alloc() for zero arguments.
  *
  * Returns 0 on success or an errno value (EBADF when the interface has
  * no RED state, ENXIO when the interface does not exist, ENOMEM, EINVAL
  * for an unknown command, or the error of the privilege check or
  * altq_*() call).
  */
 int
 redioctl(dev, cmd, addr, flag, p)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	caddr_t addr;
 	int flag;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	red_queue_t *rqp;
 	struct red_interface *ifacep;
 	struct ifnet *ifp;
 	int	error = 0;
 
 	/* check super-user privilege */
 	switch (cmd) {
 	case RED_GETSTATS:
 		break;
 	default:
 #if (__FreeBSD_version > 700000)
 		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
 #elif (__FreeBSD_version > 400000)
 		if ((error = suser(p)) != 0)
 #else
 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
 #endif
 			return (error);
 		break;
 	}
 
 	switch (cmd) {
 
 	case RED_ENABLE:
 		ifacep = (struct red_interface *)addr;
 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
 			error = EBADF;
 			break;
 		}
 		error = altq_enable(rqp->rq_ifq);
 		break;
 
 	case RED_DISABLE:
 		ifacep = (struct red_interface *)addr;
 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
 			error = EBADF;
 			break;
 		}
 		error = altq_disable(rqp->rq_ifq);
 		break;
 
 	case RED_IF_ATTACH:
 		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
 		if (ifp == NULL) {
 			error = ENXIO;
 			break;
 		}
 
 		/* allocate and initialize red_queue_t */
 		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
 		if (rqp == NULL) {
 			error = ENOMEM;
 			break;
 		}
 		bzero(rqp, sizeof(red_queue_t));
 
 		rqp->rq_q = malloc(sizeof(class_queue_t),
 		       M_DEVBUF, M_WAITOK);
 		if (rqp->rq_q == NULL) {
 			free(rqp, M_DEVBUF);
 			error = ENOMEM;
 			break;
 		}
 		bzero(rqp->rq_q, sizeof(class_queue_t));
 
 		/* all-zero arguments select the default RED parameters */
 		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
 		if (rqp->rq_red == NULL) {
 			free(rqp->rq_q, M_DEVBUF);
 			free(rqp, M_DEVBUF);
 			error = ENOMEM;
 			break;
 		}
 
 		rqp->rq_ifq = &ifp->if_snd;
 		qtail(rqp->rq_q) = NULL;
 		qlen(rqp->rq_q) = 0;
 		qlimit(rqp->rq_q) = RED_LIMIT;
 		qtype(rqp->rq_q) = Q_RED;
 
 		/*
 		 * set RED to this ifnet structure.
 		 */
 		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
 				    red_enqueue, red_dequeue, red_request,
 				    NULL, NULL);
 		if (error) {
 			red_destroy(rqp->rq_red);
 			free(rqp->rq_q, M_DEVBUF);
 			free(rqp, M_DEVBUF);
 			break;
 		}
 
 		/* add this state to the red list */
 		rqp->rq_next = red_list;
 		red_list = rqp;
 		break;
 
 	case RED_IF_DETACH:
 		ifacep = (struct red_interface *)addr;
 		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
 			error = EBADF;
 			break;
 		}
 		error = red_detach(rqp);
 		break;
 
 	case RED_GETSTATS:
 		do {
 			struct red_stats *q_stats;
 			red_t *rp;
 
 			q_stats = (struct red_stats *)addr;
 			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
 					     ALTQT_RED)) == NULL) {
 				error = EBADF;
 				break;
 			}
 
 			q_stats->q_len 	   = qlen(rqp->rq_q);
 			q_stats->q_limit   = qlimit(rqp->rq_q);
 
 			rp = rqp->rq_red;
 			q_stats->q_avg 	   = rp->red_avg >> rp->red_wshift;
 			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
 			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
 			q_stats->drop_forced   = rp->red_stats.drop_forced;
 			q_stats->drop_unforced = rp->red_stats.drop_unforced;
 			q_stats->marked_packets = rp->red_stats.marked_packets;
 
 			q_stats->weight		= rp->red_weight;
 			q_stats->inv_pmax	= rp->red_inv_pmax;
 			q_stats->th_min		= rp->red_thmin;
 			q_stats->th_max		= rp->red_thmax;
 
 #ifdef ALTQ_FLOWVALVE
 			if (rp->red_flowvalve != NULL) {
 				struct flowvalve *fv = rp->red_flowvalve;
 				q_stats->fv_flows    = fv->fv_flows;
 				q_stats->fv_pass     = fv->fv_stats.pass;
 				q_stats->fv_predrop  = fv->fv_stats.predrop;
 				q_stats->fv_alloc    = fv->fv_stats.alloc;
 				q_stats->fv_escape   = fv->fv_stats.escape;
 			} else {
 #endif /* ALTQ_FLOWVALVE */
 				q_stats->fv_flows    = 0;
 				q_stats->fv_pass     = 0;
 				q_stats->fv_predrop  = 0;
 				q_stats->fv_alloc    = 0;
 				q_stats->fv_escape   = 0;
 #ifdef ALTQ_FLOWVALVE
 			}
 #endif /* ALTQ_FLOWVALVE */
 		} while (/*CONSTCOND*/ 0);
 		break;
 
 	case RED_CONFIG:
 		do {
 			struct red_conf *fc;
 			red_t *new;
 			int s, limit;
 
 			fc = (struct red_conf *)addr;
 			if ((rqp = altq_lookup(fc->iface.red_ifname,
 					       ALTQT_RED)) == NULL) {
 				error = EBADF;
 				break;
 			}
 			/* build the replacement first; the old state stays on failure */
 			new = red_alloc(fc->red_weight,
 					fc->red_inv_pmax,
 					fc->red_thmin,
 					fc->red_thmax,
 					fc->red_flags,
 					fc->red_pkttime);
 			if (new == NULL) {
 				error = ENOMEM;
 				break;
 			}
 
-#ifdef __NetBSD__
 			s = splnet();
-#else
-			s = splimp();
-#endif
 			red_purgeq(rqp);
 			/* the queue limit is never set below th_max */
 			limit = fc->red_limit;
 			if (limit < fc->red_thmax)
 				limit = fc->red_thmax;
 			qlimit(rqp->rq_q) = limit;
 			fc->red_limit = limit;	/* write back the new value */
 
 			red_destroy(rqp->rq_red);
 			rqp->rq_red = new;
 
 			splx(s);
 
 			/* write back new values */
 			fc->red_limit = limit;
 			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
 			fc->red_thmin = rqp->rq_red->red_thmin;
 			fc->red_thmax = rqp->rq_red->red_thmax;
 
 		} while (/*CONSTCOND*/ 0);
 		break;
 
 	case RED_SETDEFAULTS:
 		do {
 			struct redparams *rp;
 
 			rp = (struct redparams *)addr;
 
 			default_th_min = rp->th_min;
 			default_th_max = rp->th_max;
 			default_inv_pmax = rp->inv_pmax;
 		} while (/*CONSTCOND*/ 0);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 	return error;
 }
 
 /*
  * Detach a RED instance from its interface and free it.
  *
  * ALTQ is disabled first if it is enabled.  If altq_detach() fails its
  * error is returned and the instance is left untouched (still linked in
  * red_list).  Otherwise the instance is unlinked from red_list and all
  * of its memory is released; 0 is returned.
  */
 static int
 red_detach(red_queue_t *rqp)
 {
 	red_queue_t *prev;
 	int error;
 
 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
 		altq_disable(rqp->rq_ifq);
 
 	error = altq_detach(rqp->rq_ifq);
 	if (error != 0)
 		return (error);
 
 	if (red_list == rqp) {
 		red_list = rqp->rq_next;
 	} else {
 		/* locate the predecessor of rqp */
 		prev = red_list;
 		while (prev != NULL && prev->rq_next != rqp)
 			prev = prev->rq_next;
 
 		if (prev != NULL)
 			prev->rq_next = rqp->rq_next;
 		else
 			printf("red_detach: no state found in red_list!\n");
 	}
 
 	red_destroy(rqp->rq_red);
 	free(rqp->rq_q, M_DEVBUF);
 	free(rqp, M_DEVBUF);
 	return (error);
 }
 
 /*
  * enqueue routine (installed through altq_attach()):
  *	hands the packet to red_addq() and accounts for it in ifq_len.
  *	the ifq lock must be held (asserted).
  *
  *	returns: 0 when successfully queued.
  *		 ENOBUFS when drop occurs.
  */
 static int
 red_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
 {
 	red_queue_t *state = (red_queue_t *)ifq->altq_disc;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (red_addq(state->rq_red, state->rq_q, m, pktattr) >= 0) {
 		ifq->ifq_len++;
 		return (0);
 	}
 	return (ENOBUFS);
 }
 
 /*
  * dequeue routine (installed through altq_attach()):
  *	the ifq lock must be held (asserted).
  *
  *	op == ALTDQ_POLL returns qhead() of the queue and leaves ifq_len
  *	untouched; any other op removes a packet through red_getq() and
  *	decrements ifq_len when one was obtained.
  *
  *	returns: mbuf dequeued.
  *		 NULL when no packet is available in the queue.
  */
 
 static struct mbuf *
 red_dequeue(struct ifaltq *ifq, int op)
 {
 	red_queue_t *state = (red_queue_t *)ifq->altq_disc;
 	struct mbuf *pkt;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (op != ALTDQ_POLL) {
 		/* op == ALTDQ_REMOVE */
 		pkt = red_getq(state->rq_red, state->rq_q);
 		if (pkt != NULL)
 			ifq->ifq_len--;
 		return (pkt);
 	}
 
 	return (qhead(state->rq_q));
 }
 
 /*
  * request routine (installed through altq_attach()).  Only ALTRQ_PURGE
  * is acted upon, by flushing the queue; every other request is ignored.
  * The ifq lock must be held (asserted).  Always returns 0.
  */
 static int
 red_request(struct ifaltq *ifq, int req, void *arg)
 {
 	red_queue_t *state = (red_queue_t *)ifq->altq_disc;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (req == ALTRQ_PURGE)
 		red_purgeq(state);
 
 	return (0);
 }
 
 /*
  * Flush the queue with _flushq() and, when ALTQ is enabled on the
  * interface queue, reset its ifq_len to zero to match.
  */
 static void
 red_purgeq(red_queue_t *rqp)
 {
 	_flushq(rqp->rq_q);
 
 	if (!ALTQ_IS_ENABLED(rqp->rq_ifq))
 		return;
 	rqp->rq_ifq->ifq_len = 0;
 }
 
 #ifdef ALTQ_FLOWVALVE
 
 #define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
 #define	FV_PSCALE(x)	((x) << FV_PSHIFT)
 #define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
 #define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
 #define	FV_FSCALE(x)	((x) << FV_FSHIFT)
 #define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)
 
 #define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
 #define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */
 
 #define	FV_N			10	/* update fve_f every FV_N packets */
 
 #define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
 #define	FV_TTHRESH		3  /* time threshold to delete fve */
 #define	FV_ALPHA		5  /* extra packet count */
 
 #define	FV_STATS
 
 #if (__FreeBSD_version > 300000)
 #define	FV_TIMESTAMP(tp)	getmicrotime(tp)
 #else
 #define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
 #endif
 
 /*
  * Brtt table: 127 entry table to convert drop rate (p) to
  * the corresponding bandwidth fraction (f)
  * the following equation is implemented to use scaled values,
  * fve_p and fve_f, in the fixed point format.
  *
  *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
  *   f = Brtt(p) / (max_th + alpha)
  */
 #define	BRTT_SIZE	128
 #define	BRTT_SHIFT	12
 #define	BRTT_MASK	0x0007f000
 #define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))
 
 /*
  * Precomputed Brtt values, BRTT_SIZE (128) entries; entry 0 is 0.
  * NOTE(review): presumably indexed by the scaled drop rate using
  * BRTT_MASK/BRTT_SHIFT -- confirm against the lookup code.
  */
 const int brtt_tab[BRTT_SIZE] = {
 	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
 	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
 	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
 	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
 	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
 	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
 	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
 	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
 	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
 	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
 	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
 	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
 	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
 	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
 	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
 	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
 };
 
 static __inline struct fve *
 flowlist_lookup(fv, pktattr, now)
 	struct flowvalve *fv;
 	struct altq_pktattr *pktattr;
 	struct timeval *now;
 {
 	struct fve *fve;
 	int flows;
 	struct ip *ip;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 	struct timeval tthresh;
 
 	if (pktattr == NULL)
 		return (NULL);
 
 	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
 	flows = 0;
 	/*
 	 * search the flow list
 	 */
 	switch (pktattr->pattr_af) {
 	case AF_INET:
 		ip = (struct ip *)pktattr->pattr_hdr;
 		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
 			if (fve->fve_lastdrop.tv_sec == 0)
 				break;
 			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
 				fve->fve_lastdrop.tv_sec = 0;
 				break;
 			}
 			if (fve->fve_flow.flow_af == AF_INET &&
 			    fve->fve_flow.flow_ip.ip_src.s_addr ==
 			    ip->ip_src.s_addr &&
 			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
 			    ip->ip_dst.s_addr)
 				return (fve);
 			flows++;
 		}
 		break;
 #ifdef INET6
 	case AF_INET6:
 		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
 		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
 			if (fve->fve_lastdrop.tv_sec == 0)
 				break;
 			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
 				fve->fve_lastdrop.tv_sec = 0;
 				break;
 			}
 			if (fve->fve_flow.flow_af == AF_INET6 &&
 			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
 					       &ip6->ip6_src) &&
 			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
 					       &ip6->ip6_dst))
 				return (fve);
 			flows++;
 		}
 		break;
 #endif /* INET6 */
 
 	default:
 		/* unknown protocol.  no drop. */
 		return (NULL);
 	}
 	fv->fv_flows = flows;	/* save the number of active fve's */
 	return (NULL);
 }
 
 /*
  * Recycle the entry at the tail of the LRU flow list for the flow
  * described by pktattr: record its address family and source and
  * destination addresses, reset its state, and count it in fv_flows.
  * Address families other than AF_INET (and AF_INET6 with INET6) leave
  * the flow key of the entry unchanged.
  *
  * Returns the recycled entry.
  */
 static __inline struct fve *
 flowlist_reclaim(struct flowvalve *fv, struct altq_pktattr *pktattr)
 {
 	struct fve *victim;
 	struct ip *ip4;
 #ifdef INET6
 	struct ip6_hdr *ip6h;
 #endif
 
 	/*
 	 * get an entry from the tail of the LRU list.
 	 */
 	victim = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
 
 	switch (pktattr->pattr_af) {
 	case AF_INET:
 		ip4 = (struct ip *)pktattr->pattr_hdr;
 		victim->fve_flow.flow_af = AF_INET;
 		victim->fve_flow.flow_ip.ip_src = ip4->ip_src;
 		victim->fve_flow.flow_ip.ip_dst = ip4->ip_dst;
 		break;
 #ifdef INET6
 	case AF_INET6:
 		ip6h = (struct ip6_hdr *)pktattr->pattr_hdr;
 		victim->fve_flow.flow_af = AF_INET6;
 		victim->fve_flow.flow_ip6.ip6_src = ip6h->ip6_src;
 		victim->fve_flow.flow_ip6.ip6_dst = ip6h->ip6_dst;
 		break;
 #endif
 	}
 
 	/* start the entry from a clean slate */
 	victim->fve_state = Green;
 	victim->fve_p = 0;
 	victim->fve_f = 0;
 	victim->fve_ifseq = fv->fv_ifseq - 1;
 	victim->fve_count = 0;
 
 	fv->fv_flows++;
 #ifdef FV_STATS
 	fv->fv_stats.alloc++;
 #endif
 	return (victim);
 }
 
 /*
  * Make fve the first entry of fv's flow list.  Nothing is done when it
  * is already at the head.
  */
 static __inline void
 flowlist_move_to_head(struct flowvalve *fv, struct fve *fve)
 {
 	if (TAILQ_FIRST(&fv->fv_flowlist) == fve)
 		return;
 
 	TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
 	TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
 }
 
 #if 0 /* XXX: make the compiler happy (fv_alloc unused) */
 /*
  * allocate flowvalve structure
  *
  * Allocates the flowvalve, its array of FV_FLOWLISTSIZE flow entries
  * (all linked onto the flow list with a zero last-drop time) and the
  * drop-rate-to-fraction table, deriving the thresholds from rp.
  * Returns the new flowvalve, or NULL after releasing whatever had been
  * allocated when one of the allocations yields NULL.
  *
  * NOTE(review): this whole function is compiled out by the #if 0 above.
  */
 static struct flowvalve *
 fv_alloc(rp)
 	struct red *rp;
 {
 	struct flowvalve *fv;
 	struct fve *fve;
 	int i, num;
 
 	num = FV_FLOWLISTSIZE;
 	fv = malloc(sizeof(struct flowvalve),
 	       M_DEVBUF, M_WAITOK);
 	if (fv == NULL)
 		return (NULL);
 	bzero(fv, sizeof(struct flowvalve));
 
 	fv->fv_fves = malloc(sizeof(struct fve) * num,
 	       M_DEVBUF, M_WAITOK);
 	if (fv->fv_fves == NULL) {
 		free(fv, M_DEVBUF);
 		return (NULL);
 	}
 	bzero(fv->fv_fves, sizeof(struct fve) * num);
 
 	fv->fv_flows = 0;
 	TAILQ_INIT(&fv->fv_flowlist);
 	/* chain every entry onto the list, all marked unused (tv_sec == 0) */
 	for (i = 0; i < num; i++) {
 		fve = &fv->fv_fves[i];
 		fve->fve_lastdrop.tv_sec = 0;
 		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
 	}
 
 	/* initialize drop rate threshold in scaled fixed-point */
 	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
 
 	/* initialize drop rate to fraction table */
 	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE,
 	       M_DEVBUF, M_WAITOK);
 	if (fv->fv_p2ftab == NULL) {
 		free(fv->fv_fves, M_DEVBUF);
 		free(fv, M_DEVBUF);
 		return (NULL);
 	}
 	/*
 	 * create the p2f table.
 	 * (shift is used to keep the precision)
 	 *
 	 * NOTE(review): the loop starts at index 1 and the table is not
 	 * zeroed after allocation, so fv_p2ftab[0] is never written here.
 	 */
 	for (i = 1; i < BRTT_SIZE; i++) {
 		int f;
 
 		f = brtt_tab[i] << 8;
 		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
 	}
 
 	return (fv);
 }
 #endif
 
 /*
  * Release a flowvalve: its drop-rate-to-fraction table, its array of
  * flow entries, and finally the flowvalve structure itself.
  */
 static void
 fv_destroy(struct flowvalve *fv)
 {
 	/* the members must go before the structure that holds them */
 	free(fv->fv_p2ftab, M_DEVBUF);
 	free(fv->fv_fves, M_DEVBUF);
 
 	free(fv, M_DEVBUF);
 }
 
 /*
  * Map a drop rate p to a fraction via the fv_p2ftab lookup table.
  *
  * p at or above BRTT_PMAX uses the last table slot.  Otherwise p is
  * masked with BRTT_MASK: a non-zero result, shifted right by
  * BRTT_SHIFT, is the table index; a zero result uses slot 1.
  */
 static __inline int
 fv_p2f(struct flowvalve *fv, int p)
 {
 	int masked;
 
 	if (p >= BRTT_PMAX)
 		return (fv->fv_p2ftab[BRTT_SIZE-1]);
 
 	masked = (p & BRTT_MASK);
 	if (masked != 0)
 		return (fv->fv_p2ftab[(masked >> BRTT_SHIFT)]);
 
 	return (fv->fv_p2ftab[1]);
 }
 
 /*
  * check if an arriving packet should be pre-dropped.
  * called from red_addq() when a packet arrives.
  * returns 1 when the packet should be pre-dropped.
  * should be called in splimp.
  *
  * fv:      flowvalve state; fv_ifseq is incremented on every call.
  * pktattr: packet attributes handed to flowlist_lookup().
  * fcache:  out parameter; set to the matching flow entry.  It is left
  *          untouched when no entry matches, in which case 0 is returned.
  */
 static int
 fv_checkflow(fv, pktattr, fcache)
 	struct flowvalve *fv;
 	struct altq_pktattr *pktattr;
 	struct fve **fcache;
 {
 	struct fve *fve;
 	struct timeval now;
 
 	fv->fv_ifseq++;
 	FV_TIMESTAMP(&now);
 
 	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
 		/* no matching entry in the flowlist */
 		return (0);
 
 	*fcache = fve;
 
 	/* update fraction f for every FV_N packets */
 	if (++fve->fve_count == FV_N) {
 		/*
 		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
 		 */
 		fve->fve_f =
 			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
 			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
 		/* restart the measurement window at the current sequence */
 		fve->fve_ifseq = fv->fv_ifseq;
 		fve->fve_count = 0;
 	}
 
 	/*
 	 * overpumping test
 	 */
 	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
 		int fthresh;
 
 		/* calculate a threshold */
 		fthresh = fv_p2f(fv, fve->fve_p);
 		if (fve->fve_f > fthresh)
 			fve->fve_state = Red;
 	}
 
 	if (fve->fve_state == Red) {
 		/*
 		 * backoff test
 		 */
 		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
 			/* no drop for at least FV_BACKOFFTHRESH sec */
 			fve->fve_p = 0;
 			fve->fve_state = Green;
 #ifdef FV_STATS
 			fv->fv_stats.escape++;
 #endif
 		} else {
 			/* block this flow */
 			flowlist_move_to_head(fv, fve);
 			fve->fve_lastdrop = now;
 #ifdef FV_STATS
 			fv->fv_stats.predrop++;
 #endif
 			return (1);
 		}
 	}
 
 	/*
 	 * p = (1 - Wp) * p
 	 */
 	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
 	/* clamp so the decayed value never goes negative */
 	if (fve->fve_p < 0)
 		fve->fve_p = 0;
 #ifdef FV_STATS
 	fv->fv_stats.pass++;
 #endif
 	return (0);
 }
 
 /*
  * called from red_addq when a packet is dropped by red.
  * should be called in splimp.
  */
 static void fv_dropbyred(fv, pktattr, fcache)
 	struct flowvalve *fv;
 	struct altq_pktattr *pktattr;
 	struct fve *fcache;
 {
 	struct fve *fve;
 	struct timeval now;
 
 	if (pktattr == NULL)
 		return;
 	FV_TIMESTAMP(&now);
 
 	if (fcache != NULL)
 		/* the fve of this packet is already cached */
 		fve = fcache;
 	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
 		fve = flowlist_reclaim(fv, pktattr);
 
 	flowlist_move_to_head(fv, fve);
 
 	/*
 	 * update p:  the following line cancels the update
 	 *	      in fv_checkflow() and calculate
 	 *	p = Wp + (1 - Wp) * p
 	 */
 	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
 
 	fve->fve_lastdrop = now;
 }
 
 #endif /* ALTQ_FLOWVALVE */
 
 #ifdef KLD_MODULE
 
 /*
  * Loadable-module glue, built only when KLD_MODULE is defined: the
  * switch entry naming the "red" discipline and its open/close/ioctl
  * handlers, followed by the module and module-version declarations.
  */
 static struct altqsw red_sw =
 	{"red", redopen, redclose, redioctl};
 
 ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
 MODULE_VERSION(altq_red, 1);
 
 #endif /* KLD_MODULE */
 #endif /* ALTQ3_COMPAT */
 
 #endif /* ALTQ_RED */
Index: user/ngie/more-tests/sys/net/altq/altq_rio.c
===================================================================
--- user/ngie/more-tests/sys/net/altq/altq_rio.c	(revision 281675)
+++ user/ngie/more-tests/sys/net/altq/altq_rio.c	(revision 281676)
@@ -1,852 +1,844 @@
-/*	$FreeBSD$	*/
-/*	$KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $	*/
-
-/*
+/*-
  * Copyright (C) 1998-2003
  *	Sony Computer Science Laboratories Inc.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-/*
+/*-
  * Copyright (c) 1990-1994 Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the Computer Systems
  *	Engineering Group at Lawrence Berkeley Laboratory.
  * 4. Neither the name of the University nor of the Laboratory may be used
  *    to endorse or promote products derived from this software without
  *    specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
+ *
+ * $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $
+ * $FreeBSD$
  */
 
-#if defined(__FreeBSD__) || defined(__NetBSD__)
 #include "opt_altq.h"
 #include "opt_inet.h"
-#ifdef __FreeBSD__
 #include "opt_inet6.h"
-#endif
-#endif /* __FreeBSD__ || __NetBSD__ */
 #ifdef ALTQ_RIO	/* rio is enabled by ALTQ_RIO option in opt_altq.h */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #if 1 /* ALTQ3_COMPAT */
 #include <sys/proc.h>
 #include <sys/sockio.h>
 #include <sys/kernel.h>
 #endif
 
 #include <net/if.h>
 #include <net/if_var.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 
 #include <netpfil/pf/pf.h>
 #include <netpfil/pf/pf_altq.h>
 #include <net/altq/altq.h>
 #include <net/altq/altq_cdnr.h>
 #include <net/altq/altq_red.h>
 #include <net/altq/altq_rio.h>
 #ifdef ALTQ3_COMPAT
 #include <net/altq/altq_conf.h>
 #endif
 
 /*
  * RIO: RED with IN/OUT bit
  *   described in
  *	"Explicit Allocation of Best Effort Packet Delivery Service"
  *	David D. Clark and Wenjia Fang, MIT Lab for Computer Science
  *	http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf}
  *
  * this implementation is extended to support more than 2 drop precedence
  * values as described in RFC2597 (Assured Forwarding PHB Group).
  *
  */
 /*
  * AF DS (differentiated service) codepoints.
  * (classes can be mapped to CBQ or H-FSC classes.)
  *
  *      0   1   2   3   4   5   6   7
  *    +---+---+---+---+---+---+---+---+
  *    |   CLASS   |DropPre| 0 |  CU   |
  *    +---+---+---+---+---+---+---+---+
  *
  *    class 1: 001
  *    class 2: 010
  *    class 3: 011
  *    class 4: 100
  *
  *    low drop prec:    01
  *    medium drop prec: 10
  *    high drop prec:   01
  */
 
 /* normal red parameters */
 #define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
 				/* q_weight = 0.00195 */
 
 /* red parameters for a slow link */
 #define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
 				/* q_weight = 0.0078125 */
 
 /* red parameters for a very slow link (e.g., dialup) */
 #define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
 				/* q_weight = 0.015625 */
 
 /* fixed-point uses 12-bit decimal places */
 #define	FP_SHIFT	12	/* fixed-point shift */
 
 /* red parameters for drop probability */
 #define	INV_P_MAX	10	/* inverse of max drop probability */
 #define	TH_MIN		 5	/* min threshold */
 #define	TH_MAX		15	/* max threshold */
 
 #define	RIO_LIMIT	60	/* default max queue length */
 #define	RIO_STATS		/* collect statistics */
 
 /*
  * TV_DELTA(a, b, delta): store in `delta' the time from *b to *a in
  * microseconds, where a and b point to structures with tv_sec and
  * tv_usec members.
  *
  * The result is forced to 60000000 when the seconds difference is
  * negative or greater than 60.  Differences of 1 to 4 seconds are added
  * one second at a time by the loop; 5 to 60 seconds use a multiply.
  *
  * The expansion is a bare brace block (not do { } while (0)) and
  * evaluates a and b more than once.
  * NOTE(review): no use of this macro appears in the visible part of
  * this file -- confirm before relying on it.
  */
 #define	TV_DELTA(a, b, delta) {					\
 	register int	xxs;					\
 								\
 	delta = (a)->tv_usec - (b)->tv_usec; 			\
 	if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { 		\
 		if (xxs < 0) { 					\
 			delta = 60000000;			\
 		} else if (xxs > 4)  {				\
 			if (xxs > 60)				\
 				delta = 60000000;		\
 			else					\
 				delta += xxs * 1000000;		\
 		} else while (xxs > 0) {			\
 			delta += 1000000;			\
 			xxs--;					\
 		}						\
 	}							\
 }
 
 #ifdef ALTQ3_COMPAT
 /* rio_list keeps all rio_queue_t's allocated. */
 /* (singly linked through rq_next; the newest entry is at the head) */
 static rio_queue_t *rio_list = NULL;
 #endif
 /* default rio parameter values */
 /*
  * One entry per drop precedence, indexed as returned by dscp2index().
  * rio_alloc() falls back to these for any parameter given as 0 or when
  * no parameter array is supplied.  The table is writable: the
  * RIO_SETDEFAULTS ioctl overwrites it.
  */
 static struct redparams default_rio_params[RIO_NDROPPREC] = {
   /* th_min,		 th_max,     inv_pmax */
   { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */
   { TH_MAX + TH_MIN,	 TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */
   { TH_MIN,		 TH_MAX,     INV_P_MAX }  /* high drop precedence */
 };
 
 /* internal function prototypes */
 static int dscp2index(u_int8_t);
 #ifdef ALTQ3_COMPAT
 static int rio_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
 static struct mbuf *rio_dequeue(struct ifaltq *, int);
 static int rio_request(struct ifaltq *, int, void *);
 static int rio_detach(rio_queue_t *);
 
 /*
  * rio device interface
  */
 altqdev_decl(rio);
 
 #endif /* ALTQ3_COMPAT */
 
 rio_t *
 rio_alloc(int weight, struct redparams *params, int flags, int pkttime)
 {
 	rio_t	*rp;
 	int	 w, i;
 	int	 npkts_per_sec;
 
 	rp = malloc(sizeof(rio_t), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (rp == NULL)
 		return (NULL);
 
 	rp->rio_flags = flags;
 	if (pkttime == 0)
 		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
 		rp->rio_pkttime = 800;
 	else
 		rp->rio_pkttime = pkttime;
 
 	if (weight != 0)
 		rp->rio_weight = weight;
 	else {
 		/* use default */
 		rp->rio_weight = W_WEIGHT;
 
 		/* when the link is very slow, adjust red parameters */
 		npkts_per_sec = 1000000 / rp->rio_pkttime;
 		if (npkts_per_sec < 50) {
 			/* up to about 400Kbps */
 			rp->rio_weight = W_WEIGHT_2;
 		} else if (npkts_per_sec < 300) {
 			/* up to about 2.4Mbps */
 			rp->rio_weight = W_WEIGHT_1;
 		}
 	}
 
 	/* calculate wshift.  weight must be power of 2 */
 	w = rp->rio_weight;
 	for (i = 0; w > 1; i++)
 		w = w >> 1;
 	rp->rio_wshift = i;
 	w = 1 << rp->rio_wshift;
 	if (w != rp->rio_weight) {
 		printf("invalid weight value %d for red! use %d\n",
 		       rp->rio_weight, w);
 		rp->rio_weight = w;
 	}
 
 	/* allocate weight table */
 	rp->rio_wtab = wtab_alloc(rp->rio_weight);
 
 	for (i = 0; i < RIO_NDROPPREC; i++) {
 		struct dropprec_state *prec = &rp->rio_precstate[i];
 
 		prec->avg = 0;
 		prec->idle = 1;
 
 		if (params == NULL || params[i].inv_pmax == 0)
 			prec->inv_pmax = default_rio_params[i].inv_pmax;
 		else
 			prec->inv_pmax = params[i].inv_pmax;
 		if (params == NULL || params[i].th_min == 0)
 			prec->th_min = default_rio_params[i].th_min;
 		else
 			prec->th_min = params[i].th_min;
 		if (params == NULL || params[i].th_max == 0)
 			prec->th_max = default_rio_params[i].th_max;
 		else
 			prec->th_max = params[i].th_max;
 
 		/*
 		 * th_min_s and th_max_s are scaled versions of th_min
 		 * and th_max to be compared with avg.
 		 */
 		prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
 		prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);
 
 		/*
 		 * precompute probability denominator
 		 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
 		 */
 		prec->probd = (2 * (prec->th_max - prec->th_min)
 			       * prec->inv_pmax) << FP_SHIFT;
 
 		microtime(&prec->last);
 	}
 
 	return (rp);
 }
 
 /*
  * Release a RIO state block: hand its weight table to wtab_destroy()
  * and then free the block itself.
  */
 void
 rio_destroy(rio_t *rp)
 {
 	wtab_destroy(rp->rio_wtab);
 	free(rp, M_DEVBUF);
 }
 
 /*
  * Copy the statistics of every drop precedence into the array sp,
  * which must have room for RIO_NDROPPREC entries.  Each entry's q_avg
  * is then set to that precedence's average shifted right by rio_wshift.
  */
 void
 rio_getstats(rio_t *rp, struct redstats *sp)
 {
 	struct redstats	*out;
 	int		 prec;
 
 	for (prec = 0; prec < RIO_NDROPPREC; prec++) {
 		out = &sp[prec];
 		bcopy(&rp->q_stats[prec], out, sizeof(struct redstats));
 		out->q_avg = rp->rio_precstate[prec].avg >> rp->rio_wshift;
 	}
 }
 
 #if (RIO_NDROPPREC == 3)
 /*
  * Convert a DS field to the zero-based drop precedence index used
  * internally.  The bits selected by AF_DROPPRECMASK are shifted right
  * by 3 and reduced by one; when none of those bits is set the index
  * is 0.
  */
 static int
 dscp2index(u_int8_t dscp)
 {
 	int	precbits = dscp & AF_DROPPRECMASK;
 
 	return (precbits == 0 ? 0 : (precbits >> 3) - 1);
 }
 #endif
 
 #if 1
 /*
  * kludge: when a packet is dequeued, we need to know its drop precedence
  * in order to keep the queue length of each drop precedence.
  * use m_pkthdr.rcvif to pass this info.
  *
  * RIOM_SET_PRECINDEX stores the index in the rcvif pointer field via a
  * cast through long.  RIOM_GET_PRECINDEX reads it back and also resets
  * rcvif to NULL, so it can be used only once per stored value; it is
  * written as a ({ ... }) statement expression.
  */
 #define	RIOM_SET_PRECINDEX(m, idx)	\
 	do { (m)->m_pkthdr.rcvif = (void *)((long)(idx)); } while (0)
 #define	RIOM_GET_PRECINDEX(m)	\
 	({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \
 	(m)->m_pkthdr.rcvif = NULL; idx; })
 #endif
 
 /*
  * Offer packet m to queue q under RIO control.
  *
  * The drop precedence index is derived from the packet's DS field.
  * The average queue estimate is updated for that precedence and every
  * higher index, and the packet's own precedence state decides whether
  * the packet is dropped early, dropped by force, or queued.
  *
  * Returns 0 when the packet was appended to q, or -1 when it was
  * dropped; a dropped packet has already been freed with m_freem().
  */
 int
 rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m,
     struct altq_pktattr *pktattr)
 {
 	int			 avg, droptype;
 	u_int8_t		 dsfield, odsfield;
 	int			 dpindex, i, n, t;
 	struct timeval		 now;
 	struct dropprec_state	*prec;
 
 	dsfield = odsfield = read_dsfield(m, pktattr);
 	dpindex = dscp2index(dsfield);
 
 	/*
 	 * update avg of the precedence states whose drop precedence
 	 * is larger than or equal to the drop precedence of the packet
 	 */
 	/* tv_sec == 0 means "time not fetched yet"; fetched at most once */
 	now.tv_sec = 0;
 	for (i = dpindex; i < RIO_NDROPPREC; i++) {
 		prec = &rp->rio_precstate[i];
 		avg = prec->avg;
 		if (prec->idle) {
 			/* leaving an idle period: age avg by the idle time */
 			prec->idle = 0;
 			if (now.tv_sec == 0)
 				microtime(&now);
 			t = (now.tv_sec - prec->last.tv_sec);
 			if (t > 60)
 				avg = 0;
 			else {
 				t = t * 1000000 +
 					(now.tv_usec - prec->last.tv_usec);
 				/* n: idle time expressed in packet times */
 				n = t / rp->rio_pkttime;
 				/* calculate (avg = (1 - Wq)^n * avg) */
 				if (n > 0)
 					avg = (avg >> FP_SHIFT) *
 						pow_w(rp->rio_wtab, n);
 			}
 		}
 
 		/* run estimator. (avg is scaled by WEIGHT in fixed-point) */
 		avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
 		prec->avg = avg;		/* save the new value */
 		/*
 		 * count keeps a tally of arriving traffic that has not
 		 * been dropped.
 		 */
 		prec->count++;
 	}
 
 	/* the drop decision uses the packet's own precedence state only */
 	prec = &rp->rio_precstate[dpindex];
 	avg = prec->avg;
 
 	/* see if we drop early */
 	droptype = DTYPE_NODROP;
 	if (avg >= prec->th_min_s && prec->qlen > 1) {
 		if (avg >= prec->th_max_s) {
 			/* avg >= th_max: forced drop */
 			droptype = DTYPE_FORCED;
 		} else if (prec->old == 0) {
 			/* first exceeds th_min */
 			prec->count = 1;
 			prec->old = 1;
 		} else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
 				      prec->probd, prec->count)) {
 			/* unforced drop by red */
 			droptype = DTYPE_EARLY;
 		}
 	} else {
 		/* avg < th_min */
 		prec->old = 0;
 	}
 
 	/*
 	 * if the queue length hits the hard limit, it's a forced drop.
 	 */
 	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
 		droptype = DTYPE_FORCED;
 
 	if (droptype != DTYPE_NODROP) {
 		/* always drop incoming packet (as opposed to randomdrop) */
 		for (i = dpindex; i < RIO_NDROPPREC; i++)
 			rp->rio_precstate[i].count = 0;
 #ifdef RIO_STATS
 		if (droptype == DTYPE_EARLY)
 			rp->q_stats[dpindex].drop_unforced++;
 		else
 			rp->q_stats[dpindex].drop_forced++;
 		PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m));
 #endif
 		m_freem(m);
 		return (-1);
 	}
 
 	/* accepted: the packet counts toward its own and higher indexes */
 	for (i = dpindex; i < RIO_NDROPPREC; i++)
 		rp->rio_precstate[i].qlen++;
 
 	/* save drop precedence index in mbuf hdr */
 	RIOM_SET_PRECINDEX(m, dpindex);
 
 	if (rp->rio_flags & RIOF_CLEARDSCP)
 		dsfield &= ~DSCP_MASK;
 
 	/* rewrite the DS field only when it actually changed */
 	if (dsfield != odsfield)
 		write_dsfield(m, pktattr, dsfield);
 
 	_addq(q, m);
 
 #ifdef RIO_STATS
 	PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m));
 #endif
 	return (0);
 }
 
 /*
  * Take the next packet off q and update the per-precedence queue
  * lengths.
  *
  * The drop precedence index stored in the mbuf by rio_addq() is read
  * back (and cleared); the queue length of that precedence and of every
  * higher index is decremented.  A precedence whose length reaches zero
  * while not yet idle is marked idle and time-stamped.
  *
  * Returns the packet, or NULL when q is empty.
  */
 struct mbuf *
 rio_getq(rio_t *rp, class_queue_t *q)
 {
 	struct dropprec_state	*prec;
 	struct mbuf		*m;
 	int			 first, i;
 
 	m = _getq(q);
 	if (m == NULL)
 		return NULL;
 
 	first = RIOM_GET_PRECINDEX(m);
 	for (i = first; i < RIO_NDROPPREC; i++) {
 		prec = &rp->rio_precstate[i];
 
 		/* still backlogged, or already idle: nothing more to do */
 		if (--prec->qlen != 0 || prec->idle != 0)
 			continue;
 
 		prec->idle = 1;
 		microtime(&prec->last);
 	}
 	return (m);
 }
 
 #ifdef ALTQ3_COMPAT
 /*
  * Open entry point of the rio device.  No work is done here; it always
  * returns 0.  The type of p depends on __FreeBSD_version.
  */
 int
 rioopen(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	/* everything will be done when the queueing scheme is attached. */
 	return 0;
 }
 
 int
 rioclose(dev, flag, fmt, p)
 	dev_t dev;
 	int flag, fmt;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	rio_queue_t *rqp;
 	int err, error = 0;
 
 	while ((rqp = rio_list) != NULL) {
 		/* destroy all */
 		err = rio_detach(rqp);
 		if (err != 0 && error == 0)
 			error = err;
 	}
 
 	return error;
 }
 
 /*
  * ioctl entry point of the rio device.
  *
  * Every command except RIO_GETSTATS first passes a privilege check.
  * Commands handled:
  *   RIO_ENABLE / RIO_DISABLE  enable or disable ALTQ on the interface
  *   RIO_IF_ATTACH             allocate a rio state and attach it
  *   RIO_IF_DETACH             detach and free the interface's state
  *   RIO_GETSTATS              copy out limits, flags and per-precedence
  *                             statistics and parameters
  *   RIO_CONFIG                replace the rio parameters (flushes the
  *                             queue) and write back the values in use
  *   RIO_SETDEFAULTS           overwrite default_rio_params
  *
  * addr points to the command's argument structure.  Returns 0 or an
  * errno value (EBADF when the interface has no rio state, ENXIO for an
  * unknown interface, ENOMEM, EINVAL for an unknown command, or the
  * error of the privilege check or the altq call).
  */
 int
 rioioctl(dev, cmd, addr, flag, p)
 	dev_t dev;
 	ioctlcmd_t cmd;
 	caddr_t addr;
 	int flag;
 #if (__FreeBSD_version > 500000)
 	struct thread *p;
 #else
 	struct proc *p;
 #endif
 {
 	rio_queue_t *rqp;
 	struct rio_interface *ifacep;
 	struct ifnet *ifp;
 	int	error = 0;
 
 	/* check super-user privilege */
 	switch (cmd) {
 	case RIO_GETSTATS:
 		break;
 	default:
 #if (__FreeBSD_version > 700000)
 		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
 			return (error);
 #elif (__FreeBSD_version > 400000)
 		if ((error = suser(p)) != 0)
 			return (error);
 #else
 		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
 			return (error);
 #endif
 		break;
 	}
 
 	switch (cmd) {
 
 	case RIO_ENABLE:
 		ifacep = (struct rio_interface *)addr;
 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
 			error = EBADF;
 			break;
 		}
 		error = altq_enable(rqp->rq_ifq);
 		break;
 
 	case RIO_DISABLE:
 		ifacep = (struct rio_interface *)addr;
 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
 			error = EBADF;
 			break;
 		}
 		error = altq_disable(rqp->rq_ifq);
 		break;
 
 	case RIO_IF_ATTACH:
 		ifp = ifunit(((struct rio_interface *)addr)->rio_ifname);
 		if (ifp == NULL) {
 			error = ENXIO;
 			break;
 		}
 
 		/* allocate and initialize rio_queue_t */
 		rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK);
 		if (rqp == NULL) {
 			error = ENOMEM;
 			break;
 		}
 		bzero(rqp, sizeof(rio_queue_t));
 
 		rqp->rq_q = malloc(sizeof(class_queue_t),
 		       M_DEVBUF, M_WAITOK);
 		if (rqp->rq_q == NULL) {
 			free(rqp, M_DEVBUF);
 			error = ENOMEM;
 			break;
 		}
 		bzero(rqp->rq_q, sizeof(class_queue_t));
 
 		rqp->rq_rio = rio_alloc(0, NULL, 0, 0);
 		if (rqp->rq_rio == NULL) {
 			free(rqp->rq_q, M_DEVBUF);
 			free(rqp, M_DEVBUF);
 			error = ENOMEM;
 			break;
 		}
 
 		rqp->rq_ifq = &ifp->if_snd;
 		qtail(rqp->rq_q) = NULL;
 		qlen(rqp->rq_q) = 0;
 		qlimit(rqp->rq_q) = RIO_LIMIT;
 		qtype(rqp->rq_q) = Q_RIO;
 
 		/*
 		 * set RIO to this ifnet structure.
 		 */
 		error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp,
 				    rio_enqueue, rio_dequeue, rio_request,
 				    NULL, NULL);
 		if (error) {
 			rio_destroy(rqp->rq_rio);
 			free(rqp->rq_q, M_DEVBUF);
 			free(rqp, M_DEVBUF);
 			break;
 		}
 
 		/* add this state to the rio list */
 		rqp->rq_next = rio_list;
 		rio_list = rqp;
 		break;
 
 	case RIO_IF_DETACH:
 		ifacep = (struct rio_interface *)addr;
 		if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
 			error = EBADF;
 			break;
 		}
 		error = rio_detach(rqp);
 		break;
 
 	case RIO_GETSTATS:
 		do {
 			struct rio_stats *q_stats;
 			rio_t *rp;
 			int i;
 
 			q_stats = (struct rio_stats *)addr;
 			if ((rqp = altq_lookup(q_stats->iface.rio_ifname,
 					       ALTQT_RIO)) == NULL) {
 				error = EBADF;
 				break;
 			}
 
 			rp = rqp->rq_rio;
 
 			q_stats->q_limit = qlimit(rqp->rq_q);
 			q_stats->weight	= rp->rio_weight;
 			q_stats->flags = rp->rio_flags;
 
 			for (i = 0; i < RIO_NDROPPREC; i++) {
 				q_stats->q_len[i] = rp->rio_precstate[i].qlen;
 				bcopy(&rp->q_stats[i], &q_stats->q_stats[i],
 				      sizeof(struct redstats));
 				q_stats->q_stats[i].q_avg =
 				    rp->rio_precstate[i].avg >> rp->rio_wshift;
 
 				q_stats->q_params[i].inv_pmax
 					= rp->rio_precstate[i].inv_pmax;
 				q_stats->q_params[i].th_min
 					= rp->rio_precstate[i].th_min;
 				q_stats->q_params[i].th_max
 					= rp->rio_precstate[i].th_max;
 			}
 		} while (/*CONSTCOND*/ 0);
 		break;
 
 	case RIO_CONFIG:
 		do {
 			struct rio_conf *fc;
 			rio_t	*new;
 			int s, limit, i;
 
 			fc = (struct rio_conf *)addr;
 			if ((rqp = altq_lookup(fc->iface.rio_ifname,
 					       ALTQT_RIO)) == NULL) {
 				error = EBADF;
 				break;
 			}
 
 			/* build the replacement first so failure changes nothing */
 			new = rio_alloc(fc->rio_weight, &fc->q_params[0],
 					fc->rio_flags, fc->rio_pkttime);
 			if (new == NULL) {
 				error = ENOMEM;
 				break;
 			}
 
-#ifdef __NetBSD__
 			s = splnet();
-#else
-			s = splimp();
-#endif
 			_flushq(rqp->rq_q);
 			/* the limit is never below the highest index's th_max */
 			limit = fc->rio_limit;
 			if (limit < fc->q_params[RIO_NDROPPREC-1].th_max)
 				limit = fc->q_params[RIO_NDROPPREC-1].th_max;
 			qlimit(rqp->rq_q) = limit;
 
 			rio_destroy(rqp->rq_rio);
 			rqp->rq_rio = new;
 
 			splx(s);
 
 			/* write back new values */
 			fc->rio_limit = limit;
 			for (i = 0; i < RIO_NDROPPREC; i++) {
 				fc->q_params[i].inv_pmax =
 					rqp->rq_rio->rio_precstate[i].inv_pmax;
 				fc->q_params[i].th_min =
 					rqp->rq_rio->rio_precstate[i].th_min;
 				fc->q_params[i].th_max =
 					rqp->rq_rio->rio_precstate[i].th_max;
 			}
 		} while (/*CONSTCOND*/ 0);
 		break;
 
 	case RIO_SETDEFAULTS:
 		do {
 			struct redparams *rp;
 			int i;
 
 			rp = (struct redparams *)addr;
 			for (i = 0; i < RIO_NDROPPREC; i++)
 				default_rio_params[i] = rp[i];
 		} while (/*CONSTCOND*/ 0);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	return error;
 }
 
 /*
  * Detach a rio state from its interface queue and free it.
  *
  * ALTQ is disabled on the queue first if it is enabled.  When
  * altq_detach() fails its error is returned and the state is left
  * untouched, still on rio_list.  Otherwise the state is unlinked from
  * rio_list (a message is printed if it cannot be found there), its rio
  * block and class queue are released, and 0 is returned.
  */
 static int
 rio_detach(rio_queue_t *rqp)
 {
 	rio_queue_t *prev;
 	int error;
 
 	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
 		altq_disable(rqp->rq_ifq);
 
 	error = altq_detach(rqp->rq_ifq);
 	if (error)
 		return (error);
 
 	if (rio_list == rqp) {
 		rio_list = rqp->rq_next;
 	} else {
 		/* find the entry that points at rqp */
 		prev = rio_list;
 		while (prev != NULL && prev->rq_next != rqp)
 			prev = prev->rq_next;
 
 		if (prev != NULL)
 			prev->rq_next = rqp->rq_next;
 		else
 			printf("rio_detach: no state found in rio_list!\n");
 	}
 
 	rio_destroy(rqp->rq_rio);
 	free(rqp->rq_q, M_DEVBUF);
 	free(rqp, M_DEVBUF);
 	return (error);
 }
 
 /*
  * rio support routines
  */
 
 /*
  * Request handler registered with altq_attach().  Only ALTRQ_PURGE is
  * acted upon: the class queue is flushed and, if ALTQ is enabled on
  * ifq, its length is reset to 0.  Other requests are ignored.  The ifq
  * lock is asserted.  Always returns 0; arg is not used.
  */
 static int
 rio_request(struct ifaltq *ifq, int req, void *arg)
 {
 	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (req == ALTRQ_PURGE) {
 		_flushq(rqp->rq_q);
 		if (ALTQ_IS_ENABLED(ifq))
 			ifq->ifq_len = 0;
 	}
 	return (0);
 }
 
 /*
  * enqueue routine:
  *	hands the packet to rio_addq() and counts it in ifq_len when it
  *	was queued.  the ifq lock is asserted.
  *
  *	returns: 0 when successfully queued.
  *		 ENOBUFS when drop occurs.
  */
 static int
 rio_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
 {
 	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) != 0)
 		return ENOBUFS;
 
 	ifq->ifq_len++;
 	return 0;
 }
 
 /*
  * dequeue routine:
  *	must be called in splimp.  the ifq lock is asserted.
  *
  *	ALTDQ_POLL returns the packet at the head of the queue without
  *	removing it; any other op removes a packet via rio_getq() and
  *	decrements ifq_len.
  *
  *	returns: mbuf dequeued.
  *		 NULL when no packet is available in the queue.
  */
 
 static struct mbuf *
 rio_dequeue(struct ifaltq *ifq, int op)
 {
 	rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
 	struct mbuf *pkt;
 
 	IFQ_LOCK_ASSERT(ifq);
 
 	if (op == ALTDQ_POLL)
 		return qhead(rqp->rq_q);
 
 	pkt = rio_getq(rqp->rq_rio, rqp->rq_q);
 	if (pkt == NULL)
 		return pkt;
 
 	ifq->ifq_len--;
 	return pkt;
 }
 
 #ifdef KLD_MODULE
 
 /*
  * Loadable-module glue, built only when KLD_MODULE is defined: the
  * switch entry naming the "rio" discipline and its open/close/ioctl
  * handlers, then the module and version declarations.  The module
  * declares a dependency on altq_red version 1.
  */
 static struct altqsw rio_sw =
 	{"rio", rioopen, rioclose, rioioctl};
 
 ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw);
 MODULE_VERSION(altq_rio, 1);
 MODULE_DEPEND(altq_rio, altq_red, 1, 1, 1);
 
 #endif /* KLD_MODULE */
 #endif /* ALTQ3_COMPAT */
 
 #endif /* ALTQ_RIO */
Index: user/ngie/more-tests/sys/net/altq/altq_rmclass.c
===================================================================
--- user/ngie/more-tests/sys/net/altq/altq_rmclass.c	(revision 281675)
+++ user/ngie/more-tests/sys/net/altq/altq_rmclass.c	(revision 281676)
@@ -1,1836 +1,1810 @@
-/*	$FreeBSD$	*/
-/*	$KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $	*/
-
-/*
+/*-
  * Copyright (c) 1991-1997 Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by the Network Research
  *      Group at Lawrence Berkeley Laboratory.
  * 4. Neither the name of the University nor of the Laboratory may be used
  *    to endorse or promote products derived from this software without
  *    specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * LBL code modified by speer@eng.sun.com, May 1977.
  * For questions and/or comments, please send mail to cbq@ee.lbl.gov
  *
  * @(#)rm_class.c  1.48     97/12/05 SMI
+ * $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $
+ * $FreeBSD$
  */
-#if defined(__FreeBSD__) || defined(__NetBSD__)
 #include "opt_altq.h"
 #include "opt_inet.h"
-#ifdef __FreeBSD__
 #include "opt_inet6.h"
-#endif
-#endif /* __FreeBSD__ || __NetBSD__ */
 #ifdef ALTQ_CBQ	/* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #ifdef ALTQ3_COMPAT
 #include <sys/kernel.h>
 #endif
 
 #include <net/if.h>
 #include <net/if_var.h>
 #ifdef ALTQ3_COMPAT
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #endif
 
 #include <net/altq/if_altq.h>
 #include <net/altq/altq.h>
 #include <net/altq/altq_rmclass.h>
 #include <net/altq/altq_rmclass_debug.h>
 #include <net/altq/altq_red.h>
 #include <net/altq/altq_rio.h>
 
 /*
  * Local Macros
  */
 
 /*
  * reset_cutoff(ifd): set the top-level cutoff of 'ifd' back to
  * RM_MAXDEPTH.  The body is wrapped in do { } while (0) and the argument
  * is parenthesized so that "reset_cutoff(x);" is exactly one statement
  * and stays correct inside an unbraced if/else.
  */
 #define	reset_cutoff(ifd)	do { (ifd)->cutoff_ = RM_MAXDEPTH; } while (0)
 
 /*
  * Local routines.
  */
 
 static int	rmc_satisfied(struct rm_class *, struct timeval *);
 static void	rmc_wrr_set_weights(struct rm_ifdat *);
 static void	rmc_depth_compute(struct rm_class *);
 static void	rmc_depth_recompute(rm_class_t *);
 
 static mbuf_t	*_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
 static mbuf_t	*_rmc_prr_dequeue_next(struct rm_ifdat *, int);
 
 static int	_rmc_addq(rm_class_t *, mbuf_t *);
 static void	_rmc_dropq(rm_class_t *);
 static mbuf_t	*_rmc_getq(rm_class_t *);
 static mbuf_t	*_rmc_pollq(rm_class_t *);
 
 static int	rmc_under_limit(struct rm_class *, struct timeval *);
 static void	rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
 static void	rmc_drop_action(struct rm_class *);
 static void	rmc_restart(struct rm_class *);
 static void	rmc_root_overlimit(struct rm_class *, struct rm_class *);
 
 #define	BORROW_OFFTIME
 /*
  * BORROW_OFFTIME (experimental):
  * borrow the offtime of the class borrowing from.
  * the reason is that when its own offtime is set, the class is unable
  * to borrow much, especially when cutoff is taking effect.
  * but when the borrowed class is overloaded (advidle is close to minidle),
  * use the borrowing class's offtime to avoid overload.
  */
 #define	ADJUST_CUTOFF
 /*
  * ADJUST_CUTOFF (experimental):
  * if no underlimit class is found due to cutoff, increase cutoff and
  * retry the scheduling loop.
  * also, don't invoke delay_actions while cutoff is taking effect,
  * since a sleeping class won't have a chance to be scheduled in the
  * next loop.
  *
  * now heuristics for setting the top-level variable (cutoff_) becomes:
  *	1. if a packet arrives for a not-overlimit class, set cutoff
  *	   to the depth of the class.
  *	2. if cutoff is i, and a packet arrives for an overlimit class
  *	   with an underlimit ancestor at a lower level than i (say j),
  *	   then set cutoff to j.
  *	3. at scheduling a packet, if there is no underlimit class
  *	   due to the current cutoff level, increase cutoff by 1 and
  *	   then try to schedule again.
  */
 
 /*
  * rm_class_t *
  * rmc_newclass(...) - Create a new resource management class at priority
  * 'pri' on the interface given by 'ifd'.
  *
  * nsecPerByte  is the data rate of the interface in nanoseconds/byte.
  *              E.g., 800 for a 10Mb/s ethernet.  If the class gets less
  *              than 100% of the bandwidth, this number should be the
  *              'effective' rate for the class.  Let f be the
  *              bandwidth fraction allocated to this class, and let
  *              nsPerByte be the data rate of the output link in
  *              nanoseconds/byte.  Then nsecPerByte is set to
  *              nsPerByte / f.  E.g., 1600 (= 800 / .5)
  *              for a class that gets 50% of an ethernet's bandwidth.
  *
  * action       the routine to call when the class is over limit.
  *
  * maxq         max allowable queue size for class (in packets).
  *
  * parent       parent class pointer.
  *
  * borrow       class to borrow from (should be either 'parent' or null).
  *
  * maxidle      max value allowed for class 'idle' time estimate (this
  *              parameter determines how large an initial burst of packets
  *              can be before overlimit action is invoked).
  *
  * offtime      how long 'delay' action will delay when class goes over
  *              limit (this parameter determines the steady-state burst
  *              size when a class is running over its limit).
  *
  * Maxidle and offtime have to be computed from the following:  If the
  * average packet size is s, the bandwidth fraction allocated to this
  * class is f, we want to allow b packet bursts, and the gain of the
  * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
  *
  *   ptime = s * nsPerByte * (1 - f) / f
  *   maxidle = ptime * (1 - g^b) / g^b
  *   minidle = -ptime * (1 / (f - 1))
  *   offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
  *
  * Operationally, it's convenient to specify maxidle & offtime in units
  * independent of the link bandwidth so the maxidle & offtime passed to
  * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
  * (The constant factor is a scale factor needed to make the parameters
  * integers.  This scaling also means that the 'unscaled' values of
  * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
  * not nanoseconds.)  Also note that the 'idle' filter computation keeps
  * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
  * maxidle also must be scaled upward by this value.  Thus, the passed
  * values for maxidle and offtime can be computed as follows:
  *
  * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
  * offtime = offtime * 8 / (1000 * nsecPerByte)
  *
  * When USE_HRTIME is employed, then maxidle and offtime become:
  * 	maxidle = maxidle * (8.0 / nsecPerByte);
  * 	offtime = offtime * (8.0 / nsecPerByte);
  */
 struct rm_class *
 rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
     void (*action)(rm_class_t *, rm_class_t *), int maxq,
     struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
     int minidle, u_int offtime, int pktsize, int flags)
 {
 	struct rm_class	*cl;
 	struct rm_class	*peer;
 	int		 s;
 
 	/*
 	 * 'pri' indexes ifd->active_[], ifd->num_[] and ifd->alloc_[]
 	 * below, so reject values on either side of the valid range.
 	 */
 	if (pri < 0 || pri >= RM_MAXPRIO)
 		return (NULL);
 	/* nsecPerByte is the divisor used to compute the allotment. */
 	if (nsecPerByte == 0)
 		return (NULL);
 #ifndef ALTQ_RED
 	if (flags & RMCF_RED) {
 #ifdef ALTQ_DEBUG
 		printf("rmc_newclass: RED not configured for CBQ!\n");
 #endif
 		return (NULL);
 	}
 #endif
 #ifndef ALTQ_RIO
 	if (flags & RMCF_RIO) {
 #ifdef ALTQ_DEBUG
 		printf("rmc_newclass: RIO not configured for CBQ!\n");
 #endif
 		return (NULL);
 	}
 #endif
 
 	/* Allocate the class and its packet queue; both are zero-filled. */
 	cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (cl == NULL)
 		return (NULL);
 	CALLOUT_INIT(&cl->callout_);
 	cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (cl->q_ == NULL) {
 		free(cl, M_DEVBUF);
 		return (NULL);
 	}
 
 	/*
 	 * Class initialization.
 	 */
 	cl->children_ = NULL;
 	cl->parent_ = parent;
 	cl->borrow_ = borrow;
 	cl->leaf_ = 1;
 	cl->ifdat_ = ifd;
 	cl->pri_ = pri;
 	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
 	cl->depth_ = 0;
 	cl->qthresh_ = 0;
 	cl->ns_per_byte_ = nsecPerByte;
 
 	qlimit(cl->q_) = maxq;
 	qtype(cl->q_) = Q_DROPHEAD;
 	qlen(cl->q_) = 0;
 	cl->flags_ = flags;
 
 #if 1 /* minidle is also scaled in ALTQ */
 	/* the (int) cast keeps the product signed for a negative minidle */
 	cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
 	if (cl->minidle_ > 0)
 		cl->minidle_ = 0;
 #else
 	cl->minidle_ = minidle;
 #endif
 	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
 	if (cl->maxidle_ == 0)
 		cl->maxidle_ = 1;
 #if 1 /* offtime is also scaled in ALTQ */
 	cl->avgidle_ = cl->maxidle_;
 	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
 	if (cl->offtime_ == 0)
 		cl->offtime_ = 1;
 #else
 	cl->avgidle_ = 0;
 	cl->offtime_ = (offtime * nsecPerByte) / 8;
 #endif
 	cl->overlimit = action;
 
 #ifdef ALTQ_RED
 	/*
 	 * Optional RED/RIO queue management.  If the allocation fails the
 	 * queue type is left as Q_DROPHEAD.
 	 */
 	if (flags & (RMCF_RED|RMCF_RIO)) {
 		int red_flags, red_pkttime;
 
 		red_flags = 0;
 		if (flags & RMCF_ECN)
 			red_flags |= REDF_ECN;
 		if (flags & RMCF_FLOWVALVE)
 			red_flags |= REDF_FLOWVALVE;
 #ifdef ALTQ_RIO
 		if (flags & RMCF_CLEARDSCP)
 			red_flags |= RIOF_CLEARDSCP;
 #endif
 		red_pkttime = nsecPerByte * pktsize  / 1000;
 
 		if (flags & RMCF_RED) {
 			/* thresholds at 10% and 30% of the queue limit */
 			cl->red_ = red_alloc(0, 0,
 			    qlimit(cl->q_) * 10/100,
 			    qlimit(cl->q_) * 30/100,
 			    red_flags, red_pkttime);
 			if (cl->red_ != NULL)
 				qtype(cl->q_) = Q_RED;
 		}
 #ifdef ALTQ_RIO
 		else {
 			cl->red_ = (red_t *)rio_alloc(0, NULL,
 						      red_flags, red_pkttime);
 			if (cl->red_ != NULL)
 				qtype(cl->q_) = Q_RIO;
 		}
 #endif
 	}
 #endif /* ALTQ_RED */
 
 	/*
 	 * put the class into the class tree
 	 */
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	IFQ_LOCK(ifd->ifq_);
 	if ((peer = ifd->active_[pri]) != NULL) {
 		/* find the last class at this pri */
 		cl->peer_ = peer;
 		while (peer->peer_ != ifd->active_[pri])
 			peer = peer->peer_;
 		peer->peer_ = cl;
 	} else {
 		/* first class at this priority: a ring of one */
 		ifd->active_[pri] = cl;
 		cl->peer_ = cl;
 	}
 
 	/* push the class on the front of the parent's children chain */
 	if (cl->parent_) {
 		cl->next_ = parent->children_;
 		parent->children_ = cl;
 		parent->leaf_ = 0;
 	}
 
 	/*
 	 * Compute the depth of this class and its ancestors in the class
 	 * hierarchy.
 	 */
 	rmc_depth_compute(cl);
 
 	/*
 	 * If CBQ's WRR is enabled, then initialize the class WRR state.
 	 */
 	if (ifd->wrr_) {
 		ifd->num_[pri]++;
 		ifd->alloc_[pri] += cl->allotment_;
 		rmc_wrr_set_weights(ifd);
 	}
 	IFQ_UNLOCK(ifd->ifq_);
 	splx(s);
 	return (cl);
 }
 
 /*
  * int
  * rmc_modclass(...) - Re-parameterize the existing class 'cl' in place:
  *	allotment, queue limit, minidle, maxidle and offtime are recomputed
  *	from the arguments with the same scaling rmc_newclass() applies,
  *	avgidle is reset to the new maxidle, and, if WRR is enabled, the
  *	per-priority allocation and the WRR weights are updated.
  *	'pktsize' is not used here.
  *
  *	Returns:	0 (always)
  */
 int
 rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
     int minidle, u_int offtime, int pktsize)
 {
 	struct rm_ifdat	*ifd;
 	u_int		 old_allotment;
 	int		 s;
 
 	ifd = cl->ifdat_;
 	/* remembered so the WRR allocation can be adjusted by the delta */
 	old_allotment = cl->allotment_;
 
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	IFQ_LOCK(ifd->ifq_);
 	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
 	cl->qthresh_ = 0;
 	cl->ns_per_byte_ = nsecPerByte;
 
 	qlimit(cl->q_) = maxq;
 
 #if 1 /* minidle is also scaled in ALTQ */
 	/*
 	 * Cast to int as rmc_newclass() does: without it the product is
 	 * computed as unsigned, a negative minidle turns into a large
 	 * positive value and the clamp below forces minidle_ to 0.
 	 */
 	cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
 	if (cl->minidle_ > 0)
 		cl->minidle_ = 0;
 #else
 	cl->minidle_ = minidle;
 #endif
 	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
 	if (cl->maxidle_ == 0)
 		cl->maxidle_ = 1;
 #if 1 /* offtime is also scaled in ALTQ */
 	cl->avgidle_ = cl->maxidle_;
 	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
 	if (cl->offtime_ == 0)
 		cl->offtime_ = 1;
 #else
 	cl->avgidle_ = 0;
 	cl->offtime_ = (offtime * nsecPerByte) / 8;
 #endif
 
 	/*
 	 * If CBQ's WRR is enabled, then initialize the class WRR state.
 	 */
 	if (ifd->wrr_) {
 		ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
 		rmc_wrr_set_weights(ifd);
 	}
 	IFQ_UNLOCK(ifd->ifq_);
 	splx(s);
 	return (0);
 }
 
 /*
  * static void
  * rmc_wrr_set_weights(struct rm_ifdat *ifd) - Recompute, for every
  *	priority level, the scale factor M_[] and the weighted allotment
  *	of each class used by the CBQ weighted round robin algorithm.
  *
  *	Returns: NONE
  */
 
 static void
 rmc_wrr_set_weights(struct rm_ifdat *ifd)
 {
 	struct rm_class	*head, *cur;
 	int		 pri;
 
 	for (pri = 0; pri < RM_MAXPRIO; pri++) {
 		/*
 		 * This is inverted from that of the simulator to
 		 * maintain precision.
 		 */
 		if (ifd->num_[pri] != 0)
 			ifd->M_[pri] = ifd->alloc_[pri] /
 				(ifd->num_[pri] * ifd->maxpkt_);
 		else
 			ifd->M_[pri] = 0;
 
 		head = ifd->active_[pri];
 		if (head == NULL)
 			continue;
 
 		/*
 		 * Precompute each class's weighted allotment so the wrr
 		 * scheduling path does not have to divide.  These are only
 		 * recomputed when a class comes or goes.
 		 */
 		cur = head;
 		do {
 			/* safe-guard for slow link or alloc_ == 0 */
 			if (ifd->M_[pri] != 0)
 				cur->w_allotment_ = cur->allotment_ /
 					ifd->M_[pri];
 			else
 				cur->w_allotment_ = 0;
 			cur = cur->peer_;
 		} while (cur != NULL && cur != head);
 	}
 }
 
 /*
  * rmc_get_weight - Return the WRR scale factor M_[pri] of 'ifd', or 0
  *	when 'pri' lies outside [0, RM_MAXPRIO).
  */
 int
 rmc_get_weight(struct rm_ifdat *ifd, int pri)
 {
 	if (pri < 0 || pri >= RM_MAXPRIO)
 		return (0);
 
 	return (ifd->M_[pri]);
 }
 
 /*
  * static void
  * rmc_depth_compute(struct rm_class *cl) - Walk from 'cl' towards the
  *	root, raising each parent's depth to child depth + 1.  The walk
  *	stops at the first class that has no parent or whose parent is
  *	already deeper than the child.
  *
  *	Returns:	NONE
  */
 
 static void
 rmc_depth_compute(struct rm_class *cl)
 {
 	rm_class_t	*child, *parent;
 
 	for (child = cl; child != NULL; child = parent) {
 		parent = child->parent_;
 		if (parent == NULL || child->depth_ < parent->depth_)
 			break;
 		parent->depth_ = child->depth_ + 1;
 	}
 }
 
 /*
  * static void
  * rmc_depth_recompute(struct rm_class *cl) - Re-derive depths after a
  *	class has been deleted.  Starting at 'cl' and moving up through
  *	the parents, each class gets depth 0 if it has no children, or
  *	1 + the largest child depth otherwise.  The walk ends early as
  *	soon as a class with children already has the right depth.
  *
  *	Returns:	NONE
  */
 
 static void
 rmc_depth_recompute(rm_class_t *cl)
 {
 #if 1 /* ALTQ */
 	rm_class_t	*node, *kid;
 	int		 deepest;
 
 	for (node = cl; node != NULL; node = node->parent_) {
 		if (node->children_ == NULL) {
 			node->depth_ = 0;
 			continue;
 		}
 
 		/* largest depth among the direct children */
 		deepest = 0;
 		for (kid = node->children_; kid != NULL; kid = kid->next_) {
 			if (kid->depth_ > deepest)
 				deepest = kid->depth_;
 		}
 
 		if (node->depth_ == deepest + 1)
 			/* no change to this parent */
 			return;
 
 		node->depth_ = deepest + 1;
 	}
 #else
 	rm_class_t	*t;
 
 	if (cl->depth_ >= 1) {
 		if (cl->children_ == NULL) {
 			cl->depth_ = 0;
 		} else if ((t = cl->children_) != NULL) {
 			while (t != NULL) {
 				if (t->children_ != NULL)
 					rmc_depth_recompute(t);
 				t = t->next_;
 			}
 		} else
 			rmc_depth_compute(cl);
 	}
 #endif
 }
 
 /*
  * void
  * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
  *	function deletes a class from the link-sharing structure and frees
  *	all resources associated with the class.
  *
  *	Returns: NONE
  */
 
 void
 rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
 {
 	struct rm_class	*p, *head, *previous;
 	int		 s;
 
 	/* The class being deleted must not have any children. */
 	ASSERT(cl->children_ == NULL);
 
 	/* Stop the callout of a sleeping class before tearing it down. */
 	if (cl->sleeping_)
 		CALLOUT_STOP(&cl->callout_);
 
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	IFQ_LOCK(ifd->ifq_);
 	/*
 	 * Free packets in the packet queue.
 	 * XXX - this may not be a desired behavior.  Packets should be
 	 *		re-queued.
 	 */
 	rmc_dropall(cl);
 
 	/*
 	 * If the class has a parent, then remove the class from the
 	 * parent's children chain.
 	 */
 	if (cl->parent_ != NULL) {
 		head = cl->parent_->children_;
 		p = previous = head;
 		if (head->next_ == NULL) {
 			/* cl is the only child: the parent becomes a leaf */
 			ASSERT(head == cl);
 			cl->parent_->children_ = NULL;
 			cl->parent_->leaf_ = 1;
 		} else while (p != NULL) {
 			/* singly-linked unlink; 'previous' trails 'p' */
 			if (p == cl) {
 				if (cl == head)
 					cl->parent_->children_ = cl->next_;
 				else
 					previous->next_ = cl->next_;
 				cl->next_ = NULL;
 				p = NULL;
 			} else {
 				previous = p;
 				p = p->next_;
 			}
 		}
 	}
 
 	/*
 	 * Delete class from class priority peer list.
 	 */
 	if ((p = ifd->active_[cl->pri_]) != NULL) {
 		/*
 		 * If there is more than one member of this priority
 		 * level, then look for class(cl) in the priority level.
 		 */
 		if (p != p->peer_) {
 			/*
 			 * Find cl's predecessor in the circular peer list.
 			 * NOTE(review): this loop only terminates if cl is
 			 * actually on the list for its priority.
 			 */
 			while (p->peer_ != cl)
 				p = p->peer_;
 			p->peer_ = cl->peer_;
 
 			/* keep active_[] from pointing at the removed class */
 			if (ifd->active_[cl->pri_] == cl)
 				ifd->active_[cl->pri_] = cl->peer_;
 		} else {
 			ASSERT(p == cl);
 			ifd->active_[cl->pri_] = NULL;
 		}
 	}
 
 	/*
 	 * Recompute the WRR weights.
 	 */
 	if (ifd->wrr_) {
 		ifd->alloc_[cl->pri_] -= cl->allotment_;
 		ifd->num_[cl->pri_]--;
 		rmc_wrr_set_weights(ifd);
 	}
 
 	/*
 	 * Re-compute the depth of the tree.
 	 */
 #if 1 /* ALTQ */
 	rmc_depth_recompute(cl->parent_);
 #else
 	rmc_depth_recompute(ifd->root_);
 #endif
 
 	IFQ_UNLOCK(ifd->ifq_);
 	splx(s);
 
 	/*
 	 * Free the class structure.
 	 */
 	if (cl->red_ != NULL) {
 		/* the queue type selects which destructor owns red_ */
 #ifdef ALTQ_RIO
 		if (q_is_rio(cl->q_))
 			rio_destroy((rio_t *)cl->red_);
 #endif
 #ifdef ALTQ_RED
 		if (q_is_red(cl->q_))
 			red_destroy(cl->red_);
 #endif
 	}
 	free(cl->q_, M_DEVBUF);
 	free(cl, M_DEVBUF);
 }
 
 
 /*
  * void
  * rmc_init(...) - Initialize the resource management state kept in
  *	'ifd' for the output queue 'ifq' and create the root class.
  *
  *	nsecPerByte	link speed (inverse of bandwidth) in nanoseconds/byte.
  *	restart		driver-specific routine stored in ifd->restart, to be
  *			called by the generic 'delay until under limit'
  *			action to restart output.
  *	maxq		queue size passed to the root class.
  *	maxqueued	maximum number of packets the resource management
  *			code allows to be queued 'downstream' (typically 1).
  *	maxidle, minidle, offtime
  *			passed through to rmc_newclass() for the root class.
  *	flags		RMCF_WRR selects weighted round robin, RMCF_EFFICIENT
  *			sets the efficient_ flag.
  *
  *	If the root class cannot be allocated a message is printed and
  *	ifd->root_ is left NULL.
  *
  *	Returns:	NONE
  */
 
 void
 rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
     void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
     int minidle, u_int offtime, int flags)
 {
 	int		pri, slot, mtu;
 
 	/*
 	 * Initialize the CBQ tracing/debug facility.
 	 */
 	CBQTRACEINIT();
 
 	/* start from a zeroed structure, then fill in the link parameters */
 	bzero((char *)ifd, sizeof (*ifd));
 	mtu = ifq->altq_ifp->if_mtu;
 
 	ifd->ifq_ = ifq;
 	ifd->restart = restart;
 	ifd->maxqueued_ = maxqueued;
 	ifd->ns_per_byte_ = nsecPerByte;
 	ifd->maxpkt_ = mtu;
 	ifd->wrr_ = ((flags & RMCF_WRR) != 0);
 	ifd->efficient_ = ((flags & RMCF_EFFICIENT) != 0);
 
 	/* 16 MTU-sized packet times, cut to a quarter on slow links */
 	ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
 	if (mtu * nsecPerByte > 10 * 1000000)
 		ifd->maxiftime_ /= 4;
 
 	reset_cutoff(ifd);
 	CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);
 
 	/*
 	 * Initialize the CBQ's WRR state.
 	 */
 	for (pri = 0; pri < RM_MAXPRIO; pri++) {
 		ifd->active_[pri] = NULL;
 		ifd->na_[pri] = 0;
 		ifd->num_[pri] = 0;
 		ifd->M_[pri] = 0;
 		ifd->alloc_[pri] = 0;
 	}
 
 	/*
 	 * Initialize current packet state.
 	 */
 	ifd->qi_ = 0;
 	ifd->qo_ = 0;
 	for (slot = 0; slot < RM_MAXQUEUED; slot++) {
 		ifd->borrowed_[slot] = NULL;
 		ifd->curlen_[slot] = 0;
 		ifd->class_[slot] = NULL;
 	}
 
 	/*
 	 * Create the root class of the link-sharing structure.
 	 */
 	ifd->root_ = rmc_newclass(0, ifd, nsecPerByte, rmc_root_overlimit,
 	    maxq, NULL, NULL, maxidle, minidle, offtime, 0, 0);
 	if (ifd->root_ == NULL) {
 		printf("rmc_init: root class not allocated\n");
 		return;
 	}
 	ifd->root_->depth_ = 0;
 }
 
 /*
  * int
  * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
  *	mbuf 'm' to queue for resource class 'cl'.  This routine is called
  *	by a driver's if_output routine.  This routine must be called with
  *	output packet completion interrupts locked out (to avoid racing with
  *	rmc_dequeue_next).
  *
  *	Before queueing, the top-level cutoff of the interface may be
  *	lowered based on whether 'cl' or one of the classes on its borrow
  *	chain is currently underlimit.
  *
  *	Returns:	0 on successful queueing
  *			-1 when packet drop occurs
  */
 int
 rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
 {
 	struct timeval	 now;
 	struct rm_ifdat *ifd = cl->ifdat_;
 	int		 cpri = cl->pri_;
 	/* sampled before the packet is added, see the na_ update below */
 	int		 is_empty = qempty(cl->q_);
 
 	RM_GETTIME(now);
 	if (ifd->cutoff_ > 0) {
 		if (TV_LT(&cl->undertime_, &now)) {
 			/* class is not overlimit: cutoff drops to its depth */
 			if (ifd->cutoff_ > cl->depth_)
 				ifd->cutoff_ = cl->depth_;
 			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
 		}
 #if 1 /* ALTQ */
 		else {
 			/*
 			 * the class is overlimit. if the class has
 			 * underlimit ancestors, set cutoff to the lowest
 			 * depth among them.
 			 */
 			struct rm_class *borrow = cl->borrow_;
 
 			/* only classes shallower than the cutoff matter */
 			while (borrow != NULL &&
 			       borrow->depth_ < ifd->cutoff_) {
 				if (TV_LT(&borrow->undertime_, &now)) {
 					ifd->cutoff_ = borrow->depth_;
 					CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
 					break;
 				}
 				borrow = borrow->borrow_;
 			}
 		}
 #else /* !ALTQ */
 		else if ((ifd->cutoff_ > 1) && cl->borrow_) {
 			if (TV_LT(&cl->borrow_->undertime_, &now)) {
 				ifd->cutoff_ = cl->borrow_->depth_;
 				CBQTRACE(rmc_queue_packet, 'ffob',
 					 cl->borrow_->depth_);
 			}
 		}
 #endif /* !ALTQ */
 	}
 
 	if (_rmc_addq(cl, m) < 0)
 		/* failed */
 		return (-1);
 
 	/* queue went from empty to non-empty: one more active class */
 	if (is_empty) {
 		CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
 		ifd->na_[cpri]++;
 	}
 
 	/* over the limit after the add: run the drop action and report it */
 	if (qlen(cl->q_) > qlimit(cl->q_)) {
 		/* note: qlimit can be set to 0 or 1 */
 		rmc_drop_action(cl);
 		return (-1);
 	}
 	return (0);
 }
 
 /*
  * void
  * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
  *	classes, highest priority first, to see if they are satisfied.
  *	The cutoff is set to the depth of the first unsatisfied class
  *	found; if every class is satisfied the cutoff is reset.
  */
 
 static void
 rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
 {
 	rm_class_t	*head, *cur;
 	int		 pri;
 
 	for (pri = RM_MAXPRIO - 1; pri >= 0; pri--) {
 		head = ifd->active_[pri];
 		if (head == NULL)
 			continue;
 
 		/* one full turn around the circular peer list */
 		cur = head;
 		do {
 			if (!rmc_satisfied(cur, now)) {
 				ifd->cutoff_ = cur->depth_;
 				return;
 			}
 			cur = cur->peer_;
 		} while (cur != head);
 	}
 
 	reset_cutoff(ifd);
 }
 
 /*
  * rmc_satisfied - Return 1 if the class is satisfied.  0, otherwise.
  *
  * A NULL class and a class whose undertime_ is still in the future are
  * satisfied.  A depth-0 class is unsatisfied only when it is not
  * sleeping and its queue length exceeds qthresh_.  Any other class is
  * satisfied when all of its children are.
  */
 
 static int
 rmc_satisfied(struct rm_class *cl, struct timeval *now)
 {
 	rm_class_t	*child;
 
 	if (cl == NULL || TV_LT(now, &cl->undertime_))
 		return (1);
 
 	if (cl->depth_ == 0)
 		return ((cl->sleeping_ ||
 		    qlen(cl->q_) <= cl->qthresh_) ? 1 : 0);
 
 	for (child = cl->children_; child != NULL; child = child->next_) {
 		if (!rmc_satisfied(child, now))
 			return (0);
 	}
 
 	return (1);
 }
 
 /*
  * Return 1 if class 'cl' is under limit or can borrow from a parent,
  * 0 if overlimit.  As a side-effect, this routine will invoke the
  * class overlimit action if the class is overlimit.
  *
  * On a successful borrow, the class borrowed from is recorded in
  * ifd->borrowed_[ifd->qi_]; the slot is cleared on entry.
  */
 
 static int
 rmc_under_limit(struct rm_class *cl, struct timeval *now)
 {
 	rm_class_t	*p = cl;	/* the class originally asked about */
 	rm_class_t	*top;
 	struct rm_ifdat	*ifd = cl->ifdat_;
 
 	ifd->borrowed_[ifd->qi_] = NULL;
 	/*
 	 * If cl is the root class, then always return that it is
 	 * underlimit.  Otherwise, check to see if the class is underlimit.
 	 */
 	if (cl->parent_ == NULL)
 		return (1);
 
 	if (cl->sleeping_) {
 		/* still inside its delay period */
 		if (TV_LT(now, &cl->undertime_))
 			return (0);
 
 		/* delay has expired: wake the class up */
 		CALLOUT_STOP(&cl->callout_);
 		cl->sleeping_ = 0;
 		cl->undertime_.tv_sec = 0;
 		return (1);
 	}
 
 	/*
 	 * Walk up the borrow chain while the current class is overlimit
 	 * (undertime_ set and still in the future).  'top' trails the walk
 	 * and holds the last class successfully stepped to.
 	 */
 	top = NULL;
 	while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
 		if (((cl = cl->borrow_) == NULL) ||
 		    (cl->depth_ > ifd->cutoff_)) {
 #ifdef ADJUST_CUTOFF
 			if (cl != NULL)
 				/* cutoff is taking effect, just
 				   return false without calling
 				   the delay action. */
 				return (0);
 #endif
 #ifdef BORROW_OFFTIME
 			/*
 			 * check if the class can borrow offtime too.
 			 * borrow offtime from the top of the borrow
 			 * chain if the top class is not overloaded.
 			 */
 			/*
 			 * NOTE: ADJUST_CUTOFF is #defined earlier in this
 			 * file, so the cl != NULL case has already returned
 			 * above and the following test cannot be true.
 			 */
 			if (cl != NULL) {
 				/* cutoff is taking effect, use this class as top. */
 				top = cl;
 				CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
 			}
 			if (top != NULL && top->avgidle_ == top->minidle_)
 				top = NULL;
 			p->overtime_ = *now;
 			(p->overlimit)(p, top);
 #else
 			p->overtime_ = *now;
 			(p->overlimit)(p, NULL);
 #endif
 			return (0);
 		}
 		top = cl;
 	}
 
 	/* an ancestor supplied the bandwidth: remember who was borrowed from */
 	if (cl != p)
 		ifd->borrowed_[ifd->qi_] = cl;
 	return (1);
 }
 
 /*
  * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to
  *	Packet-by-packet round robin.
  *
  * The heart of the weighted round-robin scheduler, which decides which
  * class next gets to send a packet.  Highest priority first, then
  * weighted round-robin within priorities.
  *
  * Each able-to-send class gets to send until its byte allocation is
  * exhausted.  Thus, the active pointer is only changed after a class has
  * exhausted its allocation.
  *
  * If the scheduler finds no class that is underlimit or able to borrow,
  * then the first class found that had a nonzero queue and is allowed to
  * borrow gets to send.
  *
  * 'op' is ALTDQ_REMOVE to dequeue the packet or ALTDQ_POLL to peek at
  * it; a poll records the chosen class in ifd->pollcache_ so that the
  * following remove returns the same packet.
  */
 
 static mbuf_t *
 _rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
 {
 	struct rm_class	*cl = NULL, *first = NULL;
 	u_int		 deficit;
 	int		 cpri;
 	mbuf_t		*m;
 	struct timeval	 now;
 
 	RM_GETTIME(now);
 
 	/*
 	 * if the driver polls the top of the queue and then removes
 	 * the polled packet, we must return the same packet.
 	 */
 	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
 		cl = ifd->pollcache_;
 		cpri = cl->pri_;
 		if (ifd->efficient_) {
 			/* check if this class is overlimit */
 			if (cl->undertime_.tv_sec != 0 &&
 			    rmc_under_limit(cl, &now) == 0)
 				first = cl;
 		}
 		ifd->pollcache_ = NULL;
 		goto _wrr_out;
 	}
 	else {
 		/* mode == ALTDQ_POLL || pollcache == NULL */
 		ifd->pollcache_ = NULL;
 		ifd->borrowed_[ifd->qi_] = NULL;
 	}
 #ifdef ADJUST_CUTOFF
  _again:
 #endif
 	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
 		/* skip priority levels with no active (non-empty) class */
 		if (ifd->na_[cpri] == 0)
 			continue;
 		deficit = 0;
 		/*
 		 * Loop through twice for a priority level, if some class
 		 * was unable to send a packet the first round because
 		 * of the weighted round-robin mechanism.
 		 * During the second loop at this level, deficit==2.
 		 * (This second loop is not needed if for every class,
 		 * "M[cl->pri_])" times "cl->allotment" is greater than
 		 * the byte size for the largest packet in the class.)
 		 */
  _wrr_loop:
 		cl = ifd->active_[cpri];
 		ASSERT(cl != NULL);
 		do {
 			/* replenish an exhausted allocation (first pass only) */
 			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
 				cl->bytes_alloc_ += cl->w_allotment_;
 			if (!qempty(cl->q_)) {
 				if ((cl->undertime_.tv_sec == 0) ||
 				    rmc_under_limit(cl, &now)) {
 					if (cl->bytes_alloc_ > 0 || deficit > 1)
 						goto _wrr_out;
 
 					/* underlimit but no alloc */
 					deficit = 1;
 #if 1
 					ifd->borrowed_[ifd->qi_] = NULL;
 #endif
 				}
 				else if (first == NULL && cl->borrow_ != NULL)
 					first = cl; /* borrowing candidate */
 			}
 
 			cl->bytes_alloc_ = 0;
 			cl = cl->peer_;
 		} while (cl != ifd->active_[cpri]);
 
 		if (deficit == 1) {
 			/* first loop found an underlimit class with deficit */
 			/* Loop on same priority level, with new deficit.  */
 			deficit = 2;
 			goto _wrr_loop;
 		}
 	}
 
 #ifdef ADJUST_CUTOFF
 	/*
 	 * no underlimit class found.  if cutoff is taking effect,
 	 * increase cutoff and try again.
 	 */
 	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
 		ifd->cutoff_++;
 		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
 		goto _again;
 	}
 #endif /* ADJUST_CUTOFF */
 	/*
 	 * If LINK_EFFICIENCY is turned on, then the first overlimit
 	 * class we encounter will send a packet if all the classes
 	 * of the link-sharing structure are overlimit.
 	 */
 	reset_cutoff(ifd);
 	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);
 
 	if (!ifd->efficient_ || first == NULL)
 		return (NULL);
 
 	/*
 	 * 'first' was only recorded in the scan above for a class whose
 	 * borrow_ is non-NULL, so the dereferences below are safe.
 	 */
 	cl = first;
 	cpri = cl->pri_;
 #if 0	/* too time-consuming for nothing */
 	if (cl->sleeping_)
 		CALLOUT_STOP(&cl->callout_);
 	cl->sleeping_ = 0;
 	cl->undertime_.tv_sec = 0;
 #endif
 	ifd->borrowed_[ifd->qi_] = cl->borrow_;
 	ifd->cutoff_ = cl->borrow_->depth_;
 
 	/*
 	 * Deque the packet and do the book keeping...
 	 */
  _wrr_out:
 	if (op == ALTDQ_REMOVE) {
 		m = _rmc_getq(cl);
 		if (m == NULL)
 			panic("_rmc_wrr_dequeue_next");
 		/* queue drained: one fewer active class at this priority */
 		if (qempty(cl->q_))
 			ifd->na_[cpri]--;
 
 		/*
 		 * Update class statistics and link data.
 		 */
 		if (cl->bytes_alloc_ > 0)
 			cl->bytes_alloc_ -= m_pktlen(m);
 
 		/* advance the active pointer only once the allocation is spent */
 		if ((cl->bytes_alloc_ <= 0) || first == cl)
 			ifd->active_[cl->pri_] = cl->peer_;
 		else
 			ifd->active_[cl->pri_] = cl;
 
 		/* record the packet in the slot at qi_ and advance qi_ */
 		ifd->class_[ifd->qi_] = cl;
 		ifd->curlen_[ifd->qi_] = m_pktlen(m);
 		ifd->now_[ifd->qi_] = now;
 		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
 		ifd->queued_++;
 	} else {
 		/* mode == ALTDQ_POLL */
 		m = _rmc_pollq(cl);
 		ifd->pollcache_ = cl;
 	}
 	return (m);
 }
 
 /*
  * Dequeue & return next packet from the highest priority class that
  * has a packet to send & has enough allocation to send it.  This
  * routine is called by a driver whenever it needs a new packet to
  * output.
  *
  * 'op' is ALTDQ_REMOVE to dequeue the packet or ALTDQ_POLL to peek at
  * it; a poll records the chosen class in ifd->pollcache_ so that the
  * following remove returns the same packet.  Returns NULL when no class
  * may send.
  */
 static mbuf_t *
 _rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
 {
 	mbuf_t		*m;
 	int		 cpri;
 	struct rm_class	*cl, *first = NULL;
 	struct timeval	 now;
 
 	RM_GETTIME(now);
 
 	/*
 	 * if the driver polls the top of the queue and then removes
 	 * the polled packet, we must return the same packet.
 	 */
 	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
 		cl = ifd->pollcache_;
 		cpri = cl->pri_;
 		ifd->pollcache_ = NULL;
 		goto _prr_out;
 	} else {
 		/* mode == ALTDQ_POLL || pollcache == NULL */
 		ifd->pollcache_ = NULL;
 		ifd->borrowed_[ifd->qi_] = NULL;
 	}
 #ifdef ADJUST_CUTOFF
  _again:
 #endif
 	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
 		/* skip priority levels with no active (non-empty) class */
 		if (ifd->na_[cpri] == 0)
 			continue;
 		cl = ifd->active_[cpri];
 		ASSERT(cl != NULL);
 		do {
 			if (!qempty(cl->q_)) {
 				if ((cl->undertime_.tv_sec == 0) ||
 				    rmc_under_limit(cl, &now))
 					goto _prr_out;
 				/* remember the first overlimit class that may borrow */
 				if (first == NULL && cl->borrow_ != NULL)
 					first = cl;
 			}
 			cl = cl->peer_;
 		} while (cl != ifd->active_[cpri]);
 	}
 
 #ifdef ADJUST_CUTOFF
 	/*
 	 * no underlimit class found.  if cutoff is taking effect, increase
 	 * cutoff and try again.
 	 */
 	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
 		ifd->cutoff_++;
 		goto _again;
 	}
 #endif /* ADJUST_CUTOFF */
 	/*
 	 * If LINK_EFFICIENCY is turned on, then the first overlimit
 	 * class we encounter will send a packet if all the classes
 	 * of the link-sharing structure are overlimit.
 	 */
 	reset_cutoff(ifd);
 	if (!ifd->efficient_ || first == NULL)
 		return (NULL);
 
 	/*
 	 * 'first' was only recorded in the scan above for a class whose
 	 * borrow_ is non-NULL, so the dereferences below are safe.
 	 */
 	cl = first;
 	cpri = cl->pri_;
 #if 0	/* too time-consuming for nothing */
 	if (cl->sleeping_)
 		CALLOUT_STOP(&cl->callout_);
 	cl->sleeping_ = 0;
 	cl->undertime_.tv_sec = 0;
 #endif
 	ifd->borrowed_[ifd->qi_] = cl->borrow_;
 	ifd->cutoff_ = cl->borrow_->depth_;
 
 	/*
 	 * Deque the packet and do the book keeping...
 	 */
  _prr_out:
 	if (op == ALTDQ_REMOVE) {
 		m = _rmc_getq(cl);
 		if (m == NULL)
 			panic("_rmc_prr_dequeue_next");
 		/* queue drained: one fewer active class at this priority */
 		if (qempty(cl->q_))
 			ifd->na_[cpri]--;
 
 		/* packet-by-packet round robin: always move to the next peer */
 		ifd->active_[cpri] = cl->peer_;
 
 		/* record the packet in the slot at qi_ and advance qi_ */
 		ifd->class_[ifd->qi_] = cl;
 		ifd->curlen_[ifd->qi_] = m_pktlen(m);
 		ifd->now_[ifd->qi_] = now;
 		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
 		ifd->queued_++;
 	} else {
 		/* mode == ALTDQ_POLL */
 		m = _rmc_pollq(cl);
 		ifd->pollcache_ = cl;
 	}
 	return (m);
 }
 
 /*
  * mbuf_t *
  * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
  *	is invoked by the packet driver to get the next packet to be
  *	dequeued and output on the link.  If WRR is enabled, then the
  *	WRR dequeue next routine will determine the next packet to sent.
  *	Otherwise, packet-by-packet round robin is invoked.
  *
  *	Returns:	NULL, if a packet is not available or if all
  *			classes are overlimit.
  *
  *			Otherwise, Pointer to the next packet.
  */
 
 mbuf_t *
 rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
 {
 	if (ifd->queued_ >= ifd->maxqueued_)
 		return (NULL);
 	else if (ifd->wrr_)
 		return (_rmc_wrr_dequeue_next(ifd, mode));
 	else
 		return (_rmc_prr_dequeue_next(ifd, mode));
 }
 
 /*
  * Update the utilization estimate for the packet that just completed.
  * The packet's class & the parent(s) of that class all get their
  * estimators updated.  This routine is called by the driver's output-
  * packet-completion interrupt service routine.
  */
 
 /*
  * a macro to approximate "divide by 1000" that gives 0.000999,
  * if a value has enough effective digits.
  * (on pentium, mul takes 9 cycles but div takes 46!)
  */
 #define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
 void
 rmc_update_class_util(struct rm_ifdat *ifd)
 {
 	int		 idle, avgidle, pktlen;
 	int		 pkt_time, tidle;
 	rm_class_t	*cl, *borrowed;
 	rm_class_t	*borrows;
 	struct timeval	*nowp;
 
 	/*
 	 * Get the most recent completed class.
 	 */
 	if ((cl = ifd->class_[ifd->qo_]) == NULL)
 		return;
 
 	pktlen = ifd->curlen_[ifd->qo_];
 	borrowed = ifd->borrowed_[ifd->qo_];
 	borrows = borrowed;
 
 	PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
 
 	/*
 	 * Run estimator on class and its ancestors.
 	 */
 	/*
 	 * rm_update_class_util is designed to be called when the
 	 * transfer is completed from a xmit complete interrupt,
 	 * but most drivers don't implement an upcall for that.
 	 * so, just use estimated completion time.
 	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
 	 */
 	nowp = &ifd->now_[ifd->qo_];
 	/* get pkt_time (for link) in usec */
 #if 1  /* use approximation */
 	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
 	pkt_time = NSEC_TO_USEC(pkt_time);
 #else
 	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
 #endif
 #if 1 /* ALTQ4PPP */
 	if (TV_LT(nowp, &ifd->ifnow_)) {
 		int iftime;
 
 		/*
 		 * make sure the estimated completion time does not go
 		 * too far.  it can happen when the link layer supports
 		 * data compression or the interface speed is set to
 		 * a much lower value.
 		 */
 		TV_DELTA(&ifd->ifnow_, nowp, iftime);
 		if (iftime+pkt_time < ifd->maxiftime_) {
 			TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
 		} else {
 			TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
 		}
 	} else {
 		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
 	}
 #else
 	if (TV_LT(nowp, &ifd->ifnow_)) {
 		TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
 	} else {
 		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
 	}
 #endif
 
 	while (cl != NULL) {
 		TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
 		if (idle >= 2000000)
 			/*
 			 * this class is idle enough, reset avgidle.
 			 * (TV_DELTA returns 2000000 us when delta is large.)
 			 */
 			cl->avgidle_ = cl->maxidle_;
 
 		/* get pkt_time (for class) in usec */
 #if 1  /* use approximation */
 		pkt_time = pktlen * cl->ns_per_byte_;
 		pkt_time = NSEC_TO_USEC(pkt_time);
 #else
 		pkt_time = pktlen * cl->ns_per_byte_ / 1000;
 #endif
 		idle -= pkt_time;
 
 		avgidle = cl->avgidle_;
 		avgidle += idle - (avgidle >> RM_FILTER_GAIN);
 		cl->avgidle_ = avgidle;
 
 		/* Are we overlimit ? */
 		if (avgidle <= 0) {
 			CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
 #if 1 /* ALTQ */
 			/*
 			 * need some lower bound for avgidle, otherwise
 			 * a borrowing class gets unbounded penalty.
 			 */
 			if (avgidle < cl->minidle_)
 				avgidle = cl->avgidle_ = cl->minidle_;
 #endif
 			/* set next idle to make avgidle 0 */
 			tidle = pkt_time +
 				(((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
 			TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
 			++cl->stats_.over;
 		} else {
 			cl->avgidle_ =
 			    (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
 			cl->undertime_.tv_sec = 0;
 			if (cl->sleeping_) {
 				CALLOUT_STOP(&cl->callout_);
 				cl->sleeping_ = 0;
 			}
 		}
 
 		if (borrows != NULL) {
 			if (borrows != cl)
 				++cl->stats_.borrows;
 			else
 				borrows = NULL;
 		}
 		cl->last_ = ifd->ifnow_;
 		cl->last_pkttime_ = pkt_time;
 
 #if 1
 		if (cl->parent_ == NULL) {
 			/* take stats of root class */
 			PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
 		}
 #endif
 
 		cl = cl->parent_;
 	}
 
 	/*
 	 * Check to see if cutoff needs to set to a new level.
 	 */
 	cl = ifd->class_[ifd->qo_];
 	if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
 #if 1 /* ALTQ */
 		if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
 			rmc_tl_satisfied(ifd, nowp);
 			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
 		} else {
 			ifd->cutoff_ = borrowed->depth_;
 			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
 		}
 #else /* !ALTQ */
 		if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
 			reset_cutoff(ifd);
 #ifdef notdef
 			rmc_tl_satisfied(ifd, &now);
 #endif
 			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
 		} else {
 			ifd->cutoff_ = borrowed->depth_;
 			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
 		}
 #endif /* !ALTQ */
 	}
 
 	/*
 	 * Release class slot
 	 */
 	ifd->borrowed_[ifd->qo_] = NULL;
 	ifd->class_[ifd->qo_] = NULL;
 	ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
 	ifd->queued_--;
 }
 
 /*
  * void
  * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
  *	over-limit action routines.  These get invoked by rmc_under_limit()
  *	if a class with packets to send if over its bandwidth limit & can't
  *	borrow from a parent class.
  *
  *	Returns: NONE
  */
 
 static void
 rmc_drop_action(struct rm_class *cl)
 {
 	struct rm_ifdat	*ifd = cl->ifdat_;
 
 	ASSERT(qlen(cl->q_) > 0);
 	_rmc_dropq(cl);
 	if (qempty(cl->q_))
 		ifd->na_[cl->pri_]--;
 }
 
 void rmc_dropall(struct rm_class *cl)
 {
 	struct rm_ifdat	*ifd = cl->ifdat_;
 
 	if (!qempty(cl->q_)) {
 		_flushq(cl->q_);
 
 		ifd->na_[cl->pri_]--;
 	}
 }
 
 #if (__FreeBSD_version > 300000)
 /* hzto() is removed from FreeBSD-3.0 */
 static int hzto(struct timeval *);
 
 static int
 hzto(tv)
 	struct timeval *tv;
 {
 	struct timeval t2;
 
 	getmicrotime(&t2);
 	t2.tv_sec = tv->tv_sec - t2.tv_sec;
 	t2.tv_usec = tv->tv_usec - t2.tv_usec;
 	return (tvtohz(&t2));
 }
 #endif /* __FreeBSD_version > 300000 */
 
 /*
  * void
  * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
  *	delay action routine.  It is invoked via rmc_under_limit when the
  *	packet is discoverd to be overlimit.
  *
  *	If the delay action is result of borrow class being overlimit, then
  *	delay for the offtime of the borrowing class that is overlimit.
  *
  *	Returns: NONE
  */
 
 void
 rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
 {
 	int	delay, t, extradelay;
 
 	cl->stats_.overactions++;
 	TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
 #ifndef BORROW_OFFTIME
 	delay += cl->offtime_;
 #endif
 
 	if (!cl->sleeping_) {
 		CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
 #ifdef BORROW_OFFTIME
 		if (borrow != NULL)
 			extradelay = borrow->offtime_;
 		else
 #endif
 			extradelay = cl->offtime_;
 
 #ifdef ALTQ
 		/*
 		 * XXX recalculate suspend time:
 		 * current undertime is (tidle + pkt_time) calculated
 		 * from the last transmission.
 		 *	tidle: time required to bring avgidle back to 0
 		 *	pkt_time: target waiting time for this class
 		 * we need to replace pkt_time by offtime
 		 */
 		extradelay -= cl->last_pkttime_;
 #endif
 		if (extradelay > 0) {
 			TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
 			delay += extradelay;
 		}
 
 		cl->sleeping_ = 1;
 		cl->stats_.delays++;
 
 		/*
 		 * Since packets are phased randomly with respect to the
 		 * clock, 1 tick (the next clock tick) can be an arbitrarily
 		 * short time so we have to wait for at least two ticks.
 		 * NOTE:  If there's no other traffic, we need the timer as
 		 * a 'backstop' to restart this class.
 		 */
 		if (delay > tick * 2) {
-#ifdef __FreeBSD__
 			/* FreeBSD rounds up the tick */
 			t = hzto(&cl->undertime_);
-#else
-			/* other BSDs round down the tick */
-			t = hzto(&cl->undertime_) + 1;
-#endif
 		} else
 			t = 2;
 		CALLOUT_RESET(&cl->callout_, t,
 			      (timeout_t *)rmc_restart, (caddr_t)cl);
 	}
 }
 
 /*
  * void
  * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
  *	called by the system timer code & is responsible checking if the
  *	class is still sleeping (it might have been restarted as a side
  *	effect of the queue scan on a packet arrival) and, if so, restarting
  *	output for the class.  Inspecting the class state & restarting output
  *	require locking the class structure.  In general the driver is
  *	responsible for locking but this is the only routine that is not
  *	called directly or indirectly from the interface driver so it has
  *	know about system locking conventions.  Under bsd, locking is done
  *	by raising IPL to splimp so that's what's implemented here.  On a
  *	different system this would probably need to be changed.
  *
  *	Returns:	NONE
  */
 
 static void
 rmc_restart(struct rm_class *cl)
 {
 	struct rm_ifdat	*ifd = cl->ifdat_;
 	int		 s;
 
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	IFQ_LOCK(ifd->ifq_);
 	if (cl->sleeping_) {
 		cl->sleeping_ = 0;
 		cl->undertime_.tv_sec = 0;
 
 		if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
 			CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
 			(ifd->restart)(ifd->ifq_);
 		}
 	}
 	IFQ_UNLOCK(ifd->ifq_);
 	splx(s);
 }
 
 /*
  * void
  * rmc_root_overlimit(struct rm_class *cl) - This the generic overlimit
  *	handling routine for the root class of the link sharing structure.
  *
  *	Returns: NONE
  */
 
 static void
 rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
 {
     panic("rmc_root_overlimit");
 }
 
 /*
  * Packet Queue handling routines.  Eventually, this is to localize the
  *	effects on the code whether queues are red queues or droptail
  *	queues.
  */
 
 static int
 _rmc_addq(rm_class_t *cl, mbuf_t *m)
 {
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->q_))
 		return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
 #endif
 #ifdef ALTQ_RED
 	if (q_is_red(cl->q_))
 		return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
 #endif /* ALTQ_RED */
 
 	if (cl->flags_ & RMCF_CLEARDSCP)
 		write_dsfield(m, cl->pktattr_, 0);
 
 	_addq(cl->q_, m);
 	return (0);
 }
 
 /* note: _rmc_dropq is not called for red */
 static void
 _rmc_dropq(rm_class_t *cl)
 {
 	mbuf_t	*m;
 
 	if ((m = _getq(cl->q_)) != NULL)
 		m_freem(m);
 }
 
 static mbuf_t *
 _rmc_getq(rm_class_t *cl)
 {
 #ifdef ALTQ_RIO
 	if (q_is_rio(cl->q_))
 		return rio_getq((rio_t *)cl->red_, cl->q_);
 #endif
 #ifdef ALTQ_RED
 	if (q_is_red(cl->q_))
 		return red_getq(cl->red_, cl->q_);
 #endif
 	return _getq(cl->q_);
 }
 
 static mbuf_t *
 _rmc_pollq(rm_class_t *cl)
 {
 	return qhead(cl->q_);
 }
 
 #ifdef CBQ_TRACE
 
 struct cbqtrace		 cbqtrace_buffer[NCBQTRACE+1];
 struct cbqtrace		*cbqtrace_ptr = NULL;
 int			 cbqtrace_count;
 
 /*
  * DDB hook to trace cbq events:
  *  the last 1024 events are held in a circular buffer.
  *  use "call cbqtrace_dump(N)" to display 20 events from Nth event.
  */
 void cbqtrace_dump(int);
 static char *rmc_funcname(void *);
 
 static struct rmc_funcs {
 	void	*func;
 	char	*name;
 } rmc_funcs[] =
 {
 	rmc_init,		"rmc_init",
 	rmc_queue_packet,	"rmc_queue_packet",
 	rmc_under_limit,	"rmc_under_limit",
 	rmc_update_class_util,	"rmc_update_class_util",
 	rmc_delay_action,	"rmc_delay_action",
 	rmc_restart,		"rmc_restart",
 	_rmc_wrr_dequeue_next,	"_rmc_wrr_dequeue_next",
 	NULL,			NULL
 };
 
 static char *rmc_funcname(void *func)
 {
 	struct rmc_funcs *fp;
 
 	for (fp = rmc_funcs; fp->func != NULL; fp++)
 		if (fp->func == func)
 			return (fp->name);
 	return ("unknown");
 }
 
 void cbqtrace_dump(int counter)
 {
 	int	 i, *p;
 	char	*cp;
 
 	counter = counter % NCBQTRACE;
 	p = (int *)&cbqtrace_buffer[counter];
 
 	for (i=0; i<20; i++) {
 		printf("[0x%x] ", *p++);
 		printf("%s: ", rmc_funcname((void *)*p++));
 		cp = (char *)p++;
 		printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
 		printf("%d\n",*p++);
 
 		if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
 			p = (int *)cbqtrace_buffer;
 	}
 }
 #endif /* CBQ_TRACE */
 #endif /* ALTQ_CBQ */
 
 #if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
 #if !defined(__GNUC__) || defined(ALTQ_DEBUG)
 
 void
 _addq(class_queue_t *q, mbuf_t *m)
 {
         mbuf_t	*m0;
 
 	if ((m0 = qtail(q)) != NULL)
 		m->m_nextpkt = m0->m_nextpkt;
 	else
 		m0 = m;
 	m0->m_nextpkt = m;
 	qtail(q) = m;
 	qlen(q)++;
 }
 
 mbuf_t *
 _getq(class_queue_t *q)
 {
 	mbuf_t	*m, *m0;
 
 	if ((m = qtail(q)) == NULL)
 		return (NULL);
 	if ((m0 = m->m_nextpkt) != m)
 		m->m_nextpkt = m0->m_nextpkt;
 	else {
 		ASSERT(qlen(q) == 1);
 		qtail(q) = NULL;
 	}
 	qlen(q)--;
 	m0->m_nextpkt = NULL;
 	return (m0);
 }
 
 /* drop a packet at the tail of the queue */
 mbuf_t *
 _getq_tail(class_queue_t *q)
 {
 	mbuf_t	*m, *m0, *prev;
 
 	if ((m = m0 = qtail(q)) == NULL)
 		return NULL;
 	do {
 		prev = m0;
 		m0 = m0->m_nextpkt;
 	} while (m0 != m);
 	prev->m_nextpkt = m->m_nextpkt;
 	if (prev == m)  {
 		ASSERT(qlen(q) == 1);
 		qtail(q) = NULL;
 	} else
 		qtail(q) = prev;
 	qlen(q)--;
 	m->m_nextpkt = NULL;
 	return (m);
 }
 
 /* randomly select a packet in the queue */
 mbuf_t *
 _getq_random(class_queue_t *q)
 {
 	struct mbuf	*m;
 	int		 i, n;
 
 	if ((m = qtail(q)) == NULL)
 		return NULL;
 	if (m->m_nextpkt == m) {
 		ASSERT(qlen(q) == 1);
 		qtail(q) = NULL;
 	} else {
 		struct mbuf *prev = NULL;
 
 		n = arc4random() % qlen(q) + 1;
 		for (i = 0; i < n; i++) {
 			prev = m;
 			m = m->m_nextpkt;
 		}
 		prev->m_nextpkt = m->m_nextpkt;
 		if (m == qtail(q))
 			qtail(q) = prev;
 	}
 	qlen(q)--;
 	m->m_nextpkt = NULL;
 	return (m);
 }
 
 void
 _removeq(class_queue_t *q, mbuf_t *m)
 {
 	mbuf_t	*m0, *prev;
 
 	m0 = qtail(q);
 	do {
 		prev = m0;
 		m0 = m0->m_nextpkt;
 	} while (m0 != m);
 	prev->m_nextpkt = m->m_nextpkt;
 	if (prev == m)
 		qtail(q) = NULL;
 	else if (qtail(q) == m)
 		qtail(q) = prev;
 	qlen(q)--;
 }
 
 void
 _flushq(class_queue_t *q)
 {
 	mbuf_t *m;
 
 	while ((m = _getq(q)) != NULL)
 		m_freem(m);
 	ASSERT(qlen(q) == 0);
 }
 
 #endif /* !__GNUC__ || ALTQ_DEBUG */
 #endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */
Index: user/ngie/more-tests/sys/net/altq/altq_rmclass_debug.h
===================================================================
--- user/ngie/more-tests/sys/net/altq/altq_rmclass_debug.h	(revision 281675)
+++ user/ngie/more-tests/sys/net/altq/altq_rmclass_debug.h	(revision 281676)
@@ -1,112 +1,113 @@
-/*	$KAME: altq_rmclass_debug.h,v 1.3 2002/11/29 04:36:24 kjc Exp $	*/
-
-/*
+/*-
  * Copyright (c) Sun Microsystems, Inc. 1998 All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by the SMCC Technology
  *      Development Group at Sun Microsystems, Inc.
  *
  * 4. The name of the Sun Microsystems, Inc nor may not be used to endorse or
  *      promote products derived from this software without specific prior
  *      written permission.
  *
  * SUN MICROSYSTEMS DOES NOT CLAIM MERCHANTABILITY OF THIS SOFTWARE OR THE
  * SUITABILITY OF THIS SOFTWARE FOR ANY PARTICULAR PURPOSE.  The software is
  * provided "as is" without express or implied warranty of any kind.
  *
  * These notices must be retained in any copies of any part of this software.
+ *
+ * $KAME: altq_rmclass_debug.h,v 1.3 2002/11/29 04:36:24 kjc Exp $
+ * $FreeBSD$
  */
 
 #ifndef _ALTQ_ALTQ_RMCLASS_DEBUG_H_
 #define	_ALTQ_ALTQ_RMCLASS_DEBUG_H_
 
 /* #pragma ident	"@(#)rm_class_debug.h	1.7	98/05/04 SMI" */
 
 /*
  * Cbq debugging macros
  */
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 #ifdef	CBQ_TRACE
 #ifndef NCBQTRACE
 #define	NCBQTRACE (16 * 1024)
 #endif
 
 /*
  * To view the trace output, using adb, type:
  *	adb -k /dev/ksyms /dev/mem <cr>, then type
  *	cbqtrace_count/D to get the count, then type
  *	cbqtrace_buffer,0tcount/Dp4C" "Xn
  *	This will dump the trace buffer from 0 to count.
  */
 /*
  * in ALTQ, "call cbqtrace_dump(N)" from DDB to display 20 events
  * from Nth event in the circular buffer.
  */
 
 struct cbqtrace {
 	int count;
 	int function;		/* address of function */
 	int trace_action;	/* descriptive 4 characters */
 	int object;		/* object operated on */
 };
 
 extern struct cbqtrace cbqtrace_buffer[];
 extern struct cbqtrace *cbqtrace_ptr;
 extern int cbqtrace_count;
 
 #define	CBQTRACEINIT() {				\
 	if (cbqtrace_ptr == NULL)		\
 		cbqtrace_ptr = cbqtrace_buffer; \
 	else { \
 		cbqtrace_ptr = cbqtrace_buffer; \
 		bzero((void *)cbqtrace_ptr, sizeof(cbqtrace_buffer)); \
 		cbqtrace_count = 0; \
 	} \
 }
 
 #define	LOCK_TRACE()	splimp()
 #define	UNLOCK_TRACE(x)	splx(x)
 
 #define	CBQTRACE(func, act, obj) {		\
 	int __s = LOCK_TRACE();			\
 	int *_p = &cbqtrace_ptr->count;	\
 	*_p++ = ++cbqtrace_count;		\
 	*_p++ = (int)(func);			\
 	*_p++ = (int)(act);			\
 	*_p++ = (int)(obj);			\
 	if ((struct cbqtrace *)(void *)_p >= &cbqtrace_buffer[NCBQTRACE])\
 		cbqtrace_ptr = cbqtrace_buffer; \
 	else					\
 		cbqtrace_ptr = (struct cbqtrace *)(void *)_p; \
 	UNLOCK_TRACE(__s);			\
 	}
 #else
 
 /* If no tracing, define no-ops */
 #define	CBQTRACEINIT()
 #define	CBQTRACE(a, b, c)
 
 #endif	/* !CBQ_TRACE */
 
 #ifdef __cplusplus
 }
 #endif
 
 #endif	/* _ALTQ_ALTQ_RMCLASS_DEBUG_H_ */
Index: user/ngie/more-tests/sys/net/altq/altq_subr.c
===================================================================
--- user/ngie/more-tests/sys/net/altq/altq_subr.c	(revision 281675)
+++ user/ngie/more-tests/sys/net/altq/altq_subr.c	(revision 281676)
@@ -1,1981 +1,1925 @@
-/*	$FreeBSD$	*/
-/*	$KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $	*/
-
-/*
+/*-
  * Copyright (C) 1997-2003
  *	Sony Computer Science Laboratories Inc.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
+ *
+ * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $
+ * $FreeBSD$
  */
 
-#if defined(__FreeBSD__) || defined(__NetBSD__)
 #include "opt_altq.h"
 #include "opt_inet.h"
-#ifdef __FreeBSD__
 #include "opt_inet6.h"
-#endif
-#endif /* __FreeBSD__ || __NetBSD__ */
 
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/kernel.h>
 #include <sys/errno.h>
 #include <sys/syslog.h>
 #include <sys/sysctl.h>
 #include <sys/queue.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
-#ifdef __FreeBSD__
 #include <net/vnet.h>
-#endif
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 
 #include <netpfil/pf/pf.h>
 #include <netpfil/pf/pf_altq.h>
 #include <net/altq/altq.h>
 #ifdef ALTQ3_COMPAT
 #include <net/altq/altq_conf.h>
 #endif
 
 /* machine dependent clock related includes */
-#ifdef __FreeBSD__
 #include <sys/bus.h>
 #include <sys/cpu.h>
 #include <sys/eventhandler.h>
 #include <machine/clock.h>
-#endif
 #if defined(__amd64__) || defined(__i386__)
 #include <machine/cpufunc.h>		/* for pentium tsc */
 #include <machine/specialreg.h>		/* for CPUID_TSC */
-#ifdef __FreeBSD__
 #include <machine/md_var.h>		/* for cpu_feature */
-#elif defined(__NetBSD__) || defined(__OpenBSD__)
-#include <machine/cpu.h>		/* for cpu_feature */
-#endif
 #endif /* __amd64 || __i386__ */
 
 /*
  * internal function prototypes
  */
 static void	tbr_timeout(void *);
 int (*altq_input)(struct mbuf *, int) = NULL;
 static struct mbuf *tbr_dequeue(struct ifaltq *, int);
 static int tbr_timer = 0;	/* token bucket regulator timer */
 #if !defined(__FreeBSD__) || (__FreeBSD_version < 600000)
 static struct callout tbr_callout = CALLOUT_INITIALIZER;
 #else
 static struct callout tbr_callout;
 #endif
 
 #ifdef ALTQ3_CLFIER_COMPAT
 static int 	extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
 #ifdef INET6
 static int 	extract_ports6(struct mbuf *, struct ip6_hdr *,
 			       struct flowinfo_in6 *);
 #endif
 static int	apply_filter4(u_int32_t, struct flow_filter *,
 			      struct flowinfo_in *);
 static int	apply_ppfilter4(u_int32_t, struct flow_filter *,
 				struct flowinfo_in *);
 #ifdef INET6
 static int	apply_filter6(u_int32_t, struct flow_filter6 *,
 			      struct flowinfo_in6 *);
 #endif
 static int	apply_tosfilter4(u_int32_t, struct flow_filter *,
 				 struct flowinfo_in *);
 static u_long	get_filt_handle(struct acc_classifier *, int);
 static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
 static u_int32_t filt2fibmask(struct flow_filter *);
 
 static void 	ip4f_cache(struct ip *, struct flowinfo_in *);
 static int 	ip4f_lookup(struct ip *, struct flowinfo_in *);
 static int 	ip4f_init(void);
 static struct ip4_frag	*ip4f_alloc(void);
 static void 	ip4f_free(struct ip4_frag *);
 #endif /* ALTQ3_CLFIER_COMPAT */
 
 /*
  * alternate queueing support routines
  */
 
 /* look up the queue state by the interface name and the queueing type. */
 void *
 altq_lookup(name, type)
 	char *name;
 	int type;
 {
 	struct ifnet *ifp;
 
 	if ((ifp = ifunit(name)) != NULL) {
 		/* read if_snd unlocked */
 		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
 			return (ifp->if_snd.altq_disc);
 	}
 
 	return NULL;
 }
 
 int
 altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
 	struct ifaltq *ifq;
 	int type;
 	void *discipline;
 	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
 	struct mbuf *(*dequeue)(struct ifaltq *, int);
 	int (*request)(struct ifaltq *, int, void *);
 	void *clfier;
 	void *(*classify)(void *, struct mbuf *, int);
 {
 	IFQ_LOCK(ifq);
 	if (!ALTQ_IS_READY(ifq)) {
 		IFQ_UNLOCK(ifq);
 		return ENXIO;
 	}
 
 #ifdef ALTQ3_COMPAT
 	/*
 	 * pfaltq can override the existing discipline, but altq3 cannot.
 	 * check these if clfier is not NULL (which implies altq3).
 	 */
 	if (clfier != NULL) {
 		if (ALTQ_IS_ENABLED(ifq)) {
 			IFQ_UNLOCK(ifq);
 			return EBUSY;
 		}
 		if (ALTQ_IS_ATTACHED(ifq)) {
 			IFQ_UNLOCK(ifq);
 			return EEXIST;
 		}
 	}
 #endif
 	ifq->altq_type     = type;
 	ifq->altq_disc     = discipline;
 	ifq->altq_enqueue  = enqueue;
 	ifq->altq_dequeue  = dequeue;
 	ifq->altq_request  = request;
 	ifq->altq_clfier   = clfier;
 	ifq->altq_classify = classify;
 	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
 #ifdef ALTQ3_COMPAT
 #ifdef ALTQ_KLD
 	altq_module_incref(type);
 #endif
 #endif
 	IFQ_UNLOCK(ifq);
 	return 0;
 }
 
 int
 altq_detach(ifq)
 	struct ifaltq *ifq;
 {
 	IFQ_LOCK(ifq);
 
 	if (!ALTQ_IS_READY(ifq)) {
 		IFQ_UNLOCK(ifq);
 		return ENXIO;
 	}
 	if (ALTQ_IS_ENABLED(ifq)) {
 		IFQ_UNLOCK(ifq);
 		return EBUSY;
 	}
 	if (!ALTQ_IS_ATTACHED(ifq)) {
 		IFQ_UNLOCK(ifq);
 		return (0);
 	}
 #ifdef ALTQ3_COMPAT
 #ifdef ALTQ_KLD
 	altq_module_declref(ifq->altq_type);
 #endif
 #endif
 
 	ifq->altq_type     = ALTQT_NONE;
 	ifq->altq_disc     = NULL;
 	ifq->altq_enqueue  = NULL;
 	ifq->altq_dequeue  = NULL;
 	ifq->altq_request  = NULL;
 	ifq->altq_clfier   = NULL;
 	ifq->altq_classify = NULL;
 	ifq->altq_flags &= ALTQF_CANTCHANGE;
 
 	IFQ_UNLOCK(ifq);
 	return 0;
 }
 
 int
 altq_enable(ifq)
 	struct ifaltq *ifq;
 {
 	int s;
 
 	IFQ_LOCK(ifq);
 
 	if (!ALTQ_IS_READY(ifq)) {
 		IFQ_UNLOCK(ifq);
 		return ENXIO;
 	}
 	if (ALTQ_IS_ENABLED(ifq)) {
 		IFQ_UNLOCK(ifq);
 		return 0;
 	}
 
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	IFQ_PURGE_NOLOCK(ifq);
 	ASSERT(ifq->ifq_len == 0);
 	ifq->ifq_drv_maxlen = 0;		/* disable bulk dequeue */
 	ifq->altq_flags |= ALTQF_ENABLED;
 	if (ifq->altq_clfier != NULL)
 		ifq->altq_flags |= ALTQF_CLASSIFY;
 	splx(s);
 
 	IFQ_UNLOCK(ifq);
 	return 0;
 }
 
 int
 altq_disable(ifq)
 	struct ifaltq *ifq;
 {
 	int s;
 
 	IFQ_LOCK(ifq);
 	if (!ALTQ_IS_ENABLED(ifq)) {
 		IFQ_UNLOCK(ifq);
 		return 0;
 	}
 
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	IFQ_PURGE_NOLOCK(ifq);
 	ASSERT(ifq->ifq_len == 0);
 	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
 	splx(s);
 	
 	IFQ_UNLOCK(ifq);
 	return 0;
 }
 
 #ifdef ALTQ_DEBUG
 void
 altq_assert(file, line, failedexpr)
 	const char *file, *failedexpr;
 	int line;
 {
 	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
 		     failedexpr, file, line);
 	panic("altq assertion");
 	/* NOTREACHED */
 }
 #endif
 
 /*
  * internal representation of token bucket parameters
  *	rate:	byte_per_unittime << 32
  *		(((bits_per_sec) / 8) << 32) / machclk_freq
  *	depth:	byte << 32
  *
  */
 #define	TBR_SHIFT	32
 #define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
 #define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
 
 static struct mbuf *
 tbr_dequeue(ifq, op)
 	struct ifaltq *ifq;
 	int op;
 {
 	struct tb_regulator *tbr;
 	struct mbuf *m;
 	int64_t interval;
 	u_int64_t now;
 
 	IFQ_LOCK_ASSERT(ifq);
 	tbr = ifq->altq_tbr;
 	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
 		/* if this is a remove after poll, bypass tbr check */
 	} else {
 		/* update token only when it is negative */
 		if (tbr->tbr_token <= 0) {
 			now = read_machclk();
 			interval = now - tbr->tbr_last;
 			if (interval >= tbr->tbr_filluptime)
 				tbr->tbr_token = tbr->tbr_depth;
 			else {
 				tbr->tbr_token += interval * tbr->tbr_rate;
 				if (tbr->tbr_token > tbr->tbr_depth)
 					tbr->tbr_token = tbr->tbr_depth;
 			}
 			tbr->tbr_last = now;
 		}
 		/* if token is still negative, don't allow dequeue */
 		if (tbr->tbr_token <= 0)
 			return (NULL);
 	}
 
 	if (ALTQ_IS_ENABLED(ifq))
 		m = (*ifq->altq_dequeue)(ifq, op);
 	else {
 		if (op == ALTDQ_POLL)
 			_IF_POLL(ifq, m);
 		else
 			_IF_DEQUEUE(ifq, m);
 	}
 
 	if (m != NULL && op == ALTDQ_REMOVE)
 		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
 	tbr->tbr_lastop = op;
 	return (m);
 }
 
 /*
  * set a token bucket regulator.
  * if the specified rate is zero, the token bucket regulator is deleted.
  */
 int
 tbr_set(ifq, profile)
 	struct ifaltq *ifq;
 	struct tb_profile *profile;
 {
 	struct tb_regulator *tbr, *otbr;
 	
 	if (tbr_dequeue_ptr == NULL)
 		tbr_dequeue_ptr = tbr_dequeue;
 
 	if (machclk_freq == 0)
 		init_machclk();
 	if (machclk_freq == 0) {
 		printf("tbr_set: no cpu clock available!\n");
 		return (ENXIO);
 	}
 
 	IFQ_LOCK(ifq);
 	if (profile->rate == 0) {
 		/* delete this tbr */
 		if ((tbr = ifq->altq_tbr) == NULL) {
 			IFQ_UNLOCK(ifq);
 			return (ENOENT);
 		}
 		ifq->altq_tbr = NULL;
 		free(tbr, M_DEVBUF);
 		IFQ_UNLOCK(ifq);
 		return (0);
 	}
 
 	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (tbr == NULL) {
 		IFQ_UNLOCK(ifq);
 		return (ENOMEM);
 	}
 
 	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
 	tbr->tbr_depth = TBR_SCALE(profile->depth);
 	if (tbr->tbr_rate > 0)
 		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
 	else
 		tbr->tbr_filluptime = 0xffffffffffffffffLL;
 	tbr->tbr_token = tbr->tbr_depth;
 	tbr->tbr_last = read_machclk();
 	tbr->tbr_lastop = ALTDQ_REMOVE;
 
 	otbr = ifq->altq_tbr;
 	ifq->altq_tbr = tbr;	/* set the new tbr */
 
 	if (otbr != NULL)
 		free(otbr, M_DEVBUF);
 	else {
 		if (tbr_timer == 0) {
 			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
 			tbr_timer = 1;
 		}
 	}
 	IFQ_UNLOCK(ifq);
 	return (0);
 }
 
 /*
  * tbr_timeout goes through the interface list, and kicks the drivers
  * if necessary.
  *
  * MPSAFE
  */
 static void
 tbr_timeout(arg)
 	void *arg;
 {
-#ifdef __FreeBSD__
 	VNET_ITERATOR_DECL(vnet_iter);
-#endif
 	struct ifnet *ifp;
 	int active, s;
 
 	active = 0;
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
-#ifdef __FreeBSD__
 	IFNET_RLOCK_NOSLEEP();
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
-#endif
 		for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
 		    ifp = TAILQ_NEXT(ifp, if_list)) {
 			/* read from if_snd unlocked */
 			if (!TBR_IS_ENABLED(&ifp->if_snd))
 				continue;
 			active++;
 			if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
 			    ifp->if_start != NULL)
 				(*ifp->if_start)(ifp);
 		}
-#ifdef __FreeBSD__
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 	IFNET_RUNLOCK_NOSLEEP();
-#endif
 	splx(s);
 	if (active > 0)
 		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
 	else
 		tbr_timer = 0;	/* don't need tbr_timer anymore */
 }
 
 /*
  * get token bucket regulator profile
  */
 int
 tbr_get(ifq, profile)
 	struct ifaltq *ifq;
 	struct tb_profile *profile;
 {
 	struct tb_regulator *tbr;
 
 	IFQ_LOCK(ifq);
 	if ((tbr = ifq->altq_tbr) == NULL) {
 		profile->rate = 0;
 		profile->depth = 0;
 	} else {
 		profile->rate =
 		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
 		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
 	}
 	IFQ_UNLOCK(ifq);
 	return (0);
 }
 
 /*
  * Attach the discipline named by a->scheduler to the interface; a
  * discipline that is already attached is overridden.
  * No locking is done here: the discipline specific attach functions
  * call back to altq_attach, which takes care of the attach and locking.
  * Returns 0 for ALTQT_NONE, the attach routine's result for a scheduler
  * that is compiled in, and ENXIO for anything else.
  */
 int
 altq_pfattach(struct pf_altq *a)
 {
 
 	switch (a->scheduler) {
 	case ALTQT_NONE:
 		/* nothing to attach */
 		return (0);
 #ifdef ALTQ_CBQ
 	case ALTQT_CBQ:
 		return (cbq_pfattach(a));
 #endif
 #ifdef ALTQ_PRIQ
 	case ALTQT_PRIQ:
 		return (priq_pfattach(a));
 #endif
 #ifdef ALTQ_HFSC
 	case ALTQT_HFSC:
 		return (hfsc_pfattach(a));
 #endif
 	default:
 		/* unknown scheduler, or one that is not compiled in */
 		return (ENXIO);
 	}
 }
 
 /*
  * detach a discipline from the interface.
  * it is possible that the discipline was already overridden by another
  * discipline.
  *
  * returns EINVAL when a->ifname does not name an interface, 0 when the
  * discipline is no longer the one attached to the interface, and
  * otherwise the result of altq_disable()/altq_detach().
  */
 int
 altq_pfdetach(struct pf_altq *a)
 {
 	struct ifnet *ifp;
 	int s, error = 0;
 
 	if ((ifp = ifunit(a->ifname)) == NULL)
 		return (EINVAL);
 
 	/* if this discipline is no longer referenced, just return */
 	/* read unlocked from if_snd */
 	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
 		return (0);
 
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	/* read unlocked from if_snd, _disable and _detach take care */
 	if (ALTQ_IS_ENABLED(&ifp->if_snd))
 		error = altq_disable(&ifp->if_snd);
 	/* only detach when the queue was idle or disabling succeeded */
 	if (error == 0)
 		error = altq_detach(&ifp->if_snd);
 	splx(s);
 
 	return (error);
 }
 
 /*
  * Add a discipline, or a queue when a->qname is non-empty.
  * Locking is left to the discipline specific functions because they
  * malloc with WAITOK; it is also not yet clear which lock to use.
  * Panics when no machine clock frequency can be established.
  * Returns the discipline's result, or ENXIO for a scheduler that is
  * unknown or not compiled in.
  */
 int
 altq_add(struct pf_altq *a)
 {
 	int rv = ENXIO;
 
 	/* a queue name means the request is for a queue, not a discipline */
 	if (a->qname[0] != 0)
 		return (altq_add_queue(a));
 
 	/* the disciplines need the machine clock; set it up on first use */
 	if (machclk_freq == 0) {
 		init_machclk();
 		if (machclk_freq == 0)
 			panic("altq_add: no cpu clock");
 	}
 
 	switch (a->scheduler) {
 #ifdef ALTQ_CBQ
 	case ALTQT_CBQ:
 		rv = cbq_add_altq(a);
 		break;
 #endif
 #ifdef ALTQ_PRIQ
 	case ALTQT_PRIQ:
 		rv = priq_add_altq(a);
 		break;
 #endif
 #ifdef ALTQ_HFSC
 	case ALTQT_HFSC:
 		rv = hfsc_add_altq(a);
 		break;
 #endif
 	default:
 		break;
 	}
 
 	return (rv);
 }
 
 /*
  * Remove a discipline, or a queue when a->qname is non-empty.
  * It is yet unclear what lock to use to protect this operation; the
  * discipline specific functions will determine and grab it.
  * Returns the discipline's result, or ENXIO for a scheduler that is
  * unknown or not compiled in.
  */
 int
 altq_remove(struct pf_altq *a)
 {
 	int rv = ENXIO;
 
 	/* a queue name means the request is for a queue, not a discipline */
 	if (a->qname[0] != 0)
 		return (altq_remove_queue(a));
 
 	switch (a->scheduler) {
 #ifdef ALTQ_CBQ
 	case ALTQT_CBQ:
 		rv = cbq_remove_altq(a);
 		break;
 #endif
 #ifdef ALTQ_PRIQ
 	case ALTQT_PRIQ:
 		rv = priq_remove_altq(a);
 		break;
 #endif
 #ifdef ALTQ_HFSC
 	case ALTQT_HFSC:
 		rv = hfsc_remove_altq(a);
 		break;
 #endif
 	default:
 		break;
 	}
 
 	return (rv);
 }
 
 /*
  * Add a queue to the discipline selected by a->scheduler.
  * It is yet unclear what lock to use to protect this operation; the
  * discipline specific functions will determine and grab it.
  * Returns the discipline's result, or ENXIO for a scheduler that is
  * unknown or not compiled in.
  */
 int
 altq_add_queue(struct pf_altq *a)
 {
 
 	switch (a->scheduler) {
 #ifdef ALTQ_CBQ
 	case ALTQT_CBQ:
 		return (cbq_add_queue(a));
 #endif
 #ifdef ALTQ_PRIQ
 	case ALTQT_PRIQ:
 		return (priq_add_queue(a));
 #endif
 #ifdef ALTQ_HFSC
 	case ALTQT_HFSC:
 		return (hfsc_add_queue(a));
 #endif
 	default:
 		return (ENXIO);
 	}
 }
 
 /*
  * Remove a queue from the discipline selected by a->scheduler.
  * It is yet unclear what lock to use to protect this operation; the
  * discipline specific functions will determine and grab it.
  * Returns the discipline's result, or ENXIO for a scheduler that is
  * unknown or not compiled in.
  */
 int
 altq_remove_queue(struct pf_altq *a)
 {
 
 	switch (a->scheduler) {
 #ifdef ALTQ_CBQ
 	case ALTQT_CBQ:
 		return (cbq_remove_queue(a));
 #endif
 #ifdef ALTQ_PRIQ
 	case ALTQT_PRIQ:
 		return (priq_remove_queue(a));
 #endif
 #ifdef ALTQ_HFSC
 	case ALTQT_HFSC:
 		return (hfsc_remove_queue(a));
 #endif
 	default:
 		return (ENXIO);
 	}
 }
 
 /*
  * Fetch queue statistics from the discipline selected by a->scheduler.
  * ubuf and nbytes are passed through untouched to the discipline.
  * Locking is left to the discipline specific functions because of their
  * copyout operations; it is also not yet clear which lock to use.
  * Returns the discipline's result, or ENXIO for a scheduler that is
  * unknown or not compiled in.
  */
 int
 altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
 {
 	int rv = ENXIO;
 
 	switch (a->scheduler) {
 #ifdef ALTQ_CBQ
 	case ALTQT_CBQ:
 		rv = cbq_getqstats(a, ubuf, nbytes);
 		break;
 #endif
 #ifdef ALTQ_PRIQ
 	case ALTQT_PRIQ:
 		rv = priq_getqstats(a, ubuf, nbytes);
 		break;
 #endif
 #ifdef ALTQ_HFSC
 	case ALTQT_HFSC:
 		rv = hfsc_getqstats(a, ubuf, nbytes);
 		break;
 #endif
 	default:
 		break;
 	}
 
 	return (rv);
 }
 
 /*
  * read and write diffserv field in IPv4 or IPv6 header
  *
  * read_dsfield() returns the IPv4 TOS byte or the IPv6 traffic class of
  * the header cached in pktattr.  It returns 0 when pktattr is NULL, names
  * another address family, holds a header pointer that is no longer inside
  * the mbuf chain (pattr_af is then reset to AF_UNSPEC), or when the IP
  * version found in the header does not match the family.
  */
 u_int8_t
 read_dsfield(m, pktattr)
 	struct mbuf *m;
 	struct altq_pktattr *pktattr;
 {
 	struct mbuf *seg;
 	u_int8_t ds_field = 0;
 
 	if (pktattr == NULL)
 		return ((u_int8_t)0);
 	if (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)
 		return ((u_int8_t)0);
 
 	/* the cached header pointer must still point into this chain */
 	seg = m;
 	while (seg != NULL) {
 		if ((pktattr->pattr_hdr >= seg->m_data) &&
 		    (pktattr->pattr_hdr < seg->m_data + seg->m_len))
 			break;
 		seg = seg->m_next;
 	}
 	if (seg == NULL) {
 		/* stale pattr_hdr: invalidate the attribute */
 		pktattr->pattr_af = AF_UNSPEC;
 #ifdef ALTQ_DEBUG
 		printf("read_dsfield: can't locate header!\n");
 #endif
 		return ((u_int8_t)0);
 	}
 
 	if (pktattr->pattr_af == AF_INET) {
 		struct ip *ip4 = (struct ip *)pktattr->pattr_hdr;
 
 		/* version mismatch yields 0 */
 		if (ip4->ip_v == 4)
 			ds_field = ip4->ip_tos;
 		else
 			return ((u_int8_t)0);
 	}
 #ifdef INET6
 	else if (pktattr->pattr_af == AF_INET6) {
 		struct ip6_hdr *hdr6 = (struct ip6_hdr *)pktattr->pattr_hdr;
 		u_int32_t vtf;	/* version, traffic class, flow label */
 
 		vtf = ntohl(hdr6->ip6_flow);
 		/* version mismatch yields 0 */
 		if ((vtf >> 28) != 6)
 			return ((u_int8_t)0);
 		ds_field = (vtf >> 20) & 0xff;
 	}
 #endif
 	return (ds_field);
 }
 
 /*
  * Store dsfield into the IPv4 TOS byte or the IPv6 traffic class of the
  * header cached in pktattr.  For IPv4 the low two bits of the old TOS are
  * kept and the header checksum is updated incrementally.  Nothing is
  * written when pktattr is NULL, names another address family, holds a
  * header pointer that is no longer inside the mbuf chain (pattr_af is then
  * reset to AF_UNSPEC), or when the IP version does not match the family.
  */
 void
 write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
 {
 	struct mbuf *seg;
 
 	if (pktattr == NULL)
 		return;
 	if (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)
 		return;
 
 	/* the cached header pointer must still point into this chain */
 	seg = m;
 	while (seg != NULL) {
 		if ((pktattr->pattr_hdr >= seg->m_data) &&
 		    (pktattr->pattr_hdr < seg->m_data + seg->m_len))
 			break;
 		seg = seg->m_next;
 	}
 	if (seg == NULL) {
 		/* stale pattr_hdr: invalidate the attribute */
 		pktattr->pattr_af = AF_UNSPEC;
 #ifdef ALTQ_DEBUG
 		printf("write_dsfield: can't locate header!\n");
 #endif
 		return;
 	}
 
 	if (pktattr->pattr_af == AF_INET) {
 		struct ip *ip4 = (struct ip *)pktattr->pattr_hdr;
 		u_int8_t prev_tos;
 		int32_t cksum;
 
 		/* version mismatch: leave the header alone */
 		if (ip4->ip_v != 4)
 			return;
 		prev_tos = ip4->ip_tos;
 		/* leave CU bits */
 		dsfield |= prev_tos & 3;
 		if (dsfield == prev_tos)
 			return;
 		ip4->ip_tos = dsfield;
 		/*
 		 * update checksum (from RFC1624)
 		 *	   HC' = ~(~HC + ~m + m')
 		 */
 		cksum = ~ntohs(ip4->ip_sum) & 0xffff;
 		cksum += 0xff00 + (~prev_tos & 0xff) + dsfield;
 		cksum = (cksum >> 16) + (cksum & 0xffff);
 		/* add carry */
 		cksum += (cksum >> 16);
 
 		ip4->ip_sum = htons(~cksum & 0xffff);
 	}
 #ifdef INET6
 	else if (pktattr->pattr_af == AF_INET6) {
 		struct ip6_hdr *hdr6 = (struct ip6_hdr *)pktattr->pattr_hdr;
 		u_int32_t vtf;	/* version, traffic class, flow label */
 
 		vtf = ntohl(hdr6->ip6_flow);
 		/* version mismatch: leave the header alone */
 		if ((vtf >> 28) != 6)
 			return;
 		vtf = (vtf & 0xf03fffff) | (dsfield << 20);
 		hdr6->ip6_flow = htonl(vtf);
 	}
 #endif
 }
 
 
 /*
  * high resolution clock support taking advantage of a machine dependent
  * high resolution time counter (e.g., timestamp counter of intel pentium).
  * we assume
  *  - 64-bit-long monotonically-increasing counter
  *  - frequency range is 100M-4GHz (CPU speed)
  */
 /* if pcc is not available or disabled, emulate 256MHz using microtime() */
 /* left shift applied to a microsecond count to emulate the machine clock */
 #define	MACHCLK_SHIFT	8
 
 /* non-zero when read_machclk() reads the hardware counter via rdtsc() */
 int machclk_usepcc;
 /* machine clock frequency in Hz; 0 until init_machclk() has set it */
 u_int32_t machclk_freq;
 /* machine clock counts per timer tick (machclk_freq / hz) */
 u_int32_t machclk_per_tick;
 
 #if defined(__i386__) && defined(__NetBSD__)
 extern u_int64_t cpu_tsc_freq;
 #endif
 
 #if (__FreeBSD_version >= 700035)
 /*
  * cpufreq_post_change event handler: have init_machclk() re-establish the
  * machine clock frequency after a successful frequency transition.
  * arg and level are not used.
  */
 static void
 tsc_freq_changed(void *arg, const struct cf_level *level, int status)
 {
 
 	/* a non-zero status means the transition failed: nothing changed */
 	if (status == 0) {
 #if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
 		/* an invariant TSC does not follow P-state changes */
 		if (tsc_is_invariant)
 			return;
 #endif
 
 		/* pick up the frequency that is in effect now */
 		init_machclk();
 	}
 }
 EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
     EVENTHANDLER_PRI_LAST);
 #endif /* __FreeBSD_version >= 700035 */
 
 /*
  * one-time setup for the machine clock: initialize the tbr callout and
  * decide whether the hardware counter can be used (machclk_usepcc).
  */
 static void
 init_machclk_setup(void)
 {
 #if (__FreeBSD_version >= 600000)
 	callout_init(&tbr_callout, 0);
 #endif
 
 	/* start out optimistic, then rule the counter out case by case */
 	machclk_usepcc = 1;
 
 	/* no counter support outside amd64/i386, or when disabled at build */
 #if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
 	machclk_usepcc = 0;
 #endif
 	/* the counter is not used on multiprocessor kernels */
 #if defined(__FreeBSD__) && defined(SMP)
 	machclk_usepcc = 0;
 #endif
 #if defined(__NetBSD__) && defined(MULTIPROCESSOR)
 	machclk_usepcc = 0;
 #endif
 #if defined(__amd64__) || defined(__i386__)
 	/* check if TSC is available */
-#ifdef __FreeBSD__
 	if ((cpu_feature & CPUID_TSC) == 0 ||
 	    atomic_load_acq_64(&tsc_freq) == 0)
-#else
-	if ((cpu_feature & CPUID_TSC) == 0)
-#endif
 		machclk_usepcc = 0;
 #endif
 }
 
 /*
  * establish machclk_freq and machclk_per_tick.
  * the first call also runs init_machclk_setup().  when the hardware
  * counter is not used, a clock of (1000000 << MACHCLK_SHIFT) Hz is
  * emulated; otherwise the frequency is taken from tsc_freq or, when it is
  * still 0, measured over a sleep of about one second.
  */
 void
 init_machclk(void)
 {
 	static int called;
 
 	/* Call one-time initialization function. */
 	if (!called) {
 		init_machclk_setup();
 		called = 1;
 	}
 
 	if (machclk_usepcc == 0) {
 		/* emulate 256MHz using microtime() */
 		machclk_freq = 1000000 << MACHCLK_SHIFT;
 		machclk_per_tick = machclk_freq / hz;
 #ifdef ALTQ_DEBUG
 		printf("altq: emulate %uHz cpu clock\n", machclk_freq);
 #endif
 		return;
 	}
 
 	/*
 	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
 	 * accessible, just use it.
 	 */
 #if defined(__amd64__) || defined(__i386__)
-#ifdef __FreeBSD__
 	/* NOTE(review): 64-bit value stored into a u_int32_t — confirm range */
 	machclk_freq = atomic_load_acq_64(&tsc_freq);
-#elif defined(__NetBSD__)
-	machclk_freq = (u_int32_t)cpu_tsc_freq;
-#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
-	machclk_freq = pentium_mhz * 1000000;
 #endif
-#endif
 
 	/*
 	 * if we don't know the clock frequency, measure it.
 	 */
 	if (machclk_freq == 0) {
 		static int	wait;
 		struct timeval	tv_start, tv_end;
 		u_int64_t	start, end, diff;
 		int		timo;
 
 		/* sample wall time and counter before and after the sleep */
 		microtime(&tv_start);
 		start = read_machclk();
 		timo = hz;	/* 1 sec */
 		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
 		microtime(&tv_end);
 		end = read_machclk();
 		/* elapsed wall time in microseconds */
 		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
 		    + tv_end.tv_usec - tv_start.tv_usec;
 		/* counts per second; left at 0 when no time elapsed */
 		if (diff != 0)
 			machclk_freq = (u_int)((end - start) * 1000000 / diff);
 	}
 
 	machclk_per_tick = machclk_freq / hz;
 
 #ifdef ALTQ_DEBUG
 	printf("altq: CPU clock: %uHz\n", machclk_freq);
 #endif
 }
 
 #if defined(__OpenBSD__) && defined(__i386__)
 /*
  * Local rdtsc() for OpenBSD/i386: returns the 64-bit value produced by
  * the instruction whose opcode bytes are emitted below.
  */
 static __inline u_int64_t
 rdtsc(void)
 {
 	u_int64_t tsc;
 
 	/* the opcode is spelled out as raw bytes rather than a mnemonic */
 	__asm __volatile(".byte 0x0f, 0x31" : "=A" (tsc));
 
 	return (tsc);
 }
 #endif /* __OpenBSD__ && __i386__ */
 
 u_int64_t
 read_machclk(void)
 {
 	u_int64_t val;
 
 	if (machclk_usepcc) {
 #if defined(__amd64__) || defined(__i386__)
 		val = rdtsc();
 #else
 		panic("read_machclk");
 #endif
 	} else {
 		struct timeval tv;
 
 		microtime(&tv);
 		val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
 		    + tv.tv_usec) << MACHCLK_SHIFT);
 	}
 	return (val);
 }
 
 #ifdef ALTQ3_CLFIER_COMPAT
 
 /* fallback protocol numbers, used only when no header has defined them */
 #ifndef IPPROTO_ESP
 #define	IPPROTO_ESP	50		/* encapsulating security payload */
 #endif
 #ifndef IPPROTO_AH
 #define	IPPROTO_AH	51		/* authentication header */
 #endif
 
 /*
  * Build the flow description of a packet.
  *
  * m:          packet; the ip header is assumed to sit in the first mbuf.
  * af:         protocol family of the packet (PF_INET or PF_INET6).
  * flow:       output; filled in as a flowinfo_in or flowinfo_in6.
  * filt_bmask: shows which flowinfo fields are required; port numbers are
  *             only looked up when the mask asks for them.
  *
  * Addresses and ports are stored in network byte order.
  * Returns 1 on success.  On failure returns 0 and marks the flow as
  * AF_UNSPEC.
  */
 int
 altq_extractflow(m, af, flow, filt_bmask)
 	struct mbuf *m;
 	int af;
 	struct flowinfo *flow;
 	u_int32_t	filt_bmask;
 {
 
 	if (af == PF_INET) {
 		struct ip *ip4 = mtod(m, struct ip *);
 
 		if (ip4->ip_v == 4) {
 			struct flowinfo_in *fi4 = (struct flowinfo_in *)flow;
 
 			fi4->fi_len = sizeof(struct flowinfo_in);
 			fi4->fi_family = AF_INET;
 
 			fi4->fi_proto = ip4->ip_p;
 			fi4->fi_tos = ip4->ip_tos;
 
 			fi4->fi_src.s_addr = ip4->ip_src.s_addr;
 			fi4->fi_dst.s_addr = ip4->ip_dst.s_addr;
 
 			if (!(filt_bmask & FIMB4_PORTS)) {
 				/* ports are not wanted: leave them zero */
 				fi4->fi_sport = 0;
 				fi4->fi_dport = 0;
 				fi4->fi_gpi = 0;
 			} else {
 				/* port info is required, extract it */
 				extract_ports4(m, ip4, fi4);
 			}
 			return (1);
 		}
 		/* not an IPv4 header after all: fall through to failure */
 	}
 #ifdef INET6
 	else if (af == PF_INET6) {
 		struct ip6_hdr *hdr6 = mtod(m, struct ip6_hdr *);
 		struct flowinfo_in6 *fi6 = (struct flowinfo_in6 *)flow;
 		/* should we check the ip version? */
 
 		fi6->fi6_len = sizeof(struct flowinfo_in6);
 		fi6->fi6_family = AF_INET6;
 
 		fi6->fi6_proto = hdr6->ip6_nxt;
 		fi6->fi6_tclass   = (ntohl(hdr6->ip6_flow) >> 20) & 0xff;
 
 		fi6->fi6_flowlabel = hdr6->ip6_flow & htonl(0x000fffff);
 		fi6->fi6_src = hdr6->ip6_src;
 		fi6->fi6_dst = hdr6->ip6_dst;
 
 		/*
 		 * walk the headers if port info is required, or if proto
 		 * is required but there are option headers in the way.
 		 */
 		if ((filt_bmask & FIMB6_PORTS) ||
 		    ((filt_bmask & FIMB6_PROTO)
 		     && hdr6->ip6_nxt > IPPROTO_IPV6)) {
 			extract_ports6(m, hdr6, fi6);
 		} else {
 			fi6->fi6_sport = 0;
 			fi6->fi6_dport = 0;
 			fi6->fi6_gpi = 0;
 		}
 		return (1);
 	}
 #endif /* INET6 */
 
 	/* failed: hand back an empty, family-less flow */
 	flow->fi_len = sizeof(struct flowinfo);
 	flow->fi_family = AF_UNSPEC;
 	return (0);
 }
 
 /*
  * helper routine to extract port numbers
  */
 /*
  * structure for ipsec and ipv6 option header template.
  * it is overlaid on packet data, so the field order and sizes must not
  * be changed.
  */
 struct _opt6 {
 	u_int8_t	opt6_nxt;	/* next header */
 	u_int8_t	opt6_hlen;	/* header extension length */
 	u_int16_t	_pad;
 	u_int32_t	ah_spi;		/* security parameter index
 					   for authentication header */
 };
 
 /*
  * extract port numbers from a ipv4 packet.
  *
  * m:   mbuf chain holding the packet.
  * ip:  ipv4 header; must point into one of the mbufs of the chain.
  * fin: flow info whose fi_sport, fi_dport, fi_gpi and fi_proto are filled.
  *
  * non-first fragments are answered from the fragment cache; a first
  * fragment whose ports could be read is entered into that cache.
  * returns 1 when the transport header was handled (or the packet is a
  * non-first fragment), 0 when the header could not be located, is too
  * short, or belongs to a protocol without ports.
  */
 static int
 extract_ports4(m, ip, fin)
 	struct mbuf *m;
 	struct ip *ip;
 	struct flowinfo_in *fin;
 {
 	struct mbuf *m0;
 	u_short ip_off;
 	u_int8_t proto;
 	int 	off;
 
 	fin->fi_sport = 0;
 	fin->fi_dport = 0;
 	fin->fi_gpi = 0;
 
 	ip_off = ntohs(ip->ip_off);
 	/* if it is a fragment, try cached fragment info */
 	if (ip_off & IP_OFFMASK) {
 		ip4f_lookup(ip, fin);
 		return (1);
 	}
 
 	/* locate the mbuf containing the protocol header */
 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
 		if (((caddr_t)ip >= m0->m_data) &&
 		    ((caddr_t)ip < m0->m_data + m0->m_len))
 			break;
 	if (m0 == NULL) {
 #ifdef ALTQ_DEBUG
 		printf("extract_ports4: can't locate header! ip=%p\n", ip);
 #endif
 		return (0);
 	}
 	/* offset of the first header after the ip header, within m0 */
 	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
 	proto = ip->ip_p;
 
 #ifdef ALTQ_IPSEC
  again:
 #endif
 	/* step to the mbuf that holds offset off */
 	while (off >= m0->m_len) {
 		off -= m0->m_len;
 		m0 = m0->m_next;
 		if (m0 == NULL)
 			return (0);  /* bogus ip_hl! */
 	}
 	/* every case below reads at least 4 contiguous bytes */
 	if (m0->m_len < off + 4)
 		return (0);
 
 	switch (proto) {
 	case IPPROTO_TCP:
 	case IPPROTO_UDP: {
 		struct udphdr *udp;
 
 		/* tcp and udp both start with source and destination port */
 		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
 		fin->fi_sport = udp->uh_sport;
 		fin->fi_dport = udp->uh_dport;
 		fin->fi_proto = proto;
 		}
 		break;
 
 #ifdef ALTQ_IPSEC
 	case IPPROTO_ESP:
 		if (fin->fi_gpi == 0){
 			u_int32_t *gpi;
 
 			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
 			fin->fi_gpi   = *gpi;
 		}
 		fin->fi_proto = proto;
 		break;
 
 	case IPPROTO_AH: {
 			/* get next header and header length */
 			struct _opt6 *opt6;
 
 			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
 			/*
 			 * the spi sits in bytes 4-7 of the AH header, so it
 			 * has to be bounds-checked against the offset of this
 			 * header, i.e. before off is advanced to the next
 			 * one (same order as in extract_ports6).
 			 */
 			if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
 				fin->fi_gpi = opt6->ah_spi;
 			proto = opt6->opt6_nxt;
 			off += 8 + (opt6->opt6_hlen * 4);
 		}
 		/* goto the next header */
 		goto again;
 #endif  /* ALTQ_IPSEC */
 
 	default:
 		fin->fi_proto = proto;
 		return (0);
 	}
 
 	/* if this is a first fragment, cache it. */
 	if (ip_off & IP_MF)
 		ip4f_cache(ip, fin);
 
 	return (1);
 }
 
 #ifdef INET6
 /*
  * extract port numbers from a ipv6 packet.
  * walks the chain of extension headers that follows ip6 until a header
  * with ports (tcp, udp) or an esp header is found.
  * returns 1 when fin6 was filled from tcp, udp or esp, 0 when the header
  * could not be located, is too short, or is one that is not handled
  * (fragment headers included).
  */
 static int
 extract_ports6(m, ip6, fin6)
 	struct mbuf *m;
 	struct ip6_hdr *ip6;
 	struct flowinfo_in6 *fin6;
 {
 	struct mbuf *m0;
 	int	off;
 	u_int8_t proto;
 
 	fin6->fi6_gpi   = 0;
 	fin6->fi6_sport = 0;
 	fin6->fi6_dport = 0;
 
 	/* locate the mbuf containing the protocol header */
 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
 		if (((caddr_t)ip6 >= m0->m_data) &&
 		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
 			break;
 	if (m0 == NULL) {
 #ifdef ALTQ_DEBUG
 		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
 #endif
 		return (0);
 	}
 	/* offset of the first header after the fixed ip6 header, within m0 */
 	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);
 
 	proto = ip6->ip6_nxt;
 	do {
 		/* step to the mbuf that holds offset off */
 		while (off >= m0->m_len) {
 			off -= m0->m_len;
 			m0 = m0->m_next;
 			if (m0 == NULL)
 				return (0);
 		}
 		/* every case below reads at least 4 contiguous bytes */
 		if (m0->m_len < off + 4)
 			return (0);
 
 		switch (proto) {
 		case IPPROTO_TCP:
 		case IPPROTO_UDP: {
 			struct udphdr *udp;
 
 			/* tcp and udp both start with the two port numbers */
 			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
 			fin6->fi6_sport = udp->uh_sport;
 			fin6->fi6_dport = udp->uh_dport;
 			fin6->fi6_proto = proto;
 			}
 			return (1);
 
 		case IPPROTO_ESP:
 			/* keep a gpi already taken from an earlier AH header */
 			if (fin6->fi6_gpi == 0) {
 				u_int32_t *gpi;
 
 				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
 				fin6->fi6_gpi   = *gpi;
 			}
 			fin6->fi6_proto = proto;
 			return (1);
 
 		case IPPROTO_AH: {
 			/* get next header and header length */
 			struct _opt6 *opt6;
 
 			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
 			/* the spi is only read when all 8 bytes are present */
 			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
 				fin6->fi6_gpi = opt6->ah_spi;
 			proto = opt6->opt6_nxt;
 			/* AH length field counts 4-byte units */
 			off += 8 + (opt6->opt6_hlen * 4);
 			/* goto the next header */
 			break;
 			}
 
 		case IPPROTO_HOPOPTS:
 		case IPPROTO_ROUTING:
 		case IPPROTO_DSTOPTS: {
 			/* get next header and header length */
 			struct _opt6 *opt6;
 
 			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
 			proto = opt6->opt6_nxt;
 			/* these length fields count 8-byte units past the first */
 			off += (opt6->opt6_hlen + 1) * 8;
 			/* goto the next header */
 			break;
 			}
 
 		case IPPROTO_FRAGMENT:
 			/* ipv6 fragmentations are not supported yet */
 		default:
 			fin6->fi6_proto = proto;
 			return (0);
 		}
 	} while (1);
 	/*NOTREACHED*/
 }
 #endif /* INET6 */
 
 /*
  * altq common classifier
  */
 /*
  * add a filter to the classifier.
  *
  * classifier: classifier whose hash table receives the filter.
  * filter:     filter description; it is copied, the caller's copy is
  *             not modified.
  * class:      opaque class pointer returned by acc_classify() on a match.
  * phandle:    receives the handle of the new filter.
  *
  * returns 0 on success, EINVAL for an unsupported address family and
  * ENOMEM when the allocation returns NULL.
  */
 int
 acc_add_filter(classifier, filter, class, phandle)
 	struct acc_classifier *classifier;
 	struct flow_filter *filter;
 	void	*class;
 	u_long	*phandle;
 {
 	struct acc_filter *afp, *prev, *tmp;
 	int	i, s;
 
 #ifdef INET6
 	if (filter->ff_flow.fi_family != AF_INET &&
 	    filter->ff_flow.fi_family != AF_INET6)
 		return (EINVAL);
 #else
 	if (filter->ff_flow.fi_family != AF_INET)
 		return (EINVAL);
 #endif
 
 	/*
 	 * NOTE(review): with M_WAITOK the NULL check below is presumably
 	 * never taken — confirm against malloc(9).
 	 */
 	afp = malloc(sizeof(struct acc_filter),
 	       M_DEVBUF, M_WAITOK);
 	if (afp == NULL)
 		return (ENOMEM);
 	bzero(afp, sizeof(struct acc_filter));
 
 	/* work on a private copy so the masks can be normalized */
 	afp->f_filter = *filter;
 	afp->f_class = class;
 
 	i = ACC_WILDCARD_INDEX;
 	if (filter->ff_flow.fi_family == AF_INET) {
 		struct flow_filter *filter4 = &afp->f_filter;
 
 		/*
 		 * if address is 0, it's a wildcard.  if address mask
 		 * isn't set, use full mask.
 		 */
 		if (filter4->ff_flow.fi_dst.s_addr == 0)
 			filter4->ff_mask.mask_dst.s_addr = 0;
 		else if (filter4->ff_mask.mask_dst.s_addr == 0)
 			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
 		if (filter4->ff_flow.fi_src.s_addr == 0)
 			filter4->ff_mask.mask_src.s_addr = 0;
 		else if (filter4->ff_mask.mask_src.s_addr == 0)
 			filter4->ff_mask.mask_src.s_addr = 0xffffffff;
 
 		/* clear extra bits in addresses  */
 		   filter4->ff_flow.fi_dst.s_addr &=
 		       filter4->ff_mask.mask_dst.s_addr;
 		   filter4->ff_flow.fi_src.s_addr &=
 		       filter4->ff_mask.mask_src.s_addr;
 
 		/*
 		 * if dst address is a wildcard, use hash-entry
 		 * ACC_WILDCARD_INDEX.
 		 */
 		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
 			i = ACC_WILDCARD_INDEX;
 		else
 			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
 	}
 #ifdef INET6
 	else if (filter->ff_flow.fi_family == AF_INET6) {
 		/* for AF_INET6 the filter storage is read as a flow_filter6 */
 		struct flow_filter6 *filter6 =
 			(struct flow_filter6 *)&afp->f_filter;
 #ifndef IN6MASK0 /* taken from kame ipv6 */
 #define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
 #define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
 		const struct in6_addr in6mask0 = IN6MASK0;
 		const struct in6_addr in6mask128 = IN6MASK128;
 #endif
 
 		/* same wildcard / full-mask defaults as for ipv4 above */
 		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
 			filter6->ff_mask6.mask6_dst = in6mask0;
 		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
 			filter6->ff_mask6.mask6_dst = in6mask128;
 		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
 			filter6->ff_mask6.mask6_src = in6mask0;
 		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
 			filter6->ff_mask6.mask6_src = in6mask128;
 
 		/* clear extra bits in addresses  */
 		for (i = 0; i < 16; i++)
 			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
 			    filter6->ff_mask6.mask6_dst.s6_addr[i];
 		for (i = 0; i < 16; i++)
 			filter6->ff_flow6.fi6_src.s6_addr[i] &=
 			    filter6->ff_mask6.mask6_src.s6_addr[i];
 
 		/* ipv6 filters are hashed on the flow label, 0 is a wildcard */
 		if (filter6->ff_flow6.fi6_flowlabel == 0)
 			i = ACC_WILDCARD_INDEX;
 		else
 			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
 	}
 #endif /* INET6 */
 
 	afp->f_handle = get_filt_handle(classifier, i);
 
 	/* update filter bitmask */
 	afp->f_fbmask = filt2fibmask(filter);
 	classifier->acc_fbmask |= afp->f_fbmask;
 
 	/*
 	 * add this filter to the filter list.
 	 * filters are ordered from the highest rule number.
 	 */
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	/* prev ends up as the last entry with a higher rule number */
 	prev = NULL;
 	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
 		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
 			prev = tmp;
 		else
 			break;
 	}
 	if (prev == NULL)
 		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
 	else
 		LIST_INSERT_AFTER(prev, afp, f_chain);
 	splx(s);
 
 	*phandle = afp->f_handle;
 	return (0);
 }
 
 /*
  * delete the filter identified by handle from the classifier and free it.
  * returns EINVAL when no filter carries that handle, 0 otherwise.
  */
 int
 acc_delete_filter(classifier, handle)
 	struct acc_classifier *classifier;
 	u_long handle;
 {
 	struct acc_filter *afp;
 	int	s;
 
 	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
 		return (EINVAL);
 
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	LIST_REMOVE(afp, f_chain);
 	splx(s);
 
 	free(afp, M_DEVBUF);
 
 	/* todo: update filt_bmask */
 
 	return (0);
 }
 
 /*
  * delete filters referencing to the specified class.
  * if the all flag is not 0, delete all the filters.
  * the classifier's filter bitmask is only reset when all is set.
  * always returns 0.
  */
 int
 acc_discard_filters(classifier, class, all)
 	struct acc_classifier *classifier;
 	void	*class;
 	int	all;
 {
 	struct acc_filter *afp;
 	int	i, s;
 
-#ifdef __NetBSD__
 	s = splnet();
-#else
-	s = splimp();
-#endif
 	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
 		/*
 		 * removing an entry invalidates the list walk, so each
 		 * removal restarts the scan of this bucket; the scan that
 		 * reaches the end without a removal leaves afp NULL and
 		 * terminates the do-while.
 		 */
 		do {
 			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
 				if (all || afp->f_class == class) {
 					LIST_REMOVE(afp, f_chain);
 					free(afp, M_DEVBUF);
 					/* start again from the head */
 					break;
 				}
 		} while (afp != NULL);
 	}
 	splx(s);
 
 	if (all)
 		classifier->acc_fbmask = 0;
 
 	return (0);
 }
 
 /*
  * classify a packet.
  *
  * clfier: the acc_classifier, passed as an opaque pointer.
  * m:      packet to classify.
  * af:     protocol family of the packet.
  *
  * returns the class pointer of the first matching filter, or NULL when
  * no filter matches or the flow could not be extracted.
  */
 void *
 acc_classify(clfier, m, af)
 	void *clfier;
 	struct mbuf *m;
 	int af;
 {
 	struct acc_classifier *classifier;
 	struct flowinfo flow;
 	struct acc_filter *afp;
 	int	i;
 
 	classifier = (struct acc_classifier *)clfier;
 	/* on failure the flow family is AF_UNSPEC and no branch below runs */
 	altq_extractflow(m, af, &flow, classifier->acc_fbmask);
 
 	if (flow.fi_family == AF_INET) {
 		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;
 
 		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
 			/* only tos is used */
 			LIST_FOREACH(afp,
 				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
 				 f_chain)
 				if (apply_tosfilter4(afp->f_fbmask,
 						     &afp->f_filter, fp))
 					/* filter matched */
 					return (afp->f_class);
 		} else if ((classifier->acc_fbmask &
 			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
 		    == 0) {
 			/* only proto and ports are used */
 			LIST_FOREACH(afp,
 				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
 				 f_chain)
 				if (apply_ppfilter4(afp->f_fbmask,
 						    &afp->f_filter, fp))
 					/* filter matched */
 					return (afp->f_class);
 		} else {
 			/* get the filter hash entry from its dest address */
 			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
 			do {
 				/*
 				 * go through this loop twice.  first for dst
 				 * hash, second for wildcards.
 				 */
 				LIST_FOREACH(afp, &classifier->acc_filters[i],
 					     f_chain)
 					if (apply_filter4(afp->f_fbmask,
 							  &afp->f_filter, fp))
 						/* filter matched */
 						return (afp->f_class);
 
 				/*
 				 * check again for filters with a dst addr
 				 * wildcard.
 				 * (daddr == 0 || dmask != 0xffffffff).
 				 */
 				if (i != ACC_WILDCARD_INDEX)
 					i = ACC_WILDCARD_INDEX;
 				else
 					break;
 			} while (1);
 		}
 	}
 #ifdef INET6
 	else if (flow.fi_family == AF_INET6) {
 		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;
 
 		/* get the filter hash entry from its flow ID */
 		if (fp6->fi6_flowlabel != 0)
 			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
 		else
 			/* flowlabel can be zero */
 			i = ACC_WILDCARD_INDEX;
 
 		/* go through this loop twice.  first for flow hash, second
 		   for wildcards. */
 		do {
 			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
 				if (apply_filter6(afp->f_fbmask,
 					(struct flow_filter6 *)&afp->f_filter,
 					fp6))
 					/* filter matched */
 					return (afp->f_class);
 
 			/*
 			 * check again for filters with a wildcard.
 			 */
 			if (i != ACC_WILDCARD_INDEX)
 				i = ACC_WILDCARD_INDEX;
 			else
 				break;
 		} while (1);
 	}
 #endif /* INET6 */
 
 	/* no filter matched */
 	return (NULL);
 }
 
 /*
  * Full IPv4 filter match.  Only the fields whose bit is set in fbmask are
  * compared; packet addresses and tos are masked with the filter's masks
  * before the comparison.  Returns 1 on a match, 0 otherwise.
  */
 static int
 apply_filter4(fbmask, filt, pkt)
 	u_int32_t	fbmask;
 	struct flow_filter *filt;
 	struct flowinfo_in *pkt;
 {
 	if (filt->ff_flow.fi_family != AF_INET)
 		return (0);
 
 	if (fbmask & FIMB4_SPORT) {
 		if (filt->ff_flow.fi_sport != pkt->fi_sport)
 			return (0);
 	}
 	if (fbmask & FIMB4_DPORT) {
 		if (filt->ff_flow.fi_dport != pkt->fi_dport)
 			return (0);
 	}
 	if (fbmask & FIMB4_DADDR) {
 		if (filt->ff_flow.fi_dst.s_addr !=
 		    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
 			return (0);
 	}
 	if (fbmask & FIMB4_SADDR) {
 		if (filt->ff_flow.fi_src.s_addr !=
 		    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
 			return (0);
 	}
 	if (fbmask & FIMB4_PROTO) {
 		if (filt->ff_flow.fi_proto != pkt->fi_proto)
 			return (0);
 	}
 	if (fbmask & FIMB4_TOS) {
 		if (filt->ff_flow.fi_tos !=
 		    (pkt->fi_tos & filt->ff_mask.mask_tos))
 			return (0);
 	}
 	if (fbmask & FIMB4_GPI) {
 		if (filt->ff_flow.fi_gpi != (pkt->fi_gpi))
 			return (0);
 	}
 
 	/* every requested field agreed */
 	return (1);
 }
 
 /*
  * IPv4 filter match reduced to the common case in which only the
  * protocol and the port numbers have to be compared.
  * Returns 1 on a match, 0 otherwise.
  */
 static int
 apply_ppfilter4(fbmask, filt, pkt)
 	u_int32_t	fbmask;
 	struct flow_filter *filt;
 	struct flowinfo_in *pkt;
 {
 	/* each field only takes part when its bit is set in fbmask */
 	return (filt->ff_flow.fi_family == AF_INET &&
 	    (!(fbmask & FIMB4_SPORT) ||
 	     filt->ff_flow.fi_sport == pkt->fi_sport) &&
 	    (!(fbmask & FIMB4_DPORT) ||
 	     filt->ff_flow.fi_dport == pkt->fi_dport) &&
 	    (!(fbmask & FIMB4_PROTO) ||
 	     filt->ff_flow.fi_proto == pkt->fi_proto));
 }
 
 /*
  * IPv4 filter match that looks at the tos field only; the packet's tos
  * is masked with the filter's tos mask first.
  * Returns 1 on a match, 0 otherwise.
  */
 static int
 apply_tosfilter4(fbmask, filt, pkt)
 	u_int32_t	fbmask;
 	struct flow_filter *filt;
 	struct flowinfo_in *pkt;
 {
 	/* tos only takes part when its bit is set in fbmask */
 	return (filt->ff_flow.fi_family == AF_INET &&
 	    (!(fbmask & FIMB4_TOS) ||
 	     filt->ff_flow.fi_tos ==
 	     (pkt->fi_tos & filt->ff_mask.mask_tos)));
 }
 
 #ifdef INET6
 /*
  * Full IPv6 filter match.  Only the fields whose bit is set in fbmask are
  * compared; packet addresses and traffic class are masked with the
  * filter's masks before the comparison.  Returns 1 on a match, 0 otherwise.
  */
 static int
 apply_filter6(fbmask, filt, pkt)
 	u_int32_t	fbmask;
 	struct flow_filter6 *filt;
 	struct flowinfo_in6 *pkt;
 {
 	int w;	/* index of the 32-bit address word being compared */
 
 	if (filt->ff_flow6.fi6_family != AF_INET6)
 		return (0);
 
 	if (fbmask & FIMB6_FLABEL) {
 		if (filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
 			return (0);
 	}
 	if (fbmask & FIMB6_PROTO) {
 		if (filt->ff_flow6.fi6_proto != pkt->fi6_proto)
 			return (0);
 	}
 	if (fbmask & FIMB6_SPORT) {
 		if (filt->ff_flow6.fi6_sport != pkt->fi6_sport)
 			return (0);
 	}
 	if (fbmask & FIMB6_DPORT) {
 		if (filt->ff_flow6.fi6_dport != pkt->fi6_dport)
 			return (0);
 	}
 	if (fbmask & FIMB6_SADDR) {
 		for (w = 0; w < 4; w++) {
 			if (filt->ff_flow6.fi6_src.s6_addr32[w] !=
 			    (pkt->fi6_src.s6_addr32[w] &
 			     filt->ff_mask6.mask6_src.s6_addr32[w]))
 				return (0);
 		}
 	}
 	if (fbmask & FIMB6_DADDR) {
 		for (w = 0; w < 4; w++) {
 			if (filt->ff_flow6.fi6_dst.s6_addr32[w] !=
 			    (pkt->fi6_dst.s6_addr32[w] &
 			     filt->ff_mask6.mask6_dst.s6_addr32[w]))
 				return (0);
 		}
 	}
 	if (fbmask & FIMB6_TCLASS) {
 		if (filt->ff_flow6.fi6_tclass !=
 		    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
 			return (0);
 	}
 	if (fbmask & FIMB6_GPI) {
 		if (filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
 			return (0);
 	}
 
 	/* every requested field agreed */
 	return (1);
 }
 #endif /* INET6 */
 
 /*
  *  filter handle:
  *	bit 20-28: index to the filter hash table
  *	bit  0-19: unique id in the hash bucket.
  */
 static u_long
 get_filt_handle(classifier, i)
 	struct acc_classifier *classifier;
 	int	i;
 {
 	static u_long handle_number = 1;
 	u_long 	handle;
 	struct acc_filter *afp;
 
 	while (1) {
 		handle = handle_number++ & 0x000fffff;
 
 		if (LIST_EMPTY(&classifier->acc_filters[i]))
 			break;
 
 		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
 			if ((afp->f_handle & 0x000fffff) == handle)
 				break;
 		if (afp == NULL)
 			break;
 		/* this handle is already used, try again */
 	}
 
 	return ((i << 20) | handle);
 }
 
 /* convert filter handle to filter pointer */
 static struct acc_filter *
 filth_to_filtp(classifier, handle)
 	struct acc_classifier *classifier;
 	u_long handle;
 {
 	struct acc_filter *afp;
 	int	i;
 
 	i = ACC_GET_HINDEX(handle);
 
 	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
 		if (afp->f_handle == handle)
 			return (afp);
 
 	return (NULL);
 }
 
 /*
  * Create the flowinfo bitmask of a filter: one bit for every field of
  * the filter's flow that is set to a non-zero (non-unspecified) value.
  * Filters of any other address family yield 0.
  */
 static u_int32_t
 filt2fibmask(filt)
 	struct flow_filter *filt;
 {
 	u_int32_t bits = 0;
 
 	if (filt->ff_flow.fi_family == AF_INET) {
 		if (filt->ff_flow.fi_proto)
 			bits |= FIMB4_PROTO;
 		if (filt->ff_flow.fi_tos)
 			bits |= FIMB4_TOS;
 		if (filt->ff_flow.fi_dst.s_addr)
 			bits |= FIMB4_DADDR;
 		if (filt->ff_flow.fi_src.s_addr)
 			bits |= FIMB4_SADDR;
 		if (filt->ff_flow.fi_sport)
 			bits |= FIMB4_SPORT;
 		if (filt->ff_flow.fi_dport)
 			bits |= FIMB4_DPORT;
 		if (filt->ff_flow.fi_gpi)
 			bits |= FIMB4_GPI;
 	}
 #ifdef INET6
 	else if (filt->ff_flow.fi_family == AF_INET6) {
 		/* an AF_INET6 filter is really a flow_filter6 */
 		struct flow_filter6 *filt6 = (struct flow_filter6 *)filt;
 
 		if (filt6->ff_flow6.fi6_proto)
 			bits |= FIMB6_PROTO;
 		if (filt6->ff_flow6.fi6_tclass)
 			bits |= FIMB6_TCLASS;
 		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
 			bits |= FIMB6_DADDR;
 		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
 			bits |= FIMB6_SADDR;
 		if (filt6->ff_flow6.fi6_sport)
 			bits |= FIMB6_SPORT;
 		if (filt6->ff_flow6.fi6_dport)
 			bits |= FIMB6_DPORT;
 		if (filt6->ff_flow6.fi6_gpi)
 			bits |= FIMB6_GPI;
 		if (filt6->ff_flow6.fi6_flowlabel)
 			bits |= FIMB6_FLABEL;
 	}
 #endif /* INET6 */
 
 	return (bits);
 }
 
 
 /*
  * helper functions to handle IPv4 fragments.
  * currently only in-sequence fragments are handled.
  *	- fragment info is cached in a LRU list.
  *	- when a first fragment is found, cache its flow info.
  *	- when a non-first fragment is found, lookup the cache.
  */
 
 /* one entry of the fragment cache */
 struct ip4_frag {
     TAILQ_ENTRY(ip4_frag) ip4f_chain;	/* linkage in ip4f_list */
     char    ip4f_valid;			/* non-zero while the entry is in use */
     u_short ip4f_id;			/* ip_id of the cached datagram */
     struct flowinfo_in ip4f_info;	/* flow info saved from the first fragment */
 };
 
 /* valid entries are kept at the head, free entries at the tail */
 static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */
 
 #define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */
 
 
 /*
  * Remember the flow info of a first fragment so that the fragments that
  * follow can be matched to it.  The cache entries are allocated on the
  * first call; nothing is cached when that allocation fails.
  */
 static void
 ip4f_cache(ip, fin)
 	struct ip *ip;
 	struct flowinfo_in *fin;
 {
 	struct ip4_frag *entry;
 
 	/* an empty list means the cache has not been set up yet */
 	if (TAILQ_EMPTY(&ip4f_list) && ip4f_init() < 0)
 		return;
 
 	entry = ip4f_alloc();
 
 	/* key: datagram id, protocol and both addresses */
 	entry->ip4f_id = ip->ip_id;
 	entry->ip4f_info.fi_proto = ip->ip_p;
 	entry->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
 	entry->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;
 
 	/* payload: what later fragments cannot provide themselves */
 	entry->ip4f_info.fi_sport = fin->fi_sport;
 	entry->ip4f_info.fi_dport = fin->fi_dport;
 	entry->ip4f_info.fi_gpi   = fin->fi_gpi;
 }
 
 static int
 ip4f_lookup(ip, fin)
 	struct ip *ip;
 	struct flowinfo_in *fin;
 {
 	struct ip4_frag *fp;
 
 	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
 	     fp = TAILQ_NEXT(fp, ip4f_chain))
 		if (ip->ip_id == fp->ip4f_id &&
 		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
 		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
 		    ip->ip_p == fp->ip4f_info.fi_proto) {
 
 			/* found the matching entry */
 			fin->fi_sport = fp->ip4f_info.fi_sport;
 			fin->fi_dport = fp->ip4f_info.fi_dport;
 			fin->fi_gpi   = fp->ip4f_info.fi_gpi;
 
 			if ((ntohs(ip->ip_off) & IP_MF) == 0)
 				/* this is the last fragment,
 				   release the entry. */
 				ip4f_free(fp);
 
 			return (1);
 		}
 
 	/* no matching entry found */
 	return (0);
 }
 
 /*
  * Set up the fragment cache with up to IP4F_TABSIZE free entries.
  * Returns -1 when not even one entry could be allocated, otherwise 0
  * (a partially filled cache is accepted).
  */
 static int
 ip4f_init(void)
 {
 	struct ip4_frag *entry;
 	int n;
 
 	TAILQ_INIT(&ip4f_list);
 	n = 0;
 	while (n < IP4F_TABSIZE) {
 		entry = malloc(sizeof(struct ip4_frag),
 		       M_DEVBUF, M_NOWAIT);
 		if (entry == NULL) {
 			printf("ip4f_init: can't alloc %dth entry!\n", n);
 			/* fail only when the cache would be left empty */
 			return (n == 0 ? -1 : 0);
 		}
 		entry->ip4f_valid = 0;
 		TAILQ_INSERT_TAIL(&ip4f_list, entry, ip4f_chain);
 		n++;
 	}
 
 	return (0);
 }
 
 /*
  * Take the entry at the tail of the cache, mark it valid and move it to
  * the head.  The list must not be empty.
  */
 static struct ip4_frag *
 ip4f_alloc(void)
 {
 	struct ip4_frag *victim = TAILQ_LAST(&ip4f_list, ip4f_list);
 
 	/* the tail holds the entry to reclaim */
 	TAILQ_REMOVE(&ip4f_list, victim, ip4f_chain);
 	TAILQ_INSERT_HEAD(&ip4f_list, victim, ip4f_chain);
 	victim->ip4f_valid = 1;
 
 	return (victim);
 }
 
 /*
  * Release a cache entry: mark it invalid and move it to the tail, where
  * ip4f_alloc() picks up the next entry to use.
  */
 static void
 ip4f_free(fp)
 	struct ip4_frag *fp;
 {
 	fp->ip4f_valid = 0;
 	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
 	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
 }
 
 #endif /* ALTQ3_CLFIER_COMPAT */
Index: user/ngie/more-tests/sys/net/altq/altq_var.h
===================================================================
--- user/ngie/more-tests/sys/net/altq/altq_var.h	(revision 281675)
+++ user/ngie/more-tests/sys/net/altq/altq_var.h	(revision 281676)
@@ -1,261 +1,231 @@
-/*	$FreeBSD$	*/
-/*	$KAME: altq_var.h,v 1.16 2003/10/03 05:05:15 kjc Exp $	*/
-
-/*
+/*-
  * Copyright (C) 1998-2003
  *	Sony Computer Science Laboratories Inc.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
+ *
+ * $KAME: altq_var.h,v 1.16 2003/10/03 05:05:15 kjc Exp $
+ * $FreeBSD$
  */
 #ifndef _ALTQ_ALTQ_VAR_H_
 #define	_ALTQ_ALTQ_VAR_H_
 
 #ifdef _KERNEL
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
 
 #ifdef ALTQ3_CLFIER_COMPAT
 /*
  * filter structure for altq common classifier
  */
 struct acc_filter {
 	LIST_ENTRY(acc_filter)	f_chain;
 	void			*f_class;	/* pointer to the class */
 	u_long			f_handle;	/* filter id */
 	u_int32_t		f_fbmask;	/* filter bitmask */
 	struct flow_filter	f_filter;	/* filter value */
 };
 
 /*
  * XXX ACC_FILTER_TABLESIZE can't be larger than 2048 unless we fix
  * the handle assignment.
  */
 #define	ACC_FILTER_TABLESIZE	(256+1)
 #define	ACC_FILTER_MASK		(ACC_FILTER_TABLESIZE - 2)
 #define	ACC_WILDCARD_INDEX	(ACC_FILTER_TABLESIZE - 1)
 #ifdef __GNUC__
 #define	ACC_GET_HASH_INDEX(addr) \
 	({int x = (addr) + ((addr) >> 16); (x + (x >> 8)) & ACC_FILTER_MASK;})
 #else
 #define	ACC_GET_HASH_INDEX(addr) \
 	(((addr) + ((addr) >> 8) + ((addr) >> 16) + ((addr) >> 24)) \
 	& ACC_FILTER_MASK)
 #endif
 #define	ACC_GET_HINDEX(handle) ((handle) >> 20)
 
 #if (__FreeBSD_version > 500000)
 #define ACC_LOCK_INIT(ac)	mtx_init(&(ac)->acc_mtx, "classifier", MTX_DEF)
 #define ACC_LOCK_DESTROY(ac)	mtx_destroy(&(ac)->acc_mtx)
 #define ACC_LOCK(ac)		mtx_lock(&(ac)->acc_mtx)
 #define ACC_UNLOCK(ac)		mtx_unlock(&(ac)->acc_mtx)
 #else
 #define ACC_LOCK_INIT(ac)
 #define ACC_LOCK_DESTROY(ac)
 #define ACC_LOCK(ac)
 #define ACC_UNLOCK(ac)
 #endif
 
 struct acc_classifier {
 	u_int32_t			acc_fbmask;
 	LIST_HEAD(filt, acc_filter)	acc_filters[ACC_FILTER_TABLESIZE];
 
 #if (__FreeBSD_version > 500000)
 	struct	mtx acc_mtx;
 #endif
 };
 
 /*
  * flowinfo mask bits used by classifier
  */
 /* for ipv4 */
 #define	FIMB4_PROTO	0x0001
 #define	FIMB4_TOS	0x0002
 #define	FIMB4_DADDR	0x0004
 #define	FIMB4_SADDR	0x0008
 #define	FIMB4_DPORT	0x0010
 #define	FIMB4_SPORT	0x0020
 #define	FIMB4_GPI	0x0040
 #define	FIMB4_ALL	0x007f
 /* for ipv6 */
 #define	FIMB6_PROTO	0x0100
 #define	FIMB6_TCLASS	0x0200
 #define	FIMB6_DADDR	0x0400
 #define	FIMB6_SADDR	0x0800
 #define	FIMB6_DPORT	0x1000
 #define	FIMB6_SPORT	0x2000
 #define	FIMB6_GPI	0x4000
 #define	FIMB6_FLABEL	0x8000
 #define	FIMB6_ALL	0xff00
 
 #define	FIMB_ALL	(FIMB4_ALL|FIMB6_ALL)
 
 #define	FIMB4_PORTS	(FIMB4_DPORT|FIMB4_SPORT|FIMB4_GPI)
 #define	FIMB6_PORTS	(FIMB6_DPORT|FIMB6_SPORT|FIMB6_GPI)
 #endif /* ALTQ3_CLFIER_COMPAT */
 
 /*
  * machine dependent clock
  * a 64bit high resolution time counter.
  */
 extern int machclk_usepcc;
 extern u_int32_t machclk_freq;
 extern u_int32_t machclk_per_tick;
 extern void init_machclk(void);
 extern u_int64_t read_machclk(void);
 
 /*
  * debug support
  */
 #ifdef ALTQ_DEBUG
 #ifdef __STDC__
 #define	ASSERT(e)	((e) ? (void)0 : altq_assert(__FILE__, __LINE__, #e))
 #else	/* PCC */
 #define	ASSERT(e)	((e) ? (void)0 : altq_assert(__FILE__, __LINE__, "e"))
 #endif
 #else
 #define	ASSERT(e)	((void)0)
 #endif
 
 /*
  * misc stuff for compatibility
  */
 /* ioctl cmd type */
 typedef u_long ioctlcmd_t;
 
 /*
  * queue macros:
  * the interface of TAILQ_LAST macro changed after the introduction
  * of softupdate. redefine it here to make it work with pre-2.2.7.
  */
 #undef TAILQ_LAST
 #define	TAILQ_LAST(head, headname) \
 	(*(((struct headname *)((head)->tqh_last))->tqh_last))
 
 #ifndef TAILQ_EMPTY
 #define	TAILQ_EMPTY(head) ((head)->tqh_first == NULL)
 #endif
 #ifndef TAILQ_FOREACH
 #define TAILQ_FOREACH(var, head, field)					\
 	for (var = TAILQ_FIRST(head); var; var = TAILQ_NEXT(var, field))
 #endif
 
 /* macro for timeout/untimeout */
-#if (__FreeBSD_version > 300000) || defined(__NetBSD__)
 /* use callout */
 #include <sys/callout.h>
 
 #if (__FreeBSD_version > 500000)
 #define	CALLOUT_INIT(c)		callout_init((c), 0)
 #else
 #define	CALLOUT_INIT(c)		callout_init((c))
 #endif
 #define	CALLOUT_RESET(c,t,f,a)	callout_reset((c),(t),(f),(a))
 #define	CALLOUT_STOP(c)		callout_stop((c))
 #if !defined(CALLOUT_INITIALIZER) && (__FreeBSD_version < 600000)
 #define	CALLOUT_INITIALIZER	{ { { NULL } }, 0, NULL, NULL, 0 }
-#endif
-#elif defined(__OpenBSD__)
-#include <sys/timeout.h>
-/* callout structure as a wrapper of struct timeout */
-struct callout {
-	struct timeout	c_to;
-};
-#define	CALLOUT_INIT(c)		do { bzero((c), sizeof(*(c))); } while (/*CONSTCOND*/ 0)
-#define	CALLOUT_RESET(c,t,f,a)	do { if (!timeout_initialized(&(c)->c_to))  \
-					 timeout_set(&(c)->c_to, (f), (a)); \
-				     timeout_add(&(c)->c_to, (t)); } while (/*CONSTCOND*/ 0)
-#define	CALLOUT_STOP(c)		timeout_del(&(c)->c_to)
-#define	CALLOUT_INITIALIZER	{ { { NULL }, NULL, NULL, 0, 0 } }
-#else
-/* use old-style timeout/untimeout */
-/* dummy callout structure */
-struct callout {
-	void		*c_arg;			/* function argument */
-	void		(*c_func)(void *);	/* functiuon to call */
-};
-#define	CALLOUT_INIT(c)		do { bzero((c), sizeof(*(c))); } while (/*CONSTCOND*/ 0)
-#define	CALLOUT_RESET(c,t,f,a)	do {	(c)->c_arg = (a);	\
-					(c)->c_func = (f);	\
-					timeout((f),(a),(t)); } while (/*CONSTCOND*/ 0)
-#define	CALLOUT_STOP(c)		untimeout((c)->c_func,(c)->c_arg)
-#define	CALLOUT_INITIALIZER	{ NULL, NULL }
-#endif
-#if !defined(__FreeBSD__)
-typedef void (timeout_t)(void *);
 #endif
 
 #define	m_pktlen(m)		((m)->m_pkthdr.len)
 
 struct ifnet; struct mbuf;
 struct pf_altq;
 #ifdef ALTQ3_CLFIER_COMPAT
 struct flowinfo;
 #endif
 
 void	*altq_lookup(char *, int);
 #ifdef ALTQ3_CLFIER_COMPAT
 int	altq_extractflow(struct mbuf *, int, struct flowinfo *, u_int32_t);
 int	acc_add_filter(struct acc_classifier *, struct flow_filter *,
 	    void *, u_long *);
 int	acc_delete_filter(struct acc_classifier *, u_long);
 int	acc_discard_filters(struct acc_classifier *, void *, int);
 void	*acc_classify(void *, struct mbuf *, int);
 #endif
 u_int8_t read_dsfield(struct mbuf *, struct altq_pktattr *);
 void	write_dsfield(struct mbuf *, struct altq_pktattr *, u_int8_t);
 void	altq_assert(const char *, int, const char *);
 int	tbr_set(struct ifaltq *, struct tb_profile *);
 int	tbr_get(struct ifaltq *, struct tb_profile *);
 
 int	altq_pfattach(struct pf_altq *);
 int	altq_pfdetach(struct pf_altq *);
 int	altq_add(struct pf_altq *);
 int	altq_remove(struct pf_altq *);
 int	altq_add_queue(struct pf_altq *);
 int	altq_remove_queue(struct pf_altq *);
 int	altq_getqstats(struct pf_altq *, void *, int *);
 
 int	cbq_pfattach(struct pf_altq *);
 int	cbq_add_altq(struct pf_altq *);
 int	cbq_remove_altq(struct pf_altq *);
 int	cbq_add_queue(struct pf_altq *);
 int	cbq_remove_queue(struct pf_altq *);
 int	cbq_getqstats(struct pf_altq *, void *, int *);
 
 int	priq_pfattach(struct pf_altq *);
 int	priq_add_altq(struct pf_altq *);
 int	priq_remove_altq(struct pf_altq *);
 int	priq_add_queue(struct pf_altq *);
 int	priq_remove_queue(struct pf_altq *);
 int	priq_getqstats(struct pf_altq *, void *, int *);
 
 int	hfsc_pfattach(struct pf_altq *);
 int	hfsc_add_altq(struct pf_altq *);
 int	hfsc_remove_altq(struct pf_altq *);
 int	hfsc_add_queue(struct pf_altq *);
 int	hfsc_remove_queue(struct pf_altq *);
 int	hfsc_getqstats(struct pf_altq *, void *, int *);
 
 #endif /* _KERNEL */
 #endif /* _ALTQ_ALTQ_VAR_H_ */
Index: user/ngie/more-tests/sys/net/altq/if_altq.h
===================================================================
--- user/ngie/more-tests/sys/net/altq/if_altq.h	(revision 281675)
+++ user/ngie/more-tests/sys/net/altq/if_altq.h	(revision 281676)
@@ -1,190 +1,182 @@
-/*	$FreeBSD$	*/
-/*	$KAME: if_altq.h,v 1.12 2005/04/13 03:44:25 suz Exp $	*/
-
-/*
+/*-
  * Copyright (C) 1997-2003
  *	Sony Computer Science Laboratories Inc.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
+ *
+ * $KAME: if_altq.h,v 1.12 2005/04/13 03:44:25 suz Exp $
+ * $FreeBSD$
  */
 #ifndef _ALTQ_IF_ALTQ_H_
 #define	_ALTQ_IF_ALTQ_H_
 
-#ifdef __FreeBSD__
 #include <sys/lock.h>		/* XXX */
 #include <sys/mutex.h>		/* XXX */
 #include <sys/event.h>		/* XXX */
-#endif
 
-#ifdef _KERNEL_OPT
-#include <net/altq/altqconf.h>
-#endif
-
 struct altq_pktattr; struct tb_regulator; struct top_cdnr;
 
 /*
  * Structure defining a queue for a network interface.
  */
 struct	ifaltq {
 	/* fields compatible with struct ifqueue */
 	struct	mbuf *ifq_head;
 	struct	mbuf *ifq_tail;
 	int	ifq_len;
 	int	ifq_maxlen;
-#ifdef __FreeBSD__
 	struct	mtx ifq_mtx;
-#endif
 
 	/* driver owned queue (used for bulk dequeue and prepend) UNLOCKED */
 	struct	mbuf *ifq_drv_head;
 	struct	mbuf *ifq_drv_tail;
 	int	ifq_drv_len;
 	int	ifq_drv_maxlen;
 
 	/* alternate queueing related fields */
 	int	altq_type;		/* discipline type */
 	int	altq_flags;		/* flags (e.g. ready, in-use) */
 	void	*altq_disc;		/* for discipline-specific use */
 	struct	ifnet *altq_ifp;	/* back pointer to interface */
 
 	int	(*altq_enqueue)(struct ifaltq *, struct mbuf *,
 				struct altq_pktattr *);
 	struct	mbuf *(*altq_dequeue)(struct ifaltq *, int);
 	int	(*altq_request)(struct ifaltq *, int, void *);
 
 	/* classifier fields */
 	void	*altq_clfier;		/* classifier-specific use */
 	void	*(*altq_classify)(void *, struct mbuf *, int);
 
 	/* token bucket regulator */
 	struct	tb_regulator *altq_tbr;
 
 	/* input traffic conditioner (doesn't belong to the output queue...) */
 	struct top_cdnr *altq_cdnr;
 };
 
 
 #ifdef _KERNEL
 
 /*
  * packet attributes used by queueing disciplines.
  * pattr_class is a discipline-dependent scheduling class that is
  * set by a classifier.
  * pattr_hdr and pattr_af may be used by a discipline to access
  * the header within a mbuf.  (e.g. ECN needs to update the CE bit)
  * note that pattr_hdr could be stale after m_pullup, though link
  * layer output routines usually don't use m_pullup.  link-level
  * compression also invalidates these fields.  thus, pattr_hdr needs
  * to be verified when a discipline touches the header.
  */
 struct altq_pktattr {
 	void	*pattr_class;		/* sched class set by classifier */
 	int	pattr_af;		/* address family */
 	caddr_t	pattr_hdr;		/* saved header position in mbuf */
 };
 
 /*
  * mbuf tag to carry a queue id (and hints for ECN).
  */
 struct altq_tag {
 	u_int32_t	qid;		/* queue id */
 	/* hints for ecn */
 	int		af;		/* address family */
 	void		*hdr;		/* saved header position in mbuf */
 };
 
 /*
  * a token-bucket regulator limits the rate that a network driver can
  * dequeue packets from the output queue.
  * modern cards are able to buffer a large amount of packets and dequeue
  * too many packets at a time.  this bursty dequeue behavior makes it
  * impossible to schedule packets by queueing disciplines.
  * a token-bucket is used to control the burst size in a device
  * independent manner.
  */
 struct tb_regulator {
 	int64_t		tbr_rate;	/* (scaled) token bucket rate */
 	int64_t		tbr_depth;	/* (scaled) token bucket depth */
 
 	int64_t		tbr_token;	/* (scaled) current token */
 	int64_t		tbr_filluptime;	/* (scaled) time to fill up bucket */
 	u_int64_t	tbr_last;	/* last time token was updated */
 
 	int		tbr_lastop;	/* last dequeue operation type
 					   needed for poll-and-dequeue */
 };
 
 /* if_altqflags */
 #define	ALTQF_READY	 0x01	/* driver supports alternate queueing */
 #define	ALTQF_ENABLED	 0x02	/* altq is in use */
 #define	ALTQF_CLASSIFY	 0x04	/* classify packets */
 #define	ALTQF_CNDTNING	 0x08	/* altq traffic conditioning is enabled */
 #define	ALTQF_DRIVER1	 0x40	/* driver specific */
 
 /* if_altqflags set internally only: */
 #define	ALTQF_CANTCHANGE 	(ALTQF_READY)
 
 /* altq_dequeue 2nd arg */
 #define	ALTDQ_REMOVE		1	/* dequeue mbuf from the queue */
 #define	ALTDQ_POLL		2	/* don't dequeue mbuf from the queue */
 
 /* altq request types (currently only purge is defined) */
 #define	ALTRQ_PURGE		1	/* purge all packets */
 
 #define	ALTQ_IS_READY(ifq)		((ifq)->altq_flags & ALTQF_READY)
 #define	ALTQ_IS_ENABLED(ifq)		((ifq)->altq_flags & ALTQF_ENABLED)
 #define	ALTQ_NEEDS_CLASSIFY(ifq)	((ifq)->altq_flags & ALTQF_CLASSIFY)
 #define	ALTQ_IS_CNDTNING(ifq)		((ifq)->altq_flags & ALTQF_CNDTNING)
 
 #define	ALTQ_SET_CNDTNING(ifq)		((ifq)->altq_flags |= ALTQF_CNDTNING)
 #define	ALTQ_CLEAR_CNDTNING(ifq)	((ifq)->altq_flags &= ~ALTQF_CNDTNING)
 #define	ALTQ_IS_ATTACHED(ifq)		((ifq)->altq_disc != NULL)
 
 #define	ALTQ_ENQUEUE(ifq, m, pa, err)					\
 	(err) = (*(ifq)->altq_enqueue)((ifq),(m),(pa))
 #define	ALTQ_DEQUEUE(ifq, m)						\
 	(m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_REMOVE)
 #define	ALTQ_POLL(ifq, m)						\
 	(m) = (*(ifq)->altq_dequeue)((ifq), ALTDQ_POLL)
 #define	ALTQ_PURGE(ifq)							\
 	(void)(*(ifq)->altq_request)((ifq), ALTRQ_PURGE, (void *)0)
 #define	ALTQ_IS_EMPTY(ifq)		((ifq)->ifq_len == 0)
 #define	TBR_IS_ENABLED(ifq)		((ifq)->altq_tbr != NULL)
 
 extern int altq_attach(struct ifaltq *, int, void *,
 		       int (*)(struct ifaltq *, struct mbuf *,
 			       struct altq_pktattr *),
 		       struct mbuf *(*)(struct ifaltq *, int),
 		       int (*)(struct ifaltq *, int, void *),
 		       void *,
 		       void *(*)(void *, struct mbuf *, int));
 extern int altq_detach(struct ifaltq *);
 extern int altq_enable(struct ifaltq *);
 extern int altq_disable(struct ifaltq *);
 extern struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int);
 extern int (*altq_input)(struct mbuf *, int);
 #if 0 /* ALTQ3_CLFIER_COMPAT */
 void altq_etherclassify(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
 #endif
 #endif /* _KERNEL */
 
 #endif /* _ALTQ_IF_ALTQ_H_ */
Index: user/ngie/more-tests/sys/net/if_types.h
===================================================================
--- user/ngie/more-tests/sys/net/if_types.h	(revision 281675)
+++ user/ngie/more-tests/sys/net/if_types.h	(revision 281676)
@@ -1,252 +1,257 @@
 /*-
  * Copyright (c) 1989, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if_types.h	8.3 (Berkeley) 4/28/95
  * $FreeBSD$
  * $NetBSD: if_types.h,v 1.16 2000/04/19 06:30:53 itojun Exp $
  */
 
 #ifndef _NET_IF_TYPES_H_
 #define _NET_IF_TYPES_H_
 
 /*
  * Interface types for benefit of parsing media address headers.
  * This list is derived from the SNMP list of ifTypes, originally
  * documented in RFC1573, now maintained as:
  *
  * 	http://www.iana.org/assignments/smi-numbers
  */
 
-#define	IFT_OTHER	0x1		/* none of the following */
-#define	IFT_1822	0x2		/* old-style arpanet imp */
-#define	IFT_HDH1822	0x3		/* HDH arpanet imp */
-#define	IFT_X25DDN	0x4		/* x25 to imp */
-#define	IFT_X25		0x5		/* PDN X25 interface (RFC877) */
-#define	IFT_ETHER	0x6		/* Ethernet CSMA/CD */
-#define	IFT_ISO88023	0x7		/* CMSA/CD */
-#define	IFT_ISO88024	0x8		/* Token Bus */
-#define	IFT_ISO88025	0x9		/* Token Ring */
-#define	IFT_ISO88026	0xa		/* MAN */
-#define	IFT_STARLAN	0xb
-#define	IFT_P10		0xc		/* Proteon 10MBit ring */
-#define	IFT_P80		0xd		/* Proteon 80MBit ring */
-#define	IFT_HY		0xe		/* Hyperchannel */
-#define	IFT_FDDI	0xf
-#define	IFT_LAPB	0x10
-#define	IFT_SDLC	0x11
-#define	IFT_T1		0x12
-#define	IFT_CEPT	0x13		/* E1 - european T1 */
-#define	IFT_ISDNBASIC	0x14
-#define	IFT_ISDNPRIMARY	0x15
-#define	IFT_PTPSERIAL	0x16		/* Proprietary PTP serial */
-#define	IFT_PPP		0x17		/* RFC 1331 */
-#define	IFT_LOOP	0x18		/* loopback */
-#define	IFT_EON		0x19		/* ISO over IP */
-#define	IFT_XETHER	0x1a		/* obsolete 3MB experimental ethernet */
-#define	IFT_NSIP	0x1b		/* XNS over IP */
-#define	IFT_SLIP	0x1c		/* IP over generic TTY */
-#define	IFT_ULTRA	0x1d		/* Ultra Technologies */
-#define	IFT_DS3		0x1e		/* Generic T3 */
-#define	IFT_SIP		0x1f		/* SMDS */
-#define	IFT_FRELAY	0x20		/* Frame Relay DTE only */
-#define	IFT_RS232	0x21
-#define	IFT_PARA	0x22		/* parallel-port */
-#define	IFT_ARCNET	0x23
-#define	IFT_ARCNETPLUS	0x24
-#define	IFT_ATM		0x25		/* ATM cells */
-#define	IFT_MIOX25	0x26
-#define	IFT_SONET	0x27		/* SONET or SDH */
-#define	IFT_X25PLE	0x28
-#define	IFT_ISO88022LLC	0x29
-#define	IFT_LOCALTALK	0x2a
-#define	IFT_SMDSDXI	0x2b
-#define	IFT_FRELAYDCE	0x2c		/* Frame Relay DCE */
-#define	IFT_V35		0x2d
-#define	IFT_HSSI	0x2e
-#define	IFT_HIPPI	0x2f
-#define	IFT_MODEM	0x30		/* Generic Modem */
-#define	IFT_AAL5	0x31		/* AAL5 over ATM */
-#define	IFT_SONETPATH	0x32
-#define	IFT_SONETVT	0x33
-#define	IFT_SMDSICIP	0x34		/* SMDS InterCarrier Interface */
-#define	IFT_PROPVIRTUAL	0x35		/* Proprietary Virtual/internal */
-#define	IFT_PROPMUX	0x36		/* Proprietary Multiplexing */
-#define	IFT_IEEE80212		   0x37 /* 100BaseVG */
-#define	IFT_FIBRECHANNEL	   0x38 /* Fibre Channel */
-#define	IFT_HIPPIINTERFACE	   0x39 /* HIPPI interfaces	 */
-#define	IFT_FRAMERELAYINTERCONNECT 0x3a /* Obsolete, use either 0x20 or 0x2c */
-#define	IFT_AFLANE8023		   0x3b /* ATM Emulated LAN for 802.3 */
-#define	IFT_AFLANE8025		   0x3c /* ATM Emulated LAN for 802.5 */
-#define	IFT_CCTEMUL		   0x3d /* ATM Emulated circuit		  */
-#define	IFT_FASTETHER		   0x3e /* Fast Ethernet (100BaseT) */
-#define	IFT_ISDN		   0x3f /* ISDN and X.25	    */
-#define	IFT_V11			   0x40 /* CCITT V.11/X.21		*/
-#define	IFT_V36			   0x41 /* CCITT V.36			*/
-#define	IFT_G703AT64K		   0x42 /* CCITT G703 at 64Kbps */
-#define	IFT_G703AT2MB		   0x43 /* Obsolete see DS1-MIB */
-#define	IFT_QLLC		   0x44 /* SNA QLLC			*/
-#define	IFT_FASTETHERFX		   0x45 /* Fast Ethernet (100BaseFX)	*/
-#define	IFT_CHANNEL		   0x46 /* channel			*/
-#define	IFT_IEEE80211		   0x47 /* radio spread spectrum	*/
-#define	IFT_IBM370PARCHAN	   0x48 /* IBM System 360/370 OEMI Channel */
-#define	IFT_ESCON		   0x49 /* IBM Enterprise Systems Connection */
-#define	IFT_DLSW		   0x4a /* Data Link Switching */
-#define	IFT_ISDNS		   0x4b /* ISDN S/T interface */
-#define	IFT_ISDNU		   0x4c /* ISDN U interface */
-#define	IFT_LAPD		   0x4d /* Link Access Protocol D */
-#define	IFT_IPSWITCH		   0x4e /* IP Switching Objects */
-#define	IFT_RSRB		   0x4f /* Remote Source Route Bridging */
-#define	IFT_ATMLOGICAL		   0x50 /* ATM Logical Port */
-#define	IFT_DS0			   0x51 /* Digital Signal Level 0 */
-#define	IFT_DS0BUNDLE		   0x52 /* group of ds0s on the same ds1 */
-#define	IFT_BSC			   0x53 /* Bisynchronous Protocol */
-#define	IFT_ASYNC		   0x54 /* Asynchronous Protocol */
-#define	IFT_CNR			   0x55 /* Combat Net Radio */
-#define	IFT_ISO88025DTR		   0x56 /* ISO 802.5r DTR */
-#define	IFT_EPLRS		   0x57 /* Ext Pos Loc Report Sys */
-#define	IFT_ARAP		   0x58 /* Appletalk Remote Access Protocol */
-#define	IFT_PROPCNLS		   0x59 /* Proprietary Connectionless Protocol*/
-#define	IFT_HOSTPAD		   0x5a /* CCITT-ITU X.29 PAD Protocol */
-#define	IFT_TERMPAD		   0x5b /* CCITT-ITU X.3 PAD Facility */
-#define	IFT_FRAMERELAYMPI	   0x5c /* Multiproto Interconnect over FR */
-#define	IFT_X213		   0x5d /* CCITT-ITU X213 */
-#define	IFT_ADSL		   0x5e /* Asymmetric Digital Subscriber Loop */
-#define	IFT_RADSL		   0x5f /* Rate-Adapt. Digital Subscriber Loop*/
-#define	IFT_SDSL		   0x60 /* Symmetric Digital Subscriber Loop */
-#define	IFT_VDSL		   0x61 /* Very H-Speed Digital Subscrib. Loop*/
-#define	IFT_ISO88025CRFPINT	   0x62 /* ISO 802.5 CRFP */
-#define	IFT_MYRINET		   0x63 /* Myricom Myrinet */
-#define	IFT_VOICEEM		   0x64 /* voice recEive and transMit */
-#define	IFT_VOICEFXO		   0x65 /* voice Foreign Exchange Office */
-#define	IFT_VOICEFXS		   0x66 /* voice Foreign Exchange Station */
-#define	IFT_VOICEENCAP		   0x67 /* voice encapsulation */
-#define	IFT_VOICEOVERIP		   0x68 /* voice over IP encapsulation */
-#define	IFT_ATMDXI		   0x69 /* ATM DXI */
-#define	IFT_ATMFUNI		   0x6a /* ATM FUNI */
-#define	IFT_ATMIMA		   0x6b /* ATM IMA		      */
-#define	IFT_PPPMULTILINKBUNDLE	   0x6c /* PPP Multilink Bundle */
-#define	IFT_IPOVERCDLC		   0x6d /* IBM ipOverCdlc */
-#define	IFT_IPOVERCLAW		   0x6e /* IBM Common Link Access to Workstn */
-#define	IFT_STACKTOSTACK	   0x6f /* IBM stackToStack */
-#define	IFT_VIRTUALIPADDRESS	   0x70 /* IBM VIPA */
-#define	IFT_MPC			   0x71 /* IBM multi-protocol channel support */
-#define	IFT_IPOVERATM		   0x72 /* IBM ipOverAtm */
-#define	IFT_ISO88025FIBER	   0x73 /* ISO 802.5j Fiber Token Ring */
-#define	IFT_TDLC		   0x74 /* IBM twinaxial data link control */
-#define	IFT_GIGABITETHERNET	   0x75 /* Gigabit Ethernet */
-#define	IFT_HDLC		   0x76 /* HDLC */
-#define	IFT_LAPF		   0x77 /* LAP F */
-#define	IFT_V37			   0x78 /* V.37 */
-#define	IFT_X25MLP		   0x79 /* Multi-Link Protocol */
-#define	IFT_X25HUNTGROUP	   0x7a /* X25 Hunt Group */
-#define	IFT_TRANSPHDLC		   0x7b /* Transp HDLC */
-#define	IFT_INTERLEAVE		   0x7c /* Interleave channel */
-#define	IFT_FAST		   0x7d /* Fast channel */
-#define	IFT_IP			   0x7e /* IP (for APPN HPR in IP networks) */
-#define	IFT_DOCSCABLEMACLAYER	   0x7f /* CATV Mac Layer */
-#define	IFT_DOCSCABLEDOWNSTREAM	   0x80 /* CATV Downstream interface */
-#define	IFT_DOCSCABLEUPSTREAM	   0x81 /* CATV Upstream interface */
-#define	IFT_A12MPPSWITCH	   0x82	/* Avalon Parallel Processor */
-#define	IFT_TUNNEL		   0x83	/* Encapsulation interface */
-#define	IFT_COFFEE		   0x84	/* coffee pot */
-#define	IFT_CES			   0x85	/* Circiut Emulation Service */
-#define	IFT_ATMSUBINTERFACE	   0x86	/* (x)  ATM Sub Interface */
-#define	IFT_L2VLAN		   0x87	/* Layer 2 Virtual LAN using 802.1Q */
-#define	IFT_L3IPVLAN		   0x88	/* Layer 3 Virtual LAN - IP Protocol */
-#define	IFT_L3IPXVLAN		   0x89	/* Layer 3 Virtual LAN - IPX Prot. */
-#define	IFT_DIGITALPOWERLINE	   0x8a	/* IP over Power Lines */
-#define	IFT_MEDIAMAILOVERIP	   0x8b	/* (xxx)  Multimedia Mail over IP */
-#define	IFT_DTM			   0x8c	/* Dynamic synchronous Transfer Mode */
-#define	IFT_DCN			   0x8d	/* Data Communications Network */
-#define	IFT_IPFORWARD		   0x8e	/* IP Forwarding Interface */
-#define	IFT_MSDSL		   0x8f	/* Multi-rate Symmetric DSL */
-#define	IFT_IEEE1394		   0x90	/* IEEE1394 High Performance SerialBus*/
-#define	IFT_IFGSN		   0x91	/* HIPPI-6400 */
-#define	IFT_DVBRCCMACLAYER	   0x92	/* DVB-RCC MAC Layer */
-#define	IFT_DVBRCCDOWNSTREAM	   0x93	/* DVB-RCC Downstream Channel */
-#define	IFT_DVBRCCUPSTREAM	   0x94	/* DVB-RCC Upstream Channel */
-#define	IFT_ATMVIRTUAL		   0x95	/* ATM Virtual Interface */
-#define	IFT_MPLSTUNNEL		   0x96	/* MPLS Tunnel Virtual Interface */
-#define	IFT_SRP			   0x97	/* Spatial Reuse Protocol */
-#define	IFT_VOICEOVERATM	   0x98	/* Voice over ATM */
-#define	IFT_VOICEOVERFRAMERELAY	   0x99	/* Voice Over Frame Relay */
-#define	IFT_IDSL		   0x9a	/* Digital Subscriber Loop over ISDN */
-#define	IFT_COMPOSITELINK	   0x9b	/* Avici Composite Link Interface */
-#define	IFT_SS7SIGLINK		   0x9c	/* SS7 Signaling Link */
-#define	IFT_PROPWIRELESSP2P	   0x9d	/* Prop. P2P wireless interface */
-#define	IFT_FRFORWARD		   0x9e	/* Frame forward Interface */
-#define	IFT_RFC1483		   0x9f	/* Multiprotocol over ATM AAL5 */
-#define	IFT_USB			   0xa0	/* USB Interface */
-#define	IFT_IEEE8023ADLAG	   0xa1	/* IEEE 802.3ad Link Aggregate*/
-#define	IFT_BGPPOLICYACCOUNTING	   0xa2	/* BGP Policy Accounting */
-#define	IFT_FRF16MFRBUNDLE	   0xa3	/* FRF.16 Multilik Frame Relay*/
-#define	IFT_H323GATEKEEPER	   0xa4	/* H323 Gatekeeper */
-#define	IFT_H323PROXY		   0xa5	/* H323 Voice and Video Proxy */
-#define	IFT_MPLS		   0xa6	/* MPLS */
-#define	IFT_MFSIGLINK		   0xa7	/* Multi-frequency signaling link */
-#define	IFT_HDSL2		   0xa8	/* High Bit-Rate DSL, 2nd gen. */
-#define	IFT_SHDSL		   0xa9	/* Multirate HDSL2 */
-#define	IFT_DS1FDL		   0xaa	/* Facility Data Link (4Kbps) on a DS1*/
-#define	IFT_POS			   0xab	/* Packet over SONET/SDH Interface */
-#define	IFT_DVBASILN		   0xac	/* DVB-ASI Input */
-#define	IFT_DVBASIOUT		   0xad	/* DVB-ASI Output */
-#define	IFT_PLC			   0xae	/* Power Line Communications */
-#define	IFT_NFAS		   0xaf	/* Non-Facility Associated Signaling */
-#define	IFT_TR008		   0xb0	/* TROO8 */
-#define	IFT_GR303RDT		   0xb1	/* Remote Digital Terminal */
-#define	IFT_GR303IDT		   0xb2	/* Integrated Digital Terminal */
-#define	IFT_ISUP		   0xb3	/* ISUP */
-#define	IFT_PROPDOCSWIRELESSMACLAYER	   0xb4	/* prop/Wireless MAC Layer */
-#define	IFT_PROPDOCSWIRELESSDOWNSTREAM	   0xb5	/* prop/Wireless Downstream */
-#define	IFT_PROPDOCSWIRELESSUPSTREAM	   0xb6	/* prop/Wireless Upstream */
-#define	IFT_HIPERLAN2		   0xb7	/* HIPERLAN Type 2 Radio Interface */
-#define	IFT_PROPBWAP2MP		   0xb8	/* PropBroadbandWirelessAccess P2MP*/
-#define	IFT_SONETOVERHEADCHANNEL   0xb9	/* SONET Overhead Channel */
-#define	IFT_DIGITALWRAPPEROVERHEADCHANNEL  0xba	/* Digital Wrapper Overhead */
-#define	IFT_AAL2		   0xbb	/* ATM adaptation layer 2 */
-#define	IFT_RADIOMAC		   0xbc	/* MAC layer over radio links */
-#define	IFT_ATMRADIO		   0xbd	/* ATM over radio links */
-#define	IFT_IMT			   0xbe /* Inter-Machine Trunks */
-#define	IFT_MVL			   0xbf /* Multiple Virtual Lines DSL */
-#define	IFT_REACHDSL		   0xc0 /* Long Reach DSL */
-#define	IFT_FRDLCIENDPT		   0xc1 /* Frame Relay DLCI End Point */
-#define	IFT_ATMVCIENDPT		   0xc2 /* ATM VCI End Point */
-#define	IFT_OPTICALCHANNEL	   0xc3 /* Optical Channel */
-#define	IFT_OPTICALTRANSPORT	   0xc4 /* Optical Transport */
-#define	IFT_INFINIBAND		   0xc7	/* Infiniband */
-#define	IFT_BRIDGE		   0xd1 /* Transparent bridge interface */
+typedef enum {
+	IFT_OTHER	= 0x1,		/* none of the following */
+	IFT_1822	= 0x2,		/* old-style arpanet imp */
+	IFT_HDH1822	= 0x3,		/* HDH arpanet imp */
+	IFT_X25DDN	= 0x4,		/* x25 to imp */
+	IFT_X25		= 0x5,		/* PDN X25 interface (RFC877) */
+	IFT_ETHER	= 0x6,		/* Ethernet CSMA/CD */
+	IFT_ISO88023	= 0x7,		/* CSMA/CD */
+	IFT_ISO88024	= 0x8,		/* Token Bus */
+	IFT_ISO88025	= 0x9,		/* Token Ring */
+	IFT_ISO88026	= 0xa,		/* MAN */
+	IFT_STARLAN	= 0xb,
+	IFT_P10		= 0xc,		/* Proteon 10MBit ring */
+	IFT_P80		= 0xd,		/* Proteon 80MBit ring */
+	IFT_HY		= 0xe,		/* Hyperchannel */
+	IFT_FDDI	= 0xf,
+	IFT_LAPB	= 0x10,
+	IFT_SDLC	= 0x11,
+	IFT_T1		= 0x12,
+	IFT_CEPT	= 0x13,		/* E1 - european T1 */
+	IFT_ISDNBASIC	= 0x14,
+	IFT_ISDNPRIMARY	= 0x15,
+	IFT_PTPSERIAL	= 0x16,		/* Proprietary PTP serial */
+	IFT_PPP		= 0x17,		/* RFC 1331 */
+	IFT_LOOP	= 0x18,		/* loopback */
+	IFT_EON		= 0x19,		/* ISO over IP */
+	IFT_XETHER	= 0x1a,		/* obsolete 3MB experimental ethernet */
+	IFT_NSIP	= 0x1b,		/* XNS over IP */
+	IFT_SLIP	= 0x1c,		/* IP over generic TTY */
+	IFT_ULTRA	= 0x1d,		/* Ultra Technologies */
+	IFT_DS3		= 0x1e,		/* Generic T3 */
+	IFT_SIP		= 0x1f,		/* SMDS */
+	IFT_FRELAY	= 0x20,		/* Frame Relay DTE only */
+	IFT_RS232	= 0x21,
+	IFT_PARA	= 0x22,		/* parallel-port */
+	IFT_ARCNET	= 0x23,
+	IFT_ARCNETPLUS	= 0x24,
+	IFT_ATM		= 0x25,		/* ATM cells */
+	IFT_MIOX25	= 0x26,
+	IFT_SONET	= 0x27,		/* SONET or SDH */
+	IFT_X25PLE	= 0x28,
+	IFT_ISO88022LLC	= 0x29,
+	IFT_LOCALTALK	= 0x2a,
+	IFT_SMDSDXI	= 0x2b,
+	IFT_FRELAYDCE	= 0x2c,		/* Frame Relay DCE */
+	IFT_V35		= 0x2d,
+	IFT_HSSI	= 0x2e,
+	IFT_HIPPI	= 0x2f,
+	IFT_MODEM	= 0x30,		/* Generic Modem */
+	IFT_AAL5	= 0x31,		/* AAL5 over ATM */
+	IFT_SONETPATH	= 0x32,
+	IFT_SONETVT	= 0x33,
+	IFT_SMDSICIP	= 0x34,		/* SMDS InterCarrier Interface */
+	IFT_PROPVIRTUAL	= 0x35,		/* Proprietary Virtual/internal */
+	IFT_PROPMUX	= 0x36,		/* Proprietary Multiplexing */
+	IFT_IEEE80212	= 0x37,		/* 100BaseVG */
+	IFT_FIBRECHANNEL = 0x38,	/* Fibre Channel */
+	IFT_HIPPIINTERFACE = 0x39,	/* HIPPI interfaces	 */
+	IFT_FRAMERELAYINTERCONNECT = 0x3a, /* Obsolete, use 0x20 or 0x2c */
+	IFT_AFLANE8023	= 0x3b,		/* ATM Emulated LAN for 802.3 */
+	IFT_AFLANE8025	= 0x3c,		/* ATM Emulated LAN for 802.5 */
+	IFT_CCTEMUL	= 0x3d,		/* ATM Emulated circuit		  */
+	IFT_FASTETHER	= 0x3e,		/* Fast Ethernet (100BaseT) */
+	IFT_ISDN	= 0x3f,		/* ISDN and X.25	    */
+	IFT_V11		= 0x40,		/* CCITT V.11/X.21		*/
+	IFT_V36		= 0x41,		/* CCITT V.36			*/
+	IFT_G703AT64K	= 0x42,		/* CCITT G703 at 64Kbps */
+	IFT_G703AT2MB	= 0x43,		/* Obsolete see DS1-MIB */
+	IFT_QLLC	= 0x44,		/* SNA QLLC			*/
+	IFT_FASTETHERFX	= 0x45,		/* Fast Ethernet (100BaseFX)	*/
+	IFT_CHANNEL	= 0x46,		/* channel			*/
+	IFT_IEEE80211	= 0x47,		/* radio spread spectrum	*/
+	IFT_IBM370PARCHAN = 0x48,	/* IBM System 360/370 OEMI Channel */
+	IFT_ESCON	= 0x49,		/* IBM Enterprise Systems Connection */
+	IFT_DLSW	= 0x4a,		/* Data Link Switching */
+	IFT_ISDNS	= 0x4b,		/* ISDN S/T interface */
+	IFT_ISDNU	= 0x4c,		/* ISDN U interface */
+	IFT_LAPD	= 0x4d,		/* Link Access Protocol D */
+	IFT_IPSWITCH	= 0x4e,		/* IP Switching Objects */
+	IFT_RSRB	= 0x4f,		/* Remote Source Route Bridging */
+	IFT_ATMLOGICAL	= 0x50,		/* ATM Logical Port */
+	IFT_DS0		= 0x51,		/* Digital Signal Level 0 */
+	IFT_DS0BUNDLE	= 0x52,		/* group of ds0s on the same ds1 */
+	IFT_BSC		= 0x53,		/* Bisynchronous Protocol */
+	IFT_ASYNC	= 0x54,		/* Asynchronous Protocol */
+	IFT_CNR		= 0x55,		/* Combat Net Radio */
+	IFT_ISO88025DTR	= 0x56,		/* ISO 802.5r DTR */
+	IFT_EPLRS	= 0x57,		/* Ext Pos Loc Report Sys */
+	IFT_ARAP	= 0x58,		/* Appletalk Remote Access Protocol */
+	IFT_PROPCNLS	= 0x59,		/* Proprietary Connectionless Protocol*/
+	IFT_HOSTPAD	= 0x5a,		/* CCITT-ITU X.29 PAD Protocol */
+	IFT_TERMPAD	= 0x5b,		/* CCITT-ITU X.3 PAD Facility */
+	IFT_FRAMERELAYMPI = 0x5c,	/* Multiproto Interconnect over FR */
+	IFT_X213	= 0x5d,		/* CCITT-ITU X213 */
+	IFT_ADSL	= 0x5e,		/* Asymmetric Digital Subscriber Loop */
+	IFT_RADSL	= 0x5f,		/* Rate-Adapt. Digital Subscriber Loop*/
+	IFT_SDSL	= 0x60,		/* Symmetric Digital Subscriber Loop */
+	IFT_VDSL	= 0x61,		/* Very H-Speed Digital Subscrib. Loop*/
+	IFT_ISO88025CRFPINT = 0x62,	/* ISO 802.5 CRFP */
+	IFT_MYRINET	= 0x63,		/* Myricom Myrinet */
+	IFT_VOICEEM	= 0x64,		/* voice recEive and transMit */
+	IFT_VOICEFXO	= 0x65,		/* voice Foreign Exchange Office */
+	IFT_VOICEFXS	= 0x66,		/* voice Foreign Exchange Station */
+	IFT_VOICEENCAP	= 0x67,		/* voice encapsulation */
+	IFT_VOICEOVERIP	= 0x68,		/* voice over IP encapsulation */
+	IFT_ATMDXI	= 0x69,		/* ATM DXI */
+	IFT_ATMFUNI	= 0x6a,		/* ATM FUNI */
+	IFT_ATMIMA	= 0x6b,		/* ATM IMA		      */
+	IFT_PPPMULTILINKBUNDLE = 0x6c,	/* PPP Multilink Bundle */
+	IFT_IPOVERCDLC	= 0x6d,		/* IBM ipOverCdlc */
+	IFT_IPOVERCLAW	= 0x6e,		/* IBM Common Link Access to Workstn */
+	IFT_STACKTOSTACK = 0x6f,	/* IBM stackToStack */
+	IFT_VIRTUALIPADDRESS = 0x70,	/* IBM VIPA */
+	IFT_MPC		= 0x71,		/* IBM multi-protocol channel support */
+	IFT_IPOVERATM	= 0x72,		/* IBM ipOverAtm */
+	IFT_ISO88025FIBER = 0x73,	/* ISO 802.5j Fiber Token Ring */
+	IFT_TDLC	= 0x74,		/* IBM twinaxial data link control */
+	IFT_GIGABITETHERNET = 0x75,	/* Gigabit Ethernet */
+	IFT_HDLC	= 0x76,		/* HDLC */
+	IFT_LAPF	= 0x77,		/* LAP F */
+	IFT_V37		= 0x78,		/* V.37 */
+	IFT_X25MLP	= 0x79,		/* Multi-Link Protocol */
+	IFT_X25HUNTGROUP = 0x7a,	/* X25 Hunt Group */
+	IFT_TRANSPHDLC	= 0x7b,		/* Transp HDLC */
+	IFT_INTERLEAVE	= 0x7c,		/* Interleave channel */
+	IFT_FAST	= 0x7d,		/* Fast channel */
+	IFT_IP		= 0x7e,		/* IP (for APPN HPR in IP networks) */
+	IFT_DOCSCABLEMACLAYER = 0x7f,	/* CATV Mac Layer */
+	IFT_DOCSCABLEDOWNSTREAM = 0x80,	/* CATV Downstream interface */
+	IFT_DOCSCABLEUPSTREAM = 0x81,	/* CATV Upstream interface */
+	IFT_A12MPPSWITCH = 0x82,	/* Avalon Parallel Processor */
+	IFT_TUNNEL	= 0x83,		/* Encapsulation interface */
+	IFT_COFFEE	= 0x84,		/* coffee pot */
+	IFT_CES		= 0x85,		/* Circuit Emulation Service */
+	IFT_ATMSUBINTERFACE = 0x86,	/* (x)  ATM Sub Interface */
+	IFT_L2VLAN	= 0x87,		/* Layer 2 Virtual LAN using 802.1Q */
+	IFT_L3IPVLAN	= 0x88,		/* Layer 3 Virtual LAN - IP Protocol */
+	IFT_L3IPXVLAN	= 0x89,		/* Layer 3 Virtual LAN - IPX Prot. */
+	IFT_DIGITALPOWERLINE = 0x8a,	/* IP over Power Lines */
+	IFT_MEDIAMAILOVERIP = 0x8b,	/* (xxx)  Multimedia Mail over IP */
+	IFT_DTM		= 0x8c,		/* Dynamic synchronous Transfer Mode */
+	IFT_DCN		= 0x8d,		/* Data Communications Network */
+	IFT_IPFORWARD	= 0x8e,		/* IP Forwarding Interface */
+	IFT_MSDSL	= 0x8f,		/* Multi-rate Symmetric DSL */
+	IFT_IEEE1394	= 0x90,		/* IEEE1394 High Performance SerialBus*/
+	IFT_IFGSN	= 0x91,		/* HIPPI-6400 */
+	IFT_DVBRCCMACLAYER = 0x92,	/* DVB-RCC MAC Layer */
+	IFT_DVBRCCDOWNSTREAM = 0x93,	/* DVB-RCC Downstream Channel */
+	IFT_DVBRCCUPSTREAM = 0x94,	/* DVB-RCC Upstream Channel */
+	IFT_ATMVIRTUAL	= 0x95,		/* ATM Virtual Interface */
+	IFT_MPLSTUNNEL	= 0x96,		/* MPLS Tunnel Virtual Interface */
+	IFT_SRP		= 0x97,		/* Spatial Reuse Protocol */
+	IFT_VOICEOVERATM = 0x98,	/* Voice over ATM */
+	IFT_VOICEOVERFRAMERELAY	= 0x99,	/* Voice Over Frame Relay */
+	IFT_IDSL	= 0x9a,		/* Digital Subscriber Loop over ISDN */
+	IFT_COMPOSITELINK = 0x9b,	/* Avici Composite Link Interface */
+	IFT_SS7SIGLINK	= 0x9c,		/* SS7 Signaling Link */
+	IFT_PROPWIRELESSP2P = 0x9d,	/* Prop. P2P wireless interface */
+	IFT_FRFORWARD	= 0x9e,		/* Frame forward Interface */
+	IFT_RFC1483	= 0x9f,		/* Multiprotocol over ATM AAL5 */
+	IFT_USB		= 0xa0,		/* USB Interface */
+	IFT_IEEE8023ADLAG = 0xa1,	/* IEEE 802.3ad Link Aggregate*/
+	IFT_BGPPOLICYACCOUNTING = 0xa2,	/* BGP Policy Accounting */
+	IFT_FRF16MFRBUNDLE = 0xa3,	/* FRF.16 Multilink Frame Relay */
+	IFT_H323GATEKEEPER = 0xa4,	/* H323 Gatekeeper */
+	IFT_H323PROXY	= 0xa5,		/* H323 Voice and Video Proxy */
+	IFT_MPLS	= 0xa6,		/* MPLS */
+	IFT_MFSIGLINK	= 0xa7,		/* Multi-frequency signaling link */
+	IFT_HDSL2	= 0xa8,		/* High Bit-Rate DSL, 2nd gen. */
+	IFT_SHDSL	= 0xa9,		/* Multirate HDSL2 */
+	IFT_DS1FDL	= 0xaa,		/* Facility Data Link (4Kbps) on a DS1*/
+	IFT_POS		= 0xab,		/* Packet over SONET/SDH Interface */
+	IFT_DVBASILN	= 0xac,		/* DVB-ASI Input */
+	IFT_DVBASIOUT	= 0xad,		/* DVB-ASI Output */
+	IFT_PLC		= 0xae,		/* Power Line Communications */
+	IFT_NFAS	= 0xaf,		/* Non-Facility Associated Signaling */
+	IFT_TR008	= 0xb0,		/* TR008 */
+	IFT_GR303RDT	= 0xb1,		/* Remote Digital Terminal */
+	IFT_GR303IDT	= 0xb2,		/* Integrated Digital Terminal */
+	IFT_ISUP	= 0xb3,		/* ISUP */
+	IFT_PROPDOCSWIRELESSMACLAYER = 0xb4,	/* prop/Wireless MAC Layer */
+	IFT_PROPDOCSWIRELESSDOWNSTREAM = 0xb5,	/* prop/Wireless Downstream */
+	IFT_PROPDOCSWIRELESSUPSTREAM = 0xb6,	/* prop/Wireless Upstream */
+	IFT_HIPERLAN2	= 0xb7,		/* HIPERLAN Type 2 Radio Interface */
+	IFT_PROPBWAP2MP	= 0xb8,		/* PropBroadbandWirelessAccess P2MP*/
+	IFT_SONETOVERHEADCHANNEL = 0xb9, /* SONET Overhead Channel */
+	IFT_DIGITALWRAPPEROVERHEADCHANNEL = 0xba, /* Digital Wrapper Overhead */
+	IFT_AAL2	= 0xbb,		/* ATM adaptation layer 2 */
+	IFT_RADIOMAC	= 0xbc,		/* MAC layer over radio links */
+	IFT_ATMRADIO	= 0xbd,		/* ATM over radio links */
+	IFT_IMT		= 0xbe,		/* Inter-Machine Trunks */
+	IFT_MVL		= 0xbf,		/* Multiple Virtual Lines DSL */
+	IFT_REACHDSL	= 0xc0,		/* Long Reach DSL */
+	IFT_FRDLCIENDPT	= 0xc1,		/* Frame Relay DLCI End Point */
+	IFT_ATMVCIENDPT	= 0xc2,		/* ATM VCI End Point */
+	IFT_OPTICALCHANNEL = 0xc3,	/* Optical Channel */
+	IFT_OPTICALTRANSPORT = 0xc4,	/* Optical Transport */
+	IFT_INFINIBAND	= 0xc7,		/* Infiniband */
+	IFT_BRIDGE	= 0xd1,		/* Transparent bridge interface */
+	IFT_STF		= 0xd7,		/* 6to4 interface */
 
-#define	IFT_STF			   0xd7	/* 6to4 interface */
-
-/* FreeBSD specific, not based on IANA assignments */
-#define	IFT_GIF		0xf0 /* Generic tunnel interface */
-#define	IFT_PVC		0xf1 /* Unused */
-#define	IFT_ENC		0xf4 /* Encapsulating interface */
-#define	IFT_PFLOG	0xf6 /* PF packet filter logging */
-#define	IFT_PFSYNC	0xf7 /* PF packet filter synchronization */
+	/*
+	 * Not based on IANA assignments; these values conflict with them.
+	 * We should probably make them negative, but that
+	 * requires changes to struct if_data.
+	 */
+	IFT_GIF		= 0xf0,		/* Generic tunnel interface */
+	IFT_PVC		= 0xf1,		/* Unused */
+	IFT_ENC		= 0xf4,		/* Encapsulating interface */
+	IFT_PFLOG	= 0xf6,		/* PF packet filter logging */
+	IFT_PFSYNC	= 0xf7,		/* PF packet filter synchronization */
+} ifType;
 #endif /* !_NET_IF_TYPES_H_ */
Index: user/ngie/more-tests/sys/netinet/in.c
===================================================================
--- user/ngie/more-tests/sys/netinet/in.c	(revision 281675)
+++ user/ngie/more-tests/sys/netinet/in.c	(revision 281676)
@@ -1,1268 +1,1292 @@
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * Copyright (C) 2001 WIDE Project.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in.c	8.4 (Berkeley) 1/9/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_mpath.h"
 
 #include <sys/param.h>
 #include <sys/eventhandler.h>
 #include <sys/systm.h>
 #include <sys/sockio.h>
 #include <sys/malloc.h>
 #include <sys/priv.h>
 #include <sys/socket.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/sx.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_llatbl.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/if_ether.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_carp.h>
 #include <netinet/igmp_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 
 static int in_aifaddr_ioctl(u_long, caddr_t, struct ifnet *, struct thread *);
 static int in_difaddr_ioctl(caddr_t, struct ifnet *, struct thread *);
 
 static void	in_socktrim(struct sockaddr_in *);
 static void	in_purgemaddrs(struct ifnet *);
 
 static VNET_DEFINE(int, nosameprefix);
 #define	V_nosameprefix			VNET(nosameprefix)
 SYSCTL_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(nosameprefix), 0,
 	"Refuse to create same prefixes on different interfaces");
 
 VNET_DECLARE(struct inpcbinfo, ripcbinfo);
 #define	V_ripcbinfo			VNET(ripcbinfo)
 
 static struct sx in_control_sx;
 SX_SYSINIT(in_control_sx, &in_control_sx, "in_control");
 
 /*
  * Return 1 if an internet address is for a ``local'' host
  * (one to which we have a connection).
  */
 int
 in_localaddr(struct in_addr in)
 {
 	register u_long i = ntohl(in.s_addr);
 	register struct in_ifaddr *ia;
 
 	IN_IFADDR_RLOCK();
 	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 		if ((i & ia->ia_subnetmask) == ia->ia_subnet) {
 			IN_IFADDR_RUNLOCK();
 			return (1);
 		}
 	}
 	IN_IFADDR_RUNLOCK();
 	return (0);
 }
 
 /*
  * Return 1 if an internet address is for the local host and configured
  * on one of its interfaces.
  */
 int
 in_localip(struct in_addr in)
 {
 	struct in_ifaddr *ia;
 
 	IN_IFADDR_RLOCK();
 	LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) {
 		if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr) {
 			IN_IFADDR_RUNLOCK();
 			return (1);
 		}
 	}
 	IN_IFADDR_RUNLOCK();
 	return (0);
 }
 
 /*
+ * Return 1 if an internet address is configured on an interface.
+ */
+int
+in_ifhasaddr(struct ifnet *ifp, struct in_addr in)
+{
+	struct ifaddr *ifa;
+	int found = 0;
+
+	/* Scan the interface's address list while holding its read lock. */
+	IF_ADDR_RLOCK(ifp);
+	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+		/* The in_ifaddr cast is applied to AF_INET entries only. */
+		if (ifa->ifa_addr->sa_family == AF_INET &&
+		    ((struct in_ifaddr *)ifa)->ia_addr.sin_addr.s_addr ==
+		    in.s_addr) {
+			found = 1;
+			break;
+		}
+	}
+	IF_ADDR_RUNLOCK(ifp);
+	return (found);
+}
+
+/*
  * Return a reference to the interface address which is different to
  * the supplied one but with same IP address value.
  */
 static struct in_ifaddr *
 in_localip_more(struct in_ifaddr *ia)
 {
 	in_addr_t in = IA_SIN(ia)->sin_addr.s_addr;
 	struct in_ifaddr *it;
 
 	IN_IFADDR_RLOCK();
 	LIST_FOREACH(it, INADDR_HASH(in), ia_hash) {
 		if (it != ia && IA_SIN(it)->sin_addr.s_addr == in) {
 			ifa_ref(&it->ia_ifa);
 			IN_IFADDR_RUNLOCK();
 			return (it);
 		}
 	}
 	IN_IFADDR_RUNLOCK();
 
 	return (NULL);
 }
 
 /*
  * Determine whether an IP address is in a reserved set of addresses
  * that may not be forwarded, or whether datagrams to that destination
  * may be forwarded.
  */
 int
 in_canforward(struct in_addr in)
 {
 	register u_long i = ntohl(in.s_addr);
 	register u_long net;
 
 	if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i))
 		return (0);
 	if (IN_CLASSA(i)) {
 		net = i & IN_CLASSA_NET;
 		if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
 			return (0);
 	}
 	return (1);
 }
 
 /*
  * Trim a mask in a sockaddr
  */
 static void
 in_socktrim(struct sockaddr_in *ap)
 {
     register char *cplim = (char *) &ap->sin_addr;
     register char *cp = (char *) (&ap->sin_addr + 1);
 
     ap->sin_len = 0;
     while (--cp >= cplim)
 	if (*cp) {
 	    (ap)->sin_len = cp - (char *) (ap) + 1;
 	    break;
 	}
 }
 
 /*
  * Generic internet control operations (ioctl's).
  */
 int
 in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
     struct thread *td)
 {
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct sockaddr_in *addr = (struct sockaddr_in *)&ifr->ifr_addr;
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 	int error;
 
 	if (ifp == NULL)
 		return (EADDRNOTAVAIL);
 
 	/*
 	 * Filter out 4 ioctls we implement directly.  Forward the rest
 	 * to specific functions and ifp->if_ioctl().
 	 */
 	switch (cmd) {
 	case SIOCGIFADDR:
 	case SIOCGIFBRDADDR:
 	case SIOCGIFDSTADDR:
 	case SIOCGIFNETMASK:
 		break;
 	case SIOCDIFADDR:
 		sx_xlock(&in_control_sx);
 		error = in_difaddr_ioctl(data, ifp, td);
 		sx_xunlock(&in_control_sx);
 		return (error);
 	case OSIOCAIFADDR:	/* 9.x compat */
 	case SIOCAIFADDR:
 		sx_xlock(&in_control_sx);
 		error = in_aifaddr_ioctl(cmd, data, ifp, td);
 		sx_xunlock(&in_control_sx);
 		return (error);
 	case SIOCSIFADDR:
 	case SIOCSIFBRDADDR:
 	case SIOCSIFDSTADDR:
 	case SIOCSIFNETMASK:
 		/* We no longer support that old commands. */
 		return (EINVAL);
 	default:
 		if (ifp->if_ioctl == NULL)
 			return (EOPNOTSUPP);
 		return ((*ifp->if_ioctl)(ifp, cmd, data));
 	}
 
 	if (addr->sin_addr.s_addr != INADDR_ANY &&
 	    prison_check_ip4(td->td_ucred, &addr->sin_addr) != 0)
 		return (EADDRNOTAVAIL);
 
 	/*
 	 * Find address for this interface, if it exists.  If an
 	 * address was specified, find that one instead of the
 	 * first one on the interface, if possible.
 	 */
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			continue;
 		ia = (struct in_ifaddr *)ifa;
 		if (ia->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr)
 			break;
 	}
 	if (ifa == NULL)
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 			if (ifa->ifa_addr->sa_family == AF_INET) {
 				ia = (struct in_ifaddr *)ifa;
 				if (prison_check_ip4(td->td_ucred,
 				    &ia->ia_addr.sin_addr) == 0)
 					break;
 			}
 
 	if (ifa == NULL) {
 		IF_ADDR_RUNLOCK(ifp);
 		return (EADDRNOTAVAIL);
 	}
 
 	error = 0;
 	switch (cmd) {
 	case SIOCGIFADDR:
 		*addr = ia->ia_addr;
 		break;
 
 	case SIOCGIFBRDADDR:
 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
 			error = EINVAL;
 			break;
 		}
 		*addr = ia->ia_broadaddr;
 		break;
 
 	case SIOCGIFDSTADDR:
 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
 			error = EINVAL;
 			break;
 		}
 		*addr = ia->ia_dstaddr;
 		break;
 
 	case SIOCGIFNETMASK:
 		*addr = ia->ia_sockmask;
 		break;
 	}
 
 	IF_ADDR_RUNLOCK(ifp);
 
 	return (error);
 }
 
 static int
 in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td)
 {
 	const struct in_aliasreq *ifra = (struct in_aliasreq *)data;
 	const struct sockaddr_in *addr = &ifra->ifra_addr;
 	const struct sockaddr_in *broadaddr = &ifra->ifra_broadaddr;
 	const struct sockaddr_in *mask = &ifra->ifra_mask;
 	const struct sockaddr_in *dstaddr = &ifra->ifra_dstaddr;
 	const int vhid = (cmd == SIOCAIFADDR) ? ifra->ifra_vhid : 0;
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 	bool iaIsFirst;
 	int error = 0;
 
 	error = priv_check(td, PRIV_NET_ADDIFADDR);
 	if (error)
 		return (error);
 
 	/*
 	 * ifra_addr must be present and be of INET family.
 	 * ifra_broadaddr/ifra_dstaddr and ifra_mask are optional.
 	 */
 	if (addr->sin_len != sizeof(struct sockaddr_in) ||
 	    addr->sin_family != AF_INET)
 		return (EINVAL);
 	if (broadaddr->sin_len != 0 &&
 	    (broadaddr->sin_len != sizeof(struct sockaddr_in) ||
 	    broadaddr->sin_family != AF_INET))
 		return (EINVAL);
 	if (mask->sin_len != 0 &&
 	    (mask->sin_len != sizeof(struct sockaddr_in) ||
 	    mask->sin_family != AF_INET))
 		return (EINVAL);
 	if ((ifp->if_flags & IFF_POINTOPOINT) &&
 	    (dstaddr->sin_len != sizeof(struct sockaddr_in) ||
 	     dstaddr->sin_addr.s_addr == INADDR_ANY))
 		return (EDESTADDRREQ);
 	if (vhid > 0 && carp_attach_p == NULL)
 		return (EPROTONOSUPPORT);
 
 	/*
 	 * See whether address already exist.
 	 */
 	iaIsFirst = true;
 	ia = NULL;
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		struct in_ifaddr *it;
 
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			continue;
 
 		it = (struct in_ifaddr *)ifa;
 		iaIsFirst = false;
 		if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr &&
 		    prison_check_ip4(td->td_ucred, &addr->sin_addr) == 0)
 			ia = it;
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	if (ia != NULL)
 		(void )in_difaddr_ioctl(data, ifp, td);
 
 	ifa = ifa_alloc(sizeof(struct in_ifaddr), M_WAITOK);
 	ia = (struct in_ifaddr *)ifa;
 	ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
 	ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
 	ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
 
 	ia->ia_ifp = ifp;
 	ia->ia_addr = *addr;
 	if (mask->sin_len != 0) {
 		ia->ia_sockmask = *mask;
 		ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
 	} else {
 		in_addr_t i = ntohl(addr->sin_addr.s_addr);
 
 		/*
 	 	 * Be compatible with network classes, if netmask isn't
 		 * supplied, guess it based on classes.
 	 	 */
 		if (IN_CLASSA(i))
 			ia->ia_subnetmask = IN_CLASSA_NET;
 		else if (IN_CLASSB(i))
 			ia->ia_subnetmask = IN_CLASSB_NET;
 		else
 			ia->ia_subnetmask = IN_CLASSC_NET;
 		ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
 	}
 	ia->ia_subnet = ntohl(addr->sin_addr.s_addr) & ia->ia_subnetmask;
 	in_socktrim(&ia->ia_sockmask);
 
 	if (ifp->if_flags & IFF_BROADCAST) {
 		if (broadaddr->sin_len != 0) {
 			ia->ia_broadaddr = *broadaddr;
 		} else if (ia->ia_subnetmask == IN_RFC3021_MASK) {
 			ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST;
 			ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in);
 			ia->ia_broadaddr.sin_family = AF_INET;
 		} else {
 			ia->ia_broadaddr.sin_addr.s_addr =
 			    htonl(ia->ia_subnet | ~ia->ia_subnetmask);
 			ia->ia_broadaddr.sin_len = sizeof(struct sockaddr_in);
 			ia->ia_broadaddr.sin_family = AF_INET;
 		}
 	}
 
 	if (ifp->if_flags & IFF_POINTOPOINT)
 		ia->ia_dstaddr = *dstaddr;
 
 	/* XXXGL: rtinit() needs this strange assignment. */
 	if (ifp->if_flags & IFF_LOOPBACK)
                 ia->ia_dstaddr = ia->ia_addr;
 
 	if (vhid != 0) {
 		error = (*carp_attach_p)(&ia->ia_ifa, vhid);
 		if (error)
 			return (error);
 	}
 
 	/* if_addrhead is already referenced by ifa_alloc() */
 	IF_ADDR_WLOCK(ifp);
 	TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
 	IF_ADDR_WUNLOCK(ifp);
 
 	ifa_ref(ifa);			/* in_ifaddrhead */
 	IN_IFADDR_WLOCK();
 	TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link);
 	LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
 	IN_IFADDR_WUNLOCK();
 
 	/*
 	 * Give the interface a chance to initialize
 	 * if this is its first address,
 	 * and to validate the address if necessary.
 	 */
 	if (ifp->if_ioctl != NULL) {
 		error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
 		if (error)
 			goto fail1;
 	}
 
 	/*
 	 * Add route for the network.
 	 */
 	if (vhid == 0) {
 		int flags = RTF_UP;
 
 		if (ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT))
 			flags |= RTF_HOST;
 
 		error = in_addprefix(ia, flags);
 		if (error)
 			goto fail1;
 	}
 
 	/*
 	 * Add a loopback route to self.
 	 */
 	if (vhid == 0 && (ifp->if_flags & IFF_LOOPBACK) == 0 &&
 	    ia->ia_addr.sin_addr.s_addr != INADDR_ANY &&
 	    !((ifp->if_flags & IFF_POINTOPOINT) &&
 	     ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)) {
 		struct in_ifaddr *eia;
 
 		eia = in_localip_more(ia);
 
 		if (eia == NULL) {
 			error = ifa_add_loopback_route((struct ifaddr *)ia,
 			    (struct sockaddr *)&ia->ia_addr);
 			if (error)
 				goto fail2;
 		} else
 			ifa_free(&eia->ia_ifa);
 	}
 
 	if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST)) {
 		struct in_addr allhosts_addr;
 		struct in_ifinfo *ii;
 
 		ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
 		allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
 
 		error = in_joingroup(ifp, &allhosts_addr, NULL,
 			&ii->ii_allhosts);
 	}
 
 	EVENTHANDLER_INVOKE(ifaddr_event, ifp);
 
 	return (error);
 
 fail2:
 	if (vhid == 0)
 		(void )in_scrubprefix(ia, LLE_STATIC);
 
 fail1:
 	if (ia->ia_ifa.ifa_carp)
 		(*carp_detach_p)(&ia->ia_ifa);
 
 	IF_ADDR_WLOCK(ifp);
 	TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
 	IF_ADDR_WUNLOCK(ifp);
 	ifa_free(&ia->ia_ifa);		/* if_addrhead */
 
 	IN_IFADDR_WLOCK();
 	TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
 	LIST_REMOVE(ia, ia_hash);
 	IN_IFADDR_WUNLOCK();
 	ifa_free(&ia->ia_ifa);		/* in_ifaddrhead */
 
 	return (error);
 }
 
 static int
 in_difaddr_ioctl(caddr_t data, struct ifnet *ifp, struct thread *td)
 {
 	const struct ifreq *ifr = (struct ifreq *)data;
 	const struct sockaddr_in *addr = (const struct sockaddr_in *)
 	    &ifr->ifr_addr;
 	struct ifaddr *ifa;
 	struct in_ifaddr *ia;
 	bool deleteAny, iaIsLast;
 	int error;
 
 	if (td != NULL) {
 		error = priv_check(td, PRIV_NET_DELIFADDR);
 		if (error)
 			return (error);
 	}
 
 	if (addr->sin_len != sizeof(struct sockaddr_in) ||
 	    addr->sin_family != AF_INET)
 		deleteAny = true;
 	else
 		deleteAny = false;
 
 	iaIsLast = true;
 	ia = NULL;
 	IF_ADDR_WLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		struct in_ifaddr *it;
 
 		if (ifa->ifa_addr->sa_family != AF_INET)
 			continue;
 
 		it = (struct in_ifaddr *)ifa;
 		if (deleteAny && ia == NULL && (td == NULL ||
 		    prison_check_ip4(td->td_ucred, &it->ia_addr.sin_addr) == 0))
 			ia = it;
 
 		if (it->ia_addr.sin_addr.s_addr == addr->sin_addr.s_addr &&
 		    (td == NULL || prison_check_ip4(td->td_ucred,
 		    &addr->sin_addr) == 0))
 			ia = it;
 
 		if (it != ia)
 			iaIsLast = false;
 	}
 
 	if (ia == NULL) {
 		IF_ADDR_WUNLOCK(ifp);
 		return (EADDRNOTAVAIL);
 	}
 
 	TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
 	IF_ADDR_WUNLOCK(ifp);
 	ifa_free(&ia->ia_ifa);		/* if_addrhead */
 
 	IN_IFADDR_WLOCK();
 	TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
 	LIST_REMOVE(ia, ia_hash);
 	IN_IFADDR_WUNLOCK();
 
 	/*
 	 * in_scrubprefix() kills the interface route.
 	 */
 	in_scrubprefix(ia, LLE_STATIC);
 
 	/*
 	 * in_ifadown gets rid of all the rest of
 	 * the routes.  This is not quite the right
 	 * thing to do, but at least if we are running
 	 * a routing process they will come back.
 	 */
 	in_ifadown(&ia->ia_ifa, 1);
 
 	if (ia->ia_ifa.ifa_carp)
 		(*carp_detach_p)(&ia->ia_ifa);
 
 	/*
 	 * If this is the last IPv4 address configured on this
 	 * interface, leave the all-hosts group.
 	 * No state-change report need be transmitted.
 	 */
 	if (iaIsLast && (ifp->if_flags & IFF_MULTICAST)) {
 		struct in_ifinfo *ii;
 
 		ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
 		IN_MULTI_LOCK();
 		if (ii->ii_allhosts) {
 			(void)in_leavegroup_locked(ii->ii_allhosts, NULL);
 			ii->ii_allhosts = NULL;
 		}
 		IN_MULTI_UNLOCK();
 	}
 
 	EVENTHANDLER_INVOKE(ifaddr_event, ifp);
 	ifa_free(&ia->ia_ifa);		/* in_ifaddrhead */
 
 	return (0);
 }
 
 #define rtinitflags(x) \
 	((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
 	    ? RTF_HOST : 0)
 
 /*
  * Check if we have a route for the given prefix already or add one accordingly.
  */
 int
 in_addprefix(struct in_ifaddr *target, int flags)
 {
 	struct in_ifaddr *ia;
 	struct in_addr prefix, mask, p, m;
 	int error;
 
 	if ((flags & RTF_HOST) != 0) {
 		prefix = target->ia_dstaddr.sin_addr;
 		mask.s_addr = 0;
 	} else {
 		prefix = target->ia_addr.sin_addr;
 		mask = target->ia_sockmask.sin_addr;
 		prefix.s_addr &= mask.s_addr;
 	}
 
 	IN_IFADDR_RLOCK();
 	/* Look for an existing address with the same prefix, mask, and fib */
 	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 		if (rtinitflags(ia)) {
 			p = ia->ia_dstaddr.sin_addr;
 
 			if (prefix.s_addr != p.s_addr)
 				continue;
 		} else {
 			p = ia->ia_addr.sin_addr;
 			m = ia->ia_sockmask.sin_addr;
 			p.s_addr &= m.s_addr;
 
 			if (prefix.s_addr != p.s_addr ||
 			    mask.s_addr != m.s_addr)
 				continue;
 		}
 		if (target->ia_ifp->if_fib != ia->ia_ifp->if_fib)
 			continue;
 
 		/*
 		 * If we got a matching prefix route inserted by other
 		 * interface address, we are done here.
 		 */
 		if (ia->ia_flags & IFA_ROUTE) {
 #ifdef RADIX_MPATH
 			if (ia->ia_addr.sin_addr.s_addr ==
 			    target->ia_addr.sin_addr.s_addr) {
 				IN_IFADDR_RUNLOCK();
 				return (EEXIST);
 			} else
 				break;
 #endif
 			if (V_nosameprefix) {
 				IN_IFADDR_RUNLOCK();
 				return (EEXIST);
 			} else {
 				int fibnum;
 
 				fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS :
 					target->ia_ifp->if_fib;
 				rt_addrmsg(RTM_ADD, &target->ia_ifa, fibnum);
 				IN_IFADDR_RUNLOCK();
 				return (0);
 			}
 		}
 	}
 	IN_IFADDR_RUNLOCK();
 
 	/*
 	 * No-one seem to have this prefix route, so we try to insert it.
 	 */
 	error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags);
 	if (!error)
 		target->ia_flags |= IFA_ROUTE;
 	return (error);
 }
 
 /*
  * If there is no other address in the system that can serve a route to the
  * same prefix, remove the route.  Hand over the route to the new address
  * otherwise.
  */
 int
 in_scrubprefix(struct in_ifaddr *target, u_int flags)
 {
 	struct in_ifaddr *ia;
 	struct in_addr prefix, mask, p, m;
 	int error = 0;
 	struct sockaddr_in prefix0, mask0;
 
 	/*
 	 * Remove the loopback route to the interface address.
 	 */
 	if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) &&
 	    !(target->ia_ifp->if_flags & IFF_LOOPBACK) &&
 	    (flags & LLE_STATIC)) {
 		struct in_ifaddr *eia;
 
 		eia = in_localip_more(target);
 
 		if (eia != NULL) {
 			int fibnum = target->ia_ifp->if_fib;
 
 			error = ifa_switch_loopback_route((struct ifaddr *)eia,
 			    (struct sockaddr *)&target->ia_addr, fibnum);
 			ifa_free(&eia->ia_ifa);
 		} else {
 			error = ifa_del_loopback_route((struct ifaddr *)target,
 			    (struct sockaddr *)&target->ia_addr);
 		}
 
 		if (!(target->ia_ifp->if_flags & IFF_NOARP))
 			/* remove arp cache */
 			arp_ifscrub(target->ia_ifp,
 			    IA_SIN(target)->sin_addr.s_addr);
 	}
 
 	if (rtinitflags(target)) {
 		prefix = target->ia_dstaddr.sin_addr;
 		mask.s_addr = 0;
 	} else {
 		prefix = target->ia_addr.sin_addr;
 		mask = target->ia_sockmask.sin_addr;
 		prefix.s_addr &= mask.s_addr;
 	}
 
 	if ((target->ia_flags & IFA_ROUTE) == 0) {
 		int fibnum;
 		
 		fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS :
 			target->ia_ifp->if_fib;
 		rt_addrmsg(RTM_DELETE, &target->ia_ifa, fibnum);
 		return (0);
 	}
 
 	IN_IFADDR_RLOCK();
 	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 		if (rtinitflags(ia)) {
 			p = ia->ia_dstaddr.sin_addr;
 
 			if (prefix.s_addr != p.s_addr)
 				continue;
 		} else {
 			p = ia->ia_addr.sin_addr;
 			m = ia->ia_sockmask.sin_addr;
 			p.s_addr &= m.s_addr;
 
 			if (prefix.s_addr != p.s_addr ||
 			    mask.s_addr != m.s_addr)
 				continue;
 		}
 
 		if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
 			continue;
 
 		/*
 		 * If we got a matching prefix address, move IFA_ROUTE and
 		 * the route itself to it.  Make sure that routing daemons
 		 * get a heads-up.
 		 */
 		if ((ia->ia_flags & IFA_ROUTE) == 0) {
 			ifa_ref(&ia->ia_ifa);
 			IN_IFADDR_RUNLOCK();
 			error = rtinit(&(target->ia_ifa), (int)RTM_DELETE,
 			    rtinitflags(target));
 			if (error == 0)
 				target->ia_flags &= ~IFA_ROUTE;
 			else
 				log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n",
 					error);
 			error = rtinit(&ia->ia_ifa, (int)RTM_ADD,
 			    rtinitflags(ia) | RTF_UP);
 			if (error == 0)
 				ia->ia_flags |= IFA_ROUTE;
 			else
 				log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n",
 					error);
 			ifa_free(&ia->ia_ifa);
 			return (error);
 		}
 	}
 	IN_IFADDR_RUNLOCK();
 
 	/*
 	 * remove all L2 entries on the given prefix
 	 */
 	bzero(&prefix0, sizeof(prefix0));
 	prefix0.sin_len = sizeof(prefix0);
 	prefix0.sin_family = AF_INET;
 	prefix0.sin_addr.s_addr = target->ia_subnet;
 	bzero(&mask0, sizeof(mask0));
 	mask0.sin_len = sizeof(mask0);
 	mask0.sin_family = AF_INET;
 	mask0.sin_addr.s_addr = target->ia_subnetmask;
 	lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0,
 	    (struct sockaddr *)&mask0, flags);
 
 	/*
 	 * As no-one seem to have this prefix, we can remove the route.
 	 */
 	error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target));
 	if (error == 0)
 		target->ia_flags &= ~IFA_ROUTE;
 	else
 		log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error);
 	return (error);
 }
 
 #undef rtinitflags
 
 /*
  * Return 1 if the address might be a local broadcast address.
  */
 int
 in_broadcast(struct in_addr in, struct ifnet *ifp)
 {
 	register struct ifaddr *ifa;
 	u_long t;
 
 	if (in.s_addr == INADDR_BROADCAST ||
 	    in.s_addr == INADDR_ANY)
 		return (1);
 	if ((ifp->if_flags & IFF_BROADCAST) == 0)
 		return (0);
 	t = ntohl(in.s_addr);
 	/*
 	 * Look through the list of addresses for a match
 	 * with a broadcast address.
 	 */
 #define ia ((struct in_ifaddr *)ifa)
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
 		if (ifa->ifa_addr->sa_family == AF_INET &&
 		    (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
 		     /*
 		      * Check for old-style (host 0) broadcast, but
 		      * taking into account that RFC 3021 obsoletes it.
 		      */
 		    (ia->ia_subnetmask != IN_RFC3021_MASK &&
 		    t == ia->ia_subnet)) &&
 		     /*
 		      * Check for an all one subnetmask. These
 		      * only exist when an interface gets a secondary
 		      * address.
 		      */
 		    ia->ia_subnetmask != (u_long)0xffffffff)
 			    return (1);
 	return (0);
 #undef ia
 }
 
 /*
  * On interface removal, clean up IPv4 data structures hung off of the ifnet.
  */
 void
 in_ifdetach(struct ifnet *ifp)
 {
 
 	in_pcbpurgeif0(&V_ripcbinfo, ifp);
 	in_pcbpurgeif0(&V_udbinfo, ifp);
 	in_pcbpurgeif0(&V_ulitecbinfo, ifp);
 	in_purgemaddrs(ifp);
 }
 
 /*
  * Delete all IPv4 multicast address records, and associated link-layer
  * multicast address records, associated with ifp.
  * XXX It looks like domifdetach runs AFTER the link layer cleanup.
  * XXX This should not race with ifma_protospec being set during
  * a new allocation, if it does, we have bigger problems.
  *
  * The whole operation runs under IN_MULTI_LOCK() and has two phases:
  *  1. with the interface address read lock held, collect every AF_INET
  *     ifmultiaddr that has a non-NULL ifma_protospec onto a private list;
  *  2. with that lock dropped, release each collected in_multi through
  *     inm_release_locked(), then call igmp_ifdetach().
  */
 static void
 in_purgemaddrs(struct ifnet *ifp)
 {
 	LIST_HEAD(,in_multi) purgeinms;
 	struct in_multi		*inm, *tinm;
 	struct ifmultiaddr	*ifma;
 
 	LIST_INIT(&purgeinms);
 	IN_MULTI_LOCK();
 
 	/*
 	 * Extract list of in_multi associated with the detaching ifp
 	 * which the PF_INET layer is about to release.
 	 * We need to do this as IF_ADDR_LOCK() may be re-acquired
 	 * by code further down.
 	 */
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		/* Skip non-IPv4 records and those with no in_multi attached. */
 		if (ifma->ifma_addr->sa_family != AF_INET ||
 		    ifma->ifma_protospec == NULL)
 			continue;
 #if 0
 		KASSERT(ifma->ifma_protospec != NULL,
 		    ("%s: ifma_protospec is NULL", __func__));
 #endif
 		inm = (struct in_multi *)ifma->ifma_protospec;
 		LIST_INSERT_HEAD(&purgeinms, inm, inm_link);
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	/*
 	 * Release the collected records.  The _SAFE iterator is required
 	 * because each element is unlinked inside the loop body.
 	 */
 	LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) {
 		LIST_REMOVE(inm, inm_link);
 		inm_release_locked(inm);
 	}
 	igmp_ifdetach(ifp);
 
 	IN_MULTI_UNLOCK();
 }
 
 /*
  * IPv4 flavour of a link-layer table entry: the generic llentry followed
  * by the IPv4 address it describes.  `base' is the first member, and
  * in_lltable_new() returns a pointer to it to its caller.
  */
 struct in_llentry {
 	struct llentry		base;
 	struct sockaddr_in	l3_addr4;
 };
 
 /*
  * Final destructor for an IPv4 link-layer entry; in_lltable_new() installs
  * it as the entry's lle_free callback.
  *
  * Drops the write lock on `lle', destroys that lock and returns the
  * memory to M_LLTABLE.  The caller does the locking: the entry has to be
  * write-locked on entry, and must not be touched after this returns.
  * The `llt' argument is not used here.
  *
  * NOTE(review): the previous comment said this is called by timer
  * functions such as arptimer() and nd6_llinfo_timer(); that cannot be
  * confirmed from this part of the file.
  */
 static void
 in_lltable_free(struct lltable *llt, struct llentry *lle)
 {
 	LLE_WUNLOCK(lle);
 	LLE_LOCK_DESTROY(lle);
 	free(lle, M_LLTABLE);
 }
 
 /*
  * Allocate and initialise a fresh IPv4 link-layer entry for `l3addr'.
  *
  * The allocation is zeroed and non-sleeping (M_NOWAIT | M_ZERO); NULL is
  * returned when it fails.  On success the entry holds one reference, has
  * its lock and callout initialised, and a pointer to the embedded generic
  * llentry is returned.  `flags' is not consulted here.
  */
 static struct llentry *
 in_lltable_new(const struct sockaddr *l3addr, u_int flags)
 {
 	struct in_llentry *entry;
 	struct llentry *base;
 
 	entry = malloc(sizeof(*entry), M_LLTABLE, M_NOWAIT | M_ZERO);
 	if (entry == NULL)		/* NB: caller generates msg */
 		return (NULL);
 	base = &entry->base;
 
 	entry->l3_addr4 = *(const struct sockaddr_in *)l3addr;
 
 	/*
 	 * Start out already expired: for IPv4 this will trigger
 	 * "arpresolve" to generate an ARP request.
 	 */
 	base->la_expire = time_uptime;
 	base->lle_refcnt = 1;
 	base->lle_free = in_lltable_free;
 	LLE_LOCK_INIT(base);
 	callout_init(&base->la_timer, 1);
 
 	return (base);
 }
 
 /*
  * True when address `d' and prefix `a' agree on every bit set in mask `m'.
  * All three arguments are pointers to struct sockaddr_in.  Only `d' is
  * passed through ntohl(); `a' and `m' are used exactly as stored.
  */
 #define IN_ARE_MASKED_ADDR_EQUAL(d, a, m)				\
 	((((ntohl((d)->sin_addr.s_addr) ^ (a)->sin_addr.s_addr) &	\
 	    (m)->sin_addr.s_addr)) == 0)
 
 /*
  * Free every entry of `llt' whose IPv4 address lies inside prefix/mask.
  *
  * Static entries (LLE_STATIC) are only removed when the caller also
  * passes LLE_STATIC in `flags'.  The walk covers all hash buckets with
  * the interface's AF data write lock held; the value returned by
  * llentry_free() for each removed entry is added to the ARP "dropped"
  * statistic.
  */
 static void
 in_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix,
     const struct sockaddr *mask, u_int flags)
 {
 	const struct sockaddr_in *pfx, *msk;
 	struct llentry *lle, *next;
 	size_t pkts_dropped;
 	int bucket;
 
 	pfx = (const struct sockaddr_in *)prefix;
 	msk = (const struct sockaddr_in *)mask;
 
 	IF_AFDATA_WLOCK(llt->llt_ifp);
 	for (bucket = 0; bucket < LLTBL_HASHTBL_SIZE; bucket++) {
 		LIST_FOREACH_SAFE(lle, &llt->lle_head[bucket], lle_next,
 		    next) {
 			/* Not covered by the prefix: leave it alone. */
 			if (!IN_ARE_MASKED_ADDR_EQUAL(satosin(L3_ADDR(lle)),
 			    pfx, msk))
 				continue;
 			/*
 			 * (flags & LLE_STATIC) means deleting all entries
 			 * including static ARP entries; without it, static
 			 * entries are preserved.
 			 */
 			if (!(flags & LLE_STATIC) &&
 			    (lle->la_flags & LLE_STATIC))
 				continue;
 
 			LLE_WLOCK(lle);
 			/* Drop the reference tied to a timer we stopped. */
 			if (callout_stop(&lle->la_timer))
 				LLE_REMREF(lle);
 			pkts_dropped = llentry_free(lle);
 			ARPSTAT_ADD(dropped, pkts_dropped);
 		}
 	}
 	IF_AFDATA_WUNLOCK(llt->llt_ifp);
 }
 
 
 /*
  * Check that the routing table permits a link-layer entry for `l3addr'
  * on interface `ifp'.
  *
  * Looks the address up with rtalloc1_fib() in the interface's FIB and
  * returns 0 when the route found is acceptable, or EINVAL when there is
  * no route, when the route is a gateway route that does not meet the
  * exception below, or when a non-host route on a different interface
  * does not cover the address.  Every path that obtained a route releases
  * it with RTFREE_LOCKED() before returning.  `flags' is not consulted.
  */
 static int
 in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr)
 {
 	struct rtentry *rt;
 
 	KASSERT(l3addr->sa_family == AF_INET,
 	    ("sin_family %d", l3addr->sa_family));
 
 	/* XXX rtalloc1_fib should take a const param */
 	rt = rtalloc1_fib(__DECONST(struct sockaddr *, l3addr), 0, 0,
 	    ifp->if_fib);
 
 	if (rt == NULL)
 		return (EINVAL);
 
 	/*
 	 * If the gateway for an existing host route matches the target L3
 	 * address, which is a special route inserted by some implementation
 	 * such as MANET, and the interface is of the correct type, then
 	 * allow for ARP to proceed.
 	 *
 	 * NOTE(review): the memcmp() below compares sizeof(in_addr_t) bytes
 	 * starting at sa_data.  If sa_data overlays sin_port (as the
 	 * sockaddr_in layout suggests), this covers the port plus only the
 	 * first two address bytes -- confirm whether that is intended.
 	 */
 	if (rt->rt_flags & RTF_GATEWAY) {
 		if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp ||
 		    rt->rt_ifp->if_type != IFT_ETHER ||
 		    (rt->rt_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 ||
 		    memcmp(rt->rt_gateway->sa_data, l3addr->sa_data,
 		    sizeof(in_addr_t)) != 0) {
 			RTFREE_LOCKED(rt);
 			return (EINVAL);
 		}
 	}
 
 	/*
 	 * Make sure that at least the destination address is covered
 	 * by the route. This is for handling the case where 2 or more
 	 * interfaces have the same prefix. An incoming packet arrives
 	 * on one interface and the corresponding outgoing packet leaves
 	 * another interface.
 	 */
 	if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) {
 		const char *sa, *mask, *addr, *lim;
 		int len;
 
 		mask = (const char *)rt_mask(rt);
 		/*
 		 * Just being extra cautious to avoid some custom
 		 * code getting into trouble.
 		 */
 		if (mask == NULL) {
 			RTFREE_LOCKED(rt);
 			return (EINVAL);
 		}
 
 		sa = (const char *)rt_key(rt);
 		addr = (const char *)l3addr;
 		len = ((const struct sockaddr_in *)l3addr)->sin_len;
 		lim = addr + len;
 
 		/*
 		 * Byte-wise masked comparison of the route key against the
 		 * target sockaddr over sin_len bytes; any differing bit that
 		 * the mask selects means the route does not cover the address.
 		 */
 		for ( ; addr < lim; sa++, mask++, addr++) {
 			if ((*sa ^ *addr) & *mask) {
 #ifdef DIAGNOSTIC
 				log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n",
 				    inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr));
 #endif
 				RTFREE_LOCKED(rt);
 				return (EINVAL);
 			}
 		}
 	}
 
 	RTFREE_LOCKED(rt);
 	return (0);
 }
 
 /*
  * Look up, and optionally create or delete, the entry for `l3addr'.
  *
  * Return NULL if not found or marked for deletion.
  * If found return lle read locked, or write locked when LLE_EXCLUSIVE
  * is set in `flags'.
  *
  * With LLE_CREATE and no existing entry, a new entry is allocated,
  * linked at the head of its hash bucket and returned locked as above;
  * NULL is returned when the route check or the allocation fails.
  * With LLE_DELETE and an existing entry, the function returns the
  * sentinel (void *)-1 rather than an entry pointer.
  *
  * The caller must hold the interface's AF data lock (asserted below);
  * the create path additionally asserts the write lock.
  */
 static struct llentry *
 in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
 {
 	const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
 	struct ifnet *ifp = llt->llt_ifp;
 	struct llentry *lle;
 	struct llentries *lleh;
 	u_int hashkey;
 
 	IF_AFDATA_LOCK_ASSERT(ifp);
 	KASSERT(l3addr->sa_family == AF_INET,
 	    ("sin_family %d", l3addr->sa_family));
 
 	/* The bucket is selected from the address value as stored. */
 	hashkey = sin->sin_addr.s_addr;
 	lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
 	/* lle is NULL after the loop when no live entry matched. */
 	LIST_FOREACH(lle, lleh, lle_next) {
 		struct sockaddr_in *sa2 = satosin(L3_ADDR(lle));
 		if (lle->la_flags & LLE_DELETED)
 			continue;
 		if (sa2->sin_addr.s_addr == sin->sin_addr.s_addr)
 			break;
 	}
 	if (lle == NULL) {
 #ifdef DIAGNOSTIC
 		if (flags & LLE_DELETE)
 			log(LOG_INFO, "interface address is missing from cache = %p  in delete\n", lle);
 #endif
 		if (!(flags & LLE_CREATE))
 			return (NULL);
 		IF_AFDATA_WLOCK_ASSERT(ifp);
 		/*
 		 * A route that covers the given address must have
 		 * been installed 1st because we are doing a resolution,
 		 * verify this.
 		 */
 		if (!(flags & LLE_IFADDR) &&
 		    in_lltable_rtcheck(ifp, flags, l3addr) != 0)
 			goto done;
 
 		lle = in_lltable_new(l3addr, flags);
 		if (lle == NULL) {
 			log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
 			goto done;
 		}
 		lle->la_flags = flags & ~LLE_CREATE;
 		/*
 		 * An entry for one of the interface's own addresses gets the
 		 * interface's link-layer address and is marked valid + static.
 		 */
 		if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
 			bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
 			lle->la_flags |= (LLE_VALID | LLE_STATIC);
 		}
 
 		lle->lle_tbl  = llt;
 		lle->lle_head = lleh;
 		lle->la_flags |= LLE_LINKED;
 		LIST_INSERT_HEAD(lleh, lle, lle_next);
 	} else if (flags & LLE_DELETE) {
 		/*
 		 * An LLE_IFADDR entry is only deleted when the caller also
 		 * passes LLE_IFADDR; any other entry is always eligible.
 		 */
 		if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
 			LLE_WLOCK(lle);
 			lle->la_flags |= LLE_DELETED;
 			EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
 #ifdef DIAGNOSTIC
 			log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
 #endif
 			/*
 			 * Only static, non-interface-address entries are
 			 * freed right here; the rest are merely unlocked.
 			 */
 			if ((lle->la_flags &
 			    (LLE_STATIC | LLE_IFADDR)) == LLE_STATIC)
 				llentry_free(lle);
 			else
 				LLE_WUNLOCK(lle);
 		}
 		/* Sentinel result for the delete path; never dereferenced. */
 		lle = (void *)-1;
 
 	}
 	/*
 	 * Lock what is handed back.  LLE_IS_VALID() presumably rejects both
 	 * NULL and the (void *)-1 sentinel -- confirm against its definition.
 	 */
 	if (LLE_IS_VALID(lle)) {
 		if (flags & LLE_EXCLUSIVE)
 			LLE_WLOCK(lle);
 		else
 			LLE_RLOCK(lle);
 	}
 done:
 	return (lle);
 }
 
 static int
 in_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
 {
 #define	SIN(lle)	((struct sockaddr_in *) L3_ADDR(lle))
 	struct ifnet *ifp = llt->llt_ifp;
 	struct llentry *lle;
 	/* XXX stack use */
 	struct {
 		struct rt_msghdr	rtm;
 		struct sockaddr_in	sin;
 		struct sockaddr_dl	sdl;
 	} arpc;
 	int error, i;
 
 	LLTABLE_LOCK_ASSERT();
 
 	error = 0;
 	for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
 		LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
 			struct sockaddr_dl *sdl;
 
 			/* skip deleted entries */
 			if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
 				continue;
 			/* Skip if jailed and not a valid IP of the prison. */
 			if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0)
 				continue;
 			/*
 			 * produce a msg made of:
 			 *  struct rt_msghdr;
 			 *  struct sockaddr_in; (IPv4)
 			 *  struct sockaddr_dl;
 			 */
 			bzero(&arpc, sizeof(arpc));
 			arpc.rtm.rtm_msglen = sizeof(arpc);
 			arpc.rtm.rtm_version = RTM_VERSION;
 			arpc.rtm.rtm_type = RTM_GET;
 			arpc.rtm.rtm_flags = RTF_UP;
 			arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
 			arpc.sin.sin_family = AF_INET;
 			arpc.sin.sin_len = sizeof(arpc.sin);
 			arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr;
 
 			/* publish */
 			if (lle->la_flags & LLE_PUB)
 				arpc.rtm.rtm_flags |= RTF_ANNOUNCE;
 
 			sdl = &arpc.sdl;
 			sdl->sdl_family = AF_LINK;
 			sdl->sdl_len = sizeof(*sdl);
 			sdl->sdl_index = ifp->if_index;
 			sdl->sdl_type = ifp->if_type;
 			if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
 				sdl->sdl_alen = ifp->if_addrlen;
 				bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
 			} else {
 				sdl->sdl_alen = 0;
 				bzero(LLADDR(sdl), ifp->if_addrlen);
 			}
 
 			arpc.rtm.rtm_rmx.rmx_expire =
 			    lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
 			arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
 			if (lle->la_flags & LLE_STATIC)
 				arpc.rtm.rtm_flags |= RTF_STATIC;
 			arpc.rtm.rtm_index = ifp->if_index;
 			error = SYSCTL_OUT(wr, &arpc, sizeof(arpc));
 			if (error)
 				break;
 		}
 	}
 	return error;
 #undef SIN
 }
 
 /*
  * Allocate the per-interface IPv4 state for `ifp'.
  *
  * Creates a zeroed struct in_ifinfo (sleeping allocation), attaches an
  * AF_INET link-layer table with the IPv4 prefix_free/lookup/dump
  * callbacks when lltable_init() succeeds, and stores the handle
  * returned by igmp_domifattach().  The in_ifinfo pointer is returned.
  */
 void *
 in_domifattach(struct ifnet *ifp)
 {
 	struct in_ifinfo *info;
 
 	info = malloc(sizeof(*info), M_IFADDR, M_WAITOK | M_ZERO);
 
 	info->ii_llt = lltable_init(ifp, AF_INET);
 	if (info->ii_llt != NULL) {
 		/* Hook up the IPv4-specific table methods. */
 		info->ii_llt->llt_prefix_free = in_lltable_prefix_free;
 		info->ii_llt->llt_lookup = in_lltable_lookup;
 		info->ii_llt->llt_dump = in_lltable_dump;
 	}
 
 	info->ii_igmp = igmp_domifattach(ifp);
 
 	return (info);
 }
 
 /*
  * Tear down the per-interface IPv4 state passed in `aux': detach IGMP
  * from `ifp', free the link-layer table, then free the in_ifinfo itself.
  */
 void
 in_domifdetach(struct ifnet *ifp, void *aux)
 {
 	struct in_ifinfo *info;
 
 	info = aux;
 
 	igmp_domifdetach(ifp);
 	lltable_free(info->ii_llt);
 	free(info, M_IFADDR);
 }
Index: user/ngie/more-tests/sys/netinet/in.h
===================================================================
--- user/ngie/more-tests/sys/netinet/in.h	(revision 281675)
+++ user/ngie/more-tests/sys/netinet/in.h	(revision 281676)
@@ -1,668 +1,669 @@
 /*-
  * Copyright (c) 1982, 1986, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in.h	8.3 (Berkeley) 1/3/94
  * $FreeBSD$
  */
 
 #ifndef _NETINET_IN_H_
 #define	_NETINET_IN_H_
 
 #include <sys/cdefs.h>
 #include <sys/_types.h>
 #include <machine/endian.h>
 
 /* Protocols common to RFC 1700, POSIX, and X/Open. */
 #define	IPPROTO_IP		0		/* dummy for IP */
 #define	IPPROTO_ICMP		1		/* control message protocol */
 #define	IPPROTO_TCP		6		/* tcp */
 #define	IPPROTO_UDP		17		/* user datagram protocol */
 
 #define	INADDR_ANY		((in_addr_t)0x00000000)
 #define	INADDR_BROADCAST	((in_addr_t)0xffffffff)	/* must be masked */
 
 #ifndef _UINT8_T_DECLARED
 typedef	__uint8_t		uint8_t;
 #define	_UINT8_T_DECLARED
 #endif
 
 #ifndef _UINT16_T_DECLARED
 typedef	__uint16_t		uint16_t;
 #define	_UINT16_T_DECLARED
 #endif
 
 #ifndef _UINT32_T_DECLARED
 typedef	__uint32_t		uint32_t;
 #define	_UINT32_T_DECLARED
 #endif
 
 #ifndef _IN_ADDR_T_DECLARED
 typedef	uint32_t		in_addr_t;
 #define	_IN_ADDR_T_DECLARED
 #endif
 
 #ifndef _IN_PORT_T_DECLARED
 typedef	uint16_t		in_port_t;
 #define	_IN_PORT_T_DECLARED
 #endif
 
 #ifndef _SA_FAMILY_T_DECLARED
 typedef	__sa_family_t		sa_family_t;
 #define	_SA_FAMILY_T_DECLARED
 #endif
 
 /* Internet address (a structure for historical reasons). */
 #ifndef	_STRUCT_IN_ADDR_DECLARED
 struct in_addr {
 	in_addr_t s_addr;
 };
 #define	_STRUCT_IN_ADDR_DECLARED
 #endif
 
 #ifndef	_SOCKLEN_T_DECLARED
 typedef	__socklen_t	socklen_t;
 #define	_SOCKLEN_T_DECLARED
 #endif
 
 #include <sys/_sockaddr_storage.h>
 
 /*
  * Socket address, internet style.
  *
  *  sin_len    - one-byte length field
  *  sin_family - address family (sa_family_t)
  *  sin_port   - 16-bit port number (in_port_t)
  *  sin_addr   - 32-bit IPv4 address wrapped in struct in_addr
  *  sin_zero   - 8 trailing bytes; presumably padding, confirm before use
  */
 struct sockaddr_in {
 	uint8_t	sin_len;
 	sa_family_t	sin_family;
 	in_port_t	sin_port;
 	struct	in_addr sin_addr;
 	char	sin_zero[8];
 };
 
 #if !defined(_KERNEL) && __POSIX_VISIBLE >= 200112
 
 #ifndef _BYTEORDER_PROTOTYPED
 #define	_BYTEORDER_PROTOTYPED
 __BEGIN_DECLS
 uint32_t	htonl(uint32_t);
 uint16_t	htons(uint16_t);
 uint32_t	ntohl(uint32_t);
 uint16_t	ntohs(uint16_t);
 __END_DECLS
 #endif
 
 #ifndef _BYTEORDER_FUNC_DEFINED
 #define	_BYTEORDER_FUNC_DEFINED
 #define	htonl(x)	__htonl(x)
 #define	htons(x)	__htons(x)
 #define	ntohl(x)	__ntohl(x)
 #define	ntohs(x)	__ntohs(x)
 #endif
 
 #endif /* !_KERNEL && __POSIX_VISIBLE >= 200112 */
 
 #if __POSIX_VISIBLE >= 200112
 #define	IPPROTO_IPV6		41		/* IP6 header */
 #define	IPPROTO_RAW		255		/* raw IP packet */
 #define	INET_ADDRSTRLEN		16
 #endif
 
 #if __BSD_VISIBLE
 /*
  * Constants and structures defined by the internet system,
  * Per RFC 790, September 1981, and numerous additions.
  */
 
 /*
  * Protocols (RFC 1700)
  */
 #define	IPPROTO_HOPOPTS		0		/* IP6 hop-by-hop options */
 #define	IPPROTO_IGMP		2		/* group mgmt protocol */
 #define	IPPROTO_GGP		3		/* gateway^2 (deprecated) */
 #define	IPPROTO_IPV4		4		/* IPv4 encapsulation */
 #define	IPPROTO_IPIP		IPPROTO_IPV4	/* for compatibility */
 #define	IPPROTO_ST		7		/* Stream protocol II */
 #define	IPPROTO_EGP		8		/* exterior gateway protocol */
 #define	IPPROTO_PIGP		9		/* private interior gateway */
 #define	IPPROTO_RCCMON		10		/* BBN RCC Monitoring */
 #define	IPPROTO_NVPII		11		/* network voice protocol*/
 #define	IPPROTO_PUP		12		/* pup */
 #define	IPPROTO_ARGUS		13		/* Argus */
 #define	IPPROTO_EMCON		14		/* EMCON */
 #define	IPPROTO_XNET		15		/* Cross Net Debugger */
 #define	IPPROTO_CHAOS		16		/* Chaos*/
 #define	IPPROTO_MUX		18		/* Multiplexing */
 #define	IPPROTO_MEAS		19		/* DCN Measurement Subsystems */
 #define	IPPROTO_HMP		20		/* Host Monitoring */
 #define	IPPROTO_PRM		21		/* Packet Radio Measurement */
 #define	IPPROTO_IDP		22		/* xns idp */
 #define	IPPROTO_TRUNK1		23		/* Trunk-1 */
 #define	IPPROTO_TRUNK2		24		/* Trunk-2 */
 #define	IPPROTO_LEAF1		25		/* Leaf-1 */
 #define	IPPROTO_LEAF2		26		/* Leaf-2 */
 #define	IPPROTO_RDP		27		/* Reliable Data */
 #define	IPPROTO_IRTP		28		/* Reliable Transaction */
 #define	IPPROTO_TP		29		/* tp-4 w/ class negotiation */
 #define	IPPROTO_BLT		30		/* Bulk Data Transfer */
 #define	IPPROTO_NSP		31		/* Network Services */
 #define	IPPROTO_INP		32		/* Merit Internodal */
 #define	IPPROTO_SEP		33		/* Sequential Exchange */
 #define	IPPROTO_3PC		34		/* Third Party Connect */
 #define	IPPROTO_IDPR		35		/* InterDomain Policy Routing */
 #define	IPPROTO_XTP		36		/* XTP */
 #define	IPPROTO_DDP		37		/* Datagram Delivery */
 #define	IPPROTO_CMTP		38		/* Control Message Transport */
 #define	IPPROTO_TPXX		39		/* TP++ Transport */
 #define	IPPROTO_IL		40		/* IL transport protocol */
 #define	IPPROTO_SDRP		42		/* Source Demand Routing */
 #define	IPPROTO_ROUTING		43		/* IP6 routing header */
 #define	IPPROTO_FRAGMENT	44		/* IP6 fragmentation header */
 #define	IPPROTO_IDRP		45		/* InterDomain Routing*/
 #define	IPPROTO_RSVP		46		/* resource reservation */
 #define	IPPROTO_GRE		47		/* General Routing Encap. */
 #define	IPPROTO_MHRP		48		/* Mobile Host Routing */
 #define	IPPROTO_BHA		49		/* BHA */
 #define	IPPROTO_ESP		50		/* IP6 Encap Sec. Payload */
 #define	IPPROTO_AH		51		/* IP6 Auth Header */
 #define	IPPROTO_INLSP		52		/* Integ. Net Layer Security */
 #define	IPPROTO_SWIPE		53		/* IP with encryption */
 #define	IPPROTO_NHRP		54		/* Next Hop Resolution */
 #define	IPPROTO_MOBILE		55		/* IP Mobility */
 #define	IPPROTO_TLSP		56		/* Transport Layer Security */
 #define	IPPROTO_SKIP		57		/* SKIP */
 #define	IPPROTO_ICMPV6		58		/* ICMP6 */
 #define	IPPROTO_NONE		59		/* IP6 no next header */
 #define	IPPROTO_DSTOPTS		60		/* IP6 destination option */
 #define	IPPROTO_AHIP		61		/* any host internal protocol */
 #define	IPPROTO_CFTP		62		/* CFTP */
 #define	IPPROTO_HELLO		63		/* "hello" routing protocol */
 #define	IPPROTO_SATEXPAK	64		/* SATNET/Backroom EXPAK */
 #define	IPPROTO_KRYPTOLAN	65		/* Kryptolan */
 #define	IPPROTO_RVD		66		/* Remote Virtual Disk */
 #define	IPPROTO_IPPC		67		/* Pluribus Packet Core */
 #define	IPPROTO_ADFS		68		/* Any distributed FS */
 #define	IPPROTO_SATMON		69		/* Satnet Monitoring */
 #define	IPPROTO_VISA		70		/* VISA Protocol */
 #define	IPPROTO_IPCV		71		/* Packet Core Utility */
 #define	IPPROTO_CPNX		72		/* Comp. Prot. Net. Executive */
 #define	IPPROTO_CPHB		73		/* Comp. Prot. HeartBeat */
 #define	IPPROTO_WSN		74		/* Wang Span Network */
 #define	IPPROTO_PVP		75		/* Packet Video Protocol */
 #define	IPPROTO_BRSATMON	76		/* BackRoom SATNET Monitoring */
 #define	IPPROTO_ND		77		/* Sun net disk proto (temp.) */
 #define	IPPROTO_WBMON		78		/* WIDEBAND Monitoring */
 #define	IPPROTO_WBEXPAK		79		/* WIDEBAND EXPAK */
 #define	IPPROTO_EON		80		/* ISO cnlp */
 #define	IPPROTO_VMTP		81		/* VMTP */
 #define	IPPROTO_SVMTP		82		/* Secure VMTP */
 #define	IPPROTO_VINES		83		/* Banyan VINES */
 #define	IPPROTO_TTP		84		/* TTP */
 #define	IPPROTO_IGP		85		/* NSFNET-IGP */
 #define	IPPROTO_DGP		86		/* dissimilar gateway prot. */
 #define	IPPROTO_TCF		87		/* TCF */
 #define	IPPROTO_IGRP		88		/* Cisco/GXS IGRP */
 #define	IPPROTO_OSPFIGP		89		/* OSPFIGP */
 #define	IPPROTO_SRPC		90		/* Strite RPC protocol */
 #define	IPPROTO_LARP		91		/* Locus Address Resolution */
 #define	IPPROTO_MTP		92		/* Multicast Transport */
 #define	IPPROTO_AX25		93		/* AX.25 Frames */
 #define	IPPROTO_IPEIP		94		/* IP encapsulated in IP */
 #define	IPPROTO_MICP		95		/* Mobile Int.ing control */
 #define	IPPROTO_SCCSP		96		/* Semaphore Comm. security */
 #define	IPPROTO_ETHERIP		97		/* Ethernet IP encapsulation */
 #define	IPPROTO_ENCAP		98		/* encapsulation header */
 #define	IPPROTO_APES		99		/* any private encr. scheme */
 #define	IPPROTO_GMTP		100		/* GMTP*/
 #define	IPPROTO_IPCOMP		108		/* payload compression (IPComp) */
 #define	IPPROTO_SCTP		132		/* SCTP */
 #define	IPPROTO_MH		135		/* IPv6 Mobility Header */
 #define	IPPROTO_UDPLITE		136		/* UDP-Lite */
 #define	IPPROTO_HIP		139		/* IP6 Host Identity Protocol */
 #define	IPPROTO_SHIM6		140		/* IP6 Shim6 Protocol */
 /* 101-254: Partly Unassigned */
 #define	IPPROTO_PIM		103		/* Protocol Independent Mcast */
 #define	IPPROTO_CARP		112		/* CARP */
 #define	IPPROTO_PGM		113		/* PGM */
 #define	IPPROTO_MPLS		137		/* MPLS-in-IP */
 #define	IPPROTO_PFSYNC		240		/* PFSYNC */
 #define	IPPROTO_RESERVED_253	253		/* Reserved */
 #define	IPPROTO_RESERVED_254	254		/* Reserved */
 /* 255: Reserved */
 /* BSD Private, local use, namespace incursion, no longer used */
 #define	IPPROTO_OLD_DIVERT	254		/* OLD divert pseudo-proto */
 #define	IPPROTO_MAX		256
 
 /* Last return value of *_input(), meaning "all work for this packet is done". */
 #define	IPPROTO_DONE		257
 
 /* Only used internally, so can be outside the range of valid IP protocols. */
 #define	IPPROTO_DIVERT		258		/* divert pseudo-protocol */
 #define	IPPROTO_SEND		259		/* SeND pseudo-protocol */
 
 /*
  * Defined to avoid confusion.  The master value is defined by
  * PROTO_SPACER in sys/protosw.h.
  */
 #define	IPPROTO_SPACER		32767		/* spacer for loadable protos */
 
 /*
  * Local port number conventions:
  *
  * When a user does a bind(2) or connect(2) with a port number of zero,
  * a non-conflicting local port address is chosen.
  * The default range is IPPORT_HIFIRSTAUTO through
  * IPPORT_HILASTAUTO, although that is settable by sysctl.
  *
  * A user may set the IPPROTO_IP option IP_PORTRANGE to change this
  * default assignment range.
  *
  * The value IP_PORTRANGE_DEFAULT causes the default behavior.
  *
  * The value IP_PORTRANGE_HIGH changes the range of candidate port numbers
  * into the "high" range.  These are reserved for client outbound connections
  * which do not want to be filtered by any firewalls.
  *
  * The value IP_PORTRANGE_LOW changes the range to the "low" area
  * that is (by convention) restricted to privileged processes.  This
  * convention is based on "vouchsafe" principles only.  It is only secure
  * if you trust the remote host to restrict these ports.
  *
  * The default range of ports and the high range can be changed by
  * sysctl(3).  (net.inet.ip.port{hi,low}{first,last}_auto)
  *
  * Changing those values has bad security implications if you are
  * using a stateless firewall that is allowing packets outside of that
  * range in order to allow transparent outgoing connections.
  *
  * Such a firewall configuration will generally depend on the use of these
  * default values.  If you change them, you may find your Security
  * Administrator looking for you with a heavy object.
  *
  * For a slightly more orthodox text view on this:
  *
  *            ftp://ftp.isi.edu/in-notes/iana/assignments/port-numbers
  *
  *    port numbers are divided into three ranges:
  *
  *                0 -  1023 Well Known Ports
  *             1024 - 49151 Registered Ports
  *            49152 - 65535 Dynamic and/or Private Ports
  *
  */
 
 /*
  * Ports < IPPORT_RESERVED are reserved for
  * privileged processes (e.g. root).         (IP_PORTRANGE_LOW)
  */
 #define	IPPORT_RESERVED		1024
 
 /*
  * Default local port range, used by IP_PORTRANGE_DEFAULT
  */
 #define IPPORT_EPHEMERALFIRST	10000
 #define IPPORT_EPHEMERALLAST	65535 
  
 /*
  * Dynamic port range, used by IP_PORTRANGE_HIGH.
  */
 #define	IPPORT_HIFIRSTAUTO	49152
 #define	IPPORT_HILASTAUTO	65535
 
 /*
  * Scanning for a free reserved port returns a value below IPPORT_RESERVED,
  * but higher than IPPORT_RESERVEDSTART.  Traditionally the start value was
  * 512, but that conflicts with some well-known-services that firewalls may
  * have a fit if we use.
  */
 #define	IPPORT_RESERVEDSTART	600
 
 #define	IPPORT_MAX		65535
 
 /*
  * Definitions of bits in internet address integers.
  * On subnets, the decomposition of addresses to host and net parts
  * is done according to subnet mask, not the masks here.
  */
 #define	IN_CLASSA(i)		(((in_addr_t)(i) & 0x80000000) == 0)
 #define	IN_CLASSA_NET		0xff000000
 #define	IN_CLASSA_NSHIFT	24
 #define	IN_CLASSA_HOST		0x00ffffff
 #define	IN_CLASSA_MAX		128
 
 #define	IN_CLASSB(i)		(((in_addr_t)(i) & 0xc0000000) == 0x80000000)
 #define	IN_CLASSB_NET		0xffff0000
 #define	IN_CLASSB_NSHIFT	16
 #define	IN_CLASSB_HOST		0x0000ffff
 #define	IN_CLASSB_MAX		65536
 
 #define	IN_CLASSC(i)		(((in_addr_t)(i) & 0xe0000000) == 0xc0000000)
 #define	IN_CLASSC_NET		0xffffff00
 #define	IN_CLASSC_NSHIFT	8
 #define	IN_CLASSC_HOST		0x000000ff
 
 #define	IN_CLASSD(i)		(((in_addr_t)(i) & 0xf0000000) == 0xe0000000)
 #define	IN_CLASSD_NET		0xf0000000	/* These ones aren't really */
 #define	IN_CLASSD_NSHIFT	28		/* net and host fields, but */
 #define	IN_CLASSD_HOST		0x0fffffff	/* routing needn't know.    */
 #define	IN_MULTICAST(i)		IN_CLASSD(i)
 
 #define	IN_EXPERIMENTAL(i)	(((in_addr_t)(i) & 0xf0000000) == 0xf0000000)
 #define	IN_BADCLASS(i)		(((in_addr_t)(i) & 0xf0000000) == 0xf0000000)
 
 #define IN_LINKLOCAL(i)		(((in_addr_t)(i) & 0xffff0000) == 0xa9fe0000)
 #define IN_LOOPBACK(i)		(((in_addr_t)(i) & 0xff000000) == 0x7f000000)
 #define IN_ZERONET(i)		(((in_addr_t)(i) & 0xff000000) == 0)
 
 #define	IN_PRIVATE(i)	((((in_addr_t)(i) & 0xff000000) == 0x0a000000) || \
 			 (((in_addr_t)(i) & 0xfff00000) == 0xac100000) || \
 			 (((in_addr_t)(i) & 0xffff0000) == 0xc0a80000))
 
 #define	IN_LOCAL_GROUP(i)	(((in_addr_t)(i) & 0xffffff00) == 0xe0000000)
  
 #define	IN_ANY_LOCAL(i)		(IN_LINKLOCAL(i) || IN_LOCAL_GROUP(i))
 
 #define	INADDR_LOOPBACK		((in_addr_t)0x7f000001)
 #ifndef _KERNEL
 #define	INADDR_NONE		((in_addr_t)0xffffffff)	/* -1 return */
 #endif
 
 #define	INADDR_UNSPEC_GROUP	((in_addr_t)0xe0000000)	/* 224.0.0.0 */
 #define	INADDR_ALLHOSTS_GROUP	((in_addr_t)0xe0000001)	/* 224.0.0.1 */
 #define	INADDR_ALLRTRS_GROUP	((in_addr_t)0xe0000002)	/* 224.0.0.2 */
 #define	INADDR_ALLRPTS_GROUP	((in_addr_t)0xe0000016)	/* 224.0.0.22, IGMPv3 */
 #define	INADDR_CARP_GROUP	((in_addr_t)0xe0000012)	/* 224.0.0.18 */
 #define	INADDR_PFSYNC_GROUP	((in_addr_t)0xe00000f0)	/* 224.0.0.240 */
 #define	INADDR_ALLMDNS_GROUP	((in_addr_t)0xe00000fb)	/* 224.0.0.251 */
 #define	INADDR_MAX_LOCAL_GROUP	((in_addr_t)0xe00000ff)	/* 224.0.0.255 */
 
 #define	IN_LOOPBACKNET		127			/* official! */
 
 #define	IN_RFC3021_MASK		((in_addr_t)0xfffffffe)
 
 /*
  * Options for use with [gs]etsockopt at the IP level.
  * First word of comment is data type; bool is stored in int.
  */
 #define	IP_OPTIONS		1    /* buf/ip_opts; set/get IP options */
 #define	IP_HDRINCL		2    /* int; header is included with data */
 #define	IP_TOS			3    /* int; IP type of service and preced. */
 #define	IP_TTL			4    /* int; IP time to live */
 #define	IP_RECVOPTS		5    /* bool; receive all IP opts w/dgram */
 #define	IP_RECVRETOPTS		6    /* bool; receive IP opts for response */
 #define	IP_RECVDSTADDR		7    /* bool; receive IP dst addr w/dgram */
 #define	IP_SENDSRCADDR		IP_RECVDSTADDR /* cmsg_type to set src addr */
 #define	IP_RETOPTS		8    /* ip_opts; set/get IP options */
 #define	IP_MULTICAST_IF		9    /* struct in_addr *or* struct ip_mreqn;
 				      * set/get IP multicast i/f  */
 #define	IP_MULTICAST_TTL	10   /* u_char; set/get IP multicast ttl */
 #define	IP_MULTICAST_LOOP	11   /* u_char; set/get IP multicast loopback */
 #define	IP_ADD_MEMBERSHIP	12   /* ip_mreq; add an IP group membership */
 #define	IP_DROP_MEMBERSHIP	13   /* ip_mreq; drop an IP group membership */
 #define	IP_MULTICAST_VIF	14   /* set/get IP mcast virt. iface */
 #define	IP_RSVP_ON		15   /* enable RSVP in kernel */
 #define	IP_RSVP_OFF		16   /* disable RSVP in kernel */
 #define	IP_RSVP_VIF_ON		17   /* set RSVP per-vif socket */
 #define	IP_RSVP_VIF_OFF		18   /* unset RSVP per-vif socket */
 #define	IP_PORTRANGE		19   /* int; range to choose for unspec port */
 #define	IP_RECVIF		20   /* bool; receive reception if w/dgram */
 /* for IPSEC */
 #define	IP_IPSEC_POLICY		21   /* int; set/get security policy */
 				     /* unused; was IP_FAITH */
 #define	IP_ONESBCAST		23   /* bool: send all-ones broadcast */
 #define	IP_BINDANY		24   /* bool: allow bind to any address */
 #define	IP_BINDMULTI		25   /* bool: allow multiple listeners on a tuple */
 #define	IP_RSS_LISTEN_BUCKET	26   /* int; set RSS listen bucket */
 
 /*
  * Options for controlling the firewall and dummynet.
  * Historical options (from 40 to 64) will eventually be
  * replaced by only two options, IP_FW3 and IP_DUMMYNET3.
  */
 #define	IP_FW_TABLE_ADD		40   /* add entry */
 #define	IP_FW_TABLE_DEL		41   /* delete entry */
 #define	IP_FW_TABLE_FLUSH	42   /* flush table */
 #define	IP_FW_TABLE_GETSIZE	43   /* get table size */
 #define	IP_FW_TABLE_LIST	44   /* list table contents */
 
 #define	IP_FW3			48   /* generic ipfw v.3 sockopts */
 #define	IP_DUMMYNET3		49   /* generic dummynet v.3 sockopts */
 
 #define	IP_FW_ADD		50   /* add a firewall rule to chain */
 #define	IP_FW_DEL		51   /* delete a firewall rule from chain */
 #define	IP_FW_FLUSH		52   /* flush firewall rule chain */
 #define	IP_FW_ZERO		53   /* clear single/all firewall counter(s) */
 #define	IP_FW_GET		54   /* get entire firewall rule chain */
 #define	IP_FW_RESETLOG		55   /* reset logging counters */
 
 #define IP_FW_NAT_CFG           56   /* add/config a nat rule */
 #define IP_FW_NAT_DEL           57   /* delete a nat rule */
 #define IP_FW_NAT_GET_CONFIG    58   /* get configuration of a nat rule */
 #define IP_FW_NAT_GET_LOG       59   /* get log of a nat rule */
 
 #define	IP_DUMMYNET_CONFIGURE	60   /* add/configure a dummynet pipe */
 #define	IP_DUMMYNET_DEL		61   /* delete a dummynet pipe from chain */
 #define	IP_DUMMYNET_FLUSH	62   /* flush dummynet */
 #define	IP_DUMMYNET_GET		64   /* get entire dummynet pipes */
 
 #define	IP_RECVTTL		65   /* bool; receive IP TTL w/dgram */
 #define	IP_MINTTL		66   /* minimum TTL for packet or drop */
 #define	IP_DONTFRAG		67   /* don't fragment packet */
 #define	IP_RECVTOS		68   /* bool; receive IP TOS w/dgram */
 
 /* IPv4 Source Filter Multicast API [RFC3678] */
 #define	IP_ADD_SOURCE_MEMBERSHIP	70   /* join a source-specific group */
 #define	IP_DROP_SOURCE_MEMBERSHIP	71   /* drop a single source */
 #define	IP_BLOCK_SOURCE			72   /* block a source */
 #define	IP_UNBLOCK_SOURCE		73   /* unblock a source */
 
 /* The following option is private; do not use it from user applications. */
 #define	IP_MSFILTER			74   /* set/get filter list */
 
 /* Protocol Independent Multicast API [RFC3678] */
 #define	MCAST_JOIN_GROUP		80   /* join an any-source group */
 #define	MCAST_LEAVE_GROUP		81   /* leave all sources for group */
 #define	MCAST_JOIN_SOURCE_GROUP		82   /* join a source-specific group */
 #define	MCAST_LEAVE_SOURCE_GROUP	83   /* leave a single source */
 #define	MCAST_BLOCK_SOURCE		84   /* block a source */
 #define	MCAST_UNBLOCK_SOURCE		85   /* unblock a source */
 
 /* Flow and RSS definitions */
 #define	IP_FLOWID		90   /* get flow id for the given socket/inp */
 #define	IP_FLOWTYPE		91   /* get flow type (M_HASHTYPE) */
 #define	IP_RSSBUCKETID		92   /* get RSS flowid -> bucket mapping */
 #define	IP_RECVFLOWID		93   /* bool; receive IP flowid/flowtype w/ datagram */
 #define	IP_RECVRSSBUCKETID	94   /* bool; receive IP RSS bucket id w/ datagram */
 
 /*
  * Defaults and limits for options
  */
 #define	IP_DEFAULT_MULTICAST_TTL  1	/* normally limit m'casts to 1 hop  */
 #define	IP_DEFAULT_MULTICAST_LOOP 1	/* normally hear sends if a member  */
 
 /*
  * The imo_membership vector for each socket is now dynamically allocated at
  * run-time, bounded by USHRT_MAX, and is reallocated when needed, sized
  * according to a power-of-two increment.
  */
 #define	IP_MIN_MEMBERSHIPS	31
 #define	IP_MAX_MEMBERSHIPS	4095
 #define	IP_MAX_SOURCE_FILTER	1024	/* XXX to be unused */
 
 /*
  * Default resource limits for IPv4 multicast source filtering.
  * These may be modified by sysctl.
  */
 #define	IP_MAX_GROUP_SRC_FILTER		512	/* sources per group */
 #define	IP_MAX_SOCK_SRC_FILTER		128	/* sources per socket/group */
 #define	IP_MAX_SOCK_MUTE_FILTER		128	/* XXX no longer used */
 
 /*
  * Argument structure for IP_ADD_MEMBERSHIP and IP_DROP_MEMBERSHIP.
  * The interface is identified by one of its local IPv4 addresses;
  * struct ip_mreqn is the variant that also carries an interface index.
  */
 struct ip_mreq {
 	struct	in_addr imr_multiaddr;	/* IP multicast address of group */
 	struct	in_addr imr_interface;	/* local IP address of interface */
 };
 
 /*
  * Modified argument structure for IP_MULTICAST_IF, obtained from Linux.
  * This is used to specify an interface index for multicast sends, as
  * the IPv4 legacy APIs do not support this (unless IP_SENDIF is available).
  * The layout is two IPv4 addresses followed by the interface index.
  */
 struct ip_mreqn {
 	struct	in_addr imr_multiaddr;	/* IP multicast address of group */
 	struct	in_addr imr_address;	/* local IP address of interface */
 	int		imr_ifindex;	/* Interface index; cast to uint32_t */
 };
 
 /*
  * Argument structure for IPv4 Multicast Source Filter APIs. [RFC3678]
  * Names a (group, source) pair together with the local address that
  * selects the interface; all three members are IPv4 addresses.
  */
 struct ip_mreq_source {
 	struct	in_addr imr_multiaddr;	/* IP multicast address of group */
 	struct	in_addr imr_sourceaddr;	/* IP address of source */
 	struct	in_addr imr_interface;	/* local IP address of interface */
 };
 
 /*
  * Argument structures for Protocol-Independent Multicast Source
  * Filter APIs. [RFC3678]
  *
  * struct group_req names a group on an interface.  The interface is given
  * by index and the group address is held in a sockaddr_storage, so the
  * structure is not tied to one address family.
  */
 struct group_req {
 	uint32_t		gr_interface;	/* interface index */
 	struct sockaddr_storage	gr_group;	/* group address */
 };
 
 /*
  * Names a (group, source) pair on an interface.  The interface is given by
  * index; both addresses are held in sockaddr_storage members.
  */
 struct group_source_req {
 	uint32_t		gsr_interface;	/* interface index */
 	struct sockaddr_storage	gsr_group;	/* group address */
 	struct sockaddr_storage	gsr_source;	/* source address */
 };
 
 #ifndef __MSFILTERREQ_DEFINED
 #define __MSFILTERREQ_DEFINED
 /*
  * The following structure is private; do not use it from user applications.
  * It is used to communicate IP_MSFILTER/IPV6_MSFILTER information between
  * the RFC 3678 libc functions and the kernel.
  *
  * NOTE(review): msfr_fmode presumably holds one of the MCAST_* filter
  * modes defined later in this header -- confirm against the users of
  * this structure.
  */
 struct __msfilterreq {
 	uint32_t		 msfr_ifindex;	/* interface index */
 	uint32_t		 msfr_fmode;	/* filter mode for group */
 	uint32_t		 msfr_nsrcs;	/* # of sources in msfr_srcs */
 	struct sockaddr_storage	 msfr_group;	/* group address */
 	struct sockaddr_storage	*msfr_srcs;	/* pointer to the first member
 						 * of a contiguous array of
 						 * sources to filter in full.
 						 */
 };
 #endif
 
 struct sockaddr;
 
 /*
  * Advanced (Full-state) APIs [RFC3678]
  * The RFC specifies uint_t for the 6th argument to [sg]etsourcefilter().
  * We use uint32_t here to be consistent.
  */
 int	setipv4sourcefilter(int, struct in_addr, struct in_addr, uint32_t,
 	    uint32_t, struct in_addr *);
 int	getipv4sourcefilter(int, struct in_addr, struct in_addr, uint32_t *,
 	    uint32_t *, struct in_addr *);
 int	setsourcefilter(int, uint32_t, struct sockaddr *, socklen_t,
 	    uint32_t, uint32_t, struct sockaddr_storage *);
 int	getsourcefilter(int, uint32_t, struct sockaddr *, socklen_t,
 	    uint32_t *, uint32_t *, struct sockaddr_storage *);
 
 /*
  * Filter modes; also used to represent per-socket filter mode internally.
  */
 #define	MCAST_UNDEFINED	0	/* fmode: not yet defined */
 #define	MCAST_INCLUDE	1	/* fmode: include these source(s) */
 #define	MCAST_EXCLUDE	2	/* fmode: exclude these source(s) */
 
 /*
  * Argument for IP_PORTRANGE:
  * - which range to search when port is unspecified at bind() or connect()
  */
 #define	IP_PORTRANGE_DEFAULT	0	/* default range */
 #define	IP_PORTRANGE_HIGH	1	/* "high" - request firewall bypass */
 #define	IP_PORTRANGE_LOW	2	/* "low" - vouchsafe security */
 
 /*
  * Identifiers for IP sysctl nodes
  */
 #define	IPCTL_FORWARDING	1	/* act as router */
 #define	IPCTL_SENDREDIRECTS	2	/* may send redirects when forwarding */
 #define	IPCTL_DEFTTL		3	/* default TTL */
 #ifdef notyet
 #define	IPCTL_DEFMTU		4	/* default MTU */
 #endif
 /*	IPCTL_RTEXPIRE		5	deprecated */
 /*	IPCTL_RTMINEXPIRE	6	deprecated */
 /*	IPCTL_RTMAXCACHE	7	deprecated */
 #define	IPCTL_SOURCEROUTE	8	/* may perform source routes */
 #define	IPCTL_DIRECTEDBROADCAST	9	/* may re-broadcast received packets */
 #define	IPCTL_INTRQMAXLEN	10	/* max length of netisr queue */
 #define	IPCTL_INTRQDROPS	11	/* number of netisr q drops */
 #define	IPCTL_STATS		12	/* ipstat structure */
 #define	IPCTL_ACCEPTSOURCEROUTE	13	/* may accept source routed packets */
 #define	IPCTL_FASTFORWARDING	14	/* use fast IP forwarding code */
 					/* 15, unused, was: IPCTL_KEEPFAITH  */
 #define	IPCTL_GIF_TTL		16	/* default TTL for gif encap packet */
 
 #endif /* __BSD_VISIBLE */
 
 #ifdef _KERNEL
 
 struct ifnet; struct mbuf;	/* forward declarations for Standard C */
 
 int	 in_broadcast(struct in_addr, struct ifnet *);
 int	 in_canforward(struct in_addr);
 int	 in_localaddr(struct in_addr);
 int	 in_localip(struct in_addr);
+int	 in_ifhasaddr(struct ifnet *, struct in_addr);
 int	 inet_aton(const char *, struct in_addr *); /* in libkern */
 char	*inet_ntoa(struct in_addr); /* in libkern */
 char	*inet_ntoa_r(struct in_addr ina, char *buf); /* in libkern */
 char	*inet_ntop(int, const void *, char *, socklen_t); /* in libkern */
 int	 inet_pton(int af, const char *, void *); /* in libkern */
 void	 in_ifdetach(struct ifnet *);
 
 #define	in_hosteq(s, t)	((s).s_addr == (t).s_addr)
 #define	in_nullhost(x)	((x).s_addr == INADDR_ANY)
 #define	in_allhosts(x)	((x).s_addr == htonl(INADDR_ALLHOSTS_GROUP))
 
 #define	satosin(sa)	((struct sockaddr_in *)(sa))
 #define	sintosa(sin)	((struct sockaddr *)(sin))
 #define	ifatoia(ifa)	((struct in_ifaddr *)(ifa))
 #endif /* _KERNEL */
 
 /* INET6 stuff */
 #if __POSIX_VISIBLE >= 200112
 #define	__KAME_NETINET_IN_H_INCLUDED_
 #include <netinet6/in6.h>
 #undef __KAME_NETINET_IN_H_INCLUDED_
 #endif
 
 #endif /* !_NETINET_IN_H_*/
Index: user/ngie/more-tests/sys/netinet6/in6.c
===================================================================
--- user/ngie/more-tests/sys/netinet6/in6.c	(revision 281675)
+++ user/ngie/more-tests/sys/netinet6/in6.c	(revision 281676)
@@ -1,2403 +1,2433 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: in6.c,v 1.259 2002/01/21 11:37:50 keiichi Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in.c	8.2 (Berkeley) 11/15/93
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/eventhandler.h>
 #include <sys/errno.h>
 #include <sys/jail.h>
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/systm.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/if_dl.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <net/if_llatbl.h>
 #include <netinet/if_ether.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_carp.h>
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/mld6_var.h>
 #include <netinet6/ip6_mroute.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/in6_pcb.h>
 
 VNET_DECLARE(int, icmp6_nodeinfo_oldmcprefix);
 #define V_icmp6_nodeinfo_oldmcprefix	VNET(icmp6_nodeinfo_oldmcprefix)
 
 /*
  * Definitions of some constant IP6 addresses.
  */
 const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
 const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
 const struct in6_addr in6addr_nodelocal_allnodes =
 	IN6ADDR_NODELOCAL_ALLNODES_INIT;
 const struct in6_addr in6addr_linklocal_allnodes =
 	IN6ADDR_LINKLOCAL_ALLNODES_INIT;
 const struct in6_addr in6addr_linklocal_allrouters =
 	IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
 const struct in6_addr in6addr_linklocal_allv2routers =
 	IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT;
 
 const struct in6_addr in6mask0 = IN6MASK0;
 const struct in6_addr in6mask32 = IN6MASK32;
 const struct in6_addr in6mask64 = IN6MASK64;
 const struct in6_addr in6mask96 = IN6MASK96;
 const struct in6_addr in6mask128 = IN6MASK128;
 
 const struct sockaddr_in6 sa6_any =
 	{ sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 };
 
 static int in6_notify_ifa(struct ifnet *, struct in6_ifaddr *,
 	struct in6_aliasreq *, int);
 static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *);
 
 static int in6_validate_ifra(struct ifnet *, struct in6_aliasreq *,
     struct in6_ifaddr *, int);
 static struct in6_ifaddr *in6_alloc_ifa(struct ifnet *,
     struct in6_aliasreq *, int flags);
 static int in6_update_ifa_internal(struct ifnet *, struct in6_aliasreq *,
     struct in6_ifaddr *, int, int);
 static int in6_broadcast_ifa(struct ifnet *, struct in6_aliasreq *,
     struct in6_ifaddr *, int);
 
 #define ifa2ia6(ifa)	((struct in6_ifaddr *)(ifa))
 #define ia62ifa(ia6)	(&((ia6)->ia_ifa))
 
 
 void
 in6_newaddrmsg(struct in6_ifaddr *ia, int cmd)
 {
 	struct sockaddr_dl gateway;
 	struct sockaddr_in6 mask, addr;
 	struct rtentry rt;
 
 	/*
 	 * initialize for rtmsg generation
 	 */
 	bzero(&gateway, sizeof(gateway));
 	gateway.sdl_len = sizeof(gateway);
 	gateway.sdl_family = AF_LINK;
 
 	bzero(&rt, sizeof(rt));
 	rt.rt_gateway = (struct sockaddr *)&gateway;
 	memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask));
 	memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr));
 	rt_mask(&rt) = (struct sockaddr *)&mask;
 	rt_key(&rt) = (struct sockaddr *)&addr;
 	rt.rt_flags = RTF_HOST | RTF_STATIC;
 	if (cmd == RTM_ADD)
 		rt.rt_flags |= RTF_UP;
 	/* Announce arrival of local address to all FIBs. */
 	rt_newaddrmsg(cmd, &ia->ia_ifa, 0, &rt);
 }
 
 /*
  * Convert an IPv6 netmask into a prefix length.
  *
  * mask - netmask to examine.
  * lim0 - optional end pointer, one past the last byte to examine.  When it
  *        is NULL, or lies more than sizeof(*mask) bytes past the start of
  *        mask, the whole struct in6_addr is examined instead.
  *
  * Returns the number of leading one bits, or -1 when any one bit follows
  * the first zero bit (the mask is not contiguous).
  */
 int
 in6_mask2len(struct in6_addr *mask, u_char *lim0)
 {
 	int x = 0, y;
 	u_char *lim = lim0, *p;
 
 	/* ignore the scope_id part */
 	if (lim0 == NULL || lim0 - (u_char *)mask > sizeof(*mask))
 		lim = (u_char *)mask + sizeof(*mask);
 
 	/* x counts the leading all-ones bytes. */
 	for (p = (u_char *)mask; p < lim; x++, p++) {
 		if (*p != 0xff)
 			break;
 	}
 
 	/* y counts the leading one bits of the first partial byte (0..7). */
 	y = 0;
 	if (p < lim) {
 		for (y = 0; y < 8; y++) {
 			if ((*p & (0x80 >> y)) == 0)
 				break;
 		}
 	}
 
 	/*
 	 * Everything after the leading run of ones must be zero.  The
 	 * partial byte is checked even when y == 0: a byte such as 0x7f
 	 * has its top bit clear but still carries stray one bits, and
 	 * must not be accepted as the end of a contiguous mask.
 	 */
 	if (p < lim) {
 		if ((*p & (0x00ff >> y)) != 0)
 			return (-1);
 		for (p = p + 1; p < lim; p++)
 			if (*p != 0)
 				return (-1);
 	}
 
 	return (x * 8 + y);
 }
 
 #ifdef COMPAT_FREEBSD32
 /*
  * Request layout for the 32-bit compat ioctl SIOCGDEFIFACE32_IN6 defined
  * just below: an interface name followed by a fixed 32-bit interface index.
  * NOTE(review): presumably mirrors struct in6_ndifreq with a narrower
  * ifindex field -- confirm against the native definition.
  */
 struct in6_ndifreq32 {
 	char ifname[IFNAMSIZ];
 	uint32_t ifindex;
 };
 #define	SIOCGDEFIFACE32_IN6	_IOWR('i', 86, struct in6_ndifreq32)
 #endif
 
 /*
  * Handle an ioctl request for the IPv6 protocol family.
  *
  * so   - socket the request was issued on (not referenced here).
  * cmd  - ioctl command.  OSIOCAIFADDR_IN6 is treated as SIOCAIFADDR_IN6,
  *        except that no CARP vhid is attached for the old command.
  * data - request argument, viewed as struct in6_ifreq or
  *        struct in6_aliasreq depending on cmd.
  * ifp  - target interface.  May be NULL only for the multicast-routing and
  *        address-selection-policy requests, which are dispatched before the
  *        NULL check; every other request then fails with EOPNOTSUPP.
  * td   - calling thread; when NULL the privilege and jail checks are
  *        skipped.
  *
  * Requests are dispatched in stages: multicast routing, address policy,
  * neighbor discovery, (obsolete) prefix ioctls and scope ioctls return
  * directly.  For the remaining commands the address named in the request
  * is looked up on ifp, per-command validation and privilege checks are
  * run, and finally the command is carried out.  Commands not known here
  * are passed to the interface's own if_ioctl handler.
  *
  * Returns 0 on success or an errno value.  The reference obtained by
  * in6ifa_ifpwithaddr() is released at "out".
  */
 int
 in6_control(struct socket *so, u_long cmd, caddr_t data,
     struct ifnet *ifp, struct thread *td)
 {
 	struct	in6_ifreq *ifr = (struct in6_ifreq *)data;
 	struct	in6_ifaddr *ia = NULL;
 	struct	in6_aliasreq *ifra = (struct in6_aliasreq *)data;
 	struct sockaddr_in6 *sa6;
 	int carp_attached = 0;
 	int error;
 	u_long ocmd = cmd;
 
 	/*
 	 * Compat to make pre-10.x ifconfig(8) operable.
 	 */
 	if (cmd == OSIOCAIFADDR_IN6)
 		cmd = SIOCAIFADDR_IN6;
 
 	switch (cmd) {
 	case SIOCGETSGCNT_IN6:
 	case SIOCGETMIFCNT_IN6:
 		/*
 		 * XXX mrt_ioctl has a 3rd, unused, FIB argument in route.c.
 		 * We cannot see how that would be needed, so do not adjust the
 		 * KPI blindly; more likely should clean up the IPv4 variant.
 		 */
 		return (mrt6_ioctl ? mrt6_ioctl(cmd, data) : EOPNOTSUPP);
 	}
 
 	switch (cmd) {
 	case SIOCAADDRCTL_POLICY:
 	case SIOCDADDRCTL_POLICY:
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NETINET_ADDRCTRL6);
 			if (error)
 				return (error);
 		}
 		return (in6_src_ioctl(cmd, data));
 	}
 
 	if (ifp == NULL)
 		return (EOPNOTSUPP);
 
 	switch (cmd) {
 	case SIOCSNDFLUSH_IN6:
 	case SIOCSPFXFLUSH_IN6:
 	case SIOCSRTRFLUSH_IN6:
 	case SIOCSDEFIFACE_IN6:
 	case SIOCSIFINFO_FLAGS:
 	case SIOCSIFINFO_IN6:
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NETINET_ND6);
 			if (error)
 				return (error);
 		}
 		/* FALLTHROUGH */
 	case OSIOCGIFINFO_IN6:
 	case SIOCGIFINFO_IN6:
 	case SIOCGDRLST_IN6:
 	case SIOCGPRLST_IN6:
 	case SIOCGNBRINFO_IN6:
 	case SIOCGDEFIFACE_IN6:
 		return (nd6_ioctl(cmd, data, ifp));
 
 #ifdef COMPAT_FREEBSD32
 	case SIOCGDEFIFACE32_IN6:
 		{
 			struct in6_ndifreq ndif;
 			struct in6_ndifreq32 *ndif32;
 
 			error = nd6_ioctl(SIOCGDEFIFACE_IN6, (caddr_t)&ndif,
 			    ifp);
 			if (error)
 				return (error);
 			ndif32 = (struct in6_ndifreq32 *)data;
 			ndif32->ifindex = ndif.ifindex;
 			return (0);
 		}
 #endif
 	}
 
 	switch (cmd) {
 	case SIOCSIFPREFIX_IN6:
 	case SIOCDIFPREFIX_IN6:
 	case SIOCAIFPREFIX_IN6:
 	case SIOCCIFPREFIX_IN6:
 	case SIOCSGIFPREFIX_IN6:
 	case SIOCGIFPREFIX_IN6:
 		log(LOG_NOTICE,
 		    "prefix ioctls are now invalidated. "
 		    "please use ifconfig.\n");
 		return (EOPNOTSUPP);
 	}
 
 	switch (cmd) {
 	case SIOCSSCOPE6:
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NETINET_SCOPE6);
 			if (error)
 				return (error);
 		}
 		/* FALLTHROUGH */
 	case SIOCGSCOPE6:
 	case SIOCGSCOPE6DEF:
 		return (scope6_ioctl(cmd, data, ifp));
 	}
 
 	/*
 	 * Find address for this interface, if it exists.
 	 *
 	 * In netinet code, we have checked ifra_addr in SIOCSIF*ADDR operation
 	 * only, and used the first interface address as the target of other
 	 * operations (without checking ifra_addr).  This was because netinet
 	 * code/API assumed at most 1 interface address per interface.
 	 * Since IPv6 allows a node to assign multiple addresses
 	 * on a single interface, we almost always look and check the
 	 * presence of ifra_addr, and reject invalid ones here.
 	 * It also decreases duplicated code among SIOC*_IN6 operations.
 	 */
 	switch (cmd) {
 	case SIOCAIFADDR_IN6:
 	case SIOCSIFPHYADDR_IN6:
 		sa6 = &ifra->ifra_addr;
 		break;
 	case SIOCSIFADDR_IN6:
 	case SIOCGIFADDR_IN6:
 	case SIOCSIFDSTADDR_IN6:
 	case SIOCSIFNETMASK_IN6:
 	case SIOCGIFDSTADDR_IN6:
 	case SIOCGIFNETMASK_IN6:
 	case SIOCDIFADDR_IN6:
 	case SIOCGIFPSRCADDR_IN6:
 	case SIOCGIFPDSTADDR_IN6:
 	case SIOCGIFAFLAG_IN6:
 	case SIOCSNDFLUSH_IN6:
 	case SIOCSPFXFLUSH_IN6:
 	case SIOCSRTRFLUSH_IN6:
 	case SIOCGIFALIFETIME_IN6:
 	case SIOCSIFALIFETIME_IN6:
 	case SIOCGIFSTAT_IN6:
 	case SIOCGIFSTAT_ICMP6:
 		sa6 = &ifr->ifr_addr;
 		break;
 	case SIOCSIFADDR:
 	case SIOCSIFBRDADDR:
 	case SIOCSIFDSTADDR:
 	case SIOCSIFNETMASK:
 		/*
 		 * Although we should pass any non-INET6 ioctl requests
 		 * down to driver, we filter some legacy INET requests.
 		 * Drivers trust SIOCSIFADDR et al to come from an already
 		 * privileged layer, and do not perform any credentials
 		 * checks or input validation.
 		 */
 		return (EINVAL);
 	default:
 		sa6 = NULL;
 		break;
 	}
 	if (sa6 && sa6->sin6_family == AF_INET6) {
 		if (sa6->sin6_scope_id != 0)
 			error = sa6_embedscope(sa6, 0);
 		else
 			error = in6_setscope(&sa6->sin6_addr, ifp, NULL);
 		if (error != 0)
 			return (error);
 		if (td != NULL && (error = prison_check_ip6(td->td_ucred,
 		    &sa6->sin6_addr)) != 0)
 			return (error);
 		ia = in6ifa_ifpwithaddr(ifp, &sa6->sin6_addr);
 	} else
 		ia = NULL;
 
 	/*
 	 * Per-command validation and privilege checks.  From here on every
 	 * exit goes through "out" so that the reference on ia is dropped.
 	 */
 	switch (cmd) {
 	case SIOCSIFADDR_IN6:
 	case SIOCSIFDSTADDR_IN6:
 	case SIOCSIFNETMASK_IN6:
 		/*
 		 * Since IPv6 allows a node to assign multiple addresses
 		 * on a single interface, SIOCSIFxxx ioctls are deprecated.
 		 */
 		/* we decided to obsolete this command (20000704) */
 		error = EINVAL;
 		goto out;
 
 	case SIOCDIFADDR_IN6:
 		/*
 		 * for IPv4, we look for existing in_ifaddr here to allow
 		 * "ifconfig if0 delete" to remove the first IPv4 address on
 		 * the interface.  For IPv6, as the spec allows multiple
 		 * interface address from the day one, we consider "remove the
 		 * first one" semantics to be not preferable.
 		 */
 		if (ia == NULL) {
 			error = EADDRNOTAVAIL;
 			goto out;
 		}
 		/* FALLTHROUGH */
 	case SIOCAIFADDR_IN6:
 		/*
 		 * We always require users to specify a valid IPv6 address for
 		 * the corresponding operation.
 		 */
 		if (ifra->ifra_addr.sin6_family != AF_INET6 ||
 		    ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6)) {
 			error = EAFNOSUPPORT;
 			goto out;
 		}
 
 		if (td != NULL) {
 			error = priv_check(td, (cmd == SIOCDIFADDR_IN6) ?
 			    PRIV_NET_DELIFADDR : PRIV_NET_ADDIFADDR);
 			if (error)
 				goto out;
 		}
 		/* FALLTHROUGH */
 	case SIOCGIFSTAT_IN6:
 	case SIOCGIFSTAT_ICMP6:
 		if (ifp->if_afdata[AF_INET6] == NULL) {
 			error = EPFNOSUPPORT;
 			goto out;
 		}
 		break;
 
 	case SIOCGIFADDR_IN6:
 		/* This interface is basically deprecated. use SIOCGIFCONF. */
 		/* FALLTHROUGH */
 	case SIOCGIFAFLAG_IN6:
 	case SIOCGIFNETMASK_IN6:
 	case SIOCGIFDSTADDR_IN6:
 	case SIOCGIFALIFETIME_IN6:
 		/* must think again about its semantics */
 		if (ia == NULL) {
 			error = EADDRNOTAVAIL;
 			goto out;
 		}
 		break;
 
 	case SIOCSIFALIFETIME_IN6:
 	    {
 		struct in6_addrlifetime *lt;
 
 		if (td != NULL) {
 			error = priv_check(td, PRIV_NETINET_ALIFETIME6);
 			if (error)
 				goto out;
 		}
 		if (ia == NULL) {
 			error = EADDRNOTAVAIL;
 			goto out;
 		}
 		/* sanity for overflow - beware unsigned */
 		lt = &ifr->ifr_ifru.ifru_lifetime;
 		if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME &&
 		    lt->ia6t_vltime + time_uptime < time_uptime) {
 			error = EINVAL;
 			goto out;
 		}
 		if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME &&
 		    lt->ia6t_pltime + time_uptime < time_uptime) {
 			error = EINVAL;
 			goto out;
 		}
 		break;
 	    }
 	}
 
 	/* Carry out the command. */
 	switch (cmd) {
 	case SIOCGIFADDR_IN6:
 		ifr->ifr_addr = ia->ia_addr;
 		if ((error = sa6_recoverscope(&ifr->ifr_addr)) != 0)
 			goto out;
 		break;
 
 	case SIOCGIFDSTADDR_IN6:
 		if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
 			error = EINVAL;
 			goto out;
 		}
 		/*
 		 * XXX: should we check if ifa_dstaddr is NULL and return
 		 * an error?
 		 */
 		ifr->ifr_dstaddr = ia->ia_dstaddr;
 		if ((error = sa6_recoverscope(&ifr->ifr_dstaddr)) != 0)
 			goto out;
 		break;
 
 	case SIOCGIFNETMASK_IN6:
 		ifr->ifr_addr = ia->ia_prefixmask;
 		break;
 
 	case SIOCGIFAFLAG_IN6:
 		ifr->ifr_ifru.ifru_flags6 = ia->ia6_flags;
 		break;
 
 	case SIOCGIFSTAT_IN6:
 		COUNTER_ARRAY_COPY(((struct in6_ifextra *)
 		    ifp->if_afdata[AF_INET6])->in6_ifstat,
 		    &ifr->ifr_ifru.ifru_stat,
 		    sizeof(struct in6_ifstat) / sizeof(uint64_t));
 		break;
 
 	case SIOCGIFSTAT_ICMP6:
 		COUNTER_ARRAY_COPY(((struct in6_ifextra *)
 		    ifp->if_afdata[AF_INET6])->icmp6_ifstat,
 		    &ifr->ifr_ifru.ifru_icmp6stat,
 		    sizeof(struct icmp6_ifstat) / sizeof(uint64_t));
 		break;
 
 	case SIOCGIFALIFETIME_IN6:
 		ifr->ifr_ifru.ifru_lifetime = ia->ia6_lifetime;
 		if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 			time_t maxexpire;
 			struct in6_addrlifetime *retlt =
 			    &ifr->ifr_ifru.ifru_lifetime;
 
 			/*
 			 * XXX: adjust expiration time assuming time_t is
 			 * signed.
 			 */
 			maxexpire = (-1) &
 			    ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
 			if (ia->ia6_lifetime.ia6t_vltime <
 			    maxexpire - ia->ia6_updatetime) {
 				retlt->ia6t_expire = ia->ia6_updatetime +
 				    ia->ia6_lifetime.ia6t_vltime;
 			} else
 				retlt->ia6t_expire = maxexpire;
 		}
 		if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 			time_t maxexpire;
 			struct in6_addrlifetime *retlt =
 			    &ifr->ifr_ifru.ifru_lifetime;
 
 			/*
 			 * XXX: adjust expiration time assuming time_t is
 			 * signed.
 			 */
 			maxexpire = (-1) &
 			    ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
 			if (ia->ia6_lifetime.ia6t_pltime <
 			    maxexpire - ia->ia6_updatetime) {
 				retlt->ia6t_preferred = ia->ia6_updatetime +
 				    ia->ia6_lifetime.ia6t_pltime;
 			} else
 				retlt->ia6t_preferred = maxexpire;
 		}
 		break;
 
 	case SIOCSIFALIFETIME_IN6:
 		ia->ia6_lifetime = ifr->ifr_ifru.ifru_lifetime;
 		/* for sanity */
 		if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 			ia->ia6_lifetime.ia6t_expire =
 				time_uptime + ia->ia6_lifetime.ia6t_vltime;
 		} else
 			ia->ia6_lifetime.ia6t_expire = 0;
 		if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 			ia->ia6_lifetime.ia6t_preferred =
 				time_uptime + ia->ia6_lifetime.ia6t_pltime;
 		} else
 			ia->ia6_lifetime.ia6t_preferred = 0;
 		break;
 
 	case SIOCAIFADDR_IN6:
 	{
 		struct nd_prefixctl pr0;
 		struct nd_prefix *pr;
 
 		/*
 		 * first, make or update the interface address structure,
 		 * and link it to the list.
 		 */
 		if ((error = in6_update_ifa(ifp, ifra, ia, 0)) != 0)
 			goto out;
 		/* Drop the earlier lookup's reference before looking up again. */
 		if (ia != NULL)
 			ifa_free(&ia->ia_ifa);
 		if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr))
 		    == NULL) {
 			/*
 			 * this can happen when the user specifies a valid
 			 * lifetime of 0.
 			 */
 			break;
 		}
 
 		/* CARP is attached only for the current (non-compat) ioctl. */
 		if (cmd == ocmd && ifra->ifra_vhid > 0) {
 			if (carp_attach_p != NULL)
 				error = (*carp_attach_p)(&ia->ia_ifa,
 				    ifra->ifra_vhid);
 			else
 				error = EPROTONOSUPPORT;
 			if (error)
 				goto out;
 			else
 				carp_attached = 1;
 		}
 
 		/*
 		 * then, make the prefix on-link on the interface.
 		 * XXX: we'd rather create the prefix before the address, but
 		 * we need at least one address to install the corresponding
 		 * interface route, so we configure the address first.
 		 */
 
 		/*
 		 * convert mask to prefix length (prefixmask has already
 		 * been validated in in6_update_ifa()).
 		 */
 		bzero(&pr0, sizeof(pr0));
 		pr0.ndpr_ifp = ifp;
 		pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
 		    NULL);
 		if (pr0.ndpr_plen == 128) {
 			/* we don't need to install a host route. */
 			goto aifaddr_out;
 		}
 		pr0.ndpr_prefix = ifra->ifra_addr;
 		/* apply the mask for safety. */
 		IN6_MASK_ADDR(&pr0.ndpr_prefix.sin6_addr,
 		    &ifra->ifra_prefixmask.sin6_addr);
 
 		/*
 		 * XXX: since we don't have an API to set prefix (not address)
 		 * lifetimes, we just use the same lifetimes as addresses.
 		 * The (temporarily) installed lifetimes can be overridden by
 		 * later advertised RAs (when accept_rtadv is non 0), which is
 		 * an intended behavior.
 		 */
 		pr0.ndpr_raf_onlink = 1; /* should be configurable? */
 		pr0.ndpr_raf_auto =
 		    ((ifra->ifra_flags & IN6_IFF_AUTOCONF) != 0);
 		pr0.ndpr_vltime = ifra->ifra_lifetime.ia6t_vltime;
 		pr0.ndpr_pltime = ifra->ifra_lifetime.ia6t_pltime;
 
 		/* add the prefix if not yet. */
 		if ((pr = nd6_prefix_lookup(&pr0)) == NULL) {
 			/*
 			 * nd6_prelist_add will install the corresponding
 			 * interface route.
 			 */
 			if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) {
 				if (carp_attached)
 					(*carp_detach_p)(&ia->ia_ifa);
 				goto out;
 			}
 			if (pr == NULL) {
 				if (carp_attached)
 					(*carp_detach_p)(&ia->ia_ifa);
 				log(LOG_ERR, "nd6_prelist_add succeeded but "
 				    "no prefix\n");
 				error = EINVAL;
 				goto out;
 			}
 		}
 
 		/* relate the address to the prefix */
 		if (ia->ia6_ndpr == NULL) {
 			ia->ia6_ndpr = pr;
 			pr->ndpr_refcnt++;
 
 			/*
 			 * If this is the first autoconf address from the
 			 * prefix, create a temporary address as well
 			 * (when required).
 			 */
 			if ((ia->ia6_flags & IN6_IFF_AUTOCONF) &&
 			    V_ip6_use_tempaddr && pr->ndpr_refcnt == 1) {
 				int e;
 				if ((e = in6_tmpifadd(ia, 1, 0)) != 0) {
 					log(LOG_NOTICE, "in6_control: failed "
 					    "to create a temporary address, "
 					    "errno=%d\n", e);
 				}
 			}
 		}
 
 		/*
 		 * this might affect the status of autoconfigured addresses,
 		 * that is, this address might make other addresses detached.
 		 */
 		pfxlist_onlink_check();
 aifaddr_out:
 		if (error != 0 || ia == NULL)
 			break;
 		/*
 		 * Try to clear the flag when a new IPv6 address is added
 		 * onto an IFDISABLED interface and it succeeds.
 		 */
 		if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
 			struct in6_ndireq nd;
 
 			memset(&nd, 0, sizeof(nd));
 			nd.ndi.flags = ND_IFINFO(ifp)->flags;
 			nd.ndi.flags &= ~ND6_IFF_IFDISABLED;
 			/*
 			 * nd6_ioctl() is treated elsewhere in this function
 			 * as returning 0 or an errno value, so test for
 			 * nonzero; a "< 0" test would never fire.
 			 */
 			if (nd6_ioctl(SIOCSIFINFO_FLAGS, (caddr_t)&nd, ifp)
 			    != 0)
 				log(LOG_NOTICE, "SIOCAIFADDR_IN6: "
 				    "SIOCSIFINFO_FLAGS for -ifdisabled "
 				    "failed.\n");
 			/*
 			 * Ignore failure of clearing the flag intentionally.
 			 * The failure means address duplication was detected.
 			 */
 		}
 		EVENTHANDLER_INVOKE(ifaddr_event, ifp);
 		break;
 	}
 
 	case SIOCDIFADDR_IN6:
 	{
 		struct nd_prefix *pr;
 
 		/*
 		 * If the address being deleted is the only one that owns
 		 * the corresponding prefix, expire the prefix as well.
 		 * XXX: theoretically, we don't have to worry about such
 		 * relationship, since we separate the address management
 		 * and the prefix management.  We do this, however, to provide
 		 * as much backward compatibility as possible in terms of
 		 * the ioctl operation.
 		 * Note that in6_purgeaddr() will decrement ndpr_refcnt.
 		 */
 		pr = ia->ia6_ndpr;
 		in6_purgeaddr(&ia->ia_ifa);
 		if (pr && pr->ndpr_refcnt == 0)
 			prelist_remove(pr);
 		EVENTHANDLER_INVOKE(ifaddr_event, ifp);
 		break;
 	}
 
 	default:
 		/* Not handled here: hand the request to the driver. */
 		if (ifp->if_ioctl == NULL) {
 			error = EOPNOTSUPP;
 			goto out;
 		}
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		goto out;
 	}
 
 	error = 0;
 out:
 	if (ia != NULL)
 		ifa_free(&ia->ia_ifa);
 	return (error);
 }
 
 
 /*
  * Join necessary multicast groups.  Factored out from in6_update_ifa().
  * This entire work should only be done once, for the default FIB.
  */
 static int
 in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra,
     struct in6_ifaddr *ia, int flags, struct in6_multi **in6m_sol)
 {
 	char ip6buf[INET6_ADDRSTRLEN];
 	struct in6_addr mltaddr;
 	struct in6_multi_mship *imm;
 	int delay, error;
 
 	KASSERT(in6m_sol != NULL, ("%s: in6m_sol is NULL", __func__));
 
 	/* Join solicited multicast addr for new host id. */
 	bzero(&mltaddr, sizeof(struct in6_addr));
 	mltaddr.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
 	mltaddr.s6_addr32[2] = htonl(1);
 	mltaddr.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3];
 	mltaddr.s6_addr8[12] = 0xff;
 	if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0) {
 		/* XXX: should not happen */
 		log(LOG_ERR, "%s: in6_setscope failed\n", __func__);
 		goto cleanup;
 	}
 	delay = error = 0;
 	if ((flags & IN6_IFAUPDATE_DADDELAY)) {
 		/*
 		 * We need a random delay for DAD on the address being
 		 * configured.  It also means delaying transmission of the
 		 * corresponding MLD report to avoid report collision.
 		 * [RFC 4861, Section 6.3.7]
 		 */
 		delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz);
 	}
 	imm = in6_joingroup(ifp, &mltaddr, &error, delay);
 	if (imm == NULL) {
 		nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s "
 		    "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr),
 		    if_name(ifp), error));
 		goto cleanup;
 	}
 	LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 	*in6m_sol = imm->i6mm_maddr;
 
 	/*
 	 * Join link-local all-nodes address.
 	 */
 	mltaddr = in6addr_linklocal_allnodes;
 	if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0)
 		goto cleanup; /* XXX: should not fail */
 
 	imm = in6_joingroup(ifp, &mltaddr, &error, 0);
 	if (imm == NULL) {
 		nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s "
 		    "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr),
 		    if_name(ifp), error));
 		goto cleanup;
 	}
 	LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 
 	/*
 	 * Join node information group address.
 	 */
 	delay = 0;
 	if ((flags & IN6_IFAUPDATE_DADDELAY)) {
 		/*
 		 * The spec does not say anything about delay for this group,
 		 * but the same logic should apply.
 		 */
 		delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz);
 	}
 	if (in6_nigroup(ifp, NULL, -1, &mltaddr) == 0) {
 		/* XXX jinmei */
 		imm = in6_joingroup(ifp, &mltaddr, &error, delay);
 		if (imm == NULL)
 			nd6log((LOG_WARNING,
 			    "%s: in6_joingroup failed for %s on %s "
 			    "(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
 			    &mltaddr), if_name(ifp), error));
 			/* XXX not very fatal, go on... */
 		else
 			LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 	}
 	if (V_icmp6_nodeinfo_oldmcprefix &&
 	    in6_nigroup_oldmcprefix(ifp, NULL, -1, &mltaddr) == 0) {
 		imm = in6_joingroup(ifp, &mltaddr, &error, delay);
 		if (imm == NULL)
 			nd6log((LOG_WARNING,
 			    "%s: in6_joingroup failed for %s on %s "
 			    "(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
 			    &mltaddr), if_name(ifp), error));
 			/* XXX not very fatal, go on... */
 		else
 			LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 	}
 
 	/*
 	 * Join interface-local all-nodes address.
 	 * (ff01::1%ifN, and ff01::%ifN/32)
 	 */
 	mltaddr = in6addr_nodelocal_allnodes;
 	if ((error = in6_setscope(&mltaddr, ifp, NULL)) != 0)
 		goto cleanup; /* XXX: should not fail */
 
 	imm = in6_joingroup(ifp, &mltaddr, &error, 0);
 	if (imm == NULL) {
 		nd6log((LOG_WARNING, "%s: in6_joingroup failed for %s on %s "
 		    "(errno=%d)\n", __func__, ip6_sprintf(ip6buf,
 		    &mltaddr), if_name(ifp), error));
 		goto cleanup;
 	}
 	LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
 
 cleanup:
 	return (error);
 }
 
 /*
  * Create or refresh an IPv6 address on an interface.
  *
  * The request is validated first.  When the caller passes ia == NULL a
  * fresh in6_ifaddr is allocated and linked before the common update step
  * runs; a freshly created address is additionally announced through
  * in6_broadcast_ifa().  Split out of in6_control().
  *
  * Returns 0 on success or an errno value; ENOBUFS when allocation fails.
  */
 int
 in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
     struct in6_ifaddr *ia, int flags)
 {
 	int created, rc;
 
 	rc = in6_validate_ifra(ifp, ifra, ia, flags);
 	if (rc != 0)
 		return (rc);
 
 	/* No existing entry means we are creating one. */
 	created = (ia == NULL);
 	if (created) {
 		ia = in6_alloc_ifa(ifp, ifra, flags);
 		if (ia == NULL)
 			return (ENOBUFS);
 	}
 
 	rc = in6_update_ifa_internal(ifp, ifra, ia, created, flags);
 	if (rc == 0) {
 		if (created)
 			rc = in6_broadcast_ifa(ifp, ifra, ia, flags);
 		return (rc);
 	}
 
 	/* The update failed: undo the allocation we made above, if any. */
 	if (created) {
 		in6_unlink_ifa(ia, ifp);
 		ifa_free(&ia->ia_ifa);
 	}
 	return (rc);
 }
 
 /*
  * Fill in basic IPv6 address request info.
  */
 void
 in6_prepare_ifra(struct in6_aliasreq *ifra, const struct in6_addr *addr,
     const struct in6_addr *mask)
 {
 
 	memset(ifra, 0, sizeof(struct in6_aliasreq));
 
 	ifra->ifra_addr.sin6_family = AF_INET6;
 	ifra->ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
 	if (addr != NULL)
 		ifra->ifra_addr.sin6_addr = *addr;
 
 	ifra->ifra_prefixmask.sin6_family = AF_INET6;
 	ifra->ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
 	if (mask != NULL)
 		ifra->ifra_prefixmask.sin6_addr = *mask;
 }
 
 /*
  * Sanity-check an address request before in6_update_ifa() acts on it.
  *
  * ifp:   interface the address is (to be) configured on.
  * ifra:  the request; ifra_dstaddr is rewritten in place with the scope
  *        zone filled in/embedded when a destination is given on a p2p or
  *        loopback interface.
  * ia:    existing address being updated, or NULL when creating a new one.
  * flags: not referenced by this function.
  *
  * Returns 0 when the request passes all checks, EINVAL or EAFNOSUPPORT
  * otherwise.
  */
 static int
 in6_validate_ifra(struct ifnet *ifp, struct in6_aliasreq *ifra,
     struct in6_ifaddr *ia, int flags)
 {
 	int plen = -1;
 	struct sockaddr_in6 dst6;
 	struct in6_addrlifetime *lt;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	/* Validate parameters */
 	if (ifp == NULL || ifra == NULL) /* this maybe redundant */
 		return (EINVAL);
 
 	/*
 	 * The destination address for a p2p link must have a family
 	 * of AF_UNSPEC or AF_INET6.
 	 */
 	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
 	    ifra->ifra_dstaddr.sin6_family != AF_INET6 &&
 	    ifra->ifra_dstaddr.sin6_family != AF_UNSPEC)
 		return (EAFNOSUPPORT);
 
 	/*
 	 * Validate address
 	 */
 	if (ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6) ||
 	    ifra->ifra_addr.sin6_family != AF_INET6)
 		return (EINVAL);
 
 	/*
 	 * validate ifra_prefixmask.  don't check sin6_family, netmask
 	 * does not carry fields other than sin6_len.
 	 */
 	if (ifra->ifra_prefixmask.sin6_len > sizeof(struct sockaddr_in6))
 		return (EINVAL);
 	/*
 	 * Because the IPv6 address architecture is classless, we require
 	 * users to specify a (non 0) prefix length (mask) for a new address.
 	 * We also require the prefix (when specified) mask is valid, and thus
 	 * reject a non-consecutive mask.
 	 */
 	if (ia == NULL && ifra->ifra_prefixmask.sin6_len == 0)
 		return (EINVAL);
 	if (ifra->ifra_prefixmask.sin6_len != 0) {
 		/* The second argument bounds the scan to sin6_len bytes. */
 		plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
 		    (u_char *)&ifra->ifra_prefixmask +
 		    ifra->ifra_prefixmask.sin6_len);
 		if (plen <= 0)
 			return (EINVAL);
 	} else {
 		/*
 		 * In this case, ia must not be NULL.  We just use its prefix
 		 * length.
 		 */
 		plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL);
 	}
 	/*
 	 * If the destination address on a p2p interface is specified,
 	 * and the address is a scoped one, validate/set the scope
 	 * zone identifier.
 	 */
 	dst6 = ifra->ifra_dstaddr;
 	if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) != 0 &&
 	    (dst6.sin6_family == AF_INET6)) {
 		struct in6_addr in6_tmp;
 		u_int32_t zoneid;
 
 		/* Work on a scratch copy; only the zone id is wanted here. */
 		in6_tmp = dst6.sin6_addr;
 		if (in6_setscope(&in6_tmp, ifp, &zoneid))
 			return (EINVAL); /* XXX: should be impossible */
 
 		if (dst6.sin6_scope_id != 0) {
 			if (dst6.sin6_scope_id != zoneid)
 				return (EINVAL);
 		} else		/* user omit to specify the ID. */
 			dst6.sin6_scope_id = zoneid;
 
 		/* convert into the internal form */
 		if (sa6_embedscope(&dst6, 0))
 			return (EINVAL); /* XXX: should be impossible */
 	}
 	/* Modify original ifra_dstaddr to reflect changes */
 	ifra->ifra_dstaddr = dst6;
 
 	/*
 	 * The destination address can be specified only for a p2p or a
 	 * loopback interface.  If specified, the corresponding prefix length
 	 * must be 128.
 	 */
 	if (ifra->ifra_dstaddr.sin6_family == AF_INET6) {
 		if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) == 0) {
 			/* XXX: noisy message */
 			nd6log((LOG_INFO, "in6_update_ifa: a destination can "
 			    "be specified for a p2p or a loopback IF only\n"));
 			return (EINVAL);
 		}
 		if (plen != 128) {
 			nd6log((LOG_INFO, "in6_update_ifa: prefixlen should "
 			    "be 128 when dstaddr is specified\n"));
 			return (EINVAL);
 		}
 	}
 	/* lifetime consistency check */
 	lt = &ifra->ifra_lifetime;
 	if (lt->ia6t_pltime > lt->ia6t_vltime)
 		return (EINVAL);
 	if (lt->ia6t_vltime == 0) {
 		/*
 		 * the following log might be noisy, but this is a typical
 		 * configuration mistake or a tool's bug.
 		 */
 		nd6log((LOG_INFO,
 		    "in6_update_ifa: valid lifetime is 0 for %s\n",
 		    ip6_sprintf(ip6buf, &ifra->ifra_addr.sin6_addr)));
 
 		/*
 		 * NOTE(review): returning 0 reports "valid" to the caller;
 		 * in6_update_ifa() then proceeds to allocate the address
 		 * rather than doing nothing -- confirm this is intended.
 		 */
 		if (ia == NULL)
 			return (0); /* there's nothing to do */
 	}
 
 	/* Check prefix mask */
 	if (ia != NULL && ifra->ifra_prefixmask.sin6_len != 0) {
 		/*
 		 * We prohibit changing the prefix length of an existing
 		 * address, because
 		 * + such an operation should be rare in IPv6, and
 		 * + the operation would confuse prefix management.
 		 */
 		if (ia->ia_prefixmask.sin6_len != 0 &&
 		    in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) {
 			nd6log((LOG_INFO, "in6_validate_ifa: the prefix length "
 			    "of an existing %s address should not be changed\n",
 			    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
 
 			return (EINVAL);
 		}
 	}
 
 	return (0);
 }
 
 
 /*
  * Allocate a new ifaddr and link it into chains.
  *
  * ifp:   interface that will own the address.
  * ifra:  request supplying the address and (optional) prefix mask.
  * flags: not referenced by this function.
  *
  * Returns the new in6_ifaddr, already inserted on the interface address
  * list, the global V_in6_ifaddrhead list and the address hash, or NULL
  * when the M_NOWAIT allocation fails.  One reference is taken for each of
  * the two lists.
  * NOTE(review): presumably ifa_alloc() also hands back one reference of
  * its own which the caller must drop -- confirm against ifa_alloc().
  */
 static struct in6_ifaddr *
 in6_alloc_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, int flags)
 {
 	struct in6_ifaddr *ia;
 
 	/*
 	 * When in6_alloc_ifa() is called in a process of a received
 	 * RA, it is called under an interrupt context.  So, we should
 	 * call malloc with M_NOWAIT.
 	 */
 	ia = (struct in6_ifaddr *)ifa_alloc(sizeof(*ia), M_NOWAIT);
 	if (ia == NULL)
 		return (NULL);
 	LIST_INIT(&ia->ia6_memberships);
 	/* Initialize the address and masks, and put time stamp */
 	ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
 	/*
 	 * The family/length stores below are overwritten by the structure
 	 * assignment from ifra->ifra_addr that follows them.
 	 */
 	ia->ia_addr.sin6_family = AF_INET6;
 	ia->ia_addr.sin6_len = sizeof(ia->ia_addr);
 	/* XXX: Can we assign ,sin6_addr and skip the rest? */
 	ia->ia_addr = ifra->ifra_addr;
 	ia->ia6_createtime = time_uptime;
 	if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) {
 		/*
 		 * Some functions expect that ifa_dstaddr is not
 		 * NULL for p2p interfaces.
 		 */
 		ia->ia_ifa.ifa_dstaddr =
 		    (struct sockaddr *)&ia->ia_dstaddr;
 	} else {
 		ia->ia_ifa.ifa_dstaddr = NULL;
 	}
 
 	/* set prefix mask if any */
 	ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask;
 	if (ifra->ifra_prefixmask.sin6_len != 0) {
 		ia->ia_prefixmask.sin6_family = AF_INET6;
 		ia->ia_prefixmask.sin6_len = ifra->ifra_prefixmask.sin6_len;
 		ia->ia_prefixmask.sin6_addr = ifra->ifra_prefixmask.sin6_addr;
 	}
 
 	ia->ia_ifp = ifp;
 	/* Each list insertion below holds its own reference on the ifaddr. */
 	ifa_ref(&ia->ia_ifa);			/* if_addrhead */
 	IF_ADDR_WLOCK(ifp);
 	TAILQ_INSERT_TAIL(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
 	IF_ADDR_WUNLOCK(ifp);
 
 	ifa_ref(&ia->ia_ifa);			/* in6_ifaddrhead */
 	IN6_IFADDR_WLOCK();
 	TAILQ_INSERT_TAIL(&V_in6_ifaddrhead, ia, ia_link);
 	LIST_INSERT_HEAD(IN6ADDR_HASH(&ia->ia_addr.sin6_addr), ia, ia6_hash);
 	IN6_IFADDR_WUNLOCK();
 
 	return (ia);
 }
 
 /*
  * Update/configure interface address parameters:
  *
  * 1) Update lifetime
  * 2) Update interface address flags
  * 3) Notify other subsystems
  *
  * ifp:       interface owning the address.
  * ifra:      request carrying the new lifetime and flags.
  * ia:        address entry to update (never NULL here).
  * hostIsNew: non-zero when ia was just created by the caller.
  * flags:     not referenced by this function.
  *
  * Returns the result of in6_notify_ifa().
  */
 static int
 in6_update_ifa_internal(struct ifnet *ifp, struct in6_aliasreq *ifra,
     struct in6_ifaddr *ia, int hostIsNew, int flags)
 {
 	int error;
 
 	/* update timestamp */
 	ia->ia6_updatetime = time_uptime;
 
 	/*
 	 * Set lifetimes.  We do not refer to ia6t_expire and ia6t_preferred
 	 * to see if the address is deprecated or invalidated, but initialize
 	 * these members for applications.
 	 */
 	ia->ia6_lifetime = ifra->ifra_lifetime;
 	/* An infinite lifetime is stored as an expiry/preferred time of 0. */
 	if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
 		ia->ia6_lifetime.ia6t_expire =
 		    time_uptime + ia->ia6_lifetime.ia6t_vltime;
 	} else
 		ia->ia6_lifetime.ia6t_expire = 0;
 	if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
 		ia->ia6_lifetime.ia6t_preferred =
 		    time_uptime + ia->ia6_lifetime.ia6t_pltime;
 	} else
 		ia->ia6_lifetime.ia6t_preferred = 0;
 
 	/*
 	 * backward compatibility - if IN6_IFF_DEPRECATED is set from the
 	 * userland, make it deprecated.
 	 */
 	if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) {
 		ia->ia6_lifetime.ia6t_pltime = 0;
 		ia->ia6_lifetime.ia6t_preferred = time_uptime;
 	}
 
 	/*
 	 * configure address flags.
 	 */
 	ia->ia6_flags = ifra->ifra_flags;
 
 	/*
 	 * Make the address tentative before joining multicast addresses,
 	 * so that corresponding MLD responses would not have a tentative
 	 * source address.
 	 */
 	ia->ia6_flags &= ~IN6_IFF_DUPLICATED;	/* safety */
 	if (hostIsNew && in6if_do_dad(ifp))
 		ia->ia6_flags |= IN6_IFF_TENTATIVE;
 
 	/* DAD should be performed after ND6_IFF_IFDISABLED is cleared. */
 	if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
 		ia->ia6_flags |= IN6_IFF_TENTATIVE;
 
 	/* notify other subsystems */
 	error = in6_notify_ifa(ifp, ia, ifra, hostIsNew);
 
 	return (error);
 }
 
 /*
  * Do link-level ifa job:
  * 1) Add lle entry for added address
  * 2) Notifies routing socket users about new address
  *    (NOTE(review): no routing-socket call is made directly in this body;
  *    presumably it happens inside one of the helpers -- confirm)
  * 3) join appropriate multicast group
  * 4) start DAD if enabled
  *
  * Every exit path drops one reference on ia with ifa_free(); on the two
  * failure paths the address is first torn down with in6_purgeaddr().
  * Returns 0 on success or the error from the failing step.
  */
 static int
 in6_broadcast_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
     struct in6_ifaddr *ia, int flags)
 {
 	struct in6_multi *in6m_sol;
 	int error = 0;
 
 	/* Add local address to lltable, if necessary (ex. on p2p link). */
 	if ((error = nd6_add_ifa_lle(ia)) != 0) {
 		in6_purgeaddr(&ia->ia_ifa);
 		ifa_free(&ia->ia_ifa);
 		return (error);
 	}
 
 	/* Join necessary multicast groups. */
 	in6m_sol = NULL;
 	if ((ifp->if_flags & IFF_MULTICAST) != 0) {
 		error = in6_update_ifa_join_mc(ifp, ifra, ia, flags, &in6m_sol);
 		if (error != 0) {
 			in6_purgeaddr(&ia->ia_ifa);
 			ifa_free(&ia->ia_ifa);
 			return (error);
 		}
 	}
 
 	/*
 	 * Perform DAD, if needed.
 	 * XXX It may be of use, if we can administratively disable DAD.
 	 */
 	if (in6if_do_dad(ifp) && ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) &&
 	    (ia->ia6_flags & IN6_IFF_TENTATIVE))
 	{
 		int delay, mindelay, maxdelay;
 
 		delay = 0;
 		if ((flags & IN6_IFAUPDATE_DADDELAY)) {
 			/*
 			 * We need to impose a delay before sending an NS
 			 * for DAD.  Check if we also needed a delay for the
 			 * corresponding MLD message.  If we did, the delay
 			 * should be larger than the MLD delay (this could be
 			 * relaxed a bit, but this simple logic is at least
 			 * safe).
 			 * XXX: Break data hiding guidelines and look at
 			 * state for the solicited multicast group.
 			 */
 			mindelay = 0;
 			if (in6m_sol != NULL &&
 			    in6m_sol->in6m_state == MLD_REPORTING_MEMBER) {
 				mindelay = in6m_sol->in6m_timer;
 			}
 			maxdelay = MAX_RTR_SOLICITATION_DELAY * hz;
 			/* Guard the modulo below against a zero divisor. */
 			if (maxdelay - mindelay == 0)
 				delay = 0;
 			else {
 				delay =
 				    (arc4random() % (maxdelay - mindelay)) +
 				    mindelay;
 			}
 		}
 		nd6_dad_start((struct ifaddr *)ia, delay);
 	}
 
 	ifa_free(&ia->ia_ifa);
 	return (error);
 }
 
 /*
  * Tear down an IPv6 interface address.
  *
  * In order: detaches CARP (if attached), removes the loopback route to
  * the address, stops DAD, removes the local lltable entry, leaves every
  * multicast group recorded on the address, deletes the /128 destination
  * route if one was installed, and finally unlinks the address from the
  * interface and global lists via in6_unlink_ifa().
  *
  * ifa: the address to purge; must be the ifaddr embedded in an in6_ifaddr.
  */
 void
 in6_purgeaddr(struct ifaddr *ifa)
 {
 	struct ifnet *ifp = ifa->ifa_ifp;
 	struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa;
 	struct in6_multi_mship *imm;
 	int plen, error;
 
 	if (ifa->ifa_carp)
 		(*carp_detach_p)(ifa);
 
 	/*
 	 * Remove the loopback route to the interface address.
 	 * The check for the current setting of "nd6_useloopback"
 	 * is not needed.
 	 */
 	if (ia->ia_flags & IFA_RTSELF) {
 		error = ifa_del_loopback_route((struct ifaddr *)ia,
 		    (struct sockaddr *)&ia->ia_addr);
 		if (error == 0)
 			ia->ia_flags &= ~IFA_RTSELF;
 	}
 
 	/* stop DAD processing */
 	nd6_dad_stop(ifa);
 
 	/* Remove local address entry from lltable. */
 	nd6_rem_ifa_lle(ia);
 
 	/* Leave multicast groups. */
 	while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) {
 		LIST_REMOVE(imm, i6mm_chain);
 		in6_leavegroup(imm);
 	}
 	plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
 	if ((ia->ia_flags & IFA_ROUTE) && plen == 128) {
 		/*
 		 * The conditional must be parenthesized: '|' binds tighter
 		 * than '?:', so without the parentheses the whole
 		 * "ia_flags | (family test)" expression became the condition
 		 * and ia_flags itself was never passed to rtinit().
 		 */
 		error = rtinit(&(ia->ia_ifa), RTM_DELETE, ia->ia_flags |
 		    ((ia->ia_dstaddr.sin6_family == AF_INET6) ? RTF_HOST : 0));
 		if (error != 0)
 			log(LOG_INFO, "%s: err=%d, destination address delete "
 			    "failed\n", __func__, error);
 		/* The flag is cleared even when the delete failed. */
 		ia->ia_flags &= ~IFA_ROUTE;
 	}
 
 	in6_unlink_ifa(ia, ifp);
 }
 
 /*
  * Remove an address from the interface address list, the global
  * V_in6_ifaddrhead list and the address hash, dropping the reference
  * each list held.  Also releases the address's reference on its base
  * prefix (ia6_ndpr) and, for autoconfigured addresses, re-runs
  * pfxlist_onlink_check().
  *
  * ia:  address to unlink.
  * ifp: interface whose if_addrhead currently contains ia.
  */
 static void
 in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
 {
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	IF_ADDR_WLOCK(ifp);
 	TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
 	IF_ADDR_WUNLOCK(ifp);
 	ifa_free(&ia->ia_ifa);			/* if_addrhead */
 
 	/*
 	 * Defer the release of what might be the last reference to the
 	 * in6_ifaddr so that it can't be freed before the remainder of the
 	 * cleanup.
 	 */
 	IN6_IFADDR_WLOCK();
 	TAILQ_REMOVE(&V_in6_ifaddrhead, ia, ia_link);
 	LIST_REMOVE(ia, ia6_hash);
 	IN6_IFADDR_WUNLOCK();
 
 	/*
 	 * Release the reference to the base prefix.  There should be a
 	 * positive reference.
 	 */
 	if (ia->ia6_ndpr == NULL) {
 		/* Logged for any address without a prefix, autoconf or not. */
 		nd6log((LOG_NOTICE,
 		    "in6_unlink_ifa: autoconf'ed address "
 		    "%s has no prefix\n", ip6_sprintf(ip6buf, IA6_IN6(ia))));
 	} else {
 		ia->ia6_ndpr->ndpr_refcnt--;
 		ia->ia6_ndpr = NULL;
 	}
 
 	/*
 	 * Also, if the address being removed is autoconf'ed, call
 	 * pfxlist_onlink_check() since the release might affect the status of
 	 * other (detached) addresses.
 	 */
 	if ((ia->ia6_flags & IN6_IFF_AUTOCONF)) {
 		pfxlist_onlink_check();
 	}
 	ifa_free(&ia->ia_ifa);			/* in6_ifaddrhead */
 }
 
 /*
  * Notifies other subsystems about address change/arrival:
  * 1) Notifies device handler on first IPv6 address assignment
  * 2) Handle routing table changes for P2P links and route
  * 3) Handle routing table changes for address host route
  *
  * ifp:       interface owning the address.
  * ia:        the address being created or updated.
  * ifra:      request; only ifra_dstaddr is consulted here.
  * hostIsNew: non-zero when ia was just created.
  *
  * Returns 0 on success, or the error from the interface ioctl, from
  * rtinit(), or from ifa_add_loopback_route() (whichever ran last).
  */
 static int
 in6_notify_ifa(struct ifnet *ifp, struct in6_ifaddr *ia,
     struct in6_aliasreq *ifra, int hostIsNew)
 {
 	int	error = 0, plen, ifacount = 0;
 	struct ifaddr *ifa;
 	struct sockaddr_in6 *pdst;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	/*
 	 * Give the interface a chance to initialize
 	 * if this is its first address,
 	 */
 	if (hostIsNew != 0) {
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET6)
 				continue;
 			ifacount++;
 		}
 		IF_ADDR_RUNLOCK(ifp);
 	}
 
 	/*
 	 * ifacount stays 0 when hostIsNew is 0, so the ioctl below is also
 	 * issued on every update of an existing address.
 	 */
 	if (ifacount <= 1 && ifp->if_ioctl) {
 		error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
 		if (error)
 			return (error);
 	}
 
 	/*
 	 * If a new destination address is specified, scrub the old one and
 	 * install the new destination.  Note that the interface must be
 	 * p2p or loopback.
 	 */
 	pdst = &ifra->ifra_dstaddr;
 	if (pdst->sin6_family == AF_INET6 &&
 	    !IN6_ARE_ADDR_EQUAL(&pdst->sin6_addr, &ia->ia_dstaddr.sin6_addr)) {
 		if ((ia->ia_flags & IFA_ROUTE) != 0 &&
 		    (rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST) != 0)) {
 			nd6log((LOG_ERR, "in6_update_ifa_internal: failed to "
 			    "remove a route to the old destination: %s\n",
 			    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
 			/* proceed anyway... */
 		} else
 			ia->ia_flags &= ~IFA_ROUTE;
 		ia->ia_dstaddr = *pdst;
 	}
 
 	/*
 	 * If a new destination address is specified for a point-to-point
 	 * interface, install a route to the destination as an interface
 	 * direct route.
 	 * XXX: the logic below rejects assigning multiple addresses on a p2p
 	 * interface that share the same destination.
 	 */
 	plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
 	if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 &&
 	    ia->ia_dstaddr.sin6_family == AF_INET6) {
 		int rtflags = RTF_UP | RTF_HOST;
 		/*
 		 * Handle the case for ::1 .
 		 */
 		if (ifp->if_flags & IFF_LOOPBACK)
 			ia->ia_flags |= IFA_RTSELF;
 		error = rtinit(&ia->ia_ifa, RTM_ADD, ia->ia_flags | rtflags);
 		if (error)
 			return (error);
 		ia->ia_flags |= IFA_ROUTE;
 	}
 
 	/*
 	 * add a loopback route to self if not exists
 	 */
 	if (!(ia->ia_flags & IFA_RTSELF) && V_nd6_useloopback) {
 		error = ifa_add_loopback_route((struct ifaddr *)ia,
 		    (struct sockaddr *)&ia->ia_addr);
 		if (error == 0)
 			ia->ia_flags |= IFA_RTSELF;
 	}
 
 	return (error);
 }
 
 /*
  * Find an IPv6 interface link-local address specific to an interface.
  * ifaddr is returned referenced.
  */
 struct in6_ifaddr *
 in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags)
 {
 	struct ifaddr *ifa;
 
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa))) {
 			if ((((struct in6_ifaddr *)ifa)->ia6_flags &
 			    ignoreflags) != 0)
 				continue;
 			ifa_ref(ifa);
 			break;
 		}
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	return ((struct in6_ifaddr *)ifa);
 }
 
 
 /*
  * Search the global address hash for an entry equal to addr.  A non-zero
  * zoneid additionally requires the entry's sin6_scope_id to match.
  *
  * Returns the entry with a reference held for the caller, or NULL.
  */
 struct in6_ifaddr *
 in6ifa_ifwithaddr(const struct in6_addr *addr, uint32_t zoneid)
 {
 	struct in6_ifaddr *cur, *match;
 
 	match = NULL;
 	IN6_IFADDR_RLOCK();
 	LIST_FOREACH(cur, IN6ADDR_HASH(addr), ia6_hash) {
 		if (!IN6_ARE_ADDR_EQUAL(IA6_IN6(cur), addr))
 			continue;
 		/* Same address but in a different zone: keep looking. */
 		if (zoneid != 0 && cur->ia_addr.sin6_scope_id != zoneid)
 			continue;
 		ifa_ref(&cur->ia_ifa);
 		match = cur;
 		break;
 	}
 	IN6_IFADDR_RUNLOCK();
 	return (match);
 }
 
 /*
  * find the internet address corresponding to a given interface and address.
  * ifaddr is returned referenced.
  */
 struct in6_ifaddr *
 in6ifa_ifpwithaddr(struct ifnet *ifp, struct in6_addr *addr)
 {
 	struct ifaddr *ifa;
 
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa))) {
 			ifa_ref(ifa);
 			break;
 		}
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	return ((struct in6_ifaddr *)ifa);
 }
 
 /*
  * Return the first link-local scoped address found on ifp, or NULL when
  * there is none or when the interface has ND6_IFF_IFDISABLED set.
  * No reference is taken on the returned entry.
  */
 struct in6_ifaddr *
 in6ifa_llaonifp(struct ifnet *ifp)
 {
 	struct in6_addr *a6;
 	struct ifaddr *cur, *hit;
 
 	if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)
 		return (NULL);
 
 	hit = NULL;
 	if_addr_rlock(ifp);
 	TAILQ_FOREACH(cur, &ifp->if_addrhead, ifa_link) {
 		if (cur->ifa_addr->sa_family != AF_INET6)
 			continue;
 		a6 = &((struct sockaddr_in6 *)cur->ifa_addr)->sin6_addr;
 		if (IN6_IS_SCOPE_LINKLOCAL(a6) ||
 		    IN6_IS_ADDR_MC_INTFACELOCAL(a6) ||
 		    IN6_IS_ADDR_MC_NODELOCAL(a6)) {
 			hit = cur;
 			break;
 		}
 	}
 	if_addr_runlock(ifp);
 
 	return ((struct in6_ifaddr *)hit);
 }
 
 /*
  * Convert IP6 address to printable (loggable) representation.
  *
  * ip6buf: caller-supplied output buffer; the caller has to make sure it
  *         is at least INET6_ADDRSTRLEN bytes long.
  * addr:   address to format, read as 16 bytes in network byte order.
  *
  * Returns ip6buf, NUL-terminated.  Each 16-bit group is printed in
  * lowercase hex without leading zeros; the first longest run of two or
  * more all-zero groups is collapsed to "::".  A lone zero group is
  * printed as "0".
  */
 static const char digits[] = "0123456789abcdef";
 char *
 ip6_sprintf(char *ip6buf, const struct in6_addr *addr)
 {
 	const uint8_t *b = addr->s6_addr;
 	char *cp = ip6buf;
 	int nib[4];
 	int i, k, run, runstart, best, beststart;
 
 	/*
 	 * Pass 1: locate the first longest run of zero groups.  The run
 	 * counter is reset on every non-zero group so that two separate
 	 * runs are never added together.
 	 */
 	run = runstart = best = 0;
 	beststart = -1;
 	for (i = 0; i < 8; i++) {
 		if (b[2 * i] != 0 || b[2 * i + 1] != 0) {
 			run = 0;
 			continue;
 		}
 		if (run++ == 0)
 			runstart = i;
 		if (run > best) {
 			best = run;
 			beststart = runstart;
 		}
 	}
 	if (best < 2)
 		beststart = -1;	/* a single zero group is not compressed */
 
 	/* Pass 2: emit the groups, each one followed by ':'. */
 	for (i = 0; i < 8; i++) {
 		if (i == beststart) {
 			/*
 			 * The previous group (if any) already supplied one
 			 * ':'; add the second.  A run at either end of the
 			 * address needs an extra ':' -- at the start because
 			 * nothing precedes it, at the end because the final
 			 * character is stripped below.
 			 */
 			if (i == 0)
 				*cp++ = ':';
 			*cp++ = ':';
 			i += best - 1;
 			if (i == 7)
 				*cp++ = ':';
 			continue;
 		}
 		nib[0] = b[2 * i] >> 4;
 		nib[1] = b[2 * i] & 0xf;
 		nib[2] = b[2 * i + 1] >> 4;
 		nib[3] = b[2 * i + 1] & 0xf;
 		/* Skip leading zero nibbles; the last one is always printed. */
 		for (k = 0; k < 3 && nib[k] == 0; k++)
 			;
 		for (; k < 4; k++)
 			*cp++ = digits[nib[k]];
 		*cp++ = ':';
 	}
 	/* Replace the trailing ':' with the terminator. */
 	*--cp = '\0';
 	return (ip6buf);
 }
 
 /*
  * Return 1 if in6 is the loopback address, a link-local address, or
  * falls inside the prefix of any configured IPv6 address; 0 otherwise.
  */
 int
 in6_localaddr(struct in6_addr *in6)
 {
 	struct in6_ifaddr *cur;
 	int local;
 
 	if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6))
 		return (1);
 
 	local = 0;
 	IN6_IFADDR_RLOCK();
 	TAILQ_FOREACH(cur, &V_in6_ifaddrhead, ia_link) {
 		if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &cur->ia_addr.sin6_addr,
 		    &cur->ia_prefixmask.sin6_addr)) {
 			local = 1;
 			break;
 		}
 	}
 	IN6_IFADDR_RUNLOCK();
 
 	return (local);
 }
 
 /*
  * Report whether in6 is one of this host's own configured addresses.
  * Returns 1 when the address hash holds an exact match, 0 otherwise.
  */
 int
 in6_localip(struct in6_addr *in6)
 {
 	struct in6_ifaddr *cur;
 	int ours;
 
 	ours = 0;
 	IN6_IFADDR_RLOCK();
 	LIST_FOREACH(cur, IN6ADDR_HASH(in6), ia6_hash) {
 		if (IN6_ARE_ADDR_EQUAL(in6, &cur->ia_addr.sin6_addr)) {
 			ours = 1;
 			break;
 		}
 	}
 	IN6_IFADDR_RUNLOCK();
 	return (ours);
 }
+ 
+/*
+ * Return 1 if an internet address is configured on an interface.
+ *
+ * ifp:  interface whose address list is searched.
+ * addr: address to look for; it is copied, never modified.
+ *
+ * The copy is passed through in6_clearscope(); a non-zero result makes
+ * the function return 0 without searching.  The copy is then given ifp's
+ * scope with in6_setscope() (its return value is ignored) and compared
+ * against every AF_INET6 entry on ifp->if_addrhead under IF_ADDR_RLOCK.
+ * NOTE(review): presumably in6_clearscope() returns non-zero when the
+ * input already carried an embedded scope, so such inputs are always
+ * reported as "not configured" -- confirm against in6_clearscope().
+ *
+ * Returns 1 on an exact match, 0 otherwise.
+ */
+int
+in6_ifhasaddr(struct ifnet *ifp, struct in6_addr *addr)
+{
+	struct in6_addr in6;
+	struct ifaddr *ifa;
+	struct in6_ifaddr *ia6;
+
+	in6 = *addr;
+	if (in6_clearscope(&in6))
+		return (0);
+	in6_setscope(&in6, ifp, NULL);
+
+	IF_ADDR_RLOCK(ifp);
+	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+		if (ifa->ifa_addr->sa_family != AF_INET6)
+			continue;
+		ia6 = (struct in6_ifaddr *)ifa;
+		if (IN6_ARE_ADDR_EQUAL(&ia6->ia_addr.sin6_addr, &in6)) {
+			IF_ADDR_RUNLOCK(ifp);
+			return (1);
+		}
+	}
+	IF_ADDR_RUNLOCK(ifp);
+
+	return (0);
+}
 
 /*
  * Return 1 when the first hash entry matching sa6's address carries
  * IN6_IFF_DEPRECATED; return 0 when it does not or when no entry matches.
  */
 int
 in6_is_addr_deprecated(struct sockaddr_in6 *sa6)
 {
 	struct in6_ifaddr *cur;
 	int deprecated;
 
 	deprecated = 0;
 	IN6_IFADDR_RLOCK();
 	LIST_FOREACH(cur, IN6ADDR_HASH(&sa6->sin6_addr), ia6_hash) {
 		if (!IN6_ARE_ADDR_EQUAL(IA6_IN6(cur), &sa6->sin6_addr))
 			continue;
 		/* Only the first matching entry is consulted. */
 		if (cur->ia6_flags & IN6_IFF_DEPRECATED)
 			deprecated = 1;
 		break;
 	}
 	IN6_IFADDR_RUNLOCK();
 
 	return (deprecated);
 }
 
 /*
  * Count how many leading bits src and dst have in common.
  * The result ranges from 0 (first bit differs) to 128 (identical).
  */
 int
 in6_matchlen(struct in6_addr *src, struct in6_addr *dst)
 {
 	const u_char *a = (const u_char *)src;
 	const u_char *b = (const u_char *)dst;
 	u_char diff;
 	int bits, i;
 
 	bits = 0;
 	for (i = 0; i < 16; i++) {
 		diff = a[i] ^ b[i];
 		if (diff != 0) {
 			/* Add the matching high-order bits of this byte. */
 			for (; (diff & 0x80) == 0; diff <<= 1)
 				bits++;
 			break;
 		}
 		bits += 8;
 	}
 	return (bits);
 }
 
 /*
  * Compare the first len bits of p1 and p2.
  * Returns 1 when they are equal, 0 when they differ or when len is
  * outside 0..128 (which is also logged).
  * XXX: to be scope conscious
  */
 int
 in6_are_prefix_equal(struct in6_addr *p1, struct in6_addr *p2, int len)
 {
 	int shift, tailbits, whole;
 
 	/* sanity check */
 	if (len < 0 || len > 128) {
 		log(LOG_ERR, "in6_are_prefix_equal: invalid prefix length(%d)\n",
 		    len);
 		return (0);
 	}
 
 	whole = len / 8;	/* full bytes covered by the prefix */
 	tailbits = len % 8;	/* remaining bits in the next byte */
 
 	if (bcmp(&p1->s6_addr, &p2->s6_addr, whole) != 0)
 		return (0);
 	if (tailbits == 0)
 		return (1);
 
 	/* Compare only the high-order tailbits bits of the next byte. */
 	shift = 8 - tailbits;
 	return ((p1->s6_addr[whole] >> shift) ==
 	    (p2->s6_addr[whole] >> shift));
 }
 
 /*
  * Build the netmask for a prefix of len bits in *maskp.
  * An out-of-range len (not 0..128) is logged and *maskp is left untouched.
  */
 void
 in6_prefixlen2mask(struct in6_addr *maskp, int len)
 {
 	/* partial[n - 1] has the n high-order bits set. */
 	u_char partial[8] = {0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff};
 	int full, idx, rest;
 
 	/* sanity check */
 	if (len < 0 || len > 128) {
 		log(LOG_ERR, "in6_prefixlen2mask: invalid prefix length(%d)\n",
 		    len);
 		return;
 	}
 
 	full = len / 8;
 	rest = len % 8;
 
 	bzero(maskp, sizeof(*maskp));
 	for (idx = 0; idx < full; idx++)
 		maskp->s6_addr[idx] = 0xff;
 	if (rest != 0)
 		maskp->s6_addr[full] = partial[rest - 1];
 }
 
 /*
  * return the best address out of the same scope. if no address was
  * found, return the first valid address from designated IF.
  */
 struct in6_ifaddr *
 in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst)
 {
 	int dst_scope =	in6_addrscope(dst), blen = -1, tlen;
 	struct ifaddr *ifa;
 	struct in6_ifaddr *besta = 0;
 	struct in6_ifaddr *dep[2];	/* last-resort: deprecated */
 
 	dep[0] = dep[1] = NULL;
 
 	/*
 	 * We first look for addresses in the same scope.
 	 * If there is one, return it.
 	 * If two or more, return one which matches the dst longest.
 	 * If none, return one of global addresses assigned other ifs.
 	 */
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST)
 			continue; /* XXX: is there any case to allow anycast? */
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY)
 			continue; /* don't use this interface */
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED)
 			continue;
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) {
 			if (V_ip6_use_deprecated)
 				dep[0] = (struct in6_ifaddr *)ifa;
 			continue;
 		}
 
 		if (dst_scope == in6_addrscope(IFA_IN6(ifa))) {
 			/*
 			 * call in6_matchlen() as few as possible
 			 */
 			if (besta) {
 				if (blen == -1)
 					blen = in6_matchlen(&besta->ia_addr.sin6_addr, dst);
 				tlen = in6_matchlen(IFA_IN6(ifa), dst);
 				if (tlen > blen) {
 					blen = tlen;
 					besta = (struct in6_ifaddr *)ifa;
 				}
 			} else
 				besta = (struct in6_ifaddr *)ifa;
 		}
 	}
 	if (besta) {
 		ifa_ref(&besta->ia_ifa);
 		IF_ADDR_RUNLOCK(ifp);
 		return (besta);
 	}
 
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST)
 			continue; /* XXX: is there any case to allow anycast? */
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY)
 			continue; /* don't use this interface */
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED)
 			continue;
 		if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) {
 			if (V_ip6_use_deprecated)
 				dep[1] = (struct in6_ifaddr *)ifa;
 			continue;
 		}
 
 		if (ifa != NULL)
 			ifa_ref(ifa);
 		IF_ADDR_RUNLOCK(ifp);
 		return (struct in6_ifaddr *)ifa;
 	}
 
 	/* use the last-resort values, that are, deprecated addresses */
 	if (dep[0]) {
 		ifa_ref((struct ifaddr *)dep[0]);
 		IF_ADDR_RUNLOCK(ifp);
 		return dep[0];
 	}
 	if (dep[1]) {
 		ifa_ref((struct ifaddr *)dep[1]);
 		IF_ADDR_RUNLOCK(ifp);
 		return dep[1];
 	}
 
 	IF_ADDR_RUNLOCK(ifp);
 	return NULL;
 }
 
 /*
  * Called when an interface becomes IFF_UP: start DAD, after a random
  * delay, on every tentative IPv6 address, then run in6_ifattach().
  */
 void
 in6_if_up(struct ifnet *ifp)
 {
 	struct ifaddr *ifa;
 	struct in6_ifaddr *ia;
 
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 		if (ifa->ifa_addr->sa_family != AF_INET6)
 			continue;
 		ia = (struct in6_ifaddr *)ifa;
 		if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0)
 			continue;
 		/*
 		 * The TENTATIVE flag was likely set by hand beforehand,
 		 * implicitly indicating the need for DAD.  The random delay
 		 * could perhaps be skipped here, but is imposed just in case.
 		 */
 		nd6_dad_start(ifa,
 		    arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz));
 	}
 	IF_ADDR_RUNLOCK(ifp);
 
 	/*
 	 * special cases, like 6to4, are handled in in6_ifattach
 	 */
 	in6_ifattach(ifp, NULL);
 }
 
 /*
  * Decide whether Duplicate Address Detection should run on ifp.
  * Returns 1 when it should, 0 when it must be skipped.
  */
 int
 in6if_do_dad(struct ifnet *ifp)
 {
 	/* Never on loopback interfaces. */
 	if (ifp->if_flags & IFF_LOOPBACK)
 		return (0);
 
 	/* Not while IPv6 is disabled on the link or DAD is turned off. */
 	if (ND_IFINFO(ifp)->flags & (ND6_IFF_IFDISABLED | ND6_IFF_NO_DAD))
 		return (0);
 
 	/*
 	 * Our DAD routine requires the interface up and running.
 	 * However, some interfaces can be up before the RUNNING
 	 * status.  Additionally, users may try to assign addresses
 	 * before the interface becomes up (or running).
 	 * We simply skip DAD in such a case as a work around.
 	 * XXX: we should rather mark "tentative" on such addresses,
 	 * and do DAD after the interface becomes ready.
 	 */
 	if ((ifp->if_flags & IFF_UP) == 0 ||
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return (0);
 
 	return (1);
 }
 
 /*
  * Recompute V_in6_maxmtu as the largest IPv6 link MTU over all
  * non-loopback interfaces.  The variable is left alone when no
  * interface yields a non-zero MTU.
  */
 void
 in6_setmaxmtu(void)
 {
 	unsigned long largest, linkmtu;
 	struct ifnet *ifp;
 
 	largest = 0;
 	IFNET_RLOCK_NOSLEEP();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		/* this function can be called during ifnet initialization */
 		if (!ifp->if_afdata[AF_INET6])
 			continue;
 		if ((ifp->if_flags & IFF_LOOPBACK) != 0)
 			continue;
 		linkmtu = IN6_LINKMTU(ifp);
 		if (linkmtu > largest)
 			largest = linkmtu;
 	}
 	IFNET_RUNLOCK_NOSLEEP();
 
 	/* update only when maxmtu is positive */
 	if (largest != 0)
 		V_in6_maxmtu = largest;
 }
 
 /*
  * Return the interface-identifier length, in bits, for the link that ifp
  * is attached to.  The value comes from the relevant "IPv6 over xxx-link"
  * document; every link type handled here uses 64.  Note that the address
  * architecture may also define the length for particular prefixes; as
  * clarified in rfc2462bis the two definitions should be consistent, and
  * they were as of August 2004.
  */
 int
 in6_if2idlen(struct ifnet *ifp)
 {
 	switch (ifp->if_type) {
 	case IFT_ETHER:		/* RFC2464 */
 #ifdef IFT_PROPVIRTUAL
 	case IFT_PROPVIRTUAL:	/* XXX: no RFC. treat it as ether */
 #endif
 #ifdef IFT_L2VLAN
 	case IFT_L2VLAN:	/* ditto */
 #endif
 #ifdef IFT_IEEE80211
 	case IFT_IEEE80211:	/* ditto */
 #endif
 #ifdef IFT_MIP
 	case IFT_MIP:	/* ditto */
 #endif
 	case IFT_INFINIBAND:
 	case IFT_FDDI:		/* RFC2467 */
 	case IFT_ISO88025:	/* RFC2470 (IPv6 over Token Ring) */
 	case IFT_PPP:		/* RFC2472 */
 	case IFT_ARCNET:	/* RFC2497 */
 	case IFT_FRELAY:	/* RFC2590 */
 	case IFT_IEEE1394:	/* RFC3146 */
 	case IFT_GIF:		/* draft-ietf-v6ops-mech-v2-07 */
 	case IFT_LOOP:		/* XXX: is this really correct? */
 		break;
 	default:
 		/*
 		 * Unknown link type:
 		 * Using today's common constant of 64 unconditionally is
 		 * debatable; strict compliance would return an error.  Yet
 		 * if we merely missed the standard for this link type, or a
 		 * new one appears, 64 is very likely right.  As a compromise
 		 * the constant is still used, with an explicit notice that
 		 * the type was unknown.
 		 */
 		printf("in6_if2idlen: unknown link type (%d)\n", ifp->if_type);
 		break;
 	}
 	return (64);
 }
 
 #include <sys/sysctl.h>
 
 /*
  * IPv6 link-layer table entry: the generic llentry followed by the
  * layer-3 (IPv6) address it describes.  in6_lltable_new() hands out
  * &base and in6_lltable_free() passes that same pointer to free(), so
  * base has to stay the first member.
  */
 struct in6_llentry {
 	struct llentry		base;
 	struct sockaddr_in6	l3_addr6;
 };
 
 /*
  * Deletes an address from the address table.
  * This function is called by the timer functions
  * such as arptimer() and nd6_llinfo_timer(), and
  * the caller does the locking.
  *
  * The entry is expected to arrive write-locked: the lock is released,
  * then destroyed, and the memory is returned to M_LLTABLE.  llt is not
  * referenced.
  */
 static void
 in6_lltable_free(struct lltable *llt, struct llentry *lle)
 {
 	LLE_WUNLOCK(lle);
 	LLE_LOCK_DESTROY(lle);
 	free(lle, M_LLTABLE);
 }
 
 /*
  * Allocate and initialize a link-layer entry for the IPv6 address in
  * l3addr.  The entry starts zeroed, with a reference count of 1, its lock
  * and timer callout initialized and in6_lltable_free() as destructor.
  * Returns NULL when the M_NOWAIT allocation fails; flags is not used.
  */
 static struct llentry *
 in6_lltable_new(const struct sockaddr *l3addr, u_int flags)
 {
 	struct in6_llentry *entry;
 	struct llentry *base;
 
 	entry = malloc(sizeof(*entry), M_LLTABLE, M_NOWAIT | M_ZERO);
 	if (entry == NULL)		/* NB: caller generates msg */
 		return (NULL);
 
 	entry->l3_addr6 = *(const struct sockaddr_in6 *)l3addr;
 
 	base = &entry->base;
 	base->lle_refcnt = 1;
 	base->lle_free = in6_lltable_free;
 	LLE_LOCK_INIT(base);
 	callout_init(&base->ln_timer_ch, 1);
 
 	return (base);
 }
 
 /*
  * Free every entry of llt whose address falls inside prefix/mask.
  * Static entries are spared unless the caller passes LLE_STATIC in flags.
  */
 static void
 in6_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix,
     const struct sockaddr *mask, u_int flags)
 {
 	const struct sockaddr_in6 *p6 = (const struct sockaddr_in6 *)prefix;
 	const struct sockaddr_in6 *m6 = (const struct sockaddr_in6 *)mask;
 	struct llentry *cur, *nxt;
 	int bucket;
 
 	IF_AFDATA_WLOCK(llt->llt_ifp);
 	for (bucket = 0; bucket < LLTBL_HASHTBL_SIZE; bucket++) {
 		LIST_FOREACH_SAFE(cur, &llt->lle_head[bucket], lle_next, nxt) {
 			if (!(IN6_ARE_MASKED_ADDR_EQUAL(
 			    &satosin6(L3_ADDR(cur))->sin6_addr,
 			    &p6->sin6_addr, &m6->sin6_addr)))
 				continue;
 			/*
 			 * (flags & LLE_STATIC) means deleting all entries
 			 * including static ND6 entries.
 			 */
 			if ((flags & LLE_STATIC) == 0 &&
 			    (cur->la_flags & LLE_STATIC) != 0)
 				continue;
 
 			LLE_WLOCK(cur);
 			if (callout_stop(&cur->la_timer))
 				LLE_REMREF(cur);
 			llentry_free(cur);
 		}
 	}
 	IF_AFDATA_WUNLOCK(llt->llt_ifp);
 }
 
 /*
  * Check that 'l3addr' may get a neighbour-cache entry on 'ifp'.
  *
  * The address is accepted when the default-FIB route for it is a
  * non-gateway route through 'ifp', or, failing that, when
  * ifaof_ifpforaddr() still finds an interface address for it on 'ifp'.
  * Otherwise the rejection is logged.  'flags' is not consulted.
  *
  * Returns 0 when the address is acceptable and EINVAL when it is not.
  */
 static int
 in6_lltable_rtcheck(struct ifnet *ifp,
 		    u_int flags,
 		    const struct sockaddr *l3addr)
 {
 	const struct sockaddr_in6 *sin6;
 	struct rtentry *rt;
 	struct ifaddr *ifa;
 	char ip6buf[INET6_ADDRSTRLEN];
 	int error;
 
 	KASSERT(l3addr->sa_family == AF_INET6,
 	    ("sin_family %d", l3addr->sa_family));
 
 	/* Our local addresses are always only installed on the default FIB. */
 	/* XXX rtalloc1 should take a const param */
 	rt = in6_rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0,
 	    RT_DEFAULT_FIB);
 
 	/* Fast path: a direct (non-gateway) route through this interface. */
 	if (rt != NULL && !(rt->rt_flags & RTF_GATEWAY) && rt->rt_ifp == ifp) {
 		RTFREE_LOCKED(rt);
 		return (0);
 	}
 
 	/*
 	 * Create an ND6 cache for an IPv6 neighbor
 	 * that is not covered by our own prefix.
 	 */
 	/* XXX ifaof_ifpforaddr should take a const param */
 	ifa = ifaof_ifpforaddr(__DECONST(struct sockaddr *, l3addr), ifp);
 	if (ifa != NULL) {
 		ifa_free(ifa);
 		error = 0;
 	} else {
 		sin6 = (const struct sockaddr_in6 *)l3addr;
 		log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n",
 		    ip6_sprintf(ip6buf, &sin6->sin6_addr));
 		error = EINVAL;
 	}
 
 	if (rt != NULL)
 		RTFREE_LOCKED(rt);
 	return (error);
 }
 
 /*
  * Look up, and optionally create or mark deleted, the entry for 'l3addr'
  * in the IPv6 link-layer table 'llt'.  The interface's afdata lock must be
  * held (asserted below).
  *
  * 'flags' steers the operation:
  *   LLE_CREATE    - on a miss, allocate and link a new entry; the afdata
  *                   write lock is asserted on this path.
  *   LLE_IFADDR    - with LLE_CREATE: skip the route check and pre-fill the
  *                   entry with the interface's own link-layer address,
  *                   marking it LLE_VALID | LLE_STATIC.
  *   LLE_DELETE    - on a hit, flag the entry LLE_DELETED (an LLE_IFADDR
  *                   entry only if the request carries LLE_IFADDR too).
  *   LLE_EXCLUSIVE - return the entry write-locked instead of read-locked.
  *
  * Returns NULL when nothing was found or creation failed, (void *)-1 after
  * a delete request that hit an entry, and otherwise the entry itself,
  * locked as described above.
  */
 static struct llentry *
 in6_lltable_lookup(struct lltable *llt, u_int flags,
 	const struct sockaddr *l3addr)
 {
 	const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr;
 	struct ifnet *ifp = llt->llt_ifp;
 	struct llentry *lle;
 	struct llentries *lleh;
 	u_int hashkey;
 
 	IF_AFDATA_LOCK_ASSERT(ifp);
 	KASSERT(l3addr->sa_family == AF_INET6,
 	    ("sin_family %d", l3addr->sa_family));
 
 	/* Only the last 32 bits of the address select the hash bucket. */
 	hashkey = sin6->sin6_addr.s6_addr32[3];
 	lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
 	LIST_FOREACH(lle, lleh, lle_next) {
 		struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)L3_ADDR(lle);
 		/* Entries already marked deleted are invisible to lookups. */
 		if (lle->la_flags & LLE_DELETED)
 			continue;
 		if (bcmp(&sa6->sin6_addr, &sin6->sin6_addr,
 		    sizeof(struct in6_addr)) == 0)
 			break;
 	}
 
 	/* lle is NULL here when the bucket walk found no match. */
 	if (lle == NULL) {
 		if (!(flags & LLE_CREATE))
 			return (NULL);
 		IF_AFDATA_WLOCK_ASSERT(ifp);
 		/*
 		 * A route that covers the given address must have
 		 * been installed 1st because we are doing a resolution,
 		 * verify this.
 		 */
 		if (!(flags & LLE_IFADDR) &&
 		    in6_lltable_rtcheck(ifp, flags, l3addr) != 0)
 			return NULL;
 
 		lle = in6_lltable_new(l3addr, flags);
 		if (lle == NULL) {
 			log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
 			return NULL;
 		}
 		lle->la_flags = flags & ~LLE_CREATE;
 		/* Entries for our own addresses resolve to our own lladdr. */
 		if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
 			bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
 			lle->la_flags |= (LLE_VALID | LLE_STATIC);
 		}
 
 		lle->lle_tbl  = llt;
 		lle->lle_head = lleh;
 		lle->la_flags |= LLE_LINKED;
 		LIST_INSERT_HEAD(lleh, lle, lle_next);
 	} else if (flags & LLE_DELETE) {
 		if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
 			LLE_WLOCK(lle);
 			lle->la_flags |= LLE_DELETED;
 #ifdef DIAGNOSTIC
 			log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
 #endif
 			/*
 			 * Only static, non-interface-address entries are
 			 * freed right here; all others are merely left
 			 * marked as deleted and unlocked.
 			 */
 			if ((lle->la_flags &
 			    (LLE_STATIC | LLE_IFADDR)) == LLE_STATIC)
 				llentry_free(lle);
 			else
 				LLE_WUNLOCK(lle);
 		}
 		/* Sentinel telling the caller that a delete was processed. */
 		lle = (void *)-1;
 	}
 	/* Hand back a real entry locked; the sentinel is passed through. */
 	if (LLE_IS_VALID(lle)) {
 		if (flags & LLE_EXCLUSIVE)
 			LLE_WLOCK(lle);
 		else
 			LLE_RLOCK(lle);
 	}
 	return (lle);
 }
 
 /*
  * Export the valid entries of 'llt' through the sysctl request 'wr'.
  *
  * Nothing is emitted for loopback interfaces.  Entries that are deleted,
  * not yet valid, or that prison_if() rejects for the requesting
  * credentials are skipped.  Each remaining entry is written as one
  * record consisting of a struct rt_msghdr, a struct sockaddr_in6 and a
  * struct sockaddr_dl.
  *
  * Returns 0 on success or the first error reported by SYSCTL_OUT(); the
  * walk stops at that first error.
  */
 static int
 in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
 {
 	struct ifnet *ifp = llt->llt_ifp;
 	struct llentry *lle;
 	/* XXX stack use */
 	struct {
 		struct rt_msghdr	rtm;
 		struct sockaddr_in6	sin6;
 		/*
 		 * ndp.c assumes that sdl is word aligned
 		 */
 #ifdef __LP64__
 		uint32_t		pad;
 #endif
 		struct sockaddr_dl	sdl;
 	} ndpc;
 	int i, error;
 
 	if (ifp->if_flags & IFF_LOOPBACK)
 		return (0);
 
 	LLTABLE_LOCK_ASSERT();
 
 	for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
 		LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
 			struct sockaddr_dl *sdl;
 
 			/* skip deleted or invalid entries */
 			if ((lle->la_flags & (LLE_DELETED|LLE_VALID)) != LLE_VALID)
 				continue;
 			/* Skip if jailed and not a valid IP of the prison. */
 			if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0)
 				continue;
 			/*
 			 * produce a msg made of:
 			 *  struct rt_msghdr;
 			 *  struct sockaddr_in6 (IPv6)
 			 *  struct sockaddr_dl;
 			 */
 			bzero(&ndpc, sizeof(ndpc));
 			ndpc.rtm.rtm_msglen = sizeof(ndpc);
 			ndpc.rtm.rtm_version = RTM_VERSION;
 			ndpc.rtm.rtm_type = RTM_GET;
 			ndpc.rtm.rtm_flags = RTF_UP;
 			ndpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
 			ndpc.sin6.sin6_family = AF_INET6;
 			ndpc.sin6.sin6_len = sizeof(ndpc.sin6);
 			bcopy(L3_ADDR(lle), &ndpc.sin6, L3_ADDR_LEN(lle));
 			if (V_deembed_scopeid)
 				sa6_recoverscope(&ndpc.sin6);
 
 			/* publish */
 			if (lle->la_flags & LLE_PUB)
 				ndpc.rtm.rtm_flags |= RTF_ANNOUNCE;
 
 			sdl = &ndpc.sdl;
 			sdl->sdl_family = AF_LINK;
 			sdl->sdl_len = sizeof(*sdl);
 			sdl->sdl_alen = ifp->if_addrlen;
 			sdl->sdl_index = ifp->if_index;
 			sdl->sdl_type = ifp->if_type;
 			bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
 			/* Static entries never expire. */
 			ndpc.rtm.rtm_rmx.rmx_expire =
 			    lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
 			ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
 			if (lle->la_flags & LLE_STATIC)
 				ndpc.rtm.rtm_flags |= RTF_STATIC;
 			ndpc.rtm.rtm_index = ifp->if_index;
 			error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc));
 			if (error != 0) {
 				/*
 				 * Abort the whole walk.  A plain "break"
 				 * would only leave the current bucket, and
 				 * a later successful copy-out would then
 				 * overwrite the error.
 				 */
 				return (error);
 			}
 		}
 	}
 	return (0);
 }
 
 void *
 in6_domifattach(struct ifnet *ifp)
 {
 	struct in6_ifextra *ext;
 
 	/* There are not IPv6-capable interfaces. */
 	switch (ifp->if_type) {
 	case IFT_PFLOG:
 	case IFT_PFSYNC:
 	case IFT_USB:
 		return (NULL);
 	}
 	ext = (struct in6_ifextra *)malloc(sizeof(*ext), M_IFADDR, M_WAITOK);
 	bzero(ext, sizeof(*ext));
 
 	ext->in6_ifstat = malloc(sizeof(counter_u64_t) *
 	    sizeof(struct in6_ifstat) / sizeof(uint64_t), M_IFADDR, M_WAITOK);
 	COUNTER_ARRAY_ALLOC(ext->in6_ifstat,
 	    sizeof(struct in6_ifstat) / sizeof(uint64_t), M_WAITOK);
 
 	ext->icmp6_ifstat = malloc(sizeof(counter_u64_t) *
 	    sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_IFADDR,
 	    M_WAITOK);
 	COUNTER_ARRAY_ALLOC(ext->icmp6_ifstat,
 	    sizeof(struct icmp6_ifstat) / sizeof(uint64_t), M_WAITOK);
 
 	ext->nd_ifinfo = nd6_ifattach(ifp);
 	ext->scope6_id = scope6_ifattach(ifp);
 	ext->lltable = lltable_init(ifp, AF_INET6);
 	if (ext->lltable != NULL) {
 		ext->lltable->llt_prefix_free = in6_lltable_prefix_free;
 		ext->lltable->llt_lookup = in6_lltable_lookup;
 		ext->lltable->llt_dump = in6_lltable_dump;
 	}
 
 	ext->mld_ifinfo = mld_domifattach(ifp);
 
 	return ext;
 }
 
 /*
  * Return the IPv6 link MTU of 'ifp' as computed by IN6_LINKMTU().
  */
 int
 in6_domifmtu(struct ifnet *ifp)
 {
 
 	return (IN6_LINKMTU(ifp));
 }
 
 void
 in6_domifdetach(struct ifnet *ifp, void *aux)
 {
 	struct in6_ifextra *ext = (struct in6_ifextra *)aux;
 
 	mld_domifdetach(ifp);
 	scope6_ifdetach(ext->scope6_id);
 	nd6_ifdetach(ext->nd_ifinfo);
 	lltable_free(ext->lltable);
 	COUNTER_ARRAY_FREE(ext->in6_ifstat,
 	    sizeof(struct in6_ifstat) / sizeof(uint64_t));
 	free(ext->in6_ifstat, M_IFADDR);
 	COUNTER_ARRAY_FREE(ext->icmp6_ifstat,
 	    sizeof(struct icmp6_ifstat) / sizeof(uint64_t));
 	free(ext->icmp6_ifstat, M_IFADDR);
 	free(ext, M_IFADDR);
 }
 
 /*
  * Build an IPv4 socket address in 'sin' from the IPv6 socket address
  * 'sin6'.  The port is carried over and the IPv4 address is taken from
  * the last 32 bits of the IPv6 address, so 'sin6' must hold a v4-mapped
  * or v4-compatible address.
  */
 void
 in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6)
 {
 
 	bzero(sin, sizeof(*sin));
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(struct sockaddr_in);
 	sin->sin_addr.s_addr = sin6->sin6_addr.s6_addr32[3];
 	sin->sin_port = sin6->sin6_port;
 }
 
 /* Convert sockaddr_in to sockaddr_in6 in v4 mapped addr format. */
 void
 in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6)
 {
 	bzero(sin6, sizeof(*sin6));
 	sin6->sin6_len = sizeof(struct sockaddr_in6);
 	sin6->sin6_family = AF_INET6;
 	sin6->sin6_port = sin->sin_port;
 	sin6->sin6_addr.s6_addr32[0] = 0;
 	sin6->sin6_addr.s6_addr32[1] = 0;
 	sin6->sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
 	sin6->sin6_addr.s6_addr32[3] = sin->sin_addr.s_addr;
 }
 
 /* Convert sockaddr_in6 into sockaddr_in. */
 void
 in6_sin6_2_sin_in_sock(struct sockaddr *nam)
 {
 	struct sockaddr_in *sin_p;
 	struct sockaddr_in6 sin6;
 
 	/*
 	 * Save original sockaddr_in6 addr and convert it
 	 * to sockaddr_in.
 	 */
 	sin6 = *(struct sockaddr_in6 *)nam;
 	sin_p = (struct sockaddr_in *)nam;
 	in6_sin6_2_sin(sin_p, &sin6);
 }
 
 /*
  * Replace the IPv4 socket address at '*nam' with a newly allocated
  * v4-mapped IPv6 socket address.  The old M_SONAME buffer is freed and
  * '*nam' is updated to point at the replacement.
  */
 void
 in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam)
 {
 	struct sockaddr_in6 *mapped;
 
 	mapped = malloc(sizeof(*mapped), M_SONAME, M_WAITOK);
 	in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, mapped);
 
 	/* The IPv4 buffer is no longer needed once it has been converted. */
 	free(*nam, M_SONAME);
 	*nam = (struct sockaddr *)mapped;
 }
Index: user/ngie/more-tests/sys/netinet6/in6.h
===================================================================
--- user/ngie/more-tests/sys/netinet6/in6.h	(revision 281675)
+++ user/ngie/more-tests/sys/netinet6/in6.h	(revision 281676)
@@ -1,730 +1,731 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: in6.h,v 1.89 2001/05/27 13:28:35 itojun Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in.h	8.3 (Berkeley) 1/3/94
  * $FreeBSD$
  */
 
 #ifndef __KAME_NETINET_IN_H_INCLUDED_
 #error "do not include netinet6/in6.h directly, include netinet/in.h.  see RFC2553"
 #endif
 
 #ifndef _NETINET6_IN6_H_
 #define _NETINET6_IN6_H_
 
 /*
  * Identification of the network protocol stack
  * for *BSD-current/release: http://www.kame.net/dev/cvsweb.cgi/kame/COVERAGE
  * has the table of implementation/integration differences.
  */
 #define __KAME__
 #define __KAME_VERSION		"FreeBSD"
 
 /*
  * IPv6 port allocation rules should mirror the IPv4 rules and are controlled
  * by the net.inet.ip.portrange sysctl tree. The following defines exist
  * for compatibility with userland applications that need them.
  */
 #if __BSD_VISIBLE
 #define	IPV6PORT_RESERVED	1024
 #define	IPV6PORT_ANONMIN	49152
 #define	IPV6PORT_ANONMAX	65535
 #define	IPV6PORT_RESERVEDMIN	600
 #define	IPV6PORT_RESERVEDMAX	(IPV6PORT_RESERVED-1)
 #endif
 
 /*
  * IPv6 address
  *
  * The 128 bits can be accessed as 16 bytes, eight 16-bit words or four
  * 32-bit words through the union members; the s6_addr* macros defined
  * right after this structure name those members.
  */
 struct in6_addr {
 	union {
 		uint8_t		__u6_addr8[16];
 		uint16_t	__u6_addr16[8];
 		uint32_t	__u6_addr32[4];
 	} __u6_addr;			/* 128-bit IP6 address */
 };
 
 #define s6_addr   __u6_addr.__u6_addr8
 #ifdef _KERNEL	/* XXX nonstandard */
 #define s6_addr8  __u6_addr.__u6_addr8
 #define s6_addr16 __u6_addr.__u6_addr16
 #define s6_addr32 __u6_addr.__u6_addr32
 #endif
 
 #define INET6_ADDRSTRLEN	46
 
 /*
  * XXX missing POSIX.1-2001 macro IPPROTO_IPV6.
  */
 
 /*
  * Socket address for IPv6
  */
 #if __BSD_VISIBLE
 #define SIN6_LEN
 #endif
 
 /*
  * IPv6 socket address: a length/family header followed by the port, the
  * flow information, the 128-bit address and the scope zone index.
  */
 struct sockaddr_in6 {
 	uint8_t		sin6_len;	/* length of this struct */
 	sa_family_t	sin6_family;	/* AF_INET6 */
 	in_port_t	sin6_port;	/* Transport layer port # */
 	uint32_t	sin6_flowinfo;	/* IP6 flow information */
 	struct in6_addr	sin6_addr;	/* IP6 address */
 	uint32_t	sin6_scope_id;	/* scope zone index */
 };
 
 /*
  * Local definition for masks
  */
 #ifdef _KERNEL	/* XXX nonstandard */
 #define IN6MASK0	{{{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }}}
 #define IN6MASK32	{{{ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, \
 			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}}
 #define IN6MASK64	{{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \
 			    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}}
 #define IN6MASK96	{{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \
 			    0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }}}
 #define IN6MASK128	{{{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \
 			    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }}}
 #endif
 
 #ifdef _KERNEL
 extern const struct sockaddr_in6 sa6_any;
 
 extern const struct in6_addr in6mask0;
 extern const struct in6_addr in6mask32;
 extern const struct in6_addr in6mask64;
 extern const struct in6_addr in6mask96;
 extern const struct in6_addr in6mask128;
 #endif /* _KERNEL */
 
 /*
  * Macros starting with IPV6_ADDR are KAME-local
  */
 #ifdef _KERNEL	/* XXX nonstandard */
 #if _BYTE_ORDER == _BIG_ENDIAN
 #define IPV6_ADDR_INT32_ONE	1
 #define IPV6_ADDR_INT32_TWO	2
 #define IPV6_ADDR_INT32_MNL	0xff010000
 #define IPV6_ADDR_INT32_MLL	0xff020000
 #define IPV6_ADDR_INT32_SMP	0x0000ffff
 #define IPV6_ADDR_INT16_ULL	0xfe80
 #define IPV6_ADDR_INT16_USL	0xfec0
 #define IPV6_ADDR_INT16_MLL	0xff02
 #elif _BYTE_ORDER == _LITTLE_ENDIAN
 #define IPV6_ADDR_INT32_ONE	0x01000000
 #define IPV6_ADDR_INT32_TWO	0x02000000
 #define IPV6_ADDR_INT32_MNL	0x000001ff
 #define IPV6_ADDR_INT32_MLL	0x000002ff
 #define IPV6_ADDR_INT32_SMP	0xffff0000
 #define IPV6_ADDR_INT16_ULL	0x80fe
 #define IPV6_ADDR_INT16_USL	0xc0fe
 #define IPV6_ADDR_INT16_MLL	0x02ff
 #endif
 #endif
 
 /*
  * Definition of some useful macros to handle IP6 addresses
  */
 #if __BSD_VISIBLE
 #define IN6ADDR_ANY_INIT \
 	{{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
 	    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }}}
 #define IN6ADDR_LOOPBACK_INIT \
 	{{{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
 	    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}}
 #define IN6ADDR_NODELOCAL_ALLNODES_INIT \
 	{{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
 	    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}}
 #define IN6ADDR_INTFACELOCAL_ALLNODES_INIT \
 	{{{ 0xff, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
 	    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}}
 #define IN6ADDR_LINKLOCAL_ALLNODES_INIT \
 	{{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
 	    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 }}}
 #define IN6ADDR_LINKLOCAL_ALLROUTERS_INIT \
 	{{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
 	    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }}}
 #define IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT \
 	{{{ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
 	    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16 }}}
 #endif
 
 extern const struct in6_addr in6addr_any;
 extern const struct in6_addr in6addr_loopback;
 #if __BSD_VISIBLE
 extern const struct in6_addr in6addr_nodelocal_allnodes;
 extern const struct in6_addr in6addr_linklocal_allnodes;
 extern const struct in6_addr in6addr_linklocal_allrouters;
 extern const struct in6_addr in6addr_linklocal_allv2routers;
 #endif
 
 /*
  * Equality: true when the two in6_addr structures pointed to by a and b
  * hold the same 128-bit address.
  * NOTE: Some kernel programming environments (for example, openbsd/sparc)
  * do not supply memcmp().  For userland, memcmp() is preferred as it is
  * part of the ANSI standard.
  */
 #ifdef _KERNEL
 #define IN6_ARE_ADDR_EQUAL(a, b)			\
     (bcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0)
 #else
 #if __BSD_VISIBLE
 #define IN6_ARE_ADDR_EQUAL(a, b)			\
     (memcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0)
 #endif
 #endif
 
 /*
  * Unspecified: all four 32-bit words of the address are zero (::).
  */
 #define IN6_IS_ADDR_UNSPECIFIED(a)	\
 	((a)->__u6_addr.__u6_addr32[0] == 0 &&	\
 	 (a)->__u6_addr.__u6_addr32[1] == 0 &&	\
 	 (a)->__u6_addr.__u6_addr32[2] == 0 &&	\
 	 (a)->__u6_addr.__u6_addr32[3] == 0)
 
 /*
  * Loopback: the first three 32-bit words are zero and the last one,
  * stored in network byte order, is 1 (::1).
  */
 #define IN6_IS_ADDR_LOOPBACK(a)		\
 	((a)->__u6_addr.__u6_addr32[0] == 0 &&	\
 	 (a)->__u6_addr.__u6_addr32[1] == 0 &&	\
 	 (a)->__u6_addr.__u6_addr32[2] == 0 &&	\
 	 (a)->__u6_addr.__u6_addr32[3] == ntohl(1))
 
 /*
  * IPv4 compatible: the first 96 bits are zero and the last 32-bit word is
  * neither 0 (the unspecified address) nor 1 in network byte order (the
  * loopback address).
  */
 #define IN6_IS_ADDR_V4COMPAT(a)		\
 	((a)->__u6_addr.__u6_addr32[0] == 0 &&	\
 	 (a)->__u6_addr.__u6_addr32[1] == 0 &&	\
 	 (a)->__u6_addr.__u6_addr32[2] == 0 &&	\
 	 (a)->__u6_addr.__u6_addr32[3] != 0 &&	\
 	 (a)->__u6_addr.__u6_addr32[3] != ntohl(1))
 
 /*
  * Mapped: the first 64 bits are zero and the third 32-bit word is
  * 0x0000ffff in network byte order (::ffff:a.b.c.d).  The last word,
  * which carries the IPv4 address, is not examined.
  */
 #define IN6_IS_ADDR_V4MAPPED(a)		      \
 	((a)->__u6_addr.__u6_addr32[0] == 0 &&	\
 	 (a)->__u6_addr.__u6_addr32[1] == 0 &&	\
 	 (a)->__u6_addr.__u6_addr32[2] == ntohl(0x0000ffff))
 
 /*
  * KAME Scope Values
  */
 
 #ifdef _KERNEL	/* XXX nonstandard */
 #define IPV6_ADDR_SCOPE_NODELOCAL	0x01
 #define IPV6_ADDR_SCOPE_INTFACELOCAL	0x01
 #define IPV6_ADDR_SCOPE_LINKLOCAL	0x02
 #define IPV6_ADDR_SCOPE_SITELOCAL	0x05
 #define IPV6_ADDR_SCOPE_ORGLOCAL	0x08	/* just used in this file */
 #define IPV6_ADDR_SCOPE_GLOBAL		0x0e
 #else
 #define __IPV6_ADDR_SCOPE_NODELOCAL	0x01
 #define __IPV6_ADDR_SCOPE_INTFACELOCAL	0x01
 #define __IPV6_ADDR_SCOPE_LINKLOCAL	0x02
 #define __IPV6_ADDR_SCOPE_SITELOCAL	0x05
 #define __IPV6_ADDR_SCOPE_ORGLOCAL	0x08	/* just used in this file */
 #define __IPV6_ADDR_SCOPE_GLOBAL	0x0e
 #endif
 
 /*
  * Unicast Scope
  * Note that we must check topmost 10 bits only, not 16 bits (see RFC2373).
  */
 #define IN6_IS_ADDR_LINKLOCAL(a)	\
 	(((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0x80))
 #define IN6_IS_ADDR_SITELOCAL(a)	\
 	(((a)->s6_addr[0] == 0xfe) && (((a)->s6_addr[1] & 0xc0) == 0xc0))
 
 /*
  * Multicast: the first byte of the address is 0xff.  The scope of a
  * multicast address is the low four bits of its second byte; outside the
  * kernel the accessor is only available under its __-prefixed name.
  */
 #define IN6_IS_ADDR_MULTICAST(a)	((a)->s6_addr[0] == 0xff)
 
 #ifdef _KERNEL	/* XXX nonstandard */
 #define IPV6_ADDR_MC_SCOPE(a)		((a)->s6_addr[1] & 0x0f)
 #else
 #define __IPV6_ADDR_MC_SCOPE(a)		((a)->s6_addr[1] & 0x0f)
 #endif
 
 /*
  * Multicast Scope: each macro is true for a multicast address whose scope
  * field equals the named scope value.  The kernel variants use the
  * IPV6_ADDR_* names; the userland variants use the __-prefixed names and
  * do not provide IN6_IS_ADDR_MC_INTFACELOCAL.
  */
 #ifdef _KERNEL	/* refers nonstandard items */
 #define IN6_IS_ADDR_MC_NODELOCAL(a)	\
 	(IN6_IS_ADDR_MULTICAST(a) &&	\
 	 (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_NODELOCAL))
 #define IN6_IS_ADDR_MC_INTFACELOCAL(a)	\
 	(IN6_IS_ADDR_MULTICAST(a) &&	\
 	 (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_INTFACELOCAL))
 #define IN6_IS_ADDR_MC_LINKLOCAL(a)	\
 	(IN6_IS_ADDR_MULTICAST(a) &&	\
 	 (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_LINKLOCAL))
 #define IN6_IS_ADDR_MC_SITELOCAL(a)	\
 	(IN6_IS_ADDR_MULTICAST(a) &&	\
 	 (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_SITELOCAL))
 #define IN6_IS_ADDR_MC_ORGLOCAL(a)	\
 	(IN6_IS_ADDR_MULTICAST(a) &&	\
 	 (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_ORGLOCAL))
 #define IN6_IS_ADDR_MC_GLOBAL(a)	\
 	(IN6_IS_ADDR_MULTICAST(a) &&	\
 	 (IPV6_ADDR_MC_SCOPE(a) == IPV6_ADDR_SCOPE_GLOBAL))
 #else
 #define IN6_IS_ADDR_MC_NODELOCAL(a)	\
 	(IN6_IS_ADDR_MULTICAST(a) &&	\
 	 (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_NODELOCAL))
 #define IN6_IS_ADDR_MC_LINKLOCAL(a)	\
 	(IN6_IS_ADDR_MULTICAST(a) &&	\
 	 (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_LINKLOCAL))
 #define IN6_IS_ADDR_MC_SITELOCAL(a)	\
 	(IN6_IS_ADDR_MULTICAST(a) &&	\
 	 (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_SITELOCAL))
 #define IN6_IS_ADDR_MC_ORGLOCAL(a)	\
 	(IN6_IS_ADDR_MULTICAST(a) &&	\
 	 (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_ORGLOCAL))
 #define IN6_IS_ADDR_MC_GLOBAL(a)	\
 	(IN6_IS_ADDR_MULTICAST(a) &&	\
 	 (__IPV6_ADDR_MC_SCOPE(a) == __IPV6_ADDR_SCOPE_GLOBAL))
 #endif
 
 #ifdef _KERNEL	/* nonstandard */
 /*
  * KAME Scope
  *
  * IN6_IS_SCOPE_LINKLOCAL is true for link-local unicast and link-local
  * multicast addresses; IN6_IS_SCOPE_EMBED additionally accepts
  * interface-local multicast addresses.
  */
 #define IN6_IS_SCOPE_LINKLOCAL(a)	\
 	((IN6_IS_ADDR_LINKLOCAL(a)) ||	\
 	 (IN6_IS_ADDR_MC_LINKLOCAL(a)))
 #define	IN6_IS_SCOPE_EMBED(a)			\
 	((IN6_IS_ADDR_LINKLOCAL(a)) ||		\
 	 (IN6_IS_ADDR_MC_LINKLOCAL(a)) ||	\
 	 (IN6_IS_ADDR_MC_INTFACELOCAL(a)))
 
 /*
  * Lifetime checks on an interface address 'a': true once the uptime
  * elapsed since ia6_updatetime exceeds ia6t_pltime (IFA6_IS_DEPRECATED)
  * or ia6t_vltime (IFA6_IS_INVALID).  A lifetime of ND6_INFINITE_LIFETIME
  * never expires.
  */
 #define IFA6_IS_DEPRECATED(a) \
 	((a)->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME && \
 	 (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \
 	 (a)->ia6_lifetime.ia6t_pltime)
 #define IFA6_IS_INVALID(a) \
 	((a)->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME && \
 	 (u_int32_t)((time_uptime - (a)->ia6_updatetime)) > \
 	 (a)->ia6_lifetime.ia6t_vltime)
 #endif /* _KERNEL */
 
 /*
  * IP6 route structure
  *
  * NOTE(review): the leading members look like they parallel the generic
  * 'struct route' layout, with an IPv6-sized destination at the end --
  * confirm against net/route.h before reordering or adding fields.
  */
 #if __BSD_VISIBLE
 struct route_in6 {
 	struct	rtentry *ro_rt;
 	struct	llentry *ro_lle;
 	struct	in6_addr *ro_ia6;
 	int		ro_flags;
 	struct	sockaddr_in6 ro_dst;
 };
 #endif
 
 /*
  * Options for use with [gs]etsockopt at the IPV6 level.
  * First word of comment is data type; bool is stored in int.
  */
 /* no hdrincl */
 #if 0 /* the following are relics of IPv4 and hence are disabled */
 #define IPV6_OPTIONS		1  /* buf/ip6_opts; set/get IP6 options */
 #define IPV6_RECVOPTS		5  /* bool; receive all IP6 opts w/dgram */
 #define IPV6_RECVRETOPTS	6  /* bool; receive IP6 opts for response */
 #define IPV6_RECVDSTADDR	7  /* bool; receive IP6 dst addr w/dgram */
 #define IPV6_RETOPTS		8  /* ip6_opts; set/get IP6 options */
 #endif
 #define IPV6_SOCKOPT_RESERVED1	3  /* reserved for future use */
 #define IPV6_UNICAST_HOPS	4  /* int; IP6 hops */
 #define IPV6_MULTICAST_IF	9  /* u_int; set/get IP6 multicast i/f  */
 #define IPV6_MULTICAST_HOPS	10 /* int; set/get IP6 multicast hops */
 #define IPV6_MULTICAST_LOOP	11 /* u_int; set/get IP6 multicast loopback */
 #define IPV6_JOIN_GROUP		12 /* ipv6_mreq; join a group membership */
 #define IPV6_LEAVE_GROUP	13 /* ipv6_mreq; leave a group membership */
 #define IPV6_PORTRANGE		14 /* int; range to choose for unspec port */
 #define ICMP6_FILTER		18 /* icmp6_filter; icmp6 filter */
 /* RFC2292 options */
 #ifdef _KERNEL
 #define IPV6_2292PKTINFO	19 /* bool; send/recv if, src/dst addr */
 #define IPV6_2292HOPLIMIT	20 /* bool; hop limit */
 #define IPV6_2292NEXTHOP	21 /* bool; next hop addr */
 #define IPV6_2292HOPOPTS	22 /* bool; hop-by-hop option */
 #define IPV6_2292DSTOPTS	23 /* bool; destination option */
 #define IPV6_2292RTHDR		24 /* bool; routing header */
 #define IPV6_2292PKTOPTIONS	25 /* buf/cmsghdr; set/get IPv6 options */
 #endif
 
 #define IPV6_CHECKSUM		26 /* int; checksum offset for raw socket */
 #define IPV6_V6ONLY		27 /* bool; make AF_INET6 sockets v6 only */
 #ifndef _KERNEL
 #define IPV6_BINDV6ONLY		IPV6_V6ONLY
 #endif
 
 #if 1 /* IPSEC */
 #define IPV6_IPSEC_POLICY	28 /* struct; get/set security policy */
 #endif /* IPSEC */
 
 				   /* 29; unused; was IPV6_FAITH */
 #if 1 /* IPV6FIREWALL */
 #define IPV6_FW_ADD		30 /* add a firewall rule to chain */
 #define IPV6_FW_DEL		31 /* delete a firewall rule from chain */
 #define IPV6_FW_FLUSH		32 /* flush firewall rule chain */
 #define IPV6_FW_ZERO		33 /* clear single/all firewall counter(s) */
 #define IPV6_FW_GET		34 /* get entire firewall rule chain */
 #endif
 
 /* new socket options introduced in RFC3542 */
 #define IPV6_RTHDRDSTOPTS	35 /* ip6_dest; send dst option before rthdr */
 
 #define IPV6_RECVPKTINFO	36 /* bool; recv if, dst addr */
 #define IPV6_RECVHOPLIMIT	37 /* bool; recv hop limit */
 #define IPV6_RECVRTHDR		38 /* bool; recv routing header */
 #define IPV6_RECVHOPOPTS	39 /* bool; recv hop-by-hop option */
 #define IPV6_RECVDSTOPTS	40 /* bool; recv dst option after rthdr */
 #ifdef _KERNEL
 #define IPV6_RECVRTHDRDSTOPTS	41 /* bool; recv dst option before rthdr */
 #endif
 
 #define IPV6_USE_MIN_MTU	42 /* bool; send packets at the minimum MTU */
 #define IPV6_RECVPATHMTU	43 /* bool; notify an according MTU */
 
 #define IPV6_PATHMTU		44 /* mtuinfo; get the current path MTU (sopt),
 				      4 bytes int; MTU notification (cmsg) */
 #if 0 /*obsoleted during 2292bis -> 3542*/
 #define IPV6_REACHCONF		45 /* no data; ND reachability confirm
 				      (cmsg only/not in of RFC3542) */
 #endif
 
 /* more new socket options introduced in RFC3542 */
 #define IPV6_PKTINFO		46 /* in6_pktinfo; send if, src addr */
 #define IPV6_HOPLIMIT		47 /* int; send hop limit */
 #define IPV6_NEXTHOP		48 /* sockaddr; next hop addr */
 #define IPV6_HOPOPTS		49 /* ip6_hbh; send hop-by-hop option */
 #define IPV6_DSTOPTS		50 /* ip6_dest; send dst option before rthdr */
 #define IPV6_RTHDR		51 /* ip6_rthdr; send routing header */
 #if 0
 #define IPV6_PKTOPTIONS		52 /* buf/cmsghdr; set/get IPv6 options */
 				   /* obsoleted by RFC3542 */
 #endif
 
 #define IPV6_RECVTCLASS		57 /* bool; recv traffic class values */
 
 #define IPV6_AUTOFLOWLABEL	59 /* bool; attach flowlabel automagically */
 
 #define IPV6_TCLASS		61 /* int; send traffic class value */
 #define IPV6_DONTFRAG		62 /* bool; disable IPv6 fragmentation */
 
 #define IPV6_PREFER_TEMPADDR	63 /* int; prefer temporary addresses as
 				    * the source address.
 				    */
 
 #define	IPV6_BINDANY		64 /* bool: allow bind to any address */
 
 #define	IPV6_BINDMULTI		65 /* bool; allow multibind to same addr/port */
 #define	IPV6_RSS_LISTEN_BUCKET	66 /* int; set RSS listen bucket */
 #define	IPV6_FLOWID		67 /* int; flowid of given socket */
 #define	IPV6_FLOWTYPE		68 /* int; flowtype of given socket */
 #define	IPV6_RSSBUCKETID	69 /* int; RSS bucket ID of given socket */
 
 /*
  * The following option is private; do not use it from user applications.
  * It is deliberately defined to the same value as IP_MSFILTER.
  */
 #define	IPV6_MSFILTER		74 /* struct __msfilterreq;
 				    * set/get multicast source filter list.
 				    */
 
 /* to define items, should talk with KAME guys first, for *BSD compatibility */
 
 #define IPV6_RTHDR_LOOSE     0 /* this hop need not be a neighbor. XXX old spec */
 #define IPV6_RTHDR_STRICT    1 /* this hop must be a neighbor. XXX old spec */
 #define IPV6_RTHDR_TYPE_0    0 /* IPv6 routing header type 0 */
 
 /*
  * Defaults and limits for options
  */
 #define IPV6_DEFAULT_MULTICAST_HOPS 1	/* normally limit m'casts to 1 hop */
 #define IPV6_DEFAULT_MULTICAST_LOOP 1	/* normally hear sends if a member */
 
 /*
  * The im6o_membership vector for each socket is now dynamically allocated at
  * run-time, bounded by USHRT_MAX, and is reallocated when needed, sized
  * according to a power-of-two increment.
  */
 #define	IPV6_MIN_MEMBERSHIPS	31
 #define	IPV6_MAX_MEMBERSHIPS	4095
 
 /*
  * Default resource limits for IPv6 multicast source filtering.
  * These may be modified by sysctl.
  */
 #define	IPV6_MAX_GROUP_SRC_FILTER	512	/* sources per group */
 #define	IPV6_MAX_SOCK_SRC_FILTER	128	/* sources per socket/group */
 
 /*
  * Argument structure for IPV6_JOIN_GROUP and IPV6_LEAVE_GROUP.
  */
 struct ipv6_mreq {
 	struct in6_addr	ipv6mr_multiaddr;	/* group address to join/leave */
 	unsigned int	ipv6mr_interface;	/* presumably an interface index -- TODO confirm */
 };
 
 /*
  * IPV6_PKTINFO: Packet information (RFC2292 sec 5).
  * Used as the data type of the IPV6_PKTINFO option (see the option list
  * in this header).
  */
 struct in6_pktinfo {
 	struct in6_addr	ipi6_addr;	/* src/dst IPv6 address */
 	unsigned int	ipi6_ifindex;	/* send/recv interface index */
 };
 
 /*
  * Control structure for IPV6_RECVPATHMTU socket option.
  * Also named as the data type of IPV6_PATHMTU in the option list of this
  * header.
  */
 struct ip6_mtuinfo {
 	struct sockaddr_in6 ip6m_addr;	/* or sockaddr_storage? */
 	uint32_t ip6m_mtu;		/* presumably the path MTU for ip6m_addr -- TODO confirm */
 };
 
 /*
  * Argument for IPV6_PORTRANGE:
  * - which range to search when port is unspecified at bind() or connect()
  */
 #define	IPV6_PORTRANGE_DEFAULT	0	/* default range */
 #define	IPV6_PORTRANGE_HIGH	1	/* "high" - request firewall bypass */
 #define	IPV6_PORTRANGE_LOW	2	/* "low" - vouchsafe security */
 
 #if __BSD_VISIBLE
 /*
  * Definitions for inet6 sysctl operations.
  *
  * Third level is protocol number.
  * Fourth level is desired variable within that protocol.
  */
 #define IPV6PROTO_MAXID	(IPPROTO_PIM + 1)	/* don't list to IPV6PROTO_MAX */
 
 /*
  * Names for IP sysctl objects
  */
 #define IPV6CTL_FORWARDING	1	/* act as router */
 #define IPV6CTL_SENDREDIRECTS	2	/* may send redirects when forwarding*/
 #define IPV6CTL_DEFHLIM		3	/* default Hop-Limit */
 #ifdef notyet
 #define IPV6CTL_DEFMTU		4	/* default MTU */
 #endif
 #define IPV6CTL_FORWSRCRT	5	/* forward source-routed dgrams */
 #define IPV6CTL_STATS		6	/* stats */
 #define IPV6CTL_MRTSTATS	7	/* multicast forwarding stats */
 #define IPV6CTL_MRTPROTO	8	/* multicast routing protocol */
 #define IPV6CTL_MAXFRAGPACKETS	9	/* max packets reassembly queue */
 #define IPV6CTL_SOURCECHECK	10	/* verify source route and intf */
 #define IPV6CTL_SOURCECHECK_LOGINT 11	/* minimum logging interval */
 #define IPV6CTL_ACCEPT_RTADV	12
 					/* 13; unused; was: IPV6CTL_KEEPFAITH */
 #define IPV6CTL_LOG_INTERVAL	14
 #define IPV6CTL_HDRNESTLIMIT	15
 #define IPV6CTL_DAD_COUNT	16
 #define IPV6CTL_AUTO_FLOWLABEL	17
 #define IPV6CTL_DEFMCASTHLIM	18
 #define IPV6CTL_GIF_HLIM	19	/* default HLIM for gif encap packet */
 #define IPV6CTL_KAME_VERSION	20
 #define IPV6CTL_USE_DEPRECATED	21	/* use deprecated addr (RFC2462 5.5.4) */
 #define IPV6CTL_RR_PRUNE	22	/* walk timer for router renumbering */
 #if 0	/* obsolete */
 #define IPV6CTL_MAPPED_ADDR	23
 #endif
 #define IPV6CTL_V6ONLY		24
 /*	IPV6CTL_RTEXPIRE	25	deprecated */
 /*	IPV6CTL_RTMINEXPIRE	26	deprecated */
 /*	IPV6CTL_RTMAXCACHE	27	deprecated */
 
 #define IPV6CTL_USETEMPADDR	32	/* use temporary addresses (RFC3041) */
 #define IPV6CTL_TEMPPLTIME	33	/* preferred lifetime for tmpaddrs */
 #define IPV6CTL_TEMPVLTIME	34	/* valid lifetime for tmpaddrs */
 #define IPV6CTL_AUTO_LINKLOCAL	35	/* automatic link-local addr assign */
 #define IPV6CTL_RIP6STATS	36	/* raw_ip6 stats */
 #define IPV6CTL_PREFER_TEMPADDR	37	/* prefer temporary addr as src */
 #define IPV6CTL_ADDRCTLPOLICY	38	/* get/set address selection policy */
 #define IPV6CTL_USE_DEFAULTZONE	39	/* use default scope zone */
 
 #define IPV6CTL_MAXFRAGS	41	/* max fragments */
 #if 0
 #define IPV6CTL_IFQ		42	/* ip6intrq node */
 #define IPV6CTL_ISATAPRTR	43	/* isatap router */
 #endif
 #define IPV6CTL_MCAST_PMTU	44	/* enable pMTU discovery for multicast? */
 
 /* New entries should be added here from current IPV6CTL_MAXID value. */
 /* to define items, should talk with KAME guys first, for *BSD compatibility */
 #define IPV6CTL_STEALTH		45
 
 #define	ICMPV6CTL_ND6_ONLINKNSRFC4861	47
 #define	IPV6CTL_NO_RADR		48	/* No defroute from RA */
 #define	IPV6CTL_NORBIT_RAIF	49	/* Disable R-bit in NA on RA
 					 * receiving IF. */
 #define	IPV6CTL_RFC6204W3	50	/* Accept defroute even when forwarding
 					   enabled */
 #define	IPV6CTL_MAXID		51
 #endif /* __BSD_VISIBLE */
 
 /*
  * Since both netinet/ and netinet6/ call into netipsec/ and netpfil/,
  * the protocol specific mbuf flags are shared between them.
  */
 #define	M_FASTFWD_OURS		M_PROTO1	/* changed dst to local */
 #define	M_IP6_NEXTHOP		M_PROTO2	/* explicit ip nexthop */
 #define	M_IP_NEXTHOP		M_PROTO2	/* explicit ip nexthop */
 #define	M_SKIP_FIREWALL		M_PROTO3	/* skip firewall processing */
 #define	M_AUTHIPHDR		M_PROTO4
 #define	M_DECRYPTED		M_PROTO5
 #define	M_LOOP			M_PROTO6
 #define	M_AUTHIPDGM		M_PROTO7
 #define	M_RTALERT_MLD		M_PROTO8
 
 #ifdef _KERNEL
 struct cmsghdr;
 struct ip6_hdr;
 
 int	in6_cksum_pseudo(struct ip6_hdr *, uint32_t, uint8_t, uint16_t);
 int	in6_cksum(struct mbuf *, u_int8_t, u_int32_t, u_int32_t);
 int	in6_cksum_partial(struct mbuf *, u_int8_t, u_int32_t, u_int32_t,
 			  u_int32_t);
 int	in6_localaddr(struct in6_addr *);
 int	in6_localip(struct in6_addr *);
+int	in6_ifhasaddr(struct ifnet *, struct in6_addr *);
 int	in6_addrscope(const struct in6_addr *);
 char	*ip6_sprintf(char *, const struct in6_addr *);
 struct	in6_ifaddr *in6_ifawithifp(struct ifnet *, struct in6_addr *);
 extern void in6_if_up(struct ifnet *);
 struct sockaddr;
 extern	u_char	ip6_protox[];
 
 void	in6_sin6_2_sin(struct sockaddr_in *sin,
 			    struct sockaddr_in6 *sin6);
 void	in6_sin_2_v4mapsin6(struct sockaddr_in *sin,
 				 struct sockaddr_in6 *sin6);
 void	in6_sin6_2_sin_in_sock(struct sockaddr *nam);
 void	in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam);
 extern void addrsel_policy_init(void);
 
 #define	satosin6(sa)	((struct sockaddr_in6 *)(sa))
 #define	sin6tosa(sin6)	((struct sockaddr *)(sin6))
 #define	ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
 
 #endif /* _KERNEL */
 
 #ifndef _SIZE_T_DECLARED
 typedef	__size_t	size_t;
 #define	_SIZE_T_DECLARED
 #endif
 
 #ifndef _SOCKLEN_T_DECLARED
 typedef	__socklen_t	socklen_t;
 #define	_SOCKLEN_T_DECLARED
 #endif
 
 #if __BSD_VISIBLE
 
 __BEGIN_DECLS
 struct cmsghdr;
 
 extern int inet6_option_space(int);
 extern int inet6_option_init(void *, struct cmsghdr **, int);
 extern int inet6_option_append(struct cmsghdr *, const uint8_t *,
 	int, int);
 extern uint8_t *inet6_option_alloc(struct cmsghdr *, int, int, int);
 extern int inet6_option_next(const struct cmsghdr *, uint8_t **);
 extern int inet6_option_find(const struct cmsghdr *, uint8_t **, int);
 
 extern size_t inet6_rthdr_space(int, int);
 extern struct cmsghdr *inet6_rthdr_init(void *, int);
 extern int inet6_rthdr_add(struct cmsghdr *, const struct in6_addr *,
 	unsigned int);
 extern int inet6_rthdr_lasthop(struct cmsghdr *, unsigned int);
 #if 0 /* not implemented yet */
 extern int inet6_rthdr_reverse(const struct cmsghdr *, struct cmsghdr *);
 #endif
 extern int inet6_rthdr_segments(const struct cmsghdr *);
 extern struct in6_addr *inet6_rthdr_getaddr(struct cmsghdr *, int);
 extern int inet6_rthdr_getflags(const struct cmsghdr *, int);
 
 extern int inet6_opt_init(void *, socklen_t);
 extern int inet6_opt_append(void *, socklen_t, int, uint8_t, socklen_t,
 	uint8_t, void **);
 extern int inet6_opt_finish(void *, socklen_t, int);
 extern int inet6_opt_set_val(void *, int, void *, socklen_t);
 
 extern int inet6_opt_next(void *, socklen_t, int, uint8_t *, socklen_t *,
 	void **);
 extern int inet6_opt_find(void *, socklen_t, int, uint8_t, socklen_t *,
 	void **);
 extern int inet6_opt_get_val(void *, int, void *, socklen_t);
 extern socklen_t inet6_rth_space(int, int);
 extern void *inet6_rth_init(void *, socklen_t, int, int);
 extern int inet6_rth_add(void *, const struct in6_addr *);
 extern int inet6_rth_reverse(const void *, void *);
 extern int inet6_rth_segments(const void *);
 extern struct in6_addr *inet6_rth_getaddr(const void *, int);
 __END_DECLS
 
 #endif /* __BSD_VISIBLE */
 
 #endif /* !_NETINET6_IN6_H_ */
Index: user/ngie/more-tests/sys/pc98/pc98/genassym.c
===================================================================
--- user/ngie/more-tests/sys/pc98/pc98/genassym.c	(nonexistent)
+++ user/ngie/more-tests/sys/pc98/pc98/genassym.c	(revision 281676)
@@ -0,0 +1,3 @@
+/* $FreeBSD$ */
+
+#include "../../i386/i386/genassym.c"

Property changes on: user/ngie/more-tests/sys/pc98/pc98/genassym.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: user/ngie/more-tests/sys
===================================================================
--- user/ngie/more-tests/sys	(revision 281675)
+++ user/ngie/more-tests/sys	(revision 281676)

Property changes on: user/ngie/more-tests/sys
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys:r281621-281675
Index: user/ngie/more-tests/usr.bin/gzip/gzip.c
===================================================================
--- user/ngie/more-tests/usr.bin/gzip/gzip.c	(revision 281675)
+++ user/ngie/more-tests/usr.bin/gzip/gzip.c	(revision 281676)
@@ -1,2173 +1,2173 @@
-/*	$NetBSD: gzip.c,v 1.107 2015/01/13 02:37:20 mrg Exp $	*/
+/*	$NetBSD: gzip.c,v 1.108 2015/04/15 02:29:12 christos Exp $	*/
 
 /*-
  * Copyright (c) 1997, 1998, 2003, 2004, 2006 Matthew R. Green
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 #ifndef lint
 __COPYRIGHT("@(#) Copyright (c) 1997, 1998, 2003, 2004, 2006\
  Matthew R. Green.  All rights reserved.");
 __FBSDID("$FreeBSD$");
 #endif /* not lint */
 
 /*
  * gzip.c -- GPL free gzip using zlib.
  *
  * RFC 1950 covers the zlib format
  * RFC 1951 covers the deflate format
  * RFC 1952 covers the gzip format
  *
  * TODO:
  *	- use mmap where possible
  *	- make bzip2/compress -v/-t/-l support work as well as possible
  */
 
 #include <sys/param.h>
 #include <sys/stat.h>
 #include <sys/time.h>
 
 #include <inttypes.h>
 #include <unistd.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <zlib.h>
 #include <fts.h>
 #include <libgen.h>
 #include <stdarg.h>
 #include <getopt.h>
 #include <time.h>
 
 /*
  * What type of file are we dealing with.
  *
  * Every format other than gzip is optional: its enumerator only exists
  * when the matching NO_*_SUPPORT macro is not defined, so the numeric
  * values of the later enumerators depend on the build configuration.
  */
 enum filetype {
 	FT_GZIP,		/* always compiled in */
 #ifndef NO_BZIP2_SUPPORT
 	FT_BZIP2,
 #endif
 #ifndef NO_COMPRESS_SUPPORT
 	FT_Z,
 #endif
 #ifndef NO_PACK_SUPPORT
 	FT_PACK,
 #endif
 #ifndef NO_XZ_SUPPORT
 	FT_XZ,
 #endif
 	FT_LAST,		/* comes right after the last real format */
 	FT_UNKNOWN		/* returned by file_gettype() when no magic matches */
 };
 
 #ifndef NO_BZIP2_SUPPORT
 #include <bzlib.h>
 
 #define BZ2_SUFFIX	".bz2"
 #define BZIP2_MAGIC	"\102\132\150"
 #endif
 
 #ifndef NO_COMPRESS_SUPPORT
 #define Z_SUFFIX	".Z"
 #define Z_MAGIC		"\037\235"
 #endif
 
 #ifndef NO_PACK_SUPPORT
 #define PACK_MAGIC	"\037\036"
 #endif
 
 #ifndef NO_XZ_SUPPORT
 #include <lzma.h>
 #define XZ_SUFFIX	".xz"
 #define XZ_MAGIC	"\3757zXZ"
 #endif
 
 #define GZ_SUFFIX	".gz"
 
 #define BUFLEN		(64 * 1024)
 
 #define GZIP_MAGIC0	0x1F
 #define GZIP_MAGIC1	0x8B
 #define GZIP_OMAGIC1	0x9E
 
 #define GZIP_TIMESTAMP	(off_t)4
 #define GZIP_ORIGNAME	(off_t)10
 
 #define HEAD_CRC	0x02
 #define EXTRA_FIELD	0x04
 #define ORIG_NAME	0x08
 #define COMMENT		0x10
 
 #define OS_CODE		3	/* Unix */
 
 /*
  * One recognised file name suffix.  `zipped' is the suffix carried by the
  * compressed file, `ziplen' is its length without the terminating NUL
  * (see the SUFFIX() helper below), and `normal' is its counterpart for
  * unzip, e.g. ".tgz" is paired with ".tar".
  */
 typedef struct {
     const char	*zipped;
     int		ziplen;
     const char	*normal;	/* for unzip - must not be longer than zipped */
 } suffixes_t;
 /*
  * Table of known suffixes.  The table is not const because main() rewrites
  * two placeholder slots when -S is given: slot 0 for a non-empty suffix
  * and the last slot for an empty one.  In SMALL builds only slot 0 exists.
  */
 static suffixes_t suffixes[] = {
 /* Build an entry, computing ziplen from the string literal at compile time. */
 #define	SUFFIX(Z, N) {Z, sizeof Z - 1, N}
 	SUFFIX(GZ_SUFFIX,	""),	/* Overwritten by -S .xxx */
 #ifndef SMALL
 	SUFFIX(GZ_SUFFIX,	""),
 	SUFFIX(".z",		""),
 	SUFFIX("-gz",		""),
 	SUFFIX("-z",		""),
 	SUFFIX("_z",		""),
 	SUFFIX(".taz",		".tar"),
 	SUFFIX(".tgz",		".tar"),
 #ifndef NO_BZIP2_SUPPORT
 	SUFFIX(BZ2_SUFFIX,	""),
 	SUFFIX(".tbz",		".tar"),
 	SUFFIX(".tbz2",		".tar"),
 #endif
 #ifndef NO_COMPRESS_SUPPORT
 	SUFFIX(Z_SUFFIX,	""),
 #endif
 #ifndef NO_XZ_SUPPORT
 	SUFFIX(XZ_SUFFIX,	""),
 #endif
 	SUFFIX(GZ_SUFFIX,	""),	/* Overwritten by -S "" */
 #endif /* SMALL */
 #undef SUFFIX
 };
 #define NUM_SUFFIXES (sizeof suffixes / sizeof suffixes[0])
 #define SUFFIX_MAXLEN	30
 
 static	const char	gzip_version[] = "FreeBSD gzip 20150413";
 
 #ifndef SMALL
 static	const char	gzip_copyright[] = \
 "   Copyright (c) 1997, 1998, 2003, 2004, 2006 Matthew R. Green\n"
 "   All rights reserved.\n"
 "\n"
 "   Redistribution and use in source and binary forms, with or without\n"
 "   modification, are permitted provided that the following conditions\n"
 "   are met:\n"
 "   1. Redistributions of source code must retain the above copyright\n"
 "      notice, this list of conditions and the following disclaimer.\n"
 "   2. Redistributions in binary form must reproduce the above copyright\n"
 "      notice, this list of conditions and the following disclaimer in the\n"
 "      documentation and/or other materials provided with the distribution.\n"
 "\n"
 "   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\n"
 "   IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\n"
 "   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\n"
 "   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\n"
 "   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,\n"
 "   BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n"
 "   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED\n"
 "   AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n"
 "   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\n"
 "   OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\n"
 "   SUCH DAMAGE.";
 #endif
 
 static	int	cflag;			/* stdout mode */
 static	int	dflag;			/* decompress mode */
 static	int	lflag;			/* list mode */
 static	int	numflag = 6;		/* gzip -1..-9 value */
 
 #ifndef SMALL
 static	int	fflag;			/* force mode */
 static	int	kflag;			/* don't delete input files */
 static	int	nflag;			/* don't save name/timestamp */
 static	int	Nflag;			/* don't restore name/timestamp */
 static	int	qflag;			/* quiet mode */
 static	int	rflag;			/* recursive mode */
 static	int	tflag;			/* test */
 static	int	vflag;			/* verbose mode */
 static	const char *remove_file = NULL;	/* file to be removed upon SIGINT */
 #else
 #define		qflag	0
 #define		tflag	0
 #endif
 
 static	int	exit_value = 0;		/* exit value */
 
 static	char	*infile;		/* name of file coming in */
 
 static	void	maybe_err(const char *fmt, ...) __printflike(1, 2) __dead2;
 #if !defined(NO_BZIP2_SUPPORT) || !defined(NO_PACK_SUPPORT) ||	\
     !defined(NO_XZ_SUPPORT)
 static	void	maybe_errx(const char *fmt, ...) __printflike(1, 2) __dead2;
 #endif
 static	void	maybe_warn(const char *fmt, ...) __printflike(1, 2);
 static	void	maybe_warnx(const char *fmt, ...) __printflike(1, 2);
 static	enum filetype file_gettype(u_char *);
 #ifdef SMALL
 #define gz_compress(if, of, sz, fn, tm) gz_compress(if, of, sz)
 #endif
 static	off_t	gz_compress(int, int, off_t *, const char *, uint32_t);
 static	off_t	gz_uncompress(int, int, char *, size_t, off_t *, const char *);
 static	off_t	file_compress(char *, char *, size_t);
 static	off_t	file_uncompress(char *, char *, size_t);
 static	void	handle_pathname(char *);
 static	void	handle_file(char *, struct stat *);
 static	void	handle_stdin(void);
 static	void	handle_stdout(void);
 static	void	print_ratio(off_t, off_t, FILE *);
 static	void	print_list(int fd, off_t, const char *, time_t);
 static	void	usage(void) __dead2;
 static	void	display_version(void) __dead2;
 #ifndef SMALL
 static	void	display_license(void);
 static	void	sigint_handler(int);
 #endif
 static	const suffixes_t *check_suffix(char *, int);
 static	ssize_t	read_retry(int, void *, size_t);
 
 #ifdef SMALL
 #define unlink_input(f, sb) unlink(f)
 #else
 static	off_t	cat_fd(unsigned char *, size_t, off_t *, int fd);
 static	void	prepend_gzip(char *, int *, char ***);
 static	void	handle_dir(char *);
 static	void	print_verbage(const char *, const char *, off_t, off_t);
 static	void	print_test(const char *, int);
 static	void	copymodes(int fd, const struct stat *, const char *file);
 static	int	check_outfile(const char *outfile);
 #endif
 
 #ifndef NO_BZIP2_SUPPORT
 static	off_t	unbzip2(int, int, char *, size_t, off_t *);
 #endif
 
 #ifndef NO_COMPRESS_SUPPORT
 static	FILE 	*zdopen(int);
 static	off_t	zuncompress(FILE *, FILE *, char *, size_t, off_t *);
 #endif
 
 #ifndef NO_PACK_SUPPORT
 static	off_t	unpack(int, int, char *, size_t, off_t *);
 #endif
 
 #ifndef NO_XZ_SUPPORT
 static	off_t	unxz(int, int, char *, size_t, off_t *);
 #endif
 
 /*
  * Long option support.  SMALL builds have no long options at all: the
  * getopt_long() call in main() is rewritten into a plain getopt() call
  * and the longopts/index arguments are discarded by the macro.
  */
 #ifdef SMALL
 #define getopt_long(a,b,c,d,e) getopt(a,b,c)
 #else
 /*
  * Each long option maps onto the short option character in the last
  * column, so main() only has to switch on the short form.  Several long
  * names are aliases for the same short option.
  */
 static const struct option longopts[] = {
 	{ "stdout",		no_argument,		0,	'c' },
 	{ "to-stdout",		no_argument,		0,	'c' },
 	{ "decompress",		no_argument,		0,	'd' },
 	{ "uncompress",		no_argument,		0,	'd' },
 	{ "force",		no_argument,		0,	'f' },
 	{ "help",		no_argument,		0,	'h' },
 	{ "keep",		no_argument,		0,	'k' },
 	{ "list",		no_argument,		0,	'l' },
 	{ "no-name",		no_argument,		0,	'n' },
 	{ "name",		no_argument,		0,	'N' },
 	{ "quiet",		no_argument,		0,	'q' },
 	{ "recursive",		no_argument,		0,	'r' },
 	{ "suffix",		required_argument,	0,	'S' },
 	{ "test",		no_argument,		0,	't' },
 	{ "verbose",		no_argument,		0,	'v' },
 	{ "version",		no_argument,		0,	'V' },
 	{ "fast",		no_argument,		0,	'1' },
 	{ "best",		no_argument,		0,	'9' },
 	{ "ascii",		no_argument,		0,	'a' },
 	{ "license",		no_argument,		0,	'L' },
 	{ NULL,			no_argument,		0,	0 },	/* terminator */
 };
 #endif
 
 /*
  * Program entry point.
  *
  * Sets the mode flags from the program name and the command line (with
  * the contents of $GZIP prepended in non-SMALL builds), then processes
  * either stdin/stdout (no operands) or each path operand in turn, and
  * exits with the accumulated exit_value.
  */
 int
 main(int argc, char **argv)
 {
 	const char *progname = getprogname();
 #ifndef SMALL
 	char *gzip;
 	int len;
 #endif
 	int ch;
 
 #ifndef SMALL
 	/* Options found in $GZIP are inserted ahead of the real arguments. */
 	if ((gzip = getenv("GZIP")) != NULL)
 		prepend_gzip(gzip, &argc, &argv);
 	signal(SIGINT, sigint_handler);
 #endif
 
 	/*
 	 * XXX
 	 * handle being called `gunzip', `zcat' and `gzcat'
 	 */
 	if (strcmp(progname, "gunzip") == 0)
 		dflag = 1;
 	else if (strcmp(progname, "zcat") == 0 ||
 		 strcmp(progname, "gzcat") == 0)
 		dflag = cflag = 1;
 
 #ifdef SMALL
 #define OPT_LIST "123456789cdhlV"
 #else
 #define OPT_LIST "123456789acdfhklLNnqrS:tVv"
 #endif
 
 	/* In SMALL builds getopt_long is a macro that expands to getopt(). */
 	while ((ch = getopt_long(argc, argv, OPT_LIST, longopts, NULL)) != -1) {
 		switch (ch) {
 		case '1': case '2': case '3':
 		case '4': case '5': case '6':
 		case '7': case '8': case '9':
 			/* compression level, converted from its digit */
 			numflag = ch - '0';
 			break;
 		case 'c':
 			cflag = 1;
 			break;
 		case 'd':
 			dflag = 1;
 			break;
 		case 'l':
 			/* listing also turns on decompress mode */
 			lflag = 1;
 			dflag = 1;
 			break;
 		case 'V':
 			display_version();
 			/* NOTREACHED */
 #ifndef SMALL
 		case 'a':
 			fprintf(stderr, "%s: option --ascii ignored on this system\n", progname);
 			break;
 		case 'f':
 			fflag = 1;
 			break;
 		case 'k':
 			kflag = 1;
 			break;
 		case 'L':
 			display_license();
 			/* NOT REACHED */
 		case 'N':
 			/* -N and -n are mutually exclusive; the last one wins */
 			nflag = 0;
 			Nflag = 1;
 			break;
 		case 'n':
 			nflag = 1;
 			Nflag = 0;
 			break;
 		case 'q':
 			qflag = 1;
 			break;
 		case 'r':
 			rflag = 1;
 			break;
 		case 'S':
 			/*
 			 * A non-empty suffix replaces the first table slot;
 			 * an empty one replaces the last slot instead.
 			 */
 			len = strlen(optarg);
 			if (len != 0) {
 				if (len > SUFFIX_MAXLEN)
 					errx(1, "incorrect suffix: '%s': too long", optarg);
 				suffixes[0].zipped = optarg;
 				suffixes[0].ziplen = len;
 			} else {
 				suffixes[NUM_SUFFIXES - 1].zipped = "";
 				suffixes[NUM_SUFFIXES - 1].ziplen = 0;
 			}
 			break;
 		case 't':
 			/* testing also turns on stdout and decompress modes */
 			cflag = 1;
 			tflag = 1;
 			dflag = 1;
 			break;
 		case 'v':
 			vflag = 1;
 			break;
 #endif
 		default:
 			usage();
 			/* NOTREACHED */
 		}
 	}
 	/* Skip past the options; what is left are the path operands. */
 	argv += optind;
 	argc -= optind;
 
 	if (argc == 0) {
 		if (dflag)	/* stdin mode */
 			handle_stdin();
 		else		/* stdout mode */
 			handle_stdout();
 	} else {
 		/* argv is NULL-terminated, so walk it until the NULL. */
 		do {
 			handle_pathname(argv[0]);
 		} while (*++argv);
 	}
 #ifndef SMALL
 	/* With -l and more than one operand, finish with a totals line. */
 	if (qflag == 0 && lflag && argc > 1)
 		print_list(-1, 0, "(totals)", 0);
 #endif
 	exit(exit_value);
 }
 
 /*
  * Report a warning through vwarn() unless quiet mode is on.  Whether or
  * not anything is printed, a still-zero exit_value is raised to 1.
  */
 void
 maybe_warn(const char *fmt, ...)
 {
 	va_list args;
 
 	/* Recording the failure does not depend on the message being shown. */
 	if (!exit_value)
 		exit_value = 1;
 	if (qflag)
 		return;
 
 	va_start(args, fmt);
 	vwarn(fmt, args);
 	va_end(args);
 }
 
 /*
  * Same as maybe_warn() but prints through vwarnx(), i.e. without an
  * errno.  A still-zero exit_value is raised to 1 in every case.
  */
 void
 maybe_warnx(const char *fmt, ...)
 {
 	va_list args;
 
 	/* Recording the failure does not depend on the message being shown. */
 	if (!exit_value)
 		exit_value = 1;
 	if (qflag)
 		return;
 
 	va_start(args, fmt);
 	vwarnx(fmt, args);
 	va_end(args);
 }
 
 /*
  * Fatal variant of maybe_warn(): print the message through vwarn()
  * unless quiet mode is on, then terminate the program with status 2.
  */
 void
 maybe_err(const char *fmt, ...)
 {
 	va_list args;
 
 	if (!qflag) {
 		va_start(args, fmt);
 		vwarn(fmt, args);
 		va_end(args);
 	}
 
 	/* The exit is unconditional; only the message is optional. */
 	exit(2);
 }
 
 #if !defined(NO_BZIP2_SUPPORT) || !defined(NO_PACK_SUPPORT) ||	\
     !defined(NO_XZ_SUPPORT)
 /*
  * Fatal variant of maybe_warnx(): print the message through vwarnx()
  * (no errno) unless quiet mode is on, then exit with status 2.  Only
  * compiled when bzip2, pack or xz support is built in.
  */
 void
 maybe_errx(const char *fmt, ...)
 {
 	va_list args;
 
 	if (!qflag) {
 		va_start(args, fmt);
 		vwarnx(fmt, args);
 		va_end(args);
 	}
 
 	/* The exit is unconditional; only the message is optional. */
 	exit(2);
 }
 #endif
 
 #ifndef SMALL
 /*
  * Split the value of $GZIP on blanks and tabs and splice the resulting
  * words into the argument vector right after the program name.
  *
  * On return *argc and *argv describe a newly allocated, NULL-terminated
  * vector: program name, the $GZIP words, then the original arguments.
  * Nothing is changed when the string holds no words.  The words point
  * into a private copy of the string that is intentionally never freed.
  */
 static void
 prepend_gzip(char *gzip, int *argc, char ***argv)
 {
 	char *p, **merged, **orig;
 	int extra = 0, n;
 
 	/* First pass: count the words so the new vector can be sized. */
 	for (p = gzip; ; p += strcspn(p, " \t")) {
 		p += strspn(p, " \t");
 		if (*p == '\0')
 			break;
 		extra++;
 	}
 
 	/* Nothing but whitespace (or empty): leave argc/argv untouched. */
 	if (extra == 0)
 		return;
 
 	*argc += extra;
 	orig = *argv;
 
 	merged = malloc((*argc + 1) * sizeof(*merged));
 	if (merged == NULL)
 		maybe_err("malloc");
 
 	/* Hand the new vector to the caller before filling it in. */
 	*argv = merged;
 
 	/* The program name always stays in slot 0. */
 	n = 0;
 	merged[n++] = *orig++;
 
 	/* Second pass: cut a private copy of $GZIP into words in place. */
 	p = strdup(gzip);
 	if (p == NULL)
 		maybe_err("strdup");
 	for (;;) {
 		p += strspn(p, " \t");
 		if (*p == '\0')
 			break;
 		merged[n++] = p;
 		p += strcspn(p, " \t");
 		if (*p == '\0')
 			/* Last word ran up to the end of the string. */
 			break;
 		/* Replace the separator with a terminator and move on. */
 		*p++ = '\0';
 	}
 
 	/* Finally append the caller's own arguments and the closing NULL. */
 	for (; *orig != NULL; orig++)
 		merged[n++] = *orig;
 	merged[n] = NULL;
 }
 #endif
 
 /*
  * Compress everything that can be read from descriptor `in' and write it
  * to descriptor `out' as one gzip member: header, raw deflate data, then
  * an 8 byte trailer holding the CRC32 and the input length.
  *
  * Unless nflag is set, origname and mtime are stored in the header; SMALL
  * builds always emit a fixed header that carries neither.  When gsizep is
  * not NULL it receives the number of bytes written to `out', or -1 if
  * writing compressed data failed.
  *
  * Returns the number of bytes read from `in', or -1 on error.  The zlib
  * stream state is released on every path out of the function, including
  * the error paths taken after deflateInit2() has succeeded.
  */
 static off_t
 gz_compress(int in, int out, off_t *gsizep, const char *origname, uint32_t mtime)
 {
 	z_stream z;
 	char *outbufp, *inbufp;
 	off_t in_tot = 0, out_tot = 0;
 	ssize_t in_size;
 	int i, error;
 	int z_active = 0;	/* non-zero while z owns deflate state */
 	uLong crc;
 #ifdef SMALL
 	static char header[] = { GZIP_MAGIC0, GZIP_MAGIC1, Z_DEFLATED, 0,
 				 0, 0, 0, 0,
 				 0, OS_CODE };
 #endif
 
 	outbufp = malloc(BUFLEN);
 	inbufp = malloc(BUFLEN);
 	if (outbufp == NULL || inbufp == NULL) {
 		maybe_err("malloc failed");
 		goto out;
 	}
 
 	memset(&z, 0, sizeof z);
 	z.zalloc = Z_NULL;
 	z.zfree = Z_NULL;
 	z.opaque = 0;
 
 	/* Build the gzip header at the front of the output buffer. */
 #ifdef SMALL
 	memcpy(outbufp, header, sizeof header);
 	i = sizeof header;
 #else
 	if (nflag != 0) {
 		mtime = 0;
 		origname = "";
 	}
 
 	i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c%c%c%c%c%s",
 		     GZIP_MAGIC0, GZIP_MAGIC1, Z_DEFLATED,
 		     *origname ? ORIG_NAME : 0,
 		     mtime & 0xff,
 		     (mtime >> 8) & 0xff,
 		     (mtime >> 16) & 0xff,
 		     (mtime >> 24) & 0xff,
 		     numflag == 1 ? 4 : numflag == 9 ? 2 : 0,
 		     OS_CODE, origname);
 	if (i >= BUFLEN)
 		/* this need PATH_MAX > BUFLEN ... */
 		maybe_err("snprintf");
 	/* A stored name must include its terminating NUL in the header. */
 	if (*origname)
 		i++;
 #endif
 
 	/* Compressed data is appended directly after the header bytes. */
 	z.next_out = (unsigned char *)outbufp + i;
 	z.avail_out = BUFLEN - i;
 
 	error = deflateInit2(&z, numflag, Z_DEFLATED,
 			     (-MAX_WBITS), 8, Z_DEFAULT_STRATEGY);
 	if (error != Z_OK) {
 		maybe_warnx("deflateInit2 failed");
 		in_tot = -1;
 		goto out;
 	}
 	z_active = 1;
 
 	crc = crc32(0L, Z_NULL, 0);
 	for (;;) {
 		/* Output buffer full: flush it and start again at its head. */
 		if (z.avail_out == 0) {
 			if (write(out, outbufp, BUFLEN) != BUFLEN) {
 				maybe_warn("write");
 				out_tot = -1;
 				goto out;
 			}
 
 			out_tot += BUFLEN;
 			z.next_out = (unsigned char *)outbufp;
 			z.avail_out = BUFLEN;
 		}
 
 		/* Input consumed: read the next chunk, stopping at EOF. */
 		if (z.avail_in == 0) {
 			in_size = read(in, inbufp, BUFLEN);
 			if (in_size < 0) {
 				maybe_warn("read");
 				in_tot = -1;
 				goto out;
 			}
 			if (in_size == 0)
 				break;
 
 			crc = crc32(crc, (const Bytef *)inbufp, (unsigned)in_size);
 			in_tot += in_size;
 			z.next_in = (unsigned char *)inbufp;
 			z.avail_in = in_size;
 		}
 
 		error = deflate(&z, Z_NO_FLUSH);
 		if (error != Z_OK && error != Z_STREAM_END) {
 			maybe_warnx("deflate failed");
 			in_tot = -1;
 			goto out;
 		}
 	}
 
 	/* clean up: drain whatever deflate still has buffered */
 	for (;;) {
 		size_t len;
 		ssize_t w;
 
 		error = deflate(&z, Z_FINISH);
 		if (error != Z_OK && error != Z_STREAM_END) {
 			maybe_warnx("deflate failed");
 			in_tot = -1;
 			goto out;
 		}
 
 		len = (char *)z.next_out - outbufp;
 
 		w = write(out, outbufp, len);
 		if (w == -1 || (size_t)w != len) {
 			maybe_warn("write");
 			out_tot = -1;
 			goto out;
 		}
 		out_tot += len;
 		z.next_out = (unsigned char *)outbufp;
 		z.avail_out = BUFLEN;
 
 		if (error == Z_STREAM_END)
 			break;
 	}
 
 	/* The stream is released here whatever deflateEnd() reports. */
 	z_active = 0;
 	if (deflateEnd(&z) != Z_OK) {
 		maybe_warnx("deflateEnd failed");
 		in_tot = -1;
 		goto out;
 	}
 
 	/* Trailer: CRC32 then input length, each as 4 little-endian bytes. */
 	i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c%c%c",
 		 (int)crc & 0xff,
 		 (int)(crc >> 8) & 0xff,
 		 (int)(crc >> 16) & 0xff,
 		 (int)(crc >> 24) & 0xff,
 		 (int)in_tot & 0xff,
 		 (int)(in_tot >> 8) & 0xff,
 		 (int)(in_tot >> 16) & 0xff,
 		 (int)(in_tot >> 24) & 0xff);
 	if (i != 8)
 		maybe_err("snprintf");
 	if (write(out, outbufp, i) != i) {
 		maybe_warn("write");
 		in_tot = -1;
 	} else
 		out_tot += i;
 
 out:
 	/*
 	 * Error paths arrive here with the deflate stream still open; end
 	 * it so its internal buffers are not leaked when further files are
 	 * processed.  The return value is irrelevant on these paths.
 	 */
 	if (z_active)
 		(void)deflateEnd(&z);
 	free(inbufp);
 	free(outbufp);
 	if (gsizep)
 		*gsizep = out_tot;
 	return in_tot;
 }
 
 /*
  * Uncompress gzip data read from `in' and write the result to `out'.
  *
  * `pre'/`prelen' describe bytes the caller has already read from the
  * input; they are parsed first, before anything is read from `in'.
  * `filename' is only used in diagnostics.
  *
  * The input is handled by a byte-driven state machine that walks the
  * gzip header, inflates the deflate data, then checks the CRC and length
  * fields.  After a complete member the machine returns to its first
  * state, so several concatenated members are processed in one call.
  *
  * Returns the number of uncompressed bytes produced, or -1 on failure.
  * If gsizep is not NULL it receives the number of input bytes consumed
  * (prelen plus everything read from `in').
  *
  * NOTE(review): the previous version of this comment said the input is
  * closed here, but no close() call appears in this function.
  */
 static off_t
 gz_uncompress(int in, int out, char *pre, size_t prelen, off_t *gsizep,
 	      const char *filename)
 {
 	z_stream z;
 	char *outbufp, *inbufp;
 	off_t out_tot = -1, in_tot = 0;
 	/* Output size of the current member only; 32 bits like the trailer. */
 	uint32_t out_sub_tot = 0;
 	/*
 	 * Parser states, in the order they are normally visited.  Many
 	 * transitions below are written as `state++', so the order of the
 	 * enumerators matters.
 	 */
 	enum {
 		GZSTATE_MAGIC0,
 		GZSTATE_MAGIC1,
 		GZSTATE_METHOD,
 		GZSTATE_FLAGS,
 		GZSTATE_SKIPPING,
 		GZSTATE_EXTRA,
 		GZSTATE_EXTRA2,
 		GZSTATE_EXTRA3,
 		GZSTATE_ORIGNAME,
 		GZSTATE_COMMENT,
 		GZSTATE_HEAD_CRC1,
 		GZSTATE_HEAD_CRC2,
 		GZSTATE_INIT,
 		GZSTATE_READ,
 		GZSTATE_CRC,
 		GZSTATE_LEN,
 	} state = GZSTATE_MAGIC0;
 	int flags = 0, skip_count = 0;
 	int error = Z_STREAM_ERROR, done_reading = 0;
 	uLong crc = 0;
 	ssize_t wr;
 	/* Set when a state needs more bytes than are currently buffered. */
 	int needmore = 0;
 
 /* Consume one input byte. */
 #define ADVANCE()       { z.next_in++; z.avail_in--; }
 
 	if ((outbufp = malloc(BUFLEN)) == NULL) {
 		maybe_err("malloc failed");
 		goto out2;
 	}
 	if ((inbufp = malloc(BUFLEN)) == NULL) {
 		maybe_err("malloc failed");
 		goto out1;
 	}
 
 	/* Start by parsing the bytes the caller has already read. */
 	memset(&z, 0, sizeof z);
 	z.avail_in = prelen;
 	z.next_in = (unsigned char *)pre;
 	z.avail_out = BUFLEN;
 	z.next_out = (unsigned char *)outbufp;
 	z.zalloc = NULL;
 	z.zfree = NULL;
 	z.opaque = 0;
 
 	in_tot = prelen;
 	out_tot = 0;
 
 	for (;;) {
 		/*
 		 * Refill the input buffer when it is empty, or when a state
 		 * asked for more data; in the latter case the unread bytes
 		 * are first moved to the front of inbufp so they are kept.
 		 */
 		if ((z.avail_in == 0 || needmore) && done_reading == 0) {
 			ssize_t in_size;
 
 			if (z.avail_in > 0) {
 				memmove(inbufp, z.next_in, z.avail_in);
 			}
 			z.next_in = (unsigned char *)inbufp;
 			in_size = read(in, z.next_in + z.avail_in,
 			    BUFLEN - z.avail_in);
 
 			if (in_size == -1) {
 				maybe_warn("failed to read stdin");
 				goto stop_and_fail;
 			} else if (in_size == 0) {
 				done_reading = 1;
 			}
 
 			z.avail_in += in_size;
 			needmore = 0;
 
 			in_tot += in_size;
 		}
 		/*
 		 * Out of input: that is a clean end only when we are between
 		 * members (about to look for a new magic number).
 		 */
 		if (z.avail_in == 0) {
 			if (done_reading && state != GZSTATE_MAGIC0) {
 				maybe_warnx("%s: unexpected end of file",
 					    filename);
 				goto stop_and_fail;
 			}
 			goto stop;
 		}
 		switch (state) {
 		case GZSTATE_MAGIC0:
 			/* First magic byte; also resets the per-member state. */
 			if (*z.next_in != GZIP_MAGIC0) {
 				if (in_tot > 0) {
 					maybe_warnx("%s: trailing garbage "
 						    "ignored", filename);
 					goto stop;
 				}
 				maybe_warnx("input not gziped (MAGIC0)");
 				goto stop_and_fail;
 			}
 			ADVANCE();
 			state++;
 			out_sub_tot = 0;
 			crc = crc32(0L, Z_NULL, 0);
 			break;
 
 		case GZSTATE_MAGIC1:
 			/* Second magic byte; two values are accepted. */
 			if (*z.next_in != GZIP_MAGIC1 &&
 			    *z.next_in != GZIP_OMAGIC1) {
 				maybe_warnx("input not gziped (MAGIC1)");
 				goto stop_and_fail;
 			}
 			ADVANCE();
 			state++;
 			break;
 
 		case GZSTATE_METHOD:
 			if (*z.next_in != Z_DEFLATED) {
 				maybe_warnx("unknown compression method");
 				goto stop_and_fail;
 			}
 			ADVANCE();
 			state++;
 			break;
 
 		case GZSTATE_FLAGS:
 			/*
 			 * Remember which optional header fields follow, then
 			 * skip the six fixed bytes after the flags (in the
 			 * header gz_compress() writes these are the four
 			 * mtime bytes, the extra-flags byte and the OS code).
 			 */
 			flags = *z.next_in;
 			ADVANCE();
 			skip_count = 6;
 			state++;
 			break;
 
 		case GZSTATE_SKIPPING:
 			if (skip_count > 0) {
 				skip_count--;
 				ADVANCE();
 			} else
 				state++;
 			break;
 
 		case GZSTATE_EXTRA:
 			/* Optional extra field: low byte of its length. */
 			if ((flags & EXTRA_FIELD) == 0) {
 				state = GZSTATE_ORIGNAME;
 				break;
 			}
 			skip_count = *z.next_in;
 			ADVANCE();
 			state++;
 			break;
 
 		case GZSTATE_EXTRA2:
 			/* High byte of the extra field length. */
 			skip_count |= ((*z.next_in) << 8);
 			ADVANCE();
 			state++;
 			break;
 
 		case GZSTATE_EXTRA3:
 			/* Skip the extra field payload itself. */
 			if (skip_count > 0) {
 				skip_count--;
 				ADVANCE();
 			} else
 				state++;
 			break;
 
 		case GZSTATE_ORIGNAME:
 			/* Optional NUL-terminated name; skipped, not used. */
 			if ((flags & ORIG_NAME) == 0) {
 				state++;
 				break;
 			}
 			if (*z.next_in == 0)
 				state++;
 			ADVANCE();
 			break;
 
 		case GZSTATE_COMMENT:
 			/* Optional NUL-terminated comment; skipped as well. */
 			if ((flags & COMMENT) == 0) {
 				state++;
 				break;
 			}
 			if (*z.next_in == 0)
 				state++;
 			ADVANCE();
 			break;
 
 		case GZSTATE_HEAD_CRC1:
 			/* Optional 2 byte header CRC; skipped, not verified. */
 			if (flags & HEAD_CRC)
 				skip_count = 2;
 			else
 				skip_count = 0;
 			state++;
 			break;
 
 		case GZSTATE_HEAD_CRC2:
 			if (skip_count > 0) {
 				skip_count--;
 				ADVANCE();
 			} else
 				state++;
 			break;
 
 		case GZSTATE_INIT:
 			/* Negative window bits: same raw setup as gz_compress(). */
 			if (inflateInit2(&z, -MAX_WBITS) != Z_OK) {
 				maybe_warnx("failed to inflateInit");
 				goto stop_and_fail;
 			}
 			state++;
 			break;
 
 		case GZSTATE_READ:
 			error = inflate(&z, Z_FINISH);
 			switch (error) {
 			/* Z_BUF_ERROR goes with Z_FINISH... */
 			case Z_BUF_ERROR:
 				/*
 				 * Output space left and more input may come:
 				 * go back to the top of the loop for more.
 				 * Otherwise fall through and flush.
 				 */
 				if (z.avail_out > 0 && !done_reading)
 					continue;
 
 			case Z_STREAM_END:
 			case Z_OK:
 				break;
 
 			case Z_NEED_DICT:
 				maybe_warnx("Z_NEED_DICT error");
 				goto stop_and_fail;
 			case Z_DATA_ERROR:
 				maybe_warnx("data stream error");
 				goto stop_and_fail;
 			case Z_STREAM_ERROR:
 				maybe_warnx("internal stream error");
 				goto stop_and_fail;
 			case Z_MEM_ERROR:
 				maybe_warnx("memory allocation error");
 				goto stop_and_fail;
 
 			default:
 				/* Warn only; processing carries on below. */
 				maybe_warn("unknown error from inflate(): %d",
 				    error);
 			}
 			/* Number of bytes inflate() placed in outbufp. */
 			wr = BUFLEN - z.avail_out;
 
 			if (wr != 0) {
 				crc = crc32(crc, (const Bytef *)outbufp, (unsigned)wr);
 				if (
 #ifndef SMALL
 				    /* don't write anything with -t */
 				    tflag == 0 &&
 #endif
 				    write(out, outbufp, wr) != wr) {
 					maybe_warn("error writing to output");
 					goto stop_and_fail;
 				}
 
 				out_tot += wr;
 				out_sub_tot += wr;
 			}
 
 			/* End of the deflate data: the trailer comes next. */
 			if (error == Z_STREAM_END) {
 				inflateEnd(&z);
 				state++;
 			}
 
 			z.next_out = (unsigned char *)outbufp;
 			z.avail_out = BUFLEN;
 
 			break;
 		case GZSTATE_CRC:
 			{
 				uLong origcrc;
 
 				/*
 				 * All 4 CRC bytes must be buffered before
 				 * they are read; ask for a refill if not.
 				 */
 				if (z.avail_in < 4) {
 					if (!done_reading) {
 						needmore = 1;
 						continue;
 					}
 					maybe_warnx("truncated input");
 					goto stop_and_fail;
 				}
 				/* Stored little-endian. */
 				origcrc = ((unsigned)z.next_in[0] & 0xff) |
 					((unsigned)z.next_in[1] & 0xff) << 8 |
 					((unsigned)z.next_in[2] & 0xff) << 16 |
 					((unsigned)z.next_in[3] & 0xff) << 24;
 				if (origcrc != crc) {
 					maybe_warnx("invalid compressed"
 					     " data--crc error");
 					goto stop_and_fail;
 				}
 			}
 
 			z.avail_in -= 4;
 			z.next_in += 4;
 
 			/*
 			 * If the input ends exactly after the CRC, stop here
 			 * without examining a length field.
 			 */
 			if (!z.avail_in && done_reading) {
 				goto stop;
 			}
 			state++;
 			break;
 		case GZSTATE_LEN:
 			{
 				uLong origlen;
 
 				/* Same 4 byte buffering rule as for the CRC. */
 				if (z.avail_in < 4) {
 					if (!done_reading) {
 						needmore = 1;
 						continue;
 					}
 					maybe_warnx("truncated input");
 					goto stop_and_fail;
 				}
 				origlen = ((unsigned)z.next_in[0] & 0xff) |
 					((unsigned)z.next_in[1] & 0xff) << 8 |
 					((unsigned)z.next_in[2] & 0xff) << 16 |
 					((unsigned)z.next_in[3] & 0xff) << 24;
 
 				/* Compared against this member's 32 bit count. */
 				if (origlen != out_sub_tot) {
 					maybe_warnx("invalid compressed"
 					     " data--length error");
 					goto stop_and_fail;
 				}
 			}
 				
 			z.avail_in -= 4;
 			z.next_in += 4;
 
 			if (error < 0) {
 				maybe_warnx("decompression error");
 				goto stop_and_fail;
 			}
 			/* Member complete; look for another one. */
 			state = GZSTATE_MAGIC0;
 			break;
 		}
 		continue;
 stop_and_fail:
 		out_tot = -1;
 stop:
 		break;
 	}
 	/*
 	 * Release zlib state if inflateInit2() was reached.  This may run
 	 * after the inflateEnd() call made at Z_STREAM_END above.
 	 */
 	if (state > GZSTATE_INIT)
 		inflateEnd(&z);
 
 	free(inbufp);
 out1:
 	free(outbufp);
 out2:
 	if (gsizep)
 		*gsizep = in_tot;
 	return (out_tot);
 }
 
 #ifndef SMALL
 /*
  * Give the file open on `fd' the owner, permission bits, timestamps and
  * file flags recorded in *sbp.  When sbp is NULL only a default mode,
  * filtered through the process umask, is applied.  `file' is used solely
  * in warning messages.
  */
 static void
 copymodes(int fd, const struct stat *sbp, const char *file)
 {
 	struct timespec stamps[2];
 	struct stat st;
 	mode_t saved_mask;
 
 	/* No information about the input: fall back to defaults and leave. */
 	if (sbp == NULL) {
 		/* The umask can only be read by setting it; restore it after. */
 		saved_mask = umask(022);
 		(void)fchmod(fd, DEFFILEMODE & ~saved_mask);
 		(void)umask(saved_mask);
 		return;
 	}
 
 	/* Work on a private copy, the mode may get trimmed below. */
 	st = *sbp;
 
 	/* A failed chown costs the set-id bits, as in compress(1). */
 	if (fchown(fd, st.st_uid, st.st_gid) < 0) {
 		if (errno != EPERM)
 			maybe_warn("couldn't fchown: %s", file);
 		st.st_mode &= ~(S_ISUID|S_ISGID);
 	}
 
 	/* Keep nothing but set-id and the nine rwx permission bits. */
 	st.st_mode &= S_ISUID | S_ISGID | S_IRWXU | S_IRWXG | S_IRWXO;
 	if (fchmod(fd, st.st_mode) < 0)
 		maybe_warn("couldn't fchmod: %s", file);
 
 	stamps[0] = st.st_atim;
 	stamps[1] = st.st_mtim;
 	if (futimens(fd, stamps) < 0)
 		maybe_warn("couldn't futimens: %s", file);
 
 	/* File flags are only touched when the source had some set. */
 	if (st.st_flags == 0)
 		return;
 	if (fchflags(fd, st.st_flags) < 0)
 		maybe_warn("couldn't fchflags: %s", file);
 }
 #endif
 
 /*
  * Classify a file by the magic number at the start of `buf'.  The formats
  * are probed in a fixed order (gzip, bzip2, compress, pack, xz), each
  * optional probe being compiled only when its support is built in; the
  * first match wins and FT_UNKNOWN is returned when nothing matches.
  * Up to four bytes of `buf' are examined.
  */
 static enum filetype
 file_gettype(u_char *buf)
 {
 
 	if (buf[0] == GZIP_MAGIC0) {
 		if (buf[1] == GZIP_MAGIC1 || buf[1] == GZIP_OMAGIC1)
 			return FT_GZIP;
 	}
 #ifndef NO_BZIP2_SUPPORT
 	/* The three magic bytes are followed by a digit. */
 	if (memcmp(buf, BZIP2_MAGIC, 3) == 0 &&
 	    buf[3] >= '0' && buf[3] <= '9')
 		return FT_BZIP2;
 #endif
 #ifndef NO_COMPRESS_SUPPORT
 	if (memcmp(buf, Z_MAGIC, 2) == 0)
 		return FT_Z;
 #endif
 #ifndef NO_PACK_SUPPORT
 	if (memcmp(buf, PACK_MAGIC, 2) == 0)
 		return FT_PACK;
 #endif
 #ifndef NO_XZ_SUPPORT
 	/* XXX: We only have 4 bytes */
 	if (memcmp(buf, XZ_MAGIC, 4) == 0)
 		return FT_XZ;
 #endif
 	return FT_UNKNOWN;
 }
 
 #ifndef SMALL
 /*
  * Decide whether it is OK to create outfile.  Returns 1 to go ahead and
  * 0 to skip.  An existing file is removed when -f is given or when the
  * user confirms at the terminal; otherwise it is left alone.  Nothing is
  * checked in list mode.
  */
 static int
 check_outfile(const char *outfile)
 {
 	struct stat st;
 	char reply[10] = { 'n', '\0' };	/* default answer is "no" */
 
 	if (lflag != 0 || stat(outfile, &st) != 0)
 		return 1;
 
 	if (fflag) {
 		unlink(outfile);
 		return 1;
 	}
 
 	if (!isatty(STDIN_FILENO)) {
 		maybe_warnx("%s already exists -- skipping", outfile);
 		return 0;
 	}
 
 	fprintf(stderr, "%s already exists -- do you wish to "
 			"overwrite (y or n)? " , outfile);
 	(void)fgets(reply, sizeof(reply) - 1, stdin);
 	if (reply[0] == 'y' || reply[0] == 'Y') {
 		unlink(outfile);
 		return 1;
 	}
 	fprintf(stderr, "\tnot overwriting\n");
 	return 0;
 }
 
 /*
  * Remove the input file once its output is complete, unless -k was given.
  * The path is re-stat'ed and only unlinked when it still refers to the
  * same device/inode pair recorded in *sb.
  */
 static void
 unlink_input(const char *file, const struct stat *sb)
 {
 	struct stat cur;
 
 	/* keep requested, or the path is gone already */
 	if (kflag || stat(file, &cur) != 0)
 		return;
 
 	/* anything else at that path is definitely a different file */
 	if (cur.st_dev == sb->st_dev && cur.st_ino == sb->st_ino)
 		unlink(file);
 }
 
 /*
  * SIGINT handler: discard the output file currently being written, if
  * one is registered in remove_file, then terminate with status 2.
  */
 static void
 sigint_handler(int signo __unused)
 {
 
 	if (remove_file != NULL) {
 		unlink(remove_file);
 	}
 	_exit(2);
 }
 #endif
 
 /*
  * Look for a known compressed-file suffix at the end of `file'.  Returns
  * the matching suffixes[] entry, or NULL when none matches.  With xlate
  * set, the suffix is rewritten in place to its uncompressed counterpart.
  */
 static const suffixes_t *
 check_suffix(char *file, int xlate)
 {
 	const suffixes_t *cand;
 	int flen = strlen(file);
 	char *tail;
 
 	for (cand = suffixes; cand != suffixes + NUM_SUFFIXES; cand++) {
 		/* the name must be strictly longer than the suffix ("a.suf") */
 		if (cand->ziplen < flen) {
 			tail = file + flen - cand->ziplen;
 			if (strcmp(cand->zipped, tail) == 0) {
 				if (xlate)
 					strcpy(tail, cand->normal);
 				return cand;
 			}
 		}
 	}
 	return NULL;
 }
 
 /*
  * Compress the given file: create a corresponding .gz file and remove the
  * original.
  *
  * file     path of the input file.
  * outfile  caller-supplied buffer that receives the output path; only
  *          written when not compressing to stdout (cflag == 0).
  * outsize  size of the outfile buffer in bytes.
  *
  * Returns the value gz_compress() stored in `size' (the caller treats it
  * as the compressed size), or -1 when the input could not be opened or
  * stat'ed, was skipped, or the output could not be created.
  */
 static off_t
 file_compress(char *file, char *outfile, size_t outsize)
 {
 	int in;
 	int out;
 	off_t size, insize;
 #ifndef SMALL
 	struct stat isb, osb;
 	const suffixes_t *suff;
 #endif
 
 	in = open(file, O_RDONLY);
 	if (in == -1) {
 		maybe_warn("can't open %s", file);
 		return (-1);
 	}
 
 #ifndef SMALL
 	if (fstat(in, &isb) != 0) {
 		maybe_warn("couldn't stat: %s", file);
 		close(in);
 		return (-1);
 	}
 #endif
 
 	if (cflag == 0) {
 #ifndef SMALL
 		/* replacing a multiply-linked file needs -f */
 		if (isb.st_nlink > 1 && fflag == 0) {
 			maybe_warnx("%s has %d other link%s -- skipping",
 			    file, isb.st_nlink - 1,
 			    (isb.st_nlink - 1) == 1 ? "" : "s");
 			close(in);
 			return (-1);
 		}
 
 		/* don't compress something that already looks compressed */
 		if (fflag == 0 && (suff = check_suffix(file, 0)) &&
 		    suff->zipped[0] != 0) {
 			maybe_warnx("%s already has %s suffix -- unchanged",
 			    file, suff->zipped);
 			close(in);
 			return (-1);
 		}
 #endif
 
 		/*
 		 * Add (usually) .gz to filename.  If the result was
 		 * truncated, overwrite the tail of the buffer so the name
 		 * still ends in the suffix.
 		 */
 		if ((size_t)snprintf(outfile, outsize, "%s%s",
 		    file, suffixes[0].zipped) >= outsize)
 			memcpy(outfile + outsize - suffixes[0].ziplen - 1,
 			    suffixes[0].zipped, suffixes[0].ziplen + 1);
 
 #ifndef SMALL
 		if (check_outfile(outfile) == 0) {
 			close(in);
 			return (-1);
 		}
 #endif
 	}
 
 	if (cflag == 0) {
 		out = open(outfile, O_WRONLY | O_CREAT | O_EXCL, 0600);
 		if (out == -1) {
 			maybe_warn("could not create output: %s", outfile);
 			/* release the input descriptor, not stdin */
 			close(in);
 			return (-1);
 		}
 #ifndef SMALL
 		/* let the SIGINT handler remove a partial output file */
 		remove_file = outfile;
 #endif
 	} else
 		out = STDOUT_FILENO;
 
 	insize = gz_compress(in, out, &size, basename(file), (uint32_t)isb.st_mtime);
 
 	(void)close(in);
 
 	/*
 	 * If there was an error, insize will be -1.
 	 * If we compressed to stdout, just return the size.
 	 * Otherwise stat the file and check it is the correct size.
 	 * We only blow away the file if we can stat the output and it
 	 * has the expected size.
 	 */
 	if (cflag != 0)
 		return (insize == -1 ? -1 : size);
 
 #ifndef SMALL
 	if (fstat(out, &osb) != 0) {
 		maybe_warn("couldn't stat: %s", outfile);
 		goto bad_outfile;
 	}
 
 	if (osb.st_size != size) {
 		maybe_warnx("output file: %s wrong size (%ju != %ju), deleting",
 		    outfile, (uintmax_t)osb.st_size, (uintmax_t)size);
 		goto bad_outfile;
 	}
 
 	copymodes(out, &isb, outfile);
 	remove_file = NULL;
 #endif
 	if (close(out) == -1)
 		maybe_warn("couldn't close output");
 
 	/* output is good, ok to delete input */
 	unlink_input(file, &isb);
 	return (size);
 
 #ifndef SMALL
     bad_outfile:
 	if (close(out) == -1)
 		maybe_warn("couldn't close output");
 
 	maybe_warnx("leaving original %s", file);
 	unlink(outfile);
 	/* the output is gone; the SIGINT handler must not touch that path */
 	remove_file = NULL;
 	return (size);
 #endif
 }
 
 /*
  * Uncompress the given file and remove the original.
  *
  * file     path of the compressed input.
  * outfile  caller-supplied buffer that receives the output path (the
  *          input name with its suffix translated, or the name stored in
  *          the gzip header when -N is in effect).
  * outsize  size of the outfile buffer in bytes.
  *
  * The format is detected from the first four bytes of the file and the
  * matching decoder is run, writing to outfile or to stdout.  Returns the
  * number of uncompressed bytes, or -1 on any failure and also after
  * listing in -l mode.
  */
 static off_t
 file_uncompress(char *file, char *outfile, size_t outsize)
 {
 	struct stat isb, osb;
 	off_t size;
 	ssize_t rbytes;
 	unsigned char header1[4];
 	enum filetype method;
 	int fd, ofd, zfd = -1;
 #ifndef SMALL
 	ssize_t rv;
 	time_t timestamp = 0;
 	char name[PATH_MAX + 1];
 #endif
 
 	/* gather the old name info */
 
 	fd = open(file, O_RDONLY);
 	if (fd < 0) {
 		maybe_warn("can't open %s", file);
 		goto lose;
 	}
 
 	/* derive the output name by translating the suffix */
 	strlcpy(outfile, file, outsize);
 	if (check_suffix(outfile, 1) == NULL && !(cflag || lflag)) {
 		maybe_warnx("%s: unknown suffix -- ignored", file);
 		goto lose;
 	}
 
 	rbytes = read(fd, header1, sizeof header1);
 	if (rbytes != sizeof header1) {
 		/* we don't want to fail here. */
 #ifndef SMALL
 		if (fflag)
 			goto lose;
 #endif
 		if (rbytes == -1)
 			maybe_warn("can't read %s", file);
 		else
 			goto unexpected_EOF;
 		goto lose;
 	}
 
 	method = file_gettype(header1);
 #ifndef SMALL
 	if (fflag == 0 && method == FT_UNKNOWN) {
 		maybe_warnx("%s: not in gzip format", file);
 		goto lose;
 	}
 
 #endif
 
 #ifndef SMALL
 	/* -N: recover the timestamp and original name from the gzip header */
 	if (method == FT_GZIP && Nflag) {
 		unsigned char ts[4];	/* timestamp */
 
 		rv = pread(fd, ts, sizeof ts, GZIP_TIMESTAMP);
 		if (rv >= 0 && rv < (ssize_t)(sizeof ts))
 			goto unexpected_EOF;
 		if (rv == -1) {
 			if (!fflag)
 				maybe_warn("can't read %s", file);
 			goto lose;
 		}
 		timestamp = ts[3] << 24 | ts[2] << 16 | ts[1] << 8 | ts[0];
 
 		if (header1[3] & ORIG_NAME) {
 			rbytes = pread(fd, name, sizeof(name) - 1, GZIP_ORIGNAME);
 			if (rbytes < 0) {
 				maybe_warn("can't read %s", file);
 				goto lose;
 			}
 			/*
 			 * Terminate before looking at the buffer: a short
 			 * (even zero-length) read must not leave name[0]
 			 * uninitialised.
 			 */
 			name[rbytes] = '\0';
 			if (name[0] != '\0') {
 				char *dp, *nf;
 
 				/* strip saved directory name */
 				nf = strrchr(name, '/');
 				if (nf == NULL)
 					nf = name;
 				else
 					nf++;
 
 				/* preserve original directory name */
 				dp = strrchr(file, '/');
 				if (dp == NULL)
 					dp = file;
 				else
 					dp++;
 				snprintf(outfile, outsize, "%.*s%.*s",
 						(int) (dp - file), 
 						file, (int) rbytes, nf);
 			}
 		}
 	}
 #endif
 	/* the decoders expect to start at the beginning of the file */
 	lseek(fd, 0, SEEK_SET);
 
 	if (cflag == 0 || lflag) {
 		if (fstat(fd, &isb) != 0)
 			goto lose;
 #ifndef SMALL
 		if (isb.st_nlink > 1 && lflag == 0 && fflag == 0) {
 			maybe_warnx("%s has %d other links -- skipping",
 			    file, isb.st_nlink - 1);
 			goto lose;
 		}
 		if (nflag == 0 && timestamp)
 			isb.st_mtime = timestamp;
 		if (check_outfile(outfile) == 0)
 			goto lose;
 #endif
 	}
 
 	if (cflag == 0 && lflag == 0) {
 		zfd = open(outfile, O_WRONLY|O_CREAT|O_EXCL, 0600);
 		if (zfd == STDOUT_FILENO) {
 			/* We won't close STDOUT_FILENO later... */
 			zfd = dup(zfd);
 			close(STDOUT_FILENO);
 		}
 		if (zfd == -1) {
 			maybe_warn("can't open %s", outfile);
 			goto lose;
 		}
 #ifndef SMALL
 		/* let the SIGINT handler remove a partial output file */
 		remove_file = outfile;
 #endif
 	} else
 		zfd = STDOUT_FILENO;
 
 	switch (method) {
 #ifndef NO_BZIP2_SUPPORT
 	case FT_BZIP2:
 		/* XXX */
 		if (lflag) {
 			maybe_warnx("no -l with bzip2 files");
 			goto lose;
 		}
 
 		size = unbzip2(fd, zfd, NULL, 0, NULL);
 		break;
 #endif
 
 #ifndef NO_COMPRESS_SUPPORT
 	case FT_Z: {
 		FILE *in, *out;
 
 		/* XXX */
 		if (lflag) {
 			maybe_warnx("no -l with Lempel-Ziv files");
 			goto lose;
 		}
 
 		if ((in = zdopen(fd)) == NULL) {
 			maybe_warn("zdopen for read: %s", file);
 			goto lose;
 		}
 
 		/* stdio gets its own descriptor so zfd stays open for us */
 		out = fdopen(dup(zfd), "w");
 		if (out == NULL) {
 			maybe_warn("fdopen for write: %s", outfile);
 			fclose(in);
 			goto lose;
 		}
 
 		size = zuncompress(in, out, NULL, 0, NULL);
 		/* need to fclose() if ferror() is true... */
 		if (ferror(in) | fclose(in)) {
 			maybe_warn("failed infile fclose");
 			/* with -c, outfile names a file we did not create */
 			if (cflag == 0)
 				unlink(outfile);
 			(void)fclose(out);
 			/*
 			 * Bail out: `out' is closed now and must not be
 			 * fclose()d a second time below.
 			 * NOTE(review): `in' may already own fd, in which
 			 * case the close(fd) at lose: fails harmlessly --
 			 * confirm against zdopen().
 			 */
 			goto lose;
 		}
 		if (fclose(out) != 0) {
 			maybe_warn("failed outfile fclose");
 			if (cflag == 0)
 				unlink(outfile);
 			goto lose;
 		}
 		break;
 	}
 #endif
 
 #ifndef NO_PACK_SUPPORT
 	case FT_PACK:
 		if (lflag) {
 			maybe_warnx("no -l with packed files");
 			goto lose;
 		}
 
 		size = unpack(fd, zfd, NULL, 0, NULL);
 		break;
 #endif
 
 #ifndef NO_XZ_SUPPORT
 	case FT_XZ:
 		if (lflag) {
 			maybe_warnx("no -l with xz files");
 			goto lose;
 		}
 
 		size = unxz(fd, zfd, NULL, 0, NULL);
 		break;
 #endif
 
 #ifndef SMALL
 	case FT_UNKNOWN:
 		/* only reached with -f: pass the data through unchanged */
 		if (lflag) {
 			maybe_warnx("no -l for unknown filetypes");
 			goto lose;
 		}
 		size = cat_fd(NULL, 0, NULL, fd);
 		break;
 #endif
 	default:
 		if (lflag) {
 			print_list(fd, isb.st_size, outfile, isb.st_mtime);
 			close(fd);
 			return -1;	/* XXX */
 		}
 
 		size = gz_uncompress(fd, zfd, NULL, 0, NULL, file);
 		break;
 	}
 
 	if (close(fd) != 0)
 		maybe_warn("couldn't close input");
 	if (zfd != STDOUT_FILENO && close(zfd) != 0)
 		maybe_warn("couldn't close output");
 
 	if (size == -1) {
 		if (cflag == 0)
 			unlink(outfile);
 		maybe_warnx("%s: uncompress failed", file);
 		return -1;
 	}
 
 	/* if testing, or we uncompressed to stdout, this is all we need */
 #ifndef SMALL
 	if (tflag)
 		return size;
 #endif
 	/* if we are uncompressing to stdin, don't remove the file. */
 	if (cflag)
 		return size;
 
 	/*
 	 * if we create a file...
 	 */
 	/*
 	 * if we can't stat the file don't remove the file.
 	 */
 
 	ofd = open(outfile, O_RDWR, 0);
 	if (ofd == -1) {
 		maybe_warn("couldn't open (leaving original): %s",
 			   outfile);
 		return -1;
 	}
 	if (fstat(ofd, &osb) != 0) {
 		maybe_warn("couldn't stat (leaving original): %s",
 			   outfile);
 		close(ofd);
 		return -1;
 	}
 	if (osb.st_size != size) {
 		maybe_warnx("stat gave different size: %ju != %ju (leaving original)",
 		    (uintmax_t)size, (uintmax_t)osb.st_size);
 		close(ofd);
 		unlink(outfile);
 		return -1;
 	}
 #ifndef SMALL
 	copymodes(ofd, &isb, outfile);
 	remove_file = NULL;
 #endif
 	close(ofd);
 	unlink_input(file, &isb);
 	return size;
 
     unexpected_EOF:
 	maybe_warnx("%s: unexpected end of file", file);
     lose:
 	if (fd != -1)
 		close(fd);
 	/* release the output descriptor too (never stdout) */
 	if (zfd != -1 && zfd != STDOUT_FILENO)
 		close(zfd);
 	return -1;
 }
 
 #ifndef SMALL
 /*
  * Copy data to stdout unchanged: first the `count' bytes at `prepend',
  * then everything that can be read from fd.  The byte total is returned
  * and also stored in *gsizep when gsizep is non-NULL.  A failure writing
  * the prepended bytes returns -1; a later read or write error only ends
  * the copy early.
  */
 static off_t
 cat_fd(unsigned char * prepend, size_t count, off_t *gsizep, int fd)
 {
 	char chunk[BUFLEN];
 	off_t total = count;
 	ssize_t n;
 
 	n = write(STDOUT_FILENO, prepend, count);
 	if (n == -1 || (size_t)n != count) {
 		maybe_warn("write to stdout");
 		return -1;
 	}
 
 	while ((n = read(fd, chunk, sizeof chunk)) != 0) {
 		if (n < 0) {
 			maybe_warn("read from fd %d", fd);
 			break;
 		}
 		if (write(STDOUT_FILENO, chunk, n) != n) {
 			maybe_warn("write to stdout");
 			break;
 		}
 		total += n;
 	}
 
 	if (gsizep)
 		*gsizep = total;
 	return (total);
 }
 #endif
 
 /*
  * Decompress standard input to standard output.  The format is picked
  * from the first four bytes, which are handed on to the decoder as
  * already-consumed input.  With -l only the listing is printed.
  */
 static void
 handle_stdin(void)
 {
 	unsigned char magic[4];
 	off_t usize, gsize;
 	ssize_t got;
 #ifndef NO_COMPRESS_SUPPORT
 	FILE *in;
 #endif
 
 #ifndef SMALL
 	if (fflag == 0 && lflag == 0 && isatty(STDIN_FILENO)) {
 		maybe_warnx("standard input is a terminal -- ignoring");
 		return;
 	}
 #endif
 
 	if (lflag) {
 		struct stat isb;
 
 		/* XXX could read the whole file, etc. */
 		if (fstat(STDIN_FILENO, &isb) < 0) {
 			maybe_warn("fstat");
 			return;
 		}
 		print_list(STDIN_FILENO, isb.st_size, "stdout", isb.st_mtime);
 		return;
 	}
 
 	got = read_retry(STDIN_FILENO, magic, sizeof magic);
 	if (got == -1) {
 		maybe_warn("can't read stdin");
 		return;
 	}
 	if (got != sizeof(magic)) {
 		maybe_warnx("(stdin): unexpected end of file");
 		return;
 	}
 
 	switch (file_gettype(magic)) {
 	default:
 #ifndef SMALL
 		/* unknown data is only passed through when forced */
 		if (fflag == 0) {
 			maybe_warnx("unknown compression format");
 			return;
 		}
 		usize = cat_fd(magic, sizeof magic, &gsize, STDIN_FILENO);
 		break;
 #endif
 		/* in a SMALL build the default case is treated as gzip */
 	case FT_GZIP:
 		usize = gz_uncompress(STDIN_FILENO, STDOUT_FILENO,
 		    (char *)magic, sizeof magic, &gsize, "(stdin)");
 		break;
 #ifndef NO_BZIP2_SUPPORT
 	case FT_BZIP2:
 		usize = unbzip2(STDIN_FILENO, STDOUT_FILENO,
 		    (char *)magic, sizeof magic, &gsize);
 		break;
 #endif
 #ifndef NO_COMPRESS_SUPPORT
 	case FT_Z:
 		if ((in = zdopen(STDIN_FILENO)) == NULL) {
 			maybe_warnx("zopen of stdin");
 			return;
 		}
 
 		usize = zuncompress(in, stdout, (char *)magic,
 		    sizeof magic, &gsize);
 		fclose(in);
 		break;
 #endif
 #ifndef NO_PACK_SUPPORT
 	case FT_PACK:
 		usize = unpack(STDIN_FILENO, STDOUT_FILENO,
 		    (char *)magic, sizeof magic, &gsize);
 		break;
 #endif
 #ifndef NO_XZ_SUPPORT
 	case FT_XZ:
 		usize = unxz(STDIN_FILENO, STDOUT_FILENO,
 		    (char *)magic, sizeof magic, &gsize);
 		break;
 #endif
 	}
 
 #ifndef SMALL
 	if (vflag) {
 		if (tflag)
 			print_test("(stdin)", usize != -1);
 		else if (usize != -1 && gsize != -1)
 			print_verbage(NULL, NULL, usize, gsize);
 	}
 #endif
 
 }
 
 /*
  * Compress standard input to standard output.  The gzip header timestamp
  * is stdin's mtime when stdin is a regular file and the current time
  * otherwise.
  */
 static void
 handle_stdout(void)
 {
 	off_t gsize, usize;
 	struct stat st;
 	time_t clock_now;
 	uint32_t stamp;
 	int rc;
 
 #ifndef SMALL
 	if (fflag == 0 && isatty(STDOUT_FILENO)) {
 		maybe_warnx("standard output is a terminal -- ignoring");
 		return;
 	}
 #endif
 	rc = fstat(STDIN_FILENO, &st);
 
 #ifndef SMALL
 	if (rc < 0) {
 		maybe_warn("Can't stat stdin");
 		return;
 	}
 #endif
 
 	if (!S_ISREG(st.st_mode)) {
 		/* not a file: fall back to the wall clock */
 		clock_now = time(NULL);
 #ifndef SMALL
 		if (clock_now == -1) {
 			maybe_warn("time");
 			return;
 		}
 #endif
 		stamp = (uint32_t)clock_now;
 	} else
 		stamp = (uint32_t)st.st_mtime;
 
 	usize = gz_compress(STDIN_FILENO, STDOUT_FILENO, &gsize, "", stamp);
 #ifndef SMALL
 	if (vflag && !tflag && usize != -1 && gsize != -1)
 		print_verbage(NULL, NULL, usize, gsize);
 #endif
 }
 
 /*
  * Process one command-line operand.  "-" selects stdin/stdout handling;
  * otherwise the path is stat'ed and dispatched as a directory or regular
  * file.  When decompressing a name that does not exist, the name with the
  * default suffix appended is tried once as well.
  */
 static void
 handle_pathname(char *path)
 {
 	char *given = path, *alt = NULL;
 	ssize_t len;
 	int slen;
 	struct stat sb;
 
 	/* check for stdout/stdin */
 	if (path[0] == '-' && path[1] == '\0') {
 		if (dflag)
 			handle_stdin();
 		else
 			handle_stdout();
 		return;
 	}
 
 	for (;;) {
 		if (stat(path, &sb) == 0 && (fflag != 0 || cflag != 0 ||
 		    lstat(path, &sb) == 0))
 			break;
 
 		/* only one retry, only when decompressing a missing name */
 		if (!dflag || alt != NULL || errno != ENOENT) {
 			maybe_warn("can't stat: %s", given);
 			free(alt);
 			return;
 		}
 
 		/* lets try <path>.gz */
 		len = strlen(path);
 		slen = suffixes[0].ziplen;
 		alt = malloc(len + slen + 1);
 		if (alt == NULL)
 			maybe_err("malloc");
 		memcpy(alt, path, len);
 		memcpy(alt + len, suffixes[0].zipped, slen + 1);
 		path = alt;
 	}
 
 	if (S_ISDIR(sb.st_mode)) {
 #ifndef SMALL
 		if (rflag)
 			handle_dir(path);
 		else
 #endif
 			maybe_warnx("%s is a directory", path);
 	} else if (S_ISREG(sb.st_mode))
 		handle_file(path, &sb);
 	else
 		maybe_warnx("%s is not a regular file", path);
 
 	free(alt);
 }
 
 /*
  * Compress or decompress (per dflag) a single regular file and, in
  * verbose mode, report the result.  sbp is the stat of the input; its
  * st_size supplies the size of whichever side was not produced here.
  */
 static void
 handle_file(char *file, struct stat *sbp)
 {
 	char	outfile[PATH_MAX];
 	off_t usize, gsize;
 
 	infile = file;
 	if (!dflag) {
 		gsize = file_compress(file, outfile, sizeof(outfile));
 		if (gsize == -1)
 			return;
 		usize = sbp->st_size;
 	} else {
 		usize = file_uncompress(file, outfile, sizeof(outfile));
 #ifndef SMALL
 		/* -t reports the result even for a failed file */
 		if (vflag && tflag)
 			print_test(file, usize != -1);
 #endif
 		if (usize == -1)
 			return;
 		gsize = sbp->st_size;
 	}
 
 #ifndef SMALL
 	if (vflag && !tflag)
 		print_verbage(file, (cflag) ? NULL : outfile, usize, gsize);
 #endif
 }
 
 #ifndef SMALL
 /*
  * Used with -r: walk the tree rooted at dir and hand every regular file
  * to handle_file().  Unreadable directories and entries that could not be
  * stat'ed produce a warning; everything else is skipped silently.
  */
 static void
 handle_dir(char *dir)
 {
 	char *roots[2];
 	FTS *walk;
 	FTSENT *ent;
 
 	roots[0] = dir;
 	roots[1] = 0;
 	walk = fts_open(roots, FTS_PHYSICAL | FTS_NOCHDIR, NULL);
 	if (walk == NULL) {
 		warn("couldn't fts_open %s", dir);
 		return;
 	}
 
 	while ((ent = fts_read(walk))) {
 		int info = ent->fts_info;
 
 		if (info == FTS_F)
 			handle_file(ent->fts_path, ent->fts_statp);
 		else if (info == FTS_DNR || info == FTS_ERR || info == FTS_NS)
 			maybe_warn("%s", ent->fts_path);
 		/* directories (pre/post order) and other types: nothing */
 	}
 	(void)fts_close(walk);
 }
 #endif
 
 /*
  * Print the size reduction as a percentage of the uncompressed size `in',
  * with one decimal place and right-aligned in five columns, followed by
  * '%'.  "-99.9%" is printed when `out' is at least twice `in'.
  */
 static void
 print_ratio(off_t in, off_t out, FILE *where)
 {
 	char text[8];
 	int tenths = -999;	/* ten times the percentage */
 	int n;
 	off_t saved = in - out/2;
 
 	/*
 	 * saved <= 0 means the output is more than double the input size;
 	 * quite possibly we've failed to get the original size.  Keep the
 	 * -99.9% default in that case.
 	 */
 	if (saved > 0) {
 		/*
 		 * Only 12 bits of result are needed from the division, so
 		 * scale both values down until 32-bit arithmetic suffices.
 		 */
 		for (; in > 0x100000; in >>= 1)
 			saved >>= 1;
 		if (in == 0)
 			tenths = 0;
 		else
 			tenths = ((unsigned int)saved * 2000) /
 			    (unsigned int)in - 1000;
 	}
 
 	/* format as "<digits>." then rotate the '.' in front of the last digit */
 	n = snprintf(text, sizeof text, "%2.2d.", tenths);
 	text[n - 1] = text[n - 2];
 	text[n - 2] = '.';
 	fprintf(where, "%5s%%", text);
 }
 
 #ifndef SMALL
 /*
  * Print one line of -v statistics on stderr: the input name (if any),
  * the compression ratio, and the name of the replacement file (if any).
  */
 static void
 print_verbage(const char *file, const char *nfile, off_t usize, off_t gsize)
 {
 	if (file) {
 		/* short names get an extra tab */
 		const char *pad = strlen(file) < 7 ? "\t\t" : "\t";
 
 		fprintf(stderr, "%s:%s  ", file, pad);
 	}
 	print_ratio(usize, gsize, stderr);
 	if (nfile)
 		fprintf(stderr, " -- replaced with %s", nfile);
 	fprintf(stderr, "\n");
 	fflush(stderr);
 }
 
 /*
  * Report the outcome of testing `file' on stderr.  The first failure
  * also sets the program exit status to 1 if it was still 0.
  */
 static void
 print_test(const char *file, int ok)
 {
 	const char *pad = strlen(file) < 7 ? "\t\t" : "\t";
 	const char *verdict = ok ? "OK" : "NOT OK";
 
 	if (ok == 0 && exit_value == 0)
 		exit_value = 1;
 	fprintf(stderr, "%s:%s  %s\n", file, pad, verdict);
 	fflush(stderr);
 }
 #endif
 
 /*
  * Print one --list line for a compressed file, e.g.
  *
  *   compressed uncompressed  ratio uncompressed_name
  *       354841      1679360  78.8% /usr/pkgsrc/distfiles/libglade-2.0.1.tar
  *
  * `out' is the compressed size; the uncompressed size (and CRC) come from
  * the last eight bytes of fd.  The column header is printed on the first
  * call.  Outside SMALL builds, fd == -1 prints the running totals.
  */
 static void
 print_list(int fd, off_t out, const char *outfile, time_t ts)
 {
 	static int first = 1;
 #ifndef SMALL
 	static off_t in_tot, out_tot;
 	uint32_t crc = 0;
 #endif
 	off_t in = 0, rv;
 
 	if (first) {
 		first = 0;
 #ifndef SMALL
 		if (vflag)
 			printf("method  crc     date  time  ");
 #endif
 		if (qflag == 0)
 			printf("  compressed uncompressed  "
 			       "ratio uncompressed_name\n");
 	}
 
 	/* print totals? */
 #ifndef SMALL
 	if (fd == -1) {
 		in = in_tot;
 		out = out_tot;
 	} else
 #endif
 	{
 		unsigned char trailer[8];
 
 		/* the last 4 bytes hold the uncompressed size, the 4 before the crc */
 		rv = lseek(fd, (off_t)(-8), SEEK_END);
 		if (rv != -1) {
 			rv = read(fd, (char *)trailer, sizeof(trailer));
 			if (rv == sizeof(trailer)) {
 				uint32_t usize;
 
 				usize = trailer[4] | trailer[5] << 8 |
 					trailer[6] << 16 | trailer[7] << 24;
 				in = (off_t)usize;
 #ifndef SMALL
 				crc = trailer[0] | trailer[1] << 8 |
 				      trailer[2] << 16 | trailer[3] << 24;
 #endif
 			} else if (rv == -1)
 				maybe_warn("read of uncompressed size");
 			else
 				maybe_warnx("read of uncompressed size");
 		}
 	}
 
 #ifndef SMALL
 	if (vflag) {
 		if (fd == -1)
 			printf("                            ");
 		else {
 			/* skip the day name, cut before seconds and year */
 			char *date = ctime(&ts) + 4;
 
 			date[12] = 0;
 			printf("%5s %08x %11s ", "defla"/*XXX*/, crc, date);
 		}
 	}
 	in_tot += in;
 	out_tot += out;
 #else
 	(void)&ts;	/* XXX */
 #endif
 	printf("%12llu %12llu ", (unsigned long long)out, (unsigned long long)in);
 	print_ratio(in, out, stdout);
 	printf(" %s\n", outfile);
 }
 
 /*
  * Display the usage of NetBSD gzip: print the version string and the
  * option summary on stderr, then exit with status 0.  SMALL builds print
  * only the one-line synopsis built from OPT_LIST; the program name comes
  * from getprogname().
  */
 static void
 usage(void)
 {
 
 	fprintf(stderr, "%s\n", gzip_version);
 	fprintf(stderr,
 #ifdef SMALL
     "usage: %s [-" OPT_LIST "] [<file> [<file> ...]]\n",
 #else
     "usage: %s [-123456789acdfhklLNnqrtVv] [-S .suffix] [<file> [<file> ...]]\n"
     " -1 --fast            fastest (worst) compression\n"
     " -2 .. -8             set compression level\n"
     " -9 --best            best (slowest) compression\n"
     " -c --stdout          write to stdout, keep original files\n"
     "    --to-stdout\n"
     " -d --decompress      uncompress files\n"
     "    --uncompress\n"
     " -f --force           force overwriting & compress links\n"
     " -h --help            display this help\n"
     " -k --keep            don't delete input files during operation\n"
     " -l --list            list compressed file contents\n"
     " -N --name            save or restore original file name and time stamp\n"
     " -n --no-name         don't save original file name or time stamp\n"
     " -q --quiet           output no warnings\n"
     " -r --recursive       recursively compress files in directories\n"
     " -S .suf              use suffix .suf instead of .gz\n"
     "    --suffix .suf\n"
     " -t --test            test compressed file\n"
     " -V --version         display program version\n"
     " -v --verbose         print extra statistics\n",
 #endif
 	    getprogname());
 	exit(0);
 }
 
 #ifndef SMALL
 /*
  * Print the version banner (with its NetBSD origin) and the copyright
  * text on stderr, then exit successfully.
  */
 static void
 display_license(void)
 {
 	FILE *const dest = stderr;
 
 	fprintf(dest, "%s (based on NetBSD gzip 20150113)\n", gzip_version);
 	fprintf(dest, "%s\n", gzip_copyright);
 	exit(0);
 }
 #endif
 
 /* Print the gzip version string on stderr and exit successfully. */
 static void
 display_version(void)
 {
 	FILE *const dest = stderr;
 
 	fprintf(dest, "%s\n", gzip_version);
 	exit(0);
 }
 
 #ifndef NO_BZIP2_SUPPORT
 #include "unbzip2.c"
 #endif
 #ifndef NO_COMPRESS_SUPPORT
 #include "zuncompress.c"
 #endif
 #ifndef NO_PACK_SUPPORT
 #include "unpack.c"
 #endif
 #ifndef NO_XZ_SUPPORT
 #include "unxz.c"
 #endif
 
 /*
  * Read from fd until sz bytes (capped at SSIZE_MAX) have arrived or end
  * of file is reached, reissuing read() after short reads.  Returns the
  * number of bytes stored in buf, or -1 as soon as a read() fails.
  */
 static ssize_t
 read_retry(int fd, void *buf, size_t sz)
 {
 	size_t want = MIN(sz, (size_t) SSIZE_MAX);
 	size_t done = 0;
 
 	while (done < want) {
 		ssize_t n = read(fd, (char *)buf + done, want - done);
 
 		if (n == -1)
 			return n;
 		if (n == 0)
 			break; /* EOF */
 		done += n;
 	}
 
 	return sz - (want - done);
 }
Index: user/ngie/more-tests/usr.bin/w/Makefile
===================================================================
--- user/ngie/more-tests/usr.bin/w/Makefile	(revision 281675)
+++ user/ngie/more-tests/usr.bin/w/Makefile	(revision 281676)
@@ -1,14 +1,14 @@
 #	@(#)Makefile	8.1 (Berkeley) 6/6/93
 # $FreeBSD$
 
 PROG=	w
 SRCS=	fmt.c pr_time.c proc_compare.c w.c
 MAN=	w.1 uptime.1
-LIBADD=	kvm util xo
+LIBADD=	kvm sbuf util xo
 #BINGRP= kmem
 #BINMODE=2555
 LINKS=	${BINDIR}/w ${BINDIR}/uptime
 
 .PATH: ${.CURDIR}/../../bin/ps
 
 .include <bsd.prog.mk>
Index: user/ngie/more-tests/usr.bin/w/w.c
===================================================================
--- user/ngie/more-tests/usr.bin/w/w.c	(revision 281675)
+++ user/ngie/more-tests/usr.bin/w/w.c	(revision 281676)
@@ -1,559 +1,569 @@
 /*-
  * Copyright (c) 1980, 1991, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 
 __FBSDID("$FreeBSD$");
 
 #ifndef lint
 static const char copyright[] =
 "@(#) Copyright (c) 1980, 1991, 1993, 1994\n\
 	The Regents of the University of California.  All rights reserved.\n";
 #endif
 
 #ifndef lint
 static const char sccsid[] = "@(#)w.c	8.4 (Berkeley) 4/16/94";
 #endif
 
 /*
  * w - print system status (who and what)
  *
  * This program is similar to the systat command on Tenex/Tops 10/20
  *
  */
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/proc.h>
 #include <sys/user.h>
 #include <sys/ioctl.h>
+#include <sys/sbuf.h>
 #include <sys/socket.h>
 #include <sys/tty.h>
+#include <sys/types.h>
 
 #include <machine/cpu.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
 #include <arpa/nameser.h>
 
 #include <ctype.h>
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <kvm.h>
 #include <langinfo.h>
 #include <libgen.h>
 #include <libutil.h>
 #include <limits.h>
 #include <locale.h>
 #include <netdb.h>
 #include <nlist.h>
 #include <paths.h>
 #include <resolv.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <timeconv.h>
 #include <unistd.h>
 #include <utmpx.h>
 #include <vis.h>
 #include <libxo/xo.h>
 
 #include "extern.h"
 
 static struct utmpx *utmp;	/* record most recently returned by getutxent() */
 static struct winsize ws;	/* terminal size from TIOCGWINSZ */
 static kvm_t   *kd;		/* handle returned by kvm_openfiles() */
 static time_t	now;		/* the current time of day */
 static int	ttywidth;	/* width of tty */
 static int	argwidth;	/* width left for command arguments (ttywidth - WUSED) */
 static int	header = 1;	/* print heading; cleared by the -h and -M flags */
 static int	nflag;		/* true if -n flag: don't convert addrs */
 static int	dflag;		/* true if -d flag: output debug info */
 static int	sortidle;	/* sort by idle time */
 int		use_ampm;	/* use AM/PM time */
 static int	use_comma;      /* use comma as floats separator */
 static char   **sel_users;	/* login array of particular users selected */
 
 /*
  * One of these per active utmp entry.
  * Entries form a singly linked list: ehead is the first entry and nextp
  * points at the link where the next entry gets appended.
  */
 static struct entry {
 	struct	entry *next;		/* next entry in the list */
 	struct	utmpx utmp;		/* copy of the utmpx record */
 	dev_t	tdev;			/* dev_t of terminal */
 	time_t	idle;			/* idle time of terminal in seconds */
 	struct	kinfo_proc *kp;		/* `most interesting' proc */
 	char	*args;			/* arg list of interesting process */
 	struct	kinfo_proc *dkp;	/* debug option proc list */
 } *ep, *ehead = NULL, **nextp = &ehead;
 
 /*
  * Lvalue for the ki_udata member of the kinfo_proc that p points to;
  * main() stores the next process of an entry's debug list (dkp) in it.
  */
 #define	debugproc(p) *(&((struct kinfo_proc *)p)->ki_udata)
 
 #define	W_DISPUSERSIZE	10
 #define	W_DISPLINESIZE	8
 #define	W_DISPHOSTSIZE	24
 
 static void		 pr_header(time_t *, int);
 static struct stat	*ttystat(char *);
 static void		 usage(int);
 
 char *fmt_argv(char **, char *, char *, size_t);	/* ../../bin/ps/fmt.c */
 
 int
 main(int argc, char *argv[])
 {
 	struct kinfo_proc *kp;
 	struct kinfo_proc *dkp;
 	struct stat *stp;
 	time_t touched;
 	int ch, i, nentries, nusers, wcmd, longidle, longattime, dropgid;
 	const char *memf, *nlistf, *p, *save_p;
 	char *x_suffix;
 	char buf[MAXHOSTNAMELEN], errbuf[_POSIX2_LINE_MAX];
 	char fn[MAXHOSTNAMELEN];
 	char *dot;
 
 	(void)setlocale(LC_ALL, "");
 	use_ampm = (*nl_langinfo(T_FMT_AMPM) != '\0');
 	use_comma = (*nl_langinfo(RADIXCHAR) != ',');
 
 	argc = xo_parse_args(argc, argv);
 	if (argc < 0)
 		exit(1);
 
 	/* Are we w(1) or uptime(1)? */
 	if (strcmp(basename(argv[0]), "uptime") == 0) {
 		wcmd = 0;
 		p = "";
 	} else {
 		wcmd = 1;
 		p = "dhiflM:N:nsuw";
 	}
 
 	dropgid = 0;
 	memf = _PATH_DEVNULL;
 	nlistf = NULL;
 	while ((ch = getopt(argc, argv, p)) != -1)
 		switch (ch) {
 		case 'd':
 			dflag = 1;
 			break;
 		case 'h':
 			header = 0;
 			break;
 		case 'i':
 			sortidle = 1;
 			break;
 		case 'M':
 			header = 0;
 			memf = optarg;
 			dropgid = 1;
 			break;
 		case 'N':
 			nlistf = optarg;
 			dropgid = 1;
 			break;
 		case 'n':
 			nflag = 1;
 			break;
 		case 'f': case 'l': case 's': case 'u': case 'w':
 			warnx("[-flsuw] no longer supported");
 			/* FALLTHROUGH */
 		case '?':
 		default:
 			usage(wcmd);
 		}
 	argc -= optind;
 	argv += optind;
 
 	if (!(_res.options & RES_INIT))
 		res_init();
 	_res.retrans = 2;	/* resolver timeout to 2 seconds per try */
 	_res.retry = 1;		/* only try once.. */
 
 	/*
 	 * Discard setgid privileges if not the running kernel so that bad
 	 * guys can't print interesting stuff from kernel memory.
 	 */
 	if (dropgid)
 		setgid(getgid());
 
 	if ((kd = kvm_openfiles(nlistf, memf, NULL, O_RDONLY, errbuf)) == NULL)
 		errx(1, "%s", errbuf);
 
 	(void)time(&now);
 
 	if (*argv)
 		sel_users = argv;
 
 	setutxent();
 	for (nusers = 0; (utmp = getutxent()) != NULL;) {
 		if (utmp->ut_type != USER_PROCESS)
 			continue;
 		if (!(stp = ttystat(utmp->ut_line)))
 			continue;	/* corrupted record */
 		++nusers;
 		if (wcmd == 0)
 			continue;
 		if (sel_users) {
 			int usermatch;
 			char **user;
 
 			usermatch = 0;
 			for (user = sel_users; !usermatch && *user; user++)
 				if (!strcmp(utmp->ut_user, *user))
 					usermatch = 1;
 			if (!usermatch)
 				continue;
 		}
 		if ((ep = calloc(1, sizeof(struct entry))) == NULL)
 			errx(1, "calloc");
 		*nextp = ep;
 		nextp = &ep->next;
 		memmove(&ep->utmp, utmp, sizeof *utmp);
 		ep->tdev = stp->st_rdev;
 		/*
 		 * If this is the console device, attempt to ascertain
 		 * the true console device dev_t.
 		 */
 		if (ep->tdev == 0) {
 			size_t size;
 
 			size = sizeof(dev_t);
 			(void)sysctlbyname("machdep.consdev", &ep->tdev, &size, NULL, 0);
 		}
 		touched = stp->st_atime;
 		if (touched < ep->utmp.ut_tv.tv_sec) {
 			/* tty untouched since before login */
 			touched = ep->utmp.ut_tv.tv_sec;
 		}
 		if ((ep->idle = now - touched) < 0)
 			ep->idle = 0;
 	}
 	endutxent();
 
 	xo_open_container("uptime-information");
 
 	if (header || wcmd == 0) {
 		pr_header(&now, nusers);
 		if (wcmd == 0) {
 			xo_close_container("uptime-information");
 			xo_finish();
 
 			(void)kvm_close(kd);
 			exit(0);
 		}
 
 #define HEADER_USER		"USER"
 #define HEADER_TTY		"TTY"
 #define HEADER_FROM		"FROM"
 #define HEADER_LOGIN_IDLE	"LOGIN@  IDLE "
 #define HEADER_WHAT		"WHAT\n"
 #define WUSED  (W_DISPUSERSIZE + W_DISPLINESIZE + W_DISPHOSTSIZE + \
 		sizeof(HEADER_LOGIN_IDLE) + 3)	/* header width incl. spaces */ 
 		xo_emit("{T:/%-*.*s} {T:/%-*.*s} {T:/%-*.*s}  {T:/%s}", 
 				W_DISPUSERSIZE, W_DISPUSERSIZE, HEADER_USER,
 				W_DISPLINESIZE, W_DISPLINESIZE, HEADER_TTY,
 				W_DISPHOSTSIZE, W_DISPHOSTSIZE, HEADER_FROM,
 				HEADER_LOGIN_IDLE HEADER_WHAT);
 	}
 
 	if ((kp = kvm_getprocs(kd, KERN_PROC_ALL, 0, &nentries)) == NULL)
 		err(1, "%s", kvm_geterr(kd));
 	for (i = 0; i < nentries; i++, kp++) {
 		if (kp->ki_stat == SIDL || kp->ki_stat == SZOMB ||
 		    kp->ki_tdev == NODEV)
 			continue;
 		for (ep = ehead; ep != NULL; ep = ep->next) {
 			if (ep->tdev == kp->ki_tdev) {
 				/*
 				 * proc is associated with this terminal
 				 */
 				if (ep->kp == NULL && kp->ki_pgid == kp->ki_tpgid) {
 					/*
 					 * Proc is 'most interesting'
 					 */
 					if (proc_compare(ep->kp, kp))
 						ep->kp = kp;
 				}
 				/*
 				 * Proc debug option info; add to debug
 				 * list using kinfo_proc ki_spare[0]
 				 * as next pointer; ptr to ptr avoids the
 				 * ptr = long assumption.
 				 */
 				dkp = ep->dkp;
 				ep->dkp = kp;
 				debugproc(kp) = dkp;
 			}
 		}
 	}
 	if ((ioctl(STDOUT_FILENO, TIOCGWINSZ, &ws) == -1 &&
 	     ioctl(STDERR_FILENO, TIOCGWINSZ, &ws) == -1 &&
 	     ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) == -1) || ws.ws_col == 0)
 	       ttywidth = 79;
         else
 	       ttywidth = ws.ws_col - 1;
 	argwidth = ttywidth - WUSED;
 	if (argwidth < 4)
 		argwidth = 8;
 	for (ep = ehead; ep != NULL; ep = ep->next) {
 		if (ep->kp == NULL) {
 			ep->args = strdup("-");
 			continue;
 		}
 		ep->args = fmt_argv(kvm_getargv(kd, ep->kp, argwidth),
 		    ep->kp->ki_comm, NULL, MAXCOMLEN);
 		if (ep->args == NULL)
 			err(1, NULL);
 	}
 	/* sort by idle time */
 	if (sortidle && ehead != NULL) {
 		struct entry *from, *save;
 
 		from = ehead;
 		ehead = NULL;
 		while (from != NULL) {
 			for (nextp = &ehead;
 			    (*nextp) && from->idle >= (*nextp)->idle;
 			    nextp = &(*nextp)->next)
 				continue;
 			save = from;
 			from = from->next;
 			save->next = *nextp;
 			*nextp = save;
 		}
 	}
 
 	xo_open_container("user-table");
 	xo_open_list("user-entry");
 
 	for (ep = ehead; ep != NULL; ep = ep->next) {
 		struct addrinfo hints, *res;
 		struct sockaddr_storage ss;
 		struct sockaddr *sa = (struct sockaddr *)&ss;
 		struct sockaddr_in *lsin = (struct sockaddr_in *)&ss;
 		struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)&ss;
 		time_t t;
 		int isaddr;
 
 		xo_open_instance("user-entry");
 
 		save_p = p = *ep->utmp.ut_host ? ep->utmp.ut_host : "-";
 		if ((x_suffix = strrchr(p, ':')) != NULL) {
 			if ((dot = strchr(x_suffix, '.')) != NULL &&
 			    strchr(dot+1, '.') == NULL)
 				*x_suffix++ = '\0';
 			else
 				x_suffix = NULL;
 		}
 
 		isaddr = 0;
 		memset(&ss, '\0', sizeof(ss));
 		if (inet_pton(AF_INET6, p, &lsin6->sin6_addr) == 1) {
 			lsin6->sin6_len = sizeof(*lsin6);
 			lsin6->sin6_family = AF_INET6;
 			isaddr = 1;
 		} else if (inet_pton(AF_INET, p, &lsin->sin_addr) == 1) {
 			lsin->sin_len = sizeof(*lsin);
 			lsin->sin_family = AF_INET;
 			isaddr = 1;
 		}
 		if (!nflag) {
 			/* Attempt to change an IP address into a name */
 			if (isaddr && realhostname_sa(fn, sizeof(fn), sa,
 			    sa->sa_len) == HOSTNAME_FOUND)
 				p = fn;
 		} else if (!isaddr) {
 			/*
 			 * If a host has only one A/AAAA RR, change a
 			 * name into an IP address
 			 */
 			memset(&hints, 0, sizeof(hints));
 			hints.ai_flags = AI_PASSIVE;
 			hints.ai_family = AF_UNSPEC;
 			hints.ai_socktype = SOCK_STREAM;
 			if (getaddrinfo(p, NULL, &hints, &res) == 0) {
 				if (res->ai_next == NULL &&
 				    getnameinfo(res->ai_addr, res->ai_addrlen,
 					fn, sizeof(fn), NULL, 0,
 					NI_NUMERICHOST) == 0)
 					p = fn;
 				freeaddrinfo(res);
 			}
 		}
 
 		if (x_suffix) {
 			(void)snprintf(buf, sizeof(buf), "%s:%s", p, x_suffix);
 			p = buf;
 		}
 		if (dflag) {
 		        xo_open_container("process-table");
 		        xo_open_list("process-entry");
 
 			for (dkp = ep->dkp; dkp != NULL; dkp = debugproc(dkp)) {
 				const char *ptr;
 
 				ptr = fmt_argv(kvm_getargv(kd, dkp, argwidth),
 				    dkp->ki_comm, NULL, MAXCOMLEN);
 				if (ptr == NULL)
 					ptr = "-";
 				xo_open_instance("process-entry");
 				xo_emit("\t\t{:process-id/%-9d/%d} {:command/%s}\n",
 				    dkp->ki_pid, ptr);
 				xo_close_instance("process-entry");
 			}
 		        xo_close_list("process-entry");
 		        xo_close_container("process-table");
 		}
 		xo_emit("{:user/%-*.*s/%@**@s} {:tty/%-*.*s/%@**@s} ",
 			W_DISPUSERSIZE, W_DISPUSERSIZE, ep->utmp.ut_user,
 			W_DISPLINESIZE, W_DISPLINESIZE,
 			*ep->utmp.ut_line ?
 			(strncmp(ep->utmp.ut_line, "tty", 3) &&
 			 strncmp(ep->utmp.ut_line, "cua", 3) ?
 			 ep->utmp.ut_line : ep->utmp.ut_line + 3) : "-");
 
 		if (save_p && save_p != p)
 		    xo_attr("address", "%s", save_p);
 		xo_emit("{:from/%-*.*s/%@**@s} ",
 		    W_DISPHOSTSIZE, W_DISPHOSTSIZE, *p ? p : "-");
 		t = ep->utmp.ut_tv.tv_sec;
 		longattime = pr_attime(&t, &now);
 		longidle = pr_idle(ep->idle);
 		xo_emit("{:command/%.*s/%@*@s}\n",
 		    argwidth - longidle - longattime,
 		    ep->args);
 
 		xo_close_instance("user-entry");
 	}
 
 	xo_close_list("user-entry");
 	xo_close_container("user-table");
 	xo_close_container("uptime-information");
 	xo_finish();
 
 	(void)kvm_close(kd);
 	exit(0);
 }
 
 static void
 pr_header(time_t *nowp, int nusers)
 {
 	double avenrun[3];
 	time_t uptime;
 	struct timespec tp;
 	int days, hrs, i, mins, secs;
 	char buf[256];
+	struct sbuf *upbuf;
 
+	upbuf = sbuf_new_auto();
 	/*
 	 * Print time of day.
 	 */
 	if (strftime(buf, sizeof(buf),
 	    use_ampm ? "%l:%M%p" : "%k:%M", localtime(nowp)) != 0)
 		xo_emit("{:time-of-day/%s} ", buf);
 	/*
 	 * Print how long system has been up.
 	 */
 	if (clock_gettime(CLOCK_UPTIME, &tp) != -1) {
 		uptime = tp.tv_sec;
 		if (uptime > 60)
 			uptime += 30;
 		days = uptime / 86400;
 		uptime %= 86400;
 		hrs = uptime / 3600;
 		uptime %= 3600;
 		mins = uptime / 60;
 		secs = uptime % 60;
 		xo_emit(" up");
-		xo_attr("seconds", "%lu", (unsigned long) tp.tv_sec);
+		xo_emit("{e:uptime/%lu}", (unsigned long) tp.tv_sec);
+		xo_emit("{e:days/%d}{e:hours/%d}{e:minutes/%d}{e:seconds/%d}", days, hrs, mins, secs);
+
 		if (days > 0)
-			xo_emit(" {:uptime/%d day%s},",
+			sbuf_printf(upbuf, " %d day%s,",
 				days, days > 1 ? "s" : "");
 		if (hrs > 0 && mins > 0)
-			xo_emit(" {:uptime/%2d:%02d},", hrs, mins);
+			sbuf_printf(upbuf, " %2d:%02d,", hrs, mins);
 		else if (hrs > 0)
-			xo_emit(" {:uptime/%d hr%s},",
+			sbuf_printf(upbuf, " %d hr%s,",
 				hrs, hrs > 1 ? "s" : "");
 		else if (mins > 0)
-			xo_emit(" {:uptime/%d min%s},",
+			sbuf_printf(upbuf, " %d min%s,",
 				mins, mins > 1 ? "s" : "");
-		else
-			xo_emit(" {:uptime/%d sec%s},",
+		else 
+			sbuf_printf(upbuf, " %d sec%s,",
 				secs, secs > 1 ? "s" : "");
+		if (sbuf_finish(upbuf) != 0)
+			xo_err(1, "Could not generate output");
+		xo_emit("{:uptime-human/%s}", sbuf_data(upbuf));
+		sbuf_delete(upbuf);
 	}
 
 	/* Print number of users logged in to system */
 	xo_emit(" {:users/%d} {N:user%s}", nusers, nusers == 1 ? "" : "s");
 
 	/*
 	 * Print 1, 5, and 15 minute load averages.
 	 */
 	if (getloadavg(avenrun, sizeof(avenrun) / sizeof(avenrun[0])) == -1)
 		xo_emit(", no load average information available\n");
 	else {
 	        static const char *format[] = {
 		    " {:load-average-1/%.2f}",
 		    " {:load-average-5/%.2f}",
 		    " {:load-average-15/%.2f}",
 		};
 		xo_emit(", load averages:");
 		for (i = 0; i < (int)(sizeof(avenrun) / sizeof(avenrun[0])); i++) {
 			if (use_comma && i > 0)
 				xo_emit(",");
 			xo_emit(format[i], avenrun[i]);
 		}
 		xo_emit("\n");
 	}
 }
 
 static struct stat *
 ttystat(char *line)
 {
 	static struct stat sb;
 	char ttybuf[MAXPATHLEN];
 
 	(void)snprintf(ttybuf, sizeof(ttybuf), "%s%s", _PATH_DEV, line);
 	if (stat(ttybuf, &sb) == 0 && S_ISCHR(sb.st_mode)) {
 		return (&sb);
 	} else
 		return (NULL);
 }
 
 static void
 usage(int wcmd)
 {
 	if (wcmd)
 		xo_error("usage: w [-dhin] [-M core] [-N system] [user ...]\n");
 	else
 		xo_error("usage: uptime\n");
 	xo_finish();
 	exit(1);
 }
Index: user/ngie/more-tests/usr.sbin/bhyve/pci_ahci.c
===================================================================
--- user/ngie/more-tests/usr.sbin/bhyve/pci_ahci.c	(revision 281675)
+++ user/ngie/more-tests/usr.sbin/bhyve/pci_ahci.c	(revision 281676)
@@ -1,2295 +1,2287 @@
 /*-
  * Copyright (c) 2013  Zhixiang Yu <zcore@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/linker_set.h>
 #include <sys/stat.h>
 #include <sys/uio.h>
 #include <sys/ioctl.h>
 #include <sys/disk.h>
 #include <sys/ata.h>
 #include <sys/endian.h>
 
 #include <errno.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
 #include <strings.h>
 #include <unistd.h>
 #include <assert.h>
 #include <pthread.h>
 #include <pthread_np.h>
 #include <inttypes.h>
 #include <md5.h>
 
 #include "bhyverun.h"
 #include "pci_emul.h"
 #include "ahci.h"
 #include "block_if.h"
 
 #define	MAX_PORTS	6	/* Intel ICH8 AHCI supports 6 ports */
 
 #define	PxSIG_ATA	0x00000101 /* ATA drive */
 #define	PxSIG_ATAPI	0xeb140101 /* ATAPI drive */
 
 enum sata_fis_type {
 	FIS_TYPE_REGH2D		= 0x27,	/* Register FIS - host to device */
 	FIS_TYPE_REGD2H		= 0x34,	/* Register FIS - device to host */
 	FIS_TYPE_DMAACT		= 0x39,	/* DMA activate FIS - device to host */
 	FIS_TYPE_DMASETUP	= 0x41,	/* DMA setup FIS - bidirectional */
 	FIS_TYPE_DATA		= 0x46,	/* Data FIS - bidirectional */
 	FIS_TYPE_BIST		= 0x58,	/* BIST activate FIS - bidirectional */
 	FIS_TYPE_PIOSETUP	= 0x5F,	/* PIO setup FIS - device to host */
 	FIS_TYPE_SETDEVBITS	= 0xA1,	/* Set dev bits FIS - device to host */
 };
 
 /*
  * SCSI opcodes
  */
 #define	TEST_UNIT_READY		0x00
 #define	REQUEST_SENSE		0x03
 #define	INQUIRY			0x12
 #define	START_STOP_UNIT		0x1B
 #define	PREVENT_ALLOW		0x1E
 #define	READ_CAPACITY		0x25
 #define	READ_10			0x28
 #define	POSITION_TO_ELEMENT	0x2B
 #define	READ_TOC		0x43
 #define	GET_EVENT_STATUS_NOTIFICATION 0x4A
 #define	MODE_SENSE_10		0x5A
 #define	REPORT_LUNS		0xA0
 #define	READ_12			0xA8
 #define	READ_CD			0xBE
 
 /*
  * SCSI mode page codes
  */
 #define	MODEPAGE_RW_ERROR_RECOVERY	0x01
 #define	MODEPAGE_CD_CAPABILITIES	0x2A
 
 /*
  * ATA commands
  */
 #define	ATA_SF_ENAB_SATA_SF		0x10
 #define		ATA_SATA_SF_AN		0x05
 #define	ATA_SF_DIS_SATA_SF		0x90
 
 /*
  * Debug printf
  */
 #ifdef AHCI_DEBUG
 static FILE *dbg;
 #define DPRINTF(format, arg...)	do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0)
 #else
 #define DPRINTF(format, arg...)
 #endif
 #define WPRINTF(format, arg...) printf(format, ##arg)
 
 struct ahci_ioreq {
 	struct blockif_req io_req;
 	struct ahci_port *io_pr;
 	STAILQ_ENTRY(ahci_ioreq) io_flist;
 	TAILQ_ENTRY(ahci_ioreq) io_blist;
 	uint8_t *cfis;
 	uint32_t len;
 	uint32_t done;
 	int slot;
-	int prdtl;
+	int more;
 };
 
 struct ahci_port {
 	struct blockif_ctxt *bctx;
 	struct pci_ahci_softc *pr_sc;
 	uint8_t *cmd_lst;
 	uint8_t *rfis;
 	char ident[20 + 1];
 	int atapi;
 	int reset;
 	int mult_sectors;
 	uint8_t xfermode;
 	uint8_t err_cfis[20];
 	uint8_t sense_key;
 	uint8_t asc;
 	uint32_t pending;
 
 	uint32_t clb;
 	uint32_t clbu;
 	uint32_t fb;
 	uint32_t fbu;
 	uint32_t is;
 	uint32_t ie;
 	uint32_t cmd;
 	uint32_t unused0;
 	uint32_t tfd;
 	uint32_t sig;
 	uint32_t ssts;
 	uint32_t sctl;
 	uint32_t serr;
 	uint32_t sact;
 	uint32_t ci;
 	uint32_t sntf;
 	uint32_t fbs;
 
 	/*
 	 * i/o request info
 	 */
 	struct ahci_ioreq *ioreq;
 	int ioqsz;
 	STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
 	TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;
 };
 
 struct ahci_cmd_hdr {
 	uint16_t flags;
 	uint16_t prdtl;
 	uint32_t prdbc;
 	uint64_t ctba;
 	uint32_t reserved[4];
 };
 
 struct ahci_prdt_entry {
 	uint64_t dba;
 	uint32_t reserved;
 #define	DBCMASK		0x3fffff
 	uint32_t dbc;
 };
 
 struct pci_ahci_softc {
 	struct pci_devinst *asc_pi;
 	pthread_mutex_t	mtx;
 	int ports;
 	uint32_t cap;
 	uint32_t ghc;
 	uint32_t is;
 	uint32_t pi;
 	uint32_t vs;
 	uint32_t ccc_ctl;
 	uint32_t ccc_pts;
 	uint32_t em_loc;
 	uint32_t em_ctl;
 	uint32_t cap2;
 	uint32_t bohc;
 	uint32_t lintr;
 	struct ahci_port port[MAX_PORTS];
 };
 #define	ahci_ctx(sc)	((sc)->asc_pi->pi_vmctx)
 
 static inline void lba_to_msf(uint8_t *buf, int lba)
 {
 	lba += 150;
 	buf[0] = (lba / 75) / 60;
 	buf[1] = (lba / 75) % 60;
 	buf[2] = lba % 75;
 }
 
 /*
  * generate HBA intr depending on whether or not ports within
  * the controller have an interrupt pending.
  */
 static void
 ahci_generate_intr(struct pci_ahci_softc *sc)
 {
 	struct pci_devinst *pi;
 	int i;
 
 	pi = sc->asc_pi;
 
 	for (i = 0; i < sc->ports; i++) {
 		struct ahci_port *pr;
 		pr = &sc->port[i];
 		if (pr->is & pr->ie)
 			sc->is |= (1 << i);
 	}
 
 	DPRINTF("%s %x\n", __func__, sc->is);
 
 	if (sc->is && (sc->ghc & AHCI_GHC_IE)) {		
 		if (pci_msi_enabled(pi)) {
 			/*
 			 * Generate an MSI interrupt on every edge
 			 */
 			pci_generate_msi(pi, 0);
 		} else if (!sc->lintr) {
 			/*
 			 * Only generate a pin-based interrupt if one wasn't
 			 * in progress
 			 */
 			sc->lintr = 1;
 			pci_lintr_assert(pi);
 		}
 	} else if (sc->lintr) {
 		/*
 		 * No interrupts: deassert pin-based signal if it had
 		 * been asserted
 		 */
 		pci_lintr_deassert(pi);
 		sc->lintr = 0;
 	}
 }
 
 static void
 ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis)
 {
 	int offset, len, irq;
 
 	if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE))
 		return;
 
 	switch (ft) {
 	case FIS_TYPE_REGD2H:
 		offset = 0x40;
 		len = 20;
 		irq = AHCI_P_IX_DHR;
 		break;
 	case FIS_TYPE_SETDEVBITS:
 		offset = 0x58;
 		len = 8;
 		irq = AHCI_P_IX_SDB;
 		break;
 	case FIS_TYPE_PIOSETUP:
 		offset = 0x20;
 		len = 20;
 		irq = 0;
 		break;
 	default:
 		WPRINTF("unsupported fis type %d\n", ft);
 		return;
 	}
 	memcpy(p->rfis + offset, fis, len);
 	if (irq) {
 		p->is |= irq;
 		ahci_generate_intr(p->pr_sc);
 	}
 }
 
 static void
 ahci_write_fis_piosetup(struct ahci_port *p)
 {
 	uint8_t fis[20];
 
 	memset(fis, 0, sizeof(fis));
 	fis[0] = FIS_TYPE_PIOSETUP;
 	ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis);
 }
 
 static void
 ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
 {
 	uint8_t fis[8];
 	uint8_t error;
 
 	error = (tfd >> 8) & 0xff;
 	memset(fis, 0, sizeof(fis));
 	fis[0] = FIS_TYPE_SETDEVBITS;
 	fis[1] = (1 << 6);
 	fis[2] = tfd & 0x77;
 	fis[3] = error;
 	if (fis[2] & ATA_S_ERROR) {
 		p->is |= AHCI_P_IX_TFE;
 		p->err_cfis[0] = slot;
 		p->err_cfis[2] = tfd & 0x77;
 		p->err_cfis[3] = error;
 		memcpy(&p->err_cfis[4], cfis + 4, 16);
 	} else {
 		*(uint32_t *)(fis + 4) = (1 << slot);
 		p->sact &= ~(1 << slot);
 	}
 	p->tfd = tfd;
 	ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis);
 }
 
 static void
 ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd)
 {
 	uint8_t fis[20];
 	uint8_t error;
 
 	error = (tfd >> 8) & 0xff;
 	memset(fis, 0, sizeof(fis));
 	fis[0] = FIS_TYPE_REGD2H;
 	fis[1] = (1 << 6);
 	fis[2] = tfd & 0xff;
 	fis[3] = error;
 	fis[4] = cfis[4];
 	fis[5] = cfis[5];
 	fis[6] = cfis[6];
 	fis[7] = cfis[7];
 	fis[8] = cfis[8];
 	fis[9] = cfis[9];
 	fis[10] = cfis[10];
 	fis[11] = cfis[11];
 	fis[12] = cfis[12];
 	fis[13] = cfis[13];
 	if (fis[2] & ATA_S_ERROR) {
 		p->is |= AHCI_P_IX_TFE;
 		p->err_cfis[0] = 0x80;
 		p->err_cfis[2] = tfd & 0xff;
 		p->err_cfis[3] = error;
 		memcpy(&p->err_cfis[4], cfis + 4, 16);
 	} else
 		p->ci &= ~(1 << slot);
 	p->tfd = tfd;
 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
 }
 
 static void
 ahci_write_reset_fis_d2h(struct ahci_port *p)
 {
 	uint8_t fis[20];
 
 	memset(fis, 0, sizeof(fis));
 	fis[0] = FIS_TYPE_REGD2H;
 	fis[3] = 1;
 	fis[4] = 1;
 	if (p->atapi) {
 		fis[5] = 0x14;
 		fis[6] = 0xeb;
 	}
 	fis[12] = 1;
 	ahci_write_fis(p, FIS_TYPE_REGD2H, fis);
 }
 
 static void
 ahci_check_stopped(struct ahci_port *p)
 {
 	/*
 	 * If we are no longer processing the command list and nothing
 	 * is in-flight, clear the running bit, the current command
 	 * slot, the command issue and active bits.
 	 */
 	if (!(p->cmd & AHCI_P_CMD_ST)) {
 		if (p->pending == 0) {
 			p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
 			p->ci = 0;
 			p->sact = 0;
 		}
 	}
 }
 
 static void
 ahci_port_stop(struct ahci_port *p)
 {
 	struct ahci_ioreq *aior;
 	uint8_t *cfis;
 	int slot;
 	int ncq;
 	int error;
 
 	assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
 
 	TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
 		/*
 		 * Try to cancel the outstanding blockif request.
 		 */
 		error = blockif_cancel(p->bctx, &aior->io_req);
 		if (error != 0)
 			continue;
 
 		slot = aior->slot;
 		cfis = aior->cfis;
 		if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
 		    cfis[2] == ATA_READ_FPDMA_QUEUED)
 			ncq = 1;
 
 		if (ncq)
 			p->sact &= ~(1 << slot);
 		else
 			p->ci &= ~(1 << slot);
 
 		/*
 		 * This command is now done.
 		 */
 		p->pending &= ~(1 << slot);
 
 		/*
 		 * Delete the blockif request from the busy list
 		 */
 		TAILQ_REMOVE(&p->iobhd, aior, io_blist);
 
 		/*
 		 * Move the blockif request back to the free list
 		 */
 		STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
 	}
 
 	ahci_check_stopped(p);
 }
 
 static void
 ahci_port_reset(struct ahci_port *pr)
 {
 	pr->serr = 0;
 	pr->sact = 0;
 	pr->xfermode = ATA_UDMA6;
 	pr->mult_sectors = 128;
 
 	if (!pr->bctx) {
 		pr->ssts = ATA_SS_DET_NO_DEVICE;
 		pr->sig = 0xFFFFFFFF;
 		pr->tfd = 0x7F;
 		return;
 	}
 	pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE;
 	if (pr->sctl & ATA_SC_SPD_MASK)
 		pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK);
 	else
 		pr->ssts |= ATA_SS_SPD_GEN3;
 	pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA;
 	if (!pr->atapi) {
 		pr->sig = PxSIG_ATA;
 		pr->tfd |= ATA_S_READY;
 	} else
 		pr->sig = PxSIG_ATAPI;
 	ahci_write_reset_fis_d2h(pr);
 }
 
 static void
 ahci_reset(struct pci_ahci_softc *sc)
 {
 	int i;
 
 	sc->ghc = AHCI_GHC_AE;
 	sc->is = 0;
 
 	if (sc->lintr) {
 		pci_lintr_deassert(sc->asc_pi);
 		sc->lintr = 0;
 	}
 
 	for (i = 0; i < sc->ports; i++) {
 		sc->port[i].ie = 0;
 		sc->port[i].is = 0;
 		sc->port[i].sctl = 0;
 		ahci_port_reset(&sc->port[i]);
 	}
 }
 
 static void
 ata_string(uint8_t *dest, const char *src, int len)
 {
 	int i;
 
 	for (i = 0; i < len; i++) {
 		if (*src)
 			dest[i ^ 1] = *src++;
 		else
 			dest[i ^ 1] = ' ';
 	}
 }
 
 static void
 atapi_string(uint8_t *dest, const char *src, int len)
 {
 	int i;
 
 	for (i = 0; i < len; i++) {
 		if (*src)
 			dest[i] = *src++;
 		else
 			dest[i] = ' ';
 	}
 }
 
+/*
+ * Build up the iovec based on the PRDT, 'done' and 'len'.
+ */
 static void
-ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
-    int seek)
+ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
+    struct ahci_prdt_entry *prdt, uint16_t prdtl)
 {
+	struct blockif_req *breq = &aior->io_req;
+	int i, j, skip, todo, left, extra;
+	uint32_t dbcsz;
+
+	/* Copy part of PRDT between 'done' and 'len' bytes into the iov. */
+	skip = aior->done;
+	left = aior->len - aior->done;
+	todo = 0;
+	for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0;
+	    i++, prdt++) {
+		dbcsz = (prdt->dbc & DBCMASK) + 1;
+		/* Skip already done part of the PRDT */
+		if (dbcsz <= skip) {
+			skip -= dbcsz;
+			continue;
+		}
+		dbcsz -= skip;
+		if (dbcsz > left)
+			dbcsz = left;
+		breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc),
+		    prdt->dba + skip, dbcsz);
+		breq->br_iov[j].iov_len = dbcsz;
+		todo += dbcsz;
+		left -= dbcsz;
+		skip = 0;
+		j++;
+	}
+
+	/* If we got limited by IOV length, round I/O down to sector size. */
+	if (j == BLOCKIF_IOV_MAX) {
+		extra = todo % blockif_sectsz(p->bctx);
+		todo -= extra;
+		assert(todo > 0);
+		while (extra > 0) {
+			if (breq->br_iov[j - 1].iov_len > extra) {
+				breq->br_iov[j - 1].iov_len -= extra;
+				break;
+			}
+			extra -= breq->br_iov[j - 1].iov_len;
+			j--;
+		}
+	}
+
+	breq->br_iovcnt = j;
+	aior->done += todo;
+	aior->more = (aior->done < aior->len && i < prdtl);
+}
+
+static void
+ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
+{
 	struct ahci_ioreq *aior;
 	struct blockif_req *breq;
-	struct pci_ahci_softc *sc;
 	struct ahci_prdt_entry *prdt;
 	struct ahci_cmd_hdr *hdr;
 	uint64_t lba;
 	uint32_t len;
-	int i, err, iovcnt, ncq, readop;
+	int err, ncq, readop;
 
-	sc = p->pr_sc;
 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
 	ncq = 0;
 	readop = 1;
 
-	prdt += seek;
 	if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 ||
 	    cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 ||
 	    cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 ||
 	    cfis[2] == ATA_WRITE_FPDMA_QUEUED)
 		readop = 0;
 
 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
 	    cfis[2] == ATA_READ_FPDMA_QUEUED) {
 		lba = ((uint64_t)cfis[10] << 40) |
 			((uint64_t)cfis[9] << 32) |
 			((uint64_t)cfis[8] << 24) |
 			((uint64_t)cfis[6] << 16) |
 			((uint64_t)cfis[5] << 8) |
 			cfis[4];
 		len = cfis[11] << 8 | cfis[3];
 		if (!len)
 			len = 65536;
 		ncq = 1;
 	} else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 ||
 	    cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 ||
 	    cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) {
 		lba = ((uint64_t)cfis[10] << 40) |
 			((uint64_t)cfis[9] << 32) |
 			((uint64_t)cfis[8] << 24) |
 			((uint64_t)cfis[6] << 16) |
 			((uint64_t)cfis[5] << 8) |
 			cfis[4];
 		len = cfis[13] << 8 | cfis[12];
 		if (!len)
 			len = 65536;
 	} else {
 		lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) |
 			(cfis[5] << 8) | cfis[4];
 		len = cfis[12];
 		if (!len)
 			len = 256;
 	}
 	lba *= blockif_sectsz(p->bctx);
 	len *= blockif_sectsz(p->bctx);
 
-	/*
-	 * Pull request off free list
-	 */
+	/* Pull request off free list */
 	aior = STAILQ_FIRST(&p->iofhd);
 	assert(aior != NULL);
 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
+
 	aior->cfis = cfis;
 	aior->slot = slot;
 	aior->len = len;
 	aior->done = done;
 	breq = &aior->io_req;
 	breq->br_offset = lba + done;
-	iovcnt = hdr->prdtl - seek;
-	if (iovcnt > BLOCKIF_IOV_MAX) {
-		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
-		iovcnt = BLOCKIF_IOV_MAX;
-	} else
-		aior->prdtl = 0;
-	breq->br_iovcnt = iovcnt;
+	ahci_build_iov(p, aior, prdt, hdr->prdtl);
 
-	/*
-	 * Mark this command in-flight.
-	 */
+	/* Mark this command in-flight. */
 	p->pending |= 1 << slot;
 
-	/*
-	 * Stuff request onto busy list
-	 */
+	/* Stuff request onto busy list. */
 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
 
-	/*
-	 * Build up the iovec based on the prdt
-	 */
-	for (i = 0; i < iovcnt; i++) {
-		uint32_t dbcsz;
-
-		dbcsz = (prdt->dbc & DBCMASK) + 1;
-		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
-		    prdt->dba, dbcsz);
-		breq->br_iov[i].iov_len = dbcsz;
-		aior->done += dbcsz;
-		prdt++;
-	}
 	if (readop)
 		err = blockif_read(p->bctx, breq);
 	else
 		err = blockif_write(p->bctx, breq);
 	assert(err == 0);
 
 	if (ncq)
 		p->ci &= ~(1 << slot);
 }
 
 static void
 ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
 {
 	struct ahci_ioreq *aior;
 	struct blockif_req *breq;
 	int err;
 
 	/*
 	 * Pull request off free list
 	 */
 	aior = STAILQ_FIRST(&p->iofhd);
 	assert(aior != NULL);
 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
 	aior->cfis = cfis;
 	aior->slot = slot;
 	aior->len = 0;
 	aior->done = 0;
-	aior->prdtl = 0;
+	aior->more = 0;
 	breq = &aior->io_req;
 
 	/*
 	 * Mark this command in-flight.
 	 */
 	p->pending |= 1 << slot;
 
 	/*
 	 * Stuff request onto busy list
 	 */
 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
 
 	err = blockif_flush(p->bctx, breq);
 	assert(err == 0);
 }
 
 static inline void
 read_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
 		void *buf, int size)
 {
 	struct ahci_cmd_hdr *hdr;
 	struct ahci_prdt_entry *prdt;
 	void *to;
 	int i, len;
 
 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
 	len = size;
 	to = buf;
 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
 	for (i = 0; i < hdr->prdtl && len; i++) {
 		uint8_t *ptr;
 		uint32_t dbcsz;
 		int sublen;
 
 		dbcsz = (prdt->dbc & DBCMASK) + 1;
 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
 		sublen = len < dbcsz ? len : dbcsz;
 		memcpy(to, ptr, sublen);
 		len -= sublen;
 		to += sublen;
 		prdt++;
 	}
 }
 
 static void
 ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
 {
 	struct ahci_ioreq *aior;
 	struct blockif_req *breq;
 	uint8_t *entry;
 	uint64_t elba;
 	uint32_t len, elen;
 	int err;
 	uint8_t buf[512];
 
 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT) {
 		len = (uint16_t)cfis[13] << 8 | cfis[12];
 		len *= 512;
 	} else { /* ATA_SEND_FPDMA_QUEUED */
 		len = (uint16_t)cfis[11] << 8 | cfis[3];
 		len *= 512;
 	}
 	read_prdt(p, slot, cfis, buf, sizeof(buf));
 
 next:
 	entry = &buf[done];
 	elba = ((uint64_t)entry[5] << 40) |
 		((uint64_t)entry[4] << 32) |
 		((uint64_t)entry[3] << 24) |
 		((uint64_t)entry[2] << 16) |
 		((uint64_t)entry[1] << 8) |
 		entry[0];
 	elen = (uint16_t)entry[7] << 8 | entry[6];
 	done += 8;
 	if (elen == 0) {
 		if (done >= len) {
 			ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
 			p->pending &= ~(1 << slot);
 			ahci_check_stopped(p);
 			return;
 		}
 		goto next;
 	}
 
 	/*
 	 * Pull request off free list
 	 */
 	aior = STAILQ_FIRST(&p->iofhd);
 	assert(aior != NULL);
 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
 	aior->cfis = cfis;
 	aior->slot = slot;
 	aior->len = len;
 	aior->done = done;
-	aior->prdtl = 0;
+	aior->more = (len != done);
 
 	breq = &aior->io_req;
 	breq->br_offset = elba * blockif_sectsz(p->bctx);
 	breq->br_iovcnt = 1;
 	breq->br_iov[0].iov_len = elen * blockif_sectsz(p->bctx);
 
 	/*
 	 * Mark this command in-flight.
 	 */
 	p->pending |= 1 << slot;
 
 	/*
 	 * Stuff request onto busy list
 	 */
 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
 
 	err = blockif_delete(p->bctx, breq);
 	assert(err == 0);
 }
 
 static inline void
 write_prdt(struct ahci_port *p, int slot, uint8_t *cfis,
 		void *buf, int size)
 {
 	struct ahci_cmd_hdr *hdr;
 	struct ahci_prdt_entry *prdt;
 	void *from;
 	int i, len;
 
 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
 	len = size;
 	from = buf;
 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
 	for (i = 0; i < hdr->prdtl && len; i++) {
 		uint8_t *ptr;
 		uint32_t dbcsz;
 		int sublen;
 
 		dbcsz = (prdt->dbc & DBCMASK) + 1;
 		ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz);
 		sublen = len < dbcsz ? len : dbcsz;
 		memcpy(ptr, from, sublen);
 		len -= sublen;
 		from += sublen;
 		prdt++;
 	}
 	hdr->prdbc = size - len;
 }
 
 static void
 ahci_checksum(uint8_t *buf, int size)
 {
 	int i;
 	uint8_t sum = 0;
 
 	for (i = 0; i < size - 1; i++)
 		sum += buf[i];
 	buf[size - 1] = 0x100 - sum;
 }
 
 static void
 ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis)
 {
 	struct ahci_cmd_hdr *hdr;
 	uint8_t buf[512];
 
 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
 	if (p->atapi || hdr->prdtl == 0 || cfis[4] != 0x10 ||
 	    cfis[5] != 0 || cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) {
 		ahci_write_fis_d2h(p, slot, cfis,
 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
 		return;
 	}
 
 	memset(buf, 0, sizeof(buf));
 	memcpy(buf, p->err_cfis, sizeof(p->err_cfis));
 	ahci_checksum(buf, sizeof(buf));
 
 	if (cfis[2] == ATA_READ_LOG_EXT)
 		ahci_write_fis_piosetup(p);
 	write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
 }
 
 static void
 handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
 {
 	struct ahci_cmd_hdr *hdr;
 
 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
 	if (p->atapi || hdr->prdtl == 0) {
 		ahci_write_fis_d2h(p, slot, cfis,
 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
 	} else {
 		uint16_t buf[256];
 		uint64_t sectors;
 		int sectsz, psectsz, psectoff, candelete, ro;
 		uint16_t cyl;
 		uint8_t sech, heads;
 
 		ro = blockif_is_ro(p->bctx);
 		candelete = blockif_candelete(p->bctx);
 		sectsz = blockif_sectsz(p->bctx);
 		sectors = blockif_size(p->bctx) / sectsz;
 		blockif_chs(p->bctx, &cyl, &heads, &sech);
 		blockif_psectsz(p->bctx, &psectsz, &psectoff);
 		memset(buf, 0, sizeof(buf));
 		buf[0] = 0x0040;
 		buf[1] = cyl;
 		buf[3] = heads;
 		buf[6] = sech;
 		ata_string((uint8_t *)(buf+10), p->ident, 20);
 		ata_string((uint8_t *)(buf+23), "001", 8);
 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
 		buf[47] = (0x8000 | 128);
 		buf[48] = 0x1;
 		buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
 		buf[50] = (1 << 14);
 		buf[53] = (1 << 1 | 1 << 2);
 		if (p->mult_sectors)
 			buf[59] = (0x100 | p->mult_sectors);
 		if (sectors <= 0x0fffffff) {
 			buf[60] = sectors;
 			buf[61] = (sectors >> 16);
 		} else {
 			buf[60] = 0xffff;
 			buf[61] = 0x0fff;
 		}
 		buf[63] = 0x7;
 		if (p->xfermode & ATA_WDMA0)
 			buf[63] |= (1 << ((p->xfermode & 7) + 8));
 		buf[64] = 0x3;
 		buf[65] = 120;
 		buf[66] = 120;
 		buf[67] = 120;
 		buf[68] = 120;
 		buf[69] = 0;
 		buf[75] = 31;
 		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 |
 			   ATA_SUPPORT_NCQ);
 		buf[77] = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED |
 			   (p->ssts & ATA_SS_SPD_MASK) >> 3);
 		buf[80] = 0x3f0;
 		buf[81] = 0x28;
 		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
 			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
 		buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
 			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 14);
 		buf[84] = (1 << 14);
 		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE|
 			   ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP);
 		buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE |
 			   ATA_SUPPORT_FLUSHCACHE48 | 1 << 15);
 		buf[87] = (1 << 14);
 		buf[88] = 0x7f;
 		if (p->xfermode & ATA_UDMA0)
 			buf[88] |= (1 << ((p->xfermode & 7) + 8));
 		buf[93] = (1 | 1 <<14);
 		buf[100] = sectors;
 		buf[101] = (sectors >> 16);
 		buf[102] = (sectors >> 32);
 		buf[103] = (sectors >> 48);
 		if (candelete && !ro) {
 			buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT;
 			buf[105] = 1;
 			buf[169] = ATA_SUPPORT_DSM_TRIM;
 		}
 		buf[106] = 0x4000;
 		buf[209] = 0x4000;
 		if (psectsz > sectsz) {
 			buf[106] |= 0x2000;
 			buf[106] |= ffsl(psectsz / sectsz) - 1;
 			buf[209] |= (psectoff / sectsz);
 		}
 		if (sectsz > 512) {
 			buf[106] |= 0x1000;
 			buf[117] = sectsz / 2;
 			buf[118] = ((sectsz / 2) >> 16);
 		}
 		buf[119] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
 		buf[120] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14);
 		buf[222] = 0x1020;
 		buf[255] = 0x00a5;
 		ahci_checksum((uint8_t *)buf, sizeof(buf));
 		ahci_write_fis_piosetup(p);
 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
 	}
 }
 
 static void
 handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis)
 {
 	if (!p->atapi) {
 		ahci_write_fis_d2h(p, slot, cfis,
 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
 	} else {
 		uint16_t buf[256];
 
 		memset(buf, 0, sizeof(buf));
 		buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5);
 		ata_string((uint8_t *)(buf+10), p->ident, 20);
 		ata_string((uint8_t *)(buf+23), "001", 8);
 		ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40);
 		buf[49] = (1 << 9 | 1 << 8);
 		buf[50] = (1 << 14 | 1);
 		buf[53] = (1 << 2 | 1 << 1);
 		buf[62] = 0x3f;
 		buf[63] = 7;
 		if (p->xfermode & ATA_WDMA0)
 			buf[63] |= (1 << ((p->xfermode & 7) + 8));
 		buf[64] = 3;
 		buf[65] = 120;
 		buf[66] = 120;
 		buf[67] = 120;
 		buf[68] = 120;
 		buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3);
 		buf[77] = ((p->ssts & ATA_SS_SPD_MASK) >> 3);
 		buf[78] = (1 << 5);
 		buf[80] = 0x3f0;
 		buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
 			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
 		buf[83] = (1 << 14);
 		buf[84] = (1 << 14);
 		buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET |
 			   ATA_SUPPORT_RESET | ATA_SUPPORT_NOP);
 		buf[87] = (1 << 14);
 		buf[88] = 0x7f;
 		if (p->xfermode & ATA_UDMA0)
 			buf[88] |= (1 << ((p->xfermode & 7) + 8));
 		buf[222] = 0x1020;
 		buf[255] = 0x00a5;
 		ahci_checksum((uint8_t *)buf, sizeof(buf));
 		ahci_write_fis_piosetup(p);
 		write_prdt(p, slot, cfis, (void *)buf, sizeof(buf));
 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY);
 	}
 }
 
 static void
 atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis)
 {
 	uint8_t buf[36];
 	uint8_t *acmd;
 	int len;
 	uint32_t tfd;
 
 	acmd = cfis + 0x40;
 
 	if (acmd[1] & 1) {		/* VPD */
 		if (acmd[2] == 0) {	/* Supported VPD pages */
 			buf[0] = 0x05;
 			buf[1] = 0;
 			buf[2] = 0;
 			buf[3] = 1;
 			buf[4] = 0;
 			len = 4 + buf[3];
 		} else {
 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
 			p->asc = 0x24;
 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 			ahci_write_fis_d2h(p, slot, cfis, tfd);
 			return;
 		}
 	} else {
 		buf[0] = 0x05;
 		buf[1] = 0x80;
 		buf[2] = 0x00;
 		buf[3] = 0x21;
 		buf[4] = 31;
 		buf[5] = 0;
 		buf[6] = 0;
 		buf[7] = 0;
 		atapi_string(buf + 8, "BHYVE", 8);
 		atapi_string(buf + 16, "BHYVE DVD-ROM", 16);
 		atapi_string(buf + 32, "001", 4);
 		len = sizeof(buf);
 	}
 
 	if (len > acmd[4])
 		len = acmd[4];
 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 	write_prdt(p, slot, cfis, buf, len);
 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
 }
 
 static void
 atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis)
 {
 	uint8_t buf[8];
 	uint64_t sectors;
 
 	sectors = blockif_size(p->bctx) / 2048;
 	be32enc(buf, sectors - 1);
 	be32enc(buf + 4, 2048);
 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 	write_prdt(p, slot, cfis, buf, sizeof(buf));
 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
 }
 
 static void
 atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis)
 {
 	uint8_t *acmd;
 	uint8_t format;
 	int len;
 
 	acmd = cfis + 0x40;
 
 	len = be16dec(acmd + 7);
 	format = acmd[9] >> 6;
 	switch (format) {
 	case 0:
 	{
 		int msf, size;
 		uint64_t sectors;
 		uint8_t start_track, buf[20], *bp;
 
 		msf = (acmd[1] >> 1) & 1;
 		start_track = acmd[6];
 		if (start_track > 1 && start_track != 0xaa) {
 			uint32_t tfd;
 			p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
 			p->asc = 0x24;
 			tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
 			cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 			ahci_write_fis_d2h(p, slot, cfis, tfd);
 			return;
 		}
 		bp = buf + 2;
 		*bp++ = 1;
 		*bp++ = 1;
 		if (start_track <= 1) {
 			*bp++ = 0;
 			*bp++ = 0x14;
 			*bp++ = 1;
 			*bp++ = 0;
 			if (msf) {
 				*bp++ = 0;
 				lba_to_msf(bp, 0);
 				bp += 3;
 			} else {
 				*bp++ = 0;
 				*bp++ = 0;
 				*bp++ = 0;
 				*bp++ = 0;
 			}
 		}
 		*bp++ = 0;
 		*bp++ = 0x14;
 		*bp++ = 0xaa;
 		*bp++ = 0;
 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
 		sectors >>= 2;
 		if (msf) {
 			*bp++ = 0;
 			lba_to_msf(bp, sectors);
 			bp += 3;
 		} else {
 			be32enc(bp, sectors);
 			bp += 4;
 		}
 		size = bp - buf;
 		be16enc(buf, size - 2);
 		if (len > size)
 			len = size;
 		write_prdt(p, slot, cfis, buf, len);
 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
 		break;
 	}
 	case 1:
 	{
 		uint8_t buf[12];
 
 		memset(buf, 0, sizeof(buf));
 		buf[1] = 0xa;
 		buf[2] = 0x1;
 		buf[3] = 0x1;
 		if (len > sizeof(buf))
 			len = sizeof(buf);
 		write_prdt(p, slot, cfis, buf, len);
 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
 		break;
 	}
 	case 2:
 	{
 		int msf, size;
 		uint64_t sectors;
 		uint8_t start_track, *bp, buf[50];
 
 		msf = (acmd[1] >> 1) & 1;
 		start_track = acmd[6];
 		bp = buf + 2;
 		*bp++ = 1;
 		*bp++ = 1;
 
 		*bp++ = 1;
 		*bp++ = 0x14;
 		*bp++ = 0;
 		*bp++ = 0xa0;
 		*bp++ = 0;
 		*bp++ = 0;
 		*bp++ = 0;
 		*bp++ = 0;
 		*bp++ = 1;
 		*bp++ = 0;
 		*bp++ = 0;
 
 		*bp++ = 1;
 		*bp++ = 0x14;
 		*bp++ = 0;
 		*bp++ = 0xa1;
 		*bp++ = 0;
 		*bp++ = 0;
 		*bp++ = 0;
 		*bp++ = 0;
 		*bp++ = 1;
 		*bp++ = 0;
 		*bp++ = 0;
 
 		*bp++ = 1;
 		*bp++ = 0x14;
 		*bp++ = 0;
 		*bp++ = 0xa2;
 		*bp++ = 0;
 		*bp++ = 0;
 		*bp++ = 0;
 		sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx);
 		sectors >>= 2;
 		if (msf) {
 			*bp++ = 0;
 			lba_to_msf(bp, sectors);
 			bp += 3;
 		} else {
 			be32enc(bp, sectors);
 			bp += 4;
 		}
 
 		*bp++ = 1;
 		*bp++ = 0x14;
 		*bp++ = 0;
 		*bp++ = 1;
 		*bp++ = 0;
 		*bp++ = 0;
 		*bp++ = 0;
 		if (msf) {
 			*bp++ = 0;
 			lba_to_msf(bp, 0);
 			bp += 3;
 		} else {
 			*bp++ = 0;
 			*bp++ = 0;
 			*bp++ = 0;
 			*bp++ = 0;
 		}
 
 		size = bp - buf;
 		be16enc(buf, size - 2);
 		if (len > size)
 			len = size;
 		write_prdt(p, slot, cfis, buf, len);
 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
 		break;
 	}
 	default:
 	{
 		uint32_t tfd;
 
 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
 		p->asc = 0x24;
 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 		ahci_write_fis_d2h(p, slot, cfis, tfd);
 		break;
 	}
 	}
 }
 
 static void
 atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis)
 {
 	uint8_t buf[16];
 
 	memset(buf, 0, sizeof(buf));
 	buf[3] = 8;
 
 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 	write_prdt(p, slot, cfis, buf, sizeof(buf));
 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
 }
 
 static void
-atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
-		uint32_t done, int seek)
+atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done)
 {
 	struct ahci_ioreq *aior;
 	struct ahci_cmd_hdr *hdr;
 	struct ahci_prdt_entry *prdt;
 	struct blockif_req *breq;
 	struct pci_ahci_softc *sc;
 	uint8_t *acmd;
 	uint64_t lba;
 	uint32_t len;
-	int i, err, iovcnt;
+	int err;
 
 	sc = p->pr_sc;
 	acmd = cfis + 0x40;
 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
 
-	prdt += seek;
 	lba = be32dec(acmd + 2);
 	if (acmd[0] == READ_10)
 		len = be16dec(acmd + 7);
 	else
 		len = be32dec(acmd + 6);
 	if (len == 0) {
 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
 	}
 	lba *= 2048;
 	len *= 2048;
 
 	/*
 	 * Pull request off free list
 	 */
 	aior = STAILQ_FIRST(&p->iofhd);
 	assert(aior != NULL);
 	STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
 	aior->cfis = cfis;
 	aior->slot = slot;
 	aior->len = len;
 	aior->done = done;
 	breq = &aior->io_req;
 	breq->br_offset = lba + done;
-	iovcnt = hdr->prdtl - seek;
-	if (iovcnt > BLOCKIF_IOV_MAX) {
-		aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
-		iovcnt = BLOCKIF_IOV_MAX;
-	} else
-		aior->prdtl = 0;
-	breq->br_iovcnt = iovcnt;
+	ahci_build_iov(p, aior, prdt, hdr->prdtl);
 
-	/*
-	 * Mark this command in-flight.
-	 */
+	/* Mark this command in-flight. */
 	p->pending |= 1 << slot;
 
-	/*
-	 * Stuff request onto busy list
-	 */
+	/* Stuff request onto busy list. */
 	TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
 
-	/*
-	 * Build up the iovec based on the prdt
-	 */
-	for (i = 0; i < iovcnt; i++) {
-		uint32_t dbcsz;
-
-		dbcsz = (prdt->dbc & DBCMASK) + 1;
-		breq->br_iov[i].iov_base = paddr_guest2host(ahci_ctx(sc),
-		    prdt->dba, dbcsz);
-		breq->br_iov[i].iov_len = dbcsz;
-		aior->done += dbcsz;
-		prdt++;
-	}
 	err = blockif_read(p->bctx, breq);
 	assert(err == 0);
 }
 
 static void
 atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis)
 {
 	uint8_t buf[64];
 	uint8_t *acmd;
 	int len;
 
 	acmd = cfis + 0x40;
 	len = acmd[4];
 	if (len > sizeof(buf))
 		len = sizeof(buf);
 	memset(buf, 0, len);
 	buf[0] = 0x70 | (1 << 7);
 	buf[2] = p->sense_key;
 	buf[7] = 10;
 	buf[12] = p->asc;
 	write_prdt(p, slot, cfis, buf, len);
 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 	ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
 }
 
 static void
 atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis)
 {
 	uint8_t *acmd = cfis + 0x40;
 	uint32_t tfd;
 
 	switch (acmd[4] & 3) {
 	case 0:
 	case 1:
 	case 3:
 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 		tfd = ATA_S_READY | ATA_S_DSC;
 		break;
 	case 2:
 		/* TODO eject media */
 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
 		p->asc = 0x53;
 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
 		break;
 	}
 	ahci_write_fis_d2h(p, slot, cfis, tfd);
 }
 
 static void
 atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis)
 {
 	uint8_t *acmd;
 	uint32_t tfd;
 	uint8_t pc, code;
 	int len;
 
 	acmd = cfis + 0x40;
 	len = be16dec(acmd + 7);
 	pc = acmd[2] >> 6;
 	code = acmd[2] & 0x3f;
 
 	switch (pc) {
 	case 0:
 		switch (code) {
 		case MODEPAGE_RW_ERROR_RECOVERY:
 		{
 			uint8_t buf[16];
 
 			if (len > sizeof(buf))
 				len = sizeof(buf);
 
 			memset(buf, 0, sizeof(buf));
 			be16enc(buf, 16 - 2);
 			buf[2] = 0x70;
 			buf[8] = 0x01;
 			buf[9] = 16 - 10;
 			buf[11] = 0x05;
 			write_prdt(p, slot, cfis, buf, len);
 			tfd = ATA_S_READY | ATA_S_DSC;
 			break;
 		}
 		case MODEPAGE_CD_CAPABILITIES:
 		{
 			uint8_t buf[30];
 
 			if (len > sizeof(buf))
 				len = sizeof(buf);
 
 			memset(buf, 0, sizeof(buf));
 			be16enc(buf, 30 - 2);
 			buf[2] = 0x70;
 			buf[8] = 0x2A;
 			buf[9] = 30 - 10;
 			buf[10] = 0x08;
 			buf[12] = 0x71;
 			be16enc(&buf[18], 2);
 			be16enc(&buf[20], 512);
 			write_prdt(p, slot, cfis, buf, len);
 			tfd = ATA_S_READY | ATA_S_DSC;
 			break;
 		}
 		default:
 			goto error;
 			break;
 		}
 		break;
 	case 3:
 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
 		p->asc = 0x39;
 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
 		break;
 error:
 	case 1:
 	case 2:
 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
 		p->asc = 0x24;
 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
 		break;
 	}
 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 	ahci_write_fis_d2h(p, slot, cfis, tfd);
 }
 
 static void
 atapi_get_event_status_notification(struct ahci_port *p, int slot,
     uint8_t *cfis)
 {
 	uint8_t *acmd;
 	uint32_t tfd;
 
 	acmd = cfis + 0x40;
 
 	/* we don't support asynchronous operation */
 	if (!(acmd[1] & 1)) {
 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
 		p->asc = 0x24;
 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
 	} else {
 		uint8_t buf[8];
 		int len;
 
 		len = be16dec(acmd + 7);
 		if (len > sizeof(buf))
 			len = sizeof(buf);
 
 		memset(buf, 0, sizeof(buf));
 		be16enc(buf, 8 - 2);
 		buf[2] = 0x04;
 		buf[3] = 0x10;
 		buf[5] = 0x02;
 		write_prdt(p, slot, cfis, buf, len);
 		tfd = ATA_S_READY | ATA_S_DSC;
 	}
 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 	ahci_write_fis_d2h(p, slot, cfis, tfd);
 }
 
 static void
 handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
 {
 	uint8_t *acmd;
 
 	acmd = cfis + 0x40;
 
 #ifdef AHCI_DEBUG
 	{
 		int i;
 		DPRINTF("ACMD:");
 		for (i = 0; i < 16; i++)
 			DPRINTF("%02x ", acmd[i]);
 		DPRINTF("\n");
 	}
 #endif
 
 	switch (acmd[0]) {
 	case TEST_UNIT_READY:
 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
 		break;
 	case INQUIRY:
 		atapi_inquiry(p, slot, cfis);
 		break;
 	case READ_CAPACITY:
 		atapi_read_capacity(p, slot, cfis);
 		break;
 	case PREVENT_ALLOW:
 		/* TODO */
 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
 		break;
 	case READ_TOC:
 		atapi_read_toc(p, slot, cfis);
 		break;
 	case REPORT_LUNS:
 		atapi_report_luns(p, slot, cfis);
 		break;
 	case READ_10:
 	case READ_12:
-		atapi_read(p, slot, cfis, 0, 0);
+		atapi_read(p, slot, cfis, 0);
 		break;
 	case REQUEST_SENSE:
 		atapi_request_sense(p, slot, cfis);
 		break;
 	case START_STOP_UNIT:
 		atapi_start_stop_unit(p, slot, cfis);
 		break;
 	case MODE_SENSE_10:
 		atapi_mode_sense(p, slot, cfis);
 		break;
 	case GET_EVENT_STATUS_NOTIFICATION:
 		atapi_get_event_status_notification(p, slot, cfis);
 		break;
 	default:
 		cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
 		p->asc = 0x20;
 		ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) |
 				ATA_S_READY | ATA_S_ERROR);
 		break;
 	}
 }
 
 static void
 ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
 {
 
 	switch (cfis[2]) {
 	case ATA_ATA_IDENTIFY:
 		handle_identify(p, slot, cfis);
 		break;
 	case ATA_SETFEATURES:
 	{
 		switch (cfis[3]) {
 		case ATA_SF_ENAB_SATA_SF:
 			switch (cfis[12]) {
 			case ATA_SATA_SF_AN:
 				p->tfd = ATA_S_DSC | ATA_S_READY;
 				break;
 			default:
 				p->tfd = ATA_S_ERROR | ATA_S_READY;
 				p->tfd |= (ATA_ERROR_ABORT << 8);
 				break;
 			}
 			break;
 		case ATA_SF_ENAB_WCACHE:
 		case ATA_SF_DIS_WCACHE:
 		case ATA_SF_ENAB_RCACHE:
 		case ATA_SF_DIS_RCACHE:
 			p->tfd = ATA_S_DSC | ATA_S_READY;
 			break;
 		case ATA_SF_SETXFER:
 		{
 			switch (cfis[12] & 0xf8) {
 			case ATA_PIO:
 			case ATA_PIO0:
 				break;
 			case ATA_WDMA0:
 			case ATA_UDMA0:
 				p->xfermode = (cfis[12] & 0x7);
 				break;
 			}
 			p->tfd = ATA_S_DSC | ATA_S_READY;
 			break;
 		}
 		default:
 			p->tfd = ATA_S_ERROR | ATA_S_READY;
 			p->tfd |= (ATA_ERROR_ABORT << 8);
 			break;
 		}
 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
 		break;
 	}
 	case ATA_SET_MULTI:
 		if (cfis[12] != 0 &&
 			(cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) {
 			p->tfd = ATA_S_ERROR | ATA_S_READY;
 			p->tfd |= (ATA_ERROR_ABORT << 8);
 		} else {
 			p->mult_sectors = cfis[12];
 			p->tfd = ATA_S_DSC | ATA_S_READY;
 		}
 		ahci_write_fis_d2h(p, slot, cfis, p->tfd);
 		break;
 	case ATA_READ:
 	case ATA_WRITE:
 	case ATA_READ48:
 	case ATA_WRITE48:
 	case ATA_READ_MUL:
 	case ATA_WRITE_MUL:
 	case ATA_READ_MUL48:
 	case ATA_WRITE_MUL48:
 	case ATA_READ_DMA:
 	case ATA_WRITE_DMA:
 	case ATA_READ_DMA48:
 	case ATA_WRITE_DMA48:
 	case ATA_READ_FPDMA_QUEUED:
 	case ATA_WRITE_FPDMA_QUEUED:
-		ahci_handle_dma(p, slot, cfis, 0, 0);
+		ahci_handle_rw(p, slot, cfis, 0);
 		break;
 	case ATA_FLUSHCACHE:
 	case ATA_FLUSHCACHE48:
 		ahci_handle_flush(p, slot, cfis);
 		break;
 	case ATA_DATA_SET_MANAGEMENT:
 		if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM &&
 		    cfis[13] == 0 && cfis[12] == 1) {
 			ahci_handle_dsm_trim(p, slot, cfis, 0);
 			break;
 		}
 		ahci_write_fis_d2h(p, slot, cfis,
 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
 		break;
 	case ATA_SEND_FPDMA_QUEUED:
 		if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM &&
 		    cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM &&
 		    cfis[11] == 0 && cfis[13] == 1) {
 			ahci_handle_dsm_trim(p, slot, cfis, 0);
 			break;
 		}
 		ahci_write_fis_d2h(p, slot, cfis,
 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
 		break;
 	case ATA_READ_LOG_EXT:
 	case ATA_READ_LOG_DMA_EXT:
 		ahci_handle_read_log(p, slot, cfis);
 		break;
 	case ATA_NOP:
 		ahci_write_fis_d2h(p, slot, cfis,
 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
 		break;
 	case ATA_STANDBY_CMD:
 	case ATA_STANDBY_IMMEDIATE:
 	case ATA_IDLE_CMD:
 	case ATA_IDLE_IMMEDIATE:
 	case ATA_SLEEP:
 		ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
 		break;
 	case ATA_ATAPI_IDENTIFY:
 		handle_atapi_identify(p, slot, cfis);
 		break;
 	case ATA_PACKET_CMD:
 		if (!p->atapi) {
 			ahci_write_fis_d2h(p, slot, cfis,
 			    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
 		} else
 			handle_packet_cmd(p, slot, cfis);
 		break;
 	default:
 		WPRINTF("Unsupported cmd:%02x\n", cfis[2]);
 		ahci_write_fis_d2h(p, slot, cfis,
 		    (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
 		break;
 	}
 }
 
 static void
 ahci_handle_slot(struct ahci_port *p, int slot)
 {
 	struct ahci_cmd_hdr *hdr;
 	struct ahci_prdt_entry *prdt;
 	struct pci_ahci_softc *sc;
 	uint8_t *cfis;
 	int cfl;
 
 	sc = p->pr_sc;
 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
 	cfl = (hdr->flags & 0x1f) * 4;
 	cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
 			0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
 	prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
 
 #ifdef AHCI_DEBUG
 	DPRINTF("\ncfis:");
 	for (i = 0; i < cfl; i++) {
 		if (i % 10 == 0)
 			DPRINTF("\n");
 		DPRINTF("%02x ", cfis[i]);
 	}
 	DPRINTF("\n");
 
 	for (i = 0; i < hdr->prdtl; i++) {
 		DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba);
 		prdt++;
 	}
 #endif
 
 	if (cfis[0] != FIS_TYPE_REGH2D) {
 		WPRINTF("Not a H2D FIS:%02x\n", cfis[0]);
 		return;
 	}
 
 	if (cfis[1] & 0x80) {
 		ahci_handle_cmd(p, slot, cfis);
 	} else {
 		if (cfis[15] & (1 << 2))
 			p->reset = 1;
 		else if (p->reset) {
 			p->reset = 0;
 			ahci_port_reset(p);
 		}
 		p->ci &= ~(1 << slot);
 	}
 }
 
 static void
 ahci_handle_port(struct ahci_port *p)
 {
 	int i;
 
 	if (!(p->cmd & AHCI_P_CMD_ST))
 		return;
 
 	/*
 	 * Search for any new commands to issue ignoring those that
 	 * are already in-flight.
 	 */
 	for (i = 0; (i < 32) && p->ci; i++) {
 		if ((p->ci & (1 << i)) && !(p->pending & (1 << i))) {
 			p->cmd &= ~AHCI_P_CMD_CCS_MASK;
 			p->cmd |= i << AHCI_P_CMD_CCS_SHIFT;
 			ahci_handle_slot(p, i);
 		}
 	}
 }
 
 /*
  * blockif callback routine - this runs in the context of the blockif
  * i/o thread, so the mutex needs to be acquired.
  */
 static void
 ata_ioreq_cb(struct blockif_req *br, int err)
 {
 	struct ahci_cmd_hdr *hdr;
 	struct ahci_ioreq *aior;
 	struct ahci_port *p;
 	struct pci_ahci_softc *sc;
 	uint32_t tfd;
 	uint8_t *cfis;
-	int pending, slot, ncq, dsm;
+	int slot, ncq, dsm;
 
 	DPRINTF("%s %d\n", __func__, err);
 
 	ncq = dsm = 0;
 	aior = br->br_param;
 	p = aior->io_pr;
 	cfis = aior->cfis;
 	slot = aior->slot;
-	pending = aior->prdtl;
 	sc = p->pr_sc;
 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE);
 
 	if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
 	    cfis[2] == ATA_READ_FPDMA_QUEUED ||
 	    cfis[2] == ATA_SEND_FPDMA_QUEUED)
 		ncq = 1;
 	if (cfis[2] == ATA_DATA_SET_MANAGEMENT ||
 	    (cfis[2] == ATA_SEND_FPDMA_QUEUED &&
 	     (cfis[13] & 0x1f) == ATA_SFPDMA_DSM))
 		dsm = 1;
 
 	pthread_mutex_lock(&sc->mtx);
 
 	/*
 	 * Delete the blockif request from the busy list
 	 */
 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
 
 	/*
 	 * Move the blockif request back to the free list
 	 */
 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
 
 	if (!err)
 		hdr->prdbc = aior->done;
 
-	if (dsm) {
-		if (aior->done != aior->len && !err) {
+	if (!err && aior->more) {
+		if (dsm)
 			ahci_handle_dsm_trim(p, slot, cfis, aior->done);
-			goto out;
-		}
-	} else {
-		if (pending && !err) {
-			ahci_handle_dma(p, slot, cfis, aior->done,
-			    hdr->prdtl - pending);
-			goto out;
-		}
+		else 
+			ahci_handle_rw(p, slot, cfis, aior->done);
+		goto out;
 	}
 
-	if (!err && aior->done == aior->len) {
+	if (!err)
 		tfd = ATA_S_READY | ATA_S_DSC;
-	} else {
+	else
 		tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR;
-	}
-
 	if (ncq)
 		ahci_write_fis_sdb(p, slot, cfis, tfd);
 	else
 		ahci_write_fis_d2h(p, slot, cfis, tfd);
 
 	/*
 	 * This command is now complete.
 	 */
 	p->pending &= ~(1 << slot);
 
 	ahci_check_stopped(p);
 out:
 	pthread_mutex_unlock(&sc->mtx);
 	DPRINTF("%s exit\n", __func__);
 }
 
 static void
 atapi_ioreq_cb(struct blockif_req *br, int err)
 {
 	struct ahci_cmd_hdr *hdr;
 	struct ahci_ioreq *aior;
 	struct ahci_port *p;
 	struct pci_ahci_softc *sc;
 	uint8_t *cfis;
 	uint32_t tfd;
-	int pending, slot;
+	int slot;
 
 	DPRINTF("%s %d\n", __func__, err);
 
 	aior = br->br_param;
 	p = aior->io_pr;
 	cfis = aior->cfis;
 	slot = aior->slot;
-	pending = aior->prdtl;
 	sc = p->pr_sc;
 	hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE);
 
 	pthread_mutex_lock(&sc->mtx);
 
 	/*
 	 * Delete the blockif request from the busy list
 	 */
 	TAILQ_REMOVE(&p->iobhd, aior, io_blist);
 
 	/*
 	 * Move the blockif request back to the free list
 	 */
 	STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
 
 	if (!err)
 		hdr->prdbc = aior->done;
 
-	if (pending && !err) {
-		atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
+	if (!err && aior->more) {
+		atapi_read(p, slot, cfis, aior->done);
 		goto out;
 	}
 
-	if (!err && aior->done == aior->len) {
+	if (!err) {
 		tfd = ATA_S_READY | ATA_S_DSC;
 	} else {
 		p->sense_key = ATA_SENSE_ILLEGAL_REQUEST;
 		p->asc = 0x21;
 		tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR;
 	}
-
 	cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
 	ahci_write_fis_d2h(p, slot, cfis, tfd);
 
 	/*
 	 * This command is now complete.
 	 */
 	p->pending &= ~(1 << slot);
 
 	ahci_check_stopped(p);
 out:
 	pthread_mutex_unlock(&sc->mtx);
 	DPRINTF("%s exit\n", __func__);
 }
 
 static void
 pci_ahci_ioreq_init(struct ahci_port *pr)
 {
 	struct ahci_ioreq *vr;
 	int i;
 
 	pr->ioqsz = blockif_queuesz(pr->bctx);
 	pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq));
 	STAILQ_INIT(&pr->iofhd);
 
 	/*
 	 * Add all i/o request entries to the free queue
 	 */
 	for (i = 0; i < pr->ioqsz; i++) {
 		vr = &pr->ioreq[i];
 		vr->io_pr = pr;
 		if (!pr->atapi)
 			vr->io_req.br_callback = ata_ioreq_cb;
 		else
 			vr->io_req.br_callback = atapi_ioreq_cb;
 		vr->io_req.br_param = vr;
 		STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
 	}
 
 	TAILQ_INIT(&pr->iobhd);
 }
 
 static void
 pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
 {
 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
 	struct ahci_port *p = &sc->port[port];
 
 	DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
 		port, offset, value);
 
 	switch (offset) {
 	case AHCI_P_CLB:
 		p->clb = value;
 		break;
 	case AHCI_P_CLBU:
 		p->clbu = value;
 		break;
 	case AHCI_P_FB:
 		p->fb = value;
 		break;
 	case AHCI_P_FBU:
 		p->fbu = value;
 		break;
 	case AHCI_P_IS:
 		p->is &= ~value;
 		break;
 	case AHCI_P_IE:
 		p->ie = value & 0xFDC000FF;
 		ahci_generate_intr(sc);
 		break;
 	case AHCI_P_CMD:
 	{
 		p->cmd = value;
 		
 		if (!(value & AHCI_P_CMD_ST)) {
 			ahci_port_stop(p);
 		} else {
 			uint64_t clb;
 
 			p->cmd |= AHCI_P_CMD_CR;
 			clb = (uint64_t)p->clbu << 32 | p->clb;
 			p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb,
 					AHCI_CL_SIZE * AHCI_MAX_SLOTS);
 		}
 
 		if (value & AHCI_P_CMD_FRE) {
 			uint64_t fb;
 
 			p->cmd |= AHCI_P_CMD_FR;
 			fb = (uint64_t)p->fbu << 32 | p->fb;
 			/* we don't support FBSCP, so rfis size is 256Bytes */
 			p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256);
 		} else {
 			p->cmd &= ~AHCI_P_CMD_FR;
 		}
 
 		if (value & AHCI_P_CMD_CLO) {
 			p->tfd = 0;
 			p->cmd &= ~AHCI_P_CMD_CLO;
 		}
 
 		ahci_handle_port(p);
 		break;
 	}
 	case AHCI_P_TFD:
 	case AHCI_P_SIG:
 	case AHCI_P_SSTS:
 		WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset);
 		break;
 	case AHCI_P_SCTL:
 		p->sctl = value;
 		if (!(p->cmd & AHCI_P_CMD_ST)) {
 			if (value & ATA_SC_DET_RESET)
 				ahci_port_reset(p);
 		}
 		break;
 	case AHCI_P_SERR:
 		p->serr &= ~value;
 		break;
 	case AHCI_P_SACT:
 		p->sact |= value;
 		break;
 	case AHCI_P_CI:
 		p->ci |= value;
 		ahci_handle_port(p);
 		break;
 	case AHCI_P_SNTF:
 	case AHCI_P_FBS:
 	default:
 		break;
 	}
 }
 
 static void
 pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
 {
 	DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n",
 		offset, value);
 
 	switch (offset) {
 	case AHCI_CAP:
 	case AHCI_PI:
 	case AHCI_VS:
 	case AHCI_CAP2:
 		DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset);
 		break;
 	case AHCI_GHC:
 		if (value & AHCI_GHC_HR)
 			ahci_reset(sc);
 		else if (value & AHCI_GHC_IE) {
 			sc->ghc |= AHCI_GHC_IE;
 			ahci_generate_intr(sc);
 		}
 		break;
 	case AHCI_IS:
 		sc->is &= ~value;
 		ahci_generate_intr(sc);
 		break;
 	default:
 		break;
 	}
 }
 
 static void
 pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
 		int baridx, uint64_t offset, int size, uint64_t value)
 {
 	struct pci_ahci_softc *sc = pi->pi_arg;
 
 	assert(baridx == 5);
 	assert(size == 4);
 
 	pthread_mutex_lock(&sc->mtx);
 
 	if (offset < AHCI_OFFSET)
 		pci_ahci_host_write(sc, offset, value);
 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
 		pci_ahci_port_write(sc, offset, value);
 	else
 		WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset);
 
 	pthread_mutex_unlock(&sc->mtx);
 }
 
 static uint64_t
 pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset)
 {
 	uint32_t value;
 
 	switch (offset) {
 	case AHCI_CAP:
 	case AHCI_GHC:
 	case AHCI_IS:
 	case AHCI_PI:
 	case AHCI_VS:
 	case AHCI_CCCC:
 	case AHCI_CCCP:
 	case AHCI_EM_LOC:
 	case AHCI_EM_CTL:
 	case AHCI_CAP2:
 	{
 		uint32_t *p = &sc->cap;
 		p += (offset - AHCI_CAP) / sizeof(uint32_t);
 		value = *p;
 		break;
 	}
 	default:
 		value = 0;
 		break;
 	}
 	DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n",
 		offset, value);
 
 	return (value);
 }
 
 static uint64_t
 pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset)
 {
 	uint32_t value;
 	int port = (offset - AHCI_OFFSET) / AHCI_STEP;
 	offset = (offset - AHCI_OFFSET) % AHCI_STEP;
 
 	switch (offset) {
 	case AHCI_P_CLB:
 	case AHCI_P_CLBU:
 	case AHCI_P_FB:
 	case AHCI_P_FBU:
 	case AHCI_P_IS:
 	case AHCI_P_IE:
 	case AHCI_P_CMD:
 	case AHCI_P_TFD:
 	case AHCI_P_SIG:
 	case AHCI_P_SSTS:
 	case AHCI_P_SCTL:
 	case AHCI_P_SERR:
 	case AHCI_P_SACT:
 	case AHCI_P_CI:
 	case AHCI_P_SNTF:
 	case AHCI_P_FBS:
 	{
 		uint32_t *p= &sc->port[port].clb;
 		p += (offset - AHCI_P_CLB) / sizeof(uint32_t);
 		value = *p;
 		break;
 	}
 	default:
 		value = 0;
 		break;
 	}
 
 	DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n",
 		port, offset, value);
 
 	return value;
 }
 
 static uint64_t
 pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
     uint64_t offset, int size)
 {
 	struct pci_ahci_softc *sc = pi->pi_arg;
 	uint32_t value;
 
 	assert(baridx == 5);
 	assert(size == 4);
 
 	pthread_mutex_lock(&sc->mtx);
 
 	if (offset < AHCI_OFFSET)
 		value = pci_ahci_host_read(sc, offset);
 	else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP)
 		value = pci_ahci_port_read(sc, offset);
 	else {
 		value = 0;
 		WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", offset);
 	}
 
 	pthread_mutex_unlock(&sc->mtx);
 
 	return (value);
 }
 
 static int
 pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
 {
 	char bident[sizeof("XX:X:X")];
 	struct blockif_ctxt *bctxt;
 	struct pci_ahci_softc *sc;
 	int ret, slots;
 	MD5_CTX mdctx;
 	u_char digest[16];
 
 	ret = 0;
 
 	if (opts == NULL) {
 		fprintf(stderr, "pci_ahci: backing device required\n");
 		return (1);
 	}
 
 #ifdef AHCI_DEBUG
 	dbg = fopen("/tmp/log", "w+");
 #endif
 
 	sc = calloc(1, sizeof(struct pci_ahci_softc));
 	pi->pi_arg = sc;
 	sc->asc_pi = pi;
 	sc->ports = MAX_PORTS;
 
 	/*
 	 * Only use port 0 for a backing device. All other ports will be
 	 * marked as unused
 	 */
 	sc->port[0].atapi = atapi;
 
 	/*
 	 * Attempt to open the backing image. Use the PCI
 	 * slot/func for the identifier string.
 	 */
 	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
 	bctxt = blockif_open(opts, bident);
 	if (bctxt == NULL) {       	
 		ret = 1;
 		goto open_fail;
 	}	
 	sc->port[0].bctx = bctxt;
 	sc->port[0].pr_sc = sc;
 
 	/*
 	 * Create an identifier for the backing file. Use parts of the
 	 * md5 sum of the filename
 	 */
 	MD5Init(&mdctx);
 	MD5Update(&mdctx, opts, strlen(opts));
 	MD5Final(digest, &mdctx);	
 	sprintf(sc->port[0].ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
 	    digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
 
 	/*
 	 * Allocate blockif request structures and add them
 	 * to the free list
 	 */
 	pci_ahci_ioreq_init(&sc->port[0]);
 
 	pthread_mutex_init(&sc->mtx, NULL);
 
 	/* Intel ICH8 AHCI */
 	slots = sc->port[0].ioqsz;
 	if (slots > 32)
 		slots = 32;
 	--slots;
 	sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF |
 	    AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP |
 	    AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)|
 	    AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC |
 	    (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1);
 
 	/* Only port 0 implemented */
 	sc->pi = 1;
 	sc->vs = 0x10300;
 	sc->cap2 = AHCI_CAP2_APST;
 	ahci_reset(sc);
 
 	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821);
 	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086);
 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
 	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA);
 	pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0);
 	pci_emul_add_msicap(pi, 1);
 	pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32,
 	    AHCI_OFFSET + sc->ports * AHCI_STEP);
 
 	pci_lintr_request(pi);
 
 open_fail:
 	if (ret) {
 		if (sc->port[0].bctx != NULL)
 			blockif_close(sc->port[0].bctx);
 		free(sc);
 	}
 
 	return (ret);
 }
 
 static int
 pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
 {
 
 	return (pci_ahci_init(ctx, pi, opts, 0));
 }
 
 static int
 pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
 {
 
 	return (pci_ahci_init(ctx, pi, opts, 1));
 }
 
 /*
  * Use separate emulation names to distinguish drive and atapi devices
  */
 struct pci_devemu pci_de_ahci_hd = {
 	.pe_emu =	"ahci-hd",
 	.pe_init =	pci_ahci_hd_init,
 	.pe_barwrite =	pci_ahci_write,
 	.pe_barread =	pci_ahci_read
 };
 PCI_EMUL_SET(pci_de_ahci_hd);
 
 struct pci_devemu pci_de_ahci_cd = {
 	.pe_emu =	"ahci-cd",
 	.pe_init =	pci_ahci_atapi_init,
 	.pe_barwrite =	pci_ahci_write,
 	.pe_barread =	pci_ahci_read
 };
 PCI_EMUL_SET(pci_de_ahci_cd);
Index: user/ngie/more-tests/usr.sbin/bhyve
===================================================================
--- user/ngie/more-tests/usr.sbin/bhyve	(revision 281675)
+++ user/ngie/more-tests/usr.sbin/bhyve	(revision 281676)

Property changes on: user/ngie/more-tests/usr.sbin/bhyve
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/usr.sbin/bhyve:r281621-281675
Index: user/ngie/more-tests/usr.sbin/crunch/crunchide/exec_aout.c
===================================================================
--- user/ngie/more-tests/usr.sbin/crunch/crunchide/exec_aout.c	(revision 281675)
+++ user/ngie/more-tests/usr.sbin/crunch/crunchide/exec_aout.c	(nonexistent)
@@ -1,198 +0,0 @@
-/*	$NetBSD: exec_aout.c,v 1.6 1997/08/02 21:30:17 perry Exp $	*/
-/*
- * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
- * Copyright (c) 1994 University of Maryland
- * All Rights Reserved.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of U.M. not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  U.M. makes no representations about the
- * suitability of this software for any purpose.  It is provided "as is"
- * without express or implied warranty.
- *
- * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- *
- * Author: James da Silva, Systems Design and Analysis Group
- *			   Computer Science Department
- *			   University of Maryland at College Park
- */
-#include <sys/cdefs.h>
-#ifndef lint
-__RCSID("$NetBSD: exec_aout.c,v 1.6 1997/08/02 21:30:17 perry Exp $");
-__FBSDID("$FreeBSD$");
-#endif
- 
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <a.out.h>
-#include <sys/types.h>
-#include <sys/endian.h>
-#include <sys/stat.h> 
-#include <sys/errno.h>
-#include <netinet/in.h>
-
-#include "extern.h"
-
-#if defined(NLIST_AOUT)
-
-int nsyms, ntextrel, ndatarel;
-struct exec *hdrp;
-char *aoutdata, *strbase;
-struct relocation_info *textrel, *datarel;
-struct nlist *symbase;
-
-
-#define SYMSTR(sp)	(&strbase[(sp)->n_un.n_strx])
-
-/* is the symbol a global symbol defined in the current file? */
-#define IS_GLOBAL_DEFINED(sp) \
-                  (((sp)->n_type & N_EXT) && ((sp)->n_type & N_TYPE) != N_UNDF)
-
-/* is the relocation entry dependent on a symbol? */
-#define IS_SYMBOL_RELOC(rp)   \
-                  ((rp)->r_extern||(rp)->r_baserel||(rp)->r_jmptable)
-
-static void check_reloc(const char *filename, struct relocation_info *relp);
-
-int check_aout(int inf, const char *filename)
-{
-    struct stat infstat;
-    struct exec eh;
-
-    /*
-     * check the header to make sure it's an a.out-format file.
-     */
-
-    if(fstat(inf, &infstat) == -1)
-	return 0;
-    if(infstat.st_size < sizeof eh)
-	return 0;
-    if(read(inf, &eh, sizeof eh) != sizeof eh)
-	return 0;
-
-    if(N_BADMAG(eh))
-	return 0;
-
-    return 1;
-}
-
-int hide_aout(int inf, const char *filename)
-{
-    struct stat infstat;
-    struct relocation_info *relp;
-    struct nlist *symp;
-    int rc;
-
-    /*
-     * do some error checking.
-     */
-
-    if(fstat(inf, &infstat) == -1) {
-	perror(filename);
-	return 1;
-    }
-
-    /*
-     * Read the entire file into memory.  XXX - Really, we only need to
-     * read the header and from TRELOFF to the end of the file.
-     */
-
-    if((aoutdata = (char *) malloc(infstat.st_size)) == NULL) {
-	fprintf(stderr, "%s: too big to read into memory\n", filename);
-	return 1;
-    }
-
-    if((rc = read(inf, aoutdata, infstat.st_size)) < infstat.st_size) {
-	fprintf(stderr, "%s: read error: %s\n", filename,
-		rc == -1? strerror(errno) : "short read");
-	return 1;
-    }
-
-    /*
-     * Calculate offsets and sizes from the header.
-     */
-
-    hdrp = (struct exec *) aoutdata;
-
-#ifdef __FreeBSD__
-    textrel = (struct relocation_info *) (aoutdata + N_RELOFF(*hdrp));
-    datarel = (struct relocation_info *) (aoutdata + N_RELOFF(*hdrp) +
-					  hdrp->a_trsize);
-#else
-    textrel = (struct relocation_info *) (aoutdata + N_TRELOFF(*hdrp));
-    datarel = (struct relocation_info *) (aoutdata + N_DRELOFF(*hdrp));
-#endif
-    symbase = (struct nlist *)		 (aoutdata + N_SYMOFF(*hdrp));
-    strbase = (char *) 			 (aoutdata + N_STROFF(*hdrp));
-
-    ntextrel = hdrp->a_trsize / sizeof(struct relocation_info);
-    ndatarel = hdrp->a_drsize / sizeof(struct relocation_info);
-    nsyms    = hdrp->a_syms   / sizeof(struct nlist);
-
-    /*
-     * Zap the type field of all globally-defined symbols.  The linker will
-     * subsequently ignore these entries.  Don't zap any symbols in the
-     * keep list.
-     */
-
-    for(symp = symbase; symp < symbase + nsyms; symp++) {
-	if(!IS_GLOBAL_DEFINED(symp))		/* keep undefined syms */
-	    continue;
-
-	/* keep (C) symbols which are on the keep list */
-	if(SYMSTR(symp)[0] == '_' && in_keep_list(SYMSTR(symp) + 1))
-	    continue;
-
-	symp->n_type = 0;
-    }
-
-    /*
-     * Check whether the relocation entries reference any symbols that we
-     * just zapped.  I don't know whether ld can handle this case, but I
-     * haven't encountered it yet.  These checks are here so that the program
-     * doesn't fail silently should such symbols be encountered.
-     */
-
-    for(relp = textrel; relp < textrel + ntextrel; relp++)
-	check_reloc(filename, relp);
-    for(relp = datarel; relp < datarel + ndatarel; relp++)
-	check_reloc(filename, relp);
-
-    /*
-     * Write the .o file back out to disk.  XXX - Really, we only need to
-     * write the symbol table entries back out.
-     */
-    lseek(inf, 0, SEEK_SET);
-    if((rc = write(inf, aoutdata, infstat.st_size)) < infstat.st_size) {
-	fprintf(stderr, "%s: write error: %s\n", filename,
-		rc == -1? strerror(errno) : "short write");
-	return 1;
-    }
-
-    return 0;
-}
-
-
-static void check_reloc(const char *filename, struct relocation_info *relp)
-{
-    /* bail out if we zapped a symbol that is needed */
-    if(IS_SYMBOL_RELOC(relp) && symbase[relp->r_symbolnum].n_type == 0) {
-	fprintf(stderr,
-		"%s: oops, have hanging relocation for %s: bailing out!\n",
-		filename, SYMSTR(&symbase[relp->r_symbolnum]));
-	exit(1);
-    }
-}
-
-#endif /* defined(NLIST_AOUT) */

Property changes on: user/ngie/more-tests/usr.sbin/crunch/crunchide/exec_aout.c
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: user/ngie/more-tests/usr.sbin/crunch/crunchide/Makefile
===================================================================
--- user/ngie/more-tests/usr.sbin/crunch/crunchide/Makefile	(revision 281675)
+++ user/ngie/more-tests/usr.sbin/crunch/crunchide/Makefile	(revision 281676)
@@ -1,24 +1,9 @@
 # $FreeBSD$
 
 PROG=   crunchide
-SRCS=	crunchide.c
+SRCS=	crunchide.c exec_elf32.c exec_elf64.c
 
-TARGET_ARCH?=	${MACHINE_ARCH}
-
-.if ${TARGET_ARCH} == i386 && ${MACHINE_ARCH} == i386
-CFLAGS+=-DNLIST_AOUT
-SRCS+=	exec_aout.c
-.endif
-
-.if ${TARGET_ARCH} == aarch64 || ${TARGET_ARCH} == amd64 || \
-    ${TARGET_ARCH} == powerpc64 || \
-    ${TARGET_ARCH} == sparc64 || ${TARGET_ARCH:Mmips64*}
-CFLAGS+=-DNLIST_ELF64
-SRCS+=	exec_elf64.c
+CFLAGS+=-DNLIST_ELF32 -DNLIST_ELF64
 exec_elf64.o: exec_elf32.c
-.else
-CFLAGS+=-DNLIST_ELF32
-SRCS+=	exec_elf32.c
-.endif
 
 .include <bsd.prog.mk>
Index: user/ngie/more-tests/usr.sbin/crunch/crunchide/crunchide.c
===================================================================
--- user/ngie/more-tests/usr.sbin/crunch/crunchide/crunchide.c	(revision 281675)
+++ user/ngie/more-tests/usr.sbin/crunch/crunchide/crunchide.c	(revision 281676)
@@ -1,273 +1,267 @@
 /*	$NetBSD: crunchide.c,v 1.8 1997/11/01 06:51:45 lukem Exp $	*/
 /*
  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
  * Copyright (c) 1994 University of Maryland
  * All Rights Reserved.
  *
  * Permission to use, copy, modify, distribute, and sell this software and its
  * documentation for any purpose is hereby granted without fee, provided that
  * the above copyright notice appear in all copies and that both that
  * copyright notice and this permission notice appear in supporting
  * documentation, and that the name of U.M. not be used in advertising or
  * publicity pertaining to distribution of the software without specific,
  * written prior permission.  U.M. makes no representations about the
  * suitability of this software for any purpose.  It is provided "as is"
  * without express or implied warranty.
  *
  * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  *
  * Author: James da Silva, Systems Design and Analysis Group
  *			   Computer Science Department
  *			   University of Maryland at College Park
  */
 /*
  * crunchide.c - tiptoes through an a.out symbol table, hiding all defined
  *	global symbols.  Allows the user to supply a "keep list" of symbols
  *	that are not to be hidden.  This program relies on the use of the
  * 	linker's -dc flag to actually put global bss data into the file's
  * 	bss segment (rather than leaving it as undefined "common" data).
  *
  * 	The point of all this is to allow multiple programs to be linked
  *	together without getting multiple-defined errors.
  *
  *	For example, consider a program "foo.c".  It can be linked with a
  *	small stub routine, called "foostub.c", eg:
  *	    int foo_main(int argc, char **argv){ return main(argc, argv); }
  *      like so:
  *	    cc -c foo.c foostub.c
  *	    ld -dc -r foo.o foostub.o -o foo.combined.o
  *	    crunchide -k _foo_main foo.combined.o
  *	at this point, foo.combined.o can be linked with another program
  * 	and invoked with "foo_main(argc, argv)".  foo's main() and any
  * 	other globals are hidden and will not conflict with other symbols.
  *
  * TODO:
  *	- resolve the theoretical hanging reloc problem (see check_reloc()
  *	  below). I have yet to see this problem actually occur in any real
  *	  program. In what cases will gcc/gas generate code that needs a
  *	  relative reloc from a global symbol, other than PIC?  The
  *	  solution is to not hide the symbol from the linker in this case,
  *	  but to generate some random name for it so that it doesn't link
  *	  with anything but holds the place for the reloc.
  *      - arrange that all the BSS segments start at the same address, so
  *	  that the final crunched binary BSS size is the max of all the
  *	  component programs' BSS sizes, rather than their sum.
  */
 
 #include <sys/cdefs.h>
 #ifndef lint
 __RCSID("$NetBSD: crunchide.c,v 1.8 1997/11/01 06:51:45 lukem Exp $");
 #endif
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/errno.h>
 #include <unistd.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <fcntl.h>
 #include <a.out.h>
 
 #include "extern.h"
 
 char *pname = "crunchide";
 
 void usage(void);
 
 void add_to_keep_list(char *symbol);
 void add_file_to_keep_list(char *filename);
 
 int hide_syms(const char *filename);
 
 int verbose;
 
 int main(int, char *[]);
 
 int
 main(int argc, char **argv)
 {
     int ch, errors;
 
     if(argc > 0) pname = argv[0];
 
     while ((ch = getopt(argc, argv, "k:f:v")) != -1)
 	switch(ch) {
 	case 'k':
 	    add_to_keep_list(optarg);
 	    break;
 	case 'f':
 	    add_file_to_keep_list(optarg);
 	    break;
 	case 'v':
 	    verbose = 1;
 	    break;
 	default:
 	    usage();
 	}
 
     argc -= optind;
     argv += optind;
 
     if(argc == 0) usage();
 
     errors = 0;
     while(argc) {
 	if (hide_syms(*argv))
 		errors = 1;
 	argc--, argv++;
     }
 
     return errors;
 }
 
 void
 usage(void)
 {
     fprintf(stderr,
 	    "usage: %s [-k <symbol-name>] [-f <keep-list-file>] <files> ...\n",
 	    pname);
     exit(1);
 }
 
 /* ---------------------------- */
 
 struct keep {
     struct keep *next;
     char *sym;
 } *keep_list;
 
 void
 add_to_keep_list(char *symbol)
 {
     struct keep *newp, *prevp, *curp;
     int cmp;
 
     cmp = 0;
 
     for(curp = keep_list, prevp = NULL; curp; prevp = curp, curp = curp->next)
 	if((cmp = strcmp(symbol, curp->sym)) <= 0) break;
 
     if(curp && cmp == 0)
 	return;	/* already in table */
 
     newp = (struct keep *) malloc(sizeof(struct keep));
     if(newp) newp->sym = strdup(symbol);
     if(newp == NULL || newp->sym == NULL) {
 	fprintf(stderr, "%s: out of memory for keep list\n", pname);
 	exit(1);
     }
 
     newp->next = curp;
     if(prevp) prevp->next = newp;
     else keep_list = newp;
 }
 
 int
 in_keep_list(const char *symbol)
 {
     struct keep *curp;
     int cmp;
 
     cmp = 0;
 
     for(curp = keep_list; curp; curp = curp->next)
 	if((cmp = strcmp(symbol, curp->sym)) <= 0) break;
 
     return curp && cmp == 0;
 }
 
 void
 add_file_to_keep_list(char *filename)
 {
     FILE *keepf;
     char symbol[1024];
     int len;
 
     if((keepf = fopen(filename, "r")) == NULL) {
 	perror(filename);
 	usage();
     }
 
     while(fgets(symbol, sizeof(symbol), keepf)) {
 	len = strlen(symbol);
 	if(len && symbol[len-1] == '\n')
 	    symbol[len-1] = '\0';
 
 	add_to_keep_list(symbol);
     }
     fclose(keepf);
 }
 
 /* ---------------------------- */
 
 struct {
 	const char *name;
 	int	(*check)(int, const char *);	/* 1 if match, zero if not */
 	int	(*hide)(int, const char *);	/* non-zero if error */
 } exec_formats[] = {
-#ifdef NLIST_AOUT
-	{	"a.out",	check_aout,	hide_aout,	},
-#endif
-#ifdef NLIST_ECOFF
-	{	"ECOFF",	check_elf64,	hide_elf64,	},
-#endif
 #ifdef NLIST_ELF32
 	{	"ELF32",	check_elf32,	hide_elf32,	},
 #endif
 #ifdef NLIST_ELF64
 	{	"ELF64",	check_elf64,	hide_elf64,	},
 #endif
 };
 
 int
 hide_syms(const char *filename)
 {
 	int fd, i, n, rv;
 
 	fd = open(filename, O_RDWR, 0);
 	if (fd == -1) {
 		perror(filename);
 		return 1;
 	}
 
 	rv = 0;
 
         n = sizeof exec_formats / sizeof exec_formats[0];
         for (i = 0; i < n; i++) {
 		if (lseek(fd, 0, SEEK_SET) != 0) {
 			perror(filename);
 			goto err;
 		}
                 if ((*exec_formats[i].check)(fd, filename) != 0)
                         break;
 	}
 	if (i == n) {
 		fprintf(stderr, "%s: unknown executable format\n", filename);
 		goto err;
 	}
 
 	if (verbose)
 		fprintf(stderr, "%s is an %s binary\n", filename,
 		    exec_formats[i].name);
 
 	if (lseek(fd, 0, SEEK_SET) != 0) {
 		perror(filename);
 		goto err;
 	}
 	rv = (*exec_formats[i].hide)(fd, filename);
 
 out:
 	close (fd);
 	return (rv);
 
 err:
 	rv = 1;
 	goto out;
 }
Index: user/ngie/more-tests/usr.sbin/crunch/crunchide/exec_elf32.c
===================================================================
--- user/ngie/more-tests/usr.sbin/crunch/crunchide/exec_elf32.c	(revision 281675)
+++ user/ngie/more-tests/usr.sbin/crunch/crunchide/exec_elf32.c	(revision 281676)
@@ -1,498 +1,501 @@
 /*
  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Christopher G. Demetriou
  *	for the NetBSD Project.
  * 4. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 #ifndef lint
 #if 0
 __RCSID("$NetBSD: exec_elf32.c,v 1.6 1999/09/20 04:12:16 christos Exp $");
 #endif
 #endif
 __FBSDID("$FreeBSD$");
  
 #ifndef ELFSIZE
 #define ELFSIZE         32
 #endif
 
 #include <sys/types.h>
 #include <sys/endian.h>
 #include <sys/stat.h>
 
 #include <errno.h>
 #include <limits.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 
 #include "extern.h"
 
 #if (defined(NLIST_ELF32) && (ELFSIZE == 32)) || \
     (defined(NLIST_ELF64) && (ELFSIZE == 64))
 
 #define	__ELF_WORD_SIZE ELFSIZE
 #if (ELFSIZE == 32)
 #include <sys/elf32.h>
 #define	xewtoh(x)	((data == ELFDATA2MSB) ? be32toh(x) : le32toh(x))
 #define	htoxew(x)	((data == ELFDATA2MSB) ? htobe32(x) : htole32(x))
 #define	wewtoh(x)	((data == ELFDATA2MSB) ? be32toh(x) : le32toh(x))
 #define	htowew(x)	((data == ELFDATA2MSB) ? htobe32(x) : htole32(x))
 #elif (ELFSIZE == 64)
 #include <sys/elf64.h>
 #define	xewtoh(x)	((data == ELFDATA2MSB) ? be64toh(x) : le64toh(x))
 #define	htoxew(x)	((data == ELFDATA2MSB) ? htobe64(x) : htole64(x))
 /* elf64 Elf64_Word are 32 bits */
 #define	wewtoh(x)	((data == ELFDATA2MSB) ? be32toh(x) : le32toh(x))
 #define	htowew(x)	((data == ELFDATA2MSB) ? htobe32(x) : htole32(x))
 #endif
 #include <sys/elf_generic.h>
 
 #define CONCAT(x,y)     __CONCAT(x,y)
 #define ELFNAME(x)      CONCAT(elf,CONCAT(ELFSIZE,CONCAT(_,x)))
 #define ELFNAME2(x,y)   CONCAT(x,CONCAT(_elf,CONCAT(ELFSIZE,CONCAT(_,y))))
 #define ELFNAMEEND(x)   CONCAT(x,CONCAT(_elf,ELFSIZE))
 #define ELFDEFNNAME(x)  CONCAT(ELF,CONCAT(ELFSIZE,CONCAT(_,x)))
+#ifndef ELFCLASS
+#define ELFCLASS	CONCAT(ELFCLASS,ELFSIZE)
+#endif
 
 #define	xe16toh(x)	((data == ELFDATA2MSB) ? be16toh(x) : le16toh(x))
 #define	xe32toh(x)	((data == ELFDATA2MSB) ? be32toh(x) : le32toh(x))
 #define	htoxe32(x)	((data == ELFDATA2MSB) ? htobe32(x) : htole32(x))
 
 struct shlayout {
 	Elf_Shdr *shdr;
 	void *bufp;
 };
 
 static ssize_t
 xreadatoff(int fd, void *buf, off_t off, size_t size, const char *fn)
 {
 	ssize_t rv;
 
 	if (lseek(fd, off, SEEK_SET) != off) {
 		perror(fn);
 		return -1;
 	}
 	if ((size_t)(rv = read(fd, buf, size)) != size) {
 		fprintf(stderr, "%s: read error: %s\n", fn,
 		    rv == -1 ? strerror(errno) : "short read");
 		return -1;
 	}
 	return size;
 }
 
 static ssize_t
 xwriteatoff(int fd, void *buf, off_t off, size_t size, const char *fn)
 {
 	ssize_t rv;
 
 	if (lseek(fd, off, SEEK_SET) != off) {
 		perror(fn);
 		return -1;
 	}
 	if ((size_t)(rv = write(fd, buf, size)) != size) {
 		fprintf(stderr, "%s: write error: %s\n", fn,
 		    rv == -1 ? strerror(errno) : "short write");
 		return -1;
 	}
 	return size;
 }
 
 static void *
 xmalloc(size_t size, const char *fn, const char *use)
 {
 	void *rv;
 
 	rv = malloc(size);
 	if (rv == NULL)
 		fprintf(stderr, "%s: out of memory (allocating for %s)\n",
 		    fn, use);
 	return (rv);
 }
 
 static void *
 xrealloc(void *ptr, size_t size, const char *fn, const char *use)
 {
 	void *rv;
 		
 	rv = realloc(ptr, size);
 	if (rv == NULL) {
 		free(ptr);
 		fprintf(stderr, "%s: out of memory (reallocating for %s)\n",
 		    fn, use);
 	}
 	return (rv);
 } 
 
 int
 ELFNAMEEND(check)(int fd, const char *fn)
 {
 	Elf_Ehdr eh;
 	struct stat sb;
 	unsigned char data;
 
 	/*
 	 * Check the header to maek sure it's an ELF file (of the
 	 * appropriate size).
 	 */
 	if (fstat(fd, &sb) == -1)
 		return 0;
 	if (sb.st_size < (off_t)(sizeof eh))
 		return 0;
 	if (read(fd, &eh, sizeof eh) != sizeof eh)
 		return 0;
 
-	if (IS_ELF(eh) == 0)
+	if (IS_ELF(eh) == 0 || eh.e_ident[EI_CLASS] != ELFCLASS)
                 return 0;
 
 	data = eh.e_ident[EI_DATA];
 
 	switch (xe16toh(eh.e_machine)) {
 	case EM_386: break;
 	case EM_ALPHA: break;
 #ifndef EM_AARCH64
 #define	EM_AARCH64	183
 #endif
 	case EM_AARCH64: break;
 #ifndef EM_ARM
 #define EM_ARM		40
 #endif
 	case EM_ARM: break;
 #ifndef EM_MIPS
 #define EM_MIPS		8
 #endif
 #ifndef EM_MIPS_RS4_BE		/* same as EM_MIPS_RS3_LE */
 #define EM_MIPS_RS4_BE	10
 #endif
 	case EM_MIPS: break;
 	case /* EM_MIPS_RS3_LE */ EM_MIPS_RS4_BE: break;
 #ifndef EM_PPC
 #define	EM_PPC		20
 #endif
 	case EM_PPC: break;
 #ifndef EM_PPC64
 #define	EM_PPC64	21
 #endif
 	case EM_PPC64: break;
 #ifndef EM_SPARCV9
 #define	EM_SPARCV9	43
 #endif
 	case EM_SPARCV9: break;
 #ifndef EM_X86_64
 #define	EM_X86_64	62
 #endif
 	case EM_X86_64: break;
 /*        ELFDEFNNAME(MACHDEP_ID_CASES) */
 
         default:
                 return 0;
         }
 
 	return 1;
 }
 
 /*
  * This function 'hides' (some of) ELF executable file's symbols.
  * It hides them by renaming them to "_$$hide$$ <filename> <symbolname>".
  * Symbols in the global keep list, or which are marked as being undefined,
  * are left alone.
  *
  * An old version of this code shuffled various tables around, turning
  * global symbols to be hidden into local symbols.  That lost on the
  * mips, because CALL16 relocs must reference global symbols, and, if
  * those symbols were being hidden, they were no longer global.
  *
  * The new renaming behaviour doesn't take global symbols out of the
  * namespace.  However, it's ... unlikely that there will ever be
  * any collisions in practice because of the new method.
  */
 int
 ELFNAMEEND(hide)(int fd, const char *fn)
 {
 	Elf_Ehdr ehdr;
 	struct shlayout *layoutp = NULL;
 	Elf_Shdr *shdrp = NULL, *symtabshdr, *strtabshdr, *shstrtabshdr;
 	Elf_Shdr shdrshdr;
 	Elf_Sym *symtabp = NULL;
 	char *shstrtabp = NULL, *strtabp = NULL;
 	Elf_Size nsyms, ewi;
 	Elf_Off off;
 	ssize_t shdrsize;
 	int rv, i, weird, l, m, r, strtabidx;
 	size_t nstrtab_size, nstrtab_nextoff, fn_size, size;
 	char *nstrtabp = NULL;
 	unsigned char data;
 	const char *weirdreason = NULL;
 	void *buf;
 	Elf_Half shnum;
 
 	rv = 0;
 	if (xreadatoff(fd, &ehdr, 0, sizeof ehdr, fn) != sizeof ehdr)
 		goto bad;
 
 	data = ehdr.e_ident[EI_DATA];
 	shnum = xe16toh(ehdr.e_shnum);
 
 	shdrsize = shnum * xe16toh(ehdr.e_shentsize);
 	if ((shdrp = xmalloc(shdrsize, fn, "section header table")) == NULL)
 		goto bad;
 	if (xreadatoff(fd, shdrp, xewtoh(ehdr.e_shoff), shdrsize, fn) !=
 	    shdrsize)
 		goto bad;
 
 	symtabshdr = strtabshdr = shstrtabshdr = NULL;
 	weird = 0;
 	for (i = 0; i < shnum; i++) {
 		switch (xe32toh(shdrp[i].sh_type)) {
 		case SHT_SYMTAB:
 			if (symtabshdr != NULL) {
 				weird = 1;
 				weirdreason = "multiple symbol tables";
 			}
 			symtabshdr = &shdrp[i];
 			strtabshdr = &shdrp[xe32toh(shdrp[i].sh_link)];
 			break;
 		case SHT_STRTAB:
 			if (i == xe16toh(ehdr.e_shstrndx))
 				shstrtabshdr = &shdrp[i];
 			break;
 		}
 	}
 	if (symtabshdr == NULL)
 		goto out;
 	if (strtabshdr == NULL) {
 		weird = 1;
 		weirdreason = "string table does not exist";
 	}
 	if (shstrtabshdr == NULL) {
 		weird = 1;
 		weirdreason = "section header string table does not exist";
 	}
 	if (weirdreason == NULL)
 		weirdreason = "unsupported";
 	if (weird) {
 		fprintf(stderr, "%s: weird executable (%s)\n", fn, weirdreason);
 		goto bad;
 	}
 
 	/*
 	 * sort section layout table by offset
 	 */
 	layoutp = xmalloc((shnum + 1) * sizeof(struct shlayout),
 	    fn, "layout table");
 	if (layoutp == NULL)
 		goto bad;
 
 	/* add a pseudo entry to represent the section header table */
 	shdrshdr.sh_offset = ehdr.e_shoff;
 	shdrshdr.sh_size = htoxew(shdrsize);
 	shdrshdr.sh_addralign = htoxew(ELFSIZE / 8);
 	layoutp[shnum].shdr = &shdrshdr;
 
 	/* insert and sort normal section headers */
 	for (i = shnum; i-- != 0;) {
 		l = i + 1;
 		r = shnum;
 		while (l <= r) {
 			m = ( l + r) / 2;
 			if (xewtoh(shdrp[i].sh_offset) >
 			    xewtoh(layoutp[m].shdr->sh_offset))
 				l = m + 1;
 			else
 				r = m - 1;
 		}
 
 		if (r != i) {
 			memmove(&layoutp[i], &layoutp[i + 1],
 			    sizeof(struct shlayout) * (r - i));
 		}
 
 		layoutp[r].shdr = &shdrp[i];
 		layoutp[r].bufp = NULL;
 	}
 	++shnum;
 
 	/*
 	 * load up everything we need
 	 */
 
 	/* load section string table for debug use */
 	if ((shstrtabp = xmalloc(xewtoh(shstrtabshdr->sh_size), fn,
 	    "section string table")) == NULL)
 		goto bad;
 	if ((size_t)xreadatoff(fd, shstrtabp, xewtoh(shstrtabshdr->sh_offset),
 	    xewtoh(shstrtabshdr->sh_size), fn) != xewtoh(shstrtabshdr->sh_size))
 		goto bad;
 
 	/* we need symtab, strtab, and everything behind strtab */
 	strtabidx = INT_MAX;
 	for (i = 0; i < shnum; i++) {
 		if (layoutp[i].shdr == &shdrshdr) {
 			/* not load section header again */
 			layoutp[i].bufp = shdrp;
 			continue;
 		}
 		if (layoutp[i].shdr == shstrtabshdr) {
 			/* not load section string table again */
 			layoutp[i].bufp = shstrtabp;
 			continue;
 		}
 
 		if (layoutp[i].shdr == strtabshdr)
 			strtabidx = i;
 		if (layoutp[i].shdr == symtabshdr || i >= strtabidx) {
 			off = xewtoh(layoutp[i].shdr->sh_offset);
 			size = xewtoh(layoutp[i].shdr->sh_size);
 			layoutp[i].bufp = xmalloc(size, fn,
 			    shstrtabp + xewtoh(layoutp[i].shdr->sh_name));
 			if (layoutp[i].bufp == NULL)
 				goto bad;
 			if ((size_t)xreadatoff(fd, layoutp[i].bufp, off, size, fn) !=
 			    size)
 				goto bad;
 
 			/* set symbol table and string table */
 			if (layoutp[i].shdr == symtabshdr)
 				symtabp = layoutp[i].bufp;
 			else if (layoutp[i].shdr == strtabshdr)
 				strtabp = layoutp[i].bufp;
 		}
 	}
 
 	nstrtab_size = 256;
 	nstrtabp = xmalloc(nstrtab_size, fn, "new string table");
 	if (nstrtabp == NULL)
 		goto bad;
 	nstrtab_nextoff = 0;
 
 	fn_size = strlen(fn);
 
 	/* Prepare data structures for symbol movement. */
 	nsyms = xewtoh(symtabshdr->sh_size) / xewtoh(symtabshdr->sh_entsize);
 
 	/* move symbols, making them local */
 	for (ewi = 0; ewi < nsyms; ewi++) {
 		Elf_Sym *sp = &symtabp[ewi];
 		const char *symname = strtabp + xe32toh(sp->st_name);
 		size_t newent_len;
 		/*
 		 * make sure there's size for the next entry, even if it's
 		 * as large as it can be.
 		 *
 		 * "_$$hide$$ <filename> <symname><NUL>" ->
 		 *    9 + 3 + sizes of fn and sym name
 		 */
 		while ((nstrtab_size - nstrtab_nextoff) <
 		    strlen(symname) + fn_size + 12) {
 			nstrtab_size *= 2;
 			nstrtabp = xrealloc(nstrtabp, nstrtab_size, fn,
 			    "new string table");
 			if (nstrtabp == NULL)
 				goto bad;
 		}
 
 		sp->st_name = htowew(nstrtab_nextoff);
 
 		/* if it's a keeper or is undefined, don't rename it. */
 		if (in_keep_list(symname) ||
 		    (xe16toh(sp->st_shndx) == SHN_UNDEF)) {
 			newent_len = sprintf(nstrtabp + nstrtab_nextoff,
 			    "%s", symname) + 1;
 		} else {
 			newent_len = sprintf(nstrtabp + nstrtab_nextoff,
 			    "_$$hide$$ %s %s", fn, symname) + 1;
 		}
 		nstrtab_nextoff += newent_len;
 	}
 	strtabshdr->sh_size = htoxew(nstrtab_nextoff);
 
 	/*
 	 * update section header table in ascending order of offset
 	 */
 	for (i = strtabidx + 1; i < shnum; i++) {
 		Elf_Off off, align;
 		off = xewtoh(layoutp[i - 1].shdr->sh_offset) +
 		    xewtoh(layoutp[i - 1].shdr->sh_size);
 		align = xewtoh(layoutp[i].shdr->sh_addralign);
 		off = (off + (align - 1)) & ~(align - 1);
 		layoutp[i].shdr->sh_offset = htoxew(off);
 	}
 
 	/*
 	 * write data to the file in descending order of offset
 	 */
 	for (i = shnum; i-- != 0;) {
 		if (layoutp[i].shdr == strtabshdr) {
 			/* new string table */
 			buf = nstrtabp;
 		} else
 			buf = layoutp[i].bufp;
 
 		if (layoutp[i].shdr == &shdrshdr ||
 		    layoutp[i].shdr == symtabshdr || i >= strtabidx) {
 			if (buf == NULL)
 				goto bad;
 
 			/*
 			 * update the offset of section header table in elf
 			 * header if needed.
 			 */
 			if (layoutp[i].shdr == &shdrshdr &&
 			    ehdr.e_shoff != shdrshdr.sh_offset) {
 				ehdr.e_shoff = shdrshdr.sh_offset;
 				off = offsetof(Elf_Ehdr, e_shoff);
 				size = sizeof(Elf_Off);
 				if ((size_t)xwriteatoff(fd, &ehdr.e_shoff, off, size,
 				    fn) != size)
 					goto bad;
 			}
 
 			off = xewtoh(layoutp[i].shdr->sh_offset);
 			size = xewtoh(layoutp[i].shdr->sh_size);
 			if ((size_t)xwriteatoff(fd, buf, off, size, fn) != size)
 				goto bad;
 		}
 	}
 
 out:
 	if (layoutp != NULL) {
 		for (i = 0; i < shnum; i++) {
 			if (layoutp[i].bufp != NULL)
 				free(layoutp[i].bufp);
 		}
 		free(layoutp);
 	}
 	free(nstrtabp);
 	return (rv);
 
 bad:
 	rv = 1;
 	goto out;
 }
 
 #endif /* include this size of ELF */
Index: user/ngie/more-tests/usr.sbin/crunch/crunchide/extern.h
===================================================================
--- user/ngie/more-tests/usr.sbin/crunch/crunchide/extern.h	(revision 281675)
+++ user/ngie/more-tests/usr.sbin/crunch/crunchide/extern.h	(revision 281676)
@@ -1,51 +1,43 @@
 /*	$NetBSD: extern.h,v 1.5 1998/05/06 13:16:57 mycroft Exp $	*/
 /* $FreeBSD$ */
 
 /*
  * Copyright (c) 1997 Christopher G. Demetriou.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Christopher G. Demetriou
  *	for the NetBSD Project.
  * 4. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#ifdef NLIST_AOUT
-int	check_aout(int, const char *);
-int	hide_aout(int, const char *);
-#endif
-#ifdef NLIST_ECOFF
-int	check_ecoff(int, const char *);
-int	hide_ecoff(int, const char *);
-#endif
 #ifdef NLIST_ELF32
 int	check_elf32(int, const char *);
 int	hide_elf32(int, const char *);
 #endif
 #ifdef NLIST_ELF64
 int	check_elf64(int, const char *);
 int	hide_elf64(int, const char *);
 #endif
 
 int	in_keep_list(const char *symbol);
Index: user/ngie/more-tests
===================================================================
--- user/ngie/more-tests	(revision 281675)
+++ user/ngie/more-tests	(revision 281676)

Property changes on: user/ngie/more-tests
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r281621-281675