Index: projects/clang380-import/Makefile.inc1
===================================================================
--- projects/clang380-import/Makefile.inc1	(revision 293686)
+++ projects/clang380-import/Makefile.inc1	(revision 293687)
@@ -1,2419 +1,2414 @@
 #
 # $FreeBSD$
 #
 # Make command line options:
 #	-DNO_CLEANDIR run ${MAKE} clean, instead of ${MAKE} cleandir
 #	-DNO_CLEAN do not clean at all
 #	-DDB_FROM_SRC use the user/group databases in src/etc instead of
 #	    the system database when installing.
 #	-DNO_SHARE do not go into share subdir
 #	-DKERNFAST define NO_KERNEL{CONFIG,CLEAN,DEPEND,OBJ}
 #	-DNO_KERNELCONFIG do not run config in ${MAKE} buildkernel
 #	-DNO_KERNELCLEAN do not run ${MAKE} clean in ${MAKE} buildkernel
 #	-DNO_KERNELDEPEND do not run ${MAKE} depend in ${MAKE} buildkernel
 #	-DNO_KERNELOBJ do not run ${MAKE} obj in ${MAKE} buildkernel
 #	-DNO_PORTSUPDATE do not update ports in ${MAKE} update
 #	-DNO_ROOT install without using root privilege
 #	-DNO_DOCUPDATE do not update doc in ${MAKE} update
 #	-DWITHOUT_CTF do not run the DTrace CTF conversion tools on built objects
 #	LOCAL_DIRS="list of dirs" to add additional dirs to the SUBDIR list
 #	LOCAL_ITOOLS="list of tools" to add additional tools to the ITOOLS list
 #	LOCAL_LIB_DIRS="list of dirs" to add additional dirs to libraries target
 #	LOCAL_MTREE="list of mtree files" to process to allow local directories
 #	    to be created before files are installed
 #	LOCAL_TOOL_DIRS="list of dirs" to add additional dirs to the build-tools
 #	    list
 #	METALOG="path to metadata log" to write permission and ownership
 #	    when NO_ROOT is set.  (default: ${DESTDIR}/METALOG)
 #	TARGET="machine" to crossbuild world for a different machine type
 #	TARGET_ARCH= may be required when a TARGET supports multiple endians
 #	BUILDENV_SHELL= shell to launch for the buildenv target (def:${SHELL})
 #	WORLD_FLAGS= additional flags to pass to make(1) during buildworld
 #	KERNEL_FLAGS= additional flags to pass to make(1) during buildkernel
 #	SUBDIR_OVERRIDE="list of dirs" to build rather than everything.
 #	    All libraries and includes, and some build tools will still build.
 
 #
 # The intended user-driven targets are:
 # buildworld  - rebuild *everything*, including glue to help do upgrades
 # installworld- install everything built by "buildworld"
 # doxygen     - build API documentation of the kernel
 # update      - convenient way to update your source tree (eg: svn/svnup)
 #
 # Standard targets (not defined here) are documented in the makefiles in
 # /usr/share/mk.  These include:
 #		obj depend all install clean cleandepend cleanobj
 
 .if !defined(TARGET) || !defined(TARGET_ARCH)
 .error "Both TARGET and TARGET_ARCH must be defined."
 .endif
 
 LOCALBASE?=	/usr/local
 
 # Cross toolchain changes must be in effect before bsd.compiler.mk
 # so that gets the right CC, and pass CROSS_TOOLCHAIN to submakes.
 .if defined(CROSS_TOOLCHAIN)
 .include "${LOCALBASE}/share/toolchains/${CROSS_TOOLCHAIN}.mk"
 CROSSENV+=CROSS_TOOLCHAIN="${CROSS_TOOLCHAIN}"
 .endif
 .include <bsd.compiler.mk>		# don't depend on src.opts.mk doing it
 .include "share/mk/src.opts.mk"	
 
 # We must do lib/ and libexec/ before bin/ in case of a mid-install error to
 # keep the users system reasonably usable.  For static->dynamic root upgrades,
 # we don't want to install a dynamic binary without rtld and the needed
 # libraries.  More commonly, for dynamic root, we don't want to install a
 # binary that requires a newer library version that hasn't been installed yet.
 # This ordering is not a guarantee though.  The only guarantee of a working
 # system here would require fine-grained ordering of all components based
 # on their dependencies.
 SRCDIR?=	${.CURDIR}
 .if !empty(SUBDIR_OVERRIDE)
 SUBDIR=	${SUBDIR_OVERRIDE}
 .else
 SUBDIR=	lib libexec
 .if make(install*)
 # Ensure libraries are installed before progressing.
 SUBDIR+=.WAIT
 .endif
 SUBDIR+=bin
 .if ${MK_CDDL} != "no"
 SUBDIR+=cddl
 .endif
 SUBDIR+=gnu include
 .if ${MK_KERBEROS} != "no"
 SUBDIR+=kerberos5
 .endif
 .if ${MK_RESCUE} != "no"
 SUBDIR+=rescue
 .endif
 SUBDIR+=sbin
 .if ${MK_CRYPT} != "no"
 SUBDIR+=secure
 .endif
 .if !defined(NO_SHARE)
 SUBDIR+=share
 .endif
 SUBDIR+=sys usr.bin usr.sbin
 .if ${MK_TESTS} != "no"
 SUBDIR+=	tests
 .endif
 .if ${MK_OFED} != "no"
 SUBDIR+=contrib/ofed
 .endif
 
 # Local directories are last, since it is nice to at least get the base
 # system rebuilt before you do them.
 .for _DIR in ${LOCAL_DIRS}
 .if exists(${.CURDIR}/${_DIR}/Makefile)
 SUBDIR+=	${_DIR}
 .endif
 .endfor
 # Add LOCAL_LIB_DIRS, but only if they will not be picked up as a SUBDIR
 # of a LOCAL_DIRS directory.  This allows LOCAL_DIRS=foo and
 # LOCAL_LIB_DIRS=foo/lib to behave as expected.
 .for _DIR in ${LOCAL_DIRS:M*/} ${LOCAL_DIRS:N*/:S|$|/|}
 _REDUNDENT_LIB_DIRS+=    ${LOCAL_LIB_DIRS:M${_DIR}*}
 .endfor
 .for _DIR in ${LOCAL_LIB_DIRS}
 .if empty(_REDUNDENT_LIB_DIRS:M${_DIR}) && exists(${.CURDIR}/${_DIR}/Makefile)
 SUBDIR+=	${_DIR}
 .else
 .warning ${_DIR} not added to SUBDIR list.  See UPDATING 20141121.
 .endif
 .endfor
 
 # We must do etc/ last as it hooks into building the man whatis file
 # by calling 'makedb' in share/man.  This is only relevant for
 # install/distribute so they build the whatis file after every manpage is
 # installed.
 .if make(install*)
 SUBDIR+=.WAIT
 .endif
 SUBDIR+=etc
 
 .endif	# !empty(SUBDIR_OVERRIDE)
 
 .if defined(NOCLEAN)
 .warning NOCLEAN option is deprecated. Use NO_CLEAN instead.
 NO_CLEAN=	${NOCLEAN}
 .endif
 .if defined(NO_CLEANDIR)
 CLEANDIR=	clean cleandepend
 .else
 CLEANDIR=	cleandir
 .endif
 
 LOCAL_TOOL_DIRS?=
 PACKAGEDIR?=	${DESTDIR}/${DISTDIR}
 
 .if empty(SHELL:M*csh*)
 BUILDENV_SHELL?=${SHELL}
 .else
 BUILDENV_SHELL?=/bin/sh
 .endif
 
 SVN?=		/usr/local/bin/svn
 SVNFLAGS?=	-r HEAD
 
 MAKEOBJDIRPREFIX?=	/usr/obj
 .if !defined(OSRELDATE)
 .if exists(/usr/include/osreldate.h)
 OSRELDATE!=	awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \
 		/usr/include/osreldate.h
 .else
 OSRELDATE=	0
 .endif
 .export OSRELDATE
 .endif
 
 # Set VERSION for CTFMERGE to use via the default CTFFLAGS=-L VERSION.
 .if !defined(VERSION) && !make(showconfig)
 REVISION!=	${MAKE} -C ${SRCDIR}/release -V REVISION
 BRANCH!=	${MAKE} -C ${SRCDIR}/release -V BRANCH
 SRCRELDATE!=	awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \
 		${SRCDIR}/sys/sys/param.h
 VERSION=	FreeBSD ${REVISION}-${BRANCH:C/-p[0-9]+$//} ${TARGET_ARCH} ${SRCRELDATE}
 .export VERSION
 .endif
 
 KNOWN_ARCHES?=	aarch64/arm64 \
 		amd64 \
 		arm \
 		armeb/arm \
 		armv6/arm \
 		armv6hf/arm \
 		i386 \
 		i386/pc98 \
 		mips \
 		mipsel/mips \
 		mips64el/mips \
 		mips64/mips \
 		mipsn32el/mips \
 		mipsn32/mips \
 		powerpc \
 		powerpc64/powerpc \
 		riscv64/riscv \
 		sparc64
 
 # Build the KNOWN_ARCHES lookup key: just TARGET when it equals TARGET_ARCH,
 # otherwise TARGET_ARCH/TARGET (the two spellings used in KNOWN_ARCHES).
 .if ${TARGET} == ${TARGET_ARCH}
 _t=		${TARGET}
 .else
 _t=		${TARGET_ARCH}/${TARGET}
 .endif
 # Single-iteration .for over the key so that the loop variable can serve as
 # the :M pattern below; abort parsing when the key is not in KNOWN_ARCHES.
 .for _t in ${_t}
 .if empty(KNOWN_ARCHES:M${_t})
 .error Unknown target ${TARGET_ARCH}:${TARGET}.
 .endif
 .endfor
 
 .if ${TARGET} == ${MACHINE}
 TARGET_CPUTYPE?=${CPUTYPE}
 .else
 TARGET_CPUTYPE?=
 .endif
 
 .if !empty(TARGET_CPUTYPE)
 _TARGET_CPUTYPE=${TARGET_CPUTYPE}
 .else
 _TARGET_CPUTYPE=dummy
 .endif
 # Skip for showconfig as it is just wasted time and may invoke auto.obj.mk.
 .if !make(showconfig)
 _CPUTYPE!=	MAKEFLAGS= CPUTYPE=${_TARGET_CPUTYPE} ${MAKE} \
 		-f /dev/null -m ${.CURDIR}/share/mk -V CPUTYPE
 .if ${_CPUTYPE} != ${_TARGET_CPUTYPE}
 .error CPUTYPE global should be set with ?=.
 .endif
 .endif
 .if make(buildworld)
 BUILD_ARCH!=	uname -p
 .if ${MACHINE_ARCH} != ${BUILD_ARCH}
 .error To cross-build, set TARGET_ARCH.
 .endif
 .endif
 .if ${MACHINE} == ${TARGET} && ${MACHINE_ARCH} == ${TARGET_ARCH} && !defined(CROSS_BUILD_TESTING)
 OBJTREE=	${MAKEOBJDIRPREFIX}
 .else
 OBJTREE=	${MAKEOBJDIRPREFIX}/${TARGET}.${TARGET_ARCH}
 .endif
 WORLDTMP=	${OBJTREE}${.CURDIR}/tmp
 BPATH=		${WORLDTMP}/legacy/usr/sbin:${WORLDTMP}/legacy/usr/bin:${WORLDTMP}/legacy/bin
 XPATH=		${WORLDTMP}/usr/sbin:${WORLDTMP}/usr/bin
 STRICTTMPPATH=	${BPATH}:${XPATH}
 TMPPATH=	${STRICTTMPPATH}:${PATH}
 
 #
 # Avoid running mktemp(1) unless actually needed.
 # It may not be functional, e.g., due to new ABI
 # when in the middle of installing over this system.
 #
 .if make(distributeworld) || make(installworld)
 INSTALLTMP!=	/usr/bin/mktemp -d -u -t install
 .endif
 
 #
 # Building a world goes through the following stages
 #
 # 1.1 legacy stage [BMAKE]
 #	This stage is responsible for creating compatibility
 #	shims that are needed by the bootstrap-tools,
 #	build-tools and cross-tools stages. These are generally
 #	APIs that tools from one of those three stages need to
 #	build that aren't present on the host.
 # 1.2 bootstrap-tools stage [BMAKE]
 #	This stage is responsible for creating programs that
 #	are needed for backward compatibility reasons. They
 #	are not built as cross-tools.
 # 2. build-tools stage [TMAKE]
 #	This stage is responsible for creating the object
 #	tree and building any tools that are needed during
 #	the build process. Some programs are listed during
 #	this phase because they build binaries to generate
 #	files needed to build these programs. This stage also
 #	builds the 'build-tools' target rather than 'all'.
 # 3. cross-tools stage [XMAKE]
 #	This stage is responsible for creating any tools that
 #	are needed for building the system. A cross-compiler is one
 #	of them. This differs from build tools in two ways:
 #	1. the 'all' target is built rather than 'build-tools'
 #	2. these tools are installed into TMPPATH for stage 4.
 # 4. world stage [WMAKE]
 #	This stage actually builds the world.
 # 5. install stage (optional) [IMAKE]
 #	This stage installs a previously built world.
 #
 
 BOOTSTRAPPING?=	0
 
 # Common environment for world related stages
 CROSSENV+=	MAKEOBJDIRPREFIX=${OBJTREE} \
 		MACHINE_ARCH=${TARGET_ARCH} \
 		MACHINE=${TARGET} \
 		CPUTYPE=${TARGET_CPUTYPE}
 .if ${MK_GROFF} != "no"
 CROSSENV+=	GROFF_BIN_PATH=${WORLDTMP}/legacy/usr/bin \
 		GROFF_FONT_PATH=${WORLDTMP}/legacy/usr/share/groff_font \
 		GROFF_TMAC_PATH=${WORLDTMP}/legacy/usr/share/tmac
 .endif
 .if defined(TARGET_CFLAGS)
 CROSSENV+=	${TARGET_CFLAGS}
 .endif
 
 # bootstrap-tools stage
 BMAKEENV=	INSTALL="sh ${.CURDIR}/tools/install.sh" \
 		PATH=${BPATH}:${PATH} \
 		WORLDTMP=${WORLDTMP} \
 		MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}"
 # need to keep this in sync with targets/pseudo/bootstrap-tools/Makefile
 BSARGS= 	DESTDIR= \
 		BOOTSTRAPPING=${OSRELDATE} \
 		SSP_CFLAGS= \
 		MK_HTML=no NO_LINT=yes MK_MAN=no \
 		-DNO_PIC MK_PROFILE=no -DNO_SHARED \
 		-DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \
 		MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \
 		MK_LLDB=no MK_TESTS=no \
 		MK_INCLUDES=yes
 
 BMAKE=		MAKEOBJDIRPREFIX=${WORLDTMP} \
 		${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \
 		${BSARGS}
 
 # build-tools stage
 TMAKE=		MAKEOBJDIRPREFIX=${OBJTREE} \
 		${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \
 		TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \
 		DESTDIR= \
 		BOOTSTRAPPING=${OSRELDATE} \
 		SSP_CFLAGS= \
 		-DNO_LINT \
 		-DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \
 		MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \
 		MK_LLDB=no MK_TESTS=no
 
 # cross-tools stage
 XMAKE=		TOOLS_PREFIX=${WORLDTMP} ${BMAKE} \
 		TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \
 		MK_GDB=no MK_TESTS=no
 
 # kernel-tools stage
 KTMAKEENV=	INSTALL="sh ${.CURDIR}/tools/install.sh" \
 		PATH=${BPATH}:${PATH} \
 		WORLDTMP=${WORLDTMP}
 KTMAKE=		TOOLS_PREFIX=${WORLDTMP} MAKEOBJDIRPREFIX=${WORLDTMP} \
 		${KTMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \
 		DESTDIR= \
 		BOOTSTRAPPING=${OSRELDATE} \
 		SSP_CFLAGS= \
 		MK_HTML=no -DNO_LINT MK_MAN=no \
 		-DNO_PIC MK_PROFILE=no -DNO_SHARED \
 		-DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no
 
 # world stage
 WMAKEENV=	${CROSSENV} \
 		_LDSCRIPTROOT= \
 		INSTALL="sh ${.CURDIR}/tools/install.sh" \
 		PATH=${TMPPATH}
 
 # make hierarchy
 HMAKE=		PATH=${TMPPATH} ${MAKE} LOCAL_MTREE=${LOCAL_MTREE:Q}
 .if defined(NO_ROOT)
 HMAKE+=		PATH=${TMPPATH} METALOG=${METALOG} -DNO_ROOT
 .endif
 
 .if defined(CROSS_TOOLCHAIN_PREFIX)
 CROSS_COMPILER_PREFIX?=${CROSS_TOOLCHAIN_PREFIX}
 CROSS_BINUTILS_PREFIX?=${CROSS_TOOLCHAIN_PREFIX}
 .endif
 
 # If we do not have a bootstrap binutils (because the in-tree one does not
 # support the target architecture), provide a default cross-binutils prefix.
 # This allows aarch64 builds, for example, to automatically use the
 # aarch64-binutils port or package.
 .if !make(showconfig)
 .if !empty(BROKEN_OPTIONS:MBINUTILS_BOOTSTRAP) && \
     !defined(CROSS_BINUTILS_PREFIX)
 CROSS_BINUTILS_PREFIX=/usr/local/${TARGET_ARCH}-freebsd/bin/
 .if !exists(${CROSS_BINUTILS_PREFIX})
 .error In-tree binutils does not support the ${TARGET_ARCH} architecture. Install the ${TARGET_ARCH}-binutils port or package or set CROSS_BINUTILS_PREFIX.
 .endif
 .endif
 .endif
 
 XCOMPILERS=	CC CXX CPP
 .for COMPILER in ${XCOMPILERS}
 .if defined(CROSS_COMPILER_PREFIX)
 X${COMPILER}?=	${CROSS_COMPILER_PREFIX}${${COMPILER}}
 .else
 X${COMPILER}?=	${${COMPILER}}
 .endif
 .endfor
 XBINUTILS=	AS AR LD NM OBJCOPY OBJDUMP RANLIB SIZE STRINGS
 .for BINUTIL in ${XBINUTILS}
 .if defined(CROSS_BINUTILS_PREFIX) && \
     exists(${CROSS_BINUTILS_PREFIX}${${BINUTIL}})
 X${BINUTIL}?=	${CROSS_BINUTILS_PREFIX}${${BINUTIL}}
 .else
 X${BINUTIL}?=	${${BINUTIL}}
 .endif
 .endfor
 # Hand the selected cross compiler and binutils to every world-stage sub-make.
 # XCFLAGS/XCXXFLAGS are folded into CC/CXX/CPP.  Every value is quoted so that
 # a multi-word X* override stays a single environment assignment.
 CROSSENV+=	CC="${XCC} ${XCFLAGS}" CXX="${XCXX} ${XCFLAGS} ${XCXXFLAGS}" \
 		DEPFLAGS="${DEPFLAGS}" \
 		CPP="${XCPP} ${XCFLAGS}" \
 		AS="${XAS}" AR="${XAR}" LD="${XLD}" NM="${XNM}" \
 		OBJDUMP="${XOBJDUMP}" OBJCOPY="${XOBJCOPY}" \
 		RANLIB="${XRANLIB}" STRINGS="${XSTRINGS}" \
 		SIZE="${XSIZE}"
 
 .if ${XCC:N${CCACHE_BIN}:M/*}
 .if defined(CROSS_BINUTILS_PREFIX)
 # In the case of xdev-build tools, CROSS_BINUTILS_PREFIX won't be a
 # directory, but the compiler will look in the right place for its
 # tools so we don't need to tell it where to look.
 .if exists(${CROSS_BINUTILS_PREFIX})
 BFLAGS+=	-B${CROSS_BINUTILS_PREFIX}
 .endif
 .else
 BFLAGS+=	-B${WORLDTMP}/usr/bin
 .endif
 .if ${TARGET} == "arm"
 .if ${TARGET_ARCH:M*hf*} != ""
 TARGET_ABI=	gnueabihf
 .else
 TARGET_ABI=	gnueabi
 .endif
 .endif
 .if defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc
 XCFLAGS+=	-isystem ${WORLDTMP}/usr/include -L${WORLDTMP}/usr/lib
 XCXXFLAGS+=	-I${WORLDTMP}/usr/include/c++/v1 -std=gnu++11 -L${WORLDTMP}/../lib/libc++
 # XXX: DEPFLAGS is a workaround for not properly passing CXXFLAGS to sub-makes
 # due to CXX="${XCXX} ${XCXXFLAGS}".  bsd.dep.mk does use CXXFLAGS when
 # building C++ files so this can come out if passing CXXFLAGS down is fixed.
 DEPFLAGS+=	-I${WORLDTMP}/usr/include/c++/v1
 .else
 TARGET_ABI?=	unknown
 TARGET_TRIPLE?=	${TARGET_ARCH:C/amd64/x86_64/}-${TARGET_ABI}-freebsd11.0
 XCFLAGS+=	-target ${TARGET_TRIPLE}
 .endif
 XCFLAGS+=	--sysroot=${WORLDTMP} ${BFLAGS}
 XCXXFLAGS+=	--sysroot=${WORLDTMP} ${BFLAGS}
 .else
 .if defined(CROSS_BINUTILS_PREFIX) && exists(${CROSS_BINUTILS_PREFIX})
 BFLAGS+=	-B${CROSS_BINUTILS_PREFIX}
 XCFLAGS+=	${BFLAGS}
 XCXXFLAGS+=	${BFLAGS}
 .endif
 .endif # ${XCC:M/*}
 
 WMAKE=		${WMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 DESTDIR=${WORLDTMP}
 
 .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64"
 # 32 bit world
 LIB32_OBJTREE=	${OBJTREE}${.CURDIR}/world32
 LIB32TMP=	${OBJTREE}${.CURDIR}/lib32
 
 .if ${TARGET_ARCH} == "amd64"
 .if empty(TARGET_CPUTYPE)
 LIB32CPUFLAGS=	-march=i686 -mmmx -msse -msse2
 .else
 LIB32CPUFLAGS=	-march=${TARGET_CPUTYPE}
 .endif
 LIB32WMAKEENV=	MACHINE=i386 MACHINE_ARCH=i386 \
 		MACHINE_CPU="i686 mmx sse sse2"
 LIB32WMAKEFLAGS=	\
 		AS="${XAS} --32" \
 		LD="${XLD} -m elf_i386_fbsd -Y P,${LIB32TMP}/usr/lib32" \
 		OBJCOPY="${XOBJCOPY}"
 
 .elif ${TARGET_ARCH} == "powerpc64"
 .if empty(TARGET_CPUTYPE)
 LIB32CPUFLAGS=	-mcpu=powerpc
 .else
 LIB32CPUFLAGS=	-mcpu=${TARGET_CPUTYPE}
 .endif
 LIB32WMAKEENV=	MACHINE=powerpc MACHINE_ARCH=powerpc
 LIB32WMAKEFLAGS=	\
 		LD="${XLD} -m elf32ppc_fbsd" \
 		OBJCOPY="${XOBJCOPY}"
 .endif
 
 
 LIB32FLAGS=	-m32 ${LIB32CPUFLAGS} -DCOMPAT_32BIT \
 		-isystem ${LIB32TMP}/usr/include/ \
 		-L${LIB32TMP}/usr/lib32 \
 		-B${LIB32TMP}/usr/lib32
 .if ${XCC:N${CCACHE_BIN}:M/*}
 LIB32FLAGS+=		--sysroot=${WORLDTMP}
 .endif
 
 # Yes, the flags are redundant.
 LIB32WMAKEENV+=	MAKEOBJDIRPREFIX=${LIB32_OBJTREE} \
 		_LDSCRIPTROOT=${LIB32TMP} \
 		INSTALL="sh ${.CURDIR}/tools/install.sh" \
 		PATH=${TMPPATH} \
 		LIBDIR=/usr/lib32 \
 		SHLIBDIR=/usr/lib32 \
 		DTRACE="${DTRACE} -32"
 LIB32WMAKEFLAGS+= CC="${XCC} ${LIB32FLAGS}" \
 		CXX="${XCXX} ${LIB32FLAGS}" \
 		DESTDIR=${LIB32TMP} \
 		-DCOMPAT_32BIT \
 		-DLIBRARIES_ONLY \
 		-DNO_CPU_CFLAGS \
 		MK_CTF=no \
 		-DNO_LINT \
 		MK_TESTS=no
 
 LIB32WMAKE=	${LIB32WMAKEENV} ${MAKE} ${LIB32WMAKEFLAGS} \
 		MK_MAN=no MK_HTML=no
 LIB32IMAKE=	${LIB32WMAKE:NINSTALL=*:NDESTDIR=*:N_LDSCRIPTROOT=*} \
 		MK_TOOLCHAIN=no ${IMAKE_INSTALL}
 .endif
 
 IMAKEENV=	${CROSSENV:N_LDSCRIPTROOT=*}
 IMAKE=		${IMAKEENV} ${MAKE} -f Makefile.inc1 \
 		${IMAKE_INSTALL} ${IMAKE_MTREE}
 .if empty(.MAKEFLAGS:M-n)
 IMAKEENV+=	PATH=${STRICTTMPPATH}:${INSTALLTMP} \
 		LD_LIBRARY_PATH=${INSTALLTMP} \
 		PATH_LOCALE=${INSTALLTMP}/locale
 IMAKE+=		__MAKE_SHELL=${INSTALLTMP}/sh
 .else
 IMAKEENV+=	PATH=${TMPPATH}:${INSTALLTMP}
 .endif
 .if defined(DB_FROM_SRC)
 INSTALLFLAGS+=	-N ${.CURDIR}/etc
 MTREEFLAGS+=	-N ${.CURDIR}/etc
 .endif
 _INSTALL_DDIR=	${DESTDIR}/${DISTDIR}
 INSTALL_DDIR=	${_INSTALL_DDIR:S://:/:g:C:/$::}
 .if defined(NO_ROOT)
 METALOG?=	${DESTDIR}/${DISTDIR}/METALOG
 IMAKE+=		-DNO_ROOT METALOG=${METALOG}
 INSTALLFLAGS+=	-U -M ${METALOG} -D ${INSTALL_DDIR}
 MTREEFLAGS+=	-W
 .endif
 .if defined(DB_FROM_SRC) || defined(NO_ROOT)
 IMAKE_INSTALL=	INSTALL="install ${INSTALLFLAGS}"
 IMAKE_MTREE=	MTREE_CMD="mtree ${MTREEFLAGS}"
 .endif
 
 # kernel stage
 KMAKEENV=	${WMAKEENV}
 KMAKE=		${KMAKEENV} ${MAKE} ${.MAKEFLAGS} ${KERNEL_FLAGS} KERNEL=${INSTKERNNAME}
 
 #
 # buildworld
 #
 # Attempt to rebuild the entire system, with reasonable chance of
 # success, regardless of how old your existing system is.
 #
 _worldtmp: .PHONY
 .if ${.CURDIR:C/[^,]//g} != ""
 #	The m4 build of sendmail files doesn't like it if ',' is used
 #	anywhere in the path of its files.
 	@echo
 	@echo "*** Error: path to source tree contains a comma ','"
 	@echo
 	false
 .endif
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Rebuilding the temporary build tree"
 	@echo "--------------------------------------------------------------"
 .if !defined(NO_CLEAN)
 	rm -rf ${WORLDTMP}
 .if defined(LIB32TMP)
 	rm -rf ${LIB32TMP}
 .endif
 .else
 	rm -rf ${WORLDTMP}/legacy/usr/include
 #	XXX - These can depend on any header file.
-	rm -f ${OBJTREE}${.CURDIR}/usr.bin/kdump/ioctl.c
+	rm -f ${OBJTREE}${.CURDIR}/lib/libsysdecode/ioctl.c
 	rm -f ${OBJTREE}${.CURDIR}/usr.bin/kdump/kdump_subr.c
-	rm -f ${OBJTREE}${.CURDIR}/usr.bin/truss/ioctl.c
 .endif
 .for _dir in \
     lib usr legacy/bin legacy/usr
 	mkdir -p ${WORLDTMP}/${_dir}
 .endfor
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${WORLDTMP}/legacy/usr >/dev/null
 .if ${MK_GROFF} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.groff.dist \
 	    -p ${WORLDTMP}/legacy/usr >/dev/null
 .endif
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${WORLDTMP}/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \
 	    -p ${WORLDTMP}/usr/include >/dev/null
 	ln -sf ${.CURDIR}/sys ${WORLDTMP}
 .if ${MK_DEBUG_FILES} != "no"
 	# We could instead disable debug files for these build stages
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \
 	    -p ${WORLDTMP}/legacy/usr/lib >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \
 	    -p ${WORLDTMP}/usr/lib >/dev/null
 .endif
 .if ${MK_LIB32} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \
 	    -p ${WORLDTMP}/usr >/dev/null
 .if ${MK_DEBUG_FILES} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \
 	    -p ${WORLDTMP}/legacy/usr/lib/debug/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \
 	    -p ${WORLDTMP}/usr/lib/debug/usr >/dev/null
 .endif
 .endif
 .if ${MK_TESTS} != "no"
 	mkdir -p ${WORLDTMP}${TESTSBASE}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \
 	    -p ${WORLDTMP}${TESTSBASE} >/dev/null
 .if ${MK_DEBUG_FILES} != "no"
 	mkdir -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \
 	    -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE} >/dev/null
 .endif
 .endif
 .for _mtree in ${LOCAL_MTREE}
 	mtree -deU -f ${.CURDIR}/${_mtree} -p ${WORLDTMP} > /dev/null
 .endfor
 # The buildworld stage targets below name no file.  Like _worldtmp they are
 # marked .PHONY so that a stray file with the same name in the source
 # directory cannot make a stage look up to date.
 #
 # Stage 1.1: run the "legacy" target of this makefile through ${BMAKE}.
 _legacy: .PHONY
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 1.1: legacy release compatibility shims"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${BMAKE} legacy
 # Stage 1.2: run the "bootstrap-tools" target through ${BMAKE}.
 _bootstrap-tools: .PHONY
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 1.2: bootstrap tools"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${BMAKE} bootstrap-tools
 # Stage 2.1: unless NO_CLEAN is defined, run ${CLEANDIR} through ${WMAKE},
 # and through ${LIB32WMAKE} as well when LIB32TMP is defined.
 _cleanobj: .PHONY
 .if !defined(NO_CLEAN)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.1: cleaning up the object tree"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${WMAKE} ${CLEANDIR}
 .if defined(LIB32TMP)
 	${_+_}cd ${.CURDIR}; ${LIB32WMAKE} -f Makefile.inc1 ${CLEANDIR}
 .endif
 .endif
 # Stage 2.2: run the "obj" target through ${WMAKE}.
 _obj: .PHONY
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.2: rebuilding the object tree"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${WMAKE} obj
 # Stage 2.3: run the "build-tools" target through ${TMAKE}.
 _build-tools: .PHONY
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.3: build tools"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${TMAKE} build-tools
 # Stage 3: run "cross-tools" and then "kernel-tools" through ${XMAKE}.
 _cross-tools: .PHONY
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 3: cross tools"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${XMAKE} cross-tools
 	${_+_}cd ${.CURDIR}; ${XMAKE} kernel-tools
 # Stage 4.1: run "includes" through ${WMAKE} with SHARED=symlinks.
 _includes: .PHONY
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 4.1: building includes"
 	@echo "--------------------------------------------------------------"
 # Special handling for SUBDIR_OVERRIDE in buildworld as they most likely need
 # headers from default SUBDIR.  Do SUBDIR_OVERRIDE includes last.
 	${_+_}cd ${.CURDIR}; ${WMAKE} SUBDIR_OVERRIDE= SHARED=symlinks \
 	    includes
 .if !empty(SUBDIR_OVERRIDE) && make(buildworld)
 	${_+_}cd ${.CURDIR}; ${WMAKE} SHARED=symlinks includes
 .endif
 # Stage 4.2: run "libraries" through ${WMAKE} with HTML, man pages, profiling
 # and tests switched off; MK_TESTS_SUPPORT carries the current MK_TESTS value.
 _libraries: .PHONY
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 4.2: building libraries"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; \
 	    ${WMAKE} -DNO_FSCHG MK_HTML=no -DNO_LINT MK_MAN=no \
 	    MK_PROFILE=no MK_TESTS=no MK_TESTS_SUPPORT=${MK_TESTS} libraries
 # Stage 4.3: run "depend" through ${WMAKE}.
 _depend: .PHONY
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 4.3: make dependencies"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${WMAKE} depend
 # Stage 4.4: run "all" through ${WMAKE} with _PARALLEL_SUBDIR_OK=1 in the
 # environment.
 everything: .PHONY
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 4.4: building everything"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; _PARALLEL_SUBDIR_OK=1 ${WMAKE} all
 .if defined(LIB32TMP)
 # Stage 5.1: populate ${LIB32TMP} and build the 32-bit libraries,
 # ld-elf32.so.1 and ldd32 with ${LIB32WMAKE}.  Every DIRPRFX value ends in
 # "/" so that it can be prepended directly to a subdirectory name.
 build32: .PHONY
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 5.1: building 32 bit shim libraries"
 	@echo "--------------------------------------------------------------"
 	mkdir -p ${LIB32TMP}/usr/include
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${LIB32TMP}/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \
 	    -p ${LIB32TMP}/usr/include >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \
 	    -p ${LIB32TMP}/usr >/dev/null
 .if ${MK_DEBUG_FILES} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \
 	    -p ${LIB32TMP}/usr/lib >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \
 	    -p ${LIB32TMP}/usr/lib/debug/usr >/dev/null
 .endif
 	mkdir -p ${WORLDTMP}
 	ln -sf ${.CURDIR}/sys ${WORLDTMP}
 # Run "obj" and then "includes" in each library directory.
 .for _t in obj includes
 	${_+_}cd ${.CURDIR}/include; ${LIB32WMAKE} DIRPRFX=include/ ${_t}
 	${_+_}cd ${.CURDIR}/lib; ${LIB32WMAKE} DIRPRFX=lib/ ${_t}
 .if ${MK_CDDL} != "no"
 	${_+_}cd ${.CURDIR}/cddl/lib; ${LIB32WMAKE} DIRPRFX=cddl/lib/ ${_t}
 .endif
 	${_+_}cd ${.CURDIR}/gnu/lib; ${LIB32WMAKE} DIRPRFX=gnu/lib/ ${_t}
 .if ${MK_CRYPT} != "no"
 	${_+_}cd ${.CURDIR}/secure/lib; ${LIB32WMAKE} DIRPRFX=secure/lib/ ${_t}
 .endif
 .if ${MK_KERBEROS} != "no"
 	${_+_}cd ${.CURDIR}/kerberos5/lib; ${LIB32WMAKE} DIRPRFX=kerberos5/lib/ ${_t}
 .endif
 .endfor
 .for _dir in usr.bin/lex/lib
 	${_+_}cd ${.CURDIR}/${_dir}; ${LIB32WMAKE} DIRPRFX=${_dir}/ obj
 .endfor
 # These directories get their "build-tools" target run with a plain ${MAKE}
 # (not ${LIB32WMAKE}), using ${LIB32_OBJTREE} as the object directory prefix.
 .for _dir in lib/ncurses/ncurses lib/ncurses/ncursesw lib/libmagic
 	${_+_}cd ${.CURDIR}/${_dir}; \
 	    WORLDTMP=${WORLDTMP} \
 	    MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}" \
 	    MAKEOBJDIRPREFIX=${LIB32_OBJTREE} ${MAKE} SSP_CFLAGS= DESTDIR= \
 	    DIRPRFX=${_dir}/ -DNO_LINT -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \
 	    build-tools
 .endfor
 	${_+_}cd ${.CURDIR}; \
 	    ${LIB32WMAKE} -f Makefile.inc1 -DNO_FSCHG libraries
 # PROG is overridden so the results are named ld-elf32.so.1 and ldd32.
 .for _t in obj depend all
 	${_+_}cd ${.CURDIR}/libexec/rtld-elf; PROG=ld-elf32.so.1 ${LIB32WMAKE} \
 	    -DNO_FSCHG DIRPRFX=libexec/rtld-elf/ ${_t}
 	${_+_}cd ${.CURDIR}/usr.bin/ldd; PROG=ldd32 ${LIB32WMAKE} \
 	    DIRPRFX=usr.bin/ldd/ ${_t}
 .endfor
 
 distribute32 install32: .MAKE .PHONY
 	${_+_}cd ${.CURDIR}/lib; ${LIB32IMAKE} ${.TARGET:S/32$//}
 .if ${MK_CDDL} != "no"
 	${_+_}cd ${.CURDIR}/cddl/lib; ${LIB32IMAKE} ${.TARGET:S/32$//}
 .endif
 	${_+_}cd ${.CURDIR}/gnu/lib; ${LIB32IMAKE} ${.TARGET:S/32$//}
 .if ${MK_CRYPT} != "no"
 	${_+_}cd ${.CURDIR}/secure/lib; ${LIB32IMAKE} ${.TARGET:S/32$//}
 .endif
 .if ${MK_KERBEROS} != "no"
 	${_+_}cd ${.CURDIR}/kerberos5/lib; ${LIB32IMAKE} ${.TARGET:S/32$//}
 .endif
 	${_+_}cd ${.CURDIR}/libexec/rtld-elf; \
 	    PROG=ld-elf32.so.1 ${LIB32IMAKE} ${.TARGET:S/32$//}
 	${_+_}cd ${.CURDIR}/usr.bin/ldd; PROG=ldd32 ${LIB32IMAKE} \
 	    ${.TARGET:S/32$//}
 .endif
 
 WMAKE_TGTS=
 WMAKE_TGTS+=	_worldtmp _legacy
 .if empty(SUBDIR_OVERRIDE)
 WMAKE_TGTS+=	_bootstrap-tools
 .endif
 WMAKE_TGTS+=	_cleanobj _obj _build-tools _cross-tools
 WMAKE_TGTS+=	_includes _libraries _depend everything
 .if defined(LIB32TMP) && ${MK_LIB32} != "no" && empty(SUBDIR_OVERRIDE)
 WMAKE_TGTS+=	build32
 .endif
 
 # buildworld runs the prologue banner, every stage in ${WMAKE_TGTS} and the
 # epilogue banner; .ORDER keeps them in that sequence.  None of these targets
 # names a file, so all three are marked .PHONY.
 buildworld: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue .PHONY
 .ORDER: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue
 
 # Print the start banner; the date is formatted under LC_ALL=C.
 buildworld_prologue: .PHONY
 	@echo "--------------------------------------------------------------"
 	@echo ">>> World build started on `LC_ALL=C date`"
 	@echo "--------------------------------------------------------------"
 
 # Print the completion banner; the date is formatted under LC_ALL=C.
 buildworld_epilogue: .PHONY
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> World build completed on `LC_ALL=C date`"
 	@echo "--------------------------------------------------------------"
 
 #
 # We need to have this as a target because the indirection between Makefile
 # and Makefile.inc1 causes the correct PATH to be used, rather than a
 # modification of the current environment's PATH.  In addition, we need
 # to quote multiword values.
 #
 buildenvvars: .PHONY
 	@echo ${WMAKEENV:Q} ${.MAKE.EXPORTED:@v@$v=\"${$v}\"@}
 
 .if ${.TARGETS:Mbuildenv}
 .if ${.MAKEFLAGS:M-j}
 .error The buildenv target is incompatible with -j
 .endif
 .endif
 BUILDENV_DIR?=	${.CURDIR}
 buildenv: .PHONY
 	@echo Entering world for ${TARGET_ARCH}:${TARGET}
 .if ${BUILDENV_SHELL:M*zsh*}
 	@echo For ZSH you must run: export CPUTYPE=${TARGET_CPUTYPE}
 .endif
 	@cd ${BUILDENV_DIR} && env ${WMAKEENV} BUILDENV=1 ${BUILDENV_SHELL} \
 	    || true
 
 TOOLCHAIN_TGTS=	${WMAKE_TGTS:N_depend:Neverything:Nbuild32}
 toolchain: ${TOOLCHAIN_TGTS}
 kernel-toolchain: ${TOOLCHAIN_TGTS:N_includes:N_libraries}
 
 #
 # installcheck
 #
 # Checks to be sure system is ready for installworld/installkernel.
 #
 installcheck: _installcheck_world _installcheck_kernel
 _installcheck_world:
 _installcheck_kernel:
 
 #
 # Require DESTDIR to be set if installing for a different architecture or
 # using the user/group database in the source tree.
 #
 .if ${TARGET_ARCH} != ${MACHINE_ARCH} || ${TARGET} != ${MACHINE} || \
     defined(DB_FROM_SRC)
 .if !make(distributeworld)
 _installcheck_world: __installcheck_DESTDIR
 _installcheck_kernel: __installcheck_DESTDIR
 __installcheck_DESTDIR:
 .if !defined(DESTDIR) || empty(DESTDIR)
 	@echo "ERROR: Please set DESTDIR!"; \
 	false
 .endif
 .endif
 .endif
 
 .if !defined(DB_FROM_SRC)
 #
 # Check for missing UIDs/GIDs.
 #
 CHECK_UIDS=	auditdistd
 CHECK_GIDS=	audit
 .if ${MK_SENDMAIL} != "no"
 CHECK_UIDS+=	smmsp
 CHECK_GIDS+=	smmsp
 .endif
 .if ${MK_PF} != "no"
 CHECK_UIDS+=	proxy
 CHECK_GIDS+=	proxy authpf
 .endif
 .if ${MK_UNBOUND} != "no"
 CHECK_UIDS+=	unbound
 CHECK_GIDS+=	unbound
 .endif
 # Fail the world install check when a user in CHECK_UIDS or a group in
 # CHECK_GIDS is missing.  Only the exit status of id(1)/find(1) is used, so
 # the commands are tested directly rather than via command substitution.
 _installcheck_world: __installcheck_UGID
 __installcheck_UGID:
 .for uid in ${CHECK_UIDS}
 	@if ! id -u ${uid} >/dev/null 2>&1; then \
 		echo "ERROR: Required ${uid} user is missing, see /usr/src/UPDATING."; \
 		false; \
 	fi
 .endfor
 # NOTE(review): the group probe presumably relies on find(1) exiting non-zero
 # for an unknown group name -- confirm against find(1).
 .for gid in ${CHECK_GIDS}
 	@if ! find / -prune -group ${gid} >/dev/null 2>&1; then \
 		echo "ERROR: Required ${gid} group is missing, see /usr/src/UPDATING."; \
 		false; \
 	fi
 .endfor
 .endif
 
 #
 # Required install tools to be saved in a scratch dir for safety.
 #
 .if ${MK_ZONEINFO} != "no"
 _zoneinfo=	zic tzsetup
 .endif
 
 ITOOLS=	[ awk cap_mkdb cat chflags chmod chown cmp cp \
 	date echo egrep find grep id install ${_install-info} \
 	ln make mkdir mtree mv pwd_mkdb \
 	rm sed services_mkdb sh strip sysctl test true uname wc ${_zoneinfo} \
 	${LOCAL_ITOOLS}
 
 # Needed for share/man
 .if ${MK_MAN} != "no"
 ITOOLS+=makewhatis
 .endif
 
 #
 # distributeworld
 #
 # Distributes everything compiled by a `buildworld'.
 #
 # installworld
 #
 # Installs everything compiled by a 'buildworld'.
 #
 
 # Non-base distributions produced by the base system
 EXTRA_DISTRIBUTIONS=	doc
 .if defined(LIB32TMP) && ${MK_LIB32} != "no"
 EXTRA_DISTRIBUTIONS+=	lib32
 .endif
 .if ${MK_TESTS} != "no"
 EXTRA_DISTRIBUTIONS+=	tests
 .endif
 
 DEBUG_DISTRIBUTIONS=
 .if ${MK_DEBUG_FILES} != "no"
 DEBUG_DISTRIBUTIONS+=	base ${EXTRA_DISTRIBUTIONS:S,doc,,:S,tests,,}
 .endif
 
 MTREE_MAGIC?=	mtree 2.0
 
 # distributeworld / installworld
 #
 # 1. Stage the install tools: every program in ${ITOOLS} is located with
 #    which(1), its shared libraries are resolved with ldd(1), and programs,
 #    libraries and the locale directory are copied into ${INSTALLTMP}.
 #    A missing tool or library aborts the target.
 # 2. With NO_ROOT, start a fresh ${METALOG} beginning with the mtree signature.
 # 3. distributeworld only: create the directory skeleton of every extra
 #    distribution with mtree (appending the same specs to ${METALOG} when
 #    NO_ROOT is set), then run distrib-dirs in etc/ for the base distribution.
 # 4. Run the matching re* target (redistribute or reinstall, derived from the
 #    goal name) and remove ${INSTALLTMP}.
 # 5. distributeworld only: prune empty entries from the extra distributions
 #    and, with NO_ROOT, split ${METALOG} into per-distribution .meta and
 #    .debug.meta files.
 #
 # The distribution directories are created with "mkdir -p" so that a missing
 # ${DESTDIR}/${DISTDIR} parent is created and an already existing directory is
 # accepted, while real failures are no longer silently ignored.
 distributeworld installworld: _installcheck_world
 	mkdir -p ${INSTALLTMP}
 	progs=$$(for prog in ${ITOOLS}; do \
 		if progpath=`which $$prog`; then \
 			echo $$progpath; \
 		else \
 			echo "Required tool $$prog not found in PATH." >&2; \
 			exit 1; \
 		fi; \
 	    done); \
 	libs=$$(ldd -f "%o %p\n" -f "%o %p\n" $$progs 2>/dev/null | sort -u | \
 	    while read line; do \
 		set -- $$line; \
 		if [ "$$2 $$3" != "not found" ]; then \
 			echo $$2; \
 		else \
 			echo "Required library $$1 not found." >&2; \
 			exit 1; \
 		fi; \
 	    done); \
 	cp $$libs $$progs ${INSTALLTMP}
 	cp -R $${PATH_LOCALE:-"/usr/share/locale"} ${INSTALLTMP}/locale
 .if defined(NO_ROOT)
 	echo "#${MTREE_MAGIC}" > ${METALOG}
 .endif
 .if make(distributeworld)
 .for dist in ${EXTRA_DISTRIBUTIONS}
 	mkdir -p ${DESTDIR}/${DISTDIR}/${dist}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist} >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}/usr/include >/dev/null
 .if ${MK_DEBUG_FILES} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib >/dev/null
 .endif
 .if ${MK_LIB32} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null
 .if ${MK_DEBUG_FILES} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/usr >/dev/null
 .endif
 .endif
 .if ${MK_TESTS} != "no" && ${dist} == "tests"
 	-mkdir -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} >/dev/null
 .if ${MK_DEBUG_FILES} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/${TESTSBASE} >/dev/null
 .endif
 .endif
 .if defined(NO_ROOT)
 	${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.root.dist | \
 	    sed -e 's#^\./#./${dist}/#' >> ${METALOG}
 	${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.usr.dist | \
 	    sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG}
 	${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.include.dist | \
 	    sed -e 's#^\./#./${dist}/usr/include/#' >> ${METALOG}
 .if ${MK_LIB32} != "no"
 	${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.lib32.dist | \
 	    sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG}
 .endif
 .endif
 .endfor
 	mkdir -p ${DESTDIR}/${DISTDIR}/base
 	${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \
 	    METALOG=${METALOG} ${IMAKE_INSTALL} ${IMAKE_MTREE} \
 	    DISTBASE=/base DESTDIR=${DESTDIR}/${DISTDIR}/base \
 	    LOCAL_MTREE=${LOCAL_MTREE:Q} distrib-dirs
 .endif
 	${_+_}cd ${.CURDIR}; ${IMAKE} re${.TARGET:S/world$//}; \
 	    ${IMAKEENV} rm -rf ${INSTALLTMP}
 .if make(distributeworld)
 .for dist in ${EXTRA_DISTRIBUTIONS}
 	find ${DESTDIR}/${DISTDIR}/${dist} -mindepth 1 -empty -delete
 .endfor
 .if defined(NO_ROOT)
 .for dist in base ${EXTRA_DISTRIBUTIONS}
 	@# For each file that exists in this dist, print the corresponding
 	@# line from the METALOG.  This relies on the fact that
 	@# a line containing only the filename will sort immediately before
 	@# the relevant mtree line.
 	cd ${DESTDIR}/${DISTDIR}; \
 	find ./${dist} | sort -u ${METALOG} - | \
 	awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \
 	${DESTDIR}/${DISTDIR}/${dist}.meta
 .endfor
 .for dist in ${DEBUG_DISTRIBUTIONS}
 	@# For each file that exists in this dist, print the corresponding
 	@# line from the METALOG.  This relies on the fact that
 	@# a line containing only the filename will sort immediately before
 	@# the relevant mtree line.
 	cd ${DESTDIR}/${DISTDIR}; \
 	find ./${dist}/usr/lib/debug | sort -u ${METALOG} - | \
 	awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \
 	${DESTDIR}/${DISTDIR}/${dist}.debug.meta
 .endfor
 .endif
 .endif
 
 # packageworld
 #
 # Creates ${PACKAGEDIR}/<dist>.txz for base and every extra distribution,
 # leaving out usr/lib/debug, and ${PACKAGEDIR}/<dist>-dbg.txz for every debug
 # distribution.  With NO_ROOT the archive members are taken from the
 # <dist>.meta / <dist>.debug.meta files instead of the directory contents.
 packageworld:
 .for dist in base ${EXTRA_DISTRIBUTIONS}
 .if defined(NO_ROOT)
 	${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \
 	    tar cvf - --exclude usr/lib/debug \
 	    @${DESTDIR}/${DISTDIR}/${dist}.meta | \
 	    ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz
 .else
 	${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \
 	    tar cvf - --exclude usr/lib/debug . | \
 	    ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz
 .endif
 .endfor
 
 .for dist in ${DEBUG_DISTRIBUTIONS}
 . if defined(NO_ROOT)
 	${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \
 	    tar cvf - @${DESTDIR}/${DISTDIR}/${dist}.debug.meta | \
 	    ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz
 . else
 	${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \
 	    tar cvLf - usr/lib/debug | \
 	    ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz
 . endif
 .endfor
 
 #
 # reinstall
 #
 # If you have a build server, you can NFS mount the source and obj directories
 # and do a 'make reinstall' on the *client* to install new binaries from the
 # most recent server build.
 #
 # Two phases, each announced by a banner: first create the directory
 # hierarchy, then install everything (and the 32-bit compat libraries when
 # they are part of the build).
 reinstall: .MAKE .PHONY
 	@echo "--------------------------------------------------------------"; \
 	    echo ">>> Making hierarchy"; \
 	    echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; \
 	    ${MAKE} -f Makefile.inc1 LOCAL_MTREE=${LOCAL_MTREE:Q} hierarchy
 	@echo; \
 	    echo "--------------------------------------------------------------"; \
 	    echo ">>> Installing everything"; \
 	    echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; \
 	    ${MAKE} -f Makefile.inc1 install
 .if defined(LIB32TMP) && ${MK_LIB32} != "no"
 	${_+_}cd ${.CURDIR}; \
 	    ${MAKE} -f Makefile.inc1 install32
 .endif
 
 # Counterpart of reinstall for distributions: run the distribute target and,
 # when the 32-bit compat libraries are part of the build, distribute32 into
 # the lib32 distribution.
 redistribute: .MAKE .PHONY
 	@echo "--------------------------------------------------------------"; \
 	    echo ">>> Distributing everything"; \
 	    echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; \
 	    ${MAKE} -f Makefile.inc1 distribute
 .if defined(LIB32TMP) && ${MK_LIB32} != "no"
 	${_+_}cd ${.CURDIR}; \
 	    ${MAKE} -f Makefile.inc1 distribute32 DISTRIBUTION=lib32
 .endif
 
 # Forward to the target of the same name in etc/, using the cross environment,
 # the temporary tool PATH and the install/mtree make settings.
 distrib-dirs: .MAKE .PHONY
 	${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \
 	    ${IMAKE_INSTALL} ${IMAKE_MTREE} METALOG=${METALOG} ${.TARGET}
 
 # Run the distribution target in etc/ and afterwards installconfig from this
 # makefile, both with the cross environment and the temporary tool PATH.
 distribution: .MAKE .PHONY
 	${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \
 	    ${IMAKE_INSTALL} ${IMAKE_MTREE} METALOG=${METALOG} ${.TARGET}
 	${_+_}cd ${.CURDIR}; ${CROSSENV} PATH=${TMPPATH} \
 		${MAKE} -f Makefile.inc1 ${IMAKE_INSTALL} \
 		METALOG=${METALOG} installconfig
 
 #
 # buildkernel and installkernel
 #
 # Which kernels to build and/or install is specified by setting
 # KERNCONF. If not defined a GENERIC kernel is built/installed.
 # Only the existing (depending on TARGET) config files are used
 # for building kernels and only the first of these is designated
 # as the one being installed.
 #
 # Note that we have to use TARGET instead of TARGET_ARCH when
 # we're in kernel-land. Since only TARGET_ARCH is (expected) to
 # be set to cross-build, we have to make sure TARGET is set
 # properly.
 
 # KERNFAST switches on all four NO_KERNEL* shortcuts at once.
 .if defined(KERNFAST)
 NO_KERNELCLEAN=	t
 NO_KERNELCONFIG=	t
 NO_KERNELDEPEND=	t
 NO_KERNELOBJ=		t
 # Shortcut for KERNCONF=Blah -DKERNFAST is now KERNFAST=Blah
 .if !defined(KERNCONF) && ${KERNFAST} != "1"
 KERNCONF=${KERNFAST}
 .endif
 .endif
 # Default kernel configuration: GENERIC64 on powerpc64, GENERIC elsewhere.
 .if ${TARGET_ARCH} == "powerpc64"
 KERNCONF?=	GENERIC64
 .else
 KERNCONF?=	GENERIC
 .endif
 # Name under which the first kernel is installed.
 INSTKERNNAME?=	kernel
 
 # Kernel source tree, the per-TARGET config directory inside it, the object
 # directory for kernel builds, and the directory searched for KERNCONF files
 # (defaults to the per-TARGET config directory).
 KERNSRCDIR?=	${.CURDIR}/sys
 KRNLCONFDIR=	${KERNSRCDIR}/${TARGET}/conf
 KRNLOBJDIR=	${OBJTREE}${KERNSRCDIR}
 KERNCONFDIR?=	${KRNLCONFDIR}
 
 # BUILDKERNELS collects every KERNCONF entry whose config file exists in
 # ${KERNCONFDIR}.  INSTALLKERNEL is the first such entry, unless
 # NO_INSTALLKERNEL is defined; in that case it stays empty and a "dummy"
 # placeholder occupies the first BUILDKERNELS slot instead.
 BUILDKERNELS=
 INSTALLKERNEL=
 .if defined(NO_INSTALLKERNEL)
 # All of the BUILDKERNELS loops start at index 1.
 BUILDKERNELS+= dummy
 .endif
 .for _kernel in ${KERNCONF}
 .if exists(${KERNCONFDIR}/${_kernel})
 BUILDKERNELS+=	${_kernel}
 .if empty(INSTALLKERNEL) && !defined(NO_INSTALLKERNEL)
 INSTALLKERNEL= ${_kernel}
 .endif
 .endif
 .endfor
 
 # Give the .MAKE and .PHONY attributes to the targets listed in WMAKE_TGTS
 # (except _worldtmp and build32) and to every known target whose name starts
 # with an underscore (except _worldtmp).
 ${WMAKE_TGTS:N_worldtmp:Nbuild32} ${.ALLTARGETS:M_*:N_worldtmp}: .MAKE .PHONY
 
 #
 # buildkernel
 #
 # Builds all kernels defined by BUILDKERNELS.
 #
 # Fails immediately when no requested kernel configuration file was found.
 # Otherwise, for every kernel in BUILDKERNELS (ignoring the "dummy"
 # placeholder):
 #   stage 1   config(8) into ${KRNLOBJDIR}/<kernel>  (skipped by NO_KERNELCONFIG)
 #   stage 2.1 clean the object tree     (skipped by NO_CLEAN or NO_KERNELCLEAN)
 #   stage 2.2 obj                       (skipped by NO_KERNELOBJ)
 #   stage 2.3 kernel-tools
 #   stage 3.1 depend                    (skipped by NO_KERNELDEPEND)
 #   stage 3.2 all
 buildkernel: .MAKE .PHONY
 .if empty(BUILDKERNELS:Ndummy)
 	@echo "ERROR: Missing kernel configuration file(s) (${KERNCONF})."; \
 	false
 .endif
 	@echo
 .for _kernel in ${BUILDKERNELS:Ndummy}
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Kernel build for ${_kernel} started on `LC_ALL=C date`"
 	@echo "--------------------------------------------------------------"
 	@echo "===> ${_kernel}"
 	mkdir -p ${KRNLOBJDIR}
 .if !defined(NO_KERNELCONFIG)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 1: configuring the kernel"
 	@echo "--------------------------------------------------------------"
 	cd ${KRNLCONFDIR}; \
 		PATH=${TMPPATH} \
 		    config ${CONFIGARGS} -d ${KRNLOBJDIR}/${_kernel} \
 			-I '${KERNCONFDIR}' '${KERNCONFDIR}/${_kernel}'
 .endif
 .if !defined(NO_CLEAN) && !defined(NO_KERNELCLEAN)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.1: cleaning up the object tree"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} ${CLEANDIR}
 .endif
 .if !defined(NO_KERNELOBJ)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.2: rebuilding the object tree"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} obj
 .endif
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.3: build tools"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${KTMAKE} kernel-tools
 .if !defined(NO_KERNELDEPEND)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 3.1: making dependencies"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} depend -DNO_MODULES_OBJ
 .endif
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 3.2: building everything"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} all -DNO_MODULES_OBJ
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Kernel build for ${_kernel} completed on `LC_ALL=C date`"
 	@echo "--------------------------------------------------------------"
 .endfor
 
 #
 # installkernel, etc.
 #
 # Install the kernel defined by INSTALLKERNEL
 #
 # The sub-make target is the goal name with "kernel" removed
 # (installkernel -> install, reinstallkernel.debug -> reinstall.debug, ...).
 # INSTALLKERNEL is installed as ${INSTKERNNAME}; the remaining BUILDKERNELS
 # entries are installed as ${INSTKERNNAME}.<config> unless
 # NO_INSTALLEXTRAKERNELS is defined.
 installkernel installkernel.debug \
 reinstallkernel reinstallkernel.debug: _installcheck_kernel
 .if !defined(NO_INSTALLKERNEL)
 .if empty(INSTALLKERNEL)
 	@echo "ERROR: No kernel \"${KERNCONF}\" to install."; \
 	false
 .endif
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Installing kernel ${INSTALLKERNEL}"
 	@echo "--------------------------------------------------------------"
 	cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \
 	    ${CROSSENV} PATH=${TMPPATH} \
 	    ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME} ${.TARGET:S/kernel//}
 .endif
 .if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS)
 .for _kernel in ${BUILDKERNELS:[2..-1]}
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Installing kernel ${_kernel}"
 	@echo "--------------------------------------------------------------"
 	cd ${KRNLOBJDIR}/${_kernel}; \
 	    ${CROSSENV} PATH=${TMPPATH} \
 	    ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME}.${_kernel} ${.TARGET:S/kernel//}
 .endfor
 .endif
 
 # Install kernels into distribution directories instead of the live system:
 # INSTALLKERNEL goes to ${INSTALL_DDIR}/kernel, every further BUILDKERNELS
 # entry to ${INSTALL_DDIR}/kernel.<config>.  The sub-make target is the goal
 # name with "distributekernel" replaced by "install".
 # With NO_ROOT a <name>.premeta file is started with the mtree signature
 # before the install, and afterwards rewritten by sed (leading "./kernel"
 # replaced by ".") into <name>.meta.
 distributekernel distributekernel.debug:
 .if !defined(NO_INSTALLKERNEL)
 .if empty(INSTALLKERNEL)
 	@echo "ERROR: No kernel \"${KERNCONF}\" to install."; \
 	false
 .endif
 	mkdir -p ${DESTDIR}/${DISTDIR}
 .if defined(NO_ROOT)
 	echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.premeta
 .endif
 	cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \
 	    ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.premeta/} \
 	    ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} KERNEL=${INSTKERNNAME} \
 	    DESTDIR=${INSTALL_DDIR}/kernel \
 	    ${.TARGET:S/distributekernel/install/}
 .if defined(NO_ROOT)
 	sed -e 's|^./kernel|.|' ${DESTDIR}/${DISTDIR}/kernel.premeta > \
 	    ${DESTDIR}/${DISTDIR}/kernel.meta
 .endif
 .endif
 .if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS)
 .for _kernel in ${BUILDKERNELS:[2..-1]}
 .if defined(NO_ROOT)
 	echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta
 .endif
 	cd ${KRNLOBJDIR}/${_kernel}; \
 	    ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.${_kernel}.premeta/} \
 	    ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} \
 	    KERNEL=${INSTKERNNAME}.${_kernel} \
 	    DESTDIR=${INSTALL_DDIR}/kernel.${_kernel} \
 	    ${.TARGET:S/distributekernel/install/}
 .if defined(NO_ROOT)
 	sed -e 's|^./kernel|.|' \
 	    ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta > \
 	    ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta
 .endif
 .endfor
 .endif
 
 # packagekernel
 #
 # Packs the trees produced by distributekernel: for the primary kernel
 # ${PACKAGEDIR}/kernel.txz (everything except *.debug) and
 # ${DESTDIR}/${DISTDIR}/kernel-dbg.txz (only the *.debug files); for every
 # additional kernel the same pair named kernel.<config>[-dbg].txz.  With
 # NO_ROOT the archive members come from the .meta files.
 #
 # Both archives of the primary kernel are guarded by NO_INSTALLKERNEL:
 # distributekernel only creates the kernel/ tree and kernel.meta when
 # NO_INSTALLKERNEL is not defined, so attempting to pack the debug files
 # without that tree would fail on the cd.
 packagekernel:
 .if defined(NO_ROOT)
 .if !defined(NO_INSTALLKERNEL)
 	cd ${DESTDIR}/${DISTDIR}/kernel; \
 	    tar cvf - --exclude '*.debug' \
 	    @${DESTDIR}/${DISTDIR}/kernel.meta | \
 	    ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz
 	cd ${DESTDIR}/${DISTDIR}/kernel; \
 	    tar cvf - --include '*/*/*.debug' \
 	    @${DESTDIR}/${DISTDIR}/kernel.meta | \
 	    ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz
 .endif
 .if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS)
 .for _kernel in ${BUILDKERNELS:[2..-1]}
 	cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \
 	    tar cvf - --exclude '*.debug' \
 	    @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \
 	    ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz
 	cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \
 	    tar cvf - --include '*/*/*.debug' \
 	    @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \
 	    ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz
 .endfor
 .endif
 .else
 .if !defined(NO_INSTALLKERNEL)
 	cd ${DESTDIR}/${DISTDIR}/kernel; \
 	    tar cvf - --exclude '*.debug' . | \
 	    ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz
 	cd ${DESTDIR}/${DISTDIR}/kernel; \
 	    tar cvf - --include '*/*/*.debug' $$(eval find .) | \
 	    ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz
 .endif
 .if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS)
 .for _kernel in ${BUILDKERNELS:[2..-1]}
 	cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \
 	    tar cvf - --exclude '*.debug' . | \
 	    ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz
 	cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \
 	    tar cvf - --include '*/*/*.debug' $$(eval find .) | \
 	    ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz
 .endfor
 .endif
 .endif
 
 #
 # doxygen
 #
 # Build the API documentation with doxygen
 #
 # Refuses to run (exit 1, with a hint to install devel/doxygen) unless
 # ${LOCALBASE}/bin/doxygen is executable; otherwise runs "obj all" in
 # tools/kerneldoc/subsys.
 doxygen: .PHONY
 	@if [ ! -x ${LOCALBASE}/bin/doxygen ]; then \
 		echo "You need doxygen (devel/doxygen) to generate the API documentation of the kernel." | /usr/bin/fmt; \
 		exit 1; \
 	fi
 	${_+_}cd ${.CURDIR}/tools/kerneldoc/subsys; ${MAKE} obj all
 
 #
 # update
 #
 # Update the source tree(s), by running svn/svnup to update to the
 # latest copy.
 #
 # Requesting the retired CVS_UPDATE/SUP_UPDATE methods without SVN_UPDATE is a
 # hard error.  With SVN_UPDATE defined, "${SVN} update ${SVNFLAGS}" is run in
 # the source tree; with neither defined the target does nothing.
 update:
 .if (defined(CVS_UPDATE) || defined(SUP_UPDATE)) && !defined(SVN_UPDATE)
 	@echo "--------------------------------------------------------------"
 	@echo "CVS_UPDATE and SUP_UPDATE are no longer supported."
 	@echo "Please see: https://wiki.freebsd.org/CvsIsDeprecated"
 	@echo "--------------------------------------------------------------"
 	@exit 1
 .endif
 .if defined(SVN_UPDATE)
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Updating ${.CURDIR} using Subversion"
 	@echo "--------------------------------------------------------------"
 	@(cd ${.CURDIR}; ${SVN} update ${SVNFLAGS})
 .endif
 
 #
 # ------------------------------------------------------------------------
 #
 # From here onwards are utility targets used by the 'make world' and
 # related targets.  If your 'world' breaks, you may like to try to fix
 # the problem and manually run the following targets to attempt to
 # complete the build.  Beware, this is *not* guaranteed to work, you
 # need to have a pretty good grip on the current state of the system
 # to attempt to manually finish it.  If in doubt, 'make world' again.
 #
 
 #
 # legacy: Build compatibility shims for the next three targets. This is a
 # minimal set of tools and shims necessary to compensate for older systems
 # which don't have the APIs required by the targets built in bootstrap-tools,
 # build-tools or cross-tools.
 #
 
 # ELF Tool Chain libraries are needed for ELF tools and dtrace tools.
 # They are only rebuilt as legacy libraries when the host is older than
 # the version tested below.
 .if ${BOOTSTRAPPING} < 1100006
 _elftoolchain_libs= lib/libelf lib/libdwarf
 .endif
 
 # Rejects hosts older than 8.0 (a BOOTSTRAPPING value of 0 is exempt from
 # the check), then runs obj, includes, depend, all and install for
 # tools/build and the selected ELF Tool Chain libraries; headers and build
 # results are installed under ${MAKEOBJDIRPREFIX}/legacy.
 legacy:
 .if ${BOOTSTRAPPING} < 800107 && ${BOOTSTRAPPING} != 0
 	@echo "ERROR: Source upgrades from versions prior to 8.0 are not supported."; \
 	false
 .endif
 .for _tool in tools/build ${_elftoolchain_libs}
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,includes,depend,all,install)"; \
 	    cd ${.CURDIR}/${_tool}; \
 	    ${MAKE} DIRPRFX=${_tool}/ obj; \
 	    ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy includes; \
 	    ${MAKE} DIRPRFX=${_tool}/ depend; \
 	    ${MAKE} DIRPRFX=${_tool}/ all; \
 	    ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install
 .endfor
 
 #
 # bootstrap-tools: Build tools needed for compatibility. These are binaries that
 # are built to build other binaries in the system. However, the focus of these
 # binaries is usually quite narrow. Bootstrap tools use the host's compiler and
 # libraries, augmented by -legacy.
 #
 # Prefix of the per-directory bootstrap-tools targets (${_bt}-<dir>).
 _bt=		_bootstrap-tools
 
 # Each variable below names the source directories of one bootstrap tool and
 # is only set when the corresponding build option is enabled or the host
 # (BOOTSTRAPPING version) is older than the stated threshold.
 .if ${MK_GAMES} != "no"
 _strfile=	usr.bin/fortune/strfile
 .endif
 
 .if ${MK_GCC} != "no" && ${MK_CXX} != "no"
 _gperf=		gnu/usr.bin/gperf
 .endif
 
 .if ${MK_GROFF} != "no"
 _groff=		gnu/usr.bin/groff \
 		usr.bin/soelim
 .endif
 
 .if ${MK_VT} != "no"
 _vtfontcvt=	usr.bin/vtfontcvt
 .endif
 
 .if ${BOOTSTRAPPING} < 900002
 _sed=		usr.bin/sed
 .endif
 
 # m4 depends on libopenbsd, and lex depends on m4.
-.if ${BOOTSTRAPPING} < 1000002
+.if ${BOOTSTRAPPING} < 1000033
 _libopenbsd=	lib/libopenbsd
 _m4=		usr.bin/m4
+_lex=		usr.bin/lex
 
 ${_bt}-usr.bin/m4: ${_bt}-lib/libopenbsd
+${_bt}-usr.bin/lex: ${_bt}-usr.bin/m4
 .endif
 
 # nmtree depends on libnetbsd.
 .if ${BOOTSTRAPPING} < 1000026
 _nmtree=	lib/libnetbsd \
 		usr.sbin/nmtree
 
 ${_bt}-usr.sbin/nmtree: ${_bt}-lib/libnetbsd
 .endif
 
 .if ${BOOTSTRAPPING} < 1000027
 _cat=		bin/cat
-.endif
-
-.if ${BOOTSTRAPPING} < 1000033
-_lex=		usr.bin/lex
-
-${_bt}-usr.bin/lex: ${_bt}-usr.bin/m4
 .endif
 
 # r277259 crunchide: Correct 64-bit section header offset
 # r281674 crunchide: always include both 32- and 64-bit ELF support
 # r285986 crunchgen: use STRIPBIN rather than STRIP
 .if ${BOOTSTRAPPING} < 1100078
 _crunch=	usr.sbin/crunch
 .endif
 
 # awk is only bootstrapped for one specific host version window.
 .if ${BOOTSTRAPPING} >= 900040 && ${BOOTSTRAPPING} < 900041
 _awk=		usr.bin/awk
 .endif
 
 # yacc is always bootstrapped; it depends on liby.
 _yacc=		lib/liby \
 		usr.bin/yacc
 
 ${_bt}-usr.bin/yacc: ${_bt}-lib/liby
 
 .if ${MK_BSNMP} != "no"
 _gensnmptree=	usr.sbin/bsnmpd/gensnmptree
 .endif
 
 # We need to build tblgen when we're building clang either as
 # the bootstrap compiler, or as the part of the normal build.
 .if ${MK_CLANG_BOOTSTRAP} != "no" || ${MK_CLANG} != "no"
 _clang_tblgen= \
 	lib/clang/libllvmsupport \
 	lib/clang/libllvmtablegen \
 	usr.bin/clang/llvm-tblgen \
 	usr.bin/clang/clang-tblgen
 
 ${_bt}-usr.bin/clang/clang-tblgen: ${_bt}-lib/clang/libllvmtablegen ${_bt}-lib/clang/libllvmsupport
 ${_bt}-usr.bin/clang/llvm-tblgen: ${_bt}-lib/clang/libllvmtablegen ${_bt}-lib/clang/libllvmsupport
 .endif
 
 # Default to building the GPL DTC, but build the BSDL one if users explicitly
 # request it.
 _dtc= usr.bin/dtc
 .if ${MK_GPL_DTC} != "no"
 _dtc= gnu/usr.bin/dtc
 .endif
 
 # The Kerberos bootstrap tools are built strictly in the listed order
 # (enforced with .ORDER on their ${_bt}- targets).
 .if ${MK_KERBEROS} != "no"
 _kerberos5_bootstrap_tools= \
 	kerberos5/tools/make-roken \
 	kerberos5/lib/libroken \
 	kerberos5/lib/libvers \
 	kerberos5/tools/asn1_compile \
 	kerberos5/tools/slc \
 	usr.bin/compile_et
 
 .ORDER: ${_kerberos5_bootstrap_tools:C/^/${_bt}-/g}
 .endif
 
 # With MANDOCDB, mandoc (which depends on libopenbsd and libsqlite3) is
 # bootstrapped; otherwise the makewhatis program is bootstrapped instead.
 .if ${MK_MANDOCDB} != "no"
 _libopenbsd?=	lib/libopenbsd
 _makewhatis=	lib/libsqlite3 \
 		usr.bin/mandoc
 ${_bt}-usr.bin/mandoc: ${_bt}-lib/libopenbsd ${_bt}-lib/libsqlite3
 .else
 _makewhatis=usr.bin/makewhatis
 .endif
 
 # bootstrap-tools depends on one ${_bt}-<dir> target per selected tool
 # directory.  Each of those runs obj, depend, all and install in that
 # directory, installing into ${MAKEOBJDIRPREFIX}/legacy.
 bootstrap-tools: .PHONY
 
 #	Please document (add comment) why something is in 'bootstrap-tools'.
 #	Try to bound the building of the bootstrap-tool to just the
 #	FreeBSD versions that need the tool built at this stage of the build.
 .for _tool in \
     ${_clang_tblgen} \
     ${_kerberos5_bootstrap_tools} \
     ${_strfile} \
     ${_gperf} \
     ${_groff} \
     ${_dtc} \
     ${_awk} \
     ${_cat} \
     usr.bin/lorder \
     ${_libopenbsd} \
     ${_makewhatis} \
     usr.bin/rpcgen \
     ${_sed} \
     ${_yacc} \
     ${_m4} \
     ${_lex} \
     usr.bin/xinstall \
     ${_gensnmptree} \
     usr.sbin/config \
     ${_crunch} \
     ${_nmtree} \
     ${_vtfontcvt} \
     usr.bin/localedef
 ${_bt}-${_tool}: .PHONY .MAKE
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \
 		cd ${.CURDIR}/${_tool}; \
 		${MAKE} DIRPRFX=${_tool}/ obj; \
 		${MAKE} DIRPRFX=${_tool}/ depend; \
 		${MAKE} DIRPRFX=${_tool}/ all; \
 		${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install
 
 bootstrap-tools: ${_bt}-${_tool}
 .endfor
 
 #
 # build-tools: Build special purpose build tools
 #
 # Optional build-tools directories, selected by NO_SHARE, MK_GCC and MK_RESCUE.
 .if !defined(NO_SHARE)
 _share=	share/syscons/scrnmaps
 .endif
 
 .if ${MK_GCC} != "no"
 _gcc_tools= gnu/usr.bin/cc/cc_tools
 .endif
 
 .if ${MK_RESCUE} != "no"
 # rescue includes programs that have build-tools targets
 _rescue=rescue/rescue
 .endif
 
 # First group: directories that provide their own build-tools target; each
 # gets a build-tools_<dir> target running obj and build-tools there.
 .for _tool in \
     bin/csh \
     bin/sh \
     ${LOCAL_TOOL_DIRS} \
     lib/ncurses/ncurses \
     lib/ncurses/ncursesw \
     ${_rescue} \
     ${_share} \
     usr.bin/awk \
     lib/libmagic \
     usr.bin/mkesdb_static \
     usr.bin/mkcsmapper_static \
     usr.bin/vi/catalog
 build-tools_${_tool}: .PHONY
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,build-tools)"; \
 		cd ${.CURDIR}/${_tool}; \
 		${MAKE} DIRPRFX=${_tool}/ obj; \
 		${MAKE} DIRPRFX=${_tool}/ build-tools
 build-tools: build-tools_${_tool}
 .endfor
 # Second group: directories that are built completely (obj, depend, all).
 .for _tool in \
     ${_gcc_tools}
 build-tools_${_tool}: .PHONY
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all)"; \
 		cd ${.CURDIR}/${_tool}; \
 		${MAKE} DIRPRFX=${_tool}/ obj; \
 		${MAKE} DIRPRFX=${_tool}/ depend; \
 		${MAKE} DIRPRFX=${_tool}/ all
 build-tools: build-tools_${_tool}
 .endfor
 
 #
 # kernel-tools: Build kernel-building tools
 #
 # Creates ${MAKEOBJDIRPREFIX}/usr and populates its directory tree from
 # etc/mtree/BSD.usr.dist; no tools are compiled here.
 kernel-tools:
 	mkdir -p ${MAKEOBJDIRPREFIX}/usr
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${MAKEOBJDIRPREFIX}/usr >/dev/null
 
 #
 # cross-tools: All the tools needed to build the rest of the system after
 # we get done with the earlier stages. It is the last set of tools needed
 # to begin building the target binaries.
 #
 # btxld is only built when cross-building for amd64 or i386.
 .if ${TARGET_ARCH} != ${MACHINE_ARCH}
 .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386"
 _btxld=		usr.sbin/btxld
 .endif
 .endif
 
 # Rebuild ctfconvert and ctfmerge to avoid difficult-to-diagnose failures
 # resulting from missing bug fixes or ELF Toolchain updates.
 .if ${MK_CDDL} != "no"
 _dtrace_tools= cddl/lib/libctf cddl/usr.bin/ctfconvert \
     cddl/usr.bin/ctfmerge
 .endif
 
 # If we're given an XAS, don't build binutils.
 .if ${XAS:M/*} == ""
 .if ${MK_BINUTILS_BOOTSTRAP} != "no"
 _binutils=	gnu/usr.bin/binutils
 .endif
 .if ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no"
 _elftctools=	lib/libelftc \
 		usr.bin/elfcopy \
 		usr.bin/nm \
 		usr.bin/size \
 		usr.bin/strings
 # These are not required by the build, but can be useful for developers who
 # cross-build on a FreeBSD 10 host:
 _elftctools+=	usr.bin/addr2line
 .endif
 .elif ${TARGET_ARCH} != ${MACHINE_ARCH} && ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no"
 # If cross-building with an external binutils we still need to build strip for
 # the target (for at least crunchide).
 _elftctools=	lib/libelftc \
 		usr.bin/elfcopy
 .endif
 
 # If a full path to an external cross compiler is given, don't build
 # a cross compiler.
 .if ${XCC:N${CCACHE_BIN}:M/*} == "" && ${MK_CROSS_COMPILER} != "no"
 .if ${MK_CLANG_BOOTSTRAP} != "no"
 _clang=		usr.bin/clang
 _clang_libs=	lib/clang
 .endif
 .if ${MK_GCC_BOOTSTRAP} != "no"
 _cc=		gnu/usr.bin/cc
 .endif
 .endif
 .if ${MK_USB} != "no"
 _usb_tools=	sys/boot/usb/tools
 .endif
 
 # Runs obj, depend, all and install for every selected cross-tool directory,
 # in the listed order, installing into ${MAKEOBJDIRPREFIX}.
 # NOTE(review): ${_crunchide} is not assigned anywhere in this part of the
 # file; confirm whether it is still set elsewhere.
 cross-tools: .MAKE .PHONY
 .for _tool in \
     ${_clang_libs} \
     ${_clang} \
     ${_binutils} \
     ${_elftctools} \
     ${_dtrace_tools} \
     ${_cc} \
     ${_btxld} \
     ${_crunchide} \
     ${_usb_tools}
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \
 		cd ${.CURDIR}/${_tool}; \
 		${MAKE} DIRPRFX=${_tool}/ obj; \
 		${MAKE} DIRPRFX=${_tool}/ depend; \
 		${MAKE} DIRPRFX=${_tool}/ all; \
 		${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX} install
 .endfor
 
 # Settings for the native-xtools target:
 #   NXBDESTDIR  install root of the resulting tools
 #   NXBENV      environment: separate object prefix, install via
 #               tools/install.sh, and the gperf_for_gcc bin directory
 #               appended to PATH
 #   NXBMAKE     make invocation targeting TARGET/TARGET_ARCH, using the
 #               tblgen binaries from NXBDESTDIR, with NO_PIC and NO_SHARED
 #               defined and gdb, tests, html, man, profile, warnings, CTF,
 #               clang extras/full, lldb and debug files switched off
 NXBDESTDIR=	${OBJTREE}/nxb-bin
 NXBENV=		MAKEOBJDIRPREFIX=${OBJTREE}/nxb \
 		INSTALL="sh ${.CURDIR}/tools/install.sh" \
 		PATH=${PATH}:${OBJTREE}/gperf_for_gcc/usr/bin
 NXBMAKE=	${NXBENV} ${MAKE} \
 		LLVM_TBLGEN=${NXBDESTDIR}/usr/bin/llvm-tblgen \
 		CLANG_TBLGEN=${NXBDESTDIR}/usr/bin/clang-tblgen \
 		MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} \
 		MK_GDB=no MK_TESTS=no \
 		SSP_CFLAGS= \
 		MK_HTML=no NO_LINT=yes MK_MAN=no \
 		-DNO_PIC MK_PROFILE=no -DNO_SHARED \
 		-DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \
 		MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \
 		MK_LLDB=no MK_DEBUG_FILES=no
 
 # native-xtools is the current target for qemu-user cross builds of ports
 # via poudriere and the imgact_binmisc kernel module.
 # For non-clang enabled targets that are still using the in tree gcc
 # we must build a gperf binary for one instance of its Makefiles.  On
 # clang-enabled systems, the gperf binary is obsolete.
 #
 # Steps: optionally build gperf into ${OBJTREE}/gperf_for_gcc, create the
 # directory skeleton under ${NXBDESTDIR} with mtree, then run obj, depend,
 # all and install through ${NXBMAKE} for every listed tool directory.
 native-xtools: .PHONY
 .if ${MK_GCC_BOOTSTRAP} != "no"
 	mkdir -p ${OBJTREE}/gperf_for_gcc/usr/bin
 	${_+_}@${ECHODIR} "===> ${_gperf} (obj,depend,all,install)"; \
 	cd ${.CURDIR}/${_gperf}; \
 	${NXBMAKE} DIRPRFX=${_gperf}/ obj; \
 	${NXBMAKE} DIRPRFX=${_gperf}/ depend; \
 	${NXBMAKE} DIRPRFX=${_gperf}/ all; \
 	${NXBMAKE} DIRPRFX=${_gperf}/ DESTDIR=${OBJTREE}/gperf_for_gcc install
 .endif
 	mkdir -p ${NXBDESTDIR}/bin ${NXBDESTDIR}/sbin ${NXBDESTDIR}/usr
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${NXBDESTDIR}/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \
 	    -p ${NXBDESTDIR}/usr/include >/dev/null
 .if ${MK_DEBUG_FILES} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \
 	    -p ${NXBDESTDIR}/usr/lib >/dev/null
 .endif
 .for _tool in \
     bin/cat \
     bin/chmod \
     bin/cp \
     bin/csh \
     bin/echo \
     bin/expr \
     bin/hostname \
     bin/ln \
     bin/ls \
     bin/mkdir \
     bin/mv \
     bin/ps \
     bin/realpath \
     bin/rm \
     bin/rmdir \
     bin/sh \
     bin/sleep \
     ${_clang_tblgen} \
     usr.bin/ar \
     ${_binutils} \
     ${_elftctools} \
     ${_cc} \
     ${_gcc_tools} \
     ${_clang_libs} \
     ${_clang} \
     sbin/md5 \
     sbin/sysctl \
     gnu/usr.bin/diff \
     usr.bin/awk \
     usr.bin/basename \
     usr.bin/bmake \
     usr.bin/bzip2 \
     usr.bin/cmp \
     usr.bin/dirname \
     usr.bin/env \
     usr.bin/fetch \
     usr.bin/find \
     usr.bin/grep \
     usr.bin/gzip \
     usr.bin/id \
     usr.bin/lex \
     usr.bin/lorder \
     usr.bin/mktemp \
     usr.bin/mt \
     usr.bin/patch \
     usr.bin/sed \
     usr.bin/sort \
     usr.bin/tar \
     usr.bin/touch \
     usr.bin/tr \
     usr.bin/true \
     usr.bin/uniq \
     usr.bin/unzip \
     usr.bin/xargs \
     usr.bin/xinstall \
     usr.bin/xz \
     usr.bin/yacc \
     usr.sbin/chown
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \
 		cd ${.CURDIR}/${_tool}; \
 		${NXBMAKE} DIRPRFX=${_tool}/ obj; \
 		${NXBMAKE} DIRPRFX=${_tool}/ depend; \
 		${NXBMAKE} DIRPRFX=${_tool}/ all; \
 		${NXBMAKE} DIRPRFX=${_tool}/ DESTDIR=${NXBDESTDIR} install
 .endfor
 
 #
 # hierarchy - ensure that all the needed directories are present
 #
 # Both names run the distrib-dirs target in etc/ through ${HMAKE}.
 hierarchy hier: .MAKE .PHONY
 	${_+_}cd ${.CURDIR}/etc; ${HMAKE} distrib-dirs
 
 #
 # libraries - build all libraries, and install them under ${DESTDIR}.
 #
 # The list of libraries with dependents (${_prebuild_libs}) and their
 # interdependencies (__L) are built automatically by the
 # ${.CURDIR}/tools/make_libdeps.sh script.
 #
 # The four library groups are built by separate sub-makes, run one after the
 # other in this order: _prereq_libs, _startup_libs, _prebuild_libs,
 # _generic_libs.
 libraries: .MAKE .PHONY
 	${_+_}cd ${.CURDIR}; \
 	    ${MAKE} -f Makefile.inc1 _prereq_libs; \
 	    ${MAKE} -f Makefile.inc1 _startup_libs; \
 	    ${MAKE} -f Makefile.inc1 _prebuild_libs; \
 	    ${MAKE} -f Makefile.inc1 _generic_libs
 
 #
 # static libgcc.a prerequisite for shared libc
 #
 _prereq_libs=	gnu/lib/libssp/libssp_nonshared \
 		gnu/lib/libgcc \
 		lib/libcompiler_rt
 
 # The startup libraries and their ordering are maintained by hand (they are
 # not produced by the automatic dependency generation):
 #
 # gnu/lib/csu, gnu/lib/libgcc, lib/csu and lib/libc have to be built ahead
 # of every shared library for ELF.
 #
 _startup_libs=	gnu/lib/csu \
 		lib/csu \
 		gnu/lib/libgcc \
 		lib/libcompiler_rt \
 		lib/libc \
 		lib/libc_nonshared
 .if ${MK_LIBCPLUSPLUS} != "no"
 _startup_libs+=	lib/libcxxrt
 .endif
 
 gnu/lib/libgcc__L: lib/libc__L lib/libc_nonshared__L
 .if ${MK_LIBCPLUSPLUS} != "no"
 lib/libcxxrt__L: gnu/lib/libgcc__L
 .endif
 
 # Libraries built in the _prebuild_libs stage.  Entries given as variables
 # are optional: each expands to a directory only when the corresponding
 # option is enabled, and to nothing otherwise.
 _prebuild_libs=	${_kerberos5_lib_libasn1} \
 		${_kerberos5_lib_libhdb} \
 		${_kerberos5_lib_libheimbase} \
 		${_kerberos5_lib_libheimntlm} \
 		${_libsqlite3} \
 		${_kerberos5_lib_libheimipcc} \
 		${_kerberos5_lib_libhx509} ${_kerberos5_lib_libkrb5} \
 		${_kerberos5_lib_libroken} \
 		${_kerberos5_lib_libwind} \
 		lib/libbz2 ${_libcom_err} lib/libcrypt \
 		lib/libelf lib/libexpat \
 		lib/libfigpar \
 		${_lib_libgssapi} \
 		lib/libkiconv lib/libkvm lib/liblzma lib/libmd lib/libnv \
 		${_lib_libcapsicum} \
 		lib/ncurses/ncurses lib/ncurses/ncursesw \
 		lib/libopie lib/libpam ${_lib_libthr} \
 		${_lib_libradius} lib/libsbuf lib/libtacplus \
 		lib/libgeom \
 		${_cddl_lib_libumem} ${_cddl_lib_libnvpair} \
 		${_cddl_lib_libuutil} \
 		${_cddl_lib_libavl} \
 		${_cddl_lib_libzfs_core} \
 		${_cddl_lib_libctf} \
 		lib/libutil lib/libpjdlog ${_lib_libypclnt} lib/libz lib/msun \
 		${_secure_lib_libcrypto} ${_lib_libldns} \
 		${_secure_lib_libssh} ${_secure_lib_libssl} \
 		gnu/lib/libdialog
 # The GNU C++ runtime is added only with MK_GNUCXX; libstdc++ depends on
 # msun, and libsupc++ on libstdc++.
 .if ${MK_GNUCXX} != "no"
 _prebuild_libs+= gnu/lib/libstdc++ gnu/lib/libsupc++
 gnu/lib/libstdc++__L: lib/msun__L
 gnu/lib/libsupc++__L: gnu/lib/libstdc++__L
 .endif
 
 .if ${MK_LIBCPLUSPLUS} != "no"
 _prebuild_libs+= lib/libc++
 .endif
 
 # Dependencies between library targets (<dir>__L) written out by hand.
 lib/libgeom__L: lib/libexpat__L
 lib/libkvm__L: lib/libelf__L
 
 # Optional library directories, each set only when its option is enabled.
 .if ${MK_LIBTHR} != "no"
 _lib_libthr=	lib/libthr
 .endif
 
 .if ${MK_RADIUS_SUPPORT} != "no"
 _lib_libradius=	lib/libradius
 .endif
 
 .if ${MK_OFED} != "no"
 _ofed_lib=	contrib/ofed/usr.lib/
 .endif
 
 .if ${MK_CASPER} != "no"
 _lib_libcapsicum=lib/libcapsicum
 .endif
 
 lib/libcapsicum__L: lib/libnv__L
 lib/libpjdlog__L: lib/libutil__L
 lib/liblzma__L: lib/libthr__L
 
 # Top-level library directories for the _generic_libs stage; every
 # LOCAL_LIB_DIRS entry that contains a Makefile is appended.
 _generic_libs=	${_cddl_lib} gnu/lib ${_kerberos5_lib} lib ${_secure_lib} usr.bin/lex/lib ${_ofed_lib}
 .for _DIR in ${LOCAL_LIB_DIRS}
 .if exists(${.CURDIR}/${_DIR}/Makefile)
 _generic_libs+= ${_DIR}
 .endif
 .endfor
 
 # libopie and libtacplus both depend on libmd.
 lib/libopie__L lib/libtacplus__L: lib/libmd__L
 
 # CDDL libraries and their dependencies; only set when MK_CDDL is enabled.
 .if ${MK_CDDL} != "no"
 _cddl_lib_libumem= cddl/lib/libumem
 _cddl_lib_libnvpair= cddl/lib/libnvpair
 _cddl_lib_libavl= cddl/lib/libavl
 _cddl_lib_libuutil= cddl/lib/libuutil
 _cddl_lib_libzfs_core= cddl/lib/libzfs_core
 _cddl_lib_libctf= cddl/lib/libctf
 _cddl_lib= cddl/lib
 cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L
 cddl/lib/libzfs__L: lib/libgeom__L
 cddl/lib/libctf__L: lib/libz__L
 .endif
 # cddl/lib/libdtrace requires lib/libproc and lib/librtld_db; it's only built
 # on select architectures though (see cddl/lib/Makefile)
 .if ${MACHINE_CPUARCH} != "sparc64"
 _prebuild_libs+=	lib/libproc lib/librtld_db
 .endif
 
 # Crypto libraries.  Nesting: MK_CRYPT enables secure/lib as a whole;
 # MK_OPENSSL adds libcrypto and libssl; inside that, MK_LDNS adds libldns and
 # MK_OPENSSH adds libssh, whose dependency list grows with MK_LDNS and
 # MK_KERBEROS_SUPPORT.
 .if ${MK_CRYPT} != "no"
 .if ${MK_OPENSSL} != "no"
 _secure_lib_libcrypto= secure/lib/libcrypto
 _secure_lib_libssl= secure/lib/libssl
 lib/libradius__L secure/lib/libssl__L: secure/lib/libcrypto__L
 .if ${MK_LDNS} != "no"
 _lib_libldns= lib/libldns
 lib/libldns__L: secure/lib/libcrypto__L
 .endif
 .if ${MK_OPENSSH} != "no"
 _secure_lib_libssh= secure/lib/libssh
 secure/lib/libssh__L: lib/libz__L secure/lib/libcrypto__L lib/libcrypt__L
 .if ${MK_LDNS} != "no"
 secure/lib/libssh__L: lib/libldns__L
 .endif
 .if ${MK_KERBEROS_SUPPORT} != "no"
 secure/lib/libssh__L: lib/libgssapi__L kerberos5/lib/libkrb5__L \
     kerberos5/lib/libhx509__L kerberos5/lib/libasn1__L lib/libcom_err__L \
     lib/libmd__L kerberos5/lib/libroken__L
 .endif
 .endif
 .endif
 _secure_lib=	secure/lib
 .endif
 
 # Build ordering among the Kerberos 5 libraries and the base libraries they
 # need.  These rules only declare prerequisites; the recipes for the __L
 # targets are generated by the .for loop over the library lists further down.
 .if ${MK_KERBEROS} != "no"
 kerberos5/lib/libasn1__L: lib/libcom_err__L kerberos5/lib/libroken__L
 kerberos5/lib/libhdb__L: kerberos5/lib/libasn1__L lib/libcom_err__L \
     kerberos5/lib/libkrb5__L kerberos5/lib/libroken__L \
     kerberos5/lib/libwind__L lib/libsqlite3__L
 kerberos5/lib/libheimntlm__L: secure/lib/libcrypto__L kerberos5/lib/libkrb5__L \
     kerberos5/lib/libroken__L lib/libcom_err__L
 kerberos5/lib/libhx509__L: kerberos5/lib/libasn1__L lib/libcom_err__L \
     secure/lib/libcrypto__L kerberos5/lib/libroken__L kerberos5/lib/libwind__L
 kerberos5/lib/libkrb5__L: kerberos5/lib/libasn1__L lib/libcom_err__L \
     lib/libcrypt__L secure/lib/libcrypto__L kerberos5/lib/libhx509__L \
     kerberos5/lib/libroken__L kerberos5/lib/libwind__L \
     kerberos5/lib/libheimbase__L kerberos5/lib/libheimipcc__L
 kerberos5/lib/libroken__L: lib/libcrypt__L
 kerberos5/lib/libwind__L: kerberos5/lib/libroken__L lib/libcom_err__L
 kerberos5/lib/libheimbase__L: lib/libthr__L
 kerberos5/lib/libheimipcc__L: kerberos5/lib/libroken__L kerberos5/lib/libheimbase__L lib/libthr__L
 .endif
 
 lib/libsqlite3__L: lib/libthr__L
 
 # Path variables for optional libraries.  Each one is only assigned when its
 # option is enabled, so it expands to nothing otherwise.
 .if ${MK_GSSAPI} != "no"
 _lib_libgssapi=	lib/libgssapi
 .endif
 
 # Kerberos: the kerberos5/lib tree itself, its individual libraries, and the
 # two base libraries (libsqlite3, libcom_err) that appear as prerequisites in
 # the Kerberos ordering rules.
 .if ${MK_KERBEROS} != "no"
 _kerberos5_lib=	kerberos5/lib
 _kerberos5_lib_libasn1= kerberos5/lib/libasn1
 _kerberos5_lib_libhdb= kerberos5/lib/libhdb
 _kerberos5_lib_libheimbase= kerberos5/lib/libheimbase
 _kerberos5_lib_libkrb5= kerberos5/lib/libkrb5
 _kerberos5_lib_libhx509= kerberos5/lib/libhx509
 _kerberos5_lib_libroken= kerberos5/lib/libroken
 _kerberos5_lib_libheimntlm= kerberos5/lib/libheimntlm
 _libsqlite3= lib/libsqlite3
 _kerberos5_lib_libheimipcc= kerberos5/lib/libheimipcc
 _kerberos5_lib_libwind= kerberos5/lib/libwind
 _libcom_err= lib/libcom_err
 .endif
 
 .if ${MK_NIS} != "no"
 _lib_libypclnt=	lib/libypclnt
 .endif
 
 # With OpenSSL enabled libradius is ordered after libcrypto (declared in the
 # MK_OPENSSL section earlier in this file); without it, it is ordered after
 # libmd.
 .if ${MK_OPENSSL} == "no"
 lib/libradius__L: lib/libmd__L
 .endif
 
 # libproc prerequisites.  The :D modifier yields "<path>__L" only when
 # _cddl_lib_libctf is defined (i.e. MK_CDDL is enabled), so the libctf
 # prerequisite silently disappears otherwise.
 lib/libproc__L: \
     ${_cddl_lib_libctf:D${_cddl_lib_libctf}__L} lib/libelf__L lib/librtld_db__L lib/libutil__L
 # When C++ support is enabled, libproc is also ordered after a C++ runtime
 # support library: libcxxrt with libc++, otherwise GNU libsupc++.
 .if ${MK_CXX} != "no"
 .if ${MK_LIBCPLUSPLUS} != "no"
 lib/libproc__L: lib/libcxxrt__L
 .else # This implies MK_GNUCXX != "no"; see lib/libproc
 lib/libproc__L: gnu/lib/libsupc++__L
 .endif
 .endif
 
 gnu/lib/libdialog__L: lib/msun__L lib/ncurses/ncursesw__L
 
 # Generate one <dir>__PL target per entry of _prereq_libs.  The recipe runs
 # obj, depend, all and install inside the directory with tests disabled; the
 # all and install steps additionally disable profiling and pass -DNO_PIC.
 # If the directory does not exist the target is still declared but has no
 # recipe.
 .for _lib in ${_prereq_libs}
 ${_lib}__PL: .PHONY .MAKE
 .if exists(${.CURDIR}/${_lib})
 	${_+_}@${ECHODIR} "===> ${_lib} (obj,depend,all,install)"; \
 		cd ${.CURDIR}/${_lib}; \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ depend; \
 		${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \
 		    DIRPRFX=${_lib}/ all; \
 		${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \
 		    DIRPRFX=${_lib}/ install
 .endif
 .endfor
 
 # Generate one <dir>__L target per startup, prebuild and generic library
 # directory.  lib/libpam is filtered out of the prebuild list with :N because
 # it has its own hand-written rule.  The recipe runs obj, depend, all and
 # install in the directory with tests disabled; a missing directory yields a
 # target without a recipe.
 .for _lib in ${_startup_libs} ${_prebuild_libs:Nlib/libpam} ${_generic_libs}
 ${_lib}__L: .PHONY .MAKE
 .if exists(${.CURDIR}/${_lib})
 	${_+_}@${ECHODIR} "===> ${_lib} (obj,depend,all,install)"; \
 		cd ${.CURDIR}/${_lib}; \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ depend; \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ all; \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ install
 .endif
 .endfor
 
 # libpam is special: we need to build static PAM modules before
 # static PAM library, and dynamic PAM library before dynamic PAM
 # modules.
 # The recipe mirrors the generic <dir>__L recipe, except that the all and
 # install steps are run with -D_NO_LIBPAM_SO_YET (presumably consumed by the
 # lib/libpam makefiles to postpone the shared library -- TODO confirm there).
 lib/libpam__L: .PHONY .MAKE
 	${_+_}@${ECHODIR} "===> lib/libpam (obj,depend,all,install)"; \
 		cd ${.CURDIR}/lib/libpam; \
 		${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ obj; \
 		${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ depend; \
 		${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ \
 		    -D_NO_LIBPAM_SO_YET all; \
 		${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ \
 		    -D_NO_LIBPAM_SO_YET install
 
 # Aggregate targets: each one depends on the per-directory targets of its
 # list.  The :S/$/suffix/ modifier appends the suffix to every word, turning
 # e.g. "lib/libfoo" into "lib/libfoo__L" (or "__PL" for the prereq libs).
 _prereq_libs: ${_prereq_libs:S/$/__PL/}
 _startup_libs: ${_startup_libs:S/$/__L/}
 _prebuild_libs: ${_prebuild_libs:S/$/__L/}
 _generic_libs: ${_generic_libs:S/$/__L/}
 
 # Enable SUBDIR_PARALLEL when not calling 'make all', unless called from
 # 'everything' with _PARALLEL_SUBDIR_OK set.  This is because it is unlikely
 # that running 'make all' from the top-level, especially with a SUBDIR_OVERRIDE
 # or LOCAL_DIRS set, will have a reliable build if SUBDIRs are built in
 # parallel.  This is safe for the world stage of buildworld though since it has
 # already built libraries in a proper order and installed includes into
 # WORLDTMP. Special handling is done for SUBDIR ordering for 'install*' to
 # avoid trashing a system if it crashes mid-install.
 # NOTE(review): the variable is only given an empty value here, so the
 # consumer is presumably testing whether it is defined -- confirm against
 # bsd.subdir.mk.
 .if !make(all) || defined(_PARALLEL_SUBDIR_OK)
 SUBDIR_PARALLEL=
 .endif
 
 .include <bsd.subdir.mk>
 
 .if make(check-old) || make(check-old-dirs) || \
     make(check-old-files) || make(check-old-libs) || \
     make(delete-old) || make(delete-old-dirs) || \
     make(delete-old-files) || make(delete-old-libs)
 
 #
 # check for / delete old files section
 #
 
 .include "ObsoleteFiles.inc"
 
 OLD_LIBS_MESSAGE="Please be sure no application still uses those libraries, \
 else you can not start such an application. Consult UPDATING for more \
 information regarding how to cope with the removal/revision bump of a \
 specific library."
 
 .if !defined(BATCH_DELETE_OLD_FILES)
 RM_I=-i
 .else
 RM_I=-v
 .endif
 
 # delete-old-files: remove obsolete files from ${DESTDIR}.
 #
 # The list is obtained by re-running make with -V on OLD_FILES, plus the
 # usr/share/*.gz entries with their .gz suffix stripped (:R).  For each name
 # that exists as a regular file or symlink, the schg flag is cleared
 # (best effort) and the file is removed with "rm ${RM_I}" (-i unless
 # BATCH_DELETE_OLD_FILES is defined, then -v).  Matching .debug/.symbols
 # files under ${DESTDIR}${DEBUGDIR} are removed once the file itself is gone.
 # A second pass removes catpages whose manpage no longer exists.
 #
 # Both loops read their input from a pipe, so the original stdin is saved on
 # descriptor 3 and handed to rm for its interactive prompt.
 delete-old-files:
 	@echo ">>> Removing old files (only deletes safe to delete libs)"
 # Ask for every old file if the user really wants to remove it.
 # It's annoying, but better safe than sorry.
 # NB: We cannot pass the list of OLD_FILES as a parameter because the
 # argument list will get too long. Using .for/.endfor make "loops" will make
 # the Makefile parser segfault.
 	@exec 3<&0; \
 	cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \
 	while read file; do \
 		if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \
 			chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \
 			rm ${RM_I} "${DESTDIR}/$${file}" <&3; \
 		fi; \
 		for ext in debug symbols; do \
 		  if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \
 		      "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \
 			  rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \
 			      <&3; \
 		  fi; \
 		done; \
 	done
 # Remove catpages without corresponding manpages.
 # "sed -ep" prints every catpage path unchanged, followed by the same path
 # rewritten to the man directory; the loop consumes the lines in pairs.
 # find's stderr is discarded so that a tree without any cat* directory (the
 # glob is then passed through literally) does not produce a spurious error.
 	@exec 3<&0; \
 	find ${DESTDIR}/usr/share/man/cat* ! -type d 2>/dev/null | \
 	sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \
 	while read catpage; do \
 		read manpage; \
 		if [ ! -e "$${manpage}" ]; then \
 			rm ${RM_I} "$${catpage}" <&3; \
 		fi; \
 	done
 	@echo ">>> Old files removed"
 
 # check-old-files: list, without removing anything, every obsolete file that
 # is still present in ${DESTDIR}.
 #
 # The list comes from OLD_FILES plus the usr/share/*.gz entries with their
 # .gz suffix stripped (:R).  Existing regular files and symlinks are printed,
 # as are any .debug/.symbols files for them under ${DESTDIR}${DEBUGDIR}.
 # A second pass prints catpages whose manpage no longer exists.
 check-old-files:
 	@echo ">>> Checking for old files"
 	@cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \
 	while read file; do \
 		if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \
 			echo "${DESTDIR}/$${file}"; \
 		fi; \
 		for ext in debug symbols; do \
 		  if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \
 			  echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \
 		  fi; \
 		done; \
 	done
 # Check for catpages without corresponding manpages.
 # "sed -ep" prints every catpage path unchanged, followed by the same path
 # rewritten to the man directory; the loop consumes the lines in pairs.
 # find's stderr is discarded so that a tree without any cat* directory (the
 # glob is then passed through literally) does not produce a spurious error.
 	@find ${DESTDIR}/usr/share/man/cat* ! -type d 2>/dev/null | \
 	sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \
 	while read catpage; do \
 		read manpage; \
 		if [ ! -e "$${manpage}" ]; then \
 			echo "$${catpage}"; \
 		fi; \
 	done
 
 # delete-old-libs: remove the libraries listed in OLD_LIBS from ${DESTDIR},
 # after printing ${OLD_LIBS_MESSAGE}.  Each existing regular file or symlink
 # has its schg flag cleared (best effort) and is removed with "rm ${RM_I}";
 # its .debug/.symbols files under ${DESTDIR}${DEBUGDIR} are removed once the
 # library itself is gone.  The original stdin is kept on descriptor 3 for
 # rm, because the loop's own stdin is the pipe carrying the list.
 delete-old-libs:
 	@echo ">>> Removing old libraries"
 	@echo "${OLD_LIBS_MESSAGE}" | fmt
 	@exec 3<&0; \
 	cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_LIBS | xargs -n1 | \
 	while read file; do \
 		if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \
 			chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \
 			rm ${RM_I} "${DESTDIR}/$${file}" <&3; \
 		fi; \
 		for ext in debug symbols; do \
 		  if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \
 		      "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \
 			  rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \
 			      <&3; \
 		  fi; \
 		done; \
 	done
 	@echo ">>> Old libraries removed"
 
 # check-old-libs: print every library from OLD_LIBS that still exists in
 # ${DESTDIR} as a regular file or symlink, together with any .debug/.symbols
 # files for it under ${DESTDIR}${DEBUGDIR}.  Nothing is removed.
 check-old-libs:
 	@echo ">>> Checking for old libraries"
 	@cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_LIBS | xargs -n1 | \
 	while read file; do \
 		if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \
 			echo "${DESTDIR}/$${file}"; \
 		fi; \
 		for ext in debug symbols; do \
 		  if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \
 			  echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \
 		  fi; \
 		done; \
 	done
 
 # delete-old-dirs: rmdir the directories listed in OLD_DIRS below ${DESTDIR}.
 # The list is reverse-sorted so that a subdirectory is handled before its
 # parent.  rmdir failures are ignored ("|| true"), so a directory that cannot
 # be removed is simply left in place.  Symlinks are never removed; they are
 # reported for manual cleanup.
 delete-old-dirs:
 	@echo ">>> Removing old directories"
 	@cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_DIRS | xargs -n1 | sort -r | \
 	while read dir; do \
 		if [ -d "${DESTDIR}/$${dir}" ]; then \
 			rmdir -v "${DESTDIR}/$${dir}" || true; \
 		elif [ -L "${DESTDIR}/$${dir}" ]; then \
 			echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \
 		fi; \
 	done
 	@echo ">>> Old directories removed"
 
 # check-old-dirs: print every directory from OLD_DIRS that still exists below
 # ${DESTDIR}; entries that are symlinks are reported with a hint to clean
 # them up manually.  Nothing is removed.
 check-old-dirs:
 	@echo ">>> Checking for old directories"
 	@cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_DIRS | xargs -n1 | \
 	while read dir; do \
 		if [ -d "${DESTDIR}/$${dir}" ]; then \
 			echo "${DESTDIR}/$${dir}"; \
 		elif [ -L "${DESTDIR}/$${dir}" ]; then \
 			echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \
 		fi; \
 	done
 
 # delete-old removes old files and directories but deliberately leaves the
 # old libraries alone; it only prints how to remove them.
 delete-old: delete-old-files delete-old-dirs
 	@echo "To remove old libraries run '${MAKE} delete-old-libs'."
 
 # check-old runs all three checks (files, libraries, directories) and then
 # prints the commands that perform the actual removal.
 check-old: check-old-files check-old-libs check-old-dirs
 	@echo "To remove old files and directories run '${MAKE} delete-old'."
 	@echo "To remove old libraries run '${MAKE} delete-old-libs'."
 
 .endif
 
 #
 # showconfig - show build configuration.
 #
 # Runs make in no-execute mode on the kernel and world option makefiles with
 # the -dg1 debug flag, merges stdout and stderr, and prints the unique,
 # sorted lines that start with "MK_".  "-V dummy" asks for the value of a
 # placeholder variable, presumably so that no target is built -- TODO confirm.
 #
 showconfig:
 	@(${MAKE} -n -f ${.CURDIR}/sys/conf/kern.opts.mk -V dummy -dg1; \
 	  ${MAKE} -n -f ${.CURDIR}/share/mk/src.opts.mk -V dummy -dg1) 2>&1 | grep ^MK_ | sort -u
 
 # Device tree settings for builddtb.
 # When both KRNLOBJDIR and KERNCONF are set, the blob goes into the kernel's
 # object directory.  If FDT_DTS_FILE was not supplied, it is taken from the
 # "makeoptions FDT_DTS_FILE=..." line of the kernel configuration file (the
 # text after the '='); the trailing "echo" makes the command list succeed
 # regardless of awk's exit status.
 .if !empty(KRNLOBJDIR) && !empty(KERNCONF)
 DTBOUTPUTPATH= ${KRNLOBJDIR}/${KERNCONF}/
 
 .if !defined(FDT_DTS_FILE) || empty(FDT_DTS_FILE)
 .if exists(${KERNCONFDIR}/${KERNCONF})
 FDT_DTS_FILE!= awk 'BEGIN {FS="="} /^makeoptions[[:space:]]+FDT_DTS_FILE/ {print $$2}' \
 	'${KERNCONFDIR}/${KERNCONF}' ; echo
 .endif
 .endif
 
 .endif
 
 # Fall back to the source directory when no output path was chosen above or
 # the chosen directory does not exist.
 .if !defined(DTBOUTPUTPATH) || !exists(${DTBOUTPUTPATH})
 DTBOUTPUTPATH= ${.CURDIR}
 .endif
 
 #
 # Build 'standalone' Device Tree Blob
 #
 # Invokes sys/tools/fdt/make_dtb.sh with the sys directory, the DTS file name
 # and the output path as arguments, using ${TMPPATH} as PATH and ${TARGET} as
 # MACHINE.  FDT_DTS_FILE is passed quoted, so an empty value still reaches the
 # script as an (empty) argument.
 #
 builddtb:
 	@PATH=${TMPPATH} MACHINE=${TARGET} \
 	${.CURDIR}/sys/tools/fdt/make_dtb.sh ${.CURDIR}/sys \
 	    "${FDT_DTS_FILE}" ${DTBOUTPUTPATH}
 
 ###############
 
 # cleanworld
 # In the following, the first 'rm' in a series will usually remove all
 # files and directories.  If it does not, then there are probably some
 # files with file flags set, so this unsets them and tries the 'rm' a
 # second time.  There are situations where this target will be cleaning
 # some directories via more than one method, but that duplication is
 # needed to correctly handle all the possible situations.  Removing all
 # files without file flags set in the first 'rm' instance saves time,
 # because 'chflags' will need to operate on fewer files afterwards.
 #
 # It is expected that BW_CANONICALOBJDIR == the CANONICALOBJDIR as would be
 # created by bsd.obj.mk, except that we don't want to .include that file
 # in this makefile.
 #
 # := expands ${OBJTREE} and ${.CURDIR} immediately, at this point of parsing.
 BW_CANONICALOBJDIR:=${OBJTREE}${.CURDIR}
 cleanworld: .PHONY
 .if exists(${BW_CANONICALOBJDIR}/)
 # First rm: errors ignored (leading '-').  Then clear all file flags
 # recursively and run rm again; only this last rm may fail the target.
 	-rm -rf ${BW_CANONICALOBJDIR}/*
 	-chflags -R 0 ${BW_CANONICALOBJDIR}
 	rm -rf ${BW_CANONICALOBJDIR}/*
 .endif
 # Object files live in the source tree itself (or in its obj subdirectory).
 .if ${.CURDIR} == ${.OBJDIR} || ${.CURDIR}/obj == ${.OBJDIR}
 	#   To be safe in this case, fall back to a 'make cleandir'
 	${_+_}@cd ${.CURDIR}; ${MAKE} cleandir
 .endif
 
 .if defined(TARGET) && defined(TARGET_ARCH)
 
 # CPU type for the xdev build: the host's CPUTYPE for a native build,
 # TARGET_CPUTYPE when cross-building.  A value supplied by the user wins (?=).
 .if ${TARGET} == ${MACHINE} && ${TARGET_ARCH} == ${MACHINE_ARCH}
 XDEV_CPUTYPE?=${CPUTYPE}
 .else
 XDEV_CPUTYPE?=${TARGET_CPUTYPE}
 .endif
 
 # make arguments shared by all xdev sub-makes: optional components switched
 # off, plus the target selection.
 NOFUN=-DNO_FSCHG MK_HTML=no -DNO_LINT \
 	MK_MAN=no MK_NLS=no MK_PROFILE=no \
 	MK_KERBEROS=no MK_RESCUE=no MK_TESTS=no MK_WARNS=no \
 	TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \
 	CPUTYPE=${XDEV_CPUTYPE}
 
 # XDDIR names the toolchain (<arch>-freebsd); XDTP is its install prefix.
 # ${XDTP:N/*} drops every word that starts with '/', so a non-empty result
 # means XDTP is not an absolute path.
 XDDIR=${TARGET_ARCH}-freebsd
 XDTP?=/usr/${XDDIR}
 .if ${XDTP:N/*}
 .error XDTP variable should be an absolute path
 .endif
 
 # Environments for the sub-makes:
 #   CDBENV - separate object tree for this toolchain and the install wrapper
 #   CDENV  - CDBENV plus TOOLS_PREFIX
 #   CD2ENV - CDENV plus compilers pointed at the headers, libraries and
 #            programs installed below ${XDDESTDIR}, and the target MACHINE
 #            and MACHINE_ARCH
 CDBENV=MAKEOBJDIRPREFIX=${MAKEOBJDIRPREFIX}/${XDDIR} \
 	INSTALL="sh ${.CURDIR}/tools/install.sh"
 CDENV= ${CDBENV} \
 	TOOLS_PREFIX=${XDTP}
 CD2CFLAGS=-isystem ${XDDESTDIR}/usr/include -L${XDDESTDIR}/usr/lib \
 	--sysroot=${XDDESTDIR}/ -B${XDDESTDIR}/usr/libexec \
 	-B${XDDESTDIR}/usr/bin -B${XDDESTDIR}/usr/lib
 CD2ENV=${CDENV} CC="${CC} ${CD2CFLAGS}" CXX="${CXX} ${CD2CFLAGS}" \
 	CPP="${CPP} ${CD2CFLAGS}" \
 	MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH}
 
 # CDTMP receives the bootstrap tools; CDMAKE/CD2MAKE are the full sub-make
 # command lines, with the freshly built tools first in PATH.
 CDTMP=	${MAKEOBJDIRPREFIX}/${XDDIR}/${.CURDIR}/tmp
 CDMAKE=${CDENV} PATH=${CDTMP}/usr/bin:${PATH} ${MAKE} ${NOFUN}
 CD2MAKE=${CD2ENV} PATH=${CDTMP}/usr/bin:${XDDESTDIR}/usr/bin:${PATH} ${MAKE} ${NOFUN}
 XDDESTDIR=${DESTDIR}/${XDTP}
 # OSREL defaults to the running system's release with everything from the
 # first '-' or '(' onward removed.
 .if !defined(OSREL)
 OSREL!= uname -r | sed -e 's/[-(].*//'
 .endif
 
 # xdev builds and installs the cross toolchain.  xdev-links is ordered after
 # the other two by .ORDER but is not a prerequisite of xdev, so it only runs
 # when requested explicitly.
 .ORDER: xdev-build xdev-install xdev-links
 xdev: xdev-build xdev-install
 
 # The build stages must run in this order even under a parallel make.
 .ORDER: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools
 xdev-build: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools
 
 # Create ${CDTMP}/usr and populate its directory hierarchy from BSD.usr.dist.
 _xb-worldtmp: .PHONY
 	mkdir -p ${CDTMP}/usr
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${CDTMP}/usr >/dev/null
 
 # Build the bootstrap tools and install them into ${CDTMP}.  The recipe is
 # emitted once per tool by the .for loop; variables that are empty contribute
 # no iteration.
 _xb-bootstrap-tools: .PHONY
 .for _tool in \
     ${_clang_tblgen} \
     ${_gperf}
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \
 	cd ${.CURDIR}/${_tool}; \
 	${CDMAKE} DIRPRFX=${_tool}/ obj; \
 	${CDMAKE} DIRPRFX=${_tool}/ depend; \
 	${CDMAKE} DIRPRFX=${_tool}/ all; \
 	${CDMAKE} DIRPRFX=${_tool}/ DESTDIR=${CDTMP} install
 .endfor
 
 # Run this makefile's build-tools target in the xdev object tree.  Note that
 # this uses CDBENV directly, i.e. without TOOLS_PREFIX and without the
 # ${CDTMP}/usr/bin PATH prefix that CDMAKE adds.
 _xb-build-tools: .PHONY
 	${_+_}@cd ${.CURDIR}; \
 	${CDBENV} ${MAKE} -f Makefile.inc1 ${NOFUN} build-tools
 
 # Build (obj, depend, all) the cross tools.  They are not installed here;
 # installation is done by _xi-cross-tools, which loops over the same list.
 _xb-cross-tools: .PHONY
 .for _tool in \
     ${_binutils} \
     ${_elftctools} \
     usr.bin/ar \
     ${_clang_libs} \
     ${_clang} \
     ${_cc}
 	${_+_}@${ECHODIR} "===> xdev ${_tool} (obj,depend,all)"; \
 	cd ${.CURDIR}/${_tool}; \
 	${CDMAKE} DIRPRFX=${_tool}/ obj; \
 	${CDMAKE} DIRPRFX=${_tool}/ depend; \
 	${CDMAKE} DIRPRFX=${_tool}/ all
 .endfor
 
 # Create ${XDDESTDIR} and populate its directory hierarchy from the root,
 # usr and include mtree specifications; the lib32 and tests hierarchies are
 # added only when the corresponding option is enabled.
 _xi-mtree: .PHONY
 	${_+_}@${ECHODIR} "mtree populating ${XDDESTDIR}"
 	mkdir -p ${XDDESTDIR}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \
 	    -p ${XDDESTDIR} >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${XDDESTDIR}/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \
 	    -p ${XDDESTDIR}/usr/include >/dev/null
 .if ${MK_LIB32} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \
 	    -p ${XDDESTDIR}/usr >/dev/null
 .endif
 .if ${MK_TESTS} != "no"
 	mkdir -p ${XDDESTDIR}${TESTSBASE}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \
 	    -p ${XDDESTDIR}${TESTSBASE} >/dev/null
 .endif
 
 # Install stages, forced into this order even under a parallel make: the
 # toolchain must be built and the directory tree created before anything is
 # installed, and the cross tools must be in place before includes and
 # libraries are produced with them.
 .ORDER: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries
 xdev-install: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries
 
 # Install the cross tools built by _xb-cross-tools into ${XDDESTDIR}.
 _xi-cross-tools: .PHONY
 	@echo "_xi-cross-tools"
 .for _tool in \
     ${_binutils} \
     ${_elftctools} \
     usr.bin/ar \
     ${_clang_libs} \
     ${_clang} \
     ${_cc}
 	${_+_}@${ECHODIR} "===> xdev ${_tool} (install)"; \
 	cd ${.CURDIR}/${_tool}; \
 	${CDMAKE} DIRPRFX=${_tool}/ install DESTDIR=${XDDESTDIR}
 .endfor
 
 # Run the includes and libraries targets of this makefile with CD2MAKE, i.e.
 # with the compilers pointed at ${XDDESTDIR}, installing into ${XDDESTDIR}.
 _xi-includes: .PHONY
 	${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 includes \
 		DESTDIR=${XDDESTDIR}
 
 _xi-libraries: .PHONY
 	${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 libraries \
 		DESTDIR=${XDDESTDIR}
 
 # For every program in ${XDDESTDIR}/usr/bin create two symlinks in the
 # usr/bin directory four levels up: <XDDIR>-<prog> and <XDDIR><OSREL>-<prog>.
 # NOTE(review): the hard-coded ../../../../ and ../../ components only resolve
 # to ${DESTDIR}/usr/bin when XDTP has exactly two path components, as the
 # default /usr/${XDDIR} does -- confirm before using a different XDTP.
 xdev-links: .PHONY
 	${_+_}cd ${XDDESTDIR}/usr/bin; \
 	mkdir -p ../../../../usr/bin; \
 		for i in *; do \
 			ln -sf ../../${XDTP}/usr/bin/$$i \
 			    ../../../../usr/bin/${XDDIR}-$$i; \
 			ln -sf ../../${XDTP}/usr/bin/$$i \
 			    ../../../../usr/bin/${XDDIR}${OSREL}-$$i; \
 		done
 .else
 # Fallback when TARGET and TARGET_ARCH are not both defined: report the
 # problem and fail, so that the caller sees a non-zero exit status instead of
 # an apparently successful (but empty) xdev run.
 xdev xdev-build xdev-install xdev-links:
 	@echo "*** Error: Both TARGET and TARGET_ARCH must be defined for \"${.TARGET}\" target"
 	@false
 .endif
Index: projects/clang380-import/bin/sh/eval.c
===================================================================
--- projects/clang380-import/bin/sh/eval.c	(revision 293686)
+++ projects/clang380-import/bin/sh/eval.c	(revision 293687)
@@ -1,1382 +1,1382 @@
 /*-
  * Copyright (c) 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Kenneth Almquist.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 #if 0
 static char sccsid[] = "@(#)eval.c	8.9 (Berkeley) 6/8/95";
 #endif
 #endif /* not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <paths.h>
 #include <signal.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <sys/resource.h>
 #include <sys/wait.h> /* For WIFSIGNALED(status) */
 #include <errno.h>
 
 /*
  * Evaluate a command.
  */
 
 #include "shell.h"
 #include "nodes.h"
 #include "syntax.h"
 #include "expand.h"
 #include "parser.h"
 #include "jobs.h"
 #include "eval.h"
 #include "builtins.h"
 #include "options.h"
 #include "exec.h"
 #include "redir.h"
 #include "input.h"
 #include "output.h"
 #include "trap.h"
 #include "var.h"
 #include "memalloc.h"
 #include "error.h"
 #include "show.h"
 #include "mystring.h"
 #ifndef NO_HISTORY
 #include "myhistedit.h"
 #endif
 
 
 int evalskip;			/* set if we are skipping commands */
 int skipcount;			/* number of levels to skip */
 static int loopnest;		/* current loop nesting level */
 int funcnest;			/* depth of function calls */
 static int builtin_flags;	/* evalcommand flags for builtins */
 
 
 char *commandname;
 struct arglist *cmdenviron;
 int exitstatus;			/* exit status of last command */
 int oexitstatus;		/* saved exit status */
 
 
 static void evalloop(union node *, int);
 static void evalfor(union node *, int);
 static union node *evalcase(union node *);
 static void evalsubshell(union node *, int);
 static void evalredir(union node *, int);
 static void exphere(union node *, struct arglist *);
 static void expredir(union node *);
 static void evalpipe(union node *);
 static int is_valid_fast_cmdsubst(union node *n);
 static void evalcommand(union node *, int, struct backcmd *);
 static void prehash(union node *);
 
 
 /*
  * Bring the evaluator's bookkeeping back to its idle state after an
  * exception: we are no longer inside any loop and no skip is pending.
  */
 
 void
 reseteval(void)
 {
 	loopnest = 0;
 	evalskip = 0;
 }
 
 
 /*
  * The eval builtin.  The arguments after the command name are joined with
  * single spaces and the result is evaluated as shell input, using the flags
  * saved in builtin_flags.  Without arguments the exit status is set to 0.
  * Returns the resulting exitstatus.
  */
 
 int
 evalcmd(int argc, char **argv)
 {
 	char *cmd;
 	char *buf;
 	char **argp;
 
 	if (argc <= 1) {
 		exitstatus = 0;
 		return exitstatus;
 	}
 
 	cmd = argv[1];
 	if (argc > 2) {
 		/* Build "arg1 arg2 ..." on the string stack. */
 		STARTSTACKSTR(buf);
 		STPUTS(cmd, buf);
 		for (argp = argv + 2; *argp != NULL; argp++) {
 			STPUTC(' ', buf);
 			STPUTS(*argp, buf);
 		}
 		STPUTC('\0', buf);
 		cmd = grabstackstr(buf);
 	}
 	evalstring(cmd, builtin_flags);
 	return exitstatus;
 }
 
 
 /*
  * Execute a command or commands contained in a string.
  *
  * The string is pushed as input and parsed one command at a time.  EV_EXIT
  * is stripped from flags and only passed on to a command when
  * preadateof() reports true for it (presumably: no input is left after it --
  * TODO confirm in input.c).  If no command was executed, exitstatus is set
  * to 0.  When EV_EXIT was requested, EXEXIT is raised at the end.
  */
 
 void
 evalstring(const char *s, int flags)
 {
 	union node *n;
 	struct stackmark smark;
 	int flags_exit;
 	int any;
 
 	flags_exit = flags & EV_EXIT;
 	flags &= ~EV_EXIT;
 	any = 0;
 	setstackmark(&smark);
 	setinputstring(s, 1);
 	while ((n = parsecmd(0)) != NEOF) {
 		/* A NULL tree is skipped, as is everything when nflag is set. */
 		if (n != NULL && !nflag) {
 			if (flags_exit && preadateof())
 				evaltree(n, flags | EV_EXIT);
 			else
 				evaltree(n, flags);
 			any = 1;
 			/* A pending skip ends the string early. */
 			if (evalskip)
 				break;
 		}
 		/* Release the stack memory used by this command. */
 		popstackmark(&smark);
 		setstackmark(&smark);
 	}
 	popfile();
 	popstackmark(&smark);
 	if (!any)
 		exitstatus = 0;
 	if (flags_exit)
 		exraise(EXEXIT);
 }
 
 
 /*
  * Evaluate a parse tree.  The value is left in the global variable
  * exitstatus.
  *
  * Node types whose evaluation ends with another tree (the second half of a
  * list, the selected branch of an if or case) store that tree in "next" and
  * are continued by the enclosing do/while loop instead of a recursive call.
  * After evaluation, pending signals are handled; the shell exits if eflag is
  * set and a command, pipeline or subshell evaluated without EV_TESTED left a
  * non-zero status; and EXEXIT is raised if EV_EXIT is set in flags.
  */
 
 void
 evaltree(union node *n, int flags)
 {
 	int do_etest;
 	union node *next;
 	struct stackmark smark;
 
 	setstackmark(&smark);
 	do_etest = 0;
 	if (n == NULL) {
 		TRACE(("evaltree(NULL) called\n"));
 		exitstatus = 0;
 		goto out;
 	}
 	do {
 		next = NULL;
 #ifndef NO_HISTORY
 		displayhist = 1;	/* show history substitutions done with fc */
 #endif
 		TRACE(("evaltree(%p: %d) called\n", (void *)n, n->type));
 		switch (n->type) {
 		case NSEMI:
 			/* Only the last command of a list may inherit EV_EXIT. */
 			evaltree(n->nbinary.ch1, flags & ~EV_EXIT);
 			if (evalskip)
 				goto out;
 			next = n->nbinary.ch2;
 			break;
 		case NAND:
 			/* Left side is a test; run the right side on success. */
 			evaltree(n->nbinary.ch1, EV_TESTED);
 			if (evalskip || exitstatus != 0) {
 				goto out;
 			}
 			next = n->nbinary.ch2;
 			break;
 		case NOR:
 			/* Left side is a test; run the right side on failure. */
 			evaltree(n->nbinary.ch1, EV_TESTED);
 			if (evalskip || exitstatus == 0)
 				goto out;
 			next = n->nbinary.ch2;
 			break;
 		case NREDIR:
 			evalredir(n, flags);
 			break;
 		case NSUBSHELL:
 			evalsubshell(n, flags);
 			do_etest = !(flags & EV_TESTED);
 			break;
 		case NBACKGND:
 			evalsubshell(n, flags);
 			break;
 		case NIF: {
 			evaltree(n->nif.test, EV_TESTED);
 			if (evalskip)
 				goto out;
 			if (exitstatus == 0)
 				next = n->nif.ifpart;
 			else if (n->nif.elsepart)
 				next = n->nif.elsepart;
 			else
 				/* Test failed and there is no else part. */
 				exitstatus = 0;
 			break;
 		}
 		case NWHILE:
 		case NUNTIL:
 			evalloop(n, flags & ~EV_EXIT);
 			break;
 		case NFOR:
 			evalfor(n, flags & ~EV_EXIT);
 			break;
 		case NCASE:
 			/* evalcase() returns the tree to run, or NULL. */
 			next = evalcase(n);
 			break;
 		case NCLIST:
 			next = n->nclist.body;
 			break;
 		case NCLISTFALLTHRU:
 			/* Run this body, then continue with the next case entry. */
 			if (n->nclist.body) {
 				evaltree(n->nclist.body, flags & ~EV_EXIT);
 				if (evalskip)
 					goto out;
 			}
 			next = n->nclist.next;
 			break;
 		case NDEFUN:
 			defun(n->narg.text, n->narg.next);
 			exitstatus = 0;
 			break;
 		case NNOT:
 			evaltree(n->nnot.com, EV_TESTED);
 			if (evalskip)
 				goto out;
 			exitstatus = !exitstatus;
 			break;
 
 		case NPIPE:
 			evalpipe(n);
 			do_etest = !(flags & EV_TESTED);
 			break;
 		case NCMD:
 			evalcommand(n, flags, (struct backcmd *)NULL);
 			do_etest = !(flags & EV_TESTED);
 			break;
 		default:
 			/* Unknown node type: report it and carry on. */
 			out1fmt("Node type = %d\n", n->type);
 			flushout(&output);
 			break;
 		}
 		n = next;
 		/* Release the stack memory used by the node just evaluated. */
 		popstackmark(&smark);
 		setstackmark(&smark);
 	} while (n != NULL);
 out:
 	popstackmark(&smark);
 	if (pendingsig)
 		dotrap();
 	if (eflag && exitstatus != 0 && do_etest)
 		exitshell(exitstatus);
 	if (flags & EV_EXIT)
 		exraise(EXEXIT);
 }
 
 
 /*
  * Evaluate a while or until loop.  n->nbinary.ch1 is the condition (always
  * evaluated with EV_TESTED) and n->nbinary.ch2 the body.  On return,
  * exitstatus is the status of the last body execution, 0 if the body never
  * ran, or the current exitstatus if the loop was left by a pending
  * SKIPRETURN.
  */
 static void
 evalloop(union node *n, int flags)
 {
 	int status;
 
 	loopnest++;
 	status = 0;
 	for (;;) {
 		/*
 		 * If the body left a skip pending, do not evaluate the
 		 * condition; fall straight into the skip handling below.
 		 */
 		if (!evalskip)
 			evaltree(n->nbinary.ch1, EV_TESTED);
 		if (evalskip) {
 			/* A continue that targets this loop: start over. */
 			if (evalskip == SKIPCONT && --skipcount <= 0) {
 				evalskip = 0;
 				continue;
 			}
 			/* A break that targets this loop: consume it. */
 			if (evalskip == SKIPBREAK && --skipcount <= 0)
 				evalskip = 0;
 			if (evalskip == SKIPRETURN)
 				status = exitstatus;
 			/* Otherwise the skip stays pending for an outer level. */
 			break;
 		}
 		/* while stops on a failing condition, until on a successful one. */
 		if (n->type == NWHILE) {
 			if (exitstatus != 0)
 				break;
 		} else {
 			if (exitstatus == 0)
 				break;
 		}
 		evaltree(n->nbinary.ch2, flags);
 		status = exitstatus;
 	}
 	loopnest--;
 	exitstatus = status;
 }
 
 
 
 /*
  * Evaluate a for loop.  All words of n->nfor.args are expanded up front into
  * one argument list; the body is then run once per resulting word with the
  * loop variable set to it.  On return, exitstatus is the status of the last
  * body execution, or 0 if the list was empty.
  */
 static void
 evalfor(union node *n, int flags)
 {
 	struct arglist arglist;
 	union node *argp;
 	int i;
 	int status;
 
 	emptyarglist(&arglist);
 	for (argp = n->nfor.args ; argp ; argp = argp->narg.next) {
 		/* Save the status before expansion starts. */
 		oexitstatus = exitstatus;
 		expandarg(argp, &arglist, EXP_FULL | EXP_TILDE);
 	}
 
 	loopnest++;
 	status = 0;
 	for (i = 0; i < arglist.count; i++) {
 		setvar(n->nfor.var, arglist.args[i], 0);
 		evaltree(n->nfor.body, flags);
 		status = exitstatus;
 		if (evalskip) {
 			/* A continue that targets this loop: next word. */
 			if (evalskip == SKIPCONT && --skipcount <= 0) {
 				evalskip = 0;
 				continue;
 			}
 			/* A break that targets this loop: consume it. */
 			if (evalskip == SKIPBREAK && --skipcount <= 0)
 				evalskip = 0;
 			/* Any other skip stays pending for an outer level. */
 			break;
 		}
 	}
 	loopnest--;
 	exitstatus = status;
 }
 
 
 /*
  * Evaluate a case statement, returning the selected tree.
  *
  * The exit status needs care to get right.
  *
  * The case word is expanded once and compared against each pattern in
  * order.  The return value is the tree the caller should evaluate next:
  * the matching entry's body, the entry node itself when it is a
  * fall-through entry followed by another entry, or NULL.  exitstatus is set
  * to 0 when nothing matches or when the selected body is empty.
  */
 
 static union node *
 evalcase(union node *n)
 {
 	union node *cp;
 	union node *patp;
 	struct arglist arglist;
 
 	emptyarglist(&arglist);
 	oexitstatus = exitstatus;
 	expandarg(n->ncase.expr, &arglist, EXP_TILDE);
 	for (cp = n->ncase.cases ; cp ; cp = cp->nclist.next) {
 		for (patp = cp->nclist.pattern ; patp ; patp = patp->narg.next) {
 			if (casematch(patp, arglist.args[0])) {
 				/* Skip fall-through entries that have no body. */
 				while (cp->nclist.next &&
 				    cp->type == NCLISTFALLTHRU &&
 				    cp->nclist.body == NULL)
 					cp = cp->nclist.next;
 				/*
 				 * A fall-through entry with a successor is
 				 * returned as a node, so that the caller runs
 				 * its body and then the following entry.
 				 */
 				if (cp->nclist.next &&
 				    cp->type == NCLISTFALLTHRU)
 					return (cp);
 				if (cp->nclist.body == NULL)
 					exitstatus = 0;
 				return (cp->nclist.body);
 			}
 		}
 	}
 	exitstatus = 0;
 	return (NULL);
 }
 
 
 
 /*
  * Kick off a subshell to evaluate a tree.
  *
  * Used for both subshell and background (NBACKGND) nodes.  For a foreground
  * subshell the parent waits and stores the job's status in exitstatus; for
  * a background job exitstatus is set to 0 immediately.
  */
 
 static void
 evalsubshell(union node *n, int flags)
 {
 	struct job *jp;
 	int backgnd = (n->type == NBACKGND);
 
 	oexitstatus = exitstatus;
 	expredir(n->nredir.redirect);
 	/*
 	 * No fork is needed when this is a foreground subshell, the shell is
 	 * going to exit afterwards anyway (EV_EXIT) and no traps are set: the
 	 * tree is then evaluated in the current process.  Otherwise fork; the
 	 * first branch is taken in the child (forkshell() returned 0).
 	 */
 	if ((!backgnd && flags & EV_EXIT && !have_traps()) ||
 			forkshell(jp = makejob(n, 1), n, backgnd) == 0) {
 		if (backgnd)
 			flags &=~ EV_TESTED;
 		redirect(n->nredir.redirect, 0);
 		evaltree(n->nredir.n, flags | EV_EXIT);	/* never returns */
 	} else if (! backgnd) {
 		INTOFF;
 		exitstatus = waitforjob(jp, (int *)NULL);
 		INTON;
 	} else
 		exitstatus = 0;
 }
 
 
 /*
  * Evaluate a redirected compound command.
  *
  * The redirections are pushed (REDIR_PUSH), the command is evaluated, and
  * the redirections are popped again.  A local exception handler makes sure
  * they are also popped when an exception passes through.
  */
 
 static void
 evalredir(union node *n, int flags)
 {
 	struct jmploc jmploc;
 	struct jmploc *savehandler;
 	/* Changed between setjmp() and a possible longjmp(), hence volatile. */
 	volatile int in_redirect = 1;
 
 	oexitstatus = exitstatus;
 	expredir(n->nredir.redirect);
 	savehandler = handler;
 	if (setjmp(jmploc.loc)) {
 		int e;
 
 		/* An exception was raised while our handler was installed. */
 		handler = savehandler;
 		e = exception;
 		popredir();
 		if (e == EXERROR || e == EXEXEC) {
 			/*
 			 * An error in setting up the redirection itself only
 			 * fails this command (status 2); an error raised by
 			 * the command being run is propagated below.
 			 */
 			if (in_redirect) {
 				exitstatus = 2;
 				return;
 			}
 		}
 		longjmp(handler->loc, 1);
 	} else {
 		INTOFF;
 		handler = &jmploc;
 		redirect(n->nredir.redirect, REDIR_PUSH);
 		in_redirect = 0;
 		INTON;
 		evaltree(n->nredir.n, flags);
 	}
 	/* Normal completion: restore the handler and undo the redirection. */
 	INTOFF;
 	handler = savehandler;
 	popredir();
 	INTON;
 }
 
 
 /*
  * Expand the text of a here-document redirection into redir->nhere.expdoc.
  *
  * The expansion runs with a fresh local-variable list, forcelocal raised and
  * a private exception handler; all of that state and shellparam.reset are
  * restored afterwards.  An EXERROR or EXEXEC exception during expansion is
  * absorbed, leaving expdoc as the empty string; any other exception is
  * re-raised to the previous handler once the state has been restored.
  */
 static void
 exphere(union node *redir, struct arglist *fn)
 {
 	struct jmploc jmploc;
 	struct jmploc *savehandler;
 	struct localvar *savelocalvars;
 	int need_longjmp = 0;
 	unsigned char saveoptreset;
 
 	/* Default result, kept if the expansion fails. */
 	redir->nhere.expdoc = "";
 	savelocalvars = localvars;
 	localvars = NULL;
 	saveoptreset = shellparam.reset;
 	forcelocal++;
 	savehandler = handler;
 	if (setjmp(jmploc.loc))
 		need_longjmp = exception != EXERROR && exception != EXEXEC;
 	else {
 		handler = &jmploc;
 		expandarg(redir->nhere.doc, fn, 0);
 		redir->nhere.expdoc = fn->args[0];
 		/*
 		 * Pairs with the INTON at the end of the function.
 		 * NOTE(review): on the exception path interrupts are
 		 * presumably already off when we get here -- confirm.
 		 */
 		INTOFF;
 	}
 	handler = savehandler;
 	forcelocal--;
 	poplocalvars();
 	localvars = savelocalvars;
 	shellparam.reset = saveoptreset;
 	if (need_longjmp)
 		longjmp(handler->loc, 1);
 	INTON;
 }
 
 
 /*
  * Compute the names of the files in a redirection list.
  *
  * Walks the list and, depending on the redirection type, stores the
  * expanded file name in the node, resolves a descriptor given as a word
  * through fixredir(), or expands a here-document.  Node types not listed
  * in the switch are left untouched.
  */
 
 static void
 expredir(union node *n)
 {
 	union node *redir;
 
 	for (redir = n ; redir ; redir = redir->nfile.next) {
 		struct arglist fn;
 		emptyarglist(&fn);
 		switch (redir->type) {
 		case NFROM:
 		case NTO:
 		case NFROMTO:
 		case NAPPEND:
 		case NCLOBBER:
 			/* Redirection to/from a named file. */
 			expandarg(redir->nfile.fname, &fn, EXP_TILDE);
 			redir->nfile.expfname = fn.args[0];
 			break;
 		case NFROMFD:
 		case NTOFD:
 			/* Descriptor duplication whose operand is a word. */
 			if (redir->ndup.vname) {
 				expandarg(redir->ndup.vname, &fn, EXP_TILDE);
 				fixredir(redir, fn.args[0], 1);
 			}
 			break;
 		case NXHERE:
 			exphere(redir, &fn);
 			break;
 		}
 	}
 }
 
 
 
 /*
  * Evaluate a pipeline.  All the processes in the pipeline are children
  * of the process creating the pipeline.  (This differs from some versions
  * of the shell, which make the last process in a pipeline the parent
  * of all the rest.)
  *
  * One job with one process per pipeline element is created.  For a
  * foreground pipeline exitstatus is the value returned by waitforjob();
  * for a background pipeline it is 0.
  */
 
 static void
 evalpipe(union node *n)
 {
 	struct job *jp;
 	struct nodelist *lp;
 	int pipelen;
 	int prevfd;
 	int pip[2];
 
 	TRACE(("evalpipe(%p) called\n", (void *)n));
 	pipelen = 0;
 	for (lp = n->npipe.cmdlist ; lp ; lp = lp->next)
 		pipelen++;
 	INTOFF;
 	jp = makejob(n, pipelen);
 	/* prevfd: read end of the pipe coming from the previous command. */
 	prevfd = -1;
 	for (lp = n->npipe.cmdlist ; lp ; lp = lp->next) {
 		prehash(lp->n);
 		pip[1] = -1;
 		/* Every command but the last needs a pipe to its successor. */
 		if (lp->next) {
 			if (pipe(pip) < 0) {
 				if (prevfd >= 0)
 					close(prevfd);
 				error("Pipe call failed: %s", strerror(errno));
 			}
 		}
 		if (forkshell(jp, lp->n, n->npipe.backgnd) == 0) {
 			/* Child: wire up stdin/stdout, then run the command. */
 			INTON;
 			if (prevfd > 0) {
 				dup2(prevfd, 0);
 				close(prevfd);
 			}
 			if (pip[1] >= 0) {
 				/*
 				 * Do not close the new read end if it happens
 				 * to be descriptor 0, which was just set up
 				 * as this command's stdin.
 				 */
 				if (!(prevfd >= 0 && pip[0] == 0))
 					close(pip[0]);
 				if (pip[1] != 1) {
 					dup2(pip[1], 1);
 					close(pip[1]);
 				}
 			}
 			evaltree(lp->n, EV_EXIT);
 		}
 		/* Parent: drop the descriptors that now belong to the child. */
 		if (prevfd >= 0)
 			close(prevfd);
 		prevfd = pip[0];
 		if (pip[1] != -1)
 			close(pip[1]);
 	}
 	INTON;
 	if (n->npipe.backgnd == 0) {
 		INTOFF;
 		exitstatus = waitforjob(jp, (int *)NULL);
 		TRACE(("evalpipe:  job done exit status %d\n", exitstatus));
 		INTON;
 	} else
 		exitstatus = 0;
 }
 
 
 
 /*
  * Return non-zero if the tree is a single simple command (NCMD), the only
  * shape evalbackcmd() evaluates through evalcommand() instead of forking a
  * subshell first.
  */
 static int
 is_valid_fast_cmdsubst(union node *n)
 {
 
 	return (n->type == NCMD);
 }
 
 /*
  * Execute a command inside back quotes.  If it's a builtin command, we
  * want to save its output in a block obtained from malloc.  Otherwise
  * we fork off a subprocess and get the output of the command via a pipe.
  * Should be called with interrupts off.
  *
  * The outcome is reported through *result, which is first reset to
  * "nothing" (fd -1, no buffer, no job).  A NULL tree yields exit status 0
  * and an untouched result.  On the fork path, result->fd is the read end of
  * the pipe and result->jp the job to wait for.
  */
 
 void
 evalbackcmd(union node *n, struct backcmd *result)
 {
 	int pip[2];
 	struct job *jp;
 	struct stackmark smark;
 	struct jmploc jmploc;
 	struct jmploc *savehandler;
 	struct localvar *savelocalvars;
 	unsigned char saveoptreset;
 
 	result->fd = -1;
 	result->buf = NULL;
 	result->nleft = 0;
 	result->jp = NULL;
 	if (n == NULL) {
 		exitstatus = 0;
 		return;
 	}
 	setstackmark(&smark);
 	exitstatus = oexitstatus;
 	if (is_valid_fast_cmdsubst(n)) {
 		/*
 		 * Simple command: let evalcommand() handle it with
 		 * EV_BACKCMD, under a fresh local-variable list and a private
 		 * exception handler so that the state can be restored.
 		 */
 		savelocalvars = localvars;
 		localvars = NULL;
 		saveoptreset = shellparam.reset;
 		forcelocal++;
 		savehandler = handler;
 		if (setjmp(jmploc.loc)) {
 			/*
 			 * Errors only fail the substitution (status 2); any
 			 * other exception is re-raised after cleaning up.
 			 */
 			if (exception == EXERROR || exception == EXEXEC)
 				exitstatus = 2;
 			else if (exception != 0) {
 				handler = savehandler;
 				forcelocal--;
 				poplocalvars();
 				localvars = savelocalvars;
 				shellparam.reset = saveoptreset;
 				longjmp(handler->loc, 1);
 			}
 		} else {
 			handler = &jmploc;
 			evalcommand(n, EV_BACKCMD, result);
 		}
 		handler = savehandler;
 		forcelocal--;
 		poplocalvars();
 		localvars = savelocalvars;
 		shellparam.reset = saveoptreset;
 	} else {
 		/* Anything else runs in a child writing into a pipe. */
 		if (pipe(pip) < 0)
 			error("Pipe call failed: %s", strerror(errno));
 		jp = makejob(n, 1);
 		if (forkshell(jp, n, FORK_NOJOB) == 0) {
 			/* Child: make the pipe's write end stdout. */
 			FORCEINTON;
 			close(pip[0]);
 			if (pip[1] != 1) {
 				dup2(pip[1], 1);
 				close(pip[1]);
 			}
 			evaltree(n, EV_EXIT);
 		}
 		/* Parent: keep only the read end. */
 		close(pip[1]);
 		result->fd = pip[0];
 		result->jp = jp;
 	}
 	popstackmark(&smark);
 	TRACE(("evalbackcmd done: fd=%d buf=%p nleft=%d jp=%p\n",
 		result->fd, result->buf, result->nleft, result->jp));
 }
 
 /*
  * Compare unexpanded argument text against a literal word.
  *
  * Quote markers (CTLQUOTEMARK/CTLQUOTEEND) are skipped, a character that
  * follows CTLESC is compared as-is, and any other character classified
  * as CCTL makes the comparison fail.  Returns 1 only if the remaining
  * characters spell exactly mask, 0 otherwise.
  */
 static int
 mustexpandto(const char *argtext, const char *mask)
 {
 	const char *src = argtext;
 	const char *want = mask;
 
 	while (1) {
 		/* Quote markers contribute no text of their own. */
 		while (*src == CTLQUOTEMARK || *src == CTLQUOTEEND)
 			src++;
 		if (*src == CTLESC) {
 			/* Compare the escaped character itself. */
 			src++;
 		} else if (BASESYNTAX[(int)*src] == CCTL) {
 			return (0);
 		}
 		if (*src != *want)
 			return (0);
 		if (*src == '\0')
 			return (1);
 		src++;
 		want++;
 	}
 }
 
 /*
  * Decide whether the command word starting at arg names a declaration
  * command, i.e. "export", "readonly" or "local", optionally preceded by
  * one or more literal "command" words.  "local" only counts when it was
  * reached through "command" or when no shell function called "local"
  * exists.  Returns 1 for a declaration command and 0 otherwise,
  * including when arg is NULL or nothing follows "command".
  */
 static int
 isdeclarationcmd(struct narg *arg)
 {
 	int have_command = 0;
 
 	if (arg == NULL)
 		return (0);
 	while (mustexpandto(arg->text, "command")) {
 		have_command = 1;
 		/*
 		 * Test the link itself before taking the address of the
 		 * member behind it; forming &arg->next->narg from a NULL
 		 * link is undefined behaviour.
 		 */
 		if (arg->next == NULL)
 			return (0);
 		arg = &arg->next->narg;
 		/*
 		 * To also allow "command -p" and "command --" as part of
 		 * a declaration command, add code here.
 		 * We do not do this, as ksh does not do it either and it
 		 * is not required by POSIX.
 		 */
 	}
 	return (mustexpandto(arg->text, "export") ||
 	    mustexpandto(arg->text, "readonly") ||
 	    (mustexpandto(arg->text, "local") &&
 		(have_command || !isfunc("local"))));
 }
 
 /*
  * Write the trace line for a command: the expanded PS4 prompt (or the raw
  * PS4 value if expansion yields NULL), then every variable assignment and
  * every argument separated by single spaces, then a newline.  For an
  * assignment the "name=" part is written verbatim and only the value is
  * passed through out2qstr().  Output goes to out2 and errout is flushed.
  */
 static void
 xtracecommand(struct arglist *varlist, int argc, char **argv)
 {
 	const char *prompt, *word, *value;
 	int need_space = 0;
 	int i;
 
 	prompt = expandstr(ps4val());
 	if (prompt == NULL)
 		prompt = ps4val();
 	out2str(prompt);
 
 	for (i = 0; i < varlist->count; i++) {
 		word = varlist->args[i];
 		if (need_space)
 			out2c(' ');
 		need_space = 1;
 		value = strchr(word, '=');
 		if (value == NULL) {
 			out2qstr(word);
 			continue;
 		}
 		value++;
 		outbin(word, value - word, out2);
 		out2qstr(value);
 	}
 
 	for (i = 0; i < argc; i++) {
 		if (need_space)
 			out2c(' ');
 		need_space = 1;
 		out2qstr(argv[i]);
 	}
 
 	out2c('\n');
 	flushout(&errout);
 }
 
 /*
  * Decide whether a builtin that ought to run in a subshell (command
  * substitution) may instead be run in the current process.
  * jobid, jobs, times and trap are accepted on purpose even though they
  * can report information a child process would not have.
  * argc/argv must be the already expanded arguments.
  * Returns 1 if running in-process is acceptable, 0 otherwise.
  */
 static int
 safe_builtin(int idx, int argc, char **argv)
 {
 	const char *opt;
 
 	/* Accepted whatever the arguments are. */
 	if (idx == BLTINCMD || idx == COMMANDCMD || idx == ECHOCMD ||
 	    idx == FALSECMD || idx == JOBIDCMD || idx == JOBSCMD ||
 	    idx == KILLCMD || idx == PRINTFCMD || idx == PWDCMD ||
 	    idx == TESTCMD || idx == TIMESCMD || idx == TRUECMD ||
 	    idx == TYPECMD)
 		return (1);
 
 	/* Accepted with no argument or with a single option word. */
 	if (idx == EXPORTCMD || idx == TRAPCMD || idx == ULIMITCMD ||
 	    idx == UMASKCMD) {
 		if (argc <= 1)
 			return (1);
 		return (argc == 2 && argv[1][0] == '-');
 	}
 
 	if (idx != SETCMD)
 		return (0);
 
 	/* set: accepted bare, or as exactly "-o" / "+o". */
 	if (argc <= 1)
 		return (1);
 	if (argc != 2)
 		return (0);
 	opt = argv[1];
 	return ((opt[0] == '-' || opt[0] == '+') && opt[1] == 'o' &&
 	    opt[2] == '\0');
 }
 
 /*
  * Execute a simple command.
  * Note: This may or may not return if (flags & EV_EXIT).
  *
  * cmd is the command node whose ncmd.args and ncmd.redirect are used,
  * flags is a set of EV_* bits, and backcmd receives the output location
  * (memory buffer, or pipe descriptor plus job) when the command is run
  * for a command substitution (EV_BACKCMD).
  */
 
 static void
 evalcommand(union node *cmd, int flags, struct backcmd *backcmd)
 {
 	union node *argp;
 	struct arglist arglist;
 	struct arglist varlist;
 	char **argv;
 	int argc;
 	char **envp;
 	int varflag;
 	int mode;
 	int pip[2];
 	struct cmdentry cmdentry;
 	struct job *jp;
 	struct jmploc jmploc;
 	struct jmploc *savehandler;
 	char *savecmdname;
 	struct shparam saveparam;
 	struct localvar *savelocalvars;
 	struct parsefile *savetopfile;
 	volatile int e;
 	char *lastarg;
 	int realstatus;
 	int do_clearcmdentry;
 	const char *path = pathval();
 	int i;
 
 	/* First expand the arguments. */
 	TRACE(("evalcommand(%p, %d) called\n", (void *)cmd, flags));
 	emptyarglist(&arglist);
 	emptyarglist(&varlist);
 	varflag = 1;
 	jp = NULL;
 	do_clearcmdentry = 0;
 	oexitstatus = exitstatus;
 	exitstatus = 0;
 	/* Add one slot at the beginning for tryexec(). */
 	appendarglist(&arglist, nullstr);
 	/*
 	 * varflag is 1 while only assignments have been seen (they go to
 	 * varlist), becomes 2 once the command word is a declaration
 	 * command (later assignment-like words go to arglist but are still
 	 * expanded as assignments), and 0 for any other command word.
 	 */
 	for (argp = cmd->ncmd.args ; argp ; argp = argp->narg.next) {
 		if (varflag && isassignment(argp->narg.text)) {
 			expandarg(argp, varflag == 1 ? &varlist : &arglist,
 			    EXP_VARTILDE);
 			continue;
 		} else if (varflag == 1)
 			varflag = isdeclarationcmd(&argp->narg) ? 2 : 0;
 		expandarg(argp, &arglist, EXP_FULL | EXP_TILDE);
 	}
 	/* Trailing slot, overwritten with the terminating NULL below. */
 	appendarglist(&arglist, nullstr);
 	expredir(cmd->ncmd.redirect);
 	argc = arglist.count - 2;
 	argv = &arglist.args[1];
 
 	argv[argc] = NULL;
 	lastarg = NULL;
 	if (iflag && funcnest == 0 && argc > 0)
 		lastarg = argv[argc - 1];
 
 	/* Print the command if xflag is set. */
 	if (xflag)
 		xtracecommand(&varlist, argc, argv);
 
 	/* Now locate the command. */
 	if (argc == 0) {
 		/* Variable assignment(s) without command */
 		cmdentry.cmdtype = CMDBUILTIN;
 		cmdentry.u.index = BLTINCMD;
 		cmdentry.special = 0;
 	} else {
 		static const char PATH[] = "PATH=";
 		int cmd_flags = 0, bltinonly = 0;
 
 		/*
 		 * Modify the command lookup path, if a PATH= assignment
 		 * is present
 		 */
 		for (i = 0; i < varlist.count; i++)
 			if (strncmp(varlist.args[i], PATH, sizeof(PATH) - 1) == 0) {
 				path = varlist.args[i] + sizeof(PATH) - 1;
 				/*
 				 * On `PATH=... command`, we need to make
 				 * sure that the command isn't using the
 				 * non-updated hash table of the outer PATH
 				 * setting and we need to make sure that
 				 * the hash table isn't filled with items
 				 * from the temporary setting.
 				 *
 				 * It would be better to forbid using and
 				 * updating the table while this command
 				 * runs, but the command finding mechanism
 				 * is heavily integrated with hash handling,
 				 * so we just delete the hash before and after
 				 * the command runs. Partly deleting like
 				 * changepath() does doesn't seem worth the
 				 * bookkeeping effort, since most such runs add
 				 * directories in front of the new PATH.
 				 */
 				clearcmdentry();
 				do_clearcmdentry = 1;
 			}
 
 		/*
 		 * Resolve the command word, peeling off leading "builtin"
 		 * and "command" prefixes (with their -p and -- options)
 		 * until the command that will really run is found.
 		 */
 		for (;;) {
 			if (bltinonly) {
 				cmdentry.u.index = find_builtin(*argv, &cmdentry.special);
 				if (cmdentry.u.index < 0) {
 					/*
 					 * Not a builtin: step back so that
 					 * bltincmd() sees the name as
 					 * argv[1] and reports it.
 					 */
 					cmdentry.u.index = BLTINCMD;
 					argv--;
 					argc++;
 					break;
 				}
 			} else
 				find_command(argv[0], &cmdentry, cmd_flags, path);
 			/* implement the bltin and command builtins here */
 			if (cmdentry.cmdtype != CMDBUILTIN)
 				break;
 			if (cmdentry.u.index == BLTINCMD) {
 				if (argc == 1)
 					break;
 				argv++;
 				argc--;
 				bltinonly = 1;
 			} else if (cmdentry.u.index == COMMANDCMD) {
 				if (argc == 1)
 					break;
 				if (!strcmp(argv[1], "-p")) {
 					if (argc == 2)
 						break;
 					if (argv[2][0] == '-') {
 						if (strcmp(argv[2], "--"))
 							break;
 						if (argc == 3)
 							break;
 						argv += 3;
 						argc -= 3;
 					} else {
 						argv += 2;
 						argc -= 2;
 					}
 					/* -p: search the standard path only. */
 					path = _PATH_STDPATH;
 					clearcmdentry();
 					do_clearcmdentry = 1;
 				} else if (!strcmp(argv[1], "--")) {
 					if (argc == 2)
 						break;
 					argv += 2;
 					argc -= 2;
 				} else if (argv[1][0] == '-')
 					break;
 				else {
 					argv++;
 					argc--;
 				}
 				cmd_flags |= DO_NOFUNC;
 				bltinonly = 0;
 			} else
 				break;
 		}
 		/*
 		 * Special builtins lose their special properties when
 		 * called via 'command'.
 		 */
 		if (cmd_flags & DO_NOFUNC)
 			cmdentry.special = 0;
 	}
 
 	/* Fork off a child process if necessary. */
 	/*
 	 * A fork is needed for an external (or unknown) command unless
 	 * EV_EXIT is set and no traps are pending, and for a command
 	 * substitution unless the command is a builtin that
 	 * safe_builtin() accepts.
 	 */
 	if (((cmdentry.cmdtype == CMDNORMAL || cmdentry.cmdtype == CMDUNKNOWN)
 	    && ((flags & EV_EXIT) == 0 || have_traps()))
 	 || ((flags & EV_BACKCMD) != 0
 	    && (cmdentry.cmdtype != CMDBUILTIN ||
 		 !safe_builtin(cmdentry.u.index, argc, argv)))) {
 		jp = makejob(cmd, 1);
 		mode = FORK_FG;
 		if (flags & EV_BACKCMD) {
 			mode = FORK_NOJOB;
 			if (pipe(pip) < 0)
 				error("Pipe call failed: %s", strerror(errno));
 		}
 		/*
 		 * External command with no redirections or assignments in
 		 * a non-interactive shell without job control: use the
 		 * vfork-based path unless it has been disabled.
 		 */
 		if (cmdentry.cmdtype == CMDNORMAL &&
 		    cmd->ncmd.redirect == NULL &&
 		    varlist.count == 0 &&
 		    (mode == FORK_FG || mode == FORK_NOJOB) &&
 		    !disvforkset() && !iflag && !mflag) {
 			vforkexecshell(jp, argv, environment(), path,
 			    cmdentry.u.index, flags & EV_BACKCMD ? pip : NULL);
 			goto parent;
 		}
 		if (forkshell(jp, cmd, mode) != 0)
 			goto parent;	/* at end of routine */
 		if (flags & EV_BACKCMD) {
 			/* Child of a command substitution: stdout is the pipe. */
 			FORCEINTON;
 			close(pip[0]);
 			if (pip[1] != 1) {
 				dup2(pip[1], 1);
 				close(pip[1]);
 			}
 			flags &= ~EV_BACKCMD;
 		}
 		flags |= EV_EXIT;
 	}
 
 	/* This is the child process if a fork occurred. */
 	/* Execute the command. */
 	if (cmdentry.cmdtype == CMDFUNCTION) {
 #ifdef DEBUG
 		trputs("Shell function:  ");  trargs(argv);
 #endif
 		/* Install the function's positional parameters. */
 		saveparam = shellparam;
 		shellparam.malloc = 0;
 		shellparam.reset = 1;
 		shellparam.nparam = argc - 1;
 		shellparam.p = argv + 1;
 		shellparam.optp = NULL;
 		shellparam.optnext = NULL;
 		INTOFF;
 		savelocalvars = localvars;
 		localvars = NULL;
 		reffunc(cmdentry.u.func);
 		savehandler = handler;
 		if (setjmp(jmploc.loc)) {
 			/*
 			 * An exception escaped the function body: undo the
 			 * setup above and pass it on to the outer handler.
 			 */
-			freeparam(&shellparam);
-			shellparam = saveparam;
 			popredir();
 			unreffunc(cmdentry.u.func);
 			poplocalvars();
 			localvars = savelocalvars;
+			freeparam(&shellparam);
+			shellparam = saveparam;
 			funcnest--;
 			handler = savehandler;
 			longjmp(handler->loc, 1);
 		}
 		handler = &jmploc;
 		funcnest++;
 		redirect(cmd->ncmd.redirect, REDIR_PUSH);
 		INTON;
 		/* Assignments preceding a function call become its locals. */
 		for (i = 0; i < varlist.count; i++)
 			mklocal(varlist.args[i]);
 		exitstatus = oexitstatus;
 		evaltree(getfuncnode(cmdentry.u.func),
 		    flags & (EV_TESTED | EV_EXIT));
 		INTOFF;
 		unreffunc(cmdentry.u.func);
 		poplocalvars();
 		localvars = savelocalvars;
 		freeparam(&shellparam);
 		shellparam = saveparam;
 		handler = savehandler;
 		funcnest--;
 		popredir();
 		INTON;
 		/* A "return" stops at the function boundary. */
 		if (evalskip == SKIPRETURN) {
 			evalskip = 0;
 			skipcount = 0;
 		}
 		if (jp)
 			exitshell(exitstatus);
 	} else if (cmdentry.cmdtype == CMDBUILTIN) {
 #ifdef DEBUG
 		trputs("builtin command:  ");  trargs(argv);
 #endif
 		mode = (cmdentry.u.index == EXECCMD)? 0 : REDIR_PUSH;
 		/*
 		 * flags is exactly EV_BACKCMD only when no fork happened
 		 * above (the forked child clears that bit and sets EV_EXIT):
 		 * prepare memout so the output can be handed to the caller.
 		 */
 		if (flags == EV_BACKCMD) {
 			memout.nleft = 0;
 			memout.nextc = memout.buf;
 			memout.bufsize = 64;
 			mode |= REDIR_BACKQ;
 		}
 		savecmdname = commandname;
 		savetopfile = getcurrentfile();
 		cmdenviron = &varlist;
 		e = -1;
 		savehandler = handler;
 		if (setjmp(jmploc.loc)) {
 			/* The builtin raised an exception: map it to a status. */
 			e = exception;
 			if (e == EXINT)
 				exitstatus = SIGINT+128;
 			else if (e != EXEXIT)
 				exitstatus = 2;
 			goto cmddone;
 		}
 		handler = &jmploc;
 		redirect(cmd->ncmd.redirect, mode);
 		outclearerror(out1);
 		/*
 		 * If there is no command word, redirection errors should
 		 * not be fatal but assignment errors should.
 		 */
 		if (argc == 0)
 			cmdentry.special = 1;
 		listsetvar(cmdenviron, cmdentry.special ? 0 : VNOSET);
 		if (argc > 0)
 			bltinsetlocale();
 		commandname = argv[0];
 		argptr = argv + 1;
 		nextopt_optptr = NULL;		/* initialize nextopt */
 		builtin_flags = flags;
 		exitstatus = (*builtinfunc[cmdentry.u.index])(argc, argv);
 		flushall();
 		if (outiserror(out1)) {
 			warning("write error on stdout");
 			if (exitstatus == 0 || exitstatus == 1)
 				exitstatus = 2;
 		}
 cmddone:
 		if (argc > 0)
 			bltinunsetlocale();
 		cmdenviron = NULL;
 		out1 = &output;
 		out2 = &errout;
 		freestdout();
 		handler = savehandler;
 		commandname = savecmdname;
 		if (jp)
 			exitshell(exitstatus);
 		if (flags == EV_BACKCMD) {
 			/* Hand the captured output over to the caller. */
 			backcmd->buf = memout.buf;
 			backcmd->nleft = memout.nextc - memout.buf;
 			memout.buf = NULL;
 		}
 		if (cmdentry.u.index != EXECCMD)
 			popredir();
 		if (e != -1) {
 			/*
 			 * Re-raise everything except an error in a
 			 * non-special builtin, which only fails the command.
 			 */
 			if ((e != EXERROR && e != EXEXEC)
 			    || cmdentry.special)
 				exraise(e);
 			popfilesupto(savetopfile);
 			if (flags != EV_BACKCMD)
 				FORCEINTON;
 		}
 	} else {
 #ifdef DEBUG
 		trputs("normal command:  ");  trargs(argv);
 #endif
 		redirect(cmd->ncmd.redirect, 0);
 		for (i = 0; i < varlist.count; i++)
 			setvareq(varlist.args[i], VEXPORT|VSTACK);
 		envp = environment();
 		shellexec(argv, envp, path, cmdentry.u.index);
 		/*NOTREACHED*/
 	}
 	goto out;
 
 parent:	/* parent process gets here (if we forked) */
 	if (mode == FORK_FG) {	/* argument to fork */
 		INTOFF;
 		exitstatus = waitforjob(jp, &realstatus);
 		INTON;
 		/*
 		 * Interactive shell: a child killed by a signal breaks
 		 * out of all enclosing loops.
 		 */
 		if (iflag && loopnest > 0 && WIFSIGNALED(realstatus)) {
 			evalskip = SKIPBREAK;
 			skipcount = loopnest;
 		}
 	} else if (mode == FORK_NOJOB) {
 		/* Command substitution: the caller reads from the pipe. */
 		backcmd->fd = pip[0];
 		close(pip[1]);
 		backcmd->jp = jp;
 	}
 
 out:
 	if (lastarg)
 		setvar("_", lastarg, 0);
 	/* Drop hash entries created under a temporary lookup path. */
 	if (do_clearcmdentry)
 		clearcmdentry();
 }
 
 
 
 /*
  * Look a command up ahead of a fork, so that its location is known to
  * the parent as well as to the child.  Only done for a simple command
  * whose first word passes goodname(), an overly conservative check that
  * the word will not be subject to expansion.
  */
 
 static void
 prehash(union node *n)
 {
 	struct cmdentry entry;
 	union node *first;
 
 	if (n == NULL || n->type != NCMD)
 		return;
 	first = n->ncmd.args;
 	if (first == NULL)
 		return;
 	if (goodname(first->narg.text))
 		find_command(first->narg.text, &entry, 0,
 			     pathval());
 }
 
 
 
 /*
  * Builtin commands.  Builtin commands whose functions are closely
  * tied to evaluation are implemented here.
  */
 
 /*
  * Fallback builtin: runs when no command was given, when the bltin
  * command has no arguments, or when it was given an invalid name.
  * With an argument, argv[1] is reported as not found (status 127);
  * otherwise the current exitstatus is returned unchanged.
  */
 
 int
 bltincmd(int argc, char **argv)
 {
 	if (argc <= 1) {
 		/*
 		 * Preserve exitstatus of a previous possible redirection
 		 * as POSIX mandates
 		 */
 		return exitstatus;
 	}
 	out2fmt_flush("%s: not found\n", argv[1]);
 	return 127;
 }
 
 
 /*
  * Implementation of both break and continue.  Break, continue and
  * return all work by setting the evalskip flag; the evaluation routines
  * check it and, when it is set, skip commands instead of running them.
  * skipcount is the number of loops to break/continue, or the number of
  * function levels to return (always 1 for the latter).  Asking for more
  * loops than exist should probably be an error, but the standard shell
  * does not treat it as one, so the count is clamped to loopnest instead.
  *
  * The first character of argv[0] selects the behaviour: 'c' means
  * continue, anything else means break.
  */
 
 int
 breakcmd(int argc, char **argv)
 {
 	long levels = 1;
 	char *stop;
 
 	if (argc > 1) {
 		/* Allow arbitrarily large numbers. */
 		levels = strtol(argv[1], &stop, 10);
 		if (!is_digit(argv[1][0]) || *stop != '\0')
 			error("Illegal number: %s", argv[1]);
 	}
 	if (levels > loopnest)
 		levels = loopnest;
 	if (levels <= 0)
 		return 0;
 	if (**argv == 'c')
 		evalskip = SKIPCONT;
 	else
 		evalskip = SKIPBREAK;
 	skipcount = levels;
 	return 0;
 }
 
 /*
  * The `command' command.
  *
  * Options: -p selects _PATH_STDPATH as the search path, -v and -V
  * select the two typecmd_impl() output modes.  With -v or -V exactly
  * one operand is required.  Without them an operand is reported as an
  * error, and the builtin otherwise does nothing and returns 0.
  */
 int
 commandcmd(int argc __unused, char **argv __unused)
 {
 	const char *path;
 	int mode = -1;
 	int opt;
 
 	path = bltinlookup("PATH", 1);
 
 	for (;;) {
 		opt = nextopt("pvV");
 		if (opt == '\0')
 			break;
 		if (opt == 'p')
 			path = _PATH_STDPATH;
 		else if (opt == 'v')
 			mode = TYPECMD_SMALLV;
 		else if (opt == 'V')
 			mode = TYPECMD_BIGV;
 	}
 
 	if (mode == -1) {
 		if (*argptr != NULL)
 			error("commandcmd bad call");
 		/*
 		 * Do nothing successfully if no command was specified;
 		 * ksh also does this.
 		 */
 		return 0;
 	}
 
 	if (*argptr == NULL || argptr[1] != NULL)
 		error("wrong number of arguments");
 	return typecmd_impl(2, argptr - 1, mode, path);
 }
 
 
 /*
  * The return command.  The status is the numeric value of argv[1] if
  * given, otherwise oexitstatus.  Sets evalskip to SKIPRETURN with a
  * skipcount of 1.
  */
 
 int
 returncmd(int argc, char **argv)
 {
 	int status;
 
 	if (argc > 1)
 		status = number(argv[1]);
 	else
 		status = oexitstatus;
 
 	evalskip = SKIPRETURN;
 	skipcount = 1;
 	return status;
 }
 
 
 /* The false builtin: ignores its arguments and always returns 1. */
 int
 falsecmd(int argc __unused, char **argv __unused)
 {
 	return (1);
 }
 
 
 /* The true builtin: ignores its arguments and always returns 0. */
 int
 truecmd(int argc __unused, char **argv __unused)
 {
 	return (0);
 }
 
 
 /*
  * The exec builtin.  With a command operand, clears iflag and mflag,
  * exports the assignments in cmdenviron and calls shellexec() on the
  * operands.  Without one it just returns 0.
  */
 int
 execcmd(int argc, char **argv)
 {
 	int n;
 
 	/*
 	 * Because we have historically not supported any options,
 	 * only treat "--" specially.
 	 */
 	if (argc > 1 && strcmp(argv[1], "--") == 0) {
 		argc--;
 		argv++;
 	}
 	if (argc <= 1)
 		return 0;
 
 	iflag = 0;		/* exit on error */
 	mflag = 0;
 	optschanged();
 	n = 0;
 	while (n < cmdenviron->count) {
 		setvareq(cmdenviron->args[n], VEXPORT|VSTACK);
 		n++;
 	}
 	shellexec(argv + 1, environment(), pathval(), 0);
 
 	return 0;
 }
 
 
 /*
  * The times builtin.  Prints two lines to out1: user and system time of
  * the shell itself, then user and system time of its children, each as
  * minutes and seconds with millisecond precision.  Returns 1, printing
  * nothing, if either getrusage() call fails; 0 otherwise.
  */
 int
 timescmd(int argc __unused, char **argv __unused)
 {
 	struct rusage self, kids;
 	long self_umins, self_smins, kids_umins, kids_smins;
 	double self_usecs, self_ssecs, kids_usecs, kids_ssecs;
 
 	if (getrusage(RUSAGE_SELF, &self) < 0)
 		return 1;
 	if (getrusage(RUSAGE_CHILDREN, &kids) < 0)
 		return 1;
 
 	/* The shell itself. */
 	self_umins = self.ru_utime.tv_sec / 60;
 	self_usecs = self.ru_utime.tv_sec % 60 +
 	    self.ru_utime.tv_usec / 1000000.;
 	self_smins = self.ru_stime.tv_sec / 60;
 	self_ssecs = self.ru_stime.tv_sec % 60 +
 	    self.ru_stime.tv_usec / 1000000.;
 
 	/* Its children. */
 	kids_umins = kids.ru_utime.tv_sec / 60;
 	kids_usecs = kids.ru_utime.tv_sec % 60 +
 	    kids.ru_utime.tv_usec / 1000000.;
 	kids_smins = kids.ru_stime.tv_sec / 60;
 	kids_ssecs = kids.ru_stime.tv_sec % 60 +
 	    kids.ru_stime.tv_usec / 1000000.;
 
 	out1fmt("%ldm%.3fs %ldm%.3fs\n%ldm%.3fs %ldm%.3fs\n", self_umins,
 	    self_usecs, self_smins, self_ssecs, kids_umins, kids_usecs,
 	    kids_smins, kids_ssecs);
 	return 0;
 }
Index: projects/clang380-import/bin/sh/tests/builtins/Makefile
===================================================================
--- projects/clang380-import/bin/sh/tests/builtins/Makefile	(revision 293686)
+++ projects/clang380-import/bin/sh/tests/builtins/Makefile	(revision 293687)
@@ -1,173 +1,174 @@
 # $FreeBSD$
 
 .include <src.opts.mk>
 
 # Install into the sh test tree, in a subdirectory named after this
 # directory (":T" keeps the last path component of .CURDIR).
 TESTSDIR=	${TESTSBASE}/bin/sh/${.CURDIR:T}
 
 # Also search the parent directory for sources; presumably that is where
 # functional_test lives -- confirm against the parent directory.
 .PATH: ${.CURDIR:H}
 ATF_TESTS_SH=	functional_test
 
 FILESDIR=	${TESTSDIR}
 
 # Test scripts and their expected-output companions (.stdout/.stderr).
 # NOTE(review): the numeric suffix of a script name looks like the exit
 # status the script is expected to produce -- confirm with functional_test.
 FILES=		alias.0 alias.0.stdout
 FILES+=		alias.1 alias.1.stderr
 FILES+=		alias3.0 alias3.0.stdout
 FILES+=		alias4.0
 FILES+=		break1.0
 FILES+=		break2.0 break2.0.stdout
 FILES+=		break3.0
 FILES+=		break4.4
 FILES+=		break5.4
 FILES+=		break6.0
 FILES+=		builtin1.0
 FILES+=		case1.0
 FILES+=		case2.0
 FILES+=		case3.0
 FILES+=		case4.0
 FILES+=		case5.0
 FILES+=		case6.0
 FILES+=		case7.0
 FILES+=		case8.0
 FILES+=		case9.0
 FILES+=		case10.0
 FILES+=		case11.0
 FILES+=		case12.0
 FILES+=		case13.0
 FILES+=		case14.0
 FILES+=		case15.0
 FILES+=		case16.0
 FILES+=		case17.0
 FILES+=		case18.0
 FILES+=		case19.0
 FILES+=		case20.0
 FILES+=		cd1.0
 FILES+=		cd2.0
 FILES+=		cd3.0
 FILES+=		cd4.0
 FILES+=		cd5.0
 FILES+=		cd6.0
 FILES+=		cd7.0
 FILES+=		cd8.0
 FILES+=		cd9.0 cd9.0.stdout
 FILES+=		command1.0
 FILES+=		command2.0
 FILES+=		command3.0
 FILES+=		command3.0.stdout
 FILES+=		command4.0
 FILES+=		command5.0
 FILES+=		command5.0.stdout
 FILES+=		command6.0
 FILES+=		command6.0.stdout
 FILES+=		command7.0
 FILES+=		command8.0
 FILES+=		command9.0
 FILES+=		command10.0
 FILES+=		command11.0
 FILES+=		command12.0
 FILES+=		dot1.0
 FILES+=		dot2.0
 FILES+=		dot3.0
 FILES+=		dot4.0
 FILES+=		eval1.0
 FILES+=		eval2.0
 FILES+=		eval3.0
 FILES+=		eval4.0
 FILES+=		eval5.0
 FILES+=		eval6.0
 FILES+=		eval7.0
 FILES+=		eval8.7
 FILES+=		exec1.0
 FILES+=		exec2.0
 FILES+=		exit1.0
 FILES+=		exit2.8
 FILES+=		exit3.0
 FILES+=		export1.0
 FILES+=		fc1.0
 FILES+=		fc2.0
 FILES+=		for1.0
 FILES+=		for2.0
 FILES+=		for3.0
 FILES+=		getopts1.0 getopts1.0.stdout
 FILES+=		getopts2.0 getopts2.0.stdout
 FILES+=		getopts3.0
 FILES+=		getopts4.0
 FILES+=		getopts5.0
 FILES+=		getopts6.0
 FILES+=		getopts7.0
 FILES+=		getopts8.0 getopts8.0.stdout
 FILES+=		getopts9.0 getopts9.0.stdout
 FILES+=		getopts10.0
 FILES+=		hash1.0 hash1.0.stdout
 FILES+=		hash2.0 hash2.0.stdout
 FILES+=		hash3.0 hash3.0.stdout
 FILES+=		hash4.0
 FILES+=		jobid1.0
 FILES+=		jobid2.0
 FILES+=		kill1.0 kill2.0
 FILES+=		lineno.0 lineno.0.stdout
 FILES+=		lineno2.0
 FILES+=		lineno3.0 lineno3.0.stdout
 FILES+=		local1.0
 FILES+=		local2.0
 FILES+=		local3.0
 FILES+=		local4.0
+FILES+=		local5.0
 # locale1.0 is only installed when NLS support is enabled.
 .if ${MK_NLS} != "no"
 FILES+=		locale1.0
 .endif
 FILES+=		printf1.0
 FILES+=		printf2.0
 FILES+=		printf3.0
 FILES+=		printf4.0
 FILES+=		read1.0 read1.0.stdout
 FILES+=		read2.0
 FILES+=		read3.0 read3.0.stdout
 FILES+=		read4.0 read4.0.stdout
 FILES+=		read5.0
 FILES+=		read6.0
 FILES+=		read7.0
 FILES+=		read8.0
 FILES+=		read9.0
 FILES+=		return1.0
 FILES+=		return2.1
 FILES+=		return3.1
 FILES+=		return4.0
 FILES+=		return5.0
 FILES+=		return6.4
 FILES+=		return7.4
 FILES+=		return8.0
 FILES+=		set1.0
 FILES+=		set2.0
 FILES+=		trap1.0
 FILES+=		trap10.0
 FILES+=		trap11.0
 FILES+=		trap12.0
 FILES+=		trap13.0
 FILES+=		trap14.0
 FILES+=		trap15.0
 FILES+=		trap16.0
 FILES+=		trap2.0
 FILES+=		trap3.0
 FILES+=		trap4.0
 FILES+=		trap5.0
 FILES+=		trap6.0
 FILES+=		trap7.0
 FILES+=		trap8.0
 FILES+=		trap9.0
 FILES+=		type1.0 type1.0.stderr
 FILES+=		type2.0
 FILES+=		type3.0
 FILES+=		unalias.0
 FILES+=		var-assign.0
 FILES+=		var-assign2.0
 FILES+=		wait1.0
 FILES+=		wait2.0
 FILES+=		wait3.0
 FILES+=		wait4.0
 FILES+=		wait5.0
 FILES+=		wait6.0
 FILES+=		wait7.0
 FILES+=		wait8.0
 FILES+=		wait9.127
 FILES+=		wait10.0
 
 .include <bsd.test.mk>
Index: projects/clang380-import/bin/sh/tests/builtins/local5.0
===================================================================
--- projects/clang380-import/bin/sh/tests/builtins/local5.0	(nonexistent)
+++ projects/clang380-import/bin/sh/tests/builtins/local5.0	(revision 293687)
@@ -0,0 +1,15 @@
+# $FreeBSD$
+
+f() {
+	local PATH IFS elem	# changes to PATH below stay local to f
+	IFS=:
+	for elem in ''$PATH''; do
+		PATH=/var/empty/$elem:$PATH	# prepend a directory under /var/empty
+	done
+	ls -d / >/dev/null	# look ls up while the local PATH is in effect
+}
+
+p1=$(command -v ls)	# location of ls before calling f
+f
+p2=$(command -v ls)	# location of ls after f has returned
+[ "$p1" = "$p2" ]	# passes only if both lookups agree

Property changes on: projects/clang380-import/bin/sh/tests/builtins/local5.0
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: projects/clang380-import/bin/sh/var.c
===================================================================
--- projects/clang380-import/bin/sh/var.c	(revision 293686)
+++ projects/clang380-import/bin/sh/var.c	(revision 293687)
@@ -1,954 +1,965 @@
 /*-
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Kenneth Almquist.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 #if 0
 static char sccsid[] = "@(#)var.c	8.3 (Berkeley) 5/4/95";
 #endif
 #endif /* not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <unistd.h>
 #include <stdlib.h>
 #include <paths.h>
 
 /*
  * Shell variables.
  */
 
 #include <locale.h>
 #include <langinfo.h>
 
 #include "shell.h"
 #include "output.h"
 #include "expand.h"
 #include "nodes.h"	/* for other headers */
 #include "eval.h"	/* defines cmdenviron */
 #include "exec.h"
 #include "syntax.h"
 #include "options.h"
 #include "mail.h"
 #include "var.h"
 #include "memalloc.h"
 #include "error.h"
 #include "mystring.h"
 #include "parser.h"
 #include "builtins.h"
 #ifndef NO_HISTORY
 #include "myhistedit.h"
 #endif
 
 
 #define VTABSIZE 39
 
 
 struct varinit {
 	struct var *var;
 	int flags;
 	const char *text;
 	void (*func)(const char *);
 };
 
 
 /*
  * Storage for the builtin variables.  All but vps1 are hooked up through
  * the varinit table; vps1 is set up separately by initvar().
  */
 #ifndef NO_HISTORY
 struct var vhistsize;
 struct var vterm;
 #endif
 struct var vifs;
 struct var vmail;
 struct var vmpath;
 struct var vpath;
 struct var vps1;
 struct var vps2;
 struct var vps4;
 static struct var voptind;
 struct var vdisvfork;
 
 /* Local variables currently in effect; undone by poplocalvars(). */
 struct localvar *localvars;
 /* While nonzero, setvareq() makes assigned variables local via mklocal(). */
 int forcelocal;
 
 /*
  * Builtin variables with their initial flags, initial "NAME=value" text
  * and optional change callback.  initvar() walks this table until it
  * reaches the entry whose var pointer is NULL.
  */
 static const struct varinit varinit[] = {
 #ifndef NO_HISTORY
 	{ &vhistsize,	VUNSET,				"HISTSIZE=",
 	  sethistsize },
 #endif
 	{ &vifs,	0,				"IFS= \t\n",
 	  NULL },
 	{ &vmail,	VUNSET,				"MAIL=",
 	  NULL },
 	{ &vmpath,	VUNSET,				"MAILPATH=",
 	  NULL },
 	{ &vpath,	0,				"PATH=" _PATH_DEFPATH,
 	  changepath },
 	/*
 	 * vps1 depends on uid
 	 */
 	{ &vps2,	0,				"PS2=> ",
 	  NULL },
 	{ &vps4,	0,				"PS4=+ ",
 	  NULL },
 #ifndef NO_HISTORY
 	{ &vterm,	VUNSET,				"TERM=",
 	  setterm },
 #endif
 	{ &voptind,	0,				"OPTIND=1",
 	  getoptsreset },
 	{ &vdisvfork,	VUNSET,				"SH_DISABLE_VFORK=",
 	  NULL },
 	/* End-of-table marker. */
 	{ NULL,	0,				NULL,
 	  NULL }
 };
 
 /* The variable table: VTABSIZE chains of struct var linked through next. */
 static struct var *vartab[VTABSIZE];
 
 /*
  * Names of the per-category locale variables, NULL terminated, and the
  * matching setlocale() categories; both arrays are indexed in parallel.
  */
 static const char *const locale_names[7] = {
 	"LC_COLLATE", "LC_CTYPE", "LC_MONETARY",
 	"LC_NUMERIC", "LC_TIME", "LC_MESSAGES", NULL
 };
 static const int locale_categories[7] = {
 	LC_COLLATE, LC_CTYPE, LC_MONETARY, LC_NUMERIC, LC_TIME, LC_MESSAGES, 0
 };
 
 /* Helpers private to this file. */
 static int varequal(const char *, const char *);
 static struct var *find_var(const char *, struct var ***, int *);
 static int localevar(const char *);
 static void setvareq_const(const char *s, int flags);
 
 /* The environment this process was started with. */
 extern char **environ;
 
 /*
  * Set up the variable table at shell start: install every builtin
  * variable from varinit that is not already present, install PS1 (whose
  * default depends on the effective uid), set PPID, import every
  * environment entry containing '=' as an exported variable, and finally
  * set OPTIND to 1.
  */
 
 void
 initvar(void)
 {
 	const struct varinit *init;
 	struct var **slot;
 	struct var *v;
 	char pidbuf[20];
 	char **ep;
 
 	init = varinit;
 	while ((v = init->var) != NULL) {
 		if (find_var(init->text, &slot, &v->name_len) == NULL) {
 			/* Not present yet: link it in at the slot found. */
 			v->next = *slot;
 			*slot = v;
 			v->text = __DECONST(char *, init->text);
 			v->flags = init->flags | VSTRFIXED | VTEXTFIXED;
 			v->func = init->func;
 		}
 		init++;
 	}
 
 	/*
 	 * PS1 depends on uid
 	 */
 	if (find_var("PS1", &slot, &vps1.name_len) == NULL) {
 		vps1.next = *slot;
 		*slot = &vps1;
 		vps1.text = __DECONST(char *, geteuid() ? "PS1=$ " : "PS1=# ");
 		vps1.flags = VSTRFIXED|VTEXTFIXED;
 	}
 
 	fmtstr(pidbuf, sizeof(pidbuf), "%d", (int)getppid());
 	setvarsafe("PPID", pidbuf, 0);
 
 	for (ep = environ; *ep != NULL; ep++)
 		if (strchr(*ep, '=') != NULL)
 			setvareq(*ep, VEXPORT|VTEXTFIXED);
 
 	setvareq_const("OPTIND=1", 0);
 }
 
 /*
  * Safe version of setvar: an exception raised while setting the variable
  * is caught here instead of propagating to the caller's handler.
  * Returns 0 on success and 1 on failure.
  */
 
 int
 setvarsafe(const char *name, const char *val, int flags)
 {
 	struct jmploc jmploc;
 	struct jmploc *const savehandler = handler;
 	int err = 0;
 	int inton;
 
 	/* Remember the interrupt state so it can be restored after a longjmp. */
 	inton = is_int_on();
 	if (setjmp(jmploc.loc))
 		err = 1;	/* setvar() raised an exception */
 	else {
 		handler = &jmploc;
 		setvar(name, val, flags);
 	}
 	handler = savehandler;
 	SETINTON(inton);
 	return err;
 }
 
 /*
  * Set the value of a variable.  The flags argument is stored with the
  * flags of the variable.  If val is NULL, the variable is unset.
  *
  * name may be followed by '=' and further text; only the part before
  * the '=' is used as the variable name.  A name that does not start
  * with a name character, contains a character not allowed in names, or
  * is empty is rejected with a "bad variable name" error.
  * The "name=value" string is allocated here and handed to setvareq().
  */
 
 void
 setvar(const char *name, const char *val, int flags)
 {
 	const char *p;
 	size_t len;
 	size_t namelen;
 	size_t vallen;
 	char *nameeq;
 	int isbad;
 
 	isbad = 0;
 	p = name;
 	if (! is_name(*p))
 		isbad = 1;
 	/*
 	 * Step over the first character, but never past the terminating
 	 * NUL of an empty name: the scan below would otherwise read
 	 * beyond the end of the string.
 	 */
 	if (*p != '\0')
 		p++;
 	for (;;) {
 		if (! is_in_name(*p)) {
 			if (*p == '\0' || *p == '=')
 				break;
 			isbad = 1;
 		}
 		p++;
 	}
 	namelen = p - name;
 	if (isbad)
 		error("%.*s: bad variable name", (int)namelen, name);
 	len = namelen + 2;		/* 2 is space for '=' and '\0' */
 	if (val == NULL) {
 		flags |= VUNSET;
 		vallen = 0;
 	} else {
 		vallen = strlen(val);
 		len += vallen;
 	}
 	INTOFF;
 	nameeq = ckmalloc(len);
 	memcpy(nameeq, name, namelen);
 	nameeq[namelen] = '=';
 	if (val)
 		memcpy(nameeq + namelen + 1, val, vallen + 1);
 	else
 		nameeq[namelen + 1] = '\0';
 	/* setvareq() stores nameeq in the table or frees it. */
 	setvareq(nameeq, flags);
 	INTON;
 }
 
 /*
  * Return 1 if s names a locale-related variable, i.e. LANG, LC_ALL or
  * one of the variables listed in locale_names; 0 otherwise.  The names
  * are compared with varequal().
  */
 static int
 localevar(const char *s)
 {
 	const char *category;
 	int i;
 
 	if (s[0] != 'L')
 		return 0;
 	if (varequal(s + 1, "ANG"))
 		return 1;
 	if (strncmp(s + 1, "C_", 2) != 0)
 		return 0;
 
 	/* s starts with "LC_"; compare what follows that prefix. */
 	category = s + 3;
 	if (varequal(category, "ALL"))
 		return 1;
 	for (i = 0; locale_names[i] != NULL; i++) {
 		if (varequal(category, locale_names[i] + 3))
 			return 1;
 	}
 	return 0;
 }
 
 
 /*
  * Set or unset a variable in the process environment.  s is a
  * "NAME=value" string that may point straight into environ and so must
  * not be modified; a private copy is split at the '=' instead.  The
  * variable is set only when set is nonzero and s contains an '=';
  * otherwise it is removed.
  */
 static void
 change_env(const char *s, int set)
 {
 	char *copy;
 	char *eq;
 
 	INTOFF;
 	copy = savestr(s);
 	eq = strchr(copy, '=');
 	if (eq != NULL)
 		*eq = '\0';
 	if (!set || eq == NULL)
 		(void) unsetenv(copy);
 	else
 		(void) setenv(copy, eq + 1, 1);
 	ckfree(copy);
 	INTON;
 }
 
 
 /*
  * Same as setvar except that the variable and value are passed in
  * the first argument as name=value.  Since the first argument will
  * be actually stored in the table, it should not be a string that
  * will go away.
  *
  * Whenever s ends up not being stored (read-only variable, VNOSET) it
  * is freed here, unless flags contains VTEXTFIXED or VSTACK.
  */
 
 void
 setvareq(char *s, int flags)
 {
 	struct var *vp, **vpp;
 	int nlen;
 
 	/* With aflag set, every assignment is exported. */
 	if (aflag)
 		flags |= VEXPORT;
 	/*
 	 * While forcelocal is nonzero the variable is made local first,
 	 * unless the caller asked for VNOSET or VNOLOCAL.
 	 */
 	if (forcelocal && !(flags & (VNOSET | VNOLOCAL)))
 		mklocal(s);
 	vp = find_var(s, &vpp, &nlen);
 	if (vp != NULL) {
 		/* The variable already exists. */
 		if (vp->flags & VREADONLY) {
 			if ((flags & (VTEXTFIXED|VSTACK)) == 0)
 				ckfree(s);
 			error("%.*s: is read only", vp->name_len, vp->text);
 		}
 		/* VNOSET: only the read-only check is wanted, assign nothing. */
 		if (flags & VNOSET) {
 			if ((flags & (VTEXTFIXED|VSTACK)) == 0)
 				ckfree(s);
 			return;
 		}
 		INTOFF;
 
 		/* Tell the variable's callback about the new value. */
 		if (vp->func && (flags & VNOFUNC) == 0)
 			(*vp->func)(s + vp->name_len + 1);
 
 		/* Release the old text unless the flags say not to free it. */
 		if ((vp->flags & (VTEXTFIXED|VSTACK)) == 0)
 			ckfree(vp->text);
 
 		vp->flags &= ~(VTEXTFIXED|VSTACK|VUNSET);
 		vp->flags |= flags;
 		vp->text = s;
 
 		/*
 		 * We could roll this to a function, to handle it as
 		 * a regular variable function callback, but why bother?
 		 *
 		 * Note: this assumes iflag is not set to 1 initially.
 		 * As part of initvar(), this is called before arguments
 		 * are looked at.
 		 */
 		if ((vp == &vmpath || (vp == &vmail && ! mpathset())) &&
 		    iflag == 1)
 			chkmail(1);
 		/*
 		 * An exported locale variable changed: copy it into the
 		 * process environment and reload the locale from there.
 		 */
 		if ((vp->flags & VEXPORT) && localevar(s)) {
 			change_env(s, 1);
 			(void) setlocale(LC_ALL, "");
 			updatecharset();
 		}
 		INTON;
 		return;
 	}
 	/* not found */
 	if (flags & VNOSET) {
 		if ((flags & (VTEXTFIXED|VSTACK)) == 0)
 			ckfree(s);
 		return;
 	}
 	INTOFF;
 	/* Create a new entry and link it in at the slot find_var() returned. */
 	vp = ckmalloc(sizeof (*vp));
 	vp->flags = flags;
 	vp->text = s;
 	vp->name_len = nlen;
 	vp->next = *vpp;
 	vp->func = NULL;
 	*vpp = vp;
 	if ((vp->flags & VEXPORT) && localevar(s)) {
 		change_env(s, 1);
 		(void) setlocale(LC_ALL, "");
 		updatecharset();
 	}
 	INTON;
 }
 
 
 /*
  * setvareq() for a constant "name=value" string: the text is passed
  * with VTEXTFIXED added to the flags.
  */
 static void
 setvareq_const(const char *s, int flags)
 {
 	char *text = __DECONST(char *, s);
 
 	setvareq(text, flags | VTEXTFIXED);
 }
 
 
 /*
  * Apply every "name=value" entry of an argument list as a variable
  * assignment.  Each entry is copied with savestr() before being handed
  * to setvareq() together with flags.
  */
 
 void
 listsetvar(struct arglist *list, int flags)
 {
 	int idx = 0;
 
 	INTOFF;
 	while (idx < list->count) {
 		setvareq(savestr(list->args[idx]), flags);
 		idx++;
 	}
 	INTON;
 }
 
 
 
 /*
  * Return a pointer to the value of the named variable, i.e. the text
  * after "name=", or NULL if the variable does not exist or is flagged
  * VUNSET.
  */
 
 char *
 lookupvar(const char *name)
 {
 	struct var *v = find_var(name, NULL, NULL);
 
 	if (v != NULL && (v->flags & VUNSET) == 0)
 		return v->text + v->name_len + 1;
 	return NULL;
 }
 
 
 
 /*
  * Look a variable up on behalf of a builtin.  The assignments that
  * precede the builtin (cmdenviron) are searched first, and the last
  * matching one wins.  Otherwise the shell variable is used; unless
  * doall is nonzero it must be exported.  Returns the value, or NULL if
  * there is none.
  */
 
 char *
 bltinlookup(const char *name, int doall)
 {
 	struct var *v;
 	char *found = NULL;
 	int i;
 
 	if (cmdenviron != NULL) {
 		for (i = 0; i < cmdenviron->count; i++) {
 			if (!varequal(cmdenviron->args[i], name))
 				continue;
 			found = strchr(cmdenviron->args[i], '=') + 1;
 		}
 	}
 	if (found != NULL)
 		return found;
 
 	v = find_var(name, NULL, NULL);
 	if (v == NULL)
 		return NULL;
 	if (v->flags & VUNSET)
 		return NULL;
 	if (!doall && (v->flags & VEXPORT) == 0)
 		return NULL;
 	return v->text + v->name_len + 1;
 }
 
 
 /*
  * Set up locale for a builtin (LANG/LC_* assignments).
  */
 void
 bltinsetlocale(void)
 {
 	int act = 0;
 	char *loc, *locdef;
 	int i;
 
 	/* Nothing to do unless cmdenviron contains a locale variable. */
 	if (cmdenviron) for (i = 0; i < cmdenviron->count; i++) {
 		if (localevar(cmdenviron->args[i])) {
 			act = 1;
 			break;
 		}
 	}
 	if (!act)
 		return;
 	/* If LC_ALL is visible it is applied to all categories at once. */
 	loc = bltinlookup("LC_ALL", 0);
 	INTOFF;
 	if (loc != NULL) {
 		setlocale(LC_ALL, loc);
 		INTON;
 		updatecharset();
 		return;
 	}
 	/*
 	 * Otherwise walk locale_names[] and set each category from its own
 	 * variable, falling back to LANG when that variable is not found.
 	 */
 	locdef = bltinlookup("LANG", 0);
 	for (i = 0; locale_names[i] != NULL; i++) {
 		loc = bltinlookup(locale_names[i], 0);
 		if (loc == NULL)
 			loc = locdef;
 		if (loc != NULL)
 			setlocale(locale_categories[i], loc);
 	}
 	INTON;
 	updatecharset();
 }
 
 /*
  * Undo the effect of bltinsetlocale().
  *
  * If cmdenviron contains at least one locale variable, the locale is reset
  * with setlocale(LC_ALL, "") and the cached charset flag is refreshed.
  */
 void
 bltinunsetlocale(void)
 {
 	int i;
 
 	INTOFF;
 	if (cmdenviron) for (i = 0; i < cmdenviron->count; i++) {
 		if (localevar(cmdenviron->args[i])) {
 			setlocale(LC_ALL, "");
 			updatecharset();
 			/*
 			 * Leave the loop instead of returning so that the
 			 * INTON below always balances the INTOFF above.
 			 */
 			break;
 		}
 	}
 	INTON;
 }
 
 /*
  * Update the localeisutf8 flag.
  */
 void
 updatecharset(void)
 {
 	const char *codeset = nl_langinfo(CODESET);
 
 	/* strcmp() yields 0 only for an exact "UTF-8" match. */
 	localeisutf8 = (strcmp(codeset, "UTF-8") == 0);
 }
 
 /*
  * Compute localeisutf8 for the current locale and record that value in
  * initial_localeisutf8.
  */
 void
 initcharset(void)
 {
 	updatecharset();
 	initial_localeisutf8 = localeisutf8;
 }
 
 /*
  * Generate a list of exported variables.  This routine is used to construct
  * the third argument to execve when executing a program.
  */
 
 char **
 environment(void)
 {
 	struct var **bucket;
 	struct var *vp;
 	char **env, **slot;
 	int exported = 0;
 
 	/* First pass: count the variables carrying VEXPORT. */
 	for (bucket = vartab; bucket < vartab + VTABSIZE; bucket++) {
 		vp = *bucket;
 		while (vp != NULL) {
 			if (vp->flags & VEXPORT)
 				exported++;
 			vp = vp->next;
 		}
 	}
 
 	/* Second pass: fill the vector, leaving room for the NULL sentinel. */
 	env = stalloc((exported + 1) * sizeof *env);
 	slot = env;
 	for (bucket = vartab; bucket < vartab + VTABSIZE; bucket++) {
 		vp = *bucket;
 		while (vp != NULL) {
 			if (vp->flags & VEXPORT)
 				*slot++ = vp->text;
 			vp = vp->next;
 		}
 	}
 	*slot = NULL;
 	return env;
 }
 
 
 /*
  * qsort() comparison callback: a and b each point to a string pointer.
  */
 static int
 var_compare(const void *a, const void *b)
 {
 	const char *lhs = *(const char *const *)a;
 	const char *rhs = *(const char *const *)b;
 
 	/*
 	 * The complete var=value text is collated, not just the name, so
 	 * the resulting order may differ from what you would expect.  POSIX
 	 * is somewhat ambiguous on what should be sorted exactly.
 	 */
 	return strcoll(lhs, rhs);
 }
 
 
 /*
  * Command to list all variables which are set.  This is invoked from the
  * set command when it is called without any options or operands.
  */
 
 int
 showvarscmd(int argc __unused, char **argv __unused)
 {
 	struct var **vpp;
 	struct var *vp;
 	const char *s;
 	const char **vars;
 	int i, n;
 
 	/*
 	 * POSIX requires us to sort the variables.
 	 */
 	/* Pass 1: count the variables that are not marked VUNSET. */
 	n = 0;
 	for (vpp = vartab; vpp < vartab + VTABSIZE; vpp++) {
 		for (vp = *vpp; vp; vp = vp->next) {
 			if (!(vp->flags & VUNSET))
 				n++;
 		}
 	}
 
 	INTOFF;
 	/* Pass 2: collect a pointer to the text of each counted variable. */
 	vars = ckmalloc(n * sizeof(*vars));
 	i = 0;
 	for (vpp = vartab; vpp < vartab + VTABSIZE; vpp++) {
 		for (vp = *vpp; vp; vp = vp->next) {
 			if (!(vp->flags & VUNSET))
 				vars[i++] = vp->text;
 		}
 	}
 
 	qsort(vars, n, sizeof(*vars), var_compare);
 	for (i = 0; i < n; i++) {
 		/*
 		 * Skip improper variable names so the output remains usable as
 		 * shell input.
 		 */
 		if (!isassignment(vars[i]))
 			continue;
 		/* After the increment, s points just past the first '='. */
 		s = strchr(vars[i], '=');
 		s++;
 		/* "name=" is written with outbin(), the value with out1qstr(). */
 		outbin(vars[i], s - vars[i], out1);
 		out1qstr(s);
 		out1c('\n');
 	}
 	ckfree(vars);
 	INTON;
 
 	return 0;
 }
 
 
 
 /*
  * The export and readonly commands.
  */
 
 int
 exportcmd(int argc __unused, char **argv)
 {
 	struct var **vpp;
 	struct var *vp;
 	char **ap;
 	char *name;
 	char *p;
 	char *cmdname;
 	int ch, values;
 	/* A command name starting with 'r' selects VREADONLY, else VEXPORT. */
 	int flag = argv[0][0] == 'r'? VREADONLY : VEXPORT;
 
 	cmdname = argv[0];
 	values = 0;
 	while ((ch = nextopt("p")) != '\0') {
 		switch (ch) {
 		case 'p':
 			values = 1;
 			break;
 		}
 	}
 
 	if (values && *argptr != NULL)
 		error("-p requires no arguments");
 	if (*argptr != NULL) {
 		/* Operands given: apply the flag to each named variable. */
 		for (ap = argptr; (name = *ap) != NULL; ap++) {
 			if ((p = strchr(name, '=')) != NULL) {
 				/* "name=value" form: p now points at the value. */
 				p++;
 			} else {
 				/*
 				 * Bare name: if the variable already exists
 				 * just add the flag; p stays NULL otherwise.
 				 */
 				vp = find_var(name, NULL, NULL);
 				if (vp != NULL) {
 					vp->flags |= flag;
 					if ((vp->flags & VEXPORT) && localevar(vp->text)) {
 						change_env(vp->text, 1);
 						(void) setlocale(LC_ALL, "");
 						updatecharset();
 					}
 					continue;
 				}
 			}
 			setvar(name, p, flag);
 		}
 	} else {
 		/* No operands: list every variable that carries the flag. */
 		for (vpp = vartab ; vpp < vartab + VTABSIZE ; vpp++) {
 			for (vp = *vpp ; vp ; vp = vp->next) {
 				if (vp->flags & flag) {
 					if (values) {
 						/*
 						 * Skip improper variable names
 						 * so the output remains usable
 						 * as shell input.
 						 */
 						if (!isassignment(vp->text))
 							continue;
 						out1str(cmdname);
 						out1c(' ');
 					}
 					/*
 					 * With -p and a set variable print
 					 * "name=" plus the value; otherwise
 					 * print the name only.
 					 */
 					if (values && !(vp->flags & VUNSET)) {
 						outbin(vp->text,
 						    vp->name_len + 1, out1);
 						out1qstr(vp->text +
 						    vp->name_len + 1);
 					} else
 						outbin(vp->text, vp->name_len,
 						    out1);
 					out1c('\n');
 				}
 			}
 		}
 	}
 	return 0;
 }
 
 
 /*
  * The "local" command.
  */
 
 int
 localcmd(int argc __unused, char **argv __unused)
 {
 	char *name;
 
 	/* No options are accepted. */
 	nextopt("");
 	if (! in_function())
 		error("Not in a function");
 	/* Consume operands from argptr until the terminating NULL. */
 	for (name = *argptr++; name != NULL; name = *argptr++)
 		mklocal(name);
 	return 0;
 }
 
 
 /*
  * Make a variable a local variable.  When a variable is made local, its
  * value and flags are saved in a localvar structure.  The saved values
  * will be restored when the shell function returns.  We handle the name
  * "-" as a special case.
  */
 
 void
 mklocal(char *name)
 {
 	struct localvar *lvp;
 	struct var **vpp;
 	struct var *vp;
 
 	INTOFF;
 	lvp = ckmalloc(sizeof (struct localvar));
 	if (name[0] == '-' && name[1] == '\0') {
 		/* "local -": save a copy of optval; vp == NULL marks this case. */
 		lvp->text = ckmalloc(sizeof optval);
 		memcpy(lvp->text, optval, sizeof optval);
 		vp = NULL;
 	} else {
 		vp = find_var(name, &vpp, NULL);
 		if (vp == NULL) {
 			/*
 			 * No such variable yet: create it now and record
 			 * "unset" as the state to restore later.
 			 */
 			if (strchr(name, '='))
 				setvareq(savestr(name), VSTRFIXED | VNOLOCAL);
 			else
 				setvar(name, NULL, VSTRFIXED | VNOLOCAL);
 			vp = *vpp;	/* the new variable */
 			lvp->text = NULL;
 			lvp->flags = VUNSET;
 		} else {
 			/*
 			 * Existing variable: remember its text and flags, then
 			 * mark it VSTRFIXED|VTEXTFIXED before any new value is
 			 * assigned.
 			 */
 			lvp->text = vp->text;
 			lvp->flags = vp->flags;
 			vp->flags |= VSTRFIXED|VTEXTFIXED;
 			if (name[vp->name_len] == '=')
 				setvareq(savestr(name), VNOLOCAL);
 		}
 	}
 	/* Push the saved state onto the head of the localvars list. */
 	lvp->vp = vp;
 	lvp->next = localvars;
 	localvars = lvp;
 	INTON;
 }
 
 
 /*
  * Called after a function returns.
  */
 
 void
 poplocalvars(void)
 {
 	struct localvar *lvp;
 	struct var *vp;
+	int islocalevar;
 
 	INTOFF;
 	/* Pop every entry off the localvars list, restoring as we go. */
 	while ((lvp = localvars) != NULL) {
 		localvars = lvp->next;
 		vp = lvp->vp;
 		if (vp == NULL) {	/* $- saved */
 			memcpy(optval, lvp->text, sizeof optval);
 			ckfree(lvp->text);
 			optschanged();
 		} else if ((lvp->flags & (VUNSET|VSTRFIXED)) == VUNSET) {
 			/* Saved state was VUNSET without VSTRFIXED: unset it. */
 			(void)unsetvar(vp->text);
 		} else {
+			/*
+			 * Computed before vp is overwritten: true when either
+			 * the current or the saved flags have VEXPORT and the
+			 * saved text is a locale variable.
+			 */
+			islocalevar = (vp->flags | lvp->flags) & VEXPORT &&
+			    localevar(lvp->text);
 			if ((vp->flags & VTEXTFIXED) == 0)
 				ckfree(vp->text);
 			vp->flags = lvp->flags;
 			vp->text = lvp->text;
+			/* Run the variable's callback, if any, on the restored value. */
+			if (vp->func)
+				(*vp->func)(vp->text + vp->name_len + 1);
+			if (islocalevar) {
+				/*
+				 * Second argument is nonzero only when the
+				 * restored variable is exported and set.
+				 */
+				change_env(vp->text, vp->flags & VEXPORT &&
+				    (vp->flags & VUNSET) == 0);
+				setlocale(LC_ALL, "");
+				updatecharset();
+			}
 		}
 		ckfree(lvp);
 	}
 	INTON;
 }
 
 
 /*
  * The setvar builtin: "setvar name value".  With fewer operands the call
  * is handed to unsetcmd().
  */
 int
 setvarcmd(int argc, char **argv)
 {
 	if (argc <= 2)
 		return unsetcmd(argc, argv);
 
 	switch (argc) {
 	case 3:
 		setvar(argv[1], argv[2], 0);
 		break;
 	default:
 		error("too many arguments");
 		break;
 	}
 	return 0;
 }
 
 
 /*
  * The unset builtin command.
  */
 
 int
 unsetcmd(int argc __unused, char **argv __unused)
 {
 	char **ap;
 	int opt;
 	int want_func = 0;
 	int want_var = 0;
 	int status = 0;
 
 	for (opt = nextopt("vf"); opt != '\0'; opt = nextopt("vf")) {
 		switch (opt) {
 		case 'f':
 			want_func = 1;
 			break;
 		default:
 			want_var = 1;
 			break;
 		}
 	}
 	/* Neither -f nor -v given: operate on variables. */
 	if (!want_func && !want_var)
 		want_var = 1;
 
 	INTOFF;
 	ap = argptr;
 	while (*ap) {
 		/* Results are OR-ed so any failure makes the status nonzero. */
 		if (want_func)
 			status |= unsetfunc(*ap);
 		if (want_var)
 			status |= unsetvar(*ap);
 		ap++;
 	}
 	INTON;
 	return status;
 }
 
 
 /*
  * Unset the specified variable.
  * Called with interrupts off.
  */
 
 int
 unsetvar(const char *s)
 {
 	struct var **vpp;
 	struct var *vp;
 
 	/* Returns 0 if the variable does not exist, 1 if it is read-only. */
 	vp = find_var(s, &vpp, NULL);
 	if (vp == NULL)
 		return (0);
 	if (vp->flags & VREADONLY)
 		return (1);
 	/* A non-empty value is first replaced by the empty string. */
 	if (vp->text[vp->name_len + 1] != '\0')
 		setvar(s, "", 0);
 	if ((vp->flags & VEXPORT) && localevar(vp->text)) {
 		change_env(s, 0);
 		setlocale(LC_ALL, "");
 		updatecharset();
 	}
 	vp->flags &= ~VEXPORT;
 	vp->flags |= VUNSET;
 	/*
 	 * Without VSTRFIXED the entry is unlinked from its list and freed;
 	 * its text is freed too unless VTEXTFIXED is set.
 	 */
 	if ((vp->flags & VSTRFIXED) == 0) {
 		if ((vp->flags & VTEXTFIXED) == 0)
 			ckfree(vp->text);
 		*vpp = vp->next;
 		ckfree(vp);
 	}
 	return (0);
 }
 
 
 
 /*
  * Returns true if the two strings specify the same variable.  The first
  * variable name is terminated by '='; the second may be terminated by
  * either '=' or '\0'.
  */
 
 static int
 varequal(const char *p, const char *q)
 {
 	/* Advance both strings in lock step while the characters agree. */
 	for (; *p == *q; p++, q++) {
 		/* Both names ended with '=' at the same position. */
 		if (*p == '=')
 			return 1;
 	}
 	/* Mismatch: still equal if p's name ends exactly where q ends. */
 	return (*p == '=' && *q == '\0') ? 1 : 0;
 }
 
 /*
  * Search for a variable.
  * 'name' may be terminated by '=' or a NUL.
  * vppp is set to the pointer to vp, or the list head if vp isn't found
  * lenp is set to the number of characters in 'name'
  */
 
 static struct var *
 find_var(const char *name, struct var ***vppp, int *lenp)
 {
 	unsigned int hashval;
 	int len;
 	struct var *vp, **vpp;
 	const char *p = name;
 
 	/* Hash the characters of the name, stopping at '=' or NUL. */
 	hashval = 0;
 	while (*p && *p != '=')
 		hashval = 2 * hashval + (unsigned char)*p++;
 	len = p - name;
 
 	if (lenp)
 		*lenp = len;
 	/* Default *vppp to the list head; overwritten below on a match. */
 	vpp = &vartab[hashval % VTABSIZE];
 	if (vppp)
 		*vppp = vpp;
 
 	/* vpp always addresses the link that points at vp. */
 	for (vp = *vpp ; vp ; vpp = &vp->next, vp = *vpp) {
 		if (vp->name_len != len)
 			continue;
 		if (memcmp(vp->text, name, len) != 0)
 			continue;
 		if (vppp)
 			*vppp = vpp;
 		return vp;
 	}
 	return NULL;
 }
Index: projects/clang380-import/contrib/hyperv/tools/hv_kvp_daemon.c
===================================================================
--- projects/clang380-import/contrib/hyperv/tools/hv_kvp_daemon.c	(revision 293686)
+++ projects/clang380-import/contrib/hyperv/tools/hv_kvp_daemon.c	(revision 293687)
@@ -1,1517 +1,1517 @@
 /*-
  * Copyright (c) 2014 Microsoft Corp.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/poll.h>
 #include <sys/utsname.h>
 #include <sys/stat.h>
 #include <sys/un.h>
 
 #include <arpa/inet.h>
 #include <ifaddrs.h>
 #include <netdb.h>
 
 #include <netinet/in.h>
 #include <net/ethernet.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 
 #include <assert.h>
 
 #include <ctype.h>
 #include <dirent.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <poll.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <syslog.h>
 #include <unistd.h>
 
 #include "hv_kvp.h"
 
 typedef uint8_t		__u8;
 typedef uint16_t	__u16;
 typedef uint32_t	__u32;
 typedef uint64_t	__u64;
 
 /*
  * ENUM Data
  */
 
 enum key_index {
 	FullyQualifiedDomainName = 0,
 	IntegrationServicesVersion, /*This key is serviced in the kernel*/
 	NetworkAddressIPv4,
 	NetworkAddressIPv6,
 	OSBuildNumber,
 	OSName,
 	OSMajorVersion,
 	OSMinorVersion,
 	OSVersion,
 	ProcessorArchitecture
 };
 
 
 enum {
 	IPADDR = 0,
 	NETMASK,
 	GATEWAY,
 	DNS
 };
 
 
 /* Global Variables */
 
 /*
  * The structure for operation handlers.
  */
 struct kvp_op_hdlr {
 	int	kvp_op_key;
 	void	(*kvp_op_init)(void);
  	int	(*kvp_op_exec)(struct hv_kvp_msg *kvp_op_msg, void *data);
 };
 
 static struct kvp_op_hdlr kvp_op_hdlrs[HV_KVP_OP_COUNT];
 
 /* OS information */
 
 static const char *os_name = "";
 static const char *os_major = "";
 static const char *os_minor = "";
 static const char *processor_arch;
 static const char *os_build;
 static const char *lic_version = "BSD Pre-Release version";
 static struct utsname uts_buf;
 
 /* Global flags */
 static int is_daemon = 1;
 static int is_debugging = 0;
 
 #define	KVP_LOG(priority, format, args...) do	{			\
 		if (is_debugging == 1) {				\
 			if (is_daemon == 1)				\
 				syslog(priority, format, ## args);	\
 			else						\
 				printf(format, ## args);		\
 		} else {						\
 			if (priority < LOG_DEBUG) {			\
 				if (is_daemon == 1)			\
 					syslog(priority, format, ## args);	\
 				else					\
 					printf(format, ## args);	\
 			}						\
 		}							\
 	} while(0)
 
 /*
  * For KVP pool file
  */
 
 #define MAX_FILE_NAME		100
 #define ENTRIES_PER_BLOCK	50
 
 struct kvp_record {
 	char	key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
 	char	value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE];
 };
 
 struct kvp_pool {
 	int			pool_fd;
 	int			num_blocks;
 	struct kvp_record	*records;
 	int			num_records;
 	char			fname[MAX_FILE_NAME];
 };
 
 static struct kvp_pool kvp_pools[HV_KVP_POOL_COUNT];
 
 
 /*
  * Take a write lock on the pool's file descriptor, waiting if necessary.
  * Logs and exits the process when fcntl() fails.
  */
 static void
 kvp_acquire_lock(int pool)
 {
 	struct flock lock = { 0, 0, 0, F_WRLCK, SEEK_SET, 0 };
 	int rc;
 
 	lock.l_pid = getpid();
 
 	/* F_SETLKW waits for a conflicting lock to be released. */
 	rc = fcntl(kvp_pools[pool].pool_fd, F_SETLKW, &lock);
 	if (rc != -1)
 		return;
 
 	KVP_LOG(LOG_ERR, "Failed to acquire the lock pool: %d", pool);
 	exit(EXIT_FAILURE);
 }
 
 
 /*
  * Drop the lock held on the pool's file descriptor.
  * Reports the error and exits the process when fcntl() fails.
  */
 static void
 kvp_release_lock(int pool)
 {
 	struct flock lock = { 0, 0, 0, F_UNLCK, SEEK_SET, 0 };
 	int rc;
 
 	lock.l_pid = getpid();
 
 	rc = fcntl(kvp_pools[pool].pool_fd, F_SETLK, &lock);
 	if (rc != -1)
 		return;
 
 	perror("fcntl");
 	KVP_LOG(LOG_ERR, "Failed to release the lock pool: %d\n", pool);
 	exit(EXIT_FAILURE);
 }
 
 
 /*
  * Write in-memory copy of KVP to pool files
  */
 static void
 kvp_update_file(int pool)
 {
 	FILE *filep;
 	size_t bytes_written;
 
 	kvp_acquire_lock(pool);
 
 	filep = fopen(kvp_pools[pool].fname, "w");
 	if (!filep) {
 		kvp_release_lock(pool);
 		KVP_LOG(LOG_ERR, "Failed to open file, pool: %d\n", pool);
 		exit(EXIT_FAILURE);
 	}
 
 	bytes_written = fwrite(kvp_pools[pool].records,
 		sizeof(struct kvp_record),
 		kvp_pools[pool].num_records, filep);
 
 	if (ferror(filep) || fclose(filep)) {
 		kvp_release_lock(pool);
 		KVP_LOG(LOG_ERR, "Failed to write file, pool: %d\n", pool);
 		exit(EXIT_FAILURE);
 	}
 
 	kvp_release_lock(pool);
 }
 
 
 /*
  * Read KVPs from pool files and store in memory
  */
 /*
  * Reload the in-memory record array of 'pool' from its pool file.
  *
  * The file is read under the pool lock.  The array is grown one block of
  * ENTRIES_PER_BLOCK records at a time until end-of-file is reached, and the
  * pool's records pointer, block count and record count are then updated.
  * Any open, read or allocation failure is logged and terminates the process.
  */
 static void
 kvp_update_mem_state(int pool)
 {
 	FILE *filep;
 	size_t records_read = 0;
 	struct kvp_record *record = kvp_pools[pool].records;
 	struct kvp_record *readp;
 	int num_blocks = kvp_pools[pool].num_blocks;
 	int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK;
 
 	kvp_acquire_lock(pool);
 
 	filep = fopen(kvp_pools[pool].fname, "r");
 	if (!filep) {
 		kvp_release_lock(pool);
 		KVP_LOG(LOG_ERR, "Failed to open file, pool: %d\n", pool);
 		exit(EXIT_FAILURE);
 	}
 	for ( ; ; )
 	{
 		readp = &record[records_read];
 		/*
 		 * Request only as many records as still fit behind readp.
 		 * Asking for the full capacity again after the buffer has
 		 * been partly filled would let fread() write past its end.
 		 */
 		records_read += fread(readp, sizeof(struct kvp_record),
 			ENTRIES_PER_BLOCK * num_blocks - records_read,
 			filep);
 
 		if (ferror(filep)) {
 			KVP_LOG(LOG_ERR, "Failed to read file, pool: %d\n", pool);
 			exit(EXIT_FAILURE);
 		}
 
 		if (!feof(filep)) {
 			/*
 			 * Have more data to read. Expand the memory.
 			 */
 			num_blocks++;
 			record = realloc(record, alloc_unit * num_blocks);
 
 			if (record == NULL) {
 				KVP_LOG(LOG_ERR, "malloc failed\n");
 				exit(EXIT_FAILURE);
 			}
 			continue;
 		}
 		break;
 	}
 
 	kvp_pools[pool].num_blocks = num_blocks;
 	kvp_pools[pool].records = record;
 	kvp_pools[pool].num_records = records_read;
 
 	fclose(filep);
 	kvp_release_lock(pool);
 }
 
 
 /*
  * Create the pool directory if needed, then open every pool file and load
  * its records into kvp_pools[].
  *
  * Returns 0 on success and 1 when a file cannot be opened or memory cannot
  * be allocated.  A failure to create the directory or a read error exits
  * the process.
  */
 static int
 kvp_file_init(void)
 {
 	int fd;
 	FILE *filep;
 	size_t records_read;
 	char *fname;
 	struct kvp_record *record;
 	struct kvp_record *readp;
 	int num_blocks;
 	int i;
 	int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK;
 
 	/* An already existing directory is not treated as an error. */
 	if (mkdir("/var/db/hyperv/pool", S_IRUSR | S_IWUSR | S_IROTH) < 0 &&
 	    (errno != EEXIST && errno != EISDIR)) {
 		KVP_LOG(LOG_ERR, " Failed to create /var/db/hyperv/pool\n");
 		exit(EXIT_FAILURE);
 	}
 
 	for (i = 0; i < HV_KVP_POOL_COUNT; i++)
 	{
 		fname = kvp_pools[i].fname;
 		records_read = 0;
 		num_blocks = 1;
 		snprintf(fname, MAX_FILE_NAME, "/var/db/hyperv/pool/.kvp_pool_%d", i);
 		/* This descriptor is kept in pool_fd; O_CREAT creates the file. */
 		fd = open(fname, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IROTH);
 
 		if (fd == -1) {
 			return (1);
 		}
 
 
 		/* A separate stdio stream is used only for the initial load. */
 		filep = fopen(fname, "r");
 		if (!filep) {
 			close(fd);
 			return (1);
 		}
 
 		record = malloc(alloc_unit * num_blocks);
 		if (record == NULL) {
 			close(fd);
 			fclose(filep);
 			return (1);
 		}
 		for ( ; ; )
 		{
 			/* Read one block at a time, appending to what was read. */
 			readp = &record[records_read];
 			records_read += fread(readp, sizeof(struct kvp_record),
 				ENTRIES_PER_BLOCK,
 				filep);
 
 			if (ferror(filep)) {
 				KVP_LOG(LOG_ERR, "Failed to read file, pool: %d\n",
 				    i);
 				exit(EXIT_FAILURE);
 			}
 
 			if (!feof(filep)) {
 				/*
 				 * More data to read.
 				 */
 				num_blocks++;
 				/*
 				 * NOTE(review): on failure the previous buffer
 				 * is no longer referenced and is not freed.
 				 */
 				record = realloc(record, alloc_unit *
 					num_blocks);
 				if (record == NULL) {
 					close(fd);
 					fclose(filep);
 					return (1);
 				}
 				continue;
 			}
 			break;
 		}
 		kvp_pools[i].pool_fd = fd;
 		kvp_pools[i].num_blocks = num_blocks;
 		kvp_pools[i].records = record;
 		kvp_pools[i].num_records = records_read;
 		fclose(filep);
 	}
 
 	return (0);
 }
 
 
 /*
  * Remove the first record of 'pool' whose key matches the first 'key_size'
  * bytes of 'key', then write the pool back to its file.
  *
  * Returns 0 when a record was deleted and 1 when no record matched.
  */
 static int
 kvp_key_delete(int pool, __u8 *key, int key_size)
 {
 	int i;
 	int num_records;
 	struct kvp_record *record;
 
 	KVP_LOG(LOG_DEBUG, "kvp_key_delete: pool =  %d, "
 	    "key = %s\n", pool, key);
 
 	/* Update in-memory state */
 	kvp_update_mem_state(pool);
 
 	num_records = kvp_pools[pool].num_records;
 	record = kvp_pools[pool].records;
 
 	for (i = 0; i < num_records; i++)
 	{
 		if (memcmp(key, record[i].key, key_size)) {
 			continue;
 		}
 
 		KVP_LOG(LOG_DEBUG, "Found delete key in pool %d.\n",
 		    pool);
 		/*
 		 * Close the gap by moving the trailing records down one
 		 * slot.  Whole records are moved, so the result does not
 		 * depend on the fields being NUL-terminated.  When the match
 		 * is the last record there is nothing to move.
 		 */
 		if (i < num_records - 1) {
 			memmove(&record[i], &record[i + 1],
 			    (size_t)(num_records - i - 1) * sizeof(*record));
 		}
 		kvp_pools[pool].num_records--;
 		kvp_update_file(pool);
 		return (0);
 	}
 	KVP_LOG(LOG_DEBUG, "Not found delete key in pool %d.\n",
 	    pool);
 	return (1);
 }
 
 
 /*
  * Store 'value' under 'key' in 'pool', overwriting the value of an existing
  * record with that key or appending a new record, then write the pool back
  * to its file.
  *
  * Returns 0 on success and 1 when a size exceeds its limit or the record
  * array cannot be grown.
  */
 static int
 kvp_key_add_or_modify(int pool, __u8 *key, __u32 key_size, __u8 *value,
     __u32 value_size)
 {
 	int i;
 	int num_records;
 	struct kvp_record *record;
 	int num_blocks;
 
 	KVP_LOG(LOG_DEBUG, "kvp_key_add_or_modify: pool =  %d, "
 	    "key = %s, value = %s\n,", pool, key, value);
 
 	if ((key_size > HV_KVP_EXCHANGE_MAX_KEY_SIZE) ||
 	    (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE)) {
 		KVP_LOG(LOG_ERR, "kvp_key_add_or_modify: returning 1\n");
 		return (1);
 	}
 
 	/* Update the in-memory state. */
 	kvp_update_mem_state(pool);
 
 	num_records = kvp_pools[pool].num_records;
 	record = kvp_pools[pool].records;
 	num_blocks = kvp_pools[pool].num_blocks;
 
 	for (i = 0; i < num_records; i++)
 	{
 		if (memcmp(key, record[i].key, key_size)) {
 			continue;
 		}
 
 		/*
 		 * Key exists. Just update the value and we are done.
 		 */
 		memcpy(record[i].value, value, value_size);
 		kvp_update_file(pool);
 		return (0);
 	}
 
 	/*
 	 * Key doesn't exist; Add a new KVP.
 	 */
 	if (num_records == (ENTRIES_PER_BLOCK * num_blocks)) {
 		/* Increase the size of the record array. */
 		record = realloc(record, sizeof(struct kvp_record) *
 			ENTRIES_PER_BLOCK * (num_blocks + 1));
 
 		if (record == NULL) {
 			return (1);
 		}
 		kvp_pools[pool].num_blocks++;
 	}
 	/* The loop ended with i == num_records: the first free slot. */
 	memcpy(record[i].value, value, value_size);
 	memcpy(record[i].key, key, key_size);
 	kvp_pools[pool].records = record;
 	kvp_pools[pool].num_records++;
 	kvp_update_file(pool);
 	return (0);
 }
 
 
 /*
  * Copy 'value_size' bytes of the value stored under 'key' in 'pool' into
  * 'value'.  Returns 0 when the key was found, 1 when it was not or when a
  * size exceeds its limit.
  */
 static int
 kvp_get_value(int pool, __u8 *key, int key_size, __u8 *value,
     int value_size)
 {
 	struct kvp_record *table;
 	int count;
 	int idx = 0;
 
 	KVP_LOG(LOG_DEBUG, "kvp_get_value: pool =  %d, key = %s\n,",
 	    pool, key);
 
 	if (key_size > HV_KVP_EXCHANGE_MAX_KEY_SIZE ||
 	    value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE)
 		return (1);
 
 	/* Refresh the in-memory copy before searching it. */
 	kvp_update_mem_state(pool);
 
 	count = kvp_pools[pool].num_records;
 	table = kvp_pools[pool].records;
 
 	while (idx < count) {
 		if (memcmp(key, table[idx].key, key_size) == 0) {
 			/* First match wins. */
 			memcpy(value, table[idx].value, value_size);
 			return (0);
 		}
 		idx++;
 	}
 
 	return (1);
 }
 
 
 /*
  * Copy the key and value of record number 'idx' of 'pool' into the
  * caller's buffers.  Returns 1 when 'idx' is not below the record count,
  * otherwise 0.
  */
 static int
 kvp_pool_enumerate(int pool, int idx, __u8 *key, int key_size,
     __u8 *value, int value_size)
 {
 	struct kvp_record *entry;
 
 	KVP_LOG(LOG_DEBUG, "kvp_pool_enumerate: pool = %d, index = %d\n,",
 	    pool, idx);
 
 	/* Refresh the in-memory copy before indexing into it. */
 	kvp_update_mem_state(pool);
 
 	/* Indices start at 0. */
 	if (idx >= kvp_pools[pool].num_records)
 		return (1);
 
 	entry = &kvp_pools[pool].records[idx];
 	memcpy(key, entry->key, key_size);
 	memcpy(value, entry->value, value_size);
 	return (0);
 }
 
 
 /*
  * Fill os_name, os_build and processor_arch from uname().
  */
 static void
 kvp_get_os_info(void)
 {
 	char *dash;
 
 	uname(&uts_buf);
 	os_name = uts_buf.sysname;
 	os_build = uts_buf.release;
 	processor_arch = uts_buf.machine;
 
 	/*
 	 * Win7 host expects the build string to be of the form: x.y.z
 	 * Cut the release string at its first '-' to drop anything extra.
 	 */
 	dash = strchr(os_build, '-');
 	if (dash != NULL)
 		*dash = '\0';
 
 	/* No other information about the FreeBSD os is available here. */
 }
 
 /*
  * Given the interface name, return the MAC address.
  */
 static char *
 kvp_if_name_to_mac(char *if_name)
 {
 	char *mac_addr = NULL;
 	struct ifaddrs *ifaddrs_ptr;
 	struct ifaddrs *head_ifaddrs_ptr;
 	struct sockaddr_dl *sdl;
 	int status;
 
 	status = getifaddrs(&ifaddrs_ptr);
 
 	if (status >= 0) {
 		head_ifaddrs_ptr = ifaddrs_ptr;
 		do {
 			sdl = (struct sockaddr_dl *)(uintptr_t)ifaddrs_ptr->ifa_addr;
 			if ((sdl->sdl_type == IFT_ETHER) &&
 			    (strcmp(ifaddrs_ptr->ifa_name, if_name) == 0)) {
 				mac_addr = strdup(ether_ntoa((struct ether_addr *)(LLADDR(sdl))));
 				break;
 			}
 		} while ((ifaddrs_ptr = ifaddrs_ptr->ifa_next) != NULL);
 		freeifaddrs(head_ifaddrs_ptr);
 	}
 
 	return (mac_addr);
 }
 
 
 /*
  * Given the MAC address, return the interface name.
  */
 static char *
 kvp_mac_to_if_name(char *mac)
 {
 	char *if_name = NULL;
 	struct ifaddrs *ifaddrs_ptr;
 	struct ifaddrs *head_ifaddrs_ptr;
 	struct sockaddr_dl *sdl;
 	int status;
 	char *buf_ptr, *p;
 
 	status = getifaddrs(&ifaddrs_ptr);
 
 	if (status >= 0) {
 		head_ifaddrs_ptr = ifaddrs_ptr;
 		do {
 			sdl = (struct sockaddr_dl *)(uintptr_t)ifaddrs_ptr->ifa_addr;
 			if (sdl->sdl_type == IFT_ETHER) {
 				buf_ptr = strdup(ether_ntoa((struct ether_addr *)(LLADDR(sdl))));
 				if (buf_ptr != NULL) {
 					for (p = buf_ptr; *p != '\0'; p++)
 						*p = toupper(*p);
 
 					if (strncmp(buf_ptr, mac, strlen(mac)) == 0) {
 						/* Caller will free the memory */
 						if_name = strdup(ifaddrs_ptr->ifa_name);
 						free(buf_ptr);
 						break;
 					} else
 						free(buf_ptr);
 				}
 			}
 		} while ((ifaddrs_ptr = ifaddrs_ptr->ifa_next) != NULL);
 		freeifaddrs(head_ifaddrs_ptr);
 	}
 	return (if_name);
 }
 
 
 static void
 kvp_process_ipconfig_file(char *cmd,
     char *config_buf, size_t len,
     size_t element_size, int offset)
 {
 	char buf[256];
 	char *p;
 	char *x;
 	FILE *file;
 
 	/*
 	 * First execute the command.
 	 */
 	file = popen(cmd, "r");
 	if (file == NULL) {
 		return;
 	}
 
 	if (offset == 0) {
 		memset(config_buf, 0, len);
 	}
 	while ((p = fgets(buf, sizeof(buf), file)) != NULL) {
 		if ((len - strlen(config_buf)) < (element_size + 1)) {
 			break;
 		}
 
 		x = strchr(p, '\n');
 		*x = '\0';
 		strlcat(config_buf, p, len);
 		strlcat(config_buf, ";", len);
 	}
 	pclose(file);
 }
 
 
 /*
  * Fill the gateway, DNS and DHCP fields of 'buffer' for interface 'if_name'
  * by running external commands and helper scripts.
  *
  * NOTE(review): if_name is inserted into shell command lines unquoted;
  * confirm that callers only pass system-provided interface names.
  */
 static void
 kvp_get_ipconfig_info(char *if_name, struct hv_kvp_ipaddr_value *buffer)
 {
 	char cmd[512];
 	char dhcp_info[128];
 	char *p;
 	FILE *file;
 
 	/*
 	 * Retrieve the IPV4 address of default gateway.
 	 */
 	snprintf(cmd, sizeof(cmd), "netstat -rn | grep %s | awk '/default/ {print $2 }'", if_name);
 
 	/*
 	 * Execute the command to gather gateway IPV4 info.
 	 */
 	kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way,
 	    (MAX_GATEWAY_SIZE * 2), INET_ADDRSTRLEN, 0);
 
 	/*
 	 * Retrieve the IPV6 address of default gateway.  The awk program
 	 * must be closed with a single quote or the shell rejects the whole
 	 * pipeline.
 	 * NOTE(review): "netstat -rn inet6" probably wants "-f inet6" to
 	 * select the address family; confirm against netstat(1).
 	 */
 	snprintf(cmd, sizeof(cmd), "netstat -rn inet6 | grep %s | awk '/default/ {print $2 }'", if_name);
 
 	/*
 	 * Execute the command to gather gateway IPV6 info.  A non-zero
 	 * offset appends to the IPV4 result instead of clearing it.
 	 */
 	kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way,
 	    (MAX_GATEWAY_SIZE * 2), INET6_ADDRSTRLEN, 1);
 
 	/*
 	 * we just invoke an external script to get the DNS info.
 	 *
 	 * Following is the expected format of the information from the script:
 	 *
 	 * ipaddr1 (nameserver1)
 	 * ipaddr2 (nameserver2)
 	 * .
 	 * .
 	 */
 	/* Scripts are stored in /usr/libexec/hyperv/ directory */
 	snprintf(cmd, sizeof(cmd), "%s", "sh /usr/libexec/hyperv/hv_get_dns_info");
 
 	/*
 	 * Execute the command to get DNS info.
 	 */
 	kvp_process_ipconfig_file(cmd, (char *)buffer->dns_addr,
 	    (MAX_IP_ADDR_SIZE * 2), INET_ADDRSTRLEN, 0);
 
 	/*
 	 * Invoke an external script to get the DHCP state info.
 	 * The parameter to the script is the interface name.
 	 * Here is the expected output:
 	 *
 	 * Enabled: DHCP enabled.
 	 */
 
 
 	snprintf(cmd, sizeof(cmd), "%s %s",
 	    "sh /usr/libexec/hyperv/hv_get_dhcp_info", if_name);
 
 	file = popen(cmd, "r");
 	if (file == NULL) {
 		return;
 	}
 
 	/* Only the first line of the script's output is examined. */
 	p = fgets(dhcp_info, sizeof(dhcp_info), file);
 	if (p == NULL) {
 		pclose(file);
 		return;
 	}
 
 	if (!strncmp(p, "Enabled", 7)) {
 		buffer->dhcp_enabled = 1;
 	} else{
 		buffer->dhcp_enabled = 0;
 	}
 
 	pclose(file);
 }
 
 
 /*
  * Return the number of bits set in the low 32 bits of *w.
  */
 static unsigned int
 hweight32(unsigned int *w)
 {
 	unsigned int bits = *w;
 
 	/* Each 2-bit field becomes the count of its own bits. */
 	bits -= (bits >> 1) & 0x55555555;
 	/* Add neighbouring fields: 2-bit counts into 4-bit counts. */
 	bits = (bits & 0x33333333) + ((bits >> 2) & 0x33333333);
 	/* 4-bit counts into one count per byte. */
 	bits = (bits + (bits >> 4)) & 0x0F0F0F0F;
 	/* Fold the four byte counts into the lowest byte. */
 	bits += bits >> 8;
 	bits += bits >> 16;
 	return (bits & 0x000000FF);
 }
 
 
 /*
  * Convert the address in 'addrp' to text and append it, followed by ';',
  * to the string in 'buffer'.  '*offset' tracks how many bytes have been
  * added so far and is advanced by the length of the text plus one.
  *
  * Returns 0 on success and HV_KVP_E_FAIL when the remaining space is too
  * small or the conversion fails.
  */
 static int
 kvp_process_ip_address(void *addrp,
     int family, char *buffer,
     int length, int *offset)
 {
 	struct sockaddr_in *addr;
 	struct sockaddr_in6 *addr6;
 	int addr_length;
 	char tmp[50];
 	const char *str;
 
 	/* Any family other than AF_INET is handled as IPv6. */
 	if (family == AF_INET) {
 		addr = (struct sockaddr_in *)addrp;
 		str = inet_ntop(family, &addr->sin_addr, tmp, 50);
 		addr_length = INET_ADDRSTRLEN;
 	} else {
 		addr6 = (struct sockaddr_in6 *)addrp;
 		str = inet_ntop(family, &addr6->sin6_addr.s6_addr, tmp, 50);
 		addr_length = INET6_ADDRSTRLEN;
 	}
 
 	/*
 	 * The space check uses the family's maximum text length, not the
 	 * length of this particular address.
 	 */
 	if ((length - *offset) < addr_length + 1) {
 		return (HV_KVP_E_FAIL);
 	}
 	if (str == NULL) {
 		strlcpy(buffer, "inet_ntop failed\n", length);
 		return (HV_KVP_E_FAIL);
 	}
 	/* The first entry overwrites the buffer; later ones are appended. */
 	if (*offset == 0) {
 		strlcpy(buffer, tmp, length);
 	} else{
 		strlcat(buffer, tmp, length);
 	}
 	strlcat(buffer, ";", length);
 
 	*offset += strlen(str) + 1;
 	return (0);
 }
 
 
 /*
  * Collect the IP addresses of the interfaces returned by getifaddrs().
  *
  * family     - AF_INET, AF_INET6, or 0 for both.
  * if_name    - restrict the result to this interface; NULL means all.
  * op         - HV_KVP_OP_ENUMERATE: out_buffer is a plain character buffer
  *              of 'length' bytes that receives the address list.
  *              Otherwise out_buffer is a struct hv_kvp_ipaddr_value whose
  *              ip_addr field receives the list; for HV_KVP_OP_GET_IP_INFO
  *              the subnet and other fields are filled as well.
  *
  * Returns 0 on success, or HV_KVP_E_FAIL when getifaddrs() fails or an
  * address cannot be added to the list.
  */
 static int
 kvp_get_ip_info(int family, char *if_name, int op,
     void *out_buffer, size_t length)
 {
 	struct ifaddrs *ifap;
 	struct ifaddrs *curp;
 	int offset = 0;
 	int sn_offset = 0;
 	int error = 0;
 	char *buffer;
 	size_t buffer_length;
 	struct hv_kvp_ipaddr_value *ip_buffer = NULL;
 	char cidr_mask[5];
 	int weight;
 	int i;
 	unsigned int *w = NULL;
 	char *sn_str;
 	size_t sn_str_length;
 	struct sockaddr_in6 *addr6;
 
 	if (op == HV_KVP_OP_ENUMERATE) {
 		buffer = out_buffer;
 		buffer_length = length;
 	} else {
 		ip_buffer = out_buffer;
 		buffer = (char *)ip_buffer->ip_addr;
 		buffer_length = sizeof(ip_buffer->ip_addr);
 		ip_buffer->addr_family = 0;
 	}
 
 	if (getifaddrs(&ifap)) {
 		strlcpy(buffer, "getifaddrs failed\n", buffer_length);
 		return (HV_KVP_E_FAIL);
 	}
 
 	curp = ifap;
 	while (curp != NULL) {
 		if (curp->ifa_addr == NULL) {
 			curp = curp->ifa_next;
 			continue;
 		}
 
 		/*
 		 * NOTE(review): this compares only strlen(if_name) characters,
 		 * so a name that merely starts with if_name also matches;
 		 * confirm this is intended.
 		 */
 		if ((if_name != NULL) &&
 		    (strncmp(curp->ifa_name, if_name, strlen(if_name)))) {
 			/*
 			 * We want info about a specific interface;
 			 * just continue.
 			 */
 			curp = curp->ifa_next;
 			continue;
 		}
 
 		/*
 		 * We support two address families: AF_INET and AF_INET6.
 		 * If family value is 0, we gather both supported
 		 * address families; if not we gather info on
 		 * the specified address family.
 		 */
 		if ((family != 0) && (curp->ifa_addr->sa_family != family)) {
 			curp = curp->ifa_next;
 			continue;
 		}
 		if ((curp->ifa_addr->sa_family != AF_INET) &&
 		    (curp->ifa_addr->sa_family != AF_INET6)) {
 			curp = curp->ifa_next;
 			continue;
 		}
 
 		if (op == HV_KVP_OP_GET_IP_INFO) {
 			/*
 			 * Get the info other than the IP address.
 			 */
 			if (curp->ifa_addr->sa_family == AF_INET) {
 				ip_buffer->addr_family |= ADDR_FAMILY_IPV4;
 
 				/*
 				 * Get subnet info.
 				 * NOTE(review): 'length' is passed as the size
 				 * of sub_net here; confirm it cannot exceed
 				 * sizeof(ip_buffer->sub_net).
 				 */
 				error = kvp_process_ip_address(
 					curp->ifa_netmask,
 					AF_INET,
 					(char *)
 					ip_buffer->sub_net,
 					length,
 					&sn_offset);
 				if (error) {
 					goto kvp_get_ip_info_ipaddr;
 				}
 			} else {
 				ip_buffer->addr_family |= ADDR_FAMILY_IPV6;
 
 				/*
 				 * Get subnet info in CIDR format.
 				 */
 				weight = 0;
 				sn_str = (char *)ip_buffer->sub_net;
 				sn_str_length = sizeof(ip_buffer->sub_net);
 				addr6 = (struct sockaddr_in6 *)(uintptr_t)
 				    curp->ifa_netmask;
 				w = (unsigned int *)(uintptr_t)addr6->sin6_addr.s6_addr;
 
 				/* Prefix length = set bits in the four mask words. */
 				for (i = 0; i < 4; i++)
 				{
 					weight += hweight32(&w[i]);
 				}
 
 				snprintf(cidr_mask, sizeof(cidr_mask), "/%d", weight);
 				if ((length - sn_offset) <
 				    (strlen(cidr_mask) + 1)) {
 					goto kvp_get_ip_info_ipaddr;
 				}
 
 				if (sn_offset == 0) {
 					strlcpy(sn_str, cidr_mask, sn_str_length);
 				} else{
 					strlcat(sn_str, cidr_mask, sn_str_length);
 				}
 				strlcat((char *)ip_buffer->sub_net, ";", sn_str_length);
 				/*
 				 * NOTE(review): this adds the length of the
 				 * whole accumulated string, not just of the
 				 * entry appended above; verify.
 				 */
 				sn_offset += strlen(sn_str) + 1;
 			}
 
 			/*
 			 * Collect other ip configuration info.
 			 */
 
 			kvp_get_ipconfig_info(if_name, ip_buffer);
 		}
 
 		/* Subnet failures above jump here, skipping the ipconfig step. */
 kvp_get_ip_info_ipaddr:
 		error = kvp_process_ip_address(curp->ifa_addr,
 			curp->ifa_addr->sa_family,
 			buffer,
 			length, &offset);
 		if (error) {
 			goto kvp_get_ip_info_done;
 		}
 
 		curp = curp->ifa_next;
 	}
 
 kvp_get_ip_info_done:
 	freeifaddrs(ifap);
 	return (error);
 }
 
 
 /*
  * Write one line of the form "<s1><s2>=<s3>" to 'f'.
  * Returns 0 on success and HV_KVP_E_FAIL when fprintf() reports an error.
  */
 static int
 kvp_write_file(FILE *f, const char *s1, const char *s2, const char *s3)
 {
 	/* fprintf() returns a negative value on failure. */
 	if (fprintf(f, "%s%s%s%s\n", s1, s2, "=", s3) < 0)
 		return (HV_KVP_E_FAIL);
 
 	return (0);
 }
 
 
 static int
 kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
 {
 	int error = 0;
 	char if_file[128];
 	FILE *file;
 	char cmd[512];
 	char *mac_addr;
 
 	/*
 	 * FreeBSD - Configuration File
 	 */
 	snprintf(if_file, sizeof(if_file), "%s%s", "/var/db/hyperv",
 	    "hv_set_ip_data");
 	file = fopen(if_file, "w");
 
 	if (file == NULL) {
 		KVP_LOG(LOG_ERR, "FreeBSD Failed to open config file\n");
 		return (HV_KVP_E_FAIL);
 	}
 
 	/*
 	 * Write out the MAC address.
 	 */
 
 	mac_addr = kvp_if_name_to_mac(if_name);
 	if (mac_addr == NULL) {
 		error = HV_KVP_E_FAIL;
 		goto kvp_set_ip_info_error;
 	}
 	/* MAC Address */
 	error = kvp_write_file(file, "HWADDR", "", mac_addr);
 	if (error) {
 		goto kvp_set_ip_info_error;
 	}
 
 	/* Interface Name  */
 	error = kvp_write_file(file, "IF_NAME", "", if_name);
 	if (error) {
 		goto kvp_set_ip_info_error;
 	}
 
 	/* IP Address  */
 	error = kvp_write_file(file, "IP_ADDR", "",
 	    (char *)new_val->ip_addr);
 	if (error) {
 		goto kvp_set_ip_info_error;
 	}
 
 	/* Subnet Mask */
 	error = kvp_write_file(file, "SUBNET", "",
 	    (char *)new_val->sub_net);
 	if (error) {
 		goto kvp_set_ip_info_error;
 	}
 
 
 	/* Gateway */
 	error = kvp_write_file(file, "GATEWAY", "",
 	    (char *)new_val->gate_way);
 	if (error) {
 		goto kvp_set_ip_info_error;
 	}
 
 	/* DNS */
 	error = kvp_write_file(file, "DNS", "", (char *)new_val->dns_addr);
 	if (error) {
 		goto kvp_set_ip_info_error;
 	}
 
 	/* DHCP */
 	if (new_val->dhcp_enabled) {
 		error = kvp_write_file(file, "DHCP", "", "1");
 	} else{
 		error = kvp_write_file(file, "DHCP", "", "0");
 	}
 
 	if (error) {
 		goto kvp_set_ip_info_error;
 	}
 
 	free(mac_addr);
 	fclose(file);
 
 	/*
 	 * Invoke the external script with the populated
 	 * configuration file.
 	 */
 
 	snprintf(cmd, sizeof(cmd), "%s %s",
 	    "sh /usr/libexec/hyperv/hv_set_ifconfig", if_file);
 	system(cmd);
 	return (0);
 
 kvp_set_ip_info_error:
 	KVP_LOG(LOG_ERR, "Failed to write config file\n");
 	free(mac_addr);
 	fclose(file);
 	return (error);
 }
 
 
 /*
  * Store this host's canonical name, as reported by getaddrinfo() with
  * AI_CANONNAME for the local host name, in buffer (at most length bytes,
  * always NUL-terminated).
  *
  * Returns 0 on success.  On failure a short diagnostic string is left in
  * buffer and a non-zero value is returned: HV_KVP_E_FAIL for a bad buffer
  * or a gethostname() failure, otherwise the getaddrinfo() error code.
  */
 static int
 kvp_get_domain_name(char *buffer, int length)
 {
 	struct addrinfo hints, *info;
 	int error = 0;
 
 	if (buffer == NULL || length <= 0) {
 		return (HV_KVP_E_FAIL);
 	}
 
 	if (gethostname(buffer, length) != 0) {
 		strlcpy(buffer, "gethostname failed\n", length);
 		return (HV_KVP_E_FAIL);
 	}
 	/* A name that had to be truncated is not guaranteed a terminator. */
 	buffer[length - 1] = '\0';
 
 	memset(&hints, 0, sizeof(hints));
 	hints.ai_family = AF_INET;    /* Get only ipv4 addrinfo. */
 	hints.ai_socktype = SOCK_STREAM;
 	hints.ai_flags = AI_CANONNAME;
 
 	error = getaddrinfo(buffer, NULL, &hints, &info);
 	if (error != 0) {
 		strlcpy(buffer, "getaddrinfo failed\n", length);
 		return (error);
 	}
 	strlcpy(buffer, info->ai_canonname, length);
 	freeaddrinfo(info);
 	return (error);
 }
 
 
 /*
  * Executor for HV_KVP_OP_GET_IP_INFO: map the adapter id in the message to
  * an interface name and fill the message's kvp_ip_val for that interface.
  * The outcome is stored in op_msg->hdr.error and also returned.
  */
 static int
 kvp_op_getipinfo(struct hv_kvp_msg *op_msg, void *data __unused)
 {
 	struct hv_kvp_ipaddr_value *ipv;
 	char *ifname;
 
 	assert(op_msg != NULL);
 	KVP_LOG(LOG_DEBUG, "In kvp_op_getipinfo.\n");
 
 	op_msg->hdr.error = HV_KVP_S_OK;
 	ipv = &op_msg->body.kvp_ip_val;
 
 	ifname = kvp_mac_to_if_name((char *)ipv->adapter_id);
 	if (ifname != NULL) {
 		op_msg->hdr.error = kvp_get_ip_info(0, ifname,
 		    HV_KVP_OP_GET_IP_INFO, ipv, (MAX_IP_ADDR_SIZE * 2));
 		free(ifname);
 	} else {
 		/* No interface found with the mac address. */
 		op_msg->hdr.error = HV_KVP_E_FAIL;
 	}
 
 	return(op_msg->hdr.error);
 }
 
 
 /*
  * Executor for HV_KVP_OP_SET_IP_INFO: apply the settings in the message's
  * kvp_ip_val, using its adapter_id field directly as the interface name.
  * The outcome is stored in op_msg->hdr.error and also returned.
  */
 static int
 kvp_op_setipinfo(struct hv_kvp_msg *op_msg, void *data __unused)
 {
 	struct hv_kvp_ipaddr_value *ipv;
 	char *adapter;
 
 	assert(op_msg != NULL);
 	KVP_LOG(LOG_DEBUG, "In kvp_op_setipinfo.\n");
 
 	op_msg->hdr.error = HV_KVP_S_OK;
 	ipv = &op_msg->body.kvp_ip_val;
 	adapter = (char *)ipv->adapter_id;
 
 	if (adapter != NULL) {
 		op_msg->hdr.error = kvp_set_ip_info(adapter, ipv);
 	} else {
 		/* No adapter provided. */
 		op_msg->hdr.error = HV_KVP_GUID_NOTFOUND;
 	}
 
 	return(op_msg->hdr.error);
 }
 
 
 /*
  * Shared executor for the SET, GET and DELETE operations.  data is the
  * handler entry whose kvp_op_key says which of the three to perform.
  * SET and DELETE are refused for HV_KVP_POOL_AUTO.  Any non-zero result is
  * reported to the host as HV_KVP_S_CONT and returned to the caller.
  */
 static int
 kvp_op_setgetdel(struct hv_kvp_msg *op_msg, void *data)
 {
 	struct kvp_op_hdlr *hdlr = (struct kvp_op_hdlr *)data;
 	int pool;
 	int rc = 0;
 
 	assert(op_msg != NULL);
 	assert(hdlr != NULL);
 
 	pool = op_msg->hdr.kvp_hdr.pool;
 	op_msg->hdr.error = HV_KVP_S_OK;
 
 	if (hdlr->kvp_op_key == HV_KVP_OP_SET) {
 		if (pool != HV_KVP_POOL_AUTO) {
 			rc = kvp_key_add_or_modify(pool,
 			    op_msg->body.kvp_set.data.key,
 			    op_msg->body.kvp_set.data.key_size,
 			    op_msg->body.kvp_set.data.msg_value.value,
 			    op_msg->body.kvp_set.data.value_size);
 		} else {
 			/* Auto Pool is not writeable from host side. */
 			rc = 1;
 			KVP_LOG(LOG_ERR, "Ilegal to write to pool %d from host\n",
 			    pool);
 		}
 	} else if (hdlr->kvp_op_key == HV_KVP_OP_GET) {
 		rc = kvp_get_value(pool,
 		    op_msg->body.kvp_get.data.key,
 		    op_msg->body.kvp_get.data.key_size,
 		    op_msg->body.kvp_get.data.msg_value.value,
 		    op_msg->body.kvp_get.data.value_size);
 	} else if (hdlr->kvp_op_key == HV_KVP_OP_DELETE) {
 		if (pool != HV_KVP_POOL_AUTO) {
 			rc = kvp_key_delete(pool,
 			    op_msg->body.kvp_delete.key,
 			    op_msg->body.kvp_delete.key_size);
 		} else {
 			/* Auto Pool is not writeable from host side. */
 			rc = 1;
 			KVP_LOG(LOG_ERR, "Ilegal to change pool %d from host\n",
 			    pool);
 		}
 	}
 	/* Any other key: nothing to do, rc stays 0. */
 
 	if (rc != 0)
 		op_msg->hdr.error = HV_KVP_S_CONT;
 
 	return(rc);
 }
 
 
 /*
  * Executor for HV_KVP_OP_ENUMERATE.
  *
  * For any pool other than HV_KVP_POOL_AUTO the entry at the requested index
  * is read from that pool.  For the auto pool the index selects one of the
  * built-in items (host name, OS details, IP addresses, ...) and the key
  * name and value are generated here.
  *
  * Returns 0 on success.  Returns -1, with op_msg->hdr.error set to
  * HV_KVP_S_CONT, when the pool lookup fails or the index is unknown.
  */
 static int
 kvp_op_enumerate(struct hv_kvp_msg *op_msg, void *data __unused)
 {
 	const char *name;
 	char *key_name, *key_value;
 	int op_pool;
 
 	assert(op_msg != NULL);
 
 	op_pool = op_msg->hdr.kvp_hdr.pool;
 	op_msg->hdr.error = HV_KVP_S_OK;
 
 	/*
 	 * If the pool is not HV_KVP_POOL_AUTO, read from the appropriate
 	 * pool and return the KVP according to the index requested.
 	 */
 	if (op_pool != HV_KVP_POOL_AUTO) {
 		if (kvp_pool_enumerate(op_pool,
 		    op_msg->body.kvp_enum_data.index,
 		    op_msg->body.kvp_enum_data.data.key,
 		    HV_KVP_EXCHANGE_MAX_KEY_SIZE,
 		    op_msg->body.kvp_enum_data.data.msg_value.value,
 		    HV_KVP_EXCHANGE_MAX_VALUE_SIZE)) {
 			op_msg->hdr.error = HV_KVP_S_CONT;
 			return (-1);
 		}
 		return (0);
 	}
 
 	key_name = (char *)op_msg->body.kvp_enum_data.data.key;
 	key_value = (char *)op_msg->body.kvp_enum_data.data.msg_value.value;
 
 	switch (op_msg->body.kvp_enum_data.index)
 	{
 	case FullyQualifiedDomainName:
 		name = "FullyQualifiedDomainName";
 		kvp_get_domain_name(key_value,
 		    HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
 		break;
 
 	case IntegrationServicesVersion:
 		name = "IntegrationServicesVersion";
 		strlcpy(key_value, lic_version, HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
 		break;
 
 	case NetworkAddressIPv4:
 		name = "NetworkAddressIPv4";
 		kvp_get_ip_info(AF_INET, NULL, HV_KVP_OP_ENUMERATE,
 		    key_value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
 		break;
 
 	case NetworkAddressIPv6:
 		name = "NetworkAddressIPv6";
 		kvp_get_ip_info(AF_INET6, NULL, HV_KVP_OP_ENUMERATE,
 		    key_value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
 		break;
 
 	case OSBuildNumber:
 		name = "OSBuildNumber";
 		strlcpy(key_value, os_build, HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
 		break;
 
 	case OSName:
 		name = "OSName";
 		strlcpy(key_value, os_name, HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
 		break;
 
 	case OSMajorVersion:
 		name = "OSMajorVersion";
 		strlcpy(key_value, os_major, HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
 		break;
 
 	case OSMinorVersion:
 		name = "OSMinorVersion";
 		strlcpy(key_value, os_minor, HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
 		break;
 
 	case OSVersion:
 		/* Reports the same string as OSBuildNumber. */
 		name = "OSVersion";
 		strlcpy(key_value, os_build, HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
 		break;
 
 	case ProcessorArchitecture:
 		name = "ProcessorArchitecture";
 		strlcpy(key_value, processor_arch, HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
 		break;
 
 	default:
 #ifdef DEBUG
 		KVP_LOG(LOG_ERR, "Auto pool Index %d not found.\n",
 		    op_msg->body.kvp_enum_data.index);
 #endif
 		op_msg->hdr.error = HV_KVP_S_CONT;
 		return (-1);
 	}
 
 	/*
 	 * Bounded copy of the key name, using the same key buffer size that
 	 * is passed to kvp_pool_enumerate() above.
 	 */
 	strlcpy(key_name, name, HV_KVP_EXCHANGE_MAX_KEY_SIZE);
 
 	return (0);
 }
 
 
 /*
  * Register the handler for operation `key' in kvp_op_hdlrs and, when an
  * init routine is supplied, run it right away.
  * Returns 0 on success, -1 when key is outside [0, HV_KVP_OP_COUNT).
  */
 static int
 kvp_op_load(int key, void (*init)(void),
 	    int (*exec)(struct hv_kvp_msg *, void *))
 {
 	if (key < 0 || key >= HV_KVP_OP_COUNT) {
 		KVP_LOG(LOG_ERR, "Operation key out of supported range\n");
 		return(-1);
 	}
 
 	kvp_op_hdlrs[key].kvp_op_key = key;
 	kvp_op_hdlrs[key].kvp_op_init = init;
 	kvp_op_hdlrs[key].kvp_op_exec = exec;
 
 	if (init != NULL)
 		init();
 
 	return(0);
 }
 
 
 /*
  * Initialize the operation handlers: clear every slot of kvp_op_hdlrs,
  * then register the executor for each supported operation.
  * Returns 0 when all registrations succeed, non-zero otherwise.
  */
 static int
 kvp_ops_init(void)
 {
 	int slot;
 	int status;
 
 	/* Set the initial values. */
 	for (slot = 0; slot < HV_KVP_OP_COUNT; slot++) {
 		kvp_op_hdlrs[slot].kvp_op_key = -1;
 		kvp_op_hdlrs[slot].kvp_op_init = NULL;
 		kvp_op_hdlrs[slot].kvp_op_exec = NULL;
 	}
 
 	/* Every registration is attempted; the results are OR-ed together. */
 	status = kvp_op_load(HV_KVP_OP_GET, NULL, kvp_op_setgetdel);
 	status |= kvp_op_load(HV_KVP_OP_SET, NULL, kvp_op_setgetdel);
 	status |= kvp_op_load(HV_KVP_OP_DELETE, NULL, kvp_op_setgetdel);
 	status |= kvp_op_load(HV_KVP_OP_ENUMERATE, kvp_get_os_info,
 	    kvp_op_enumerate);
 	status |= kvp_op_load(HV_KVP_OP_GET_IP_INFO, NULL, kvp_op_getipinfo);
 	status |= kvp_op_load(HV_KVP_OP_SET_IP_INFO, NULL, kvp_op_setipinfo);
 
 	return(status);
 }
 
 
 /*
  * Daemon entry point.
  *
  * Parses the -d (debug output) and -n (stay in foreground) options,
  * optionally daemonizes, sets up the operation handlers and the pools,
  * registers with /dev/hv_kvp_dev and then loops forever: wait for a
  * request with poll(), read it, dispatch it to the handler registered
  * for its operation and write the (possibly modified) message back.
  * The loop never returns; fatal errors end the process through exit().
  */
 int
 main(int argc, char *argv[])
 {
 	struct hv_kvp_msg *hv_kvp_dev_buf;
 	struct hv_kvp_msg *hv_msg;
 	struct pollfd hv_kvp_poll_fd[1];
 	int op, pool;
 	int hv_kvp_dev_fd, error, len, r;
 	int ch;
 
 	while ((ch = getopt(argc, argv, "dn")) != -1) {
 		switch (ch) {
 		case 'n':
 			/* Run as regular process for debugging purpose. */
 			is_daemon = 0;
 			break;
 		case 'd':
 			/* Generate debugging output */
 			is_debugging = 1;
 			break;
 		default:
 			/* Unknown options are silently ignored. */
 			break;
 		}
 	}
 
 	openlog("HV_KVP", 0, LOG_USER);
 
 	/* Become daemon first. */
 	/* NOTE(review): the return value of daemon() is not checked. */
 	if (is_daemon == 1)
 		daemon(1, 0);
 	else
 		KVP_LOG(LOG_DEBUG, "Run as regular process.\n");
 
 	KVP_LOG(LOG_INFO, "HV_KVP starting; pid is: %d\n", getpid());
 
 	/* Communication buffer hv_kvp_dev_buf */
 	hv_kvp_dev_buf = malloc(sizeof(*hv_kvp_dev_buf));
 	/* Buffer for daemon internal use */
 	hv_msg = malloc(sizeof(*hv_msg));
 
 	/* Memory allocation failed */
 	if (hv_kvp_dev_buf == NULL || hv_msg == NULL) {
 		KVP_LOG(LOG_ERR, "Failed to allocate memory for hv buffer\n");
 		exit(EXIT_FAILURE);
 	}
 
 	/* Initialize op handlers */
 	if (kvp_ops_init() != 0) {
 		KVP_LOG(LOG_ERR, "Failed to initizlize operation handlers\n");
 		exit(EXIT_FAILURE);
 	}
 
 	if (kvp_file_init()) {
 		KVP_LOG(LOG_ERR, "Failed to initialize the pools\n");
 		exit(EXIT_FAILURE);
 	}
 
 	/* Open the Character Device */
 	hv_kvp_dev_fd = open("/dev/hv_kvp_dev", O_RDWR);
 
 	if (hv_kvp_dev_fd < 0) {
 		KVP_LOG(LOG_ERR, "open /dev/hv_kvp_dev failed; error: %d %s\n",
 		    errno, strerror(errno));
 		exit(EXIT_FAILURE);
 	}
 
 	/* Initialize the struct for polling the char device */
 	hv_kvp_poll_fd[0].fd = hv_kvp_dev_fd;
 	hv_kvp_poll_fd[0].events = (POLLIN | POLLRDNORM);
 
 	/* Register the daemon to the KVP driver */
 	/*
 	 * NOTE(review): the result of this write is stored in len but never
 	 * examined, so a failed registration goes unnoticed.
 	 */
 	memset(hv_kvp_dev_buf, 0, sizeof(*hv_kvp_dev_buf));
 	hv_kvp_dev_buf->hdr.kvp_hdr.operation = HV_KVP_OP_REGISTER;
 	len = write(hv_kvp_dev_fd, hv_kvp_dev_buf, sizeof(*hv_kvp_dev_buf));
 
 
 	for (;;) {
-		r = poll (hv_kvp_poll_fd, 1, 100);
+		r = poll (hv_kvp_poll_fd, 1, INFTIM);
 
 		KVP_LOG(LOG_DEBUG, "poll returned r = %d, revent = 0x%x\n",
 		    r, hv_kvp_poll_fd[0].revents);
 
 		if (r == 0 || (r < 0 && errno == EAGAIN) ||
 		    (r < 0 && errno == EINTR)) {
 			/* Nothing to read */
 			continue;
 		}
 
 		if (r < 0) {
 			/*
 			 * For a poll failure other than EAGAIN or EINTR,
 			 * we want to exit.
 			 */
 			KVP_LOG(LOG_ERR, "Poll failed.\n");
 			perror("poll");
 			exit(EIO);
 		}
 
 		/* Read from character device */
 		len = pread(hv_kvp_dev_fd, hv_kvp_dev_buf,
 		    sizeof(*hv_kvp_dev_buf), 0);
 
 		if (len < 0) {
 			KVP_LOG(LOG_ERR, "Read failed.\n");
 			perror("pread");
 			exit(EIO);
 		}
 
 		/* A short read is logged and the request is dropped. */
 		if (len != sizeof(struct hv_kvp_msg)) {
 			KVP_LOG(LOG_ERR, "read len is: %d\n", len);
 			continue;
 		}
 
 		/* Copy hv_kvp_dev_buf to hv_msg */
 		memcpy(hv_msg, hv_kvp_dev_buf, sizeof(*hv_msg));
 
 		/*
 		 * We will use the KVP header information to pass back
 		 * the error from this daemon. So, first save the op
 		 * and pool info to local variables.
 		 *
 		 * NOTE(review): pool is saved here but not read again
 		 * anywhere in main().
 		 */
 
 		op = hv_msg->hdr.kvp_hdr.operation;
 		pool = hv_msg->hdr.kvp_hdr.pool;
 
 		if (op < 0 || op >= HV_KVP_OP_COUNT ||
 		    kvp_op_hdlrs[op].kvp_op_exec == NULL) {
 			KVP_LOG(LOG_WARNING,
 			    "Unsupported operation OP = %d\n", op);
 			hv_msg->hdr.error = HV_ERROR_NOT_SUPPORTED;
 		} else {
 			/*
 			 * Call the operation handler's execution routine.
 			 * A failure whose header error is HV_KVP_S_CONT is
 			 * not logged.
 			 */
 			error = kvp_op_hdlrs[op].kvp_op_exec(hv_msg,
 			    (void *)&kvp_op_hdlrs[op]);
 			if (error != 0 && hv_msg->hdr.error != HV_KVP_S_CONT)
 				KVP_LOG(LOG_WARNING,
 				    "Operation failed OP = %d, error = 0x%x\n",
 				    op, error);
 		}
 
 		/*
 		 * Send the value back to the kernel. The response is
 		 * already in the receive buffer.
 		 *
 		 * NOTE(review): a short or failed pwrite is retried
 		 * immediately and without limit, so a persistent error
 		 * keeps the daemon spinning in this loop.
 		 */
 hv_kvp_done:
 		len = pwrite(hv_kvp_dev_fd, hv_msg, sizeof(*hv_kvp_dev_buf), 0);
 
 		if (len != sizeof(struct hv_kvp_msg)) {
 			KVP_LOG(LOG_ERR, "write len is: %d\n", len);
 			goto hv_kvp_done;
 		}
 	}
 }
Index: projects/clang380-import/gnu/lib/libgcc/Makefile
===================================================================
--- projects/clang380-import/gnu/lib/libgcc/Makefile	(revision 293686)
+++ projects/clang380-import/gnu/lib/libgcc/Makefile	(revision 293687)
@@ -1,369 +1,416 @@
 # $FreeBSD$
 
 GCCDIR=	${.CURDIR}/../../../contrib/gcc
 GCCLIB=	${.CURDIR}/../../../contrib/gcclibs
+COMPILERRTDIR=	${.CURDIR}/../../../contrib/compiler-rt
+UNWINDINCDIR=	${.CURDIR}/../../../contrib/llvm/projects/libunwind/include
+UNWINDSRCDIR=	${.CURDIR}/../../../contrib/llvm/projects/libunwind/src
 
 SHLIB_NAME=	libgcc_s.so.1
 SHLIBDIR?=	/lib
 
 .include <src.opts.mk>
 #
 # libgcc is linked in last and thus cannot depend on ssp symbols coming
 # from earlier libraries. Disable stack protection for this library.
 #
 MK_SSP=	no
 
 .include "${.CURDIR}/../../usr.bin/cc/Makefile.tgt"
 
 .if ${TARGET_CPUARCH} == "arm"
 CFLAGS+=	-DTARGET_ARM_EABI
 .endif
 
 .PATH: ${GCCDIR}/config/${GCC_CPU} ${GCCDIR}/config ${GCCDIR}
 
 CFLAGS+=	-DIN_GCC -DIN_LIBGCC2 -D__GCC_FLOAT_NOT_NEEDED \
 		-DHAVE_GTHR_DEFAULT \
 		-I${GCCLIB}/include \
 		-I${GCCDIR}/config -I${GCCDIR} -I. \
 		-I${.CURDIR}/../../usr.bin/cc/cc_tools
 
 LDFLAGS+=	-nodefaultlibs
 LIBADD+=	c
 
 OBJS=		# added to below in various ways depending on TARGET_CPUARCH
 
 #---------------------------------------------------------------------------
 #
 # Library members defined in libgcc2.c.
 # When upgrading GCC, obtain the following list from mklibgcc.in
 #
 LIB2FUNCS= _muldi3 _negdi2 _lshrdi3 _ashldi3 _ashrdi3 \
 	_cmpdi2 _ucmpdi2 \
 	_enable_execute_stack _trampoline __main _absvsi2 _absvdi2 _addvsi3 \
 	_addvdi3 _subvsi3 _subvdi3 _mulvsi3 _mulvdi3 _negvsi2 _negvdi2 _ctors \
 	_ffssi2 _ffsdi2 _clz _clzsi2 _clzdi2 _ctzsi2 _ctzdi2 _popcount_tab \
 	_popcountsi2 _popcountdi2 _paritysi2 _paritydi2 _powisf2 _powidf2 \
 	_powixf2 _powitf2 _mulsc3 _muldc3 _mulxc3 _multc3 _divsc3 _divdc3 \
 	_divxc3 _divtc3 _bswapsi2 _bswapdi2
 .if ${COMPILER_TYPE} != "clang" || ${TARGET_CPUARCH} != "arm"
 LIB2FUNCS+= _clear_cache
 .endif
 
 # The floating-point conversion routines that involve a single-word integer.
 .for mode in sf df xf
 LIB2FUNCS+= _fixuns${mode}si
 .endfor
 
 # Likewise double-word routines.
 .if ${TARGET_CPUARCH} != "aarch64" && ${TARGET_CPUARCH} != "arm"
 # These are implemented in an ARM specific file but will not be filtered out
 .for mode in sf df xf tf
 LIB2FUNCS+= _fix${mode}di _fixuns${mode}di
 LIB2FUNCS+= _floatdi${mode} _floatundi${mode}
 .endfor
 .endif
 
 LIB2ADD = $(LIB2FUNCS_EXTRA)
 LIB2ADD_ST = $(LIB2FUNCS_STATIC_EXTRA)
 
 # Additional sources to handle exceptions; overridden by targets as needed.
+.if ${MK_LLVM_LIBUNWIND} != "no"
+
+.PATH: ${COMPILERRTDIR}/lib/builtins
+.PATH: ${UNWINDSRCDIR}
+LIB2ADDEH = gcc_personality_v0.c \
+	int_util.c \
+	Unwind-EHABI.cpp \
+	Unwind-sjlj.c \
+	UnwindLevel1-gcc-ext.c \
+	UnwindLevel1.c \
+	UnwindRegistersRestore.S \
+	UnwindRegistersSave.S \
+	libunwind.cpp
+
+CFLAGS+=	-I${UNWINDINCDIR} -I${.CURDIR}
+.if empty(CXXFLAGS:M-std=*)
+CXXFLAGS+=	-std=c++11
+.endif
+CXXFLAGS+=	-fno-rtti
+
+.else # MK_LLVM_LIBUNWIND
+
+.if ${TARGET_CPUARCH} == "arm"
+LIB2ADDEH =	unwind-arm.c libunwind.S pr-support.c unwind-c.c
+.else
 LIB2ADDEH = unwind-dw2.c unwind-dw2-fde-glibc.c unwind-sjlj.c gthr-gnat.c \
 	unwind-c.c
+.endif
+
+.endif # MK_LLVM_LIBUNWIND
+
 LIB2ADDEHSTATIC = $(LIB2ADDEH)
 LIB2ADDEHSHARED = $(LIB2ADDEH)
 
 # List of extra C and assembler files to add to static and shared libgcc2.
 # Assembler files should have names ending in `.asm'.
 LIB2FUNCS_EXTRA =
 
 # List of extra C and assembler files to add to static libgcc2.
 # Assembler files should have names ending in `.asm'.
 LIB2FUNCS_STATIC_EXTRA =
 
 # Defined in libgcc2.c, included only in the static library.
 # KAN: Excluded _sf_to_tf and _df_to_tf as TPBIT_FUNCS are not
 # built on any of our platforms.
 LIB2FUNCS_ST = _eprintf __gcc_bcmp
 
 FPBIT_FUNCS = _pack_sf _unpack_sf _addsub_sf _mul_sf _div_sf \
     _fpcmp_parts_sf _compare_sf _eq_sf _ne_sf _gt_sf _ge_sf \
     _lt_sf _le_sf _unord_sf _si_to_sf _sf_to_si _negate_sf _make_sf \
     _sf_to_df _thenan_sf _sf_to_usi _usi_to_sf
 
 DPBIT_FUNCS = _pack_df _unpack_df _addsub_df _mul_df _div_df \
     _fpcmp_parts_df _compare_df _eq_df _ne_df _gt_df _ge_df \
     _lt_df _le_df _unord_df _si_to_df _df_to_si _negate_df _make_df \
     _df_to_sf _thenan_df _df_to_usi _usi_to_df
 
 TPBIT_FUNCS = _pack_tf _unpack_tf _addsub_tf _mul_tf _div_tf \
     _fpcmp_parts_tf _compare_tf _eq_tf _ne_tf _gt_tf _ge_tf \
     _lt_tf _le_tf _unord_tf _si_to_tf _tf_to_si _negate_tf _make_tf \
     _tf_to_df _tf_to_sf _thenan_tf _tf_to_usi _usi_to_tf
 
 # These might cause a divide overflow trap and so are compiled with
 # unwinder info.
 LIB2_DIVMOD_FUNCS = _divdi3 _moddi3 _udivdi3 _umoddi3 _udiv_w_sdiv _udivmoddi4
 
 #-----------------------------------------------------------------------
 #
 #	Platform specific bits.
 #	When upgrading GCC, get the following definitions from config/<cpu>/t-*
 #
 .if ${TARGET_CPUARCH} == "arm"
 #	from config/arm/t-strongarm-elf
 CFLAGS+=	-Dinhibit_libc -fno-inline
 CFLAGS.clang+=	-fheinous-gnu-extensions
 
 LIB1ASMSRC =	lib1funcs.asm
 LIB1ASMFUNCS =  _dvmd_tls _bb_init_func
-LIB2ADDEH =	unwind-arm.c libunwind.S pr-support.c unwind-c.c
 # Some compilers generate __aeabi_ functions libgcc_s is missing
 LIBADD+=	compiler_rt
 .endif
 
 .if ${TARGET_CPUARCH} == mips
 LIB2FUNCS_EXTRA = floatunsidf.c floatunsisf.c
 # ABIs other than o32 need this
 .if ${TARGET_ARCH} != "mips" && ${TARGET_ARCH} != "mipsel"
 LIB2FUNCS_EXTRA+= floatdidf.c fixunsdfsi.c
 LIB2FUNCS_EXTRA+= floatdisf.c floatundidf.c
 LIB2FUNCS_EXTRA+= fixsfdi.c floatundisf.c
 LIB2FUNCS_EXTRA+= fixdfdi.c fixunssfsi.c
 .endif
 .endif
 
 .if ${TARGET_ARCH} == "powerpc"
 #	from config/rs6000/t-ppccomm
 LIB2FUNCS_EXTRA = tramp.asm
 LIB2FUNCS_STATIC_EXTRA = eabi.asm
 .endif
 
 .if ${TARGET_ARCH} == "powerpc64"
 #	from config/rs6000/t-ppccomm
 LIB2FUNCS_EXTRA = tramp.asm
 .endif
 
 .if ${TARGET_CPUARCH} == "sparc64"
 #	from config/sparc/t-elf
 LIB1ASMSRC =   lb1spc.asm
 LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3
 .endif
 
 #-----------------------------------------------------------------------
 
 # Remove any objects from LIB2FUNCS and LIB2_DIVMOD_FUNCS that are
 # defined as optimized assembly code in LIB1ASMFUNCS.
 .if defined(LIB1ASMFUNCS)
 .for sym in ${LIB1ASMFUNCS}
 LIB2FUNCS:=	${LIB2FUNCS:S/${sym}//g}
 LIB2_DIVMOD_FUNCS:= ${LIB2_DIVMOD_FUNCS:S/${sym}//g}
 .endfor
 .endif
 
-COMMONHDRS=	tm.h tconfig.h options.h unwind.h gthr-default.h
+COMMONHDRS=	tm.h tconfig.h options.h gthr-default.h
+.if ${MK_LLVM_LIBUNWIND} == no
+COMMONHDRS+=	unwind.h
+.endif
 
 #-----------------------------------------------------------------------
 #
 # Helpful shortcuts for compiler invocations.
 #
 HIDE =  -fvisibility=hidden -DHIDE_EXPORTS
 CC_T =	${CC} -c ${CFLAGS} ${HIDE} -fPIC
 CC_P =	${CC} -c ${CFLAGS} ${HIDE} -p -fPIC
 CC_S =	${CC} -c ${CFLAGS} ${PICFLAG} -DSHARED
+CXX_T =	${CXX} -c ${CXXFLAGS} ${HIDE} -fPIC
+CXX_P =	${CXX} -c ${CXXFLAGS} ${HIDE} -p -fPIC
+CXX_S =	${CXX} -c ${CXXFLAGS} ${PICFLAG} -DSHARED
 
 #-----------------------------------------------------------------------
 #
 # Functions from libgcc2.c
 #
 STD_CFLAGS =
 DIV_CFLAGS =	-fexceptions -fnon-call-exceptions
 
 STD_FUNCS =	${LIB2FUNCS}
 DIV_FUNCS =	${LIB2_DIVMOD_FUNCS}
 
 STD_CFILE =	libgcc2.c
 DIV_CFILE =	libgcc2.c
 
 OBJ_GRPS =	STD DIV
 
 #-----------------------------------------------------------------------
 #
 # Floating point emulation functions
 #
 .if ${TARGET_CPUARCH} == "armNOT_YET" || \
     ${TARGET_CPUARCH} == "powerpc" || ${TARGET_CPUARCH} == "sparc64"
 
 FPBIT_CFLAGS =	-DFINE_GRAINED_LIBRARIES -DFLOAT
 DPBIT_CFLAGS =	-DFINE_GRAINED_LIBRARIES
 
 FPBIT_CFILE =	config/fp-bit.c
 DPBIT_CFILE =	config/fp-bit.c
 
 OBJ_GRPS +=	FPBIT DPBIT
 .endif
 
 #-----------------------------------------------------------------------
 #
 # Generic build rules for object groups defined above
 #
 # For every group T listed in OBJ_GRPS, derive three object lists from
 # T_FUNCS (.o, .po and .So) and add the .o names to OBJS.  Each object is
 # compiled from the group's single T_CFILE with the group's T_CFLAGS and
 # -DL<target prefix>; only the compiler shortcut differs (CC_T for .o,
 # CC_P for .po, CC_S for .So).  Only the .c members of the prerequisites
 # are passed to the compiler, so the generated headers in COMMONHDRS act
 # purely as dependencies.
 # NOTE(review): -DL<name> presumably selects the matching function body
 # inside the shared source file; confirm against libgcc2.c.
 .for T in ${OBJ_GRPS}
 ${T}_OBJS_T =	${${T}_FUNCS:S/$/.o/}
 ${T}_OBJS_P =	${${T}_FUNCS:S/$/.po/}
 ${T}_OBJS_S =	${${T}_FUNCS:S/$/.So/}
 OBJS +=		${${T}_FUNCS:S/$/.o/}
 
 ${${T}_OBJS_T}: ${${T}_CFILE} ${COMMONHDRS}
 	${CC_T} ${${T}_CFLAGS} -DL${.PREFIX} -o ${.TARGET} ${.ALLSRC:M*.c}
 ${${T}_OBJS_P}: ${${T}_CFILE} ${COMMONHDRS}
 	${CC_P} ${${T}_CFLAGS} -DL${.PREFIX} -o ${.TARGET} ${.ALLSRC:M*.c}
 ${${T}_OBJS_S}: ${${T}_CFILE} ${COMMONHDRS}
 	${CC_S} ${${T}_CFLAGS} -DL${.PREFIX} -o ${.TARGET} ${.ALLSRC:M*.c}
 .endfor
 
 #-----------------------------------------------------------------------
 #
 # Extra objects coming from separate files
 #
 .if !empty(LIB2ADD)
 OBJS  +=	${LIB2ADD:R:S/$/.o/}
 SOBJS +=	${LIB2ADD:R:S/$/.So/}
 POBJS +=	${LIB2ADD:R:S/$/.po/}
 .endif
 
 #-----------------------------------------------------------------------
 #
 # Objects that should be in static library only.
 #
 SYMS_ST =	${LIB2FUNCS_ST}	${LIB2ADD_ST}
 STAT_OBJS_T = 	${SYMS_ST:S/$/.o/}
 STAT_OBJS_P = 	${SYMS_ST:S/$/.po/}
 STATICOBJS  =	${SYMS_ST:S/$/.o/}
 
 ${STAT_OBJS_T}:	${STD_CFILE} ${COMMONHDRS}
 	${CC_T} -DL${.PREFIX} -o ${.TARGET} ${.ALLSRC:M*.c}
 ${STAT_OBJS_P}:	${STD_CFILE} ${COMMONHDRS}
 	${CC_P} -DL${.PREFIX} -o ${.TARGET} ${.ALLSRC:M*.c}
 
 #-----------------------------------------------------------------------
 #
 # Assembler files.
 #
 # Only active when a platform section above defined LIB1ASMSRC.
 # Every name in LIB1ASMFUNCS is assembled from that one source file with
 # -DL<name>, once per flavour (.o, .po, .So), plus a helper .vis file.
 .if defined(LIB1ASMSRC)
 ASM_T =		${LIB1ASMFUNCS:S/$/.o/}
 ASM_P =		${LIB1ASMFUNCS:S/$/.po/}
 ASM_S =		${LIB1ASMFUNCS:S/$/.So/}
 ASM_V =		${LIB1ASMFUNCS:S/$/.vis/}
 OBJS +=		${LIB1ASMFUNCS:S/$/.o/}
 
 # The .o and .po flavours force-include the matching .vis file (see the
 # ASM_V rule below); the source list excludes *.h and *.vis.
 ${ASM_T}: ${LIB1ASMSRC} ${.PREFIX}.vis
 	${CC} -x assembler-with-cpp -c ${CFLAGS} -DL${.PREFIX} \
 	    -o ${.TARGET} -include ${.PREFIX}.vis ${.ALLSRC:N*.h:N*.vis}
 ${ASM_P}: ${LIB1ASMSRC} ${.PREFIX}.vis
 	${CC} -x assembler-with-cpp -p -c ${CFLAGS} -DL${.PREFIX} \
 	    -o ${.TARGET} -include ${.PREFIX}.vis ${.ALLSRC:N*.h:N*.vis}
 # The .So flavour is built with PICFLAG and without the .vis include.
 ${ASM_S}: ${LIB1ASMSRC}
 	${CC} -x assembler-with-cpp -c ${PICFLAG} ${CFLAGS} -DL${.PREFIX} \
 	    -o ${.TARGET} ${.ALLSRC:N*.h}
 # A .vis file is generated by assembling a scratch object (<name>.vo),
 # listing its symbols with nm and emitting one ".hidden <symbol>" line for
 # every three-field entry whose type letter is neither U nor N.
 ${ASM_V}: ${LIB1ASMSRC}
 	${CC} -x assembler-with-cpp -c ${CFLAGS} -DL${.PREFIX} \
 	    -o ${.PREFIX}.vo ${.ALLSRC:N*.h}
 	( ${NM} -pg ${.PREFIX}.vo | \
 		awk 'NF == 3 && $$2 !~ /^[UN]$$/ { print "\t.hidden ", $$3 }'\
 	) > ${.TARGET}
 
 # Both the .vis files and the scratch .vo objects are removed on clean.
 CLEANFILES += ${ASM_V} ${ASM_V:R:S/$/.vo/}
 .endif
 
 #-----------------------------------------------------------------------
 #
 # Exception handling / unwinding support.
 #
 # Object names are derived from every member of LIB2ADDEHSTATIC /
 # LIB2ADDEHSHARED by replacing the suffix (.o, .po, .So).
 EH_OBJS_T = ${LIB2ADDEHSTATIC:R:S/$/.o/}
 EH_OBJS_P = ${LIB2ADDEHSTATIC:R:S/$/.po/}
 EH_OBJS_S = ${LIB2ADDEHSHARED:R:S/$/.So/}
 EH_CFLAGS = -fexceptions -D__GLIBC__=3 -DElfW=__ElfN
 SOBJS    += ${EH_OBJS_S}
 
 # Explicit rules are generated per source file: .c members are compiled
 # with the CC_* shortcuts, .cpp members with the CXX_* shortcuts, always
 # with EH_CFLAGS added.
 # NOTE(review): members with any other suffix (the .S files listed in
 # LIB2ADDEH) get no explicit rule here; presumably they are built by the
 # default suffix rules and therefore without EH_CFLAGS -- confirm.
-.for _src in ${LIB2ADDEHSTATIC}
+.for _src in ${LIB2ADDEHSTATIC:M*.c}
 ${_src:R:S/$/.o/}: ${_src} ${COMMONHDRS}
 	${CC_T} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC}
 ${_src:R:S/$/.po/}: ${_src} ${COMMONHDRS}
 	${CC_P} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC}
 .endfor
-.for _src in ${LIB2ADDEHSHARED}
+.for _src in ${LIB2ADDEHSTATIC:M*.cpp}
+${_src:R:S/$/.o/}: ${_src} ${COMMONHDRS}
+	${CXX_T} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC}
+${_src:R:S/$/.po/}: ${_src} ${COMMONHDRS}
+	${CXX_P} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC}
+.endfor
+.for _src in ${LIB2ADDEHSHARED:M*.c}
 ${_src:R:S/$/.So/}: ${_src} ${COMMONHDRS}
 	${CC_S} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC}
+.endfor
+.for _src in ${LIB2ADDEHSHARED:M*.cpp}
+${_src:R:S/$/.So/}: ${_src} ${COMMONHDRS}
+	${CXX_S} ${EH_CFLAGS} -o ${.TARGET} ${.IMPSRC}
 .endfor
 
 
 #-----------------------------------------------------------------------
 #
 # Generated headers
 #
 ${COMMONHDRS}: ${.CURDIR}/../../usr.bin/cc/cc_tools/Makefile
 	(cd ${.CURDIR}; ${MAKE} -f ${.ALLSRC} MFILE=${.ALLSRC} GCCDIR=${GCCDIR} ${.TARGET})
 
 CLEANFILES += ${COMMONHDRS}
 CLEANFILES += cs-*.h option*
 
 #-----------------------------------------------------------------------
 #
 # Build symbol version map
 #
 SHLIB_MKMAP      = ${GCCDIR}/mkmap-symver.awk
 SHLIB_MKMAP_OPTS =
 SHLIB_MAPFILES   = ${GCCDIR}/libgcc-std.ver
 .if ${TARGET_CPUARCH} == "arm"
 SHLIB_MAPFILES  += ${GCCDIR}/config/arm/libgcc-bpabi.ver
 .endif
 VERSION_MAP      = libgcc.map
 
 libgcc.map: ${SHLIB_MKMAP} ${SHLIB_MAPFILES} ${SOBJS} ${OBJS:R:S/$/.So/}
 	(  ${NM} -pg ${SOBJS};echo %% ; \
 	  cat ${SHLIB_MAPFILES} \
 	    | sed -e '/^[   ]*#/d' \
 	          -e 's/^%\(if\|else\|elif\|endif\|define\)/#\1/' \
 	    | ${CC} ${CFLAGS} -E -xassembler-with-cpp -; \
 	) | awk -f ${SHLIB_MKMAP} ${SHLIB_MKMAP_OPTS} > ${.TARGET}
 
 CLEANFILES +=	libgcc.map
 
 #-----------------------------------------------------------------------
 #
 # Build additional static libgcc_eh[_p].a libraries.
 #
 # libgcc_eh.a holds only the exception-handling objects (EH_OBJS_T).
 # Any existing archive is removed first, then the members are added in the
 # order produced by lorder | tsort and the archive is indexed with RANLIB.
 libgcc_eh.a:	${EH_OBJS_T}
 	@${ECHO} building static gcc_eh library
 	@rm -f ${.TARGET}
 	@${AR} ${ARFLAGS} ${.TARGET} `lorder ${EH_OBJS_T} | tsort -q`
 	${RANLIB} ${RANLIBFLAGS} ${.TARGET}
 
 _LIBS+= libgcc_eh.a
 
 # Same recipe for the .po objects; only built when MK_PROFILE is not "no".
 .if ${MK_PROFILE} != "no"
 libgcc_eh_p.a:	${EH_OBJS_P}
 	@${ECHO} building profiled gcc_eh library
 	@rm -f ${.TARGET}
 	@${AR} ${ARFLAGS} ${.TARGET} `lorder ${EH_OBJS_P} | tsort -q`
 	${RANLIB} ${RANLIBFLAGS} ${.TARGET}
 
 _LIBS+= libgcc_eh_p.a
 .endif
 
 # Hook the installation of the extra EH archives into _libinstall.
 _libinstall: _lib-eh-install
 
 # Installs libgcc_eh.a when MK_INSTALLLIB is not "no" and libgcc_eh_p.a
 # when MK_PROFILE is not "no", both into ${DESTDIR}${LIBDIR}.
 # NOTE(review): the profiled archive is installed independently of
 # MK_INSTALLLIB -- confirm that this is intended.
 _lib-eh-install:
 .if ${MK_INSTALLLIB} != "no"
 	${INSTALL} -C -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \
 		${_INSTALLFLAGS} libgcc_eh.a ${DESTDIR}${LIBDIR}
 .endif
 .if ${MK_PROFILE} != "no"
 	${INSTALL} -C -o ${LIBOWN} -g ${LIBGRP} -m ${LIBMODE} \
 		${_INSTALLFLAGS} libgcc_eh_p.a ${DESTDIR}${LIBDIR}
 .endif
 
 CLEANFILES+=	libgcc_eh.a libgcc_eh_p.a ${EH_OBJS_T} ${EH_OBJS_P}
 
 .include <bsd.lib.mk>
 
 .SUFFIXES: .vis .vo
Index: projects/clang380-import/gnu/lib
===================================================================
--- projects/clang380-import/gnu/lib	(revision 293686)
+++ projects/clang380-import/gnu/lib	(revision 293687)

Property changes on: projects/clang380-import/gnu/lib
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/gnu/lib:r292913-293685
Index: projects/clang380-import/include/limits.h
===================================================================
--- projects/clang380-import/include/limits.h	(revision 293686)
+++ projects/clang380-import/include/limits.h	(revision 293687)
@@ -1,144 +1,147 @@
 /*-
  * Copyright (c) 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)limits.h	8.2 (Berkeley) 1/4/94
  * $FreeBSD$
  */
 
 #ifndef _LIMITS_H_
 #define	_LIMITS_H_
 
 #include <sys/cdefs.h>
 
 #if __POSIX_VISIBLE
 #define	_POSIX_ARG_MAX		4096
 #define	_POSIX_LINK_MAX		8
 #define	_POSIX_MAX_CANON	255
 #define	_POSIX_MAX_INPUT	255
 #define	_POSIX_NAME_MAX		14
 #define	_POSIX_PIPE_BUF		512
 #define	_POSIX_SSIZE_MAX	32767
 #define	_POSIX_STREAM_MAX	8
 
 #if __POSIX_VISIBLE >= 200112
 #define	_POSIX_CHILD_MAX	25
 #define	_POSIX_NGROUPS_MAX	8
 #define	_POSIX_OPEN_MAX		20
 #define	_POSIX_PATH_MAX		256
 #define	_POSIX_TZNAME_MAX	6
 #else
 #define	_POSIX_CHILD_MAX	6
 #define	_POSIX_NGROUPS_MAX	0
 #define	_POSIX_OPEN_MAX		16
 #define	_POSIX_PATH_MAX		255
 #define	_POSIX_TZNAME_MAX	3
 #endif
 
+#if __POSIX_VISIBLE >= 200112
 #define	BC_BASE_MAX		   99	/* max ibase/obase values in bc(1) */
 #define	BC_DIM_MAX		 2048	/* max array elements in bc(1) */
 #define	BC_SCALE_MAX		   99	/* max scale value in bc(1) */
 #define	BC_STRING_MAX		 1000	/* max const string length in bc(1) */
+#define	CHARCLASS_NAME_MAX	   14	/* max character class name size */
 #define	COLL_WEIGHTS_MAX	   10	/* max weights for order keyword */
 #define	EXPR_NEST_MAX		   32	/* max expressions nested in expr(1) */
 #define	LINE_MAX		 2048	/* max bytes in an input line */
 #define	RE_DUP_MAX		  255	/* max RE's in interval notation */
 
 #define	_POSIX2_BC_BASE_MAX	99
 #define	_POSIX2_BC_DIM_MAX	2048
 #define	_POSIX2_BC_SCALE_MAX	99
 #define	_POSIX2_BC_STRING_MAX	1000
+#define	_POSIX2_CHARCLASS_NAME_MAX 14
+#define	_POSIX2_COLL_WEIGHTS_MAX 2
 #define	_POSIX2_EQUIV_CLASS_MAX	2
 #define	_POSIX2_EXPR_NEST_MAX	32
 #define	_POSIX2_LINE_MAX	2048
 #define	_POSIX2_RE_DUP_MAX	255
 #endif
+#endif
 
 #if __POSIX_VISIBLE >= 199309
 #define	_POSIX_AIO_LISTIO_MAX	2
 #define	_POSIX_AIO_MAX		1
 #define	_POSIX_DELAYTIMER_MAX	32
 #define	_POSIX_MQ_OPEN_MAX	8
 #define	_POSIX_MQ_PRIO_MAX	32
 #define	_POSIX_RTSIG_MAX	8
 #define	_POSIX_SEM_NSEMS_MAX	256
 #define	_POSIX_SEM_VALUE_MAX	32767
 #define	_POSIX_SIGQUEUE_MAX	32
 #define	_POSIX_TIMER_MAX	32
 
 #define	_POSIX_CLOCKRES_MIN	20000000
 #endif
 
 #if __POSIX_VISIBLE >= 199506
 #define	_POSIX_THREAD_DESTRUCTOR_ITERATIONS 4
 #define	_POSIX_THREAD_KEYS_MAX	128
 #define	_POSIX_THREAD_THREADS_MAX 64
 #endif
 
 #if __POSIX_VISIBLE >= 200112
 #define	_POSIX_HOST_NAME_MAX	255
 #define	_POSIX_LOGIN_NAME_MAX	9
 #define	_POSIX_SS_REPL_MAX	4
 #define	_POSIX_SYMLINK_MAX	255
 #define	_POSIX_SYMLOOP_MAX	8
 #define	_POSIX_TRACE_EVENT_NAME_MAX 30
 #define	_POSIX_TRACE_NAME_MAX	8
 #define	_POSIX_TRACE_SYS_MAX	8
 #define	_POSIX_TRACE_USER_EVENT_MAX 32
 #define	_POSIX_TTY_NAME_MAX	9
-#define	_POSIX2_CHARCLASS_NAME_MAX 14
-#define	_POSIX2_COLL_WEIGHTS_MAX 2
 
 #define	_POSIX_RE_DUP_MAX	_POSIX2_RE_DUP_MAX
 #endif
 
 #if __XSI_VISIBLE || __POSIX_VISIBLE >= 200809
 #define	NL_ARGMAX		99	/* max # of position args for printf */
 #define	NL_MSGMAX		32767
 #define	NL_SETMAX		255
 #define	NL_TEXTMAX		2048
 #endif
 
 #if __XSI_VISIBLE
 #define	_XOPEN_IOV_MAX		16
 #define	_XOPEN_NAME_MAX		255
 #define	_XOPEN_PATH_MAX		1024
 #define	PASS_MAX		128	/* _PASSWORD_LEN from <pwd.h> */
 
 #define	NL_LANGMAX		31	/* max LANG name length */
 #define	NL_NMAX			1
 #endif
 
 #define	MB_LEN_MAX		6	/* 31-bit UTF-8 */
 
 #include <sys/limits.h>
 
 #if __POSIX_VISIBLE
 #include <sys/syslimits.h>
 #endif
 
 #endif /* !_LIMITS_H_ */
Index: projects/clang380-import/include
===================================================================
--- projects/clang380-import/include	(revision 293686)
+++ projects/clang380-import/include	(revision 293687)

Property changes on: projects/clang380-import/include
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/include:r293280-293685
Index: projects/clang380-import/lib/csu/arm/Makefile
===================================================================
--- projects/clang380-import/lib/csu/arm/Makefile	(revision 293686)
+++ projects/clang380-import/lib/csu/arm/Makefile	(revision 293687)
@@ -1,46 +1,47 @@
 # $FreeBSD$
 
 .PATH: ${.CURDIR}/../common
 
 SRCS=		crt1.c crti.S crtn.S
 OBJS=		${SRCS:N*.h:R:S/$/.o/g}
 OBJS+=		Scrt1.o gcrt1.o
 CFLAGS+=	-I${.CURDIR}/../common \
 		-I${.CURDIR}/../../libc/include
+STATIC_CFLAGS+=	-mlong-calls
 
 FILES=		${OBJS}
 FILESMODE=	${LIBMODE}
 FILESOWN=	${LIBOWN}
 FILESGRP=	${LIBGRP}
 FILESDIR=	${LIBDIR}
 # These FILES qualify as libraries for the purpose of LIBRARIES_ONLY.
 .undef LIBRARIES_ONLY
 
 CLEANFILES=	${OBJS}
 CLEANFILES+=	crt1.s gcrt1.s Scrt1.s
 
 # See the comment in lib/csu/common/crtbrand.c for the reason crt1.c is not
 # directly compiled to .o files.
 
 crt1.s: crt1.c
-	${CC} ${CFLAGS} -S -o ${.TARGET} ${.CURDIR}/crt1.c
+	${CC} ${CFLAGS} ${STATIC_CFLAGS} -S -o ${.TARGET} ${.CURDIR}/crt1.c
 	sed ${SED_FIX_NOTE} ${.TARGET}
 
 crt1.o: crt1.s
 	${CC} ${ACFLAGS} -c -o ${.TARGET} crt1.s
 
 gcrt1.s: crt1.c
-	${CC} ${CFLAGS} -DGCRT -S -o ${.TARGET} ${.CURDIR}/crt1.c
+	${CC} ${CFLAGS} ${STATIC_CFLAGS} -DGCRT -S -o ${.TARGET} ${.CURDIR}/crt1.c
 	sed ${SED_FIX_NOTE} ${.TARGET}
 
 gcrt1.o: gcrt1.s
 	${CC} ${ACFLAGS} -c -o ${.TARGET} gcrt1.s
 
 Scrt1.s: crt1.c
 	${CC} ${CFLAGS} -fPIC -DPIC -S -o ${.TARGET} ${.CURDIR}/crt1.c
 	sed ${SED_FIX_NOTE} ${.TARGET}
 
 Scrt1.o: Scrt1.s
 	${CC} ${ACFLAGS} -c -o ${.TARGET} Scrt1.s
 
 .include <bsd.lib.mk>
Index: projects/clang380-import/lib/libc/sys/sendfile.2
===================================================================
--- projects/clang380-import/lib/libc/sys/sendfile.2	(revision 293686)
+++ projects/clang380-import/lib/libc/sys/sendfile.2	(revision 293687)
@@ -1,318 +1,377 @@
 .\" Copyright (c) 2003, David G. Lawrence
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice unmodified, this list of conditions, and the following
 .\"    disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd January 7, 2010
+.Dd January 7, 2016
 .Dt SENDFILE 2
 .Os
 .Sh NAME
 .Nm sendfile
 .Nd send a file to a socket
 .Sh LIBRARY
 .Lb libc
 .Sh SYNOPSIS
 .In sys/types.h
 .In sys/socket.h
 .In sys/uio.h
 .Ft int
 .Fo sendfile
 .Fa "int fd" "int s" "off_t offset" "size_t nbytes"
 .Fa "struct sf_hdtr *hdtr" "off_t *sbytes" "int flags"
 .Fc
 .Sh DESCRIPTION
 The
 .Fn sendfile
 system call
-sends a regular file specified by descriptor
+sends a regular file or shared memory object specified by descriptor
 .Fa fd
 out a stream socket specified by descriptor
 .Fa s .
 .Pp
 The
 .Fa offset
 argument specifies where to begin in the file.
 Should
 .Fa offset
 fall beyond the end of file, the system will return
 success and report 0 bytes sent as described below.
 The
 .Fa nbytes
 argument specifies how many bytes of the file should be sent, with 0 having the special
 meaning of send until the end of file has been reached.
 .Pp
 An optional header and/or trailer can be sent before and after the file data by specifying
 a pointer to a
 .Vt "struct sf_hdtr" ,
 which has the following structure:
 .Pp
 .Bd -literal -offset indent -compact
 struct sf_hdtr {
 	struct iovec *headers;	/* pointer to header iovecs */
 	int hdr_cnt;		/* number of header iovecs */
 	struct iovec *trailers;	/* pointer to trailer iovecs */
 	int trl_cnt;		/* number of trailer iovecs */
 };
 .Ed
 .Pp
 The
 .Fa headers
 and
 .Fa trailers
 pointers, if
 .Pf non- Dv NULL ,
 point to arrays of
 .Vt "struct iovec"
 structures.
 See the
 .Fn writev
 system call for information on the iovec structure.
 The number of iovecs in these
 arrays is specified by
 .Fa hdr_cnt
 and
 .Fa trl_cnt .
 .Pp
 If
 .Pf non- Dv NULL ,
 the system will write the total number of bytes sent on the socket to the
 variable pointed to by
 .Fa sbytes .
 .Pp
-The
+The least significant 16 bits of
 .Fa flags
 argument is a bitmap of these values:
-.Bl -item -offset indent
-.It
-.Dv SF_NODISKIO .
-This flag causes any
-.Fn sendfile
-call which would block on disk I/O to instead
-return
-.Er EBUSY .
-Busy servers may benefit by transferring requests that would
-block to a separate I/O worker thread.
-.It
-.Dv SF_MNOWAIT .
-Do not wait for some kernel resource to become available,
-in particular,
-.Vt mbuf
-and
-.Vt sf_buf .
-The flag does not make the
-.Fn sendfile
-syscall truly non-blocking, since other resources are still allocated
-in a blocking fashion.
-.It
-.Dv SF_SYNC .
+.Bl -tag -offset indent
+.It Dv SF_NODISKIO
+This flag causes
 .Nm
+to return
+.Er EBUSY
+instead of blocking when a busy page is encountered.
+This rare situation can happen if some other process is now working
+with the same region of the file.
+It is advised to retry the operation after a short period.
+.Pp
+Note that in older
+.Fx
+versions the
+.Dv SF_NODISKIO
+had a slightly different meaning.
+The flag prevented
+.Nm
+from running I/O operations if an invalid (not cached) page was encountered,
+thus avoiding blocking on I/O.
+Starting with
+.Fx 11
+.Nm
+sending files off the
+.Xr ffs 7
+filesystem doesn't block on I/O
+(see
+.Sx IMPLEMENTATION NOTES ) ,
+so the condition no longer applies.
+However, it is safe if an application utilizes
+.Dv SF_NODISKIO
+and on
+.Er EBUSY
+performs the same action as it did in
+older
+.Fx
+versions, e.g.
+.Xr aio_read 2 ,
+.Xr read 2
+or
+.Nm
+in a different context.
+.It Dv SF_NOCACHE
+The data sent to socket will not be cached by the virtual memory system,
+and will be freed directly to the pool of free pages.
+.It Dv SF_SYNC
+.Nm
 sleeps until the network stack no longer references the VM pages
 of the file, making subsequent modifications to it safe.
 Please note that this is not a guarantee that the data has actually
 been sent.
 .El
 .Pp
+The most significant 16 bits of
+.Fa flags
+specify the number of pages that
+.Nm
+may read ahead when reading the file.
+A macro
+.Fn SF_FLAGS
+is provided to combine readahead amount and flags.
+The following example specifies a readahead of 16 pages and the
+.Dv SF_NOCACHE
+flag:
+.Pp
+.Bd -literal -offset indent -compact
+	SF_FLAGS(16, SF_NOCACHE)
+.Ed
+.Pp
 When using a socket marked for non-blocking I/O,
 .Fn sendfile
 may send fewer bytes than requested.
 In this case, the number of bytes successfully
 written is returned in
 .Fa *sbytes
 (if specified),
 and the error
 .Er EAGAIN
 is returned.
 .Sh IMPLEMENTATION NOTES
 The
 .Fx
 implementation of
 .Fn sendfile
+doesn't block on disk I/O when it sends a file off the
+.Xr ffs 7
+filesystem.
+The syscall returns success before the actual I/O completes, and data
+is put into the socket later unattended.
+However, the order of data in the socket is preserved, so it is safe
+to do further writes to the socket.
+.Pp
+The
+.Fx
+implementation of
+.Fn sendfile
 is "zero-copy", meaning that it has been optimized so that copying of the file data is avoided.
 .Sh TUNING
 On some architectures, this system call internally uses a special
 .Fn sendfile
 buffer
 .Pq Vt "struct sf_buf"
 to handle sending file data to the client.
 If the sending socket is
 blocking, and there are not enough
 .Fn sendfile
 buffers available,
 .Fn sendfile
 will block and report a state of
 .Dq Li sfbufa .
 If the sending socket is non-blocking and there are not enough
 .Fn sendfile
 buffers available, the call will block and wait for the
 necessary buffers to become available before finishing the call.
 .Pp
 The number of
 .Vt sf_buf Ns 's
 allocated should be proportional to the number of nmbclusters used to
 send data to a client via
 .Fn sendfile .
 Tune accordingly to avoid blocking!
 Busy installations that make extensive use of
 .Fn sendfile
 may want to increase these values to be inline with their
 .Va kern.ipc.nmbclusters
 (see
 .Xr tuning 7
 for details).
 .Pp
 The number of
 .Fn sendfile
 buffers available is determined at boot time by either the
 .Va kern.ipc.nsfbufs
 .Xr loader.conf 5
 variable or the
 .Dv NSFBUFS
 kernel configuration tunable.
 The number of
 .Fn sendfile
 buffers scales with
 .Va kern.maxusers .
 The
 .Va kern.ipc.nsfbufsused
 and
 .Va kern.ipc.nsfbufspeak
 read-only
 .Xr sysctl 8
 variables show current and peak
 .Fn sendfile
 buffers usage respectively.
 These values may also be viewed through
 .Nm netstat Fl m .
 .Pp
 If a value of zero is reported for
 .Va kern.ipc.nsfbufs ,
 your architecture does not need to use
 .Fn sendfile
 buffers because their task can be efficiently performed
 by the generic virtual memory structures.
 .Sh RETURN VALUES
 .Rv -std sendfile
 .Sh ERRORS
 .Bl -tag -width Er
 .It Bq Er EAGAIN
 The socket is marked for non-blocking I/O and not all data was sent due to
 the socket buffer being filled.
 If specified, the number of bytes successfully sent will be returned in
 .Fa *sbytes .
 .It Bq Er EBADF
 The
 .Fa fd
 argument
 is not a valid file descriptor.
 .It Bq Er EBADF
 The
 .Fa s
 argument
 is not a valid socket descriptor.
 .It Bq Er EBUSY
-Completing the entire transfer would have required disk I/O, so
-it was aborted.
-Partial data may have been sent.
-(This error can only occur when
+A busy page was encountered and
 .Dv SF_NODISKIO
-is specified.)
+had been specified.
+Partial data may have been sent.
 .It Bq Er EFAULT
 An invalid address was specified for an argument.
 .It Bq Er EINTR
 A signal interrupted
 .Fn sendfile
 before it could be completed.
 If specified, the number
 of bytes successfully sent will be returned in
 .Fa *sbytes .
 .It Bq Er EINVAL
 The
 .Fa fd
 argument
 is not a regular file.
 .It Bq Er EINVAL
 The
 .Fa s
 argument
 is not a SOCK_STREAM type socket.
 .It Bq Er EINVAL
 The
 .Fa offset
 argument
 is negative.
 .It Bq Er EIO
 An error occurred while reading from
 .Fa fd .
 .It Bq Er ENOBUFS
 The system was unable to allocate an internal buffer.
 .It Bq Er ENOTCONN
 The
 .Fa s
 argument
 points to an unconnected socket.
 .It Bq Er ENOTSOCK
 The
 .Fa s
 argument
 is not a socket.
 .It Bq Er EOPNOTSUPP
 The file system for descriptor
 .Fa fd
 does not support
 .Fn sendfile .
 .It Bq Er EPIPE
 The socket peer has closed the connection.
 .El
 .Sh SEE ALSO
 .Xr netstat 1 ,
 .Xr open 2 ,
 .Xr send 2 ,
 .Xr socket 2 ,
 .Xr writev 2 ,
 .Xr tuning 7
 .Rs
 .%A K. Elmeleegy
 .%A A. Chanda
 .%A A. L. Cox
 .%A W. Zwaenepoel
 .%T A Portable Kernel Abstraction for Low-Overhead Ephemeral Mapping Management
 .%J The Proceedings of the 2005 USENIX Annual Technical Conference
 .%P pp 223-236
 .%D 2005
 .Re
 .Sh HISTORY
 The
 .Fn sendfile
 system call
 first appeared in
 .Fx 3.0 .
 This manual page first appeared in
 .Fx 3.1 .
+In
+.Fx 10
+support for sending shared memory descriptors was introduced.
+In
+.Fx 11
+a non-blocking implementation was introduced.
 .Sh AUTHORS
-The
+The initial implementation of
 .Fn sendfile
 system call
 and this manual page were written by
 .An David G. Lawrence Aq Mt dg@dglawrence.com .
+The
+.Fx 11
+implementation was written by
+.An Gleb Smirnoff Aq Mt glebius@FreeBSD.org .
Index: projects/clang380-import/lib/libc
===================================================================
--- projects/clang380-import/lib/libc	(revision 293686)
+++ projects/clang380-import/lib/libc	(revision 293687)

Property changes on: projects/clang380-import/lib/libc
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/lib/libc:r293280-293685
Index: projects/clang380-import/lib/libstand/uuid_to_string.c
===================================================================
--- projects/clang380-import/lib/libstand/uuid_to_string.c	(revision 293686)
+++ projects/clang380-import/lib/libstand/uuid_to_string.c	(revision 293687)
@@ -1,111 +1,111 @@
 /*-
  * Copyright (c) 2015 M. Warner Losh
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 
 /*
  * Note: some comments taken from lib/libc/uuid/uuid_to_string.c
  * Copyright (c) 2002,2005 Marcel Moolenaar
  * Copyright (c) 2002 Hiten Mahesh Pandya
  */
 
 #include <stand.h>
 #include <uuid.h>
 
 /*
  * Dump len characters into *buf from val as hex and update *buf
  */
 static void
 tohex(char **buf, int len, uint32_t val)
 {
 	static const char *hexstr = "0123456789abcdef";
 	char *walker = *buf;
 	int i;
 
-	for (i = len - 1; i >= 0; i++) {
+	for (i = len - 1; i >= 0; i--) {
 		walker[i] = hexstr[val & 0xf];
 		val >>= 4;
 	}
 	*buf = walker + len;
 }
 
 /*
  * uuid_to_string() - Convert a binary UUID into a string representation.
  * See also:
  *	http://www.opengroup.org/onlinepubs/009629399/uuid_to_string.htm
  *
  * NOTE: The references given above do not have a status code for when
  *	 the string could not be allocated. The status code has been
  *	 taken from the Hewlett-Packard implementation.
  *
  * NOTE: we don't support u == NULL for a nil UUID, sorry.
  *
  * NOTE: The sequence field is in big-endian, while the time fields are in
  *	 native byte order.
  *
  *	 hhhhhhhh-hhhh-hhhh-bbbb-bbbbbbbbbbbb
  *	 01234567-89ab-cdef-0123-456789abcdef
  */
 void
 uuid_to_string(const uuid_t *u, char **s, uint32_t *status)
 {
 	uuid_t nil;
 	char *w;
 
 	if (status != NULL)
 		*status = uuid_s_ok;
 	if (s == NULL)	/* Regular version does this odd-ball behavior too */
 		return;
 	w = *s = malloc(37);
 	if (*s == NULL) {
 		if (status != NULL)
 			*status = uuid_s_no_memory;
 		return;
 	}
 	if (u == NULL) {
 		u = &nil;
 		uuid_create_nil(&nil, NULL);
 	}
 	/* native */
 	tohex(&w, 8, u->time_low);
 	*w++ = '-';
 	tohex(&w, 4, u->time_mid);
 	*w++ = '-';
 	tohex(&w, 4, u->time_hi_and_version);
 	*w++ = '-';
 	/* Big endian, so do a byte at a time */
 	tohex(&w, 2, u->clock_seq_hi_and_reserved);
 	tohex(&w, 2, u->clock_seq_low);
 	*w++ = '-';
 	tohex(&w, 2, u->node[0]);
 	tohex(&w, 2, u->node[1]);
 	tohex(&w, 2, u->node[2]);
 	tohex(&w, 2, u->node[3]);
 	tohex(&w, 2, u->node[4]);
 	tohex(&w, 2, u->node[5]);
-	*w++ - '\0';
+	*w++ = '\0';
 }
Index: projects/clang380-import/release/release.sh
===================================================================
--- projects/clang380-import/release/release.sh	(revision 293686)
+++ projects/clang380-import/release/release.sh	(revision 293687)
@@ -1,406 +1,407 @@
 #!/bin/sh
 #-
 # Copyright (c) 2013-2015 The FreeBSD Foundation
 # Copyright (c) 2013 Glen Barber
 # Copyright (c) 2011 Nathan Whitehorn
 # All rights reserved.
 #
 # Portions of this software were developed by Glen Barber
 # under sponsorship from the FreeBSD Foundation.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
 # 1. Redistributions of source code must retain the above copyright
 #    notice, this list of conditions and the following disclaimer.
 # 2. Redistributions in binary form must reproduce the above copyright
 #    notice, this list of conditions and the following disclaimer in the
 #    documentation and/or other materials provided with the distribution.
 #
 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 # ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.
 #
 # release.sh: check out source trees, and build release components with
 #  totally clean, fresh trees.
 # Based on release/generate-release.sh written by Nathan Whitehorn
 #
 # $FreeBSD$
 #
 
 export PATH="/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin"
 
 VERSION=2
 
 # Prototypes that can be redefined per-chroot or per-target.
 load_chroot_env() { }
 load_target_env() { }
 buildenv_setup() { }
 
 usage() {
 	echo "Usage: $0 [-c release.conf]"
 	exit 1
 }
 
 # env_setup(): Set up the default build environment variables, such as the
 # CHROOTDIR, VCSCMD, SVNROOT, etc.  This is called before the release.conf
 # file is sourced, if '-c <release.conf>' is specified.
 env_setup() {
 	# The directory within which the release will be built.
 	CHROOTDIR="/scratch"
 	RELENGDIR="$(dirname $(realpath ${0}))"
 
 	# The default version control system command to obtain the sources.
 	for _dir in /usr/bin /usr/local/bin; do
 		for _svn in svn svnlite; do
 			[ -x "${_dir}/${_svn}" ] && VCSCMD="${_dir}/${_svn}"
 			[ ! -z "${VCSCMD}" ] && break 2
 		done
 	done
 	VCSCMD="${VCSCMD} checkout"
 
 	# The default svn checkout server, and svn branches for src/, doc/,
 	# and ports/.
 	SVNROOT="svn://svn.FreeBSD.org/"
 	SRCBRANCH="base/head@rHEAD"
 	DOCBRANCH="doc/head@rHEAD"
 	PORTBRANCH="ports/head@rHEAD"
 
 	# Set for embedded device builds.
 	EMBEDDEDBUILD=
 
 	# Sometimes one needs to checkout src with --force svn option.
 	# If custom kernel configs copied to src tree before checkout, e.g.
 	SRC_FORCE_CHECKOUT=
 
 	# The default make.conf and src.conf to use.  Set to /dev/null
 	# by default to avoid polluting the chroot(8) environment with
 	# non-default settings.
 	MAKE_CONF="/dev/null"
 	SRC_CONF="/dev/null"
 
 	# The number of make(1) jobs, defaults to the number of CPUs available
 	# for buildworld, and half of number of CPUs available for buildkernel.
 	WORLD_FLAGS="-j$(sysctl -n hw.ncpu)"
 	KERNEL_FLAGS="-j$(( $(( $(sysctl -n hw.ncpu) + 1 )) / 2))"
 
 	MAKE_FLAGS="-s"
 
 	# The name of the kernel to build, defaults to GENERIC.
 	KERNEL="GENERIC"
 
 	# Set to non-empty value to disable checkout of doc/ and/or ports/.
 	# Disabling ports/ checkout also forces NODOC to be set.
 	NODOC=
 	NOPORTS=
 
 	# Set to non-empty value to build dvd1.iso as part of the release.
 	WITH_DVD=
 	WITH_COMPRESSED_IMAGES=
 
 	# Set to non-empty value to build virtual machine images as part of
 	# the release.
 	WITH_VMIMAGES=
 	WITH_COMPRESSED_VMIMAGES=
 	XZ_THREADS=0
 
 	# Set to non-empty value to build virtual machine images for various
 	# cloud providers as part of the release.
 	WITH_CLOUDWARE=
 
 	return 0
 } # env_setup()
 
 # env_check(): Perform sanity tests on the build environment, such as ensuring
 # files/directories exist, as well as adding backwards-compatibility hacks if
 # necessary.  This is called unconditionally, and overrides the defaults set
 # in env_setup() if '-c <release.conf>' is specified.
 env_check() {
 	chroot_build_release_cmd="chroot_build_release"
 	# Fix for backwards-compatibility with release.conf that does not have
 	# the trailing '/'.
 	case ${SVNROOT} in
 		*svn*)
 			SVNROOT="${SVNROOT}/"
 			;;
 		*)
 			;;
 	esac
 
 	# Prefix the branches with the SVNROOT for the full checkout URL.
 	SRCBRANCH="${SVNROOT}${SRCBRANCH}"
 	DOCBRANCH="${SVNROOT}${DOCBRANCH}"
 	PORTBRANCH="${SVNROOT}${PORTBRANCH}"
 
 	if [ -n "${EMBEDDEDBUILD}" ]; then
 		WITH_DVD=
 		WITH_COMPRESSED_IMAGES=
 		NODOC=yes
 		case ${EMBEDDED_TARGET}:${EMBEDDED_TARGET_ARCH} in
 			arm:armv6)
 				chroot_build_release_cmd="chroot_arm_armv6_build_release"
 				;;
 			*)
 		esac
 	fi
 
 	# If PORTS is set and NODOC is unset, force NODOC=yes because the ports
 	# tree is required to build the documentation set.
 	if [ -n "${NOPORTS}" ] && [ -z "${NODOC}" ]; then
 		echo "*** NOTICE: Setting NODOC=1 since ports tree is required"
 		echo "            and NOPORTS is set."
 		NODOC=yes
 	fi
 
 	# If NOPORTS and/or NODOC are unset, they must not pass to make as
 	# variables.  The release makefile verifies definedness of the
 	# NOPORTS/NODOC variables instead of their values.
 	DOCPORTS=
 	if [ -n "${NOPORTS}" ]; then
 		DOCPORTS="NOPORTS=yes "
 	fi
 	if [ -n "${NODOC}" ]; then
 		DOCPORTS="${DOCPORTS}NODOC=yes"
 	fi
 
 	# The aggregated build-time flags based upon variables defined within
 	# this file, unless overridden by release.conf.  In most cases, these
 	# will not need to be changed.
 	CONF_FILES="__MAKE_CONF=${MAKE_CONF} SRCCONF=${SRC_CONF}"
 	if [ -n "${TARGET}" ] && [ -n "${TARGET_ARCH}" ]; then
 		ARCH_FLAGS="TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH}"
 	else
 		ARCH_FLAGS=
 	fi
 	# Force src checkout if configured
 	FORCE_SRC_KEY=
 	if [ -n "${SRC_FORCE_CHECKOUT}" ]; then
 		FORCE_SRC_KEY="--force"
 	fi
 
 	if [ -z "${CHROOTDIR}" ]; then
 		echo "Please set CHROOTDIR."
 		exit 1
 	fi
 
 	if [ $(id -u) -ne 0 ]; then
 		echo "Needs to be run as root."
 		exit 1
 	fi
 
 	CHROOT_MAKEENV="${CHROOT_MAKEENV} \
 		MAKEOBJDIRPREFIX=${CHROOTDIR}/tmp/obj"
 	CHROOT_WMAKEFLAGS="${MAKE_FLAGS} ${WORLD_FLAGS} ${CONF_FILES}"
 	CHROOT_IMAKEFLAGS="${CONF_FILES}"
 	CHROOT_DMAKEFLAGS="${CONF_FILES}"
 	RELEASE_WMAKEFLAGS="${MAKE_FLAGS} ${WORLD_FLAGS} ${ARCH_FLAGS} \
 		${CONF_FILES}"
 	RELEASE_KMAKEFLAGS="${MAKE_FLAGS} ${KERNEL_FLAGS} \
 		KERNCONF=\"${KERNEL}\" ${ARCH_FLAGS} ${CONF_FILES}"
 	RELEASE_RMAKEFLAGS="${ARCH_FLAGS} \
 		KERNCONF=\"${KERNEL}\" ${CONF_FILES} ${DOCPORTS} \
 		WITH_DVD=${WITH_DVD} WITH_VMIMAGES=${WITH_VMIMAGES} \
 		WITH_CLOUDWARE=${WITH_CLOUDWARE} XZ_THREADS=${XZ_THREADS}"
 
 	return 0
 } # env_check()
 
 # chroot_setup(): Prepare the build chroot environment for the release build.
 chroot_setup() {
 	load_chroot_env
 	mkdir -p ${CHROOTDIR}/usr
 
 	if [ -z "${SRC_UPDATE_SKIP}" ]; then
 		${VCSCMD} ${FORCE_SRC_KEY} ${SRCBRANCH} ${CHROOTDIR}/usr/src
 	fi
 	if [ -z "${NODOC}" ] && [ -z "${DOC_UPDATE_SKIP}" ]; then
 		${VCSCMD} ${DOCBRANCH} ${CHROOTDIR}/usr/doc
 	fi
 	if [ -z "${NOPORTS}" ] && [ -z "${PORTS_UPDATE_SKIP}" ]; then
 		${VCSCMD} ${PORTBRANCH} ${CHROOTDIR}/usr/ports
 	fi
 
 	if [ -z "${CHROOTBUILD_SKIP}" ]; then
 		cd ${CHROOTDIR}/usr/src
 		env ${CHROOT_MAKEENV} make ${CHROOT_WMAKEFLAGS} buildworld
 		env ${CHROOT_MAKEENV} make ${CHROOT_IMAKEFLAGS} installworld \
 			DESTDIR=${CHROOTDIR}
 		env ${CHROOT_MAKEENV} make ${CHROOT_DMAKEFLAGS} distribution \
 			DESTDIR=${CHROOTDIR}
 	fi
 
 	return 0
 } # chroot_setup()
 
 # extra_chroot_setup(): Prepare anything additional within the build
 # necessary for the release build.
 extra_chroot_setup() {
 	mkdir -p ${CHROOTDIR}/dev
 	mount -t devfs devfs ${CHROOTDIR}/dev
 	[ -e /etc/resolv.conf ] && cp /etc/resolv.conf \
 		${CHROOTDIR}/etc/resolv.conf
 	# Run ldconfig(8) in the chroot directory so /var/run/ld-elf*.so.hints
 	# is created.  This is needed by ports-mgmt/pkg.
 	eval chroot ${CHROOTDIR} /etc/rc.d/ldconfig forcerestart
 
 	# If MAKE_CONF and/or SRC_CONF are set and not character devices
 	# (/dev/null), copy them to the chroot.
 	if [ -e ${MAKE_CONF} ] && [ ! -c ${MAKE_CONF} ]; then
 		mkdir -p ${CHROOTDIR}/$(dirname ${MAKE_CONF})
 		cp ${MAKE_CONF} ${CHROOTDIR}/${MAKE_CONF}
 	fi
 	if [ -e ${SRC_CONF} ] && [ ! -c ${SRC_CONF} ]; then
 		mkdir -p ${CHROOTDIR}/$(dirname ${SRC_CONF})
 		cp ${SRC_CONF} ${CHROOTDIR}/${SRC_CONF}
 	fi
 
 	if [ -d ${CHROOTDIR}/usr/ports ]; then
 		# Trick the ports 'run-autotools-fixup' target to do the right
 		# thing.
 		_OSVERSION=$(chroot ${CHROOTDIR} /usr/bin/uname -U)
 		REVISION=$(chroot ${CHROOTDIR} make -C /usr/src/release -V REVISION)
 		BRANCH=$(chroot ${CHROOTDIR} make -C /usr/src/release -V BRANCH)
 		UNAME_r=${REVISION}-${BRANCH}
 		if [ -d ${CHROOTDIR}/usr/doc ] && [ -z "${NODOC}" ]; then
 			PBUILD_FLAGS="OSVERSION=${_OSVERSION} BATCH=yes"
 			PBUILD_FLAGS="${PBUILD_FLAGS} UNAME_r=${UNAME_r}"
 			PBUILD_FLAGS="${PBUILD_FLAGS} OSREL=${REVISION}"
 			chroot ${CHROOTDIR} make -C /usr/ports/textproc/docproj \
 				${PBUILD_FLAGS} OPTIONS_UNSET="FOP IGOR" \
+				FORCE_PKG_REGISTER=1 \
 				install clean distclean
 		fi
 	fi
 
 	if [ ! -z "${EMBEDDEDPORTS}" ]; then
 		for _PORT in ${EMBEDDEDPORTS}; do
 			eval chroot ${CHROOTDIR} make -C /usr/ports/${_PORT} \
 				BATCH=1 FORCE_PKG_REGISTER=1 install clean distclean
 		done
 	fi
 
 	buildenv_setup
 
 	return 0
 } # extra_chroot_setup()
 
 # chroot_build_target(): Build the userland and kernel for the build target.
 chroot_build_target() {
 	load_target_env
 	if [ ! -z "${EMBEDDEDBUILD}" ]; then
 		RELEASE_WMAKEFLAGS="${RELEASE_WMAKEFLAGS} \
 			TARGET=${EMBEDDED_TARGET} \
 			TARGET_ARCH=${EMBEDDED_TARGET_ARCH}"
 		RELEASE_KMAKEFLAGS="${RELEASE_KMAKEFLAGS} \
 			TARGET=${EMBEDDED_TARGET} \
 			TARGET_ARCH=${EMBEDDED_TARGET_ARCH}"
 	fi
 	eval chroot ${CHROOTDIR} make -C /usr/src ${RELEASE_WMAKEFLAGS} buildworld
 	eval chroot ${CHROOTDIR} make -C /usr/src ${RELEASE_KMAKEFLAGS} buildkernel
 
 	return 0
 } # chroot_build_target
 
 # chroot_build_release(): Invoke the 'make release' target.
 chroot_build_release() {
 	load_target_env
 	if [ ! -z "${WITH_VMIMAGES}" ]; then
 		if [ -z "${VMFORMATS}" ]; then
 			VMFORMATS="$(eval chroot ${CHROOTDIR} \
 				make -C /usr/src/release -V VMFORMATS)"
 		fi
 		if [ -z "${VMSIZE}" ]; then
 			VMSIZE="$(eval chroot ${CHROOTDIR} \
 				make -C /usr/src/release -V VMSIZE)"
 		fi
 		RELEASE_RMAKEFLAGS="${RELEASE_RMAKEFLAGS} \
 			VMFORMATS=\"${VMFORMATS}\" VMSIZE=${VMSIZE}"
 	fi
 	eval chroot ${CHROOTDIR} make -C /usr/src/release \
 		${RELEASE_RMAKEFLAGS} release
 	eval chroot ${CHROOTDIR} make -C /usr/src/release \
 		${RELEASE_RMAKEFLAGS} install DESTDIR=/R \
 		WITH_COMPRESSED_IMAGES=${WITH_COMPRESSED_IMAGES} \
 		WITH_COMPRESSED_VMIMAGES=${WITH_COMPRESSED_VMIMAGES}
 
 	return 0
 } # chroot_build_release()
 
 # chroot_arm_armv6_build_release(): Create arm/armv6 SD card image.
 chroot_arm_armv6_build_release() {
 	load_target_env
 	eval chroot ${CHROOTDIR} make -C /usr/src/release obj
 	if [ -e "${RELENGDIR}/tools/${EMBEDDED_TARGET}.subr" ]; then
 		. "${RELENGDIR}/tools/${EMBEDDED_TARGET}.subr"
 	fi
 	[ ! -z "${RELEASECONF}" ] && . "${RELEASECONF}"
 	WORLDDIR="$(eval chroot ${CHROOTDIR} make -C /usr/src/release -V WORLDDIR)"
 	OBJDIR="$(eval chroot ${CHROOTDIR} make -C /usr/src/release -V .OBJDIR)"
 	DESTDIR="${OBJDIR}/${KERNEL}"
 	IMGBASE="${CHROOTDIR}/${OBJDIR}/${KERNEL}.img"
 	OSRELEASE="$(eval chroot ${CHROOTDIR} make -C /usr/src/release \
 		TARGET=${EMBEDDED_TARGET} TARGET_ARCH=${EMBEDDED_TARGET_ARCH} \
 		-V OSRELEASE)"
 	chroot ${CHROOTDIR} mkdir -p ${DESTDIR}
 	chroot ${CHROOTDIR} truncate -s ${IMAGE_SIZE} ${IMGBASE##${CHROOTDIR}}
 	export mddev=$(chroot ${CHROOTDIR} \
 		mdconfig -f ${IMGBASE##${CHROOTDIR}} ${MD_ARGS})
 	arm_create_disk
 	arm_install_base
 	arm_install_uboot
 	mdconfig -d -u ${mddev}
 	chroot ${CHROOTDIR} rmdir ${DESTDIR}
 	mv ${IMGBASE} ${CHROOTDIR}/${OBJDIR}/${OSRELEASE}-${KERNEL}.img
 	chroot ${CHROOTDIR} mkdir -p /R
 	chroot ${CHROOTDIR} cp -p ${OBJDIR}/${OSRELEASE}-${KERNEL}.img \
 		/R/${OSRELEASE}-${KERNEL}.img
 	chroot ${CHROOTDIR} xz -T ${XZ_THREADS} /R/${OSRELEASE}-${KERNEL}.img
 	cd ${CHROOTDIR}/R && sha512 ${OSRELEASE}* \
 		> CHECKSUM.SHA512
 	cd ${CHROOTDIR}/R && sha256 ${OSRELEASE}* \
 		> CHECKSUM.SHA256
 
 	return 0
 } # chroot_arm_armv6_build_release()
 
 # main(): Start here.
 main() {
 	set -e # Everything must succeed
 	env_setup
 	while getopts c: opt; do
 		case ${opt} in
 			c)
 				RELEASECONF="${OPTARG}"
 				;;
 			\?)
 				usage
 				;;
 		esac
 	done
 	shift $(($OPTIND - 1))
 	if [ ! -z "${RELEASECONF}" ]; then
 		if [ -e "${RELEASECONF}" ]; then
 			. ${RELEASECONF}
 		else
 			echo "Nonexistent configuration file: ${RELEASECONF}"
 			echo "Using default build environment."
 		fi
 	fi
 	env_check
 	trap "umount ${CHROOTDIR}/dev" EXIT # Clean up devfs mount on exit
 	chroot_setup
 	extra_chroot_setup
 	chroot_build_target
 	${chroot_build_release_cmd}
 
 	return 0
 } # main()
 
 main "${@}"
Index: projects/clang380-import/share/man/man4/ismt.4
===================================================================
--- projects/clang380-import/share/man/man4/ismt.4	(revision 293686)
+++ projects/clang380-import/share/man/man4/ismt.4	(revision 293687)
@@ -1,59 +1,59 @@
 .\"
 .\" Copyright (c) 2014 Intel Corporation
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions, and the following disclaimer,
 .\"    without modification.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. Neither the name of Intel Corporation nor the names of its
 .\"    contributors may be used to endorse or promote products derived from
 .\"    this software without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 .\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 .\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
 .\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 .\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 .\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 .\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGES.
 .\"
 .\" ismt driver man page.
 .\"
 .\" Author: Jim Harris <jimharris@FreeBSD.org>
 .\"
 .\" $FreeBSD$
 .\"
-.Dd May 9, 2014
+.Dd January 11, 2016
 .Dt ISMT 4
 .Os
 .Sh NAME
 .Nm ismt
 .Nd Intel SMBus Message Transport (SMBus 2.0) driver
 .Sh SYNOPSIS
 .Cd device pci
 .Cd device smbus
 .Cd device smb
 .Cd device ismt
 .Sh DESCRIPTION
 This driver provides access to the SMBus 2.0 controller device contained
 in the Intel Atom S1200 and C2000 CPUs.
 .Sh SEE ALSO
 .Xr smb 4 ,
 .Xr smbus 4
 .Sh HISTORY
 The
 .Nm
 driver first appeared in
-.Fx 11.0 .
+.Fx 10.3 .
 .Sh AUTHORS
 .An Jim Harris Aq Mt jimharris@FreeBSD.org
Index: projects/clang380-import/share/man/man4
===================================================================
--- projects/clang380-import/share/man/man4	(revision 293686)
+++ projects/clang380-import/share/man/man4	(revision 293687)

Property changes on: projects/clang380-import/share/man/man4
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/share/man/man4:r293430-293685
Index: projects/clang380-import/share/man/man5/src.conf.5
===================================================================
--- projects/clang380-import/share/man/man5/src.conf.5	(revision 293686)
+++ projects/clang380-import/share/man/man5/src.conf.5	(revision 293687)
@@ -1,1506 +1,1527 @@
 .\" DO NOT EDIT-- this file is automatically generated.
 .\" from FreeBSD: head/tools/build/options/makeman 292283 2015-12-15 18:42:30Z bdrewery
 .\" $FreeBSD$
-.Dd December 15, 2015
+.Dd January 9, 2016
 .Dt SRC.CONF 5
 .Os
 .Sh NAME
 .Nm src.conf
 .Nd "source build options"
 .Sh DESCRIPTION
 The
 .Nm
 file contains settings that will apply to every build involving the
 .Fx
 source tree; see
 .Xr build 7 .
 .Pp
 The
 .Nm
 file uses the standard makefile syntax.
 However,
 .Nm
 should not specify any dependencies to
 .Xr make 1 .
 Instead,
 .Nm
 is to set
 .Xr make 1
 variables that control the aspects of how the system builds.
 .Pp
 The default location of
 .Nm
 is
 .Pa /etc/src.conf ,
 though an alternative location can be specified in the
 .Xr make 1
 variable
 .Va SRCCONF .
 Overriding the location of
 .Nm
 may be necessary if the system-wide settings are not suitable
 for a particular build.
 For instance, setting
 .Va SRCCONF
 to
 .Pa /dev/null
 effectively resets all build controls to their defaults.
 .Pp
 The only purpose of
 .Nm
 is to control the compilation of the
 .Fx
 source code, which is usually located in
 .Pa /usr/src .
 As a rule, the system administrator creates
 .Nm
 when the values of certain control variables need to be changed
 from their defaults.
 .Pp
 In addition, control variables can be specified
 for a particular build via the
 .Fl D
 option of
 .Xr make 1
 or in its environment; see
 .Xr environ 7 .
 .Pp
 The environment of
 .Xr make 1
 for the build can be controlled via the
 .Va SRC_ENV_CONF
 variable, which defaults to
 .Pa /etc/src-env.conf .
 Some examples that may only be set in this file are
 .Va WITH_DIRDEPS_BUILD ,
 and
 .Va WITH_META_MODE
 as they are environment-only variables.
 Note that
 .Va MAKEOBJDIRPREFIX
 may be set here only when using
 .Va WITH_DIRDEPS_BUILD .
 .Pp
 The values of variables are ignored regardless of their setting;
 even if they would be set to
 .Dq Li FALSE
 or
 .Dq Li NO .
 Just the existence of an option will cause
 it to be honoured by
 .Xr make 1 .
 .Pp
 The following list provides a name and short description for variables
 that can be used for source builds.
 .Bl -tag -width indent
 .It Va WITHOUT_ACCT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_ACCT 223201 2011-06-17 20:47:44Z ed
 Set to not build process accounting tools such as
 .Xr accton 8
 and
 .Xr sa 8 .
 .It Va WITHOUT_ACPI
 .\" from FreeBSD: head/tools/build/options/WITHOUT_ACPI 156932 2006-03-21 07:50:50Z ru
 Set to not build
 .Xr acpiconf 8 ,
 .Xr acpidump 8
 and related programs.
 .It Va WITHOUT_AMD
 .\" from FreeBSD: head/tools/build/options/WITHOUT_AMD 183242 2008-09-21 22:02:26Z sam
 Set to not build
 .Xr amd 8 ,
 and related programs.
 .It Va WITHOUT_APM
 .\" from FreeBSD: head/tools/build/options/WITHOUT_APM 183242 2008-09-21 22:02:26Z sam
 Set to not build
 .Xr apm 8 ,
 .Xr apmd 8
 and related programs.
 .It Va WITHOUT_ASSERT_DEBUG
 .\" from FreeBSD: head/tools/build/options/WITHOUT_ASSERT_DEBUG 162215 2006-09-11 13:55:27Z ru
 Set to compile programs and libraries without the
 .Xr assert 3
 checks.
 .It Va WITHOUT_AT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_AT 183242 2008-09-21 22:02:26Z sam
 Set to not build
 .Xr at 1
 and related utilities.
 .It Va WITHOUT_ATM
 .\" from FreeBSD: head/tools/build/options/WITHOUT_ATM 156932 2006-03-21 07:50:50Z ru
 Set to not build
 programs and libraries related to ATM networking.
 .It Va WITHOUT_AUDIT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_AUDIT 156932 2006-03-21 07:50:50Z ru
 Set to not build audit support into system programs.
 .It Va WITHOUT_AUTHPF
 .\" from FreeBSD: head/tools/build/options/WITHOUT_AUTHPF 156932 2006-03-21 07:50:50Z ru
 Set to not build
 .Xr authpf 8 .
 .It Va WITHOUT_AUTOFS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_AUTOFS 277728 2015-01-26 07:15:49Z ngie
 Set to not build
 .Xr autofs 4
 related programs, libraries, and kernel modules.
 .It Va WITH_AUTO_OBJ
 .\" from FreeBSD: head/tools/build/options/WITH_AUTO_OBJ 284708 2015-06-22 20:21:57Z sjg
 Enable automatic creation of objdirs.
 .Pp
 This must be set in the environment, make command line, or
 .Pa /etc/src-env.conf ,
 not
 .Pa /etc/src.conf .
 .It Va WITHOUT_BHYVE
 .\" from FreeBSD: head/tools/build/options/WITHOUT_BHYVE 277727 2015-01-26 06:44:48Z ngie
 Set to not build or install
 .Xr bhyve 8 ,
 associated utilities, and examples.
 .Pp
 This option only affects amd64/amd64.
 .It Va WITHOUT_BINUTILS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_BINUTILS 286332 2015-08-05 18:30:00Z emaste
 Set to not build or install binutils (as, ld, objcopy, and objdump) as part
 of the normal system build.
 The resulting system cannot build programs from source.
 .Pp
 It is a default setting on
 arm64/aarch64.
 .It Va WITHOUT_BINUTILS_BOOTSTRAP
 .\" from FreeBSD: head/tools/build/options/WITHOUT_BINUTILS_BOOTSTRAP 264660 2014-04-18 17:03:58Z imp
 Set to not build binutils (as, c++-filt, gconv,
 ld, nm, objcopy, objdump, readelf, size and strip)
 as part of the bootstrap process.
 .Bf -symbolic
 The option does not work for build targets unless some alternative
 toolchain is provided.
 .Ef
 .Pp
 It is a default setting on
 arm64/aarch64.
 .It Va WITHOUT_BLUETOOTH
 .\" from FreeBSD: head/tools/build/options/WITHOUT_BLUETOOTH 156932 2006-03-21 07:50:50Z ru
 Set to not build Bluetooth related kernel modules, programs and libraries.
 .It Va WITHOUT_BOOT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_BOOT 156932 2006-03-21 07:50:50Z ru
 Set to not build the boot blocks and loader.
 .It Va WITHOUT_BOOTPARAMD
 .\" from FreeBSD: head/tools/build/options/WITHOUT_BOOTPARAMD 278192 2015-02-04 10:19:32Z ngie
 Set to not build or install
 .Xr bootparamd 8 .
 .It Va WITHOUT_BOOTPD
 .\" from FreeBSD: head/tools/build/options/WITHOUT_BOOTPD 278192 2015-02-04 10:19:32Z ngie
 Set to not build or install
 .Xr bootpd 8 .
 .It Va WITHOUT_BSDINSTALL
 .\" from FreeBSD: head/tools/build/options/WITHOUT_BSDINSTALL 277677 2015-01-25 04:43:13Z ngie
 Set to not build
 .Xr bsdinstall 8 ,
 .Xr sade 8 ,
 and related programs.
 .It Va WITHOUT_BSD_CPIO
 .\" from FreeBSD: head/tools/build/options/WITHOUT_BSD_CPIO 179813 2008-06-16 05:48:15Z dougb
 Set to not build the BSD licensed version of cpio based on
 .Xr libarchive 3 .
 .It Va WITH_BSD_GREP
 .\" from FreeBSD: head/tools/build/options/WITH_BSD_GREP 222273 2011-05-25 01:04:12Z obrien
 Install BSD-licensed grep as '[ef]grep' instead of GNU grep.
 .It Va WITHOUT_BSNMP
 .\" from FreeBSD: head/tools/build/options/WITHOUT_BSNMP 183306 2008-09-23 16:15:42Z sam
 Set to not build or install
 .Xr bsnmpd 1
 and related libraries and data files.
 .It Va WITHOUT_BZIP2
 .\" from FreeBSD: head/tools/build/options/WITHOUT_BZIP2 174550 2007-12-12 16:43:17Z ru
 Set to not build contributed bzip2 software as a part of the base system.
 .Bf -symbolic
 The option has no effect yet.
 .Ef
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_BZIP2_SUPPORT
 .El
 .It Va WITHOUT_BZIP2_SUPPORT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_BZIP2_SUPPORT 166255 2007-01-26 10:19:08Z delphij
 Set to build some programs without optional bzip2 support.
 .It Va WITHOUT_CALENDAR
 .\" from FreeBSD: head/tools/build/options/WITHOUT_CALENDAR 156932 2006-03-21 07:50:50Z ru
 Set to not build
 .Xr calendar 1 .
 .It Va WITHOUT_CAPSICUM
 .\" from FreeBSD: head/tools/build/options/WITHOUT_CAPSICUM 229319 2012-01-02 21:57:58Z rwatson
 Set to not build Capsicum support into system programs.
 .It Va WITHOUT_CASPER
 .\" from FreeBSD: head/tools/build/options/WITHOUT_CASPER 258838 2013-12-02 08:21:28Z pjd
 Set to not build Casper program and related libraries.
 .It Va WITH_CCACHE_BUILD
 .\" from FreeBSD: head/tools/build/options/WITH_CCACHE_BUILD 290526 2015-11-08 00:50:18Z bdrewery
 Set to use
 .Xr ccache 1
 for the build.
 No configuration is required except to install the
 .Sy devel/ccache
 package.
 Using with
 .Xr distcc 1
 should set
 .Sy CCACHE_PREFIX=/usr/local/bin/distcc .
 The default cache directory of
 .Pa $HOME/.ccache
 will be used, which can be overridden by setting
 .Sy CCACHE_DIR .
 The
 .Sy CCACHE_COMPILERCHECK
 option defaults to
 .Sy content
 when using the in-tree bootstrap compiler,
 and
 .Sy mtime
 when using an external compiler.
 The
 .Sy CCACHE_CPP2
 option is used for Clang but not GCC.
 ccache works best when combined with the
 .Sy WITH_FAST_DEPEND
 option.
 .Pp
 Sharing a cache between multiple work directories requires using a layout
 similar to
 .Pa /some/prefix/src
 .Pa /some/prefix/obj
 and an environment such as:
 .Bd -literal -offset indent
 CCACHE_BASEDIR='${SRCTOP:H}' MAKEOBJDIRPREFIX='${SRCTOP:H}/obj'
 .Ed
 .Pp
 See
 .Xr ccache 1
 for more configuration options.
 .It Va WITHOUT_CCD
 .\" from FreeBSD: head/tools/build/options/WITHOUT_CCD 277678 2015-01-25 04:52:48Z ngie
 Set to not build
 .Xr geom_ccd 4
 and related utilities.
 .It Va WITHOUT_CDDL
 .\" from FreeBSD: head/tools/build/options/WITHOUT_CDDL 163861 2006-11-01 09:02:11Z jb
 Set to not build code licensed under Sun's CDDL.
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_CTF
 .It
 .Va WITHOUT_ZFS
 .El
 .It Va WITHOUT_CLANG
 .\" from FreeBSD: head/tools/build/options/WITHOUT_CLANG 264660 2014-04-18 17:03:58Z imp
 Set to not build the Clang C/C++ compiler during the regular phase of the build.
 .Pp
 It is a default setting on
 mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32 and sparc64/sparc64.
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_CLANG_EXTRAS
 .It
 .Va WITHOUT_CLANG_FULL
 .El
 .It Va WITH_CLANG
 .\" from FreeBSD: head/tools/build/options/WITH_CLANG 264660 2014-04-18 17:03:58Z imp
 Set to build the Clang C/C++ compiler during the normal phase of the build.
 .Pp
 It is a default setting on
 amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, arm64/aarch64, i386/i386, pc98/i386, powerpc/powerpc and powerpc/powerpc64.
 .It Va WITHOUT_CLANG_BOOTSTRAP
 .\" from FreeBSD: head/tools/build/options/WITHOUT_CLANG_BOOTSTRAP 273177 2014-10-16 18:28:11Z skreuzer
 Set to not build the Clang C/C++ compiler during the bootstrap phase of the build.
 You must enable either gcc or clang bootstrap to be able to build the system,
 unless an alternative compiler is provided via
 XCC.
 .Pp
 It is a default setting on
 mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64.
 .It Va WITH_CLANG_BOOTSTRAP
 .\" from FreeBSD: head/tools/build/options/WITH_CLANG_BOOTSTRAP 264660 2014-04-18 17:03:58Z imp
 Set to build the Clang C/C++ compiler during the bootstrap phase of the build.
 .Pp
 It is a default setting on
 amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, arm64/aarch64, i386/i386 and pc98/i386.
 .It Va WITH_CLANG_EXTRAS
 .\" from FreeBSD: head/tools/build/options/WITH_CLANG_EXTRAS 231057 2012-02-05 23:56:22Z dim
 Set to build additional clang and llvm tools, such as bugpoint.
 .It Va WITHOUT_CLANG_FULL
 .\" from FreeBSD: head/tools/build/options/WITHOUT_CLANG_FULL 246259 2013-02-02 22:28:29Z dim
 Set to avoid building the ARCMigrate, Rewriter and StaticAnalyzer components of
 the Clang C/C++ compiler.
 .Pp
 It is a default setting on
 mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32 and sparc64/sparc64.
 .It Va WITH_CLANG_FULL
 .\" from FreeBSD: head/tools/build/options/WITH_CLANG_FULL 246259 2013-02-02 22:28:29Z dim
 Set to build the ARCMigrate, Rewriter and StaticAnalyzer components of the
 Clang C/C++ compiler.
 .Pp
 It is a default setting on
 amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, arm64/aarch64, i386/i386, pc98/i386, powerpc/powerpc and powerpc/powerpc64.
 .It Va WITHOUT_CLANG_IS_CC
 .\" from FreeBSD: head/tools/build/options/WITHOUT_CLANG_IS_CC 242629 2012-11-05 21:53:23Z brooks
 Set to install the GCC compiler as
 .Pa /usr/bin/cc ,
 .Pa /usr/bin/c++
 and
 .Pa /usr/bin/cpp .
 .Pp
 It is a default setting on
 mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64.
 .It Va WITH_CLANG_IS_CC
 .\" from FreeBSD: head/tools/build/options/WITH_CLANG_IS_CC 235342 2012-05-12 16:12:36Z gjb
 Set to install the Clang C/C++ compiler as
 .Pa /usr/bin/cc ,
 .Pa /usr/bin/c++
 and
 .Pa /usr/bin/cpp .
 .Pp
 It is a default setting on
 amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, arm64/aarch64, i386/i386 and pc98/i386.
 .It Va WITHOUT_CPP
 .\" from FreeBSD: head/tools/build/options/WITHOUT_CPP 156932 2006-03-21 07:50:50Z ru
 Set to not build
 .Xr cpp 1 .
 .It Va WITHOUT_CROSS_COMPILER
 .\" from FreeBSD: head/tools/build/options/WITHOUT_CROSS_COMPILER 275138 2014-11-26 20:43:09Z gjb
 Set to not build any cross compiler in the cross-tools stage of buildworld.
 If you are compiling a different version of
 .Fx
 than what is installed on the system, you will need to provide an alternate
 compiler with XCC to ensure success.
 If you are compiling with an identical version of
 .Fx
 to the host, this option may be safely used.
 This option may also be safe when the host version of
 .Fx
 is close to the sources being built, but all bets are off if there have
 been any changes to the toolchain between the versions.
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_BINUTILS_BOOTSTRAP
 .It
 .Va WITHOUT_CLANG_BOOTSTRAP
 .It
 .Va WITHOUT_ELFTOOLCHAIN_BOOTSTRAP
 .It
 .Va WITHOUT_GCC_BOOTSTRAP
 .El
 .It Va WITHOUT_CRYPT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_CRYPT 156932 2006-03-21 07:50:50Z ru
 Set to not build any crypto code.
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_KERBEROS
 .It
 .Va WITHOUT_KERBEROS_SUPPORT
 .It
 .Va WITHOUT_OPENSSH
 .It
 .Va WITHOUT_OPENSSL
 .El
 .Pp
 When set, the following options are also in effect:
 .Pp
 .Bl -inset -compact
 .It Va WITHOUT_GSSAPI
 (unless
 .Va WITH_GSSAPI
 is set explicitly)
 .El
 .It Va WITH_CTF
 .\" from FreeBSD: head/tools/build/options/WITH_CTF 228159 2011-11-30 18:22:44Z fjoe
 Set to compile with CTF (Compact C Type Format) data.
 CTF data encapsulates a reduced form of debugging information
 similar to DWARF and the venerable stabs and is required for DTrace.
 .It Va WITHOUT_CTM
 .\" from FreeBSD: head/tools/build/options/WITHOUT_CTM 183242 2008-09-21 22:02:26Z sam
 Set to not build
 .Xr ctm 1
 and related utilities.
 .It Va WITHOUT_CUSE
 .\" from FreeBSD: head/tools/build/options/WITHOUT_CUSE 270171 2014-08-19 15:40:26Z hselasky
 Set to not build CUSE-related programs and libraries.
 .It Va WITHOUT_CXX
 .\" from FreeBSD: head/tools/build/options/WITHOUT_CXX 281053 2015-04-03 23:55:04Z bdrewery
 Set to not build
 .Xr c++ 1
 and related libraries.
 It will also prevent building of
 .Xr gperf 1
 and
 .Xr devd 8 .
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_CLANG
 .It
 .Va WITHOUT_CLANG_EXTRAS
 .It
 .Va WITHOUT_CLANG_FULL
 .It
 .Va WITHOUT_GNUCXX
 .It
 .Va WITHOUT_GROFF
 .El
 .It Va WITHOUT_DEBUG_FILES
 .\" from FreeBSD: head/tools/build/options/WITHOUT_DEBUG_FILES 290059 2015-10-27 20:49:56Z emaste
 Set to avoid building or installing standalone debug files for each
 executable binary and shared library.
 .It Va WITHOUT_DICT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_DICT 156932 2006-03-21 07:50:50Z ru
 Set to not build the Webster dictionary files.
 .It Va WITH_DIRDEPS_BUILD
 .\" from FreeBSD: head/tools/build/options/WITH_DIRDEPS_BUILD 290816 2015-11-14 03:24:48Z sjg
 Enable building in meta mode.
 This is an experimental build feature.
 For details see
 http://www.crufty.net/sjg/docs/freebsd-meta-mode.htm.
 .Pp
 The build is driven by dirdeps.mk using
 .Va DIRDEPS
 stored in
 Makefile.depend files found in each directory.
 .Pp
 The build can be started from anywhere, and behaves the same.
 The initial instance of
 .Xr make 1 
 recursively reads
 .Va DIRDEPS
 from Makefile.depend
 computing a graph of tree dependencies from the current origin.
 Setting
 .Va NO_DIRDEPS
 will skip checking dirdep dependencies and will only build in the current
 directory.
 .Pp
 As each target is made
 .Xr make 1
 produces a meta file which is used to capture (and compare)
 the command line,
 as well as any command output.
 If
 .Xr filemon 4
 is available the meta file will also capture a record of files
 used to produce the target by tracking syscalls.
 .Pp
 The build will hide commands run unless
 .Va NO_SILENT
 is defined.
 .Pp
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITH_INSTALL_AS_USER
 .El
 .Pp
 When set, the following options are also in effect:
 .Pp
 .Bl -inset -compact
 .It Va WITH_AUTO_OBJ
 (unless
 .Va WITHOUT_AUTO_OBJ
 is set explicitly)
 .It Va WITH_META_MODE
 (unless
 .Va WITHOUT_META_MODE
 is set explicitly)
 .It Va WITH_STAGING
 (unless
 .Va WITHOUT_STAGING
 is set explicitly)
 .It Va WITH_STAGING_MAN
 (unless
 .Va WITHOUT_STAGING_MAN
 is set explicitly)
 .It Va WITH_STAGING_PROG
 (unless
 .Va WITHOUT_STAGING_PROG
 is set explicitly)
 .It Va WITH_SYSROOT
 (unless
 .Va WITHOUT_SYSROOT
 is set explicitly)
 .El
 .Pp
 This must be set in the environment, make command line, or
 .Pa /etc/src-env.conf ,
 not
 .Pa /etc/src.conf .
 .It Va WITH_DIRDEPS_CACHE
 .\" from FreeBSD: head/tools/build/options/WITH_DIRDEPS_CACHE 290816 2015-11-14 03:24:48Z sjg
 Cache result of dirdeps.mk which can save significant time
 for subsequent builds.
 Depends on
 .Va WITH_DIRDEPS_BUILD .
 .Pp
 This must be set in the environment, make command line, or
 .Pa /etc/src-env.conf ,
 not
 .Pa /etc/src.conf .
 .It Va WITHOUT_DMAGENT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_DMAGENT 262335 2014-02-22 13:05:23Z bapt
 Set to not build the dma Mail Transport Agent.
 .It Va WITHOUT_DOCCOMPRESS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_DOCCOMPRESS 266752 2014-05-27 15:52:27Z gjb
 Set to not install compressed system documentation.
 Only the uncompressed version will be installed.
 .It Va WITH_DTRACE_TESTS
 .\" from FreeBSD: head/tools/build/options/WITH_DTRACE_TESTS 286174 2015-08-02 00:37:33Z markj
 Set to build and install the DTrace test suite in
 .Pa /usr/tests/cddl/usr.sbin/dtrace .
 This test suite is considered experimental on architectures other than
 amd64/amd64 and running it may cause system instability.
 .It Va WITHOUT_DYNAMICROOT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_DYNAMICROOT 156932 2006-03-21 07:50:50Z ru
 Set this if you do not want to link
 .Pa /bin
 and
 .Pa /sbin
 dynamically.
 .It Va WITHOUT_ED_CRYPTO
 .\" from FreeBSD: head/tools/build/options/WITHOUT_ED_CRYPTO 235660 2012-05-19 20:05:27Z marcel
 Set to build
 .Xr ed 1
 without support for encryption/decryption.
 .It Va WITHOUT_EE
 .\" from FreeBSD: head/tools/build/options/WITHOUT_EE 277663 2015-01-25 00:03:44Z ngie
 Set to not build and install
 .Xr edit 1 ,
 .Xr ee 1 ,
 and related programs.
 .It Va WITH_EISA
 .\" from FreeBSD: head/tools/build/options/WITH_EISA 264654 2014-04-18 16:53:06Z imp
 Set to build EISA kernel modules.
 .It Va WITHOUT_ELFCOPY_AS_OBJCOPY
 .\" from FreeBSD: head/tools/build/options/WITHOUT_ELFCOPY_AS_OBJCOPY 286030 2015-07-29 18:45:38Z emaste
 Set to build and install
 .Xr objcopy 1
 from GNU Binutils, instead of the one from ELF Tool Chain.
 .Pp
 It is a default setting on
 amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, pc98/i386, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64.
 .It Va WITH_ELFCOPY_AS_OBJCOPY
 .\" from FreeBSD: head/tools/build/options/WITH_ELFCOPY_AS_OBJCOPY 286030 2015-07-29 18:45:38Z emaste
 Set to build and install ELF Tool Chain's elfcopy as
 .Xr objcopy 1 ,
 instead of the one from GNU Binutils.
 .Pp
 It is a default setting on
 arm64/aarch64.
 .It Va WITHOUT_EXAMPLES
 .\" from FreeBSD: head/tools/build/options/WITHOUT_EXAMPLES 156938 2006-03-21 09:06:24Z ru
 Set to avoid installing examples to
 .Pa /usr/share/examples/ .
 .It Va WITH_FAST_DEPEND
 .\" from FreeBSD: head/tools/build/options/WITH_FAST_DEPEND 290433 2015-11-06 04:45:29Z bdrewery
 Set to generate
 .Sy .depend
 files in the build during compilation instead of the
 historical
 .Xr mkdep 1
 call during the "make depend" phase.
 .It Va WITHOUT_FDT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_FDT 221539 2011-05-06 19:10:27Z ru
 Set to not build Flattened Device Tree support as part of the base system.
 This includes the device tree compiler (dtc) and libfdt support library.
 .It Va WITHOUT_FILE
 .\" from FreeBSD: head/tools/build/options/WITHOUT_FILE 278193 2015-02-04 10:24:40Z ngie
 Set to not build
 .Xr file 1
 and related programs.
 .It Va WITHOUT_FINGER
 .\" from FreeBSD: head/tools/build/options/WITHOUT_FINGER 278192 2015-02-04 10:19:32Z ngie
 Set to not build or install
 .Xr finger 1
 and
 .Xr fingerd 8 .
 .It Va WITHOUT_FLOPPY
 .\" from FreeBSD: head/tools/build/options/WITHOUT_FLOPPY 221540 2011-05-06 19:13:03Z ru
 Set to not build or install programs
 for operating floppy disk driver.
 .It Va WITHOUT_FMTREE
 .\" from FreeBSD: head/tools/build/options/WITHOUT_FMTREE 261299 2014-01-30 21:37:43Z brooks
 Set to not build and install
 .Pa /usr/sbin/fmtree .
 .It Va WITHOUT_FORMAT_EXTENSIONS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_FORMAT_EXTENSIONS 250658 2013-05-15 13:04:10Z brooks
 Set to not enable
 .Fl fformat-extensions
 when compiling the kernel.
 Also disables all format checking.
 .It Va WITHOUT_FORTH
 .\" from FreeBSD: head/tools/build/options/WITHOUT_FORTH 156932 2006-03-21 07:50:50Z ru
 Set to build bootloaders without Forth support.
 .It Va WITHOUT_FP_LIBC
 .\" from FreeBSD: head/tools/build/options/WITHOUT_FP_LIBC 156932 2006-03-21 07:50:50Z ru
 Set to build
 .Nm libc
 without floating-point support.
 .It Va WITHOUT_FREEBSD_UPDATE
 .\" from FreeBSD: head/tools/build/options/WITHOUT_FREEBSD_UPDATE 183242 2008-09-21 22:02:26Z sam
 Set to not build
 .Xr freebsd-update 8 .
 .It Va WITHOUT_FTP
 .\" from FreeBSD: head/tools/build/options/WITHOUT_FTP 278192 2015-02-04 10:19:32Z ngie
 Set to not build or install
 .Xr ftp 1
 and
 .Xr ftpd 8 .
 .It Va WITHOUT_GAMES
 .\" from FreeBSD: head/tools/build/options/WITHOUT_GAMES 156932 2006-03-21 07:50:50Z ru
 Set to not build games.
 .It Va WITHOUT_GCC
 .\" from FreeBSD: head/tools/build/options/WITHOUT_GCC 264660 2014-04-18 17:03:58Z imp
 Set to not build and install gcc and g++ as part of the normal build process.
 .Pp
 It is a default setting on
 amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, arm64/aarch64, i386/i386 and pc98/i386.
 .It Va WITH_GCC
 .\" from FreeBSD: head/tools/build/options/WITH_GCC 255326 2013-09-06 20:49:48Z zeising
 Set to build and install gcc and g++.
 .Pp
 It is a default setting on
 mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64.
 .It Va WITHOUT_GCC_BOOTSTRAP
 .\" from FreeBSD: head/tools/build/options/WITHOUT_GCC_BOOTSTRAP 273177 2014-10-16 18:28:11Z skreuzer
 Set to not build gcc and g++ as part of the bootstrap process.
 You must enable either gcc or clang bootstrap to be able to build the system,
 unless an alternative compiler is provided via
 XCC.
 .Pp
 It is a default setting on
 amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, arm64/aarch64, i386/i386 and pc98/i386.
 .It Va WITH_GCC_BOOTSTRAP
 .\" from FreeBSD: head/tools/build/options/WITH_GCC_BOOTSTRAP 264660 2014-04-18 17:03:58Z imp
 Set to build gcc and g++ as part of the bootstrap process.
 .Pp
 It is a default setting on
 mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64.
 .It Va WITHOUT_GCOV
 .\" from FreeBSD: head/tools/build/options/WITHOUT_GCOV 156932 2006-03-21 07:50:50Z ru
 Set to not build the
 .Xr gcov 1
 tool.
 .It Va WITHOUT_GDB
 .\" from FreeBSD: head/tools/build/options/WITHOUT_GDB 156932 2006-03-21 07:50:50Z ru
 Set to not build
 .Xr gdb 1 .
 .Pp
 It is a default setting on
 arm64/aarch64.
 .It Va WITHOUT_GNU
 .\" from FreeBSD: head/tools/build/options/WITHOUT_GNU 174550 2007-12-12 16:43:17Z ru
 Set to not build contributed GNU software as a part of the base system.
 This option can be useful if the system built must not contain any code
 covered by the GNU Public License due to legal reasons.
 .Bf -symbolic
 The option has no effect yet.
 .Ef
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_GNU_SUPPORT
 .El
 .It Va WITHOUT_GNUCXX
 .\" from FreeBSD: head/tools/build/options/WITHOUT_GNUCXX 255321 2013-09-06 20:08:03Z theraven
 Do not build the GNU C++ stack (g++, libstdc++).
 This is the default on platforms where clang is the system compiler.
 .Pp
 It is a default setting on
 amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, arm64/aarch64, i386/i386 and pc98/i386.
 .It Va WITH_GNUCXX
 .\" from FreeBSD: head/tools/build/options/WITH_GNUCXX 255321 2013-09-06 20:08:03Z theraven
 Build the GNU C++ stack (g++, libstdc++).
 This is the default on platforms where gcc is the system compiler.
 .Pp
 It is a default setting on
 mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64.
 .It Va WITHOUT_GNU_GREP_COMPAT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_GNU_GREP_COMPAT 273421 2014-10-21 20:44:33Z emaste
 Set this option to omit the gnu extensions to grep from being included in
 BSD grep.
 .It Va WITHOUT_GNU_SUPPORT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_GNU_SUPPORT 156932 2006-03-21 07:50:50Z ru
 Set to build some programs without optional GNU support.
 .It Va WITHOUT_GPIO
 .\" from FreeBSD: head/tools/build/options/WITHOUT_GPIO 228081 2011-11-28 17:54:34Z dim
 Set to not build
 .Xr gpioctl 8
 as part of the base system.
 .It Va WITHOUT_GPL_DTC
 .\" from FreeBSD: head/tools/build/options/WITHOUT_GPL_DTC 264515 2014-04-15 20:41:55Z imp
 Set to build the BSD licensed version of the device tree compiler, instead of the
 GPL'd one from elinux.org.
 .It Va WITHOUT_GROFF
 .\" from FreeBSD: head/tools/build/options/WITHOUT_GROFF 218941 2011-02-22 08:13:49Z uqs
 Set to not build
 .Xr groff 1
 and
 .Xr vgrind 1 .
 You should consider installing the textproc/groff port to not break
 .Xr man 1 .
 .It Va WITHOUT_GSSAPI
 .\" from FreeBSD: head/tools/build/options/WITHOUT_GSSAPI 174548 2007-12-12 16:39:32Z ru
 Set to not build libgssapi.
 .It Va WITHOUT_HAST
 .\" from FreeBSD: head/tools/build/options/WITHOUT_HAST 277725 2015-01-26 06:27:07Z ngie
 Set to not build
 .Xr hastd 8
 and related utilities.
 .It Va WITH_HESIOD
 .\" from FreeBSD: head/tools/build/options/WITH_HESIOD 156932 2006-03-21 07:50:50Z ru
 Set to build Hesiod support.
 .It Va WITHOUT_HTML
 .\" from FreeBSD: head/tools/build/options/WITHOUT_HTML 156932 2006-03-21 07:50:50Z ru
 Set to not build HTML docs.
 .It Va WITHOUT_HYPERV
 .\" from FreeBSD: head/tools/build/options/WITHOUT_HYPERV 271493 2014-09-13 02:15:31Z delphij
 Set to not build or install HyperV utilities.
 .It Va WITHOUT_ICONV
 .\" from FreeBSD: head/tools/build/options/WITHOUT_ICONV 254919 2013-08-26 17:15:56Z antoine
 Set to not build iconv as part of libc.
 .It Va WITHOUT_INCLUDES
 .\" from FreeBSD: head/tools/build/options/WITHOUT_INCLUDES 275138 2014-11-26 20:43:09Z gjb
 Set to not install header files.
 This option used to be spelled
 .Va NO_INCS .
 .Bf -symbolic
 The option does not work for build targets.
 .Ef
 .It Va WITHOUT_INET
 .\" from FreeBSD: head/tools/build/options/WITHOUT_INET 221266 2011-04-30 17:58:28Z bz
 Set to not build programs and libraries related to IPv4 networking.
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_INET_SUPPORT
 .El
 .It Va WITHOUT_INET6
 .\" from FreeBSD: head/tools/build/options/WITHOUT_INET6 156932 2006-03-21 07:50:50Z ru
 Set to not build
 programs and libraries related to IPv6 networking.
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_INET6_SUPPORT
 .El
 .It Va WITHOUT_INET6_SUPPORT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_INET6_SUPPORT 156932 2006-03-21 07:50:50Z ru
 Set to build libraries, programs, and kernel modules without IPv6 support.
 .It Va WITHOUT_INETD
 .\" from FreeBSD: head/tools/build/options/WITHOUT_INETD 278192 2015-02-04 10:19:32Z ngie
 Set to not build
 .Xr inetd 8 .
 .It Va WITHOUT_INET_SUPPORT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_INET_SUPPORT 221266 2011-04-30 17:58:28Z bz
 Set to build libraries, programs, and kernel modules without IPv4 support.
 .It Va WITHOUT_INSTALLLIB
 .\" from FreeBSD: head/tools/build/options/WITHOUT_INSTALLLIB 174497 2007-12-09 21:56:21Z dougb
 Set this if you do not want to install optional libraries.
 For example when creating a
 .Xr nanobsd 8
 image.
 .It Va WITH_INSTALL_AS_USER
 .\" from FreeBSD: head/tools/build/options/WITH_INSTALL_AS_USER 238021 2012-07-02 20:24:01Z marcel
 Set to make install targets succeed for non-root users by installing
 files with owner and group attributes set to that of the user running
 the
 .Xr make 1
 command.
 The user still has to set the
 .Va DESTDIR
 variable to point to a directory where the user has write permissions.
 .It Va WITHOUT_IPFILTER
 .\" from FreeBSD: head/tools/build/options/WITHOUT_IPFILTER 156932 2006-03-21 07:50:50Z ru
 Set to not build IP Filter package.
 .It Va WITHOUT_IPFW
 .\" from FreeBSD: head/tools/build/options/WITHOUT_IPFW 183242 2008-09-21 22:02:26Z sam
 Set to not build IPFW tools.
 .It Va WITHOUT_ISCSI
 .\" from FreeBSD: head/tools/build/options/WITHOUT_ISCSI 277675 2015-01-25 04:20:11Z ngie
 Set to not build
 .Xr iscsid 8
 and related utilities.
 .It Va WITHOUT_JAIL
 .\" from FreeBSD: head/tools/build/options/WITHOUT_JAIL 249966 2013-04-27 04:09:09Z eadler
 Set to not build tools for the support of jails; e.g.,
 .Xr jail 8 .
 .It Va WITHOUT_KDUMP
 .\" from FreeBSD: head/tools/build/options/WITHOUT_KDUMP 240690 2012-09-19 11:38:37Z zeising
 Set to not build
 .Xr kdump 1
 and
 .Xr truss 1 .
 .It Va WITHOUT_KERBEROS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_KERBEROS 174549 2007-12-12 16:42:03Z ru
 Set this if you do not want to build Kerberos 5 (KTH Heimdal).
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_KERBEROS_SUPPORT
 .El
 .Pp
 When set, the following options are also in effect:
 .Pp
 .Bl -inset -compact
 .It Va WITHOUT_GSSAPI
 (unless
 .Va WITH_GSSAPI
 is set explicitly)
 .El
 .It Va WITHOUT_KERBEROS_SUPPORT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_KERBEROS_SUPPORT 251794 2013-06-15 20:29:07Z eadler
 Set to build some programs without Kerberos support, like
 .Xr ssh 1 ,
 .Xr telnet 1 ,
 .Xr sshd 8 ,
 and
 .Xr telnetd 8 .
 .It Va WITHOUT_KERNEL_SYMBOLS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_KERNEL_SYMBOLS 222189 2011-05-22 18:23:17Z imp
 Set to not install kernel symbol files.
 .Bf -symbolic
 This option is recommended for those people who have small root partitions.
 .Ef
 .It Va WITHOUT_KVM
 .\" from FreeBSD: head/tools/build/options/WITHOUT_KVM 174550 2007-12-12 16:43:17Z ru
 Set to not build the
 .Nm libkvm
 library as a part of the base system.
 .Bf -symbolic
 The option has no effect yet.
 .Ef
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_KVM_SUPPORT
 .El
 .It Va WITHOUT_KVM_SUPPORT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_KVM_SUPPORT 170644 2007-06-13 02:08:04Z sepotvin
 Set to build some programs without optional
 .Nm libkvm
 support.
 .It Va WITHOUT_LDNS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_LDNS 255591 2013-09-15 13:11:13Z des
 Setting this variable will prevent the LDNS library from being built.
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_LDNS_UTILS
 .It
 .Va WITHOUT_UNBOUND
 .El
 .It Va WITHOUT_LDNS_UTILS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_LDNS_UTILS 255850 2013-09-24 14:33:31Z des
 Setting this variable will prevent building the LDNS utilities
 .Xr drill 1
 and
 .Xr host 1 .
 .It Va WITHOUT_LEGACY_CONSOLE
 .\" from FreeBSD: head/tools/build/options/WITHOUT_LEGACY_CONSOLE 249966 2013-04-27 04:09:09Z eadler
 Set to not build programs that support a legacy PC console; e.g.,
 .Xr kbdcontrol 8
 and
 .Xr vidcontrol 8 .
 .It Va WITHOUT_LIB32
 .\" from FreeBSD: head/tools/build/options/WITHOUT_LIB32 274664 2014-11-18 17:06:48Z imp
 On 64-bit platforms, set to not build 32-bit library set and a
 .Nm ld-elf32.so.1
 runtime linker.
 .It Va WITHOUT_LIBCPLUSPLUS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_LIBCPLUSPLUS 246262 2013-02-02 22:42:46Z dim
 Set to avoid building libcxxrt and libc++.
 .It Va WITHOUT_LIBPTHREAD
 .\" from FreeBSD: head/tools/build/options/WITHOUT_LIBPTHREAD 188848 2009-02-20 11:09:55Z mtm
 Set to not build the
 .Nm libpthread
 providing library,
 .Nm libthr .
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_LIBTHR
 .El
 .It Va WITHOUT_LIBTHR
 .\" from FreeBSD: head/tools/build/options/WITHOUT_LIBTHR 156932 2006-03-21 07:50:50Z ru
 Set to not build the
 .Nm libthr
 (1:1 threading)
 library.
+.It Va WITHOUT_LLDB
+.\" from FreeBSD: head/tools/build/options/WITHOUT_LLDB 289275 2015-10-14 00:23:31Z emaste
+Set to not build the LLDB debugger.
+.Pp
+It is a default setting on
+arm/arm, arm/armeb, arm/armv6, arm/armv6hf, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, pc98/i386, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64.
 .It Va WITH_LLDB
 .\" from FreeBSD: head/tools/build/options/WITH_LLDB 255722 2013-09-20 01:52:02Z emaste
 Set to build the LLDB debugger.
+.Pp
+It is a default setting on
+amd64/amd64 and arm64/aarch64.
+.It Va WITHOUT_LLVM_LIBUNWIND
+.\" from FreeBSD: head/tools/build/options/WITHOUT_LLVM_LIBUNWIND 293450 2016-01-09 00:42:07Z emaste
+Set to use GCC's stack unwinder (instead of LLVM's libunwind).
+.Pp
+It is a default setting on
+amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm/armv6hf, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, pc98/i386, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64.
+.It Va WITH_LLVM_LIBUNWIND
+.\" from FreeBSD: head/tools/build/options/WITH_LLVM_LIBUNWIND 293450 2016-01-09 00:42:07Z emaste
+Set to use LLVM's libunwind stack unwinder (instead of GCC's unwinder).
+.Pp
+It is a default setting on
+arm64/aarch64.
 .It Va WITHOUT_LOCALES
 .\" from FreeBSD: head/tools/build/options/WITHOUT_LOCALES 156932 2006-03-21 07:50:50Z ru
 Set to not build localization files; see
 .Xr locale 1 .
 .It Va WITHOUT_LOCATE
 .\" from FreeBSD: head/tools/build/options/WITHOUT_LOCATE 183242 2008-09-21 22:02:26Z sam
 Set to not build
 .Xr locate 1
 and related programs.
 .It Va WITHOUT_LPR
 .\" from FreeBSD: head/tools/build/options/WITHOUT_LPR 156932 2006-03-21 07:50:50Z ru
 Set to not build
 .Xr lpr 1
 and related programs.
 .It Va WITHOUT_LS_COLORS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_LS_COLORS 235660 2012-05-19 20:05:27Z marcel
 Set to build
 .Xr ls 1
 without support for colors to distinguish file types.
 .It Va WITHOUT_LZMA_SUPPORT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_LZMA_SUPPORT 245171 2013-01-08 18:37:12Z obrien
 Set to build some programs without optional lzma compression support.
 .It Va WITHOUT_MAIL
 .\" from FreeBSD: head/tools/build/options/WITHOUT_MAIL 183242 2008-09-21 22:02:26Z sam
 Set to not build any mail support (MUA or MTA).
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_DMAGENT
 .It
 .Va WITHOUT_MAILWRAPPER
 .It
 .Va WITHOUT_SENDMAIL
 .El
 .It Va WITHOUT_MAILWRAPPER
 .\" from FreeBSD: head/tools/build/options/WITHOUT_MAILWRAPPER 156932 2006-03-21 07:50:50Z ru
 Set to not build the
 .Xr mailwrapper 8
 MTA selector.
 .It Va WITHOUT_MAKE
 .\" from FreeBSD: head/tools/build/options/WITHOUT_MAKE 183242 2008-09-21 22:02:26Z sam
 Set to not install
 .Xr make 1
 and related support files.
 .It Va WITHOUT_MAN
 .\" from FreeBSD: head/tools/build/options/WITHOUT_MAN 156932 2006-03-21 07:50:50Z ru
 Set to not build manual pages.
 When set, the following options are also in effect:
 .Pp
 .Bl -inset -compact
 .It Va WITHOUT_MAN_UTILS
 (unless
 .Va WITH_MAN_UTILS
 is set explicitly)
 .El
 .It Va WITHOUT_MANCOMPRESS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_MANCOMPRESS 266752 2014-05-27 15:52:27Z gjb
 Set to not install compressed man pages.
 Only the uncompressed versions will be installed.
 .It Va WITHOUT_MANDOCDB
 .\" from FreeBSD: head/tools/build/options/WITHOUT_MANDOCDB 283777 2015-05-30 17:41:37Z bapt
 Use the
 .Xr mandoc 1
 version of
 .Xr makewhatis 8
 database and utilities.
 .It Va WITHOUT_MAN_UTILS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_MAN_UTILS 208322 2010-05-20 00:07:21Z jkim
 Set to not build utilities for manual pages,
 .Xr apropos 1 ,
 .Xr catman 1 ,
 .Xr makewhatis 1 ,
 .Xr man 1 ,
 .Xr whatis 1 ,
 .Xr manctl 8 ,
 and related support files.
 .It Va WITH_META_MODE
 .\" from FreeBSD: head/tools/build/options/WITH_META_MODE 290816 2015-11-14 03:24:48Z sjg
 Create meta files when not doing DIRDEPS_BUILD.
 The meta files can be useful for debugging.
 .Pp
 This must be set in the environment, make command line, or
 .Pa /etc/src-env.conf ,
 not
 .Pa /etc/src.conf .
 .It Va WITH_NAND
 .\" from FreeBSD: head/tools/build/options/WITH_NAND 235537 2012-05-17 10:11:18Z gber
 Set to build the NAND Flash components.
 .It Va WITHOUT_NDIS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_NDIS 183242 2008-09-21 22:02:26Z sam
 Set to not build programs and libraries
 related to NDIS emulation support.
 .It Va WITHOUT_NETCAT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_NETCAT 156932 2006-03-21 07:50:50Z ru
 Set to not build
 .Xr nc 1
 utility.
 .It Va WITHOUT_NETGRAPH
 .\" from FreeBSD: head/tools/build/options/WITHOUT_NETGRAPH 183242 2008-09-21 22:02:26Z sam
 Set to not build applications to support
 .Xr netgraph 4 .
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_ATM
 .It
 .Va WITHOUT_BLUETOOTH
 .It
 .Va WITHOUT_NETGRAPH_SUPPORT
 .El
 .It Va WITHOUT_NETGRAPH_SUPPORT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_NETGRAPH_SUPPORT 183305 2008-09-23 16:11:15Z sam
 Set to build libraries, programs, and kernel modules without netgraph support.
 .It Va WITHOUT_NIS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_NIS 156932 2006-03-21 07:50:50Z ru
 Set to not build
 .Xr NIS 8
 support and related programs.
 If set, you might need to adapt your
 .Xr nsswitch.conf 5
 and remove
 .Sq nis
 entries.
 .It Va WITHOUT_NLS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_NLS 156932 2006-03-21 07:50:50Z ru
 Set to not build NLS catalogs.
 .It Va WITHOUT_NLS_CATALOGS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_NLS_CATALOGS 156932 2006-03-21 07:50:50Z ru
 Set to not build NLS catalog support for
 .Xr csh 1 .
 .It Va WITHOUT_NS_CACHING
 .\" from FreeBSD: head/tools/build/options/WITHOUT_NS_CACHING 172803 2007-10-19 14:01:25Z ru
 Set to disable name caching in the
 .Pa nsswitch
 subsystem.
 The generic caching daemon,
 .Xr nscd 8 ,
 will not be built either if this option is set.
 .It Va WITHOUT_NTP
 .\" from FreeBSD: head/tools/build/options/WITHOUT_NTP 183242 2008-09-21 22:02:26Z sam
 Set to not build
 .Xr ntpd 8
 and related programs.
 .It Va WITH_OFED
 .\" from FreeBSD: head/tools/build/options/WITH_OFED 228081 2011-11-28 17:54:34Z dim
 Set to build the
 .Dq "OpenFabrics Enterprise Distribution"
 Infiniband software stack.
 .It Va WITH_OPENLDAP
 .\" from FreeBSD: head/tools/build/options/WITH_OPENLDAP 264902 2014-04-24 23:17:31Z imp
 Enable building openldap support for kerberos.
 .It Va WITHOUT_OPENSSH
 .\" from FreeBSD: head/tools/build/options/WITHOUT_OPENSSH 156932 2006-03-21 07:50:50Z ru
 Set to not build OpenSSH.
 .It Va WITHOUT_OPENSSL
 .\" from FreeBSD: head/tools/build/options/WITHOUT_OPENSSL 156932 2006-03-21 07:50:50Z ru
 Set to not build OpenSSL.
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_KERBEROS
 .It
 .Va WITHOUT_KERBEROS_SUPPORT
 .It
 .Va WITHOUT_OPENSSH
 .El
 .Pp
 When set, the following options are also in effect:
 .Pp
 .Bl -inset -compact
 .It Va WITHOUT_GSSAPI
 (unless
 .Va WITH_GSSAPI
 is set explicitly)
 .El
 .It Va WITHOUT_PAM
 .\" from FreeBSD: head/tools/build/options/WITHOUT_PAM 174550 2007-12-12 16:43:17Z ru
 Set to not build PAM library and modules.
 .Bf -symbolic
 This option is deprecated and does nothing.
 .Ef
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_PAM_SUPPORT
 .El
 .It Va WITHOUT_PAM_SUPPORT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_PAM_SUPPORT 156932 2006-03-21 07:50:50Z ru
 Set to build some programs without PAM support, particularly
 .Xr ftpd 8
 and
 .Xr ppp 8 .
 .It Va WITHOUT_PC_SYSINSTALL
 .\" from FreeBSD: head/tools/build/options/WITHOUT_PC_SYSINSTALL 245606 2013-01-18 15:57:09Z eadler
 Set to not build
 .Xr pc-sysinstall 8
 and related programs.
 .It Va WITHOUT_PF
 .\" from FreeBSD: head/tools/build/options/WITHOUT_PF 156932 2006-03-21 07:50:50Z ru
 Set to not build PF firewall package.
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_AUTHPF
 .El
 .It Va WITHOUT_PKGBOOTSTRAP
 .\" from FreeBSD: head/tools/build/options/WITHOUT_PKGBOOTSTRAP 258924 2013-12-04 15:58:42Z bdrewery
 Set to not build
 .Xr pkg 7
 bootstrap tool.
 .It Va WITHOUT_PMC
 .\" from FreeBSD: head/tools/build/options/WITHOUT_PMC 183242 2008-09-21 22:02:26Z sam
 Set to not build
 .Xr pmccontrol 8
 and related programs.
 .It Va WITHOUT_PORTSNAP
 .\" from FreeBSD: head/tools/build/options/WITHOUT_PORTSNAP 183242 2008-09-21 22:02:26Z sam
 Set to not build or install
 .Xr portsnap 8
 and related files.
 .It Va WITHOUT_PPP
 .\" from FreeBSD: head/tools/build/options/WITHOUT_PPP 183242 2008-09-21 22:02:26Z sam
 Set to not build
 .Xr ppp 8
 and related programs.
 .It Va WITHOUT_PROFILE
 .\" from FreeBSD: head/tools/build/options/WITHOUT_PROFILE 228196 2011-12-02 09:09:54Z fjoe
 Set to avoid compiling profiled libraries.
 .It Va WITHOUT_QUOTAS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_QUOTAS 183242 2008-09-21 22:02:26Z sam
 Set to not build
 .Xr quota 8
 and related programs.
 .It Va WITHOUT_RADIUS_SUPPORT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_RADIUS_SUPPORT 278182 2015-02-04 06:53:45Z ngie
 Set to not build radius support into various applications, like
 .Xr pam_radius 8
 and
 .Xr ppp 8 .
 .It Va WITHOUT_RBOOTD
 .\" from FreeBSD: head/tools/build/options/WITHOUT_RBOOTD 278192 2015-02-04 10:19:32Z ngie
 Set to not build or install
 .Xr rbootd 8 .
 .It Va WITHOUT_RCMDS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_RCMDS 156932 2006-03-21 07:50:50Z ru
 Disable building of the
 .Bx
 r-commands.
 This includes
 .Xr rlogin 1 ,
 .Xr rsh 1 ,
 etc.
 .It Va WITHOUT_RCS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_RCS 275138 2014-11-26 20:43:09Z gjb
 Set to not build
 .Xr rcs 1 ,
 .Xr etcupdate 8 ,
 and related utilities.
 .It Va WITHOUT_RESCUE
 .\" from FreeBSD: head/tools/build/options/WITHOUT_RESCUE 156932 2006-03-21 07:50:50Z ru
 Set to not build
 .Xr rescue 8 .
 .It Va WITHOUT_ROUTED
 .\" from FreeBSD: head/tools/build/options/WITHOUT_ROUTED 183242 2008-09-21 22:02:26Z sam
 Set to not build
 .Xr routed 8
 utility.
 .It Va WITHOUT_SENDMAIL
 .\" from FreeBSD: head/tools/build/options/WITHOUT_SENDMAIL 156932 2006-03-21 07:50:50Z ru
 Set to not build
 .Xr sendmail 8
 and related programs.
 .It Va WITHOUT_SETUID_LOGIN
 .\" from FreeBSD: head/tools/build/options/WITHOUT_SETUID_LOGIN 156932 2006-03-21 07:50:50Z ru
 Set this to disable the installation of
 .Xr login 1
 as a set-user-ID root program.
 .It Va WITHOUT_SHAREDOCS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_SHAREDOCS 156932 2006-03-21 07:50:50Z ru
 Set to not build the
 .Bx 4.4
 legacy docs.
 .It Va WITH_SHARED_TOOLCHAIN
 .\" from FreeBSD: head/tools/build/options/WITH_SHARED_TOOLCHAIN 235342 2012-05-12 16:12:36Z gjb
 Set to build the toolchain binaries shared.
 The set includes
 .Xr cc 1 ,
 .Xr make 1
 and necessary utilities like assembler, linker and library archive manager.
 .It Va WITH_SORT_THREADS
 .\" from FreeBSD: head/tools/build/options/WITH_SORT_THREADS 264158 2014-04-05 18:00:45Z imp
 Set to enable threads in
 .Xr sort 1 .
 .It Va WITHOUT_SOURCELESS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_SOURCELESS 230972 2012-02-04 00:54:43Z rmh
 Set to not build kernel modules that include sourceless code (either microcode or native code for host CPU).
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_SOURCELESS_HOST
 .It
 .Va WITHOUT_SOURCELESS_UCODE
 .El
 .It Va WITHOUT_SOURCELESS_HOST
 .\" from FreeBSD: head/tools/build/options/WITHOUT_SOURCELESS_HOST 230972 2012-02-04 00:54:43Z rmh
 Set to not build kernel modules that include sourceless native code for host CPU.
 .It Va WITHOUT_SOURCELESS_UCODE
 .\" from FreeBSD: head/tools/build/options/WITHOUT_SOURCELESS_UCODE 230972 2012-02-04 00:54:43Z rmh
 Set to not build kernel modules that include sourceless microcode.
 .It Va WITHOUT_SSP
 .\" from FreeBSD: head/tools/build/options/WITHOUT_SSP 180012 2008-06-25 21:33:28Z ru
 Set to not build world with propolice stack smashing protection.
 .It Va WITH_STAGING
 .\" from FreeBSD: head/tools/build/options/WITH_STAGING 290816 2015-11-14 03:24:48Z sjg
 Enable staging of files to a stage tree.
 This can be best thought of as auto-install to
 .Va DESTDIR
 with some extra meta data to ensure dependencies can be tracked.
 Depends on
 .Va WITH_DIRDEPS_BUILD .
 When set, the following options are also in effect:
 .Pp
 .Bl -inset -compact
 .It Va WITH_STAGING_MAN
 (unless
 .Va WITHOUT_STAGING_MAN
 is set explicitly)
 .It Va WITH_STAGING_PROG
 (unless
 .Va WITHOUT_STAGING_PROG
 is set explicitly)
 .El
 .Pp
 This must be set in the environment, make command line, or
 .Pa /etc/src-env.conf ,
 not
 .Pa /etc/src.conf .
 .It Va WITH_STAGING_MAN
 .\" from FreeBSD: head/tools/build/options/WITH_STAGING_MAN 284708 2015-06-22 20:21:57Z sjg
 Enable staging of MAN pages to stage tree.
 .It Va WITH_STAGING_PROG
 .\" from FreeBSD: head/tools/build/options/WITH_STAGING_PROG 284708 2015-06-22 20:21:57Z sjg
 Enable staging of PROGs to stage tree.
 .It Va WITH_STALE_STAGED
 .\" from FreeBSD: head/tools/build/options/WITH_STALE_STAGED 284708 2015-06-22 20:21:57Z sjg
 Check staged files are not stale.
 .It Va WITH_SVN
 .\" from FreeBSD: head/tools/build/options/WITH_SVN 252561 2013-07-03 12:36:47Z zeising
 Set to install
 .Xr svnlite 1
 as
 .Xr svn 1 .
 .It Va WITHOUT_SVNLITE
 .\" from FreeBSD: head/tools/build/options/WITHOUT_SVNLITE 252561 2013-07-03 12:36:47Z zeising
 Set to not build
 .Xr svnlite 1
 and related programs.
 .It Va WITHOUT_SYMVER
 .\" from FreeBSD: head/tools/build/options/WITHOUT_SYMVER 169649 2007-05-17 05:03:24Z deischen
 Set to disable symbol versioning when building shared libraries.
 .It Va WITHOUT_SYSCONS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_SYSCONS 156932 2006-03-21 07:50:50Z ru
 Set to not build
 .Xr syscons 4
 support files such as keyboard maps, fonts, and screen output maps.
 .It Va WITH_SYSROOT
 .\" from FreeBSD: head/tools/build/options/WITH_SYSROOT 290816 2015-11-14 03:24:48Z sjg
 Enable use of sysroot during build.
 Depends on
 .Va WITH_DIRDEPS_BUILD .
 .Pp
 This must be set in the environment, make command line, or
 .Pa /etc/src-env.conf ,
 not
 .Pa /etc/src.conf .
 .It Va WITHOUT_TALK
 .\" from FreeBSD: head/tools/build/options/WITHOUT_TALK 277676 2015-01-25 04:37:44Z ngie
 Set to not build or install
 .Xr talk 1
 and
 .Xr talkd 8 .
 .It Va WITHOUT_TCP_WRAPPERS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_TCP_WRAPPERS 278192 2015-02-04 10:19:32Z ngie
 Set to not build or install
 .Xr tcpd 8 ,
 and related utilities.
 .It Va WITHOUT_TCSH
 .\" from FreeBSD: head/tools/build/options/WITHOUT_TCSH 156932 2006-03-21 07:50:50Z ru
 Set to not build and install
 .Pa /bin/csh
 (which is
 .Xr tcsh 1 ) .
 .It Va WITHOUT_TELNET
 .\" from FreeBSD: head/tools/build/options/WITHOUT_TELNET 183242 2008-09-21 22:02:26Z sam
 Set to not build
 .Xr telnet 1
 and related programs.
 .It Va WITHOUT_TESTS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_TESTS 268778 2014-07-16 21:40:11Z jmmv
 Set to not build nor install the
 .Fx
 Test Suite in
 .Pa /usr/tests/ .
 See
 .Xr tests 7
 for more details.
 This also disables the build of all test-related dependencies, including ATF.
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_DTRACE_TESTS
 .It
 .Va WITHOUT_TESTS_SUPPORT
 .El
 .It Va WITHOUT_TESTS_SUPPORT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_TESTS_SUPPORT 274665 2014-11-18 17:06:50Z imp
 Set to disable the build of all test-related dependencies, including ATF.
 .It Va WITHOUT_TEXTPROC
 .\" from FreeBSD: head/tools/build/options/WITHOUT_TEXTPROC 183242 2008-09-21 22:02:26Z sam
 Set to not build
 programs used for text processing.
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_GROFF
 .El
 .It Va WITHOUT_TFTP
 .\" from FreeBSD: head/tools/build/options/WITHOUT_TFTP 278192 2015-02-04 10:19:32Z ngie
 Set to not build or install
 .Xr tftp 1
 and
 .Xr tftpd 8 .
 .It Va WITHOUT_TIMED
 .\" from FreeBSD: head/tools/build/options/WITHOUT_TIMED 278192 2015-02-04 10:19:32Z ngie
 Set to not build or install
 .Xr timed 8 .
 .It Va WITHOUT_TOOLCHAIN
 .\" from FreeBSD: head/tools/build/options/WITHOUT_TOOLCHAIN 273172 2014-10-16 15:55:13Z brooks
 Set to not install headers or
 programs used for program development,
 compilers, debuggers etc.
 .Bf -symbolic
 The option does not work for build targets.
 .Ef
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_BINUTILS
 .It
 .Va WITHOUT_CLANG
 .It
 .Va WITHOUT_CLANG_EXTRAS
 .It
 .Va WITHOUT_CLANG_FULL
 .It
 .Va WITHOUT_GCC
 .It
 .Va WITHOUT_GDB
 .It
 .Va WITHOUT_INCLUDES
 .El
 .It Va WITHOUT_UNBOUND
 .\" from FreeBSD: head/tools/build/options/WITHOUT_UNBOUND 255597 2013-09-15 14:51:23Z des
 Set to not build
 .Xr unbound 8
 and related programs.
 .It Va WITHOUT_USB
 .\" from FreeBSD: head/tools/build/options/WITHOUT_USB 156932 2006-03-21 07:50:50Z ru
 Set to not build USB-related programs and libraries.
 .It Va WITHOUT_USB_GADGET_EXAMPLES
 .\" from FreeBSD: head/tools/build/options/WITHOUT_USB_GADGET_EXAMPLES 274665 2014-11-18 17:06:50Z imp
 Set to not build USB gadget kernel modules.
 .It Va WITHOUT_UTMPX
 .\" from FreeBSD: head/tools/build/options/WITHOUT_UTMPX 231530 2012-02-11 20:28:42Z ed
 Set to not build user accounting tools such as
 .Xr last 1 ,
 .Xr users 1 ,
 .Xr who 1 ,
 .Xr ac 8 ,
 .Xr lastlogin 8
 and
 .Xr utx 8 .
 .It Va WITHOUT_VI
 .\" from FreeBSD: head/tools/build/options/WITHOUT_VI 264903 2014-04-24 23:17:40Z imp
 Set to not build and install vi, view, ex and related programs.
 .It Va WITHOUT_VT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_VT 268022 2014-06-30 00:20:12Z emaste
 Set to not build
 .Xr vt 4
 support files (fonts and keymaps).
 .It Va WITHOUT_WARNS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_WARNS 276559 2015-01-02 18:57:58Z bapt
 Set this to not add warning flags to the compiler invocations.
 Useful as a temporary workaround when code enters the tree
 which triggers warnings in environments that differ from the
 original developer's.
 .It Va WITHOUT_WIRELESS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_WIRELESS 183242 2008-09-21 22:02:26Z sam
 Set to not build programs used for 802.11 wireless networks; especially
 .Xr wpa_supplicant 8
 and
 .Xr hostapd 8 .
 When set, it also enforces the following options:
 .Pp
 .Bl -item -compact
 .It
 .Va WITHOUT_WIRELESS_SUPPORT
 .El
 .It Va WITHOUT_WIRELESS_SUPPORT
 .\" from FreeBSD: head/tools/build/options/WITHOUT_WIRELESS_SUPPORT 183305 2008-09-23 16:11:15Z sam
 Set to build libraries, programs, and kernel modules without
 802.11 wireless support.
 .It Va WITHOUT_WPA_SUPPLICANT_EAPOL
 .\" from FreeBSD: head/tools/build/options/WITHOUT_WPA_SUPPLICANT_EAPOL 156932 2006-03-21 07:50:50Z ru
 Build
 .Xr wpa_supplicant 8
 without support for the IEEE 802.1X protocol and without
 support for EAP-PEAP, EAP-TLS, EAP-LEAP, and EAP-TTLS
 protocols (usable only via 802.1X).
 .It Va WITHOUT_ZFS
 .\" from FreeBSD: head/tools/build/options/WITHOUT_ZFS 168409 2007-04-06 02:13:30Z pjd
 Set to not build ZFS file system.
 .It Va WITHOUT_ZONEINFO
 .\" from FreeBSD: head/tools/build/options/WITHOUT_ZONEINFO 235342 2012-05-12 16:12:36Z gjb
 Set to not build the timezone database.
 .El
 .Sh FILES
 .Bl -tag -compact -width Pa
 .It Pa /etc/src.conf
 .It Pa /etc/src-env.conf
 .It Pa /usr/share/mk/bsd.own.mk
 .El
 .Sh SEE ALSO
 .Xr make 1 ,
 .Xr make.conf 5 ,
 .Xr build 7 ,
 .Xr ports 7
 .Sh HISTORY
 The
 .Nm
 file appeared in
 .Fx 7.0 .
 .Sh AUTHORS
 This manual page was autogenerated.
Index: projects/clang380-import/share/misc/committers-src.dot
===================================================================
--- projects/clang380-import/share/misc/committers-src.dot	(revision 293686)
+++ projects/clang380-import/share/misc/committers-src.dot	(revision 293687)
@@ -1,755 +1,757 @@
 # $FreeBSD$
 
 # This file is meant to list all FreeBSD src committers and describe the
 # mentor-mentee relationships between them.
 # The graphical output can be generated from this file with the following
 # command:
 # $ dot -T png -o file.png committers-src.dot
 #
 # The dot binary is part of the graphics/graphviz port.
 
 digraph src {
 
 # Node definitions follow this example:
 #
 #   foo [label="Foo Bar\nfoo@FreeBSD.org\n????/??/??"]
 #
 # ????/??/?? is the date when the commit bit was obtained, usually the one you
 # can find looking at svn logs for the svnadmin/access file.
 # Use YYYY/MM/DD format.
 #
 # For returned commit bits, the node definition will follow this example:
 #
 #   foo [label="Foo Bar\nfoo@FreeBSD.org\n????/??/??\n????/??/??"]
 #
 # The first date is the same as for an active committer, the second date is
 # the date when the commit bit has been returned. Again, check svn logs.
 
 node [color=grey62, style=filled, bgcolor=black];
 
 # Alumni go here.. Try to keep things sorted.
 
 alm [label="Andrew Moore\nalm@FreeBSD.org\n1993/06/12\n????/??/??"]
 anholt [label="Eric Anholt\nanholt@FreeBSD.org\n2002/04/22\n2008/08/07"]
 archie [label="Archie Cobbs\narchie@FreeBSD.org\n1998/11/06\n2006/06/09"]
 arr [label="Andrew R. Reiter\narr@FreeBSD.org\n2001/11/02\n2005/05/25"]
 arun [label="Arun Sharma\narun@FreeBSD.org\n2003/03/06\n2006/12/16"]
 asmodai [label="Jeroen Ruigrok\nasmodai@FreeBSD.org\n1999/12/16\n2001/11/16"]
 benjsc [label="Benjamin Close\nbenjsc@FreeBSD.org\n2007/02/09\n2010/09/15"]
 billf [label="Bill Fumerola\nbillf@FreeBSD.org\n1998/11/11\n2008/11/10"]
 bmah [label="Bruce A. Mah\nbmah@FreeBSD.org\n2002/01/29\n2009/09/13"]
 bmilekic [label="Bosko Milekic\nbmilekic@FreeBSD.org\n2000/09/21\n2008/11/10"]
 bushman [label="Michael Bushkov\nbushman@FreeBSD.org\n2007/03/10\n2010/04/29"]
 carl [label="Carl Delsey\ncarl@FreeBSD.org\n2013/01/14\n2014/03/06"]
 ceri [label="Ceri Davies\nceri@FreeBSD.org\n2006/11/07\n2012/03/07"]
 cjc [label="Crist J. Clark\ncjc@FreeBSD.org\n2001/06/01\n2006/12/29"]
 davidxu [label="David Xu\ndavidxu@FreeBSD.org\n2002/09/02\n2014/04/14"]
 dds [label="Diomidis Spinellis\ndds@FreeBSD.org\n2003/06/20\n2010/09/22"]
 dhartmei [label="Daniel Hartmeier\ndhartmei@FreeBSD.org\n2004/04/06\n2008/12/08"]
 dmlb [label="Duncan Barclay\ndmlb@FreeBSD.org\n2001/12/14\n2008/11/10"]
 dougb [label="Doug Barton\ndougb@FreeBSD.org\n2000/10/26\n2012/10/08"]
 eik [label="Oliver Eikemeier\neik@FreeBSD.org\n2004/05/20\n2008/11/10"]
 furuta [label="Atsushi Furuta\nfuruta@FreeBSD.org\n2000/06/21\n2003/03/08"]
 gj [label="Gary L. Jennejohn\ngj@FreeBSD.org\n1994/??/??\n2006/04/28"]
 groudier [label="Gerard Roudier\ngroudier@FreeBSD.org\n1999/12/30\n2006/04/06"]
 jake [label="Jake Burkholder\njake@FreeBSD.org\n2000/05/16\n2008/11/10"]
 jayanth [label="Jayanth Vijayaraghavan\njayanth@FreeBSD.org\n2000/05/08\n2008/11/10"]
 jb [label="John Birrell\njb@FreeBSD.org\n1997/03/27\n2009/12/15"]
 jdp [label="John Polstra\njdp@FreeBSD.org\n1995/12/07\n2008/02/26"]
 jedgar [label="Chris D. Faulhaber\njedgar@FreeBSD.org\n1999/12/15\n2006/04/07"]
 jkh [label="Jordan K. Hubbard\njkh@FreeBSD.org\n1993/06/12\n2008/06/13"]
 jlemon [label="Jonathan Lemon\njlemon@FreeBSD.org\n1997/08/14\n2008/11/10"]
 joe [label="Josef Karthauser\njoe@FreeBSD.org\n1999/10/22\n2008/08/10"]
 jtc [label="J.T. Conklin\njtc@FreeBSD.org\n1993/06/12\n????/??/??"]
 kargl [label="Steven G. Kargl\nkargl@FreeBSD.org\n2011/01/17\n2015/06/28"]
 kbyanc [label="Kelly Yancey\nkbyanc@FreeBSD.org\n2000/07/11\n2006/07/25"]
 keichii [label="Michael Wu\nkeichii@FreeBSD.org\n2001/03/07\n2006/04/28"]
 linimon [label="Mark Linimon\nlinimon@FreeBSD.org\n2006/09/30\n2008/05/04"]
 lulf [label="Ulf Lilleengen\nlulf@FreeBSD.org\n2007/10/24\n2012/01/19"]
 mb [label="Maxim Bolotin\nmb@FreeBSD.org\n2000/04/06\n2003/03/08"]
 marks [label="Mark Santcroos\nmarks@FreeBSD.org\n2004/03/18\n2008/09/29"]
 mike [label="Mike Barcroft\nmike@FreeBSD.org\n2001/07/17\n2006/04/28"]
 msmith [label="Mike Smith\nmsmith@FreeBSD.org\n1996/10/22\n2003/12/15"]
 murray [label="Murray Stokely\nmurray@FreeBSD.org\n2000/04/05\n2010/07/25"]
 mux [label="Maxime Henrion\nmux@FreeBSD.org\n2002/03/03\n2011/06/22"]
 nate [label="Nate Williams\nnate@FreeBSD.org\n1993/06/12\n2003/12/15"]
 njl [label="Nate Lawson\nnjl@FreeBSD.org\n2002/08/07\n2008/02/16"]
 non [label="Noriaki Mitsunaga\nnon@FreeBSD.org\n2000/06/19\n2007/03/06"]
 onoe [label="Atsushi Onoe\nonoe@FreeBSD.org\n2000/07/21\n2008/11/10"]
 rafan [label="Rong-En Fan\nrafan@FreeBSD.org\n2007/01/31\n2012/07/23"]
 randi [label="Randi Harper\nrandi@FreeBSD.org\n2010/04/20\n2012/05/10"]
 rgrimes [label="Rod Grimes\nrgrimes@FreeBSD.org\n1993/06/12\n2003/03/08"]
 rink [label="Rink Springer\nrink@FreeBSD.org\n2006/01/16\n2010/11/04"]
 robert [label="Robert Drehmel\nrobert@FreeBSD.org\n2001/08/23\n2006/05/13"]
 sah [label="Sam Hopkins\nsah@FreeBSD.org\n2004/12/15\n2008/11/10"]
 shafeeq [label="Shafeeq Sinnamohideen\nshafeeq@FreeBSD.org\n2000/06/19\n2006/04/06"]
 sheldonh [label="Sheldon Hearn\nsheldonh@FreeBSD.org\n1999/06/14\n2006/05/13"]
 shiba [label="Takeshi Shibagaki\nshiba@FreeBSD.org\n2000/06/19\n2008/11/10"]
 shin [label="Yoshinobu Inoue\nshin@FreeBSD.org\n1999/07/29\n2003/03/08"]
 snb [label="Nick Barkas\nsnb@FreeBSD.org\n2009/05/05\n2010/11/04"]
 tmm [label="Thomas Moestl\ntmm@FreeBSD.org\n2001/03/07\n2006/07/12"]
 toshi [label="Toshihiko Arai\ntoshi@FreeBSD.org\n2000/07/06\n2003/03/08"]
 tshiozak [label="Takuya SHIOZAKI\ntshiozak@FreeBSD.org\n2001/04/25\n2003/03/08"]
 uch [label="UCHIYAMA Yasushi\nuch@FreeBSD.org\n2000/06/21\n2002/04/24"]
 wilko [label="Wilko Bulte\nwilko@FreeBSD.org\n2000/01/13\n2013/01/17"]
 yar [label="Yar Tikhiy\nyar@FreeBSD.org\n2001/03/25\n2012/05/23"]
 zack [label="Zack Kirsch\nzack@FreeBSD.org\n2010/11/05\n2012/09/08"]
 
 
 node [color=lightblue2, style=filled, bgcolor=black];
 
 # Current src committers go here. Try to keep things sorted.
 
 ache [label="Andrey Chernov\nache@FreeBSD.org\n1993/10/31"]
 achim [label="Achim Leubner\nachim@FreeBSD.org\n2013/01/23"]
 adrian [label="Adrian Chadd\nadrian@FreeBSD.org\n2000/07/03"]
 ae [label="Andrey V. Elsukov\nae@FreeBSD.org\n2010/06/03"]
 akiyama [label="Shunsuke Akiyama\nakiyama@FreeBSD.org\n2000/06/19"]
 alc [label="Alan Cox\nalc@FreeBSD.org\n1999/02/23"]
 allanjude [label="Allan Jude\nallanjude@FreeBSD.org\n2015/07/30"]
 ambrisko [label="Doug Ambrisko\nambrisko@FreeBSD.org\n2001/12/19"]
 anchie [label="Ana Kukec\nanchie@FreeBSD.org\n2010/04/14"]
 andre [label="Andre Oppermann\nandre@FreeBSD.org\n2003/11/12"]
 andreast [label="Andreas Tobler\nandreast@FreeBSD.org\n2010/09/05"]
 andrew [label="Andrew Turner\nandrew@FreeBSD.org\n2010/07/19"]
 antoine [label="Antoine Brodin\nantoine@FreeBSD.org\n2008/02/03"]
 araujo [label="Marcelo Araujo\naraujo@FreeBSD.org\n2015/08/04"]
 ariff [label="Ariff Abdullah\nariff@FreeBSD.org\n2005/11/14"]
 art [label="Artem Belevich\nart@FreeBSD.org\n2011/03/29"]
 arybchik [label="Andrew Rybchenko\narybchik@FreeBSD.org\n2014/10/12"]
 asomers [label="Alan Somers\nasomers@FreeBSD.org\n2013/04/24"]
 avg [label="Andriy Gapon\navg@FreeBSD.org\n2009/02/18"]
 avos [label="Andriy Voskoboinyk\navos@FreeBSD.org\n2015/09/24"]
 bapt [label="Baptiste Daroussin\nbapt@FreeBSD.org\n2011/12/23"]
 bdrewery [label="Bryan Drewery\nbdrewery@FreeBSD.org\n2013/12/14"]
 benl [label="Ben Laurie\nbenl@FreeBSD.org\n2011/05/18"]
 benno [label="Benno Rice\nbenno@FreeBSD.org\n2000/11/02"]
 bms [label="Bruce M Simpson\nbms@FreeBSD.org\n2003/08/06"]
 br [label="Ruslan Bukin\nbr@FreeBSD.org\n2013/09/02"]
 brian [label="Brian Somers\nbrian@FreeBSD.org\n1996/12/16"]
 brooks [label="Brooks Davis\nbrooks@FreeBSD.org\n2001/06/21"]
 brucec [label="Bruce Cran\nbrucec@FreeBSD.org\n2010/01/29"]
 brueffer [label="Christian Brueffer\nbrueffer@FreeBSD.org\n2006/02/28"]
 bruno [label="Bruno Ducrot\nbruno@FreeBSD.org\n2005/07/18"]
 bryanv [label="Bryan Venteicher\nbryanv@FreeBSD.org\n2012/11/03"]
 bschmidt [label="Bernhard Schmidt\nbschmidt@FreeBSD.org\n2010/02/06"]
 bz [label="Bjoern A. Zeeb\nbz@FreeBSD.org\n2004/07/27"]
 cem [label="Conrad Meyer\ncem@FreeBSD.org\n2015/07/05"]
 cognet [label="Olivier Houchard\ncognet@FreeBSD.org\n2002/10/09"]
 cokane [label="Coleman Kane\ncokane@FreeBSD.org\n2000/06/19"]
 cperciva [label="Colin Percival\ncperciva@FreeBSD.org\n2004/01/20"]
 csjp [label="Christian S.J. Peron\ncsjp@FreeBSD.org\n2004/05/04"]
 das [label="David Schultz\ndas@FreeBSD.org\n2003/02/21"]
 davide [label="Davide Italiano\ndavide@FreeBSD.org\n2012/01/27"]
 dchagin [label="Dmitry Chagin\ndchagin@FreeBSD.org\n2009/02/28"]
 delphij [label="Xin Li\ndelphij@FreeBSD.org\n2004/09/14"]
 des [label="Dag-Erling Smorgrav\ndes@FreeBSD.org\n1998/04/03"]
 dfr [label="Doug Rabson\ndfr@FreeBSD.org\n????/??/??"]
 dg [label="David Greenman\ndg@FreeBSD.org\n1993/06/14"]
 dim [label="Dimitry Andric\ndim@FreeBSD.org\n2010/08/30"]
 dteske [label="Devin Teske\ndteske@FreeBSD.org\n2012/04/10"]
 dumbbell [label="Jean-Sebastien Pedron\ndumbbell@FreeBSD.org\n2004/11/29"]
 dwmalone [label="David Malone\ndwmalone@FreeBSD.org\n2000/07/11"]
 eadler [label="Eitan Adler\neadler@FreeBSD.org\n2012/01/18"]
 ed [label="Ed Schouten\ned@FreeBSD.org\n2008/05/22"]
 edavis [label="Eric Davis\nedavis@FreeBSD.org\n2013/10/09"]
 edwin [label="Edwin Groothuis\nedwin@FreeBSD.org\n2007/06/25"]
 eivind [label="Eivind Eklund\neivind@FreeBSD.org\n1997/02/02"]
 emaste [label="Ed Maste\nemaste@FreeBSD.org\n2005/10/04"]
 emax [label="Maksim Yevmenkin\nemax@FreeBSD.org\n2003/10/12"]
 eri [label="Ermal Luci\neri@FreeBSD.org\n2008/06/11"]
 erj [label="Eric Joyner\nerj@FreeBSD.org\n2014/12/14"]
 fabient [label="Fabien Thomas\nfabient@FreeBSD.org\n2009/03/16"]
 fanf [label="Tony Finch\nfanf@FreeBSD.org\n2002/05/05"]
 fjoe [label="Max Khon\nfjoe@FreeBSD.org\n2001/08/06"]
 flz [label="Florent Thoumie\nflz@FreeBSD.org\n2006/03/30"]
 gabor [label="Gabor Kovesdan\ngabor@FreeBSD.org\n2010/02/02"]
 gad [label="Garance A. Drosehn\ngad@FreeBSD.org\n2000/10/27"]
 gallatin [label="Andrew Gallatin\ngallatin@FreeBSD.org\n1999/01/15"]
 gavin [label="Gavin Atkinson\ngavin@FreeBSD.org\n2009/12/07"]
 gibbs [label="Justin T. Gibbs\ngibbs@FreeBSD.org\n????/??/??"]
 gjb [label="Glen Barber\ngjb@FreeBSD.org\n2013/06/04"]
 gleb [label="Gleb Kurtsou\ngleb@FreeBSD.org\n2011/09/19"]
 glebius [label="Gleb Smirnoff\nglebius@FreeBSD.org\n2004/07/14"]
 gnn [label="George V. Neville-Neil\ngnn@FreeBSD.org\n2004/10/11"]
 gordon [label="Gordon Tetlow\ngordon@FreeBSD.org\n2002/05/17"]
 grehan [label="Peter Grehan\ngrehan@FreeBSD.org\n2002/08/08"]
 grog [label="Greg Lehey\ngrog@FreeBSD.org\n1998/08/30"]
 gshapiro [label="Gregory Shapiro\ngshapiro@FreeBSD.org\n2000/07/12"]
 harti [label="Hartmut Brandt\nharti@FreeBSD.org\n2003/01/29"]
 hiren [label="Hiren Panchasara\nhiren@FreeBSD.org\n2013/04/12"]
 hmp [label="Hiten Pandya\nhmp@FreeBSD.org\n2004/03/23"]
 ian [label="Ian Lepore\nian@FreeBSD.org\n2013/01/07"]
 iedowse [label="Ian Dowse\niedowse@FreeBSD.org\n2000/12/01"]
 imp [label="Warner Losh\nimp@FreeBSD.org\n1996/09/20"]
 ivoras [label="Ivan Voras\nivoras@FreeBSD.org\n2008/06/10"]
 jah [label="Jason A. Harmening\njah@FreeBSD.org\n2015/03/08"]
 jamie [label="Jamie Gritton\njamie@FreeBSD.org\n2009/01/28"]
 jasone [label="Jason Evans\njasone@FreeBSD.org\n1999/03/03"]
 jceel [label="Jakub Klama\njceel@FreeBSD.org\n2011/09/25"]
 jch [label="Julien Charbon\njch@FreeBSD.org\n2014/09/24"]
 jchandra [label="Jayachandran C.\njchandra@FreeBSD.org\n2010/05/19"]
 jeff [label="Jeff Roberson\njeff@FreeBSD.org\n2002/02/21"]
 jh [label="Jaakko Heinonen\njh@FreeBSD.org\n2009/10/02"]
 jhb [label="John Baldwin\njhb@FreeBSD.org\n1999/08/23"]
 jhibbits [label="Justin Hibbits\njhibbits@FreeBSD.org\n2011/11/30"]
 jilles [label="Jilles Tjoelker\njilles@FreeBSD.org\n2009/05/22"]
 jimharris [label="Jim Harris\njimharris@FreeBSD.org\n2011/12/09"]
 jinmei [label="JINMEI Tatuya\njinmei@FreeBSD.org\n2007/03/17"]
 jkim [label="Jung-uk Kim\njkim@FreeBSD.org\n2005/07/06"]
 jkoshy [label="A. Joseph Koshy\njkoshy@FreeBSD.org\n1998/05/13"]
 jlh [label="Jeremie Le Hen\njlh@FreeBSD.org\n2012/04/22"]
 jls [label="Jordan Sissel\njls@FreeBSD.org\n2006/12/06"]
 jmg [label="John-Mark Gurney\njmg@FreeBSD.org\n1997/02/13"]
 jmmv [label="Julio Merino\njmmv@FreeBSD.org\n2013/11/02"]
 joerg [label="Joerg Wunsch\njoerg@FreeBSD.org\n1993/11/14"]
 jon [label="Jonathan Chen\njon@FreeBSD.org\n2000/10/17"]
 jonathan [label="Jonathan Anderson\njonathan@FreeBSD.org\n2010/10/07"]
 jpaetzel [label="Josh Paetzel\njpaetzel@FreeBSD.org\n2011/01/21"]
 jtl [label="Jonathan T. Looney\njtl@FreeBSD.org\n2015/10/26"]
 julian [label="Julian Elischer\njulian@FreeBSD.org\n1993/04/19"]
 jwd [label="John De Boskey\njwd@FreeBSD.org\n2000/05/19"]
 kaiw [label="Kai Wang\nkaiw@FreeBSD.org\n2007/09/26"]
 kan [label="Alexander Kabaev\nkan@FreeBSD.org\n2002/07/21"]
 ken [label="Ken Merry\nken@FreeBSD.org\n1998/09/08"]
 kensmith [label="Ken Smith\nkensmith@FreeBSD.org\n2004/01/23"]
 kevlo [label="Kevin Lo\nkevlo@FreeBSD.org\n2006/07/23"]
 kib [label="Konstantin Belousov\nkib@FreeBSD.org\n2006/06/03"]
 kmacy [label="Kip Macy\nkmacy@FreeBSD.org\n2005/06/01"]
 kp [label="Kristof Provost\nkp@FreeBSD.org\n2015/03/22"]
 le [label="Lukas Ertl\nle@FreeBSD.org\n2004/02/02"]
 lidl [label="Kurt Lidl\nlidl@FreeBSD.org\n2015/10/21"]
 loos [label="Luiz Otavio O Souza\nloos@FreeBSD.org\n2013/07/03"]
 lstewart [label="Lawrence Stewart\nlstewart@FreeBSD.org\n2008/10/06"]
 marcel [label="Marcel Moolenaar\nmarcel@FreeBSD.org\n1999/07/03"]
 marius [label="Marius Strobl\nmarius@FreeBSD.org\n2004/04/17"]
 markj [label="Mark Johnston\nmarkj@FreeBSD.org\n2012/12/18"]
 markm [label="Mark Murray\nmarkm@FreeBSD.org\n1995/04/24"]
 markus [label="Markus Brueffer\nmarkus@FreeBSD.org\n2006/06/01"]
 matteo [label="Matteo Riondato\nmatteo@FreeBSD.org\n2006/01/18"]
 mav [label="Alexander Motin\nmav@FreeBSD.org\n2007/04/12"]
 maxim [label="Maxim Konovalov\nmaxim@FreeBSD.org\n2002/02/07"]
 mdf [label="Matthew Fleming\nmdf@FreeBSD.org\n2010/06/04"]
 mdodd [label="Matthew N. Dodd\nmdodd@FreeBSD.org\n1999/07/27"]
 melifaro [label="Alexander V. Chernikov\nmelifaro@FreeBSD.org\n2011/10/04"]
 mjacob [label="Matt Jacob\nmjacob@FreeBSD.org\n1997/08/13"]
 mjg [label="Mateusz Guzik\nmjg@FreeBSD.org\n2012/06/04"]
 mlaier [label="Max Laier\nmlaier@FreeBSD.org\n2004/02/10"]
 mmel [label="Michal Meloun\nmmel@FreeBSD.org\n2015/11/01"]
 monthadar [label="Monthadar Al Jaberi\nmonthadar@FreeBSD.org\n2012/04/02"]
 mp [label="Mark Peek\nmp@FreeBSD.org\n2001/07/27"]
 mr [label="Michael Reifenberger\nmr@FreeBSD.org\n2001/09/30"]
 neel [label="Neel Natu\nneel@FreeBSD.org\n2009/09/20"]
 netchild [label="Alexander Leidinger\nnetchild@FreeBSD.org\n2005/03/31"]
 ngie [label="Garrett Cooper\nngie@FreeBSD.org\n2014/07/27"]
 nork [label="Norikatsu Shigemura\nnork@FreeBSD.org\n2009/06/09"]
 np [label="Navdeep Parhar\nnp@FreeBSD.org\n2009/06/05"]
 nwhitehorn [label="Nathan Whitehorn\nnwhitehorn@FreeBSD.org\n2008/07/03"]
 obrien [label="David E. O'Brien\nobrien@FreeBSD.org\n1996/10/29"]
 olli [label="Oliver Fromme\nolli@FreeBSD.org\n2008/02/14"]
 oshogbo [label="Mariusz Zaborski\noshogbo@FreeBSD.org\n2015/04/15"]
 peadar [label="Peter Edwards\npeadar@FreeBSD.org\n2004/03/08"]
 peter [label="Peter Wemm\npeter@FreeBSD.org\n1995/07/04"]
 peterj [label="Peter Jeremy\npeterj@FreeBSD.org\n2012/09/14"]
 pfg [label="Pedro Giffuni\npfg@FreeBSD.org\n2011/12/01"]
 philip [label="Philip Paeps\nphilip@FreeBSD.org\n2004/01/21"]
 phk [label="Poul-Henning Kamp\nphk@FreeBSD.org\n1994/02/21"]
 pho [label="Peter Holm\npho@FreeBSD.org\n2008/11/16"]
 pjd [label="Pawel Jakub Dawidek\npjd@FreeBSD.org\n2004/02/02"]
 pkelsey [label="Patrick Kelsey\npkelsey@FreeBSD.org\n2014/05/29"]
 pluknet [label="Sergey Kandaurov\npluknet@FreeBSD.org\n2010/10/05"]
 ps [label="Paul Saab\nps@FreeBSD.org\n2000/02/23"]
 qingli [label="Qing Li\nqingli@FreeBSD.org\n2005/04/13"]
 ray [label="Aleksandr Rybalko\nray@FreeBSD.org\n2011/05/25"]
 rdivacky [label="Roman Divacky\nrdivacky@FreeBSD.org\n2008/03/13"]
 remko [label="Remko Lodder\nremko@FreeBSD.org\n2007/02/23"]
 rik [label="Roman Kurakin\nrik@FreeBSD.org\n2003/12/18"]
 rmacklem [label="Rick Macklem\nrmacklem@FreeBSD.org\n2009/03/27"]
 rmh [label="Robert Millan\nrmh@FreeBSD.org\n2011/09/18"]
 rnoland [label="Robert Noland\nrnoland@FreeBSD.org\n2008/09/15"]
 roberto [label="Ollivier Robert\nroberto@FreeBSD.org\n1995/02/22"]
 rodrigc [label="Craig Rodrigues\nrodrigc@FreeBSD.org\n2005/05/14"]
 royger [label="Roger Pau Monne\nroyger@FreeBSD.org\n2013/11/26"]
 rpaulo [label="Rui Paulo\nrpaulo@FreeBSD.org\n2007/09/25"]
 rpokala [label="Ravi Pokala\nrpokala@FreeBSD.org\n2015/11/19"]
 rrs [label="Randall R Stewart\nrrs@FreeBSD.org\n2007/02/08"]
 rse [label="Ralf S. Engelschall\nrse@FreeBSD.org\n1997/07/31"]
 rstone [label="Ryan Stone\nrstone@FreeBSD.org\n2010/04/19"]
 ru [label="Ruslan Ermilov\nru@FreeBSD.org\n1999/05/27"]
 rwatson [label="Robert N. M. Watson\nrwatson@FreeBSD.org\n1999/12/16"]
 sam [label="Sam Leffler\nsam@FreeBSD.org\n2002/07/02"]
 sanpei [label="MIHIRA Sanpei Yoshiro\nsanpei@FreeBSD.org\n2000/06/19"]
 sbruno [label="Sean Bruno\nsbruno@FreeBSD.org\n2008/08/02"]
 scf [label="Sean C. Farley\nscf@FreeBSD.org\n2007/06/24"]
 schweikh [label="Jens Schweikhardt\nschweikh@FreeBSD.org\n2001/04/06"]
 scottl [label="Scott Long\nscottl@FreeBSD.org\n2000/09/28"]
 se [label="Stefan Esser\nse@FreeBSD.org\n1994/08/26"]
 sephe [label="Sepherosa Ziehau\nsephe@FreeBSD.org\n2007/03/28"]
 sepotvin [label="Stephane E. Potvin\nsepotvin@FreeBSD.org\n2007/02/15"]
 simon [label="Simon L. Nielsen\nsimon@FreeBSD.org\n2006/03/07"]
 sjg [label="Simon J. Gerraty\nsjg@FreeBSD.org\n2012/10/23"]
 skra [label="Svatopluk Kraus\nskra@FreeBSD.org\n2015/10/28"]
 slm [label="Stephen McConnell\nslm@FreeBSD.org\n2014/05/07"]
 smh [label="Steven Hartland\nsmh@FreeBSD.org\n2012/11/12"]
 sobomax [label="Maxim Sobolev\nsobomax@FreeBSD.org\n2001/07/25"]
 sos [label="Soren Schmidt\nsos@FreeBSD.org\n????/??/??"]
 sson [label="Stacey Son\nsson@FreeBSD.org\n2008/07/08"]
 stas [label="Stanislav Sedov\nstas@FreeBSD.org\n2008/08/22"]
 suz [label="SUZUKI Shinsuke\nsuz@FreeBSD.org\n2002/03/26"]
 syrinx [label="Shteryana Shopova\nsyrinx@FreeBSD.org\n2006/10/07"]
 takawata [label="Takanori Watanabe\ntakawata@FreeBSD.org\n2000/07/06"]
 theraven [label="David Chisnall\ntheraven@FreeBSD.org\n2011/11/11"]
 thompsa [label="Andrew Thompson\nthompsa@FreeBSD.org\n2005/05/25"]
 ticso [label="Bernd Walter\nticso@FreeBSD.org\n2002/01/31"]
 tijl [label="Tijl Coosemans\ntijl@FreeBSD.org\n2010/07/16"]
 trasz [label="Edward Tomasz Napierala\ntrasz@FreeBSD.org\n2008/08/22"]
 trhodes [label="Tom Rhodes\ntrhodes@FreeBSD.org\n2002/05/28"]
 trociny [label="Mikolaj Golub\ntrociny@FreeBSD.org\n2011/03/10"]
 tuexen [label="Michael Tuexen\ntuexen@FreeBSD.org\n2009/06/06"]
 tychon [label="Tycho Nightingale\ntychon@FreeBSD.org\n2014/01/21"]
 ume [label="Hajimu UMEMOTO\nume@FreeBSD.org\n2000/02/26"]
 uqs [label="Ulrich Spoerlein\nuqs@FreeBSD.org\n2010/01/28"]
 vangyzen [label="Eric van Gyzen\nvangyzen@FreeBSD.org\n2015/03/08"]
 vanhu [label="Yvan Vanhullebus\nvanhu@FreeBSD.org\n2008/07/21"]
 versus [label="Konrad Jankowski\nversus@FreeBSD.org\n2008/10/27"]
 weongyo [label="Weongyo Jeong\nweongyo@FreeBSD.org\n2007/12/21"]
 wes [label="Wes Peters\nwes@FreeBSD.org\n1998/11/25"]
 whu [label="Wei Hu\nwhu@FreeBSD.org\n2015/02/11"]
 wkoszek [label="Wojciech A. Koszek\nwkoszek@FreeBSD.org\n2006/02/21"]
 wollman [label="Garrett Wollman\nwollman@FreeBSD.org\n????/??/??"]
 wsalamon [label="Wayne Salamon\nwsalamon@FreeBSD.org\n2005/06/25"]
 yongari [label="Pyun YongHyeon\nyongari@FreeBSD.org\n2004/08/01"]
 zbb [label="Zbigniew Bodek\nzbb@FreeBSD.org\n2013/09/02"]
 zec [label="Marko Zec\nzec@FreeBSD.org\n2008/06/22"]
 zml [label="Zachary Loafman\nzml@FreeBSD.org\n2009/05/27"]
 zont [label="Andrey Zonov\nzont@FreeBSD.org\n2012/08/21"]
 
 # Pseudo target representing rev 1.1 of commit.allow
 day1 [label="Birth of FreeBSD"]
 
 # Here are the mentor/mentee relationships.
 # Group together all the mentees for a particular mentor.
 # Keep the list sorted by mentor login.
 
 day1 -> jtc
 day1 -> jkh
 day1 -> nate
 day1 -> rgrimes
 day1 -> alm
 day1 -> dg
 
 adrian -> avos
 adrian -> lidl
 adrian -> loos
 adrian -> monthadar
 adrian -> ray
 adrian -> rmh
+adrian -> sephe
 
 ae -> melifaro
 
 alc -> davide
 
 andre -> qingli
 
 anholt -> jkim
 
 avg -> art
 avg -> pluknet
 avg -> smh
 
 bapt -> allanjude
 bapt -> araujo
 bapt -> bdrewery
 
 benno -> grehan
 
 billf -> dougb
 billf -> gad
 billf -> jedgar
 billf -> jhb
 billf -> shafeeq
 
 bmilekic -> csjp
 
 bms -> dhartmei
 bms -> mlaier
 bms -> thompsa
 
 brian -> joe
 
 brooks -> bushman
 brooks -> jamie
 brooks -> theraven
 
 bz -> anchie
 bz -> jamie
 bz -> syrinx
 
 cognet -> br
 cognet -> jceel
 cognet -> kevlo
 cognet -> ian
 cognet -> wkoszek
 cognet -> zbb
 
 cperciva -> eadler
 cperciva -> flz
 cperciva -> randi
 cperciva -> simon
 
 csjp -> bushman
 
 das -> kargl
 das -> rodrigc
 
 delphij -> gabor
 delphij -> rafan
+delphij -> sephe
 
 des -> anholt
 des -> hmp
 des -> mike
 des -> olli
 des -> ru
 des -> bapt
 
 dds -> versus
 
 dfr -> gallatin
 dfr -> zml
 
 dg -> peter
 
 dim -> theraven
 
 dwmalone -> fanf
 dwmalone -> peadar
 dwmalone -> snb
 
 ed -> dim
 ed -> gavin
 ed -> jilles
 ed -> rdivacky
 ed -> uqs
 
 eivind -> des
 eivind -> rwatson
 
 emaste -> achim
 emaste -> rstone
 emaste -> dteske
 emaste -> markj
 
 emax -> markus
 
 fjoe -> versus
 
 gallatin -> ticso
 
 gavin -> versus
 
 gibbs -> mjacob
 gibbs -> njl
 gibbs -> royger
 gibbs -> whu
 
 glebius -> mav
 
 gnn -> jinmei
 gnn -> rrs
 gnn -> ivoras
 gnn -> vanhu
 gnn -> lstewart
 gnn -> np
 gnn -> davide
 gnn -> arybchik
 gnn -> erj
 gnn -> kp
 gnn -> jtl
 
 grehan -> bryanv
 
 grog -> edwin
 grog -> le
 grog -> peterj
 
 imp -> akiyama
 imp -> ambrisko
 imp -> andrew
 imp -> bmah
 imp -> bruno
 imp -> dmlb
 imp -> emax
 imp -> furuta
 imp -> joe
 imp -> jon
 imp -> keichii
 imp -> mb
 imp -> mr
 imp -> neel
 imp -> non
 imp -> nork
 imp -> onoe
 imp -> remko
 imp -> rik
 imp -> rink
 imp -> sanpei
 imp -> shiba
 imp -> takawata
 imp -> toshi
 imp -> uch
 
 jake -> bms
 jake -> gordon
 jake -> harti
 jake -> jeff
 jake -> kmacy
 jake -> robert
 jake -> yongari
 
 jb -> sson
 
 jdp -> fjoe
 
 jfv -> erj
 
 jhb -> arr
 jhb -> avg
 jhb -> jch
 jhb -> jeff
 jhb -> kbyanc
 jhb -> peterj
 jhb -> pfg
 jhb -> rnoland
 jhb -> rpokala
 
 jimharris -> carl
 
 jkh -> dfr
 jkh -> gj
 jkh -> grog
 jkh -> imp
 jkh -> jlemon
 jkh -> joerg
 jkh -> jwd
 jkh -> msmith
 jkh -> murray
 jkh -> phk
 jkh -> wes
 jkh -> yar
 
 jkoshy -> kaiw
 jkoshy -> fabient
 jkoshy -> rstone
 
 jlemon -> bmilekic
 jlemon -> brooks
 
 jmallett -> pkelsey
 
 jmmv -> ngie
 
 joerg -> brian
 joerg -> eik
 joerg -> jmg
 joerg -> le
 joerg -> netchild
 joerg -> schweikh
 
 julian -> glebius
 julian -> davidxu
 julian -> archie
 julian -> adrian
 julian -> zec
 julian -> mp
 
 kan -> kib
 
 ken -> asomers
 ken -> slm
 
 kib -> ae
 kib -> dchagin
 kib -> gjb
 kib -> jah
 kib -> jlh
 kib -> jpaetzel
 kib -> lulf
 kib -> melifaro
 kib -> mmel
 kib -> pho
 kib -> pluknet
 kib -> rdivacky
 kib -> rmacklem
 kib -> rmh
 kib -> skra
 kib -> stas
 kib -> tijl
 kib -> trociny
 kib -> vangyzen
 kib -> zont
 
 kmacy -> lstewart
 
 marcel -> allanjude
 marcel -> art
 marcel -> arun
 marcel -> marius
 marcel -> nwhitehorn
 marcel -> sjg
 
 markj -> cem
 
 markm -> jasone
 markm -> sheldonh
 
 mav -> ae
 
 mdf -> gleb
 
 mdodd -> jake
 
 mike -> das
 
 mlaier -> benjsc
 mlaier -> dhartmei
 mlaier -> thompsa
 mlaier -> eri
 
 msmith -> cokane
 msmith -> jasone
 msmith -> scottl
 
 murray -> delphij
 
 mux -> cognet
 mux -> dumbbell
 
 netchild -> ariff
 
 njl -> marks
 njl -> philip
 njl -> rpaulo
 njl -> sepotvin
 
 nwhitehorn -> andreast
 nwhitehorn -> jhibbits
 
 obrien -> benno
 obrien -> groudier
 obrien -> gshapiro
 obrien -> kan
 obrien -> sam
 
 peter -> asmodai
 peter -> jayanth
 peter -> ps
 
 philip -> benl
 philip -> ed
 philip -> jls
 philip -> matteo
 philip -> uqs
 philip -> kp
 
 phk -> jkoshy
 phk -> mux
 
 pjd -> kib
 pjd -> lulf
 pjd -> oshogbo
 pjd -> smh
 pjd -> trociny
 
 rgrimes -> markm
 
 rmacklem -> jwd
 
 royger -> whu
 
 rpaulo -> avg
 rpaulo -> bschmidt
 rpaulo -> dim
 rpaulo -> jmmv
 rpaulo -> lidl
 rpaulo -> ngie
 
 rrs -> brucec
 rrs -> jchandra
 rrs -> tuexen
 
 rstone -> markj
 
 ru -> ceri
 ru -> cjc
 ru -> eik
 ru -> maxim
 ru -> sobomax
 
 rwatson -> adrian
 rwatson -> antoine
 rwatson -> bmah
 rwatson -> brueffer
 rwatson -> bz
 rwatson -> cperciva
 rwatson -> emaste
 rwatson -> gnn
 rwatson -> jh
 rwatson -> jonathan
 rwatson -> kensmith
 rwatson -> kmacy
 rwatson -> linimon
 rwatson -> rmacklem
 rwatson -> shafeeq
 rwatson -> tmm
 rwatson -> trasz
 rwatson -> trhodes
 rwatson -> wsalamon
 
 rodrigc -> araujo
 
 sam -> andre
 sam -> benjsc
 sam -> sephe
 
 sbruno -> hiren
 sbruno -> jimharris
 
 schweikh -> dds
 
 scottl -> achim
 scottl -> jimharris
 scottl -> pjd
 scottl -> sah
 scottl -> sbruno
 scottl -> slm
 scottl -> yongari
 
 sheldonh -> dwmalone
 sheldonh -> iedowse
 
 shin -> ume
 
 simon -> benl
 
 sos -> marcel
 
 thompsa -> weongyo
 thompsa -> eri
 
 trasz -> jh
 trasz -> mjg
 
 ume -> jinmei
 ume -> suz
 ume -> tshiozak
 
 wes -> scf
 
 wkoszek -> jceel
 
 wollman -> gad
 
 zml -> mdf
 zml -> zack
 
 }
Index: projects/clang380-import/share/mk/src.opts.mk
===================================================================
--- projects/clang380-import/share/mk/src.opts.mk	(revision 293686)
+++ projects/clang380-import/share/mk/src.opts.mk	(revision 293687)
@@ -1,417 +1,417 @@
 # $FreeBSD$
 #
 # Option file for FreeBSD /usr/src builds.
 #
 # Users define WITH_FOO and WITHOUT_FOO on the command line or in /etc/src.conf
 # and /etc/make.conf files. These translate in the build system to MK_FOO={yes,no}
 # with sensible (usually) defaults.
 #
 # Makefiles must include bsd.opts.mk after defining specific MK_FOO options that
 # are applicable for that Makefile (typically there are none, but sometimes there
 # are exceptions). Recursive makes usually add MK_FOO=no for options that they wish
 # to omit from that make.
 #
 # Makefiles must include bsd.mkopt.mk before they test the value of any MK_FOO
 # variable.
 #
 # Makefiles may also assume that this file is included by src.opts.mk should it
 # need variables defined there prior to the end of the Makefile where
 # bsd.{subdir,lib.bin}.mk is traditionally included.
 #
 # The old-style YES_FOO and NO_FOO are being phased out. No new instances of them
 # should be added. Old instances should be removed since they were just to
 # bridge the gap between FreeBSD 4 and FreeBSD 5.
 #
 # Makefiles should never test WITH_FOO or WITHOUT_FOO directly (although an
 # exception is made for _WITHOUT_SRCONF which turns off this mechanism
 # completely inside bsd.*.mk files).
 #
 
 .if !target(__<src.opts.mk>__)
 __<src.opts.mk>__:
 
 .include <bsd.own.mk>
 
 #
 # Define MK_* variables (which are either "yes" or "no") for users
 # to set via WITH_*/WITHOUT_* in /etc/src.conf and override in the
 # make(1) environment.
 # These should be tested with `== "no"' or `!= "no"' in makefiles.
 # The NO_* variables should only be set by makefiles for variables
 # that haven't been converted over.
 #
 
 # These options are used by the src builds
 
 __DEFAULT_YES_OPTIONS = \
     ACCT \
     ACPI \
     AMD \
     APM \
     AT \
     ATM \
     AUDIT \
     AUTHPF \
     AUTOFS \
     BHYVE \
     BINUTILS \
     BINUTILS_BOOTSTRAP \
     BLUETOOTH \
     BOOT \
     BOOTPARAMD \
     BOOTPD \
     BSD_CPIO \
     BSDINSTALL \
     BSNMP \
     BZIP2 \
     CALENDAR \
     CAPSICUM \
     CASPER \
     CCD \
     CDDL \
     CPP \
     CROSS_COMPILER \
     CRYPT \
     CTM \
     CUSE \
     CXX \
     DICT \
     DMAGENT \
     DYNAMICROOT \
     ED_CRYPTO \
     EE \
     ELFTOOLCHAIN_BOOTSTRAP \
     EXAMPLES \
     FDT \
     FILE \
     FINGER \
     FLOPPY \
     FMTREE \
     FORTH \
     FP_LIBC \
     FREEBSD_UPDATE \
     FTP \
     GAMES \
     GCOV \
     GDB \
     GNU \
     GNU_GREP_COMPAT \
     GPIO \
     GPL_DTC \
     GROFF \
     HAST \
     HTML \
     HYPERV \
     ICONV \
     INET \
     INET6 \
     INETD \
     IPFILTER \
     IPFW \
     ISCSI \
     JAIL \
     KDUMP \
     KVM \
     LDNS \
     LDNS_UTILS \
     LEGACY_CONSOLE \
     LIB32 \
     LIBPTHREAD \
     LIBTHR \
     LOCALES \
     LOCATE \
     LPR \
     LS_COLORS \
     LZMA_SUPPORT \
     MAIL \
     MAILWRAPPER \
     MAKE \
     MANDOCDB \
     NDIS \
     NETCAT \
     NETGRAPH \
     NLS_CATALOGS \
     NS_CACHING \
     NTP \
     OPENSSL \
     PAM \
     PC_SYSINSTALL \
     PF \
     PKGBOOTSTRAP \
     PMC \
     PORTSNAP \
     PPP \
     QUOTAS \
     RADIUS_SUPPORT \
     RCMDS \
     RBOOTD \
     RCS \
     RESCUE \
     ROUTED \
     SENDMAIL \
     SETUID_LOGIN \
     SHAREDOCS \
     SOURCELESS \
     SOURCELESS_HOST \
     SOURCELESS_UCODE \
     SVNLITE \
     SYSCONS \
     TALK \
     TCP_WRAPPERS \
     TCSH \
     TELNET \
     TESTS \
     TEXTPROC \
     TFTP \
     TIMED \
     UNBOUND \
     USB \
     UTMPX \
     VI \
     VT \
     WIRELESS \
     WPA_SUPPLICANT_EAPOL \
     ZFS \
     ZONEINFO
 
 __DEFAULT_NO_OPTIONS = \
     BSD_GREP \
     CLANG_EXTRAS \
     DTRACE_TESTS \
     EISA \
     HESIOD \
     LIBSOFT \
     NAND \
     OFED \
     OPENLDAP \
     SHARED_TOOLCHAIN \
     SORT_THREADS \
     SVN
 
 #
 # Default behaviour of some options depends on the architecture.  Unfortunately
 # this means that we have to test TARGET_ARCH (the buildworld case) as well
 # as MACHINE_ARCH (the non-buildworld case).  Normally TARGET_ARCH is not
 # used at all in bsd.*.mk, but we have to make an exception here if we want
 # to allow defaults for some things like clang to vary by target architecture.
 # Additional per-target behavior should be added rarely, and only after much
 # gnashing of teeth and grinding of gears.
 #
 .if defined(TARGET_ARCH)
 __T=${TARGET_ARCH}
 .else
 __T=${MACHINE_ARCH}
 .endif
 .if defined(TARGET)
 __TT=${TARGET}
 .else
 __TT=${MACHINE}
 .endif
 
 .include <bsd.compiler.mk>
 # If the compiler is not C++11 capable, disable Clang and use GCC instead.
 # This means that architectures that have GCC 4.2 as default can not
 # build Clang without using an external compiler.
 
 .if ${COMPILER_FEATURES:Mc++11} && (${__T} == "aarch64" || \
     ${__T} == "amd64" || ${__TT} == "arm" || ${__T} == "i386")
 # Clang is enabled, and will be installed as the default /usr/bin/cc.
 __DEFAULT_YES_OPTIONS+=CLANG CLANG_BOOTSTRAP CLANG_FULL CLANG_IS_CC
 __DEFAULT_NO_OPTIONS+=GCC GCC_BOOTSTRAP GNUCXX
 .elif ${COMPILER_FEATURES:Mc++11} && ${__T:Mpowerpc*}
 # On powerpc, if an external compiler that supports C++11 is used as ${CC},
 # then Clang is enabled, but GCC is installed as the default /usr/bin/cc.
 __DEFAULT_YES_OPTIONS+=CLANG CLANG_FULL GCC GCC_BOOTSTRAP GNUCXX
 __DEFAULT_NO_OPTIONS+=CLANG_BOOTSTRAP CLANG_IS_CC
 .else
 # Everything else disables Clang, and uses GCC instead.
 __DEFAULT_YES_OPTIONS+=GCC GCC_BOOTSTRAP GNUCXX
 __DEFAULT_NO_OPTIONS+=CLANG CLANG_BOOTSTRAP CLANG_FULL CLANG_IS_CC
 .endif
 # In-tree binutils/gcc are older versions without modern architecture support.
 .if ${__T} == "aarch64" || ${__T} == "riscv64"
 BROKEN_OPTIONS+=BINUTILS BINUTILS_BOOTSTRAP GCC GCC_BOOTSTRAP GDB
-__DEFAULT_YES_OPTIONS+=ELFCOPY_AS_OBJCOPY
+__DEFAULT_YES_OPTIONS+=ELFCOPY_AS_OBJCOPY LLVM_LIBUNWIND
 .else
-__DEFAULT_NO_OPTIONS+=ELFCOPY_AS_OBJCOPY
+__DEFAULT_NO_OPTIONS+=ELFCOPY_AS_OBJCOPY LLVM_LIBUNWIND
 .endif
 .if ${__T} == "riscv64"
 BROKEN_OPTIONS+=PROFILE # "sorry, unimplemented: profiler support for RISC-V"
 BROKEN_OPTIONS+=TESTS   # "undefined reference to `_Unwind_Resume'"
 BROKEN_OPTIONS+=CXX     # "libcxxrt.so: undefined reference to `_Unwind_Resume_or_Rethrow'"
 .endif
 .if ${__T} == "aarch64" || ${__T} == "amd64"
 __DEFAULT_YES_OPTIONS+=LLDB
 .else
 __DEFAULT_NO_OPTIONS+=LLDB
 .endif
 # LLVM lacks support for FreeBSD 64-bit atomic operations for ARMv4/ARMv5
 .if ${__T} == "arm" || ${__T} == "armeb"
 BROKEN_OPTIONS+=LLDB
 .endif
 # Only doing soft float API stuff on armv6
 .if ${__T} != "armv6"
 BROKEN_OPTIONS+=LIBSOFT
 .endif
 
 .include <bsd.mkopt.mk>
 
 #
 # MK_* options that default to "yes" if the compiler is a C++11 compiler.
 #
 .for var in \
     LIBCPLUSPLUS
 .if !defined(MK_${var})
 .if ${COMPILER_FEATURES:Mc++11}
 .if defined(WITHOUT_${var})
 MK_${var}:=	no
 .else
 MK_${var}:=	yes
 .endif
 .else
 .if defined(WITH_${var})
 MK_${var}:=	yes
 .else
 MK_${var}:=	no
 .endif
 .endif
 .endif
 .endfor
 
 #
 # Force some options off if their dependencies are off.
 # Order is somewhat important.
 #
 .if ${MK_LIBPTHREAD} == "no"
 MK_LIBTHR:=	no
 .endif
 
 .if ${MK_LDNS} == "no"
 MK_LDNS_UTILS:=	no
 MK_UNBOUND:= no
 .endif
 
 .if ${MK_SOURCELESS} == "no"
 MK_SOURCELESS_HOST:=	no
 MK_SOURCELESS_UCODE:= no
 .endif
 
 .if ${MK_CDDL} == "no"
 MK_ZFS:=	no
 MK_CTF:=	no
 .endif
 
 .if ${MK_CRYPT} == "no"
 MK_OPENSSL:=	no
 MK_OPENSSH:=	no
 MK_KERBEROS:=	no
 .endif
 
 .if ${MK_CXX} == "no"
 MK_CLANG:=	no
 MK_GROFF:=	no
 MK_GNUCXX:=	no
 .endif
 
 .if ${MK_MAIL} == "no"
 MK_MAILWRAPPER:= no
 MK_SENDMAIL:=	no
 MK_DMAGENT:=	no
 .endif
 
 .if ${MK_NETGRAPH} == "no"
 MK_ATM:=	no
 MK_BLUETOOTH:=	no
 .endif
 
 .if ${MK_OPENSSL} == "no"
 MK_OPENSSH:=	no
 MK_KERBEROS:=	no
 .endif
 
 .if ${MK_PF} == "no"
 MK_AUTHPF:=	no
 .endif
 
 .if ${MK_TESTS} == "no"
 MK_DTRACE_TESTS:= no
 .endif
 
 .if ${MK_TEXTPROC} == "no"
 MK_GROFF:=	no
 .endif
 
 .if ${MK_CROSS_COMPILER} == "no"
 MK_BINUTILS_BOOTSTRAP:= no
 MK_CLANG_BOOTSTRAP:= no
 MK_ELFTOOLCHAIN_BOOTSTRAP:= no
 MK_GCC_BOOTSTRAP:= no
 .endif
 
 .if ${MK_TOOLCHAIN} == "no"
 MK_BINUTILS:=	no
 MK_CLANG:=	no
 MK_GCC:=	no
 MK_GDB:=	no
 MK_INCLUDES:=	no
 .endif
 
 .if ${MK_CLANG} == "no"
 MK_CLANG_EXTRAS:= no
 MK_CLANG_FULL:= no
 .endif
 
 #
 # Set defaults for the MK_*_SUPPORT variables.
 #
 
 #
 # MK_*_SUPPORT options which default to "yes" unless their corresponding
 # MK_* variable is set to "no".
 #
 .for var in \
     BZIP2 \
     GNU \
     INET \
     INET6 \
     KERBEROS \
     KVM \
     NETGRAPH \
     PAM \
     TESTS \
     WIRELESS
 .if defined(WITHOUT_${var}_SUPPORT) || ${MK_${var}} == "no"
 MK_${var}_SUPPORT:= no
 .else
 MK_${var}_SUPPORT:= yes
 .endif
 .endfor
 
 #
 # MK_* options whose default value depends on another option.
 #
 .for vv in \
     GSSAPI/KERBEROS \
     MAN_UTILS/MAN
 .if defined(WITH_${vv:H})
 MK_${vv:H}:=	yes
 .elif defined(WITHOUT_${vv:H})
 MK_${vv:H}:=	no
 .else
 MK_${vv:H}:=	${MK_${vv:T}}
 .endif
 .endfor
 
 .if !${COMPILER_FEATURES:Mc++11}
 MK_LLDB:=	no
 .endif
 
 # gcc 4.8 and newer supports libc++, so suppress gnuc++ in that case.
 # While in theory we could build it with that, we don't want to do
 # that since it creates too much confusion for too little gain.
 .if ${COMPILER_TYPE} == "gcc" && ${COMPILER_VERSION} >= 40800
 MK_GNUCXX:=no
 MK_GCC:=no
 .endif
 
 .endif #  !target(__<src.opts.mk>__)
Index: projects/clang380-import/share
===================================================================
--- projects/clang380-import/share	(revision 293686)
+++ projects/clang380-import/share	(revision 293687)

Property changes on: projects/clang380-import/share
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/share:r293430-293685
Index: projects/clang380-import/sys/amd64/amd64/elf_machdep.c
===================================================================
--- projects/clang380-import/sys/amd64/amd64/elf_machdep.c	(revision 293686)
+++ projects/clang380-import/sys/amd64/amd64/elf_machdep.c	(revision 293687)
@@ -1,293 +1,294 @@
 /*-
  * Copyright 1996-1998 John D. Polstra.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/linker.h>
 #include <sys/proc.h>
 #include <sys/sysent.h>
 #include <sys/imgact_elf.h>
 #include <sys/syscall.h>
 #include <sys/signalvar.h>
 #include <sys/vnode.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_param.h>
 
 #include <machine/elf.h>
 #include <machine/fpu.h>
 #include <machine/md_var.h>
 
 struct sysentvec elf64_freebsd_sysvec = {
 	.sv_size	= SYS_MAXSYSCALL,
 	.sv_table	= sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= 0,
 	.sv_errtbl	= NULL,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= __elfN(freebsd_fixup),
 	.sv_sendsig	= sendsig,
 	.sv_sigcode	= sigcode,
 	.sv_szsigcode	= &szsigcode,
 	.sv_name	= "FreeBSD ELF64",
 	.sv_coredump	= __elfN(coredump),
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings	= exec_copyout_strings,
 	.sv_setregs	= exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_TIMEKEEP,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = syscallnames,
 	.sv_shared_page_base = SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
+	.sv_trap	= NULL,
 };
 INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec);
 
 static Elf64_Brandinfo freebsd_brand_info = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_X86_64,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/libexec/ld-elf.so.1",
 	.sysvec		= &elf64_freebsd_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf64_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST,
 	(sysinit_cfunc_t) elf64_insert_brand_entry,
 	&freebsd_brand_info);
 
 static Elf64_Brandinfo freebsd_brand_oinfo = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_X86_64,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/usr/libexec/ld-elf.so.1",
 	.sysvec		= &elf64_freebsd_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf64_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY,
 	(sysinit_cfunc_t) elf64_insert_brand_entry,
 	&freebsd_brand_oinfo);
 
 static Elf64_Brandinfo kfreebsd_brand_info = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_X86_64,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/lib/ld-kfreebsd-x86-64.so.1",
 	.sysvec		= &elf64_freebsd_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf64_kfreebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY
 };
 
 SYSINIT(kelf64, SI_SUB_EXEC, SI_ORDER_ANY,
 	(sysinit_cfunc_t) elf64_insert_brand_entry,
 	&kfreebsd_brand_info);
 
 /*
  * Produce the NT_X86_XSTATE note for a thread and report its length.
  *
  * td  - thread whose extended FPU state is dumped.
  * dst - destination handed to elf64_populate_note(); may be NULL.
  * off - out parameter receiving the number of bytes accounted for
  *       (0 when use_xsave is not set).
  */
 void
 elf64_dump_thread(struct thread *td, void *dst, size_t *off)
 {
 	void *note_desc;
 	uint64_t *xcr0_slot;
 	size_t note_len;
 
 	if (!use_xsave) {
 		/* No XSAVE note is produced at all. */
 		*off = 0;
 		return;
 	}
 
 	if (dst == NULL) {
 		/*
 		 * No destination: pass NULLs straight through.
 		 * NOTE(review): presumably a size-only query -- confirm
 		 * against elf64_populate_note().
 		 */
 		note_len = elf64_populate_note(NT_X86_XSTATE, NULL, NULL,
 		    cpu_max_ext_state_size, NULL);
 	} else {
 		fpugetregs(td);
 		note_len = elf64_populate_note(NT_X86_XSTATE,
 		    get_pcb_user_save_td(td), dst, cpu_max_ext_state_size,
 		    &note_desc);
 		/* Store xsave_mask at the XCR0 offset of the note payload. */
 		xcr0_slot = (uint64_t *)((char *)note_desc +
 		    X86_XSTATE_XCR0_OFFSET);
 		*xcr0_slot = xsave_mask;
 	}
 	*off = note_len;
 }
 
 /*
  * Process one ELF relocation, in either REL or RELA form.
  *
  * lf        - linker file being relocated; passed through to 'lookup'.
  * relocbase - base address added to r_offset, and to the addend for
  *             R_X86_64_RELATIVE.
  * data      - Elf_Rel or Elf_Rela record, as selected by 'type'.
  * type      - ELF_RELOC_REL or ELF_RELOC_RELA; any other value panics.
  * local     - supplied by elf_reloc()/elf_reloc_local(); not consulted here.
  * lookup    - callback that resolves a symbol index to an address.
  *
  * Returns 0 on success, -1 if the symbol lookup fails or the relocation
  * type is not handled.  The target word is only written when its value
  * actually changes.
  */
 static int
 elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, int local, elf_lookup_fn lookup)
 {
 	Elf64_Addr *where, val;
 	Elf32_Addr *where32, val32;
 	Elf_Addr addr;
 	Elf_Addr addend;
 	Elf_Size rtype, symidx;
 	const Elf_Rel *rel;
 	const Elf_Rela *rela;
 	int error;
 
 	switch (type) {
 	case ELF_RELOC_REL:
 		rel = (const Elf_Rel *)data;
 		where = (Elf_Addr *) (relocbase + rel->r_offset);
 		rtype = ELF_R_TYPE(rel->r_info);
 		symidx = ELF_R_SYM(rel->r_info);
 		/* Addend is 32 bit on 32 bit relocs */
 		switch (rtype) {
 		case R_X86_64_PC32:
 		case R_X86_64_32S:
 			addend = *(Elf32_Addr *)where;
 			break;
 		default:
 			addend = *where;
 			break;
 		}
 		break;
 	case ELF_RELOC_RELA:
 		rela = (const Elf_Rela *)data;
 		where = (Elf_Addr *) (relocbase + rela->r_offset);
 		addend = rela->r_addend;
 		rtype = ELF_R_TYPE(rela->r_info);
 		symidx = ELF_R_SYM(rela->r_info);
 		break;
 	default:
 		panic("unknown reloc type %d\n", type);
 	}
 
 	/*
 	 * In every case below the lookup result is checked before 'addr' is
 	 * used, so a failed lookup never feeds an unset address into the
 	 * computation.
 	 */
 	switch (rtype) {
 	case R_X86_64_NONE:	/* none */
 		break;
 
 	case R_X86_64_64:	/* S + A */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 		val = addr + addend;
 		if (*where != val)
 			*where = val;
 		break;
 
 	case R_X86_64_PC32:	/* S + A - P */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 		where32 = (Elf32_Addr *)where;
 		val32 = (Elf32_Addr)(addr + addend - (Elf_Addr)where);
 		if (*where32 != val32)
 			*where32 = val32;
 		break;
 
 	case R_X86_64_32S:	/* S + A sign extend */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 		val32 = (Elf32_Addr)(addr + addend);
 		where32 = (Elf32_Addr *)where;
 		if (*where32 != val32)
 			*where32 = val32;
 		break;
 
 	case R_X86_64_COPY:	/* none */
 		/*
 		 * There shouldn't be copy relocations in kernel
 		 * objects.
 		 */
 		printf("kldload: unexpected R_COPY relocation\n");
 		return (-1);
 
 	case R_X86_64_GLOB_DAT:	/* S */
 	case R_X86_64_JMP_SLOT:	/* XXX need addend + offset */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 		if (*where != addr)
 			*where = addr;
 		break;
 
 	case R_X86_64_RELATIVE:	/* B + A */
 		addr = relocbase + addend;
 		val = addr;
 		if (*where != val)
 			*where = val;
 		break;
 
 	default:
 		printf("kldload: unexpected relocation type %ld\n",
 		    rtype);
 		return (-1);
 	}
 	return (0);
 }
 
 /*
  * Apply one relocation record to a linker file.
  *
  * Forwards all arguments to elf_reloc_internal() with its 'local' argument
  * set to 0 and returns that function's result.
  */
 int
 elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type,
     elf_lookup_fn lookup)
 {
 
 	return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup));
 }
 
 /*
  * Same as elf_reloc(), except that elf_reloc_internal() is called with its
  * 'local' argument set to 1.  Returns that function's result.
  */
 int
 elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, elf_lookup_fn lookup)
 {
 
 	return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup));
 }
 
 /*
  * Per-file load step for this architecture: does nothing with 'lf' and
  * always returns 0.
  */
 int
 elf_cpu_load_file(linker_file_t lf __unused)
 {
 
 	return (0);
 }
 
 /*
  * Per-file unload step for this architecture: does nothing with 'lf' and
  * always returns 0.
  */
 int
 elf_cpu_unload_file(linker_file_t lf __unused)
 {
 
 	return (0);
 }
Index: projects/clang380-import/sys/amd64/amd64/trap.c
===================================================================
--- projects/clang380-import/sys/amd64/amd64/trap.c	(revision 293686)
+++ projects/clang380-import/sys/amd64/amd64/trap.c	(revision 293687)
@@ -1,968 +1,975 @@
 /*-
  * Copyright (C) 1994, David Greenman
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the University of Utah, and William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * AMD64 Trap and System call handling
  */
 
 #include "opt_clock.h"
 #include "opt_cpu.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_isa.h"
 #include "opt_kdb.h"
 #include "opt_stack.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/pioctl.h>
 #include <sys/ptrace.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/resourcevar.h>
 #include <sys/signalvar.h>
 #include <sys/syscall.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/uio.h>
 #include <sys/vmmeter.h>
 #ifdef HWPMC_HOOKS
 #include <sys/pmckern.h>
 PMC_SOFT_DEFINE( , , page_fault, all);
 PMC_SOFT_DEFINE( , , page_fault, read);
 PMC_SOFT_DEFINE( , , page_fault, write);
 #endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_extern.h>
 
 #include <machine/cpu.h>
 #include <machine/intr_machdep.h>
 #include <x86/mca.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #ifdef SMP
 #include <machine/smp.h>
 #endif
 #include <machine/stack.h>
 #include <machine/tss.h>
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 #endif
 
 extern void __noinline trap(struct trapframe *frame);
 extern void trap_check(struct trapframe *frame);
 extern void syscall(struct trapframe *frame);
 void dblfault_handler(struct trapframe *frame);
 
 static int trap_pfault(struct trapframe *, int);
 static void trap_fatal(struct trapframe *, vm_offset_t);
 
 #define MAX_TRAP_MSG		32
 static char *trap_msg[] = {
 	"",					/*  0 unused */
 	"privileged instruction fault",		/*  1 T_PRIVINFLT */
 	"",					/*  2 unused */
 	"breakpoint instruction fault",		/*  3 T_BPTFLT */
 	"",					/*  4 unused */
 	"",					/*  5 unused */
 	"arithmetic trap",			/*  6 T_ARITHTRAP */
 	"",					/*  7 unused */
 	"",					/*  8 unused */
 	"general protection fault",		/*  9 T_PROTFLT */
 	"trace trap",				/* 10 T_TRCTRAP */
 	"",					/* 11 unused */
 	"page fault",				/* 12 T_PAGEFLT */
 	"",					/* 13 unused */
 	"alignment fault",			/* 14 T_ALIGNFLT */
 	"",					/* 15 unused */
 	"",					/* 16 unused */
 	"",					/* 17 unused */
 	"integer divide fault",			/* 18 T_DIVIDE */
 	"non-maskable interrupt trap",		/* 19 T_NMI */
 	"overflow trap",			/* 20 T_OFLOW */
 	"FPU bounds check fault",		/* 21 T_BOUND */
 	"FPU device not available",		/* 22 T_DNA */
 	"double fault",				/* 23 T_DOUBLEFLT */
 	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
 	"invalid TSS fault",			/* 25 T_TSSFLT */
 	"segment not present fault",		/* 26 T_SEGNPFLT */
 	"stack fault",				/* 27 T_STKFLT */
 	"machine check trap",			/* 28 T_MCHK */
 	"SIMD floating-point exception",	/* 29 T_XMMFLT */
 	"reserved (unknown) fault",		/* 30 T_RESERVED */
 	"",					/* 31 unused (reserved) */
 	"DTrace pid return trap",		/* 32 T_DTRACE_RET */
 };
 
 #ifdef KDB
 static int kdb_on_nmi = 1;
 SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RWTUN,
 	&kdb_on_nmi, 0, "Go to KDB on NMI");
 #endif
 static int panic_on_nmi = 1;
 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RWTUN,
 	&panic_on_nmi, 0, "Panic on NMI");
 static int prot_fault_translation;
 SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RWTUN,
     &prot_fault_translation, 0,
     "Select signal to deliver on protection fault");
 static int uprintf_signal;
 SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RWTUN,
     &uprintf_signal, 0,
     "Print debugging information on trap signal to ctty");
 
 /*
  * Exception, fault, and trap interface to the FreeBSD kernel.
  * This common code is called from assembly language IDT gate entry
  * routines that prepare a suitable stack frame, and restore this
  * frame after the exception has been processed.
  */
 
 void
 trap(struct trapframe *frame)
 {
 #ifdef KDTRACE_HOOKS
 	struct reg regs;
 #endif
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
 	int i = 0, ucode = 0, code;
 	u_int type;
 	register_t addr = 0;
 	ksiginfo_t ksi;
 
 	PCPU_INC(cnt.v_trap);
 	type = frame->tf_trapno;
 
 #ifdef SMP
 	/* Handler for NMI IPIs used for stopping CPUs. */
 	if (type == T_NMI) {
 	         if (ipi_nmi_handler() == 0)
 	                   goto out;
 	}
 #endif /* SMP */
 
 #ifdef KDB
 	if (kdb_active) {
 		kdb_reenter();
 		goto out;
 	}
 #endif
 
 	if (type == T_RESERVED) {
 		trap_fatal(frame, 0);
 		goto out;
 	}
 
 	if (type == T_NMI) {
 #ifdef HWPMC_HOOKS
 		/*
 		 * CPU PMCs interrupt using an NMI.  If the PMC module is
 		 * active, pass the 'rip' value to the PMC module's interrupt
 		 * handler.  A non-zero return value from the handler means that
 		 * the NMI was consumed by it and we can return immediately.
 		 */
 		if (pmc_intr != NULL &&
 		    (*pmc_intr)(PCPU_GET(cpuid), frame) != 0)
 			goto out;
 #endif
 
 #ifdef STACK
 		if (stack_nmi_handler(frame) != 0)
 			goto out;
 #endif
 	}
 
 	if (type == T_MCHK) {
 		mca_intr();
 		goto out;
 	}
 
 	if ((frame->tf_rflags & PSL_I) == 0) {
 		/*
 		 * Buggy application or kernel code has disabled
 		 * interrupts and then trapped.  Enabling interrupts
 		 * now is wrong, but it is better than running with
 		 * interrupts disabled until they are accidentally
 		 * enabled later.
 		 */
 		if (ISPL(frame->tf_cs) == SEL_UPL)
 			uprintf(
 			    "pid %ld (%s): trap %d with interrupts disabled\n",
 			    (long)curproc->p_pid, curthread->td_name, type);
 		else if (type != T_NMI && type != T_BPTFLT &&
 		    type != T_TRCTRAP) {
 			/*
 			 * XXX not quite right, since this may be for a
 			 * multiple fault in user mode.
 			 */
 			printf("kernel trap %d with interrupts disabled\n",
 			    type);
 
 			/*
 			 * We shouldn't enable interrupts while holding a
 			 * spin lock.
 			 */
 			if (td->td_md.md_spinlock_count == 0)
 				enable_intr();
 		}
 	}
 
 	code = frame->tf_err;
 
         if (ISPL(frame->tf_cs) == SEL_UPL) {
 		/* user trap */
 
 		td->td_pticks = 0;
 		td->td_frame = frame;
 		addr = frame->tf_rip;
 		if (td->td_cowgen != p->p_cowgen)
 			thread_cow_update(td);
 
 		switch (type) {
 		case T_PRIVINFLT:	/* privileged instruction fault */
 			i = SIGILL;
 			ucode = ILL_PRVOPC;
 			break;
 
 		case T_BPTFLT:		/* bpt instruction fault */
 		case T_TRCTRAP:		/* trace trap */
 			enable_intr();
 #ifdef KDTRACE_HOOKS
 			if (type == T_BPTFLT) {
 				fill_frame_regs(frame, &regs);
 				if (dtrace_pid_probe_ptr != NULL &&
 				    dtrace_pid_probe_ptr(&regs) == 0)
 					goto out;
 			}
 #endif
 			frame->tf_rflags &= ~PSL_T;
 			i = SIGTRAP;
 			ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
 			break;
 
 		case T_ARITHTRAP:	/* arithmetic trap */
 			ucode = fputrap_x87();
 			if (ucode == -1)
 				goto userout;
 			i = SIGFPE;
 			break;
 
 		case T_PROTFLT:		/* general protection fault */
 			i = SIGBUS;
 			ucode = BUS_OBJERR;
 			break;
 		case T_STKFLT:		/* stack fault */
 		case T_SEGNPFLT:	/* segment not present fault */
 			i = SIGBUS;
 			ucode = BUS_ADRERR;
 			break;
 		case T_TSSFLT:		/* invalid TSS fault */
 			i = SIGBUS;
 			ucode = BUS_OBJERR;
 			break;
 		case T_ALIGNFLT:
 			i = SIGBUS;
 			ucode = BUS_ADRALN;
 			break;
 		case T_DOUBLEFLT:	/* double fault */
 		default:
 			i = SIGBUS;
 			ucode = BUS_OBJERR;
 			break;
 
 		case T_PAGEFLT:		/* page fault */
+			/*
+			 * Let the ABI's sv_trap hook, if any, handle the fault.
+			 */
+			if (*p->p_sysent->sv_trap != NULL &&
+			    (*p->p_sysent->sv_trap)(td) == 0)
+				goto userout;
+
 			addr = frame->tf_addr;
 			i = trap_pfault(frame, TRUE);
 			if (i == -1)
 				goto userout;
 			if (i == 0)
 				goto user;
 
 			if (i == SIGSEGV)
 				ucode = SEGV_MAPERR;
 			else {
 				if (prot_fault_translation == 0) {
 					/*
 					 * Autodetect.
 					 * This check also covers the images
 					 * without the ABI-tag ELF note.
 					 */
 					if (SV_CURPROC_ABI() == SV_ABI_FREEBSD
 					    && p->p_osrel >= P_OSREL_SIGSEGV) {
 						i = SIGSEGV;
 						ucode = SEGV_ACCERR;
 					} else {
 						i = SIGBUS;
 						ucode = BUS_PAGE_FAULT;
 					}
 				} else if (prot_fault_translation == 1) {
 					/*
 					 * Always compat mode.
 					 */
 					i = SIGBUS;
 					ucode = BUS_PAGE_FAULT;
 				} else {
 					/*
 					 * Always SIGSEGV mode.
 					 */
 					i = SIGSEGV;
 					ucode = SEGV_ACCERR;
 				}
 			}
 			break;
 
 		case T_DIVIDE:		/* integer divide fault */
 			ucode = FPE_INTDIV;
 			i = SIGFPE;
 			break;
 
 #ifdef DEV_ISA
 		case T_NMI:
 			/* machine/parity/power fail/"kitchen sink" faults */
 			if (isa_nmi(code) == 0) {
 #ifdef KDB
 				/*
 				 * NMI can be hooked up to a pushbutton
 				 * for debugging.
 				 */
 				if (kdb_on_nmi) {
 					printf ("NMI ... going to debugger\n");
 					kdb_trap(type, 0, frame);
 				}
 #endif /* KDB */
 				goto userout;
 			} else if (panic_on_nmi)
 				panic("NMI indicates hardware failure");
 			break;
 #endif /* DEV_ISA */
 
 		case T_OFLOW:		/* integer overflow fault */
 			ucode = FPE_INTOVF;
 			i = SIGFPE;
 			break;
 
 		case T_BOUND:		/* bounds check fault */
 			ucode = FPE_FLTSUB;
 			i = SIGFPE;
 			break;
 
 		case T_DNA:
 			/* transparent fault (due to context switch "late") */
 			KASSERT(PCB_USER_FPU(td->td_pcb),
 			    ("kernel FPU ctx has leaked"));
 			fpudna();
 			goto userout;
 
 		case T_FPOPFLT:		/* FPU operand fetch fault */
 			ucode = ILL_COPROC;
 			i = SIGILL;
 			break;
 
 		case T_XMMFLT:		/* SIMD floating-point exception */
 			ucode = fputrap_sse();
 			if (ucode == -1)
 				goto userout;
 			i = SIGFPE;
 			break;
 #ifdef KDTRACE_HOOKS
 		case T_DTRACE_RET:
 			enable_intr();
 			fill_frame_regs(frame, &regs);
 			if (dtrace_return_probe_ptr != NULL &&
 			    dtrace_return_probe_ptr(&regs) == 0)
 				goto out;
 			break;
 #endif
 		}
 	} else {
 		/* kernel trap */
 
 		KASSERT(cold || td->td_ucred != NULL,
 		    ("kernel trap doesn't have ucred"));
 		switch (type) {
 		case T_PAGEFLT:			/* page fault */
 			(void) trap_pfault(frame, FALSE);
 			goto out;
 
 		case T_DNA:
 			KASSERT(!PCB_USER_FPU(td->td_pcb),
 			    ("Unregistered use of FPU in kernel"));
 			fpudna();
 			goto out;
 
 		case T_ARITHTRAP:	/* arithmetic trap */
 		case T_XMMFLT:		/* SIMD floating-point exception */
 		case T_FPOPFLT:		/* FPU operand fetch fault */
 			/*
 			 * For now, supporting kernel handler
 			 * registration for FPU traps is overkill.
 			 */
 			trap_fatal(frame, 0);
 			goto out;
 
 		case T_STKFLT:		/* stack fault */
 		case T_PROTFLT:		/* general protection fault */
 		case T_SEGNPFLT:	/* segment not present fault */
 			if (td->td_intr_nesting_level != 0)
 				break;
 
 			/*
 			 * Invalid segment selectors and out of bounds
 			 * %rip's and %rsp's can be set up in user mode.
 			 * This causes a fault in kernel mode when the
 			 * kernel tries to return to user mode.  We want
 			 * to get this fault so that we can fix the
 			 * problem here and not have to check all the
 			 * selectors and pointers when the user changes
 			 * them.
 			 */
 			if (frame->tf_rip == (long)doreti_iret) {
 				frame->tf_rip = (long)doreti_iret_fault;
 				goto out;
 			}
 			if (frame->tf_rip == (long)ld_ds) {
 				frame->tf_rip = (long)ds_load_fault;
 				goto out;
 			}
 			if (frame->tf_rip == (long)ld_es) {
 				frame->tf_rip = (long)es_load_fault;
 				goto out;
 			}
 			if (frame->tf_rip == (long)ld_fs) {
 				frame->tf_rip = (long)fs_load_fault;
 				goto out;
 			}
 			if (frame->tf_rip == (long)ld_gs) {
 				frame->tf_rip = (long)gs_load_fault;
 				goto out;
 			}
 			if (frame->tf_rip == (long)ld_gsbase) {
 				frame->tf_rip = (long)gsbase_load_fault;
 				goto out;
 			}
 			if (frame->tf_rip == (long)ld_fsbase) {
 				frame->tf_rip = (long)fsbase_load_fault;
 				goto out;
 			}
 			if (curpcb->pcb_onfault != NULL) {
 				frame->tf_rip = (long)curpcb->pcb_onfault;
 				goto out;
 			}
 			break;
 
 		case T_TSSFLT:
 			/*
 			 * PSL_NT can be set in user mode and isn't cleared
 			 * automatically when the kernel is entered.  This
 			 * causes a TSS fault when the kernel attempts to
 			 * `iret' because the TSS link is uninitialized.  We
 			 * want to get this fault so that we can fix the
 			 * problem here and not every time the kernel is
 			 * entered.
 			 */
 			if (frame->tf_rflags & PSL_NT) {
 				frame->tf_rflags &= ~PSL_NT;
 				goto out;
 			}
 			break;
 
 		case T_TRCTRAP:	 /* trace trap */
 			/*
 			 * Ignore debug register trace traps due to
 			 * accesses in the user's address space, which
 			 * can happen under several conditions such as
 			 * if a user sets a watchpoint on a buffer and
 			 * then passes that buffer to a system call.
 			 * We still want to get TRCTRAPS for addresses
 			 * in kernel space because that is useful when
 			 * debugging the kernel.
 			 */
 			if (user_dbreg_trap()) {
 				/*
 				 * Reset breakpoint bits because the
 				 * processor doesn't
 				 */
 				/* XXX check upper bits here */
 				load_dr6(rdr6() & 0xfffffff0);
 				goto out;
 			}
 			/*
 			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
 			 */
 		case T_BPTFLT:
 			/*
 			 * If KDB is enabled, let it handle the debugger trap.
 			 * Otherwise, debugger traps "can't happen".
 			 */
 #ifdef KDB
 			if (kdb_trap(type, 0, frame))
 				goto out;
 #endif
 			break;
 
 #ifdef DEV_ISA
 		case T_NMI:
 			/* machine/parity/power fail/"kitchen sink" faults */
 			if (isa_nmi(code) == 0) {
 #ifdef KDB
 				/*
 				 * NMI can be hooked up to a pushbutton
 				 * for debugging.
 				 */
 				if (kdb_on_nmi) {
 					printf ("NMI ... going to debugger\n");
 					kdb_trap(type, 0, frame);
 				}
 #endif /* KDB */
 				goto out;
 			} else if (panic_on_nmi == 0)
 				goto out;
 			/* FALLTHROUGH */
 #endif /* DEV_ISA */
 		}
 
 		trap_fatal(frame, 0);
 		goto out;
 	}
 
 	/* Translate fault for emulators (e.g. Linux) */
 	if (*p->p_sysent->sv_transtrap)
 		i = (*p->p_sysent->sv_transtrap)(i, type);
 
 	ksiginfo_init_trap(&ksi);
 	ksi.ksi_signo = i;
 	ksi.ksi_code = ucode;
 	ksi.ksi_trapno = type;
 	ksi.ksi_addr = (void *)addr;
 	if (uprintf_signal) {
 		uprintf("pid %d comm %s: signal %d err %lx code %d type %d "
 		    "addr 0x%lx rsp 0x%lx rip 0x%lx "
 		    "<%02x %02x %02x %02x %02x %02x %02x %02x>\n",
 		    p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr,
 		    frame->tf_rsp, frame->tf_rip,
 		    fubyte((void *)(frame->tf_rip + 0)),
 		    fubyte((void *)(frame->tf_rip + 1)),
 		    fubyte((void *)(frame->tf_rip + 2)),
 		    fubyte((void *)(frame->tf_rip + 3)),
 		    fubyte((void *)(frame->tf_rip + 4)),
 		    fubyte((void *)(frame->tf_rip + 5)),
 		    fubyte((void *)(frame->tf_rip + 6)),
 		    fubyte((void *)(frame->tf_rip + 7)));
 	}
 	KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled"));
 	trapsignal(td, &ksi);
 
 user:
 	userret(td, frame);
 	KASSERT(PCB_USER_FPU(td->td_pcb),
 	    ("Return from trap with kernel FPU ctx leaked"));
 userout:
 out:
 	return;
 }
 
 /*
  * Ensure that we ignore any DTrace-induced faults. This function cannot
  * be instrumented, so it cannot generate such faults itself.
  *
  * When KDTRACE_HOOKS is compiled in and dtrace_trap_func is set, that
  * function is called first with the frame and its trap number; a non-zero
  * return ends processing here.  Otherwise the frame is passed to trap().
  */
 void
 trap_check(struct trapframe *frame)
 {
 
 #ifdef KDTRACE_HOOKS
 	if (dtrace_trap_func != NULL &&
 	    (*dtrace_trap_func)(frame, frame->tf_trapno) != 0)
 		return;
 #endif
 	trap(frame);
 }
 
 static int
 trap_pfault(frame, usermode)
 	struct trapframe *frame;
 	int usermode;
 {
 	vm_offset_t va;
 	vm_map_t map;
 	int rv = 0;
 	vm_prot_t ftype;
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
 	vm_offset_t eva = frame->tf_addr;
 
 	if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
 		/*
 		 * Due to both processor errata and lazy TLB invalidation when
 		 * access restrictions are removed from virtual pages, memory
 		 * accesses that are allowed by the physical mapping layer may
 		 * nonetheless cause one spurious page fault per virtual page. 
 		 * When the thread is executing a "no faulting" section that
 		 * is bracketed by vm_fault_{disable,enable}_pagefaults(),
 		 * every page fault is treated as a spurious page fault,
 		 * unless it accesses the same virtual address as the most
 		 * recent page fault within the same "no faulting" section.
 		 */
 		if (td->td_md.md_spurflt_addr != eva ||
 		    (td->td_pflags & TDP_RESETSPUR) != 0) {
 			/*
 			 * Do nothing to the TLB.  A stale TLB entry is
 			 * flushed automatically by a page fault.
 			 */
 			td->td_md.md_spurflt_addr = eva;
 			td->td_pflags &= ~TDP_RESETSPUR;
 			return (0);
 		}
 	} else {
 		/*
 		 * If we get a page fault while in a critical section, then
 		 * it is most likely a fatal kernel page fault.  The kernel
 		 * is already going to panic trying to get a sleep lock to
 		 * do the VM lookup, so just consider it a fatal trap so the
 		 * kernel can print out a useful trap message and even get
 		 * to the debugger.
 		 *
 		 * If we get a page fault while holding a non-sleepable
 		 * lock, then it is most likely a fatal kernel page fault.
 		 * If WITNESS is enabled, then it's going to whine about
 		 * bogus LORs with various VM locks, so just skip to the
 		 * fatal trap handling directly.
 		 */
 		if (td->td_critnest != 0 ||
 		    WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
 		    "Kernel page fault") != 0) {
 			trap_fatal(frame, eva);
 			return (-1);
 		}
 	}
 	va = trunc_page(eva);
 	if (va >= VM_MIN_KERNEL_ADDRESS) {
 		/*
 		 * Don't allow user-mode faults in kernel address space.
 		 */
 		if (usermode)
 			goto nogo;
 
 		map = kernel_map;
 	} else {
 		map = &p->p_vmspace->vm_map;
 
 		/*
 		 * When accessing a usermode address, kernel must be
 		 * ready to accept the page fault, and provide a
 		 * handling routine.  Since accessing the address
 		 * without the handler is a bug, do not try to handle
 		 * it normally, and panic immediately.
 		 */
 		if (!usermode && (td->td_intr_nesting_level != 0 ||
 		    curpcb->pcb_onfault == NULL)) {
 			trap_fatal(frame, eva);
 			return (-1);
 		}
 	}
 
 	/*
 	 * If the trap was caused by errant bits in the PTE then panic.
 	 */
 	if (frame->tf_err & PGEX_RSV) {
 		trap_fatal(frame, eva);
 		return (-1);
 	}
 
 	/*
 	 * PGEX_I is defined only if the execute disable bit capability is
 	 * supported and enabled.
 	 */
 	if (frame->tf_err & PGEX_W)
 		ftype = VM_PROT_WRITE;
 	else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
 		ftype = VM_PROT_EXECUTE;
 	else
 		ftype = VM_PROT_READ;
 
 	/* Fault in the page. */
 	rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
 	if (rv == KERN_SUCCESS) {
 #ifdef HWPMC_HOOKS
 		if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
 			PMC_SOFT_CALL_TF( , , page_fault, all, frame);
 			if (ftype == VM_PROT_READ)
 				PMC_SOFT_CALL_TF( , , page_fault, read,
 				    frame);
 			else
 				PMC_SOFT_CALL_TF( , , page_fault, write,
 				    frame);
 		}
 #endif
 		return (0);
 	}
 nogo:
 	if (!usermode) {
 		if (td->td_intr_nesting_level == 0 &&
 		    curpcb->pcb_onfault != NULL) {
 			frame->tf_rip = (long)curpcb->pcb_onfault;
 			return (0);
 		}
 		trap_fatal(frame, eva);
 		return (-1);
 	}
 	return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
 }
 
 /*
  * Print a description of an unrecoverable trap on the console and panic.
  *
  * frame - trap frame of the faulting context.
  * eva   - faulting virtual address; printed only when the trap is T_PAGEFLT.
  *
  * With KDB compiled in, if debugger_on_panic or kdb_active is set and
  * kdb_trap() returns non-zero, the function returns instead of panicking.
  */
 static void
 trap_fatal(struct trapframe *frame, vm_offset_t eva)
 {
 	int code, ss;
 	u_int type;
 	long esp;
 	struct soft_segment_descriptor softseg;
 	const char *msg;
 
 	code = frame->tf_err;
 	type = frame->tf_trapno;
 	sdtossd(&gdt[NGDT * PCPU_GET(cpuid) + IDXSEL(frame->tf_cs & 0xffff)],
 	    &softseg);
 
 	if (type <= MAX_TRAP_MSG)
 		msg = trap_msg[type];
 	else
 		msg = "UNKNOWN";
 	printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
 	    ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
 #ifdef SMP
 	/* two separate prints in case of a trap on an unmapped page */
 	printf("cpuid = %d; ", PCPU_GET(cpuid));
 	printf("apic id = %02x\n", PCPU_GET(apic_id));
 #endif
 	if (type == T_PAGEFLT) {
 		printf("fault virtual address	= 0x%lx\n", eva);
 		printf("fault code		= %s %s %s%s, %s\n",
 			code & PGEX_U ? "user" : "supervisor",
 			code & PGEX_W ? "write" : "read",
 			code & PGEX_I ? "instruction" : "data",
 			code & PGEX_RSV ? " rsv" : "",
 			code & PGEX_P ? "protection violation" : "page not present");
 	}
 	printf("instruction pointer	= 0x%lx:0x%lx\n",
 	       frame->tf_cs & 0xffff, frame->tf_rip);
 	if (ISPL(frame->tf_cs) == SEL_UPL) {
 		/* Trap came from user mode: report the saved user stack. */
 		ss = frame->tf_ss & 0xffff;
 		esp = frame->tf_rsp;
 	} else {
 		/* Kernel mode: report the kernel data selector and the
 		 * address of the tf_rsp slot within the frame. */
 		ss = GSEL(GDATA_SEL, SEL_KPL);
 		esp = (long)&frame->tf_rsp;
 	}
 	printf("stack pointer	        = 0x%x:0x%lx\n", ss, esp);
 	printf("frame pointer	        = 0x%x:0x%lx\n", ss, frame->tf_rbp);
 	printf("code segment		= base 0x%lx, limit 0x%lx, type 0x%x\n",
 	       softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
 	printf("			= DPL %d, pres %d, long %d, def32 %d, gran %d\n",
 	       softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32,
 	       softseg.ssd_gran);
 	printf("processor eflags	= ");
 	if (frame->tf_rflags & PSL_T)
 		printf("trace trap, ");
 	if (frame->tf_rflags & PSL_I)
 		printf("interrupt enabled, ");
 	if (frame->tf_rflags & PSL_NT)
 		printf("nested task, ");
 	if (frame->tf_rflags & PSL_RF)
 		printf("resume, ");
 	printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12);
 	printf("current process		= %d (%s)\n",
 	    curproc->p_pid, curthread->td_name);
 
 #ifdef KDB
 	if (debugger_on_panic || kdb_active)
 		if (kdb_trap(type, 0, frame))
 			return;
 #endif
 	printf("trap number		= %d\n", type);
 	if (type <= MAX_TRAP_MSG)
 		panic("%s", trap_msg[type]);
 	else
 		panic("unknown/reserved trap");
 }
 
 /*
  * Double fault handler. Called when a fault occurs while writing
  * a frame for a trap/exception onto the stack. This usually occurs
  * when the stack overflows (such is the case with infinite recursion,
  * for example).
  *
  * Prints %rip, %rsp and %rbp from the supplied frame (plus the CPU and
  * APIC ids on SMP kernels) and then calls panic().
  */
 void
 dblfault_handler(struct trapframe *frame)
 {
 #ifdef KDTRACE_HOOKS
 	/* Run DTrace's double-trap hook first, if one is installed. */
 	if (dtrace_doubletrap_func != NULL)
 		(*dtrace_doubletrap_func)();
 #endif
 	printf("\nFatal double fault\n");
 	printf("rip = 0x%lx\n", frame->tf_rip);
 	printf("rsp = 0x%lx\n", frame->tf_rsp);
 	printf("rbp = 0x%lx\n", frame->tf_rbp);
 #ifdef SMP
 	/* two separate prints in case of a trap on an unmapped page */
 	printf("cpuid = %d; ", PCPU_GET(cpuid));
 	printf("apic id = %02x\n", PCPU_GET(apic_id));
 #endif
 	panic("double fault");
 }
 
 int
 cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
 {
 	struct proc *p;
 	struct trapframe *frame;
 	register_t *argp;
 	caddr_t params;
 	int reg, regcnt, error;
 
 	p = td->td_proc;
 	frame = td->td_frame;
 	reg = 0;
 	regcnt = 6;
 
 	params = (caddr_t)frame->tf_rsp + sizeof(register_t);
 	sa->code = frame->tf_rax;
 
 	if (sa->code == SYS_syscall || sa->code == SYS___syscall) {
 		sa->code = frame->tf_rdi;
 		reg++;
 		regcnt--;
 	}
  	if (p->p_sysent->sv_mask)
  		sa->code &= p->p_sysent->sv_mask;
 
  	if (sa->code >= p->p_sysent->sv_size)
  		sa->callp = &p->p_sysent->sv_table[0];
   	else
  		sa->callp = &p->p_sysent->sv_table[sa->code];
 
 	sa->narg = sa->callp->sy_narg;
 	KASSERT(sa->narg <= sizeof(sa->args) / sizeof(sa->args[0]),
 	    ("Too many syscall arguments!"));
 	error = 0;
 	argp = &frame->tf_rdi;
 	argp += reg;
 	bcopy(argp, sa->args, sizeof(sa->args[0]) * regcnt);
 	if (sa->narg > regcnt) {
 		KASSERT(params != NULL, ("copyin args with no params!"));
 		error = copyin(params, &sa->args[regcnt],
 	    	    (sa->narg - regcnt) * sizeof(sa->args[0]));
 	}
 
 	if (error == 0) {
 		td->td_retval[0] = 0;
 		td->td_retval[1] = frame->tf_rdx;
 	}
 
 	return (error);
 }
 
 #include "../../kern/subr_syscall.c"
 
 /*
  * System call handler for native binaries.  The trap frame is already
  * set up by the assembler trampoline and a pointer to it is saved in
  * td_frame.
  *
  * td     - thread making the system call.
  * traced - non-zero when the call was entered with single-stepping active;
  *          PSL_T is then cleared and a SIGTRAP/TRAP_TRACE is posted after
  *          the call has been dispatched.
  */
 void
 amd64_syscall(struct thread *td, int traced)
 {
 	struct syscall_args sa;
 	int error;
 	ksiginfo_t ksi;
 
 #ifdef DIAGNOSTIC
 	if (ISPL(td->td_frame->tf_cs) != SEL_UPL) {
 		panic("syscall");
 		/* NOT REACHED */
 	}
 #endif
 	error = syscallenter(td, &sa);
 
 	/*
 	 * Traced syscall.
 	 */
 	if (__predict_false(traced)) {
 		td->td_frame->tf_rflags &= ~PSL_T;
 		ksiginfo_init_trap(&ksi);
 		ksi.ksi_signo = SIGTRAP;
 		ksi.ksi_code = TRAP_TRACE;
 		ksi.ksi_addr = (void *)td->td_frame->tf_rip;
 		trapsignal(td, &ksi);
 	}
 
 	KASSERT(PCB_USER_FPU(td->td_pcb),
 	    ("System call %s returning with kernel FPU ctx leaked",
 	     syscallname(td->td_proc, sa.code)));
 	KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td),
 	    ("System call %s returning with mangled pcb_save",
 	     syscallname(td->td_proc, sa.code)));
 
 	syscallret(td, error, &sa);
 
 	/*
 	 * If the user-supplied value of %rip is not a canonical
 	 * address, then some CPUs will trigger a ring 0 #GP during
 	 * the sysret instruction.  However, the fault handler would
 	 * execute in ring 0 with the user's %gs and %rsp which would
 	 * not be safe.  Instead, use the full return path which
 	 * catches the problem safely.
 	 */
 	if (td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS)
 		set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 }
Index: projects/clang380-import/sys/amd64/linux/linux_sysvec.c
===================================================================
--- projects/clang380-import/sys/amd64/linux/linux_sysvec.c	(revision 293686)
+++ projects/clang380-import/sys/amd64/linux/linux_sysvec.c	(revision 293687)
@@ -1,943 +1,992 @@
 /*-
  * Copyright (c) 2013 Dmitry Chagin
  * Copyright (c) 2004 Tim J. Robbins
  * Copyright (c) 2003 Peter Wemm
  * Copyright (c) 2002 Doug Rabson
  * Copyright (c) 1998-1999 Andrew Gallatin
  * Copyright (c) 1994-1996 Søren Schmidt
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer
  *    in this position and unchanged.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 
 #define	__ELF_WORD_SIZE	64
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
 #include <sys/imgact.h>
 #include <sys/imgact_elf.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/signalvar.h>
 #include <sys/sysctl.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/vnode.h>
 #include <sys/eventhandler.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_param.h>
 
 #include <machine/cpu.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/specialreg.h>
 
 #include <amd64/linux/linux.h>
 #include <amd64/linux/linux_proto.h>
 #include <compat/linux/linux_emul.h>
 #include <compat/linux/linux_futex.h>
 #include <compat/linux/linux_ioctl.h>
 #include <compat/linux/linux_mib.h>
 #include <compat/linux/linux_misc.h>
 #include <compat/linux/linux_signal.h>
 #include <compat/linux/linux_sysproto.h>
 #include <compat/linux/linux_util.h>
 #include <compat/linux/linux_vdso.h>
 
 MODULE_VERSION(linux64, 1);
 
 #if BYTE_ORDER == LITTLE_ENDIAN
 #define SHELLMAGIC      0x2123 /* #! */
 #else
 #define SHELLMAGIC      0x2321
 #endif
 
 #if defined(DEBUG)
 SYSCTL_PROC(_compat_linux, OID_AUTO, debug,
 	    CTLTYPE_STRING | CTLFLAG_RW,
 	    0, 0, linux_sysctl_debug, "A",
 	    "Linux 64 debugging control");
 #endif
 
 /*
  * Allow the this functions to use the ldebug() facility
  * even though they are not syscalls themselves. Map them
  * to syscall 0. This is slightly less bogus than using
  * ldebug(sigreturn).
  */
 #define	LINUX_SYS_linux_rt_sendsig	0
 
 const char *linux_kplatform;
 static int linux_szsigcode;
 static vm_object_t linux_shared_page_obj;
 static char *linux_shared_page_mapping;
 extern char _binary_linux_locore_o_start;
 extern char _binary_linux_locore_o_end;
 
 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
 
 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
 
 static register_t * linux_copyout_strings(struct image_params *imgp);
 static int	elf_linux_fixup(register_t **stack_base,
 		    struct image_params *iparams);
 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
 static void	linux_vdso_install(void *param);
 static void	linux_vdso_deinstall(void *param);
 static void	linux_set_syscall_retval(struct thread *td, int error);
 static int	linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa);
 static void	linux_exec_setregs(struct thread *td, struct image_params *imgp,
 		    u_long stack);
+static int	linux_vsyscall(struct thread *td);
 
 /*
  * Linux syscalls return negative errno's, we do positive and map them
  * Reference:
  *   FreeBSD: src/sys/sys/errno.h
  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
  *            linux-2.6.17.8/include/asm-generic/errno.h
  */
 static int bsd_to_linux_errno[ELAST + 1] = {
 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
 	 -72, -67, -71
 };
 
 #define LINUX_T_UNKNOWN  255
 static int _bsd_to_linux_trapcode[] = {
 	LINUX_T_UNKNOWN,	/* 0 */
 	6,			/* 1  T_PRIVINFLT */
 	LINUX_T_UNKNOWN,	/* 2 */
 	3,			/* 3  T_BPTFLT */
 	LINUX_T_UNKNOWN,	/* 4 */
 	LINUX_T_UNKNOWN,	/* 5 */
 	16,			/* 6  T_ARITHTRAP */
 	254,			/* 7  T_ASTFLT */
 	LINUX_T_UNKNOWN,	/* 8 */
 	13,			/* 9  T_PROTFLT */
 	1,			/* 10 T_TRCTRAP */
 	LINUX_T_UNKNOWN,	/* 11 */
 	14,			/* 12 T_PAGEFLT */
 	LINUX_T_UNKNOWN,	/* 13 */
 	17,			/* 14 T_ALIGNFLT */
 	LINUX_T_UNKNOWN,	/* 15 */
 	LINUX_T_UNKNOWN,	/* 16 */
 	LINUX_T_UNKNOWN,	/* 17 */
 	0,			/* 18 T_DIVIDE */
 	2,			/* 19 T_NMI */
 	4,			/* 20 T_OFLOW */
 	5,			/* 21 T_BOUND */
 	7,			/* 22 T_DNA */
 	8,			/* 23 T_DOUBLEFLT */
 	9,			/* 24 T_FPOPFLT */
 	10,			/* 25 T_TSSFLT */
 	11,			/* 26 T_SEGNPFLT */
 	12,			/* 27 T_STKFLT */
 	18,			/* 28 T_MCHK */
 	19,			/* 29 T_XMMFLT */
 	15			/* 30 T_RESERVED */
 };
 #define bsd_to_linux_trapcode(code) \
     ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
      _bsd_to_linux_trapcode[(code)]: \
      LINUX_T_UNKNOWN)
 
 LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
 LINUX_VDSO_SYM_CHAR(linux_platform);
 
 /*
  * If FreeBSD & Linux have a difference of opinion about what a trap
  * means, deal with it here.
  *
  * MPSAFE
  */
 static int
 translate_traps(int signal, int trap_code)
 {
 
 	if (signal != SIGBUS)
 		return signal;
 	switch (trap_code) {
 	case T_PROTFLT:
 	case T_TSSFLT:
 	case T_DOUBLEFLT:
 	case T_PAGEFLT:
 		return SIGSEGV;
 	default:
 		return signal;
 	}
 }
 
 static int
 linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
 {
 	struct proc *p;
 	struct trapframe *frame;
 
 	p = td->td_proc;
 	frame = td->td_frame;
 
 	sa->args[0] = frame->tf_rdi;
 	sa->args[1] = frame->tf_rsi;
 	sa->args[2] = frame->tf_rdx;
 	sa->args[3] = frame->tf_rcx;
 	sa->args[4] = frame->tf_r8;
 	sa->args[5] = frame->tf_r9;
 	sa->code = frame->tf_rax;
 
 	if (sa->code >= p->p_sysent->sv_size)
 		/* nosys */
 		sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1];
 	else
 		sa->callp = &p->p_sysent->sv_table[sa->code];
 	sa->narg = sa->callp->sy_narg;
 
 	td->td_retval[0] = 0;
 	return (0);
 }
 
 static void
 linux_set_syscall_retval(struct thread *td, int error)
 {
 	struct trapframe *frame = td->td_frame;
 
 	/*
 	 * On Linux only %rcx and %r11 values are not preserved across
 	 * the syscall.
 	 * So, do not clobber %rdx and %r10
 	 */
 	td->td_retval[1] = frame->tf_rdx;
 	frame->tf_r10 = frame->tf_rcx;
 
 	cpu_set_syscall_retval(td, error);
 
 	 /* Restore all registers. */
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 }
 
 static int
 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
 {
 	Elf_Auxargs *args;
 	Elf_Addr *base;
 	Elf_Addr *pos;
 	struct ps_strings *arginfo;
 	struct proc *p;
 
 	p = imgp->proc;
 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
 
 	KASSERT(curthread->td_proc == imgp->proc,
 	    ("unsafe elf_linux_fixup(), should be curproc"));
 	base = (Elf64_Addr *)*stack_base;
 	args = (Elf64_Auxargs *)imgp->auxargs;
 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
 
 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
 	    imgp->proc->p_sysent->sv_shared_page_base);
 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
 	AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform));
 	AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary);
 	if (imgp->execpathp != 0)
 		AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp);
 	if (args->execfd != -1)
 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
 	AUXARGS_ENTRY(pos, AT_NULL, 0);
 	free(imgp->auxargs, M_TEMP);
 	imgp->auxargs = NULL;
 
 	base--;
 	suword(base, (uint64_t)imgp->args->argc);
 
 	*stack_base = (register_t *)base;
 	return (0);
 }
 
 /*
  * Copy strings out to the new process address space, constructing new arg
  * and env vector tables. Return a pointer to the base so that it can be used
  * as the initial stack pointer.
  */
 static register_t *
 linux_copyout_strings(struct image_params *imgp)
 {
 	int argc, envc;
 	char **vectp;
 	char *stringp, *destp;
 	register_t *stack_base;
 	struct ps_strings *arginfo;
 	char canary[LINUX_AT_RANDOM_LEN];
 	size_t execpath_len;
 	struct proc *p;
 
 	/*
 	 * Calculate string base and vector table pointers.
 	 */
 	if (imgp->execpath != NULL && imgp->auxargs != NULL)
 		execpath_len = strlen(imgp->execpath) + 1;
 	else
 		execpath_len = 0;
 
 	p = imgp->proc;
 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
 	destp =	(caddr_t)arginfo - SPARE_USRSPACE -
 	    roundup(sizeof(canary), sizeof(char *)) -
 	    roundup(execpath_len, sizeof(char *)) -
 	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
 
 	if (execpath_len != 0) {
 		imgp->execpathp = (uintptr_t)arginfo - execpath_len;
 		copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len);
 	}
 
 	/*
 	 * Prepare the canary for SSP.
 	 */
 	arc4rand(canary, sizeof(canary), 0);
 	imgp->canary = (uintptr_t)arginfo -
 	    roundup(execpath_len, sizeof(char *)) -
 	    roundup(sizeof(canary), sizeof(char *));
 	copyout(canary, (void *)imgp->canary, sizeof(canary));
 
 	/*
 	 * If we have a valid auxargs ptr, prepare some room
 	 * on the stack.
 	 */
 	if (imgp->auxargs) {
 		/*
 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
 		 * lower compatibility.
 		 */
 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
 		    (LINUX_AT_COUNT * 2);
 
 		/*
 		 * The '+ 2' is for the null pointers at the end of each of
 		 * the arg and env vector sets,and imgp->auxarg_size is room
 		 * for argument of Runtime loader.
 		 */
 		vectp = (char **)(destp - (imgp->args->argc +
 		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
 
 	} else {
 		/*
 		 * The '+ 2' is for the null pointers at the end of each of
 		 * the arg and env vector sets
 		 */
 		vectp = (char **)(destp - (imgp->args->argc +
 		    imgp->args->envc + 2) * sizeof(char *));
 	}
 
 	/*
 	 * vectp also becomes our initial stack base
 	 */
 	stack_base = (register_t *)vectp;
 
 	stringp = imgp->args->begin_argv;
 	argc = imgp->args->argc;
 	envc = imgp->args->envc;
 
 	/*
 	 * Copy out strings - arguments and environment.
 	 */
 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
 
 	/*
 	 * Fill in "ps_strings" struct for ps, w, etc.
 	 */
 	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
 	suword(&arginfo->ps_nargvstr, argc);
 
 	/*
 	 * Fill in argument portion of vector table.
 	 */
 	for (; argc > 0; --argc) {
 		suword(vectp++, (long)(intptr_t)destp);
 		while (*stringp++ != 0)
 			destp++;
 		destp++;
 	}
 
 	/* a null vector table pointer separates the argp's from the envp's */
 	suword(vectp++, 0);
 
 	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
 	suword(&arginfo->ps_nenvstr, envc);
 
 	/*
 	 * Fill in environment portion of vector table.
 	 */
 	for (; envc > 0; --envc) {
 		suword(vectp++, (long)(intptr_t)destp);
 		while (*stringp++ != 0)
 			destp++;
 		destp++;
 	}
 
 	/* end of vector table is a null pointer */
 	suword(vectp, 0);
 	return (stack_base);
 }
 
 /*
  * Reset registers to default values on exec.
  */
 static void
 linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
 {
 	struct trapframe *regs = td->td_frame;
 	struct pcb *pcb = td->td_pcb;
 
 	mtx_lock(&dt_lock);
 	if (td->td_proc->p_md.md_ldt != NULL)
 		user_ldt_free(td);
 	else
 		mtx_unlock(&dt_lock);
 
 	pcb->pcb_fsbase = 0;
 	pcb->pcb_gsbase = 0;
 	clear_pcb_flags(pcb, PCB_32BIT);
 	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
 	set_pcb_flags(pcb, PCB_FULL_IRET);
 
 	bzero((char *)regs, sizeof(struct trapframe));
 	regs->tf_rip = imgp->entry_addr;
 	regs->tf_rsp = stack;
 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
 	regs->tf_ss = _udatasel;
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _ufssel;
 	regs->tf_gs = _ugssel;
 	regs->tf_flags = TF_HASSEGS;
 
 	/*
 	 * Reset the hardware debug registers if they were in use.
 	 * They won't have any meaning for the newly exec'd process.
 	 */
 	if (pcb->pcb_flags & PCB_DBREGS) {
 		pcb->pcb_dr0 = 0;
 		pcb->pcb_dr1 = 0;
 		pcb->pcb_dr2 = 0;
 		pcb->pcb_dr3 = 0;
 		pcb->pcb_dr6 = 0;
 		pcb->pcb_dr7 = 0;
 		if (pcb == curpcb) {
 			/*
 			 * Clear the debug registers on the running
 			 * CPU, otherwise they will end up affecting
 			 * the next process we switch to.
 			 */
 			reset_dbregs();
 		}
 		clear_pcb_flags(pcb, PCB_DBREGS);
 	}
 
 	/*
 	 * Drop the FP state if we hold it, so that the process gets a
 	 * clean FP state if it uses the FPU again.
 	 */
 	fpstate_drop(td);
 }
 
 /*
  * Copied from amd64/amd64/machdep.c
  *
  * XXX fpu state need? don't think so
  */
 int
 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
 {
 	struct proc *p;
 	struct l_ucontext uc;
 	struct l_sigcontext *context;
 	struct trapframe *regs;
 	unsigned long rflags;
 	int error;
 	ksiginfo_t ksi;
 
 	regs = td->td_frame;
 	error = copyin((void *)regs->tf_rbx, &uc, sizeof(uc));
 	if (error != 0)
 		return (error);
 
 	p = td->td_proc;
 	context = &uc.uc_mcontext;
 	rflags = context->sc_rflags;
 
 	/*
 	 * Don't allow users to change privileged or reserved flags.
 	 */
 	/*
 	 * XXX do allow users to change the privileged flag PSL_RF.
 	 * The cpu sets PSL_RF in tf_rflags for faults.  Debuggers
 	 * should sometimes set it there too.  tf_rflags is kept in
 	 * the signal context during signal handling and there is no
 	 * other place to remember it, so the PSL_RF bit may be
 	 * corrupted by the signal handler without us knowing.
 	 * Corruption of the PSL_RF bit at worst causes one more or
 	 * one less debugger trap, so allowing it is fairly harmless.
 	 */
 
 #define RFLAG_SECURE(ef, oef)     ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
 	if (!RFLAG_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) {
 		printf("linux_rt_sigreturn: rflags = 0x%lx\n", rflags);
 		return (EINVAL);
 	}
 
 	/*
 	 * Don't allow users to load a valid privileged %cs.  Let the
 	 * hardware check for invalid selectors, excess privilege in
 	 * other selectors, invalid %eip's and invalid %esp's.
 	 */
 #define CS_SECURE(cs)           (ISPL(cs) == SEL_UPL)
 	if (!CS_SECURE(context->sc_cs)) {
 		printf("linux_rt_sigreturn: cs = 0x%x\n", context->sc_cs);
 		ksiginfo_init_trap(&ksi);
 		ksi.ksi_signo = SIGBUS;
 		ksi.ksi_code = BUS_OBJERR;
 		ksi.ksi_trapno = T_PROTFLT;
 		ksi.ksi_addr = (void *)regs->tf_rip;
 		trapsignal(td, &ksi);
 		return (EINVAL);
 	}
 
 	PROC_LOCK(p);
 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
 	SIG_CANTMASK(td->td_sigmask);
 	signotify(td);
 	PROC_UNLOCK(p);
 
 	regs->tf_rdi    = context->sc_rdi;
 	regs->tf_rsi    = context->sc_rsi;
 	regs->tf_rdx    = context->sc_rdx;
 	regs->tf_rbp    = context->sc_rbp;
 	regs->tf_rbx    = context->sc_rbx;
 	regs->tf_rcx    = context->sc_rcx;
 	regs->tf_rax    = context->sc_rax;
 	regs->tf_rip    = context->sc_rip;
 	regs->tf_rsp    = context->sc_rsp;
 	regs->tf_r8     = context->sc_r8;
 	regs->tf_r9     = context->sc_r9;
 	regs->tf_r10    = context->sc_r10;
 	regs->tf_r11    = context->sc_r11;
 	regs->tf_r12    = context->sc_r12;
 	regs->tf_r13    = context->sc_r13;
 	regs->tf_r14    = context->sc_r14;
 	regs->tf_r15    = context->sc_r15;
 	regs->tf_cs     = context->sc_cs;
 	regs->tf_err    = context->sc_err;
 	regs->tf_rflags = rflags;
 
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 	return (EJUSTRETURN);
 }
 
 /*
  * copied from amd64/amd64/machdep.c
  *
  * Send an interrupt to process.
  */
 static void
 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct l_rt_sigframe sf, *sfp;
 	struct proc *p;
 	struct thread *td;
 	struct sigacts *psp;
 	caddr_t sp;
 	struct trapframe *regs;
 	int sig, code;
 	int oonstack;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	code = ksi->ksi_code;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	regs = td->td_frame;
 	oonstack = sigonstack(regs->tf_rsp);
 
 	LINUX_CTR4(rt_sendsig, "%p, %d, %p, %u",
 	    catcher, sig, mask, code);
 
 	/* Allocate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size -
 		    sizeof(struct l_rt_sigframe);
 	} else
 		sp = (caddr_t)regs->tf_rsp - sizeof(struct l_rt_sigframe) - 128;
 	/* Align to 16 bytes. */
 	sfp = (struct l_rt_sigframe *)((unsigned long)sp & ~0xFul);
 	mtx_unlock(&psp->ps_mtx);
 
 	/* Translate the signal. */
 	sig = bsd_to_linux_signal(sig);
 
 	/* Save user context. */
 	bzero(&sf, sizeof(sf));
 	bsd_to_linux_sigset(mask, &sf.sf_sc.uc_sigmask);
 	bsd_to_linux_sigset(mask, &sf.sf_sc.uc_mcontext.sc_mask);
 
 	sf.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
 	sf.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
 	sf.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
 	PROC_UNLOCK(p);
 
 	sf.sf_sc.uc_mcontext.sc_rdi    = regs->tf_rdi;
 	sf.sf_sc.uc_mcontext.sc_rsi    = regs->tf_rsi;
 	sf.sf_sc.uc_mcontext.sc_rdx    = regs->tf_rdx;
 	sf.sf_sc.uc_mcontext.sc_rbp    = regs->tf_rbp;
 	sf.sf_sc.uc_mcontext.sc_rbx    = regs->tf_rbx;
 	sf.sf_sc.uc_mcontext.sc_rcx    = regs->tf_rcx;
 	sf.sf_sc.uc_mcontext.sc_rax    = regs->tf_rax;
 	sf.sf_sc.uc_mcontext.sc_rip    = regs->tf_rip;
 	sf.sf_sc.uc_mcontext.sc_rsp    = regs->tf_rsp;
 	sf.sf_sc.uc_mcontext.sc_r8     = regs->tf_r8;
 	sf.sf_sc.uc_mcontext.sc_r9     = regs->tf_r9;
 	sf.sf_sc.uc_mcontext.sc_r10    = regs->tf_r10;
 	sf.sf_sc.uc_mcontext.sc_r11    = regs->tf_r11;
 	sf.sf_sc.uc_mcontext.sc_r12    = regs->tf_r12;
 	sf.sf_sc.uc_mcontext.sc_r13    = regs->tf_r13;
 	sf.sf_sc.uc_mcontext.sc_r14    = regs->tf_r14;
 	sf.sf_sc.uc_mcontext.sc_r15    = regs->tf_r15;
 	sf.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
 	sf.sf_sc.uc_mcontext.sc_rflags = regs->tf_rflags;
 	sf.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
 	sf.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
 	sf.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
 
 	/* Build the argument list for the signal handler. */
 	regs->tf_rdi = sig;			/* arg 1 in %rdi */
 	regs->tf_rax = 0;
 	regs->tf_rsi = (register_t)&sfp->sf_si;	/* arg 2 in %rsi */
 	regs->tf_rdx = (register_t)&sfp->sf_sc;	/* arg 3 in %rdx */
 
 	sf.sf_handler = catcher;
 	/* Fill in POSIX parts */
 	ksiginfo_to_lsiginfo(ksi, &sf.sf_si, sig);
 
 	/*
 	 * Copy the sigframe out to the user's stack.
 	 */
 	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
 #ifdef DEBUG
 		printf("process %ld has trashed its stack\n", (long)p->p_pid);
 #endif
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	regs->tf_rsp = (long)sfp;
 	regs->tf_rip = linux_rt_sigcode;
 	regs->tf_rflags &= ~(PSL_T | PSL_D);
 	regs->tf_cs = _ucodesel;
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 /*
  * If a linux binary is exec'ing something, try this image activator
  * first.  We override standard shell script execution in order to
  * be able to modify the interpreter path.  We only do this if a linux
  * binary is doing the exec, so we do not create an EXEC module for it.
  */
 static int exec_linux_imgact_try(struct image_params *iparams);
 
 static int
 exec_linux_imgact_try(struct image_params *imgp)
 {
 	const char *head = (const char *)imgp->image_header;
 	char *rpath;
 	int error = -1, len;
 
 	/*
 	 * The interpreter for shell scripts run from a linux binary needs
 	 * to be located in /compat/linux if possible in order to recursively
 	 * maintain linux path emulation.
 	 */
 	if (((const short *)head)[0] == SHELLMAGIC) {
 		/*
 		 * Run our normal shell image activator.  If it succeeds
 		 * attempt to use the alternate path for the interpreter.
 		 * If an alternate path is found, use our stringspace
 		 * to store it.
 		 */
 		if ((error = exec_shell_imgact(imgp)) == 0) {
 			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
 			    imgp->interpreter_name, UIO_SYSSPACE,
 			    &rpath, 0, AT_FDCWD);
 			if (rpath != NULL) {
 				len = strlen(rpath) + 1;
 
 				if (len <= MAXSHELLCMDLEN)
 					memcpy(imgp->interpreter_name,
 					    rpath, len);
 				free(rpath, M_TEMP);
 			}
 		}
 	}
 	return(error);
 }
 
+#define	LINUX_VSYSCALL_START		(-10UL << 20)
+#define	LINUX_VSYSCALL_SZ		1024
+
+const unsigned long linux_vsyscall_vector[] = {
+	LINUX_SYS_gettimeofday,
+	LINUX_SYS_linux_time,
+				/* getcpu not implemented */
+};
+
+/*
+ * Installed as sv_trap.  Turns a call into the Linux vsyscall area into
+ * the matching syscall.  Returns EINVAL when %rip is not a vsyscall entry,
+ * the copyin() error when the return address is unreadable, else 0.
+ */
+static int
+linux_vsyscall(struct thread *td)
+{
+	struct trapframe *frame;
+	uint64_t retqaddr;
+	int code, error, traced;
+
+	frame = td->td_frame;
+
+	/* Check %rip for vsyscall area. */
+	if (__predict_true(frame->tf_rip < LINUX_VSYSCALL_START))
+		return (EINVAL);
+	if ((frame->tf_rip & (LINUX_VSYSCALL_SZ - 1)) != 0)
+		return (EINVAL);
+	code = (frame->tf_rip - LINUX_VSYSCALL_START) / LINUX_VSYSCALL_SZ;
+	if (code >= nitems(linux_vsyscall_vector))
+		return (EINVAL);
+
+	/* Called as callq *(%rax): return via the address at %rsp, pop it. */
+	error = copyin((void *)frame->tf_rsp, &retqaddr, sizeof(retqaddr));
+	if (error != 0)
+		return (error);
+
+	frame->tf_rip = retqaddr;
+	frame->tf_rax = linux_vsyscall_vector[code];
+	frame->tf_rsp += 8;
+
+	/* PSL_T is an %rflags bit, so test tf_rflags (tf_flags holds TF_*). */
+	traced = (frame->tf_rflags & PSL_T);
+	amd64_syscall(td, traced);
+
+	return (0);
+}
+
 struct sysentvec elf_linux_sysvec = {
 	.sv_size	= LINUX_SYS_MAXSYSCALL,
 	.sv_table	= linux_sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= ELAST + 1,
 	.sv_errtbl	= bsd_to_linux_errno,
 	.sv_transtrap	= translate_traps,
 	.sv_fixup	= elf_linux_fixup,
 	.sv_sendsig	= linux_rt_sendsig,
 	.sv_sigcode	= &_binary_linux_locore_o_start,
 	.sv_szsigcode	= &linux_szsigcode,
 	.sv_name	= "Linux ELF64",
 	.sv_coredump	= elf64_coredump,
 	.sv_imgact_try	= exec_linux_imgact_try,
 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings = linux_copyout_strings,
 	.sv_setregs	= linux_exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_LINUX | SV_LP64 | SV_SHP,
 	.sv_set_syscall_retval = linux_set_syscall_retval,
 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
 	.sv_syscallnames = NULL,
 	.sv_shared_page_base = SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= linux_schedtail,
-	.sv_thread_detach = linux_thread_detach
+	.sv_thread_detach = linux_thread_detach,
+	.sv_trap	= linux_vsyscall,
 };
 
 static void
 linux_vdso_install(void *param)
 {
 
 	linux_szsigcode = (&_binary_linux_locore_o_end - 
 	    &_binary_linux_locore_o_start);
 
 	if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
 		panic("Linux invalid vdso size\n");
 
 	__elfN(linux_vdso_fixup)(&elf_linux_sysvec);
 
 	linux_shared_page_obj = __elfN(linux_shared_page_init)
 	    (&linux_shared_page_mapping);
 
 	__elfN(linux_vdso_reloc)(&elf_linux_sysvec, SHAREDPAGE);
 
 	bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
 	    linux_szsigcode);
 	elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
 
 	linux_kplatform = linux_shared_page_mapping +
 	    (linux_platform - (caddr_t)SHAREDPAGE);
 }
 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
     (sysinit_cfunc_t)linux_vdso_install, NULL);
 
 static void
 linux_vdso_deinstall(void *param)
 {
 
 	__elfN(linux_shared_page_fini)(linux_shared_page_obj);
 };
 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
     (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
 
 static char GNULINUX_ABI_VENDOR[] = "GNU";
 static int GNULINUX_ABI_DESC = 0;
 
 static boolean_t
 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
 {
 	const Elf32_Word *desc;
 	uintptr_t p;
 
 	p = (uintptr_t)(note + 1);
 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
 
 	desc = (const Elf32_Word *)p;
 	if (desc[0] != GNULINUX_ABI_DESC)
 		return (FALSE);
 
 	/*
 	 * For linux we encode osrel as follows (see linux_mib.c):
 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
 	 */
 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
 
 	return (TRUE);
 }
 
 static Elf_Brandnote linux64_brandnote = {
 	.hdr.n_namesz	= sizeof(GNULINUX_ABI_VENDOR),
 	.hdr.n_descsz	= 16,
 	.hdr.n_type	= 1,
 	.vendor		= GNULINUX_ABI_VENDOR,
 	.flags		= BN_TRANSLATE_OSREL,
 	.trans_osrel	= linux_trans_osrel
 };
 
 static Elf64_Brandinfo linux_glibc2brand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_X86_64,
 	.compat_3_brand	= "Linux",
 	.emul_path	= "/compat/linux",
 	.interp_path	= "/lib64/ld-linux-x86-64.so.2",
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux64_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 static Elf64_Brandinfo linux_glibc2brandshort = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_X86_64,
 	.compat_3_brand	= "Linux",
 	.emul_path	= "/compat/linux",
 	.interp_path	= "/lib64/ld-linux.so.2",
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux64_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 Elf64_Brandinfo *linux_brandlist[] = {
 	&linux_glibc2brand,
 	&linux_glibc2brandshort,
 	NULL
 };
 
 static int
 linux64_elf_modevent(module_t mod, int type, void *data)
 {
 	Elf64_Brandinfo **brandinfo;
 	int error;
 	struct linux_ioctl_handler **lihp;
 
 	error = 0;
 
 	switch(type) {
 	case MOD_LOAD:
 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
 		     ++brandinfo)
 			if (elf64_insert_brand_entry(*brandinfo) < 0)
 				error = EINVAL;
 		if (error == 0) {
 			SET_FOREACH(lihp, linux_ioctl_handler_set)
 				linux_ioctl_register_handler(*lihp);
 			LIST_INIT(&futex_list);
 			mtx_init(&futex_mtx, "ftllk64", NULL, MTX_DEF);
 			stclohz = (stathz ? stathz : hz);
 			if (bootverbose)
 				printf("Linux x86-64 ELF exec handler installed\n");
 		} else
 			printf("cannot insert Linux x86-64 ELF brand handler\n");
 		break;
 	case MOD_UNLOAD:
 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
 		     ++brandinfo)
 			if (elf64_brand_inuse(*brandinfo))
 				error = EBUSY;
 		if (error == 0) {
 			for (brandinfo = &linux_brandlist[0];
 			     *brandinfo != NULL; ++brandinfo)
 				if (elf64_remove_brand_entry(*brandinfo) < 0)
 					error = EINVAL;
 		}
 		if (error == 0) {
 			SET_FOREACH(lihp, linux_ioctl_handler_set)
 				linux_ioctl_unregister_handler(*lihp);
 			mtx_destroy(&futex_mtx);
 			if (bootverbose)
 				printf("Linux ELF exec handler removed\n");
 		} else
 			printf("Could not deinstall ELF interpreter entry\n");
 		break;
 	default:
 		return (EOPNOTSUPP);
 	}
 	return (error);
 }
 
 static moduledata_t linux64_elf_mod = {
 	"linux64elf",
 	linux64_elf_modevent,
 	0
 };
 
 DECLARE_MODULE_TIED(linux64elf, linux64_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
 MODULE_DEPEND(linux64elf, linux_common, 1, 1, 1);
Index: projects/clang380-import/sys/amd64/linux32/linux32_sysvec.c
===================================================================
--- projects/clang380-import/sys/amd64/linux32/linux32_sysvec.c	(revision 293686)
+++ projects/clang380-import/sys/amd64/linux32/linux32_sysvec.c	(revision 293687)
@@ -1,1204 +1,1205 @@
 /*-
  * Copyright (c) 2004 Tim J. Robbins
  * Copyright (c) 2003 Peter Wemm
  * Copyright (c) 2002 Doug Rabson
  * Copyright (c) 1998-1999 Andrew Gallatin
  * Copyright (c) 1994-1996 Søren Schmidt
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer
  *    in this position and unchanged.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 #include "opt_compat.h"
 
 #ifndef COMPAT_FREEBSD32
 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
 #endif
 
 #define	__ELF_WORD_SIZE	32
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
 #include <sys/imgact.h>
 #include <sys/imgact_elf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/signalvar.h>
 #include <sys/sysctl.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/vnode.h>
 #include <sys/eventhandler.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_param.h>
 
 #include <machine/cpu.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/specialreg.h>
 
 #include <amd64/linux32/linux.h>
 #include <amd64/linux32/linux32_proto.h>
 #include <compat/linux/linux_emul.h>
 #include <compat/linux/linux_futex.h>
 #include <compat/linux/linux_ioctl.h>
 #include <compat/linux/linux_mib.h>
 #include <compat/linux/linux_misc.h>
 #include <compat/linux/linux_signal.h>
 #include <compat/linux/linux_util.h>
 #include <compat/linux/linux_vdso.h>
 
 MODULE_VERSION(linux, 1);
 
 #define	AUXARGS_ENTRY_32(pos, id, val)	\
 	do {				\
 		suword32(pos++, id);	\
 		suword32(pos++, val);	\
 	} while (0)
 
 #if BYTE_ORDER == LITTLE_ENDIAN
 #define SHELLMAGIC      0x2123 /* #! */
 #else
 #define SHELLMAGIC      0x2321
 #endif
 
 /*
  * Allow the sendsig functions to use the ldebug() facility
  * even though they are not syscalls themselves. Map them
  * to syscall 0. This is slightly less bogus than using
  * ldebug(sigreturn).
  */
 #define	LINUX32_SYS_linux_rt_sendsig	0
 #define	LINUX32_SYS_linux_sendsig	0
 
 const char *linux_kplatform;
 static int linux_szsigcode;
 static vm_object_t linux_shared_page_obj;
 static char *linux_shared_page_mapping;
 extern char _binary_linux32_locore_o_start;
 extern char _binary_linux32_locore_o_end;
 
 extern struct sysent linux32_sysent[LINUX32_SYS_MAXSYSCALL];
 
 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
 
 static int	elf_linux_fixup(register_t **stack_base,
 		    struct image_params *iparams);
 static register_t *linux_copyout_strings(struct image_params *imgp);
 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
 static void	exec_linux_setregs(struct thread *td, 
 				   struct image_params *imgp, u_long stack);
 static void	linux32_fixlimit(struct rlimit *rl, int which);
 static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
 static void	linux_vdso_install(void *param);
 static void	linux_vdso_deinstall(void *param);
 
 /*
  * Linux syscalls return negative errno's, we do positive and map them
  * Reference:
  *   FreeBSD: src/sys/sys/errno.h
  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
  *            linux-2.6.17.8/include/asm-generic/errno.h
  */
 static int bsd_to_linux_errno[ELAST + 1] = {
 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
 	 -72, -67, -71
 };
 
 /* Value reported for a trap code that has no entry in the table below. */
 #define LINUX_T_UNKNOWN  255
 /*
  * Translation table indexed by the FreeBSD trap code (the number in each
  * trailing comment); the stored value is what is placed in the Linux
  * signal context's trapno field.
  */
 static int _bsd_to_linux_trapcode[] = {
 	LINUX_T_UNKNOWN,	/* 0 */
 	6,			/* 1  T_PRIVINFLT */
 	LINUX_T_UNKNOWN,	/* 2 */
 	3,			/* 3  T_BPTFLT */
 	LINUX_T_UNKNOWN,	/* 4 */
 	LINUX_T_UNKNOWN,	/* 5 */
 	16,			/* 6  T_ARITHTRAP */
 	254,			/* 7  T_ASTFLT */
 	LINUX_T_UNKNOWN,	/* 8 */
 	13,			/* 9  T_PROTFLT */
 	1,			/* 10 T_TRCTRAP */
 	LINUX_T_UNKNOWN,	/* 11 */
 	14,			/* 12 T_PAGEFLT */
 	LINUX_T_UNKNOWN,	/* 13 */
 	17,			/* 14 T_ALIGNFLT */
 	LINUX_T_UNKNOWN,	/* 15 */
 	LINUX_T_UNKNOWN,	/* 16 */
 	LINUX_T_UNKNOWN,	/* 17 */
 	0,			/* 18 T_DIVIDE */
 	2,			/* 19 T_NMI */
 	4,			/* 20 T_OFLOW */
 	5,			/* 21 T_BOUND */
 	7,			/* 22 T_DNA */
 	8,			/* 23 T_DOUBLEFLT */
 	9,			/* 24 T_FPOPFLT */
 	10,			/* 25 T_TSSFLT */
 	11,			/* 26 T_SEGNPFLT */
 	12,			/* 27 T_STKFLT */
 	18,			/* 28 T_MCHK */
 	19,			/* 29 T_XMMFLT */
 	15			/* 30 T_RESERVED */
 };
 /*
  * Bounds-checked lookup in the table above; an index past the end yields
  * LINUX_T_UNKNOWN.  The comparison is made against a sizeof expression,
  * so a negative int argument is converted to a large unsigned value and
  * also ends up as LINUX_T_UNKNOWN.
  */
 #define bsd_to_linux_trapcode(code) \
     ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
      _bsd_to_linux_trapcode[(code)]: \
      LINUX_T_UNKNOWN)
 
 struct linux32_ps_strings {
 	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
 	u_int ps_nargvstr;	/* the number of argument strings */
 	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
 	u_int ps_nenvstr;	/* the number of environment strings */
 };
 
 LINUX_VDSO_SYM_INTPTR(linux32_sigcode);
 LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode);
 LINUX_VDSO_SYM_INTPTR(linux32_vsyscall);
 LINUX_VDSO_SYM_CHAR(linux_platform);
 
 /*
  * If FreeBSD & Linux have a difference of opinion about what a trap
  * means, deal with it here.
  *
  * MPSAFE
  */
 static int
 translate_traps(int signal, int trap_code)
 {
 	int is_memfault;
 
 	/*
 	 * Only a SIGBUS raised by one of these four traps is rewritten to
 	 * SIGSEGV; every other signal/trap pair is passed through untouched.
 	 */
 	is_memfault = (trap_code == T_PROTFLT || trap_code == T_TSSFLT ||
 	    trap_code == T_DOUBLEFLT || trap_code == T_PAGEFLT);
 	if (signal == SIGBUS && is_memfault)
 		return (SIGSEGV);
 	return (signal);
 }
 
 /*
  * Finish the initial user stack of a freshly exec'ed 32-bit Linux image.
  *
  * Writes the ELF auxiliary vector immediately after the argv and envp
  * pointer arrays (argc + envc + 2 32-bit slots above *stack_base),
  * frees imgp->auxargs, then stores argc one slot below the vectors and
  * hands the resulting stack pointer back through stack_base.
  *
  * The return values of the user-space stores are not checked; the
  * function always returns 0.
  */
 static int
 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
 {
 	Elf32_Auxargs *args;
 	Elf32_Addr *base;
 	Elf32_Addr *pos;
 
 	KASSERT(curthread->td_proc == imgp->proc,
 	    ("unsafe elf_linux_fixup(), should be curproc"));
 	base = (Elf32_Addr *)*stack_base;
 	args = (Elf32_Auxargs *)imgp->auxargs;
 	/* Skip argv[], envp[] and the NULL terminating each of them. */
 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
 
 	/*
 	 * Every entry below must go through AUXARGS_ENTRY_32(): pos walks
 	 * 32-bit slots, so both the id and the value have to be stored with
 	 * suword32() to keep each entry exactly two slots wide.
 	 */
 	AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR,
 	    imgp->proc->p_sysent->sv_shared_page_base);
 	AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall);
 	AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
 
 	/*
 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
 	 * as it has appeared in the 2.4.0-rc7 first time.
 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
 	 * is not present.
 	 * Also see linux_times() implementation.
 	 */
 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
 		AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
 	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
 	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
 	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
 	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
 	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
 	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
 	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
 	AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
 	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
 	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
 	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
 	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
 	AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform));
 	AUXARGS_ENTRY_32(pos, LINUX_AT_RANDOM, PTROUT(imgp->canary));
 	if (imgp->execpathp != 0)
 		AUXARGS_ENTRY_32(pos, LINUX_AT_EXECFN,
 		    PTROUT(imgp->execpathp));
 	if (args->execfd != -1)
 		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
 	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
 
 	/* The aux arguments have been consumed; release them. */
 	free(imgp->auxargs, M_TEMP);
 	imgp->auxargs = NULL;
 
 	/* argc occupies the slot directly below argv[0]. */
 	base--;
 	suword32(base, (uint32_t)imgp->args->argc);
 	*stack_base = (register_t *)base;
 	return (0);
 }
 
 /*
  * Deliver a signal using the Linux "rt" frame layout (struct
  * l_rt_sigframe), which carries a siginfo and a ucontext.
  *
  * The frame is assembled in a kernel-side copy and then copied out to
  * the user stack (or to the alternate signal stack when one is enabled,
  * not already in use, and requested for this signal).  On success the
  * trapframe is rewritten so the thread resumes in linux32_rt_sigcode
  * with the stack pointer at the new frame.  If the copyout fails the
  * process is terminated through sigexit(td, SIGILL).
  *
  * Locking: entered with the process lock and ps_mtx held (asserted
  * below); both are dropped while the frame is built and copied out, and
  * both are re-acquired before returning.
  */
 static void
 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	struct l_rt_sigframe *fp, frame;
 	int oonstack;
 	int sig;
 	int code;
 	
 	sig = ksi->ksi_signo;
 	code = ksi->ksi_code;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	regs = td->td_frame;
 	oonstack = sigonstack(regs->tf_rsp);
 
 #ifdef DEBUG
 	if (ldebug(rt_sendsig))
 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
 		    catcher, sig, (void*)mask, code);
 #endif
 	/*
 	 * Allocate space for the signal handler context.
 	 */
 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Frame goes at the very top of the alternate stack. */
 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
 	} else
 		/* Frame goes directly below the interrupted stack pointer. */
 		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
 	mtx_unlock(&psp->ps_mtx);
 
 	/*
 	 * Build the argument list for the signal handler.
 	 */
 	sig = bsd_to_linux_signal(sig);
 
 	bzero(&frame, sizeof(frame));
 
 	/* The pointers stored here are user addresses inside *fp. */
 	frame.sf_handler = PTROUT(catcher);
 	frame.sf_sig = sig;
 	frame.sf_siginfo = PTROUT(&fp->sf_si);
 	frame.sf_ucontext = PTROUT(&fp->sf_sc);
 
 	/* Fill in POSIX parts */
 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
 
 	/*
 	 * Build the signal context to be used by sigreturn
 	 * and libgcc unwind.
 	 */
 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
 	frame.sf_sc.uc_link = 0;		/* XXX ??? */
 
 	/* td_sigstk is sampled before the process lock is released. */
 	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
 	PROC_UNLOCK(p);
 
 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
 
 	/* Snapshot of the interrupted register state for sigreturn. */
 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__mask;
 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
 	frame.sf_sc.uc_mcontext.sc_esp    = regs->tf_rsp;
 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
 	frame.sf_sc.uc_mcontext.sc_gs     = regs->tf_gs;
 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
 	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
 
 #ifdef DEBUG
 	if (ldebug(rt_sendsig))
 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
 #endif
 
 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 #ifdef DEBUG
 		if (ldebug(rt_sendsig))
 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
 			    fp, oonstack);
 #endif
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/*
 	 * Build context to run handler in.
 	 */
 	regs->tf_rsp = PTROUT(fp);
 	regs->tf_rip = linux32_rt_sigcode;
 	regs->tf_rflags &= ~(PSL_T | PSL_D);
 	regs->tf_cs = _ucode32sel;
 	regs->tf_ss = _udatasel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _ufssel;
 	regs->tf_gs = _ugssel;
 	regs->tf_flags = TF_HASSEGS;
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 	/* Re-take the locks the caller held on entry. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 
 /*
  * Send an interrupt to process.
  *
  * Stack is set up to allow sigcode stored
  * in u. to call routine, followed by kcall
  * to sigreturn routine below.  After sigreturn
  * resets the signal mask, the stack, and the
  * frame pointer, it returns to the user
  * specified pc, psl.
  */
 /*
  * Signal delivery entry point installed as sv_sendsig.
  *
  * Signals whose handler is recorded in ps_siginfo are handed to
  * linux_rt_sendsig().  All others get the classic frame (struct
  * l_sigframe): it is assembled in a kernel-side copy, copied out to the
  * user or alternate stack, and the trapframe is rewritten so the thread
  * resumes in linux32_sigcode.  If the copyout fails the process is
  * terminated through sigexit(td, SIGILL).
  *
  * Locking: entered with the process lock and ps_mtx held (asserted
  * below); both are dropped before the frame is built and re-acquired
  * before returning.
  */
 static void
 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	struct l_sigframe *fp, frame;
 	l_sigset_t lmask;
 	int oonstack;
 	int sig, code;
 
 	sig = ksi->ksi_signo;
 	code = ksi->ksi_code;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		linux_rt_sendsig(catcher, ksi, mask);
 		return;
 	}
 
 	regs = td->td_frame;
 	oonstack = sigonstack(regs->tf_rsp);
 
 #ifdef DEBUG
 	if (ldebug(sendsig))
 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
 		    catcher, sig, (void*)mask, code);
 #endif
 
 	/*
 	 * Allocate space for the signal handler context.
 	 */
 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Frame goes at the very top of the alternate stack. */
 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
 	} else
 		/* Frame goes directly below the interrupted stack pointer. */
 		fp = (struct l_sigframe *)regs->tf_rsp - 1;
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(p);
 
 	/*
 	 * Build the argument list for the signal handler.
 	 */
 	sig = bsd_to_linux_signal(sig);
 
 	bzero(&frame, sizeof(frame));
 
 	frame.sf_handler = PTROUT(catcher);
 	frame.sf_sig = sig;
 
 	bsd_to_linux_sigset(mask, &lmask);
 
 	/*
 	 * Build the signal context to be used by sigreturn.
 	 */
 	frame.sf_sc.sc_mask   = lmask.__mask;
 	frame.sf_sc.sc_gs     = regs->tf_gs;
 	frame.sf_sc.sc_fs     = regs->tf_fs;
 	frame.sf_sc.sc_es     = regs->tf_es;
 	frame.sf_sc.sc_ds     = regs->tf_ds;
 	frame.sf_sc.sc_edi    = regs->tf_rdi;
 	frame.sf_sc.sc_esi    = regs->tf_rsi;
 	frame.sf_sc.sc_ebp    = regs->tf_rbp;
 	frame.sf_sc.sc_ebx    = regs->tf_rbx;
 	frame.sf_sc.sc_esp    = regs->tf_rsp;
 	frame.sf_sc.sc_edx    = regs->tf_rdx;
 	frame.sf_sc.sc_ecx    = regs->tf_rcx;
 	frame.sf_sc.sc_eax    = regs->tf_rax;
 	frame.sf_sc.sc_eip    = regs->tf_rip;
 	frame.sf_sc.sc_cs     = regs->tf_cs;
 	frame.sf_sc.sc_eflags = regs->tf_rflags;
 	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
 	frame.sf_sc.sc_ss     = regs->tf_ss;
 	frame.sf_sc.sc_err    = regs->tf_err;
 	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
 
 	/*
 	 * NOTE(review): sc_mask above and sf_extramask[0] here are both
 	 * assigned the same lmask.__mask value -- confirm against the
 	 * l_sigset_t definition whether extramask should carry other bits.
 	 */
 	frame.sf_extramask[0] = lmask.__mask;
 
 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/*
 	 * Build context to run handler in.
 	 */
 	regs->tf_rsp = PTROUT(fp);
 	regs->tf_rip = linux32_sigcode;
 	regs->tf_rflags &= ~(PSL_T | PSL_D);
 	regs->tf_cs = _ucode32sel;
 	regs->tf_ss = _udatasel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _ufssel;
 	regs->tf_gs = _ugssel;
 	regs->tf_flags = TF_HASSEGS;
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 	/* Re-take the locks the caller held on entry. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 /*
  * System call to cleanup state after a signal
  * has been taken.  Reset signal mask and
  * stack state from context left by sendsig (above).
  * Return to previous pc and psl as specified by
  * context left by sendsig. Check carefully to
  * make sure that the user has not modified the
  * psl to gain improper privileges or to cause
  * a machine fault.
  */
 /*
  * linux_sigreturn: restore thread state from a classic l_sigframe.
  *
  * Returns EFAULT when the frame cannot be copied in, EINVAL when the
  * saved eflags or %cs fail the checks below (a bad %cs additionally
  * posts SIGBUS through trapsignal()), and EJUSTRETURN once the signal
  * mask and the trapframe have been replaced with the saved values.
  */
 int
 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
 {
 	struct l_sigframe frame;
 	struct trapframe *regs;
 	sigset_t bmask;
 	l_sigset_t lmask;
 	int eflags;
 	ksiginfo_t ksi;
 
 	regs = td->td_frame;
 
 #ifdef DEBUG
 	if (ldebug(sigreturn))
 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
 #endif
 	/*
 	 * The trampoline code hands us the sigframe.
 	 * It is unsafe to keep track of it ourselves, in the event that a
 	 * program jumps out of a signal handler.
 	 */
 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
 		return (EFAULT);
 
 	/*
 	 * Check for security violations.
 	 */
 	/* True when ef and oef differ only in bits covered by PSL_USERCHANGE. */
 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
 	eflags = frame.sf_sc.sc_eflags;
 	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
 		return(EINVAL);
 
 	/*
 	 * Don't allow users to load a valid privileged %cs.  Let the
 	 * hardware check for invalid selectors, excess privilege in
 	 * other selectors, invalid %eip's and invalid %esp's.
 	 */
 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
 		ksiginfo_init_trap(&ksi);
 		ksi.ksi_signo = SIGBUS;
 		ksi.ksi_code = BUS_OBJERR;
 		ksi.ksi_trapno = T_PROTFLT;
 		ksi.ksi_addr = (void *)regs->tf_rip;
 		trapsignal(td, &ksi);
 		return(EINVAL);
 	}
 
 	/*
 	 * NOTE(review): the first store to lmask.__mask is overwritten by
 	 * the second before it is ever read, so only sf_extramask[0] takes
 	 * effect.  Confirm against the l_sigset_t layout whether the two
 	 * words were meant to be combined.
 	 */
 	lmask.__mask = frame.sf_sc.sc_mask;
 	lmask.__mask = frame.sf_extramask[0];
 	linux_to_bsd_sigset(&lmask, &bmask);
 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
 
 	/*
 	 * Restore signal context.
 	 */
 	regs->tf_rdi    = frame.sf_sc.sc_edi;
 	regs->tf_rsi    = frame.sf_sc.sc_esi;
 	regs->tf_rbp    = frame.sf_sc.sc_ebp;
 	regs->tf_rbx    = frame.sf_sc.sc_ebx;
 	regs->tf_rdx    = frame.sf_sc.sc_edx;
 	regs->tf_rcx    = frame.sf_sc.sc_ecx;
 	regs->tf_rax    = frame.sf_sc.sc_eax;
 	regs->tf_rip    = frame.sf_sc.sc_eip;
 	regs->tf_cs     = frame.sf_sc.sc_cs;
 	regs->tf_ds     = frame.sf_sc.sc_ds;
 	regs->tf_es     = frame.sf_sc.sc_es;
 	regs->tf_fs     = frame.sf_sc.sc_fs;
 	regs->tf_gs     = frame.sf_sc.sc_gs;
 	regs->tf_rflags = eflags;
 	/* The stack pointer comes from sc_esp_at_signal, not sc_esp. */
 	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
 	regs->tf_ss     = frame.sf_sc.sc_ss;
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 
 	return (EJUSTRETURN);
 }
 
 /*
  * System call to cleanup state after a signal
  * has been taken.  Reset signal mask and
  * stack state from context left by rt_sendsig (above).
  * Return to previous pc and psl as specified by
  * context left by sendsig. Check carefully to
  * make sure that the user has not modified the
  * psl to gain improper privileges or to cause
  * a machine fault.
  */
 /*
  * linux_rt_sigreturn: restore thread state from an l_ucontext.
  *
  * Returns EFAULT when the ucontext cannot be copied in, EINVAL when the
  * saved eflags or %cs fail the checks below (a bad %cs additionally
  * posts SIGBUS through trapsignal()), and EJUSTRETURN once the signal
  * mask, the trapframe and the alternate-stack settings have been
  * replaced with the saved values.
  */
 int
 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
 {
 	struct l_ucontext uc;
 	struct l_sigcontext *context;
 	sigset_t bmask;
 	l_stack_t *lss;
 	stack_t ss;
 	struct trapframe *regs;
 	int eflags;
 	ksiginfo_t ksi;
 
 	regs = td->td_frame;
 
 #ifdef DEBUG
 	if (ldebug(rt_sigreturn))
 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
 #endif
 	/*
 	 * The trampoline code hands us the ucontext.
 	 * It is unsafe to keep track of it ourselves, in the event that a
 	 * program jumps out of a signal handler.
 	 */
 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
 		return (EFAULT);
 
 	context = &uc.uc_mcontext;
 
 	/*
 	 * Check for security violations.
 	 */
 	/* True when ef and oef differ only in bits covered by PSL_USERCHANGE. */
 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
 	eflags = context->sc_eflags;
 	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
 		return(EINVAL);
 
 	/*
 	 * Don't allow users to load a valid privileged %cs.  Let the
 	 * hardware check for invalid selectors, excess privilege in
 	 * other selectors, invalid %eip's and invalid %esp's.
 	 */
 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
 	if (!CS_SECURE(context->sc_cs)) {
 		ksiginfo_init_trap(&ksi);
 		ksi.ksi_signo = SIGBUS;
 		ksi.ksi_code = BUS_OBJERR;
 		ksi.ksi_trapno = T_PROTFLT;
 		ksi.ksi_addr = (void *)regs->tf_rip;
 		trapsignal(td, &ksi);
 		return(EINVAL);
 	}
 
 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
 
 	/*
 	 * Restore signal context
 	 */
 	regs->tf_gs	= context->sc_gs;
 	regs->tf_fs	= context->sc_fs;
 	regs->tf_es	= context->sc_es;
 	regs->tf_ds	= context->sc_ds;
 	regs->tf_rdi    = context->sc_edi;
 	regs->tf_rsi    = context->sc_esi;
 	regs->tf_rbp    = context->sc_ebp;
 	regs->tf_rbx    = context->sc_ebx;
 	regs->tf_rdx    = context->sc_edx;
 	regs->tf_rcx    = context->sc_ecx;
 	regs->tf_rax    = context->sc_eax;
 	regs->tf_rip    = context->sc_eip;
 	regs->tf_cs     = context->sc_cs;
 	regs->tf_rflags = eflags;
 	/* The stack pointer comes from sc_esp_at_signal, not sc_esp. */
 	regs->tf_rsp    = context->sc_esp_at_signal;
 	regs->tf_ss     = context->sc_ss;
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 
 	/*
 	 * call sigaltstack & ignore results..
 	 */
 	lss = &uc.uc_stack;
 	ss.ss_sp = PTRIN(lss->ss_sp);
 	ss.ss_size = lss->ss_size;
 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
 
 #ifdef DEBUG
 	if (ldebug(rt_sigreturn))
 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
 #endif
 	(void)kern_sigaltstack(td, &ss, NULL);
 
 	return (EJUSTRETURN);
 }
 
 static int
 linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
 {
 	struct proc *p;
 	struct trapframe *frame;
 
 	p = td->td_proc;
 	frame = td->td_frame;
 
 	sa->args[0] = frame->tf_rbx;
 	sa->args[1] = frame->tf_rcx;
 	sa->args[2] = frame->tf_rdx;
 	sa->args[3] = frame->tf_rsi;
 	sa->args[4] = frame->tf_rdi;
 	sa->args[5] = frame->tf_rbp;	/* Unconfirmed */
 	sa->code = frame->tf_rax;
 
 	if (sa->code >= p->p_sysent->sv_size)
 		/* nosys */
 		sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1];
 	else
 		sa->callp = &p->p_sysent->sv_table[sa->code];
 	sa->narg = sa->callp->sy_narg;
 
 	td->td_retval[0] = 0;
 	td->td_retval[1] = frame->tf_rdx;
 
 	return (0);
 }
 
 /*
  * If a linux binary is exec'ing something, try this image activator
  * first.  We override standard shell script execution in order to
  * be able to modify the interpreter path.  We only do this if a linux
  * binary is doing the exec, so we do not create an EXEC module for it.
  */
 static int	exec_linux_imgact_try(struct image_params *iparams);
 
 static int
 exec_linux_imgact_try(struct image_params *imgp)
 {
 	const short *magic;
 	char *altpath;
 	int rv;
 
 	/*
 	 * Only "#!" scripts are handled here; anything else reports -1.
 	 * The interpreter for a script run from a linux binary should be
 	 * looked up under /compat/linux when possible, so that linux path
 	 * emulation carries over to the interpreter as well.
 	 */
 	magic = (const short *)imgp->image_header;
 	if (magic[0] != SHELLMAGIC)
 		return (-1);
 
 	/* Let the regular shell image activator parse the script first. */
 	rv = exec_shell_imgact(imgp);
 	if (rv != 0)
 		return (rv);
 
 	/*
 	 * If an alternate path for the interpreter is found, switch to it;
 	 * the returned buffer is recorded in fname_buf as well.
 	 */
 	linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
 	    imgp->interpreter_name, UIO_SYSSPACE, &altpath, 0, AT_FDCWD);
 	if (altpath != NULL)
 		imgp->args->fname_buf = imgp->interpreter_name = altpath;
 	return (rv);
 }
 
 /*
  * Clear registers on exec
  * XXX copied from ia32_signal.c.
  */
 /*
  * Reset the thread's machine state for a newly exec'ed 32-bit Linux
  * image: drop any per-process LDT, zero the FS/GS bases, clear the
  * trapframe and point it at the image entry address and initial stack
  * with the 32-bit user code/data selectors, and discard FPU state.
  */
 static void
 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
 {
 	struct trapframe *regs = td->td_frame;
 	struct pcb *pcb = td->td_pcb;
 
 	/*
 	 * dt_lock is only released explicitly on the no-LDT path;
 	 * presumably user_ldt_free() drops it itself -- TODO confirm.
 	 */
 	mtx_lock(&dt_lock);
 	if (td->td_proc->p_md.md_ldt != NULL)
 		user_ldt_free(td);
 	else
 		mtx_unlock(&dt_lock);
 
 	/* Keep the MSRs and their pcb copies in step. */
 	critical_enter();
 	wrmsr(MSR_FSBASE, 0);
 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
 	pcb->pcb_fsbase = 0;
 	pcb->pcb_gsbase = 0;
 	critical_exit();
 	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
 
 	bzero((char *)regs, sizeof(struct trapframe));
 	regs->tf_rip = imgp->entry_addr;
 	regs->tf_rsp = stack;
 	/*
 	 * NOTE(review): tf_rflags was zeroed by the bzero() above, so the
 	 * "& PSL_T" term is always 0 here and the result is just PSL_USER.
 	 */
 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
 	regs->tf_gs = _ugssel;
 	regs->tf_fs = _ufssel;
 	regs->tf_es = _udatasel;
 	regs->tf_ds = _udatasel;
 	regs->tf_ss = _udatasel;
 	regs->tf_flags = TF_HASSEGS;
 	regs->tf_cs = _ucode32sel;
 	/* The new image receives the ps_strings address in %ebx. */
 	regs->tf_rbx = imgp->ps_strings;
 
 	fpstate_drop(td);
 
 	/* Do full restore on return so that we can change to a different %cs */
 	set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
 	td->td_retval[1] = 0;
 }
 
 /*
  * XXX copied from ia32_sysvec.c.
  */
 /*
  * Lay out the top of the new image's user stack and return the initial
  * stack base.
  *
  * Working downward from LINUX32_PS_STRINGS the function places the
  * executable path (only when both execpath and auxargs are set), the
  * random canary bytes, the argument/environment strings, and below them
  * the 32-bit argv/envp pointer vectors, with room reserved for the aux
  * vector when auxargs is present.  It also fills in the
  * linux32_ps_strings structure.
  *
  * NOTE(review): the results of copyout() and suword32() are not
  * checked anywhere in this function.
  */
 static register_t *
 linux_copyout_strings(struct image_params *imgp)
 {
 	int argc, envc;
 	u_int32_t *vectp;
 	char *stringp, *destp;
 	u_int32_t *stack_base;
 	struct linux32_ps_strings *arginfo;
 	char canary[LINUX_AT_RANDOM_LEN];
 	size_t execpath_len;
 
 	/*
 	 * Calculate string base and vector table pointers.
 	 */
 	if (imgp->execpath != NULL && imgp->auxargs != NULL)
 		execpath_len = strlen(imgp->execpath) + 1;
 	else
 		execpath_len = 0;
 
 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
 	/* destp: start of the string area, below canary and exec path. */
 	destp =	(caddr_t)arginfo - SPARE_USRSPACE -
 	    roundup(sizeof(canary), sizeof(char *)) -
 	    roundup(execpath_len, sizeof(char *)) -
 	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
 
 	if (execpath_len != 0) {
 		/* The path is placed so that it ends right at arginfo. */
 		imgp->execpathp = (uintptr_t)arginfo - execpath_len;
 		copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len);
 	}
 
 	/*
 	 * Prepare the canary for SSP.
 	 */
 	arc4rand(canary, sizeof(canary), 0);
 	imgp->canary = (uintptr_t)arginfo -
 	    roundup(execpath_len, sizeof(char *)) -
 	    roundup(sizeof(canary), sizeof(char *));
 	copyout(canary, (void *)imgp->canary, sizeof(canary));
 
 	/*
 	 * If we have a valid auxargs ptr, prepare some room
 	 * on the stack.
 	 */
 	if (imgp->auxargs) {
 		/*
 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
 		 * lower compatibility.
 		 */
 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
 		    (LINUX_AT_COUNT * 2);
 		/*
 		 * The '+ 2' is for the null pointers at the end of each of
 		 * the arg and env vector sets,and imgp->auxarg_size is room
 		 * for argument of Runtime loader.
 		 */
 		vectp = (u_int32_t *) (destp - (imgp->args->argc +
 		    imgp->args->envc + 2 + imgp->auxarg_size) *
 		    sizeof(u_int32_t));
 
 	} else
 		/*
 		 * The '+ 2' is for the null pointers at the end of each of
 		 * the arg and env vector sets
 		 */
 		vectp = (u_int32_t *)(destp - (imgp->args->argc +
 		    imgp->args->envc + 2) * sizeof(u_int32_t));
 
 	/*
 	 * vectp also becomes our initial stack base
 	 */
 	stack_base = vectp;
 
 	stringp = imgp->args->begin_argv;
 	argc = imgp->args->argc;
 	envc = imgp->args->envc;
 	/*
 	 * Copy out strings - arguments and environment.
 	 */
 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
 
 	/*
 	 * Fill in "ps_strings" struct for ps, w, etc.
 	 */
 	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
 	suword32(&arginfo->ps_nargvstr, argc);
 
 	/*
 	 * Fill in argument portion of vector table.
 	 */
 	for (; argc > 0; --argc) {
 		suword32(vectp++, (uint32_t)(intptr_t)destp);
 		/*
 		 * Walk the kernel copy of the string to advance the user
 		 * address past it, including its terminating NUL.
 		 */
 		while (*stringp++ != 0)
 			destp++;
 		destp++;
 	}
 
 	/* a null vector table pointer separates the argp's from the envp's */
 	suword32(vectp++, 0);
 
 	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
 	suword32(&arginfo->ps_nenvstr, envc);
 
 	/*
 	 * Fill in environment portion of vector table.
 	 */
 	for (; envc > 0; --envc) {
 		suword32(vectp++, (uint32_t)(intptr_t)destp);
 		while (*stringp++ != 0)
 			destp++;
 		destp++;
 	}
 
 	/* end of vector table is a null pointer */
 	suword32(vectp, 0);
 
 	return ((register_t *)stack_base);
 }
 
 static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
     "32-bit Linux emulation");
 
 static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
     &linux32_maxdsiz, 0, "");
 static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
     &linux32_maxssiz, 0, "");
 static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
     &linux32_maxvmem, 0, "");
 
 #if defined(DEBUG)
 SYSCTL_PROC(_compat_linux32, OID_AUTO, debug,
             CTLTYPE_STRING | CTLFLAG_RW,
             0, 0, linux_sysctl_debug, "A",
             "Linux debugging control");
 #endif
 
 static void
 linux32_fixlimit(struct rlimit *rl, int which)
 {
 
 	switch (which) {
 	case RLIMIT_DATA:
 		if (linux32_maxdsiz != 0) {
 			if (rl->rlim_cur > linux32_maxdsiz)
 				rl->rlim_cur = linux32_maxdsiz;
 			if (rl->rlim_max > linux32_maxdsiz)
 				rl->rlim_max = linux32_maxdsiz;
 		}
 		break;
 	case RLIMIT_STACK:
 		if (linux32_maxssiz != 0) {
 			if (rl->rlim_cur > linux32_maxssiz)
 				rl->rlim_cur = linux32_maxssiz;
 			if (rl->rlim_max > linux32_maxssiz)
 				rl->rlim_max = linux32_maxssiz;
 		}
 		break;
 	case RLIMIT_VMEM:
 		if (linux32_maxvmem != 0) {
 			if (rl->rlim_cur > linux32_maxvmem)
 				rl->rlim_cur = linux32_maxvmem;
 			if (rl->rlim_max > linux32_maxvmem)
 				rl->rlim_max = linux32_maxvmem;
 		}
 		break;
 	}
 }
 
 /*
  * System entry vector for 32-bit Linux ELF processes.  It ties together
  * the syscall table, the errno translation table, and the signal, exec
  * and limit hooks defined in this file.  sv_shared_page_obj is not set
  * here; linux_vdso_install() fills it in.
  */
 struct sysentvec elf_linux_sysvec = {
 	.sv_size	= LINUX32_SYS_MAXSYSCALL,
 	.sv_table	= linux32_sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= ELAST + 1,
 	.sv_errtbl	= bsd_to_linux_errno,
 	.sv_transtrap	= translate_traps,
 	.sv_fixup	= elf_linux_fixup,
 	.sv_sendsig	= linux_sendsig,
 	.sv_sigcode	= &_binary_linux32_locore_o_start,
 	.sv_szsigcode	= &linux_szsigcode,
 	.sv_name	= "Linux ELF32",
 	.sv_coredump	= elf32_coredump,
 	.sv_imgact_try	= exec_linux_imgact_try,
 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= LINUX32_MAXUSER,
 	.sv_usrstack	= LINUX32_USRSTACK,
 	.sv_psstrings	= LINUX32_PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings = linux_copyout_strings,
 	.sv_setregs	= exec_linux_setregs,
 	.sv_fixlimit	= linux32_fixlimit,
 	.sv_maxssiz	= &linux32_maxssiz,
 	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = linux32_fetch_syscall_args,
 	.sv_syscallnames = NULL,
 	.sv_shared_page_base = LINUX32_SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= linux_schedtail,
 	.sv_thread_detach = linux_thread_detach,
+	.sv_trap	= NULL,	
 };
 
 /*
  * SYSINIT hook that publishes the Linux vdso/sigcode shared page.
  *
  * Computes the size of the embedded linux32_locore object, panics if it
  * does not fit into the sysvec's shared page, runs the vdso fixup and
  * relocation steps, copies the image into the freshly created shared
  * page mapping, records the backing object in elf_linux_sysvec, and
  * finally derives linux_kplatform.  param is unused.
  */
 static void
 linux_vdso_install(void *param)
 {
 
 	linux_szsigcode = (&_binary_linux32_locore_o_end - 
 	    &_binary_linux32_locore_o_start);
 
 	if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
 		panic("Linux invalid vdso size\n");
 
 	__elfN(linux_vdso_fixup)(&elf_linux_sysvec);
 
 	linux_shared_page_obj = __elfN(linux_shared_page_init)
 	    (&linux_shared_page_mapping);
 
 	__elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE);
 
 	bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
 	    linux_szsigcode);
 	elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
 
 	/*
 	 * linux_platform is an address relative to LINUX32_SHAREDPAGE;
 	 * rebase its offset onto the kernel-side mapping of the page.
 	 */
 	linux_kplatform = linux_shared_page_mapping +
 	    (linux_platform - (caddr_t)LINUX32_SHAREDPAGE);
 }
 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
     (sysinit_cfunc_t)linux_vdso_install, NULL);
 
 static void
 linux_vdso_deinstall(void *param)
 {
 
 	__elfN(linux_shared_page_fini)(linux_shared_page_obj);
 };
 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
     (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
 
 static char GNU_ABI_VENDOR[] = "GNU";
 static int GNULINUX_ABI_DESC = 0;
 
 static boolean_t
 linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
 {
 	const Elf32_Word *desc;
 	uintptr_t p;
 
 	p = (uintptr_t)(note + 1);
 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
 
 	desc = (const Elf32_Word *)p;
 	if (desc[0] != GNULINUX_ABI_DESC)
 		return (FALSE);
 
 	/*
 	 * For linux we encode osrel as follows (see linux_mib.c):
 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
 	 */
 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
 
 	return (TRUE);
 }
 
 static Elf_Brandnote linux32_brandnote = {
 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
 	.hdr.n_type	= 1,
 	.vendor		= GNU_ABI_VENDOR,
 	.flags		= BN_TRANSLATE_OSREL,
 	.trans_osrel	= linux32_trans_osrel
 };
 
 static Elf32_Brandinfo linux_brand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_386,
 	.compat_3_brand	= "Linux",
 	.emul_path	= "/compat/linux",
 	.interp_path	= "/lib/ld-linux.so.1",
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux32_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 static Elf32_Brandinfo linux_glibc2brand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_386,
 	.compat_3_brand	= "Linux",
 	.emul_path	= "/compat/linux",
 	.interp_path	= "/lib/ld-linux.so.2",
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux32_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 Elf32_Brandinfo *linux_brandlist[] = {
 	&linux_brand,
 	&linux_glibc2brand,
 	NULL
 };
 
 static int
 linux_elf_modevent(module_t mod, int type, void *data)
 {
 	Elf32_Brandinfo **brandinfo;
 	int error;
 	struct linux_ioctl_handler **lihp;
 
 	error = 0;
 
 	switch(type) {
 	case MOD_LOAD:
 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
 		     ++brandinfo)
 			if (elf32_insert_brand_entry(*brandinfo) < 0)
 				error = EINVAL;
 		if (error == 0) {
 			SET_FOREACH(lihp, linux_ioctl_handler_set)
 				linux_ioctl_register_handler(*lihp);
 			LIST_INIT(&futex_list);
 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
 			stclohz = (stathz ? stathz : hz);
 			if (bootverbose)
 				printf("Linux ELF exec handler installed\n");
 		} else
 			printf("cannot insert Linux ELF brand handler\n");
 		break;
 	case MOD_UNLOAD:
 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
 		     ++brandinfo)
 			if (elf32_brand_inuse(*brandinfo))
 				error = EBUSY;
 		if (error == 0) {
 			for (brandinfo = &linux_brandlist[0];
 			     *brandinfo != NULL; ++brandinfo)
 				if (elf32_remove_brand_entry(*brandinfo) < 0)
 					error = EINVAL;
 		}
 		if (error == 0) {
 			SET_FOREACH(lihp, linux_ioctl_handler_set)
 				linux_ioctl_unregister_handler(*lihp);
 			mtx_destroy(&futex_mtx);
 			if (bootverbose)
 				printf("Linux ELF exec handler removed\n");
 		} else
 			printf("Could not deinstall ELF interpreter entry\n");
 		break;
 	default:
 		return (EOPNOTSUPP);
 	}
 	return (error);
 }
 
 static moduledata_t linux_elf_mod = {
 	"linuxelf",
 	linux_elf_modevent,
 	0
 };
 
 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
 MODULE_DEPEND(linuxelf, linux_common, 1, 1, 1);
Index: projects/clang380-import/sys/arm/arm/elf_machdep.c
===================================================================
--- projects/clang380-import/sys/arm/arm/elf_machdep.c	(revision 293686)
+++ projects/clang380-import/sys/arm/arm/elf_machdep.c	(revision 293687)
@@ -1,282 +1,283 @@
 /*-
  * Copyright 1996-1998 John D. Polstra.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/linker.h>
 #include <sys/sysent.h>
 #include <sys/imgact_elf.h>
 #include <sys/proc.h>
 #include <sys/syscall.h>
 #include <sys/signalvar.h>
 #include <sys/vnode.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_param.h>
 
 #include <machine/acle-compat.h>
 #include <machine/elf.h>
 #include <machine/md_var.h>
 
 static boolean_t elf32_arm_abi_supported(struct image_params *);
 
 struct sysentvec elf32_freebsd_sysvec = {
 	.sv_size	= SYS_MAXSYSCALL,
 	.sv_table	= sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= 0,
 	.sv_errtbl	= NULL,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= __elfN(freebsd_fixup),
 	.sv_sendsig	= sendsig,
 	.sv_sigcode	= sigcode,
 	.sv_szsigcode	= &szsigcode,
 	.sv_name	= "FreeBSD ELF32",
 	.sv_coredump	= __elfN(coredump),
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings = exec_copyout_strings,
 	.sv_setregs	= exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	=
 #if __ARM_ARCH >= 6
 			  SV_SHP | SV_TIMEKEEP |
 #endif
 			  SV_ABI_FREEBSD | SV_ILP32,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = syscallnames,
 	.sv_shared_page_base = SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
+	.sv_trap	= NULL,
 };
 INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec);
 
 static Elf32_Brandinfo freebsd_brand_info = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_ARM,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/libexec/ld-elf.so.1",
 	.sysvec		= &elf32_freebsd_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf32_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
 	.header_supported= elf32_arm_abi_supported,
 };
 
 SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST,
 	(sysinit_cfunc_t) elf32_insert_brand_entry,
 	&freebsd_brand_info);
 
 static boolean_t
 elf32_arm_abi_supported(struct image_params *imgp)
 {
 	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
 
 	/*
 	 * When configured for EABI, FreeBSD supports EABI vesions 4 and 5.
 	 */
 	if (EF_ARM_EABI_VERSION(hdr->e_flags) < EF_ARM_EABI_FREEBSD_MIN) {
 		if (bootverbose)
 			uprintf("Attempting to execute non EABI binary (rev %d) image %s",
 			    EF_ARM_EABI_VERSION(hdr->e_flags), imgp->args->fname);
 		return (FALSE);
 	}
 	return (TRUE);
 }
 
 void
 elf32_dump_thread(struct thread *td __unused, void *dst __unused,
     size_t *off __unused)
 {
 }
 
 /*
  * It is possible for the compiler to emit relocations for unaligned data.
  * We handle this situation with these inlines.
  */
 #define	RELOC_ALIGNED_P(x) \
 	(((uintptr_t)(x) & (sizeof(void *) - 1)) == 0)
 
 static __inline Elf_Addr
 load_ptr(Elf_Addr *where)
 {
 	Elf_Addr res;
 
 	if (RELOC_ALIGNED_P(where))
 		return *where;
 	memcpy(&res, where, sizeof(res));
 	return (res);
 }
 
 static __inline void
 store_ptr(Elf_Addr *where, Elf_Addr val)
 {
 	if (RELOC_ALIGNED_P(where))
 		*where = val;
 	else
 		memcpy(where, &val, sizeof(val));
 }
 #undef RELOC_ALIGNED_P
 
 
 /* Process one elf relocation with addend. */
 static int
 elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, int local, elf_lookup_fn lookup)
 {
 	Elf_Addr *where;
 	Elf_Addr addr;
 	Elf_Addr addend;
 	Elf_Word rtype, symidx;
 	const Elf_Rel *rel;
 	const Elf_Rela *rela;
 	int error;
 
 	switch (type) {
 	case ELF_RELOC_REL:
 		rel = (const Elf_Rel *)data;
 		where = (Elf_Addr *) (relocbase + rel->r_offset);
 		addend = load_ptr(where);
 		rtype = ELF_R_TYPE(rel->r_info);
 		symidx = ELF_R_SYM(rel->r_info);
 		break;
 	case ELF_RELOC_RELA:
 		rela = (const Elf_Rela *)data;
 		where = (Elf_Addr *) (relocbase + rela->r_offset);
 		addend = rela->r_addend;
 		rtype = ELF_R_TYPE(rela->r_info);
 		symidx = ELF_R_SYM(rela->r_info);
 		break;
 	default:
 		panic("unknown reloc type %d\n", type);
 	}
 
 	if (local) {
 		if (rtype == R_ARM_RELATIVE) {	/* A + B */
 			addr = elf_relocaddr(lf, relocbase + addend);
 			if (load_ptr(where) != addr)
 				store_ptr(where, addr);
 		}
 		return (0);
 	}
 
 	switch (rtype) {
 
 		case R_ARM_NONE:	/* none */
 			break;
 
 		case R_ARM_ABS32:
 			error = lookup(lf, symidx, 1, &addr);
 			if (error != 0)
 				return -1;
 			store_ptr(where, addr + load_ptr(where));
 			break;
 
 		case R_ARM_COPY:	/* none */
 			/*
 			 * There shouldn't be copy relocations in kernel
 			 * objects.
 			 */
 			printf("kldload: unexpected R_COPY relocation\n");
 			return -1;
 			break;
 
 		case R_ARM_JUMP_SLOT:
 			error = lookup(lf, symidx, 1, &addr);
 			if (error == 0) {
 				store_ptr(where, addr);
 				return (0);
 			}
 			return (-1);
 		case R_ARM_RELATIVE:
 			break;
 
 		default:
 			printf("kldload: unexpected relocation type %d\n",
 			       rtype);
 			return -1;
 	}
 	return(0);
 }
 
 int
 elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type,
     elf_lookup_fn lookup)
 {
 
 	return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup));
 }
 
 int
 elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, elf_lookup_fn lookup)
 {
 
 	return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup));
 }
 
 int
 elf_cpu_load_file(linker_file_t lf __unused)
 {
 
 	/*
 	 * The pmap code does not do an icache sync upon establishing executable
 	 * mappings in the kernel pmap.  It's an optimization based on the fact
 	 * that kernel memory allocations always have EXECUTABLE protection even
 	 * when the memory isn't going to hold executable code.  The only time
 	 * kernel memory holding instructions does need a sync is after loading
 	 * a kernel module, and that's when this function gets called.  Normal
 	 * data cache maintenance has already been done by the IO code, and TLB
 	 * maintenance has been done by the pmap code, so all we have to do here
 	 * is invalidate the instruction cache (which also invalidates the
 	 * branch predictor cache on platforms that have one).
 	 */
 	cpu_icache_sync_all();
 	return (0);
 }
 
 int
 elf_cpu_unload_file(linker_file_t lf __unused)
 {
 
 	return (0);
 }
Index: projects/clang380-import/sys/arm64/arm64/elf_machdep.c
===================================================================
--- projects/clang380-import/sys/arm64/arm64/elf_machdep.c	(revision 293686)
+++ projects/clang380-import/sys/arm64/arm64/elf_machdep.c	(revision 293687)
@@ -1,215 +1,217 @@
 /*-
  * Copyright (c) 2014, 2015 The FreeBSD Foundation.
  * Copyright (c) 2014 Andrew Turner.
  * All rights reserved.
  *
  * This software was developed by Andrew Turner under
  * sponsorship from the FreeBSD Foundation.
  *
  * Portions of this software were developed by Konstantin Belousov
  * under sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/linker.h>
 #include <sys/proc.h>
 #include <sys/sysent.h>
 #include <sys/imgact_elf.h>
 #include <sys/syscall.h>
 #include <sys/signalvar.h>
 #include <sys/vnode.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 
 #include <machine/elf.h>
 #include <machine/md_var.h>
 
 #include "linker_if.h"
 
 static struct sysentvec elf64_freebsd_sysvec = {
 	.sv_size	= SYS_MAXSYSCALL,
 	.sv_table	= sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= 0,
 	.sv_errtbl	= NULL,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= __elfN(freebsd_fixup),
 	.sv_sendsig	= sendsig,
 	.sv_sigcode	= sigcode,
 	.sv_szsigcode	= &szsigcode,
 	.sv_name	= "FreeBSD ELF64",
 	.sv_coredump	= __elfN(coredump),
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_stackprot	= VM_PROT_READ | VM_PROT_WRITE,
 	.sv_copyout_strings = exec_copyout_strings,
 	.sv_setregs	= exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_SHP | SV_TIMEKEEP | SV_ABI_FREEBSD | SV_LP64,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = syscallnames,
 	.sv_shared_page_base = SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= NULL,
+	.sv_thread_detach = NULL,
+	.sv_trap	= NULL,
 };
 INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec);
 
 static Elf64_Brandinfo freebsd_brand_info = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_AARCH64,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/libexec/ld-elf.so.1",
 	.sysvec		= &elf64_freebsd_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf64_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST,
     (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info);
 
 static Elf64_Brandinfo freebsd_brand_oinfo = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_AARCH64,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/usr/libexec/ld-elf.so.1",
 	.sysvec		= &elf64_freebsd_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf64_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY,
     (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_oinfo);
 
 void
 elf64_dump_thread(struct thread *td __unused, void *dst __unused,
     size_t *off __unused)
 {
 
 }
 
 static int
 elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, int local, elf_lookup_fn lookup)
 {
 	Elf_Addr *where, addr, addend;
 	Elf_Word rtype, symidx;
 	const Elf_Rel *rel;
 	const Elf_Rela *rela;
 	int error;
 
 	switch (type) {
 	case ELF_RELOC_REL:
 		rel = (const Elf_Rel *)data;
 		where = (Elf_Addr *) (relocbase + rel->r_offset);
 		addend = *where;
 		rtype = ELF_R_TYPE(rel->r_info);
 		symidx = ELF_R_SYM(rel->r_info);
 		break;
 	case ELF_RELOC_RELA:
 		rela = (const Elf_Rela *)data;
 		where = (Elf_Addr *) (relocbase + rela->r_offset);
 		addend = rela->r_addend;
 		rtype = ELF_R_TYPE(rela->r_info);
 		symidx = ELF_R_SYM(rela->r_info);
 		break;
 	default:
 		panic("unknown reloc type %d\n", type);
 	}
 
 	if (local) {
 		if (rtype == R_AARCH64_RELATIVE)
 			*where = elf_relocaddr(lf, relocbase + addend);
 		return (0);
 	}
 
 	switch (rtype) {
 	case R_AARCH64_NONE:
 	case R_AARCH64_RELATIVE:
 		break;
 	case R_AARCH64_ABS64:
 	case R_AARCH64_GLOB_DAT:
 	case R_AARCH64_JUMP_SLOT:
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 		*where = addr + addend;
 		break;
 	default:
 		printf("kldload: unexpected relocation type %d\n", rtype);
 		return (-1);
 	}
 	return (0);
 }
 
 int
 elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, elf_lookup_fn lookup)
 {
 
 	return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup));
 }
 
 /* Process one elf relocation with addend. */
 int
 elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type,
     elf_lookup_fn lookup)
 {
 
 	return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup));
 }
 
 int
 elf_cpu_load_file(linker_file_t lf)
 {
 
 	if (lf->id != 1)
 		cpu_icache_sync_range((vm_offset_t)lf->address, lf->size);
 	return (0);
 }
 
 int
 elf_cpu_unload_file(linker_file_t lf __unused)
 {
 
 	return (0);
 }
Index: projects/clang380-import/sys/boot/common/ufsread.c
===================================================================
--- projects/clang380-import/sys/boot/common/ufsread.c	(revision 293686)
+++ projects/clang380-import/sys/boot/common/ufsread.c	(revision 293687)
@@ -1,304 +1,303 @@
 /*-
  * Copyright (c) 2002 McAfee, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Marshall
  * Kirk McKusick and McAfee Research,, the Security Research Division of
  * McAfee, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as
  * part of the DARPA CHATS research program
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 /*-
  * Copyright (c) 1998 Robert Nordier
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms are freely
  * permitted provided that the above copyright notice and this
  * paragraph and the following disclaimer are duplicated in all
  * such forms.
  *
  * This software is provided "AS IS" and without any express or
  * implied warranties, including, without limitation, the implied
  * warranties of merchantability and fitness for a particular
  * purpose.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <ufs/ufs/dinode.h>
 #include <ufs/ufs/dir.h>
 #include <ufs/ffs/fs.h>
 
 #ifdef UFS_SMALL_CGBASE
 /* XXX: Revert to old (broken for over 1.5Tb filesystems) version of cgbase
    (see sys/ufs/ffs/fs.h rev 1.39) so that small boot loaders (e.g. boot2) can
    support both UFS1 and UFS2. */
 #undef cgbase
 #define cgbase(fs, c)   ((ufs2_daddr_t)((fs)->fs_fpg * (c)))
 #endif
 
 typedef	uint32_t	ufs_ino_t;
 
 /*
  * We use 4k `virtual' blocks for filesystem data, whatever the actual
  * filesystem block size. FFS blocks are always a multiple of 4k.
  */
 #define VBLKSHIFT	12
 #define VBLKSIZE	(1 << VBLKSHIFT)
 #define VBLKMASK	(VBLKSIZE - 1)
 #define DBPERVBLK	(VBLKSIZE / DEV_BSIZE)
 #define INDIRPERVBLK(fs) (NINDIR(fs) / ((fs)->fs_bsize >> VBLKSHIFT))
 #define IPERVBLK(fs)	(INOPB(fs) / ((fs)->fs_bsize >> VBLKSHIFT))
 #define INO_TO_VBA(fs, ipervblk, x) \
     (fsbtodb(fs, cgimin(fs, ino_to_cg(fs, x))) + \
     (((x) % (fs)->fs_ipg) / (ipervblk) * DBPERVBLK))
 #define INO_TO_VBO(ipervblk, x) ((x) % ipervblk)
 #define FS_TO_VBA(fs, fsb, off) (fsbtodb(fs, fsb) + \
     ((off) / VBLKSIZE) * DBPERVBLK)
 #define FS_TO_VBO(fs, fsb, off) ((off) & VBLKMASK)
 
 /* Buffers that must not span a 64k boundary. */
 struct dmadat {
 	char blkbuf[VBLKSIZE];	/* filesystem blocks */
 	char indbuf[VBLKSIZE];	/* indir blocks */
 	char sbbuf[SBLOCKSIZE];	/* superblock */
 	char secbuf[DEV_BSIZE];	/* for MBR/disklabel */
 };
 static struct dmadat *dmadat;
 
 static ufs_ino_t lookup(const char *);
 static ssize_t fsread(ufs_ino_t, void *, size_t);
 
 static uint8_t ls, dsk_meta;
 static uint32_t fs_off;
 
 static __inline uint8_t
 fsfind(const char *name, ufs_ino_t * ino)
 {
 	static char buf[DEV_BSIZE];
 	struct direct *d;
 	char *s;
 	ssize_t n;
 
 	fs_off = 0;
 	while ((n = fsread(*ino, buf, DEV_BSIZE)) > 0)
 		for (s = buf; s < buf + DEV_BSIZE;) {
 			d = (void *)s;
 			if (ls)
 				printf("%s ", d->d_name);
 			else if (!strcmp(name, d->d_name)) {
 				*ino = d->d_ino;
 				return d->d_type;
 			}
 			s += d->d_reclen;
 		}
 	if (n != -1 && ls)
 		printf("\n");
 	return 0;
 }
 
 static ufs_ino_t
 lookup(const char *path)
 {
 	static char name[MAXNAMLEN + 1];
 	const char *s;
 	ufs_ino_t ino;
 	ssize_t n;
 	uint8_t dt;
 
 	ino = ROOTINO;
 	dt = DT_DIR;
 	for (;;) {
 		if (*path == '/')
 			path++;
 		if (!*path)
 			break;
 		for (s = path; *s && *s != '/'; s++);
 		if ((n = s - path) > MAXNAMLEN)
 			return 0;
 		ls = *path == '?' && n == 1 && !*s;
 		memcpy(name, path, n);
 		name[n] = 0;
 		if (dt != DT_DIR) {
 			printf("%s: not a directory.\n", name);
 			return (0);
 		}
 		if ((dt = fsfind(name, &ino)) <= 0)
 			break;
 		path = s;
 	}
 	return dt == DT_REG ? ino : 0;
 }
 
 /*
  * Possible superblock locations ordered from most to least likely.
  */
 static int sblock_try[] = SBLOCKSEARCH;
 
 #if defined(UFS2_ONLY)
 #define DIP(field) dp2.field
 #elif defined(UFS1_ONLY)
 #define DIP(field) dp1.field
 #else
 #define DIP(field) fs.fs_magic == FS_UFS1_MAGIC ? dp1.field : dp2.field
 #endif
 
 static ssize_t
 fsread(ufs_ino_t inode, void *buf, size_t nbyte)
 {
 #ifndef UFS2_ONLY
 	static struct ufs1_dinode dp1;
 	ufs1_daddr_t addr1;
 #endif
 #ifndef UFS1_ONLY
 	static struct ufs2_dinode dp2;
 #endif
 	static struct fs fs;
 	static ufs_ino_t inomap;
 	char *blkbuf;
 	void *indbuf;
 	char *s;
 	size_t n, nb, size, off, vboff;
 	ufs_lbn_t lbn;
 	ufs2_daddr_t addr2, vbaddr;
 	static ufs2_daddr_t blkmap, indmap;
 	u_int u;
 
 	blkbuf = dmadat->blkbuf;
 	indbuf = dmadat->indbuf;
 	if (!dsk_meta) {
 		inomap = 0;
 		for (n = 0; sblock_try[n] != -1; n++) {
 			if (dskread(dmadat->sbbuf, sblock_try[n] / DEV_BSIZE,
 			    SBLOCKSIZE / DEV_BSIZE))
 				return -1;
 			memcpy(&fs, dmadat->sbbuf, sizeof(struct fs));
 			if ((
 #if defined(UFS1_ONLY)
 			    fs.fs_magic == FS_UFS1_MAGIC
 #elif defined(UFS2_ONLY)
 			    (fs.fs_magic == FS_UFS2_MAGIC &&
 			    fs.fs_sblockloc == sblock_try[n])
 #else
 			    fs.fs_magic == FS_UFS1_MAGIC ||
 			    (fs.fs_magic == FS_UFS2_MAGIC &&
 			    fs.fs_sblockloc == sblock_try[n])
 #endif
 			    ) &&
 			    fs.fs_bsize <= MAXBSIZE &&
 			    fs.fs_bsize >= sizeof(struct fs))
 				break;
 		}
 		if (sblock_try[n] == -1) {
-			printf("Not ufs\n");
 			return -1;
 		}
 		dsk_meta++;
 	} else
 		memcpy(&fs, dmadat->sbbuf, sizeof(struct fs));
 	if (!inode)
 		return 0;
 	if (inomap != inode) {
 		n = IPERVBLK(&fs);
 		if (dskread(blkbuf, INO_TO_VBA(&fs, n, inode), DBPERVBLK))
 			return -1;
 		n = INO_TO_VBO(n, inode);
 #if defined(UFS1_ONLY)
 		memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n,
 		    sizeof(struct ufs1_dinode));
 #elif defined(UFS2_ONLY)
 		memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n,
 		    sizeof(struct ufs2_dinode));
 #else
 		if (fs.fs_magic == FS_UFS1_MAGIC)
 			memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n,
 			    sizeof(struct ufs1_dinode));
 		else
 			memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n,
 			    sizeof(struct ufs2_dinode));
 #endif
 		inomap = inode;
 		fs_off = 0;
 		blkmap = indmap = 0;
 	}
 	s = buf;
 	size = DIP(di_size);
 	n = size - fs_off;
 	if (nbyte > n)
 		nbyte = n;
 	nb = nbyte;
 	while (nb) {
 		lbn = lblkno(&fs, fs_off);
 		off = blkoff(&fs, fs_off);
 		if (lbn < NDADDR) {
 			addr2 = DIP(di_db[lbn]);
 		} else if (lbn < NDADDR + NINDIR(&fs)) {
 			n = INDIRPERVBLK(&fs);
 			addr2 = DIP(di_ib[0]);
 			u = (u_int)(lbn - NDADDR) / n * DBPERVBLK;
 			vbaddr = fsbtodb(&fs, addr2) + u;
 			if (indmap != vbaddr) {
 				if (dskread(indbuf, vbaddr, DBPERVBLK))
 					return -1;
 				indmap = vbaddr;
 			}
 			n = (lbn - NDADDR) & (n - 1);
 #if defined(UFS1_ONLY)
 			memcpy(&addr1, (ufs1_daddr_t *)indbuf + n,
 			    sizeof(ufs1_daddr_t));
 			addr2 = addr1;
 #elif defined(UFS2_ONLY)
 			memcpy(&addr2, (ufs2_daddr_t *)indbuf + n,
 			    sizeof(ufs2_daddr_t));
 #else
 			if (fs.fs_magic == FS_UFS1_MAGIC) {
 				memcpy(&addr1, (ufs1_daddr_t *)indbuf + n,
 				    sizeof(ufs1_daddr_t));
 				addr2 = addr1;
 			} else
 				memcpy(&addr2, (ufs2_daddr_t *)indbuf + n,
 				    sizeof(ufs2_daddr_t));
 #endif
 		} else
 			return -1;
 		vbaddr = fsbtodb(&fs, addr2) + (off >> VBLKSHIFT) * DBPERVBLK;
 		vboff = off & VBLKMASK;
 		n = sblksize(&fs, size, lbn) - (off & ~VBLKMASK);
 		if (n > VBLKSIZE)
 			n = VBLKSIZE;
 		if (blkmap != vbaddr) {
 			if (dskread(blkbuf, vbaddr, n >> DEV_BSHIFT))
 				return -1;
 			blkmap = vbaddr;
 		}
 		n -= vboff;
 		if (n > nb)
 			n = nb;
 		memcpy(s, blkbuf + vboff, n);
 		s += n;
 		fs_off += n;
 		nb -= n;
 	}
 	return nbyte;
 }
Index: projects/clang380-import/sys/boot/efi/boot1/Makefile
===================================================================
--- projects/clang380-import/sys/boot/efi/boot1/Makefile	(revision 293686)
+++ projects/clang380-import/sys/boot/efi/boot1/Makefile	(revision 293687)
@@ -1,116 +1,115 @@
 # $FreeBSD$
 
 MAN=
 
 .include <bsd.own.mk>
 
 # In-tree GCC does not support __attribute__((ms_abi)).
 .if ${COMPILER_TYPE} != "gcc"
 
 MK_SSP=		no
 
 PROG=		boot1.sym
 INTERNALPROG=
 
 # architecture-specific loader code
 SRCS=	boot1.c self_reloc.c start.S
 
 CFLAGS+=	-I.
 CFLAGS+=	-I${.CURDIR}/../include
 CFLAGS+=	-I${.CURDIR}/../include/${MACHINE}
 CFLAGS+=	-I${.CURDIR}/../../../contrib/dev/acpica/include
 CFLAGS+=	-I${.CURDIR}/../../..
 
 # Always add MI sources and REGULAR efi loader bits
 .PATH:		${.CURDIR}/../loader/arch/${MACHINE}
 .PATH:		${.CURDIR}/../loader
 .PATH:		${.CURDIR}/../../common
 CFLAGS+=	-I${.CURDIR}/../../common
 
 FILES=	boot1.efi boot1.efifat
 FILESMODE_boot1.efi=	${BINMODE}
 
 LDSCRIPT=	${.CURDIR}/../loader/arch/${MACHINE}/ldscript.${MACHINE}
 LDFLAGS=	-Wl,-T${LDSCRIPT} -Wl,-Bsymbolic -shared
 
 .if ${MACHINE_CPUARCH} == "aarch64"
 CFLAGS+=	-msoft-float -mgeneral-regs-only
 .endif
 .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386"
 CFLAGS+=	-fPIC
 LDFLAGS+=	-Wl,-znocombreloc
 .endif
 
-.if ${MACHINE_CPUARCH} == "arm" || ${MACHINE_CPUARCH} == "i386"
 #
 # Add libstand for the runtime functions used by the compiler - for example
 # __aeabi_* (arm) or __divdi3 (i386).
+# It also supplies the string and memory functions required on all platforms.
 #
 DPADD+=		${LIBSTAND}
 LDADD+=		-lstand
-.endif
 
 DPADD+=		${LDSCRIPT}
 
 OBJCOPY?=	objcopy
 OBJDUMP?=	objdump
 
 .if ${MACHINE_CPUARCH} == "amd64"
 EFI_TARGET=	efi-app-x86_64
 .elif ${MACHINE_CPUARCH} == "i386"
 EFI_TARGET=	efi-app-ia32
 .else
 EFI_TARGET=	binary
 .endif
 
 boot1.efi: ${PROG}
 	if [ `${OBJDUMP} -t ${.ALLSRC} | fgrep '*UND*' | wc -l` != 0 ]; then \
 		${OBJDUMP} -t ${.ALLSRC} | fgrep '*UND*'; \
 		exit 1; \
 	fi
 	${OBJCOPY} -j .peheader -j .text -j .sdata -j .data \
 		-j .dynamic -j .dynsym -j .rel.dyn \
 		-j .rela.dyn -j .reloc -j .eh_frame \
 		--output-target=${EFI_TARGET} ${.ALLSRC} ${.TARGET}
 
 boot1.o: ${.CURDIR}/../../common/ufsread.c
 
 # The following inserts our objects into a template FAT file system
 # created by generate-fat.sh
 
 .include "${.CURDIR}/Makefile.fat"
 BOOT1_MAXSIZE?=	131072
 
 boot1.efifat: boot1.efi
 	@set -- `ls -l boot1.efi`; \
 	x=$$(($$5-${BOOT1_MAXSIZE})); \
 	if [ $$x -ge 0 ]; then \
 	    echo "boot1 $$x bytes too large; regenerate FAT templates?" >&2 ;\
 	    exit 1; \
 	fi
 	echo ${.OBJDIR}
 	uudecode ${.CURDIR}/fat-${MACHINE}.tmpl.bz2.uu
 	mv fat-${MACHINE}.tmpl.bz2 ${.TARGET}.bz2
 	bzip2 -f -d ${.TARGET}.bz2
 	dd if=boot1.efi of=${.TARGET} seek=${BOOT1_OFFSET} conv=notrunc
 
 CLEANFILES= boot1.efi boot1.efifat
 
 .endif # ${COMPILER_TYPE} != "gcc"
 
 .include <bsd.prog.mk>
 
 beforedepend ${OBJS}: machine
 
 CLEANFILES+=   machine
 
 machine:
 	ln -sf ${.CURDIR}/../../../${MACHINE}/include machine
 
 .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386"
 beforedepend ${OBJS}: x86
 CLEANFILES+=   x86
 
 x86:
 	ln -sf ${.CURDIR}/../../../x86/include x86
 .endif
Index: projects/clang380-import/sys/boot/efi/boot1/boot1.c
===================================================================
--- projects/clang380-import/sys/boot/efi/boot1/boot1.c	(revision 293686)
+++ projects/clang380-import/sys/boot/efi/boot1/boot1.c	(revision 293687)
@@ -1,575 +1,323 @@
 /*-
  * Copyright (c) 1998 Robert Nordier
  * All rights reserved.
  * Copyright (c) 2001 Robert Drehmel
  * All rights reserved.
  * Copyright (c) 2014 Nathan Whitehorn
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms are freely
  * permitted provided that the above copyright notice and this
  * paragraph and the following disclaimer are duplicated in all
  * such forms.
  *
  * This software is provided "AS IS" and without any express or
  * implied warranties, including, without limitation, the implied
  * warranties of merchantability and fitness for a particular
  * purpose.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/dirent.h>
 #include <machine/elf.h>
 #include <machine/stdarg.h>
+#include <stand.h>
 
 #include <efi.h>
 #include <eficonsctl.h>
 
 #define _PATH_LOADER	"/boot/loader.efi"
 #define _PATH_KERNEL	"/boot/kernel/kernel"
 
 #define BSIZEMAX	16384
 
-typedef int putc_func_t(char c, void *arg);
+void panic(const char *fmt, ...) __dead2;
+void putchar(int c);
 
-struct sp_data {
-	char	*sp_buf;
-	u_int	sp_len;
-	u_int	sp_size;
-};
-
-static const char digits[] = "0123456789abcdef";
-
-static void panic(const char *fmt, ...) __dead2;
-static int printf(const char *fmt, ...);
-static int putchar(char c, void *arg);
-static int vprintf(const char *fmt, va_list ap);
-static int vsnprintf(char *str, size_t sz, const char *fmt, va_list ap);
-
-static int __printf(const char *fmt, putc_func_t *putc, void *arg, va_list ap);
-static int __putc(char c, void *arg);
-static int __puts(const char *s, putc_func_t *putc, void *arg);
-static int __sputc(char c, void *arg);
-static char *__uitoa(char *buf, u_int val, int base);
-static char *__ultoa(char *buf, u_long val, int base);
-
 static int domount(EFI_DEVICE_PATH *device, EFI_BLOCK_IO *blkio, int quiet);
 static void load(const char *fname);
 
 static EFI_SYSTEM_TABLE *systab;
 static EFI_HANDLE *image;
 
-static void
-bcopy(const void *src, void *dst, size_t len)
-{
-	const char *s = src;
-	char *d = dst;
-
-	while (len-- != 0)
-		*d++ = *s++;
-}
-
-static void
-memcpy(void *dst, const void *src, size_t len)
-{
-	bcopy(src, dst, len);
-}
-
-static void
-bzero(void *b, size_t len)
-{
-	char *p = b;
-
-	while (len-- != 0)
-		*p++ = 0;
-}
-
-static int
-strcmp(const char *s1, const char *s2)
-{
-	for (; *s1 == *s2 && *s1; s1++, s2++)
-		;
-	return ((u_char)*s1 - (u_char)*s2);
-}
-
 static EFI_GUID BlockIoProtocolGUID = BLOCK_IO_PROTOCOL;
 static EFI_GUID DevicePathGUID = DEVICE_PATH_PROTOCOL;
 static EFI_GUID LoadedImageGUID = LOADED_IMAGE_PROTOCOL;
 static EFI_GUID ConsoleControlGUID = EFI_CONSOLE_CONTROL_PROTOCOL_GUID;
 
 static EFI_BLOCK_IO *bootdev;
 static EFI_DEVICE_PATH *bootdevpath;
 static EFI_HANDLE *bootdevhandle;
 
 EFI_STATUS efi_main(EFI_HANDLE Ximage, EFI_SYSTEM_TABLE* Xsystab)
 {
 	EFI_HANDLE handles[128];
 	EFI_BLOCK_IO *blkio;
 	UINTN i, nparts = sizeof(handles), cols, rows, max_dim, best_mode;
 	EFI_STATUS status;
 	EFI_DEVICE_PATH *devpath;
 	EFI_BOOT_SERVICES *BS;
 	EFI_CONSOLE_CONTROL_PROTOCOL *ConsoleControl = NULL;
 	SIMPLE_TEXT_OUTPUT_INTERFACE *conout = NULL;
 	char *path = _PATH_LOADER;
 
 	systab = Xsystab;
 	image = Ximage;
 
 	BS = systab->BootServices;
 	status = BS->LocateProtocol(&ConsoleControlGUID, NULL,
 	    (VOID **)&ConsoleControl);
 	if (status == EFI_SUCCESS)
 		(void)ConsoleControl->SetMode(ConsoleControl,
 		    EfiConsoleControlScreenText);
 	/*
 	 * Reset the console and find the best text mode.
 	 */
 	conout = systab->ConOut;
 	conout->Reset(conout, TRUE);
 	max_dim = best_mode = 0;
 	for (i = 0; ; i++) {
 		status = conout->QueryMode(conout, i, &cols, &rows);
 		if (EFI_ERROR(status))
 			break;
 		if (cols * rows > max_dim) {
 			max_dim = cols * rows;
 			best_mode = i;
 		}
 	}
 	if (max_dim > 0)
 		conout->SetMode(conout, best_mode);
 	conout->EnableCursor(conout, TRUE);
 	conout->ClearScreen(conout);
 
 	printf("\n"
 	       ">> FreeBSD EFI boot block\n");
 	printf("   Loader path: %s\n", path);
 
 	status = systab->BootServices->LocateHandle(ByProtocol,
 	    &BlockIoProtocolGUID, NULL, &nparts, handles);
 	nparts /= sizeof(handles[0]);
 
 	for (i = 0; i < nparts; i++) {
 		status = systab->BootServices->HandleProtocol(handles[i],
 		    &DevicePathGUID, (void **)&devpath);
 		if (EFI_ERROR(status))
 			continue;
 
 		while (!IsDevicePathEnd(NextDevicePathNode(devpath)))
 			devpath = NextDevicePathNode(devpath);
 
 		status = systab->BootServices->HandleProtocol(handles[i],
 		    &BlockIoProtocolGUID, (void **)&blkio);
 		if (EFI_ERROR(status))
 			continue;
 
 		if (!blkio->Media->LogicalPartition)
 			continue;
 
 		if (domount(devpath, blkio, 1) >= 0)
 			break;
 	}
 
 	if (i == nparts)
 		panic("No bootable partition found");
 
 	bootdevhandle = handles[i];
 	load(path);
 
 	panic("Load failed");
 
 	return EFI_SUCCESS;
 }
 
 static int
 dskread(void *buf, u_int64_t lba, int nblk)
 {
 	EFI_STATUS status;
 	int size;
 
 	lba = lba / (bootdev->Media->BlockSize / DEV_BSIZE);
 	size = nblk * DEV_BSIZE;
 	status = bootdev->ReadBlocks(bootdev, bootdev->Media->MediaId, lba,
 	    size, buf);
 
 	if (EFI_ERROR(status))
 		return (-1);
 
 	return (0);
 }
 
 #include "ufsread.c"
 
 static ssize_t
 fsstat(ufs_ino_t inode)
 {
 #ifndef UFS2_ONLY
 	static struct ufs1_dinode dp1;
 	ufs1_daddr_t addr1;
 #endif
 #ifndef UFS1_ONLY
 	static struct ufs2_dinode dp2;
 #endif
 	static struct fs fs;
 	static ufs_ino_t inomap;
 	char *blkbuf;
 	void *indbuf;
 	size_t n, nb, size, off, vboff;
 	ufs_lbn_t lbn;
 	ufs2_daddr_t addr2, vbaddr;
 	static ufs2_daddr_t blkmap, indmap;
 	u_int u;
 
 	blkbuf = dmadat->blkbuf;
 	indbuf = dmadat->indbuf;
 	if (!dsk_meta) {
 		inomap = 0;
 		for (n = 0; sblock_try[n] != -1; n++) {
 			if (dskread(dmadat->sbbuf, sblock_try[n] / DEV_BSIZE,
 			    SBLOCKSIZE / DEV_BSIZE))
 				return -1;
 			memcpy(&fs, dmadat->sbbuf, sizeof(struct fs));
 			if ((
 #if defined(UFS1_ONLY)
 			    fs.fs_magic == FS_UFS1_MAGIC
 #elif defined(UFS2_ONLY)
 			    (fs.fs_magic == FS_UFS2_MAGIC &&
 			    fs.fs_sblockloc == sblock_try[n])
 #else
 			    fs.fs_magic == FS_UFS1_MAGIC ||
 			    (fs.fs_magic == FS_UFS2_MAGIC &&
 			    fs.fs_sblockloc == sblock_try[n])
 #endif
 			    ) &&
 			    fs.fs_bsize <= MAXBSIZE &&
 			    fs.fs_bsize >= sizeof(struct fs))
 				break;
 		}
 		if (sblock_try[n] == -1) {
-			printf("Not ufs\n");
 			return -1;
 		}
 		dsk_meta++;
 	} else
 		memcpy(&fs, dmadat->sbbuf, sizeof(struct fs));
 	if (!inode)
 		return 0;
 	if (inomap != inode) {
 		n = IPERVBLK(&fs);
 		if (dskread(blkbuf, INO_TO_VBA(&fs, n, inode), DBPERVBLK))
 			return -1;
 		n = INO_TO_VBO(n, inode);
 #if defined(UFS1_ONLY)
 		memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n,
 		    sizeof(struct ufs1_dinode));
 #elif defined(UFS2_ONLY)
 		memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n,
 		    sizeof(struct ufs2_dinode));
 #else
 		if (fs.fs_magic == FS_UFS1_MAGIC)
 			memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n,
 			    sizeof(struct ufs1_dinode));
 		else
 			memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n,
 			    sizeof(struct ufs2_dinode));
 #endif
 		inomap = inode;
 		fs_off = 0;
 		blkmap = indmap = 0;
 	}
 	size = DIP(di_size);
 	n = size - fs_off;
 	return (n);
 }
 
 static struct dmadat __dmadat;
 
 static int
 domount(EFI_DEVICE_PATH *device, EFI_BLOCK_IO *blkio, int quiet)
 {
 
 	dmadat = &__dmadat;
 	bootdev = blkio;
 	bootdevpath = device;
 	if (fsread(0, NULL, 0)) {
 		if (!quiet)
 			printf("domount: can't read superblock\n");
 		return (-1);
 	}
 	if (!quiet)
-		printf("Succesfully mounted UFS filesystem\n");
+		printf("Successfully mounted UFS filesystem\n");
 	return (0);
 }
 
 static void
 load(const char *fname)
 {
 	ufs_ino_t ino;
 	EFI_STATUS status;
 	EFI_HANDLE loaderhandle;
 	EFI_LOADED_IMAGE *loaded_image;
 	void *buffer;
 	size_t bufsize;
 
 	if ((ino = lookup(fname)) == 0) {
 		printf("File %s not found\n", fname);
 		return;
 	}
 
 	bufsize = fsstat(ino);
 	status = systab->BootServices->AllocatePool(EfiLoaderData,
 	    bufsize, &buffer);
 	fsread(ino, buffer, bufsize);
 
 	/* XXX: For secure boot, we need our own loader here */
 	status = systab->BootServices->LoadImage(TRUE, image, bootdevpath,
 	    buffer, bufsize, &loaderhandle);
 	if (EFI_ERROR(status))
 		printf("LoadImage failed with error %lu\n",
 		    EFI_ERROR_CODE(status));
 
 	status = systab->BootServices->HandleProtocol(loaderhandle,
 	    &LoadedImageGUID, (VOID**)&loaded_image);
 	if (EFI_ERROR(status))
 		printf("HandleProtocol failed with error %lu\n",
 		    EFI_ERROR_CODE(status));
 
 	loaded_image->DeviceHandle = bootdevhandle;
 
 	status = systab->BootServices->StartImage(loaderhandle, NULL, NULL);
 	if (EFI_ERROR(status))
 		printf("StartImage failed with error %lu\n",
 		    EFI_ERROR_CODE(status));
 }
 
-static void
+void
 panic(const char *fmt, ...)
 {
-	char buf[128];
 	va_list ap;
 
+	printf("panic: ");
 	va_start(ap, fmt);
-	vsnprintf(buf, sizeof buf, fmt, ap);
-	printf("panic: %s\n", buf);
+	vprintf(fmt, ap);
 	va_end(ap);
+	printf("\n");
 
 	while (1) {}
 }
 
-static int
-printf(const char *fmt, ...)
+void
+putchar(int c)
 {
-	va_list ap;
-	int ret;
-
-	/* Don't annoy the user as we probe for partitions */
-	if (strcmp(fmt,"Not ufs\n") == 0)
-		return 0;
-
-	va_start(ap, fmt);
-	ret = vprintf(fmt, ap);
-	va_end(ap);
-	return (ret);
-}
-
-static int
-putchar(char c, void *arg)
-{
 	CHAR16 buf[2];
 
 	if (c == '\n') {
 		buf[0] = '\r';
 		buf[1] = 0;
 		systab->ConOut->OutputString(systab->ConOut, buf);
 	}
 	buf[0] = c;
 	buf[1] = 0;
 	systab->ConOut->OutputString(systab->ConOut, buf);
-	return (1);
-}
-
-static int
-vprintf(const char *fmt, va_list ap)
-{
-	int ret;
-
-	ret = __printf(fmt, putchar, 0, ap);
-	return (ret);
-}
-
-static int
-vsnprintf(char *str, size_t sz, const char *fmt, va_list ap)
-{
-	struct sp_data sp;
-	int ret;
-
-	sp.sp_buf = str;
-	sp.sp_len = 0;
-	sp.sp_size = sz;
-	ret = __printf(fmt, __sputc, &sp, ap);
-	return (ret);
-}
-
-static int
-__printf(const char *fmt, putc_func_t *putc, void *arg, va_list ap)
-{
-	char buf[(sizeof(long) * 8) + 1];
-	char *nbuf;
-	u_long ul;
-	u_int ui;
-	int lflag;
-	int sflag;
-	char *s;
-	int pad;
-	int ret;
-	int c;
-
-	nbuf = &buf[sizeof buf - 1];
-	ret = 0;
-	while ((c = *fmt++) != 0) {
-		if (c != '%') {
-			ret += putc(c, arg);
-			continue;
-		}
-		lflag = 0;
-		sflag = 0;
-		pad = 0;
-reswitch:	c = *fmt++;
-		switch (c) {
-		case '#':
-			sflag = 1;
-			goto reswitch;
-		case '%':
-			ret += putc('%', arg);
-			break;
-		case 'c':
-			c = va_arg(ap, int);
-			ret += putc(c, arg);
-			break;
-		case 'd':
-			if (lflag == 0) {
-				ui = (u_int)va_arg(ap, int);
-				if (ui < (int)ui) {
-					ui = -ui;
-					ret += putc('-', arg);
-				}
-				s = __uitoa(nbuf, ui, 10);
-			} else {
-				ul = (u_long)va_arg(ap, long);
-				if (ul < (long)ul) {
-					ul = -ul;
-					ret += putc('-', arg);
-				}
-				s = __ultoa(nbuf, ul, 10);
-			}
-			ret += __puts(s, putc, arg);
-			break;
-		case 'l':
-			lflag = 1;
-			goto reswitch;
-		case 'o':
-			if (lflag == 0) {
-				ui = (u_int)va_arg(ap, u_int);
-				s = __uitoa(nbuf, ui, 8);
-			} else {
-				ul = (u_long)va_arg(ap, u_long);
-				s = __ultoa(nbuf, ul, 8);
-			}
-			ret += __puts(s, putc, arg);
-			break;
-		case 'p':
-			ul = (u_long)va_arg(ap, void *);
-			s = __ultoa(nbuf, ul, 16);
-			ret += __puts("0x", putc, arg);
-			ret += __puts(s, putc, arg);
-			break;
-		case 's':
-			s = va_arg(ap, char *);
-			ret += __puts(s, putc, arg);
-			break;
-		case 'u':
-			if (lflag == 0) {
-				ui = va_arg(ap, u_int);
-				s = __uitoa(nbuf, ui, 10);
-			} else {
-				ul = va_arg(ap, u_long);
-				s = __ultoa(nbuf, ul, 10);
-			}
-			ret += __puts(s, putc, arg);
-			break;
-		case 'x':
-			if (lflag == 0) {
-				ui = va_arg(ap, u_int);
-				s = __uitoa(nbuf, ui, 16);
-			} else {
-				ul = va_arg(ap, u_long);
-				s = __ultoa(nbuf, ul, 16);
-			}
-			if (sflag)
-				ret += __puts("0x", putc, arg);
-			ret += __puts(s, putc, arg);
-			break;
-		case '0': case '1': case '2': case '3': case '4':
-		case '5': case '6': case '7': case '8': case '9':
-			pad = pad * 10 + c - '0';
-			goto reswitch;
-		default:
-			break;
-		}
-	}
-	return (ret);
-}
-
-static int
-__sputc(char c, void *arg)
-{
-	struct sp_data *sp;
-
-	sp = arg;
-	if (sp->sp_len < sp->sp_size)
-		sp->sp_buf[sp->sp_len++] = c;
-	sp->sp_buf[sp->sp_len] = '\0';
-	return (1);
-}
-
-static int
-__puts(const char *s, putc_func_t *putc, void *arg)
-{
-	const char *p;
-	int ret;
-
-	ret = 0;
-	for (p = s; *p != '\0'; p++)
-		ret += putc(*p, arg);
-	return (ret);
-}
-
-static char *
-__uitoa(char *buf, u_int ui, int base)
-{
-	char *p;
-
-	p = buf;
-	*p = '\0';
-	do
-		*--p = digits[ui % base];
-	while ((ui /= base) != 0);
-	return (p);
-}
-
-static char *
-__ultoa(char *buf, u_long ul, int base)
-{
-	char *p;
-
-	p = buf;
-	*p = '\0';
-	do
-		*--p = digits[ul % base];
-	while ((ul /= base) != 0);
-	return (p);
 }
Index: projects/clang380-import/sys/boot/i386/loader/main.c
===================================================================
--- projects/clang380-import/sys/boot/i386/loader/main.c	(revision 293686)
+++ projects/clang380-import/sys/boot/i386/loader/main.c	(revision 293687)
@@ -1,451 +1,461 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * MD bootstrap main() and assorted miscellaneous
  * commands.
  */
 
 #include <stand.h>
 #include <stddef.h>
 #include <string.h>
 #include <machine/bootinfo.h>
 #include <machine/cpufunc.h>
 #include <machine/psl.h>
 #include <sys/reboot.h>
 
 #include "bootstrap.h"
 #include "common/bootargs.h"
 #include "libi386/libi386.h"
 #include "libi386/smbios.h"
 #include "btxv86.h"
 
 #ifdef LOADER_ZFS_SUPPORT
 #include "../zfs/libzfs.h"
 #endif
 
 CTASSERT(sizeof(struct bootargs) == BOOTARGS_SIZE);
 CTASSERT(offsetof(struct bootargs, bootinfo) == BA_BOOTINFO);
 CTASSERT(offsetof(struct bootargs, bootflags) == BA_BOOTFLAGS);
 CTASSERT(offsetof(struct bootinfo, bi_size) == BI_SIZE);
 
 /* Arguments passed in from the boot1/boot2 loader */
 static struct bootargs *kargs;
 
 static u_int32_t	initial_howto;
 static u_int32_t	initial_bootdev;
 static struct bootinfo	*initial_bootinfo;
 
 struct arch_switch	archsw;		/* MI/MD interface boundary */
 
 static void		extract_currdev(void);
 static int		isa_inb(int port);
 static void		isa_outb(int port, int value);
 void			exit(int code);
 #ifdef LOADER_ZFS_SUPPORT
 static void		init_zfs_bootenv(char *currdev);
 static void		i386_zfs_probe(void);
 #endif
 
 /* from vers.c */
 extern	char bootprog_name[], bootprog_rev[], bootprog_date[], bootprog_maker[];
 
 /* XXX debugging */
 extern char end[];
 
 static void *heap_top;
 static void *heap_bottom;
 
 int
 main(void)
 {
     int			i;
 
     /* Pick up arguments */
     kargs = (void *)__args;
     initial_howto = kargs->howto;
     initial_bootdev = kargs->bootdev;
     initial_bootinfo = kargs->bootinfo ? (struct bootinfo *)PTOV(kargs->bootinfo) : NULL;
 
     /* Initialize the v86 register set to a known-good state. */
     bzero(&v86, sizeof(v86));
     v86.efl = PSL_RESERVED_DEFAULT | PSL_I;
 
     /* 
      * Initialise the heap as early as possible.  Once this is done, malloc() is usable.
      */
     bios_getmem();
 
 #if defined(LOADER_BZIP2_SUPPORT) || defined(LOADER_FIREWIRE_SUPPORT) || \
     defined(LOADER_GPT_SUPPORT) || defined(LOADER_ZFS_SUPPORT)
     if (high_heap_size > 0) {
 	heap_top = PTOV(high_heap_base + high_heap_size);
 	heap_bottom = PTOV(high_heap_base);
 	if (high_heap_base < memtop_copyin)
 	    memtop_copyin = high_heap_base;
     } else
 #endif
     {
 	heap_top = (void *)PTOV(bios_basemem);
 	heap_bottom = (void *)end;
     }
     setheap(heap_bottom, heap_top);
 
     /*
      * XXX Chicken-and-egg problem; we want to have console output early, but some
      * console attributes may depend on reading from eg. the boot device, which we
      * can't do yet.
      *
      * We can use printf() etc. once this is done.
      * If the previous boot stage has requested a serial console, prefer that.
      */
     bi_setboothowto(initial_howto);
     if (initial_howto & RB_MULTIPLE) {
 	if (initial_howto & RB_SERIAL)
 	    setenv("console", "comconsole vidconsole", 1);
 	else
 	    setenv("console", "vidconsole comconsole", 1);
     } else if (initial_howto & RB_SERIAL)
 	setenv("console", "comconsole", 1);
     else if (initial_howto & RB_MUTE)
 	setenv("console", "nullconsole", 1);
     cons_probe();
 
     /*
      * Initialise the block cache
      */
     bcache_init(32, 512);	/* 16k cache XXX tune this */
 
     /*
      * Special handling for PXE and CD booting.
      */
     if (kargs->bootinfo == 0) {
 	/*
 	 * We only want the PXE disk to try to init itself in the below
 	 * walk through devsw if we actually booted off of PXE.
 	 */
 	if (kargs->bootflags & KARGS_FLAGS_PXE)
 	    pxe_enable(kargs->pxeinfo ? PTOV(kargs->pxeinfo) : NULL);
 	else if (kargs->bootflags & KARGS_FLAGS_CD)
 	    bc_add(initial_bootdev);
     }
 
     archsw.arch_autoload = i386_autoload;
     archsw.arch_getdev = i386_getdev;
     archsw.arch_copyin = i386_copyin;
     archsw.arch_copyout = i386_copyout;
     archsw.arch_readin = i386_readin;
     archsw.arch_isainb = isa_inb;
     archsw.arch_isaoutb = isa_outb;
 #ifdef LOADER_ZFS_SUPPORT
     archsw.arch_zfs_probe = i386_zfs_probe;
 #endif
 
     /*
      * March through the device switch probing for things.
      */
     for (i = 0; devsw[i] != NULL; i++)
 	if (devsw[i]->dv_init != NULL)
 	    (devsw[i]->dv_init)();
     printf("BIOS %dkB/%dkB available memory\n", bios_basemem / 1024, bios_extmem / 1024);
     if (initial_bootinfo != NULL) {
 	initial_bootinfo->bi_basemem = bios_basemem / 1024;
 	initial_bootinfo->bi_extmem = bios_extmem / 1024;
     }
 
     /* detect ACPI for future reference */
     biosacpi_detect();
 
     /* detect SMBIOS for future reference */
     smbios_detect(NULL);
 
     /* detect PCI BIOS for future reference */
     biospci_detect();
 
     printf("\n");
     printf("%s, Revision %s\n", bootprog_name, bootprog_rev);
     printf("(%s, %s)\n", bootprog_maker, bootprog_date);
 
     extract_currdev();				/* set $currdev and $loaddev */
     setenv("LINES", "24", 1);			/* optional */
     
     bios_getsmap();
 
     interact(NULL);
 
     /* if we ever get here, it is an error */
     return (1);
 }
 
 /*
  * Set the 'current device' by (if possible) recovering the boot device as 
  * supplied by the initial bootstrap.
  *
  * XXX should be extended for netbooting.
  */
 static void
 extract_currdev(void)
 {
     struct i386_devdesc		new_currdev;
 #ifdef LOADER_ZFS_SUPPORT
     char			buf[20];
     struct zfs_boot_args	*zargs;
 #endif
     int				biosdev = -1;
 
     /* Assume we are booting from a BIOS disk by default */
     new_currdev.d_dev = &biosdisk;
 
     /* new-style boot loaders such as pxeldr and cdldr */
     if (kargs->bootinfo == 0) {
         if ((kargs->bootflags & KARGS_FLAGS_CD) != 0) {
 	    /* we are booting from a CD with cdboot */
 	    new_currdev.d_dev = &bioscd;
 	    new_currdev.d_unit = bc_bios2unit(initial_bootdev);
 	} else if ((kargs->bootflags & KARGS_FLAGS_PXE) != 0) {
 	    /* we are booting from pxeldr */
 	    new_currdev.d_dev = &pxedisk;
 	    new_currdev.d_unit = 0;
 	} else {
 	    /* we don't know what our boot device is */
 	    new_currdev.d_kind.biosdisk.slice = -1;
 	    new_currdev.d_kind.biosdisk.partition = 0;
 	    biosdev = -1;
 	}
 #ifdef LOADER_ZFS_SUPPORT
     } else if ((kargs->bootflags & KARGS_FLAGS_ZFS) != 0) {
 	zargs = NULL;
 	/* check for new style extended argument */
 	if ((kargs->bootflags & KARGS_FLAGS_EXTARG) != 0)
 	    zargs = (struct zfs_boot_args *)(kargs + 1);
 
 	if (zargs != NULL &&
 	    zargs->size >= offsetof(struct zfs_boot_args, primary_pool)) {
 	    /* sufficient data is provided */
 	    new_currdev.d_kind.zfs.pool_guid = zargs->pool;
 	    new_currdev.d_kind.zfs.root_guid = zargs->root;
 	    if (zargs->size >= sizeof(*zargs) && zargs->primary_vdev != 0) {
 		sprintf(buf, "%llu", zargs->primary_pool);
 		setenv("vfs.zfs.boot.primary_pool", buf, 1);
 		sprintf(buf, "%llu", zargs->primary_vdev);
 		setenv("vfs.zfs.boot.primary_vdev", buf, 1);
 	    }
 	} else {
 	    /* old style zfsboot block */
 	    new_currdev.d_kind.zfs.pool_guid = kargs->zfspool;
 	    new_currdev.d_kind.zfs.root_guid = 0;
 	}
 	new_currdev.d_dev = &zfs_dev;
 #endif
     } else if ((initial_bootdev & B_MAGICMASK) != B_DEVMAGIC) {
 	/* The passed-in boot device is bad */
 	new_currdev.d_kind.biosdisk.slice = -1;
 	new_currdev.d_kind.biosdisk.partition = 0;
 	biosdev = -1;
     } else {
 	new_currdev.d_kind.biosdisk.slice = B_SLICE(initial_bootdev) - 1;
 	new_currdev.d_kind.biosdisk.partition = B_PARTITION(initial_bootdev);
 	biosdev = initial_bootinfo->bi_bios_dev;
 
 	/*
 	 * If we are booted by an old bootstrap, we have to guess at the BIOS
 	 * unit number.  We will lose if there is more than one disk type
 	 * and we are not booting from the lowest-numbered disk type 
 	 * (ie. SCSI when IDE also exists).
 	 */
 	if ((biosdev == 0) && (B_TYPE(initial_bootdev) != 2))	/* biosdev doesn't match major */
 	    biosdev = 0x80 + B_UNIT(initial_bootdev);		/* assume harddisk */
     }
     new_currdev.d_type = new_currdev.d_dev->dv_type;
 
     /*
      * If we are booting off of a BIOS disk and we didn't succeed in determining
      * which one we booted off of, just use disk0: as a reasonable default.
      */
     if ((new_currdev.d_type == biosdisk.dv_type) &&
 	((new_currdev.d_unit = bd_bios2unit(biosdev)) == -1)) {
 	printf("Can't work out which disk we are booting from.\n"
 	       "Guessed BIOS device 0x%x not found by probes, defaulting to disk0:\n", biosdev);
 	new_currdev.d_unit = 0;
     }
 
 #ifdef LOADER_ZFS_SUPPORT
-    init_zfs_bootenv(zfs_fmtdev(&new_currdev));
+    if (new_currdev.d_type == DEVT_ZFS)
+	init_zfs_bootenv(zfs_fmtdev(&new_currdev));
 #endif
 
     env_setenv("currdev", EV_VOLATILE, i386_fmtdev(&new_currdev),
 	       i386_setcurrdev, env_nounset);
     env_setenv("loaddev", EV_VOLATILE, i386_fmtdev(&new_currdev), env_noset,
 	       env_nounset);
 }
 
 #ifdef LOADER_ZFS_SUPPORT
 static void
 init_zfs_bootenv(char *currdev)
 {
 	char *beroot;
 
+	if (strlen(currdev) < 5)	/* need "zfs:" plus the char stripped below */
+		return;
+	if (strncmp(currdev, "zfs:", 4) != 0)
+		return;
 	/* Remove the trailing : */
 	currdev[strlen(currdev) - 1] = '\0';
 	setenv("zfs_be_active", currdev, 1);
+	setenv("zfs_be_currpage", "1", 1);
 	/* Do not overwrite if already set */
 	setenv("vfs.root.mountfrom", currdev, 0);
 	/* Forward past zfs: */
 	currdev = strchr(currdev, ':');
 	currdev++;
 	/* Remove the last element (current bootenv) */
 	beroot = strrchr(currdev, '/');
 	if (beroot != NULL)
 		beroot[0] = '\0';
-
 	beroot = currdev;
-	
 	setenv("zfs_be_root", beroot, 1);
 }
 #endif
 
 COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot);
 
 static int
 command_reboot(int argc, char *argv[])
 {
     int i;
 
     for (i = 0; devsw[i] != NULL; ++i)
 	if (devsw[i]->dv_cleanup != NULL)
 	    (devsw[i]->dv_cleanup)();
 
     printf("Rebooting...\n");
     delay(1000000);
     __exit(0);
 }
 
 /* provide this for panic, as it's not in the startup code */
 void
 exit(int code)
 {
     __exit(code);
 }
 
 COMMAND_SET(heap, "heap", "show heap usage", command_heap);
 
 static int
 command_heap(int argc, char *argv[])
 {
     mallocstats();
     printf("heap base at %p, top at %p, upper limit at %p\n", heap_bottom,
       sbrk(0), heap_top);
     return(CMD_OK);
 }
 
 #ifdef LOADER_ZFS_SUPPORT
 COMMAND_SET(lszfs, "lszfs", "list child datasets of a zfs dataset",
     command_lszfs);
 
 static int
 command_lszfs(int argc, char *argv[])
 {
     int err;
 
     if (argc != 2) {
 	command_errmsg = "wrong number of arguments";
 	return (CMD_ERROR);
     }
 
     err = zfs_list(argv[1]);
     if (err != 0) {
 	command_errmsg = strerror(err);
 	return (CMD_ERROR);
     }
 
     return (CMD_OK);
 }
 
 COMMAND_SET(reloadbe, "reloadbe", "refresh the list of ZFS Boot Environments",
     command_reloadbe);
 
 static int
 command_reloadbe(int argc, char *argv[])
 {
     int err;
+    char *root;		/* value of $zfs_be_root, NULL when unset */
 
     if (argc > 2) {
 	command_errmsg = "wrong number of arguments";
 	return (CMD_ERROR);
     }
 
     if (argc == 2) {
 	err = zfs_bootenv(argv[1]);
     } else {
+	root = getenv("zfs_be_root");
+	if (root == NULL) {
+	    /* There does not appear to be a ZFS pool here, exit without error */
+	    return (CMD_OK);
+	}
-	err = zfs_bootenv(getenv("zfs_be_root"));
+	err = zfs_bootenv(root);
     }
 
     if (err != 0) {
 	command_errmsg = strerror(err);
 	return (CMD_ERROR);
     }
 
     return (CMD_OK);
 }
 #endif
 
 /* ISA bus access functions for PnP. */
 static int
 isa_inb(int port)
 {
 
     return (inb(port));
 }
 
 static void
 isa_outb(int port, int value)
 {
 
     outb(port, value);
 }
 
 #ifdef LOADER_ZFS_SUPPORT
 static void
 i386_zfs_probe(void)
 {
     char devname[32];
     int unit;
 
     /*
      * Open all the disks we can find and see if we can reconstruct
      * ZFS pools from them.
      */
     for (unit = 0; unit < MAXBDDEV; unit++) {
 	if (bd_unit2bios(unit) == -1)
 	    break;
 	sprintf(devname, "disk%d:", unit);
 	zfs_probe_dev(devname, NULL);
     }
 }
 #endif
Index: projects/clang380-import/sys/boot/powerpc/boot1.chrp/boot1.c
===================================================================
--- projects/clang380-import/sys/boot/powerpc/boot1.chrp/boot1.c	(revision 293686)
+++ projects/clang380-import/sys/boot/powerpc/boot1.chrp/boot1.c	(revision 293687)
@@ -1,771 +1,767 @@
 /*-
  * Copyright (c) 1998 Robert Nordier
  * All rights reserved.
  * Copyright (c) 2001 Robert Drehmel
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms are freely
  * permitted provided that the above copyright notice and this
  * paragraph and the following disclaimer are duplicated in all
  * such forms.
  *
  * This software is provided "AS IS" and without any express or
  * implied warranties, including, without limitation, the implied
  * warranties of merchantability and fitness for a particular
  * purpose.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/dirent.h>
 #include <machine/elf.h>
 #include <machine/stdarg.h>
 
 #define _PATH_LOADER	"/boot/loader"
 #define _PATH_KERNEL	"/boot/kernel/kernel"
 
 #define BSIZEMAX	16384
 
 typedef int putc_func_t(char c, void *arg);
 typedef int32_t ofwh_t;
 
 /* Output state that vsnprintf() hands to __sputc() for each character. */
 struct sp_data {
 	char	*sp_buf;	/* destination buffer */
 	u_int	sp_len;		/* characters stored in sp_buf so far */
 	u_int	sp_size;	/* size value given to vsnprintf() */
 };
 
 static const char digits[] = "0123456789abcdef";
 
 static char bootpath[128];
 static char bootargs[128];
 
 static ofwh_t bootdev;
 
 static struct fs fs;
 static char blkbuf[BSIZEMAX];
 static unsigned int fsblks;
 
 static uint32_t fs_off;
 
 int main(int ac, char **av);
 
 static void exit(int) __dead2;
 static void load(const char *);
 static int dskread(void *, u_int64_t, int);
 
 static void usage(void);
 
 static void bcopy(const void *src, void *dst, size_t len);
 static void bzero(void *b, size_t len);
 
 static int domount(const char *device, int quiet);
 
 static void panic(const char *fmt, ...) __dead2;
 static int printf(const char *fmt, ...);
 static int putchar(char c, void *arg);
 static int vprintf(const char *fmt, va_list ap);
 static int vsnprintf(char *str, size_t sz, const char *fmt, va_list ap);
 
 static int __printf(const char *fmt, putc_func_t *putc, void *arg, va_list ap);
 static int __putc(char c, void *arg);
 static int __puts(const char *s, putc_func_t *putc, void *arg);
 static int __sputc(char c, void *arg);
 static char *__uitoa(char *buf, u_int val, int base);
 static char *__ultoa(char *buf, u_long val, int base);
 
 void __syncicache(void *, int);
 
 /*
  * Open Firmware interface functions
  */
 typedef u_int32_t	ofwcell_t;
 typedef u_int32_t	u_ofwh_t;
 typedef int (*ofwfp_t)(void *);
 ofwfp_t ofw;			/* the prom Open Firmware entry */
 ofwh_t chosenh;
 
 void ofw_init(void *, int, int (*)(void *), char *, int);
 static ofwh_t ofw_finddevice(const char *);
 static ofwh_t ofw_open(const char *);
 static int ofw_close(ofwh_t);
 static int ofw_getprop(ofwh_t, const char *, void *, size_t);
 static int ofw_setprop(ofwh_t, const char *, void *, size_t);
 static int ofw_read(ofwh_t, void *, size_t);
 static int ofw_write(ofwh_t, const void *, size_t);
 static int ofw_claim(void *virt, size_t len, u_int align);
 static int ofw_seek(ofwh_t, u_int64_t);
 static void ofw_exit(void) __dead2;
 
 ofwh_t bootdevh;
 ofwh_t stdinh, stdouth;
 
 /*
  * Program entry point.  Reserves a 16384-byte area labelled "stack" in
  * .data, loads %r1 with the address stack + 8192 and branches to
  * ofw_init().
  */
 __asm("                         \n\
         .data                   \n\
 	.align 4		\n\
 stack:                          \n\
         .space  16384           \n\
                                 \n\
         .text                   \n\
         .globl  _start          \n\
 _start:                         \n\
         lis     %r1,stack@ha    \n\
         addi    %r1,%r1,stack@l \n\
         addi    %r1,%r1,8192    \n\
                                 \n\
         b       ofw_init        \n\
 ");
 
 /*
  * C-level entry reached from _start.
  *
  * Saves the Open Firmware entry point, reads the stdin/stdout handles
  * and the bootargs/bootpath strings from /chosen, cuts bootpath just
  * after its first ':', splits bootargs in place on spaces into at most
  * 16 words, and exits with the value returned by main().
  */
 void
 ofw_init(void *vpd, int res, int (*openfirm)(void *), char *arg, int argl)
 {
 	char *av[16];
 	char *cp;
 	int ac;
 
 	ofw = openfirm;
 
 	chosenh = ofw_finddevice("/chosen");
 	ofw_getprop(chosenh, "stdin", &stdinh, sizeof(stdinh));
 	ofw_getprop(chosenh, "stdout", &stdouth, sizeof(stdouth));
 	ofw_getprop(chosenh, "bootargs", bootargs, sizeof(bootargs));
 	ofw_getprop(chosenh, "bootpath", bootpath, sizeof(bootpath));
 
 	/* Guarantee termination even if a property filled its buffer. */
 	bootargs[sizeof(bootargs) - 1] = '\0';
 	bootpath[sizeof(bootpath) - 1] = '\0';
 
 	/* Keep bootpath up to and including the first ':'. */
 	for (cp = bootpath; *cp != '\0'; cp++) {
 		if (*cp == ':') {
 			cp[1] = '\0';
 			break;
 		}
 	}
 
 	/* Build av[] by NUL-terminating each space-separated word. */
 	ac = 0;
 	cp = bootargs;
 	for (;;) {
 		while (*cp == ' ')
 			cp++;
 		if (ac >= 16 || *cp == '\0')
 			break;
 		av[ac] = cp;
 		ac++;
 		while (*cp != '\0' && *cp != ' ')
 			cp++;
 		if (*cp == '\0')
 			continue;
 		*cp = '\0';
 		cp++;
 	}
 
 	exit(main(ac, av));
 }
 
 static ofwh_t
 ofw_finddevice(const char *name)
 {
 	ofwcell_t args[] = {
 		(ofwcell_t)"finddevice",
 		1,
 		1,
 		(ofwcell_t)name,
 		0
 	};
 
 	if ((*ofw)(args)) {
 		printf("ofw_finddevice: name=\"%s\"\n", name);
 		return (1);
 	}
 	return (args[4]);
 }
 
 /*
  * Issue the Open Firmware "getprop" request: ask for property name of
  * node ofwh to be stored in buf, with len given as the buffer length.
  * Returns 0 when the firmware call returns 0, otherwise logs the
  * arguments and returns 1.  The result cell is not examined.
  */
 static int
 ofw_getprop(ofwh_t ofwh, const char *name, void *buf, size_t len)
 {
 	ofwcell_t cells[8];
 
 	cells[0] = (ofwcell_t)"getprop";
 	cells[1] = 4;
 	cells[2] = 1;
 	cells[3] = (u_ofwh_t)ofwh;
 	cells[4] = (ofwcell_t)name;
 	cells[5] = (ofwcell_t)buf;
 	cells[6] = len;
 	cells[7] = 0;
 
 	if ((*ofw)(cells) == 0)
 		return (0);
 
 	printf("ofw_getprop: ofwh=0x%x buf=%p len=%u\n",
 		ofwh, buf, len);
 	return (1);
 }
 
 /*
  * Issue the Open Firmware "setprop" request: set property name of node
  * ofwh from the len bytes at buf.  Returns 0 when the firmware call
  * returns 0, otherwise logs the arguments and returns 1.  The result
  * cell is not examined.
  */
 static int
 ofw_setprop(ofwh_t ofwh, const char *name, void *buf, size_t len)
 {
 	ofwcell_t cells[8];
 
 	cells[0] = (ofwcell_t)"setprop";
 	cells[1] = 4;
 	cells[2] = 1;
 	cells[3] = (u_ofwh_t)ofwh;
 	cells[4] = (ofwcell_t)name;
 	cells[5] = (ofwcell_t)buf;
 	cells[6] = len;
 	cells[7] = 0;
 
 	if ((*ofw)(cells) == 0)
 		return (0);
 
 	printf("ofw_setprop: ofwh=0x%x buf=%p len=%u\n",
 		ofwh, buf, len);
 	return (1);
 }
 
 static ofwh_t
 ofw_open(const char *path)
 {
 	ofwcell_t args[] = {
 		(ofwcell_t)"open",
 		1,
 		1,
 		(ofwcell_t)path,
 		0
 	};
 
 	if ((*ofw)(args)) {
 		printf("ofw_open: path=\"%s\"\n", path);
 		return (-1);
 	}
 	return (args[4]);
 }
 
 /*
  * Issue the Open Firmware "close" request for the handle devh.
  * Returns 0 when the firmware call returns 0, otherwise logs the
  * handle and returns 1.
  */
 static int
 ofw_close(ofwh_t devh)
 {
 	ofwcell_t cells[4];
 
 	cells[0] = (ofwcell_t)"close";
 	cells[1] = 1;
 	cells[2] = 0;
 	cells[3] = (u_ofwh_t)devh;
 
 	if ((*ofw)(cells) == 0)
 		return (0);
 
 	printf("ofw_close: devh=0x%x\n", devh);
 	return (1);
 }
 
 /*
  * Issue the Open Firmware "claim" request for len bytes at virt with
  * the given alignment.  Returns 0 when the firmware call returns 0,
  * otherwise logs the request and returns 1.  The result cells are not
  * examined.
  */
 static int
 ofw_claim(void *virt, size_t len, u_int align)
 {
 	ofwcell_t cells[8];
 
 	cells[0] = (ofwcell_t)"claim";
 	cells[1] = 3;
 	cells[2] = 1;
 	cells[3] = (ofwcell_t)virt;
 	cells[4] = len;
 	cells[5] = align;
 	cells[6] = 0;
 	cells[7] = 0;
 
 	if ((*ofw)(cells) == 0)
 		return (0);
 
 	printf("ofw_claim: virt=%p len=%u\n", virt, len);
 	return (1);
 }
 
 /*
  * Issue the Open Firmware "read" request: len bytes from devh into
  * buf.  Returns 0 when the firmware call returns 0, otherwise logs the
  * request and returns 1.  The result cell is not examined.
  */
 static int
 ofw_read(ofwh_t devh, void *buf, size_t len)
 {
 	ofwcell_t cells[7];
 
 	cells[0] = (ofwcell_t)"read";
 	cells[1] = 3;
 	cells[2] = 1;
 	cells[3] = (u_ofwh_t)devh;
 	cells[4] = (ofwcell_t)buf;
 	cells[5] = len;
 	cells[6] = 0;
 
 	if ((*ofw)(cells) == 0)
 		return (0);
 
 	printf("ofw_read: devh=0x%x buf=%p len=%u\n", devh, buf, len);
 	return (1);
 }
 
 /*
  * Issue the Open Firmware "write" request: len bytes from buf to devh.
  * Returns 0 when the firmware call returns 0, otherwise logs the
  * request and returns 1.  The result cell is not examined.
  */
 static int
 ofw_write(ofwh_t devh, const void *buf, size_t len)
 {
 	ofwcell_t cells[7];
 
 	cells[0] = (ofwcell_t)"write";
 	cells[1] = 3;
 	cells[2] = 1;
 	cells[3] = (u_ofwh_t)devh;
 	cells[4] = (ofwcell_t)buf;
 	cells[5] = len;
 	cells[6] = 0;
 
 	if ((*ofw)(cells) == 0)
 		return (0);
 
 	printf("ofw_write: devh=0x%x buf=%p len=%u\n", devh, buf, len);
 	return (1);
 }
 
 /*
  * Issue the Open Firmware "seek" request for the handle devh.  The
  * 64-bit byte offset is passed as two 32-bit cells, upper half first.
  * Returns 0 when the firmware call returns 0, otherwise logs the
  * request and returns 1.  The result cell is not examined.
  */
 static int
 ofw_seek(ofwh_t devh, u_int64_t off)
 {
 	ofwcell_t hi = (ofwcell_t)(off >> 32);
 	ofwcell_t lo = (ofwcell_t)off;
 	ofwcell_t args[] = {
 		(ofwcell_t)"seek",
 		3,
 		1,
 		(u_ofwh_t)devh,
 		hi,
 		lo,
 		0
 	};
 
 	if ((*ofw)(args)) {
 		/*
 		 * This file's printf has no 64-bit conversion: %lx fetches
 		 * a u_long, which does not match a u_int64_t argument when
 		 * u_long is 32 bits wide.  Print the two halves as u_int
 		 * values instead.
 		 */
 		printf("ofw_seek: devh=0x%x off=0x%x:0x%x\n", devh,
 		    (u_int)hi, (u_int)lo);
 		return (1);
 	}
 	return (0);
 }
 
 static void
 ofw_exit(void)
 {
 	ofwcell_t args[3];
 
 	args[0] = (ofwcell_t)"exit";
 	args[1] = 0;
 	args[2] = 0;
 
 	for (;;)
 		(*ofw)(args);
 }
 
 /*
  * Copy len bytes from src to dst, one byte at a time in ascending
  * address order.
  */
 static void
 bcopy(const void *src, void *dst, size_t len)
 {
 	const char *from;
 	char *to;
 	size_t i;
 
 	from = src;
 	to = dst;
 	for (i = 0; i < len; i++)
 		to[i] = from[i];
 }
 
 /*
  * Local memcpy with the usual (dst, src, len) argument order.  It
  * performs the same ascending byte-by-byte copy as bcopy() and returns
  * nothing.
  */
 static void
 memcpy(void *dst, const void *src, size_t len)
 {
 	const char *from = src;
 	char *to = dst;
 
 	while (len-- != 0)
 		*to++ = *from++;
 }
 
 /* Store zero in each of the len bytes starting at b. */
 static void
 bzero(void *b, size_t len)
 {
 	char *bytes;
 	size_t i;
 
 	bytes = b;
 	for (i = 0; i < len; i++)
 		bytes[i] = 0;
 }
 
 /*
  * Compare two NUL-terminated strings.  Returns the difference between
  * the first pair of differing bytes, taken as unsigned chars, or 0 when
  * the strings are equal.
  */
 static int
 strcmp(const char *s1, const char *s2)
 {
 	while (*s1 != '\0' && *s1 == *s2) {
 		s1++;
 		s2++;
 	}
 	return ((u_char)*s1 - (u_char)*s2);
 }
 
 #include "ufsread.c"
 
 /*
  * Boot block main routine.
  *
  * Any argument not starting with '-' replaces the default loader path;
  * any argument starting with '-' prints the usage text and exits.  If
  * bootpath ends in ':', partitions 0..15 are appended in turn until
  * domount() succeeds; otherwise bootpath is mounted as given.  The
  * chosen volume is then stored in the /chosen "bootargs" property and
  * the loader is loaded and started.  Returns 1 if load() comes back.
  */
 int
 main(int ac, char **av)
 {
 	const char *path;
 	char bootpath_full[255];
 	int i, len, fulllen;
 
 	path = _PATH_LOADER;
 	for (i = 0; i < ac; i++) {
 		switch (av[i][0]) {
 		case '-':
 			switch (av[i][1]) {
 			default:
 				usage();
 			}
 			break;
 		default:
 			path = av[i];
 			break;
 		}
 	}
 
 	printf(" \n>> FreeBSD/powerpc Open Firmware boot block\n"
 	"   Boot path:   %s\n"
 	"   Boot loader: %s\n", bootpath, path);
 
 	len = 0;
 	while (bootpath[len] != '\0')
 		len++;
 
 	memcpy(bootpath_full, bootpath, len + 1);
 
 	/*
 	 * An empty bootpath has no last character to inspect; it falls
 	 * through to the plain mount attempt below.
 	 */
 	if (len > 0 && bootpath_full[len - 1] == ':') {
 		for (i = 0; i < 16; i++) {
 			if (i < 10) {
 				bootpath_full[len] = i + '0';
 				bootpath_full[len + 1] = '\0';
 			} else {
 				bootpath_full[len] = '1';
 				bootpath_full[len + 1] = i - 10 + '0';
 				bootpath_full[len + 2] = '\0';
 			}
 
 			if (domount(bootpath_full, 1) >= 0)
 				break;
 
 			if (bootdev > 0)
 				ofw_close(bootdev);
 		}
 
 		if (i >= 16)
 			panic("domount");
 	} else {
 		if (domount(bootpath_full, 0) == -1)
 			panic("domount");
 	}
 
 	printf("   Boot volume:   %s\n", bootpath_full);
 
 	/*
 	 * Zero, one or two digits may have been appended above, so measure
 	 * the final string and pass exactly its bytes plus the terminator.
 	 */
 	fulllen = 0;
 	while (bootpath_full[fulllen] != '\0')
 		fulllen++;
 	ofw_setprop(chosenh, "bootargs", bootpath_full, fulllen + 1);
 	load(path);
 	return (1);
 }
 
 /* Print the one-line usage message, then leave through exit(1). */
 static void
 usage(void)
 {
 	printf("usage: boot device [/path/to/loader]\n");
 	exit(1);
 }
 
 /*
  * Local exit(): hands control to ofw_exit().  The code argument is not
  * used.
  */
 static void
 exit(int code)
 {
 	ofw_exit();
 }
 
 /* Backing storage that domount() installs as dmadat. */
 static struct dmadat __dmadat;
 
 /*
  * Open device through ofw_open() into bootdev and check it with
  * fsread(0, NULL, 0).  Returns 0 when both succeed and -1 otherwise.
  * A nonzero quiet suppresses only the "can't read superblock" message.
  * On an fsread() failure bootdev is left open.
  */
 static int
 domount(const char *device, int quiet)
 {
 	dmadat = &__dmadat;
 
 	bootdev = ofw_open(device);
 	if (bootdev == -1) {
 		printf("domount: can't open device\n");
 		return (-1);
 	}
 
 	if (fsread(0, NULL, 0) == 0)
 		return (0);
 
 	if (!quiet)
 		printf("domount: can't read superblock\n");
 	return (-1);
 }
 
 static void
 load(const char *fname)
 {
 	Elf32_Ehdr eh;
 	Elf32_Phdr ph;
 	caddr_t p;
 	ufs_ino_t ino;
 	int i;
 
 	if ((ino = lookup(fname)) == 0) {
 		printf("File %s not found\n", fname);
 		return;
 	}
 	if (fsread(ino, &eh, sizeof(eh)) != sizeof(eh)) {
 		printf("Can't read elf header\n");
 		return;
 	}
 	if (!IS_ELF(eh)) {
 		printf("Not an ELF file\n");
 		return;
 	}
 	for (i = 0; i < eh.e_phnum; i++) {
 		fs_off = eh.e_phoff + i * eh.e_phentsize;
 		if (fsread(ino, &ph, sizeof(ph)) != sizeof(ph)) {
 			printf("Can't read program header %d\n", i);
 			return;
 		}
 		if (ph.p_type != PT_LOAD)
 			continue;
 		fs_off = ph.p_offset;
 		p = (caddr_t)ph.p_vaddr;
 		ofw_claim(p,(ph.p_filesz > ph.p_memsz) ? 
 		    ph.p_filesz : ph.p_memsz,0);
 		if (fsread(ino, p, ph.p_filesz) != ph.p_filesz) {
 			printf("Can't read content of section %d\n", i);
 			return;
 		}
 		if (ph.p_filesz != ph.p_memsz)
 			bzero(p + ph.p_filesz, ph.p_memsz - ph.p_filesz);
 		__syncicache(p, ph.p_memsz);
 	}
 	ofw_close(bootdev);
 	(*(void (*)(void *, int, ofwfp_t, char *, int))eh.e_entry)(NULL, 0, 
 	    ofw,NULL,0);
 }
 
 /*
  * Read nblk blocks of DEV_BSIZE bytes, starting at block lba, from
  * bootdev into buf.
  *
  * The Open Firmware should open the correct partition for us.
  * That means, if we read from offset zero on an open instance handle,
  * we should read from offset zero of that partition.
  *
  * Always returns 0: the results of ofw_seek() and ofw_read() are not
  * checked.
  */
 static int
 dskread(void *buf, u_int64_t lba, int nblk)
 {
 	u_int64_t byteoff;
 
 	byteoff = lba * DEV_BSIZE;
 	ofw_seek(bootdev, byteoff);
 	ofw_read(bootdev, buf, nblk * DEV_BSIZE);
 	return (0);
 }
 
 /*
  * Format the message into a 128-byte buffer, print it with a "panic: "
  * prefix and leave through exit(1).
  */
 static void
 panic(const char *fmt, ...)
 {
 	char msg[128];
 	va_list ap;
 
 	va_start(ap, fmt);
 	vsnprintf(msg, sizeof msg, fmt, ap);
 	va_end(ap);
 
 	printf("panic: %s\n", msg);
 	exit(1);
 }
 
 /*
  * Local printf: formats through vprintf() and returns its result.
  */
 static int
 printf(const char *fmt, ...)
 {
 	va_list ap;
 	int ret;
 
-	/* Don't annoy the user as we probe for partitions */
-	if (strcmp(fmt,"Not ufs\n") == 0)
-		return 0;
-
 	va_start(ap, fmt);
 	ret = vprintf(fmt, ap);
 	va_end(ap);
 	return (ret);
 }
 
 /*
  * Write one character to the firmware stdout handle, sending '\r'
  * ahead of every '\n'.  The arg parameter is unused.  Always returns 1.
  */
 static int
 putchar(char c, void *arg)
 {
 	char cr;
 
 	if (c == '\n') {
 		cr = '\r';
 		ofw_write(stdouth, &cr, 1);
 	}
 	ofw_write(stdouth, &c, 1);
 	return (1);
 }
 
 /*
  * Format to the console: runs __printf() with putchar() as the output
  * routine and returns its result.
  */
 static int
 vprintf(const char *fmt, va_list ap)
 {
 	return (__printf(fmt, putchar, 0, ap));
 }
 
 static int
 vsnprintf(char *str, size_t sz, const char *fmt, va_list ap)
 {
 	struct sp_data sp;
 	int ret;
 
 	sp.sp_buf = str;
 	sp.sp_len = 0;
 	sp.sp_size = sz;
 	ret = __printf(fmt, __sputc, &sp, ap);
 	return (ret);
 }
 
 /*
  * Minimal printf engine shared by vprintf() and vsnprintf().
  *
  * Every output character is passed to putc together with arg, and the
  * values putc returns are summed into the result.  Supported
  * conversions are %%, %c, %d, %o, %p, %s, %u and %x, with an optional
  * 'l' (long) modifier and '#' (adds "0x" to %x).  Digits between '%'
  * and the conversion are accepted and ignored; no padding is done.
  * Unknown conversion characters are dropped.
  */
 static int
 __printf(const char *fmt, putc_func_t *putc, void *arg, va_list ap)
 {
 	char buf[(sizeof(long) * 8) + 1];
 	char *nbuf;
 	u_long ul;
 	u_int ui;
 	long sl;
 	int si;
 	int lflag;
 	int sflag;
 	char *s;
 	int ret;
 	int c;
 
 	/* The number helpers build their digits backwards from here. */
 	nbuf = &buf[sizeof buf - 1];
 	ret = 0;
 	while ((c = *fmt++) != 0) {
 		if (c != '%') {
 			ret += putc(c, arg);
 			continue;
 		}
 		lflag = 0;
 		sflag = 0;
 reswitch:	c = *fmt++;
 		switch (c) {
 		case '\0':
 			/*
 			 * The format ended inside a conversion.  Stop here
 			 * rather than reading past the terminator.
 			 */
 			return (ret);
 		case '#':
 			sflag = 1;
 			goto reswitch;
 		case '%':
 			ret += putc('%', arg);
 			break;
 		case 'c':
 			c = va_arg(ap, int);
 			ret += putc(c, arg);
 			break;
 		case 'd':
 			/*
 			 * Test the sign on the signed value.  Comparing the
 			 * unsigned copy with a signed cast of itself is
 			 * always false, so '-' was never printed.
 			 */
 			if (lflag == 0) {
 				si = va_arg(ap, int);
 				ui = (u_int)si;
 				if (si < 0) {
 					ui = -ui;
 					ret += putc('-', arg);
 				}
 				s = __uitoa(nbuf, ui, 10);
 			} else {
 				sl = va_arg(ap, long);
 				ul = (u_long)sl;
 				if (sl < 0) {
 					ul = -ul;
 					ret += putc('-', arg);
 				}
 				s = __ultoa(nbuf, ul, 10);
 			}
 			ret += __puts(s, putc, arg);
 			break;
 		case 'l':
 			lflag = 1;
 			goto reswitch;
 		case 'o':
 			if (lflag == 0) {
 				ui = (u_int)va_arg(ap, u_int);
 				s = __uitoa(nbuf, ui, 8);
 			} else {
 				ul = (u_long)va_arg(ap, u_long);
 				s = __ultoa(nbuf, ul, 8);
 			}
 			ret += __puts(s, putc, arg);
 			break;
 		case 'p':
 			ul = (u_long)va_arg(ap, void *);
 			s = __ultoa(nbuf, ul, 16);
 			ret += __puts("0x", putc, arg);
 			ret += __puts(s, putc, arg);
 			break;
 		case 's':
 			s = va_arg(ap, char *);
 			/* Print a marker instead of dereferencing NULL. */
 			if (s == NULL)
 				s = "(null)";
 			ret += __puts(s, putc, arg);
 			break;
 		case 'u':
 			if (lflag == 0) {
 				ui = va_arg(ap, u_int);
 				s = __uitoa(nbuf, ui, 10);
 			} else {
 				ul = va_arg(ap, u_long);
 				s = __ultoa(nbuf, ul, 10);
 			}
 			ret += __puts(s, putc, arg);
 			break;
 		case 'x':
 			if (lflag == 0) {
 				ui = va_arg(ap, u_int);
 				s = __uitoa(nbuf, ui, 16);
 			} else {
 				ul = va_arg(ap, u_long);
 				s = __ultoa(nbuf, ul, 16);
 			}
 			if (sflag)
 				ret += __puts("0x", putc, arg);
 			ret += __puts(s, putc, arg);
 			break;
 		case '0': case '1': case '2': case '3': case '4':
 		case '5': case '6': case '7': case '8': case '9':
 			/* Width digits are consumed but have no effect. */
 			goto reswitch;
 		default:
 			break;
 		}
 	}
 	return (ret);
 }
 
 /*
  * Output routine used by vsnprintf(): append c to the buffer described
  * by arg (a struct sp_data) and keep the buffer NUL-terminated.
  *
  * At most sp_size - 1 characters are stored so that the terminator
  * always fits inside the sp_size bytes.  Previously the terminator
  * could be written at sp_buf[sp_size], one byte past the buffer.  A
  * zero-sized buffer is left untouched.  Always returns 1, so the
  * caller's count reflects the characters produced, not those stored.
  */
 static int
 __sputc(char c, void *arg)
 {
 	struct sp_data *sp;
 
 	sp = arg;
 	if (sp->sp_size == 0)
 		return (1);
 	if (sp->sp_len < sp->sp_size - 1)
 		sp->sp_buf[sp->sp_len++] = c;
 	sp->sp_buf[sp->sp_len] = '\0';
 	return (1);
 }
 
 /*
  * Send each character of the NUL-terminated string s to putc along with
  * arg.  Returns the sum of the values putc returned.
  */
 static int
 __puts(const char *s, putc_func_t *putc, void *arg)
 {
 	int total;
 
 	total = 0;
 	while (*s != '\0') {
 		total += putc(*s, arg);
 		s++;
 	}
 	return (total);
 }
 
 /*
  * Convert ui to text in the given base.  buf must point at the LAST
  * byte of the caller's buffer: the terminator is stored there and the
  * digits are written backwards in front of it.  Returns a pointer to
  * the first digit.
  */
 static char *
 __uitoa(char *buf, u_int ui, int base)
 {
 	char *cur;
 
 	cur = buf;
 	*cur = '\0';
 	for (;;) {
 		cur--;
 		*cur = digits[ui % base];
 		ui /= base;
 		if (ui == 0)
 			break;
 	}
 	return (cur);
 }
 
 /*
  * Convert ul to text in the given base.  buf must point at the LAST
  * byte of the caller's buffer: the terminator is stored there and the
  * digits are written backwards in front of it.  Returns a pointer to
  * the first digit.
  */
 static char *
 __ultoa(char *buf, u_long ul, int base)
 {
 	char *cur;
 
 	cur = buf;
 	*cur = '\0';
 	for (;;) {
 		cur--;
 		*cur = digits[ul % base];
 		ul /= base;
 		if (ul == 0)
 			break;
 	}
 	return (cur);
 }
Index: projects/clang380-import/sys/boot/powerpc/boot1.chrp
===================================================================
--- projects/clang380-import/sys/boot/powerpc/boot1.chrp	(revision 293686)
+++ projects/clang380-import/sys/boot/powerpc/boot1.chrp	(revision 293687)

Property changes on: projects/clang380-import/sys/boot/powerpc/boot1.chrp
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/boot/powerpc/boot1.chrp:r292913-293685
Index: projects/clang380-import/sys/boot/userboot/libstand/Makefile
===================================================================
--- projects/clang380-import/sys/boot/userboot/libstand/Makefile	(revision 293686)
+++ projects/clang380-import/sys/boot/userboot/libstand/Makefile	(revision 293687)
@@ -1,136 +1,12 @@
 # $FreeBSD$
-# Originally from	$NetBSD: Makefile,v 1.21 1997/10/26 22:08:38 lukem Exp $
-#
-# Notes:
-# - We don't use the libc strerror/sys_errlist because the string table is
-#   quite large.
-#
 
-MAN=
+.include <src.opts.mk>
 
-.include <bsd.own.mk>
-MK_SSP=		no
-
 LIBSTAND_SRC=	${.CURDIR}/../../../../lib/libstand
-LIBC_SRC=	${LIBSTAND_SRC}/../libc
 
-.PATH:		${LIBSTAND_SRC}
-LIB=		stand
 INTERNALLIB=
-MK_PROFILE=	no
-NO_PIC=
+INCS=
+MAN=
+.PATH:	${LIBSTAND_SRC}
 
-WARNS?=		0
-
-# standalone components and stuff we have modified locally
-SRCS+=	gzguts.h zutil.h __main.c assert.c bcd.c bswap.c environment.c getopt.c gets.c \
-	globals.c pager.c printf.c strdup.c strerror.c strtol.c strtoul.c random.c \
-	sbrk.c twiddle.c zalloc.c zalloc_malloc.c
-
-# private (pruned) versions of libc string functions
-SRCS+=	strcasecmp.c
-
-.PATH: ${LIBC_SRC}/net
-
-SRCS+= ntoh.c
-
-# string functions from libc
-.PATH: ${LIBC_SRC}/string
-.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "powerpc" || \
-	${MACHINE_CPUARCH} == "sparc64" || ${MACHINE_CPUARCH} == "amd64" || \
-	${MACHINE_CPUARCH} == "arm"
-SRCS+=	bcmp.c bcopy.c bzero.c ffs.c memccpy.c memchr.c memcmp.c memcpy.c \
-	memmove.c memset.c qdivrem.c strcat.c strchr.c strcmp.c strcpy.c \
-	strcspn.c strlen.c strncat.c strncmp.c strncpy.c strpbrk.c \
-	strrchr.c strsep.c strspn.c strstr.c strtok.c swab.c
-.endif
-.if ${MACHINE_CPUARCH} == "arm"
-.PATH: ${LIBC_SRC}/arm/gen
-SRCS+= divsi3.S
-.endif
-.if ${MACHINE_CPUARCH} == "powerpc"
-.PATH: ${LIBC_SRC}/quad
-SRCS+=	ashldi3.c ashrdi3.c
-.PATH: ${LIBC_SRC}/powerpc/gen
-SRCS+=	syncicache.c
-.endif
-
-# uuid functions from libc
-.PATH: ${LIBC_SRC}/uuid
-SRCS+= uuid_equal.c uuid_is_nil.c
-
-# _setjmp/_longjmp
-.if ${MACHINE_CPUARCH} == "amd64"
-.PATH: ${LIBSTAND_SRC}/amd64
-.elif ${MACHINE_ARCH} == "powerpc64"
-.PATH: ${LIBSTAND_SRC}/powerpc
-.else
-.PATH: ${LIBSTAND_SRC}/${MACHINE_CPUARCH}
-.endif
-SRCS+=	_setjmp.S
-
-# decompression functionality from libbz2
-# NOTE: to actually test this functionality after libbz2 upgrade compile
-# loader(8) with LOADER_BZIP2_SUPPORT defined
-.PATH: ${LIBSTAND_SRC}/../../contrib/bzip2
-CFLAGS+= -DBZ_NO_STDIO -DBZ_NO_COMPRESS
-SRCS+=	libstand_bzlib_private.h
-
-.for file in bzlib.c crctable.c decompress.c huffman.c randtable.c
-SRCS+=	_${file}
-CLEANFILES+=	_${file}
-
-_${file}: ${file}
-	sed "s|bzlib_private\.h|libstand_bzlib_private.h|" \
-	    ${.ALLSRC} > ${.TARGET}
-.endfor
-
-CLEANFILES+= libstand_bzlib_private.h
-libstand_bzlib_private.h: bzlib_private.h
-	sed -e 's|<stdlib.h>|"stand.h"|' \
-		${.ALLSRC} > ${.TARGET}
-
-# decompression functionality from libz
-.PATH: ${LIBSTAND_SRC}/../libz
-CFLAGS+=-DHAVE_MEMCPY -I${LIBSTAND_SRC}/../libz
-SRCS+=	adler32.c crc32.c libstand_zutil.h libstand_gzguts.h
-
-.for file in infback.c inffast.c inflate.c inftrees.c zutil.c
-SRCS+=	_${file}
-CLEANFILES+=	_${file}
-
-_${file}: ${file}
-	sed -e "s|zutil\.h|libstand_zutil.h|" \
-	    -e "s|gzguts\.h|libstand_gzguts.h|" \
-	    ${.ALLSRC} > ${.TARGET}
-.endfor
-
-# depend on stand.h being able to be included multiple times
-.for file in zutil.h gzguts.h
-CLEANFILES+= libstand_${file}
-libstand_${file}: ${file}
-	sed -e 's|<fcntl.h>|"stand.h"|' \
-	    -e 's|<stddef.h>|"stand.h"|' \
-	    -e 's|<string.h>|"stand.h"|' \
-	    -e 's|<stdio.h>|"stand.h"|' \
-	    -e 's|<stdlib.h>|"stand.h"|' \
-	    ${.ALLSRC} > ${.TARGET}
-.endfor
-
-# io routines
-SRCS+=	closeall.c dev.c ioctl.c nullfs.c stat.c \
-	fstat.c close.c lseek.c open.c read.c write.c readdir.c
-
-# network routines
-SRCS+=	arp.c ether.c inet_ntoa.c in_cksum.c net.c udp.c netif.c rpc.c
-
-# network info services:
-SRCS+=	bootp.c rarp.c bootparam.c
-
-# boot filesystems
-SRCS+=	ufs.c nfs.c cd9660.c tftp.c gzipfs.c bzipfs.c
-SRCS+=	dosfs.c ext2fs.c
-SRCS+=	splitfs.c
-
-.include <bsd.stand.mk>
-.include <bsd.lib.mk>
+.include "${LIBSTAND_SRC}/Makefile"
Index: projects/clang380-import/sys/boot/userboot/libstand/Makefile.depend
===================================================================
--- projects/clang380-import/sys/boot/userboot/libstand/Makefile.depend	(revision 293686)
+++ projects/clang380-import/sys/boot/userboot/libstand/Makefile.depend	(revision 293687)
@@ -1,58 +1,57 @@
 # $FreeBSD$
 # Autogenerated - do NOT edit!
 
 DIRDEPS = \
 	include \
 	include/arpa \
 	include/xlocale \
 	lib/libbz2 \
-	lib/libstand \
 
 
 .include <dirdeps.mk>
 
 .if ${DEP_RELDIR} == ${_DEP_RELDIR}
 # local dependencies - needed for -jN in clean tree
 _bzlib.o: _bzlib.c
 _bzlib.o: libstand_bzlib_private.h
 _bzlib.po: _bzlib.c
 _bzlib.po: libstand_bzlib_private.h
 _crctable.o: _crctable.c
 _crctable.o: libstand_bzlib_private.h
 _crctable.po: _crctable.c
 _crctable.po: libstand_bzlib_private.h
 _decompress.o: _decompress.c
 _decompress.o: libstand_bzlib_private.h
 _decompress.po: _decompress.c
 _decompress.po: libstand_bzlib_private.h
 _huffman.o: _huffman.c
 _huffman.o: libstand_bzlib_private.h
 _huffman.po: _huffman.c
 _huffman.po: libstand_bzlib_private.h
 _infback.o: _infback.c
 _infback.o: libstand_zutil.h
 _infback.po: _infback.c
 _infback.po: libstand_zutil.h
 _inffast.o: _inffast.c
 _inffast.o: libstand_zutil.h
 _inffast.po: _inffast.c
 _inffast.po: libstand_zutil.h
 _inflate.o: _inflate.c
 _inflate.o: libstand_zutil.h
 _inflate.po: _inflate.c
 _inflate.po: libstand_zutil.h
 _inftrees.o: _inftrees.c
 _inftrees.o: libstand_zutil.h
 _inftrees.po: _inftrees.c
 _inftrees.po: libstand_zutil.h
 _randtable.o: _randtable.c
 _randtable.o: libstand_bzlib_private.h
 _randtable.po: _randtable.c
 _randtable.po: libstand_bzlib_private.h
 _zutil.o: _zutil.c
 _zutil.o: libstand_gzguts.h
 _zutil.o: libstand_zutil.h
 _zutil.po: _zutil.c
 _zutil.po: libstand_gzguts.h
 _zutil.po: libstand_zutil.h
 .endif
Index: projects/clang380-import/sys/boot/userboot/userboot/main.c
===================================================================
--- projects/clang380-import/sys/boot/userboot/userboot/main.c	(revision 293686)
+++ projects/clang380-import/sys/boot/userboot/userboot/main.c	(revision 293687)
@@ -1,315 +1,320 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * Copyright (c) 1998,2000 Doug Rabson <dfr@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <stand.h>
 #include <string.h>
 #include <setjmp.h>
 
 #include "bootstrap.h"
 #include "disk.h"
 #include "libuserboot.h"
 
 #if defined(USERBOOT_ZFS_SUPPORT)
 #include "../zfs/libzfs.h"
 
 static void userboot_zfs_probe(void);
 static int userboot_zfs_found;
 static void init_zfs_bootenv(char *currdev);
 #endif
 
 #define	USERBOOT_VERSION	USERBOOT_VERSION_3
 
 #define	MALLOCSZ		(10*1024*1024)
 
 struct loader_callbacks *callbacks;
 void *callbacks_arg;
 
 extern char bootprog_name[];
 extern char bootprog_rev[];
 extern char bootprog_date[];
 extern char bootprog_maker[];
 static jmp_buf jb;
 
 struct arch_switch archsw;	/* MI/MD interface boundary */
 
 static void	extract_currdev(void);
 
 /* Forward the requested delay (usec) to the host's delay callback. */
 void
 delay(int usec)
 {
 	CALLBACK(delay, usec);
 }
 
 /*
  * Report the exit value through the host's exit callback, then jump
  * back to the setjmp() point recorded in jb.
  */
 void
 exit(int v)
 {
 	CALLBACK(exit, v);
 	longjmp(jb, 1);
 }
 
 /*
  * Entry point called by the host program.
  *
  * Aborts unless version equals USERBOOT_VERSION.  Otherwise records the
  * callback table and disk count, sets up the heap and console, prints
  * the banner, imports the host-supplied environment strings, fills in
  * the arch switch, initialises every device switch entry, selects the
  * current device and enters the interactive loop.  A later exit()
  * longjmp()s back to the setjmp() below, which makes this function
  * return.
  */
 void
 loader_main(struct loader_callbacks *cb, void *arg, int version, int ndisks)
 {
 	static char mallocbuf[MALLOCSZ];
 	const char *var;
 	int i;
 
 	if (version != USERBOOT_VERSION)
 		abort();
 
 	callbacks = cb;
 	callbacks_arg = arg;
 	userboot_disk_maxunit = ndisks;
 
 	/*
 	 * initialise the heap as early as possible.  Once this is done,
 	 * alloc() is usable.
 	 */
 	setheap((void *)mallocbuf, (void *)(mallocbuf + sizeof(mallocbuf)));
 
 	/* Hook up the console */
 	cons_probe();
 
 	printf("\n");
 	printf("%s, Revision %s\n", bootprog_name, bootprog_rev);
 	printf("(%s, %s)\n", bootprog_maker, bootprog_date);
 #if 0
 	printf("Memory: %ld k\n", memsize() / 1024);
 #endif
 
 	setenv("LINES", "24", 1);	/* optional */
 
 	/* Import host environment strings until the callback returns NULL. */
 	for (i = 0; (var = CALLBACK(getenv, i)) != NULL; i++)
 		putenv(var);
 
 	archsw.arch_autoload = userboot_autoload;
 	archsw.arch_getdev = userboot_getdev;
 	archsw.arch_copyin = userboot_copyin;
 	archsw.arch_copyout = userboot_copyout;
 	archsw.arch_readin = userboot_readin;
 #if defined(USERBOOT_ZFS_SUPPORT)
 	archsw.arch_zfs_probe = userboot_zfs_probe;
 #endif
 
 	/* March through the device switch probing for things. */
 	for (i = 0; devsw[i] != NULL; i++) {
 		if (devsw[i]->dv_init == NULL)
 			continue;
 		(devsw[i]->dv_init)();
 	}
 
 	extract_currdev();
 
 	if (setjmp(jb) != 0)
 		return;
 
 	interact(NULL);			/* doesn't return */
 
 	exit(0);
 }
 
 /*
  * Set the 'current device' by (if possible) recovering the boot device as
  * supplied by the initial bootstrap.
  *
  * Order of preference: a ZFS device when userboot_zfs_found is set (ZFS
  * builds only), else unit 0 of userboot_disk when at least one disk was
  * passed in, else the host device.  The result is published through
  * the "currdev" and "loaddev" environment variables.
  */
 static void
 extract_currdev(void)
 {
 	struct disk_devdesc dev;
 
 	/*
 	 * NOTE(review): with the bzero() below commented out, any field of
 	 * dev that the chosen branch does not assign is left uninitialised
 	 * - confirm that userboot_fmtdev() never reads such a field.
 	 */
 	//bzero(&dev, sizeof(dev));
 
 #if defined(USERBOOT_ZFS_SUPPORT)
 	if (userboot_zfs_found) {
 		struct zfs_devdesc zdev;
 	
 		/* Leave the pool/root guid's unassigned */
 		bzero(&zdev, sizeof(zdev));
 		zdev.d_dev = &zfs_dev;
 		zdev.d_type = zdev.d_dev->dv_type;
 		
 		/*
 		 * NOTE(review): this copies sizeof(struct disk_devdesc) bytes
 		 * out of zdev; presumably the two layouts are compatible -
 		 * confirm.
 		 */
 		dev = *(struct disk_devdesc *)&zdev;
+		init_zfs_bootenv(zfs_fmtdev(&dev));
 	} else
 #endif
 
 	if (userboot_disk_maxunit > 0) {
 		dev.d_dev = &userboot_disk;
 		dev.d_type = dev.d_dev->dv_type;
 		dev.d_unit = 0;
 		dev.d_slice = 0;
 		dev.d_partition = 0;
 		/*
 		 * If we cannot auto-detect the partition type then
 		 * access the disk as a raw device.
 		 */
 		if (dev.d_dev->dv_open(NULL, &dev)) {
 			dev.d_slice = -1;
 			dev.d_partition = -1;
 		}
 	} else {
 		dev.d_dev = &host_dev;
 		dev.d_type = dev.d_dev->dv_type;
 		dev.d_unit = 0;
 	}
 
-#if defined(USERBOOT_ZFS_SUPPORT)
-	init_zfs_bootenv(zfs_fmtdev(&dev));
-#endif
-
 	/* currdev gets userboot_setcurrdev as its set hook; loaddev gets env_noset. */
 	env_setenv("currdev", EV_VOLATILE, userboot_fmtdev(&dev),
             userboot_setcurrdev, env_nounset);
 	env_setenv("loaddev", EV_VOLATILE, userboot_fmtdev(&dev),
             env_noset, env_nounset);
 }
 
 #if defined(USERBOOT_ZFS_SUPPORT)
 /*
  * Derive the boot-environment variables from a device string that
  * starts with "zfs:" and ends with ':'.  The string is modified in
  * place.  Empty strings and strings without the "zfs:" prefix are
  * ignored.
  *
  * Sets zfs_be_active to the string without its trailing ':',
  * zfs_be_currpage to "1", vfs.root.mountfrom to the same string unless
  * it is already set, and zfs_be_root to the part after "zfs:" with its
  * last '/' component removed.
  */
 static void
 init_zfs_bootenv(char *currdev)
 {
 	char *beroot;
 
+	if (strlen(currdev) == 0)
+		return;
+	if(strncmp(currdev, "zfs:", 4) != 0)
+		return;
 	/* Remove the trailing : */
 	currdev[strlen(currdev) - 1] = '\0';
 	setenv("zfs_be_active", currdev, 1);
+	setenv("zfs_be_currpage", "1", 1);
 	/* Do not overwrite if already set */
 	setenv("vfs.root.mountfrom", currdev, 0);
 	/*
 	 * NOTE(review): if currdev was exactly "zfs:", the strip above
 	 * removed its only ':' and strchr() returns NULL here - confirm
 	 * that zfs_fmtdev() never produces that string.
 	 */
 	/* Forward past zfs: */
 	currdev = strchr(currdev, ':');
 	currdev++;
 	/* Remove the last element (current bootenv) */
 	beroot = strrchr(currdev, '/');
 	if (beroot != NULL)
 		beroot[0] = '\0';
-
 	/* beroot now names the whole remaining string, not the cut point */
 	beroot = currdev;
-	
 	setenv("zfs_be_root", beroot, 1);
 }
 
 /*
  * Hand every host-supplied disk ("disk0:", "disk1:", ...) to
  * zfs_probe_dev().  userboot_zfs_found is set to 1 as soon as a probe
  * leaves a nonzero pool GUID behind.
  */
 static void
 userboot_zfs_probe(void)
 {
 	char name[32];
 	uint64_t guid;
 	int n;
 
 	n = 0;
 	while (n < userboot_disk_maxunit) {
 		sprintf(name, "disk%d:", n);
 		n++;
 		guid = 0;
 		zfs_probe_dev(name, &guid);
 		if (guid == 0)
 			continue;
 		userboot_zfs_found = 1;
 	}
 }
 
 /* Register "lszfs" with the loader's command set. */
 COMMAND_SET(lszfs, "lszfs", "list child datasets of a zfs dataset",
 	    command_lszfs);
 
 /*
  * lszfs <dataset>: pass the single argument to zfs_list().
  *
  * Returns CMD_OK when zfs_list() returns 0.  Any other argument count,
  * or a nonzero zfs_list() result (reported through strerror()), sets
  * command_errmsg and returns CMD_ERROR.
  */
 static int
 command_lszfs(int argc, char *argv[])
 {
 	int rc;
 
 	if (argc == 2) {
 		rc = zfs_list(argv[1]);
 		if (rc == 0)
 			return (CMD_OK);
 		command_errmsg = strerror(rc);
 	} else {
 		command_errmsg = "a single dataset must be supplied";
 	}
 	return (CMD_ERROR);
 }
 
 /* Register "reloadbe" with the loader's command set. */
 COMMAND_SET(reloadbe, "reloadbe", "refresh the list of ZFS Boot Environments",
 	    command_reloadbe);
 
 /*
  * reloadbe [root]: call zfs_bootenv() on the given argument, or on the
  * value of the zfs_be_root environment variable when no argument is
  * supplied.  If that variable is unset the command returns CMD_OK
  * without calling zfs_bootenv().  More than one argument is an error;
  * a nonzero zfs_bootenv() result is reported through strerror().
  */
 static int
 command_reloadbe(int argc, char *argv[])
 {
 	int err;
+	char *root;
 
 	if (argc > 2) {
 		command_errmsg = "wrong number of arguments";
 		return (CMD_ERROR);
 	}
 
 	if (argc == 2) {
 		err = zfs_bootenv(argv[1]);
 	} else {
-		err = zfs_bootenv(getenv("zfs_be_root"));
+		root = getenv("zfs_be_root");
+		if (root == NULL) {
+			return (CMD_OK);
+		}
+		err = zfs_bootenv(root);
 	}
 
 	if (err != 0) {
 		command_errmsg = strerror(err);
 		return (CMD_ERROR);
 	}
 
 	return (CMD_OK);
 }
 #endif /* USERBOOT_ZFS_SUPPORT */
 
 /* Register "quit" with the loader's command set. */
 COMMAND_SET(quit, "quit", "exit the loader", command_quit);
 
 /*
  * quit: leave the loader through exit() with USERBOOT_EXIT_QUIT.  The
  * arguments are ignored; the return statement follows the exit() call.
  */
 static int
 command_quit(int argc, char *argv[])
 {
 	exit(USERBOOT_EXIT_QUIT);
 	return (CMD_OK);
 }
 
 /* Register "reboot" with the loader's command set. */
 COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot);
 
 /*
  * reboot: leave the loader through exit() with USERBOOT_EXIT_REBOOT.
  * The arguments are ignored; the return statement follows the exit()
  * call.
  */
 static int
 command_reboot(int argc, char *argv[])
 {
 	exit(USERBOOT_EXIT_REBOOT);
 	return (CMD_OK);
 }
Index: projects/clang380-import/sys/boot/zfs/zfs.c
===================================================================
--- projects/clang380-import/sys/boot/zfs/zfs.c	(revision 293686)
+++ projects/clang380-import/sys/boot/zfs/zfs.c	(revision 293687)
@@ -1,860 +1,865 @@
 /*-
  * Copyright (c) 2007 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  *	Stand-alone file reading package.
  */
 
 #include <sys/disk.h>
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/queue.h>
 #include <part.h>
 #include <stddef.h>
 #include <stdarg.h>
 #include <string.h>
 #include <stand.h>
 #include <bootstrap.h>
 
 #include "libzfs.h"
 
 #include "zfsimpl.c"
 
 /* Define the range of indexes to be populated with ZFS Boot Environments */
 #define		ZFS_BE_FIRST	4
 #define		ZFS_BE_LAST	8
 
 static int	zfs_open(const char *path, struct open_file *f);
 static int	zfs_write(struct open_file *f, void *buf, size_t size, size_t *resid);
 static int	zfs_close(struct open_file *f);
 static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
 static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
 static int	zfs_stat(struct open_file *f, struct stat *sb);
 static int	zfs_readdir(struct open_file *f, struct dirent *d);
 
 struct devsw zfs_dev;
 
 /*
  * File system method table for ZFS.  The initializer is positional:
  * name, then the open, close, read, write, seek, stat and readdir
  * handlers defined in this file.
  */
 struct fs_ops zfs_fsops = {
 	"zfs",
 	zfs_open,
 	zfs_close,
 	zfs_read,
 	zfs_write,
 	zfs_seek,
 	zfs_stat,
 	zfs_readdir
 };
 
 /*
  * In-core open file.
  *
  * One of these is allocated by zfs_open() and hung off
  * open_file.f_fsdata.  For directories, zfs_readdir() also uses f_seekp
  * as its iteration cursor and fills in the f_zap_* members on the first
  * call.
  */
 struct file {
 	off_t		f_seekp;	/* seek pointer */
 	dnode_phys_t	f_dnode;
 	uint64_t	f_zap_type;	/* zap type for readdir */
 	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
 	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
 };
 
 /* Menu slot currently being filled by zfs_set_env(). */
 static int	zfs_env_index;
 /* Number of entries added by zfs_belist_add() since the last reset. */
 static int	zfs_env_count;
 
 /*
  * Singly-linked list of boot environment names.  zfs_bootenv() resets
  * and later drains it; zfs_belist_add() pushes entries on the head.
  * zfs_be and zfs_be_tmp are shared scratch cursors for list traversal.
  */
 SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head);
 struct zfs_be_list *zfs_be_headp;
 struct zfs_be_entry {
 	const char *name;
 	SLIST_ENTRY(zfs_be_entry) entries;
 } *zfs_be, *zfs_be_tmp;
 
 /*
  * Open a file.
  *
  * Allocates the per-file state, attaches it to f->f_fsdata and resolves
  * upath to a dnode with zfs_lookup().
  *
  * Returns 0 on success, EINVAL if f is not backed by zfs_dev, ENOMEM if
  * the state cannot be allocated, or the error from zfs_lookup().  On
  * lookup failure the state is freed and f->f_fsdata is reset to NULL.
  */
 static int
 zfs_open(const char *upath, struct open_file *f)
 {
 	struct zfsmount *mount = (struct zfsmount *)f->f_devdata;
 	struct file *fp;
 	int rc;
 
 	if (f->f_dev != &zfs_dev)
 		return (EINVAL);
 
 	/* allocate file system specific data structure */
 	fp = malloc(sizeof(struct file));
 	if (fp == NULL)
 		return (ENOMEM);
 	bzero(fp, sizeof(struct file));
 	f->f_fsdata = (void *)fp;
 
 	rc = zfs_lookup(mount, upath, &fp->f_dnode);
 	fp->f_seekp = 0;
 	if (rc) {
 		f->f_fsdata = NULL;
 		free(fp);
 	}
 	return (rc);
 }
 
 static int
 zfs_close(struct open_file *f)
 {
 	struct file *fp = (struct file *)f->f_fsdata;
 
 	dnode_cache_obj = 0;
 	f->f_fsdata = (void *)0;
 	if (fp == (struct file *)0)
 		return (0);
 
 	free(fp);
 	return (0);
 }
 
 /*
  * Copy a portion of a file into kernel memory.
  * Cross block boundaries when necessary.
  *
  * Reads up to size bytes at the current seek pointer into start,
  * clamped to the file size reported by zfs_stat(), and advances the
  * seek pointer by the amount read.  If resid is non-NULL it receives
  * the number of requested bytes that were not read.
  *
  * Returns 0 on success or the error from zfs_stat()/dnode_read().
  */
 static int
 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
 {
 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
 	struct file *fp = (struct file *)f->f_fsdata;
 	struct stat sb;
 	size_t n;
 	int rc;
 
 	rc = zfs_stat(f, &sb);
 	if (rc)
 		return (rc);
 	n = size;
 	if (fp->f_seekp >= sb.st_size) {
 		/*
 		 * At or past EOF: nothing to read.  Without this the
 		 * subtraction below would go negative and wrap to a
 		 * huge size_t.
 		 */
 		n = 0;
 	} else if (fp->f_seekp + n > sb.st_size)
 		n = sb.st_size - fp->f_seekp;
 
 	if (n > 0) {
 		rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
 		if (rc)
 			return (rc);
 	}
 
 	fp->f_seekp += n;
 	if (resid)
 		*resid = size - n;
 
 	return (0);
 }
 
 /*
  * Don't be silly - the bootstrap has no business writing anything.
  *
  * Every argument is ignored; the call always fails with EROFS.
  */
 static int
 zfs_write(struct open_file *f, void *start, size_t size,
     size_t *resid	/* out */)
 {
 
 	(void)f;
 	(void)start;
 	(void)size;
 	(void)resid;
 
 	return (EROFS);
 }
 
 /*
  * Reposition the seek pointer of an open file.
  *
  * SEEK_SET and SEEK_CUR behave as usual.  SEEK_END sets the pointer to
  * (file size - offset).
  * NOTE(review): POSIX lseek() adds the offset for SEEK_END; confirm that
  * callers rely on the subtraction before changing it.
  *
  * Returns the new seek pointer, or -1 with errno set when zfs_stat()
  * fails (its error) or `where' is not recognised (EINVAL).
  */
 static off_t
 zfs_seek(struct open_file *f, off_t offset, int where)
 {
 	struct file *fp = (struct file *)f->f_fsdata;
 	struct stat sb;
 	int error;
 
 	if (where == SEEK_SET) {
 		fp->f_seekp = offset;
 	} else if (where == SEEK_CUR) {
 		fp->f_seekp += offset;
 	} else if (where == SEEK_END) {
 		error = zfs_stat(f, &sb);
 		if (error != 0) {
 			errno = error;
 			return (-1);
 		}
 		fp->f_seekp = sb.st_size - offset;
 	} else {
 		errno = EINVAL;
 		return (-1);
 	}
 
 	return (fp->f_seekp);
 }
 
 static int
 zfs_stat(struct open_file *f, struct stat *sb)
 {
 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
 	struct file *fp = (struct file *)f->f_fsdata;
 
 	return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
 }
 
 /*
  * Return the next directory entry of an open directory in *d.
  *
  * The seek pointer doubles as the iteration cursor.  On the first call
  * (f_seekp == 0) the ZAP type is read; for a micro-zap the cursor is
  * moved to the first chunk, for a fat-zap the leaf count is read and
  * the first leaf block is loaded into a freshly allocated buffer.
  *
  * Returns 0 with *d filled in, ENOENT at end of directory, ENOTDIR if
  * the file is not a directory, ENOMEM if the leaf buffer cannot be
  * allocated, or an error from zfs_stat()/dnode_read().
  */
 static int
 zfs_readdir(struct open_file *f, struct dirent *d)
 {
 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
 	struct file *fp = (struct file *)f->f_fsdata;
 	mzap_ent_phys_t mze;
 	struct stat sb;
 	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
 	int rc;
 
 	rc = zfs_stat(f, &sb);
 	if (rc)
 		return (rc);
 	if (!S_ISDIR(sb.st_mode))
 		return (ENOTDIR);
 
 	/*
 	 * If this is the first read, get the zap type.
 	 */
 	if (fp->f_seekp == 0) {
 		rc = dnode_read(spa, &fp->f_dnode,
 				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
 		if (rc)
 			return (rc);
 
 		if (fp->f_zap_type == ZBT_MICRO) {
 			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
 		} else {
 			rc = dnode_read(spa, &fp->f_dnode,
 					offsetof(zap_phys_t, zap_num_leafs),
 					&fp->f_num_leafs,
 					sizeof(fp->f_num_leafs));
 			if (rc)
 				return (rc);
 
 			/*
 			 * Drop a buffer left over from an earlier pass
 			 * (the caller may have rewound to offset 0); the
 			 * pointer is NULL after zfs_open() zeroes the state.
 			 */
 			if (fp->f_zap_leaf != NULL) {
 				free(fp->f_zap_leaf);
 				fp->f_zap_leaf = NULL;
 			}
 			fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize);
 			if (fp->f_zap_leaf == NULL)
 				return (ENOMEM);
 			rc = dnode_read(spa, &fp->f_dnode,
 					bsize,
 					fp->f_zap_leaf,
 					bsize);
 			if (rc) {
 				/*
 				 * Leave the cursor at 0 so the next call
 				 * starts over instead of walking a buffer
 				 * that was never filled.
 				 */
 				free(fp->f_zap_leaf);
 				fp->f_zap_leaf = NULL;
 				return (rc);
 			}
 			/* The first leaf lives in the second block. */
 			fp->f_seekp = bsize;
 		}
 	}
 
 	if (fp->f_zap_type == ZBT_MICRO) {
 	mzap_next:
 		if (fp->f_seekp >= bsize)
 			return (ENOENT);
 
 		rc = dnode_read(spa, &fp->f_dnode,
 				fp->f_seekp, &mze, sizeof(mze));
 		if (rc)
 			return (rc);
 		fp->f_seekp += sizeof(mze);
 
 		/* An empty name marks an unused slot; skip it. */
 		if (!mze.mze_name[0])
 			goto mzap_next;
 
 		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
 		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
 		strcpy(d->d_name, mze.mze_name);
 		d->d_namlen = strlen(d->d_name);
 		return (0);
 	} else {
 		zap_leaf_t zl;
 		zap_leaf_chunk_t *zc, *nc;
 		int chunk;
 		size_t namelen;
 		char *p;
 		uint64_t value;
 
 		/*
 		 * Initialise this so we can use the ZAP size
 		 * calculating macros.
 		 */
 		zl.l_bs = ilog2(bsize);
 		zl.l_phys = fp->f_zap_leaf;
 
 		/*
 		 * Figure out which chunk we are currently looking at
 		 * and consider seeking to the next leaf. We use the
 		 * low bits of f_seekp as a simple chunk index.
 		 */
 	fzap_next:
 		chunk = fp->f_seekp & (bsize - 1);
 		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
 			fp->f_seekp = (fp->f_seekp & ~(bsize - 1)) + bsize;
 			chunk = 0;
 
 			/*
 			 * Check for EOF and read the new leaf.
 			 */
 			if (fp->f_seekp >= bsize * fp->f_num_leafs)
 				return (ENOENT);
 
 			rc = dnode_read(spa, &fp->f_dnode,
 					fp->f_seekp,
 					fp->f_zap_leaf,
 					bsize);
 			if (rc)
 				return (rc);
 		}
 
 		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
 		fp->f_seekp++;
 		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
 			goto fzap_next;
 
 		/* Never copy more than d_name can hold. */
 		namelen = zc->l_entry.le_name_numints;
 		if (namelen > sizeof(d->d_name))
 			namelen = sizeof(d->d_name);
 
 		/*
 		 * Paste the name back together.
 		 */
 		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
 		p = d->d_name;
 		while (namelen > 0) {
 			int len;
 			len = namelen;
 			if (len > ZAP_LEAF_ARRAY_BYTES)
 				len = ZAP_LEAF_ARRAY_BYTES;
 			memcpy(p, nc->l_array.la_array, len);
 			p += len;
 			namelen -= len;
 			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
 		}
 		d->d_name[sizeof(d->d_name) - 1] = 0;
 
 		/*
 		 * Assume the first eight bytes of the value are
 		 * a uint64_t.
 		 */
 		value = fzap_leaf_value(&zl, zc);
 
 		d->d_fileno = ZFS_DIRENT_OBJ(value);
 		d->d_type = ZFS_DIRENT_TYPE(value);
 		d->d_namlen = strlen(d->d_name);
 
 		return (0);
 	}
 }
 
 /*
  * vdev read callback: read exactly `size' bytes at byte `offset' from
  * the file descriptor carried in `priv' into `buf'.  The vdev argument
  * is not used.
  *
  * Returns 0 on a complete read and EIO if the seek fails, the read
  * fails, or fewer bytes than requested are returned.
  */
 static int
 vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t size)
 {
 	ssize_t got;
 	int fd;
 
 	fd = (uintptr_t) priv;
 	/* A failed seek would make the read return data from elsewhere. */
 	if (lseek(fd, offset, SEEK_SET) == -1)
 		return (EIO);
 	got = read(fd, buf, size);
 	if (got < 0 || (size_t)got != size)
 		return (EIO);
 	return (0);
 }
 
 /*
  * Device init hook: initialise the ZFS code, run the architecture's
  * probe routine, then walk zfs_pools and unlink every pool for which
  * zfs_spa_init() reports failure.
  *
  * Returns ENXIO when no arch_zfs_probe hook is installed, else 0.
  */
 static int
 zfs_dev_init(void)
 {
 	spa_t *cur;
 	spa_t *following;
 	spa_t *last_good;
 
 	zfs_init();
 	if (archsw.arch_zfs_probe == NULL)
 		return (ENXIO);
 	archsw.arch_zfs_probe();
 
 	last_good = NULL;
 	for (cur = STAILQ_FIRST(&zfs_pools); cur != NULL; cur = following) {
 		/* Fetch the successor first; cur may be unlinked below. */
 		following = STAILQ_NEXT(cur, spa_link);
 		if (zfs_spa_init(cur) == 0) {
 			last_good = cur;
 			continue;
 		}
 		if (last_good != NULL)
 			STAILQ_REMOVE_AFTER(&zfs_pools, last_good, spa_link);
 		else
 			STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
 	}
 	return (0);
 }
 
 /*
  * Context handed through ptable_open()/ptable_iterate() while probing
  * a disk and its partitions for ZFS pools.
  */
 struct zfs_probe_args {
 	int		fd;		/* open descriptor of the device */
 	const char	*devname;	/* device name, ending in ':' */
 	uint64_t	*pool_guid;	/* out: guid of a pool found, may be NULL */
-	uint16_t	secsz;
+	u_int		secsz;
 };
 
 /*
  * Sector read callback for ptable_open(): converts the sector offset
  * and count to bytes with the probe context's sector size and forwards
  * to vdev_read() on the context's descriptor.
  */
 static int
 zfs_diskread(void *arg, void *buf, size_t blocks, off_t offset)
 {
 	struct zfs_probe_args *ctx = (struct zfs_probe_args *)arg;
 	void *priv = (void *)(uintptr_t)ctx->fd;
 
 	return (vdev_read(NULL, priv, offset * ctx->secsz, buf,
 	    blocks * ctx->secsz));
 }
 
 static int
 zfs_probe(int fd, uint64_t *pool_guid)
 {
 	spa_t *spa;
 	int ret;
 
 	ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa);
 	if (ret == 0 && pool_guid != NULL)
 		*pool_guid = spa->spa_guid;
 	return (ret);
 }
 
 /*
  * ptable_iterate() callback: probe one partition for a ZFS pool.
  *
  * Only freebsd and freebsd-zfs partitions are examined.  The partition's
  * device name is built from the parent name (minus its trailing ':'),
  * the partition name and a new ':'.  If no pool is found on a freebsd
  * partition, it is scanned for a nested partition table and each entry
  * is probed in turn.
  *
  * The descriptor is deliberately left open when a pool is found.
  * NOTE(review): presumably the vdev keeps using it through the priv
  * pointer given to vdev_probe(); confirm.
  */
 static void
 zfs_probe_partition(void *arg, const char *partname,
     const struct ptable_entry *part)
 {
 	struct zfs_probe_args *ppa, pa;
 	struct ptable *table;
 	char devname[32];
 	size_t baselen;
 	int ret;
 
 	/* Probe only freebsd-zfs and freebsd partitions */
 	if (part->type != PART_FREEBSD &&
 	    part->type != PART_FREEBSD_ZFS)
 		return;
 
 	ppa = (struct zfs_probe_args *)arg;
 	/* Length of the parent name without its trailing ':'. */
 	baselen = strlen(ppa->devname);
 	if (baselen > 0)
 		baselen--;
 	/* Room is needed for base + partname + ':' + NUL. */
 	if (baselen + strlen(partname) + 2 > sizeof(devname))
 		return;
 	/*
 	 * Format straight from the parent name: passing devname as both
 	 * the destination and a source argument is undefined behaviour.
 	 */
 	snprintf(devname, sizeof(devname), "%.*s%s:", (int)baselen,
 	    ppa->devname, partname);
 	pa.fd = open(devname, O_RDONLY);
 	if (pa.fd == -1)
 		return;
 	ret = zfs_probe(pa.fd, ppa->pool_guid);
 	if (ret == 0)
 		return;
 	/* Do we have BSD label here? */
 	if (part->type == PART_FREEBSD) {
 		pa.devname = devname;
 		pa.pool_guid = ppa->pool_guid;
 		pa.secsz = ppa->secsz;
 		table = ptable_open(&pa, part->end - part->start + 1,
 		    ppa->secsz, zfs_diskread);
 		if (table != NULL) {
 			ptable_iterate(table, &pa, zfs_probe_partition);
 			ptable_close(table);
 		}
 	}
 	close(pa.fd);
 }
 
 int
 zfs_probe_dev(const char *devname, uint64_t *pool_guid)
 {
 	struct ptable *table;
 	struct zfs_probe_args pa;
 	off_t mediasz;
 	int ret;
 
 	pa.fd = open(devname, O_RDONLY);
 	if (pa.fd == -1)
 		return (ENXIO);
 	/* Probe the whole disk */
 	ret = zfs_probe(pa.fd, pool_guid);
 	if (ret == 0)
 		return (0);
 	/* Probe each partition */
 	ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
 	if (ret == 0)
 		ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
 	if (ret == 0) {
 		pa.devname = devname;
 		pa.pool_guid = pool_guid;
 		table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
 		    zfs_diskread);
 		if (table != NULL) {
 			ptable_iterate(table, &pa, zfs_probe_partition);
 			ptable_close(table);
 		}
 	}
 	close(pa.fd);
 	return (ret);
 }
 
 /*
  * Print information about ZFS pools
  *
  * With verbose set, defer to spa_all_status().  Otherwise emit one
  * "    zfs:<pool>" line per known pool through the pager.
  */
 static void
 zfs_dev_print(int verbose)
 {
 	spa_t *spa;
 
 	if (verbose) {
 		spa_all_status();
 		return;
 	}
 	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
 		/*
 		 * Emit the pieces separately rather than formatting into
 		 * a fixed-size buffer that a long pool name could overrun.
 		 */
 		pager_output("    zfs:");
 		pager_output(spa->spa_name);
 		pager_output("\n");
 	}
 }
 
 /*
  * Attempt to open the pool described by (dev) for use by (f).
  *
  * The single variadic argument is a struct zfs_devdesc pointer.  A zero
  * pool_guid selects the first known pool.  On success a zfsmount is
  * allocated, stored in f->f_devdata, and the descriptor is freed.
  * NOTE(review): the descriptor is not freed on the error paths, so the
  * caller presumably keeps ownership when this fails; confirm.
  *
  * Returns 0, ENXIO if the pool is unknown, ENOMEM if the mount cannot
  * be allocated, the zfs_mount() error, or EIO if the object set is not
  * a ZFS file system.
  */
 static int
 zfs_dev_open(struct open_file *f, ...)
 {
 	va_list		args;
 	struct zfs_devdesc	*dev;
 	struct zfsmount	*mount;
 	spa_t		*spa;
 	int		rv;
 
 	va_start(args, f);
 	dev = va_arg(args, struct zfs_devdesc *);
 	va_end(args);
 
 	if (dev->pool_guid == 0)
 		spa = STAILQ_FIRST(&zfs_pools);
 	else
 		spa = spa_find_by_guid(dev->pool_guid);
 	if (!spa)
 		return (ENXIO);
 	mount = malloc(sizeof(*mount));
 	if (mount == NULL)
 		return (ENOMEM);
 	rv = zfs_mount(spa, dev->root_guid, mount);
 	if (rv != 0) {
 		free(mount);
 		return (rv);
 	}
 	if (mount->objset.os_type != DMU_OST_ZFS) {
 		printf("Unexpected object set type %ju\n",
 		    (uintmax_t)mount->objset.os_type);
 		free(mount);
 		return (EIO);
 	}
 	f->f_devdata = mount;
 	free(dev);
 	return (0);
 }
 
 /*
  * Release the mount state installed by zfs_dev_open() and clear the
  * pointer in f.  Always returns 0.
  */
 static int
 zfs_dev_close(struct open_file *f)
 {
 	void *mnt;
 
 	mnt = f->f_devdata;
 	f->f_devdata = NULL;
 	free(mnt);
 	return (0);
 }
 
 /*
  * Block I/O entry point of the zfs device.  Not implemented: every
  * request fails with ENOSYS.
  */
 static int
 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size,
     char *buf, size_t *rsize)
 {
 
 	(void)devdata;
 	(void)rw;
 	(void)dblk;
 	(void)size;
 	(void)buf;
 	(void)rsize;
 
 	return (ENOSYS);
 }
 
 /*
  * Device switch entry for the "zfs" pseudo-device.  ioctl requests go
  * to noioctl and no cleanup hook is installed.
  */
 struct devsw zfs_dev = {
 	.dv_name = "zfs",
 	.dv_type = DEVT_ZFS,
 	.dv_init = zfs_dev_init,
 	.dv_strategy = zfs_dev_strategy,
 	.dv_open = zfs_dev_open,
 	.dv_close = zfs_dev_close,
 	.dv_ioctl = noioctl,
 	.dv_print = zfs_dev_print,
 	.dv_cleanup = NULL
 };
 
 /*
  * Parse a device specification of the form ":pool[/dataset]:[path]".
  *
  * devspec must start with ':' (the text after the "zfs" device name).
  * The pool and optional root dataset are looked up and their guids are
  * stored in *dev, along with the zfs device switch and type.  If path
  * is non-NULL it is set to the text following the closing ':'.
  *
  * Returns 0, EINVAL for a malformed or over-long specification, ENXIO
  * if the pool is unknown, or the zfs_lookup_dataset() error.
  */
 int
 zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path)
 {
 	static char	rootname[ZFS_MAXNAMELEN];
 	static char	poolname[ZFS_MAXNAMELEN];
 	spa_t		*spa;
 	const char	*end;
 	const char	*np;
 	const char	*sep;
 	int		rv;
 
 	np = devspec;
 	if (*np != ':')
 		return (EINVAL);
 	np++;
 	end = strchr(np, ':');
 	if (end == NULL)
 		return (EINVAL);
 	sep = strchr(np, '/');
 	if (sep == NULL || sep >= end)
 		sep = end;
 	/* Reject names that would overrun the static buffers. */
 	if ((size_t)(sep - np) >= sizeof(poolname))
 		return (EINVAL);
 	memcpy(poolname, np, sep - np);
 	poolname[sep - np] = '\0';
 	if (sep < end) {
 		sep++;
 		if ((size_t)(end - sep) >= sizeof(rootname))
 			return (EINVAL);
 		memcpy(rootname, sep, end - sep);
 		rootname[end - sep] = '\0';
 	}
 	else
 		rootname[0] = '\0';
 
 	spa = spa_find_by_name(poolname);
 	if (!spa)
 		return (ENXIO);
 	dev->pool_guid = spa->spa_guid;
 	rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
 	if (rv != 0)
 		return (rv);
 	if (path != NULL)
 		*path = (*end == '\0') ? end : end + 1;
 	dev->d_dev = &zfs_dev;
 	dev->d_type = zfs_dev.dv_type;
 	return (0);
 }
 
 /*
  * Format a ZFS device descriptor as "zfs:pool[/dataset]:".
  *
  * vdev points at a struct zfs_devdesc.  A zero pool_guid is replaced by
  * the guid of the first known pool, and a zero root_guid by the pool's
  * root file system; both are written back into the descriptor.
  *
  * Returns a pointer to a static buffer that is overwritten by the next
  * call.  The buffer holds an empty string if the descriptor is not a
  * ZFS device or any lookup fails.
  */
 char *
 zfs_fmtdev(void *vdev)
 {
 	static char		rootname[ZFS_MAXNAMELEN];
 	static char		buf[2 * ZFS_MAXNAMELEN + 8];
 	struct zfs_devdesc	*dev = (struct zfs_devdesc *)vdev;
 	spa_t			*spa;
 
 	buf[0] = '\0';
 	if (dev->d_type != DEVT_ZFS)
 		return (buf);
 
 	if (dev->pool_guid == 0) {
 		/* The pool list may be empty; check before dereferencing. */
 		spa = STAILQ_FIRST(&zfs_pools);
 		if (spa != NULL)
 			dev->pool_guid = spa->spa_guid;
 	} else
 		spa = spa_find_by_guid(dev->pool_guid);
 	if (spa == NULL) {
 		printf("ZFS: can't find pool by guid\n");
 		return (buf);
 	}
 	if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
 		printf("ZFS: can't find root filesystem\n");
 		return (buf);
 	}
 	if (zfs_rlookup(spa, dev->root_guid, rootname)) {
 		printf("ZFS: can't find filesystem by guid\n");
 		return (buf);
 	}
 
 	if (rootname[0] == '\0')
 		snprintf(buf, sizeof(buf), "%s:%s:", dev->d_dev->dv_name,
 		    spa->spa_name);
 	else
 		snprintf(buf, sizeof(buf), "%s:%s/%s:", dev->d_dev->dv_name,
 		    spa->spa_name, rootname);
 	return (buf);
 }
 
 /*
  * List the child datasets of "pool[/dataset]".
  *
  * The text before the first '/' is the pool name; the rest, if any, is
  * the dataset within it.
  *
  * Returns EINVAL if the pool name is too long, ENXIO if the pool is
  * unknown, the zfs_lookup_dataset() error, or the result of
  * zfs_list_dataset().
  */
 int
 zfs_list(const char *name)
 {
 	static char	poolname[ZFS_MAXNAMELEN];
 	uint64_t	objid;
 	spa_t		*spa;
 	const char	*dsname;
 	int		len;
 	int		rv;
 
 	len = strlen(name);
 	dsname = strchr(name, '/');
 	if (dsname != NULL) {
 		len = dsname - name;
 		dsname++;
 	} else
 		dsname = "";
 	/* Reject names that would overrun the static buffer. */
 	if (len < 0 || (size_t)len >= sizeof(poolname))
 		return (EINVAL);
 	memcpy(poolname, name, len);
 	poolname[len] = '\0';
 
 	spa = spa_find_by_name(poolname);
 	if (!spa)
 		return (ENXIO);
 	rv = zfs_lookup_dataset(spa, dsname, &objid);
 	if (rv != 0)
 		return (rv);
 
 	return (zfs_list_dataset(spa, objid));
 }
 
 /*
  * Rebuild the boot environment menu variables from the children of the
  * dataset "pool[/dataset]" given in name.
  *
  * Updates zfs_be_root if name differs from it, collects the children
  * through zfs_belist_add(), stores the page count in zfs_be_pages,
  * resets zfs_be_currpage to "1" when it exceeds the page count, calls
  * zfs_set_env() and finally frees the collected list.
  *
  * Returns EINVAL if name or zfs_be_root is NULL (new revision), ENOMEM
  * if a setenv() fails, ENXIO if the pool is unknown, the
  * zfs_lookup_dataset() error, or the zfs_callback_dataset() result.
  */
 int
 zfs_bootenv(const char *name)
 {
-	static char	poolname[ZFS_MAXNAMELEN], *dsname;
+	static char	poolname[ZFS_MAXNAMELEN], *dsname, *root;
 	char		becount[4];
 	uint64_t	objid;
 	spa_t		*spa;
 	int		len, rv, pages, perpage, currpage;
 
-	if (strcmp(name, getenv("zfs_be_root")) != 0) {
+	if (name == NULL)
+		return (EINVAL);
+	if ((root = getenv("zfs_be_root")) == NULL)
+		return (EINVAL);
+
+	if (strcmp(name, root) != 0) {
 		if (setenv("zfs_be_root", name, 1) != 0)
 			return (ENOMEM);
 	}
 
 	SLIST_INIT(&zfs_be_head);
 	zfs_env_count = 0;
 	len = strlen(name);
 	dsname = strchr(name, '/');
 	if (dsname != NULL) {
 		len = dsname - name;
 		dsname++;
 	} else
 		dsname = "";
 	/* NOTE(review): len is not checked against sizeof(poolname). */
 	memcpy(poolname, name, len);
 	poolname[len] = '\0';
 
 	spa = spa_find_by_name(poolname);
 	if (!spa)
 		return (ENXIO);
 	rv = zfs_lookup_dataset(spa, dsname, &objid);
 	if (rv != 0)
 		return (rv);
 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
 
 	/* Calculate and store the number of pages of BEs */
 	perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1);
 	pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0);
 	snprintf(becount, 4, "%d", pages);
 	/* This early return skips the list cleanup at the end. */
 	if (setenv("zfs_be_pages", becount, 1) != 0)
 		return (ENOMEM);
 
 	/* Roll over the page counter if it has exceeded the maximum */
 	/* NOTE(review): getenv() may return NULL here; strtol() gets it unchecked. */
 	currpage = strtol(getenv("zfs_be_currpage"), NULL, 10);
 	if (currpage > pages) {
 		/* This early return also skips the list cleanup. */
 		if (setenv("zfs_be_currpage", "1", 1) != 0)
 			return (ENOMEM);
 	}
 
 	/* Populate the menu environment variables */
 	zfs_set_env();
 
 	/* Clean up the SLIST of ZFS BEs */
 	while (!SLIST_EMPTY(&zfs_be_head)) {
 		zfs_be = SLIST_FIRST(&zfs_be_head);
 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
 		free(zfs_be);
 	}
 
 	return (rv);
 }
 
 /*
  * Record one boot environment name; passed to zfs_callback_dataset()
  * by zfs_bootenv().
  *
  * Only the pointer is stored, not a copy of the string.
  * NOTE(review): this assumes name stays valid until zfs_bootenv() has
  * called zfs_set_env(); confirm against zfs_callback_dataset().
  *
  * Returns 0 on success or ENOMEM if the entry cannot be allocated.
  */
 int
 zfs_belist_add(const char *name)
 {
 
 	/* Add the boot environment to the head of the SLIST */
 	zfs_be = malloc(sizeof(struct zfs_be_entry));
 	if (zfs_be == NULL)
 		return (ENOMEM);
 	zfs_be->name = name;
 	SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries);
 	zfs_env_count++;
 
 	return (0);
 }
 
 /*
  * Publish one page of the boot environment list as menu variables.
  *
  * For each entry on the page selected by zfs_be_currpage (page 1 if the
  * variable is unset) this sets bootenvmenu_caption[N],
  * bootenvansi_caption[N], bootenvmenu_command[N] and bootenv_root[N],
  * with N running from ZFS_BE_FIRST to ZFS_BE_LAST.  Slots left over on
  * the page are unset.
  *
  * Returns 1 if zfs_be_root is not set, otherwise the result of the last
  * setenv() attempted (0 when all succeeded).
  */
 int
 zfs_set_env(void)
 {
 	char envname[32], envval[256];
 	char *beroot, *pagenum;
 	int rv, page, ctr;
 
 	beroot = getenv("zfs_be_root");
 	if (beroot == NULL) {
 		return (1);
 	}
 
 	pagenum = getenv("zfs_be_currpage");
 	if (pagenum != NULL) {
 		page = strtol(pagenum, NULL, 10);
 	} else {
 		page = 1;
 	}
 
 	ctr = 1;
 	rv = 0;
 	zfs_env_index = ZFS_BE_FIRST;
 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
 		/* Skip to the requested page number */
 		if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) {
 			ctr++;
 			continue;
 		}
 		
 		/* Both caption variables receive the bare BE name. */
 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
 		snprintf(envval, sizeof(envval), "%s", zfs_be->name);
 		rv = setenv(envname, envval, 1);
 		if (rv != 0) {
 			break;
 		}
 
 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
 		rv = setenv(envname, envval, 1);
 		if (rv != 0){
 			break;
 		}
 
 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
 		rv = setenv(envname, "set_bootenv", 1);
 		if (rv != 0){
 			break;
 		}
 
 		/* The root variable is the full "zfs:<beroot>/<name>" path. */
 		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
 		snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name);
 		rv = setenv(envname, envval, 1);
 		if (rv != 0){
 			break;
 		}
 
 		/* Stop once the page is full. */
 		zfs_env_index++;
 		if (zfs_env_index > ZFS_BE_LAST) {
 			break;
 		}
 
 	}
 	
 	/* Clear the slots this page did not fill. */
 	for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) {
 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
 		(void)unsetenv(envname);
 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
 		(void)unsetenv(envname);
 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
 		(void)unsetenv(envname);
 		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
 		(void)unsetenv(envname);
 	}
 
 	return (rv);
 }
\ No newline at end of file
Index: projects/clang380-import/sys/boot
===================================================================
--- projects/clang380-import/sys/boot	(revision 293686)
+++ projects/clang380-import/sys/boot	(revision 293687)

Property changes on: projects/clang380-import/sys/boot
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/boot:r293430-293685
Index: projects/clang380-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
===================================================================
--- projects/clang380-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	(revision 293686)
+++ projects/clang380-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	(revision 293687)
@@ -1,917 +1,983 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
  */
 
 #include <sys/zfs_context.h>
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/bio.h>
 #include <sys/disk.h>
 #include <sys/spa.h>
 #include <sys/spa_impl.h>
 #include <sys/vdev_impl.h>
 #include <sys/fs/zfs.h>
 #include <sys/zio.h>
 #include <geom/geom.h>
 #include <geom/geom_int.h>
 
 /*
  * Virtual device vector for GEOM.
  *
  * The class "ZFS::VDEV" owns the geoms and consumers created by
  * vdev_geom_attach() and routes attribute-change notifications to
  * vdev_geom_attrchanged().
  */
 
 static g_attrchanged_t vdev_geom_attrchanged;
 struct g_class zfs_vdev_class = {
 	.name = "ZFS::VDEV",
 	.version = G_VERSION,
 	.attrchanged = vdev_geom_attrchanged,
 };
 
 DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
 
 SYSCTL_DECL(_vfs_zfs_vdev);
 /* Don't send BIO_FLUSH. */
 static int vdev_geom_bio_flush_disable;
 SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RWTUN,
     &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
 /* Don't send BIO_DELETE. */
 static int vdev_geom_bio_delete_disable;
 SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RWTUN,
     &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
 
 /*
  * Refresh vd->vdev_rotation_rate from the provider's
  * "GEOM::rotation_rate" attribute, falling back to VDEV_RATE_UNKNOWN
  * when the attribute cannot be read.
  */
 static void
 vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
 {
 	uint16_t rpm;
 
 	if (g_getattr("GEOM::rotation_rate", cp, &rpm) != 0) {
 		vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
 		return;
 	}
 	vd->vdev_rotation_rate = rpm;
 }
 
 /*
  * GEOM attribute-change callback for a vdev consumer.
  *
  * Does nothing if the consumer has no vdev attached (cp->private is
  * NULL).  "GEOM::rotation_rate" refreshes the vdev's rotation rate.
  * "GEOM::physpath" re-reads the provider's physical path, stores a copy
  * in vd->vdev_physpath and requests an asynchronous config update.  Any
  * other attribute is ignored.
  */
 static void
 vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
 {
 	vdev_t *vd;
+	spa_t *spa;
+	char *physpath;
+	int error, physpath_len;
 
 	vd = cp->private;
 	if (vd == NULL)
 		return;
 
 	if (strcmp(attr, "GEOM::rotation_rate") == 0) {
 		vdev_geom_set_rotation_rate(vd, cp);
 		return;
 	}
+
+	if (strcmp(attr, "GEOM::physpath") != 0)
+		return;
+
 	/* Take a temporary read reference for the attribute query below. */
+	if (g_access(cp, 1, 0, 0) != 0)
+		return;
+
+	/*
+	 * Record/Update physical path information for this device.
+	 */
+	spa = vd->vdev_spa;
+	physpath_len = MAXPATHLEN;
+	physpath = g_malloc(physpath_len, M_WAITOK|M_ZERO);
+	error = g_io_getattr("GEOM::physpath", cp, &physpath_len, physpath);
+	g_access(cp, -1, 0, 0);
+	if (error == 0) {
+		char *old_physpath;
+
+		old_physpath = vd->vdev_physpath;
+		vd->vdev_physpath = spa_strdup(physpath);
+		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
+
 		/*
 		 * The old string is freed with SCL_STATE held as writer.
 		 * If this thread does not already hold it, the topology
 		 * lock is dropped around taking the config lock and
 		 * re-taken afterwards.
 		 */
+		if (old_physpath != NULL) {
+			int held_lock;
+
+			held_lock = spa_config_held(spa, SCL_STATE, RW_WRITER);
+			if (held_lock == 0) {
+				g_topology_unlock();
+				spa_config_enter(spa, SCL_STATE, FTAG,
+				    RW_WRITER);
+			}
+
+			spa_strfree(old_physpath);
+
+			if (held_lock == 0) {
+				spa_config_exit(spa, SCL_STATE, FTAG);
+				g_topology_lock();
+			}
+		}
+	}
+	g_free(physpath);
 }
 
 /*
  * GEOM orphan callback: the provider behind this consumer is going
  * away.  Posts a removal event for the vdev, marks it as wanting
  * removal and asks the SPA to remove it asynchronously.  Must be called
  * with the topology lock held.
  */
 static void
 vdev_geom_orphan(struct g_consumer *cp)
 {
 	vdev_t *vd;
 
 	g_topology_assert();
 
 	vd = cp->private;
-	if (vd == NULL)
+	if (vd == NULL) {
+		/* Vdev close in progress.  Ignore the event. */
 		return;
+	}
 
 	/*
 	 * Orphan callbacks occur from the GEOM event thread.
 	 * Concurrent with this call, new I/O requests may be
 	 * working their way through GEOM about to find out
 	 * (only once executed by the g_down thread) that we've
 	 * been orphaned from our disk provider.  These I/Os
 	 * must be retired before we can detach our consumer.
 	 * This is most easily achieved by acquiring the
 	 * SPA ZIO configuration lock as a writer, but doing
 	 * so with the GEOM topology lock held would cause
 	 * a lock order reversal.  Instead, rely on the SPA's
 	 * async removal support to invoke a close on this
 	 * vdev once it is safe to do so.
 	 */
 	zfs_post_remove(vd->vdev_spa, vd);
 	vd->vdev_remove_wanted = B_TRUE;
 	spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
 }
 
 /*
  * Attach to provider pp on behalf of vdev vd.
  *
  * Reuses the existing "zfs::vdev" geom, and an existing consumer of pp
  * under it, when present; otherwise creates them.  In every case one
  * more read and one more exclusive reference is taken on the consumer.
  * In the new revision the consumer and vdev are then cross-linked
  * (cp->private, vd->vdev_tsd) and the physical path is fetched.
  *
  * Returns the consumer, or NULL if attaching or opening fails.  Must be
  * called with the topology lock held.
  */
 static struct g_consumer *
-vdev_geom_attach(struct g_provider *pp)
+vdev_geom_attach(struct g_provider *pp, vdev_t *vd)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 
 	g_topology_assert();
 
 	ZFS_LOG(1, "Attaching to %s.", pp->name);
 	/* Do we have geom already? No? Create one. */
 	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
 		if (gp->flags & G_GEOM_WITHER)
 			continue;
 		if (strcmp(gp->name, "zfs::vdev") != 0)
 			continue;
 		break;
 	}
 	if (gp == NULL) {
 		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
 		gp->orphan = vdev_geom_orphan;
+		gp->attrchanged = vdev_geom_attrchanged;
 		cp = g_new_consumer(gp);
 		/* On failure the brand-new geom is withered, consumer included. */
 		if (g_attach(cp, pp) != 0) {
 			g_wither_geom(gp, ENXIO);
 			return (NULL);
 		}
 		if (g_access(cp, 1, 0, 1) != 0) {
 			g_wither_geom(gp, ENXIO);
 			return (NULL);
 		}
 		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
 	} else {
 		/* Check if we are already connected to this provider. */
 		LIST_FOREACH(cp, &gp->consumer, consumer) {
 			if (cp->provider == pp) {
 				ZFS_LOG(1, "Found consumer for %s.", pp->name);
 				break;
 			}
 		}
 		if (cp == NULL) {
 			cp = g_new_consumer(gp);
 			if (g_attach(cp, pp) != 0) {
 				g_destroy_consumer(cp);
 				return (NULL);
 			}
 			if (g_access(cp, 1, 0, 1) != 0) {
 				g_detach(cp);
 				g_destroy_consumer(cp);
 				return (NULL);
 			}
 			ZFS_LOG(1, "Created consumer for %s.", pp->name);
 		} else {
 			if (g_access(cp, 1, 0, 1) != 0)
 				return (NULL);
 			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
 		}
 	}
+
+	/* 
+	 * BUG: cp may already belong to a vdev.  This could happen if:
+	 * 1) That vdev is a shared spare, or
+	 * 2) We are trying to reopen a missing vdev and we are scanning by
+	 *    guid.  In that case, we'll ultimately fail to open this consumer,
+	 *    but not until after setting the private field.
+	 * The solution is to:
+	 * 1) Don't set the private field until after the open succeeds, and
+	 * 2) Set it to a linked list of vdevs, not just a single vdev
+	 */
+	cp->private = vd;
+	vd->vdev_tsd = cp;
+
+	/* Fetch initial physical path information for this device. */
+	vdev_geom_attrchanged(cp, "GEOM::physpath");
+	
 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	return (cp);
 }
 
 /*
  * Close the GEOM consumer of a vdev (new revision; replaces
  * vdev_geom_detach(), which took the consumer as its argument).
  *
  * Returns at once if the vdev has no consumer.  Otherwise breaks the
  * vdev/consumer link, drops one read and one exclusive reference,
  * destroys the consumer when no read or exclusive references remain,
  * and withers the geom when its last consumer is gone.  Must be called
  * with the topology lock held.
  */
 static void
-vdev_geom_detach(void *arg, int flag __unused)
+vdev_geom_close_locked(vdev_t *vd)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 
 	g_topology_assert();
-	cp = arg;
-	gp = cp->geom;
 
+	cp = vd->vdev_tsd;
+	if (cp == NULL)
+		return;
+
 	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
+	KASSERT(vd->vdev_tsd == cp, ("%s: vdev_tsd is not cp", __func__));
+	vd->vdev_tsd = NULL;
+	vd->vdev_delayed_close = B_FALSE;
+	cp->private = NULL;
+
+	gp = cp->geom;
 	g_access(cp, -1, 0, -1);
 	/* Destroy consumer on last close. */
 	if (cp->acr == 0 && cp->ace == 0) {
-		ZFS_LOG(1, "Destroyed consumer to %s.", cp->provider->name);
 		/* Drop any write references still held before detaching. */
 		if (cp->acw > 0)
 			g_access(cp, 0, -cp->acw, 0);
-		g_detach(cp);
+		if (cp->provider != NULL) {
+			ZFS_LOG(1, "Destroyed consumer to %s.",
+			    cp->provider->name);
+			g_detach(cp);
+		}
 		g_destroy_consumer(cp);
 	}
 	/* Destroy geom if there are no consumers left. */
 	if (LIST_EMPTY(&gp->consumer)) {
 		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
 		g_wither_geom(gp, ENXIO);
 	}
 }
 
 /*
  * Look up the vdev guid and pool guid in a label nvlist.  Lookup
  * results are ignored, so an output is left untouched when its key is
  * missing.
  */
 static void
 nvlist_get_guids(nvlist_t *list, uint64_t *pguid, uint64_t *vguid)
 {
 
 	(void)nvlist_lookup_uint64(list, ZPOOL_CONFIG_GUID, vguid);
 	(void)nvlist_lookup_uint64(list, ZPOOL_CONFIG_POOL_GUID, pguid);
 }
 
 /*
  * Synchronous I/O on a consumer.
  *
  * Transfers `size' bytes at byte `offset' between the provider and
  * `data', split into requests of at most MAXPHYS rounded down to a
  * sector multiple.  Both offset and size must be sector aligned.  One
  * bio is reused for every request.
  *
  * Returns 0, or the biowait() error of the first failing request.
  */
 static int
 vdev_geom_io(struct g_consumer *cp, int cmd, void *data, off_t offset, off_t size)
 {
 	struct bio *bp;
 	u_char *cursor;
 	off_t pos, end, chunk, remaining;
 	int error;
 
 	ASSERT((offset % cp->provider->sectorsize) == 0);
 	ASSERT((size % cp->provider->sectorsize) == 0);
 
 	bp = g_alloc_bio();
 	chunk = MAXPHYS - (MAXPHYS % cp->provider->sectorsize);
 	pos = offset;
 	end = offset + size;
 	cursor = data;
 	remaining = size;
 	error = 0;
 
 	while (pos < end) {
 		bzero(bp, sizeof(*bp));
 		bp->bio_cmd = cmd;
 		bp->bio_done = NULL;
 		bp->bio_offset = pos;
 		bp->bio_length = MIN(remaining, chunk);
 		bp->bio_data = cursor;
 		g_io_request(bp, cp);
 		error = biowait(bp, "vdev_geom_io");
 		if (error != 0)
 			break;
 
 		pos += chunk;
 		cursor += chunk;
 		remaining -= chunk;
 	}
 
 	g_destroy_bio(bp);
 	return (error);
 }
 
 /*
  * Orphan callback for tasting consumers.  It is never expected to run;
  * the assertion always fails when it does.
  */
 static void
 vdev_geom_taste_orphan(struct g_consumer *cp)
 {
 
 	KASSERT(0, ("%s called while tasting %s.", __func__,
 	    cp->provider->name));
 }
 
 /*
  * Read a vdev label from the provider behind cp and unpack its config.
  *
  * Each of the VDEV_LABELS label copies is tried in turn.  The first one
  * that reads, unpacks, carries a pool state no greater than
  * POOL_STATE_L2CACHE and, unless it is a spare or L2 cache device, has
  * a non-zero pool txg is returned in *config.
  *
  * Returns 0 with *config set (the caller frees it), or ENOENT with
  * *config NULL.  Must be called without the topology lock.
  */
 static int
 vdev_geom_read_config(struct g_consumer *cp, nvlist_t **config)
 {
 	struct g_provider *pp;
 	vdev_label_t *label;
 	char *buf;
 	size_t buflen;
 	uint64_t psize;
 	off_t offset, size;
 	uint64_t state, txg;
 	int l;
 
 	g_topology_assert_not();
 
 	pp = cp->provider;
 	ZFS_LOG(1, "Reading config from %s...", pp->name);
 
 	psize = pp->mediasize;
 	psize = P2ALIGN(psize, (uint64_t)sizeof(vdev_label_t));
 
 	/* Round the label size up to a whole number of sectors. */
 	size = sizeof(*label) + pp->sectorsize -
 	    ((sizeof(*label) - 1) % pp->sectorsize) - 1;
 
 	label = kmem_alloc(size, KM_SLEEP);
 	buflen = sizeof(label->vl_vdev_phys.vp_nvlist);
 
 	*config = NULL;
 	for (l = 0; l < VDEV_LABELS; l++) {
 
 		/* Skip label copies that are not sector aligned. */
 		offset = vdev_label_offset(psize, l, 0);
 		if ((offset % pp->sectorsize) != 0)
 			continue;
 
 		if (vdev_geom_io(cp, BIO_READ, label, offset, size) != 0)
 			continue;
 		buf = label->vl_vdev_phys.vp_nvlist;
 
 		if (nvlist_unpack(buf, buflen, config, 0) != 0)
 			continue;
 
 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
 		    &state) != 0 || state > POOL_STATE_L2CACHE) {
 			nvlist_free(*config);
 			*config = NULL;
 			continue;
 		}
 
 		if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
 		    (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
 		    &txg) != 0 || txg == 0)) {
 			nvlist_free(*config);
 			*config = NULL;
 			continue;
 		}
 
 		break;
 	}
 
 	kmem_free(label, size);
 	return (*config == NULL ? ENOENT : 0);
 }
 
 /*
  * Make sure the config array can be indexed by id.
  *
  * If id is already below *count nothing happens.  Otherwise a zeroed
  * array of id + 1 slots replaces the old one, the existing entries are
  * copied across, the old array is freed and *count becomes id + 1.
  */
 static void
 resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
 {
 	nvlist_t **grown, **old;
 	uint64_t oldcount, n;
 
 	oldcount = *count;
 	if (id < oldcount)
 		return;
 
 	old = *configs;
 	grown = kmem_zalloc((id + 1) * sizeof(nvlist_t *),
 	    KM_SLEEP);
 	for (n = 0; n < oldcount; n++)
 		grown[n] = old[n];
 	if (old != NULL)
 		kmem_free(old, oldcount * sizeof(void *));
 
 	*configs = grown;
 	*count = id + 1;
 }
 
 /*
  * File one label config under its top-level vdev id.
  *
  * cfg is accepted only if it names pool `name', carries a pool guid, a
  * top-level vdev guid, a vdev tree with an id, and matches
  * *known_pool_guid (which is set from the first accepted config when it
  * is still 0).  For a given id the config with the highest pool txg is
  * kept.  Ownership of cfg passes to this function: it is either stored
  * in (*configs)[id] or freed.
  */
 static void
 process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
     const char *name, uint64_t* known_pool_guid)
 {
 	nvlist_t *vdev_tree;
 	uint64_t pool_guid;
 	uint64_t vdev_guid;
 	uint64_t id, txg, known_txg;
 	char *pname;
 
 	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
 	    strcmp(pname, name) != 0)
 		goto ignore;
 
 	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
 		goto ignore;
 
 	/* Only the presence of the top-level guid is checked. */
 	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
 		goto ignore;
 
 	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
 		goto ignore;
 
 	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
 		goto ignore;
 
 	VERIFY(nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);
 
 	if (*known_pool_guid != 0) {
 		if (pool_guid != *known_pool_guid)
 			goto ignore;
 	} else
 		*known_pool_guid = pool_guid;
 
 	resize_configs(configs, count, id);
 
 	/* Keep whichever config for this id has the newer txg. */
 	if ((*configs)[id] != NULL) {
 		VERIFY(nvlist_lookup_uint64((*configs)[id],
 		    ZPOOL_CONFIG_POOL_TXG, &known_txg) == 0);
 		if (txg <= known_txg)
 			goto ignore;
 		nvlist_free((*configs)[id]);
 	}
 
 	(*configs)[id] = cfg;
 	return;
 
 ignore:
 	nvlist_free(cfg);
 }
 
 static int
 vdev_geom_attach_taster(struct g_consumer *cp, struct g_provider *pp)
 {
 	int error;
 
 	if (pp->flags & G_PF_WITHER)
 		return (EINVAL);
 	g_attach(cp, pp);
 	error = g_access(cp, 1, 0, 0);
 	if (error == 0) {
 		if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize))
 			error = EINVAL;
 		else if (pp->mediasize < SPA_MINDEVSIZE)
 			error = EINVAL;
 		if (error != 0)
 			g_access(cp, -1, 0, 0);
 	}
 	if (error != 0)
 		g_detach(cp);
 	return (error);
 }
 
 static void
 vdev_geom_detach_taster(struct g_consumer *cp)
 {
 	g_access(cp, -1, 0, 0);
 	g_detach(cp);
 }
 
 int
 vdev_geom_read_pool_label(const char *name,
     nvlist_t ***configs, uint64_t *count)
 {
 	struct g_class *mp;
 	struct g_geom *gp, *zgp;
 	struct g_provider *pp;
 	struct g_consumer *zcp;
 	nvlist_t *vdev_cfg;
 	uint64_t pool_guid;
 	int error;
 
 	DROP_GIANT();
 	g_topology_lock();
 
 	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
 	/* This orphan function should be never called. */
 	zgp->orphan = vdev_geom_taste_orphan;
 	zcp = g_new_consumer(zgp);
 
 	*configs = NULL;
 	*count = 0;
 	pool_guid = 0;
 	LIST_FOREACH(mp, &g_classes, class) {
 		if (mp == &zfs_vdev_class)
 			continue;
 		LIST_FOREACH(gp, &mp->geom, geom) {
 			if (gp->flags & G_GEOM_WITHER)
 				continue;
 			LIST_FOREACH(pp, &gp->provider, provider) {
 				if (pp->flags & G_PF_WITHER)
 					continue;
 				if (vdev_geom_attach_taster(zcp, pp) != 0)
 					continue;
 				g_topology_unlock();
 				error = vdev_geom_read_config(zcp, &vdev_cfg);
 				g_topology_lock();
 				vdev_geom_detach_taster(zcp);
 				if (error)
 					continue;
 				ZFS_LOG(1, "successfully read vdev config");
 
 				process_vdev_config(configs, count,
 				    vdev_cfg, name, &pool_guid);
 			}
 		}
 	}
 
 	g_destroy_consumer(zcp);
 	g_destroy_geom(zgp);
 	g_topology_unlock();
 	PICKUP_GIANT();
 
 	return (*count > 0 ? 0 : ENOENT);
 }
 
 static void
 vdev_geom_read_guids(struct g_consumer *cp, uint64_t *pguid, uint64_t *vguid)
 {
 	nvlist_t *config;
 
 	g_topology_assert_not();
 
 	*pguid = 0;
 	*vguid = 0;
 	if (vdev_geom_read_config(cp, &config) == 0) {
 		nvlist_get_guids(config, pguid, vguid);
 		nvlist_free(config);
 	}
 }
 
 static struct g_consumer *
-vdev_geom_attach_by_guids(uint64_t pool_guid, uint64_t vdev_guid)
+vdev_geom_attach_by_guids(vdev_t *vd)
 {
 	struct g_class *mp;
 	struct g_geom *gp, *zgp;
 	struct g_provider *pp;
 	struct g_consumer *cp, *zcp;
 	uint64_t pguid, vguid;
 
 	g_topology_assert();
 
 	zgp = g_new_geomf(&zfs_vdev_class, "zfs::vdev::taste");
 	/* This orphan function should be never called. */
 	zgp->orphan = vdev_geom_taste_orphan;
 	zcp = g_new_consumer(zgp);
 
 	cp = NULL;
 	LIST_FOREACH(mp, &g_classes, class) {
 		if (mp == &zfs_vdev_class)
 			continue;
 		LIST_FOREACH(gp, &mp->geom, geom) {
 			if (gp->flags & G_GEOM_WITHER)
 				continue;
 			LIST_FOREACH(pp, &gp->provider, provider) {
 				if (vdev_geom_attach_taster(zcp, pp) != 0)
 					continue;
 				g_topology_unlock();
 				vdev_geom_read_guids(zcp, &pguid, &vguid);
 				g_topology_lock();
 				vdev_geom_detach_taster(zcp);
-				if (pguid != pool_guid || vguid != vdev_guid)
+				if (pguid != spa_guid(vd->vdev_spa) ||
+				    vguid != vd->vdev_guid)
 					continue;
-				cp = vdev_geom_attach(pp);
+				cp = vdev_geom_attach(pp, vd);
 				if (cp == NULL) {
 					printf("ZFS WARNING: Unable to "
 					    "attach to %s.\n", pp->name);
 					continue;
 				}
 				break;
 			}
 			if (cp != NULL)
 				break;
 		}
 		if (cp != NULL)
 			break;
 	}
 end:
 	g_destroy_consumer(zcp);
 	g_destroy_geom(zgp);
 	return (cp);
 }
 
 static struct g_consumer *
 vdev_geom_open_by_guids(vdev_t *vd)
 {
 	struct g_consumer *cp;
 	char *buf;
 	size_t len;
 
 	g_topology_assert();
 
 	ZFS_LOG(1, "Searching by guid [%ju].", (uintmax_t)vd->vdev_guid);
-	cp = vdev_geom_attach_by_guids(spa_guid(vd->vdev_spa), vd->vdev_guid);
+	cp = vdev_geom_attach_by_guids(vd);
 	if (cp != NULL) {
 		len = strlen(cp->provider->name) + strlen("/dev/") + 1;
 		buf = kmem_alloc(len, KM_SLEEP);
 
 		snprintf(buf, len, "/dev/%s", cp->provider->name);
 		spa_strfree(vd->vdev_path);
 		vd->vdev_path = buf;
 
 		ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
 		    (uintmax_t)spa_guid(vd->vdev_spa),
 		    (uintmax_t)vd->vdev_guid, vd->vdev_path);
 	} else {
 		ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
 		    (uintmax_t)spa_guid(vd->vdev_spa),
 		    (uintmax_t)vd->vdev_guid);
 	}
 
 	return (cp);
 }
 
 static struct g_consumer *
 vdev_geom_open_by_path(vdev_t *vd, int check_guid)
 {
 	struct g_provider *pp;
 	struct g_consumer *cp;
 	uint64_t pguid, vguid;
 
 	g_topology_assert();
 
 	cp = NULL;
 	pp = g_provider_by_name(vd->vdev_path + sizeof("/dev/") - 1);
 	if (pp != NULL) {
 		ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
-		cp = vdev_geom_attach(pp);
+		cp = vdev_geom_attach(pp, vd);
 		if (cp != NULL && check_guid && ISP2(pp->sectorsize) &&
 		    pp->sectorsize <= VDEV_PAD_SIZE) {
 			g_topology_unlock();
 			vdev_geom_read_guids(cp, &pguid, &vguid);
 			g_topology_lock();
 			if (pguid != spa_guid(vd->vdev_spa) ||
 			    vguid != vd->vdev_guid) {
-				vdev_geom_detach(cp, 0);
+				vdev_geom_close_locked(vd);
 				cp = NULL;
 				ZFS_LOG(1, "guid mismatch for provider %s: "
 				    "%ju:%ju != %ju:%ju.", vd->vdev_path,
 				    (uintmax_t)spa_guid(vd->vdev_spa),
 				    (uintmax_t)vd->vdev_guid,
 				    (uintmax_t)pguid, (uintmax_t)vguid);
 			} else {
 				ZFS_LOG(1, "guid match for provider %s.",
 				    vd->vdev_path);
 			}
 		}
 	}
 
 	return (cp);
 }
 
 static int
 vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
     uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
 	struct g_provider *pp;
 	struct g_consumer *cp;
 	size_t bufsize;
 	int error;
 
 	/*
 	 * We must have a pathname, and it must be absolute.
 	 */
 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
 		return (EINVAL);
 	}
 
 	vd->vdev_tsd = NULL;
 
 	DROP_GIANT();
 	g_topology_lock();
 	error = 0;
 
 	if (vd->vdev_spa->spa_splitting_newspa ||
 	    (vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
 	     vd->vdev_spa->spa_load_state == SPA_LOAD_NONE)) {
 		/*
 		 * We are dealing with a vdev that hasn't been previously
 		 * opened (since boot), and we are not loading an
 		 * existing pool configuration.  This looks like a
 		 * vdev add operation to a new or existing pool.
 		 * Assume the user knows what he/she is doing and find
 		 * GEOM provider by its name, ignoring GUID mismatches.
 		 *
 		 * XXPOLICY: It would be safer to only allow a device
 		 *           that is unlabeled or labeled but missing
 		 *           GUID information to be opened in this fashion,
 		 *           unless we are doing a split, in which case we
 		 *           should allow any guid.
 		 */
 		cp = vdev_geom_open_by_path(vd, 0);
 	} else {
 		/*
 		 * Try using the recorded path for this device, but only
 		 * accept it if its label data contains the expected GUIDs.
 		 */
 		cp = vdev_geom_open_by_path(vd, 1);
 		if (cp == NULL) {
 			/*
 			 * The device at vd->vdev_path doesn't have the
 			 * expected GUIDs. The disks might have merely
 			 * moved around so try all other GEOM providers
 			 * to find one with the right GUIDs.
 			 */
 			cp = vdev_geom_open_by_guids(vd);
 		}
 	}
 
 	if (cp == NULL) {
 		ZFS_LOG(1, "Provider %s not found.", vd->vdev_path);
 		error = ENOENT;
 	} else if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
 	    !ISP2(cp->provider->sectorsize)) {
 		ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
 		    vd->vdev_path);
-		vdev_geom_detach(cp, 0);
+
+		vdev_geom_close_locked(vd);
 		error = EINVAL;
 		cp = NULL;
 	} else if (cp->acw == 0 && (spa_mode(vd->vdev_spa) & FWRITE) != 0) {
 		int i;
 
 		for (i = 0; i < 5; i++) {
 			error = g_access(cp, 0, 1, 0);
 			if (error == 0)
 				break;
 			g_topology_unlock();
 			tsleep(vd, 0, "vdev", hz / 2);
 			g_topology_lock();
 		}
 		if (error != 0) {
 			printf("ZFS WARNING: Unable to open %s for writing (error=%d).\n",
 			    vd->vdev_path, error);
-			vdev_geom_detach(cp, 0);
+			vdev_geom_close_locked(vd);
 			cp = NULL;
 		}
 	}
+
 	g_topology_unlock();
 	PICKUP_GIANT();
 	if (cp == NULL) {
 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
 		return (error);
 	}
-
-	cp->private = vd;
-	vd->vdev_tsd = cp;
 	pp = cp->provider;
 
 	/*
 	 * Determine the actual size of the device.
 	 */
 	*max_psize = *psize = pp->mediasize;
 
 	/*
 	 * Determine the device's minimum transfer size and preferred
 	 * transfer size.
 	 */
 	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
 	*physical_ashift = 0;
 	if (pp->stripesize)
 		*physical_ashift = highbit(pp->stripesize) - 1;
 
 	/*
 	 * Clear the nowritecache settings, so that on a vdev_reopen()
 	 * we will try again.
 	 */
 	vd->vdev_nowritecache = B_FALSE;
 
-	if (vd->vdev_physpath != NULL)
-		spa_strfree(vd->vdev_physpath);
-	bufsize = sizeof("/dev/") + strlen(pp->name);
-	vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP);
-	snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name);
-
 	/*
 	 * Determine the device's rotation rate.
 	 */
 	vdev_geom_set_rotation_rate(vd, cp);
 
 	return (0);
 }
 
 static void
 vdev_geom_close(vdev_t *vd)
 {
-	struct g_consumer *cp;
 
-	cp = vd->vdev_tsd;
-	if (cp == NULL)
-		return;
-	vd->vdev_tsd = NULL;
-	vd->vdev_delayed_close = B_FALSE;
-	cp->private = NULL;	/* XXX locking */
-	g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
+	DROP_GIANT();
+	g_topology_lock();
+	vdev_geom_close_locked(vd);
+	g_topology_unlock();
+	PICKUP_GIANT();
 }
 
 static void
 vdev_geom_io_intr(struct bio *bp)
 {
 	vdev_t *vd;
 	zio_t *zio;
 
 	zio = bp->bio_caller1;
 	vd = zio->io_vd;
 	zio->io_error = bp->bio_error;
 	if (zio->io_error == 0 && bp->bio_resid != 0)
 		zio->io_error = SET_ERROR(EIO);
 
 	switch(zio->io_error) {
 	case ENOTSUP:
 		/*
 		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
 		 * that future attempts will never succeed. In this case
 		 * we set a persistent flag so that we don't bother with
 		 * requests in the future.
 		 */
 		switch(bp->bio_cmd) {
 		case BIO_FLUSH:
 			vd->vdev_nowritecache = B_TRUE;
 			break;
 		case BIO_DELETE:
 			vd->vdev_notrim = B_TRUE;
 			break;
 		}
 		break;
 	case ENXIO:
 		if (!vd->vdev_remove_wanted) {
 			/*
 			 * If provider's error is set we assume it is being
 			 * removed.
 			 */
 			if (bp->bio_to->error != 0) {
 				vd->vdev_remove_wanted = B_TRUE;
 				spa_async_request(zio->io_spa,
 				    SPA_ASYNC_REMOVE);
 			} else if (!vd->vdev_delayed_close) {
 				vd->vdev_delayed_close = B_TRUE;
 			}
 		}
 		break;
 	}
 	g_destroy_bio(bp);
 	zio_interrupt(zio);
 }
 
 static void
 vdev_geom_io_start(zio_t *zio)
 {
 	vdev_t *vd;
 	struct g_consumer *cp;
 	struct bio *bp;
 	int error;
 
 	vd = zio->io_vd;
 
 	switch (zio->io_type) {
 	case ZIO_TYPE_IOCTL:
 		/* XXPOLICY */
 		if (!vdev_readable(vd)) {
 			zio->io_error = SET_ERROR(ENXIO);
 			zio_interrupt(zio);
 			return;
 		} else {
 			switch (zio->io_cmd) {
 			case DKIOCFLUSHWRITECACHE:
 				if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
 					break;
 				if (vd->vdev_nowritecache) {
 					zio->io_error = SET_ERROR(ENOTSUP);
 					break;
 				}
 				goto sendreq;
 			default:
 				zio->io_error = SET_ERROR(ENOTSUP);
 			}
 		}
 
 		zio_execute(zio);
 		return;
 	case ZIO_TYPE_FREE:
 		if (vd->vdev_notrim) {
 			zio->io_error = SET_ERROR(ENOTSUP);
 		} else if (!vdev_geom_bio_delete_disable) {
 			goto sendreq;
 		}
 		zio_execute(zio);
 		return;
 	}
 sendreq:
 	ASSERT(zio->io_type == ZIO_TYPE_READ ||
 	    zio->io_type == ZIO_TYPE_WRITE ||
 	    zio->io_type == ZIO_TYPE_FREE ||
 	    zio->io_type == ZIO_TYPE_IOCTL);
 
 	cp = vd->vdev_tsd;
 	if (cp == NULL) {
 		zio->io_error = SET_ERROR(ENXIO);
 		zio_interrupt(zio);
 		return;
 	}
 	bp = g_alloc_bio();
 	bp->bio_caller1 = zio;
 	switch (zio->io_type) {
 	case ZIO_TYPE_READ:
 	case ZIO_TYPE_WRITE:
 		bp->bio_cmd = zio->io_type == ZIO_TYPE_READ ? BIO_READ : BIO_WRITE;
 		bp->bio_data = zio->io_data;
 		bp->bio_offset = zio->io_offset;
 		bp->bio_length = zio->io_size;
 		break;
 	case ZIO_TYPE_FREE:
 		bp->bio_cmd = BIO_DELETE;
 		bp->bio_data = NULL;
 		bp->bio_offset = zio->io_offset;
 		bp->bio_length = zio->io_size;
 		break;
 	case ZIO_TYPE_IOCTL:
 		bp->bio_cmd = BIO_FLUSH;
 		bp->bio_flags |= BIO_ORDERED;
 		bp->bio_data = NULL;
 		bp->bio_offset = cp->provider->mediasize;
 		bp->bio_length = 0;
 		break;
 	}
 	bp->bio_done = vdev_geom_io_intr;
 
 	g_io_request(bp, cp);
 }
 
 static void
 vdev_geom_io_done(zio_t *zio)
 {
 }
 
 static void
 vdev_geom_hold(vdev_t *vd)
 {
 }
 
 static void
 vdev_geom_rele(vdev_t *vd)
 {
 }
 
 vdev_ops_t vdev_geom_ops = {
 	vdev_geom_open,
 	vdev_geom_close,
 	vdev_default_asize,
 	vdev_geom_io_start,
 	vdev_geom_io_done,
 	NULL,
 	vdev_geom_hold,
 	vdev_geom_rele,
 	VDEV_TYPE_DISK,		/* name of this vdev type */
 	B_TRUE			/* leaf vdev */
 };
Index: projects/clang380-import/sys/cddl/contrib/opensolaris
===================================================================
--- projects/clang380-import/sys/cddl/contrib/opensolaris	(revision 293686)
+++ projects/clang380-import/sys/cddl/contrib/opensolaris	(revision 293687)

Property changes on: projects/clang380-import/sys/cddl/contrib/opensolaris
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/cddl/contrib/opensolaris:r292913-293685
Index: projects/clang380-import/sys/compat/ia32/ia32_sysvec.c
===================================================================
--- projects/clang380-import/sys/compat/ia32/ia32_sysvec.c	(revision 293686)
+++ projects/clang380-import/sys/compat/ia32/ia32_sysvec.c	(revision 293687)
@@ -1,239 +1,240 @@
 /*-
  * Copyright (c) 2002 Doug Rabson
  * Copyright (c) 2003 Peter Wemm
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 
 #define __ELF_WORD_SIZE 32
 
 #include <sys/param.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
 #include <sys/imgact.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/mman.h>
 #include <sys/namei.h>
 #include <sys/pioctl.h>
 #include <sys/proc.h>
 #include <sys/procfs.h>
 #include <sys/resourcevar.h>
 #include <sys/systm.h>
 #include <sys/signalvar.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/syscall.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/vnode.h>
 #include <sys/imgact_elf.h>
 
 #include <vm/vm.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 
 #include <compat/freebsd32/freebsd32_signal.h>
 #include <compat/freebsd32/freebsd32_util.h>
 #include <compat/freebsd32/freebsd32_proto.h>
 #include <compat/freebsd32/freebsd32_syscall.h>
 #include <compat/ia32/ia32_signal.h>
 #include <machine/frame.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/cpufunc.h>
 
 CTASSERT(sizeof(struct ia32_mcontext) == 640);
 CTASSERT(sizeof(struct ia32_ucontext) == 704);
 CTASSERT(sizeof(struct ia32_sigframe) == 800);
 CTASSERT(sizeof(struct siginfo32) == 64);
 #ifdef COMPAT_FREEBSD4
 CTASSERT(sizeof(struct ia32_mcontext4) == 260);
 CTASSERT(sizeof(struct ia32_ucontext4) == 324);
 CTASSERT(sizeof(struct ia32_sigframe4) == 408);
 #endif
 
 extern const char *freebsd32_syscallnames[];
 
 static SYSCTL_NODE(_compat, OID_AUTO, ia32, CTLFLAG_RW, 0, "ia32 mode");
 
 static u_long	ia32_maxdsiz = IA32_MAXDSIZ;
 SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxdsiz, CTLFLAG_RWTUN, &ia32_maxdsiz, 0, "");
 u_long	ia32_maxssiz = IA32_MAXSSIZ;
 SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxssiz, CTLFLAG_RWTUN, &ia32_maxssiz, 0, "");
 static u_long	ia32_maxvmem = IA32_MAXVMEM;
 SYSCTL_ULONG(_compat_ia32, OID_AUTO, maxvmem, CTLFLAG_RWTUN, &ia32_maxvmem, 0, "");
 
 struct sysentvec ia32_freebsd_sysvec = {
 	.sv_size	= FREEBSD32_SYS_MAXSYSCALL,
 	.sv_table	= freebsd32_sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= 0,
 	.sv_errtbl	= NULL,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= elf32_freebsd_fixup,
 	.sv_sendsig	= ia32_sendsig,
 	.sv_sigcode	= ia32_sigcode,
 	.sv_szsigcode	= &sz_ia32_sigcode,
 	.sv_name	= "FreeBSD ELF32",
 	.sv_coredump	= elf32_coredump,
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_pagesize	= IA32_PAGE_SIZE,
 	.sv_minuser	= FREEBSD32_MINUSER,
 	.sv_maxuser	= FREEBSD32_MAXUSER,
 	.sv_usrstack	= FREEBSD32_USRSTACK,
 	.sv_psstrings	= FREEBSD32_PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings	= freebsd32_copyout_strings,
 	.sv_setregs	= ia32_setregs,
 	.sv_fixlimit	= ia32_fixlimit,
 	.sv_maxssiz	= &ia32_maxssiz,
 	.sv_flags	= SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 |
 #ifdef __amd64__
 		SV_SHP | SV_TIMEKEEP
 #else
 		0
 #endif
 	,
 	.sv_set_syscall_retval = ia32_set_syscall_retval,
 	.sv_fetch_syscall_args = ia32_fetch_syscall_args,
 	.sv_syscallnames = freebsd32_syscallnames,
 	.sv_shared_page_base = FREEBSD32_SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
+	.sv_trap	= NULL,
 };
 INIT_SYSENTVEC(elf_ia32_sysvec, &ia32_freebsd_sysvec);
 
 static Elf32_Brandinfo ia32_brand_info = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_386,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/libexec/ld-elf.so.1",
 	.sysvec		= &ia32_freebsd_sysvec,
 	.interp_newpath	= "/libexec/ld-elf32.so.1",
 	.brand_note	= &elf32_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 SYSINIT(ia32, SI_SUB_EXEC, SI_ORDER_MIDDLE,
 	(sysinit_cfunc_t) elf32_insert_brand_entry,
 	&ia32_brand_info);
 
 static Elf32_Brandinfo ia32_brand_oinfo = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_386,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/usr/libexec/ld-elf.so.1",
 	.sysvec		= &ia32_freebsd_sysvec,
 	.interp_newpath	= "/libexec/ld-elf32.so.1",
 	.brand_note	= &elf32_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 SYSINIT(oia32, SI_SUB_EXEC, SI_ORDER_ANY,
 	(sysinit_cfunc_t) elf32_insert_brand_entry,
 	&ia32_brand_oinfo);
 
 static Elf32_Brandinfo kia32_brand_info = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_386,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/lib/ld.so.1",
 	.sysvec		= &ia32_freebsd_sysvec,
 	.brand_note	= &elf32_kfreebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY
 };
 
 SYSINIT(kia32, SI_SUB_EXEC, SI_ORDER_ANY,
 	(sysinit_cfunc_t) elf32_insert_brand_entry,
 	&kia32_brand_info);
 
 void
 elf32_dump_thread(struct thread *td, void *dst, size_t *off)
 {
 	void *buf;
 	size_t len;
 
 	len = 0;
 	if (use_xsave) {
 		if (dst != NULL) {
 			fpugetregs(td);
 			len += elf32_populate_note(NT_X86_XSTATE,
 			    get_pcb_user_save_td(td), dst,
 			    cpu_max_ext_state_size, &buf);
 			*(uint64_t *)((char *)buf + X86_XSTATE_XCR0_OFFSET) =
 			    xsave_mask;
 		} else
 			len += elf32_populate_note(NT_X86_XSTATE, NULL, NULL,
 			    cpu_max_ext_state_size, NULL);
 	}
 	*off = len;
 }
 
 void
 ia32_fixlimit(struct rlimit *rl, int which)
 {
 
 	switch (which) {
 	case RLIMIT_DATA:
 		if (ia32_maxdsiz != 0) {
 			if (rl->rlim_cur > ia32_maxdsiz)
 				rl->rlim_cur = ia32_maxdsiz;
 			if (rl->rlim_max > ia32_maxdsiz)
 				rl->rlim_max = ia32_maxdsiz;
 		}
 		break;
 	case RLIMIT_STACK:
 		if (ia32_maxssiz != 0) {
 			if (rl->rlim_cur > ia32_maxssiz)
 				rl->rlim_cur = ia32_maxssiz;
 			if (rl->rlim_max > ia32_maxssiz)
 				rl->rlim_max = ia32_maxssiz;
 		}
 		break;
 	case RLIMIT_VMEM:
 		if (ia32_maxvmem != 0) {
 			if (rl->rlim_cur > ia32_maxvmem)
 				rl->rlim_cur = ia32_maxvmem;
 			if (rl->rlim_max > ia32_maxvmem)
 				rl->rlim_max = ia32_maxvmem;
 		}
 		break;
 	}
 }
Index: projects/clang380-import/sys/compat/linux/linux_futex.c
===================================================================
--- projects/clang380-import/sys/compat/linux/linux_futex.c	(revision 293686)
+++ projects/clang380-import/sys/compat/linux/linux_futex.c	(revision 293687)
@@ -1,1276 +1,1280 @@
 /*	$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $ */
 
 /*-
  * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by Emmanuel Dreyfus
  * 4. The name of the author may not be used to endorse or promote
  *    products derived from this software without specific prior written
  *    permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS''
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 #if 0
 __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $");
 #endif
 
 #include "opt_compat.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/imgact.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
 #include <sys/sched.h>
 #include <sys/sdt.h>
 #include <sys/sx.h>
 #include <sys/umtx.h>
 
 #ifdef COMPAT_LINUX32
 #include <machine/../linux32/linux.h>
 #include <machine/../linux32/linux32_proto.h>
 #else
 #include <machine/../linux/linux.h>
 #include <machine/../linux/linux_proto.h>
 #endif
 #include <compat/linux/linux_dtrace.h>
 #include <compat/linux/linux_emul.h>
 #include <compat/linux/linux_futex.h>
 #include <compat/linux/linux_timer.h>
 #include <compat/linux/linux_util.h>
 
 /* DTrace init */
 LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
 
 /**
  * Futex part for the special DTrace module "locks".
  */
 LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, locked, "struct mtx *");
 LIN_SDT_PROBE_DEFINE1(locks, futex_mtx, unlock, "struct mtx *");
 
 /**
  * Per futex probes.
  */
 LIN_SDT_PROBE_DEFINE1(futex, futex, create, "struct sx *");
 LIN_SDT_PROBE_DEFINE1(futex, futex, destroy, "struct sx *");
 
 /**
  * DTrace probes in this module.
  */
 LIN_SDT_PROBE_DEFINE2(futex, futex_put, entry, "struct futex *",
     "struct waiting_proc *");
 LIN_SDT_PROBE_DEFINE3(futex, futex_put, destroy, "uint32_t *", "uint32_t",
     "int");
 LIN_SDT_PROBE_DEFINE3(futex, futex_put, unlock, "uint32_t *", "uint32_t",
     "int");
 LIN_SDT_PROBE_DEFINE0(futex, futex_put, return);
 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, entry, "uint32_t *", "struct futex **",
     "uint32_t");
 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, umtx_key_get_error, "int");
 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, shared, "uint32_t *", "uint32_t",
     "int");
 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, null, "uint32_t *");
 LIN_SDT_PROBE_DEFINE3(futex, futex_get0, new, "uint32_t *", "uint32_t", "int");
 LIN_SDT_PROBE_DEFINE1(futex, futex_get0, return, "int");
 LIN_SDT_PROBE_DEFINE3(futex, futex_get, entry, "uint32_t *",
     "struct waiting_proc **", "struct futex **");
 LIN_SDT_PROBE_DEFINE0(futex, futex_get, error);
 LIN_SDT_PROBE_DEFINE1(futex, futex_get, return, "int");
 /*
  * futex_sleep() probes.  The entry probe is fired with the waiter pointer
  * itself (futex_sleep() takes a "struct waiting_proc *"), so arg1 is
  * declared with that type rather than as a pointer to pointer.
  */
 LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, entry, "struct futex *",
     "struct waiting_proc *", "int");
 LIN_SDT_PROBE_DEFINE5(futex, futex_sleep, requeue_error, "int", "uint32_t *",
     "struct waiting_proc *", "uint32_t *", "uint32_t");
 LIN_SDT_PROBE_DEFINE3(futex, futex_sleep, sleep_error, "int", "uint32_t *",
     "struct waiting_proc *");
 LIN_SDT_PROBE_DEFINE1(futex, futex_sleep, return, "int");
 LIN_SDT_PROBE_DEFINE3(futex, futex_wake, entry, "struct futex *", "int",
     "uint32_t");
 LIN_SDT_PROBE_DEFINE3(futex, futex_wake, iterate, "uint32_t",
     "struct waiting_proc *", "uint32_t");
 LIN_SDT_PROBE_DEFINE1(futex, futex_wake, wakeup, "struct waiting_proc *");
 LIN_SDT_PROBE_DEFINE1(futex, futex_wake, return, "int");
 LIN_SDT_PROBE_DEFINE4(futex, futex_requeue, entry, "struct futex *", "int",
     "struct futex *", "int");
 LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, wakeup, "struct waiting_proc *");
 LIN_SDT_PROBE_DEFINE3(futex, futex_requeue, requeue, "uint32_t *",
     "struct waiting_proc *", "uint32_t");
 LIN_SDT_PROBE_DEFINE1(futex, futex_requeue, return, "int");
 /*
  * futex_wait() probes.  The entry probe is fired with the waiter pointer
  * itself (futex_wait() takes a "struct waiting_proc *"), so arg1 is
  * declared with that type rather than as a pointer to pointer.
  */
 LIN_SDT_PROBE_DEFINE4(futex, futex_wait, entry, "struct futex *",
     "struct waiting_proc *", "int", "uint32_t");
 LIN_SDT_PROBE_DEFINE1(futex, futex_wait, sleep_error, "int");
 LIN_SDT_PROBE_DEFINE1(futex, futex_wait, return, "int");
 /*
  * futex_atomic_op() probes.  The entry probe is fired with the user
  * address (a "uint32_t *"), not a value, so arg2 is declared as a pointer.
  */
 LIN_SDT_PROBE_DEFINE3(futex, futex_atomic_op, entry, "struct thread *",
     "int", "uint32_t *");
 LIN_SDT_PROBE_DEFINE4(futex, futex_atomic_op, decoded_op, "int", "int", "int",
     "int");
 LIN_SDT_PROBE_DEFINE0(futex, futex_atomic_op, missing_access_check);
 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_op, "int");
 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, unimplemented_cmp, "int");
 LIN_SDT_PROBE_DEFINE1(futex, futex_atomic_op, return, "int");
 LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, entry, "struct thread *",
     "struct linux_sys_futex_args *");
 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_clockswitch);
 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, itimerfix_error, "int");
 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, copyin_error, "int");
 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, invalid_cmp_requeue_use);
 LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wait, "uint32_t *",
     "uint32_t", "uint32_t");
 LIN_SDT_PROBE_DEFINE4(futex, linux_sys_futex, debug_wait_value_neq,
     "uint32_t *", "uint32_t", "int", "uint32_t");
 LIN_SDT_PROBE_DEFINE3(futex, linux_sys_futex, debug_wake, "uint32_t *",
     "uint32_t", "uint32_t");
 LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_cmp_requeue, "uint32_t *",
     "uint32_t", "uint32_t", "uint32_t *", "struct l_timespec *");
 LIN_SDT_PROBE_DEFINE2(futex, linux_sys_futex, debug_cmp_requeue_value_neq,
     "uint32_t", "int");
 LIN_SDT_PROBE_DEFINE5(futex, linux_sys_futex, debug_wake_op, "uint32_t *",
     "int", "uint32_t", "uint32_t *", "uint32_t");
 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unhandled_efault);
 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_lock_pi);
 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_unlock_pi);
 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_trylock_pi);
 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, deprecated_requeue);
 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_wait_requeue_pi);
 LIN_SDT_PROBE_DEFINE0(futex, linux_sys_futex, unimplemented_cmp_requeue_pi);
 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, unknown_operation, "int");
 LIN_SDT_PROBE_DEFINE1(futex, linux_sys_futex, return, "int");
 LIN_SDT_PROBE_DEFINE2(futex, linux_set_robust_list, entry, "struct thread *",
     "struct linux_set_robust_list_args *");
 LIN_SDT_PROBE_DEFINE0(futex, linux_set_robust_list, size_error);
 LIN_SDT_PROBE_DEFINE1(futex, linux_set_robust_list, return, "int");
 LIN_SDT_PROBE_DEFINE2(futex, linux_get_robust_list, entry, "struct thread *",
     "struct linux_get_robust_list_args *");
 LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int");
 LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int");
 LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry,
     "struct linux_emuldata *", "uint32_t *", "unsigned int");
 LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int");
 LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int");
 LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry,
     "struct linux_robust_list **", "struct linux_robust_list **",
     "unsigned int *");
 LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int");
 LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int");
 LIN_SDT_PROBE_DEFINE2(futex, release_futexes, entry, "struct thread *",
     "struct linux_emuldata *");
 LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int");
 LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return);
 
 struct futex;
 
 /*
  * One waiter on a futex.  Allocated by futex_get() when FUTEX_CREATE_WP is
  * requested and freed by futex_put().
  */
 struct waiting_proc {
 	uint32_t	wp_flags;	/* FUTEX_WP_* flags */
 	struct futex	*wp_futex;	/* futex the waiter is queued on */
 	TAILQ_ENTRY(waiting_proc) wp_list; /* link on f_waiting_proc */
 };
 
 /*
  * Kernel-side state of one futex.  Instances are linked on futex_list and
  * matched by f_key.
  */
 struct futex {
 	struct sx	f_lck;		/* per-futex lock, see FUTEX_LOCK() */
 	uint32_t	*f_uaddr;	/* user-supplied value, for debug */
 	struct umtx_key	f_key;		/* identity, see umtx_key_match() */
 	uint32_t	f_refcount;	/* references to this futex */
 	uint32_t	f_bitset;	/* bitset stored by futex_wait() */
 	LIST_ENTRY(futex) f_list;	/* link on futex_list */
 	TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc; /* waiters */
 };
 
 struct futex_list futex_list;
 
 /*
  * Per-futex lock helpers.  The sx lock embedded in each futex is only ever
  * taken exclusively.
  */
 #define FUTEX_LOCK(f)		sx_xlock(&(f)->f_lck)
 #define FUTEX_UNLOCK(f)		sx_xunlock(&(f)->f_lck)
 /* Initialize the per-futex lock and fire the futex "create" probe. */
 #define FUTEX_INIT(f)		do { \
 				    sx_init_flags(&(f)->f_lck, "ftlk", \
 					SX_DUPOK); \
 				    LIN_SDT_PROBE1(futex, futex, create, \
 					&(f)->f_lck); \
 				} while (0)
 /* Fire the futex "destroy" probe, then destroy the per-futex lock. */
 #define FUTEX_DESTROY(f)	do { \
 				    LIN_SDT_PROBE1(futex, futex, destroy, \
 					&(f)->f_lck); \
 				    sx_destroy(&(f)->f_lck); \
 				} while (0)
 /* Assert that the caller holds the per-futex lock exclusively. */
 #define FUTEX_ASSERT_LOCKED(f)	sx_assert(&(f)->f_lck, SA_XLOCKED)
 
 struct mtx futex_mtx;			/* protects the futex list */
 /* Take futex_mtx and report the acquisition to the "locks" probes. */
 #define FUTEXES_LOCK		do { \
 				    mtx_lock(&futex_mtx); \
 				    LIN_SDT_PROBE1(locks, futex_mtx, \
 					locked, &futex_mtx); \
 				} while (0)
 /* Report the release to the "locks" probes, then drop futex_mtx. */
 #define FUTEXES_UNLOCK		do { \
 				    LIN_SDT_PROBE1(locks, futex_mtx, \
 					unlock, &futex_mtx); \
 				    mtx_unlock(&futex_mtx); \
 				} while (0)
 
 /* flags for futex_get() */
 #define FUTEX_CREATE_WP		0x1	/* create waiting_proc */
 #define FUTEX_DONTCREATE	0x2	/* don't create futex if not exists */
 #define FUTEX_DONTEXISTS	0x4	/* return EINVAL if futex exists */
 #define	FUTEX_SHARED		0x8	/* shared futex */
 
 /* wp_flags */
 #define FUTEX_WP_REQUEUED	0x1	/* wp requeued - wp moved from wp_list
 					 * of futex where thread sleep to wp_list
 					 * of another futex.
 					 */
 #define FUTEX_WP_REMOVED	0x2	/* wp is woken up and removed from futex
 					 * wp_list to prevent double wakeup.
 					 */
 
 static void futex_put(struct futex *, struct waiting_proc *);
 static int futex_get0(uint32_t *, struct futex **f, uint32_t);
 static int futex_get(uint32_t *, struct waiting_proc **, struct futex **,
     uint32_t);
 static int futex_sleep(struct futex *, struct waiting_proc *, int);
 static int futex_wake(struct futex *, int, uint32_t);
 static int futex_requeue(struct futex *, int, struct futex *, int);
 static int futex_wait(struct futex *, struct waiting_proc *, int,
     uint32_t);
 static int futex_atomic_op(struct thread *, int, uint32_t *);
 static int handle_futex_death(struct linux_emuldata *, uint32_t *,
     unsigned int);
 static int fetch_robust_entry(struct linux_robust_list **,
     struct linux_robust_list **, unsigned int *);
 
 /* support.s */
 int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval);
 int futex_addl(int oparg, uint32_t *uaddr, int *oldval);
 int futex_orl(int oparg, uint32_t *uaddr, int *oldval);
 int futex_andl(int oparg, uint32_t *uaddr, int *oldval);
 int futex_xorl(int oparg, uint32_t *uaddr, int *oldval);
 
 
 static void
 futex_put(struct futex *f, struct waiting_proc *wp)
 {
 	LIN_SDT_PROBE2(futex, futex_put, entry, f, wp);
 
 	FUTEX_ASSERT_LOCKED(f);
 	if (wp != NULL) {
 		if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0)
 			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
 		free(wp, M_FUTEX_WP);
 	}
 
 	FUTEXES_LOCK;
 	if (--f->f_refcount == 0) {
 		LIST_REMOVE(f, f_list);
 		FUTEXES_UNLOCK;
 		FUTEX_UNLOCK(f);
 
 		LIN_SDT_PROBE3(futex, futex_put, destroy, f->f_uaddr,
 		    f->f_refcount, f->f_key.shared);
 		LINUX_CTR3(sys_futex, "futex_put destroy uaddr %p ref %d "
 		    "shared %d", f->f_uaddr, f->f_refcount, f->f_key.shared);
 		umtx_key_release(&f->f_key);
 		FUTEX_DESTROY(f);
 		free(f, M_FUTEX);
 
 		LIN_SDT_PROBE0(futex, futex_put, return);
 		return;
 	}
 
 	LIN_SDT_PROBE3(futex, futex_put, unlock, f->f_uaddr, f->f_refcount,
 	    f->f_key.shared);
 	LINUX_CTR3(sys_futex, "futex_put uaddr %p ref %d shared %d",
 	    f->f_uaddr, f->f_refcount, f->f_key.shared);
 	FUTEXES_UNLOCK;
 	FUTEX_UNLOCK(f);
 
 	LIN_SDT_PROBE0(futex, futex_put, return);
 }
 
 /*
  * Find or create the futex identified by uaddr.
  *
  * On success returns 0 and stores the futex, locked and with a reference
  * held for the caller, in *newf.  With FUTEX_DONTCREATE a missing futex is
  * not an error: 0 is returned and *newf stays NULL.  With FUTEX_DONTEXISTS
  * an already existing futex yields EINVAL.  A failure of umtx_key_get() is
  * returned as is.  FUTEX_SHARED selects AUTO_SHARE instead of THREAD_SHARE
  * for the key lookup.
  */
 static int
 futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags)
 {
 	struct futex *f, *tmpf;
 	struct umtx_key key;
 	int error;
 
 	LIN_SDT_PROBE3(futex, futex_get0, entry, uaddr, newf, flags);
 
 	*newf = tmpf = NULL;
 
 	error = umtx_key_get(uaddr, TYPE_FUTEX, (flags & FUTEX_SHARED) ?
 	    AUTO_SHARE : THREAD_SHARE, &key);
 	if (error) {
 		LIN_SDT_PROBE1(futex, futex_get0, umtx_key_get_error, error);
 		LIN_SDT_PROBE1(futex, futex_get0, return, error);
 		return (error);
 	}
 retry:
 	FUTEXES_LOCK;
 	LIST_FOREACH(f, &futex_list, f_list) {
 		if (umtx_key_match(&f->f_key, &key)) {
 			/*
 			 * A matching futex is on the list; a futex
 			 * preallocated on the previous pass is not needed.
 			 */
 			if (tmpf != NULL) {
 				FUTEX_UNLOCK(tmpf);
 				FUTEX_DESTROY(tmpf);
 				free(tmpf, M_FUTEX);
 			}
 			if (flags & FUTEX_DONTEXISTS) {
 				FUTEXES_UNLOCK;
 				umtx_key_release(&key);
 
 				LIN_SDT_PROBE1(futex, futex_get0, return,
 				    EINVAL);
 				return (EINVAL);
 			}
 
 			/*
 			 * Increment refcount of the found futex to
 			 * prevent it from deallocation before FUTEX_LOCK()
 			 */
 			++f->f_refcount;
 			FUTEXES_UNLOCK;
 			umtx_key_release(&key);
 
 			FUTEX_LOCK(f);
 			*newf = f;
 			LIN_SDT_PROBE3(futex, futex_get0, shared, uaddr,
 			    f->f_refcount, f->f_key.shared);
 			LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d",
 			    uaddr, f->f_refcount, f->f_key.shared);
 
 			LIN_SDT_PROBE1(futex, futex_get0, return, 0);
 			return (0);
 		}
 	}
 
 	/* No match: with FUTEX_DONTCREATE report success with *newf NULL. */
 	if (flags & FUTEX_DONTCREATE) {
 		FUTEXES_UNLOCK;
 		umtx_key_release(&key);
 		LIN_SDT_PROBE1(futex, futex_get0, null, uaddr);
 		LINUX_CTR1(sys_futex, "futex_get uaddr %p null", uaddr);
 
 		LIN_SDT_PROBE1(futex, futex_get0, return, 0);
 		return (0);
 	}
 
 	/*
 	 * First pass without a match: drop the list lock around the
 	 * M_WAITOK allocation, then rescan the list from the top.
 	 */
 	if (tmpf == NULL) {
 		FUTEXES_UNLOCK;
 		tmpf = malloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO);
 		tmpf->f_uaddr = uaddr;
 		tmpf->f_key = key;
 		tmpf->f_refcount = 1;
 		tmpf->f_bitset = FUTEX_BITSET_MATCH_ANY;
 		FUTEX_INIT(tmpf);
 		TAILQ_INIT(&tmpf->f_waiting_proc);
 
 		/*
 		 * Lock the new futex before an insert into the futex_list
 		 * to prevent futex usage by other.
 		 */
 		FUTEX_LOCK(tmpf);
 		goto retry;
 	}
 
 	/* Second pass, still no match: publish the preallocated futex. */
 	LIST_INSERT_HEAD(&futex_list, tmpf, f_list);
 	FUTEXES_UNLOCK;
 
 	LIN_SDT_PROBE3(futex, futex_get0, new, uaddr, tmpf->f_refcount,
 	    tmpf->f_key.shared);
 	LINUX_CTR3(sys_futex, "futex_get uaddr %p ref %d shared %d new",
 	    uaddr, tmpf->f_refcount, tmpf->f_key.shared);
 	*newf = tmpf;
 
 	LIN_SDT_PROBE1(futex, futex_get0, return, 0);
 	return (0);
 }
 
 /*
  * Obtain the futex for uaddr through futex_get0() and store it in *f.
  *
  * With FUTEX_CREATE_WP a waiting_proc is allocated as well, returned in
  * *wp and queued at the head of the futex's waiter list.  If the lookup
  * fails, that waiting_proc is freed again and the error is returned.
  */
 static int
 futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f,
     uint32_t flags)
 {
 	struct waiting_proc *waiter;
 	int error, want_wp;
 
 	LIN_SDT_PROBE3(futex, futex_get, entry, uaddr, wp, f);
 
 	want_wp = (flags & FUTEX_CREATE_WP) != 0;
 	waiter = NULL;
 	if (want_wp) {
 		waiter = malloc(sizeof(struct waiting_proc), M_FUTEX_WP,
 		    M_WAITOK);
 		waiter->wp_flags = 0;
 		*wp = waiter;
 	}
 
 	error = futex_get0(uaddr, f, flags);
 	if (error != 0) {
 		LIN_SDT_PROBE0(futex, futex_get, error);
 
 		if (want_wp)
 			free(waiter, M_FUTEX_WP);
 
 		LIN_SDT_PROBE1(futex, futex_get, return, error);
 		return (error);
 	}
 
 	if (want_wp) {
 		/* Attach the new waiter to the futex returned in *f. */
 		waiter->wp_futex = *f;
 		TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, waiter, wp_list);
 	}
 
 	LIN_SDT_PROBE1(futex, futex_get, return, error);
 	return (error);
 }
 
 /*
  * Sleep on wp with the lock of futex f held; timeout is handed to
  * sx_sleep() unchanged.
  *
  * If the waiter was requeued while asleep (FUTEX_WP_REQUEUED), the
  * reference on the original futex is dropped and the futex now recorded in
  * wp->wp_futex is locked instead.  In every case the function ends with
  * futex_put(), which frees wp and unlocks the futex.  Returns the
  * sx_sleep() result.
  */
 static int
 futex_sleep(struct futex *f, struct waiting_proc *wp, int timeout)
 {
 	int error;
 
 	FUTEX_ASSERT_LOCKED(f);
 	LIN_SDT_PROBE3(futex, futex_sleep, entry, f, wp, timeout);
 	LINUX_CTR4(sys_futex, "futex_sleep enter uaddr %p wp %p timo %d ref %d",
 	    f->f_uaddr, wp, timeout, f->f_refcount);
 	error = sx_sleep(wp, &f->f_lck, PCATCH, "futex", timeout);
 
 	if ((wp->wp_flags & FUTEX_WP_REQUEUED) == 0) {
 		/* Still attached to the futex we went to sleep on. */
 		if (error) {
 			LIN_SDT_PROBE3(futex, futex_sleep, sleep_error, error,
 			    f->f_uaddr, wp);
 		}
 		LINUX_CTR3(sys_futex, "futex_sleep out error %d uaddr %p wp %p",
 		    error, f->f_uaddr, wp);
 	} else {
 		KASSERT(f != wp->wp_futex, ("futex != wp_futex"));
 
 		if (error) {
 			LIN_SDT_PROBE5(futex, futex_sleep, requeue_error, error,
 			    f->f_uaddr, wp, wp->wp_futex->f_uaddr,
 			    wp->wp_futex->f_refcount);
 		}
 
 		LINUX_CTR5(sys_futex, "futex_sleep out error %d uaddr %p wp"
 		    " %p requeued uaddr %p ref %d",
 		    error, f->f_uaddr, wp, wp->wp_futex->f_uaddr,
 		    wp->wp_futex->f_refcount);
 		/* Switch over from the old futex to the requeue target. */
 		futex_put(f, NULL);
 		f = wp->wp_futex;
 		FUTEX_LOCK(f);
 	}
 
 	futex_put(f, wp);
 
 	LIN_SDT_PROBE1(futex, futex_sleep, return, error);
 	return (error);
 }
 
 /*
  * Wake waiters queued on the locked futex f.
  *
  * A waiter is eligible when the f_bitset of its futex shares at least one
  * bit with bitset.  Each woken waiter is flagged FUTEX_WP_REMOVED and taken
  * off the queue; the scan stops once the running count equals n.
  *
  * Returns the number of waiters woken.  NOTE: a zero bitset makes the
  * function return EINVAL through the same int, which a caller cannot tell
  * apart from a wake count.
  */
 static int
 futex_wake(struct futex *f, int n, uint32_t bitset)
 {
 	struct waiting_proc *cur, *next;
 	int woken;
 
 	woken = 0;
 
 	LIN_SDT_PROBE3(futex, futex_wake, entry, f, n, bitset);
 
 	if (bitset == 0) {
 		LIN_SDT_PROBE1(futex, futex_wake, return, EINVAL);
 		return (EINVAL);
 	}
 
 	FUTEX_ASSERT_LOCKED(f);
 	TAILQ_FOREACH_SAFE(cur, &f->f_waiting_proc, wp_list, next) {
 		LIN_SDT_PROBE3(futex, futex_wake, iterate, f->f_uaddr, cur,
 		    f->f_refcount);
 		LINUX_CTR3(sys_futex, "futex_wake uaddr %p wp %p ref %d",
 		    f->f_uaddr, cur, f->f_refcount);
 
 		/* Skip waiters whose bitset has nothing in common. */
 		if ((cur->wp_futex->f_bitset & bitset) == 0)
 			continue;
 
 		cur->wp_flags |= FUTEX_WP_REMOVED;
 		TAILQ_REMOVE(&f->f_waiting_proc, cur, wp_list);
 		LIN_SDT_PROBE1(futex, futex_wake, wakeup, cur);
 		wakeup_one(cur);
 
 		woken++;
 		if (woken == n)
 			break;
 	}
 
 	LIN_SDT_PROBE1(futex, futex_wake, return, woken);
 	return (woken);
 }
 
 /*
  * Wake the first n waiters of futex f and move following waiters to futex
  * f2, stopping after n2 of them have been moved.  Both futexes must be
  * locked by the caller.
  *
  * Every moved waiter is flagged FUTEX_WP_REQUEUED, repointed at f2 and
  * gets a reference on f2.  Returns the number of waiters handled, woken
  * and requeued together.
  */
 static int
 futex_requeue(struct futex *f, int n, struct futex *f2, int n2)
 {
 	struct waiting_proc *cur, *next;
 	int handled;
 
 	handled = 0;
 
 	LIN_SDT_PROBE4(futex, futex_requeue, entry, f, n, f2, n2);
 
 	FUTEX_ASSERT_LOCKED(f);
 	FUTEX_ASSERT_LOCKED(f2);
 
 	TAILQ_FOREACH_SAFE(cur, &f->f_waiting_proc, wp_list, next) {
 		handled++;
 		if (handled > n) {
 			LIN_SDT_PROBE3(futex, futex_requeue, requeue,
 			    f->f_uaddr, cur, f2->f_uaddr);
 			LINUX_CTR3(sys_futex, "futex_requeue uaddr %p wp %p to %p",
 			    f->f_uaddr, cur, f2->f_uaddr);
 			cur->wp_flags |= FUTEX_WP_REQUEUED;
 			/* Transfer the waiter from f's queue to f2's. */
 			TAILQ_REMOVE(&f->f_waiting_proc, cur, wp_list);
 			TAILQ_INSERT_HEAD(&f2->f_waiting_proc, cur, wp_list);
 
 			/*
 			 * The sleeping thread will lock f2 when it wakes
 			 * up, so f2 needs a reference that keeps it from
 			 * being freed before then.
 			 */
 			cur->wp_futex = f2;
 			FUTEXES_LOCK;
 			++f2->f_refcount;
 			FUTEXES_UNLOCK;
 			if (handled - n >= n2)
 				break;
 			continue;
 		}
 
 		LINUX_CTR2(sys_futex, "futex_req_wake uaddr %p wp %p",
 		    f->f_uaddr, cur);
 		cur->wp_flags |= FUTEX_WP_REMOVED;
 		TAILQ_REMOVE(&f->f_waiting_proc, cur, wp_list);
 		LIN_SDT_PROBE1(futex, futex_requeue, wakeup, cur);
 		wakeup_one(cur);
 	}
 
 	LIN_SDT_PROBE1(futex, futex_requeue, return, handled);
 	return (handled);
 }
 
 /*
  * Block the calling thread on futex f as waiter wp.
  *
  * f must be locked and wp queued on it, as arranged by futex_get() with
  * FUTEX_CREATE_WP.  On every return path the futex has been unlocked, its
  * reference dropped and wp freed, so the caller must not touch f or wp
  * afterwards.
  *
  * timeout_hz is passed to the sleep as its timeout; bitset is stored in
  * the futex for futex_wake() to match against.
  *
  * Returns 0 when woken, EINVAL for a zero bitset, ETIMEDOUT when the sleep
  * timed out (EWOULDBLOCK from the sleep), or another sleep error.
  */
 static int
 futex_wait(struct futex *f, struct waiting_proc *wp, int timeout_hz,
     uint32_t bitset)
 {
 	int error;
 
 	LIN_SDT_PROBE4(futex, futex_wait, entry, f, wp, timeout_hz, bitset);
 
 	if (bitset == 0) {
 		/*
 		 * The caller does no cleanup after futex_wait(), so the
 		 * futex lock, the futex reference and wp have to be
 		 * released here; otherwise the thread would return to
 		 * userland with the sx lock held and both objects leaked.
 		 */
 		futex_put(f, wp);
 
 		LIN_SDT_PROBE1(futex, futex_wait, return, EINVAL);
 		return (EINVAL);
 	}
 
 	f->f_bitset = bitset;
 	/* futex_sleep() always ends with futex_put(f, wp). */
 	error = futex_sleep(f, wp, timeout_hz);
 	if (error)
 		LIN_SDT_PROBE1(futex, futex_wait, sleep_error, error);
 	/* Report an expired sleep as ETIMEDOUT. */
 	if (error == EWOULDBLOCK)
 		error = ETIMEDOUT;
 
 	LIN_SDT_PROBE1(futex, futex_wait, return, error);
 	return (error);
 }
 
 /*
  * Decode and execute the operation packed into encoded_op on the user
  * word at uaddr, then compare the previous value of that word.
  *
  * Layout of encoded_op as decoded below: bits 28-30 select the operation,
  * bits 24-27 the comparison, bits 12-23 hold the signed operation argument
  * and bits 0-11 the signed comparison argument.  When the
  * FUTEX_OP_OPARG_SHIFT bit is set the operation argument is replaced by
  * 1 << oparg.
  *
  * Returns the comparison result (0 or 1), the non-zero result of the
  * futex_*l() helper if it failed, or -ENOSYS for an unknown operation or
  * comparison.
  */
 static int
 futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr)
 {
 	int op, cmp, oparg, cmparg;
 	int prev, rv;
 
 	op = (encoded_op >> 28) & 7;
 	cmp = (encoded_op >> 24) & 15;
 	oparg = (encoded_op << 8) >> 20;
 	cmparg = (encoded_op << 20) >> 20;
 	prev = 0;
 
 	LIN_SDT_PROBE3(futex, futex_atomic_op, entry, td, encoded_op, uaddr);
 
 	if ((encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) != 0)
 		oparg = 1 << oparg;
 
 	LIN_SDT_PROBE4(futex, futex_atomic_op, decoded_op, op, cmp, oparg,
 	    cmparg);
 
 	/* XXX: Linux verifies access here and returns EFAULT */
 	LIN_SDT_PROBE0(futex, futex_atomic_op, missing_access_check);
 
 	/* Apply the operation; prev receives the old user value. */
 	switch (op) {
 	case FUTEX_OP_SET:
 		rv = futex_xchgl(oparg, uaddr, &prev);
 		break;
 	case FUTEX_OP_ADD:
 		rv = futex_addl(oparg, uaddr, &prev);
 		break;
 	case FUTEX_OP_OR:
 		rv = futex_orl(oparg, uaddr, &prev);
 		break;
 	case FUTEX_OP_ANDN:
 		/* AND-NOT is done as AND with the complemented argument. */
 		rv = futex_andl(~oparg, uaddr, &prev);
 		break;
 	case FUTEX_OP_XOR:
 		rv = futex_xorl(oparg, uaddr, &prev);
 		break;
 	default:
 		LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_op, op);
 		rv = -ENOSYS;
 		break;
 	}
 
 	if (rv != 0) {
 		LIN_SDT_PROBE1(futex, futex_atomic_op, return, rv);
 		return (rv);
 	}
 
 	/* Evaluate the requested comparison against the old value. */
 	switch (cmp) {
 	case FUTEX_OP_CMP_EQ:
 		rv = (prev == cmparg);
 		break;
 	case FUTEX_OP_CMP_NE:
 		rv = (prev != cmparg);
 		break;
 	case FUTEX_OP_CMP_LT:
 		rv = (prev < cmparg);
 		break;
 	case FUTEX_OP_CMP_GE:
 		rv = (prev >= cmparg);
 		break;
 	case FUTEX_OP_CMP_LE:
 		rv = (prev <= cmparg);
 		break;
 	case FUTEX_OP_CMP_GT:
 		rv = (prev > cmparg);
 		break;
 	default:
 		LIN_SDT_PROBE1(futex, futex_atomic_op, unimplemented_cmp, cmp);
 		rv = -ENOSYS;
 		break;
 	}
 
 	LIN_SDT_PROBE1(futex, futex_atomic_op, return, rv);
 	return (rv);
 }
 
 /*
  * Linux futex(2) system call.
  *
  * The private and clock-realtime modifier bits are stripped from args->op
  * before dispatch.  Implemented operations: FUTEX_WAIT, FUTEX_WAIT_BITSET,
  * FUTEX_WAKE, FUTEX_WAKE_BITSET, FUTEX_CMP_REQUEUE and FUTEX_WAKE_OP.  The
  * priority-inheritance operations return ENOSYS, the deprecated
  * FUTEX_REQUEUE returns EINVAL, and unknown operations return ENOSYS.
  *
  * Returns 0 or an errno value; wake and requeue counts are reported via
  * td->td_retval[0].
  */
 int
 linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
 {
 	int clockrt, nrwake, op_ret, ret;
 	struct linux_pemuldata *pem;
 	struct waiting_proc *wp;
 	struct futex *f, *f2;
 	struct l_timespec ltimeout;
 	struct timespec timeout;
 	struct timeval utv, ctv;
 	int timeout_hz;
 	int error;
 	uint32_t flags, val;
 
 	LIN_SDT_PROBE2(futex, linux_sys_futex, entry, td, args);
 
 	if (args->op & LINUX_FUTEX_PRIVATE_FLAG) {
 		flags = 0;
 		args->op &= ~LINUX_FUTEX_PRIVATE_FLAG;
 	} else
 		flags = FUTEX_SHARED;
 
 	/*
 	 * Currently support for switching between CLOCK_MONOTONIC and
 	 * CLOCK_REALTIME is not present. However Linux forbids the use of
 	 * FUTEX_CLOCK_REALTIME with any op except FUTEX_WAIT_BITSET and
 	 * FUTEX_WAIT_REQUEUE_PI.
 	 */
 	clockrt = args->op & LINUX_FUTEX_CLOCK_REALTIME;
 	args->op = args->op & ~LINUX_FUTEX_CLOCK_REALTIME;
 	if (clockrt && args->op != LINUX_FUTEX_WAIT_BITSET &&
 		args->op != LINUX_FUTEX_WAIT_REQUEUE_PI) {
 		LIN_SDT_PROBE0(futex, linux_sys_futex,
 		    unimplemented_clockswitch);
 		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
 		return (ENOSYS);
 	}
 
 	error = 0;
 	f = f2 = NULL;
 
 	switch (args->op) {
 	case LINUX_FUTEX_WAIT:
 		args->val3 = FUTEX_BITSET_MATCH_ANY;
 		/* FALLTHROUGH */
 
 	case LINUX_FUTEX_WAIT_BITSET:
 		LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wait, args->uaddr,
 		    args->val, args->val3);
 		LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x",
 		    args->uaddr, args->val, args->val3);
 
 		if (args->timeout != NULL) {
 			error = copyin(args->timeout, &ltimeout, sizeof(ltimeout));
 			if (error) {
 				LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
 				    error);
 				LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
 				return (error);
 			}
 			error = linux_to_native_timespec(&timeout, &ltimeout);
 			if (error) {
 				/* Keep entry/return probes paired. */
 				LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
 				return (error);
 			}
 			TIMESPEC_TO_TIMEVAL(&utv, &timeout);
 			error = itimerfix(&utv);
 			if (error) {
 				LIN_SDT_PROBE1(futex, linux_sys_futex, itimerfix_error,
 				    error);
 				LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
 				return (error);
 			}
 			/*
 			 * FUTEX_WAIT_BITSET timeouts are absolute; convert
 			 * to an interval relative to the selected clock.
 			 * Plain FUTEX_WAIT timeouts are already relative.
 			 */
 			if (clockrt) {
 				microtime(&ctv);
 				timevalsub(&utv, &ctv);
 			} else if (args->op == LINUX_FUTEX_WAIT_BITSET) {
 				microuptime(&ctv);
 				timevalsub(&utv, &ctv);
 			}
 			if (utv.tv_sec < 0)
 				timevalclear(&utv);
 			timeout_hz = tvtohz(&utv);
 		} else
 			timeout_hz = 0;
 
 		error = futex_get(args->uaddr, &wp, &f,
 		    flags | FUTEX_CREATE_WP);
 		if (error) {
 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
 			return (error);
 		}
 
 		error = copyin(args->uaddr, &val, sizeof(val));
 		if (error) {
 			LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
 			    error);
 			LINUX_CTR1(sys_futex, "WAIT copyin failed %d",
 			    error);
 			futex_put(f, wp);
 
 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
 			return (error);
 		}
 		if (val != args->val) {
 			LIN_SDT_PROBE4(futex, linux_sys_futex,
 			    debug_wait_value_neq, args->uaddr, args->val, val,
 			    args->val3);
 			LINUX_CTR3(sys_futex,
 			    "WAIT uaddr %p val 0x%x != uval 0x%x",
 			    args->uaddr, args->val, val);
 			futex_put(f, wp);
 
 			LIN_SDT_PROBE1(futex, linux_sys_futex, return,
 			    EWOULDBLOCK);
 			return (EWOULDBLOCK);
 		}
 
 		error = futex_wait(f, wp, timeout_hz, args->val3);
 		break;
 
 	case LINUX_FUTEX_WAKE:
 		args->val3 = FUTEX_BITSET_MATCH_ANY;
 		/* FALLTHROUGH */
 
 	case LINUX_FUTEX_WAKE_BITSET:
 		LIN_SDT_PROBE3(futex, linux_sys_futex, debug_wake, args->uaddr,
 		    args->val, args->val3);
 		LINUX_CTR3(sys_futex, "WAKE uaddr %p nrwake 0x%x bitset 0x%x",
 		    args->uaddr, args->val, args->val3);
 
 		/*
 		 * An empty bitset is invalid.  Reject it here: futex_wake()
 		 * reports EINVAL through its return value, which would
 		 * otherwise be handed to userland as a wake count.
 		 */
 		if (args->val3 == 0) {
 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL);
 			return (EINVAL);
 		}
 
 		error = futex_get(args->uaddr, NULL, &f,
 		    flags | FUTEX_DONTCREATE);
 		if (error) {
 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
 			return (error);
 		}
 
 		/* No futex means no waiters: report zero woken. */
 		if (f == NULL) {
 			td->td_retval[0] = 0;
 
 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
 			return (error);
 		}
 		td->td_retval[0] = futex_wake(f, args->val, args->val3);
 		futex_put(f, NULL);
 		break;
 
 	case LINUX_FUTEX_CMP_REQUEUE:
 		LIN_SDT_PROBE5(futex, linux_sys_futex, debug_cmp_requeue,
 		    args->uaddr, args->val, args->val3, args->uaddr2,
 		    args->timeout);
 		LINUX_CTR5(sys_futex, "CMP_REQUEUE uaddr %p "
 		    "nrwake 0x%x uval 0x%x uaddr2 %p nrequeue 0x%x",
 		    args->uaddr, args->val, args->val3, args->uaddr2,
 		    args->timeout);
 
 		/*
 		 * Linux allows this, we would not, it is an incorrect
 		 * usage of declared ABI, so return EINVAL.
 		 */
 		if (args->uaddr == args->uaddr2) {
 			LIN_SDT_PROBE0(futex, linux_sys_futex,
 			    invalid_cmp_requeue_use);
 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL);
 			return (EINVAL);
 		}
 
 		error = futex_get(args->uaddr, NULL, &f, flags);
 		if (error) {
 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
 			return (error);
 		}
 
 		/*
 		 * To avoid deadlocks return EINVAL if second futex
 		 * exists at this time.
 		 *
 		 * Glibc fall back to FUTEX_WAKE in case of any error
 		 * returned by FUTEX_CMP_REQUEUE.
 		 */
 		error = futex_get(args->uaddr2, NULL, &f2,
 		    flags | FUTEX_DONTEXISTS);
 		if (error) {
 			futex_put(f, NULL);
 
 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
 			return (error);
 		}
 		error = copyin(args->uaddr, &val, sizeof(val));
 		if (error) {
 			LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
 			    error);
 			LINUX_CTR1(sys_futex, "CMP_REQUEUE copyin failed %d",
 			    error);
 			futex_put(f2, NULL);
 			futex_put(f, NULL);
 
 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
 			return (error);
 		}
 		if (val != args->val3) {
 			/* Trace the value that was actually compared (val3). */
 			LIN_SDT_PROBE2(futex, linux_sys_futex,
 			    debug_cmp_requeue_value_neq, args->val3, val);
 			LINUX_CTR2(sys_futex, "CMP_REQUEUE val 0x%x != uval 0x%x",
 			    args->val3, val);
 			futex_put(f2, NULL);
 			futex_put(f, NULL);
 
 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, EAGAIN);
 			return (EAGAIN);
 		}
 
 		/* For this op the timeout argument carries the requeue count. */
 		nrwake = (int)(unsigned long)args->timeout;
 		td->td_retval[0] = futex_requeue(f, args->val, f2, nrwake);
 		futex_put(f2, NULL);
 		futex_put(f, NULL);
 		break;
 
 	case LINUX_FUTEX_WAKE_OP:
 		LIN_SDT_PROBE5(futex, linux_sys_futex, debug_wake_op,
 		    args->uaddr, args->op, args->val, args->uaddr2, args->val3);
 		LINUX_CTR5(sys_futex, "WAKE_OP "
 		    "uaddr %p nrwake 0x%x uaddr2 %p op 0x%x nrwake2 0x%x",
 		    args->uaddr, args->val, args->uaddr2, args->val3,
 		    args->timeout);
 
 		error = futex_get(args->uaddr, NULL, &f, flags);
 		if (error) {
 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
 			return (error);
 		}
 
 		/* f2 stays NULL when both addresses name the same futex. */
 		if (args->uaddr != args->uaddr2)
 			error = futex_get(args->uaddr2, NULL, &f2, flags);
 		if (error) {
 			futex_put(f, NULL);
 
 			LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
 			return (error);
 		}
 
 		/*
 		 * This function returns positive number as results and
 		 * negative as errors
 		 */
 		op_ret = futex_atomic_op(td, args->val3, args->uaddr2);
 
 		LINUX_CTR2(sys_futex, "WAKE_OP atomic_op uaddr %p ret 0x%x",
 		    args->uaddr, op_ret);
 
 		if (op_ret < 0) {
 			/* XXX: We don't handle the EFAULT yet. */
 			if (op_ret == -EFAULT) {
 				LIN_SDT_PROBE0(futex, linux_sys_futex,
 				    unhandled_efault);
 			}
 			if (f2 != NULL)
 				futex_put(f2, NULL);
 			futex_put(f, NULL);
 
 			LIN_SDT_PROBE1(futex, linux_sys_futex, return,
 			    -op_ret);
 			return (-op_ret);
 		}
 
 		/*
 		 * For this op val3 is the encoded operation, not a wake
 		 * bitset, so wake with FUTEX_BITSET_MATCH_ANY.  Passing
 		 * val3 made an all-zero encoding come back from
 		 * futex_wake() as EINVAL and be counted as woken threads.
 		 */
 		ret = futex_wake(f, args->val, FUTEX_BITSET_MATCH_ANY);
 
 		if (op_ret > 0) {
 			op_ret = 0;
 			/* The timeout argument carries the second wake count. */
 			nrwake = (int)(unsigned long)args->timeout;
 
 			if (f2 != NULL)
 				op_ret += futex_wake(f2, nrwake,
 				    FUTEX_BITSET_MATCH_ANY);
 			else
 				op_ret += futex_wake(f, nrwake,
 				    FUTEX_BITSET_MATCH_ANY);
 			ret += op_ret;
 
 		}
 		if (f2 != NULL)
 			futex_put(f2, NULL);
 		futex_put(f, NULL);
 		td->td_retval[0] = ret;
 		break;
 
 	case LINUX_FUTEX_LOCK_PI:
 		/* not yet implemented */
 		pem = pem_find(td->td_proc);
 		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
 			linux_msg(td,
 				  "linux_sys_futex: "
 				  "unsupported futex_pi op\n");
 			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
 			LIN_SDT_PROBE0(futex, linux_sys_futex,
 			    unimplemented_lock_pi);
 		}
 		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
 		return (ENOSYS);
 
 	case LINUX_FUTEX_UNLOCK_PI:
 		/* not yet implemented */
 		pem = pem_find(td->td_proc);
 		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
 			linux_msg(td,
 				  "linux_sys_futex: "
 				  "unsupported futex_pi op\n");
 			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
 			LIN_SDT_PROBE0(futex, linux_sys_futex,
 			    unimplemented_unlock_pi);
 		}
 		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
 		return (ENOSYS);
 
 	case LINUX_FUTEX_TRYLOCK_PI:
 		/* not yet implemented */
 		pem = pem_find(td->td_proc);
 		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
 			linux_msg(td,
 				  "linux_sys_futex: "
 				  "unsupported futex_pi op\n");
 			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
 			LIN_SDT_PROBE0(futex, linux_sys_futex,
 			    unimplemented_trylock_pi);
 		}
 		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
 		return (ENOSYS);
 
 	case LINUX_FUTEX_REQUEUE:
 
 		/*
 		 * Glibc does not use this operation since version 2.3.3,
 		 * as it is racy and replaced by FUTEX_CMP_REQUEUE operation.
 		 * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when
 		 * FUTEX_REQUEUE returned EINVAL.
 		 */
 		pem = pem_find(td->td_proc);
 		if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) {
 			linux_msg(td,
 				  "linux_sys_futex: "
 				  "unsupported futex_requeue op\n");
 			pem->flags |= LINUX_XDEPR_REQUEUEOP;
 			LIN_SDT_PROBE0(futex, linux_sys_futex,
 			    deprecated_requeue);
 		}
 
 		LIN_SDT_PROBE1(futex, linux_sys_futex, return, EINVAL);
 		return (EINVAL);
 
 	case LINUX_FUTEX_WAIT_REQUEUE_PI:
 		/* not yet implemented */
 		pem = pem_find(td->td_proc);
 		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
 			linux_msg(td,
 				  "linux_sys_futex: "
 				  "unsupported futex_pi op\n");
 			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
 			LIN_SDT_PROBE0(futex, linux_sys_futex,
 			    unimplemented_wait_requeue_pi);
 		}
 		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
 		return (ENOSYS);
 
 	case LINUX_FUTEX_CMP_REQUEUE_PI:
 		/* not yet implemented */
 		pem = pem_find(td->td_proc);
 		if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
 			linux_msg(td,
 				  "linux_sys_futex: "
 				  "unsupported futex_pi op\n");
 			pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
 			LIN_SDT_PROBE0(futex, linux_sys_futex,
 			    unimplemented_cmp_requeue_pi);
 		}
 		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
 		return (ENOSYS);
 
 	default:
 		linux_msg(td,
 			  "linux_sys_futex: unknown op %d\n", args->op);
 		LIN_SDT_PROBE1(futex, linux_sys_futex, unknown_operation,
 		    args->op);
 		LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
 		return (ENOSYS);
 	}
 
 	LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
 	return (error);
 }
 
 /*
  * Record the calling thread's robust futex list head.  The length passed
  * in by userland must equal sizeof(struct linux_robust_list_head);
  * anything else yields EINVAL.  Returns 0 on success.
  */
 int
 linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args)
 {
 	struct linux_emuldata *emd;
 	int rv;

 	LIN_SDT_PROBE2(futex, linux_set_robust_list, entry, td, args);

 	if (args->len == sizeof(struct linux_robust_list_head)) {
 		/* Only the pointer is stored; nothing is read from userland here. */
 		emd = em_find(td);
 		emd->robust_futexes = args->head;
 		rv = 0;
 	} else {
 		LIN_SDT_PROBE0(futex, linux_set_robust_list, size_error);
 		rv = EINVAL;
 	}

 	LIN_SDT_PROBE1(futex, linux_set_robust_list, return, rv);
 	return (rv);
 }
 
 int
 linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args)
 {
 	struct linux_emuldata *em;
 	struct linux_robust_list_head *head;
 	l_size_t len = sizeof(struct linux_robust_list_head);
 	struct thread *td2;
 	int error = 0;
 
 	LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args);
 
 	if (!args->pid) {
 		em = em_find(td);
 		KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
 		head = em->robust_futexes;
 	} else {
 		td2 = tdfind(args->pid, -1);
 		if (td2 == NULL) {
 			LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
 			    ESRCH);
 			return (ESRCH);
 		}
-		if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX)
+		if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) {
+			LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
+			    EPERM);
+			PROC_UNLOCK(td2->td_proc);
 			return (EPERM);
+		}
 
 		em = em_find(td2);
 		KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
 		/* XXX: ptrace? */
 		if (priv_check(td, PRIV_CRED_SETUID) ||
 		    priv_check(td, PRIV_CRED_SETEUID) ||
 		    p_candebug(td, td2->td_proc)) {
 			PROC_UNLOCK(td2->td_proc);
 
 			LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
 			    EPERM);
 			return (EPERM);
 		}
 		head = em->robust_futexes;
 
 		PROC_UNLOCK(td2->td_proc);
 	}
 
 	error = copyout(&len, args->len, sizeof(l_size_t));
 	if (error) {
 		LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error,
 		    error);
 		LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EFAULT);
 		return (EFAULT);
 	}
 
 	error = copyout(head, args->head, sizeof(struct linux_robust_list_head));
 	if (error) {
 		LIN_SDT_PROBE1(futex, linux_get_robust_list, copyout_error,
 		    error);
 	}
 
 	LIN_SDT_PROBE1(futex, linux_get_robust_list, return, error);
 	return (error);
 }
 
 /*
  * Mark one robust futex as abandoned on behalf of the thread described
  * by em.
  *
  * The 32-bit word at the user address uaddr is read; if its TID field
  * equals em->em_tid the word is atomically replaced by FUTEX_OWNER_DIED
  * (keeping only the FUTEX_WAITERS bit).  For a non-PI futex that had
  * FUTEX_WAITERS set, one waiter is then woken.
  *
  * Returns 0 on success (including when the futex is not owned by em),
  * EFAULT when the user word cannot be read or updated, or the error
  * reported by futex_get().
  */
 static int
 handle_futex_death(struct linux_emuldata *em, uint32_t *uaddr,
     unsigned int pi)
 {
 	uint32_t uval, nval, mval;
 	struct futex *f;
 	int error;
 
 	LIN_SDT_PROBE3(futex, handle_futex_death, entry, em, uaddr, pi);
 
 retry:
 	error = copyin(uaddr, &uval, 4);
 	if (error) {
 		LIN_SDT_PROBE1(futex, handle_futex_death, copyin_error, error);
 		LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT);
 		return (EFAULT);
 	}
 	if ((uval & FUTEX_TID_MASK) == em->em_tid) {
 		/* New value: owner-died marker plus the old waiters bit. */
 		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
 		nval = casuword32(uaddr, uval, mval);
 
 		/*
 		 * NOTE(review): -1 is treated as failure here, so a word whose
 		 * previous value was 0xffffffff would look like a fault --
 		 * confirm against the casuword32() contract.
 		 */
 		if (nval == -1) {
 			LIN_SDT_PROBE1(futex, handle_futex_death, return,
 			    EFAULT);
 			return (EFAULT);
 		}
 
 		/* The word changed between the read and the swap: start over. */
 		if (nval != uval)
 			goto retry;
 
 		if (!pi && (uval & FUTEX_WAITERS)) {
 			error = futex_get(uaddr, NULL, &f,
 			    FUTEX_DONTCREATE | FUTEX_SHARED);
 			if (error) {
 				LIN_SDT_PROBE1(futex, handle_futex_death,
 				    return, error);
 				return (error);
 			}
 			/* With FUTEX_DONTCREATE a missing futex yields f == NULL. */
 			if (f != NULL) {
 				futex_wake(f, 1, FUTEX_BITSET_MATCH_ANY);
 				futex_put(f, NULL);
 			}
 		}
 	}
 
 	LIN_SDT_PROBE1(futex, handle_futex_death, return, 0);
 	return (0);
 }
 
 /*
  * Read one robust-list link word from the user address 'head' and split
  * it: bit 0 is returned through *pi, the word with bit 0 cleared is
  * returned through *entry.  Returns 0, or EFAULT if the copyin() fails.
  */
 static int
 fetch_robust_entry(struct linux_robust_list **entry,
     struct linux_robust_list **head, unsigned int *pi)
 {
 	l_ulong word;
 	int rv;

 	LIN_SDT_PROBE3(futex, fetch_robust_entry, entry, entry, head, pi);

 	rv = copyin((const void *)head, &word, sizeof(word));
 	if (rv != 0) {
 		LIN_SDT_PROBE1(futex, fetch_robust_entry, copyin_error, rv);
 		LIN_SDT_PROBE1(futex, fetch_robust_entry, return, EFAULT);
 		return (EFAULT);
 	}

 	/* Low bit is the flag, everything above it is the pointer. */
 	*pi = word & 1;
 	*entry = (void *)(word & ~1UL);

 	LIN_SDT_PROBE1(futex, fetch_robust_entry, return, 0);
 	return (0);
 }
 
 /*
  * This walks the list of robust futexes releasing them.
  *
  * The list head is the user pointer saved in em->robust_futexes; nothing
  * is done when it is NULL.  Each entry's futex word lives at
  * (entry + futex_offset), where futex_offset is read from the user-space
  * head.  The walk stops when it returns to &head->list, after 2048
  * entries, or on the first copyin/handle_futex_death failure.  The
  * "pending" entry is skipped during the walk and handled once at the end.
  */
 void
 release_futexes(struct thread *td, struct linux_emuldata *em)
 {
 	struct linux_robust_list_head *head = NULL;
 	struct linux_robust_list *entry, *next_entry, *pending;
 	unsigned int limit = 2048, pi, next_pi, pip;
 	l_long futex_offset;
 	int rc, error;
 
 	LIN_SDT_PROBE2(futex, release_futexes, entry, td, em);
 
 	head = em->robust_futexes;
 
 	if (head == NULL) {
 		LIN_SDT_PROBE0(futex, release_futexes, return);
 		return;
 	}
 
 	/* First entry of the list and its PI flag. */
 	if (fetch_robust_entry(&entry, PTRIN(&head->list.next), &pi)) {
 		LIN_SDT_PROBE0(futex, release_futexes, return);
 		return;
 	}
 
 	error = copyin(&head->futex_offset, &futex_offset,
 	    sizeof(futex_offset));
 	if (error) {
 		LIN_SDT_PROBE1(futex, release_futexes, copyin_error, error);
 		LIN_SDT_PROBE0(futex, release_futexes, return);
 		return;
 	}
 
 	if (fetch_robust_entry(&pending, PTRIN(&head->pending_list), &pip)) {
 		LIN_SDT_PROBE0(futex, release_futexes, return);
 		return;
 	}
 
 	while (entry != &head->list) {
 		/*
 		 * Fetch the successor before touching the current entry; a
 		 * fetch failure (rc) is only acted upon after the current
 		 * entry has been handled.
 		 */
 		rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi);
 
 		if (entry != pending)
 			if (handle_futex_death(em,
 			    (uint32_t *)((caddr_t)entry + futex_offset), pi)) {
 				LIN_SDT_PROBE0(futex, release_futexes, return);
 				return;
 			}
 		if (rc) {
 			LIN_SDT_PROBE0(futex, release_futexes, return);
 			return;
 		}
 
 		entry = next_entry;
 		pi = next_pi;
 
 		/* Bound the walk so a circular user list cannot loop forever. */
 		if (!--limit)
 			break;
 
 		sched_relinquish(curthread);
 	}
 
 	/* The pending entry was skipped above; process it last. */
 	if (pending)
 		handle_futex_death(em, (uint32_t *)((caddr_t)pending + futex_offset), pip);
 
 	LIN_SDT_PROBE0(futex, release_futexes, return);
 }
Index: projects/clang380-import/sys/compat/svr4/svr4_sysvec.c
===================================================================
--- projects/clang380-import/sys/compat/svr4/svr4_sysvec.c	(revision 293686)
+++ projects/clang380-import/sys/compat/svr4/svr4_sysvec.c	(revision 293687)
@@ -1,312 +1,313 @@
 /*-
  * Copyright (c) 1998 Mark Newton
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Christos Zoulas.
  * 4. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /* XXX we use functions that might not exist. */
 #include "opt_compat.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/sysent.h>
 #include <sys/imgact.h>
 #include <sys/imgact_elf.h>
 #include <sys/fcntl.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/socket.h>
 #include <sys/syscallsubr.h>
 #include <sys/vnode.h>
 #include <vm/vm.h>
 #include <sys/exec.h>
 #include <sys/kernel.h>
 #include <machine/cpu.h>
 #include <netinet/in.h>
 
 #include <compat/svr4/svr4.h>
 #include <compat/svr4/svr4_types.h>
 #include <compat/svr4/svr4_syscall.h>
 #include <compat/svr4/svr4_signal.h>
 #include <compat/svr4/svr4_socket.h>
 #include <compat/svr4/svr4_sockio.h>
 #include <compat/svr4/svr4_errno.h>
 #include <compat/svr4/svr4_proto.h>
 #include <compat/svr4/svr4_siginfo.h>
 #include <compat/svr4/svr4_util.h>
 
 /*
  * Table of SVR4 errno values with ELAST+1 slots; slot 0 is 0.  It is
  * installed as svr4_sysvec.sv_errtbl.  Presumably indexed by the native
  * errno value, with the entries listed in native errno order.
  */
 int bsd_to_svr4_errno[ELAST+1] = {
         0,
         SVR4_EPERM,
         SVR4_ENOENT,
         SVR4_ESRCH,
         SVR4_EINTR,
         SVR4_EIO,
         SVR4_ENXIO,
         SVR4_E2BIG,
         SVR4_ENOEXEC,
         SVR4_EBADF,
         SVR4_ECHILD,
         SVR4_EDEADLK,
         SVR4_ENOMEM,
         SVR4_EACCES,
         SVR4_EFAULT,
         SVR4_ENOTBLK,
         SVR4_EBUSY,
         SVR4_EEXIST,
         SVR4_EXDEV,
         SVR4_ENODEV,
         SVR4_ENOTDIR,
         SVR4_EISDIR,
         SVR4_EINVAL,
         SVR4_ENFILE,
         SVR4_EMFILE,
         SVR4_ENOTTY,
         SVR4_ETXTBSY,
         SVR4_EFBIG,
         SVR4_ENOSPC,
         SVR4_ESPIPE,
         SVR4_EROFS,
         SVR4_EMLINK,
         SVR4_EPIPE,
         SVR4_EDOM,
         SVR4_ERANGE,
         SVR4_EAGAIN,
         SVR4_EINPROGRESS,
         SVR4_EALREADY,
         SVR4_ENOTSOCK,
         SVR4_EDESTADDRREQ,
         SVR4_EMSGSIZE,
         SVR4_EPROTOTYPE,
         SVR4_ENOPROTOOPT,
         SVR4_EPROTONOSUPPORT,
         SVR4_ESOCKTNOSUPPORT,
         SVR4_EOPNOTSUPP,
         SVR4_EPFNOSUPPORT,
         SVR4_EAFNOSUPPORT,
         SVR4_EADDRINUSE,
         SVR4_EADDRNOTAVAIL,
         SVR4_ENETDOWN,
         SVR4_ENETUNREACH,
         SVR4_ENETRESET,
         SVR4_ECONNABORTED,
         SVR4_ECONNRESET,
         SVR4_ENOBUFS,
         SVR4_EISCONN,
         SVR4_ENOTCONN,
         SVR4_ESHUTDOWN,
         SVR4_ETOOMANYREFS,
         SVR4_ETIMEDOUT,
         SVR4_ECONNREFUSED,
         SVR4_ELOOP,
         SVR4_ENAMETOOLONG,
         SVR4_EHOSTDOWN,
         SVR4_EHOSTUNREACH,
         SVR4_ENOTEMPTY,
         SVR4_EPROCLIM,
         SVR4_EUSERS,
         SVR4_EDQUOT,
         SVR4_ESTALE,
         SVR4_EREMOTE,
         SVR4_EBADRPC,
         SVR4_ERPCMISMATCH,
         SVR4_EPROGUNAVAIL,
         SVR4_EPROGMISMATCH,
         SVR4_EPROCUNAVAIL,
         SVR4_ENOLCK,
         SVR4_ENOSYS,
         SVR4_EFTYPE,
         SVR4_EAUTH,
         SVR4_ENEEDAUTH,
         SVR4_EIDRM,
         SVR4_ENOMSG,
 };
 
 
 static int 	svr4_fixup(register_t **stack_base, struct image_params *imgp);
 
 extern struct sysent svr4_sysent[];
 #undef szsigcode
 #undef sigcode
 
 extern int svr4_szsigcode;
 extern char svr4_sigcode[];
 
 /*
  * System entry vector for SVR4 binaries; referenced from svr4_brand
  * below.  Hooks that this emulation does not provide are set to NULL.
  *
  * NOTE(review): sv_errsize is ELAST while bsd_to_svr4_errno is declared
  * with ELAST+1 slots -- confirm whether the last slot is meant to be
  * reachable.
  */
 struct sysentvec svr4_sysvec = {
 	.sv_size	= SVR4_SYS_MAXSYSCALL,
 	.sv_table	= svr4_sysent,
 	.sv_mask	= 0xff,
 	.sv_errsize	= ELAST,  /* ELAST */
 	.sv_errtbl	= bsd_to_svr4_errno,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= svr4_fixup,
 	.sv_sendsig	= svr4_sendsig,
 	.sv_sigcode	= svr4_sigcode,
 	.sv_szsigcode	= &svr4_szsigcode,
 	.sv_name	= "SVR4",
 	.sv_coredump	= elf32_coredump,
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= SVR4_MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings = exec_copyout_strings,
 	.sv_setregs	= exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz     = NULL,
 	.sv_flags	= SV_ABI_UNDEF | SV_IA32 | SV_ILP32,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = NULL,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
+	.sv_trap	= NULL,
 };
 
 const char      svr4_emul_path[] = "/compat/svr4";
 
 /*
  * ELF brand descriptor tying ELFOSABI_SYSV/EM_386 images to svr4_sysvec.
  * It is inserted and removed by svr4_elf_modevent().
  */
 Elf32_Brandinfo svr4_brand = {
 	.brand		= ELFOSABI_SYSV,
 	.machine	= EM_386, /* XXX only implemented for x86 so far. */
 	.compat_3_brand	= "SVR4",
 	.emul_path	= svr4_emul_path,
 	.interp_path	= "/lib/libc.so.1",
 	.sysvec		= &svr4_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= NULL,
 	.flags		= 0
 };
 
 /*
  * Finish the initial user stack of a freshly exec'ed SVR4 image: emit the
  * ELF auxiliary vector from imgp->auxargs, release that structure, and
  * push argc just below the original *stack_base.  Always returns 0.
  */
 static int
 svr4_fixup(register_t **stack_base, struct image_params *imgp)
 {
 	Elf32_Auxargs *aux;
 	register_t *auxp;
 	struct ucred *cred;

 	KASSERT(curthread->td_proc == imgp->proc,
 	    ("unsafe svr4_fixup(), should be curproc"));

 	aux = (Elf32_Auxargs *)imgp->auxargs;
 	cred = imgp->proc->p_ucred;

 	/*
 	 * The vector starts argc + envc + 2 slots above the stack base
 	 * (presumably past argv/envp and their two terminators).
 	 */
 	auxp = *stack_base + (imgp->args->argc + imgp->args->envc + 2);

 	if (aux->execfd != -1)
 		AUXARGS_ENTRY(auxp, AT_EXECFD, aux->execfd);
 	AUXARGS_ENTRY(auxp, AT_PHDR, aux->phdr);
 	AUXARGS_ENTRY(auxp, AT_PHENT, aux->phent);
 	AUXARGS_ENTRY(auxp, AT_PHNUM, aux->phnum);
 	AUXARGS_ENTRY(auxp, AT_PAGESZ, aux->pagesz);
 	AUXARGS_ENTRY(auxp, AT_FLAGS, aux->flags);
 	AUXARGS_ENTRY(auxp, AT_ENTRY, aux->entry);
 	AUXARGS_ENTRY(auxp, AT_BASE, aux->base);
 	AUXARGS_ENTRY(auxp, AT_UID, cred->cr_ruid);
 	AUXARGS_ENTRY(auxp, AT_EUID, cred->cr_svuid);
 	AUXARGS_ENTRY(auxp, AT_GID, cred->cr_rgid);
 	AUXARGS_ENTRY(auxp, AT_EGID, cred->cr_svgid);
 	AUXARGS_ENTRY(auxp, AT_NULL, 0);

 	/* The auxargs are consumed exactly once. */
 	free(imgp->auxargs, M_TEMP);
 	imgp->auxargs = NULL;

 	/* Make room for, and store, argc. */
 	(*stack_base)--;
 	**stack_base = (register_t)imgp->args->argc;

 	return (0);
 }
 
 /*
  * Search an alternate path before passing pathname arguments on
  * to system calls. Useful for keeping a separate 'emulation tree'.
  *
  * If 'create' is set, we check if an attempt can be made to create
  * the named file, i.e. we check if the directory it should
  * be in exists.
  *
  * This is a thin wrapper: it calls kern_alternate_path() with
  * svr4_emul_path as the prefix and AT_FDCWD as the directory descriptor,
  * and returns its result unchanged.
  */
 int
 svr4_emul_find(struct thread *td, char *path, enum uio_seg pathseg,
     char **pbuf, int create)
 {
 
 	return (kern_alternate_path(td, svr4_emul_path, path, pathseg, pbuf,
 	    create, AT_FDCWD));
 }
 
 /*
  * Module event handler for the svr4elf module.
  *
  * MOD_LOAD registers svr4_brand and initialises the socket cache;
  * MOD_UNLOAD undoes both, refusing with EBUSY while the brand is in use.
  * Any other event yields EOPNOTSUPP.  Returns 0 on success, EINVAL when
  * the brand cannot be inserted or removed.
  */
 static int
 svr4_elf_modevent(module_t mod, int type, void *data)
 {
 	int rv;

 	if (type == MOD_LOAD) {
 		if (elf32_insert_brand_entry(&svr4_brand) < 0) {
 			printf("cannot insert svr4 elf brand handler\n");
 			return (EINVAL);
 		}
 		if (bootverbose)
 			printf("svr4 ELF exec handler installed\n");
 		svr4_sockcache_init();
 		return (0);
 	}

 	if (type != MOD_UNLOAD)
 		return (EOPNOTSUPP);

 	/* Only allow the emulator to be removed if it isn't in use. */
 	if (elf32_brand_inuse(&svr4_brand) != 0)
 		rv = EBUSY;
 	else if (elf32_remove_brand_entry(&svr4_brand) < 0)
 		rv = EINVAL;
 	else
 		rv = 0;

 	if (rv != 0) {
 		printf("Could not deinstall ELF interpreter entry (error %d)\n",
 		       rv);
 		return (rv);
 	}

 	if (bootverbose)
 		printf("svr4 ELF exec handler removed\n");
 	svr4_sockcache_destroy();
 	return (0);
 }
 
 /*
  * Module glue: "svr4elf" is driven by svr4_elf_modevent() with no extra
  * argument, and declares a dependency on the "streams" module.
  */
 static moduledata_t svr4_elf_mod = {
 	"svr4elf",
 	svr4_elf_modevent,
 	0
 };
 DECLARE_MODULE_TIED(svr4elf, svr4_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
 MODULE_DEPEND(svr4elf, streams, 1, 1, 1);
Index: projects/clang380-import/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
===================================================================
--- projects/clang380-import/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c	(revision 293686)
+++ projects/clang380-import/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c	(revision 293687)
@@ -1,1464 +1,1453 @@
 /*	$FreeBSD$	*/
 
 /*
  * Copyright (C) 2012 by Darren Reed.
  *
  * See the IPFILTER.LICENCE file for details on licencing.
  */
 #if !defined(lint)
 static const char sccsid[] = "@(#)ip_fil.c	2.41 6/5/96 (C) 1993-2000 Darren Reed";
 static const char rcsid[] = "@(#)$Id$";
 #endif
 
 #if defined(KERNEL) || defined(_KERNEL)
 # undef KERNEL
 # undef _KERNEL
 # define	KERNEL	1
 # define	_KERNEL	1
 #endif
 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 400000) && \
     !defined(KLD_MODULE) && !defined(IPFILTER_LKM)
 # include "opt_inet6.h"
 #endif
 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 440000) && \
     !defined(KLD_MODULE) && !defined(IPFILTER_LKM)
 # include "opt_random_ip_id.h"
 #endif
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/types.h>
 #include <sys/file.h>
 # include <sys/fcntl.h>
 # include <sys/filio.h>
 #include <sys/time.h>
 #include <sys/systm.h>
 # include <sys/dirent.h>
 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000)
 #include <sys/jail.h>
 #endif
 # include <sys/mbuf.h>
 # include <sys/sockopt.h>
 #if !defined(__hpux)
 # include <sys/mbuf.h>
 #endif
 #include <sys/socket.h>
 # include <sys/selinfo.h>
 # include <netinet/tcp_var.h>
 
 #include <net/if.h>
 # include <net/if_var.h>
 #  include <net/netisr.h>
 #include <net/route.h>
 #include <netinet/in.h>
+#include <netinet/in_fib.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp.h>
 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000)
 #include <net/vnet.h>
 #else
 #define CURVNET_SET(arg)
 #define CURVNET_RESTORE()
 #endif
 #if defined(__osf__)
 # include <netinet/tcp_timer.h>
 #endif
 #include <netinet/udp.h>
 #include <netinet/tcpip.h>
 #include <netinet/ip_icmp.h>
 #include "netinet/ip_compat.h"
 #ifdef USE_INET6
 # include <netinet/icmp6.h>
 #endif
 #include "netinet/ip_fil.h"
 #include "netinet/ip_nat.h"
 #include "netinet/ip_frag.h"
 #include "netinet/ip_state.h"
 #include "netinet/ip_proxy.h"
 #include "netinet/ip_auth.h"
 #include "netinet/ip_sync.h"
 #include "netinet/ip_lookup.h"
 #include "netinet/ip_dstlist.h"
 #ifdef	IPFILTER_SCAN
 #include "netinet/ip_scan.h"
 #endif
 #include "netinet/ip_pool.h"
 # include <sys/malloc.h>
 #include <sys/kernel.h>
 #ifdef CSUM_DATA_VALID
 #include <machine/in_cksum.h>
 #endif
 extern	int	ip_optcopy __P((struct ip *, struct ip *));
 
 
 # ifdef IPFILTER_M_IPFILTER
 MALLOC_DEFINE(M_IPFILTER, "ipfilter", "IP Filter packet filter data structures");
 # endif
 
 
 static	int	(*ipf_savep) __P((void *, ip_t *, int, void *, int, struct mbuf **));
 static	int	ipf_send_ip __P((fr_info_t *, mb_t *));
 static void	ipf_timer_func __P((void *arg));
 int		ipf_locks_done = 0;
 
 ipf_main_softc_t ipfmain;
 
 # include <sys/conf.h>
 # if defined(NETBSD_PF)
 #  include <net/pfil.h>
 # endif /* NETBSD_PF */
 /*
  * We provide the ipf_checkp name just to minimize changes later.
  */
 int (*ipf_checkp) __P((void *, ip_t *ip, int hlen, void *ifp, int out, mb_t **mp));
 
 
 static eventhandler_tag ipf_arrivetag, ipf_departtag, ipf_clonetag;
 
 static void ipf_ifevent(void *arg);
 
 /*
  * Event callback: forwards its argument to ipf_sync() with a NULL second
  * argument.
  */
 static void ipf_ifevent(arg)
 	void *arg;
 {
         ipf_sync(arg, NULL);
 }
 
 
 
 /*
  * IPv4 hook wrapper: runs ipf_check() on the packet in *mp using the
  * global ipfmain instance and returns its result.  'dir' is reduced to
  * an "outbound" flag by comparing it with PFIL_OUT; 'arg' is unused.
  */
 static int
 ipf_check_wrapper(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
 {
 	struct ip *ip = mtod(*mp, struct ip *);
 	int rv;
 
 	/*
 	 * IPFilter expects everything in network byte order
 	 */
 #if (__FreeBSD_version < 1000019)
 	ip->ip_len = htons(ip->ip_len);
 	ip->ip_off = htons(ip->ip_off);
 #endif
 	rv = ipf_check(&ipfmain, ip, ip->ip_hl << 2, ifp, (dir == PFIL_OUT),
 		       mp);
 #if (__FreeBSD_version < 1000019)
 	/* Swap back only if the packet was passed and still exists. */
 	if ((rv == 0) && (*mp != NULL)) {
 		ip = mtod(*mp, struct ip *);
 		ip->ip_len = ntohs(ip->ip_len);
 		ip->ip_off = ntohs(ip->ip_off);
 	}
 #endif
 	return rv;
 }
 
 # ifdef USE_INET6
 #  include <netinet/ip6.h>
 
 /*
  * IPv6 hook wrapper: calls ipf_check() on the packet in *mp with a fixed
  * header length of sizeof(struct ip6_hdr) and returns its result.
  */
 static int
 ipf_check_wrapper6(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
 {
 	return (ipf_check(&ipfmain, mtod(*mp, struct ip *),
 			  sizeof(struct ip6_hdr), ifp, (dir == PFIL_OUT), mp));
 }
 # endif
 #if	defined(IPFILTER_LKM)
 /*
  * Returns 1 when s is exactly the string "ipl", 0 otherwise.
  */
 int ipf_identify(s)
 	char *s;
 {
 	return (strcmp(s, "ipl") == 0);
 }
 #endif /* IPFILTER_LKM */
 
 
 /*
  * Periodic callout handler.  Under the ipf_global read lock it runs
  * ipf_slowtimer() while ipf_running is positive, and re-arms itself when
  * ipf_running is 1 or -1.
  */
 static void
 ipf_timer_func(arg)
 	void *arg;
 {
 	ipf_main_softc_t *softc = arg;
 	SPL_INT(s);
 
 	SPL_NET(s);
 	READ_ENTER(&softc->ipf_global);
 
         if (softc->ipf_running > 0)
 		ipf_slowtimer(softc);
 
 	if (softc->ipf_running == -1 || softc->ipf_running == 1) {
 #if 0
 		softc->ipf_slow_ch = timeout(ipf_timer_func, softc, hz/2);
 #endif
 		/*
 		 * NOTE(review): the callout is re-initialised on every
 		 * re-arm, from inside its own handler -- confirm that this
 		 * is intended.
 		 */
 		callout_init(&softc->ipf_slow_ch, 1);
 		callout_reset(&softc->ipf_slow_ch,
 			(hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT,
 			ipf_timer_func, softc);
 	}
 	RWLOCK_EXIT(&softc->ipf_global);
 	SPL_X(s);
 }
 
 
 int
 ipfattach(softc)
 	ipf_main_softc_t *softc;
 {
 #ifdef USE_SPL
 	int s;
 #endif
 
 	SPL_NET(s);
 	if (softc->ipf_running > 0) {
 		SPL_X(s);
 		return EBUSY;
 	}
 
 	if (ipf_init_all(softc) < 0) {
 		SPL_X(s);
 		return EIO;
 	}
 
 
 	if (ipf_checkp != ipf_check) {
 		ipf_savep = ipf_checkp;
 		ipf_checkp = ipf_check;
 	}
 
 	bzero((char *)ipfmain.ipf_selwait, sizeof(ipfmain.ipf_selwait));
 	softc->ipf_running = 1;
 
 	if (softc->ipf_control_forwarding & 1)
 		V_ipforwarding = 1;
 
 	SPL_X(s);
 #if 0
 	softc->ipf_slow_ch = timeout(ipf_timer_func, softc,
 				     (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT);
 #endif
 	callout_init(&softc->ipf_slow_ch, 1);
 	callout_reset(&softc->ipf_slow_ch, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT,
 		ipf_timer_func, softc);
 	return 0;
 }
 
 
 /*
  * Disable the filter by removing the hooks from the IP input/output
  * stream.
  *
  * Also turns IP forwarding off when bit 1 of ipf_control_forwarding is
  * set, drains the slow timer callout, tears the instance down with
  * ipf_fini_all() and sets ipf_running to -2.  Always returns 0.
  */
 int
 ipfdetach(softc)
 	ipf_main_softc_t *softc;
 {
 #ifdef USE_SPL
 	int s;
 #endif
 
 	if (softc->ipf_control_forwarding & 2)
 		V_ipforwarding = 0;
 
 	SPL_NET(s);
 
 #if 0
 	if (softc->ipf_slow_ch.callout != NULL)
 		untimeout(ipf_timer_func, softc, softc->ipf_slow_ch);
 	bzero(&softc->ipf_slow, sizeof(softc->ipf_slow));
 #endif
 	callout_drain(&softc->ipf_slow_ch);
 
 #ifndef NETBSD_PF
 	/* Put back whatever hook ipfattach() saved, then forget it. */
 	if (ipf_checkp != NULL)
 		ipf_checkp = ipf_savep;
 	ipf_savep = NULL;
 #endif
 
 	ipf_fini_all(softc);
 
 	softc->ipf_running = -2;
 
 	SPL_X(s);
 
 	return 0;
 }
 
 
 /*
  * Character-device ioctl entry point for the filter.
  *
  * Rejects write-mode requests at securelevel 3 or above (EPERM), minor
  * numbers outside 0..IPL_LOGMAX (ENXIO), and -- while the filter is not
  * running -- everything except a small set of commands (EIO).  Each
  * rejection records a code in ipfmain.ipf_interror.  Accepted requests
  * are passed to ipf_ioctlswitch() and its result is returned.
  */
 int
 ipfioctl(dev, cmd, data, mode
 , p)
 	struct thread *p;
 #    define	p_cred	td_ucred
 #    define	p_uid	td_ucred->cr_ruid
 	struct cdev *dev;
 	ioctlcmd_t cmd;
 	caddr_t data;
 	int mode;
 {
 	int error, unit;
 	SPL_INT(s);

 #if (BSD >= 199306)
 	if (securelevel_ge(p->p_cred, 3) && (mode & FWRITE)) {
 		ipfmain.ipf_interror = 130001;
 		return EPERM;
 	}
 #endif

 	unit = GET_MINOR(dev);
 	if (unit < 0 || unit > IPL_LOGMAX) {
 		ipfmain.ipf_interror = 130002;
 		return ENXIO;
 	}

 	if (ipfmain.ipf_running <= 0) {
 		/* Not running: only the IPL_LOGIPF device (or an error query). */
 		if (unit != IPL_LOGIPF && cmd != SIOCIPFINTERROR) {
 			ipfmain.ipf_interror = 130003;
 			return EIO;
 		}
 		/* ...and only this handful of commands. */
 		if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
 		    cmd != SIOCIPFSET && cmd != SIOCFRENB &&
 		    cmd != SIOCGETFS && cmd != SIOCGETFF &&
 		    cmd != SIOCIPFINTERROR) {
 			ipfmain.ipf_interror = 130004;
 			return EIO;
 		}
 	}

 	SPL_NET(s);

 	CURVNET_SET(TD_TO_VNET(p));
 	error = ipf_ioctlswitch(&ipfmain, unit, data, cmd, mode, p->p_uid, p);
 	CURVNET_RESTORE();

 	/* Every outcome of the switch leaves through the same exit. */
 	SPL_X(s);
 	return error;
 }
 
 
 /*
  * ipf_send_reset - this could conceivably be a call to tcp_respond(), but that
  * requires a large amount of setting up and isn't any more efficient.
  *
  * Builds a TCP RST in a fresh mbuf, answering the segment described by
  * fin (ports and addresses swapped), and sends it via ipf_send_ip().
  * Returns -1 when the offending segment itself carries RST, when
  * ipf_checkl4sum() fails, or when no mbuf/cluster can be allocated;
  * otherwise the result of ipf_send_ip().
  */
 int
 ipf_send_reset(fin)
 	fr_info_t *fin;
 {
 	struct tcphdr *tcp, *tcp2;
 	int tlen = 0, hlen;
 	struct mbuf *m;
 #ifdef USE_INET6
 	ip6_t *ip6;
 #endif
 	ip_t *ip;
 
 	tcp = fin->fin_dp;
 	if (tcp->th_flags & TH_RST)
 		return -1;		/* feedback loop */
 
 	if (ipf_checkl4sum(fin) == -1)
 		return -1;
 
 	/*
 	 * fin_dlen minus the TCP header length, plus one each for SYN and
 	 * FIN; added to the peer's sequence number below to form the ACK.
 	 */
 	tlen = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
 			((tcp->th_flags & TH_SYN) ? 1 : 0) +
 			((tcp->th_flags & TH_FIN) ? 1 : 0);
 
 #ifdef USE_INET6
 	hlen = (fin->fin_v == 6) ? sizeof(ip6_t) : sizeof(ip_t);
 #else
 	hlen = sizeof(ip_t);
 #endif
 #ifdef MGETHDR
 	MGETHDR(m, M_NOWAIT, MT_HEADER);
 #else
 	MGET(m, M_NOWAIT, MT_HEADER);
 #endif
 	if (m == NULL)
 		return -1;
 	/* Attach a cluster if the headers do not fit in a plain mbuf. */
 	if (sizeof(*tcp2) + hlen > MLEN) {
 		if (!(MCLGET(m, M_NOWAIT))) {
 			FREE_MB_T(m);
 			return -1;
 		}
 	}
 
 	m->m_len = sizeof(*tcp2) + hlen;
 #if (BSD >= 199103)
 	m->m_data += max_linkhdr;
 	m->m_pkthdr.len = m->m_len;
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 #endif
 	ip = mtod(m, struct ip *);
 	bzero((char *)ip, hlen);
 #ifdef USE_INET6
 	ip6 = (ip6_t *)ip;
 #endif
 	tcp2 = (struct tcphdr *)((char *)ip + hlen);
 	tcp2->th_sport = tcp->th_dport;
 	tcp2->th_dport = tcp->th_sport;
 
 	if (tcp->th_flags & TH_ACK) {
 		/* Peer sent an ACK: reset at the sequence number it expects. */
 		tcp2->th_seq = tcp->th_ack;
 		tcp2->th_flags = TH_RST;
 		tcp2->th_ack = 0;
 	} else {
 		/* No ACK from the peer: acknowledge what it sent instead. */
 		tcp2->th_seq = 0;
 		tcp2->th_ack = ntohl(tcp->th_seq);
 		tcp2->th_ack += tlen;
 		tcp2->th_ack = htonl(tcp2->th_ack);
 		tcp2->th_flags = TH_RST|TH_ACK;
 	}
 	TCP_X2_A(tcp2, 0);
 	TCP_OFF_A(tcp2, sizeof(*tcp2) >> 2);
 	tcp2->th_win = tcp->th_win;
 	tcp2->th_sum = 0;
 	tcp2->th_urp = 0;
 
 #ifdef USE_INET6
 	if (fin->fin_v == 6) {
 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
 		ip6->ip6_plen = htons(sizeof(struct tcphdr));
 		ip6->ip6_nxt = IPPROTO_TCP;
 		ip6->ip6_hlim = 0;
 		ip6->ip6_src = fin->fin_dst6.in6;
 		ip6->ip6_dst = fin->fin_src6.in6;
 		tcp2->th_sum = in6_cksum(m, IPPROTO_TCP,
 					 sizeof(*ip6), sizeof(*tcp2));
 		return ipf_send_ip(fin, m);
 	}
 #endif
 	/*
 	 * The checksum is taken over the zeroed IP header with only
 	 * protocol, TCP length and addresses filled in (presumably standing
 	 * in for the pseudo-header); the real ip_len is stored afterwards.
 	 */
 	ip->ip_p = IPPROTO_TCP;
 	ip->ip_len = htons(sizeof(struct tcphdr));
 	ip->ip_src.s_addr = fin->fin_daddr;
 	ip->ip_dst.s_addr = fin->fin_saddr;
 	tcp2->th_sum = in_cksum(m, hlen + sizeof(*tcp2));
 	ip->ip_len = htons(hlen + sizeof(*tcp2));
 	return ipf_send_ip(fin, m);
 }
 
 
 /*
  * ip_len must be in network byte order when called.
  *
  * Completes the IP (or IPv6) header of the locally generated packet in m,
  * using the original packet described by fin for version, TOS and ID,
  * builds a fresh fr_info_t for it and hands it to ipf_fastroute().
  * Returns EINVAL for an unsupported fin_v, otherwise the result of
  * ipf_fastroute().
  */
 static int
 ipf_send_ip(fin, m)
 	fr_info_t *fin;
 	mb_t *m;
 {
 	fr_info_t fnew;
 	ip_t *ip, *oip;
 	int hlen;
 
 	ip = mtod(m, ip_t *);
 	bzero((char *)&fnew, sizeof(fnew));
 	fnew.fin_main_soft = fin->fin_main_soft;
 
 	IP_V_A(ip, fin->fin_v);
 	switch (fin->fin_v)
 	{
 	case 4 :
 		oip = fin->fin_ip;
 		hlen = sizeof(*oip);
 		fnew.fin_v = 4;
 		fnew.fin_p = ip->ip_p;
 		fnew.fin_plen = ntohs(ip->ip_len);
 		IP_HL_A(ip, sizeof(*oip) >> 2);
 		ip->ip_tos = oip->ip_tos;
 		ip->ip_id = fin->fin_ip->ip_id;
 #if defined(FreeBSD) && (__FreeBSD_version > 460000)
 		ip->ip_off = htons(path_mtu_discovery ? IP_DF : 0);
 #else
 		ip->ip_off = 0;
 #endif
 		ip->ip_ttl = V_ip_defttl;
 		ip->ip_sum = 0;
 		break;
 #ifdef USE_INET6
 	case 6 :
 	{
 		ip6_t *ip6 = (ip6_t *)ip;
 
 		ip6->ip6_vfc = 0x60;
 		ip6->ip6_hlim = IPDEFTTL;
 
 		hlen = sizeof(*ip6);
 		fnew.fin_p = ip6->ip6_nxt;
 		fnew.fin_v = 6;
 		/* ip6_plen excludes the fixed header; fin_plen includes it. */
 		fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
 		break;
 	}
 #endif
 	default :
 		return EINVAL;
 	}
 #ifdef IPSEC
 	m->m_pkthdr.rcvif = NULL;
 #endif
 
 	fnew.fin_ifp = fin->fin_ifp;
 	fnew.fin_flx = FI_NOCKSUM;
 	fnew.fin_m = m;
 	fnew.fin_ip = ip;
 	fnew.fin_mp = &m;
 	fnew.fin_hlen = hlen;
 	fnew.fin_dp = (char *)ip + hlen;
 	(void) ipf_makefrip(hlen, ip, &fnew);
 
 	return ipf_fastroute(m, &m, &fnew, NULL);
 }
 
 
 /*
  * Build and send an ICMP (or ICMPv6) error of the given type in response
  * to the packet described by fin.
  *
  * The source address of the reply is looked up with ipf_ifpaddr() on the
  * receiving interface when dst == 0, and taken from the original packet's
  * destination otherwise.  The reply quotes the original IP header plus up
  * to 'xtra' bytes of its payload.
  *
  * Returns -1 on an invalid type/code, checksum failure, allocation
  * failure, address lookup failure or unsupported IP version; 0 when the
  * offending IPv4 packet is itself an ICMP message other than
  * echo/timestamp/info/mask requests; otherwise the result of
  * ipf_send_ip().
  */
 int
 ipf_send_icmp_err(type, fin, dst)
 	int type;
 	fr_info_t *fin;
 	int dst;
 {
 	int err, hlen, xtra, iclen, ohlen, avail, code;
 	struct in_addr dst4;
 	struct icmp *icmp;
 	struct mbuf *m;
 	i6addr_t dst6;
 	void *ifp;
 #ifdef USE_INET6
 	ip6_t *ip6;
 #endif
 	ip_t *ip, *ip2;
 
 	if ((type < 0) || (type >= ICMP_MAXTYPE))
 		return -1;
 
 	code = fin->fin_icode;
 #ifdef USE_INET6
 #if 0
 	/* XXX Fix an off by one error: s/>/>=/
 	 was:
 	 if ((code < 0) || (code > sizeof(icmptoicmp6unreach)/sizeof(int)))
 	 Fix obtained from NetBSD ip_fil_netbsd.c r1.4: */
 #endif
 	if ((code < 0) || (code >= sizeof(icmptoicmp6unreach)/sizeof(int)))
 		return -1;
 #endif
 
 	if (ipf_checkl4sum(fin) == -1)
 		return -1;
 #ifdef MGETHDR
 	MGETHDR(m, M_NOWAIT, MT_HEADER);
 #else
 	MGET(m, M_NOWAIT, MT_HEADER);
 #endif
 	if (m == NULL)
 		return -1;
 	avail = MHLEN;
 
 	xtra = 0;
 	hlen = 0;
 	ohlen = 0;
 	dst4.s_addr = 0;
 	ifp = fin->fin_ifp;
 	if (fin->fin_v == 4) {
 		/*
 		 * Never answer an ICMP message with an ICMP error unless it
 		 * is one of the request types listed below.
 		 */
 		if ((fin->fin_p == IPPROTO_ICMP) && !(fin->fin_flx & FI_SHORT))
 			switch (ntohs(fin->fin_data[0]) >> 8)
 			{
 			case ICMP_ECHO :
 			case ICMP_TSTAMP :
 			case ICMP_IREQ :
 			case ICMP_MASKREQ :
 				break;
 			default :
 				FREE_MB_T(m);
 				return 0;
 			}
 
 		if (dst == 0) {
 			if (ipf_ifpaddr(&ipfmain, 4, FRI_NORMAL, ifp,
 					&dst6, NULL) == -1) {
 				FREE_MB_T(m);
 				return -1;
 			}
 			dst4 = dst6.in4;
 		} else
 			dst4.s_addr = fin->fin_daddr;
 
 		hlen = sizeof(ip_t);
 		ohlen = fin->fin_hlen;
 		iclen = hlen + offsetof(struct icmp, icmp_ip) + ohlen;
 		/* Quote at most 8 bytes beyond the original IP header. */
 		if (fin->fin_hlen < fin->fin_plen)
 			xtra = MIN(fin->fin_dlen, 8);
 		else
 			xtra = 0;
 	}
 
 #ifdef USE_INET6
 	else if (fin->fin_v == 6) {
 		hlen = sizeof(ip6_t);
 		ohlen = sizeof(ip6_t);
 		iclen = hlen + offsetof(struct icmp, icmp_ip) + ohlen;
 		type = icmptoicmp6types[type];
 		if (type == ICMP6_DST_UNREACH)
 			code = icmptoicmp6unreach[code];
 
 		/* Switch to a cluster if the quoted packet will not fit. */
 		if (iclen + max_linkhdr + fin->fin_plen > avail) {
 			if (!(MCLGET(m, M_NOWAIT))) {
 				FREE_MB_T(m);
 				return -1;
 			}
 			avail = MCLBYTES;
 		}
 		xtra = MIN(fin->fin_plen, avail - iclen - max_linkhdr);
 		xtra = MIN(xtra, IPV6_MMTU - iclen);
 		if (dst == 0) {
 			if (ipf_ifpaddr(&ipfmain, 6, FRI_NORMAL, ifp,
 					&dst6, NULL) == -1) {
 				FREE_MB_T(m);
 				return -1;
 			}
 		} else
 			dst6 = fin->fin_dst6;
 	}
 #endif
 	else {
 		FREE_MB_T(m);
 		return -1;
 	}
 
 	/* Space left once the link header and fixed parts are accounted for. */
 	avail -= (max_linkhdr + iclen);
 	if (avail < 0) {
 		FREE_MB_T(m);
 		return -1;
 	}
 	if (xtra > avail)
 		xtra = avail;
 	iclen += xtra;
 	m->m_data += max_linkhdr;
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 	m->m_pkthdr.len = iclen;
 	m->m_len = iclen;
 	ip = mtod(m, ip_t *);
 	icmp = (struct icmp *)((char *)ip + hlen);
 	ip2 = (ip_t *)&icmp->icmp_ip;
 
 	icmp->icmp_type = type;
 	/*
 	 * NOTE(review): the ICMPv6-translated 'code' computed above is not
 	 * used here; the untranslated fin_icode is stored for both address
 	 * families -- confirm whether that is intended.
 	 */
 	icmp->icmp_code = fin->fin_icode;
 	icmp->icmp_cksum = 0;
 #ifdef icmp_nextmtu
 	if (type == ICMP_UNREACH && fin->fin_icode == ICMP_UNREACH_NEEDFRAG) {
 		if (fin->fin_mtu != 0) {
 			icmp->icmp_nextmtu = htons(fin->fin_mtu);
 
 		} else if (ifp != NULL) {
 			icmp->icmp_nextmtu = htons(GETIFMTU_4(ifp));
 
 		} else {	/* make up a number... */
 			icmp->icmp_nextmtu = htons(fin->fin_plen - 20);
 		}
 	}
 #endif
 
 	/* Quote the original IP header. */
 	bcopy((char *)fin->fin_ip, (char *)ip2, ohlen);
 
 #ifdef USE_INET6
 	ip6 = (ip6_t *)ip;
 	if (fin->fin_v == 6) {
 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
 		ip6->ip6_plen = htons(iclen - hlen);
 		ip6->ip6_nxt = IPPROTO_ICMPV6;
 		ip6->ip6_hlim = 0;
 		ip6->ip6_src = dst6.in6;
 		ip6->ip6_dst = fin->fin_src6.in6;
 		if (xtra > 0)
 			bcopy((char *)fin->fin_ip + ohlen,
 			      (char *)&icmp->icmp_ip + ohlen, xtra);
 		icmp->icmp_cksum = in6_cksum(m, IPPROTO_ICMPV6,
 					     sizeof(*ip6), iclen - hlen);
 	} else
 #endif
 	{
 		ip->ip_p = IPPROTO_ICMP;
 		ip->ip_src.s_addr = dst4.s_addr;
 		ip->ip_dst.s_addr = fin->fin_saddr;
 
 		if (xtra > 0)
 			bcopy((char *)fin->fin_ip + ohlen,
 			      (char *)&icmp->icmp_ip + ohlen, xtra);
 		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
 					     sizeof(*icmp) + 8);
 		ip->ip_len = htons(iclen);
 		ip->ip_p = IPPROTO_ICMP;
 	}
 	err = ipf_send_ip(fin, m);
 	return err;
 }
 
 
 
 
 /*
  * ipf_fastroute - hand a packet directly to an interface's if_output
  * routine, fragmenting it first when it is larger than the interface MTU.
  *
  * m0  - pointer to mbuf where the IP packet starts
  * mpp - pointer to the mbuf pointer that is the start of the mbuf chain
  * fin - packet information for the packet being routed
  * fdp - optional destination (interface and/or next hop address); when
  *       NULL the interface is taken from fin->fin_ifp
  *
  * On every path the mbuf is either passed to an output routine or freed
  * here.  For IPv4 the outcome is only recorded in ipfmain.ipf_frouteok[]
  * (index 0 = success, 1 = failure) and the function returns 0; for IPv6
  * the return value of ip6_output() is passed back.
  */
 int
 ipf_fastroute(m0, mpp, fin, fdp)
 	mb_t *m0, **mpp;
 	fr_info_t *fin;
 	frdest_t *fdp;
 {
 	register struct ip *ip, *mhip;
 	register struct mbuf *m = *mpp;
-	register struct route *ro;
 	int len, off, error = 0, hlen, code;
 	struct ifnet *ifp, *sifp;
-	struct sockaddr_in *dst;
-	struct route iproute;
+	struct sockaddr_in dst;
+	struct nhop4_extended nh4;
+	int has_nhop = 0;
+	u_long fibnum = 0;
 	u_short ip_off;
 	frdest_t node;
 	frentry_t *fr;
 
-	ro = NULL;
-
 #ifdef M_WRITABLE
 	/*
 	* HOT FIX/KLUDGE:
 	*
 	* If the mbuf we're about to send is not writable (because of
 	* a cluster reference, for example) we'll need to make a copy
 	* of it since this routine modifies the contents.
 	*
 	* If you have non-crappy network hardware that can transmit data
 	* from the mbuf, rather than making a copy, this is gonna be a
 	* problem.
 	*/
 	if (M_WRITABLE(m) == 0) {
 		m0 = m_dup(m, M_NOWAIT);
 		if (m0 != 0) {
 			FREE_MB_T(m);
 			m = m0;
 			*mpp = m;
 		} else {
 			error = ENOBUFS;
 			FREE_MB_T(m);
 			goto done;
 		}
 	}
 #endif
 
 #ifdef USE_INET6
 	if (fin->fin_v == 6) {
 		/*
 		 * currently "to <if>" and "to <if>:ip#" are not supported
 		 * for IPv6
 		 */
 		return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
 	}
 #endif
 
 	hlen = fin->fin_hlen;
 	ip = mtod(m0, struct ip *);
 	ifp = NULL;
 
 	/*
 	 * Route packet.
 	 */
-	ro = &iproute;
-	bzero(ro, sizeof (*ro));
-	dst = (struct sockaddr_in *)&ro->ro_dst;
-	dst->sin_family = AF_INET;
-	dst->sin_addr = ip->ip_dst;
+	bzero(&dst, sizeof (dst));
+	dst.sin_family = AF_INET;
+	dst.sin_addr = ip->ip_dst;
+	dst.sin_len = sizeof(dst);
 
 	fr = fin->fin_fr;
 	/*
 	 * A destination list is resolved to one concrete node, but only for
 	 * rules that do not keep state.
 	 */
 	if ((fr != NULL) && !(fr->fr_flags & FR_KEEPSTATE) && (fdp != NULL) &&
 	    (fdp->fd_type == FRD_DSTLIST)) {
 		if (ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL, &node) == 0)
 			fdp = &node;
 	}
 
 	/* Prefer the interface named by fdp, else the one recorded in fin. */
 	if (fdp != NULL)
 		ifp = fdp->fd_ptr;
 	else
 		ifp = fin->fin_ifp;
 
 	if ((ifp == NULL) && ((fr == NULL) || !(fr->fr_flags & FR_FASTROUTE))) {
 		error = -2;
 		goto bad;
 	}
 
 	/* An explicit next hop address in fdp overrides the packet's own. */
 	if ((fdp != NULL) && (fdp->fd_ip.s_addr != 0))
-		dst->sin_addr = fdp->fd_ip;
+		dst.sin_addr = fdp->fd_ip;
 
-	dst->sin_len = sizeof(*dst);
-	in_rtalloc(ro, M_GETFIB(m0));
-
-	if ((ifp == NULL) && (ro->ro_rt != NULL))
-		ifp = ro->ro_rt->rt_ifp;
-
-	if ((ro->ro_rt == NULL) || (ifp == NULL)) {
+	fibnum = M_GETFIB(m0);
+	if (fib4_lookup_nh_ext(fibnum, dst.sin_addr, NHR_REF, 0, &nh4) != 0) {
 		if (in_localaddr(ip->ip_dst))
 			error = EHOSTUNREACH;
 		else
 			error = ENETUNREACH;
 		goto bad;
 	}
-	if (ro->ro_rt->rt_flags & RTF_GATEWAY)
-		dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
-	if (ro->ro_rt)
-		counter_u64_add(ro->ro_rt->rt_pksent, 1);
 
+	has_nhop = 1;
+	if (ifp == NULL)
+		ifp = nh4.nh_ifp;
+	if (nh4.nh_flags & NHF_GATEWAY)
+		dst.sin_addr = nh4.nh_addr;
+
 	/*
 	 * For input packets which are being "fastrouted", they won't
 	 * go back through output filtering and miss their chance to get
 	 * NAT'd and counted.  Duplicated packets aren't considered to be
 	 * part of the normal packet stream, so do not NAT them or pass
 	 * them through stateful checking, etc.
 	 */
 	if ((fdp != &fr->fr_dif) && (fin->fin_out == 0)) {
 		/* Temporarily present the packet as outbound on ifp. */
 		sifp = fin->fin_ifp;
 		fin->fin_ifp = ifp;
 		fin->fin_out = 1;
 		(void) ipf_acctpkt(fin, NULL);
 		fin->fin_fr = NULL;
 		if (!fr || !(fr->fr_flags & FR_RETMASK)) {
 			u_32_t pass;
 
 			(void) ipf_state_check(fin, &pass);
 		}
 
 		switch (ipf_nat_checkout(fin, NULL))
 		{
 		case 0 :
 			break;
 		case 1 :
 			ip->ip_sum = 0;
 			break;
 		case -1 :
 			/*
 			 * NOTE(review): this exit skips the restore of
 			 * fin_ifp/fin_out just below the switch - confirm
 			 * that callers do not depend on them afterwards.
 			 */
 			error = -1;
 			goto bad;
 			break;
 		}
 
 		fin->fin_ifp = sifp;
 		fin->fin_out = 0;
 	} else
 		ip->ip_sum = 0;
 	/*
 	 * If small enough for interface, can just send directly.
 	 */
 	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
 		/* A zero ip_sum is treated as "needs to be computed". */
 		if (!ip->ip_sum)
 			ip->ip_sum = in_cksum(m, hlen);
-		error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst,
-			    ro
+		error = (*ifp->if_output)(ifp, m, (struct sockaddr *)&dst,
+			    NULL
 			);
 		goto done;
 	}
 	/*
 	 * Too large for interface; fragment if possible.
 	 * Must be able to put at least 8 bytes per fragment.
 	 */
 	ip_off = ntohs(ip->ip_off);
 	if (ip_off & IP_DF) {
 		error = EMSGSIZE;
 		goto bad;
 	}
 	/* Payload bytes per fragment, rounded down to a multiple of 8. */
 	len = (ifp->if_mtu - hlen) &~ 7;
 	if (len < 8) {
 		error = EMSGSIZE;
 		goto bad;
 	}
 
     {
 	int mhlen, firstlen = len;
 	struct mbuf **mnext = &m->m_act;
 
 	/*
 	 * Loop through length of segment after first fragment,
 	 * make new header and copy data of each part and link onto chain.
 	 */
 	m0 = m;
 	mhlen = sizeof (struct ip);
 	for (off = hlen + len; off < ntohs(ip->ip_len); off += len) {
 #ifdef MGETHDR
 		MGETHDR(m, M_NOWAIT, MT_HEADER);
 #else
 		MGET(m, M_NOWAIT, MT_HEADER);
 #endif
 		if (m == 0) {
 			m = m0;
 			error = ENOBUFS;
 			goto bad;
 		}
 		m->m_data += max_linkhdr;
 		mhip = mtod(m, struct ip *);
 		bcopy((char *)ip, (char *)mhip, sizeof(*ip));
 		if (hlen > sizeof (struct ip)) {
 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
 			IP_HL_A(mhip, mhlen >> 2);
 		}
 		m->m_len = mhlen;
 		mhip->ip_off = ((off - hlen) >> 3) + ip_off;
 		/* The last fragment carries the remainder and no IP_MF. */
 		if (off + len >= ntohs(ip->ip_len))
 			len = ntohs(ip->ip_len) - off;
 		else
 			mhip->ip_off |= IP_MF;
 		mhip->ip_len = htons((u_short)(len + mhlen));
 		*mnext = m;
 		m->m_next = m_copy(m0, off, len);
 		if (m->m_next == 0) {
 			error = ENOBUFS;	/* ??? */
 			goto sendorfree;
 		}
 		m->m_pkthdr.len = mhlen + len;
 		m->m_pkthdr.rcvif = NULL;
 		mhip->ip_off = htons((u_short)mhip->ip_off);
 		mhip->ip_sum = 0;
 		mhip->ip_sum = in_cksum(m, mhlen);
 		mnext = &m->m_act;
 	}
 	/*
 	 * Update first fragment by trimming what's been copied out
 	 * and updating header, then send each fragment (in order).
 	 */
 	/*
 	 * NOTE(review): ip->ip_len is read through ntohs() everywhere else
 	 * in this function but is used raw in the m_adj() call below -
 	 * confirm the intended byte order.
 	 */
 	m_adj(m0, hlen + firstlen - ip->ip_len);
 	ip->ip_len = htons((u_short)(hlen + firstlen));
 	ip->ip_off = htons((u_short)IP_MF);
 	ip->ip_sum = 0;
 	ip->ip_sum = in_cksum(m0, hlen);
 sendorfree:
 	/* Walk the m_act list: send until the first error, free the rest. */
 	for (m = m0; m; m = m0) {
 		m0 = m->m_act;
 		m->m_act = 0;
 		if (error == 0)
 			error = (*ifp->if_output)(ifp, m,
-			    (struct sockaddr *)dst,
-			    ro
+			    (struct sockaddr *)&dst,
+			    NULL
 			    );
 		else
 			FREE_MB_T(m);
 	}
     }
 done:
 	/* Count the outcome; the error code itself is not returned. */
 	if (!error)
 		ipfmain.ipf_frouteok[0]++;
 	else
 		ipfmain.ipf_frouteok[1]++;
 
-	if ((ro != NULL) && (ro->ro_rt != NULL)) {
-		RTFREE(ro->ro_rt);
-	}
+	if (has_nhop)
+		fib4_free_nh_ext(fibnum, &nh4);
+
 	return 0;
 bad:
 	/*
 	 * Failure exit: for EMSGSIZE send an ICMP "need fragmentation"
 	 * error (with fin_ifp/fin_icode temporarily swapped), then free
 	 * the packet and fall into the common accounting at "done".
 	 */
 	if (error == EMSGSIZE) {
 		sifp = fin->fin_ifp;
 		code = fin->fin_icode;
 		fin->fin_icode = ICMP_UNREACH_NEEDFRAG;
 		fin->fin_ifp = ifp;
 		(void) ipf_send_icmp_err(ICMP_UNREACH, fin, 1);
 		fin->fin_ifp = sifp;
 		fin->fin_icode = code;
 	}
 	FREE_MB_T(m);
 	goto done;
 }
 
 
 /*
  * Look up the packet's source address in FIB 0 and compare the interface
  * the lookup yields with fin->fin_ifp.  Returns non-zero when they are the
  * same interface and 0 otherwise, including when the lookup fails.
  */
 int
 ipf_verifysrc(fin)
 	fr_info_t *fin;
 {
-	struct sockaddr_in *dst;
-	struct route iproute;
+	struct nhop4_basic nh4;
 
-	bzero((char *)&iproute, sizeof(iproute));
-	dst = (struct sockaddr_in *)&iproute.ro_dst;
-	dst->sin_len = sizeof(*dst);
-	dst->sin_family = AF_INET;
-	dst->sin_addr = fin->fin_src;
-	in_rtalloc(&iproute, 0);
-	if (iproute.ro_rt == NULL)
-		return 0;
-	return (fin->fin_ifp == iproute.ro_rt->rt_ifp);
+	if (fib4_lookup_nh_basic(0, fin->fin_src, 0, 0, &nh4) != 0)
+		return (0);
+	return (fin->fin_ifp == nh4.nh_ifp);
 }
 
 
 /*
  * Return the first IP address of the requested family associated with an
  * interface.
  *
  * softc   - ipf soft context (not used by this function)
  * v       - IP version wanted, 4 or 6
  * atype   - FRI_BROADCAST / FRI_PEERADDR select the broadcast or peer
  *           address of the matching entry; any other value selects the
  *           address itself
  * ifptr   - the interface (struct ifnet *); NULL and (void *)-1 are rejected
  * inp     - where the address is stored
  * inpmask - where the netmask is stored
  *
  * For IPv6, link-local and loopback addresses are skipped.  Returns -1 when
  * no suitable address exists, otherwise the result of the
  * ipf_ifpfillv4addr()/ipf_ifpfillv6addr() helper that copies the values out.
  */
 int
 ipf_ifpaddr(softc, v, atype, ifptr, inp, inpmask)
 	ipf_main_softc_t *softc;
 	int v, atype;
 	void *ifptr;
 	i6addr_t *inp, *inpmask;
 {
 #ifdef USE_INET6
 	struct in6_addr *inp6 = NULL;
 #endif
 	struct sockaddr *sock, *mask;
 	struct sockaddr_in *sin;
 	struct ifaddr *ifa;
 	struct ifnet *ifp;
 
 	if ((ifptr == NULL) || (ifptr == (void *)-1))
 		return -1;
 
 	sin = NULL;
 	ifp = ifptr;
 
 	/* Start from an all-zero result for the requested family. */
 	if (v == 4)
 		inp->in4.s_addr = 0;
 #ifdef USE_INET6
 	else if (v == 6)
 		bzero((char *)inp, sizeof(*inp));
 #endif
 	ifa = TAILQ_FIRST(&ifp->if_addrhead);
 	/*
 	 * TAILQ_FIRST() yields NULL for an empty list; do not dereference
 	 * it when the interface has no addresses at all.
 	 */
 	if (ifa == NULL)
 		return -1;
 
 	sock = ifa->ifa_addr;
 	/* Walk the address list until an entry of the wanted family is found. */
 	while (sock != NULL && ifa != NULL) {
 		sin = (struct sockaddr_in *)sock;
 		if ((v == 4) && (sin->sin_family == AF_INET))
 			break;
 #ifdef USE_INET6
 		if ((v == 6) && (sin->sin_family == AF_INET6)) {
 			inp6 = &((struct sockaddr_in6 *)sin)->sin6_addr;
 			if (!IN6_IS_ADDR_LINKLOCAL(inp6) &&
 			    !IN6_IS_ADDR_LOOPBACK(inp6))
 				break;
 		}
 #endif
 		ifa = TAILQ_NEXT(ifa, ifa_link);
 		if (ifa != NULL)
 			sock = ifa->ifa_addr;
 	}
 
 	/* ifa is NULL when the list was exhausted without a match. */
 	if (ifa == NULL || sin == NULL)
 		return -1;
 
 	mask = ifa->ifa_netmask;
 	if (atype == FRI_BROADCAST)
 		sock = ifa->ifa_broadaddr;
 	else if (atype == FRI_PEERADDR)
 		sock = ifa->ifa_dstaddr;
 
 	if (sock == NULL)
 		return -1;
 
 #ifdef USE_INET6
 	if (v == 6) {
 		return ipf_ifpfillv6addr(atype, (struct sockaddr_in6 *)sock,
 					 (struct sockaddr_in6 *)mask,
 					 inp, inpmask);
 	}
 #endif
 	return ipf_ifpfillv4addr(atype, (struct sockaddr_in *)sock,
 				 (struct sockaddr_in *)mask,
 				 &inp->in4, &inpmask->in4);
 }
 
 
 /*
  * Produce a new initial sequence number.  The packet information is not
  * consulted; the value comes straight from arc4random().
  */
 u_32_t
 ipf_newisn(fin)
 	fr_info_t *fin;
 {
 	return arc4random();
 }
 
 
 /*
  * Check the checksum of an IPv4 packet.  When CSUM_DATA_VALID is defined
  * the checksum information recorded in the mbuf packet header
  * (csum_flags/csum_data) is consulted first and ipf_checkl4sum() is only
  * called when that gives no answer; otherwise ipf_checkl4sum() is always
  * used.
  *
  * Returns 0 when FI_NOCKSUM is set or the checksum is accepted, 1 for
  * packets flagged FI_SHORT, and -1 on the failure paths below.  The verdict
  * is cached in fin->fin_cksum, so a later call returns it without
  * re-checking.
  */
 INLINE int
 ipf_checkv4sum(fin)
 	fr_info_t *fin;
 {
 #ifdef CSUM_DATA_VALID
 	int manual = 0;
 	u_short sum;
 	ip_t *ip;
 	mb_t *m;
 
 	if ((fin->fin_flx & FI_NOCKSUM) != 0)
 		return 0;
 
 	if ((fin->fin_flx & FI_SHORT) != 0)
 		return 1;
 
 	/* A verdict was already recorded for this packet: reuse it. */
 	if (fin->fin_cksum != FI_CK_NEEDED)
 		return (fin->fin_cksum > FI_CK_NEEDED) ? 0 : -1;
 
 	m = fin->fin_m;
 	if (m == NULL) {
 		/* No mbuf, so no csum_flags to consult. */
 		manual = 1;
 		goto skipauto;
 	}
 	ip = fin->fin_ip;
 
 	/* CSUM_IP_CHECKED without CSUM_IP_VALID is treated as a bad sum. */
 	if ((m->m_pkthdr.csum_flags & (CSUM_IP_CHECKED|CSUM_IP_VALID)) ==
 	    CSUM_IP_CHECKED) {
 		fin->fin_cksum = FI_CK_BAD;
 		fin->fin_flx |= FI_BAD;
 		return -1;
 	}
 	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 		/* Depending on the driver, UDP may have zero checksum */
 		if (fin->fin_p == IPPROTO_UDP && (fin->fin_flx &
 		    (FI_FRAG|FI_SHORT|FI_BAD)) == 0) {
 			udphdr_t *udp = fin->fin_dp;
 			if (udp->uh_sum == 0) {
 				/*
 				 * we're good no matter what the hardware
 				 * checksum flags and csum_data say (handling
 				 * of csum_data for zero UDP checksum is not
 				 * consistent across all drivers)
 				 */
 				fin->fin_cksum = 1;
 				return 0;
 			}
 		}
 
 		/*
 		 * Use csum_data directly when it already covers the pseudo
 		 * header, otherwise fold the pseudo header in here.
 		 */
 		if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 			sum = m->m_pkthdr.csum_data;
 		else
 			sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 					htonl(m->m_pkthdr.csum_data +
 					fin->fin_dlen + fin->fin_p));
 		sum ^= 0xffff;
 		/*
 		 * NOTE(review): a non-zero sum marks the packet FI_BAD but,
 		 * with manual still 0, control reaches the final "return 0"
 		 * rather than returning -1 - confirm that callers rely on
 		 * FI_BAD and not on the return value here.
 		 */
 		if (sum != 0) {
 			fin->fin_cksum = FI_CK_BAD;
 			fin->fin_flx |= FI_BAD;
 		} else {
 			fin->fin_cksum = FI_CK_SUMOK;
 			return 0;
 		}
 	} else {
 		/*
 		 * csum_flags is compared for equality, so each branch only
 		 * matches when exactly that flag value is set.
 		 */
 		if (m->m_pkthdr.csum_flags == CSUM_DELAY_DATA) {
 			fin->fin_cksum = FI_CK_L4FULL;
 			return 0;
 		} else if (m->m_pkthdr.csum_flags == CSUM_TCP ||
 			   m->m_pkthdr.csum_flags == CSUM_UDP) {
 			fin->fin_cksum = FI_CK_L4PART;
 			return 0;
 		} else if (m->m_pkthdr.csum_flags == CSUM_IP) {
 			fin->fin_cksum = FI_CK_L4PART;
 			return 0;
 		} else {
 			manual = 1;
 		}
 	}
 skipauto:
 	/* Nothing usable in the mbuf header: verify the checksum in software. */
 	if (manual != 0) {
 		if (ipf_checkl4sum(fin) == -1) {
 			fin->fin_flx |= FI_BAD;
 			return -1;
 		}
 	}
 #else
 	if (ipf_checkl4sum(fin) == -1) {
 		fin->fin_flx |= FI_BAD;
 		return -1;
 	}
 #endif
 	return 0;
 }
 
 
 #ifdef USE_INET6
 /*
  * Check the layer 4 checksum of an IPv6 packet with ipf_checkl4sum().
  * Returns 0 when FI_NOCKSUM is set or the checksum is accepted, 1 for
  * packets flagged FI_SHORT and -1 on failure (FI_BAD is set when the check
  * itself fails).  A verdict already stored in fin->fin_cksum is reused.
  */
 INLINE int
 ipf_checkv6sum(fin)
 	fr_info_t *fin;
 {
 	/* FI_NOCKSUM set: report success without looking at the packet. */
 	if (fin->fin_flx & FI_NOCKSUM)
 		return 0;
 
 	if (fin->fin_flx & FI_SHORT)
 		return 1;
 
 	/* Anything other than FI_CK_NEEDED is an earlier, cached verdict. */
 	if (fin->fin_cksum != FI_CK_NEEDED) {
 		if (fin->fin_cksum > FI_CK_NEEDED)
 			return 0;
 		return -1;
 	}
 
 	if (ipf_checkl4sum(fin) != -1)
 		return 0;
 
 	fin->fin_flx |= FI_BAD;
 	return -1;
 }
 #endif /* USE_INET6 */
 
 
 /*
  * Return the number of data bytes in the mbuf chain starting at m0: the
  * packet header length when M_PKTHDR is set on the first mbuf, otherwise
  * the sum of m_len over the m_next chain.
  */
 size_t
 mbufchainlen(m0)
 	struct mbuf *m0;
 {
 	struct mbuf *cur;
 	size_t total;
 
 	if (m0->m_flags & M_PKTHDR)
 		return m0->m_pkthdr.len;
 
 	total = 0;
 	cur = m0;
 	while (cur != NULL) {
 		total += cur->m_len;
 		cur = cur->m_next;
 	}
 	return total;
 }
 
 
 /* ------------------------------------------------------------------------ */
 /* Function:    ipf_pullup                                                  */
 /* Returns:     NULL == pullup failed, else pointer to protocol header      */
 /* Parameters:  xmin(I)- pointer to buffer where data packet starts         */
 /*              fin(I) - pointer to packet information                      */
 /*              len(I) - number of bytes to pullup                          */
 /*                                                                          */
 /* Attempt to move at least len bytes (from the start of the buffer) into a */
 /* single buffer for ease of access.  Operating system native functions are */
 /* used to manage buffers - if necessary.  If the entire packet ends up in  */
 /* a single buffer, set the FI_COALESCE flag even though ipf_coalesce() has */
 /* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
 /* and ONLY if the pullup succeeds.  fin_fraghdr, if set, is re-based too.  */
 /*                                                                          */
 /* We assume that 'xmin' is a pointer to a buffer that is part of the chain */
 /* of buffers that starts at *fin->fin_mp.                                  */
 /* ------------------------------------------------------------------------ */
 void *
 ipf_pullup(xmin, fin, len)
 	mb_t *xmin;
 	fr_info_t *fin;
 	int len;
 {
 	int dpoff, ipoff, fhoff;
 	mb_t *m = xmin;
 	char *ip;
 
 	if (m == NULL)
 		return NULL;
 
 	ip = (char *)fin->fin_ip;
 	/* Already flagged as coalesced: nothing to pull up. */
 	if ((fin->fin_flx & FI_COALESCE) != 0)
 		return ip;
 
 	/*
 	 * Remember fin_dp and fin_fraghdr as offsets from the current IP
 	 * header so that they can be re-based once the data has moved.
 	 */
 	ipoff = fin->fin_ipoff;
 	if (fin->fin_dp != NULL)
 		dpoff = (char *)fin->fin_dp - (char *)ip;
 	else
 		dpoff = 0;
 	if (fin->fin_fraghdr != NULL)
 		fhoff = (char *)fin->fin_fraghdr - (char *)ip;
 	else
 		fhoff = 0;
 
 	if (M_LEN(m) < len) {
 		mb_t *n = *fin->fin_mp;
 		/*
 		 * Assume that M_PKTHDR is set and just work with what is left
 		 * rather than check..
 		 * Should not make any real difference, anyway.
 		 */
 		if (m != n) {
 			/*
 			 * Record the mbuf that points to the mbuf that we're
 			 * about to go to work on so that we can update the
 			 * m_next appropriately later.
 			 */
 			for (; n->m_next != m; n = n->m_next)
 				;
 		} else {
 			n = NULL;
 		}
 
 #ifdef MHLEN
 		if (len > MHLEN)
 #else
 		if (len > MLEN)
 #endif
 		{
 #ifdef HAVE_M_PULLDOWN
 			if (m_pulldown(m, 0, len, NULL) == NULL)
 				m = NULL;
 #else
 			/* No way to satisfy the request: drop the whole chain. */
 			FREE_MB_T(*fin->fin_mp);
 			m = NULL;
 			n = NULL;
 #endif
 		} else
 		{
 			m = m_pullup(m, len);
 		}
 		if (n != NULL)
 			n->m_next = m;
 		if (m == NULL) {
 			/*
 			 * When n is non-NULL, it indicates that m pointed to
 			 * a sub-chain (tail) of the mbuf and that the head
 			 * of this chain has not yet been free'd.
 			 */
 			if (n != NULL) {
 				FREE_MB_T(*fin->fin_mp);
 			}
 
 			*fin->fin_mp = NULL;
 			fin->fin_m = NULL;
 			return NULL;
 		}
 
 		if (n == NULL)
 			*fin->fin_mp = m;
 
 		/* Skip over any leading mbufs that hold no data. */
 		while (M_LEN(m) == 0) {
 			m = m->m_next;
 		}
 		fin->fin_m = m;
 		ip = MTOD(m, char *) + ipoff;
 
 		fin->fin_ip = (ip_t *)ip;
 		if (fin->fin_dp != NULL)
 			fin->fin_dp = (char *)fin->fin_ip + dpoff;
 		/*
 		 * Re-base with the offset saved before the pullup; fin_ip
 		 * has been overwritten by now, so the old header address
 		 * is no longer available to compute it from.
 		 */
 		if (fin->fin_fraghdr != NULL)
 			fin->fin_fraghdr = (char *)fin->fin_ip + fhoff;
 	}
 
 	if (len == fin->fin_plen)
 		fin->fin_flx |= FI_COALESCE;
 	return ip;
 }
 
 
 int
 ipf_inject(fin, m)
 	fr_info_t *fin;
 	mb_t *m;
 {
 	int error = 0;
 
 	if (fin->fin_out == 0) {
 		netisr_dispatch(NETISR_IP, m);
 	} else {
 		fin->fin_ip->ip_len = ntohs(fin->fin_ip->ip_len);
 		fin->fin_ip->ip_off = ntohs(fin->fin_ip->ip_off);
 		error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
 	}
 
 	return error;
 }
 
 /*
  * Remove the ipf_check_wrapper hooks from the AF_INET pfil head and, with
  * USE_INET6, ipf_check_wrapper6 from the AF_INET6 head.  A missing head is
  * skipped silently.  Always returns 0.
  */
 int ipf_pfil_unhook(void) {
 	/*
 	 * NOTE(review): the declarations are additionally guarded by
 	 * __FreeBSD_version >= 500011 while their uses below depend on
 	 * NETBSD_PF alone.
 	 */
 #if defined(NETBSD_PF) && (__FreeBSD_version >= 500011)
 	struct pfil_head *ph_inet;
 #  ifdef USE_INET6
 	struct pfil_head *ph_inet6;
 #  endif
 #endif
 
 #ifdef NETBSD_PF
 	ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
 	if (ph_inet != NULL)
 		pfil_remove_hook((void *)ipf_check_wrapper, NULL,
 		    PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet);
 # ifdef USE_INET6
 	ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
 	if (ph_inet6 != NULL)
 		pfil_remove_hook((void *)ipf_check_wrapper6, NULL,
 		    PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6);
 # endif
 #endif
 
 	return (0);
 }
 
 /*
  * Attach ipf_check_wrapper to the AF_INET pfil head and, with USE_INET6,
  * ipf_check_wrapper6 to the AF_INET6 head, for both directions.  Returns
  * ENODEV when no pfil head could be found (with USE_INET6: when neither
  * exists), otherwise 0.  The results of pfil_add_hook() are not checked.
  */
 int ipf_pfil_hook(void) {
 	/*
 	 * NOTE(review): the declarations are additionally guarded by
 	 * __FreeBSD_version >= 500011 while their uses below depend on
 	 * NETBSD_PF alone.
 	 */
 #if defined(NETBSD_PF) && (__FreeBSD_version >= 500011)
 	struct pfil_head *ph_inet;
 #  ifdef USE_INET6
 	struct pfil_head *ph_inet6;
 #  endif
 #endif
 
 # ifdef NETBSD_PF
 	ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET);
 #    ifdef USE_INET6
 	ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6);
 #    endif
 	/* Give up only if there is no head at all to hook into. */
 	if (ph_inet == NULL
 #    ifdef USE_INET6
 	    && ph_inet6 == NULL
 #    endif
 	   ) {
 		return ENODEV;
 	}
 
 	if (ph_inet != NULL)
 		pfil_add_hook((void *)ipf_check_wrapper, NULL,
 		    PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet);
 #  ifdef USE_INET6
 	if (ph_inet6 != NULL)
 		pfil_add_hook((void *)ipf_check_wrapper6, NULL,
 				      PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6);
 #  endif
 # endif
 	return (0);
 }
 
 /*
  * Register ipf_ifevent(), with &ipfmain as its argument, as the handler
  * for interface arrival, interface departure and interface clone events.
  * The returned tags are kept in ipf_arrivetag, ipf_departtag and
  * ipf_clonetag.
  */
 void
 ipf_event_reg(void)
 {
 	ipf_arrivetag = EVENTHANDLER_REGISTER(ifnet_arrival_event,
 	    ipf_ifevent, &ipfmain, EVENTHANDLER_PRI_ANY);
 
 	ipf_departtag = EVENTHANDLER_REGISTER(ifnet_departure_event,
 	    ipf_ifevent, &ipfmain, EVENTHANDLER_PRI_ANY);
 
 	ipf_clonetag = EVENTHANDLER_REGISTER(if_clone_event,
 	    ipf_ifevent, &ipfmain, EVENTHANDLER_PRI_ANY);
 }
 
 /*
  * Deregister the interface arrival, departure and clone event handlers
  * whose tags are held in ipf_arrivetag, ipf_departtag and ipf_clonetag.
  * A NULL tag is skipped.
  */
 void
 ipf_event_dereg(void)
 {
 	if (ipf_arrivetag != NULL)
 		EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ipf_arrivetag);
 
 	if (ipf_departtag != NULL)
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event, ipf_departtag);
 
 	if (ipf_clonetag != NULL)
 		EVENTHANDLER_DEREGISTER(if_clone_event, ipf_clonetag);
 }
 
 
 /*
  * Return a random 32-bit value obtained from arc4random().
  */
 u_32_t
 ipf_random()
 {
 	u_32_t value;
 
 	value = arc4random();
 	return value;
 }
 
 
 /*
  * Finish a checksum: run in_cksum() over the packet with the first hlen
  * bytes of the mbuf hidden, combine that with the partial sum passed in
  * and return the 16-bit one's complement of the folded total.
  *
  * fin  - packet information (fin_m, fin_ip, fin_dp and fin_plen are read)
  * hlen - number of bytes to skip at the front of the mbuf
  * sum  - partial sum to combine with the computed one
  */
 u_int
 ipf_pcksum(fin, hlen, sum)
 	fr_info_t *fin;
 	int hlen;
 	u_int sum;
 {
 	struct mbuf *mb = fin->fin_m;
 	int l4off = (char *)fin->fin_dp - (char *)fin->fin_ip;
 	u_int partial;
 
 	/*
 	 * Step the mbuf's data pointer past the first hlen bytes for the
 	 * duration of the in_cksum() call, then put it back.
 	 */
 	mb->m_data += hlen;
 	mb->m_len -= hlen;
 	partial = in_cksum(mb, fin->fin_plen - l4off);
 	mb->m_len += hlen;
 	mb->m_data -= hlen;
 
 	/*
 	 * Both values are partial sums: add them and fold any carries
 	 * back into the low 16 bits.
 	 */
 	sum += ~partial & 0xffff;
 	while ((sum >> 16) != 0)
 		sum = (sum >> 16) + (sum & 0xffff);
 
 	return ~sum & 0xffff;
 }

Property changes on: projects/clang380-import/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c:r292913-293685
Index: projects/clang380-import/sys/contrib/ipfilter
===================================================================
--- projects/clang380-import/sys/contrib/ipfilter	(revision 293686)
+++ projects/clang380-import/sys/contrib/ipfilter	(revision 293687)

Property changes on: projects/clang380-import/sys/contrib/ipfilter
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/contrib/ipfilter:r292913-293685
Index: projects/clang380-import/sys/dev/cxgbe/t4_main.c
===================================================================
--- projects/clang380-import/sys/dev/cxgbe/t4_main.c	(revision 293686)
+++ projects/clang380-import/sys/dev/cxgbe/t4_main.c	(revision 293687)
@@ -1,9222 +1,9229 @@
 /*-
  * Copyright (c) 2011 Chelsio Communications, Inc.
  * All rights reserved.
  * Written by: Navdeep Parhar <np@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/conf.h>
 #include <sys/priv.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
 #include <sys/module.h>
 #include <sys/malloc.h>
 #include <sys/queue.h>
 #include <sys/taskqueue.h>
 #include <sys/pciio.h>
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pci_private.h>
 #include <sys/firmware.h>
 #include <sys/sbuf.h>
 #include <sys/smp.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_dl.h>
 #include <net/if_vlan_var.h>
 #ifdef RSS
 #include <net/rss_config.h>
 #endif
 #if defined(__i386__) || defined(__amd64__)
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #endif
 
 #include "common/common.h"
 #include "common/t4_msg.h"
 #include "common/t4_regs.h"
 #include "common/t4_regs_values.h"
 #include "t4_ioctl.h"
 #include "t4_l2t.h"
 #include "t4_mp_ring.h"
 
 /* T4 bus driver interface */
 static int t4_probe(device_t);
 static int t4_attach(device_t);
 static int t4_detach(device_t);
 /* Method table for the T4 nexus: probe, attach and detach entry points. */
 static device_method_t t4_methods[] = {
 	DEVMETHOD(device_probe,		t4_probe),
 	DEVMETHOD(device_attach,	t4_attach),
 	DEVMETHOD(device_detach,	t4_detach),
 
 	DEVMETHOD_END
 };
 /* "t4nex" driver: the methods above, sized for a struct adapter. */
 static driver_t t4_driver = {
 	"t4nex",
 	t4_methods,
 	sizeof(struct adapter)
 };
 
 
 /* T4 port (cxgbe) interface */
 static int cxgbe_probe(device_t);
 static int cxgbe_attach(device_t);
 static int cxgbe_detach(device_t);
 static device_method_t cxgbe_methods[] = {
 	DEVMETHOD(device_probe,		cxgbe_probe),
 	DEVMETHOD(device_attach,	cxgbe_attach),
 	DEVMETHOD(device_detach,	cxgbe_detach),
 	{ 0, 0 }
 };
 static driver_t cxgbe_driver = {
 	"cxgbe",
 	cxgbe_methods,
 	sizeof(struct port_info)
 };
 
 /* T4 VI (vcxgbe) interface */
 static int vcxgbe_probe(device_t);
 static int vcxgbe_attach(device_t);
 static int vcxgbe_detach(device_t);
 static device_method_t vcxgbe_methods[] = {
 	DEVMETHOD(device_probe,		vcxgbe_probe),
 	DEVMETHOD(device_attach,	vcxgbe_attach),
 	DEVMETHOD(device_detach,	vcxgbe_detach),
 	{ 0, 0 }
 };
 static driver_t vcxgbe_driver = {
 	"vcxgbe",
 	vcxgbe_methods,
 	sizeof(struct vi_info)
 };
 
 /* Character device entry points for the nexus control device. */
 static d_ioctl_t t4_ioctl;
 static d_open_t t4_open;
 static d_close_t t4_close;
 
 /* cdevsw for the "t4nex" control device. */
 static struct cdevsw t4_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_flags =	0,
 	.d_open =	t4_open,
 	.d_close =	t4_close,
 	.d_ioctl =	t4_ioctl,
 	.d_name =	"t4nex",
 };
 
 /* T5 bus driver interface */
 static int t5_probe(device_t);
 /* T5 has its own probe routine but reuses the T4 attach and detach. */
 static device_method_t t5_methods[] = {
 	DEVMETHOD(device_probe,		t5_probe),
 	DEVMETHOD(device_attach,	t4_attach),
 	DEVMETHOD(device_detach,	t4_detach),
 
 	DEVMETHOD_END
 };
 /* "t5nex" driver: the methods above, sized for a struct adapter. */
 static driver_t t5_driver = {
 	"t5nex",
 	t5_methods,
 	sizeof(struct adapter)
 };
 
 
 /* T5 port (cxl) interface; shares the cxgbe method table. */
 static driver_t cxl_driver = {
 	"cxl",
 	cxgbe_methods,
 	sizeof(struct port_info)
 };
 
 /* T5 VI (vcxl) interface; shares the vcxgbe method table. */
 static driver_t vcxl_driver = {
 	"vcxl",
 	vcxgbe_methods,
 	sizeof(struct vi_info)
 };
 
 /* cdevsw for the "t5nex" control device; same handlers as t4_cdevsw. */
 static struct cdevsw t5_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_flags =	0,
 	.d_open =	t4_open,
 	.d_close =	t4_close,
 	.d_ioctl =	t4_ioctl,
 	.d_name =	"t5nex",
 };
 
 /* ifnet + media interface */
 static void cxgbe_init(void *);
 static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t);
 static int cxgbe_transmit(struct ifnet *, struct mbuf *);
 static void cxgbe_qflush(struct ifnet *);
 static int cxgbe_media_change(struct ifnet *);
 static void cxgbe_media_status(struct ifnet *, struct ifmediareq *);
 
 MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services");
 
 /*
  * Correct lock order when you need to acquire multiple locks is t4_list_lock,
  * then ADAPTER_LOCK, then t4_uld_list_lock.
  */
 static struct sx t4_list_lock;
 SLIST_HEAD(, adapter) t4_list;
 #ifdef TCP_OFFLOAD
 static struct sx t4_uld_list_lock;
 SLIST_HEAD(, uld_info) t4_uld_list;
 #endif
 
 /*
  * Tunables.  See tweak_tunables() too.
  *
  * Each tunable is set to a default value here if it's known at compile-time.
  * Otherwise it is set to -1 as an indication to tweak_tunables() that it should
  * provide a reasonable default when the driver is loaded.
  *
  * Tunables applicable to both T4 and T5 are under hw.cxgbe.  Those specific to
  * T5 are under hw.cxl.
  */
 
 /*
  * Number of queues for tx and rx, 10G and 1G, NIC and offload.
  */
 #define NTXQ_10G 16
 static int t4_ntxq10g = -1;
 TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq10g);
 
 #define NRXQ_10G 8
 static int t4_nrxq10g = -1;
 TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq10g);
 
 #define NTXQ_1G 4
 static int t4_ntxq1g = -1;
 TUNABLE_INT("hw.cxgbe.ntxq1g", &t4_ntxq1g);
 
 #define NRXQ_1G 2
 static int t4_nrxq1g = -1;
 TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g);
 
 static int t4_rsrv_noflowq = 0;
 TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq);
 
 #ifdef TCP_OFFLOAD
 #define NOFLDTXQ_10G 8
 static int t4_nofldtxq10g = -1;
 TUNABLE_INT("hw.cxgbe.nofldtxq10g", &t4_nofldtxq10g);
 
 #define NOFLDRXQ_10G 2
 static int t4_nofldrxq10g = -1;
 TUNABLE_INT("hw.cxgbe.nofldrxq10g", &t4_nofldrxq10g);
 
 #define NOFLDTXQ_1G 2
 static int t4_nofldtxq1g = -1;
 TUNABLE_INT("hw.cxgbe.nofldtxq1g", &t4_nofldtxq1g);
 
 #define NOFLDRXQ_1G 1
 static int t4_nofldrxq1g = -1;
 TUNABLE_INT("hw.cxgbe.nofldrxq1g", &t4_nofldrxq1g);
 #endif
 
 #ifdef DEV_NETMAP
 #define NNMTXQ_10G 2
 static int t4_nnmtxq10g = -1;
 TUNABLE_INT("hw.cxgbe.nnmtxq10g", &t4_nnmtxq10g);
 
 #define NNMRXQ_10G 2
 static int t4_nnmrxq10g = -1;
 TUNABLE_INT("hw.cxgbe.nnmrxq10g", &t4_nnmrxq10g);
 
 #define NNMTXQ_1G 1
 static int t4_nnmtxq1g = -1;
 TUNABLE_INT("hw.cxgbe.nnmtxq1g", &t4_nnmtxq1g);
 
 #define NNMRXQ_1G 1
 static int t4_nnmrxq1g = -1;
 TUNABLE_INT("hw.cxgbe.nnmrxq1g", &t4_nnmrxq1g);
 #endif
 
 /*
  * Holdoff parameters for 10G and 1G ports.
  */
 #define TMR_IDX_10G 1
 static int t4_tmr_idx_10g = TMR_IDX_10G;
 TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx_10g);
 
 #define PKTC_IDX_10G (-1)
 static int t4_pktc_idx_10g = PKTC_IDX_10G;
 TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx_10g);
 
 #define TMR_IDX_1G 1
 static int t4_tmr_idx_1g = TMR_IDX_1G;
 TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &t4_tmr_idx_1g);
 
 #define PKTC_IDX_1G (-1)
 static int t4_pktc_idx_1g = PKTC_IDX_1G;
 TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &t4_pktc_idx_1g);
 
 /*
  * Size (# of entries) of each tx and rx queue.
  */
 static unsigned int t4_qsize_txq = TX_EQ_QSIZE;
 TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq);
 
 static unsigned int t4_qsize_rxq = RX_IQ_QSIZE;
 TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq);
 
 /*
  * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively).
  */
 static int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX;
 TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types);
 
 /*
  * Configuration file.
  */
 #define DEFAULT_CF	"default"
 #define FLASH_CF	"flash"
 #define UWIRE_CF	"uwire"
 #define FPGA_CF		"fpga"
 static char t4_cfg_file[32] = DEFAULT_CF;
 TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file));
 
 /*
  * PAUSE settings (bit 0, 1 = rx_pause, tx_pause respectively).
  * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them.
  * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water
  *            mark or when signalled to do so, 0 to never emit PAUSE.
  */
 static int t4_pause_settings = PAUSE_TX | PAUSE_RX;
 TUNABLE_INT("hw.cxgbe.pause_settings", &t4_pause_settings);
 
 /*
  * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed,
  * encouraged respectively).
  */
 static unsigned int t4_fw_install = 1;
 TUNABLE_INT("hw.cxgbe.fw_install", &t4_fw_install);
 
 /*
  * ASIC features that will be used.  Disable the ones you don't want so that the
  * chip resources aren't wasted on features that will not be used.
  */
 static int t4_linkcaps_allowed = 0;	/* No DCBX, PPP, etc. by default */
 TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed);
 
 static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC;
 TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed);
 
 static int t4_toecaps_allowed = -1;
 TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed);
 
 static int t4_rdmacaps_allowed = 0;
 TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed);
 
 static int t4_iscsicaps_allowed = 0;
 TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed);
 
 static int t4_fcoecaps_allowed = 0;
 TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed);
 
 static int t5_write_combine = 0;
 TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine);
 
 static int t4_num_vis = 1;
 TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis);
 
 /* Functions used by extra VIs to obtain unique MAC addresses for each VI. */
 static int vi_mac_funcs[] = {
 	FW_VI_FUNC_OFLD,
 	FW_VI_FUNC_IWARP,
 	FW_VI_FUNC_OPENISCSI,
 	FW_VI_FUNC_OPENFCOE,
 	FW_VI_FUNC_FOISCSI,
 	FW_VI_FUNC_FOFCOE,
 };
 
 /*
  * Interrupt type and per-port queue counts chosen for an adapter.
  * Presumably filled in by cfg_itype_and_nqueues(), which takes a pointer
  * to this structure - confirm against its definition.
  */
 struct intrs_and_queues {
 	uint16_t intr_type;	/* INTx, MSI, or MSI-X */
 	uint16_t nirq;		/* Total # of vectors */
 	uint16_t intr_flags_10g;/* Interrupt flags for each 10G port */
 	uint16_t intr_flags_1g;	/* Interrupt flags for each 1G port */
 	uint16_t ntxq10g;	/* # of NIC txq's for each 10G port */
 	uint16_t nrxq10g;	/* # of NIC rxq's for each 10G port */
 	uint16_t ntxq1g;	/* # of NIC txq's for each 1G port */
 	uint16_t nrxq1g;	/* # of NIC rxq's for each 1G port */
 	uint16_t rsrv_noflowq;	/* Flag whether to reserve queue 0 */
 #ifdef TCP_OFFLOAD
 	uint16_t nofldtxq10g;	/* # of TOE txq's for each 10G port */
 	uint16_t nofldrxq10g;	/* # of TOE rxq's for each 10G port */
 	uint16_t nofldtxq1g;	/* # of TOE txq's for each 1G port */
 	uint16_t nofldrxq1g;	/* # of TOE rxq's for each 1G port */
 #endif
 #ifdef DEV_NETMAP
 	uint16_t nnmtxq10g;	/* # of netmap txq's for each 10G port */
 	uint16_t nnmrxq10g;	/* # of netmap rxq's for each 10G port */
 	uint16_t nnmtxq1g;	/* # of netmap txq's for each 1G port */
 	uint16_t nnmrxq1g;	/* # of netmap rxq's for each 1G port */
 #endif
 };
 
 /* State kept for one filter: status bits, table references and its spec. */
 struct filter_entry {
 	uint32_t valid:1;	/* filter allocated and valid */
 	uint32_t locked:1;	/* filter is administratively locked */
 	uint32_t pending:1;	/* filter action is pending firmware reply */
 	uint32_t smtidx:8;	/* Source MAC Table index for smac */
 	struct l2t_entry *l2t;	/* Layer Two Table entry for dmac */
 
 	struct t4_filter_specification fs;
 };
 
 /*
  * Forward declarations for the file-local helpers defined later in this
  * file.
  */
 static int map_bars_0_and_4(struct adapter *);
 static int map_bar_2(struct adapter *);
 static void setup_memwin(struct adapter *);
 static int validate_mem_range(struct adapter *, uint32_t, int);
 static int fwmtype_to_hwmtype(int);
 static int validate_mt_off_len(struct adapter *, int, uint32_t, int,
     uint32_t *);
 static void memwin_info(struct adapter *, int, uint32_t *, uint32_t *);
 static uint32_t position_memwin(struct adapter *, int, uint32_t);
 static int cfg_itype_and_nqueues(struct adapter *, int, int, int,
     struct intrs_and_queues *);
 static int prep_firmware(struct adapter *);
 static int partition_resources(struct adapter *, const struct firmware *,
     const char *);
 static int get_params__pre_init(struct adapter *);
 static int get_params__post_init(struct adapter *);
 static int set_params__post_init(struct adapter *);
 static void t4_set_desc(struct adapter *);
 static void build_medialist(struct port_info *, struct ifmedia *);
 static int cxgbe_init_synchronized(struct vi_info *);
 static int cxgbe_uninit_synchronized(struct vi_info *);
 static int setup_intr_handlers(struct adapter *);
 static void quiesce_txq(struct adapter *, struct sge_txq *);
 static void quiesce_wrq(struct adapter *, struct sge_wrq *);
 static void quiesce_iq(struct adapter *, struct sge_iq *);
 static void quiesce_fl(struct adapter *, struct sge_fl *);
 static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
     driver_intr_t *, void *, char *);
 static int t4_free_irq(struct adapter *, struct irq *);
 static void reg_block_dump(struct adapter *, uint8_t *, unsigned int,
     unsigned int);
 static void t4_get_regs(struct adapter *, struct t4_regdump *, uint8_t *);
 static void vi_refresh_stats(struct adapter *, struct vi_info *);
 static void cxgbe_refresh_stats(struct adapter *, struct port_info *);
 static void cxgbe_tick(void *);
 static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t);
 /* Default handlers installed by t4_attach() for every CPL/AN/FW message. */
 static int cpl_not_handled(struct sge_iq *, const struct rss_header *,
     struct mbuf *);
 static int an_not_handled(struct sge_iq *, const struct rsp_ctrl *);
 static int fw_msg_not_handled(struct adapter *, const __be64 *);
 /* sysctl setup and handlers. */
 static void t4_sysctls(struct adapter *);
 static void cxgbe_sysctls(struct port_info *);
 static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
 static int sysctl_bitfield(SYSCTL_HANDLER_ARGS);
 static int sysctl_btphy(SYSCTL_HANDLER_ARGS);
 static int sysctl_noflowq(SYSCTL_HANDLER_ARGS);
 static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
 static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
 static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
 static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS);
 static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS);
 static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS);
 static int sysctl_temperature(SYSCTL_HANDLER_ARGS);
 #ifdef SBUF_DRAIN
 /* These handlers are only declared when SBUF_DRAIN is defined. */
 static int sysctl_cctrl(SYSCTL_HANDLER_ARGS);
 static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS);
 static int sysctl_cim_la(SYSCTL_HANDLER_ARGS);
 static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS);
 static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS);
 static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS);
 static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS);
 static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS);
 static int sysctl_devlog(SYSCTL_HANDLER_ARGS);
 static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS);
 static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS);
 static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS);
 static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS);
 static int sysctl_meminfo(SYSCTL_HANDLER_ARGS);
 static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS);
 static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS);
 static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS);
 static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS);
 static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS);
 static int sysctl_tids(SYSCTL_HANDLER_ARGS);
 static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS);
 static int sysctl_tp_la(SYSCTL_HANDLER_ARGS);
 static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
 static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
 static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
 #endif
 /* Filter management. */
 static uint32_t fconf_to_mode(uint32_t);
 static uint32_t mode_to_fconf(uint32_t);
 static uint32_t fspec_to_fconf(struct t4_filter_specification *);
 static int get_filter_mode(struct adapter *, uint32_t *);
 static int set_filter_mode(struct adapter *, uint32_t);
 static inline uint64_t get_filter_hits(struct adapter *, uint32_t);
 static int get_filter(struct adapter *, struct t4_filter *);
 static int set_filter(struct adapter *, struct t4_filter *);
 static int del_filter(struct adapter *, struct t4_filter *);
 static void clear_filter(struct filter_entry *);
 static int set_filter_wr(struct adapter *, int);
 static int del_filter_wr(struct adapter *, int);
 static int get_sge_context(struct adapter *, struct t4_sge_context *);
 static int load_fw(struct adapter *, struct t4_data *);
 static int read_card_mem(struct adapter *, int, struct t4_mem_range *);
 static int read_i2c(struct adapter *, struct t4_i2c_data *);
 static int set_sched_class(struct adapter *, struct t4_sched_params *);
 static int set_sched_queue(struct adapter *, struct t4_sched_queue *);
 #ifdef TCP_OFFLOAD
 static int toe_capability(struct vi_info *, int);
 #endif
 static int mod_event(module_t, int, void *);
 
 /*
  * PCI device ID to description tables.  t4_probe() searches t4_pciids[] and
  * t5_probe() searches t5_pciids[]; a match supplies the device description.
  * Entries inside "#ifdef notyet" are compiled out unless notyet is defined.
  */
 struct {
 	uint16_t device;
 	char *desc;
 } t4_pciids[] = {
 	{0xa000, "Chelsio Terminator 4 FPGA"},
 	{0x4400, "Chelsio T440-dbg"},
 	{0x4401, "Chelsio T420-CR"},
 	{0x4402, "Chelsio T422-CR"},
 	{0x4403, "Chelsio T440-CR"},
 	{0x4404, "Chelsio T420-BCH"},
 	{0x4405, "Chelsio T440-BCH"},
 	{0x4406, "Chelsio T440-CH"},
 	{0x4407, "Chelsio T420-SO"},
 	{0x4408, "Chelsio T420-CX"},
 	{0x4409, "Chelsio T420-BT"},
 	{0x440a, "Chelsio T404-BT"},
 	{0x440e, "Chelsio T440-LP-CR"},
 }, t5_pciids[] = {
 	{0xb000, "Chelsio Terminator 5 FPGA"},
 	{0x5400, "Chelsio T580-dbg"},
 	{0x5401,  "Chelsio T520-CR"},		/* 2 x 10G */
 	{0x5402,  "Chelsio T522-CR"},		/* 2 x 10G, 2 X 1G */
 	{0x5403,  "Chelsio T540-CR"},		/* 4 x 10G */
 	{0x5407,  "Chelsio T520-SO"},		/* 2 x 10G, nomem */
 	{0x5409,  "Chelsio T520-BT"},		/* 2 x 10GBaseT */
 	{0x540a,  "Chelsio T504-BT"},		/* 4 x 1G */
 	{0x540d,  "Chelsio T580-CR"},		/* 2 x 40G */
 	{0x540e,  "Chelsio T540-LP-CR"},	/* 4 x 10G */
 	{0x5410,  "Chelsio T580-LP-CR"},	/* 2 x 40G */
 	{0x5411,  "Chelsio T520-LL-CR"},	/* 2 x 10G */
 	{0x5412,  "Chelsio T560-CR"},		/* 1 x 40G, 2 x 10G */
 	{0x5414,  "Chelsio T580-LP-SO-CR"},	/* 2 x 40G, nomem */
 	{0x5415,  "Chelsio T502-BT"},		/* 2 x 1G */
 #ifdef notyet
 	{0x5404,  "Chelsio T520-BCH"},
 	{0x5405,  "Chelsio T540-BCH"},
 	{0x5406,  "Chelsio T540-CH"},
 	{0x5408,  "Chelsio T520-CX"},
 	{0x540b,  "Chelsio B520-SR"},
 	{0x540c,  "Chelsio B504-BT"},
 	{0x540f,  "Chelsio Amsterdam"},
 	{0x5413,  "Chelsio T580-CHR"},
 #endif
 };
 
 #ifdef TCP_OFFLOAD
 /*
  * service_iq() has an iq and needs the fl.  Offset of fl from the iq should be
  * exactly the same for both rxq and ofld_rxq.
  */
 CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq));
 CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
 #endif
 
 /* No easy way to include t4_msg.h before adapter.h so we check this way */
 CTASSERT(nitems(((struct adapter *)0)->cpl_handler) == NUM_CPL_CMDS);
 CTASSERT(nitems(((struct adapter *)0)->fw_msg_handler) == NUM_FW6_TYPES);
 
 /* The cluster metadata must fit within CL_METADATA_SIZE bytes. */
 CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE);
 
 /*
  * Probe method for T4 adapters.  Claims the device (BUS_PROBE_DEFAULT) and
  * sets its description when it is a Chelsio part listed in t4_pciids[];
  * returns ENXIO otherwise.  For the FPGA (device 0xa000) only PF0 is claimed.
  */
 static int
 t4_probe(device_t dev)
 {
 	uint16_t vendor, devid;
 	uint8_t func;
 	int idx;
 
 	vendor = pci_get_vendor(dev);
 	devid = pci_get_device(dev);
 	func = pci_get_function(dev);
 
 	if (vendor != PCI_VENDOR_ID_CHELSIO)
 		return (ENXIO);
 
 	/* Attach only to PF0 of the FPGA */
 	if (devid == 0xa000 && func != 0)
 		return (ENXIO);
 
 	for (idx = 0; idx < nitems(t4_pciids); idx++) {
 		if (t4_pciids[idx].device != devid)
 			continue;
 		device_set_desc(dev, t4_pciids[idx].desc);
 		return (BUS_PROBE_DEFAULT);
 	}
 
 	/* Chelsio device, but not one this driver knows about. */
 	return (ENXIO);
 }
 
 /*
  * Probe method for T5 adapters.  Claims the device (BUS_PROBE_DEFAULT) and
  * sets its description when it is a Chelsio part listed in t5_pciids[];
  * returns ENXIO otherwise.  For the FPGA (device 0xb000) only PF0 is claimed.
  */
 static int
 t5_probe(device_t dev)
 {
 	uint16_t vendor, devid;
 	uint8_t func;
 	int idx;
 
 	vendor = pci_get_vendor(dev);
 	devid = pci_get_device(dev);
 	func = pci_get_function(dev);
 
 	if (vendor != PCI_VENDOR_ID_CHELSIO)
 		return (ENXIO);
 
 	/* Attach only to PF0 of the FPGA */
 	if (devid == 0xb000 && func != 0)
 		return (ENXIO);
 
 	for (idx = 0; idx < nitems(t5_pciids); idx++) {
 		if (t5_pciids[idx].device != devid)
 			continue;
 		device_set_desc(dev, t5_pciids[idx].desc);
 		return (BUS_PROBE_DEFAULT);
 	}
 
 	/* Chelsio device, but not one this driver knows about. */
 	return (ENXIO);
 }
 
 /*
  * Clear the No Snoop and Relaxed Ordering enables on the PCIe root port above
  * 'dev', logging a message if either of them had been set.
  */
 static void
 t5_attribute_workaround(device_t dev)
 {
 	device_t rp;
 	uint32_t prev;
 
 	/*
 	 * The T5 chips do not properly echo the No Snoop and Relaxed
 	 * Ordering attributes when replying to a TLP from a Root
 	 * Port.  As a workaround, find the parent Root Port and
 	 * disable No Snoop and Relaxed Ordering.  Note that this
 	 * affects all devices under this root port.
 	 */
 	rp = pci_find_pcie_root_port(dev);
 	if (rp == NULL) {
 		device_printf(dev, "Unable to find parent root port\n");
 		return;
 	}
 
 	prev = pcie_adjust_config(rp, PCIER_DEVICE_CTL,
 	    PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2);
 
 	/* Nothing to report if neither attribute was enabled. */
 	if ((prev & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) ==
 	    0)
 		return;
 
 	device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n",
 	    device_get_nameunit(rp));
 }
 
 /*
  * Attach method for the adapter (nexus) device.
  *
  * In order: applies PCIe settings, initializes the adapter locks, maps BARs
  * 0 and 4, installs default message handlers, prepares the adapter, and
  * creates the control character device.  Unless recovery mode was requested
  * (hw.cxgbe.sos), it then prepares the firmware, creates one child device
  * per port, sizes and allocates the queue arrays, distributes queues to the
  * VIs, sets up interrupt handlers, and attaches the children.
  *
  * Returns 0 on success.  If a step fails after the character device has been
  * created, the failure is logged and 0 is still returned so the adapter
  * remains attached in "recovery mode".  If it fails before that, the adapter
  * is torn down with t4_detach() and the error is returned.
  */
 static int
 t4_attach(device_t dev)
 {
 	struct adapter *sc;
 	int rc = 0, i, j, n10g, n1g, rqidx, tqidx;
 	struct intrs_and_queues iaq;
 	struct sge *s;
 #ifdef TCP_OFFLOAD
 	int ofld_rqidx, ofld_tqidx;
 #endif
 #ifdef DEV_NETMAP
 	int nm_rqidx, nm_tqidx;
 #endif
 	int num_vis;
 
 	sc = device_get_softc(dev);
 	sc->dev = dev;
 	TUNABLE_INT_FETCH("hw.cxgbe.debug_flags", &sc->debug_flags);
 
 	/* Device IDs 0x54xx are the T5 parts (see t5_pciids[]). */
 	if ((pci_get_device(dev) & 0xff00) == 0x5400)
 		t5_attribute_workaround(dev);
 	pci_enable_busmaster(dev);
 	if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) {
 		uint32_t v;
 
 		pci_set_max_read_req(dev, 4096);
 		v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2);
 		v |= PCIEM_CTL_RELAXED_ORD_ENABLE;
 		pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2);
 
 		sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5);
 	}
 
 	sc->traceq = -1;
 	/*
 	 * Format the lock name before handing the buffer to mtx_init() so
 	 * that the lock is never registered under an empty name.
 	 */
 	snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer",
 	    device_get_nameunit(dev));
 	mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF);
 
 	snprintf(sc->lockname, sizeof(sc->lockname), "%s",
 	    device_get_nameunit(dev));
 	mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
 	sx_xlock(&t4_list_lock);
 	SLIST_INSERT_HEAD(&t4_list, sc, link);
 	sx_xunlock(&t4_list_lock);
 
 	mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
 	TAILQ_INIT(&sc->sfl);
 	callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);
 
 	mtx_init(&sc->regwin_lock, "register and memory window", 0, MTX_DEF);
 
 	rc = map_bars_0_and_4(sc);
 	if (rc != 0)
 		goto done; /* error message displayed already */
 
 	/*
 	 * This is the real PF# to which we're attaching.  Works from within PCI
 	 * passthrough environments too, where pci_get_function() could return a
 	 * different PF# depending on the passthrough configuration.  We need to
 	 * use the real PF# in all our communication with the firmware.
 	 */
 	sc->pf = G_SOURCEPF(t4_read_reg(sc, A_PL_WHOAMI));
 	sc->mbox = sc->pf;
 
 	/*
 	 * Start with every handler pointing at its "not handled" default and
 	 * then register the handlers this file provides.
 	 */
 	memset(sc->chan_map, 0xff, sizeof(sc->chan_map));
 	sc->an_handler = an_not_handled;
 	for (i = 0; i < nitems(sc->cpl_handler); i++)
 		sc->cpl_handler[i] = cpl_not_handled;
 	for (i = 0; i < nitems(sc->fw_msg_handler); i++)
 		sc->fw_msg_handler[i] = fw_msg_not_handled;
 	t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl);
 	t4_register_cpl_handler(sc, CPL_TRACE_PKT, t4_trace_pkt);
 	t4_register_cpl_handler(sc, CPL_TRACE_PKT_T5, t5_trace_pkt);
 	t4_init_sge_cpl_handlers(sc);
 
 	/* Prepare the adapter for operation */
 	rc = -t4_prep_adapter(sc);
 	if (rc != 0) {
 		device_printf(dev, "failed to prepare adapter: %d.\n", rc);
 		goto done;
 	}
 
 	/*
 	 * Do this really early, with the memory windows set up even before the
 	 * character device.  The userland tool's register i/o and mem read
 	 * will work even in "recovery mode".
 	 */
 	setup_memwin(sc);
 	sc->cdev = make_dev(is_t4(sc) ? &t4_cdevsw : &t5_cdevsw,
 	    device_get_unit(dev), UID_ROOT, GID_WHEEL, 0600, "%s",
 	    device_get_nameunit(dev));
 	if (sc->cdev == NULL)
 		device_printf(dev, "failed to create nexus char device.\n");
 	else
 		sc->cdev->si_drv1 = sc;
 
 	/* Go no further if recovery mode has been requested. */
 	if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) {
 		device_printf(dev, "recovery mode.\n");
 		goto done;
 	}
 
 #if defined(__i386__)
 	if ((cpu_feature & CPUID_CX8) == 0) {
 		device_printf(dev, "64 bit atomics not available.\n");
 		rc = ENOTSUP;
 		goto done;
 	}
 #endif
 
 	/* Prepare the firmware for operation */
 	rc = prep_firmware(sc);
 	if (rc != 0)
 		goto done; /* error message displayed already */
 
 	rc = get_params__post_init(sc);
 	if (rc != 0)
 		goto done; /* error message displayed already */
 
 	rc = set_params__post_init(sc);
 	if (rc != 0)
 		goto done; /* error message displayed already */
 
 	rc = map_bar_2(sc);
 	if (rc != 0)
 		goto done; /* error message displayed already */
 
 	rc = t4_create_dma_tag(sc);
 	if (rc != 0)
 		goto done; /* error message displayed already */
 
 	/*
 	 * Number of VIs to create per-port.  The first VI is the
 	 * "main" regular VI for the port.  The second VI is used for
 	 * netmap if present, and any remaining VIs are used for
 	 * additional virtual interfaces.
 	 *
 	 * Limit the number of VIs per port to the number of available
 	 * MAC addresses per port.
 	 */
 	if (t4_num_vis >= 1)
 		num_vis = t4_num_vis;
 	else
 		num_vis = 1;
 #ifdef DEV_NETMAP
 	num_vis++;
 #endif
 	if (num_vis > nitems(vi_mac_funcs)) {
 		num_vis = nitems(vi_mac_funcs);
 		device_printf(dev, "Number of VIs limited to %d\n", num_vis);
 	}
 
 	/*
 	 * First pass over all the ports - allocate VIs and initialize some
 	 * basic parameters like mac address, port type, etc.  We also figure
 	 * out whether a port is 10G or 1G and use that information when
 	 * calculating how many interrupts to attempt to allocate.
 	 */
 	n10g = n1g = 0;
 	for_each_port(sc, i) {
 		struct port_info *pi;
 		struct vi_info *vi;
 
 		pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
 		sc->port[i] = pi;
 
 		/* These must be set before t4_port_init */
 		pi->adapter = sc;
 		pi->port_id = i;
 		pi->nvi = num_vis;
 		pi->vi = malloc(sizeof(struct vi_info) * num_vis, M_CXGBE,
 		    M_ZERO | M_WAITOK);
 
 		/*
 		 * Allocate the "main" VI and initialize parameters
 		 * like mac addr.
 		 */
 		rc = -t4_port_init(pi, sc->mbox, sc->pf, 0);
 		if (rc != 0) {
 			device_printf(dev, "unable to initialize port %d: %d\n",
 			    i, rc);
 			free(pi->vi, M_CXGBE);
 			free(pi, M_CXGBE);
 			sc->port[i] = NULL;
 			goto done;
 		}
 
 		/* Replace the pause bits with the driver-wide setting. */
 		pi->link_cfg.requested_fc &= ~(PAUSE_TX | PAUSE_RX);
 		pi->link_cfg.requested_fc |= t4_pause_settings;
 		pi->link_cfg.fc &= ~(PAUSE_TX | PAUSE_RX);
 		pi->link_cfg.fc |= t4_pause_settings;
 
 		rc = -t4_link_start(sc, sc->mbox, pi->tx_chan, &pi->link_cfg);
 		if (rc != 0) {
 			device_printf(dev, "port %d l1cfg failed: %d\n", i, rc);
 			free(pi->vi, M_CXGBE);
 			free(pi, M_CXGBE);
 			sc->port[i] = NULL;
 			goto done;
 		}
 
 		snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
 		    device_get_nameunit(dev), i);
 		mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
 		sc->chan_map[pi->tx_chan] = i;
 
 		/* 40G ports are counted and tuned together with 10G ports. */
 		if (is_10G_port(pi) || is_40G_port(pi)) {
 			n10g++;
 			for_each_vi(pi, j, vi) {
 				vi->tmr_idx = t4_tmr_idx_10g;
 				vi->pktc_idx = t4_pktc_idx_10g;
 			}
 		} else {
 			n1g++;
 			for_each_vi(pi, j, vi) {
 				vi->tmr_idx = t4_tmr_idx_1g;
 				vi->pktc_idx = t4_pktc_idx_1g;
 			}
 		}
 
 		pi->linkdnrc = -1;
 
 		for_each_vi(pi, j, vi) {
 			vi->qsize_rxq = t4_qsize_rxq;
 			vi->qsize_txq = t4_qsize_txq;
 			vi->pi = pi;
 		}
 
 		pi->dev = device_add_child(dev, is_t4(sc) ? "cxgbe" : "cxl", -1);
 		if (pi->dev == NULL) {
 			device_printf(dev,
 			    "failed to add device for port %d.\n", i);
 			rc = ENXIO;
 			goto done;
 		}
 		pi->vi[0].dev = pi->dev;
 		device_set_softc(pi->dev, pi);
 	}
 
 	/*
 	 * Interrupt type, # of interrupts, # of rx/tx queues, etc.
 	 */
 #ifdef DEV_NETMAP
 	/* Undo the netmap VI added above; it is not passed to the sizing call. */
 	num_vis--;
 #endif
 	rc = cfg_itype_and_nqueues(sc, n10g, n1g, num_vis, &iaq);
 	if (rc != 0)
 		goto done; /* error message displayed already */
 
 	sc->intr_type = iaq.intr_type;
 	sc->intr_count = iaq.nirq;
 
 	s = &sc->sge;
 	s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g;
 	s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g;
 	/* Each extra VI gets exactly one rxq and one txq per port. */
 	if (num_vis > 1) {
 		s->nrxq += (n10g + n1g) * (num_vis - 1);
 		s->ntxq += (n10g + n1g) * (num_vis - 1);
 	}
 	s->neq = s->ntxq + s->nrxq;	/* the free list in an rxq is an eq */
 	s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */
 	s->niq = s->nrxq + 1;		/* 1 extra for firmware event queue */
 #ifdef TCP_OFFLOAD
 	if (is_offload(sc)) {
 		s->nofldrxq = n10g * iaq.nofldrxq10g + n1g * iaq.nofldrxq1g;
 		s->nofldtxq = n10g * iaq.nofldtxq10g + n1g * iaq.nofldtxq1g;
 		if (num_vis > 1) {
 			s->nofldrxq += (n10g + n1g) * (num_vis - 1);
 			s->nofldtxq += (n10g + n1g) * (num_vis - 1);
 		}
 		s->neq += s->nofldtxq + s->nofldrxq;
 		s->niq += s->nofldrxq;
 
 		s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq),
 		    M_CXGBE, M_ZERO | M_WAITOK);
 		s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq),
 		    M_CXGBE, M_ZERO | M_WAITOK);
 	}
 #endif
 #ifdef DEV_NETMAP
 	s->nnmrxq = n10g * iaq.nnmrxq10g + n1g * iaq.nnmrxq1g;
 	s->nnmtxq = n10g * iaq.nnmtxq10g + n1g * iaq.nnmtxq1g;
 	s->neq += s->nnmtxq + s->nnmrxq;
 	s->niq += s->nnmrxq;
 
 	s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq),
 	    M_CXGBE, M_ZERO | M_WAITOK);
 	s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq),
 	    M_CXGBE, M_ZERO | M_WAITOK);
 #endif
 
 	s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_wrq), M_CXGBE,
 	    M_ZERO | M_WAITOK);
 	s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
 	    M_ZERO | M_WAITOK);
 	s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
 	    M_ZERO | M_WAITOK);
 	s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE,
 	    M_ZERO | M_WAITOK);
 	s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE,
 	    M_ZERO | M_WAITOK);
 
 	sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
 	    M_ZERO | M_WAITOK);
 
 	t4_init_l2t(sc, M_WAITOK);
 
 	/*
 	 * Second pass over the ports.  This time we know the number of rx and
 	 * tx queues that each port should get.
 	 */
 	rqidx = tqidx = 0;
 #ifdef TCP_OFFLOAD
 	ofld_rqidx = ofld_tqidx = 0;
 #endif
 #ifdef DEV_NETMAP
 	nm_rqidx = nm_tqidx = 0;
 #endif
 	for_each_port(sc, i) {
 		struct port_info *pi = sc->port[i];
 		struct vi_info *vi;
 
 		if (pi == NULL)
 			continue;
 
 		for_each_vi(pi, j, vi) {
 #ifdef DEV_NETMAP
 			/* VI 1 is the netmap VI; it uses the netmap queues. */
 			if (j == 1) {
 				vi->flags |= VI_NETMAP | INTR_RXQ;
 				vi->first_rxq = nm_rqidx;
 				vi->first_txq = nm_tqidx;
 				if (is_10G_port(pi) || is_40G_port(pi)) {
 					vi->nrxq = iaq.nnmrxq10g;
 					vi->ntxq = iaq.nnmtxq10g;
 				} else {
 					vi->nrxq = iaq.nnmrxq1g;
 					vi->ntxq = iaq.nnmtxq1g;
 				}
 				nm_rqidx += vi->nrxq;
 				nm_tqidx += vi->ntxq;
 				continue;
 			}
 #endif
 
 			/*
 			 * The main VI (j == 0) gets the configured number of
 			 * queues; every other VI gets a single rxq and txq.
 			 */
 			vi->first_rxq = rqidx;
 			vi->first_txq = tqidx;
 			if (is_10G_port(pi) || is_40G_port(pi)) {
 				vi->flags |= iaq.intr_flags_10g & INTR_RXQ;
 				vi->nrxq = j == 0 ? iaq.nrxq10g : 1;
 				vi->ntxq = j == 0 ? iaq.ntxq10g : 1;
 			} else {
 				vi->flags |= iaq.intr_flags_1g & INTR_RXQ;
 				vi->nrxq = j == 0 ? iaq.nrxq1g : 1;
 				vi->ntxq = j == 0 ? iaq.ntxq1g : 1;
 			}
 
 			/* Queue 0 can only be reserved if there is another. */
 			if (vi->ntxq > 1)
 				vi->rsrv_noflowq = iaq.rsrv_noflowq ? 1 : 0;
 			else
 				vi->rsrv_noflowq = 0;
 
 			rqidx += vi->nrxq;
 			tqidx += vi->ntxq;
 
 #ifdef TCP_OFFLOAD
 			if (!is_offload(sc))
 				continue;
 			vi->first_ofld_rxq = ofld_rqidx;
 			vi->first_ofld_txq = ofld_tqidx;
 			if (is_10G_port(pi) || is_40G_port(pi)) {
 				vi->flags |= iaq.intr_flags_10g & INTR_OFLD_RXQ;
 				vi->nofldrxq = j == 0 ? iaq.nofldrxq10g : 1;
 				vi->nofldtxq = j == 0 ? iaq.nofldtxq10g : 1;
 			} else {
 				vi->flags |= iaq.intr_flags_1g & INTR_OFLD_RXQ;
 				vi->nofldrxq = j == 0 ? iaq.nofldrxq1g : 1;
 				vi->nofldtxq = j == 0 ? iaq.nofldtxq1g : 1;
 			}
 			ofld_rqidx += vi->nofldrxq;
 			ofld_tqidx += vi->nofldtxq;
 #endif
 		}
 	}
 
 	rc = setup_intr_handlers(sc);
 	if (rc != 0) {
 		device_printf(dev,
 		    "failed to setup interrupt handlers: %d\n", rc);
 		goto done;
 	}
 
 	rc = bus_generic_attach(dev);
 	if (rc != 0) {
 		device_printf(dev,
 		    "failed to attach all child ports: %d\n", rc);
 		goto done;
 	}
 
 	device_printf(dev,
 	    "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n",
 	    sc->params.pci.speed, sc->params.pci.width, sc->params.nports,
 	    sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" :
 	    (sc->intr_type == INTR_MSI ? "MSI" : "INTx"),
 	    sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq);
 
 	t4_set_desc(sc);
 
 done:
 	if (rc != 0 && sc->cdev) {
 		/* cdev was created and so cxgbetool works; recover that way. */
 		device_printf(dev,
 		    "error during attach, adapter is now in recovery mode.\n");
 		rc = 0;
 	}
 
 	/* A remaining error means there is no cdev to recover with. */
 	if (rc != 0)
 		t4_detach(dev);
 	else
 		t4_sysctls(sc);
 
 	return (rc);
 }
 
 /*
  * Detach method for the adapter (nexus) device; also used by t4_attach() to
  * clean up after a failed attach.
  *
  * Idempotent.  The softc is zeroed at the end.
  *
  * Returns 0 on success, or the error from bus_generic_detach() if the child
  * devices could not be detached (in which case teardown stops there).
  */
 static int
 t4_detach(device_t dev)
 {
 	struct adapter *sc = device_get_softc(dev);
 	struct port_info *pi;
 	int error, n;
 
 	if (sc->flags & FULL_INIT_DONE)
 		t4_intr_disable(sc);
 
 	if (sc->cdev) {
 		destroy_dev(sc->cdev);
 		sc->cdev = NULL;
 	}
 
 	error = bus_generic_detach(dev);
 	if (error) {
 		device_printf(dev,
 		    "failed to detach child devices: %d\n", error);
 		return (error);
 	}
 
 	for (n = 0; n < sc->intr_count; n++)
 		t4_free_irq(sc, &sc->irq[n]);
 
 	/* Release every port that attach managed to set up. */
 	for (n = 0; n < MAX_NPORTS; n++) {
 		pi = sc->port[n];
 		if (!pi)
 			continue;
 
 		t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid);
 		if (pi->dev)
 			device_delete_child(dev, pi->dev);
 
 		mtx_destroy(&pi->pi_lock);
 		free(pi->vi, M_CXGBE);
 		free(pi, M_CXGBE);
 	}
 
 	if (sc->flags & FULL_INIT_DONE)
 		adapter_full_uninit(sc);
 
 	if (sc->flags & FW_OK)
 		t4_fw_bye(sc, sc->mbox);
 
 	if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX)
 		pci_release_msi(dev);
 
 	/* Memory resources for the register, doorbell, and MSI-X BARs. */
 	if (sc->regs_res) {
 		bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid,
 		    sc->regs_res);
 	}
 	if (sc->udbs_res) {
 		bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid,
 		    sc->udbs_res);
 	}
 	if (sc->msix_res) {
 		bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid,
 		    sc->msix_res);
 	}
 
 	if (sc->l2t)
 		t4_free_l2t(sc->l2t);
 
 	/* Queue arrays and tables allocated during attach. */
 #ifdef TCP_OFFLOAD
 	free(sc->sge.ofld_rxq, M_CXGBE);
 	free(sc->sge.ofld_txq, M_CXGBE);
 #endif
 #ifdef DEV_NETMAP
 	free(sc->sge.nm_rxq, M_CXGBE);
 	free(sc->sge.nm_txq, M_CXGBE);
 #endif
 	free(sc->irq, M_CXGBE);
 	free(sc->sge.rxq, M_CXGBE);
 	free(sc->sge.txq, M_CXGBE);
 	free(sc->sge.ctrlq, M_CXGBE);
 	free(sc->sge.iqmap, M_CXGBE);
 	free(sc->sge.eqmap, M_CXGBE);
 	free(sc->tids.ftid_tab, M_CXGBE);
 	t4_destroy_dma_tag(sc);
 
 	/*
 	 * t4_attach() puts the adapter on t4_list right after initializing
 	 * sc_lock, so an initialized sc_lock means it must be unlinked too.
 	 */
 	if (mtx_initialized(&sc->sc_lock)) {
 		sx_xlock(&t4_list_lock);
 		SLIST_REMOVE(&t4_list, sc, adapter, link);
 		sx_xunlock(&t4_list_lock);
 		mtx_destroy(&sc->sc_lock);
 	}
 
 	/* Stop the callout before destroying the lock it was set up with. */
 	callout_drain(&sc->sfl_callout);
 	if (mtx_initialized(&sc->tids.ftid_lock))
 		mtx_destroy(&sc->tids.ftid_lock);
 	if (mtx_initialized(&sc->sfl_lock))
 		mtx_destroy(&sc->sfl_lock);
 	if (mtx_initialized(&sc->ifp_lock))
 		mtx_destroy(&sc->ifp_lock);
 	if (mtx_initialized(&sc->regwin_lock))
 		mtx_destroy(&sc->regwin_lock);
 
 	bzero(sc, sizeof(*sc));
 
 	return (0);
 }
 
 /*
  * Probe method for a port device: always matches, and describes the device
  * as "port N" using the port_id stored in its softc.
  */
 static int
 cxgbe_probe(device_t dev)
 {
 	struct port_info *pi;
 	char desc[128];
 
 	pi = device_get_softc(dev);
 	snprintf(desc, sizeof(desc), "port %d", pi->port_id);
 	/* The description is copied, so the stack buffer is fine. */
 	device_set_desc_copy(dev, desc);
 
 	return (BUS_PROBE_DEFAULT);
 }
 
 /*
  * T4_CAP is the set of interface capabilities that cxgbe_vi_attach()
  * advertises in if_capabilities; T4_CAP_ENABLE is the set it turns on in
  * if_capenable (currently the same set).
  */
 #define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
     IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
     IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS)
 #define T4_CAP_ENABLE (T4_CAP)
 
 /*
  * Create and attach the network interface for one VI.
  *
  * Allocates the ifnet, installs the driver's ifnet methods and capabilities,
  * sets up ifmedia, registers for vlan_config events, attaches the interface
  * to the Ethernet layer, prints a queue summary, and creates the VI sysctls.
  *
  * Returns 0 on success or ENOMEM if the ifnet cannot be allocated.
  */
 static int
 cxgbe_vi_attach(device_t dev, struct vi_info *vi)
 {
 	struct ifnet *ifp;
 	struct sbuf *summary;
 
 	vi->xact_addr_filt = -1;
 	callout_init(&vi->tick, 1);
 
 	/* Allocate an ifnet and set it up */
 	if ((ifp = if_alloc(IFT_ETHER)) == NULL) {
 		device_printf(dev, "Cannot allocate ifnet\n");
 		return (ENOMEM);
 	}
 	ifp->if_softc = vi;
 	vi->ifp = ifp;
 
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 
 	/* ifnet methods implemented by this driver. */
 	ifp->if_init = cxgbe_init;
 	ifp->if_ioctl = cxgbe_ioctl;
 	ifp->if_transmit = cxgbe_transmit;
 	ifp->if_qflush = cxgbe_qflush;
 	ifp->if_get_counter = cxgbe_get_counter;
 
 	/* TOE is advertised (but not enabled) only if the VI has TOE rxq's. */
 	ifp->if_capabilities = T4_CAP;
 #ifdef TCP_OFFLOAD
 	if (vi->nofldrxq != 0)
 		ifp->if_capabilities |= IFCAP_TOE;
 #endif
 	ifp->if_capenable = T4_CAP_ENABLE;
 	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
 	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
 
 	/* TSO limits. */
 	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
 	ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS;
 	ifp->if_hw_tsomaxsegsize = 65536;
 
 	/* Initialize ifmedia for this VI */
 	ifmedia_init(&vi->media, IFM_IMASK, cxgbe_media_change,
 	    cxgbe_media_status);
 	build_medialist(vi->pi, &vi->media);
 
 	vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp,
 	    EVENTHANDLER_PRI_ANY);
 
 	ether_ifattach(ifp, vi->hw_addr);
 
 	/* One-line summary of the queues assigned to this VI. */
 	summary = sbuf_new_auto();
 	sbuf_printf(summary, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq);
 #ifdef TCP_OFFLOAD
 	if (ifp->if_capabilities & IFCAP_TOE) {
 		sbuf_printf(summary, "; %d txq, %d rxq (TOE)",
 		    vi->nofldtxq, vi->nofldrxq);
 	}
 #endif
 	sbuf_finish(summary);
 	device_printf(dev, "%s\n", sbuf_data(summary));
 	sbuf_delete(summary);
 
 	vi_sysctls(vi);
 
 	return (0);
 }
 
 /*
  * Attach method for a port device.
  *
  * Attaches the port's main VI (vi[0]) directly, then adds a child device for
  * every additional VI, creates the port sysctls, and attaches the children.
  * A VI whose child device cannot be added is logged and skipped.
  *
  * Returns 0 on success or the error from attaching the main VI.
  */
 static int
 cxgbe_attach(device_t dev)
 {
 	struct port_info *pi = device_get_softc(dev);
 	struct vi_info *vi;
 	const char *child_name;
 	int child_unit, error, idx;
 
 	callout_init_mtx(&pi->tick, &pi->pi_lock, 0);
 
 	error = cxgbe_vi_attach(dev, &pi->vi[0]);
 	if (error)
 		return (error);
 
 	for_each_vi(pi, idx, vi) {
 		/* The main VI was attached above. */
 		if (idx == 0)
 			continue;
 #ifdef DEV_NETMAP
 		if (vi->flags & VI_NETMAP) {
 			/*
 			 * media handled here to keep
 			 * implementation private to this file
 			 */
 			ifmedia_init(&vi->media, IFM_IMASK, cxgbe_media_change,
 			    cxgbe_media_status);
 			build_medialist(pi, &vi->media);
 			/* The netmap child shares the port's unit number. */
 			child_name = is_t4(pi->adapter) ? "ncxgbe" : "ncxl";
 			child_unit = device_get_unit(dev);
 		} else
 #endif
 		{
 			child_name = is_t4(pi->adapter) ? "vcxgbe" : "vcxl";
 			child_unit = -1;
 		}
 		vi->dev = device_add_child(dev, child_name, child_unit);
 		if (vi->dev == NULL) {
 			device_printf(dev, "failed to add VI %d\n", idx);
 			continue;
 		}
 		device_set_softc(vi->dev, vi);
 	}
 
 	cxgbe_sysctls(pi);
 
 	bus_generic_attach(dev);
 
 	return (0);
 }
 
 /*
  * Tear down the network interface of one VI: detach it from the Ethernet
  * layer, drop the vlan_config handler, bring the VI down and uninitialize
  * it, then release its media list and ifnet.
  */
 static void
 cxgbe_vi_detach(struct vi_info *vi)
 {
 	struct ifnet *ifp;
 
 	ifp = vi->ifp;
 	ether_ifdetach(ifp);
 
 	if (vi->vlan_c) {
 		EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c);
 	}
 
 	/* Let detach proceed even if these fail. */
 	cxgbe_uninit_synchronized(vi);
 	callout_drain(&vi->tick);
 	vi_full_uninit(vi);
 
 	ifmedia_removeall(&vi->media);
 
 	/* Free the ifnet and clear the VI's reference to it. */
 	if_free(vi->ifp);
 	vi->ifp = NULL;
 }
 
 /*
  * Detach method for a port device.
  *
  * Detaches and deletes the child devices of the extra VIs, then tears down
  * the main VI (vi[0]) and, if this port owns the trace queue, the tracer.
  *
  * Returns 0 on success or the error from bus_generic_detach().
  */
 static int
 cxgbe_detach(device_t dev)
 {
 	struct port_info *pi;
 	struct adapter *sc;
 	struct vi_info *main_vi;
 	int error;
 
 	pi = device_get_softc(dev);
 	sc = pi->adapter;
 	main_vi = &pi->vi[0];
 
 	/* Detach the extra VIs first. */
 	error = bus_generic_detach(dev);
 	if (error)
 		return (error);
 	device_delete_children(dev);
 
 	/*
 	 * NOTE(review): doom_vi() presumably begins the synchronized
 	 * operation that end_synchronized_op() closes below -- confirm.
 	 */
 	doom_vi(sc, main_vi);
 
 	if (pi->flags & HAS_TRACEQ) {
 		sc->traceq = -1;	/* cloner should not create ifnet */
 		t4_tracer_port_detach(sc);
 	}
 
 	cxgbe_vi_detach(main_vi);
 	callout_drain(&pi->tick);
 
 	end_synchronized_op(sc, 0);
 
 	return (0);
 }
 
 /*
  * if_init method: bring the VI up inside a synchronized operation.  'arg' is
  * the VI (the ifnet's if_softc).  Does nothing if the synchronized operation
  * cannot be started; the result of the init itself is not reported.
  */
 static void
 cxgbe_init(void *arg)
 {
 	struct vi_info *vi;
 	struct adapter *sc;
 
 	vi = arg;
 	sc = vi->pi->adapter;
 
 	if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") == 0) {
 		cxgbe_init_synchronized(vi);
 		end_synchronized_op(sc, 0);
 	}
 }
 
 /*
  * if_ioctl method for a VI.
  *
  * Handles MTU, interface flags, multicast list, capability, media, and I2C
  * read requests; everything else is passed to ether_ioctl().  Requests that
  * touch the hardware run inside a synchronized operation on the adapter.
  *
  * Returns 0 on success or an errno value.
  */
 static int
 cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
 {
 	int rc = 0, mtu, flags, can_sleep;
 	struct vi_info *vi = ifp->if_softc;
 	struct adapter *sc = vi->pi->adapter;
 	struct ifreq *ifr = (struct ifreq *)data;
 	uint32_t mask;
 
 	switch (cmd) {
 	case SIOCSIFMTU:
 		mtu = ifr->ifr_mtu;
 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO))
 			return (EINVAL);
 
 		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu");
 		if (rc)
 			return (rc);
 		ifp->if_mtu = mtu;
 		if (vi->flags & VI_INIT_DONE) {
 			t4_update_fl_bufsize(ifp);
 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 				rc = update_mac_settings(ifp, XGMAC_MTU);
 		}
 		end_synchronized_op(sc, 0);
 		break;
 
 	case SIOCSIFFLAGS:
 		/*
 		 * Start in the non-sleeping (lock held) mode.  Whenever the
 		 * work to be done needs the other mode, the operation is
 		 * ended, can_sleep is flipped, and everything is re-evaluated
 		 * from redo_sifflags.
 		 */
 		can_sleep = 0;
 redo_sifflags:
 		rc = begin_synchronized_op(sc, vi,
 		    can_sleep ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4flg");
 		if (rc)
 			return (rc);
 
 		if (ifp->if_flags & IFF_UP) {
 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 				/* Already running: sync promisc/allmulti. */
 				flags = vi->if_flags;
 				if ((ifp->if_flags ^ flags) &
 				    (IFF_PROMISC | IFF_ALLMULTI)) {
 					if (can_sleep == 1) {
 						end_synchronized_op(sc, 0);
 						can_sleep = 0;
 						goto redo_sifflags;
 					}
 					rc = update_mac_settings(ifp,
 					    XGMAC_PROMISC | XGMAC_ALLMULTI);
 				}
 			} else {
 				/* Bringing the interface up may sleep. */
 				if (can_sleep == 0) {
 					end_synchronized_op(sc, LOCK_HELD);
 					can_sleep = 1;
 					goto redo_sifflags;
 				}
 				rc = cxgbe_init_synchronized(vi);
 			}
 			vi->if_flags = ifp->if_flags;
 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 			/* Bringing the interface down may sleep. */
 			if (can_sleep == 0) {
 				end_synchronized_op(sc, LOCK_HELD);
 				can_sleep = 1;
 				goto redo_sifflags;
 			}
 			rc = cxgbe_uninit_synchronized(vi);
 		}
 		end_synchronized_op(sc, can_sleep ? 0 : LOCK_HELD);
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI: /* these two are called with a mutex held :-( */
 		rc = begin_synchronized_op(sc, vi, HOLD_LOCK, "t4multi");
 		if (rc)
 			return (rc);
 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 			rc = update_mac_settings(ifp, XGMAC_MCADDRS);
 		end_synchronized_op(sc, LOCK_HELD);
 		break;
 
 	case SIOCSIFCAP:
 		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap");
 		if (rc)
 			return (rc);
 
 		/* Bits set in mask are the capabilities being toggled. */
 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
 		if (mask & IFCAP_TXCSUM) {
 			ifp->if_capenable ^= IFCAP_TXCSUM;
 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
 
 			if (IFCAP_TSO4 & ifp->if_capenable &&
 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
 				ifp->if_capenable &= ~IFCAP_TSO4;
 				if_printf(ifp,
 				    "tso4 disabled due to -txcsum.\n");
 			}
 		}
 		if (mask & IFCAP_TXCSUM_IPV6) {
 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
 
 			if (IFCAP_TSO6 & ifp->if_capenable &&
 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
 				ifp->if_capenable &= ~IFCAP_TSO6;
 				if_printf(ifp,
 				    "tso6 disabled due to -txcsum6.\n");
 			}
 		}
 		if (mask & IFCAP_RXCSUM)
 			ifp->if_capenable ^= IFCAP_RXCSUM;
 		if (mask & IFCAP_RXCSUM_IPV6)
 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
 
 		/*
 		 * Note that we leave CSUM_TSO alone (it is always set).  The
 		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
 		 * sending a TSO request our way, so it's sufficient to toggle
 		 * IFCAP_TSOx only.
 		 */
 		if (mask & IFCAP_TSO4) {
 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
 				if_printf(ifp, "enable txcsum first.\n");
 				rc = EAGAIN;
 				goto fail;
 			}
 			ifp->if_capenable ^= IFCAP_TSO4;
 		}
 		if (mask & IFCAP_TSO6) {
 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
 				if_printf(ifp, "enable txcsum6 first.\n");
 				rc = EAGAIN;
 				goto fail;
 			}
 			ifp->if_capenable ^= IFCAP_TSO6;
 		}
 		if (mask & IFCAP_LRO) {
 #if defined(INET) || defined(INET6)
 			int i;
 			struct sge_rxq *rxq;
 
 			ifp->if_capenable ^= IFCAP_LRO;
 			for_each_rxq(vi, i, rxq) {
 				if (ifp->if_capenable & IFCAP_LRO)
 					rxq->iq.flags |= IQ_LRO_ENABLED;
 				else
 					rxq->iq.flags &= ~IQ_LRO_ENABLED;
 			}
 #endif
 		}
 #ifdef TCP_OFFLOAD
 		if (mask & IFCAP_TOE) {
 			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE;
 
 			rc = toe_capability(vi, enable);
 			if (rc != 0)
 				goto fail;
 
 			/*
 			 * Toggle only the TOE bits.  The other bits in mask
 			 * are toggled by their own handlers above and below;
 			 * toggling all of mask here would undo or double up
 			 * those changes.
 			 */
 			ifp->if_capenable ^= (mask & IFCAP_TOE);
 		}
 #endif
 		if (mask & IFCAP_VLAN_HWTAGGING) {
 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 				rc = update_mac_settings(ifp, XGMAC_VLANEX);
 		}
 		if (mask & IFCAP_VLAN_MTU) {
 			ifp->if_capenable ^= IFCAP_VLAN_MTU;
 
 			/* Need to find out how to disable auto-mtu-inflation */
 		}
 		if (mask & IFCAP_VLAN_HWTSO)
 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
 		if (mask & IFCAP_VLAN_HWCSUM)
 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
 
 #ifdef VLAN_CAPABILITIES
 		VLAN_CAPABILITIES(ifp);
 #endif
 fail:
 		/* Reached on success too; rc carries the outcome. */
 		end_synchronized_op(sc, 0);
 		break;
 
 	case SIOCSIFMEDIA:
 	case SIOCGIFMEDIA:
 		/* Report media errors to the caller instead of dropping them. */
 		rc = ifmedia_ioctl(ifp, ifr, &vi->media, cmd);
 		break;
 
 	case SIOCGI2C: {
 		struct ifi2creq i2c;
 
 		rc = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
 		if (rc != 0)
 			break;
 		/* Only device addresses 0xA0 and 0xA2 may be read. */
 		if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
 			rc = EPERM;
 			break;
 		}
 		if (i2c.len > sizeof(i2c.data)) {
 			rc = EINVAL;
 			break;
 		}
 		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c");
 		if (rc)
 			return (rc);
 		rc = -t4_i2c_rd(sc, sc->mbox, vi->pi->port_id, i2c.dev_addr,
 		    i2c.offset, i2c.len, &i2c.data[0]);
 		end_synchronized_op(sc, 0);
 		if (rc == 0)
 			rc = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
 		break;
 	}
 
 	default:
 		rc = ether_ioctl(ifp, cmd, data);
 	}
 
 	return (rc);
 }
 
 static int
 cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	struct vi_info *vi = ifp->if_softc;
 	struct port_info *pi = vi->pi;
 	struct adapter *sc = pi->adapter;
 	struct sge_txq *txq;
 	void *items[1];
 	int rc;
 
 	M_ASSERTPKTHDR(m);
 	MPASS(m->m_nextpkt == NULL);	/* not quite ready for this yet */
 
 	if (__predict_false(pi->link_cfg.link_ok == 0)) {
 		m_freem(m);
 		return (ENETDOWN);
 	}
 
 	rc = parse_pkt(&m);
 	if (__predict_false(rc != 0)) {
 		MPASS(m == NULL);			/* was freed already */
 		atomic_add_int(&pi->tx_parse_error, 1);	/* rare, atomic is ok */
 		return (rc);
 	}
 
 	/* Select a txq. */
 	txq = &sc->sge.txq[vi->first_txq];
 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
 		txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) +
 		    vi->rsrv_noflowq);
 
 	items[0] = m;
 	rc = mp_ring_enqueue(txq->r, items, 1, 4096);
 	if (__predict_false(rc != 0))
 		m_freem(m);
 
 	return (rc);
 }
 
 /*
  * Flush the interface's transmit path: disable every tx queue of the VI,
  * wait for each queue's mp_ring to go idle, then run the generic if_qflush().
  */
 static void
 cxgbe_qflush(struct ifnet *ifp)
 {
 	struct vi_info *vi = ifp->if_softc;
 	struct sge_txq *txq;
 	int qidx;
 
 	/* The tx queues only exist once VI_INIT_DONE is set. */
 	if ((vi->flags & VI_INIT_DONE) != 0) {
 		for_each_txq(vi, qidx, txq) {
 			/* Clear EQ_ENABLED under the txq lock ... */
 			TXQ_LOCK(txq);
 			txq->eq.flags &= ~EQ_ENABLED;
 			TXQ_UNLOCK(txq);
 
 			/* ... then poll until the ring reports idle. */
 			for (;;) {
 				if (mp_ring_is_idle(txq->r))
 					break;
 				mp_ring_check_drainage(txq->r, 0);
 				pause("qflush", 1);
 			}
 		}
 	}
 	if_qflush(ifp);
 }
 
 /*
  * Interface counter lookup based on the per-VI statistics (vi->stats).
  * cxgbe_get_counter() defers to this routine when the port has more than
  * one VI.  Counters not handled here fall through to
  * if_get_counter_default().
  */
 static uint64_t
 vi_get_counter(struct ifnet *ifp, ift_counter c)
 {
 	struct vi_info *vi = ifp->if_softc;
 	struct fw_vi_stats_vf *st = &vi->stats;
 	struct sge_txq *txq;
 	uint64_t total;
 	int q;
 
 	/* Bring vi->stats up to date before reading from it. */
 	vi_refresh_stats(vi->pi->adapter, vi);
 
 	switch (c) {
 	case IFCOUNTER_IPACKETS:
 		total = st->rx_bcast_frames + st->rx_mcast_frames +
 		    st->rx_ucast_frames;
 		break;
 	case IFCOUNTER_IERRORS:
 		total = st->rx_err_frames;
 		break;
 	case IFCOUNTER_OPACKETS:
 		total = st->tx_bcast_frames + st->tx_mcast_frames +
 		    st->tx_ucast_frames + st->tx_offload_frames;
 		break;
 	case IFCOUNTER_OERRORS:
 		total = st->tx_drop_frames;
 		break;
 	case IFCOUNTER_IBYTES:
 		total = st->rx_bcast_bytes + st->rx_mcast_bytes +
 		    st->rx_ucast_bytes;
 		break;
 	case IFCOUNTER_OBYTES:
 		total = st->tx_bcast_bytes + st->tx_mcast_bytes +
 		    st->tx_ucast_bytes + st->tx_offload_bytes;
 		break;
 	case IFCOUNTER_IMCASTS:
 		total = st->rx_mcast_frames;
 		break;
 	case IFCOUNTER_OMCASTS:
 		total = st->tx_mcast_frames;
 		break;
 	case IFCOUNTER_OQDROPS:
 		/*
 		 * Sum the drop counters of the tx rings.  They are consulted
 		 * only when the VI is initialized and is not a netmap VI.
 		 */
 		total = 0;
 		if ((vi->flags & (VI_INIT_DONE | VI_NETMAP)) == VI_INIT_DONE) {
 			for_each_txq(vi, q, txq)
 				total += counter_u64_fetch(txq->r->drops);
 		}
 		break;
 	default:
 		total = if_get_counter_default(ifp, c);
 		break;
 	}
 
 	return (total);
 }
 
 /*
  * Interface counter lookup.  Ports with more than one VI use the per-VI
  * statistics (vi_get_counter); otherwise the values are derived from the
  * port-wide statistics in pi->stats.  Counters not handled here fall
  * through to if_get_counter_default().
  */
 uint64_t
 cxgbe_get_counter(struct ifnet *ifp, ift_counter c)
 {
 	struct vi_info *vi = ifp->if_softc;
 	struct port_info *pi = vi->pi;
 	struct adapter *sc = pi->adapter;
 	struct port_stats *ps = &pi->stats;
 	struct sge_txq *txq;
 	uint64_t total;
 	int q;
 
 	if (pi->nvi > 1)
 		return (vi_get_counter(ifp, c));
 
 	/* Bring pi->stats up to date before reading from it. */
 	cxgbe_refresh_stats(sc, pi);
 
 	switch (c) {
 	case IFCOUNTER_IPACKETS:
 		/* Pause frames are subtracted from the frame count. */
 		total = ps->rx_frames - ps->rx_pause;
 		break;
 	case IFCOUNTER_IERRORS:
 		total = ps->rx_jabber + ps->rx_runt + ps->rx_too_long +
 		    ps->rx_fcs_err + ps->rx_len_err;
 		break;
 	case IFCOUNTER_OPACKETS:
 		total = ps->tx_frames - ps->tx_pause;
 		break;
 	case IFCOUNTER_OERRORS:
 		total = ps->tx_error_frames;
 		break;
 	case IFCOUNTER_IBYTES:
 		/* Each pause frame is accounted as 64 octets. */
 		total = ps->rx_octets - ps->rx_pause * 64;
 		break;
 	case IFCOUNTER_OBYTES:
 		total = ps->tx_octets - ps->tx_pause * 64;
 		break;
 	case IFCOUNTER_IMCASTS:
 		total = ps->rx_mcast_frames - ps->rx_pause;
 		break;
 	case IFCOUNTER_OMCASTS:
 		total = ps->tx_mcast_frames - ps->tx_pause;
 		break;
 	case IFCOUNTER_IQDROPS:
 		total = ps->rx_ovflow0 + ps->rx_ovflow1 + ps->rx_ovflow2 +
 		    ps->rx_ovflow3 + ps->rx_trunc0 + ps->rx_trunc1 +
 		    ps->rx_trunc2 + ps->rx_trunc3 + pi->tnl_cong_drops;
 		break;
 	case IFCOUNTER_OQDROPS:
 		/* Port-level tx drops plus the drops of each tx ring. */
 		total = ps->tx_drop;
 		if (vi->flags & VI_INIT_DONE) {
 			for_each_txq(vi, q, txq)
 				total += counter_u64_fetch(txq->r->drops);
 		}
 		break;
 	default:
 		total = if_get_counter_default(ifp, c);
 		break;
 	}
 
 	return (total);
 }
 
 /*
  * Media change request.  Not implemented: logs a message naming this
  * function and returns EOPNOTSUPP.
  */
 static int
 cxgbe_media_change(struct ifnet *ifp)
 {
 	struct vi_info *vi;
 
 	vi = ifp->if_softc;
 	device_printf(vi->dev, "%s unimplemented.\n", __func__);
 	return (EOPNOTSUPP);
 }
 
 /*
  * Media status report.  ifm_status is always marked valid and is marked
  * active when the link is up.  ifm_active is filled in here only when the
  * link is up and the currently selected media is autoselect, in which case
  * it is derived from the negotiated link speed.
  */
 static void
 cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
 	struct vi_info *vi = ifp->if_softc;
 	struct port_info *pi = vi->pi;
 	struct ifmedia_entry *selected = vi->media.ifm_cur;
 	int speed = pi->link_cfg.speed;
 
 	ifmr->ifm_status = IFM_AVALID;
 	if (pi->link_cfg.link_ok) {
 		ifmr->ifm_status |= IFM_ACTIVE;
 
 		/* Active and current differ only for autoselect media. */
 		if (IFM_SUBTYPE(selected->ifm_media) == IFM_AUTO) {
 			ifmr->ifm_active = IFM_ETHER | IFM_FDX;
 			if (speed == SPEED_10000) {
 				ifmr->ifm_active |= IFM_10G_T;
 			} else if (speed == SPEED_1000) {
 				ifmr->ifm_active |= IFM_1000_T;
 			} else if (speed == SPEED_100) {
 				ifmr->ifm_active |= IFM_100_TX;
 			} else if (speed == SPEED_10) {
 				ifmr->ifm_active |= IFM_10_T;
 			} else {
 				/* Link is up but the speed is not recognized. */
 				KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
 					    speed));
 			}
 		}
 	}
 }
 
 /*
  * Probe routine for a VI device: sets the device description to
  * "port <port_id> vi <index of the VI within its port>" and accepts the
  * device with BUS_PROBE_DEFAULT.
  */
 static int
 vcxgbe_probe(device_t dev)
 {
 	struct vi_info *vi = device_get_softc(dev);
 	struct port_info *pi = vi->pi;
 	char desc[128];
 
 	/* vi - pi->vi is this VI's position in the port's VI array. */
 	snprintf(desc, sizeof(desc), "port %d vi %td", pi->port_id,
 	    vi - pi->vi);
 	device_set_desc_copy(dev, desc);
 
 	return (BUS_PROBE_DEFAULT);
 }
 
 /*
  * Attach routine for a VI device: allocates the virtual interface from the
  * firmware, looks up its RSS base, and then performs the common VI attach.
  * The VI is released again if the common attach fails.
  *
  * Returns 0 on success or a positive errno.
  */
 static int
 vcxgbe_attach(device_t dev)
 {
 	struct vi_info *vi = device_get_softc(dev);
 	struct port_info *pi = vi->pi;
 	struct adapter *sc = pi->adapter;
 	int mac_func, vi_idx, ret;
 	u32 param, val;
 
 	/* Each VI of a port uses the MAC function listed for its index. */
 	vi_idx = vi - pi->vi;
 	KASSERT(vi_idx < nitems(vi_mac_funcs),
 	    ("%s: VI %s doesn't have a MAC func", __func__,
 	    device_get_nameunit(dev)));
 	mac_func = vi_mac_funcs[vi_idx];
 
 	/* A negative return is an error; otherwise it is the new VI's id. */
 	ret = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1,
 	    vi->hw_addr, &vi->rss_size, mac_func, 0);
 	if (ret < 0) {
 		device_printf(dev, "Failed to allocate virtual interface "
 		    "for port %d: %d\n", pi->port_id, -ret);
 		return (-ret);
 	}
 	vi->viid = ret;
 
 	/*
 	 * Ask the firmware for the VI's RSS info.  0xffff is recorded as the
 	 * RSS base when the query fails; otherwise the low 16 bits of the
 	 * reply are used.
 	 */
 	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
 	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) |
 	    V_FW_PARAMS_PARAM_YZ(vi->viid);
 	if (t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val) != 0)
 		vi->rss_base = 0xffff;
 	else
 		vi->rss_base = val & 0xffff;
 
 	ret = cxgbe_vi_attach(dev, vi);
 	if (ret != 0)
 		t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
 
 	return (ret);
 }
 
 /*
  * Detach routine for a VI device: dooms the VI, performs the common VI
  * detach, and returns the virtual interface to the firmware.  Always
  * returns 0.
  */
 static int
 vcxgbe_detach(device_t dev)
 {
 	struct vi_info *vi = device_get_softc(dev);
 	struct adapter *sc = vi->pi->adapter;
 
 	doom_vi(sc, vi);
 
 	cxgbe_vi_detach(vi);
 	t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
 
 	/*
 	 * NOTE(review): presumably doom_vi() leaves a synchronized operation
 	 * open that is closed here -- confirm against doom_vi().
 	 */
 	end_synchronized_op(sc, 0);
 
 	return (0);
 }
 
 /*
  * Fatal error handling: clear F_GLOBALENABLE in A_SGE_CONTROL, disable the
  * adapter's interrupts, and log an emergency message naming the device.
  */
 void
 t4_fatal_err(struct adapter *sc)
 {
 	const char *name;
 
 	t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0);
 	t4_intr_disable(sc);
 
 	name = device_get_nameunit(sc->dev);
 	log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n",
 	    name);
 }
 
 static int
 map_bars_0_and_4(struct adapter *sc)
 {
 	sc->regs_rid = PCIR_BAR(0);
 	sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
 	    &sc->regs_rid, RF_ACTIVE);
 	if (sc->regs_res == NULL) {
 		device_printf(sc->dev, "cannot map registers.\n");
 		return (ENXIO);
 	}
 	sc->bt = rman_get_bustag(sc->regs_res);
 	sc->bh = rman_get_bushandle(sc->regs_res);
 	sc->mmio_len = rman_get_size(sc->regs_res);
 	setbit(&sc->doorbells, DOORBELL_KDB);
 
 	sc->msix_rid = PCIR_BAR(4);
 	sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
 	    &sc->msix_rid, RF_ACTIVE);
 	if (sc->msix_res == NULL) {
 		device_printf(sc->dev, "cannot map MSI-X BAR.\n");
 		return (ENXIO);
 	}
 
 	return (0);
 }
 
 static int
 map_bar_2(struct adapter *sc)
 {
 
 	/*
 	 * T4: only iWARP driver uses the userspace doorbells.  There is no need
 	 * to map it if RDMA is disabled.
 	 */
 	if (is_t4(sc) && sc->rdmacaps == 0)
 		return (0);
 
 	sc->udbs_rid = PCIR_BAR(2);
 	sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
 	    &sc->udbs_rid, RF_ACTIVE);
 	if (sc->udbs_res == NULL) {
 		device_printf(sc->dev, "cannot map doorbell BAR.\n");
 		return (ENXIO);
 	}
 	sc->udbs_base = rman_get_virtual(sc->udbs_res);
 
 	if (is_t5(sc)) {
 		setbit(&sc->doorbells, DOORBELL_UDB);
 #if defined(__i386__) || defined(__amd64__)
 		if (t5_write_combine) {
 			int rc;
 
 			/*
 			 * Enable write combining on BAR2.  This is the
 			 * userspace doorbell BAR and is split into 128B
 			 * (UDBS_SEG_SIZE) doorbell regions, each associated
 			 * with an egress queue.  The first 64B has the doorbell
 			 * and the second 64B can be used to submit a tx work
 			 * request with an implicit doorbell.
 			 */
 
 			rc = pmap_change_attr((vm_offset_t)sc->udbs_base,
 			    rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING);
 			if (rc == 0) {
 				clrbit(&sc->doorbells, DOORBELL_UDB);
 				setbit(&sc->doorbells, DOORBELL_WCWR);
 				setbit(&sc->doorbells, DOORBELL_UDBWC);
 			} else {
 				device_printf(sc->dev,
 				    "couldn't enable write combining: %d\n",
 				    rc);
 			}
 
 			t4_write_reg(sc, A_SGE_STAT_CFG,
 			    V_STATSOURCE_T5(7) | V_STATMODE(0));
 		}
 #endif
 	}
 
 	return (0);
 }
 
 /* Memory windows (base and aperture of each) used on T4 adapters. */
 static const struct memwin t4_memwin[] = {
 	{ .base = MEMWIN0_BASE, .aperture = MEMWIN0_APERTURE },
 	{ .base = MEMWIN1_BASE, .aperture = MEMWIN1_APERTURE },
 	{ .base = MEMWIN2_BASE_T4, .aperture = MEMWIN2_APERTURE_T4 },
 };
 
 /* Memory windows used on non-T4 adapters; only window 2 differs. */
 static const struct memwin t5_memwin[] = {
 	{ .base = MEMWIN0_BASE, .aperture = MEMWIN0_APERTURE },
 	{ .base = MEMWIN1_BASE, .aperture = MEMWIN1_APERTURE },
 	{ .base = MEMWIN2_BASE_T5, .aperture = MEMWIN2_APERTURE_T5 },
 };
 
 static void
 setup_memwin(struct adapter *sc)
 {
 	const struct memwin *mw;
 	int i, n;
 	uint32_t bar0;
 
 	if (is_t4(sc)) {
 		/*
 		 * Read low 32b of bar0 indirectly via the hardware backdoor
 		 * mechanism.  Works from within PCI passthrough environments
 		 * too, where rman_get_start() can return a different value.  We
 		 * need to program the T4 memory window decoders with the actual
 		 * addresses that will be coming across the PCIe link.
 		 */
 		bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0));
 		bar0 &= (uint32_t) PCIM_BAR_MEM_BASE;
 
 		mw = &t4_memwin[0];
 		n = nitems(t4_memwin);
 	} else {
 		/* T5 uses the relative offset inside the PCIe BAR */
 		bar0 = 0;
 
 		mw = &t5_memwin[0];
 		n = nitems(t5_memwin);
 	}
 
 	for (i = 0; i < n; i++, mw++) {
 		t4_write_reg(sc,
 		    PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i),
 		    (mw->base + bar0) | V_BIR(0) |
 		    V_WINDOW(ilog2(mw->aperture) - 10));
 	}
 
 	/* flush */
 	t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2));
 }
 
 /*
  * Verify that the memory range specified by the addr/len pair is valid and lies
  * entirely within a single region (EDCx or MCx).
  *
  * Returns 0 if the range is acceptable, EINVAL if addr or len is not a
  * multiple of 4 or len is not positive, and EFAULT if the range does not fit
  * inside any enabled memory.
  *
  * The containment test is written as "offset of addr within the region, and
  * room left after that offset" so that neither addr + len nor maddr + mlen
  * is ever computed: both sums can wrap around in 32-bit arithmetic and would
  * let an out-of-range request through.
  */
 static int
 validate_mem_range(struct adapter *sc, uint32_t addr, int len)
 {
 	uint32_t em, addr_len, maddr, mlen, ulen;
 
 	/*
 	 * Memory can only be accessed in naturally aligned 4 byte units.  A
 	 * negative length is rejected too: it would pass the alignment test
 	 * when it is a multiple of 4.
 	 */
 	if (addr & 3 || len & 3 || len <= 0)
 		return (EINVAL);
 	ulen = len;
 
 	/* Enabled memories */
 	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
 	if (em & F_EDRAM0_ENABLE) {
 		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
 		maddr = G_EDRAM0_BASE(addr_len) << 20;
 		mlen = G_EDRAM0_SIZE(addr_len) << 20;
 		if (addr >= maddr && addr - maddr < mlen &&
 		    ulen <= mlen - (addr - maddr))
 			return (0);
 	}
 	if (em & F_EDRAM1_ENABLE) {
 		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
 		maddr = G_EDRAM1_BASE(addr_len) << 20;
 		mlen = G_EDRAM1_SIZE(addr_len) << 20;
 		if (addr >= maddr && addr - maddr < mlen &&
 		    ulen <= mlen - (addr - maddr))
 			return (0);
 	}
 	if (em & F_EXT_MEM_ENABLE) {
 		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
 		maddr = G_EXT_MEM_BASE(addr_len) << 20;
 		mlen = G_EXT_MEM_SIZE(addr_len) << 20;
 		if (addr >= maddr && addr - maddr < mlen &&
 		    ulen <= mlen - (addr - maddr))
 			return (0);
 	}
 	/* The second external memory is not considered on T4. */
 	if (!is_t4(sc) && em & F_EXT_MEM1_ENABLE) {
 		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
 		maddr = G_EXT_MEM1_BASE(addr_len) << 20;
 		mlen = G_EXT_MEM1_SIZE(addr_len) << 20;
 		if (addr >= maddr && addr - maddr < mlen &&
 		    ulen <= mlen - (addr - maddr))
 			return (0);
 	}
 
 	return (EFAULT);
 }
 
 /*
  * Translate a firmware memory type (FW_MEMTYPE_*) into the driver's MEM_*
  * identifier.  Panics on a memory type it does not know.
  */
 static int
 fwmtype_to_hwmtype(int mtype)
 {
 
 	if (mtype == FW_MEMTYPE_EDC0)
 		return (MEM_EDC0);
 	if (mtype == FW_MEMTYPE_EDC1)
 		return (MEM_EDC1);
 	if (mtype == FW_MEMTYPE_EXTMEM)
 		return (MEM_MC0);
 	if (mtype == FW_MEMTYPE_EXTMEM1)
 		return (MEM_MC1);
 
 	panic("%s: cannot translate fw mtype %d.", __func__, mtype);
 }
 
 /*
  * Verify that the memory range specified by the memtype/offset/len pair is
  * valid and lies entirely within the memtype specified.  The global address of
  * the start of the range is returned in addr.
  *
  * Returns 0 on success (and sets *addr), EINVAL if off or len is not a
  * multiple of 4, len is not positive, or the memory type is not enabled or
  * not available on this chip, and EFAULT if the range does not fit inside
  * the memory.  *addr is left untouched on failure.
  */
 static int
 validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, int len,
     uint32_t *addr)
 {
 	uint32_t em, addr_len, maddr, mlen;
 
 	/*
 	 * Memory can only be accessed in naturally aligned 4 byte units.  A
 	 * negative length is rejected too: it would pass the alignment test
 	 * when it is a multiple of 4.
 	 */
 	if (off & 3 || len & 3 || len <= 0)
 		return (EINVAL);
 
 	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
 	switch (fwmtype_to_hwmtype(mtype)) {
 	case MEM_EDC0:
 		if (!(em & F_EDRAM0_ENABLE))
 			return (EINVAL);
 		addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR);
 		maddr = G_EDRAM0_BASE(addr_len) << 20;
 		mlen = G_EDRAM0_SIZE(addr_len) << 20;
 		break;
 	case MEM_EDC1:
 		if (!(em & F_EDRAM1_ENABLE))
 			return (EINVAL);
 		addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR);
 		maddr = G_EDRAM1_BASE(addr_len) << 20;
 		mlen = G_EDRAM1_SIZE(addr_len) << 20;
 		break;
 	case MEM_MC:
 		if (!(em & F_EXT_MEM_ENABLE))
 			return (EINVAL);
 		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
 		maddr = G_EXT_MEM_BASE(addr_len) << 20;
 		mlen = G_EXT_MEM_SIZE(addr_len) << 20;
 		break;
 	case MEM_MC1:
 		/* The second external memory is not available on T4. */
 		if (is_t4(sc) || !(em & F_EXT_MEM1_ENABLE))
 			return (EINVAL);
 		addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
 		maddr = G_EXT_MEM1_BASE(addr_len) << 20;
 		mlen = G_EXT_MEM1_SIZE(addr_len) << 20;
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	/*
 	 * Compare len against the room left after off instead of computing
 	 * off + len, which can wrap around in 32-bit arithmetic.
 	 */
 	if (off < mlen && (uint32_t)len <= mlen - off) {
 		*addr = maddr + off;	/* global address */
 		return (0);
 	}
 
 	return (EFAULT);
 }
 
 /*
  * Look up memory window 'win' in the chip-specific table and report its
  * base and/or aperture.  Either output pointer may be NULL if the caller is
  * not interested in that value.
  */
 static void
 memwin_info(struct adapter *sc, int win, uint32_t *base, uint32_t *aperture)
 {
 	const struct memwin *tbl;
 
 	if (is_t4(sc)) {
 		KASSERT(win >= 0 && win < nitems(t4_memwin),
 		    ("%s: incorrect memwin# (%d)", __func__, win));
 		tbl = t4_memwin;
 	} else {
 		KASSERT(win >= 0 && win < nitems(t5_memwin),
 		    ("%s: incorrect memwin# (%d)", __func__, win));
 		tbl = t5_memwin;
 	}
 
 	if (base != NULL)
 		*base = tbl[win].base;
 	if (aperture != NULL)
 		*aperture = tbl[win].aperture;
 }
 
 /*
  * Move memory window n so that it covers addr in the chip's address space.
  * The window start is addr rounded down to the alignment the chip requires;
  * the value returned is how far addr lies beyond that start.
  */
 static uint32_t
 position_memwin(struct adapter *sc, int n, uint32_t addr)
 {
 	uint32_t win_start, pf_bits, align_mask;
 	uint32_t reg;
 
 	KASSERT(n >= 0 && n <= 3,
 	    ("%s: invalid window %d.", __func__, n));
 	KASSERT((addr & 3) == 0,
 	    ("%s: addr (0x%x) is not at a 4B boundary.", __func__, addr));
 
 	if (is_t4(sc)) {
 		/* T4: 16B aligned start, no PF number in the register. */
 		align_mask = 0xf;
 		pf_bits = 0;
 	} else {
 		/* Otherwise: 128B aligned start, PF number included. */
 		align_mask = 0x7f;
 		pf_bits = V_PFNUM(sc->pf);
 	}
 	win_start = addr & ~align_mask;
 
 	reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, n);
 	t4_write_reg(sc, reg, win_start | pf_bits);
 	/* Read the register back after writing it. */
 	t4_read_reg(sc, reg);
 
 	return (addr - win_start);
 }
 
 static int
 cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, int num_vis,
     struct intrs_and_queues *iaq)
 {
 	int rc, itype, navail, nrxq10g, nrxq1g, n;
 	int nofldrxq10g = 0, nofldrxq1g = 0;
 	int nnmrxq10g = 0, nnmrxq1g = 0;
 
 	bzero(iaq, sizeof(*iaq));
 
 	iaq->ntxq10g = t4_ntxq10g;
 	iaq->ntxq1g = t4_ntxq1g;
 	iaq->nrxq10g = nrxq10g = t4_nrxq10g;
 	iaq->nrxq1g = nrxq1g = t4_nrxq1g;
 	iaq->rsrv_noflowq = t4_rsrv_noflowq;
 #ifdef TCP_OFFLOAD
 	if (is_offload(sc)) {
 		iaq->nofldtxq10g = t4_nofldtxq10g;
 		iaq->nofldtxq1g = t4_nofldtxq1g;
 		iaq->nofldrxq10g = nofldrxq10g = t4_nofldrxq10g;
 		iaq->nofldrxq1g = nofldrxq1g = t4_nofldrxq1g;
 	}
 #endif
 #ifdef DEV_NETMAP
 	iaq->nnmtxq10g = t4_nnmtxq10g;
 	iaq->nnmtxq1g = t4_nnmtxq1g;
 	iaq->nnmrxq10g = nnmrxq10g = t4_nnmrxq10g;
 	iaq->nnmrxq1g = nnmrxq1g = t4_nnmrxq1g;
 #endif
 
 	for (itype = INTR_MSIX; itype; itype >>= 1) {
 
 		if ((itype & t4_intr_types) == 0)
 			continue;	/* not allowed */
 
 		if (itype == INTR_MSIX)
 			navail = pci_msix_count(sc->dev);
 		else if (itype == INTR_MSI)
 			navail = pci_msi_count(sc->dev);
 		else
 			navail = 1;
 restart:
 		if (navail == 0)
 			continue;
 
 		iaq->intr_type = itype;
 		iaq->intr_flags_10g = 0;
 		iaq->intr_flags_1g = 0;
 
 		/*
 		 * Best option: an interrupt vector for errors, one for the
 		 * firmware event queue, and one for every rxq (NIC, TOE, and
 		 * netmap).
 		 */
 		iaq->nirq = T4_EXTRA_INTR;
 		iaq->nirq += n10g * (nrxq10g + nofldrxq10g + nnmrxq10g);
 		iaq->nirq += n10g * 2 * (num_vis - 1);
 		iaq->nirq += n1g * (nrxq1g + nofldrxq1g + nnmrxq1g);
 		iaq->nirq += n1g * 2 * (num_vis - 1);
 		if (iaq->nirq <= navail &&
 		    (itype != INTR_MSI || powerof2(iaq->nirq))) {
 			iaq->intr_flags_10g = INTR_ALL;
 			iaq->intr_flags_1g = INTR_ALL;
 			goto allocate;
 		}
 
 		/*
 		 * Second best option: a vector for errors, one for the firmware
 		 * event queue, and vectors for either all the NIC rx queues or
 		 * all the TOE rx queues.  The queues that don't get vectors
 		 * will forward their interrupts to those that do.
 		 *
 		 * Note: netmap rx queues cannot be created early and so they
 		 * can't be setup to receive forwarded interrupts for others.
 		 */
 		iaq->nirq = T4_EXTRA_INTR;
 		if (nrxq10g >= nofldrxq10g) {
 			iaq->intr_flags_10g = INTR_RXQ;
 			iaq->nirq += n10g * nrxq10g;
 			iaq->nirq += n10g * (num_vis - 1);
 #ifdef DEV_NETMAP
 			iaq->nnmrxq10g = min(nnmrxq10g, nrxq10g);
 #endif
 		} else {
 			iaq->intr_flags_10g = INTR_OFLD_RXQ;
 			iaq->nirq += n10g * nofldrxq10g;
 #ifdef DEV_NETMAP
 			iaq->nnmrxq10g = min(nnmrxq10g, nofldrxq10g);
 #endif
 		}
 		if (nrxq1g >= nofldrxq1g) {
 			iaq->intr_flags_1g = INTR_RXQ;
 			iaq->nirq += n1g * nrxq1g;
 			iaq->nirq += n1g * (num_vis - 1);
 #ifdef DEV_NETMAP
 			iaq->nnmrxq1g = min(nnmrxq1g, nrxq1g);
 #endif
 		} else {
 			iaq->intr_flags_1g = INTR_OFLD_RXQ;
 			iaq->nirq += n1g * nofldrxq1g;
 #ifdef DEV_NETMAP
 			iaq->nnmrxq1g = min(nnmrxq1g, nofldrxq1g);
 #endif
 		}
 		if (iaq->nirq <= navail &&
 		    (itype != INTR_MSI || powerof2(iaq->nirq)))
 			goto allocate;
 
 		/*
 		 * Next best option: an interrupt vector for errors, one for the
 		 * firmware event queue, and at least one per VI.  At this
 		 * point we know we'll have to downsize nrxq and/or nofldrxq
 		 * and/or nnmrxq to fit what's available to us.
 		 */
 		iaq->nirq = T4_EXTRA_INTR;
 		iaq->nirq += (n10g + n1g) * num_vis;
 		if (iaq->nirq <= navail) {
 			int leftover = navail - iaq->nirq;
 
 			if (n10g > 0) {
 				int target = max(nrxq10g, nofldrxq10g);
 
 				iaq->intr_flags_10g = nrxq10g >= nofldrxq10g ?
 				    INTR_RXQ : INTR_OFLD_RXQ;
 
 				n = 1;
 				while (n < target && leftover >= n10g) {
 					leftover -= n10g;
 					iaq->nirq += n10g;
 					n++;
 				}
 				iaq->nrxq10g = min(n, nrxq10g);
 #ifdef TCP_OFFLOAD
 				iaq->nofldrxq10g = min(n, nofldrxq10g);
 #endif
 #ifdef DEV_NETMAP
 				iaq->nnmrxq10g = min(n, nnmrxq10g);
 #endif
 			}
 
 			if (n1g > 0) {
 				int target = max(nrxq1g, nofldrxq1g);
 
 				iaq->intr_flags_1g = nrxq1g >= nofldrxq1g ?
 				    INTR_RXQ : INTR_OFLD_RXQ;
 
 				n = 1;
 				while (n < target && leftover >= n1g) {
 					leftover -= n1g;
 					iaq->nirq += n1g;
 					n++;
 				}
 				iaq->nrxq1g = min(n, nrxq1g);
 #ifdef TCP_OFFLOAD
 				iaq->nofldrxq1g = min(n, nofldrxq1g);
 #endif
 #ifdef DEV_NETMAP
 				iaq->nnmrxq1g = min(n, nnmrxq1g);
 #endif
 			}
 
 			if (itype != INTR_MSI || powerof2(iaq->nirq))
 				goto allocate;
 		}
 
 		/*
 		 * Least desirable option: one interrupt vector for everything.
 		 */
 		iaq->nirq = iaq->nrxq10g = iaq->nrxq1g = 1;
 		iaq->intr_flags_10g = iaq->intr_flags_1g = 0;
 #ifdef TCP_OFFLOAD
 		if (is_offload(sc))
 			iaq->nofldrxq10g = iaq->nofldrxq1g = 1;
 #endif
 #ifdef DEV_NETMAP
 		iaq->nnmrxq10g = iaq->nnmrxq1g = 1;
 #endif
 
 allocate:
 		navail = iaq->nirq;
 		rc = 0;
 		if (itype == INTR_MSIX)
 			rc = pci_alloc_msix(sc->dev, &navail);
 		else if (itype == INTR_MSI)
 			rc = pci_alloc_msi(sc->dev, &navail);
 
 		if (rc == 0) {
 			if (navail == iaq->nirq)
 				return (0);
 
 			/*
 			 * Didn't get the number requested.  Use whatever number
 			 * the kernel is willing to allocate (it's in navail).
 			 */
 			device_printf(sc->dev, "fewer vectors than requested, "
 			    "type=%d, req=%d, rcvd=%d; will downshift req.\n",
 			    itype, iaq->nirq, navail);
 			pci_release_msi(sc->dev);
 			goto restart;
 		}
 
 		device_printf(sc->dev,
 		    "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n",
 		    itype, rc, iaq->nirq, navail);
 	}
 
 	device_printf(sc->dev,
 	    "failed to find a usable interrupt type.  "
 	    "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types,
 	    pci_msix_count(sc->dev), pci_msi_count(sc->dev));
 
 	return (ENXIO);
 }
 
 /*
  * FW_VERSION(chip) assembles a firmware version word from the
  * <chip>FW_VERSION_{MAJOR,MINOR,MICRO,BUILD} constants (the chip prefix is
  * token-pasted, e.g. FW_VERSION(T4) uses T4FW_VERSION_MAJOR).
  */
 #define FW_VERSION(chip) ( \
     V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \
     V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \
     V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \
     V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD))
 /* FW_INTFVER(chip, intf) names the constant <chip>FW_HDR_INTFVER_<intf>. */
 #define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf)
 
 /*
  * Per-chip firmware information, looked up by chip id in find_fw_info().
  *
  * chip         chip id this entry applies to.
  * kld_name     module name passed to firmware_get() to obtain the default
  *              configuration file.
  * fw_mod_name  module name passed to firmware_get() to obtain the firmware.
  * fw_hdr       firmware header the driver was compiled against; only the
  *              chip, version, and interface version fields are filled in.
  */
 struct fw_info {
 	uint8_t chip;
 	char *kld_name;
 	char *fw_mod_name;
 	struct fw_hdr fw_hdr;	/* XXX: waste of space, need a sparse struct */
 } fw_info[] = {
 	{
 		.chip = CHELSIO_T4,
 		.kld_name = "t4fw_cfg",
 		.fw_mod_name = "t4fw",
 		.fw_hdr = {
 			.chip = FW_HDR_CHIP_T4,
 			.fw_ver = htobe32_const(FW_VERSION(T4)),
 			.intfver_nic = FW_INTFVER(T4, NIC),
 			.intfver_vnic = FW_INTFVER(T4, VNIC),
 			.intfver_ofld = FW_INTFVER(T4, OFLD),
 			.intfver_ri = FW_INTFVER(T4, RI),
 			.intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU),
 			.intfver_iscsi = FW_INTFVER(T4, ISCSI),
 			.intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU),
 			.intfver_fcoe = FW_INTFVER(T4, FCOE),
 		},
 	}, {
 		.chip = CHELSIO_T5,
 		.kld_name = "t5fw_cfg",
 		.fw_mod_name = "t5fw",
 		.fw_hdr = {
 			.chip = FW_HDR_CHIP_T5,
 			.fw_ver = htobe32_const(FW_VERSION(T5)),
 			.intfver_nic = FW_INTFVER(T5, NIC),
 			.intfver_vnic = FW_INTFVER(T5, VNIC),
 			.intfver_ofld = FW_INTFVER(T5, OFLD),
 			.intfver_ri = FW_INTFVER(T5, RI),
 			.intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU),
 			.intfver_iscsi = FW_INTFVER(T5, ISCSI),
 			.intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU),
 			.intfver_fcoe = FW_INTFVER(T5, FCOE),
 		},
 	}
 };
 
 static struct fw_info *
 find_fw_info(int chip)
 {
 	int i;
 
 	for (i = 0; i < nitems(fw_info); i++) {
 		if (fw_info[i].chip == chip)
 			return (&fw_info[i]);
 	}
 	return (NULL);
 }
 
 /*
  * Decide whether two firmware headers describe compatible firmware.
  *
  * Returns 1 when both headers are for the same chip and either carry the
  * exact same firmware version or agree on every interface version; returns
  * 0 otherwise.
  */
 static int
 fw_compatible(const struct fw_hdr *hdr1, const struct fw_hdr *hdr2)
 {
 
 	/* Firmware for a different chip is never compatible. */
 	if (hdr1->chip != hdr2->chip)
 		return (0);
 
 	/* Identical version: no need to look any further. */
 	if (hdr1->fw_ver == hdr2->fw_ver)
 		return (1);
 
 	/*
 	 * XXX: possibly too conservative; the comparison could be limited to
 	 * the interfaces the driver actually supports.
 	 */
 #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x)
 	if (!SAME_INTF(nic) || !SAME_INTF(vnic) || !SAME_INTF(ofld) ||
 	    !SAME_INTF(ri) || !SAME_INTF(iscsipdu) || !SAME_INTF(iscsi) ||
 	    !SAME_INTF(fcoepdu) || !SAME_INTF(fcoe))
 		return (0);
 #undef SAME_INTF
 
 	return (1);
 }
 
 /*
  * The KLD firmware is known to be usable; decide whether it should replace
  * the firmware on the card.
  *
  * card_fw_usable  non-zero if the firmware on the card is usable.
  * k, c            version words of the KLD firmware and the card firmware.
  *
  * Returns 1 (after logging why) if the KLD firmware should be installed and
  * 0 otherwise.  When there is a reason to install but t4_fw_install is 0, a
  * message saying the driver may not install firmware is logged and 0 is
  * returned.
  */
 static int
 should_install_kld_fw(struct adapter *sc, int card_fw_usable, int k, int c)
 {
 	const char *reason;
 
 	/* Work out why, if at all, an install is called for. */
 	if (!card_fw_usable)
 		reason = "incompatible or unusable";
 	else if (k > c)
 		reason = "older than the version bundled with this driver";
 	else if (t4_fw_install == 2 && k != c)
 		reason = "different than the version bundled with this driver";
 	else
 		return (0);
 
 	if (t4_fw_install != 0) {
 		device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
 		    "installing firmware %u.%u.%u.%u on card.\n",
 		    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
 		    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason,
 		    G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
 		    G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k));
 
 		return (1);
 	}
 
 	/* Installation has been disabled by the administrator. */
 	device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, "
 	    "but the driver is prohibited from installing a different "
 	    "firmware on the card.\n",
 	    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
 	    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason);
 
 	return (0);
 }
 /*
  * Establish contact with the firmware and determine if we are the master driver
  * or not, and whether we are responsible for chip initialization.
  */
 static int
 prep_firmware(struct adapter *sc)
 {
 	const struct firmware *fw = NULL, *default_cfg;
 	int rc, pf, card_fw_usable, kld_fw_usable, need_fw_reset = 1;
 	enum dev_state state;
 	struct fw_info *fw_info;
 	struct fw_hdr *card_fw;		/* fw on the card */
 	const struct fw_hdr *kld_fw;	/* fw in the KLD */
 	const struct fw_hdr *drv_fw;	/* fw header the driver was compiled
 					   against */
 
 	/* Contact firmware. */
 	rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state);
 	if (rc < 0 || state == DEV_STATE_ERR) {
 		rc = -rc;
 		device_printf(sc->dev,
 		    "failed to connect to the firmware: %d, %d.\n", rc, state);
 		return (rc);
 	}
 	pf = rc;
 	if (pf == sc->mbox)
 		sc->flags |= MASTER_PF;
 	else if (state == DEV_STATE_UNINIT) {
 		/*
 		 * We didn't get to be the master so we definitely won't be
 		 * configuring the chip.  It's a bug if someone else hasn't
 		 * configured it already.
 		 */
 		device_printf(sc->dev, "couldn't be master(%d), "
 		    "device not already initialized either(%d).\n", rc, state);
 		return (EDOOFUS);
 	}
 
 	/* This is the firmware whose headers the driver was compiled against */
 	fw_info = find_fw_info(chip_id(sc));
 	if (fw_info == NULL) {
 		device_printf(sc->dev,
 		    "unable to look up firmware information for chip %d.\n",
 		    chip_id(sc));
 		return (EINVAL);
 	}
 	drv_fw = &fw_info->fw_hdr;
 
 	/*
 	 * The firmware KLD contains many modules.  The KLD name is also the
 	 * name of the module that contains the default config file.
 	 */
 	default_cfg = firmware_get(fw_info->kld_name);
 
 	/* Read the header of the firmware on the card */
 	card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK);
 	rc = -t4_read_flash(sc, FLASH_FW_START,
 	    sizeof (*card_fw) / sizeof (uint32_t), (uint32_t *)card_fw, 1);
 	if (rc == 0)
 		card_fw_usable = fw_compatible(drv_fw, (const void*)card_fw);
 	else {
 		device_printf(sc->dev,
 		    "Unable to read card's firmware header: %d\n", rc);
 		card_fw_usable = 0;
 	}
 
 	/* This is the firmware in the KLD */
 	fw = firmware_get(fw_info->fw_mod_name);
 	if (fw != NULL) {
 		kld_fw = (const void *)fw->data;
 		kld_fw_usable = fw_compatible(drv_fw, kld_fw);
 	} else {
 		kld_fw = NULL;
 		kld_fw_usable = 0;
 	}
 
 	if (card_fw_usable && card_fw->fw_ver == drv_fw->fw_ver &&
 	    (!kld_fw_usable || kld_fw->fw_ver == drv_fw->fw_ver)) {
 		/*
 		 * Common case: the firmware on the card is an exact match and
 		 * the KLD is an exact match too, or the KLD is
 		 * absent/incompatible.  Note that t4_fw_install = 2 is ignored
 		 * here -- use cxgbetool loadfw if you want to reinstall the
 		 * same firmware as the one on the card.
 		 */
 	} else if (kld_fw_usable && state == DEV_STATE_UNINIT &&
 	    should_install_kld_fw(sc, card_fw_usable, be32toh(kld_fw->fw_ver),
 	    be32toh(card_fw->fw_ver))) {
 
 		rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0);
 		if (rc != 0) {
 			device_printf(sc->dev,
 			    "failed to install firmware: %d\n", rc);
 			goto done;
 		}
 
 		/* Installed successfully, update the cached header too. */
 		memcpy(card_fw, kld_fw, sizeof(*card_fw));
 		card_fw_usable = 1;
 		need_fw_reset = 0;	/* already reset as part of load_fw */
 	}
 
 	if (!card_fw_usable) {
 		uint32_t d, c, k;
 
 		d = ntohl(drv_fw->fw_ver);
 		c = ntohl(card_fw->fw_ver);
 		k = kld_fw ? ntohl(kld_fw->fw_ver) : 0;
 
 		device_printf(sc->dev, "Cannot find a usable firmware: "
 		    "fw_install %d, chip state %d, "
 		    "driver compiled with %d.%d.%d.%d, "
 		    "card has %d.%d.%d.%d, KLD has %d.%d.%d.%d\n",
 		    t4_fw_install, state,
 		    G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d),
 		    G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d),
 		    G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c),
 		    G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c),
 		    G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k),
 		    G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k));
 		rc = EINVAL;
 		goto done;
 	}
 
 	/* We're using whatever's on the card and it's known to be good. */
 	sc->params.fw_vers = ntohl(card_fw->fw_ver);
 	snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
 	    G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
 	    G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
 	    G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
 	    G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
 	t4_get_tp_version(sc, &sc->params.tp_vers);
 
 	/* Reset device */
 	if (need_fw_reset &&
 	    (rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST)) != 0) {
 		device_printf(sc->dev, "firmware reset failed: %d.\n", rc);
 		if (rc != ETIMEDOUT && rc != EIO)
 			t4_fw_bye(sc, sc->mbox);
 		goto done;
 	}
 	sc->flags |= FW_OK;
 
 	rc = get_params__pre_init(sc);
 	if (rc != 0)
 		goto done; /* error message displayed already */
 
 	/* Partition adapter resources as specified in the config file. */
 	if (state == DEV_STATE_UNINIT) {
 
 		KASSERT(sc->flags & MASTER_PF,
 		    ("%s: trying to change chip settings when not master.",
 		    __func__));
 
 		rc = partition_resources(sc, default_cfg, fw_info->kld_name);
 		if (rc != 0)
 			goto done;	/* error message displayed already */
 
 		t4_tweak_chip_settings(sc);
 
 		/* get basic stuff going */
 		rc = -t4_fw_initialize(sc, sc->mbox);
 		if (rc != 0) {
 			device_printf(sc->dev, "fw init failed: %d.\n", rc);
 			goto done;
 		}
 	} else {
 		snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", pf);
 		sc->cfcsum = 0;
 	}
 
 done:
 	free(card_fw, M_CXGBE);
 	if (fw != NULL)
 		firmware_put(fw, FIRMWARE_UNLOAD);
 	if (default_cfg != NULL)
 		firmware_put(default_cfg, FIRMWARE_UNLOAD);
 
 	return (rc);
 }
 
 /*
  * Helpers that build the parameter identifiers handed to t4_query_params()
  * and t4_set_params() by the functions below.  The argument is the bare
  * suffix of a FW_PARAMS_PARAM_DEV_* (device-wide mnemonic) or
  * FW_PARAMS_PARAM_PFVF_* (PF/VF mnemonic) constant.
  */
 #define FW_PARAM_DEV(param) \
 	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
 	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
 #define FW_PARAM_PFVF(param) \
 	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
 	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
 
 /*
  * Partition chip resources for use between various PFs, VFs, etc.
  *
  * Picks a configuration profile (its name is recorded in sc->cfg_file),
  * makes the matching config file available to the firmware -- either by
  * uploading it to the location the firmware asks for or by pointing the
  * firmware at the copy on the card's flash -- has the firmware pre-process
  * it, and finally writes the resulting capabilities back after masking them
  * with the t4_*caps_allowed values.
  *
  * default_cfg is the default config file and may be NULL.  name_prefix is
  * combined with the profile name ("<prefix>_<profile>") to form the name
  * given to firmware_get() when a non-default, non-flash profile is wanted.
  *
  * Returns 0 on success, otherwise the error from the step that failed.  The
  * config file checksum reported by the firmware is saved in sc->cfcsum.
  */
 static int
 partition_resources(struct adapter *sc, const struct firmware *default_cfg,
     const char *name_prefix)
 {
 	const struct firmware *cfg = NULL;
 	int rc = 0;
 	struct fw_caps_config_cmd caps;
 	uint32_t mtype, moff, finicsum, cfcsum;
 
 	/*
 	 * Figure out what configuration file to use.  Pick the default config
 	 * file for the card if the user hasn't specified one explicitly.
 	 */
 	snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", t4_cfg_file);
 	if (strncmp(t4_cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) {
 		/* Card specific overrides go here. */
 		if (pci_get_device(sc->dev) == 0x440a)
 			snprintf(sc->cfg_file, sizeof(sc->cfg_file), UWIRE_CF);
 		if (is_fpga(sc))
 			snprintf(sc->cfg_file, sizeof(sc->cfg_file), FPGA_CF);
 	}
 
 	/*
 	 * We need to load another module if the profile is anything except
 	 * "default" or "flash".
 	 */
 	if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) != 0 &&
 	    strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) {
 		char s[32];
 
 		snprintf(s, sizeof(s), "%s_%s", name_prefix, sc->cfg_file);
 		cfg = firmware_get(s);
 		if (cfg == NULL) {
 			/*
 			 * The requested profile isn't available.  Fall back to
 			 * the default config if we have one, else to whatever
 			 * is on the card's flash.
 			 */
 			if (default_cfg != NULL) {
 				device_printf(sc->dev,
 				    "unable to load module \"%s\" for "
 				    "configuration profile \"%s\", will use "
 				    "the default config file instead.\n",
 				    s, sc->cfg_file);
 				snprintf(sc->cfg_file, sizeof(sc->cfg_file),
 				    "%s", DEFAULT_CF);
 			} else {
 				device_printf(sc->dev,
 				    "unable to load module \"%s\" for "
 				    "configuration profile \"%s\", will use "
 				    "the config file on the card's flash "
 				    "instead.\n", s, sc->cfg_file);
 				snprintf(sc->cfg_file, sizeof(sc->cfg_file),
 				    "%s", FLASH_CF);
 			}
 		}
 	}
 
 	/* The default profile was selected but no default config exists. */
 	if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) == 0 &&
 	    default_cfg == NULL) {
 		device_printf(sc->dev,
 		    "default config file not available, will use the config "
 		    "file on the card's flash instead.\n");
 		snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF);
 	}
 
 	if (strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) {
 		u_int cflen, i, n;
 		const uint32_t *cfdata;
 		uint32_t param, val, addr, off, mw_base, mw_aperture;
 
 		KASSERT(cfg != NULL || default_cfg != NULL,
 		    ("%s: no config to upload", __func__));
 
 		/*
 		 * Ask the firmware where it wants us to upload the config file.
 		 */
 		param = FW_PARAM_DEV(CF);
 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
 		if (rc != 0) {
 			/* No support for config file?  Shouldn't happen. */
 			device_printf(sc->dev,
 			    "failed to query config file location: %d.\n", rc);
 			goto done;
 		}
 		mtype = G_FW_PARAMS_PARAM_Y(val);
 		moff = G_FW_PARAMS_PARAM_Z(val) << 16;
 
 		/*
 		 * XXX: sheer laziness.  We deliberately added 4 bytes of
 		 * useless stuffing/comments at the end of the config file so
 		 * it's ok to simply throw away the last remaining bytes when
 		 * the config file is not an exact multiple of 4.  This also
 		 * helps with the validate_mt_off_len check.
 		 */
 		if (cfg != NULL) {
 			cflen = cfg->datasize & ~3;
 			cfdata = cfg->data;
 		} else {
 			cflen = default_cfg->datasize & ~3;
 			cfdata = default_cfg->data;
 		}
 
 		if (cflen > FLASH_CFG_MAX_SIZE) {
 			device_printf(sc->dev,
 			    "config file too long (%d, max allowed is %d).  "
 			    "Will try to use the config on the card, if any.\n",
 			    cflen, FLASH_CFG_MAX_SIZE);
 			goto use_config_on_flash;
 		}
 
 		rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr);
 		if (rc != 0) {
 			device_printf(sc->dev,
 			    "%s: addr (%d/0x%x) or len %d is not valid: %d.  "
 			    "Will try to use the config on the card, if any.\n",
 			    __func__, mtype, moff, cflen, rc);
 			goto use_config_on_flash;
 		}
 
 		/*
 		 * Copy the file out 4 bytes at a time through memory window 2,
 		 * repositioning the window each time the part of the aperture
 		 * past 'off' has been consumed.
 		 */
 		memwin_info(sc, 2, &mw_base, &mw_aperture);
 		while (cflen) {
 			off = position_memwin(sc, 2, addr);
 			n = min(cflen, mw_aperture - off);
 			for (i = 0; i < n; i += 4)
 				t4_write_reg(sc, mw_base + off + i, *cfdata++);
 			cflen -= n;
 			addr += n;
 		}
 	} else {
 		/*
 		 * Also entered via goto from the upload path above when the
 		 * file is too long or its destination fails validation.
 		 */
 use_config_on_flash:
 		mtype = FW_MEMTYPE_FLASH;
 		moff = t4_flash_cfg_addr(sc);
 	}
 
 	/*
 	 * Have the firmware pre-process the config file at (mtype, moff).  The
 	 * reply is written back into 'caps'.
 	 */
 	bzero(&caps, sizeof(caps));
 	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
 	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
 	caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID |
 	    V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) |
 	    V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) | FW_LEN16(caps));
 	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
 	if (rc != 0) {
 		device_printf(sc->dev,
 		    "failed to pre-process config file: %d "
 		    "(mtype %d, moff 0x%x).\n", rc, mtype, moff);
 		goto done;
 	}
 
 	/* A checksum mismatch is only warned about; it is not an error. */
 	finicsum = be32toh(caps.finicsum);
 	cfcsum = be32toh(caps.cfcsum);
 	if (finicsum != cfcsum) {
 		device_printf(sc->dev,
 		    "WARNING: config file checksum mismatch: %08x %08x\n",
 		    finicsum, cfcsum);
 	}
 	sc->cfcsum = cfcsum;
 
 /* Mask a capability field (still in wire byte order) with its tunable. */
 #define LIMIT_CAPS(x) do { \
 	caps.x &= htobe16(t4_##x##_allowed); \
 } while (0)
 
 	/*
 	 * Let the firmware know what features will (not) be used so it can tune
 	 * things accordingly.
 	 */
 	LIMIT_CAPS(linkcaps);
 	LIMIT_CAPS(niccaps);
 	LIMIT_CAPS(toecaps);
 	LIMIT_CAPS(rdmacaps);
 	LIMIT_CAPS(iscsicaps);
 	LIMIT_CAPS(fcoecaps);
 #undef LIMIT_CAPS
 
 	/* Write the (possibly reduced) capabilities back to the firmware. */
 	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
 	    F_FW_CMD_REQUEST | F_FW_CMD_WRITE);
 	caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
 	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL);
 	if (rc != 0) {
 		device_printf(sc->dev,
 		    "failed to process config file: %d.\n", rc);
 	}
 done:
 	/* Only the module loaded here is released; default_cfg is not ours. */
 	if (cfg != NULL)
 		firmware_put(cfg, FIRMWARE_UNLOAD);
 	return (rc);
 }
 
 /*
  * Retrieve parameters that are needed (or nice to have) very early.
  */
 static int
 get_params__pre_init(struct adapter *sc)
 {
 	int rc;
 	uint32_t param[2], val[2];
 	struct fw_devlog_cmd cmd;
 	struct devlog_params *dlog = &sc->params.devlog;
 
 	param[0] = FW_PARAM_DEV(PORTVEC);
 	param[1] = FW_PARAM_DEV(CCLK);
 	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
 	if (rc != 0) {
 		device_printf(sc->dev,
 		    "failed to query parameters (pre_init): %d.\n", rc);
 		return (rc);
 	}
 
 	sc->params.portvec = val[0];
 	sc->params.nports = bitcount32(val[0]);
 	sc->params.vpd.cclk = val[1];
 
 	/* Read device log parameters. */
 	bzero(&cmd, sizeof(cmd));
 	cmd.op_to_write = htobe32(V_FW_CMD_OP(FW_DEVLOG_CMD) |
 	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
 	cmd.retval_len16 = htobe32(FW_LEN16(cmd));
 	rc = -t4_wr_mbox(sc, sc->mbox, &cmd, sizeof(cmd), &cmd);
 	if (rc != 0) {
 		device_printf(sc->dev,
 		    "failed to get devlog parameters: %d.\n", rc);
 		bzero(dlog, sizeof (*dlog));
 		rc = 0;	/* devlog isn't critical for device operation */
 	} else {
 		val[0] = be32toh(cmd.memtype_devlog_memaddr16_devlog);
 		dlog->memtype = G_FW_DEVLOG_CMD_MEMTYPE_DEVLOG(val[0]);
 		dlog->start = G_FW_DEVLOG_CMD_MEMADDR16_DEVLOG(val[0]) << 4;
 		dlog->size = be32toh(cmd.memsize_devlog);
 	}
 
 	return (rc);
 }
 
 /*
  * Retrieve various parameters that are of interest to the driver.  The device
  * has been initialized by the firmware at this point.
  *
  * The queue/filter/L2T ranges are always queried.  The capabilities are then
  * read from the firmware and saved in the softc, and the NIC (ETHOFLD), TOE,
  * RDMA, and iSCSI ranges are queried only if the corresponding capability is
  * present.  Finally some settings are read directly from the chip.
  *
  * Returns 0 on success, otherwise the error from the first step that failed
  * (an error message has been displayed for the firmware queries).
  */
 static int
 get_params__post_init(struct adapter *sc)
 {
 	int rc;
 	uint32_t param[7], val[7];
 	struct fw_caps_config_cmd caps;
 
 	param[0] = FW_PARAM_PFVF(IQFLINT_START);
 	param[1] = FW_PARAM_PFVF(EQ_START);
 	param[2] = FW_PARAM_PFVF(FILTER_START);
 	param[3] = FW_PARAM_PFVF(FILTER_END);
 	param[4] = FW_PARAM_PFVF(L2T_START);
 	param[5] = FW_PARAM_PFVF(L2T_END);
 	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
 	if (rc != 0) {
 		device_printf(sc->dev,
 		    "failed to query parameters (post_init): %d.\n", rc);
 		return (rc);
 	}
 
 	/* The *_END values are inclusive, hence the "+ 1" in the sizes. */
 	sc->sge.iq_start = val[0];
 	sc->sge.eq_start = val[1];
 	sc->tids.ftid_base = val[2];
 	sc->tids.nftids = val[3] - val[2] + 1;
 	sc->params.ftid_min = val[2];
 	sc->params.ftid_max = val[3];
 	sc->vres.l2t.start = val[4];
 	sc->vres.l2t.size = val[5] - val[4] + 1;
 	KASSERT(sc->vres.l2t.size <= L2T_SIZE,
 	    ("%s: L2 table size (%u) larger than expected (%u)",
 	    __func__, sc->vres.l2t.size, L2T_SIZE));
 
 	/* get capabilities */
 	bzero(&caps, sizeof(caps));
 	caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
 	    F_FW_CMD_REQUEST | F_FW_CMD_READ);
 	caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps));
 	rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps);
 	if (rc != 0) {
 		device_printf(sc->dev,
 		    "failed to get card capabilities: %d.\n", rc);
 		return (rc);
 	}
 
 /*
  * Copy a capability field from the firmware's reply (big-endian) into the
  * softc in host byte order.
  */
 #define READ_CAPS(x) do { \
 	sc->x = be16toh(caps.x); \
 } while (0)
 	READ_CAPS(linkcaps);
 	READ_CAPS(niccaps);
 	READ_CAPS(toecaps);
 	READ_CAPS(rdmacaps);
 	READ_CAPS(iscsicaps);
 	READ_CAPS(fcoecaps);
 #undef READ_CAPS
 
 	if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) {
 		param[0] = FW_PARAM_PFVF(ETHOFLD_START);
 		param[1] = FW_PARAM_PFVF(ETHOFLD_END);
 		param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val);
 		if (rc != 0) {
 			device_printf(sc->dev,
 			    "failed to query NIC parameters: %d.\n", rc);
 			return (rc);
 		}
 		sc->tids.etid_base = val[0];
 		sc->params.etid_min = val[0];
 		sc->tids.netids = val[1] - val[0] + 1;
 		sc->params.netids = sc->tids.netids;
 		sc->params.eo_wr_cred = val[2];
 		sc->params.ethoffload = 1;
 	}
 
 	if (sc->toecaps) {
 		/* query offload-related parameters */
 		param[0] = FW_PARAM_DEV(NTID);
 		param[1] = FW_PARAM_PFVF(SERVER_START);
 		param[2] = FW_PARAM_PFVF(SERVER_END);
 		param[3] = FW_PARAM_PFVF(TDDP_START);
 		param[4] = FW_PARAM_PFVF(TDDP_END);
 		param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ);
 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
 		if (rc != 0) {
 			device_printf(sc->dev,
 			    "failed to query TOE parameters: %d.\n", rc);
 			return (rc);
 		}
 		sc->tids.ntids = val[0];
 		sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS);
 		sc->tids.stid_base = val[1];
 		sc->tids.nstids = val[2] - val[1] + 1;
 		sc->vres.ddp.start = val[3];
 		sc->vres.ddp.size = val[4] - val[3] + 1;
 		sc->params.ofldq_wr_cred = val[5];
 		sc->params.offload = 1;
 	}
 	if (sc->rdmacaps) {
 		/* The RDMA ranges are fetched in two queries of 6 each. */
 		param[0] = FW_PARAM_PFVF(STAG_START);
 		param[1] = FW_PARAM_PFVF(STAG_END);
 		param[2] = FW_PARAM_PFVF(RQ_START);
 		param[3] = FW_PARAM_PFVF(RQ_END);
 		param[4] = FW_PARAM_PFVF(PBL_START);
 		param[5] = FW_PARAM_PFVF(PBL_END);
 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
 		if (rc != 0) {
 			device_printf(sc->dev,
 			    "failed to query RDMA parameters(1): %d.\n", rc);
 			return (rc);
 		}
 		sc->vres.stag.start = val[0];
 		sc->vres.stag.size = val[1] - val[0] + 1;
 		sc->vres.rq.start = val[2];
 		sc->vres.rq.size = val[3] - val[2] + 1;
 		sc->vres.pbl.start = val[4];
 		sc->vres.pbl.size = val[5] - val[4] + 1;
 
 		param[0] = FW_PARAM_PFVF(SQRQ_START);
 		param[1] = FW_PARAM_PFVF(SQRQ_END);
 		param[2] = FW_PARAM_PFVF(CQ_START);
 		param[3] = FW_PARAM_PFVF(CQ_END);
 		param[4] = FW_PARAM_PFVF(OCQ_START);
 		param[5] = FW_PARAM_PFVF(OCQ_END);
 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
 		if (rc != 0) {
 			device_printf(sc->dev,
 			    "failed to query RDMA parameters(2): %d.\n", rc);
 			return (rc);
 		}
 		sc->vres.qp.start = val[0];
 		sc->vres.qp.size = val[1] - val[0] + 1;
 		sc->vres.cq.start = val[2];
 		sc->vres.cq.size = val[3] - val[2] + 1;
 		sc->vres.ocq.start = val[4];
 		sc->vres.ocq.size = val[5] - val[4] + 1;
 	}
 	if (sc->iscsicaps) {
 		param[0] = FW_PARAM_PFVF(ISCSI_START);
 		param[1] = FW_PARAM_PFVF(ISCSI_END);
 		rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
 		if (rc != 0) {
 			device_printf(sc->dev,
 			    "failed to query iSCSI parameters: %d.\n", rc);
 			return (rc);
 		}
 		sc->vres.iscsi.start = val[0];
 		sc->vres.iscsi.size = val[1] - val[0] + 1;
 	}
 
 	/*
 	 * We've got the params we wanted to query via the firmware.  Now grab
 	 * some others directly from the chip.
 	 */
 	rc = t4_read_chip_settings(sc);
 
 	return (rc);
 }
 
 /*
  * Push driver preferences to the firmware.  The only one so far is a request
  * for encapsulated CPLs.  The outcome of that request is deliberately
  * ignored, so this always returns 0.
  */
 static int
 set_params__post_init(struct adapter *sc)
 {
 	uint32_t encap_id = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
 	uint32_t enable = 1;
 
 	/* ask for encapsulated CPLs */
 	(void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &encap_id, &enable);
 
 	return (0);
 }
 
 #undef FW_PARAM_PFVF
 #undef FW_PARAM_DEV
 
 /*
  * Compose the device description from the VPD id, the chip revision, and the
  * VPD serial / part / EC strings, then install a copy of it on the device.
  * An "R" is placed in front of "NIC" when is_offload(sc) is true.
  */
 static void
 t4_set_desc(struct adapter *sc)
 {
 	struct adapter_params *params = &sc->params;
 	const char *offload_tag;
 	char desc[128];
 
 	offload_tag = is_offload(sc) ? "R" : "";
 	snprintf(desc, sizeof(desc),
 	    "Chelsio %s %sNIC (rev %d), S/N:%s, P/N:%s, E/C:%s",
 	    params->vpd.id, offload_tag, chip_rev(sc), params->vpd.sn,
 	    params->vpd.pn, params->vpd.ec);
 
 	device_set_desc_copy(sc->dev, desc);
 }
 
 /*
  * Rebuild an ifmedia list from the port's current port_type and mod_type.
  *
  * Every existing entry in 'media' is removed first and the new entries are
  * added with the port lock held throughout.  Combinations that aren't
  * recognized are logged and end up with a single IFM_UNKNOWN entry.
  */
 static void
 build_medialist(struct port_info *pi, struct ifmedia *media)
 {
 	int m;
 
 	PORT_LOCK(pi);
 
 	ifmedia_removeall(media);
 
 	/* Base word for every entry; full duplex unless cleared below. */
 	m = IFM_ETHER | IFM_FDX;
 
 	switch(pi->port_type) {
 	case FW_PORT_TYPE_BT_XFI:
 	case FW_PORT_TYPE_BT_XAUI:
 		ifmedia_add(media, m | IFM_10G_T, 0, NULL);
 		/* fall through */
 
 	case FW_PORT_TYPE_BT_SGMII:
 		/* BASE-T ports also get 1000/100 and default to autoselect. */
 		ifmedia_add(media, m | IFM_1000_T, 0, NULL);
 		ifmedia_add(media, m | IFM_100_TX, 0, NULL);
 		ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
 		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
 		break;
 
 	case FW_PORT_TYPE_CX4:
 		ifmedia_add(media, m | IFM_10G_CX4, 0, NULL);
 		ifmedia_set(media, m | IFM_10G_CX4);
 		break;
 
 	/* For these port types the media depends on the module type. */
 	case FW_PORT_TYPE_QSFP_10G:
 	case FW_PORT_TYPE_SFP:
 	case FW_PORT_TYPE_FIBER_XFI:
 	case FW_PORT_TYPE_FIBER_XAUI:
 		switch (pi->mod_type) {
 
 		case FW_PORT_MOD_TYPE_LR:
 			ifmedia_add(media, m | IFM_10G_LR, 0, NULL);
 			ifmedia_set(media, m | IFM_10G_LR);
 			break;
 
 		case FW_PORT_MOD_TYPE_SR:
 			ifmedia_add(media, m | IFM_10G_SR, 0, NULL);
 			ifmedia_set(media, m | IFM_10G_SR);
 			break;
 
 		case FW_PORT_MOD_TYPE_LRM:
 			ifmedia_add(media, m | IFM_10G_LRM, 0, NULL);
 			ifmedia_set(media, m | IFM_10G_LRM);
 			break;
 
 		case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
 		case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
 			ifmedia_add(media, m | IFM_10G_TWINAX, 0, NULL);
 			ifmedia_set(media, m | IFM_10G_TWINAX);
 			break;
 
 		case FW_PORT_MOD_TYPE_NONE:
 			/* No module: the entry is added without IFM_FDX. */
 			m &= ~IFM_FDX;
 			ifmedia_add(media, m | IFM_NONE, 0, NULL);
 			ifmedia_set(media, m | IFM_NONE);
 			break;
 
 		case FW_PORT_MOD_TYPE_NA:
 		case FW_PORT_MOD_TYPE_ER:
 		default:
 			device_printf(pi->dev,
 			    "unknown port_type (%d), mod_type (%d)\n",
 			    pi->port_type, pi->mod_type);
 			ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
 			ifmedia_set(media, m | IFM_UNKNOWN);
 			break;
 		}
 		break;
 
 	/* Same idea as above but with the 40G media types. */
 	case FW_PORT_TYPE_QSFP:
 		switch (pi->mod_type) {
 
 		case FW_PORT_MOD_TYPE_LR:
 			ifmedia_add(media, m | IFM_40G_LR4, 0, NULL);
 			ifmedia_set(media, m | IFM_40G_LR4);
 			break;
 
 		case FW_PORT_MOD_TYPE_SR:
 			ifmedia_add(media, m | IFM_40G_SR4, 0, NULL);
 			ifmedia_set(media, m | IFM_40G_SR4);
 			break;
 
 		case FW_PORT_MOD_TYPE_TWINAX_PASSIVE:
 		case FW_PORT_MOD_TYPE_TWINAX_ACTIVE:
 			ifmedia_add(media, m | IFM_40G_CR4, 0, NULL);
 			ifmedia_set(media, m | IFM_40G_CR4);
 			break;
 
 		case FW_PORT_MOD_TYPE_NONE:
 			/* No module: the entry is added without IFM_FDX. */
 			m &= ~IFM_FDX;
 			ifmedia_add(media, m | IFM_NONE, 0, NULL);
 			ifmedia_set(media, m | IFM_NONE);
 			break;
 
 		default:
 			device_printf(pi->dev,
 			    "unknown port_type (%d), mod_type (%d)\n",
 			    pi->port_type, pi->mod_type);
 			ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
 			ifmedia_set(media, m | IFM_UNKNOWN);
 			break;
 		}
 		break;
 
 	default:
 		device_printf(pi->dev,
 		    "unknown port_type (%d), mod_type (%d)\n", pi->port_type,
 		    pi->mod_type);
 		ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL);
 		ifmedia_set(media, m | IFM_UNKNOWN);
 		break;
 	}
 
 	PORT_UNLOCK(pi);
 }
 
 /* Number of multicast addresses handed to t4_alloc_mac_filt() per call. */
 #define FW_MAC_EXACT_CHUNK	7
 
 /*
  * Program the port's XGMAC based on parameters in ifnet.  The caller also
  * indicates which parameters should be programmed (the rest are left alone).
  *
  * 'flags' is a non-zero combination of XGMAC_MTU, XGMAC_PROMISC,
  * XGMAC_ALLMULTI, XGMAC_VLANEX, XGMAC_UCADDR, and XGMAC_MCADDRS.  Must be
  * called from within a synchronized operation.
  *
  * Returns 0 on success, otherwise the error (as a positive value) from the
  * first firmware request that failed.  A message is printed for each failure.
  */
 int
 update_mac_settings(struct ifnet *ifp, int flags)
 {
 	int rc = 0;
 	struct vi_info *vi = ifp->if_softc;
 	struct port_info *pi = vi->pi;
 	struct adapter *sc = pi->adapter;
 	/* -1 is passed to t4_set_rxmode for settings that weren't requested. */
 	int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
 
 	ASSERT_SYNCHRONIZED_OP(sc);
 	KASSERT(flags, ("%s: not told what to update.", __func__));
 
 	if (flags & XGMAC_MTU)
 		mtu = ifp->if_mtu;
 
 	if (flags & XGMAC_PROMISC)
 		promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0;
 
 	if (flags & XGMAC_ALLMULTI)
 		allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0;
 
 	if (flags & XGMAC_VLANEX)
 		vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0;
 
 	if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) {
 		rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc,
 		    allmulti, 1, vlanex, false);
 		if (rc) {
 			if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags,
 			    rc);
 			return (rc);
 		}
 	}
 
 	if (flags & XGMAC_UCADDR) {
 		uint8_t ucaddr[ETHER_ADDR_LEN];
 
 		bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr));
 		rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt,
 		    ucaddr, true, true);
 		if (rc < 0) {
 			rc = -rc;
 			if_printf(ifp, "change_mac failed: %d\n", rc);
 			return (rc);
 		} else {
 			/* A non-negative result is the filter that was used. */
 			vi->xact_addr_filt = rc;
 			rc = 0;
 		}
 	}
 
 	if (flags & XGMAC_MCADDRS) {
 		const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK];
 		/* Passed as the 'del' argument of the first request only. */
 		int del = 1;
 		uint64_t hash = 0;
 		struct ifmultiaddr *ifma;
 		int i = 0, j;
 
 		/*
 		 * Walk the interface's link-level multicast addresses and hand
 		 * them to the firmware FW_MAC_EXACT_CHUNK at a time.
 		 */
 		if_maddr_rlock(ifp);
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			if (ifma->ifma_addr->sa_family != AF_LINK)
 				continue;
 			mcaddr[i] =
 			    LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
 			MPASS(ETHER_IS_MULTICAST(mcaddr[i]));
 			i++;
 
 			if (i == FW_MAC_EXACT_CHUNK) {
 				rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid,
 				    del, i, mcaddr, NULL, &hash, 0);
 				if (rc < 0) {
 					rc = -rc;
 					for (j = 0; j < i; j++) {
 						if_printf(ifp,
 						    "failed to add mc address"
 						    " %02x:%02x:%02x:"
 						    "%02x:%02x:%02x rc=%d\n",
 						    mcaddr[j][0], mcaddr[j][1],
 						    mcaddr[j][2], mcaddr[j][3],
 						    mcaddr[j][4], mcaddr[j][5],
 						    rc);
 					}
 					goto mcfail;
 				}
 				del = 0;
 				i = 0;
 			}
 		}
 		/* Flush whatever is left over from the last partial chunk. */
 		if (i > 0) {
 			rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i,
 			    mcaddr, NULL, &hash, 0);
 			if (rc < 0) {
 				rc = -rc;
 				for (j = 0; j < i; j++) {
 					if_printf(ifp,
 					    "failed to add mc address"
 					    " %02x:%02x:%02x:"
 					    "%02x:%02x:%02x rc=%d\n",
 					    mcaddr[j][0], mcaddr[j][1],
 					    mcaddr[j][2], mcaddr[j][3],
 					    mcaddr[j][4], mcaddr[j][5],
 					    rc);
 				}
 				goto mcfail;
 			}
 		}
 
 		/* Program the hash accumulated by the requests above. */
 		rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0);
 		if (rc != 0) {
 			if_printf(ifp, "failed to set mc address hash: %d\n",
 			    rc);
 		}
 mcfail:
 		if_maddr_runlock(ifp);
 	}
 
 	return (rc);
 }
 
 /*
  * {begin|end}_synchronized_op must be called from the same thread.
  *
  * Marks the adapter busy on behalf of the caller, waiting for any operation
  * already in progress to finish if the caller allows that.
  *
  * flags:
  *   SLEEP_OK  - the caller may sleep until the adapter is no longer busy.
  *               Without it a busy adapter results in EBUSY right away.
  *   INTR_OK   - the sleep may be interrupted (PCATCH).  Without it the
  *               sleep is uninterruptible.
  *   HOLD_LOCK - return with the adapter lock held on success.  The lock is
  *               always released on failure.
  *
  * 'vi' may be NULL.  'wmesg' is the wait message used for the sleep.
  *
  * Returns 0 on success, ENXIO if the VI is doomed, EBUSY if the adapter is
  * busy and sleeping isn't allowed, or EINTR if the sleep did not complete
  * normally.
  */
 int
 begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags,
     char *wmesg)
 {
 	int rc, pri;
 
 #ifdef WITNESS
 	/* the caller thinks it's ok to sleep, but is it really? */
 	if (flags & SLEEP_OK)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 		    "begin_synchronized_op");
 #endif
 
 	/*
 	 * Test the caller's flags, not the INTR_OK constant itself: the bare
 	 * constant is always true and made every sleep interruptible.
 	 */
 	pri = (flags & INTR_OK) ? PCATCH : 0;
 
 	ADAPTER_LOCK(sc);
 	for (;;) {
 
 		/* Re-evaluated after every wakeup; see doom_vi. */
 		if (vi && IS_DOOMED(vi)) {
 			rc = ENXIO;
 			goto done;
 		}
 
 		if (!IS_BUSY(sc)) {
 			rc = 0;
 			break;
 		}
 
 		if (!(flags & SLEEP_OK)) {
 			rc = EBUSY;
 			goto done;
 		}
 
 		if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) {
 			rc = EINTR;
 			goto done;
 		}
 	}
 
 	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
 	SET_BUSY(sc);
 #ifdef INVARIANTS
 	/* Remember who holds the adapter, for debugging. */
 	sc->last_op = wmesg;
 	sc->last_op_thr = curthread;
 	sc->last_op_flags = flags;
 #endif
 
 done:
 	if (!(flags & HOLD_LOCK) || rc)
 		ADAPTER_UNLOCK(sc);
 
 	return (rc);
 }
 
 /*
  * Let if_ioctl and if_init know that the VI is on its way out.  This is a
  * special variant of begin_synchronized_op: it marks the VI doomed, waits
  * (uninterruptibly) for the adapter to stop being busy, and then claims it.
  * It must be paired with a call to end_synchronized_op.
  */
 void
 doom_vi(struct adapter *sc, struct vi_info *vi)
 {
 
 	ADAPTER_LOCK(sc);
 
 	/*
 	 * Flag the VI first, then kick anyone sleeping on sc->flags so that
 	 * they get to see the flag.
 	 */
 	SET_DOOMED(vi);
 	wakeup(&sc->flags);
 
 	for (;;) {
 		if (!IS_BUSY(sc))
 			break;
 		mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0);
 	}
 	SET_BUSY(sc);
 #ifdef INVARIANTS
 	sc->last_op = "t4detach";
 	sc->last_op_thr = curthread;
 	sc->last_op_flags = 0;
 #endif
 
 	ADAPTER_UNLOCK(sc);
 }
 
 /*
  * {begin|end}_synchronized_op must be called from the same thread.
  *
  * Clears the adapter's busy state and wakes up anyone waiting for it.  Pass
  * LOCK_HELD in 'flags' if the adapter lock is already owned by the caller;
  * otherwise the lock is acquired here.  The lock is released before
  * returning in either case.
  */
 void
 end_synchronized_op(struct adapter *sc, int flags)
 {
 
 	if (!(flags & LOCK_HELD)) {
 		ADAPTER_LOCK(sc);
 	} else {
 		ADAPTER_LOCK_ASSERT_OWNED(sc);
 	}
 
 	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
 	CLR_BUSY(sc);
 	wakeup(&sc->flags);
 
 	ADAPTER_UNLOCK(sc);
 }
 
 /*
  * Bring a VI up.  Must be called from within a synchronized operation.
  *
  * Does nothing if the interface is already running.  Otherwise it performs
  * the one-time adapter and VI initialization if those haven't been done yet,
  * programs all the MAC settings, enables the VI, marks its tx queues as
  * enabled, and starts the 1Hz tick.
  *
  * Returns 0 on success.  If the full-init steps succeed but a later step
  * fails, cxgbe_uninit_synchronized is called before the error is returned.
  */
 static int
 cxgbe_init_synchronized(struct vi_info *vi)
 {
 	struct port_info *pi = vi->pi;
 	struct adapter *sc = pi->adapter;
 	struct ifnet *ifp = vi->ifp;
 	int rc = 0, i;
 	struct sge_txq *txq;
 
 	ASSERT_SYNCHRONIZED_OP(sc);
 
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 		return (0);	/* already running */
 
 	if (!(sc->flags & FULL_INIT_DONE) &&
 	    ((rc = adapter_full_init(sc)) != 0))
 		return (rc);	/* error message displayed already */
 
 	if (!(vi->flags & VI_INIT_DONE) &&
 	    ((rc = vi_full_init(vi)) != 0))
 		return (rc); /* error message displayed already */
 
 	rc = update_mac_settings(ifp, XGMAC_ALL);
 	if (rc)
 		goto done;	/* error message displayed already */
 
 	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true);
 	if (rc != 0) {
 		if_printf(ifp, "enable_vi failed: %d\n", rc);
 		goto done;
 	}
 
 	/*
 	 * Can't fail from this point onwards.  Review cxgbe_uninit_synchronized
 	 * if this changes.
 	 */
 
 	for_each_txq(vi, i, txq) {
 		TXQ_LOCK(txq);
 		txq->eq.flags |= EQ_ENABLED;
 		TXQ_UNLOCK(txq);
 	}
 
 	/*
 	 * The first iq of the first port to come up is used for tracing.
 	 */
 	if (sc->traceq < 0 && IS_MAIN_VI(vi)) {
 		sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id;
 		/* The trace control register differs between T4 and others. */
 		t4_write_reg(sc, is_t4(sc) ?  A_MPS_TRC_RSS_CONTROL :
 		    A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) |
 		    V_QUEUENUMBER(sc->traceq));
 		pi->flags |= HAS_TRACEQ;
 	}
 
 	/* all ok */
 	PORT_LOCK(pi);
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	pi->up_vis++;
 
 	/* A port with more than one VI uses the per-VI tick callout. */
 	if (pi->nvi > 1)
 		callout_reset(&vi->tick, hz, vi_tick, vi);
 	else
 		callout_reset(&pi->tick, hz, cxgbe_tick, pi);
 	PORT_UNLOCK(pi);
 done:
 	if (rc != 0)
 		cxgbe_uninit_synchronized(vi);
 
 	return (rc);
 }
 
 /*
  * Idempotent.
  *
  * Take a VI down.  Must be called from within a synchronized operation.
  * Returns 0 if the VI was never fully initialized, if it wasn't running, or
  * once it has been stopped; returns the error if the VI could not be
  * disabled.  The port's link state is reset and reported as down only when
  * the last running VI on the port is stopped.
  */
 static int
 cxgbe_uninit_synchronized(struct vi_info *vi)
 {
 	struct port_info *pi = vi->pi;
 	struct adapter *sc = pi->adapter;
 	struct ifnet *ifp = vi->ifp;
 	int rc, i;
 	struct sge_txq *txq;
 
 	ASSERT_SYNCHRONIZED_OP(sc);
 
 	if (!(vi->flags & VI_INIT_DONE)) {
 		KASSERT(!(ifp->if_drv_flags & IFF_DRV_RUNNING),
 		    ("uninited VI is running"));
 		return (0);
 	}
 
 	/*
 	 * Disable the VI so that all its data in either direction is discarded
 	 * by the MPS.  Leave everything else (the queues, interrupts, and 1Hz
 	 * tick) intact as the TP can deliver negative advice or data that it's
 	 * holding in its RAM (for an offloaded connection) even after the VI is
 	 * disabled.
 	 */
 	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false);
 	if (rc) {
 		if_printf(ifp, "disable_vi failed: %d\n", rc);
 		return (rc);
 	}
 
 	for_each_txq(vi, i, txq) {
 		TXQ_LOCK(txq);
 		txq->eq.flags &= ~EQ_ENABLED;
 		TXQ_UNLOCK(txq);
 	}
 
 	PORT_LOCK(pi);
 	/* Mirror of the callout selection in cxgbe_init_synchronized. */
 	if (pi->nvi == 1)
 		callout_stop(&pi->tick);
 	else
 		callout_stop(&vi->tick);
 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 		/* Wasn't running; there is no up_vis count to give back. */
 		PORT_UNLOCK(pi);
 		return (0);
 	}
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	pi->up_vis--;
 	if (pi->up_vis > 0) {
 		/* Other VIs on this port are still up; leave the link alone. */
 		PORT_UNLOCK(pi);
 		return (0);
 	}
 	PORT_UNLOCK(pi);
 
 	/* Last VI on the port went down: report the link as down. */
 	pi->link_cfg.link_ok = 0;
 	pi->link_cfg.speed = 0;
 	pi->linkdnrc = -1;
 	t4_os_link_changed(sc, pi->port_id, 0, -1);
 
 	return (0);
 }
 
 /*
  * It is ok for this function to fail midway and return right away.  t4_detach
  * will walk the entire sc->irq list and clean up whatever is valid.
  *
  * Allocates the adapter's interrupts and attaches their handlers.  With a
  * single interrupt everything goes to t4_intr_all.  With multiple interrupts
  * the first is the error interrupt, the second belongs to the firmware event
  * queue, and the rest are handed out, port by port and VI by VI, to the
  * netmap rx queues, the NIC rx queues, and the offload rx queues that want
  * their own interrupt.  Each VI's first_intr and nintr are filled in along
  * the way.
  *
  * Returns 0 on success or the error from the t4_alloc_irq call that failed.
  */
 static int
 setup_intr_handlers(struct adapter *sc)
 {
 	int rc, rid, p, q, v;
 	char s[8];	/* short label passed to t4_alloc_irq */
 	struct irq *irq;
 	struct port_info *pi;
 	struct vi_info *vi;
 	struct sge_rxq *rxq;
 #ifdef TCP_OFFLOAD
 	struct sge_ofld_rxq *ofld_rxq;
 #endif
 #ifdef DEV_NETMAP
 	struct sge_nm_rxq *nm_rxq;
 #endif
+#ifdef RSS
+	int nbuckets = rss_getnumbuckets();
+#endif
 
 	/*
 	 * Setup interrupts.
 	 */
 	irq = &sc->irq[0];
 	/* Resource ids start at 0 for INTx and at 1 otherwise. */
 	rid = sc->intr_type == INTR_INTX ? 0 : 1;
 	if (sc->intr_count == 1)
 		return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all"));
 
 	/* Multiple interrupts. */
 	KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports,
 	    ("%s: too few intr.", __func__));
 
 	/* The first one is always error intr */
 	rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err");
 	if (rc != 0)
 		return (rc);
 	irq++;
 	rid++;
 
 	/* The second one is always the firmware event queue */
 	rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sc->sge.fwq, "evt");
 	if (rc != 0)
 		return (rc);
 	irq++;
 	rid++;
 
 	for_each_port(sc, p) {
 		pi = sc->port[p];
 		for_each_vi(pi, v, vi) {
 			vi->first_intr = rid - 1;
 #ifdef DEV_NETMAP
 			/* A netmap VI only has netmap rx queues to service. */
 			if (vi->flags & VI_NETMAP) {
 				for_each_nm_rxq(vi, q, nm_rxq) {
 					snprintf(s, sizeof(s), "%d-%d", p, q);
 					rc = t4_alloc_irq(sc, irq, rid,
 					    t4_nm_intr, nm_rxq, s);
 					if (rc != 0)
 						return (rc);
 					irq++;
 					rid++;
 					vi->nintr++;
 				}
 				continue;
 			}
 #endif
 			if (vi->flags & INTR_RXQ) {
 				for_each_rxq(vi, q, rxq) {
 					/* "port.queue" or "port(vi).queue" */
 					if (v == 0)
 						snprintf(s, sizeof(s), "%d.%d",
 						    p, q);
 					else
 						snprintf(s, sizeof(s),
 						    "%d(%d).%d", p, v, q);
 					rc = t4_alloc_irq(sc, irq, rid,
 					    t4_intr, rxq, s);
 					if (rc != 0)
 						return (rc);
+#ifdef RSS
+					bus_bind_intr(sc->dev, irq->res,
+					    rss_getcpu(q % nbuckets));
+#endif
 					irq++;
 					rid++;
 					vi->nintr++;
 				}
 			}
 #ifdef TCP_OFFLOAD
 			if (vi->flags & INTR_OFLD_RXQ) {
 				for_each_ofld_rxq(vi, q, ofld_rxq) {
 					snprintf(s, sizeof(s), "%d,%d", p, q);
 					rc = t4_alloc_irq(sc, irq, rid,
 					    t4_intr, ofld_rxq, s);
 					if (rc != 0)
 						return (rc);
 					irq++;
 					rid++;
 					vi->nintr++;
 				}
 			}
 #endif
 		}
 	}
 	/* Every interrupt the adapter has must have been handed out. */
 	MPASS(irq == &sc->irq[sc->intr_count]);
 
 	return (0);
 }
 
 int
 adapter_full_init(struct adapter *sc)
 {
 	int rc, i;
 
 	ASSERT_SYNCHRONIZED_OP(sc);
 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
 	KASSERT((sc->flags & FULL_INIT_DONE) == 0,
 	    ("%s: FULL_INIT_DONE already", __func__));
 
 	/*
 	 * queues that belong to the adapter (not any particular port).
 	 */
 	rc = t4_setup_adapter_queues(sc);
 	if (rc != 0)
 		goto done;
 
 	for (i = 0; i < nitems(sc->tq); i++) {
 		sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT,
 		    taskqueue_thread_enqueue, &sc->tq[i]);
 		if (sc->tq[i] == NULL) {
 			device_printf(sc->dev,
 			    "failed to allocate task queue %d\n", i);
 			rc = ENOMEM;
 			goto done;
 		}
 		taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d",
 		    device_get_nameunit(sc->dev), i);
 	}
 
 	t4_intr_enable(sc);
 	sc->flags |= FULL_INIT_DONE;
 done:
 	if (rc != 0)
 		adapter_full_uninit(sc);
 
 	return (rc);
 }
 
 /*
  * Undo adapter_full_init: tear down the adapter's queues, free its task
  * queues (stopping at the first slot that is empty), and clear
  * FULL_INIT_DONE.  Must be called without the adapter lock.  Always
  * returns 0.
  */
 int
 adapter_full_uninit(struct adapter *sc)
 {
 	int qidx = 0;
 
 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
 
 	t4_teardown_adapter_queues(sc);
 
 	while (qidx < nitems(sc->tq) && sc->tq[qidx]) {
 		taskqueue_free(sc->tq[qidx]);
 		sc->tq[qidx] = NULL;
 		qidx++;
 	}
 
 	sc->flags &= ~FULL_INIT_DONE;
 
 	return (0);
 }
 
 #ifdef RSS
 #define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \
     RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \
     RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \
     RSS_HASHTYPE_RSS_UDP_IPV6)
 
 /*
  * Convert the kernel's RSS hash types into the hardware's hash enable bits.
  * TCP and UDP share the hardware's 4-tuple enables; UDP additionally needs
  * the UDP enable bit.
  */
 static int
 hashconfig_to_hashen(int hashconfig)
 {
 	const int udp4 = hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4;
 	const int udp6 = hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6;
 	int hw = 0;
 
 	/* 2-tuple hashing */
 	if (hashconfig & RSS_HASHTYPE_RSS_IPV4)
 		hw |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN;
 	if (hashconfig & RSS_HASHTYPE_RSS_IPV6)
 		hw |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN;
 
 	/* 4-tuple hashing, wanted by either TCP or UDP */
 	if (udp4 || (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4))
 		hw |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN;
 	if (udp6 || (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6))
 		hw |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN;
 
 	/* UDP over either address family */
 	if (udp4 || udp6)
 		hw |= F_FW_RSS_VI_CONFIG_CMD_UDPEN;
 
 	return (hw);
 }
 
 /*
  * Convert the hardware's hash enable bits into the kernel's RSS hash types.
  * A 4-tuple enable always implies the TCP hash type for that address family
  * and, when the UDP enable is set as well, the UDP hash type too.
  */
 static int
 hashen_to_hashconfig(int hashen)
 {
 	const int udp = (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) != 0;
 	int cfg = 0;
 
 	if (udp) {
 		/*
 		 * UDP hashing makes sense only along with a 4-tuple hash for
 		 * IPv4, IPv6, or both.  UDP on its own is a nonsense
 		 * configuration.
 		 */
 		MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
 		    F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN));
 	}
 
 	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) {
 		cfg |= RSS_HASHTYPE_RSS_TCP_IPV4;
 		if (udp)
 			cfg |= RSS_HASHTYPE_RSS_UDP_IPV4;
 	}
 	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) {
 		cfg |= RSS_HASHTYPE_RSS_TCP_IPV6;
 		if (udp)
 			cfg |= RSS_HASHTYPE_RSS_UDP_IPV6;
 	}
 
 	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN)
 		cfg |= RSS_HASHTYPE_RSS_IPV4;
 	if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN)
 		cfg |= RSS_HASHTYPE_RSS_IPV6;
 
 	return (cfg);
 }
 #endif
 
 int
 vi_full_init(struct vi_info *vi)
 {
 	struct adapter *sc = vi->pi->adapter;
 	struct ifnet *ifp = vi->ifp;
 	uint16_t *rss;
 	struct sge_rxq *rxq;
 	int rc, i, j, hashen;
 #ifdef RSS
 	int nbuckets = rss_getnumbuckets();
 	int hashconfig = rss_gethashconfig();
 	int extra;
 	uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
 	uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)];
 #endif
 
 	ASSERT_SYNCHRONIZED_OP(sc);
 	KASSERT((vi->flags & VI_INIT_DONE) == 0,
 	    ("%s: VI_INIT_DONE already", __func__));
 
 	sysctl_ctx_init(&vi->ctx);
 	vi->flags |= VI_SYSCTL_CTX;
 
 	/*
 	 * Allocate tx/rx/fl queues for this VI.
 	 */
 	rc = t4_setup_vi_queues(vi);
 	if (rc != 0)
 		goto done;	/* error message displayed already */
 
 #ifdef DEV_NETMAP
 	/* Netmap VIs configure RSS when netmap is enabled. */
 	if (vi->flags & VI_NETMAP) {
 		vi->flags |= VI_INIT_DONE;
 		return (0);
 	}
 #endif
 
 	/*
 	 * Setup RSS for this VI.  Save a copy of the RSS table for later use.
 	 */
 	if (vi->nrxq > vi->rss_size) {
 		if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); "
 		    "some queues will never receive traffic.\n", vi->nrxq,
 		    vi->rss_size);
 	} else if (vi->rss_size % vi->nrxq) {
 		if_printf(ifp, "nrxq (%d), hw RSS table size (%d); "
 		    "expect uneven traffic distribution.\n", vi->nrxq,
 		    vi->rss_size);
 	}
 #ifdef RSS
 	MPASS(RSS_KEYSIZE == 40);
 	if (vi->nrxq != nbuckets) {
 		if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);"
 		    "performance will be impacted.\n", vi->nrxq, nbuckets);
 	}
 
 	rss_getkey((void *)&raw_rss_key[0]);
 	for (i = 0; i < nitems(rss_key); i++) {
 		rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]);
 	}
 	t4_write_rss_key(sc, (void *)&rss_key[0], -1);
 #endif
 	rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK);
 	for (i = 0; i < vi->rss_size;) {
 #ifdef RSS
 		j = rss_get_indirection_to_bucket(i);
 		j %= vi->nrxq;
 		rxq = &sc->sge.rxq[vi->first_rxq + j];
 		rss[i++] = rxq->iq.abs_id;
 #else
 		for_each_rxq(vi, j, rxq) {
 			rss[i++] = rxq->iq.abs_id;
 			if (i == vi->rss_size)
 				break;
 		}
 #endif
 	}
 
 	rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss,
 	    vi->rss_size);
 	if (rc != 0) {
 		if_printf(ifp, "rss_config failed: %d\n", rc);
 		goto done;
 	}
 
 #ifdef RSS
 	hashen = hashconfig_to_hashen(hashconfig);
 
 	/*
 	 * We may have had to enable some hashes even though the global config
 	 * wants them disabled.  This is a potential problem that must be
 	 * reported to the user.
 	 */
 	extra = hashen_to_hashconfig(hashen) ^ hashconfig;
 
 	/*
 	 * If we consider only the supported hash types, then the enabled hashes
 	 * are a superset of the requested hashes.  In other words, there cannot
 	 * be any supported hash that was requested but not enabled, but there
 	 * can be hashes that were not requested but had to be enabled.
 	 */
 	extra &= SUPPORTED_RSS_HASHTYPES;
 	MPASS((extra & hashconfig) == 0);
 
 	if (extra) {
 		if_printf(ifp,
 		    "global RSS config (0x%x) cannot be accomodated.\n",
 		    hashconfig);
 	}
 	if (extra & RSS_HASHTYPE_RSS_IPV4)
 		if_printf(ifp, "IPv4 2-tuple hashing forced on.\n");
 	if (extra & RSS_HASHTYPE_RSS_TCP_IPV4)
 		if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n");
 	if (extra & RSS_HASHTYPE_RSS_IPV6)
 		if_printf(ifp, "IPv6 2-tuple hashing forced on.\n");
 	if (extra & RSS_HASHTYPE_RSS_TCP_IPV6)
 		if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n");
 	if (extra & RSS_HASHTYPE_RSS_UDP_IPV4)
 		if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n");
 	if (extra & RSS_HASHTYPE_RSS_UDP_IPV6)
 		if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n");
 #else
 	hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN |
 	    F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN |
 	    F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN |
 	    F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN;
 #endif
 	rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, hashen, rss[0]);
 	if (rc != 0) {
 		if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc);
 		goto done;
 	}
 
 	vi->rss = rss;
 	vi->flags |= VI_INIT_DONE;
 done:
 	if (rc != 0)
 		vi_full_uninit(vi);
 
 	return (rc);
 }
 
 /*
  * Tear down what vi_full_init() set up.  Idempotent: the queues are only
  * quiesced (and the saved RSS table only freed) while VI_INIT_DONE is set,
  * and the flag is cleared before returning.  Always returns 0.
  */
 int
 vi_full_uninit(struct vi_info *vi)
 {
 	struct port_info *pi = vi->pi;
 	struct adapter *sc = pi->adapter;
 	struct sge_txq *txq;
 	struct sge_rxq *rxq;
 #ifdef TCP_OFFLOAD
 	struct sge_wrq *ofld_txq;
 	struct sge_ofld_rxq *ofld_rxq;
 #endif
 	int i, need_quiesce;
 
 	/* Need to quiesce queues, but only after a completed full init. */
 	need_quiesce = (vi->flags & VI_INIT_DONE) != 0;
 #ifdef DEV_NETMAP
 	/* Netmap VIs go straight to the queue teardown. */
 	if (vi->flags & VI_NETMAP)
 		need_quiesce = 0;
 #endif
 
 	if (need_quiesce) {
 		/* XXX: Only for the first VI? */
 		if (IS_MAIN_VI(vi))
 			quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
 
 		/* Transmit side first ... */
 		for_each_txq(vi, i, txq) {
 			quiesce_txq(sc, txq);
 		}
 #ifdef TCP_OFFLOAD
 		for_each_ofld_txq(vi, i, ofld_txq) {
 			quiesce_wrq(sc, ofld_txq);
 		}
 #endif
 
 		/* ... then the receive side (ingress queue and its freelist). */
 		for_each_rxq(vi, i, rxq) {
 			quiesce_iq(sc, &rxq->iq);
 			quiesce_fl(sc, &rxq->fl);
 		}
 #ifdef TCP_OFFLOAD
 		for_each_ofld_rxq(vi, i, ofld_rxq) {
 			quiesce_iq(sc, &ofld_rxq->iq);
 			quiesce_fl(sc, &ofld_rxq->fl);
 		}
 #endif
 
 		/* Release the RSS table saved by vi_full_init(). */
 		free(vi->rss, M_CXGBE);
 	}
 
 	t4_teardown_vi_queues(vi);
 	vi->flags &= ~VI_INIT_DONE;
 
 	return (0);
 }
 
 /*
  * Block until a tx queue has fully drained.  The queue is expected to be
  * disabled already (asserted under INVARIANTS).  Sleeps one tick at a time
  * between checks.
  */
 static void
 quiesce_txq(struct adapter *sc, struct sge_txq *txq)
 {
 	struct sge_eq *eq = &txq->eq;
 	/* The queue status entry sits right after the last descriptor. */
 	struct sge_qstat *qstat = (void *)&eq->desc[eq->sidx];
 
 	(void) sc;	/* unused */
 
 #ifdef INVARIANTS
 	TXQ_LOCK(txq);
 	MPASS((eq->flags & EQ_ENABLED) == 0);
 	TXQ_UNLOCK(txq);
 #endif
 
 	/* Stage 1: the mp_ring must run empty. */
 	for (;;) {
 		if (mp_ring_is_idle(txq->r))
 			break;
 		mp_ring_check_drainage(txq->r, 0);
 		pause("rquiesce", 1);
 	}
 
 	/* Stage 2: the hardware's consumer index must reach our pidx. */
 	for (;;) {
 		if (qstat->cidx == htobe16(eq->pidx))
 			break;
 		pause("equiesce", 1);
 	}
 
 	/* Stage 3: the driver must have reclaimed every descriptor. */
 	for (;;) {
 		if (eq->cidx == eq->pidx)
 			break;
 		pause("dquiesce", 1);
 	}
 }
 
 /*
  * Quiesce a work request queue.  Currently a placeholder: the body is empty
  * and neither argument is used, so callers get no draining guarantee.
  */
 static void
 quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq)
 {
 
 	/* XXXTX */
 }
 
 /*
  * Move an ingress queue from IQS_IDLE to IQS_DISABLED, sleeping a tick and
  * retrying for as long as the queue is not idle.
  */
 static void
 quiesce_iq(struct adapter *sc, struct sge_iq *iq)
 {
 	(void) sc;	/* unused */
 
 	/* Synchronize with the interrupt handler */
 	for (;;) {
 		if (atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED))
 			break;
 		pause("iqfree", 1);
 	}
 }
 
 /*
  * Quiesce a freelist: flag it FL_DOOMED and stop the adapter's sfl_callout.
  * Afterwards the freelist must not be marked FL_STARVING (asserted).
  */
 static void
 quiesce_fl(struct adapter *sc, struct sge_fl *fl)
 {
 	/* sfl_lock is taken first, then the freelist's own lock. */
 	mtx_lock(&sc->sfl_lock);
 	FL_LOCK(fl);
 	fl->flags |= FL_DOOMED;
 	FL_UNLOCK(fl);
 	/* Stopped while still holding sfl_lock. */
 	callout_stop(&sc->sfl_callout);
 	mtx_unlock(&sc->sfl_lock);
 
 	KASSERT((fl->flags & FL_STARVING) == 0,
 	    ("%s: still starving", __func__));
 }
 
 /*
  * Allocate the IRQ resource for 'rid' and attach 'handler' (with 'arg') to
  * it.  When 'name' is non-NULL and setup succeeds, the interrupt is also
  * given that description.
  *
  * Returns 0 on success, ENOMEM if the resource could not be allocated, or
  * the error from bus_setup_intr().  In the latter case irq->res stays
  * allocated; this function does not release it.
  */
 static int
 t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid,
     driver_intr_t *handler, void *arg, char *name)
 {
 	int error;
 
 	irq->rid = rid;
 	irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid,
 	    RF_SHAREABLE | RF_ACTIVE);
 	if (irq->res == NULL) {
 		device_printf(sc->dev,
 		    "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
 		return (ENOMEM);
 	}
 
 	error = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET,
 	    NULL, handler, arg, &irq->tag);
 	if (error != 0) {
 		device_printf(sc->dev,
 		    "failed to setup interrupt for rid %d, name %s: %d\n",
 		    rid, name, error);
 		return (error);
 	}
 
 	/* The description is optional. */
 	if (name)
 		bus_describe_intr(sc->dev, irq->res, irq->tag, name);
 
 	return (0);
 }
 
 /*
  * Release whatever t4_alloc_irq() managed to set up for 'irq' (handler
  * and/or resource) and zero the structure.  Always returns 0.
  */
 static int
 t4_free_irq(struct adapter *sc, struct irq *irq)
 {
 	/* Detach the handler before giving the resource back. */
 	if (irq->tag) {
 		bus_teardown_intr(sc->dev, irq->res, irq->tag);
 	}
 	if (irq->res) {
 		bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res);
 	}
 
 	/* Leave the structure zeroed. */
 	bzero(irq, sizeof(*irq));
 
 	return (0);
 }
 
 /*
  * Read every 32-bit register in the inclusive address range [start, end]
  * and store the values in 'buf' at byte offset equal to the register
  * address.
  */
 static void
 reg_block_dump(struct adapter *sc, uint8_t *buf, unsigned int start,
     unsigned int end)
 {
 	uint32_t *dst = (uint32_t *)(buf + start);
 	unsigned int reg = start;
 
 	while (reg <= end) {
 		*dst++ = t4_read_reg(sc, reg);
 		reg += sizeof(uint32_t);
 	}
 }
 
 /*
  * Dump the chip's registers into 'buf' and record the chip id/revision in
  * regs->version.
  *
  * The set of registers read is taken from one of two static tables, chosen
  * by is_t4().  Each table is a flat list of (first, last) register address
  * pairs; every pair is handed to reg_block_dump(), which treats the range as
  * inclusive and stores each value at buf + register address.  The size of
  * 'buf' is not checked here, so the caller must provide a buffer that covers
  * the highest address in the table in use.
  */
 static void
 t4_get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
 {
 	int i, n;
 	const unsigned int *reg_ranges;
 	/* Register ranges used when is_t4(sc) is true: first, last pairs. */
 	static const unsigned int t4_reg_ranges[] = {
 		0x1008, 0x1108,
 		0x1180, 0x11b4,
 		0x11fc, 0x123c,
 		0x1300, 0x173c,
 		0x1800, 0x18fc,
 		0x3000, 0x30d8,
 		0x30e0, 0x5924,
 		0x5960, 0x59d4,
 		0x5a00, 0x5af8,
 		0x6000, 0x6098,
 		0x6100, 0x6150,
 		0x6200, 0x6208,
 		0x6240, 0x6248,
 		0x6280, 0x6338,
 		0x6370, 0x638c,
 		0x6400, 0x643c,
 		0x6500, 0x6524,
 		0x6a00, 0x6a38,
 		0x6a60, 0x6a78,
 		0x6b00, 0x6b84,
 		0x6bf0, 0x6c84,
 		0x6cf0, 0x6d84,
 		0x6df0, 0x6e84,
 		0x6ef0, 0x6f84,
 		0x6ff0, 0x7084,
 		0x70f0, 0x7184,
 		0x71f0, 0x7284,
 		0x72f0, 0x7384,
 		0x73f0, 0x7450,
 		0x7500, 0x7530,
 		0x7600, 0x761c,
 		0x7680, 0x76cc,
 		0x7700, 0x7798,
 		0x77c0, 0x77fc,
 		0x7900, 0x79fc,
 		0x7b00, 0x7c38,
 		0x7d00, 0x7efc,
 		0x8dc0, 0x8e1c,
 		0x8e30, 0x8e78,
 		0x8ea0, 0x8f6c,
 		0x8fc0, 0x9074,
 		0x90fc, 0x90fc,
 		0x9400, 0x9458,
 		0x9600, 0x96bc,
 		0x9800, 0x9808,
 		0x9820, 0x983c,
 		0x9850, 0x9864,
 		0x9c00, 0x9c6c,
 		0x9c80, 0x9cec,
 		0x9d00, 0x9d6c,
 		0x9d80, 0x9dec,
 		0x9e00, 0x9e6c,
 		0x9e80, 0x9eec,
 		0x9f00, 0x9f6c,
 		0x9f80, 0x9fec,
 		0xd004, 0xd03c,
 		0xdfc0, 0xdfe0,
 		0xe000, 0xea7c,
 		0xf000, 0x11110,
 		0x11118, 0x11190,
 		0x19040, 0x1906c,
 		0x19078, 0x19080,
 		0x1908c, 0x19124,
 		0x19150, 0x191b0,
 		0x191d0, 0x191e8,
 		0x19238, 0x1924c,
 		0x193f8, 0x19474,
 		0x19490, 0x194f8,
 		0x19800, 0x19f30,
 		0x1a000, 0x1a06c,
 		0x1a0b0, 0x1a120,
 		0x1a128, 0x1a138,
 		0x1a190, 0x1a1c4,
 		0x1a1fc, 0x1a1fc,
 		0x1e040, 0x1e04c,
 		0x1e284, 0x1e28c,
 		0x1e2c0, 0x1e2c0,
 		0x1e2e0, 0x1e2e0,
 		0x1e300, 0x1e384,
 		0x1e3c0, 0x1e3c8,
 		0x1e440, 0x1e44c,
 		0x1e684, 0x1e68c,
 		0x1e6c0, 0x1e6c0,
 		0x1e6e0, 0x1e6e0,
 		0x1e700, 0x1e784,
 		0x1e7c0, 0x1e7c8,
 		0x1e840, 0x1e84c,
 		0x1ea84, 0x1ea8c,
 		0x1eac0, 0x1eac0,
 		0x1eae0, 0x1eae0,
 		0x1eb00, 0x1eb84,
 		0x1ebc0, 0x1ebc8,
 		0x1ec40, 0x1ec4c,
 		0x1ee84, 0x1ee8c,
 		0x1eec0, 0x1eec0,
 		0x1eee0, 0x1eee0,
 		0x1ef00, 0x1ef84,
 		0x1efc0, 0x1efc8,
 		0x1f040, 0x1f04c,
 		0x1f284, 0x1f28c,
 		0x1f2c0, 0x1f2c0,
 		0x1f2e0, 0x1f2e0,
 		0x1f300, 0x1f384,
 		0x1f3c0, 0x1f3c8,
 		0x1f440, 0x1f44c,
 		0x1f684, 0x1f68c,
 		0x1f6c0, 0x1f6c0,
 		0x1f6e0, 0x1f6e0,
 		0x1f700, 0x1f784,
 		0x1f7c0, 0x1f7c8,
 		0x1f840, 0x1f84c,
 		0x1fa84, 0x1fa8c,
 		0x1fac0, 0x1fac0,
 		0x1fae0, 0x1fae0,
 		0x1fb00, 0x1fb84,
 		0x1fbc0, 0x1fbc8,
 		0x1fc40, 0x1fc4c,
 		0x1fe84, 0x1fe8c,
 		0x1fec0, 0x1fec0,
 		0x1fee0, 0x1fee0,
 		0x1ff00, 0x1ff84,
 		0x1ffc0, 0x1ffc8,
 		0x20000, 0x2002c,
 		0x20100, 0x2013c,
 		0x20190, 0x201c8,
 		0x20200, 0x20318,
 		0x20400, 0x20528,
 		0x20540, 0x20614,
 		0x21000, 0x21040,
 		0x2104c, 0x21060,
 		0x210c0, 0x210ec,
 		0x21200, 0x21268,
 		0x21270, 0x21284,
 		0x212fc, 0x21388,
 		0x21400, 0x21404,
 		0x21500, 0x21518,
 		0x2152c, 0x2153c,
 		0x21550, 0x21554,
 		0x21600, 0x21600,
 		0x21608, 0x21628,
 		0x21630, 0x2163c,
 		0x21700, 0x2171c,
 		0x21780, 0x2178c,
 		0x21800, 0x21c38,
 		0x21c80, 0x21d7c,
 		0x21e00, 0x21e04,
 		0x22000, 0x2202c,
 		0x22100, 0x2213c,
 		0x22190, 0x221c8,
 		0x22200, 0x22318,
 		0x22400, 0x22528,
 		0x22540, 0x22614,
 		0x23000, 0x23040,
 		0x2304c, 0x23060,
 		0x230c0, 0x230ec,
 		0x23200, 0x23268,
 		0x23270, 0x23284,
 		0x232fc, 0x23388,
 		0x23400, 0x23404,
 		0x23500, 0x23518,
 		0x2352c, 0x2353c,
 		0x23550, 0x23554,
 		0x23600, 0x23600,
 		0x23608, 0x23628,
 		0x23630, 0x2363c,
 		0x23700, 0x2371c,
 		0x23780, 0x2378c,
 		0x23800, 0x23c38,
 		0x23c80, 0x23d7c,
 		0x23e00, 0x23e04,
 		0x24000, 0x2402c,
 		0x24100, 0x2413c,
 		0x24190, 0x241c8,
 		0x24200, 0x24318,
 		0x24400, 0x24528,
 		0x24540, 0x24614,
 		0x25000, 0x25040,
 		0x2504c, 0x25060,
 		0x250c0, 0x250ec,
 		0x25200, 0x25268,
 		0x25270, 0x25284,
 		0x252fc, 0x25388,
 		0x25400, 0x25404,
 		0x25500, 0x25518,
 		0x2552c, 0x2553c,
 		0x25550, 0x25554,
 		0x25600, 0x25600,
 		0x25608, 0x25628,
 		0x25630, 0x2563c,
 		0x25700, 0x2571c,
 		0x25780, 0x2578c,
 		0x25800, 0x25c38,
 		0x25c80, 0x25d7c,
 		0x25e00, 0x25e04,
 		0x26000, 0x2602c,
 		0x26100, 0x2613c,
 		0x26190, 0x261c8,
 		0x26200, 0x26318,
 		0x26400, 0x26528,
 		0x26540, 0x26614,
 		0x27000, 0x27040,
 		0x2704c, 0x27060,
 		0x270c0, 0x270ec,
 		0x27200, 0x27268,
 		0x27270, 0x27284,
 		0x272fc, 0x27388,
 		0x27400, 0x27404,
 		0x27500, 0x27518,
 		0x2752c, 0x2753c,
 		0x27550, 0x27554,
 		0x27600, 0x27600,
 		0x27608, 0x27628,
 		0x27630, 0x2763c,
 		0x27700, 0x2771c,
 		0x27780, 0x2778c,
 		0x27800, 0x27c38,
 		0x27c80, 0x27d7c,
 		0x27e00, 0x27e04
 	};
 	/* Register ranges used for every other chip: first, last pairs. */
 	static const unsigned int t5_reg_ranges[] = {
 		0x1008, 0x1148,
 		0x1180, 0x11b4,
 		0x11fc, 0x123c,
 		0x1280, 0x173c,
 		0x1800, 0x18fc,
 		0x3000, 0x3028,
 		0x3060, 0x30d8,
 		0x30e0, 0x30fc,
 		0x3140, 0x357c,
 		0x35a8, 0x35cc,
 		0x35ec, 0x35ec,
 		0x3600, 0x5624,
 		0x56cc, 0x575c,
 		0x580c, 0x5814,
 		0x5890, 0x58bc,
 		0x5940, 0x59dc,
 		0x59fc, 0x5a18,
 		0x5a60, 0x5a9c,
 		0x5b94, 0x5bfc,
 		0x6000, 0x6040,
 		0x6058, 0x614c,
 		0x7700, 0x7798,
 		0x77c0, 0x78fc,
 		0x7b00, 0x7c54,
 		0x7d00, 0x7efc,
 		0x8dc0, 0x8de0,
 		0x8df8, 0x8e84,
 		0x8ea0, 0x8f84,
 		0x8fc0, 0x90f8,
 		0x9400, 0x9470,
 		0x9600, 0x96f4,
 		0x9800, 0x9808,
 		0x9820, 0x983c,
 		0x9850, 0x9864,
 		0x9c00, 0x9c6c,
 		0x9c80, 0x9cec,
 		0x9d00, 0x9d6c,
 		0x9d80, 0x9dec,
 		0x9e00, 0x9e6c,
 		0x9e80, 0x9eec,
 		0x9f00, 0x9f6c,
 		0x9f80, 0xa020,
 		0xd004, 0xd03c,
 		0xdfc0, 0xdfe0,
 		0xe000, 0x11088,
 		0x1109c, 0x11110,
 		0x11118, 0x1117c,
 		0x11190, 0x11204,
 		0x19040, 0x1906c,
 		0x19078, 0x19080,
 		0x1908c, 0x19124,
 		0x19150, 0x191b0,
 		0x191d0, 0x191e8,
 		0x19238, 0x19290,
 		0x193f8, 0x19474,
 		0x19490, 0x194cc,
 		0x194f0, 0x194f8,
 		0x19c00, 0x19c60,
 		0x19c94, 0x19e10,
 		0x19e50, 0x19f34,
 		0x19f40, 0x19f50,
 		0x19f90, 0x19fe4,
 		0x1a000, 0x1a06c,
 		0x1a0b0, 0x1a120,
 		0x1a128, 0x1a138,
 		0x1a190, 0x1a1c4,
 		0x1a1fc, 0x1a1fc,
 		0x1e008, 0x1e00c,
 		0x1e040, 0x1e04c,
 		0x1e284, 0x1e290,
 		0x1e2c0, 0x1e2c0,
 		0x1e2e0, 0x1e2e0,
 		0x1e300, 0x1e384,
 		0x1e3c0, 0x1e3c8,
 		0x1e408, 0x1e40c,
 		0x1e440, 0x1e44c,
 		0x1e684, 0x1e690,
 		0x1e6c0, 0x1e6c0,
 		0x1e6e0, 0x1e6e0,
 		0x1e700, 0x1e784,
 		0x1e7c0, 0x1e7c8,
 		0x1e808, 0x1e80c,
 		0x1e840, 0x1e84c,
 		0x1ea84, 0x1ea90,
 		0x1eac0, 0x1eac0,
 		0x1eae0, 0x1eae0,
 		0x1eb00, 0x1eb84,
 		0x1ebc0, 0x1ebc8,
 		0x1ec08, 0x1ec0c,
 		0x1ec40, 0x1ec4c,
 		0x1ee84, 0x1ee90,
 		0x1eec0, 0x1eec0,
 		0x1eee0, 0x1eee0,
 		0x1ef00, 0x1ef84,
 		0x1efc0, 0x1efc8,
 		0x1f008, 0x1f00c,
 		0x1f040, 0x1f04c,
 		0x1f284, 0x1f290,
 		0x1f2c0, 0x1f2c0,
 		0x1f2e0, 0x1f2e0,
 		0x1f300, 0x1f384,
 		0x1f3c0, 0x1f3c8,
 		0x1f408, 0x1f40c,
 		0x1f440, 0x1f44c,
 		0x1f684, 0x1f690,
 		0x1f6c0, 0x1f6c0,
 		0x1f6e0, 0x1f6e0,
 		0x1f700, 0x1f784,
 		0x1f7c0, 0x1f7c8,
 		0x1f808, 0x1f80c,
 		0x1f840, 0x1f84c,
 		0x1fa84, 0x1fa90,
 		0x1fac0, 0x1fac0,
 		0x1fae0, 0x1fae0,
 		0x1fb00, 0x1fb84,
 		0x1fbc0, 0x1fbc8,
 		0x1fc08, 0x1fc0c,
 		0x1fc40, 0x1fc4c,
 		0x1fe84, 0x1fe90,
 		0x1fec0, 0x1fec0,
 		0x1fee0, 0x1fee0,
 		0x1ff00, 0x1ff84,
 		0x1ffc0, 0x1ffc8,
 		0x30000, 0x30030,
 		0x30100, 0x30144,
 		0x30190, 0x301d0,
 		0x30200, 0x30318,
 		0x30400, 0x3052c,
 		0x30540, 0x3061c,
 		0x30800, 0x30834,
 		0x308c0, 0x30908,
 		0x30910, 0x309ac,
 		0x30a00, 0x30a2c,
 		0x30a44, 0x30a50,
 		0x30a74, 0x30c24,
 		0x30d00, 0x30d00,
 		0x30d08, 0x30d14,
 		0x30d1c, 0x30d20,
 		0x30d3c, 0x30d50,
 		0x31200, 0x3120c,
 		0x31220, 0x31220,
 		0x31240, 0x31240,
 		0x31600, 0x3160c,
 		0x31a00, 0x31a1c,
 		0x31e00, 0x31e20,
 		0x31e38, 0x31e3c,
 		0x31e80, 0x31e80,
 		0x31e88, 0x31ea8,
 		0x31eb0, 0x31eb4,
 		0x31ec8, 0x31ed4,
 		0x31fb8, 0x32004,
 		0x32200, 0x32200,
 		0x32208, 0x32240,
 		0x32248, 0x32280,
 		0x32288, 0x322c0,
 		0x322c8, 0x322fc,
 		0x32600, 0x32630,
 		0x32a00, 0x32abc,
 		0x32b00, 0x32b70,
 		0x33000, 0x33048,
 		0x33060, 0x3309c,
 		0x330f0, 0x33148,
 		0x33160, 0x3319c,
 		0x331f0, 0x332e4,
 		0x332f8, 0x333e4,
 		0x333f8, 0x33448,
 		0x33460, 0x3349c,
 		0x334f0, 0x33548,
 		0x33560, 0x3359c,
 		0x335f0, 0x336e4,
 		0x336f8, 0x337e4,
 		0x337f8, 0x337fc,
 		0x33814, 0x33814,
 		0x3382c, 0x3382c,
 		0x33880, 0x3388c,
 		0x338e8, 0x338ec,
 		0x33900, 0x33948,
 		0x33960, 0x3399c,
 		0x339f0, 0x33ae4,
 		0x33af8, 0x33b10,
 		0x33b28, 0x33b28,
 		0x33b3c, 0x33b50,
 		0x33bf0, 0x33c10,
 		0x33c28, 0x33c28,
 		0x33c3c, 0x33c50,
 		0x33cf0, 0x33cfc,
 		0x34000, 0x34030,
 		0x34100, 0x34144,
 		0x34190, 0x341d0,
 		0x34200, 0x34318,
 		0x34400, 0x3452c,
 		0x34540, 0x3461c,
 		0x34800, 0x34834,
 		0x348c0, 0x34908,
 		0x34910, 0x349ac,
 		0x34a00, 0x34a2c,
 		0x34a44, 0x34a50,
 		0x34a74, 0x34c24,
 		0x34d00, 0x34d00,
 		0x34d08, 0x34d14,
 		0x34d1c, 0x34d20,
 		0x34d3c, 0x34d50,
 		0x35200, 0x3520c,
 		0x35220, 0x35220,
 		0x35240, 0x35240,
 		0x35600, 0x3560c,
 		0x35a00, 0x35a1c,
 		0x35e00, 0x35e20,
 		0x35e38, 0x35e3c,
 		0x35e80, 0x35e80,
 		0x35e88, 0x35ea8,
 		0x35eb0, 0x35eb4,
 		0x35ec8, 0x35ed4,
 		0x35fb8, 0x36004,
 		0x36200, 0x36200,
 		0x36208, 0x36240,
 		0x36248, 0x36280,
 		0x36288, 0x362c0,
 		0x362c8, 0x362fc,
 		0x36600, 0x36630,
 		0x36a00, 0x36abc,
 		0x36b00, 0x36b70,
 		0x37000, 0x37048,
 		0x37060, 0x3709c,
 		0x370f0, 0x37148,
 		0x37160, 0x3719c,
 		0x371f0, 0x372e4,
 		0x372f8, 0x373e4,
 		0x373f8, 0x37448,
 		0x37460, 0x3749c,
 		0x374f0, 0x37548,
 		0x37560, 0x3759c,
 		0x375f0, 0x376e4,
 		0x376f8, 0x377e4,
 		0x377f8, 0x377fc,
 		0x37814, 0x37814,
 		0x3782c, 0x3782c,
 		0x37880, 0x3788c,
 		0x378e8, 0x378ec,
 		0x37900, 0x37948,
 		0x37960, 0x3799c,
 		0x379f0, 0x37ae4,
 		0x37af8, 0x37b10,
 		0x37b28, 0x37b28,
 		0x37b3c, 0x37b50,
 		0x37bf0, 0x37c10,
 		0x37c28, 0x37c28,
 		0x37c3c, 0x37c50,
 		0x37cf0, 0x37cfc,
 		0x38000, 0x38030,
 		0x38100, 0x38144,
 		0x38190, 0x381d0,
 		0x38200, 0x38318,
 		0x38400, 0x3852c,
 		0x38540, 0x3861c,
 		0x38800, 0x38834,
 		0x388c0, 0x38908,
 		0x38910, 0x389ac,
 		0x38a00, 0x38a2c,
 		0x38a44, 0x38a50,
 		0x38a74, 0x38c24,
 		0x38d00, 0x38d00,
 		0x38d08, 0x38d14,
 		0x38d1c, 0x38d20,
 		0x38d3c, 0x38d50,
 		0x39200, 0x3920c,
 		0x39220, 0x39220,
 		0x39240, 0x39240,
 		0x39600, 0x3960c,
 		0x39a00, 0x39a1c,
 		0x39e00, 0x39e20,
 		0x39e38, 0x39e3c,
 		0x39e80, 0x39e80,
 		0x39e88, 0x39ea8,
 		0x39eb0, 0x39eb4,
 		0x39ec8, 0x39ed4,
 		0x39fb8, 0x3a004,
 		0x3a200, 0x3a200,
 		0x3a208, 0x3a240,
 		0x3a248, 0x3a280,
 		0x3a288, 0x3a2c0,
 		0x3a2c8, 0x3a2fc,
 		0x3a600, 0x3a630,
 		0x3aa00, 0x3aabc,
 		0x3ab00, 0x3ab70,
 		0x3b000, 0x3b048,
 		0x3b060, 0x3b09c,
 		0x3b0f0, 0x3b148,
 		0x3b160, 0x3b19c,
 		0x3b1f0, 0x3b2e4,
 		0x3b2f8, 0x3b3e4,
 		0x3b3f8, 0x3b448,
 		0x3b460, 0x3b49c,
 		0x3b4f0, 0x3b548,
 		0x3b560, 0x3b59c,
 		0x3b5f0, 0x3b6e4,
 		0x3b6f8, 0x3b7e4,
 		0x3b7f8, 0x3b7fc,
 		0x3b814, 0x3b814,
 		0x3b82c, 0x3b82c,
 		0x3b880, 0x3b88c,
 		0x3b8e8, 0x3b8ec,
 		0x3b900, 0x3b948,
 		0x3b960, 0x3b99c,
 		0x3b9f0, 0x3bae4,
 		0x3baf8, 0x3bb10,
 		0x3bb28, 0x3bb28,
 		0x3bb3c, 0x3bb50,
 		0x3bbf0, 0x3bc10,
 		0x3bc28, 0x3bc28,
 		0x3bc3c, 0x3bc50,
 		0x3bcf0, 0x3bcfc,
 		0x3c000, 0x3c030,
 		0x3c100, 0x3c144,
 		0x3c190, 0x3c1d0,
 		0x3c200, 0x3c318,
 		0x3c400, 0x3c52c,
 		0x3c540, 0x3c61c,
 		0x3c800, 0x3c834,
 		0x3c8c0, 0x3c908,
 		0x3c910, 0x3c9ac,
 		0x3ca00, 0x3ca2c,
 		0x3ca44, 0x3ca50,
 		0x3ca74, 0x3cc24,
 		0x3cd00, 0x3cd00,
 		0x3cd08, 0x3cd14,
 		0x3cd1c, 0x3cd20,
 		0x3cd3c, 0x3cd50,
 		0x3d200, 0x3d20c,
 		0x3d220, 0x3d220,
 		0x3d240, 0x3d240,
 		0x3d600, 0x3d60c,
 		0x3da00, 0x3da1c,
 		0x3de00, 0x3de20,
 		0x3de38, 0x3de3c,
 		0x3de80, 0x3de80,
 		0x3de88, 0x3dea8,
 		0x3deb0, 0x3deb4,
 		0x3dec8, 0x3ded4,
 		0x3dfb8, 0x3e004,
 		0x3e200, 0x3e200,
 		0x3e208, 0x3e240,
 		0x3e248, 0x3e280,
 		0x3e288, 0x3e2c0,
 		0x3e2c8, 0x3e2fc,
 		0x3e600, 0x3e630,
 		0x3ea00, 0x3eabc,
 		0x3eb00, 0x3eb70,
 		0x3f000, 0x3f048,
 		0x3f060, 0x3f09c,
 		0x3f0f0, 0x3f148,
 		0x3f160, 0x3f19c,
 		0x3f1f0, 0x3f2e4,
 		0x3f2f8, 0x3f3e4,
 		0x3f3f8, 0x3f448,
 		0x3f460, 0x3f49c,
 		0x3f4f0, 0x3f548,
 		0x3f560, 0x3f59c,
 		0x3f5f0, 0x3f6e4,
 		0x3f6f8, 0x3f7e4,
 		0x3f7f8, 0x3f7fc,
 		0x3f814, 0x3f814,
 		0x3f82c, 0x3f82c,
 		0x3f880, 0x3f88c,
 		0x3f8e8, 0x3f8ec,
 		0x3f900, 0x3f948,
 		0x3f960, 0x3f99c,
 		0x3f9f0, 0x3fae4,
 		0x3faf8, 0x3fb10,
 		0x3fb28, 0x3fb28,
 		0x3fb3c, 0x3fb50,
 		0x3fbf0, 0x3fc10,
 		0x3fc28, 0x3fc28,
 		0x3fc3c, 0x3fc50,
 		0x3fcf0, 0x3fcfc,
 		0x40000, 0x4000c,
 		0x40040, 0x40068,
 		0x4007c, 0x40144,
 		0x40180, 0x4018c,
 		0x40200, 0x40298,
 		0x402ac, 0x4033c,
 		0x403f8, 0x403fc,
 		0x41304, 0x413c4,
 		0x41400, 0x4141c,
 		0x41480, 0x414d0,
 		0x44000, 0x44078,
 		0x440c0, 0x44278,
 		0x442c0, 0x44478,
 		0x444c0, 0x44678,
 		0x446c0, 0x44878,
 		0x448c0, 0x449fc,
 		0x45000, 0x45068,
 		0x45080, 0x45084,
 		0x450a0, 0x450b0,
 		0x45200, 0x45268,
 		0x45280, 0x45284,
 		0x452a0, 0x452b0,
 		0x460c0, 0x460e4,
 		0x47000, 0x4708c,
 		0x47200, 0x47250,
 		0x47400, 0x47420,
 		0x47600, 0x47618,
 		0x47800, 0x47814,
 		0x48000, 0x4800c,
 		0x48040, 0x48068,
 		0x4807c, 0x48144,
 		0x48180, 0x4818c,
 		0x48200, 0x48298,
 		0x482ac, 0x4833c,
 		0x483f8, 0x483fc,
 		0x49304, 0x493c4,
 		0x49400, 0x4941c,
 		0x49480, 0x494d0,
 		0x4c000, 0x4c078,
 		0x4c0c0, 0x4c278,
 		0x4c2c0, 0x4c478,
 		0x4c4c0, 0x4c678,
 		0x4c6c0, 0x4c878,
 		0x4c8c0, 0x4c9fc,
 		0x4d000, 0x4d068,
 		0x4d080, 0x4d084,
 		0x4d0a0, 0x4d0b0,
 		0x4d200, 0x4d268,
 		0x4d280, 0x4d284,
 		0x4d2a0, 0x4d2b0,
 		0x4e0c0, 0x4e0e4,
 		0x4f000, 0x4f08c,
 		0x4f200, 0x4f250,
 		0x4f400, 0x4f420,
 		0x4f600, 0x4f618,
 		0x4f800, 0x4f814,
 		0x50000, 0x500cc,
 		0x50400, 0x50400,
 		0x50800, 0x508cc,
 		0x50c00, 0x50c00,
 		0x51000, 0x5101c,
 		0x51300, 0x51308,
 	};
 
 	/* n counts individual table entries, i.e. twice the number of ranges. */
 	if (is_t4(sc)) {
 		reg_ranges = &t4_reg_ranges[0];
 		n = nitems(t4_reg_ranges);
 	} else {
 		reg_ranges = &t5_reg_ranges[0];
 		n = nitems(t5_reg_ranges);
 	}
 
 	/* Chip id OR'ed with the chip revision shifted left by 10 bits. */
 	regs->version = chip_id(sc) | chip_rev(sc) << 10;
 	/* Walk the table two entries at a time: [i] = first, [i + 1] = last. */
 	for (i = 0; i < n; i += 2)
 		reg_block_dump(sc, buf, reg_ranges[i], reg_ranges[i + 1]);
 }
 
 /*
  * PL indirect-access register pair, used below to reach per-VF registers:
  * a command word is written to A_PL_INDIR_CMD and the data is then moved
  * through A_PL_INDIR_DATA.
  *
  * Each field of the command word has the usual four macros: S_ (bit
  * position), M_ (unshifted mask), V_ (place a value in the field) and G_
  * (extract the field from a register value).
  */
 #define	A_PL_INDIR_CMD	0x1f8
 
 /* AUTOINC: single bit at position 31. */
 #define	S_PL_AUTOINC	31
 #define	M_PL_AUTOINC	0x1U
 #define	V_PL_AUTOINC(x)	((x) << S_PL_AUTOINC)
 #define	G_PL_AUTOINC(x)	(((x) >> S_PL_AUTOINC) & M_PL_AUTOINC)
 
 /* VFID: 8-bit field starting at bit 20. */
 #define	S_PL_VFID	20
 #define	M_PL_VFID	0xffU
 #define	V_PL_VFID(x)	((x) << S_PL_VFID)
 #define	G_PL_VFID(x)	(((x) >> S_PL_VFID) & M_PL_VFID)
 
 /* ADDR: 20-bit field starting at bit 0. */
 #define	S_PL_ADDR	0
 #define	M_PL_ADDR	0xfffffU
 #define	V_PL_ADDR(x)	((x) << S_PL_ADDR)
 #define	G_PL_ADDR(x)	(((x) >> S_PL_ADDR) & M_PL_ADDR)
 
 /* Data window that goes with A_PL_INDIR_CMD. */
 #define	A_PL_INDIR_DATA	0x1fc
 
 static uint64_t
 read_vf_stat(struct adapter *sc, unsigned int viid, int reg)
 {
 	u32 stats[2];
 
 	mtx_assert(&sc->regwin_lock, MA_OWNED);
 	t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
 	    V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(reg)));
 	stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA);
 	stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA);
 	return (((uint64_t)stats[1]) << 32 | stats[0]);
 }
 
 /*
  * Fill 'stats' with the current values of the VF statistics counters of
  * the VI identified by 'viid'.  Every counter is fetched with
  * read_vf_stat(), which requires sc->regwin_lock to be held.
  */
 static void
 t4_get_vi_stats(struct adapter *sc, unsigned int viid,
     struct fw_vi_stats_vf *stats)
 {
 
 	/* Load one field of *stats from the matching A_MPS_VF_STAT_*_L register. */
 #define	LOAD_STAT(field, NAME) \
 	stats->field = read_vf_stat(sc, viid, A_MPS_VF_STAT_##NAME##_L)
 
 	/* Transmit counters. */
 	LOAD_STAT(tx_bcast_bytes, TX_VF_BCAST_BYTES);
 	LOAD_STAT(tx_bcast_frames, TX_VF_BCAST_FRAMES);
 	LOAD_STAT(tx_mcast_bytes, TX_VF_MCAST_BYTES);
 	LOAD_STAT(tx_mcast_frames, TX_VF_MCAST_FRAMES);
 	LOAD_STAT(tx_ucast_bytes, TX_VF_UCAST_BYTES);
 	LOAD_STAT(tx_ucast_frames, TX_VF_UCAST_FRAMES);
 	LOAD_STAT(tx_drop_frames, TX_VF_DROP_FRAMES);
 	LOAD_STAT(tx_offload_bytes, TX_VF_OFFLOAD_BYTES);
 	LOAD_STAT(tx_offload_frames, TX_VF_OFFLOAD_FRAMES);
 
 	/* Receive counters. */
 	LOAD_STAT(rx_bcast_bytes, RX_VF_BCAST_BYTES);
 	LOAD_STAT(rx_bcast_frames, RX_VF_BCAST_FRAMES);
 	LOAD_STAT(rx_mcast_bytes, RX_VF_MCAST_BYTES);
 	LOAD_STAT(rx_mcast_frames, RX_VF_MCAST_FRAMES);
 	LOAD_STAT(rx_ucast_bytes, RX_VF_UCAST_BYTES);
 	LOAD_STAT(rx_ucast_frames, RX_VF_UCAST_FRAMES);
 	LOAD_STAT(rx_err_frames, RX_VF_ERR_FRAMES);
 
 #undef LOAD_STAT
 }
 
 /*
  * Zero the VF statistics registers of the VI identified by 'viid'.  The
  * indirect address is set once, to the first counter, with auto-increment
  * on; a zero is then written to the data register once for every 32-bit
  * register from TX_VF_BCAST_BYTES_L through RX_VF_ERR_FRAMES_H.
  */
 static void
 t4_clr_vi_stats(struct adapter *sc, unsigned int viid)
 {
 	int reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
 
 	t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) |
 	    V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(reg)));
 
 	/* 'reg' only counts the writes; the address itself auto-increments. */
 	while (reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H) {
 		t4_write_reg(sc, A_PL_INDIR_DATA, 0);
 		reg += 4;
 	}
 }
 
 static void
 vi_refresh_stats(struct adapter *sc, struct vi_info *vi)
 {
 	struct timeval tv;
 	const struct timeval interval = {0, 250000};	/* 250ms */
 
 	if (!(vi->flags & VI_INIT_DONE))
 		return;
 
 	getmicrotime(&tv);
 	timevalsub(&tv, &interval);
 	if (timevalcmp(&tv, &vi->last_refreshed, <))
 		return;
 
 	mtx_lock(&sc->regwin_lock);
 	t4_get_vi_stats(sc, vi->viid, &vi->stats);
 	getmicrotime(&vi->last_refreshed);
 	mtx_unlock(&sc->regwin_lock);
 }
 
 static void
 cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi)
 {
 	int i;
 	u_int v, tnl_cong_drops;
 	struct timeval tv;
 	const struct timeval interval = {0, 250000};	/* 250ms */
 
 	getmicrotime(&tv);
 	timevalsub(&tv, &interval);
 	if (timevalcmp(&tv, &pi->last_refreshed, <))
 		return;
 
 	tnl_cong_drops = 0;
 	t4_get_port_stats(sc, pi->tx_chan, &pi->stats);
 	for (i = 0; i < NCHAN; i++) {
 		if (pi->rx_chan_map & (1 << i)) {
 			mtx_lock(&sc->regwin_lock);
 			t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v,
 			    1, A_TP_MIB_TNL_CNG_DROP_0 + i);
 			mtx_unlock(&sc->regwin_lock);
 			tnl_cong_drops += v;
 		}
 	}
 	pi->tnl_cong_drops = tnl_cong_drops;
 	getmicrotime(&pi->last_refreshed);
 }
 
 /*
  * Per-port callout handler: refresh the port's statistics and re-arm the
  * callout to fire again in hz ticks.  Expects the port lock to be held
  * (asserted).
  */
 static void
 cxgbe_tick(void *arg)
 {
 	struct port_info *pi = arg;
 
 	PORT_LOCK_ASSERT_OWNED(pi);
 
 	cxgbe_refresh_stats(pi->adapter, pi);
 	callout_schedule(&pi->tick, hz);
 }
 
 /*
  * Per-VI callout handler: refresh the VI's statistics and re-arm the
  * callout to fire again in hz ticks.
  */
 void
 vi_tick(void *arg)
 {
 	struct vi_info *vi = arg;
 
 	vi_refresh_stats(vi->pi->adapter, vi);
 	callout_schedule(&vi->tick, hz);
 }
 
 /*
  * VLAN configuration callback.  When the event is for our own Ethernet
  * interface ('arg' is the ifnet it was registered with), store that ifnet
  * as the cookie of the vlan interface for 'vid'.
  */
 static void
 cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid)
 {
 	struct ifnet *vlan;
 
 	/* Only act on events for this interface, and only if it is Ethernet. */
 	if (arg == ifp && ifp->if_type == IFT_ETHER) {
 		vlan = VLAN_DEVAT(ifp, vid);
 		VLAN_SETCOOKIE(vlan, ifp);
 	}
 }
 
 /*
  * Fallback CPL handler; t4_register_cpl_handler() installs it for an opcode
  * when it is given a NULL handler.  Kernels built with INVARIANTS panic;
  * otherwise the event is logged and the payload mbuf is freed.  Returns
  * EDOOFUS.
  */
 static int
 cpl_not_handled(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 
 #ifdef INVARIANTS
 	panic("%s: opcode 0x%02x on iq %p with payload %p",
 	    __func__, rss->opcode, iq, m);
 #else
 	log(LOG_ERR, "%s: opcode 0x%02x on iq %p with payload %p\n",
 	    __func__, rss->opcode, iq, m);
 	/* Nobody else will consume the payload. */
 	m_freem(m);
 #endif
 	return (EDOOFUS);
 }
 
 /*
  * Install 'h' as the handler for CPL messages with the given opcode.  A
  * NULL 'h' installs cpl_not_handled instead.  Returns EINVAL when 'opcode'
  * is beyond the end of the handler table, 0 otherwise.
  */
 int
 t4_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
 {
 	uintptr_t handler;
 
 	if (opcode >= nitems(sc->cpl_handler))
 		return (EINVAL);
 
 	if (h)
 		handler = (uintptr_t)h;
 	else
 		handler = (uintptr_t)cpl_not_handled;
 
 	/* Publish the new pointer with release semantics. */
 	atomic_store_rel_ptr((uintptr_t *) &sc->cpl_handler[opcode], handler);
 
 	return (0);
 }
 
 /*
  * Fallback async-notification handler; t4_register_an_handler() installs it
  * when it is given a NULL handler.  Kernels built with INVARIANTS panic;
  * otherwise the event is only logged.  Returns EDOOFUS.
  */
 static int
 an_not_handled(struct sge_iq *iq, const struct rsp_ctrl *ctrl)
 {
 
 #ifdef INVARIANTS
 	panic("%s: async notification on iq %p (ctrl %p)", __func__, iq, ctrl);
 #else
 	log(LOG_ERR, "%s: async notification on iq %p (ctrl %p)\n",
 	    __func__, iq, ctrl);
 #endif
 	return (EDOOFUS);
 }
 
 /*
  * Install 'h' as the adapter's async-notification handler.  A NULL 'h'
  * installs an_not_handled instead.  Always returns 0.
  */
 int
 t4_register_an_handler(struct adapter *sc, an_handler_t h)
 {
 	uintptr_t handler;
 
 	if (h)
 		handler = (uintptr_t)h;
 	else
 		handler = (uintptr_t)an_not_handled;
 
 	/* Publish the new pointer with release semantics. */
 	atomic_store_rel_ptr((uintptr_t *) &sc->an_handler, handler);
 
 	return (0);
 }
 
 /*
  * Fallback firmware-message handler; t4_register_fw_msg_handler() installs
  * it when it is given a NULL handler.  'rpl' points at data[0] of a
  * cpl_fw6_msg; the enclosing message is recovered to report its type.
  * Kernels built with INVARIANTS panic; otherwise the event is only logged.
  * Returns EDOOFUS.
  */
 static int
 fw_msg_not_handled(struct adapter *sc, const __be64 *rpl)
 {
 	const struct cpl_fw6_msg *msg;
 
 	msg = __containerof(rpl, struct cpl_fw6_msg, data[0]);
 
 #ifdef INVARIANTS
 	panic("%s: fw_msg type %d", __func__, msg->type);
 #else
 	log(LOG_ERR, "%s: fw_msg type %d\n", __func__, msg->type);
 #endif
 	return (EDOOFUS);
 }
 
 /*
  * Install 'h' as the handler for firmware messages of the given type.  A
  * NULL 'h' installs fw_msg_not_handled instead.  Returns EINVAL when 'type'
  * is beyond the end of the handler table or is one of the RSSCPL subtypes,
  * 0 otherwise.
  */
 int
 t4_register_fw_msg_handler(struct adapter *sc, int type, fw_msg_handler_t h)
 {
 	uintptr_t handler;
 
 	if (type >= nitems(sc->fw_msg_handler))
 		return (EINVAL);
 
 	/*
 	 * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL
 	 * handler dispatch table.  Reject any attempt to install a handler for
 	 * this subtype.
 	 */
 	switch (type) {
 	case FW_TYPE_RSSCPL:
 	case FW6_TYPE_RSSCPL:
 		return (EINVAL);
 	default:
 		break;
 	}
 
 	if (h)
 		handler = (uintptr_t)h;
 	else
 		handler = (uintptr_t)fw_msg_not_handled;
 
 	/* Publish the new pointer with release semantics. */
 	atomic_store_rel_ptr((uintptr_t *) &sc->fw_msg_handler[type], handler);
 
 	return (0);
 }
 
 static void
 t4_sysctls(struct adapter *sc)
 {
 	struct sysctl_ctx_list *ctx;
 	struct sysctl_oid *oid;
 	struct sysctl_oid_list *children, *c0;
 	static char *caps[] = {
 		"\20\1PPP\2QFC\3DCBX",			/* caps[0] linkcaps */
 		"\20\1NIC\2VM\3IDS\4UM\5UM_ISGL"	/* caps[1] niccaps */
 		    "\6HASHFILTER\7ETHOFLD",
 		"\20\1TOE",				/* caps[2] toecaps */
 		"\20\1RDDP\2RDMAC",			/* caps[3] rdmacaps */
 		"\20\1INITIATOR_PDU\2TARGET_PDU"	/* caps[4] iscsicaps */
 		    "\3INITIATOR_CNXOFLD\4TARGET_CNXOFLD"
 		    "\5INITIATOR_SSNOFLD\6TARGET_SSNOFLD",
 		"\20\1INITIATOR\2TARGET\3CTRL_OFLD"	/* caps[5] fcoecaps */
 		    "\4PO_INITIAOR\5PO_TARGET"
 	};
 	static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"};
 
 	ctx = device_get_sysctl_ctx(sc->dev);
 
 	/*
 	 * dev.t4nex.X.
 	 */
 	oid = device_get_sysctl_tree(sc->dev);
 	c0 = children = SYSCTL_CHILDREN(oid);
 
 	sc->sc_do_rxcopy = 1;
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW,
 	    &sc->sc_do_rxcopy, 1, "Do RX copy of small frames");
 
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL,
 	    sc->params.nports, "# of ports");
 
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD,
 	    NULL, chip_rev(sc), "chip hardware revision");
 
 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version",
 	    CTLFLAG_RD, sc->fw_version, 0, "firmware version");
 
 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf",
 	    CTLFLAG_RD, sc->cfg_file, 0, "configuration file");
 
 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL,
 	    sc->cfcsum, "config file checksum");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells",
 	    CTLTYPE_STRING | CTLFLAG_RD, doorbells, sc->doorbells,
 	    sysctl_bitfield, "A", "available doorbells");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkcaps",
 	    CTLTYPE_STRING | CTLFLAG_RD, caps[0], sc->linkcaps,
 	    sysctl_bitfield, "A", "available link capabilities");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "niccaps",
 	    CTLTYPE_STRING | CTLFLAG_RD, caps[1], sc->niccaps,
 	    sysctl_bitfield, "A", "available NIC capabilities");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "toecaps",
 	    CTLTYPE_STRING | CTLFLAG_RD, caps[2], sc->toecaps,
 	    sysctl_bitfield, "A", "available TCP offload capabilities");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdmacaps",
 	    CTLTYPE_STRING | CTLFLAG_RD, caps[3], sc->rdmacaps,
 	    sysctl_bitfield, "A", "available RDMA capabilities");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "iscsicaps",
 	    CTLTYPE_STRING | CTLFLAG_RD, caps[4], sc->iscsicaps,
 	    sysctl_bitfield, "A", "available iSCSI capabilities");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoecaps",
 	    CTLTYPE_STRING | CTLFLAG_RD, caps[5], sc->fcoecaps,
 	    sysctl_bitfield, "A", "available FCoE capabilities");
 
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL,
 	    sc->params.vpd.cclk, "core clock frequency (in KHz)");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc->sge.timer_val,
 	    sizeof(sc->sge.timer_val), sysctl_int_array, "A",
 	    "interrupt holdoff timer values (us)");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc->sge.counter_val,
 	    sizeof(sc->sge.counter_val), sysctl_int_array, "A",
 	    "interrupt holdoff packet counter values");
 
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD,
 	    NULL, sc->tids.nftids, "number of filters");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT |
 	    CTLFLAG_RD, sc, 0, sysctl_temperature, "I",
 	    "chip temperature (in Celsius)");
 
 	t4_sge_sysctls(sc, ctx, children);
 
 	sc->lro_timeout = 100;
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW,
 	    &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)");
 
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "debug_flags", CTLFLAG_RW,
 	    &sc->debug_flags, 0, "flags to enable runtime debugging");
 
 #ifdef SBUF_DRAIN
 	/*
 	 * dev.t4nex.X.misc.  Marked CTLFLAG_SKIP to avoid information overload.
 	 */
 	oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc",
 	    CTLFLAG_RD | CTLFLAG_SKIP, NULL,
 	    "logs and miscellaneous information");
 	children = SYSCTL_CHILDREN(oid);
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_cctrl, "A", "congestion control");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 1,
 	    sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 2,
 	    sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 3,
 	    sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 4,
 	    sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 5,
 	    sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_cim_la, "A", "CIM logic analyzer");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_cim_ma_la, "A", "CIM MA logic analyzer");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ,
 	    sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ,
 	    sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ,
 	    sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ,
 	    sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ,
 	    sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ,
 	    sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)");
 
 	if (is_t5(sc)) {
 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx",
 		    CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ,
 		    sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)");
 
 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx",
 		    CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ,
 		    sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)");
 	}
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_cim_pif_la, "A", "CIM PIF logic analyzer");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_cim_qcfg, "A", "CIM queue configuration");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_cpl_stats, "A", "CPL statistics");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_ddp_stats, "A", "non-TCP DDP statistics");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_devlog, "A", "firmware's device log");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_fcoe_stats, "A", "FCoE statistics");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_hw_sched, "A", "hardware scheduler ");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_l2t, "A", "hardware L2 table");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_lb_stats, "A", "loopback statistics");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_meminfo, "A", "memory regions");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_mps_tcam, "A", "MPS TCAM entries");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_path_mtus, "A", "path MTUs");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_pm_stats, "A", "PM statistics");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_rdma_stats, "A", "RDMA statistics");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_tcp_stats, "A", "TCP statistics");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_tids, "A", "TID information");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_tp_err_stats, "A", "TP error statistics");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_tp_la, "A", "TP logic analyzer");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_tx_rate, "A", "Tx rate");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la",
 	    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 	    sysctl_ulprx_la, "A", "ULPRX logic analyzer");
 
 	if (is_t5(sc)) {
 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats",
 		    CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
 		    sysctl_wcwr_stats, "A", "write combined work requests");
 	}
 #endif
 
 #ifdef TCP_OFFLOAD
 	if (is_offload(sc)) {
 		/*
 		 * dev.t4nex.X.toe.
 		 */
 		oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD,
 		    NULL, "TOE parameters");
 		children = SYSCTL_CHILDREN(oid);
 
 		sc->tt.sndbuf = 256 * 1024;
 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW,
 		    &sc->tt.sndbuf, 0, "max hardware send buffer size");
 
 		sc->tt.ddp = 0;
 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW,
 		    &sc->tt.ddp, 0, "DDP allowed");
 
 		sc->tt.indsz = G_INDICATESIZE(t4_read_reg(sc, A_TP_PARA_REG5));
 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "indsz", CTLFLAG_RW,
 		    &sc->tt.indsz, 0, "DDP max indicate size allowed");
 
 		sc->tt.ddp_thres =
 		    G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2));
 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp_thres", CTLFLAG_RW,
 		    &sc->tt.ddp_thres, 0, "DDP threshold");
 
 		sc->tt.rx_coalesce = 1;
 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce",
 		    CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing");
 
 		sc->tt.tx_align = 1;
 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align",
 		    CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload");
 	}
 #endif
 }
 
 /*
  * Populate the sysctl tree of a virtual interface (dev.[nv](cxgbe|cxl).X.).
  *
  * The VI id and the NIC queue layout are always exported read-only.  For a
  * VI flagged VI_NETMAP nothing else is added.  For every other VI the
  * tunables (rsrv_noflowq, holdoff timer/packet-count indices, queue sizes)
  * are registered with their handlers and, when built with TCP_OFFLOAD and the
  * VI has offload rx queues, the offload queue layout is exported as well.
  */
 void
 vi_sysctls(struct vi_info *vi)
 {
 	struct sysctl_ctx_list *ctx;
 	struct sysctl_oid *oid;
 	struct sysctl_oid_list *children;
 
 	ctx = device_get_sysctl_ctx(vi->dev);
 
 	/*
 	 * dev.[nv](cxgbe|cxl).X.
 	 */
 	oid = device_get_sysctl_tree(vi->dev);
 	children = SYSCTL_CHILDREN(oid);
 
 	/* The viid is exported by value (NULL pointer, constant argument). */
 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL,
 	    vi->viid, "VI identifier");
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD,
 	    &vi->nrxq, 0, "# of rx queues");
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD,
 	    &vi->ntxq, 0, "# of tx queues");
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD,
 	    &vi->first_rxq, 0, "index of first rx queue");
 	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
 	    &vi->first_txq, 0, "index of first tx queue");
 
 	/* Netmap VIs get only the read-only nodes registered above. */
 	if (vi->flags & VI_NETMAP)
 		return;
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq", CTLTYPE_INT |
 	    CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU",
 	    "Reserve queue 0 for non-flowid packets");
 
 #ifdef TCP_OFFLOAD
 	/* Offload queue layout, only when the VI has offload rx queues. */
 	if (vi->nofldrxq != 0) {
 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
 		    &vi->nofldrxq, 0,
 		    "# of rx queues for offloaded TCP connections");
 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD,
 		    &vi->nofldtxq, 0,
 		    "# of tx queues for offloaded TCP connections");
 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq",
 		    CTLFLAG_RD, &vi->first_ofld_rxq, 0,
 		    "index of first TOE rx queue");
 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq",
 		    CTLFLAG_RD, &vi->first_ofld_txq, 0,
 		    "index of first TOE tx queue");
 	}
 #endif
 
 	/* Read/write tunables; each handler receives the VI as arg1. */
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx",
 	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I",
 	    "holdoff timer index");
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx",
 	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I",
 	    "holdoff packet counter index");
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq",
 	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I",
 	    "rx queue size");
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq",
 	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I",
 	    "tx queue size");
 }
 
 /*
  * Populate the sysctl tree of a physical port (dev.cxgbe.X.).
  *
  * Registers the link-down reason, the BT PHY temperature/firmware version
  * (only for FW_PORT_TYPE_BT_XAUI ports), the pause settings, and a "stats"
  * sub-node.  The stats node holds tx_parse_error, one node per MPS port
  * statistics register (read on demand by sysctl_handle_t4_reg64), and the
  * overflow/truncation counters stored in pi->stats.
  */
 static void
 cxgbe_sysctls(struct port_info *pi)
 {
 	struct sysctl_ctx_list *ctx;
 	struct sysctl_oid *oid;
 	struct sysctl_oid_list *children;
 	struct adapter *sc = pi->adapter;
 
 	ctx = device_get_sysctl_ctx(pi->dev);
 
 	/*
 	 * dev.cxgbe.X.
 	 */
 	oid = device_get_sysctl_tree(pi->dev);
 	children = SYSCTL_CHILDREN(oid);
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING |
 	   CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down");
 	if (pi->port_type == FW_PORT_TYPE_BT_XAUI) {
 		/* arg2 selects what sysctl_btphy reads: 0 or 1. */
 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature",
 		    CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I",
 		    "PHY temperature (in Celsius)");
 		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version",
 		    CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I",
 		    "PHY firmware version");
 	}
 
 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings",
 	    CTLTYPE_STRING | CTLFLAG_RW, pi, PAUSE_TX, sysctl_pause_settings,
 	    "A", "PAUSE settings (bit 0 = rx_pause, bit 1 = tx_pause)");
 
 	/*
 	 * dev.cxgbe.X.stats.
 	 */
 	oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD,
 	    NULL, "port statistics");
 	children = SYSCTL_CHILDREN(oid);
 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD,
 	    &pi->tx_parse_error, 0,
 	    "# of tx packets with invalid length or # of segments");
 
 /*
  * Adds a read-only 64-bit node whose handler receives the adapter as arg1
  * and the register address as arg2.  The macro's first parameter is not
  * used in the expansion.
  */
 #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \
 	SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \
 	    CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \
 	    sysctl_handle_t4_reg64, "QU", desc)
 
 	/* Transmit statistics registers for this port's channel. */
 	SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_64",
 	    "# of tx frames in this range",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127",
 	    "# of tx frames in this range",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255",
 	    "# of tx frames in this range",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511",
 	    "# of tx frames in this range",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023",
 	    "# of tx frames in this range",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518",
 	    "# of tx frames in this range",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max",
 	    "# of tx frames in this range",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L));
 	SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L));
 
 	/* Receive statistics registers; these are also indexed by tx_chan. */
 	SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err",
 	    "# of frames received with bad FCS",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_len_err",
 	    "# of frames received with length error",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_64",
 	    "# of rx frames in this range",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127",
 	    "# of rx frames in this range",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255",
 	    "# of rx frames in this range",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511",
 	    "# of rx frames in this range",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023",
 	    "# of rx frames in this range",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518",
 	    "# of rx frames in this range",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max",
 	    "# of rx frames in this range",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L));
 	SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received",
 	    PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L));
 
 #undef SYSCTL_ADD_T4_REG64
 
 /*
  * Adds a read-only 64-bit node named after the pi->stats member it exports;
  * the member name is stringified to form the node name.
  */
 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \
 	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \
 	    &pi->stats.name, desc)
 
 	/* We get these from port_stats and they may be stale by up to 1s */
 	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0,
 	    "# drops due to buffer-group 0 overflows");
 	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1,
 	    "# drops due to buffer-group 1 overflows");
 	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2,
 	    "# drops due to buffer-group 2 overflows");
 	SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3,
 	    "# drops due to buffer-group 3 overflows");
 	SYSCTL_ADD_T4_PORTSTAT(rx_trunc0,
 	    "# of buffer-group 0 truncated packets");
 	SYSCTL_ADD_T4_PORTSTAT(rx_trunc1,
 	    "# of buffer-group 1 truncated packets");
 	SYSCTL_ADD_T4_PORTSTAT(rx_trunc2,
 	    "# of buffer-group 2 truncated packets");
 	SYSCTL_ADD_T4_PORTSTAT(rx_trunc3,
 	    "# of buffer-group 3 truncated packets");
 
 #undef SYSCTL_ADD_T4_PORTSTAT
 }
 
 static int
 sysctl_int_array(SYSCTL_HANDLER_ARGS)
 {
 	int rc, *i, space = 0;
 	struct sbuf sb;
 
 	sbuf_new_for_sysctl(&sb, NULL, 64, req);
 	for (i = arg1; arg2; arg2 -= sizeof(int), i++) {
 		if (space)
 			sbuf_printf(&sb, " ");
 		sbuf_printf(&sb, "%d", *i);
 		space = 1;
 	}
 	rc = sbuf_finish(&sb);
 	sbuf_delete(&sb);
 	return (rc);
 }
 
 static int
 sysctl_bitfield(SYSCTL_HANDLER_ARGS)
 {
 	int rc;
 	struct sbuf *sb;
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return(rc);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	sbuf_printf(sb, "%b", (int)arg2, (char *)arg1);
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 static int
 sysctl_btphy(SYSCTL_HANDLER_ARGS)
 {
 	struct port_info *pi = arg1;
 	int op = arg2;
 	struct adapter *sc = pi->adapter;
 	u_int v;
 	int rc;
 
 	rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt");
 	if (rc)
 		return (rc);
 	/* XXX: magic numbers */
 	rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820,
 	    &v);
 	end_synchronized_op(sc, 0);
 	if (rc)
 		return (rc);
 	if (op == 0)
 		v /= 256;
 
 	rc = sysctl_handle_int(oidp, &v, 0, req);
 	return (rc);
 }
 
 static int
 sysctl_noflowq(SYSCTL_HANDLER_ARGS)
 {
 	struct vi_info *vi = arg1;
 	int rc, val;
 
 	val = vi->rsrv_noflowq;
 	rc = sysctl_handle_int(oidp, &val, 0, req);
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	if ((val >= 1) && (vi->ntxq > 1))
 		vi->rsrv_noflowq = 1;
 	else
 		vi->rsrv_noflowq = 0;
 
 	return (rc);
 }
 
 static int
 sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
 {
 	struct vi_info *vi = arg1;
 	struct adapter *sc = vi->pi->adapter;
 	int idx, rc, i;
 	struct sge_rxq *rxq;
 #ifdef TCP_OFFLOAD
 	struct sge_ofld_rxq *ofld_rxq;
 #endif
 	uint8_t v;
 
 	idx = vi->tmr_idx;
 
 	rc = sysctl_handle_int(oidp, &idx, 0, req);
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	if (idx < 0 || idx >= SGE_NTIMERS)
 		return (EINVAL);
 
 	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
 	    "t4tmr");
 	if (rc)
 		return (rc);
 
 	v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1);
 	for_each_rxq(vi, i, rxq) {
 #ifdef atomic_store_rel_8
 		atomic_store_rel_8(&rxq->iq.intr_params, v);
 #else
 		rxq->iq.intr_params = v;
 #endif
 	}
 #ifdef TCP_OFFLOAD
 	for_each_ofld_rxq(vi, i, ofld_rxq) {
 #ifdef atomic_store_rel_8
 		atomic_store_rel_8(&ofld_rxq->iq.intr_params, v);
 #else
 		ofld_rxq->iq.intr_params = v;
 #endif
 	}
 #endif
 	vi->tmr_idx = idx;
 
 	end_synchronized_op(sc, LOCK_HELD);
 	return (0);
 }
 
 static int
 sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS)
 {
 	struct vi_info *vi = arg1;
 	struct adapter *sc = vi->pi->adapter;
 	int idx, rc;
 
 	idx = vi->pktc_idx;
 
 	rc = sysctl_handle_int(oidp, &idx, 0, req);
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	if (idx < -1 || idx >= SGE_NCOUNTERS)
 		return (EINVAL);
 
 	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
 	    "t4pktc");
 	if (rc)
 		return (rc);
 
 	if (vi->flags & VI_INIT_DONE)
 		rc = EBUSY; /* cannot be changed once the queues are created */
 	else
 		vi->pktc_idx = idx;
 
 	end_synchronized_op(sc, LOCK_HELD);
 	return (rc);
 }
 
 static int
 sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS)
 {
 	struct vi_info *vi = arg1;
 	struct adapter *sc = vi->pi->adapter;
 	int qsize, rc;
 
 	qsize = vi->qsize_rxq;
 
 	rc = sysctl_handle_int(oidp, &qsize, 0, req);
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	if (qsize < 128 || (qsize & 7))
 		return (EINVAL);
 
 	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
 	    "t4rxqs");
 	if (rc)
 		return (rc);
 
 	if (vi->flags & VI_INIT_DONE)
 		rc = EBUSY; /* cannot be changed once the queues are created */
 	else
 		vi->qsize_rxq = qsize;
 
 	end_synchronized_op(sc, LOCK_HELD);
 	return (rc);
 }
 
 static int
 sysctl_qsize_txq(SYSCTL_HANDLER_ARGS)
 {
 	struct vi_info *vi = arg1;
 	struct adapter *sc = vi->pi->adapter;
 	int qsize, rc;
 
 	qsize = vi->qsize_txq;
 
 	rc = sysctl_handle_int(oidp, &qsize, 0, req);
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	if (qsize < 128 || qsize > 65536)
 		return (EINVAL);
 
 	rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
 	    "t4txqs");
 	if (rc)
 		return (rc);
 
 	if (vi->flags & VI_INIT_DONE)
 		rc = EBUSY; /* cannot be changed once the queues are created */
 	else
 		vi->qsize_txq = qsize;
 
 	end_synchronized_op(sc, LOCK_HELD);
 	return (rc);
 }
 
 /*
  * String handler for a port's PAUSE settings.
  *
  * A read reports the PAUSE_RX/PAUSE_TX bits of lc->fc in "%b" form.  A write
  * takes a single decimal digit whose value may only contain the PAUSE_RX and
  * PAUSE_TX bits; if it differs from the currently requested setting the
  * request is updated and the link is restarted with t4_link_start().
  */
 static int
 sysctl_pause_settings(SYSCTL_HANDLER_ARGS)
 {
 	struct port_info *pi = arg1;
 	struct adapter *sc = pi->adapter;
 	struct link_config *lc = &pi->link_cfg;
 	char s[2];
 	int rc, n, link_ok;
 
 	if (req->newptr == NULL) {
 		/* Read: describe the flow-control bits of lc->fc. */
 		static char *bits = "\20\1PAUSE_RX\2PAUSE_TX";
 		struct sbuf *sb;
 
 		rc = sysctl_wire_old_buffer(req, 0);
 		if (rc != 0)
 			return (rc);
 
 		sb = sbuf_new_for_sysctl(NULL, NULL, 128, req);
 		if (sb == NULL)
 			return (ENOMEM);
 
 		sbuf_printf(sb, "%b", lc->fc & (PAUSE_TX | PAUSE_RX), bits);
 		rc = sbuf_finish(sb);
 		sbuf_delete(sb);
 
 		return (rc);
 	}
 
 	/* Write: seed the buffer with the currently requested setting. */
 	s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX));
 	s[1] = 0;
 
 	rc = sysctl_handle_string(oidp, s, sizeof(s), req);
 	if (rc != 0)
 		return (rc);
 
 	/* Exactly one character, and it must be a decimal digit. */
 	if (s[1] != 0 || s[0] < '0' || s[0] > '9')
 		return (EINVAL);
 	n = s[0] - '0';
 	if ((n & ~(PAUSE_TX | PAUSE_RX)) != 0)
 		return (EINVAL);	/* some other bit is set too */
 
 	rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
 	    "t4PAUSE");
 	if (rc)
 		return (rc);
 	if ((lc->requested_fc & (PAUSE_TX | PAUSE_RX)) != n) {
 		/* link_ok is saved here and restored after the restart. */
 		link_ok = lc->link_ok;
 
 		lc->requested_fc &= ~(PAUSE_TX | PAUSE_RX);
 		lc->requested_fc |= n;
 		rc = -t4_link_start(sc, sc->mbox, pi->tx_chan, lc);
 		lc->link_ok = link_ok;	/* restore */
 	}
 	end_synchronized_op(sc, 0);
 
 	return (rc);
 }
 
 static int
 sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	int reg = arg2;
 	uint64_t val;
 
 	val = t4_read_reg64(sc, reg);
 
 	return (sysctl_handle_64(oidp, &val, 0, req));
 }
 
 static int
 sysctl_temperature(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	int rc, t;
 	uint32_t param, val;
 
 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp");
 	if (rc)
 		return (rc);
 	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
 	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
 	    V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP);
 	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
 	end_synchronized_op(sc, 0);
 	if (rc)
 		return (rc);
 
 	/* unknown is returned as 0 but we display -1 in that case */
 	t = val == 0 ? -1 : val;
 
 	rc = sysctl_handle_int(oidp, &t, 0, req);
 	return (rc);
 }
 
 #ifdef SBUF_DRAIN
 static int
 sysctl_cctrl(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	struct sbuf *sb;
 	int rc, i;
 	uint16_t incr[NMTUS][NCCTRL_WIN];
 	static const char *dec_fac[] = {
 		"0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
 		"0.9375"
 	};
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return (rc);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	t4_read_cong_tbl(sc, incr);
 
 	for (i = 0; i < NCCTRL_WIN; ++i) {
 		sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
 		    incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
 		    incr[5][i], incr[6][i], incr[7][i]);
 		sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
 		    incr[8][i], incr[9][i], incr[10][i], incr[11][i],
 		    incr[12][i], incr[13][i], incr[14][i], incr[15][i],
 		    sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
 	}
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 /*
  * Display names of the CIM queues: the inbound queues come first, followed
  * by the outbound queues.
  */
 static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = {
 	/* ibq's */
 	"TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI",
 	/* obq's */
 	"ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI",
 	/* additional obq's (T5 onwards) */
 	"SGE0-RX", "SGE1-RX"
 };
 
 /*
  * Read-only string handler that dumps the contents of one CIM queue.  arg2
  * is the combined queue number: values below CIM_NUM_IBQ select an inbound
  * queue, the rest select outbound queue (arg2 - CIM_NUM_IBQ).  The output is
  * a title line followed by four 32-bit words per line, each line prefixed
  * with its byte offset.
  */
 static int
 sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	u_int nobq = is_t4(sc) ? CIM_NUM_OBQ : CIM_NUM_OBQ_T5;
 	struct sbuf *sb;
 	uint32_t *buf, *word;
 	char *qtype;
 	int qid = arg2;
 	int n, off, rc;
 
 	KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + nobq,
 	    ("%s: bad qid %d\n", __func__, qid));
 
 	if (qid >= CIM_NUM_IBQ) {
 		/* outbound queue */
 		qtype = "OBQ";
 		qid -= CIM_NUM_IBQ;
 		n = 4 * nobq * CIM_OBQ_SIZE;
 		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
 		rc = t4_read_cim_obq(sc, qid, buf, n);
 	} else {
 		/* inbound queue */
 		qtype = "IBQ";
 		n = 4 * CIM_IBQ_SIZE;
 		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
 		rc = t4_read_cim_ibq(sc, qid, buf, n);
 	}
 
 	if (rc < 0) {
 		rc = -rc;
 		goto done;
 	}
 	/* rc has # of words actually read; n is now a byte count. */
 	n = rc * sizeof(uint32_t);
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		goto done;
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
 	if (sb == NULL) {
 		rc = ENOMEM;
 		goto done;
 	}
 
 	sbuf_printf(sb, "%s%d %s", qtype , qid, qname[arg2]);
 	off = 0;
 	word = buf;
 	while (off < n) {
 		sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", off, word[0],
 		    word[1], word[2], word[3]);
 		off += 16;
 		word += 4;
 	}
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 done:
 	free(buf, M_CXGBE);
 	return (rc);
 }
 
 /*
  * Read-only string handler that dumps the CIM logic analyzer.
  *
  * The LA configuration register is read first; its F_UPDBGLACAPTPCONLY bit
  * selects between the PC-only layout (three output lines per 8-word record)
  * and the full layout (one output line per 8-word record).
  *
  * Returns 0 on success, the negated error from t4_cim_read() or
  * t4_cim_read_la(), the error from sysctl_wire_old_buffer() or
  * sbuf_finish(), or ENOMEM if the sbuf cannot be created.  The sbuf and the
  * capture buffer are released on every path that allocated them, including
  * a failed t4_cim_read_la().
  */
 static int
 sysctl_cim_la(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	u_int cfg;
 	struct sbuf *sb;
 	uint32_t *buf, *p;
 	int rc;
 
 	rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
 	if (rc != 0)
 		return (rc);
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return (rc);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
 	    M_ZERO | M_WAITOK);
 
 	rc = -t4_cim_read_la(sc, buf, NULL);
 	if (rc != 0)
 		goto done;
 
 	sbuf_printf(sb, "Status   Data      PC%s",
 	    cfg & F_UPDBGLACAPTPCONLY ? "" :
 	    "     LS0Stat  LS0Addr             LS0Data");
 
 	/* The loop below consumes the buffer eight words at a time. */
 	KASSERT((sc->params.cim_la_size & 7) == 0,
 	    ("%s: p will walk off the end of buf", __func__));
 
 	for (p = buf; p < &buf[sc->params.cim_la_size]; p += 8) {
 		if (cfg & F_UPDBGLACAPTPCONLY) {
 			sbuf_printf(sb, "\n  %02x   %08x %08x", p[5] & 0xff,
 			    p[6], p[7]);
 			sbuf_printf(sb, "\n  %02x   %02x%06x %02x%06x",
 			    (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8,
 			    p[4] & 0xff, p[5] >> 8);
 			sbuf_printf(sb, "\n  %02x   %x%07x %x%07x",
 			    (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
 			    p[1] & 0xf, p[2] >> 4);
 		} else {
 			sbuf_printf(sb,
 			    "\n  %02x   %x%07x %x%07x %08x %08x "
 			    "%08x%08x%08x%08x",
 			    (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
 			    p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5],
 			    p[6], p[7]);
 		}
 	}
 
 	rc = sbuf_finish(sb);
 done:
 	/*
 	 * Shared exit: the sbuf must be deleted on the error path too,
 	 * otherwise it is leaked whenever the LA read fails.
 	 */
 	sbuf_delete(sb);
 	free(buf, M_CXGBE);
 	return (rc);
 }
 
 static int
 sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	u_int i;
 	struct sbuf *sb;
 	uint32_t *buf, *p;
 	int rc;
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return (rc);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE,
 	    M_ZERO | M_WAITOK);
 
 	t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE);
 	p = buf;
 
 	for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
 		sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2],
 		    p[1], p[0]);
 	}
 
 	sbuf_printf(sb, "\n\nCnt ID Tag UE       Data       RDY VLD");
 	for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) {
 		sbuf_printf(sb, "\n%3u %2u  %x   %u %08x%08x  %u   %u",
 		    (p[2] >> 10) & 0xff, (p[2] >> 7) & 7,
 		    (p[2] >> 3) & 0xf, (p[2] >> 2) & 1,
 		    (p[1] >> 2) | ((p[2] & 3) << 30),
 		    (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1,
 		    p[0] & 1);
 	}
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 	free(buf, M_CXGBE);
 	return (rc);
 }
 
 /*
  * Read-only string handler that dumps the CIM PIF logic analyzer.
  *
  * The buffer holds two halves of CIM_PIFLA_SIZE records, six 32-bit words
  * each, filled in by t4_cim_read_pif_la().  The first half is printed with
  * the Cntl/ID/DataBE/Addr/Data columns and the second with the Cntl/ID/Data
  * columns.  Both loops are bounded by CIM_PIFLA_SIZE, the same constant
  * that sizes and splits the buffer, so the walk over the records always
  * matches the allocation.
  *
  * Returns 0 on success, the error from sysctl_wire_old_buffer() or
  * sbuf_finish(), or ENOMEM if the sbuf cannot be created.
  */
 static int
 sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	u_int i;
 	struct sbuf *sb;
 	uint32_t *buf, *p;
 	int rc;
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return (rc);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE,
 	    M_ZERO | M_WAITOK);
 
 	t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL);
 	p = buf;
 
 	sbuf_printf(sb, "Cntl ID DataBE   Addr                 Data");
 	for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
 		sbuf_printf(sb, "\n %02x  %02x  %04x  %08x %08x%08x%08x%08x",
 		    (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff,
 		    p[4], p[3], p[2], p[1], p[0]);
 	}
 
 	/* p now points at the second half (buf + 6 * CIM_PIFLA_SIZE). */
 	sbuf_printf(sb, "\n\nCntl ID               Data");
 	for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) {
 		sbuf_printf(sb, "\n %02x  %02x %08x%08x%08x%08x",
 		    (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]);
 	}
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 	free(buf, M_CXGBE);
 	return (rc);
 }
 
 /*
  * Sysctl handler: prints one line per CIM queue (inbound queues first,
  * then outbound queues) with its configuration and current pointers and
  * counters.  Returns 0 on success or an errno value.
  */
 static int
 sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
 	uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5];
 	uint16_t thres[CIM_NUM_IBQ];
 	uint32_t obq_wr[2 * CIM_NUM_OBQ_T5];
 	uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)];
 	const uint32_t *qs, *qw;
 	struct sbuf *sb;
 	u_int nobq, ibq_reg, obq_reg, nq;
 	int rc, i;
 
 	/* The OBQ count and the registers to read depend on the chip. */
 	if (!is_t4(sc)) {
 		nobq = CIM_NUM_OBQ_T5;
 		ibq_reg = A_UP_IBQ_0_SHADOW_RDADDR;
 		obq_reg = A_UP_OBQ_0_SHADOW_REALADDR;
 	} else {
 		nobq = CIM_NUM_OBQ;
 		ibq_reg = A_UP_IBQ_0_RDADDR;
 		obq_reg = A_UP_OBQ_0_REALADDR;
 	}
 	nq = CIM_NUM_IBQ + nobq;
 
 	/* Four status words per queue, then two write-pointer words per OBQ. */
 	rc = -t4_cim_read(sc, ibq_reg, 4 * nq, stat);
 	if (rc != 0)
 		return (rc);
 	rc = -t4_cim_read(sc, obq_reg, 2 * nobq, obq_wr);
 	if (rc != 0)
 		return (rc);
 
 	t4_read_cimq_cfg(sc, base, size, thres);
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return (rc);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	sbuf_printf(sb, "Queue  Base  Size Thres RdPtr WrPtr  SOP  EOP Avail");
 
 	/* Inbound queues: these are the only ones with a threshold column. */
 	for (i = 0; i < CIM_NUM_IBQ; i++) {
 		qs = &stat[4 * i];
 		sbuf_printf(sb, "\n%7s %5x %5u %5u %6x  %4x %4u %4u %5u",
 		    qname[i], base[i], size[i], thres[i], G_IBQRDADDR(qs[0]),
 		    G_IBQWRADDR(qs[1]), G_QUESOPCNT(qs[3]), G_QUEEOPCNT(qs[3]),
 		    G_QUEREMFLITS(qs[2]) * 16);
 	}
 
 	/* Outbound queues: write pointer is taken from obq_wr, less base. */
 	for (; i < nq; i++) {
 		qs = &stat[4 * i];
 		qw = &obq_wr[2 * (i - CIM_NUM_IBQ)];
 		sbuf_printf(sb, "\n%7s %5x %5u %12x  %4x %4u %4u %5u", qname[i],
 		    base[i], size[i], G_QUERDADDR(qs[0]) & 0x3fff,
 		    qw[0] - base[i], G_QUESOPCNT(qs[3]), G_QUEEOPCNT(qs[3]),
 		    G_QUEREMFLITS(qs[2]) * 16);
 	}
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 static int
 sysctl_cpl_stats(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	struct sbuf *sb;
 	int rc;
 	struct tp_cpl_stats stats;
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return (rc);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	t4_tp_get_cpl_stats(sc, &stats);
 
 	sbuf_printf(sb, "                 channel 0  channel 1  channel 2  "
 	    "channel 3\n");
 	sbuf_printf(sb, "CPL requests:   %10u %10u %10u %10u\n",
 		   stats.req[0], stats.req[1], stats.req[2], stats.req[3]);
 	sbuf_printf(sb, "CPL responses:  %10u %10u %10u %10u",
 		   stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]);
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 static int
 sysctl_ddp_stats(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	struct sbuf *sb;
 	int rc;
 	struct tp_usm_stats stats;
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return(rc);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	t4_get_usm_stats(sc, &stats);
 
 	sbuf_printf(sb, "Frames: %u\n", stats.frames);
 	sbuf_printf(sb, "Octets: %ju\n", stats.octets);
 	sbuf_printf(sb, "Drops:  %u", stats.drops);
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 /*
  * Printable names for firmware device-log levels, indexed by the
  * FW_DEVLOG_LEVEL_* constants.  sysctl_devlog() looks up e->level here
  * and prints "UNKNOWN" for indices outside the array.
  */
 const char *devlog_level_strings[] = {
 	[FW_DEVLOG_LEVEL_EMERG]		= "EMERG",
 	[FW_DEVLOG_LEVEL_CRIT]		= "CRIT",
 	[FW_DEVLOG_LEVEL_ERR]		= "ERR",
 	[FW_DEVLOG_LEVEL_NOTICE]	= "NOTICE",
 	[FW_DEVLOG_LEVEL_INFO]		= "INFO",
 	[FW_DEVLOG_LEVEL_DEBUG]		= "DEBUG"
 };
 
 /*
  * Printable names for firmware device-log facilities, indexed by the
  * FW_DEVLOG_FACILITY_* constants.  sysctl_devlog() looks up e->facility
  * here and prints "UNKNOWN" for indices outside the array.
  */
 const char *devlog_facility_strings[] = {
 	[FW_DEVLOG_FACILITY_CORE]	= "CORE",
 	[FW_DEVLOG_FACILITY_CF]		= "CF",
 	[FW_DEVLOG_FACILITY_SCHED]	= "SCHED",
 	[FW_DEVLOG_FACILITY_TIMER]	= "TIMER",
 	[FW_DEVLOG_FACILITY_RES]	= "RES",
 	[FW_DEVLOG_FACILITY_HW]		= "HW",
 	[FW_DEVLOG_FACILITY_FLR]	= "FLR",
 	[FW_DEVLOG_FACILITY_DMAQ]	= "DMAQ",
 	[FW_DEVLOG_FACILITY_PHY]	= "PHY",
 	[FW_DEVLOG_FACILITY_MAC]	= "MAC",
 	[FW_DEVLOG_FACILITY_PORT]	= "PORT",
 	[FW_DEVLOG_FACILITY_VI]		= "VI",
 	[FW_DEVLOG_FACILITY_FILTER]	= "FILTER",
 	[FW_DEVLOG_FACILITY_ACL]	= "ACL",
 	[FW_DEVLOG_FACILITY_TM]		= "TM",
 	[FW_DEVLOG_FACILITY_QFC]	= "QFC",
 	[FW_DEVLOG_FACILITY_DCB]	= "DCB",
 	[FW_DEVLOG_FACILITY_ETH]	= "ETH",
 	[FW_DEVLOG_FACILITY_OFLD]	= "OFLD",
 	[FW_DEVLOG_FACILITY_RI]		= "RI",
 	[FW_DEVLOG_FACILITY_ISCSI]	= "ISCSI",
 	[FW_DEVLOG_FACILITY_FCOE]	= "FCOE",
 	[FW_DEVLOG_FACILITY_FOISCSI]	= "FOISCSI",
 	[FW_DEVLOG_FACILITY_FOFCOE]	= "FOFCOE"
 };
 
 /*
  * Sysctl handler that dumps the firmware device log.
  *
  * The whole log (dparams->size bytes at dparams->start in the memory
  * selected by dparams->memtype) is copied into a temporary buffer with
  * t4_mem_read().  A first pass converts the entries to host byte order and
  * finds the entry with the smallest timestamp; a second pass prints the
  * entries starting there and wrapping around the end of the buffer.
  *
  * Returns 0 on success (including when the log is empty) or an errno value.
  */
 static int
 sysctl_devlog(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	struct devlog_params *dparams = &sc->params.devlog;
 	struct fw_devlog_e *buf, *e;
 	int i, j, rc, nentries, first = 0, m;
 	struct sbuf *sb;
 	uint64_t ftstamp = UINT64_MAX;
 
 	/*
 	 * No devlog parameters recorded: fall back to hardcoded defaults.
 	 * Note that this writes the defaults into sc->params.devlog.
 	 */
 	if (dparams->start == 0) {
 		dparams->memtype = FW_MEMTYPE_EDC0;
 		dparams->start = 0x84000;
 		dparams->size = 32768;
 	}
 
 	nentries = dparams->size / sizeof(struct fw_devlog_e);
 
 	/* M_NOWAIT: the allocation may fail, hence the NULL check. */
 	buf = malloc(dparams->size, M_CXGBE, M_NOWAIT);
 	if (buf == NULL)
 		return (ENOMEM);
 
 	m = fwmtype_to_hwmtype(dparams->memtype);
 	rc = -t4_mem_read(sc, m, dparams->start, dparams->size, (void *)buf);
 	if (rc != 0)
 		goto done;
 
 	/*
 	 * Pass 1: byte-swap each entry in place (big-endian to host) and
 	 * remember the index of the entry with the smallest timestamp.
 	 * Entries after the first zero timestamp are left untouched.
 	 */
 	for (i = 0; i < nentries; i++) {
 		e = &buf[i];
 
 		if (e->timestamp == 0)
 			break;	/* end */
 
 		e->timestamp = be64toh(e->timestamp);
 		e->seqno = be32toh(e->seqno);
 		for (j = 0; j < 8; j++)
 			e->params[j] = be32toh(e->params[j]);
 
 		if (e->timestamp < ftstamp) {
 			ftstamp = e->timestamp;
 			first = i;
 		}
 	}
 
 	/* 'first' is still 0 here if the very first entry was empty. */
 	if (buf[first].timestamp == 0)
 		goto done;	/* nothing in the log */
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		goto done;
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
 	if (sb == NULL) {
 		rc = ENOMEM;
 		goto done;
 	}
 	sbuf_printf(sb, "%10s  %15s  %8s  %8s  %s\n",
 	    "Seq#", "Tstamp", "Level", "Facility", "Message");
 
 	/*
 	 * Pass 2: print starting at 'first', wrapping from the last slot to
 	 * slot 0, until we are back at 'first' or hit an empty entry.
 	 */
 	i = first;
 	do {
 		e = &buf[i];
 		if (e->timestamp == 0)
 			break;	/* end */
 
 		sbuf_printf(sb, "%10d  %15ju  %8s  %8s  ",
 		    e->seqno, e->timestamp,
 		    (e->level < nitems(devlog_level_strings) ?
 			devlog_level_strings[e->level] : "UNKNOWN"),
 		    (e->facility < nitems(devlog_facility_strings) ?
 			devlog_facility_strings[e->facility] : "UNKNOWN"));
 		/*
 		 * NOTE(review): the format string is taken from the log entry
 		 * itself (data read from adapter memory) and is always given
 		 * all eight parameters.
 		 */
 		sbuf_printf(sb, e->fmt, e->params[0], e->params[1],
 		    e->params[2], e->params[3], e->params[4],
 		    e->params[5], e->params[6], e->params[7]);
 
 		if (++i == nentries)
 			i = 0;
 	} while (i != first);
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 done:
 	/* Common exit: the temporary copy of the log is always released. */
 	free(buf, M_CXGBE);
 	return (rc);
 }
 
 /*
  * Sysctl handler: FCoE DDP octet/frame counts and dropped frames for
  * channels 0-3, one t4_get_fcoe_stats() call per channel.  Returns 0 or
  * an errno value.
  */
 static int
 sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	struct tp_fcoe_stats stats[4];
 	struct sbuf *sb;
 	int rc, ch;
 
 	if ((rc = sysctl_wire_old_buffer(req, 0)) != 0)
 		return (rc);
 
 	if ((sb = sbuf_new_for_sysctl(NULL, NULL, 256, req)) == NULL)
 		return (ENOMEM);
 
 	/* Collect the counters for every channel before printing. */
 	for (ch = 0; ch < 4; ch++)
 		t4_get_fcoe_stats(sc, ch, &stats[ch]);
 
 	sbuf_printf(sb, "                   channel 0        channel 1        "
 	    "channel 2        channel 3\n");
 	sbuf_printf(sb, "octetsDDP:  %16ju %16ju %16ju %16ju\n",
 	    stats[0].octetsDDP, stats[1].octetsDDP,
 	    stats[2].octetsDDP, stats[3].octetsDDP);
 	sbuf_printf(sb, "framesDDP:  %16u %16u %16u %16u\n",
 	    stats[0].framesDDP, stats[1].framesDDP,
 	    stats[2].framesDDP, stats[3].framesDDP);
 	sbuf_printf(sb, "framesDrop: %16u %16u %16u %16u",
 	    stats[0].framesDrop, stats[1].framesDrop,
 	    stats[2].framesDrop, stats[3].framesDrop);
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 /*
  * Sysctl handler: one row per Tx scheduler showing its mode, channel,
  * rate, class IPG and flow IPG.  A zero rate/IPG/pace value is printed
  * as "disabled".  Returns 0 or an errno value.
  */
 static int
 sysctl_hw_sched(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	unsigned int pace_tab[NTX_SCHED];
 	unsigned int map, mode, kbps, ipg;
 	struct sbuf *sb;
 	int rc, i;
 
 	if ((rc = sysctl_wire_old_buffer(req, 0)) != 0)
 		return (rc);
 
 	if ((sb = sbuf_new_for_sysctl(NULL, NULL, 256, req)) == NULL)
 		return (ENOMEM);
 
 	map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP);
 	mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG));
 	t4_read_pace_tbl(sc, pace_tab);
 
 	sbuf_printf(sb, "Scheduler  Mode   Channel  Rate (Kbps)   "
 	    "Class IPG (0.1 ns)   Flow IPG (us)");
 
 	for (i = 0; i < NTX_SCHED; ++i) {
 		t4_get_tx_sched(sc, i, &kbps, &ipg);
 
 		/* Bit i of mode selects flow vs. class; map has 2 bits each. */
 		sbuf_printf(sb, "\n    %u      %-5s     %u     ", i,
 		    (mode & (1 << i)) ? "flow" : "class", map & 3);
 
 		if (kbps == 0)
 			sbuf_printf(sb, " disabled     ");
 		else
 			sbuf_printf(sb, "%9u     ", kbps);
 
 		if (ipg == 0)
 			sbuf_printf(sb, "     disabled        ");
 		else
 			sbuf_printf(sb, "%13u        ", ipg);
 
 		if (pace_tab[i] == 0)
 			sbuf_printf(sb, "  disabled");
 		else
 			sbuf_printf(sb, "%10u", pace_tab[i]);
 
 		/* Advance to the next scheduler's channel field. */
 		map >>= 2;
 	}
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 /*
  * Sysctl handler: loopback port statistics, printed two ports side by
  * side (0/1, then 2/3).  Returns 0 or an errno value.
  */
 static int
 sysctl_lb_stats(SYSCTL_HANDLER_ARGS)
 {
 	static const char *stat_name[] = {
 		"OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:",
 		"UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:",
 		"Frames128To255:", "Frames256To511:", "Frames512To1023:",
 		"Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:",
 		"BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:",
 		"BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:",
 		"BG2FramesTrunc:", "BG3FramesTrunc:"
 	};
 	struct adapter *sc = arg1;
 	struct lb_port_stats s[2];
 	uint64_t *left, *right;
 	struct sbuf *sb;
 	int rc, i, j;
 
 	if ((rc = sysctl_wire_old_buffer(req, 0)) != 0)
 		return (rc);
 
 	if ((sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req)) == NULL)
 		return (ENOMEM);
 
 	memset(s, 0, sizeof(s));
 
 	for (i = 0; i < 4; i += 2) {
 		t4_get_lb_stats(sc, i, &s[0]);
 		t4_get_lb_stats(sc, i + 1, &s[1]);
 
 		sbuf_printf(sb, "%s                       Loopback %u"
 		    "           Loopback %u", i == 0 ? "" : "\n", i, i + 1);
 
 		/*
 		 * The counters are read as consecutive 64-bit words starting
 		 * at the 'octets' member, in the order listed in stat_name.
 		 */
 		left = &s[0].octets;
 		right = &s[1].octets;
 		for (j = 0; j < nitems(stat_name); j++) {
 			sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j],
 			    left[j], right[j]);
 		}
 	}
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 /*
  * Sysctl handler: reports the port's link-down reason code, as "n/a"
  * when negative, by name when the code has one, and numerically
  * otherwise.  Returns 0 or an errno value.
  */
 static int
 sysctl_linkdnrc(SYSCTL_HANDLER_ARGS)
 {
 	static const char *linkdnreasons[] = {
 		"non-specific", "remote fault", "autoneg failed", "reserved3",
 		"PHY overheated", "unknown", "rx los", "reserved7"
 	};
 	struct port_info *pi = arg1;
 	struct sbuf *sb;
 	int rc;
 
 	if ((rc = sysctl_wire_old_buffer(req, 0)) != 0)
 		return (rc);
 
 	if ((sb = sbuf_new_for_sysctl(NULL, NULL, 64, req)) == NULL)
 		return (ENOMEM);
 
 	/* The negative check must come first; see the unsigned compare below. */
 	if (pi->linkdnrc < 0)
 		sbuf_printf(sb, "n/a");
 	else if (pi->linkdnrc >= nitems(linkdnreasons))
 		sbuf_printf(sb, "%d", pi->linkdnrc);
 	else
 		sbuf_printf(sb, "%s", linkdnreasons[pi->linkdnrc]);
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 /*
  * One address range shown by sysctl_meminfo(): 'base' and 'limit' bound
  * the range and 'idx' selects the name to print for it.
  */
 struct mem_desc {
 	unsigned int base;
 	unsigned int limit;
 	unsigned int idx;
 };
 
 /*
  * qsort(3) comparator ordering mem_desc entries by ascending base.
  *
  * The bases are compared explicitly rather than subtracted: an unsigned
  * difference converted to int has the wrong sign whenever the two bases
  * are more than INT_MAX apart, which would mis-sort such entries.
  *
  * Returns a negative value, zero, or a positive value when a's base is
  * respectively below, equal to, or above b's base.
  */
 static int
 mem_desc_cmp(const void *a, const void *b)
 {
 	const struct mem_desc *x = a;
 	const struct mem_desc *y = b;
 
 	if (x->base < y->base)
 		return (-1);
 	return (x->base > y->base);
 }
 
 /*
  * Append one "name from-to [size]" line to sb for the inclusive range
  * [from, to].  Nothing is printed when the computed size is zero.
  */
 static void
 mem_region_show(struct sbuf *sb, const char *name, unsigned int from,
     unsigned int to)
 {
 	const unsigned int nbytes = to - from + 1;
 
 	/* XXX: need humanize_number(3) in libkern for a more readable 'size' */
 	if (nbytes != 0)
 		sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, nbytes);
 }
 
 /*
  * Sysctl handler that prints the adapter's memory layout.
  *
  * The output has four parts: the enabled memories (EDC0/EDC1/MC...), the
  * regions carved out of them sorted by base address, the uP RAM/Extmem2
  * windows, and the Rx/Tx page and per-port page reservation summary.
  *
  * Returns 0 on success or an errno value.
  */
 static int
 sysctl_meminfo(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	struct sbuf *sb;
 	int rc, i, n;
 	uint32_t lo, hi, used, alloc;
 	static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"};
 	static const char *region[] = {
 		"DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:",
 		"Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:",
 		"Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:",
 		"TDDP region:", "TPT region:", "STAG region:", "RQ region:",
 		"RQUDP region:", "PBL region:", "TXPBL region:",
 		"DBVFIFO region:", "ULPRX state:", "ULPTX state:",
 		"On-chip queues:"
 	};
 	struct mem_desc avail[4];
 	struct mem_desc mem[nitems(region) + 3];	/* up to 3 holes */
 	struct mem_desc *md = mem;
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return (rc);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	/*
 	 * idx initially equals the slot number, which is also the index of
 	 * the slot's name in region[]; slots past nitems(region) are holes.
 	 * A limit of 0 means "not known yet".
 	 */
 	for (i = 0; i < nitems(mem); i++) {
 		mem[i].limit = 0;
 		mem[i].idx = i;
 	}
 
 	/* Find and sort the populated memory ranges */
 	i = 0;
 	lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
 	if (lo & F_EDRAM0_ENABLE) {
 		hi = t4_read_reg(sc, A_MA_EDRAM0_BAR);
 		avail[i].base = G_EDRAM0_BASE(hi) << 20;
 		avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20);
 		avail[i].idx = 0;
 		i++;
 	}
 	if (lo & F_EDRAM1_ENABLE) {
 		hi = t4_read_reg(sc, A_MA_EDRAM1_BAR);
 		avail[i].base = G_EDRAM1_BASE(hi) << 20;
 		avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20);
 		avail[i].idx = 1;
 		i++;
 	}
 	if (lo & F_EXT_MEM_ENABLE) {
 		hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR);
 		avail[i].base = G_EXT_MEM_BASE(hi) << 20;
 		avail[i].limit = avail[i].base +
 		    (G_EXT_MEM_SIZE(hi) << 20);
 		avail[i].idx = is_t4(sc) ? 2 : 3;	/* Call it MC for T4 */
 		i++;
 	}
 	if (!is_t4(sc) && lo & F_EXT_MEM1_ENABLE) {
 		hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR);
 		avail[i].base = G_EXT_MEM1_BASE(hi) << 20;
 		avail[i].limit = avail[i].base +
 		    (G_EXT_MEM1_SIZE(hi) << 20);
 		avail[i].idx = 4;
 		i++;
 	}
 	if (!i) {                                  /* no memory available */
 		/* The sbuf must be released on this early exit as well. */
 		sbuf_delete(sb);
 		return (0);
 	}
 	qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp);
 
 	/*
 	 * Fill in the regions in the same order as region[].  These first
 	 * ones have only a base; their limit is derived later from the base
 	 * of the next region in sorted order.
 	 */
 	(md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR);
 	(md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR);
 	(md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR);
 	(md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
 	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE);
 	(md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE);
 	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE);
 	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE);
 	(md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE);
 
 	/* the next few have explicit upper bounds */
 	md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE);
 	md->limit = md->base - 1 +
 		    t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) *
 		    G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE));
 	md++;
 
 	md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE);
 	md->limit = md->base - 1 +
 		    t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) *
 		    G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE));
 	md++;
 
 	if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) {
 		hi = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4;
 		md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE);
 		md->limit = (sc->tids.ntids - hi) * 16 + md->base - 1;
 	} else {
 		md->base = 0;
 		md->idx = nitems(region);  /* hide it */
 	}
 	md++;
 
 	/* Each ULP region is described by a lower/upper limit register pair. */
 #define ulp_region(reg) \
 	md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\
 	(md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT)
 
 	ulp_region(RX_ISCSI);
 	ulp_region(RX_TDDP);
 	ulp_region(TX_TPT);
 	ulp_region(RX_STAG);
 	ulp_region(RX_RQ);
 	ulp_region(RX_RQUDP);
 	ulp_region(RX_PBL);
 	ulp_region(TX_PBL);
 #undef ulp_region
 
 	/* DBVFIFO: hidden unless this is not a T4 and the VFIFO is enabled. */
 	md->base = 0;
 	md->idx = nitems(region);
 	if (!is_t4(sc) && t4_read_reg(sc, A_SGE_CONTROL2) & F_VFIFO_ENABLE) {
 		md->base = G_BASEADDR(t4_read_reg(sc, A_SGE_DBVFIFO_BADDR));
 		md->limit = md->base + (G_DBVFIFO_SIZE((t4_read_reg(sc,
 		    A_SGE_DBVFIFO_SIZE))) << 2) - 1;
 	}
 	md++;
 
 	md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE);
 	md->limit = md->base + sc->tids.ntids - 1;
 	md++;
 	md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE);
 	md->limit = md->base + sc->tids.ntids - 1;
 	md++;
 
 	md->base = sc->vres.ocq.start;
 	if (sc->vres.ocq.size)
 		md->limit = md->base + sc->vres.ocq.size - 1;
 	else
 		md->idx = nitems(region);  /* hide it */
 	md++;
 
 	/* add any address-space holes, there can be up to 3 */
 	for (n = 0; n < i - 1; n++)
 		if (avail[n].limit < avail[n + 1].base)
 			(md++)->base = avail[n].limit;
 	if (avail[n].limit)
 		(md++)->base = avail[n].limit;
 
 	n = md - mem;
 	qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp);
 
 	/* Part 1: the enabled memories; avail[].limit is one past the end. */
 	for (lo = 0; lo < i; lo++)
 		mem_region_show(sb, memory[avail[lo].idx], avail[lo].base,
 				avail[lo].limit - 1);
 
 	/* Part 2: the regions, in ascending base order. */
 	sbuf_printf(sb, "\n");
 	for (i = 0; i < n; i++) {
 		if (mem[i].idx >= nitems(region))
 			continue;                        /* skip holes */
 		/* No explicit limit: run up to the next entry's base. */
 		if (!mem[i].limit)
 			mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0;
 		mem_region_show(sb, region[mem[i].idx], mem[i].base,
 				mem[i].limit);
 	}
 
 	/* Part 3: uP memory windows. */
 	sbuf_printf(sb, "\n");
 	lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR);
 	hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1;
 	mem_region_show(sb, "uP RAM:", lo, hi);
 
 	lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR);
 	hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1;
 	mem_region_show(sb, "uP Extmem2:", lo, hi);
 
 	/* Part 4: page counts and per-port page reservations. */
 	lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE);
 	sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n",
 		   G_PMRXMAXPAGE(lo),
 		   t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10,
 		   (lo & F_PMRXNUMCHN) ? 2 : 1);
 
 	lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE);
 	hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
 	sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n",
 		   G_PMTXMAXPAGE(lo),
 		   hi >= (1 << 20) ? (hi >> 20) : (hi >> 10),
 		   hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo));
 	sbuf_printf(sb, "%u p-structs\n",
 		   t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT));
 
 	for (i = 0; i < 4; i++) {
 		lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4);
 		if (is_t4(sc)) {
 			used = G_USED(lo);
 			alloc = G_ALLOC(lo);
 		} else {
 			used = G_T5_USED(lo);
 			alloc = G_T5_ALLOC(lo);
 		}
 		sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated",
 			   i, used, alloc);
 	}
 	for (i = 0; i < 4; i++) {
 		lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4);
 		if (is_t4(sc)) {
 			used = G_USED(lo);
 			alloc = G_ALLOC(lo);
 		} else {
 			used = G_T5_USED(lo);
 			alloc = G_T5_ALLOC(lo);
 		}
 		sbuf_printf(sb,
 			   "\nLoopback %d using %u pages out of %u allocated",
 			   i, used, alloc);
 	}
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 /*
  * Convert a TCAM X/Y pair into an Ethernet address and a mask.
  *
  * *mask receives x | y.  addr receives the low ETHER_ADDR_LEN bytes of y,
  * most significant byte first.
  */
 static inline void
 tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask)
 {
 	int k;
 
 	*mask = x | y;
 
 	/* addr[0] is bits 47:40 of y, addr[5] is bits 7:0. */
 	for (k = 0; k < ETHER_ADDR_LEN; k++)
 		addr[k] = (uint8_t)(y >> (8 * (ETHER_ADDR_LEN - 1 - k)));
 }
 
 /*
  * Sysctl handler that dumps the MPS TCAM, one line per entry.
  *
  * For every index the TCAM X/Y words and the classification SRAM words
  * are read; entries where X and Y have a bit in common are skipped.  For
  * entries with F_REPLICATE set the replication map is fetched with a
  * firmware LDST mailbox command.
  *
  * Returns 0 on success or an errno value.  If begin_synchronized_op()
  * fails the loop stops and that error is returned.
  */
 static int
 sysctl_mps_tcam(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	struct sbuf *sb;
 	int rc, i, n;
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return (rc);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	sbuf_printf(sb,
 	    "Idx  Ethernet address     Mask     Vld Ports PF"
 	    "  VF              Replication             P0 P1 P2 P3  ML");
 	/* The number of entries depends on the chip generation. */
 	n = is_t4(sc) ? NUM_MPS_CLS_SRAM_L_INSTANCES :
 	    NUM_MPS_T5_CLS_SRAM_L_INSTANCES;
 	for (i = 0; i < n; i++) {
 		uint64_t tcamx, tcamy, mask;
 		uint32_t cls_lo, cls_hi;
 		uint8_t addr[ETHER_ADDR_LEN];
 
 		tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i));
 		tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i));
 		cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i));
 		cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i));
 
 		/* NOTE(review): presumably marks an unused entry -- confirm. */
 		if (tcamx & tcamy)
 			continue;
 
 		tcamxy2valmask(tcamx, tcamy, addr, &mask);
 		/* VF is printed as -1 when F_VF_VALID is not set. */
 		sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx"
 			   "  %c   %#x%4u%4d", i, addr[0], addr[1], addr[2],
 			   addr[3], addr[4], addr[5], (uintmax_t)mask,
 			   (cls_lo & F_SRAM_VLD) ? 'Y' : 'N',
 			   G_PORTMAP(cls_hi), G_PF(cls_lo),
 			   (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1);
 
 		if (cls_lo & F_REPLICATE) {
 			struct fw_ldst_cmd ldst_cmd;
 
 			/* Build an LDST read of the replication map for entry i. */
 			memset(&ldst_cmd, 0, sizeof(ldst_cmd));
 			ldst_cmd.op_to_addrspace =
 			    htobe32(V_FW_CMD_OP(FW_LDST_CMD) |
 				F_FW_CMD_REQUEST | F_FW_CMD_READ |
 				V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS));
 			ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd));
 			ldst_cmd.u.mps.rplc.fid_idx =
 			    htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) |
 				V_FW_LDST_CMD_IDX(i));
 
 			/* The mailbox is only used inside a synchronized op. */
 			rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
 			    "t4mps");
 			if (rc)
 				break;
 			/* The reply overwrites ldst_cmd in place. */
 			rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd,
 			    sizeof(ldst_cmd), &ldst_cmd);
 			end_synchronized_op(sc, 0);
 
 			if (rc != 0) {
 				/*
 				 * A mailbox failure is reported inline and then
 				 * cleared so the remaining entries still print.
 				 */
 				sbuf_printf(sb,
 				    " ------------ error %3u ------------", rc);
 				rc = 0;
 			} else {
 				sbuf_printf(sb, " %08x %08x %08x %08x",
 				    be32toh(ldst_cmd.u.mps.rplc.rplc127_96),
 				    be32toh(ldst_cmd.u.mps.rplc.rplc95_64),
 				    be32toh(ldst_cmd.u.mps.rplc.rplc63_32),
 				    be32toh(ldst_cmd.u.mps.rplc.rplc31_0));
 			}
 		} else
 			sbuf_printf(sb, "%36s", "");	/* blank replication column */
 
 		sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo),
 		    G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo),
 		    G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf);
 	}
 
 	/* Keep an error from the loop; otherwise report sbuf_finish()'s result. */
 	if (rc)
 		(void) sbuf_finish(sb);
 	else
 		rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 /*
  * Sysctl handler: prints the first 16 entries of the MTU table read by
  * t4_read_mtu_tbl(), separated by single spaces.  Returns 0 or an errno
  * value.
  */
 static int
 sysctl_path_mtus(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	uint16_t mtus[NMTUS];
 	struct sbuf *sb;
 	int rc, i;
 
 	if ((rc = sysctl_wire_old_buffer(req, 0)) != 0)
 		return (rc);
 
 	if ((sb = sbuf_new_for_sysctl(NULL, NULL, 256, req)) == NULL)
 		return (ENOMEM);
 
 	t4_read_mtu_tbl(sc, mtus, NULL);
 
 	/* No leading space before the first value, none after the last. */
 	sbuf_printf(sb, "%u", mtus[0]);
 	for (i = 1; i < 16; i++)
 		sbuf_printf(sb, " %u", mtus[i]);
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 /*
  * Sysctl handler: PM Tx and PM Rx statistics, one table each, with a
  * command count and a byte count per row.  Returns 0 or an errno value.
  */
 static int
 sysctl_pm_stats(SYSCTL_HANDLER_ARGS)
 {
 	static const char *tx_stats[] = {
 		"Read:", "Write bypass:", "Write mem:", "Bypass + mem:"
 	};
 	static const char *rx_stats[] = {
 		"Read:", "Write bypass:", "Write mem:", "Flush:"
 	};
 	struct adapter *sc = arg1;
 	uint32_t cnt[PM_NSTATS];
 	uint64_t cyc[PM_NSTATS];
 	struct sbuf *sb;
 	int rc, i;
 
 	if ((rc = sysctl_wire_old_buffer(req, 0)) != 0)
 		return (rc);
 
 	if ((sb = sbuf_new_for_sysctl(NULL, NULL, 256, req)) == NULL)
 		return (ENOMEM);
 
 	/* Tx table.  cnt/cyc are reused for the Rx table below. */
 	t4_pmtx_get_stats(sc, cnt, cyc);
 	sbuf_printf(sb, "                Tx pcmds             Tx bytes");
 	for (i = 0; i < ARRAY_SIZE(tx_stats); i++) {
 		sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], cnt[i],
 		    cyc[i]);
 	}
 
 	/* Rx table. */
 	t4_pmrx_get_stats(sc, cnt, cyc);
 	sbuf_printf(sb, "\n                Rx pcmds             Rx bytes");
 	for (i = 0; i < ARRAY_SIZE(rx_stats); i++) {
 		sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], cnt[i],
 		    cyc[i]);
 	}
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 static int
 sysctl_rdma_stats(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	struct sbuf *sb;
 	int rc;
 	struct tp_rdma_stats stats;
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return (rc);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	t4_tp_get_rdma_stats(sc, &stats);
 	sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod);
 	sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt);
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 static int
 sysctl_tcp_stats(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	struct sbuf *sb;
 	int rc;
 	struct tp_tcp_stats v4, v6;
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return (rc);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	t4_tp_get_tcp_stats(sc, &v4, &v6);
 	sbuf_printf(sb,
 	    "                                IP                 IPv6\n");
 	sbuf_printf(sb, "OutRsts:      %20u %20u\n",
 	    v4.tcpOutRsts, v6.tcpOutRsts);
 	sbuf_printf(sb, "InSegs:       %20ju %20ju\n",
 	    v4.tcpInSegs, v6.tcpInSegs);
 	sbuf_printf(sb, "OutSegs:      %20ju %20ju\n",
 	    v4.tcpOutSegs, v6.tcpOutSegs);
 	sbuf_printf(sb, "RetransSegs:  %20ju %20ju",
 	    v4.tcpRetransSegs, v6.tcpRetransSegs);
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 /*
  * Sysctl handler: lists the ranges (and, where tracked, usage) of each
  * kind of TID known to the driver, followed by the hardware's IPv4/IPv6
  * active counts.  A range is printed only if its count is non-zero.
  * Returns 0 or an errno value.
  */
 static int
 sysctl_tids(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	struct tid_info *t = &sc->tids;
 	uint32_t srv_idx, hash_base;
 	struct sbuf *sb;
 	int rc;
 
 	if ((rc = sysctl_wire_old_buffer(req, 0)) != 0)
 		return (rc);
 
 	if ((sb = sbuf_new_for_sysctl(NULL, NULL, 256, req)) == NULL)
 		return (ENOMEM);
 
 	if (t->natids) {
 		sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1,
 		    t->atids_in_use);
 	}
 
 	if (t->ntids) {
 		if ((t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) == 0) {
 			/* No hash region: a single contiguous range. */
 			sbuf_printf(sb, "TID range: 0-%u", t->ntids - 1);
 		} else {
 			/* Both registers are divided by 4 before printing. */
 			srv_idx = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4;
 			hash_base = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4;
 
 			if (srv_idx != 0) {
 				sbuf_printf(sb, "TID range: 0-%u, %u-%u",
 				    srv_idx - 1, hash_base, t->ntids - 1);
 			} else {
 				sbuf_printf(sb, "TID range: %u-%u",
 				    hash_base, t->ntids - 1);
 			}
 		}
 		sbuf_printf(sb, ", in use: %u\n",
 		    atomic_load_acq_int(&t->tids_in_use));
 	}
 
 	if (t->nstids) {
 		sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base,
 		    t->stid_base + t->nstids - 1, t->stids_in_use);
 	}
 
 	if (t->nftids) {
 		sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base,
 		    t->ftid_base + t->nftids - 1);
 	}
 
 	if (t->netids) {
 		sbuf_printf(sb, "ETID range: %u-%u\n", t->etid_base,
 		    t->etid_base + t->netids - 1);
 	}
 
 	sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users",
 	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4),
 	    t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6));
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 static int
 sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	struct sbuf *sb;
 	int rc;
 	struct tp_err_stats stats;
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return (rc);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	t4_tp_get_err_stats(sc, &stats);
 
 	sbuf_printf(sb, "                 channel 0  channel 1  channel 2  "
 		      "channel 3\n");
 	sbuf_printf(sb, "macInErrs:      %10u %10u %10u %10u\n",
 	    stats.macInErrs[0], stats.macInErrs[1], stats.macInErrs[2],
 	    stats.macInErrs[3]);
 	sbuf_printf(sb, "hdrInErrs:      %10u %10u %10u %10u\n",
 	    stats.hdrInErrs[0], stats.hdrInErrs[1], stats.hdrInErrs[2],
 	    stats.hdrInErrs[3]);
 	sbuf_printf(sb, "tcpInErrs:      %10u %10u %10u %10u\n",
 	    stats.tcpInErrs[0], stats.tcpInErrs[1], stats.tcpInErrs[2],
 	    stats.tcpInErrs[3]);
 	sbuf_printf(sb, "tcp6InErrs:     %10u %10u %10u %10u\n",
 	    stats.tcp6InErrs[0], stats.tcp6InErrs[1], stats.tcp6InErrs[2],
 	    stats.tcp6InErrs[3]);
 	sbuf_printf(sb, "tnlCongDrops:   %10u %10u %10u %10u\n",
 	    stats.tnlCongDrops[0], stats.tnlCongDrops[1], stats.tnlCongDrops[2],
 	    stats.tnlCongDrops[3]);
 	sbuf_printf(sb, "tnlTxDrops:     %10u %10u %10u %10u\n",
 	    stats.tnlTxDrops[0], stats.tnlTxDrops[1], stats.tnlTxDrops[2],
 	    stats.tnlTxDrops[3]);
 	sbuf_printf(sb, "ofldVlanDrops:  %10u %10u %10u %10u\n",
 	    stats.ofldVlanDrops[0], stats.ofldVlanDrops[1],
 	    stats.ofldVlanDrops[2], stats.ofldVlanDrops[3]);
 	sbuf_printf(sb, "ofldChanDrops:  %10u %10u %10u %10u\n\n",
 	    stats.ofldChanDrops[0], stats.ofldChanDrops[1],
 	    stats.ofldChanDrops[2], stats.ofldChanDrops[3]);
 	sbuf_printf(sb, "ofldNoNeigh:    %u\nofldCongDefer:  %u",
 	    stats.ofldNoNeigh, stats.ofldCongDefer);
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 /*
  * Describes one bit field inside a 64-bit value for field_desc_show().
  * Tables of these are terminated by an entry whose name is NULL.
  */
 struct field_desc {
 	const char *name;	/* label printed before the value */
 	u_int start;		/* position of the field's lowest bit */
 	u_int width;		/* number of bits in the field */
 };
 
 /*
  * Decode v according to the NULL-name-terminated table f and append the
  * result to sb as "name: value" pairs separated by spaces.
  *
  * Output is wrapped: when the next pair would bring the current line to
  * 79 characters or more, a newline and an 8-space indent are emitted
  * first.  A final newline is always appended.
  */
 static void
 field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f)
 {
 	char buf[32];
 	int line_size = 0;
 
 	while (f->name) {
 		/*
 		 * Shifting a 64-bit value by 64 or more is undefined, so a
 		 * full-width field gets an all-ones mask and a start bit at
 		 * or beyond bit 64 decodes as 0.
 		 */
 		uint64_t mask = f->width >= 64 ? ~0ULL :
 		    (1ULL << f->width) - 1;
 		uint64_t val = f->start >= 64 ? 0 : (v >> f->start) & mask;
 		int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name,
 		    (uintmax_t)val);
 
 		/*
 		 * snprintf returns the untruncated length; account only for
 		 * what actually fits in buf and will be printed.
 		 */
 		if (len >= (int)sizeof(buf))
 			len = sizeof(buf) - 1;
 
 		if (line_size + len >= 79) {
 			line_size = 8;
 			sbuf_printf(sb, "\n        ");
 		}
 		sbuf_printf(sb, "%s ", buf);
 		line_size += len + 1;	/* +1 for the separating space */
 		f++;
 	}
 	sbuf_printf(sb, "\n");
 }
 
 /*
  * Field layout ({ name, start bit, width }) used to decode a TP logic
  * analyzer word with field_desc_show().  tp_la_show(), tp_la_show2() and
  * tp_la_show3() all decode the first word of an entry with this table.
  */
 static struct field_desc tp_la0[] = {
 	{ "RcfOpCodeOut", 60, 4 },
 	{ "State", 56, 4 },
 	{ "WcfState", 52, 4 },
 	{ "RcfOpcSrcOut", 50, 2 },
 	{ "CRxError", 49, 1 },
 	{ "ERxError", 48, 1 },
 	{ "SanityFailed", 47, 1 },
 	{ "SpuriousMsg", 46, 1 },
 	{ "FlushInputMsg", 45, 1 },
 	{ "FlushInputCpl", 44, 1 },
 	{ "RssUpBit", 43, 1 },
 	{ "RssFilterHit", 42, 1 },
 	{ "Tid", 32, 10 },
 	{ "InitTcb", 31, 1 },
 	{ "LineNumber", 24, 7 },
 	{ "Emsg", 23, 1 },
 	{ "EdataOut", 22, 1 },
 	{ "Cmsg", 21, 1 },
 	{ "CdataOut", 20, 1 },
 	{ "EreadPdu", 19, 1 },
 	{ "CreadPdu", 18, 1 },
 	{ "TunnelPkt", 17, 1 },
 	{ "RcfPeerFin", 16, 1 },
 	{ "RcfReasonOut", 12, 4 },
 	{ "TxCchannel", 10, 2 },
 	{ "RcfTxChannel", 8, 2 },
 	{ "RxEchannel", 6, 2 },
 	{ "RcfRxChannel", 5, 1 },
 	{ "RcfDataOutSrdy", 4, 1 },
 	{ "RxDvld", 3, 1 },
 	{ "RxOoDvld", 2, 1 },
 	{ "RxCongestion", 1, 1 },
 	{ "TxCongestion", 0, 1 },
 	{ NULL }	/* terminator */
 };
 
 /*
  * Field layout ({ name, start bit, width }) for the second word of a TP
  * logic analyzer entry.  tp_la_show3() uses this table when bit 17 of the
  * entry's first word is clear.
  */
 static struct field_desc tp_la1[] = {
 	{ "CplCmdIn", 56, 8 },
 	{ "CplCmdOut", 48, 8 },
 	{ "ESynOut", 47, 1 },
 	{ "EAckOut", 46, 1 },
 	{ "EFinOut", 45, 1 },
 	{ "ERstOut", 44, 1 },
 	{ "SynIn", 43, 1 },
 	{ "AckIn", 42, 1 },
 	{ "FinIn", 41, 1 },
 	{ "RstIn", 40, 1 },
 	{ "DataIn", 39, 1 },
 	{ "DataInVld", 38, 1 },
 	{ "PadIn", 37, 1 },
 	{ "RxBufEmpty", 36, 1 },
 	{ "RxDdp", 35, 1 },
 	{ "RxFbCongestion", 34, 1 },
 	{ "TxFbCongestion", 33, 1 },
 	{ "TxPktSumSrdy", 32, 1 },
 	{ "RcfUlpType", 28, 4 },
 	{ "Eread", 27, 1 },
 	{ "Ebypass", 26, 1 },
 	{ "Esave", 25, 1 },
 	{ "Static0", 24, 1 },
 	{ "Cread", 23, 1 },
 	{ "Cbypass", 22, 1 },
 	{ "Csave", 21, 1 },
 	{ "CPktOut", 20, 1 },
 	{ "RxPagePoolFull", 18, 2 },
 	{ "RxLpbkPkt", 17, 1 },
 	{ "TxLpbkPkt", 16, 1 },
 	{ "RxVfValid", 15, 1 },
 	{ "SynLearned", 14, 1 },
 	{ "SetDelEntry", 13, 1 },
 	{ "SetInvEntry", 12, 1 },
 	{ "CpcmdDvld", 11, 1 },
 	{ "CpcmdSave", 10, 1 },
 	{ "RxPstructsFull", 8, 2 },
 	{ "EpcmdDvld", 7, 1 },
 	{ "EpcmdFlush", 6, 1 },
 	{ "EpcmdTrimPrefix", 5, 1 },
 	{ "EpcmdTrimPostfix", 4, 1 },
 	{ "ERssIp4Pkt", 3, 1 },
 	{ "ERssIp6Pkt", 2, 1 },
 	{ "ERssTcpUdpPkt", 1, 1 },
 	{ "ERssFceFipPkt", 0, 1 },
 	{ NULL }	/* terminator */
 };
 
 /*
  * Alternative field layout ({ name, start bit, width }) for the second
  * word of a TP logic analyzer entry.  tp_la_show3() uses this table when
  * bit 17 of the entry's first word is set.  It differs from tp_la1 only
  * in bits 55:44 (MpsVfVld/MpsPf/MpsVf instead of CplCmdOut and the
  * E*Out flags).
  */
 static struct field_desc tp_la2[] = {
 	{ "CplCmdIn", 56, 8 },
 	{ "MpsVfVld", 55, 1 },
 	{ "MpsPf", 52, 3 },
 	{ "MpsVf", 44, 8 },
 	{ "SynIn", 43, 1 },
 	{ "AckIn", 42, 1 },
 	{ "FinIn", 41, 1 },
 	{ "RstIn", 40, 1 },
 	{ "DataIn", 39, 1 },
 	{ "DataInVld", 38, 1 },
 	{ "PadIn", 37, 1 },
 	{ "RxBufEmpty", 36, 1 },
 	{ "RxDdp", 35, 1 },
 	{ "RxFbCongestion", 34, 1 },
 	{ "TxFbCongestion", 33, 1 },
 	{ "TxPktSumSrdy", 32, 1 },
 	{ "RcfUlpType", 28, 4 },
 	{ "Eread", 27, 1 },
 	{ "Ebypass", 26, 1 },
 	{ "Esave", 25, 1 },
 	{ "Static0", 24, 1 },
 	{ "Cread", 23, 1 },
 	{ "Cbypass", 22, 1 },
 	{ "Csave", 21, 1 },
 	{ "CPktOut", 20, 1 },
 	{ "RxPagePoolFull", 18, 2 },
 	{ "RxLpbkPkt", 17, 1 },
 	{ "TxLpbkPkt", 16, 1 },
 	{ "RxVfValid", 15, 1 },
 	{ "SynLearned", 14, 1 },
 	{ "SetDelEntry", 13, 1 },
 	{ "SetInvEntry", 12, 1 },
 	{ "CpcmdDvld", 11, 1 },
 	{ "CpcmdSave", 10, 1 },
 	{ "RxPstructsFull", 8, 2 },
 	{ "EpcmdDvld", 7, 1 },
 	{ "EpcmdFlush", 6, 1 },
 	{ "EpcmdTrimPrefix", 5, 1 },
 	{ "EpcmdTrimPostfix", 4, 1 },
 	{ "ERssIp4Pkt", 3, 1 },
 	{ "ERssIp6Pkt", 2, 1 },
 	{ "ERssTcpUdpPkt", 1, 1 },
 	{ "ERssFceFipPkt", 0, 1 },
 	{ NULL }	/* terminator */
 };
 
 /*
  * Show one single-word TP LA entry.  idx is not used; it is present so
  * that the signature matches the other tp_la_show* routines.
  */
 static void
 tp_la_show(struct sbuf *sb, uint64_t *p, int idx)
 {
 
 	field_desc_show(sb, p[0], tp_la0);
 }
 
 /*
  * Show one two-word TP LA entry, decoding both words with tp_la0.
  * Entries after the first are preceded by a blank line.  The second word
  * of the final entry is omitted when it is all ones.
  */
 static void
 tp_la_show2(struct sbuf *sb, uint64_t *p, int idx)
 {
 	int final_entry = idx >= (TPLA_SIZE / 2 - 1);
 
 	if (idx != 0)
 		sbuf_printf(sb, "\n");
 
 	field_desc_show(sb, p[0], tp_la0);
 
 	if (!(final_entry && p[1] == ~0ULL))
 		field_desc_show(sb, p[1], tp_la0);
 }
 
 /*
  * Show one two-word TP LA entry.  The first word is decoded with tp_la0;
  * the second with tp_la2 if bit 17 of the first word is set, otherwise
  * with tp_la1.  Entries after the first are preceded by a blank line.
  * The second word of the final entry is omitted when it is all ones.
  */
 static void
 tp_la_show3(struct sbuf *sb, uint64_t *p, int idx)
 {
 	const struct field_desc *second;
 	int final_entry = idx >= (TPLA_SIZE / 2 - 1);
 
 	if (idx != 0)
 		sbuf_printf(sb, "\n");
 
 	field_desc_show(sb, p[0], tp_la0);
 
 	if (final_entry && p[1] == ~0ULL)
 		return;
 
 	if (p[0] & (1 << 17))
 		second = tp_la2;
 	else
 		second = tp_la1;
 	field_desc_show(sb, p[1], second);
 }
 
 /*
  * Sysctl handler that dumps the TP logic analyzer.
  *
  * The capture (TPLA_SIZE 64-bit words) is read with t4_tp_read_la().
  * The DBGLAMODE field of A_TP_DBG_LA_CONFIG decides whether entries are
  * one or two words long and which routine decodes them.  Returns 0 or an
  * errno value.
  */
 static int
 sysctl_tp_la(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	void (*show_func)(struct sbuf *, uint64_t *, int);
 	uint64_t *buf;
 	struct sbuf *sb;
 	u_int i, inc, mode, nentries;
 	int rc;
 
 	if ((rc = sysctl_wire_old_buffer(req, 0)) != 0)
 		return (rc);
 
 	if ((sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req)) == NULL)
 		return (ENOMEM);
 
 	buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK);
 
 	t4_tp_read_la(sc, buf, NULL);
 
 	/* Modes 2 and 3 use two words per entry; everything else uses one. */
 	mode = G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG));
 	if (mode == 2) {
 		inc = 2;
 		show_func = tp_la_show2;
 	} else if (mode == 3) {
 		inc = 2;
 		show_func = tp_la_show3;
 	} else {
 		inc = 1;
 		show_func = tp_la_show;
 	}
 
 	nentries = TPLA_SIZE / inc;
 	for (i = 0; i < nentries; i++)
 		show_func(sb, &buf[i * inc], i);
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 	free(buf, M_CXGBE);
 	return (rc);
 }
 
 /*
  * sysctl handler that prints a small table of per-channel transmit rates
  * as filled in by t4_get_chan_txrate: one row labelled "NIC B/s" and one
  * labelled "Offload B/s", four channels each.
  *
  * Returns the error from sysctl_wire_old_buffer, ENOMEM if the sbuf cannot
  * be created, or otherwise the result of sbuf_finish.
  */
 static int
 sysctl_tx_rate(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	u64 nic_rate[NCHAN], ofld_rate[NCHAN];
 	struct sbuf *sb;
 	int rc;
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return (rc);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 256, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	t4_get_chan_txrate(sc, nic_rate, ofld_rate);
 
 	/* Column header, then one row per traffic class. */
 	sbuf_printf(sb, "              channel 0   channel 1   channel 2   "
 		 "channel 3\n");
 	sbuf_printf(sb, "NIC B/s:     %10ju  %10ju  %10ju  %10ju\n",
 	    nic_rate[0], nic_rate[1], nic_rate[2], nic_rate[3]);
 	sbuf_printf(sb, "Offload B/s: %10ju  %10ju  %10ju  %10ju",
 	    ofld_rate[0], ofld_rate[1], ofld_rate[2], ofld_rate[3]);
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 
 /*
  * sysctl handler that reads ULPRX_LA_SIZE entries of eight 32-bit words
  * each with t4_ulprx_read_la and prints one line per entry under a
  * "Pcmd / Type / Message / Data" header.
  *
  * Returns the error from sysctl_wire_old_buffer, ENOMEM if the sbuf cannot
  * be created, or otherwise the result of sbuf_finish.
  */
 static int
 sysctl_ulprx_la(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	struct sbuf *sb;
 	uint32_t *la, *e;
 	int entry, rc;
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return (rc);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	la = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE,
 	    M_ZERO | M_WAITOK);
 	t4_ulprx_read_la(sc, la);
 
 	sbuf_printf(sb, "      Pcmd        Type   Message"
 	    "                Data");
 	for (entry = 0; entry < ULPRX_LA_SIZE; entry++) {
 		e = &la[entry * 8];
 		/* Multi-word fields are printed most significant word first. */
 		sbuf_printf(sb, "\n%08x%08x  %4x  %08x  %08x%08x%08x%08x",
 		    e[1], e[0], e[2], e[3], e[7], e[6], e[5], e[4]);
 	}
 
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 	free(la, M_CXGBE);
 	return (rc);
 }
 
 /*
  * sysctl handler that reports the SGE statistics counters.
  *
  * Output is produced only when the STATSOURCE_T5 field of A_SGE_STAT_CFG
  * is 7.  STATMODE 0 labels the A_SGE_STAT_MATCH counter "incomplete" and
  * STATMODE 1 labels it "data overflow"; any other configuration yields an
  * empty string.
  *
  * The registers are 32-bit unsigned values, so they are kept in uint32_t
  * and printed with %u; printing them as signed would show counts of 2^31
  * and above as negative numbers.
  *
  * Returns the error from sysctl_wire_old_buffer, ENOMEM if the sbuf cannot
  * be created, or otherwise the result of sbuf_finish.
  */
 static int
 sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *sc = arg1;
 	struct sbuf *sb;
 	uint32_t v;
 	int rc;
 
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return (rc);
 
 	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
 	if (sb == NULL)
 		return (ENOMEM);
 
 	v = t4_read_reg(sc, A_SGE_STAT_CFG);
 	if (G_STATSOURCE_T5(v) == 7) {
 		if (G_STATMODE(v) == 0) {
 			sbuf_printf(sb, "total %u, incomplete %u",
 			    t4_read_reg(sc, A_SGE_STAT_TOTAL),
 			    t4_read_reg(sc, A_SGE_STAT_MATCH));
 		} else if (G_STATMODE(v) == 1) {
 			sbuf_printf(sb, "total %u, data overflow %u",
 			    t4_read_reg(sc, A_SGE_STAT_TOTAL),
 			    t4_read_reg(sc, A_SGE_STAT_MATCH));
 		}
 	}
 	rc = sbuf_finish(sb);
 	sbuf_delete(sb);
 
 	return (rc);
 }
 #endif
 
 /*
  * Translate a hardware filter configuration word (F_* bits) into the
  * T4_FILTER_* mode bits.  The IPv4/IPv6, address and port bits are set
  * unconditionally; every other mode bit mirrors one F_* bit of fconf.
  */
 static uint32_t
 fconf_to_mode(uint32_t fconf)
 {
 	uint32_t mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR |
 	    T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT;
 
 	mode |= (fconf & F_FRAGMENTATION) ? T4_FILTER_IP_FRAGMENT : 0;
 	mode |= (fconf & F_MPSHITTYPE) ? T4_FILTER_MPS_HIT_TYPE : 0;
 	mode |= (fconf & F_MACMATCH) ? T4_FILTER_MAC_IDX : 0;
 	mode |= (fconf & F_ETHERTYPE) ? T4_FILTER_ETH_TYPE : 0;
 	mode |= (fconf & F_PROTOCOL) ? T4_FILTER_IP_PROTO : 0;
 	mode |= (fconf & F_TOS) ? T4_FILTER_IP_TOS : 0;
 	mode |= (fconf & F_VLAN) ? T4_FILTER_VLAN : 0;
 	mode |= (fconf & F_VNIC_ID) ? T4_FILTER_VNIC : 0;
 	mode |= (fconf & F_PORT) ? T4_FILTER_PORT : 0;
 	mode |= (fconf & F_FCOE) ? T4_FILTER_FCoE : 0;
 
 	return (mode);
 }
 
 /*
  * Translate T4_FILTER_* mode bits into the hardware filter configuration
  * word (F_* bits).  Only the ten optional fields are mapped; mode bits
  * without an F_* counterpart here are ignored.
  */
 static uint32_t
 mode_to_fconf(uint32_t mode)
 {
 	uint32_t fconf = 0;
 
 	fconf |= (mode & T4_FILTER_IP_FRAGMENT) ? F_FRAGMENTATION : 0;
 	fconf |= (mode & T4_FILTER_MPS_HIT_TYPE) ? F_MPSHITTYPE : 0;
 	fconf |= (mode & T4_FILTER_MAC_IDX) ? F_MACMATCH : 0;
 	fconf |= (mode & T4_FILTER_ETH_TYPE) ? F_ETHERTYPE : 0;
 	fconf |= (mode & T4_FILTER_IP_PROTO) ? F_PROTOCOL : 0;
 	fconf |= (mode & T4_FILTER_IP_TOS) ? F_TOS : 0;
 	fconf |= (mode & T4_FILTER_VLAN) ? F_VLAN : 0;
 	fconf |= (mode & T4_FILTER_VNIC) ? F_VNIC_ID : 0;
 	fconf |= (mode & T4_FILTER_PORT) ? F_PORT : 0;
 	fconf |= (mode & T4_FILTER_FCoE) ? F_FCOE : 0;
 
 	return (fconf);
 }
 
 /*
  * Compute the F_* filter configuration bits that a filter specification
  * needs.  A field counts as used when either its value or its mask is
  * non-zero.
  */
 static uint32_t
 fspec_to_fconf(struct t4_filter_specification *fs)
 {
 	uint32_t fconf = 0;
 
 	fconf |= (fs->val.frag || fs->mask.frag) ? F_FRAGMENTATION : 0;
 	fconf |= (fs->val.matchtype || fs->mask.matchtype) ? F_MPSHITTYPE : 0;
 	fconf |= (fs->val.macidx || fs->mask.macidx) ? F_MACMATCH : 0;
 	fconf |= (fs->val.ethtype || fs->mask.ethtype) ? F_ETHERTYPE : 0;
 	fconf |= (fs->val.proto || fs->mask.proto) ? F_PROTOCOL : 0;
 	fconf |= (fs->val.tos || fs->mask.tos) ? F_TOS : 0;
 	fconf |= (fs->val.vlan_vld || fs->mask.vlan_vld) ? F_VLAN : 0;
 	fconf |= (fs->val.vnic_vld || fs->mask.vnic_vld) ? F_VNIC_ID : 0;
 	fconf |= (fs->val.iport || fs->mask.iport) ? F_PORT : 0;
 	fconf |= (fs->val.fcoe || fs->mask.fcoe) ? F_FCOE : 0;
 
 	return (fconf);
 }
 
 /*
  * Read the filter configuration from the hardware (A_TP_VLAN_PRI_MAP via
  * the TP PIO registers) and return it in *mode as T4_FILTER_* bits.  A
  * warning is logged if the value read differs from the copy cached in
  * sc->params.tp.vlan_pri_map; the value read from hardware is the one
  * reported.
  *
  * Returns 0, or the error from begin_synchronized_op.
  */
 static int
 get_filter_mode(struct adapter *sc, uint32_t *mode)
 {
 	uint32_t hw_fconf;
 	int rc;
 
 	rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
 	    "t4getfm");
 	if (rc != 0)
 		return (rc);
 
 	t4_read_indirect(sc, A_TP_PIO_ADDR, A_TP_PIO_DATA, &hw_fconf, 1,
 	    A_TP_VLAN_PRI_MAP);
 
 	if (hw_fconf != sc->params.tp.vlan_pri_map) {
 		log(LOG_WARNING, "%s: cached filter mode out of sync %x %x.\n",
 		    device_get_nameunit(sc->dev), sc->params.tp.vlan_pri_map,
 		    hw_fconf);
 	}
 
 	*mode = fconf_to_mode(hw_fconf);
 
 	end_synchronized_op(sc, LOCK_HELD);
 	return (0);
 }
 
 /*
  * Change the global filter mode to the one described by the T4_FILTER_*
  * bits in mode.
  *
  * Returns EBUSY if any filters are in use or (with TCP_OFFLOAD) if the TOM
  * ULD is active, the error from begin_synchronized_op, or otherwise the
  * negated return value of t4_set_filter_mode.
  */
 static int
 set_filter_mode(struct adapter *sc, uint32_t mode)
 {
 	const uint32_t fconf = mode_to_fconf(mode);
 	int rc;
 
 	rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
 	    "t4setfm");
 	if (rc != 0)
 		return (rc);
 
 	if (sc->tids.ftids_in_use > 0)
 		rc = EBUSY;
 #ifdef TCP_OFFLOAD
 	else if (uld_active(sc, ULD_TOM))
 		rc = EBUSY;
 #endif
 	else
 		rc = -t4_set_filter_mode(sc, fconf);
 
 	end_synchronized_op(sc, LOCK_HELD);
 	return (rc);
 }
 
 /*
  * Return the hit count of filter fid, read from the filter's TCB through
  * memory window 0.  On T4 the count is a big-endian 64-bit value at TCB
  * offset 16; on other chips it is a big-endian 32-bit value at offset 24.
  */
 static inline uint64_t
 get_filter_hits(struct adapter *sc, uint32_t fid)
 {
 	uint32_t tcb_base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
 	uint32_t win_base, win_off;
 	uint64_t raw;
 
 	memwin_info(sc, 0, &win_base, NULL);
 	win_off = position_memwin(sc, 0,
 	    tcb_base + (fid + sc->tids.ftid_base) * TCB_SIZE);
 
 	if (is_t4(sc)) {
 		raw = t4_read_reg64(sc, win_base + win_off + 16);
 		return (be64toh(raw));
 	}
 
 	raw = t4_read_reg(sc, win_base + win_off + 24);
 	return (be32toh(raw));
 }
 
 /*
  * Look up the first valid filter at index t->idx or above and copy its
  * description into *t (index, L2T index, SMT index, hit count and the
  * filter specification).  The hit count is UINT64_MAX for filters that
  * were not set up with hit counting.  If there is no such filter, t->idx
  * is set to 0xffffffff.
  *
  * Returns 0, or the error from begin_synchronized_op.
  */
 static int
 get_filter(struct adapter *sc, struct t4_filter *t)
 {
 	int i, rc, nfilters = sc->tids.nftids;
 	struct filter_entry *hit = NULL;
 
 	rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
 	    "t4getf");
 	if (rc)
 		return (rc);
 
 	/* Only scan when there is a table, something in it, and a sane idx. */
 	if (sc->tids.ftids_in_use != 0 && sc->tids.ftid_tab != NULL &&
 	    t->idx < nfilters) {
 		for (i = t->idx; i < nfilters; i++) {
 			if (sc->tids.ftid_tab[i].valid) {
 				hit = &sc->tids.ftid_tab[i];
 				break;
 			}
 		}
 	}
 
 	if (hit == NULL) {
 		t->idx = 0xffffffff;
 	} else {
 		t->idx = i;
 		t->l2tidx = hit->l2t ? hit->l2t->idx : 0;
 		t->smtidx = hit->smtidx;
 		if (hit->fs.hitcnts)
 			t->hits = get_filter_hits(sc, t->idx);
 		else
 			t->hits = UINT64_MAX;
 		t->fs = hit->fs;
 	}
 
 	end_synchronized_op(sc, LOCK_HELD);
 	return (0);
 }
 
 /*
  * Install the filter described by t at index t->idx and wait for the reply.
  *
  * The request is validated, the filter table is allocated on first use, the
  * target slot(s) are checked to be free and unlocked, and a filter work
  * request is sent with set_filter_wr.  The function then sleeps on
  * sc->tids.ftid_tab until the entry is no longer pending.
  *
  * Returns 0 once the filter is valid, or:
  *   ENOTSUP      the adapter has no filter tids
  *   EAGAIN       the adapter is not fully initialized
  *   EINVAL       bad index, port, iq/dirsteer combination or IPv6 alignment
  *   E2BIG        the filter uses fields not enabled in the global mode
  *   ENOMEM       the filter table could not be allocated
  *   EBUSY        a target slot is already valid or pending
  *   EPERM        a target slot is locked
  *   EIO          the entry is neither pending nor valid after the wait
  *   EINPROGRESS  the sleep was interrupted before the reply arrived
  * or the error from begin_synchronized_op / set_filter_wr.
  */
 static int
 set_filter(struct adapter *sc, struct t4_filter *t)
 {
 	unsigned int nfilters, nports;
 	struct filter_entry *f;
 	int i, rc;
 
 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf");
 	if (rc)
 		return (rc);
 
 	nfilters = sc->tids.nftids;
 	nports = sc->params.nports;
 
 	if (nfilters == 0) {
 		rc = ENOTSUP;
 		goto done;
 	}
 
 	if (!(sc->flags & FULL_INIT_DONE)) {
 		rc = EAGAIN;
 		goto done;
 	}
 
 	if (t->idx >= nfilters) {
 		rc = EINVAL;
 		goto done;
 	}
 
 	/* Validate against the global filter mode */
 	if ((sc->params.tp.vlan_pri_map | fspec_to_fconf(&t->fs)) !=
 	    sc->params.tp.vlan_pri_map) {
 		rc = E2BIG;
 		goto done;
 	}
 
 	if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) {
 		rc = EINVAL;
 		goto done;
 	}
 
 	if (t->fs.val.iport >= nports) {
 		rc = EINVAL;
 		goto done;
 	}
 
 	/* Can't specify an iq if not steering to it */
 	if (!t->fs.dirsteer && t->fs.iq) {
 		rc = EINVAL;
 		goto done;
 	}
 
 	/*
 	 * IPv6 filter idx must be 4 aligned
 	 *
 	 * NOTE(review): the loop below treats a type 1 filter as occupying
 	 * slots idx..idx+3, yet this test also rejects idx == nfilters - 4,
 	 * which would still fit in the table.  Confirm whether that is
 	 * intentional.
 	 */
 	if (t->fs.type == 1 &&
 	    ((t->idx & 0x3) || t->idx + 4 >= nfilters)) {
 		rc = EINVAL;
 		goto done;
 	}
 
 	/* The filter table and its lock are created on first use. */
 	if (sc->tids.ftid_tab == NULL) {
 		KASSERT(sc->tids.ftids_in_use == 0,
 		    ("%s: no memory allocated but filters_in_use > 0",
 		    __func__));
 
 		sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) *
 		    nfilters, M_CXGBE, M_NOWAIT | M_ZERO);
 		if (sc->tids.ftid_tab == NULL) {
 			rc = ENOMEM;
 			goto done;
 		}
 		mtx_init(&sc->tids.ftid_lock, "T4 filters", 0, MTX_DEF);
 	}
 
 	/*
 	 * Check that the slot is free.  A type 0 filter checks one slot (the
 	 * loop breaks after the first pass); any other type checks four.
 	 */
 	for (i = 0; i < 4; i++) {
 		f = &sc->tids.ftid_tab[t->idx + i];
 
 		if (f->pending || f->valid) {
 			rc = EBUSY;
 			goto done;
 		}
 		if (f->locked) {
 			rc = EPERM;
 			goto done;
 		}
 
 		if (t->fs.type == 0)
 			break;
 	}
 
 	f = &sc->tids.ftid_tab[t->idx];
 	f->fs = t->fs;
 
 	rc = set_filter_wr(sc, t->idx);
 done:
 	end_synchronized_op(sc, 0);
 
 	/*
 	 * rc is 0 here only if set_filter_wr succeeded, so f points at the
 	 * entry for t->idx.  Wait for the entry to stop being pending.
 	 */
 	if (rc == 0) {
 		mtx_lock(&sc->tids.ftid_lock);
 		for (;;) {
 			if (f->pending == 0) {
 				rc = f->valid ? 0 : EIO;
 				break;
 			}
 
 			/* PCATCH: a signal ends the wait early. */
 			if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock,
 			    PCATCH, "t4setfw", 0)) {
 				rc = EINPROGRESS;
 				break;
 			}
 		}
 		mtx_unlock(&sc->tids.ftid_lock);
 	}
 	return (rc);
 }
 
 /*
  * Delete the filter at index t->idx and wait for the reply.
  *
  * If the entry is valid its specification is copied back into t->fs and a
  * delete work request is sent with del_filter_wr.  The function then sleeps
  * on sc->tids.ftid_tab until the entry is no longer pending.  Deleting an
  * entry that is not valid sends nothing and returns 0.
  *
  * Returns 0 on success, or:
  *   ENOTSUP      the adapter has no filter tids
  *   EINVAL       no filter table, no filters in use, or bad index
  *   EAGAIN       the adapter is not fully initialized
  *   EBUSY        the entry has an operation pending
  *   EPERM        the entry is locked
  *   EIO          the entry is still valid after the wait
  *   EINPROGRESS  the sleep was interrupted before the reply arrived
  * or the error from begin_synchronized_op / del_filter_wr.
  */
 static int
 del_filter(struct adapter *sc, struct t4_filter *t)
 {
 	unsigned int nfilters;
 	struct filter_entry *f;
 	int rc;
 
 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4delf");
 	if (rc)
 		return (rc);
 
 	nfilters = sc->tids.nftids;
 
 	if (nfilters == 0) {
 		rc = ENOTSUP;
 		goto done;
 	}
 
 	if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 ||
 	    t->idx >= nfilters) {
 		rc = EINVAL;
 		goto done;
 	}
 
 	if (!(sc->flags & FULL_INIT_DONE)) {
 		rc = EAGAIN;
 		goto done;
 	}
 
 	f = &sc->tids.ftid_tab[t->idx];
 
 	if (f->pending) {
 		rc = EBUSY;
 		goto done;
 	}
 	if (f->locked) {
 		rc = EPERM;
 		goto done;
 	}
 
 	/* rc is still 0 here; it stays 0 if the entry is not valid. */
 	if (f->valid) {
 		t->fs = f->fs;	/* extra info for the caller */
 		rc = del_filter_wr(sc, t->idx);
 	}
 
 done:
 	end_synchronized_op(sc, 0);
 
 	/*
 	 * Every path that reaches this point with rc == 0 has assigned f
 	 * above.  Wait for the entry to stop being pending.
 	 */
 	if (rc == 0) {
 		mtx_lock(&sc->tids.ftid_lock);
 		for (;;) {
 			if (f->pending == 0) {
 				rc = f->valid ? EIO : 0;
 				break;
 			}
 
 			/* PCATCH: a signal ends the wait early. */
 			if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock,
 			    PCATCH, "t4delfw", 0)) {
 				rc = EINPROGRESS;
 				break;
 			}
 		}
 		mtx_unlock(&sc->tids.ftid_lock);
 	}
 
 	return (rc);
 }
 
 /*
  * Reset a filter entry to its all-zero state, first releasing the L2T
  * entry it references, if any.
  */
 static void
 clear_filter(struct filter_entry *f)
 {
 
 	if (f->l2t != NULL)
 		t4_l2t_release(f->l2t);
 
 	bzero(f, sizeof(*f));
 }
 
 /*
  * Build and send the FW_FILTER_WR work request that installs the filter
  * stored in sc->tids.ftid_tab[fidx].
  *
  * If the filter rewrites the destination MAC or the VLAN, a switching L2T
  * entry is allocated and programmed first.  The work request is then
  * queued on sc->sge.mgmtq, the entry is marked pending and the in-use
  * count is incremented.
  *
  * Must be called from within a synchronized operation.
  *
  * Returns 0 if the work request was committed, EAGAIN if no L2T entry
  * could be allocated, or ENOMEM if the L2T entry could not be programmed
  * or no work request could be started.  On every failure the L2T entry
  * obtained here is released and f->l2t is reset, so a failed attempt does
  * not leak an L2T reference.
  */
 static int
 set_filter_wr(struct adapter *sc, int fidx)
 {
 	struct filter_entry *f = &sc->tids.ftid_tab[fidx];
 	struct fw_filter_wr *fwr;
 	unsigned int ftid;
 	struct wrq_cookie cookie;
 
 	ASSERT_SYNCHRONIZED_OP(sc);
 
 	if (f->fs.newdmac || f->fs.newvlan) {
 		/* This filter needs an L2T entry; allocate one. */
 		f->l2t = t4_l2t_alloc_switching(sc->l2t);
 		if (f->l2t == NULL)
 			return (EAGAIN);
 		if (t4_l2t_set_switching(sc, f->l2t, f->fs.vlan, f->fs.eport,
 		    f->fs.dmac)) {
 			t4_l2t_release(f->l2t);
 			f->l2t = NULL;
 			return (ENOMEM);
 		}
 	}
 
 	ftid = sc->tids.ftid_base + fidx;
 
 	fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie);
 	if (fwr == NULL) {
 		/*
 		 * Nothing was sent, so no reply will ever clean this entry
 		 * up.  Drop the L2T reference taken above.
 		 */
 		if (f->l2t != NULL) {
 			t4_l2t_release(f->l2t);
 			f->l2t = NULL;
 		}
 		return (ENOMEM);
 	}
 	bzero(fwr, sizeof(*fwr));
 
 	fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR));
 	fwr->len16_pkd = htobe32(FW_LEN16(*fwr));
 	fwr->tid_to_iq =
 	    htobe32(V_FW_FILTER_WR_TID(ftid) |
 		V_FW_FILTER_WR_RQTYPE(f->fs.type) |
 		V_FW_FILTER_WR_NOREPLY(0) |
 		V_FW_FILTER_WR_IQ(f->fs.iq));
 	fwr->del_filter_to_l2tix =
 	    htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) |
 		V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) |
 		V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) |
 		V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) |
 		V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) |
 		V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) |
 		V_FW_FILTER_WR_DMAC(f->fs.newdmac) |
 		V_FW_FILTER_WR_SMAC(f->fs.newsmac) |
 		V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT ||
 		    f->fs.newvlan == VLAN_REWRITE) |
 		V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE ||
 		    f->fs.newvlan == VLAN_REWRITE) |
 		V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) |
 		V_FW_FILTER_WR_TXCHAN(f->fs.eport) |
 		V_FW_FILTER_WR_PRIO(f->fs.prio) |
 		V_FW_FILTER_WR_L2TIX(f->l2t ? f->l2t->idx : 0));
 	fwr->ethtype = htobe16(f->fs.val.ethtype);
 	fwr->ethtypem = htobe16(f->fs.mask.ethtype);
 	fwr->frag_to_ovlan_vldm =
 	    (V_FW_FILTER_WR_FRAG(f->fs.val.frag) |
 		V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) |
 		V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) |
 		V_FW_FILTER_WR_OVLAN_VLD(f->fs.val.vnic_vld) |
 		V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) |
 		V_FW_FILTER_WR_OVLAN_VLDM(f->fs.mask.vnic_vld));
 	fwr->smac_sel = 0;
 	/* The reply is steered to the firmware event queue. */
 	fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) |
 	    V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id));
 	fwr->maci_to_matchtypem =
 	    htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) |
 		V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) |
 		V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) |
 		V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) |
 		V_FW_FILTER_WR_PORT(f->fs.val.iport) |
 		V_FW_FILTER_WR_PORTM(f->fs.mask.iport) |
 		V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) |
 		V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype));
 	fwr->ptcl = f->fs.val.proto;
 	fwr->ptclm = f->fs.mask.proto;
 	fwr->ttyp = f->fs.val.tos;
 	fwr->ttypm = f->fs.mask.tos;
 	fwr->ivlan = htobe16(f->fs.val.vlan);
 	fwr->ivlanm = htobe16(f->fs.mask.vlan);
 	fwr->ovlan = htobe16(f->fs.val.vnic);
 	fwr->ovlanm = htobe16(f->fs.mask.vnic);
 	/* The filter's destination is the WR's "local" side, source "foreign". */
 	bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip));
 	bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm));
 	bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip));
 	bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm));
 	fwr->lp = htobe16(f->fs.val.dport);
 	fwr->lpm = htobe16(f->fs.mask.dport);
 	fwr->fp = htobe16(f->fs.val.sport);
 	fwr->fpm = htobe16(f->fs.mask.sport);
 	if (f->fs.newsmac)
 		bcopy(f->fs.smac, fwr->sma, sizeof (fwr->sma));
 
 	/* Mark the entry busy before the request can be answered. */
 	f->pending = 1;
 	sc->tids.ftids_in_use++;
 
 	commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie);
 	return (0);
 }
 
 /*
  * Send the work request that deletes the filter at table index fidx.  The
  * request is built by t4_mk_filtdelwr and queued on sc->sge.mgmtq; the
  * entry is marked pending before the request is committed.
  *
  * Returns 0, or ENOMEM if no work request could be started.
  */
 static int
 del_filter_wr(struct adapter *sc, int fidx)
 {
 	struct filter_entry *entry = &sc->tids.ftid_tab[fidx];
 	const unsigned int ftid = sc->tids.ftid_base + fidx;
 	struct wrq_cookie cookie;
 	struct fw_filter_wr *wr;
 
 	wr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*wr), 16), &cookie);
 	if (wr == NULL)
 		return (ENOMEM);
 
 	bzero(wr, sizeof (*wr));
 	t4_mk_filtdelwr(ftid, wr, sc->sge.fwq.abs_id);
 
 	entry->pending = 1;
 	commit_wrq_wr(&sc->sge.mgmtq, wr, &cookie);
 	return (0);
 }
 
 /*
  * Handle the reply to a filter work request.
  *
  * The tid in the reply is mapped back to a filter table index and the
  * cookie carries the result.  FW_FILTER_WR_FLT_ADDED marks the entry valid
  * and records its SMT index; any other result clears the entry and
  * decrements the in-use count, logging an error unless the result is
  * FW_FILTER_WR_FLT_DELETED.  Threads sleeping on sc->tids.ftid_tab are
  * woken in either case.  Replies whose tid is not a filter tid are ignored.
  *
  * Always returns 0.
  */
 int
 t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	/* The CPL immediately follows the RSS header. */
 	const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1);
 	unsigned int idx = GET_TID(rpl);
 	unsigned int rc;
 	struct filter_entry *f;
 
 	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
 	    rss->opcode));
 
 	if (is_ftid(sc, idx)) {
 
 		/* Convert the hardware tid into a filter table index. */
 		idx -= sc->tids.ftid_base;
 		f = &sc->tids.ftid_tab[idx];
 		rc = G_COOKIE(rpl->cookie);
 
 		mtx_lock(&sc->tids.ftid_lock);
 		if (rc == FW_FILTER_WR_FLT_ADDED) {
 			KASSERT(f->pending, ("%s: filter[%u] isn't pending.",
 			    __func__, idx));
 			f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff;
 			f->pending = 0;  /* asynchronous setup completed */
 			f->valid = 1;
 		} else {
 			if (rc != FW_FILTER_WR_FLT_DELETED) {
 				/* Add or delete failed, display an error */
 				log(LOG_ERR,
 				    "filter %u setup failed with error %u\n",
 				    idx, rc);
 			}
 
 			/* clear_filter also zeroes pending and valid. */
 			clear_filter(f);
 			sc->tids.ftids_in_use--;
 		}
 		/* Wake any thread sleeping in set_filter or del_filter. */
 		wakeup(&sc->tids.ftid_tab);
 		mtx_unlock(&sc->tids.ftid_lock);
 	}
 
 	return (0);
 }
 
 /*
  * Read the SGE context identified by cntxt->cid / cntxt->mem_id into
  * cntxt->data.  The firmware is asked first when FW_OK is set; if it is
  * not set, or the firmware read fails, the context is read directly via
  * the backdoor instead.
  *
  * Returns EINVAL for an out-of-range cid or an unknown mem_id, the error
  * from begin_synchronized_op, or the negated result of the read that was
  * used last.
  */
 static int
 get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt)
 {
 	int rc, use_backdoor;
 
 	if (cntxt->cid > M_CTXTQID)
 		return (EINVAL);
 
 	if (!(cntxt->mem_id == CTXT_EGRESS || cntxt->mem_id == CTXT_INGRESS ||
 	    cntxt->mem_id == CTXT_FLM || cntxt->mem_id == CTXT_CNM))
 		return (EINVAL);
 
 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt");
 	if (rc != 0)
 		return (rc);
 
 	use_backdoor = 1;
 	if (sc->flags & FW_OK) {
 		rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id,
 		    &cntxt->data[0]);
 		use_backdoor = (rc != 0);
 	}
 
 	/*
 	 * Read via firmware failed or wasn't even attempted.  Read directly
 	 * via the backdoor.
 	 */
 	if (use_backdoor) {
 		rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id,
 		    &cntxt->data[0]);
 	}
 
 	end_synchronized_op(sc, 0);
 	return (rc);
 }
 
 /*
  * Copy a firmware image in from userspace (fw->data, fw->len bytes) and
  * hand it to t4_load_fw.
  *
  * Returns EBUSY if the adapter is already fully initialized, the error
  * from begin_synchronized_op or copyin, or otherwise the negated result
  * of t4_load_fw.
  */
 static int
 load_fw(struct adapter *sc, struct t4_data *fw)
 {
 	int rc;
 	uint8_t *fw_data;
 
 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw");
 	if (rc)
 		return (rc);
 
 	if (sc->flags & FULL_INIT_DONE) {
 		rc = EBUSY;
 		goto done;
 	}
 
 	/*
 	 * M_WAITOK allocations sleep until they succeed and never return
 	 * NULL, so there is no allocation failure to handle here.
 	 *
 	 * NOTE(review): fw->len comes straight from the caller and is not
 	 * bounded before it is used as the allocation size.
 	 */
 	fw_data = malloc(fw->len, M_CXGBE, M_WAITOK);
 
 	rc = copyin(fw->data, fw_data, fw->len);
 	if (rc == 0)
 		rc = -t4_load_fw(sc, fw_data, fw->len);
 
 	free(fw_data, M_CXGBE);
 done:
 	end_synchronized_op(sc, 0);
 	return (rc);
 }
 
 /*
  * Copy mr->len bytes of card memory starting at mr->addr out to the user
  * buffer mr->data, using memory window win.
  *
  * The range is processed in chunks of at most one window aperture.  Each
  * chunk is read 32 bits at a time into a bounce buffer and then copied
  * out.
  *
  * Returns the error from validate_mem_range, the first copyout error, or
  * 0 on success.
  */
 static int
 read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr)
 {
 	uint32_t win_base, win_aperture;
 	uint32_t addr, left, off, chunk, i;
 	uint32_t *bounce;
 	uint8_t *udst;
 	int rc;
 
 	rc = validate_mem_range(sc, mr->addr, mr->len);
 	if (rc != 0)
 		return (rc);
 
 	memwin_info(sc, win, &win_base, &win_aperture);
 	bounce = malloc(min(mr->len, win_aperture), M_CXGBE, M_WAITOK);
 
 	addr = mr->addr;
 	left = mr->len;
 	udst = (void *)mr->data;
 
 	while (left) {
 		off = position_memwin(sc, win, addr);
 
 		/* number of bytes that we'll copy in the inner loop */
 		chunk = min(left, win_aperture - off);
 		for (i = 0; i < chunk; i += 4)
 			bounce[i / 4] = t4_read_reg(sc, win_base + off + i);
 
 		rc = copyout(bounce, udst, chunk);
 		if (rc != 0)
 			break;
 
 		addr += chunk;
 		udst += chunk;
 		left -= chunk;
 	}
 
 	free(bounce, M_CXGBE);
 	return (rc);
 }
 
 /*
  * Read i2cd->len bytes from the i2c device described by i2cd (port,
  * device address, offset) into i2cd->data.
  *
  * Returns EINVAL for a zero length or an invalid port, EFBIG if the
  * request does not fit in i2cd->data, the error from
  * begin_synchronized_op, or otherwise the negated result of t4_i2c_rd.
  */
 static int
 read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd)
 {
 	int rc;
 
 	if (i2cd->len == 0)
 		return (EINVAL);
 	if (i2cd->port_id >= sc->params.nports)
 		return (EINVAL);
 
 	if (i2cd->len > sizeof(i2cd->data))
 		return (EFBIG);
 
 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd");
 	if (rc != 0)
 		return (rc);
 
 	rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr,
 	    i2cd->offset, i2cd->len, &i2cd->data[0]);
 
 	end_synchronized_op(sc, 0);
 	return (rc);
 }
 
 /*
  * Range check that lets negative values through.  Returns 1 when val is
  * negative or lies in [lo, hi] (both ends inclusive) and 0 otherwise.
  */
 static int
 in_range(int val, int lo, int hi)
 {
 
 	if (val < 0)
 		return (1);
 
 	return (lo <= val && val <= hi);
 }
 
 /*
  * Configure a TX scheduling class from the request in p.
  *
  * Two sub-commands are handled: SCHED_CLASS_SUBCMD_CONFIG is passed to
  * t4_sched_config, and SCHED_CLASS_SUBCMD_PARAMS is validated, has its
  * unset (negative) optional parameters defaulted to 0 in place in *p, and
  * is then passed to t4_sched_params.
  *
  * Returns EAGAIN if the adapter is not fully initialized, EINVAL for an
  * unknown sub-command, type, level, mode, rate unit or rate mode or for a
  * missing required parameter, ERANGE for an out-of-range parameter, the
  * error from begin_synchronized_op, or otherwise the negated result of the
  * firmware call.
  */
 static int
 set_sched_class(struct adapter *sc, struct t4_sched_params *p)
 {
 	int fw_subcmd, fw_type, rc;
 
 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsc");
 	if (rc)
 		return (rc);
 
 	if (!(sc->flags & FULL_INIT_DONE)) {
 		rc = EAGAIN;
 		goto done;
 	}
 
 	/*
 	 * Translate the cxgbetool parameters into T4 firmware parameters.  (The
 	 * sub-command and type are in common locations.)
 	 */
 	if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG)
 		fw_subcmd = FW_SCHED_SC_CONFIG;
 	else if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS)
 		fw_subcmd = FW_SCHED_SC_PARAMS;
 	else {
 		rc = EINVAL;
 		goto done;
 	}
 	/* Packet scheduling is the only type accepted. */
 	if (p->type == SCHED_CLASS_TYPE_PACKET)
 		fw_type = FW_SCHED_TYPE_PKTSCHED;
 	else {
 		rc = EINVAL;
 		goto done;
 	}
 
 	if (fw_subcmd == FW_SCHED_SC_CONFIG) {
 		/* Vet our parameters ..*/
 		if (p->u.config.minmax < 0) {
 			rc = EINVAL;
 			goto done;
 		}
 
 		/* And pass the request to the firmware ...*/
 		rc = -t4_sched_config(sc, fw_type, p->u.config.minmax, 1);
 		goto done;
 	}
 
 	if (fw_subcmd == FW_SCHED_SC_PARAMS) {
 		int fw_level;
 		int fw_mode;
 		int fw_rateunit;
 		int fw_ratemode;
 
 		if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL)
 			fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL;
 		else if (p->u.params.level == SCHED_CLASS_LEVEL_CL_WRR)
 			fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR;
 		else if (p->u.params.level == SCHED_CLASS_LEVEL_CH_RL)
 			fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL;
 		else {
 			rc = EINVAL;
 			goto done;
 		}
 
 		if (p->u.params.mode == SCHED_CLASS_MODE_CLASS)
 			fw_mode = FW_SCHED_PARAMS_MODE_CLASS;
 		else if (p->u.params.mode == SCHED_CLASS_MODE_FLOW)
 			fw_mode = FW_SCHED_PARAMS_MODE_FLOW;
 		else {
 			rc = EINVAL;
 			goto done;
 		}
 
 		if (p->u.params.rateunit == SCHED_CLASS_RATEUNIT_BITS)
 			fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
 		else if (p->u.params.rateunit == SCHED_CLASS_RATEUNIT_PKTS)
 			fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE;
 		else {
 			rc = EINVAL;
 			goto done;
 		}
 
 		if (p->u.params.ratemode == SCHED_CLASS_RATEMODE_REL)
 			fw_ratemode = FW_SCHED_PARAMS_RATE_REL;
 		else if (p->u.params.ratemode == SCHED_CLASS_RATEMODE_ABS)
 			fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
 		else {
 			rc = EINVAL;
 			goto done;
 		}
 
 		/*
 		 * Vet our parameters ...
 		 *
 		 * in_range() accepts negative values, so an unset parameter
 		 * passes here and is dealt with further down.
 		 */
 		if (!in_range(p->u.params.channel, 0, 3) ||
 		    !in_range(p->u.params.cl, 0, is_t4(sc) ? 15 : 16) ||
 		    !in_range(p->u.params.minrate, 0, 10000000) ||
 		    !in_range(p->u.params.maxrate, 0, 10000000) ||
 		    !in_range(p->u.params.weight, 0, 100)) {
 			rc = ERANGE;
 			goto done;
 		}
 
 		/*
 		 * Translate any unset parameters into the firmware's
 		 * nomenclature and/or fail the call if the parameters
 		 * are required ...
 		 */
 		if (p->u.params.rateunit < 0 || p->u.params.ratemode < 0 ||
 		    p->u.params.channel < 0 || p->u.params.cl < 0) {
 			rc = EINVAL;
 			goto done;
 		}
 		if (p->u.params.minrate < 0)
 			p->u.params.minrate = 0;
 		/* maxrate is required for the two rate-limiting levels. */
 		if (p->u.params.maxrate < 0) {
 			if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL ||
 			    p->u.params.level == SCHED_CLASS_LEVEL_CH_RL) {
 				rc = EINVAL;
 				goto done;
 			} else
 				p->u.params.maxrate = 0;
 		}
 		/* weight is required for the WRR level. */
 		if (p->u.params.weight < 0) {
 			if (p->u.params.level == SCHED_CLASS_LEVEL_CL_WRR) {
 				rc = EINVAL;
 				goto done;
 			} else
 				p->u.params.weight = 0;
 		}
 		/* pktsize is required for the two rate-limiting levels. */
 		if (p->u.params.pktsize < 0) {
 			if (p->u.params.level == SCHED_CLASS_LEVEL_CL_RL ||
 			    p->u.params.level == SCHED_CLASS_LEVEL_CH_RL) {
 				rc = EINVAL;
 				goto done;
 			} else
 				p->u.params.pktsize = 0;
 		}
 
 		/* See what the firmware thinks of the request ... */
 		rc = -t4_sched_params(sc, fw_type, fw_level, fw_mode,
 		    fw_rateunit, fw_ratemode, p->u.params.channel,
 		    p->u.params.cl, p->u.params.minrate, p->u.params.maxrate,
 		    p->u.params.weight, p->u.params.pktsize, 1);
 		goto done;
 	}
 
 	/* Both known sub-commands jump to done above. */
 	rc = EINVAL;
 done:
 	end_synchronized_op(sc, 0);
 	return (rc);
 }
 
 /*
  * Bind TX queue(s) of a port's main VI to the scheduling class p->cl.
  *
  * A non-negative p->queue selects a single TX queue; a negative p->queue
  * applies the class to every TX queue of the VI.  A negative p->cl is sent
  * to the firmware as 0xffffffff.
  *
  * Returns EAGAIN if the adapter is not fully initialized, EINVAL for a bad
  * port, queue or class, the error from begin_synchronized_op, or the
  * negated result of t4_set_params (the first failure when several queues
  * are updated).
  */
 static int
 set_sched_queue(struct adapter *sc, struct t4_sched_queue *p)
 {
 	struct port_info *pi;
 	struct vi_info *vi;
 	struct sge_txq *txq;
 	uint32_t fw_mnem, fw_queue, fw_class;
 	int i, rc;
 
 	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsq");
 	if (rc != 0)
 		return (rc);
 
 	if ((sc->flags & FULL_INIT_DONE) == 0) {
 		rc = EAGAIN;
 		goto done;
 	}
 
 	if (p->port >= sc->params.nports) {
 		rc = EINVAL;
 		goto done;
 	}
 
 	/* XXX: Only supported for the main VI. */
 	pi = sc->port[p->port];
 	vi = &pi->vi[0];
 	if (!(in_range(p->queue, 0, vi->ntxq - 1) && in_range(p->cl, 0, 7))) {
 		rc = EINVAL;
 		goto done;
 	}
 
 	/*
 	 * Template for the FW_PARAMS_CMD mnemonic (the queue's context id is
 	 * OR'ed in per queue) and the value to set (the scheduling class).
 	 */
 	fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
 	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH));
 	fw_class = p->cl < 0 ? 0xffffffff : p->cl;
 
 	if (p->queue >= 0) {
 		/* Only the one TX queue that was asked for. */
 		txq = &sc->sge.txq[vi->first_txq + p->queue];
 		fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
 		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue,
 		    &fw_class);
 	} else {
 		/* Every TX queue of the interface; stop at the first error. */
 		for_each_txq(vi, i, txq) {
 			fw_queue = (fw_mnem |
 			    V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
 			rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1,
 			    &fw_queue, &fw_class);
 			if (rc != 0)
 				goto done;
 		}
 		rc = 0;
 	}
 done:
 	end_synchronized_op(sc, 0);
 	return (rc);
 }
 
 /*
  * Return the value pci_find_cap stores for capability cap of the
  * adapter's device, or 0 if pci_find_cap reports an error.
  */
 int
 t4_os_find_pci_capability(struct adapter *sc, int cap)
 {
 	int off;
 
 	if (pci_find_cap(sc->dev, cap, &off) != 0)
 		return (0);
 
 	return (off);
 }
 
 /*
  * Save the PCI configuration state of the adapter's device by calling
  * pci_cfg_save with the device's ivars.  Always returns 0.
  */
 int
 t4_os_pci_save_state(struct adapter *sc)
 {
 	device_t dev = sc->dev;
 	struct pci_devinfo *dinfo = device_get_ivars(dev);
 
 	pci_cfg_save(dev, dinfo, 0);
 
 	return (0);
 }
 
 /*
  * Restore the PCI configuration state of the adapter's device by calling
  * pci_cfg_restore with the device's ivars.  Always returns 0.
  */
 int
 t4_os_pci_restore_state(struct adapter *sc)
 {
 	device_t dev = sc->dev;
 	struct pci_devinfo *dinfo = device_get_ivars(dev);
 
 	pci_cfg_restore(dev, dinfo);
 
 	return (0);
 }
 
 /*
  * React to a change of the module on port idx: rebuild the media list
  * of every VI on the port and print a message describing the module on the
  * main VI's ifnet.
  */
 void
 t4_os_portmod_changed(const struct adapter *sc, int idx)
 {
 	/* Names for the module types that have one, indexed by mod_type. */
 	static const char *mod_str[] = {
 		NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
 	};
 	struct port_info *pi = sc->port[idx];
 	struct vi_info *vi;
 	struct ifnet *ifp;
 	int v;
 
 	for_each_vi(pi, v, vi) {
 		build_medialist(pi, &vi->media);
 	}
 
 	ifp = pi->vi[0].ifp;
 
 	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE) {
 		if_printf(ifp, "transceiver unplugged.\n");
 		return;
 	}
 	if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN) {
 		if_printf(ifp, "unknown transceiver inserted.\n");
 		return;
 	}
 	if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED) {
 		if_printf(ifp, "unsupported transceiver inserted.\n");
 		return;
 	}
 
 	if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) {
 		if_printf(ifp, "%s transceiver inserted.\n",
 		    mod_str[pi->mod_type]);
 	} else {
 		if_printf(ifp, "transceiver (type %d) inserted.\n",
 		    pi->mod_type);
 	}
 }
 
 /*
  * Propagate a link state change on port idx to every VI of the port that
  * has an ifnet.
  *
  * pi->linkdnrc is set to -1 when the link is up; when the link is down it
  * is set to reason, but only if reason is non-negative.  On link up the
  * ifnet's baudrate is refreshed from the port's link speed before the
  * state change is announced.
  */
 void
 t4_os_link_changed(struct adapter *sc, int idx, int link_stat, int reason)
 {
 	struct port_info *pi = sc->port[idx];
 	struct vi_info *vi;
 	struct ifnet *ifp;
 	int v;
 
 	if (link_stat)
 		pi->linkdnrc = -1;
 	else if (reason >= 0)
 		pi->linkdnrc = reason;
 
 	for_each_vi(pi, v, vi) {
 		ifp = vi->ifp;
 		if (ifp != NULL) {
 			if (link_stat) {
 				ifp->if_baudrate =
 				    IF_Mbps(pi->link_cfg.speed);
 				if_link_state_change(ifp, LINK_STATE_UP);
 			} else {
 				if_link_state_change(ifp, LINK_STATE_DOWN);
 			}
 		}
 	}
 }
 
 /*
  * Call func(adapter, arg) for every adapter on t4_list, holding
  * t4_list_lock shared for the duration of the walk.
  */
 void
 t4_iterate(void (*func)(struct adapter *, void *), void *arg)
 {
 	struct adapter *cur;
 
 	sx_slock(&t4_list_lock);
 	SLIST_FOREACH(cur, &t4_list, link) {
 		/*
 		 * The callback must not assume anything about the state of
 		 * the adapter; all that is guaranteed is that its sc_lock is
 		 * a valid lock.
 		 */
 		(*func)(cur, arg);
 	}
 	sx_sunlock(&t4_list_lock);
 }
 
 /*
  * Open handler for the character device.  Does nothing and always
  * succeeds.
  */
 static int
 t4_open(struct cdev *dev, int flags, int type, struct thread *td)
 {
 
 	return (0);
 }
 
 /*
  * Close handler for the character device.  Does nothing and always
  * succeeds.
  */
 static int
 t4_close(struct cdev *dev, int flags, int type, struct thread *td)
 {
 
 	return (0);
 }
 
 /*
  * ioctl handler for the character device.
  *
  * The caller must hold PRIV_DRIVER.  Register reads/writes, the register
  * dump and statistics clearing are handled inline; everything else is
  * dispatched to a helper.  data points at the ioctl argument, already
  * copied into the kernel.
  *
  * Returns the error from priv_check, EFAULT for a register access that is
  * misaligned or does not lie entirely inside the register window, EINVAL
  * for a bad access size, a bad port or an unknown command, ENOBUFS when
  * the register dump buffer is too small (regs->len is updated with the
  * required size), or otherwise whatever the dispatched helper returns.
  */
 static int
 t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
     struct thread *td)
 {
 	int rc;
 	struct adapter *sc = dev->si_drv1;
 
 	rc = priv_check(td, PRIV_DRIVER);
 	if (rc != 0)
 		return (rc);
 
 	switch (cmd) {
 	case CHELSIO_T4_GETREG: {
 		struct t4_reg *edata = (struct t4_reg *)data;
 
 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
 			return (EFAULT);
 
 		if (edata->size == 4)
 			edata->val = t4_read_reg(sc, edata->addr);
 		else if (edata->size == 8) {
 			/*
 			 * All 8 bytes must be inside the window.  addr is
 			 * known to be below mmio_len at this point, so the
 			 * subtraction cannot wrap.
 			 */
 			if (sc->mmio_len - edata->addr < 8)
 				return (EFAULT);
 			edata->val = t4_read_reg64(sc, edata->addr);
 		} else
 			return (EINVAL);
 
 		break;
 	}
 	case CHELSIO_T4_SETREG: {
 		struct t4_reg *edata = (struct t4_reg *)data;
 
 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
 			return (EFAULT);
 
 		if (edata->size == 4) {
 			/* A 32-bit write must not carry high-order bits. */
 			if (edata->val & 0xffffffff00000000)
 				return (EINVAL);
 			t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
 		} else if (edata->size == 8) {
 			/* Same bound as the 8-byte read above. */
 			if (sc->mmio_len - edata->addr < 8)
 				return (EFAULT);
 			t4_write_reg64(sc, edata->addr, edata->val);
 		} else
 			return (EINVAL);
 		break;
 	}
 	case CHELSIO_T4_REGDUMP: {
 		struct t4_regdump *regs = (struct t4_regdump *)data;
 		int reglen = is_t4(sc) ? T4_REGDUMP_SIZE : T5_REGDUMP_SIZE;
 		uint8_t *buf;
 
 		if (regs->len < reglen) {
 			regs->len = reglen; /* hint to the caller */
 			return (ENOBUFS);
 		}
 
 		regs->len = reglen;
 		/* Zeroed so that bytes not filled in are not leaked. */
 		buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
 		t4_get_regs(sc, regs, buf);
 		rc = copyout(buf, regs->data, reglen);
 		free(buf, M_CXGBE);
 		break;
 	}
 	case CHELSIO_T4_GET_FILTER_MODE:
 		rc = get_filter_mode(sc, (uint32_t *)data);
 		break;
 	case CHELSIO_T4_SET_FILTER_MODE:
 		rc = set_filter_mode(sc, *(uint32_t *)data);
 		break;
 	case CHELSIO_T4_GET_FILTER:
 		rc = get_filter(sc, (struct t4_filter *)data);
 		break;
 	case CHELSIO_T4_SET_FILTER:
 		rc = set_filter(sc, (struct t4_filter *)data);
 		break;
 	case CHELSIO_T4_DEL_FILTER:
 		rc = del_filter(sc, (struct t4_filter *)data);
 		break;
 	case CHELSIO_T4_GET_SGE_CONTEXT:
 		rc = get_sge_context(sc, (struct t4_sge_context *)data);
 		break;
 	case CHELSIO_T4_LOAD_FW:
 		rc = load_fw(sc, (struct t4_data *)data);
 		break;
 	case CHELSIO_T4_GET_MEM:
 		rc = read_card_mem(sc, 2, (struct t4_mem_range *)data);
 		break;
 	case CHELSIO_T4_GET_I2C:
 		rc = read_i2c(sc, (struct t4_i2c_data *)data);
 		break;
 	case CHELSIO_T4_CLEAR_STATS: {
 		int i, v;
 		u_int port_id = *(uint32_t *)data;
 		struct port_info *pi;
 		struct vi_info *vi;
 
 		if (port_id >= sc->params.nports)
 			return (EINVAL);
 		pi = sc->port[port_id];
 
 		/* MAC stats */
 		t4_clr_port_stats(sc, pi->tx_chan);
 		pi->tx_parse_error = 0;
 		mtx_lock(&sc->regwin_lock);
 		for_each_vi(pi, v, vi) {
 			if (vi->flags & VI_INIT_DONE)
 				t4_clr_vi_stats(sc, vi->viid);
 		}
 		mtx_unlock(&sc->regwin_lock);
 
 		/*
 		 * Since this command accepts a port, clear stats for
 		 * all VIs on this port.
 		 */
 		for_each_vi(pi, v, vi) {
 			if (vi->flags & VI_INIT_DONE) {
 				struct sge_rxq *rxq;
 				struct sge_txq *txq;
 				struct sge_wrq *wrq;
 
 				/* Netmap VIs are skipped. */
 				if (vi->flags & VI_NETMAP)
 					continue;
 
 				for_each_rxq(vi, i, rxq) {
 #if defined(INET) || defined(INET6)
 					rxq->lro.lro_queued = 0;
 					rxq->lro.lro_flushed = 0;
 #endif
 					rxq->rxcsum = 0;
 					rxq->vlan_extraction = 0;
 				}
 
 				for_each_txq(vi, i, txq) {
 					txq->txcsum = 0;
 					txq->tso_wrs = 0;
 					txq->vlan_insertion = 0;
 					txq->imm_wrs = 0;
 					txq->sgl_wrs = 0;
 					txq->txpkt_wrs = 0;
 					txq->txpkts0_wrs = 0;
 					txq->txpkts1_wrs = 0;
 					txq->txpkts0_pkts = 0;
 					txq->txpkts1_pkts = 0;
 					mp_ring_reset_stats(txq->r);
 				}
 
 #ifdef TCP_OFFLOAD
 				/* nothing to clear for each ofld_rxq */
 
 				for_each_ofld_txq(vi, i, wrq) {
 					wrq->tx_wrs_direct = 0;
 					wrq->tx_wrs_copied = 0;
 				}
 #endif
 
 				/* The port's control queue is cleared once. */
 				if (IS_MAIN_VI(vi)) {
 					wrq = &sc->sge.ctrlq[pi->port_id];
 					wrq->tx_wrs_direct = 0;
 					wrq->tx_wrs_copied = 0;
 				}
 			}
 		}
 		break;
 	}
 	case CHELSIO_T4_SCHED_CLASS:
 		rc = set_sched_class(sc, (struct t4_sched_params *)data);
 		break;
 	case CHELSIO_T4_SCHED_QUEUE:
 		rc = set_sched_queue(sc, (struct t4_sched_queue *)data);
 		break;
 	case CHELSIO_T4_GET_TRACER:
 		rc = t4_get_tracer(sc, (struct t4_tracer *)data);
 		break;
 	case CHELSIO_T4_SET_TRACER:
 		rc = t4_set_tracer(sc, (struct t4_tracer *)data);
 		break;
 	default:
 		rc = EINVAL;
 	}
 
 	return (rc);
 }
 
 #ifdef TCP_OFFLOAD
 /*
  * Program the ULP RX iSCSI registers: the tag mask goes into
  * A_ULP_RX_ISCSI_TAGMASK and the four entries of pgsz_order are packed
  * into A_ULP_RX_ISCSI_PSZ.
  */
 void
 t4_iscsi_init(struct adapter *sc, u_int tag_mask, const u_int *pgsz_order)
 {
 	uint32_t psz;
 
 	psz = V_HPZ0(pgsz_order[0]) | V_HPZ1(pgsz_order[1]) |
 	    V_HPZ2(pgsz_order[2]) | V_HPZ3(pgsz_order[3]);
 
 	t4_write_reg(sc, A_ULP_RX_ISCSI_TAGMASK, tag_mask);
 	t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, psz);
 }
 
 /*
  * Enable or disable TOE on a VI.
  *
  * pi->uld_vis counts the VIs of the port that have TOE enabled and the
  * port's bit in sc->offload_map is set while that count is non-zero.
  * Enabling initializes the VI (and the port's main VI) if necessary,
  * activates the TOM ULD if it is not already active and, on a best-effort
  * basis, the iWARP and iSCSI ULDs as well.
  *
  * Must be called from within a synchronized operation.
  *
  * Returns 0 on success, ENODEV if the adapter is not offload capable, or
  * the error from cxgbe_init_synchronized / t4_activate_uld(ULD_TOM).
  */
 static int
 toe_capability(struct vi_info *vi, int enable)
 {
 	int rc;
 	struct port_info *pi = vi->pi;
 	struct adapter *sc = pi->adapter;
 
 	ASSERT_SYNCHRONIZED_OP(sc);
 
 	if (!is_offload(sc))
 		return (ENODEV);
 
 	if (enable) {
 		if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) {
 			/* TOE is already enabled. */
 			return (0);
 		}
 
 		/*
 		 * We need the port's queues around so that we're able to send
 		 * and receive CPLs to/from the TOE even if the ifnet for this
 		 * port has never been UP'd administratively.
 		 */
 		if (!(vi->flags & VI_INIT_DONE)) {
 			rc = cxgbe_init_synchronized(vi);
 			if (rc)
 				return (rc);
 		}
 		/* The port's main VI must be initialized as well. */
 		if (!(pi->vi[0].flags & VI_INIT_DONE)) {
 			rc = cxgbe_init_synchronized(&pi->vi[0]);
 			if (rc)
 				return (rc);
 		}
 
 		if (isset(&sc->offload_map, pi->port_id)) {
 			/* TOE is enabled on another VI of this port. */
 			pi->uld_vis++;
 			return (0);
 		}
 
 		if (!uld_active(sc, ULD_TOM)) {
 			rc = t4_activate_uld(sc, ULD_TOM);
 			if (rc == EAGAIN) {
 				log(LOG_WARNING,
 				    "You must kldload t4_tom.ko before trying "
 				    "to enable TOE on a cxgbe interface.\n");
 			}
 			if (rc != 0)
 				return (rc);
 			KASSERT(sc->tom_softc != NULL,
 			    ("%s: TOM activated but softc NULL", __func__));
 			KASSERT(uld_active(sc, ULD_TOM),
 			    ("%s: TOM activated but flag not set", __func__));
 		}
 
 		/* Activate iWARP and iSCSI too, if the modules are loaded. */
 		if (!uld_active(sc, ULD_IWARP))
 			(void) t4_activate_uld(sc, ULD_IWARP);
 		if (!uld_active(sc, ULD_ISCSI))
 			(void) t4_activate_uld(sc, ULD_ISCSI);
 
 		/* First VI on this port with TOE: mark the port offloaded. */
 		pi->uld_vis++;
 		setbit(&sc->offload_map, pi->port_id);
 	} else {
 		/*
 		 * NOTE(review): the count is decremented unconditionally, so
 		 * this presumably relies on the caller disabling only VIs
 		 * that have TOE enabled -- confirm against the callers.
 		 */
 		pi->uld_vis--;
 
 		/* Other VIs still use TOE, or the port was never marked. */
 		if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0)
 			return (0);
 
 		KASSERT(uld_active(sc, ULD_TOM),
 		    ("%s: TOM never initialized?", __func__));
 		clrbit(&sc->offload_map, pi->port_id);
 	}
 
 	return (0);
 }
 
 /*
  * Put an upper layer driver on the global ULD list with a zero refcount.
  * Fails with EEXIST if a ULD with the same uld_id is already registered.
  */
 int
 t4_register_uld(struct uld_info *ui)
 {
 	struct uld_info *cur;
 	int error = 0;
 
 	sx_xlock(&t4_uld_list_lock);
 
 	/* Only one ULD per id is allowed. */
 	SLIST_FOREACH(cur, &t4_uld_list, link) {
 		if (cur->uld_id == ui->uld_id) {
 			error = EEXIST;
 			break;
 		}
 	}
 
 	if (error == 0) {
 		SLIST_INSERT_HEAD(&t4_uld_list, ui, link);
 		ui->refcount = 0;
 	}
 
 	sx_xunlock(&t4_uld_list_lock);
 
 	return (error);
 }
 
 int
 t4_unregister_uld(struct uld_info *ui)
 {
 	int rc = EINVAL;
 	struct uld_info *u;
 
 	sx_xlock(&t4_uld_list_lock);
 
 	SLIST_FOREACH(u, &t4_uld_list, link) {
 	    if (u == ui) {
 		    if (ui->refcount > 0) {
 			    rc = EBUSY;
 			    goto done;
 		    }
 
 		    SLIST_REMOVE(&t4_uld_list, ui, uld_info, link);
 		    rc = 0;
 		    goto done;
 	    }
 	}
 done:
 	sx_xunlock(&t4_uld_list_lock);
 	return (rc);
 }
 
 /*
  * Activate the ULD with the given id on this adapter, running
  * adapter_full_init() first if the adapter is not FULL_INIT_DONE.
  *
  * Returns EINVAL for an out-of-range id, EAGAIN if no ULD with that id is
  * registered (kldload the module with this ULD and try again), or the error
  * from adapter initialization or from the ULD's activate method.  On success
  * the ULD is marked active on the adapter and its refcount is bumped.
  */
 int
 t4_activate_uld(struct adapter *sc, int id)
 {
 	struct uld_info *uld;
 	int error;
 
 	ASSERT_SYNCHRONIZED_OP(sc);
 
 	if (id < 0 || id > ULD_MAX)
 		return (EINVAL);
 
 	sx_slock(&t4_uld_list_lock);
 
 	/* Look the ULD up; uld ends up NULL if the id is not registered. */
 	SLIST_FOREACH(uld, &t4_uld_list, link) {
 		if (uld->uld_id == id)
 			break;
 	}
 
 	if (uld == NULL) {
 		error = EAGAIN;
 	} else {
 		error = 0;
 		if (!(sc->flags & FULL_INIT_DONE))
 			error = adapter_full_init(sc);
 		if (error == 0)
 			error = uld->activate(sc);
 		if (error == 0) {
 			setbit(&sc->active_ulds, id);
 			uld->refcount++;
 		}
 	}
 
 	sx_sunlock(&t4_uld_list_lock);
 
 	return (error);
 }
 
 /*
  * Deactivate the ULD with the given id on this adapter.
  *
  * Returns EINVAL for an out-of-range id, ENXIO if no ULD with that id is
  * registered, or the result of the ULD's deactivate method.  On success the
  * ULD is marked inactive on the adapter and its refcount is dropped.
  */
 int
 t4_deactivate_uld(struct adapter *sc, int id)
 {
 	struct uld_info *uld;
 	int error;
 
 	ASSERT_SYNCHRONIZED_OP(sc);
 
 	if (id < 0 || id > ULD_MAX)
 		return (EINVAL);
 
 	sx_slock(&t4_uld_list_lock);
 
 	/* Look the ULD up; uld ends up NULL if the id is not registered. */
 	SLIST_FOREACH(uld, &t4_uld_list, link) {
 		if (uld->uld_id == id)
 			break;
 	}
 
 	if (uld == NULL) {
 		error = ENXIO;
 	} else {
 		error = uld->deactivate(sc);
 		if (error == 0) {
 			clrbit(&sc->active_ulds, id);
 			uld->refcount--;
 		}
 	}
 
 	sx_sunlock(&t4_uld_list_lock);
 
 	return (error);
 }
 
 /*
  * Report whether the ULD with the given id is marked active on this adapter.
  * The result is non-zero if its bit is set in sc->active_ulds.
  */
 int
 uld_active(struct adapter *sc, int uld_id)
 {
 	int active;
 
 	MPASS(uld_id >= 0 && uld_id <= ULD_MAX);
 
 	active = isset(&sc->active_ulds, uld_id);
 
 	return (active);
 }
 #endif
 
 /*
  * Come up with reasonable defaults for some of the tunables, provided they're
  * not set by the user (in which case we'll use the values as is).
  */
 static void
 tweak_tunables(void)
 {
 	int nc = mp_ncpus;	/* our snapshot of the number of CPUs */
 
 	if (t4_ntxq10g < 1) {
 #ifdef RSS
 		t4_ntxq10g = rss_getnumbuckets();
 #else
 		t4_ntxq10g = min(nc, NTXQ_10G);
 #endif
 	}
 
 	if (t4_ntxq1g < 1) {
 #ifdef RSS
 		/* XXX: way too many for 1GbE? */
 		t4_ntxq1g = rss_getnumbuckets();
 #else
 		t4_ntxq1g = min(nc, NTXQ_1G);
 #endif
 	}
 
 	if (t4_nrxq10g < 1) {
 #ifdef RSS
 		t4_nrxq10g = rss_getnumbuckets();
 #else
 		t4_nrxq10g = min(nc, NRXQ_10G);
 #endif
 	}
 
 	if (t4_nrxq1g < 1) {
 #ifdef RSS
 		/* XXX: way too many for 1GbE? */
 		t4_nrxq1g = rss_getnumbuckets();
 #else
 		t4_nrxq1g = min(nc, NRXQ_1G);
 #endif
 	}
 
 #ifdef TCP_OFFLOAD
 	if (t4_nofldtxq10g < 1)
 		t4_nofldtxq10g = min(nc, NOFLDTXQ_10G);
 
 	if (t4_nofldtxq1g < 1)
 		t4_nofldtxq1g = min(nc, NOFLDTXQ_1G);
 
 	if (t4_nofldrxq10g < 1)
 		t4_nofldrxq10g = min(nc, NOFLDRXQ_10G);
 
 	if (t4_nofldrxq1g < 1)
 		t4_nofldrxq1g = min(nc, NOFLDRXQ_1G);
 
 	if (t4_toecaps_allowed == -1)
 		t4_toecaps_allowed = FW_CAPS_CONFIG_TOE;
 #else
 	if (t4_toecaps_allowed == -1)
 		t4_toecaps_allowed = 0;
 #endif
 
 #ifdef DEV_NETMAP
 	if (t4_nnmtxq10g < 1)
 		t4_nnmtxq10g = min(nc, NNMTXQ_10G);
 
 	if (t4_nnmtxq1g < 1)
 		t4_nnmtxq1g = min(nc, NNMTXQ_1G);
 
 	if (t4_nnmrxq10g < 1)
 		t4_nnmrxq10g = min(nc, NNMRXQ_10G);
 
 	if (t4_nnmrxq1g < 1)
 		t4_nnmrxq1g = min(nc, NNMRXQ_1G);
 #endif
 
 	if (t4_tmr_idx_10g < 0 || t4_tmr_idx_10g >= SGE_NTIMERS)
 		t4_tmr_idx_10g = TMR_IDX_10G;
 
 	if (t4_pktc_idx_10g < -1 || t4_pktc_idx_10g >= SGE_NCOUNTERS)
 		t4_pktc_idx_10g = PKTC_IDX_10G;
 
 	if (t4_tmr_idx_1g < 0 || t4_tmr_idx_1g >= SGE_NTIMERS)
 		t4_tmr_idx_1g = TMR_IDX_1G;
 
 	if (t4_pktc_idx_1g < -1 || t4_pktc_idx_1g >= SGE_NCOUNTERS)
 		t4_pktc_idx_1g = PKTC_IDX_1G;
 
 	if (t4_qsize_txq < 128)
 		t4_qsize_txq = 128;
 
 	if (t4_qsize_rxq < 128)
 		t4_qsize_rxq = 128;
 	while (t4_qsize_rxq & 7)
 		t4_qsize_rxq++;
 
 	t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX;
 }
 
 /* Serializes the MOD_LOAD and MOD_UNLOAD handling in mod_event(). */
 static struct sx mlu;	/* mod load unload */
 SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload");
 
 /*
  * Module event handler.  It keeps a load count in the static 'loaded' so that
  * the driver-wide state (SGE module state, the adapter and ULD lists with
  * their locks, the tracer, the tunables) is set up on the first MOD_LOAD and
  * torn down on the last MOD_UNLOAD only.
  *
  * The final unload is refused with EBUSY while any adapter or ULD is still on
  * its list, or while clusters with the driver's custom free routine are
  * still outstanding after a few retries.  Every EBUSY exit restores the load
  * count, because the module stays loaded in that case and a later unload
  * attempt must run the same checks and the teardown again.
  *
  * Events other than MOD_LOAD and MOD_UNLOAD are accepted and return 0.
  */
 static int
 mod_event(module_t mod, int cmd, void *arg)
 {
 	int rc = 0;
 	static int loaded = 0;
 
 	switch (cmd) {
 	case MOD_LOAD:
 		sx_xlock(&mlu);
 		if (loaded++ == 0) {
 			t4_sge_modload();
 			sx_init(&t4_list_lock, "T4/T5 adapters");
 			SLIST_INIT(&t4_list);
 #ifdef TCP_OFFLOAD
 			sx_init(&t4_uld_list_lock, "T4/T5 ULDs");
 			SLIST_INIT(&t4_uld_list);
 #endif
 			t4_tracer_modload();
 			tweak_tunables();
 		}
 		sx_xunlock(&mlu);
 		break;
 
 	case MOD_UNLOAD:
 		sx_xlock(&mlu);
 		if (--loaded == 0) {
 			int tries;
 
 			sx_slock(&t4_list_lock);
 			if (!SLIST_EMPTY(&t4_list)) {
 				rc = EBUSY;
 				loaded++;	/* undo earlier decrement */
 				sx_sunlock(&t4_list_lock);
 				goto done_unload;
 			}
 #ifdef TCP_OFFLOAD
 			sx_slock(&t4_uld_list_lock);
 			if (!SLIST_EMPTY(&t4_uld_list)) {
 				rc = EBUSY;
 				loaded++;	/* undo earlier decrement */
 				sx_sunlock(&t4_uld_list_lock);
 				sx_sunlock(&t4_list_lock);
 				goto done_unload;
 			}
 #endif
 			/*
 			 * Give outstanding clusters up to 5 chances, 2 seconds
 			 * apart, to be returned before giving up.
 			 */
 			tries = 0;
 			while (tries++ < 5 && t4_sge_extfree_refs() != 0) {
 				uprintf("%ju clusters with custom free routine "
 				    "still in use.\n", t4_sge_extfree_refs());
 				pause("t4unload", 2 * hz);
 			}
 #ifdef TCP_OFFLOAD
 			sx_sunlock(&t4_uld_list_lock);
 #endif
 			sx_sunlock(&t4_list_lock);
 
 			if (t4_sge_extfree_refs() == 0) {
 				t4_tracer_modunload();
 #ifdef TCP_OFFLOAD
 				sx_destroy(&t4_uld_list_lock);
 #endif
 				sx_destroy(&t4_list_lock);
 				t4_sge_modunload();
 				loaded = 0;
 			} else {
 				rc = EBUSY;
 				loaded++;	/* undo earlier decrement */
 			}
 		}
 done_unload:
 		sx_xunlock(&mlu);
 		break;
 	}
 
 	return (rc);
 }
 
 /* Device classes for the nexus, port and virtual-interface drivers below. */
 static devclass_t t4_devclass, t5_devclass;
 static devclass_t cxgbe_devclass, cxl_devclass;
 static devclass_t vcxgbe_devclass, vcxl_devclass;
 
 /*
  * The two nexus drivers attach to pci and both name mod_event() as their
  * module event handler, so mod_event() sees a load and an unload event for
  * each of them.
  */
 DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0);
 MODULE_VERSION(t4nex, 1);
 MODULE_DEPEND(t4nex, firmware, 1, 1, 1);
 #ifdef DEV_NETMAP
 MODULE_DEPEND(t4nex, netmap, 1, 1, 1);
 #endif /* DEV_NETMAP */
 
 
 DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0);
 MODULE_VERSION(t5nex, 1);
 MODULE_DEPEND(t5nex, firmware, 1, 1, 1);
 #ifdef DEV_NETMAP
 MODULE_DEPEND(t5nex, netmap, 1, 1, 1);
 #endif /* DEV_NETMAP */
 
 /* Port drivers hang off their nexus; no module event handler. */
 DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0);
 MODULE_VERSION(cxgbe, 1);
 
 DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0);
 MODULE_VERSION(cxl, 1);
 
 /* Virtual-interface drivers hang off the port drivers; no event handler. */
 DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0);
 MODULE_VERSION(vcxgbe, 1);
 
 DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0);
 MODULE_VERSION(vcxl, 1);
Index: projects/clang380-import/sys/dev/ic/ns16550.h
===================================================================
--- projects/clang380-import/sys/dev/ic/ns16550.h	(revision 293686)
+++ projects/clang380-import/sys/dev/ic/ns16550.h	(revision 293687)
@@ -1,240 +1,242 @@
 /*-
  * Copyright (c) 1991 The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)ns16550.h	7.1 (Berkeley) 5/9/91
  * $FreeBSD$
  */
 
 /*
  * NS8250... UART registers.
  */
 
 /* 8250 registers #[0-6]. */
 
 #define	com_data	0	/* data register (R/W) */
 #define	REG_DATA	com_data
 
 #define	com_ier		1	/* interrupt enable register (W) */
 #define	REG_IER		com_ier
 #define	IER_ERXRDY	0x1
 #define	IER_ETXRDY	0x2
 #define	IER_ERLS	0x4
 #define	IER_EMSC	0x8
 
 #define	IER_BITS	"\20\1ERXRDY\2ETXRDY\3ERLS\4EMSC"
 
 #define	com_iir		2	/* interrupt identification register (R) */
 #define	REG_IIR		com_iir
 #define	IIR_IMASK	0xf
 #define	IIR_RXTOUT	0xc
 #define	IIR_BUSY	0x7
 #define	IIR_RLS		0x6
 #define	IIR_RXRDY	0x4
 #define	IIR_TXRDY	0x2
 #define	IIR_NOPEND	0x1
 #define	IIR_MLSC	0x0
 #define	IIR_FIFO_MASK	0xc0	/* set if FIFOs are enabled */
 
 #define	IIR_BITS	"\20\1NOPEND\2TXRDY\3RXRDY"
 
 #define	com_lcr		3	/* line control register (R/W) */
 #define	com_cfcr	com_lcr	/* character format control register (R/W) */
 #define	REG_LCR		com_lcr
 #define	LCR_DLAB	0x80
 #define	CFCR_DLAB	LCR_DLAB
 #define	LCR_EFR_ENABLE	0xbf	/* magic to enable EFR on 16650 up */
 #define	CFCR_EFR_ENABLE	LCR_EFR_ENABLE
 #define	LCR_SBREAK	0x40
 #define	CFCR_SBREAK	LCR_SBREAK
 #define	LCR_PZERO	0x30
 #define	CFCR_PZERO	LCR_PZERO
 #define	LCR_PONE	0x20
 #define	CFCR_PONE	LCR_PONE
 #define	LCR_PEVEN	0x10
 #define	CFCR_PEVEN	LCR_PEVEN
 #define	LCR_PODD	0x00
 #define	CFCR_PODD	LCR_PODD
 #define	LCR_PENAB	0x08
 #define	CFCR_PENAB	LCR_PENAB
 #define	LCR_STOPB	0x04
 #define	CFCR_STOPB	LCR_STOPB
 #define	LCR_8BITS	0x03
 #define	CFCR_8BITS	LCR_8BITS
 #define	LCR_7BITS	0x02
 #define	CFCR_7BITS	LCR_7BITS
 #define	LCR_6BITS	0x01
 #define	CFCR_6BITS	LCR_6BITS
 #define	LCR_5BITS	0x00
 #define	CFCR_5BITS	LCR_5BITS
 
 #define	com_mcr		4	/* modem control register (R/W) */
 #define	REG_MCR		com_mcr
 #define	MCR_PRESCALE	0x80	/* only available on 16650 up */
 #define	MCR_LOOPBACK	0x10
 #define	MCR_IE		0x08
 #define	MCR_IENABLE	MCR_IE
 #define	MCR_DRS		0x04
 #define	MCR_RTS		0x02
 #define	MCR_DTR		0x01
 
 #define	MCR_BITS	"\20\1DTR\2RTS\3DRS\4IE\5LOOPBACK\10PRESCALE"
 
 #define	com_lsr		5	/* line status register (R/W) */
 #define	REG_LSR		com_lsr
 #define	LSR_RCV_FIFO	0x80
 #define	LSR_TEMT	0x40
 #define	LSR_TSRE	LSR_TEMT
 #define	LSR_THRE	0x20
 #define	LSR_TXRDY	LSR_THRE
 #define	LSR_BI		0x10
 #define	LSR_FE		0x08
 #define	LSR_PE		0x04
 #define	LSR_OE		0x02
 #define	LSR_RXRDY	0x01
 #define	LSR_RCV_MASK	0x1f
 
 #define	LSR_BITS	"\20\1RXRDY\2OE\3PE\4FE\5BI\6THRE\7TEMT\10RCV_FIFO"
 
 #define	com_msr		6	/* modem status register (R/W) */
 #define	REG_MSR		com_msr
 #define	MSR_DCD		0x80
 #define	MSR_RI		0x40
 #define	MSR_DSR		0x20
 #define	MSR_CTS		0x10
 #define	MSR_DDCD	0x08
 #define	MSR_TERI	0x04
 #define	MSR_DDSR	0x02
 #define	MSR_DCTS	0x01
 
 #define	MSR_BITS	"\20\1DCTS\2DDSR\3TERI\4DDCD\5CTS\6DSR\7RI\10DCD"
 
 /* 8250 multiplexed registers #[0-1].  Access enabled by LCR[7]. */
 #define	com_dll		0	/* divisor latch low (R/W) */
 #define	com_dlbl	com_dll
 #define	com_dlm		1	/* divisor latch high (R/W) */
 #define	com_dlbh	com_dlm
 #define	REG_DLL		com_dll
 #define	REG_DLH		com_dlm
 
 /* 16450 register #7.  Not multiplexed. */
 #define	com_scr		7	/* scratch register (R/W) */
 
 /* 16550 register #2.  Not multiplexed. */
 #define	com_fcr		2	/* FIFO control register (W) */
 #define	com_fifo	com_fcr
 #define	REG_FCR		com_fcr
 #define	FCR_ENABLE	0x01
 #define	FIFO_ENABLE	FCR_ENABLE
 #define	FCR_RCV_RST	0x02
 #define	FIFO_RCV_RST	FCR_RCV_RST
 #define	FCR_XMT_RST	0x04
 #define	FIFO_XMT_RST	FCR_XMT_RST
 #define	FCR_DMA		0x08
 #define	FIFO_DMA_MODE	FCR_DMA
 #define	FCR_RX_LOW	0x00
 #define	FIFO_RX_LOW	FCR_RX_LOW
 #define	FCR_RX_MEDL	0x40
 #define	FIFO_RX_MEDL	FCR_RX_MEDL
 #define	FCR_RX_MEDH	0x80
 #define	FIFO_RX_MEDH	FCR_RX_MEDH
 #define	FCR_RX_HIGH	0xc0
 #define	FIFO_RX_HIGH	FCR_RX_HIGH
 
 #define	FCR_BITS	"\20\1ENABLE\2RCV_RST\3XMT_RST\4DMA"
 
 /* 16650 registers #2,[4-7].  Access enabled by LCR_EFR_ENABLE. */
 
 #define	com_efr		2	/* enhanced features register (R/W) */
 #define	REG_EFR		com_efr
 #define	EFR_CTS		0x80
 #define	EFR_AUTOCTS	EFR_CTS
 #define	EFR_RTS		0x40
 #define	EFR_AUTORTS	EFR_RTS
 #define	EFR_EFE		0x10	/* enhanced functions enable */
 
 #define	com_xon1	4	/* XON 1 character (R/W) */
 #define	com_xon2	5	/* XON 2 character (R/W) */
 #define	com_xoff1	6	/* XOFF 1 character (R/W) */
 #define	com_xoff2	7	/* XOFF 2 character (R/W) */
 
 #define DW_REG_USR	31	/* DesignWare derived Uart Status Reg */
 #define com_usr		39	/* Octeon 16750/16550 Uart Status Reg */
 #define REG_USR		com_usr
 #define USR_BUSY	1	/* Uart Busy. Serial transfer in progress */
 #define USR_TXFIFO_NOTFULL 2    /* Uart TX FIFO Not full */
 
 /* 16950 register #1.  Access enabled by ACR[7].  Also requires !LCR[7]. */
 #define	com_asr		1	/* additional status register (R[0-7]/W[0-1]) */
 
 /* 16950 register #3.  R/W access enabled by ACR[7]. */
 #define	com_rfl		3	/* receiver fifo level (R) */
 
 /*
  * 16950 register #4.  Access enabled by ACR[7].  Also requires
  * !LCR_EFR_ENABLE.
  */
 #define	com_tfl		4	/* transmitter fifo level (R) */
 
 /*
  * 16950 register #5.  Accessible if !LCR_EFR_ENABLE.  Read access also
  * requires ACR[6].
  */
 #define	com_icr		5	/* index control register (R/W) */
+#define	REG_ICR		com_icr
 
 /*
  * 16950 register #7.  It is the same as com_scr except it has a different
  * abbreviation in the manufacturer's data sheet and it also serves as an
  * index into the Indexed Control register set.
  */
 #define	com_spr		com_scr	/* scratch pad (and index) register (R/W) */
 #define	REG_SPR		com_scr
 
 /*
  * 16950 indexed control registers #[0-0x13].  Access is via index in SPR,
  * data in ICR (if ICR is accessible).
  */
 
 #define	com_acr		0	/* additional control register (R/W) */
+#define	REG_ACR		com_acr
 #define	ACR_ASE		0x80	/* ASR/RFL/TFL enable */
 #define	ACR_ICRE	0x40	/* ICR enable */
 #define	ACR_TLE		0x20	/* TTL/RTL enable */
 
 #define	com_cpr		1	/* clock prescaler register (R/W) */
 #define	com_tcr		2	/* times clock register (R/W) */
 #define	com_ttl		4	/* transmitter trigger level (R/W) */
 #define	com_rtl		5	/* receiver trigger level (R/W) */
 /* ... */
 
 /* Hardware extension mode register for RSB-2000/3000. */
 #define	com_emr		com_msr
 #define	EMR_EXBUFF	0x04
 #define	EMR_CTSFLW	0x08
 #define	EMR_DSRFLW	0x10
 #define	EMR_RTSFLW	0x20
 #define	EMR_DTRFLW	0x40
 #define	EMR_EFMODE	0x80
Index: projects/clang380-import/sys/dev/iscsi/iscsi.c
===================================================================
--- projects/clang380-import/sys/dev/iscsi/iscsi.c	(revision 293686)
+++ projects/clang380-import/sys/dev/iscsi/iscsi.c	(revision 293687)
@@ -1,2454 +1,2497 @@
 /*-
  * Copyright (c) 2012 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Edward Tomasz Napierala under sponsorship
  * from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/condvar.h>
 #include <sys/conf.h>
 #include <sys/endian.h>
 #include <sys/eventhandler.h>
 #include <sys/file.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/module.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/sx.h>
 #include <vm/uma.h>
 
 #include <cam/cam.h>
 #include <cam/cam_ccb.h>
 #include <cam/cam_xpt.h>
 #include <cam/cam_debug.h>
 #include <cam/cam_sim.h>
 #include <cam/cam_xpt_sim.h>
 #include <cam/cam_xpt_periph.h>
 #include <cam/cam_periph.h>
 #include <cam/scsi/scsi_all.h>
 #include <cam/scsi/scsi_message.h>
 
 #include <dev/iscsi/icl.h>
 #include <dev/iscsi/icl_wrappers.h>
 #include <dev/iscsi/iscsi_ioctl.h>
 #include <dev/iscsi/iscsi_proto.h>
 #include <dev/iscsi/iscsi.h>
 
 #ifdef ICL_KERNEL_PROXY
 #include <sys/socketvar.h>
 #endif
 
 #ifdef ICL_KERNEL_PROXY
 FEATURE(iscsi_kernel_proxy, "iSCSI initiator built with ICL_KERNEL_PROXY");
 #endif
 
 /*
  * XXX: This is global so that iscsi_unload() can access it.
  * 	Think about how to do this properly.
  */
 static struct iscsi_softc	*sc;
 
 SYSCTL_NODE(_kern, OID_AUTO, iscsi, CTLFLAG_RD, 0, "iSCSI initiator");
 static int debug = 1;
 SYSCTL_INT(_kern_iscsi, OID_AUTO, debug, CTLFLAG_RWTUN,
     &debug, 0, "Enable debug messages");
 static int ping_timeout = 5;
 SYSCTL_INT(_kern_iscsi, OID_AUTO, ping_timeout, CTLFLAG_RWTUN, &ping_timeout,
     0, "Timeout for ping (NOP-Out) requests, in seconds");
 static int iscsid_timeout = 60;
 SYSCTL_INT(_kern_iscsi, OID_AUTO, iscsid_timeout, CTLFLAG_RWTUN, &iscsid_timeout,
     0, "Time to wait for iscsid(8) to handle reconnection, in seconds");
 static int login_timeout = 60;
 SYSCTL_INT(_kern_iscsi, OID_AUTO, login_timeout, CTLFLAG_RWTUN, &login_timeout,
     0, "Time to wait for iscsid(8) to finish Login Phase, in seconds");
 static int maxtags = 255;
 SYSCTL_INT(_kern_iscsi, OID_AUTO, maxtags, CTLFLAG_RWTUN, &maxtags,
     0, "Max number of IO requests queued");
 static int fail_on_disconnection = 0;
 SYSCTL_INT(_kern_iscsi, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
     &fail_on_disconnection, 0, "Destroy CAM SIM on connection failure");
+static int fail_on_shutdown = 1;
+SYSCTL_INT(_kern_iscsi, OID_AUTO, fail_on_shutdown, CTLFLAG_RWTUN,
+    &fail_on_shutdown, 0, "Fail disconnected sessions on shutdown");
 
 static MALLOC_DEFINE(M_ISCSI, "iSCSI", "iSCSI initiator");
 static uma_zone_t iscsi_outstanding_zone;
 
 #define	CONN_SESSION(X)	((struct iscsi_session *)X->ic_prv0)
 #define	PDU_SESSION(X)	(CONN_SESSION(X->ip_conn))
 
 #define	ISCSI_DEBUG(X, ...)						\
 	do {								\
 		if (debug > 1) 						\
 			printf("%s: " X "\n", __func__, ## __VA_ARGS__);\
 	} while (0)
 
 #define	ISCSI_WARN(X, ...)						\
 	do {								\
 		if (debug > 0) {					\
 			printf("WARNING: %s: " X "\n",			\
 			    __func__, ## __VA_ARGS__);			\
 		}							\
 	} while (0)
 
 #define	ISCSI_SESSION_DEBUG(S, X, ...)					\
 	do {								\
 		if (debug > 1) {					\
 			printf("%s: %s (%s): " X "\n",			\
 			    __func__, S->is_conf.isc_target_addr,	\
 			    S->is_conf.isc_target, ## __VA_ARGS__);	\
 		}							\
 	} while (0)
 
 #define	ISCSI_SESSION_WARN(S, X, ...)					\
 	do {								\
 		if (debug > 0) {					\
 			printf("WARNING: %s (%s): " X "\n",		\
 			    S->is_conf.isc_target_addr,			\
 			    S->is_conf.isc_target, ## __VA_ARGS__);	\
 		}							\
 	} while (0)
 
 #define ISCSI_SESSION_LOCK(X)		mtx_lock(&X->is_lock)
 #define ISCSI_SESSION_UNLOCK(X)		mtx_unlock(&X->is_lock)
 #define ISCSI_SESSION_LOCK_ASSERT(X)	mtx_assert(&X->is_lock, MA_OWNED)
 #define ISCSI_SESSION_LOCK_ASSERT_NOT(X) mtx_assert(&X->is_lock, MA_NOTOWNED)
 
 static int	iscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg,
 		    int mode, struct thread *td);
 
 static struct cdevsw iscsi_cdevsw = {
      .d_version = D_VERSION,
      .d_ioctl   = iscsi_ioctl,
      .d_name    = "iscsi",
 };
 
 static void	iscsi_pdu_queue_locked(struct icl_pdu *request);
 static void	iscsi_pdu_queue(struct icl_pdu *request);
 static void	iscsi_pdu_update_statsn(const struct icl_pdu *response);
 static void	iscsi_pdu_handle_nop_in(struct icl_pdu *response);
 static void	iscsi_pdu_handle_scsi_response(struct icl_pdu *response);
 static void	iscsi_pdu_handle_task_response(struct icl_pdu *response);
 static void	iscsi_pdu_handle_data_in(struct icl_pdu *response);
 static void	iscsi_pdu_handle_logout_response(struct icl_pdu *response);
 static void	iscsi_pdu_handle_r2t(struct icl_pdu *response);
 static void	iscsi_pdu_handle_async_message(struct icl_pdu *response);
 static void	iscsi_pdu_handle_reject(struct icl_pdu *response);
 static void	iscsi_session_reconnect(struct iscsi_session *is);
 static void	iscsi_session_terminate(struct iscsi_session *is);
 static void	iscsi_action(struct cam_sim *sim, union ccb *ccb);
 static void	iscsi_poll(struct cam_sim *sim);
 static struct iscsi_outstanding	*iscsi_outstanding_find(struct iscsi_session *is,
 		    uint32_t initiator_task_tag);
 static struct iscsi_outstanding	*iscsi_outstanding_add(struct iscsi_session *is,
 		    union ccb *ccb, uint32_t *initiator_task_tagp);
 static void	iscsi_outstanding_remove(struct iscsi_session *is,
 		    struct iscsi_outstanding *io);
 
 /*
  * Fill in the sequence numbers of a request PDU just before it is sent.
  * Must be called with the session lock held.
  *
  * Returns true if the PDU has to be postponed because the current MaxCmdSN
  * does not allow another non-immediate command; the PDU is left untouched in
  * that case.  Returns false if the PDU is ready to be queued.
  */
 static bool
 iscsi_pdu_prepare(struct icl_pdu *request)
 {
 	struct iscsi_bhs_scsi_command *hdr;
 	struct iscsi_session *is;
 	bool immediate;
 
 	is = PDU_SESSION(request);
 	ISCSI_SESSION_LOCK_ASSERT(is);
 
 	/*
 	 * Only the fields shared by all request (initiator -> target) PDUs
 	 * are accessed through this header pointer.
 	 */
 	hdr = (struct iscsi_bhs_scsi_command *)request->ip_bhs;
 
 	/* Data-Out PDUs carry no CmdSN; everything else gets one. */
 	if (hdr->bhssc_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_OUT) {
 		immediate =
 		    (hdr->bhssc_opcode & ISCSI_BHS_OPCODE_IMMEDIATE) != 0;
 
 		if (ISCSI_SNGT(is->is_cmdsn, is->is_maxcmdsn) && !immediate) {
 			/*
 			 * The command window is closed.  The PDU will be sent
 			 * later, either from iscsi_pdu_queue() or from the
 			 * maintenance thread.
 			 */
 #if 0
 			ISCSI_SESSION_DEBUG(is, "postponing send, CmdSN %u, "
 			    "ExpCmdSN %u, MaxCmdSN %u, opcode 0x%x",
 			    is->is_cmdsn, is->is_expcmdsn, is->is_maxcmdsn,
 			    hdr->bhssc_opcode);
 #endif
 			return (true);
 		}
 
 		hdr->bhssc_cmdsn = htonl(is->is_cmdsn);
 		/* Immediate PDUs do not advance CmdSN. */
 		if (!immediate)
 			is->is_cmdsn++;
 	}
 
 	hdr->bhssc_expstatsn = htonl(is->is_statsn + 1);
 
 	return (false);
 }
 
 static void
 iscsi_session_send_postponed(struct iscsi_session *is)
 {
 	struct icl_pdu *request;
 	bool postpone;
 
 	ISCSI_SESSION_LOCK_ASSERT(is);
 
 	while (!STAILQ_EMPTY(&is->is_postponed)) {
 		request = STAILQ_FIRST(&is->is_postponed);
 		postpone = iscsi_pdu_prepare(request);
 		if (postpone)
 			break;
 		STAILQ_REMOVE_HEAD(&is->is_postponed, ip_next);
 		icl_pdu_queue(request);
 	}
 }
 
 /*
  * Queue a request PDU for transmission, after first retrying any PDUs that
  * were postponed earlier.  If the request itself has to be postponed it is
  * appended to the session's postponed list instead.  Must be called with
  * the session lock held.
  */
 static void
 iscsi_pdu_queue_locked(struct icl_pdu *request)
 {
 	struct iscsi_session *session;
 
 	session = PDU_SESSION(request);
 	ISCSI_SESSION_LOCK_ASSERT(session);
 
 	iscsi_session_send_postponed(session);
 
 	if (iscsi_pdu_prepare(request)) {
 		STAILQ_INSERT_TAIL(&session->is_postponed, request, ip_next);
 	} else {
 		icl_pdu_queue(request);
 	}
 }
 
 /*
  * Same as iscsi_pdu_queue_locked(), but takes and drops the session lock
  * around the call.
  */
 static void
 iscsi_pdu_queue(struct icl_pdu *request)
 {
 	struct iscsi_session *session = PDU_SESSION(request);
 
 	ISCSI_SESSION_LOCK(session);
 	iscsi_pdu_queue_locked(request);
 	ISCSI_SESSION_UNLOCK(session);
 }
 
 /*
  * Queue a Logout Request (reason: close session) on the session's
  * connection.  Nothing is sent if the PDU cannot be allocated without
  * sleeping.  The PDU is queued via iscsi_pdu_queue_locked().
  */
 static void
 iscsi_session_logout(struct iscsi_session *is)
 {
 	struct iscsi_bhs_logout_request *logout;
 	struct icl_pdu *pdu;
 
 	pdu = icl_pdu_new(is->is_conn, M_NOWAIT);
 	if (pdu != NULL) {
 		logout = (struct iscsi_bhs_logout_request *)pdu->ip_bhs;
 		logout->bhslr_opcode = ISCSI_BHS_OPCODE_LOGOUT_REQUEST;
 		logout->bhslr_reason = BHSLR_REASON_CLOSE_SESSION;
 		iscsi_pdu_queue_locked(pdu);
 	}
 }
 
 /*
  * Finish one outstanding task.  If a CCB is attached, it is completed with
  * CAM_REQUEUE_REQ (requeue) or CAM_REQ_ABORTED status, freezing the device
  * queue unless the CCB already carries CAM_DEV_QFRZN.  The task is removed
  * from the session in either case.
  */
 static void
 iscsi_session_terminate_task(struct iscsi_session *is,
     struct iscsi_outstanding *io, bool requeue)
 {
 	union ccb *ccb;
 
 	ccb = io->io_ccb;
 	if (ccb != NULL) {
 		/* Replace whatever status was there with the final one. */
 		ccb->ccb_h.status &= ~(CAM_SIM_QUEUED | CAM_STATUS_MASK);
 		ccb->ccb_h.status |=
 		    requeue ? CAM_REQUEUE_REQ : CAM_REQ_ABORTED;
 
 		if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
 			ccb->ccb_h.status |= CAM_DEV_QFRZN;
 			xpt_freeze_devq(ccb->ccb_h.path, 1);
 			ISCSI_SESSION_DEBUG(is, "freezing devq");
 		}
 		xpt_done(ccb);
 	}
 
 	iscsi_outstanding_remove(is, io);
 }
 
 /*
  * Finish every outstanding task of the session; see
  * iscsi_session_terminate_task() for the meaning of requeue.  Must be
  * called with the session lock held.
  */
 static void
 iscsi_session_terminate_tasks(struct iscsi_session *is, bool requeue)
 {
 	struct iscsi_outstanding *cur, *next;
 
 	ISCSI_SESSION_LOCK_ASSERT(is);
 
 	/* Fetch the successor first; terminating a task removes it. */
 	for (cur = TAILQ_FIRST(&is->is_outstanding); cur != NULL; cur = next) {
 		next = TAILQ_NEXT(cur, io_next);
 		iscsi_session_terminate_task(is, cur, requeue);
 	}
 }
 
 /*
  * Quiesce a session: freeze its SIM queue, free the postponed PDUs and
  * finish all outstanding tasks.  With destroy_sim false the tasks are
  * handed back to CAM for requeueing and the SIM is kept; with destroy_sim
  * true they are aborted and the SIM, its path and its bus registration are
  * torn down as well.  Must be called with the session lock held.
  */
 static void
 iscsi_session_cleanup(struct iscsi_session *is, bool destroy_sim)
 {
 	struct icl_pdu *postponed;
 
 	ISCSI_SESSION_LOCK_ASSERT(is);
 
 	/* Freeze the SIM queue (once) so that no new PDUs get queued. */
 	if (is->is_sim != NULL && !is->is_simq_frozen) {
 		ISCSI_SESSION_DEBUG(is, "freezing");
 		xpt_freeze_simq(is->is_sim, 1);
 		is->is_simq_frozen = true;
 	}
 
 	/* Throw away every postponed PDU. */
 	while ((postponed = STAILQ_FIRST(&is->is_postponed)) != NULL) {
 		STAILQ_REMOVE_HEAD(&is->is_postponed, ip_next);
 		icl_pdu_free(postponed);
 	}
 
 	/* Requeue the tasks if the SIM stays, abort them if it goes away. */
 	iscsi_session_terminate_tasks(is, !destroy_sim);
 	if (!destroy_sim)
 		return;
 
 	/* Nothing more to do if there is no SIM to tear down. */
 	if (is->is_sim == NULL)
 		return;
 
 	ISCSI_SESSION_DEBUG(is, "deregistering SIM");
 	xpt_async(AC_LOST_DEVICE, is->is_path, NULL);
 
 	/* Undo our freeze before the SIM is freed. */
 	if (is->is_simq_frozen) {
 		xpt_release_simq(is->is_sim, 1);
 		is->is_simq_frozen = false;
 	}
 
 	xpt_free_path(is->is_path);
 	is->is_path = NULL;
 
 	xpt_bus_deregister(cam_sim_path(is->is_sim));
 	cam_sim_free(is->is_sim, TRUE /*free_devq*/);
 	is->is_sim = NULL;
 	is->is_devq = NULL;
 }
 
 static void
 iscsi_maintenance_thread_reconnect(struct iscsi_session *is)
 {
 
 	icl_conn_close(is->is_conn);
 
 	ISCSI_SESSION_LOCK(is);
 
 	is->is_connected = false;
 	is->is_reconnecting = false;
 	is->is_login_phase = false;
 
 #ifdef ICL_KERNEL_PROXY
 	if (is->is_login_pdu != NULL) {
 		icl_pdu_free(is->is_login_pdu);
 		is->is_login_pdu = NULL;
 	}
 	cv_signal(&is->is_login_cv);
 #endif
  
 	if (fail_on_disconnection) {
 		ISCSI_SESSION_DEBUG(is, "connection failed, destroying devices");
 		iscsi_session_cleanup(is, true);
 	} else {
 		iscsi_session_cleanup(is, false);
 	}
  
 	KASSERT(TAILQ_EMPTY(&is->is_outstanding),
 	    ("destroying session with active tasks"));
 	KASSERT(STAILQ_EMPTY(&is->is_postponed),
 	    ("destroying session with postponed PDUs"));
 
 	/*
 	 * Request immediate reconnection from iscsid(8).
 	 */
 	//ISCSI_SESSION_DEBUG(is, "waking up iscsid(8)");
 	is->is_waiting_for_iscsid = true;
 	strlcpy(is->is_reason, "Waiting for iscsid(8)", sizeof(is->is_reason));
 	is->is_timeout = 0;
 	ISCSI_SESSION_UNLOCK(is);
 	cv_signal(&is->is_softc->sc_cv);
 }
 
 /*
  * Final teardown of a session, run by its maintenance thread once
  * is_terminating is set: close the connection, drain the callout, clean the
  * session up (destroying its SIM), destroy the connection and the session's
  * synchronization objects, free the session and signal the softc's condition
  * variable.
  *
  * This hunk moves the removal of the session from sc->sc_sessions, and the
  * release of sc->sc_lock, from the top of the function to just before the
  * session is freed, so the session stays on the list, with sc_lock held
  * exclusively, for the whole teardown.
  *
  * Note that the local 'sc' below shadows the file-scope 'sc' pointer.
  */
 static void
 iscsi_maintenance_thread_terminate(struct iscsi_session *is)
 {
 	struct iscsi_softc *sc;
 
 	sc = is->is_softc;
 	sx_xlock(&sc->sc_lock);
-	TAILQ_REMOVE(&sc->sc_sessions, is, is_next);
-	sx_xunlock(&sc->sc_lock);
 
 	icl_conn_close(is->is_conn);
 	callout_drain(&is->is_callout);
 
 	ISCSI_SESSION_LOCK(is);
 
 	KASSERT(is->is_terminating, ("is_terminating == false"));
 
 #ifdef ICL_KERNEL_PROXY
 	/* Drop a pending login PDU and signal the login condvar. */
 	if (is->is_login_pdu != NULL) {
 		icl_pdu_free(is->is_login_pdu);
 		is->is_login_pdu = NULL;
 	}
 	cv_signal(&is->is_login_cv);
 #endif
 
 	/* Abort all tasks and tear down the SIM. */
 	iscsi_session_cleanup(is, true);
 
 	KASSERT(TAILQ_EMPTY(&is->is_outstanding),
 	    ("destroying session with active tasks"));
 	KASSERT(STAILQ_EMPTY(&is->is_postponed),
 	    ("destroying session with postponed PDUs"));
 
 	ISCSI_SESSION_UNLOCK(is);
 
 	icl_conn_free(is->is_conn);
 	mtx_destroy(&is->is_lock);
 	cv_destroy(&is->is_maintenance_cv);
 #ifdef ICL_KERNEL_PROXY
 	cv_destroy(&is->is_login_cv);
 #endif
+	TAILQ_REMOVE(&sc->sc_sessions, is, is_next);
+	sx_xunlock(&sc->sc_lock);
+
 	ISCSI_SESSION_DEBUG(is, "terminated");
 	free(is, M_ISCSI);
 
 	/*
 	 * The iscsi_unload() routine might be waiting.
 	 */
 	cv_signal(&sc->sc_cv);
 }
 
 /*
  * Per-session maintenance thread; arg is the session.  It sleeps on
  * is_maintenance_cv until the session is flagged as reconnecting or
  * terminating, or has postponed PDUs, and then handles whichever applies.
  * The thread exits after handling termination.
  *
  * This hunk swaps the order of the two checks: termination is now tested
  * before reconnection, so a session flagged for both is terminated.
  */
 static void
 iscsi_maintenance_thread(void *arg)
 {
 	struct iscsi_session *is;
 
 	is = arg;
 
 	for (;;) {
 		ISCSI_SESSION_LOCK(is);
 		/* Sleep only if there is nothing to do right now. */
 		if (is->is_reconnecting == false &&
 		    is->is_terminating == false &&
 		    STAILQ_EMPTY(&is->is_postponed))
 			cv_wait(&is->is_maintenance_cv, &is->is_lock);
 
-		if (is->is_reconnecting) {
-			ISCSI_SESSION_UNLOCK(is);
-			iscsi_maintenance_thread_reconnect(is);
-			continue;
-		}
-
+		/* Terminate supersedes reconnect. */
 		if (is->is_terminating) {
 			ISCSI_SESSION_UNLOCK(is);
 			iscsi_maintenance_thread_terminate(is);
 			kthread_exit();
 			return;
 		}
 
+		if (is->is_reconnecting) {
+			ISCSI_SESSION_UNLOCK(is);
+			iscsi_maintenance_thread_reconnect(is);
+			continue;
+		}
+
 		/* Neither flag is set: retry the postponed PDUs. */
 		iscsi_session_send_postponed(is);
 		ISCSI_SESSION_UNLOCK(is);
 	}
 }
 
 /*
  * Flag the session for reconnection and wake up its maintenance thread.
  * Does nothing if the session is already flagged.
  */
 static void
 iscsi_session_reconnect(struct iscsi_session *is)
 {
 
 	/*
 	 * XXX: No locking here, because this routine is called from
 	 * 	various contexts.  Hope it doesn't break anything.
 	 */
 	if (!is->is_reconnecting) {
 		is->is_reconnecting = true;
 		cv_signal(&is->is_maintenance_cv);
 	}
 }
 
 /*
  * Mark the session as terminating and signal is_maintenance_cv.
  * Does nothing when is_terminating is already set.
  */
 static void
 iscsi_session_terminate(struct iscsi_session *is)
 {
 
 	if (!is->is_terminating) {
 		is->is_terminating = true;
 
 #if 0
 		iscsi_session_logout(is);
 #endif
 		cv_signal(&is->is_maintenance_cv);
 	}
 }
 
 static void
 iscsi_callout(void *context)
 {
 	struct icl_pdu *request;
 	struct iscsi_bhs_nop_out *bhsno;
 	struct iscsi_session *is;
 	bool reconnect_needed = false;
 
 	is = context;
 
 	ISCSI_SESSION_LOCK(is);
 	if (is->is_terminating) {
 		ISCSI_SESSION_UNLOCK(is);
 		return;
 	}
 
 	callout_schedule(&is->is_callout, 1 * hz);
 
 	is->is_timeout++;
 
 	if (is->is_waiting_for_iscsid) {
 		if (iscsid_timeout > 0 && is->is_timeout > iscsid_timeout) {
 			ISCSI_SESSION_WARN(is, "timed out waiting for iscsid(8) "
 			    "for %d seconds; reconnecting",
 			    is->is_timeout);
 			reconnect_needed = true;
 		}
 		goto out;
 	}
 
 	if (is->is_login_phase) {
 		if (login_timeout > 0 && is->is_timeout > login_timeout) {
 			ISCSI_SESSION_WARN(is, "login timed out after %d seconds; "
 			    "reconnecting", is->is_timeout);
 			reconnect_needed = true;
 		}
 		goto out;
 	}
 
 	if (ping_timeout <= 0) {
 		/*
 		 * Pings are disabled.  Don't send NOP-Out in this case.
 		 * Reset the timeout, to avoid triggering reconnection,
 		 * should the user decide to reenable them.
 		 */
 		is->is_timeout = 0;
 		goto out;
 	}
 
 	if (is->is_timeout >= ping_timeout) {
 		ISCSI_SESSION_WARN(is, "no ping reply (NOP-In) after %d seconds; "
 		    "reconnecting", ping_timeout);
 		reconnect_needed = true;
 		goto out;
 	}
 
 	ISCSI_SESSION_UNLOCK(is);
 
 	/*
 	 * If the ping was reset less than one second ago - which means
 	 * that we've received some PDU during the last second - assume
 	 * the traffic flows correctly and don't bother sending a NOP-Out.
 	 *
 	 * (It's 2 - one for one second, and one for incrementing is_timeout
 	 * earlier in this routine.)
 	 */
 	if (is->is_timeout < 2)
 		return;
 
 	request = icl_pdu_new(is->is_conn, M_NOWAIT);
 	if (request == NULL) {
 		ISCSI_SESSION_WARN(is, "failed to allocate PDU");
 		return;
 	}
 	bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs;
 	bhsno->bhsno_opcode = ISCSI_BHS_OPCODE_NOP_OUT |
 	    ISCSI_BHS_OPCODE_IMMEDIATE;
 	bhsno->bhsno_flags = 0x80;
 	bhsno->bhsno_target_transfer_tag = 0xffffffff;
 	iscsi_pdu_queue(request);
 	return;
 
 out:
+	if (is->is_terminating) {
+		ISCSI_SESSION_UNLOCK(is);
+		return;
+	}
+
 	ISCSI_SESSION_UNLOCK(is);
 
 	if (reconnect_needed)
 		iscsi_session_reconnect(is);
 }
 
 /*
  * Update the session's sequence-number state (is_statsn, is_maxcmdsn,
  * is_expcmdsn) from the header of a response PDU and reset is_timeout.
  * Asserts that the session lock is held.
  */
 static void
 iscsi_pdu_update_statsn(const struct icl_pdu *response)
 {
 	const struct iscsi_bhs_data_in *bhsdi;
 	struct iscsi_session *is;
 	uint32_t expcmdsn, maxcmdsn, statsn;
 
 	is = PDU_SESSION(response);
 
 	ISCSI_SESSION_LOCK_ASSERT(is);
 
 	/*
 	 * We're only using fields common for all the response
 	 * (target -> initiator) PDUs.
 	 */
 	bhsdi = (const struct iscsi_bhs_data_in *)response->ip_bhs;
 	/*
 	 * Ok, I lied.  In case of Data-In, "The fields StatSN, Status,
 	 * and Residual Count only have meaningful content if the S bit
 	 * is set to 1", so we also need to check the bit specific for
 	 * Data-In PDU.
 	 */
 	if (bhsdi->bhsdi_opcode != ISCSI_BHS_OPCODE_SCSI_DATA_IN ||
 	    (bhsdi->bhsdi_flags & BHSDI_FLAGS_S) != 0) {
 		statsn = ntohl(bhsdi->bhsdi_statsn);
 		/* Only the current value or its successor is accepted. */
 		if (statsn != is->is_statsn && statsn != (is->is_statsn + 1)) {
 			/* XXX: This is normal situation for MCS */
 			ISCSI_SESSION_WARN(is, "PDU 0x%x StatSN %u != "
 			    "session ExpStatSN %u (or + 1); reconnecting",
 			    bhsdi->bhsdi_opcode, statsn, is->is_statsn);
 			iscsi_session_reconnect(is);
 		}
 		if (ISCSI_SNGT(statsn, is->is_statsn))
 			is->is_statsn = statsn;
 	}
 
 	expcmdsn = ntohl(bhsdi->bhsdi_expcmdsn);
 	maxcmdsn = ntohl(bhsdi->bhsdi_maxcmdsn);
 
 	/* An inconsistent MaxCmdSN/ExpCmdSN pair is logged and ignored. */
 	if (ISCSI_SNLT(maxcmdsn + 1, expcmdsn)) {
 		ISCSI_SESSION_DEBUG(is,
 		    "PDU MaxCmdSN %u + 1 < PDU ExpCmdSN %u; ignoring",
 		    maxcmdsn, expcmdsn);
 	} else {
 		if (ISCSI_SNGT(maxcmdsn, is->is_maxcmdsn)) {
 			is->is_maxcmdsn = maxcmdsn;
 
 			/*
 			 * Command window increased; kick the maintenance thread
 			 * to send out postponed commands.
 			 */
 			if (!STAILQ_EMPTY(&is->is_postponed))
 				cv_signal(&is->is_maintenance_cv);
 		} else if (ISCSI_SNLT(maxcmdsn, is->is_maxcmdsn)) {
 			/* XXX: This is normal situation for MCS */
 			ISCSI_SESSION_DEBUG(is,
 			    "PDU MaxCmdSN %u < session MaxCmdSN %u; ignoring",
 			    maxcmdsn, is->is_maxcmdsn);
 		}
 
 		if (ISCSI_SNGT(expcmdsn, is->is_expcmdsn)) {
 			is->is_expcmdsn = expcmdsn;
 		} else if (ISCSI_SNLT(expcmdsn, is->is_expcmdsn)) {
 			/* XXX: This is normal situation for MCS */
 			ISCSI_SESSION_DEBUG(is,
 			    "PDU ExpCmdSN %u < session ExpCmdSN %u; ignoring",
 			    expcmdsn, is->is_expcmdsn);
 		}
 	}
 
 	/*
 	 * Every incoming PDU - not just NOP-In - resets the ping timer.
 	 * The purpose of the timeout is to reset the connection when it stalls;
 	 * we don't want this to happen when NOP-In or NOP-Out ends up delayed
 	 * in some queue.
 	 */
 	is->is_timeout = 0;
 }
 
 /*
  * Dispatch a received PDU to the handler matching its opcode.
  *
  * The session lock is taken here.  Depending on the opcode it is either
  * released here after the handler returns, or released by the handler
  * itself (SCSI Response and Data-In).  The PDU is freed by the handler,
  * or here for unsupported opcodes.
  */
 static void
 iscsi_receive_callback(struct icl_pdu *response)
 {
 	struct iscsi_session *is;
 
 	is = PDU_SESSION(response);
 
 	ISCSI_SESSION_LOCK(is);
 
 #ifdef ICL_KERNEL_PROXY
 	/*
 	 * During the login phase the PDU is stashed in is_login_pdu
 	 * (or dropped if one is already pending) and is_login_cv
 	 * is signalled.
 	 */
 	if (is->is_login_phase) {
 		if (is->is_login_pdu == NULL)
 			is->is_login_pdu = response;
 		else
 			icl_pdu_free(response);
 		ISCSI_SESSION_UNLOCK(is);
 		cv_signal(&is->is_login_cv);
 		return;
 	}
 #endif
 
 	iscsi_pdu_update_statsn(response);
 	
 	/*
 	 * The handling routine is responsible for freeing the PDU
 	 * when it's no longer needed.
 	 */
 	switch (response->ip_bhs->bhs_opcode) {
 	case ISCSI_BHS_OPCODE_NOP_IN:
 		iscsi_pdu_handle_nop_in(response);
 		ISCSI_SESSION_UNLOCK(is);
 		break;
 	case ISCSI_BHS_OPCODE_SCSI_RESPONSE:
 		iscsi_pdu_handle_scsi_response(response);
 		/* Session lock dropped inside. */
 		ISCSI_SESSION_LOCK_ASSERT_NOT(is);
 		break;
 	case ISCSI_BHS_OPCODE_TASK_RESPONSE:
 		iscsi_pdu_handle_task_response(response);
 		ISCSI_SESSION_UNLOCK(is);
 		break;
 	case ISCSI_BHS_OPCODE_SCSI_DATA_IN:
 		iscsi_pdu_handle_data_in(response);
 		/* Session lock dropped inside. */
 		ISCSI_SESSION_LOCK_ASSERT_NOT(is);
 		break;
 	case ISCSI_BHS_OPCODE_LOGOUT_RESPONSE:
 		iscsi_pdu_handle_logout_response(response);
 		ISCSI_SESSION_UNLOCK(is);
 		break;
 	case ISCSI_BHS_OPCODE_R2T:
 		iscsi_pdu_handle_r2t(response);
 		ISCSI_SESSION_UNLOCK(is);
 		break;
 	case ISCSI_BHS_OPCODE_ASYNC_MESSAGE:
 		iscsi_pdu_handle_async_message(response);
 		ISCSI_SESSION_UNLOCK(is);
 		break;
 	case ISCSI_BHS_OPCODE_REJECT:
 		iscsi_pdu_handle_reject(response);
 		ISCSI_SESSION_UNLOCK(is);
 		break;
 	default:
 		/* Unknown opcode: request a reconnect and drop the PDU. */
 		ISCSI_SESSION_WARN(is, "received PDU with unsupported "
 		    "opcode 0x%x; reconnecting",
 		    response->ip_bhs->bhs_opcode);
 		iscsi_session_reconnect(is);
 		ISCSI_SESSION_UNLOCK(is);
 		icl_pdu_free(response);
 	}
 }
 
 /*
  * Log a warning and request a reconnect of the session obtained
  * from the connection via CONN_SESSION().
  */
 static void
 iscsi_error_callback(struct icl_conn *ic)
 {
 	struct iscsi_session *is = CONN_SESSION(ic);
 
 	ISCSI_SESSION_WARN(is, "connection error; reconnecting");
 	iscsi_session_reconnect(is);
 }
 
 /*
  * Handle a NOP-In PDU.  When the target transfer tag is 0xffffffff the
  * PDU is simply freed; otherwise a NOP-Out carrying the same tag and a
  * copy of the data segment is queued.  Any allocation failure results
  * in a warning and a session reconnect request.  The response PDU is
  * always freed.
  */
 static void
 iscsi_pdu_handle_nop_in(struct icl_pdu *response)
 {
 	struct iscsi_session *is;
 	struct iscsi_bhs_nop_out *bhsno;
 	struct iscsi_bhs_nop_in *bhsni;
 	struct icl_pdu *request = NULL;
 	void *data = NULL;
 	size_t datasize;
 
 	is = PDU_SESSION(response);
 	bhsni = (struct iscsi_bhs_nop_in *)response->ip_bhs;
 
 	if (bhsni->bhsni_target_transfer_tag == 0xffffffff) {
 		/*
 		 * Nothing to do; iscsi_pdu_update_statsn() already
 		 * zeroed the timeout.
 		 */
 		icl_pdu_free(response);
 		return;
 	}
 
 	/* Take a private copy of the data segment, if there is one. */
 	datasize = icl_pdu_data_segment_length(response);
 	if (datasize > 0) {
 		data = malloc(datasize, M_ISCSI, M_NOWAIT | M_ZERO);
 		if (data == NULL)
 			goto fail;
 		icl_pdu_get_data(response, 0, data, datasize);
 	}
 
 	request = icl_pdu_new(response->ip_conn, M_NOWAIT);
 	if (request == NULL)
 		goto fail;
 
 	bhsno = (struct iscsi_bhs_nop_out *)request->ip_bhs;
 	bhsno->bhsno_opcode = ISCSI_BHS_OPCODE_NOP_OUT |
 	    ISCSI_BHS_OPCODE_IMMEDIATE;
 	bhsno->bhsno_flags = 0x80;
 	bhsno->bhsno_initiator_task_tag = 0xffffffff;
 	bhsno->bhsno_target_transfer_tag = bhsni->bhsni_target_transfer_tag;
 	if (datasize > 0) {
 		if (icl_pdu_append_data(request, data, datasize,
 		    M_NOWAIT) != 0)
 			goto fail;
 		free(data, M_ISCSI);
 	}
 
 	icl_pdu_free(response);
 	iscsi_pdu_queue_locked(request);
 	return;
 
 fail:
 	/* Common exit for every allocation failure above. */
 	ISCSI_SESSION_WARN(is, "failed to allocate memory; "
 	    "reconnecting");
 	free(data, M_ISCSI);
 	if (request != NULL)
 		icl_pdu_free(request);
 	icl_pdu_free(response);
 	iscsi_session_reconnect(is);
 }
 
 /*
  * Handle a SCSI Response PDU: look up the outstanding task by its
  * initiator task tag, translate the response/status into a CAM status,
  * copy out sense data if present, fix up the residual and complete
  * the CCB with xpt_done().
  *
  * The session lock is released inside this routine on every path;
  * the response PDU is always freed.
  */
 static void
 iscsi_pdu_handle_scsi_response(struct icl_pdu *response)
 {
 	struct iscsi_bhs_scsi_response *bhssr;
 	struct iscsi_outstanding *io;
 	struct iscsi_session *is;
 	union ccb *ccb;
 	struct ccb_scsiio *csio;
 	size_t data_segment_len, received;
 	uint16_t sense_len;
 
 	is = PDU_SESSION(response);
 
 	bhssr = (struct iscsi_bhs_scsi_response *)response->ip_bhs;
 	io = iscsi_outstanding_find(is, bhssr->bhssr_initiator_task_tag);
 	if (io == NULL || io->io_ccb == NULL) {
 		ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhssr->bhssr_initiator_task_tag);
 		icl_pdu_free(response);
 		iscsi_session_reconnect(is);
 		ISCSI_SESSION_UNLOCK(is);
 		return;
 	}
 
 	/* Grab what we need from the task before removing it and unlocking. */
 	ccb = io->io_ccb;
 	received = io->io_received;
 	iscsi_outstanding_remove(is, io);
 	ISCSI_SESSION_UNLOCK(is);
 
 	if (bhssr->bhssr_response != BHSSR_RESPONSE_COMMAND_COMPLETED) {
 		ISCSI_SESSION_WARN(is, "service response 0x%x", bhssr->bhssr_response);
  		if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
  			xpt_freeze_devq(ccb->ccb_h.path, 1);
 			ISCSI_SESSION_DEBUG(is, "freezing devq");
 		}
  		ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN;
 	} else if (bhssr->bhssr_status == 0) {
 		ccb->ccb_h.status = CAM_REQ_CMP;
 	} else {
  		if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
  			xpt_freeze_devq(ccb->ccb_h.path, 1);
 			ISCSI_SESSION_DEBUG(is, "freezing devq");
 		}
  		ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_DEV_QFRZN;
 		ccb->csio.scsi_status = bhssr->bhssr_status;
 	}
 
 	csio = &ccb->csio;
 	data_segment_len = icl_pdu_data_segment_length(response);
 	if (data_segment_len > 0) {
 		/*
 		 * The data segment starts with a 16-bit big-endian sense
 		 * length, followed by the sense data itself.
 		 */
 		if (data_segment_len < sizeof(sense_len)) {
 			ISCSI_SESSION_WARN(is, "truncated data segment (%zd bytes)",
 			    data_segment_len);
 			if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
 				xpt_freeze_devq(ccb->ccb_h.path, 1);
 				ISCSI_SESSION_DEBUG(is, "freezing devq");
 			}
 			ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN;
 			goto out;
 		}
 		icl_pdu_get_data(response, 0, &sense_len, sizeof(sense_len));
 		sense_len = ntohs(sense_len);
 #if 0
 		ISCSI_SESSION_DEBUG(is, "sense_len %d, data len %zd",
 		    sense_len, data_segment_len);
 #endif
 		if (sizeof(sense_len) + sense_len > data_segment_len) {
 			ISCSI_SESSION_WARN(is, "truncated data segment "
 			    "(%zd bytes, should be %zd)",
 			    data_segment_len, sizeof(sense_len) + sense_len);
 			if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
 				xpt_freeze_devq(ccb->ccb_h.path, 1);
 				ISCSI_SESSION_DEBUG(is, "freezing devq");
 			}
 			ccb->ccb_h.status = CAM_REQ_CMP_ERR | CAM_DEV_QFRZN;
 			goto out;
 		} else if (sizeof(sense_len) + sense_len < data_segment_len)
 			ISCSI_SESSION_WARN(is, "oversize data segment "
 			    "(%zd bytes, should be %zd)",
 			    data_segment_len, sizeof(sense_len) + sense_len);
 		/* Never copy more sense data than the CCB has room for. */
 		if (sense_len > csio->sense_len) {
 			ISCSI_SESSION_DEBUG(is, "truncating sense from %d to %d",
 			    sense_len, csio->sense_len);
 			sense_len = csio->sense_len;
 		}
 		icl_pdu_get_data(response, sizeof(sense_len), &csio->sense_data, sense_len);
 		csio->sense_resid = csio->sense_len - sense_len;
 		ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
 	}
 
 out:
 	if (bhssr->bhssr_flags & BHSSR_FLAGS_RESIDUAL_UNDERFLOW)
 		csio->resid = ntohl(bhssr->bhssr_residual_count);
 
 	/*
 	 * For reads, the residual is recomputed from the number of bytes
 	 * actually received; a disagreement with the target is only logged.
 	 */
 	if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
 		KASSERT(received <= csio->dxfer_len,
 		    ("received > csio->dxfer_len"));
 		if (received < csio->dxfer_len) {
 			if (csio->resid != csio->dxfer_len - received) {
 				ISCSI_SESSION_WARN(is, "underflow mismatch: "
 				    "target indicates %d, we calculated %zd",
 				    csio->resid, csio->dxfer_len - received);
 			}
 			csio->resid = csio->dxfer_len - received;
 		}
 	}
 
 	xpt_done(ccb);
 	icl_pdu_free(response);
 }
 
 /*
  * Handle a Task Management Response PDU.  The outstanding entry
  * matching the initiator task tag must exist and must not carry a CCB;
  * otherwise a reconnect is requested.  On "function complete" the task
  * found under io_datasn, if it has a CCB, is terminated.  The entry is
  * then removed and the PDU freed.
  */
 static void
 iscsi_pdu_handle_task_response(struct icl_pdu *response)
 {
 	struct iscsi_bhs_task_management_response *bhstmr;
 	struct iscsi_outstanding *io, *target_io;
 	struct iscsi_session *is;
 
 	is = PDU_SESSION(response);
 	bhstmr = (struct iscsi_bhs_task_management_response *)response->ip_bhs;
 
 	io = iscsi_outstanding_find(is, bhstmr->bhstmr_initiator_task_tag);
 	if (io == NULL || io->io_ccb != NULL) {
 		ISCSI_SESSION_WARN(is, "bad itt 0x%x",
 		    bhstmr->bhstmr_initiator_task_tag);
 		icl_pdu_free(response);
 		iscsi_session_reconnect(is);
 		return;
 	}
 
 	if (bhstmr->bhstmr_response == BHSTMR_RESPONSE_FUNCTION_COMPLETE) {
 		target_io = iscsi_outstanding_find(is, io->io_datasn);
 		if (target_io != NULL && target_io->io_ccb != NULL)
 			iscsi_session_terminate_task(is, target_io, false);
 	} else {
 		ISCSI_SESSION_WARN(is, "task response 0x%x",
 		    bhstmr->bhstmr_response);
 	}
 
 	iscsi_outstanding_remove(is, io);
 	icl_pdu_free(response);
 }
 
 /*
  * Handle a Data-In PDU: validate it against the outstanding task,
  * copy the payload into the CCB data buffer and, when the S flag is
  * set, complete the CCB with xpt_done().
  *
  * The session lock is released inside this routine on every path;
  * the response PDU is always freed.
  */
 static void
 iscsi_pdu_handle_data_in(struct icl_pdu *response)
 {
 	struct iscsi_bhs_data_in *bhsdi;
 	struct iscsi_outstanding *io;
 	struct iscsi_session *is;
 	union ccb *ccb;
 	struct ccb_scsiio *csio;
 	size_t data_segment_len, received, oreceived;
 	
 	is = PDU_SESSION(response);
 	bhsdi = (struct iscsi_bhs_data_in *)response->ip_bhs;
 	io = iscsi_outstanding_find(is, bhsdi->bhsdi_initiator_task_tag);
 	if (io == NULL || io->io_ccb == NULL) {
 		ISCSI_SESSION_WARN(is, "bad itt 0x%x", bhsdi->bhsdi_initiator_task_tag);
 		icl_pdu_free(response);
 		iscsi_session_reconnect(is);
 		ISCSI_SESSION_UNLOCK(is);
 		return;
 	}
 
 	data_segment_len = icl_pdu_data_segment_length(response);
 	if (data_segment_len == 0) {
 		/*
 		 * "The sending of 0 length data segments should be avoided,
 		 * but initiators and targets MUST be able to properly receive
 		 * 0 length data segments."
 		 */
 		ISCSI_SESSION_UNLOCK(is);
 		icl_pdu_free(response);
 		return;
 	}
 
 	/*
 	 * We need to track this for security reasons - without it, malicious target
 	 * could respond to SCSI READ without sending Data-In PDUs, which would result
 	 * in read operation on the initiator side returning random kernel data.
 	 */
 	if (ntohl(bhsdi->bhsdi_buffer_offset) != io->io_received) {
 		ISCSI_SESSION_WARN(is, "data out of order; expected offset %zd, got %zd",
 		    io->io_received, (size_t)ntohl(bhsdi->bhsdi_buffer_offset));
 		icl_pdu_free(response);
 		iscsi_session_reconnect(is);
 		ISCSI_SESSION_UNLOCK(is);
 		return;
 	}
 
 	ccb = io->io_ccb;
 	csio = &ccb->csio;
 
 	/* Refuse data that would run past the end of the CCB buffer. */
 	if (io->io_received + data_segment_len > csio->dxfer_len) {
 		ISCSI_SESSION_WARN(is, "oversize data segment (%zd bytes "
 		    "at offset %zd, buffer is %d)",
 		    data_segment_len, io->io_received, csio->dxfer_len);
 		icl_pdu_free(response);
 		iscsi_session_reconnect(is);
 		ISCSI_SESSION_UNLOCK(is);
 		return;
 	}
 
 	/*
 	 * Update the receive accounting and, for the final PDU (S flag),
 	 * remove the task - all before dropping the lock.  The values
 	 * needed afterwards are kept in locals, since "io" must not be
 	 * touched once it has been removed.
 	 */
 	oreceived = io->io_received;
 	io->io_received += data_segment_len;
 	received = io->io_received;
 	if ((bhsdi->bhsdi_flags & BHSDI_FLAGS_S) != 0)
 		iscsi_outstanding_remove(is, io);
 	ISCSI_SESSION_UNLOCK(is);
 
 	icl_pdu_get_data(response, 0, csio->data_ptr + oreceived, data_segment_len);
 
 	/*
 	 * XXX: Check DataSN.
 	 * XXX: Check F.
 	 */
 	if ((bhsdi->bhsdi_flags & BHSDI_FLAGS_S) == 0) {
 		/*
 		 * Nothing more to do.
 		 */
 		icl_pdu_free(response);
 		return;
 	}
 
 	//ISCSI_SESSION_DEBUG(is, "got S flag; status 0x%x", bhsdi->bhsdi_status);
 	if (bhsdi->bhsdi_status == 0) {
 		ccb->ccb_h.status = CAM_REQ_CMP;
 	} else {
 		if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
 			xpt_freeze_devq(ccb->ccb_h.path, 1);
 			ISCSI_SESSION_DEBUG(is, "freezing devq");
 		}
 		ccb->ccb_h.status = CAM_SCSI_STATUS_ERROR | CAM_DEV_QFRZN;
 		csio->scsi_status = bhsdi->bhsdi_status;
 	}
 
 	/*
 	 * For reads, the residual is recomputed from the number of bytes
 	 * actually received; a disagreement with the target is only logged.
 	 */
 	if ((csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_IN) {
 		KASSERT(received <= csio->dxfer_len,
 		    ("received > csio->dxfer_len"));
 		if (received < csio->dxfer_len) {
 			csio->resid = ntohl(bhsdi->bhsdi_residual_count);
 			if (csio->resid != csio->dxfer_len - received) {
 				ISCSI_SESSION_WARN(is, "underflow mismatch: "
 				    "target indicates %d, we calculated %zd",
 				    csio->resid, csio->dxfer_len - received);
 			}
 			csio->resid = csio->dxfer_len - received;
 		}
 	}
 
 	xpt_done(ccb);
 	icl_pdu_free(response);
 }
 
 /*
  * Handle a Logout Response PDU: emit a debug message and free the PDU.
  */
 static void
 iscsi_pdu_handle_logout_response(struct icl_pdu *response)
 {
 	struct iscsi_session *is = PDU_SESSION(response);
 
 	ISCSI_SESSION_DEBUG(is, "logout response");
 	icl_pdu_free(response);
 }
 
 static void
 iscsi_pdu_handle_r2t(struct icl_pdu *response)
 {
 	struct icl_pdu *request;
 	struct iscsi_session *is;
 	struct iscsi_bhs_r2t *bhsr2t;
 	struct iscsi_bhs_data_out *bhsdo;
 	struct iscsi_outstanding *io;
 	struct ccb_scsiio *csio;
 	size_t off, len, total_len;
 	int error;
 
 	is = PDU_SESSION(response);
 
 	bhsr2t = (struct iscsi_bhs_r2t *)response->ip_bhs;
 	io = iscsi_outstanding_find(is, bhsr2t->bhsr2t_initiator_task_tag);
 	if (io == NULL || io->io_ccb == NULL) {
 		ISCSI_SESSION_WARN(is, "bad itt 0x%x; reconnecting",
 		    bhsr2t->bhsr2t_initiator_task_tag);
 		icl_pdu_free(response);
 		iscsi_session_reconnect(is);
 		return;
 	}
 
 	csio = &io->io_ccb->csio;
 
 	if ((csio->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_OUT) {
 		ISCSI_SESSION_WARN(is, "received R2T for read command; reconnecting");
 		icl_pdu_free(response);
 		iscsi_session_reconnect(is);
 		return;
 	}
 
 	/*
 	 * XXX: Verify R2TSN.
 	 */
 
 	io->io_datasn = 0;
 
 	off = ntohl(bhsr2t->bhsr2t_buffer_offset);
 	if (off > csio->dxfer_len) {
 		ISCSI_SESSION_WARN(is, "target requested invalid offset "
 		    "%zd, buffer is is %d; reconnecting", off, csio->dxfer_len);
 		icl_pdu_free(response);
 		iscsi_session_reconnect(is);
 		return;
 	}
 
 	total_len = ntohl(bhsr2t->bhsr2t_desired_data_transfer_length);
 	if (total_len == 0 || total_len > csio->dxfer_len) {
 		ISCSI_SESSION_WARN(is, "target requested invalid length "
 		    "%zd, buffer is %d; reconnecting", total_len, csio->dxfer_len);
 		icl_pdu_free(response);
 		iscsi_session_reconnect(is);
 		return;
 	}
 
 	//ISCSI_SESSION_DEBUG(is, "r2t; off %zd, len %zd", off, total_len);
 
 	for (;;) {
 		len = total_len;
 
 		if (len > is->is_max_data_segment_length)
 			len = is->is_max_data_segment_length;
 
 		if (off + len > csio->dxfer_len) {
 			ISCSI_SESSION_WARN(is, "target requested invalid "
 			    "length/offset %zd, buffer is %d; reconnecting",
 			    off + len, csio->dxfer_len);
 			icl_pdu_free(response);
 			iscsi_session_reconnect(is);
 			return;
 		}
 
 		request = icl_pdu_new(response->ip_conn, M_NOWAIT);
 		if (request == NULL) {
 			icl_pdu_free(response);
 			iscsi_session_reconnect(is);
 			return;
 		}
 
 		bhsdo = (struct iscsi_bhs_data_out *)request->ip_bhs;
 		bhsdo->bhsdo_opcode = ISCSI_BHS_OPCODE_SCSI_DATA_OUT;
 		bhsdo->bhsdo_lun = bhsr2t->bhsr2t_lun;
 		bhsdo->bhsdo_initiator_task_tag =
 		    bhsr2t->bhsr2t_initiator_task_tag;
 		bhsdo->bhsdo_target_transfer_tag =
 		    bhsr2t->bhsr2t_target_transfer_tag;
 		bhsdo->bhsdo_datasn = htonl(io->io_datasn++);
 		bhsdo->bhsdo_buffer_offset = htonl(off);
 		error = icl_pdu_append_data(request, csio->data_ptr + off, len,
 		    M_NOWAIT);
 		if (error != 0) {
 			ISCSI_SESSION_WARN(is, "failed to allocate memory; "
 			    "reconnecting");
 			icl_pdu_free(request);
 			icl_pdu_free(response);
 			iscsi_session_reconnect(is);
 			return;
 		}
 
 		off += len;
 		total_len -= len;
 
 		if (total_len == 0) {
 			bhsdo->bhsdo_flags |= BHSDO_FLAGS_F;
 			//ISCSI_SESSION_DEBUG(is, "setting F, off %zd", off);
 		} else {
 			//ISCSI_SESSION_DEBUG(is, "not finished, off %zd", off);
 		}
 
 		iscsi_pdu_queue_locked(request);
 
 		if (total_len == 0)
 			break;
 	}
 
 	icl_pdu_free(response);
 }
 
 /*
  * Handle an Asynchronous Message PDU.  A logout request from the
  * target results in a logout followed by session termination; all
  * other events are only logged.  The response PDU is always freed.
  */
 static void
 iscsi_pdu_handle_async_message(struct icl_pdu *response)
 {
 	struct iscsi_bhs_asynchronous_message *bhsam;
 	struct iscsi_session *is;
 
 	is = PDU_SESSION(response);
 	bhsam = (struct iscsi_bhs_asynchronous_message *)response->ip_bhs;
 	switch (bhsam->bhsam_async_event) {
 	case BHSAM_EVENT_TARGET_REQUESTS_LOGOUT:
 		ISCSI_SESSION_WARN(is, "target requests logout; removing session");
 		iscsi_session_logout(is);
 		iscsi_session_terminate(is);
 		break;
 	case BHSAM_EVENT_TARGET_TERMINATES_CONNECTION:
 		ISCSI_SESSION_WARN(is, "target indicates it will drop the connection");
 		break;
 	case BHSAM_EVENT_TARGET_TERMINATES_SESSION:
 		ISCSI_SESSION_WARN(is, "target indicates it will drop the session");
 		break;
 	default:
 		/*
 		 * XXX: Technically, we're obligated to also handle
 		 * 	parameter renegotiation.
 		 */
 		ISCSI_SESSION_WARN(is, "ignoring AsyncEvent %d", bhsam->bhsam_async_event);
 		break;
 	}
 
 	icl_pdu_free(response);
 }
 
 /*
  * Handle a Reject PDU: log the reject reason and free the PDU.
  */
 static void
 iscsi_pdu_handle_reject(struct icl_pdu *response)
 {
 	struct iscsi_session *is = PDU_SESSION(response);
 	struct iscsi_bhs_reject *bhsr =
 	    (struct iscsi_bhs_reject *)response->ip_bhs;
 
 	ISCSI_SESSION_WARN(is, "received Reject PDU, reason 0x%x; protocol error?",
 	    bhsr->bhsr_reason);
 
 	icl_pdu_free(response);
 }
 
 /*
  * Block until some session has is_waiting_for_iscsid set, then move
  * that session into the login phase and fill "request" with its id,
  * ISID, configuration and the ICL limits for the configured offload.
  *
  * Returns 0 on success, the cv_wait_sig() error if the wait was
  * interrupted, or the icl_limits() error.
  */
 static int
 iscsi_ioctl_daemon_wait(struct iscsi_softc *sc,
     struct iscsi_daemon_request *request)
 {
 	struct iscsi_session *is;
 	int error;
 
 	sx_slock(&sc->sc_lock);
 	for (;;) {
 		/*
 		 * Leaving the loop via "break" keeps the matching session
 		 * locked; if no session matches, "is" ends up NULL.
 		 */
 		TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
 			ISCSI_SESSION_LOCK(is);
 			if (is->is_waiting_for_iscsid)
 				break;
 			ISCSI_SESSION_UNLOCK(is);
 		}
 
 		if (is == NULL) {
 			/*
 			 * No session requires attention from iscsid(8); wait.
 			 */
 			error = cv_wait_sig(&sc->sc_cv, &sc->sc_lock);
 			if (error != 0) {
 				sx_sunlock(&sc->sc_lock);
 				return (error);
 			}
 			continue;
 		}
 
 		is->is_waiting_for_iscsid = false;
 		is->is_login_phase = true;
 		is->is_reason[0] = '\0';
 		ISCSI_SESSION_UNLOCK(is);
 
 		request->idr_session_id = is->is_id;
 		memcpy(&request->idr_isid, &is->is_isid,
 		    sizeof(request->idr_isid));
 		request->idr_tsih = 0;	/* New or reinstated session. */
 		memcpy(&request->idr_conf, &is->is_conf,
 		    sizeof(request->idr_conf));
 		
 		error = icl_limits(is->is_conf.isc_offload,
 		    &request->idr_limits.isl_max_data_segment_length);
 		if (error != 0) {
 			/*
 			 * NOTE(review): the session has already been moved
 			 * into the login phase at this point.
 			 */
 			ISCSI_SESSION_WARN(is, "icl_limits for offload \"%s\" "
 			    "failed with error %d", is->is_conf.isc_offload,
 			    error);
 			sx_sunlock(&sc->sc_lock);
 			return (error);
 		}
 
 		sx_sunlock(&sc->sc_lock);
 		return (0);
 	}
 }
 
 /*
  * Take over a logged-in connection from iscsid(8): copy the negotiated
  * parameters into the session, hand the socket off to ICL and either
  * release the frozen SIM queue (reconnect) or allocate and register
  * a new CAM SIM, bus and path (first connection).
  *
  * Returns 0 on success; ESRCH if the session id is unknown; EINVAL for
  * discovery or terminating sessions; EBUSY if the session is already
  * connected; the icl_conn_handoff() error; or ENOMEM if the CAM setup
  * fails.  Failures after the parameters have been copied also request
  * session termination.  No lock is held on return.
  */
 static int
 iscsi_ioctl_daemon_handoff(struct iscsi_softc *sc,
     struct iscsi_daemon_handoff *handoff)
 {
 	struct iscsi_session *is;
 	struct icl_conn *ic;
 	int error;
 
 	sx_slock(&sc->sc_lock);
 
 	/*
 	 * Find the session to hand off socket to.
 	 */
 	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
 		if (is->is_id == handoff->idh_session_id)
 			break;
 	}
 	if (is == NULL) {
 		sx_sunlock(&sc->sc_lock);
 		return (ESRCH);
 	}
 	ISCSI_SESSION_LOCK(is);
 	ic = is->is_conn;
 	if (is->is_conf.isc_discovery || is->is_terminating) {
 		ISCSI_SESSION_UNLOCK(is);
 		sx_sunlock(&sc->sc_lock);
 		return (EINVAL);
 	}
 	if (is->is_connected) {
 		/*
 		 * This might have happened because another iscsid(8)
 		 * instance handed off the connection in the meantime.
 		 * Just return.
 		 */
 		ISCSI_SESSION_WARN(is, "handoff on already connected "
 		    "session");
 		ISCSI_SESSION_UNLOCK(is);
 		sx_sunlock(&sc->sc_lock);
 		return (EBUSY);
 	}
 
 	strlcpy(is->is_target_alias, handoff->idh_target_alias,
 	    sizeof(is->is_target_alias));
 	is->is_tsih = handoff->idh_tsih;
 	is->is_statsn = handoff->idh_statsn;
 	is->is_initial_r2t = handoff->idh_initial_r2t;
 	is->is_immediate_data = handoff->idh_immediate_data;
 
 	/*
 	 * Cap MaxRecvDataSegmentLength obtained from the target to the maximum
 	 * size supported by our ICL module.
 	 */
 	is->is_max_data_segment_length = min(ic->ic_max_data_segment_length,
 	    handoff->idh_max_data_segment_length);
 	is->is_max_burst_length = handoff->idh_max_burst_length;
 	is->is_first_burst_length = handoff->idh_first_burst_length;
 
 	if (handoff->idh_header_digest == ISCSI_DIGEST_CRC32C)
 		ic->ic_header_crc32c = true;
 	else
 		ic->ic_header_crc32c = false;
 	if (handoff->idh_data_digest == ISCSI_DIGEST_CRC32C)
 		ic->ic_data_crc32c = true;
 	else
 		ic->ic_data_crc32c = false;
 
 	is->is_cmdsn = 0;
 	is->is_expcmdsn = 0;
 	is->is_maxcmdsn = 0;
 	is->is_waiting_for_iscsid = false;
 	is->is_login_phase = false;
 	is->is_timeout = 0;
 	is->is_connected = true;
 	is->is_reason[0] = '\0';
 
 	ISCSI_SESSION_UNLOCK(is);
 
 #ifdef ICL_KERNEL_PROXY
 	if (handoff->idh_socket != 0) {
 #endif
 		/*
 		 * Handoff without using ICL proxy.
 		 */
 		error = icl_conn_handoff(ic, handoff->idh_socket);
 		if (error != 0) {
 			sx_sunlock(&sc->sc_lock);
 			iscsi_session_terminate(is);
 			return (error);
 		}
 #ifdef ICL_KERNEL_PROXY
 	}
 #endif
 
 	sx_sunlock(&sc->sc_lock);
 
 	if (is->is_sim != NULL) {
 		/*
 		 * When reconnecting, there already is SIM allocated for the session.
 		 */
 		KASSERT(is->is_simq_frozen, ("reconnect without frozen simq"));
 		ISCSI_SESSION_LOCK(is);
 		ISCSI_SESSION_DEBUG(is, "releasing");
 		xpt_release_simq(is->is_sim, 1);
 		is->is_simq_frozen = false;
 		ISCSI_SESSION_UNLOCK(is);
 
 	} else {
 		ISCSI_SESSION_LOCK(is);
 		is->is_devq = cam_simq_alloc(maxtags);
 		if (is->is_devq == NULL) {
 			/*
 			 * Drop the session lock before bailing out, just
 			 * like the other failure paths below; returning
 			 * with it held would leak the lock.
 			 */
 			ISCSI_SESSION_UNLOCK(is);
 			ISCSI_SESSION_WARN(is, "failed to allocate simq");
 			iscsi_session_terminate(is);
 			return (ENOMEM);
 		}
 
 		is->is_sim = cam_sim_alloc(iscsi_action, iscsi_poll, "iscsi",
 		    is, is->is_id /* unit */, &is->is_lock,
 		    1, maxtags, is->is_devq);
 		if (is->is_sim == NULL) {
 			ISCSI_SESSION_UNLOCK(is);
 			ISCSI_SESSION_WARN(is, "failed to allocate SIM");
 			cam_simq_free(is->is_devq);
 			iscsi_session_terminate(is);
 			return (ENOMEM);
 		}
 
 		error = xpt_bus_register(is->is_sim, NULL, 0);
 		if (error != 0) {
 			ISCSI_SESSION_UNLOCK(is);
 			ISCSI_SESSION_WARN(is, "failed to register bus");
 			iscsi_session_terminate(is);
 			return (ENOMEM);
 		}
 
 		error = xpt_create_path(&is->is_path, /*periph*/NULL,
 		    cam_sim_path(is->is_sim), CAM_TARGET_WILDCARD,
 		    CAM_LUN_WILDCARD);
 		if (error != CAM_REQ_CMP) {
 			ISCSI_SESSION_UNLOCK(is);
 			ISCSI_SESSION_WARN(is, "failed to create path");
 			iscsi_session_terminate(is);
 			return (ENOMEM);
 		}
 		ISCSI_SESSION_UNLOCK(is);
 	}
 
 	return (0);
 }
 
 /*
  * Record the failure reason reported by iscsid(8) in the session's
  * is_reason.  Returns ESRCH if no session has the given id, 0 otherwise.
  */
 static int
 iscsi_ioctl_daemon_fail(struct iscsi_softc *sc,
     struct iscsi_daemon_fail *fail)
 {
 	struct iscsi_session *is;
 	int error;
 
 	sx_slock(&sc->sc_lock);
 
 	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
 		if (is->is_id == fail->idf_session_id)
 			break;
 	}
 
 	if (is != NULL) {
 		ISCSI_SESSION_LOCK(is);
 		ISCSI_SESSION_DEBUG(is, "iscsid(8) failed: %s",
 		    fail->idf_reason);
 		strlcpy(is->is_reason, fail->idf_reason,
 		    sizeof(is->is_reason));
 		//is->is_waiting_for_iscsid = false;
 		//is->is_login_phase = true;
 		//iscsi_session_reconnect(is);
 		ISCSI_SESSION_UNLOCK(is);
 		error = 0;
 	} else {
 		error = ESRCH;
 	}
 
 	sx_sunlock(&sc->sc_lock);
 
 	return (error);
 }
 
 #ifdef ICL_KERNEL_PROXY
 /*
  * Connect the session's ICL connection to the address supplied by
  * iscsid(8), optionally binding to a source address, and move the
  * session into the login phase.
  *
  * Returns ESRCH if the session id is unknown, a getsockaddr() error,
  * or the result of icl_conn_connect().
  */
 static int
 iscsi_ioctl_daemon_connect(struct iscsi_softc *sc,
     struct iscsi_daemon_connect *idc)
 {
 	struct iscsi_session *is;
 	struct sockaddr *from_sa, *to_sa;
 	int error;
 
 	sx_slock(&sc->sc_lock);
 	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
 		if (is->is_id == idc->idc_session_id)
 			break;
 	}
 	if (is == NULL) {
 		sx_sunlock(&sc->sc_lock);
 		return (ESRCH);
 	}
 	/*
 	 * NOTE(review): "is" keeps being used after sc_lock is dropped;
 	 * confirm that the session cannot go away in the meantime.
 	 */
 	sx_sunlock(&sc->sc_lock);
 
 	/* The source address is optional; from_sa stays NULL without it. */
 	if (idc->idc_from_addrlen > 0) {
 		error = getsockaddr(&from_sa, (void *)idc->idc_from_addr, idc->idc_from_addrlen);
 		if (error != 0) {
 			ISCSI_SESSION_WARN(is,
 			    "getsockaddr failed with error %d", error);
 			return (error);
 		}
 	} else {
 		from_sa = NULL;
 	}
 	error = getsockaddr(&to_sa, (void *)idc->idc_to_addr, idc->idc_to_addrlen);
 	if (error != 0) {
 		ISCSI_SESSION_WARN(is, "getsockaddr failed with error %d",
 		    error);
 		free(from_sa, M_SONAME);
 		return (error);
 	}
 
 	ISCSI_SESSION_LOCK(is);
 	is->is_waiting_for_iscsid = false;
 	is->is_login_phase = true;
 	is->is_timeout = 0;
 	ISCSI_SESSION_UNLOCK(is);
 
 	error = icl_conn_connect(is->is_conn, idc->idc_iser, idc->idc_domain,
 	    idc->idc_socktype, idc->idc_protocol, from_sa, to_sa);
 	free(from_sa, M_SONAME);
 	free(to_sa, M_SONAME);
 
 	/*
 	 * Digests are always disabled during login phase.
 	 */
 	is->is_conn->ic_header_crc32c = false;
 	is->is_conn->ic_data_crc32c = false;
 
 	return (error);
 }
 
 /*
  * Send a login-phase PDU on behalf of iscsid(8): build a PDU from the
  * user-supplied header and optional data segment and queue it on the
  * session's connection.
  *
  * Returns ESRCH if the session id is unknown, EBUSY if the session is
  * not in the login phase, EIO if it is terminating or reconnecting,
  * EINVAL if the data segment is too large, or a copyin() error.
  */
 static int
 iscsi_ioctl_daemon_send(struct iscsi_softc *sc,
     struct iscsi_daemon_send *ids)
 {
 	struct iscsi_session *is;
 	struct icl_pdu *ip;
 	size_t datalen;
 	void *data;
 	int error;
 
 	sx_slock(&sc->sc_lock);
 	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
 		if (is->is_id == ids->ids_session_id)
 			break;
 	}
 	if (is == NULL) {
 		sx_sunlock(&sc->sc_lock);
 		return (ESRCH);
 	}
 	/*
 	 * NOTE(review): "is" keeps being used after sc_lock is dropped,
 	 * and the state flags below are read without the session lock.
 	 */
 	sx_sunlock(&sc->sc_lock);
 
 	if (is->is_login_phase == false)
 		return (EBUSY);
 
 	if (is->is_terminating || is->is_reconnecting)
 		return (EIO);
 
 	datalen = ids->ids_data_segment_len;
 	if (datalen > ISCSI_MAX_DATA_SEGMENT_LENGTH)
 		return (EINVAL);
 	/* "data" is only set - and only used below - when datalen > 0. */
 	if (datalen > 0) {
 		data = malloc(datalen, M_ISCSI, M_WAITOK);
 		error = copyin(ids->ids_data_segment, data, datalen);
 		if (error != 0) {
 			free(data, M_ISCSI);
 			return (error);
 		}
 	}
 
 	ip = icl_pdu_new(is->is_conn, M_WAITOK);
 	memcpy(ip->ip_bhs, ids->ids_bhs, sizeof(*ip->ip_bhs));
 	if (datalen > 0) {
 		error = icl_pdu_append_data(ip, data, datalen, M_WAITOK);
 		KASSERT(error == 0, ("icl_pdu_append_data(..., M_WAITOK) failed"));
 		free(data, M_ISCSI);
 	}
 	icl_pdu_queue(ip);
 
 	return (0);
 }
 
 static int
 iscsi_ioctl_daemon_receive(struct iscsi_softc *sc,
     struct iscsi_daemon_receive *idr)
 {
 	struct iscsi_session *is;
 	struct icl_pdu *ip;
 	void *data;
 
 	sx_slock(&sc->sc_lock);
 	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
 		if (is->is_id == idr->idr_session_id)
 			break;
 	}
 	if (is == NULL) {
 		sx_sunlock(&sc->sc_lock);
 		return (ESRCH);
 	}
 	sx_sunlock(&sc->sc_lock);
 
 	if (is->is_login_phase == false)
 		return (EBUSY);
 
 	ISCSI_SESSION_LOCK(is);
 	while (is->is_login_pdu == NULL &&
 	    is->is_terminating == false &&
 	    is->is_reconnecting == false)
 		cv_wait(&is->is_login_cv, &is->is_lock);
 	if (is->is_terminating || is->is_reconnecting) {
 		ISCSI_SESSION_UNLOCK(is);
 		return (EIO);
 	}
 	ip = is->is_login_pdu;
 	is->is_login_pdu = NULL;
 	ISCSI_SESSION_UNLOCK(is);
 
 	if (ip->ip_data_len > idr->idr_data_segment_len) {
 		icl_pdu_free(ip);
 		return (EMSGSIZE);
 	}
 
 	copyout(ip->ip_bhs, idr->idr_bhs, sizeof(*ip->ip_bhs));
 	if (ip->ip_data_len > 0) {
 		data = malloc(ip->ip_data_len, M_ISCSI, M_WAITOK);
 		icl_pdu_get_data(ip, 0, data, ip->ip_data_len);
 		copyout(data, idr->idr_data_segment, ip->ip_data_len);
 		free(data, M_ISCSI);
 	}
 
 	icl_pdu_free(ip);
 
 	return (0);
 }
 #endif /* ICL_KERNEL_PROXY */
 
 /*
  * Force NUL termination of the string fields of a session
  * configuration by overwriting the last byte of each of them.
  */
 static void
 iscsi_sanitize_session_conf(struct iscsi_session_conf *isc)
 {
 	/*
 	 * XXX: This is not particularly secure.  We should
 	 * 	create our own conf and then copy in relevant
 	 * 	fields.
 	 */
 
 	/* Names. */
 	isc->isc_initiator[ISCSI_NAME_LEN - 1] = '\0';
 	isc->isc_target[ISCSI_NAME_LEN - 1] = '\0';
 	isc->isc_user[ISCSI_NAME_LEN - 1] = '\0';
 	isc->isc_mutual_user[ISCSI_NAME_LEN - 1] = '\0';
 
 	/* Addresses. */
 	isc->isc_initiator_addr[ISCSI_ADDR_LEN - 1] = '\0';
 	isc->isc_target_addr[ISCSI_ADDR_LEN - 1] = '\0';
 
 	/* Alias. */
 	isc->isc_initiator_alias[ISCSI_ALIAS_LEN - 1] = '\0';
 
 	/* Secrets. */
 	isc->isc_secret[ISCSI_SECRET_LEN - 1] = '\0';
 	isc->isc_mutual_secret[ISCSI_SECRET_LEN - 1] = '\0';
 }
 
 /*
  * Check a session configuration for the minimum set of fields.
  *
  * The initiator name and the target address must be non-empty.  A
  * discovery session must not name a target; a non-discovery session must.
  * Returns true when the configuration is acceptable, false otherwise
  * (after emitting a debug message naming the problem).
  */
 static bool
 iscsi_valid_session_conf(const struct iscsi_session_conf *isc)
 {
 	bool discovery, has_target;
 
 	if (isc->isc_initiator[0] == '\0') {
 		ISCSI_DEBUG("empty isc_initiator");
 		return (false);
 	}
 	if (isc->isc_target_addr[0] == '\0') {
 		ISCSI_DEBUG("empty isc_target_addr");
 		return (false);
 	}
 
 	discovery = (isc->isc_discovery != 0);
 	has_target = (isc->isc_target[0] != 0);
 
 	if (discovery && has_target) {
 		ISCSI_DEBUG("non-empty isc_target for discovery session");
 		return (false);
 	}
 	if (!discovery && !has_target) {
 		ISCSI_DEBUG("empty isc_target for non-discovery session");
 		return (false);
 	}
 
 	return (true);
 }
 
 /*
  * ISCSISADD handler: create a new session from a user-supplied
  * configuration and start its maintenance thread.
  *
  * The configuration is NUL-terminated and validated first.  A session is
  * rejected as a duplicate when an existing session has the same discovery
  * flag and target address and, for non-discovery sessions, the same
  * target name.
  *
  * Returns 0 on success, EINVAL when the configuration is invalid or the
  * ICL connection cannot be created, EBUSY for a duplicate, or the
  * kthread_add(9) error when the maintenance thread cannot be started.
  */
 static int
 iscsi_ioctl_session_add(struct iscsi_softc *sc, struct iscsi_session_add *isa)
 {
 	struct iscsi_session *is;
 	const struct iscsi_session *is2;
 	int error;
 
 	iscsi_sanitize_session_conf(&isa->isa_conf);
 	if (iscsi_valid_session_conf(&isa->isa_conf) == false)
 		return (EINVAL);
 
 	is = malloc(sizeof(*is), M_ISCSI, M_ZERO | M_WAITOK);
 	memcpy(&is->is_conf, &isa->isa_conf, sizeof(is->is_conf));
 
 	sx_xlock(&sc->sc_lock);
 
 	/*
 	 * Prevent duplicates.
 	 */
 	TAILQ_FOREACH(is2, &sc->sc_sessions, is_next) {
 		if (!!is->is_conf.isc_discovery !=
 		    !!is2->is_conf.isc_discovery)
 			continue;
 
 		if (strcmp(is->is_conf.isc_target_addr,
 		    is2->is_conf.isc_target_addr) != 0)
 			continue;
 
 		if (is->is_conf.isc_discovery == 0 &&
 		    strcmp(is->is_conf.isc_target,
 		    is2->is_conf.isc_target) != 0)
 			continue;
 
 		sx_xunlock(&sc->sc_lock);
 		free(is, M_ISCSI);
 		return (EBUSY);
 	}
 
 	is->is_conn = icl_new_conn(is->is_conf.isc_offload,
 	    "iscsi", &is->is_lock);
 	if (is->is_conn == NULL) {
 		sx_xunlock(&sc->sc_lock);
 		free(is, M_ISCSI);
 		return (EINVAL);
 	}
 	is->is_conn->ic_receive = iscsi_receive_callback;
 	is->is_conn->ic_error = iscsi_error_callback;
 	is->is_conn->ic_prv0 = is;
 	TAILQ_INIT(&is->is_outstanding);
 	STAILQ_INIT(&is->is_postponed);
 	mtx_init(&is->is_lock, "iscsi_lock", NULL, MTX_DEF);
 	cv_init(&is->is_maintenance_cv, "iscsi_mt");
 #ifdef ICL_KERNEL_PROXY
 	cv_init(&is->is_login_cv, "iscsi_login");
 #endif
 
 	is->is_softc = sc;
 	sc->sc_last_session_id++;
 	is->is_id = sc->sc_last_session_id;
 	is->is_isid[0] = 0x80; /* RFC 3720, 10.12.5: 10b, "Random" ISID. */
 	arc4rand(&is->is_isid[1], 5, 0);
 	is->is_tsih = 0;
 	callout_init(&is->is_callout, 1);
 
 	error = kthread_add(iscsi_maintenance_thread, is, NULL, NULL, 0, 0, "iscsimt");
 	if (error != 0) {
 		ISCSI_SESSION_WARN(is, "kthread_add(9) failed with error %d", error);
 		sx_xunlock(&sc->sc_lock);
 		/*
 		 * The session was never linked into sc_sessions and has
 		 * no maintenance thread to tear it down, so release
 		 * everything set up above here.  The callout was only
 		 * initialized, never armed.
 		 */
 		icl_conn_free(is->is_conn);
 		mtx_destroy(&is->is_lock);
 		cv_destroy(&is->is_maintenance_cv);
 #ifdef ICL_KERNEL_PROXY
 		cv_destroy(&is->is_login_cv);
 #endif
 		free(is, M_ISCSI);
 		return (error);
 	}
 
 	callout_reset(&is->is_callout, 1 * hz, iscsi_callout, is);
 	TAILQ_INSERT_TAIL(&sc->sc_sessions, is, is_next);
 
 	/*
 	 * Trigger immediate reconnection.
 	 */
 	ISCSI_SESSION_LOCK(is);
 	is->is_waiting_for_iscsid = true;
 	strlcpy(is->is_reason, "Waiting for iscsid(8)", sizeof(is->is_reason));
 	ISCSI_SESSION_UNLOCK(is);
 	cv_signal(&sc->sc_cv);
 
 	sx_xunlock(&sc->sc_lock);
 
 	return (0);
 }
 
 /*
  * Decide whether session (id1, c1) is selected by the filter (id2, c2).
  *
  * Each filter element acts as a wildcard when unset: id2 == 0 matches any
  * id, and an empty isc_target or isc_target_addr in c2 matches any value.
  * All set elements must match for the result to be true.
  */
 static bool
 iscsi_session_conf_matches(unsigned int id1, const struct iscsi_session_conf *c1,
     unsigned int id2, const struct iscsi_session_conf *c2)
 {
 
 	return ((id2 == 0 || id2 == id1) &&
 	    (c2->isc_target[0] == '\0' ||
 	    strcmp(c1->isc_target, c2->isc_target) == 0) &&
 	    (c2->isc_target_addr[0] == '\0' ||
 	    strcmp(c1->isc_target_addr, c2->isc_target_addr) == 0));
 }
 
 static int
 iscsi_ioctl_session_remove(struct iscsi_softc *sc,
     struct iscsi_session_remove *isr)
 {
 	struct iscsi_session *is, *tmp;
 	bool found = false;
 
 	iscsi_sanitize_session_conf(&isr->isr_conf);
 
 	sx_xlock(&sc->sc_lock);
 	TAILQ_FOREACH_SAFE(is, &sc->sc_sessions, is_next, tmp) {
 		ISCSI_SESSION_LOCK(is);
 		if (iscsi_session_conf_matches(is->is_id, &is->is_conf,
 		    isr->isr_session_id, &isr->isr_conf)) {
 			found = true;
 			iscsi_session_logout(is);
 			iscsi_session_terminate(is);
 		}
 		ISCSI_SESSION_UNLOCK(is);
 	}
 	sx_xunlock(&sc->sc_lock);
 
 	if (!found)
 		return (ESRCH);
 
 	return (0);
 }
 
 /*
  * ISCSISLIST handler: copy a state snapshot of every session out to the
  * user array isl->isl_pstates.
  *
  * On success isl->isl_nentries is overwritten with the number of entries
  * written and 0 is returned.  Returns EMSGSIZE when there are more
  * sessions than isl->isl_nentries, or the copyout(9) error; in both cases
  * isl->isl_nentries is left unchanged.
  */
 static int
 iscsi_ioctl_session_list(struct iscsi_softc *sc, struct iscsi_session_list *isl)
 {
 	struct iscsi_session_state iss;
 	struct iscsi_session *is;
 	unsigned int count;
 	int error;
 
 	count = 0;
 	error = 0;
 
 	sx_slock(&sc->sc_lock);
 	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
 		if (count >= isl->isl_nentries) {
 			error = EMSGSIZE;
 			break;
 		}
 
 		/* Zero first so no stale stack bytes reach userland. */
 		memset(&iss, 0, sizeof(iss));
 		iss.iss_id = is->is_id;
 		memcpy(&iss.iss_conf, &is->is_conf, sizeof(iss.iss_conf));
 		strlcpy(iss.iss_target_alias, is->is_target_alias, sizeof(iss.iss_target_alias));
 		strlcpy(iss.iss_reason, is->is_reason, sizeof(iss.iss_reason));
 		strlcpy(iss.iss_offload, is->is_conn->ic_offload, sizeof(iss.iss_offload));
 
 		iss.iss_header_digest = is->is_conn->ic_header_crc32c ?
 		    ISCSI_DIGEST_CRC32C : ISCSI_DIGEST_NONE;
 		iss.iss_data_digest = is->is_conn->ic_data_crc32c ?
 		    ISCSI_DIGEST_CRC32C : ISCSI_DIGEST_NONE;
 
 		iss.iss_max_data_segment_length = is->is_max_data_segment_length;
 		iss.iss_immediate_data = is->is_immediate_data;
 		iss.iss_connected = is->is_connected;
 
 		error = copyout(&iss, isl->isl_pstates + count, sizeof(iss));
 		if (error != 0)
 			break;
 		count++;
 	}
 	sx_sunlock(&sc->sc_lock);
 
 	if (error != 0)
 		return (error);
 
 	isl->isl_nentries = count;
 
 	return (0);
 }
 
 static int
 iscsi_ioctl_session_modify(struct iscsi_softc *sc,
     struct iscsi_session_modify *ism)
 {
 	struct iscsi_session *is;
 
 	iscsi_sanitize_session_conf(&ism->ism_conf);
 	if (iscsi_valid_session_conf(&ism->ism_conf) == false)
 		return (EINVAL);
 
 	sx_xlock(&sc->sc_lock);
 	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
 		ISCSI_SESSION_LOCK(is);
 		if (is->is_id == ism->ism_session_id)
 			break;
 		ISCSI_SESSION_UNLOCK(is);
 	}
 	if (is == NULL) {
 		sx_xunlock(&sc->sc_lock);
 		return (ESRCH);
 	}
 	sx_xunlock(&sc->sc_lock);
 
 	memcpy(&is->is_conf, &ism->ism_conf, sizeof(is->is_conf));
 	ISCSI_SESSION_UNLOCK(is);
 
 	iscsi_session_reconnect(is);
 
 	return (0);
 }
 
 /*
  * ioctl entry point for the iscsi control device: route each command to
  * its handler, passing the softc stored in dev->si_drv1 and the argument
  * buffer cast to the command's structure type.
  *
  * Returns the handler's result, or EINVAL for an unknown command.
  */
 static int
 iscsi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int mode,
     struct thread *td)
 {
 	struct iscsi_softc *sc = dev->si_drv1;
 	int error;
 
 	switch (cmd) {
 	case ISCSIDWAIT:
 		error = iscsi_ioctl_daemon_wait(sc,
 		    (struct iscsi_daemon_request *)arg);
 		break;
 	case ISCSIDHANDOFF:
 		error = iscsi_ioctl_daemon_handoff(sc,
 		    (struct iscsi_daemon_handoff *)arg);
 		break;
 	case ISCSIDFAIL:
 		error = iscsi_ioctl_daemon_fail(sc,
 		    (struct iscsi_daemon_fail *)arg);
 		break;
 #ifdef ICL_KERNEL_PROXY
 	case ISCSIDCONNECT:
 		error = iscsi_ioctl_daemon_connect(sc,
 		    (struct iscsi_daemon_connect *)arg);
 		break;
 	case ISCSIDSEND:
 		error = iscsi_ioctl_daemon_send(sc,
 		    (struct iscsi_daemon_send *)arg);
 		break;
 	case ISCSIDRECEIVE:
 		error = iscsi_ioctl_daemon_receive(sc,
 		    (struct iscsi_daemon_receive *)arg);
 		break;
 #endif /* ICL_KERNEL_PROXY */
 	case ISCSISADD:
 		error = iscsi_ioctl_session_add(sc,
 		    (struct iscsi_session_add *)arg);
 		break;
 	case ISCSISREMOVE:
 		error = iscsi_ioctl_session_remove(sc,
 		    (struct iscsi_session_remove *)arg);
 		break;
 	case ISCSISLIST:
 		error = iscsi_ioctl_session_list(sc,
 		    (struct iscsi_session_list *)arg);
 		break;
 	case ISCSISMODIFY:
 		error = iscsi_ioctl_session_modify(sc,
 		    (struct iscsi_session_modify *)arg);
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Look up an outstanding entry by Initiator Task Tag.  The session lock
  * must be held.  Returns the entry, or NULL when no entry has that tag.
  */
 static struct iscsi_outstanding *
 iscsi_outstanding_find(struct iscsi_session *is, uint32_t initiator_task_tag)
 {
 	struct iscsi_outstanding *io;
 
 	ISCSI_SESSION_LOCK_ASSERT(is);
 
 	/* TAILQ_FOREACH leaves io == NULL when the list is exhausted. */
 	TAILQ_FOREACH(io, &is->is_outstanding, io_next) {
 		if (io->io_initiator_task_tag == initiator_task_tag)
 			break;
 	}
 	return (io);
 }
 
 /*
  * Look up an outstanding entry by the CCB it was created for.  The
  * session lock must be held.  Returns the entry, or NULL when no entry
  * refers to that CCB.
  */
 static struct iscsi_outstanding *
 iscsi_outstanding_find_ccb(struct iscsi_session *is, union ccb *ccb)
 {
 	struct iscsi_outstanding *io;
 
 	ISCSI_SESSION_LOCK_ASSERT(is);
 
 	/* TAILQ_FOREACH leaves io == NULL when the list is exhausted. */
 	TAILQ_FOREACH(io, &is->is_outstanding, io_next) {
 		if (io->io_ccb == ccb)
 			break;
 	}
 	return (io);
 }
 
 /*
  * Allocate an outstanding entry for a request and append it to the
  * session's is_outstanding list.  The session lock must be held.
  *
  * icl_conn_task_setup() is given *initiator_task_tagp and may update it;
  * the resulting tag is recorded in the entry.  ccb is stored as-is.
  * Returns the new entry, or NULL when allocation or task setup fails (a
  * warning is logged and nothing is left allocated).
  */
 static struct iscsi_outstanding *
 iscsi_outstanding_add(struct iscsi_session *is,
     union ccb *ccb, uint32_t *initiator_task_tagp)
 {
 	struct iscsi_outstanding *entry;
 	int rv;
 
 	ISCSI_SESSION_LOCK_ASSERT(is);
 
 	entry = uma_zalloc(iscsi_outstanding_zone, M_NOWAIT | M_ZERO);
 	if (entry == NULL) {
 		ISCSI_SESSION_WARN(is, "failed to allocate %zd bytes",
 		    sizeof(*entry));
 		return (NULL);
 	}
 
 	rv = icl_conn_task_setup(is->is_conn, &ccb->csio,
 	    initiator_task_tagp, &entry->io_icl_prv);
 	if (rv != 0) {
 		ISCSI_SESSION_WARN(is,
 		    "icl_conn_task_setup() failed with error %d", rv);
 		uma_zfree(iscsi_outstanding_zone, entry);
 		return (NULL);
 	}
 
 	/* A tag must never be tracked twice at the same time. */
 	KASSERT(iscsi_outstanding_find(is, *initiator_task_tagp) == NULL,
 	    ("initiator_task_tag 0x%x already added", *initiator_task_tagp));
 
 	entry->io_ccb = ccb;
 	entry->io_initiator_task_tag = *initiator_task_tagp;
 	TAILQ_INSERT_TAIL(&is->is_outstanding, entry, io_next);
 
 	return (entry);
 }
 
 /*
  * Release an outstanding entry: report task completion to ICL, unlink the
  * entry from the session's is_outstanding list and free it.  The session
  * lock must be held.
  */
 static void
 iscsi_outstanding_remove(struct iscsi_session *is, struct iscsi_outstanding *io)
 {
 	void *icl_prv;
 
 	ISCSI_SESSION_LOCK_ASSERT(is);
 
 	icl_prv = io->io_icl_prv;
 	icl_conn_task_done(is->is_conn, icl_prv);
 
 	TAILQ_REMOVE(&is->is_outstanding, io, io_next);
 	uma_zfree(iscsi_outstanding_zone, io);
 }
 
 /*
  * Handle XPT_ABORT: queue an ABORT TASK task management request for the
  * command referenced by ccb->cab.abort_ccb.  The session lock must be
  * held.
  *
  * The CCB is completed immediately with CAM_REQ_ABORTED during login
  * phase, with CAM_REQ_CMP when the referenced command is not outstanding,
  * and with CAM_RESRC_UNAVAIL when the PDU or the tracking entry cannot be
  * allocated.  Otherwise the request PDU is queued and the CCB is not
  * completed here.
  */
 static void
 iscsi_action_abort(struct iscsi_session *is, union ccb *ccb)
 {
 	struct iscsi_bhs_task_management_request *bhstmr;
 	struct iscsi_outstanding *io, *aio;
 	struct icl_pdu *request;
 	struct ccb_abort *cab;
 	uint32_t initiator_task_tag, status;
 
 	ISCSI_SESSION_LOCK_ASSERT(is);
 
 	cab = &ccb->cab;
 
 #if 0
 	KASSERT(is->is_login_phase == false, ("%s called during Login Phase", __func__));
 #else
 	if (is->is_login_phase) {
 		status = CAM_REQ_ABORTED;
 		goto complete;
 	}
 #endif
 
 	/* Nothing to abort if the target command is no longer tracked. */
 	aio = iscsi_outstanding_find_ccb(is, cab->abort_ccb);
 	if (aio == NULL) {
 		status = CAM_REQ_CMP;
 		goto complete;
 	}
 
 	request = icl_pdu_new(is->is_conn, M_NOWAIT);
 	if (request == NULL) {
 		status = CAM_RESRC_UNAVAIL;
 		goto complete;
 	}
 
 	initiator_task_tag = is->is_initiator_task_tag++;
 
 	io = iscsi_outstanding_add(is, NULL, &initiator_task_tag);
 	if (io == NULL) {
 		icl_pdu_free(request);
 		status = CAM_RESRC_UNAVAIL;
 		goto complete;
 	}
 	/* io_datasn carries the tag of the task being aborted. */
 	io->io_datasn = aio->io_initiator_task_tag;
 
 	bhstmr = (struct iscsi_bhs_task_management_request *)request->ip_bhs;
 	bhstmr->bhstmr_opcode = ISCSI_BHS_OPCODE_TASK_REQUEST;
 	bhstmr->bhstmr_function = 0x80 | BHSTMR_FUNCTION_ABORT_TASK;
 	bhstmr->bhstmr_lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun));
 	bhstmr->bhstmr_initiator_task_tag = initiator_task_tag;
 	bhstmr->bhstmr_referenced_task_tag = aio->io_initiator_task_tag;
 
 	iscsi_pdu_queue_locked(request);
 	return;
 
 complete:
 	ccb->ccb_h.status = status;
 	xpt_done(ccb);
 }
 
 /*
  * Handle XPT_SCSI_IO: build a SCSI Command PDU for the CCB, register it
  * as outstanding and queue it for transmission.  The session lock must be
  * held.
  *
  * During login phase the CCB is completed with CAM_REQ_ABORTED.  When the
  * PDU, the tracking entry or the immediate data cannot be allocated it is
  * completed with CAM_RESRC_UNAVAIL.  In all of these cases the device
  * queue is frozen (once) and CAM_DEV_QFRZN is set in the status.
  * Otherwise the PDU is queued and the CCB is not completed here.
  */
 static void
 iscsi_action_scsiio(struct iscsi_session *is, union ccb *ccb)
 {
 	struct icl_pdu *request;
 	struct iscsi_bhs_scsi_command *bhssc;
 	struct ccb_scsiio *csio;
 	struct iscsi_outstanding *io;
 	size_t len;
 	uint32_t initiator_task_tag;
 	int error;
 
 	ISCSI_SESSION_LOCK_ASSERT(is);
 
 #if 0
 	KASSERT(is->is_login_phase == false, ("%s called during Login Phase", __func__));
 #else
 	if (is->is_login_phase) {
 		ISCSI_SESSION_DEBUG(is, "called during login phase");
 		if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
 			xpt_freeze_devq(ccb->ccb_h.path, 1);
 			ISCSI_SESSION_DEBUG(is, "freezing devq");
 		}
 		ccb->ccb_h.status = CAM_REQ_ABORTED | CAM_DEV_QFRZN;
 		xpt_done(ccb);
 		return;
 	}
 #endif
 
 	request = icl_pdu_new(is->is_conn, M_NOWAIT);
 	if (request == NULL) {
 		if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
 			xpt_freeze_devq(ccb->ccb_h.path, 1);
 			ISCSI_SESSION_DEBUG(is, "freezing devq");
 		}
 		ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN;
 		xpt_done(ccb);
 		return;
 	}
 
 	initiator_task_tag = is->is_initiator_task_tag++;
 	io = iscsi_outstanding_add(is, ccb, &initiator_task_tag);
 	if (io == NULL) {
 		icl_pdu_free(request);
 		if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
 			xpt_freeze_devq(ccb->ccb_h.path, 1);
 			ISCSI_SESSION_DEBUG(is, "freezing devq");
 		}
 		ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN;
 		xpt_done(ccb);
 		return;
 	}
 
 	csio = &ccb->csio;
 	bhssc = (struct iscsi_bhs_scsi_command *)request->ip_bhs;
 	bhssc->bhssc_opcode = ISCSI_BHS_OPCODE_SCSI_COMMAND;
 	bhssc->bhssc_flags |= BHSSC_FLAGS_F;
 	switch (csio->ccb_h.flags & CAM_DIR_MASK) {
 	case CAM_DIR_IN:
 		bhssc->bhssc_flags |= BHSSC_FLAGS_R;
 		break;
 	case CAM_DIR_OUT:
 		bhssc->bhssc_flags |= BHSSC_FLAGS_W;
 		break;
 	}
 
 	/* Map the CAM tag action onto the iSCSI task attribute. */
 	if ((ccb->ccb_h.flags & CAM_TAG_ACTION_VALID) != 0) {
 		switch (csio->tag_action) {
 		case MSG_HEAD_OF_Q_TAG:
 			bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_HOQ;
 			break;
 		case MSG_ORDERED_Q_TAG:
 			bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_ORDERED;
 			break;
 		case MSG_ACA_TASK:
 			bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_ACA;
 			break;
 		case MSG_SIMPLE_Q_TAG:
 		default:
 			bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_SIMPLE;
 			break;
 		}
 	} else
 		bhssc->bhssc_flags |= BHSSC_FLAGS_ATTR_UNTAGGED;
 
 	bhssc->bhssc_lun = htobe64(CAM_EXTLUN_BYTE_SWIZZLE(ccb->ccb_h.target_lun));
 	bhssc->bhssc_initiator_task_tag = initiator_task_tag;
 	bhssc->bhssc_expected_data_transfer_length = htonl(csio->dxfer_len);
 	KASSERT(csio->cdb_len <= sizeof(bhssc->bhssc_cdb),
 	    ("unsupported CDB size %zd", (size_t)csio->cdb_len));
 
 	if (csio->ccb_h.flags & CAM_CDB_POINTER)
 		memcpy(&bhssc->bhssc_cdb, csio->cdb_io.cdb_ptr, csio->cdb_len);
 	else
 		memcpy(&bhssc->bhssc_cdb, csio->cdb_io.cdb_bytes, csio->cdb_len);
 
 	if (is->is_immediate_data &&
 	    (csio->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_OUT) {
 		/*
 		 * Immediate data is capped by both the first burst length
 		 * and the maximum data segment length.
 		 */
 		len = csio->dxfer_len;
 		if (len > is->is_first_burst_length) {
 			ISCSI_SESSION_DEBUG(is, "len %zd -> %zd", len, is->is_first_burst_length);
 			len = is->is_first_burst_length;
 		}
 		if (len > is->is_max_data_segment_length) {
 			ISCSI_SESSION_DEBUG(is, "len %zd -> %zd", len, is->is_max_data_segment_length);
 			len = is->is_max_data_segment_length;
 		}
 
 		error = icl_pdu_append_data(request, csio->data_ptr, len, M_NOWAIT);
 		if (error != 0) {
 			/*
 			 * The CCB is completed below, so drop its entry
 			 * from is_outstanding; otherwise the entry leaks
 			 * and keeps pointing at a finished CCB.
 			 */
 			iscsi_outstanding_remove(is, io);
 			icl_pdu_free(request);
 			if ((ccb->ccb_h.status & CAM_DEV_QFRZN) == 0) {
 				xpt_freeze_devq(ccb->ccb_h.path, 1);
 				ISCSI_SESSION_DEBUG(is, "freezing devq");
 			}
 			ccb->ccb_h.status = CAM_RESRC_UNAVAIL | CAM_DEV_QFRZN;
 			xpt_done(ccb);
 			return;
 		}
 	}
 	iscsi_pdu_queue_locked(request);
 }
 
 /*
  * CAM SIM action routine: dispatch a CCB according to its function code.
  * The session is taken from the SIM's softc and its lock must be held.
  *
  * XPT_ABORT and XPT_SCSI_IO are handed to helpers and this function
  * returns without calling xpt_done() itself; every other function code
  * is answered here and completed with xpt_done().
  */
 static void
 iscsi_action(struct cam_sim *sim, union ccb *ccb)
 {
 	struct iscsi_session *is;
 
 	is = cam_sim_softc(sim);
 
 	ISCSI_SESSION_LOCK_ASSERT(is);
 
 	/*
 	 * Fail every request once the session is terminating, or while it
 	 * is disconnected and fail_on_disconnection is set.
 	 */
 	if (is->is_terminating ||
 	    (is->is_connected == false && fail_on_disconnection)) {
 		ccb->ccb_h.status = CAM_DEV_NOT_THERE;
 		xpt_done(ccb);
 		return;
 	}
 
 	switch (ccb->ccb_h.func_code) {
 	case XPT_PATH_INQ:
 	{
 		struct ccb_pathinq *cpi = &ccb->cpi;
 
 		cpi->version_num = 1;
 		cpi->hba_inquiry = PI_TAG_ABLE;
 		cpi->target_sprt = 0;
 		cpi->hba_misc = PIM_EXTLUNS;
 		cpi->hba_eng_cnt = 0;
 		cpi->max_target = 0;
 		/*
 		 * Note that the variable below is only relevant for targets
 		 * that don't claim compliance with anything above SPC2, which
 		 * means they don't support REPORT_LUNS.
 		 */
 		cpi->max_lun = 255;
 		cpi->initiator_id = ~0;
 		strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
 		strlcpy(cpi->hba_vid, "iSCSI", HBA_IDLEN);
 		strlcpy(cpi->dev_name, cam_sim_name(sim), DEV_IDLEN);
 		cpi->unit_number = cam_sim_unit(sim);
 		cpi->bus_id = cam_sim_bus(sim);
 		cpi->base_transfer_speed = 150000; /* XXX */
 		cpi->transport = XPORT_ISCSI;
 		cpi->transport_version = 0;
 		cpi->protocol = PROTO_SCSI;
 		cpi->protocol_version = SCSI_REV_SPC3;
 		cpi->maxio = MAXPHYS;
 		cpi->ccb_h.status = CAM_REQ_CMP;
 		break;
 	}
 	case XPT_GET_TRAN_SETTINGS:
 	{
 		struct ccb_trans_settings	*cts;
 		struct ccb_trans_settings_scsi	*scsi;
 
 		cts = &ccb->cts;
 		scsi = &cts->proto_specific.scsi;
 
 		cts->protocol = PROTO_SCSI;
 		cts->protocol_version = SCSI_REV_SPC3;
 		cts->transport = XPORT_ISCSI;
 		cts->transport_version = 0;
 		scsi->valid = CTS_SCSI_VALID_TQ;
 		scsi->flags = CTS_SCSI_FLAGS_TAG_ENB;
 		cts->ccb_h.status = CAM_REQ_CMP;
 		break;
 	}
 	case XPT_CALC_GEOMETRY:
 		cam_calc_geometry(&ccb->ccg, /*extended*/1);
 		ccb->ccb_h.status = CAM_REQ_CMP;
 		break;
 #if 0
 	/*
 	 * XXX: What's the point?
 	 */
 	case XPT_RESET_BUS:
 	case XPT_TERM_IO:
 		ISCSI_SESSION_DEBUG(is, "faking success for reset, abort, or term_io");
 		ccb->ccb_h.status = CAM_REQ_CMP;
 		break;
 #endif
 	case XPT_ABORT:
 		/* The helper completes the CCB or queues a request for it. */
 		iscsi_action_abort(is, ccb);
 		return;
 	case XPT_SCSI_IO:
 		/* The helper completes the CCB or queues a request for it. */
 		iscsi_action_scsiio(is, ccb);
 		return;
 	default:
 #if 0
 		ISCSI_SESSION_DEBUG(is, "got unsupported code 0x%x", ccb->ccb_h.func_code);
 #endif
 		ccb->ccb_h.status = CAM_FUNC_NOTAVAIL;
 		break;
 	}
 	/* Common completion for every case that ended with "break". */
 	xpt_done(ccb);
 }
 
 /*
  * CAM SIM poll routine.  Not expected to be called for this SIM: the body
  * is an assertion that always fails, and does nothing in kernels built
  * without assertions.
  */
 static void
 iscsi_poll(struct cam_sim *sim)
 {
 
 	KASSERT(0, ("%s: you're not supposed to be here", __func__));
 }
 
 static void
-iscsi_shutdown(struct iscsi_softc *sc)
+iscsi_terminate_sessions(struct iscsi_softc *sc)
 {
 	struct iscsi_session *is;
 
-	/*
-	 * Trying to reconnect during system shutdown would lead to hang.
-	 */
-	fail_on_disconnection = 1;
+	sx_slock(&sc->sc_lock);
+	TAILQ_FOREACH(is, &sc->sc_sessions, is_next)
+		iscsi_session_terminate(is);
+	while(!TAILQ_EMPTY(&sc->sc_sessions)) {
+		ISCSI_DEBUG("waiting for sessions to terminate");
+		cv_wait(&sc->sc_cv, &sc->sc_lock);
+	}
+	ISCSI_DEBUG("all sessions terminated");
+	sx_sunlock(&sc->sc_lock);
+}
 
+static void
+iscsi_shutdown_pre(struct iscsi_softc *sc)
+{
+	struct iscsi_session *is;
+
+	if (!fail_on_shutdown)
+		return;
+
 	/*
 	 * If we have any sessions waiting for reconnection, request
 	 * maintenance thread to fail them immediately instead of waiting
 	 * for reconnect timeout.
+	 *
+	 * This prevents LUNs with mounted filesystems that are supported
+	 * by disconnected iSCSI sessions from hanging, however it will
+	 * fail all queued BIOs.
 	 */
+	ISCSI_DEBUG("forcing failing all disconnected sessions due to shutdown");
+
+	fail_on_disconnection = 1;
+
 	sx_slock(&sc->sc_lock);
 	TAILQ_FOREACH(is, &sc->sc_sessions, is_next) {
 		ISCSI_SESSION_LOCK(is);
-		if (is->is_waiting_for_iscsid)
+		if (!is->is_connected) {
+			ISCSI_SESSION_DEBUG(is, "force failing disconnected session early");
 			iscsi_session_reconnect(is);
+		}
 		ISCSI_SESSION_UNLOCK(is);
 	}
 	sx_sunlock(&sc->sc_lock);
 }
 
+static void
+iscsi_shutdown_post(struct iscsi_softc *sc)
+{
+
+	ISCSI_DEBUG("removing all sessions due to shutdown");
+	iscsi_terminate_sessions(sc);
+}
+
 static int
 iscsi_load(void)
 {
 	int error;
 
 	sc = malloc(sizeof(*sc), M_ISCSI, M_ZERO | M_WAITOK);
 	sx_init(&sc->sc_lock, "iscsi");
 	TAILQ_INIT(&sc->sc_sessions);
 	cv_init(&sc->sc_cv, "iscsi_cv");
 
 	iscsi_outstanding_zone = uma_zcreate("iscsi_outstanding",
 	    sizeof(struct iscsi_outstanding), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 
 	error = make_dev_p(MAKEDEV_CHECKNAME, &sc->sc_cdev, &iscsi_cdevsw,
 	    NULL, UID_ROOT, GID_WHEEL, 0600, "iscsi");
 	if (error != 0) {
 		ISCSI_WARN("failed to create device node, error %d", error);
 		return (error);
 	}
 	sc->sc_cdev->si_drv1 = sc;
 
-	sc->sc_shutdown_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync,
-	    iscsi_shutdown, sc, SHUTDOWN_PRI_DEFAULT-1);
+	sc->sc_shutdown_pre_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync,
+	    iscsi_shutdown_pre, sc, SHUTDOWN_PRI_FIRST);
+	/*
+	 * shutdown_post_sync needs to run after filesystem shutdown and before
+	 * CAM shutdown - otherwise when rebooting with an iSCSI session that is
+	 * disconnected but has outstanding requests, dashutdown() will hang on
+	 * cam_periph_runccb().
+	 */
+	sc->sc_shutdown_post_eh = EVENTHANDLER_REGISTER(shutdown_post_sync,
+	    iscsi_shutdown_post, sc, SHUTDOWN_PRI_DEFAULT - 1);
 
 	return (0);
 }
 
 static int
 iscsi_unload(void)
 {
-	struct iscsi_session *is, *tmp;
 
 	if (sc->sc_cdev != NULL) {
 		ISCSI_DEBUG("removing device node");
 		destroy_dev(sc->sc_cdev);
 		ISCSI_DEBUG("device node removed");
 	}
 
-	if (sc->sc_shutdown_eh != NULL)
-		EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->sc_shutdown_eh);
+	if (sc->sc_shutdown_pre_eh != NULL)
+		EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->sc_shutdown_pre_eh);
+	if (sc->sc_shutdown_post_eh != NULL)
+		EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->sc_shutdown_post_eh);
 
-	sx_slock(&sc->sc_lock);
-	TAILQ_FOREACH_SAFE(is, &sc->sc_sessions, is_next, tmp)
-		iscsi_session_terminate(is);
-	while(!TAILQ_EMPTY(&sc->sc_sessions)) {
-		ISCSI_DEBUG("waiting for sessions to terminate");
-		cv_wait(&sc->sc_cv, &sc->sc_lock);
-	}
-	ISCSI_DEBUG("all sessions terminated");
-	sx_sunlock(&sc->sc_lock);
+	iscsi_terminate_sessions(sc);
 
 	uma_zdestroy(iscsi_outstanding_zone);
 	sx_destroy(&sc->sc_lock);
 	cv_destroy(&sc->sc_cv);
 	free(sc, M_ISCSI);
 	return (0);
 }
 
 static int
 iscsi_quiesce(void)
 {
 	sx_slock(&sc->sc_lock);
 	if (!TAILQ_EMPTY(&sc->sc_sessions)) {
 		sx_sunlock(&sc->sc_lock);
 		return (EBUSY);
 	}
 	sx_sunlock(&sc->sc_lock);
 	return (0);
 }
 
 /*
  * Module event handler: route load, unload and quiesce events to their
  * handlers and return the handler's result.  Any other event yields
  * EINVAL.
  */
 static int
 iscsi_modevent(module_t mod, int what, void *arg)
 {
 
 	switch (what) {
 	case MOD_LOAD:
 		return (iscsi_load());
 	case MOD_UNLOAD:
 		return (iscsi_unload());
 	case MOD_QUIESCE:
 		return (iscsi_quiesce());
 	default:
 		return (EINVAL);
 	}
 }
 
 /*
  * Module description: name, event handler, and a third field left at 0.
  */
 moduledata_t iscsi_data = {
 	"iscsi",
 	iscsi_modevent,
 	0
 };
 
 /* Register the module and declare its dependencies on cam and icl. */
 DECLARE_MODULE(iscsi, iscsi_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
 MODULE_DEPEND(iscsi, cam, 1, 1, 1);
 MODULE_DEPEND(iscsi, icl, 1, 1, 1);
Index: projects/clang380-import/sys/dev/iscsi/iscsi.h
===================================================================
--- projects/clang380-import/sys/dev/iscsi/iscsi.h	(revision 293686)
+++ projects/clang380-import/sys/dev/iscsi/iscsi.h	(revision 293687)
@@ -1,137 +1,138 @@
 /*-
  * Copyright (c) 2012 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Edward Tomasz Napierala under sponsorship
  * from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef ISCSI_H
 #define	ISCSI_H
 
 struct iscsi_softc;
 struct icl_conn;
 
 #define	ISCSI_NAME_LEN		224	/* 223 bytes, by RFC 3720, + '\0' */
 #define	ISCSI_ADDR_LEN		47	/* INET6_ADDRSTRLEN + '\0' */
 #define	ISCSI_SECRET_LEN	17	/* 16 + '\0' */
 
 /*
  * Bookkeeping for one request sent to the target and not yet finished.
  * Entries are linked on a session's is_outstanding list.
  */
 struct iscsi_outstanding {
 	/* Linkage on the session's is_outstanding list. */
 	TAILQ_ENTRY(iscsi_outstanding)	io_next;
 	/* CCB this request was created for. */
 	union ccb			*io_ccb;
 	/* NOTE(review): presumably bytes received so far - confirm. */
 	size_t				io_received;
 	/* Initiator Task Tag identifying this request. */
 	uint32_t			io_initiator_task_tag;
 	uint32_t			io_datasn;
 	/* Opaque per-task pointer owned by the ICL layer. */
 	void				*io_icl_prv;
 };
 
 /*
  * State of one iSCSI initiator session.  Sessions are linked on the
  * softc's sc_sessions list.
  */
 struct iscsi_session {
 	/* Linkage on the softc's sc_sessions list. */
 	TAILQ_ENTRY(iscsi_session)	is_next;
 
 	struct icl_conn			*is_conn;
 	/* Session lock. */
 	struct mtx			is_lock;
 
 	uint32_t			is_statsn;
 	uint32_t			is_cmdsn;
 	uint32_t			is_expcmdsn;
 	uint32_t			is_maxcmdsn;
 	/* Next Initiator Task Tag to hand out. */
 	uint32_t			is_initiator_task_tag;
 	int				is_header_digest;
 	int				is_data_digest;
 	int				is_initial_r2t;
 	size_t				is_max_burst_length;
 	size_t				is_first_burst_length;
 	uint8_t				is_isid[6];
 	uint16_t			is_tsih;
 	bool				is_immediate_data;
 	size_t				is_max_data_segment_length;
 	char				is_target_alias[ISCSI_ALIAS_LEN];
 
 	/* Requests sent to the target and not yet finished. */
 	TAILQ_HEAD(, iscsi_outstanding)	is_outstanding;
 	STAILQ_HEAD(, icl_pdu)		is_postponed;
 
 	struct callout			is_callout;
 	unsigned int			is_timeout;
 
 	/*
 	 * XXX: This could be rewritten using a single variable,
 	 * 	but somehow it results in uglier code.
 	 */
 	/*
 	 * We're waiting for iscsid(8); after iscsid_timeout
 	 * expires, kernel will wake up an iscsid(8) to handle
 	 * the session.
 	 */
 	bool				is_waiting_for_iscsid;
 
 	/*
 	 * Some iscsid(8) instance is handling the session;
 	 * after login_timeout expires, kernel will wake up
 	 * another iscsid(8) to handle the session.
 	 */
 	bool				is_login_phase;
 
 	/*
 	 * We're in the process of removing the iSCSI session.
 	 */
 	bool				is_terminating;
 
 	/*
 	 * We're waiting for the maintenance thread to do some
 	 * reconnection tasks.
 	 */
 	bool				is_reconnecting;
 
 	bool				is_connected;
 
 	struct cam_devq			*is_devq;
 	struct cam_sim			*is_sim;
 	struct cam_path			*is_path;
 	struct cv			is_maintenance_cv;
 	struct iscsi_softc		*is_softc;
 	/* Session id used by the ioctl interface to select a session. */
 	unsigned int			is_id;
 	struct iscsi_session_conf	is_conf;
 	bool				is_simq_frozen;
 
 	/* Human-readable status text reported in the session list. */
 	char				is_reason[ISCSI_REASON_LEN];
 
 #ifdef ICL_KERNEL_PROXY
 	/* Signalled when is_login_pdu is posted. */
 	struct cv			is_login_cv;
 	struct icl_pdu			*is_login_pdu;
 #endif
 };
 
 struct iscsi_softc {
 	device_t			sc_dev;
 	struct sx			sc_lock;
 	struct cdev			*sc_cdev;
 	TAILQ_HEAD(, iscsi_session)	sc_sessions;
 	struct cv			sc_cv;
 	unsigned int			sc_last_session_id;
-	eventhandler_tag		sc_shutdown_eh;
+	eventhandler_tag		sc_shutdown_pre_eh;
+	eventhandler_tag		sc_shutdown_post_eh;
 };
 
 #endif /* !ISCSI_H */
Index: projects/clang380-import/sys/dev/puc/pucdata.c
===================================================================
--- projects/clang380-import/sys/dev/puc/pucdata.c	(revision 293686)
+++ projects/clang380-import/sys/dev/puc/pucdata.c	(revision 293687)
@@ -1,1764 +1,1862 @@
 /*-
  * Copyright (c) 2006 Marcel Moolenaar
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * PCI "universal" communications card driver configuration data (used to
  * match/attach the cards).
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
 #include <sys/sysctl.h>
 
 #include <machine/resource.h>
 #include <machine/bus.h>
 #include <sys/rman.h>
 
+#include <dev/ic/ns16550.h>
+
+#include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 
 #include <dev/puc/puc_bus.h>
 #include <dev/puc/puc_cfg.h>
 #include <dev/puc/puc_bfe.h>
 
+static puc_config_f puc_config_advantech;
 static puc_config_f puc_config_amc;
 static puc_config_f puc_config_diva;
 static puc_config_f puc_config_exar;
 static puc_config_f puc_config_exar_pcie;
 static puc_config_f puc_config_icbook;
 static puc_config_f puc_config_moxa;
 static puc_config_f puc_config_oxford_pci954;
 static puc_config_f puc_config_oxford_pcie;
 static puc_config_f puc_config_quatech;
 static puc_config_f puc_config_syba;
 static puc_config_f puc_config_siig;
 static puc_config_f puc_config_sunix;
 static puc_config_f puc_config_timedia;
 static puc_config_f puc_config_titan;
 
 const struct puc_cfg puc_pci_devices[] = {
 	{   0x0009, 0x7168, 0xffff, 0,
 	    "Sunix SUN1889",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_2S, 0x10, 0, 8,
 	},
 
 	{   0x103c, 0x1048, 0x103c, 0x1049,
 	    "HP Diva Serial [GSP] Multiport UART - Tosca Console",
 	    DEFAULT_RCLK,
 	    PUC_PORT_3S, 0x10, 0, -1,
 	    .config_function = puc_config_diva
 	},
 
 	{   0x103c, 0x1048, 0x103c, 0x104a,
 	    "HP Diva Serial [GSP] Multiport UART - Tosca Secondary",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x10, 0, -1,
 	    .config_function = puc_config_diva
 	},
 
 	{   0x103c, 0x1048, 0x103c, 0x104b,
 	    "HP Diva Serial [GSP] Multiport UART - Maestro SP2",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x10, 0, -1,
 	    .config_function = puc_config_diva
 	},
 
 	{   0x103c, 0x1048, 0x103c, 0x1223,
 	    "HP Diva Serial [GSP] Multiport UART - Superdome Console",
 	    DEFAULT_RCLK,
 	    PUC_PORT_3S, 0x10, 0, -1,
 	    .config_function = puc_config_diva
 	},
 
 	{   0x103c, 0x1048, 0x103c, 0x1226,
 	    "HP Diva Serial [GSP] Multiport UART - Keystone SP2",
 	    DEFAULT_RCLK,
 	    PUC_PORT_3S, 0x10, 0, -1,
 	    .config_function = puc_config_diva
 	},
 
 	{   0x103c, 0x1048, 0x103c, 0x1282,
 	    "HP Diva Serial [GSP] Multiport UART - Everest SP2",
 	    DEFAULT_RCLK,
 	    PUC_PORT_3S, 0x10, 0, -1,
 	    .config_function = puc_config_diva
 	},
 
 	{   0x10b5, 0x1076, 0x10b5, 0x1076,
 	    "VScom PCI-800",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_8S, 0x18, 0, 8,
 	},
 
 	{   0x10b5, 0x1077, 0x10b5, 0x1077,
 	    "VScom PCI-400",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x18, 0, 8,
 	},
 
 	{   0x10b5, 0x1103, 0x10b5, 0x1103,
 	    "VScom PCI-200",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_2S, 0x18, 4, 0,
 	},
 
 	/*
 	 * Boca Research Turbo Serial 658 (8 serial port) card.
 	 * Appears to be the same as Chase Research PLC PCI-FAST8
 	 * and Perle PCI-FAST8 Multi-Port serial cards.
 	 */
 	{   0x10b5, 0x9050, 0x12e0, 0x0021,
 	    "Boca Research Turbo Serial 658",
 	    DEFAULT_RCLK * 4,
 	    PUC_PORT_8S, 0x18, 0, 8,
 	},
 
 	{   0x10b5, 0x9050, 0x12e0, 0x0031,
 	    "Boca Research Turbo Serial 654",
 	    DEFAULT_RCLK * 4,
 	    PUC_PORT_4S, 0x18, 0, 8,
 	},
 
 	/*
 	 * Dolphin Peripherals 4035 (dual serial port) card.  PLX 9050, with
 	 * a seemingly-lame EEPROM setup that puts the Dolphin IDs
 	 * into the subsystem fields, and claims that it's a
 	 * network/misc (0x02/0x80) device.
 	 */
 	{   0x10b5, 0x9050, 0xd84d, 0x6808,
 	    "Dolphin Peripherals 4035",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x18, 4, 0,
 	},
 
 	/*
 	 * Dolphin Peripherals 4014 (dual parallel port) card.  PLX 9050, with
 	 * a seemingly-lame EEPROM setup that puts the Dolphin IDs
 	 * into the subsystem fields, and claims that it's a
 	 * network/misc (0x02/0x80) device.
 	 */
 	{   0x10b5, 0x9050, 0xd84d, 0x6810,
 	    "Dolphin Peripherals 4014",
 	    0,
 	    PUC_PORT_2P, 0x20, 4, 0,
 	},
 
 	{   0x10e8, 0x818e, 0xffff, 0,
 	    "Applied Micro Circuits 8 Port UART",
 	    DEFAULT_RCLK,
 	    PUC_PORT_8S, 0x14, -1, -1,
 	    .config_function = puc_config_amc
 	},
 
 	/*
 	 * The following members of the Digi International Neo series are
 	 * based on Exar PCI chips, f. e. the 8 port variants on XR17V258IV.
 	 * Accordingly, the PCIe versions of these cards incorporate a PLX
 	 * PCIe-PCI-bridge.
 	 */
 
 	{   0x114f, 0x00b0, 0xffff, 0,
 	    "Digi Neo PCI 4 Port",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x10, 0, -1,
 	    .config_function = puc_config_exar
 	},
 
 	{   0x114f, 0x00b1, 0xffff, 0,
 	    "Digi Neo PCI 8 Port",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_8S, 0x10, 0, -1,
 	    .config_function = puc_config_exar
 	},
 
 	{   0x114f, 0x00f0, 0xffff, 0,
 	    "Digi Neo PCIe 8 Port",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_8S, 0x10, 0, -1,
 	    .config_function = puc_config_exar
 	},
 
 	{   0x114f, 0x00f1, 0xffff, 0,
 	    "Digi Neo PCIe 4 Port",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x10, 0, -1,
 	    .config_function = puc_config_exar
 	},
 
 	{   0x114f, 0x00f2, 0xffff, 0,
 	    "Digi Neo PCIe 4 Port RJ45",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x10, 0, -1,
 	    .config_function = puc_config_exar
 	},
 
 	{   0x114f, 0x00f3, 0xffff, 0,
 	    "Digi Neo PCIe 8 Port RJ45",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_8S, 0x10, 0, -1,
 	    .config_function = puc_config_exar
 	},
 
 	{   0x11fe, 0x8010, 0xffff, 0,
 	    "Comtrol RocketPort 550/8 RJ11 part A",
 	    DEFAULT_RCLK * 4,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x11fe, 0x8011, 0xffff, 0,
 	    "Comtrol RocketPort 550/8 RJ11 part B",
 	    DEFAULT_RCLK * 4,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x11fe, 0x8012, 0xffff, 0,
 	    "Comtrol RocketPort 550/8 Octa part A",
 	    DEFAULT_RCLK * 4,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x11fe, 0x8013, 0xffff, 0,
 	    "Comtrol RocketPort 550/8 Octa part B",
 	    DEFAULT_RCLK * 4,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x11fe, 0x8014, 0xffff, 0,
 	    "Comtrol RocketPort 550/4 RJ45",
 	    DEFAULT_RCLK * 4,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x11fe, 0x8015, 0xffff, 0,
 	    "Comtrol RocketPort 550/Quad",
 	    DEFAULT_RCLK * 4,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x11fe, 0x8016, 0xffff, 0,
 	    "Comtrol RocketPort 550/16 part A",
 	    DEFAULT_RCLK * 4,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x11fe, 0x8017, 0xffff, 0,
 	    "Comtrol RocketPort 550/16 part B",
 	    DEFAULT_RCLK * 4,
 	    PUC_PORT_12S, 0x10, 0, 8,
 	},
 
 	{   0x11fe, 0x8018, 0xffff, 0,
 	    "Comtrol RocketPort 550/8 part A",
 	    DEFAULT_RCLK * 4,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x11fe, 0x8019, 0xffff, 0,
 	    "Comtrol RocketPort 550/8 part B",
 	    DEFAULT_RCLK * 4,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	/*
 	 * IBM SurePOS 300 Series (481033H) serial ports
 	 * Details can be found on the IBM RSS websites
 	 */
 
 	{   0x1014, 0x0297, 0xffff, 0,
 	    "IBM SurePOS 300 Series (481033H) serial ports",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x10, 4, 0
 	},
 
 	/*
 	 * SIIG Boards.
 	 *
 	 * SIIG provides documentation for their boards at:
 	 * <URL:http://www.siig.com/downloads.asp>
 	 */
 
 	{   0x131f, 0x1010, 0xffff, 0,
 	    "SIIG Cyber I/O PCI 16C550 (10x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_1S1P, 0x18, 4, 0,
 	},
 
 	{   0x131f, 0x1011, 0xffff, 0,
 	    "SIIG Cyber I/O PCI 16C650 (10x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_1S1P, 0x18, 4, 0,
 	},
 
 	{   0x131f, 0x1012, 0xffff, 0,
 	    "SIIG Cyber I/O PCI 16C850 (10x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_1S1P, 0x18, 4, 0,
 	},
 
 	{   0x131f, 0x1021, 0xffff, 0,
 	    "SIIG Cyber Parallel Dual PCI (10x family)",
 	    0,
 	    PUC_PORT_2P, 0x18, 8, 0,
 	},
 
 	{   0x131f, 0x1030, 0xffff, 0,
 	    "SIIG Cyber Serial Dual PCI 16C550 (10x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x18, 4, 0,
 	},
 
 	{   0x131f, 0x1031, 0xffff, 0,
 	    "SIIG Cyber Serial Dual PCI 16C650 (10x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x18, 4, 0,
 	},
 
 	{   0x131f, 0x1032, 0xffff, 0,
 	    "SIIG Cyber Serial Dual PCI 16C850 (10x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x18, 4, 0,
 	},
 
 	{   0x131f, 0x1034, 0xffff, 0,	/* XXX really? */
 	    "SIIG Cyber 2S1P PCI 16C550 (10x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S1P, 0x18, 4, 0,
 	},
 
 	{   0x131f, 0x1035, 0xffff, 0,	/* XXX really? */
 	    "SIIG Cyber 2S1P PCI 16C650 (10x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S1P, 0x18, 4, 0,
 	},
 
 	{   0x131f, 0x1036, 0xffff, 0,	/* XXX really? */
 	    "SIIG Cyber 2S1P PCI 16C850 (10x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S1P, 0x18, 4, 0,
 	},
 
 	{   0x131f, 0x1050, 0xffff, 0,
 	    "SIIG Cyber 4S PCI 16C550 (10x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x18, 4, 0,
 	},
 
 	{   0x131f, 0x1051, 0xffff, 0,
 	    "SIIG Cyber 4S PCI 16C650 (10x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x18, 4, 0,
 	},
 
 	{   0x131f, 0x1052, 0xffff, 0,
 	    "SIIG Cyber 4S PCI 16C850 (10x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x18, 4, 0,
 	},
 
 	{   0x131f, 0x2010, 0xffff, 0,
 	    "SIIG Cyber I/O PCI 16C550 (20x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_1S1P, 0x10, 4, 0,
 	},
 
 	{   0x131f, 0x2011, 0xffff, 0,
 	    "SIIG Cyber I/O PCI 16C650 (20x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_1S1P, 0x10, 4, 0,
 	},
 
 	{   0x131f, 0x2012, 0xffff, 0,
 	    "SIIG Cyber I/O PCI 16C850 (20x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_1S1P, 0x10, 4, 0,
 	},
 
 	{   0x131f, 0x2021, 0xffff, 0,
 	    "SIIG Cyber Parallel Dual PCI (20x family)",
 	    0,
 	    PUC_PORT_2P, 0x10, 8, 0,
 	},
 
 	{   0x131f, 0x2030, 0xffff, 0,
 	    "SIIG Cyber Serial Dual PCI 16C550 (20x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x10, 4, 0,
 	},
 
 	{   0x131f, 0x2031, 0xffff, 0,
 	    "SIIG Cyber Serial Dual PCI 16C650 (20x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x10, 4, 0,
 	},
 
 	{   0x131f, 0x2032, 0xffff, 0,
 	    "SIIG Cyber Serial Dual PCI 16C850 (20x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x10, 4, 0,
 	},
 
 	{   0x131f, 0x2040, 0xffff, 0,
 	    "SIIG Cyber 2P1S PCI 16C550 (20x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_1S2P, 0x10, -1, 0,
 	    .config_function = puc_config_siig
 	},
 
 	{   0x131f, 0x2041, 0xffff, 0,
 	    "SIIG Cyber 2P1S PCI 16C650 (20x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_1S2P, 0x10, -1, 0,
 	    .config_function = puc_config_siig
 	},
 
 	{   0x131f, 0x2042, 0xffff, 0,
 	    "SIIG Cyber 2P1S PCI 16C850 (20x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_1S2P, 0x10, -1, 0,
 	    .config_function = puc_config_siig
 	},
 
 	{   0x131f, 0x2050, 0xffff, 0,
 	    "SIIG Cyber 4S PCI 16C550 (20x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x10, 4, 0,
 	},
 
 	{   0x131f, 0x2051, 0xffff, 0,
 	    "SIIG Cyber 4S PCI 16C650 (20x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x10, 4, 0,
 	},
 
 	{   0x131f, 0x2052, 0xffff, 0,
 	    "SIIG Cyber 4S PCI 16C850 (20x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x10, 4, 0,
 	},
 
 	{   0x131f, 0x2060, 0xffff, 0,
 	    "SIIG Cyber 2S1P PCI 16C550 (20x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S1P, 0x10, 4, 0,
 	},
 
 	{   0x131f, 0x2061, 0xffff, 0,
 	    "SIIG Cyber 2S1P PCI 16C650 (20x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S1P, 0x10, 4, 0,
 	},
 
 	{   0x131f, 0x2062, 0xffff, 0,
 	    "SIIG Cyber 2S1P PCI 16C850 (20x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S1P, 0x10, 4, 0,
 	},
 
 	{   0x131f, 0x2081, 0xffff, 0,
 	    "SIIG PS8000 8S PCI 16C650 (20x family)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_8S, 0x10, -1, -1,
 	    .config_function = puc_config_siig
 	},
 
 	{   0x135c, 0x0010, 0xffff, 0,
 	    "Quatech QSC-100",
 	    -3,	/* max 8x clock rate */
 	    PUC_PORT_4S, 0x14, 0, 8,
 	    .config_function = puc_config_quatech
 	},
 
 	{   0x135c, 0x0020, 0xffff, 0,
 	    "Quatech DSC-100",
 	    -1, /* max 2x clock rate */
 	    PUC_PORT_2S, 0x14, 0, 8,
 	    .config_function = puc_config_quatech
 	},
 
 	{   0x135c, 0x0030, 0xffff, 0,
 	    "Quatech DSC-200/300",
 	    -1, /* max 2x clock rate */
 	    PUC_PORT_2S, 0x14, 0, 8,
 	    .config_function = puc_config_quatech
 	},
 
 	{   0x135c, 0x0040, 0xffff, 0,
 	    "Quatech QSC-200/300",
 	    -3, /* max 8x clock rate */
 	    PUC_PORT_4S, 0x14, 0, 8,
 	    .config_function = puc_config_quatech
 	},
 
 	{   0x135c, 0x0050, 0xffff, 0,
 	    "Quatech ESC-100D",
 	    -3, /* max 8x clock rate */
 	    PUC_PORT_8S, 0x14, 0, 8,
 	    .config_function = puc_config_quatech
 	},
 
 	{   0x135c, 0x0060, 0xffff, 0,
 	    "Quatech ESC-100M",
 	    -3, /* max 8x clock rate */
 	    PUC_PORT_8S, 0x14, 0, 8,
 	    .config_function = puc_config_quatech
 	},
 
 	{   0x135c, 0x0170, 0xffff, 0,
 	    "Quatech QSCLP-100",
 	    -1, /* max 2x clock rate */
 	    PUC_PORT_4S, 0x18, 0, 8,
 	    .config_function = puc_config_quatech
 	},
 
 	{   0x135c, 0x0180, 0xffff, 0,
 	    "Quatech DSCLP-100",
 	    -1, /* max 3x clock rate */
 	    PUC_PORT_2S, 0x18, 0, 8,
 	    .config_function = puc_config_quatech
 	},
 
 	{   0x135c, 0x01b0, 0xffff, 0,
 	    "Quatech DSCLP-200/300",
 	    -1, /* max 2x clock rate */
 	    PUC_PORT_2S, 0x18, 0, 8,
 	    .config_function = puc_config_quatech
 	},
 
 	{   0x135c, 0x01e0, 0xffff, 0,
 	    "Quatech ESCLP-100",
 	    -3, /* max 8x clock rate */
 	    PUC_PORT_8S, 0x10, 0, 8,
 	    .config_function = puc_config_quatech
 	},
 
 	{   0x1393, 0x1024, 0xffff, 0,
 	    "Moxa Technologies, Smartio CP-102E/PCIe",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_2S, 0x14, 0, -1,
 	    .config_function = puc_config_moxa
 	},
 
 	{   0x1393, 0x1025, 0xffff, 0,
 	    "Moxa Technologies, Smartio CP-102EL/PCIe",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_2S, 0x14, 0, -1,
 	    .config_function = puc_config_moxa
 	},
 
 	{   0x1393, 0x1040, 0xffff, 0,
 	    "Moxa Technologies, Smartio C104H/PCI",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x18, 0, 8,
 	},
 
 	{   0x1393, 0x1041, 0xffff, 0,
 	    "Moxa Technologies, Smartio CP-104UL/PCI",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x18, 0, 8,
 	},
 
 	{   0x1393, 0x1042, 0xffff, 0,
 	    "Moxa Technologies, Smartio CP-104JU/PCI",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x18, 0, 8,
 	},
 
 	{   0x1393, 0x1043, 0xffff, 0,
 	    "Moxa Technologies, Smartio CP-104EL/PCIe",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x18, 0, 8,
 	},
 
 	{   0x1393, 0x1045, 0xffff, 0,
 	    "Moxa Technologies, Smartio CP-104EL-A/PCIe",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x14, 0, -1,
 	    .config_function = puc_config_moxa
 	},
 
 	{   0x1393, 0x1120, 0xffff, 0,
 	    "Moxa Technologies, CP-112UL",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_2S, 0x18, 0, 8,
 	},
 
 	{   0x1393, 0x1141, 0xffff, 0,
 	    "Moxa Technologies, Industio CP-114",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x18, 0, 8,
 	},
 
 	{   0x1393, 0x1144, 0xffff, 0,
 	    "Moxa Technologies, Smartio CP-114EL/PCIe",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x14, 0, -1,
 	    .config_function = puc_config_moxa
 	},
 
 	{   0x1393, 0x1182, 0xffff, 0,
 	    "Moxa Technologies, Smartio CP-118EL-A/PCIe",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_8S, 0x14, 0, -1,
 	    .config_function = puc_config_moxa
 	},
 
 	{   0x1393, 0x1680, 0xffff, 0,
 	    "Moxa Technologies, C168H/PCI",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_8S, 0x18, 0, 8,
 	},
 
 	{   0x1393, 0x1681, 0xffff, 0,
 	    "Moxa Technologies, C168U/PCI",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_8S, 0x18, 0, 8,
 	},
 
 	{   0x1393, 0x1682, 0xffff, 0,
 	    "Moxa Technologies, CP-168EL/PCIe",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_8S, 0x18, 0, 8,
 	},
 
 	{   0x1393, 0x1683, 0xffff, 0,
 	    "Moxa Technologies, Smartio CP-168EL-A/PCIe",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_8S, 0x14, 0, -1,
 	    .config_function = puc_config_moxa
 	},
 
 	{   0x13a8, 0x0152, 0xffff, 0,
 	    "Exar XR17C/D152",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_2S, 0x10, 0, -1,
 	    .config_function = puc_config_exar
 	},
 
 	{   0x13a8, 0x0154, 0xffff, 0,
 	    "Exar XR17C154",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x10, 0, -1,
 	    .config_function = puc_config_exar
 	},
 
 	{   0x13a8, 0x0158, 0xffff, 0,
 	    "Exar XR17C158",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_8S, 0x10, 0, -1,
 	    .config_function = puc_config_exar
 	},
 
 	{   0x13a8, 0x0258, 0xffff, 0,
 	    "Exar XR17V258IV",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_8S, 0x10, 0, -1,
 	    .config_function = puc_config_exar
 	},
 
 	/* The XR17V358 uses the 125MHz PCIe clock as its reference clock. */
 	{   0x13a8, 0x0358, 0xffff, 0,
 	    "Exar XR17V358",
 	    125000000,
 	    PUC_PORT_8S, 0x10, 0, -1,
 	    .config_function = puc_config_exar_pcie
 	},
 
+	/*
+	 * The Advantech PCI-1602 Rev. A uses the first two ports of an Oxford
+	 * Semiconductor OXuPCI954.  Note these boards have a hardware bug in
+	 * that they drive the RS-422/485 transmitters after power-on until a
+	 * driver initializes the UARTs.
+	 */
 	{   0x13fe, 0x1600, 0x1602, 0x0002,
-	    "Advantech PCI-1602",
+	    "Advantech PCI-1602 Rev. A",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_2S, 0x10, 0, 8,
+	    .config_function = puc_config_advantech
 	},
 
+	/* Advantech PCI-1602 Rev. B1/PCI-1603 are also based on OXuPCI952. */
+	{   0x13fe, 0xa102, 0x13fe, 0xa102,
+	    "Advantech 2-port PCI (PCI-1602 Rev. B1/PCI-1603)",
+	    DEFAULT_RCLK * 8,
+	    PUC_PORT_2S, 0x10, 4, 0,
+	    .config_function = puc_config_advantech
+	},
+
 	{   0x1407, 0x0100, 0xffff, 0,
 	    "Lava Computers Dual Serial",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x10, 4, 0,
 	},
 
 	{   0x1407, 0x0101, 0xffff, 0,
 	    "Lava Computers Quatro A",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x10, 4, 0,
 	},
 
 	{   0x1407, 0x0102, 0xffff, 0,
 	    "Lava Computers Quatro B",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x10, 4, 0,
 	},
 
 	{   0x1407, 0x0120, 0xffff, 0,
 	    "Lava Computers Quattro-PCI A",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x10, 4, 0,
 	},
 
 	{   0x1407, 0x0121, 0xffff, 0,
 	    "Lava Computers Quattro-PCI B",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x10, 4, 0,
 	},
 
 	{   0x1407, 0x0180, 0xffff, 0,
 	    "Lava Computers Octo A",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x10, 4, 0,
 	},
 
 	{   0x1407, 0x0181, 0xffff, 0,
 	    "Lava Computers Octo B",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x10, 4, 0,
 	},
 
 	{   0x1409, 0x7268, 0xffff, 0,
 	    "Sunix SUN1888",
 	    0,
 	    PUC_PORT_2P, 0x10, 0, 8,
 	},
 
 	{   0x1409, 0x7168, 0xffff, 0,
 	    NULL,
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_NONSTANDARD, 0x10, -1, -1,
 	    .config_function = puc_config_timedia
 	},
 
 	/*
 	 * Boards with an Oxford Semiconductor chip.
 	 *
 	 * Oxford Semiconductor provides documentation for their chip at:
 	 * <URL:http://www.plxtech.com/products/uart/>
 	 *
 	 * As sold by Kouwell <URL:http://www.kouwell.com/>.
 	 * I/O Flex PCI I/O Card Model-223 with 4 serial and 1 parallel ports.
 	 */
 	{
 	    0x1415, 0x9501, 0x10fc, 0xc070,
 	    "I-O DATA RSA-PCI2/R",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_2S, 0x10, 0, 8,
 	},
 
 	{   0x1415, 0x9501, 0x131f, 0x2050,
 	    "SIIG Cyber 4 PCI 16550",
 	    DEFAULT_RCLK * 10,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x1415, 0x9501, 0x131f, 0x2051,
 	    "SIIG Cyber 4S PCI 16C650 (20x family)",
 	    DEFAULT_RCLK * 10,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x1415, 0x9501, 0x131f, 0x2052,
 	    "SIIG Quartet Serial 850",
 	    DEFAULT_RCLK * 10,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x1415, 0x9501, 0x14db, 0x2150,
 	    "Kuroutoshikou SERIAL4P-LPPCI2",
 	    DEFAULT_RCLK * 10,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x1415, 0x9501, 0xffff, 0,
 	    "Oxford Semiconductor OX16PCI954 UARTs",
 	    0,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	    .config_function = puc_config_oxford_pci954
 	},
 
 	{   0x1415, 0x950a, 0x131f, 0x2030,
 	    "SIIG Cyber 2S PCIe",
 	    DEFAULT_RCLK * 10,
 	    PUC_PORT_2S, 0x10, 0, 8,
 	},
 
 	{   0x1415, 0x950a, 0x131f, 0x2032,
 	    "SIIG Cyber Serial Dual PCI 16C850",
 	    DEFAULT_RCLK * 10,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x1415, 0x950a, 0x131f, 0x2061,
 	    "SIIG Cyber 2SP1 PCIe",
 	    DEFAULT_RCLK * 10,
 	    PUC_PORT_2S, 0x10, 0, 8,
 	},
 
 	{   0x1415, 0x950a, 0xffff, 0,
 	    "Oxford Semiconductor OX16PCI954 UARTs",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x1415, 0x9511, 0xffff, 0,
 	    "Oxford Semiconductor OX9160/OX16PCI954 UARTs (function 1)",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x1415, 0x9521, 0xffff, 0,
 	    "Oxford Semiconductor OX16PCI952 UARTs",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x10, 4, 0,
 	},
 
 	{   0x1415, 0x9538, 0xffff, 0,
 	    "Oxford Semiconductor OX16PCI958 UARTs",
 	    DEFAULT_RCLK,
 	    PUC_PORT_8S, 0x18, 0, 8,
 	},
 
 	/*
 	 * Perle boards use Oxford Semiconductor chips, but they store the
 	 * Oxford Semiconductor device ID as a subvendor device ID and use
 	 * their own device IDs.
 	 */
 
 	{   0x155f, 0x0331, 0xffff, 0,
 	    "Perle Ultraport4 Express",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x155f, 0xB012, 0xffff, 0,
 	    "Perle Speed2 LE",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_2S, 0x10, 0, 8,
 	},
 
 	{   0x155f, 0xB022, 0xffff, 0,
 	    "Perle Speed2 LE",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_2S, 0x10, 0, 8,
 	},
 
 	{   0x155f, 0xB004, 0xffff, 0,
 	    "Perle Speed4 LE",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x155f, 0xB008, 0xffff, 0,
 	    "Perle Speed8 LE",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_8S, 0x10, 0, 8,
 	},
 
 
 	/*
 	 * Oxford Semiconductor PCI Express Expresso family
 	 *
 	 * Found in many 'native' PCI Express serial boards such as:
 	 *
 	 * eMegatech MP954ER4 (4 port) and MP958ER8 (8 port)
 	 * <URL:http://www.emegatech.com.tw/pdrs232pcie.html>
 	 *
 	 * Lindy 51189 (4 port)
 	 * <URL:http://www.lindy.com> <URL:http://tinyurl.com/lindy-51189>
 	 *
 	 * StarTech.com PEX4S952 (4 port) and PEX8S952 (8 port)
 	 * <URL:http://www.startech.com>
 	 */
 
 	{   0x1415, 0xc11b, 0xffff, 0,
 	    "Oxford Semiconductor OXPCIe952 1S1P",
 	    DEFAULT_RCLK * 0x22,
 	    PUC_PORT_NONSTANDARD, 0x10, 0, -1,
 	    .config_function = puc_config_oxford_pcie
 	},
 
 	{   0x1415, 0xc138, 0xffff, 0,
 	    "Oxford Semiconductor OXPCIe952 UARTs",
 	    DEFAULT_RCLK * 0x22,
 	    PUC_PORT_NONSTANDARD, 0x10, 0, -1,
 	    .config_function = puc_config_oxford_pcie
 	},
 
 	{   0x1415, 0xc158, 0xffff, 0,
 	    "Oxford Semiconductor OXPCIe952 UARTs",
 	    DEFAULT_RCLK * 0x22,
 	    PUC_PORT_NONSTANDARD, 0x10, 0, -1,
 	    .config_function = puc_config_oxford_pcie
 	},
 
 	{   0x1415, 0xc15d, 0xffff, 0,
 	    "Oxford Semiconductor OXPCIe952 UARTs (function 1)",
 	    DEFAULT_RCLK * 0x22,
 	    PUC_PORT_NONSTANDARD, 0x10, 0, -1,
 	    .config_function = puc_config_oxford_pcie
 	},
 
 	{   0x1415, 0xc208, 0xffff, 0,
 	    "Oxford Semiconductor OXPCIe954 UARTs",
 	    DEFAULT_RCLK * 0x22,
 	    PUC_PORT_NONSTANDARD, 0x10, 0, -1,
 	    .config_function = puc_config_oxford_pcie
 	},
 
 	{   0x1415, 0xc20d, 0xffff, 0,
 	    "Oxford Semiconductor OXPCIe954 UARTs (function 1)",
 	    DEFAULT_RCLK * 0x22,
 	    PUC_PORT_NONSTANDARD, 0x10, 0, -1,
 	    .config_function = puc_config_oxford_pcie
 	},
 
 	{   0x1415, 0xc308, 0xffff, 0,
 	    "Oxford Semiconductor OXPCIe958 UARTs",
 	    DEFAULT_RCLK * 0x22,
 	    PUC_PORT_NONSTANDARD, 0x10, 0, -1,
 	    .config_function = puc_config_oxford_pcie
 	},
 
 	{   0x1415, 0xc30d, 0xffff, 0,
 	    "Oxford Semiconductor OXPCIe958 UARTs (function 1)",
 	    DEFAULT_RCLK * 0x22,
 	    PUC_PORT_NONSTANDARD, 0x10, 0, -1,
 	    .config_function = puc_config_oxford_pcie
 	},
 
 	{   0x14d2, 0x8010, 0xffff, 0,
 	    "VScom PCI-100L",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_1S, 0x14, 0, 0,
 	},
 
 	{   0x14d2, 0x8020, 0xffff, 0,
 	    "VScom PCI-200L",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_2S, 0x14, 4, 0,
 	},
 
 	{   0x14d2, 0x8028, 0xffff, 0,
 	    "VScom 200Li",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x20, 0, 8,
 	},
 
 	/*
 	 * VScom (Titan?) PCI-800L.  More modern variant of the
 	 * PCI-800.  Uses 6 discrete 16550 UARTs, plus another
 	 * two of them obviously implemented as macro cells in
 	 * the ASIC.  This causes the weird port access pattern
 	 * below, where two of the IO port ranges each access
 	 * one of the ASIC UARTs, and a block of IO addresses
 	 * access the external UARTs.
 	 */
 	{   0x14d2, 0x8080, 0xffff, 0,
 	    "Titan VScom PCI-800L",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_8S, 0x14, -1, -1,
 	    .config_function = puc_config_titan
 	},
 
 	/*
 	 * VScom PCI-800H. Uses 8 16950 UART, behind a PCI chips that offers
 	 * 4 com port on PCI device 0 and 4 on PCI device 1. PCI device 0 has
 	 * device ID 3 and PCI device 1 device ID 4.
 	 */
 	{   0x14d2, 0xa003, 0xffff, 0,
 	    "Titan PCI-800H",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x14d2, 0xa004, 0xffff, 0,
 	    "Titan PCI-800H",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x14d2, 0xa005, 0xffff, 0,
 	    "Titan PCI-200H",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_2S, 0x10, 0, 8,
 	},
 
 	{   0x14d2, 0xe020, 0xffff, 0,
 	    "Titan VScom PCI-200HV2",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_2S, 0x10, 4, 0,
 	},
 
 	{   0x14d2, 0xa007, 0xffff, 0,
 	    "Titan VScom PCIex-800H",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x14d2, 0xa008, 0xffff, 0,
 	    "Titan VScom PCIex-800H",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x14db, 0x2130, 0xffff, 0,
 	    "Avlab Technology, PCI IO 2S",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x10, 4, 0,
 	},
 
 	{   0x14db, 0x2150, 0xffff, 0,
 	    "Avlab Low Profile PCI 4 Serial",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x10, 4, 0,
 	},
 
 	{   0x14db, 0x2152, 0xffff, 0,
 	    "Avlab Low Profile PCI 4 Serial",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x10, 4, 0,
 	},
 
 	{   0x1592, 0x0781, 0xffff, 0,
 	    "Syba Tech Ltd. PCI-4S2P-550-ECP",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S1P, 0x10, 0, -1,
 	    .config_function = puc_config_syba
 	},
 
 	{   0x1fd4, 0x1999, 0x1fd4, 0x0002,
 	    "Sunix SER5xxxx 2-port serial",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_2S, 0x10, 0, 8,
 	},
 
 	{   0x1fd4, 0x1999, 0x1fd4, 0x0004,
 	    "Sunix SER5xxxx 4-port serial",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	},
 
 	{   0x1fd4, 0x1999, 0x1fd4, 0x0008,
 	    "Sunix SER5xxxx 8-port serial",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_8S, -1, -1, -1,
 	    .config_function = puc_config_sunix
 	},
 
 	{   0x1fd4, 0x1999, 0x1fd4, 0x0101,
 	    "Sunix MIO5xxxx 1-port serial and 1284 Printer port",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_1S1P, -1, -1, -1,
 	    .config_function = puc_config_sunix
 	},
 
 	{   0x1fd4, 0x1999, 0x1fd4, 0x0102,
 	    "Sunix MIO5xxxx 2-port serial and 1284 Printer port",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_2S1P, -1, -1, -1,
 	    .config_function = puc_config_sunix
 	},
 
 	{   0x1fd4, 0x1999, 0x1fd4, 0x0104,
 	    "Sunix MIO5xxxx 4-port serial and 1284 Printer port",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_4S1P, -1, -1, -1,
 	    .config_function = puc_config_sunix
 	},
 
 	{   0x5372, 0x6872, 0xffff, 0,
 	    "Feasso PCI FPP-02 2S1P",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S1P, 0x10, 4, 0,
 	},
 
 	{   0x5372, 0x6873, 0xffff, 0,
 	    "Sun 1040 PCI Quad Serial",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x10, 4, 0,
 	},
 
 	{   0x6666, 0x0001, 0xffff, 0,
 	    "Decision Computer Inc, PCCOM 4-port serial",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x1c, 0, 8,
 	},
 
 	{   0x6666, 0x0002, 0xffff, 0,
 	    "Decision Computer Inc, PCCOM 8-port serial",
 	    DEFAULT_RCLK,
 	    PUC_PORT_8S, 0x1c, 0, 8,
 	},
 
 	{   0x6666, 0x0004, 0xffff, 0,
 	    "PCCOM dual port RS232/422/485",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x1c, 0, 8,
 	},
 
 	{   0x9710, 0x9815, 0xffff, 0,
 	    "NetMos NM9815 Dual 1284 Printer port",
 	    0,
 	    PUC_PORT_2P, 0x10, 8, 0,
 	},
 
 	/*
 	 * This is more specific than the generic NM9835 entry, and is placed
 	 * here to _prevent_ puc(4) from claiming this single port card.
 	 *
 	 * uart(4) will claim this device.
 	 */
 	{   0x9710, 0x9835, 0x1000, 1,
 	    "NetMos NM9835 based 1-port serial",
 	    DEFAULT_RCLK,
 	    PUC_PORT_1S, 0x10, 4, 0,
 	},
 
 	{   0x9710, 0x9835, 0x1000, 2,
 	    "NetMos NM9835 based 2-port serial",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x10, 4, 0,
 	},
 
 	{   0x9710, 0x9835, 0xffff, 0,
 	    "NetMos NM9835 Dual UART and 1284 Printer port",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S1P, 0x10, 4, 0,
 	},
 
 	{   0x9710, 0x9845, 0x1000, 0x0006,
 	    "NetMos NM9845 6 Port UART",
 	    DEFAULT_RCLK,
 	    PUC_PORT_6S, 0x10, 4, 0,
 	},
 
 	{   0x9710, 0x9845, 0xffff, 0,
 	    "NetMos NM9845 Quad UART and 1284 Printer port",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S1P, 0x10, 4, 0,
 	},
 
 	{   0x9710, 0x9865, 0xa000, 0x3002,
 	    "NetMos NM9865 Dual UART",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x10, 4, 0,
 	},
 
 	{   0x9710, 0x9865, 0xa000, 0x3003,
 	    "NetMos NM9865 Triple UART",
 	    DEFAULT_RCLK,
 	    PUC_PORT_3S, 0x10, 4, 0,
 	},
 
 	{   0x9710, 0x9865, 0xa000, 0x3004,
 	    "NetMos NM9865 Quad UART",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x10, 4, 0,
 	},
 
 	{   0x9710, 0x9865, 0xa000, 0x3011,
 	    "NetMos NM9865 Single UART and 1284 Printer port",
 	    DEFAULT_RCLK,
 	    PUC_PORT_1S1P, 0x10, 4, 0,
 	},
 
 	{   0x9710, 0x9865, 0xa000, 0x3012,
 	    "NetMos NM9865 Dual UART and 1284 Printer port",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S1P, 0x10, 4, 0,
 	},
 
 	{   0x9710, 0x9865, 0xa000, 0x3020,
 	    "NetMos NM9865 Dual 1284 Printer port",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2P, 0x10, 4, 0,
 	},
 
 	{   0xb00c, 0x021c, 0xffff, 0,
 	    "IC Book Labs Gunboat x4 Lite",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	    .config_function = puc_config_icbook
 	},
 
 	{   0xb00c, 0x031c, 0xffff, 0,
 	    "IC Book Labs Gunboat x4 Pro",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	    .config_function = puc_config_icbook
 	},
 
 	{   0xb00c, 0x041c, 0xffff, 0,
 	    "IC Book Labs Ironclad x8 Lite",
 	    DEFAULT_RCLK,
 	    PUC_PORT_8S, 0x10, 0, 8,
 	    .config_function = puc_config_icbook
 	},
 
 	{   0xb00c, 0x051c, 0xffff, 0,
 	    "IC Book Labs Ironclad x8 Pro",
 	    DEFAULT_RCLK,
 	    PUC_PORT_8S, 0x10, 0, 8,
 	    .config_function = puc_config_icbook
 	},
 
 	{   0xb00c, 0x081c, 0xffff, 0,
 	    "IC Book Labs Dreadnought x16 Pro",
 	    DEFAULT_RCLK * 8,
 	    PUC_PORT_16S, 0x10, 0, 8,
 	    .config_function = puc_config_icbook
 	},
 
 	{   0xb00c, 0x091c, 0xffff, 0,
 	    "IC Book Labs Dreadnought x16 Lite",
 	    DEFAULT_RCLK,
 	    PUC_PORT_16S, 0x10, 0, 8,
 	    .config_function = puc_config_icbook
 	},
 
 	{   0xb00c, 0x0a1c, 0xffff, 0,
 	    "IC Book Labs Gunboat x2 Low Profile",
 	    DEFAULT_RCLK,
 	    PUC_PORT_2S, 0x10, 0, 8,
 	},
 
 	{   0xb00c, 0x0b1c, 0xffff, 0,
 	    "IC Book Labs Gunboat x4 Low Profile",
 	    DEFAULT_RCLK,
 	    PUC_PORT_4S, 0x10, 0, 8,
 	    .config_function = puc_config_icbook
 	},
 
 	{ 0xffff, 0, 0xffff, 0, NULL, 0 }
 };
 
 static int
+puc_config_advantech(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port,
+    intptr_t *res __unused)
+{
+	const struct puc_cfg *cfg;
+	struct resource *cres;
+	struct puc_bar *bar;
+	device_t cdev, dev;
+	bus_size_t off;
+	int base, crtype, fixed, high, i, oxpcie;
+	uint8_t acr, func, mask;
+
+	if (cmd != PUC_CFG_SETUP)
+		return (ENXIO);
+
+	base = fixed = oxpcie = 0;
+	crtype = SYS_RES_IOPORT;
+	acr = mask = 0x0;
+	func = high = 1;
+	off = 0x60;
+
+	cfg = sc->sc_cfg;
+	switch (cfg->subvendor) {
+	case 0x13fe:
+		switch (cfg->device) {
+		case 0xa102:
+			high = 0;
+			break;
+		default:
+			break;
+		}
+	default:
+		break;
+	}
+	if (fixed == 1)
+		goto setup;
+
+	dev = sc->sc_dev;
+	cdev = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev),
+	    pci_get_slot(dev), func);
+	if (cdev == NULL) {
+		device_printf(dev, "could not find config function\n");
+		return (ENXIO);
+	}
+
+	i = PCIR_BAR(0);
+	cres = bus_alloc_resource_any(cdev, crtype, &i, RF_ACTIVE);
+	if (cres == NULL) {
+		device_printf(dev, "could not allocate config resource\n");
+		return (ENXIO);
+	}
+
+	if (oxpcie == 0) {
+		mask = bus_read_1(cres, off);
+		if (pci_get_function(dev) == 1)
+			base = 4;
+	}
+
+ setup:
+	for (i = 0; i < sc->sc_nports; ++i) {
+		device_printf(dev, "port %d: ", i);
+		bar = puc_get_bar(sc, cfg->rid + i * cfg->d_rid);
+		if (bar == NULL) {
+			printf("could not get BAR\n");
+			continue;
+		}
+
+		if (fixed == 0) {
+			if ((mask & (1 << (base + i))) == 0) {
+				acr = 0;
+				printf("RS-232\n");
+			} else {
+				acr = (high == 1 ? 0x18 : 0x10);
+				printf("RS-422/RS-485, active-%s auto-DTR\n",
+				    high == 1 ? "high" : "low");
+			}
+		}
+
+		bus_write_1(bar->b_res, REG_SPR, REG_ACR);
+		bus_write_1(bar->b_res, REG_ICR, acr);
+	}
+
+	bus_release_resource(cdev, crtype, rman_get_rid(cres), cres);
+	return (0);
+}
+
+static int
 puc_config_amc(struct puc_softc *sc __unused, enum puc_cfg_cmd cmd, int port,
     intptr_t *res)
 {
 
 	switch (cmd) {
 	case PUC_CFG_GET_OFS:
 		*res = 8 * (port & 1);
 		return (0);
 	case PUC_CFG_GET_RID:
 		*res = 0x14 + (port >> 1) * 4;
 		return (0);
 	default:
 		break;
 	}
 	return (ENXIO);
 }
 
 static int
 puc_config_diva(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port,
     intptr_t *res)
 {
 	const struct puc_cfg *cfg = sc->sc_cfg;
 
 	if (cmd == PUC_CFG_GET_OFS) {
 		if (cfg->subdevice == 0x1282)		/* Everest SP */
 			port <<= 1;
 		else if (cfg->subdevice == 0x104b)	/* Maestro SP2 */
 			port = (port == 3) ? 4 : port;
 		*res = port * 8 + ((port > 2) ? 0x18 : 0);
 		return (0);
 	}
 	return (ENXIO);
 }
 
 static int
 puc_config_exar(struct puc_softc *sc __unused, enum puc_cfg_cmd cmd,
     int port, intptr_t *res)
 {
 
 	if (cmd == PUC_CFG_GET_OFS) {
 		*res = port * 0x200;
 		return (0);
 	}
 	return (ENXIO);
 }
 
 static int
 puc_config_exar_pcie(struct puc_softc *sc __unused, enum puc_cfg_cmd cmd,
     int port, intptr_t *res)
 {
 
 	if (cmd == PUC_CFG_GET_OFS) {
 		*res = port * 0x400;
 		return (0);
 	}
 	return (ENXIO);
 }
 
 static int
 puc_config_icbook(struct puc_softc *sc __unused, enum puc_cfg_cmd cmd,
     int port __unused, intptr_t *res)
 {
 
 	if (cmd == PUC_CFG_GET_ILR) {
 		*res = PUC_ILR_DIGI;
 		return (0);
 	}
 	return (ENXIO);
 }
 
 static int
 puc_config_moxa(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port,
     intptr_t *res)
 {
 	const struct puc_cfg *cfg = sc->sc_cfg;
 
 	if (cmd == PUC_CFG_GET_OFS) {
 		if (port == 3 && (cfg->device == 0x1045 ||
 		    cfg->device == 0x1144))
 			port = 7;
 		*res = port * 0x200;
 
 		return 0;
 	}
 	return (ENXIO);
 }
 
 static int
 puc_config_quatech(struct puc_softc *sc, enum puc_cfg_cmd cmd,
     int port __unused, intptr_t *res)
 {
 	const struct puc_cfg *cfg = sc->sc_cfg;
 	struct puc_bar *bar;
 	uint8_t v0, v1;
 
 	switch (cmd) {
 	case PUC_CFG_SETUP:
 		/*
 		 * Check if the scratchpad register is enabled or if the
 		 * interrupt status and options registers are active.
 		 */
 		bar = puc_get_bar(sc, cfg->rid);
 		if (bar == NULL)
 			return (ENXIO);
-		/* Set DLAB in the LCR register of UART 0. */
-		bus_write_1(bar->b_res, 3, 0x80);
-		/* Write 0 to the SPR register of UART 0. */
-		bus_write_1(bar->b_res, 7, 0);
-		/* Read back the contents of the SPR register of UART 0. */
-		v0 = bus_read_1(bar->b_res, 7);
-		/* Write a specific value to the SPR register of UART 0. */
-		bus_write_1(bar->b_res, 7, 0x80 + -cfg->clock);
-		/* Read back the contents of the SPR register of UART 0. */
-		v1 = bus_read_1(bar->b_res, 7);
-		/* Clear DLAB in the LCR register of UART 0. */
-		bus_write_1(bar->b_res, 3, 0);
-		/* Save the two values read-back from the SPR register. */
+		bus_write_1(bar->b_res, REG_LCR, LCR_DLAB);
+		bus_write_1(bar->b_res, REG_SPR, 0);
+		v0 = bus_read_1(bar->b_res, REG_SPR);
+		bus_write_1(bar->b_res, REG_SPR, 0x80 + -cfg->clock);
+		v1 = bus_read_1(bar->b_res, REG_SPR);
+		bus_write_1(bar->b_res, REG_LCR, 0);
 		sc->sc_cfg_data = (v0 << 8) | v1;
 		if (v0 == 0 && v1 == 0x80 + -cfg->clock) {
 			/*
 			 * The SPR register echoed the two values written
-			 * by us. This means that the SPAD jumper is set.
+			 * by us.  This means that the SPAD jumper is set.
 			 */
 			device_printf(sc->sc_dev, "warning: extra features "
 			    "not usable -- SPAD compatibility enabled\n");
 			return (0);
 		}
 		if (v0 != 0) {
 			/*
-			 * The first value doesn't match. This can only mean
+			 * The first value doesn't match.  This can only mean
 			 * that the SPAD jumper is not set and that a non-
 			 * standard fixed clock multiplier jumper is set.
 			 */
 			if (bootverbose)
 				device_printf(sc->sc_dev, "fixed clock rate "
 				    "multiplier of %d\n", 1 << v0);
 			if (v0 < -cfg->clock)
 				device_printf(sc->sc_dev, "warning: "
 				    "suboptimal fixed clock rate multiplier "
 				    "setting\n");
 			return (0);
 		}
 		/*
-		 * The first value matched, but the second didn't. We know
-		 * that the SPAD jumper is not set. We also know that the
+		 * The first value matched, but the second didn't.  We know
+		 * that the SPAD jumper is not set.  We also know that the
 		 * clock rate multiplier is software controlled *and* that
 		 * we just programmed it to the maximum allowed.
 		 */
 		if (bootverbose)
 			device_printf(sc->sc_dev, "clock rate multiplier of "
 			    "%d selected\n", 1 << -cfg->clock);
 		return (0);
 	case PUC_CFG_GET_CLOCK:
 		v0 = (sc->sc_cfg_data >> 8) & 0xff;
 		v1 = sc->sc_cfg_data & 0xff;
 		if (v0 == 0 && v1 == 0x80 + -cfg->clock) {
 			/*
 			 * XXX With the SPAD jumper applied, there's no
 			 * easy way of knowing if there's also a clock
-			 * rate multiplier jumper installed. Let's hope
-			 * not...
+			 * rate multiplier jumper installed.  Let's hope
+			 * not ...
 			 */
 			*res = DEFAULT_RCLK;
 		} else if (v0 == 0) {
 			/*
 			 * No clock rate multiplier jumper installed,
 			 * so we programmed the board with the maximum
 			 * multiplier allowed as given to us in the
 			 * clock field of the config record (negated).
 			 */
 			*res = DEFAULT_RCLK << -cfg->clock;
 		} else
 			*res = DEFAULT_RCLK << v0;
 		return (0);
 	case PUC_CFG_GET_ILR:
 		v0 = (sc->sc_cfg_data >> 8) & 0xff;
 		v1 = sc->sc_cfg_data & 0xff;
 		*res = (v0 == 0 && v1 == 0x80 + -cfg->clock) ?
 		    PUC_ILR_NONE : PUC_ILR_QUATECH;
 		return (0);
 	default:
 		break;
 	}
 	return (ENXIO);
 }
 
 static int
 puc_config_syba(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port,
     intptr_t *res)
 {
 	static int base[] = { 0x251, 0x3f0, 0 };
 	const struct puc_cfg *cfg = sc->sc_cfg;
 	struct puc_bar *bar;
 	int efir, idx, ofs;
 	uint8_t v;
 
 	switch (cmd) {
 	case PUC_CFG_SETUP:
 		bar = puc_get_bar(sc, cfg->rid);
 		if (bar == NULL)
 			return (ENXIO);
 
 		/* configure both W83877TFs */
 		bus_write_1(bar->b_res, 0x250, 0x89);
 		bus_write_1(bar->b_res, 0x3f0, 0x87);
 		bus_write_1(bar->b_res, 0x3f0, 0x87);
 		idx = 0;
 		while (base[idx] != 0) {
 			efir = base[idx];
 			bus_write_1(bar->b_res, efir, 0x09);
 			v = bus_read_1(bar->b_res, efir + 1);
 			if ((v & 0x0f) != 0x0c)
 				return (ENXIO);
 			bus_write_1(bar->b_res, efir, 0x16);
 			v = bus_read_1(bar->b_res, efir + 1);
 			bus_write_1(bar->b_res, efir, 0x16);
 			bus_write_1(bar->b_res, efir + 1, v | 0x04);
 			bus_write_1(bar->b_res, efir, 0x16);
 			bus_write_1(bar->b_res, efir + 1, v & ~0x04);
 			ofs = base[idx] & 0x300;
 			bus_write_1(bar->b_res, efir, 0x23);
 			bus_write_1(bar->b_res, efir + 1, (ofs + 0x78) >> 2);
 			bus_write_1(bar->b_res, efir, 0x24);
 			bus_write_1(bar->b_res, efir + 1, (ofs + 0xf8) >> 2);
 			bus_write_1(bar->b_res, efir, 0x25);
 			bus_write_1(bar->b_res, efir + 1, (ofs + 0xe8) >> 2);
 			bus_write_1(bar->b_res, efir, 0x17);
 			bus_write_1(bar->b_res, efir + 1, 0x03);
 			bus_write_1(bar->b_res, efir, 0x28);
 			bus_write_1(bar->b_res, efir + 1, 0x43);
 			idx++;
 		}
 		bus_write_1(bar->b_res, 0x250, 0xaa);
 		bus_write_1(bar->b_res, 0x3f0, 0xaa);
 		return (0);
 	case PUC_CFG_GET_OFS:
 		switch (port) {
 		case 0:
 			*res = 0x2f8;
 			return (0);
 		case 1:
 			*res = 0x2e8;
 			return (0);
 		case 2:
 			*res = 0x3f8;
 			return (0);
 		case 3:
 			*res = 0x3e8;
 			return (0);
 		case 4:
 			*res = 0x278;
 			return (0);
 		}
 		break;
 	default:
 		break;
 	}
 	return (ENXIO);
 }
 
 static int
 puc_config_siig(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port,
     intptr_t *res)
 {
 	const struct puc_cfg *cfg = sc->sc_cfg;
 
 	switch (cmd) {
 	case PUC_CFG_GET_OFS:
 		if (cfg->ports == PUC_PORT_8S) {
 			*res = (port > 4) ? 8 * (port - 4) : 0;
 			return (0);
 		}
 		break;
 	case PUC_CFG_GET_RID:
 		if (cfg->ports == PUC_PORT_8S) {
 			*res = 0x10 + ((port > 4) ? 0x10 : 4 * port);
 			return (0);
 		}
 		if (cfg->ports == PUC_PORT_2S1P) {
 			switch (port) {
 			case 0: *res = 0x10; return (0);
 			case 1: *res = 0x14; return (0);
 			case 2: *res = 0x1c; return (0);
 			}
 		}
 		break;
 	default:
 		break;
 	}
 	return (ENXIO);
 }
 
 static int
 puc_config_timedia(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port,
     intptr_t *res)
 {
 	static const uint16_t dual[] = {
 	    0x0002, 0x4036, 0x4037, 0x4038, 0x4078, 0x4079, 0x4085,
 	    0x4088, 0x4089, 0x5037, 0x5078, 0x5079, 0x5085, 0x6079,
 	    0x7079, 0x8079, 0x8137, 0x8138, 0x8237, 0x8238, 0x9079,
 	    0x9137, 0x9138, 0x9237, 0x9238, 0xA079, 0xB079, 0xC079,
 	    0xD079, 0
 	};
 	static const uint16_t quad[] = {
 	    0x4055, 0x4056, 0x4095, 0x4096, 0x5056, 0x8156, 0x8157,
 	    0x8256, 0x8257, 0x9056, 0x9156, 0x9157, 0x9158, 0x9159,
 	    0x9256, 0x9257, 0xA056, 0xA157, 0xA158, 0xA159, 0xB056,
 	    0xB157, 0
 	};
 	static const uint16_t octa[] = {
 	    0x4065, 0x4066, 0x5065, 0x5066, 0x8166, 0x9066, 0x9166,
 	    0x9167, 0x9168, 0xA066, 0xA167, 0xA168, 0
 	};
 	static const struct {
 		int ports;
 		const uint16_t *ids;
 	} subdevs[] = {
 	    { 2, dual },
 	    { 4, quad },
 	    { 8, octa },
 	    { 0, NULL }
 	};
 	static char desc[64];
 	int dev, id;
 	uint16_t subdev;
 
 	switch (cmd) {
 	case PUC_CFG_GET_CLOCK:
 		if (port < 2)
 			*res = DEFAULT_RCLK * 8;
 		else
 			*res = DEFAULT_RCLK;
 		return (0);
 	case PUC_CFG_GET_DESC:
 		snprintf(desc, sizeof(desc),
 		    "Timedia technology %d Port Serial", (int)sc->sc_cfg_data);
 		*res = (intptr_t)desc;
 		return (0);
 	case PUC_CFG_GET_NPORTS:
 		subdev = pci_get_subdevice(sc->sc_dev);
 		dev = 0;
 		while (subdevs[dev].ports != 0) {
 			id = 0;
 			while (subdevs[dev].ids[id] != 0) {
 				if (subdev == subdevs[dev].ids[id]) {
 					sc->sc_cfg_data = subdevs[dev].ports;
 					*res = sc->sc_cfg_data;
 					return (0);
 				}
 				id++;
 			}
 			dev++;
 		}
 		return (ENXIO);
 	case PUC_CFG_GET_OFS:
 		*res = (port == 1 || port == 3) ? 8 : 0;
 		return (0);
 	case PUC_CFG_GET_RID:
 		*res = 0x10 + ((port > 3) ? port - 2 : port >> 1) * 4;
 		return (0);
 	case PUC_CFG_GET_TYPE:
 		*res = PUC_TYPE_SERIAL;
 		return (0);
 	default:
 		break;
 	}
 	return (ENXIO);
 }
 
 static int
 puc_config_oxford_pci954(struct puc_softc *sc, enum puc_cfg_cmd cmd,
     int port __unused, intptr_t *res)
 {
 
 	switch (cmd) {
 	case PUC_CFG_GET_CLOCK:
 		/*
 		 * OXu16PCI954 use a 14.7456 MHz clock by default while
 		 * OX16PCI954 and OXm16PCI954 employ a 1.8432 MHz one.
 		 */
 		if (pci_get_revid(sc->sc_dev) == 1)
 			*res = DEFAULT_RCLK * 8;
 		else
 			*res = DEFAULT_RCLK;
 		return (0);
 	default:
 		break;
 	}
 	return (ENXIO);
 }
 
 static int
 puc_config_oxford_pcie(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port,
     intptr_t *res)
 {
 	const struct puc_cfg *cfg = sc->sc_cfg;
 	int idx;
 	struct puc_bar *bar;
 	uint8_t value;
 
 	switch (cmd) {
 	case PUC_CFG_SETUP:
 		device_printf(sc->sc_dev, "%d UARTs detected\n",
 			sc->sc_nports);
 
 		/* Set UARTs to enhanced mode */
 		bar = puc_get_bar(sc, cfg->rid);
 		if (bar == NULL)
 			return (ENXIO);
 		for (idx = 0; idx < sc->sc_nports; idx++) {
 			value = bus_read_1(bar->b_res, 0x1000 + (idx << 9) +
 			    0x92);
 			bus_write_1(bar->b_res, 0x1000 + (idx << 9) + 0x92,
 			    value | 0x10);
 		}
 		return (0);
 	case PUC_CFG_GET_LEN:
 		*res = 0x200;
 		return (0);
 	case PUC_CFG_GET_NPORTS:
 		/*
 		 * Check if we are being called from puc_bfe_attach()
-		 * or puc_bfe_probe(). If puc_bfe_probe(), we cannot
-		 * puc_get_bar(), so we return a value of 16. This has cosmetic
-		 * side-effects at worst; in PUC_CFG_GET_DESC,
-		 * (int)sc->sc_cfg_data will not contain the true number of
-		 * ports in PUC_CFG_GET_DESC, but we are not implementing that
-		 * call for this device family anyway.
+		 * or puc_bfe_probe().  If puc_bfe_probe(), we cannot call
+		 * puc_get_bar(), so we return a value of 16.  This has
+		 * cosmetic side-effects at worst: sc->sc_cfg_data will
+		 * not contain the true number of ports if it is used in
+		 * PUC_CFG_GET_DESC, but we are not implementing that
+		 * call for this device family anyway.
 		 *
-		 * The check is for initialisation of sc->sc_bar[idx], which is
-		 * only done in puc_bfe_attach().
+		 * The check is for initialization of sc->sc_bar[idx],
+		 * which is only done in puc_bfe_attach().
 		 */
 		idx = 0;
 		do {
 			if (sc->sc_bar[idx++].b_rid != -1) {
 				sc->sc_cfg_data = 16;
 				*res = sc->sc_cfg_data;
 				return (0);
 			}
 		} while (idx < PUC_PCI_BARS);
 
 		bar = puc_get_bar(sc, cfg->rid);
 		if (bar == NULL)
 			return (ENXIO);
 
 		value = bus_read_1(bar->b_res, 0x04);
 		if (value == 0)
 			return (ENXIO);
 
 		sc->sc_cfg_data = value;
 		*res = sc->sc_cfg_data;
 		return (0);
 	case PUC_CFG_GET_OFS:
 		*res = 0x1000 + (port << 9);
 		return (0);
 	case PUC_CFG_GET_TYPE:
 		*res = PUC_TYPE_SERIAL;
 		return (0);
 	default:
 		break;
 	}
 	return (ENXIO);
 }
 
 static int
 puc_config_sunix(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port,
     intptr_t *res)
 {
 	int error;
 
 	switch (cmd) {
 	case PUC_CFG_GET_OFS:
 		error = puc_config(sc, PUC_CFG_GET_TYPE, port, res);
 		if (error != 0)
 			return (error);
 		*res = (*res == PUC_TYPE_SERIAL) ? (port & 3) * 8 : 0;
 		return (0);
 	case PUC_CFG_GET_RID:
 		error = puc_config(sc, PUC_CFG_GET_TYPE, port, res);
 		if (error != 0)
 			return (error);
 		*res = (*res == PUC_TYPE_SERIAL && port <= 3) ? 0x10 : 0x14;
 		return (0);
 	default:
 		break;
 	}
 	return (ENXIO);
 }
 
 static int
 puc_config_titan(struct puc_softc *sc __unused, enum puc_cfg_cmd cmd,
     int port, intptr_t *res)
 {
 
 	switch (cmd) {
 	case PUC_CFG_GET_OFS:
 		*res = (port < 3) ? 0 : (port - 2) << 3;
 		return (0);
 	case PUC_CFG_GET_RID:
 		*res = 0x14 + ((port >= 2) ? 0x0c : port << 2);
 		return (0);
 	default:
 		break;
 	}
 	return (ENXIO);
 }
Index: projects/clang380-import/sys/dev/rtwn/if_rtwn.c
===================================================================
--- projects/clang380-import/sys/dev/rtwn/if_rtwn.c	(revision 293686)
+++ projects/clang380-import/sys/dev/rtwn/if_rtwn.c	(revision 293687)
@@ -1,3490 +1,3490 @@
 /*	$OpenBSD: if_rtwn.c,v 1.6 2015/08/28 00:03:53 deraadt Exp $	*/
 
 /*-
  * Copyright (c) 2010 Damien Bergamini <damien.bergamini@free.fr>
  * Copyright (c) 2015 Stefan Sperling <stsp@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
  * copyright notice and this permission notice appear in all copies.
  *
  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Driver for Realtek RTL8188CE
  */
 
 #include <sys/param.h>
 #include <sys/sysctl.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/kernel.h>
 #include <sys/socket.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/module.h>
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/firmware.h>
 
 #include <machine/bus.h>
 #include <machine/resource.h>
 #include <sys/rman.h>
 
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 
 #include <net/bpf.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/ethernet.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 
 #include <net80211/ieee80211_var.h>
 #include <net80211/ieee80211_radiotap.h>
 #include <net80211/ieee80211_regdomain.h>
 #include <net80211/ieee80211_ratectl.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/if_ether.h>
 
 #include <dev/rtwn/if_rtwnreg.h>
 
 #define	RTWN_DEBUG
 #ifdef RTWN_DEBUG
 #define	DPRINTF(x)	do { if (sc->sc_debug > 0) printf x; } while (0)
 #define	DPRINTFN(n, x)	do { if (sc->sc_debug >= (n)) printf x; } while (0)
 #else
 #define	DPRINTF(x)
 #define	DPRINTFN(n, x)
 #endif
 
 /*
  * PCI configuration space registers.
  */
 #define	RTWN_PCI_IOBA		0x10	/* i/o mapped base */
 #define	RTWN_PCI_MMBA		0x18	/* memory mapped base */
 
 #define RTWN_INT_ENABLE	(R92C_IMR_ROK | R92C_IMR_VODOK | R92C_IMR_VIDOK | \
 			R92C_IMR_BEDOK | R92C_IMR_BKDOK | R92C_IMR_MGNTDOK | \
 			R92C_IMR_HIGHDOK | R92C_IMR_BDOK | R92C_IMR_RDU | \
 			R92C_IMR_RXFOVW)
 
 struct rtwn_ident {
 	uint16_t	vendor;
 	uint16_t	device;
 	const char	*name;
 };
 
 
 static const struct rtwn_ident rtwn_ident_table[] = {
 	{ 0x10ec, 0x8176, "Realtek RTL8188CE" },
 	{ 0, 0, NULL }
 };
 
 
 static void	rtwn_dma_map_addr(void *, bus_dma_segment_t *, int, int);
 static void	rtwn_setup_rx_desc(struct rtwn_softc *, struct r92c_rx_desc *,
 		    bus_addr_t, size_t, int);
 static int	rtwn_alloc_rx_list(struct rtwn_softc *);
 static void	rtwn_reset_rx_list(struct rtwn_softc *);
 static void	rtwn_free_rx_list(struct rtwn_softc *);
 static int	rtwn_alloc_tx_list(struct rtwn_softc *, int);
 static void	rtwn_reset_tx_list(struct rtwn_softc *, int);
 static void	rtwn_free_tx_list(struct rtwn_softc *, int);
 static struct ieee80211vap *rtwn_vap_create(struct ieee80211com *,
 		    const char [IFNAMSIZ], int, enum ieee80211_opmode, int,
 		    const uint8_t [IEEE80211_ADDR_LEN],
 		    const uint8_t [IEEE80211_ADDR_LEN]);
 static void	rtwn_vap_delete(struct ieee80211vap *);
 static void	rtwn_write_1(struct rtwn_softc *, uint16_t, uint8_t);
 static void	rtwn_write_2(struct rtwn_softc *, uint16_t, uint16_t);
 static void	rtwn_write_4(struct rtwn_softc *, uint16_t, uint32_t);
 static uint8_t	rtwn_read_1(struct rtwn_softc *, uint16_t);
 static uint16_t	rtwn_read_2(struct rtwn_softc *, uint16_t);
 static uint32_t	rtwn_read_4(struct rtwn_softc *, uint16_t);
 static int	rtwn_fw_cmd(struct rtwn_softc *, uint8_t, const void *, int);
 static void	rtwn_rf_write(struct rtwn_softc *, int, uint8_t, uint32_t);
 static uint32_t	rtwn_rf_read(struct rtwn_softc *, int, uint8_t);
 static int	rtwn_llt_write(struct rtwn_softc *, uint32_t, uint32_t);
 static uint8_t	rtwn_efuse_read_1(struct rtwn_softc *, uint16_t);
 static void	rtwn_efuse_read(struct rtwn_softc *);
 static int	rtwn_read_chipid(struct rtwn_softc *);
 static void	rtwn_read_rom(struct rtwn_softc *);
 static int	rtwn_ra_init(struct rtwn_softc *);
 static void	rtwn_tsf_sync_enable(struct rtwn_softc *);
 static void	rtwn_set_led(struct rtwn_softc *, int, int);
 static void	rtwn_calib_to(void *);
 static int	rtwn_newstate(struct ieee80211vap *, enum ieee80211_state, int);
 static int	rtwn_updateedca(struct ieee80211com *);
 static void	rtwn_update_avgrssi(struct rtwn_softc *, int, int8_t);
 static int8_t	rtwn_get_rssi(struct rtwn_softc *, int, void *);
 static void	rtwn_rx_frame(struct rtwn_softc *, struct r92c_rx_desc *,
 		    struct rtwn_rx_data *, int);
 static int	rtwn_tx(struct rtwn_softc *, struct mbuf *,
 		    struct ieee80211_node *);
 static void	rtwn_tx_done(struct rtwn_softc *, int);
 static int	rtwn_raw_xmit(struct ieee80211_node *, struct mbuf *,
 		    const struct ieee80211_bpf_params *);
 static int	rtwn_transmit(struct ieee80211com *, struct mbuf *);
 static void	rtwn_parent(struct ieee80211com *);
 static void	rtwn_start(struct rtwn_softc *sc);
 static void	rtwn_watchdog(void *);
 static int	rtwn_power_on(struct rtwn_softc *);
 static int	rtwn_llt_init(struct rtwn_softc *);
 static void	rtwn_fw_reset(struct rtwn_softc *);
 static void	rtwn_fw_loadpage(struct rtwn_softc *, int, const uint8_t *,
 		    int);
 static int	rtwn_load_firmware(struct rtwn_softc *);
 static int	rtwn_dma_init(struct rtwn_softc *);
 static void	rtwn_mac_init(struct rtwn_softc *);
 static void	rtwn_bb_init(struct rtwn_softc *);
 static void	rtwn_rf_init(struct rtwn_softc *);
 static void	rtwn_cam_init(struct rtwn_softc *);
 static void	rtwn_pa_bias_init(struct rtwn_softc *);
 static void	rtwn_rxfilter_init(struct rtwn_softc *);
 static void	rtwn_edca_init(struct rtwn_softc *);
 static void	rtwn_write_txpower(struct rtwn_softc *, int, uint16_t[]);
 static void	rtwn_get_txpower(struct rtwn_softc *, int,
 		    struct ieee80211_channel *, struct ieee80211_channel *,
 		    uint16_t[]);
 static void	rtwn_set_txpower(struct rtwn_softc *,
 		    struct ieee80211_channel *, struct ieee80211_channel *);
 static void	rtwn_scan_start(struct ieee80211com *);
 static void	rtwn_scan_end(struct ieee80211com *);
 static void	rtwn_set_channel(struct ieee80211com *);
 static void	rtwn_update_mcast(struct ieee80211com *);
 static void	rtwn_set_chan(struct rtwn_softc *,
 		    struct ieee80211_channel *, struct ieee80211_channel *);
 static int	rtwn_iq_calib_chain(struct rtwn_softc *, int, uint16_t[2],
 		    uint16_t[2]);
 static void	rtwn_iq_calib_run(struct rtwn_softc *, int, uint16_t[2][2],
 		    uint16_t[2][2]);
 static int	rtwn_iq_calib_compare_results(uint16_t[2][2], uint16_t[2][2],
 		    uint16_t[2][2], uint16_t[2][2], int);
 static void	rtwn_iq_calib_write_results(struct rtwn_softc *, uint16_t[2],
 		    uint16_t[2], int);
 static void	rtwn_iq_calib(struct rtwn_softc *);
 static void	rtwn_lc_calib(struct rtwn_softc *);
 static void	rtwn_temp_calib(struct rtwn_softc *);
 static void	rtwn_init_locked(struct rtwn_softc *);
 static void	rtwn_init(struct rtwn_softc *);
 static void	rtwn_stop_locked(struct rtwn_softc *);
 static void	rtwn_stop(struct rtwn_softc *);
 static void	rtwn_intr(void *);
 static void	rtwn_hw_reset(void *, int);
 
 /* Aliases. */
 #define	rtwn_bb_write	rtwn_write_4
 #define rtwn_bb_read	rtwn_read_4
 
 static int	rtwn_probe(device_t);
 static int	rtwn_attach(device_t);
 static int	rtwn_detach(device_t);
 static int	rtwn_shutdown(device_t);
 static int	rtwn_suspend(device_t);
 static int	rtwn_resume(device_t);
 
 static device_method_t rtwn_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		rtwn_probe),
 	DEVMETHOD(device_attach,	rtwn_attach),
 	DEVMETHOD(device_detach,	rtwn_detach),
 	DEVMETHOD(device_shutdown,	rtwn_shutdown),
 	DEVMETHOD(device_suspend,	rtwn_suspend),
 	DEVMETHOD(device_resume,	rtwn_resume),
 
 	DEVMETHOD_END
 };
 
 static driver_t rtwn_driver = {
 	"rtwn",
 	rtwn_methods,
 	sizeof (struct rtwn_softc)
 };
 static devclass_t rtwn_devclass;
 
 DRIVER_MODULE(rtwn, pci, rtwn_driver, rtwn_devclass, NULL, NULL);
 
 MODULE_VERSION(rtwn, 1);
 
 MODULE_DEPEND(rtwn, pci,  1, 1, 1);
 MODULE_DEPEND(rtwn, wlan, 1, 1, 1);
 MODULE_DEPEND(rtwn, firmware, 1, 1, 1);
 
 static int
 rtwn_probe(device_t dev)
 {
 	const struct rtwn_ident *ident;
 
 	for (ident = rtwn_ident_table; ident->name != NULL; ident++) {
 		if (pci_get_vendor(dev) == ident->vendor &&
 		    pci_get_device(dev) == ident->device) {
 			device_set_desc(dev, ident->name);
 			return (BUS_PROBE_DEFAULT);
 		}
 	}
 	return (ENXIO);
 }
 
 static int
 rtwn_attach(device_t dev)
 {
 	struct rtwn_softc *sc = device_get_softc(dev);
 	struct ieee80211com *ic = &sc->sc_ic;
 	uint32_t lcsr;
 	uint8_t bands[howmany(IEEE80211_MODE_MAX, 8)];
 	int i, count, error, rid;
 
 	sc->sc_dev = dev;
 	sc->sc_debug = 0;
 
 	/*
 	 * Get the offset of the PCI Express Capability Structure in PCI
 	 * Configuration Space.
 	 */
 	error = pci_find_cap(dev, PCIY_EXPRESS, &sc->sc_cap_off);
 	if (error != 0) {
 		device_printf(dev, "PCIe capability structure not found!\n");
 		return (error);
 	}
 
 	/* Enable bus-mastering. */
 	pci_enable_busmaster(dev);
 
 	rid = PCIR_BAR(2);
 	sc->mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
 	    RF_ACTIVE);
 	if (sc->mem == NULL) {
 		device_printf(dev, "can't map mem space\n");
 		return (ENOMEM);
 	}
 	sc->sc_st = rman_get_bustag(sc->mem);
 	sc->sc_sh = rman_get_bushandle(sc->mem);
 
 	/* Install interrupt handler. */
 	count = 1;
 	rid = 0;
 	if (pci_alloc_msi(dev, &count) == 0)
 		rid = 1;
 	sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE |
 	    (rid != 0 ? 0 : RF_SHAREABLE));
 	if (sc->irq == NULL) {
 		device_printf(dev, "can't map interrupt\n");
 		return (ENXIO);
 	}
 
 	RTWN_LOCK_INIT(sc);
 	callout_init_mtx(&sc->calib_to, &sc->sc_mtx, 0);
 	callout_init_mtx(&sc->watchdog_to, &sc->sc_mtx, 0);
 	TASK_INIT(&sc->sc_reinit_task, 0, rtwn_hw_reset, sc);
 	mbufq_init(&sc->sc_snd, ifqmaxlen);
 
 	error = rtwn_read_chipid(sc);
 	if (error != 0) {
 		device_printf(dev, "unsupported test chip\n");
 		goto fail;
 	}
 
 	/* Disable PCIe Active State Power Management (ASPM). */
 	lcsr = pci_read_config(sc->sc_dev, sc->sc_cap_off + PCIER_LINK_CTL, 4);
 	lcsr &= ~PCIEM_LINK_CTL_ASPMC;
 	pci_write_config(sc->sc_dev, sc->sc_cap_off + PCIER_LINK_CTL, lcsr, 4);
 
 	/* Allocate Tx/Rx buffers. */
 	error = rtwn_alloc_rx_list(sc);
 	if (error != 0) {
 		device_printf(dev, "could not allocate Rx buffers\n");
 		goto fail;
 	}
 	for (i = 0; i < RTWN_NTXQUEUES; i++) {
 		error = rtwn_alloc_tx_list(sc, i);
 		if (error != 0) {
 			device_printf(dev, "could not allocate Tx buffers\n");
 			goto fail;
 		}
 	}
 
 	/* Determine number of Tx/Rx chains. */
 	if (sc->chip & RTWN_CHIP_92C) {
 		sc->ntxchains = (sc->chip & RTWN_CHIP_92C_1T2R) ? 1 : 2;
 		sc->nrxchains = 2;
 	} else {
 		sc->ntxchains = 1;
 		sc->nrxchains = 1;
 	}
 	rtwn_read_rom(sc);
 
 	device_printf(sc->sc_dev, "MAC/BB RTL%s, RF 6052 %dT%dR\n",
 	    (sc->chip & RTWN_CHIP_92C) ? "8192CE" : "8188CE",
 	    sc->ntxchains, sc->nrxchains);
 
 	ic->ic_softc = sc;
 	ic->ic_name = device_get_nameunit(dev);
 	ic->ic_opmode = IEEE80211_M_STA;
 	ic->ic_phytype = IEEE80211_T_OFDM; /* not only, but not used */
 
 	/* set device capabilities */
 	ic->ic_caps =
 		  IEEE80211_C_STA		/* station mode */
 		| IEEE80211_C_MONITOR		/* monitor mode */
 		| IEEE80211_C_SHPREAMBLE	/* short preamble supported */
 		| IEEE80211_C_SHSLOT		/* short slot time supported */
 		| IEEE80211_C_WPA		/* capable of WPA1+WPA2 */
 		| IEEE80211_C_BGSCAN		/* capable of bg scanning */
 		| IEEE80211_C_WME		/* 802.11e */
 		;
 
 	memset(bands, 0, sizeof(bands));
 	setbit(bands, IEEE80211_MODE_11B);
 	setbit(bands, IEEE80211_MODE_11G);
 	ieee80211_init_channels(ic, NULL, bands);
 
 	ieee80211_ifattach(ic);
 
 	ic->ic_wme.wme_update = rtwn_updateedca;
 	ic->ic_update_mcast = rtwn_update_mcast;
 	ic->ic_scan_start =rtwn_scan_start;
 	ic->ic_scan_end = rtwn_scan_end;
 	ic->ic_set_channel = rtwn_set_channel;
 	ic->ic_raw_xmit = rtwn_raw_xmit;
 	ic->ic_transmit = rtwn_transmit;
 	ic->ic_parent = rtwn_parent;
 	ic->ic_vap_create = rtwn_vap_create;
 	ic->ic_vap_delete = rtwn_vap_delete;
 
 	ieee80211_radiotap_attach(ic,
 	    &sc->sc_txtap.wt_ihdr, sizeof(sc->sc_txtap),
 		RTWN_TX_RADIOTAP_PRESENT,
 	    &sc->sc_rxtap.wr_ihdr, sizeof(sc->sc_rxtap),
 		RTWN_RX_RADIOTAP_PRESENT);
 
 	/*
 	 * Hook our interrupt after all initialization is complete.
 	 */
 	error = bus_setup_intr(dev, sc->irq, INTR_TYPE_NET | INTR_MPSAFE,
 	    NULL, rtwn_intr, sc, &sc->sc_ih);
 	if (error != 0) {
 		device_printf(dev, "can't establish interrupt, error %d\n",
 		    error);
 		goto fail;
 	}
 
 	if (bootverbose)
 		ieee80211_announce(ic);
 
 	return (0);
 
 fail:
 	rtwn_detach(dev);
 	return (error);
 }
 	
 
 static int
 rtwn_detach(device_t dev)
 {
 	struct rtwn_softc *sc = device_get_softc(dev);
 	int i;
 
 	if (sc->sc_ic.ic_softc != NULL) {
 		ieee80211_draintask(&sc->sc_ic, &sc->sc_reinit_task);
 		rtwn_stop(sc);
 
 		callout_drain(&sc->calib_to);
 		callout_drain(&sc->watchdog_to);
 		ieee80211_ifdetach(&sc->sc_ic);
 		mbufq_drain(&sc->sc_snd);
 	}
 
 	/* Uninstall interrupt handler. */
 	if (sc->irq != NULL) {
 		bus_teardown_intr(dev, sc->irq, sc->sc_ih);
 		bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq),
 		    sc->irq);
 		pci_release_msi(dev);
 	}
 
 	/* Free Tx/Rx buffers. */
 	for (i = 0; i < RTWN_NTXQUEUES; i++)
 		rtwn_free_tx_list(sc, i);
 	rtwn_free_rx_list(sc);
 
 	if (sc->mem != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    rman_get_rid(sc->mem), sc->mem);
 
 	RTWN_LOCK_DESTROY(sc);
 	return (0);
 }
 
 static int
 rtwn_shutdown(device_t dev)
 {
 
 	return (0);
 }
 
 static int
 rtwn_suspend(device_t dev)
 {
 	return (0);
 }
 
 static int
 rtwn_resume(device_t dev)
 {
 
 	return (0);
 }
 
 static void
 rtwn_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
 {
 
 	if (error != 0)
 		return;
 	KASSERT(nsegs == 1, ("too many DMA segments, %d should be 1", nsegs));
 	*(bus_addr_t *)arg = segs[0].ds_addr;
 }
 
 static void
 rtwn_setup_rx_desc(struct rtwn_softc *sc, struct r92c_rx_desc *desc,
     bus_addr_t addr, size_t len, int idx)
 {
 
 	memset(desc, 0, sizeof(*desc));
 	desc->rxdw0 = htole32(SM(R92C_RXDW0_PKTLEN, len) |
 		((idx == RTWN_RX_LIST_COUNT - 1) ? R92C_RXDW0_EOR : 0));
 	desc->rxbufaddr = htole32(addr);
 	bus_space_barrier(sc->sc_st, sc->sc_sh, 0, sc->sc_mapsize,
 	    BUS_SPACE_BARRIER_WRITE);
 	desc->rxdw0 |= htole32(R92C_RXDW0_OWN);
 }
 
 /*
  * Allocate the Rx descriptor ring and one mbuf cluster per ring slot, load
  * every buffer for DMA and arm its descriptor.
  *
  * Returns 0 on success.  On any failure everything allocated so far is
  * released through rtwn_free_rx_list() and the error code is returned.
  */
 static int
 rtwn_alloc_rx_list(struct rtwn_softc *sc)
 {
 	struct rtwn_rx_ring *rx_ring = &sc->rx_ring;
 	struct rtwn_rx_data *rx_data;
 	bus_size_t size;
 	int i, error;
 
 	/* Allocate Rx descriptors. */
 	size = sizeof(struct r92c_rx_desc) * RTWN_RX_LIST_COUNT;
 	error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0,
 	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
 	    size, 1, size, 0, NULL, NULL, &rx_ring->desc_dmat);
 	if (error != 0) {
 		device_printf(sc->sc_dev, "could not create rx desc DMA tag\n");
 		goto fail;
 	}
 
 	error = bus_dmamem_alloc(rx_ring->desc_dmat, (void **)&rx_ring->desc,
 	    BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT,
 	    &rx_ring->desc_map);
 	if (error != 0) {
 		device_printf(sc->sc_dev, "could not allocate rx desc\n");
 		goto fail;
 	}
 	/*
 	 * BUS_DMA_NOWAIT: a deferred load (EINPROGRESS) cannot be handled
 	 * here, because any non-zero return tears the ring down again.
 	 */
 	error = bus_dmamap_load(rx_ring->desc_dmat, rx_ring->desc_map,
 	    rx_ring->desc, size, rtwn_dma_map_addr, &rx_ring->paddr,
 	    BUS_DMA_NOWAIT);
 	if (error != 0) {
 		device_printf(sc->sc_dev, "could not load rx desc DMA map\n");
 		goto fail;
 	}
 	bus_dmamap_sync(rx_ring->desc_dmat, rx_ring->desc_map,
 	    BUS_DMASYNC_PREWRITE);
 
 	/* Create RX buffer DMA tag. */
 	error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0,
 	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES,
 	    1, MCLBYTES, 0, NULL, NULL, &rx_ring->data_dmat);
 	if (error != 0) {
 		device_printf(sc->sc_dev, "could not create rx buf DMA tag\n");
 		goto fail;
 	}
 
 	/* Allocate Rx buffers. */
 	for (i = 0; i < RTWN_RX_LIST_COUNT; i++) {
 		rx_data = &rx_ring->rx_data[i];
 		error = bus_dmamap_create(rx_ring->data_dmat, 0, &rx_data->map);
 		if (error != 0) {
 			device_printf(sc->sc_dev,
 			    "could not create rx buf DMA map\n");
 			goto fail;
 		}
 
 		rx_data->m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 		if (rx_data->m == NULL) {
 			device_printf(sc->sc_dev,
 			    "could not allocate rx mbuf\n");
 			error = ENOMEM;
 			goto fail;
 		}
 
 		error = bus_dmamap_load(rx_ring->data_dmat, rx_data->map,
 		    mtod(rx_data->m, void *), MCLBYTES, rtwn_dma_map_addr,
 		    &rx_data->paddr, BUS_DMA_NOWAIT);
 		if (error != 0) {
 			device_printf(sc->sc_dev,
 			    "could not load rx buf DMA map\n");
 			goto fail;
 		}
 
 		/* Point descriptor i at the freshly loaded cluster. */
 		rtwn_setup_rx_desc(sc, &rx_ring->desc[i], rx_data->paddr,
 		    MCLBYTES, i);
 	}
 	return (0);
 
 fail:
 	rtwn_free_rx_list(sc);
 	return (error);
 }
 
 /*
  * Re-arm every Rx descriptor with the bus address recorded for its slot
  * and a buffer length of MCLBYTES.
  */
 static void
 rtwn_reset_rx_list(struct rtwn_softc *sc)
 {
 	struct rtwn_rx_ring *ring = &sc->rx_ring;
 	int idx;
 
 	for (idx = 0; idx < RTWN_RX_LIST_COUNT; idx++)
 		rtwn_setup_rx_desc(sc, &ring->desc[idx],
 		    ring->rx_data[idx].paddr, MCLBYTES, idx);
 }
 
 /*
  * Release everything rtwn_alloc_rx_list() may have set up: the descriptor
  * ring memory and tag, the per-slot mbufs and DMA maps, and the buffer tag.
  *
  * Safe to call on a partially initialised ring and safe to call more than
  * once: every pointer is checked before use and cleared after release.
  */
 static void
 rtwn_free_rx_list(struct rtwn_softc *sc)
 {
 	struct rtwn_rx_ring *rx_ring = &sc->rx_ring;
 	struct rtwn_rx_data *rx_data;
 	int i;
 
 	if (rx_ring->desc_dmat != NULL) {
 		if (rx_ring->desc != NULL) {
 			bus_dmamap_unload(rx_ring->desc_dmat,
 			    rx_ring->desc_map);
 			bus_dmamem_free(rx_ring->desc_dmat, rx_ring->desc,
 			    rx_ring->desc_map);
 			rx_ring->desc = NULL;
 		}
 		bus_dma_tag_destroy(rx_ring->desc_dmat);
 		rx_ring->desc_dmat = NULL;
 	}
 
 	/*
 	 * Maps and mbufs only exist once the buffer tag has been created.
 	 * Without a tag there is nothing to release, and passing a NULL
 	 * tag to bus_dmamap_unload()/bus_dmamap_destroy() must be avoided.
 	 */
 	if (rx_ring->data_dmat != NULL) {
 		for (i = 0; i < RTWN_RX_LIST_COUNT; i++) {
 			rx_data = &rx_ring->rx_data[i];
 
 			if (rx_data->m != NULL) {
 				bus_dmamap_unload(rx_ring->data_dmat,
 				    rx_data->map);
 				m_freem(rx_data->m);
 				rx_data->m = NULL;
 			}
 			bus_dmamap_destroy(rx_ring->data_dmat, rx_data->map);
 			rx_data->map = NULL;
 		}
 		bus_dma_tag_destroy(rx_ring->data_dmat);
 		rx_ring->data_dmat = NULL;
 	}
 }
 
 /*
  * Allocate the Tx descriptor ring for queue 'qid', chain the descriptors
  * into a circular list and create one DMA map per ring slot.
  *
  * Returns 0 on success.  On failure rtwn_free_tx_list() is called for the
  * queue and the error code is returned.
  */
 static int
 rtwn_alloc_tx_list(struct rtwn_softc *sc, int qid)
 {
 	struct rtwn_tx_ring *tx_ring = &sc->tx_ring[qid];
 	struct rtwn_tx_data *tx_data;
 	bus_size_t size;
 	int i, error;
 
 	/* Descriptor ring: one PAGE_SIZE-aligned segment of 'size' bytes. */
 	size = sizeof(struct r92c_tx_desc) * RTWN_TX_LIST_COUNT;
 	error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), PAGE_SIZE, 0,
 	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
 	    size, 1, size, 0, NULL, NULL, &tx_ring->desc_dmat);
 	if (error != 0) {
 		device_printf(sc->sc_dev, "could not create tx ring DMA tag\n");
 		goto fail;
 	}
 
 	error = bus_dmamem_alloc(tx_ring->desc_dmat, (void **)&tx_ring->desc,
 	    BUS_DMA_NOWAIT | BUS_DMA_ZERO, &tx_ring->desc_map);
 	if (error != 0) {
 		device_printf(sc->sc_dev, "can't map tx ring DMA memory\n");
 		goto fail;
 	}
 	/* rtwn_dma_map_addr() stores the ring's bus address in paddr. */
 	error = bus_dmamap_load(tx_ring->desc_dmat, tx_ring->desc_map,
 	    tx_ring->desc, size, rtwn_dma_map_addr, &tx_ring->paddr,
 	    BUS_DMA_NOWAIT);
 	if (error != 0) {
 		device_printf(sc->sc_dev, "could not load desc DMA map\n");
 		goto fail;
 	}
 
 	/* Tag for the frame buffers: a single segment of up to MCLBYTES. */
 	error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0,
 	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES,
 	    1, MCLBYTES, 0, NULL, NULL, &tx_ring->data_dmat);
 	if (error != 0) {
 		device_printf(sc->sc_dev, "could not create tx buf DMA tag\n");
 		goto fail;
 	}
 
 	for (i = 0; i < RTWN_TX_LIST_COUNT; i++) {
 		struct r92c_tx_desc *desc = &tx_ring->desc[i];
 
 		/* setup tx desc */
 		/*
 		 * Link descriptor i to descriptor i + 1; the modulo makes
 		 * the last one point back to the first.  The doubled '+'
 		 * below parses as a unary plus and does not change the
 		 * value.
 		 */
 		desc->nextdescaddr = htole32(tx_ring->paddr +
 		    + sizeof(struct r92c_tx_desc)
 		    * ((i + 1) % RTWN_TX_LIST_COUNT));
 		tx_data = &tx_ring->tx_data[i];
 		error = bus_dmamap_create(tx_ring->data_dmat, 0, &tx_data->map);
 		if (error != 0) {
 			device_printf(sc->sc_dev,
 			    "could not create tx buf DMA map\n");
 			goto fail;
 		}
 		/* Slot starts out empty: no frame and no node attached. */
 		tx_data->m = NULL;
 		tx_data->ni = NULL;
 	}
 	return (0);
 
 fail:
 	rtwn_free_tx_list(sc, qid);
 	return (error);
 }
 
 /*
  * Return Tx queue 'qid' to its empty state: clear the descriptors, release
  * any mbuf and node reference still attached to a slot, and reset the
  * ring's bookkeeping.
  */
 static void
 rtwn_reset_tx_list(struct rtwn_softc *sc, int qid)
 {
 	struct rtwn_tx_ring *tx_ring = &sc->tx_ring[qid];
 	int i;
 
 	for (i = 0; i < RTWN_TX_LIST_COUNT; i++) {
 		struct r92c_tx_desc *desc = &tx_ring->desc[i];
 		struct rtwn_tx_data *tx_data = &tx_ring->tx_data[i];
 
 		/*
 		 * Zero everything except the last
 		 * sizeof(reserved) + sizeof(nextdescaddr64) +
 		 * sizeof(nextdescaddr) bytes of the descriptor.
 		 * NOTE(review): presumably those three members are the
 		 * trailing fields, so the ring link set up at allocation
 		 * time survives -- confirm against struct r92c_tx_desc.
 		 */
 		memset(desc, 0, sizeof(*desc) -
 		    (sizeof(desc->reserved) + sizeof(desc->nextdescaddr64) +
 		    sizeof(desc->nextdescaddr)));
 
 		if (tx_data->m != NULL) {
 			bus_dmamap_unload(tx_ring->data_dmat, tx_data->map);
 			m_freem(tx_data->m);
 			tx_data->m = NULL;
 		}
 		if (tx_data->ni != NULL) {
 			ieee80211_free_node(tx_data->ni);
 			tx_data->ni = NULL;
 		}
 	}
 
 	bus_dmamap_sync(tx_ring->desc_dmat, tx_ring->desc_map,
 	    BUS_DMASYNC_POSTWRITE);
 
 	/* Clear this queue's bit in the "queue full" mask; ring is empty. */
 	sc->qfullmsk &= ~(1 << qid);
 	tx_ring->queued = 0;
 	tx_ring->cur = 0;
 }
 
 /*
  * Release the DMA resources of Tx queue 'qid': the descriptor ring memory
  * and tag, any mbuf still attached to a slot, and the buffer tag.
  *
  * Every released pointer is cleared, so the function may be called on a
  * partially initialised ring and may be called again without freeing or
  * destroying anything twice.
  *
  * NOTE(review): unlike rtwn_free_rx_list(), the per-slot DMA maps created
  * in rtwn_alloc_tx_list() are not destroyed here and node references in
  * tx_data->ni are not dropped; left unchanged, confirm whether intended.
  */
 static void
 rtwn_free_tx_list(struct rtwn_softc *sc, int qid)
 {
 	struct rtwn_tx_ring *tx_ring = &sc->tx_ring[qid];
 	struct rtwn_tx_data *tx_data;
 	int i;
 
 	if (tx_ring->desc_dmat != NULL) {
 		if (tx_ring->desc != NULL) {
 			bus_dmamap_unload(tx_ring->desc_dmat,
 			    tx_ring->desc_map);
 			bus_dmamem_free(tx_ring->desc_dmat, tx_ring->desc,
 			    tx_ring->desc_map);
 			/* Do not leave a dangling ring pointer behind. */
 			tx_ring->desc = NULL;
 		}
 		bus_dma_tag_destroy(tx_ring->desc_dmat);
 		tx_ring->desc_dmat = NULL;
 	}
 
 	for (i = 0; i < RTWN_TX_LIST_COUNT; i++) {
 		tx_data = &tx_ring->tx_data[i];
 
 		if (tx_data->m != NULL) {
 			bus_dmamap_unload(tx_ring->data_dmat, tx_data->map);
 			m_freem(tx_data->m);
 			tx_data->m = NULL;
 		}
 	}
 	if (tx_ring->data_dmat != NULL) {
 		bus_dma_tag_destroy(tx_ring->data_dmat);
 		tx_ring->data_dmat = NULL;
 	}
 
 	sc->qfullmsk &= ~(1 << qid);
 	tx_ring->queued = 0;
 	tx_ring->cur = 0;
 }
 
 
 static struct ieee80211vap *
 rtwn_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit,
     enum ieee80211_opmode opmode, int flags,
     const uint8_t bssid[IEEE80211_ADDR_LEN],
     const uint8_t mac[IEEE80211_ADDR_LEN])
 {
 	struct rtwn_vap *rvp;
 	struct ieee80211vap *vap;
 
 	if (!TAILQ_EMPTY(&ic->ic_vaps))
 		return (NULL);
 
 	rvp = malloc(sizeof(struct rtwn_vap), M_80211_VAP, M_WAITOK | M_ZERO);
 	vap = &rvp->vap;
 	if (ieee80211_vap_setup(ic, vap, name, unit, opmode,
 	    flags | IEEE80211_CLONE_NOBEACONS, bssid) != 0) {
 		/* out of memory */
 		 free(rvp, M_80211_VAP);
 		 return (NULL);
 	}
 
 	/* Override state transition machine. */
 	rvp->newstate = vap->iv_newstate;
 	vap->iv_newstate = rtwn_newstate;
 
 	/* Complete setup. */
 	ieee80211_vap_attach(vap, ieee80211_media_change,
 	    ieee80211_media_status, mac);
 	ic->ic_opmode = opmode;
 	return (vap);
 }
 
 /*
  * Detach a vap from net80211 and free the rtwn_vap that contains it.
  */
 static void
 rtwn_vap_delete(struct ieee80211vap *vap)
 {
 	struct rtwn_vap *rvap;
 
 	rvap = RTWN_VAP(vap);
 	ieee80211_vap_detach(vap);
 	free(rvap, M_80211_VAP);
 }
 
 /* Write the 8-bit value 'val' to device register 'addr'. */
 static void
 rtwn_write_1(struct rtwn_softc *sc, uint16_t addr, uint8_t val)
 {
 	bus_space_write_1(sc->sc_st, sc->sc_sh, addr, val);
 }
 
 /*
  * Write the 16-bit value 'val' to device register 'addr' after passing it
  * through htole16().
  */
 static void
 rtwn_write_2(struct rtwn_softc *sc, uint16_t addr, uint16_t val)
 {
 	bus_space_write_2(sc->sc_st, sc->sc_sh, addr, htole16(val));
 }
 
 /*
  * Write the 32-bit value 'val' to device register 'addr' after passing it
  * through htole32().
  */
 static void
 rtwn_write_4(struct rtwn_softc *sc, uint16_t addr, uint32_t val)
 {
 	bus_space_write_4(sc->sc_st, sc->sc_sh, addr, htole32(val));
 }
 
 /* Read and return the 8-bit device register at 'addr'. */
 static uint8_t
 rtwn_read_1(struct rtwn_softc *sc, uint16_t addr)
 {
 	uint8_t val;
 
 	val = bus_space_read_1(sc->sc_st, sc->sc_sh, addr);
 	return (val);
 }
 
 /*
  * Read and return the 16-bit device register at 'addr'.  The value is
  * returned exactly as bus_space_read_2() delivers it.
  */
 static uint16_t
 rtwn_read_2(struct rtwn_softc *sc, uint16_t addr)
 {
 	uint16_t val;
 
 	val = bus_space_read_2(sc->sc_st, sc->sc_sh, addr);
 	return (val);
 }
 
 /*
  * Read and return the 32-bit device register at 'addr'.  The value is
  * returned exactly as bus_space_read_4() delivers it.
  */
 static uint32_t
 rtwn_read_4(struct rtwn_softc *sc, uint16_t addr)
 {
 	uint32_t val;
 
 	val = bus_space_read_4(sc->sc_st, sc->sc_sh, addr);
 	return (val);
 }
 
 /*
  * Send a command to the firmware through the current host-to-firmware
  * mailbox.
  *
  *	id	command identifier
  *	buf	command payload
  *	len	payload length in bytes; must not exceed sizeof(cmd.msg)
  *
  * Returns 0 on success or ETIMEDOUT if the mailbox did not become free.
  */
 static int
 rtwn_fw_cmd(struct rtwn_softc *sc, uint8_t id, const void *buf, int len)
 {
 	struct r92c_fw_cmd cmd;
 	const uint8_t *cp;
 	int ntries;
 
 	/* Wait for current FW box to be empty. */
 	for (ntries = 0; ntries < 100; ntries++) {
 		if (!(rtwn_read_1(sc, R92C_HMETFR) & (1 << sc->fwcur)))
 			break;
 		DELAY(1);
 	}
 	if (ntries == 100) {
 		device_printf(sc->sc_dev,
 		    "could not send firmware command %d\n", id);
 		return (ETIMEDOUT);
 	}
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.id = id;
 	if (len > 3)
 		cmd.id |= R92C_CMD_FLAG_EXT;
 	KASSERT(len <= sizeof(cmd.msg), ("rtwn_fw_cmd\n"));
 	memcpy(cmd.msg, buf, len);
 
 	/*
 	 * Write the first word last since that will trigger the FW.
 	 *
 	 * The command is handed over as raw bytes: bytes 4-5 go to the
 	 * extension box and bytes 0-3 to the main box.  Each word is
 	 * assembled from its individual bytes; dereferencing a uint8_t
 	 * pointer here would pass only a single byte and drop the rest
 	 * of the payload.
 	 * NOTE(review): assumes struct r92c_fw_cmd is at least 6 bytes
 	 * (id followed by a 5-byte msg) -- confirm against the header.
 	 */
 	cp = (const uint8_t *)&cmd;
 	rtwn_write_2(sc, R92C_HMEBOX_EXT(sc->fwcur),
 	    (uint16_t)(cp[4] | (cp[5] << 8)));
 	rtwn_write_4(sc, R92C_HMEBOX(sc->fwcur),
 	    (uint32_t)cp[0] | ((uint32_t)cp[1] << 8) |
 	    ((uint32_t)cp[2] << 16) | ((uint32_t)cp[3] << 24));
 
 	/* Advance to the next mailbox for the following command. */
 	sc->fwcur = (sc->fwcur + 1) % R92C_H2C_NBOX;
 
 	/* Give firmware some time for processing. */
 	DELAY(2000);
 
 	return (0);
 }
 
 /*
  * Write 'val' to RF register 'addr' of the given chain by packing address
  * and data into that chain's R92C_LSSI_PARAM baseband register.
  */
 static void
 rtwn_rf_write(struct rtwn_softc *sc, int chain, uint8_t addr, uint32_t val)
 {
 
 	rtwn_bb_write(sc, R92C_LSSI_PARAM(chain),
 	    SM(R92C_LSSI_PARAM_DATA, val) |
 	    SM(R92C_LSSI_PARAM_ADDR, addr));
 }
 
 /*
  * Read RF register 'addr' of the given chain through the baseband
  * HSSI/LSSI interface and return the R92C_LSSI_READBACK_DATA field of
  * the readback register.
  *
  * NOTE(review): 'chain' indexes a local array of R92C_MAX_CHAINS entries
  * without a range check -- callers presumably pass a valid chain.
  */
 static uint32_t
 rtwn_rf_read(struct rtwn_softc *sc, int chain, uint8_t addr)
 {
 	uint32_t reg[R92C_MAX_CHAINS], val;
 
 	/* Chain 0's PARAM2 is always needed; it carries the edge toggle. */
 	reg[0] = rtwn_bb_read(sc, R92C_HSSI_PARAM2(0));
 	if (chain != 0)
 		reg[chain] = rtwn_bb_read(sc, R92C_HSSI_PARAM2(chain));
 
 	/* Clear READ_EDGE on chain 0 ... */
 	rtwn_bb_write(sc, R92C_HSSI_PARAM2(0),
 	    reg[0] & ~R92C_HSSI_PARAM2_READ_EDGE);
 	DELAY(1000);
 
 	/* ... program the address to read on the requested chain ... */
 	rtwn_bb_write(sc, R92C_HSSI_PARAM2(chain),
 	    RW(reg[chain], R92C_HSSI_PARAM2_READ_ADDR, addr) |
 	    R92C_HSSI_PARAM2_READ_EDGE);
 	DELAY(1000);
 
 	/* ... and set READ_EDGE on chain 0 again. */
 	rtwn_bb_write(sc, R92C_HSSI_PARAM2(0),
 	    reg[0] | R92C_HSSI_PARAM2_READ_EDGE);
 	DELAY(1000);
 
 	/* The PI bit of PARAM1 selects which readback register to use. */
 	if (rtwn_bb_read(sc, R92C_HSSI_PARAM1(chain)) & R92C_HSSI_PARAM1_PI)
 		val = rtwn_bb_read(sc, R92C_HSPI_READBACK(chain));
 	else
 		val = rtwn_bb_read(sc, R92C_LSSI_READBACK(chain));
 	return (MS(val, R92C_LSSI_READBACK_DATA));
 }
 
 /*
  * Issue a write of 'data' to LLT entry 'addr' via R92C_LLT_INIT and poll
  * (up to 20 times, 5us apart) until the operation field reads back as
  * "no active".  Returns 0 on completion, ETIMEDOUT otherwise.
  */
 static int
 rtwn_llt_write(struct rtwn_softc *sc, uint32_t addr, uint32_t data)
 {
 	int remaining;
 
 	rtwn_write_4(sc, R92C_LLT_INIT,
 	    SM(R92C_LLT_INIT_OP, R92C_LLT_INIT_OP_WRITE) |
 	    SM(R92C_LLT_INIT_ADDR, addr) |
 	    SM(R92C_LLT_INIT_DATA, data));
 
 	/* Poll for completion. */
 	for (remaining = 20; remaining > 0; remaining--) {
 		if (MS(rtwn_read_4(sc, R92C_LLT_INIT), R92C_LLT_INIT_OP) ==
 		    R92C_LLT_INIT_OP_NO_ACTIVE)
 			return (0);
 		DELAY(5);
 	}
 	return (ETIMEDOUT);
 }
 
 /*
  * Read one byte from the efuse at 'addr'.
  *
  * The address is programmed into R92C_EFUSE_CTRL with the VALID flag
  * cleared; the register is then polled (up to 100 times, 5us apart) until
  * VALID is set.  Returns the data field, or 0xff after logging a message
  * if the read never completes.
  */
 static uint8_t
 rtwn_efuse_read_1(struct rtwn_softc *sc, uint16_t addr)
 {
 	uint32_t ctrl;
 	int remaining;
 
 	ctrl = rtwn_read_4(sc, R92C_EFUSE_CTRL);
 	ctrl = RW(ctrl, R92C_EFUSE_CTRL_ADDR, addr);
 	ctrl &= ~R92C_EFUSE_CTRL_VALID;
 	rtwn_write_4(sc, R92C_EFUSE_CTRL, ctrl);
 
 	/* Poll until the hardware flags the data as valid. */
 	for (remaining = 100; remaining > 0; remaining--) {
 		ctrl = rtwn_read_4(sc, R92C_EFUSE_CTRL);
 		if ((ctrl & R92C_EFUSE_CTRL_VALID) != 0)
 			return (MS(ctrl, R92C_EFUSE_CTRL_DATA));
 		DELAY(5);
 	}
 
 	device_printf(sc->sc_dev,
 	    "could not read efuse byte at address 0x%x\n", addr);
 	return (0xff);
 }
 
 /*
  * Read the efuse contents into sc->rom.
  *
  * Three system registers are first checked, and written only when the
  * wanted bits are not already set.  sc->rom is then filled with 0xff and
  * the efuse is parsed as a sequence of records: a header byte followed by
  * up to four 2-byte words.
  */
 static void
 rtwn_efuse_read(struct rtwn_softc *sc)
 {
 	uint8_t *rom = (uint8_t *)&sc->rom;
 	uint16_t addr = 0;
 	uint32_t reg;
 	uint8_t off, msk;
 	int i;
 
 	reg = rtwn_read_2(sc, R92C_SYS_ISO_CTRL);
 	if (!(reg & R92C_SYS_ISO_CTRL_PWC_EV12V)) {
 		rtwn_write_2(sc, R92C_SYS_ISO_CTRL,
 		    reg | R92C_SYS_ISO_CTRL_PWC_EV12V);
 	}
 	reg = rtwn_read_2(sc, R92C_SYS_FUNC_EN);
 	if (!(reg & R92C_SYS_FUNC_EN_ELDR)) {
 		rtwn_write_2(sc, R92C_SYS_FUNC_EN,
 		    reg | R92C_SYS_FUNC_EN_ELDR);
 	}
 	reg = rtwn_read_2(sc, R92C_SYS_CLKR);
 	if ((reg & (R92C_SYS_CLKR_LOADER_EN | R92C_SYS_CLKR_ANA8M)) !=
 	    (R92C_SYS_CLKR_LOADER_EN | R92C_SYS_CLKR_ANA8M)) {
 		rtwn_write_2(sc, R92C_SYS_CLKR,
 		    reg | R92C_SYS_CLKR_LOADER_EN | R92C_SYS_CLKR_ANA8M);
 	}
 	/* Bytes not covered by any efuse record stay 0xff. */
 	memset(&sc->rom, 0xff, sizeof(sc->rom));
 	while (addr < 512) {
 		/* Record header; 0xff terminates the list. */
 		reg = rtwn_efuse_read_1(sc, addr);
 		if (reg == 0xff)
 			break;
 		addr++;
 		/*
 		 * High nibble: index of the 8-byte ROM block this record
 		 * fills.  Low nibble: one bit per 2-byte word; a set bit
 		 * means that word is absent from the record.
 		 * NOTE(review): the resulting ROM index is not checked
 		 * against sizeof(sc->rom).
 		 */
 		off = reg >> 4;
 		msk = reg & 0xf;
 		for (i = 0; i < 4; i++) {
 			if (msk & (1 << i))
 				continue;
 			rom[off * 8 + i * 2 + 0] =
 			    rtwn_efuse_read_1(sc, addr);
 			addr++;
 			rom[off * 8 + i * 2 + 1] =
 			    rtwn_efuse_read_1(sc, addr);
 			addr++;
 		}
 	}
 #ifdef RTWN_DEBUG
 	if (sc->sc_debug >= 2) {
 		/* Dump ROM content. */
 		printf("\n");
 		for (i = 0; i < sizeof(sc->rom); i++)
 			printf("%02x:", rom[i]);
 		printf("\n");
 	}
 #endif
 }
 
 /*
  * Decode R92C_SYS_CFG and record the chip variant flags in sc->chip.
  * Returns EIO for a test chip (TRP_VAUX_EN set), 0 otherwise.
  */
 static int
 rtwn_read_chipid(struct rtwn_softc *sc)
 {
 	uint32_t syscfg;
 
 	syscfg = rtwn_read_4(sc, R92C_SYS_CFG);
 
 	/* Unsupported test chip. */
 	if ((syscfg & R92C_SYS_CFG_TRP_VAUX_EN) != 0)
 		return (EIO);
 
 	if ((syscfg & R92C_SYS_CFG_TYPE_92C) != 0) {
 		sc->chip |= RTWN_CHIP_92C;
 		/* The bonding ID tells a 1T2R 8192C from a full one. */
 		if (MS(rtwn_read_4(sc, R92C_HPON_FSM),
 		    R92C_HPON_FSM_CHIP_BONDING_ID) ==
 		    R92C_HPON_FSM_CHIP_BONDING_ID_92C_1T2R)
 			sc->chip |= RTWN_CHIP_92C_1T2R;
 	}
 
 	if ((syscfg & R92C_SYS_CFG_VENDOR_UMC) != 0) {
 		sc->chip |= RTWN_CHIP_UMC;
 		/* RTL chip version 0 is flagged as the A cut. */
 		if (MS(syscfg, R92C_SYS_CFG_CHIP_VER_RTL) == 0)
 			sc->chip |= RTWN_CHIP_UMC_A_CUT;
 	}
 
 	return (0);
 }
 
 /*
  * Load the ROM image from the efuse and extract the settings the driver
  * keeps in the softc: PA setting, board type, regulatory value and the
  * MAC address.
  */
 static void
 rtwn_read_rom(struct rtwn_softc *sc)
 {
 	struct r92c_rom *rom = &sc->rom;
 
 	/* Read full ROM image. */
 	rtwn_efuse_read(sc);
 
 	/* An unexpected ID is only reported; processing continues. */
 	if (rom->id != 0x8129)
 		device_printf(sc->sc_dev, "invalid EEPROM ID 0x%x\n", rom->id);
 
 	/* XXX Weird but this is what the vendor driver does. */
 	sc->pa_setting = rtwn_efuse_read_1(sc, 0x1fa);
 	DPRINTF(("PA setting=0x%x\n", sc->pa_setting));
 
 	/* Both fields below are extracted from the same rf_opt1 byte. */
 	sc->board_type = MS(rom->rf_opt1, R92C_ROM_RF1_BOARD_TYPE);
 
 	sc->regulatory = MS(rom->rf_opt1, R92C_ROM_RF1_REGULATORY);
 	DPRINTF(("regulatory type=%d\n", sc->regulatory));
 
 	IEEE80211_ADDR_COPY(sc->sc_ic.ic_macaddr, rom->macaddr);
 }
 
 /*
  * Initialize rate adaptation in firmware.
  */
 static int
 rtwn_ra_init(struct rtwn_softc *sc)
 {
 	static const uint8_t map[] =
 	    { 2, 4, 11, 22, 12, 18, 24, 36, 48, 72, 96, 108 };
 	struct ieee80211com *ic = &sc->sc_ic;
 	struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
 	struct ieee80211_node *ni = ieee80211_ref_node(vap->iv_bss);
 	struct ieee80211_rateset *rs = &ni->ni_rates;
 	struct r92c_fw_cmd_macid_cfg cmd;
 	uint32_t rates, basicrates;
 	uint8_t mode;
 	int maxrate, maxbasicrate, error, i, j;
 
 	/* Get normal and basic rates mask. */
 	rates = basicrates = 0;
 	maxrate = maxbasicrate = 0;
 	for (i = 0; i < rs->rs_nrates; i++) {
 		/* Convert 802.11 rate to HW rate index. */
 		for (j = 0; j < nitems(map); j++)
 			if ((rs->rs_rates[i] & IEEE80211_RATE_VAL) == map[j])
 				break;
 		if (j == nitems(map))	/* Unknown rate, skip. */
 			continue;
 		rates |= 1 << j;
 		if (j > maxrate)
 			maxrate = j;
 		if (rs->rs_rates[i] & IEEE80211_RATE_BASIC) {
 			basicrates |= 1 << j;
 			if (j > maxbasicrate)
 				maxbasicrate = j;
 		}
 	}
 	if (ic->ic_curmode == IEEE80211_MODE_11B)
 		mode = R92C_RAID_11B;
 	else
 		mode = R92C_RAID_11BG;
 	DPRINTF(("mode=0x%x rates=0x%08x, basicrates=0x%08x\n",
 	    mode, rates, basicrates));
 
 	/* Set rates mask for group addressed frames. */
 	cmd.macid = RTWN_MACID_BC | RTWN_MACID_VALID;
 	cmd.mask = htole32(mode << 28 | basicrates);
 	error = rtwn_fw_cmd(sc, R92C_CMD_MACID_CONFIG, &cmd, sizeof(cmd));
 	if (error != 0) {
 		device_printf(sc->sc_dev,
 		    "could not add broadcast station\n");
 		return (error);
 	}
 	/* Set initial MRR rate. */
 	DPRINTF(("maxbasicrate=%d\n", maxbasicrate));
 	rtwn_write_1(sc, R92C_INIDATA_RATE_SEL(RTWN_MACID_BC),
 	    maxbasicrate);
 
 	/* Set rates mask for unicast frames. */
 	cmd.macid = RTWN_MACID_BSS | RTWN_MACID_VALID;
 	cmd.mask = htole32(mode << 28 | rates);
 	error = rtwn_fw_cmd(sc, R92C_CMD_MACID_CONFIG, &cmd, sizeof(cmd));
 	if (error != 0) {
 		device_printf(sc->sc_dev, "could not add BSS station\n");
 		return (error);
 	}
 	/* Set initial MRR rate. */
 	DPRINTF(("maxrate=%d\n", maxrate));
 	rtwn_write_1(sc, R92C_INIDATA_RATE_SEL(RTWN_MACID_BSS),
 	    maxrate);
 
 	/* Configure Automatic Rate Fallback Register. */
 	if (ic->ic_curmode == IEEE80211_MODE_11B) {
 		if (rates & 0x0c)
 			rtwn_write_4(sc, R92C_ARFR(0), htole32(rates & 0x0d));
 		else
 			rtwn_write_4(sc, R92C_ARFR(0), htole32(rates & 0x0f));
 	} else
 		rtwn_write_4(sc, R92C_ARFR(0), htole32(rates & 0x0ff5));
 
 	/* Indicate highest supported rate. */
 	ni->ni_txrate = rs->rs_rates[rs->rs_nrates - 1];
 	return (0);
 }
 
 /*
  * Enable TSF synchronization and seed the hardware TSF from the
  * timestamp stored in the BSS node.
  */
 static void
 rtwn_tsf_sync_enable(struct rtwn_softc *sc)
 {
 	struct ieee80211com *ic = &sc->sc_ic;
 	struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
 	struct ieee80211_node *ni = vap->iv_bss;
 	uint64_t tsf;
 
 	/* Enable TSF synchronization. */
 	rtwn_write_1(sc, R92C_BCN_CTRL,
 	    rtwn_read_1(sc, R92C_BCN_CTRL) & ~R92C_BCN_CTRL_DIS_TSF_UDT0);
 
 	/* Clear EN_BCN while the TSF register is being rewritten. */
 	rtwn_write_1(sc, R92C_BCN_CTRL,
 	    rtwn_read_1(sc, R92C_BCN_CTRL) & ~R92C_BCN_CTRL_EN_BCN);
 
 	/* Set initial TSF. */
 	memcpy(&tsf, ni->ni_tstamp.data, 8);
 	tsf = le64toh(tsf);
 	/*
 	 * Round down to a multiple of the beacon interval (ni_intval
 	 * scaled by IEEE80211_DUR_TU), then back off by one more
 	 * IEEE80211_DUR_TU.
 	 */
 	tsf = tsf - (tsf % (vap->iv_bss->ni_intval * IEEE80211_DUR_TU));
 	tsf -= IEEE80211_DUR_TU;
 	/* Low 32 bits first, then the high 32 bits. */
 	rtwn_write_4(sc, R92C_TSFTR + 0, tsf);
 	rtwn_write_4(sc, R92C_TSFTR + 4, tsf >> 32);
 
 	/* Set EN_BCN again. */
 	rtwn_write_1(sc, R92C_BCN_CTRL,
 	    rtwn_read_1(sc, R92C_BCN_CTRL) | R92C_BCN_CTRL_EN_BCN);
 }
 
 /*
  * Switch the link LED on or off and remember the new state in
  * sc->ledlink.  Any LED other than RTWN_LED_LINK is ignored.
  */
 static void
 rtwn_set_led(struct rtwn_softc *sc, int led, int on)
 {
 	uint8_t cfg;
 
 	if (led != RTWN_LED_LINK)
 		return;
 
 	/* Keep the upper nibble, replace the lower one. */
 	cfg = rtwn_read_1(sc, R92C_LEDCFG2) & 0xf0;
 	if (on)
 		cfg |= R92C_LEDCFG2_EN;
 	else
 		cfg |= R92C_LEDCFG2_DIS;
 	rtwn_write_1(sc, R92C_LEDCFG2, cfg);
 
 	/* Save LED state. */
 	sc->ledlink = on;
 }
 
 static void
 rtwn_calib_to(void *arg)
 {
 	struct rtwn_softc *sc = arg;
 	struct r92c_fw_cmd_rssi cmd;
 
 	if (sc->avg_pwdb != -1) {
 		/* Indicate Rx signal strength to FW for rate adaptation. */
 		memset(&cmd, 0, sizeof(cmd));
 		cmd.macid = 0;	/* BSS. */
 		cmd.pwdb = sc->avg_pwdb;
 		DPRINTFN(3, ("sending RSSI command avg=%d\n", sc->avg_pwdb));
 		rtwn_fw_cmd(sc, R92C_CMD_RSSI_SETTING, &cmd, sizeof(cmd));
 	}
 
 	/* Do temperature compensation. */
 	rtwn_temp_calib(sc);
 
 	callout_reset(&sc->calib_to, hz * 2, rtwn_calib_to, sc);
 }
 
 /*
  * State-change handler installed on the vap by rtwn_vap_create().
  *
  * Programs the hardware for the transition to 'nstate' and then chains to
  * the handler that was saved in rvp->newstate.  The net80211 com lock is
  * released while the driver lock is held and re-acquired before chaining.
  * Returns whatever the saved handler returns.
  */
 static int
 rtwn_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg)
 {
 	struct rtwn_vap *rvp = RTWN_VAP(vap);
 	struct ieee80211com *ic = vap->iv_ic;
 	struct ieee80211_node *ni = vap->iv_bss;
 	struct rtwn_softc *sc = ic->ic_softc;
 	uint32_t reg;
 
 	/* Swap the com lock for the driver lock. */
 	IEEE80211_UNLOCK(ic);
 	RTWN_LOCK(sc);
 
 	/* Leaving RUN (vap->iv_state is still the old state): undo it. */
 	if (vap->iv_state == IEEE80211_S_RUN) {
 		/* Stop calibration. */
 		callout_stop(&sc->calib_to);
 
 		/* Turn link LED off. */
 		rtwn_set_led(sc, RTWN_LED_LINK, 0);
 
 		/* Set media status to 'No Link'. */
 		reg = rtwn_read_4(sc, R92C_CR);
 		reg = RW(reg, R92C_CR_NETTYPE, R92C_CR_NETTYPE_NOLINK);
 		rtwn_write_4(sc, R92C_CR, reg);
 
 		/* Stop Rx of data frames. */
 		rtwn_write_2(sc, R92C_RXFLTMAP2, 0);
 
 		/* Reset TSF. */
 		rtwn_write_1(sc, R92C_DUAL_TSF_RST, 0x03);
 
 		/* Disable TSF synchronization. */
 		rtwn_write_1(sc, R92C_BCN_CTRL,
 		    rtwn_read_1(sc, R92C_BCN_CTRL) |
 		    R92C_BCN_CTRL_DIS_TSF_UDT0);
 
 		/* Reset EDCA parameters. */
 		rtwn_write_4(sc, R92C_EDCA_VO_PARAM, 0x002f3217);
 		rtwn_write_4(sc, R92C_EDCA_VI_PARAM, 0x005e4317);
 		rtwn_write_4(sc, R92C_EDCA_BE_PARAM, 0x00105320);
 		rtwn_write_4(sc, R92C_EDCA_BK_PARAM, 0x0000a444);
 	}
 	switch (nstate) {
 	case IEEE80211_S_INIT:
 		/* Turn link LED off. */
 		rtwn_set_led(sc, RTWN_LED_LINK, 0);
 		break;
 	case IEEE80211_S_SCAN:
 		/* One-time setup, only when entering SCAN from another state. */
 		if (vap->iv_state != IEEE80211_S_SCAN) {
 			/* Allow Rx from any BSSID. */
 			rtwn_write_4(sc, R92C_RCR,
 			    rtwn_read_4(sc, R92C_RCR) &
 			    ~(R92C_RCR_CBSSID_DATA | R92C_RCR_CBSSID_BCN));
 
 			/* Set gain for scanning. */
 			reg = rtwn_bb_read(sc, R92C_OFDM0_AGCCORE1(0));
 			reg = RW(reg, R92C_OFDM0_AGCCORE1_GAIN, 0x20);
 			rtwn_bb_write(sc, R92C_OFDM0_AGCCORE1(0), reg);
 
 			reg = rtwn_bb_read(sc, R92C_OFDM0_AGCCORE1(1));
 			reg = RW(reg, R92C_OFDM0_AGCCORE1_GAIN, 0x20);
 			rtwn_bb_write(sc, R92C_OFDM0_AGCCORE1(1), reg);
 		}
 
 		/* Make link LED blink during scan. */
 		rtwn_set_led(sc, RTWN_LED_LINK, !sc->ledlink);
 
 		/* Pause AC Tx queues. */
 		rtwn_write_1(sc, R92C_TXPAUSE,
 		    rtwn_read_1(sc, R92C_TXPAUSE) | 0x0f);
 		break;
 	case IEEE80211_S_AUTH:
 		/* Set initial gain under link. */
 		reg = rtwn_bb_read(sc, R92C_OFDM0_AGCCORE1(0));
 		reg = RW(reg, R92C_OFDM0_AGCCORE1_GAIN, 0x32);
 		rtwn_bb_write(sc, R92C_OFDM0_AGCCORE1(0), reg);
 
 		reg = rtwn_bb_read(sc, R92C_OFDM0_AGCCORE1(1));
 		reg = RW(reg, R92C_OFDM0_AGCCORE1_GAIN, 0x32);
 		rtwn_bb_write(sc, R92C_OFDM0_AGCCORE1(1), reg);
 		/* Tune to the current channel. */
 		rtwn_set_chan(sc, ic->ic_curchan, NULL);
 		break;
 	case IEEE80211_S_RUN:
 		/* Monitor mode: enable data Rx and the LED, nothing else. */
 		if (ic->ic_opmode == IEEE80211_M_MONITOR) {
 			/* Enable Rx of data frames. */
 			rtwn_write_2(sc, R92C_RXFLTMAP2, 0xffff);
 
 			/* Turn link LED on. */
 			rtwn_set_led(sc, RTWN_LED_LINK, 1);
 			break;
 		}
 
 		/* Set media status to 'Associated'. */
 		reg = rtwn_read_4(sc, R92C_CR);
 		reg = RW(reg, R92C_CR_NETTYPE, R92C_CR_NETTYPE_INFRA);
 		rtwn_write_4(sc, R92C_CR, reg);
 
 		/* Set BSSID. */
 		rtwn_write_4(sc, R92C_BSSID + 0, LE_READ_4(&ni->ni_bssid[0]));
 		rtwn_write_4(sc, R92C_BSSID + 4, LE_READ_2(&ni->ni_bssid[4]));
 
 		if (ic->ic_curmode == IEEE80211_MODE_11B)
 			rtwn_write_1(sc, R92C_INIRTS_RATE_SEL, 0);
 		else	/* 802.11b/g */
 			rtwn_write_1(sc, R92C_INIRTS_RATE_SEL, 3);
 
 		/* Enable Rx of data frames. */
 		rtwn_write_2(sc, R92C_RXFLTMAP2, 0xffff);
 
 		/* Flush all AC queues. */
 		rtwn_write_1(sc, R92C_TXPAUSE, 0);
 
 		/* Set beacon interval. */
 		rtwn_write_2(sc, R92C_BCN_INTERVAL, ni->ni_intval);
 
 		/* Allow Rx from our BSSID only. */
 		rtwn_write_4(sc, R92C_RCR,
 		    rtwn_read_4(sc, R92C_RCR) |
 		    R92C_RCR_CBSSID_DATA | R92C_RCR_CBSSID_BCN);
 
 		/* Enable TSF synchronization. */
 		rtwn_tsf_sync_enable(sc);
 
 		/* Write 10 to the second byte of each SIFS register. */
 		rtwn_write_1(sc, R92C_SIFS_CCK + 1, 10);
 		rtwn_write_1(sc, R92C_SIFS_OFDM + 1, 10);
 		rtwn_write_1(sc, R92C_SPEC_SIFS + 1, 10);
 		rtwn_write_1(sc, R92C_MAC_SPEC_SIFS + 1, 10);
 		rtwn_write_1(sc, R92C_R2T_SIFS + 1, 10);
 		rtwn_write_1(sc, R92C_T2T_SIFS + 1, 10);
 
 		/* Initialize rate adaptation (return value is not checked). */
 		rtwn_ra_init(sc);
 		/* Turn link LED on. */
 		rtwn_set_led(sc, RTWN_LED_LINK, 1);
 
 		sc->avg_pwdb = -1;	/* Reset average RSSI. */
 		/* Reset temperature calibration state machine. */
 		sc->thcal_state = 0;
 		sc->thcal_lctemp = 0;
 		/* Start periodic calibration. */
 		callout_reset(&sc->calib_to, hz * 2, rtwn_calib_to, sc);
 		break;
 	default:
 		break;
 	}
 	/* Restore the lock state the caller expects, then chain. */
 	RTWN_UNLOCK(sc);
 	IEEE80211_LOCK(ic);
 	return (rvp->newstate(vap, nstate, arg));
 }
 
 /*
  * Program the four EDCA parameter registers from the current WME channel
  * parameters.  Runs with the net80211 com lock held for the duration and
  * always returns 0.
  */
 static int
 rtwn_updateedca(struct ieee80211com *ic)
 {
 	struct rtwn_softc *sc = ic->ic_softc;
 	/* EDCA register for each access category index. */
 	const uint16_t edca_reg[WME_NUM_AC] = {
 		R92C_EDCA_BE_PARAM,
 		R92C_EDCA_BK_PARAM,
 		R92C_EDCA_VI_PARAM,
 		R92C_EDCA_VO_PARAM
 	};
 	const struct wmeParams *wmep;
 	int ac, aifs, slot;
 
 	IEEE80211_LOCK(ic);
 	if (ic->ic_flags & IEEE80211_F_SHSLOT)
 		slot = 9;
 	else
 		slot = 20;
 	for (ac = 0; ac < WME_NUM_AC; ac++) {
 		wmep = &ic->ic_wme.wme_chanParams.cap_wmeParams[ac];
 		/* AIFS[AC] = AIFSN[AC] * aSlotTime + aSIFSTime. */
 		aifs = wmep->wmep_aifsn * slot + 10;
 		rtwn_write_4(sc, edca_reg[ac],
 		    SM(R92C_EDCA_PARAM_TXOP, wmep->wmep_txopLimit) |
 		    SM(R92C_EDCA_PARAM_ECWMIN, wmep->wmep_logcwmin) |
 		    SM(R92C_EDCA_PARAM_ECWMAX, wmep->wmep_logcwmax) |
 		    SM(R92C_EDCA_PARAM_AIFS, aifs));
 	}
 	IEEE80211_UNLOCK(ic);
 
 	return (0);
 }
 
 /*
  * Fold one Rx signal sample into the running average kept in
  * sc->avg_pwdb.
  *
  * 'rssi' is first mapped to a 0..100 percentage; samples received at a
  * rate index of 3 or below get an additional correction.  An average of
  * -1 means "no sample yet" and is simply replaced by the new value.
  */
 static void
 rtwn_update_avgrssi(struct rtwn_softc *sc, int rate, int8_t rssi)
 {
 	int pwdb;
 
 	/* Convert antenna signal to percentage. */
 	if (rssi <= -100 || rssi >= 20)
 		pwdb = 0;
 	else
 		pwdb = (rssi >= 0) ? 100 : 100 + rssi;
 
 	if (rate <= 3) {
 		/* CCK gain is smaller than OFDM/MCS gain. */
 		pwdb += 6;
 		if (pwdb > 100)
 			pwdb = 100;
 		/* Values above 42 are left as they are. */
 		if (pwdb <= 42) {
 			if (pwdb <= 14)
 				pwdb -= 4;
 			else if (pwdb <= 26)
 				pwdb -= 8;
 			else if (pwdb <= 34)
 				pwdb -= 6;
 			else
 				pwdb -= 2;
 		}
 	}
 
 	if (sc->avg_pwdb == -1) {
 		/* Init. */
 		sc->avg_pwdb = pwdb;
 	} else if (sc->avg_pwdb < pwdb) {
 		/* Rising: weighted average (19:1), nudged up by one. */
 		sc->avg_pwdb = ((sc->avg_pwdb * 19 + pwdb) / 20) + 1;
 	} else {
 		sc->avg_pwdb = ((sc->avg_pwdb * 19 + pwdb) / 20);
 	}
 	DPRINTFN(4, ("PWDB=%d EMA=%d\n", pwdb, sc->avg_pwdb));
 }
 
 /*
  * Derive a signal strength value from the PHY status block 'physt' of a
  * received frame.  Rate indices 0..3 are decoded as a CCK report, all
  * others as an OFDM/HT report.
  */
 static int8_t
 rtwn_get_rssi(struct rtwn_softc *sc, int rate, void *physt)
 {
 	static const int8_t cckoff[] = { 16, -12, -26, -46 };
 	struct r92c_rx_phystat *phy;
 	struct r92c_rx_cck *cck;
 	uint8_t rpt;
 	int8_t rssi;
 
 	if (rate > 3) {
 		/* OFDM/HT. */
 		phy = (struct r92c_rx_phystat *)physt;
 		rssi = ((le32toh(phy->phydw1) >> 1) & 0x7f) - 110;
 		return (rssi);
 	}
 
 	/*
 	 * CCK: the AGC report byte holds a table index in its top bits
 	 * and a gain value below it; the split depends on the high-power
 	 * flag.
 	 */
 	cck = (struct r92c_rx_cck *)physt;
 	if (sc->sc_flags & RTWN_FLAG_CCK_HIPWR) {
 		rpt = (cck->agc_rpt >> 5) & 0x3;
 		rssi = (cck->agc_rpt & 0x1f) << 1;
 	} else {
 		rpt = (cck->agc_rpt >> 6) & 0x3;
 		rssi = cck->agc_rpt & 0x3e;
 	}
 	rssi = cckoff[rpt] - rssi;
 	return (rssi);
 }
 
 /*
  * Process one completed Rx descriptor.
  *
  * The filled mbuf in 'rx_data' is replaced by a fresh cluster, the
  * descriptor at ring position 'desc_idx' is re-armed with the new buffer,
  * and the received frame is passed to net80211.  The driver lock is
  * dropped around the call into net80211 and re-acquired before returning.
  *
  * Frames flagged with CRC/ICV errors or with an implausible length are
  * counted as input errors and dropped, as are frames for which no
  * replacement buffer can be obtained.
  * NOTE(review): those early-drop paths return without re-arming the
  * descriptor -- presumably the caller deals with that; confirm.
  */
 static void
 rtwn_rx_frame(struct rtwn_softc *sc, struct r92c_rx_desc *rx_desc,
     struct rtwn_rx_data *rx_data, int desc_idx)
 {
 	struct ieee80211com *ic = &sc->sc_ic;
 	struct ieee80211_frame *wh;
 	struct ieee80211_node *ni;
 	struct r92c_rx_phystat *phy = NULL;
 	uint32_t rxdw0, rxdw3;
 	struct mbuf *m, *m1;
 	bus_addr_t physaddr;
 	uint8_t rate;
 	int8_t rssi = 0, nf;
 	int infosz, pktlen, shift, error;
 
 	rxdw0 = le32toh(rx_desc->rxdw0);
 	rxdw3 = le32toh(rx_desc->rxdw3);
 
 	if (__predict_false(rxdw0 & (R92C_RXDW0_CRCERR | R92C_RXDW0_ICVERR))) {
 		/*
 		 * This should not happen since we setup our Rx filter
 		 * to not receive these frames.
 		 */
 		counter_u64_add(ic->ic_ierrors, 1);
 		return;
 	}
 
 	pktlen = MS(rxdw0, R92C_RXDW0_PKTLEN);
 	if (__predict_false(pktlen < sizeof(*wh) || pktlen > MCLBYTES)) {
 		counter_u64_add(ic->ic_ierrors, 1);
 		return;
 	}
 
 	rate = MS(rxdw3, R92C_RXDW3_RATE);
 	infosz = MS(rxdw0, R92C_RXDW0_INFOSZ) * 8;
 	if (infosz > sizeof(struct r92c_rx_phystat))
 		infosz = sizeof(struct r92c_rx_phystat);
 	shift = MS(rxdw0, R92C_RXDW0_SHIFT);
 
 	/*
 	 * The mbuf length set below covers the PHY status and alignment
 	 * shift as well as the frame; refuse anything that would claim
 	 * more data than the MCLBYTES cluster holds.
 	 */
 	if (__predict_false(pktlen + infosz + shift > MCLBYTES)) {
 		counter_u64_add(ic->ic_ierrors, 1);
 		return;
 	}
 
 	/* Get RSSI from PHY status descriptor if present. */
 	if (infosz != 0 && (rxdw0 & R92C_RXDW0_PHYST)) {
 		phy = mtod(rx_data->m, struct r92c_rx_phystat *);
 		rssi = rtwn_get_rssi(sc, rate, phy);
 		/* Update our average RSSI. */
 		rtwn_update_avgrssi(sc, rate, rssi);
 	}
 
 	DPRINTFN(5, ("Rx frame len=%d rate=%d infosz=%d shift=%d rssi=%d\n",
 	    pktlen, rate, infosz, shift, rssi));
 
 	m1 = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 	if (m1 == NULL) {
 		counter_u64_add(ic->ic_ierrors, 1);
 		return;
 	}
 	bus_dmamap_unload(sc->rx_ring.data_dmat, rx_data->map);
 
 	/*
 	 * Map the replacement cluster.  BUS_DMA_NOWAIT: a deferred load
 	 * cannot be handled here, the result is needed right away.
 	 */
 	error = bus_dmamap_load(sc->rx_ring.data_dmat, rx_data->map,
 	     mtod(m1, void *), MCLBYTES, rtwn_dma_map_addr,
 	     &physaddr, BUS_DMA_NOWAIT);
 	if (error != 0) {
 		m_freem(m1);
 
 		/*
 		 * Put the old cluster back.  It is loaded by address and
 		 * MCLBYTES, exactly as at allocation time, and the
 		 * resulting bus address is recorded in rx_data->paddr
 		 * ('physaddr' was never written on this path).
 		 */
 		error = bus_dmamap_load(sc->rx_ring.data_dmat, rx_data->map,
 		    mtod(rx_data->m, void *), MCLBYTES, rtwn_dma_map_addr,
 		    &rx_data->paddr, BUS_DMA_NOWAIT);
 		if (error != 0)
 			panic("%s: could not load old RX mbuf",
 			    device_get_name(sc->sc_dev));
 
 		/* Physical address may have changed. */
 		rtwn_setup_rx_desc(sc, rx_desc, rx_data->paddr, MCLBYTES,
 		    desc_idx);
 		counter_u64_add(ic->ic_ierrors, 1);
 		return;
 	}
 
 	/* Finalize mbuf. */
 	m = rx_data->m;
 	rx_data->m = m1;
 	/* Keep the slot's address current for rtwn_reset_rx_list(). */
 	rx_data->paddr = physaddr;
 	m->m_pkthdr.len = m->m_len = pktlen + infosz + shift;
 
 	/* Update RX descriptor. */
 	rtwn_setup_rx_desc(sc, rx_desc, physaddr, MCLBYTES, desc_idx);
 
 	/* Get ieee80211 frame header. */
 	if (rxdw0 & R92C_RXDW0_PHYST)
 		m_adj(m, infosz + shift);
 	else
 		m_adj(m, shift);
 
 	nf = -95;
 	if (ieee80211_radiotap_active(ic)) {
 		struct rtwn_rx_radiotap_header *tap = &sc->sc_rxtap;
 
 		tap->wr_flags = 0;
 		if (!(rxdw3 & R92C_RXDW3_HT)) {
 			switch (rate) {
 			/* CCK. */
 			case  0: tap->wr_rate =   2; break;
 			case  1: tap->wr_rate =   4; break;
 			case  2: tap->wr_rate =  11; break;
 			case  3: tap->wr_rate =  22; break;
 			/* OFDM. */
 			case  4: tap->wr_rate =  12; break;
 			case  5: tap->wr_rate =  18; break;
 			case  6: tap->wr_rate =  24; break;
 			case  7: tap->wr_rate =  36; break;
 			case  8: tap->wr_rate =  48; break;
 			case  9: tap->wr_rate =  72; break;
 			case 10: tap->wr_rate =  96; break;
 			case 11: tap->wr_rate = 108; break;
 			}
 		} else if (rate >= 12) {	/* MCS0~15. */
 			/* Bit 7 set means HT MCS instead of rate. */
 			tap->wr_rate = 0x80 | (rate - 12);
 		}
 		tap->wr_dbm_antsignal = rssi;
 		tap->wr_chan_freq = htole16(ic->ic_curchan->ic_freq);
 		tap->wr_chan_flags = htole16(ic->ic_curchan->ic_flags);
 	}
 
 	/* The driver lock is not held across the call into net80211. */
 	RTWN_UNLOCK(sc);
 	wh = mtod(m, struct ieee80211_frame *);
 
 	/* Send the frame to the 802.11 layer. */
 	ni = ieee80211_find_rxnode(ic, (struct ieee80211_frame_min *)wh);
 	if (ni != NULL) {
 		(void)ieee80211_input(ni, m, rssi - nf, nf);
 		/* Node is no longer needed. */
 		ieee80211_free_node(ni);
 	} else
 		(void)ieee80211_input_all(ic, m, rssi - nf, nf);
 
 	RTWN_LOCK(sc);
 }
 
 static int
 rtwn_tx(struct rtwn_softc *sc, struct mbuf *m, struct ieee80211_node *ni)
 {
 	struct ieee80211com *ic = &sc->sc_ic;
 	struct ieee80211vap *vap = ni->ni_vap;
 	struct ieee80211_frame *wh;
 	struct ieee80211_key *k = NULL;
 	struct rtwn_tx_ring *tx_ring;
 	struct rtwn_tx_data *data;
 	struct r92c_tx_desc *txd;
 	bus_dma_segment_t segs[1];
 	uint16_t qos;
 	uint8_t raid, type, tid, qid;
 	int nsegs, error;
 
 	wh = mtod(m, struct ieee80211_frame *);
 	type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
 
 	/* Encrypt the frame if need be. */
 	if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) {
 		k = ieee80211_crypto_encap(ni, m);
 		if (k == NULL) {
 			m_freem(m);
 			return (ENOBUFS);
 		}
 		/* 802.11 header may have moved. */
 		wh = mtod(m, struct ieee80211_frame *);
 	}
 
 	if (IEEE80211_QOS_HAS_SEQ(wh)) {
 		qos = ((const struct ieee80211_qosframe *)wh)->i_qos[0];
 		tid = qos & IEEE80211_QOS_TID;
 	} else {
 		qos = 0;
 		tid = 0;
 	}
 
 	switch (type) {
 	case IEEE80211_FC0_TYPE_CTL:
 	case IEEE80211_FC0_TYPE_MGT:
 		qid = RTWN_VO_QUEUE;
 		break;
 	default:
 		qid = M_WME_GETAC(m);
 		break;
 	}
 
 	/* Grab a Tx buffer from the ring. */
 	tx_ring = &sc->tx_ring[qid];
 	data = &tx_ring->tx_data[tx_ring->cur];
 	if (data->m != NULL) {
 		m_freem(m);
 		return (ENOBUFS);
 	}
 
 	/* Fill Tx descriptor. */
 	txd = &tx_ring->desc[tx_ring->cur];
 	if (htole32(txd->txdw0) & R92C_RXDW0_OWN) {
 		m_freem(m);
 		return (ENOBUFS);
 	}
 	txd->txdw0 = htole32(
 	    SM(R92C_TXDW0_PKTLEN, m->m_pkthdr.len) |
 	    SM(R92C_TXDW0_OFFSET, sizeof(*txd)) |
 	    R92C_TXDW0_FSG | R92C_TXDW0_LSG);
 	if (IEEE80211_IS_MULTICAST(wh->i_addr1))
 		txd->txdw0 |= htole32(R92C_TXDW0_BMCAST);
 
 	txd->txdw1 = 0;
 	txd->txdw4 = 0;
 	txd->txdw5 = 0;
 
 	/* XXX TODO: rate control; implement low-rate for EAPOL */
 	if (!IEEE80211_IS_MULTICAST(wh->i_addr1) &&
 	    type == IEEE80211_FC0_TYPE_DATA) {
 		if (ic->ic_curmode == IEEE80211_MODE_11B)
 			raid = R92C_RAID_11B;
 		else
 			raid = R92C_RAID_11BG;
 		txd->txdw1 |= htole32(
 		    SM(R92C_TXDW1_MACID, RTWN_MACID_BSS) |
 		    SM(R92C_TXDW1_QSEL, R92C_TXDW1_QSEL_BE) |
 		    SM(R92C_TXDW1_RAID, raid) |
 		    R92C_TXDW1_AGGBK);
 
 		if (ic->ic_flags & IEEE80211_F_USEPROT) {
 			if (ic->ic_protmode == IEEE80211_PROT_CTSONLY) {
 				txd->txdw4 |= htole32(R92C_TXDW4_CTS2SELF |
 				    R92C_TXDW4_HWRTSEN);
 			} else if (ic->ic_protmode == IEEE80211_PROT_RTSCTS) {
 				txd->txdw4 |= htole32(R92C_TXDW4_RTSEN |
 				    R92C_TXDW4_HWRTSEN);
 			}
 		}
 
 		/* XXX TODO: implement rate control */
 
 		/* Send RTS at OFDM24. */
 		txd->txdw4 |= htole32(SM(R92C_TXDW4_RTSRATE, 8));
 		txd->txdw5 |= htole32(SM(R92C_TXDW5_RTSRATE_FBLIMIT, 0xf));
 		/* Send data at OFDM54. */
 		txd->txdw5 |= htole32(SM(R92C_TXDW5_DATARATE, 11));
 		txd->txdw5 |= htole32(SM(R92C_TXDW5_DATARATE_FBLIMIT, 0x1f));
 
 	} else {
 		txd->txdw1 |= htole32(
 		    SM(R92C_TXDW1_MACID, 0) |
 		    SM(R92C_TXDW1_QSEL, R92C_TXDW1_QSEL_MGNT) |
 		    SM(R92C_TXDW1_RAID, R92C_RAID_11B));
 
 		/* Force CCK1. */
 		txd->txdw4 |= htole32(R92C_TXDW4_DRVRATE);
 		txd->txdw5 |= htole32(SM(R92C_TXDW5_DATARATE, 0));
 	}
 	/* Set sequence number (already little endian). */
-	txd->txdseq = *(uint16_t *)wh->i_seq;
+	txd->txdseq = htole16(M_SEQNO_GET(m) % IEEE80211_SEQ_RANGE);
 	
 	if (!qos) {
 		/* Use HW sequence numbering for non-QoS frames. */
 		txd->txdw4  |= htole32(R92C_TXDW4_HWSEQ);
 		txd->txdseq |= htole16(0x8000);
 	} else
 		txd->txdw4 |= htole32(R92C_TXDW4_QOS);
 
 	error = bus_dmamap_load_mbuf_sg(tx_ring->data_dmat, data->map, m, segs,
 	    &nsegs, BUS_DMA_NOWAIT);
 	if (error != 0 && error != EFBIG) {
 		device_printf(sc->sc_dev, "can't map mbuf (error %d)\n", error);
 		m_freem(m);
 		return (error);
 	}
 	if (error != 0) {
 		struct mbuf *mnew;
 
 		mnew = m_defrag(m, M_NOWAIT);
 		if (mnew == NULL) {
 			device_printf(sc->sc_dev,
 			    "can't defragment mbuf\n");
 			m_freem(m);
 			return (ENOBUFS);
 		}
 		m = mnew;
 
 		error = bus_dmamap_load_mbuf_sg(tx_ring->data_dmat, data->map,
 		    m, segs, &nsegs, BUS_DMA_NOWAIT);
 		if (error != 0) {
 			device_printf(sc->sc_dev,
 			    "can't map mbuf (error %d)\n", error);
 			m_freem(m);
 			return (error);
 		}
 	}
 
 	txd->txbufaddr = htole32(segs[0].ds_addr);
 	txd->txbufsize = htole16(m->m_pkthdr.len);
 	bus_space_barrier(sc->sc_st, sc->sc_sh, 0, sc->sc_mapsize,
 	    BUS_SPACE_BARRIER_WRITE);
 	txd->txdw0 |= htole32(R92C_TXDW0_OWN);
 
 	bus_dmamap_sync(tx_ring->desc_dmat, tx_ring->desc_map,
 	    BUS_DMASYNC_POSTWRITE);
 	bus_dmamap_sync(tx_ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE);
 
 	data->m = m;
 	data->ni = ni;
 
 	if (ieee80211_radiotap_active_vap(vap)) {
 		struct rtwn_tx_radiotap_header *tap = &sc->sc_txtap;
 
 		tap->wt_flags = 0;
 		tap->wt_chan_freq = htole16(ic->ic_curchan->ic_freq);
 		tap->wt_chan_flags = htole16(ic->ic_curchan->ic_flags);
 
 		ieee80211_radiotap_tx(vap, m);
 	}
 
 	tx_ring->cur = (tx_ring->cur + 1) % RTWN_TX_LIST_COUNT;
 	tx_ring->queued++;
 
 	if (tx_ring->queued >= (RTWN_TX_LIST_COUNT - 1))
 		sc->qfullmsk |= (1 << qid);
 
 	/* Kick TX. */
 	rtwn_write_2(sc, R92C_PCIE_CTRL_REG, (1 << qid));
 	return (0);
 }
 
 /*
  * Reclaim the Tx slots of ring `qid' that the hardware has finished with.
  *
  * Every slot of the ring is examined; a slot is reclaimed when it still
  * holds an mbuf and its descriptor no longer has R92C_TXDW0_OWN set.
  * Each reclaimed frame is handed to ieee80211_tx_complete() with status 0,
  * the ring's bit in sc->qfullmsk is cleared once there is room again, and
  * rtwn_start() is called to push any queued frames.
  *
  * rtwn_start() asserts the softc lock, so the caller must hold it.
  */
 static void
 rtwn_tx_done(struct rtwn_softc *sc, int qid)
 {
 	struct rtwn_tx_ring *tx_ring = &sc->tx_ring[qid];
 	struct rtwn_tx_data *tx_data;
 	struct r92c_tx_desc *tx_desc;
 	int i;
 
 	bus_dmamap_sync(tx_ring->desc_dmat, tx_ring->desc_map,
 	    BUS_DMASYNC_POSTREAD);
 
 	for (i = 0; i < RTWN_TX_LIST_COUNT; i++) {
 		tx_data = &tx_ring->tx_data[i];
 		/* Slot was never filled or has already been reclaimed. */
 		if (tx_data->m == NULL)
 			continue;
 
 		tx_desc = &tx_ring->desc[i];
 		/* Descriptor is still owned by the hardware. */
 		if (le32toh(tx_desc->txdw0) & R92C_TXDW0_OWN)
 			continue;
 
 		/*
 		 * Unload the per-frame data map that rtwn_tx() loaded for
 		 * this mbuf.  The descriptor ring map must not be unloaded
 		 * here: the ring stays in use for later transmissions.
 		 */
 		bus_dmamap_unload(tx_ring->data_dmat, tx_data->map);
 
 		/*
 		 * XXX TODO: figure out whether the transmit succeeded or not.
 		 * .. and then notify rate control.
 		 */
 		ieee80211_tx_complete(tx_data->ni, tx_data->m, 0);
 		tx_data->ni = NULL;
 		tx_data->m = NULL;
 
 		/* Progress was made: disarm the Tx watchdog. */
 		sc->sc_tx_timer = 0;
 		tx_ring->queued--;
 	}
 
 	if (tx_ring->queued < (RTWN_TX_LIST_COUNT - 1))
 		sc->qfullmsk &= ~(1 << qid);
 	rtwn_start(sc);
 }
 
 /*
  * Raw transmit entry point: send mbuf `m' to node `ni' immediately,
  * bypassing the software send queue.  `params' is not used.
  *
  * Returns 0 on success, ENETDOWN if the device is not running (the mbuf
  * is freed here), or EIO if rtwn_tx() fails.
  */
 static int
 rtwn_raw_xmit(struct ieee80211_node *ni, struct mbuf *m,
     const struct ieee80211_bpf_params *params)
 {
 	struct ieee80211com *ic = ni->ni_ic;
 	struct rtwn_softc *sc = ic->ic_softc;
 
 	RTWN_LOCK(sc);
 
 	/* Prevent management frames from being sent if we're not ready. */
 	if (!(sc->sc_flags & RTWN_RUNNING)) {
 		RTWN_UNLOCK(sc);
 		m_freem(m);
 		return (ENETDOWN);
 	}
 
 	if (rtwn_tx(sc, m, ni) != 0) {
 		/*
 		 * rtwn_tx() frees the mbuf on its error paths (and may have
 		 * replaced it through m_defrag()), so it must not be freed
 		 * again here.
 		 */
 		RTWN_UNLOCK(sc);
 		return (EIO);
 	}
 	/* Arm the Tx watchdog. */
 	sc->sc_tx_timer = 5;
 	RTWN_UNLOCK(sc);
 	return (0);
 }
 
 static int
 rtwn_transmit(struct ieee80211com *ic, struct mbuf *m)   
 {
 	struct rtwn_softc *sc = ic->ic_softc;
 	int error;
 
 	RTWN_LOCK(sc);
 	if ((sc->sc_flags & RTWN_RUNNING) == 0) {
 		RTWN_UNLOCK(sc);
 		return (ENXIO);
 	}
 	error = mbufq_enqueue(&sc->sc_snd, m);
 	if (error) {
 		RTWN_UNLOCK(sc);
 		return (error);
 	}
 	rtwn_start(sc);
 	RTWN_UNLOCK(sc);
 	return (0);
 }
 
 /*
  * Bring the device up or down to match the number of running vaps:
  * initialize it when at least one vap is running and the device is not,
  * stop it when no vap is running and the device still is.  After a fresh
  * initialization all vaps are started, outside the softc lock.
  */
 static void
 rtwn_parent(struct ieee80211com *ic)
 {
 	struct rtwn_softc *sc = ic->ic_softc;
 	int wanted, running, startall = 0;
 
 	RTWN_LOCK(sc);
 	wanted = ic->ic_nrunning > 0;
 	running = (sc->sc_flags & RTWN_RUNNING) != 0;
 	if (wanted && !running) {
 		rtwn_init_locked(sc);
 		startall = 1;
 	} else if (!wanted && running) {
 		rtwn_stop_locked(sc);
 	}
 	RTWN_UNLOCK(sc);
 
 	if (startall)
 		ieee80211_start_all(ic);
 }
 
 /*
  * Drain the software send queue into the hardware rings.
  *
  * Frames are dequeued while no ring is marked full in sc->qfullmsk.  A
  * frame that rtwn_tx() rejects is counted as an output error and its node
  * reference is released; a frame that is accepted arms the Tx watchdog.
  * Must be called with the softc lock held.
  */
 static void
 rtwn_start(struct rtwn_softc *sc)
 {
 	struct ieee80211_node *ni;
 	struct mbuf *m;
 
 	RTWN_LOCK_ASSERT(sc);
 
 	if (!(sc->sc_flags & RTWN_RUNNING))
 		return;
 
 	for (;;) {
 		/* Stop as soon as any ring is full or the queue is empty. */
 		if (sc->qfullmsk != 0)
 			break;
 		m = mbufq_dequeue(&sc->sc_snd);
 		if (m == NULL)
 			break;
 
 		/* The destination node travels in the rcvif field. */
 		ni = (struct ieee80211_node *)m->m_pkthdr.rcvif;
 		if (rtwn_tx(sc, m, ni) == 0) {
 			sc->sc_tx_timer = 5;
 			continue;
 		}
 		if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1);
 		ieee80211_free_node(ni);
 	}
 }
 
 /*
  * Once-per-second callout.  Counts down the Tx timer when it is armed;
  * when it reaches zero a device timeout is logged and the reinit task is
  * scheduled, and the callout is not rearmed.  Otherwise the callout
  * reschedules itself for hz ticks later.
  */
 static void
 rtwn_watchdog(void *arg)
 {
 	struct rtwn_softc *sc = arg;
 	struct ieee80211com *ic = &sc->sc_ic;
 
 	RTWN_LOCK_ASSERT(sc);
 
 	KASSERT(sc->sc_flags & RTWN_RUNNING, ("not running"));
 
 	if (sc->sc_tx_timer != 0) {
 		sc->sc_tx_timer--;
 		if (sc->sc_tx_timer == 0) {
 			ic_printf(ic, "device timeout\n");
 			ieee80211_runtask(ic, &sc->sc_reinit_task);
 			return;
 		}
 	}
 	callout_reset(&sc->watchdog_to, hz, rtwn_watchdog, sc);
 }
 
 /*
  * Run the chip power-on sequence.
  *
  * Returns 0 on success, ETIMEDOUT if one of the polled conditions
  * (autoload done, MAC auto-on, MAC initialization) never becomes true,
  * or EPERM if the RFKILL bit read from R92C_GPIO_IO_SEL is clear.
  * The register writes are performed in a fixed order.
  */
 static int
 rtwn_power_on(struct rtwn_softc *sc)
 {
 	uint32_t reg;
 	int ntries;
 
 	/* Wait for autoload done bit. */
 	for (ntries = 0; ntries < 1000; ntries++) {
 		if (rtwn_read_1(sc, R92C_APS_FSMCO) & R92C_APS_FSMCO_PFM_ALDN)
 			break;
 		DELAY(5);
 	}
 	if (ntries == 1000) {
 		device_printf(sc->sc_dev,
 		    "timeout waiting for chip autoload\n");
 		return (ETIMEDOUT);
 	}
 
 	/* Unlock ISO/CLK/Power control register. */
 	rtwn_write_1(sc, R92C_RSV_CTRL, 0);
 
 	/* TODO: check if we need this for 8188CE */
 	if (sc->board_type != R92C_BOARD_TYPE_DONGLE) {
 		/* bt coex */
 		reg = rtwn_read_4(sc, R92C_APS_FSMCO);
 		reg |= (R92C_APS_FSMCO_SOP_ABG |
 			R92C_APS_FSMCO_SOP_AMB |
 			R92C_APS_FSMCO_XOP_BTCK);
 		rtwn_write_4(sc, R92C_APS_FSMCO, reg);
 	}
 
 	/* Move SPS into PWM mode. */
 	rtwn_write_1(sc, R92C_SPS0_CTRL, 0x2b);
 
 	/* Set low byte to 0x0f, leave others unchanged. */
 	rtwn_write_4(sc, R92C_AFE_XTAL_CTRL,
 	    (rtwn_read_4(sc, R92C_AFE_XTAL_CTRL) & 0xffffff00) | 0x0f);
 
 	/* TODO: check if we need this for 8188CE */
 	if (sc->board_type != R92C_BOARD_TYPE_DONGLE) {
 		/* bt coex */
 		reg = rtwn_read_4(sc, R92C_AFE_XTAL_CTRL);
 		reg &= (~0x00024800); /* XXX magic from linux */
 		rtwn_write_4(sc, R92C_AFE_XTAL_CTRL, reg);
 	}
 
 	/* Keep the low byte, then set PWC_EV12V and DIOR. */
 	rtwn_write_2(sc, R92C_SYS_ISO_CTRL,
 	  (rtwn_read_2(sc, R92C_SYS_ISO_CTRL) & 0xff) |
 	  R92C_SYS_ISO_CTRL_PWC_EV12V | R92C_SYS_ISO_CTRL_DIOR);
 	DELAY(200);
 
 	/* TODO: linux does additional btcoex stuff here */
 
 	/* Auto enable WLAN. */
 	rtwn_write_2(sc, R92C_APS_FSMCO,
 	    rtwn_read_2(sc, R92C_APS_FSMCO) | R92C_APS_FSMCO_APFM_ONMAC);
 	/* Poll until the APFM_ONMAC bit reads back as clear. */
 	for (ntries = 0; ntries < 1000; ntries++) {
 		if (!(rtwn_read_2(sc, R92C_APS_FSMCO) &
 		    R92C_APS_FSMCO_APFM_ONMAC))
 			break;
 		DELAY(5);
 	}
 	if (ntries == 1000) {
 		device_printf(sc->sc_dev, "timeout waiting for MAC auto ON\n");
 		return (ETIMEDOUT);
 	}
 
 	/* Enable radio, GPIO and LED functions. */
 	rtwn_write_2(sc, R92C_APS_FSMCO,
 	    R92C_APS_FSMCO_AFSM_PCIE |
 	    R92C_APS_FSMCO_PDN_EN |
 	    R92C_APS_FSMCO_PFM_ALDN);
 	/* Release RF digital isolation. */
 	rtwn_write_2(sc, R92C_SYS_ISO_CTRL,
 	    rtwn_read_2(sc, R92C_SYS_ISO_CTRL) & ~R92C_SYS_ISO_CTRL_DIOR);
 
 	/* Chip-dependent value for byte 3 of the PCIe control register. */
 	if (sc->chip & RTWN_CHIP_92C)
 		rtwn_write_1(sc, R92C_PCIE_CTRL_REG + 3, 0x77);
 	else
 		rtwn_write_1(sc, R92C_PCIE_CTRL_REG + 3, 0x22);
 
 	rtwn_write_4(sc, R92C_INT_MIG, 0);
 
 	if (sc->board_type != R92C_BOARD_TYPE_DONGLE) {
 		/* bt coex */
 		reg = rtwn_read_4(sc, R92C_AFE_XTAL_CTRL + 2);
 		reg &= 0xfd; /* XXX magic from linux */
 		rtwn_write_4(sc, R92C_AFE_XTAL_CTRL + 2, reg);
 	}
 
 	rtwn_write_1(sc, R92C_GPIO_MUXCFG,
 	    rtwn_read_1(sc, R92C_GPIO_MUXCFG) & ~R92C_GPIO_MUXCFG_RFKILL);
 
 	/* A clear RFKILL bit is reported as the hardware radio switch. */
 	reg = rtwn_read_1(sc, R92C_GPIO_IO_SEL);
 	if (!(reg & R92C_GPIO_IO_SEL_RFKILL)) {
 		device_printf(sc->sc_dev,
 		    "radio is disabled by hardware switch\n");
 		return (EPERM);
 	}
 
 	/* Initialize MAC. */
 	/* NOTE(review): the value read into reg here is not used afterwards. */
 	reg = rtwn_read_1(sc, R92C_APSD_CTRL);
 	rtwn_write_1(sc, R92C_APSD_CTRL,
 	    rtwn_read_1(sc, R92C_APSD_CTRL) & ~R92C_APSD_CTRL_OFF);
 	for (ntries = 0; ntries < 200; ntries++) {
 		if (!(rtwn_read_1(sc, R92C_APSD_CTRL) &
 		    R92C_APSD_CTRL_OFF_STATUS))
 			break;
 		DELAY(500);
 	}
 	if (ntries == 200) {
 		device_printf(sc->sc_dev,
 		    "timeout waiting for MAC initialization\n");
 		return (ETIMEDOUT);
 	}
 
 	/* Enable MAC DMA/WMAC/SCHEDULE/SEC blocks. */
 	reg = rtwn_read_2(sc, R92C_CR);
 	reg |= R92C_CR_HCI_TXDMA_EN | R92C_CR_HCI_RXDMA_EN |
 	    R92C_CR_TXDMA_EN | R92C_CR_RXDMA_EN | R92C_CR_PROTOCOL_EN |
 	    R92C_CR_SCHEDULE_EN | R92C_CR_MACTXEN | R92C_CR_MACRXEN |
 	    R92C_CR_ENSEC;
 	rtwn_write_2(sc, R92C_CR, reg);
 
 	/* NOTE(review): undocumented register offset and value. */
 	rtwn_write_1(sc, 0xfe10, 0x19);
 
 	return (0);
 }
 
 static int
 rtwn_llt_init(struct rtwn_softc *sc)
 {
 	int i, error;
 
 	/* Reserve pages [0; R92C_TX_PAGE_COUNT]. */
 	for (i = 0; i < R92C_TX_PAGE_COUNT; i++) {
 		if ((error = rtwn_llt_write(sc, i, i + 1)) != 0)
 			return (error);
 	}
 	/* NB: 0xff indicates end-of-list. */
 	if ((error = rtwn_llt_write(sc, i, 0xff)) != 0)
 		return (error);
 	/*
 	 * Use pages [R92C_TX_PAGE_COUNT + 1; R92C_TXPKTBUF_COUNT - 1]
 	 * as ring buffer.
 	 */
 	for (++i; i < R92C_TXPKTBUF_COUNT - 1; i++) {
 		if ((error = rtwn_llt_write(sc, i, i + 1)) != 0)
 			return (error);
 	}
 	/* Make the last page point to the beginning of the ring buffer. */
 	error = rtwn_llt_write(sc, i, R92C_TX_PAGE_COUNT + 1);
 	return (error);
 }
 
 /*
  * Reset the on-chip 8051 firmware processor.
  *
  * The processor is first asked to reset itself; if its CPU-enable bit has
  * not dropped after 100 polls, the bit is cleared by force.  Either way
  * the function then sleeps on the softc mutex for up to hz ticks.
  */
 static void
 rtwn_fw_reset(struct rtwn_softc *sc)
 {
 	uint16_t reg;
 	int ntries, halted = 0;
 
 	/* Tell 8051 to reset itself. */
 	rtwn_write_1(sc, R92C_HMETFR + 3, 0x20);
 
 	/* Give the 8051 a chance to reset by itself. */
 	for (ntries = 0; ntries < 100 && !halted; ntries++) {
 		reg = rtwn_read_2(sc, R92C_SYS_FUNC_EN);
 		if ((reg & R92C_SYS_FUNC_EN_CPUEN) == 0)
 			halted = 1;
 		else
 			DELAY(50);
 	}
 	if (!halted) {
 		/* It did not: force the 8051 into reset. */
 		rtwn_write_2(sc, R92C_SYS_FUNC_EN,
 		    reg & ~R92C_SYS_FUNC_EN_CPUEN);
 	}
 
 	/*
 	 * The firmware needs one second to settle; touching registers
 	 * earlier than that will hang the whole system.
 	 */
 	if (msleep(&reg, &sc->sc_mtx, 0, "rtwnrst", hz) != 0) {
 		device_printf(sc->sc_dev, "timeout waiting for firmware "
 		    "initialization to complete\n");
 	}
 }
 
 /*
  * Download `len' bytes from `buf' into firmware page `page'.
  *
  * The page number is written into the R92C_MCUFWDL_PAGE field, then the
  * bytes are written one at a time to consecutive addresses starting at
  * R92C_FW_START_ADDR.
  */
 static void
 rtwn_fw_loadpage(struct rtwn_softc *sc, int page, const uint8_t *buf, int len)
 {
 	uint32_t reg;
 	int pos;
 
 	/* Select the destination page. */
 	reg = rtwn_read_4(sc, R92C_MCUFWDL);
 	rtwn_write_4(sc, R92C_MCUFWDL, RW(reg, R92C_MCUFWDL_PAGE, page));
 
 	DELAY(5);
 
 	/* Copy the image, one byte per register write. */
 	for (pos = 0; pos < len; pos++)
 		rtwn_write_1(sc, R92C_FW_START_ADDR + pos, buf[pos]);
 }
 
 /*
  * Fetch the firmware image and download it to the chip.
  *
  * The image name depends on the chip revision flags.  The softc lock is
  * dropped around firmware_get().  The image is written page by page with
  * rtwn_fw_loadpage(), after which the function waits for the checksum
  * report and then for the firmware-ready bit.
  *
  * Returns 0 on success, ENOENT if the image cannot be loaded, EINVAL if
  * it is shorter than the firmware header, or ETIMEDOUT if the checksum
  * report or the readiness bit never shows up.  The firmware reference is
  * released before returning, on success as well as on failure.
  */
 static int
 rtwn_load_firmware(struct rtwn_softc *sc)
 {
 	const struct firmware *fw;
 	const struct r92c_fw_hdr *hdr;
 	const char *name;
 	const u_char *ptr;
 	size_t len;
 	uint32_t reg;
 	int mlen, ntries, page, error = 0;
 
 	/* Read firmware image from the filesystem. */
 	if ((sc->chip & (RTWN_CHIP_UMC_A_CUT | RTWN_CHIP_92C)) ==
 	    RTWN_CHIP_UMC_A_CUT)
 		name = "rtwn-rtl8192cfwU";
 	else
 		name = "rtwn-rtl8192cfwU_B";
 	RTWN_UNLOCK(sc);
 	fw = firmware_get(name);
 	RTWN_LOCK(sc);
 	if (fw == NULL) {
 		device_printf(sc->sc_dev,
 		    "could not read firmware %s\n", name);
 		return (ENOENT);
 	}
 	len = fw->datasize;
 	if (len < sizeof(*hdr)) {
 		device_printf(sc->sc_dev, "firmware too short\n");
 		error = EINVAL;
 		goto fail;
 	}
 	ptr = fw->data;
 	hdr = (const struct r92c_fw_hdr *)ptr;
 	/* Check if there is a valid FW header and skip it. */
 	if ((le16toh(hdr->signature) >> 4) == 0x88c ||
 	    (le16toh(hdr->signature) >> 4) == 0x92c) {
 		DPRINTF(("FW V%d.%d %02d-%02d %02d:%02d\n",
 		    le16toh(hdr->version), le16toh(hdr->subversion),
 		    hdr->month, hdr->date, hdr->hour, hdr->minute));
 		ptr += sizeof(*hdr);
 		len -= sizeof(*hdr);
 	}
 
 	/* Reset the 8051 first if the RAM download select bit is set. */
 	if (rtwn_read_1(sc, R92C_MCUFWDL) & R92C_MCUFWDL_RAM_DL_SEL)
 		rtwn_fw_reset(sc);
 
 	/* Enable FW download. */
 	rtwn_write_2(sc, R92C_SYS_FUNC_EN,
 	    rtwn_read_2(sc, R92C_SYS_FUNC_EN) |
 	    R92C_SYS_FUNC_EN_CPUEN);
 	rtwn_write_1(sc, R92C_MCUFWDL,
 	    rtwn_read_1(sc, R92C_MCUFWDL) | R92C_MCUFWDL_EN);
 	rtwn_write_1(sc, R92C_MCUFWDL + 2,
 	    rtwn_read_1(sc, R92C_MCUFWDL + 2) & ~0x08);
 
 	/* Reset the FWDL checksum. */
 	rtwn_write_1(sc, R92C_MCUFWDL,
 	    rtwn_read_1(sc, R92C_MCUFWDL) | R92C_MCUFWDL_CHKSUM_RPT);
 
 	/* Download the image in chunks of at most R92C_FW_PAGE_SIZE bytes. */
 	for (page = 0; len > 0; page++) {
 		mlen = MIN(len, R92C_FW_PAGE_SIZE);
 		rtwn_fw_loadpage(sc, page, ptr, mlen);
 		ptr += mlen;
 		len -= mlen;
 	}
 
 	/* Disable FW download. */
 	rtwn_write_1(sc, R92C_MCUFWDL,
 	    rtwn_read_1(sc, R92C_MCUFWDL) & ~R92C_MCUFWDL_EN);
 	rtwn_write_1(sc, R92C_MCUFWDL + 1, 0);
 
 	/* Wait for checksum report. */
 	for (ntries = 0; ntries < 1000; ntries++) {
 		if (rtwn_read_4(sc, R92C_MCUFWDL) & R92C_MCUFWDL_CHKSUM_RPT)
 			break;
 		DELAY(5);
 	}
 	if (ntries == 1000) {
 		device_printf(sc->sc_dev,
 		    "timeout waiting for checksum report\n");
 		error = ETIMEDOUT;
 		goto fail;
 	}
 
 	reg = rtwn_read_4(sc, R92C_MCUFWDL);
 	reg = (reg & ~R92C_MCUFWDL_WINTINI_RDY) | R92C_MCUFWDL_RDY;
 	rtwn_write_4(sc, R92C_MCUFWDL, reg);
 	/* Wait for firmware readiness. */
 	for (ntries = 0; ntries < 2000; ntries++) {
 		if (rtwn_read_4(sc, R92C_MCUFWDL) & R92C_MCUFWDL_WINTINI_RDY)
 			break;
 		DELAY(50);
 	}
 	/* The test must match the loop bound above to detect a timeout. */
 	if (ntries == 2000) {
 		device_printf(sc->sc_dev,
 		    "timeout waiting for firmware readiness\n");
 		error = ETIMEDOUT;
 		goto fail;
 	}
 fail:
 	/* Reached on success too: the image is no longer needed. */
 	firmware_put(fw, FIRMWARE_UNLOAD);
 	return (error);
 }
 
 /*
  * Set up the Tx/Rx DMA engine: initialize the LLT, program the page
  * counts per priority queue and the Tx page boundaries, and hand the
  * physical addresses of the Tx and Rx descriptor rings to the chip.
  *
  * Returns 0 on success or the error reported by rtwn_llt_init().
  */
 static int
 rtwn_dma_init(struct rtwn_softc *sc)
 {
 	uint32_t reg;
 	int error;
 
 	/* Initialize LLT table. */
 	error = rtwn_llt_init(sc);
 	if (error != 0)
 		return error;
 
 	/* Set number of pages for normal priority queue. */
 	rtwn_write_2(sc, R92C_RQPN_NPQ, 0);
 	rtwn_write_4(sc, R92C_RQPN,
 	    /* Set number of pages for public queue. */
 	    SM(R92C_RQPN_PUBQ, R92C_PUBQ_NPAGES) |
 	    /* Set number of pages for high priority queue. */
 	    SM(R92C_RQPN_HPQ, R92C_HPQ_NPAGES) |
 	    /* Set number of pages for low priority queue. */
 	    SM(R92C_RQPN_LPQ, R92C_LPQ_NPAGES) |
 	    /* Load values. */
 	    R92C_RQPN_LD);
 
 	/* All of these boundary registers get the same Tx page boundary. */
 	rtwn_write_1(sc, R92C_TXPKTBUF_BCNQ_BDNY, R92C_TX_PAGE_BOUNDARY);
 	rtwn_write_1(sc, R92C_TXPKTBUF_MGQ_BDNY, R92C_TX_PAGE_BOUNDARY);
 	rtwn_write_1(sc, R92C_TXPKTBUF_WMAC_LBK_BF_HD, R92C_TX_PAGE_BOUNDARY);
 	rtwn_write_1(sc, R92C_TRXFF_BNDY, R92C_TX_PAGE_BOUNDARY);
 	rtwn_write_1(sc, R92C_TDECTRL + 1, R92C_TX_PAGE_BOUNDARY);
 
 	/* Replace the queue mapping field; NOTE(review): 0xF771 is undocumented. */
 	reg = rtwn_read_2(sc, R92C_TRXDMA_CTRL);
 	reg &= ~R92C_TRXDMA_CTRL_QMAP_M;
 	reg |= 0xF771; 
 	rtwn_write_2(sc, R92C_TRXDMA_CTRL, reg);
 
 	rtwn_write_4(sc, R92C_TCR, R92C_TCR_CFENDFORM | (1 << 12) | (1 << 13));
 
 	/* Configure Tx DMA. */
 	rtwn_write_4(sc, R92C_BKQ_DESA, sc->tx_ring[RTWN_BK_QUEUE].paddr);
 	rtwn_write_4(sc, R92C_BEQ_DESA, sc->tx_ring[RTWN_BE_QUEUE].paddr);
 	rtwn_write_4(sc, R92C_VIQ_DESA, sc->tx_ring[RTWN_VI_QUEUE].paddr);
 	rtwn_write_4(sc, R92C_VOQ_DESA, sc->tx_ring[RTWN_VO_QUEUE].paddr);
 	rtwn_write_4(sc, R92C_BCNQ_DESA, sc->tx_ring[RTWN_BEACON_QUEUE].paddr);
 	rtwn_write_4(sc, R92C_MGQ_DESA, sc->tx_ring[RTWN_MGNT_QUEUE].paddr);
 	rtwn_write_4(sc, R92C_HQ_DESA, sc->tx_ring[RTWN_HIGH_QUEUE].paddr);
 
 	/* Configure Rx DMA. */
 	rtwn_write_4(sc, R92C_RX_DESA, sc->rx_ring.paddr);
 
 	/* Set Tx/Rx transfer page boundary. */
 	rtwn_write_2(sc, R92C_TRXFF_BNDY + 2, 0x27ff);
 
 	/* Set Tx/Rx transfer page size. */
 	rtwn_write_1(sc, R92C_PBP,
 	    SM(R92C_PBP_PSRX, R92C_PBP_128) |
 	    SM(R92C_PBP_PSTX, R92C_PBP_128));
 	return (0);
 }
 
 /*
  * Program the MAC with the register/value pairs of the rtl8192ce_mac
  * table, in table order, one byte-wide write per entry.
  */
 static void
 rtwn_mac_init(struct rtwn_softc *sc)
 {
 	int idx;
 
 	for (idx = 0; idx < nitems(rtl8192ce_mac); idx++) {
 		rtwn_write_1(sc, rtl8192ce_mac[idx].reg,
 		    rtl8192ce_mac[idx].val);
 	}
 }
 
 /*
  * Initialize the baseband (BB) block.
  *
  * Enables the BB/RF function bits, writes the BB register table selected
  * by chip type (2T for RTWN_CHIP_92C, 1T otherwise), applies the extra
  * settings for RTWN_CHIP_92C_1T2R parts, writes the AGC table and finally
  * records in sc->sc_flags whether the CCK high-power bit is set.
  */
 static void
 rtwn_bb_init(struct rtwn_softc *sc)
 {
 	const struct rtwn_bb_prog *prog;
 	uint32_t reg;
 	int i;
 
 	/* Enable BB and RF. */
 	rtwn_write_2(sc, R92C_SYS_FUNC_EN,
 	    rtwn_read_2(sc, R92C_SYS_FUNC_EN) |
 	    R92C_SYS_FUNC_EN_BBRSTB | R92C_SYS_FUNC_EN_BB_GLB_RST |
 	    R92C_SYS_FUNC_EN_DIO_RF);
 
 	rtwn_write_2(sc, R92C_AFE_PLL_CTRL, 0xdb83);
 
 	rtwn_write_1(sc, R92C_RF_CTRL,
 	    R92C_RF_CTRL_EN | R92C_RF_CTRL_RSTB | R92C_RF_CTRL_SDMRSTB);
 
 	/* Overwrites the low byte of SYS_FUNC_EN with exactly these bits. */
 	rtwn_write_1(sc, R92C_SYS_FUNC_EN,
 	    R92C_SYS_FUNC_EN_DIO_PCIE | R92C_SYS_FUNC_EN_PCIEA |
 	    R92C_SYS_FUNC_EN_PPLL | R92C_SYS_FUNC_EN_BB_GLB_RST |
 	    R92C_SYS_FUNC_EN_BBRSTB);
 
 	rtwn_write_1(sc, R92C_AFE_XTAL_CTRL + 1, 0x80);
 
 	rtwn_write_4(sc, R92C_LEDCFG0,
 	    rtwn_read_4(sc, R92C_LEDCFG0) | 0x00800000);
 
 	/* Select BB programming. */ 
 	prog = (sc->chip & RTWN_CHIP_92C) ?
 	    &rtl8192ce_bb_prog_2t : &rtl8192ce_bb_prog_1t;
 
 	/* Write BB initialization values. */
 	for (i = 0; i < prog->count; i++) {
 		rtwn_bb_write(sc, prog->regs[i], prog->vals[i]);
 		DELAY(1);
 	}
 
 	if (sc->chip & RTWN_CHIP_92C_1T2R) {
 		/* 8192C 1T only configuration. */
 		reg = rtwn_bb_read(sc, R92C_FPGA0_TXINFO);
 		reg = (reg & ~0x00000003) | 0x2;
 		rtwn_bb_write(sc, R92C_FPGA0_TXINFO, reg);
 
 		reg = rtwn_bb_read(sc, R92C_FPGA1_TXINFO);
 		reg = (reg & ~0x00300033) | 0x00200022;
 		rtwn_bb_write(sc, R92C_FPGA1_TXINFO, reg);
 
 		reg = rtwn_bb_read(sc, R92C_CCK0_AFESETTING);
 		reg = (reg & ~0xff000000) | 0x45 << 24;
 		rtwn_bb_write(sc, R92C_CCK0_AFESETTING, reg);
 
 		reg = rtwn_bb_read(sc, R92C_OFDM0_TRXPATHENA);
 		reg = (reg & ~0x000000ff) | 0x23;
 		rtwn_bb_write(sc, R92C_OFDM0_TRXPATHENA, reg);
 
 		reg = rtwn_bb_read(sc, R92C_OFDM0_AGCPARAM1);
 		reg = (reg & ~0x00000030) | 1 << 4;
 		rtwn_bb_write(sc, R92C_OFDM0_AGCPARAM1, reg);
 
 		/*
 		 * The same two-bit field (bits 27:26) is set to 2 in each of
 		 * the unnamed registers 0xe74, 0xe78, 0xe7c, 0xe80 and 0xe88.
 		 */
 		reg = rtwn_bb_read(sc, 0xe74);
 		reg = (reg & ~0x0c000000) | 2 << 26;
 		rtwn_bb_write(sc, 0xe74, reg);
 		reg = rtwn_bb_read(sc, 0xe78);
 		reg = (reg & ~0x0c000000) | 2 << 26;
 		rtwn_bb_write(sc, 0xe78, reg);
 		reg = rtwn_bb_read(sc, 0xe7c);
 		reg = (reg & ~0x0c000000) | 2 << 26;
 		rtwn_bb_write(sc, 0xe7c, reg);
 		reg = rtwn_bb_read(sc, 0xe80);
 		reg = (reg & ~0x0c000000) | 2 << 26;
 		rtwn_bb_write(sc, 0xe80, reg);
 		reg = rtwn_bb_read(sc, 0xe88);
 		reg = (reg & ~0x0c000000) | 2 << 26;
 		rtwn_bb_write(sc, 0xe88, reg);
 	}
 
 	/* Write AGC values. */
 	for (i = 0; i < prog->agccount; i++) {
 		rtwn_bb_write(sc, R92C_OFDM0_AGCRSSITABLE,
 		    prog->agcvals[i]);
 		DELAY(1);
 	}
 
 	/* Remember whether the chip reports CCK high-power mode. */
 	if (rtwn_bb_read(sc, R92C_HSSI_PARAM2(0)) &
 	    R92C_HSSI_PARAM2_CCK_HIPWR)
 		sc->sc_flags |= RTWN_FLAG_CCK_HIPWR;
 }
 
 /*
  * Initialize the RF front-end of every Rx chain.
  *
  * For each chain the RF_ENV control type bit is saved, the RF interface
  * is prepared, the RF register table selected by chip and board type is
  * written, the saved RF_ENV bit is restored and the chain's CHNLBW
  * register is cached in sc->rf_chnlbw[].  UMC A-cut parts that are not
  * 92C additionally get two Rx gain registers written on chain 0.
  */
 static void
 rtwn_rf_init(struct rtwn_softc *sc)
 {
 	const struct rtwn_rf_prog *prog;
 	uint32_t reg, type;
 	int i, j, idx, off;
 
 	/* Select RF programming based on board type. */
 	if (!(sc->chip & RTWN_CHIP_92C)) {
 		if (sc->board_type == R92C_BOARD_TYPE_MINICARD)
 			prog = rtl8188ce_rf_prog;
 		else if (sc->board_type == R92C_BOARD_TYPE_HIGHPA)
 			prog = rtl8188ru_rf_prog;
 		else
 			prog = rtl8188cu_rf_prog;
 	} else
 		prog = rtl8192ce_rf_prog;
 
 	for (i = 0; i < sc->nrxchains; i++) {
 		/* Save RF_ENV control type. */
 		/* Two chains share a register, 16 bits apart. */
 		idx = i / 2;
 		off = (i % 2) * 16;
 		reg = rtwn_bb_read(sc, R92C_FPGA0_RFIFACESW(idx));
 		type = (reg >> off) & 0x10;
 
 		/* Set RF_ENV enable. */
 		reg = rtwn_bb_read(sc, R92C_FPGA0_RFIFACEOE(i));
 		reg |= 0x100000;
 		rtwn_bb_write(sc, R92C_FPGA0_RFIFACEOE(i), reg);
 		DELAY(1);
 		/* Set RF_ENV output high. */
 		reg = rtwn_bb_read(sc, R92C_FPGA0_RFIFACEOE(i));
 		reg |= 0x10;
 		rtwn_bb_write(sc, R92C_FPGA0_RFIFACEOE(i), reg);
 		DELAY(1);
 		/* Set address and data lengths of RF registers. */
 		reg = rtwn_bb_read(sc, R92C_HSSI_PARAM2(i));
 		reg &= ~R92C_HSSI_PARAM2_ADDR_LENGTH;
 		rtwn_bb_write(sc, R92C_HSSI_PARAM2(i), reg);
 		DELAY(1);
 		reg = rtwn_bb_read(sc, R92C_HSSI_PARAM2(i));
 		reg &= ~R92C_HSSI_PARAM2_DATA_LENGTH;
 		rtwn_bb_write(sc, R92C_HSSI_PARAM2(i), reg);
 		DELAY(1);
 
 		/* Write RF initialization values for this chain. */
 		for (j = 0; j < prog[i].count; j++) {
 			if (prog[i].regs[j] >= 0xf9 &&
 			    prog[i].regs[j] <= 0xfe) {
 				/*
 				 * These are fake RF registers offsets that
 				 * indicate a delay is required.
 				 */
 				DELAY(50);
 				continue;
 			}
 			rtwn_rf_write(sc, i, prog[i].regs[j],
 			    prog[i].vals[j]);
 			DELAY(1);
 		}
 
 		/*
 		 * Restore RF_ENV control type: clear the bit, then put the
 		 * saved value back.  A plain "reg &= ~mask | value" could
 		 * only ever clear the bit, never set it again.
 		 */
 		reg = rtwn_bb_read(sc, R92C_FPGA0_RFIFACESW(idx));
 		reg = (reg & ~(0x10 << off)) | (type << off);
 		rtwn_bb_write(sc, R92C_FPGA0_RFIFACESW(idx), reg);
 
 		/* Cache RF register CHNLBW. */
 		sc->rf_chnlbw[i] = rtwn_rf_read(sc, i, R92C_RF_CHNLBW);
 	}
 
 	/* Extra Rx gain settings for UMC A-cut parts that are not 92C. */
 	if ((sc->chip & (RTWN_CHIP_UMC_A_CUT | RTWN_CHIP_92C)) ==
 	    RTWN_CHIP_UMC_A_CUT) {
 		rtwn_rf_write(sc, 0, R92C_RF_RX_G1, 0x30255);
 		rtwn_rf_write(sc, 0, R92C_RF_RX_G2, 0x50a00);
 	}
 }
 
 static void
 rtwn_cam_init(struct rtwn_softc *sc)
 {
 	/* Invalidate all CAM entries. */
 	rtwn_write_4(sc, R92C_CAMCMD,
 	    R92C_CAMCMD_POLLING | R92C_CAMCMD_CLR);
 }
 
 static void
 rtwn_pa_bias_init(struct rtwn_softc *sc)
 {
 	uint8_t reg;
 	int i;
 
 	for (i = 0; i < sc->nrxchains; i++) {
 		if (sc->pa_setting & (1 << i))
 			continue;
 		rtwn_rf_write(sc, i, R92C_RF_IPA, 0x0f406);
 		rtwn_rf_write(sc, i, R92C_RF_IPA, 0x4f406);
 		rtwn_rf_write(sc, i, R92C_RF_IPA, 0x8f406);
 		rtwn_rf_write(sc, i, R92C_RF_IPA, 0xcf406);
 	}
 	if (!(sc->pa_setting & 0x10)) {
 		reg = rtwn_read_1(sc, 0x16);
 		reg = (reg & ~0xf0) | 0x90;
 		rtwn_write_1(sc, 0x16, reg);
 	}
 }
 
 static void
 rtwn_rxfilter_init(struct rtwn_softc *sc)
 {
 	/* Initialize Rx filter. */
 	/* TODO: use better filter for monitor mode. */
 	rtwn_write_4(sc, R92C_RCR,
 	    R92C_RCR_AAP | R92C_RCR_APM | R92C_RCR_AM | R92C_RCR_AB |
 	    R92C_RCR_APP_ICV | R92C_RCR_AMF | R92C_RCR_HTC_LOC_CTRL |
 	    R92C_RCR_APP_MIC | R92C_RCR_APP_PHYSTS);
 	/* Accept all multicast frames. */
 	rtwn_write_4(sc, R92C_MAR + 0, 0xffffffff);
 	rtwn_write_4(sc, R92C_MAR + 4, 0xffffffff);
 	/* Accept all management frames. */
 	rtwn_write_2(sc, R92C_RXFLTMAP0, 0xffff);
 	/* Reject all control frames. */
 	rtwn_write_2(sc, R92C_RXFLTMAP1, 0x0000);
 	/* Accept all data frames. */
 	rtwn_write_2(sc, R92C_RXFLTMAP2, 0xffff);
 }
 
 /*
  * Load fixed values into the SIFS timing registers and into the EDCA
  * parameter registers of the four access categories (BE, BK, VI, VO).
  */
 static void
 rtwn_edca_init(struct rtwn_softc *sc)
 {
 
 	/* SIFS timing registers. */
 	rtwn_write_2(sc, R92C_SPEC_SIFS, 0x1010);
 	rtwn_write_2(sc, R92C_MAC_SPEC_SIFS, 0x1010);
 	rtwn_write_2(sc, R92C_SIFS_CCK, 0x1010);
 	rtwn_write_2(sc, R92C_SIFS_OFDM, 0x0e0e);
 	/* Per-access-category EDCA parameters. */
 	rtwn_write_4(sc, R92C_EDCA_BE_PARAM, 0x005ea42b);
 	rtwn_write_4(sc, R92C_EDCA_BK_PARAM, 0x0000a44f);
 	rtwn_write_4(sc, R92C_EDCA_VI_PARAM, 0x005e4322);
 	rtwn_write_4(sc, R92C_EDCA_VO_PARAM, 0x002f3222);
 }
 
 /*
  * Write the per-rate Tx power values of one chain to the hardware.
  *
  * `power' is indexed by rate: entries 0-3 are the CCK rates, 4-11 the
  * OFDM rates and 12-27 MCS 0-15.  The CCK fields live in registers that
  * differ between chain 0 and the other chain and are updated with
  * read-modify-write; the OFDM and MCS registers are selected by `chain'
  * and written whole.
  */
 static void
 rtwn_write_txpower(struct rtwn_softc *sc, int chain,
     uint16_t power[RTWN_RIDX_COUNT])
 {
 	uint32_t reg;
 
 	/* Write per-CCK rate Tx power. */
 	if (chain == 0) {
 		reg = rtwn_bb_read(sc, R92C_TXAGC_A_CCK1_MCS32);
 		reg = RW(reg, R92C_TXAGC_A_CCK1,  power[0]);
 		rtwn_bb_write(sc, R92C_TXAGC_A_CCK1_MCS32, reg);
 		reg = rtwn_bb_read(sc, R92C_TXAGC_B_CCK11_A_CCK2_11);
 		reg = RW(reg, R92C_TXAGC_A_CCK2,  power[1]);
 		reg = RW(reg, R92C_TXAGC_A_CCK55, power[2]);
 		reg = RW(reg, R92C_TXAGC_A_CCK11, power[3]);
 		rtwn_bb_write(sc, R92C_TXAGC_B_CCK11_A_CCK2_11, reg);
 	} else {
 		reg = rtwn_bb_read(sc, R92C_TXAGC_B_CCK1_55_MCS32);
 		reg = RW(reg, R92C_TXAGC_B_CCK1,  power[0]);
 		reg = RW(reg, R92C_TXAGC_B_CCK2,  power[1]);
 		reg = RW(reg, R92C_TXAGC_B_CCK55, power[2]);
 		rtwn_bb_write(sc, R92C_TXAGC_B_CCK1_55_MCS32, reg);
 		/* This register is shared with chain 0's CCK2/5.5/11 fields. */
 		reg = rtwn_bb_read(sc, R92C_TXAGC_B_CCK11_A_CCK2_11);
 		reg = RW(reg, R92C_TXAGC_B_CCK11, power[3]);
 		rtwn_bb_write(sc, R92C_TXAGC_B_CCK11_A_CCK2_11, reg);
 	}
 	/* Write per-OFDM rate Tx power. */
 	rtwn_bb_write(sc, R92C_TXAGC_RATE18_06(chain),
 	    SM(R92C_TXAGC_RATE06, power[ 4]) |
 	    SM(R92C_TXAGC_RATE09, power[ 5]) |
 	    SM(R92C_TXAGC_RATE12, power[ 6]) |
 	    SM(R92C_TXAGC_RATE18, power[ 7]));
 	rtwn_bb_write(sc, R92C_TXAGC_RATE54_24(chain),
 	    SM(R92C_TXAGC_RATE24, power[ 8]) |
 	    SM(R92C_TXAGC_RATE36, power[ 9]) |
 	    SM(R92C_TXAGC_RATE48, power[10]) |
 	    SM(R92C_TXAGC_RATE54, power[11]));
 	/* Write per-MCS Tx power. */
 	rtwn_bb_write(sc, R92C_TXAGC_MCS03_MCS00(chain),
 	    SM(R92C_TXAGC_MCS00,  power[12]) |
 	    SM(R92C_TXAGC_MCS01,  power[13]) |
 	    SM(R92C_TXAGC_MCS02,  power[14]) |
 	    SM(R92C_TXAGC_MCS03,  power[15]));
 	rtwn_bb_write(sc, R92C_TXAGC_MCS07_MCS04(chain),
 	    SM(R92C_TXAGC_MCS04,  power[16]) |
 	    SM(R92C_TXAGC_MCS05,  power[17]) |
 	    SM(R92C_TXAGC_MCS06,  power[18]) |
 	    SM(R92C_TXAGC_MCS07,  power[19]));
 	rtwn_bb_write(sc, R92C_TXAGC_MCS11_MCS08(chain),
 	    SM(R92C_TXAGC_MCS08,  power[20]) |
 	    SM(R92C_TXAGC_MCS09,  power[21]) |
 	    SM(R92C_TXAGC_MCS10,  power[22]) |
 	    SM(R92C_TXAGC_MCS11,  power[23]));
 	rtwn_bb_write(sc, R92C_TXAGC_MCS15_MCS12(chain),
 	    SM(R92C_TXAGC_MCS12,  power[24]) |
 	    SM(R92C_TXAGC_MCS13,  power[25]) |
 	    SM(R92C_TXAGC_MCS14,  power[26]) |
 	    SM(R92C_TXAGC_MCS15,  power[27]));
 }
 
 /*
  * Compute the per-rate Tx power values for one chain on channel `c'.
  *
  * `extc' is the extension channel, or NULL when none is used.  The
  * result is stored in `power', indexed by rate (0-3 CCK, 4-11 OFDM,
  * 12-27 MCS).  Each entry starts from a table value chosen according to
  * sc->regulatory, has the ROM-derived power for its rate family added,
  * and is clamped to R92C_MAX_TX_PWR.
  */
 static void
 rtwn_get_txpower(struct rtwn_softc *sc, int chain,
     struct ieee80211_channel *c, struct ieee80211_channel *extc,
     uint16_t power[RTWN_RIDX_COUNT])
 {
 	struct ieee80211com *ic = &sc->sc_ic;
 	struct r92c_rom *rom = &sc->rom;
 	uint16_t cckpow, ofdmpow, htpow, diff, max;
 	const struct rtwn_txpwr *base;
 	int ridx, chan, group;
 
 	/* Determine channel group. */
 	chan = ieee80211_chan2ieee(ic, c);	/* XXX center freq! */
 	if (chan <= 3)
 		group = 0;
 	else if (chan <= 9)
 		group = 1;
 	else
 		group = 2;
 
 	/* Get original Tx power based on board type and RF chain. */
 	if (!(sc->chip & RTWN_CHIP_92C)) {
 		if (sc->board_type == R92C_BOARD_TYPE_HIGHPA)
 			base = &rtl8188ru_txagc[chain];
 		else
 			base = &rtl8192cu_txagc[chain];
 	} else
 		base = &rtl8192cu_txagc[chain];
 
 	/* Entries that no branch below assigns keep a base value of 0. */
 	memset(power, 0, RTWN_RIDX_COUNT * sizeof(power[0]));
 	if (sc->regulatory == 0) {
 		for (ridx = 0; ridx <= 3; ridx++)
 			power[ridx] = base->pwr[0][ridx];
 	}
 	for (ridx = 4; ridx < RTWN_RIDX_COUNT; ridx++) {
 		if (sc->regulatory == 3) {
 			power[ridx] = base->pwr[0][ridx];
 			/* Apply vendor limits. */
 			if (extc != NULL)
 				max = rom->ht40_max_pwr[group];
 			else
 				max = rom->ht20_max_pwr[group];
 			/* Each chain has its own 4-bit field in the ROM value. */
 			max = (max >> (chain * 4)) & 0xf;
 			if (power[ridx] > max)
 				power[ridx] = max;
 		} else if (sc->regulatory == 1) {
 			if (extc == NULL)
 				power[ridx] = base->pwr[group][ridx];
 		} else if (sc->regulatory != 2)
 			power[ridx] = base->pwr[0][ridx];
 	}
 
 	/* Compute per-CCK rate Tx power. */
 	cckpow = rom->cck_tx_pwr[chain][group];
 	for (ridx = 0; ridx <= 3; ridx++) {
 		power[ridx] += cckpow;
 		if (power[ridx] > R92C_MAX_TX_PWR)
 			power[ridx] = R92C_MAX_TX_PWR;
 	}
 
 	htpow = rom->ht40_1s_tx_pwr[chain][group];
 	if (sc->ntxchains > 1) {
 		/* Apply reduction for 2 spatial streams. */
 		diff = rom->ht40_2s_tx_pwr_diff[group];
 		diff = (diff >> (chain * 4)) & 0xf;
 		/* Subtract without wrapping below zero. */
 		htpow = (htpow > diff) ? htpow - diff : 0;
 	}
 
 	/* Compute per-OFDM rate Tx power. */
 	diff = rom->ofdm_tx_pwr_diff[group];
 	diff = (diff >> (chain * 4)) & 0xf;
 	ofdmpow = htpow + diff;	/* HT->OFDM correction. */
 	for (ridx = 4; ridx <= 11; ridx++) {
 		power[ridx] += ofdmpow;
 		if (power[ridx] > R92C_MAX_TX_PWR)
 			power[ridx] = R92C_MAX_TX_PWR;
 	}
 
 	/* Compute per-MCS Tx power. */
 	if (extc == NULL) {
 		diff = rom->ht20_tx_pwr_diff[group];
 		diff = (diff >> (chain * 4)) & 0xf;
 		htpow += diff;	/* HT40->HT20 correction. */
 	}
 	for (ridx = 12; ridx <= 27; ridx++) {
 		power[ridx] += htpow;
 		if (power[ridx] > R92C_MAX_TX_PWR)
 			power[ridx] = R92C_MAX_TX_PWR;
 	}
 #ifdef RTWN_DEBUG
 	if (sc->sc_debug >= 4) {
 		/* Dump per-rate Tx power values. */
 		printf("Tx power for chain %d:\n", chain);
 		for (ridx = 0; ridx < RTWN_RIDX_COUNT; ridx++)
 			printf("Rate %d = %u\n", ridx, power[ridx]);
 	}
 #endif
 }
 
 /*
  * For every Tx chain, compute the per-rate Tx power for channel `c'
  * (with optional extension channel `extc') and program it into the
  * hardware.
  */
 static void
 rtwn_set_txpower(struct rtwn_softc *sc, struct ieee80211_channel *c,
     struct ieee80211_channel *extc)
 {
 	uint16_t txpow[RTWN_RIDX_COUNT];
 	int chain;
 
 	for (chain = 0; chain < sc->ntxchains; chain++) {
 		/* Compute the values, then hand them to the hardware. */
 		rtwn_get_txpower(sc, chain, c, extc, txpow);
 		rtwn_write_txpower(sc, chain, txpow);
 	}
 }
 
 /*
  * Scan-start hook.  Currently an empty stub: `ic' is not used and
  * nothing is done.
  */
 static void
 rtwn_scan_start(struct ieee80211com *ic)
 {
 
 	/* XXX do nothing?  */
 }
 
 /*
  * Scan-end hook.  Currently an empty stub: `ic' is not used and nothing
  * is done.
  */
 static void
 rtwn_scan_end(struct ieee80211com *ic)
 {
 
 	/* XXX do nothing?  */
 }
 
 /*
  * Tune the hardware to ic->ic_curchan, with no extension channel.  While
  * the first vap is in SCAN state the link LED is toggled on every call
  * so that it blinks during a scan.
  */
 static void
 rtwn_set_channel(struct ieee80211com *ic)
 {
 	struct rtwn_softc *sc = ic->ic_softc;
 	struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
 	int scanning;
 
 	RTWN_LOCK(sc);
 	scanning = (vap->iv_state == IEEE80211_S_SCAN);
 	if (scanning)
 		rtwn_set_led(sc, RTWN_LED_LINK, !sc->ledlink);
 	rtwn_set_chan(sc, ic->ic_curchan, NULL);
 	RTWN_UNLOCK(sc);
 }
 
 /*
  * Multicast-filter update hook.  Currently an empty stub: `ic' is not
  * used and nothing is done.
  */
 static void
 rtwn_update_mcast(struct ieee80211com *ic)
 {
 
 	/* XXX do nothing?  */
 }
 
 /*
  * Switch the radio to channel `c'.
  *
  * An invalid channel number (0 or IEEE80211_CHAN_ANY) is logged and
  * ignored.  Otherwise the Tx power is set for the new channel, the
  * channel number is written to the CHNLBW RF register of every Rx chain
  * and the bandwidth is configured: 40MHz when `extc' is non-NULL (only
  * compiled in when IEEE80211_NO_HT is not defined), 20MHz otherwise.
  */
 static void
 rtwn_set_chan(struct rtwn_softc *sc, struct ieee80211_channel *c,
     struct ieee80211_channel *extc)
 {
 	struct ieee80211com *ic = &sc->sc_ic;
 	u_int chan;
 	int i;
 
 	chan = ieee80211_chan2ieee(ic, c);	/* XXX center freq! */
 	if (chan == 0 || chan == IEEE80211_CHAN_ANY) {
 		device_printf(sc->sc_dev,
 		    "%s: invalid channel %x\n", __func__, chan);
 		return;
 	}
 
 	/* Set Tx power for this new channel. */
 	rtwn_set_txpower(sc, c, extc);
 
 	/* Start from the cached CHNLBW value and replace the channel field. */
 	for (i = 0; i < sc->nrxchains; i++) {
 		rtwn_rf_write(sc, i, R92C_RF_CHNLBW,
 		    RW(sc->rf_chnlbw[i], R92C_RF_CHNLBW_CHNL, chan));
 	}
 #ifndef IEEE80211_NO_HT
 	if (extc != NULL) {
 		uint32_t reg;
 
 		/* Is secondary channel below or above primary? */
 		/* Non-zero when the primary channel is the lower one. */
 		int prichlo = c->ic_freq < extc->ic_freq;
 
 		rtwn_write_1(sc, R92C_BWOPMODE,
 		    rtwn_read_1(sc, R92C_BWOPMODE) & ~R92C_BWOPMODE_20MHZ);
 
 		reg = rtwn_read_1(sc, R92C_RRSR + 2);
 		reg = (reg & ~0x6f) | (prichlo ? 1 : 2) << 5;
 		rtwn_write_1(sc, R92C_RRSR + 2, reg);
 
 		rtwn_bb_write(sc, R92C_FPGA0_RFMOD,
 		    rtwn_bb_read(sc, R92C_FPGA0_RFMOD) | R92C_RFMOD_40MHZ);
 		rtwn_bb_write(sc, R92C_FPGA1_RFMOD,
 		    rtwn_bb_read(sc, R92C_FPGA1_RFMOD) | R92C_RFMOD_40MHZ);
 
 		/* Set CCK side band. */
 		reg = rtwn_bb_read(sc, R92C_CCK0_SYSTEM);
 		reg = (reg & ~0x00000010) | (prichlo ? 0 : 1) << 4;
 		rtwn_bb_write(sc, R92C_CCK0_SYSTEM, reg);
 
 		reg = rtwn_bb_read(sc, R92C_OFDM1_LSTF);
 		reg = (reg & ~0x00000c00) | (prichlo ? 1 : 2) << 10;
 		rtwn_bb_write(sc, R92C_OFDM1_LSTF, reg);
 
 		rtwn_bb_write(sc, R92C_FPGA0_ANAPARAM2,
 		    rtwn_bb_read(sc, R92C_FPGA0_ANAPARAM2) &
 		    ~R92C_FPGA0_ANAPARAM2_CBW20);
 
 		reg = rtwn_bb_read(sc, 0x818);
 		reg = (reg & ~0x0c000000) | (prichlo ? 2 : 1) << 26;
 		rtwn_bb_write(sc, 0x818, reg);
 
 		/* Select 40MHz bandwidth. */
 		rtwn_rf_write(sc, 0, R92C_RF_CHNLBW,
 		    (sc->rf_chnlbw[0] & ~0xfff) | chan);
 	} else
 #endif
 	{
 		/* Taken unconditionally when IEEE80211_NO_HT is defined. */
 		rtwn_write_1(sc, R92C_BWOPMODE,
 		    rtwn_read_1(sc, R92C_BWOPMODE) | R92C_BWOPMODE_20MHZ);
 
 		rtwn_bb_write(sc, R92C_FPGA0_RFMOD,
 		    rtwn_bb_read(sc, R92C_FPGA0_RFMOD) & ~R92C_RFMOD_40MHZ);
 		rtwn_bb_write(sc, R92C_FPGA1_RFMOD,
 		    rtwn_bb_read(sc, R92C_FPGA1_RFMOD) & ~R92C_RFMOD_40MHZ);
 
 		rtwn_bb_write(sc, R92C_FPGA0_ANAPARAM2,
 		    rtwn_bb_read(sc, R92C_FPGA0_ANAPARAM2) |
 		    R92C_FPGA0_ANAPARAM2_CBW20);
 
 		/* Select 20MHz bandwidth. */
 		rtwn_rf_write(sc, 0, R92C_RF_CHNLBW,
 		    (sc->rf_chnlbw[0] & ~0xfff) | R92C_RF_CHNLBW_BW20 | chan);
 	}
 }
 
 /*
  * Run one LO/IQ calibration pass for a single chain and fetch the results.
  *
  * For chain 0 the calibration settings are programmed first (including the
  * chain 1 settings when the device has more than one Tx chain) and the
  * one-shot calibration is triggered through register 0xe48; for chain 1
  * only the trigger at 0xe60 is written.  After a 1ms delay the status word
  * at 0xeac is examined.
  *
  * Returns:
  *	0 - Tx calibration failed; rx[] is not written.
  *	1 - Tx succeeded (tx[] is valid) but Rx calibration failed.
  *	3 - both succeeded; tx[] and rx[] hold the 10-bit results.
  */
 static int
 rtwn_iq_calib_chain(struct rtwn_softc *sc, int chain, uint16_t tx[2],
     uint16_t rx[2])
 {
 	uint32_t status;
 	/* Per-chain result registers are 0x20 bytes apart. */
 	int offset = chain * 0x20;
 
 	if (chain == 0) {	/* IQ calibration for chain 0. */
 		/* IQ calibration settings for chain 0. */
 		rtwn_bb_write(sc, 0xe30, 0x10008c1f);
 		rtwn_bb_write(sc, 0xe34, 0x10008c1f);
 		rtwn_bb_write(sc, 0xe38, 0x82140102);
 
 		if (sc->ntxchains > 1) {
 			rtwn_bb_write(sc, 0xe3c, 0x28160202);	/* 2T */
 			/* IQ calibration settings for chain 1. */
 			rtwn_bb_write(sc, 0xe50, 0x10008c22);
 			rtwn_bb_write(sc, 0xe54, 0x10008c22);
 			rtwn_bb_write(sc, 0xe58, 0x82140102);
 			rtwn_bb_write(sc, 0xe5c, 0x28160202);
 		} else
 			rtwn_bb_write(sc, 0xe3c, 0x28160502);	/* 1T */
 
 		/* LO calibration settings. */
 		rtwn_bb_write(sc, 0xe4c, 0x001028d1);
 		/* We're doing LO and IQ calibration in one shot. */
 		rtwn_bb_write(sc, 0xe48, 0xf9000000);
 		rtwn_bb_write(sc, 0xe48, 0xf8000000);
 
 	} else {		/* IQ calibration for chain 1. */
 		/* We're doing LO and IQ calibration in one shot. */
 		rtwn_bb_write(sc, 0xe60, 0x00000002);
 		rtwn_bb_write(sc, 0xe60, 0x00000000);
 	}
 
 	/* Give LO and IQ calibrations the time to complete. */
 	DELAY(1000);
 
 	/* Read IQ calibration status. */
 	status = rtwn_bb_read(sc, 0xeac);
 
 	/*
 	 * The failure flags are tested with an unsigned constant: for
 	 * chain 1 the Tx flag is bit 31, and shifting a signed 1 into the
 	 * sign bit is undefined behaviour.
 	 */
 	if (status & (1U << (28 + chain * 3)))
 		return (0);	/* Tx failed. */
 	/* Read Tx IQ calibration results. */
 	tx[0] = (rtwn_bb_read(sc, 0xe94 + offset) >> 16) & 0x3ff;
 	tx[1] = (rtwn_bb_read(sc, 0xe9c + offset) >> 16) & 0x3ff;
 	/* These particular result values are also treated as a failure. */
 	if (tx[0] == 0x142 || tx[1] == 0x042)
 		return (0);	/* Tx failed. */
 
 	if (status & (1U << (27 + chain * 3)))
 		return (1);	/* Rx failed. */
 	/* Read Rx IQ calibration results. */
 	rx[0] = (rtwn_bb_read(sc, 0xea4 + offset) >> 16) & 0x3ff;
 	rx[1] = (rtwn_bb_read(sc, 0xeac + offset) >> 16) & 0x3ff;
 	if (rx[0] == 0x132 || rx[1] == 0x036)
 		return (1);	/* Rx failed. */
 
 	return (3);	/* Both Tx and Rx succeeded. */
 }
 
 /*
  * Perform calibration run number 'n' on every Tx chain, storing the
  * per-chain results in tx[chain][] and rx[chain][].  Components of a failed
  * calibration are set to 0xff.
  *
  * The registers disturbed by calibration are captured on run 0 only.  The
  * three BB path registers are written back at the end of every run; the
  * ADDA, TXPAUSE, BCN_CTRL, USTIME_TSF and GPIO_MUXCFG registers are written
  * back only when n != 0.  The snapshot therefore has to survive between
  * calls.
  */
 static void
 rtwn_iq_calib_run(struct rtwn_softc *sc, int n, uint16_t tx[2][2],
     uint16_t rx[2][2])
 {
 	/*
 	 * Registers to save and restore during IQ calibration.  The
 	 * snapshot is taken only when n == 0 but is consumed by the n != 0
 	 * calls as well, so it needs static storage duration; as an
 	 * automatic variable the later runs would read indeterminate stack
 	 * contents.
 	 * NOTE(review): this copy is shared by every rtwn instance; moving
 	 * it into the softc would make it per-device.
 	 */
 	static struct iq_cal_regs {
 		uint32_t	adda[16];
 		uint8_t		txpause;
 		uint8_t		bcn_ctrl;
 		uint8_t		ustime_tsf;
 		uint32_t	gpio_muxcfg;
 		uint32_t	ofdm0_trxpathena;
 		uint32_t	ofdm0_trmuxpar;
 		uint32_t	fpga0_rfifacesw1;
 	} iq_cal_regs;
 	static const uint16_t reg_adda[16] = {
 		0x85c, 0xe6c, 0xe70, 0xe74,
 		0xe78, 0xe7c, 0xe80, 0xe84,
 		0xe88, 0xe8c, 0xed0, 0xed4,
 		0xed8, 0xedc, 0xee0, 0xeec
 	};
 	int i, chain;
 	uint32_t hssi_param1;
 
 	/* First run: snapshot the ADDA and MAC registers. */
 	if (n == 0) {
 		for (i = 0; i < nitems(reg_adda); i++)
 			iq_cal_regs.adda[i] = rtwn_bb_read(sc, reg_adda[i]);
 
 		iq_cal_regs.txpause = rtwn_read_1(sc, R92C_TXPAUSE);
 		iq_cal_regs.bcn_ctrl = rtwn_read_1(sc, R92C_BCN_CTRL);
 		iq_cal_regs.ustime_tsf = rtwn_read_1(sc, R92C_USTIME_TSF);
 		iq_cal_regs.gpio_muxcfg = rtwn_read_4(sc, R92C_GPIO_MUXCFG);
 	}
 
 	/* Program the ADDA registers for calibration (1T vs. 2T values). */
 	if (sc->ntxchains == 1) {
 		rtwn_bb_write(sc, reg_adda[0], 0x0b1b25a0);
 		for (i = 1; i < nitems(reg_adda); i++)
 			rtwn_bb_write(sc, reg_adda[i], 0x0bdb25a0);
 	} else {
 		for (i = 0; i < nitems(reg_adda); i++)
 			rtwn_bb_write(sc, reg_adda[i], 0x04db25a4);
 	}
 
 	/* Set the PI bit on both HSSI_PARAM1 registers if it is clear. */
 	hssi_param1 = rtwn_bb_read(sc, R92C_HSSI_PARAM1(0));
 	if (!(hssi_param1 & R92C_HSSI_PARAM1_PI)) {
 		rtwn_bb_write(sc, R92C_HSSI_PARAM1(0),
 		    hssi_param1 | R92C_HSSI_PARAM1_PI);
 		rtwn_bb_write(sc, R92C_HSSI_PARAM1(1),
 		    hssi_param1 | R92C_HSSI_PARAM1_PI);
 	}
 
 	/* First run: snapshot the BB path registers as well. */
 	if (n == 0) {
 		iq_cal_regs.ofdm0_trxpathena =
 		    rtwn_bb_read(sc, R92C_OFDM0_TRXPATHENA);
 		iq_cal_regs.ofdm0_trmuxpar =
 		    rtwn_bb_read(sc, R92C_OFDM0_TRMUXPAR);
 		iq_cal_regs.fpga0_rfifacesw1 =
 		    rtwn_bb_read(sc, R92C_FPGA0_RFIFACESW(1));
 	}
 
 	rtwn_bb_write(sc, R92C_OFDM0_TRXPATHENA, 0x03a05600);
 	rtwn_bb_write(sc, R92C_OFDM0_TRMUXPAR, 0x000800e4);
 	rtwn_bb_write(sc, R92C_FPGA0_RFIFACESW(1), 0x22204000);
 	if (sc->ntxchains > 1) {
 		rtwn_bb_write(sc, R92C_LSSI_PARAM(0), 0x00010000);
 		rtwn_bb_write(sc, R92C_LSSI_PARAM(1), 0x00010000);
 	}
 
 	/* Pause Tx and clear one bit in each saved MAC register value. */
 	rtwn_write_1(sc, R92C_TXPAUSE, 0x3f);
 	rtwn_write_1(sc, R92C_BCN_CTRL, iq_cal_regs.bcn_ctrl & ~(0x08));
 	rtwn_write_1(sc, R92C_USTIME_TSF, iq_cal_regs.ustime_tsf & ~(0x08));
 	rtwn_write_1(sc, R92C_GPIO_MUXCFG,
 	    iq_cal_regs.gpio_muxcfg & ~(0x20));
 
 	rtwn_bb_write(sc, 0x0b68, 0x00080000);
 	if (sc->ntxchains > 1)
 		rtwn_bb_write(sc, 0x0b6c, 0x00080000);
 
 	rtwn_bb_write(sc, 0x0e28, 0x80800000);
 	rtwn_bb_write(sc, 0x0e40, 0x01007c00);
 	rtwn_bb_write(sc, 0x0e44, 0x01004800);
 
 	rtwn_bb_write(sc, 0x0b68, 0x00080000);
 
 	for (chain = 0; chain < sc->ntxchains; chain++) {
 		if (chain > 0) {
 			/* Put chain 0 on standby. */
 			rtwn_bb_write(sc, 0x0e28, 0x00);
 			rtwn_bb_write(sc, R92C_LSSI_PARAM(0), 0x00010000);
 			rtwn_bb_write(sc, 0x0e28, 0x80800000);
 
 			/* Enable chain 1. */
 			for (i = 0; i < nitems(reg_adda); i++)
 				rtwn_bb_write(sc, reg_adda[i], 0x0b1b25a4);
 		}
 
 		/* Run IQ calibration twice. */
 		for (i = 0; i < 2; i++) {
 			int ret;
 
 			ret = rtwn_iq_calib_chain(sc, chain,
 			    tx[chain], rx[chain]);
 			if (ret == 0) {
 				DPRINTF(("%s: chain %d: Tx failed.\n",
 				    __func__, chain));
 				tx[chain][0] = 0xff;
 				tx[chain][1] = 0xff;
 				rx[chain][0] = 0xff;
 				rx[chain][1] = 0xff;
 			} else if (ret == 1) {
 				DPRINTF(("%s: chain %d: Rx failed.\n",
 				    __func__, chain));
 				rx[chain][0] = 0xff;
 				rx[chain][1] = 0xff;
 			} else if (ret == 3) {
 				DPRINTF(("%s: chain %d: Both Tx and Rx "
 				    "succeeded.\n", __func__, chain));
 			}
 		}
 
 		DPRINTF(("%s: results for run %d chain %d: tx[0]=0x%x, "
 		    "tx[1]=0x%x rx[0]=0x%x rx[1]=0x%x\n", __func__, n, chain,
 		    tx[chain][0], tx[chain][1], rx[chain][0], rx[chain][1]));
 	}
 
 	/* Put the BB path registers back after every run. */
 	rtwn_bb_write(sc, R92C_OFDM0_TRXPATHENA,
 	    iq_cal_regs.ofdm0_trxpathena);
 	rtwn_bb_write(sc, R92C_FPGA0_RFIFACESW(1),
 	    iq_cal_regs.fpga0_rfifacesw1);
 	rtwn_bb_write(sc, R92C_OFDM0_TRMUXPAR, iq_cal_regs.ofdm0_trmuxpar);
 
 	rtwn_bb_write(sc, 0x0e28, 0x00);
 	rtwn_bb_write(sc, R92C_LSSI_PARAM(0), 0x00032ed3);
 	if (sc->ntxchains > 1)
 		rtwn_bb_write(sc, R92C_LSSI_PARAM(1), 0x00032ed3);
 
 	/* Later runs: restore everything captured on run 0. */
 	if (n != 0) {
 		if (!(hssi_param1 & R92C_HSSI_PARAM1_PI)) {
 			rtwn_bb_write(sc, R92C_HSSI_PARAM1(0), hssi_param1);
 			rtwn_bb_write(sc, R92C_HSSI_PARAM1(1), hssi_param1);
 		}
 
 		for (i = 0; i < nitems(reg_adda); i++)
 			rtwn_bb_write(sc, reg_adda[i], iq_cal_regs.adda[i]);
 
 		rtwn_write_1(sc, R92C_TXPAUSE, iq_cal_regs.txpause);
 		rtwn_write_1(sc, R92C_BCN_CTRL, iq_cal_regs.bcn_ctrl);
 		rtwn_write_1(sc, R92C_USTIME_TSF, iq_cal_regs.ustime_tsf);
 		rtwn_write_4(sc, R92C_GPIO_MUXCFG, iq_cal_regs.gpio_muxcfg);
 	}
 }
 
 #define RTWN_IQ_CAL_MAX_TOLERANCE 5
 /*
  * Decide whether two calibration runs agree.
  *
  * For each of the first 'ntxchains' chains, every component index whose
  * four samples (tx1, tx2, rx1, rx2) are all different from 0xff updates the
  * chain's Tx and Rx verdicts: a verdict is true when the two runs differ by
  * at most RTWN_IQ_CAL_MAX_TOLERANCE.  A later valid component overwrites
  * the verdict of an earlier one; a chain with no valid component stays
  * false.  Returns non-zero only if all considered chains have true Tx and
  * Rx verdicts.
  */
 static int
 rtwn_iq_calib_compare_results(uint16_t tx1[2][2], uint16_t rx1[2][2],
     uint16_t tx2[2][2], uint16_t rx2[2][2], int ntxchains)
 {
 	int tx_ok[2] = { 0, 0 };
 	int rx_ok[2] = { 0, 0 };
 	int chain, k;
 
 	for (chain = 0; chain < ntxchains; chain++) {
 		for (k = 0; k < 2; k++) {
 			int usable, tx_delta, rx_delta;
 
 			usable = tx1[chain][k] != 0xff &&
 			    tx2[chain][k] != 0xff &&
 			    rx1[chain][k] != 0xff &&
 			    rx2[chain][k] != 0xff;
 			if (usable) {
 				tx_delta = abs(tx1[chain][k] - tx2[chain][k]);
 				rx_delta = abs(rx1[chain][k] - rx2[chain][k]);
 
 				tx_ok[chain] =
 				    (tx_delta <= RTWN_IQ_CAL_MAX_TOLERANCE);
 				rx_ok[chain] =
 				    (rx_delta <= RTWN_IQ_CAL_MAX_TOLERANCE);
 			}
 		}
 	}
 
 	if (ntxchains <= 1)
 		return (tx_ok[0] && rx_ok[0]);
 
 	return (tx_ok[0] && tx_ok[1] && rx_ok[0] && rx_ok[1]);
 }
 #undef RTWN_IQ_CAL_MAX_TOLERANCE
 
 /*
  * Program the IQ calibration results of one chain into the baseband.
  *
  * tx[] and rx[] are the two-component results of a calibration run; 0xff
  * in either Tx component makes the function return without touching the
  * hardware, and 0xff in either Rx component skips the Rx half.  'chain'
  * selects the per-chain TXIQIMBALANCE/TXAFE/RXIQIMBALANCE registers and
  * which register receives the extra rx[1] bits.
  */
 static void
 rtwn_iq_calib_write_results(struct rtwn_softc *sc, uint16_t tx[2],
     uint16_t rx[2], int chain)
 {
 	uint32_t reg, val, x;
 	long y, tx_c;
 
 	if (tx[0] == 0xff || tx[1] == 0xff)
 		return;
 
 	/* val is the 10-bit field in bits 31..22 of the current register. */
 	reg = rtwn_bb_read(sc, R92C_OFDM0_TXIQIMBALANCE(chain)); 
 	val = ((reg >> 22) & 0x3ff);
 	x = tx[0];
 	/*
 	 * Bit 9 set: extend the 10-bit value with ones.
 	 * NOTE(review): only bits 15..10 are set although x is 32 bits wide,
 	 * whereas the y path below uses 0xfffffc00 - confirm which is meant.
 	 */
 	if (x & 0x0200)
 		x |= 0xfc00;
 	/*
 	 * NOTE(review): this writes back only the 10-bit product; every
 	 * other bit of the register is written as zero.
 	 */
 	reg = (((x * val) >> 8) & 0x3ff);
 	rtwn_bb_write(sc, R92C_OFDM0_TXIQIMBALANCE(chain), reg);
 
 	/* Bit 7 of the product goes to bit 31 of ECCATHRESHOLD. */
 	reg = rtwn_bb_read(sc, R92C_OFDM0_ECCATHRESHOLD);
 	if (((x * val) >> 7) & 0x01)
 		reg |= 0x80000000;
 	else
 		reg &= ~0x80000000;
 	rtwn_bb_write(sc, R92C_OFDM0_ECCATHRESHOLD, reg);
 
 	y = tx[1];
 	if (y & 0x00000200)
 		y |= 0xfffffc00;
 	tx_c = (y * val) >> 8;
 	/*
 	 * The new bits below are OR-ed into the registers without clearing
 	 * the previous field contents first.
 	 */
 	reg = rtwn_bb_read(sc, R92C_OFDM0_TXAFE(chain));
 	reg |= ((((tx_c & 0x3c0) >> 6) << 24) & 0xf0000000);
 	rtwn_bb_write(sc, R92C_OFDM0_TXAFE(chain), reg);
 
 	reg = rtwn_bb_read(sc, R92C_OFDM0_TXIQIMBALANCE(chain)); 
 	reg |= (((tx_c & 0x3f) << 16) & 0x003F0000);
 	rtwn_bb_write(sc, R92C_OFDM0_TXIQIMBALANCE(chain), reg);
 
 	/* Bit 7 of the second product goes to bit 29 of ECCATHRESHOLD. */
 	reg = rtwn_bb_read(sc, R92C_OFDM0_ECCATHRESHOLD);
 	if (((y * val) >> 7) & 0x01)
 		reg |= 0x20000000;
 	else
 		reg &= ~0x20000000;
 	rtwn_bb_write(sc, R92C_OFDM0_ECCATHRESHOLD, reg);
 
 	if (rx[0] == 0xff || rx[1] == 0xff)
 		return;
 
 	reg = rtwn_bb_read(sc, R92C_OFDM0_RXIQIMBALANCE(chain));
 	reg |= (rx[0] & 0x3ff);
 	rtwn_bb_write(sc, R92C_OFDM0_RXIQIMBALANCE(chain), reg);
 	/*
 	 * NOTE(review): (rx[1] & 0x3f) << 8 occupies bits 13..8, so after
 	 * the 0xFC00 mask only bits 13..10 survive - verify the intended
 	 * shift.
 	 */
 	reg |= (((rx[1] & 0x03f) << 8) & 0xFC00);
 	rtwn_bb_write(sc, R92C_OFDM0_RXIQIMBALANCE(chain), reg);
 
 	/*
 	 * NOTE(review): (rx[1] & 0xf) >> 6 is always zero, so both OR
 	 * operations below leave 'reg' unchanged; (rx[1] >> 6) & 0xf was
 	 * presumably intended - confirm against the vendor driver before
 	 * changing.
 	 */
 	if (chain == 0) {
 		reg = rtwn_bb_read(sc, R92C_OFDM0_RXIQEXTANTA);
 		reg |= (((rx[1] & 0xf) >> 6) & 0x000f);
 		rtwn_bb_write(sc, R92C_OFDM0_RXIQEXTANTA, reg);
 	} else {
 		reg = rtwn_bb_read(sc, R92C_OFDM0_AGCRSSITABLE);
 		reg |= ((((rx[1] & 0xf) >> 6) << 12) & 0xf000);
 		rtwn_bb_write(sc, R92C_OFDM0_AGCRSSITABLE, reg);
 	}
 }
 
 #define RTWN_IQ_CAL_NRUN	3
 /*
  * Top-level IQ calibration: perform up to RTWN_IQ_CAL_NRUN runs and stop
  * at the first run whose results match the run before it.  If such a run
  * is found its results are written to the hardware for each Tx chain;
  * otherwise nothing is written.
  */
 static void
 rtwn_iq_calib(struct rtwn_softc *sc)
 {
 	uint16_t tx[RTWN_IQ_CAL_NRUN][2][2], rx[RTWN_IQ_CAL_NRUN][2][2];
 	int run, stable;
 
 	/* The first run has nothing to be compared against. */
 	rtwn_iq_calib_run(sc, 0, tx[0], rx[0]);
 
 	stable = 0;
 	for (run = 1; run < RTWN_IQ_CAL_NRUN; run++) {
 		rtwn_iq_calib_run(sc, run, tx[run], rx[run]);
 
 		/* Results are trusted once two consecutive runs agree. */
 		stable = rtwn_iq_calib_compare_results(tx[run - 1],
 		    rx[run - 1], tx[run], rx[run], sc->ntxchains);
 		if (stable)
 			break;
 	}
 
 	if (!stable)
 		return;
 
 	rtwn_iq_calib_write_results(sc, tx[run][0], rx[run][0], 0);
 	if (sc->ntxchains > 1)
 		rtwn_iq_calib_write_results(sc, tx[run][1], rx[run][1], 1);
 }
 #undef RTWN_IQ_CAL_NRUN
 
 /*
  * Run the LC calibration.  Transmission is quiesced first - either by
  * clearing the continuous Tx bits and parking each RF chain in standby, or
  * by pausing all Tx queues - then the calibration is started through the
  * LCSTART bit of RF_CHNLBW and, after a short delay, the previous state is
  * put back.
  */
 static void
 rtwn_lc_calib(struct rtwn_softc *sc)
 {
 	uint32_t saved_ac[2];
 	uint8_t lstf3;
 	int chain, cont_tx;
 
 	lstf3 = rtwn_read_1(sc, R92C_OFDM1_LSTF + 3);
 	cont_tx = ((lstf3 & 0x70) != 0);
 
 	if (!cont_tx) {
 		/* Block all Tx queues. */
 		rtwn_write_1(sc, R92C_TXPAUSE, 0xff);
 	} else {
 		/* Disable all continuous Tx. */
 		rtwn_write_1(sc, R92C_OFDM1_LSTF + 3, lstf3 & ~0x70);
 
 		/* Remember each chain's RF_AC and switch it to standby. */
 		for (chain = 0; chain < sc->nrxchains; chain++) {
 			saved_ac[chain] = rtwn_rf_read(sc, chain, R92C_RF_AC);
 			rtwn_rf_write(sc, chain, R92C_RF_AC,
 			    RW(saved_ac[chain], R92C_RF_AC_MODE,
 				R92C_RF_AC_MODE_STANDBY));
 		}
 	}
 
 	/* Kick off the calibration. */
 	rtwn_rf_write(sc, 0, R92C_RF_CHNLBW,
 	    rtwn_rf_read(sc, 0, R92C_RF_CHNLBW) | R92C_RF_CHNLBW_LCSTART);
 
 	/* Wait for it to finish. */
 	DELAY(100);
 
 	/* Undo whichever quiescing method was used above. */
 	if (!cont_tx) {
 		/* Unblock all Tx queues. */
 		rtwn_write_1(sc, R92C_TXPAUSE, 0x00);
 	} else {
 		/* Restore Tx mode, then the RF mode of each chain. */
 		rtwn_write_1(sc, R92C_OFDM1_LSTF + 3, lstf3);
 		for (chain = 0; chain < sc->nrxchains; chain++)
 			rtwn_rf_write(sc, chain, R92C_RF_AC, saved_ac[chain]);
 	}
 }
 
 /*
  * Two-phase thermal check.  A call with thcal_state == 0 starts a
  * temperature measurement; the following call reads it back and reruns the
  * IQ and LC calibrations when the reading differs from the one recorded at
  * the last calibration by more than one unit.
  */
 static void
 rtwn_temp_calib(struct rtwn_softc *sc)
 {
 	int now;
 
 	/* Phase one: start measuring and wait for the next call. */
 	if (sc->thcal_state == 0) {
 		rtwn_rf_write(sc, 0, R92C_RF_T_METER, 0x60);
 		sc->thcal_state = 1;
 		return;
 	}
 
 	/* Phase two: re-arm phase one and fetch the reading. */
 	sc->thcal_state = 0;
 	now = rtwn_rf_read(sc, 0, R92C_RF_T_METER) & 0x1f;
 	if (now == 0) {
 		/* Read failed, skip. */
 		return;
 	}
 	DPRINTFN(2, ("temperature=%d\n", now));
 
 	if (sc->thcal_lctemp == 0) {
 		/* First calibration is performed in rtwn_init(). */
 		sc->thcal_lctemp = now;
 		return;
 	}
 
 	/* Small drifts do not warrant a recalibration. */
 	if (abs(now - sc->thcal_lctemp) <= 1)
 		return;
 
 	DPRINTF(("IQ/LC calib triggered by temp: %d -> %d\n",
 	    sc->thcal_lctemp, now));
 	rtwn_iq_calib(sc);
 	rtwn_lc_calib(sc);
 	/* Record temperature of last calibration. */
 	sc->thcal_lctemp = now;
 }
 
 /*
  * Bring the adapter up.  The caller must hold the softc lock (see
  * RTWN_LOCK_ASSERT below).
  *
  * The sequence is: power on, DMA setup, MAC register programming, firmware
  * load, MAC/BB/RF initialisation, IQ and LC calibration, and finally
  * interrupt enable.  On success RTWN_RUNNING is set in sc_flags and the
  * watchdog callout is armed.  If power-on, DMA initialisation or the
  * firmware load fails, the adapter is stopped again through
  * rtwn_stop_locked() and RTWN_RUNNING is left clear; no error is returned
  * to the caller.
  */
 static void
 rtwn_init_locked(struct rtwn_softc *sc)
 {
 	struct ieee80211com *ic = &sc->sc_ic;
 	struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
 	uint32_t reg;
 	uint8_t macaddr[IEEE80211_ADDR_LEN];
 	int i, error;
 
 	RTWN_LOCK_ASSERT(sc);
 
 	/* Init firmware commands ring. */
 	sc->fwcur = 0;
 
 	/* Power on adapter. */
 	error = rtwn_power_on(sc);
 	if (error != 0) {
 		device_printf(sc->sc_dev, "could not power on adapter\n");
 		goto fail;
 	}
 
 	/* Initialize DMA. */
 	error = rtwn_dma_init(sc);
 	if (error != 0) {
 		device_printf(sc->sc_dev, "could not initialize DMA\n");
 		goto fail;
 	}
 
 	/* Set info size in Rx descriptors (in 64-bit words). */
 	rtwn_write_1(sc, R92C_RX_DRVINFO_SZ, 4);
 
 	/* Disable interrupts. */
 	rtwn_write_4(sc, R92C_HISR, 0x00000000);
 	rtwn_write_4(sc, R92C_HIMR, 0x00000000);
 
 	/*
 	 * Set MAC address: the first vap's address if a vap exists,
 	 * otherwise the address stored in the ieee80211com.
 	 */
 	IEEE80211_ADDR_COPY(macaddr, vap ? vap->iv_myaddr : ic->ic_macaddr);
 	for (i = 0; i < IEEE80211_ADDR_LEN; i++)
 		rtwn_write_1(sc, R92C_MACID + i, macaddr[i]);
 
 	/* Set initial network type. */
 	reg = rtwn_read_4(sc, R92C_CR);
 	reg = RW(reg, R92C_CR_NETTYPE, R92C_CR_NETTYPE_INFRA);
 	rtwn_write_4(sc, R92C_CR, reg);
 
 	rtwn_rxfilter_init(sc);
 
 	/* Program the RRSR rate bitmap field with R92C_RRSR_RATE_ALL. */
 	reg = rtwn_read_4(sc, R92C_RRSR);
 	reg = RW(reg, R92C_RRSR_RATE_BITMAP, R92C_RRSR_RATE_ALL);
 	rtwn_write_4(sc, R92C_RRSR, reg);
 
 	/* Set short/long retry limits. */
 	rtwn_write_2(sc, R92C_RL,
 	    SM(R92C_RL_SRL, 0x07) | SM(R92C_RL_LRL, 0x07));
 
 	/* Initialize EDCA parameters. */
 	rtwn_edca_init(sc);
 
 	/* Set data and response automatic rate fallback retry counts. */
 	rtwn_write_4(sc, R92C_DARFRC + 0, 0x01000000);
 	rtwn_write_4(sc, R92C_DARFRC + 4, 0x07060504);
 	rtwn_write_4(sc, R92C_RARFRC + 0, 0x01000000);
 	rtwn_write_4(sc, R92C_RARFRC + 4, 0x07060504);
 
 	rtwn_write_2(sc, R92C_FWHW_TXQ_CTRL, 0x1f80);
 
 	/* Set ACK timeout. */
 	rtwn_write_1(sc, R92C_ACKTO, 0x40);
 
 	/* Initialize beacon parameters. */
 	rtwn_write_2(sc, R92C_TBTT_PROHIBIT, 0x6404);
 	rtwn_write_1(sc, R92C_DRVERLYINT, 0x05);
 	rtwn_write_1(sc, R92C_BCNDMATIM, 0x02);
 	rtwn_write_2(sc, R92C_BCNTCFG, 0x660f);
 
 	/* Setup AMPDU aggregation. */
 	rtwn_write_4(sc, R92C_AGGLEN_LMT, 0x99997631);	/* MCS7~0 */
 	rtwn_write_1(sc, R92C_AGGR_BREAK_TIME, 0x16);
 
 	rtwn_write_1(sc, R92C_BCN_MAX_ERR, 0xff);
 	rtwn_write_1(sc, R92C_BCN_CTRL, R92C_BCN_CTRL_DIS_TSF_UDT0);
 
 	rtwn_write_4(sc, R92C_PIFS, 0x1c);
 	rtwn_write_4(sc, R92C_MCUTST_1, 0x0);
 
 	/* Load 8051 microcode. */
 	error = rtwn_load_firmware(sc);
 	if (error != 0)
 		goto fail;
 
 	/* Initialize MAC/BB/RF blocks. */
 	rtwn_mac_init(sc);
 	rtwn_bb_init(sc);
 	rtwn_rf_init(sc);
 
 	/* Turn CCK and OFDM blocks on. */
 	reg = rtwn_bb_read(sc, R92C_FPGA0_RFMOD);
 	reg |= R92C_RFMOD_CCK_EN;
 	rtwn_bb_write(sc, R92C_FPGA0_RFMOD, reg);
 	reg = rtwn_bb_read(sc, R92C_FPGA0_RFMOD);
 	reg |= R92C_RFMOD_OFDM_EN;
 	rtwn_bb_write(sc, R92C_FPGA0_RFMOD, reg);
 
 	/* Clear per-station keys table. */
 	rtwn_cam_init(sc);
 
 	/* Enable hardware sequence numbering. */
 	rtwn_write_1(sc, R92C_HWSEQ_CTRL, 0xff);
 
 	/* Perform LO and IQ calibrations. */
 	rtwn_iq_calib(sc);
 	/* Perform LC calibration. */
 	rtwn_lc_calib(sc);
 
 	rtwn_pa_bias_init(sc);
 
 	/* Initialize GPIO setting. */
 	rtwn_write_1(sc, R92C_GPIO_MUXCFG,
 	    rtwn_read_1(sc, R92C_GPIO_MUXCFG) & ~R92C_GPIO_MUXCFG_ENBT);
 
 	/* Fix for lower temperature. */
 	rtwn_write_1(sc, 0x15, 0xe9);
 
 	/* Clear pending interrupts. */
 	rtwn_write_4(sc, R92C_HISR, 0xffffffff);
 
 	/* Enable interrupts. */
 	rtwn_write_4(sc, R92C_HIMR, RTWN_INT_ENABLE);
 
 	sc->sc_flags |= RTWN_RUNNING;
 
 	/* Arm the watchdog to fire after hz ticks. */
 	callout_reset(&sc->watchdog_to, hz, rtwn_watchdog, sc);
 	return;
 
 fail:
 	/* Undo any partial initialisation. */
 	rtwn_stop_locked(sc);
 }
 
 /*
  * Locking wrapper around rtwn_init_locked().  Once the lock has been
  * dropped, net80211 is told to start all vaps, but only if the bring-up
  * left RTWN_RUNNING set.
  */
 static void
 rtwn_init(struct rtwn_softc *sc)
 {
 
 	RTWN_LOCK(sc);
 	rtwn_init_locked(sc);
 	RTWN_UNLOCK(sc);
 
 	/* Bring-up failed: there is nothing to start. */
 	if ((sc->sc_flags & RTWN_RUNNING) == 0)
 		return;
 
 	ieee80211_start_all(&sc->sc_ic);
 }
 
 /*
  * Shut the adapter down.  The caller must hold the softc lock (see
  * RTWN_LOCK_ASSERT below).
  *
  * Stops the watchdog and calibration callouts, clears RTWN_RUNNING, masks
  * interrupts, pauses Tx, pulses the BB global reset bit, clears the DMA and
  * MAC enable bits in R92C_CR, resets the firmware when R92C_MCUFWDL has
  * RAM_DL_SEL set, writes the power-down register sequence and finally
  * resets every Tx list and the Rx list.
  */
 static void
 rtwn_stop_locked(struct rtwn_softc *sc)
 {
 	uint16_t reg;
 	int i;
 
 	RTWN_LOCK_ASSERT(sc);
 
 	sc->sc_tx_timer = 0;
 	callout_stop(&sc->watchdog_to);
 	callout_stop(&sc->calib_to);
 	sc->sc_flags &= ~RTWN_RUNNING;
 
 	/* Disable interrupts. */
 	rtwn_write_4(sc, R92C_HISR, 0x00000000);
 	rtwn_write_4(sc, R92C_HIMR, 0x00000000);
 
 	/* Stop hardware. */
 	rtwn_write_1(sc, R92C_TXPAUSE, 0xff);
 	rtwn_write_1(sc, R92C_RF_CTRL, 0x00);
 	/* Set, then clear, the BB global reset bit. */
 	reg = rtwn_read_1(sc, R92C_SYS_FUNC_EN);
 	reg |= R92C_SYS_FUNC_EN_BB_GLB_RST;
 	rtwn_write_1(sc, R92C_SYS_FUNC_EN, reg);
 	reg &= ~R92C_SYS_FUNC_EN_BB_GLB_RST;
 	rtwn_write_1(sc, R92C_SYS_FUNC_EN, reg);
 	/* Clear the DMA, protocol, scheduler, MAC Tx/Rx and security bits. */
 	reg = rtwn_read_2(sc, R92C_CR);
 	reg &= ~(R92C_CR_HCI_TXDMA_EN | R92C_CR_HCI_RXDMA_EN |
 	    R92C_CR_TXDMA_EN | R92C_CR_RXDMA_EN | R92C_CR_PROTOCOL_EN |
 	    R92C_CR_SCHEDULE_EN | R92C_CR_MACTXEN | R92C_CR_MACRXEN |
 	    R92C_CR_ENSEC);
 	rtwn_write_2(sc, R92C_CR, reg);
 	if (rtwn_read_1(sc, R92C_MCUFWDL) & R92C_MCUFWDL_RAM_DL_SEL)
 		rtwn_fw_reset(sc);
 	/* TODO: linux does additional btcoex stuff here */
 	rtwn_write_2(sc, R92C_AFE_PLL_CTRL, 0x80); /* linux magic number */
 	rtwn_write_1(sc, R92C_SPS0_CTRL, 0x23); /* ditto */
 	rtwn_write_1(sc, R92C_AFE_XTAL_CTRL, 0x0e); /* different with btcoex */
 	rtwn_write_1(sc, R92C_RSV_CTRL, 0x0e);
 	rtwn_write_1(sc, R92C_APS_FSMCO, R92C_APS_FSMCO_PDN_EN);
 
 	/* Reset all Tx lists and the Rx list. */
 	for (i = 0; i < RTWN_NTXQUEUES; i++)
 		rtwn_reset_tx_list(sc, i);
 	rtwn_reset_rx_list(sc);
 }
 
 /*
  * Locking wrapper around rtwn_stop_locked(): takes the softc lock, stops
  * the adapter and releases the lock again.
  */
 static void
 rtwn_stop(struct rtwn_softc *sc)
 {
 	RTWN_LOCK(sc);
 	rtwn_stop_locked(sc);
 	RTWN_UNLOCK(sc);
 }
 
 static void
 rtwn_intr(void *arg)
 {
 	struct rtwn_softc *sc = arg;
 	uint32_t status;
 	int i;
 
 	RTWN_LOCK(sc);
 	status = rtwn_read_4(sc, R92C_HISR);
 	if (status == 0 || status == 0xffffffff) {
 		RTWN_UNLOCK(sc);
 		return;
 	}
 
 	/* Disable interrupts. */
 	rtwn_write_4(sc, R92C_HIMR, 0x00000000);
 
 	/* Ack interrupts. */
 	rtwn_write_4(sc, R92C_HISR, status);
 
 	/* Vendor driver treats RX errors like ROK... */
 	if (status & (R92C_IMR_ROK | R92C_IMR_RXFOVW | R92C_IMR_RDU)) {
 		bus_dmamap_sync(sc->rx_ring.desc_dmat, sc->rx_ring.desc_map,
 		    BUS_DMASYNC_POSTREAD);
 
 		for (i = 0; i < RTWN_RX_LIST_COUNT; i++) {
 			struct r92c_rx_desc *rx_desc = &sc->rx_ring.desc[i];
 			struct rtwn_rx_data *rx_data = &sc->rx_ring.rx_data[i];
 
 			if (le32toh(rx_desc->rxdw0) & R92C_RXDW0_OWN)
 				continue;
 
 			rtwn_rx_frame(sc, rx_desc, rx_data, i);
 		}
 	}
 
 	if (status & R92C_IMR_BDOK)
 		rtwn_tx_done(sc, RTWN_BEACON_QUEUE);
 	if (status & R92C_IMR_HIGHDOK)
 		rtwn_tx_done(sc, RTWN_HIGH_QUEUE);
 	if (status & R92C_IMR_MGNTDOK)
 		rtwn_tx_done(sc, RTWN_MGNT_QUEUE);
 	if (status & R92C_IMR_BKDOK)
 		rtwn_tx_done(sc, RTWN_BK_QUEUE);
 	if (status & R92C_IMR_BEDOK)
 		rtwn_tx_done(sc, RTWN_BE_QUEUE);
 	if (status & R92C_IMR_VIDOK)
 		rtwn_tx_done(sc, RTWN_VI_QUEUE);
 	if (status & R92C_IMR_VODOK)
 		rtwn_tx_done(sc, RTWN_VO_QUEUE);
 
 	/* Enable interrupts. */
 	rtwn_write_4(sc, R92C_HIMR, RTWN_INT_ENABLE);
 
 	RTWN_UNLOCK(sc);
 }
 
 /*
  * Reset the hardware by stopping and re-initialising the adapter, then
  * call ieee80211_notify_radio() with state 1.  'arg0' is the softc;
  * 'pending' is not used.
  * NOTE(review): the (void *, int) signature looks like a taskqueue
  * handler - confirm against where this function is registered.
  */
 static void
 rtwn_hw_reset(void *arg0, int pending)
 {
 	struct rtwn_softc *sc = arg0;
 	struct ieee80211com *ic = &sc->sc_ic;
 
 	rtwn_stop(sc);
 	rtwn_init(sc);
 	ieee80211_notify_radio(ic, 1);
 }
Index: projects/clang380-import/sys/dev/sfxge/common/efsys.h
===================================================================
--- projects/clang380-import/sys/dev/sfxge/common/efsys.h	(revision 293686)
+++ projects/clang380-import/sys/dev/sfxge/common/efsys.h	(revision 293687)
@@ -1,1230 +1,1231 @@
 /*-
  * Copyright (c) 2010-2015 Solarflare Communications Inc.
  * All rights reserved.
  *
  * This software was developed in part by Philip Paeps under contract for
  * Solarflare Communications, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * 1. Redistributions of source code must retain the above copyright notice,
  *    this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright notice,
  *    this list of conditions and the following disclaimer in the documentation
  *    and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * The views and conclusions contained in the software and documentation are
  * those of the authors and should not be interpreted as representing official
  * policies, either expressed or implied, of the FreeBSD Project.
  *
  * $FreeBSD$
  */
 
 #ifndef	_SYS_EFSYS_H
 #define	_SYS_EFSYS_H
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/rwlock.h>
 #include <sys/sdt.h>
 #include <sys/systm.h>
 
 #include <machine/bus.h>
 #include <machine/endian.h>
 
 #define	EFSYS_HAS_UINT64 1
 #if defined(__x86_64__)
 #define	EFSYS_USE_UINT64 1
 #else
 #define	EFSYS_USE_UINT64 0
 #endif
 #define	EFSYS_HAS_SSE2_M128 0
 #if _BYTE_ORDER == _BIG_ENDIAN
 #define	EFSYS_IS_BIG_ENDIAN 1
 #define	EFSYS_IS_LITTLE_ENDIAN 0
 #elif _BYTE_ORDER == _LITTLE_ENDIAN
 #define	EFSYS_IS_BIG_ENDIAN 0
 #define	EFSYS_IS_LITTLE_ENDIAN 1
 #endif
 #include "efx_types.h"
 
 /* Common code requires this */
 #if __FreeBSD_version < 800068
 #define	memmove(d, s, l) bcopy(s, d, l)
 #endif
 
 /* FreeBSD equivalents of Solaris things */
 #ifndef _NOTE
 #define	_NOTE(s)
 #endif
 
 #ifndef B_FALSE
 #define	B_FALSE	FALSE
 #endif
 #ifndef B_TRUE
 #define	B_TRUE	TRUE
 #endif
 
 #ifndef IS_P2ALIGNED
 #define	IS_P2ALIGNED(v, a)	((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0)
 #endif
 
 #ifndef P2ROUNDUP
 #define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))
 #endif
 
 #ifndef P2ALIGN
 #define	P2ALIGN(_x, _a)		((_x) & -(_a))
 #endif
 
 /*
  * True when x has at most one bit set (note that 0 also qualifies).
  * The guard must test the macro actually being defined, so that an ISP2
  * supplied by an earlier header is not redefined.
  */
 #ifndef ISP2
 #define	ISP2(x)			(((x) & ((x) - 1)) == 0)
 #endif
 
 #if defined(__x86_64__) && __FreeBSD_version >= 1000000
 
 #define	SFXGE_USE_BUS_SPACE_8		1
 
 #if !defined(bus_space_read_stream_8)
 
 #define	bus_space_read_stream_8(t, h, o)				\
 	bus_space_read_8((t), (h), (o))
 
 #define	bus_space_write_stream_8(t, h, o, v)				\
 	bus_space_write_8((t), (h), (o), (v))
 
 #endif
 
 #endif
 
 #define	ENOTACTIVE EINVAL
 
 /* Memory type to use on FreeBSD */
 MALLOC_DECLARE(M_SFXGE);
 
 /* Machine-dependent prefetch wrappers */
 /*
  * prefetch_read_many() and prefetch_read_once() issue a prefetch
  * instruction for 'addr' on i386/amd64 and sparc64; on every other
  * architecture they are empty functions.
  */
 #if defined(__i386__) || defined(__amd64__)
 /* x86: prefetcht0 on addr. */
 static __inline void
 prefetch_read_many(void *addr)
 {
 
 	__asm__(
 	    "prefetcht0 (%0)"
 	    :
 	    : "r" (addr));
 }
 
 /* x86: prefetchnta on addr. */
 static __inline void
 prefetch_read_once(void *addr)
 {
 
 	__asm__(
 	    "prefetchnta (%0)"
 	    :
 	    : "r" (addr));
 }
 #elif defined(__sparc64__)
 /* sparc64: prefetch with function code 0. */
 static __inline void
 prefetch_read_many(void *addr)
 {
 
 	__asm__(
 	    "prefetch [%0], 0"
 	    :
 	    : "r" (addr));
 }
 
 /* sparc64: prefetch with function code 1. */
 static __inline void
 prefetch_read_once(void *addr)
 {
 
 	__asm__(
 	    "prefetch [%0], 1"
 	    :
 	    : "r" (addr));
 }
 #else
 /* Other architectures: no prefetch is issued. */
 static __inline void
 prefetch_read_many(void *addr)
 {
 
 }
 
 /* Other architectures: no prefetch is issued. */
 static __inline void
 prefetch_read_once(void *addr)
 {
 
 }
 #endif
 
 #if defined(__i386__) || defined(__amd64__)
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #endif
 /*
  * Fill in a single DMA segment describing the data of mbuf 'm'.
  *
  * On i386/amd64 the segment is built directly from pmap_kextract() of the
  * mbuf data pointer and m_len; 'tag' and 'map' are not used on that path.
  * On other architectures the mbuf is loaded through
  * bus_dmamap_load_mbuf_sg().
  * NOTE(review): on the generic path the return value of
  * bus_dmamap_load_mbuf_sg() and the resulting segment count are discarded,
  * so a failed or multi-segment load goes unnoticed - confirm callers only
  * pass mbufs for which this is safe.
  */
 static __inline void
 sfxge_map_mbuf_fast(bus_dma_tag_t tag, bus_dmamap_t map,
 		    struct mbuf *m, bus_dma_segment_t *seg)
 {
 #if defined(__i386__) || defined(__amd64__)
 	seg->ds_addr = pmap_kextract(mtod(m, vm_offset_t));
 	seg->ds_len = m->m_len;
 #else
 	int nsegstmp;
 
 	bus_dmamap_load_mbuf_sg(tag, map, m, seg, &nsegstmp, 0);
 #endif
 }
 
 /* Modifiers used for Windows builds */
 #define	__in
 #define	__in_opt
 #define	__in_ecount(_n)
 #define	__in_ecount_opt(_n)
 #define	__in_bcount(_n)
 #define	__in_bcount_opt(_n)
 
 #define	__out
 #define	__out_opt
 #define	__out_ecount(_n)
 #define	__out_ecount_opt(_n)
 #define	__out_bcount(_n)
 #define	__out_bcount_opt(_n)
 
 #define	__deref_out
 
 #define	__inout
 #define	__inout_opt
 #define	__inout_ecount(_n)
 #define	__inout_ecount_opt(_n)
 #define	__inout_bcount(_n)
 #define	__inout_bcount_opt(_n)
 #define	__inout_bcount_full_opt(_n)
 
 #define	__deref_out_bcount_opt(n)
 
 #define	__checkReturn
 #define	__success(_x)
 
 #define	__drv_when(_p, _c)
 
 /* Code inclusion options */
 
 
 #define	EFSYS_OPT_NAMES 1
 
 #define	EFSYS_OPT_FALCON 0
 #define	EFSYS_OPT_FALCON_NIC_CFG_OVERRIDE 0
 #define	EFSYS_OPT_SIENA 1
 #define	EFSYS_OPT_HUNTINGTON 1
+#define	EFSYS_OPT_MEDFORD 0
 #ifdef DEBUG
 #define	EFSYS_OPT_CHECK_REG 1
 #else
 #define	EFSYS_OPT_CHECK_REG 0
 #endif
 
 #define	EFSYS_OPT_MCDI 1
 #define	EFSYS_OPT_MCDI_LOGGING 0
 #define	EFSYS_OPT_MCDI_PROXY_AUTH 0
 
 #define	EFSYS_OPT_MAC_FALCON_GMAC 0
 #define	EFSYS_OPT_MAC_FALCON_XMAC 0
 #define	EFSYS_OPT_MAC_STATS 1
 
 #define	EFSYS_OPT_LOOPBACK 0
 
 #define	EFSYS_OPT_MON_NULL 0
 #define	EFSYS_OPT_MON_LM87 0
 #define	EFSYS_OPT_MON_MAX6647 0
 #define	EFSYS_OPT_MON_MCDI 0
 #define	EFSYS_OPT_MON_STATS 0
 
 #define	EFSYS_OPT_PHY_NULL 0
 #define	EFSYS_OPT_PHY_QT2022C2 0
 #define	EFSYS_OPT_PHY_SFX7101 0
 #define	EFSYS_OPT_PHY_TXC43128 0
 #define	EFSYS_OPT_PHY_SFT9001 0
 #define	EFSYS_OPT_PHY_QT2025C 0
 #define	EFSYS_OPT_PHY_STATS 1
 #define	EFSYS_OPT_PHY_PROPS 0
 #define	EFSYS_OPT_PHY_BIST 0
 #define	EFSYS_OPT_BIST 1
 #define	EFSYS_OPT_PHY_LED_CONTROL 1
 #define	EFSYS_OPT_PHY_FLAGS 0
 
 #define	EFSYS_OPT_VPD 1
 #define	EFSYS_OPT_NVRAM 1
 #define	EFSYS_OPT_NVRAM_FALCON_BOOTROM 0
 #define	EFSYS_OPT_NVRAM_SFT9001	0
 #define	EFSYS_OPT_NVRAM_SFX7101	0
 #define	EFSYS_OPT_BOOTCFG 0
 
 #define	EFSYS_OPT_PCIE_TUNE 0
 #define	EFSYS_OPT_DIAG 0
 #define	EFSYS_OPT_WOL 1
 #define	EFSYS_OPT_RX_SCALE 1
 #define	EFSYS_OPT_QSTATS 1
 #define	EFSYS_OPT_FILTER 1
 #define	EFSYS_OPT_MCAST_FILTER_LIST 1
 #define	EFSYS_OPT_RX_SCATTER 0
 #define	EFSYS_OPT_RX_HDR_SPLIT 0
 
 #define	EFSYS_OPT_EV_PREFETCH 0
 
 #define	EFSYS_OPT_DECODE_INTR_FATAL 1
 
 /* ID */
 
 typedef struct __efsys_identifier_s	efsys_identifier_t;
 
 /* PROBE */
 
 #ifndef DTRACE_PROBE
 
 #define	EFSYS_PROBE(_name)
 
 #define	EFSYS_PROBE1(_name, _type1, _arg1)
 
 #define	EFSYS_PROBE2(_name, _type1, _arg1, _type2, _arg2)
 
 #define	EFSYS_PROBE3(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3)
 
 #define	EFSYS_PROBE4(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4)
 
 #define	EFSYS_PROBE5(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4, _type5, _arg5)
 
 #define	EFSYS_PROBE6(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4, _type5, _arg5,		\
 	    _type6, _arg6)
 
 #define	EFSYS_PROBE7(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4, _type5, _arg5,		\
 	    _type6, _arg6, _type7, _arg7)
 
 #else /* DTRACE_PROBE */
 
 #define	EFSYS_PROBE(_name)						\
 	DTRACE_PROBE(_name)
 
 #define	EFSYS_PROBE1(_name, _type1, _arg1)				\
 	DTRACE_PROBE1(_name, _type1, _arg1)
 
 #define	EFSYS_PROBE2(_name, _type1, _arg1, _type2, _arg2)		\
 	DTRACE_PROBE2(_name, _type1, _arg1, _type2, _arg2)
 
 #define	EFSYS_PROBE3(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3)						\
 	DTRACE_PROBE3(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3)
 
 #define	EFSYS_PROBE4(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4)				\
 	DTRACE_PROBE4(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4)
 
 #ifdef DTRACE_PROBE5
 #define	EFSYS_PROBE5(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4, _type5, _arg5)		\
 	DTRACE_PROBE5(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4, _type5, _arg5)
 #else
 #define	EFSYS_PROBE5(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4, _type5, _arg5)		\
 	DTRACE_PROBE4(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4)
 #endif
 
 #ifdef DTRACE_PROBE6
 #define	EFSYS_PROBE6(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4, _type5, _arg5,		\
 	    _type6, _arg6)						\
 	DTRACE_PROBE6(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4, _type5, _arg5,		\
 	    _type6, _arg6)
 #else
 #define	EFSYS_PROBE6(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4, _type5, _arg5,		\
 	    _type6, _arg6)						\
 	EFSYS_PROBE5(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4, _type5, _arg5)
 #endif
 
 #ifdef DTRACE_PROBE7
 #define	EFSYS_PROBE7(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4, _type5, _arg5,		\
 	    _type6, _arg6, _type7, _arg7)				\
 	DTRACE_PROBE7(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4, _type5, _arg5,		\
 	    _type6, _arg6, _type7, _arg7)
 #else
 #define	EFSYS_PROBE7(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4, _type5, _arg5,		\
 	    _type6, _arg6, _type7, _arg7)				\
 	EFSYS_PROBE6(_name, _type1, _arg1, _type2, _arg2,		\
 	    _type3, _arg3, _type4, _arg4, _type5, _arg5,		\
 	    _type6, _arg6)
 #endif
 
 #endif /* DTRACE_PROBE */
 
 /* DMA */
 
 typedef uint64_t		efsys_dma_addr_t;
 
 typedef struct efsys_mem_s {
 	bus_dma_tag_t		esm_tag;
 	bus_dmamap_t		esm_map;
 	caddr_t			esm_base;
 	efsys_dma_addr_t	esm_addr;
 } efsys_mem_t;
 
 
 /* Zero the first _size bytes of the memory at (_esmp)->esm_base. */
 #define	EFSYS_MEM_ZERO(_esmp, _size)					\
 	do {								\
 		(void) memset((_esmp)->esm_base, 0, (_size));		\
 									\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /*
  * Read the 32-bit word at byte offset _offset from (_esmp)->esm_base into
  * (_edp)->ed_u32[0].  _offset must be aligned to sizeof (efx_dword_t);
  * this is checked with KASSERT.  The value read is reported through the
  * mem_readd probe.
  */
 #define	EFSYS_MEM_READD(_esmp, _offset, _edp)				\
 	do {								\
 		uint32_t *addr;						\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_dword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		addr = (void *)((_esmp)->esm_base + (_offset));		\
 									\
 		(_edp)->ed_u32[0] = *addr;				\
 									\
 		EFSYS_PROBE2(mem_readd, unsigned int, (_offset),	\
 		    uint32_t, (_edp)->ed_u32[0]);			\
 									\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /*
  * Read the 64-bit quantity at byte offset _offset from (_esmp)->esm_base
  * into *_eqp.  _offset must be aligned to sizeof (efx_qword_t); this is
  * checked with KASSERT.  On x86_64 a single 64-bit load is used; on other
  * architectures the value is assembled from two consecutive 32-bit loads,
  * low word first.  Both halves are reported through the mem_readq probe.
  */
 #if defined(__x86_64__)
 #define	EFSYS_MEM_READQ(_esmp, _offset, _eqp)				\
 	do {								\
 		uint64_t *addr;						\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		addr = (void *)((_esmp)->esm_base + (_offset));		\
 									\
 		(_eqp)->eq_u64[0] = *addr;				\
 									\
 		EFSYS_PROBE3(mem_readq, unsigned int, (_offset),	\
 		    uint32_t, (_eqp)->eq_u32[1],			\
 		    uint32_t, (_eqp)->eq_u32[0]);			\
 									\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 #else
 #define	EFSYS_MEM_READQ(_esmp, _offset, _eqp)				\
 	do {								\
 		uint32_t *addr;						\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		addr = (void *)((_esmp)->esm_base + (_offset));		\
 									\
 		(_eqp)->eq_u32[0] = *addr++;				\
 		(_eqp)->eq_u32[1] = *addr;				\
 									\
 		EFSYS_PROBE3(mem_readq, unsigned int, (_offset),	\
 		    uint32_t, (_eqp)->eq_u32[1],			\
 		    uint32_t, (_eqp)->eq_u32[0]);			\
 									\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 #endif
 
 /*
  * EFSYS_MEM_READO(_esmp, _offset, _eop)
  *
  * Copy the oword found at byte offset _offset from (_esmp)->esm_base into
  * *(_eop) and then fire the mem_reado probe with the offset and all four
  * dwords.  _offset is KASSERTed to be aligned to sizeof (efx_oword_t).
  *
  * x86_64 uses two 64-bit loads; every other architecture uses four 32-bit
  * loads, both in ascending address order.  The macro declares a local named
  * "addr".
  */
 #if defined(__x86_64__)
 #define	EFSYS_MEM_READO(_esmp, _offset, _eop)				\
 	do {								\
 		uint64_t *addr;						\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_oword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		addr = (void *)((_esmp)->esm_base + (_offset));		\
 		(_eop)->eo_u64[0] = addr[0];				\
 		(_eop)->eo_u64[1] = addr[1];				\
 									\
 		EFSYS_PROBE5(mem_reado, unsigned int, (_offset),	\
 		    uint32_t, (_eop)->eo_u32[3],			\
 		    uint32_t, (_eop)->eo_u32[2],			\
 		    uint32_t, (_eop)->eo_u32[1],			\
 		    uint32_t, (_eop)->eo_u32[0]);			\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 #else
 #define	EFSYS_MEM_READO(_esmp, _offset, _eop)				\
 	do {								\
 		uint32_t *addr;						\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_oword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		addr = (void *)((_esmp)->esm_base + (_offset));		\
 		(_eop)->eo_u32[0] = addr[0];				\
 		(_eop)->eo_u32[1] = addr[1];				\
 		(_eop)->eo_u32[2] = addr[2];				\
 		(_eop)->eo_u32[3] = addr[3];				\
 									\
 		EFSYS_PROBE5(mem_reado, unsigned int, (_offset),	\
 		    uint32_t, (_eop)->eo_u32[3],			\
 		    uint32_t, (_eop)->eo_u32[2],			\
 		    uint32_t, (_eop)->eo_u32[1],			\
 		    uint32_t, (_eop)->eo_u32[0]);			\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 #endif
 
 /*
  * Fire the mem_writed probe with the offset and (_edp)->ed_u32[0], then
  * store that dword at byte offset _offset from (_esmp)->esm_base.
  * KASSERTs that _offset is aligned to sizeof (efx_dword_t).
  *
  * The macro declares a local named "addr", so the arguments must not refer
  * to a caller variable of that name.
  */
 #define	EFSYS_MEM_WRITED(_esmp, _offset, _edp)				\
 	do {								\
 		uint32_t *addr;						\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_dword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		EFSYS_PROBE2(mem_writed, unsigned int, (_offset),	\
 		    uint32_t, (_edp)->ed_u32[0]);			\
 									\
 		addr = (void *)((_esmp)->esm_base + (_offset));		\
 									\
 		*addr = (_edp)->ed_u32[0];				\
 									\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /*
  * EFSYS_MEM_WRITEQ(_esmp, _offset, _eqp)
  *
  * Fire the mem_writeq probe with the offset and both dwords of *(_eqp),
  * then store the qword at byte offset _offset from (_esmp)->esm_base.
  * _offset is KASSERTed to be aligned to sizeof (efx_qword_t).
  *
  * x86_64 uses one 64-bit store; every other architecture uses two 32-bit
  * stores, eq_u32[0] first.  The macro declares a local named "addr".
  */
 #if defined(__x86_64__)
 #define	EFSYS_MEM_WRITEQ(_esmp, _offset, _eqp)				\
 	do {								\
 		uint64_t *addr;						\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		EFSYS_PROBE3(mem_writeq, unsigned int, (_offset),	\
 		    uint32_t, (_eqp)->eq_u32[1],			\
 		    uint32_t, (_eqp)->eq_u32[0]);			\
 									\
 		addr = (void *)((_esmp)->esm_base + (_offset));		\
 		addr[0] = (_eqp)->eq_u64[0];				\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 #else
 #define	EFSYS_MEM_WRITEQ(_esmp, _offset, _eqp)				\
 	do {								\
 		uint32_t *addr;						\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		EFSYS_PROBE3(mem_writeq, unsigned int, (_offset),	\
 		    uint32_t, (_eqp)->eq_u32[1],			\
 		    uint32_t, (_eqp)->eq_u32[0]);			\
 									\
 		addr = (void *)((_esmp)->esm_base + (_offset));		\
 		addr[0] = (_eqp)->eq_u32[0];				\
 		addr[1] = (_eqp)->eq_u32[1];				\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 #endif
 
 /*
  * EFSYS_MEM_WRITEO(_esmp, _offset, _eop)
  *
  * Fire the mem_writeo probe with the offset and all four dwords of *(_eop),
  * then store the oword at byte offset _offset from (_esmp)->esm_base.
  * _offset is KASSERTed to be aligned to sizeof (efx_oword_t).
  *
  * x86_64 uses two 64-bit stores; every other architecture uses four 32-bit
  * stores, both in ascending address order.  The macro declares a local
  * named "addr".
  */
 #if defined(__x86_64__)
 #define	EFSYS_MEM_WRITEO(_esmp, _offset, _eop)				\
 	do {								\
 		uint64_t *addr;						\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_oword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		EFSYS_PROBE5(mem_writeo, unsigned int, (_offset),	\
 		    uint32_t, (_eop)->eo_u32[3],			\
 		    uint32_t, (_eop)->eo_u32[2],			\
 		    uint32_t, (_eop)->eo_u32[1],			\
 		    uint32_t, (_eop)->eo_u32[0]);			\
 									\
 		addr = (void *)((_esmp)->esm_base + (_offset));		\
 		addr[0] = (_eop)->eo_u64[0];				\
 		addr[1] = (_eop)->eo_u64[1];				\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 #else
 #define	EFSYS_MEM_WRITEO(_esmp, _offset, _eop)				\
 	do {								\
 		uint32_t *addr;						\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_oword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		EFSYS_PROBE5(mem_writeo, unsigned int, (_offset),	\
 		    uint32_t, (_eop)->eo_u32[3],			\
 		    uint32_t, (_eop)->eo_u32[2],			\
 		    uint32_t, (_eop)->eo_u32[1],			\
 		    uint32_t, (_eop)->eo_u32[0]);			\
 									\
 		addr = (void *)((_esmp)->esm_base + (_offset));		\
 		addr[0] = (_eop)->eo_u32[0];				\
 		addr[1] = (_eop)->eo_u32[1];				\
 		addr[2] = (_eop)->eo_u32[2];				\
 		addr[3] = (_eop)->eo_u32[3];				\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 #endif
 
 /* Evaluates to the esm_addr member of the efsys_mem_t pointed to by _esmp. */
 #define	EFSYS_MEM_ADDR(_esmp)						\
 	((_esmp)->esm_addr)
 
 /* True when the region has no base pointer (esm_base == NULL). */
 #define	EFSYS_MEM_IS_NULL(_esmp)					\
 	((_esmp)->esm_base == NULL)
 
 /* BAR */
 
 /* Size, in bytes, of the lock-name buffers embedded in the lock structures. */
 #define	SFXGE_LOCK_NAME_MAX	16
 
 /*
  * Descriptor of a BAR accessed through the EFSYS_BAR_* macros.
  */
 typedef struct efsys_bar_s {
 	struct mtx		esb_lock;	/* taken by SFXGE_BAR_LOCK() */
 	char			esb_lock_name[SFXGE_LOCK_NAME_MAX]; /* "<ifname>:bar" */
 	bus_space_tag_t		esb_tag;	/* tag for bus_space_*() calls */
 	bus_space_handle_t	esb_handle;	/* handle for bus_space_*() calls */
 	int			esb_rid;	/* presumably the resource id - confirm */
 	struct resource		*esb_res;	/* presumably the BAR resource - confirm */
 } efsys_bar_t;
 
 /*
  * BAR lock helpers.
  *
  * SFXGE_BAR_LOCK_INIT formats "<_ifname>:bar" into esb_lock_name (truncated
  * by snprintf() to the buffer size) and initialises esb_lock as an MTX_DEF
  * mutex with that name.  The remaining macros destroy, acquire and release
  * the same mutex.
  */
 #define	SFXGE_BAR_LOCK_INIT(_esbp, _ifname)				\
 	do {								\
 		snprintf((_esbp)->esb_lock_name,			\
 			 sizeof((_esbp)->esb_lock_name),		\
 			 "%s:bar", (_ifname));				\
 		mtx_init(&(_esbp)->esb_lock, (_esbp)->esb_lock_name,	\
 			 NULL, MTX_DEF);				\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 #define	SFXGE_BAR_LOCK_DESTROY(_esbp)					\
 	mtx_destroy(&(_esbp)->esb_lock)
 #define	SFXGE_BAR_LOCK(_esbp)						\
 	mtx_lock(&(_esbp)->esb_lock)
 #define	SFXGE_BAR_UNLOCK(_esbp)						\
 	mtx_unlock(&(_esbp)->esb_lock)
 
 /*
  * Read the dword at BAR offset _offset into (_edp)->ed_u32[0] with
  * bus_space_read_stream_4(), then fire the bar_readd probe.
  * The BAR lock is taken around the access only when _lock is true.
  * KASSERTs that _offset is aligned to sizeof (efx_dword_t).
  */
 #define	EFSYS_BAR_READD(_esbp, _offset, _edp, _lock)			\
 	do {								\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_dword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		if (_lock)						\
 			SFXGE_BAR_LOCK(_esbp);				\
 									\
 		(_edp)->ed_u32[0] = bus_space_read_stream_4(		\
 		    (_esbp)->esb_tag, (_esbp)->esb_handle,		\
 		    (_offset));						\
 									\
 		EFSYS_PROBE2(bar_readd, unsigned int, (_offset),	\
 		    uint32_t, (_edp)->ed_u32[0]);			\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		if (_lock)						\
 			SFXGE_BAR_UNLOCK(_esbp);			\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 #if defined(SFXGE_USE_BUS_SPACE_8)
 /*
  * SFXGE_USE_BUS_SPACE_8 variant: read the qword at BAR offset _offset into
  * (_eqp)->eq_u64[0] with a single bus_space_read_stream_8(), then fire the
  * bar_readq probe.  The BAR lock is always held across the access.
  * KASSERTs that _offset is aligned to sizeof (efx_qword_t).
  */
 #define	EFSYS_BAR_READQ(_esbp, _offset, _eqp)				\
 	do {								\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		SFXGE_BAR_LOCK(_esbp);					\
 									\
 		(_eqp)->eq_u64[0] = bus_space_read_stream_8(		\
 		    (_esbp)->esb_tag, (_esbp)->esb_handle,		\
 		    (_offset));						\
 									\
 		EFSYS_PROBE3(bar_readq, unsigned int, (_offset),	\
 		    uint32_t, (_eqp)->eq_u32[1],			\
 		    uint32_t, (_eqp)->eq_u32[0]);			\
 									\
 		SFXGE_BAR_UNLOCK(_esbp);				\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /*
  * SFXGE_USE_BUS_SPACE_8 variant: read the oword at BAR offset _offset into
  * *(_eop) as two bus_space_read_stream_8() accesses (offset, offset + 8),
  * then fire the bar_reado probe.  The BAR lock is taken around the access
  * only when _lock is true.
  * KASSERTs that _offset is aligned to sizeof (efx_oword_t).
  */
 #define	EFSYS_BAR_READO(_esbp, _offset, _eop, _lock)			\
 	do {								\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_oword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		if (_lock)						\
 			SFXGE_BAR_LOCK(_esbp);				\
 									\
 		(_eop)->eo_u64[0] = bus_space_read_stream_8(		\
 		    (_esbp)->esb_tag, (_esbp)->esb_handle,		\
 		    (_offset));						\
 		(_eop)->eo_u64[1] = bus_space_read_stream_8(		\
 		    (_esbp)->esb_tag, (_esbp)->esb_handle,		\
 		    (_offset) + 8);					\
 									\
 		EFSYS_PROBE5(bar_reado, unsigned int, (_offset),	\
 		    uint32_t, (_eop)->eo_u32[3],			\
 		    uint32_t, (_eop)->eo_u32[2],			\
 		    uint32_t, (_eop)->eo_u32[1],			\
 		    uint32_t, (_eop)->eo_u32[0]);			\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		if (_lock)						\
 			SFXGE_BAR_UNLOCK(_esbp);			\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 #else
 /*
  * Variant without SFXGE_USE_BUS_SPACE_8: read the qword at BAR offset
  * _offset into *(_eqp) as two bus_space_read_stream_4() accesses (offset,
  * offset + 4), then fire the bar_readq probe.  The BAR lock is always held
  * across both accesses.
  * KASSERTs that _offset is aligned to sizeof (efx_qword_t).
  */
 #define	EFSYS_BAR_READQ(_esbp, _offset, _eqp)				\
 	do {								\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		SFXGE_BAR_LOCK(_esbp);					\
 									\
 		(_eqp)->eq_u32[0] = bus_space_read_stream_4(		\
 		    (_esbp)->esb_tag, (_esbp)->esb_handle,		\
 		    (_offset));						\
 		(_eqp)->eq_u32[1] = bus_space_read_stream_4(		\
 		    (_esbp)->esb_tag, (_esbp)->esb_handle,		\
 		    (_offset) + 4);					\
 									\
 		EFSYS_PROBE3(bar_readq, unsigned int, (_offset),	\
 		    uint32_t, (_eqp)->eq_u32[1],			\
 		    uint32_t, (_eqp)->eq_u32[0]);			\
 									\
 		SFXGE_BAR_UNLOCK(_esbp);				\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /*
  * Variant without SFXGE_USE_BUS_SPACE_8: read the oword at BAR offset
  * _offset into *(_eop) as four bus_space_read_stream_4() accesses (offset,
  * +4, +8, +12), then fire the bar_reado probe.  The BAR lock is taken around
  * the accesses only when _lock is true.
  * KASSERTs that _offset is aligned to sizeof (efx_oword_t).
  */
 #define	EFSYS_BAR_READO(_esbp, _offset, _eop, _lock)			\
 	do {								\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_oword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		if (_lock)						\
 			SFXGE_BAR_LOCK(_esbp);				\
 									\
 		(_eop)->eo_u32[0] = bus_space_read_stream_4(		\
 		    (_esbp)->esb_tag, (_esbp)->esb_handle,		\
 		    (_offset));						\
 		(_eop)->eo_u32[1] = bus_space_read_stream_4(		\
 		    (_esbp)->esb_tag, (_esbp)->esb_handle,		\
 		    (_offset) + 4);					\
 		(_eop)->eo_u32[2] = bus_space_read_stream_4(		\
 		    (_esbp)->esb_tag, (_esbp)->esb_handle,		\
 		    (_offset) + 8);					\
 		(_eop)->eo_u32[3] = bus_space_read_stream_4(		\
 		    (_esbp)->esb_tag, (_esbp)->esb_handle,		\
 		    (_offset) + 12);					\
 									\
 		EFSYS_PROBE5(bar_reado, unsigned int, (_offset),	\
 		    uint32_t, (_eop)->eo_u32[3],			\
 		    uint32_t, (_eop)->eo_u32[2],			\
 		    uint32_t, (_eop)->eo_u32[1],			\
 		    uint32_t, (_eop)->eo_u32[0]);			\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		if (_lock)						\
 			SFXGE_BAR_UNLOCK(_esbp);			\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 #endif
 
 /*
  * Write (_edp)->ed_u32[0] to BAR offset _offset: fire the bar_writed probe,
  * issue a write barrier over the dword, then call
  * bus_space_write_stream_4().  The BAR lock is taken around the sequence
  * only when _lock is true.
  * KASSERTs that _offset is aligned to sizeof (efx_dword_t).
  */
 #define	EFSYS_BAR_WRITED(_esbp, _offset, _edp, _lock)			\
 	do {								\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_dword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		if (_lock)						\
 			SFXGE_BAR_LOCK(_esbp);				\
 									\
 		EFSYS_PROBE2(bar_writed, unsigned int, (_offset),	\
 		    uint32_t, (_edp)->ed_u32[0]);			\
 									\
 		/*							\
 		 * Make sure that previous writes to the dword have	\
 		 * been done. It should be cheaper than barrier just	\
 		 * after the write below.				\
 		 */							\
 		bus_space_barrier((_esbp)->esb_tag, (_esbp)->esb_handle,\
 		    (_offset), sizeof (efx_dword_t),			\
 		    BUS_SPACE_BARRIER_WRITE);				\
 		bus_space_write_stream_4((_esbp)->esb_tag,		\
 		    (_esbp)->esb_handle,				\
 		    (_offset), (_edp)->ed_u32[0]);			\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		if (_lock)						\
 			SFXGE_BAR_UNLOCK(_esbp);			\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 #if defined(SFXGE_USE_BUS_SPACE_8)
 /*
  * SFXGE_USE_BUS_SPACE_8 variant: write (_eqp)->eq_u64[0] to BAR offset
  * _offset.  Fires the bar_writeq probe, issues a write barrier over the
  * qword, then performs a single bus_space_write_stream_8().  The BAR lock
  * is always held across the sequence.
  * KASSERTs that _offset is aligned to sizeof (efx_qword_t).
  */
 #define	EFSYS_BAR_WRITEQ(_esbp, _offset, _eqp)				\
 	do {								\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		SFXGE_BAR_LOCK(_esbp);					\
 									\
 		EFSYS_PROBE3(bar_writeq, unsigned int, (_offset),	\
 		    uint32_t, (_eqp)->eq_u32[1],			\
 		    uint32_t, (_eqp)->eq_u32[0]);			\
 									\
 		/*							\
 		 * Make sure that previous writes to the qword have	\
 		 * been done. It should be cheaper than barrier just	\
 		 * after the write below.				\
 		 */							\
 		bus_space_barrier((_esbp)->esb_tag, (_esbp)->esb_handle,\
 		    (_offset), sizeof (efx_qword_t),			\
 		    BUS_SPACE_BARRIER_WRITE);				\
 		bus_space_write_stream_8((_esbp)->esb_tag,		\
 		    (_esbp)->esb_handle,				\
 		    (_offset), (_eqp)->eq_u64[0]);			\
 									\
 		SFXGE_BAR_UNLOCK(_esbp);				\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 #else
 /*
  * Variant without SFXGE_USE_BUS_SPACE_8: write *(_eqp) to BAR offset
  * _offset as two bus_space_write_stream_4() accesses.  A write barrier
  * precedes each dword write, so eq_u32[1] (offset + 4) is written last.
  * The BAR lock is always held across the sequence.
  * KASSERTs that _offset is aligned to sizeof (efx_qword_t).
  */
 #define	EFSYS_BAR_WRITEQ(_esbp, _offset, _eqp)				\
 	do {								\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		SFXGE_BAR_LOCK(_esbp);					\
 									\
 		EFSYS_PROBE3(bar_writeq, unsigned int, (_offset),	\
 		    uint32_t, (_eqp)->eq_u32[1],			\
 		    uint32_t, (_eqp)->eq_u32[0]);			\
 									\
 		/*							\
 		 * Make sure that previous writes to the qword have	\
 		 * been done. It should be cheaper than barrier just	\
 		 * after the last write below.				\
 		 */							\
 		bus_space_barrier((_esbp)->esb_tag, (_esbp)->esb_handle,\
 		    (_offset), sizeof (efx_qword_t),			\
 		    BUS_SPACE_BARRIER_WRITE);				\
 		bus_space_write_stream_4((_esbp)->esb_tag,		\
 		    (_esbp)->esb_handle,				\
 		    (_offset), (_eqp)->eq_u32[0]);			\
 		/*							\
 		 * It should be guaranteed that the last dword comes	\
 		 * the last, so barrier entire qword to be sure that	\
 		 * neither above nor below writes are reordered.	\
 		 */							\
 		bus_space_barrier((_esbp)->esb_tag, (_esbp)->esb_handle,\
 		    (_offset), sizeof (efx_qword_t),			\
 		    BUS_SPACE_BARRIER_WRITE);				\
 		bus_space_write_stream_4((_esbp)->esb_tag,		\
 		    (_esbp)->esb_handle,				\
 		    (_offset) + 4, (_eqp)->eq_u32[1]);			\
 									\
 		SFXGE_BAR_UNLOCK(_esbp);				\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 #endif
 
 /*
  * Guarantees 64bit aligned 64bit writes to write combined BAR mapping
  * (required by PIO hardware)
  *
  * NOTE: this is currently a stub.  The body only checks the alignment of
  * _offset, evaluates _esbp and then trips KASSERT(0, "not implemented");
  * _eqp is never written anywhere.
  */
 #define	EFSYS_BAR_WC_WRITEQ(_esbp, _offset, _eqp)			\
 	do {								\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_qword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		(void) (_esbp);						\
 									\
 		/* FIXME: Perform a 64-bit write */			\
 		KASSERT(0, ("not implemented"));			\
 									\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 #if defined(SFXGE_USE_BUS_SPACE_8)
 /*
  * SFXGE_USE_BUS_SPACE_8 variant: write *(_eop) to BAR offset _offset as two
  * bus_space_write_stream_8() accesses.  A write barrier precedes each qword
  * write, so eo_u64[1] (offset + 8) is written last.  The BAR lock is taken
  * around the sequence only when _lock is true.
  * KASSERTs that _offset is aligned to sizeof (efx_oword_t).
  */
 #define	EFSYS_BAR_WRITEO(_esbp, _offset, _eop, _lock)			\
 	do {								\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_oword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		if (_lock)						\
 			SFXGE_BAR_LOCK(_esbp);				\
 									\
 		EFSYS_PROBE5(bar_writeo, unsigned int, (_offset),	\
 		    uint32_t, (_eop)->eo_u32[3],			\
 		    uint32_t, (_eop)->eo_u32[2],			\
 		    uint32_t, (_eop)->eo_u32[1],			\
 		    uint32_t, (_eop)->eo_u32[0]);			\
 									\
 		/*							\
 		 * Make sure that previous writes to the oword have	\
 		 * been done. It should be cheaper than barrier just	\
 		 * after the last write below.				\
 		 */							\
 		bus_space_barrier((_esbp)->esb_tag, (_esbp)->esb_handle,\
 		    (_offset), sizeof (efx_oword_t),			\
 		    BUS_SPACE_BARRIER_WRITE);				\
 		bus_space_write_stream_8((_esbp)->esb_tag,		\
 		    (_esbp)->esb_handle,				\
 		    (_offset), (_eop)->eo_u64[0]);			\
 		/*							\
 		 * It should be guaranteed that the last qword comes	\
 		 * the last, so barrier entire oword to be sure that	\
 		 * neither above nor below writes are reordered.	\
 		 */							\
 		bus_space_barrier((_esbp)->esb_tag, (_esbp)->esb_handle,\
 		    (_offset), sizeof (efx_oword_t),			\
 		    BUS_SPACE_BARRIER_WRITE);				\
 		bus_space_write_stream_8((_esbp)->esb_tag,		\
 		    (_esbp)->esb_handle,				\
 		    (_offset) + 8, (_eop)->eo_u64[1]);			\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		if (_lock)						\
 			SFXGE_BAR_UNLOCK(_esbp);			\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 #else
 /*
  * Variant without SFXGE_USE_BUS_SPACE_8: write *(_eop) to BAR offset
  * _offset as four bus_space_write_stream_4() accesses.  One write barrier
  * precedes the first three dword writes and a second one precedes the
  * final write of eo_u32[3] (offset + 12), so that dword is written last.
  * The BAR lock is taken around the sequence only when _lock is true.
  * KASSERTs that _offset is aligned to sizeof (efx_oword_t).
  */
 #define	EFSYS_BAR_WRITEO(_esbp, _offset, _eop, _lock)			\
 	do {								\
 		_NOTE(CONSTANTCONDITION)				\
 		KASSERT(IS_P2ALIGNED(_offset, sizeof (efx_oword_t)),	\
 		    ("not power of 2 aligned"));			\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		if (_lock)						\
 			SFXGE_BAR_LOCK(_esbp);				\
 									\
 		EFSYS_PROBE5(bar_writeo, unsigned int, (_offset),	\
 		    uint32_t, (_eop)->eo_u32[3],			\
 		    uint32_t, (_eop)->eo_u32[2],			\
 		    uint32_t, (_eop)->eo_u32[1],			\
 		    uint32_t, (_eop)->eo_u32[0]);			\
 									\
 		/*							\
 		 * Make sure that previous writes to the oword have	\
 		 * been done. It should be cheaper than barrier just	\
 		 * after the last write below.				\
 		 */							\
 		bus_space_barrier((_esbp)->esb_tag, (_esbp)->esb_handle,\
 		    (_offset), sizeof (efx_oword_t),			\
 		    BUS_SPACE_BARRIER_WRITE);				\
 		bus_space_write_stream_4((_esbp)->esb_tag,		\
 		    (_esbp)->esb_handle,				\
 		    (_offset), (_eop)->eo_u32[0]);			\
 		bus_space_write_stream_4((_esbp)->esb_tag,		\
 		    (_esbp)->esb_handle,				\
 		    (_offset) + 4, (_eop)->eo_u32[1]);			\
 		bus_space_write_stream_4((_esbp)->esb_tag,		\
 		    (_esbp)->esb_handle,				\
 		    (_offset) + 8, (_eop)->eo_u32[2]);			\
 		/*							\
 		 * It should be guaranteed that the last dword comes	\
 		 * the last, so barrier entire oword to be sure that	\
 		 * neither above nor below writes are reordered.	\
 		 */							\
 		bus_space_barrier((_esbp)->esb_tag, (_esbp)->esb_handle,\
 		    (_offset), sizeof (efx_oword_t),			\
 		    BUS_SPACE_BARRIER_WRITE);				\
 		bus_space_write_stream_4((_esbp)->esb_tag,		\
 		    (_esbp)->esb_handle,				\
 		    (_offset) + 12, (_eop)->eo_u32[3]);			\
 									\
 		_NOTE(CONSTANTCONDITION)				\
 		if (_lock)						\
 			SFXGE_BAR_UNLOCK(_esbp);			\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 #endif
 
 /* Use the standard octo-word write for doorbell writes */
 /*
  * Doorbell write: forwards to EFSYS_BAR_WRITEO() with _lock = B_FALSE, so
  * the BAR lock is not taken by this macro.
  */
 #define	EFSYS_BAR_DOORBELL_WRITEO(_esbp, _offset, _eop)			\
 	do {								\
 		EFSYS_BAR_WRITEO((_esbp), (_offset), (_eop), B_FALSE);	\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /* SPIN */
 
 /* Wait for _us by calling DELAY(_us). */
 #define	EFSYS_SPIN(_us)							\
 	do {								\
 		DELAY(_us);						\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /* EFSYS_SLEEP is an alias of EFSYS_SPIN, so it also goes through DELAY(). */
 #define	EFSYS_SLEEP	EFSYS_SPIN
 
 /* BARRIERS */
 
 /* Read barrier: expands to rmb(). */
 #define	EFSYS_MEM_READ_BARRIER()	rmb()
 /* PIO write barrier: expands to nothing in this port. */
 #define	EFSYS_PIO_WRITE_BARRIER()
 
 /* DMA SYNC */
 /*
  * Sync the region's DMA map with BUS_DMASYNC_POSTREAD.
  * _offset and _size are accepted but not used: bus_dmamap_sync() is called
  * on the whole map (esm_tag, esm_map).
  */
 #define	EFSYS_DMA_SYNC_FOR_KERNEL(_esmp, _offset, _size)		\
 	do {								\
 		bus_dmamap_sync((_esmp)->esm_tag,			\
 		    (_esmp)->esm_map,					\
 		    BUS_DMASYNC_POSTREAD);				\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /*
  * Sync the region's DMA map with BUS_DMASYNC_PREWRITE.
  * _offset and _size are accepted but not used: bus_dmamap_sync() is called
  * on the whole map (esm_tag, esm_map).
  */
 #define	EFSYS_DMA_SYNC_FOR_DEVICE(_esmp, _offset, _size)		\
 	do {								\
 		bus_dmamap_sync((_esmp)->esm_tag,			\
 		    (_esmp)->esm_map,					\
 		    BUS_DMASYNC_PREWRITE);				\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /* TIMESTAMP */
 
 /* Timestamp value produced by EFSYS_TIMESTAMP(), in microseconds. */
 typedef	clock_t	efsys_timestamp_t;
 
 /*
  * EFSYS_TIMESTAMP(_usp)
  *
  * Store the current tick count, converted to microseconds, in *(_usp):
  *
  *	us = ticks * 1000000 / hz
  *
  * The multiplication is carried out in 64 bits so the intermediate product
  * cannot overflow a narrow clock_t.  The result is then narrowed to
  * efsys_timestamp_t, so callers should compare timestamps by difference
  * rather than by absolute value.
  */
 #define	EFSYS_TIMESTAMP(_usp)						\
 	do {								\
 		clock_t now;						\
 									\
 		now = ticks;						\
 		*(_usp) = (efsys_timestamp_t)				\
 		    (((int64_t)now * 1000000) / hz);			\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /* KMEM */
 
 /*
  * Allocate _size bytes of type M_SFXGE with M_NOWAIT|M_ZERO and store the
  * resulting pointer in _p.  _esip is only self-assigned (it must therefore
  * be a modifiable lvalue) and is otherwise unused.
  * NOTE(review): M_NOWAIT allocations can fail per malloc(9); callers are
  * presumably expected to check _p for NULL.
  */
 #define	EFSYS_KMEM_ALLOC(_esip, _size, _p)				\
 	do {								\
 		(_esip) = (_esip);					\
 		/*							\
 		 * The macro is used in non-sleepable contexts, for	\
 		 * example, holding a mutex.				\
 		 */							\
 		(_p) = malloc((_size), M_SFXGE, M_NOWAIT|M_ZERO);	\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /*
  * Release _p with free(_p, M_SFXGE).  _esip and _size are evaluated and
  * discarded.
  */
 #define	EFSYS_KMEM_FREE(_esip, _size, _p)				\
 	do {								\
 		(void) (_esip);						\
 		(void) (_size);						\
 		free((_p), M_SFXGE);					\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /* LOCK */
 
 /* A mutex together with the storage for its name. */
 typedef struct efsys_lock_s {
 	struct mtx	lock;				/* the mutex itself */
 	char		lock_name[SFXGE_LOCK_NAME_MAX];	/* "<ifname>:<label>" */
 } efsys_lock_t;
 
 /*
  * SFXGE_EFSYS_LOCK_INIT formats "<_ifname>:<_label>" into lock_name
  * (truncated by snprintf() to the buffer size) and initialises the mutex as
  * MTX_DEF with that name; _eslp is evaluated once via a local copy.
  * The remaining macros destroy, acquire and release the mutex, and assert
  * (MA_OWNED) that the caller holds it.
  */
 #define	SFXGE_EFSYS_LOCK_INIT(_eslp, _ifname, _label)			\
 	do {								\
 		efsys_lock_t *__eslp = (_eslp);				\
 									\
 		snprintf((__eslp)->lock_name,				\
 			 sizeof((__eslp)->lock_name),			\
 			 "%s:%s", (_ifname), (_label));			\
 		mtx_init(&(__eslp)->lock, (__eslp)->lock_name,		\
 			 NULL, MTX_DEF);				\
 	} while (B_FALSE)
 #define	SFXGE_EFSYS_LOCK_DESTROY(_eslp)					\
 	mtx_destroy(&(_eslp)->lock)
 #define	SFXGE_EFSYS_LOCK(_eslp)						\
 	mtx_lock(&(_eslp)->lock)
 #define	SFXGE_EFSYS_UNLOCK(_eslp)					\
 	mtx_unlock(&(_eslp)->lock)
 #define	SFXGE_EFSYS_LOCK_ASSERT_OWNED(_eslp)				\
 	mtx_assert(&(_eslp)->lock, MA_OWNED)
 
 /* Marker stored in the caller's _state by EFSYS_LOCK(). */
 #define	EFSYS_LOCK_MAGIC	0x000010c4
 
 /* Acquire the lock and record EFSYS_LOCK_MAGIC in _state. */
 #define	EFSYS_LOCK(_lockp, _state)					\
 	do {								\
 		SFXGE_EFSYS_LOCK(_lockp);				\
 		(_state) = EFSYS_LOCK_MAGIC;				\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /*
  * Release the lock.  If _state does not hold EFSYS_LOCK_MAGIC the
  * "not locked" KASSERT fires first; the unlock is attempted either way.
  */
 #define	EFSYS_UNLOCK(_lockp, _state)					\
 	do {								\
 		if ((_state) != EFSYS_LOCK_MAGIC)			\
 			KASSERT(B_FALSE, ("not locked"));		\
 		SFXGE_EFSYS_UNLOCK(_lockp);				\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /* PREEMPT */
 
 /*
  * Enter a critical section with critical_enter().  _state is only
  * self-assigned (so it must be a modifiable lvalue); nothing is stored
  * in it.
  */
 #define	EFSYS_PREEMPT_DISABLE(_state)					\
 	do {								\
 		(_state) = (_state);					\
 		critical_enter();					\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /*
  * Leave the critical section entered by EFSYS_PREEMPT_DISABLE().
  *
  * _state is only self-assigned, mirroring EFSYS_PREEMPT_DISABLE(); it
  * carries no information.  critical_exit() takes no arguments, so _state
  * must not be passed to it.
  */
 #define	EFSYS_PREEMPT_ENABLE(_state)					\
 	do {								\
 		(_state) = (_state);					\
 		critical_exit();					\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /* STAT */
 
 /* Statistic counter type updated through the EFSYS_STAT_* macros. */
 typedef uint64_t		efsys_stat_t;
 
 /* Add _delta to the counter pointed to by _knp. */
 #define	EFSYS_STAT_INCR(_knp, _delta) 					\
 	do {								\
 		*(_knp) += (_delta);					\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /* Subtract _delta from the counter pointed to by _knp. */
 #define	EFSYS_STAT_DECR(_knp, _delta) 					\
 	do {								\
 		*(_knp) -= (_delta);					\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /* Set the counter pointed to by _knp to _val. */
 #define	EFSYS_STAT_SET(_knp, _val)					\
 	do {								\
 		*(_knp) = (_val);					\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /* Set the counter to le64toh() of the qword's eq_u64[0]. */
 #define	EFSYS_STAT_SET_QWORD(_knp, _valp)				\
 	do {								\
 		*(_knp) = le64toh((_valp)->eq_u64[0]);			\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /* Set the counter to le32toh() of the dword's ed_u32[0]. */
 #define	EFSYS_STAT_SET_DWORD(_knp, _valp)				\
 	do {								\
 		*(_knp) = le32toh((_valp)->ed_u32[0]);			\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /* Add le64toh() of the qword's eq_u64[0] to the counter. */
 #define	EFSYS_STAT_INCR_QWORD(_knp, _valp)				\
 	do {								\
 		*(_knp) += le64toh((_valp)->eq_u64[0]);			\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /* Subtract le64toh() of the qword's eq_u64[0] from the counter. */
 #define	EFSYS_STAT_SUBR_QWORD(_knp, _valp)				\
 	do {								\
 		*(_knp) -= le64toh((_valp)->eq_u64[0]);			\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 
 /* ERR */
 
 /* Error reporting routine, defined outside this header. */
 extern void	sfxge_err(efsys_identifier_t *, unsigned int,
 		    uint32_t, uint32_t);
 
 /*
  * EFSYS_ERR forwards its four arguments to sfxge_err().  It is defined
  * only when EFSYS_OPT_DECODE_INTR_FATAL is non-zero.
  */
 #if EFSYS_OPT_DECODE_INTR_FATAL
 #define	EFSYS_ERR(_esip, _code, _dword0, _dword1)			\
 	do {								\
 		sfxge_err((_esip), (_code), (_dword0), (_dword1));	\
 	_NOTE(CONSTANTCONDITION)					\
 	} while (B_FALSE)
 #endif
 
 /* ASSERT */
 
 /*
  * EFSYS_ASSERT(_exp): panic with the text of _exp when _exp is false.
  * The check is unconditional; it does not depend on any debug option.
  */
 #define	EFSYS_ASSERT(_exp)						\
 	do {								\
 		if (!(_exp))						\
 			panic("%s", #_exp);				\
 	} while (0)
 
 /*
  * EFSYS_ASSERT3(_x, _op, _y, _t): convert both operands to type _t, each
  * evaluated exactly once, and panic with the file and line of the
  * assertion unless "_x _op _y" holds.
  */
 #define	EFSYS_ASSERT3(_x, _op, _y, _t)					\
 	do {								\
 		const _t __lhs = (_t)(_x);				\
 		const _t __rhs = (_t)(_y);				\
 									\
 		if (__lhs _op __rhs) {					\
 			/* relation holds: nothing to do */		\
 		} else {						\
 			panic("assertion failed at %s:%u",		\
 			    __FILE__, __LINE__);			\
 		}							\
 	} while (0)
 
 /* Unsigned, signed and pointer-sized flavours of EFSYS_ASSERT3. */
 #define	EFSYS_ASSERT3U(_x, _op, _y)	EFSYS_ASSERT3(_x, _op, _y, uint64_t)
 #define	EFSYS_ASSERT3S(_x, _op, _y)	EFSYS_ASSERT3(_x, _op, _y, int64_t)
 #define	EFSYS_ASSERT3P(_x, _op, _y)	EFSYS_ASSERT3(_x, _op, _y, uintptr_t)
 
 /* ROTATE */
 
 /* 0: presumably means no dword rotate-left primitive is provided - confirm. */
 #define	EFSYS_HAS_ROTL_DWORD 0
 
 #ifdef	__cplusplus
 }
 #endif
 
 #endif	/* _SYS_EFSYS_H */
Index: projects/clang380-import/sys/dev/ti/if_ti.c
===================================================================
--- projects/clang380-import/sys/dev/ti/if_ti.c	(revision 293686)
+++ projects/clang380-import/sys/dev/ti/if_ti.c	(revision 293687)
@@ -1,4057 +1,4057 @@
 /*-
  * Copyright (c) 1997, 1998, 1999
  *	Bill Paul <wpaul@ctr.columbia.edu>.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by Bill Paul.
  * 4. Neither the name of the author nor the names of any co-contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * Alteon Networks Tigon PCI gigabit ethernet driver for FreeBSD.
  * Manuals, sample driver and firmware source kits are available
  * from http://www.alteon.com/support/openkits.
  *
  * Written by Bill Paul <wpaul@ctr.columbia.edu>
  * Electrical Engineering Department
  * Columbia University, New York City
  */
 
 /*
  * The Alteon Networks Tigon chip contains an embedded R4000 CPU,
  * gigabit MAC, dual DMA channels and a PCI interface unit. NICs
  * using the Tigon may have anywhere from 512K to 2MB of SRAM. The
  * Tigon supports hardware IP, TCP and UDP checksumming, multicast
  * filtering and jumbo (9014 byte) frames. The hardware is largely
  * controlled by firmware, which must be loaded into the NIC during
  * initialization.
  *
  * The Tigon 2 contains 2 R4000 CPUs and requires a newer firmware
  * revision, which supports new features such as extended commands,
  * extended jumbo receive ring descriptors and a mini receive ring.
  *
  * Alteon Networks is to be commended for releasing such a vast amount
  * of development material for the Tigon NIC without requiring an NDA
  * (although they really should have done it a long time ago). With
  * any luck, the other vendors will finally wise up and follow Alteon's
  * stellar example.
  *
  * The firmware for the Tigon 1 and 2 NICs is compiled directly into
  * this driver by #including it as a C header file. This bloats the
  * driver somewhat, but it's the easiest method considering that the
  * driver code and firmware code need to be kept in sync. The source
  * for the firmware is not provided with the FreeBSD distribution since
  * compiling it requires a GNU toolchain targeted for mips-sgi-irix5.3.
  *
  * The following people deserve special thanks:
  * - Terry Murphy of 3Com, for providing a 3c985 Tigon 1 board
  *   for testing
  * - Raymond Lee of Netgear, for providing a pair of Netgear
  *   GA620 Tigon 2 boards for testing
  * - Ulf Zimmermann, for bringing the GA620 to my attention and
  *   convincing me to write this driver.
  * - Andrew Gallatin for providing FreeBSD/Alpha support.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ti.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/queue.h>
 #include <sys/conf.h>
 #include <sys/sf_buf.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/ethernet.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 
 #include <net/bpf.h>
 
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
 #include <netinet/ip.h>
 
 #include <machine/bus.h>
 #include <machine/resource.h>
 #include <sys/bus.h>
 #include <sys/rman.h>
 
 #ifdef TI_SF_BUF_JUMBO
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 #endif
 
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 
 #include <sys/tiio.h>
 #include <dev/ti/if_tireg.h>
 #include <dev/ti/ti_fw.h>
 #include <dev/ti/ti_fw2.h>
 
 #include <sys/sysctl.h>
 
 #define TI_CSUM_FEATURES	(CSUM_IP | CSUM_TCP | CSUM_UDP)
 /*
  * We can only turn on header splitting if we're using extended receive
  * BDs.
  */
 #if defined(TI_JUMBO_HDRSPLIT) && !defined(TI_SF_BUF_JUMBO)
 #error "options TI_JUMBO_HDRSPLIT requires TI_SF_BUF_JUMBO"
 #endif /* TI_JUMBO_HDRSPLIT && !TI_SF_BUF_JUMBO */
 
 /*
  * Direction selector for ti_bcopy_swap(): TI_SWAP_NTOH converts each word
  * with ntohl(), any other value (TI_SWAP_HTON) converts with htonl().
  */
 typedef enum {
 	TI_SWAP_HTON,
 	TI_SWAP_NTOH
 } ti_swap_type;
 
 /*
  * Various supported device vendors/types and their names.
  * Each entry pairs a vendor ID and a device ID with a description string;
  * the table is terminated by an all-zero entry with a NULL name.
  */
 
 static const struct ti_type ti_devs[] = {
 	{ ALT_VENDORID,	ALT_DEVICEID_ACENIC,
 		"Alteon AceNIC 1000baseSX Gigabit Ethernet" },
 	{ ALT_VENDORID,	ALT_DEVICEID_ACENIC_COPPER,
 		"Alteon AceNIC 1000baseT Gigabit Ethernet" },
 	{ TC_VENDORID,	TC_DEVICEID_3C985,
 		"3Com 3c985-SX Gigabit Ethernet" },
 	{ NG_VENDORID, NG_DEVICEID_GA620,
 		"Netgear GA620 1000baseSX Gigabit Ethernet" },
 	{ NG_VENDORID, NG_DEVICEID_GA620T,
 		"Netgear GA620 1000baseT Gigabit Ethernet" },
 	{ SGI_VENDORID, SGI_DEVICEID_TIGON,
 		"Silicon Graphics Gigabit Ethernet" },
 	{ DEC_VENDORID, DEC_DEVICEID_FARALLON_PN9000SX,
 		"Farallon PN9000SX Gigabit Ethernet" },
 	{ 0, 0, NULL }
 };
 
 
 static	d_open_t	ti_open;
 static	d_close_t	ti_close;
 static	d_ioctl_t	ti_ioctl2;
 
 /*
  * Character device switch named "ti": open, close and ioctl requests are
  * routed to ti_open(), ti_close() and ti_ioctl2() respectively.
  */
 static struct cdevsw ti_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_flags =	0,
 	.d_open =	ti_open,
 	.d_close =	ti_close,
 	.d_ioctl =	ti_ioctl2,
 	.d_name =	"ti",
 };
 
 static int ti_probe(device_t);
 static int ti_attach(device_t);
 static int ti_detach(device_t);
 static void ti_txeof(struct ti_softc *);
 static void ti_rxeof(struct ti_softc *);
 
 static int ti_encap(struct ti_softc *, struct mbuf **);
 
 static void ti_intr(void *);
 static void ti_start(struct ifnet *);
 static void ti_start_locked(struct ifnet *);
 static int ti_ioctl(struct ifnet *, u_long, caddr_t);
 static uint64_t ti_get_counter(struct ifnet *, ift_counter);
 static void ti_init(void *);
 static void ti_init_locked(void *);
 static void ti_init2(struct ti_softc *);
 static void ti_stop(struct ti_softc *);
 static void ti_watchdog(void *);
 static int ti_shutdown(device_t);
 static int ti_ifmedia_upd(struct ifnet *);
 static int ti_ifmedia_upd_locked(struct ti_softc *);
 static void ti_ifmedia_sts(struct ifnet *, struct ifmediareq *);
 
 static uint32_t ti_eeprom_putbyte(struct ti_softc *, int);
 static uint8_t	ti_eeprom_getbyte(struct ti_softc *, int, uint8_t *);
 static int ti_read_eeprom(struct ti_softc *, caddr_t, int, int);
 
 static void ti_add_mcast(struct ti_softc *, struct ether_addr *);
 static void ti_del_mcast(struct ti_softc *, struct ether_addr *);
 static void ti_setmulti(struct ti_softc *);
 
 static void ti_mem_read(struct ti_softc *, uint32_t, uint32_t, void *);
 static void ti_mem_write(struct ti_softc *, uint32_t, uint32_t, void *);
 static void ti_mem_zero(struct ti_softc *, uint32_t, uint32_t);
 static int ti_copy_mem(struct ti_softc *, uint32_t, uint32_t, caddr_t, int,
     int);
 static int ti_copy_scratch(struct ti_softc *, uint32_t, uint32_t, caddr_t,
     int, int, int);
 static int ti_bcopy_swap(const void *, void *, size_t, ti_swap_type);
 static void ti_loadfw(struct ti_softc *);
 static void ti_cmd(struct ti_softc *, struct ti_cmd_desc *);
 static void ti_cmd_ext(struct ti_softc *, struct ti_cmd_desc *, caddr_t, int);
 static void ti_handle_events(struct ti_softc *);
 static void ti_dma_map_addr(void *, bus_dma_segment_t *, int, int);
 static int ti_dma_alloc(struct ti_softc *);
 static void ti_dma_free(struct ti_softc *);
 static int ti_dma_ring_alloc(struct ti_softc *, bus_size_t, bus_size_t,
     bus_dma_tag_t *, uint8_t **, bus_dmamap_t *, bus_addr_t *, const char *);
 static void ti_dma_ring_free(struct ti_softc *, bus_dma_tag_t *, uint8_t **,
     bus_dmamap_t, bus_addr_t *);
 static int ti_newbuf_std(struct ti_softc *, int);
 static int ti_newbuf_mini(struct ti_softc *, int);
 static int ti_newbuf_jumbo(struct ti_softc *, int, struct mbuf *);
 static int ti_init_rx_ring_std(struct ti_softc *);
 static void ti_free_rx_ring_std(struct ti_softc *);
 static int ti_init_rx_ring_jumbo(struct ti_softc *);
 static void ti_free_rx_ring_jumbo(struct ti_softc *);
 static int ti_init_rx_ring_mini(struct ti_softc *);
 static void ti_free_rx_ring_mini(struct ti_softc *);
 static void ti_free_tx_ring(struct ti_softc *);
 static int ti_init_tx_ring(struct ti_softc *);
 static void ti_discard_std(struct ti_softc *, int);
 #ifndef TI_SF_BUF_JUMBO
 static void ti_discard_jumbo(struct ti_softc *, int);
 #endif
 static void ti_discard_mini(struct ti_softc *, int);
 
 static int ti_64bitslot_war(struct ti_softc *);
 static int ti_chipinit(struct ti_softc *);
 static int ti_gibinit(struct ti_softc *);
 
 #ifdef TI_JUMBO_HDRSPLIT
 static __inline void ti_hdr_split(struct mbuf *top, int hdr_len, int pkt_len,
     int idx);
 #endif /* TI_JUMBO_HDRSPLIT */
 
 static void ti_sysctl_node(struct ti_softc *);
 
 /*
  * Device method table: maps the probe, attach, detach and shutdown
  * device-interface methods onto this driver's handlers.  The list ends
  * with an all-zero sentinel entry.
  */
 static device_method_t ti_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		ti_probe),
 	DEVMETHOD(device_attach,	ti_attach),
 	DEVMETHOD(device_detach,	ti_detach),
 	DEVMETHOD(device_shutdown,	ti_shutdown),
 	{ 0, 0 }
 };
 
 /* Driver description: name, method table and per-instance softc size. */
 static driver_t ti_driver = {
 	"ti",
 	ti_methods,
 	sizeof(struct ti_softc)
 };
 
 static devclass_t ti_devclass;
 
 /* Register the driver on the pci bus and declare its module dependencies. */
 DRIVER_MODULE(ti, pci, ti_driver, ti_devclass, 0, 0);
 MODULE_DEPEND(ti, pci, 1, 1, 1);
 MODULE_DEPEND(ti, ether, 1, 1, 1);
 
 /*
  * Clock one byte (an instruction or an address byte) out to the EEPROM,
  * most significant bit first, then sample the data-in line for the ACK.
  * Returns the masked TI_MLC_EE_DIN bit read during the ACK clock; callers
  * treat a non-zero result as a failure.
  */
 static uint32_t
 ti_eeprom_putbyte(struct ti_softc *sc, int byte)
 {
 	int ack, mask;
 
 	/* Switch the interface into transmit mode. */
 	TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_TXEN);
 
 	/* Present each bit on the data-out line and strobe the clock. */
 	mask = 0x80;
 	while (mask != 0) {
 		if ((byte & mask) != 0) {
 			TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_DOUT);
 		} else {
 			TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_DOUT);
 		}
 		DELAY(1);
 		TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK);
 		DELAY(1);
 		TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK);
 		mask >>= 1;
 	}
 
 	/* Leave transmit mode so the data-in line can be sampled. */
 	TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_TXEN);
 
 	/* Raise the clock, read the ACK bit, then drop the clock again. */
 	TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK);
 	ack = CSR_READ_4(sc, TI_MISC_LOCAL_CTL) & TI_MLC_EE_DIN;
 	TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK);
 
 	return (ack);
 }
 
 /*
  * Read a byte of data stored in the EEPROM at address 'addr.'
  * We have to send two address bytes since the EEPROM can hold
  * more than 256 bytes of data.
  *
  * On success the byte is stored in *dest and 0 is returned.  If any of
  * the control or address bytes is not acknowledged, a diagnostic is
  * printed and 1 is returned with *dest left untouched.
  *
  * NOTE(review): the failure paths return without issuing EEPROM_STOP;
  * confirm whether the bus should be released there.
  */
 static uint8_t
 ti_eeprom_getbyte(struct ti_softc *sc, int addr, uint8_t *dest)
 {
 	int i;
 	uint8_t byte = 0;
 
 	EEPROM_START;
 
 	/*
 	 * Send write control code to EEPROM.
 	 */
 	if (ti_eeprom_putbyte(sc, EEPROM_CTL_WRITE)) {
 		device_printf(sc->ti_dev,
 		    "failed to send write command, status: %x\n",
 		    CSR_READ_4(sc, TI_MISC_LOCAL_CTL));
 		return (1);
 	}
 
 	/*
 	 * Send first byte of address of byte we want to read.
 	 */
 	if (ti_eeprom_putbyte(sc, (addr >> 8) & 0xFF)) {
 		device_printf(sc->ti_dev, "failed to send address, status: %x\n",
 		    CSR_READ_4(sc, TI_MISC_LOCAL_CTL));
 		return (1);
 	}
 	/*
 	 * Send second byte address of byte we want to read.
 	 */
 	if (ti_eeprom_putbyte(sc, addr & 0xFF)) {
 		device_printf(sc->ti_dev, "failed to send address, status: %x\n",
 		    CSR_READ_4(sc, TI_MISC_LOCAL_CTL));
 		return (1);
 	}
 
 	/* Restart the transaction before switching to the read command. */
 	EEPROM_STOP;
 	EEPROM_START;
 	/*
 	 * Send read control code to EEPROM.
 	 */
 	if (ti_eeprom_putbyte(sc, EEPROM_CTL_READ)) {
 		device_printf(sc->ti_dev,
 		    "failed to send read command, status: %x\n",
 		    CSR_READ_4(sc, TI_MISC_LOCAL_CTL));
 		return (1);
 	}
 
 	/*
 	 * Start reading bits from EEPROM.
 	 * Bits arrive most significant first: the mask starts at 0x80 and
 	 * is shifted right after every clock pulse.
 	 */
 	TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_TXEN);
 	for (i = 0x80; i; i >>= 1) {
 		TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK);
 		DELAY(1);
 		if (CSR_READ_4(sc, TI_MISC_LOCAL_CTL) & TI_MLC_EE_DIN)
 			byte |= i;
 		TI_CLRBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_EE_CLK);
 		DELAY(1);
 	}
 
 	EEPROM_STOP;
 
 	/*
 	 * No ACK generated for read, so just return byte.
 	 */
 
 	*dest = byte;
 
 	return (0);
 }
 
 /*
  * Read 'cnt' consecutive bytes from the EEPROM, starting at offset 'off',
  * into 'dest'.  Stops at the first byte that cannot be read.
  * Returns 0 when every byte was read and 1 otherwise.
  */
 static int
 ti_read_eeprom(struct ti_softc *sc, caddr_t dest, int off, int cnt)
 {
 	uint8_t val;
 	int idx;
 
 	val = 0;
 	for (idx = 0; idx < cnt; idx++) {
 		if (ti_eeprom_getbyte(sc, off + idx, &val) != 0)
 			return (1);
 		dest[idx] = val;
 	}
 
 	return (0);
 }
 
 /*
  * NIC memory read function.
  * Can be used to copy data from NIC local memory.
  *
  * Copies 'len' bytes starting at NIC address 'addr' into 'buf' through
  * the shared memory window, moving the window base as needed.  Data is
  * transferred in 32-bit words, so any trailing bytes beyond a multiple
  * of four are not copied.
  */
 static void
 ti_mem_read(struct ti_softc *sc, uint32_t addr, uint32_t len, void *buf)
 {
 	int segptr, segsize, cnt;
 	char *ptr;
 
 	segptr = addr;
 	cnt = len;
 	ptr = buf;
 
 	while (cnt) {
 		/*
 		 * Transfer at most up to the end of the current window,
 		 * even when fewer than TI_WINLEN bytes remain; otherwise
 		 * a short transfer that straddles a window boundary would
 		 * run past the end of the window.
 		 */
 		segsize = TI_WINLEN - (segptr % TI_WINLEN);
 		if (segsize > cnt)
 			segsize = cnt;
 		CSR_WRITE_4(sc, TI_WINBASE, (segptr & ~(TI_WINLEN - 1)));
 		bus_space_read_region_4(sc->ti_btag, sc->ti_bhandle,
 		    TI_WINDOW + (segptr & (TI_WINLEN - 1)), (uint32_t *)ptr,
 		    segsize / 4);
 		ptr += segsize;
 		segptr += segsize;
 		cnt -= segsize;
 	}
 }
 
 
 /*
  * NIC memory write function.
  * Can be used to copy data into NIC local memory.
  *
  * Copies 'len' bytes from 'buf' to NIC address 'addr' through the shared
  * memory window, moving the window base as needed.  Data is transferred
  * in 32-bit words, so any trailing bytes beyond a multiple of four are
  * not copied.
  */
 static void
 ti_mem_write(struct ti_softc *sc, uint32_t addr, uint32_t len, void *buf)
 {
 	int segptr, segsize, cnt;
 	char *ptr;
 
 	segptr = addr;
 	cnt = len;
 	ptr = buf;
 
 	while (cnt) {
 		/*
 		 * Transfer at most up to the end of the current window,
 		 * even when fewer than TI_WINLEN bytes remain; otherwise
 		 * a short transfer that straddles a window boundary would
 		 * run past the end of the window.
 		 */
 		segsize = TI_WINLEN - (segptr % TI_WINLEN);
 		if (segsize > cnt)
 			segsize = cnt;
 		CSR_WRITE_4(sc, TI_WINBASE, (segptr & ~(TI_WINLEN - 1)));
 		bus_space_write_region_4(sc->ti_btag, sc->ti_bhandle,
 		    TI_WINDOW + (segptr & (TI_WINLEN - 1)), (uint32_t *)ptr,
 		    segsize / 4);
 		ptr += segsize;
 		segptr += segsize;
 		cnt -= segsize;
 	}
 }
 
 /*
  * NIC memory zero function.
  * Can be used to clear a section of NIC local memory.
  *
  * Writes zeroes to 'len' bytes starting at NIC address 'addr' through
  * the shared memory window, moving the window base as needed.  The fill
  * is done in 32-bit words, so any trailing bytes beyond a multiple of
  * four are not cleared.
  */
 static void
 ti_mem_zero(struct ti_softc *sc, uint32_t addr, uint32_t len)
 {
 	int segptr, segsize, cnt;
 
 	segptr = addr;
 	cnt = len;
 
 	while (cnt) {
 		/*
 		 * Fill at most up to the end of the current window, even
 		 * when fewer than TI_WINLEN bytes remain; otherwise a
 		 * short fill that straddles a window boundary would run
 		 * past the end of the window.
 		 */
 		segsize = TI_WINLEN - (segptr % TI_WINLEN);
 		if (segsize > cnt)
 			segsize = cnt;
 		CSR_WRITE_4(sc, TI_WINBASE, (segptr & ~(TI_WINLEN - 1)));
 		bus_space_set_region_4(sc->ti_btag, sc->ti_bhandle,
 		    TI_WINDOW + (segptr & (TI_WINLEN - 1)), 0, segsize / 4);
 		segptr += segsize;
 		cnt -= segsize;
 	}
 }
 
 /*
  * Copy 'len' bytes between NIC memory at 'tigon_addr' and 'buf' through
  * the shared memory window.
  *
  * readdata != 0 copies from the NIC into 'buf'; readdata == 0 copies
  * from 'buf' to the NIC (the NIC address must then be word-aligned).
  * useraddr != 0 means 'buf' is accessed with copyin()/copyout(), and the
  * softc lock is dropped around those calls; otherwise bcopy() or
  * ti_bcopy_swap() is used directly.
  *
  * Must be called with the softc lock held.  The window base register is
  * saved on entry and restored before a successful return.
  * Returns EINVAL for an unaligned write, 0 otherwise.
  *
  * NOTE(review): the return values of copyin(), copyout() and
  * ti_bcopy_swap() are not checked here.
  */
 static int
 ti_copy_mem(struct ti_softc *sc, uint32_t tigon_addr, uint32_t len,
     caddr_t buf, int useraddr, int readdata)
 {
 	int segptr, segsize, cnt;
 	caddr_t ptr;
 	uint32_t origwin;
 	int resid, segresid;
 	int first_pass;
 
 	TI_LOCK_ASSERT(sc);
 
 	/*
 	 * At the moment, we don't handle non-aligned cases, we just bail.
 	 * If this proves to be a problem, it will be fixed.
 	 */
 	if (readdata == 0 && (tigon_addr & 0x3) != 0) {
 		device_printf(sc->ti_dev, "%s: tigon address %#x isn't "
 		    "word-aligned\n", __func__, tigon_addr);
 		device_printf(sc->ti_dev, "%s: unaligned writes aren't "
 		    "yet supported\n", __func__);
 		return (EINVAL);
 	}
 
 	/* Round the start address down to a word; segresid is the remainder. */
 	segptr = tigon_addr & ~0x3;
 	segresid = tigon_addr - segptr;
 
 	/*
 	 * This is the non-aligned amount left over that we'll need to
 	 * copy.
 	 */
 	resid = len & 0x3;
 
 	/* Add in the left over amount at the front of the buffer */
 	resid += segresid;
 
 	cnt = len & ~0x3;
 	/*
 	 * If resid + segresid is >= 4, add multiples of 4 to the count and
 	 * decrease the residual by that much.
 	 */
 	cnt += resid & ~0x3;
 	resid -= resid & ~0x3;
 
 	ptr = buf;
 
 	first_pass = 1;
 
 	/*
 	 * Save the old window base value.
 	 */
 	origwin = CSR_READ_4(sc, TI_WINBASE);
 
 	/* Whole-word portion: one window-sized segment per iteration. */
 	while (cnt) {
 		bus_size_t ti_offset;
 
 		if (cnt < TI_WINLEN)
 			segsize = cnt;
 		else
 			segsize = TI_WINLEN - (segptr % TI_WINLEN);
 		CSR_WRITE_4(sc, TI_WINBASE, (segptr & ~(TI_WINLEN - 1)));
 
 		ti_offset = TI_WINDOW + (segptr & (TI_WINLEN -1));
 
 		if (readdata) {
 			/* Stage the NIC data in ti_membuf before swapping. */
 			bus_space_read_region_4(sc->ti_btag, sc->ti_bhandle,
 			    ti_offset, (uint32_t *)sc->ti_membuf, segsize >> 2);
 			if (useraddr) {
 				/*
 				 * Yeah, this is a little on the kludgy
 				 * side, but at least this code is only
 				 * used for debugging.
 				 */
 				ti_bcopy_swap(sc->ti_membuf, sc->ti_membuf2,
 				    segsize, TI_SWAP_NTOH);
 
 				TI_UNLOCK(sc);
 				if (first_pass) {
 					copyout(&sc->ti_membuf2[segresid], ptr,
 					    segsize - segresid);
 					first_pass = 0;
 				} else
 					copyout(sc->ti_membuf2, ptr, segsize);
 				TI_LOCK(sc);
 			} else {
 				if (first_pass) {
 
 					ti_bcopy_swap(sc->ti_membuf,
 					    sc->ti_membuf2, segsize,
 					    TI_SWAP_NTOH);
 					TI_UNLOCK(sc);
 					bcopy(&sc->ti_membuf2[segresid], ptr,
 					    segsize - segresid);
 					TI_LOCK(sc);
 					first_pass = 0;
 				} else
 					ti_bcopy_swap(sc->ti_membuf, ptr,
 					    segsize, TI_SWAP_NTOH);
 			}
 
 		} else {
 			/* Swap the caller's data into ti_membuf, then write it. */
 			if (useraddr) {
 				TI_UNLOCK(sc);
 				copyin(ptr, sc->ti_membuf2, segsize);
 				TI_LOCK(sc);
 				ti_bcopy_swap(sc->ti_membuf2, sc->ti_membuf,
 				    segsize, TI_SWAP_HTON);
 			} else
 				ti_bcopy_swap(ptr, sc->ti_membuf, segsize,
 				    TI_SWAP_HTON);
 
 			bus_space_write_region_4(sc->ti_btag, sc->ti_bhandle,
 			    ti_offset, (uint32_t *)sc->ti_membuf, segsize >> 2);
 		}
 		segptr += segsize;
 		ptr += segsize;
 		cnt -= segsize;
 	}
 
 	/*
 	 * Handle leftover, non-word-aligned bytes.
 	 */
 	if (resid != 0) {
 		uint32_t tmpval, tmpval2;
 		bus_size_t ti_offset;
 
 		/*
 		 * Set the segment pointer.
 		 */
 		CSR_WRITE_4(sc, TI_WINBASE, (segptr & ~(TI_WINLEN - 1)));
 
 		ti_offset = TI_WINDOW + (segptr & (TI_WINLEN - 1));
 
 		/*
 		 * First, grab whatever is in our source/destination.
 		 * We'll obviously need this for reads, but also for
 		 * writes, since we'll be doing read/modify/write.
 		 */
 		bus_space_read_region_4(sc->ti_btag, sc->ti_bhandle,
 		    ti_offset, &tmpval, 1);
 
 		/*
 		 * Next, translate this from little-endian to big-endian
 		 * (at least on i386 boxes).
 		 */
 		tmpval2 = ntohl(tmpval);
 
 		if (readdata) {
 			/*
 			 * If we're reading, just copy the leftover number
 			 * of bytes from the host byte order buffer to
 			 * the user's buffer.
 			 */
 			if (useraddr) {
 				TI_UNLOCK(sc);
 				copyout(&tmpval2, ptr, resid);
 				TI_LOCK(sc);
 			} else
 				bcopy(&tmpval2, ptr, resid);
 		} else {
 			/*
 			 * If we're writing, first copy the bytes to be
 			 * written into the network byte order buffer,
 			 * leaving the rest of the buffer with whatever was
 			 * originally in there.  Then, swap the bytes
 			 * around into host order and write them out.
 			 *
 			 * XXX KDM the read side of this has been verified
 			 * to work, but the write side of it has not been
 			 * verified.  So user beware.
 			 */
 			if (useraddr) {
 				TI_UNLOCK(sc);
 				copyin(ptr, &tmpval2, resid);
 				TI_LOCK(sc);
 			} else
 				bcopy(ptr, &tmpval2, resid);
 
 			tmpval = htonl(tmpval2);
 
 			bus_space_write_region_4(sc->ti_btag, sc->ti_bhandle,
 			    ti_offset, &tmpval, 1);
 		}
 	}
 
 	/* Put the window base back where the caller had it. */
 	CSR_WRITE_4(sc, TI_WINBASE, origwin);
 
 	return (0);
 }
 
 /*
  * Copy 'len' bytes between the scratchpad of NIC CPU 'cpu' at
  * 'tigon_addr' and 'buf', one 32-bit word at a time, through that CPU's
  * SRAM address/data registers.
  *
  * readdata != 0 copies from the NIC into 'buf'; readdata == 0 copies
  * from 'buf' to the NIC.  useraddr != 0 means 'buf' is accessed with
  * copyin()/copyout(); otherwise bcopy() is used.
  *
  * Must be called with the softc lock held.  Returns EINVAL when the
  * address or length is not word-aligned, the copyin()/copyout() error
  * when a user-space access fails, and 0 on success.
  *
  * NOTE(review): unlike ti_copy_mem(), copyin()/copyout() are called here
  * without dropping the softc lock -- confirm that this is intended.
  */
 static int
 ti_copy_scratch(struct ti_softc *sc, uint32_t tigon_addr, uint32_t len,
     caddr_t buf, int useraddr, int readdata, int cpu)
 {
 	uint32_t segptr;
 	int cnt, error;
 	uint32_t tmpval, tmpval2;
 	caddr_t ptr;
 
 	TI_LOCK_ASSERT(sc);
 
 	/*
 	 * At the moment, we don't handle non-aligned cases, we just bail.
 	 * If this proves to be a problem, it will be fixed.
 	 */
 	if (tigon_addr & 0x3) {
 		device_printf(sc->ti_dev, "%s: tigon address %#x "
 		    "isn't word-aligned\n", __func__, tigon_addr);
 		return (EINVAL);
 	}
 
 	if (len & 0x3) {
 		/* len is unsigned, so print it with %u rather than %d. */
 		device_printf(sc->ti_dev, "%s: transfer length %u "
 		    "isn't word-aligned\n", __func__, len);
 		return (EINVAL);
 	}
 
 	segptr = tigon_addr;
 	cnt = len;
 	ptr = buf;
 
 	while (cnt) {
 		/* Select the scratchpad word to access. */
 		CSR_WRITE_4(sc, CPU_REG(TI_SRAM_ADDR, cpu), segptr);
 
 		if (readdata) {
 			tmpval2 = CSR_READ_4(sc, CPU_REG(TI_SRAM_DATA, cpu));
 
 			tmpval = ntohl(tmpval2);
 
 			/*
 			 * Note:  I've used this debugging interface
 			 * extensively with Alteon's 12.3.15 firmware,
 			 * compiled with GCC 2.7.2.1 and binutils 2.9.1.
 			 *
 			 * When you compile the firmware without
 			 * optimization, which is necessary sometimes in
 			 * order to properly step through it, you sometimes
 			 * read out a bogus value of 0xc0017c instead of
 			 * whatever was supposed to be in that scratchpad
 			 * location.  That value is on the stack somewhere,
 			 * but I've never been able to figure out what was
 			 * causing the problem.
 			 *
 			 * The address seems to pop up in random places,
 			 * often not in the same place on two subsequent
 			 * reads.
 			 *
 			 * In any case, the underlying data doesn't seem
 			 * to be affected, just the value read out.
 			 *
 			 * KDM, 3/7/2000
 			 */
 
 			if (tmpval2 == 0xc0017c)
 				device_printf(sc->ti_dev, "found 0xc0017c at "
 				    "%#x (tmpval2)\n", segptr);
 
 			if (tmpval == 0xc0017c)
 				device_printf(sc->ti_dev, "found 0xc0017c at "
 				    "%#x (tmpval)\n", segptr);
 
 			if (useraddr) {
 				/* Report a bad user buffer to the caller. */
 				error = copyout(&tmpval, ptr, 4);
 				if (error != 0)
 					return (error);
 			} else
 				bcopy(&tmpval, ptr, 4);
 		} else {
 			if (useraddr) {
 				/*
 				 * Don't write an uninitialized word to the
 				 * NIC when the user buffer can't be read.
 				 */
 				error = copyin(ptr, &tmpval2, 4);
 				if (error != 0)
 					return (error);
 			} else
 				bcopy(ptr, &tmpval2, 4);
 
 			tmpval = htonl(tmpval2);
 
 			CSR_WRITE_4(sc, CPU_REG(TI_SRAM_DATA, cpu), tmpval);
 		}
 
 		cnt -= 4;
 		segptr += 4;
 		ptr += 4;
 	}
 
 	return (0);
 }
 
 /*
  * Copy 'len' bytes from 'src' to 'dst' one 32-bit word at a time,
  * converting each word with ntohl() when swap_type is TI_SWAP_NTOH and
  * with htonl() otherwise.
  *
  * 'len' must be a multiple of four; if it is not, a message is printed,
  * nothing is copied and -1 is returned.  Returns 0 on success.
  * Both buffers are accessed through uint32_t pointers.
  */
 static int
 ti_bcopy_swap(const void *src, void *dst, size_t len, ti_swap_type swap_type)
 {
 	const uint8_t *tmpsrc;
 	uint8_t *tmpdst;
 	size_t tmplen;
 
 	if (len & 0x3) {
 		/* size_t is unsigned: use %zu, not %zd. */
 		printf("ti_bcopy_swap: length %zu isn't 32-bit aligned\n", len);
 		return (-1);
 	}
 
 	tmpsrc = src;
 	tmpdst = dst;
 	tmplen = len;
 
 	while (tmplen) {
 		if (swap_type == TI_SWAP_NTOH)
 			*(uint32_t *)tmpdst = ntohl(*(const uint32_t *)tmpsrc);
 		else
 			*(uint32_t *)tmpdst = htonl(*(const uint32_t *)tmpsrc);
 		tmpsrc += 4;
 		tmpdst += 4;
 		tmplen -= 4;
 	}
 
 	return (0);
 }
 
 /*
  * Download the firmware image that matches the detected hardware
  * revision (Tigon 1 or Tigon 2) into NIC memory and set the CPU program
  * counter to the image's start address.  The image is rejected, and
  * nothing is written, when its release numbers differ from the
  * TI_FIRMWARE_* values this driver expects.  Must be called with the
  * softc lock held.
  */
 static void
 ti_loadfw(struct ti_softc *sc)
 {
 
 	TI_LOCK_ASSERT(sc);
 
 	if (sc->ti_hwrev == TI_HWREV_TIGON) {
 		/* Tigon 1: verify the image revision first. */
 		if (!(tigonFwReleaseMajor == TI_FIRMWARE_MAJOR &&
 		    tigonFwReleaseMinor == TI_FIRMWARE_MINOR &&
 		    tigonFwReleaseFix == TI_FIRMWARE_FIX)) {
 			device_printf(sc->ti_dev, "firmware revision mismatch; "
 			    "want %d.%d.%d, got %d.%d.%d\n",
 			    TI_FIRMWARE_MAJOR, TI_FIRMWARE_MINOR,
 			    TI_FIRMWARE_FIX, tigonFwReleaseMajor,
 			    tigonFwReleaseMinor, tigonFwReleaseFix);
 			return;
 		}
 		/* Copy text, data and rodata; clear bss and sbss. */
 		ti_mem_write(sc, tigonFwTextAddr, tigonFwTextLen, tigonFwText);
 		ti_mem_write(sc, tigonFwDataAddr, tigonFwDataLen, tigonFwData);
 		ti_mem_write(sc, tigonFwRodataAddr, tigonFwRodataLen,
 		    tigonFwRodata);
 		ti_mem_zero(sc, tigonFwBssAddr, tigonFwBssLen);
 		ti_mem_zero(sc, tigonFwSbssAddr, tigonFwSbssLen);
 		CSR_WRITE_4(sc, TI_CPU_PROGRAM_COUNTER, tigonFwStartAddr);
 	} else if (sc->ti_hwrev == TI_HWREV_TIGON_II) {
 		/* Tigon 2: verify the image revision first. */
 		if (!(tigon2FwReleaseMajor == TI_FIRMWARE_MAJOR &&
 		    tigon2FwReleaseMinor == TI_FIRMWARE_MINOR &&
 		    tigon2FwReleaseFix == TI_FIRMWARE_FIX)) {
 			device_printf(sc->ti_dev, "firmware revision mismatch; "
 			    "want %d.%d.%d, got %d.%d.%d\n",
 			    TI_FIRMWARE_MAJOR, TI_FIRMWARE_MINOR,
 			    TI_FIRMWARE_FIX, tigon2FwReleaseMajor,
 			    tigon2FwReleaseMinor, tigon2FwReleaseFix);
 			return;
 		}
 		/* Copy text, data and rodata; clear bss and sbss. */
 		ti_mem_write(sc, tigon2FwTextAddr, tigon2FwTextLen,
 		    tigon2FwText);
 		ti_mem_write(sc, tigon2FwDataAddr, tigon2FwDataLen,
 		    tigon2FwData);
 		ti_mem_write(sc, tigon2FwRodataAddr, tigon2FwRodataLen,
 		    tigon2FwRodata);
 		ti_mem_zero(sc, tigon2FwBssAddr, tigon2FwBssLen);
 		ti_mem_zero(sc, tigon2FwSbssAddr, tigon2FwSbssLen);
 		CSR_WRITE_4(sc, TI_CPU_PROGRAM_COUNTER, tigon2FwStartAddr);
 	} else {
 		device_printf(sc->ti_dev,
 		    "can't load firmware: unknown hardware rev\n");
 	}
 }
 
 /*
  * Post a single command descriptor to the NIC: write it into the next
  * command ring slot, advance the producer index (wrapping at
  * TI_CMD_RING_CNT), publish the new index to the NIC and remember it.
  */
 static void
 ti_cmd(struct ti_softc *sc, struct ti_cmd_desc *cmd)
 {
 	int slot;
 
 	slot = sc->ti_cmd_saved_prodidx;
 
 	/* The descriptor is written to the ring as one 32-bit word. */
 	CSR_WRITE_4(sc, TI_GCR_CMDRING + (slot * 4), *(uint32_t *)(cmd));
 	TI_INC(slot, TI_CMD_RING_CNT);
 
 	CSR_WRITE_4(sc, TI_MB_CMDPROD_IDX, slot);
 	sc->ti_cmd_saved_prodidx = slot;
 }
 
 /*
  * Post an extended command to the NIC.  The command descriptor goes
  * into the next ring slot and is followed by 'len' additional 32-bit
  * words taken from 'arg', one per slot; the producer index is published
  * to the NIC only after all slots have been written.
  */
 static void
 ti_cmd_ext(struct ti_softc *sc, struct ti_cmd_desc *cmd, caddr_t arg, int len)
 {
 	int slot;
 	int n;
 
 	slot = sc->ti_cmd_saved_prodidx;
 
 	/* Leading command word. */
 	CSR_WRITE_4(sc, TI_GCR_CMDRING + (slot * 4), *(uint32_t *)(cmd));
 	TI_INC(slot, TI_CMD_RING_CNT);
 
 	/* Trailing argument words. */
 	n = 0;
 	while (n < len) {
 		CSR_WRITE_4(sc, TI_GCR_CMDRING + (slot * 4),
 		    *(uint32_t *)(arg + (n * 4)));
 		TI_INC(slot, TI_CMD_RING_CNT);
 		n++;
 	}
 
 	CSR_WRITE_4(sc, TI_MB_CMDPROD_IDX, slot);
 	sc->ti_cmd_saved_prodidx = slot;
 }
 
 /*
  * Handle events that have triggered interrupts.
  *
  * Walks the event ring from the saved consumer index up to the producer
  * index, dispatching on the event type, and writes the consumer index
  * back to the NIC after every event.  Does nothing when the event ring
  * has not been allocated.
  */
 static void
 ti_handle_events(struct ti_softc *sc)
 {
 	struct ti_event_desc *e;
 
 	if (sc->ti_rdata.ti_event_ring == NULL)
 		return;
 
 	/* Sync the ring for CPU reads before looking at it. */
 	bus_dmamap_sync(sc->ti_cdata.ti_event_ring_tag,
 	    sc->ti_cdata.ti_event_ring_map, BUS_DMASYNC_POSTREAD);
 	while (sc->ti_ev_saved_considx != sc->ti_ev_prodidx.ti_idx) {
 		e = &sc->ti_rdata.ti_event_ring[sc->ti_ev_saved_considx];
 		switch (TI_EVENT_EVENT(e)) {
 		case TI_EV_LINKSTAT_CHANGED:
 			/* Record the new state and update link/baudrate. */
 			sc->ti_linkstat = TI_EVENT_CODE(e);
 			if (sc->ti_linkstat == TI_EV_CODE_LINK_UP) {
 				if_link_state_change(sc->ti_ifp, LINK_STATE_UP);
 				sc->ti_ifp->if_baudrate = IF_Mbps(100);
 				if (bootverbose)
 					device_printf(sc->ti_dev,
 					    "10/100 link up\n");
 			} else if (sc->ti_linkstat == TI_EV_CODE_GIG_LINK_UP) {
 				if_link_state_change(sc->ti_ifp, LINK_STATE_UP);
 				sc->ti_ifp->if_baudrate = IF_Gbps(1UL);
 				if (bootverbose)
 					device_printf(sc->ti_dev,
 					    "gigabit link up\n");
 			} else if (sc->ti_linkstat == TI_EV_CODE_LINK_DOWN) {
 				if_link_state_change(sc->ti_ifp,
 				    LINK_STATE_DOWN);
 				sc->ti_ifp->if_baudrate = 0;
 				if (bootverbose)
 					device_printf(sc->ti_dev,
 					    "link down\n");
 			}
 			break;
 		case TI_EV_ERROR:
 			/* Only the three known error codes are reported. */
 			if (TI_EVENT_CODE(e) == TI_EV_CODE_ERR_INVAL_CMD)
 				device_printf(sc->ti_dev, "invalid command\n");
 			else if (TI_EVENT_CODE(e) == TI_EV_CODE_ERR_UNIMP_CMD)
 				device_printf(sc->ti_dev, "unknown command\n");
 			else if (TI_EVENT_CODE(e) == TI_EV_CODE_ERR_BADCFG)
 				device_printf(sc->ti_dev, "bad config data\n");
 			break;
 		case TI_EV_FIRMWARE_UP:
 			/* Firmware is running: continue with ti_init2(). */
 			ti_init2(sc);
 			break;
 		case TI_EV_STATS_UPDATED:
 		case TI_EV_RESET_JUMBO_RING:
 		case TI_EV_MCAST_UPDATED:
 			/* Who cares. */
 			break;
 		default:
 			device_printf(sc->ti_dev, "unknown event: %d\n",
 			    TI_EVENT_EVENT(e));
 			break;
 		}
 		/* Advance the consumer index. */
 		TI_INC(sc->ti_ev_saved_considx, TI_EVENT_RING_CNT);
 		CSR_WRITE_4(sc, TI_GCR_EVENTCONS_IDX, sc->ti_ev_saved_considx);
 	}
 	/* Hand the ring back to the device for further writes. */
 	bus_dmamap_sync(sc->ti_cdata.ti_event_ring_tag,
 	    sc->ti_cdata.ti_event_ring_map, BUS_DMASYNC_PREREAD);
 }
 
 /*
  * Callback context for ti_dma_map_addr(): receives the bus address of
  * the first DMA segment of a loaded map.
  */
 struct ti_dmamap_arg {
 	bus_addr_t	ti_busaddr;
 };
 
 /*
  * DMA map load callback.  On success, stores the bus address of the
  * (single) segment in the struct ti_dmamap_arg passed as 'arg'; on error
  * the context is left untouched.
  */
 static void
 ti_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
 {
 
 	if (error != 0)
 		return;
 
 	KASSERT(nseg == 1, ("%s: %d segments returned!", __func__, nseg));
 
 	((struct ti_dmamap_arg *)arg)->ti_busaddr = segs[0].ds_addr;
 }
 
 /*
  * Create a single-segment DMA tag of 'maxsize' bytes with the given
  * alignment, allocate zeroed DMA memory for it and load the map.
  * On success *tag, *ring, *map and *paddr describe the new area and 0 is
  * returned.  On failure a message naming 'msg' is printed and the error
  * from the failing step is returned; anything created by earlier steps
  * is left in place and is not released here.
  */
 static int
 ti_dma_ring_alloc(struct ti_softc *sc, bus_size_t alignment, bus_size_t maxsize,
     bus_dma_tag_t *tag, uint8_t **ring, bus_dmamap_t *map, bus_addr_t *paddr,
     const char *msg)
 {
 	struct ti_dmamap_arg cb;
 	int rc;
 
 	/* Step 1: the tag, as a child of the parent tag. */
 	rc = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag,
 	    alignment, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL,
 	    NULL, maxsize, 1, maxsize, 0, NULL, NULL, tag);
 	if (rc) {
 		device_printf(sc->ti_dev,
 		    "could not create %s dma tag\n", msg);
 		return (rc);
 	}
 
 	/* Step 2: the backing memory. */
 	rc = bus_dmamem_alloc(*tag, (void **)ring,
 	    BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, map);
 	if (rc) {
 		device_printf(sc->ti_dev,
 		    "could not allocate DMA'able memory for %s\n", msg);
 		return (rc);
 	}
 
 	/* Step 3: load the map to learn the bus address. */
 	cb.ti_busaddr = 0;
 	rc = bus_dmamap_load(*tag, *map, *ring, maxsize, ti_dma_map_addr,
 	    &cb, BUS_DMA_NOWAIT);
 	if (rc) {
 		device_printf(sc->ti_dev,
 		    "could not load DMA'able memory for %s\n", msg);
 		return (rc);
 	}
 
 	*paddr = cb.ti_busaddr;
 	return (0);
 }
 
 /*
  * Release a DMA area in three independent steps: unload the map if a
  * bus address is recorded, free the memory if a ring pointer is set, and
  * destroy the tag if one exists.  Each handle is cleared after use, so
  * a partially set-up area can be passed in.
  */
 static void
 ti_dma_ring_free(struct ti_softc *sc, bus_dma_tag_t *tag, uint8_t **ring,
     bus_dmamap_t map, bus_addr_t *paddr)
 {
 
 	/* A non-zero bus address means the map is loaded. */
 	if (*paddr != 0) {
 		bus_dmamap_unload(*tag, map);
 		*paddr = 0;
 	}
 
 	/* A non-NULL ring pointer means memory is allocated. */
 	if (*ring != NULL) {
 		bus_dmamem_free(*tag, *ring, map);
 		*ring = NULL;
 	}
 
 	/* Finally drop the tag itself. */
 	if (*tag != NULL) {
 		bus_dma_tag_destroy(*tag);
 		*tag = NULL;
 	}
 }
 
 /*
  * Allocate every DMA resource the driver needs: the parent tag, the
  * shared-memory rings (GIB, status block, event ring, RX/TX rings), the
  * per-buffer tags for standard/jumbo/mini RX and TX mbufs, and one DMA
  * map per ring slot plus a spare map for each RX ring.
  *
  * Returns 0 on success or an errno value on the first failure.  Nothing
  * is released here on failure; NOTE(review): the caller is presumably
  * expected to run ti_dma_free() to unwind -- confirm against the attach
  * path.
  */
 static int
 ti_dma_alloc(struct ti_softc *sc)
 {
 	bus_addr_t lowaddr;
 	int i, error;
 
 	/* Restrict DMA to the low 4GB unless ti_dac is set. */
 	lowaddr = BUS_SPACE_MAXADDR;
 	if (sc->ti_dac == 0)
 		lowaddr = BUS_SPACE_MAXADDR_32BIT;
 
 	error = bus_dma_tag_create(bus_get_dma_tag(sc->ti_dev), 1, 0, lowaddr,
 	    BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, 0,
 	    BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL,
 	    &sc->ti_cdata.ti_parent_tag);
 	if (error != 0) {
 		device_printf(sc->ti_dev,
 		    "could not allocate parent dma tag\n");
 		return (ENOMEM);
 	}
 
 	/* General information block */
 	error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, sizeof(struct ti_gib),
 	    &sc->ti_cdata.ti_gib_tag, (uint8_t **)&sc->ti_rdata.ti_info,
 	    &sc->ti_cdata.ti_gib_map, &sc->ti_rdata.ti_info_paddr, "GIB");
 	if (error)
 		return (error);
 
 	/*
 	 * Producer/consumer status.  The label used to read "event ring",
 	 * duplicating the label of the allocation that follows.
 	 */
 	error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, sizeof(struct ti_status),
 	    &sc->ti_cdata.ti_status_tag, (uint8_t **)&sc->ti_rdata.ti_status,
 	    &sc->ti_cdata.ti_status_map, &sc->ti_rdata.ti_status_paddr,
 	    "status block");
 	if (error)
 		return (error);
 
 	/* Event ring */
 	error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, TI_EVENT_RING_SZ,
 	    &sc->ti_cdata.ti_event_ring_tag,
 	    (uint8_t **)&sc->ti_rdata.ti_event_ring,
 	    &sc->ti_cdata.ti_event_ring_map, &sc->ti_rdata.ti_event_ring_paddr,
 	    "event ring");
 	if (error)
 		return (error);
 
 	/* Command ring lives in shared memory so no need to create DMA area. */
 
 	/* Standard RX ring */
 	error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, TI_STD_RX_RING_SZ,
 	    &sc->ti_cdata.ti_rx_std_ring_tag,
 	    (uint8_t **)&sc->ti_rdata.ti_rx_std_ring,
 	    &sc->ti_cdata.ti_rx_std_ring_map,
 	    &sc->ti_rdata.ti_rx_std_ring_paddr, "RX ring");
 	if (error)
 		return (error);
 
 	/* Jumbo RX ring */
 	error = ti_dma_ring_alloc(sc, TI_JUMBO_RING_ALIGN, TI_JUMBO_RX_RING_SZ,
 	    &sc->ti_cdata.ti_rx_jumbo_ring_tag,
 	    (uint8_t **)&sc->ti_rdata.ti_rx_jumbo_ring,
 	    &sc->ti_cdata.ti_rx_jumbo_ring_map,
 	    &sc->ti_rdata.ti_rx_jumbo_ring_paddr, "jumbo RX ring");
 	if (error)
 		return (error);
 
 	/* RX return ring */
 	error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, TI_RX_RETURN_RING_SZ,
 	    &sc->ti_cdata.ti_rx_return_ring_tag,
 	    (uint8_t **)&sc->ti_rdata.ti_rx_return_ring,
 	    &sc->ti_cdata.ti_rx_return_ring_map,
 	    &sc->ti_rdata.ti_rx_return_ring_paddr, "RX return ring");
 	if (error)
 		return (error);
 
 	/* Create DMA tag for standard RX mbufs. */
 	error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, 1, 0,
 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
 	    MCLBYTES, 0, NULL, NULL, &sc->ti_cdata.ti_rx_std_tag);
 	if (error) {
 		device_printf(sc->ti_dev, "could not allocate RX dma tag\n");
 		return (error);
 	}
 
 	/* Create DMA tag for jumbo RX mbufs. */
 #ifdef TI_SF_BUF_JUMBO
 	/*
 	 * The VM system will take care of providing aligned pages.  Alignment
 	 * is set to 1 here so that busdma resources won't be wasted.
 	 */
 	error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, 1, 0,
 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, PAGE_SIZE * 4, 4,
 	    PAGE_SIZE, 0, NULL, NULL, &sc->ti_cdata.ti_rx_jumbo_tag);
 #else
 	error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, 1, 0,
 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MJUM9BYTES, 1,
 	    MJUM9BYTES, 0, NULL, NULL, &sc->ti_cdata.ti_rx_jumbo_tag);
 #endif
 	if (error) {
 		device_printf(sc->ti_dev,
 		    "could not allocate jumbo RX dma tag\n");
 		return (error);
 	}
 
 	/* Create DMA tag for TX mbufs. */
 	error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, 1,
 	    0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
 	    MCLBYTES * TI_MAXTXSEGS, TI_MAXTXSEGS, MCLBYTES, 0, NULL, NULL,
 	    &sc->ti_cdata.ti_tx_tag);
 	if (error) {
 		device_printf(sc->ti_dev, "could not allocate TX dma tag\n");
 		return (ENOMEM);
 	}
 
 	/* Create DMA maps for RX buffers. */
 	for (i = 0; i < TI_STD_RX_RING_CNT; i++) {
 		error = bus_dmamap_create(sc->ti_cdata.ti_rx_std_tag, 0,
 		    &sc->ti_cdata.ti_rx_std_maps[i]);
 		if (error) {
 			device_printf(sc->ti_dev,
 			    "could not create DMA map for RX\n");
 			return (error);
 		}
 	}
 	/* The spare map lets ti_newbuf_std() load a new buffer before unloading the old. */
 	error = bus_dmamap_create(sc->ti_cdata.ti_rx_std_tag, 0,
 	    &sc->ti_cdata.ti_rx_std_sparemap);
 	if (error) {
 		device_printf(sc->ti_dev,
 		    "could not create spare DMA map for RX\n");
 		return (error);
 	}
 
 	/* Create DMA maps for jumbo RX buffers. */
 	for (i = 0; i < TI_JUMBO_RX_RING_CNT; i++) {
 		error = bus_dmamap_create(sc->ti_cdata.ti_rx_jumbo_tag, 0,
 		    &sc->ti_cdata.ti_rx_jumbo_maps[i]);
 		if (error) {
 			device_printf(sc->ti_dev,
 			    "could not create DMA map for jumbo RX\n");
 			return (error);
 		}
 	}
 	error = bus_dmamap_create(sc->ti_cdata.ti_rx_jumbo_tag, 0,
 	    &sc->ti_cdata.ti_rx_jumbo_sparemap);
 	if (error) {
 		device_printf(sc->ti_dev,
 		    "could not create spare DMA map for jumbo RX\n");
 		return (error);
 	}
 
 	/* Create DMA maps for TX buffers. */
 	for (i = 0; i < TI_TX_RING_CNT; i++) {
 		error = bus_dmamap_create(sc->ti_cdata.ti_tx_tag, 0,
 		    &sc->ti_cdata.ti_txdesc[i].tx_dmamap);
 		if (error) {
 			device_printf(sc->ti_dev,
 			    "could not create DMA map for TX\n");
 			return (ENOMEM);
 		}
 	}
 
 	/* Mini ring and TX ring is not available on Tigon 1. */
 	if (sc->ti_hwrev == TI_HWREV_TIGON)
 		return (0);
 
 	/* TX ring */
 	error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, TI_TX_RING_SZ,
 	    &sc->ti_cdata.ti_tx_ring_tag, (uint8_t **)&sc->ti_rdata.ti_tx_ring,
 	    &sc->ti_cdata.ti_tx_ring_map, &sc->ti_rdata.ti_tx_ring_paddr,
 	    "TX ring");
 	if (error)
 		return (error);
 
 	/* Mini RX ring */
 	error = ti_dma_ring_alloc(sc, TI_RING_ALIGN, TI_MINI_RX_RING_SZ,
 	    &sc->ti_cdata.ti_rx_mini_ring_tag,
 	    (uint8_t **)&sc->ti_rdata.ti_rx_mini_ring,
 	    &sc->ti_cdata.ti_rx_mini_ring_map,
 	    &sc->ti_rdata.ti_rx_mini_ring_paddr, "mini RX ring");
 	if (error)
 		return (error);
 
 	/* Create DMA tag for mini RX mbufs. */
 	error = bus_dma_tag_create(sc->ti_cdata.ti_parent_tag, 1, 0,
 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, MHLEN, 1,
 	    MHLEN, 0, NULL, NULL, &sc->ti_cdata.ti_rx_mini_tag);
 	if (error) {
 		device_printf(sc->ti_dev,
 		    "could not allocate mini RX dma tag\n");
 		return (error);
 	}
 
 	/* Create DMA maps for mini RX buffers. */
 	for (i = 0; i < TI_MINI_RX_RING_CNT; i++) {
 		error = bus_dmamap_create(sc->ti_cdata.ti_rx_mini_tag, 0,
 		    &sc->ti_cdata.ti_rx_mini_maps[i]);
 		if (error) {
 			device_printf(sc->ti_dev,
 			    "could not create DMA map for mini RX\n");
 			return (error);
 		}
 	}
 	error = bus_dmamap_create(sc->ti_cdata.ti_rx_mini_tag, 0,
 	    &sc->ti_cdata.ti_rx_mini_sparemap);
 	if (error) {
 		device_printf(sc->ti_dev,
 		    "could not create spare DMA map for mini RX\n");
 		return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Release the DMA resources held in sc->ti_cdata / sc->ti_rdata.
  *
  * For each buffer class (standard, jumbo, mini RX and TX) the per-slot
  * maps and the spare map are destroyed before the tag they were created
  * from; the descriptor rings are then handed to ti_dma_ring_free(), and
  * the parent tag goes last.  Every map/tag is checked for non-NULL
  * before it is destroyed and is set to NULL afterwards, so the function
  * copes with a partially populated softc.
  */
 static void
 ti_dma_free(struct ti_softc *sc)
 {
 	int i;
 
 	/* Destroy DMA maps for RX buffers. */
 	for (i = 0; i < TI_STD_RX_RING_CNT; i++) {
 		if (sc->ti_cdata.ti_rx_std_maps[i]) {
 			bus_dmamap_destroy(sc->ti_cdata.ti_rx_std_tag,
 			    sc->ti_cdata.ti_rx_std_maps[i]);
 			sc->ti_cdata.ti_rx_std_maps[i] = NULL;
 		}
 	}
 	if (sc->ti_cdata.ti_rx_std_sparemap) {
 		bus_dmamap_destroy(sc->ti_cdata.ti_rx_std_tag,
 		    sc->ti_cdata.ti_rx_std_sparemap);
 		sc->ti_cdata.ti_rx_std_sparemap = NULL;
 	}
 	if (sc->ti_cdata.ti_rx_std_tag) {
 		bus_dma_tag_destroy(sc->ti_cdata.ti_rx_std_tag);
 		sc->ti_cdata.ti_rx_std_tag = NULL;
 	}
 
 	/* Destroy DMA maps for jumbo RX buffers. */
 	for (i = 0; i < TI_JUMBO_RX_RING_CNT; i++) {
 		if (sc->ti_cdata.ti_rx_jumbo_maps[i]) {
 			bus_dmamap_destroy(sc->ti_cdata.ti_rx_jumbo_tag,
 			    sc->ti_cdata.ti_rx_jumbo_maps[i]);
 			sc->ti_cdata.ti_rx_jumbo_maps[i] = NULL;
 		}
 	}
 	if (sc->ti_cdata.ti_rx_jumbo_sparemap) {
 		bus_dmamap_destroy(sc->ti_cdata.ti_rx_jumbo_tag,
 		    sc->ti_cdata.ti_rx_jumbo_sparemap);
 		sc->ti_cdata.ti_rx_jumbo_sparemap = NULL;
 	}
 	if (sc->ti_cdata.ti_rx_jumbo_tag) {
 		bus_dma_tag_destroy(sc->ti_cdata.ti_rx_jumbo_tag);
 		sc->ti_cdata.ti_rx_jumbo_tag = NULL;
 	}
 
 	/* Destroy DMA maps for mini RX buffers. */
 	for (i = 0; i < TI_MINI_RX_RING_CNT; i++) {
 		if (sc->ti_cdata.ti_rx_mini_maps[i]) {
 			bus_dmamap_destroy(sc->ti_cdata.ti_rx_mini_tag,
 			    sc->ti_cdata.ti_rx_mini_maps[i]);
 			sc->ti_cdata.ti_rx_mini_maps[i] = NULL;
 		}
 	}
 	if (sc->ti_cdata.ti_rx_mini_sparemap) {
 		bus_dmamap_destroy(sc->ti_cdata.ti_rx_mini_tag,
 		    sc->ti_cdata.ti_rx_mini_sparemap);
 		sc->ti_cdata.ti_rx_mini_sparemap = NULL;
 	}
 	if (sc->ti_cdata.ti_rx_mini_tag) {
 		bus_dma_tag_destroy(sc->ti_cdata.ti_rx_mini_tag);
 		sc->ti_cdata.ti_rx_mini_tag = NULL;
 	}
 
 	/* Destroy DMA maps for TX buffers. */
 	for (i = 0; i < TI_TX_RING_CNT; i++) {
 		if (sc->ti_cdata.ti_txdesc[i].tx_dmamap) {
 			bus_dmamap_destroy(sc->ti_cdata.ti_tx_tag,
 			    sc->ti_cdata.ti_txdesc[i].tx_dmamap);
 			sc->ti_cdata.ti_txdesc[i].tx_dmamap = NULL;
 		}
 	}
 	if (sc->ti_cdata.ti_tx_tag) {
 		bus_dma_tag_destroy(sc->ti_cdata.ti_tx_tag);
 		sc->ti_cdata.ti_tx_tag = NULL;
 	}
 
 	/*
 	 * The rings below are released unconditionally; NOTE(review):
 	 * ti_dma_ring_free() presumably tolerates rings that were never
 	 * allocated (e.g. TX/mini rings on Tigon 1) -- confirm.
 	 */
 	/* Destroy standard RX ring. */
 	ti_dma_ring_free(sc, &sc->ti_cdata.ti_rx_std_ring_tag,
 	    (void *)&sc->ti_rdata.ti_rx_std_ring,
 	    sc->ti_cdata.ti_rx_std_ring_map,
 	    &sc->ti_rdata.ti_rx_std_ring_paddr);
 	/* Destroy jumbo RX ring. */
 	ti_dma_ring_free(sc, &sc->ti_cdata.ti_rx_jumbo_ring_tag,
 	    (void *)&sc->ti_rdata.ti_rx_jumbo_ring,
 	    sc->ti_cdata.ti_rx_jumbo_ring_map,
 	    &sc->ti_rdata.ti_rx_jumbo_ring_paddr);
 	/* Destroy mini RX ring. */
 	ti_dma_ring_free(sc, &sc->ti_cdata.ti_rx_mini_ring_tag,
 	    (void *)&sc->ti_rdata.ti_rx_mini_ring,
 	    sc->ti_cdata.ti_rx_mini_ring_map,
 	    &sc->ti_rdata.ti_rx_mini_ring_paddr);
 	/* Destroy RX return ring. */
 	ti_dma_ring_free(sc, &sc->ti_cdata.ti_rx_return_ring_tag,
 	    (void *)&sc->ti_rdata.ti_rx_return_ring,
 	    sc->ti_cdata.ti_rx_return_ring_map,
 	    &sc->ti_rdata.ti_rx_return_ring_paddr);
 	/* Destroy TX ring. */
 	ti_dma_ring_free(sc, &sc->ti_cdata.ti_tx_ring_tag,
 	    (void *)&sc->ti_rdata.ti_tx_ring, sc->ti_cdata.ti_tx_ring_map,
 	    &sc->ti_rdata.ti_tx_ring_paddr);
 	/* Destroy status block. */
 	ti_dma_ring_free(sc, &sc->ti_cdata.ti_status_tag,
 	    (void *)&sc->ti_rdata.ti_status, sc->ti_cdata.ti_status_map,
 	    &sc->ti_rdata.ti_status_paddr);
 	/* Destroy event ring. */
 	ti_dma_ring_free(sc, &sc->ti_cdata.ti_event_ring_tag,
 	    (void *)&sc->ti_rdata.ti_event_ring,
 	    sc->ti_cdata.ti_event_ring_map, &sc->ti_rdata.ti_event_ring_paddr);
 	/* Destroy GIB */
 	ti_dma_ring_free(sc, &sc->ti_cdata.ti_gib_tag,
 	    (void *)&sc->ti_rdata.ti_info, sc->ti_cdata.ti_gib_map,
 	    &sc->ti_rdata.ti_info_paddr);
 
 	/* Destroy the parent tag. */
 	if (sc->ti_cdata.ti_parent_tag) {
 		bus_dma_tag_destroy(sc->ti_cdata.ti_parent_tag);
 		sc->ti_cdata.ti_parent_tag = NULL;
 	}
 }
 
 /*
  * Intialize a standard receive ring descriptor.
  */
 static int
 ti_newbuf_std(struct ti_softc *sc, int i)
 {
 	bus_dmamap_t map;
 	bus_dma_segment_t segs[1];
 	struct mbuf *m;
 	struct ti_rx_desc *r;
 	int error, nsegs;
 
 	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 	if (m == NULL)
 		return (ENOBUFS);
 	m->m_len = m->m_pkthdr.len = MCLBYTES;
 	m_adj(m, ETHER_ALIGN);
 
 	error = bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_rx_std_tag,
 	    sc->ti_cdata.ti_rx_std_sparemap, m, segs, &nsegs, 0);
 	if (error != 0) {
 		m_freem(m);
 		return (error);
         }
 	KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
 
 	if (sc->ti_cdata.ti_rx_std_chain[i] != NULL) {
 		bus_dmamap_sync(sc->ti_cdata.ti_rx_std_tag,
 		    sc->ti_cdata.ti_rx_std_maps[i], BUS_DMASYNC_POSTREAD);
 		bus_dmamap_unload(sc->ti_cdata.ti_rx_std_tag,
 		    sc->ti_cdata.ti_rx_std_maps[i]);
 	}
 
 	map = sc->ti_cdata.ti_rx_std_maps[i];
 	sc->ti_cdata.ti_rx_std_maps[i] = sc->ti_cdata.ti_rx_std_sparemap;
 	sc->ti_cdata.ti_rx_std_sparemap = map;
 	sc->ti_cdata.ti_rx_std_chain[i] = m;
 
 	r = &sc->ti_rdata.ti_rx_std_ring[i];
 	ti_hostaddr64(&r->ti_addr, segs[0].ds_addr);
 	r->ti_len = segs[0].ds_len;
 	r->ti_type = TI_BDTYPE_RECV_BD;
 	r->ti_flags = 0;
 	r->ti_vlan_tag = 0;
 	r->ti_tcp_udp_cksum = 0;
 	if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM)
 		r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM;
 	r->ti_idx = i;
 
 	bus_dmamap_sync(sc->ti_cdata.ti_rx_std_tag,
 	    sc->ti_cdata.ti_rx_std_maps[i], BUS_DMASYNC_PREREAD);
 	return (0);
 }
 
 /*
  * Intialize a mini receive ring descriptor. This only applies to
  * the Tigon 2.
  */
 static int
 ti_newbuf_mini(struct ti_softc *sc, int i)
 {
 	bus_dmamap_t map;
 	bus_dma_segment_t segs[1];
 	struct mbuf *m;
 	struct ti_rx_desc *r;
 	int error, nsegs;
 
 	MGETHDR(m, M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (ENOBUFS);
 	m->m_len = m->m_pkthdr.len = MHLEN;
 	m_adj(m, ETHER_ALIGN);
 
 	error = bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_rx_mini_tag,
 	    sc->ti_cdata.ti_rx_mini_sparemap, m, segs, &nsegs, 0);
 	if (error != 0) {
 		m_freem(m);
 		return (error);
         }
 	KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
 
 	if (sc->ti_cdata.ti_rx_mini_chain[i] != NULL) {
 		bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_tag,
 		    sc->ti_cdata.ti_rx_mini_maps[i], BUS_DMASYNC_POSTREAD);
 		bus_dmamap_unload(sc->ti_cdata.ti_rx_mini_tag,
 		    sc->ti_cdata.ti_rx_mini_maps[i]);
 	}
 
 	map = sc->ti_cdata.ti_rx_mini_maps[i];
 	sc->ti_cdata.ti_rx_mini_maps[i] = sc->ti_cdata.ti_rx_mini_sparemap;
 	sc->ti_cdata.ti_rx_mini_sparemap = map;
 	sc->ti_cdata.ti_rx_mini_chain[i] = m;
 
 	r = &sc->ti_rdata.ti_rx_mini_ring[i];
 	ti_hostaddr64(&r->ti_addr, segs[0].ds_addr);
 	r->ti_len = segs[0].ds_len;
 	r->ti_type = TI_BDTYPE_RECV_BD;
 	r->ti_flags = TI_BDFLAG_MINI_RING;
 	r->ti_vlan_tag = 0;
 	r->ti_tcp_udp_cksum = 0;
 	if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM)
 		r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM;
 	r->ti_idx = i;
 
 	bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_tag,
 	    sc->ti_cdata.ti_rx_mini_maps[i], BUS_DMASYNC_PREREAD);
 	return (0);
 }
 
 #ifndef TI_SF_BUF_JUMBO
 
 /*
  * Initialize a jumbo receive ring descriptor. This allocates
  * a jumbo buffer from the pool managed internally by the driver.
  */
 static int
 ti_newbuf_jumbo(struct ti_softc *sc, int i, struct mbuf *dummy)
 {
 	bus_dmamap_t map;
 	bus_dma_segment_t segs[1];
 	struct mbuf *m;
 	struct ti_rx_desc *r;
 	int error, nsegs;
 
 	(void)dummy;
 
 	m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES);
 	if (m == NULL)
 		return (ENOBUFS);
 	m->m_len = m->m_pkthdr.len = MJUM9BYTES;
 	m_adj(m, ETHER_ALIGN);
 
 	error = bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_rx_jumbo_tag,
 	    sc->ti_cdata.ti_rx_jumbo_sparemap, m, segs, &nsegs, 0);
 	if (error != 0) {
 		m_freem(m);
 		return (error);
         }
 	KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
 
 	if (sc->ti_cdata.ti_rx_jumbo_chain[i] != NULL) {
 		bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_tag,
 		    sc->ti_cdata.ti_rx_jumbo_maps[i], BUS_DMASYNC_POSTREAD);
 		bus_dmamap_unload(sc->ti_cdata.ti_rx_jumbo_tag,
 		    sc->ti_cdata.ti_rx_jumbo_maps[i]);
 	}
 
 	map = sc->ti_cdata.ti_rx_jumbo_maps[i];
 	sc->ti_cdata.ti_rx_jumbo_maps[i] = sc->ti_cdata.ti_rx_jumbo_sparemap;
 	sc->ti_cdata.ti_rx_jumbo_sparemap = map;
 	sc->ti_cdata.ti_rx_jumbo_chain[i] = m;
 
 	r = &sc->ti_rdata.ti_rx_jumbo_ring[i];
 	ti_hostaddr64(&r->ti_addr, segs[0].ds_addr);
 	r->ti_len = segs[0].ds_len;
 	r->ti_type = TI_BDTYPE_RECV_JUMBO_BD;
 	r->ti_flags = TI_BDFLAG_JUMBO_RING;
 	r->ti_vlan_tag = 0;
 	r->ti_tcp_udp_cksum = 0;
 	if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM)
 		r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM;
 	r->ti_idx = i;
 
 	bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_tag,
 	    sc->ti_cdata.ti_rx_jumbo_maps[i], BUS_DMASYNC_PREREAD);
 	return (0);
 }
 
 #else
 
 /*
  * Number of page-sized payload mbufs placed between the header mbuf and
  * the trailing cluster mbuf in each sf_buf jumbo chain: two with 4K
  * pages, one otherwise.
  */
 #if (PAGE_SIZE == 4096)
 #define NPAYLOAD 2
 #else
 #define NPAYLOAD 1
 #endif
 
 /*
  * Header-mbuf lengths used when header splitting is off.
  * NOTE(review): 52 and 28 presumably stand for IP+TCP (with options) and
  * IP+UDP header bytes respectively -- confirm.
  */
 #define TCP_HDR_LEN (52 + sizeof(struct ether_header))
 #define UDP_HDR_LEN (28 + sizeof(struct ether_header))
 #define NFS_HDR_LEN (UDP_HDR_LEN)
 /* Length given to the header mbuf when sc->ti_hdrsplit is not set. */
 static int HDR_LEN = TCP_HDR_LEN;
 
 /*
  * Initialize a jumbo receive ring descriptor (sf_buf variant).
  *
  * Each jumbo buffer is an mbuf chain: a header mbuf, NPAYLOAD mbufs that
  * each wrap one wired page mapped through an sf_buf, and a final mbuf
  * with a regular cluster.  When m_old is non-NULL that existing chain is
  * reused instead of allocating a new one.  The chain is then loaded into
  * the DMA map belonging to ring slot idx and described to the NIC with
  * an extended RX descriptor.
  *
  * Returns 0 on success and ENOBUFS on any allocation or DMA load
  * failure.
  */
 static int
 ti_newbuf_jumbo(struct ti_softc *sc, int idx, struct mbuf *m_old)
 {
 	bus_dmamap_t map;
 	struct mbuf *cur, *m_new = NULL;
 	struct mbuf *m[3] = {NULL, NULL, NULL};
 	struct ti_rx_desc_ext *r;
 	vm_page_t frame;
 	/* 1 extra buf to make nobufs easy*/
 	struct sf_buf *sf[3] = {NULL, NULL, NULL};
 	int i;
 	bus_dma_segment_t segs[4];
 	int nsegs;
 
 	if (m_old != NULL) {
 		/* Recycle the caller's chain; rebuild m[] from its links. */
 		m_new = m_old;
 		cur = m_old->m_next;
 		for (i = 0; i <= NPAYLOAD; i++){
 			m[i] = cur;
 			cur = cur->m_next;
 		}
 	} else {
 		/* Allocate the mbufs. */
 		MGETHDR(m_new, M_NOWAIT, MT_DATA);
 		if (m_new == NULL) {
 			device_printf(sc->ti_dev, "mbuf allocation failed "
 			    "-- packet dropped!\n");
 			goto nobufs;
 		}
 		/* The last mbuf of the chain carries an ordinary cluster. */
 		MGET(m[NPAYLOAD], M_NOWAIT, MT_DATA);
 		if (m[NPAYLOAD] == NULL) {
 			device_printf(sc->ti_dev, "cluster mbuf allocation "
 			    "failed -- packet dropped!\n");
 			goto nobufs;
 		}
 		if (!(MCLGET(m[NPAYLOAD], M_NOWAIT))) {
 			device_printf(sc->ti_dev, "mbuf allocation failed "
 			    "-- packet dropped!\n");
 			goto nobufs;
 		}
 		m[NPAYLOAD]->m_len = MCLBYTES;
 
 		/* One wired page, mapped via an sf_buf, per payload mbuf. */
 		for (i = 0; i < NPAYLOAD; i++){
 			MGET(m[i], M_NOWAIT, MT_DATA);
 			if (m[i] == NULL) {
 				device_printf(sc->ti_dev, "mbuf allocation "
 				    "failed -- packet dropped!\n");
 				goto nobufs;
 			}
 			frame = vm_page_alloc(NULL, 0,
 			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
 			    VM_ALLOC_WIRED);
 			if (frame == NULL) {
 				device_printf(sc->ti_dev, "buffer allocation "
 				    "failed -- packet dropped!\n");
 				printf("      index %d page %d\n", idx, i);
 				goto nobufs;
 			}
 			sf[i] = sf_buf_alloc(frame, SFB_NOWAIT);
 			if (sf[i] == NULL) {
 				vm_page_unwire(frame, PQ_INACTIVE);
 				vm_page_free(frame);
 				device_printf(sc->ti_dev, "buffer allocation "
 				    "failed -- packet dropped!\n");
 				printf("      index %d page %d\n", idx, i);
 				goto nobufs;
 			}
 		}
 		/*
 		 * Linking happens in a second pass so that the nobufs path
 		 * above never sees a partially linked chain.
 		 */
 		for (i = 0; i < NPAYLOAD; i++){
 		/* Attach the buffer to the mbuf. */
 			m[i]->m_data = (void *)sf_buf_kva(sf[i]);
 			m[i]->m_len = PAGE_SIZE;
 			MEXTADD(m[i], sf_buf_kva(sf[i]), PAGE_SIZE,
-			    sf_buf_mext, (void*)sf_buf_kva(sf[i]), sf[i],
+			    sf_mext_free, (void*)sf_buf_kva(sf[i]), sf[i],
 			    0, EXT_DISPOSABLE);
 			m[i]->m_next = m[i+1];
 		}
 		/* link the buffers to the header */
 		m_new->m_next = m[0];
 		m_new->m_data += ETHER_ALIGN;
 		if (sc->ti_hdrsplit)
 			m_new->m_len = MHLEN - ETHER_ALIGN;
 		else
 			m_new->m_len = HDR_LEN;
 		m_new->m_pkthdr.len = NPAYLOAD * PAGE_SIZE + m_new->m_len;
 	}
 
 	/* Set up the descriptor. */
 	r = &sc->ti_rdata.ti_rx_jumbo_ring[idx];
 	sc->ti_cdata.ti_rx_jumbo_chain[idx] = m_new;
 	/*
 	 * Use the map that belongs to this ring slot.  The loop counter i
 	 * is left at NPAYLOAD (or NPAYLOAD + 1) by the loops above and must
 	 * not be used as the slot index here.
 	 */
 	map = sc->ti_cdata.ti_rx_jumbo_maps[idx];
 	if (bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_rx_jumbo_tag, map, m_new,
 	    segs, &nsegs, 0))
 		return (ENOBUFS);
 	if ((nsegs < 1) || (nsegs > 4))
 		return (ENOBUFS);
 	/*
 	 * NOTE(review): segs[1..3] are read below regardless of nsegs; this
 	 * presumably relies on the chain always mapping to one segment per
 	 * mbuf -- confirm.
 	 */
 	ti_hostaddr64(&r->ti_addr0, segs[0].ds_addr);
 	r->ti_len0 = m_new->m_len;
 
 	ti_hostaddr64(&r->ti_addr1, segs[1].ds_addr);
 	r->ti_len1 = PAGE_SIZE;
 
 	ti_hostaddr64(&r->ti_addr2, segs[2].ds_addr);
 	r->ti_len2 = m[1]->m_ext.ext_size; /* could be PAGE_SIZE or MCLBYTES */
 
 	if (PAGE_SIZE == 4096) {
 		ti_hostaddr64(&r->ti_addr3, segs[3].ds_addr);
 		r->ti_len3 = MCLBYTES;
 	} else {
 		r->ti_len3 = 0;
 	}
 	r->ti_type = TI_BDTYPE_RECV_JUMBO_BD;
 
 	r->ti_flags = TI_BDFLAG_JUMBO_RING|TI_RCB_FLAG_USE_EXT_RX_BD;
 
 	if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM)
 		r->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM|TI_BDFLAG_IP_CKSUM;
 
 	r->ti_idx = idx;
 
 	bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_tag, map, BUS_DMASYNC_PREREAD);
 	return (0);
 
 nobufs:
 
 	/*
 	 * Warning! :
 	 * This can only be called before the mbufs are strung together.
 	 * If the mbufs are strung together, m_freem() will free the chain,
 	 * so that the later mbufs will be freed multiple times.
 	 */
 	if (m_new)
 		m_freem(m_new);
 
 	for (i = 0; i < 3; i++) {
 		if (m[i])
 			m_freem(m[i]);
 		if (sf[i])
-			sf_buf_mext((void *)sf_buf_kva(sf[i]), sf[i]);
+			sf_mext_free((void *)sf_buf_kva(sf[i]), sf[i]);
 	}
 	return (ENOBUFS);
 }
 #endif
 
 /*
  * Populate the standard receive ring.
  *
  * Every one of the TI_STD_RX_RING_CNT slots is given a fresh buffer by
  * ti_newbuf_std(); the first failure is reported to the caller as
  * ENOBUFS.  On success the saved index sc->ti_std is set to the last
  * slot and TI_UPDATE_STDPROD() is invoked with that same index.
  */
 static int
 ti_init_rx_ring_std(struct ti_softc *sc)
 {
 	/* Not named below; presumably consumed by TI_UPDATE_STDPROD(). */
 	struct ti_cmd_desc cmd;
 	int slot;
 
 	for (slot = 0; slot < TI_STD_RX_RING_CNT; slot++) {
 		if (ti_newbuf_std(sc, slot) != 0)
 			return (ENOBUFS);
 	}
 
 	sc->ti_std = TI_STD_RX_RING_CNT - 1;
 	TI_UPDATE_STDPROD(sc, TI_STD_RX_RING_CNT - 1);
 
 	return (0);
 }
 
 /*
  * Release every mbuf still attached to the standard RX ring: sync and
  * unload its DMA map, free the mbuf and clear the slot.  Afterwards the
  * descriptor ring is zeroed and synced with BUS_DMASYNC_PREWRITE.
  */
 static void
 ti_free_rx_ring_std(struct ti_softc *sc)
 {
 	struct mbuf **slot;
 	bus_dmamap_t dmap;
 	int idx;
 
 	for (idx = 0; idx < TI_STD_RX_RING_CNT; idx++) {
 		slot = &sc->ti_cdata.ti_rx_std_chain[idx];
 		if (*slot == NULL)
 			continue;
 		dmap = sc->ti_cdata.ti_rx_std_maps[idx];
 		bus_dmamap_sync(sc->ti_cdata.ti_rx_std_tag, dmap,
 		    BUS_DMASYNC_POSTREAD);
 		bus_dmamap_unload(sc->ti_cdata.ti_rx_std_tag, dmap);
 		m_freem(*slot);
 		*slot = NULL;
 	}
 	bzero(sc->ti_rdata.ti_rx_std_ring, TI_STD_RX_RING_SZ);
 	bus_dmamap_sync(sc->ti_cdata.ti_rx_std_ring_tag,
 	    sc->ti_cdata.ti_rx_std_ring_map, BUS_DMASYNC_PREWRITE);
 }
 
 /*
  * Populate the jumbo receive ring.
  *
  * Every one of the TI_JUMBO_RX_RING_CNT slots is given a fresh buffer
  * by ti_newbuf_jumbo(); the first failure is reported as ENOBUFS.  On
  * success sc->ti_jumbo is set to the last slot and
  * TI_UPDATE_JUMBOPROD() is invoked with that same index.
  */
 static int
 ti_init_rx_ring_jumbo(struct ti_softc *sc)
 {
 	/* Not named below; presumably consumed by TI_UPDATE_JUMBOPROD(). */
 	struct ti_cmd_desc cmd;
 	int slot;
 
 	for (slot = 0; slot < TI_JUMBO_RX_RING_CNT; slot++) {
 		if (ti_newbuf_jumbo(sc, slot, NULL) != 0)
 			return (ENOBUFS);
 	}
 
 	sc->ti_jumbo = TI_JUMBO_RX_RING_CNT - 1;
 	TI_UPDATE_JUMBOPROD(sc, TI_JUMBO_RX_RING_CNT - 1);
 
 	return (0);
 }
 
 /*
  * Release every mbuf still attached to the jumbo RX ring: sync and
  * unload its DMA map, free the mbuf and clear the slot.  Afterwards the
  * descriptor ring is zeroed and synced with BUS_DMASYNC_PREWRITE.
  */
 static void
 ti_free_rx_ring_jumbo(struct ti_softc *sc)
 {
 	struct mbuf **slot;
 	bus_dmamap_t dmap;
 	int idx;
 
 	for (idx = 0; idx < TI_JUMBO_RX_RING_CNT; idx++) {
 		slot = &sc->ti_cdata.ti_rx_jumbo_chain[idx];
 		if (*slot == NULL)
 			continue;
 		dmap = sc->ti_cdata.ti_rx_jumbo_maps[idx];
 		bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_tag, dmap,
 		    BUS_DMASYNC_POSTREAD);
 		bus_dmamap_unload(sc->ti_cdata.ti_rx_jumbo_tag, dmap);
 		m_freem(*slot);
 		*slot = NULL;
 	}
 	bzero(sc->ti_rdata.ti_rx_jumbo_ring, TI_JUMBO_RX_RING_SZ);
 	bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_ring_tag,
 	    sc->ti_cdata.ti_rx_jumbo_ring_map, BUS_DMASYNC_PREWRITE);
 }
 
 /*
  * Populate the mini receive ring.
  *
  * Every one of the TI_MINI_RX_RING_CNT slots is given a fresh buffer by
  * ti_newbuf_mini(); the first failure is reported as ENOBUFS.  On
  * success sc->ti_mini is set to the last slot and TI_UPDATE_MINIPROD()
  * is invoked with that same index.
  */
 static int
 ti_init_rx_ring_mini(struct ti_softc *sc)
 {
 	int slot;
 
 	for (slot = 0; slot < TI_MINI_RX_RING_CNT; slot++) {
 		if (ti_newbuf_mini(sc, slot) != 0)
 			return (ENOBUFS);
 	}
 
 	sc->ti_mini = TI_MINI_RX_RING_CNT - 1;
 	TI_UPDATE_MINIPROD(sc, TI_MINI_RX_RING_CNT - 1);
 
 	return (0);
 }
 
 /*
  * Release every mbuf still attached to the mini RX ring, then zero the
  * descriptor ring and sync it with BUS_DMASYNC_PREWRITE.  Does nothing
  * when the mini ring was never allocated (ring pointer is NULL).
  */
 static void
 ti_free_rx_ring_mini(struct ti_softc *sc)
 {
 	struct mbuf **slot;
 	bus_dmamap_t dmap;
 	int idx;
 
 	if (sc->ti_rdata.ti_rx_mini_ring == NULL)
 		return;
 
 	for (idx = 0; idx < TI_MINI_RX_RING_CNT; idx++) {
 		slot = &sc->ti_cdata.ti_rx_mini_chain[idx];
 		if (*slot == NULL)
 			continue;
 		dmap = sc->ti_cdata.ti_rx_mini_maps[idx];
 		bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_tag, dmap,
 		    BUS_DMASYNC_POSTREAD);
 		bus_dmamap_unload(sc->ti_cdata.ti_rx_mini_tag, dmap);
 		m_freem(*slot);
 		*slot = NULL;
 	}
 	bzero(sc->ti_rdata.ti_rx_mini_ring, TI_MINI_RX_RING_SZ);
 	bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_ring_tag,
 	    sc->ti_cdata.ti_rx_mini_ring_map, BUS_DMASYNC_PREWRITE);
 }
 
 /*
  * Release every mbuf still attached to a TX descriptor: sync and unload
  * its DMA map, free the mbuf and clear the pointer.  Afterwards the TX
  * ring is zeroed and synced with BUS_DMASYNC_PREWRITE.  Does nothing
  * when the TX ring was never allocated (ring pointer is NULL).
  */
 static void
 ti_free_tx_ring(struct ti_softc *sc)
 {
 	struct ti_txdesc *desc;
 	int idx;
 
 	if (sc->ti_rdata.ti_tx_ring == NULL)
 		return;
 
 	for (idx = 0; idx < TI_TX_RING_CNT; idx++) {
 		desc = &sc->ti_cdata.ti_txdesc[idx];
 		if (desc->tx_m == NULL)
 			continue;
 		bus_dmamap_sync(sc->ti_cdata.ti_tx_tag, desc->tx_dmamap,
 		    BUS_DMASYNC_POSTWRITE);
 		bus_dmamap_unload(sc->ti_cdata.ti_tx_tag, desc->tx_dmamap);
 		m_freem(desc->tx_m);
 		desc->tx_m = NULL;
 	}
 	bzero(sc->ti_rdata.ti_tx_ring, TI_TX_RING_SZ);
 	bus_dmamap_sync(sc->ti_cdata.ti_tx_ring_tag,
 	    sc->ti_cdata.ti_tx_ring_map, BUS_DMASYNC_PREWRITE);
 }
 
 /*
  * Reset transmit bookkeeping: both descriptor queues are reinitialized,
  * every TX descriptor is appended to the free queue in index order, the
  * saved counters/indices are cleared and the send producer mailbox is
  * written with 0.  Always returns 0.
  */
 static int
 ti_init_tx_ring(struct ti_softc *sc)
 {
 	struct ti_txdesc *desc;
 	int idx;
 
 	STAILQ_INIT(&sc->ti_cdata.ti_txfreeq);
 	STAILQ_INIT(&sc->ti_cdata.ti_txbusyq);
 	idx = 0;
 	while (idx < TI_TX_RING_CNT) {
 		desc = &sc->ti_cdata.ti_txdesc[idx];
 		STAILQ_INSERT_TAIL(&sc->ti_cdata.ti_txfreeq, desc, tx_q);
 		idx++;
 	}
 	sc->ti_txcnt = 0;
 	sc->ti_tx_saved_considx = 0;
 	sc->ti_tx_saved_prodidx = 0;
 	CSR_WRITE_4(sc, TI_MB_SENDPROD_IDX, 0);
 	return (0);
 }
 
 /*
  * The Tigon 2 firmware has a new way to add/delete multicast addresses,
  * but we have to support the old way too so that Tigon 1 cards will
  * work.
  */
 static void
 ti_add_mcast(struct ti_softc *sc, struct ether_addr *addr)
 {
 	struct ti_cmd_desc cmd;
 	uint16_t *m;
 	uint32_t ext[2] = {0, 0};
 
 	m = (uint16_t *)&addr->octet[0];
 
 	switch (sc->ti_hwrev) {
 	case TI_HWREV_TIGON:
 		CSR_WRITE_4(sc, TI_GCR_MAR0, htons(m[0]));
 		CSR_WRITE_4(sc, TI_GCR_MAR1, (htons(m[1]) << 16) | htons(m[2]));
 		TI_DO_CMD(TI_CMD_ADD_MCAST_ADDR, 0, 0);
 		break;
 	case TI_HWREV_TIGON_II:
 		ext[0] = htons(m[0]);
 		ext[1] = (htons(m[1]) << 16) | htons(m[2]);
 		TI_DO_CMD_EXT(TI_CMD_EXT_ADD_MCAST, 0, 0, (caddr_t)&ext, 2);
 		break;
 	default:
 		device_printf(sc->ti_dev, "unknown hwrev\n");
 		break;
 	}
 }
 
 static void
 ti_del_mcast(struct ti_softc *sc, struct ether_addr *addr)
 {
 	struct ti_cmd_desc cmd;
 	uint16_t *m;
 	uint32_t ext[2] = {0, 0};
 
 	m = (uint16_t *)&addr->octet[0];
 
 	switch (sc->ti_hwrev) {
 	case TI_HWREV_TIGON:
 		CSR_WRITE_4(sc, TI_GCR_MAR0, htons(m[0]));
 		CSR_WRITE_4(sc, TI_GCR_MAR1, (htons(m[1]) << 16) | htons(m[2]));
 		TI_DO_CMD(TI_CMD_DEL_MCAST_ADDR, 0, 0);
 		break;
 	case TI_HWREV_TIGON_II:
 		ext[0] = htons(m[0]);
 		ext[1] = (htons(m[1]) << 16) | htons(m[2]);
 		TI_DO_CMD_EXT(TI_CMD_EXT_DEL_MCAST, 0, 0, (caddr_t)&ext, 2);
 		break;
 	default:
 		device_printf(sc->ti_dev, "unknown hwrev\n");
 		break;
 	}
 }
 
 /*
  * Configure the Tigon's multicast address filter.
  *
  * The actual multicast table management is a bit of a pain, thanks to
  * slight brain damage on the part of both Alteon and us. With our
  * multicast code, we are only alerted when the multicast address table
  * changes and at that point we only have the current list of addresses:
  * we only know the current state, not the previous state, so we don't
  * actually know what addresses were removed or added. The firmware has
  * state, but we can't get our grubby mitts on it, and there is no 'delete
  * all multicast addresses' command. Hence, we have to maintain our own
  * state so we know what addresses have been programmed into the NIC at
  * any given time.
  *
  * Must be called with the softc lock held (asserted below).  When
  * IFF_ALLMULTI is set the NIC is switched to all-multicast mode and the
  * per-address filter list is left untouched.
  */
 static void
 ti_setmulti(struct ti_softc *sc)
 {
 	struct ifnet *ifp;
 	struct ifmultiaddr *ifma;
 	/* Not named below; presumably consumed by the TI_DO_CMD macro. */
 	struct ti_cmd_desc cmd;
 	struct ti_mc_entry *mc;
 	uint32_t intrs;
 
 	TI_LOCK_ASSERT(sc);
 
 	ifp = sc->ti_ifp;
 
 	if (ifp->if_flags & IFF_ALLMULTI) {
 		TI_DO_CMD(TI_CMD_SET_ALLMULTI, TI_CMD_CODE_ALLMULTI_ENB, 0);
 		return;
 	} else {
 		TI_DO_CMD(TI_CMD_SET_ALLMULTI, TI_CMD_CODE_ALLMULTI_DIS, 0);
 	}
 
 	/* Disable interrupts. */
 	/* The previous mailbox value is saved so it can be restored below. */
 	intrs = CSR_READ_4(sc, TI_MB_HOSTINTR);
 	CSR_WRITE_4(sc, TI_MB_HOSTINTR, 1);
 
 	/* First, zot all the existing filters. */
 	while (SLIST_FIRST(&sc->ti_mc_listhead) != NULL) {
 		mc = SLIST_FIRST(&sc->ti_mc_listhead);
 		ti_del_mcast(sc, &mc->mc_addr);
 		SLIST_REMOVE_HEAD(&sc->ti_mc_listhead, mc_entries);
 		free(mc, M_DEVBUF);
 	}
 
 	/* Now program new ones. */
 	if_maddr_rlock(ifp);
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		/* Only link-layer addresses are programmed into the NIC. */
 		if (ifma->ifma_addr->sa_family != AF_LINK)
 			continue;
 		mc = malloc(sizeof(struct ti_mc_entry), M_DEVBUF, M_NOWAIT);
 		if (mc == NULL) {
 			/* Best effort: skip this address and keep going. */
 			device_printf(sc->ti_dev,
 			    "no memory for mcast filter entry\n");
 			continue;
 		}
 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
 		    (char *)&mc->mc_addr, ETHER_ADDR_LEN);
 		/* Remember the address so it can be deleted on the next call. */
 		SLIST_INSERT_HEAD(&sc->ti_mc_listhead, mc, mc_entries);
 		ti_add_mcast(sc, &mc->mc_addr);
 	}
 	if_maddr_runlock(ifp);
 
 	/* Re-enable interrupts. */
 	CSR_WRITE_4(sc, TI_MB_HOSTINTR, intrs);
 }
 
 /*
  * Check to see if the BIOS has configured us for a 64 bit slot when
  * we aren't actually in one. If we detect this condition, we can work
  * around it on the Tigon 2 by setting a bit in the PCI state register,
  * but for the Tigon 1 we must give up and abort the interface attach.
  *
  * The probe writes a pattern to offset 0x600 and looks for it at offset
  * 0x604.  Returns EINVAL only for a Tigon 1 that fails the probe, and 0
  * in every other case.
  */
 static int
 ti_64bitslot_war(struct ti_softc *sc)
 {
 
 	/* Nothing to check when the chip already reports a 32-bit bus. */
 	if ((CSR_READ_4(sc, TI_PCI_STATE) & TI_PCISTATE_32BIT_BUS) != 0)
 		return (0);
 
 	CSR_WRITE_4(sc, 0x600, 0);
 	CSR_WRITE_4(sc, 0x604, 0);
 	CSR_WRITE_4(sc, 0x600, 0x5555AAAA);
 	if (CSR_READ_4(sc, 0x604) != 0x5555AAAA)
 		return (0);
 
 	/* Pattern showed up at 0x604: the slot is misreported. */
 	if (sc->ti_hwrev == TI_HWREV_TIGON)
 		return (EINVAL);
 
 	TI_SETBIT(sc, TI_PCI_STATE, TI_PCISTATE_32BIT_BUS);
 	return (0);
 }
 
 /*
  * Do endian, PCI and DMA initialization. Also check the on-board ROM
  * self-test results.
  *
  * Returns 0 on success, ENODEV when the ROM self-test failed or the chip
  * revision is not recognized, and EINVAL when ti_64bitslot_war()
  * reports an unrecoverable slot misconfiguration.
  */
 static int
 ti_chipinit(struct ti_softc *sc)
 {
 	uint32_t cacheline;
 	uint32_t pci_writemax = 0;
 	uint32_t hdrsplit;
 
 	/* Initialize link to down state. */
 	sc->ti_linkstat = TI_EV_CODE_LINK_DOWN;
 
 	/* Set endianness before we access any non-PCI registers. */
 #if 0 && BYTE_ORDER == BIG_ENDIAN
 	CSR_WRITE_4(sc, TI_MISC_HOST_CTL,
 	    TI_MHC_BIGENDIAN_INIT | (TI_MHC_BIGENDIAN_INIT << 24));
 #else
 	CSR_WRITE_4(sc, TI_MISC_HOST_CTL,
 	    TI_MHC_LITTLEENDIAN_INIT | (TI_MHC_LITTLEENDIAN_INIT << 24));
 #endif
 
 	/* Check the ROM failed bit to see if self-tests passed. */
 	if (CSR_READ_4(sc, TI_CPU_STATE) & TI_CPUSTATE_ROMFAIL) {
 		device_printf(sc->ti_dev, "board self-diagnostics failed!\n");
 		return (ENODEV);
 	}
 
 	/* Halt the CPU. */
 	TI_SETBIT(sc, TI_CPU_STATE, TI_CPUSTATE_HALT);
 
 	/* Figure out the hardware revision. */
 	switch (CSR_READ_4(sc, TI_MISC_HOST_CTL) & TI_MHC_CHIP_REV_MASK) {
 	case TI_REV_TIGON_I:
 		sc->ti_hwrev = TI_HWREV_TIGON;
 		break;
 	case TI_REV_TIGON_II:
 		sc->ti_hwrev = TI_HWREV_TIGON_II;
 		break;
 	default:
 		device_printf(sc->ti_dev, "unsupported chip revision\n");
 		return (ENODEV);
 	}
 
 	/* Do special setup for Tigon 2. */
 	if (sc->ti_hwrev == TI_HWREV_TIGON_II) {
 		TI_SETBIT(sc, TI_CPU_CTL_B, TI_CPUSTATE_HALT);
 		TI_SETBIT(sc, TI_MISC_LOCAL_CTL, TI_MLC_SRAM_BANK_512K);
 		TI_SETBIT(sc, TI_MISC_CONF, TI_MCR_SRAM_SYNCHRONOUS);
 	}
 
 	/*
 	 * We don't have firmware source for the Tigon 1, so Tigon 1 boards
 	 * can't do header splitting.
 	 */
 #ifdef TI_JUMBO_HDRSPLIT
 	if (sc->ti_hwrev != TI_HWREV_TIGON)
 		sc->ti_hdrsplit = 1;
 	else
 		device_printf(sc->ti_dev,
 		    "can't do header splitting on a Tigon I board\n");
 #endif /* TI_JUMBO_HDRSPLIT */
 
 	/* Set up the PCI state register. */
 	CSR_WRITE_4(sc, TI_PCI_STATE, TI_PCI_READ_CMD|TI_PCI_WRITE_CMD);
 	if (sc->ti_hwrev == TI_HWREV_TIGON_II) {
 		TI_SETBIT(sc, TI_PCI_STATE, TI_PCISTATE_USE_MEM_RD_MULT);
 	}
 
 	/* Clear the read/write max DMA parameters. */
 	TI_CLRBIT(sc, TI_PCI_STATE, (TI_PCISTATE_WRITE_MAXDMA|
 	    TI_PCISTATE_READ_MAXDMA));
 
 	/* Get cache line size. */
 	cacheline = CSR_READ_4(sc, TI_PCI_BIST) & 0xFF;
 
 	/*
 	 * If the system has set enabled the PCI memory write
 	 * and invalidate command in the command register, set
 	 * the write max parameter accordingly. This is necessary
 	 * to use MWI with the Tigon 2.
 	 */
 	if (CSR_READ_4(sc, TI_PCI_CMDSTAT) & PCIM_CMD_MWIEN) {
 		switch (cacheline) {
 		case 1:
 		case 4:
 		case 8:
 		case 16:
 		case 32:
 		case 64:
 			/* Supported size: MWI stays enabled, nothing to set. */
 			break;
 		default:
 		/* Disable PCI memory write and invalidate. */
 			if (bootverbose)
 				device_printf(sc->ti_dev, "cache line size %d"
 				    " not supported; disabling PCI MWI\n",
 				    cacheline);
 			CSR_WRITE_4(sc, TI_PCI_CMDSTAT, CSR_READ_4(sc,
 			    TI_PCI_CMDSTAT) & ~PCIM_CMD_MWIEN);
 			break;
 		}
 	}
 
 	/* pci_writemax is never changed from 0 in this function. */
 	TI_SETBIT(sc, TI_PCI_STATE, pci_writemax);
 
 	/* This sets the min dma param all the way up (0xff). */
 	TI_SETBIT(sc, TI_PCI_STATE, TI_PCISTATE_MINDMA);
 
 	if (sc->ti_hdrsplit)
 		hdrsplit = TI_OPMODE_JUMBO_HDRSPLIT;
 	else
 		hdrsplit = 0;
 
 	/* Configure DMA variables. */
 #if BYTE_ORDER == BIG_ENDIAN
 	CSR_WRITE_4(sc, TI_GCR_OPMODE, TI_OPMODE_BYTESWAP_BD |
 	    TI_OPMODE_BYTESWAP_DATA | TI_OPMODE_WORDSWAP_BD |
 	    TI_OPMODE_WARN_ENB | TI_OPMODE_FATAL_ENB |
 	    TI_OPMODE_DONT_FRAG_JUMBO | hdrsplit);
 #else /* BYTE_ORDER */
 	CSR_WRITE_4(sc, TI_GCR_OPMODE, TI_OPMODE_BYTESWAP_DATA|
 	    TI_OPMODE_WORDSWAP_BD|TI_OPMODE_DONT_FRAG_JUMBO|
 	    TI_OPMODE_WARN_ENB|TI_OPMODE_FATAL_ENB | hdrsplit);
 #endif /* BYTE_ORDER */
 
 	/*
 	 * Only allow 1 DMA channel to be active at a time.
 	 * I don't think this is a good idea, but without it
 	 * the firmware racks up lots of nicDmaReadRingFull
 	 * errors.  This is not compatible with hardware checksums.
 	 */
 	if ((sc->ti_ifp->if_capenable & (IFCAP_TXCSUM | IFCAP_RXCSUM)) == 0)
 		TI_SETBIT(sc, TI_GCR_OPMODE, TI_OPMODE_1_DMA_ACTIVE);
 
 	/* Recommended settings from Tigon manual. */
 	CSR_WRITE_4(sc, TI_GCR_DMA_WRITECFG, TI_DMA_STATE_THRESH_8W);
 	CSR_WRITE_4(sc, TI_GCR_DMA_READCFG, TI_DMA_STATE_THRESH_8W);
 
 	if (ti_64bitslot_war(sc)) {
 		/* Terminate the message with a newline like every other one here. */
 		device_printf(sc->ti_dev, "bios thinks we're in a 64 bit slot, "
 		    "but we aren't\n");
 		return (EINVAL);
 	}
 
 	return (0);
 }
 
 /*
  * Initialize the general information block and firmware, and
  * start the CPU(s) running.
  *
  * Must be called with the softc lock held (see TI_LOCK_ASSERT below).
  * The sequence is: mask host interrupts, tell the NIC where the general
  * information block (GIB) lives, load the firmware, fill in one ring
  * control block (RCB) per ring, sync the DMA maps, program the
  * coalescing tunables, unmask interrupts and finally take the CPU out
  * of the halted state.
  *
  * Always returns 0.
  */
 static int
 ti_gibinit(struct ti_softc *sc)
 {
 	struct ifnet *ifp;
 	struct ti_rcb *rcb;
 	int i;
 
 	TI_LOCK_ASSERT(sc);
 
 	/* Only referenced by the disabled (#if 0) coalescing code below. */
 	ifp = sc->ti_ifp;
 
 	/* Disable interrupts for now. */
 	CSR_WRITE_4(sc, TI_MB_HOSTINTR, 1);
 
 	/* Tell the chip where to find the general information block. */
 	CSR_WRITE_4(sc, TI_GCR_GENINFO_HI,
 	    (uint64_t)sc->ti_rdata.ti_info_paddr >> 32);
 	CSR_WRITE_4(sc, TI_GCR_GENINFO_LO,
 	    sc->ti_rdata.ti_info_paddr & 0xFFFFFFFF);
 
 	/* Load the firmware into SRAM. */
 	ti_loadfw(sc);
 
 	/* Set up the contents of the general info and ring control blocks. */
 
 	/* Set up the event ring and producer pointer. */
 	bzero(sc->ti_rdata.ti_event_ring, TI_EVENT_RING_SZ);
 	rcb = &sc->ti_rdata.ti_info->ti_ev_rcb;
 	ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_event_ring_paddr);
 	rcb->ti_flags = 0;
 	/* The producer index is reported through the status block. */
 	ti_hostaddr64(&sc->ti_rdata.ti_info->ti_ev_prodidx_ptr,
 	    sc->ti_rdata.ti_status_paddr +
 	    offsetof(struct ti_status, ti_ev_prodidx_r));
 	sc->ti_ev_prodidx.ti_idx = 0;
 	CSR_WRITE_4(sc, TI_GCR_EVENTCONS_IDX, 0);
 	sc->ti_ev_saved_considx = 0;
 
 	/* Set up the command ring and producer mailbox. */
 	rcb = &sc->ti_rdata.ti_info->ti_cmd_rcb;
 	/* The command ring address is a NIC address, not a host one. */
 	ti_hostaddr64(&rcb->ti_hostaddr, TI_GCR_NIC_ADDR(TI_GCR_CMDRING));
 	rcb->ti_flags = 0;
 	rcb->ti_max_len = 0;
 	/* Clear every command ring slot, one 32-bit word at a time. */
 	for (i = 0; i < TI_CMD_RING_CNT; i++) {
 		CSR_WRITE_4(sc, TI_GCR_CMDRING + (i * 4), 0);
 	}
 	CSR_WRITE_4(sc, TI_GCR_CMDCONS_IDX, 0);
 	CSR_WRITE_4(sc, TI_MB_CMDPROD_IDX, 0);
 	sc->ti_cmd_saved_prodidx = 0;
 
 	/*
 	 * Assign the address of the stats refresh buffer.
 	 * We re-use the current stats buffer for this to
 	 * conserve memory.
 	 */
 	bzero(&sc->ti_rdata.ti_info->ti_stats, sizeof(struct ti_stats));
 	ti_hostaddr64(&sc->ti_rdata.ti_info->ti_refresh_stats_ptr,
 	    sc->ti_rdata.ti_info_paddr + offsetof(struct ti_gib, ti_stats));
 
 	/* Set up the standard receive ring. */
 	rcb = &sc->ti_rdata.ti_info->ti_std_rx_rcb;
 	ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_rx_std_ring_paddr);
 	rcb->ti_max_len = TI_FRAMELEN;
 	rcb->ti_flags = 0;
 	/* Checksum and VLAN flags follow the currently enabled capabilities. */
 	if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM)
 		rcb->ti_flags |= TI_RCB_FLAG_TCP_UDP_CKSUM |
 		     TI_RCB_FLAG_IP_CKSUM | TI_RCB_FLAG_NO_PHDR_CKSUM;
 	if (sc->ti_ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
 		rcb->ti_flags |= TI_RCB_FLAG_VLAN_ASSIST;
 
 	/* Set up the jumbo receive ring. */
 	rcb = &sc->ti_rdata.ti_info->ti_jumbo_rx_rcb;
 	ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_rx_jumbo_ring_paddr);
 
 	/*
 	 * Without TI_SF_BUF_JUMBO each jumbo buffer is one 9k cluster;
 	 * with it, buffers are page sized and extended descriptors are used.
 	 */
 #ifndef TI_SF_BUF_JUMBO
 	rcb->ti_max_len = MJUM9BYTES - ETHER_ALIGN;
 	rcb->ti_flags = 0;
 #else
 	rcb->ti_max_len = PAGE_SIZE;
 	rcb->ti_flags = TI_RCB_FLAG_USE_EXT_RX_BD;
 #endif
 	if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM)
 		rcb->ti_flags |= TI_RCB_FLAG_TCP_UDP_CKSUM |
 		     TI_RCB_FLAG_IP_CKSUM | TI_RCB_FLAG_NO_PHDR_CKSUM;
 	if (sc->ti_ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
 		rcb->ti_flags |= TI_RCB_FLAG_VLAN_ASSIST;
 
 	/*
 	 * Set up the mini ring. Only activated on the
 	 * Tigon 2 but the slot in the config block is
 	 * still there on the Tigon 1.
 	 */
 	rcb = &sc->ti_rdata.ti_info->ti_mini_rx_rcb;
 	ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_rx_mini_ring_paddr);
 	rcb->ti_max_len = MHLEN - ETHER_ALIGN;
 	if (sc->ti_hwrev == TI_HWREV_TIGON)
 		rcb->ti_flags = TI_RCB_FLAG_RING_DISABLED;
 	else
 		rcb->ti_flags = 0;
 	if (sc->ti_ifp->if_capenable & IFCAP_RXCSUM)
 		rcb->ti_flags |= TI_RCB_FLAG_TCP_UDP_CKSUM |
 		     TI_RCB_FLAG_IP_CKSUM | TI_RCB_FLAG_NO_PHDR_CKSUM;
 	if (sc->ti_ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
 		rcb->ti_flags |= TI_RCB_FLAG_VLAN_ASSIST;
 
 	/*
 	 * Set up the receive return ring.
 	 */
 	rcb = &sc->ti_rdata.ti_info->ti_return_rcb;
 	ti_hostaddr64(&rcb->ti_hostaddr, sc->ti_rdata.ti_rx_return_ring_paddr);
 	rcb->ti_flags = 0;
 	/* For this ring ti_max_len carries the number of ring entries. */
 	rcb->ti_max_len = TI_RETURN_RING_CNT;
 	ti_hostaddr64(&sc->ti_rdata.ti_info->ti_return_prodidx_ptr,
 	    sc->ti_rdata.ti_status_paddr +
 	    offsetof(struct ti_status, ti_return_prodidx_r));
 
 	/*
 	 * Set up the tx ring. Note: for the Tigon 2, we have the option
 	 * of putting the transmit ring in the host's address space and
 	 * letting the chip DMA it instead of leaving the ring in the NIC's
 	 * memory and accessing it through the shared memory region. We
 	 * do this for the Tigon 2, but it doesn't work on the Tigon 1,
 	 * so we have to revert to the shared memory scheme if we detect
 	 * a Tigon 1 chip.
 	 */
 	CSR_WRITE_4(sc, TI_WINBASE, TI_TX_RING_BASE);
 	/* ti_tx_ring is NULL when there is no host-resident tx ring. */
 	if (sc->ti_rdata.ti_tx_ring != NULL)
 		bzero(sc->ti_rdata.ti_tx_ring, TI_TX_RING_SZ);
 	rcb = &sc->ti_rdata.ti_info->ti_tx_rcb;
 	if (sc->ti_hwrev == TI_HWREV_TIGON)
 		rcb->ti_flags = 0;
 	else
 		rcb->ti_flags = TI_RCB_FLAG_HOST_RING;
 	if (sc->ti_ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
 		rcb->ti_flags |= TI_RCB_FLAG_VLAN_ASSIST;
 	if (sc->ti_ifp->if_capenable & IFCAP_TXCSUM)
 		rcb->ti_flags |= TI_RCB_FLAG_TCP_UDP_CKSUM |
 		     TI_RCB_FLAG_IP_CKSUM | TI_RCB_FLAG_NO_PHDR_CKSUM;
 	rcb->ti_max_len = TI_TX_RING_CNT;
 	/* Tigon 1: ring lives in NIC memory; otherwise in host memory. */
 	if (sc->ti_hwrev == TI_HWREV_TIGON)
 		ti_hostaddr64(&rcb->ti_hostaddr, TI_TX_RING_BASE);
 	else
 		ti_hostaddr64(&rcb->ti_hostaddr,
 		    sc->ti_rdata.ti_tx_ring_paddr);
 	ti_hostaddr64(&sc->ti_rdata.ti_info->ti_tx_considx_ptr,
 	    sc->ti_rdata.ti_status_paddr +
 	    offsetof(struct ti_status, ti_tx_considx_r));
 
 	/* Push everything written above out before the CPU is started. */
 	bus_dmamap_sync(sc->ti_cdata.ti_gib_tag, sc->ti_cdata.ti_gib_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	bus_dmamap_sync(sc->ti_cdata.ti_status_tag, sc->ti_cdata.ti_status_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	bus_dmamap_sync(sc->ti_cdata.ti_event_ring_tag,
 	    sc->ti_cdata.ti_event_ring_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	if (sc->ti_rdata.ti_tx_ring != NULL)
 		bus_dmamap_sync(sc->ti_cdata.ti_tx_ring_tag,
 		    sc->ti_cdata.ti_tx_ring_map, BUS_DMASYNC_PREWRITE);
 
 	/*
 	 * Set up tunables.  The MTU-dependent scaling of the rx coalescing
 	 * ticks is compiled out, so the plain value is always written.
 	 */
 #if 0
 	if (ifp->if_mtu > ETHERMTU + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)
 		CSR_WRITE_4(sc, TI_GCR_RX_COAL_TICKS,
 		    (sc->ti_rx_coal_ticks / 10));
 	else
 #endif
 		CSR_WRITE_4(sc, TI_GCR_RX_COAL_TICKS, sc->ti_rx_coal_ticks);
 	CSR_WRITE_4(sc, TI_GCR_TX_COAL_TICKS, sc->ti_tx_coal_ticks);
 	CSR_WRITE_4(sc, TI_GCR_STAT_TICKS, sc->ti_stat_ticks);
 	CSR_WRITE_4(sc, TI_GCR_RX_MAX_COAL_BD, sc->ti_rx_max_coal_bds);
 	CSR_WRITE_4(sc, TI_GCR_TX_MAX_COAL_BD, sc->ti_tx_max_coal_bds);
 	CSR_WRITE_4(sc, TI_GCR_TX_BUFFER_RATIO, sc->ti_tx_buf_ratio);
 
 	/* Turn interrupts on. */
 	CSR_WRITE_4(sc, TI_GCR_MASK_INTRS, 0);
 	CSR_WRITE_4(sc, TI_MB_HOSTINTR, 0);
 
 	/* Start CPU. */
 	TI_CLRBIT(sc, TI_CPU_STATE, (TI_CPUSTATE_HALT|TI_CPUSTATE_STEP));
 
 	return (0);
 }
 
 /*
  * Probe for a Tigon chip: walk the ti_devs table (terminated by an
  * entry with a NULL name) looking for the device's PCI vendor/device
  * ID pair.  On a match the device description is set from the table
  * and BUS_PROBE_DEFAULT is returned; otherwise ENXIO.
  */
 static int
 ti_probe(device_t dev)
 {
 	const struct ti_type *entry;
 
 	for (entry = ti_devs; entry->ti_name != NULL; entry++) {
 		/* Skip entries whose vendor or device ID differs. */
 		if (pci_get_vendor(dev) != entry->ti_vid ||
 		    pci_get_device(dev) != entry->ti_did)
 			continue;
 
 		device_set_desc(dev, entry->ti_name);
 		return (BUS_PROBE_DEFAULT);
 	}
 
 	return (ENXIO);
 }
 
 /*
  * Attach routine: allocate the ifnet, map the register BAR and the
  * IRQ, initialize the chip, read the station address from the EEPROM,
  * allocate DMA resources, register the supported media and the control
  * device node, attach to the ethernet layer and, last of all, hook the
  * interrupt handler.
  *
  * Returns 0 on success or an errno value.  On any failure everything
  * acquired so far is released by calling ti_detach().
  */
 static int
 ti_attach(device_t dev)
 {
 	struct ifnet *ifp;
 	struct ti_softc *sc;
 	int error = 0, rid;
 	u_char eaddr[6];
 
 	sc = device_get_softc(dev);
 	sc->ti_dev = dev;
 
 	mtx_init(&sc->ti_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
 	    MTX_DEF);
 	callout_init_mtx(&sc->ti_watchdog, &sc->ti_mtx, 0);
 	ifmedia_init(&sc->ifmedia, IFM_IMASK, ti_ifmedia_upd, ti_ifmedia_sts);
 	ifp = sc->ti_ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL) {
 		device_printf(dev, "can not if_alloc()\n");
 		error = ENOSPC;
 		goto fail;
 	}
 	sc->ti_ifp->if_hwassist = TI_CSUM_FEATURES;
 	sc->ti_ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_RXCSUM;
 	sc->ti_ifp->if_capenable = sc->ti_ifp->if_capabilities;
 
 	/*
 	 * Map control/status registers.
 	 */
 	pci_enable_busmaster(dev);
 
 	rid = PCIR_BAR(0);
 	sc->ti_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
 	    RF_ACTIVE);
 
 	if (sc->ti_res == NULL) {
 		device_printf(dev, "couldn't map memory\n");
 		error = ENXIO;
 		goto fail;
 	}
 
 	sc->ti_btag = rman_get_bustag(sc->ti_res);
 	sc->ti_bhandle = rman_get_bushandle(sc->ti_res);
 
 	/* Allocate interrupt */
 	rid = 0;
 
 	sc->ti_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
 	    RF_SHAREABLE | RF_ACTIVE);
 
 	if (sc->ti_irq == NULL) {
 		device_printf(dev, "couldn't map interrupt\n");
 		error = ENXIO;
 		goto fail;
 	}
 
 	if (ti_chipinit(sc)) {
 		device_printf(dev, "chip initialization failed\n");
 		error = ENXIO;
 		goto fail;
 	}
 
 	/* Zero out the NIC's on-board SRAM. */
 	ti_mem_zero(sc, 0x2000, 0x100000 - 0x2000);
 
 	/* Init again -- zeroing memory may have clobbered some registers. */
 	if (ti_chipinit(sc)) {
 		device_printf(dev, "chip initialization failed\n");
 		error = ENXIO;
 		goto fail;
 	}
 
 	/*
 	 * Get station address from the EEPROM. Note: the manual states
 	 * that the MAC address is at offset 0x8c, however the data is
 	 * stored as two longwords (since that's how it's loaded into
 	 * the NIC). This means the MAC address is actually preceded
 	 * by two zero bytes. We need to skip over those.
 	 */
 	if (ti_read_eeprom(sc, eaddr, TI_EE_MAC_OFFSET + 2, ETHER_ADDR_LEN)) {
 		device_printf(dev, "failed to read station address\n");
 		error = ENXIO;
 		goto fail;
 	}
 
 	/* Allocate working area for memory dump. */
 	sc->ti_membuf = malloc(sizeof(uint8_t) * TI_WINLEN, M_DEVBUF, M_NOWAIT);
 	sc->ti_membuf2 = malloc(sizeof(uint8_t) * TI_WINLEN, M_DEVBUF,
 	    M_NOWAIT);
 	if (sc->ti_membuf == NULL || sc->ti_membuf2 == NULL) {
 		device_printf(dev, "cannot allocate memory buffer\n");
 		error = ENOMEM;
 		goto fail;
 	}
 	if ((error = ti_dma_alloc(sc)) != 0)
 		goto fail;
 
 	/*
 	 * We really need a better way to tell a 1000baseTX card
 	 * from a 1000baseSX one, since in theory there could be
 	 * OEMed 1000baseTX cards from lame vendors who aren't
 	 * clever enough to change the PCI ID. For the moment
 	 * though, the AceNIC is the only copper card available.
 	 */
 	if (pci_get_vendor(dev) == ALT_VENDORID &&
 	    pci_get_device(dev) == ALT_DEVICEID_ACENIC_COPPER)
 		sc->ti_copper = 1;
 	/* Ok, it's not the only copper card available. */
 	if (pci_get_vendor(dev) == NG_VENDORID &&
 	    pci_get_device(dev) == NG_DEVICEID_GA620T)
 		sc->ti_copper = 1;
 
 	/* Set default tunable values. */
 	ti_sysctl_node(sc);
 
 	/* Set up ifnet structure */
 	ifp->if_softc = sc;
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_ioctl = ti_ioctl;
 	ifp->if_start = ti_start;
 	ifp->if_init = ti_init;
 	ifp->if_get_counter = ti_get_counter;
 	ifp->if_baudrate = IF_Gbps(1UL);
 	ifp->if_snd.ifq_drv_maxlen = TI_TX_RING_CNT - 1;
 	IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen);
 	IFQ_SET_READY(&ifp->if_snd);
 
 	/* Set up ifmedia support. */
 	if (sc->ti_copper) {
 		/*
 		 * Copper cards allow manual 10/100 mode selection,
 		 * but not manual 1000baseTX mode selection. Why?
 		 * Because currently there's no way to specify the
 		 * master/slave setting through the firmware interface,
 		 * so Alteon decided to just bag it and handle it
 		 * via autonegotiation.
 		 */
 		ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T, 0, NULL);
 		ifmedia_add(&sc->ifmedia,
 		    IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL);
 		ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX, 0, NULL);
 		ifmedia_add(&sc->ifmedia,
 		    IFM_ETHER|IFM_100_TX|IFM_FDX, 0, NULL);
 		ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_1000_T, 0, NULL);
 		ifmedia_add(&sc->ifmedia,
 		    IFM_ETHER|IFM_1000_T|IFM_FDX, 0, NULL);
 	} else {
 		/* Fiber cards don't support 10/100 modes. */
 		ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_1000_SX, 0, NULL);
 		ifmedia_add(&sc->ifmedia,
 		    IFM_ETHER|IFM_1000_SX|IFM_FDX, 0, NULL);
 	}
 	ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_AUTO, 0, NULL);
 	ifmedia_set(&sc->ifmedia, IFM_ETHER|IFM_AUTO);
 
 	/*
 	 * We're assuming here that card initialization is a sequential
 	 * thing.  If it isn't, multiple cards probing at the same time
 	 * could stomp on the list of softcs here.
 	 */
 
 	/* Register the device */
 	sc->dev = make_dev(&ti_cdevsw, device_get_unit(dev), UID_ROOT,
 	    GID_OPERATOR, 0600, "ti%d", device_get_unit(dev));
 	sc->dev->si_drv1 = sc;
 
 	/*
 	 * Call MI attach routine.
 	 */
 	ether_ifattach(ifp, eaddr);
 
 	/* VLAN capability setup. */
 	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM |
 	    IFCAP_VLAN_HWTAGGING;
 	ifp->if_capenable = ifp->if_capabilities;
 	/* Tell the upper layer we support VLAN over-sized frames. */
 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
 
 	/* Driver supports link state tracking. */
 	ifp->if_capabilities |= IFCAP_LINKSTATE;
 	ifp->if_capenable |= IFCAP_LINKSTATE;
 
 	/* Hook interrupt last to avoid having to lock softc */
 	error = bus_setup_intr(dev, sc->ti_irq, INTR_TYPE_NET|INTR_MPSAFE,
 	   NULL, ti_intr, sc, &sc->ti_intrhand);
 
 	if (error) {
 		device_printf(dev, "couldn't set up irq\n");
 		/*
 		 * ti_detach() only calls ether_ifdetach() when
 		 * device_is_attached() is true, which is not yet the
 		 * case while attach is still running.  Undo
 		 * ether_ifattach() here so that ti_detach() does not
 		 * if_free() an ifnet that is still attached.
 		 */
 		ether_ifdetach(ifp);
 		goto fail;
 	}
 
 fail:
 	if (error)
 		ti_detach(dev);
 
 	return (error);
 }
 
 /*
  * Shutdown hardware and free up resources. This can be called any
  * time after the mutex has been initialized. It is called in both
  * the error case in attach and the normal detach case so it needs
  * to be careful about only freeing resources that have actually been
  * allocated.
  */
 static int
 ti_detach(device_t dev)
 {
 	struct ti_softc *sc;
 	struct ifnet *ifp;
 
 	sc = device_get_softc(dev);
 	/* Remove the control device node first, if it was created. */
 	if (sc->dev)
 		destroy_dev(sc->dev);
 	KASSERT(mtx_initialized(&sc->ti_mtx), ("ti mutex not initialized"));
 	ifp = sc->ti_ifp;
 	/*
 	 * Only detach from the network stack and stop the chip when the
 	 * device is fully attached; when called from the ti_attach()
 	 * error path this block is skipped.
 	 */
 	if (device_is_attached(dev)) {
 		ether_ifdetach(ifp);
 		TI_LOCK(sc);
 		ti_stop(sc);
 		TI_UNLOCK(sc);
 	}
 
 	/* These should only be active if attach succeeded */
 	callout_drain(&sc->ti_watchdog);
 	bus_generic_detach(dev);
 	ti_dma_free(sc);
 	ifmedia_removeall(&sc->ifmedia);
 
 	/* Release bus resources; each is checked as it may not exist. */
 	if (sc->ti_intrhand)
 		bus_teardown_intr(dev, sc->ti_irq, sc->ti_intrhand);
 	if (sc->ti_irq)
 		bus_release_resource(dev, SYS_RES_IRQ, 0, sc->ti_irq);
 	if (sc->ti_res) {
 		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0),
 		    sc->ti_res);
 	}
 	if (ifp)
 		if_free(ifp);
 	/* Memory dump working buffers allocated in ti_attach(). */
 	if (sc->ti_membuf)
 		free(sc->ti_membuf, M_DEVBUF);
 	if (sc->ti_membuf2)
 		free(sc->ti_membuf2, M_DEVBUF);
 
 	/* The mutex goes last: everything above may still rely on it. */
 	mtx_destroy(&sc->ti_mtx);
 
 	return (0);
 }
 
 #ifdef TI_JUMBO_HDRSPLIT
 /*
  * If hdr_len is 0, that means that header splitting wasn't done on
  * this packet for some reason.  The two most likely reasons are that
  * the protocol isn't a supported protocol for splitting, or this
  * packet had a fragment offset that wasn't 0.
  *
  * The header length, if it is non-zero, will always be the length of
  * the headers on the packet, but that length could be longer than the
  * first mbuf.  So we take the minimum of the two as the actual
  * length.
  */
 /*
  * Trim an mbuf chain that was filled by a header-split receive.
  *
  * top     - first mbuf of the chain (holds the headers)
  * hdr_len - header length reported for the frame, 0 if no split was done
  * pkt_len - total length of the received frame
  * idx     - ring index of the frame (not used by this function)
  *
  * The first mbuf is clamped to hdr_len (when non-zero), every following
  * mbuf is clamped to what remains of pkt_len, and any mbufs left over
  * once pkt_len is exhausted are freed.  Panics if the chain is too
  * short to hold pkt_len bytes.
  */
 static __inline void
 ti_hdr_split(struct mbuf *top, int hdr_len, int pkt_len, int idx)
 {
 	int i = 0;
 	int lengths[4] = {0, 0, 0, 0};
 	struct mbuf *m, *mp;
 
 	if (hdr_len != 0)
 		top->m_len = min(hdr_len, top->m_len);
 	pkt_len -= top->m_len;
 	lengths[i++] = top->m_len;
 
 	mp = top;
 	for (m = top->m_next; m && pkt_len; m = m->m_next) {
 		m->m_len = m->m_ext.ext_size = min(m->m_len, pkt_len);
 		pkt_len -= m->m_len;
 		/*
 		 * lengths[] only feeds the disabled debug printf below;
 		 * never write past its end if the chain is longer than
 		 * the array.
 		 */
 		if (i < (int)(sizeof(lengths) / sizeof(lengths[0])))
 			lengths[i++] = m->m_len;
 		mp = m;
 	}
 
 #if 0
 	if (hdr_len != 0)
 		printf("got split packet: ");
 	else
 		printf("got non-split packet: ");
 
 	printf("%d,%d,%d,%d = %d\n", lengths[0],
 	    lengths[1], lengths[2], lengths[3],
 	    lengths[0] + lengths[1] + lengths[2] +
 	    lengths[3]);
 #endif
 
 	/* The chain ran out before the whole frame was accounted for. */
 	if (pkt_len)
 		panic("header splitting didn't");
 
 	/* Free whatever follows the last mbuf that carries data. */
 	if (m) {
 		m_freem(m);
 		mp->m_next = NULL;
 
 	}
 	if (mp->m_next != NULL)
 		panic("ti_hdr_split: last mbuf in chain should be null");
 }
 #endif /* TI_JUMBO_HDRSPLIT */
 
 /*
  * Re-arm standard receive ring slot i with its default descriptor
  * contents so the slot's existing buffer can be reused.
  */
 static void
 ti_discard_std(struct ti_softc *sc, int i)
 {
 	struct ti_rx_desc *desc = &sc->ti_rdata.ti_rx_std_ring[i];
 
 	desc->ti_idx = i;
 	desc->ti_type = TI_BDTYPE_RECV_BD;
 	desc->ti_len = MCLBYTES - ETHER_ALIGN;
 	desc->ti_vlan_tag = 0;
 	desc->ti_tcp_udp_cksum = 0;
 
 	/* Request hardware checksumming only while rx csum is enabled. */
 	desc->ti_flags = 0;
 	if ((sc->ti_ifp->if_capenable & IFCAP_RXCSUM) != 0)
 		desc->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM;
 }
 
 /*
  * Re-arm mini receive ring slot i with its default descriptor
  * contents so the slot's existing buffer can be reused.
  */
 static void
 ti_discard_mini(struct ti_softc *sc, int i)
 {
 	struct ti_rx_desc *desc = &sc->ti_rdata.ti_rx_mini_ring[i];
 
 	desc->ti_idx = i;
 	desc->ti_type = TI_BDTYPE_RECV_BD;
 	desc->ti_len = MHLEN - ETHER_ALIGN;
 	desc->ti_vlan_tag = 0;
 	desc->ti_tcp_udp_cksum = 0;
 
 	/* Request hardware checksumming only while rx csum is enabled. */
 	desc->ti_flags = TI_BDFLAG_MINI_RING;
 	if ((sc->ti_ifp->if_capenable & IFCAP_RXCSUM) != 0)
 		desc->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM;
 }
 
 #ifndef TI_SF_BUF_JUMBO
 /*
  * Re-arm jumbo receive ring slot i with its default descriptor
  * contents so the slot's existing buffer can be reused.
  */
 static void
 ti_discard_jumbo(struct ti_softc *sc, int i)
 {
 	struct ti_rx_desc *desc = &sc->ti_rdata.ti_rx_jumbo_ring[i];
 
 	desc->ti_idx = i;
 	desc->ti_type = TI_BDTYPE_RECV_JUMBO_BD;
 	desc->ti_len = MJUM9BYTES - ETHER_ALIGN;
 	desc->ti_vlan_tag = 0;
 	desc->ti_tcp_udp_cksum = 0;
 
 	/* Request hardware checksumming only while rx csum is enabled. */
 	desc->ti_flags = TI_BDFLAG_JUMBO_RING;
 	if ((sc->ti_ifp->if_capenable & IFCAP_RXCSUM) != 0)
 		desc->ti_flags |= TI_BDFLAG_TCP_UDP_CKSUM | TI_BDFLAG_IP_CKSUM;
 }
 #endif
 
 /*
  * Frame reception handling. This is called if there's a frame
  * on the receive return list.
  *
  * Note: we have to be able to handle three possibilities here:
  * 1) the frame is from the mini receive ring (can only happen
  *    on Tigon 2 boards)
  * 2) the frame is from the jumbo receive ring
  * 3) the frame is from the standard receive ring
  */
 
 /*
  * Drain the receive return ring.  Must be called with the softc lock
  * held; the lock is dropped around each call into the stack's
  * if_input routine and re-taken afterwards.
  *
  * For every returned descriptor the originating ring (jumbo, mini or
  * standard) is identified from the descriptor flags, the slot is
  * refilled (or recycled on error / allocation failure), and the mbuf
  * is handed up with checksum and VLAN information attached.  After the
  * loop the producer indexes of the rings that were touched are pushed
  * back to the NIC.
  */
 static void
 ti_rxeof(struct ti_softc *sc)
 {
 	struct ifnet *ifp;
 #ifdef TI_SF_BUF_JUMBO
 	bus_dmamap_t map;
 #endif
 	/* NOTE(review): not referenced directly; presumably used by the
 	 * TI_UPDATE_*PROD macros below -- confirm. */
 	struct ti_cmd_desc cmd;
 	int jumbocnt, minicnt, stdcnt, ti_len;
 
 	TI_LOCK_ASSERT(sc);
 
 	ifp = sc->ti_ifp;
 
 	bus_dmamap_sync(sc->ti_cdata.ti_rx_std_ring_tag,
 	    sc->ti_cdata.ti_rx_std_ring_map, BUS_DMASYNC_POSTWRITE);
 	if (ifp->if_mtu > ETHERMTU + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)
 		bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_ring_tag,
 		    sc->ti_cdata.ti_rx_jumbo_ring_map, BUS_DMASYNC_POSTWRITE);
 	if (sc->ti_rdata.ti_rx_mini_ring != NULL)
 		bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_ring_tag,
 		    sc->ti_cdata.ti_rx_mini_ring_map, BUS_DMASYNC_POSTWRITE);
 	bus_dmamap_sync(sc->ti_cdata.ti_rx_return_ring_tag,
 	    sc->ti_cdata.ti_rx_return_ring_map, BUS_DMASYNC_POSTREAD);
 
 	/* Per-ring counts decide which producer indexes to update below. */
 	jumbocnt = minicnt = stdcnt = 0;
 	while (sc->ti_rx_saved_considx != sc->ti_return_prodidx.ti_idx) {
 		struct ti_rx_desc *cur_rx;
 		uint32_t rxidx;
 		struct mbuf *m = NULL;
 		uint16_t vlan_tag = 0;
 		int have_tag = 0;
 
 		cur_rx =
 		    &sc->ti_rdata.ti_rx_return_ring[sc->ti_rx_saved_considx];
 		/* rxidx is the slot in the ring the buffer came from. */
 		rxidx = cur_rx->ti_idx;
 		ti_len = cur_rx->ti_len;
 		TI_INC(sc->ti_rx_saved_considx, TI_RETURN_RING_CNT);
 
 		if (cur_rx->ti_flags & TI_BDFLAG_VLAN_TAG) {
 			have_tag = 1;
 			vlan_tag = cur_rx->ti_vlan_tag;
 		}
 
 		if (cur_rx->ti_flags & TI_BDFLAG_JUMBO_RING) {
 			jumbocnt++;
 			TI_INC(sc->ti_jumbo, TI_JUMBO_RX_RING_CNT);
 			m = sc->ti_cdata.ti_rx_jumbo_chain[rxidx];
 #ifndef TI_SF_BUF_JUMBO
 			/* Bad frame: count it and recycle the same buffer. */
 			if (cur_rx->ti_flags & TI_BDFLAG_ERROR) {
 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 				ti_discard_jumbo(sc, rxidx);
 				continue;
 			}
 			/* No replacement buffer: drop and recycle. */
 			if (ti_newbuf_jumbo(sc, rxidx, NULL) != 0) {
 				if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
 				ti_discard_jumbo(sc, rxidx);
 				continue;
 			}
 			m->m_len = ti_len;
 #else /* !TI_SF_BUF_JUMBO */
 			sc->ti_cdata.ti_rx_jumbo_chain[rxidx] = NULL;
 			map = sc->ti_cdata.ti_rx_jumbo_maps[rxidx];
 			bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_tag, map,
 			    BUS_DMASYNC_POSTREAD);
 			bus_dmamap_unload(sc->ti_cdata.ti_rx_jumbo_tag, map);
 			/* On error or ENOBUFS the old chain m is re-posted. */
 			if (cur_rx->ti_flags & TI_BDFLAG_ERROR) {
 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 				ti_newbuf_jumbo(sc, sc->ti_jumbo, m);
 				continue;
 			}
 			if (ti_newbuf_jumbo(sc, sc->ti_jumbo, NULL) == ENOBUFS) {
 				if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
 				ti_newbuf_jumbo(sc, sc->ti_jumbo, m);
 				continue;
 			}
 #ifdef TI_JUMBO_HDRSPLIT
 			if (sc->ti_hdrsplit)
 				ti_hdr_split(m, TI_HOSTADDR(cur_rx->ti_addr),
 					     ti_len, rxidx);
 			else
 #endif /* TI_JUMBO_HDRSPLIT */
 			/* Negative length: trims the excess off the tail. */
 			m_adj(m, ti_len - m->m_pkthdr.len);
 #endif /* TI_SF_BUF_JUMBO */
 		} else if (cur_rx->ti_flags & TI_BDFLAG_MINI_RING) {
 			minicnt++;
 			TI_INC(sc->ti_mini, TI_MINI_RX_RING_CNT);
 			m = sc->ti_cdata.ti_rx_mini_chain[rxidx];
 			if (cur_rx->ti_flags & TI_BDFLAG_ERROR) {
 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 				ti_discard_mini(sc, rxidx);
 				continue;
 			}
 			if (ti_newbuf_mini(sc, rxidx) != 0) {
 				if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
 				ti_discard_mini(sc, rxidx);
 				continue;
 			}
 			m->m_len = ti_len;
 		} else {
 			/* Neither jumbo nor mini flag: standard ring. */
 			stdcnt++;
 			TI_INC(sc->ti_std, TI_STD_RX_RING_CNT);
 			m = sc->ti_cdata.ti_rx_std_chain[rxidx];
 			if (cur_rx->ti_flags & TI_BDFLAG_ERROR) {
 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 				ti_discard_std(sc, rxidx);
 				continue;
 			}
 			if (ti_newbuf_std(sc, rxidx) != 0) {
 				if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
 				ti_discard_std(sc, rxidx);
 				continue;
 			}
 			m->m_len = ti_len;
 		}
 
 		m->m_pkthdr.len = ti_len;
 		if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 		m->m_pkthdr.rcvif = ifp;
 
 		if (ifp->if_capenable & IFCAP_RXCSUM) {
 			if (cur_rx->ti_flags & TI_BDFLAG_IP_CKSUM) {
 				m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
 				/* A reported value of 0xffff marks it valid. */
 				if ((cur_rx->ti_ip_cksum ^ 0xffff) == 0)
 					m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
 			}
 			if (cur_rx->ti_flags & TI_BDFLAG_TCP_UDP_CKSUM) {
 				m->m_pkthdr.csum_data =
 				    cur_rx->ti_tcp_udp_cksum;
 				m->m_pkthdr.csum_flags |= CSUM_DATA_VALID;
 			}
 		}
 
 		/*
 		 * If we received a packet with a vlan tag,
 		 * tag it before passing the packet upward.
 		 */
 		if (have_tag) {
 			m->m_pkthdr.ether_vtag = vlan_tag;
 			m->m_flags |= M_VLANTAG;
 		}
 		/* Do not hold the driver lock across the stack input path. */
 		TI_UNLOCK(sc);
 		(*ifp->if_input)(ifp, m);
 		TI_LOCK(sc);
 	}
 
 	bus_dmamap_sync(sc->ti_cdata.ti_rx_return_ring_tag,
 	    sc->ti_cdata.ti_rx_return_ring_map, BUS_DMASYNC_PREREAD);
 	/* Only necessary on the Tigon 1. */
 	if (sc->ti_hwrev == TI_HWREV_TIGON)
 		CSR_WRITE_4(sc, TI_GCR_RXRETURNCONS_IDX,
 		    sc->ti_rx_saved_considx);
 
 	/* Hand refilled descriptors back to the NIC, ring by ring. */
 	if (stdcnt > 0) {
 		bus_dmamap_sync(sc->ti_cdata.ti_rx_std_ring_tag,
 		    sc->ti_cdata.ti_rx_std_ring_map, BUS_DMASYNC_PREWRITE);
 		TI_UPDATE_STDPROD(sc, sc->ti_std);
 	}
 	if (minicnt > 0) {
 		bus_dmamap_sync(sc->ti_cdata.ti_rx_mini_ring_tag,
 		    sc->ti_cdata.ti_rx_mini_ring_map, BUS_DMASYNC_PREWRITE);
 		TI_UPDATE_MINIPROD(sc, sc->ti_mini);
 	}
 	if (jumbocnt > 0) {
 		bus_dmamap_sync(sc->ti_cdata.ti_rx_jumbo_ring_tag,
 		    sc->ti_cdata.ti_rx_jumbo_ring_map, BUS_DMASYNC_PREWRITE);
 		TI_UPDATE_JUMBOPROD(sc, sc->ti_jumbo);
 	}
 }
 
 static void
 ti_txeof(struct ti_softc *sc)
 {
 	struct ti_txdesc *txd;
 	struct ti_tx_desc txdesc;
 	struct ti_tx_desc *cur_tx = NULL;
 	struct ifnet *ifp;
 	int idx;
 
 	ifp = sc->ti_ifp;
 
 	txd = STAILQ_FIRST(&sc->ti_cdata.ti_txbusyq);
 	if (txd == NULL)
 		return;
 
 	if (sc->ti_rdata.ti_tx_ring != NULL)
 		bus_dmamap_sync(sc->ti_cdata.ti_tx_ring_tag,
 		    sc->ti_cdata.ti_tx_ring_map, BUS_DMASYNC_POSTWRITE);
 	/*
 	 * Go through our tx ring and free mbufs for those
 	 * frames that have been sent.
 	 */
 	for (idx = sc->ti_tx_saved_considx; idx != sc->ti_tx_considx.ti_idx;
 	    TI_INC(idx, TI_TX_RING_CNT)) {
 		if (sc->ti_hwrev == TI_HWREV_TIGON) {
 			ti_mem_read(sc, TI_TX_RING_BASE + idx * sizeof(txdesc),
 			    sizeof(txdesc), &txdesc);
 			cur_tx = &txdesc;
 		} else
 			cur_tx = &sc->ti_rdata.ti_tx_ring[idx];
 		sc->ti_txcnt--;
 		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 		if ((cur_tx->ti_flags & TI_BDFLAG_END) == 0)
 			continue;
 		bus_dmamap_sync(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap,
 		    BUS_DMASYNC_POSTWRITE);
 		bus_dmamap_unload(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap);
 
 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 		m_freem(txd->tx_m);
 		txd->tx_m = NULL;
 		STAILQ_REMOVE_HEAD(&sc->ti_cdata.ti_txbusyq, tx_q);
 		STAILQ_INSERT_TAIL(&sc->ti_cdata.ti_txfreeq, txd, tx_q);
 		txd = STAILQ_FIRST(&sc->ti_cdata.ti_txbusyq);
 	}
 	sc->ti_tx_saved_considx = idx;
 	if (sc->ti_txcnt == 0)
 		sc->ti_timer = 0;
 }
 
 /*
  * Interrupt handler.  xsc is the softc registered with
  * bus_setup_intr().  Takes the softc lock for its whole duration,
  * services the rx return ring and the tx ring while the interface is
  * running, processes NIC events, then re-enables interrupts and
  * restarts transmission if packets are queued.
  */
 static void
 ti_intr(void *xsc)
 {
 	struct ti_softc *sc;
 	struct ifnet *ifp;
 
 	sc = xsc;
 	TI_LOCK(sc);
 	ifp = sc->ti_ifp;
 
 	/* Make sure this is really our interrupt. */
 	if (!(CSR_READ_4(sc, TI_MISC_HOST_CTL) & TI_MHC_INTSTATE)) {
 		TI_UNLOCK(sc);
 		return;
 	}
 
 	/* Ack interrupt and stop others from occurring. */
 	CSR_WRITE_4(sc, TI_MB_HOSTINTR, 1);
 
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 		/* Make the NIC's status block updates visible to the CPU. */
 		bus_dmamap_sync(sc->ti_cdata.ti_status_tag,
 		    sc->ti_cdata.ti_status_map, BUS_DMASYNC_POSTREAD);
 		/* Check RX return ring producer/consumer */
 		ti_rxeof(sc);
 
 		/* Check TX ring producer/consumer */
 		ti_txeof(sc);
 		bus_dmamap_sync(sc->ti_cdata.ti_status_tag,
 		    sc->ti_cdata.ti_status_map, BUS_DMASYNC_PREREAD);
 	}
 
 	ti_handle_events(sc);
 
 	/*
 	 * IFF_DRV_RUNNING is deliberately re-tested rather than cached.
 	 * NOTE(review): presumably ti_handle_events() can change the
 	 * running state -- confirm.
 	 */
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 		/* Re-enable interrupts. */
 		CSR_WRITE_4(sc, TI_MB_HOSTINTR, 0);
 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 			ti_start_locked(ifp);
 	}
 
 	TI_UNLOCK(sc);
 }
 
 /*
  * if_get_counter method.  Collisions are computed from the statistics
  * kept in the general information block; every other counter is
  * answered by if_get_counter_default().
  */
 static uint64_t
 ti_get_counter(struct ifnet *ifp, ift_counter cnt)
 {
 	struct ti_softc *sc;
 	struct ti_stats *stats;
 	uint64_t collisions;
 
 	if (cnt != IFCOUNTER_COLLISIONS)
 		return (if_get_counter_default(ifp, cnt));
 
 	sc = if_getsoftc(ifp);
 	stats = &sc->ti_rdata.ti_info->ti_stats;
 
 	TI_LOCK(sc);
 	/* Bracket the read of the stats area with POSTREAD/PREREAD syncs. */
 	bus_dmamap_sync(sc->ti_cdata.ti_gib_tag,
 	    sc->ti_cdata.ti_gib_map, BUS_DMASYNC_POSTREAD);
 	collisions = stats->dot3StatsSingleCollisionFrames +
 	    stats->dot3StatsMultipleCollisionFrames +
 	    stats->dot3StatsExcessiveCollisions +
 	    stats->dot3StatsLateCollisions;
 	bus_dmamap_sync(sc->ti_cdata.ti_gib_tag,
 	    sc->ti_cdata.ti_gib_map, BUS_DMASYNC_PREREAD);
 	TI_UNLOCK(sc);
 
 	return (collisions);
 }
 
 /*
  * Encapsulate an mbuf chain in the tx ring  by coupling the mbuf data
  * pointers to descriptors.
  *
  * m_head points at the chain to send; it may be replaced by a
  * defragmented copy, or set to NULL when the chain has been freed.
  *
  * Returns 0 on success.  Error returns:
  *   ENOBUFS - no free tx descriptor entry, or not enough ring slots
  *             (chain left intact for the caller to requeue)
  *   ENOMEM  - m_defrag() failed (chain freed, *m_head == NULL)
  *   EIO     - the DMA load produced zero segments (chain freed)
  *   other   - error from bus_dmamap_load_mbuf_sg(); the chain is
  *             freed only if the failure came after defragmenting
  */
 static int
 ti_encap(struct ti_softc *sc, struct mbuf **m_head)
 {
 	struct ti_txdesc *txd;
 	struct ti_tx_desc *f;
 	struct ti_tx_desc txdesc;
 	struct mbuf *m;
 	bus_dma_segment_t txsegs[TI_MAXTXSEGS];
 	uint16_t csum_flags;
 	int error, frag, i, nseg;
 
 	if ((txd = STAILQ_FIRST(&sc->ti_cdata.ti_txfreeq)) == NULL)
 		return (ENOBUFS);
 
 	error = bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap,
 	    *m_head, txsegs, &nseg, 0);
 	/* Too many segments: compact the chain once and retry the load. */
 	if (error == EFBIG) {
 		m = m_defrag(*m_head, M_NOWAIT);
 		if (m == NULL) {
 			m_freem(*m_head);
 			*m_head = NULL;
 			return (ENOMEM);
 		}
 		*m_head = m;
 		error = bus_dmamap_load_mbuf_sg(sc->ti_cdata.ti_tx_tag,
 		    txd->tx_dmamap, *m_head, txsegs, &nseg, 0);
 		if (error) {
 			m_freem(*m_head);
 			*m_head = NULL;
 			return (error);
 		}
 	} else if (error != 0)
 		return (error);
 	if (nseg == 0) {
 		m_freem(*m_head);
 		*m_head = NULL;
 		return (EIO);
 	}
 
 	/* Not enough room in the ring: undo the load, keep the chain. */
 	if (sc->ti_txcnt + nseg >= TI_TX_RING_CNT) {
 		bus_dmamap_unload(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap);
 		return (ENOBUFS);
 	}
 	bus_dmamap_sync(sc->ti_cdata.ti_tx_tag, txd->tx_dmamap,
 	    BUS_DMASYNC_PREWRITE);
 
 	/* Translate the stack's checksum requests into descriptor flags. */
 	m = *m_head;
 	csum_flags = 0;
 	if (m->m_pkthdr.csum_flags & CSUM_IP)
 		csum_flags |= TI_BDFLAG_IP_CKSUM;
 	if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))
 		csum_flags |= TI_BDFLAG_TCP_UDP_CKSUM;
 
 	frag = sc->ti_tx_saved_prodidx;
 	for (i = 0; i < nseg; i++) {
 		/*
 		 * Tigon 1: build the descriptor in the local txdesc and
 		 * copy it into NIC memory below.  Otherwise write straight
 		 * into the host-resident ring.
 		 */
 		if (sc->ti_hwrev == TI_HWREV_TIGON) {
 			bzero(&txdesc, sizeof(txdesc));
 			f = &txdesc;
 		} else
 			f = &sc->ti_rdata.ti_tx_ring[frag];
 		ti_hostaddr64(&f->ti_addr, txsegs[i].ds_addr);
 		f->ti_len = txsegs[i].ds_len;
 		f->ti_flags = csum_flags;
 		if (m->m_flags & M_VLANTAG) {
 			f->ti_flags |= TI_BDFLAG_VLAN_TAG;
 			f->ti_vlan_tag = m->m_pkthdr.ether_vtag;
 		} else {
 			f->ti_vlan_tag = 0;
 		}
 
 		if (sc->ti_hwrev == TI_HWREV_TIGON)
 			ti_mem_write(sc, TI_TX_RING_BASE + frag *
 			    sizeof(txdesc), sizeof(txdesc), &txdesc);
 		TI_INC(frag, TI_TX_RING_CNT);
 	}
 
 	sc->ti_tx_saved_prodidx = frag;
 	/* set TI_BDFLAG_END on the last descriptor */
 	/* Step back one slot, wrapping around the ring. */
 	frag = (frag + TI_TX_RING_CNT - 1) % TI_TX_RING_CNT;
 	if (sc->ti_hwrev == TI_HWREV_TIGON) {
 		/* txdesc still holds the last descriptor built above. */
 		txdesc.ti_flags |= TI_BDFLAG_END;
 		ti_mem_write(sc, TI_TX_RING_BASE + frag * sizeof(txdesc),
 		    sizeof(txdesc), &txdesc);
 	} else
 		sc->ti_rdata.ti_tx_ring[frag].ti_flags |= TI_BDFLAG_END;
 
 	/* The entry now owns the mbuf until ti_txeof() reclaims it. */
 	STAILQ_REMOVE_HEAD(&sc->ti_cdata.ti_txfreeq, tx_q);
 	STAILQ_INSERT_TAIL(&sc->ti_cdata.ti_txbusyq, txd, tx_q);
 	txd->tx_m = m;
 	sc->ti_txcnt += nseg;
 
 	return (0);
 }
 
 /*
  * if_start entry point: run the transmit routine with the softc
  * lock held.
  */
 static void
 ti_start(struct ifnet *ifp)
 {
 	struct ti_softc *sc = ifp->if_softc;
 
 	TI_LOCK(sc);
 	ti_start_locked(ifp);
 	TI_UNLOCK(sc);
 }
 
 /*
  * Main transmit routine; the softc lock is taken by the callers.
  * Frames are dequeued from the interface send queue and handed to
  * ti_encap(), which places pointers to the mbuf data directly in
  * the transmit descriptors so that no copy is needed.
  */
 static void
 ti_start_locked(struct ifnet *ifp)
 {
 	struct ti_softc *sc = ifp->if_softc;
 	struct mbuf *pkt = NULL;
 	int queued = 0;
 
 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd) &&
 	    sc->ti_txcnt < (TI_TX_RING_CNT - 16)) {
 		IFQ_DRV_DEQUEUE(&ifp->if_snd, pkt);
 		if (pkt == NULL)
 			break;
 
 		if (ti_encap(sc, &pkt) != 0) {
 			/*
 			 * If the frame survived, put it back at the head
 			 * of the queue, mark the interface OACTIVE and
 			 * wait for the NIC to drain the ring.  If
 			 * ti_encap() already freed it there is nothing
 			 * to requeue.
 			 */
 			if (pkt != NULL) {
 				IFQ_DRV_PREPEND(&ifp->if_snd, pkt);
 				ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 			}
 			break;
 		}
 		queued++;
 
 		/* Give any BPF listener a copy of the frame. */
 		ETHER_BPF_MTAP(ifp, pkt);
 	}
 
 	if (queued == 0)
 		return;
 
 	if (sc->ti_rdata.ti_tx_ring != NULL)
 		bus_dmamap_sync(sc->ti_cdata.ti_tx_ring_tag,
 		    sc->ti_cdata.ti_tx_ring_map, BUS_DMASYNC_PREWRITE);
 
 	/* Kick the transmitter by publishing the new producer index. */
 	CSR_WRITE_4(sc, TI_MB_SENDPROD_IDX, sc->ti_tx_saved_prodidx);
 
 	/* Arm the timeout in case the chip goes out to lunch. */
 	sc->ti_timer = 5;
 }
 
 /*
  * if_init entry point: run ti_init_locked() with the softc lock held.
  * xsc is the softc pointer.
  */
 static void
 ti_init(void *xsc)
 {
 	struct ti_softc *sc = xsc;
 
 	TI_LOCK(sc);
 	ti_init_locked(sc);
 	TI_UNLOCK(sc);
 }
 
 /*
  * First stage of interface initialization.  Does nothing if the
  * interface is already running; otherwise stops the chip and sets up
  * the general information block, ring control blocks and firmware
  * through ti_gibinit().
  */
 static void
 ti_init_locked(void *xsc)
 {
 	struct ti_softc *sc;
 
 	sc = xsc;
 	if ((sc->ti_ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 		return;
 
 	/* Cancel pending I/O and flush buffers. */
 	ti_stop(sc);
 
 	/* Init the gen info block, ring control blocks and firmware. */
 	if (ti_gibinit(sc) != 0)
 		device_printf(sc->ti_dev, "initialization failure\n");
 }
 
 /*
  * Second stage of interface bring-up.  Programs the interface index, MTU
  * and MAC address, sets promiscuous and multicast filtering, initializes
  * the standard/jumbo/mini RX rings and the TX ring, reports "stack up" to
  * the firmware, enables host interrupts, marks the interface running,
  * arms the watchdog callout and finally re-applies the current media
  * selection.
  *
  * The softc lock is asserted on entry (TI_LOCK_ASSERT).  If any RX ring
  * cannot be initialized a message is logged and the function returns
  * early, before IFF_DRV_RUNNING is set.
  */
 static void ti_init2(struct ti_softc *sc)
 {
 	/* Not named below; presumably consumed by the TI_DO_CMD macro -- confirm. */
 	struct ti_cmd_desc cmd;
 	struct ifnet *ifp;
 	uint8_t *ea;
 	struct ifmedia *ifm;
 	int tmp;
 
 	TI_LOCK_ASSERT(sc);
 
 	ifp = sc->ti_ifp;
 
 	/* Specify MTU and interface index. */
 	CSR_WRITE_4(sc, TI_GCR_IFINDEX, device_get_unit(sc->ti_dev));
 	/* The value written is the MTU plus Ethernet header, CRC and VLAN tag. */
 	CSR_WRITE_4(sc, TI_GCR_IFMTU, ifp->if_mtu +
 	    ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN);
 	TI_DO_CMD(TI_CMD_UPDATE_GENCOM, 0, 0);
 
 	/* Load our MAC address. */
 	ea = IF_LLADDR(sc->ti_ifp);
 	/* PAR0 receives the first two address bytes, PAR1 the remaining four. */
 	CSR_WRITE_4(sc, TI_GCR_PAR0, (ea[0] << 8) | ea[1]);
 	CSR_WRITE_4(sc, TI_GCR_PAR1,
 	    (ea[2] << 24) | (ea[3] << 16) | (ea[4] << 8) | ea[5]);
 	TI_DO_CMD(TI_CMD_SET_MAC_ADDR, 0, 0);
 
 	/* Enable or disable promiscuous mode as needed. */
 	if (ifp->if_flags & IFF_PROMISC) {
 		TI_DO_CMD(TI_CMD_SET_PROMISC_MODE, TI_CMD_CODE_PROMISC_ENB, 0);
 	} else {
 		TI_DO_CMD(TI_CMD_SET_PROMISC_MODE, TI_CMD_CODE_PROMISC_DIS, 0);
 	}
 
 	/* Program multicast filter. */
 	ti_setmulti(sc);
 
 	/*
 	 * If this is a Tigon 1, we should tell the
 	 * firmware to use software packet filtering.
 	 */
 	if (sc->ti_hwrev == TI_HWREV_TIGON) {
 		TI_DO_CMD(TI_CMD_FDR_FILTERING, TI_CMD_CODE_FILT_ENB, 0);
 	}
 
 	/* Init RX ring. */
 	if (ti_init_rx_ring_std(sc) != 0) {
 		/* XXX */
 		device_printf(sc->ti_dev, "no memory for std Rx buffers.\n");
 		return;
 	}
 
 	/* Init jumbo RX ring. */
 	/*
 	 * NOTE(review): the MTU is compared against ETHERMTU plus header and
 	 * VLAN overhead, i.e. a frame-sized threshold -- confirm intended.
 	 */
 	if (ifp->if_mtu > ETHERMTU + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) {
 		if (ti_init_rx_ring_jumbo(sc) != 0) {
 			/* XXX */
 			device_printf(sc->ti_dev,
 			    "no memory for jumbo Rx buffers.\n");
 			return;
 		}
 	}
 
 	/*
 	 * If this is a Tigon 2, we can also configure the
 	 * mini ring.
 	 */
 	if (sc->ti_hwrev == TI_HWREV_TIGON_II) {
 		if (ti_init_rx_ring_mini(sc) != 0) {
 			/* XXX */
 			device_printf(sc->ti_dev,
 			    "no memory for mini Rx buffers.\n");
 			return;
 		}
 	}
 
 	/* Reset the RX return ring consumer index in hardware and in the softc. */
 	CSR_WRITE_4(sc, TI_GCR_RXRETURNCONS_IDX, 0);
 	sc->ti_rx_saved_considx = 0;
 
 	/* Init TX ring. */
 	ti_init_tx_ring(sc);
 
 	/* Tell firmware we're alive. */
 	TI_DO_CMD(TI_CMD_HOST_STATE, TI_CMD_CODE_STACK_UP, 0);
 
 	/* Enable host interrupts. */
 	CSR_WRITE_4(sc, TI_MB_HOSTINTR, 0);
 
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 	/* Arm the watchdog callout to fire after hz ticks. */
 	callout_reset(&sc->ti_watchdog, hz, ti_watchdog, sc);
 
 	/*
 	 * Make sure to set media properly. We have to do this
 	 * here since we have to issue commands in order to set
 	 * the link negotiation and we can't issue commands until
 	 * the firmware is running.
 	 */
 	ifm = &sc->ifmedia;
 	/*
 	 * Temporarily substitute the currently selected entry's media word
 	 * so ti_ifmedia_upd_locked() programs it, then restore the saved
 	 * value.
 	 */
 	tmp = ifm->ifm_media;
 	ifm->ifm_media = ifm->ifm_cur->ifm_media;
 	ti_ifmedia_upd_locked(sc);
 	ifm->ifm_media = tmp;
 }
 
 /*
  * Set media options.
  *
  * ifmedia change callback: takes the softc lock around
  * ti_ifmedia_upd_locked() and hands its result back to the caller.
  */
 static int
 ti_ifmedia_upd(struct ifnet *ifp)
 {
 	struct ti_softc *sc = ifp->if_softc;
 	int rv;
 
 	TI_LOCK(sc);
 	rv = ti_ifmedia_upd_locked(sc);
 	TI_UNLOCK(sc);
 	return (rv);
 }
 
 /*
  * Program the link registers and kick off link negotiation according to
  * sc->ifmedia.ifm_media.
  *
  * Returns EINVAL when the media type is not IFM_ETHER, otherwise 0.  A
  * subtype that matches none of the cases below touches no register and
  * still returns 0.  Callers in this file invoke it with the softc lock
  * held.
  */
 static int
 ti_ifmedia_upd_locked(struct ti_softc *sc)
 {
 	struct ifmedia *ifm;
 	/* Not named below; presumably consumed by the TI_DO_CMD macro -- confirm. */
 	struct ti_cmd_desc cmd;
 	uint32_t flowctl;
 
 	ifm = &sc->ifmedia;
 
 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
 		return (EINVAL);
 
 	flowctl = 0;
 
 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
 	case IFM_AUTO:
 		/*
 		 * Transmit flow control doesn't work on the Tigon 1.
 		 */
 		flowctl = TI_GLNK_RX_FLOWCTL_Y;
 
 		/*
 		 * Transmit flow control can also cause problems on the
 		 * Tigon 2, apparently with both the copper and fiber
 		 * boards.  The symptom is that the interface will just
 		 * hang.  This was reproduced with Alteon 180 switches.
 		 */
 #if 0
 		if (sc->ti_hwrev != TI_HWREV_TIGON)
 			flowctl |= TI_GLNK_TX_FLOWCTL_Y;
 #endif
 
 		/* Gigabit link: preferred, full duplex, autonegotiation on. */
 		CSR_WRITE_4(sc, TI_GCR_GLINK, TI_GLNK_PREF|TI_GLNK_1000MB|
 		    TI_GLNK_FULL_DUPLEX| flowctl |
 		    TI_GLNK_AUTONEGENB|TI_GLNK_ENB);
 
 		flowctl = TI_LNK_RX_FLOWCTL_Y;
 #if 0
 		if (sc->ti_hwrev != TI_HWREV_TIGON)
 			flowctl |= TI_LNK_TX_FLOWCTL_Y;
 #endif
 
 		/* 10/100 link: all speeds and duplexes, autonegotiation on. */
 		CSR_WRITE_4(sc, TI_GCR_LINK, TI_LNK_100MB|TI_LNK_10MB|
 		    TI_LNK_FULL_DUPLEX|TI_LNK_HALF_DUPLEX| flowctl |
 		    TI_LNK_AUTONEGENB|TI_LNK_ENB);
 		TI_DO_CMD(TI_CMD_LINK_NEGOTIATION,
 		    TI_CMD_CODE_NEGOTIATE_BOTH, 0);
 		break;
 	case IFM_1000_SX:
 	case IFM_1000_T:
 		flowctl = TI_GLNK_RX_FLOWCTL_Y;
 #if 0
 		if (sc->ti_hwrev != TI_HWREV_TIGON)
 			flowctl |= TI_GLNK_TX_FLOWCTL_Y;
 #endif
 
 		/* Fixed gigabit: enable the gigabit link, clear the 10/100 one. */
 		CSR_WRITE_4(sc, TI_GCR_GLINK, TI_GLNK_PREF|TI_GLNK_1000MB|
 		    flowctl |TI_GLNK_ENB);
 		CSR_WRITE_4(sc, TI_GCR_LINK, 0);
 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) {
 			TI_SETBIT(sc, TI_GCR_GLINK, TI_GLNK_FULL_DUPLEX);
 		}
 		TI_DO_CMD(TI_CMD_LINK_NEGOTIATION,
 		    TI_CMD_CODE_NEGOTIATE_GIGABIT, 0);
 		break;
 	case IFM_100_FX:
 	case IFM_10_FL:
 	case IFM_100_TX:
 	case IFM_10_T:
 		flowctl = TI_LNK_RX_FLOWCTL_Y;
 #if 0
 		if (sc->ti_hwrev != TI_HWREV_TIGON)
 			flowctl |= TI_LNK_TX_FLOWCTL_Y;
 #endif
 
 		/* Fixed 10/100: clear the gigabit link, enable the 10/100 one. */
 		CSR_WRITE_4(sc, TI_GCR_GLINK, 0);
 		CSR_WRITE_4(sc, TI_GCR_LINK, TI_LNK_ENB|TI_LNK_PREF|flowctl);
 		/* Speed bit follows the requested subtype. */
 		if (IFM_SUBTYPE(ifm->ifm_media) == IFM_100_FX ||
 		    IFM_SUBTYPE(ifm->ifm_media) == IFM_100_TX) {
 			TI_SETBIT(sc, TI_GCR_LINK, TI_LNK_100MB);
 		} else {
 			TI_SETBIT(sc, TI_GCR_LINK, TI_LNK_10MB);
 		}
 		/* Duplex bit follows the requested media option. */
 		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX) {
 			TI_SETBIT(sc, TI_GCR_LINK, TI_LNK_FULL_DUPLEX);
 		} else {
 			TI_SETBIT(sc, TI_GCR_LINK, TI_LNK_HALF_DUPLEX);
 		}
 		TI_DO_CMD(TI_CMD_LINK_NEGOTIATION,
 		    TI_CMD_CODE_NEGOTIATE_10_100, 0);
 		break;
 	}
 
 	return (0);
 }
 
 /*
  * Report current media status.
  */
 static void
 ti_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
 	struct ti_softc *sc;
 	uint32_t media = 0;
 
 	sc = ifp->if_softc;
 
 	TI_LOCK(sc);
 
 	ifmr->ifm_status = IFM_AVALID;
 	ifmr->ifm_active = IFM_ETHER;
 
 	if (sc->ti_linkstat == TI_EV_CODE_LINK_DOWN) {
 		TI_UNLOCK(sc);
 		return;
 	}
 
 	ifmr->ifm_status |= IFM_ACTIVE;
 
 	if (sc->ti_linkstat == TI_EV_CODE_GIG_LINK_UP) {
 		media = CSR_READ_4(sc, TI_GCR_GLINK_STAT);
 		if (sc->ti_copper)
 			ifmr->ifm_active |= IFM_1000_T;
 		else
 			ifmr->ifm_active |= IFM_1000_SX;
 		if (media & TI_GLNK_FULL_DUPLEX)
 			ifmr->ifm_active |= IFM_FDX;
 		else
 			ifmr->ifm_active |= IFM_HDX;
 	} else if (sc->ti_linkstat == TI_EV_CODE_LINK_UP) {
 		media = CSR_READ_4(sc, TI_GCR_LINK_STAT);
 		if (sc->ti_copper) {
 			if (media & TI_LNK_100MB)
 				ifmr->ifm_active |= IFM_100_TX;
 			if (media & TI_LNK_10MB)
 				ifmr->ifm_active |= IFM_10_T;
 		} else {
 			if (media & TI_LNK_100MB)
 				ifmr->ifm_active |= IFM_100_FX;
 			if (media & TI_LNK_10MB)
 				ifmr->ifm_active |= IFM_10_FL;
 		}
 		if (media & TI_LNK_FULL_DUPLEX)
 			ifmr->ifm_active |= IFM_FDX;
 		if (media & TI_LNK_HALF_DUPLEX)
 			ifmr->ifm_active |= IFM_HDX;
 	}
 	TI_UNLOCK(sc);
 }
 
 /*
  * Network interface ioctl handler.
  *
  * Handles MTU, interface flag, multicast, media and capability requests
  * itself and passes everything else to ether_ioctl().  Returns 0 or an
  * errno value.
  */
 static int
 ti_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct ti_softc *sc = ifp->if_softc;
 	struct ifreq *ifr = (struct ifreq *) data;
 	/* Not named below; presumably consumed by the TI_DO_CMD macro -- confirm. */
 	struct ti_cmd_desc cmd;
 	int changed, running, error;
 
 	error = 0;
 
 	switch (command) {
 	case SIOCSIFMTU:
 		TI_LOCK(sc);
 		if (ifr->ifr_mtu >= ETHERMIN && ifr->ifr_mtu <= TI_JUMBO_MTU) {
 			ifp->if_mtu = ifr->ifr_mtu;
 			/* A running interface is re-initialized for the new MTU. */
 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
 				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 				ti_init_locked(sc);
 			}
 		} else {
 			error = EINVAL;
 		}
 		TI_UNLOCK(sc);
 		break;
 	case SIOCSIFFLAGS:
 		TI_LOCK(sc);
 		running = (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0;
 		if ((ifp->if_flags & IFF_UP) == 0) {
 			/* Interface marked down: stop it if it was running. */
 			if (running) {
 				ti_stop(sc);
 			}
 		} else if (running &&
 		    (ifp->if_flags & IFF_PROMISC) != 0 &&
 		    (sc->ti_if_flags & IFF_PROMISC) == 0) {
 			/*
 			 * If only the state of the PROMISC flag changed,
 			 * then just use the 'set promisc mode' command
 			 * instead of reinitializing the entire NIC. Doing
 			 * a full re-init means reloading the firmware and
 			 * waiting for it to start up, which may take a
 			 * second or two.
 			 */
 			TI_DO_CMD(TI_CMD_SET_PROMISC_MODE,
 			    TI_CMD_CODE_PROMISC_ENB, 0);
 		} else if (running &&
 		    (ifp->if_flags & IFF_PROMISC) == 0 &&
 		    (sc->ti_if_flags & IFF_PROMISC) != 0) {
 			TI_DO_CMD(TI_CMD_SET_PROMISC_MODE,
 			    TI_CMD_CODE_PROMISC_DIS, 0);
 		} else {
 			ti_init_locked(sc);
 		}
 		/* Remember the flags so the next call can detect changes. */
 		sc->ti_if_flags = ifp->if_flags;
 		TI_UNLOCK(sc);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		TI_LOCK(sc);
 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 			ti_setmulti(sc);
 		TI_UNLOCK(sc);
 		break;
 	case SIOCSIFMEDIA:
 	case SIOCGIFMEDIA:
 		error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command);
 		break;
 	case SIOCSIFCAP:
 		TI_LOCK(sc);
 		/* Bits that differ between the request and the current state. */
 		changed = ifr->ifr_reqcap ^ ifp->if_capenable;
 		if ((changed & IFCAP_TXCSUM) != 0 &&
 		    (ifp->if_capabilities & IFCAP_TXCSUM) != 0) {
 			ifp->if_capenable ^= IFCAP_TXCSUM;
 			/* Keep if_hwassist in step with TX checksum offload. */
 			if ((ifp->if_capenable & IFCAP_TXCSUM) == 0)
 				ifp->if_hwassist &= ~TI_CSUM_FEATURES;
 			else
 				ifp->if_hwassist |= TI_CSUM_FEATURES;
 		}
 		if ((changed & IFCAP_RXCSUM) != 0 &&
 		    (ifp->if_capabilities & IFCAP_RXCSUM) != 0)
 			ifp->if_capenable ^= IFCAP_RXCSUM;
 		if ((changed & IFCAP_VLAN_HWTAGGING) != 0 &&
 		    (ifp->if_capabilities & IFCAP_VLAN_HWTAGGING) != 0)
 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
 		if ((changed & IFCAP_VLAN_HWCSUM) != 0 &&
 		    (ifp->if_capabilities & IFCAP_VLAN_HWCSUM) != 0)
 			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
 		/* Checksum or VLAN tagging changes re-initialize a running NIC. */
 		if ((changed & (IFCAP_TXCSUM | IFCAP_RXCSUM |
 		    IFCAP_VLAN_HWTAGGING)) != 0 &&
 		    (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
 			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 			ti_init_locked(sc);
 		}
 		TI_UNLOCK(sc);
 		VLAN_CAPABILITIES(ifp);
 		break;
 	default:
 		error = ether_ioctl(ifp, command, data);
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Character device open: sets TI_FLAG_DEBUGING in the softc under the
  * softc lock.  Returns ENODEV when the cdev carries no softc, else 0.
  */
 static int
 ti_open(struct cdev *dev, int flags, int fmt, struct thread *td)
 {
 	struct ti_softc *sc = dev->si_drv1;
 
 	if (sc != NULL) {
 		TI_LOCK(sc);
 		sc->ti_flags |= TI_FLAG_DEBUGING;
 		TI_UNLOCK(sc);
 		return (0);
 	}
 
 	return (ENODEV);
 }
 
 /*
  * Character device close: clears TI_FLAG_DEBUGING in the softc under the
  * softc lock.  Returns ENODEV when the cdev carries no softc, else 0.
  */
 static int
 ti_close(struct cdev *dev, int flag, int fmt, struct thread *td)
 {
 	struct ti_softc *sc = dev->si_drv1;
 
 	if (sc != NULL) {
 		TI_LOCK(sc);
 		sc->ti_flags &= ~TI_FLAG_DEBUGING;
 		TI_UNLOCK(sc);
 		return (0);
 	}
 
 	return (ENODEV);
 }
 
 /*
  * This ioctl routine goes along with the Tigon character device.
  *
  * Supports statistics and coalescing-parameter get/set, firmware trace
  * control and retrieval, and the ALT_* debugging requests that read and
  * write NIC memory and registers.  `addr' points at the request
  * structure for the given command.  Returns 0 on success, ENODEV when the
  * cdev has no softc, EINVAL for out-of-range addresses, ENOTTY for
  * unknown commands, or the error from the copy helpers.
  *
  * Every path that takes the softc lock releases it before leaving the
  * switch.
  */
 static int
 ti_ioctl2(struct cdev *dev, u_long cmd, caddr_t addr, int flag,
     struct thread *td)
 {
 	struct ti_softc *sc;
 	int error;
 
 	sc = dev->si_drv1;
 	if (sc == NULL)
 		return (ENODEV);
 
 	error = 0;
 
 	switch (cmd) {
 	case TIIOCGETSTATS:
 	{
 		struct ti_stats *outstats;
 
 		outstats = (struct ti_stats *)addr;
 
 		TI_LOCK(sc);
 		/* Sync the general info block before and after copying stats. */
 		bus_dmamap_sync(sc->ti_cdata.ti_gib_tag,
 		    sc->ti_cdata.ti_gib_map, BUS_DMASYNC_POSTREAD);
 		bcopy(&sc->ti_rdata.ti_info->ti_stats, outstats,
 		    sizeof(struct ti_stats));
 		bus_dmamap_sync(sc->ti_cdata.ti_gib_tag,
 		    sc->ti_cdata.ti_gib_map, BUS_DMASYNC_PREREAD);
 		TI_UNLOCK(sc);
 		break;
 	}
 	case TIIOCGETPARAMS:
 	{
 		struct ti_params *params;
 
 		params = (struct ti_params *)addr;
 
 		TI_LOCK(sc);
 		params->ti_stat_ticks = sc->ti_stat_ticks;
 		params->ti_rx_coal_ticks = sc->ti_rx_coal_ticks;
 		params->ti_tx_coal_ticks = sc->ti_tx_coal_ticks;
 		params->ti_rx_max_coal_bds = sc->ti_rx_max_coal_bds;
 		params->ti_tx_max_coal_bds = sc->ti_tx_max_coal_bds;
 		params->ti_tx_buf_ratio = sc->ti_tx_buf_ratio;
 		params->param_mask = TI_PARAM_ALL;
 		TI_UNLOCK(sc);
 		break;
 	}
 	case TIIOCSETPARAMS:
 	{
 		struct ti_params *params;
 
 		params = (struct ti_params *)addr;
 
 		/*
 		 * Only the parameters selected in param_mask are updated;
 		 * each one is stored in the softc and written to the NIC.
 		 */
 		TI_LOCK(sc);
 		if (params->param_mask & TI_PARAM_STAT_TICKS) {
 			sc->ti_stat_ticks = params->ti_stat_ticks;
 			CSR_WRITE_4(sc, TI_GCR_STAT_TICKS, sc->ti_stat_ticks);
 		}
 
 		if (params->param_mask & TI_PARAM_RX_COAL_TICKS) {
 			sc->ti_rx_coal_ticks = params->ti_rx_coal_ticks;
 			CSR_WRITE_4(sc, TI_GCR_RX_COAL_TICKS,
 				    sc->ti_rx_coal_ticks);
 		}
 
 		if (params->param_mask & TI_PARAM_TX_COAL_TICKS) {
 			sc->ti_tx_coal_ticks = params->ti_tx_coal_ticks;
 			CSR_WRITE_4(sc, TI_GCR_TX_COAL_TICKS,
 				    sc->ti_tx_coal_ticks);
 		}
 
 		if (params->param_mask & TI_PARAM_RX_COAL_BDS) {
 			sc->ti_rx_max_coal_bds = params->ti_rx_max_coal_bds;
 			CSR_WRITE_4(sc, TI_GCR_RX_MAX_COAL_BD,
 				    sc->ti_rx_max_coal_bds);
 		}
 
 		if (params->param_mask & TI_PARAM_TX_COAL_BDS) {
 			sc->ti_tx_max_coal_bds = params->ti_tx_max_coal_bds;
 			CSR_WRITE_4(sc, TI_GCR_TX_MAX_COAL_BD,
 				    sc->ti_tx_max_coal_bds);
 		}
 
 		if (params->param_mask & TI_PARAM_TX_BUF_RATIO) {
 			sc->ti_tx_buf_ratio = params->ti_tx_buf_ratio;
 			CSR_WRITE_4(sc, TI_GCR_TX_BUFFER_RATIO,
 				    sc->ti_tx_buf_ratio);
 		}
 		TI_UNLOCK(sc);
 		break;
 	}
 	case TIIOCSETTRACE: {
 		ti_trace_type trace_type;
 
 		trace_type = *(ti_trace_type *)addr;
 
 		/*
 		 * Set tracing to whatever the user asked for.  Setting
 		 * this register to 0 should have the effect of disabling
 		 * tracing.
 		 */
 		TI_LOCK(sc);
 		CSR_WRITE_4(sc, TI_GCR_NIC_TRACING, trace_type);
 		TI_UNLOCK(sc);
 		break;
 	}
 	case TIIOCGETTRACE: {
 		struct ti_trace_buf *trace_buf;
 		uint32_t trace_start, cur_trace_ptr, trace_len;
 
 		trace_buf = (struct ti_trace_buf *)addr;
 
 		TI_LOCK(sc);
 		trace_start = CSR_READ_4(sc, TI_GCR_NICTRACE_START);
 		cur_trace_ptr = CSR_READ_4(sc, TI_GCR_NICTRACE_PTR);
 		trace_len = CSR_READ_4(sc, TI_GCR_NICTRACE_LEN);
 #if 0
 		if_printf(sc->ti_ifp, "trace_start = %#x, cur_trace_ptr = %#x, "
 		       "trace_len = %d\n", trace_start,
 		       cur_trace_ptr, trace_len);
 		if_printf(sc->ti_ifp, "trace_buf->buf_len = %d\n",
 		       trace_buf->buf_len);
 #endif
 		/* Copy at most the smaller of the NIC and caller buffer sizes. */
 		error = ti_copy_mem(sc, trace_start, min(trace_len,
 		    trace_buf->buf_len), (caddr_t)trace_buf->buf, 1, 1);
 		if (error == 0) {
 			trace_buf->fill_len = min(trace_len,
 			    trace_buf->buf_len);
 			/* Report the trace pointer as a distance from the start. */
 			if (cur_trace_ptr < trace_start)
 				trace_buf->cur_trace_ptr =
 				    trace_start - cur_trace_ptr;
 			else
 				trace_buf->cur_trace_ptr =
 				    cur_trace_ptr - trace_start;
 		} else
 			trace_buf->fill_len = 0;
 		TI_UNLOCK(sc);
 		break;
 	}
 
 	/*
 	 * For debugging, five ioctls are needed:
 	 * ALT_ATTACH
 	 * ALT_READ_TG_REG
 	 * ALT_WRITE_TG_REG
 	 * ALT_READ_TG_MEM
 	 * ALT_WRITE_TG_MEM
 	 */
 	case ALT_ATTACH:
 		/*
 		 * From what I can tell, Alteon's Solaris Tigon driver
 		 * only has one character device, so you have to attach
 		 * to the Tigon board you're interested in.  This seems
 		 * like a not-so-good way to do things, since unless you
 		 * subsequently specify the unit number of the device
 		 * you're interested in every ioctl, you'll only be
 		 * able to debug one board at a time.
 		 */
 		break;
 	case ALT_READ_TG_MEM:
 	case ALT_WRITE_TG_MEM:
 	{
 		struct tg_mem *mem_param;
 		uint32_t sram_end, scratch_end;
 
 		mem_param = (struct tg_mem *)addr;
 
 		if (sc->ti_hwrev == TI_HWREV_TIGON) {
 			sram_end = TI_END_SRAM_I;
 			scratch_end = TI_END_SCRATCH_I;
 		} else {
 			sram_end = TI_END_SRAM_II;
 			scratch_end = TI_END_SCRATCH_II;
 		}
 
 		/*
 		 * For now, we'll only handle accessing regular SRAM,
 		 * nothing else.
 		 */
 		TI_LOCK(sc);
 		/*
 		 * NOTE(review): tgAddr + len is computed from caller-supplied
 		 * values and may wrap; the scratch ranges below do not check
 		 * len at all -- confirm the copy helpers bound the access.
 		 */
 		if (mem_param->tgAddr >= TI_BEG_SRAM &&
 		    mem_param->tgAddr + mem_param->len <= sram_end) {
 			/*
 			 * In this instance, we always copy to/from user
 			 * space, so the user space argument is set to 1.
 			 */
 			error = ti_copy_mem(sc, mem_param->tgAddr,
 			    mem_param->len, mem_param->userAddr, 1,
 			    cmd == ALT_READ_TG_MEM ? 1 : 0);
 		} else if (mem_param->tgAddr >= TI_BEG_SCRATCH &&
 		    mem_param->tgAddr <= scratch_end) {
 			error = ti_copy_scratch(sc, mem_param->tgAddr,
 			    mem_param->len, mem_param->userAddr, 1,
 			    cmd == ALT_READ_TG_MEM ?  1 : 0, TI_PROCESSOR_A);
 		/*
 		 * NOTE(review): both bounds are TI_BEG_SCRATCH_B_DEBUG, so
 		 * only that exact address matches -- the upper bound was
 		 * presumably meant to be an end-of-range constant.
 		 */
 		} else if (mem_param->tgAddr >= TI_BEG_SCRATCH_B_DEBUG &&
 		    mem_param->tgAddr <= TI_BEG_SCRATCH_B_DEBUG) {
 			if (sc->ti_hwrev == TI_HWREV_TIGON) {
 				/*
 				 * Do not leave the switch from here: the
 				 * softc lock is held and must be dropped by
 				 * the TI_UNLOCK() below.
 				 */
 				if_printf(sc->ti_ifp,
 				    "invalid memory range for Tigon I\n");
 				error = EINVAL;
 			} else {
 				error = ti_copy_scratch(sc, mem_param->tgAddr -
 				    TI_SCRATCH_DEBUG_OFF, mem_param->len,
 				    mem_param->userAddr, 1,
 				    cmd == ALT_READ_TG_MEM ? 1 : 0,
 				    TI_PROCESSOR_B);
 			}
 		} else {
 			if_printf(sc->ti_ifp, "memory address %#x len %d is "
 			        "out of supported range\n",
 			        mem_param->tgAddr, mem_param->len);
 			error = EINVAL;
 		}
 		TI_UNLOCK(sc);
 		break;
 	}
 	case ALT_READ_TG_REG:
 	case ALT_WRITE_TG_REG:
 	{
 		struct tg_reg *regs;
 		uint32_t tmpval;
 
 		regs = (struct tg_reg *)addr;
 
 		/*
 		 * Make sure the address in question isn't out of range.
 		 */
 		if (regs->addr > TI_REG_MAX) {
 			error = EINVAL;
 			break;
 		}
 		TI_LOCK(sc);
 		/* Register values are byte-swapped with ntohl()/htonl(). */
 		if (cmd == ALT_READ_TG_REG) {
 			bus_space_read_region_4(sc->ti_btag, sc->ti_bhandle,
 			    regs->addr, &tmpval, 1);
 			regs->data = ntohl(tmpval);
 #if 0
 			if ((regs->addr == TI_CPU_STATE)
 			 || (regs->addr == TI_CPU_CTL_B)) {
 				if_printf(sc->ti_ifp, "register %#x = %#x\n",
 				       regs->addr, tmpval);
 			}
 #endif
 		} else {
 			tmpval = htonl(regs->data);
 			bus_space_write_region_4(sc->ti_btag, sc->ti_bhandle,
 			    regs->addr, &tmpval, 1);
 		}
 		TI_UNLOCK(sc);
 		break;
 	}
 	default:
 		error = ENOTTY;
 		break;
 	}
 	return (error);
 }
 
 /*
  * Watchdog callout handler.  Re-arms itself every hz ticks, counts
  * sc->ti_timer down and, on the tick where it reaches zero (and unless
  * debugging is flagged), logs a timeout, re-initializes the interface
  * and bumps the output error counter.  The softc lock is asserted.
  */
 static void
 ti_watchdog(void *arg)
 {
 	struct ti_softc *sc = arg;
 	struct ifnet *ifp;
 
 	TI_LOCK_ASSERT(sc);
 
 	/* Re-arm before any of the early returns below. */
 	callout_reset(&sc->ti_watchdog, hz, ti_watchdog, sc);
 
 	/* Nothing to do while the countdown is zero. */
 	if (sc->ti_timer == 0)
 		return;
 
 	/* Count down; only the tick that exhausts the timer acts. */
 	sc->ti_timer--;
 	if (sc->ti_timer > 0)
 		return;
 
 	/*
 	 * When we're debugging, the chip is often stopped for long periods
 	 * of time, and that would normally cause the watchdog timer to fire.
 	 * Since that impedes debugging, we don't want to do that.
 	 */
 	if ((sc->ti_flags & TI_FLAG_DEBUGING) != 0)
 		return;
 
 	ifp = sc->ti_ifp;
 	if_printf(ifp, "watchdog timeout -- resetting\n");
 
 	/* Clear RUNNING first, then re-initialize. */
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	ti_init_locked(sc);
 
 	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 }
 
 /*
  * Stop the adapter and free any mbufs allocated to the
  * RX and TX lists.
  *
  * Masks host interrupts, tells the firmware the stack is down, resets the
  * chip, releases all RX/TX ring buffers, resets the cached ring indices,
  * clears IFF_DRV_RUNNING/IFF_DRV_OACTIVE and stops the watchdog callout.
  * The softc lock is asserted on entry.
  */
 static void
 ti_stop(struct ti_softc *sc)
 {
 	struct ifnet *ifp;
 	/* Not named below; presumably consumed by the TI_DO_CMD macro -- confirm. */
 	struct ti_cmd_desc cmd;
 
 	TI_LOCK_ASSERT(sc);
 
 	ifp = sc->ti_ifp;
 
 	/* Disable host interrupts. */
 	CSR_WRITE_4(sc, TI_MB_HOSTINTR, 1);
 	/*
 	 * Tell firmware we're shutting down.
 	 */
 	TI_DO_CMD(TI_CMD_HOST_STATE, TI_CMD_CODE_STACK_DOWN, 0);
 
 	/* Halt and reinitialize. */
 	if (ti_chipinit(sc) == 0) {
 		/*
 		 * Presumably zeroes NIC memory from offset 0x2000 up to the
 		 * 1MB mark -- confirm against ti_mem_zero().
 		 */
 		ti_mem_zero(sc, 0x2000, 0x100000 - 0x2000);
 		/* XXX ignore init errors. */
 		ti_chipinit(sc);
 	}
 
 	/* Free the RX lists. */
 	ti_free_rx_ring_std(sc);
 
 	/* Free jumbo RX list. */
 	ti_free_rx_ring_jumbo(sc);
 
 	/* Free mini RX list. */
 	ti_free_rx_ring_mini(sc);
 
 	/* Free TX buffers. */
 	ti_free_tx_ring(sc);
 
 	/* Reset the cached producer/consumer indices. */
 	sc->ti_ev_prodidx.ti_idx = 0;
 	sc->ti_return_prodidx.ti_idx = 0;
 	sc->ti_tx_considx.ti_idx = 0;
 	sc->ti_tx_saved_considx = TI_TXCONS_UNSET;
 
 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 	callout_stop(&sc->ti_watchdog);
 }
 
 /*
  * Stop all chip I/O so that the kernel's probe routines don't
  * get confused by errant DMAs when rebooting.
  *
  * Resets the chip via ti_chipinit() under the softc lock; the result of
  * the reset is ignored and 0 is always returned.
  */
 static int
 ti_shutdown(device_t dev)
 {
 	struct ti_softc *sc = device_get_softc(dev);
 
 	TI_LOCK(sc);
 	(void)ti_chipinit(sc);
 	TI_UNLOCK(sc);
 	return (0);
 }
 
 /*
  * Fetch the "dev.ti.<unit>.dac" tunable, register the per-device
  * coalescing/statistics sysctl knobs and then load their values:
  * each one gets a built-in default that a device hint of the same name
  * may override.
  */
 static void
 ti_sysctl_node(struct ti_softc *sc)
 {
 	struct sysctl_ctx_list *ctx;
 	struct sysctl_oid_list *child;
 	const char *name;
 	char tname[32];
 	int unit;
 
 	ctx = device_get_sysctl_ctx(sc->ti_dev);
 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->ti_dev));
 	name = device_get_name(sc->ti_dev);
 	unit = device_get_unit(sc->ti_dev);
 
 	/* Use DAC */
 	sc->ti_dac = 1;
 	snprintf(tname, sizeof(tname), "dev.ti.%d.dac", unit);
 	TUNABLE_INT_FETCH(tname, &sc->ti_dac);
 
 	/* Receive coalescing knobs. */
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_coal_ticks", CTLFLAG_RW,
 	    &sc->ti_rx_coal_ticks, 0, "Receive coalcesced ticks");
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rx_max_coal_bds", CTLFLAG_RW,
 	    &sc->ti_rx_max_coal_bds, 0, "Receive max coalcesced BDs");
 
 	/* Transmit coalescing and buffer split knobs. */
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_coal_ticks", CTLFLAG_RW,
 	    &sc->ti_tx_coal_ticks, 0, "Send coalcesced ticks");
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_max_coal_bds", CTLFLAG_RW,
 	    &sc->ti_tx_max_coal_bds, 0, "Send max coalcesced BDs");
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tx_buf_ratio", CTLFLAG_RW,
 	    &sc->ti_tx_buf_ratio, 0,
 	    "Ratio of NIC memory devoted to TX buffer");
 
 	/* Statistics update interval knob. */
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "stat_ticks", CTLFLAG_RW,
 	    &sc->ti_stat_ticks, 0,
 	    "Number of clock ticks for statistics update interval");
 
 	/* Pull in device tunables. */
 	sc->ti_rx_coal_ticks = 170;
 	resource_int_value(name, unit, "rx_coal_ticks",
 	    &sc->ti_rx_coal_ticks);
 	sc->ti_rx_max_coal_bds = 64;
 	resource_int_value(name, unit, "rx_max_coal_bds",
 	    &sc->ti_rx_max_coal_bds);
 
 	sc->ti_tx_coal_ticks = TI_TICKS_PER_SEC / 500;
 	resource_int_value(name, unit, "tx_coal_ticks",
 	    &sc->ti_tx_coal_ticks);
 	sc->ti_tx_max_coal_bds = 32;
 	resource_int_value(name, unit, "tx_max_coal_bds",
 	    &sc->ti_tx_max_coal_bds);
 	sc->ti_tx_buf_ratio = 21;
 	resource_int_value(name, unit, "tx_buf_ratio",
 	    &sc->ti_tx_buf_ratio);
 
 	sc->ti_stat_ticks = 2 * TI_TICKS_PER_SEC;
 	resource_int_value(name, unit, "stat_ticks",
 	    &sc->ti_stat_ticks);
 }
Index: projects/clang380-import/sys/dev/vt/hw/ofwfb/ofwfb.c
===================================================================
--- projects/clang380-import/sys/dev/vt/hw/ofwfb/ofwfb.c	(revision 293686)
+++ projects/clang380-import/sys/dev/vt/hw/ofwfb/ofwfb.c	(revision 293687)
@@ -1,481 +1,502 @@
 /*-
  * Copyright (c) 2011 Nathan Whitehorn
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/fbio.h>
 
 #include <dev/vt/vt.h>
 #include <dev/vt/hw/fb/vt_fb.h>
 #include <dev/vt/colors/vt_termcolors.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 
 #include <machine/bus.h>
 #ifdef __sparc64__
 #include <machine/bus_private.h>
 #endif
 
 #include <dev/ofw/openfirm.h>
 #include <dev/ofw/ofw_bus.h>
 #include <dev/ofw/ofw_pci.h>
 
 /*
  * Driver state for the Open Firmware framebuffer console.
  *
  * `fb' must remain the first member: vd_softc is dereferenced both as a
  * struct fb_info * and as a struct ofwfb_softc * in this file.
  */
 struct ofwfb_softc {
 	struct fb_info	fb;		/* generic framebuffer description */
 
 	phandle_t	sc_node;	/* Open Firmware package node of the display */
 	ihandle_t	sc_handle;	/* Open Firmware instance used for method calls */
 	bus_space_tag_t	sc_memt;	/* bus space tag used to access the framebuffer */
 	int		iso_palette;	/* non-zero: remap 8-bit colors via ofw_colors[] */
 };
 
+static void ofwfb_initialize(struct vt_device *vd);
 static vd_probe_t	ofwfb_probe;
 static vd_init_t	ofwfb_init;
 static vd_bitblt_text_t	ofwfb_bitblt_text;
 static vd_bitblt_bmp_t	ofwfb_bitblt_bitmap;
 
 /*
  * vt(4) driver method table.  Probe, init and both blit routines are
  * implemented in this file; blank, ioctl and mmap use the generic vt_fb
  * helpers.
  */
 static const struct vt_driver vt_ofwfb_driver = {
 	.vd_name	= "ofwfb",
 	.vd_probe	= ofwfb_probe,
 	.vd_init	= ofwfb_init,
 	.vd_blank	= vt_fb_blank,
 	.vd_bitblt_text	= ofwfb_bitblt_text,
 	.vd_bitblt_bmp	= ofwfb_bitblt_bitmap,
 	.vd_fb_ioctl	= vt_fb_ioctl,
 	.vd_fb_mmap	= vt_fb_mmap,
 	.vd_priority	= VD_PRIORITY_GENERIC+1,
 };
 
 /*
  * Terminal color index -> palette index translation, applied by the blit
  * code when iso_palette is set in the softc.
  */
 static unsigned char ofw_colors[16] = {
 	/* See "16-color Text Extension" Open Firmware document, page 4 */
 	0, 4, 2, 6, 1, 5, 3, 7,
 	8, 12, 10, 14, 9, 13, 11, 15
 };
 
 /* Single statically allocated softc; ofwfb_init() points vd_softc at it. */
 static struct ofwfb_softc ofwfb_conssoftc;
 VT_DRIVER_DECLARE(vt_ofwfb, vt_ofwfb_driver);
 
 /*
  * Decide whether an Open Firmware display is available.
  *
  * Looks up the package behind "/chosen/stdout", falling back to the
  * "screen" device, and checks that its "device_type" property is
  * "display".  Returns CN_INTERNAL when it is and CN_DEAD otherwise,
  * including when the property cannot be read.
  */
 static int
 ofwfb_probe(struct vt_device *vd)
 {
 	phandle_t chosen, node;
 	ihandle_t stdout;
 	/* Zero-filled so the buffer is a valid string even on a short read. */
 	char type[64] = "";
 
 	chosen = OF_finddevice("/chosen");
 	/* Never hand an uninitialized handle to OF_instance_to_package(). */
 	if (OF_getprop(chosen, "stdout", &stdout, sizeof(stdout)) == -1)
 		node = -1;
 	else
 		node = OF_instance_to_package(stdout);
 	if (node == -1) {
 		/*
 		 * The "/chosen/stdout" does not exist try
 		 * using "screen" directly.
 		 */
 		node = OF_finddevice("screen");
 	}
 	/*
 	 * Read at most sizeof(type) - 1 bytes so the final NUL survives, and
 	 * treat a missing or empty property as "not a display".
 	 */
 	if (OF_getprop(node, "device_type", type, sizeof(type) - 1) <= 0)
 		return (CN_DEAD);
 	if (strcmp(type, "display") != 0)
 		return (CN_DEAD);
 
 	/* Looks OK... */
 	return (CN_INTERNAL);
 }
 
 /*
  * Draw a 1-bit-per-pixel bitmap at pixel position (x, y).
  *
  * pattern:	bitmap data, most significant bit is the leftmost pixel.
  * mask:	optional bitmap of the same layout; when non-NULL, pixels
  *		whose mask bit is clear are left untouched.
  * fg, bg:	terminal color indices for set and clear pattern bits.
  *
  * 8-bit framebuffers store the color index (remapped through
  * ofw_colors[] when iso_palette is set); 32-bit framebuffers store the
  * fb_cmap[] entry.  Other depths draw nothing.
  */
 static void
 ofwfb_bitblt_bitmap(struct vt_device *vd, const struct vt_window *vw,
     const uint8_t *pattern, const uint8_t *mask,
     unsigned int width, unsigned int height,
     unsigned int x, unsigned int y, term_color_t fg, term_color_t bg)
 {
 	/* vd_softc viewed through its leading struct fb_info member. */
 	struct fb_info *sc = vd->vd_softc;
 	u_long line;
 	uint32_t fgc, bgc;
 	int c, l;
 	uint8_t b, m;
 	union {
 		uint32_t l;
 		uint8_t	 c[4];
 	} ch1, ch2;
 
+#ifdef __powerpc__
+	/* Deal with unmapped framebuffers */
+	if (sc->fb_flags & FB_FLAG_NOWRITE) {
+		if (pmap_bootstrapped) {
+			sc->fb_flags &= ~FB_FLAG_NOWRITE;
+			ofwfb_initialize(vd);
+		} else {
+			return;
+		}
+	}
+#endif
+
 	/* Color map values, used by the 32-bit path only. */
 	fgc = sc->fb_cmap[fg];
 	bgc = sc->fb_cmap[bg];
 	b = m = 0;
 
 	/* Palette indices for the 8-bit paths. */
 	if (((struct ofwfb_softc *)vd->vd_softc)->iso_palette) {
 		fg = ofw_colors[fg];
 		bg = ofw_colors[bg];
 	}
 
 	/* Byte offset of the first pixel within the framebuffer. */
 	line = (sc->fb_stride * y) + x * sc->fb_bpp/8;
 	/* Fast path: unmasked, 8 bits per pixel, width a multiple of 8. */
 	if (mask == NULL && sc->fb_bpp == 8 && (width % 8 == 0)) {
 		/* Don't try to put off screen pixels */
 		if (((x + width) > vd->vd_width) || ((y + height) >
 		    vd->vd_height))
 			return;
 
 		for (; height > 0; height--) {
 			for (c = 0; c < width; c += 8) {
 				b = *pattern++;
 
 				/*
 				 * Assume that there is more background than
 				 * foreground in characters and init accordingly
 				 */
 				ch1.l = ch2.l = (bg << 24) | (bg << 16) |
 				    (bg << 8) | bg;
 
 				/*
 				 * Calculate 2 x 4-chars at a time, and then
 				 * write these out.
 				 */
 				if (b & 0x80) ch1.c[0] = fg;
 				if (b & 0x40) ch1.c[1] = fg;
 				if (b & 0x20) ch1.c[2] = fg;
 				if (b & 0x10) ch1.c[3] = fg;
 
 				if (b & 0x08) ch2.c[0] = fg;
 				if (b & 0x04) ch2.c[1] = fg;
 				if (b & 0x02) ch2.c[2] = fg;
 				if (b & 0x01) ch2.c[3] = fg;
 
 				*(uint32_t *)(sc->fb_vbase + line + c) = ch1.l;
 				*(uint32_t *)(sc->fb_vbase + line + c + 4) =
 				    ch2.l;
 			}
 			line += sc->fb_stride;
 		}
 	} else {
 		/* General path: one pixel at a time, clipped to the draw area. */
 		for (l = 0;
 		    l < height && y + l < vw->vw_draw_area.tr_end.tp_row;
 		    l++) {
 			for (c = 0;
 			    c < width && x + c < vw->vw_draw_area.tr_end.tp_col;
 			    c++) {
 				/* Keep the current pixel's bit in bit 7 of b. */
 				if (c % 8 == 0)
 					b = *pattern++;
 				else
 					b <<= 1;
 				if (mask != NULL) {
 					if (c % 8 == 0)
 						m = *mask++;
 					else
 						m <<= 1;
 					/* Skip pixel write, if mask not set. */
 					if ((m & 0x80) == 0)
 						continue;
 				}
 				switch(sc->fb_bpp) {
 				case 8:
 					*(uint8_t *)(sc->fb_vbase + line + c) =
 					    b & 0x80 ? fg : bg;
 					break;
 				case 32:
 					*(uint32_t *)(sc->fb_vbase + line + 4*c)
 					    = (b & 0x80) ? fgc : bgc;
 					break;
 				default:
 					/* panic? */
 					break;
 				}
 			}
 			line += sc->fb_stride;
 		}
 	}
 }
 
 /*
  * Redraw the character cells inside `area' (in cell coordinates) of
  * window `vw', one glyph at a time through ofwfb_bitblt_bitmap().  Unless
  * SC_NO_CUTPASTE is defined, the mouse cursor is drawn on top when it is
  * shown and overlaps the redrawn region.
  *
  * Defined static to match its forward declaration in this file.
  */
 static void
 ofwfb_bitblt_text(struct vt_device *vd, const struct vt_window *vw,
     const term_rect_t *area)
 {
 	unsigned int col, row, x, y;
 	struct vt_font *vf;
 	term_char_t c;
 	term_color_t fg, bg;
 	const uint8_t *pattern;
 #ifndef SC_NO_CUTPASTE
 	term_rect_t drawn_area;
 #endif
 
 	vf = vw->vw_font;
 
 	for (row = area->tr_begin.tp_row; row < area->tr_end.tp_row; ++row) {
 		for (col = area->tr_begin.tp_col; col < area->tr_end.tp_col;
 		    ++col) {
 			/* Cell position converted to pixels in the draw area. */
 			x = col * vf->vf_width +
 			    vw->vw_draw_area.tr_begin.tp_col;
 			y = row * vf->vf_height +
 			    vw->vw_draw_area.tr_begin.tp_row;
 
 			c = VTBUF_GET_FIELD(&vw->vw_buf, row, col);
 			pattern = vtfont_lookup(vf, c);
 			vt_determine_colors(c,
 			    VTBUF_ISCURSOR(&vw->vw_buf, row, col), &fg, &bg);
 
 			ofwfb_bitblt_bitmap(vd, vw,
 			    pattern, NULL, vf->vf_width, vf->vf_height,
 			    x, y, fg, bg);
 		}
 	}
 
 #ifndef SC_NO_CUTPASTE
 	if (!vd->vd_mshown)
 		return;
 
 	/* Redrawn region in pixels, for the cursor overlap test. */
 	drawn_area.tr_begin.tp_col = area->tr_begin.tp_col * vf->vf_width;
 	drawn_area.tr_begin.tp_row = area->tr_begin.tp_row * vf->vf_height;
 	drawn_area.tr_end.tp_col = area->tr_end.tp_col * vf->vf_width;
 	drawn_area.tr_end.tp_row = area->tr_end.tp_row * vf->vf_height;
 
 	if (vt_is_cursor_in_area(vd, &drawn_area)) {
 		ofwfb_bitblt_bitmap(vd, vw,
 		    vd->vd_mcursor->map, vd->vd_mcursor->mask,
 		    vd->vd_mcursor->width, vd->vd_mcursor->height,
 		    vd->vd_mx_drawn + vw->vw_draw_area.tr_begin.tp_col,
 		    vd->vd_my_drawn + vw->vw_draw_area.tr_begin.tp_row,
 		    vd->vd_mcursor_fg, vd->vd_mcursor_bg);
 	}
 #endif
 }
 
 /*
  * Set up the console color map for the framebuffer depth recorded in the
  * softc.  For 8-bit displays each of the 16 palette entries is also
  * pushed to the firmware with the "color!" method; if any call fails,
  * iso_palette is set instead.  For 32-bit displays a probe write to the
  * first pixel decides the byte order of the generated palette.  Any
  * other depth panics.
  */
 static void
 ofwfb_initialize(struct vt_device *vd)
 {
 	struct ofwfb_softc *sc = vd->vd_softc;
 	int i, err;
 	cell_t retval;
 	uint32_t oldpix;
 
+	sc->fb.fb_cmsize = 16;
+
+	if (sc->fb.fb_flags & FB_FLAG_NOWRITE)
+		return;
+
 	/*
 	 * Set up the color map
 	 */
 
 	sc->iso_palette = 0;
 	switch (sc->fb.fb_bpp) {
 	case 8:
 		vt_generate_cons_palette(sc->fb.fb_cmap, COLOR_FORMAT_RGB, 255,
 		    16, 255, 8, 255, 0);
 
 		/* Hand each entry's red, green and blue bytes to the firmware. */
 		for (i = 0; i < 16; i++) {
 			err = OF_call_method("color!", sc->sc_handle, 4, 1,
 			    (cell_t)((sc->fb.fb_cmap[i] >> 16) & 0xff),
 			    (cell_t)((sc->fb.fb_cmap[i] >> 8) & 0xff),
 			    (cell_t)((sc->fb.fb_cmap[i] >> 0) & 0xff),
 			    (cell_t)i, &retval);
 			if (err)
 				break;
 		}
 		/* The loop stopped early: fall back to ofw_colors[] remapping. */
 		if (i != 16)
 			sc->iso_palette = 1;
 				
 		break;
 
 	case 32:
 		/*
 		 * We bypass the usual bus_space_() accessors here, mostly
 		 * for performance reasons. In particular, we don't want
 		 * any barrier operations that may be performed and handle
 		 * endianness slightly different. Figure out the host-view
 		 * endianness of the frame buffer.
 		 */
 		/* Save pixel 0, write a probe value, inspect byte 0, restore. */
 		oldpix = bus_space_read_4(sc->sc_memt, sc->fb.fb_vbase, 0);
 		bus_space_write_4(sc->sc_memt, sc->fb.fb_vbase, 0, 0xff000000);
 		if (*(uint8_t *)(sc->fb.fb_vbase) == 0xff)
 			vt_generate_cons_palette(sc->fb.fb_cmap,
 			    COLOR_FORMAT_RGB, 255, 0, 255, 8, 255, 16);
 		else
 			vt_generate_cons_palette(sc->fb.fb_cmap,
 			    COLOR_FORMAT_RGB, 255, 16, 255, 8, 255, 0);
 		bus_space_write_4(sc->sc_memt, sc->fb.fb_vbase, 0, oldpix);
 		break;
 
 	default:
 		panic("Unknown color space depth %d", sc->fb.fb_bpp);
 		break;
         }
-
-	sc->fb.fb_cmsize = 16;
 }
 
 /*
  * Locate the Open Firmware display, read its geometry, map (or locate)
  * the framebuffer and hand the result to the generic vt_fb code.
  *
  * Returns CN_INTERNAL on success and CN_DEAD when the node is not a
  * display, a required property is missing or malformed, the depth is
  * neither 8 nor 32, or no usable framebuffer address can be determined.
  */
 static int
 ofwfb_init(struct vt_device *vd)
 {
 	struct ofwfb_softc *sc;
 	/* Zero-filled so the buffer is a valid string even on a short read. */
 	char type[64] = "";
 	phandle_t chosen;
 	phandle_t node;
 	uint32_t depth, height, width, stride;
 	uint32_t fb_phys;
 	int i, len;
 #ifdef __sparc64__
 	static struct bus_space_tag ofwfb_memt[1];
 	bus_addr_t phys;
 	int space;
 #endif
 
 	/* Initialize softc */
 	vd->vd_softc = sc = &ofwfb_conssoftc;
 
 	chosen = OF_finddevice("/chosen");
 	OF_getprop(chosen, "stdout", &sc->sc_handle, sizeof(ihandle_t));
 	node = OF_instance_to_package(sc->sc_handle);
 	if (node == -1) {
 		/*
 		 * The "/chosen/stdout" does not exist try
 		 * using "screen" directly.
 		 */
 		node = OF_finddevice("screen");
 		sc->sc_handle = OF_open("screen");
 	}
 	/*
 	 * Read at most sizeof(type) - 1 bytes so the final NUL survives, and
 	 * treat a missing or empty property as "not a display".
 	 */
 	if (OF_getprop(node, "device_type", type, sizeof(type) - 1) <= 0)
 		return (CN_DEAD);
 	if (strcmp(type, "display") != 0)
 		return (CN_DEAD);
 
 	/* Keep track of the OF node */
 	sc->sc_node = node;
 
 	/*
 	 * Try to use a 32-bit framebuffer if possible. This may be
 	 * unimplemented and fail. That's fine -- it just means we are
 	 * stuck with the defaults.
 	 */
 	OF_call_method("set-depth", sc->sc_handle, 1, 1, (cell_t)32, &i);
 
 	/* Make sure we have needed properties */
 	if (OF_getproplen(node, "height") != sizeof(height) ||
 	    OF_getproplen(node, "width") != sizeof(width) ||
 	    OF_getproplen(node, "depth") != sizeof(depth) ||
 	    OF_getproplen(node, "linebytes") != sizeof(sc->fb.fb_stride))
 		return (CN_DEAD);
 
 	/* Only support 8 and 32-bit framebuffers */
 	OF_getprop(node, "depth", &depth, sizeof(depth));
 	if (depth != 8 && depth != 32)
 		return (CN_DEAD);
 	sc->fb.fb_bpp = sc->fb.fb_depth = depth;
 
 	OF_getprop(node, "height", &height, sizeof(height));
 	OF_getprop(node, "width", &width, sizeof(width));
 	OF_getprop(node, "linebytes", &stride, sizeof(stride));
 
 	sc->fb.fb_height = height;
 	sc->fb.fb_width = width;
 	sc->fb.fb_stride = stride;
 	sc->fb.fb_size = sc->fb.fb_height * sc->fb.fb_stride;
 
 	/*
 	 * Grab the physical address of the framebuffer, and then map it
 	 * into our memory space. If the MMU is not yet up, it will be
 	 * remapped for us when relocation turns on.
 	 */
 	if (OF_getproplen(node, "address") == sizeof(fb_phys)) {
 		/* XXX We assume #address-cells is 1 at this point. */
 		OF_getprop(node, "address", &fb_phys, sizeof(fb_phys));
 
 	#if defined(__powerpc__)
 		sc->sc_memt = &bs_be_tag;
 		bus_space_map(sc->sc_memt, fb_phys, sc->fb.fb_size,
 		    BUS_SPACE_MAP_PREFETCHABLE, &sc->fb.fb_vbase);
 	#elif defined(__sparc64__)
 		OF_decode_addr(node, 0, &space, &phys);
 		sc->sc_memt = &ofwfb_memt[0];
 		sc->fb.fb_vbase =
 		    sparc64_fake_bustag(space, fb_phys, sc->sc_memt);
 	#elif defined(__arm__)
 		sc->sc_memt = fdtbus_bs_tag;
 		/*
 		 * Map the address just read from the firmware: fb_pbase is
 		 * not assigned until after this #if block.
 		 */
 		bus_space_map(sc->sc_memt, fb_phys, sc->fb.fb_size,
 		    BUS_SPACE_MAP_PREFETCHABLE,
 		    (bus_space_handle_t *)&sc->fb.fb_vbase);
 	#else
 		#error Unsupported platform!
 	#endif
 
 		sc->fb.fb_pbase = fb_phys;
 	} else {
 		/*
 		 * Some IBM systems don't have an address property. Try to
 		 * guess the framebuffer region from the assigned addresses.
 		 * This is ugly, but there doesn't seem to be an alternative.
 		 * Linux does the same thing.
 		 */
 
 		struct ofw_pci_register pciaddrs[8];
 		int num_pciaddrs = 0;
 
 		/*
 		 * Get the PCI addresses of the adapter, if present. The node
 		 * may be the child of the PCI device: in that case, try the
 		 * parent for the assigned-addresses property.
 		 */
 		len = OF_getprop(node, "assigned-addresses", pciaddrs,
 		    sizeof(pciaddrs));
 		if (len == -1) {
 			len = OF_getprop(OF_parent(node), "assigned-addresses",
 			    pciaddrs, sizeof(pciaddrs));
 		}
 		if (len == -1)
 			len = 0;
 		num_pciaddrs = len / sizeof(struct ofw_pci_register);
 
 		/*
 		 * From here on fb_phys holds an index into pciaddrs[];
 		 * num_pciaddrs means "no candidate found".
 		 */
 		fb_phys = num_pciaddrs;
 		for (i = 0; i < num_pciaddrs; i++) {
 			/* If it is too small, not the framebuffer */
 			if (pciaddrs[i].size_lo < sc->fb.fb_stride * height)
 				continue;
 			/* If it is not memory, it isn't either */
 			if (!(pciaddrs[i].phys_hi &
 			    OFW_PCI_PHYS_HI_SPACE_MEM32))
 				continue;
 
 			/* This could be the framebuffer */
 			fb_phys = i;
 
 			/* If it is prefetchable, it certainly is */
 			if (pciaddrs[i].phys_hi & OFW_PCI_PHYS_HI_PREFETCHABLE)
 				break;
 		}
 
 		if (fb_phys == num_pciaddrs) /* No candidates found */
 			return (CN_DEAD);
 
 	#if defined(__powerpc__)
 		OF_decode_addr(node, fb_phys, &sc->sc_memt, &sc->fb.fb_vbase);
 		sc->fb.fb_pbase = sc->fb.fb_vbase; /* 1:1 mapped */
+		#ifdef __powerpc64__
+		/* Real mode under a hypervisor probably doesn't cover FB */
+		if (!(mfmsr() & (PSL_HV | PSL_DR)))
+			sc->fb.fb_flags |= FB_FLAG_NOWRITE;
+		#endif
 	#else
 		/* No ability to interpret assigned-addresses otherwise */
 		return (CN_DEAD);
 	#endif
         }
 
 
 	/* Build the color map, then let the generic vt_fb code finish. */
 	ofwfb_initialize(vd);
 	vt_fb_init(vd);
 
 	return (CN_INTERNAL);
 }
 
Index: projects/clang380-import/sys/fs/ext2fs/ext2_bmap.c
===================================================================
--- projects/clang380-import/sys/fs/ext2fs/ext2_bmap.c	(revision 293686)
+++ projects/clang380-import/sys/fs/ext2fs/ext2_bmap.c	(revision 293687)
@@ -1,369 +1,376 @@
 /*-
  * Copyright (c) 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ufs_bmap.c	8.7 (Berkeley) 3/21/95
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/proc.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/resourcevar.h>
 #include <sys/stat.h>
 
 #include <fs/ext2fs/inode.h>
 #include <fs/ext2fs/fs.h>
 #include <fs/ext2fs/ext2fs.h>
 #include <fs/ext2fs/ext2_dinode.h>
 #include <fs/ext2fs/ext2_extern.h>
 #include <fs/ext2fs/ext2_mount.h>
 
 static int ext4_bmapext(struct vnode *, int32_t, int64_t *, int *, int *);
 
 /*
  * Bmap converts the logical block number of a file to its physical block
  * number on the disk. The conversion is done by using the logical block
  * number to index into the array of block pointers described by the dinode.
  */
 int
 ext2_bmap(struct vop_bmap_args *ap)
 {
 	daddr_t blkno;
 	int error;
 
 	/*
 	 * Check for underlying vnode requests and ensure that logical
 	 * to physical mapping is requested.
 	 */
 	if (ap->a_bop != NULL)
 		*ap->a_bop = &VTOI(ap->a_vp)->i_devvp->v_bufobj;
 	if (ap->a_bnp == NULL)
 		return (0);
 
 	if (VTOI(ap->a_vp)->i_flag & IN_E4EXTENTS)
 		error = ext4_bmapext(ap->a_vp, ap->a_bn, &blkno,
 		    ap->a_runp, ap->a_runb);
 	else
 		error = ext2_bmaparray(ap->a_vp, ap->a_bn, &blkno,
 		    ap->a_runp, ap->a_runb);
 	*ap->a_bnp = blkno;
 	return (error);
 }
 
 /*
  * This function converts the logical block number of a file to
  * its physical block number on the disk within ext4 extents.
  */
 static int
 ext4_bmapext(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb)
 {
 	struct inode *ip;
 	struct m_ext2fs *fs;
 	struct ext4_extent *ep;
 	struct ext4_extent_path path = { .ep_bp = NULL };
 	daddr_t lbn;
 	int ret = 0;
 
 	ip = VTOI(vp);
 	fs = ip->i_e2fs;
 	lbn = bn;
 
-	/*
-	 * TODO: need to implement read ahead to improve the performance.
-	 */
 	if (runp != NULL)
 		*runp = 0;
 
 	if (runb != NULL)
 		*runb = 0;
 
 	ext4_ext_find_extent(fs, ip, lbn, &path);
-	ep = path.ep_ext;
-	if (ep == NULL)
-		ret = EIO;
-	else {
-		*bnp = fsbtodb(fs, lbn - ep->e_blk +
-		    (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32));
+	if (path.ep_is_sparse) {
+		*bnp = -1;
+		if (runp != NULL)
+			*runp = path.ep_sparse_ext.e_len -
+			    (lbn - path.ep_sparse_ext.e_blk) - 1;
+	} else {
+		ep = path.ep_ext;
+		if (ep == NULL)
+			ret = EIO;
+		else {
+			*bnp = fsbtodb(fs, lbn - ep->e_blk +
+			    (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32));
 
-		if (*bnp == 0)
-			*bnp = -1;
+			if (*bnp == 0)
+				*bnp = -1;
+
+			if (runp != NULL)
+				*runp = ep->e_len - (lbn - ep->e_blk) - 1;
+		}
 	}
 
 	if (path.ep_bp != NULL) {
 		brelse(path.ep_bp);
 		path.ep_bp = NULL;
 	}
 
 	return (ret);
 }
 
 /*
  * Indirect blocks are now on the vnode for the file.  They are given negative
  * logical block numbers.  Indirect blocks are addressed by the negative
  * address of the first data block to which they point.  Double indirect blocks
  * are addressed by one less than the address of the first indirect block to
  * which they point.  Triple indirect blocks are addressed by one less than
  * the address of the first double indirect block to which they point.
  *
  * ext2_bmaparray does the bmap conversion, and if requested returns the
  * array of logical blocks which must be traversed to get to a block.
  * Each entry contains the offset into that block that gets you to the
  * next block and the disk address of the block (if it is assigned).
  */
 
 int
 ext2_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, int *runp, int *runb)
 {
 	struct inode *ip;
 	struct buf *bp;
 	struct ext2mount *ump;
 	struct mount *mp;
 	struct indir a[NIADDR+1], *ap;
 	daddr_t daddr;
 	e2fs_lbn_t metalbn;
 	int error, num, maxrun = 0, bsize;
 	int *nump;
 
 	ap = NULL;
 	ip = VTOI(vp);
 	mp = vp->v_mount;
 	ump = VFSTOEXT2(mp);
 
 	bsize = EXT2_BLOCK_SIZE(ump->um_e2fs);
 
 	if (runp) {
 		maxrun = mp->mnt_iosize_max / bsize - 1;
 		*runp = 0;
 	}
 
 	if (runb) {
 		*runb = 0;
 	}
 
 
 	ap = a;
 	nump = &num;
 	error = ext2_getlbns(vp, bn, ap, nump);
 	if (error)
 		return (error);
 
 	num = *nump;
 	if (num == 0) {
 		*bnp = blkptrtodb(ump, ip->i_db[bn]);
 		if (*bnp == 0) {
 			*bnp = -1;
 		} else if (runp) {
 			daddr_t bnb = bn;
 			for (++bn; bn < NDADDR && *runp < maxrun &&
 			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
 			    ++bn, ++*runp);
 			bn = bnb;
 			if (runb && (bn > 0)) {
 				for (--bn; (bn >= 0) && (*runb < maxrun) &&
 					is_sequential(ump, ip->i_db[bn],
 						ip->i_db[bn + 1]);
 						--bn, ++*runb);
 			}
 		}
 		return (0);
 	}
 
 
 	/* Get disk address out of indirect block array */
 	daddr = ip->i_ib[ap->in_off];
 
 	for (bp = NULL, ++ap; --num; ++ap) {
 		/*
 		 * Exit the loop if there is no disk address assigned yet and
 		 * the indirect block isn't in the cache, or if we were
 		 * looking for an indirect block and we've found it.
 		 */
 
 		metalbn = ap->in_lbn;
 		if ((daddr == 0 && !incore(&vp->v_bufobj, metalbn)) || metalbn == bn)
 			break;
 		/*
 		 * If we get here, we've either got the block in the cache
 		 * or we have a disk address for it, go fetch it.
 		 */
 		if (bp)
 			bqrelse(bp);
 
 		bp = getblk(vp, metalbn, bsize, 0, 0, 0);
 		if ((bp->b_flags & B_CACHE) == 0) {
 #ifdef INVARIANTS
 			if (!daddr)
 				panic("ext2_bmaparray: indirect block not in cache");
 #endif
 			bp->b_blkno = blkptrtodb(ump, daddr);
 			bp->b_iocmd = BIO_READ;
 			bp->b_flags &= ~B_INVAL;
 			bp->b_ioflags &= ~BIO_ERROR;
 			vfs_busy_pages(bp, 0);
 			bp->b_iooffset = dbtob(bp->b_blkno);
 			bstrategy(bp);
 			curthread->td_ru.ru_inblock++;
 			error = bufwait(bp);
 			if (error) {
 				brelse(bp);
 				return (error);
 			}
 		}
 
 		daddr = ((e2fs_daddr_t *)bp->b_data)[ap->in_off];
 		if (num == 1 && daddr && runp) {
 			for (bn = ap->in_off + 1;
 			    bn < MNINDIR(ump) && *runp < maxrun &&
 			    is_sequential(ump,
 			    ((e2fs_daddr_t *)bp->b_data)[bn - 1],
 			    ((e2fs_daddr_t *)bp->b_data)[bn]);
 			    ++bn, ++*runp);
 			bn = ap->in_off;
 			if (runb && bn) {
 				for (--bn; bn >= 0 && *runb < maxrun &&
 					is_sequential(ump,
 					((e2fs_daddr_t *)bp->b_data)[bn],
 					((e2fs_daddr_t *)bp->b_data)[bn + 1]);
 					--bn, ++*runb);
 			}
 		}
 	}
 	if (bp)
 		bqrelse(bp);
 
 	/*
 	 * Since this is FFS independent code, we are out of scope for the
 	 * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they
 	 * will fall in the range 1..um_seqinc, so we use that test and
 	 * return a request for a zeroed out buffer if attempts are made
 	 * to read a BLK_NOCOPY or BLK_SNAP block.
 	 */
 	if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){
 		*bnp = -1;
 		return (0);
 	}
 	*bnp = blkptrtodb(ump, daddr);
 	if (*bnp == 0) {
 		*bnp = -1;
 	}
 	return (0);
 }
 
 /*
  * Create an array of logical block number/offset pairs which represent the
  * path of indirect blocks required to access a data block.  The first "pair"
  * contains the logical block number of the appropriate single, double or
  * triple indirect block and the offset into the inode indirect block array.
  * Note, the logical block number of the inode single/double/triple indirect
  * block appears twice in the array, once with the offset into the i_ib and
  * once with the offset into the page itself.
  */
 int
 ext2_getlbns(struct vnode *vp, daddr_t bn, struct indir *ap, int *nump)
 {
 	long blockcnt;
 	e2fs_lbn_t metalbn, realbn;
 	struct ext2mount *ump;
 	int i, numlevels, off;
 	int64_t qblockcnt;
 
 	ump = VFSTOEXT2(vp->v_mount);
 	if (nump)
 		*nump = 0;
 	numlevels = 0;
 	realbn = bn;
 	if ((long)bn < 0)
 		bn = -(long)bn;
 
 	/* The first NDADDR blocks are direct blocks. */
 	if (bn < NDADDR)
 		return (0);
 
 	/*
 	 * Determine the number of levels of indirection.  After this loop
 	 * is done, blockcnt indicates the number of data blocks possible
 	 * at the previous level of indirection, and NIADDR - i is the number
 	 * of levels of indirection needed to locate the requested block.
 	 */
 	for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
 		if (i == 0)
 			return (EFBIG);
 		/*
 		 * Use int64_t's here to avoid overflow for triple indirect
 		 * blocks when longs have 32 bits and the block size is more
 		 * than 4K.
 		 */
 		qblockcnt = (int64_t)blockcnt * MNINDIR(ump);
 		if (bn < qblockcnt)
 			break;
 		blockcnt = qblockcnt;
 	}
 
 	/* Calculate the address of the first meta-block. */
 	if (realbn >= 0)
 		metalbn = -(realbn - bn + NIADDR - i);
 	else
 		metalbn = -(-realbn - bn + NIADDR - i);
 
 	/*
 	 * At each iteration, off is the offset into the bap array which is
 	 * an array of disk addresses at the current level of indirection.
 	 * The logical block number and the offset in that block are stored
 	 * into the argument array.
 	 */
 	ap->in_lbn = metalbn;
 	ap->in_off = off = NIADDR - i;
 	ap++;
 	for (++numlevels; i <= NIADDR; i++) {
 		/* If searching for a meta-data block, quit when found. */
 		if (metalbn == realbn)
 			break;
 
 		off = (bn / blockcnt) % MNINDIR(ump);
 
 		++numlevels;
 		ap->in_lbn = metalbn;
 		ap->in_off = off;
 		++ap;
 
 		metalbn -= -1 + off * blockcnt;
 		blockcnt /= MNINDIR(ump);
 	}
 	if (nump)
 		*nump = numlevels;
 	return (0);
 }
Index: projects/clang380-import/sys/fs/ext2fs/ext2_extents.c
===================================================================
--- projects/clang380-import/sys/fs/ext2fs/ext2_extents.c	(revision 293686)
+++ projects/clang380-import/sys/fs/ext2fs/ext2_extents.c	(revision 293687)
@@ -1,175 +1,195 @@
 /*-
  * Copyright (c) 2010 Zheng Liu <lz@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/types.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/vnode.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
 
 #include <fs/ext2fs/ext2_mount.h>
 #include <fs/ext2fs/fs.h>
 #include <fs/ext2fs/inode.h>
 #include <fs/ext2fs/ext2fs.h>
 #include <fs/ext2fs/ext2_extents.h>
 #include <fs/ext2fs/ext2_extern.h>
 
 static void ext4_ext_binsearch_index(struct inode *ip, struct ext4_extent_path
 		*path, daddr_t lbn)
 {
 	struct ext4_extent_header *ehp = path->ep_header;
 	struct ext4_extent_index *l, *r, *m;
 
 	l = (struct ext4_extent_index *)(char *)(ehp + 1);
 	r = (struct ext4_extent_index *)(char *)(ehp + 1) + ehp->eh_ecount - 1;
 	while (l <= r) {
 		m = l + (r - l) / 2;
 		if (lbn < m->ei_blk)
 			r = m - 1;
 		else
 			l = m + 1;
 	}
 
 	path->ep_index = l - 1;
 }
 
 static void
 ext4_ext_binsearch(struct inode *ip, struct ext4_extent_path *path, daddr_t lbn)
 {
 	struct ext4_extent_header *ehp = path->ep_header;
-	struct ext4_extent *l, *r, *m;
+	struct ext4_extent *first, *l, *r, *m;
 
 	if (ehp->eh_ecount == 0)
 		return;
 
-	l = (struct ext4_extent *)(char *)(ehp + 1);
-	r = (struct ext4_extent *)(char *)(ehp + 1) + ehp->eh_ecount - 1;
+	first = (struct ext4_extent *)(char *)(ehp + 1);
+	l = first;
+	r = first + ehp->eh_ecount - 1;
 	while (l <= r) {
 		m = l + (r - l) / 2;
 		if (lbn < m->e_blk)
 			r = m - 1;
 		else
 			l = m + 1;
 	}
 
+	if (l == first) {
+		path->ep_sparse_ext.e_blk = lbn;
+		path->ep_sparse_ext.e_len = first->e_blk - lbn;
+		path->ep_sparse_ext.e_start_hi = 0;
+		path->ep_sparse_ext.e_start_lo = 0;
+		path->ep_is_sparse = 1;
+		return;
+	}
 	path->ep_ext = l - 1;
+	if (path->ep_ext->e_blk + path->ep_ext->e_len <= lbn) {
+		path->ep_sparse_ext.e_blk = lbn;
+		if (l <= (first + ehp->eh_ecount - 1))
+			path->ep_sparse_ext.e_len = l->e_blk - lbn;
+		else	// XXX: where does it end?
+			path->ep_sparse_ext.e_len = 1;
+		path->ep_sparse_ext.e_start_hi = 0;
+		path->ep_sparse_ext.e_start_lo = 0;
+		path->ep_is_sparse = 1;
+	}
 }
 
 /*
  * Find a block in ext4 extent cache.
  */
 int
 ext4_ext_in_cache(struct inode *ip, daddr_t lbn, struct ext4_extent *ep)
 {
 	struct ext4_extent_cache *ecp;
 	int ret = EXT4_EXT_CACHE_NO;
 
 	ecp = &ip->i_ext_cache;
 
 	/* cache is invalid */
 	if (ecp->ec_type == EXT4_EXT_CACHE_NO)
 		return (ret);
 
 	if (lbn >= ecp->ec_blk && lbn < ecp->ec_blk + ecp->ec_len) {
 		ep->e_blk = ecp->ec_blk;
 		ep->e_start_lo = ecp->ec_start & 0xffffffff;
 		ep->e_start_hi = ecp->ec_start >> 32 & 0xffff;
 		ep->e_len = ecp->ec_len;
 		ret = ecp->ec_type;
 	}
 	return (ret);
 }
 
 /*
  * Put an ext4_extent structure in ext4 cache.
  */
 void
 ext4_ext_put_cache(struct inode *ip, struct ext4_extent *ep, int type)
 {
 	struct ext4_extent_cache *ecp;
 
 	ecp = &ip->i_ext_cache;
 	ecp->ec_type = type;
 	ecp->ec_blk = ep->e_blk;
 	ecp->ec_len = ep->e_len;
 	ecp->ec_start = (daddr_t)ep->e_start_hi << 32 | ep->e_start_lo;
 }
 
 /*
  * Find an extent.
  */
 struct ext4_extent_path *
 ext4_ext_find_extent(struct m_ext2fs *fs, struct inode *ip,
 		     daddr_t lbn, struct ext4_extent_path *path)
 {
 	struct ext4_extent_header *ehp;
 	uint16_t i;
 	int error, size;
 	daddr_t nblk;
 
 	ehp = (struct ext4_extent_header *)(char *)ip->i_db;
 
 	if (ehp->eh_magic != EXT4_EXT_MAGIC)
 		return (NULL);
 
 	path->ep_header = ehp;
 
 	for (i = ehp->eh_depth; i != 0; --i) {
 		ext4_ext_binsearch_index(ip, path, lbn);
 		path->ep_depth = 0;
 		path->ep_ext = NULL;
 
 		nblk = (daddr_t)path->ep_index->ei_leaf_hi << 32 |
 		    path->ep_index->ei_leaf_lo;
 		size = blksize(fs, ip, nblk);
 		if (path->ep_bp != NULL) {
 			brelse(path->ep_bp);
 			path->ep_bp = NULL;
 		}
 		error = bread(ip->i_devvp, fsbtodb(fs, nblk), size, NOCRED,
 			    &path->ep_bp);
 		if (error) {
 			brelse(path->ep_bp);
 			path->ep_bp = NULL;
 			return (NULL);
 		}
 		ehp = (struct ext4_extent_header *)path->ep_bp->b_data;
 		path->ep_header = ehp;
 	}
 
 	path->ep_depth = i;
 	path->ep_ext = NULL;
 	path->ep_index = NULL;
+	path->ep_is_sparse = 0;
 
 	ext4_ext_binsearch(ip, path, lbn);
 	return (path);
 }
Index: projects/clang380-import/sys/fs/ext2fs/ext2_extents.h
===================================================================
--- projects/clang380-import/sys/fs/ext2fs/ext2_extents.h	(revision 293686)
+++ projects/clang380-import/sys/fs/ext2fs/ext2_extents.h	(revision 293687)
@@ -1,99 +1,103 @@
 /*-
  * Copyright (c) 2012, 2010 Zheng Liu <lz@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 #ifndef _FS_EXT2FS_EXT2_EXTENTS_H_
 #define	_FS_EXT2FS_EXT2_EXTENTS_H_
 
 #include <sys/types.h>
 
 #define	EXT4_EXT_MAGIC  0xf30a
 
 #define	EXT4_EXT_CACHE_NO	0
 #define	EXT4_EXT_CACHE_GAP	1
 #define	EXT4_EXT_CACHE_IN	2
 
 /*
  * Ext4 file system extent on disk.
  */
 struct ext4_extent {
 	uint32_t e_blk;	/* first logical block */
 	uint16_t e_len;	/* number of blocks */
 	uint16_t e_start_hi;	/* high 16 bits of physical block */
 	uint32_t e_start_lo;	/* low 32 bits of physical block */
 };
 
 /*
  * Extent index on disk.
  */
 struct ext4_extent_index {
 	uint32_t ei_blk;	/* indexes logical blocks */
 	uint32_t ei_leaf_lo;	/* points to physical block of the
 				 * next level */
 	uint16_t ei_leaf_hi;	/* high 16 bits of physical block */
 	uint16_t ei_unused;
 };
 
 /*
  * Extent tree header.
  */
 struct ext4_extent_header {
 	uint16_t eh_magic;	/* magic number: 0xf30a */
 	uint16_t eh_ecount;	/* number of valid entries */
 	uint16_t eh_max;	/* capacity of store in entries */
 	uint16_t eh_depth;	/* the depth of extent tree */
 	uint32_t eh_gen;	/* generation of extent tree */
 };
 
 /*
  * Save cached extent.
  */
 struct ext4_extent_cache {
 	daddr_t	ec_start;	/* extent start */
 	uint32_t ec_blk;	/* logical block */
 	uint32_t ec_len;
 	uint32_t ec_type;
 };
 
 /*
  * Save path to some extent.
  */
 struct ext4_extent_path {
 	uint16_t ep_depth;
 	struct buf *ep_bp;
-	struct ext4_extent *ep_ext;
+	int ep_is_sparse;
+	union {
+		struct ext4_extent ep_sparse_ext;
+		struct ext4_extent *ep_ext;
+	};
 	struct ext4_extent_index *ep_index;
 	struct ext4_extent_header *ep_header;
 };
 
 struct inode;
 struct m_ext2fs;
 int	ext4_ext_in_cache(struct inode *, daddr_t, struct ext4_extent *);
 void	ext4_ext_put_cache(struct inode *, struct ext4_extent *, int);
 struct ext4_extent_path *ext4_ext_find_extent(struct m_ext2fs *fs,
     struct inode *, daddr_t, struct ext4_extent_path *);
 
 #endif /* !_FS_EXT2FS_EXT2_EXTENTS_H_ */
Index: projects/clang380-import/sys/fs/ext2fs/ext2_vfsops.c
===================================================================
--- projects/clang380-import/sys/fs/ext2fs/ext2_vfsops.c	(revision 293686)
+++ projects/clang380-import/sys/fs/ext2fs/ext2_vfsops.c	(revision 293687)
@@ -1,1101 +1,1101 @@
 /*-
  *  modified for EXT2FS support in Lites 1.1
  *
  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
  *  University of Utah, Department of Computer Science
  */
 /*-
  * Copyright (c) 1989, 1991, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ffs_vfsops.c	8.8 (Berkeley) 4/18/94
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
 #include <sys/endian.h>
 #include <sys/fcntl.h>
 #include <sys/malloc.h>
 #include <sys/stat.h>
 #include <sys/mutex.h>
 
 #include <geom/geom.h>
 #include <geom/geom_vfs.h>
 
 #include <fs/ext2fs/ext2_mount.h>
 #include <fs/ext2fs/inode.h>
 
 #include <fs/ext2fs/fs.h>
 #include <fs/ext2fs/ext2fs.h>
 #include <fs/ext2fs/ext2_dinode.h>
 #include <fs/ext2fs/ext2_extern.h>
 
 static int	ext2_flushfiles(struct mount *mp, int flags, struct thread *td);
 static int	ext2_mountfs(struct vnode *, struct mount *);
 static int	ext2_reload(struct mount *mp, struct thread *td);
 static int	ext2_sbupdate(struct ext2mount *, int);
 static int	ext2_cgupdate(struct ext2mount *, int);
 static vfs_unmount_t		ext2_unmount;
 static vfs_root_t		ext2_root;
 static vfs_statfs_t		ext2_statfs;
 static vfs_sync_t		ext2_sync;
 static vfs_vget_t		ext2_vget;
 static vfs_fhtovp_t		ext2_fhtovp;
 static vfs_mount_t		ext2_mount;
 
 MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part");
 static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure");
 
 static struct vfsops ext2fs_vfsops = {
 	.vfs_fhtovp =		ext2_fhtovp,
 	.vfs_mount =		ext2_mount,
 	.vfs_root =		ext2_root,	/* root inode via vget */
 	.vfs_statfs =		ext2_statfs,
 	.vfs_sync =		ext2_sync,
 	.vfs_unmount =		ext2_unmount,
 	.vfs_vget =		ext2_vget,
 };
 
 VFS_SET(ext2fs_vfsops, ext2fs, 0);
 
 static int	ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev,
 		    int ronly);
 static int	compute_sb_data(struct vnode * devvp,
 		    struct ext2fs * es, struct m_ext2fs * fs);
 
 static const char *ext2_opts[] = { "acls", "async", "noatime", "noclusterr", 
     "noclusterw", "noexec", "export", "force", "from", "multilabel",
     "suiddir", "nosymfollow", "sync", "union", NULL };
 
 /*
  * VFS Operations.
  *
  * mount system call
  */
 static int
 ext2_mount(struct mount *mp)
 {
 	struct vfsoptlist *opts;
 	struct vnode *devvp;
 	struct thread *td;
 	struct ext2mount *ump = NULL;
 	struct m_ext2fs *fs;
 	struct nameidata nd, *ndp = &nd;
 	accmode_t accmode;
 	char *path, *fspec;
 	int error, flags, len;
 
 	td = curthread;
 	opts = mp->mnt_optnew;
 
 	if (vfs_filteropt(opts, ext2_opts))
 		return (EINVAL);
 
 	vfs_getopt(opts, "fspath", (void **)&path, NULL);
 	/* Double-check the length of path.. */
 	if (strlen(path) >= MAXMNTLEN)
 		return (ENAMETOOLONG);
 
 	fspec = NULL;
 	error = vfs_getopt(opts, "from", (void **)&fspec, &len);
 	if (!error && fspec[len - 1] != '\0')
 		return (EINVAL);
 
 	/*
 	 * If updating, check whether changing from read-only to
 	 * read/write; if there is no device name, that's all we do.
 	 */
 	if (mp->mnt_flag & MNT_UPDATE) {
 		ump = VFSTOEXT2(mp);
 		fs = ump->um_e2fs; 
 		error = 0;
 		if (fs->e2fs_ronly == 0 &&
 		    vfs_flagopt(opts, "ro", NULL, 0)) {
 			error = VFS_SYNC(mp, MNT_WAIT);
 			if (error)
 				return (error);
 			flags = WRITECLOSE;
 			if (mp->mnt_flag & MNT_FORCE)
 				flags |= FORCECLOSE;
 			error = ext2_flushfiles(mp, flags, td);
 			if ( error == 0 && fs->e2fs_wasvalid && ext2_cgupdate(ump, MNT_WAIT) == 0) {
 				fs->e2fs->e2fs_state |= E2FS_ISCLEAN;
 				ext2_sbupdate(ump, MNT_WAIT);
 			}
 			fs->e2fs_ronly = 1;
 			vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY);
 			DROP_GIANT();
 			g_topology_lock();
 			g_access(ump->um_cp, 0, -1, 0);
 			g_topology_unlock();
 			PICKUP_GIANT();
 		}
 		if (!error && (mp->mnt_flag & MNT_RELOAD))
 			error = ext2_reload(mp, td);
 		if (error)
 			return (error);
 		devvp = ump->um_devvp;
 		if (fs->e2fs_ronly && !vfs_flagopt(opts, "ro", NULL, 0)) {
 			if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0))
 				return (EPERM);
 
 			/*
 			 * If upgrade to read-write by non-root, then verify
 			 * that user has necessary permissions on the device.
 			 */
 			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 			error = VOP_ACCESS(devvp, VREAD | VWRITE,
 			    td->td_ucred, td);
 			if (error)
 				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 			if (error) {
 				VOP_UNLOCK(devvp, 0);
 				return (error);
 			}
 			VOP_UNLOCK(devvp, 0);
 			DROP_GIANT();
 			g_topology_lock();
 			error = g_access(ump->um_cp, 0, 1, 0);
 			g_topology_unlock();
 			PICKUP_GIANT();
 			if (error)
 				return (error);
 
 			if ((fs->e2fs->e2fs_state & E2FS_ISCLEAN) == 0 ||
 			    (fs->e2fs->e2fs_state & E2FS_ERRORS)) {
 				if (mp->mnt_flag & MNT_FORCE) {
 					printf(
 "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt);
 				} else {
 					printf(
 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
 					    fs->e2fs_fsmnt);
 					return (EPERM);
 				}
 			}
 			fs->e2fs->e2fs_state &= ~E2FS_ISCLEAN;
 			(void)ext2_cgupdate(ump, MNT_WAIT);
 			fs->e2fs_ronly = 0;
 			MNT_ILOCK(mp);
 			mp->mnt_flag &= ~MNT_RDONLY;
 			MNT_IUNLOCK(mp);
 		}
 		if (vfs_flagopt(opts, "export", NULL, 0)) {
 			/* Process export requests in vfs_mount.c. */
 			return (error);
 		}
 	}
 
 	/*
 	 * Not an update, or updating the name: look up the name
 	 * and verify that it refers to a sensible disk device.
 	 */
 	if (fspec == NULL)
 		return (EINVAL);
 	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
 	if ((error = namei(ndp)) != 0)
 		return (error);
 	NDFREE(ndp, NDF_ONLY_PNBUF);
 	devvp = ndp->ni_vp;
 
 	if (!vn_isdisk(devvp, &error)) {
 		vput(devvp);
 		return (error);
 	}
 
 	/*
 	 * If mount by non-root, then verify that user has necessary
 	 * permissions on the device.
 	 *
 	 * XXXRW: VOP_ACCESS() enough?
 	 */
 	accmode = VREAD;
 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
 		accmode |= VWRITE;
 	error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
 	if (error)
 		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 	if (error) {
 		vput(devvp);
 		return (error);
 	}
 
 	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
 		error = ext2_mountfs(devvp, mp);
 	} else {
 		if (devvp != ump->um_devvp) {
 			vput(devvp);
 			return (EINVAL);	/* needs translation */
 		} else
 			vput(devvp);
 	}
 	if (error) {
 		vrele(devvp);
 		return (error);
 	}
 	ump = VFSTOEXT2(mp);
 	fs = ump->um_e2fs;
 
 	/*
 	 * Note that this strncpy() is ok because of a check at the start
 	 * of ext2_mount().
 	 */
 	strncpy(fs->e2fs_fsmnt, path, MAXMNTLEN);
 	fs->e2fs_fsmnt[MAXMNTLEN - 1] = '\0';
 	vfs_mountedfrom(mp, fspec);
 	return (0);
 }
 
 static int
 ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly)
 {
 
 	if (es->e2fs_magic != E2FS_MAGIC) {
 		printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n",
 		    devtoname(dev), es->e2fs_magic, E2FS_MAGIC);
 		return (1);
 	}
 	if (es->e2fs_rev > E2FS_REV0) {
 		if (es->e2fs_features_incompat & ~(EXT2F_INCOMPAT_SUPP |
 						   EXT4F_RO_INCOMPAT_SUPP)) {
 			printf(
 "WARNING: mount of %s denied due to unsupported optional features\n",
 			    devtoname(dev));
 			return (1);
 		}
 		if (!ronly &&
 		    (es->e2fs_features_rocompat & ~EXT2F_ROCOMPAT_SUPP)) {
 			printf("WARNING: R/W mount of %s denied due to "
 			    "unsupported optional features\n", devtoname(dev));
 			return (1);
 		}
 	}
 	return (0);
 }
 
 /*
  * Compute the in-core fields of the m_ext2fs structure from the on-disk
  * superblock 'es' and load every group descriptor block from 'devvp'
  * into the freshly allocated fs->e2fs_gd array.
  *
  * Returns 0 on success, EIO when the superblock inode size fails the
  * sanity check, EINVAL when the EXTRA_ISIZE feature is set but the inodes
  * are smaller than struct ext2fs_dinode, or the bread() error when a
  * descriptor block cannot be read.
  *
  * On a read failure the e2fs_gd and e2fs_contigdirs arrays allocated here
  * are freed and the pointers are reset to NULL.  ext2_mountfs() frees both
  * pointers again in its own error path, so leaving them dangling would
  * result in a double free.
  */
 static int
 compute_sb_data(struct vnode *devvp, struct ext2fs *es,
     struct m_ext2fs *fs)
 {
 	int db_count, error;
 	int i;
 	int logic_sb_block = 1;	/* XXX for now */
 	struct buf *bp;
 	uint32_t e2fs_descpb;
 
 	fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->e2fs_log_bsize;
 	fs->e2fs_bsize = 1U << fs->e2fs_bshift;
 	fs->e2fs_fsbtodb = es->e2fs_log_bsize + 1;
 	fs->e2fs_qbmask = fs->e2fs_bsize - 1;
 	fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << es->e2fs_log_fsize;
 	if (fs->e2fs_fsize)
 		fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize;
 	fs->e2fs_bpg = es->e2fs_bpg;
 	fs->e2fs_fpg = es->e2fs_fpg;
 	fs->e2fs_ipg = es->e2fs_ipg;
 	if (es->e2fs_rev == E2FS_REV0) {
 		fs->e2fs_isize = E2FS_REV0_INODE_SIZE;
 	} else {
 		fs->e2fs_isize = es->e2fs_inode_size;
 
 		/*
 		 * Simple sanity check for superblock inode size value:
 		 * at least the revision 0 size, at most one block, and
 		 * a power of two.
 		 */
 		if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE ||
 		    EXT2_INODE_SIZE(fs) > fs->e2fs_bsize ||
 		    (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) {
 			printf("ext2fs: invalid inode size %d\n",
 			    fs->e2fs_isize);
 			return (EIO);
 		}
 	}
 	/* Check for extra isize in big inodes. */
 	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) &&
 	    EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) {
 		printf("ext2fs: no space for extra inode timestamps\n");
 		return (EINVAL);
 	}
 
 	fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs);
 	fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb;
 	/* s_resuid / s_resgid ? */
 	/* Number of block groups, rounded up. */
 	fs->e2fs_gcount = (es->e2fs_bcount - es->e2fs_first_dblock +
 	    EXT2_BLOCKS_PER_GROUP(fs) - 1) / EXT2_BLOCKS_PER_GROUP(fs);
 	/* Number of blocks needed to hold all group descriptors, rounded up. */
 	e2fs_descpb = fs->e2fs_bsize / sizeof(struct ext2_gd);
 	db_count = (fs->e2fs_gcount + e2fs_descpb - 1) / e2fs_descpb;
 	fs->e2fs_gdbcount = db_count;
 	fs->e2fs_gd = malloc(db_count * fs->e2fs_bsize,
 	    M_EXT2MNT, M_WAITOK);
 	fs->e2fs_contigdirs = malloc(fs->e2fs_gcount *
 	    sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO);
 
 	/*
 	 * Adjust logic_sb_block.
 	 * Godmar thinks: if the blocksize is greater than 1024, then
 	 * the superblock is logically part of block zero.
 	 */
 	if (fs->e2fs_bsize > SBSIZE)
 		logic_sb_block = 0;
 	for (i = 0; i < db_count; i++) {
 		error = bread(devvp,
 		    fsbtodb(fs, logic_sb_block + i + 1),
 		    fs->e2fs_bsize, NOCRED, &bp);
 		if (error) {
 			/*
 			 * Reset the pointers after freeing so that the
 			 * caller's cleanup cannot free them a second time.
 			 */
 			free(fs->e2fs_contigdirs, M_EXT2MNT);
 			fs->e2fs_contigdirs = NULL;
 			free(fs->e2fs_gd, M_EXT2MNT);
 			fs->e2fs_gd = NULL;
 			brelse(bp);
 			return (error);
 		}
 		e2fs_cgload((struct ext2_gd *)bp->b_data,
 		    &fs->e2fs_gd[
 			i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
 		    fs->e2fs_bsize);
 		brelse(bp);
 		bp = NULL;
 	}
 	/* Initialization for the ext2 Orlov allocator variant. */
 	fs->e2fs_total_dir = 0;
 	for (i = 0; i < fs->e2fs_gcount; i++)
 		fs->e2fs_total_dir += fs->e2fs_gd[i].ext2bgd_ndirs;
 
 	/* Without the LARGEFILE feature file sizes are limited to 31 bits. */
 	if (es->e2fs_rev == E2FS_REV0 ||
 	    !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE))
 		fs->e2fs_maxfilesize = 0x7fffffff;
 	else
 		fs->e2fs_maxfilesize = 0x7fffffffffffffff;
 	return (0);
 }
 
 /*
  * Reload all incore data for a filesystem (used after running fsck on
  * the root filesystem and finding things to fix). The filesystem must
  * be mounted read-only.
  *
  * Things to do to update the mount:
  *	1) invalidate all cached meta-data.
  *	2) re-read superblock from disk.
  *	3) invalidate all cluster summary information.
  *	4) invalidate all inactive vnodes.
  *	5) invalidate all cached file data.
  *	6) re-read inode data for all active vnodes.
  * XXX we are missing some steps, in particular # 3, this has to be reviewed.
  */
 static int
 ext2_reload(struct mount *mp, struct thread *td)
 {
 	struct vnode *vp, *mvp, *devvp;
 	struct inode *ip;
 	struct buf *bp;
 	struct ext2fs *es;
 	struct m_ext2fs *fs;
 	struct csum *sump;
 	int error, i;
 	int32_t *lp;
 
 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
 		return (EINVAL);
 	/*
 	 * Step 1: invalidate all cached meta-data.
 	 */
 	devvp = VFSTOEXT2(mp)->um_devvp;
 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 	if (vinvalbuf(devvp, 0, 0, 0) != 0)
 		panic("ext2_reload: dirty1");
 	VOP_UNLOCK(devvp, 0);
 
 	/*
 	 * Step 2: re-read superblock from disk.
 	 * constants have been adjusted for ext2
 	 */
 	if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
 		return (error);
 	es = (struct ext2fs *)bp->b_data;
 	if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) {
 		brelse(bp);
 		return (EIO);		/* XXX needs translation */
 	}
 	fs = VFSTOEXT2(mp)->um_e2fs;
 	bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs));
 
 	if((error = compute_sb_data(devvp, es, fs)) != 0) {
 		brelse(bp);
 		return (error);
 	}
 #ifdef UNKLAR
 	if (fs->fs_sbsize < SBSIZE)
 		bp->b_flags |= B_INVAL;
 #endif
 	brelse(bp);
 
 	/*
 	 * Step 3: invalidate all cluster summary information.
 	 */
 	if (fs->e2fs_contigsumsize > 0) {
 		lp = fs->e2fs_maxcluster;
 		sump = fs->e2fs_clustersum;
 		for (i = 0; i < fs->e2fs_gcount; i++, sump++) {
 			*lp++ = fs->e2fs_contigsumsize;
 			sump->cs_init = 0;
 			bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1);
 		}
 	}
 
 loop:
 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 		/*
 		 * Step 4: invalidate all cached file data.
 		 */
 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 			goto loop;
 		}
 		if (vinvalbuf(vp, 0, 0, 0))
 			panic("ext2_reload: dirty2");
 
 		/*
 		 * Step 5: re-read inode data for all active vnodes.
 		 */
 		ip = VTOI(vp);
 		error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
 		    (int)fs->e2fs_bsize, NOCRED, &bp);
 		if (error) {
 			VOP_UNLOCK(vp, 0);
 			vrele(vp);
 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 			return (error);
 		}
 		ext2_ei2i((struct ext2fs_dinode *) ((char *)bp->b_data +
 		    EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip);
 		brelse(bp);
 		VOP_UNLOCK(vp, 0);
 		vrele(vp);
 	}
 	return (0);
 }
 
 /*
  * Common code for mount and mountroot.
  *
  * Opens a GEOM consumer on 'devvp', reads and validates the superblock,
  * allocates the per-mount structures (struct ext2mount, struct m_ext2fs,
  * a private copy of the superblock and the cluster summary arrays) and
  * attaches them to 'mp'.
  *
  * Returns 0 on success.  On failure an errno value is returned after the
  * consumer has been closed and everything allocated here has been freed.
  */
 static int
 ext2_mountfs(struct vnode *devvp, struct mount *mp)
 {
 	struct ext2mount *ump;
 	struct buf *bp;
 	struct m_ext2fs *fs;
 	struct ext2fs *es;
 	struct cdev *dev = devvp->v_rdev;
 	struct g_consumer *cp;
 	struct bufobj *bo;
 	struct csum *sump;
 	int error;
 	int ronly;
 	int i, size;
 	int32_t *lp;
 	int32_t e2fs_maxcontig;
 
 	ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0);
 	/* XXX: use VOP_ACCESS to check FS perms */
 	DROP_GIANT();
 	g_topology_lock();
 	/* Request write access on the consumer only for a read/write mount. */
 	error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1);
 	g_topology_unlock();
 	PICKUP_GIANT();
 	VOP_UNLOCK(devvp, 0);
 	if (error)
 		return (error);
 
 	/* XXX: should we check for some sectorsize or 512 instead? */
 	/* SBSIZE must be a non-zero multiple of the provider sector size. */
 	if (((SBSIZE % cp->provider->sectorsize) != 0) ||
 	    (SBSIZE < cp->provider->sectorsize)) {
 		DROP_GIANT();
 		g_topology_lock();
 		g_vfs_close(cp);
 		g_topology_unlock();
 		PICKUP_GIANT();
 		return (EINVAL);
 	}
 
 	bo = &devvp->v_bufobj;
 	bo->bo_private = cp;
 	bo->bo_ops = g_vfs_bufops;
 	/* Take the device's I/O size limit if it has one, capped at MAXPHYS. */
 	if (devvp->v_rdev->si_iosize_max != 0)
 		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
 	if (mp->mnt_iosize_max > MAXPHYS)
 		mp->mnt_iosize_max = MAXPHYS;
 
 	/* Cleared so the error path at 'out' can tell what needs releasing. */
 	bp = NULL;
 	ump = NULL;
 	if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0)
 		goto out;
 	es = (struct ext2fs *)bp->b_data;
 	if (ext2_check_sb_compat(es, dev, ronly) != 0) {
 		error = EINVAL;		/* XXX needs translation */
 		goto out;
 	}
 	/*
 	 * A filesystem that is not marked clean, or that has errors recorded,
 	 * may only be mounted read-only or with MNT_FORCE.
 	 */
 	if ((es->e2fs_state & E2FS_ISCLEAN) == 0 ||
 	    (es->e2fs_state & E2FS_ERRORS)) {
 		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
 			printf(
 "WARNING: Filesystem was not properly dismounted\n");
 		} else {
 			printf(
 "WARNING: R/W mount denied.  Filesystem is not clean - run fsck\n");
 			error = EPERM;
 			goto out;
 		}
 	}
 	ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO);
 
 	/*
 	 * I don't know whether this is the right strategy. Note that
 	 * we dynamically allocate both an ext2_sb_info and an ext2_super_block
 	 * while Linux keeps the super block in a locked buffer.
 	 */
 	ump->um_e2fs = malloc(sizeof(struct m_ext2fs),
-		M_EXT2MNT, M_WAITOK);
+		M_EXT2MNT, M_WAITOK | M_ZERO);
 	ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs),
 		M_EXT2MNT, M_WAITOK);
 	mtx_init(EXT2_MTX(ump), "EXT2FS", "EXT2FS Lock", MTX_DEF);
 	/* Keep a private copy of the on-disk superblock. */
 	bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs));
 	if ((error = compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs)))
 		goto out;
 
 	/*
 	 * Calculate the maximum contiguous blocks and size of cluster summary
 	 * array.  In FFS this is done by newfs; however, the superblock
 	 * in ext2fs doesn't have these variables, so we can calculate
 	 * them here.
 	 */
 	e2fs_maxcontig = MAX(1, MAXPHYS / ump->um_e2fs->e2fs_bsize);
 	ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG);
 	if (ump->um_e2fs->e2fs_contigsumsize > 0) {
 		/* One maxcluster entry and one csum per block group. */
 		size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t);
 		ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK);
 		size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum);
 		ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK);
 		lp = ump->um_e2fs->e2fs_maxcluster;
 		sump = ump->um_e2fs->e2fs_clustersum;
 		for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) {
 			*lp++ = ump->um_e2fs->e2fs_contigsumsize;
 			sump->cs_init = 0;
 			/* Zeroed array of e2fs_contigsumsize + 1 counters. */
 			sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) *
 			    sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO);
 		}
 	}
 
 	brelse(bp);
 	bp = NULL;
 	fs = ump->um_e2fs;
 	fs->e2fs_ronly = ronly;	/* ronly is set according to mnt_flags */
 
 	/*
 	 * If the fs is not mounted read-only, make sure the super block is
 	 * always written back on a sync().
 	 */
 	/* Remember whether the filesystem was clean when it was mounted. */
 	fs->e2fs_wasvalid = fs->e2fs->e2fs_state & E2FS_ISCLEAN ? 1 : 0;
 	if (ronly == 0) {
 		fs->e2fs_fmod = 1;		/* mark it modified */
 		fs->e2fs->e2fs_state &= ~E2FS_ISCLEAN;	/* set fs invalid */
 	}
 	mp->mnt_data = ump;
 	mp->mnt_stat.f_fsid.val[0] = dev2udev(dev);
 	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
 	mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN;
 	MNT_ILOCK(mp);
 	mp->mnt_flag |= MNT_LOCAL;
 	MNT_IUNLOCK(mp);
 	ump->um_mountp = mp;
 	ump->um_dev = dev;
 	ump->um_devvp = devvp;
 	ump->um_bo = &devvp->v_bufobj;
 	ump->um_cp = cp;
 
 	/*
 	 * Setting those two parameters allowed us to use
 	 * ufs_bmap w/o changes!
 	 */
 	ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs);
 	ump->um_bptrtodb = fs->e2fs->e2fs_log_bsize + 1;
 	ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs);
 	/* Write the superblock back now that E2FS_ISCLEAN has been cleared. */
 	if (ronly == 0)
 		ext2_sbupdate(ump, MNT_WAIT);
 	/*
 	 * Initialize filesystem stat information in mount struct.
 	 */
 	MNT_ILOCK(mp);
 	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
 	    MNTK_USES_BCACHE;
 	MNT_IUNLOCK(mp);
 	return (0);
 out:
 	/*
 	 * Error exit: release the superblock buffer if it is still held,
 	 * close the GEOM consumer and free whatever was allocated above.
 	 */
 	if (bp)
 		brelse(bp);
 	if (cp != NULL) {
 		DROP_GIANT();
 		g_topology_lock();
 		g_vfs_close(cp);
 		g_topology_unlock();
 		PICKUP_GIANT();
 	}
 	if (ump) {
 		mtx_destroy(EXT2_MTX(ump));
 		free(ump->um_e2fs->e2fs_gd, M_EXT2MNT);
 		free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT);
 		free(ump->um_e2fs->e2fs, M_EXT2MNT);
 		free(ump->um_e2fs, M_EXT2MNT);
 		free(ump, M_EXT2MNT);
 		mp->mnt_data = NULL;
 	}
 	return (error);
 }
 
 /*
  * Unmount system call.
  */
 static int
 ext2_unmount(struct mount *mp, int mntflags)
 {
 	struct ext2mount *ump;
 	struct m_ext2fs *fs;
 	struct csum *sump;
 	int error, flags, i, ronly;
 
 	flags = 0;
 	if (mntflags & MNT_FORCE) {
 		if (mp->mnt_flag & MNT_ROOTFS)
 			return (EINVAL);
 		flags |= FORCECLOSE;
 	}
 	if ((error = ext2_flushfiles(mp, flags, curthread)) != 0)
 		return (error);
 	ump = VFSTOEXT2(mp);
 	fs = ump->um_e2fs;
 	ronly = fs->e2fs_ronly;
 	if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) {
 		if (fs->e2fs_wasvalid)
 			fs->e2fs->e2fs_state |= E2FS_ISCLEAN;
 		ext2_sbupdate(ump, MNT_WAIT);
 	}
 
 	DROP_GIANT();
 	g_topology_lock();
 	g_vfs_close(ump->um_cp);
 	g_topology_unlock();
 	PICKUP_GIANT();
 	vrele(ump->um_devvp);
 	sump = fs->e2fs_clustersum;
 	for (i = 0; i < fs->e2fs_gcount; i++, sump++)
 		free(sump->cs_sum, M_EXT2MNT);
 	free(fs->e2fs_clustersum, M_EXT2MNT);
 	free(fs->e2fs_maxcluster, M_EXT2MNT);
 	free(fs->e2fs_gd, M_EXT2MNT);
 	free(fs->e2fs_contigdirs, M_EXT2MNT);
 	free(fs->e2fs, M_EXT2MNT);
 	free(fs, M_EXT2MNT);
 	free(ump, M_EXT2MNT);
 	mp->mnt_data = NULL;
 	MNT_ILOCK(mp);
 	mp->mnt_flag &= ~MNT_LOCAL;
 	MNT_IUNLOCK(mp);
 	return (error);
 }
 
 /*
  * Flush out all the files in a filesystem.
  * Thin wrapper that hands 'flags' to vflush() and returns its result.
  */
 static int
 ext2_flushfiles(struct mount *mp, int flags, struct thread *td)
 {
 
 	return (vflush(mp, 0, flags, td));
 }
 /*
  * Get filesystem statistics.
  */
 int
 ext2_statfs(struct mount *mp, struct statfs *sbp)
 {
 	struct ext2mount *ump;
 	struct m_ext2fs *fs;
 	uint32_t overhead, overhead_per_group, ngdb;
 	int i, ngroups;
 
 	ump = VFSTOEXT2(mp);
 	fs = ump->um_e2fs;
 	if (fs->e2fs->e2fs_magic != E2FS_MAGIC)
 		panic("ext2_statfs");
 
 	/*
 	 * Compute the overhead (FS structures)
 	 */
 	overhead_per_group =
 	    1 /* block bitmap */ +
 	    1 /* inode bitmap */ +
 	    fs->e2fs_itpg;
 	overhead = fs->e2fs->e2fs_first_dblock +
 	    fs->e2fs_gcount * overhead_per_group;
 	if (fs->e2fs->e2fs_rev > E2FS_REV0 &&
 	    fs->e2fs->e2fs_features_rocompat & EXT2F_ROCOMPAT_SPARSESUPER) {
 		for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) {
 			if (cg_has_sb(i))
 				ngroups++;
 		}
 	} else {
 		ngroups = fs->e2fs_gcount;
 	}
 	ngdb = fs->e2fs_gdbcount;
 	if (fs->e2fs->e2fs_rev > E2FS_REV0 &&
 	    fs->e2fs->e2fs_features_compat & EXT2F_COMPAT_RESIZE)
 		ngdb += fs->e2fs->e2fs_reserved_ngdb;
 	overhead += ngroups * (1 /* superblock */ + ngdb);
 
 	sbp->f_bsize = EXT2_FRAG_SIZE(fs);
 	sbp->f_iosize = EXT2_BLOCK_SIZE(fs);
 	sbp->f_blocks = fs->e2fs->e2fs_bcount - overhead;
 	sbp->f_bfree = fs->e2fs->e2fs_fbcount;
 	sbp->f_bavail = sbp->f_bfree - fs->e2fs->e2fs_rbcount;
 	sbp->f_files = fs->e2fs->e2fs_icount;
 	sbp->f_ffree = fs->e2fs->e2fs_ficount;
 	return (0);
 }
 
 /*
  * Go through the disk queues to initiate sandbagged IO;
  * go through the inodes to write those that have been modified;
  * initiate the writing of the super block if it has been modified.
  *
  * 'waitfor' is passed on to VOP_FSYNC() and ext2_cgupdate(); with
  * MNT_LAZY, vnodes that only have dirty buffers are skipped and the
  * device vnode is not synced.
  *
  * Returns 0, or the most recent error recorded from VOP_FSYNC() or
  * ext2_cgupdate().
  *
  * Note: we are always called with the filesystem marked `MPBUSY'.
  */
 static int
 ext2_sync(struct mount *mp, int waitfor)
 {
 	struct vnode *mvp, *vp;
 	struct thread *td;
 	struct inode *ip;
 	struct ext2mount *ump = VFSTOEXT2(mp);
 	struct m_ext2fs *fs;
 	int error, allerror = 0;
 
 	td = curthread;
 	fs = ump->um_e2fs;
 	/* A read-only filesystem must never be marked modified. */
 	if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) {		/* XXX */
 		printf("fs = %s\n", fs->e2fs_fsmnt);
 		panic("ext2_sync: rofs mod");
 	}
 
 	/*
 	 * Write back each (modified) inode.
 	 */
 loop:
 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 		/*
 		 * The vnode interlock is dropped explicitly before each
 		 * 'continue' below, and handed to vget() via LK_INTERLOCK.
 		 */
 		if (vp->v_type == VNON) {
 			VI_UNLOCK(vp);
 			continue;
 		}
 		ip = VTOI(vp);
 		/*
 		 * Skip the vnode when no inode update flag is pending and
 		 * either it has no dirty buffers or this is a lazy sync.
 		 */
 		if ((ip->i_flag &
 		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
 		    (vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
 		    waitfor == MNT_LAZY)) {
 			VI_UNLOCK(vp);
 			continue;
 		}
 		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td);
 		if (error) {
 			/* ENOENT restarts the scan; other failures skip vp. */
 			if (error == ENOENT) {
 				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 				goto loop;
 			}
 			continue;
 		}
 		if ((error = VOP_FSYNC(vp, waitfor, td)) != 0)
 			allerror = error;
 		VOP_UNLOCK(vp, 0);
 		vrele(vp);
 	}
 
 	/*
 	 * Force stale filesystem control information to be flushed.
 	 */
 	if (waitfor != MNT_LAZY) {
 		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
 		if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0)
 			allerror = error;
 		VOP_UNLOCK(ump->um_devvp, 0);
 	}
 
 	/*
 	 * Write back modified superblock.
 	 */
 	if (fs->e2fs_fmod != 0) {
 		/* Clear the modified mark and stamp the write time first. */
 		fs->e2fs_fmod = 0;
 		fs->e2fs->e2fs_wtime = time_second;
 		if ((error = ext2_cgupdate(ump, waitfor)) != 0)
 			allerror = error;
 	}
 	return (allerror);
 }
 
 /*
  * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it
  * in from disk.  If it is in core, wait for the lock bit to clear, then
  * return the inode locked.  Detection and handling of mount points must be
  * done by the calling routine.
  *
  * On success 0 is returned and *vpp holds the vnode.  On failure an errno
  * value is returned; *vpp is set to NULL on the failure paths that are
  * handled explicitly below.
  */
 static int
 ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
 {
 	struct m_ext2fs *fs;
 	struct inode *ip;
 	struct ext2mount *ump;
 	struct buf *bp;
 	struct vnode *vp;
 	struct thread *td;
 	int i, error;
 	int used_blocks;
 
 	td = curthread;
 	/* Fast path: the vnode is already in the vfs hash. */
 	error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL);
 	if (error || *vpp != NULL)
 		return (error);
 
 	ump = VFSTOEXT2(mp);
 	ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO);
 
 	/* Allocate a new vnode/inode. */
 	if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) {
 		*vpp = NULL;
 		free(ip, M_EXT2NODE);
 		return (error);
 	}
 	/* Cross-link the vnode and the in-core inode. */
 	vp->v_data = ip;
 	ip->i_vnode = vp;
 	ip->i_e2fs = fs = ump->um_e2fs;
 	ip->i_ump  = ump;
 	ip->i_number = ino;
 
 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
 	error = insmntque(vp, mp);
 	if (error != 0) {
 		free(ip, M_EXT2NODE);
 		*vpp = NULL;
 		return (error);
 	}
 	/*
 	 * Insert into the vfs hash.  A non-NULL *vpp with no error is
 	 * returned to the caller in place of the vnode created here.
 	 */
 	error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL);
 	if (error || *vpp != NULL)
 		return (error);
 
 	/* Read in the disk contents for the inode, copy into the inode. */
 	if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
 	    (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) {
 		/*
 		 * The inode does not contain anything useful, so it would
 		 * be misleading to leave it on its hash chain. With mode
 		 * still zero, it will be unlinked and returned to the free
 		 * list by vput().
 		 */
 		brelse(bp);
 		vput(vp);
 		*vpp = NULL;
 		return (error);
 	}
 	/* convert ext2 inode to dinode */
 	ext2_ei2i((struct ext2fs_dinode *) ((char *)bp->b_data + EXT2_INODE_SIZE(fs) *
 			ino_to_fsbo(fs, ino)), ip);
 	ip->i_block_group = ino_to_cg(fs, ino);
 	ip->i_next_alloc_block = 0;
 	ip->i_next_alloc_goal = 0;
 
 	/*
 	 * Now we want to make sure that block pointers for unused
 	 * blocks are zeroed out - ext2_balloc depends on this
 	 * although for regular files and directories only
 	 *
 	 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed
 	 * out because we could corrupt the extent tree.
 	 */
 	if (!(ip->i_flag & IN_E4EXTENTS) &&
 	    (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) {
 		/* Number of blocks covered by i_size, rounded up. */
 		used_blocks = (ip->i_size+fs->e2fs_bsize-1) / fs->e2fs_bsize;
 		for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++)
 			ip->i_db[i] = 0;
 	}
 #ifdef EXT2FS_DEBUG
 	ext2_print_inode(ip);
 #endif
 	bqrelse(bp);
 
 	/*
 	 * Initialize the vnode from the inode, check for aliases.
 	 * Note that the underlying vnode may have changed.
 	 */
 	if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) {
 		vput(vp);
 		*vpp = NULL;
 		return (error);
 	}
 
 	/*
 	 * Finish inode initialization.
 	 */
 
 	/*
 	 * Set up a generation number for this inode if it does not
 	 * already have one. This should only happen on old filesystems.
 	 */
 	if (ip->i_gen == 0) {
 		ip->i_gen = random() + 1;
 		/* Only mark the inode modified on a writable mount. */
 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
 			ip->i_flag |= IN_MODIFIED;
 	}
 	*vpp = vp;
 	return (0);
 }
 
 /*
  * File handle to vnode
  *
  * Stale file handles have to be rejected with care:
  * - the inode number must lie within the range valid for this filesystem
  * - ext2_vget() is called (through VFS_VGET) to get the locked inode
  * - an unallocated inode (i_mode == 0), a generation mismatch or a link
  *   count that is not positive all mean the handle is stale
  * - check that the given client host has export rights and return
  *   those rights via. exflagsp and credanonp
  *
  * Returns 0 with *vpp set, ESTALE for a stale handle, or the VFS_VGET
  * error with *vpp set to NULLVP.
  */
 static int
 ext2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
 {
 	struct m_ext2fs *fs;
 	struct ufid *ufhp;
 	struct vnode *nvp;
 	struct inode *ip;
 	int error;
 
 	fs = VFSTOEXT2(mp)->um_e2fs;
 	ufhp = (struct ufid *)fhp;
 	if (ufhp->ufid_ino < EXT2_ROOTINO ||
 	    ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs->e2fs_ipg)
 		return (ESTALE);
 
 	error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp);
 	if (error) {
 		*vpp = NULLVP;
 		return (error);
 	}
 
 	ip = VTOI(nvp);
 	if (ip->i_mode != 0 && ip->i_gen == ufhp->ufid_gen &&
 	    ip->i_nlink > 0) {
 		/* The handle still names a live inode of this generation. */
 		*vpp = nvp;
 		vnode_create_vobject(*vpp, 0, curthread);
 		return (0);
 	}
 
 	vput(nvp);
 	*vpp = NULLVP;
 	return (ESTALE);
 }
 
 /*
  * Write a superblock and associated information back to disk.
  *
  * The in-core superblock copy is placed in the SBLOCK buffer of the
  * device vnode.  With MNT_WAIT the buffer is written with bwrite() and
  * its result is returned; otherwise bawrite() is used and 0 is returned.
  */
 static int
 ext2_sbupdate(struct ext2mount *mp, int waitfor)
 {
 	struct ext2fs *es = mp->um_e2fs->e2fs;
 	struct buf *bp;
 
 	bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0);
 	bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs));
 
 	/*
 	 * The buffers for group descriptors, inode bitmaps and block bitmaps
 	 * are not busy at this point and are (hopefully) written by the
 	 * usual sync mechanism. No need to write them here.
 	 */
 	if (waitfor != MNT_WAIT) {
 		bawrite(bp);
 		return (0);
 	}
 	return (bwrite(bp));
 }
 /*
  * Write the superblock and every group descriptor block back to disk.
  *
  * With MNT_WAIT each buffer is written with bwrite(), otherwise with
  * bawrite().  Returns 0 on success, or the first error encountered
  * (the superblock write is attempted first).  All descriptor blocks are
  * written even after a failure.
  */
 int
 ext2_cgupdate(struct ext2mount *mp, int waitfor)
 {
 	struct m_ext2fs *fs = mp->um_e2fs;
 	struct buf *bp;
 	int i, error, allerror;
 
 	allerror = ext2_sbupdate(mp, waitfor);
 	for (i = 0; i < fs->e2fs_gdbcount; i++) {
 		bp = getblk(mp->um_devvp, fsbtodb(fs,
 		    fs->e2fs->e2fs_first_dblock +
 		    1 /* superblock */ + i), fs->e2fs_bsize, 0, 0, 0);
 		e2fs_cgsave(&fs->e2fs_gd[
 		    i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
 		    (struct ext2_gd *)bp->b_data, fs->e2fs_bsize);
 		if (waitfor == MNT_WAIT) {
 			/*
 			 * Record the failure here: checking only after the
 			 * loop would lose it as soon as a later descriptor
 			 * block is written successfully.
 			 */
 			error = bwrite(bp);
 			if (allerror == 0 && error != 0)
 				allerror = error;
 		} else
 			bawrite(bp);
 	}
 
 	return (allerror);
 }
 /*
  * Return the root of a filesystem.
  */
 static int
 ext2_root(struct mount *mp, int flags, struct vnode **vpp)
 {
 	struct vnode *nvp;
 	int error;
 
 	error = VFS_VGET(mp, EXT2_ROOTINO, LK_EXCLUSIVE, &nvp);
 	if (error)
 		return (error);
 	*vpp = nvp;
 	return (0);
 }
Index: projects/clang380-import/sys/fs/ext2fs/ext2_vnops.c
===================================================================
--- projects/clang380-import/sys/fs/ext2fs/ext2_vnops.c	(revision 293686)
+++ projects/clang380-import/sys/fs/ext2fs/ext2_vnops.c	(revision 293687)
@@ -1,2064 +1,2079 @@
 /*-
  *  modified for EXT2FS support in Lites 1.1
  *
  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
  *  University of Utah, Department of Computer Science
  */
 /*-
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ufs_vnops.c	8.7 (Berkeley) 2/3/94
  *	@(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95
  * $FreeBSD$
  */
 
 #include "opt_suiddir.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/fcntl.h>
 #include <sys/filio.h>
 #include <sys/stat.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/endian.h>
 #include <sys/priv.h>
 #include <sys/rwlock.h>
 #include <sys/mount.h>
 #include <sys/unistd.h>
 #include <sys/time.h>
 #include <sys/vnode.h>
 #include <sys/namei.h>
 #include <sys/lockf.h>
 #include <sys/event.h>
 #include <sys/conf.h>
 #include <sys/file.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 #include <vm/vnode_pager.h>
 
 #include "opt_directio.h"
 
 #include <ufs/ufs/dir.h>
 
 #include <fs/ext2fs/fs.h>
 #include <fs/ext2fs/inode.h>
 #include <fs/ext2fs/ext2_extern.h>
 #include <fs/ext2fs/ext2fs.h>
 #include <fs/ext2fs/ext2_dinode.h>
 #include <fs/ext2fs/ext2_dir.h>
 #include <fs/ext2fs/ext2_mount.h>
 
 static int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *);
 static void ext2_itimes_locked(struct vnode *);
 static int ext4_ext_read(struct vop_read_args *);
 static int ext2_ind_read(struct vop_read_args *);
 
 static vop_access_t	ext2_access;
 static int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *);
 static int ext2_chown(struct vnode *, uid_t, gid_t, struct ucred *,
     struct thread *);
 static vop_close_t	ext2_close;
 static vop_create_t	ext2_create;
 static vop_fsync_t	ext2_fsync;
 static vop_getattr_t	ext2_getattr;
 static vop_ioctl_t	ext2_ioctl;
 static vop_link_t	ext2_link;
 static vop_mkdir_t	ext2_mkdir;
 static vop_mknod_t	ext2_mknod;
 static vop_open_t	ext2_open;
 static vop_pathconf_t	ext2_pathconf;
 static vop_print_t	ext2_print;
 static vop_read_t	ext2_read;
 static vop_readlink_t	ext2_readlink;
 static vop_remove_t	ext2_remove;
 static vop_rename_t	ext2_rename;
 static vop_rmdir_t	ext2_rmdir;
 static vop_setattr_t	ext2_setattr;
 static vop_strategy_t	ext2_strategy;
 static vop_symlink_t	ext2_symlink;
 static vop_write_t	ext2_write;
 static vop_vptofh_t	ext2_vptofh;
 static vop_close_t	ext2fifo_close;
 static vop_kqfilter_t	ext2fifo_kqfilter;
 
 /*
  * Global vfs data structures for ext2.
  *
  * Vnode operations vector for ext2 vnodes; this is the vector passed to
  * getnewvnode() in ext2_vget().  Operations not listed here fall through
  * to default_vnodeops.
  */
 struct vop_vector ext2_vnodeops = {
 	.vop_default =		&default_vnodeops,
 	.vop_access =		ext2_access,
 	.vop_bmap =		ext2_bmap,
 	.vop_cachedlookup =	ext2_lookup,
 	.vop_close =		ext2_close,
 	.vop_create =		ext2_create,
 	.vop_fsync =		ext2_fsync,
 	.vop_getpages =		vnode_pager_local_getpages,
 	.vop_getpages_async =	vnode_pager_local_getpages_async,
 	.vop_getattr =		ext2_getattr,
 	.vop_inactive =		ext2_inactive,
 	.vop_ioctl =		ext2_ioctl,
 	.vop_link =		ext2_link,
 	/* Lookups go through the name cache; misses reach vop_cachedlookup. */
 	.vop_lookup =		vfs_cache_lookup,
 	.vop_mkdir =		ext2_mkdir,
 	.vop_mknod =		ext2_mknod,
 	.vop_open =		ext2_open,
 	.vop_pathconf =		ext2_pathconf,
 	.vop_poll =		vop_stdpoll,
 	.vop_print =		ext2_print,
 	.vop_read =		ext2_read,
 	.vop_readdir =		ext2_readdir,
 	.vop_readlink =		ext2_readlink,
 	.vop_reallocblks =	ext2_reallocblks,
 	.vop_reclaim =		ext2_reclaim,
 	.vop_remove =		ext2_remove,
 	.vop_rename =		ext2_rename,
 	.vop_rmdir =		ext2_rmdir,
 	.vop_setattr =		ext2_setattr,
 	.vop_strategy =		ext2_strategy,
 	.vop_symlink =		ext2_symlink,
 	.vop_write =		ext2_write,
 	.vop_vptofh =		ext2_vptofh,
 };
 
 /*
  * Vnode operations vector for fifos that live on an ext2 filesystem; it
  * is handed to ext2_vinit() from ext2_vget().  Operations not listed here
  * fall through to fifo_specops.
  */
 struct vop_vector ext2_fifoops = {
 	.vop_default =		&fifo_specops,
 	.vop_access =		ext2_access,
 	.vop_close =		ext2fifo_close,
 	.vop_fsync =		ext2_fsync,
 	.vop_getattr =		ext2_getattr,
 	.vop_inactive =		ext2_inactive,
 	.vop_kqfilter =		ext2fifo_kqfilter,
 	.vop_print =		ext2_print,
 	/* Read and write through this vector are wired to VOP_PANIC. */
 	.vop_read =		VOP_PANIC,
 	.vop_reclaim =		ext2_reclaim,
 	.vop_setattr =		ext2_setattr,
 	.vop_write =		VOP_PANIC,
 	.vop_vptofh =		ext2_vptofh,
 };
 
 /*
  * A virgin directory (no blushing please).
  * Note that the type and namlen fields are reversed relative to ext2.
  * Also, we don't use `struct odirtemplate', since it would just cause
  * endianness problems.
  *
  * Each template holds the "." and ".." entries; the first entry has a
  * record length of 12 and the second takes the remaining DIRBLKSIZ - 12.
  */
 /* Template whose entries carry the EXT2_FT_DIR file type. */
 static struct dirtemplate mastertemplate = {
 	0, 12, 1, EXT2_FT_DIR, ".",
 	0, DIRBLKSIZ - 12, 2, EXT2_FT_DIR, ".."
 };
 /* Same layout, with the file type left as EXT2_FT_UNKNOWN. */
 static struct dirtemplate omastertemplate = {
 	0, 12, 1, EXT2_FT_UNKNOWN, ".",
 	0, DIRBLKSIZ - 12, 2, EXT2_FT_UNKNOWN, ".."
 };
 
 static void
 ext2_itimes_locked(struct vnode *vp)
 {
 	struct inode *ip;
 	struct timespec ts;
 
 	ASSERT_VI_LOCKED(vp, __func__);	
 
 	ip = VTOI(vp);
 	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
 		return;
 	if ((vp->v_type == VBLK || vp->v_type == VCHR))
 		ip->i_flag |= IN_LAZYMOD;
 	else
 		ip->i_flag |= IN_MODIFIED;
 	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 		vfs_timestamp(&ts);
 		if (ip->i_flag & IN_ACCESS) {
 			ip->i_atime = ts.tv_sec;
 			ip->i_atimensec = ts.tv_nsec;
 		}
 		if (ip->i_flag & IN_UPDATE) {
 			ip->i_mtime = ts.tv_sec;
 			ip->i_mtimensec = ts.tv_nsec;
 			ip->i_modrev++;
 		}
 		if (ip->i_flag & IN_CHANGE) {
 			ip->i_ctime = ts.tv_sec;
 			ip->i_ctimensec = ts.tv_nsec;
 		}
 	}
 	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
 }
 
 /*
  * Update the inode timestamps of 'vp' from its pending flags.
  * Takes the vnode interlock around ext2_itimes_locked().
  */
 void
 ext2_itimes(struct vnode *vp)
 {
 
 	VI_LOCK(vp);
 	ext2_itimes_locked(vp);
 	VI_UNLOCK(vp);
 }
 
 /*
  * Create a regular file
  */
 static int
 ext2_create(struct vop_create_args *ap)
 {
 	int error;
 
 	error =
 	    ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
 	    ap->a_dvp, ap->a_vpp, ap->a_cnp);
 	if (error != 0)
 		return (error);
 	if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0)
 		cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp);
 	return (0);
 }
 
 static int
 ext2_open(struct vop_open_args *ap)
 {
 
 	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR)
 		return (EOPNOTSUPP);
 
 	/*
 	 * Files marked append-only must be opened for appending.
 	 */
 	if ((VTOI(ap->a_vp)->i_flags & APPEND) &&
 	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
 		return (EPERM);
 
 	vnode_create_vobject(ap->a_vp, VTOI(ap->a_vp)->i_size, ap->a_td);
 
 	return (0);
 }
 
 /*
  * Close called.
  *
  * Update the times on the inode, but only while the vnode has a use
  * count above one.  Always returns 0.
  */
 static int
 ext2_close(struct vop_close_args *ap)
 {
 	struct vnode *closing = ap->a_vp;
 
 	VI_LOCK(closing);
 	if (closing->v_usecount > 1) {
 		ext2_itimes_locked(closing);
 	}
 	VI_UNLOCK(closing);
 
 	return (0);
 }
 
 /*
  * Check access permission on an ext2 vnode.
  *
  * Returns EOPNOTSUPP for block and character devices, EROFS for a write
  * request on a directory, symlink or regular file of a read-only mount,
  * EPERM for a write request on an immutable or snapshot file, and the
  * result of vaccess() otherwise.
  */
 static int
 ext2_access(struct vop_access_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	accmode_t accmode = ap->a_accmode;
 
 	if (vp->v_type == VBLK || vp->v_type == VCHR)
 		return (EOPNOTSUPP);
 
 	if ((accmode & VWRITE) != 0) {
 		/*
 		 * Disallow write attempts on read-only file systems;
 		 * unless the file is a socket, fifo, or a block or
 		 * character device resident on the file system.
 		 */
 		if ((vp->v_type == VDIR || vp->v_type == VLNK ||
 		    vp->v_type == VREG) &&
 		    (vp->v_mount->mnt_flag & MNT_RDONLY) != 0)
 			return (EROFS);
 
 		/* If immutable bit set, nobody gets to write it. */
 		if ((ip->i_flags & (SF_IMMUTABLE | SF_SNAPSHOT)) != 0)
 			return (EPERM);
 	}
 
 	return (vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid,
 	    accmode, ap->a_cred, NULL));
 }
 
 /*
  * Get attribute vnode op.
  *
  * Brings the inode timestamps up to date and copies the inode fields
  * into the caller's vattr.  Nanosecond parts are reported as 0, and the
  * birth time is left untouched, unless E2DI_HAS_XTIME() holds for the
  * inode.  Always returns 0.
  */
 static int
 ext2_getattr(struct vop_getattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vattr *vap = ap->a_vap;
 	struct inode *ip = VTOI(vp);
 
 	/* Apply any pending access/update/change marks first. */
 	ext2_itimes(vp);
 
 	/*
 	 * Copy from inode table
 	 */
 	vap->va_fsid = dev2udev(ip->i_devvp->v_rdev);
 	vap->va_fileid = ip->i_number;
 	vap->va_mode = ip->i_mode & ~IFMT;
 	vap->va_nlink = ip->i_nlink;
 	vap->va_uid = ip->i_uid;
 	vap->va_gid = ip->i_gid;
 	vap->va_rdev = ip->i_rdev;
 	vap->va_size = ip->i_size;
 
 	vap->va_atime.tv_sec = ip->i_atime;
 	vap->va_mtime.tv_sec = ip->i_mtime;
 	vap->va_ctime.tv_sec = ip->i_ctime;
 	if (E2DI_HAS_XTIME(ip)) {
 		vap->va_atime.tv_nsec = ip->i_atimensec;
 		vap->va_mtime.tv_nsec = ip->i_mtimensec;
 		vap->va_ctime.tv_nsec = ip->i_ctimensec;
 		vap->va_birthtime.tv_sec = ip->i_birthtime;
 		vap->va_birthtime.tv_nsec = ip->i_birthnsec;
 	} else {
 		vap->va_atime.tv_nsec = 0;
 		vap->va_mtime.tv_nsec = 0;
 		vap->va_ctime.tv_nsec = 0;
 	}
 
 	vap->va_flags = ip->i_flags;
 	vap->va_gen = ip->i_gen;
 	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
 	vap->va_bytes = dbtob((u_quad_t)ip->i_blocks);
 	vap->va_type = IFTOVT(ip->i_mode);
 	vap->va_filerev = ip->i_modrev;
 	return (0);
 }
 
 /*
  * Set attribute vnode op; called from several syscalls.
  *
  * Every field of ap->a_vap that is not VNOVAL is applied to the inode
  * behind ap->a_vp, in this order: file flags, owner/group, size,
  * timestamps, mode.  A request that names an attribute which cannot be
  * set (type, link count, fsid, fileid, block size, rdev, byte count or
  * generation) fails with EINVAL.
  *
  * Returns 0 on success, or the errno value of the first step that fails;
  * steps that completed before the failure are not rolled back.
  */
 static int
 ext2_setattr(struct vop_setattr_args *ap)
 {
 	struct vattr *vap = ap->a_vap;
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	struct ucred *cred = ap->a_cred;
 	struct thread *td = curthread;
 	int error;
 
 	/*
 	 * Check for unsettable attributes.
 	 */
 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
 	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
 	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
 		return (EINVAL);
 	}
 	if (vap->va_flags != VNOVAL) {
 		/* Disallow flags not supported by ext2fs. */
 		if (vap->va_flags & ~(SF_APPEND | SF_IMMUTABLE | UF_NODUMP))
 			return (EOPNOTSUPP);
 
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		/*
 		 * Callers may only modify the file flags on objects they
 		 * have VADMIN rights for.
 		 */
 		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
 			return (error);
 		/*
 		 * Unprivileged processes and privileged processes in
 		 * jail() are not permitted to unset system flags, or
 		 * modify flags if any system flags are set.
 		 * Privileged non-jail processes may not modify system flags
 		 * if securelevel > 0 and any existing system flags are set.
 		 */
 		if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
 			if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) {
 				error = securelevel_gt(cred, 0);
 				if (error)
 					return (error);
 			}
 		} else {
 			if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND) ||
 			    ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE))
 				return (EPERM);
 		}
 		ip->i_flags = vap->va_flags;
 		ip->i_flag |= IN_CHANGE;
 		/*
 		 * If the new flags make the file immutable or append-only,
 		 * the flag change is all this call does.
 		 */
 		if (ip->i_flags & (IMMUTABLE | APPEND))
 			return (0);
 	}
 	/* Nothing below may be changed on an immutable/append-only file. */
 	if (ip->i_flags & (IMMUTABLE | APPEND))
 		return (EPERM);
 	/*
 	 * Go through the fields and update iff not VNOVAL.
 	 */
 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		if ((error = ext2_chown(vp, vap->va_uid, vap->va_gid, cred,
 		    td)) != 0)
 			return (error);
 	}
 	if (vap->va_size != VNOVAL) {
 		/*
 		 * Disallow write attempts on read-only file systems;
 		 * unless the file is a socket, fifo, or a block or
 		 * character device resident on the file system.
 		 */
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VLNK:
 		case VREG:
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			break;
 		default:
 			break;
 		}
 		if ((error = ext2_truncate(vp, vap->va_size, 0, cred, td)) != 0)
 			return (error);
 	}
 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		/*
 		 * From utimes(2):
 		 * If times is NULL, ... The caller must be the owner of
 		 * the file, have permission to write the file, or be the
 		 * super-user.
 		 * If times is non-NULL, ... The caller must be the owner of
 		 * the file or be the super-user.
 		 */
 		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) &&
 		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
 		    (error = VOP_ACCESS(vp, VWRITE, cred, td))))
 			return (error);
 		if (vap->va_atime.tv_sec != VNOVAL)
 			ip->i_flag |= IN_ACCESS;
 		if (vap->va_mtime.tv_sec != VNOVAL)
 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		ext2_itimes(vp);
 		if (vap->va_atime.tv_sec != VNOVAL) {
 			ip->i_atime = vap->va_atime.tv_sec;
 			ip->i_atimensec = vap->va_atime.tv_nsec;
 		}
 		if (vap->va_mtime.tv_sec != VNOVAL) {
 			ip->i_mtime = vap->va_mtime.tv_sec;
 			ip->i_mtimensec = vap->va_mtime.tv_nsec;
 		}
 		/*
 		 * Like every other field, the birth time is only stored
 		 * when the caller supplied one.  Copying it unconditionally
 		 * would replace the creation time with VNOVAL whenever only
 		 * atime and/or mtime are being updated.
 		 */
 		if (vap->va_birthtime.tv_sec != VNOVAL) {
 			ip->i_birthtime = vap->va_birthtime.tv_sec;
 			ip->i_birthnsec = vap->va_birthtime.tv_nsec;
 		}
 		error = ext2_update(vp, 0);
 		if (error)
 			return (error);
 	}
 	error = 0;
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		error = ext2_chmod(vp, (int)vap->va_mode, cred, td);
 	}
 	return (error);
 }
 
 /*
  * Replace the permission bits (ALLPERMS) of the inode behind vp with
  * those in mode.  The inode must be locked by the caller.
  *
  * Returns 0 on success, the VOP_ACCESS()/priv_check_cred() error when the
  * caller lacks the needed rights, or EFTYPE when an unprivileged caller
  * asks for the sticky bit on a non-directory.
  */
 static int
 ext2_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td)
 {
 	struct inode *node;
 	int rv;
 
 	node = VTOI(vp);
 
 	/* Changing permissions requires VADMIN on the file. */
 	rv = VOP_ACCESS(vp, VADMIN, cred, td);
 	if (rv != 0)
 		return (rv);
 
 	/*
 	 * Two requests need extra privilege: the sticky bit on something
 	 * that is not a directory, and the setgid bit on a file whose
 	 * group the caller does not belong to.
 	 */
 	if ((mode & S_ISTXT) != 0 && vp->v_type != VDIR) {
 		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0) != 0)
 			return (EFTYPE);
 	}
 	if (!groupmember(node->i_gid, cred) && (mode & ISGID) != 0) {
 		rv = priv_check_cred(cred, PRIV_VFS_SETGID, 0);
 		if (rv != 0)
 			return (rv);
 	}
 
 	node->i_mode = (node->i_mode & ~ALLPERMS) | (mode & ALLPERMS);
 	node->i_flag |= IN_CHANGE;
 	return (0);
 }
 
 /*
  * Change the owner and/or group of the inode behind vp.  The inode must
  * be locked by the caller.  A uid or gid of VNOVAL means "leave as is".
  *
  * Returns 0 on success or the error from the access/privilege checks.
  */
 static int
 ext2_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
     struct thread *td)
 {
 	struct inode *node;
 	int owner_changed, rv;
 
 	node = VTOI(vp);
 	if (uid == (uid_t)VNOVAL)
 		uid = node->i_uid;
 	if (gid == (gid_t)VNOVAL)
 		gid = node->i_gid;
 
 	/* Changing ownership requires VADMIN on the file. */
 	rv = VOP_ACCESS(vp, VADMIN, cred, td);
 	if (rv != 0)
 		return (rv);
 
 	/*
 	 * Giving the file to another user, or to a group the caller is
 	 * not a member of, additionally requires PRIV_VFS_CHOWN.
 	 */
 	if (uid != node->i_uid ||
 	    (gid != node->i_gid && !groupmember(gid, cred))) {
 		rv = priv_check_cred(cred, PRIV_VFS_CHOWN, 0);
 		if (rv != 0)
 			return (rv);
 	}
 
 	/* Remember whether anything really changes before overwriting. */
 	owner_changed = (node->i_uid != uid || node->i_gid != gid);
 	node->i_uid = uid;
 	node->i_gid = gid;
 	node->i_flag |= IN_CHANGE;
 
 	/*
 	 * A real ownership change strips setuid/setgid unless the caller
 	 * holds PRIV_VFS_RETAINSUGID.
 	 */
 	if ((node->i_mode & (ISUID | ISGID)) != 0 && owner_changed &&
 	    priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0) != 0)
 		node->i_mode &= ~(ISUID | ISGID);
 	return (0);
 }
 
 /*
  * Fsync vnode op: flush the vnode's dirty buffers with vop_stdfsync(),
  * then write the inode itself with ext2_update(), synchronously when the
  * caller asked for MNT_WAIT.  Returns the ext2_update() result.
  */
 /* ARGSUSED */
 static int
 ext2_fsync(struct vop_fsync_args *ap)
 {
 	int waitfor;
 
 	/* Flush all dirty buffers associated with the vnode. */
 	vop_stdfsync(ap);
 
 	waitfor = (ap->a_waitfor == MNT_WAIT);
 	return (ext2_update(ap->a_vp, waitfor));
 }
 
 /*
  * Mknod vnode op: create the node with ext2_makeinode(), record the
  * device number if one was given, then throw the fresh vnode away and
  * fetch it again through VFS_VGET().
  *
  * Returns 0 with the new vnode in *ap->a_vpp, or an errno value (in
  * which case *ap->a_vpp is NULL if the failure came from VFS_VGET()).
  */
 /* ARGSUSED */
 static int
 ext2_mknod(struct vop_mknod_args *ap)
 {
 	struct vnode **vpp;
 	struct vattr *attrs;
 	struct inode *node;
 	ino_t inum;
 	int rv;
 
 	attrs = ap->a_vap;
 	vpp = ap->a_vpp;
 
 	rv = ext2_makeinode(MAKEIMODE(attrs->va_type, attrs->va_mode),
 	    ap->a_dvp, vpp, ap->a_cnp);
 	if (rv != 0)
 		return (rv);
 
 	node = VTOI(*vpp);
 	node->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
 	/*
 	 * The device number is stored untruncated so that this path can
 	 * also be used to make badblock inodes.
 	 */
 	if (attrs->va_rdev != VNOVAL)
 		node->i_rdev = attrs->va_rdev;
 
 	/*
 	 * Remove the inode, then reload it through VFS_VGET so it is
 	 * checked against the inode cache for aliases of an existing
 	 * entry.  XXX I don't believe this is necessary now.
 	 */
 	(*vpp)->v_type = VNON;
 	inum = node->i_number;	/* Must be read before vgone() kills node. */
 	vgone(*vpp);
 	vput(*vpp);
 
 	rv = VFS_VGET(ap->a_dvp->v_mount, inum, LK_EXCLUSIVE, vpp);
 	if (rv != 0)
 		*vpp = NULL;
 	return (rv);
 }
 
 /*
  * Remove vnode op: delete the directory entry ap->a_cnp from ap->a_dvp
  * and drop one link from the inode behind ap->a_vp.
  *
  * Returns EPERM when the file is NOUNLINK, IMMUTABLE or APPEND, or the
  * directory is APPEND; otherwise the result of ext2_dirremove().
  */
 static int
 ext2_remove(struct vop_remove_args *ap)
 {
 	struct vnode *dvp, *vp;
 	struct inode *node;
 	int rv;
 
 	vp = ap->a_vp;
 	dvp = ap->a_dvp;
 	node = VTOI(vp);
 
 	if ((node->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) != 0 ||
 	    (VTOI(dvp)->i_flags & APPEND) != 0)
 		return (EPERM);
 
 	rv = ext2_dirremove(dvp, ap->a_cnp);
 	if (rv != 0)
 		return (rv);
 
 	/* The name is gone; account for it in the link count. */
 	node->i_nlink--;
 	node->i_flag |= IN_CHANGE;
 	return (0);
 }
 
 /*
  * link vnode call
  */
 static int
 ext2_link(struct vop_link_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct inode *ip;
 	int error;
 
 #ifdef INVARIANTS
 	if ((cnp->cn_flags & HASBUF) == 0)
 		panic("ext2_link: no name");
 #endif
 	ip = VTOI(vp);
 	if ((nlink_t)ip->i_nlink >= EXT2_LINK_MAX) {
 		error = EMLINK;
 		goto out;
 	}
 	if (ip->i_flags & (IMMUTABLE | APPEND)) {
 		error = EPERM;
 		goto out;
 	}
 	ip->i_nlink++;
 	ip->i_flag |= IN_CHANGE;
 	error = ext2_update(vp, !DOINGASYNC(vp));
 	if (!error)
 		error = ext2_direnter(ip, tdvp, cnp);
 	if (error) {
 		ip->i_nlink--;
 		ip->i_flag |= IN_CHANGE;
 	}
 out:
 	return (error);
 }
 
 /*
  * Rename system call.
  * 	rename("foo", "bar");
  * is essentially
  *	unlink("bar");
  *	link("foo", "bar");
  *	unlink("foo");
  * but ``atomically''.  Can't do full commit without saving state in the
  * inode on disk which isn't feasible at this time.  Best we can do is
  * always guarantee the target exists.
  *
  * Basic algorithm is:
  *
  * 1) Bump link count on source while we're linking it to the
  *    target.  This also ensures the inode won't be deleted out
  *    from underneath us while we work (it may be truncated by
  *    a concurrent `trunc' or `open' for creation).
  * 2) Link source to destination.  If destination already exists,
  *    delete it first.
  * 3) Unlink source reference to inode if still around. If a
  *    directory was moved and the parent of the destination
  *    is different from the source, patch the ".." entry in the
  *    directory.
  */
 static int
 ext2_rename(struct vop_rename_args *ap)
 {
 	struct vnode *tvp = ap->a_tvp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct vnode *fvp = ap->a_fvp;
 	struct vnode *fdvp = ap->a_fdvp;
 	struct componentname *tcnp = ap->a_tcnp;
 	struct componentname *fcnp = ap->a_fcnp;
 	struct inode *ip, *xp, *dp;
 	struct dirtemplate dirbuf;
 	int doingdirectory = 0, oldparent = 0, newparent = 0;
 	int error = 0;
 	u_char namlen;
 
 #ifdef INVARIANTS
 	if ((tcnp->cn_flags & HASBUF) == 0 ||
 	    (fcnp->cn_flags & HASBUF) == 0)
 		panic("ext2_rename: no name");
 #endif
 	/*
 	 * Check for cross-device rename.
 	 */
 	if ((fvp->v_mount != tdvp->v_mount) ||
 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
 		error = EXDEV;
 		/*
 		 * Common early-error exit, also reached by goto from the
 		 * checks below: release the target directory (vrele when it
 		 * is the same vnode as tvp, vput otherwise), vput the target
 		 * if there is one, vrele both source vnodes and return
 		 * whatever is in error.
 		 */
 abortit:
 		if (tdvp == tvp)
 			vrele(tdvp);
 		else
 			vput(tdvp);
 		if (tvp)
 			vput(tvp);
 		vrele(fdvp);
 		vrele(fvp);
 		return (error);
 	}
 
 	/*
 	 * An existing target that may not be unlinked, or an append-only
 	 * target directory, makes the rename impossible.
 	 */
 	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
 	    (VTOI(tdvp)->i_flags & APPEND))) {
 		error = EPERM;
 		goto abortit;
 	}
 
 	/*
 	 * Renaming a file to itself has no effect.  The upper layers should
 	 * not call us in that case.  Temporarily just warn if they do.
 	 */
 	if (fvp == tvp) {
 		printf("ext2_rename: fvp == tvp (can't happen)\n");
 		error = 0;
 		goto abortit;
 	}
 
 	/*
 	 * Lock the source vnode for the checks below.  Every failure from
 	 * here to the end of the checks unlocks it again before jumping to
 	 * abortit, which only drops references.
 	 */
 	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
 		goto abortit;
 	dp = VTOI(fdvp);
 	ip = VTOI(fvp);
 	if (ip->i_nlink >= EXT2_LINK_MAX) {
 		VOP_UNLOCK(fvp, 0);
 		error = EMLINK;
 		goto abortit;
 	}
 	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
 	    || (dp->i_flags & APPEND)) {
 		VOP_UNLOCK(fvp, 0);
 		error = EPERM;
 		goto abortit;
 	}
 	if ((ip->i_mode & IFMT) == IFDIR) {
 		/*
 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
 		 * A directory that already carries IN_RENAME is rejected
 		 * as well.
 		 */
 		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
 		    dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT ||
 		    (ip->i_flag & IN_RENAME)) {
 			VOP_UNLOCK(fvp, 0);
 			error = EINVAL;
 			goto abortit;
 		}
 		ip->i_flag |= IN_RENAME;
 		oldparent = dp->i_number;
 		doingdirectory++;
 	}
 	/*
 	 * Drop the caller's reference on the source directory; step 3
 	 * takes a fresh one with VREF() before calling relookup().
 	 */
 	vrele(fdvp);
 
 	/*
 	 * When the target exists, both the directory
 	 * and target vnodes are returned locked.
 	 */
 	dp = VTOI(tdvp);
 	xp = NULL;
 	if (tvp)
 		xp = VTOI(tvp);
 
 	/*
 	 * 1) Bump link count while we're moving stuff
 	 *    around.  If we crash somewhere before
 	 *    completing our work, the link count
 	 *    may be wrong, but correctable.
 	 */
 	ip->i_nlink++;
 	ip->i_flag |= IN_CHANGE;
 	if ((error = ext2_update(fvp, !DOINGASYNC(fvp))) != 0) {
 		VOP_UNLOCK(fvp, 0);
 		goto bad;
 	}
 
 	/*
 	 * If ".." must be changed (ie the directory gets a new
 	 * parent) then the source directory must not be in the
 	 * directory hierarchy above the target, as this would
 	 * orphan everything below the source directory. Also
 	 * the user must have write permission in the source so
 	 * as to be able to change "..". We must repeat the call
 	 * to namei, as the parent directory is unlocked by the
 	 * call to checkpath().
 	 */
 	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
 	VOP_UNLOCK(fvp, 0);
 	/*
 	 * newparent stays 0 when the target directory's inode number
 	 * equals oldparent; otherwise it holds the target directory's
 	 * inode number.  oldparent is only set for directory renames.
 	 */
 	if (oldparent != dp->i_number)
 		newparent = dp->i_number;
 	if (doingdirectory && newparent) {
 		if (error)	/* write access check above */
 			goto bad;
 		if (xp != NULL)
 			vput(tvp);
 		error = ext2_checkpath(ip, dp, tcnp->cn_cred);
 		if (error)
 			goto out;
 		VREF(tdvp);
 		error = relookup(tdvp, &tvp, tcnp);
 		if (error)
 			goto out;
 		vrele(tdvp);
 		/* Re-derive dp and xp from the result of the relookup. */
 		dp = VTOI(tdvp);
 		xp = NULL;
 		if (tvp)
 			xp = VTOI(tvp);
 	}
 	/*
 	 * 2) If target doesn't exist, link the target
 	 *    to the source and unlink the source.
 	 *    Otherwise, rewrite the target directory
 	 *    entry to reference the source inode and
 	 *    expunge the original entry's existence.
 	 */
 	if (xp == NULL) {
 		if (dp->i_devvp != ip->i_devvp)
 			panic("ext2_rename: EXDEV");
 		/*
 		 * Account for ".." in new directory.
 		 * When source and destination have the same
 		 * parent we don't fool with the link count.
 		 */
 		if (doingdirectory && newparent) {
 			if ((nlink_t)dp->i_nlink >= EXT2_LINK_MAX) {
 				error = EMLINK;
 				goto bad;
 			}
 			dp->i_nlink++;
 			dp->i_flag |= IN_CHANGE;
 			error = ext2_update(tdvp, !DOINGASYNC(tdvp));
 			if (error)
 				goto bad;
 		}
 		error = ext2_direnter(ip, tdvp, tcnp);
 		if (error) {
 			/* Undo the ".." accounting done just above. */
 			if (doingdirectory && newparent) {
 				dp->i_nlink--;
 				dp->i_flag |= IN_CHANGE;
 				(void)ext2_update(tdvp, 1);
 			}
 			goto bad;
 		}
 		vput(tdvp);
 	} else {
 		if (xp->i_devvp != dp->i_devvp || xp->i_devvp != ip->i_devvp)
 		       panic("ext2_rename: EXDEV");
 		/*
 		 * rename(foo, foo) was filtered out earlier (fvp == tvp);
 		 * source and target sharing an inode number here is fatal.
 		 */
 		if (xp->i_number == ip->i_number)
 			panic("ext2_rename: same file");
 		/*
 		 * If the parent directory is "sticky", then the user must
 		 * own the parent directory, or the destination of the rename,
 		 * otherwise the destination may not be changed (except by
 		 * root). This implements append-only directories.
 		 */
 		if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 &&
 		    tcnp->cn_cred->cr_uid != dp->i_uid &&
 		    xp->i_uid != tcnp->cn_cred->cr_uid) {
 			error = EPERM;
 			goto bad;
 		}
 		/*
 		 * Target must be empty if a directory and have no links
 		 * to it. Also, ensure source and target are compatible
 		 * (both directories, or both not directories).
 		 */
 		if ((xp->i_mode&IFMT) == IFDIR) {
 			if (! ext2_dirempty(xp, dp->i_number, tcnp->cn_cred) || 
 			    xp->i_nlink > 2) {
 				error = ENOTEMPTY;
 				goto bad;
 			}
 			if (!doingdirectory) {
 				error = ENOTDIR;
 				goto bad;
 			}
 			cache_purge(tdvp);
 		} else if (doingdirectory) {
 			error = EISDIR;
 			goto bad;
 		}
 		error = ext2_dirrewrite(dp, ip, tcnp);
 		if (error)
 			goto bad;
 		/*
 		 * If the target directory is in the same
 		 * directory as the source directory,
 		 * decrement the link count on the parent
 		 * of the target directory.
 		 */
 		if (doingdirectory && !newparent) {
 			dp->i_nlink--;
 			dp->i_flag |= IN_CHANGE;
 		}
 		vput(tdvp);
 		/*
 		 * Adjust the link count of the target to
 		 * reflect the dirrewrite above.  If this is
 		 * a directory it is empty and there are
 		 * no links to it, so we can squash the inode and
 		 * any space associated with it.  We disallowed
 		 * renaming over top of a directory with links to
 		 * it above, as the remaining link would point to
 		 * a directory without "." or ".." entries.
 		 */
 		xp->i_nlink--;
 		if (doingdirectory) {
 			if (--xp->i_nlink != 0)
 				panic("ext2_rename: linked directory");
 			error = ext2_truncate(tvp, (off_t)0, IO_SYNC,
 			    tcnp->cn_cred, tcnp->cn_thread);
 		}
 		xp->i_flag |= IN_CHANGE;
 		vput(tvp);
 		xp = NULL;
 	}
 
 	/*
 	 * 3) Unlink the source.
 	 */
 	fcnp->cn_flags &= ~MODMASK;
 	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
 	VREF(fdvp);
 	error = relookup(fdvp, &fvp, fcnp);
 	if (error == 0)
 		vrele(fdvp);
 	if (fvp != NULL) {
 		xp = VTOI(fvp);
 		dp = VTOI(fdvp);
 	} else {
 		/*
 		 * From name has disappeared.
 		 */
 		if (doingdirectory)
 			panic("ext2_rename: lost dir entry");
 		vrele(ap->a_fvp);
 		return (0);
 	}
 	/*
 	 * Ensure that the directory entry still exists and has not
 	 * changed while the new name has been entered. If the source is
 	 * a file then the entry may have been unlinked or renamed. In
 	 * either case there is no further work to be done. If the source
 	 * is a directory then it cannot have been rmdir'ed; its link
 	 * count of three would cause a rmdir to fail with ENOTEMPTY.
 	 * The IN_RENAME flag ensures that it cannot be moved by another
 	 * rename.
 	 */
 	if (xp != ip) {
 		if (doingdirectory)
 			panic("ext2_rename: lost dir entry");
 	} else {
 		/*
 		 * If the source is a directory with a
 		 * new parent, the link count of the old
 		 * parent directory must be decremented
 		 * and ".." set to point to the new parent.
 		 */
 		if (doingdirectory && newparent) {
 			dp->i_nlink--;
 			dp->i_flag |= IN_CHANGE;
 			/*
 			 * Read the directory's leading dirtemplate (its
 			 * "." and ".." entries), patch the ".." inode
 			 * number and write the template back.
 			 */
 			error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf,
 				sizeof(struct dirtemplate), (off_t)0,
 				UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
 				tcnp->cn_cred, NOCRED, NULL, NULL);
 			if (error == 0) {
 				/* Like ufs little-endian: */
 				namlen = dirbuf.dotdot_type;
 				if (namlen != 2 ||
 				    dirbuf.dotdot_name[0] != '.' ||
 				    dirbuf.dotdot_name[1] != '.') {
 					ext2_dirbad(xp, (doff_t)12,
 					    "rename: mangled dir");
 				} else {
 					dirbuf.dotdot_ino = newparent;
 					(void) vn_rdwr(UIO_WRITE, fvp,
 					    (caddr_t)&dirbuf,
 					    sizeof(struct dirtemplate),
 					    (off_t)0, UIO_SYSSPACE,
 					    IO_NODELOCKED | IO_SYNC |
 					    IO_NOMACCHECK, tcnp->cn_cred,
 					    NOCRED, NULL, NULL);
 					cache_purge(fdvp);
 				}
 			}
 		}
 		/*
 		 * Remove the old name; on success this also takes back
 		 * the link count bump from step 1.
 		 */
 		error = ext2_dirremove(fdvp, fcnp);
 		if (!error) {
 			xp->i_nlink--;
 			xp->i_flag |= IN_CHANGE;
 		}
 		xp->i_flag &= ~IN_RENAME;
 	}
 	if (dp)
 		vput(fdvp);
 	if (xp)
 		vput(fvp);
 	vrele(ap->a_fvp);
 	return (error);
 
 	/*
 	 * Error exits for failures after step 1.  "bad" first releases the
 	 * target vnode (if any) and the target directory with vput(), then
 	 * falls into "out".  "out" clears IN_RENAME for directory renames
 	 * and, if the source vnode can be locked again, undoes the link
 	 * count bump from step 1 before releasing it; otherwise it only
 	 * drops the reference.
 	 */
 bad:
 	if (xp)
 		vput(ITOV(xp));
 	vput(ITOV(dp));
 out:
 	if (doingdirectory)
 		ip->i_flag &= ~IN_RENAME;
 	if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
 		ip->i_nlink--;
 		ip->i_flag |= IN_CHANGE;
 		ip->i_flag &= ~IN_RENAME;
 		vput(fvp);
 	} else
 		vrele(fvp);
 	return (error);
 }
 
 /*
  * Mkdir vnode op: create the directory named by ap->a_cnp in ap->a_dvp.
  *
  * A new inode is allocated and initialised as a directory with a link
  * count of 2, the parent's link count is raised, the first block is
  * written with the "." and ".." entries, and finally the name is entered
  * in the parent.
  *
  * Returns 0 and stores the new vnode in *ap->a_vpp on success.  On
  * failure an errno value is returned (EMLINK when the parent is already
  * at EXT2_LINK_MAX); if the inode had been allocated its link count is
  * zeroed and its vnode released, and any bump of the parent's link count
  * is taken back.
  */
 static int
 ext2_mkdir(struct vop_mkdir_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vattr *vap = ap->a_vap;
 	struct componentname *cnp = ap->a_cnp;
 	struct inode *ip, *dp;
 	struct vnode *tvp;
 	struct dirtemplate dirtemplate, *dtp;
 	int error, dmode;
 
 #ifdef INVARIANTS
 	if ((cnp->cn_flags & HASBUF) == 0)
 		panic("ext2_mkdir: no name");
 #endif
 	dp = VTOI(dvp);
 	if ((nlink_t)dp->i_nlink >= EXT2_LINK_MAX) {
 		error = EMLINK;
 		goto out;
 	}
 	dmode = vap->va_mode & 0777;
 	dmode |= IFDIR;
 	/*
 	 * Must simulate part of ext2_makeinode here to acquire the inode,
 	 * but not have it entered in the parent directory. The entry is
 	 * made later after writing "." and ".." entries.
 	 */
 	error = ext2_valloc(dvp, dmode, cnp->cn_cred, &tvp);
 	if (error)
 		goto out;
 	ip = VTOI(tvp);
 	ip->i_gid = dp->i_gid;
 #ifdef SUIDDIR
 	{
 		/*
 		 * if we are hacking owners here, (only do this where told to)
 		 * and we are not giving it TOO root, (would subvert quotas)
 		 * then go ahead and give it to the other user.
 		 * The new directory also inherits the SUID bit.
 		 * If user's UID and dir UID are the same,
 		 * 'give it away' so that the SUID is still forced on.
 		 */
 		if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
 		   (dp->i_mode & ISUID) && dp->i_uid) {
 			dmode |= ISUID;
 			ip->i_uid = dp->i_uid;
 		} else {
 			ip->i_uid = cnp->cn_cred->cr_uid;
 		}
 	}
 #else
 	ip->i_uid = cnp->cn_cred->cr_uid;
 #endif
 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
 	ip->i_mode = dmode;
 	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
 	ip->i_nlink = 2;
 	if (cnp->cn_flags & ISWHITEOUT)
 		ip->i_flags |= UF_OPAQUE;
 	/*
 	 * Write the new inode.  A failure here must not be lost: the
 	 * result used to be overwritten by the parent update below, so
 	 * the directory could be created on top of an inode that never
 	 * reached the disk.  The parent has not been touched yet, so the
 	 * common cleanup at "bad" is all that is needed.
 	 */
 	error = ext2_update(tvp, 1);
 	if (error)
 		goto bad;
 
 	/*
 	 * Bump link count in parent directory
 	 * to reflect work done below.  Should
 	 * be done before reference is created
 	 * so reparation is possible if we crash.
 	 */
 	dp->i_nlink++;
 	dp->i_flag |= IN_CHANGE;
 	error = ext2_update(dvp, !DOINGASYNC(dvp));
 	if (error) {
 		/*
 		 * Take the bump back, as the later failure paths do;
 		 * otherwise the parent keeps a link for a directory
 		 * that was never created.
 		 */
 		dp->i_nlink--;
 		dp->i_flag |= IN_CHANGE;
 		goto bad;
 	}
 
 	/* Initialize directory with "." and ".." from static template. */
 	if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs,
 	    EXT2F_INCOMPAT_FTYPE))
 		dtp = &mastertemplate;
 	else
 		dtp = &omastertemplate;
 	dirtemplate = *dtp;
 	dirtemplate.dot_ino = ip->i_number;
 	dirtemplate.dotdot_ino = dp->i_number;
 	/* note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE
 	 * so let's just redefine it - for this function only
 	 */
 #undef  DIRBLKSIZ 
 #define DIRBLKSIZ  VTOI(dvp)->i_e2fs->e2fs_bsize
 	/* ".." takes the rest of the block after the 12 bytes used by ".". */
 	dirtemplate.dotdot_reclen = DIRBLKSIZ - 12;
 	error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate,
 	    sizeof(dirtemplate), (off_t)0, UIO_SYSSPACE,
 	    IO_NODELOCKED | IO_SYNC | IO_NOMACCHECK, cnp->cn_cred, NOCRED,
 	    NULL, NULL);
 	if (error) {
 		dp->i_nlink--;
 		dp->i_flag |= IN_CHANGE;
 		goto bad;
 	}
 	if (DIRBLKSIZ > VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
 		/* XXX should grow with balloc() */
 		panic("ext2_mkdir: blksize");
 	else {
 		ip->i_size = DIRBLKSIZ;
 		ip->i_flag |= IN_CHANGE;
 	}
 
 	/* Directory set up, now install its entry in the parent directory. */
 	error = ext2_direnter(ip, dvp, cnp);
 	if (error) {
 		dp->i_nlink--;
 		dp->i_flag |= IN_CHANGE;
 	}
 bad:
 	/*
 	 * No need to do an explicit VOP_TRUNCATE here, vrele will do this
 	 * for us because we set the link count to 0.
 	 */
 	if (error) {
 		ip->i_nlink = 0;
 		ip->i_flag |= IN_CHANGE;
 		vput(tvp);
 	} else
 		*ap->a_vpp = tvp;
 out:
 	return (error);
 	/* Put DIRBLKSIZ back for the rest of the file. */
 #undef  DIRBLKSIZ
 #define DIRBLKSIZ  DEV_BSIZE
 }
 
 /*
  * Rmdir vnode op: remove the empty directory ap->a_vp, named by
  * ap->a_cnp, from its parent ap->a_dvp.
  *
  * Returns ENOTEMPTY when the directory has a link count other than 2 or
  * is not empty, EPERM when flags forbid the removal, the ext2_dirremove()
  * error if the entry cannot be deleted, and otherwise the result of
  * truncating the directory.
  */
 static int
 ext2_rmdir(struct vop_rmdir_args *ap)
 {
 	struct componentname *cnp;
 	struct vnode *dvp, *vp;
 	struct inode *dir, *parent;
 	int rv;
 
 	vp = ap->a_vp;
 	dvp = ap->a_dvp;
 	cnp = ap->a_cnp;
 	dir = VTOI(vp);
 	parent = VTOI(dvp);
 
 	/*
 	 * Verify the directory is empty (and valid).
 	 * (Rmdir ".." won't be valid since ".." will contain a
 	 * reference to the current directory and thus be non-empty.)
 	 */
 	if (dir->i_nlink != 2 ||
 	    !ext2_dirempty(dir, parent->i_number, cnp->cn_cred))
 		return (ENOTEMPTY);
 	if ((parent->i_flags & APPEND) != 0 ||
 	    (dir->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) != 0)
 		return (EPERM);
 
 	/*
 	 * Delete the reference to the directory before purging the
 	 * inode.  If we crash in between, the directory will be
 	 * reattached to lost+found.
 	 */
 	rv = ext2_dirremove(dvp, cnp);
 	if (rv != 0)
 		return (rv);
 	parent->i_nlink--;
 	parent->i_flag |= IN_CHANGE;
 	cache_purge(dvp);
 	VOP_UNLOCK(dvp, 0);
 
 	/*
 	 * Truncate the inode.  Only "." and ".." are left in the
 	 * directory.  The "." reference is inconsequential since we are
 	 * quashing it, and the ".." reference was adjusted above.  Both
 	 * the "." reference and the one from the parent are gone, but
 	 * there may be other hard links, so decrement by 2 and worry
 	 * about them later.
 	 */
 	dir->i_nlink -= 2;
 	rv = ext2_truncate(vp, (off_t)0, IO_SYNC, cnp->cn_cred,
 	    cnp->cn_thread);
 	cache_purge(ITOV(dir));
 
 	/*
 	 * Take the parent lock back.  If it cannot be had without
 	 * sleeping, release the child first and then lock parent and
 	 * child again in that order.
 	 */
 	if (vn_lock(dvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 		VOP_UNLOCK(vp, 0);
 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	}
 	return (rv);
 }
 
 /*
  * Symlink vnode op: create a symbolic link named by ap->a_cnp in
  * ap->a_dvp whose contents are the string ap->a_target.
  *
  * A target shorter than the mount's mnt_maxsymlinklen is copied into the
  * inode's i_shortlink area; a longer one is written as file data with
  * vn_rdwr().  Returns 0 or an errno value; when the data write fails the
  * new vnode is released with vput().
  */
 static int
 ext2_symlink(struct vop_symlink_args *ap)
 {
 	struct vnode **vpp, *linkvp;
 	struct inode *node;
 	int rv, targetlen;
 
 	vpp = ap->a_vpp;
 	rv = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
 	    vpp, ap->a_cnp);
 	if (rv != 0)
 		return (rv);
 
 	linkvp = *vpp;
 	targetlen = strlen(ap->a_target);
 	if (targetlen >= linkvp->v_mount->mnt_maxsymlinklen) {
 		/* Too long for the inode: store it as file contents. */
 		rv = vn_rdwr(UIO_WRITE, linkvp, ap->a_target, targetlen,
 		    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
 		    ap->a_cnp->cn_cred, NOCRED, NULL, NULL);
 		if (rv != 0)
 			vput(linkvp);
 		return (rv);
 	}
 
 	/* Short link: keep the target inside the inode itself. */
 	node = VTOI(linkvp);
 	bcopy(ap->a_target, (char *)node->i_shortlink, targetlen);
 	node->i_size = targetlen;
 	node->i_flag |= IN_CHANGE | IN_UPDATE;
 	return (0);
 }
 
 /*
  * Readlink vnode op: copy the target name of the symbolic link ap->a_vp
  * into ap->a_uio.
  *
  * A link whose size is below the mount's mnt_maxsymlinklen is stored in
  * the inode's i_shortlink area and copied from there; anything longer is
  * read as file data through VOP_READ().
  *
  * Returns 0 on success or the errno value reported by uiomove() or
  * VOP_READ().
  */
 static int
 ext2_readlink(struct vop_readlink_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	int isize;
 
 	isize = ip->i_size;
 	if (isize < vp->v_mount->mnt_maxsymlinklen) {
 		/*
 		 * Hand the copy result back to the caller; a failed
 		 * uiomove() must not be reported as a successful
 		 * readlink.
 		 */
 		return (uiomove((char *)ip->i_shortlink, isize, ap->a_uio));
 	}
 	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
 }
 
 /*
  * Strategy vnode op: work out the logical to physical block mapping if
  * that has not been done yet, then pass the buffer to the strategy
  * routine of the mount's buffer object.
  *
  * In order to be able to swap to a file, the ext2_bmaparray() operation
  * may not deadlock on memory.  See ext2_bmap() for details.
  *
  * Always returns 0; a mapping error is reported through the buffer
  * (b_error, BIO_ERROR) and the buffer is completed with bufdone().
  */
 static int
 ext2_strategy(struct vop_strategy_args *ap)
 {
 	struct bufobj *devbo;
 	struct vnode *vp;
 	struct buf *bp;
 	daddr_t physblk;
 	int rv;
 
 	bp = ap->a_bp;
 	vp = ap->a_vp;
 
 	switch (vp->v_type) {
 	case VBLK:
 	case VCHR:
 		panic("ext2_strategy: spec");
 	default:
 		break;
 	}
 
 	/* Equal logical and physical numbers mean "not mapped yet". */
 	if (bp->b_blkno == bp->b_lblkno) {
 		rv = ext2_bmaparray(vp, bp->b_lblkno, &physblk, NULL, NULL);
 		bp->b_blkno = physblk;
 		if (rv != 0) {
 			bp->b_error = rv;
 			bp->b_ioflags |= BIO_ERROR;
 			bufdone(bp);
 			return (0);
 		}
 		/* Block number -1: clear the buffer instead of doing I/O. */
 		if ((long)bp->b_blkno == -1)
 			vfs_bio_clrbuf(bp);
 	}
 	if ((long)bp->b_blkno == -1) {
 		bufdone(bp);
 		return (0);
 	}
 
 	bp->b_iooffset = dbtob(bp->b_blkno);
 	devbo = VFSTOEXT2(vp->v_mount)->um_bo;
 	BO_STRATEGY(devbo, bp);
 	return (0);
 }
 
 /*
  * Print out the contents of an inode.
  */
 static int
 ext2_print(struct vop_print_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 
 	vn_printf(ip->i_devvp, "\tino %ju", (uintmax_t)ip->i_number);
 	if (vp->v_type == VFIFO)
 		fifo_printinfo(vp);
 	printf("\n");
 	return (0);
 }
 
 /*
  * Close wrapper for fifos.
  *
  * With the vnode interlock held, the inode times are updated if the
  * vnode's use count is above one; the close itself is then done by the
  * generic fifo close routine, whose result is returned.
  */
 static int
 ext2fifo_close(struct vop_close_args *ap)
 {
 	struct vnode *fifovp;
 
 	fifovp = ap->a_vp;
 
 	VI_LOCK(fifovp);
 	if (fifovp->v_usecount > 1)
 		ext2_itimes_locked(fifovp);
 	VI_UNLOCK(fifovp);
 
 	return (fifo_specops.vop_close(ap));
 }
 
 /*
  * Kqfilter wrapper for fifos.
  *
  * The fifo kqfilter routine is tried first; only when it reports an
  * error is the request passed on to vfs_kqfilter(), whose result is
  * then returned.
  */
 static int
 ext2fifo_kqfilter(struct vop_kqfilter_args *ap)
 {
 
 	if (fifo_specops.vop_kqfilter(ap) == 0)
 		return (0);
 	return (vfs_kqfilter(ap));
 }
 
 /*
  * Pathconf vnode op: report the POSIX pathconf value selected by
  * ap->a_name for an ext2 file system through *ap->a_retval.
  *
  * Returns 0 when a value was stored, or EINVAL for a name that is not
  * handled here (in which case *ap->a_retval is left untouched).
  */
 static int
 ext2_pathconf(struct vop_pathconf_args *ap)
 {
 
 	switch (ap->a_name) {
 	case _PC_LINK_MAX:
 		*ap->a_retval = EXT2_LINK_MAX;
 		break;
 	case _PC_NAME_MAX:
 		*ap->a_retval = NAME_MAX;
 		break;
 	case _PC_PATH_MAX:
 		*ap->a_retval = PATH_MAX;
 		break;
 	case _PC_PIPE_BUF:
 		*ap->a_retval = PIPE_BUF;
 		break;
 
 	/* Boolean properties that are always on. */
 	case _PC_CHOWN_RESTRICTED:
 	case _PC_NO_TRUNC:
 		*ap->a_retval = 1;
 		break;
 
 	/* Boolean properties that are always off. */
 	case _PC_PRIO_IO:
 	case _PC_SYNC_IO:
 		*ap->a_retval = 0;
 		break;
 
 	/* Values taken from the mount's I/O size. */
 	case _PC_MIN_HOLE_SIZE:
 	case _PC_REC_INCR_XFER_SIZE:
 	case _PC_REC_MIN_XFER_SIZE:
 		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
 		break;
 
 	case _PC_ALLOC_SIZE_MIN:
 		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
 		break;
 	case _PC_FILESIZEBITS:
 		*ap->a_retval = 64;
 		break;
 	case _PC_REC_MAX_XFER_SIZE:
 		*ap->a_retval = -1; /* means ``unlimited'' */
 		break;
 	case _PC_REC_XFER_ALIGN:
 		*ap->a_retval = PAGE_SIZE;
 		break;
 	case _PC_SYMLINK_MAX:
 		*ap->a_retval = MAXPATHLEN;
 		break;
 
 	case _PC_ASYNC_IO:
 		/* _PC_ASYNC_IO should have been handled by upper layers. */
 		KASSERT(0, ("_PC_ASYNC_IO should not get here"));
 		return (EINVAL);
 
 	default:
 		return (EINVAL);
 	}
 	return (0);
 }
 
 /*
  * Vptofh vnode op: fill the file handle at ap->a_fhp, viewed as a
  * struct ufid, with the inode number and generation of the inode behind
  * ap->a_vp.  Always returns 0.
  */
 /* ARGSUSED */
 static int
 ext2_vptofh(struct vop_vptofh_args *ap)
 {
 	struct ufid *fid;
 	struct inode *node;
 
 	node = VTOI(ap->a_vp);
 	fid = (struct ufid *)ap->a_fhp;
 
 	fid->ufid_len = sizeof(*fid);
 	fid->ufid_ino = node->i_number;
 	fid->ufid_gen = node->i_gen;
 
 	return (0);
 }
 
 /*
  * Finish setting up the vnode *vpp for the inode already attached to it:
  * derive the vnode type from the inode mode, install fifoops as the
  * operations vector for fifos, flag the root inode with VV_ROOT and give
  * the inode an initial modification revision.  mntp is not used.
  * Always returns 0; *vpp is stored back unchanged.
  */
 int
 ext2_vinit(struct mount *mntp, struct vop_vector *fifoops, struct vnode **vpp)
 {
 	struct vnode *newvp;
 	struct inode *node;
 
 	newvp = *vpp;
 	node = VTOI(newvp);
 
 	newvp->v_type = IFTOVT(node->i_mode);
 	if (newvp->v_type == VFIFO)
 		newvp->v_op = fifoops;
 	if (node->i_number == EXT2_ROOTINO)
 		newvp->v_vflag |= VV_ROOT;
 
 	node->i_modrev = init_va_filerev();
 
 	*vpp = newvp;
 	return (0);
 }
 
 /*
  * Allocate a new inode of the given mode in directory dvp and enter it
  * under the name cnp.  A mode without a file type defaults to IFREG.
  *
  * Returns 0 with the new vnode in *vpp.  On failure an errno value is
  * returned and *vpp is NULL; if the inode had already been allocated,
  * its link count is zeroed and its vnode released.
  */
 static int
 ext2_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
     struct componentname *cnp)
 {
 	struct inode *node, *parent;
 	struct vnode *newvp;
 	int rv;
 
 	parent = VTOI(dvp);
 #ifdef INVARIANTS
 	if ((cnp->cn_flags & HASBUF) == 0)
 		panic("ext2_makeinode: no name");
 #endif
 	*vpp = NULL;
 	if ((mode & IFMT) == 0)
 		mode |= IFREG;
 
 	rv = ext2_valloc(dvp, mode, cnp->cn_cred, &newvp);
 	if (rv != 0)
 		return (rv);
 
 	/* The group always comes from the parent directory. */
 	node = VTOI(newvp);
 	node->i_gid = parent->i_gid;
 #ifdef SUIDDIR
 	/*
 	 * On a MNT_SUIDDIR mount, a file created in a setuid directory
 	 * owned by somebody else (but not by uid 0, which would subvert
 	 * quotas) is given to the directory's owner.  The 07111 mask
 	 * then removes the setuid, setgid, sticky and execute bits from
 	 * the mode.
 	 */
 	if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) != 0 &&
 	    (parent->i_mode & ISUID) != 0 &&
 	    parent->i_uid != cnp->cn_cred->cr_uid && parent->i_uid != 0) {
 		node->i_uid = parent->i_uid;
 		mode &= ~07111;
 	} else {
 		node->i_uid = cnp->cn_cred->cr_uid;
 	}
 #else
 	node->i_uid = cnp->cn_cred->cr_uid;
 #endif
 	node->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
 	node->i_mode = mode;
 	newvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
 	node->i_nlink = 1;
 
 	/*
 	 * Setgid survives only if the creator belongs to the file's
 	 * group or holds PRIV_VFS_RETAINSUGID.
 	 */
 	if ((node->i_mode & ISGID) != 0 &&
 	    !groupmember(node->i_gid, cnp->cn_cred) &&
 	    priv_check_cred(cnp->cn_cred, PRIV_VFS_RETAINSUGID, 0) != 0)
 		node->i_mode &= ~ISGID;
 
 	if ((cnp->cn_flags & ISWHITEOUT) != 0)
 		node->i_flags |= UF_OPAQUE;
 
 	/*
 	 * Make sure the inode goes to disk before the directory entry.
 	 */
 	rv = ext2_update(newvp, !DOINGASYNC(newvp));
 	if (rv == 0)
 		rv = ext2_direnter(node, dvp, cnp);
 	if (rv != 0) {
 		/*
 		 * The inode or the directory could not be written, so
 		 * the inode must be deallocated again.
 		 */
 		node->i_nlink = 0;
 		node->i_flag |= IN_CHANGE;
 		vput(newvp);
 		return (rv);
 	}
 
 	*vpp = newvp;
 	return (0);
 }
 
 /*
  * Read vnode op: dispatch to the extent-based reader when the inode has
  * IN_E4EXTENTS set, and to the indirect-block reader otherwise.  The
  * result of the chosen reader is returned unchanged.
  */
 static int
 ext2_read(struct vop_read_args *ap)
 {
 	struct inode *node;
 
 	node = VTOI(ap->a_vp);
 
 	/* No EXT4_EXT_LOCK()/EXT4_EXT_UNLOCK() is taken around the read. */
 	if ((node->i_flag & IN_E4EXTENTS) != 0)
 		return (ext4_ext_read(ap));
 	return (ext2_ind_read(ap));
 }
 
 /*
  * Read path used by ext2_read() for inodes that do not have
  * IN_E4EXTENTS set.
  *
  * Copies file data to the caller's uio one filesystem block at a time,
  * stopping when uio_resid reaches zero or the offset reaches ip->i_size.
  *
  * Returns 0 when nothing was requested, EOVERFLOW when the starting
  * offset is inside the file but at or beyond e2fs_maxfilesize, and
  * otherwise the error (if any) from the buffer read or from uiomove().
  * IN_ACCESS is set on the inode when the read succeeded or moved some
  * data, unless the mount has MNT_NOATIME or MNT_RDONLY.
  */
 static int
 ext2_ind_read(struct vop_read_args *ap)
 {
 	struct vnode *vp;
 	struct inode *ip;
 	struct uio *uio;
 	struct m_ext2fs *fs;
 	struct buf *bp;
 	daddr_t lbn, nextlbn;
 	off_t bytesinfile;
 	long size, xfersize, blkoffset;
 	int error, orig_resid, seqcount;
 	int ioflag;
 
 	vp = ap->a_vp;
 	uio = ap->a_uio;
 	ioflag = ap->a_ioflag;
 
 	/* The sequential-access hint lives in the bits above IO_SEQSHIFT. */
 	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
 	ip = VTOI(vp);
 
 #ifdef INVARIANTS
 	if (uio->uio_rw != UIO_READ)
 		panic("%s: mode", "ext2_read");
 
 	if (vp->v_type == VLNK) {
 		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
 			panic("%s: short symlink", "ext2_read");
 	} else if (vp->v_type != VREG && vp->v_type != VDIR)
 		panic("%s: type %d", "ext2_read", vp->v_type);
 #endif
 	orig_resid = uio->uio_resid;
 	KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
 	if (orig_resid == 0)
 		return (0);
 	KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
 	fs = ip->i_e2fs;
 	if (uio->uio_offset < ip->i_size &&
 	    uio->uio_offset >= fs->e2fs_maxfilesize)
 	    	return (EOVERFLOW);
 
 	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
 		/* Stop at (or past) end of file. */
 		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
 			break;
 		lbn = lblkno(fs, uio->uio_offset);
 		nextlbn = lbn + 1;
 		size = blksize(fs, ip, lbn);
 		blkoffset = blkoff(fs, uio->uio_offset);
 
 		/*
 		 * Transfer no more than what remains of this block, of the
 		 * request, and of the file.
 		 */
 		xfersize = fs->e2fs_fsize - blkoffset;
 		if (uio->uio_resid < xfersize)
 			xfersize = uio->uio_resid;
 		if (bytesinfile < xfersize)
 			xfersize = bytesinfile;
 
 		/*
 		 * Choose the read primitive: a plain bread() when the next
 		 * block starts at or beyond end of file, cluster_read()
 		 * unless the mount has MNT_NOCLUSTERR, breadn() with one
 		 * block of read-ahead when seqcount > 1, else bread().
 		 */
 		if (lblktosize(fs, nextlbn) >= ip->i_size)
 			error = bread(vp, lbn, size, NOCRED, &bp);
 		else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
 			error = cluster_read(vp, ip->i_size, lbn, size,
 			    NOCRED, blkoffset + uio->uio_resid, seqcount,
 			    0, &bp);
 		} else if (seqcount > 1) {
 			u_int nextsize = blksize(fs, ip, nextlbn);
 			error = breadn(vp, lbn,
 			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
 		} else
 			error = bread(vp, lbn, size, NOCRED, &bp);
 		if (error) {
 			/* Release here and clear bp so the cleanup below skips it. */
 			brelse(bp);
 			bp = NULL;
 			break;
 		}
 
 		/*
 		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
 		 * will cause us to attempt to release the buffer later on
 		 * and will cause the buffer cache to attempt to free the
 		 * underlying pages.
 		 */
 		if (ioflag & IO_DIRECT)
 			bp->b_flags |= B_DIRECT;
 
 		/*
 		 * We should only get non-zero b_resid when an I/O error
 		 * has occurred, which should cause us to break above.
 		 * However, if the short read did not cause an error,
 		 * then we want to ensure that we do not uiomove bad
 		 * or uninitialized data.
 		 */
 		size -= bp->b_resid;
 		if (size < xfersize) {
 			if (size == 0)
 				break;
 			xfersize = size;
 		}
 		error = uiomove((char *)bp->b_data + blkoffset,
 			(int)xfersize, uio);
 		if (error)
 			break;
 
 		if (ioflag & (IO_VMIO|IO_DIRECT)) {
 			/*
 			 * If it's VMIO or direct I/O, then we don't
 			 * need the buf, mark it available for
 			 * freeing. If it's non-direct VMIO, the VM has
 			 * the data.
 			 */
 			bp->b_flags |= B_RELBUF;
 			brelse(bp);
 		} else {
 			/*
 			 * Otherwise let whoever
 			 * made the request take care of
 			 * freeing it. We just queue
 			 * it onto another list.
 			 */
 			bqrelse(bp);
 		}
 	}
 
 	/*
 	 * bp is non-NULL here only when the loop was left through a
 	 * 'break' while still holding a buffer: the loop resets bp to NULL
 	 * on every iteration and the read-error path clears it itself.
 	 * Release the buffer the same way the loop body would have.
 	 */
 	if (bp != NULL) {
 		if (ioflag & (IO_VMIO|IO_DIRECT)) {
 			bp->b_flags |= B_RELBUF;
 			brelse(bp);
 		} else {
 			bqrelse(bp);
 		}
 	}
 
 	/* Record the access if the read succeeded or moved any data. */
 	if ((error == 0 || uio->uio_resid != orig_resid) &&
 	    (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
 		ip->i_flag |= IN_ACCESS;
 	return (error);
 }
 
 /*
  * Vnode op for ioctl.
  *
  * FIOSEEKDATA and FIOSEEKHOLE are handed to vn_bmap_seekhole() with the
  * ioctl argument treated as an off_t; every other command returns ENOTTY.
  */
 static int
 ext2_ioctl(struct vop_ioctl_args *ap)
 {
 
 	switch (ap->a_command) {
 	case FIOSEEKDATA:
 	case FIOSEEKHOLE:
 		return (vn_bmap_seekhole(ap->a_vp, ap->a_command,
 		    (off_t *)ap->a_data, ap->a_cred));
 	default:
 		return (ENOTTY);
 	}
 }
 
 /*
  * Read path used by ext2_read() for inodes with IN_E4EXTENTS set.
  *
  * For each filesystem block covered by the request, the logical block
  * is looked up in the per-inode extent cache (ext4_ext_in_cache()); on a
  * cache miss the extent is found with ext4_ext_find_extent() and put
  * into the cache.  Mapped blocks are read from ip->i_devvp with bread()
  * and copied to the caller's uio.
  *
  * Returns 0 when nothing was requested or the loop completes, EOVERFLOW
  * when the starting offset is inside the file but at or beyond
  * e2fs_maxfilesize, EIO when the lookup yields no extent, and otherwise
  * the error from bread() or uiomove().
  */
 static int
 ext4_ext_read(struct vop_read_args *ap)
 {
+	static unsigned char zeroes[EXT2_MAX_BLOCK_SIZE]; /* data for holes */
 	struct vnode *vp;
 	struct inode *ip;
 	struct uio *uio;
 	struct m_ext2fs *fs;
 	struct buf *bp;
 	struct ext4_extent nex, *ep;
 	struct ext4_extent_path path;
 	daddr_t lbn, newblk;
 	off_t bytesinfile;
 	int cache_type;
 	ssize_t orig_resid;
 	int error;
 	long size, xfersize, blkoffset;
 
 	vp = ap->a_vp;
 	ip = VTOI(vp);
 	uio = ap->a_uio;
 	memset(&path, 0, sizeof(path));
 
 	orig_resid = uio->uio_resid;
 	KASSERT(orig_resid >= 0, ("%s: uio->uio_resid < 0", __func__));
 	if (orig_resid == 0)
 		return (0);
 	KASSERT(uio->uio_offset >= 0, ("%s: uio->uio_offset < 0", __func__));
 	fs = ip->i_e2fs;
 	if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize)
 		return (EOVERFLOW);
 
 	while (uio->uio_resid > 0) {
 		/* Stop at (or past) end of file. */
 		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
 			break;
 		lbn = lblkno(fs, uio->uio_offset);
 		size = blksize(fs, ip, lbn);
 		blkoffset = blkoff(fs, uio->uio_offset);
 
 		/*
 		 * Transfer no more than what remains of this block, of the
 		 * request, and of the file.
 		 */
 		xfersize = fs->e2fs_fsize - blkoffset;
 		xfersize = MIN(xfersize, uio->uio_resid);
 		xfersize = MIN(xfersize, bytesinfile);
 
 		/* get block from ext4 extent cache */
 		cache_type = ext4_ext_in_cache(ip, lbn, &nex);
 		switch (cache_type) {
 		case EXT4_EXT_CACHE_NO:
 			ext4_ext_find_extent(fs, ip, lbn, &path);
-			ep = path.ep_ext;
+			if (path.ep_is_sparse)
+				ep = &path.ep_sparse_ext;
+			else
+				ep = path.ep_ext;
 			if (ep == NULL)
 				return (EIO);
 
-			ext4_ext_put_cache(ip, ep, EXT4_EXT_CACHE_IN);
+			ext4_ext_put_cache(ip, ep,
+			    path.ep_is_sparse ? EXT4_EXT_CACHE_GAP : EXT4_EXT_CACHE_IN);
 
 			/*
 			 * Physical block = offset of lbn within the extent
 			 * plus the extent's start block, which is assembled
 			 * from e_start_lo and e_start_hi.
 			 */
 			newblk = lbn - ep->e_blk + (ep->e_start_lo |
 			    (daddr_t)ep->e_start_hi << 32);
 
 			if (path.ep_bp != NULL) {
 				brelse(path.ep_bp);
 				path.ep_bp = NULL;
 			}
 			break;
 
 		case EXT4_EXT_CACHE_GAP:
 			/* block has not been allocated yet */
-			return (0);
+			break;
 
 		case EXT4_EXT_CACHE_IN:
 			/* Same mapping as above, using the cached extent. */
 			newblk = lbn - nex.e_blk + (nex.e_start_lo |
 			    (daddr_t)nex.e_start_hi << 32);
 			break;
 
 		default:
 			panic("%s: invalid cache type", __func__);
 		}
 
-		error = bread(ip->i_devvp, fsbtodb(fs, newblk), size, NOCRED, &bp);
-		if (error) {
-			brelse(bp);
-			return (error);
-		}
+		if (cache_type == EXT4_EXT_CACHE_GAP ||
+		    (cache_type == EXT4_EXT_CACHE_NO && path.ep_is_sparse)) {
+			if (xfersize > sizeof(zeroes))
+				xfersize = sizeof(zeroes);
+			error = uiomove(zeroes, xfersize, uio);
+			if (error)
+				return (error);
+		} else {
+			error = bread(ip->i_devvp, fsbtodb(fs, newblk), size,
+			    NOCRED, &bp);
+			if (error) {
+				brelse(bp);
+				return (error);
+			}
 
-		size -= bp->b_resid;
-		if (size < xfersize) {
-			if (size == 0) {
-				bqrelse(bp);
-				break;
+			size -= bp->b_resid;
+			if (size < xfersize) {
+				if (size == 0) {
+					bqrelse(bp);
+					break;
+				}
+				xfersize = size;
 			}
-			xfersize = size;
+			error = uiomove(bp->b_data + blkoffset, xfersize, uio);
+			bqrelse(bp);
+			if (error)
+				return (error);
 		}
-		error = uiomove(bp->b_data + blkoffset, (int)xfersize, uio);
-		bqrelse(bp);
-		if (error)
-			return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Vnode op for writing.
  *
  * Copies the caller's uio into buffers obtained from ext2_balloc(), one
  * filesystem block at a time, growing ip->i_size as the write extends
  * the file, and queues each buffer for writing according to ioflag.
  *
  * Returns EPERM for a write that is not at end of file on an inode with
  * the APPEND flag, EFBIG when the write would exceed e2fs_maxfilesize
  * or vn_rlimit_fsize() rejects it, and otherwise the error (if any)
  * from ext2_balloc(), uiomove() or, for IO_SYNC, ext2_update().
  * With IO_UNIT a failed write is undone: the file is truncated back to
  * its original size and the uio offset/resid are restored.
  */
 static int
 ext2_write(struct vop_write_args *ap)
 {
 	struct vnode *vp;
 	struct uio *uio;
 	struct inode *ip;
 	struct m_ext2fs *fs;
 	struct buf *bp;
 	daddr_t lbn;
 	off_t osize;
 	int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize;
 
 	ioflag = ap->a_ioflag;
 	uio = ap->a_uio;
 	vp = ap->a_vp;
 
 	/* The sequential-access hint lives in the bits above IO_SEQSHIFT. */
 	seqcount = ioflag >> IO_SEQSHIFT;
 	ip = VTOI(vp);
 
 #ifdef INVARIANTS
 	if (uio->uio_rw != UIO_WRITE)
 		panic("%s: mode", "ext2_write");
 #endif
 
 	switch (vp->v_type) {
 	case VREG:
 		/* IO_APPEND forces the write position to end of file. */
 		if (ioflag & IO_APPEND)
 			uio->uio_offset = ip->i_size;
 		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
 			return (EPERM);
 		/* FALLTHROUGH */
 	case VLNK:
 		break;
 	case VDIR:
 		/* XXX differs from ffs -- this is called from ext2_mkdir(). */
 		if ((ioflag & IO_SYNC) == 0)
 		panic("ext2_write: nonsync dir write");
 		break;
 	default:
 		panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp,
 		    vp->v_type, (intmax_t)uio->uio_offset,
 		    (intmax_t)uio->uio_resid);
 	}
 
 	KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0"));
 	KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0"));
 	fs = ip->i_e2fs;
 	if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize)
 		return (EFBIG);
 	/*
 	 * Maybe this should be above the vnode op call, but so long as
 	 * file servers have no limits, I don't think it matters.
 	 */
 	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
 		return (EFBIG);
 
 	/* Remember the starting state so a failed write can be undone. */
 	resid = uio->uio_resid;
 	osize = ip->i_size;
 	/* Pass the sequential hint to ext2_balloc(), capped at BA_SEQMAX. */
 	if (seqcount > BA_SEQMAX)
 		flags = BA_SEQMAX << BA_SEQSHIFT;
 	else
 		flags = seqcount << BA_SEQSHIFT;
 	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
 		flags |= IO_SYNC;
 
 	for (error = 0; uio->uio_resid > 0;) {
 		lbn = lblkno(fs, uio->uio_offset);
 		blkoffset = blkoff(fs, uio->uio_offset);
 		/* Write at most to the end of this block or of the request. */
 		xfersize = fs->e2fs_fsize - blkoffset;
 		if (uio->uio_resid < xfersize)
 			xfersize = uio->uio_resid;
 		if (uio->uio_offset + xfersize > ip->i_size)
 			vnode_pager_setsize(vp, uio->uio_offset + xfersize);
 
 		/*
 		 * We must perform a read-before-write if the transfer size
 		 * does not cover the entire buffer.
 		 */
 		if (fs->e2fs_bsize > xfersize)
 			flags |= BA_CLRBUF;
 		else
 			flags &= ~BA_CLRBUF;
 		error = ext2_balloc(ip, lbn, blkoffset + xfersize,
 		    ap->a_cred, &bp, flags);
 		if (error != 0)
 			break;
 
 		if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
 			bp->b_flags |= B_NOCACHE;
 		if (uio->uio_offset + xfersize > ip->i_size)
 			ip->i_size = uio->uio_offset + xfersize;
 		/* Never copy more than the buffer actually holds. */
 		size = blksize(fs, ip, lbn) - bp->b_resid;
 		if (size < xfersize)
 			xfersize = size;
 
 		error =
 		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
 		/*
 		 * If the buffer is not already filled and we encounter an
 		 * error while trying to fill it, we have to clear out any
 		 * garbage data from the pages instantiated for the buffer.
 		 * If we do not, a failed uiomove() during a write can leave
 		 * the prior contents of the pages exposed to a userland mmap.
 		 *
 		 * Note that we need only clear buffers with a transfer size
 		 * equal to the block size because buffers with a shorter
 		 * transfer size were cleared above by the call to ext2_balloc()
 		 * with the BA_CLRBUF flag set.
 		 *
 		 * If the source region for uiomove identically mmaps the
 		 * buffer, uiomove() performed the NOP copy, and the buffer
 		 * content remains valid because the page fault handler
 		 * validated the pages.
 		 */
 		if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
 		    fs->e2fs_bsize == xfersize)
 			vfs_bio_clrbuf(bp);
 		if (ioflag & (IO_VMIO|IO_DIRECT)) {
 			bp->b_flags |= B_RELBUF;
 		}
 
 		/*
 		 * If IO_SYNC each buffer is written synchronously.  Otherwise
 		 * if we have a severe page deficiency write the buffer
 		 * asynchronously.  Otherwise try to cluster, and if that
 		 * doesn't do it then either do an async write (if O_DIRECT),
 		 * or a delayed write (if not).
 		 */
 		if (ioflag & IO_SYNC) {
 			(void)bwrite(bp);
 		} else if (vm_page_count_severe() ||
 		    buf_dirty_count_severe() ||
 		    (ioflag & IO_ASYNC)) {
 			bp->b_flags |= B_CLUSTEROK;
 			bawrite(bp);
 		} else if (xfersize + blkoffset == fs->e2fs_fsize) {
 			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
 				bp->b_flags |= B_CLUSTEROK;
 				cluster_write(vp, bp, ip->i_size, seqcount, 0);
 			} else {
 				bawrite(bp);
 			}
 		} else if (ioflag & IO_DIRECT) {
 			bp->b_flags |= B_CLUSTEROK;
 			bawrite(bp);
 		} else {
 			bp->b_flags |= B_CLUSTEROK;
 			bdwrite(bp);
 		}
 		/* The buffer has been handed off above; now act on the error. */
 		if (error || xfersize == 0)
 			break;
 	}
 	/*
 	 * If we successfully wrote any data, and we are not the superuser
 	 * we clear the setuid and setgid bits as a precaution against
 	 * tampering.
 	 */
 	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
 	    ap->a_cred) {
 		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0))
 			ip->i_mode &= ~(ISUID | ISGID);
 	}
 	if (error) {
 		if (ioflag & IO_UNIT) {
 			/* All-or-nothing write: undo the partial transfer. */
 			(void)ext2_truncate(vp, osize,
 			    ioflag & IO_SYNC, ap->a_cred, uio->uio_td);
 			uio->uio_offset -= resid - uio->uio_resid;
 			uio->uio_resid = resid;
 		}
 	}
 	/* Some data was written: mark the inode changed and updated. */
 	if (uio->uio_resid != resid) {
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		if (ioflag & IO_SYNC)
 			error = ext2_update(vp, 1);
 	}
 	return (error);
 }
Index: projects/clang380-import/sys/fs/smbfs/smbfs_smb.c
===================================================================
--- projects/clang380-import/sys/fs/smbfs/smbfs_smb.c	(revision 293686)
+++ projects/clang380-import/sys/fs/smbfs/smbfs_smb.c	(revision 293687)
@@ -1,1472 +1,1472 @@
 /*-
  * Copyright (c) 2000-2001 Boris Popov
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/lock.h>
 #include <sys/vnode.h>
 #include <sys/mbuf.h>
 #include <sys/mount.h>
 #include <sys/endian.h>
 
 #ifdef USE_MD5_HASH
 #include <sys/md5.h>
 #endif
 
 #include <netsmb/smb.h>
 #include <netsmb/smb_subr.h>
 #include <netsmb/smb_rq.h>
 #include <netsmb/smb_conn.h>
 
 #include <fs/smbfs/smbfs.h>
 #include <fs/smbfs/smbfs_node.h>
 #include <fs/smbfs/smbfs_subr.h>
 
 /*
  * Lack of inode numbers leads us to the problem of generating them.
  * Partially this problem can be solved by having a dir/file cache
  * with inode numbers generated from a counter incremented by one.
  * However this way will require too much kernel memory, gives all
  * sorts of locking and consistency problems, not to mention counter
  * overflows.  So, I decided to use a hash function to generate pseudo
  * random (and unique) inode numbers.
  *
  * With USE_MD5_HASH defined the result is dnp->n_ino plus the sum of
  * the four 32-bit words of the MD5 digest of the name, and the code
  * after the #endif is never reached.  Otherwise the result is
  * dnp->n_ino + smbfs_hash(name, nmlen) computed in 32 bits, with values
  * of 2 or less bumped by 3.
  */
 static long
 smbfs_getino(struct smbnode *dnp, const char *name, int nmlen)
 {
 #ifdef USE_MD5_HASH
 	MD5_CTX md5;
 	u_int32_t state[4];
 	long ino;
 	int i;
 
 	MD5Init(&md5);
 	MD5Update(&md5, name, nmlen);
 	MD5Final((u_char *)state, &md5);
 	for (i = 0, ino = 0; i < 4; i++)
 		ino += state[i];
 	return dnp->n_ino + ino;
 #endif
 	u_int32_t ino;
 
 	ino = dnp->n_ino + smbfs_hash(name, nmlen);
 	/* Keep generated numbers above 2 (presumably reserved -- confirm). */
 	if (ino <= 2)
 		ino += 3;
 	return ino;
 }
 
 /*
  * Issue an SMB_COM_LOCKING_ANDX request for the open file np->n_fid.
  *
  * op selects the operation: SMB_LOCK_SHARED sets the shared-lock bit in
  * the lock type, and SMB_LOCK_RELEASE swaps which of the two 16-bit
  * counters in the word section is 1 (presumably unlock count vs. lock
  * count -- confirm against the protocol spec).  The byte section
  * carries pid as 16 bits and start / (end - start) as 32 bits each, so
  * wider values are written through the 16/32-bit put routines.
  *
  * Returns the error from smb_rq_alloc() or smb_rq_simple().
  */
 static int
 smbfs_smb_lockandx(struct smbnode *np, int op, u_int32_t pid, off_t start, off_t end,
 	struct smb_cred *scred)
 {
 	struct smb_share *ssp = np->n_mount->sm_share;
 	struct smb_rq *rqp;
 	struct mbchain *mbp;
 	u_char ltype = 0;
 	int error;
 
 	if (op == SMB_LOCK_SHARED)
 		ltype |= SMB_LOCKING_ANDX_SHARED_LOCK;
 
 	error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_LOCKING_ANDX, scred, &rqp);
 	if (error)
 		return (error);
 	smb_rq_getrequest(rqp, &mbp);
 	smb_rq_wstart(rqp);
 	mb_put_uint8(mbp, 0xff);	/* secondary command */
 	mb_put_uint8(mbp, 0);		/* MBZ */
 	mb_put_uint16le(mbp, 0);
 	mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM);
 	mb_put_uint8(mbp, ltype);	/* locktype */
 	mb_put_uint8(mbp, 0);		/* oplocklevel - 0 seems is NO_OPLOCK */
 	mb_put_uint32le(mbp, 0);	/* timeout - break immediately */
 	mb_put_uint16le(mbp, op == SMB_LOCK_RELEASE ? 1 : 0);
 	mb_put_uint16le(mbp, op == SMB_LOCK_RELEASE ? 0 : 1);
 	smb_rq_wend(rqp);
 	smb_rq_bstart(rqp);
 	mb_put_uint16le(mbp, pid);
 	mb_put_uint32le(mbp, start);
 	mb_put_uint32le(mbp, end - start);
 	smb_rq_bend(rqp);
 	error = smb_rq_simple(rqp);
 	smb_rq_done(rqp);
 	return error;
 }
 
 /*
  * Lock or unlock the byte range [start, end) of np on the server.
  *
  * Returns EINVAL when the negotiated dialect is below
  * SMB_DIALECT_LANMAN1_0; otherwise forwards to smbfs_smb_lockandx(),
  * passing id (converted through uintptr_t) as the pid argument.
  */
 int
 smbfs_smb_lock(struct smbnode *np, int op, caddr_t id,
 	off_t start, off_t end,	struct smb_cred *scred)
 {
 	struct smb_share *ssp = np->n_mount->sm_share;
 
 	if (SMB_DIALECT(SSTOVC(ssp)) < SMB_DIALECT_LANMAN1_0)
 		/*
 		 * TODO: use LOCK_BYTE_RANGE here.
 		 */
 		return EINVAL;
 	else
 		return smbfs_smb_lockandx(np, op, (uintptr_t)id, start, end, scred);
 }
 
 /*
  * Fill *sbp from a TRANS2 QUERY_FS_INFORMATION request at level
  * SMB_QUERY_FS_SIZE_INFO.
  *
  * The reply is decoded as two 64-bit counts (units, funits) followed by
  * two 32-bit values (bpu, bsize).  f_bsize is bpu * bsize, f_blocks is
  * units, f_bfree and f_bavail are funits, and the file counts are fixed
  * at 0xffff.  The md_get_*() return values are not checked here.
  *
  * Returns the error from smb_t2_alloc() or smb_t2_request(), else 0.
  */
 static int
 smbfs_query_info_fs(struct smb_share *ssp, struct statfs *sbp,
 	struct smb_cred *scred)
 {
 	struct smb_t2rq *t2p;
 	struct mbchain *mbp;
 	struct mdchain *mdp;
 	uint32_t bsize, bpu;
 	int64_t units, funits;
 	int error;
 
 	error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_QUERY_FS_INFORMATION,
 	    scred, &t2p);
 	if (error)
 		return (error);
 	mbp = &t2p->t2_tparam;
 	mb_init(mbp);
 	mb_put_uint16le(mbp, SMB_QUERY_FS_SIZE_INFO);
 	t2p->t2_maxpcount = 2;
 	/* Room for exactly the four fields decoded below. */
 	t2p->t2_maxdcount = sizeof(int64_t) * 2 + sizeof(uint32_t) * 2;
 	error = smb_t2_request(t2p);
 	if (error) {
 		smb_t2_done(t2p);
 		return (error);
 	}
 	mdp = &t2p->t2_rdata;
 	md_get_int64le(mdp, &units);
 	md_get_int64le(mdp, &funits);
 	md_get_uint32le(mdp, &bpu);
 	md_get_uint32le(mdp, &bsize);
 	sbp->f_bsize = bpu * bsize;	/* fundamental filesystem block size */
 	sbp->f_blocks= (uint64_t)units;	/* total data blocks in filesystem */
 	sbp->f_bfree = (uint64_t)funits;/* free blocks in fs */
 	sbp->f_bavail= (uint64_t)funits;/* free blocks avail to non-superuser */
 	sbp->f_files = 0xffff;		/* total file nodes in filesystem */
 	sbp->f_ffree = 0xffff;		/* free file nodes in fs */
 	smb_t2_done(t2p);
 	return (0);
 }
 
 
 /*
  * Fill *sbp from a TRANS2 QUERY_FS_INFORMATION request at level
  * SMB_INFO_ALLOCATION.
  *
  * The reply is decoded as a skipped 32-bit fs id, three 32-bit values
  * (bpu, units, funits) and a 16-bit bsize.  f_bsize is bpu * bsize,
  * f_blocks is units, f_bfree and f_bavail are funits, and the file
  * counts are fixed at 0xffff.  The md_get_*() return values are not
  * checked here.
  *
  * Returns the error from smb_t2_alloc() or smb_t2_request(), else 0.
  */
 static int
 smbfs_query_info_alloc(struct smb_share *ssp, struct statfs *sbp,
 	struct smb_cred *scred)
 {
 	struct smb_t2rq *t2p;
 	struct mbchain *mbp;
 	struct mdchain *mdp;
 	u_int16_t bsize;
 	u_int32_t units, bpu, funits;
 	int error;
 
 	error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_QUERY_FS_INFORMATION,
 	    scred, &t2p);
 	if (error)
 		return error;
 	mbp = &t2p->t2_tparam;
 	mb_init(mbp);
 	mb_put_uint16le(mbp, SMB_INFO_ALLOCATION);
 	t2p->t2_maxpcount = 4;
 	/* Four 32-bit fields plus one 16-bit field, as decoded below. */
 	t2p->t2_maxdcount = 4 * 4 + 2;
 	error = smb_t2_request(t2p);
 	if (error) {
 		smb_t2_done(t2p);
 		return error;
 	}
 	mdp = &t2p->t2_rdata;
 	md_get_uint32(mdp, NULL);	/* fs id */
 	md_get_uint32le(mdp, &bpu);
 	md_get_uint32le(mdp, &units);
 	md_get_uint32le(mdp, &funits);
 	md_get_uint16le(mdp, &bsize);
 	sbp->f_bsize = bpu * bsize;	/* fundamental filesystem block size */
 	sbp->f_blocks= units;		/* total data blocks in filesystem */
 	sbp->f_bfree = funits;		/* free blocks in fs */
 	sbp->f_bavail= funits;		/* free blocks avail to non-superuser */
 	sbp->f_files = 0xffff;		/* total file nodes in filesystem */
 	sbp->f_ffree = 0xffff;		/* free file nodes in fs */
 	smb_t2_done(t2p);
 	return 0;
 }
 
 /*
  * Fill *sbp from an SMB_COM_QUERY_INFORMATION_DISK request, which is
  * sent with empty word and byte sections.
  *
  * The reply is decoded as four 16-bit values in the order units, bpu,
  * bsize, funits.  f_bsize is bpu * bsize, f_blocks is units, f_bfree
  * and f_bavail are funits, and the file counts are fixed at 0xffff.
  * The md_get_*() return values are not checked here.
  *
  * Returns the error from smb_rq_alloc() or smb_rq_simple(), else 0.
  */
 static int
 smbfs_query_info_disk(struct smb_share *ssp, struct statfs *sbp,
 	struct smb_cred *scred)
 {
 	struct smb_rq *rqp;
 	struct mdchain *mdp;
 	u_int16_t units, bpu, bsize, funits;
 	int error;
 
 	error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_QUERY_INFORMATION_DISK,
 	    scred, &rqp);
 	if (error)
 		return (error);
 	smb_rq_wstart(rqp);
 	smb_rq_wend(rqp);
 	smb_rq_bstart(rqp);
 	smb_rq_bend(rqp);
 	error = smb_rq_simple(rqp);
 	if (error) {
 		smb_rq_done(rqp);
 		return error;
 	}
 	smb_rq_getreply(rqp, &mdp);
 	md_get_uint16le(mdp, &units);
 	md_get_uint16le(mdp, &bpu);
 	md_get_uint16le(mdp, &bsize);
 	md_get_uint16le(mdp, &funits);
 	sbp->f_bsize = bpu * bsize;	/* fundamental filesystem block size */
 	sbp->f_blocks= units;		/* total data blocks in filesystem */
 	sbp->f_bfree = funits;		/* free blocks in fs */
 	sbp->f_bavail= funits;		/* free blocks avail to non-superuser */
 	sbp->f_files = 0xffff;		/* total file nodes in filesystem */
 	sbp->f_ffree = 0xffff;		/* free file nodes in fs */
 	smb_rq_done(rqp);
 	return 0;
 }
 
 /*
  * Obtain filesystem statistics for the share.
  *
  * For dialects at or above SMB_DIALECT_LANMAN2_0 the two TRANS2 queries
  * (smbfs_query_info_fs(), then smbfs_query_info_alloc()) are tried in
  * turn and the first success wins.  In every other case, including when
  * both fail, the result comes from smbfs_query_info_disk().
  */
 int
 smbfs_smb_statfs(struct smb_share *ssp, struct statfs *sbp,
 	struct smb_cred *scred)
 {
 
 	if (SMB_DIALECT(SSTOVC(ssp)) >= SMB_DIALECT_LANMAN2_0) {
 		if (smbfs_query_info_fs(ssp, sbp, scred) == 0)
 			return (0);
 		if (smbfs_query_info_alloc(ssp, sbp, scred) == 0)
 			return (0);
 	}
 	return (smbfs_query_info_disk(ssp, sbp, scred));
 }
 
 /*
  * Set the end-of-file position of the open file np->n_fid to newsize
  * with a TRANS2 SET_FILE_INFORMATION request at level
  * SMB_SET_FILE_END_OF_FILE_INFO.
  *
  * The parameter block carries the fid, the level and a 32-bit zero; the
  * data block carries the 64-bit size followed by six zero bytes.
  *
  * Returns the error from smb_t2_alloc() or smb_t2_request().
  */
 static int
 smbfs_smb_seteof(struct smbnode *np, int64_t newsize, struct smb_cred *scred)
 {
 	struct smb_t2rq *t2p;
 	struct smb_share *ssp = np->n_mount->sm_share;
 	struct mbchain *mbp;
 	int error;
 
 	error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_SET_FILE_INFORMATION,
 	    scred, &t2p);
 	if (error)
 		return error;
 	mbp = &t2p->t2_tparam;
 	mb_init(mbp);
 	mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM);
 	mb_put_uint16le(mbp, SMB_SET_FILE_END_OF_FILE_INFO);
 	mb_put_uint32le(mbp, 0);
 	mbp = &t2p->t2_tdata;
 	mb_init(mbp);
 	mb_put_int64le(mbp, newsize);
 	mb_put_uint32le(mbp, 0);			/* padding */
 	mb_put_uint16le(mbp, 0);
 	t2p->t2_maxpcount = 2;
 	t2p->t2_maxdcount = 0;
 	error = smb_t2_request(t2p);
 	smb_t2_done(t2p);
 	return error;
 }
 
 /*
  * Send SMB_COM_FLUSH for the open file np->n_fid.
  *
  * Nothing is sent, and 0 is returned, when the node is not marked NOPEN,
  * has no vnode, or its vnode is not a regular file.  On success the
  * NFLUSHWIRE flag is cleared.  Returns the error from smb_rq_alloc() or
  * smb_rq_simple().
  */
 static int
 smb_smb_flush(struct smbnode *np, struct smb_cred *scred)
 {
 	struct smb_share *ssp = np->n_mount->sm_share;
 	struct smb_rq *rqp;
 	struct mbchain *mbp;
 	int error;
 
 	if ((np->n_flag & NOPEN) == 0 || !SMBTOV(np) ||
 	    SMBTOV(np)->v_type != VREG)
 		return 0; /* not a regular open file */
 	error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_FLUSH, scred, &rqp);
 	if (error)
 		return (error);
 	smb_rq_getrequest(rqp, &mbp);
 	smb_rq_wstart(rqp);
 	mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM);
 	smb_rq_wend(rqp);
 	smb_rq_bstart(rqp);
 	smb_rq_bend(rqp);
 	error = smb_rq_simple(rqp);
 	smb_rq_done(rqp);
 	if (!error)
 		np->n_flag &= ~NFLUSHWIRE;
 	return (error);
 }
 
 /*
  * Flush np on the server only if it is marked NFLUSHWIRE; otherwise
  * return 0 without sending anything.
  */
 int
 smbfs_smb_flush(struct smbnode *np, struct smb_cred *scred)
 {
 	if (np->n_flag & NFLUSHWIRE)
 		return (smb_smb_flush(np, scred));
 	return (0);
 }
 
 /*
  * Set the size of the open file np->n_fid to newsize.
  *
  * smbfs_smb_seteof() is tried first; if it succeeds the node is marked
  * NFLUSHWIRE and 0 is returned.  Otherwise an SMB_COM_WRITE request is
  * sent whose 32-bit field carries newsize and whose data block has a
  * zero length (presumably a zero-byte write at offset newsize --
  * confirm against the protocol spec).
  *
  * Returns the error from smb_rq_alloc() or smb_rq_simple() on the
  * fallback path.
  */
 int
-smbfs_smb_setfsize(struct smbnode *np, int newsize, struct smb_cred *scred)
+smbfs_smb_setfsize(struct smbnode *np, int64_t newsize, struct smb_cred *scred)
 {
 	struct smb_share *ssp = np->n_mount->sm_share;
 	struct smb_rq *rqp;
 	struct mbchain *mbp;
 	int error;
 
-	if (!smbfs_smb_seteof(np, (int64_t) newsize, scred)) {
+	if (!smbfs_smb_seteof(np, newsize, scred)) {
 		np->n_flag |= NFLUSHWIRE;
 		return (0);
 	}
-
+	/* XXX: We should use SMB_COM_WRITE_ANDX to support large offsets */
 	error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_WRITE, scred, &rqp);
 	if (error)
 		return (error);
 	smb_rq_getrequest(rqp, &mbp);
 	smb_rq_wstart(rqp);
 	mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM);
 	mb_put_uint16le(mbp, 0);
-	mb_put_uint32le(mbp, newsize);
+	mb_put_uint32le(mbp, (uint32_t)newsize);
 	mb_put_uint16le(mbp, 0);
 	smb_rq_wend(rqp);
 	smb_rq_bstart(rqp);
 	mb_put_uint8(mbp, SMB_DT_DATA);
 	mb_put_uint16le(mbp, 0);
 	smb_rq_bend(rqp);
 	error = smb_rq_simple(rqp);
 	smb_rq_done(rqp);
 	return error;
 }
 
 /*
  * Query basic attributes of a path with SMB_COM_QUERY_INFORMATION.
  *
  * The path is built by smbfs_fullpath() from np and the optional
  * name/len.  A reply whose word count is not 10 yields EBADRPC.  On
  * success fap->fa_attr and fap->fa_size (a 32-bit value) are filled in;
  * fap->fa_mtime is only written when the returned time is non-zero.
  *
  * Returns the first error from smb_rq_alloc(), smbfs_fullpath() or
  * smb_rq_simple(), EBADRPC as above, else 0.
  */
 int
 smbfs_smb_query_info(struct smbnode *np, const char *name, int len,
 		     struct smbfattr *fap, struct smb_cred *scred)
 {
 	struct smb_rq *rqp;
 	struct smb_share *ssp = np->n_mount->sm_share;
 	struct mbchain *mbp;
 	struct mdchain *mdp;
 	u_int8_t wc;
 	int error;
 	u_int16_t wattr;
 	u_int32_t lint;
 
 	error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_QUERY_INFORMATION, scred,
 	    &rqp);
 	if (error)
 		return (error);
 	smb_rq_getrequest(rqp, &mbp);
 	smb_rq_wstart(rqp);
 	smb_rq_wend(rqp);
 	smb_rq_bstart(rqp);
 	mb_put_uint8(mbp, SMB_DT_ASCII);
 	/* do { } while(0) so every exit goes through smb_rq_done() below. */
 	do {
 		error = smbfs_fullpath(mbp, SSTOVC(ssp), np, name, len);
 		if (error)
 			break;
 		smb_rq_bend(rqp);
 		error = smb_rq_simple(rqp);
 		if (error)
 			break;
 		smb_rq_getreply(rqp, &mdp);
 		if (md_get_uint8(mdp, &wc) != 0 || wc != 10) {
 			error = EBADRPC;
 			break;
 		}
 		md_get_uint16le(mdp, &wattr);
 		fap->fa_attr = wattr;
 		/*
 		 * Be careful using the time returned here, as
 		 * with FAT on NT4SP6, at least, the time returned is low
 		 * 32 bits of 100s of nanoseconds (since 1601) so it rolls
 		 * over about every seven minutes!
 		 */
 		md_get_uint32le(mdp, &lint); /* specs: secs since 1970 */
 		if (lint)	/* avoid bogus zero returns */
 			smb_time_server2local(lint, SSTOVC(ssp)->vc_sopt.sv_tz,
 					      &fap->fa_mtime);
 		md_get_uint32le(mdp, &lint);
 		fap->fa_size = lint;
 	} while(0);
 	smb_rq_done(rqp);
 	return error;
 }
 
 /*
  * Set DOS file attributes. mtime should be NULL for dialects above lm10
  *
  * Sends SMB_COM_SET_INFORMATION for the path of np.  The word section
  * carries attr, the modification time converted with
  * smb_time_local2server() (0 when mtime is NULL) and ten zero bytes.
  * The byte section carries the path followed by a second, empty string.
  *
  * Returns the first error from smb_rq_alloc(), smbfs_fullpath() or
  * smb_rq_simple(), else 0.
  */
 int
 smbfs_smb_setpattr(struct smbnode *np, u_int16_t attr, struct timespec *mtime,
 	struct smb_cred *scred)
 {
 	struct smb_rq *rqp;
 	struct smb_share *ssp = np->n_mount->sm_share;
 	struct mbchain *mbp;
 	u_long time;
 	int error, svtz;
 
 	error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_SET_INFORMATION, scred,
 	    &rqp);
 	if (error)
 		return (error);
 	svtz = SSTOVC(ssp)->vc_sopt.sv_tz;
 	smb_rq_getrequest(rqp, &mbp);
 	smb_rq_wstart(rqp);
 	mb_put_uint16le(mbp, attr);
 	if (mtime) {
 		smb_time_local2server(mtime, svtz, &time);
 	} else
 		time = 0;
 	mb_put_uint32le(mbp, time);		/* mtime */
 	mb_put_mem(mbp, NULL, 5 * 2, MB_MZERO);
 	smb_rq_wend(rqp);
 	smb_rq_bstart(rqp);
 	mb_put_uint8(mbp, SMB_DT_ASCII);
 	/* do { } while(0) so every exit goes through smb_rq_done() below. */
 	do {
 		error = smbfs_fullpath(mbp, SSTOVC(ssp), np, NULL, 0);
 		if (error)
 			break;
 		/* Second string: empty, terminated per the string encoding. */
 		mb_put_uint8(mbp, SMB_DT_ASCII);
 		if (SMB_UNICODE_STRINGS(SSTOVC(ssp))) {
 			mb_put_padbyte(mbp);
 			mb_put_uint8(mbp, 0);	/* 1st byte of NULL Unicode char */
 		}
 		mb_put_uint8(mbp, 0);
 		smb_rq_bend(rqp);
 		error = smb_rq_simple(rqp);
 		if (error) {
 			SMBERROR("smb_rq_simple(rqp) => error %d\n", error);
 			break;
 		}
 	} while(0);
 	smb_rq_done(rqp);
 	return error;
 }
 
 /*
  * Set access/modification times and DOS attributes of the path of np
  * with a TRANS2 SET_PATH_INFORMATION request at level SMB_INFO_STANDARD.
  * Note, win95 doesn't support this call.
  *
  * Times are converted to DOS date/time pairs with smb_time_unix2dos();
  * a NULL atime or mtime is sent as zero date and time.  Creation time,
  * file size, allocation size and EA size are all sent as zero.
  *
  * Returns the first error from smb_t2_alloc(), smbfs_fullpath() or
  * smb_t2_request(), else 0.
  */
 int
 smbfs_smb_setptime2(struct smbnode *np, struct timespec *mtime,
 	struct timespec *atime, int attr, struct smb_cred *scred)
 {
 	struct smb_t2rq *t2p;
 	struct smb_share *ssp = np->n_mount->sm_share;
 	struct smb_vc *vcp = SSTOVC(ssp);
 	struct mbchain *mbp;
 	u_int16_t date, time;
 	int error, tzoff;
 
 	error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_SET_PATH_INFORMATION,
 	    scred, &t2p);
 	if (error)
 		return error;
 	mbp = &t2p->t2_tparam;
 	mb_init(mbp);
 	mb_put_uint16le(mbp, SMB_INFO_STANDARD);
 	mb_put_uint32le(mbp, 0);		/* MBZ */
 	/* mb_put_uint8(mbp, SMB_DT_ASCII); specs incorrect */
 	error = smbfs_fullpath(mbp, vcp, np, NULL, 0);
 	if (error) {
 		smb_t2_done(t2p);
 		return error;
 	}
 	tzoff = vcp->vc_sopt.sv_tz;
 	mbp = &t2p->t2_tdata;
 	mb_init(mbp);
 	mb_put_uint32le(mbp, 0);		/* creation time */
 	/* Access time, then modification time, each as DOS date + time. */
 	if (atime)
 		smb_time_unix2dos(atime, tzoff, &date, &time, NULL);
 	else
 		time = date = 0;
 	mb_put_uint16le(mbp, date);
 	mb_put_uint16le(mbp, time);
 	if (mtime)
 		smb_time_unix2dos(mtime, tzoff, &date, &time, NULL);
 	else
 		time = date = 0;
 	mb_put_uint16le(mbp, date);
 	mb_put_uint16le(mbp, time);
 	mb_put_uint32le(mbp, 0);		/* file size */
 	mb_put_uint32le(mbp, 0);		/* allocation unit size */
 	mb_put_uint16le(mbp, attr);	/* DOS attr */
 	mb_put_uint32le(mbp, 0);		/* EA size */
 	t2p->t2_maxpcount = 5 * 2;
 	t2p->t2_maxdcount = vcp->vc_txmax;
 	error = smb_t2_request(t2p);
 	smb_t2_done(t2p);
 	return error;
 }
 
 /*
  * NT level. Specially for win9x
  *
  * Set times and attributes of the path of np with a TRANS2
  * SET_PATH_INFORMATION request at level SMB_SET_FILE_BASIC_INFO.
  *
  * The data block carries four 64-bit times followed by a 32-bit attr:
  * creation time (always 0), access time, modification time and change
  * time.  Times are converted with smb_time_local2NT(); a NULL atime or
  * mtime is sent as 0, and the change time repeats the value sent for
  * the modification time.
  *
  * Returns the first error from smb_t2_alloc(), smbfs_fullpath() or
  * smb_t2_request(), else 0.
  */
 int
 smbfs_smb_setpattrNT(struct smbnode *np, u_short attr, struct timespec *mtime,
 	struct timespec *atime, struct smb_cred *scred)
 {
 	struct smb_t2rq *t2p;
 	struct smb_share *ssp = np->n_mount->sm_share;
 	struct smb_vc *vcp = SSTOVC(ssp);
 	struct mbchain *mbp;
 	int64_t tm;
 	int error, tzoff;
 
 	error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_SET_PATH_INFORMATION,
 	    scred, &t2p);
 	if (error)
 		return error;
 	mbp = &t2p->t2_tparam;
 	mb_init(mbp);
 	mb_put_uint16le(mbp, SMB_SET_FILE_BASIC_INFO);
 	mb_put_uint32le(mbp, 0);		/* MBZ */
 	/* mb_put_uint8(mbp, SMB_DT_ASCII); specs incorrect */
 	error = smbfs_fullpath(mbp, vcp, np, NULL, 0);
 	if (error) {
 		smb_t2_done(t2p);
 		return error;
 	}
 	tzoff = vcp->vc_sopt.sv_tz;
 	mbp = &t2p->t2_tdata;
 	mb_init(mbp);
 	mb_put_int64le(mbp, 0);		/* creation time */
 	if (atime) {
 		smb_time_local2NT(atime, tzoff, &tm);
 	} else
 		tm = 0;
 	mb_put_int64le(mbp, tm);		/* access time */
 	if (mtime) {
 		smb_time_local2NT(mtime, tzoff, &tm);
 	} else
 		tm = 0;
 	mb_put_int64le(mbp, tm);		/* modification time */
 	mb_put_int64le(mbp, tm);		/* change time */
 	mb_put_uint32le(mbp, attr);		/* attr */
 	t2p->t2_maxpcount = 24;
 	t2p->t2_maxdcount = 56;
 	error = smb_t2_request(t2p);
 	smb_t2_done(t2p);
 	return error;
 }
 
 /*
  * Set file atime and mtime. Not supported by the core dialect.
  *
  * Sends SMB_COM_SET_INFORMATION2 for the open file np->n_fid.  The word
  * section carries the fid, a zero creation time, and the access and
  * modification times as DOS date/time pairs from smb_time_unix2dos();
  * a NULL atime or mtime is sent as zero date and time.
  *
  * Returns the error from smb_rq_alloc() or smb_rq_simple().
  */
 int
 smbfs_smb_setftime(struct smbnode *np, struct timespec *mtime,
 	struct timespec *atime, struct smb_cred *scred)
 {
 	struct smb_rq *rqp;
 	struct smb_share *ssp = np->n_mount->sm_share;
 	struct mbchain *mbp;
 	u_int16_t date, time;
 	int error, tzoff;
 
 	error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_SET_INFORMATION2, scred,
 	    &rqp);
 	if (error)
 		return (error);
 	tzoff = SSTOVC(ssp)->vc_sopt.sv_tz;
 	smb_rq_getrequest(rqp, &mbp);
 	smb_rq_wstart(rqp);
 	mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM);
 	mb_put_uint32le(mbp, 0);		/* creation time */
 
 	/* Access time, then modification time, each as DOS date + time. */
 	if (atime)
 		smb_time_unix2dos(atime, tzoff, &date, &time, NULL);
 	else
 		time = date = 0;
 	mb_put_uint16le(mbp, date);
 	mb_put_uint16le(mbp, time);
 	if (mtime)
 		smb_time_unix2dos(mtime, tzoff, &date, &time, NULL);
 	else
 		time = date = 0;
 	mb_put_uint16le(mbp, date);
 	mb_put_uint16le(mbp, time);
 	smb_rq_wend(rqp);
 	smb_rq_bstart(rqp);
 	smb_rq_bend(rqp);
 	error = smb_rq_simple(rqp);
 	SMBSDEBUG("%d\n", error);
 	smb_rq_done(rqp);
 	return error;
 }
 
 /*
  * Set DOS file attributes.
  * Looks like this call can be used only if SMB_CAP_NT_SMBS bit is on.
  *
  * Sends a TRANS2 SET_FILE_INFORMATION request at level
  * SMB_SET_FILE_BASIC_INFO for the open file np->n_fid.  The data block
  * carries four 64-bit times -- creation (always 0), access,
  * modification and change, the last repeating the value sent for the
  * modification time -- followed by the 16-bit attr and six zero bytes.
  * A NULL atime or mtime is sent as 0.
  *
  * Returns the error from smb_t2_alloc() or smb_t2_request().
  */
 int
 smbfs_smb_setfattrNT(struct smbnode *np, u_int16_t attr, struct timespec *mtime,
 	struct timespec *atime, struct smb_cred *scred)
 {
 	struct smb_t2rq *t2p;
 	struct smb_share *ssp = np->n_mount->sm_share;
 	struct mbchain *mbp;
 	int64_t tm;
 	int error, svtz;
 
 	error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_SET_FILE_INFORMATION,
 	    scred, &t2p);
 	if (error)
 		return error;
 	svtz = SSTOVC(ssp)->vc_sopt.sv_tz;
 	mbp = &t2p->t2_tparam;
 	mb_init(mbp);
 	mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM);
 	mb_put_uint16le(mbp, SMB_SET_FILE_BASIC_INFO);
 	mb_put_uint32le(mbp, 0);
 	mbp = &t2p->t2_tdata;
 	mb_init(mbp);
 	mb_put_int64le(mbp, 0);		/* creation time */
 	if (atime) {
 		smb_time_local2NT(atime, svtz, &tm);
 	} else
 		tm = 0;
 	mb_put_int64le(mbp, tm);		/* access time */
 	if (mtime) {
 		smb_time_local2NT(mtime, svtz, &tm);
 	} else
 		tm = 0;
 	mb_put_int64le(mbp, tm);		/* modification time */
 	mb_put_int64le(mbp, tm);		/* change time */
 	mb_put_uint16le(mbp, attr);
 	mb_put_uint32le(mbp, 0);			/* padding */
 	mb_put_uint16le(mbp, 0);
 	t2p->t2_maxpcount = 2;
 	t2p->t2_maxdcount = 0;
 	error = smb_t2_request(t2p);
 	smb_t2_done(t2p);
 	return error;
 }
 
 
 /*
  * Open the path of np on the server with SMB_COM_OPEN.
  *
  * The request carries accmode and the attribute mask
  * SMB_FA_SYSTEM | SMB_FA_HIDDEN.  A reply whose word count is not 7
  * yields EBADRPC.  On success the returned file id is stored in
  * np->n_fid and the granted access mode in np->n_rwstate; the
  * attributes, mtime and size in the reply are read past but not used.
  *
  * Returns the first error from smb_rq_alloc(), smbfs_fullpath() or
  * smb_rq_simple(), EBADRPC as above, else 0.
  */
 int
 smbfs_smb_open(struct smbnode *np, int accmode, struct smb_cred *scred)
 {
 	struct smb_rq *rqp;
 	struct smb_share *ssp = np->n_mount->sm_share;
 	struct mbchain *mbp;
 	struct mdchain *mdp;
 	u_int8_t wc;
 	u_int16_t fid, wattr, grantedmode;
 	int error;
 
 	error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_OPEN, scred, &rqp);
 	if (error)
 		return (error);
 	smb_rq_getrequest(rqp, &mbp);
 	smb_rq_wstart(rqp);
 	mb_put_uint16le(mbp, accmode);
 	mb_put_uint16le(mbp, SMB_FA_SYSTEM | SMB_FA_HIDDEN);
 	smb_rq_wend(rqp);
 	smb_rq_bstart(rqp);
 	mb_put_uint8(mbp, SMB_DT_ASCII);
 	/* do { } while(0) so every exit goes through smb_rq_done() below. */
 	do {
 		error = smbfs_fullpath(mbp, SSTOVC(ssp), np, NULL, 0);
 		if (error)
 			break;
 		smb_rq_bend(rqp);
 		error = smb_rq_simple(rqp);
 		if (error)
 			break;
 		smb_rq_getreply(rqp, &mdp);
 		if (md_get_uint8(mdp, &wc) != 0 || wc != 7) {
 			error = EBADRPC;
 			break;
 		}
 		/* fid is read without byte swapping, unlike the other fields. */
 		md_get_uint16(mdp, &fid);
 		md_get_uint16le(mdp, &wattr);
 		md_get_uint32(mdp, NULL);	/* mtime */
 		md_get_uint32(mdp, NULL);	/* fsize */
 		md_get_uint16le(mdp, &grantedmode);
 		/*
 		 * TODO: refresh attributes from this reply
 		 */
 	} while(0);
 	smb_rq_done(rqp);
 	if (error)
 		return error;
 	np->n_fid = fid;
 	np->n_rwstate = grantedmode;
 	return 0;
 }
 
 
 /*
  * Close the file id fid on the share with SMB_COM_CLOSE.
  *
  * The request carries the fid and a modification time converted with
  * smb_time_local2server(), or 0 when mtime is NULL.
  *
  * Returns the error from smb_rq_alloc() or smb_rq_simple().
  */
 int
 smbfs_smb_close(struct smb_share *ssp, u_int16_t fid, struct timespec *mtime,
 	struct smb_cred *scred)
 {
 	struct smb_rq *rqp;
 	struct mbchain *mbp;
 	u_long time;
 	int error;
 
 	error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_CLOSE, scred, &rqp);
 	if (error)
 		return (error);
 	smb_rq_getrequest(rqp, &mbp);
 	smb_rq_wstart(rqp);
 	mb_put_mem(mbp, (caddr_t)&fid, sizeof(fid), MB_MSYSTEM);
 	if (mtime) {
 		smb_time_local2server(mtime, SSTOVC(ssp)->vc_sopt.sv_tz, &time);
 	} else
 		time = 0;
 	mb_put_uint32le(mbp, time);
 	smb_rq_wend(rqp);
 	smb_rq_bstart(rqp);
 	smb_rq_bend(rqp);
 	error = smb_rq_simple(rqp);
 	smb_rq_done(rqp);
 	return error;
 }
 
 /*
  * Create the file 'name' (length 'nmlen') in directory 'dnp' with
  * SMB_COM_CREATE, stamping it with the current time, and close the file id
  * the server hands back right away.
  *
  * Returns 0 on success, EBADRPC when the reply is truncated or does not
  * carry exactly one parameter word, or the error of the failing request
  * step.  A failure of the trailing close is deliberately not reported:
  * the file has been created at that point.
  */
 int
 smbfs_smb_create(struct smbnode *dnp, const char *name, int nmlen,
 	struct smb_cred *scred)
 {
 	struct smb_rq *rqp;
 	struct smb_share *ssp = dnp->n_mount->sm_share;
 	struct mbchain *mbp;
 	struct mdchain *mdp;
 	struct timespec ctime;
 	u_int8_t wc;
 	u_int16_t fid;
 	u_long tm;
 	int error;
 
 	error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_CREATE, scred, &rqp);
 	if (error)
 		return (error);
 	smb_rq_getrequest(rqp, &mbp);
 	smb_rq_wstart(rqp);
 	mb_put_uint16le(mbp, SMB_FA_ARCHIVE);		/* attributes  */
 	nanotime(&ctime);
 	smb_time_local2server(&ctime, SSTOVC(ssp)->vc_sopt.sv_tz, &tm);
 	mb_put_uint32le(mbp, tm);
 	smb_rq_wend(rqp);
 	smb_rq_bstart(rqp);
 	mb_put_uint8(mbp, SMB_DT_ASCII);
 	error = smbfs_fullpath(mbp, SSTOVC(ssp), dnp, name, nmlen);
 	if (!error) {
 		smb_rq_bend(rqp);
 		error = smb_rq_simple(rqp);
 		if (!error) {
 			smb_rq_getreply(rqp, &mdp);
 			/*
 			 * Check every read: on a short reply neither 'wc'
 			 * nor 'fid' would be initialized, and 'fid' is
 			 * sent back to the server by the close below.
 			 */
 			if (md_get_uint8(mdp, &wc) != 0 || wc != 1 ||
 			    md_get_uint16(mdp, &fid) != 0)
 				error = EBADRPC;
 		}
 	}
 	smb_rq_done(rqp);
 	if (error)
 		return error;
 	/* Best effort: the result of the close is intentionally ignored. */
 	(void)smbfs_smb_close(ssp, fid, &ctime, scred);
 	return (0);
 }
 
 int
 smbfs_smb_delete(struct smbnode *np, struct smb_cred *scred)
 {
 	struct smb_rq *rqp;
 	struct smb_share *ssp = np->n_mount->sm_share;
 	struct mbchain *mbp;
 	int error;
 
 	error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_DELETE, scred, &rqp);
 	if (error)
 		return (error);
 	smb_rq_getrequest(rqp, &mbp);
 	smb_rq_wstart(rqp);
 	mb_put_uint16le(mbp, SMB_FA_SYSTEM | SMB_FA_HIDDEN);
 	smb_rq_wend(rqp);
 	smb_rq_bstart(rqp);
 	mb_put_uint8(mbp, SMB_DT_ASCII);
 	error = smbfs_fullpath(mbp, SSTOVC(ssp), np, NULL, 0);
 	if (!error) {
 		smb_rq_bend(rqp);
 		error = smb_rq_simple(rqp);
 	}
 	smb_rq_done(rqp);
 	return error;
 }
 
 int
 smbfs_smb_rename(struct smbnode *src, struct smbnode *tdnp,
 	const char *tname, int tnmlen, struct smb_cred *scred)
 {
 	struct smb_rq *rqp;
 	struct smb_share *ssp = src->n_mount->sm_share;
 	struct mbchain *mbp;
 	int error;
 
 	error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_RENAME, scred, &rqp);
 	if (error)
 		return (error);
 	smb_rq_getrequest(rqp, &mbp);
 	smb_rq_wstart(rqp);
 	mb_put_uint16le(mbp, SMB_FA_SYSTEM | SMB_FA_HIDDEN);
 	smb_rq_wend(rqp);
 	smb_rq_bstart(rqp);
 	mb_put_uint8(mbp, SMB_DT_ASCII);
 	do {
 		error = smbfs_fullpath(mbp, SSTOVC(ssp), src, NULL, 0);
 		if (error)
 			break;
 		mb_put_uint8(mbp, SMB_DT_ASCII);
 		error = smbfs_fullpath(mbp, SSTOVC(ssp), tdnp, tname, tnmlen);
 		if (error)
 			break;
 		smb_rq_bend(rqp);
 		error = smb_rq_simple(rqp);
 	} while(0);
 	smb_rq_done(rqp);
 	return error;
 }
 
 /*
  * Move 'src' to 'tname' (length 'tnmlen') inside directory 'tdnp' with
  * SMB_COM_MOVE.  'flags' is passed through to the server unchanged.  The
  * data block carries the old path followed by the new path, each
  * introduced by an SMB_DT_ASCII marker.
  *
  * Returns the error of the first failing step, or the request result.
  */
 int
 smbfs_smb_move(struct smbnode *src, struct smbnode *tdnp,
 	const char *tname, int tnmlen, u_int16_t flags, struct smb_cred *scred)
 {
 	struct smb_share *ssp = src->n_mount->sm_share;
 	struct smb_rq *rqp;
 	struct mbchain *req;
 	int error;
 
 	error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_MOVE, scred, &rqp);
 	if (error != 0)
 		return (error);
 
 	smb_rq_getrequest(rqp, &req);
 	smb_rq_wstart(rqp);
 	mb_put_uint16le(req, SMB_TID_UNKNOWN);
 	mb_put_uint16le(req, 0x20);	/* delete target file */
 	mb_put_uint16le(req, flags);
 	smb_rq_wend(rqp);
 
 	smb_rq_bstart(rqp);
 	mb_put_uint8(req, SMB_DT_ASCII);
 	error = smbfs_fullpath(req, SSTOVC(ssp), src, NULL, 0);
 	if (error == 0) {
 		mb_put_uint8(req, SMB_DT_ASCII);
 		error = smbfs_fullpath(req, SSTOVC(ssp), tdnp, tname, tnmlen);
 	}
 	if (error == 0) {
 		smb_rq_bend(rqp);
 		error = smb_rq_simple(rqp);
 	}
 	smb_rq_done(rqp);
 	return (error);
 }
 
 int
 smbfs_smb_mkdir(struct smbnode *dnp, const char *name, int len,
 	struct smb_cred *scred)
 {
 	struct smb_rq *rqp;
 	struct smb_share *ssp = dnp->n_mount->sm_share;
 	struct mbchain *mbp;
 	int error;
 
 	error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_CREATE_DIRECTORY, scred,
 	    &rqp);
 	if (error)
 		return (error);
 	smb_rq_getrequest(rqp, &mbp);
 	smb_rq_wstart(rqp);
 	smb_rq_wend(rqp);
 	smb_rq_bstart(rqp);
 	mb_put_uint8(mbp, SMB_DT_ASCII);
 	error = smbfs_fullpath(mbp, SSTOVC(ssp), dnp, name, len);
 	if (!error) {
 		smb_rq_bend(rqp);
 		error = smb_rq_simple(rqp);
 	}
 	smb_rq_done(rqp);
 	return error;
 }
 
 int
 smbfs_smb_rmdir(struct smbnode *np, struct smb_cred *scred)
 {
 	struct smb_rq *rqp;
 	struct smb_share *ssp = np->n_mount->sm_share;
 	struct mbchain *mbp;
 	int error;
 
 	error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_DELETE_DIRECTORY, scred,
 	    &rqp);
 	if (error)
 		return (error);
 	smb_rq_getrequest(rqp, &mbp);
 	smb_rq_wstart(rqp);
 	smb_rq_wend(rqp);
 	smb_rq_bstart(rqp);
 	mb_put_uint8(mbp, SMB_DT_ASCII);
 	error = smbfs_fullpath(mbp, SSTOVC(ssp), np, NULL, 0);
 	if (!error) {
 		smb_rq_bend(rqp);
 		error = smb_rq_simple(rqp);
 	}
 	smb_rq_done(rqp);
 	return error;
 }
 
 /*
  * Issue one SMB_COM_SEARCH request for the search context 'ctx' and
  * validate the header of the reply.  The first call (SMBFS_RDD_FINDFIRST
  * set) sends the wildcard path; later calls resume from the search key
  * saved in ctx->f_skey.
  *
  * On success ctx->f_ecnt holds the number of entries in the reply and the
  * reply chain of ctx->f_rq is positioned at the first entry.
  *
  * Returns 0 on success, ENOENT when the server reports no (more) files,
  * EBADRPC on a malformed reply, or the error of the failing request step.
  */
 static int
 smbfs_smb_search(struct smbfs_fctx *ctx)
 {
 	struct smb_vc *vcp = SSTOVC(ctx->f_ssp);
 	struct smb_rq *rqp;
 	struct mbchain *mbp;
 	struct mdchain *mdp;
 	u_int8_t wc, bt;
 	u_int16_t ec, dlen, bc;
 	int maxent, error, iseof = 0;
 
 	/*
 	 * Ask for no more entries than are still wanted and than fit into
 	 * one transmit buffer after the header and 3 more bytes.
 	 */
 	maxent = min(ctx->f_left, (vcp->vc_txmax - SMB_HDRLEN - 3) / SMB_DENTRYLEN);
 	/* Release the request left over from the previous round, if any. */
 	if (ctx->f_rq) {
 		smb_rq_done(ctx->f_rq);
 		ctx->f_rq = NULL;
 	}
 	error = smb_rq_alloc(SSTOCP(ctx->f_ssp), SMB_COM_SEARCH, ctx->f_scred, &rqp);
 	if (error)
 		return (error);
 	/*
 	 * The request is owned by the context from here on: the early
 	 * returns below leave it in ctx->f_rq, where the next call to this
 	 * function or smbfs_findcloseLM1() releases it.
 	 */
 	ctx->f_rq = rqp;
 	smb_rq_getrequest(rqp, &mbp);
 	smb_rq_wstart(rqp);
 	mb_put_uint16le(mbp, maxent);	/* max entries to return */
 	mb_put_uint16le(mbp, ctx->f_attrmask);
 	smb_rq_wend(rqp);
 	smb_rq_bstart(rqp);
 	mb_put_uint8(mbp, SMB_DT_ASCII);	/* buffer format */
 	if (ctx->f_flags & SMBFS_RDD_FINDFIRST) {
 		/* First request: wildcard path and an empty search key. */
 		error = smbfs_fullpath(mbp, vcp, ctx->f_dnp, ctx->f_wildcard, ctx->f_wclen);
 		if (error)
 			return error;
 		mb_put_uint8(mbp, SMB_DT_VARIABLE);
 		mb_put_uint16le(mbp, 0);	/* context length */
 		ctx->f_flags &= ~SMBFS_RDD_FINDFIRST;
 	} else {
 		/* Continuation: empty file name plus the saved search key. */
 		if (SMB_UNICODE_STRINGS(vcp)) {
 			mb_put_padbyte(mbp);
 			mb_put_uint8(mbp, 0);
 		}
 		mb_put_uint8(mbp, 0);	/* file name length */
 		mb_put_uint8(mbp, SMB_DT_VARIABLE);
 		mb_put_uint16le(mbp, SMB_SKEYLEN);
 		mb_put_mem(mbp, ctx->f_skey, SMB_SKEYLEN, MB_MSYSTEM);
 	}
 	smb_rq_bend(rqp);
 	error = smb_rq_simple(rqp);
 	if (error) {
 		/* ERRDOS/ERRnofiles marks the end of the listing. */
 		if (rqp->sr_errclass == ERRDOS && rqp->sr_serror == ERRnofiles) {
 			error = 0;
 			iseof = 1;
 			ctx->f_flags |= SMBFS_RDD_EOF;
 		} else
 			return error;
 	}
 	smb_rq_getreply(rqp, &mdp);
 	/* NOTE(review): the results of the md_get_*() calls below are not checked. */
 	md_get_uint8(mdp, &wc);
 	if (wc != 1) 
 		return iseof ? ENOENT : EBADRPC;
 	md_get_uint16le(mdp, &ec);	/* number of entries returned */
 	if (ec == 0)
 		return ENOENT;
 	ctx->f_ecnt = ec;
 	md_get_uint16le(mdp, &bc);	/* byte count of the data block */
 	if (bc < 3)
 		return EBADRPC;
 	/* The type byte and 16-bit length read next account for 3 bytes. */
 	bc -= 3;
 	md_get_uint8(mdp, &bt);
 	if (bt != SMB_DT_VARIABLE)
 		return EBADRPC;
 	md_get_uint16le(mdp, &dlen);
 	/* The payload must fill the block and hold whole entries only. */
 	if (dlen != bc || dlen % SMB_DENTRYLEN != 0)
 		return EBADRPC;
 	return 0;
 }
 
 /*
  * Prepare the search context for the SMB_COM_SEARCH based directory
  * listing.  A lone "*" wildcard is replaced by "*.*"; results are stored
  * in the fixed buffer ctx->f_fname.  'dnp' and 'scred' are not used here.
  *
  * Always returns 0.
  */
 static int
 smbfs_findopenLM1(struct smbfs_fctx *ctx, struct smbnode *dnp,
 	const char *wildcard, int wclen, int attr, struct smb_cred *scred)
 {
 	ctx->f_attrmask = attr;
 	if (wildcard == NULL) {
 		ctx->f_wildcard = NULL;
 		ctx->f_wclen = 0;
 	} else if (wclen == 1 && wildcard[0] == '*') {
 		ctx->f_wildcard = "*.*";
 		ctx->f_wclen = 3;
 	} else {
 		ctx->f_wildcard = wildcard;
 		ctx->f_wclen = wclen;
 	}
 	ctx->f_name = ctx->f_fname;
 	return (0);
 }
 
 static int
 smbfs_findnextLM1(struct smbfs_fctx *ctx, int limit)
 {
 	struct mdchain *mbp;
 	struct smb_rq *rqp;
 	char *cp;
 	u_int8_t battr;
 	u_int16_t date, time;
 	u_int32_t size;
 	int error;
 
 	if (ctx->f_ecnt == 0) {
 		if (ctx->f_flags & SMBFS_RDD_EOF)
 			return ENOENT;
 		ctx->f_left = ctx->f_limit = limit;
 		error = smbfs_smb_search(ctx);
 		if (error)
 			return error;
 	}
 	rqp = ctx->f_rq;
 	smb_rq_getreply(rqp, &mbp);
 	md_get_mem(mbp, ctx->f_skey, SMB_SKEYLEN, MB_MSYSTEM);
 	md_get_uint8(mbp, &battr);
 	md_get_uint16le(mbp, &time);
 	md_get_uint16le(mbp, &date);
 	md_get_uint32le(mbp, &size);
 	cp = ctx->f_name;
 	md_get_mem(mbp, cp, sizeof(ctx->f_fname), MB_MSYSTEM);
 	cp[sizeof(ctx->f_fname) - 1] = 0;
 	cp += strlen(cp) - 1;
 	while (*cp == ' ' && cp >= ctx->f_name)
 		*cp-- = 0;
 	ctx->f_attr.fa_attr = battr;
 	smb_dos2unixtime(date, time, 0, rqp->sr_vc->vc_sopt.sv_tz,
 	    &ctx->f_attr.fa_mtime);
 	ctx->f_attr.fa_size = size;
 	ctx->f_nmlen = strlen(ctx->f_name);
 	ctx->f_ecnt--;
 	ctx->f_left--;
 	return 0;
 }
 
 static int
 smbfs_findcloseLM1(struct smbfs_fctx *ctx)
 {
 	if (ctx->f_rq)
 		smb_rq_done(ctx->f_rq);
 	return 0;
 }
 
 /*
  * TRANS2_FIND_FIRST2/NEXT2, used for NT LM12 dialect
  *
  * Issue one FIND_FIRST2 (while SMBFS_RDD_FINDFIRST is set) or FIND_NEXT2
  * transaction for the search context 'ctx' and parse the reply parameters.
  * On success ctx->f_ecnt holds the number of entries in the reply,
  * ctx->f_rnameofs the last parameter word of the reply and ctx->f_eofs is
  * reset to 0; the entries themselves stay in the reply data of ctx->f_t2.
  *
  * The transaction is owned by the context as soon as it is allocated, so
  * the early returns leave it in ctx->f_t2, where the next call or
  * smbfs_findcloseLM2() releases it.
  *
  * Returns 0 on success, ENOENT when the reply carries no entries, or the
  * error of the failing request or parse step.
  */
 static int
 smbfs_smb_trans2find2(struct smbfs_fctx *ctx)
 {
 	struct smb_t2rq *t2p;
 	struct smb_vc *vcp = SSTOVC(ctx->f_ssp);
 	struct mbchain *mbp;
 	struct mdchain *mdp;
 	u_int16_t tw, flags;
 	int error;
 
 	/* Release the transaction left over from the previous round. */
 	if (ctx->f_t2) {
 		smb_t2_done(ctx->f_t2);
 		ctx->f_t2 = NULL;
 	}
 	ctx->f_flags &= ~SMBFS_RDD_GOTRNAME;
 	flags = 8 | 2;			/* <resume> | <close if EOS> */
 	if (ctx->f_flags & SMBFS_RDD_FINDSINGLE) {
 		flags |= 1;		/* close search after this request */
 		ctx->f_flags |= SMBFS_RDD_NOCLOSE;
 	}
 	if (ctx->f_flags & SMBFS_RDD_FINDFIRST) {
 		error = smb_t2_alloc(SSTOCP(ctx->f_ssp), SMB_TRANS2_FIND_FIRST2,
 		    ctx->f_scred, &t2p);
 		if (error)
 			return error;
 		ctx->f_t2 = t2p;
 		mbp = &t2p->t2_tparam;
 		mb_init(mbp);
 		mb_put_uint16le(mbp, ctx->f_attrmask);
 		mb_put_uint16le(mbp, ctx->f_limit);
 		mb_put_uint16le(mbp, flags);
 		mb_put_uint16le(mbp, ctx->f_infolevel);
 		mb_put_uint32le(mbp, 0);
 		error = smbfs_fullpath(mbp, vcp, ctx->f_dnp, ctx->f_wildcard, ctx->f_wclen);
 		if (error)
 			return error;
 	} else	{
 		error = smb_t2_alloc(SSTOCP(ctx->f_ssp), SMB_TRANS2_FIND_NEXT2,
 		    ctx->f_scred, &t2p);
 		if (error)
 			return error;
 		ctx->f_t2 = t2p;
 		mbp = &t2p->t2_tparam;
 		mb_init(mbp);
 		/* The search id is copied back exactly as it was received. */
 		mb_put_mem(mbp, (caddr_t)&ctx->f_Sid, 2, MB_MSYSTEM);
 		mb_put_uint16le(mbp, ctx->f_limit);
 		mb_put_uint16le(mbp, ctx->f_infolevel);
 		mb_put_uint32le(mbp, 0);		/* resume key */
 		mb_put_uint16le(mbp, flags);
 		if (ctx->f_rname)
 			mb_put_mem(mbp, ctx->f_rname, ctx->f_rnamelen + 1, MB_MSYSTEM);
 		else
 			mb_put_uint8(mbp, 0);	/* resume file name */
 #if 0
 	struct timeval tv;
 	tv.tv_sec = 0;
 	tv.tv_usec = 200 * 1000;	/* 200ms */
 		if (vcp->vc_flags & SMBC_WIN95) {
 			/*
 			 * some implementations suggests to sleep here
 			 * for 200ms, due to the bug in the Win95.
 			 * I've didn't notice any problem, but put code
 			 * for it.
 			 */
 			 pause("fix95", tvtohz(&tv));
 		}
 #endif
 	}
 	t2p->t2_maxpcount = 5 * 2;
 	t2p->t2_maxdcount = vcp->vc_txmax;
 	error = smb_t2_request(t2p);
 	if (error)
 		return error;
 	mdp = &t2p->t2_rparam;
 	/* Only the FIND_FIRST2 reply starts with the search id. */
 	if (ctx->f_flags & SMBFS_RDD_FINDFIRST) {
 		if ((error = md_get_uint16(mdp, &ctx->f_Sid)) != 0)
 			return error;
 		ctx->f_flags &= ~SMBFS_RDD_FINDFIRST;
 	}
 	/* Number of entries in this reply. */
 	if ((error = md_get_uint16le(mdp, &tw)) != 0)
 		return error;
 	ctx->f_ecnt = tw;
 	/* Non-zero: end of search, nothing left to close on the server. */
 	if ((error = md_get_uint16le(mdp, &tw)) != 0)
 		return error;
 	if (tw)
 		ctx->f_flags |= SMBFS_RDD_EOF | SMBFS_RDD_NOCLOSE;
 	/* This word is read and discarded. */
 	if ((error = md_get_uint16le(mdp, &tw)) != 0)
 		return error;
 	/* Last word: kept below as ctx->f_rnameofs. */
 	if ((error = md_get_uint16le(mdp, &tw)) != 0)
 		return error;
 	if (ctx->f_ecnt == 0) {
 		ctx->f_flags |= SMBFS_RDD_EOF | SMBFS_RDD_NOCLOSE;
 		return ENOENT;
 	}
 	ctx->f_rnameofs = tw;
 	mdp = &t2p->t2_rdata;
 	if (mdp->md_top == NULL) {
 		printf("bug: ecnt = %d, but data is NULL (please report)\n", ctx->f_ecnt);
 		return ENOENT;
 	}
 	if (mdp->md_top->m_len == 0) {
 		/*
 		 * Report the reply data chain that was just examined, not
 		 * the request parameter chain still referenced by mbp.
 		 */
 		printf("bug: ecnt = %d, but m_len = 0 and m_next = %p (please report)\n", ctx->f_ecnt,mdp->md_top->m_next);
 		return ENOENT;
 	}
 	ctx->f_eofs = 0;
 	return 0;
 }
 
 /*
  * Tell the server to drop the search identified by ctx->f_Sid by sending
  * SMB_COM_FIND_CLOSE2.  Returns the result of the request.
  */
 static int
 smbfs_smb_findclose2(struct smbfs_fctx *ctx)
 {
 	struct smb_rq *rqp;
 	struct mbchain *req;
 	int error;
 
 	error = smb_rq_alloc(SSTOCP(ctx->f_ssp), SMB_COM_FIND_CLOSE2,
 	    ctx->f_scred, &rqp);
 	if (error != 0)
 		return (error);
 
 	/* One parameter word: the search id, copied back as received. */
 	smb_rq_getrequest(rqp, &req);
 	smb_rq_wstart(rqp);
 	mb_put_mem(req, (caddr_t)&ctx->f_Sid, 2, MB_MSYSTEM);
 	smb_rq_wend(rqp);
 
 	/* No data bytes. */
 	smb_rq_bstart(rqp);
 	smb_rq_bend(rqp);
 
 	error = smb_rq_simple(rqp);
 	smb_rq_done(rqp);
 	return (error);
 }
 
 /*
  * Prepare the search context for the TRANS2_FIND_FIRST2/NEXT2 based
  * directory listing: allocate the name buffer (twice as large when the
  * connection uses Unicode strings), pick the information level from the
  * negotiated dialect and remember the wildcard.  'dnp' and 'scred' are
  * not used here.
  *
  * Always returns 0.
  */
 static int
 smbfs_findopenLM2(struct smbfs_fctx *ctx, struct smbnode *dnp,
 	const char *wildcard, int wclen, int attr, struct smb_cred *scred)
 {
 	struct smb_vc *vcp = SSTOVC(ctx->f_ssp);
 
 	if (SMB_UNICODE_STRINGS(vcp))
 		ctx->f_name = malloc(SMB_MAXFNAMELEN * 2, M_SMBFSDATA,
 		    M_WAITOK);
 	else
 		ctx->f_name = malloc(SMB_MAXFNAMELEN, M_SMBFSDATA, M_WAITOK);
 
 	if (SMB_DIALECT(vcp) < SMB_DIALECT_NTLM0_12)
 		ctx->f_infolevel = SMB_INFO_STANDARD;
 	else
 		ctx->f_infolevel = SMB_FIND_FILE_DIRECTORY_INFO;
 
 	ctx->f_wildcard = wildcard;
 	ctx->f_wclen = wclen;
 	ctx->f_attrmask = attr;
 	return (0);
 }
 
 /*
  * Fetch the next directory entry of a TRANS2_FIND_FIRST2/NEXT2 based
  * listing into ctx (f_name, f_nmlen, f_attr).  When the current reply is
  * exhausted a new transaction asking for up to 'limit' entries is issued
  * first.  The entry layout depends on ctx->f_infolevel.
  *
  * When the entry covers the offset ctx->f_rnameofs, its name is saved in
  * ctx->f_rname for use as the resume name of the next FIND_NEXT2.
  *
  * Returns 0 on success, ENOENT once the end of the listing was reached,
  * EINVAL for an unknown information level, EBADRPC for a malformed entry,
  * or the error of the failing request or read.
  */
 static int
 smbfs_findnextLM2(struct smbfs_fctx *ctx, int limit)
 {
 	struct mdchain *mbp;
 	struct smb_t2rq *t2p;
 	char *cp;
 	u_int8_t tb;
 	u_int16_t date, time, wattr;
 	u_int32_t size, next, dattr;
 	int64_t lint;
 	int error, svtz, cnt, fxsz, nmlen, recsz;
 
 	if (ctx->f_ecnt == 0) {
 		if (ctx->f_flags & SMBFS_RDD_EOF)
 			return ENOENT;
 		ctx->f_left = ctx->f_limit = limit;
 		error = smbfs_smb_trans2find2(ctx);
 		if (error)
 			return error;
 	}
 	t2p = ctx->f_t2;
 	mbp = &t2p->t2_rdata;
 	svtz = SSTOVC(ctx->f_ssp)->vc_sopt.sv_tz;
 	/*
 	 * Decode the fixed part of the entry.  Each case leaves the name
 	 * length in 'size', the size of the fixed part in 'fxsz', the
 	 * distance to the next entry in 'next' and the record size used
 	 * for offset bookkeeping in 'recsz'.
 	 */
 	switch (ctx->f_infolevel) {
 	    case SMB_INFO_STANDARD:
 		next = 0;
 		fxsz = 0;
 		md_get_uint16le(mbp, &date);
 		md_get_uint16le(mbp, &time);	/* creation time */
 		md_get_uint16le(mbp, &date);
 		md_get_uint16le(mbp, &time);	/* access time */
 		smb_dos2unixtime(date, time, 0, svtz, &ctx->f_attr.fa_atime);
 		md_get_uint16le(mbp, &date);
 		md_get_uint16le(mbp, &time);	/* modification time */
 		smb_dos2unixtime(date, time, 0, svtz, &ctx->f_attr.fa_mtime);
 		md_get_uint32le(mbp, &size);
 		ctx->f_attr.fa_size = size;
 		md_get_uint32(mbp, NULL);	/* allocation size */
 		md_get_uint16le(mbp, &wattr);
 		ctx->f_attr.fa_attr = wattr;
 		md_get_uint8(mbp, &tb);
 		size = nmlen = tb;
 		fxsz = 23;
 		recsz = next = 24 + nmlen;	/* docs misses zero byte at end */
 		break;
 	    case SMB_FIND_FILE_DIRECTORY_INFO:
 		md_get_uint32le(mbp, &next);
 		md_get_uint32(mbp, NULL);	/* file index */
 		md_get_int64(mbp, NULL);	/* creation time */
 		md_get_int64le(mbp, &lint);
 		smb_time_NT2local(lint, svtz, &ctx->f_attr.fa_atime);
 		md_get_int64le(mbp, &lint);
 		smb_time_NT2local(lint, svtz, &ctx->f_attr.fa_mtime);
 		md_get_int64le(mbp, &lint);
 		smb_time_NT2local(lint, svtz, &ctx->f_attr.fa_ctime);
 		md_get_int64le(mbp, &lint);	/* file size */
 		ctx->f_attr.fa_size = lint;
 		md_get_int64(mbp, NULL);	/* real size (should use) */
 		md_get_uint32le(mbp, &dattr);	/* EA */
 		ctx->f_attr.fa_attr = dattr;
 		md_get_uint32le(mbp, &size);	/* name len */
 		fxsz = 64;
 		/* A zero 'next' means no offset was given: derive the size. */
 		recsz = next ? next : fxsz + size;
 		break;
 	    default:
 		SMBERROR("unexpected info level %d\n", ctx->f_infolevel);
 		return EINVAL;
 	}
 	/* Clamp the name to the buffer allocated by smbfs_findopenLM2(). */
 	if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) {
 		nmlen = min(size, SMB_MAXFNAMELEN * 2);
 	} else
 		nmlen = min(size, SMB_MAXFNAMELEN);
 	cp = ctx->f_name;
 	error = md_get_mem(mbp, cp, nmlen, MB_MSYSTEM);
 	if (error)
 		return error;
 	/* Skip whatever lies between the name and the next entry. */
 	if (next) {
 		cnt = next - nmlen - fxsz;
 		if (cnt > 0)
 			md_get_mem(mbp, NULL, cnt, MB_MSYSTEM);
 		else if (cnt < 0) {
 			SMBERROR("out of sync\n");
 			return EBADRPC;
 		}
 	}
 	/* Drop one trailing terminator (two bytes wide for Unicode). */
 	if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) {
 		if (nmlen > 1 && cp[nmlen - 1] == 0 && cp[nmlen - 2] == 0)
 			nmlen -= 2;
 	} else
 		if (nmlen && cp[nmlen - 1] == 0)
 			nmlen--;
 	if (nmlen == 0)
 		return EBADRPC;
 
 	/* From here on 'next' is the offset just past this entry. */
 	next = ctx->f_eofs + recsz;
 	if (ctx->f_rnameofs && (ctx->f_flags & SMBFS_RDD_GOTRNAME) == 0 &&
 	    (ctx->f_rnameofs >= ctx->f_eofs && ctx->f_rnameofs < next)) {
 		/*
 		 * Server needs a resume filename.
 		 */
 		/*
 		 * NOTE(review): f_rnamelen is only updated when the buffer
 		 * is reallocated, so after a shorter name is stored it still
 		 * holds the older, larger length; smbfs_smb_trans2find2()
 		 * sends f_rnamelen + 1 bytes -- confirm this is intended.
 		 */
 		if (ctx->f_rnamelen <= nmlen) {
 			if (ctx->f_rname)
 				free(ctx->f_rname, M_SMBFSDATA);
 			ctx->f_rname = malloc(nmlen + 1, M_SMBFSDATA, M_WAITOK);
 			ctx->f_rnamelen = nmlen;
 		}
 		bcopy(ctx->f_name, ctx->f_rname, nmlen);
 		ctx->f_rname[nmlen] = 0;
 		ctx->f_flags |= SMBFS_RDD_GOTRNAME;
 	}
 	ctx->f_nmlen = nmlen;
 	ctx->f_eofs = next;
 	ctx->f_ecnt--;
 	ctx->f_left--;
 	return 0;
 }
 
 static int
 smbfs_findcloseLM2(struct smbfs_fctx *ctx)
 {
 	if (ctx->f_name)
 		free(ctx->f_name, M_SMBFSDATA);
 	if (ctx->f_t2)
 		smb_t2_done(ctx->f_t2);
 	if ((ctx->f_flags & SMBFS_RDD_NOCLOSE) == 0)
 		smbfs_smb_findclose2(ctx);
 	return 0;
 }
 
 int
 smbfs_findopen(struct smbnode *dnp, const char *wildcard, int wclen, int attr,
 	struct smb_cred *scred, struct smbfs_fctx **ctxpp)
 {
 	struct smbfs_fctx *ctx;
 	int error;
 
 	ctx = malloc(sizeof(*ctx), M_SMBFSDATA, M_WAITOK | M_ZERO);
 	ctx->f_ssp = dnp->n_mount->sm_share;
 	ctx->f_dnp = dnp;
 	ctx->f_flags = SMBFS_RDD_FINDFIRST;
 	ctx->f_scred = scred;
 	if (SMB_DIALECT(SSTOVC(ctx->f_ssp)) < SMB_DIALECT_LANMAN2_0 ||
 	    (dnp->n_mount->sm_flags & SMBFS_MOUNT_NO_LONG)) {
 		ctx->f_flags |= SMBFS_RDD_USESEARCH;
 		error = smbfs_findopenLM1(ctx, dnp, wildcard, wclen, attr, scred);
 	} else
 		error = smbfs_findopenLM2(ctx, dnp, wildcard, wclen, attr, scred);
 	if (error)
 		smbfs_findclose(ctx, scred);
 	else
 		*ctxpp = ctx;
 	return error;
 }
 
 /*
  * Advance the search to the next real directory entry, skipping "." and
  * "..", then convert the name to local form and assign an inode number.
  * A 'limit' of 0 means "no practical limit"; limits above 1 are scaled.
  *
  * Returns 0 with the entry in ctx (f_name, f_nmlen, f_attr), or the error
  * reported by the dialect specific fetch routine.
  */
 int
 smbfs_findnext(struct smbfs_fctx *ctx, int limit, struct smb_cred *scred)
 {
 	int error, isdot;
 
 	if (limit == 0)
 		limit = 1000000;
 	else if (limit > 1)
 		limit *= 4;	/* empirical */
 	ctx->f_scred = scred;
 	do {
 		if (ctx->f_flags & SMBFS_RDD_USESEARCH)
 			error = smbfs_findnextLM1(ctx, limit);
 		else
 			error = smbfs_findnextLM2(ctx, limit);
 		if (error != 0)
 			return (error);
 		/* Is this entry "." or ".." in the wire encoding? */
 		if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) {
 			isdot = (ctx->f_nmlen == 2 &&
 			    *(u_int16_t *)ctx->f_name == htole16(0x002e)) ||
 			    (ctx->f_nmlen == 4 &&
 			    *(u_int32_t *)ctx->f_name == htole32(0x002e002e));
 		} else {
 			isdot = (ctx->f_nmlen == 1 && ctx->f_name[0] == '.') ||
 			    (ctx->f_nmlen == 2 && ctx->f_name[0] == '.' &&
 			    ctx->f_name[1] == '.');
 		}
 	} while (isdot);
 	smbfs_fname_tolocal(SSTOVC(ctx->f_ssp), ctx->f_name, &ctx->f_nmlen,
 	    ctx->f_dnp->n_mount->sm_caseopt);
 	ctx->f_attr.fa_ino = smbfs_getino(ctx->f_dnp, ctx->f_name,
 	    ctx->f_nmlen);
 	return (0);
 }
 
 int
 smbfs_findclose(struct smbfs_fctx *ctx, struct smb_cred *scred)
 {
 	ctx->f_scred = scred;
 	if (ctx->f_flags & SMBFS_RDD_USESEARCH) {
 		smbfs_findcloseLM1(ctx);
 	} else
 		smbfs_findcloseLM2(ctx);
 	if (ctx->f_rname)
 		free(ctx->f_rname, M_SMBFSDATA);
 	free(ctx, M_SMBFSDATA);
 	return 0;
 }
 
 /*
  * Look up the attributes of 'name' (length 'nmlen') in directory 'dnp'
  * by running a single-entry directory search and store them in 'fap'.
  * A NULL 'dnp', or a NULL 'name' on the node with inode number 2, is
  * answered locally with a synthetic directory entry.  With a NULL 'name'
  * the inode number of 'dnp' is reported.
  *
  * Returns 0 on success or the error of the search.
  */
 int
 smbfs_smb_lookup(struct smbnode *dnp, const char *name, int nmlen,
 	struct smbfattr *fap, struct smb_cred *scred)
 {
 	struct smbfs_fctx *ctx;
 	int error;
 
 	if (dnp == NULL || (dnp->n_ino == 2 && name == NULL)) {
 		bzero(fap, sizeof(*fap));
 		fap->fa_ino = 2;
 		fap->fa_attr = SMB_FA_DIR;
 		return (0);
 	}
 
 	/* Callers must not pass ".." or "." down to this point. */
 	MPASS(!(nmlen == 2 && name[0] == '.' && name[1] == '.'));
 	MPASS(!(nmlen == 1 && name[0] == '.'));
 	ASSERT_VOP_ELOCKED(dnp->n_vnode, "smbfs_smb_lookup");
 
 	error = smbfs_findopen(dnp, name, nmlen,
 	    SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR, scred, &ctx);
 	if (error != 0)
 		return (error);
 
 	ctx->f_flags |= SMBFS_RDD_FINDSINGLE;
 	error = smbfs_findnext(ctx, 1, scred);
 	if (error != 0)
 		goto out;
 	*fap = ctx->f_attr;
 	if (name == NULL)
 		fap->fa_ino = dnp->n_ino;
 out:
 	smbfs_findclose(ctx, scred);
 	return (error);
 }
Index: projects/clang380-import/sys/fs/smbfs/smbfs_subr.h
===================================================================
--- projects/clang380-import/sys/fs/smbfs/smbfs_subr.h	(revision 293686)
+++ projects/clang380-import/sys/fs/smbfs/smbfs_subr.h	(revision 293687)
@@ -1,182 +1,183 @@
 /*-
  * Copyright (c) 2000-2001 Boris Popov
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 #ifndef _FS_SMBFS_SMBFS_SUBR_H_
 #define _FS_SMBFS_SMBFS_SUBR_H_
 
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_SMBFSDATA);
 MALLOC_DECLARE(M_SMBFSCRED);
 #endif
 
 #define SMBFSERR(format, args...) printf("%s: "format, __func__ ,## args)
 
 #ifdef SMB_VNODE_DEBUG
 #define SMBVDEBUG(format, args...) printf("%s: "format, __func__ ,## args)
 #else
 #define SMBVDEBUG(format, args...)
 #endif
 
 /*
  * Possible lock commands
  */
 #define SMB_LOCK_EXCL		0
 #define	SMB_LOCK_SHARED		1
 #define	SMB_LOCK_RELEASE	2
 
 struct smbmount;
 struct proc;
 struct timespec;
 struct ucred;
 struct vattr;
 struct vnode;
 struct statfs;
 
 /*
  * File attributes as decoded from a server reply.
  */
 struct smbfattr {
 	int		fa_attr;	/* SMB_FA_ attribute bits */
 	int64_t		fa_size;	/* file size */
 	struct timespec	fa_atime;	/* access time */
 	struct timespec	fa_ctime;	/* change time */
 	struct timespec	fa_mtime;	/* modification time */
 	long		fa_ino;		/* inode number */
 };
 
 /*
  * Context to perform findfirst/findnext/findclose operations
  */
 #define	SMBFS_RDD_FINDFIRST	0x01
 #define	SMBFS_RDD_EOF		0x02
 #define	SMBFS_RDD_FINDSINGLE	0x04
 #define	SMBFS_RDD_USESEARCH	0x08
 #define	SMBFS_RDD_NOCLOSE	0x10
 #define	SMBFS_RDD_GOTRNAME	0x1000
 
 /*
  * Search context supplied by server
  */
 #define	SMB_SKEYLEN		21			/* search context */
 #define SMB_DENTRYLEN		(SMB_SKEYLEN + 22)	/* entire entry */
 
 struct smbfs_fctx {
 	/*
 	 * Settable values
 	 */
 	int		f_flags;	/* SMBFS_RDD_ */
 	/*
 	 * Return values
 	 */
 	struct smbfattr	f_attr;		/* current attributes */
 	char *		f_name;		/* current file name */
 	int		f_nmlen;	/* name len */
 	/*
 	 * Internal variables
 	 */
 	int		f_limit;	/* maximum number of entries */
 	int		f_attrmask;	/* SMB_FA_ */
 	int		f_wclen;	/* length of f_wildcard */
 	const char *	f_wildcard;	/* search pattern, may be NULL */
 	struct smbnode*	f_dnp;		/* directory being searched */
 	struct smb_cred*f_scred;	/* credentials for requests */
 	struct smb_share *f_ssp;	/* share the directory lives on */
 	union {
 		struct smb_rq *	uf_rq;	/* with SMBFS_RDD_USESEARCH */
 		struct smb_t2rq * uf_t2; /* without SMBFS_RDD_USESEARCH */
 	} f_urq;
 	int		f_left;		/* entries left */
 	int		f_ecnt;		/* entries left in the current response */
 	int		f_eofs;		/* entry offset in the parameter block */
 	u_char 		f_skey[SMB_SKEYLEN]; /* server side search context */
 	u_char		f_fname[8 + 1 + 3 + 1]; /* common case for 8.3 filenames */
 	u_int16_t	f_Sid;		/* search id, kept as received */
 	u_int16_t	f_infolevel;	/* information level requested */
 	int		f_rnamelen;	/* length of f_rname */
 	char *		f_rname;	/* resume name/key */
 	int		f_rnameofs;	/* resume name offset from the reply */
 };
 
 #define f_rq	f_urq.uf_rq
 #define f_t2	f_urq.uf_t2
 
 /*
  * smb level
  */
 int  smbfs_smb_lock(struct smbnode *np, int op, caddr_t id,
 	off_t start, off_t end,	struct smb_cred *scred);
 int  smbfs_smb_statfs(struct smb_share *ssp, struct statfs *sbp,
 	struct smb_cred *scred);
-int  smbfs_smb_setfsize(struct smbnode *np, int newsize, struct smb_cred *scred);
+int  smbfs_smb_setfsize(struct smbnode *np, int64_t newsize,
+	struct smb_cred *scred);
 
 int  smbfs_smb_query_info(struct smbnode *np, const char *name, int len,
 	struct smbfattr *fap, struct smb_cred *scred);
 int  smbfs_smb_setpattr(struct smbnode *np, u_int16_t attr,
 	struct timespec *mtime, struct smb_cred *scred);
 int  smbfs_smb_setptime2(struct smbnode *np, struct timespec *mtime,
 	struct timespec *atime, int attr, struct smb_cred *scred);
 int  smbfs_smb_setpattrNT(struct smbnode *np, u_int16_t attr,
 	struct timespec *mtime, struct timespec *atime, struct smb_cred *scred);
 
 int  smbfs_smb_setftime(struct smbnode *np, struct timespec *mtime,
 	struct timespec *atime, struct smb_cred *scred);
 int  smbfs_smb_setfattrNT(struct smbnode *np, u_int16_t attr,
 	struct timespec *mtime,	struct timespec *atime, struct smb_cred *scred);
 
 int  smbfs_smb_open(struct smbnode *np, int accmode, struct smb_cred *scred);
 int  smbfs_smb_close(struct smb_share *ssp, u_int16_t fid,
 	 struct timespec *mtime, struct smb_cred *scred);
 int  smbfs_smb_create(struct smbnode *dnp, const char *name, int len,
 	struct smb_cred *scred);
 int  smbfs_smb_delete(struct smbnode *np, struct smb_cred *scred);
 int  smbfs_smb_flush(struct smbnode *np, struct smb_cred *scred);
 int  smbfs_smb_rename(struct smbnode *src, struct smbnode *tdnp,
 	const char *tname, int tnmlen, struct smb_cred *scred);
 int  smbfs_smb_move(struct smbnode *src, struct smbnode *tdnp,
 	const char *tname, int tnmlen, u_int16_t flags, struct smb_cred *scred);
 int  smbfs_smb_mkdir(struct smbnode *dnp, const char *name, int len,
 	struct smb_cred *scred);
 int  smbfs_smb_rmdir(struct smbnode *np, struct smb_cred *scred);
 int  smbfs_findopen(struct smbnode *dnp, const char *wildcard, int wclen,
 	int attr, struct smb_cred *scred, struct smbfs_fctx **ctxpp);
 int  smbfs_findnext(struct smbfs_fctx *ctx, int limit, struct smb_cred *scred);
 int  smbfs_findclose(struct smbfs_fctx *ctx, struct smb_cred *scred);
 int  smbfs_fullpath(struct mbchain *mbp, struct smb_vc *vcp,
 	struct smbnode *dnp, const char *name, int nmlen);
 int  smbfs_smb_lookup(struct smbnode *dnp, const char *name, int nmlen,
 	struct smbfattr *fap, struct smb_cred *scred);
 
 int  smbfs_fname_tolocal(struct smb_vc *vcp, char *name, int *nmlen, int caseopt);
 
 void  smb_time_local2server(struct timespec *tsp, int tzoff, u_long *seconds);
 void  smb_time_server2local(u_long seconds, int tzoff, struct timespec *tsp);
 void  smb_time_NT2local(int64_t nsec, int tzoff, struct timespec *tsp);
 void  smb_time_local2NT(struct timespec *tsp, int tzoff, int64_t *nsec);
 void  smb_time_unix2dos(struct timespec *tsp, int tzoff, u_int16_t *ddp, 
 	     u_int16_t *dtp, u_int8_t *dhp);
 void smb_dos2unixtime (u_int dd, u_int dt, u_int dh, int tzoff, struct timespec *tsp);
 
 void *smbfs_malloc_scred(void);
 void smbfs_free_scred(void *);
 #endif /* !_FS_SMBFS_SMBFS_SUBR_H_ */
Index: projects/clang380-import/sys/fs/smbfs/smbfs_vnops.c
===================================================================
--- projects/clang380-import/sys/fs/smbfs/smbfs_vnops.c	(revision 293686)
+++ projects/clang380-import/sys/fs/smbfs/smbfs_vnops.c	(revision 293687)
@@ -1,1373 +1,1374 @@
 /*-
  * Copyright (c) 2000-2001 Boris Popov
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/namei.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/fcntl.h>
 #include <sys/mount.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <sys/limits.h>
 #include <sys/lockf.h>
 #include <sys/stat.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 
 
 #include <netsmb/smb.h>
 #include <netsmb/smb_conn.h>
 #include <netsmb/smb_subr.h>
 
 #include <fs/smbfs/smbfs.h>
 #include <fs/smbfs/smbfs_node.h>
 #include <fs/smbfs/smbfs_subr.h>
 
 /*
  * Prototypes for SMBFS vnode operations.
  *
  * Each handler is declared through the matching vop_*_t typedef, so the
  * definitions further down must agree with those types.  Note that the
  * definitions of smbfs_advlock and smbfs_lookup omit the "static" keyword;
  * these prior declarations are what give them internal linkage.
  */
 static vop_create_t	smbfs_create;
 static vop_mknod_t	smbfs_mknod;
 static vop_open_t	smbfs_open;
 static vop_close_t	smbfs_close;
 static vop_access_t	smbfs_access;
 static vop_getattr_t	smbfs_getattr;
 static vop_setattr_t	smbfs_setattr;
 static vop_read_t	smbfs_read;
 static vop_write_t	smbfs_write;
 static vop_fsync_t	smbfs_fsync;
 static vop_remove_t	smbfs_remove;
 static vop_link_t	smbfs_link;
 static vop_lookup_t	smbfs_lookup;
 static vop_rename_t	smbfs_rename;
 static vop_mkdir_t	smbfs_mkdir;
 static vop_rmdir_t	smbfs_rmdir;
 static vop_symlink_t	smbfs_symlink;
 static vop_readdir_t	smbfs_readdir;
 static vop_strategy_t	smbfs_strategy;
 static vop_print_t	smbfs_print;
 static vop_pathconf_t	smbfs_pathconf;
 static vop_advlock_t	smbfs_advlock;
 static vop_getextattr_t	smbfs_getextattr;
 
 /*
  * Vnode operations vector for smbfs.
  *
  * Entries are listed alphabetically by operation name.  vop_default names
  * default_vnodeops, which is presumably consulted for any operation not
  * listed here (confirm against the VFS vop_vector dispatch code).
  * smbfs_getpages, smbfs_putpages, smbfs_inactive and smbfs_reclaim are not
  * declared in the prototype list above and are expected to come from the
  * smbfs headers.  Setting extended attributes is not wired up (the entry
  * is commented out).
  */
 struct vop_vector smbfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		smbfs_access,
 	.vop_advlock =		smbfs_advlock,
 	.vop_close =		smbfs_close,
 	.vop_create =		smbfs_create,
 	.vop_fsync =		smbfs_fsync,
 	.vop_getattr =		smbfs_getattr,
 	.vop_getextattr = 	smbfs_getextattr,
 	.vop_getpages =		smbfs_getpages,
 	.vop_inactive =		smbfs_inactive,
 	.vop_ioctl =		smbfs_ioctl,
 	.vop_link =		smbfs_link,
 	.vop_lookup =		smbfs_lookup,
 	.vop_mkdir =		smbfs_mkdir,
 	.vop_mknod =		smbfs_mknod,
 	.vop_open =		smbfs_open,
 	.vop_pathconf =		smbfs_pathconf,
 	.vop_print =		smbfs_print,
 	.vop_putpages =		smbfs_putpages,
 	.vop_read =		smbfs_read,
 	.vop_readdir =		smbfs_readdir,
 	.vop_reclaim =		smbfs_reclaim,
 	.vop_remove =		smbfs_remove,
 	.vop_rename =		smbfs_rename,
 	.vop_rmdir =		smbfs_rmdir,
 	.vop_setattr =		smbfs_setattr,
 /*	.vop_setextattr =	smbfs_setextattr,*/
 	.vop_strategy =		smbfs_strategy,
 	.vop_symlink =		smbfs_symlink,
 	.vop_write =		smbfs_write,
 };
 
 /*
  * Check whether the caller may access a vnode in the requested mode.
  *
  * Write access to a regular file, directory or symlink on a read-only
  * mount yields EROFS.  Otherwise the decision is delegated to vaccess()
  * using the mount-wide owner, group and file/directory mode, since the
  * per-file permissions are taken from the mount rather than the node.
  *
  * Arguments (struct vop_access_args):
  *	a_vp		vnode being checked
  *	a_accmode	requested access mode
  *	a_cred		caller's credentials
  *	a_td		calling thread (unused here)
  */
 static int
 smbfs_access(struct vop_access_args *ap)
 {
 	struct vnode *vp;
 	struct smbmount *smp;
 	accmode_t wanted;
 	mode_t perm;
 
 	vp = ap->a_vp;
 	wanted = ap->a_accmode;
 	smp = VTOSMBFS(vp);
 
 	SMBVDEBUG("\n");
 	if ((wanted & VWRITE) != 0 &&
 	    (vp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
 	    (vp->v_type == VREG || vp->v_type == VDIR || vp->v_type == VLNK))
 		return (EROFS);
 
 	/* Regular files use the file mode, everything else the dir mode. */
 	if (vp->v_type == VREG)
 		perm = smp->sm_file_mode;
 	else
 		perm = smp->sm_dir_mode;
 
 	return (vaccess(vp->v_type, perm, smp->sm_uid, smp->sm_gid,
 	    wanted, ap->a_cred, NULL));
 }
 
 /*
  * Open a vnode.
  *
  * Only regular files and directories can be opened; anything else gets
  * EACCES.  Directories are merely flagged NOPEN.  For regular files the
  * buffer cache is revalidated against the server's mtime, and, unless the
  * node is already open, the file is opened on the server: read/write when
  * the mount is writable, read-only otherwise.  If a read/write open fails
  * and the caller did not ask for FWRITE, a read-only open is retried.
  *
  * Arguments (struct vop_open_args):
  *	a_vp	vnode to open
  *	a_mode	open flags (only FWRITE is examined)
  *	a_cred	caller's credentials
  *	a_td	calling thread
  *
  * Returns 0 on success or an errno value.
  */
 /* ARGSUSED */
 static int
 smbfs_open(ap)
 	struct vop_open_args /* {
 		struct vnode *a_vp;
 		int  a_mode;
 		struct ucred *a_cred;
 		struct thread *a_td;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct smbnode *np = VTOSMB(vp);
 	struct smb_cred *scred;
 	struct vattr vattr;
 	int mode = ap->a_mode;
 	int error, accmode;
 
 	SMBVDEBUG("%s,%d\n", np->n_name, (np->n_flag & NOPEN) != 0);
 	if (vp->v_type != VREG && vp->v_type != VDIR) {
 		SMBFSERR("open eacces vtype=%d\n", vp->v_type);
 		return EACCES;
 	}
 	if (vp->v_type == VDIR) {
 		np->n_flag |= NOPEN;
 		return 0;
 	}
 	if (np->n_flag & NMODIFIED) {
 		/* Locally modified: flush buffers and refetch attributes. */
 		if ((error = smbfs_vinvalbuf(vp, ap->a_td)) == EINTR)
 			return error;
 		smbfs_attr_cacheremove(vp);
 		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
 		if (error)
 			return error;
 		np->n_mtime.tv_sec = vattr.va_mtime.tv_sec;
 	} else {
 		/* Drop cached buffers if the mtime seen now differs. */
 		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
 		if (error)
 			return error;
 		if (np->n_mtime.tv_sec != vattr.va_mtime.tv_sec) {
 			error = smbfs_vinvalbuf(vp, ap->a_td);
 			if (error == EINTR)
 				return error;
 			np->n_mtime.tv_sec = vattr.va_mtime.tv_sec;
 		}
 	}
 	if ((np->n_flag & NOPEN) != 0)
 		return 0;
 	/*
 	 * Use DENYNONE to give unixy semantics of permitting
 	 * everything not forbidden by permissions.  Ie denial
 	 * is up to server with clients/openers needing to use
 	 * advisory locks for further control.
 	 */
 	accmode = SMB_SM_DENYNONE|SMB_AM_OPENREAD;
 	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
 		accmode = SMB_SM_DENYNONE|SMB_AM_OPENRW;
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, ap->a_td, ap->a_cred);
 	error = smbfs_smb_open(np, accmode, scred);
 	if (error) {
 		if (mode & FWRITE) {
 			/* Release the credential before bailing out. */
 			smbfs_free_scred(scred);
 			return EACCES;
 		} else if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 			/* The R/W open failed; retry read-only. */
 			accmode = SMB_SM_DENYNONE|SMB_AM_OPENREAD;
 			error = smbfs_smb_open(np, accmode, scred);
 		}
 	}
 	if (error == 0) {
 		np->n_flag |= NOPEN;
 		vnode_create_vobject(ap->a_vp, vattr.va_size, ap->a_td);
 	}
 	smbfs_attr_cacheremove(vp);
 	smbfs_free_scred(scred);
 	return error;
 }
 
 /*
  * Close a vnode.
  *
  * The only state released here is a pending directory search: when the
  * vnode is an open directory with a search context attached, the context
  * is closed on the server and detached from the node.  Always returns 0.
  *
  * Arguments (struct vop_close_args):
  *	a_vp	vnode being closed
  *	a_fflag	file flags (unused)
  *	a_cred	caller's credentials
  *	a_td	calling thread
  */
 static int
 smbfs_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct smbnode *np = VTOSMB(vp);
 	struct smb_cred *scred;
 
 	/* Nothing to tear down unless this is an open dir with a search. */
 	if (vp->v_type != VDIR || (np->n_flag & NOPEN) == 0 ||
 	    np->n_dirseq == NULL)
 		return (0);
 
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, ap->a_td, ap->a_cred);
 	smbfs_findclose(np->n_dirseq, scred);
 	smbfs_free_scred(scred);
 	np->n_dirseq = NULL;
 	return (0);
 }
 
 /*
  * smbfs_getattr call from vfs.
  *
  * Attributes are served from the per-node attribute cache when possible.
  * On a cache miss they are fetched from the server, entered into the
  * cache and then copied out through the cache lookup routine.
  *
  * Arguments (struct vop_getattr_args):
  *	a_vp	vnode to query
  *	a_vap	attribute structure to fill in
  *	a_cred	caller's credentials
  *
  * Returns 0 on success or the error from the server lookup.
  */
 static int
 smbfs_getattr(ap)
 	struct vop_getattr_args /* {
 		struct vnode *a_vp;
 		struct vattr *a_vap;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct smbnode *np = VTOSMB(vp);
 	struct vattr *va=ap->a_vap;
 	struct smbfattr fattr;
 	struct smb_cred *scred;
 	u_quad_t oldsize;
 	int error;
 
 	SMBVDEBUG("%lx: '%s' %d\n", (long)vp, np->n_name, (vp->v_vflag & VV_ROOT) != 0);
 	error = smbfs_attr_cachelookup(vp, va);
 	if (!error)
 		return 0;
 	SMBVDEBUG("not in the cache\n");
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, curthread, ap->a_cred);
 	oldsize = np->n_size;
 	error = smbfs_smb_lookup(np, NULL, 0, &fattr, scred);
 	if (error) {
 		SMBVDEBUG("error %d\n", error);
 		smbfs_free_scred(scred);
 		return error;
 	}
 	smbfs_attr_cacheenter(vp, &fattr);
 	smbfs_attr_cachelookup(vp, va);
 	/*
 	 * While the file is open, keep the size recorded before the
 	 * refresh rather than whatever n_size holds afterwards.
 	 */
 	if (np->n_flag & NOPEN)
 		np->n_size = oldsize;
 	/* The credential is released on every path, open or not. */
 	smbfs_free_scred(scred);
 	return 0;
 }
 
 /*
  * Set attributes on a vnode.
  *
  * Handles, in this order:
  *   - truncation/extension (va_size), regular files only;
  *   - DOS attribute bits derived from va_mode (write bit) and va_flags
  *     (UF_HIDDEN, UF_SYSTEM, UF_ARCHIVE, UF_READONLY);
  *   - modification/access times.
  * Ownership changes are not sent to the server; on a read-only mount any
  * attempt to change uid, gid, times, mode or flags yields EROFS.
  *
  * Arguments (struct vop_setattr_args):
  *	a_vp	vnode to modify
  *	a_vap	requested attributes (VNOVAL marks "leave unchanged");
  *		overwritten with the refreshed attributes on the way out
  *	a_cred	caller's credentials
  *
  * Returns 0 on success or an errno value.
  */
 static int
 smbfs_setattr(ap)
 	struct vop_setattr_args /* {
 		struct vnode *a_vp;
 		struct vattr *a_vap;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct smbnode *np = VTOSMB(vp);
 	struct vattr *vap = ap->a_vap;
 	struct timespec *mtime, *atime;
 	struct smb_cred *scred;
 	struct smb_share *ssp = np->n_mount->sm_share;
 	struct smb_vc *vcp = SSTOVC(ssp);
 	struct thread *td = curthread;
 	u_quad_t tsize = 0;
 	int isreadonly, doclose, error = 0;
 	int old_n_dosattr;
 
 	SMBVDEBUG("\n");
 	isreadonly = (vp->v_mount->mnt_flag & MNT_RDONLY);
 	/*
 	 * Disallow write attempts if the filesystem is mounted read-only.
 	 */
 	if ((vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL ||
 	     vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL ||
 	     vap->va_mode != (mode_t)VNOVAL || vap->va_flags != VNOVAL) &&
 	     isreadonly)
 		return EROFS;
 
 	/*
 	 * We only support setting four flags.  Don't allow setting others.
 	 *
 	 * We map UF_READONLY to SMB_FA_RDONLY, unlike the MacOS X version
 	 * of this code, which maps both UF_IMMUTABLE AND SF_IMMUTABLE to
 	 * SMB_FA_RDONLY.  The immutable flags have different semantics
 	 * than readonly, which is the reason for the difference.
 	 */
 	if (vap->va_flags != VNOVAL) {
 		if (vap->va_flags & ~(UF_HIDDEN|UF_SYSTEM|UF_ARCHIVE|
 				      UF_READONLY))
 			return EINVAL;
 	}
 
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, td, ap->a_cred);
 	if (vap->va_size != VNOVAL) {
 		switch (vp->v_type) {
 		    case VDIR:
 			error = EISDIR;
 			goto out;
 		    case VREG:
 			break;
 		    default:
 			error = EINVAL;
 			goto out;
 		}
 		if (isreadonly) {
 			error = EROFS;
 			goto out;
 		}
 		doclose = 0;
 		/*
 		 * Update the pager and the cached size first; both are
 		 * rolled back below if the server rejects the change.
 		 */
 		vnode_pager_setsize(vp, (u_long)vap->va_size);
 		tsize = np->n_size;
 		np->n_size = vap->va_size;
 		if ((np->n_flag & NOPEN) == 0) {
 			/* Need a file handle: open temporarily. */
 			error = smbfs_smb_open(np,
 					       SMB_SM_DENYNONE|SMB_AM_OPENRW,
 					       scred);
 			if (error == 0)
 				doclose = 1;
 		}
 		if (error == 0)
-			error = smbfs_smb_setfsize(np, vap->va_size, scred);
+			error = smbfs_smb_setfsize(np,
+			    (int64_t)vap->va_size, scred);
 		if (doclose)
 			smbfs_smb_close(ssp, np->n_fid, NULL, scred);
 		if (error) {
 			np->n_size = tsize;
 			vnode_pager_setsize(vp, (u_long)tsize);
 			goto out;
 		}
 	}
 	if ((vap->va_flags != VNOVAL) || (vap->va_mode != (mode_t)VNOVAL)) {
 		old_n_dosattr = np->n_dosattr;
 
 		if (vap->va_mode != (mode_t)VNOVAL) {
 			if (vap->va_mode & S_IWUSR)
 				np->n_dosattr &= ~SMB_FA_RDONLY;
 			else
 				np->n_dosattr |= SMB_FA_RDONLY;
 		}
 
 		if (vap->va_flags != VNOVAL) {
 			if (vap->va_flags & UF_HIDDEN)
 				np->n_dosattr |= SMB_FA_HIDDEN;
 			else
 				np->n_dosattr &= ~SMB_FA_HIDDEN;
 
 			if (vap->va_flags & UF_SYSTEM)
 				np->n_dosattr |= SMB_FA_SYSTEM;
 			else
 				np->n_dosattr &= ~SMB_FA_SYSTEM;
 
 			if (vap->va_flags & UF_ARCHIVE)
 				np->n_dosattr |= SMB_FA_ARCHIVE;
 			else
 				np->n_dosattr &= ~SMB_FA_ARCHIVE;
 
 			/*
 			 * We only support setting the immutable / readonly
 			 * bit for regular files.  According to comments in
 			 * the MacOS X version of this code, supporting the
 			 * readonly bit on directories doesn't do the same
 			 * thing in Windows as in Unix.
 			 */
 			if (vp->v_type == VREG) {
 				if (vap->va_flags & UF_READONLY)
 					np->n_dosattr |= SMB_FA_RDONLY;
 				else
 					np->n_dosattr &= ~SMB_FA_RDONLY;
 			}
 		}
 
 		/* Only talk to the server if something actually changed. */
 		if (np->n_dosattr != old_n_dosattr) {
 			error = smbfs_smb_setpattr(np, np->n_dosattr, NULL, scred);
 			if (error)
 				goto out;
 		}
 	}
 	mtime = atime = NULL;
 	if (vap->va_mtime.tv_sec != VNOVAL)
 		mtime = &vap->va_mtime;
 	if (vap->va_atime.tv_sec != VNOVAL)
 		atime = &vap->va_atime;
 	/* Both NULL compare equal, so this is "at least one time given". */
 	if (mtime != atime) {
 		if (vap->va_vaflags & VA_UTIMES_NULL) {
 			error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td);
 			if (error)
 				error = VOP_ACCESS(vp, VWRITE, ap->a_cred, td);
 		} else
 			error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td);
 		/*
 		 * Honour the permission check.  Without this the result
 		 * is overwritten by the SMB calls below and the check has
 		 * no effect.
 		 */
 		if (error)
 			goto out;
 #if 0
 		if (mtime == NULL)
 			mtime = &np->n_mtime;
 		if (atime == NULL)
 			atime = &np->n_atime;
 #endif
 		/*
 		 * If file is opened, then we can use handle based calls.
 		 * If not, use path based ones.
 		 */
 		if ((np->n_flag & NOPEN) == 0) {
 			if (vcp->vc_flags & SMBV_WIN95) {
 				error = VOP_OPEN(vp, FWRITE, ap->a_cred, td,
 				    NULL);
 				if (!error) {
 /*					error = smbfs_smb_setfattrNT(np, 0,
 					    mtime, atime, scred);
 					VOP_GETATTR(vp, &vattr, ap->a_cred); */
 					if (mtime)
 						np->n_mtime = *mtime;
 					VOP_CLOSE(vp, FWRITE, ap->a_cred, td);
 				}
 			} else if ((vcp->vc_sopt.sv_caps & SMB_CAP_NT_SMBS)) {
 				error = smbfs_smb_setptime2(np, mtime, atime, 0, scred);
 /*				error = smbfs_smb_setpattrNT(np, 0, mtime, atime, scred);*/
 			} else if (SMB_DIALECT(vcp) >= SMB_DIALECT_LANMAN2_0) {
 				error = smbfs_smb_setptime2(np, mtime, atime, 0, scred);
 			} else {
 				error = smbfs_smb_setpattr(np, 0, mtime, scred);
 			}
 		} else {
 			if (vcp->vc_sopt.sv_caps & SMB_CAP_NT_SMBS) {
 				error = smbfs_smb_setfattrNT(np, 0, mtime, atime, scred);
 			} else if (SMB_DIALECT(vcp) >= SMB_DIALECT_LANMAN1_0) {
 				error = smbfs_smb_setftime(np, mtime, atime, scred);
 			} else {
 				/*
 				 * I have no idea how to handle this for core
 				 * level servers. The possible solution is to
 				 * update mtime after file is closed.
 				 */
 				 SMBERROR("can't update times on an opened file\n");
 			}
 		}
 	}
 	/*
 	 * Invalidate attribute cache in case if server doesn't set
 	 * required attributes.
 	 */
 	smbfs_attr_cacheremove(vp);	/* invalidate cache */
 	VOP_GETATTR(vp, vap, ap->a_cred);
 	np->n_mtime.tv_sec = vap->va_mtime.tv_sec;
 out:
 	smbfs_free_scred(scred);
 	return error;
 }
 /*
  * smbfs_read call.
  *
  * Reads are permitted on regular files and directories only; any other
  * vnode type yields EPERM.  The transfer itself is done by
  * smbfs_readvnode().
  *
  * Arguments (struct vop_read_args):
  *	a_vp		vnode to read from
  *	a_uio		I/O descriptor
  *	a_ioflag	I/O flags (unused)
  *	a_cred		caller's credentials
  */
 static int
 smbfs_read(struct vop_read_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	SMBVDEBUG("\n");
 	switch (vp->v_type) {
 	case VREG:
 	case VDIR:
 		return (smbfs_readvnode(vp, ap->a_uio, ap->a_cred));
 	default:
 		return (EPERM);
 	}
 }
 
 /*
  * smbfs_write call.
  *
  * Only regular files may be written; other vnode types yield EPERM.
  * The transfer itself is done by smbfs_writevnode().
  *
  * Arguments (struct vop_write_args):
  *	a_vp		vnode to write to
  *	a_uio		I/O descriptor
  *	a_ioflag	I/O flags, passed through
  *	a_cred		caller's credentials
  */
 static int
 smbfs_write(struct vop_write_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 
 	SMBVDEBUG("%d,ofs=%jd,sz=%zd\n",vp->v_type, (intmax_t)uio->uio_offset,
 	    uio->uio_resid);
 	if (vp->v_type == VREG)
 		return (smbfs_writevnode(vp, uio, ap->a_cred, ap->a_ioflag));
 	return (EPERM);
 }
 /*
  * smbfs_create call
  * Create a regular file. On entry the directory to contain the file being
  * created is locked.  We must release before we return. We must also free
  * the pathname buffer pointed at by cnp->cn_pnbuf, always on error, or
  * only if the SAVESTART bit in cn_flags is clear on success.
  */
 static int
 smbfs_create(ap)
 	struct vop_create_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap;
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vattr *vap = ap->a_vap;
 	struct vnode **vpp=ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct smbnode *dnp = VTOSMB(dvp);
 	struct vnode *vp;
 	struct vattr vattr;
 	struct smbfattr fattr;
 	struct smb_cred *scred;
 	char *name = cnp->cn_nameptr;
 	int nmlen = cnp->cn_namelen;
 	int error;
 	
 
 	SMBVDEBUG("\n");
 	*vpp = NULL;
 	if (vap->va_type != VREG)
 		return EOPNOTSUPP;
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
 		return error;
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, cnp->cn_thread, cnp->cn_cred);
 	
 	error = smbfs_smb_create(dnp, name, nmlen, scred);
 	if (error)
 		goto out;
 	error = smbfs_smb_lookup(dnp, name, nmlen, &fattr, scred);
 	if (error)
 		goto out;
 	error = smbfs_nget(VTOVFS(dvp), dvp, name, nmlen, &fattr, &vp);
 	if (error)
 		goto out;
 	*vpp = vp;
 	if (cnp->cn_flags & MAKEENTRY)
 		cache_enter(dvp, vp, cnp);
 out:
 	smbfs_free_scred(scred);
 	return error;
 }
 
 /*
  * Remove a file.
  *
  * Directories, files that are currently open and vnodes with more than
  * one reference are refused with EPERM.  On successful deletion the node
  * is flagged NGONE; the name cache entry is purged whether or not the
  * server call succeeded.
  *
  * Arguments (struct vop_remove_args):
  *	a_dvp	parent directory (unused)
  *	a_vp	vnode to remove
  *	a_cnp	component name, supplies thread and credentials
  */
 static int
 smbfs_remove(struct vop_remove_args *ap)
 {
 	struct componentname *cnp = ap->a_cnp;
 	struct vnode *vp = ap->a_vp;
 	struct smbnode *np = VTOSMB(vp);
 	struct smb_cred *scred;
 	int error;
 
 	if (vp->v_type == VDIR)
 		return (EPERM);
 	if ((np->n_flag & NOPEN) != 0)
 		return (EPERM);
 	if (vrefcnt(vp) != 1)
 		return (EPERM);
 
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, cnp->cn_thread, cnp->cn_cred);
 	error = smbfs_smb_delete(np, scred);
 	if (error == 0)
 		np->n_flag |= NGONE;
 	cache_purge(vp);
 	smbfs_free_scred(scred);
 	return (error);
 }
 
 /*
  * smbfs_file rename call
  */
 static int
 smbfs_rename(ap)
 	struct vop_rename_args  /* {
 		struct vnode *a_fdvp;
 		struct vnode *a_fvp;
 		struct componentname *a_fcnp;
 		struct vnode *a_tdvp;
 		struct vnode *a_tvp;
 		struct componentname *a_tcnp;
 	} */ *ap;
 {
 	struct vnode *fvp = ap->a_fvp;
 	struct vnode *tvp = ap->a_tvp;
 	struct vnode *fdvp = ap->a_fdvp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct componentname *tcnp = ap->a_tcnp;
 /*	struct componentname *fcnp = ap->a_fcnp;*/
 	struct smb_cred *scred;
 	u_int16_t flags = 6;
 	int error=0;
 
 	scred = NULL;
 	/* Check for cross-device rename */
 	if ((fvp->v_mount != tdvp->v_mount) ||
 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
 		error = EXDEV;
 		goto out;
 	}
 
 	if (tvp && vrefcnt(tvp) > 1) {
 		error = EBUSY;
 		goto out;
 	}
 	flags = 0x10;			/* verify all writes */
 	if (fvp->v_type == VDIR) {
 		flags |= 2;
 	} else if (fvp->v_type == VREG) {
 		flags |= 1;
 	} else {
 		return EINVAL;
 	}
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, tcnp->cn_thread, tcnp->cn_cred);
 	/*
 	 * It seems that Samba doesn't implement SMB_COM_MOVE call...
 	 */
 #ifdef notnow
 	if (SMB_DIALECT(SSTOCN(smp->sm_share)) >= SMB_DIALECT_LANMAN1_0) {
 		error = smbfs_smb_move(VTOSMB(fvp), VTOSMB(tdvp),
 		    tcnp->cn_nameptr, tcnp->cn_namelen, flags, scred);
 	} else
 #endif
 	{
 		/*
 		 * We have to do the work atomicaly
 		 */
 		if (tvp && tvp != fvp) {
 			error = smbfs_smb_delete(VTOSMB(tvp), scred);
 			if (error)
 				goto out_cacherem;
 			VTOSMB(fvp)->n_flag |= NGONE;
 		}
 		error = smbfs_smb_rename(VTOSMB(fvp), VTOSMB(tdvp),
 		    tcnp->cn_nameptr, tcnp->cn_namelen, scred);
 	}
 
 	if (fvp->v_type == VDIR) {
 		if (tvp != NULL && tvp->v_type == VDIR)
 			cache_purge(tdvp);
 		cache_purge(fdvp);
 	}
 
 out_cacherem:
 	smbfs_attr_cacheremove(fdvp);
 	smbfs_attr_cacheremove(tdvp);
 out:
 	smbfs_free_scred(scred);
 	if (tdvp == tvp)
 		vrele(tdvp);
 	else
 		vput(tdvp);
 	if (tvp)
 		vput(tvp);
 	vrele(fdvp);
 	vrele(fvp);
 #ifdef possible_mistake
 	vgone(fvp);
 	if (tvp)
 		vgone(tvp);
 #endif
 	return error;
 }
 
 /*
  * Hard links are not implemented; the request is always refused with
  * EOPNOTSUPP.
  *
  * Arguments (struct vop_link_args), all unused:
  *	a_tdvp	target directory
  *	a_vp	vnode to link to
  *	a_cnp	name of the new link
  */
 static int
 smbfs_link(struct vop_link_args *ap)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Symbolic link creation is not implemented; the request is always
  * refused with EOPNOTSUPP.
  *
  * Arguments (struct vop_symlink_args), all unused:
  *	a_dvp		directory to create the link in
  *	a_vpp		would receive the new vnode
  *	a_cnp		name of the link
  *	a_vap		requested attributes
  *	a_target	link target path
  */
 static int
 smbfs_symlink(struct vop_symlink_args *ap)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Device node creation is not implemented; the request is always refused
  * with EOPNOTSUPP.  The argument structure is not examined.
  */
 static int
 smbfs_mknod(struct vop_mknod_args *ap)
 {
 
 	return (EOPNOTSUPP);
 }
 
 static int
 smbfs_mkdir(ap)
 	struct vop_mkdir_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap;
 {
 	struct vnode *dvp = ap->a_dvp;
 /*	struct vattr *vap = ap->a_vap;*/
 	struct vnode *vp;
 	struct componentname *cnp = ap->a_cnp;
 	struct smbnode *dnp = VTOSMB(dvp);
 	struct vattr vattr;
 	struct smb_cred *scred;
 	struct smbfattr fattr;
 	char *name = cnp->cn_nameptr;
 	int len = cnp->cn_namelen;
 	int error;
 
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) {
 		return error;
 	}	
 	if ((name[0] == '.') && ((len == 1) || ((len == 2) && (name[1] == '.'))))
 		return EEXIST;
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, cnp->cn_thread, cnp->cn_cred);
 	error = smbfs_smb_mkdir(dnp, name, len, scred);
 	if (error)
 		goto out;
 	error = smbfs_smb_lookup(dnp, name, len, &fattr, scred);
 	if (error)
 		goto out;
 	error = smbfs_nget(VTOVFS(dvp), dvp, name, len, &fattr, &vp);
 	if (error)
 		goto out;
 	*ap->a_vpp = vp;
 out:
 	smbfs_free_scred(scred);
 	return error;
 }
 
 /*
  * smbfs_remove directory call
  */
 static int
 smbfs_rmdir(ap)
 	struct vop_rmdir_args /* {
 		struct vnode *a_dvp;
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 /*	struct smbmount *smp = VTOSMBFS(vp);*/
 	struct smbnode *dnp = VTOSMB(dvp);
 	struct smbnode *np = VTOSMB(vp);
 	struct smb_cred *scred;
 	int error;
 
 	if (dvp == vp)
 		return EINVAL;
 
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, cnp->cn_thread, cnp->cn_cred);
 	error = smbfs_smb_rmdir(np, scred);
 	if (error == 0)
 		np->n_flag |= NGONE;
 	dnp->n_flag |= NMODIFIED;
 	smbfs_attr_cacheremove(dvp);
 /*	cache_purge(dvp);*/
 	cache_purge(vp);
 	smbfs_free_scred(scred);
 	return error;
 }
 
 /*
  * smbfs_readdir call.
  *
  * Only directories can be read this way (EPERM otherwise); the actual
  * work is done by smbfs_readvnode().  Cookie support is compiled out.
  *
  * Arguments (struct vop_readdir_args):
  *	a_vp		directory vnode
  *	a_uio		I/O descriptor
  *	a_cred		caller's credentials
  *	a_eofflag	unused
  *	a_cookies	unused
  *	a_ncookies	only examined when "notnow" is defined
  */
 static int
 smbfs_readdir(struct vop_readdir_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	if (vp->v_type != VDIR)
 		return (EPERM);
 #ifdef notnow
 	if (ap->a_ncookies) {
 		printf("smbfs_readdir: no support for cookies now...");
 		return (EOPNOTSUPP);
 	}
 #endif
 	return (smbfs_readvnode(vp, ap->a_uio, ap->a_cred));
 }
 
 /*
  * Flush a vnode.  Currently a no-op that always reports success; no
  * flush request is issued from here.
  *
  * Arguments (struct vop_fsync_args), all unused:
  *	a_vp		vnode to flush
  *	a_cred		caller's credentials
  *	a_waitfor	wait mode
  *	a_td		calling thread
  */
 /* ARGSUSED */
 static int
 smbfs_fsync(struct vop_fsync_args *ap)
 {
 
 	return (0);
 }
 
 /*
  * Print a one-line description of the smbnode attached to a vnode:
  * its name, parent pointer and whether it is flagged open.  Always
  * returns 0.
  *
  * Arguments (struct vop_print_args):
  *	a_vp	vnode to describe
  */
 static int
 smbfs_print(struct vop_print_args *ap)
 {
 	struct smbnode *np = VTOSMB(ap->a_vp);
 
 	if (np != NULL) {
 		printf("\tname = %s, parent = %p, open = %d\n", np->n_name,
 		    np->n_parent ? np->n_parent : NULL,
 		    (np->n_flag & NOPEN) != 0);
 	} else {
 		printf("no smbnode data\n");
 	}
 	return (0);
 }
 
 /*
  * Report configurable pathname limits.
  *
  *	_PC_LINK_MAX	0
  *	_PC_NAME_MAX	255 if the connection has the long-names flag set,
  *			12 otherwise
  *	_PC_PATH_MAX	800
  * Any other name yields EINVAL.
  *
  * Arguments (struct vop_pathconf_args):
  *	a_vp		vnode being queried
  *	a_name		which limit is requested
  *	a_retval	receives the value
  */
 static int
 smbfs_pathconf(struct vop_pathconf_args *ap)
 {
 	struct smbmount *smp = VFSTOSMBFS(VTOVFS(ap->a_vp));
 	struct smb_vc *vcp = SSTOVC(smp->sm_share);
 	register_t *retval = ap->a_retval;
 
 	switch (ap->a_name) {
 	case _PC_LINK_MAX:
 		*retval = 0;
 		return (0);
 	case _PC_NAME_MAX:
 		if (vcp->vc_hflags2 & SMB_FLAGS2_KNOWS_LONG_NAMES)
 			*retval = 255;
 		else
 			*retval = 12;
 		return (0);
 	case _PC_PATH_MAX:
 		*retval = 800;	/* XXX: a correct one ? */
 		return (0);
 	default:
 		return (EINVAL);
 	}
 }
 
 static int
 smbfs_strategy (ap) 
 	struct vop_strategy_args /* {
 	struct buf *a_bp
 	} */ *ap;
 {
 	struct buf *bp=ap->a_bp;
 	struct ucred *cr;
 	struct thread *td;
 
 	SMBVDEBUG("\n");
 	if (bp->b_flags & B_ASYNC)
 		td = (struct thread *)0;
 	else
 		td = curthread;	/* XXX */
 	if (bp->b_iocmd == BIO_READ)
 		cr = bp->b_rcred;
 	else
 		cr = bp->b_wcred;
 
 	if ((bp->b_flags & B_ASYNC) == 0 )
 		(void)smbfs_doio(ap->a_vp, bp, cr, td);
 	return (0);
 }
 
 /*
  * No ioctls are supported on smbfs vnodes; every request is refused
  * with ENOTTY.
  *
  * Arguments (struct vop_ioctl_args), all unused:
  *	a_vp		vnode
  *	a_command	ioctl command
  *	a_data		command argument
  *	fflag, cred, td	file flags, credentials and thread
  */
 int
 smbfs_ioctl(struct vop_ioctl_args *ap)
 {
 
 	return (ENOTTY);
 }
 
 static char smbfs_atl[] = "rhsvda";
 static int
 smbfs_getextattr(struct vop_getextattr_args *ap)
 /* {
         IN struct vnode *a_vp;
         IN char *a_name;
         INOUT struct uio *a_uio;
         IN struct ucred *a_cred;
         IN struct thread *a_td;
 };
 */
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
 	struct uio *uio = ap->a_uio;
 	const char *name = ap->a_name;
 	struct smbnode *np = VTOSMB(vp);
 	struct vattr vattr;
 	char buf[10];
 	int i, attr, error;
 
 	error = VOP_ACCESS(vp, VREAD, cred, td);
 	if (error)
 		return error;
 	error = VOP_GETATTR(vp, &vattr, cred);
 	if (error)
 		return error;
 	if (strcmp(name, "dosattr") == 0) {
 		attr = np->n_dosattr;
 		for (i = 0; i < 6; i++, attr >>= 1)
 			buf[i] = (attr & 1) ? smbfs_atl[i] : '-';
 		buf[i] = 0;
 		error = uiomove(buf, i, uio);
 		
 	} else
 		error = EINVAL;
 	return error;
 }
 
 /*
  * Since we expected to support F_GETLK (and SMB protocol has no such function),
  * it is necessary to use lf_advlock(). It would be nice if this function had
  * a callback mechanism because it will help to improve a level of consistency.
  *
  * Advisory byte-range locking.  The requested range is converted to an
  * absolute [start, end] pair, the lock is recorded locally through
  * lf_advlock() and then mirrored on the server with smbfs_smb_lock().
  * Directories are refused with EOPNOTSUPP.  F_GETLK is answered from the
  * local lock list only.
  */
 int
 smbfs_advlock(ap)
 	struct vop_advlock_args /* {
 		struct vnode *a_vp;
 		caddr_t  a_id;
 		int  a_op;
 		struct flock *a_fl;
 		int  a_flags;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct smbnode *np = VTOSMB(vp);
 	struct flock *fl = ap->a_fl;
 	/* The caller-supplied a_id is ignored; a constant id is used. */
 	caddr_t id = (caddr_t)1 /* ap->a_id */;
 /*	int flags = ap->a_flags;*/
 	struct thread *td = curthread;
 	struct smb_cred *scred;
 	u_quad_t size;
 	off_t start, end, oadd;
 	int error, lkop;
 
 	if (vp->v_type == VDIR) {
 		/*
 		 * SMB protocol have no support for directory locking.
 		 * Although locks can be processed on local machine, I don't
 		 * think that this is a good idea, because some programs
 		 * can work wrong assuming directory is locked. So, we just
 		 * return 'operation not supported
 		 */
 		 return EOPNOTSUPP;
 	}
 	size = np->n_size;
 	/* Resolve l_whence/l_start into an absolute starting offset. */
 	switch (fl->l_whence) {
 
 	case SEEK_SET:
 	case SEEK_CUR:
 		/* SEEK_CUR is handled exactly like SEEK_SET here. */
 		start = fl->l_start;
 		break;
 
 	case SEEK_END:
 		if (size > OFF_MAX ||
 		    (fl->l_start > 0 && size > OFF_MAX - fl->l_start))
 			return EOVERFLOW;
 		start = size + fl->l_start;
 		break;
 
 	default:
 		return EINVAL;
 	}
 	if (start < 0)
 		return EINVAL;
 	/*
 	 * Compute the inclusive end offset.  A negative length locks the
 	 * bytes before "start"; a zero length leaves end at -1.
 	 */
 	if (fl->l_len < 0) {
 		if (start == 0)
 			return EINVAL;
 		end = start - 1;
 		start += fl->l_len;
 		if (start < 0)
 			return EINVAL;
 	} else if (fl->l_len == 0)
 		end = -1;
 	else {
 		oadd = fl->l_len - 1;
 		if (oadd > OFF_MAX - start)
 			return EOVERFLOW;
 		end = start + oadd;
 	}
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, td, td->td_ucred);
 	switch (ap->a_op) {
 	    case F_SETLK:
 		/* This switch effectively only validates l_type; see below. */
 		switch (fl->l_type) {
 		    case F_WRLCK:
 			lkop = SMB_LOCK_EXCL;
 			break;
 		    case F_RDLCK:
 			lkop = SMB_LOCK_SHARED;
 			break;
 		    case F_UNLCK:
 			lkop = SMB_LOCK_RELEASE;
 			break;
 		    default:
 			smbfs_free_scred(scred);
 			return EINVAL;
 		}
 		error = lf_advlock(ap, &vp->v_lockf, size);
 		if (error)
 			break;
 		/*
 		 * NOTE(review): lkop is overwritten here, so the server
 		 * always receives an exclusive lock request regardless of
 		 * the l_type selected above - confirm this is intended.
 		 */
 		lkop = SMB_LOCK_EXCL;
 		error = smbfs_smb_lock(np, lkop, id, start, end, scred);
 		if (error) {
 			/*
 			 * The server refused: undo the local lock.  l_type
 			 * is restored afterwards, but ap->a_op is left as
 			 * F_UNLCK.
 			 */
 			int oldtype = fl->l_type;
 			fl->l_type = F_UNLCK;
 			ap->a_op = F_UNLCK;
 			lf_advlock(ap, &vp->v_lockf, size);
 			fl->l_type = oldtype;
 		}
 		break;
 	    case F_UNLCK:
 		/* The local unlock result is ignored; the server's is returned. */
 		lf_advlock(ap, &vp->v_lockf, size);
 		error = smbfs_smb_lock(np, SMB_LOCK_RELEASE, id, start, end, scred);
 		break;
 	    case F_GETLK:
 		error = lf_advlock(ap, &vp->v_lockf, size);
 		break;
 	    default:
 		smbfs_free_scred(scred);
 		return EINVAL;
 	}
 	smbfs_free_scred(scred);
 	return error;
 }
 
 /*
  * Validate a single path component before it is sent to the server.
  *
  *	smp	mount the name will be used on (selects the dialect)
  *	name	start of the component; it is NOT necessarily terminated
  *		at nmlen, so only the first nmlen bytes are examined
  *	nmlen	length of the component in bytes
  *	nameiop	namei operation (LOOKUP, CREATE, ...)
  *
  * A backslash is rejected for every operation.  For anything other than
  * LOOKUP the name must also avoid the characters the server reserves,
  * and on dialects older than LANMAN 2.0 it must fit the 8.3 format.
  *
  * Returns 0 if the name is acceptable, ENAMETOOLONG if it exceeds 12
  * bytes on an 8.3-only server, ENOENT otherwise.
  */
 static int
 smbfs_pathcheck(struct smbmount *smp, const char *name, int nmlen, int nameiop)
 {
 	static const char *badchars = "*/:<>;?";
 	static const char *badchars83 = " +|,[]=";
 	const char *cp;
 	int i, error, dot;
 
 	/*
 	 * Backslash characters, being a path delimiter, are prohibited
 	 * within a path component even for LOOKUP operations.
 	 *
 	 * The scan is bounded by nmlen: an unbounded strchr() would run
 	 * on into the following components of the path and reject this
 	 * component because of a character that belongs to a later one.
 	 */
 	for (i = 0; i < nmlen; i++)
 		if (name[i] == '\\')
 			return ENOENT;
 
 	if (nameiop == LOOKUP)
 		return 0;
 	error = ENOENT;
 	if (SMB_DIALECT(SSTOVC(smp->sm_share)) < SMB_DIALECT_LANMAN2_0) {
 		/*
 		 * Name should conform 8.3 format: exactly one dot, which
 		 * is neither the first character nor preceded by more
 		 * than eight characters.
 		 */
 		if (nmlen > 12)
 			return ENAMETOOLONG;
 		dot = -1;
 		for (i = 0; i < nmlen; i++) {
 			if (name[i] != '.')
 				continue;
 			if (dot != -1)
 				return error;	/* a second dot */
 			dot = i;
 		}
 		if (dot == -1)
 			return error;		/* no dot at all */
 		if (dot == 0 || dot > 8)
 			return error;
 		for (cp = name, i = 0; i < nmlen; i++, cp++)
 			if (strchr(badchars83, *cp) != NULL)
 				return error;
 	}
 	for (cp = name, i = 0; i < nmlen; i++, cp++)
 		if (strchr(badchars, *cp) != NULL)
 			return error;
 	return 0;
 }
 
 /*
  * Things go even weird without fixed inode numbers...
  *
  * Look up one path component (a_cnp) in directory a_dvp and return the
  * resulting vnode through a_vpp.  The name cache is consulted first and
  * a hit is revalidated with VOP_GETATTR(); otherwise the server is
  * queried.  For CREATE/RENAME of a missing last component EJUSTRETURN is
  * returned after checking write access to the directory.
  */
 int
 smbfs_lookup(ap)
 	struct vop_lookup_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	struct componentname *cnp = ap->a_cnp;
 	struct thread *td = cnp->cn_thread;
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct vnode *vp;
 	struct smbmount *smp;
 	struct mount *mp = dvp->v_mount;
 	struct smbnode *dnp;
 	struct smbfattr fattr, *fap;
 	struct smb_cred *scred;
 	char *name = cnp->cn_nameptr;
 	int flags = cnp->cn_flags;
 	int nameiop = cnp->cn_nameiop;
 	int nmlen = cnp->cn_namelen;
 	int error, islastcn, isdot;
 	int killit;
 	
 	SMBVDEBUG("\n");
 	if (dvp->v_type != VDIR)
 		return ENOTDIR;
 	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT)) {
 		SMBFSERR("invalid '..'\n");
 		return EIO;
 	}
 	islastcn = flags & ISLASTCN;
 	/* Modifying operations on the last component need a writable mount. */
 	if (islastcn && (mp->mnt_flag & MNT_RDONLY) && (nameiop != LOOKUP))
 		return EROFS;
 	if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0)
 		return error;
 	smp = VFSTOSMBFS(mp);
 	dnp = VTOSMB(dvp);
 	isdot = (nmlen == 1 && name[0] == '.');
 
 	error = smbfs_pathcheck(smp, cnp->cn_nameptr, cnp->cn_namelen, nameiop);
 
 	/* Any pathcheck failure, including ENAMETOOLONG, becomes ENOENT. */
 	if (error) 
 		return ENOENT;
 
 	error = cache_lookup(dvp, vpp, cnp, NULL, NULL);
 	SMBVDEBUG("cache_lookup returned %d\n", error);
 	/* Positive values are returned as-is; other non-zero means a hit. */
 	if (error > 0)
 		return error;
 	if (error) {		/* name was found */
 		struct vattr vattr;
 
 		killit = 0;
 		vp = *vpp;
 		error = VOP_GETATTR(vp, &vattr, cnp->cn_cred);
 		/*
 		 * If the file type on the server is inconsistent
 		 * with what it was when we created the vnode,
 		 * kill the bogus vnode now and fall through to
 		 * the code below to create a new one with the
 		 * right type.
 		 */
 		if (error == 0 &&
 		   ((vp->v_type == VDIR &&
 		   (VTOSMB(vp)->n_dosattr & SMB_FA_DIR) == 0) ||
 		   (vp->v_type == VREG &&
 		   (VTOSMB(vp)->n_dosattr & SMB_FA_DIR) != 0)))
 		   killit = 1;
 		else if (error == 0
 	     /*    && vattr.va_ctime.tv_sec == VTOSMB(vp)->n_ctime*/) {
 		     if (nameiop != LOOKUP && islastcn)
 			     cnp->cn_flags |= SAVENAME;
 		     SMBVDEBUG("use cached vnode\n");
 		     return (0);
 		}
 		/* Cached entry is unusable: drop it and ask the server. */
 		cache_purge(vp);
 		/*
 		 * XXX This is not quite right, if '.' is
 		 * inconsistent, we really need to start the lookup
 		 * all over again.  Hopefully there is some other
 		 * guarantee that prevents this case from happening.
 		 */
 		if (killit && vp != dvp)
 			vgone(vp);
 		if (vp != dvp)
 			vput(vp);
 		else
 			vrele(vp);
 		*vpp = NULLVP;
 	}
 	/* 
 	 * entry is not in the cache or has been expired
 	 */
 	error = 0;
 	*vpp = NULLVP;
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, td, cnp->cn_cred);
 	fap = &fattr;
 	if (flags & ISDOTDOT) {
 		/*
 		 * In the DOTDOT case, don't go over-the-wire
 		 * in order to request attributes. We already
 		 * know it's a directory and subsequent call to
 		 * smbfs_getattr() will restore consistency.
 		 *
 		 */
 		SMBVDEBUG("smbfs_smb_lookup: dotdot\n");
 	} else if (isdot) {
 		/* "." refers to the directory itself: query it by node. */
 		error = smbfs_smb_lookup(dnp, NULL, 0, fap, scred);
 		SMBVDEBUG("result of smbfs_smb_lookup: %d\n", error);
 	}
 	else {
 		error = smbfs_smb_lookup(dnp, name, nmlen, fap, scred);
 		SMBVDEBUG("result of smbfs_smb_lookup: %d\n", error);
 	}
 	if (error && error != ENOENT)
 		goto out;
 	if (error) {			/* entry not found */
 		/*
 		 * Handle RENAME or CREATE case...
 		 */
 		if ((nameiop == CREATE || nameiop == RENAME) && islastcn) {
 			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
 			if (error)
 				goto out;
 			cnp->cn_flags |= SAVENAME;
 			error = EJUSTRETURN;
 			goto out;
 		}
 		error = ENOENT;
 		goto out;
 	}/* else {
 		SMBVDEBUG("Found entry %s with id=%d\n", fap->entryName, fap->dirEntNum);
 	}*/
 	/*
 	 * handle DELETE case ...
 	 */
 	if (nameiop == DELETE && islastcn) { 	/* delete last component */
 		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
 		if (error)
 			goto out;
 		if (isdot) {
 			VREF(dvp);
 			*vpp = dvp;
 			goto out;
 		}
 		error = smbfs_nget(mp, dvp, name, nmlen, fap, &vp);
 		if (error)
 			goto out;
 		*vpp = vp;
 		cnp->cn_flags |= SAVENAME;
 		goto out;
 	}
 	if (nameiop == RENAME && islastcn) {
 		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
 		if (error)
 			goto out;
 		if (isdot) {
 			error = EISDIR;
 			goto out;
 		}
 		error = smbfs_nget(mp, dvp, name, nmlen, fap, &vp);
 		if (error)
 			goto out;
 		*vpp = vp;
 		cnp->cn_flags |= SAVENAME;
 		goto out;
 	}
 	if (flags & ISDOTDOT) {
 		/*
 		 * dvp is unlocked around the smbfs_nget() call for the
 		 * parent, so the mount is busied first and dvp is
 		 * rechecked for VI_DOOMED once it has been relocked.
 		 */
 		mp = dvp->v_mount;
 		error = vfs_busy(mp, MBF_NOWAIT);
 		if (error != 0) {
 			vfs_ref(mp);
 			VOP_UNLOCK(dvp, 0);
 			error = vfs_busy(mp, 0);
 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 			vfs_rel(mp);
 			if (error) {
 				error = ENOENT;
 				goto out;
 			}
 			if ((dvp->v_iflag & VI_DOOMED) != 0) {
 				vfs_unbusy(mp);
 				error = ENOENT;
 				goto out;
 			}
 		}	
 		VOP_UNLOCK(dvp, 0);
 		error = smbfs_nget(mp, dvp, name, nmlen, NULL, &vp);
 		vfs_unbusy(mp);
 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 		if ((dvp->v_iflag & VI_DOOMED) != 0) {
 			if (error == 0)
 				vput(vp);
 			error = ENOENT;
 		}
 		if (error)
 			goto out;
 		*vpp = vp;
 	} else if (isdot) {
 		vref(dvp);
 		*vpp = dvp;
 	} else {
 		error = smbfs_nget(mp, dvp, name, nmlen, fap, &vp);
 		if (error)
 			goto out;
 		*vpp = vp;
 		SMBVDEBUG("lookup: getnewvp!\n");
 	}
 	if ((cnp->cn_flags & MAKEENTRY)/* && !islastcn*/) {
 /*		VTOSMB(*vpp)->n_ctime = VTOSMB(*vpp)->n_vattr.va_ctime.tv_sec;*/
 		cache_enter(dvp, *vpp, cnp);
 	}
 out:
 	smbfs_free_scred(scred);
 	return (error);
 }
Index: projects/clang380-import/sys/i386/i386/elf_machdep.c
===================================================================
--- projects/clang380-import/sys/i386/i386/elf_machdep.c	(revision 293686)
+++ projects/clang380-import/sys/i386/i386/elf_machdep.c	(revision 293687)
@@ -1,288 +1,289 @@
 /*-
  * Copyright 1996-1998 John D. Polstra.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_cpu.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/linker.h>
 #include <sys/proc.h>
 #include <sys/sysent.h>
 #include <sys/imgact_elf.h>
 #include <sys/syscall.h>
 #include <sys/signalvar.h>
 #include <sys/vnode.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_param.h>
 
 #include <machine/elf.h>
 #include <machine/md_var.h>
 #include <machine/npx.h>
 
 #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
 #define CPU_ENABLE_SSE
 #endif
 
 /*
  * System entry vector for binaries identified as "FreeBSD ELF32".
  *
  * It points at the sysent[] table, the generic sendsig()/sigcode
  * signal trampoline and the generic exec_copyout_strings()/exec_setregs()
  * helpers.  Hooks that are not needed are left NULL.
  */
 struct sysentvec elf32_freebsd_sysvec = {
 	.sv_size	= SYS_MAXSYSCALL,
 	.sv_table	= sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= 0,
 	.sv_errtbl	= NULL,		/* no errno translation table */
 	.sv_transtrap	= NULL,		/* no trap-to-signal translation hook */
 	.sv_fixup	= __elfN(freebsd_fixup),
 	.sv_sendsig	= sendsig,
 	.sv_sigcode	= sigcode,
 	.sv_szsigcode	= &szsigcode,
 	.sv_name	= "FreeBSD ELF32",
 	.sv_coredump	= __elfN(coredump),
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings	= exec_copyout_strings,
 	.sv_setregs	= exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | SV_SHP |
 			    SV_TIMEKEEP,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = syscallnames,
 	.sv_shared_page_base = SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,	/* shared page is one page long */
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
+	.sv_trap	= NULL,
 };
 /* NOTE(review): INIT_SYSENTVEC presumably schedules boot-time setup of the vector - confirm. */
 INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec);
 
 /*
  * ELF brand entry for EM_386 binaries branded ELFOSABI_FREEBSD whose
  * interpreter is /libexec/ld-elf.so.1.  It binds them to
  * elf32_freebsd_sysvec.
  */
 static Elf32_Brandinfo freebsd_brand_info = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_386,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/libexec/ld-elf.so.1",
 	.sysvec		= &elf32_freebsd_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf32_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 /* Hand the entry to elf32_insert_brand_entry() at SI_SUB_EXEC, SI_ORDER_FIRST. */
 SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST,
 	(sysinit_cfunc_t) elf32_insert_brand_entry,
 	&freebsd_brand_info);
 
 /*
  * Same brand as freebsd_brand_info, but for binaries whose interpreter
  * path is /usr/libexec/ld-elf.so.1.
  * NOTE(review): presumably the legacy loader location - confirm.
  */
 static Elf32_Brandinfo freebsd_brand_oinfo = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_386,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/usr/libexec/ld-elf.so.1",
 	.sysvec		= &elf32_freebsd_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf32_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 /* Hand the entry to elf32_insert_brand_entry() at SI_SUB_EXEC, SI_ORDER_ANY. */
 SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY,
 	(sysinit_cfunc_t) elf32_insert_brand_entry,
 	&freebsd_brand_oinfo);
 
 /*
  * Brand entry using the elf32_kfreebsd_brandnote note and the
  * /lib/ld.so.1 interpreter; it also maps to elf32_freebsd_sysvec.
  * NOTE(review): BI_BRAND_NOTE_MANDATORY suggests the entry matches only
  * when the brand note is present - confirm against the ELF image activator.
  */
 static Elf32_Brandinfo kfreebsd_brand_info = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_386,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/lib/ld.so.1",
 	.sysvec		= &elf32_freebsd_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf32_kfreebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY
 };
 
 /* Hand the entry to elf32_insert_brand_entry() at SI_SUB_EXEC, SI_ORDER_ANY. */
 SYSINIT(kelf32, SI_SUB_EXEC, SI_ORDER_ANY,
 	(sysinit_cfunc_t) elf32_insert_brand_entry,
 	&kfreebsd_brand_info);
 
 
 /*
  * Emit (or merely size) the machine-dependent per-thread core dump notes.
  *
  * With CPU_ENABLE_SSE and use_xsave set, one NT_X86_XSTATE note of
  * cpu_max_ext_state_size bytes is produced.  When dst is NULL only the
  * length is accumulated; otherwise the note is written and the xsave_mask
  * is stored at X86_XSTATE_XCR0_OFFSET inside its payload.  The total
  * length is returned through *off.
  */
 void
 elf32_dump_thread(struct thread *td, void *dst, size_t *off)
 {
 	size_t notesz;
 #ifdef CPU_ENABLE_SSE
 	void *xstate;
 #endif
 
 	notesz = 0;
 #ifdef CPU_ENABLE_SSE
 	if (use_xsave) {
 		if (dst == NULL) {
 			/* Sizing pass: nothing is copied. */
 			notesz += elf32_populate_note(NT_X86_XSTATE, NULL, NULL,
 			    cpu_max_ext_state_size, NULL);
 		} else {
 			npxgetregs(td);
 			notesz += elf32_populate_note(NT_X86_XSTATE,
 			    get_pcb_user_save_td(td), dst,
 			    cpu_max_ext_state_size, &xstate);
 			*(uint64_t *)((char *)xstate + X86_XSTATE_XCR0_OFFSET) =
 			    xsave_mask;
 		}
 	}
 #endif
 	*off = notesz;
 }
 
 /*
  * Apply a single ELF relocation record.
  *
  * data points at an Elf_Rel (type == ELF_RELOC_REL, addend read from the
  * relocated word) or an Elf_Rela (type == ELF_RELOC_RELA, explicit addend).
  * When local is non-zero only R_386_RELATIVE is processed and every other
  * type is silently accepted.  Otherwise symbol-based relocations are
  * resolved through lookup().  Returns 0 on success and -1 on failure.
  */
 static int
 elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, int local, elf_lookup_fn lookup)
 {
 	const Elf_Rela *rela;
 	const Elf_Rel *rel;
 	Elf_Addr *where, addend, value;
 	Elf_Word rtype, symidx;
 
 	if (type == ELF_RELOC_REL) {
 		rel = (const Elf_Rel *)data;
 		where = (Elf_Addr *) (relocbase + rel->r_offset);
 		addend = *where;
 		rtype = ELF_R_TYPE(rel->r_info);
 		symidx = ELF_R_SYM(rel->r_info);
 	} else if (type == ELF_RELOC_RELA) {
 		rela = (const Elf_Rela *)data;
 		where = (Elf_Addr *) (relocbase + rela->r_offset);
 		addend = rela->r_addend;
 		rtype = ELF_R_TYPE(rela->r_info);
 		symidx = ELF_R_SYM(rela->r_info);
 	} else
 		panic("unknown reloc type %d\n", type);
 
 	if (local) {
 		if (rtype != R_386_RELATIVE)
 			return (0);
 		/* A + B */
 		value = elf_relocaddr(lf, relocbase + addend);
 		if (*where != value)
 			*where = value;
 		return (0);
 	}
 
 	switch (rtype) {
 	case R_386_NONE:	/* none */
 	case R_386_RELATIVE:	/* handled in the local pass */
 		break;
 
 	case R_386_32:		/* S + A */
 	case R_386_PC32:	/* S + A - P */
 	case R_386_GLOB_DAT:	/* S */
 		if (lookup(lf, symidx, 1, &value) != 0)
 			return (-1);
 		if (rtype == R_386_32)
 			value += addend;
 		else if (rtype == R_386_PC32)
 			value += addend - (Elf_Addr)where;
 		/* Store only when the word actually changes. */
 		if (*where != value)
 			*where = value;
 		break;
 
 	case R_386_COPY:
 		/* Copy relocations are not expected in kernel objects. */
 		printf("kldload: unexpected R_COPY relocation\n");
 		return (-1);
 
 	default:
 		printf("kldload: unexpected relocation type %d\n",
 		       rtype);
 		return (-1);
 	}
 	return (0);
 }
 
 /*
  * Apply one relocation record, taking the symbol-resolving path of
  * elf_reloc_internal() (local == 0).
  */
 int
 elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type,
     elf_lookup_fn lookup)
 {
 	const int local = 0;
 
 	return (elf_reloc_internal(lf, relocbase, data, type, local, lookup));
 }
 
 /*
  * Apply one relocation record in "local" mode (local == 1), in which
  * elf_reloc_internal() only acts on R_386_RELATIVE entries.
  */
 int
 elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, elf_lookup_fn lookup)
 {
 	const int local = 1;
 
 	return (elf_reloc_internal(lf, relocbase, data, type, local, lookup));
 }
 
 /*
  * Machine-dependent hook for a loaded linker file.  Nothing is done
  * here; success (0) is always reported.
  */
 int
 elf_cpu_load_file(linker_file_t lf __unused)
 {
 
 	/* No per-file work needed. */
 	return (0);
 }
 
 /*
  * Machine-dependent hook for an unloaded linker file.  Nothing is done
  * here; success (0) is always reported.
  */
 int
 elf_cpu_unload_file(linker_file_t lf __unused)
 {
 
 	/* No per-file work needed. */
 	return (0);
 }
Index: projects/clang380-import/sys/i386/ibcs2/ibcs2_sysvec.c
===================================================================
--- projects/clang380-import/sys/i386/ibcs2/ibcs2_sysvec.c	(revision 293686)
+++ projects/clang380-import/sys/i386/ibcs2/ibcs2_sysvec.c	(revision 293687)
@@ -1,137 +1,138 @@
 /*-
  * Copyright (c) 1995 Steven Wallace
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by Steven Wallace.
  * 4. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/sysent.h>
 #include <sys/signalvar.h>
 #include <sys/proc.h>
 #include <sys/sx.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_param.h>
 
 #include <i386/ibcs2/ibcs2_syscall.h>
 #include <i386/ibcs2/ibcs2_signal.h>
 
 MODULE_VERSION(ibcs2, 1);
 
 extern int bsd_to_ibcs2_errno[];
 extern struct sysent ibcs2_sysent[IBCS2_SYS_MAXSYSCALL];
 static int ibcs2_fixup(register_t **, struct image_params *);
 
 /*
  * System entry vector named "IBCS2 COFF".
  *
  * It dispatches through ibcs2_sysent[], translates signals and errnos with
  * the bsd_to_ibcs2_sig / bsd_to_ibcs2_errno tables, and otherwise reuses
  * the generic sendsig()/sigcode trampoline and exec helpers.
  */
 struct sysentvec ibcs2_svr3_sysvec = {
         .sv_size	= sizeof (ibcs2_sysent) / sizeof (ibcs2_sysent[0]),
         .sv_table	= ibcs2_sysent,
         .sv_mask	= 0xff,
         .sv_sigsize	= IBCS2_SIGTBLSZ,
         .sv_sigtbl	= bsd_to_ibcs2_sig,
         .sv_errsize	= ELAST + 1,
         .sv_errtbl	= bsd_to_ibcs2_errno,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= ibcs2_fixup,
 	.sv_sendsig	= sendsig,
 	.sv_sigcode	= sigcode,	/* use generic trampoline */
 	.sv_szsigcode	= &szsigcode,
 	.sv_prepsyscall	= NULL,
 	.sv_name	= "IBCS2 COFF",
 	.sv_coredump	= NULL,	/* we don't have a COFF coredump function */
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= IBCS2_MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings = exec_copyout_strings,
 	.sv_setregs	= exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_UNDEF | SV_IA32 | SV_ILP32,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = NULL,	/* no syscall name table */
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
+	.sv_trap	= NULL,
 };
 
 /*
  * Stack fixup for IBCS2 images: grow the stack by one word and store
  * argc there.  Returns whatever suword() returns.
  */
 static int
 ibcs2_fixup(register_t **stack_base, struct image_params *imgp)
 {
 	register_t *slot;
 
 	slot = --(*stack_base);
 	return (suword(slot, imgp->args->argc));
 }
 
 /*
  * Module event handler for the otherwise empty "ibcs2" module, which
  * exists so that the presence of the subsystem can be checked.
  *
  * MOD_LOAD always succeeds.  MOD_UNLOAD is refused with EBUSY while any
  * process still runs under ibcs2_svr3_sysvec.  Every other event yields
  * EOPNOTSUPP.
  */
 static int
 ibcs2_modevent(module_t mod, int type, void *unused)
 {
 	struct proc *proc;
 	int error;
 
 	if (type == MOD_LOAD)
 		return (0);
 	if (type != MOD_UNLOAD)
 		return (EOPNOTSUPP);
 
 	/* An ELF module would use elf_brand_inuse() instead of this scan. */
 	error = 0;
 	proc = NULL;
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(proc) {
 		if (proc->p_sysent == &ibcs2_svr3_sysvec) {
 			error = EBUSY;
 			break;
 		}
 	}
 	sx_sunlock(&allproc_lock);
 	return (error);
 }
 /* Module descriptor: name, event handler, and no private argument. */
 static moduledata_t ibcs2_mod = {
 	"ibcs2",
 	ibcs2_modevent,
 	0
 };
 /* Declare the module at SI_SUB_PSEUDO with SI_ORDER_ANY. */
 DECLARE_MODULE_TIED(ibcs2, ibcs2_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
Index: projects/clang380-import/sys/i386/linux/linux_sysvec.c
===================================================================
--- projects/clang380-import/sys/i386/linux/linux_sysvec.c	(revision 293686)
+++ projects/clang380-import/sys/i386/linux/linux_sysvec.c	(revision 293687)
@@ -1,1195 +1,1197 @@
 /*-
  * Copyright (c) 1994-1996 Søren Schmidt
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer
  *    in this position and unchanged.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
 #include <sys/imgact.h>
 #include <sys/imgact_aout.h>
 #include <sys/imgact_elf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/signalvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/vnode.h>
 #include <sys/eventhandler.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_param.h>
 
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 
 #include <i386/linux/linux.h>
 #include <i386/linux/linux_proto.h>
 #include <compat/linux/linux_emul.h>
 #include <compat/linux/linux_futex.h>
 #include <compat/linux/linux_ioctl.h>
 #include <compat/linux/linux_mib.h>
 #include <compat/linux/linux_misc.h>
 #include <compat/linux/linux_signal.h>
 #include <compat/linux/linux_util.h>
 #include <compat/linux/linux_vdso.h>
 
 MODULE_VERSION(linux, 1);
 
 #if BYTE_ORDER == LITTLE_ENDIAN
 #define SHELLMAGIC      0x2123 /* #! */
 #else
 #define SHELLMAGIC      0x2321
 #endif
 
 #if defined(DEBUG)
 SYSCTL_PROC(_compat_linux, OID_AUTO, debug,
             CTLTYPE_STRING | CTLFLAG_RW,
             0, 0, linux_sysctl_debug, "A",
             "Linux debugging control");
 #endif
 
 /*
  * Allow the sendsig functions to use the ldebug() facility
  * even though they are not syscalls themselves. Map them
  * to syscall 0. This is slightly less bogus than using
  * ldebug(sigreturn).
  */
 #define	LINUX_SYS_linux_rt_sendsig	0
 #define	LINUX_SYS_linux_sendsig		0
 
 #define	LINUX_PS_STRINGS	(LINUX_USRSTACK - sizeof(struct ps_strings))
 
 static int linux_szsigcode;
 static vm_object_t linux_shared_page_obj;
 static char *linux_shared_page_mapping;
 extern char _binary_linux_locore_o_start;
 extern char _binary_linux_locore_o_end;
 
 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
 
 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
 
 static int	linux_fixup(register_t **stack_base,
 		    struct image_params *iparams);
 static int	elf_linux_fixup(register_t **stack_base,
 		    struct image_params *iparams);
 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
 static void	exec_linux_setregs(struct thread *td,
 		    struct image_params *imgp, u_long stack);
 static register_t *linux_copyout_strings(struct image_params *imgp);
 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
 static void	linux_vdso_install(void *param);
 static void	linux_vdso_deinstall(void *param);
 
 static int linux_szplatform;
 const char *linux_kplatform;
 
 static eventhandler_tag linux_exit_tag;
 static eventhandler_tag linux_exec_tag;
 static eventhandler_tag linux_thread_dtor_tag;
 
 /*
  * Linux syscalls return negative errno values, whereas FreeBSD uses
  * positive ones, so they are mapped through this table.  It has
  * ELAST + 1 entries and every entry is stored already negated.
  * NOTE(review): presumably indexed by the FreeBSD errno value - confirm
  * against the consumer of sv_errtbl.
  *
  * Reference:
  *   FreeBSD: src/sys/sys/errno.h
  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
  *            linux-2.6.17.8/include/asm-generic/errno.h
  */
 static int bsd_to_linux_errno[ELAST + 1] = {
 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
 	 -72, -67, -71
 };
 
 /* Value reported when a FreeBSD trap number has no entry in the table. */
 #define LINUX_T_UNKNOWN  255
 /*
  * Trap number translation table.  The index is the FreeBSD trap number
  * (the T_* name is given beside each slot) and the value is the trap
  * number reported to Linux programs.
  */
 static int _bsd_to_linux_trapcode[] = {
 	LINUX_T_UNKNOWN,	/* 0 */
 	6,			/* 1  T_PRIVINFLT */
 	LINUX_T_UNKNOWN,	/* 2 */
 	3,			/* 3  T_BPTFLT */
 	LINUX_T_UNKNOWN,	/* 4 */
 	LINUX_T_UNKNOWN,	/* 5 */
 	16,			/* 6  T_ARITHTRAP */
 	254,			/* 7  T_ASTFLT */
 	LINUX_T_UNKNOWN,	/* 8 */
 	13,			/* 9  T_PROTFLT */
 	1,			/* 10 T_TRCTRAP */
 	LINUX_T_UNKNOWN,	/* 11 */
 	14,			/* 12 T_PAGEFLT */
 	LINUX_T_UNKNOWN,	/* 13 */
 	17,			/* 14 T_ALIGNFLT */
 	LINUX_T_UNKNOWN,	/* 15 */
 	LINUX_T_UNKNOWN,	/* 16 */
 	LINUX_T_UNKNOWN,	/* 17 */
 	0,			/* 18 T_DIVIDE */
 	2,			/* 19 T_NMI */
 	4,			/* 20 T_OFLOW */
 	5,			/* 21 T_BOUND */
 	7,			/* 22 T_DNA */
 	8,			/* 23 T_DOUBLEFLT */
 	9,			/* 24 T_FPOPFLT */
 	10,			/* 25 T_TSSFLT */
 	11,			/* 26 T_SEGNPFLT */
 	12,			/* 27 T_STKFLT */
 	18,			/* 28 T_MCHK */
 	19,			/* 29 T_XMMFLT */
 	15			/* 30 T_RESERVED */
 };
 /*
  * Bounds-checked lookup in _bsd_to_linux_trapcode: an index past the end
  * of the table yields LINUX_T_UNKNOWN.  The comparison is against a
  * sizeof expression, so it is done in an unsigned type and a negative
  * int argument also falls into the LINUX_T_UNKNOWN branch.
  */
 #define bsd_to_linux_trapcode(code) \
     ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
      _bsd_to_linux_trapcode[(code)]: \
      LINUX_T_UNKNOWN)
 
 LINUX_VDSO_SYM_INTPTR(linux_sigcode);
 LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
 LINUX_VDSO_SYM_INTPTR(linux_vsyscall);
 
 /*
  * Reconcile FreeBSD's and Linux's view of which signal a trap produces:
  * a SIGBUS caused by a protection, TSS, double or page fault is turned
  * into SIGSEGV.  Every other signal/trap pair is passed through as is.
  *
  * MPSAFE
  */
 static int
 translate_traps(int signal, int trap_code)
 {
 	if (signal == SIGBUS &&
 	    (trap_code == T_PROTFLT || trap_code == T_TSSFLT ||
 	    trap_code == T_DOUBLEFLT || trap_code == T_PAGEFLT))
 		return (SIGSEGV);
 	return (signal);
 }
 
 /*
  * Stack fixup that pushes, in this order, the envp pointer, the argv
  * pointer and argc below the incoming stack base, then leaves
  * *stack_base pointing at argc.  The suword() results are not checked;
  * 0 is always returned.
  */
 static int
 linux_fixup(register_t **stack_base, struct image_params *imgp)
 {
 	register_t *argv, *envp, *sp;
 
 	sp = *stack_base;
 	argv = sp;
 	/* envp follows the argv pointers and their terminating NULL. */
 	envp = sp + (imgp->args->argc + 1);
 
 	sp--;
 	suword(sp, (intptr_t)(void *)envp);
 	sp--;
 	suword(sp, (intptr_t)(void *)argv);
 	sp--;
 	suword(sp, imgp->args->argc);
 
 	*stack_base = sp;
 	return (0);
 }
 
 /*
  * Stack fixup for Linux ELF images.
  *
  * Emits the auxiliary vector entries starting just past the argv and envp
  * pointer arrays (argc + envc + 2 words above *stack_base), frees
  * imgp->auxargs, then pushes argc one word below *stack_base.
  * Must run in the context of the process being exec'ed (asserted).
  * Always returns 0; the suword() result is not checked.
  *
  * NOTE(review): AUXARGS_ENTRY is assumed to store an (id, value) pair at
  * pos and advance pos - confirm against its definition.
  */
 static int
 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
 {
 	struct proc *p;
 	Elf32_Auxargs *args;
 	Elf32_Addr *uplatform;
 	struct ps_strings *arginfo;
 	register_t *pos;
 
 	KASSERT(curthread->td_proc == imgp->proc,
 	    ("unsafe elf_linux_fixup(), should be curproc"));
 
 	p = imgp->proc;
 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
 	/* Address immediately below ps_strings, sized by linux_szplatform. */
 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
 	args = (Elf32_Auxargs *)imgp->auxargs;
 	/* Skip the argv and envp arrays and their two NULL terminators. */
 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
 
 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
 	    imgp->proc->p_sysent->sv_shared_page_base);
 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall);
 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
 
 	/*
 	 * Do not export AT_CLKTCK when emulating a Linux kernel older than
 	 * 2.4.0, as it first appeared in 2.4.0-rc7.
 	 * When exported, AT_CLKTCK is what sysconf(_SC_CLK_TCK) returns;
 	 * glibc falls back to the hard-coded CLK_TCK value when the aux
 	 * entry is not present.
 	 * Also see linux_times() implementation.
 	 */
 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
 	/*
 	 * NOTE(review): AT_EUID/AT_EGID are filled from the saved ids
 	 * (cr_svuid/cr_svgid) rather than the effective ones - confirm
 	 * this is intended.
 	 */
 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
 	AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary);
 	/* Optional entries: only emitted when the value is available. */
 	if (imgp->execpathp != 0)
 		AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp);
 	if (args->execfd != -1)
 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
 	AUXARGS_ENTRY(pos, AT_NULL, 0);
 
 	/* The auxargs are consumed; release them and clear the pointer. */
 	free(imgp->auxargs, M_TEMP);
 	imgp->auxargs = NULL;
 
 	(*stack_base)--;
 	suword(*stack_base, (register_t)imgp->args->argc);
 	return (0);
 }
 
 /*
  * Copied from kern/kern_exec.c
  *
  * Lay out the top of the new user stack: the platform string, the
  * optional executable path, the stack-protector canary, the argument and
  * environment strings, and the argv/envp pointer vectors (plus room for
  * the ELF auxargs when imgp->auxargs is set).  Also fills in the
  * ps_strings structure.
  *
  * Returns the address of the pointer vector, which is the initial stack
  * base.  The results of copyout() and suword() are not checked.
  */
 static register_t *
 linux_copyout_strings(struct image_params *imgp)
 {
 	int argc, envc;
 	char **vectp;
 	char *stringp, *destp;
 	register_t *stack_base;
 	struct ps_strings *arginfo;
 	char canary[LINUX_AT_RANDOM_LEN];
 	size_t execpath_len;
 	struct proc *p;
 
 	/*
 	 * Calculate string base and vector table pointers.
 	 */
 	p = imgp->proc;
 	/* The exec path is only exported when auxargs are present. */
 	if (imgp->execpath != NULL && imgp->auxargs != NULL)
 		execpath_len = strlen(imgp->execpath) + 1;
 	else
 		execpath_len = 0;
 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
 	/*
 	 * destp is where the argument/environment strings start: below
 	 * ps_strings, leaving room for SPARE_USRSPACE, the platform string,
 	 * the canary, the exec path and the strings themselves.
 	 */
 	destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
 	    roundup(sizeof(canary), sizeof(char *)) -
 	    roundup(execpath_len, sizeof(char *)) -
 	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
 
 	/*
 	 * install LINUX_PLATFORM
 	 */
 	copyout(linux_kplatform, ((caddr_t)arginfo - linux_szplatform),
 	    linux_szplatform);
 
 	/* The exec path goes directly below the platform string. */
 	if (execpath_len != 0) {
 		imgp->execpathp = (uintptr_t)arginfo -
 		linux_szplatform - execpath_len;
 		copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len);
 	}
 
 	/*
 	 * Prepare the canary for SSP.
 	 */
 	arc4rand(canary, sizeof(canary), 0);
 	imgp->canary = (uintptr_t)arginfo - linux_szplatform -
 	    roundup(execpath_len, sizeof(char *)) -
 	    roundup(sizeof(canary), sizeof(char *));
 	copyout(canary, (void *)imgp->canary, sizeof(canary));
 
 	/*
 	 * If we have a valid auxargs ptr, prepare some room
 	 * on the stack.
 	 */
 	if (imgp->auxargs) {
 		/*
 		 * Unless the image activator already set auxarg_size,
 		 * default to LINUX_AT_COUNT * 2 slots for the ELF auxargs
 		 * data.  This is for backward compatibility.
 		 */
 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
 		    (LINUX_AT_COUNT * 2);
 		/*
 		 * The '+ 2' is for the null pointers at the end of each of
 		 * the arg and env vector sets, and imgp->auxarg_size is room
 		 * for the arguments of the runtime loader.
 		 */
 		vectp = (char **)(destp - (imgp->args->argc +
 		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
 	} else {
 		/*
 		 * The '+ 2' is for the null pointers at the end of each of
 		 * the arg and env vector sets
 		 */
 		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
 		    sizeof(char *));
 	}
 
 	/*
 	 * vectp also becomes our initial stack base
 	 */
 	stack_base = (register_t *)vectp;
 
 	stringp = imgp->args->begin_argv;
 	argc = imgp->args->argc;
 	envc = imgp->args->envc;
 
 	/*
 	 * Copy out strings - arguments and environment.
 	 */
 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
 
 	/*
 	 * Fill in "ps_strings" struct for ps, w, etc.
 	 */
 	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
 	suword(&arginfo->ps_nargvstr, argc);
 
 	/*
 	 * Fill in argument portion of vector table.  stringp walks the
 	 * kernel copy of the strings while destp tracks the matching user
 	 * address, so each vector slot gets the user address of its string.
 	 */
 	for (; argc > 0; --argc) {
 		suword(vectp++, (long)(intptr_t)destp);
 		while (*stringp++ != 0)
 			destp++;
 		destp++;
 	}
 
 	/* a null vector table pointer separates the argp's from the envp's */
 	suword(vectp++, 0);
 
 	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
 	suword(&arginfo->ps_nenvstr, envc);
 
 	/*
 	 * Fill in environment portion of vector table.
 	 */
 	for (; envc > 0; --envc) {
 		suword(vectp++, (long)(intptr_t)destp);
 		while (*stringp++ != 0)
 			destp++;
 		destp++;
 	}
 
 	/* end of vector table is a null pointer */
 	suword(vectp, 0);
 
 	return (stack_base);
 }
 
 /*
  * Deliver a signal to the current thread using the Linux "rt" signal
  * frame (struct l_rt_sigframe).  linux_sendsig() calls this for handlers
  * installed with SA_SIGINFO.
  *
  * catcher is the user handler, ksi describes the signal and mask is the
  * signal mask to record in the saved context.
  *
  * Called with the process lock and the sigacts mutex held.  Both are
  * dropped while the frame is built and copied out, and both are
  * re-acquired before returning.  If the frame cannot be copied out, the
  * process is terminated with SIGILL.
  */
 static void
 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	struct l_rt_sigframe *fp, frame;
 	int sig, code;
 	int oonstack;
 
 	sig = ksi->ksi_signo;
 	code = ksi->ksi_code;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	regs = td->td_frame;
 	oonstack = sigonstack(regs->tf_esp);
 
 #ifdef DEBUG
 	if (ldebug(rt_sendsig))
 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
 		    catcher, sig, (void*)mask, code);
 #endif
 	/*
 	 * Allocate space for the signal handler context: at the top of the
 	 * alternate signal stack when one is configured, not already in
 	 * use, and requested for this signal; otherwise just below the
 	 * current user stack pointer.
 	 */
 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
 	} else
 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
 	mtx_unlock(&psp->ps_mtx);
 
 	/*
 	 * Build the argument list for the signal handler.
 	 */
 	sig = bsd_to_linux_signal(sig);
 
 	bzero(&frame, sizeof(frame));
 
 	frame.sf_handler = catcher;
 	frame.sf_sig = sig;
 	/* These point into the user-space copy of the frame. */
 	frame.sf_siginfo = &fp->sf_si;
 	frame.sf_ucontext = &fp->sf_sc;
 
 	/* Fill in POSIX parts */
 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
 
 	/*
 	 * Build the signal context to be used by sigreturn.
 	 */
 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
 
 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
 	PROC_UNLOCK(p);
 
 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
 
 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__mask;
 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
 	frame.sf_sc.uc_mcontext.sc_esp    = regs->tf_esp;
 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
 	/*
 	 * The translation table is indexed by the FreeBSD trap number, so
 	 * use ksi_trapno here (as linux_sendsig() does), not ksi_code.
 	 */
 	frame.sf_sc.uc_mcontext.sc_trapno =
 	    bsd_to_linux_trapcode(ksi->ksi_trapno);
 
 #ifdef DEBUG
 	if (ldebug(rt_sendsig))
 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
 #endif
 
 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 #ifdef DEBUG
 		if (ldebug(rt_sendsig))
 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
 			    fp, oonstack);
 #endif
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/*
 	 * Build context to run handler in: resume at the rt signal
 	 * trampoline on the new frame, with the trace, VM86 and direction
 	 * flags cleared and the user code/data selectors loaded.
 	 */
 	regs->tf_esp = (int)fp;
 	regs->tf_eip = linux_rt_sigcode;
 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_ss = _udatasel;
 	/* Restore the lock state the caller expects. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 
 /*
  * Send an interrupt to process.
  *
  * Stack is set up to allow sigcode stored
  * in u. to call routine, followed by kcall
  * to sigreturn routine below.  After sigreturn
  * resets the signal mask, the stack, and the
  * frame pointer, it returns to the user
  * specified pc, psl.
  *
  * Handlers installed with SA_SIGINFO are handed to linux_rt_sendsig();
  * all others get the plain struct l_sigframe built here.
  *
  * Called with the process lock and the sigacts mutex held.  Both are
  * dropped while the frame is built and copied out, and both are
  * re-acquired before returning.  If the frame cannot be copied out, the
  * process is terminated with SIGILL.
  */
 static void
 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	struct l_sigframe *fp, frame;
 	l_sigset_t lmask;
 	int sig, code;
 	int oonstack;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	psp = p->p_sigacts;
 	sig = ksi->ksi_signo;
 	code = ksi->ksi_code;	/* only consumed by the DEBUG printf below */
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		linux_rt_sendsig(catcher, ksi, mask);
 		return;
 	}
 	regs = td->td_frame;
 	oonstack = sigonstack(regs->tf_esp);
 
 #ifdef DEBUG
 	if (ldebug(sendsig))
 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
 		    catcher, sig, (void*)mask, code);
 #endif
 
 	/*
 	 * Allocate space for the signal handler context: at the top of the
 	 * alternate signal stack when one is configured, not already in
 	 * use, and requested for this signal; otherwise just below the
 	 * current user stack pointer.
 	 */
 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
 	} else
 		fp = (struct l_sigframe *)regs->tf_esp - 1;
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(p);
 
 	/*
 	 * Build the argument list for the signal handler.
 	 */
 	sig = bsd_to_linux_signal(sig);
 
 	bzero(&frame, sizeof(frame));
 
 	frame.sf_handler = catcher;
 	frame.sf_sig = sig;
 
 	bsd_to_linux_sigset(mask, &lmask);
 
 	/*
 	 * Build the signal context to be used by sigreturn.
 	 */
 	frame.sf_sc.sc_mask   = lmask.__mask;
 	frame.sf_sc.sc_gs     = rgs();
 	frame.sf_sc.sc_fs     = regs->tf_fs;
 	frame.sf_sc.sc_es     = regs->tf_es;
 	frame.sf_sc.sc_ds     = regs->tf_ds;
 	frame.sf_sc.sc_edi    = regs->tf_edi;
 	frame.sf_sc.sc_esi    = regs->tf_esi;
 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
 	frame.sf_sc.sc_esp    = regs->tf_esp;
 	frame.sf_sc.sc_edx    = regs->tf_edx;
 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
 	frame.sf_sc.sc_eax    = regs->tf_eax;
 	frame.sf_sc.sc_eip    = regs->tf_eip;
 	frame.sf_sc.sc_cs     = regs->tf_cs;
 	frame.sf_sc.sc_eflags = regs->tf_eflags;
 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
 	frame.sf_sc.sc_ss     = regs->tf_ss;
 	frame.sf_sc.sc_err    = regs->tf_err;
 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
 	/* Translate the FreeBSD trap number into the Linux one. */
 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
 
 	frame.sf_extramask[0] = lmask.__mask;
 
 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/*
 	 * Build context to run handler in: resume at the signal trampoline
 	 * on the new frame, with the trace, VM86 and direction flags
 	 * cleared and the user code/data selectors loaded.
 	 */
 	regs->tf_esp = (int)fp;
 	regs->tf_eip = linux_sigcode;
 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_ss = _udatasel;
 	/* Restore the lock state the caller expects. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 /*
  * System call to clean up state after a signal has been taken.
  * Reset the signal mask and the register state from the context left
  * by sendsig (above).  Return to the previous pc and psl as specified
  * by that context.  Check carefully to make sure that the user has not
  * modified the psl to gain improper privileges or to cause a machine
  * fault.
  *
  * Returns EFAULT when the sigframe at args->sfp cannot be copied in,
  * EINVAL when the saved eflags or %cs fail the checks below (in the %cs
  * case trapsignal() is also called with SIGBUS), and EJUSTRETURN after
  * the trapframe has been rewritten from the saved context.
  */
 int
 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
 {
 	struct l_sigframe frame;
 	struct trapframe *regs;
 	l_sigset_t lmask;
 	sigset_t bmask;
 	int eflags;
 	ksiginfo_t ksi;
 
 	regs = td->td_frame;
 
 #ifdef DEBUG
 	if (ldebug(sigreturn))
 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
 #endif
 	/*
 	 * The trampoline code hands us the sigframe.
 	 * It is unsafe to keep track of it ourselves, in the event that a
 	 * program jumps out of a signal handler.
 	 */
 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
 		return (EFAULT);
 
 	/*
 	 * Check for security violations.
 	 *
 	 * EFLAGS_SECURE() is true when the new and old eflags differ only
 	 * in bits that are part of PSL_USERCHANGE.
 	 */
 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
 	eflags = frame.sf_sc.sc_eflags;
 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
 		return (EINVAL);
 
 	/*
 	 * Don't allow users to load a valid privileged %cs.  Let the
 	 * hardware check for invalid selectors, excess privilege in
 	 * other selectors, invalid %eip's and invalid %esp's.
 	 */
 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
 		/* Report the bad selector as a protection fault at %eip. */
 		ksiginfo_init_trap(&ksi);
 		ksi.ksi_signo = SIGBUS;
 		ksi.ksi_code = BUS_OBJERR;
 		ksi.ksi_trapno = T_PROTFLT;
 		ksi.ksi_addr = (void *)regs->tf_eip;
 		trapsignal(td, &ksi);
 		return (EINVAL);
 	}
 
 	/* Convert the saved Linux mask and install it with SIG_SETMASK. */
 	lmask.__mask = frame.sf_sc.sc_mask;
 	linux_to_bsd_sigset(&lmask, &bmask);
 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
 
 	/*
 	 * Restore signal context.
 	 */
 	/* %gs was restored by the trampoline. */
 	regs->tf_fs     = frame.sf_sc.sc_fs;
 	regs->tf_es     = frame.sf_sc.sc_es;
 	regs->tf_ds     = frame.sf_sc.sc_ds;
 	regs->tf_edi    = frame.sf_sc.sc_edi;
 	regs->tf_esi    = frame.sf_sc.sc_esi;
 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
 	regs->tf_edx    = frame.sf_sc.sc_edx;
 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
 	regs->tf_eax    = frame.sf_sc.sc_eax;
 	regs->tf_eip    = frame.sf_sc.sc_eip;
 	regs->tf_cs     = frame.sf_sc.sc_cs;
 	/* Use the value that passed EFLAGS_SECURE() above. */
 	regs->tf_eflags = eflags;
 	/* The stack pointer comes from sc_esp_at_signal, not sc_esp. */
 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
 	regs->tf_ss     = frame.sf_sc.sc_ss;
 
 	return (EJUSTRETURN);
 }
 
 /*
  * System call to clean up state after a signal has been taken.
  * Reset the signal mask and stack state from the context left by
  * rt_sendsig (above).  Return to the previous pc and psl as specified
  * by that context.  Check carefully to make sure that the user has not
  * modified the psl to gain improper privileges or to cause a machine
  * fault.
  *
  * Returns EFAULT when the ucontext at args->ucp cannot be copied in,
  * EINVAL when the saved eflags or %cs fail the checks below (in the %cs
  * case trapsignal() is also called with SIGBUS), and EJUSTRETURN after
  * the trapframe and the alternate signal stack have been restored.
  */
 int
 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
 {
 	struct l_ucontext uc;
 	struct l_sigcontext *context;
 	sigset_t bmask;
 	l_stack_t *lss;
 	stack_t ss;
 	struct trapframe *regs;
 	int eflags;
 	ksiginfo_t ksi;
 
 	regs = td->td_frame;
 
 #ifdef DEBUG
 	if (ldebug(rt_sigreturn))
 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
 #endif
 	/*
 	 * The trampoline code hands us the ucontext.
 	 * It is unsafe to keep track of it ourselves, in the event that a
 	 * program jumps out of a signal handler.
 	 */
 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
 		return (EFAULT);
 
 	context = &uc.uc_mcontext;
 
 	/*
 	 * Check for security violations.
 	 *
 	 * EFLAGS_SECURE() is true when the new and old eflags differ only
 	 * in bits that are part of PSL_USERCHANGE.
 	 */
 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
 	eflags = context->sc_eflags;
 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
 		return (EINVAL);
 
 	/*
 	 * Don't allow users to load a valid privileged %cs.  Let the
 	 * hardware check for invalid selectors, excess privilege in
 	 * other selectors, invalid %eip's and invalid %esp's.
 	 */
 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
 	if (!CS_SECURE(context->sc_cs)) {
 		/* Report the bad selector as a protection fault at %eip. */
 		ksiginfo_init_trap(&ksi);
 		ksi.ksi_signo = SIGBUS;
 		ksi.ksi_code = BUS_OBJERR;
 		ksi.ksi_trapno = T_PROTFLT;
 		ksi.ksi_addr = (void *)regs->tf_eip;
 		trapsignal(td, &ksi);
 		return (EINVAL);
 	}
 
 	/* Convert the saved Linux mask and install it with SIG_SETMASK. */
 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
 
 	/*
 	 * Restore signal context
 	 */
 	/* %gs was restored by the trampoline. */
 	regs->tf_fs     = context->sc_fs;
 	regs->tf_es     = context->sc_es;
 	regs->tf_ds     = context->sc_ds;
 	regs->tf_edi    = context->sc_edi;
 	regs->tf_esi    = context->sc_esi;
 	regs->tf_ebp    = context->sc_ebp;
 	regs->tf_ebx    = context->sc_ebx;
 	regs->tf_edx    = context->sc_edx;
 	regs->tf_ecx    = context->sc_ecx;
 	regs->tf_eax    = context->sc_eax;
 	regs->tf_eip    = context->sc_eip;
 	regs->tf_cs     = context->sc_cs;
 	/* Use the value that passed EFLAGS_SECURE() above. */
 	regs->tf_eflags = eflags;
 	/* The stack pointer comes from sc_esp_at_signal, not sc_esp. */
 	regs->tf_esp    = context->sc_esp_at_signal;
 	regs->tf_ss     = context->sc_ss;
 
 	/*
 	 * call sigaltstack & ignore results..
 	 */
 	/* Translate the Linux stack descriptor saved in the ucontext. */
 	lss = &uc.uc_stack;
 	ss.ss_sp = lss->ss_sp;
 	ss.ss_size = lss->ss_size;
 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
 
 #ifdef DEBUG
 	if (ldebug(rt_sigreturn))
 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
 #endif
 	(void)kern_sigaltstack(td, &ss, NULL);
 
 	return (EJUSTRETURN);
 }
 
 static int
 linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
 {
 	struct proc *p;
 	struct trapframe *frame;
 
 	p = td->td_proc;
 	frame = td->td_frame;
 
 	sa->code = frame->tf_eax;
 	sa->args[0] = frame->tf_ebx;
 	sa->args[1] = frame->tf_ecx;
 	sa->args[2] = frame->tf_edx;
 	sa->args[3] = frame->tf_esi;
 	sa->args[4] = frame->tf_edi;
 	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
 
 	if (sa->code >= p->p_sysent->sv_size)
 		/* nosys */
 		sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1];
  	else
  		sa->callp = &p->p_sysent->sv_table[sa->code];
 	sa->narg = sa->callp->sy_narg;
 
 	td->td_retval[0] = 0;
 	td->td_retval[1] = frame->tf_edx;
 
 	return (0);
 }
 
 /*
  * Image activator tried first when a Linux binary performs an exec.
  * Standard shell script execution is overridden so that the interpreter
  * path can be modified.  This is only done when a Linux binary is doing
  * the exec, so no EXEC module is created for it.
  *
  * Returns -1 when the image does not start with SHELLMAGIC, otherwise
  * the result of exec_shell_imgact().
  */
 static int	exec_linux_imgact_try(struct image_params *iparams);
 
 static int
 exec_linux_imgact_try(struct image_params *imgp)
 {
 	const char *hdr;
 	char *altpath;
 	int rv;
 
 	/*
 	 * The interpreter for shell scripts run from a linux binary needs
 	 * to be located in /compat/linux if possible in order to recursively
 	 * maintain linux path emulation.
 	 */
 	hdr = (const char *)imgp->image_header;
 	if (((const short *)hdr)[0] != SHELLMAGIC)
 		return (-1);
 
 	/* Run the normal shell image activator first. */
 	rv = exec_shell_imgact(imgp);
 	if (rv != 0)
 		return (rv);
 
 	/*
 	 * It succeeded: look for an alternate path for the interpreter and,
 	 * if one is found, use our stringspace to store it.
 	 */
 	linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
 	    imgp->interpreter_name, UIO_SYSSPACE, &altpath, 0, AT_FDCWD);
 	if (altpath != NULL)
 		imgp->args->fname_buf = imgp->interpreter_name = altpath;
 
 	return (rv);
 }
 
 /*
  * Register setup for a freshly exec'ed Linux image.  exec_setregs() is
  * run first; the defaults that could confuse Linux binaries are then
  * overridden here.
  */
 static void
 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
 {
 	struct pcb *linux_pcb;
 
 	linux_pcb = td->td_pcb;
 	exec_setregs(td, imgp, stack);
 
 	/* Start with the Linux npx control word. */
 	linux_pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
 
 	/* Linux sets %gs to 0, we default to _udatasel */
 	linux_pcb->pcb_gs = 0;
 	load_gs(0);
 }
 
 /*
  * Store in *dst the machine name string that corresponds to cpu_class:
  * "i686", "i586" or "i486" for the matching CPUCLASS_* value, and
  * "i386" for anything else.
  */
 static void
 linux_get_machine(const char **dst)
 {
 	const char *machine;
 
 	switch (cpu_class) {
 	case CPUCLASS_686:
 		machine = "i686";
 		break;
 	case CPUCLASS_586:
 		machine = "i586";
 		break;
 	case CPUCLASS_486:
 		machine = "i486";
 		break;
 	default:
 		machine = "i386";
 		break;
 	}
 	*dst = machine;
 }
 
 /*
  * System entry vector for "Linux a.out" images.  It uses the
  * linux_sysent syscall table, linux_sendsig for signal delivery and the
  * linux_locore object as signal trampoline code.  No core dump handler
  * is provided (sv_coredump is NULL).
  */
 struct sysentvec linux_sysvec = {
 	.sv_size	= LINUX_SYS_MAXSYSCALL,
 	.sv_table	= linux_sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= ELAST + 1,
 	.sv_errtbl	= bsd_to_linux_errno,
 	.sv_transtrap	= translate_traps,
 	.sv_fixup	= linux_fixup,
 	.sv_sendsig	= linux_sendsig,
 	.sv_sigcode	= &_binary_linux_locore_o_start,
 	.sv_szsigcode	= &linux_szsigcode,
 	.sv_name	= "Linux a.out",
 	.sv_coredump	= NULL,
 	.sv_imgact_try	= exec_linux_imgact_try,
 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= LINUX_USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings = exec_copyout_strings,
 	.sv_setregs	= exec_linux_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
 	.sv_syscallnames = NULL,
 	.sv_shared_page_base = LINUX_SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= linux_schedtail,
 	.sv_thread_detach = linux_thread_detach,
+	.sv_trap	= NULL,
 };
 /* NOTE(review): presumably performs boot-time sysentvec setup; confirm. */
 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
 
 /*
  * System entry vector for "Linux ELF" images.  Compared with
  * linux_sysvec above it uses elf_linux_fixup, elf32_coredump,
  * LINUX_PS_STRINGS and linux_copyout_strings, and its flags carry
  * SV_SHP instead of SV_AOUT.  sv_shared_page_obj is filled in later by
  * linux_vdso_install().
  */
 struct sysentvec elf_linux_sysvec = {
 	.sv_size	= LINUX_SYS_MAXSYSCALL,
 	.sv_table	= linux_sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= ELAST + 1,
 	.sv_errtbl	= bsd_to_linux_errno,
 	.sv_transtrap	= translate_traps,
 	.sv_fixup	= elf_linux_fixup,
 	.sv_sendsig	= linux_sendsig,
 	.sv_sigcode	= &_binary_linux_locore_o_start,
 	.sv_szsigcode	= &linux_szsigcode,
 	.sv_name	= "Linux ELF",
 	.sv_coredump	= elf32_coredump,
 	.sv_imgact_try	= exec_linux_imgact_try,
 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= LINUX_USRSTACK,
 	.sv_psstrings	= LINUX_PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings = linux_copyout_strings,
 	.sv_setregs	= exec_linux_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
 	.sv_syscallnames = NULL,
 	.sv_shared_page_base = LINUX_SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= linux_schedtail,
 	.sv_thread_detach = linux_thread_detach,
+	.sv_trap	= NULL,
 };
 
 /*
  * Set up the shared page used by Linux ELF processes.  Registered with
  * SYSINIT at SI_SUB_EXEC / SI_ORDER_ANY; the param argument is unused
  * (NULL is passed).
  *
  * The size of the linux_locore object is recorded in linux_szsigcode
  * and must fit into the shared page, otherwise the kernel panics.  The
  * vdso is then fixed up, the shared page object is created, the vdso is
  * relocated for LINUX_SHAREDPAGE and its contents are copied into the
  * shared page mapping.
  */
 static void
 linux_vdso_install(void *param)
 {
 
 	/* Size of the embedded linux_locore object. */
 	linux_szsigcode = (&_binary_linux_locore_o_end - 
 	    &_binary_linux_locore_o_start);
 
 	if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
 		panic("Linux invalid vdso size\n");
 
 	__elfN(linux_vdso_fixup)(&elf_linux_sysvec);
 
 	linux_shared_page_obj = __elfN(linux_shared_page_init)
 	    (&linux_shared_page_mapping);
 
 	__elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX_SHAREDPAGE);
 
 	/* Populate the shared page and publish its object in the sysvec. */
 	bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
 	    linux_szsigcode);
 	elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
 }
 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
     (sysinit_cfunc_t)linux_vdso_install, NULL);
 
 /*
  * Tear down the Linux shared page by handing linux_shared_page_obj to
  * linux_shared_page_fini.  Registered with SYSUNINIT at SI_SUB_EXEC /
  * SI_ORDER_FIRST; the param argument is unused (NULL is passed).
  */
 static void
 linux_vdso_deinstall(void *param)
 {
 
 	__elfN(linux_shared_page_fini)(linux_shared_page_obj);
 }
 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
     (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
 
 /* Vendor name and first descriptor word matched in the ABI note. */
 static char GNU_ABI_VENDOR[] = "GNU";
 static int GNULINUX_ABI_DESC = 0;
 
 /*
  * Translate the descriptor of an ELF ABI note into an osrel value.
  *
  * The descriptor is located right after the note header and the name,
  * whose size is rounded up to sizeof(Elf32_Addr).  Returns FALSE when
  * the first descriptor word is not GNULINUX_ABI_DESC; otherwise stores
  * the encoded release in *osrel and returns TRUE.
  */
 static boolean_t
 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
 {
 	const Elf32_Word *words;
 	uintptr_t addr;
 
 	addr = (uintptr_t)(note + 1) +
 	    roundup2(note->n_namesz, sizeof(Elf32_Addr));
 	words = (const Elf32_Word *)addr;
 
 	if (words[0] == GNULINUX_ABI_DESC) {
 		/*
 		 * For linux we encode osrel as VVVMMMIII (version, major,
 		 * minor), see linux_mib.c.
 		 */
 		*osrel = words[1] * 1000000 + words[2] * 1000 + words[3];
 		return (TRUE);
 	}
 
 	return (FALSE);
 }
 
 /*
  * Brand note description shared by the Linux ELF brands below: vendor
  * GNU_ABI_VENDOR, note type 1, with linux_trans_osrel() translating the
  * descriptor into an osrel value (BN_TRANSLATE_OSREL).
  */
 static Elf_Brandnote linux_brandnote = {
 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
 	.hdr.n_type	= 1,
 	.vendor		= GNU_ABI_VENDOR,
 	.flags		= BN_TRANSLATE_OSREL,
 	.trans_osrel	= linux_trans_osrel
 };
 
 /*
  * ELF brand for EM_386 Linux binaries whose interpreter is
  * /lib/ld-linux.so.1; such images run under elf_linux_sysvec with
  * /compat/linux as the emulation path.
  */
 static Elf32_Brandinfo linux_brand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_386,
 	.compat_3_brand	= "Linux",
 	.emul_path	= "/compat/linux",
 	.interp_path	= "/lib/ld-linux.so.1",
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 /*
  * ELF brand for EM_386 Linux binaries whose interpreter is
  * /lib/ld-linux.so.2.  All other members match linux_brand.
  */
 static Elf32_Brandinfo linux_glibc2brand = {
 	.brand		= ELFOSABI_LINUX,
 	.machine	= EM_386,
 	.compat_3_brand	= "Linux",
 	.emul_path	= "/compat/linux",
 	.interp_path	= "/lib/ld-linux.so.2",
 	.sysvec		= &elf_linux_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &linux_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 /*
  * NULL-terminated list of the brands that linux_elf_modevent() inserts
  * on MOD_LOAD and removes on MOD_UNLOAD.
  */
 Elf32_Brandinfo *linux_brandlist[] = {
 	&linux_brand,
 	&linux_glibc2brand,
 	NULL
 };
 
 /*
  * Module event handler for the Linux ELF emulation.
  *
  * MOD_LOAD inserts every brand from linux_brandlist and, if all of them
  * were accepted, registers the ioctl handlers, the futex list and
  * mutex, the process/thread event handlers and the remaining Linux
  * emulation state.  MOD_UNLOAD refuses with EBUSY while any brand is
  * in use; otherwise it removes the brands and undoes the registrations.
  *
  * Returns 0 on success, EINVAL when a brand could not be inserted or
  * removed, EBUSY as described above, and EOPNOTSUPP for any other event
  * type.
  */
 static int
 linux_elf_modevent(module_t mod, int type, void *data)
 {
 	Elf32_Brandinfo **brandinfo;
 	int error;
 	struct linux_ioctl_handler **lihp;
 
 	error = 0;
 
 	switch(type) {
 	case MOD_LOAD:
 		/* Try every brand; remember if any insertion failed. */
 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
 		     ++brandinfo)
 			if (elf32_insert_brand_entry(*brandinfo) < 0)
 				error = EINVAL;
 		if (error == 0) {
 			SET_FOREACH(lihp, linux_ioctl_handler_set)
 				linux_ioctl_register_handler(*lihp);
 			LIST_INIT(&futex_list);
 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
 			      NULL, 1000);
 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
 			      NULL, 1000);
 			linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
 			    linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
 			/* Platform string and its pointer-aligned length. */
 			linux_get_machine(&linux_kplatform);
 			linux_szplatform = roundup(strlen(linux_kplatform) + 1,
 			    sizeof(char *));
 			linux_osd_jail_register();
 			/* Use stathz when it is non-zero, hz otherwise. */
 			stclohz = (stathz ? stathz : hz);
 			if (bootverbose)
 				printf("Linux ELF exec handler installed\n");
 		} else
 			printf("cannot insert Linux ELF brand handler\n");
 		break;
 	case MOD_UNLOAD:
 		/* Refuse to unload while any brand is still in use. */
 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
 		     ++brandinfo)
 			if (elf32_brand_inuse(*brandinfo))
 				error = EBUSY;
 		if (error == 0) {
 			for (brandinfo = &linux_brandlist[0];
 			     *brandinfo != NULL; ++brandinfo)
 				if (elf32_remove_brand_entry(*brandinfo) < 0)
 					error = EINVAL;
 		}
 		if (error == 0) {
 			/* Undo the registrations made on MOD_LOAD. */
 			SET_FOREACH(lihp, linux_ioctl_handler_set)
 				linux_ioctl_unregister_handler(*lihp);
 			mtx_destroy(&futex_mtx);
 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
 			EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
 			linux_osd_jail_deregister();
 			if (bootverbose)
 				printf("Linux ELF exec handler removed\n");
 		} else
 			printf("Could not deinstall ELF interpreter entry\n");
 		break;
 	default:
 		return (EOPNOTSUPP);
 	}
 	return (error);
 }
 
 /*
  * Module description for "linuxelf": events are dispatched to
  * linux_elf_modevent(); the third initializer (0) is the extra data
  * slot.
  */
 static moduledata_t linux_elf_mod = {
 	"linuxelf",
 	linux_elf_modevent,
 	0
 };
 
 /* Declare the module at SI_SUB_EXEC / SI_ORDER_ANY. */
 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
Index: projects/clang380-import/sys/kern/imgact_aout.c
===================================================================
--- projects/clang380-import/sys/kern/imgact_aout.c	(revision 293686)
+++ projects/clang380-import/sys/kern/imgact_aout.c	(revision 293687)
@@ -1,338 +1,339 @@
 /*-
  * Copyright (c) 1993, David Greenman
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/imgact_aout.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/signalvar.h>
 #include <sys/syscall.h>
 #include <sys/sysent.h>
 #include <sys/systm.h>
 #include <sys/vnode.h>
 
 #include <machine/frame.h>
 #include <machine/md_var.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_param.h>
 
 #ifdef __amd64__
 #include <compat/freebsd32/freebsd32_signal.h>
 #include <compat/freebsd32/freebsd32_util.h>
 #include <compat/freebsd32/freebsd32_proto.h>
 #include <compat/freebsd32/freebsd32_syscall.h>
 #include <compat/ia32/ia32_signal.h>
 #endif
 
 static int	exec_aout_imgact(struct image_params *imgp);
 static int	aout_fixup(register_t **stack_base, struct image_params *imgp);
 
 #if defined(__i386__)
 /*
  * System entry vector for "FreeBSD a.out" images on i386.  It uses the
  * native sysent table, sendsig and sigcode, and provides no error
  * translation table, trap translation, core dump handler or image
  * activator hook (those members are 0/NULL).
  */
 struct sysentvec aout_sysvec = {
 	.sv_size	= SYS_MAXSYSCALL,
 	.sv_table	= sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= 0,
 	.sv_errtbl	= NULL,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= aout_fixup,
 	.sv_sendsig	= sendsig,
 	.sv_sigcode	= sigcode,
 	.sv_szsigcode	= &szsigcode,
 	.sv_name	= "FreeBSD a.out",
 	.sv_coredump	= NULL,
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings	= exec_copyout_strings,
 	.sv_setregs	= exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_FREEBSD | SV_AOUT | SV_IA32 | SV_ILP32,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = syscallnames,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
+	.sv_trap	= NULL,
 };
 
 #elif defined(__amd64__)
 
 /*
  * Address space layout for 32-bit a.out images on amd64: the user
  * stack top, the ps_strings location just below it, and the lowest
  * user address.
  */
 #define	AOUT32_USRSTACK	0xbfc00000
 #define	AOUT32_PS_STRINGS \
     (AOUT32_USRSTACK - sizeof(struct freebsd32_ps_strings))
 #define	AOUT32_MINUSER	FREEBSD32_MINUSER
 
 extern const char *freebsd32_syscallnames[];
 extern u_long ia32_maxssiz;
 
 /*
  * System entry vector for "FreeBSD a.out" images on amd64.  It uses
  * the freebsd32 syscall table and the ia32 signal, register and limit
  * helpers.  Both sv_maxuser and sv_usrstack are AOUT32_USRSTACK.
  * Members that are not named here (for example sv_schedtail,
  * sv_thread_detach and sv_trap, which the i386 variant sets to NULL
  * explicitly) are zero-initialized by the designated initializer.
  */
 struct sysentvec aout_sysvec = {
 	.sv_size	= FREEBSD32_SYS_MAXSYSCALL,
 	.sv_table	= freebsd32_sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= 0,
 	.sv_errtbl	= NULL,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= aout_fixup,
 	.sv_sendsig	= ia32_sendsig,
 	.sv_sigcode	= ia32_sigcode,
 	.sv_szsigcode	= &sz_ia32_sigcode,
 	.sv_name	= "FreeBSD a.out",
 	.sv_coredump	= NULL,
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_pagesize	= IA32_PAGE_SIZE,
 	.sv_minuser	= AOUT32_MINUSER,
 	.sv_maxuser	= AOUT32_USRSTACK,
 	.sv_usrstack	= AOUT32_USRSTACK,
 	.sv_psstrings	= AOUT32_PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings	= freebsd32_copyout_strings,
 	.sv_setregs	= ia32_setregs,
 	.sv_fixlimit	= ia32_fixlimit,
 	.sv_maxssiz	= &ia32_maxssiz,
 	.sv_flags	= SV_ABI_FREEBSD | SV_AOUT | SV_IA32 | SV_ILP32,
 	.sv_set_syscall_retval = ia32_set_syscall_retval,
 	.sv_fetch_syscall_args = ia32_fetch_syscall_args,
 	.sv_syscallnames = freebsd32_syscallnames,
 };
 #else
 #error "Port me"
 #endif
 
 /*
  * Stack fixup for a.out images: move the stack base down by one 32-bit
  * word and store the argument count there.  Returns the result of
  * suword32().
  */
 static int
 aout_fixup(register_t **stack_base, struct image_params *imgp)
 {
 	char **basep;
 
 	/* Adjust the base in bytes, not in register_t units. */
 	basep = (char **)stack_base;
 	*basep = *basep - sizeof(uint32_t);
 
 	return (suword32(*stack_base, imgp->args->argc));
 }
 
 /*
  * Image activator for a.out executables.
  *
  * Validates the a.out header found in imgp->image_header, replaces the
  * process address space with a new one and maps the text, data and bss
  * regions of the image into it.
  *
  * Returns -1 when the header is not an a.out image handled here or
  * fails the bounds checks, EFAULT when text + data exceed the file
  * size, ENOMEM when the size limits are exceeded, the value returned by
  * exec_new_vmspace() or vm_map_insert() when one of them fails, and 0
  * on success.
  */
 static int
 exec_aout_imgact(struct image_params *imgp)
 {
 	const struct exec *a_out = (const struct exec *) imgp->image_header;
 	struct vmspace *vmspace;
 	vm_map_t map;
 	vm_object_t object;
 	vm_offset_t text_end, data_end;
 	unsigned long virtual_offset;
 	unsigned long file_offset;
 	unsigned long bss_size;
 	int error;
 
 	/*
 	 * Linux and *BSD binaries look very much alike,
 	 * only the machine id is different:
 	 * 0x64 for Linux, 0x86 for *BSD, 0x00 for BSDI.
 	 * NetBSD is in network byte order.. ugh.
 	 */
 	if (((a_out->a_midmag >> 16) & 0xff) != 0x86 &&
 	    ((a_out->a_midmag >> 16) & 0xff) != 0 &&
 	    ((((int)ntohl(a_out->a_midmag)) >> 16) & 0xff) != 0x86)
                 return -1;
 
 	/*
 	 * Set file/virtual offset based on a.out variant.
 	 *	We do two cases: host byte order and network byte order
 	 *	(for NetBSD compatibility)
 	 */
 	switch ((int)(a_out->a_midmag & 0xffff)) {
 	case ZMAGIC:
 		virtual_offset = 0;
 		if (a_out->a_text) {
 			file_offset = PAGE_SIZE;
 		} else {
 			/* Bill's "screwball mode" */
 			file_offset = 0;
 		}
 		break;
 	case QMAGIC:
 		virtual_offset = PAGE_SIZE;
 		file_offset = 0;
 		/* Pass PS_STRINGS for BSD/OS binaries only. */
 		if (N_GETMID(*a_out) == MID_ZERO)
 			imgp->ps_strings = aout_sysvec.sv_psstrings;
 		break;
 	default:
 		/* NetBSD compatibility */
 		switch ((int)(ntohl(a_out->a_midmag) & 0xffff)) {
 		case ZMAGIC:
 		case QMAGIC:
 			virtual_offset = PAGE_SIZE;
 			file_offset = 0;
 			break;
 		default:
 			return (-1);
 		}
 	}
 
 	/* The bss region is mapped in whole pages. */
 	bss_size = roundup(a_out->a_bss, PAGE_SIZE);
 
 	/*
 	 * Check various fields in header for validity/bounds.
 	 */
 	if (/* entry point must lay with text region */
 	    a_out->a_entry < virtual_offset ||
 	    a_out->a_entry >= virtual_offset + a_out->a_text ||
 
 	    /* text and data size must each be page rounded */
 	    a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK
 
 #ifdef __amd64__
 	    ||
 	    /* overflows */
 	    virtual_offset + a_out->a_text + a_out->a_data + bss_size > UINT_MAX
 #endif
 	    )
 		return (-1);
 
 	/* text + data can't exceed file size */
 	if (a_out->a_data + a_out->a_text > imgp->attr->va_size)
 		return (EFAULT);
 
 	/*
 	 * text/data/bss must not exceed limits
 	 */
 	PROC_LOCK(imgp->proc);
 	if (/* text can't exceed maximum text size */
 	    a_out->a_text > maxtsiz ||
 
 	    /* data + bss can't exceed rlimit */
 	    a_out->a_data + bss_size > lim_cur_proc(imgp->proc, RLIMIT_DATA) ||
 	    racct_set(imgp->proc, RACCT_DATA, a_out->a_data + bss_size) != 0) {
 			PROC_UNLOCK(imgp->proc);
 			return (ENOMEM);
 	}
 	PROC_UNLOCK(imgp->proc);
 
 	/*
 	 * Avoid a possible deadlock if the current address space is destroyed
 	 * and that address space maps the locked vnode.  In the common case,
 	 * the locked vnode's v_usecount is decremented but remains greater
 	 * than zero.  Consequently, the vnode lock is not needed by vrele().
 	 * However, in cases where the vnode lock is external, such as nullfs,
 	 * v_usecount may become zero.
 	 */
 	VOP_UNLOCK(imgp->vp, 0);
 
 	/*
 	 * Destroy old process VM and create a new one (with a new stack)
 	 */
 	error = exec_new_vmspace(imgp, &aout_sysvec);
 
 	/* Retake the vnode lock before looking at the result. */
 	vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
 	if (error)
 		return (error);
 
 	/*
 	 * The vm space can be changed by exec_new_vmspace
 	 */
 	vmspace = imgp->proc->p_vmspace;
 
 	object = imgp->object;
 	map = &vmspace->vm_map;
 	vm_map_lock(map);
 	/* Reference taken for the text mapping; dropped again on failure. */
 	vm_object_reference(object);
 
 	/* Text: read/execute, copy-on-write, starting at file_offset. */
 	text_end = virtual_offset + a_out->a_text;
 	error = vm_map_insert(map, object,
 		file_offset,
 		virtual_offset, text_end,
 		VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL,
 		MAP_COPY_ON_WRITE | MAP_PREFAULT);
 	if (error) {
 		vm_map_unlock(map);
 		vm_object_deallocate(object);
 		return (error);
 	}
 	/* Data: directly after the text, both in the file and in memory. */
 	data_end = text_end + a_out->a_data;
 	if (a_out->a_data) {
 		vm_object_reference(object);
 		error = vm_map_insert(map, object,
 			file_offset + a_out->a_text,
 			text_end, data_end,
 			VM_PROT_ALL, VM_PROT_ALL,
 			MAP_COPY_ON_WRITE | MAP_PREFAULT);
 		if (error) {
 			vm_map_unlock(map);
 			vm_object_deallocate(object);
 			return (error);
 		}
 	}
 
 	/* Bss: mapped after the data with no backing object. */
 	if (bss_size) {
 		error = vm_map_insert(map, NULL, 0,
 			data_end, data_end + bss_size,
 			VM_PROT_ALL, VM_PROT_ALL, 0);
 		if (error) {
 			vm_map_unlock(map);
 			return (error);
 		}
 	}
 	vm_map_unlock(map);
 
 	/* Fill in process VM information */
 	vmspace->vm_tsize = a_out->a_text >> PAGE_SHIFT;
 	vmspace->vm_dsize = (a_out->a_data + bss_size) >> PAGE_SHIFT;
 	vmspace->vm_taddr = (caddr_t) (uintptr_t) virtual_offset;
 	vmspace->vm_daddr = (caddr_t) (uintptr_t)
 			    (virtual_offset + a_out->a_text);
 
 	/* Fill in image_params */
 	imgp->interpreted = 0;
 	imgp->entry_addr = a_out->a_entry;
 
 	imgp->proc->p_sysent = &aout_sysvec;
 
 	return (0);
 }
 
 /*
  * Tell kern_execve.c about it, with a little help from the linker.
  * The execsw entry pairs exec_aout_imgact() with the name "a.out".
  */
 static struct execsw aout_execsw = { exec_aout_imgact, "a.out" };
 EXEC_SET(aout, aout_execsw);
Index: projects/clang380-import/sys/kern/init_main.c
===================================================================
--- projects/clang380-import/sys/kern/init_main.c	(revision 293686)
+++ projects/clang380-import/sys/kern/init_main.c	(revision 293687)
@@ -1,879 +1,880 @@
 /*-
  * Copyright (c) 1995 Terrence R. Lambert
  * All rights reserved.
  *
  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)init_main.c	8.9 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_init_path.h"
 #include "opt_verbose_sysinit.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/exec.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/jail.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/loginclass.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/proc.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/systm.h>
 #include <sys/signalvar.h>
 #include <sys/vnode.h>
 #include <sys/sysent.h>
 #include <sys/reboot.h>
 #include <sys/sched.h>
 #include <sys/sx.h>
 #include <sys/sysproto.h>
 #include <sys/vmmeter.h>
 #include <sys/unistd.h>
 #include <sys/malloc.h>
 #include <sys/conf.h>
 #include <sys/cpuset.h>
 
 #include <machine/cpu.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_domain.h>
 #include <sys/copyright.h>
 
 #include <ddb/ddb.h>
 #include <ddb/db_sym.h>
 
 void mi_startup(void);				/* Should be elsewhere */
 
 /* Components of the first process -- never freed. */
 static struct session session0;
 static struct pgrp pgrp0;
 struct	proc proc0;
 struct	thread thread0 __aligned(16);
 struct	vmspace vmspace0;
 struct	proc *initproc;		/* filled in by fork1() in create_init() */
 
 #ifndef BOOTHOWTO
 #define	BOOTHOWTO	0
 #endif
 int	boothowto = BOOTHOWTO;	/* initialized so that it can be patched */
 SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0,
 	"Boot control flags, passed from loader");
 
 #ifndef BOOTVERBOSE
 #define	BOOTVERBOSE	0
 #endif
 /* Incremented by mi_startup() when RB_VERBOSE is set in boothowto. */
 int	bootverbose = BOOTVERBOSE;
 SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0,
 	"Control the output of verbose kernel messages");
 
 #ifdef INVARIANTS
 FEATURE(invariants, "Kernel compiled with INVARIANTS, may affect performance");
 #endif
 
 /*
  * This ensures that there is at least one entry so that the sysinit_set
  * symbol is not undefined.  A subsystem ID of SI_SUB_DUMMY is never
  * executed.
  */
 SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL);
 
 /*
  * The sysinit table itself.  Items are checked off as they are run.
  * If we want to register new sysinit types, add them to newsysinit.
  *
  * sysinit/sysinit_end bound the table mi_startup() is currently walking;
  * newsysinit/newsysinit_end bound a pending replacement table built by
  * sysinit_add(), which mi_startup() switches to once it notices it.
  */
 SET_DECLARE(sysinit_set, struct sysinit);
 struct sysinit **sysinit, **sysinit_end;
 struct sysinit **newsysinit, **newsysinit_end;
 
 /*
  * Append the sysinit entries in [set, set_end) to the pending table.
  *
  * The merged table starts with a copy of the previously pending table
  * (newsysinit) when there is one, otherwise with a copy of the active
  * table (sysinit), and ends with the entries being added.  The storage
  * comes from malloc(9), so this can only be called after malloc is
  * running; an allocation failure panics.  A previously pending table is
  * freed once it has been copied.
  */
 void
 sysinit_add(struct sysinit **set, struct sysinit **set_end)
 {
 	struct sysinit **base, **base_end;
 	struct sysinit **merged;
 	struct sysinit **src;
 	struct sysinit **dst;
 	int total;
 
 	/* Choose the table the merged copy is seeded from. */
 	if (newsysinit) {
 		base = newsysinit;
 		base_end = newsysinit_end;
 	} else {
 		base = sysinit;
 		base_end = sysinit_end;
 	}
 
 	total = set_end - set;
 	total += base_end - base;
 	merged = malloc(total * sizeof(*src), M_TEMP, M_NOWAIT);
 	if (merged == NULL)
 		panic("cannot malloc for sysinit");
 
 	/* Existing entries first, then the newly supplied ones. */
 	dst = merged;
 	for (src = base; src < base_end; src++)
 		*dst++ = *src;
 	for (src = set; src < set_end; src++)
 		*dst++ = *src;
 
 	if (newsysinit)
 		free(newsysinit, M_TEMP);
 	newsysinit = merged;
 	newsysinit_end = merged + total;
 }
 
 #if defined (DDB) && defined(VERBOSE_SYSINIT)
 /*
  * Look up the name of the symbol at address va using the given search
  * strategy.  Returns NULL when va is 0 or when db_search_symbol() reports
  * a non-zero offset between va and the symbol it found.
  */
 static const char *
 symbol_name(vm_offset_t va, db_strategy_t strategy)
 {
 	c_db_sym_t match;
 	db_expr_t delta;
 	const char *symname;
 
 	if (va != 0) {
 		match = db_search_symbol(va, strategy, &delta);
 		if (delta == 0) {
 			db_symbol_values(match, &symname, NULL);
 			return (symname);
 		}
 	}
 	return (NULL);
 }
 #endif
 
 /*
  * System startup; initialize the world, create process 0, mount root
  * filesystem, and fork to create init and pagedaemon.  Most of the
  * hard work is done in the lower-level initialization routines including
  * startup(), which does memory initialization and autoconfiguration.
  *
  * This allows simple addition of new kernel subsystems that require
  * boot time initialization.  It also allows substitution of subsystem
  * (for instance, a scheduler, kernel profiler, or VM system) by object
  * module.  Finally, it allows for optional "kernel threads".
  */
 void
 mi_startup(void)
 {
 
 	register struct sysinit **sipp;		/* system initialization*/
 	register struct sysinit **xipp;		/* interior loop of sort*/
 	register struct sysinit *save;		/* bubble*/
 
 #if defined(VERBOSE_SYSINIT)
 	int last;
 	int verbose;
 #endif
 
 	if (boothowto & RB_VERBOSE)
 		bootverbose++;
 
 	/* If no table has been installed yet, use the static sysinit_set. */
 	if (sysinit == NULL) {
 		sysinit = SET_BEGIN(sysinit_set);
 		sysinit_end = SET_LIMIT(sysinit_set);
 	}
 
 	/*
 	 * Control returns here (via goto at the bottom of the dispatch loop)
 	 * each time a sysinit has caused a replacement table to be installed.
 	 */
 restart:
 	/*
 	 * Perform a bubble sort of the system initialization objects by
 	 * their subsystem (primary key) and order (secondary key).
 	 */
 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
 		for (xipp = sipp + 1; xipp < sysinit_end; xipp++) {
 			/* Swap only when *sipp sorts strictly after *xipp. */
 			if ((*sipp)->subsystem < (*xipp)->subsystem ||
 			     ((*sipp)->subsystem == (*xipp)->subsystem &&
 			      (*sipp)->order <= (*xipp)->order))
 				continue;	/* skip*/
 			save = *sipp;
 			*sipp = *xipp;
 			*xipp = save;
 		}
 	}
 
 #if defined(VERBOSE_SYSINIT)
 	/*
 	 * Nothing is printed until an entry with a subsystem greater than
 	 * SI_SUB_COPYRIGHT is reached; these are reset on every restart.
 	 */
 	last = SI_SUB_COPYRIGHT;
 	verbose = 0;
 #if !defined(DDB)
 	printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n");
 #endif
 #endif
 
 	/*
 	 * Traverse the (now) ordered list of system initialization tasks.
 	 * Perform each task, and continue on to the next task.
 	 */
 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
 
 		if ((*sipp)->subsystem == SI_SUB_DUMMY)
 			continue;	/* skip dummy task(s)*/
 
 		/* Already run on an earlier pass (marked below). */
 		if ((*sipp)->subsystem == SI_SUB_DONE)
 			continue;
 
 #if defined(VERBOSE_SYSINIT)
 		/* Announce each new subsystem once and enable tracing. */
 		if ((*sipp)->subsystem > last) {
 			verbose = 1;
 			last = (*sipp)->subsystem;
 			printf("subsystem %x\n", last);
 		}
 		if (verbose) {
 #if defined(DDB)
 			const char *func, *data;
 
 			/*
 			 * Prefer symbolic names; fall through to the raw
 			 * pointer form below when the function name cannot
 			 * be resolved.
 			 */
 			func = symbol_name((vm_offset_t)(*sipp)->func,
 			    DB_STGY_PROC);
 			data = symbol_name((vm_offset_t)(*sipp)->udata,
 			    DB_STGY_ANY);
 			if (func != NULL && data != NULL)
 				printf("   %s(&%s)... ", func, data);
 			else if (func != NULL)
 				printf("   %s(%p)... ", func, (*sipp)->udata);
 			else
 #endif
 				printf("   %p(%p)... ", (*sipp)->func,
 				    (*sipp)->udata);
 		}
 #endif
 
 		/* Call function */
 		(*((*sipp)->func))((*sipp)->udata);
 
 #if defined(VERBOSE_SYSINIT)
 		if (verbose)
 			printf("done.\n");
 #endif
 
 		/* Check off the one we're just done */
 		(*sipp)->subsystem = SI_SUB_DONE;
 
 		/* Check if we've installed more sysinit items via KLD */
 		if (newsysinit != NULL) {
 			/*
 			 * Switch to the pending table.  The old table is
 			 * freed unless it is the static sysinit_set, then
 			 * the whole table is re-sorted and re-walked.
 			 */
 			if (sysinit != SET_BEGIN(sysinit_set))
 				free(sysinit, M_TEMP);
 			sysinit = newsysinit;
 			sysinit_end = newsysinit_end;
 			newsysinit = NULL;
 			newsysinit_end = NULL;
 			goto restart;
 		}
 	}
 
 	/* Giant must be held, non-recursively, at this point; release it. */
 	mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED);
 	mtx_unlock(&Giant);
 
 	/*
 	 * Now hand over this thread to swapper.
 	 */
 	swapper();
 	/* NOTREACHED*/
 }
 
 
 /*
  ***************************************************************************
  ****
  **** The following SYSINIT's belong elsewhere, but have not yet
  **** been moved.
  ****
  ***************************************************************************
  */
 /* SYSINIT helper: print the string that data points to, unmodified. */
 static void
 print_caddr_t(void *data)
 {
 	char *text;
 
 	text = (char *)data;
 	printf("%s", text);
 }
 
 /*
  * SYSINIT helper: print the version string without its trailing
  * newline(s), followed by the machine name, and then the compiler
  * version on a line of its own.
  */
 static void
 print_version(void *data __unused)
 {
 	int nchars;
 
 	/* Back up over every '\n' at the end of version. */
 	for (nchars = strlen(version); nchars > 0; nchars--) {
 		if (version[nchars - 1] != '\n')
 			break;
 	}
 	printf("%.*s %s\n", nchars, version, machine);
 	printf("%s\n", compiler_version);
 }
 
 /*
  * Boot banner.  Within SI_SUB_COPYRIGHT the copyright, trademark and
  * version are registered with orders FIRST, SECOND and THIRD, followed by
  * the optional warnings below.
  */
 SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t,
     copyright);
 SYSINIT(trademark, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t,
     trademark);
 SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_THIRD, print_version, NULL);
 
 #ifdef WITNESS
 /* Registered twice: once in SI_SUB_COPYRIGHT and again in SI_SUB_LAST. */
 static char wit_warn[] =
      "WARNING: WITNESS option enabled, expect reduced performance.\n";
 SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1,
    print_caddr_t, wit_warn);
 SYSINIT(witwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 1,
    print_caddr_t, wit_warn);
 #endif
 
 #ifdef DIAGNOSTIC
 /* Registered twice: once in SI_SUB_COPYRIGHT and again in SI_SUB_LAST. */
 static char diag_warn[] =
      "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n";
 SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 2,
     print_caddr_t, diag_warn);
 SYSINIT(diagwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 2,
     print_caddr_t, diag_warn);
 #endif
 
 /*
  * sv_fetch_syscall_args hook of null_sysvec.  It unconditionally panics,
  * so it never returns a value.
  */
 static int
 null_fetch_syscall_args(struct thread *td __unused,
     struct syscall_args *sa __unused)
 {
 
 	panic("null_fetch_syscall_args");
 }
 
 /*
  * sv_set_syscall_retval hook of null_sysvec.  It unconditionally panics.
  */
 static void
 null_set_syscall_retval(struct thread *td __unused, int error __unused)
 {
 
 	panic("null_set_syscall_retval");
 }
 
 /*
  * The "null" system call vector.  It has no syscall table, and its only
  * function hooks are the two panicking handlers for fetching arguments
  * and setting return values.  proc0_init() installs it as proc0's
  * p_sysent, and its sv_minuser/sv_maxuser bound proc0's vm_map.
  */
 struct sysentvec null_sysvec = {
 	.sv_size	= 0,
 	.sv_table	= NULL,
 	.sv_mask	= 0,
 	.sv_errsize	= 0,
 	.sv_errtbl	= NULL,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= NULL,
 	.sv_sendsig	= NULL,
 	.sv_sigcode	= NULL,
 	.sv_szsigcode	= NULL,
 	.sv_name	= "null",
 	.sv_coredump	= NULL,
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= 0,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings	= NULL,
 	.sv_setregs	= NULL,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= 0,
 	.sv_set_syscall_retval = null_set_syscall_retval,
 	.sv_fetch_syscall_args = null_fetch_syscall_args,
 	.sv_syscallnames = NULL,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
+	.sv_trap	= NULL,
 };
 
 /*
  ***************************************************************************
  ****
  **** The two following SYSINIT's are proc0 specific glue code.  I am not
  **** convinced that they can not be safely combined, but their order of
  **** operation has been maintained as the same as the original init_main.c
  **** for right now.
  ****
  **** These probably belong in init_proc.c or kern_proc.c, since they
  **** deal with proc0 (the fork template process).
  ****
  ***************************************************************************
  */
 /*
  * Build process 0 and its thread by hand from the static proc0, thread0,
  * pgrp0, session0 and vmspace0 objects: link them into the global process,
  * pid, pgrp and tid hashes, give them credentials, signal state, a file
  * descriptor table, resource limits and an address space, and finally run
  * the process/thread init and ctor event handlers.  Requires Giant.
  * Registered at SI_SUB_INTRINSIC, SI_ORDER_FIRST.
  */
 /* ARGSUSED*/
 static void
 proc0_init(void *dummy __unused)
 {
 	struct proc *p;
 	struct thread *td;
 	struct ucred *newcred;
 	vm_paddr_t pageablemem;
 	int i;
 
 	GIANT_REQUIRED;
 	p = &proc0;
 	td = &thread0;
 	
 	/*
 	 * Initialize magic number and osrel.
 	 */
 	p->p_magic = P_MAGIC;
 	p->p_osrel = osreldate;
 
 	/*
 	 * Initialize thread and process structures.
 	 */
 	procinit();	/* set up proc zone */
 	threadinit();	/* set up UMA zones */
 
 	/*
 	 * Initialise scheduler resources.
 	 * Add scheduler specific parts to proc, thread as needed.
 	 */
 	schedinit();	/* scheduler gets its house in order */
 
 	/*
 	 * Create process 0 (the swapper).
 	 */
 	LIST_INSERT_HEAD(&allproc, p, p_list);
 	LIST_INSERT_HEAD(PIDHASH(0), p, p_hash);
 	mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
 	p->p_pgrp = &pgrp0;
 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
 	LIST_INIT(&pgrp0.pg_members);
 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
 
 	/* proc0 leads its own session. */
 	pgrp0.pg_session = &session0;
 	mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF);
 	refcount_init(&session0.s_count, 1);
 	session0.s_leader = p;
 
 	p->p_sysent = &null_sysvec;
 	p->p_flag = P_SYSTEM | P_INMEM;
 	p->p_flag2 = 0;
 	p->p_state = PRS_NORMAL;
 	knlist_init_mtx(&p->p_klist, &p->p_mtx);
 	STAILQ_INIT(&p->p_ktr);
 	p->p_nice = NZERO;
 	/* pid_max cannot be greater than PID_MAX */
 	td->td_tid = PID_MAX + 1;
 	LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash);
 	td->td_state = TDS_RUNNING;
 	td->td_pri_class = PRI_TIMESHARE;
 	td->td_user_pri = PUSER;
 	td->td_base_user_pri = PUSER;
 	td->td_lend_user_pri = PRI_MAX;
 	td->td_priority = PVM;
 	td->td_base_pri = PVM;
 	td->td_oncpu = 0;
 	td->td_flags = TDF_INMEM;
 	td->td_pflags = TDP_KTHREAD;
 	td->td_cpuset = cpuset_thread0();
 	vm_domain_policy_init(&td->td_vm_dom_policy);
 	vm_domain_policy_set(&td->td_vm_dom_policy, VM_POLICY_NONE, -1);
 	vm_domain_policy_init(&p->p_vm_dom_policy);
 	vm_domain_policy_set(&p->p_vm_dom_policy, VM_POLICY_NONE, -1);
 	prison0_init();
 	p->p_peers = 0;
 	p->p_leader = p;
 	/* proc0 starts out as its own reaper. */
 	p->p_reaper = p;
 	LIST_INIT(&p->p_reaplist);
 
 	/* The process is named "kernel", its thread "swapper". */
 	strncpy(p->p_comm, "kernel", sizeof (p->p_comm));
 	strncpy(td->td_name, "swapper", sizeof (td->td_name));
 
 	callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0);
 	callout_init_mtx(&p->p_limco, &p->p_mtx, 0);
 	callout_init(&td->td_slpcallout, 1);
 
 	/* Create credentials. */
 	newcred = crget();
 	newcred->cr_ngroups = 1;	/* group 0 */
 	newcred->cr_uidinfo = uifind(0);
 	newcred->cr_ruidinfo = uifind(0);
 	newcred->cr_prison = &prison0;
 	newcred->cr_loginclass = loginclass_find("default");
 	proc_set_cred_init(p, newcred);
 #ifdef AUDIT
 	audit_cred_kproc0(newcred);
 #endif
 #ifdef MAC
 	mac_cred_create_swapper(newcred);
 #endif
 	/* Create sigacts. */
 	p->p_sigacts = sigacts_alloc();
 
 	/* Initialize signal state for process 0. */
 	siginit(&proc0);
 
 	/* Create the file descriptor table. */
 	p->p_fd = fdinit(NULL, false);
 	p->p_fdtol = NULL;
 
 	/*
 	 * Create the limits structures.  Every limit starts out as
 	 * RLIM_INFINITY; the ones below are then overridden.
 	 */
 	p->p_limit = lim_alloc();
 	for (i = 0; i < RLIM_NLIMITS; i++)
 		p->p_limit->pl_rlimit[i].rlim_cur =
 		    p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY;
 	p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur =
 	    p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
 	p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur =
 	    p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
 	p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz;
 	p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_max = maxdsiz;
 	p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz;
 	p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz;
 	/* Cast to avoid overflow on i386/PAE. */
 	pageablemem = ptoa((vm_paddr_t)vm_cnt.v_free_count);
 	p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur =
 	    p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem;
 	/* The soft memlock limit is a third of the hard limit. */
 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3;
 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem;
 	p->p_cpulimit = RLIM_INFINITY;
 
 	PROC_LOCK(p);
 	thread_cow_get_proc(td, p);
 	PROC_UNLOCK(p);
 
 	/* Initialize resource accounting structures. */
 	racct_create(&p->p_racct);
 
 	p->p_stats = pstats_alloc();
 
 	/* Allocate a prototype map so we have something to fork. */
 	p->p_vmspace = &vmspace0;
 	vmspace0.vm_refcnt = 1;
 	pmap_pinit0(vmspace_pmap(&vmspace0));
 
 	/*
 	 * proc0 is not expected to enter usermode, so there is no special
 	 * handling for sv_minuser here, like is done for exec_new_vmspace().
 	 */
 	vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0),
 	    p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser);
 
 	/*
 	 * Call the init and ctor for the new thread and proc.  We wait
 	 * to do this until all other structures are fairly sane.
 	 */
 	EVENTHANDLER_INVOKE(process_init, p);
 	EVENTHANDLER_INVOKE(thread_init, td);
 	EVENTHANDLER_INVOKE(process_ctor, p);
 	EVENTHANDLER_INVOKE(thread_ctor, td);
 
 	/*
 	 * Charge root for one process.
 	 */
 	(void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0);
 	PROC_LOCK(p);
 	racct_add_force(p, RACCT_NPROC, 1);
 	PROC_UNLOCK(p);
 }
 SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL);
 
 /*
  * Reset the start time and accumulated run-time statistics of every
  * process that exists at this point, reset the per-CPU switch timestamps,
  * and reseed random(9) from the current time.  Registered at
  * SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST.
  */
 /* ARGSUSED*/
 static void
 proc0_post(void *dummy __unused)
 {
 	struct timespec ts;
 	struct proc *p;
 	struct rusage ru;
 	struct thread *td;
 
 	/*
 	 * Now we can look at the time, having had a chance to verify the
 	 * time from the filesystem.  Pretend that proc0 started now.
 	 */
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		microuptime(&p->p_stats->p_start);
 		PROC_STATLOCK(p);
 		/* ru itself is discarded; the call is made for its effect. */
 		rufetch(p, &ru);	/* Clears thread stats */
 		PROC_STATUNLOCK(p);
 		p->p_rux.rux_runtime = 0;
 		p->p_rux.rux_uticks = 0;
 		p->p_rux.rux_sticks = 0;
 		p->p_rux.rux_iticks = 0;
 		FOREACH_THREAD_IN_PROC(p, td) {
 			td->td_runtime = 0;
 		}
 	}
 	sx_sunlock(&allproc_lock);
 	PCPU_SET(switchtime, cpu_ticks());
 	PCPU_SET(switchticks, ticks);
 
 	/*
 	 * Give the ``random'' number generator a thump.
 	 */
 	nanotime(&ts);
 	srandom(ts.tv_sec ^ ts.tv_nsec);
 }
 SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL);
 
 /*
  * Seed random(9) from get_cyclecount().  Registered at SI_SUB_RANDOM,
  * SI_ORDER_FIRST.
  */
 static void
 random_init(void *dummy __unused)
 {
 
 	/*
 	 * After CPU has been started we have some randomness on most
 	 * platforms via get_cyclecount().  For platforms that don't
 	 * we will reseed random(9) in proc0_post() as well.
 	 */
 	srandom(get_cyclecount());
 }
 SYSINIT(random, SI_SUB_RANDOM, SI_ORDER_FIRST, random_init, NULL);
 
 /*
  ***************************************************************************
  ****
  **** The following SYSINIT's and glue code should be moved to the
  **** respective files on a per subsystem basis.
  ****
  ***************************************************************************
  */
 
 
 /*
  ***************************************************************************
  ****
  **** The following code probably belongs in another file, like
  **** kern/init_init.c.
  ****
  ***************************************************************************
  */
 
 /*
  * List of paths to try when searching for "init".
  *
  * Entries are separated by ':' and are tried in order by start_init().
  * The compiled-in default (INIT_PATH if defined) is replaced at boot by
  * the "init_path" kernel environment variable when that is set.
  */
 static char init_path[MAXPATHLEN] =
 #ifdef	INIT_PATH
     __XSTRING(INIT_PATH);
 #else
     "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init";
 #endif
 SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0,
 	"Path used to search the init process");
 
 /*
  * Shutdown timeout of init(8).
  * Unused within kernel, but used to control init(8), hence do not remove.
  */
 #ifndef INIT_SHUTDOWN_TIMEOUT
 #define INIT_SHUTDOWN_TIMEOUT 120
 #endif
 static int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT;
 SYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout,
 	CTLFLAG_RW, &init_shutdown_timeout, 0, "Shutdown timeout of init(8). "
 	"Unused within kernel, but used to control init(8)");
 
 /*
  * Start the initial user process; try exec'ing each pathname in init_path.
  * The program is invoked with one argument containing the boot flags.
  */
 static void
 start_init(void *dummy)
 {
 	vm_offset_t addr;
 	struct execve_args args;
 	int options, error;
 	char *var, *path, *next, *s;
 	char *ucp, **uap, *arg0, *arg1;
 	struct thread *td;
 	struct proc *p;
 
 	mtx_lock(&Giant);
 
 	GIANT_REQUIRED;
 
 	td = curthread;
 	p = td->td_proc;
 
 	vfs_mountroot();
 
 	/* Wipe GELI passphrase from the environment. */
 	kern_unsetenv("kern.geom.eli.passphrase");
 
 	/*
 	 * Need just enough stack to hold the faked-up "execve()" arguments.
 	 * A single page is mapped directly below sv_usrstack for that.
 	 */
 	addr = p->p_sysent->sv_usrstack - PAGE_SIZE;
 	if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE, 0,
 	    VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
 		panic("init: couldn't allocate argument space");
 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
 	p->p_vmspace->vm_ssize = 1;
 
 	/* The kernel environment may override the compiled-in path list. */
 	if ((var = kern_getenv("init_path")) != NULL) {
 		strlcpy(init_path, var, sizeof(init_path));
 		freeenv(var);
 	}
 	
 	/*
 	 * Walk the ':'-separated list.  On each iteration [path, next) is
 	 * the current candidate; empty components are skipped.
 	 */
 	for (path = init_path; *path != '\0'; path = next) {
 		while (*path == ':')
 			path++;
 		if (*path == '\0')
 			break;
 		for (next = path; *next != '\0' && *next != ':'; next++)
 			/* nothing */ ;
 		if (bootverbose)
 			printf("start_init: trying %.*s\n", (int)(next - path),
 			    path);
 			
 		/*
 		 * Move out the boot flag argument.
 		 *
 		 * The string is written backwards, one byte at a time,
 		 * starting just below sv_usrstack.  The result is "-"
 		 * followed by the flag letters, or "--" when no flag is set.
 		 */
 		options = 0;
 		ucp = (char *)p->p_sysent->sv_usrstack;
 		(void)subyte(--ucp, 0);		/* trailing zero */
 		if (boothowto & RB_SINGLE) {
 			(void)subyte(--ucp, 's');
 			options = 1;
 		}
 #ifdef notyet
                 if (boothowto & RB_FASTBOOT) {
 			(void)subyte(--ucp, 'f');
 			options = 1;
 		}
 #endif
 
 #ifdef BOOTCDROM
 		(void)subyte(--ucp, 'C');
 		options = 1;
 #endif
 
 		if (options == 0)
 			(void)subyte(--ucp, '-');
 		(void)subyte(--ucp, '-');		/* leading hyphen */
 		arg1 = ucp;
 
 		/*
 		 * Move out the file name (also arg 0).
 		 * It is copied last byte first so it reads forward in memory.
 		 */
 		(void)subyte(--ucp, 0);
 		for (s = next - 1; s >= path; s--)
 			(void)subyte(--ucp, *s);
 		arg0 = ucp;
 
 		/*
 		 * Move out the arg pointers.
 		 * ucp is first rounded down to a multiple of
 		 * sizeof(intptr_t); the vector is then written downwards,
 		 * so the NULL terminator is stored first and arg0 last.
 		 */
 		uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1));
 		(void)suword((caddr_t)--uap, (long)0);	/* terminator */
 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg1);
 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg0);
 
 		/*
 		 * Point at the arguments.
 		 */
 		args.fname = arg0;
 		args.argv = uap;
 		args.envv = NULL;
 
 		/*
 		 * Now try to exec the program.  If it cannot be executed
 		 * for any reason other than that it doesn't exist, complain.
 		 *
 		 * Otherwise, return via fork_trampoline() all the way
 		 * to user mode as init!
 		 */
 		if ((error = sys_execve(td, &args)) == 0) {
 			mtx_unlock(&Giant);
 			return;
 		}
 		if (error != ENOENT)
 			printf("exec %.*s: error %d\n", (int)(next - path), 
 			    path, error);
 	}
 	/* Every candidate failed: there is nothing left to run. */
 	printf("init: not found in path %s\n", init_path);
 	panic("no init");
 }
 
 /*
  * Like kproc_create(), but runs in its own address space.
  * We do this early to reserve pid 1.
  *
  * Note special case - do not make it runnable yet.  Other work
  * in progress will change this more.
  */
 static void
 create_init(const void *udata __unused)
 {
 	struct ucred *newcred, *oldcred;
 	struct thread *td;
 	int error;
 
 	/* Fork from thread0; RFSTOPPED is requested here, kick_init() starts it. */
 	error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc,
 	    NULL, 0, NULL);
 	if (error)
 		panic("cannot fork init: %d\n", error);
 	KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1"));
 	/* divorce init's credentials from the kernel's */
 	newcred = crget();
 	sx_xlock(&proctree_lock);
 	PROC_LOCK(initproc);
 	initproc->p_flag |= P_SYSTEM | P_INMEM;
 	/* Flag init as a reaper and place proc0 on its reap list. */
 	initproc->p_treeflag |= P_TREE_REAPER;
 	LIST_INSERT_HEAD(&initproc->p_reaplist, &proc0, p_reapsibling);
 	/* Give init a private copy of the credential it got from fork1(). */
 	oldcred = initproc->p_ucred;
 	crcopy(newcred, oldcred);
 #ifdef MAC
 	mac_cred_create_init(newcred);
 #endif
 #ifdef AUDIT
 	audit_cred_proc1(newcred);
 #endif
 	proc_set_cred(initproc, newcred);
 	/* Re-point the thread's credential reference at the new one. */
 	td = FIRST_THREAD_IN_PROC(initproc);
 	crfree(td->td_ucred);
 	td->td_ucred = crhold(initproc->p_ucred);
 	PROC_UNLOCK(initproc);
 	sx_xunlock(&proctree_lock);
 	/* Drop the reference to the old credential after the locks are released. */
 	crfree(oldcred);
 	/* Make start_init() the fork handler of init's first thread. */
 	cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL);
 }
 SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL);
 
 /*
  * Make it runnable now.
  */
 static void
 kick_init(const void *udata __unused)
 {
 	struct thread *td;
 
 	/*
 	 * With the thread lock held, mark init's first thread as able to
 	 * run and hand it to the scheduler.
 	 */
 	td = FIRST_THREAD_IN_PROC(initproc);
 	thread_lock(td);
 	TD_SET_CAN_RUN(td);
 	sched_add(td, SRQ_BORING);
 	thread_unlock(td);
 }
 SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL);
Index: projects/clang380-import/sys/kern/kern_condvar.c
===================================================================
--- projects/clang380-import/sys/kern/kern_condvar.c	(revision 293686)
+++ projects/clang380-import/sys/kern/kern_condvar.c	(revision 293687)
@@ -1,457 +1,476 @@
 /*-
  * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/condvar.h>
 #include <sys/sched.h>
 #include <sys/signalvar.h>
 #include <sys/sleepqueue.h>
 #include <sys/resourcevar.h>
 #ifdef KTRACE
 #include <sys/uio.h>
 #include <sys/ktrace.h>
 #endif
 
 /*
+ * A bound below which cv_waiters is valid.  Once cv_waiters reaches this bound,
+ * cv_signal must manually check the wait queue for threads.
+ *
+ * CV_WAITERS_INC() is a saturating increment: it bumps cv_waiters only while
+ * the counter is still below CV_WAITERS_BOUND, so the counter never goes
+ * past the bound.
+ */
+#define	CV_WAITERS_BOUND	INT_MAX
+
+#define	CV_WAITERS_INC(cvp) do {					\
+	if ((cvp)->cv_waiters < CV_WAITERS_BOUND)			\
+		(cvp)->cv_waiters++;					\
+} while (0)
+
+/*
  * Common sanity checks for cv_wait* functions.
  *
  * Asserts, in this order, that the thread pointer is non-NULL, that the
  * thread is in the running state, and that the condition variable and the
  * lock pointers are non-NULL.
  */
 #define	CV_ASSERT(cvp, lock, td) do {					\
 	KASSERT((td) != NULL, ("%s: td NULL", __func__));		\
 	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
 	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
 	KASSERT((lock) != NULL, ("%s: lock NULL", __func__));		\
 } while (0)
 
 /*
  * Prepare a condition variable for first use: no waiters recorded, and
  * "desc" remembered as its description.  Must be called before the
  * condition variable is used.
  */
 void
 cv_init(struct cv *cvp, const char *desc)
 {
 
 	cvp->cv_waiters = 0;
 	cvp->cv_description = desc;
 }
 
 /*
  * Tear down a condition variable.  It has to be initialized again with
  * cv_init() before it may be reused.  With INVARIANTS this asserts that no
  * sleep queue is still associated with the condition variable; without
  * INVARIANTS it does nothing.
  */
 void
 cv_destroy(struct cv *cvp)
 {
 #ifdef INVARIANTS
 	struct sleepqueue *waitq;
 
 	/* The lookup is done with the sleep queue chain locked. */
 	sleepq_lock(cvp);
 	waitq = sleepq_lookup(cvp);
 	sleepq_release(cvp);
 	KASSERT(waitq == NULL, ("%s: associated sleep queue non-empty", __func__));
 #endif
 }
 
 /*
  * Wait on a condition variable.  The current thread is placed on the condition
  * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
  * condition variable will resume the thread.  The mutex is released before
  * sleeping and will be held on return.  It is recommended that the mutex be
  * held when cv_signal or cv_broadcast are called.
  *
  * When "cold" or "panicstr" is set the function returns at once, without
  * sleeping and without touching the lock.
  */
 void
 _cv_wait(struct cv *cvp, struct lock_object *lock)
 {
 	WITNESS_SAVE_DECL(lock_witness);
 	struct lock_class *class;
 	struct thread *td;
 	uintptr_t lock_state;
 
 	td = curthread;
 	lock_state = 0;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 0, cv_wmesg(cvp));
 #endif
 	CV_ASSERT(cvp, lock, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 	class = LOCK_CLASS(lock);
 
 	if (cold || panicstr) {
 		/*
 		 * During autoconfiguration, just give interrupts
 		 * a chance, then just return.  Don't run any other
 		 * thread or panic below, in case this is the idle
 		 * process and already asleep.
 		 */
 		return;
 	}
 
 	sleepq_lock(cvp);
 
-	cvp->cv_waiters++;
+	CV_WAITERS_INC(cvp);
 	if (lock == &Giant.lock_object)
 		mtx_assert(&Giant, MA_OWNED);
 	DROP_GIANT();
 
 	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
 	/*
 	 * Release the caller's lock unless it is Giant.  For lock classes
 	 * flagged LC_SLEEPABLE the sleep queue lock is dropped around the
 	 * unlock call and taken again afterwards.
 	 */
 	if (lock != &Giant.lock_object) {
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_release(cvp);
 		WITNESS_SAVE(lock, lock_witness);
 		lock_state = class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
 	}
 	sleepq_wait(cvp, 0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, cv_wmesg(cvp));
 #endif
 	PICKUP_GIANT();
 	/* Re-take the lock, passing back the state lc_unlock() returned. */
 	if (lock != &Giant.lock_object) {
 		class->lc_lock(lock, lock_state);
 		WITNESS_RESTORE(lock, lock_witness);
 	}
 }
 
 /*
  * Wait on a condition variable.  This function differs from cv_wait by
  * not acquiring the mutex after the condition variable was signaled.
  *
  * The lock must not be Giant.  When "cold" or "panicstr" is set the lock is
  * released and the function returns without sleeping.
  */
 void
 _cv_wait_unlock(struct cv *cvp, struct lock_object *lock)
 {
 	struct lock_class *class;
 	struct thread *td;
 
 	td = curthread;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 0, cv_wmesg(cvp));
 #endif
 	CV_ASSERT(cvp, lock, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 	KASSERT(lock != &Giant.lock_object,
 	    ("cv_wait_unlock cannot be used with Giant"));
 	class = LOCK_CLASS(lock);
 
 	if (cold || panicstr) {
 		/*
 		 * During autoconfiguration, just give interrupts
 		 * a chance, then just return.  Don't run any other
 		 * thread or panic below, in case this is the idle
 		 * process and already asleep.
 		 */
 		/* The lock is still released so the caller sees it unlocked. */
 		class->lc_unlock(lock);
 		return;
 	}
 
 	sleepq_lock(cvp);
 
-	cvp->cv_waiters++;
+	CV_WAITERS_INC(cvp);
 	DROP_GIANT();
 
 	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
 	/*
 	 * For lock classes flagged LC_SLEEPABLE the sleep queue lock is
 	 * dropped around the unlock call and taken again afterwards.
 	 */
 	if (class->lc_flags & LC_SLEEPABLE)
 		sleepq_release(cvp);
 	class->lc_unlock(lock);
 	if (class->lc_flags & LC_SLEEPABLE)
 		sleepq_lock(cvp);
 	sleepq_wait(cvp, 0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, cv_wmesg(cvp));
 #endif
 	/* Unlike _cv_wait(), the lock is not re-taken before returning. */
 	PICKUP_GIANT();
 }
 
 /*
  * Wait on a condition variable, allowing interruption by signals.  Return 0 if
  * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
  * a signal was caught.  If ERESTART is returned the system call should be
  * restarted if possible.
  *
  * When "cold" or "panicstr" is set the function returns 0 at once, without
  * sleeping and without touching the lock.
  */
 int
 _cv_wait_sig(struct cv *cvp, struct lock_object *lock)
 {
 	WITNESS_SAVE_DECL(lock_witness);
 	struct lock_class *class;
 	struct thread *td;
 	uintptr_t lock_state;
 	int rval;
 
 	td = curthread;
 	lock_state = 0;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 0, cv_wmesg(cvp));
 #endif
 	CV_ASSERT(cvp, lock, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 	class = LOCK_CLASS(lock);
 
 	if (cold || panicstr) {
 		/*
 		 * After a panic, or during autoconfiguration, just give
 		 * interrupts a chance, then just return; don't run any other
 		 * procs or panic below, in case this is the idle process and
 		 * already asleep.
 		 */
 		return (0);
 	}
 
 	sleepq_lock(cvp);
 
-	cvp->cv_waiters++;
+	CV_WAITERS_INC(cvp);
 	if (lock == &Giant.lock_object)
 		mtx_assert(&Giant, MA_OWNED);
 	DROP_GIANT();
 
 	/* SLEEPQ_INTERRUPTIBLE is what distinguishes this from _cv_wait(). */
 	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
 	    SLEEPQ_INTERRUPTIBLE, 0);
 	/*
 	 * Release the caller's lock unless it is Giant.  For lock classes
 	 * flagged LC_SLEEPABLE the sleep queue lock is dropped around the
 	 * unlock call and taken again afterwards.
 	 */
 	if (lock != &Giant.lock_object) {
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_release(cvp);
 		WITNESS_SAVE(lock, lock_witness);
 		lock_state = class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
 	}
 	rval = sleepq_wait_sig(cvp, 0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, cv_wmesg(cvp));
 #endif
 	PICKUP_GIANT();
 	/* Re-take the lock, passing back the state lc_unlock() returned. */
 	if (lock != &Giant.lock_object) {
 		class->lc_lock(lock, lock_state);
 		WITNESS_RESTORE(lock, lock_witness);
 	}
 
 	return (rval);
 }
 
 /*
  * Wait on a condition variable for (at most) the value specified in sbt
  * argument. Returns 0 if the process was resumed by cv_signal or cv_broadcast,
  * EWOULDBLOCK if the timeout expires.
  *
  * "sbt", "pr" and "flags" are handed unchanged to sleepq_set_timeout_sbt().
  * When "cold" or "panicstr" is set the function returns 0 at once, without
  * sleeping and without touching the lock.
  */
 int
 _cv_timedwait_sbt(struct cv *cvp, struct lock_object *lock, sbintime_t sbt,
     sbintime_t pr, int flags)
 {
 	WITNESS_SAVE_DECL(lock_witness);
 	struct lock_class *class;
 	struct thread *td;
 	/*
 	 * Same type as in _cv_wait()/_cv_wait_sig(): the value returned by
 	 * lc_unlock() is passed back to lc_lock() and must not be narrowed.
 	 */
 	uintptr_t lock_state;
 	int rval;
 
 	td = curthread;
 	lock_state = 0;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 0, cv_wmesg(cvp));
 #endif
 	CV_ASSERT(cvp, lock, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 	class = LOCK_CLASS(lock);
 
 	if (cold || panicstr) {
 		/*
 		 * After a panic, or during autoconfiguration, just give
 		 * interrupts a chance, then just return; don't run any other
 		 * thread or panic below, in case this is the idle process and
 		 * already asleep.
 		 */
 		return (0);
 	}
 
 	sleepq_lock(cvp);
 
-	cvp->cv_waiters++;
+	CV_WAITERS_INC(cvp);
 	if (lock == &Giant.lock_object)
 		mtx_assert(&Giant, MA_OWNED);
 	DROP_GIANT();
 
 	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
 	sleepq_set_timeout_sbt(cvp, sbt, pr, flags);
 	/*
 	 * Release the caller's lock unless it is Giant.  For lock classes
 	 * flagged LC_SLEEPABLE the sleep queue lock is dropped around the
 	 * unlock call and taken again afterwards.
 	 */
 	if (lock != &Giant.lock_object) {
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_release(cvp);
 		WITNESS_SAVE(lock, lock_witness);
 		lock_state = class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
 	}
 	rval = sleepq_timedwait(cvp, 0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, cv_wmesg(cvp));
 #endif
 	PICKUP_GIANT();
 	/* Re-take the lock, passing back the state lc_unlock() returned. */
 	if (lock != &Giant.lock_object) {
 		class->lc_lock(lock, lock_state);
 		WITNESS_RESTORE(lock, lock_witness);
 	}
 
 	return (rval);
 }
 
 /*
  * Wait on a condition variable for (at most) the value specified in sbt
  * argument, allowing interruption by signals.
  * Returns 0 if the thread was resumed by cv_signal or cv_broadcast,
  * EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if a signal
  * was caught.
  *
  * "sbt", "pr" and "flags" are handed unchanged to sleepq_set_timeout_sbt().
  * When "cold" or "panicstr" is set the function returns 0 at once, without
  * sleeping and without touching the lock.
  */
 int
 _cv_timedwait_sig_sbt(struct cv *cvp, struct lock_object *lock,
     sbintime_t sbt, sbintime_t pr, int flags)
 {
 	WITNESS_SAVE_DECL(lock_witness);
 	struct lock_class *class;
 	struct thread *td;
 	/*
 	 * Same type as in _cv_wait()/_cv_wait_sig(): the value returned by
 	 * lc_unlock() is passed back to lc_lock() and must not be narrowed.
 	 */
 	uintptr_t lock_state;
 	int rval;
 
 	td = curthread;
 	lock_state = 0;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(1, 0, cv_wmesg(cvp));
 #endif
 	CV_ASSERT(cvp, lock, td);
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
 	    "Waiting on \"%s\"", cvp->cv_description);
 	class = LOCK_CLASS(lock);
 
 	if (cold || panicstr) {
 		/*
 		 * After a panic, or during autoconfiguration, just give
 		 * interrupts a chance, then just return; don't run any other
 		 * thread or panic below, in case this is the idle process and
 		 * already asleep.
 		 */
 		return (0);
 	}
 
 	sleepq_lock(cvp);
 
-	cvp->cv_waiters++;
+	CV_WAITERS_INC(cvp);
 	if (lock == &Giant.lock_object)
 		mtx_assert(&Giant, MA_OWNED);
 	DROP_GIANT();
 
 	/* SLEEPQ_INTERRUPTIBLE lets a signal end the sleep early. */
 	sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
 	    SLEEPQ_INTERRUPTIBLE, 0);
 	sleepq_set_timeout_sbt(cvp, sbt, pr, flags);
 	/*
 	 * Release the caller's lock unless it is Giant.  For lock classes
 	 * flagged LC_SLEEPABLE the sleep queue lock is dropped around the
 	 * unlock call and taken again afterwards.
 	 */
 	if (lock != &Giant.lock_object) {
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_release(cvp);
 		WITNESS_SAVE(lock, lock_witness);
 		lock_state = class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
 	}
 	rval = sleepq_timedwait_sig(cvp, 0);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
 		ktrcsw(0, 0, cv_wmesg(cvp));
 #endif
 	PICKUP_GIANT();
 	/* Re-take the lock, passing back the state lc_unlock() returned. */
 	if (lock != &Giant.lock_object) {
 		class->lc_lock(lock, lock_state);
 		WITNESS_RESTORE(lock, lock_witness);
 	}
 
 	return (rval);
 }
 
 /*
  * Signal a condition variable, wakes up one waiting thread.  Will also wakeup
  * the swapper if the process is not in memory, so that it can bring the
  * sleeping process in.  Note that this may also result in additional threads
  * being made runnable.  Should be called with the same mutex as was passed to
  * cv_wait held.
  *
  * Nothing is done when cv_waiters is zero.  When cv_waiters has saturated
  * at CV_WAITERS_BOUND the counter is no longer exact: if sleepq_lookup()
  * finds no sleep queue the counter is reset to zero, otherwise one thread
  * is signalled and the counter is left at the bound.  Below the bound the
  * counter is decremented and one thread is signalled.  kick_proc0() is
  * called after the sleep queue lock is released when sleepq_signal()
  * returned non-zero.
  */
 void
 cv_signal(struct cv *cvp)
 {
 	int wakeup_swapper;
 
 	wakeup_swapper = 0;
 	sleepq_lock(cvp);
 	if (cvp->cv_waiters > 0) {
-		cvp->cv_waiters--;
-		wakeup_swapper = sleepq_signal(cvp, SLEEPQ_CONDVAR, 0, 0);
+		if (cvp->cv_waiters == CV_WAITERS_BOUND &&
+		    sleepq_lookup(cvp) == NULL) {
+			cvp->cv_waiters = 0;
+		} else {
+			if (cvp->cv_waiters < CV_WAITERS_BOUND)
+				cvp->cv_waiters--;
+			wakeup_swapper = sleepq_signal(cvp, SLEEPQ_CONDVAR, 0,
+			    0);
+		}
 	}
 	sleepq_release(cvp);
 	if (wakeup_swapper)
 		kick_proc0();
 }
 
 /*
  * Wake every thread waiting on a condition variable, passing "pri" on to
  * sleepq_broadcast().  Should be called with the same mutex as was passed
  * to cv_wait held.
  */
 void
 cv_broadcastpri(struct cv *cvp, int pri)
 {
 	int need_swapper, sq_pri;
 
 	/*
 	 * XXX sleepq_broadcast pri argument changed from -1 meaning
 	 * no pri to 0 meaning no pri.
 	 */
 	sq_pri = (pri == -1) ? 0 : pri;
 	need_swapper = 0;
 
 	sleepq_lock(cvp);
 	if (cvp->cv_waiters > 0) {
 		/* Everybody is being woken, so the waiter count restarts at zero. */
 		cvp->cv_waiters = 0;
 		need_swapper = sleepq_broadcast(cvp, SLEEPQ_CONDVAR, sq_pri,
 		    0);
 	}
 	sleepq_release(cvp);
 
 	/* The swapper is only kicked once the sleep queue lock is dropped. */
 	if (need_swapper != 0)
 		kick_proc0();
 }
Index: projects/clang380-import/sys/kern/uipc_mbuf.c
===================================================================
--- projects/clang380-import/sys/kern/uipc_mbuf.c	(revision 293686)
+++ projects/clang380-import/sys/kern/uipc_mbuf.c	(revision 293687)
@@ -1,2061 +1,2065 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_param.h"
 #include "opt_mbuf_stress_test.h"
 #include "opt_mbuf_profiling.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/sysctl.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/uio.h>
 
 /*
  * Header-size globals, exported read-only under kern.ipc below:
  *   max_linkhdr   size of largest link layer header
  *   max_protohdr  size of largest protocol layer header
  *   max_hdr       size of largest link plus protocol header
  *   max_datalen   minimum space left in mbuf after max_hdr
  */
 int	max_linkhdr;
 int	max_protohdr;
 int	max_hdr;
 int	max_datalen;
 #ifdef MBUF_STRESS_TEST
 /* Counters/knobs that exist only when MBUF_STRESS_TEST is configured. */
 int	m_defragpackets;
 int	m_defragbytes;
 int	m_defraguseless;
 int	m_defragfailure;
 int	m_defragrandomfailures;
 #endif
 
 /*
  * sysctl(8) exported objects
  */
 SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RD,
 	   &max_linkhdr, 0, "Size of largest link layer header");
 SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RD,
 	   &max_protohdr, 0, "Size of largest protocol layer header");
 SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RD,
 	   &max_hdr, 0, "Size of largest link plus protocol header");
 SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RD,
 	   &max_datalen, 0, "Minimum space left in mbuf after max_hdr");
 #ifdef MBUF_STRESS_TEST
 /* Of the stress-test variables only m_defragrandomfailures is writable. */
 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD,
 	   &m_defragpackets, 0, "");
 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD,
 	   &m_defragbytes, 0, "");
 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD,
 	   &m_defraguseless, 0, "");
 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD,
 	   &m_defragfailure, 0, "");
 SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW,
 	   &m_defragrandomfailures, 0, "");
 #endif
 
 /*
  * Ensure the correct size of various mbuf parameters.  It could be off due
  * to compiler-induced padding and alignment artifacts.
  */
 CTASSERT(MSIZE - offsetof(struct mbuf, m_dat) == MLEN);
 CTASSERT(MSIZE - offsetof(struct mbuf, m_pktdat) == MHLEN);
 
 /*
  * mbuf data storage should be 64-bit aligned regardless of architectural
  * pointer size; check this is the case with and without a packet header.
  */
 CTASSERT(offsetof(struct mbuf, m_dat) % 8 == 0);
 CTASSERT(offsetof(struct mbuf, m_pktdat) % 8 == 0);
 
 /*
  * While the specific values here don't matter too much (i.e., +/- a few
  * words), we do want to ensure that changes to these values are carefully
  * reasoned about and properly documented.  This is especially the case as
  * network-protocol and device-driver modules encode these layouts, and must
  * be recompiled if the structures change.  Check these values at compile time
  * against the ones documented in comments in mbuf.h.
  *
  * NB: Possibly they should be documented there via #define's and not just
  * comments.
  */
 #if defined(__LP64__)
 CTASSERT(offsetof(struct mbuf, m_dat) == 32);
 CTASSERT(sizeof(struct pkthdr) == 56);
 CTASSERT(sizeof(struct m_ext) == 48);
 #else /* !__LP64__ */
 CTASSERT(offsetof(struct mbuf, m_dat) == 24);
 CTASSERT(sizeof(struct pkthdr) == 48);
 CTASSERT(sizeof(struct m_ext) == 28);
 #endif /* __LP64__ */
 
 /*
  * Assert that the queue(3) macros produce code of the same size as an old
  * plain pointer does.
  *
  * m_assertbuf exists only to give sizeof() member expressions to work on.
  */
 #ifdef INVARIANTS
 static struct mbuf m_assertbuf;
 CTASSERT(sizeof(m_assertbuf.m_slist) == sizeof(m_assertbuf.m_next));
 CTASSERT(sizeof(m_assertbuf.m_stailq) == sizeof(m_assertbuf.m_next));
 CTASSERT(sizeof(m_assertbuf.m_slistpkt) == sizeof(m_assertbuf.m_nextpkt));
 CTASSERT(sizeof(m_assertbuf.m_stailqpkt) == sizeof(m_assertbuf.m_nextpkt));
 #endif /* INVARIANTS */
 
 /*
  * m_get2() hands back the smallest mbuf flavour that can hold "size" bytes:
  * a plain mbuf from zone_mbuf, a packet from zone_pack, or an mbuf paired
  * with an allocation from zone_jumbop.  Requests above MJUMPAGESIZE, and
  * allocation failures, yield NULL.
  */
 struct mbuf *
 m_get2(int size, int how, short type, int flags)
 {
 	struct mb_args args;
 	struct mbuf *hdr;
 	int fits_plain;
 
 	args.type = type;
 	args.flags = flags;
 
 	/* Does the request fit in the mbuf's own storage? */
 	fits_plain = size <= MHLEN ||
 	    (size <= MLEN && (flags & M_PKTHDR) == 0);
 	if (fits_plain)
 		return (uma_zalloc_arg(zone_mbuf, &args, how));
 
 	if (size <= MCLBYTES)
 		return (uma_zalloc_arg(zone_pack, &args, how));
 
 	if (size > MJUMPAGESIZE)
 		return (NULL);
 
 	hdr = uma_zalloc_arg(zone_mbuf, &args, how);
 	if (hdr == NULL)
 		return (NULL);
 
 	/* The mbuf is passed as the zone argument; undo it if this fails. */
 	if (uma_zalloc_arg(zone_jumbop, hdr, how) == NULL) {
 		uma_zfree(zone_mbuf, hdr);
 		return (NULL);
 	}
 
 	return (hdr);
 }
 
 /*
  * m_getjcl() returns an mbuf with a cluster of the specified size attached.
  * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
  * MCLBYTES is delegated to m_getcl(); the other sizes take an mbuf from
  * zone_mbuf plus an allocation from the zone m_getzone() selects.
  * Returns NULL if either allocation fails.
  */
 struct mbuf *
 m_getjcl(int how, short type, int flags, int size)
 {
 	struct mb_args args;
 	struct mbuf *hdr;
 
 	if (size == MCLBYTES)
 		return (m_getcl(how, type, flags));
 
 	args.type = type;
 	args.flags = flags;
 
 	hdr = uma_zalloc_arg(zone_mbuf, &args, how);
 	if (hdr == NULL)
 		return (NULL);
 
 	/* The mbuf is passed as the zone argument; undo it if this fails. */
 	if (uma_zalloc_arg(m_getzone(size), hdr, how) == NULL) {
 		uma_zfree(zone_mbuf, hdr);
 		return (NULL);
 	}
 	return (hdr);
 }
 
 /*
  * Allocate a given length worth of mbufs and/or clusters (whatever fits
  * best) and return a pointer to the top of the allocated chain.  If an
  * existing mbuf chain is provided, then we will append the new chain
  * to the existing one but still return the top of the newly allocated
  * chain.
  *
  * Only M_PKTHDR and M_EOR are honoured in "flags"; M_PKTHDR is dropped when
  * an existing chain "m" is supplied.  Returns NULL, after freeing whatever
  * was allocated so far, if any single allocation fails.  A "len" of zero
  * allocates nothing: the result is then "m" itself (NULL if no chain was
  * supplied).
  */
 struct mbuf *
 m_getm2(struct mbuf *m, int len, int how, short type, int flags)
 {
 	struct mbuf *mb, *nm = NULL, *mtail = NULL;
 
 	KASSERT(len >= 0, ("%s: len is < 0", __func__));
 
 	/* Validate flags. */
 	flags &= (M_PKTHDR | M_EOR);
 
 	/* Packet header mbuf must be first in chain. */
 	if ((flags & M_PKTHDR) && m != NULL)
 		flags &= ~M_PKTHDR;
 
 	/* Loop and append maximum sized mbufs to the chain tail. */
 	while (len > 0) {
 		if (len > MCLBYTES)
 			mb = m_getjcl(how, type, (flags & M_PKTHDR),
 			    MJUMPAGESIZE);
 		else if (len >= MINCLSIZE)
 			mb = m_getcl(how, type, (flags & M_PKTHDR));
 		else if (flags & M_PKTHDR)
 			mb = m_gethdr(how, type);
 		else
 			mb = m_get(how, type);
 
 		/* Fail the whole operation if one mbuf can't be allocated. */
 		if (mb == NULL) {
 			if (nm != NULL)
 				m_freem(nm);
 			return (NULL);
 		}
 
 		/* Book keeping. */
 		len -= M_SIZE(mb);
 		if (mtail != NULL)
 			mtail->m_next = mb;
 		else
 			nm = mb;
 		mtail = mb;
 		flags &= ~M_PKTHDR;	/* Only valid on the first mbuf. */
 	}
 	/*
 	 * mtail is still NULL when len was 0 and nothing was allocated;
 	 * there is then no new mbuf to mark and it must not be dereferenced.
 	 */
 	if (mtail != NULL && (flags & M_EOR) != 0)
 		mtail->m_flags |= M_EOR;  /* Only valid on the last mbuf. */
 
 	/* If mbuf was supplied, append new chain to the end of it. */
 	if (m != NULL) {
 		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next)
 			;
 		mtail->m_next = nm;
 		mtail->m_flags &= ~M_EOR;
 	} else
 		m = nm;
 
 	return (m);
 }
 
 /*
  * Release every mbuf of a chain by calling m_free() on each one in turn;
  * m_free() supplies the next mbuf to release.  A NULL chain is a no-op.
  */
 void
 m_freem(struct mbuf *mb)
 {
 	struct mbuf *cur;
 
 	for (cur = mb; cur != NULL; )
 		cur = m_free(cur);
 }
 
 /*-
  * Configure a provided mbuf to refer to the provided external storage
  * buffer and setup a reference count for said buffer.  If the setting
  * up of the reference count fails, the M_EXT bit will not be set.  If
  * successful, the M_EXT bit is set in the mbuf's flags.
  *
  * Arguments:
  *    mb     The existing mbuf to which to attach the provided buffer.
  *    buf    The address of the provided external storage buffer.
  *    size   The size of the provided buffer.
  *    freef  A pointer to a routine that is responsible for freeing the
  *           provided external storage buffer.
  *    arg1   First argument stored for the freef routine (kept in
  *           m_ext.ext_arg1).
  *    arg2   Second argument stored for the freef routine (kept in
  *           m_ext.ext_arg2).
  *    flags  Any other flags to be passed to the provided mbuf.
  *    type   The type that the external storage buffer should be
  *           labeled with.  EXT_CLUSTER is not allowed.
  *    wait   Passed to uma_zalloc() when the reference counter is
  *           allocated.
  *
  * Returns:
  *    0 on success, ENOMEM if no reference counter is available.
  */
 int
 m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
     void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2,
     int flags, int type, int wait)
 {
 	KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__));
 
 	/*
 	 * For EXT_EXTREF no counter is allocated here; the ext_cnt pointer
 	 * already in the mbuf is used (presumably set by the caller - TODO
 	 * confirm against callers).
 	 */
 	if (type != EXT_EXTREF)
 		mb->m_ext.ext_cnt = uma_zalloc(zone_ext_refcnt, wait);
 
 	if (mb->m_ext.ext_cnt == NULL)
 		return (ENOMEM);
 
 	/* The mbuf being set up holds the first reference. */
 	*(mb->m_ext.ext_cnt) = 1;
 	mb->m_flags |= (M_EXT | flags);
 	mb->m_ext.ext_buf = buf;
 	mb->m_data = mb->m_ext.ext_buf;
 	mb->m_ext.ext_size = size;
 	mb->m_ext.ext_free = freef;
 	mb->m_ext.ext_arg1 = arg1;
 	mb->m_ext.ext_arg2 = arg2;
 	mb->m_ext.ext_type = type;
 	mb->m_ext.ext_flags = 0;
 
 	return (0);
 }
 
 /*
  * Non-directly-exported function to clean up after mbufs with M_EXT
  * storage attached to them if the reference count hits 1.
  *
  * The sf_buf types are released through sf_ext_free() or
  * sf_ext_free_nocache().  For every other type the external storage is
  * freed only when this mbuf held the last reference.  Afterwards the mbuf
  * itself is returned to zone_mbuf unless M_NOFREE is set; EXT_PACKET mbufs
  * go back to zone_pack instead and the function returns early.
  */
 void
 mb_free_ext(struct mbuf *m)
 {
 	int freembuf;
 
 	KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));
 
 	/*
 	 * Check if the header is embedded in the cluster.
 	 */
 	freembuf = (m->m_flags & M_NOFREE) ? 0 : 1;
 
 	switch (m->m_ext.ext_type) {
 	case EXT_SFBUF:
 		sf_ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
 		break;
+	case EXT_SFBUF_NOCACHE:
+		sf_ext_free_nocache(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
+		break;
 	default:
 		KASSERT(m->m_ext.ext_cnt != NULL,
 		    ("%s: no refcounting pointer on %p", __func__, m));
 		/*
 		 * Free attached storage if this mbuf is the only
 		 * reference to it.
 		 *
 		 * A count of exactly 1 skips the atomic operation.  Otherwise
 		 * the count is decremented atomically, and unless the value
 		 * before the decrement was 1 the storage is left alone.
 		 */
 		if (*(m->m_ext.ext_cnt) != 1) {
 			if (atomic_fetchadd_int(m->m_ext.ext_cnt, -1) != 1)
 				break;
 		}
 
 		switch (m->m_ext.ext_type) {
 		case EXT_PACKET:	/* The packet zone is special. */
 			/* A count that dropped to 0 is put back to 1 first. */
 			if (*(m->m_ext.ext_cnt) == 0)
 				*(m->m_ext.ext_cnt) = 1;
 			uma_zfree(zone_pack, m);
 			return;		/* Job done. */
 		case EXT_CLUSTER:
 			uma_zfree(zone_clust, m->m_ext.ext_buf);
 			break;
 		case EXT_JUMBOP:
 			uma_zfree(zone_jumbop, m->m_ext.ext_buf);
 			break;
 		case EXT_JUMBO9:
 			uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
 			break;
 		case EXT_JUMBO16:
 			uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
 			break;
 		case EXT_NET_DRV:
 		case EXT_MOD_TYPE:
 		case EXT_DISPOSABLE:
 			/* The counter came from zone_ext_refcnt; give it back. */
 			*(m->m_ext.ext_cnt) = 0;
 			uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *,
 				m->m_ext.ext_cnt));
 			/* FALLTHROUGH */
 		case EXT_EXTREF:
 			KASSERT(m->m_ext.ext_free != NULL,
 				("%s: ext_free not set", __func__));
 			(*(m->m_ext.ext_free))(m, m->m_ext.ext_arg1,
 			    m->m_ext.ext_arg2);
 			break;
 		default:
 			KASSERT(m->m_ext.ext_type == 0,
 				("%s: unknown ext_type", __func__));
 		}
 	}
 
 	if (freembuf)
 		uma_zfree(zone_mbuf, m);
 }
 
 /*
  * Attach the cluster from *m to *n, set up m_ext in *n
  * and bump the refcount of the cluster.
  *
  * "m" must have M_EXT set and "n" must not.  The sf_buf types take their
  * reference through sf_ext_ref(); all other types increment the counter
  * behind ext_cnt.  "n" also inherits M_RDONLY from "m".
  */
 void
 mb_dupcl(struct mbuf *n, const struct mbuf *m)
 {
 
 	KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));
 	KASSERT(!(n->m_flags & M_EXT), ("%s: M_EXT set on %p", __func__, n));
 
 	switch (m->m_ext.ext_type) {
 	case EXT_SFBUF:
+	case EXT_SFBUF_NOCACHE:
 		sf_ext_ref(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
 		break;
 	default:
 		KASSERT(m->m_ext.ext_cnt != NULL,
 		    ("%s: no refcounting pointer on %p", __func__, m));
 		/* A count of exactly 1 is bumped without an atomic operation. */
 		if (*(m->m_ext.ext_cnt) == 1)
 			*(m->m_ext.ext_cnt) += 1;
 		else
 			atomic_add_int(m->m_ext.ext_cnt, 1);
 	}
 
 	/* Copy the whole external-storage descriptor. */
 	n->m_ext = m->m_ext;
 	n->m_flags |= M_EXT;
 	n->m_flags |= m->m_flags & M_RDONLY;
 }
 
 /*
  * Turn a packet-header mbuf into a plain one: delete its tag chain, zero
  * the packet header and clear M_PKTHDR.  The mbuf must have M_PKTHDR set.
  */
 void
 m_demote_pkthdr(struct mbuf *m)
 {
 
 	M_ASSERTPKTHDR(m);
 
 	/* Tags are deleted before the header holding their list is wiped. */
 	m_tag_delete_chain(m, NULL);
 	bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
 	m->m_flags &= ~M_PKTHDR;
 }
 
 /*
  * Strip tags and packet headers from an mbuf chain.  The first mbuf is
  * only processed when "all" is non-zero.  Each processed mbuf keeps just
  * M_EXT, M_RDONLY, M_NOFREE and whatever bits the caller names in "flags".
  */
 void
 m_demote(struct mbuf *m0, int all, int flags)
 {
 	struct mbuf *cur;
 	int keep;
 
 	keep = M_EXT | M_RDONLY | M_NOFREE | flags;
 
 	if (all)
 		cur = m0;
 	else
 		cur = m0->m_next;
 
 	while (cur != NULL) {
 		KASSERT(cur->m_nextpkt == NULL, ("%s: m_nextpkt in m %p, m0 %p",
 		    __func__, cur, m0));
 		if (cur->m_flags & M_PKTHDR)
 			m_demote_pkthdr(cur);
 		cur->m_flags &= keep;
 		cur = cur->m_next;
 	}
 }
 
 /*
  * Sanity checks on mbuf (chain) for use in KASSERT() and general
  * debugging.
  *
  * Returns 1 whenever it returns.  A detected problem runs
  * M_SANITY_ACTION, which panics under INVARIANTS and otherwise only
  * prints a message.
  *
  * "sanitize" selects how the m_nextpkt, tag, M_PKTHDR and packet length
  * problems are handled: 0 runs M_SANITY_ACTION, non-zero repairs or
  * garbles the offending field instead.  The m_data range checks always
  * run M_SANITY_ACTION.
  */
 int
 m_sanity(struct mbuf *m0, int sanitize)
 {
 	struct mbuf *m;
 	caddr_t a, b;
 	int pktlen = 0;
 
 #ifdef INVARIANTS
 #define	M_SANITY_ACTION(s)	panic("mbuf %p: " s, m)
 #else
 #define	M_SANITY_ACTION(s)	printf("mbuf %p: " s, m)
 #endif
 
 	for (m = m0; m != NULL; m = m->m_next) {
 		/*
 		 * Basic pointer checks.  If any of these fails then some
 		 * unrelated kernel memory before or after us is trashed.
 		 * No way to recover from that.
 		 */
 		/* [a, b] is the data area of this mbuf. */
 		a = M_START(m);
 		b = a + M_SIZE(m);
 		if ((caddr_t)m->m_data < a)
 			M_SANITY_ACTION("m_data outside mbuf data range left");
 		if ((caddr_t)m->m_data > b)
 			M_SANITY_ACTION("m_data outside mbuf data range right");
 		if ((caddr_t)m->m_data + m->m_len > b)
 			M_SANITY_ACTION("m_data + m_len exeeds mbuf space");
 
 		/* m->m_nextpkt may only be set on first mbuf in chain. */
 		if (m != m0 && m->m_nextpkt != NULL) {
 			if (sanitize) {
 				/* Free the stray packet, then poison the pointer. */
 				m_freem(m->m_nextpkt);
 				m->m_nextpkt = (struct mbuf *)0xDEADC0DE;
 			} else
 				M_SANITY_ACTION("m->m_nextpkt on in-chain mbuf");
 		}
 
 		/* packet length (not mbuf length!) calculation */
 		if (m0->m_flags & M_PKTHDR)
 			pktlen += m->m_len;
 
 		/* m_tags may only be attached to first mbuf in chain. */
 		if (m != m0 && m->m_flags & M_PKTHDR &&
 		    !SLIST_EMPTY(&m->m_pkthdr.tags)) {
 			if (sanitize) {
 				m_tag_delete_chain(m, NULL);
 				/* put in 0xDEADC0DE perhaps? */
 			} else
 				M_SANITY_ACTION("m_tags on in-chain mbuf");
 		}
 
 		/* M_PKTHDR may only be set on first mbuf in chain */
 		if (m != m0 && m->m_flags & M_PKTHDR) {
 			if (sanitize) {
 				bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
 				m->m_flags &= ~M_PKTHDR;
 				/* put in 0xDEADCODE and leave hdr flag in */
 			} else
 				M_SANITY_ACTION("M_PKTHDR on in-chain mbuf");
 		}
 	}
 	/*
 	 * Compare the summed m_len values with the header's length.  pktlen
 	 * stays 0 unless m0 has M_PKTHDR, so the header is only read then.
 	 */
 	m = m0;
 	if (pktlen && pktlen != m->m_pkthdr.len) {
 		if (sanitize)
 			m->m_pkthdr.len = 0;
 		else
 			M_SANITY_ACTION("m_pkthdr.len != mbuf chain length");
 	}
 	return 1;
 
 #undef	M_SANITY_ACTION
 }
 
 
 /*
  * "Move" mbuf pkthdr from "from" to "to".
  * "from" must have M_PKTHDR set, and "to" must be empty.
  *
  * Afterwards "to" carries the header, including the tag list, and "from"
  * has an empty tag list and no M_PKTHDR flag.
  */
 void
 m_move_pkthdr(struct mbuf *to, struct mbuf *from)
 {
 
 #if 0
 	/* see below for why these are not enabled */
 	M_ASSERTPKTHDR(to);
 	/* Note: with MAC, this may not be a good assertion. */
 	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags),
 	    ("m_move_pkthdr: to has tags"));
 #endif
 #ifdef MAC
 	/*
 	 * XXXMAC: It could be this should also occur for non-MAC?
 	 */
 	if (to->m_flags & M_PKTHDR)
 		m_tag_delete_chain(to, NULL);
 #endif
 	/* Take the M_COPYFLAGS bits from "from"; keep only M_EXT of "to". */
 	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
 	/* Without external storage the data pointer moves to m_pktdat. */
 	if ((to->m_flags & M_EXT) == 0)
 		to->m_data = to->m_pktdat;
 	to->m_pkthdr = from->m_pkthdr;		/* especially tags */
 	SLIST_INIT(&from->m_pkthdr.tags);	/* purge tags from src */
 	from->m_flags &= ~M_PKTHDR;
 }
 
 /*
  * Duplicate "from"'s mbuf pkthdr in "to".
  * "from" must have M_PKTHDR set, and "to" must be empty.
  * In particular, this does a deep copy of the packet tags.
  *
  * "how" is passed on to m_tag_copy_chain(), whose return value is returned
  * unchanged; callers in this file treat zero as failure.
  */
 int
 m_dup_pkthdr(struct mbuf *to, const struct mbuf *from, int how)
 {
 
 #if 0
 	/*
 	 * The mbuf allocator only initializes the pkthdr
 	 * when the mbuf is allocated with m_gethdr(). Many users
 	 * (e.g. m_copy*, m_prepend) use m_get() and then
 	 * smash the pkthdr as needed causing these
 	 * assertions to trip.  For now just disable them.
 	 */
 	M_ASSERTPKTHDR(to);
 	/* Note: with MAC, this may not be a good assertion. */
 	KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags"));
 #endif
 	MBUF_CHECKSLEEP(how);
 #ifdef MAC
 	if (to->m_flags & M_PKTHDR)
 		m_tag_delete_chain(to, NULL);
 #endif
 	/* Take the M_COPYFLAGS bits from "from"; keep only M_EXT of "to". */
 	to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
 	/* Without external storage the data pointer moves to m_pktdat. */
 	if ((to->m_flags & M_EXT) == 0)
 		to->m_data = to->m_pktdat;
 	to->m_pkthdr = from->m_pkthdr;
 	/*
 	 * The struct copy above also copied the tag list head; reset it so
 	 * that "to" gets its own copies of the tags below.
 	 */
 	SLIST_INIT(&to->m_pkthdr.tags);
 	return (m_tag_copy_chain(to, from, how));
 }
 
 /*
  * Slow path of M_PREPEND: put a freshly allocated mbuf of the same type in
  * front of the chain and give it a length of "len".  A packet header on the
  * old head is moved to the new mbuf.  If the allocation fails the whole
  * chain is freed and NULL is returned.
  */
 struct mbuf *
 m_prepend(struct mbuf *m, int len, int how)
 {
 	struct mbuf *head;
 	int has_pkthdr;
 
 	has_pkthdr = (m->m_flags & M_PKTHDR) != 0;
 
 	head = has_pkthdr ? m_gethdr(how, m->m_type) : m_get(how, m->m_type);
 	if (head == NULL) {
 		m_freem(m);
 		return (NULL);
 	}
 
 	if (has_pkthdr)
 		m_move_pkthdr(head, m);
 	head->m_next = m;
 
 	/* Align only when "len" is smaller than the new mbuf's capacity. */
 	if (len < M_SIZE(head))
 		M_ALIGN(head, len);
 	head->m_len = len;
 	return (head);
 }
 
 /*
  * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
  * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
  * The wait parameter is a choice of M_WAITOK/M_NOWAIT from caller.
  * Note that the copy is read-only, because clusters are not copied,
  * only their reference counts are incremented.
  *
  * Returns the head of the new chain, or NULL if an allocation or the
  * packet header duplication fails (the partial copy is freed).  A "len"
  * of 0 also yields NULL, since nothing is copied.
  */
 struct mbuf *
 m_copym(const struct mbuf *m, int off0, int len, int wait)
 {
 	struct mbuf *n, **np;
 	int off = off0;
 	struct mbuf *top;
 	int copyhdr = 0;
 
 	KASSERT(off >= 0, ("m_copym, negative off %d", off));
 	KASSERT(len >= 0, ("m_copym, negative len %d", len));
 	MBUF_CHECKSLEEP(wait);
 	/* The packet header is duplicated only for a copy starting at offset 0. */
 	if (off == 0 && m->m_flags & M_PKTHDR)
 		copyhdr = 1;
 	/* Skip whole mbufs until "off" falls inside the current one. */
 	while (off > 0) {
 		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
 		if (off < m->m_len)
 			break;
 		off -= m->m_len;
 		m = m->m_next;
 	}
 	/* np always points at the link where the next new mbuf is stored. */
 	np = &top;
 	top = 0;
 	while (len > 0) {
 		if (m == NULL) {
 			KASSERT(len == M_COPYALL,
 			    ("m_copym, length > size of mbuf chain"));
 			break;
 		}
 		if (copyhdr)
 			n = m_gethdr(wait, m->m_type);
 		else
 			n = m_get(wait, m->m_type);
 		*np = n;
 		if (n == NULL)
 			goto nospace;
 		if (copyhdr) {
 			if (!m_dup_pkthdr(n, m, wait))
 				goto nospace;
 			/* Set the copy's packet length to what is actually copied. */
 			if (len == M_COPYALL)
 				n->m_pkthdr.len -= off0;
 			else
 				n->m_pkthdr.len = len;
 			copyhdr = 0;
 		}
 		n->m_len = min(len, m->m_len - off);
 		if (m->m_flags & M_EXT) {
 			/* Share the external storage instead of copying bytes. */
 			n->m_data = m->m_data + off;
 			mb_dupcl(n, m);
 		} else
 			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
 			    (u_int)n->m_len);
 		if (len != M_COPYALL)
 			len -= n->m_len;
 		/* The offset applies to the first copied mbuf only. */
 		off = 0;
 		m = m->m_next;
 		np = &n->m_next;
 	}
 
 	return (top);
 nospace:
 	m_freem(top);
 	return (NULL);
 }
 
 /*
  * Copy an entire packet, including header (which must be present).
  * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
  * Note that the copy is read-only, because clusters are not copied,
  * only their reference counts are incremented.
  * Preserve alignment of the first mbuf so if the creator has left
  * some room at the beginning (e.g. for inserting protocol headers)
  * the copies still have the room available.
  *
  * Returns the new chain, or NULL (after freeing any partial copy) when
  * an mbuf or the packet header copy cannot be obtained.
  */
 struct mbuf *
 m_copypacket(struct mbuf *m, int how)
 {
 	struct mbuf *top, *n, *o;
 
 	MBUF_CHECKSLEEP(how);
 	n = m_get(how, m->m_type);
 	top = n;
 	if (n == NULL)
 		goto nospace;
 
 	if (!m_dup_pkthdr(n, m, how))
 		goto nospace;
 	n->m_len = m->m_len;
 	if (m->m_flags & M_EXT) {
 		/* Share the external storage with the original. */
 		n->m_data = m->m_data;
 		mb_dupcl(n, m);
 	} else {
 		/* Keep the same offset into m_pktdat as the original has. */
 		n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat );
 		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
 	}
 
 	/* The remaining mbufs are copied one for one, without a header. */
 	m = m->m_next;
 	while (m) {
 		o = m_get(how, m->m_type);
 		if (o == NULL)
 			goto nospace;
 
 		/* Link before filling, so a later failure frees it via "top". */
 		n->m_next = o;
 		n = n->m_next;
 
 		n->m_len = m->m_len;
 		if (m->m_flags & M_EXT) {
 			n->m_data = m->m_data;
 			mb_dupcl(n, m);
 		} else {
 			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
 		}
 
 		m = m->m_next;
 	}
 	return top;
 nospace:
 	m_freem(top);
 	return (NULL);
 }
 
 /*
  * Copy "len" bytes out of an mbuf chain into the flat buffer "cp",
  * beginning "off" bytes from the start of the chain.  The chain itself
  * is not modified.
  */
 void
 m_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
 {
 	u_int chunk;
 
 	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
 	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
 	/* Walk forward to the mbuf that holds the starting offset. */
 	for (; off > 0; m = m->m_next) {
 		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
 		if (off < m->m_len)
 			break;
 		off -= m->m_len;
 	}
 	/* Copy piecewise; only the first piece starts at a nonzero offset. */
 	for (; len > 0; m = m->m_next) {
 		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
 		chunk = min(m->m_len - off, len);
 		bcopy(mtod(m, caddr_t) + off, cp, chunk);
 		cp += chunk;
 		len -= chunk;
 		off = 0;
 	}
 }
 
 /*
  * Copy a packet header mbuf chain into a completely new chain, including
  * copying any mbuf clusters.  Use this instead of m_copypacket() when
  * you need a writable copy of an mbuf chain.
  *
  * Returns the new chain, or NULL when "m" is NULL or an allocation
  * fails (any partially built chain is freed).
  */
 struct mbuf *
 m_dup(const struct mbuf *m, int how)
 {
 	struct mbuf **p, *top = NULL;
 	int remain, moff, nsize;
 
 	MBUF_CHECKSLEEP(how);
 	/* Sanity check */
 	if (m == NULL)
 		return (NULL);
 	M_ASSERTPKTHDR(m);
 
 	/* While there's more data, get a new mbuf, tack it on, and fill it */
 	remain = m->m_pkthdr.len;
 	moff = 0;
 	p = &top;
 	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
 		struct mbuf *n;
 
 		/* Get the next new mbuf */
 		if (remain >= MINCLSIZE) {
 			n = m_getcl(how, m->m_type, 0);
 			nsize = MCLBYTES;
 		} else {
 			n = m_get(how, m->m_type);
 			nsize = MLEN;
 		}
 		if (n == NULL)
 			goto nospace;
 
 		if (top == NULL) {		/* First one, must be PKTHDR */
 			/* "n" is not linked into "top" yet, so free it here. */
 			if (!m_dup_pkthdr(n, m, how)) {
 				m_free(n);
 				goto nospace;
 			}
 			/* A header mbuf without a cluster holds only MHLEN bytes. */
 			if ((n->m_flags & M_EXT) == 0)
 				nsize = MHLEN;
 			/* The copy owns its storage, so it is writable. */
 			n->m_flags &= ~M_RDONLY;
 		}
 		n->m_len = 0;
 
 		/* Link it into the new chain */
 		*p = n;
 		p = &n->m_next;
 
 		/* Copy data from original mbuf(s) into new mbuf */
 		while (n->m_len < nsize && m != NULL) {
 			int chunk = min(nsize - n->m_len, m->m_len - moff);
 
 			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
 			moff += chunk;
 			n->m_len += chunk;
 			remain -= chunk;
 			/* Source mbuf exhausted: move on to the next one. */
 			if (moff == m->m_len) {
 				m = m->m_next;
 				moff = 0;
 			}
 		}
 
 		/* Check correct total mbuf length */
 		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
 		    	("%s: bogus m_pkthdr.len", __func__));
 	}
 	return (top);
 
 nospace:
 	m_freem(top);
 	return (NULL);
 }
 
 /*
  * Concatenate mbuf chain n to m.
  * Both chains must be of the same type (e.g. MT_DATA).
  * Any m_pkthdr is not updated.
  */
 void
 m_cat(struct mbuf *m, struct mbuf *n)
 {
 	while (m->m_next)
 		m = m->m_next;
 	while (n) {
 		if (!M_WRITABLE(m) ||
 		    M_TRAILINGSPACE(m) < n->m_len) {
 			/* just join the two chains */
 			m->m_next = n;
 			return;
 		}
 		/* splat the data from one into the other */
 		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
 		    (u_int)n->m_len);
 		m->m_len += n->m_len;
 		n = m_free(n);
 	}
 }
 
 /*
  * Concatenate two pkthdr mbuf chains: n is appended to m, m's packet
  * length grows by n's packet length, and n is demoted before the join.
  */
 void
 m_catpkt(struct mbuf *m, struct mbuf *n)
 {
 	int addlen;
 
 	M_ASSERTPKTHDR(m);
 	M_ASSERTPKTHDR(n);
 
 	/* Read n's packet length before m_demote() touches its header. */
 	addlen = n->m_pkthdr.len;
 	m->m_pkthdr.len += addlen;
 	m_demote(n, 1, 0);
 
 	m_cat(m, n);
 }
 
 /*
  * Trim data from an mbuf chain.  A non-negative "req_len" removes that
  * many bytes from the head of the chain; a negative "req_len" removes
  * -req_len bytes from the tail.  When the first mbuf has M_PKTHDR set,
  * its m_pkthdr.len is adjusted to match.  A NULL chain is ignored.
  */
 void
 m_adj(struct mbuf *mp, int req_len)
 {
 	int len = req_len;
 	struct mbuf *m;
 	int count;
 
 	if ((m = mp) == NULL)
 		return;
 	if (len >= 0) {
 		/*
 		 * Trim from head.
 		 */
 		while (m != NULL && len > 0) {
 			if (m->m_len <= len) {
 				/* Emptied mbufs stay in the chain with m_len 0. */
 				len -= m->m_len;
 				m->m_len = 0;
 				m = m->m_next;
 			} else {
 				m->m_len -= len;
 				m->m_data += len;
 				len = 0;
 			}
 		}
 		/* "len" is what could not be trimmed; subtract what was. */
 		if (mp->m_flags & M_PKTHDR)
 			mp->m_pkthdr.len -= (req_len - len);
 	} else {
 		/*
 		 * Trim from tail.  Scan the mbuf chain,
 		 * calculating its length and finding the last mbuf.
 		 * If the adjustment only affects this mbuf, then just
 		 * adjust and return.  Otherwise, rescan and truncate
 		 * after the remaining size.
 		 */
 		len = -len;
 		count = 0;
 		for (;;) {
 			count += m->m_len;
 			if (m->m_next == (struct mbuf *)0)
 				break;
 			m = m->m_next;
 		}
 		if (m->m_len >= len) {
 			m->m_len -= len;
 			if (mp->m_flags & M_PKTHDR)
 				mp->m_pkthdr.len -= len;
 			return;
 		}
 		/* Trimming more than the chain holds leaves an empty chain. */
 		count -= len;
 		if (count < 0)
 			count = 0;
 		/*
 		 * Correct length for chain is "count".
 		 * Find the mbuf with last data, adjust its length,
 		 * and toss data from remaining mbufs on chain.
 		 */
 		m = mp;
 		if (m->m_flags & M_PKTHDR)
 			m->m_pkthdr.len = count;
 		for (; m; m = m->m_next) {
 			if (m->m_len >= count) {
 				m->m_len = count;
 				if (m->m_next != NULL) {
 					m_freem(m->m_next);
 					m->m_next = NULL;
 				}
 				break;
 			}
 			count -= m->m_len;
 		}
 	}
 }
 
 /*
  * Rearrange an mbuf chain so that len bytes are contiguous
  * and in the data area of an mbuf (so that mtod will work
  * for a structure of size len).  Returns the resulting
  * mbuf chain on success, frees it and returns null on failure.
  * If there is room, it will add up to max_protohdr-len extra bytes to the
  * contiguous region in an attempt to avoid being called next time.
  *
  * Failure cases visible here: len larger than MHLEN when a new mbuf is
  * needed, the M_NOWAIT allocation failing, or the chain holding fewer
  * than len bytes.
  */
 struct mbuf *
 m_pullup(struct mbuf *n, int len)
 {
 	struct mbuf *m;
 	int count;
 	int space;
 
 	/*
 	 * If first mbuf has no cluster, and has room for len bytes
 	 * without shifting current data, pullup into it,
 	 * otherwise allocate a new mbuf to prepend to the chain.
 	 */
 	if ((n->m_flags & M_EXT) == 0 &&
 	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
 		/* Already contiguous: nothing to do. */
 		if (n->m_len >= len)
 			return (n);
 		/* Pull into the first mbuf; "n" becomes the data source. */
 		m = n;
 		n = n->m_next;
 		len -= m->m_len;
 	} else {
 		if (len > MHLEN)
 			goto bad;
 		m = m_get(M_NOWAIT, n->m_type);
 		if (m == NULL)
 			goto bad;
 		if (n->m_flags & M_PKTHDR)
 			m_move_pkthdr(m, n);
 	}
 	/* Bytes still free after the current data of "m". */
 	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
 	do {
 		/* Take up to max_protohdr bytes, bounded by space and source. */
 		count = min(min(max(len, max_protohdr), space), n->m_len);
 		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
 		  (u_int)count);
 		len -= count;
 		m->m_len += count;
 		n->m_len -= count;
 		space -= count;
 		/* Advance within the source mbuf, or free it once drained. */
 		if (n->m_len)
 			n->m_data += count;
 		else
 			n = m_free(n);
 	} while (len > 0 && n);
 	/* Ran out of chain before len bytes were gathered. */
 	if (len > 0) {
 		(void) m_free(m);
 		goto bad;
 	}
 	m->m_next = n;
 	return (m);
 bad:
 	m_freem(n);
 	return (NULL);
 }
 
 /*
  * Like m_pullup(), except a new mbuf is always allocated, and we allow
  * the amount of empty space before the data in the new mbuf to be specified
  * (in the event that the caller expects to prepend later).
  *
  * Returns the new head of the chain, or NULL after freeing the chain when
  * len does not fit in MHLEN - dstoff, the M_NOWAIT allocation fails, or
  * the chain holds fewer than len bytes.
  */
 struct mbuf *
 m_copyup(struct mbuf *n, int len, int dstoff)
 {
 	struct mbuf *m;
 	int count, space;
 
 	if (len > (MHLEN - dstoff))
 		goto bad;
 	m = m_get(M_NOWAIT, n->m_type);
 	if (m == NULL)
 		goto bad;
 	if (n->m_flags & M_PKTHDR)
 		m_move_pkthdr(m, n);
 	/* Leave "dstoff" bytes of room in front of the data. */
 	m->m_data += dstoff;
 	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
 	do {
 		/* Take up to max_protohdr bytes, bounded by space and source. */
 		count = min(min(max(len, max_protohdr), space), n->m_len);
 		memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
 		    (unsigned)count);
 		len -= count;
 		m->m_len += count;
 		n->m_len -= count;
 		space -= count;
 		/* Advance within the source mbuf, or free it once drained. */
 		if (n->m_len)
 			n->m_data += count;
 		else
 			n = m_free(n);
 	} while (len > 0 && n);
 	/* Ran out of chain before len bytes were gathered. */
 	if (len > 0) {
 		(void) m_free(m);
 		goto bad;
 	}
 	m->m_next = n;
 	return (m);
  bad:
 	m_freem(n);
 	return (NULL);
 }
 
 /*
  * Partition an mbuf chain in two pieces, returning the tail --
  * all but the first len0 bytes.  In case of failure, it returns NULL and
  * attempts to restore the chain to its original state.
  *
  * Note that the resulting mbufs might be read-only, because the new
  * mbuf can end up sharing an mbuf cluster with the original mbuf if
  * the "breaking point" happens to lie within a cluster mbuf. Use the
  * M_WRITABLE() macro to check for this case.
  *
  * NULL is also returned when the chain is shorter than len0 bytes, and,
  * for a chain without a packet header, when the split falls exactly at
  * the end of the last mbuf (the tail is then empty).
  */
 struct mbuf *
 m_split(struct mbuf *m0, int len0, int wait)
 {
 	struct mbuf *m, *n;
 	u_int len = len0, remain;
 
 	MBUF_CHECKSLEEP(wait);
 	/* Find the mbuf containing the split point; "len" becomes local to it. */
 	for (m = m0; m && len > m->m_len; m = m->m_next)
 		len -= m->m_len;
 	if (m == NULL)
 		return (NULL);
 	/* Bytes of "m" that belong to the tail. */
 	remain = m->m_len - len;
 	if (m0->m_flags & M_PKTHDR && remain == 0) {
 		/* Split on an mbuf boundary: the tail gets an empty header mbuf. */
 		n = m_gethdr(wait, m0->m_type);
 		if (n == NULL)
 			return (NULL);
 		n->m_next = m->m_next;
 		m->m_next = NULL;
 		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
 		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
 		m0->m_pkthdr.len = len0;
 		return (n);
 	} else if (m0->m_flags & M_PKTHDR) {
 		n = m_gethdr(wait, m0->m_type);
 		if (n == NULL)
 			return (NULL);
 		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
 		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
 		m0->m_pkthdr.len = len0;
 		if (m->m_flags & M_EXT)
 			goto extpacket;
 		if (remain > MHLEN) {
 			/* m can't be the lead packet */
 			M_ALIGN(n, 0);
 			n->m_next = m_split(m, len, wait);
 			/*
 			 * NOTE(review): on this failure path m0->m_pkthdr.len
 			 * has already been set to len0 above and is not put
 			 * back -- confirm whether callers depend on it.
 			 */
 			if (n->m_next == NULL) {
 				(void) m_free(n);
 				return (NULL);
 			} else {
 				n->m_len = 0;
 				return (n);
 			}
 		} else
 			M_ALIGN(n, remain);
 	} else if (remain == 0) {
 		/* No header and a boundary split: just unlink the tail. */
 		n = m->m_next;
 		m->m_next = NULL;
 		return (n);
 	} else {
 		n = m_get(wait, m->m_type);
 		if (n == NULL)
 			return (NULL);
 		M_ALIGN(n, remain);
 	}
 extpacket:
 	if (m->m_flags & M_EXT) {
 		/* Share the external storage instead of copying the bytes. */
 		n->m_data = m->m_data + len;
 		mb_dupcl(n, m);
 	} else {
 		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
 	}
 	n->m_len = remain;
 	m->m_len = len;
 	n->m_next = m->m_next;
 	m->m_next = NULL;
 	return (n);
 }
 /*
  * Routine to copy from device local memory into mbufs.
  * Note that `off' argument is offset into first mbuf of target chain from
  * which to begin copying the data to.
  *
  * "buf" holds "totlen" bytes; "ifp" is recorded as the receive interface
  * in the packet header.  When "copy" is non-NULL it is used in place of
  * bcopy() to move each piece.  Allocations use M_NOWAIT.  Returns the new
  * chain, or NULL when "off" is outside 0..MHLEN, an allocation fails, or
  * "totlen" is not positive.
  */
 struct mbuf *
 m_devget(char *buf, int totlen, int off, struct ifnet *ifp,
     void (*copy)(char *from, caddr_t to, u_int len))
 {
 	struct mbuf *m;
 	struct mbuf *top = NULL, **mp = &top;
 	int len;
 
 	if (off < 0 || off > MHLEN)
 		return (NULL);
 
 	while (totlen > 0) {
 		if (top == NULL) {	/* First one, must be PKTHDR */
 			if (totlen + off >= MINCLSIZE) {
 				m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 				len = MCLBYTES;
 			} else {
 				m = m_gethdr(M_NOWAIT, MT_DATA);
 				len = MHLEN;
 
 				/* Place initial small packet/header at end of mbuf */
 				if (m && totlen + off + max_linkhdr <= MLEN) {
 					m->m_data += max_linkhdr;
 					len -= max_linkhdr;
 				}
 			}
 			/* Nothing has been linked yet, so there is nothing to free. */
 			if (m == NULL)
 				return NULL;
 			m->m_pkthdr.rcvif = ifp;
 			m->m_pkthdr.len = totlen;
 		} else {
 			if (totlen + off >= MINCLSIZE) {
 				m = m_getcl(M_NOWAIT, MT_DATA, 0);
 				len = MCLBYTES;
 			} else {
 				m = m_get(M_NOWAIT, MT_DATA);
 				len = MLEN;
 			}
 			if (m == NULL) {
 				m_freem(top);
 				return NULL;
 			}
 		}
 		/* The offset is consumed by the first mbuf only. */
 		if (off) {
 			m->m_data += off;
 			len -= off;
 			off = 0;
 		}
 		m->m_len = len = min(totlen, len);
 		if (copy)
 			copy(buf, mtod(m, caddr_t), (u_int)len);
 		else
 			bcopy(buf, mtod(m, caddr_t), (u_int)len);
 		buf += len;
 		/* Append to the chain and advance the link pointer. */
 		*mp = m;
 		mp = &m->m_next;
 		totlen -= len;
 	}
 	return (top);
 }
 
 /*
  * Copy data from a buffer back into the indicated mbuf chain,
  * starting "off" bytes from the beginning, extending the mbuf
  * chain if necessary.
  *
  * New mbufs are obtained with M_NOWAIT; if one cannot be had the copy
  * stops early without reporting an error.  When the first mbuf has
  * M_PKTHDR set, m_pkthdr.len is raised (never lowered) to cover the
  * data written.
  */
 void
 m_copyback(struct mbuf *m0, int off, int len, c_caddr_t cp)
 {
 	int mlen;
 	struct mbuf *m = m0, *n;
 	int totlen = 0;
 
 	if (m0 == NULL)
 		return;
 	/* Advance to the mbuf holding "off", growing the chain if needed. */
 	while (off > (mlen = m->m_len)) {
 		off -= mlen;
 		totlen += mlen;
 		if (m->m_next == NULL) {
 			n = m_get(M_NOWAIT, m->m_type);
 			if (n == NULL)
 				goto out;
 			/* Padding between old end and "off" reads as zeroes. */
 			bzero(mtod(n, caddr_t), MLEN);
 			n->m_len = min(MLEN, len + off);
 			m->m_next = n;
 		}
 		m = m->m_next;
 	}
 	while (len > 0) {
 		/* Grow the last mbuf into its trailing space before adding more. */
 		if (m->m_next == NULL && (len > m->m_len - off)) {
 			m->m_len += min(len - (m->m_len - off),
 			    M_TRAILINGSPACE(m));
 		}
 		mlen = min (m->m_len - off, len);
 		bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen);
 		cp += mlen;
 		len -= mlen;
 		/* Count the skipped prefix of this mbuf in the total as well. */
 		mlen += off;
 		off = 0;
 		totlen += mlen;
 		if (len == 0)
 			break;
 		if (m->m_next == NULL) {
 			n = m_get(M_NOWAIT, m->m_type);
 			if (n == NULL)
 				break;
 			n->m_len = min(MLEN, len);
 			m->m_next = n;
 		}
 		m = m->m_next;
 	}
 out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
 		m->m_pkthdr.len = totlen;
 }
 
 /*
  * Append "len" bytes from "cp" to the end of the mbuf chain "m0".
  * The trailing space of the last mbuf is used first; further mbufs are
  * allocated with M_NOWAIT for whatever does not fit.  A packet header
  * on "m0" is credited with the bytes actually appended.
  *
  * Return 1 if able to complete the job; otherwise 0.
  */
 int
 m_append(struct mbuf *m0, int len, c_caddr_t cp)
 {
 	struct mbuf *tail, *fresh;
 	int left, room;
 
 	tail = m0;
 	while (tail->m_next != NULL)
 		tail = tail->m_next;
 	left = len;
 	room = M_TRAILINGSPACE(tail);
 	if (room > 0) {
 		/*
 		 * Copy into available space.
 		 */
 		if (room > left)
 			room = left;
 		bcopy(cp, mtod(tail, caddr_t) + tail->m_len, room);
 		tail->m_len += room;
 		cp += room;
 		left -= room;
 	}
 	while (left > 0) {
 		/*
 		 * Allocate a new mbuf; could check space
 		 * and allocate a cluster instead.
 		 */
 		fresh = m_get(M_NOWAIT, tail->m_type);
 		if (fresh == NULL)
 			break;
 		fresh->m_len = min(MLEN, left);
 		bcopy(cp, mtod(fresh, caddr_t), fresh->m_len);
 		cp += fresh->m_len;
 		left -= fresh->m_len;
 		tail->m_next = fresh;
 		tail = fresh;
 	}
 	if (m0->m_flags & M_PKTHDR)
 		m0->m_pkthdr.len += len - left;
 	return (left == 0);
 }
 
 /*
  * Apply function f to the data in an mbuf chain starting "off" bytes from
  * the beginning, continuing for "len" bytes.
  *
  * f is called once per contiguous piece as f(arg, data, count).  The
  * first nonzero value returned by f stops the walk and is returned to
  * the caller; 0 is returned once all "len" bytes have been visited.
  */
 int
 m_apply(struct mbuf *m, int off, int len,
     int (*f)(void *, void *, u_int), void *arg)
 {
 	u_int count;
 	int rval;
 
 	KASSERT(off >= 0, ("m_apply, negative off %d", off));
 	KASSERT(len >= 0, ("m_apply, negative len %d", len));
 	/* Skip whole mbufs until "off" falls inside the current one. */
 	while (off > 0) {
 		KASSERT(m != NULL, ("m_apply, offset > size of mbuf chain"));
 		if (off < m->m_len)
 			break;
 		off -= m->m_len;
 		m = m->m_next;
 	}
 	while (len > 0) {
 		/* Here the chain ended before "len" bytes, not before "off". */
 		KASSERT(m != NULL, ("m_apply, length > size of mbuf chain"));
 		count = min(m->m_len - off, len);
 		rval = (*f)(arg, mtod(m, caddr_t) + off, count);
 		if (rval)
 			return (rval);
 		len -= count;
 		/* The offset applies to the first visited mbuf only. */
 		off = 0;
 		m = m->m_next;
 	}
 	return (0);
 }
 
 /*
  * Locate byte position "loc" in an mbuf chain.  Returns the mbuf that
  * holds it and stores the offset within that mbuf in *off.  A position
  * exactly at the end of the chain yields the last mbuf with *off set to
  * its length; anything beyond that (or a negative loc) yields NULL.
  */
 struct mbuf *
 m_getptr(struct mbuf *m, int loc, int *off)
 {
 
 	for (; loc >= 0; m = m->m_next) {
 		/* Normal end of search. */
 		if (loc < m->m_len) {
 			*off = loc;
 			return (m);
 		}
 		loc -= m->m_len;
 		if (m->m_next != NULL)
 			continue;
 		/* Last mbuf: only the end-of-data position is still valid. */
 		if (loc != 0)
 			return (NULL);
 		/* Point at the end of valid data. */
 		*off = m->m_len;
 		return (m);
 	}
 	return (NULL);
 }
 
 /*
  * Print a description of each mbuf in a chain: address, length, next
  * pointer and decoded flags, followed by a hex dump of its data.
  * "maxlen" limits the bytes dumped per mbuf; -1 means no limit.  For a
  * chain with a packet header, printing stops once m_pkthdr.len bytes
  * have been accounted for, and a shortfall is reported at the end.
  */
 void
 m_print(const struct mbuf *m, int maxlen)
 {
 	int len;
 	int pdata;
 	const struct mbuf *m2;
 
 	if (m == NULL) {
 		printf("mbuf: %p\n", m);
 		return;
 	}
 
 	/* len == -1 means "no packet header, walk the whole chain". */
 	if (m->m_flags & M_PKTHDR)
 		len = m->m_pkthdr.len;
 	else
 		len = -1;
 	m2 = m;
 	while (m2 != NULL && (len == -1 || len)) {
 		pdata = m2->m_len;
 		if (maxlen != -1 && pdata > maxlen)
 			pdata = maxlen;
 		/* The final "%s" ends the line here when no data dump follows. */
 		printf("mbuf: %p len: %d, next: %p, %b%s", m2, m2->m_len,
 		    m2->m_next, m2->m_flags, "\20\20freelist\17skipfw"
 		    "\11proto5\10proto4\7proto3\6proto2\5proto1\4rdonly"
 		    "\3eor\2pkthdr\1ext", pdata ? "" : "\n");
 		if (pdata)
 			printf(", %*D\n", pdata, (u_char *)m2->m_data, "-");
 		if (len != -1)
 			len -= m2->m_len;
 		m2 = m2->m_next;
 	}
 	/* The chain ended before the advertised packet length was reached. */
 	if (len > 0)
 		printf("%d bytes unaccounted for.\n", len);
 	return;
 }
 
 /*
  * Recompute the length of the chain and store it in the packet header
  * of the first mbuf.  Returns that length.
  */
 u_int
 m_fixhdr(struct mbuf *m0)
 {
 	u_int total = m_length(m0, NULL);
 
 	m0->m_pkthdr.len = total;
 	return (total);
 }
 
 /*
  * Return the sum of m_len over the chain starting at m0.  When "last" is
  * not NULL it receives the final mbuf of the chain (NULL for an empty
  * chain).
  */
 u_int
 m_length(struct mbuf *m0, struct mbuf **last)
 {
 	struct mbuf *tail;
 	u_int total;
 
 	total = 0;
 	tail = m0;
 	if (tail != NULL) {
 		total += tail->m_len;
 		while (tail->m_next != NULL) {
 			tail = tail->m_next;
 			total += tail->m_len;
 		}
 	}
 	if (last != NULL)
 		*last = tail;
 	return (total);
 }
 
 /*
  * Defragment a mbuf chain, returning the shortest possible
  * chain of mbufs and clusters.  If allocation fails and
  * this cannot be completed, NULL will be returned, but
  * the passed in chain will be unchanged.  Upon success,
  * the original chain will be freed, and the new chain
  * will be returned.
  *
  * If a non-packet header is passed in, the original
  * mbuf (chain?) will be returned unharmed.
  *
  * Note that m_pkthdr.len of the original is recomputed by m_fixhdr()
  * before anything else is done, including on the failure path.
  */
 struct mbuf *
 m_defrag(struct mbuf *m0, int how)
 {
 	struct mbuf *m_new = NULL, *m_final = NULL;
 	int progress = 0, length;
 
 	MBUF_CHECKSLEEP(how);
 	if (!(m0->m_flags & M_PKTHDR))
 		return (m0);
 
 	m_fixhdr(m0); /* Needed sanity check */
 
 #ifdef MBUF_STRESS_TEST
 	/* Optionally fail at random to exercise callers' error paths. */
 	if (m_defragrandomfailures) {
 		int temp = arc4random() & 0xff;
 		if (temp == 0xba)
 			goto nospace;
 	}
 #endif
 
 	/* The head gets a cluster only when the packet will not fit in MHLEN. */
 	if (m0->m_pkthdr.len > MHLEN)
 		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
 	else
 		m_final = m_gethdr(how, MT_DATA);
 
 	if (m_final == NULL)
 		goto nospace;
 
 	if (m_dup_pkthdr(m_final, m0, how) == 0)
 		goto nospace;
 
 	/* The first pass of the loop fills the head mbuf itself. */
 	m_new = m_final;
 
 	while (progress < m0->m_pkthdr.len) {
 		length = m0->m_pkthdr.len - progress;
 		if (length > MCLBYTES)
 			length = MCLBYTES;
 
 		if (m_new == NULL) {
 			if (length > MLEN)
 				m_new = m_getcl(how, MT_DATA, 0);
 			else
 				m_new = m_get(how, MT_DATA);
 			if (m_new == NULL)
 				goto nospace;
 		}
 
 		m_copydata(m0, progress, length, mtod(m_new, caddr_t));
 		progress += length;
 		m_new->m_len = length;
 		if (m_new != m_final)
 			m_cat(m_final, m_new);
 		/* Force a fresh allocation on the next pass. */
 		m_new = NULL;
 	}
 #ifdef MBUF_STRESS_TEST
 	if (m0->m_next == NULL)
 		m_defraguseless++;
 #endif
 	m_freem(m0);
 	m0 = m_final;
 #ifdef MBUF_STRESS_TEST
 	m_defragpackets++;
 	m_defragbytes += m0->m_pkthdr.len;
 #endif
 	return (m0);
 nospace:
 #ifdef MBUF_STRESS_TEST
 	m_defragfailure++;
 #endif
 	if (m_final)
 		m_freem(m_final);
 	return (NULL);
 }
 
 /*
  * Defragment an mbuf chain, returning at most maxfrags separate
  * mbufs+clusters.  If this is not possible NULL is returned and
  * the original mbuf chain is left in its present (potentially
  * modified) state.  We use two techniques: collapsing consecutive
  * mbufs and replacing consecutive mbufs by a cluster.
  *
  * On success the returned pointer is always m0 itself; the first mbuf
  * is never replaced.
  *
  * NB: this should really be named m_defrag but that name is taken
  */
 struct mbuf *
 m_collapse(struct mbuf *m0, int how, int maxfrags)
 {
 	struct mbuf *m, *n, *n2, **prev;
 	u_int curfrags;
 
 	/*
 	 * Calculate the current number of frags.
 	 */
 	curfrags = 0;
 	for (m = m0; m != NULL; m = m->m_next)
 		curfrags++;
 	/*
 	 * First, try to collapse mbufs.  Note that we always collapse
 	 * towards the front so we don't need to deal with moving the
 	 * pkthdr.  This may be suboptimal if the first mbuf has much
 	 * less data than the following.
 	 */
 	m = m0;
 again:
 	for (;;) {
 		n = m->m_next;
 		if (n == NULL)
 			break;
 		if (M_WRITABLE(m) &&
 		    n->m_len < M_TRAILINGSPACE(m)) {
 			/* Absorb "n" into "m" and stay on "m" for the next try. */
 			bcopy(mtod(n, void *), mtod(m, char *) + m->m_len,
 				n->m_len);
 			m->m_len += n->m_len;
 			m->m_next = n->m_next;
 			m_free(n);
 			if (--curfrags <= maxfrags)
 				return m0;
 		} else
 			m = n;
 	}
 	KASSERT(maxfrags > 1,
 		("maxfrags %u, but normal collapse failed", maxfrags));
 	/*
 	 * Collapse consecutive mbufs to a cluster.
 	 */
 	prev = &m0->m_next;		/* NB: not the first mbuf */
 	while ((n = *prev) != NULL) {
 		if ((n2 = n->m_next) != NULL &&
 		    n->m_len + n2->m_len < MCLBYTES) {
 			m = m_getcl(how, MT_DATA, 0);
 			if (m == NULL)
 				goto bad;
 			bcopy(mtod(n, void *), mtod(m, void *), n->m_len);
 			bcopy(mtod(n2, void *), mtod(m, char *) + n->m_len,
 				n2->m_len);
 			m->m_len = n->m_len + n2->m_len;
 			m->m_next = n2->m_next;
 			/* Splice the cluster in where "n" and "n2" used to be. */
 			*prev = m;
 			m_free(n);
 			m_free(n2);
 			if (--curfrags <= maxfrags)	/* +1 cl -2 mbufs */
 				return m0;
 			/*
 			 * Still not there, try the normal collapse
 			 * again before we allocate another cluster.
 			 */
 			goto again;
 		}
 		prev = &n->m_next;
 	}
 	/*
 	 * No place where we can collapse to a cluster; punt.
 	 * This can occur if, for example, you request 2 frags
 	 * but the packet requires that both be clusters (we
 	 * never reallocate the first mbuf to avoid moving the
 	 * packet header).
 	 */
 bad:
 	return NULL;
 }
 
 #ifdef MBUF_STRESS_TEST
 
 /*
  * Fragment an mbuf chain.  There's no reason you'd ever want to do
  * this in normal usage, but it's great for stress testing various
  * mbuf consumers.
  *
  * If fragmentation is not possible, the original chain will be
  * returned.
  *
  * Possible length values:
  * 0	 no fragmentation will occur
  * > 0	each fragment will be of the specified length
  * -1	each fragment will be the same random value in length
  * -2	each fragment's length will be entirely random
  * (Random values range from 1 to 256)
  *
  * Fragments are additionally capped at MCLBYTES, since each one is
  * stored in a single cluster.  On success the original chain is freed.
  */
 struct mbuf *
 m_fragment(struct mbuf *m0, int how, int length)
 {
 	struct mbuf *m_new = NULL, *m_final = NULL;
 	int progress = 0;
 
 	if (!(m0->m_flags & M_PKTHDR))
 		return (m0);
 
 	if ((length == 0) || (length < -2))
 		return (m0);
 
 	m_fixhdr(m0); /* Needed sanity check */
 
 	m_final = m_getcl(how, MT_DATA, M_PKTHDR);
 
 	if (m_final == NULL)
 		goto nospace;
 
 	if (m_dup_pkthdr(m_final, m0, how) == 0)
 		goto nospace;
 
 	/* The first pass of the loop fills the head mbuf itself. */
 	m_new = m_final;
 
 	/* -1: pick one random size now and use it for every fragment. */
 	if (length == -1)
 		length = 1 + (arc4random() & 255);
 
 	while (progress < m0->m_pkthdr.len) {
 		int fraglen;
 
 		/* Only length == -2 is still non-positive here. */
 		if (length > 0)
 			fraglen = length;
 		else
 			fraglen = 1 + (arc4random() & 255);
 		if (fraglen > m0->m_pkthdr.len - progress)
 			fraglen = m0->m_pkthdr.len - progress;
 
 		if (fraglen > MCLBYTES)
 			fraglen = MCLBYTES;
 
 		if (m_new == NULL) {
 			m_new = m_getcl(how, MT_DATA, 0);
 			if (m_new == NULL)
 				goto nospace;
 		}
 
 		m_copydata(m0, progress, fraglen, mtod(m_new, caddr_t));
 		progress += fraglen;
 		m_new->m_len = fraglen;
 		if (m_new != m_final)
 			m_cat(m_final, m_new);
 		/* Force a fresh allocation on the next pass. */
 		m_new = NULL;
 	}
 	m_freem(m0);
 	m0 = m_final;
 	return (m0);
 nospace:
 	if (m_final)
 		m_freem(m_final);
 	/* Return the original chain on failure */
 	return (m0);
 }
 
 #endif
 
 /*
  * Copy the contents of uio into a properly sized mbuf chain.
  *
  * "len" limits the amount copied when positive; otherwise everything
  * left in the uio is copied.  "align" bytes are left free in front of
  * the data in the first mbuf and must be smaller than MHLEN.  "how" and
  * "flags" are passed to m_getm2(); with M_PKTHDR in "flags" the packet
  * header length is maintained.  Returns the chain, or NULL when "align"
  * is too large, the allocation fails, or uiomove() reports an error.
  */
 struct mbuf *
 m_uiotombuf(struct uio *uio, int how, int len, int align, int flags)
 {
 	struct mbuf *m, *mb;
 	int error, length;
 	ssize_t total;
 	int progress = 0;
 
 	/*
 	 * len can be zero or an arbitrary large value bound by
 	 * the total data supplied by the uio.
 	 *
 	 * The comparison is done in ssize_t on purpose: min() takes
 	 * u_int arguments and would truncate a uio_resid that does
 	 * not fit in 32 bits, yielding a bogus (too small) total.
 	 */
 	if (len > 0)
 		total = (uio->uio_resid < len) ? uio->uio_resid : len;
 	else
 		total = uio->uio_resid;
 
 	/*
 	 * The smallest unit returned by m_getm2() is a single mbuf
 	 * with pkthdr.  We can't align past it.
 	 */
 	if (align >= MHLEN)
 		return (NULL);
 
 	/*
 	 * Give us the full allocation or nothing.
 	 * If len is zero return the smallest empty mbuf.
 	 */
 	m = m_getm2(NULL, max(total + align, 1), how, MT_DATA, flags);
 	if (m == NULL)
 		return (NULL);
 	m->m_data += align;
 
 	/* Fill all mbufs with uio data and update header information. */
 	for (mb = m; mb != NULL; mb = mb->m_next) {
 		length = min(M_TRAILINGSPACE(mb), total - progress);
 
 		error = uiomove(mtod(mb, void *), length, uio);
 		if (error) {
 			m_freem(m);
 			return (NULL);
 		}
 
 		mb->m_len = length;
 		progress += length;
 		if (flags & M_PKTHDR)
 			m->m_pkthdr.len += length;
 	}
 	KASSERT(progress == total, ("%s: progress != total", __func__));
 
 	return (m);
 }
 
 /*
  * Copy an mbuf chain into a uio limited by len if set.
  */
 int
 m_mbuftouio(struct uio *uio, struct mbuf *m, int len)
 {
 	int error, length, total;
 	int progress = 0;
 
 	if (len > 0)
 		total = min(uio->uio_resid, len);
 	else
 		total = uio->uio_resid;
 
 	/* Fill the uio with data from the mbufs. */
 	for (; m != NULL; m = m->m_next) {
 		length = min(m->m_len, total - progress);
 
 		error = uiomove(mtod(m, void *), length, uio);
 		if (error)
 			return (error);
 
 		progress += length;
 	}
 
 	return (0);
 }
 
 /*
  * Create a writable copy of the mbuf chain.  While doing this
  * we compact the chain with a goal of producing a chain with
  * at most two mbufs.  The second mbuf in this chain is likely
  * to be a cluster.  The primary purpose of this work is to create
  * a writable packet for encryption, compression, etc.  The
  * secondary goal is to linearize the data so the data can be
  * passed to crypto hardware in the most efficient manner possible.
  *
  * Returns the (possibly new) head of the chain.  If a cluster cannot
  * be allocated the whole chain is freed and NULL is returned.
  */
 struct mbuf *
 m_unshare(struct mbuf *m0, int how)
 {
 	struct mbuf *m, *mprev;
 	struct mbuf *n, *mfirst, *mlast;
 	int len, off;
 
 	mprev = NULL;
 	/*
 	 * The loop advances through mprev->m_next, so every path that
 	 * reaches the increment leaves mprev pointing at the mbuf that now
 	 * precedes the next one to examine.
 	 */
 	for (m = m0; m != NULL; m = mprev->m_next) {
 		/*
 		 * Regular mbufs are ignored unless there's a cluster
 		 * in front of it that we can use to coalesce.  We do
 		 * the latter mainly so later clusters can be coalesced
 		 * also w/o having to handle them specially (i.e. convert
 		 * mbuf+cluster -> cluster).  This optimization is heavily
 		 * influenced by the assumption that we're running over
 		 * Ethernet where MCLBYTES is large enough that the max
 		 * packet size will permit lots of coalescing into a
 		 * single cluster.  This in turn permits efficient
 		 * crypto operations, especially when using hardware.
 		 */
 		if ((m->m_flags & M_EXT) == 0) {
 			if (mprev && (mprev->m_flags & M_EXT) &&
 			    m->m_len <= M_TRAILINGSPACE(mprev)) {
 				/* XXX: this ignores mbuf types */
 				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
 				    mtod(m, caddr_t), m->m_len);
 				mprev->m_len += m->m_len;
 				mprev->m_next = m->m_next;	/* unlink from chain */
 				m_free(m);			/* reclaim mbuf */
 #if 0
 				newipsecstat.ips_mbcoalesced++;
 #endif
 			} else {
 				mprev = m;
 			}
 			continue;
 		}
 		/*
 		 * Writable mbufs are left alone (for now).
 		 */
 		if (M_WRITABLE(m)) {
 			mprev = m;
 			continue;
 		}
 
 		/*
 		 * Not writable, replace with a copy or coalesce with
 		 * the previous mbuf if possible (since we have to copy
 		 * it anyway, we try to reduce the number of mbufs and
 		 * clusters so that future work is easier).
 		 */
 		KASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags));
 		/* NB: we only coalesce into a cluster or larger */
 		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
 		    m->m_len <= M_TRAILINGSPACE(mprev)) {
 			/* XXX: this ignores mbuf types */
 			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
 			    mtod(m, caddr_t), m->m_len);
 			mprev->m_len += m->m_len;
 			mprev->m_next = m->m_next;	/* unlink from chain */
 			m_free(m);			/* reclaim mbuf */
 #if 0
 			newipsecstat.ips_clcoalesced++;
 #endif
 			continue;
 		}
 
 		/*
 		 * Allocate new space to hold the copy and copy the data.
 		 * We deal with jumbo mbufs (i.e. m_len > MCLBYTES) by
 		 * splitting them into clusters.  We could just malloc a
 		 * buffer and make it external but too many device drivers
 		 * don't know how to break up the non-contiguous memory when
 		 * doing DMA.
 		 */
 		n = m_getcl(how, m->m_type, m->m_flags);
 		if (n == NULL) {
 			m_freem(m0);
 			return (NULL);
 		}
 		if (m->m_flags & M_PKTHDR) {
 			KASSERT(mprev == NULL, ("%s: m0 %p, m %p has M_PKTHDR",
 			    __func__, m0, m));
 			m_move_pkthdr(n, m);
 		}
 		len = m->m_len;
 		off = 0;
 		/* mfirst/mlast delimit the run of clusters replacing "m". */
 		mfirst = n;
 		mlast = NULL;
 		for (;;) {
 			int cc = min(len, MCLBYTES);
 			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
 			n->m_len = cc;
 			if (mlast != NULL)
 				mlast->m_next = n;
 			mlast = n;
 #if 0
 			newipsecstat.ips_clcopied++;
 #endif
 
 			len -= cc;
 			if (len <= 0)
 				break;
 			off += cc;
 
 			n = m_getcl(how, m->m_type, m->m_flags);
 			if (n == NULL) {
 				/* The partial run is not linked into m0 yet. */
 				m_freem(mfirst);
 				m_freem(m0);
 				return (NULL);
 			}
 		}
 		/* Reattach the remainder of the chain behind the last copy. */
 		n->m_next = m->m_next;
 		if (mprev == NULL)
 			m0 = mfirst;		/* new head of chain */
 		else
 			mprev->m_next = mfirst;	/* replace old mbuf */
 		m_free(m);			/* release old mbuf */
 		mprev = mfirst;
 	}
 	return (m0);
 }
 
 #ifdef MBUF_PROFILING
 
 #define MP_BUCKETS 32 /* don't just change this as things may overflow.*/
 /*
  * Histogram counters filled in by m_profile().  "segments" is indexed by
  * the number of mbufs in a chain; "used" and "wasted" are indexed by
  * fls() of the respective byte count.
  */
 struct mbufprofile {
 	uintmax_t wasted[MP_BUCKETS];
 	uintmax_t used[MP_BUCKETS];
 	uintmax_t segments[MP_BUCKETS];
 } mbprof;
 
 #define MP_MAXDIGITS 21	/* room for one counter; a 64-bit value prints as at most 20 decimal digits */
 #define MP_NUMLINES 6	/* lines of numbers: 2 each for wasted, used, segments */
 #define MP_NUMSPERLINE 16	/* counters printed per line */
 #define MP_EXTRABYTES 64	/* > strlen("used:\nwasted:\nsegments:\n") */
 /* work out max space needed and add a bit of spare space too */
 #define MP_MAXLINE ((MP_MAXDIGITS+1) * MP_NUMSPERLINE)
 #define MP_BUFSIZE ((MP_MAXLINE * MP_NUMLINES) + 1 + MP_EXTRABYTES)
 
 /* Text rendering of mbprof, rebuilt by mbprof_textify() on every read. */
 char mbprofbuf[MP_BUFSIZE];
 
 void
 m_profile(struct mbuf *m)
 {
 	int segments = 0;
 	int used = 0;
 	int wasted = 0;
 
 	while (m) {
 		segments++;
 		used += m->m_len;
 		if (m->m_flags & M_EXT) {
 			wasted += MHLEN - sizeof(m->m_ext) +
 			    m->m_ext.ext_size - m->m_len;
 		} else {
 			if (m->m_flags & M_PKTHDR)
 				wasted += MHLEN - m->m_len;
 			else
 				wasted += MLEN - m->m_len;
 		}
 		m = m->m_next;
 	}
 	/* be paranoid.. it helps */
 	if (segments > MP_BUCKETS - 1)
 		segments = MP_BUCKETS - 1;
 	if (used > 100000)
 		used = 100000;
 	if (wasted > 100000)
 		wasted = 100000;
 	/* store in the appropriate bucket */
 	/* don't bother locking. if it's slightly off, so what? */
 	mbprof.segments[segments]++;
 	mbprof.used[fls(used)]++;
 	mbprof.wasted[fls(wasted)]++;
 }
 
 static void
 mbprof_textify(void)
 {
 	int offset;
 	char *c;
 	uint64_t *p;
 
 	p = &mbprof.wasted[0];
 	c = mbprofbuf;
 	offset = snprintf(c, MP_MAXLINE + 10,
 	    "wasted:\n"
 	    "%ju %ju %ju %ju %ju %ju %ju %ju "
 	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
 	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
 	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
 #ifdef BIG_ARRAY
 	p = &mbprof.wasted[16];
 	c += offset;
 	offset = snprintf(c, MP_MAXLINE,
 	    "%ju %ju %ju %ju %ju %ju %ju %ju "
 	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
 	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
 	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
 #endif
 	p = &mbprof.used[0];
 	c += offset;
 	offset = snprintf(c, MP_MAXLINE + 10,
 	    "used:\n"
 	    "%ju %ju %ju %ju %ju %ju %ju %ju "
 	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
 	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
 	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
 #ifdef BIG_ARRAY
 	p = &mbprof.used[16];
 	c += offset;
 	offset = snprintf(c, MP_MAXLINE,
 	    "%ju %ju %ju %ju %ju %ju %ju %ju "
 	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
 	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
 	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
 #endif
 	p = &mbprof.segments[0];
 	c += offset;
 	offset = snprintf(c, MP_MAXLINE + 10,
 	    "segments:\n"
 	    "%ju %ju %ju %ju %ju %ju %ju %ju "
 	    "%ju %ju %ju %ju %ju %ju %ju %ju\n",
 	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
 	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
 #ifdef BIG_ARRAY
 	p = &mbprof.segments[16];
 	c += offset;
 	offset = snprintf(c, MP_MAXLINE,
 	    "%ju %ju %ju %ju %ju %ju %ju %ju "
 	    "%ju %ju %ju %ju %ju %ju %ju %jju",
 	    p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7],
 	    p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
 #endif
 }
 
 /*
  * Sysctl read handler: regenerate the text form of the profile and
  * hand it, including the terminating NUL, to the requester.
  */
 static int
 mbprof_handler(SYSCTL_HANDLER_ARGS)
 {
 
 	mbprof_textify();
 	return (SYSCTL_OUT(req, mbprofbuf, strlen(mbprofbuf) + 1));
 }
 
 /*
  * Sysctl handler for the clear knob: reads always report 0; writing a
  * nonzero value zeroes all profiling counters.
  */
 static int
 mbprof_clr_handler(SYSCTL_HANDLER_ARGS)
 {
 	int doclear, rc;
 
 	doclear = 0;
 	rc = sysctl_handle_int(oidp, &doclear, 0, req);
 	/* Only act on a successful write of a nonzero value. */
 	if (rc == 0 && req->newptr && doclear)
 		bzero(&mbprof, sizeof(mbprof));
 	return (rc);
 }
 
 
 /* Read-only string node under _kern_ipc, served by mbprof_handler(). */
 SYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofile, CTLTYPE_STRING|CTLFLAG_RD,
 	    NULL, 0, mbprof_handler, "A", "mbuf profiling statistics");
 
 /* Read/write integer node under _kern_ipc, served by mbprof_clr_handler(). */
 SYSCTL_PROC(_kern_ipc, OID_AUTO, mbufprofileclr, CTLTYPE_INT|CTLFLAG_RW,
 	    NULL, 0, mbprof_clr_handler, "I", "clear mbuf profiling statistics");
 #endif
 
Index: projects/clang380-import/sys/kern/uipc_sockbuf.c
===================================================================
--- projects/clang380-import/sys/kern/uipc_sockbuf.c	(revision 293686)
+++ projects/clang380-import/sys/kern/uipc_sockbuf.c	(revision 293687)
@@ -1,1314 +1,1331 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_param.h"
 
 #include <sys/param.h>
 #include <sys/aio.h> /* for aio_swake proto */
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/resourcevar.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 
 /*
  * Function pointer set by the AIO routines so that the socket buffer code
  * can call back into the AIO module if it is loaded.
  */
 void	(*aio_swake)(struct socket *, struct sockbuf *);
 
 /*
  * Primitive routines for operating on socket buffers
  */
 
 /* Upper bound on socket buffer size; writable through sysctl_handle_sb_max(). */
 u_long	sb_max = SB_MAX;
 /* sb_max scaled by MCLBYTES / (MSIZE + MCLBYTES); sbreserve_locked() checks it. */
 u_long sb_max_adj =
        (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */
 
 /* Multiplier applied to the requested size when computing sb_mbmax. */
 static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */
 
 /* Forward declarations for helpers defined later in this file. */
 static struct mbuf	*sbcut_internal(struct sockbuf *sb, int len);
 static void	sbflush_internal(struct sockbuf *sb);
 
 /*
+ * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY.
+ */
+static void
+sbm_clrprotoflags(struct mbuf *m, int flags)
+{
+	int mask;
+
+	mask = ~M_PROTOFLAGS;		/* clear only the M_PROTOFLAGS bits */
+	if (flags & PRUS_NOTREADY)
+		mask |= M_NOTREADY;	/* caller wants M_NOTREADY kept */
+	while (m) {			/* every mbuf linked through m_next */
+		m->m_flags &= mask;
+		m = m->m_next;
+	}
+}
+
+/*
  * Mark ready "count" mbufs starting with "m".
  *
  * M_NOTREADY is cleared on "count" mbufs, following m_next from "m".  When
  * "m" is not the buffer's first not-ready mbuf (sb_fnrdy), nothing becomes
  * available and EINPROGRESS is returned.  Otherwise the newly ready bytes,
  * plus any following mbufs that were only blocked behind them, are added to
  * sb_acc, sb_fnrdy is advanced, and 0 is returned.
  *
  * The socket buffer lock must be held.
  */
 int
 sbready(struct sockbuf *sb, struct mbuf *m, int count)
 {
 	u_int blocker;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 	KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb));
 
 	/* Non-zero only when "m" is the mbuf holding up the queue. */
 	blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0;
 
 	for (int i = 0; i < count; i++, m = m->m_next) {
 		KASSERT(m->m_flags & M_NOTREADY,
 		    ("%s: m %p !M_NOTREADY", __func__, m));
 		m->m_flags &= ~(M_NOTREADY | blocker);
 		if (blocker)
 			sb->sb_acc += m->m_len;
 	}
 
 	/* Something earlier in the buffer is still not ready. */
 	if (!blocker)
 		return (EINPROGRESS);
 
 	/* This one was blocking all the queue. */
 	for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) {
 		KASSERT(m->m_flags & M_BLOCKED,
 		    ("%s: m %p !M_BLOCKED", __func__, m));
 		m->m_flags &= ~M_BLOCKED;
 		sb->sb_acc += m->m_len;
 	}
 
 	/* "m" is now the next not-ready mbuf, or NULL if none is left. */
 	sb->sb_fnrdy = m;
 
 	return (0);
 }
 
 /*
  * Adjust sockbuf state reflecting allocation of m.
  *
  * Updates the byte counters (sb_ccc, and sb_acc when the data is available),
  * the control-data counter sb_ctl, and the mbuf/cluster accounting
  * (sb_mbcnt, sb_mcnt, sb_ccnt).  Also maintains sb_fnrdy and M_BLOCKED.
  * The socket buffer lock must be held.
  */
 void
 sballoc(struct sockbuf *sb, struct mbuf *m)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	sb->sb_ccc += m->m_len;
 
 	if (sb->sb_fnrdy == NULL) {
 		/* Nothing is blocking: either m starts blocking or it is available. */
 		if (m->m_flags & M_NOTREADY)
 			sb->sb_fnrdy = m;
 		else
 			sb->sb_acc += m->m_len;
 	} else
 		/* Queued behind a not-ready mbuf: mark it blocked. */
 		m->m_flags |= M_BLOCKED;
 
 	/* Anything that is neither MT_DATA nor MT_OOBDATA counts as control. */
 	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
 		sb->sb_ctl += m->m_len;
 
 	sb->sb_mbcnt += MSIZE;
 	sb->sb_mcnt += 1;
 
 	/* External storage is charged on top of the mbuf itself. */
 	if (m->m_flags & M_EXT) {
 		sb->sb_mbcnt += m->m_ext.ext_size;
 		sb->sb_ccnt += 1;
 	}
 }
 
 /*
  * Adjust sockbuf state reflecting freeing of m.
  *
  * Reverses the accounting done by sballoc().  If m was the first not-ready
  * mbuf, the mbufs following it (via m_next) that are ready are unblocked and
  * added to sb_acc, and sb_fnrdy moves to the next not-ready mbuf or NULL.
  * The cached send pointer (sb_sndptr/sb_sndptroff) is adjusted as well.
  * The mbuf itself is not freed here.
  */
 void
 sbfree(struct sockbuf *sb, struct mbuf *m)
 {
 
 #if 0	/* XXX: not yet: soclose() call path comes here w/o lock. */
 	SOCKBUF_LOCK_ASSERT(sb);
 #endif
 
 	sb->sb_ccc -= m->m_len;
 
 	/* Only data that was counted as available is removed from sb_acc. */
 	if (!(m->m_flags & M_NOTAVAIL))
 		sb->sb_acc -= m->m_len;
 
 	if (m == sb->sb_fnrdy) {
 		struct mbuf *n;
 
 		KASSERT(m->m_flags & M_NOTREADY,
 		    ("%s: m %p !M_NOTREADY", __func__, m));
 
 		/* Release everything that was only waiting behind m. */
 		n = m->m_next;
 		while (n != NULL && !(n->m_flags & M_NOTREADY)) {
 			n->m_flags &= ~M_BLOCKED;
 			sb->sb_acc += n->m_len;
 			n = n->m_next;
 		}
 		sb->sb_fnrdy = n;
 	}
 
 	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
 		sb->sb_ctl -= m->m_len;
 
 	sb->sb_mbcnt -= MSIZE;
 	sb->sb_mcnt -= 1;
 	if (m->m_flags & M_EXT) {
 		sb->sb_mbcnt -= m->m_ext.ext_size;
 		sb->sb_ccnt -= 1;
 	}
 
 	/* The cached send pointer must not keep referring to m. */
 	if (sb->sb_sndptr == m) {
 		sb->sb_sndptr = NULL;
 		sb->sb_sndptroff = 0;
 	}
 	if (sb->sb_sndptroff != 0)
 		sb->sb_sndptroff -= m->m_len;
 }
 
 /*
  * Socantsendmore indicates that no more data will be sent on the socket; it
  * would normally be applied to a socket when the user informs the system
  * that no more data is to be sent, by the protocol code (in case
  * PRU_SHUTDOWN).  Socantrcvmore indicates that no more data will be
  * received, and will normally be applied to the socket by a protocol when it
  * detects that the peer will send no more data.  Data queued for reading in
  * the socket may yet be read.
  */
 void
 socantsendmore_locked(struct socket *so)
 {
 
 	/* Entered with the send buffer lock held. */
 	SOCKBUF_LOCK_ASSERT(&so->so_snd);
 
 	so->so_snd.sb_state |= SBS_CANTSENDMORE;
 	sowwakeup_locked(so);
 	/* The wakeup call is expected to have released the send buffer lock. */
 	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
 }
 
 void
 socantsendmore(struct socket *so)
 {
 
 	/*
 	 * Take the send buffer lock; there is no explicit unlock here, and the
 	 * assertion checks that the lock is no longer owned on return.
 	 */
 	SOCKBUF_LOCK(&so->so_snd);
 	socantsendmore_locked(so);
 	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
 }
 
 void
 socantrcvmore_locked(struct socket *so)
 {
 
 	/* Entered with the receive buffer lock held. */
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
 	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
 	sorwakeup_locked(so);
 	/* The wakeup call is expected to have released the receive buffer lock. */
 	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
 }
 
 void
 socantrcvmore(struct socket *so)
 {
 
 	/*
 	 * Take the receive buffer lock; there is no explicit unlock here, and
 	 * the assertion checks that the lock is no longer owned on return.
 	 */
 	SOCKBUF_LOCK(&so->so_rcv);
 	socantrcvmore_locked(so);
 	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
 }
 
 /*
  * Sleep until the socket buffer is woken up (data arrived or drained).
  *
  * Sets SB_WAIT and sleeps on &sb->sb_acc with the buffer mutex, bounded by
  * sb_timeo.  The sleep can be interrupted by signals unless SB_NOINTR is set
  * on the buffer.  Returns whatever msleep_sbt() returns.
  */
 int
 sbwait(struct sockbuf *sb)
 {
 	int prio;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	prio = PSOCK;
 	if ((sb->sb_flags & SB_NOINTR) == 0)
 		prio |= PCATCH;
 
 	sb->sb_flags |= SB_WAIT;
 	return (msleep_sbt(&sb->sb_acc, &sb->sb_mtx, prio, "sbwait",
 	    sb->sb_timeo, 0, 0));
 }
 
 /*
  * Acquire the socket buffer's sx lock exclusively.
  *
  * Without SBL_WAIT a single non-sleeping attempt is made and EWOULDBLOCK is
  * returned on failure.  With SBL_WAIT the call sleeps; the sleep is
  * uninterruptible when either the buffer has SB_NOINTR or the caller passed
  * SBL_NOINTR, otherwise the result of sx_xlock_sig() is returned.
  */
 int
 sblock(struct sockbuf *sb, int flags)
 {
 
 	KASSERT((flags & SBL_VALID) == flags,
 	    ("sblock: flags invalid (0x%x)", flags));
 
 	/* Non-blocking request. */
 	if ((flags & SBL_WAIT) == 0)
 		return (sx_try_xlock(&sb->sb_sx) == 0 ? EWOULDBLOCK : 0);
 
 	/* Blocking request that a signal may interrupt. */
 	if ((sb->sb_flags & SB_NOINTR) == 0 && (flags & SBL_NOINTR) == 0)
 		return (sx_xlock_sig(&sb->sb_sx));
 
 	/* Blocking, uninterruptible request. */
 	sx_xlock(&sb->sb_sx);
 	return (0);
 }
 
 /*
  * Release the exclusive sx lock taken by sblock().
  */
 void
 sbunlock(struct sockbuf *sb)
 {
 
 	sx_xunlock(&sb->sb_sx);
 }
 
 /*
  * Wakeup processes waiting on a socket buffer.  Do asynchronous notification
  * via SIGIO if the socket has the SS_ASYNC flag set.
  *
  * Called with the socket buffer lock held; will release the lock by the end
  * of the function.  This allows the caller to acquire the socket buffer lock
  * while testing for the need for various sorts of wakeup and hold it through
  * to the point where it's no longer required.  We currently hold the lock
  * through calls out to other subsystems (with the exception of kqueue), and
  * then release it to avoid lock order issues.  It's not clear that's
  * correct.
  */
 void
 sowakeup(struct socket *so, struct sockbuf *sb)
 {
 	int ret;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Wake select/poll waiters; drop SB_SEL once nobody is waiting. */
 	selwakeuppri(&sb->sb_sel, PSOCK);
 	if (!SEL_WAITING(&sb->sb_sel))
 		sb->sb_flags &= ~SB_SEL;
 	/* Wake threads sleeping in sbwait(), which sleeps on &sb->sb_acc. */
 	if (sb->sb_flags & SB_WAIT) {
 		sb->sb_flags &= ~SB_WAIT;
 		wakeup(&sb->sb_acc);
 	}
 	KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
 	if (sb->sb_upcall != NULL) {
 		ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
 		if (ret == SU_ISCONNECTED) {
 			KASSERT(sb == &so->so_rcv,
 			    ("SO_SND upcall returned SU_ISCONNECTED"));
 			soupcall_clear(so, SO_RCV);
 		}
 	} else
 		ret = SU_OK;
 	if (sb->sb_flags & SB_AIO)
 		aio_swake(so, sb);
 	/* Everything below runs without the socket buffer lock. */
 	SOCKBUF_UNLOCK(sb);
 	if (ret == SU_ISCONNECTED)
 		soisconnected(so);
 	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
 		pgsigio(&so->so_sigio, SIGIO, 0);
 	mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
 }
 
 /*
  * Socket buffer (struct sockbuf) utility routines.
  *
  * Each socket contains two socket buffers: one for sending data and one for
  * receiving data.  Each buffer contains a queue of mbufs, information about
  * the number of mbufs and amount of data in the queue, and other fields
  * allowing select() statements and notification on data availability to be
  * implemented.
  *
  * Data stored in a socket buffer is maintained as a list of records.  Each
  * record is a list of mbufs chained together with the m_next field.  Records
  * are chained together with the m_nextpkt field. The upper level routine
  * soreceive() expects the following conventions to be observed when placing
  * information in the receive buffer:
  *
  * 1. If the protocol requires each message be preceded by the sender's name,
  *    then a record containing that name must be present before any
  *    associated data (mbuf's must be of type MT_SONAME).
  * 2. If the protocol supports the exchange of ``access rights'' (really just
  *    additional data associated with the message), and there are ``rights''
  *    to be received, then a record containing this data should be present
  *    (mbuf's must be of type MT_RIGHTS).
  * 3. If a name or rights record exists, then it must be followed by a data
  *    record, perhaps of zero length.
  *
  * Before using a new socket structure it is first necessary to reserve
  * buffer space to the socket, by calling sbreserve().  This should commit
  * some of the available buffer space in the system buffer pool for the
  * socket (currently, it does nothing but enforce limits).  The space should
  * be released by calling sbrelease() when the socket is destroyed.
  */
 /*
  * Reserve sndcc bytes for the send buffer and rcvcc bytes for the receive
  * buffer of "so", using curthread for the resource-limit check, and fill in
  * default low-water marks.  Returns 0 on success or ENOBUFS if either
  * reservation is refused.
  */
 int
 soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
 {
 	struct thread *td = curthread;
 
 	/* Both buffer locks are held: send first, then receive. */
 	SOCKBUF_LOCK(&so->so_snd);
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
 		goto bad;
 	if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
 		goto bad2;
 	/* Apply defaults only where no low-water mark has been set. */
 	if (so->so_rcv.sb_lowat == 0)
 		so->so_rcv.sb_lowat = 1;
 	if (so->so_snd.sb_lowat == 0)
 		so->so_snd.sb_lowat = MCLBYTES;
 	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
 		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	SOCKBUF_UNLOCK(&so->so_snd);
 	return (0);
 bad2:
 	/* The receive reservation failed: undo the send reservation. */
 	sbrelease_locked(&so->so_snd, so);
 bad:
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	SOCKBUF_UNLOCK(&so->so_snd);
 	return (ENOBUFS);
 }
 
 static int
 sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
 {
 	int error = 0;
 	u_long tmp_sb_max = sb_max;
 
 	error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
 	if (error || !req->newptr)
 		return (error);
 	if (tmp_sb_max < MSIZE + MCLBYTES)
 		return (EINVAL);
 	sb_max = tmp_sb_max;
 	sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
 	return (0);
 }
 	
 /*
  * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
  * become limiting if buffering efficiency is near the normal case.
  *
  * Returns 1 on success and 0 when cc exceeds sb_max_adj or chgsbsize()
  * refuses the change.  Note the return value is a success flag, not an
  * errno.  The socket buffer lock must be held.
  */
 int
 sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
     struct thread *td)
 {
 	rlim_t sbsize_limit;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/*
 	 * When a thread is passed, we take into account the thread's socket
 	 * buffer size limit.  The caller will generally pass curthread, but
 	 * in the TCP input path, NULL will be passed to indicate that no
 	 * appropriate thread resource limits are available.  In that case,
 	 * we don't apply a process limit.
 	 */
 	if (cc > sb_max_adj)
 		return (0);
 	if (td != NULL) {
 		sbsize_limit = lim_cur(td, RLIMIT_SBSIZE);
 	} else
 		sbsize_limit = RLIM_INFINITY;
 	/* chgsbsize() is handed &sb->sb_hiwat together with the new size. */
 	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
 	    sbsize_limit))
 		return (0);
 	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
 	/* Keep the low-water mark within the (possibly smaller) high-water mark. */
 	if (sb->sb_lowat > sb->sb_hiwat)
 		sb->sb_lowat = sb->sb_hiwat;
 	return (1);
 }
 
 /*
  * Locking wrapper around sbreserve_locked().  The result is handed back
  * unchanged: 1 when the reservation succeeded, 0 when it was refused.
  */
 int
 sbreserve(struct sockbuf *sb, u_long cc, struct socket *so,
     struct thread *td)
 {
 	int reserved;
 
 	SOCKBUF_LOCK(sb);
 	reserved = sbreserve_locked(sb, cc, so, td);
 	SOCKBUF_UNLOCK(sb);
 
 	return (reserved);
 }
 
 /*
  * Free mbufs held by a socket, and reserved mbuf space.
  *
  * Flushes the buffer, asks chgsbsize() to set the reservation tracked in
  * sb_hiwat to 0, and clears sb_mbmax.  No lock assertion is made here.
  */
 void
 sbrelease_internal(struct sockbuf *sb, struct socket *so)
 {
 
 	sbflush_internal(sb);
 	/* The result is deliberately ignored. */
 	(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
 	    RLIM_INFINITY);
 	sb->sb_mbmax = 0;
 }
 
 /*
  * sbrelease_internal() for callers that already hold the socket buffer lock.
  */
 void
 sbrelease_locked(struct sockbuf *sb, struct socket *so)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	sbrelease_internal(sb, so);
 }
 
 /*
  * Take the socket buffer lock, release the buffer's contents and
  * reservation, then drop the lock.
  */
 void
 sbrelease(struct sockbuf *sb, struct socket *so)
 {
 
 	SOCKBUF_LOCK(sb);
 	sbrelease_locked(sb, so);
 	SOCKBUF_UNLOCK(sb);
 }
 
 /*
  * Release the buffer's contents and reservation without taking or asserting
  * the socket buffer lock.
  */
 void
 sbdestroy(struct sockbuf *sb, struct socket *so)
 {
 
 	sbrelease_internal(sb, so);
 }
 
 /*
  * Routines to add and remove data from an mbuf queue.
  *
  * The routines sbappend() or sbappendrecord() are normally called to append
  * new mbufs to a socket buffer, after checking that adequate space is
  * available, comparing the function sbspace() with the amount of data to be
  * added.  sbappendrecord() differs from sbappend() in that data supplied is
  * treated as the beginning of a new record.  To place a sender's address,
  * optional access rights, and data in a socket receive buffer,
  * sbappendaddr() should be used.  To place access rights and data in a
  * socket receive buffer, sbappendrights() should be used.  In either case,
  * the new data begins a new record.  Note that unlike sbappend() and
  * sbappendrecord(), these routines check for the caller that there will be
  * enough space to store the data.  Each fails if there is not enough space,
  * or if it cannot find mbufs to store additional information in.
  *
  * Reliable protocols may use the socket send buffer to hold data awaiting
  * acknowledgement.  Data is normally copied from a socket send buffer in a
  * protocol with m_copy for output to a peer, and then removing the data from
  * the socket buffer with sbdrop() or sbdroprecord() when the data is
  * acknowledged by the peer.
  */
 #ifdef SOCKBUF_DEBUG
 /*
  * Debug check: walk the record chain from sb_mb through m_nextpkt and panic
  * if the final record is not sb_lastrecord.  "file" and "line" identify the
  * call site and are reported in the panic message.  The socket buffer lock
  * must be held.
  */
 void
 sblastrecordchk(struct sockbuf *sb, const char *file, int line)
 {
 	struct mbuf *m = sb->sb_mb;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Find the last record in the chain. */
 	while (m && m->m_nextpkt)
 		m = m->m_nextpkt;
 
 	if (m != sb->sb_lastrecord) {
 		printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
 			__func__, sb->sb_mb, sb->sb_lastrecord, m);
 		printf("packet chain:\n");
 		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
 			printf("\t%p\n", m);
 		/* "line" is a signed int, so it is printed with %d. */
 		panic("%s from %s:%d", __func__, file, line);
 	}
 }
 
 /*
  * Debug check: locate the last mbuf of the last record (m_nextpkt to the
  * final record, then m_next to its final mbuf) and panic if it is not
  * sb_mbtail.  "file" and "line" identify the call site and are reported in
  * the panic message.  The socket buffer lock must be held.
  */
 void
 sblastmbufchk(struct sockbuf *sb, const char *file, int line)
 {
 	struct mbuf *m = sb->sb_mb;
 	struct mbuf *n;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Last record ... */
 	while (m && m->m_nextpkt)
 		m = m->m_nextpkt;
 
 	/* ... then the last mbuf within it. */
 	while (m && m->m_next)
 		m = m->m_next;
 
 	if (m != sb->sb_mbtail) {
 		printf("%s: sb_mb %p sb_mbtail %p last %p\n",
 			__func__, sb->sb_mb, sb->sb_mbtail, m);
 		printf("packet tree:\n");
 		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
 			printf("\t");
 			for (n = m; n != NULL; n = n->m_next)
 				printf("%p ", n);
 			printf("\n");
 		}
 		/* "line" is a signed int, so it is printed with %d. */
 		panic("%s from %s:%d", __func__, file, line);
 	}
 }
 #endif /* SOCKBUF_DEBUG */
 
 /*
  * Link record m0 at the end of sb's record chain: hook it after the current
  * sb_lastrecord, or make it sb_mb when there is no last record, and then
  * make it the new sb_lastrecord.  Asserts that the buffer lock is held.
  */
 #define SBLINKRECORD(sb, m0) do {					\
 	SOCKBUF_LOCK_ASSERT(sb);					\
 	if ((sb)->sb_lastrecord != NULL)				\
 		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
 	else								\
 		(sb)->sb_mb = (m0);					\
 	(sb)->sb_lastrecord = (m0);					\
 } while (/*CONSTCOND*/0)
 
 /*
  * Append mbuf chain m to the last record in the socket buffer sb.  The
  * additional space associated with the mbuf chain is recorded in sb.  Empty mbufs
  * are discarded and mbufs are compacted where possible.
  */
 void
-sbappend_locked(struct sockbuf *sb, struct mbuf *m)
+sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags)
 {
 	struct mbuf *n;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	if (m == 0)
 		return;
-	m_clrprotoflags(m);
+	sbm_clrprotoflags(m, flags);
 	SBLASTRECORDCHK(sb);
 	n = sb->sb_mb;
 	if (n) {
 		/* Walk to the last record, then along its mbufs looking for M_EOR. */
 		while (n->m_nextpkt)
 			n = n->m_nextpkt;
 		do {
 			if (n->m_flags & M_EOR) {
 				/* The last record is complete: start a new one. */
 				sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
 				return;
 			}
 		} while (n->m_next && (n = n->m_next));
 	} else {
 		/*
 		 * XXX Would like to simply use sb_mbtail here, but
 		 * XXX I need to verify that I won't miss an EOR that
 		 * XXX way.
 		 */
 		if ((n = sb->sb_lastrecord) != NULL) {
 			do {
 				if (n->m_flags & M_EOR) {
 					sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
 					return;
 				}
 			} while (n->m_next && (n = n->m_next));
 		} else {
 			/*
 			 * If this is the first record in the socket buffer,
 			 * it's also the last record.
 			 */
 			sb->sb_lastrecord = m;
 		}
 	}
 	/* n is the last mbuf of the last record, or NULL for an empty buffer. */
 	sbcompress(sb, m, n);
 	SBLASTRECORDCHK(sb);
 }
 
 /*
  * Append mbuf chain m to the last record in the socket buffer sb.  The
  * additional space associated with the mbuf chain is recorded in sb.  Empty mbufs
  * are discarded and mbufs are compacted where possible.
  */
 void
-sbappend(struct sockbuf *sb, struct mbuf *m)
+sbappend(struct sockbuf *sb, struct mbuf *m, int flags)
 {
 
 	/* Locking wrapper: the work is done by sbappend_locked(). */
 	SOCKBUF_LOCK(sb);
-	sbappend_locked(sb, m);
+	sbappend_locked(sb, m, flags);
 	SOCKBUF_UNLOCK(sb);
 }
 
 /*
  * This version of sbappend() should only be used when the caller absolutely
  * knows that there will never be more than one record in the socket buffer,
  * that is, a stream protocol (such as TCP).
  */
 void
 sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags)
 {
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* A single chain only, and the buffer may hold at most one record. */
 	KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
 	KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
 
 	SBLASTMBUFCHK(sb);
 
 	/* Remove all packet headers and mbuf tags to get a pure data chain. */
 	m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0);
 
 	/* Append after the current tail mbuf. */
 	sbcompress(sb, m, sb->sb_mbtail);
 
 	/* With a single record, the first record is also the last one. */
 	sb->sb_lastrecord = sb->sb_mb;
 	SBLASTRECORDCHK(sb);
 }
 
 /*
  * This version of sbappend() should only be used when the caller absolutely
  * knows that there will never be more than one record in the socket buffer,
  * that is, a stream protocol (such as TCP).
  */
 void
 sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags)
 {
 
 	/* Locking wrapper: the work is done by sbappendstream_locked(). */
 	SOCKBUF_LOCK(sb);
 	sbappendstream_locked(sb, m, flags);
 	SOCKBUF_UNLOCK(sb);
 }
 
 #ifdef SOCKBUF_DEBUG
 /*
  * Debug check: recompute the available byte count (acc), the total byte
  * count (ccc) and the mbuf storage count (mbcnt) by walking every record and
  * mbuf, verify the sb_fnrdy / M_NOTAVAIL invariants on the way, and panic if
  * anything disagrees with the values cached in sb.  "file" and "line"
  * identify the call site.  The socket buffer lock must be held.
  */
 void
 sbcheck(struct sockbuf *sb, const char *file, int line)
 {
 	struct mbuf *m, *n, *fnrdy;
 	u_long acc, ccc, mbcnt;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	acc = ccc = mbcnt = 0;
 	fnrdy = NULL;
 
 	for (m = sb->sb_mb; m; m = n) {
 	    n = m->m_nextpkt;
 	    for (; m; m = m->m_next) {
 		if (m->m_len == 0) {
 			printf("sb %p empty mbuf %p\n", sb, m);
 			goto fail;
 		}
 		/* The first M_NOTREADY mbuf seen must be sb_fnrdy. */
 		if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) {
 			if (m != sb->sb_fnrdy) {
 				printf("sb %p: fnrdy %p != m %p\n",
 				    sb, sb->sb_fnrdy, m);
 				goto fail;
 			}
 			fnrdy = m;
 		}
 		/* From there on, every mbuf must be flagged as not available. */
 		if (fnrdy) {
 			if (!(m->m_flags & M_NOTAVAIL)) {
 				printf("sb %p: fnrdy %p, m %p is avail\n",
 				    sb, sb->sb_fnrdy, m);
 				goto fail;
 			}
 		} else
 			acc += m->m_len;
 		ccc += m->m_len;
 		mbcnt += MSIZE;
 		if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
 			mbcnt += m->m_ext.ext_size;
 	    }
 	}
 	if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) {
 		/* The local counters are u_long, so they are printed with %lu. */
 		printf("acc %lu/%u ccc %lu/%u mbcnt %lu/%u\n",
 		    acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt);
 		goto fail;
 	}
 	return;
 fail:
 	/* "line" is a signed int, so it is printed with %d. */
 	panic("%s from %s:%d", __func__, file, line);
 }
 #endif
 
 /*
  * As above, except the mbuf chain begins a new record.
  */
 void
 sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
 {
 	struct mbuf *m;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	if (m0 == 0)
 		return;
 	m_clrprotoflags(m0);
 	/*
 	 * Put the first mbuf on the queue.  Note this permits zero length
 	 * records.
 	 */
 	sballoc(sb, m0);
 	SBLASTRECORDCHK(sb);
 	SBLINKRECORD(sb, m0);
 	sb->sb_mbtail = m0;
 	/* Detach the rest of the chain; sbcompress() appends it after m0. */
 	m = m0->m_next;
 	m0->m_next = 0;
 	/* Move an M_EOR mark from the first mbuf onto the following one. */
 	if (m && (m0->m_flags & M_EOR)) {
 		m0->m_flags &= ~M_EOR;
 		m->m_flags |= M_EOR;
 	}
 	/* always call sbcompress() so it can do SBLASTMBUFCHK() */
 	sbcompress(sb, m, m0);
 }
 
 /*
  * As above, except the mbuf chain begins a new record.
  */
 void
 sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
 {
 
 	/* Locking wrapper: the work is done by sbappendrecord_locked(). */
 	SOCKBUF_LOCK(sb);
 	sbappendrecord_locked(sb, m0);
 	SOCKBUF_UNLOCK(sb);
 }
 
 /*
  * Helper routine that appends data, control, and address to a sockbuf.
  *
  * Builds one record laid out as: an MT_SONAME mbuf holding a copy of asa,
  * then the control chain, then the data chain m0.  ctrl_last must be the
  * last mbuf of "control", or NULL when there is no control chain.  No space
  * check is made here.  Returns 1 on success and 0 if the address does not
  * fit (checked only when MSIZE <= 256) or no mbuf could be allocated.
  */
 static int
 sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa,
     struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last)
 {
 	struct mbuf *m, *n, *nlast;
 #if MSIZE <= 256
 	if (asa->sa_len > MLEN)
 		return (0);
 #endif
 	m = m_get(M_NOWAIT, MT_SONAME);
 	if (m == NULL)
 		return (0);
 	m->m_len = asa->sa_len;
 	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
 	if (m0)
 		m_clrprotoflags(m0);
 	if (ctrl_last)
 		ctrl_last->m_next = m0;	/* concatenate data to control */
 	else
 		control = m0;
 	m->m_next = control;
 	/* Account for every mbuf of the new record; the loop stops on the last. */
 	for (n = m; n->m_next != NULL; n = n->m_next)
 		sballoc(sb, n);
 	sballoc(sb, n);
 	nlast = n;
 	SBLINKRECORD(sb, m);
 
 	sb->sb_mbtail = nlast;
 	SBLASTMBUFCHK(sb);
 
 	SBLASTRECORDCHK(sb);
 	return (1);
 }
 
 /*
  * Append address and data, and optionally, control (ancillary) data to the
  * receive queue of a socket.  If present, m0 must include a packet header
  * with total length.  Returns 0 if no space in sockbuf or insufficient
  * mbufs.
  */
 int
 sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
     struct mbuf *m0, struct mbuf *control)
 {
 	struct mbuf *ctrl_last;
 	int space = asa->sa_len;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
 		panic("sbappendaddr_locked");
 	if (m0)
 		space += m0->m_pkthdr.len;
 	space += m_length(control, &ctrl_last);
 
 	if (space > sbspace(sb))
 		return (0);
 	return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
 }
 
 /*
  * Append address and data, and optionally, control (ancillary) data to the
  * receive queue of a socket.  If present, m0 must include a packet header
  * with total length.  Returns 0 if insufficient mbufs.  Does not validate space
  * on the receiving sockbuf.
  */
 int
 sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa,
     struct mbuf *m0, struct mbuf *control)
 {
 	struct mbuf *tail;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* The helper needs the last control mbuf, if there is a control chain. */
 	tail = NULL;
 	if (control != NULL)
 		tail = m_last(control);
 
 	return (sbappendaddr_locked_internal(sb, asa, m0, control, tail));
 }
 
 /*
  * Append address and data, and optionally, control (ancillary) data to the
  * receive queue of a socket.  If present, m0 must include a packet header
  * with total length.  Returns 0 if no space in sockbuf or insufficient
  * mbufs.
  */
 int
 sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
     struct mbuf *m0, struct mbuf *control)
 {
 	int retval;
 
 	/* Locking wrapper: the work is done by sbappendaddr_locked(). */
 	SOCKBUF_LOCK(sb);
 	retval = sbappendaddr_locked(sb, asa, m0, control);
 	SOCKBUF_UNLOCK(sb);
 	return (retval);
 }
 
 /*
  * Append a new record made of the control chain followed by the data chain
  * m0.  "control" must not be NULL (the function panics otherwise).  Returns
  * 1 on success and 0 when the combined length exceeds sbspace(sb).  The
  * socket buffer lock must be held.
  */
 int
 sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
     struct mbuf *control)
 {
 	struct mbuf *m, *n, *mlast;
 	int space;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	if (control == 0)
 		panic("sbappendcontrol_locked");
 	/* m_length() also stores the last control mbuf in n. */
 	space = m_length(control, &n) + m_length(m0, NULL);
 
 	if (space > sbspace(sb))
 		return (0);
 	m_clrprotoflags(m0);
 	n->m_next = m0;			/* concatenate data to control */
 
 	SBLASTRECORDCHK(sb);
 
 	/* Account for every mbuf of the new record; the loop stops on the last. */
 	for (m = control; m->m_next; m = m->m_next)
 		sballoc(sb, m);
 	sballoc(sb, m);
 	mlast = m;
 	SBLINKRECORD(sb, control);
 
 	sb->sb_mbtail = mlast;
 	SBLASTMBUFCHK(sb);
 
 	SBLASTRECORDCHK(sb);
 	return (1);
 }
 
 /*
  * Locking wrapper around sbappendcontrol_locked(); its result is returned
  * unchanged.
  */
 int
 sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
 {
 	int retval;
 
 	SOCKBUF_LOCK(sb);
 	retval = sbappendcontrol_locked(sb, m0, control);
 	SOCKBUF_UNLOCK(sb);
 	return (retval);
 }
 
 /*
  * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
  * (n).  If (n) is NULL, the buffer is presumed empty.
  *
  * When the data is compressed, mbufs in the chain may be handled in one of
  * three ways:
  *
  * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
  *     record boundary, and no change in data type).
  *
  * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
  *     an mbuf already in the socket buffer.  This can occur if an
  *     appropriate mbuf exists, there is room, both mbufs are not marked as
  *     not ready, and no merging of data types will occur.
  *
  * (3) The mbuf may be appended to the end of the existing mbuf chain.
  *
  * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
  * end-of-record.
  */
 void
 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
 {
 	int eor = 0;
 	struct mbuf *o;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	while (m) {
 		/* Remember whether any incoming mbuf carried M_EOR. */
 		eor |= m->m_flags & M_EOR;
 		/*
 		 * Case (1): drop an empty mbuf, unless an end-of-record mark
 		 * is pending and no neighbour of the same type exists.
 		 */
 		if (m->m_len == 0 &&
 		    (eor == 0 ||
 		     (((o = m->m_next) || (o = n)) &&
 		      o->m_type == m->m_type))) {
 			if (sb->sb_lastrecord == m)
 				sb->sb_lastrecord = m->m_next;
 			m = m_free(m);
 			continue;
 		}
 		/* Case (2): copy the data into the tail mbuf n when allowed. */
 		if (n && (n->m_flags & M_EOR) == 0 &&
 		    M_WRITABLE(n) &&
 		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
 		    !(m->m_flags & M_NOTREADY) &&
 		    !(n->m_flags & M_NOTREADY) &&
 		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
 		    m->m_len <= M_TRAILINGSPACE(n) &&
 		    n->m_type == m->m_type) {
 			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
 			    (unsigned)m->m_len);
 			n->m_len += m->m_len;
 			/* No new mbuf is queued, so update byte counters by hand. */
 			sb->sb_ccc += m->m_len;
 			if (sb->sb_fnrdy == NULL)
 				sb->sb_acc += m->m_len;
 			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
 				/* XXX: Probably don't need.*/
 				sb->sb_ctl += m->m_len;
 			m = m_free(m);
 			continue;
 		}
 		/* Case (3): link m itself at the tail and account for it. */
 		if (n)
 			n->m_next = m;
 		else
 			sb->sb_mb = m;
 		sb->sb_mbtail = m;
 		sballoc(sb, m);
 		n = m;
 		m->m_flags &= ~M_EOR;
 		m = m->m_next;
 		n->m_next = 0;
 	}
 	/* Re-apply a pending end-of-record mark to the final tail mbuf. */
 	if (eor) {
 		KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
 		n->m_flags |= eor;
 	}
 	SBLASTMBUFCHK(sb);
 }
 
 /*
  * Free all mbufs in a sockbuf.  Check that all resources are reclaimed.
  *
  * Repeatedly cuts sb_ccc bytes off the front of the buffer and frees the
  * returned chain, until sb_mbcnt reaches zero.  The closing KASSERT verifies
  * that the buffer really is empty.
  */
 static void
 sbflush_internal(struct sockbuf *sb)
 {
 
 	while (sb->sb_mbcnt) {
 		/*
 		 * Don't call sbcut(sb, 0) if the leading mbuf is non-empty:
 		 * we would loop forever.  Leave the loop instead; the KASSERT
 		 * below then reports the inconsistent state.
 		 */
 		if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len))
 			break;
 		m_freem(sbcut_internal(sb, (int)sb->sb_ccc));
 	}
 	KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
 	    ("%s: ccc %u mb %p mbcnt %u", __func__,
 	    sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
 }
 
 /*
  * sbflush_internal() for callers that already hold the socket buffer lock.
  */
 void
 sbflush_locked(struct sockbuf *sb)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 	sbflush_internal(sb);
 }
 
 /*
  * Take the socket buffer lock, flush the buffer, and drop the lock.
  */
 void
 sbflush(struct sockbuf *sb)
 {
 
 	SOCKBUF_LOCK(sb);
 	sbflush_locked(sb);
 	SOCKBUF_UNLOCK(sb);
 }
 
 /*
  * Cut data from (the front of) a sockbuf.
  *
  * Removes "len" bytes starting at sb_mb, crossing record boundaries when
  * needed, and updates the buffer accounting.  Returns the fully consumed
  * mbufs as a list linked through m_next, for the caller to free; mbufs that
  * are M_NOTREADY are unlinked from the buffer but not put on that list.  A
  * partially consumed mbuf stays in the buffer with m_data/m_len adjusted.
  */
 static struct mbuf *
 sbcut_internal(struct sockbuf *sb, int len)
 {
 	struct mbuf *m, *next, *mfree;
 
 	/* "next" is the record following the one currently being consumed. */
 	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
 	mfree = NULL;
 
 	while (len > 0) {
 		if (m == NULL) {
 			/* The current record is exhausted: move to the next one. */
 			KASSERT(next, ("%s: no next, len %d", __func__, len));
 			m = next;
 			next = m->m_nextpkt;
 		}
 		if (m->m_len > len) {
 			/* Partial cut: trim the front of this mbuf and stop. */
 			KASSERT(!(m->m_flags & M_NOTAVAIL),
 			    ("%s: m %p M_NOTAVAIL", __func__, m));
 			m->m_len -= len;
 			m->m_data += len;
 			sb->sb_ccc -= len;
 			sb->sb_acc -= len;
 			if (sb->sb_sndptroff != 0)
 				sb->sb_sndptroff -= len;
 			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
 				sb->sb_ctl -= len;
 			break;
 		}
 		/* The whole mbuf is consumed. */
 		len -= m->m_len;
 		sbfree(sb, m);
 		/*
 		 * Do not put M_NOTREADY buffers to the free list, they
 		 * are referenced from outside.
 		 */
 		if (m->m_flags & M_NOTREADY)
 			m = m->m_next;
 		else {
 			struct mbuf *n;
 
 			/* Push m onto the front of the list to be freed. */
 			n = m->m_next;
 			m->m_next = mfree;
 			mfree = m;
 			m = n;
 		}
 	}
 	/*
 	 * Free any zero-length mbufs from the buffer.
 	 * For SOCK_DGRAM sockets such mbufs represent empty records.
 	 * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer,
 	 * when sosend_generic() needs to send only control data.
 	 */
 	while (m && m->m_len == 0) {
 		struct mbuf *n;
 
 		sbfree(sb, m);
 		n = m->m_next;
 		m->m_next = mfree;
 		mfree = m;
 		m = n;
 	}
 	/* Re-anchor the buffer at the first surviving mbuf or the next record. */
 	if (m) {
 		sb->sb_mb = m;
 		m->m_nextpkt = next;
 	} else
 		sb->sb_mb = next;
 	/*
 	 * First part is an inline SB_EMPTY_FIXUP().  Second part makes sure
 	 * sb_lastrecord is up-to-date if we dropped part of the last record.
 	 */
 	m = sb->sb_mb;
 	if (m == NULL) {
 		sb->sb_mbtail = NULL;
 		sb->sb_lastrecord = NULL;
 	} else if (m->m_nextpkt == NULL) {
 		sb->sb_lastrecord = m;
 	}
 
 	return (mfree);
 }
 
 /*
  * Drop data from (the front of) a sockbuf.
  *
  * The cut mbufs are freed while the socket buffer lock is still held.
  */
 void
 sbdrop_locked(struct sockbuf *sb, int len)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 	m_freem(sbcut_internal(sb, len));
 }
 
 /*
  * Drop data from (the front of) a sockbuf,
  * and return it to caller.
  *
  * The returned chain is whatever sbcut_internal() produced; nothing is
  * freed here.  The socket buffer lock must be held.
  */
 struct mbuf *
 sbcut_locked(struct sockbuf *sb, int len)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 	return (sbcut_internal(sb, len));
 }
 
 /*
  * Locking variant of sbdrop_locked(): the data is cut under the socket
  * buffer lock, and the cut mbufs are freed after the lock is released.
  */
 void
 sbdrop(struct sockbuf *sb, int len)
 {
 	struct mbuf *mfree;
 
 	SOCKBUF_LOCK(sb);
 	mfree = sbcut_internal(sb, len);
 	SOCKBUF_UNLOCK(sb);
 
 	m_freem(mfree);
 }
 
 /*
  * Maintain a pointer and offset pair into the socket buffer mbuf chain to
  * avoid traversal of the entire socket buffer for larger offsets.
  */
 struct mbuf *
 sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff)
 {
 	struct mbuf *m, *ret;
 
 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
 	KASSERT(off + len <= sb->sb_acc, ("%s: beyond sb", __func__));
 	KASSERT(sb->sb_sndptroff <= sb->sb_acc, ("%s: sndptroff broken", __func__));
 
 	/*
 	 * Is off below stored offset? Happens on retransmits.
 	 * Just return, we can't help here.
 	 */
 	if (sb->sb_sndptroff > off) {
 		*moff = off;
 		return (sb->sb_mb);
 	}
 
 	/* Return closest mbuf in chain for current offset. */
 	*moff = off - sb->sb_sndptroff;
 	m = ret = sb->sb_sndptr ? sb->sb_sndptr : sb->sb_mb;
 	/* The offset falls exactly at the end of this mbuf: step to the next. */
 	if (*moff == m->m_len) {
 		*moff = 0;
 		sb->sb_sndptroff += m->m_len;
 		m = ret = m->m_next;
 		KASSERT(ret->m_len > 0,
 		    ("mbuf %p in sockbuf %p chain has no valid data", ret, sb));
 	}
 
 	/* Advance by len to be as close as possible for the next transmit. */
 	for (off = off - sb->sb_sndptroff + len - 1;
 	     off > 0 && m != NULL && off >= m->m_len;
 	     m = m->m_next) {
 		sb->sb_sndptroff += m->m_len;
 		off -= m->m_len;
 	}
 	/* Ran off the end of the chain with bytes still to skip. */
 	if (off > 0 && m == NULL)
 		panic("%s: sockbuf %p and mbuf %p clashing", __func__, sb, ret);
 	/* Cache the position for the next call; "ret" is what the caller uses. */
 	sb->sb_sndptr = m;
 
 	return (ret);
 }
 
 /*
  * Look up the mbuf containing send offset "off" and store the offset within
  * that mbuf in *moff.  Unlike sbsndptr(), the cached sb_sndptr/sb_sndptroff
  * pair is only read, never updated.  Returns NULL when the chain ends before
  * the offset is reached.
  */
 struct mbuf *
 sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff)
 {
 	struct mbuf *m;
 
 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
 
 	/*
 	 * Start from the head of the buffer, unless a cached send pointer
 	 * exists at or before "off"; an offset below the cached one happens
 	 * on retransmits.
 	 */
 	m = sb->sb_mb;
 	if (sb->sb_sndptr != NULL && sb->sb_sndptroff <= off) {
 		m = sb->sb_sndptr;
 		off -= sb->sb_sndptroff;
 	}
 
 	/* Skip every mbuf that lies entirely before the remaining offset. */
 	for (; off > 0 && m != NULL && off >= m->m_len; m = m->m_next)
 		off -= m->m_len;
 
 	*moff = off;
 	return (m);
 }
 
 /*
  * Drop a record off the front of a sockbuf and move the next record to the
  * front.
  */
 void
 sbdroprecord_locked(struct sockbuf *sb)
 {
 	struct mbuf *m;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	m = sb->sb_mb;
 	if (m) {
 		sb->sb_mb = m->m_nextpkt;
 		do {
 			sbfree(sb, m);
 			m = m_free(m);
 		} while (m);
 	}
 	SB_EMPTY_FIXUP(sb);
 }
 
 /*
  * Drop a record off the front of a sockbuf and move the next record to the
  * front.
  *
  * Locking wrapper around sbdroprecord_locked().
  */
 void
 sbdroprecord(struct sockbuf *sb)
 {
 
 	SOCKBUF_LOCK(sb);
 	sbdroprecord_locked(sb);
 	SOCKBUF_UNLOCK(sb);
 }
 
 /*
  * Create a "control" mbuf containing the specified data with the specified
  * type for presentation on a socket buffer.
  *
  * "p" points to "size" bytes of payload and may be NULL, in which case the
  * payload area is left zeroed.  "type" and "level" are stored in the cmsghdr.
  * Returns NULL when CMSG_SPACE(size) exceeds MCLBYTES or when the M_NOWAIT
  * allocation fails.
  */
 struct mbuf *
 sbcreatecontrol(caddr_t p, int size, int type, int level)
 {
 	struct cmsghdr *cp;
 	struct mbuf *m;
 
 	if (CMSG_SPACE((u_int)size) > MCLBYTES)
 		return ((struct mbuf *) NULL);
 	/* m_getcl() is used when the message does not fit within MLEN. */
 	if (CMSG_SPACE((u_int)size) > MLEN)
 		m = m_getcl(M_NOWAIT, MT_CONTROL, 0);
 	else
 		m = m_get(M_NOWAIT, MT_CONTROL);
 	if (m == NULL)
 		return ((struct mbuf *) NULL);
 	cp = mtod(m, struct cmsghdr *);
 	/* m_len is zeroed before the M_TRAILINGSPACE() check below. */
 	m->m_len = 0;
 	KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m),
 	    ("sbcreatecontrol: short mbuf"));
 	/*
 	 * Don't leave the padding between the msg header and the
 	 * cmsg data and the padding after the cmsg data un-initialized.
 	 */
 	bzero(cp, CMSG_SPACE((u_int)size));
 	if (p != NULL)
 		(void)memcpy(CMSG_DATA(cp), p, size);
 	/* The mbuf covers the padded space; cmsg_len is set with CMSG_LEN(). */
 	m->m_len = CMSG_SPACE(size);
 	cp->cmsg_len = CMSG_LEN(size);
 	cp->cmsg_level = level;
 	cp->cmsg_type = type;
 	return (m);
 }
 
 /*
  * This does the same for socket buffers that sotoxsocket does for sockets:
  * generate a user-format data structure describing the socket buffer.  Note
  * that the xsockbuf structure, since it is always embedded in a socket, does
  * not include a self pointer nor a length.  We make this entry point public
  * in case some other mechanism needs it.
  */
 void
 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
 {
 
 	/* The exported sb_cc is the total byte count (sb_ccc), not sb_acc. */
 	xsb->sb_cc = sb->sb_ccc;
 	xsb->sb_hiwat = sb->sb_hiwat;
 	xsb->sb_mbcnt = sb->sb_mbcnt;
 	xsb->sb_mcnt = sb->sb_mcnt;	
 	xsb->sb_ccnt = sb->sb_ccnt;
 	xsb->sb_mbmax = sb->sb_mbmax;
 	xsb->sb_lowat = sb->sb_lowat;
 	xsb->sb_flags = sb->sb_flags;
 	xsb->sb_timeo = sb->sb_timeo;
 }
 
 /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
 static int dummy;
 SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
 /* sb_max is changed through sysctl_handle_sb_max(), which validates it. */
 SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW,
     &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size");
 /* Exposes sb_efficiency, the multiplier sbreserve_locked() uses for sb_mbmax. */
 SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
     &sb_efficiency, 0, "Socket buffer size waste factor");
Index: projects/clang380-import/sys/kern/uipc_syscalls.c
===================================================================
--- projects/clang380-import/sys/kern/uipc_syscalls.c	(revision 293686)
+++ projects/clang380-import/sys/kern/uipc_syscalls.c	(revision 293687)
@@ -1,2570 +1,2748 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * sendfile(2) and related extensions:
  * Copyright (c) 1998, David Greenman. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_compat.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/capsicum.h>
 #include <sys/condvar.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sysproto.h>
 #include <sys/malloc.h>
 #include <sys/filedesc.h>
 #include <sys/event.h>
 #include <sys/proc.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filio.h>
 #include <sys/jail.h>
 #include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/rwlock.h>
 #include <sys/sf_buf.h>
 #include <sys/sysent.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/signalvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 #include <sys/vnode.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 #ifdef COMPAT_FREEBSD32
 #include <compat/freebsd32/freebsd32_util.h>
 #endif
 
 #include <net/vnet.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 
 /*
  * Flags for accept1() and kern_accept4(), in addition to SOCK_CLOEXEC
  * and SOCK_NONBLOCK.
  */
 #define	ACCEPT4_INHERIT	0x1
 #define	ACCEPT4_COMPAT	0x2
 
 /* Forward declarations of file-local (static) helpers. */
 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
 
 static int accept1(struct thread *td, int s, struct sockaddr *uname,
 		   socklen_t *anamelen, int flags);
 static int do_sendfile(struct thread *td, struct sendfile_args *uap,
 		   int compat);
 static int getsockname1(struct thread *td, struct getsockname_args *uap,
 			int compat);
 static int getpeername1(struct thread *td, struct getpeername_args *uap,
 			int compat);
 
 /*
  * One counter per uint64_t-sized slot of struct sfstat.  The counters are
  * allocated by sfstat_init() and exported/reset by sfstat_sysctl().
  */
 counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
 
-/*
- * sendfile(2)-related variables and associated sysctls
- */
-static SYSCTL_NODE(_kern_ipc, OID_AUTO, sendfile, CTLFLAG_RW, 0,
-    "sendfile(2) tunables");
-static int sfreadahead = 1;
-SYSCTL_INT(_kern_ipc_sendfile, OID_AUTO, readahead, CTLFLAG_RW,
-    &sfreadahead, 0, "Number of sendfile(2) read-ahead MAXBSIZE blocks");
-
 static void
 sfstat_init(const void *unused)
 {
 
 	COUNTER_ARRAY_ALLOC(sfstat, sizeof(struct sfstat) / sizeof(uint64_t),
 	    M_WAITOK);
 }
 SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL);
 
 static int
 sfstat_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct sfstat s;
 
 	COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t));
 	if (req->newptr)
 		COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t));
 	return (SYSCTL_OUT(req, &s, sizeof(s)));
 }
 SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW,
     NULL, 0, sfstat_sysctl, "I", "sendfile statistics");
 
 /*
  * Look up descriptor `fd' in the calling process, verifying the requested
  * capability rights, and make sure it refers to a socket.
  *
  * On success 0 is returned, *fpp holds a referenced file entry (the caller
  * must fdrop() it) and, when fflagp is not NULL, *fflagp receives the
  * file's f_flag.  On failure the lookup error, or ENOTSOCK for a
  * non-socket descriptor, is returned and no reference is held.
  */
 int
 getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp,
     struct file **fpp, u_int *fflagp)
 {
 	struct file *sockfp;
 	int error;
 
 	error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &sockfp, NULL);
 	if (error == 0 && sockfp->f_type != DTYPE_SOCKET) {
 		/* Not a socket: give the reference back. */
 		fdrop(sockfp, td);
 		error = ENOTSOCK;
 	}
 	if (error == 0) {
 		if (fflagp != NULL)
 			*fflagp = sockfp->f_flag;
 		*fpp = sockfp;
 	}
 	return (error);
 }
 
 /*
  * System call interface to the socket abstraction.
  */
 #if defined(COMPAT_43)
 #define COMPAT_OLDSOCK
 #endif
 
 int
 sys_socket(td, uap)
 	struct thread *td;
 	struct socket_args /* {
 		int	domain;
 		int	type;
 		int	protocol;
 	} */ *uap;
 {
 	struct socket *so;
 	struct file *fp;
 	int fd, error, type, oflag, fflag;
 
 	AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol);
 
 	type = uap->type;
 	oflag = 0;
 	fflag = 0;
 	if ((type & SOCK_CLOEXEC) != 0) {
 		type &= ~SOCK_CLOEXEC;
 		oflag |= O_CLOEXEC;
 	}
 	if ((type & SOCK_NONBLOCK) != 0) {
 		type &= ~SOCK_NONBLOCK;
 		fflag |= FNONBLOCK;
 	}
 
 #ifdef MAC
 	error = mac_socket_check_create(td->td_ucred, uap->domain, type,
 	    uap->protocol);
 	if (error != 0)
 		return (error);
 #endif
 	error = falloc(td, &fp, &fd, oflag);
 	if (error != 0)
 		return (error);
 	/* An extra reference on `fp' has been held for us by falloc(). */
 	error = socreate(uap->domain, &so, type, uap->protocol,
 	    td->td_ucred, td);
 	if (error != 0) {
 		fdclose(td, fp, fd);
 	} else {
 		finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops);
 		if ((fflag & FNONBLOCK) != 0)
 			(void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td);
 		td->td_retval[0] = fd;
 	}
 	fdrop(fp, td);
 	return (error);
 }
 
 /* ARGSUSED */
 int
 sys_bind(td, uap)
 	struct thread *td;
 	struct bind_args /* {
 		int	s;
 		caddr_t	name;
 		int	namelen;
 	} */ *uap;
 {
 	struct sockaddr *sa;
 	int error;
 
 	error = getsockaddr(&sa, uap->name, uap->namelen);
 	if (error == 0) {
 		error = kern_bindat(td, AT_FDCWD, uap->s, sa);
 		free(sa, M_SONAME);
 	}
 	return (error);
 }
 
 /*
  * Bind the socket behind descriptor `fd' to the kernel-space address `sa'.
  * The descriptor must carry CAP_BIND.  With dirfd == AT_FDCWD this uses
  * sobind(); any other dirfd goes through sobindat().
  */
 int
 kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
 {
 	struct file *fp;
 	struct socket *so;
 	cap_rights_t rights;
 	int error;
 
 	AUDIT_ARG_FD(fd);
 	AUDIT_ARG_SOCKADDR(td, dirfd, sa);
 	error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_BIND),
 	    &fp, NULL);
 	if (error != 0)
 		return (error);
 	so = fp->f_data;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(sa);
 #endif
 	/* `error' is 0 here; a MAC policy may veto the bind. */
 #ifdef MAC
 	error = mac_socket_check_bind(td->td_ucred, so, sa);
 #endif
 	if (error == 0) {
 		if (dirfd != AT_FDCWD)
 			error = sobindat(dirfd, so, sa, td);
 		else
 			error = sobind(so, sa, td);
 	}
 	fdrop(fp, td);
 	return (error);
 }
 
 /* ARGSUSED */
 int
 sys_bindat(td, uap)
 	struct thread *td;
 	struct bindat_args /* {
 		int	fd;
 		int	s;
 		caddr_t	name;
 		int	namelen;
 	} */ *uap;
 {
 	struct sockaddr *sa;
 	int error;
 
 	error = getsockaddr(&sa, uap->name, uap->namelen);
 	if (error == 0) {
 		error = kern_bindat(td, uap->fd, uap->s, sa);
 		free(sa, M_SONAME);
 	}
 	return (error);
 }
 
 /*
  * listen(2): mark socket `s' as accepting connections.  The descriptor
  * must carry CAP_LISTEN.
  *
  * uap fields:
  *	int	s;
  *	int	backlog;
  */
 /* ARGSUSED */
 int
 sys_listen(struct thread *td, struct listen_args *uap)
 {
 	struct file *fp;
 	struct socket *so;
 	cap_rights_t rights;
 	int error;
 
 	AUDIT_ARG_FD(uap->s);
 	error = getsock_cap(td, uap->s, cap_rights_init(&rights, CAP_LISTEN),
 	    &fp, NULL);
 	if (error != 0)
 		return (error);
 
 	so = fp->f_data;
 	/* `error' is 0 here; a MAC policy may veto the listen. */
 #ifdef MAC
 	error = mac_socket_check_listen(td->td_ucred, so);
 #endif
 	if (error == 0)
 		error = solisten(so, uap->backlog, td);
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * accept1()
  *
  * Common back end for the accept(2) family of system calls.
  *
  *	td		calling thread
  *	s		listening socket descriptor
  *	uname		user-space buffer for the peer address, or NULL
  *	anamelen	user-space in/out length of `uname'
  *	flags		SOCK_CLOEXEC / SOCK_NONBLOCK and/or ACCEPT4_* bits
  *
  * When uname is NULL the peer address is not reported at all.  Otherwise
  * the address and its (possibly truncated) length are copied out; if
  * either copyout fails the freshly accepted descriptor is closed again
  * so that it does not leak into the process.
  */
 static int
 accept1(struct thread *td, int s, struct sockaddr *uname,
     socklen_t *anamelen, int flags)
 {
 	struct sockaddr *name;
 	socklen_t namelen;
 	struct file *fp;
 	int error;
 
 	if (uname == NULL)
 		return (kern_accept4(td, s, NULL, NULL, flags, NULL));
 
 	error = copyin(anamelen, &namelen, sizeof (namelen));
 	if (error != 0)
 		return (error);
 
 	error = kern_accept4(td, s, &name, &namelen, flags, &fp);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * kern_accept4() may succeed without an address, leaving `name'
 	 * NULL and `namelen' 0.  Only rewrite the header into the old
 	 * sockaddr layout when there actually is an address to rewrite.
 	 */
 #ifdef COMPAT_OLDSOCK
 	if ((flags & ACCEPT4_COMPAT) != 0 && name != NULL)
 		((struct osockaddr *)name)->sa_family = name->sa_family;
 #endif
 	error = copyout(name, uname, namelen);
 	if (error == 0)
 		error = copyout(&namelen, anamelen,
 		    sizeof(namelen));
 	if (error != 0)
 		fdclose(td, fp, td->td_retval[0]);
 	fdrop(fp, td);
 	free(name, M_SONAME);
 	return (error);
 }
 
 /*
  * Convenience wrapper around kern_accept4() that always requests
  * ACCEPT4_INHERIT behaviour.
  */
 int
 kern_accept(struct thread *td, int s, struct sockaddr **name,
     socklen_t *namelen, struct file **fp)
 {
 	const int flags = ACCEPT4_INHERIT;
 
 	return (kern_accept4(td, s, name, namelen, flags, fp));
 }
 
 /*
  * Accept one connection from the listening socket behind descriptor `s'.
  *
  *	name	if not NULL, *name receives the peer address (or stays NULL
  *		when the protocol supplied none); the caller owns it.
  *	namelen	in/out length matching `name'; clamped to the address's
  *		sa_len, or set to 0 when there is no address.
  *	flags	SOCK_CLOEXEC / SOCK_NONBLOCK and/or ACCEPT4_INHERIT.
  *	fp	if not NULL, receives a referenced file for the new socket
  *		on success and NULL on failure.
  *
  * On success the new descriptor number is stored in td->td_retval[0].
  * Fails with EINVAL when the socket is not listening, and with
  * EWOULDBLOCK when it is non-blocking and no connection is queued.
  */
 int
 kern_accept4(struct thread *td, int s, struct sockaddr **name,
     socklen_t *namelen, int flags, struct file **fp)
 {
 	struct file *headfp, *nfp = NULL;
 	struct sockaddr *sa = NULL;
 	struct socket *head, *so;
 	cap_rights_t rights;
 	u_int fflag;
 	pid_t pgid;
 	int error, fd, tmp;
 
 	if (name != NULL)
 		*name = NULL;
 
 	AUDIT_ARG_FD(s);
 	error = getsock_cap(td, s, cap_rights_init(&rights, CAP_ACCEPT),
 	    &headfp, &fflag);
 	if (error != 0)
 		return (error);
 	head = headfp->f_data;
 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
 		error = EINVAL;
 		goto done;
 	}
 #ifdef MAC
 	error = mac_socket_check_accept(td->td_ucred, head);
 	if (error != 0)
 		goto done;
 #endif
 	/* Reserve the descriptor before waiting for a connection. */
 	error = falloc(td, &nfp, &fd, (flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0);
 	if (error != 0)
 		goto done;
 	ACCEPT_LOCK();
 	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
 		ACCEPT_UNLOCK();
 		error = EWOULDBLOCK;
 		goto noconnection;
 	}
 	/* Sleep until a completed connection is queued or an error is posted. */
 	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
 		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
 			head->so_error = ECONNABORTED;
 			break;
 		}
 		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
 		    "accept", 0);
 		if (error != 0) {
 			ACCEPT_UNLOCK();
 			goto noconnection;
 		}
 	}
 	/* A pending error on the listening socket is reported once, then cleared. */
 	if (head->so_error) {
 		error = head->so_error;
 		head->so_error = 0;
 		ACCEPT_UNLOCK();
 		goto noconnection;
 	}
 	so = TAILQ_FIRST(&head->so_comp);
 	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
 	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
 
 	/*
 	 * Before changing the flags on the socket, we have to bump the
 	 * reference count.  Otherwise, if the protocol calls sofree(),
 	 * the socket will be released due to a zero refcount.
 	 */
 	SOCK_LOCK(so);			/* soref() and so_state update */
 	soref(so);			/* file descriptor reference */
 
 	TAILQ_REMOVE(&head->so_comp, so, so_list);
 	head->so_qlen--;
 	/* Non-blocking state: inherited from the listener or taken from flags. */
 	if (flags & ACCEPT4_INHERIT)
 		so->so_state |= (head->so_state & SS_NBIO);
 	else
 		so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0;
 	so->so_qstate &= ~SQ_COMP;
 	so->so_head = NULL;
 
 	SOCK_UNLOCK(so);
 	ACCEPT_UNLOCK();
 
 	/* An extra reference on `nfp' has been held for us by falloc(). */
 	td->td_retval[0] = fd;
 
 	/* connection has been removed from the listen queue */
 	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
 
 	if (flags & ACCEPT4_INHERIT) {
 		/* Propagate the listener's SIGIO owner, if it has one. */
 		pgid = fgetown(&head->so_sigio);
 		if (pgid != 0)
 			fsetown(pgid, &so->so_sigio);
 	} else {
 		/* Start from the listener's file flags minus FNONBLOCK/FASYNC. */
 		fflag &= ~(FNONBLOCK | FASYNC);
 		if (flags & SOCK_NONBLOCK)
 			fflag |= FNONBLOCK;
 	}
 
 	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
 	/* Sync socket nonblocking/async state with file flags */
 	tmp = fflag & FNONBLOCK;
 	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
 	tmp = fflag & FASYNC;
 	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
 	sa = 0;
 	error = soaccept(so, &sa);
 	if (error != 0)
 		goto noconnection;
 	/* No address supplied: succeed with *name left NULL and a zero length. */
 	if (sa == NULL) {
 		if (name)
 			*namelen = 0;
 		goto done;
 	}
 	AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa);
 	if (name) {
 		/* check sa_len before it is destroyed */
 		if (*namelen > sa->sa_len)
 			*namelen = sa->sa_len;
 #ifdef KTRACE
 		if (KTRPOINT(td, KTR_STRUCT))
 			ktrsockaddr(sa);
 #endif
 		/* Ownership of the address passes to the caller. */
 		*name = sa;
 		sa = NULL;
 	}
 noconnection:
 	free(sa, M_SONAME);
 
 	/*
 	 * close the new descriptor, assuming someone hasn't ripped it
 	 * out from under us.
 	 */
 	if (error != 0)
 		fdclose(td, nfp, fd);
 
 	/*
 	 * Release explicitly held references before returning.  We return
 	 * a reference on nfp to the caller on success if they request it.
 	 */
 done:
 	if (fp != NULL) {
 		if (error == 0) {
 			*fp = nfp;
 			nfp = NULL;
 		} else
 			*fp = NULL;
 	}
 	if (nfp != NULL)
 		fdrop(nfp, td);
 	fdrop(headfp, td);
 	return (error);
 }
 
 /*
  * accept(2): accept a connection; the new socket inherits state from the
  * listening socket (ACCEPT4_INHERIT).
  */
 int
 sys_accept(struct thread *td, struct accept_args *uap)
 {
 	const int flags = ACCEPT4_INHERIT;
 
 	return (accept1(td, uap->s, uap->name, uap->anamelen, flags));
 }
 
 /*
  * accept4(2): accept a connection with caller-chosen flags.  Only
  * SOCK_CLOEXEC and SOCK_NONBLOCK are permitted; any other bit yields
  * EINVAL.
  */
 int
 sys_accept4(struct thread *td, struct accept4_args *uap)
 {
 	const int allowed = SOCK_CLOEXEC | SOCK_NONBLOCK;
 
 	if ((uap->flags & ~allowed) != 0)
 		return (EINVAL);
 
 	return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags));
 }
 
 #ifdef COMPAT_OLDSOCK
 /*
  * Old-style accept: same as accept(2) but additionally sets
  * ACCEPT4_COMPAT so the returned address is rewritten into the old
  * sockaddr layout.
  */
 int
 oaccept(struct thread *td, struct accept_args *uap)
 {
 	const int flags = ACCEPT4_INHERIT | ACCEPT4_COMPAT;
 
 	return (accept1(td, uap->s, uap->name, uap->anamelen, flags));
 }
 #endif /* COMPAT_OLDSOCK */
 
 /* ARGSUSED */
 int
 sys_connect(td, uap)
 	struct thread *td;
 	struct connect_args /* {
 		int	s;
 		caddr_t	name;
 		int	namelen;
 	} */ *uap;
 {
 	struct sockaddr *sa;
 	int error;
 
 	error = getsockaddr(&sa, uap->name, uap->namelen);
 	if (error == 0) {
 		error = kern_connectat(td, AT_FDCWD, uap->s, sa);
 		free(sa, M_SONAME);
 	}
 	return (error);
 }
 
 /*
  * Connect the socket behind descriptor `fd' to the kernel-space address
  * `sa'.  The descriptor must carry CAP_CONNECT.  With dirfd == AT_FDCWD
  * this uses soconnect(); any other dirfd goes through soconnectat().
  *
  * Returns EALREADY if a connect is already in progress and EINPROGRESS
  * for a non-blocking socket whose connect has not finished yet.
  * Otherwise it sleeps until the connect completes or fails, and ERESTART
  * is converted to EINTR before returning.
  */
 int
 kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
 {
 	struct socket *so;
 	struct file *fp;
 	cap_rights_t rights;
 	int error, interrupted = 0;
 
 	AUDIT_ARG_FD(fd);
 	AUDIT_ARG_SOCKADDR(td, dirfd, sa);
 	error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_CONNECT),
 	    &fp, NULL);
 	if (error != 0)
 		return (error);
 	so = fp->f_data;
 	if (so->so_state & SS_ISCONNECTING) {
 		error = EALREADY;
 		goto done1;
 	}
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(sa);
 #endif
 #ifdef MAC
 	error = mac_socket_check_connect(td->td_ucred, so, sa);
 	if (error != 0)
 		goto bad;
 #endif
 	if (dirfd == AT_FDCWD)
 		error = soconnect(so, sa, td);
 	else
 		error = soconnectat(dirfd, so, sa, td);
 	if (error != 0)
 		goto bad;
 	/* Non-blocking socket still connecting: report and leave the state set. */
 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
 		error = EINPROGRESS;
 		goto done1;
 	}
 	SOCK_LOCK(so);
 	/* Wait for the connect to finish or for an error to be posted. */
 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
 		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
 		    "connec", 0);
 		if (error != 0) {
 			if (error == EINTR || error == ERESTART)
 				interrupted = 1;
 			break;
 		}
 	}
 	/* Sleep ended normally: pick up and clear the socket's pending error. */
 	if (error == 0) {
 		error = so->so_error;
 		so->so_error = 0;
 	}
 	SOCK_UNLOCK(so);
 bad:
 	/* An interrupted wait leaves SS_ISCONNECTING set on the socket. */
 	if (!interrupted)
 		so->so_state &= ~SS_ISCONNECTING;
 	if (error == ERESTART)
 		error = EINTR;
 done1:
 	fdrop(fp, td);
 	return (error);
 }
 
 /* ARGSUSED */
 int
 sys_connectat(td, uap)
 	struct thread *td;
 	struct connectat_args /* {
 		int	fd;
 		int	s;
 		caddr_t	name;
 		int	namelen;
 	} */ *uap;
 {
 	struct sockaddr *sa;
 	int error;
 
 	error = getsockaddr(&sa, uap->name, uap->namelen);
 	if (error == 0) {
 		error = kern_connectat(td, uap->fd, uap->s, sa);
 		free(sa, M_SONAME);
 	}
 	return (error);
 }
 
 /*
  * Create a pair of connected sockets and install each in a new file
  * descriptor.
  *
  *	type	socket type; may carry SOCK_CLOEXEC and/or SOCK_NONBLOCK,
  *		which are stripped and applied to the descriptors/files.
  *	rsv	kernel-space array of two ints receiving the descriptors.
  *
  * Returns 0 on success.  On failure everything created so far is torn
  * down in reverse order via the free4..free1 labels; rsv[] may already
  * have been written in that case.
  */
 int
 kern_socketpair(struct thread *td, int domain, int type, int protocol,
     int *rsv)
 {
 	struct file *fp1, *fp2;
 	struct socket *so1, *so2;
 	int fd, error, oflag, fflag;
 
 	AUDIT_ARG_SOCKET(domain, type, protocol);
 
 	oflag = 0;
 	fflag = 0;
 	if ((type & SOCK_CLOEXEC) != 0) {
 		type &= ~SOCK_CLOEXEC;
 		oflag |= O_CLOEXEC;
 	}
 	if ((type & SOCK_NONBLOCK) != 0) {
 		type &= ~SOCK_NONBLOCK;
 		fflag |= FNONBLOCK;
 	}
 #ifdef MAC
 	/* We might want to have a separate check for socket pairs. */
 	error = mac_socket_check_create(td->td_ucred, domain, type,
 	    protocol);
 	if (error != 0)
 		return (error);
 #endif
 	error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
 	if (error != 0)
 		return (error);
 	error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
 	if (error != 0)
 		goto free1;
 	/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
 	error = falloc(td, &fp1, &fd, oflag);
 	if (error != 0)
 		goto free2;
 	rsv[0] = fd;
 	fp1->f_data = so1;	/* so1 already has ref count */
 	error = falloc(td, &fp2, &fd, oflag);
 	if (error != 0)
 		goto free3;
 	fp2->f_data = so2;	/* so2 already has ref count */
 	rsv[1] = fd;
 	error = soconnect2(so1, so2);
 	if (error != 0)
 		goto free4;
 	if (type == SOCK_DGRAM) {
 		/*
 		 * Datagram socket connection is asymmetric.
 		 */
 		 error = soconnect2(so2, so1);
 		 if (error != 0)
 			goto free4;
 	}
 	/* Both ends are connected: turn the two files into socket files. */
 	finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data,
 	    &socketops);
 	finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data,
 	    &socketops);
 	if ((fflag & FNONBLOCK) != 0) {
 		(void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td);
 		(void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td);
 	}
 	fdrop(fp1, td);
 	fdrop(fp2, td);
 	return (0);
 	/* Error unwinding: each label undoes one more step than the one below it. */
 free4:
 	fdclose(td, fp2, rsv[1]);
 	fdrop(fp2, td);
 free3:
 	fdclose(td, fp1, rsv[0]);
 	fdrop(fp1, td);
 free2:
 	if (so2 != NULL)
 		(void)soclose(so2);
 free1:
 	if (so1 != NULL)
 		(void)soclose(so1);
 	return (error);
 }
 
 int
 sys_socketpair(struct thread *td, struct socketpair_args *uap)
 {
 	int error, sv[2];
 
 	error = kern_socketpair(td, uap->domain, uap->type,
 	    uap->protocol, sv);
 	if (error != 0)
 		return (error);
 	error = copyout(sv, uap->rsv, 2 * sizeof(int));
 	if (error != 0) {
 		(void)kern_close(td, sv[0]);
 		(void)kern_close(td, sv[1]);
 	}
 	return (error);
 }
 
 /*
  * Common front end for the send-family system calls.  Copies the
  * destination address and the control data described by `mp' in from
  * user space, then hands off to kern_sendit().
  *
  * mp->msg_name is replaced by the kernel copy of the address.  In
  * capability mode an explicit destination address is refused with
  * ECAPMODE.  Control data shorter than a cmsghdr is rejected with
  * EINVAL, except for old-style (MSG_COMPAT) messages, which get a
  * SOL_SOCKET/SCM_RIGHTS header prepended instead.
  */
 static int
 sendit(struct thread *td, int s, struct msghdr *mp, int flags)
 {
 	struct sockaddr *to = NULL;
 	struct mbuf *control = NULL;
 	int error;
 
 #ifdef CAPABILITY_MODE
 	if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL))
 		return (ECAPMODE);
 #endif
 
 	if (mp->msg_name != NULL) {
 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
 		if (error != 0) {
 			to = NULL;
 			goto bad;
 		}
 		mp->msg_name = to;
 	}
 
 	if (mp->msg_control) {
 		if (mp->msg_controllen < sizeof(struct cmsghdr)
 #ifdef COMPAT_OLDSOCK
 		    && mp->msg_flags != MSG_COMPAT
 #endif
 		) {
 			error = EINVAL;
 			goto bad;
 		}
 		error = sockargs(&control, mp->msg_control,
 		    mp->msg_controllen, MT_CONTROL);
 		if (error != 0)
 			goto bad;
 #ifdef COMPAT_OLDSOCK
 		if (mp->msg_flags == MSG_COMPAT) {
 			struct cmsghdr *hdr;
 
 			/* Old callers pass bare rights; wrap them. */
 			M_PREPEND(control, sizeof(*hdr), M_WAITOK);
 			hdr = mtod(control, struct cmsghdr *);
 			hdr->cmsg_len = control->m_len;
 			hdr->cmsg_level = SOL_SOCKET;
 			hdr->cmsg_type = SCM_RIGHTS;
 		}
 #endif
 	}
 
 	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
 
 bad:
 	free(to, M_SONAME);
 	return (error);
 }
 
 int
 kern_sendit(td, s, mp, flags, control, segflg)
 	struct thread *td;
 	int s;
 	struct msghdr *mp;
 	int flags;
 	struct mbuf *control;
 	enum uio_seg segflg;
 {
 	struct file *fp;
 	struct uio auio;
 	struct iovec *iov;
 	struct socket *so;
 	cap_rights_t rights;
 #ifdef KTRACE
 	struct uio *ktruio = NULL;
 #endif
 	ssize_t len;
 	int i, error;
 
 	AUDIT_ARG_FD(s);
 	cap_rights_init(&rights, CAP_SEND);
 	if (mp->msg_name != NULL) {
 		AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name);
 		cap_rights_set(&rights, CAP_CONNECT);
 	}
 	error = getsock_cap(td, s, &rights, &fp, NULL);
 	if (error != 0)
 		return (error);
 	so = (struct socket *)fp->f_data;
 
 #ifdef KTRACE
 	if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(mp->msg_name);
 #endif
 #ifdef MAC
 	if (mp->msg_name != NULL) {
 		error = mac_socket_check_connect(td->td_ucred, so,
 		    mp->msg_name);
 		if (error != 0)
 			goto bad;
 	}
 	error = mac_socket_check_send(td->td_ucred, so);
 	if (error != 0)
 		goto bad;
 #endif
 
 	auio.uio_iov = mp->msg_iov;
 	auio.uio_iovcnt = mp->msg_iovlen;
 	auio.uio_segflg = segflg;
 	auio.uio_rw = UIO_WRITE;
 	auio.uio_td = td;
 	auio.uio_offset = 0;			/* XXX */
 	auio.uio_resid = 0;
 	iov = mp->msg_iov;
 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
 		if ((auio.uio_resid += iov->iov_len) < 0) {
 			error = EINVAL;
 			goto bad;
 		}
 	}
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_GENIO))
 		ktruio = cloneuio(&auio);
 #endif
 	len = auio.uio_resid;
 	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
 	if (error != 0) {
 		if (auio.uio_resid != len && (error == ERESTART ||
 		    error == EINTR || error == EWOULDBLOCK))
 			error = 0;
 		/* Generation of SIGPIPE can be controlled per socket */
 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 		    !(flags & MSG_NOSIGNAL)) {
 			PROC_LOCK(td->td_proc);
 			tdsignal(td, SIGPIPE);
 			PROC_UNLOCK(td->td_proc);
 		}
 	}
 	if (error == 0)
 		td->td_retval[0] = len - auio.uio_resid;
 #ifdef KTRACE
 	if (ktruio != NULL) {
 		ktruio->uio_resid = td->td_retval[0];
 		ktrgenio(s, UIO_WRITE, ktruio, error);
 	}
 #endif
 bad:
 	fdrop(fp, td);
 	return (error);
 }
 
 int
 sys_sendto(td, uap)
 	struct thread *td;
 	struct sendto_args /* {
 		int	s;
 		caddr_t	buf;
 		size_t	len;
 		int	flags;
 		caddr_t	to;
 		int	tolen;
 	} */ *uap;
 {
 	struct msghdr msg;
 	struct iovec aiov;
 
 	msg.msg_name = uap->to;
 	msg.msg_namelen = uap->tolen;
 	msg.msg_iov = &aiov;
 	msg.msg_iovlen = 1;
 	msg.msg_control = 0;
 #ifdef COMPAT_OLDSOCK
 	msg.msg_flags = 0;
 #endif
 	aiov.iov_base = uap->buf;
 	aiov.iov_len = uap->len;
 	return (sendit(td, uap->s, &msg, uap->flags));
 }
 
 #ifdef COMPAT_OLDSOCK
 int
 osend(td, uap)
 	struct thread *td;
 	struct osend_args /* {
 		int	s;
 		caddr_t	buf;
 		int	len;
 		int	flags;
 	} */ *uap;
 {
 	struct msghdr msg;
 	struct iovec aiov;
 
 	msg.msg_name = 0;
 	msg.msg_namelen = 0;
 	msg.msg_iov = &aiov;
 	msg.msg_iovlen = 1;
 	aiov.iov_base = uap->buf;
 	aiov.iov_len = uap->len;
 	msg.msg_control = 0;
 	msg.msg_flags = 0;
 	return (sendit(td, uap->s, &msg, uap->flags));
 }
 
 int
 osendmsg(td, uap)
 	struct thread *td;
 	struct osendmsg_args /* {
 		int	s;
 		caddr_t	msg;
 		int	flags;
 	} */ *uap;
 {
 	struct msghdr msg;
 	struct iovec *iov;
 	int error;
 
 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
 	if (error != 0)
 		return (error);
 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 	if (error != 0)
 		return (error);
 	msg.msg_iov = iov;
 	msg.msg_flags = MSG_COMPAT;
 	error = sendit(td, uap->s, &msg, uap->flags);
 	free(iov, M_IOV);
 	return (error);
 }
 #endif
 
 int
 sys_sendmsg(td, uap)
 	struct thread *td;
 	struct sendmsg_args /* {
 		int	s;
 		caddr_t	msg;
 		int	flags;
 	} */ *uap;
 {
 	struct msghdr msg;
 	struct iovec *iov;
 	int error;
 
 	error = copyin(uap->msg, &msg, sizeof (msg));
 	if (error != 0)
 		return (error);
 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 	if (error != 0)
 		return (error);
 	msg.msg_iov = iov;
 #ifdef COMPAT_OLDSOCK
 	msg.msg_flags = 0;
 #endif
 	error = sendit(td, uap->s, &msg, uap->flags);
 	free(iov, M_IOV);
 	return (error);
 }
 
 /*
  * Receive a message on the socket behind descriptor `s' into the buffers
  * described by `mp'.
  *
  *	mp	 message header; msg_iov describes the data buffers (always
  *		 treated as user-space addresses), msg_name/msg_namelen the
  *		 optional source-address buffer, msg_control/msg_controllen
  *		 the optional control buffer, msg_flags the in/out flags.
  *	fromseg	 address space of mp->msg_name: copyout() is used for
  *		 UIO_USERSPACE, bcopy() otherwise.
  *	controlp if not NULL, receives the raw control mbuf chain on
  *		 success instead of it being copied out to mp->msg_control.
  *
  * The descriptor must carry CAP_RECV.  On success the number of bytes
  * received is stored in td->td_retval[0].  A partial transfer
  * interrupted by ERESTART, EINTR or EWOULDBLOCK is reported as success.
  */
 int
 kern_recvit(td, s, mp, fromseg, controlp)
 	struct thread *td;
 	int s;
 	struct msghdr *mp;
 	enum uio_seg fromseg;
 	struct mbuf **controlp;
 {
 	struct uio auio;
 	struct iovec *iov;
 	struct mbuf *m, *control = NULL;
 	caddr_t ctlbuf;
 	struct file *fp;
 	struct socket *so;
 	struct sockaddr *fromsa = NULL;
 	cap_rights_t rights;
 #ifdef KTRACE
 	struct uio *ktruio = NULL;
 #endif
 	ssize_t len;
 	int error, i;
 
 	if (controlp != NULL)
 		*controlp = NULL;
 
 	AUDIT_ARG_FD(s);
 	error = getsock_cap(td, s, cap_rights_init(&rights, CAP_RECV),
 	    &fp, NULL);
 	if (error != 0)
 		return (error);
 	so = fp->f_data;
 
 #ifdef MAC
 	error = mac_socket_check_receive(td->td_ucred, so);
 	if (error != 0) {
 		fdrop(fp, td);
 		return (error);
 	}
 #endif
 
 	auio.uio_iov = mp->msg_iov;
 	auio.uio_iovcnt = mp->msg_iovlen;
 	auio.uio_segflg = UIO_USERSPACE;
 	auio.uio_rw = UIO_READ;
 	auio.uio_td = td;
 	auio.uio_offset = 0;			/* XXX */
 	auio.uio_resid = 0;
 	iov = mp->msg_iov;
 	/* Sum the segment lengths, rejecting a total that goes negative. */
 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
 		if ((auio.uio_resid += iov->iov_len) < 0) {
 			fdrop(fp, td);
 			return (EINVAL);
 		}
 	}
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_GENIO))
 		ktruio = cloneuio(&auio);
 #endif
 	len = auio.uio_resid;
 	/* Control data is only requested when the caller has a place for it. */
 	error = soreceive(so, &fromsa, &auio, NULL,
 	    (mp->msg_control || controlp) ? &control : NULL,
 	    &mp->msg_flags);
 	if (error != 0) {
 		/* Some data arrived before the interruption: report success. */
 		if (auio.uio_resid != len && (error == ERESTART ||
 		    error == EINTR || error == EWOULDBLOCK))
 			error = 0;
 	}
 	if (fromsa != NULL)
 		AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa);
 #ifdef KTRACE
 	if (ktruio != NULL) {
 		ktruio->uio_resid = len - auio.uio_resid;
 		ktrgenio(s, UIO_READ, ktruio, error);
 	}
 #endif
 	if (error != 0)
 		goto out;
 	td->td_retval[0] = len - auio.uio_resid;
 	/* Hand back the source address, truncated to the caller's buffer. */
 	if (mp->msg_name) {
 		len = mp->msg_namelen;
 		if (len <= 0 || fromsa == NULL)
 			len = 0;
 		else {
 			/* save sa_len before it is destroyed by MSG_COMPAT */
 			len = MIN(len, fromsa->sa_len);
 #ifdef COMPAT_OLDSOCK
 			if (mp->msg_flags & MSG_COMPAT)
 				((struct osockaddr *)fromsa)->sa_family =
 				    fromsa->sa_family;
 #endif
 			if (fromseg == UIO_USERSPACE) {
 				error = copyout(fromsa, mp->msg_name,
 				    (unsigned)len);
 				if (error != 0)
 					goto out;
 			} else
 				bcopy(fromsa, mp->msg_name, len);
 		}
 		mp->msg_namelen = len;
 	}
 	/* Copy control data out only when the caller did not ask for the mbufs. */
 	if (mp->msg_control && controlp == NULL) {
 #ifdef COMPAT_OLDSOCK
 		/*
 		 * We assume that old recvmsg calls won't receive access
 		 * rights and other control info, esp. as control info
 		 * is always optional and those options didn't exist in 4.3.
 		 * If we receive rights, trim the cmsghdr; anything else
 		 * is tossed.
 		 */
 		if (control && mp->msg_flags & MSG_COMPAT) {
 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
 			    SOL_SOCKET ||
 			    mtod(control, struct cmsghdr *)->cmsg_type !=
 			    SCM_RIGHTS) {
 				mp->msg_controllen = 0;
 				goto out;
 			}
 			control->m_len -= sizeof (struct cmsghdr);
 			control->m_data += sizeof (struct cmsghdr);
 		}
 #endif
 		len = mp->msg_controllen;
 		m = control;
 		mp->msg_controllen = 0;
 		ctlbuf = mp->msg_control;
 
 		/* Walk the chain, copying until the user's buffer is full. */
 		while (m && len > 0) {
 			unsigned int tocopy;
 
 			if (len >= m->m_len)
 				tocopy = m->m_len;
 			else {
 				/* This mbuf does not fit: flag the truncation. */
 				mp->msg_flags |= MSG_CTRUNC;
 				tocopy = len;
 			}
 
 			if ((error = copyout(mtod(m, caddr_t),
 					ctlbuf, tocopy)) != 0)
 				goto out;
 
 			ctlbuf += tocopy;
 			len -= tocopy;
 			m = m->m_next;
 		}
 		/* Report how many control bytes were actually copied out. */
 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
 	}
 out:
 	fdrop(fp, td);
 #ifdef KTRACE
 	if (fromsa && KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(fromsa);
 #endif
 	free(fromsa, M_SONAME);
 
 	/* Pass the control chain to the caller on success, otherwise free it. */
 	if (error == 0 && controlp != NULL)
 		*controlp = control;
 	else  if (control)
 		m_freem(control);
 
 	return (error);
 }
 
 /*
  * Common front end for the receive-family system calls: receive via
  * kern_recvit() with a user-space name buffer and, when namelenp is not
  * NULL, copy the resulting address length out to it.  For old-style
  * (MSG_COMPAT) callers a failure of that final copyout is ignored.
  */
 static int
 recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp)
 {
 	int error;
 
 	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
 	if (error != 0 || namelenp == NULL)
 		return (error);
 
 	error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
 #ifdef COMPAT_OLDSOCK
 	if (mp->msg_flags & MSG_COMPAT)
 		error = 0;	/* old recvfrom didn't check */
 #endif
 	return (error);
 }
 
 int
 sys_recvfrom(td, uap)
 	struct thread *td;
 	struct recvfrom_args /* {
 		int	s;
 		caddr_t	buf;
 		size_t	len;
 		int	flags;
 		struct sockaddr * __restrict	from;
 		socklen_t * __restrict fromlenaddr;
 	} */ *uap;
 {
 	struct msghdr msg;
 	struct iovec aiov;
 	int error;
 
 	if (uap->fromlenaddr) {
 		error = copyin(uap->fromlenaddr,
 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
 		if (error != 0)
 			goto done2;
 	} else {
 		msg.msg_namelen = 0;
 	}
 	msg.msg_name = uap->from;
 	msg.msg_iov = &aiov;
 	msg.msg_iovlen = 1;
 	aiov.iov_base = uap->buf;
 	aiov.iov_len = uap->len;
 	msg.msg_control = 0;
 	msg.msg_flags = uap->flags;
 	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
 done2:
 	return (error);
 }
 
 #ifdef COMPAT_OLDSOCK
 /*
  * Old recvfrom: identical to recvfrom(2) except that MSG_COMPAT is
  * forced on in the caller's flags (uap is modified in place).
  */
 int
 orecvfrom(struct thread *td, struct recvfrom_args *uap)
 {
 
 	uap->flags = uap->flags | MSG_COMPAT;
 	return (sys_recvfrom(td, uap));
 }
 #endif
 
 #ifdef COMPAT_OLDSOCK
 /*
  * Old recv: one user buffer, no source address and no control data.
  *
  * uap fields:
  *	int	s;
  *	caddr_t	buf;
  *	int	len;
  *	int	flags;
  */
 int
 orecv(struct thread *td, struct orecv_args *uap)
 {
 	struct iovec aiov;
 	struct msghdr msg;
 
 	aiov.iov_base = uap->buf;
 	aiov.iov_len = uap->len;
 
 	msg.msg_iov = &aiov;
 	msg.msg_iovlen = 1;
 	msg.msg_name = NULL;
 	msg.msg_namelen = 0;
 	msg.msg_control = NULL;
 	msg.msg_flags = uap->flags;
 	return (recvit(td, uap->s, &msg, NULL));
 }
 
 /*
  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
  * overlays the new one, missing only the flags, and with the (old) access
  * rights where the control fields are now.
  */
 int
 orecvmsg(td, uap)
 	struct thread *td;
 	struct orecvmsg_args /* {
 		int	s;
 		struct	omsghdr *msg;
 		int	flags;
 	} */ *uap;
 {
 	struct msghdr msg;
 	struct iovec *iov;
 	int error;
 
 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
 	if (error != 0)
 		return (error);
 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 	if (error != 0)
 		return (error);
 	msg.msg_flags = uap->flags | MSG_COMPAT;
 	msg.msg_iov = iov;
 	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
 	if (msg.msg_controllen && error == 0)
 		error = copyout(&msg.msg_controllen,
 		    &uap->msg->msg_accrightslen, sizeof (int));
 	free(iov, M_IOV);
 	return (error);
 }
 #endif
 
 int
 sys_recvmsg(td, uap)
 	struct thread *td;
 	struct recvmsg_args /* {
 		int	s;
 		struct	msghdr *msg;
 		int	flags;
 	} */ *uap;
 {
 	struct msghdr msg;
 	struct iovec *uiov, *iov;
 	int error;
 
 	error = copyin(uap->msg, &msg, sizeof (msg));
 	if (error != 0)
 		return (error);
 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 	if (error != 0)
 		return (error);
 	msg.msg_flags = uap->flags;
 #ifdef COMPAT_OLDSOCK
 	msg.msg_flags &= ~MSG_COMPAT;
 #endif
 	uiov = msg.msg_iov;
 	msg.msg_iov = iov;
 	error = recvit(td, uap->s, &msg, NULL);
 	if (error == 0) {
 		msg.msg_iov = uiov;
 		error = copyout(&msg, uap->msg, sizeof(msg));
 	}
 	free(iov, M_IOV);
 	return (error);
 }
 
 /* ARGSUSED */
 int
 sys_shutdown(td, uap)
 	struct thread *td;
 	struct shutdown_args /* {
 		int	s;
 		int	how;
 	} */ *uap;
 {
 	struct socket *so;
 	struct file *fp;
 	cap_rights_t rights;
 	int error;
 
 	AUDIT_ARG_FD(uap->s);
 	error = getsock_cap(td, uap->s, cap_rights_init(&rights, CAP_SHUTDOWN),
 	    &fp, NULL);
 	if (error == 0) {
 		so = fp->f_data;
 		error = soshutdown(so, uap->how);
 		/*
 		 * Previous versions did not return ENOTCONN, but 0 in
 		 * case the socket was not connected. Some important
 		 * programs like syslogd up to r279016, 2015-02-19,
 		 * still depend on this behavior.
 		 */
 		if (error == ENOTCONN &&
 		    td->td_proc->p_osrel < P_OSREL_SHUTDOWN_ENOTCONN)
 			error = 0;
 		fdrop(fp, td);
 	}
 	return (error);
 }
 
 /* ARGSUSED */
 int
 sys_setsockopt(td, uap)
 	struct thread *td;
 	struct setsockopt_args /* {
 		int	s;
 		int	level;
 		int	name;
 		caddr_t	val;
 		int	valsize;
 	} */ *uap;
 {
 
 	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
 	    uap->val, UIO_USERSPACE, uap->valsize));
 }
 
 int
 kern_setsockopt(td, s, level, name, val, valseg, valsize)
 	struct thread *td;
 	int s;
 	int level;
 	int name;
 	void *val;
 	enum uio_seg valseg;
 	socklen_t valsize;
 {
 	struct socket *so;
 	struct file *fp;
 	struct sockopt sopt;
 	cap_rights_t rights;
 	int error;
 
 	if (val == NULL && valsize != 0)
 		return (EFAULT);
 	if ((int)valsize < 0)
 		return (EINVAL);
 
 	sopt.sopt_dir = SOPT_SET;
 	sopt.sopt_level = level;
 	sopt.sopt_name = name;
 	sopt.sopt_val = val;
 	sopt.sopt_valsize = valsize;
 	switch (valseg) {
 	case UIO_USERSPACE:
 		sopt.sopt_td = td;
 		break;
 	case UIO_SYSSPACE:
 		sopt.sopt_td = NULL;
 		break;
 	default:
 		panic("kern_setsockopt called with bad valseg");
 	}
 
 	AUDIT_ARG_FD(s);
 	error = getsock_cap(td, s, cap_rights_init(&rights, CAP_SETSOCKOPT),
 	    &fp, NULL);
 	if (error == 0) {
 		so = fp->f_data;
 		error = sosetopt(so, &sopt);
 		fdrop(fp, td);
 	}
 	return(error);
 }
 
 /* ARGSUSED */
 int
 sys_getsockopt(td, uap)
 	struct thread *td;
 	struct getsockopt_args /* {
 		int	s;
 		int	level;
 		int	name;
 		void * __restrict	val;
 		socklen_t * __restrict avalsize;
 	} */ *uap;
 {
 	socklen_t valsize;
 	int error;
 
 	if (uap->val) {
 		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
 		if (error != 0)
 			return (error);
 	}
 
 	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
 	    uap->val, UIO_USERSPACE, &valsize);
 
 	if (error == 0)
 		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
 	return (error);
 }
 
 /*
  * Kernel version of getsockopt.
  * optval can be a userland or userspace. optlen is always a kernel pointer.
  */
 int
 kern_getsockopt(td, s, level, name, val, valseg, valsize)
 	struct thread *td;
 	int s;
 	int level;
 	int name;
 	void *val;
 	enum uio_seg valseg;
 	socklen_t *valsize;
 {
 	struct socket *so;
 	struct file *fp;
 	struct sockopt sopt;
 	cap_rights_t rights;
 	int error;
 
 	if (val == NULL)
 		*valsize = 0;
 	if ((int)*valsize < 0)
 		return (EINVAL);
 
 	sopt.sopt_dir = SOPT_GET;
 	sopt.sopt_level = level;
 	sopt.sopt_name = name;
 	sopt.sopt_val = val;
 	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
 	switch (valseg) {
 	case UIO_USERSPACE:
 		sopt.sopt_td = td;
 		break;
 	case UIO_SYSSPACE:
 		sopt.sopt_td = NULL;
 		break;
 	default:
 		panic("kern_getsockopt called with bad valseg");
 	}
 
 	AUDIT_ARG_FD(s);
 	error = getsock_cap(td, s, cap_rights_init(&rights, CAP_GETSOCKOPT),
 	    &fp, NULL);
 	if (error == 0) {
 		so = fp->f_data;
 		error = sogetopt(so, &sopt);
 		*valsize = sopt.sopt_valsize;
 		fdrop(fp, td);
 	}
 	return (error);
 }
 
 /*
  * getsockname1() - Get socket name.
  */
 /* ARGSUSED */
 static int
 getsockname1(td, uap, compat)
 	struct thread *td;
 	struct getsockname_args /* {
 		int	fdes;
 		struct sockaddr * __restrict asa;
 		socklen_t * __restrict alen;
 	} */ *uap;
 	int compat;
 {
 	struct sockaddr *sa;
 	socklen_t len;
 	int error;
 
 	error = copyin(uap->alen, &len, sizeof(len));
 	if (error != 0)
 		return (error);
 
 	error = kern_getsockname(td, uap->fdes, &sa, &len);
 	if (error != 0)
 		return (error);
 
 	if (len != 0) {
 #ifdef COMPAT_OLDSOCK
 		if (compat)
 			((struct osockaddr *)sa)->sa_family = sa->sa_family;
 #endif
 		error = copyout(sa, uap->asa, (u_int)len);
 	}
 	free(sa, M_SONAME);
 	if (error == 0)
 		error = copyout(&len, uap->alen, sizeof(len));
 	return (error);
 }
 
 int
 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
     socklen_t *alen)
 {
 	struct socket *so;
 	struct file *fp;
 	cap_rights_t rights;
 	socklen_t len;
 	int error;
 
 	AUDIT_ARG_FD(fd);
 	error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_GETSOCKNAME),
 	    &fp, NULL);
 	if (error != 0)
 		return (error);
 	so = fp->f_data;
 	*sa = NULL;
 	CURVNET_SET(so->so_vnet);
 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
 	CURVNET_RESTORE();
 	if (error != 0)
 		goto bad;
 	if (*sa == NULL)
 		len = 0;
 	else
 		len = MIN(*alen, (*sa)->sa_len);
 	*alen = len;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(*sa);
 #endif
 bad:
 	fdrop(fp, td);
 	if (error != 0 && *sa != NULL) {
 		free(*sa, M_SONAME);
 		*sa = NULL;
 	}
 	return (error);
 }
 
 int
 sys_getsockname(td, uap)
 	struct thread *td;
 	struct getsockname_args *uap;
 {
 
 	return (getsockname1(td, uap, 0));
 }
 
 #ifdef COMPAT_OLDSOCK
 int
 ogetsockname(td, uap)
 	struct thread *td;
 	struct getsockname_args *uap;
 {
 
 	return (getsockname1(td, uap, 1));
 }
 #endif /* COMPAT_OLDSOCK */
 
 /*
  * getpeername1() - Get name of peer for connected socket.
  */
 /* ARGSUSED */
 static int
 getpeername1(td, uap, compat)
 	struct thread *td;
 	struct getpeername_args /* {
 		int	fdes;
 		struct sockaddr * __restrict	asa;
 		socklen_t * __restrict	alen;
 	} */ *uap;
 	int compat;
 {
 	struct sockaddr *sa;
 	socklen_t len;
 	int error;
 
 	error = copyin(uap->alen, &len, sizeof (len));
 	if (error != 0)
 		return (error);
 
 	error = kern_getpeername(td, uap->fdes, &sa, &len);
 	if (error != 0)
 		return (error);
 
 	if (len != 0) {
 #ifdef COMPAT_OLDSOCK
 		if (compat)
 			((struct osockaddr *)sa)->sa_family = sa->sa_family;
 #endif
 		error = copyout(sa, uap->asa, (u_int)len);
 	}
 	free(sa, M_SONAME);
 	if (error == 0)
 		error = copyout(&len, uap->alen, sizeof(len));
 	return (error);
 }
 
 int
 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
     socklen_t *alen)
 {
 	struct socket *so;
 	struct file *fp;
 	cap_rights_t rights;
 	socklen_t len;
 	int error;
 
 	AUDIT_ARG_FD(fd);
 	error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_GETPEERNAME),
 	    &fp, NULL);
 	if (error != 0)
 		return (error);
 	so = fp->f_data;
 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 		error = ENOTCONN;
 		goto done;
 	}
 	*sa = NULL;
 	CURVNET_SET(so->so_vnet);
 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
 	CURVNET_RESTORE();
 	if (error != 0)
 		goto bad;
 	if (*sa == NULL)
 		len = 0;
 	else
 		len = MIN(*alen, (*sa)->sa_len);
 	*alen = len;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(*sa);
 #endif
 bad:
 	if (error != 0 && *sa != NULL) {
 		free(*sa, M_SONAME);
 		*sa = NULL;
 	}
 done:
 	fdrop(fp, td);
 	return (error);
 }
 
 int
 sys_getpeername(td, uap)
 	struct thread *td;
 	struct getpeername_args *uap;
 {
 
 	return (getpeername1(td, uap, 0));
 }
 
 #ifdef COMPAT_OLDSOCK
 int
 ogetpeername(td, uap)
 	struct thread *td;
 	struct ogetpeername_args *uap;
 {
 
 	/* XXX uap should have type `getpeername_args *' to begin with. */
 	return (getpeername1(td, (struct getpeername_args *)uap, 1));
 }
 #endif /* COMPAT_OLDSOCK */
 
 int
 sockargs(mp, buf, buflen, type)
 	struct mbuf **mp;
 	caddr_t buf;
 	int buflen, type;
 {
 	struct sockaddr *sa;
 	struct mbuf *m;
 	int error;
 
 	if (buflen > MLEN) {
 #ifdef COMPAT_OLDSOCK
 		if (type == MT_SONAME && buflen <= 112)
 			buflen = MLEN;		/* unix domain compat. hack */
 		else
 #endif
 			if (buflen > MCLBYTES)
 				return (EINVAL);
 	}
 	m = m_get2(buflen, M_WAITOK, type, 0);
 	m->m_len = buflen;
 	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
 	if (error != 0)
 		(void) m_free(m);
 	else {
 		*mp = m;
 		if (type == MT_SONAME) {
 			sa = mtod(m, struct sockaddr *);
 
 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 				sa->sa_family = sa->sa_len;
 #endif
 			sa->sa_len = buflen;
 		}
 	}
 	return (error);
 }
 
 int
 getsockaddr(namp, uaddr, len)
 	struct sockaddr **namp;
 	caddr_t uaddr;
 	size_t len;
 {
 	struct sockaddr *sa;
 	int error;
 
 	if (len > SOCK_MAXADDRLEN)
 		return (ENAMETOOLONG);
 	if (len < offsetof(struct sockaddr, sa_data[0]))
 		return (EINVAL);
 	sa = malloc(len, M_SONAME, M_WAITOK);
 	error = copyin(uaddr, sa, len);
 	if (error != 0) {
 		free(sa, M_SONAME);
 	} else {
 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 			sa->sa_family = sa->sa_len;
 #endif
 		sa->sa_len = len;
 		*namp = sa;
 	}
 	return (error);
 }
 
 struct sendfile_sync {
 	struct mtx	mtx;
 	struct cv	cv;
 	unsigned	count;
 };
 
 /*
  * Add more references to a vm_page + sf_buf + sendfile_sync.
  */
 void
 sf_ext_ref(void *arg1, void *arg2)
 {
 	struct sf_buf *sf = arg1;
 	struct sendfile_sync *sfs = arg2;
 	vm_page_t pg = sf_buf_page(sf);
 
 	sf_buf_ref(sf);
 
 	vm_page_lock(pg);
 	vm_page_wire(pg);
 	vm_page_unlock(pg);
 
 	if (sfs != NULL) {
 		mtx_lock(&sfs->mtx);
 		KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
 		sfs->count++;
 		mtx_unlock(&sfs->mtx);
 	}
 }
 
 /*
  * Detach mapped page and release resources back to the system.
  */
 void
 sf_ext_free(void *arg1, void *arg2)
 {
 	struct sf_buf *sf = arg1;
 	struct sendfile_sync *sfs = arg2;
 	vm_page_t pg = sf_buf_page(sf);
 
 	sf_buf_free(sf);
 
 	vm_page_lock(pg);
-	vm_page_unwire(pg, PQ_INACTIVE);
 	/*
 	 * Check for the object going away on us. This can
 	 * happen since we don't hold a reference to it.
 	 * If so, we're responsible for freeing the page.
 	 */
-	if (pg->wire_count == 0 && pg->object == NULL)
+	if (vm_page_unwire(pg, PQ_INACTIVE) && pg->object == NULL)
 		vm_page_free(pg);
 	vm_page_unlock(pg);
 
 	if (sfs != NULL) {
 		mtx_lock(&sfs->mtx);
 		KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
 		if (--sfs->count == 0)
 			cv_signal(&sfs->cv);
 		mtx_unlock(&sfs->mtx);
 	}
 }
 
 /*
+ * Same as above, but tries to detach the page from the object and
+ * put it into the free pool; if it cannot, the page is deactivated.
+ */
+void
+sf_ext_free_nocache(void *arg1, void *arg2)
+{
+	struct sf_buf *sf = arg1;
+	struct sendfile_sync *sfs = arg2;
+	vm_page_t pg = sf_buf_page(sf);
+
+	sf_buf_free(sf);
+
+	vm_page_lock(pg);
+	if (vm_page_unwire(pg, PQ_NONE)) {
+		vm_object_t obj;
+
+		/* Try to free the page, but only if it is cheap to. */
+		if ((obj = pg->object) == NULL)
+			vm_page_free(pg);
+		else if (!vm_page_xbusied(pg) && VM_OBJECT_TRYWLOCK(obj)) {
+			vm_page_free(pg);
+			VM_OBJECT_WUNLOCK(obj);
+		} else
+			vm_page_deactivate(pg);
+	}
+	vm_page_unlock(pg);
+
+	if (sfs != NULL) {
+		mtx_lock(&sfs->mtx);
+		KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
+		if (--sfs->count == 0)
+			cv_signal(&sfs->cv);
+		mtx_unlock(&sfs->mtx);
+	}
+}
+
+/*
  * sendfile(2)
  *
  * int sendfile(int fd, int s, off_t offset, size_t nbytes,
  *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
  *
  * Send a file specified by 'fd' and starting at 'offset' to a socket
  * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes ==
  * 0.  Optionally add a header and/or trailer to the socket output.  If
  * specified, write the total number of bytes sent into *sbytes.
  */
 int
 sys_sendfile(struct thread *td, struct sendfile_args *uap)
 {
 
 	return (do_sendfile(td, uap, 0));
 }
 
 static int
 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
 {
 	struct sf_hdtr hdtr;
 	struct uio *hdr_uio, *trl_uio;
 	struct file *fp;
 	cap_rights_t rights;
 	off_t sbytes;
 	int error;
 
 	/*
 	 * File offset must be positive.  If it goes beyond EOF
 	 * we send only the header/trailer and no payload data.
 	 */
 	if (uap->offset < 0)
 		return (EINVAL);
 
 	hdr_uio = trl_uio = NULL;
 
 	if (uap->hdtr != NULL) {
 		error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
 		if (error != 0)
 			goto out;
 		if (hdtr.headers != NULL) {
 			error = copyinuio(hdtr.headers, hdtr.hdr_cnt,
 			    &hdr_uio);
 			if (error != 0)
 				goto out;
 		}
 		if (hdtr.trailers != NULL) {
 			error = copyinuio(hdtr.trailers, hdtr.trl_cnt,
 			    &trl_uio);
 			if (error != 0)
 				goto out;
 		}
 	}
 
 	AUDIT_ARG_FD(uap->fd);
 
 	/*
 	 * sendfile(2) can start at any offset within a file so we require
 	 * CAP_READ+CAP_SEEK = CAP_PREAD.
 	 */
 	if ((error = fget_read(td, uap->fd,
 	    cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) {
 		goto out;
 	}
 
 	error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset,
 	    uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, td);
 	fdrop(fp, td);
 
 	if (uap->sbytes != NULL)
 		copyout(&sbytes, uap->sbytes, sizeof(off_t));
 
 out:
 	free(hdr_uio, M_IOV);
 	free(trl_uio, M_IOV);
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 int
 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
 {
 	struct sendfile_args args;
 
 	args.fd = uap->fd;
 	args.s = uap->s;
 	args.offset = uap->offset;
 	args.nbytes = uap->nbytes;
 	args.hdtr = uap->hdtr;
 	args.sbytes = uap->sbytes;
 	args.flags = uap->flags;
 
 	return (do_sendfile(td, &args, 1));
 }
 #endif /* COMPAT_FREEBSD4 */
 
+/*
+ * How much data to put into page i of n.
+ * Only first and last pages are special.
+ */
+static inline off_t
+xfsize(int i, int n, off_t off, off_t len)
+{
+
+	if (i == 0)
+		return (omin(PAGE_SIZE - (off & PAGE_MASK), len));
+
+	if (i == n - 1 && ((off + len) & PAGE_MASK) > 0)
+		return ((off + len) & PAGE_MASK);
+
+	return (PAGE_SIZE);
+}
+
+/*
+ * Offset within object for page i.
+ */
+static inline vm_offset_t
+vmoff(int i, off_t off)
+{
+
+	if (i == 0)
+		return ((vm_offset_t)off);
+
+	return (trunc_page(off + i * PAGE_SIZE));
+}
+
+/*
+ * Pretend as if we don't have enough space, subtract xfsize() of
+ * all pages that failed.
+ */
+static inline void
+fixspace(int old, int new, off_t off, int *space)
+{
+
+	KASSERT(old > new, ("%s: old %d new %d", __func__, old, new));
+
+	/* Subtract last one. */
+	*space -= xfsize(old - 1, old, off, *space);
+	old--;
+
+	if (new == old)
+		/* There was only one page. */
+		return;
+
+	/* Subtract first one. */
+	if (new == 0) {
+		*space -= xfsize(0, old, off, *space);
+		new++;
+	}
+
+	/* Rest of pages are full sized. */
+	*space -= (old - new) * PAGE_SIZE;
+
+	KASSERT(*space >= 0, ("%s: space went backwards", __func__));
+}
+
+/*
+ * Structure describing a single sendfile(2) I/O, which may consist of
+ * several underlying pager I/Os.
+ *
+ * The syscall context allocates the structure and initializes 'nios'
+ * to 1.  As sendfile_swapin() runs through pages and starts asynchronous
+ * paging operations, it increments 'nios'.
+ *
+ * Every I/O completion calls sf_iodone(), which decrements the 'nios', and
+ * the syscall also calls sf_iodone() after allocating all mbufs, linking them
+ * and sending to socket.  Whoever reaches zero 'nios' is responsible for
+ * calling pru_ready on the socket, to notify it of readiness of the data.
+ */
+struct sf_io {
+	volatile u_int	nios;
+	u_int		error;
+	int		npages;
+	struct file	*sock_fp;
+	struct mbuf	*m;
+	vm_page_t	pa[];
+};
+
+static void
+sf_iodone(void *arg, vm_page_t *pg, int count, int error)
+{
+	struct sf_io *sfio = arg;
+	struct socket *so;
+
+	for (int i = 0; i < count; i++)
+		vm_page_xunbusy(pg[i]);
+
+	if (error)
+		sfio->error = error;
+
+	if (!refcount_release(&sfio->nios))
+		return;
+
+	so = sfio->sock_fp->f_data;
+
+	if (sfio->error) {
+		struct mbuf *m;
+
+		/*
+		 * I/O operation failed.  The state of data in the socket
+		 * is now inconsistent, and all we can do is to tear
+		 * it down. Protocol abort method would tear down protocol
+		 * state, free all ready mbufs and detach not ready ones.
+		 * We will free the mbufs corresponding to this I/O manually.
+		 *
+		 * The socket would be marked with EIO and made available
+		 * for read, so that application receives EIO on next
+		 * syscall and eventually closes the socket.
+		 */
+		so->so_proto->pr_usrreqs->pru_abort(so);
+		so->so_error = EIO;
+
+		m = sfio->m;
+		for (int i = 0; i < sfio->npages; i++)
+			m = m_free(m);
+	} else {
+		CURVNET_SET(so->so_vnet);
+		(void )(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m,
+		    sfio->npages);
+		CURVNET_RESTORE();
+	}
+
+	/* XXXGL: curthread */
+	fdrop(sfio->sock_fp, curthread);
+	free(sfio, M_TEMP);
+}
+
+/*
+ * Iterate through pages vector and request paging for non-valid pages.
+ */
 static int
-sendfile_readpage(vm_object_t obj, struct vnode *vp, int nd,
-    off_t off, int xfsize, int bsize, struct thread *td, vm_page_t *res)
+sendfile_swapin(vm_object_t obj, struct sf_io *sfio, off_t off, off_t len,
+    int npages, int rhpages, int flags)
 {
-	vm_page_t m;
-	vm_pindex_t pindex;
-	ssize_t resid;
-	int error, readahead, rv;
+	vm_page_t *pa = sfio->pa;
+	int nios;
 
-	pindex = OFF_TO_IDX(off);
-	VM_OBJECT_WLOCK(obj);
-	m = vm_page_grab(obj, pindex, (vp != NULL ? VM_ALLOC_NOBUSY |
-	    VM_ALLOC_IGN_SBUSY : 0) | VM_ALLOC_WIRED | VM_ALLOC_NORMAL);
+	nios = 0;
+	flags = (flags & SF_NODISKIO) ? VM_ALLOC_NOWAIT : 0;
 
 	/*
-	 * Check if page is valid for what we need, otherwise initiate I/O.
-	 *
-	 * The non-zero nd argument prevents disk I/O, instead we
-	 * return the caller what he specified in nd.  In particular,
-	 * if we already turned some pages into mbufs, nd == EAGAIN
-	 * and the main function send them the pages before we come
-	 * here again and block.
+	 * First grab all the pages and wire them.  Note that we grab
+	 * only required pages.  Readahead pages are dealt with later.
 	 */
-	if (m->valid != 0 && vm_page_is_valid(m, off & PAGE_MASK, xfsize)) {
-		if (vp == NULL)
-			vm_page_xunbusy(m);
-		VM_OBJECT_WUNLOCK(obj);
-		*res = m;
-		return (0);
-	} else if (nd != 0) {
-		if (vp == NULL)
-			vm_page_xunbusy(m);
-		error = nd;
-		goto free_page;
+	VM_OBJECT_WLOCK(obj);
+	for (int i = 0; i < npages; i++) {
+		pa[i] = vm_page_grab(obj, OFF_TO_IDX(vmoff(i, off)),
+		    VM_ALLOC_WIRED | VM_ALLOC_NORMAL | flags);
+		if (pa[i] == NULL) {
+			npages = i;
+			rhpages = 0;
+			break;
+		}
 	}
 
-	/*
-	 * Get the page from backing store.
-	 */
-	error = 0;
-	if (vp != NULL) {
-		VM_OBJECT_WUNLOCK(obj);
-		readahead = sfreadahead * MAXBSIZE;
+	for (int i = 0; i < npages;) {
+		int j, a, count, rv;
 
+		/* Skip valid pages. */
+		if (vm_page_is_valid(pa[i], vmoff(i, off) & PAGE_MASK,
+		    xfsize(i, npages, off, len))) {
+			vm_page_xunbusy(pa[i]);
+			SFSTAT_INC(sf_pages_valid);
+			i++;
+			continue;
+		}
+
 		/*
-		 * Use vn_rdwr() instead of the pager interface for
-		 * the vnode, to allow the read-ahead.
-		 *
-		 * XXXMAC: Because we don't have fp->f_cred here, we
-		 * pass in NOCRED.  This is probably wrong, but is
-		 * consistent with our original implementation.
+		 * Now 'i' points to first invalid page, iterate further
+		 * to make 'j' point at first valid after a bunch of
+		 * invalid ones.
 		 */
-		error = vn_rdwr(UIO_READ, vp, NULL, readahead, trunc_page(off),
-		    UIO_NOCOPY, IO_NODELOCKED | IO_VMIO | ((readahead /
-		    bsize) << IO_SEQSHIFT), td->td_ucred, NOCRED, &resid, td);
-		SFSTAT_INC(sf_iocnt);
-		VM_OBJECT_WLOCK(obj);
-	} else {
-		if (vm_pager_has_page(obj, pindex, NULL, NULL)) {
-			rv = vm_pager_get_pages(obj, &m, 1, NULL, NULL);
-			SFSTAT_INC(sf_iocnt);
-			if (rv != VM_PAGER_OK) {
-				vm_page_lock(m);
-				vm_page_free(m);
-				vm_page_unlock(m);
-				m = NULL;
-				error = EIO;
+		for (j = i + 1; j < npages; j++)
+			if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK,
+			    xfsize(j, npages, off, len))) {
+				SFSTAT_INC(sf_pages_valid);
+				break;
 			}
-		} else {
-			pmap_zero_page(m);
-			m->valid = VM_PAGE_BITS_ALL;
-			m->dirty = 0;
+
+		/*
+		 * Now we got region of invalid pages between 'i' and 'j'.
+		 * Check that they belong to pager.  They may not be there,
+		 * which is a regular situation for shmem pager.  For vnode
+		 * pager this happens only in case of sparse file.
+		 *
+		 * Important feature of vm_pager_has_page() is the hint
+		 * stored in 'a', about how many pages we can pagein after
+		 * this page in a single I/O.
+		 */
+		while (!vm_pager_has_page(obj, OFF_TO_IDX(vmoff(i, off)),
+		    NULL, &a) && i < j) {
+			pmap_zero_page(pa[i]);
+			pa[i]->valid = VM_PAGE_BITS_ALL;
+			pa[i]->dirty = 0;
+			vm_page_xunbusy(pa[i]);
+			i++;
 		}
-		if (m != NULL)
-			vm_page_xunbusy(m);
-	}
-	if (error == 0) {
-		*res = m;
-	} else if (m != NULL) {
-free_page:
-		vm_page_lock(m);
-		vm_page_unwire(m, PQ_INACTIVE);
+		if (i == j)
+			continue;
 
 		/*
-		 * See if anyone else might know about this page.  If
-		 * not and it is not valid, then free it.
+		 * We want to pagein as many pages as possible, limited only
+		 * by the 'a' hint and actual request.
+		 *
+		 * We should not pagein into already valid page, thus if
+		 * 'j' didn't reach last page, trim by that page.
+		 *
+		 * When the pagein fulfils the request, also specify readahead.
 		 */
-		if (m->wire_count == 0 && m->valid == 0 && !vm_page_busied(m))
-			vm_page_free(m);
-		vm_page_unlock(m);
+		if (j < npages)
+			a = min(a, j - i - 1);
+		count = min(a + 1, npages - i);
+
+		refcount_acquire(&sfio->nios);
+		rv = vm_pager_get_pages_async(obj, pa + i, count, NULL,
+		    i + count == npages ? &rhpages : NULL,
+		    &sf_iodone, sfio);
+		KASSERT(rv == VM_PAGER_OK, ("%s: pager fail obj %p page %p",
+		    __func__, obj, pa[i]));
+
+		SFSTAT_INC(sf_iocnt);
+		SFSTAT_ADD(sf_pages_read, count);
+		if (i + count == npages)
+			SFSTAT_ADD(sf_rhpages_read, rhpages);
+
+#ifdef INVARIANTS
+		for (j = i; j < i + count && j < npages; j++)
+			KASSERT(pa[j] == vm_page_lookup(obj,
+			    OFF_TO_IDX(vmoff(j, off))),
+			    ("pa[j] %p lookup %p\n", pa[j],
+			    vm_page_lookup(obj, OFF_TO_IDX(vmoff(j, off)))));
+#endif
+		i += count;
+		nios++;
 	}
-	KASSERT(error != 0 || (m->wire_count > 0 &&
-	    vm_page_is_valid(m, off & PAGE_MASK, xfsize)),
-	    ("wrong page state m %p off %#jx xfsize %d", m, (uintmax_t)off,
-	    xfsize));
+
 	VM_OBJECT_WUNLOCK(obj);
-	return (error);
+
+	if (nios == 0 && npages != 0)
+		SFSTAT_INC(sf_noiocnt);
+
+	return (nios);
 }
 
 static int
 sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res,
     struct vnode **vp_res, struct shmfd **shmfd_res, off_t *obj_size,
     int *bsize)
 {
 	struct vattr va;
 	vm_object_t obj;
 	struct vnode *vp;
 	struct shmfd *shmfd;
 	int error;
 
 	vp = *vp_res = NULL;
 	obj = NULL;
 	shmfd = *shmfd_res = NULL;
 	*bsize = 0;
 
 	/*
 	 * The file descriptor must be a regular file and have a
 	 * backing VM object.
 	 */
 	if (fp->f_type == DTYPE_VNODE) {
 		vp = fp->f_vnode;
 		vn_lock(vp, LK_SHARED | LK_RETRY);
 		if (vp->v_type != VREG) {
 			error = EINVAL;
 			goto out;
 		}
 		*bsize = vp->v_mount->mnt_stat.f_iosize;
 		error = VOP_GETATTR(vp, &va, td->td_ucred);
 		if (error != 0)
 			goto out;
 		*obj_size = va.va_size;
 		obj = vp->v_object;
 		if (obj == NULL) {
 			error = EINVAL;
 			goto out;
 		}
 	} else if (fp->f_type == DTYPE_SHM) {
 		error = 0;
 		shmfd = fp->f_data;
 		obj = shmfd->shm_object;
 		*obj_size = shmfd->shm_size;
 	} else {
 		error = EINVAL;
 		goto out;
 	}
 
 	VM_OBJECT_WLOCK(obj);
 	if ((obj->flags & OBJ_DEAD) != 0) {
 		VM_OBJECT_WUNLOCK(obj);
 		error = EBADF;
 		goto out;
 	}
 
 	/*
 	 * Temporarily increase the backing VM object's reference
 	 * count so that a forced reclamation of its vnode does not
 	 * immediately destroy it.
 	 */
 	vm_object_reference_locked(obj);
 	VM_OBJECT_WUNLOCK(obj);
 	*obj_res = obj;
 	*vp_res = vp;
 	*shmfd_res = shmfd;
 
 out:
 	if (vp != NULL)
 		VOP_UNLOCK(vp, 0);
 	return (error);
 }
 
 static int
 kern_sendfile_getsock(struct thread *td, int s, struct file **sock_fp,
     struct socket **so)
 {
 	cap_rights_t rights;
 	int error;
 
 	*sock_fp = NULL;
 	*so = NULL;
 
 	/*
 	 * The socket must be a stream socket and connected.
 	 */
 	error = getsock_cap(td, s, cap_rights_init(&rights, CAP_SEND),
 	    sock_fp, NULL);
 	if (error != 0)
 		return (error);
 	*so = (*sock_fp)->f_data;
 	if ((*so)->so_type != SOCK_STREAM)
 		return (EINVAL);
 	if (((*so)->so_state & SS_ISCONNECTED) == 0)
 		return (ENOTCONN);
 	return (0);
 }
 
 int
 vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
     struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
     int kflags, struct thread *td)
 {
 	struct file *sock_fp;
 	struct vnode *vp;
 	struct vm_object *obj;
 	struct socket *so;
-	struct mbuf *m;
+	struct mbuf *m, *mh, *mhtail;
 	struct sf_buf *sf;
-	struct vm_page *pg;
 	struct shmfd *shmfd;
 	struct sendfile_sync *sfs;
 	struct vattr va;
-	off_t off, xfsize, fsbytes, sbytes, rem, obj_size;
-	int error, bsize, nd, hdrlen, mnw;
+	off_t off, sbytes, rem, obj_size;
+	int error, softerr, bsize, hdrlen;
 
-	pg = NULL;
 	obj = NULL;
 	so = NULL;
-	m = NULL;
+	m = mh = NULL;
 	sfs = NULL;
-	fsbytes = sbytes = 0;
-	hdrlen = mnw = 0;
-	rem = nbytes;
-	obj_size = 0;
+	sbytes = 0;
+	softerr = 0;
 
 	error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize);
 	if (error != 0)
 		return (error);
-	if (rem == 0)
-		rem = obj_size;
 
 	error = kern_sendfile_getsock(td, sockfd, &sock_fp, &so);
 	if (error != 0)
 		goto out;
 
-	/*
-	 * Do not wait on memory allocations but return ENOMEM for
-	 * caller to retry later.
-	 * XXX: Experimental.
-	 */
-	if (flags & SF_MNOWAIT)
-		mnw = 1;
+#ifdef MAC
+	error = mac_socket_check_send(td->td_ucred, so);
+	if (error != 0)
+		goto out;
+#endif
 
+	SFSTAT_INC(sf_syscalls);
+	SFSTAT_ADD(sf_rhpages_requested, SF_READAHEAD(flags));
+
 	if (flags & SF_SYNC) {
 		sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO);
 		mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
 		cv_init(&sfs->cv, "sendfile");
 	}
 
-#ifdef MAC
-	error = mac_socket_check_send(td->td_ucred, so);
-	if (error != 0)
-		goto out;
-#endif
-
 	/* If headers are specified copy them into mbufs. */
-	if (hdr_uio != NULL) {
+	if (hdr_uio != NULL && hdr_uio->uio_resid > 0) {
 		hdr_uio->uio_td = td;
 		hdr_uio->uio_rw = UIO_WRITE;
-		if (hdr_uio->uio_resid > 0) {
-			/*
-			 * In FBSD < 5.0 the nbytes to send also included
-			 * the header.  If compat is specified subtract the
-			 * header size from nbytes.
-			 */
-			if (kflags & SFK_COMPAT) {
-				if (nbytes > hdr_uio->uio_resid)
-					nbytes -= hdr_uio->uio_resid;
-				else
-					nbytes = 0;
-			}
-			m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK),
-			    0, 0, 0);
-			if (m == NULL) {
-				error = mnw ? EAGAIN : ENOBUFS;
-				goto out;
-			}
-			hdrlen = m_length(m, NULL);
+		/*
+		 * In FBSD < 5.0 the nbytes to send also included
+		 * the header.  If compat is specified subtract the
+		 * header size from nbytes.
+		 */
+		if (kflags & SFK_COMPAT) {
+			if (nbytes > hdr_uio->uio_resid)
+				nbytes -= hdr_uio->uio_resid;
+			else
+				nbytes = 0;
 		}
-	}
+		mh = m_uiotombuf(hdr_uio, M_WAITOK, 0, 0, 0);
+		hdrlen = m_length(mh, &mhtail);
+	} else
+		hdrlen = 0;
 
+	rem = nbytes ? omin(nbytes, obj_size - offset) : obj_size - offset;
+
 	/*
 	 * Protect against multiple writers to the socket.
 	 *
 	 * XXXRW: Historically this has assumed non-interruptibility, so now
 	 * we implement that, but possibly shouldn't.
 	 */
 	(void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
 
 	/*
 	 * Loop through the pages of the file, starting with the requested
 	 * offset. Get a file page (do I/O if necessary), map the file page
 	 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
 	 * it on the socket.
 	 * This is done in two loops.  The inner loop turns as many pages
 	 * as it can, up to available socket buffer space, without blocking
 	 * into mbufs to have it bulk delivered into the socket send buffer.
 	 * The outer loop checks the state and available space of the socket
 	 * and takes care of the overall progress.
 	 */
-	for (off = offset; ; ) {
+	for (off = offset; rem > 0; ) {
+		struct sf_io *sfio;
+		vm_page_t *pa;
 		struct mbuf *mtail;
-		int loopbytes;
-		int space;
-		int done;
+		int nios, space, npages, rhpages;
 
-		if ((nbytes != 0 && nbytes == fsbytes) ||
-		    (nbytes == 0 && obj_size == fsbytes))
-			break;
-
 		mtail = NULL;
-		loopbytes = 0;
-		space = 0;
-		done = 0;
-
 		/*
 		 * Check the socket state for ongoing connection,
 		 * no errors and space in socket buffer.
 		 * If space is low allow for the remainder of the
 		 * file to be processed if it fits the socket buffer.
 		 * Otherwise block in waiting for sufficient space
 		 * to proceed, or if the socket is nonblocking, return
 		 * to userland with EAGAIN while reporting how far
 		 * we've come.
 		 * We wait until the socket buffer has significant free
 		 * space to do bulk sends.  This makes good use of file
 		 * system read ahead and allows packet segmentation
 		 * offloading hardware to take over lots of work.  If
 		 * we were not careful here we would send off only one
 		 * sfbuf at a time.
 		 */
 		SOCKBUF_LOCK(&so->so_snd);
 		if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
 			so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
 retry_space:
 		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 			error = EPIPE;
 			SOCKBUF_UNLOCK(&so->so_snd);
 			goto done;
 		} else if (so->so_error) {
 			error = so->so_error;
 			so->so_error = 0;
 			SOCKBUF_UNLOCK(&so->so_snd);
 			goto done;
 		}
 		space = sbspace(&so->so_snd);
 		if (space < rem &&
 		    (space <= 0 ||
 		     space < so->so_snd.sb_lowat)) {
 			if (so->so_state & SS_NBIO) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				error = EAGAIN;
 				goto done;
 			}
 			/*
 			 * sbwait drops the lock while sleeping.
 			 * When we loop back to retry_space the
 			 * state may have changed and we retest
 			 * for it.
 			 */
 			error = sbwait(&so->so_snd);
 			/*
 			 * An error from sbwait usually indicates that we've
 			 * been interrupted by a signal. If we've sent anything
 			 * then return bytes sent, otherwise return the error.
 			 */
 			if (error != 0) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				goto done;
 			}
 			goto retry_space;
 		}
 		SOCKBUF_UNLOCK(&so->so_snd);
 
 		/*
 		 * Reduce space in the socket buffer by the size of
 		 * the header mbuf chain.
 		 * hdrlen is set to 0 after the first loop.
 		 */
 		space -= hdrlen;
 
 		if (vp != NULL) {
 			error = vn_lock(vp, LK_SHARED);
 			if (error != 0)
 				goto done;
 			error = VOP_GETATTR(vp, &va, td->td_ucred);
 			if (error != 0 || off >= va.va_size) {
 				VOP_UNLOCK(vp, 0);
 				goto done;
 			}
-			obj_size = va.va_size;
+			if (va.va_size != obj_size) {
+				if (nbytes == 0)
+					rem += va.va_size - obj_size;
+				else if (offset + nbytes > va.va_size)
+					rem -= (offset + nbytes - va.va_size);
+				obj_size = va.va_size;
+			}
 		}
 
+		if (space > rem)
+			space = rem;
+
+		npages = howmany(space + (off & PAGE_MASK), PAGE_SIZE);
+
 		/*
+		 * Calculate maximum allowed number of pages for readahead
+		 * at this iteration.  First, we allow readahead up to "rem".
+		 * If application wants more, let it be, but there is no
+		 * reason to go above MAXPHYS.  Also check against "obj_size",
+		 * since vm_pager_has_page() can hint beyond EOF.
+		 */
+		rhpages = howmany(rem + (off & PAGE_MASK), PAGE_SIZE) - npages;
+		rhpages += SF_READAHEAD(flags);
+		rhpages = min(howmany(MAXPHYS, PAGE_SIZE), rhpages);
+		rhpages = min(howmany(obj_size - trunc_page(off), PAGE_SIZE) -
+		    npages, rhpages);
+
+		sfio = malloc(sizeof(struct sf_io) +
+		    npages * sizeof(vm_page_t), M_TEMP, M_WAITOK);
+		refcount_init(&sfio->nios, 1);
+		sfio->error = 0;
+
+		nios = sendfile_swapin(obj, sfio, off, space, npages, rhpages,
+		    flags);
+
+		/*
 		 * Loop and construct maximum sized mbuf chain to be bulk
 		 * dumped into socket buffer.
 		 */
-		while (space > loopbytes) {
-			vm_offset_t pgoff;
+		pa = sfio->pa;
+		for (int i = 0; i < npages; i++) {
 			struct mbuf *m0;
 
 			/*
-			 * Calculate the amount to transfer.
-			 * Not to exceed a page, the EOF,
-			 * or the passed in nbytes.
+			 * If a page wasn't grabbed successfully, then
+			 * trim the array. Can happen only with SF_NODISKIO.
 			 */
-			pgoff = (vm_offset_t)(off & PAGE_MASK);
-			rem = obj_size - offset;
-			if (nbytes != 0)
-				rem = omin(rem, nbytes);
-			rem -= fsbytes + loopbytes;
-			xfsize = omin(PAGE_SIZE - pgoff, rem);
-			xfsize = omin(space - loopbytes, xfsize);
-			if (xfsize <= 0) {
-				done = 1;		/* all data sent */
+			if (pa[i] == NULL) {
+				SFSTAT_INC(sf_busy);
+				fixspace(npages, i, off, &space);
+				npages = i;
+				softerr = EBUSY;
 				break;
 			}
 
 			/*
-			 * Attempt to look up the page.  Allocate
-			 * if not found or wait and loop if busy.
-			 */
-			if (m != NULL)
-				nd = EAGAIN; /* send what we already got */
-			else if ((flags & SF_NODISKIO) != 0)
-				nd = EBUSY;
-			else
-				nd = 0;
-			error = sendfile_readpage(obj, vp, nd, off,
-			    xfsize, bsize, td, &pg);
-			if (error != 0) {
-				if (error == EAGAIN)
-					error = 0;	/* not a real error */
-				break;
-			}
-
-			/*
 			 * Get a sendfile buf.  When allocating the
 			 * first buffer for mbuf chain, we usually
 			 * wait as long as necessary, but this wait
 			 * can be interrupted.  For consequent
 			 * buffers, do not sleep, since several
 			 * threads might exhaust the buffers and then
 			 * deadlock.
 			 */
-			sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT :
-			    SFB_CATCH);
+			sf = sf_buf_alloc(pa[i],
+			    m != NULL ? SFB_NOWAIT : SFB_CATCH);
 			if (sf == NULL) {
 				SFSTAT_INC(sf_allocfail);
-				vm_page_lock(pg);
-				vm_page_unwire(pg, PQ_INACTIVE);
-				KASSERT(pg->object != NULL,
-				    ("%s: object disappeared", __func__));
-				vm_page_unlock(pg);
+				for (int j = i; j < npages; j++) {
+					vm_page_lock(pa[j]);
+					vm_page_unwire(pa[j], PQ_INACTIVE);
+					vm_page_unlock(pa[j]);
+				}
 				if (m == NULL)
-					error = (mnw ? EAGAIN : EINTR);
+					softerr = ENOBUFS;
+				fixspace(npages, i, off, &space);
+				npages = i;
 				break;
 			}
 
-			/*
-			 * Get an mbuf and set it up as having
-			 * external storage.
-			 */
-			m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA);
-			if (m0 == NULL) {
-				error = (mnw ? EAGAIN : ENOBUFS);
-				sf_ext_free(sf, NULL);
-				break;
-			}
-			/*
-			 * Attach EXT_SFBUF external storage.
-			 */
-			m0->m_ext.ext_buf = (caddr_t )sf_buf_kva(sf);
+			m0 = m_get(M_WAITOK, MT_DATA);
+			m0->m_ext.ext_buf = (char *)sf_buf_kva(sf);
 			m0->m_ext.ext_size = PAGE_SIZE;
 			m0->m_ext.ext_arg1 = sf;
 			m0->m_ext.ext_arg2 = sfs;
-			m0->m_ext.ext_type = EXT_SFBUF;
+			/*
+			 * SF_NOCACHE sets the page as being freed upon send.
+			 * However, we ignore it for the last page in 'space',
+			 * if the page is truncated, and we got more data to
+			 * send (rem > space), or if we have readahead
+			 * configured (rhpages > 0).
+			 */
+			if ((flags & SF_NOCACHE) == 0 ||
+			    (i == npages - 1 &&
+			    ((off + space) & PAGE_MASK) &&
+			    (rem > space || rhpages > 0)))
+				m0->m_ext.ext_type = EXT_SFBUF;
+			else
+				m0->m_ext.ext_type = EXT_SFBUF_NOCACHE;
 			m0->m_ext.ext_flags = 0;
-			m0->m_flags |= (M_EXT|M_RDONLY);
-			m0->m_data = (char *)sf_buf_kva(sf) + pgoff;
-			m0->m_len = xfsize;
+			m0->m_flags |= (M_EXT | M_RDONLY);
+			if (nios)
+				m0->m_flags |= M_NOTREADY;
+			m0->m_data = (char *)sf_buf_kva(sf) +
+			    (vmoff(i, off) & PAGE_MASK);
+			m0->m_len = xfsize(i, npages, off, space);
 
+			if (i == 0)
+				sfio->m = m0;
+
 			/* Append to mbuf chain. */
 			if (mtail != NULL)
 				mtail->m_next = m0;
-			else if (m != NULL)
-				m_last(m)->m_next = m0;
 			else
 				m = m0;
 			mtail = m0;
 
-			/* Keep track of bits processed. */
-			loopbytes += xfsize;
-			off += xfsize;
-
 			if (sfs != NULL) {
 				mtx_lock(&sfs->mtx);
 				sfs->count++;
 				mtx_unlock(&sfs->mtx);
 			}
 		}
 
 		if (vp != NULL)
 			VOP_UNLOCK(vp, 0);
 
+		/* Keep track of bytes processed. */
+		off += space;
+		rem -= space;
+
+		/* Prepend header, if any. */
+		if (hdrlen) {
+			mhtail->m_next = m;
+			m = mh;
+			mh = NULL;
+		}
+
+		if (m == NULL) {
+			KASSERT(softerr, ("%s: m NULL, no error", __func__));
+			error = softerr;
+			free(sfio, M_TEMP);
+			goto done;
+		}
+
 		/* Add the buffer chain to the socket buffer. */
-		if (m != NULL) {
-			int mlen, err;
+		KASSERT(m_length(m, NULL) == space + hdrlen,
+		    ("%s: mlen %u space %d hdrlen %d",
+		    __func__, m_length(m, NULL), space, hdrlen));
 
-			mlen = m_length(m, NULL);
-			SOCKBUF_LOCK(&so->so_snd);
-			if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
-				error = EPIPE;
-				SOCKBUF_UNLOCK(&so->so_snd);
-				goto done;
-			}
-			SOCKBUF_UNLOCK(&so->so_snd);
-			CURVNET_SET(so->so_vnet);
-			/* Avoid error aliasing. */
-			err = (*so->so_proto->pr_usrreqs->pru_send)
-				    (so, 0, m, NULL, NULL, td);
-			CURVNET_RESTORE();
-			if (err == 0) {
-				/*
-				 * We need two counters to get the
-				 * file offset and nbytes to send
-				 * right:
-				 * - sbytes contains the total amount
-				 *   of bytes sent, including headers.
-				 * - fsbytes contains the total amount
-				 *   of bytes sent from the file.
-				 */
-				sbytes += mlen;
-				fsbytes += mlen;
-				if (hdrlen) {
-					fsbytes -= hdrlen;
-					hdrlen = 0;
-				}
-			} else if (error == 0)
-				error = err;
-			m = NULL;	/* pru_send always consumes */
+		CURVNET_SET(so->so_vnet);
+		if (nios == 0) {
+			/*
+			 * If sendfile_swapin() didn't initiate any I/Os,
+			 * which happens if all data is cached in VM, then
+			 * we can send data right now without the
+			 * PRUS_NOTREADY flag.
+			 */
+			free(sfio, M_TEMP);
+			error = (*so->so_proto->pr_usrreqs->pru_send)
+			    (so, 0, m, NULL, NULL, td);
+		} else {
+			sfio->sock_fp = sock_fp;
+			sfio->npages = npages;
+			fhold(sock_fp);
+			error = (*so->so_proto->pr_usrreqs->pru_send)
+			    (so, PRUS_NOTREADY, m, NULL, NULL, td);
+			sf_iodone(sfio, NULL, 0, 0);
 		}
+		CURVNET_RESTORE();
 
-		/* Quit outer loop on error or when we're done. */
-		if (done)
-			break;
-		if (error != 0)
+		m = NULL;	/* pru_send always consumes */
+		if (error)
 			goto done;
+		sbytes += space + hdrlen;
+		if (hdrlen)
+			hdrlen = 0;
+		if (softerr) {
+			error = softerr;
+			goto done;
+		}
 	}
 
 	/*
 	 * Send trailers. Wimp out and use writev(2).
 	 */
 	if (trl_uio != NULL) {
 		sbunlock(&so->so_snd);
 		error = kern_writev(td, sockfd, trl_uio);
 		if (error == 0)
 			sbytes += td->td_retval[0];
 		goto out;
 	}
 
 done:
 	sbunlock(&so->so_snd);
 out:
 	/*
 	 * If there was no error we have to clear td->td_retval[0]
 	 * because it may have been set by writev.
 	 */
 	if (error == 0) {
 		td->td_retval[0] = 0;
 	}
 	if (sent != NULL) {
 		(*sent) = sbytes;
 	}
 	if (obj != NULL)
 		vm_object_deallocate(obj);
 	if (so)
 		fdrop(sock_fp, td);
 	if (m)
 		m_freem(m);
+	if (mh)
+		m_freem(mh);
 
 	if (sfs != NULL) {
 		mtx_lock(&sfs->mtx);
 		if (sfs->count != 0)
 			cv_wait(&sfs->cv, &sfs->mtx);
 		KASSERT(sfs->count == 0, ("sendfile sync still busy"));
 		cv_destroy(&sfs->cv);
 		mtx_destroy(&sfs->mtx);
 		free(sfs, M_TEMP);
 	}
 
 	if (error == ERESTART)
 		error = EINTR;
 
 	return (error);
 }
Index: projects/clang380-import/sys/kern/uipc_usrreq.c
===================================================================
--- projects/clang380-import/sys/kern/uipc_usrreq.c	(revision 293686)
+++ projects/clang380-import/sys/kern/uipc_usrreq.c	(revision 293687)
@@ -1,2554 +1,2554 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California.
  * Copyright (c) 2004-2009 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
  */
 
 /*
  * UNIX Domain (Local) Sockets
  *
  * This is an implementation of UNIX (local) domain sockets.  Each socket has
  * an associated struct unpcb (UNIX protocol control block).  Stream sockets
  * may be connected to 0 or 1 other socket.  Datagram sockets may be
  * connected to 0, 1, or many other sockets.  Sockets may be created and
  * connected in pairs (socketpair(2)), or bound/connected to using the file
  * system name space.  For most purposes, only the receive socket buffer is
  * used, as sending on one socket delivers directly to the receive socket
  * buffer of a second socket.
  *
  * The implementation is substantially complicated by the fact that
  * "ancillary data", such as file descriptors or credentials, may be passed
  * across UNIX domain sockets.  The potential for passing UNIX domain sockets
  * over other UNIX domain sockets requires the implementation of a simple
  * garbage collector to find and tear down cycles of disconnected sockets.
  *
  * TODO:
  *	RDM
  *	rethink name space problems
  *	need a proper out-of-band
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/capsicum.h>
 #include <sys/domain.h>
 #include <sys/fcntl.h>
 #include <sys/malloc.h>		/* XXX must be before <sys/file.h> */
 #include <sys/eventhandler.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/signalvar.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
 #include <sys/un.h>
 #include <sys/unpcb.h>
 #include <sys/vnode.h>
 
 #include <net/vnet.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/uma.h>
 
 MALLOC_DECLARE(M_FILECAPS);
 
 /*
  * Locking key:
  * (l)	Locked using list lock
  * (g)	Locked using linkage lock
  */
 
 static uma_zone_t	unp_zone;	/* Zone unpcbs are allocated from. */
 static unp_gen_t	unp_gencnt;	/* (l) Global generation number. */
 static u_int		unp_count;	/* (l) Count of local sockets. */
 static ino_t		unp_ino;	/* Prototype for fake inode numbers. */
 static int		unp_rights;	/* (g) File descriptors in flight. */
 static struct unp_head	unp_shead;	/* (l) List of stream sockets. */
 static struct unp_head	unp_dhead;	/* (l) List of datagram sockets. */
 static struct unp_head	unp_sphead;	/* (l) List of seqpacket sockets. */
 
 /*
  * One entry per file whose close has been deferred to the taskqueue.
  * Entries are linked on unp_defers; unp_defers_count is exported
  * read-only through the net.local.deferred sysctl.
  */
 struct unp_defer {
 	SLIST_ENTRY(unp_defer) ud_link;
 	struct file *ud_fp;
 };
 static SLIST_HEAD(, unp_defer) unp_defers;
 static int unp_defers_count;
 
 /*
  * Placeholder address handed back by uipc_accept() and uipc_peeraddr()
  * when there is no peer or the peer has no address: only the length and
  * the AF_LOCAL family are initialized.
  */
 static const struct sockaddr	sun_noname = { sizeof(sun_noname), AF_LOCAL };
 
 /*
  * Garbage collection of cyclic file descriptor/socket references occurs
  * asynchronously in a taskqueue context in order to avoid recursion and
  * reentrance in the UNIX domain socket, file descriptor, and socket layer
  * code.  See unp_gc() for a full description.
  */
 static struct timeout_task unp_gc_task;
 
 /*
  * The close of unix domain sockets attached as SCM_RIGHTS is
  * postponed to the taskqueue, to avoid arbitrary recursion depth.
  * The attached sockets might have other sockets attached.
  */
 static struct task	unp_defer_task;
 
 /*
  * Both send and receive buffers are allocated PIPSIZ bytes of buffering for
  * stream sockets, although the total for sender and receiver is actually
  * only PIPSIZ.
  *
  * Datagram sockets really use the sendspace as the maximum datagram size,
  * and don't really want to reserve the sendspace.  Their recvspace should be
  * large enough for at least one max-size datagram plus address.
  */
 #ifndef PIPSIZ
 #define	PIPSIZ	8192
 #endif
 /*
  * Default buffer reservations, one send/receive pair per socket type.
  * uipc_attach() passes the pair matching so_type to soreserve().
  */
 static u_long	unpst_sendspace = PIPSIZ;
 static u_long	unpst_recvspace = PIPSIZ;
 static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
 static u_long	unpdg_recvspace = 4*1024;
 static u_long	unpsp_sendspace = PIPSIZ;	/* really max datagram size */
 static u_long	unpsp_recvspace = PIPSIZ;
 
 /*
  * Sysctl tree: a "local" node under _net with one child node per socket
  * type (stream, dgram, seqpacket).
  */
 static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW, 0, "Local domain");
 static SYSCTL_NODE(_net_local, SOCK_STREAM, stream, CTLFLAG_RW, 0,
     "SOCK_STREAM");
 static SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram, CTLFLAG_RW, 0, "SOCK_DGRAM");
 static SYSCTL_NODE(_net_local, SOCK_SEQPACKET, seqpacket, CTLFLAG_RW, 0,
     "SOCK_SEQPACKET");
 
 /* Read-write knobs backing the default buffer sizes declared above. */
 SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
 	   &unpst_sendspace, 0, "Default stream send space.");
 SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
 	   &unpst_recvspace, 0, "Default stream receive space.");
 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
 	   &unpdg_sendspace, 0, "Default datagram send space.");
 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
 	   &unpdg_recvspace, 0, "Default datagram receive space.");
 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW,
 	   &unpsp_sendspace, 0, "Default seqpacket send space.");
 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW,
 	   &unpsp_recvspace, 0, "Default seqpacket receive space.");
 /* Read-only counters. */
 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
     "File descriptors in flight.");
 SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD,
     &unp_defers_count, 0,
     "File descriptors deferred to taskqueue for close.");
 
 /*
  * Locking and synchronization:
  *
  * Three types of locks exist in the local domain socket implementation: a
  * global list mutex, a global linkage rwlock, and per-unpcb mutexes.  Of the
  * global locks, the list lock protects the socket count, global generation
  * number, and stream/datagram global lists.  The linkage lock protects the
  * interconnection of unpcbs, the v_socket and unp_vnode pointers, and can be
  * held exclusively over the acquisition of multiple unpcb locks to prevent
  * deadlock.
  *
  * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer,
  * allocated in pru_attach() and freed in pru_detach().  The validity of that
  * pointer is an invariant, so no lock is required to dereference the so_pcb
  * pointer if a valid socket reference is held by the caller.  In practice,
  * this is always true during operations performed on a socket.  Each unpcb
  * has a back-pointer to its socket, unp_socket, which will be stable under
  * the same circumstances.
  *
  * This pointer may only be safely dereferenced as long as a valid reference
  * to the unpcb is held.  Typically, this reference will be from the socket,
  * or from another unpcb when the referring unpcb's lock is held (in order
  * that the reference not be invalidated during use).  For example, to follow
  * unp->unp_conn->unp_socket, you need to hold the lock on unp, not unp_conn,
  * as unp_socket remains valid as long as the reference to unp_conn is valid.
  *
  * Fields of unpcbs are locked using a per-unpcb lock, unp_mtx.  Individual
  * atomic reads without the lock may be performed "lockless", but more
  * complex reads and read-modify-writes require the mutex to be held.  No
  * lock order is defined between unpcb locks -- multiple unpcb locks may be
  * acquired at the same time only when holding the linkage rwlock
  * exclusively, which prevents deadlocks.
  *
  * Blocking with UNIX domain sockets is a tricky issue: unlike most network
  * protocols, bind() is a non-atomic operation, and connect() requires
  * potential sleeping in the protocol, due to potentially waiting on local or
  * distributed file systems.  We try to separate "lookup" operations, which
  * may sleep, and the IPC operations themselves, which typically can occur
  * with relative atomicity as locks can be held over the entire operation.
  *
  * Another tricky issue is simultaneous multi-threaded or multi-process
  * access to a single UNIX domain socket.  These are handled by the flags
  * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or
  * binding, both of which involve dropping UNIX domain socket locks in order
  * to perform namei() and other file system operations.
  */
 /* The two global locks, plus the mutex guarding the deferred-close list. */
 static struct rwlock	unp_link_rwlock;
 static struct mtx	unp_list_lock;
 static struct mtx	unp_defers_lock;
 
 /* Linkage rwlock: initialization and assertions. */
 #define	UNP_LINK_LOCK_INIT()		rw_init(&unp_link_rwlock,	\
 					    "unp_link_rwlock")
 
 #define	UNP_LINK_LOCK_ASSERT()	rw_assert(&unp_link_rwlock,	\
 					    RA_LOCKED)
 #define	UNP_LINK_UNLOCK_ASSERT()	rw_assert(&unp_link_rwlock,	\
 					    RA_UNLOCKED)
 
 /* Linkage rwlock: shared (R) and exclusive (W) acquire/release. */
 #define	UNP_LINK_RLOCK()		rw_rlock(&unp_link_rwlock)
 #define	UNP_LINK_RUNLOCK()		rw_runlock(&unp_link_rwlock)
 #define	UNP_LINK_WLOCK()		rw_wlock(&unp_link_rwlock)
 #define	UNP_LINK_WUNLOCK()		rw_wunlock(&unp_link_rwlock)
 #define	UNP_LINK_WLOCK_ASSERT()		rw_assert(&unp_link_rwlock,	\
 					    RA_WLOCKED)
 
 /* Global list mutex. */
 #define	UNP_LIST_LOCK_INIT()		mtx_init(&unp_list_lock,	\
 					    "unp_list_lock", NULL, MTX_DEF)
 #define	UNP_LIST_LOCK()			mtx_lock(&unp_list_lock)
 #define	UNP_LIST_UNLOCK()		mtx_unlock(&unp_list_lock)
 
 /* Mutex for the deferred-close list. */
 #define	UNP_DEFERRED_LOCK_INIT()	mtx_init(&unp_defers_lock, \
 					    "unp_defer", NULL, MTX_DEF)
 #define	UNP_DEFERRED_LOCK()		mtx_lock(&unp_defers_lock)
 #define	UNP_DEFERRED_UNLOCK()		mtx_unlock(&unp_defers_lock)
 
 /*
  * Per-unpcb mutex.  It is initialized with MTX_DUPOK and MTX_RECURSE in
  * addition to MTX_DEF.
  */
 #define UNP_PCB_LOCK_INIT(unp)		mtx_init(&(unp)->unp_mtx,	\
 					    "unp_mtx", "unp_mtx",	\
 					    MTX_DUPOK|MTX_DEF|MTX_RECURSE)
 #define	UNP_PCB_LOCK_DESTROY(unp)	mtx_destroy(&(unp)->unp_mtx)
 #define	UNP_PCB_LOCK(unp)		mtx_lock(&(unp)->unp_mtx)
 #define	UNP_PCB_UNLOCK(unp)		mtx_unlock(&(unp)->unp_mtx)
 #define	UNP_PCB_LOCK_ASSERT(unp)	mtx_assert(&(unp)->unp_mtx, MA_OWNED)
 
 /* Forward declarations of file-local functions. */
 static int	uipc_connect2(struct socket *, struct socket *);
 static int	uipc_ctloutput(struct socket *, struct sockopt *);
 static int	unp_connect(struct socket *, struct sockaddr *,
 		    struct thread *);
 static int	unp_connectat(int, struct socket *, struct sockaddr *,
 		    struct thread *);
 static int	unp_connect2(struct socket *so, struct socket *so2, int);
 static void	unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
 static void	unp_dispose(struct mbuf *);
 static void	unp_dispose_so(struct socket *so);
 static void	unp_shutdown(struct unpcb *);
 static void	unp_drop(struct unpcb *, int);
 static void	unp_gc(__unused void *, int);
 static void	unp_scan(struct mbuf *, void (*)(struct filedescent **, int));
 static void	unp_discard(struct file *);
 static void	unp_freerights(struct filedescent **, int);
 static void	unp_init(void);
 static int	unp_internalize(struct mbuf **, struct thread *);
 static void	unp_internalize_fp(struct file *);
 static int	unp_externalize(struct mbuf *, struct mbuf **, int);
 static int	unp_externalize_fp(struct file *);
 static struct mbuf	*unp_addsockcred(struct thread *, struct mbuf *);
 static void	unp_process_defers(void * __unused, int);
 
 /*
  * Definitions of protocols supported in the LOCAL domain.
  */
 /*
  * localdomain and the pr_usrreqs tables are only declared here: the
  * localsw[] initializer below takes their addresses before they are
  * defined.
  */
 static struct domain localdomain;
 static struct pr_usrreqs uipc_usrreqs_dgram, uipc_usrreqs_stream;
 static struct pr_usrreqs uipc_usrreqs_seqpacket;
 /* One protocol switch entry per supported socket type. */
 static struct protosw localsw[] = {
 {
 	.pr_type =		SOCK_STREAM,
 	.pr_domain =		&localdomain,
 	.pr_flags =		PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS,
 	.pr_ctloutput =		&uipc_ctloutput,
 	.pr_usrreqs =		&uipc_usrreqs_stream
 },
 {
 	.pr_type =		SOCK_DGRAM,
 	.pr_domain =		&localdomain,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_RIGHTS,
 	.pr_ctloutput =		&uipc_ctloutput,
 	.pr_usrreqs =		&uipc_usrreqs_dgram
 },
 {
 	.pr_type =		SOCK_SEQPACKET,
 	.pr_domain =		&localdomain,
 
 	/*
 	 * XXXRW: For now, PR_ADDR because soreceive will bump into them
 	 * due to our use of sbappendaddr.  A new sbappend variant is needed
 	 * that supports both atomic record writes and control data.
 	 */
 	.pr_flags =		PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|PR_WANTRCVD|
 				    PR_RIGHTS,
 	.pr_ctloutput =		&uipc_ctloutput,
 	.pr_usrreqs =		&uipc_usrreqs_seqpacket,
 },
 };
 
 /*
  * The AF_LOCAL domain descriptor.  dom_protoswNPROTOSW is set to the
  * address one element past the last entry of localsw[].
  */
 static struct domain localdomain = {
 	.dom_family =		AF_LOCAL,
 	.dom_name =		"local",
 	.dom_init =		unp_init,
 	.dom_externalize =	unp_externalize,
 	.dom_dispose =		unp_dispose_so,
 	.dom_protosw =		localsw,
 	.dom_protoswNPROTOSW =	&localsw[sizeof(localsw)/sizeof(localsw[0])]
 };
 DOMAIN_SET(local);
 
 /*
  * Abort handling for a local socket: if the socket is connected, call
  * unp_drop() on the peer's unpcb with ECONNABORTED.  Nothing is done for
  * an unconnected socket.
  */
 static void
 uipc_abort(struct socket *so)
 {
 	struct unpcb *unp, *unp2;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));
 
 	/*
 	 * The linkage lock is taken exclusively because two unpcb locks
 	 * may be held at the same time below.
 	 */
 	UNP_LINK_WLOCK();
 	UNP_PCB_LOCK(unp);
 	unp2 = unp->unp_conn;
 	if (unp2 != NULL) {
 		UNP_PCB_LOCK(unp2);
 		unp_drop(unp2, ECONNABORTED);
 		UNP_PCB_UNLOCK(unp2);
 	}
 	UNP_PCB_UNLOCK(unp);
 	UNP_LINK_WUNLOCK();
 }
 
 /*
  * Return the peer's address for an accepted socket.
  *
  * *nam is set to a freshly allocated (M_SONAME, M_WAITOK) buffer of
  * sizeof(struct sockaddr_un) bytes into which either the peer's bound
  * address or sun_noname is copied.  Always returns 0.
  */
 static int
 uipc_accept(struct socket *so, struct sockaddr **nam)
 {
 	struct unpcb *unp, *unp2;
 	const struct sockaddr *sa;
 
 	/*
 	 * Pass back name of connected socket, if it was bound and we are
 	 * still connected (our peer may have closed already!).
 	 */
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_accept: unp == NULL"));
 
 	/* Allocate before taking any lock: M_WAITOK may sleep. */
 	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	UNP_LINK_RLOCK();
 	unp2 = unp->unp_conn;
 	if (unp2 != NULL && unp2->unp_addr != NULL) {
 		UNP_PCB_LOCK(unp2);
 		sa = (struct sockaddr *) unp2->unp_addr;
 		bcopy(sa, *nam, sa->sa_len);
 		UNP_PCB_UNLOCK(unp2);
 	} else {
 		/* No peer, or peer is unbound: hand back the placeholder. */
 		sa = &sun_noname;
 		bcopy(sa, *nam, sa->sa_len);
 	}
 	UNP_LINK_RUNLOCK();
 	return (0);
 }
 
 /*
  * Allocate a unpcb for a new socket and hook it up.
  *
  * If either socket buffer has no high-water mark yet, the default
  * send/receive space for the socket type is reserved first.  The unpcb
  * is then allocated from unp_zone, given an initial reference count of
  * one, and inserted at the head of the global list for its socket type.
  *
  * Returns 0 on success, the error from soreserve(), or ENOBUFS when the
  * unpcb cannot be allocated.  The proto and td arguments are not used.
  * An unknown so_type panics.
  */
 static int
 uipc_attach(struct socket *so, int proto, struct thread *td)
 {
 	u_long sendspace, recvspace;
 	struct unpcb *unp;
 	int error;
 
 	KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 		switch (so->so_type) {
 		case SOCK_STREAM:
 			sendspace = unpst_sendspace;
 			recvspace = unpst_recvspace;
 			break;
 
 		case SOCK_DGRAM:
 			sendspace = unpdg_sendspace;
 			recvspace = unpdg_recvspace;
 			break;
 
 		case SOCK_SEQPACKET:
 			sendspace = unpsp_sendspace;
 			recvspace = unpsp_recvspace;
 			break;
 
 		default:
 			panic("uipc_attach");
 		}
 		error = soreserve(so, sendspace, recvspace);
 		if (error)
 			return (error);
 	}
 	/* M_NOWAIT: the allocation may fail instead of sleeping. */
 	unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO);
 	if (unp == NULL)
 		return (ENOBUFS);
 	LIST_INIT(&unp->unp_refs);
 	UNP_PCB_LOCK_INIT(unp);
 	unp->unp_socket = so;
 	so->so_pcb = unp;
 	unp->unp_refcount = 1;
 
 	/* Generation number, socket count and lists are under the list lock. */
 	UNP_LIST_LOCK();
 	unp->unp_gencnt = ++unp_gencnt;
 	unp_count++;
 	switch (so->so_type) {
 	case SOCK_STREAM:
 		LIST_INSERT_HEAD(&unp_shead, unp, unp_link);
 		break;
 
 	case SOCK_DGRAM:
 		LIST_INSERT_HEAD(&unp_dhead, unp, unp_link);
 		break;
 
 	case SOCK_SEQPACKET:
 		LIST_INSERT_HEAD(&unp_sphead, unp, unp_link);
 		break;
 
 	default:
 		panic("uipc_attach");
 	}
 	UNP_LIST_UNLOCK();
 
 	return (0);
 }
 
 /*
  * Bind a local socket to the file system name carried in nam, with the
  * lookup performed relative to the descriptor fd.
  *
  * Returns 0 on success.  Errors produced directly here:
  *	EAFNOSUPPORT	nam is not an AF_UNIX address
  *	EINVAL		sun_len is too large, the path is empty, or the
  *			socket is already bound to a vnode
  *	EALREADY	another bind on this socket is in progress
  *	EADDRINUSE	the name already exists
  * Errors from namei(), vn_start_write(), the MAC check (when compiled
  * in) and VOP_CREATE() are passed through.
  */
 static int
 uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
 	struct vattr vattr;
 	int error, namelen;
 	struct nameidata nd;
 	struct unpcb *unp;
 	struct vnode *vp;
 	struct mount *mp;
 	cap_rights_t rights;
 	char *buf;
 
 	if (nam->sa_family != AF_UNIX)
 		return (EAFNOSUPPORT);
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));
 
 	if (soun->sun_len > sizeof(struct sockaddr_un))
 		return (EINVAL);
 	/* Length of the path portion only. */
 	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
 	if (namelen <= 0)
 		return (EINVAL);
 
 	/*
 	 * We don't allow simultaneous bind() calls on a single UNIX domain
 	 * socket, so flag in-progress operations, and return an error if an
 	 * operation is already in progress.
 	 *
 	 * Historically, we have not allowed a socket to be rebound, so this
 	 * also returns an error.  Not allowing re-binding simplifies the
 	 * implementation and avoids a great many possible failure modes.
 	 */
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_vnode != NULL) {
 		UNP_PCB_UNLOCK(unp);
 		return (EINVAL);
 	}
 	if (unp->unp_flags & UNP_BINDING) {
 		UNP_PCB_UNLOCK(unp);
 		return (EALREADY);
 	}
 	unp->unp_flags |= UNP_BINDING;
 	UNP_PCB_UNLOCK(unp);
 
 	/* NUL-terminated private copy of the path for the lookup. */
 	buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
 	bcopy(soun->sun_path, buf, namelen);
 	buf[namelen] = 0;
 
 restart:
 	NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME | NOCACHE,
 	    UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_BINDAT), td);
 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
 	error = namei(&nd);
 	if (error)
 		goto error;
 	vp = nd.ni_vp;
 	/*
 	 * Either the name already exists, or the non-blocking
 	 * vn_start_write() attempt failed.  Release what the lookup
 	 * returned; then fail with EADDRINUSE if the name exists,
 	 * otherwise call vn_start_write() again with V_XSLEEP | PCATCH
 	 * and redo the lookup from scratch.
 	 */
 	if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		if (nd.ni_dvp == vp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		if (vp != NULL) {
 			vrele(vp);
 			error = EADDRINUSE;
 			goto error;
 		}
 		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
 		if (error)
 			goto error;
 		goto restart;
 	}
 	/* Create a VSOCK node; mode is ACCESSPERMS masked by the umask. */
 	VATTR_NULL(&vattr);
 	vattr.va_type = VSOCK;
 	vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
 #ifdef MAC
 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 	    &vattr);
 #endif
 	/* Without MAC, error is still 0 here from the checks above. */
 	if (error == 0)
 		error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
 	if (error) {
 		vn_finished_write(mp);
 		goto error;
 	}
 	vp = nd.ni_vp;
 	ASSERT_VOP_ELOCKED(vp, "uipc_bind");
 	/* Duplicate the address before taking the locks below. */
 	soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
 
 	/* Publish vnode and address on the unpcb and end the bind. */
 	UNP_LINK_WLOCK();
 	UNP_PCB_LOCK(unp);
 	VOP_UNP_BIND(vp, unp->unp_socket);
 	unp->unp_vnode = vp;
 	unp->unp_addr = soun;
 	unp->unp_flags &= ~UNP_BINDING;
 	UNP_PCB_UNLOCK(unp);
 	UNP_LINK_WUNLOCK();
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
 	free(buf, M_TEMP);
 	return (0);
 
 error:
 	/* Common failure exit: clear the in-progress flag, free the copy. */
 	UNP_PCB_LOCK(unp);
 	unp->unp_flags &= ~UNP_BINDING;
 	UNP_PCB_UNLOCK(unp);
 	free(buf, M_TEMP);
 	return (error);
 }
 
 /*
  * Bind a local socket to nam.  Thin wrapper that calls uipc_bindat()
  * with AT_FDCWD as the directory descriptor and returns its result.
  */
 static int
 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (uipc_bindat(AT_FDCWD, so, nam, td));
 }
 
 /*
  * Connect so to the address nam by calling unp_connect() with the
  * linkage lock held exclusively.  td must be the current thread.
  * Returns the result of unp_connect().
  */
 static int
 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int error;
 
 	KASSERT(td == curthread, ("uipc_connect: td != curthread"));
 	UNP_LINK_WLOCK();
 	error = unp_connect(so, nam, td);
 	UNP_LINK_WUNLOCK();
 	return (error);
 }
 
 /*
  * Same as uipc_connect(), but passes the descriptor fd through to
  * unp_connectat().  td must be the current thread.  Returns the result
  * of unp_connectat().
  */
 static int
 uipc_connectat(int fd, struct socket *so, struct sockaddr *nam,
     struct thread *td)
 {
 	int error;
 
 	KASSERT(td == curthread, ("uipc_connectat: td != curthread"));
 	UNP_LINK_WLOCK();
 	error = unp_connectat(fd, so, nam, td);
 	UNP_LINK_WUNLOCK();
 	return (error);
 }
 
 /*
  * Close handling for a local socket: if the socket is connected, call
  * unp_disconnect() on the pair.  Nothing is done for an unconnected
  * socket.
  */
 static void
 uipc_close(struct socket *so)
 {
 	struct unpcb *unp, *unp2;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
 
 	/* Exclusive linkage lock: both unpcb locks are held below. */
 	UNP_LINK_WLOCK();
 	UNP_PCB_LOCK(unp);
 	unp2 = unp->unp_conn;
 	if (unp2 != NULL) {
 		UNP_PCB_LOCK(unp2);
 		unp_disconnect(unp, unp2);
 		UNP_PCB_UNLOCK(unp2);
 	}
 	UNP_PCB_UNLOCK(unp);
 	UNP_LINK_WUNLOCK();
 }
 
 /*
  * Connect two sockets to each other.  Takes the linkage lock
  * exclusively and both unpcb locks (so1's first), then calls
  * unp_connect2() with PRU_CONNECT2 and returns its result.
  */
 static int
 uipc_connect2(struct socket *so1, struct socket *so2)
 {
 	struct unpcb *unp, *unp2;
 	int error;
 
 	UNP_LINK_WLOCK();
 	unp = so1->so_pcb;
 	KASSERT(unp != NULL, ("uipc_connect2: unp == NULL"));
 	UNP_PCB_LOCK(unp);
 	unp2 = so2->so_pcb;
 	KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL"));
 	UNP_PCB_LOCK(unp2);
 	error = unp_connect2(so1, so2, PRU_CONNECT2);
 	UNP_PCB_UNLOCK(unp2);
 	UNP_PCB_UNLOCK(unp);
 	UNP_LINK_WUNLOCK();
 	return (error);
 }
 
 /*
  * Tear down the unpcb of a socket.
  *
  * The unpcb is removed from its global list, detached from its vnode
  * (if bound), disconnected from its peer (if connected), and every
  * unpcb on its unp_refs list is dropped with ECONNRESET.  The socket's
  * so_pcb pointer is cleared, the bound address is freed, and one
  * reference on the unpcb is released; the unpcb itself is freed only if
  * that was the last reference.  If any file descriptors were in flight
  * when the linkage lock was still held, the garbage collection task is
  * scheduled.
  */
 static void
 uipc_detach(struct socket *so)
 {
 	struct unpcb *unp, *unp2;
 	struct sockaddr_un *saved_unp_addr;
 	struct vnode *vp;
 	int freeunp, local_unp_rights;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
 
 	/* Lock order used here: linkage lock, list lock, unpcb lock. */
 	UNP_LINK_WLOCK();
 	UNP_LIST_LOCK();
 	UNP_PCB_LOCK(unp);
 	LIST_REMOVE(unp, unp_link);
 	unp->unp_gencnt = ++unp_gencnt;
 	--unp_count;
 	UNP_LIST_UNLOCK();
 
 	/*
 	 * XXXRW: Should assert vp->v_socket == so.
 	 */
 	if ((vp = unp->unp_vnode) != NULL) {
 		VOP_UNP_DETACH(vp);
 		unp->unp_vnode = NULL;
 	}
 	unp2 = unp->unp_conn;
 	if (unp2 != NULL) {
 		UNP_PCB_LOCK(unp2);
 		unp_disconnect(unp, unp2);
 		UNP_PCB_UNLOCK(unp2);
 	}
 
 	/*
 	 * We hold the linkage lock exclusively, so it's OK to acquire
 	 * multiple pcb locks at a time.
 	 */
 	/*
 	 * NOTE(review): termination of this loop presumably relies on
 	 * unp_drop() unlinking ref from unp_refs -- confirm in unp_drop().
 	 */
 	while (!LIST_EMPTY(&unp->unp_refs)) {
 		struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
 
 		UNP_PCB_LOCK(ref);
 		unp_drop(ref, ECONNRESET);
 		UNP_PCB_UNLOCK(ref);
 	}
 	/* Snapshot unp_rights while the linkage lock is still held. */
 	local_unp_rights = unp_rights;
 	UNP_LINK_WUNLOCK();
 	/* The unpcb lock is still held from here on. */
 	unp->unp_socket->so_pcb = NULL;
 	saved_unp_addr = unp->unp_addr;
 	unp->unp_addr = NULL;
 	unp->unp_refcount--;
 	freeunp = (unp->unp_refcount == 0);
 	if (saved_unp_addr != NULL)
 		free(saved_unp_addr, M_SONAME);
 	if (freeunp) {
 		/* Last reference: destroy the lock and free the unpcb. */
 		UNP_PCB_LOCK_DESTROY(unp);
 		uma_zfree(unp_zone, unp);
 	} else
 		UNP_PCB_UNLOCK(unp);
 	/* Drop the vnode reference only after all locks are released. */
 	if (vp)
 		vrele(vp);
 	if (local_unp_rights)
 		taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
 }
 
 /*
  * Disconnect a local socket from its peer, if it has one, by calling
  * unp_disconnect() on the pair.  Always returns 0, including when the
  * socket was not connected.
  */
 static int
 uipc_disconnect(struct socket *so)
 {
 	struct unpcb *unp, *unp2;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));
 
 	/* Exclusive linkage lock: both unpcb locks are held below. */
 	UNP_LINK_WLOCK();
 	UNP_PCB_LOCK(unp);
 	unp2 = unp->unp_conn;
 	if (unp2 != NULL) {
 		UNP_PCB_LOCK(unp2);
 		unp_disconnect(unp, unp2);
 		UNP_PCB_UNLOCK(unp2);
 	}
 	UNP_PCB_UNLOCK(unp);
 	UNP_LINK_WUNLOCK();
 	return (0);
 }
 
 /*
  * Put a bound local socket into the listening state.
  *
  * A socket with no vnode cannot listen: EINVAL is returned if it is
  * connected, EDESTADDRREQ otherwise.  For a bound socket the result of
  * solisten_proto_check() is returned; on success the credentials of td
  * are stored in unp_peercred, UNP_HAVEPCCACHED is set, and
  * solisten_proto() is called with the requested backlog.
  */
 static int
 uipc_listen(struct socket *so, int backlog, struct thread *td)
 {
 	struct unpcb *unp;
 	int error;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_listen: unp == NULL"));
 
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_vnode == NULL) {
 		/* Already connected or not bound to an address. */
 		error = unp->unp_conn != NULL ? EINVAL : EDESTADDRREQ;
 		UNP_PCB_UNLOCK(unp);
 		return (error);
 	}
 
 	/* The socket lock is taken while the unpcb lock is held. */
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
 	if (error == 0) {
 		cru2x(td->td_ucred, &unp->unp_peercred);
 		unp->unp_flags |= UNP_HAVEPCCACHED;
 		solisten_proto(so, backlog);
 	}
 	SOCK_UNLOCK(so);
 	UNP_PCB_UNLOCK(unp);
 	return (error);
 }
 
 /*
  * Return the address of the connected peer.
  *
  * *nam is set to a freshly allocated (M_SONAME, M_WAITOK) buffer of
  * sizeof(struct sockaddr_un) bytes.  It receives the peer's bound
  * address, or sun_noname when there is no peer or the peer has no
  * address.  Always returns 0.
  */
 static int
 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	struct unpcb *unp, *unp2;
 	const struct sockaddr *sa;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL"));
 
 	/* Allocate before taking any lock: M_WAITOK may sleep. */
 	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	UNP_LINK_RLOCK();
 	/*
 	 * XXX: It seems that this test always fails even when connection is
 	 * established.  So, this else clause is added as workaround to
 	 * return PF_LOCAL sockaddr.
 	 */
 	unp2 = unp->unp_conn;
 	if (unp2 != NULL) {
 		/* unp_addr is examined with the peer's lock held. */
 		UNP_PCB_LOCK(unp2);
 		if (unp2->unp_addr != NULL)
 			sa = (struct sockaddr *) unp2->unp_addr;
 		else
 			sa = &sun_noname;
 		bcopy(sa, *nam, sa->sa_len);
 		UNP_PCB_UNLOCK(unp2);
 	} else {
 		sa = &sun_noname;
 		bcopy(sa, *nam, sa->sa_len);
 	}
 	UNP_LINK_RUNLOCK();
 	return (0);
 }
 
 /*
  * Receive-side notification for stream and seqpacket sockets: after
  * data has been taken out of so's receive buffer, relax backpressure on
  * the connected sender and wake up anything waiting to write to it.
  *
  * SB_STOP is cleared on the peer's send buffer when the sampled receive
  * buffer usage is below the peer's sb_hiwat and sb_mbmax limits.
  * Always returns 0; an unconnected socket is a no-op.  The flags
  * argument is not used.
  */
 static int
 uipc_rcvd(struct socket *so, int flags)
 {
 	struct unpcb *unp, *unp2;
 	struct socket *so2;
 	u_int mbcnt, sbcc;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
 	KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET,
 	    ("%s: socktype %d", __func__, so->so_type));
 
 	/*
 	 * Adjust backpressure on sender and wakeup any waiting to write.
 	 *
 	 * The unp lock is acquired to maintain the validity of the unp_conn
 	 * pointer; no lock on unp2 is required as unp2->unp_socket will be
 	 * static as long as we don't permit unp2 to disconnect from unp,
 	 * which is prevented by the lock on unp.  We cache values from
 	 * so_rcv to avoid holding the so_rcv lock over the entire
 	 * transaction on the remote so_snd.
 	 */
 	SOCKBUF_LOCK(&so->so_rcv);
 	mbcnt = so->so_rcv.sb_mbcnt;
 	sbcc = sbavail(&so->so_rcv);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	/*
 	 * There is a benign race condition at this point.  If we're planning to
 	 * clear SB_STOP, but uipc_send is called on the connected socket at
 	 * this instant, it might add data to the sockbuf and set SB_STOP.  Then
 	 * we would erroneously clear SB_STOP below, even though the sockbuf is
 	 * full.  The race is benign because the only ill effect is to allow the
 	 * sockbuf to exceed its size limit, and the size limits are not
 	 * strictly guaranteed anyway.
 	 */
 	UNP_PCB_LOCK(unp);
 	unp2 = unp->unp_conn;
 	if (unp2 == NULL) {
 		UNP_PCB_UNLOCK(unp);
 		return (0);
 	}
 	so2 = unp2->unp_socket;
 	SOCKBUF_LOCK(&so2->so_snd);
 	if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax)
 		so2->so_snd.sb_flags &= ~SB_STOP;
 	/*
 	 * NOTE(review): there is no explicit SOCKBUF_UNLOCK() for
 	 * so2->so_snd; presumably sowwakeup_locked() releases it -- confirm.
 	 */
 	sowwakeup_locked(so2);
 	UNP_PCB_UNLOCK(unp);
 	return (0);
 }
 
 /*
  * pru_send handler for datagram, stream and seqpacket local sockets.
  *
  * Out-of-band data is rejected with EOPNOTSUPP.  Any control mbuf is first
  * converted by unp_internalize().  The link lock is taken for writing when
  * a destination address is supplied or PRUS_EOF is set (both may change
  * connection state), and for reading otherwise.
  *
  * SOCK_DGRAM: with a destination address the socket is connected for the
  * duration of the call and disconnected again afterwards; the data is
  * appended to the peer's receive buffer together with the sender's address.
  *
  * SOCK_STREAM / SOCK_SEQPACKET: the data is appended to the peer's receive
  * buffer and SB_STOP is set on the local send buffer once the peer's
  * buffered amounts reach the local send buffer limits.
  *
  * Whatever remains of 'm' and 'control' on exit is freed here.  Returns 0
  * or an errno value.
  */
 static int
 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
     struct mbuf *control, struct thread *td)
 {
 	struct unpcb *unp, *unp2;
 	struct socket *so2;
 	u_int mbcnt, sbcc;
 	int error = 0;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
 	KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM ||
 	    so->so_type == SOCK_SEQPACKET,
 	    ("%s: socktype %d", __func__, so->so_type));
 
 	if (flags & PRUS_OOB) {
 		error = EOPNOTSUPP;
 		goto release;
 	}
 	if (control != NULL && (error = unp_internalize(&control, td)))
 		goto release;
 	/* The same condition selects the matching unlock further down. */
 	if ((nam != NULL) || (flags & PRUS_EOF))
 		UNP_LINK_WLOCK();
 	else
 		UNP_LINK_RLOCK();
 	switch (so->so_type) {
 	case SOCK_DGRAM:
 	{
 		const struct sockaddr *from;
 
 		unp2 = unp->unp_conn;
 		if (nam != NULL) {
 			UNP_LINK_WLOCK_ASSERT();
 			if (unp2 != NULL) {
 				error = EISCONN;
 				break;
 			}
 			error = unp_connect(so, nam, td);
 			if (error)
 				break;
 			unp2 = unp->unp_conn;
 		}
 
 		/*
 		 * Because connect() and send() are non-atomic in a sendto()
 		 * with a target address, it's possible that the socket will
 		 * have disconnected before the send() can run.  In that case
 		 * return the slightly counter-intuitive but otherwise
 		 * correct error that the socket is not connected.
 		 */
 		if (unp2 == NULL) {
 			error = ENOTCONN;
 			break;
 		}
 		/* Lockless read. */
 		if (unp2->unp_flags & UNP_WANTCRED)
 			control = unp_addsockcred(td, control);
 		UNP_PCB_LOCK(unp);
 		if (unp->unp_addr != NULL)
 			from = (struct sockaddr *)unp->unp_addr;
 		else
 			from = &sun_noname;
 		so2 = unp2->unp_socket;
 		SOCKBUF_LOCK(&so2->so_rcv);
 		if (sbappendaddr_locked(&so2->so_rcv, from, m,
 		    control)) {
 			sorwakeup_locked(so2);
 			m = NULL;
 			control = NULL;
 		} else {
 			SOCKBUF_UNLOCK(&so2->so_rcv);
 			error = ENOBUFS;
 		}
 		/* Undo the temporary connection made for this sendto(). */
 		if (nam != NULL) {
 			UNP_LINK_WLOCK_ASSERT();
 			UNP_PCB_LOCK(unp2);
 			unp_disconnect(unp, unp2);
 			UNP_PCB_UNLOCK(unp2);
 		}
 		UNP_PCB_UNLOCK(unp);
 		break;
 	}
 
 	case SOCK_SEQPACKET:
 	case SOCK_STREAM:
 		if ((so->so_state & SS_ISCONNECTED) == 0) {
 			if (nam != NULL) {
 				UNP_LINK_WLOCK_ASSERT();
 				error = unp_connect(so, nam, td);
 				if (error)
 					break;	/* XXX */
 			} else {
 				error = ENOTCONN;
 				break;
 			}
 		}
 
 		/* Lockless read. */
 		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 			error = EPIPE;
 			break;
 		}
 
 		/*
 		 * Because connect() and send() are non-atomic in a sendto()
 		 * with a target address, it's possible that the socket will
 		 * have disconnected before the send() can run.  In that case
 		 * return the slightly counter-intuitive but otherwise
 		 * correct error that the socket is not connected.
 		 *
 		 * Locking here must be done carefully: the linkage lock
 		 * prevents interconnections between unpcbs from changing, so
 		 * we can traverse from unp to unp2 without acquiring unp's
 		 * lock.  Socket buffer locks follow unpcb locks, so we can
 		 * acquire both remote and local socket buffer locks.
 		 */
 		unp2 = unp->unp_conn;
 		if (unp2 == NULL) {
 			error = ENOTCONN;
 			break;
 		}
 		so2 = unp2->unp_socket;
 		UNP_PCB_LOCK(unp2);
 		SOCKBUF_LOCK(&so2->so_rcv);
 		if (unp2->unp_flags & UNP_WANTCRED) {
 			/*
 			 * Credentials are passed only once on SOCK_STREAM
 			 * and SOCK_SEQPACKET.
 			 */
 			unp2->unp_flags &= ~UNP_WANTCRED;
 			control = unp_addsockcred(td, control);
 		}
 		/*
 		 * Send to paired receive port, and then reduce send buffer
 		 * hiwater marks to maintain backpressure.  Wake up readers.
 		 */
 		switch (so->so_type) {
 		case SOCK_STREAM:
 			if (control != NULL) {
 				if (sbappendcontrol_locked(&so2->so_rcv, m,
 				    control))
 					control = NULL;
 			} else
-				sbappend_locked(&so2->so_rcv, m);
+				sbappend_locked(&so2->so_rcv, m, flags);
 			break;
 
 		case SOCK_SEQPACKET: {
 			const struct sockaddr *from;
 
 			from = &sun_noname;
 			/*
 			 * Don't check for space available in so2->so_rcv.
 			 * Unix domain sockets only check for space in the
 			 * sending sockbuf, and that check is performed one
 			 * level up the stack.
 			 */
 			if (sbappendaddr_nospacecheck_locked(&so2->so_rcv,
 				from, m, control))
 				control = NULL;
 			break;
 			}
 		}
 
 		/*
 		 * Sample the peer's receive buffer usage while its lock is
 		 * still held; the values drive the SB_STOP decision below.
 		 */
 		mbcnt = so2->so_rcv.sb_mbcnt;
 		sbcc = sbavail(&so2->so_rcv);
 		if (sbcc)
 			sorwakeup_locked(so2);
 		else
 			SOCKBUF_UNLOCK(&so2->so_rcv);
 
 		/*
 		 * The PCB lock on unp2 protects the SB_STOP flag.  Without it,
 		 * it would be possible for uipc_rcvd to be called at this
 		 * point, drain the receiving sockbuf, clear SB_STOP, and then
 		 * we would set SB_STOP below.  That could lead to an empty
 		 * sockbuf having SB_STOP set.
 		 */
 		SOCKBUF_LOCK(&so->so_snd);
 		if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax)
 			so->so_snd.sb_flags |= SB_STOP;
 		SOCKBUF_UNLOCK(&so->so_snd);
 		UNP_PCB_UNLOCK(unp2);
 		/* 'm' is cleared unconditionally so it is not freed below. */
 		m = NULL;
 		break;
 	}
 
 	/*
 	 * PRUS_EOF is equivalent to pru_send followed by pru_shutdown.
 	 */
 	if (flags & PRUS_EOF) {
 		UNP_PCB_LOCK(unp);
 		socantsendmore(so);
 		unp_shutdown(unp);
 		UNP_PCB_UNLOCK(unp);
 	}
 
 	if ((nam != NULL) || (flags & PRUS_EOF))
 		UNP_LINK_WUNLOCK();
 	else
 		UNP_LINK_RUNLOCK();
 
 	/* On failure, dispose of the undelivered control data first. */
 	if (control != NULL && error != 0)
 		unp_dispose(control);
 
 release:
 	if (control != NULL)
 		m_freem(control);
 	if (m != NULL)
 		m_freem(m);
 	return (error);
 }
 
 static int
 uipc_ready(struct socket *so, struct mbuf *m, int count)
 {
 	struct unpcb *unp, *unp2;
 	struct socket *so2;
 	int error;
 
 	unp = sotounpcb(so);
 
 	UNP_LINK_RLOCK();
 	unp2 = unp->unp_conn;
 	UNP_PCB_LOCK(unp2);
 	so2 = unp2->unp_socket;
 
 	SOCKBUF_LOCK(&so2->so_rcv);
 	if ((error = sbready(&so2->so_rcv, m, count)) == 0)
 		sorwakeup_locked(so2);
 	else
 		SOCKBUF_UNLOCK(&so2->so_rcv);
 
 	UNP_PCB_UNLOCK(unp2);
 	UNP_LINK_RUNLOCK();
 
 	return (error);
 }
 
 /*
  * pru_sense handler: fill in the stat fields a local socket can supply.
  * The block size is the send buffer high-water mark, the device is NODEV,
  * and the inode number is assigned lazily from the global unp_ino counter
  * (zero is never handed out).  Always returns 0.
  */
 static int
 uipc_sense(struct socket *so, struct stat *sb)
 {
 	struct unpcb *unp;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_sense: unp == NULL"));
 
 	sb->st_blksize = so->so_snd.sb_hiwat;
 	sb->st_dev = NODEV;
 
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_ino == 0) {
 		/* Skip zero when the counter wraps around. */
 		if (++unp_ino == 0)
 			++unp_ino;
 		unp->unp_ino = unp_ino;
 	}
 	sb->st_ino = unp->unp_ino;
 	UNP_PCB_UNLOCK(unp);
 
 	return (0);
 }
 
 /*
  * pru_shutdown handler: mark this socket as unable to send any more and
  * let unp_shutdown() propagate that to the peer.  Always returns 0.
  */
 static int
 uipc_shutdown(struct socket *so)
 {
 	struct unpcb *pcb;
 
 	pcb = sotounpcb(so);
 	KASSERT(pcb != NULL, ("uipc_shutdown: unp == NULL"));
 
 	/* unp_shutdown() asserts the link write lock and the pcb lock. */
 	UNP_LINK_WLOCK();
 	UNP_PCB_LOCK(pcb);
 
 	socantsendmore(so);
 	unp_shutdown(pcb);
 
 	UNP_PCB_UNLOCK(pcb);
 	UNP_LINK_WUNLOCK();
 
 	return (0);
 }
 
 /*
  * pru_sockaddr handler: return a freshly allocated copy of this socket's
  * own address, or of the placeholder sun_noname when it has none.  Always
  * returns 0; the buffer stored in *nam comes from M_SONAME.
  */
 static int
 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct unpcb *unp;
 	const struct sockaddr *src;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL"));
 
 	/* Allocate before locking; M_WAITOK is requested. */
 	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 
 	UNP_PCB_LOCK(unp);
 	src = (unp->unp_addr != NULL) ?
 	    (struct sockaddr *) unp->unp_addr : &sun_noname;
 	bcopy(src, *nam, src->sa_len);
 	UNP_PCB_UNLOCK(unp);
 
 	return (0);
 }
 
 static struct pr_usrreqs uipc_usrreqs_dgram = {
 	.pru_abort = 		uipc_abort,
 	.pru_accept =		uipc_accept,
 	.pru_attach =		uipc_attach,
 	.pru_bind =		uipc_bind,
 	.pru_bindat =		uipc_bindat,
 	.pru_connect =		uipc_connect,
 	.pru_connectat =	uipc_connectat,
 	.pru_connect2 =		uipc_connect2,
 	.pru_detach =		uipc_detach,
 	.pru_disconnect =	uipc_disconnect,
 	.pru_listen =		uipc_listen,
 	.pru_peeraddr =		uipc_peeraddr,
 	.pru_rcvd =		uipc_rcvd,
 	.pru_send =		uipc_send,
 	.pru_sense =		uipc_sense,
 	.pru_shutdown =		uipc_shutdown,
 	.pru_sockaddr =		uipc_sockaddr,
 	.pru_soreceive =	soreceive_dgram,
 	.pru_close =		uipc_close,
 };
 
 static struct pr_usrreqs uipc_usrreqs_seqpacket = {
 	.pru_abort =		uipc_abort,
 	.pru_accept =		uipc_accept,
 	.pru_attach =		uipc_attach,
 	.pru_bind =		uipc_bind,
 	.pru_bindat =		uipc_bindat,
 	.pru_connect =		uipc_connect,
 	.pru_connectat =	uipc_connectat,
 	.pru_connect2 =		uipc_connect2,
 	.pru_detach =		uipc_detach,
 	.pru_disconnect =	uipc_disconnect,
 	.pru_listen =		uipc_listen,
 	.pru_peeraddr =		uipc_peeraddr,
 	.pru_rcvd =		uipc_rcvd,
 	.pru_send =		uipc_send,
 	.pru_sense =		uipc_sense,
 	.pru_shutdown =		uipc_shutdown,
 	.pru_sockaddr =		uipc_sockaddr,
 	.pru_soreceive =	soreceive_generic,	/* XXX: or...? */
 	.pru_close =		uipc_close,
 };
 
 static struct pr_usrreqs uipc_usrreqs_stream = {
 	.pru_abort = 		uipc_abort,
 	.pru_accept =		uipc_accept,
 	.pru_attach =		uipc_attach,
 	.pru_bind =		uipc_bind,
 	.pru_bindat =		uipc_bindat,
 	.pru_connect =		uipc_connect,
 	.pru_connectat =	uipc_connectat,
 	.pru_connect2 =		uipc_connect2,
 	.pru_detach =		uipc_detach,
 	.pru_disconnect =	uipc_disconnect,
 	.pru_listen =		uipc_listen,
 	.pru_peeraddr =		uipc_peeraddr,
 	.pru_rcvd =		uipc_rcvd,
 	.pru_send =		uipc_send,
 	.pru_ready =		uipc_ready,
 	.pru_sense =		uipc_sense,
 	.pru_shutdown =		uipc_shutdown,
 	.pru_sockaddr =		uipc_sockaddr,
 	.pru_soreceive =	soreceive_generic,
 	.pru_close =		uipc_close,
 };
 
 /*
  * Socket option handler for local sockets (level 0 only; any other level
  * yields EINVAL).
  *
  * SOPT_GET: LOCAL_PEERCRED copies out the cached peer credentials, failing
  *	with ENOTCONN (stream) or EINVAL (other types) when none are held;
  *	LOCAL_CREDS and LOCAL_CONNWAIT report the corresponding pcb flag as
  *	0 or 1; any other name yields EOPNOTSUPP.
  * SOPT_SET: LOCAL_CREDS and LOCAL_CONNWAIT set or clear the corresponding
  *	pcb flag from an int value; any other name yields ENOPROTOOPT.
  * Any other direction yields EOPNOTSUPP.
  */
 static int
 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct unpcb *unp;
 	struct xucred xu;
 	int error, flag, optval;
 
 	if (sopt->sopt_level != 0)
 		return (EINVAL);
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL"));
 
 	if (sopt->sopt_dir == SOPT_GET) {
 		switch (sopt->sopt_name) {
 		case LOCAL_PEERCRED:
 			error = 0;
 			UNP_PCB_LOCK(unp);
 			if (unp->unp_flags & UNP_HAVEPC)
 				xu = unp->unp_peercred;
 			else if (so->so_type == SOCK_STREAM)
 				error = ENOTCONN;
 			else
 				error = EINVAL;
 			UNP_PCB_UNLOCK(unp);
 			/* Copy out only after the pcb lock is dropped. */
 			if (error == 0)
 				error = sooptcopyout(sopt, &xu, sizeof(xu));
 			return (error);
 
 		case LOCAL_CREDS:
 			flag = UNP_WANTCRED;
 			break;
 
 		case LOCAL_CONNWAIT:
 			flag = UNP_CONNWAIT;
 			break;
 
 		default:
 			return (EOPNOTSUPP);
 		}
 
 		/* Unlocked read. */
 		optval = unp->unp_flags & flag ? 1 : 0;
 		return (sooptcopyout(sopt, &optval, sizeof(optval)));
 	}
 
 	if (sopt->sopt_dir != SOPT_SET)
 		return (EOPNOTSUPP);
 
 	switch (sopt->sopt_name) {
 	case LOCAL_CREDS:
 		flag = UNP_WANTCRED;
 		break;
 
 	case LOCAL_CONNWAIT:
 		flag = UNP_CONNWAIT;
 		break;
 
 	default:
 		return (ENOPROTOOPT);
 	}
 
 	error = sooptcopyin(sopt, &optval, sizeof(optval),
 			    sizeof(optval));
 	if (error)
 		return (error);
 
 	/* The flag word is modified under the pcb lock. */
 	UNP_PCB_LOCK(unp);
 	if (optval)
 		unp->unp_flags |= flag;
 	else
 		unp->unp_flags &= ~flag;
 	UNP_PCB_UNLOCK(unp);
 
 	return (error);
 }
 
 static int
 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (unp_connectat(AT_FDCWD, so, nam, td));
 }
 
 /*
  * Connect 'so' to the local socket bound at the path in 'nam', looked up
  * relative to descriptor 'fd'.
  *
  * Locking: apart from the initial address-family check, the function
  * asserts that the link write lock is held on entry.  It drops that lock
  * around the pathname lookup, using the UNP_CONNECTING flag to fend off a
  * concurrent connect on the same pcb, and retakes it before returning, so
  * every path that got past the UNP_CONNECTING test returns with the lock
  * held again.
  *
  * For protocols with PR_CONNREQUIRED a new socket is spawned from the
  * listener with sonewconn() and the connection is made to that socket;
  * peer credentials are exchanged at that point.
  *
  * Returns 0 or an errno value (EAFNOSUPPORT, EINVAL, EALREADY, ENOTSOCK,
  * ECONNREFUSED, EPROTOTYPE, or an error from namei(), the MAC check,
  * VOP_ACCESS() or unp_connect2()).
  */
 static int
 unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
     struct thread *td)
 {
 	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
 	struct vnode *vp;
 	struct socket *so2, *so3;
 	struct unpcb *unp, *unp2, *unp3;
 	struct nameidata nd;
 	char buf[SOCK_MAXADDRLEN];
 	struct sockaddr *sa;
 	cap_rights_t rights;
 	int error, len;
 
 	if (nam->sa_family != AF_UNIX)
 		return (EAFNOSUPPORT);
 
 	UNP_LINK_WLOCK_ASSERT();
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
 
 	/* Copy the path into a local, NUL-terminated buffer. */
 	if (nam->sa_len > sizeof(struct sockaddr_un))
 		return (EINVAL);
 	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
 	if (len <= 0)
 		return (EINVAL);
 	bcopy(soun->sun_path, buf, len);
 	buf[len] = 0;
 
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_flags & UNP_CONNECTING) {
 		UNP_PCB_UNLOCK(unp);
 		return (EALREADY);
 	}
 	/* From here until 'bad' the link lock is not held across namei(). */
 	UNP_LINK_WUNLOCK();
 	unp->unp_flags |= UNP_CONNECTING;
 	UNP_PCB_UNLOCK(unp);
 
 	/* Preallocated so no M_WAITOK allocation is needed under the locks. */
 	sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 	    UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_CONNECTAT), td);
 	error = namei(&nd);
 	if (error)
 		vp = NULL;
 	else
 		vp = nd.ni_vp;
 	ASSERT_VOP_LOCKED(vp, "unp_connect");
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	if (error)
 		goto bad;
 
 	if (vp->v_type != VSOCK) {
 		error = ENOTSOCK;
 		goto bad;
 	}
 #ifdef MAC
 	error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD);
 	if (error)
 		goto bad;
 #endif
 	error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
 	if (error)
 		goto bad;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
 
 	/*
 	 * Lock linkage lock for two reasons: make sure v_socket is stable,
 	 * and to protect simultaneous locking of multiple pcbs.
 	 */
 	UNP_LINK_WLOCK();
 	VOP_UNP_CONNECT(vp, &so2);
 	if (so2 == NULL) {
 		error = ECONNREFUSED;
 		goto bad2;
 	}
 	if (so->so_type != so2->so_type) {
 		error = EPROTOTYPE;
 		goto bad2;
 	}
 	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
 		if (so2->so_options & SO_ACCEPTCONN) {
 			CURVNET_SET(so2->so_vnet);
 			so3 = sonewconn(so2, 0);
 			CURVNET_RESTORE();
 		} else
 			so3 = NULL;
 		if (so3 == NULL) {
 			error = ECONNREFUSED;
 			goto bad2;
 		}
 		unp = sotounpcb(so);
 		unp2 = sotounpcb(so2);
 		unp3 = sotounpcb(so3);
 		UNP_PCB_LOCK(unp);
 		UNP_PCB_LOCK(unp2);
 		UNP_PCB_LOCK(unp3);
 		/*
 		 * Give the new socket a copy of the listener's address; 'sa'
 		 * is cleared so it is not freed at 'bad'.
 		 */
 		if (unp2->unp_addr != NULL) {
 			bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
 			unp3->unp_addr = (struct sockaddr_un *) sa;
 			sa = NULL;
 		}
 
 		/*
 		 * The connector's (client's) credentials are copied from its
 		 * process structure at the time of connect() (which is now).
 		 */
 		cru2x(td->td_ucred, &unp3->unp_peercred);
 		unp3->unp_flags |= UNP_HAVEPC;
 
 		/*
 		 * The receiver's (server's) credentials are copied from the
 		 * unp_peercred member of socket on which the former called
 		 * listen(); uipc_listen() cached that process's credentials
 		 * at that time so we can use them now.
 		 */
 		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
 		    ("unp_connect: listener without cached peercred"));
 		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
 		    sizeof(unp->unp_peercred));
 		unp->unp_flags |= UNP_HAVEPC;
 		/* The new socket inherits the listener's LOCAL_CREDS setting. */
 		if (unp2->unp_flags & UNP_WANTCRED)
 			unp3->unp_flags |= UNP_WANTCRED;
 		UNP_PCB_UNLOCK(unp3);
 		UNP_PCB_UNLOCK(unp2);
 		UNP_PCB_UNLOCK(unp);
 #ifdef MAC
 		mac_socketpeer_set_from_socket(so, so3);
 		mac_socketpeer_set_from_socket(so3, so);
 #endif
 
 		/* From now on connect to the newly spawned socket. */
 		so2 = so3;
 	}
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
 	unp2 = sotounpcb(so2);
 	KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL"));
 	UNP_PCB_LOCK(unp);
 	UNP_PCB_LOCK(unp2);
 	error = unp_connect2(so, so2, PRU_CONNECT);
 	UNP_PCB_UNLOCK(unp2);
 	UNP_PCB_UNLOCK(unp);
 bad2:
 	UNP_LINK_WUNLOCK();
 bad:
 	/* Reached with the link lock released on every path. */
 	if (vp != NULL)
 		vput(vp);
 	free(sa, M_SONAME);
 	/* Retake the link lock for the caller and clear the in-progress flag. */
 	UNP_LINK_WLOCK();
 	UNP_PCB_LOCK(unp);
 	unp->unp_flags &= ~UNP_CONNECTING;
 	UNP_PCB_UNLOCK(unp);
 	return (error);
 }
 
 /*
  * Link the pcbs of 'so' and 'so2' together.  The caller must hold the link
  * write lock and both pcb locks.
  *
  * Datagram sockets are linked one way only: 'so' points at 'so2' and is
  * added to so2's list of referencing pcbs.  Stream and seqpacket sockets
  * are linked both ways; 'so' is left in the connecting state when this is
  * a PRU_CONNECT request and either side has UNP_CONNWAIT set.
  *
  * Returns EPROTOTYPE if the socket types differ, otherwise 0.
  */
 static int
 unp_connect2(struct socket *so, struct socket *so2, int req)
 {
 	struct unpcb *unp, *peer;
 	int waitconn;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("unp_connect2: unp == NULL"));
 	peer = sotounpcb(so2);
 	KASSERT(peer != NULL, ("unp_connect2: unp2 == NULL"));
 
 	UNP_LINK_WLOCK_ASSERT();
 	UNP_PCB_LOCK_ASSERT(unp);
 	UNP_PCB_LOCK_ASSERT(peer);
 
 	if (so2->so_type != so->so_type)
 		return (EPROTOTYPE);
 	unp->unp_conn = peer;
 
 	if (so->so_type == SOCK_DGRAM) {
 		LIST_INSERT_HEAD(&peer->unp_refs, unp, unp_reflink);
 		soisconnected(so);
 	} else if (so->so_type == SOCK_STREAM ||
 	    so->so_type == SOCK_SEQPACKET) {
 		peer->unp_conn = unp;
 		waitconn = (req == PRU_CONNECT &&
 		    ((unp->unp_flags | peer->unp_flags) & UNP_CONNWAIT));
 		if (waitconn)
 			soisconnecting(so);
 		else
 			soisconnected(so);
 		soisconnected(so2);
 	} else {
 		panic("unp_connect2");
 	}
 
 	return (0);
 }
 
 /*
  * Undo the link made by unp_connect2().  The caller must hold the link
  * write lock and both pcb locks.
  *
  * For datagram sockets only 'unp' is detached: it leaves the peer's
  * reference list and loses SS_ISCONNECTED.  For stream and seqpacket
  * sockets both ends are unlinked and both sockets are marked disconnected.
  */
 static void
 unp_disconnect(struct unpcb *unp, struct unpcb *unp2)
 {
 	struct socket *so;
 
 	KASSERT(unp2 != NULL, ("unp_disconnect: unp2 == NULL"));
 
 	UNP_LINK_WLOCK_ASSERT();
 	UNP_PCB_LOCK_ASSERT(unp);
 	UNP_PCB_LOCK_ASSERT(unp2);
 
 	unp->unp_conn = NULL;
 	so = unp->unp_socket;
 
 	if (so->so_type == SOCK_DGRAM) {
 		LIST_REMOVE(unp, unp_reflink);
 		SOCK_LOCK(so);
 		so->so_state &= ~SS_ISCONNECTED;
 		SOCK_UNLOCK(so);
 	} else if (so->so_type == SOCK_STREAM ||
 	    so->so_type == SOCK_SEQPACKET) {
 		soisdisconnected(so);
 		unp2->unp_conn = NULL;
 		soisdisconnected(unp2->unp_socket);
 	}
 }
 
 /*
  * unp_pcblist() walks the global list of struct unpcb's to generate a
  * pointer list, bumping the refcount on each unpcb.  It then copies them out
  * sequentially, validating the generation number on each to see if it has
  * been detached.  All of this is necessary because copyout() may sleep on
  * disk I/O.
  */
 static int
 unp_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	int error, i, n;
 	int freeunp;
 	struct unpcb *unp, **unp_list;
 	unp_gen_t gencnt;
 	struct xunpgen *xug;
 	struct unp_head *head;
 	struct xunpcb *xu;
 
 	switch ((intptr_t)arg1) {
 	case SOCK_STREAM:
 		head = &unp_shead;
 		break;
 
 	case SOCK_DGRAM:
 		head = &unp_dhead;
 		break;
 
 	case SOCK_SEQPACKET:
 		head = &unp_sphead;
 		break;
 
 	default:
 		panic("unp_pcblist: arg1 %d", (int)(intptr_t)arg1);
 	}
 
 	/*
 	 * The process of preparing the PCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request.
 	 */
 	if (req->oldptr == NULL) {
 		n = unp_count;
 		req->oldidx = 2 * (sizeof *xug)
 			+ (n + n/8) * sizeof(struct xunpcb);
 		return (0);
 	}
 
 	if (req->newptr != NULL)
 		return (EPERM);
 
 	/*
 	 * OK, now we're committed to doing something.
 	 */
 	xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
 	UNP_LIST_LOCK();
 	gencnt = unp_gencnt;
 	n = unp_count;
 	UNP_LIST_UNLOCK();
 
 	xug->xug_len = sizeof *xug;
 	xug->xug_count = n;
 	xug->xug_gen = gencnt;
 	xug->xug_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, xug, sizeof *xug);
 	if (error) {
 		free(xug, M_TEMP);
 		return (error);
 	}
 
 	unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
 
 	UNP_LIST_LOCK();
 	for (unp = LIST_FIRST(head), i = 0; unp && i < n;
 	     unp = LIST_NEXT(unp, unp_link)) {
 		UNP_PCB_LOCK(unp);
 		if (unp->unp_gencnt <= gencnt) {
 			if (cr_cansee(req->td->td_ucred,
 			    unp->unp_socket->so_cred)) {
 				UNP_PCB_UNLOCK(unp);
 				continue;
 			}
 			unp_list[i++] = unp;
 			unp->unp_refcount++;
 		}
 		UNP_PCB_UNLOCK(unp);
 	}
 	UNP_LIST_UNLOCK();
 	n = i;			/* In case we lost some during malloc. */
 
 	error = 0;
 	xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
 	for (i = 0; i < n; i++) {
 		unp = unp_list[i];
 		UNP_PCB_LOCK(unp);
 		unp->unp_refcount--;
 	        if (unp->unp_refcount != 0 && unp->unp_gencnt <= gencnt) {
 			xu->xu_len = sizeof *xu;
 			xu->xu_unpp = unp;
 			/*
 			 * XXX - need more locking here to protect against
 			 * connect/disconnect races for SMP.
 			 */
 			if (unp->unp_addr != NULL)
 				bcopy(unp->unp_addr, &xu->xu_addr,
 				      unp->unp_addr->sun_len);
 			if (unp->unp_conn != NULL &&
 			    unp->unp_conn->unp_addr != NULL)
 				bcopy(unp->unp_conn->unp_addr,
 				      &xu->xu_caddr,
 				      unp->unp_conn->unp_addr->sun_len);
 			bcopy(unp, &xu->xu_unp, sizeof *unp);
 			sotoxsocket(unp->unp_socket, &xu->xu_socket);
 			UNP_PCB_UNLOCK(unp);
 			error = SYSCTL_OUT(req, xu, sizeof *xu);
 		} else {
 			freeunp = (unp->unp_refcount == 0);
 			UNP_PCB_UNLOCK(unp);
 			if (freeunp) {
 				UNP_PCB_LOCK_DESTROY(unp);
 				uma_zfree(unp_zone, unp);
 			}
 		}
 	}
 	free(xu, M_TEMP);
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.  If the
 		 * generation differs from what we told her before, she knows
 		 * that something happened while we were processing this
 		 * request, and it might be necessary to retry.
 		 */
 		xug->xug_gen = unp_gencnt;
 		xug->xug_sogen = so_gencnt;
 		xug->xug_count = unp_count;
 		error = SYSCTL_OUT(req, xug, sizeof *xug);
 	}
 	free(unp_list, M_TEMP);
 	free(xug, M_TEMP);
 	return (error);
 }
 
 /*
  * Read-only, opaque "pcblist" sysctl nodes, one per local socket type.  All
  * three are served by unp_pcblist(); the socket type is passed to it as
  * arg1.
  */
 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD,
     (void *)(intptr_t)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
     "List of active local datagram sockets");
 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD,
     (void *)(intptr_t)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
     "List of active local stream sockets");
 SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD,
     (void *)(intptr_t)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb",
     "List of active local seqpacket sockets");
 
 /*
  * Tell the peer of a stream or seqpacket socket that no more data will
  * arrive.  Nothing happens for other socket types, when there is no peer,
  * or when the peer pcb has no socket.  The caller must hold the link write
  * lock and the pcb lock.
  */
 static void
 unp_shutdown(struct unpcb *unp)
 {
 	struct unpcb *peer;
 	struct socket *peerso;
 	int type;
 
 	UNP_LINK_WLOCK_ASSERT();
 	UNP_PCB_LOCK_ASSERT(unp);
 
 	peer = unp->unp_conn;
 	type = unp->unp_socket->so_type;
 	if (type != SOCK_STREAM && type != SOCK_SEQPACKET)
 		return;
 	if (peer == NULL)
 		return;
 
 	peerso = peer->unp_socket;
 	if (peerso != NULL)
 		socantrcvmore(peerso);
 }
 
 /*
  * Record 'errno' as the socket's pending error and, if the pcb has a peer,
  * disconnect from it.  The caller must hold the link write lock and the
  * pcb lock.
  */
 static void
 unp_drop(struct unpcb *unp, int errno)
 {
 	struct socket *so;
 	struct unpcb *peer;
 
 	so = unp->unp_socket;
 
 	UNP_LINK_WLOCK_ASSERT();
 	UNP_PCB_LOCK_ASSERT(unp);
 
 	so->so_error = errno;
 
 	peer = unp->unp_conn;
 	if (peer != NULL) {
 		/* unp_disconnect() needs the peer's pcb lock as well. */
 		UNP_PCB_LOCK(peer);
 		unp_disconnect(unp, peer);
 		UNP_PCB_UNLOCK(peer);
 	}
 }
 
 /*
  * Release 'fdcount' in-flight descriptors: free each entry's capability
  * data and discard its file reference, then free the backing array through
  * its first element.  'fdcount' must be positive.
  */
 static void
 unp_freerights(struct filedescent **fdep, int fdcount)
 {
 	struct filedescent *fde;
 	struct file *fp;
 	int idx;
 
 	KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount));
 
 	idx = 0;
 	while (idx < fdcount) {
 		fde = fdep[idx];
 		fp = fde->fde_file;
 		filecaps_free(&fde->fde_caps);
 		unp_discard(fp);
 		idx++;
 	}
 
 	/* All entries share one allocation that starts at fdep[0]. */
 	free(fdep[0], M_FILECAPS);
 }
 
 /*
  * Convert received control messages from their in-kernel form into the
  * form handed to the receiving process.
  *
  * SCM_RIGHTS messages carry pointers to struct filedescent; each is
  * installed into the current thread's descriptor table and replaced by the
  * resulting descriptor number.  Every other message is copied unchanged.
  * The converted messages are chained onto *controlp.
  *
  * When controlp is NULL, or once an error has occurred, nothing more is
  * produced: in-flight descriptors are released instead and other messages
  * are skipped.
  *
  * The input chain 'control' is always freed.  Must be called without the
  * link lock held.  Returns 0 or an errno value (EINVAL, E2BIG, EMSGSIZE,
  * ENOBUFS).
  */
 static int
 unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
 {
 	struct thread *td = curthread;		/* XXX */
 	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 	int i;
 	int *fdp;
 	struct filedesc *fdesc = td->td_proc->p_fd;
 	struct filedescent **fdep;
 	void *data;
 	socklen_t clen = control->m_len, datalen;
 	int error, newfds;
 	u_int newlen;
 
 	UNP_LINK_UNLOCK_ASSERT();
 
 	error = 0;
 	if (controlp != NULL) /* controlp == NULL => free control messages */
 		*controlp = NULL;
 	while (cm != NULL) {
 		if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
 			error = EINVAL;
 			break;
 		}
 		data = CMSG_DATA(cm);
 		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 		if (cm->cmsg_level == SOL_SOCKET
 		    && cm->cmsg_type == SCM_RIGHTS) {
 			/* The payload is an array of filedescent pointers. */
 			newfds = datalen / sizeof(*fdep);
 			if (newfds == 0)
 				goto next;
 			fdep = data;
 
 			/* If we're not outputting the descriptors free them. */
 			if (error || controlp == NULL) {
 				unp_freerights(fdep, newfds);
 				goto next;
 			}
 			FILEDESC_XLOCK(fdesc);
 
 			/*
 			 * Now change each pointer to an fd in the global
 			 * table to an integer that is the index to the local
 			 * fd table entry that we set up to point to the
 			 * global one we are transferring.
 			 */
 			newlen = newfds * sizeof(int);
 			*controlp = sbcreatecontrol(NULL, newlen,
 			    SCM_RIGHTS, SOL_SOCKET);
 			if (*controlp == NULL) {
 				FILEDESC_XUNLOCK(fdesc);
 				error = E2BIG;
 				unp_freerights(fdep, newfds);
 				goto next;
 			}
 
 			fdp = (int *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			if (fdallocn(td, 0, fdp, newfds) != 0) {
 				FILEDESC_XUNLOCK(fdesc);
 				error = EMSGSIZE;
 				unp_freerights(fdep, newfds);
 				m_freem(*controlp);
 				*controlp = NULL;
 				goto next;
 			}
 			for (i = 0; i < newfds; i++, fdp++) {
 				_finstall(fdesc, fdep[i]->fde_file, *fdp,
 				    (flags & MSG_CMSG_CLOEXEC) != 0 ? UF_EXCLOSE : 0,
 				    &fdep[i]->fde_caps);
 				unp_externalize_fp(fdep[i]->fde_file);
 			}
 			FILEDESC_XUNLOCK(fdesc);
 			/* The entries share one allocation based at fdep[0]. */
 			free(fdep[0], M_FILECAPS);
 		} else {
 			/* We can just copy anything else across. */
 			if (error || controlp == NULL)
 				goto next;
 			*controlp = sbcreatecontrol(NULL, datalen,
 			    cm->cmsg_type, cm->cmsg_level);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto next;
 			}
 			bcopy(data,
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
 			    datalen);
 		}
 		/*
 		 * Only reached after a message was produced; every path that
 		 * produced nothing jumps straight to 'next'.
 		 */
 		controlp = &(*controlp)->m_next;
 
 next:
 		/* Step to the following message, or stop at the end. */
 		if (CMSG_SPACE(datalen) < clen) {
 			clen -= CMSG_SPACE(datalen);
 			cm = (struct cmsghdr *)
 			    ((caddr_t)cm + CMSG_SPACE(datalen));
 		} else {
 			clen = 0;
 			cm = NULL;
 		}
 	}
 
 	m_freem(control);
 	return (error);
 }
 
 /*
  * Handler registered for the maxsockets_change event: apply the current
  * value of maxsockets as the limit of the unpcb zone.  'tag' is unused.
  */
 static void
 unp_zone_change(void *tag)
 {
 
 	(void)tag;
 	uma_zone_set_max(unp_zone, maxsockets);
 }
 
 /*
  * One-time initialization of the local socket code: locks, pcb lists, the
  * deferred-close list, the garbage collection and deferral tasks, and the
  * unpcb zone together with its maxsockets limit.  With VIMAGE, only the
  * default vnet performs this work.
  */
 static void
 unp_init(void)
 {
 
 #ifdef VIMAGE
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 #endif
 	/* Locks. */
 	UNP_LINK_LOCK_INIT();
 	UNP_LIST_LOCK_INIT();
 	UNP_DEFERRED_LOCK_INIT();
 
 	/* Per-type pcb lists and the list of deferred closes. */
 	LIST_INIT(&unp_dhead);
 	LIST_INIT(&unp_shead);
 	LIST_INIT(&unp_sphead);
 	SLIST_INIT(&unp_defers);
 
 	/* Background tasks. */
 	TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL);
 	TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL);
 
 	/* The pcb zone, limited to maxsockets and kept in step with it. */
 	unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, 0);
 	if (unp_zone == NULL)
 		panic("unp_init");
 	uma_zone_set_max(unp_zone, maxsockets);
 	uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached");
 	EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
 	    NULL, EVENTHANDLER_PRI_ANY);
 }
 
 /*
  * Convert control messages supplied by a sending process into their
  * in-kernel form.
  *
  * On entry *controlp is the sender's control mbuf; it is always freed.  On
  * return *controlp heads a chain holding one freshly built mbuf for each
  * message that produced output:
  *
  *	SCM_CREDS	filled in with the sender's pid, uids, gid and groups
  *	SCM_RIGHTS	descriptor numbers replaced by struct filedescent
  *			pointers, each file gaining an in-flight reference;
  *			a message carrying no descriptors produces nothing
  *	SCM_TIMESTAMP	current time as a struct timeval
  *	SCM_BINTIME	current time as a struct bintime
  *
  * Only SOL_SOCKET messages of these types are accepted.  Must be called
  * without the link lock held.  Returns 0 or an errno value (EINVAL,
  * ENOBUFS, EBADF, EOPNOTSUPP, E2BIG).
  */
 static int
 unp_internalize(struct mbuf **controlp, struct thread *td)
 {
 	struct mbuf *control = *controlp;
 	struct proc *p = td->td_proc;
 	struct filedesc *fdesc = p->p_fd;
 	struct bintime *bt;
 	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 	struct cmsgcred *cmcred;
 	struct filedescent *fde, **fdep, *fdev;
 	struct file *fp;
 	struct timeval *tv;
 	int i, *fdp;
 	void *data;
 	socklen_t clen = control->m_len, datalen;
 	int error, oldfds;
 	u_int newlen;
 
 	UNP_LINK_UNLOCK_ASSERT();
 
 	error = 0;
 	*controlp = NULL;
 	while (cm != NULL) {
 		if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
 		    || cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) {
 			error = EINVAL;
 			goto out;
 		}
 		data = CMSG_DATA(cm);
 		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 
 		switch (cm->cmsg_type) {
 		/*
 		 * Fill in credential information.
 		 */
 		case SCM_CREDS:
 			*controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
 			    SCM_CREDS, SOL_SOCKET);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 			cmcred = (struct cmsgcred *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			cmcred->cmcred_pid = p->p_pid;
 			cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 			cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 			cmcred->cmcred_euid = td->td_ucred->cr_uid;
 			cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 			    CMGROUP_MAX);
 			for (i = 0; i < cmcred->cmcred_ngroups; i++)
 				cmcred->cmcred_groups[i] =
 				    td->td_ucred->cr_groups[i];
 			break;
 
 		case SCM_RIGHTS:
 			oldfds = datalen / sizeof (int);
 			/* Nothing to pass: this message produces no output. */
 			if (oldfds == 0)
 				break;
 			/*
 			 * Check that all the FDs passed in refer to legal
 			 * files.  If not, reject the entire operation.
 			 */
 			fdp = data;
 			FILEDESC_SLOCK(fdesc);
 			for (i = 0; i < oldfds; i++, fdp++) {
 				fp = fget_locked(fdesc, *fdp);
 				if (fp == NULL) {
 					FILEDESC_SUNLOCK(fdesc);
 					error = EBADF;
 					goto out;
 				}
 				if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
 					FILEDESC_SUNLOCK(fdesc);
 					error = EOPNOTSUPP;
 					goto out;
 				}
 
 			}
 
 			/*
 			 * Now replace the integer FDs with pointers to the
 			 * file structure and capability rights.
 			 */
 			newlen = oldfds * sizeof(fdep[0]);
 			*controlp = sbcreatecontrol(NULL, newlen,
 			    SCM_RIGHTS, SOL_SOCKET);
 			if (*controlp == NULL) {
 				FILEDESC_SUNLOCK(fdesc);
 				error = E2BIG;
 				goto out;
 			}
 			fdp = data;
 			fdep = (struct filedescent **)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			/*
 			 * One allocation backs all entries; fdep[0] ends up
 			 * pointing at its start.
 			 */
 			fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
 			    M_WAITOK);
 			for (i = 0; i < oldfds; i++, fdev++, fdp++) {
 				fde = &fdesc->fd_ofiles[*fdp];
 				fdep[i] = fdev;
 				fdep[i]->fde_file = fde->fde_file;
 				filecaps_copy(&fde->fde_caps,
 				    &fdep[i]->fde_caps, true);
 				unp_internalize_fp(fdep[i]->fde_file);
 			}
 			FILEDESC_SUNLOCK(fdesc);
 			break;
 
 		case SCM_TIMESTAMP:
 			*controlp = sbcreatecontrol(NULL, sizeof(*tv),
 			    SCM_TIMESTAMP, SOL_SOCKET);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 			tv = (struct timeval *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			microtime(tv);
 			break;
 
 		case SCM_BINTIME:
 			*controlp = sbcreatecontrol(NULL, sizeof(*bt),
 			    SCM_BINTIME, SOL_SOCKET);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 			bt = (struct bintime *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			bintime(bt);
 			break;
 
 		default:
 			error = EINVAL;
 			goto out;
 		}
 
 		/*
 		 * Advance the output link only when this message actually
 		 * produced an mbuf.  An SCM_RIGHTS message without any
 		 * descriptors leaves *controlp NULL; stepping through it
 		 * would make the next message store through an invalid
 		 * pointer.
 		 */
 		if (*controlp != NULL)
 			controlp = &(*controlp)->m_next;
 		if (CMSG_SPACE(datalen) < clen) {
 			clen -= CMSG_SPACE(datalen);
 			cm = (struct cmsghdr *)
 			    ((caddr_t)cm + CMSG_SPACE(datalen));
 		} else {
 			clen = 0;
 			cm = NULL;
 		}
 	}
 
 out:
 	m_freem(control);
 	return (error);
 }
 
 /*
  * Build an SCM_CREDS control message (struct sockcred) from the thread's
  * credentials and put it at the front of 'control'.  Any SCM_CREDS messages
  * already present in 'control' are removed first.  Returns the new chain
  * head, or 'control' untouched if no mbuf could be obtained.
  */
 static struct mbuf *
 unp_addsockcred(struct thread *td, struct mbuf *control)
 {
 	struct mbuf *cred, *cur, **linkp;
 	struct sockcred *sc;
 	const struct cmsghdr *cm;
 	int ngroups;
 	int i;
 
 	ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX);
 	cred = sbcreatecontrol(NULL, SOCKCREDSIZE(ngroups), SCM_CREDS,
 	    SOL_SOCKET);
 	if (cred == NULL)
 		return (control);
 
 	sc = (struct sockcred *) CMSG_DATA(mtod(cred, struct cmsghdr *));
 	sc->sc_uid = td->td_ucred->cr_ruid;
 	sc->sc_euid = td->td_ucred->cr_uid;
 	sc->sc_gid = td->td_ucred->cr_rgid;
 	sc->sc_egid = td->td_ucred->cr_gid;
 	sc->sc_ngroups = ngroups;
 	for (i = 0; i < sc->sc_ngroups; i++)
 		sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 
 	/*
 	 * Drop SCM_CREDS messages supplied by the sender (struct cmsgcred):
 	 * the message built above (struct sockcred) uses a different
 	 * format.  'linkp' always addresses the pointer that leads to the
 	 * mbuf under inspection, so unlinking is a single store.
 	 */
 	for (linkp = &control; (cur = *linkp) != NULL;) {
 		cm = mtod(cur, struct cmsghdr *);
 		if (cm->cmsg_level == SOL_SOCKET &&
 		    cm->cmsg_type == SCM_CREDS)
 			*linkp = m_free(cur);
 		else
 			linkp = &cur->m_next;
 	}
 
 	/* Prepend it to the head. */
 	cred->m_next = control;
 	return (cred);
 }
 
 /*
  * Map a file to its unpcb.  Returns NULL when 'fp' is not a socket, has no
  * socket attached, or the socket's protocol is not in the local domain.
  */
 static struct unpcb *
 fptounp(struct file *fp)
 {
 	struct socket *so;
 
 	if (fp->f_type != DTYPE_SOCKET)
 		return (NULL);
 	so = fp->f_data;
 	if (so == NULL || so->so_proto->pr_domain != &localdomain)
 		return (NULL);
 	return (sotounpcb(so));
 }
 
 /*
  * Release one in-flight reference to 'fp'.  Files for which
  * unp_externalize_fp() reports a local-domain socket are queued on
  * unp_defers and closed later by unp_defer_task; all others are closed here.
  */
 static void
 unp_discard(struct file *fp)
 {
 	struct unp_defer *dr;
 
 	if (!unp_externalize_fp(fp)) {
 		(void) closef(fp, (struct thread *)NULL);
 		return;
 	}
 
 	dr = malloc(sizeof(*dr), M_TEMP, M_WAITOK);
 	dr->ud_fp = fp;
 	UNP_DEFERRED_LOCK();
 	SLIST_INSERT_HEAD(&unp_defers, dr, ud_link);
 	UNP_DEFERRED_UNLOCK();
 	atomic_add_int(&unp_defers_count, 1);
 	taskqueue_enqueue(taskqueue_thread, &unp_defer_task);
 }
 
 /*
  * Task handler that closes the files queued on unp_defers by unp_discard().
  * Neither argument is used.  The function keeps draining until it finds the
  * queue empty.
  */
 static void
 unp_process_defers(void *arg __unused, int pending)
 {
 	struct unp_defer *dr;
 	SLIST_HEAD(, unp_defer) drl;
 	int count;
 
 	SLIST_INIT(&drl);
 	for (;;) {
 		UNP_DEFERRED_LOCK();
 		if (SLIST_FIRST(&unp_defers) == NULL) {
 			UNP_DEFERRED_UNLOCK();
 			break;
 		}
 		/*
 		 * Move the whole pending list to the private head so the
 		 * files can be closed after the lock has been dropped.
 		 */
 		SLIST_SWAP(&unp_defers, &drl, unp_defer);
 		UNP_DEFERRED_UNLOCK();
 		count = 0;
 		while ((dr = SLIST_FIRST(&drl)) != NULL) {
 			SLIST_REMOVE_HEAD(&drl, ud_link);
 			closef(dr->ud_fp, NULL);
 			free(dr, M_TEMP);
 			count++;
 		}
 		/* Balance the increments done by unp_discard(). */
 		atomic_add_int(&unp_defers_count, -count);
 	}
 }
 
 /*
  * Account for 'fp' being placed in flight: take a file reference, bump the
  * global unp_rights count and, if the file is a local-domain socket, record
  * the file in its unpcb and bump that pcb's message count.  All of it is
  * done under the link write lock.
  */
 static void
 unp_internalize_fp(struct file *fp)
 {
 	struct unpcb *unp;
 
 	UNP_LINK_WLOCK();
 	unp = fptounp(fp);
 	if (unp != NULL) {
 		unp->unp_msgcount++;
 		unp->unp_file = fp;
 	}
 	fhold(fp);
 	unp_rights++;
 	UNP_LINK_WUNLOCK();
 }
 
 /*
  * Undo the accounting of unp_internalize_fp() for 'fp', except for the file
  * reference, which is left to the caller.  Returns 1 when 'fp' is a
  * local-domain socket (its unp_msgcount was decremented) and 0 otherwise.
  */
 static int
 unp_externalize_fp(struct file *fp)
 {
 	struct unpcb *unp;
 
 	UNP_LINK_WLOCK();
 	unp = fptounp(fp);
 	if (unp != NULL)
 		unp->unp_msgcount--;
 	unp_rights--;
 	UNP_LINK_WUNLOCK();
 	return (unp != NULL ? 1 : 0);
 }
 
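 /*
  * Counters used by unp_gc() and its helpers.  unp_marked counts the sockets
  * newly marked reachable during the current marking pass; a non-zero value
  * indicates that additional work has been deferred to a further pass.
  * unp_unreachable counts the sockets flagged as dead during the pass.  They
  * are private to the garbage collector and do not require explicit
  * synchronization.
  */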
 static int	unp_marked;
 static int	unp_unreachable;
 
 /*
  * unp_scan() callback: mark every local-domain socket found in the passed
  * descriptor array as reachable.  A socket not yet carrying UNPGC_REF loses
  * UNPGC_DEAD, gains UNPGC_REF and is counted in unp_marked.
  */
 static void
 unp_accessable(struct filedescent **fdep, int fdcount)
 {
 	struct unpcb *unp;
 	int idx;
 
 	for (idx = 0; idx < fdcount; idx++) {
 		unp = fptounp(fdep[idx]->fde_file);
 		if (unp == NULL || (unp->unp_gcflag & UNPGC_REF) != 0)
 			continue;
 		unp->unp_gcflag = (unp->unp_gcflag & ~UNPGC_DEAD) | UNPGC_REF;
 		unp_marked++;
 	}
 }
 
 /*
  * Handle one unpcb during a marking pass of unp_gc().
  *
  * A pcb that is not marked UNPGC_REF and whose file reference count equals
  * its non-zero unp_msgcount is flagged UNPGC_DEAD and counted in
  * unp_unreachable.  Otherwise the rights queued on its socket and on the
  * sockets of its completed accept queue are passed to unp_accessable(), and
  * the pcb is flagged UNPGC_SCANNED so later passes return immediately.
  */
 static void
 unp_gc_process(struct unpcb *unp)
 {
 	struct socket *soa;
 	struct socket *so;
 	struct file *fp;
 
 	/* Already processed. */
 	if (unp->unp_gcflag & UNPGC_SCANNED)
 		return;
 	fp = unp->unp_file;
 
 	/*
 	 * Check for a socket potentially in a cycle.  It must be in a
 	 * queue as indicated by msgcount, and this must equal the file
 	 * reference count.  Note that when msgcount is 0 the file is NULL.
 	 */
 	if ((unp->unp_gcflag & UNPGC_REF) == 0 && fp &&
 	    unp->unp_msgcount != 0 && fp->f_count == unp->unp_msgcount) {
 		unp->unp_gcflag |= UNPGC_DEAD;
 		unp_unreachable++;
 		return;
 	}
 
 	/*
 	 * Mark all sockets we reference with RIGHTS.
 	 */
 	so = unp->unp_socket;
 	if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
 		SOCKBUF_LOCK(&so->so_rcv);
 		unp_scan(so->so_rcv.sb_mb, unp_accessable);
 		SOCKBUF_UNLOCK(&so->so_rcv);
 	}
 
 	/*
 	 * Mark all sockets in our accept queue.
 	 */
 	ACCEPT_LOCK();
 	TAILQ_FOREACH(soa, &so->so_comp, so_list) {
 		/* Skip sockets whose rights are being disposed of. */
 		if ((sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) != 0)
 			continue;
 		SOCKBUF_LOCK(&soa->so_rcv);
 		unp_scan(soa->so_rcv.sb_mb, unp_accessable);
 		SOCKBUF_UNLOCK(&soa->so_rcv);
 	}
 	ACCEPT_UNLOCK();
 	unp->unp_gcflag |= UNPGC_SCANNED;
 }
 
 /* Read-only statistic; unp_gc() adds the sockets it releases per run. */
 static int unp_recycled;
 SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, 
     "Number of unreachable sockets claimed by the garbage collector.");
 
 /* Read-only statistic; incremented at the start of every unp_gc() run. */
 static int unp_taskcount;
 SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, 
     "Number of times the garbage collector has run.");
 
 /*
  * Garbage collector for local-domain sockets that are only referenced by
  * rights in flight.  Neither argument is used.
  *
  * The unpcbs on the three heads are marked by repeated passes of
  * unp_gc_process() until a pass marks nothing new.  The files of the pcbs
  * left flagged UNPGC_DEAD are then referenced, their sockets are flushed
  * with sorflush(), and the references are dropped again.
  */
 static void
 unp_gc(__unused void *arg, int pending)
 {
 	struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead,
 				    NULL };
 	struct unp_head **head;
 	struct file *f, **unref;
 	struct unpcb *unp;
 	int i, total;
 
 	unp_taskcount++;
 	UNP_LIST_LOCK();
 	/*
 	 * First clear all gc flags from previous runs, apart from
 	 * UNPGC_IGNORE_RIGHTS.
 	 */
 	for (head = heads; *head != NULL; head++)
 		LIST_FOREACH(unp, *head, unp_link)
 			unp->unp_gcflag =
 			    (unp->unp_gcflag & UNPGC_IGNORE_RIGHTS);
 
 	/*
 	 * Scan marking all reachable sockets with UNPGC_REF.  Once a socket
 	 * is reachable all of the sockets it references are reachable.
 	 * Stop the scan once we do a complete loop without discovering
 	 * a new reachable socket.
 	 */
 	do {
 		unp_unreachable = 0;
 		unp_marked = 0;
 		for (head = heads; *head != NULL; head++)
 			LIST_FOREACH(unp, *head, unp_link)
 				unp_gc_process(unp);
 	} while (unp_marked);
 	UNP_LIST_UNLOCK();
 	if (unp_unreachable == 0)
 		return;
 
 	/*
 	 * Allocate space for a local list of dead unpcbs.
 	 */
 	unref = malloc(unp_unreachable * sizeof(struct file *),
 	    M_TEMP, M_WAITOK);
 
 	/*
 	 * Iterate looking for sockets which have been specifically marked
 	 * as unreachable and store them locally.
 	 */
 	UNP_LINK_RLOCK();
 	UNP_LIST_LOCK();
 	for (total = 0, head = heads; *head != NULL; head++)
 		LIST_FOREACH(unp, *head, unp_link)
 			if ((unp->unp_gcflag & UNPGC_DEAD) != 0) {
 				f = unp->unp_file;
 				/*
 				 * The list lock was dropped after marking,
 				 * so re-check the condition that made this
 				 * pcb a candidate.
 				 */
 				if (unp->unp_msgcount == 0 || f == NULL ||
 				    f->f_count != unp->unp_msgcount)
 					continue;
 				unref[total++] = f;
 				fhold(f);
 				KASSERT(total <= unp_unreachable,
 				    ("unp_gc: incorrect unreachable count."));
 			}
 	UNP_LIST_UNLOCK();
 	UNP_LINK_RUNLOCK();
 
 	/*
 	 * Now flush all sockets, free'ing rights.  This will free the
 	 * struct files associated with these sockets but leave each socket
 	 * with one remaining ref.
 	 */
 	for (i = 0; i < total; i++) {
 		struct socket *so;
 
 		so = unref[i]->f_data;
 		CURVNET_SET(so->so_vnet);
 		sorflush(so);
 		CURVNET_RESTORE();
 	}
 
 	/*
 	 * And finally release the sockets so they can be reclaimed.
 	 */
 	for (i = 0; i < total; i++)
 		fdrop(unref[i], NULL);
 	unp_recycled += total;
 	free(unref, M_TEMP);
 }
 
 static void
 unp_dispose(struct mbuf *m)
 {
 
 	if (m)
 		unp_scan(m, unp_freerights);
 }
 
 /*
  * Dispose of the rights queued on the receive buffer of 'so'.
  *
  * Synchronize against unp_gc, which can trip over data as we are freeing it.
  * UNPGC_IGNORE_RIGHTS is set under the list lock first, which makes
  * unp_gc_process() skip this socket's receive buffer.
  */
 static void
 unp_dispose_so(struct socket *so)
 {
 	struct unpcb *unp;
 
 	unp = sotounpcb(so);
 	UNP_LIST_LOCK();
 	unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
 	UNP_LIST_UNLOCK();
 	unp_dispose(so->so_rcv.sb_mb);
 }
 
 /*
  * Walk every packet chained from 'm0' through m_nextpkt and, inside each
  * packet, every MT_CONTROL mbuf.  For each SOL_SOCKET/SCM_RIGHTS message
  * found, call 'op' with the message payload and the number of
  * struct filedescent pointers it holds.
  */
 static void
 unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int))
 {
 	struct mbuf *pkt, *m;
 	struct cmsghdr *cm;
 	void *data;
 	socklen_t clen, datalen;
 
 	for (pkt = m0; pkt != NULL; pkt = pkt->m_nextpkt) {
 		for (m = pkt; m != NULL; m = m->m_next) {
 			if (m->m_type != MT_CONTROL)
 				continue;
 
 			cm = mtod(m, struct cmsghdr *);
 			clen = m->m_len;
 
 			/*
 			 * Stop as soon as the remaining bytes cannot hold a
 			 * header or the header claims more than is left.
 			 */
 			while (cm != NULL && sizeof(*cm) <= clen &&
 			    cm->cmsg_len <= clen) {
 				data = CMSG_DATA(cm);
 				datalen = (caddr_t)cm + cm->cmsg_len
 				    - (caddr_t)data;
 
 				if (cm->cmsg_level == SOL_SOCKET &&
 				    cm->cmsg_type == SCM_RIGHTS)
 					(*op)(data, datalen /
 					    sizeof(struct filedescent *));
 
 				/* Nothing follows this message. */
 				if (CMSG_SPACE(datalen) >= clen)
 					break;
 				clen -= CMSG_SPACE(datalen);
 				cm = (struct cmsghdr *)
 				    ((caddr_t)cm + CMSG_SPACE(datalen));
 			}
 		}
 	}
 }
 
 /*
  * A helper function called by VFS before socket-type vnode reclamation.
  * For an active vnode it clears unp_vnode pointer and decrements unp_vnode
  * use count.
  *
  * The vnode must be exclusively locked and of type VSOCK; both conditions
  * are asserted.
  */
 void
 vfs_unp_reclaim(struct vnode *vp)
 {
 	struct socket *so;
 	struct unpcb *unp;
 	int active;
 
 	ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim");
 	KASSERT(vp->v_type == VSOCK,
 	    ("vfs_unp_reclaim: vp->v_type != VSOCK"));
 
 	/* Set when the pcb still pointed at vp and a vunref() is owed. */
 	active = 0;
 	UNP_LINK_WLOCK();
 	/* Fetch the socket currently associated with the vnode, if any. */
 	VOP_UNP_CONNECT(vp, &so);
 	if (so == NULL)
 		goto done;
 	unp = sotounpcb(so);
 	if (unp == NULL)
 		goto done;
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_vnode == vp) {
 		VOP_UNP_DETACH(vp);
 		unp->unp_vnode = NULL;
 		active = 1;
 	}
 	UNP_PCB_UNLOCK(unp);
 done:
 	UNP_LINK_WUNLOCK();
 	/* The use count is dropped only after the link lock is released. */
 	if (active)
 		vunref(vp);
 }
 
 #ifdef DDB
 /* Emit 'indent' spaces on the debugger console; nothing if indent <= 0. */
 static void
 db_print_indent(int indent)
 {
 
 	while (indent-- > 0)
 		db_printf(" ");
 }
 
 /*
  * Print the names of the UNP_* bits set in 'unp_flags' as a comma-separated
  * list.  Bits not tested below are not reported.
  */
 static void
 db_print_unpflags(int unp_flags)
 {
 	const char *sep;
 
 	/* Empty before the first name, ", " before every later one. */
 	sep = "";
 	if ((unp_flags & UNP_HAVEPC) != 0) {
 		db_printf("%sUNP_HAVEPC", sep);
 		sep = ", ";
 	}
 	if ((unp_flags & UNP_HAVEPCCACHED) != 0) {
 		db_printf("%sUNP_HAVEPCCACHED", sep);
 		sep = ", ";
 	}
 	if ((unp_flags & UNP_WANTCRED) != 0) {
 		db_printf("%sUNP_WANTCRED", sep);
 		sep = ", ";
 	}
 	if ((unp_flags & UNP_CONNWAIT) != 0) {
 		db_printf("%sUNP_CONNWAIT", sep);
 		sep = ", ";
 	}
 	if ((unp_flags & UNP_CONNECTING) != 0) {
 		db_printf("%sUNP_CONNECTING", sep);
 		sep = ", ";
 	}
 	if ((unp_flags & UNP_BINDING) != 0) {
 		db_printf("%sUNP_BINDING", sep);
 		sep = ", ";
 	}
 }
 
 /*
  * Print the contents of a struct xucred on two lines, each indented by
  * 'indent' spaces: the scalar fields first, then the group list.
  */
 static void
 db_print_xucred(int indent, struct xucred *xu)
 {
 	const char *sep;
 	int grp;
 
 	db_print_indent(indent);
 	db_printf("cr_version: %u   cr_uid: %u   cr_ngroups: %d\n",
 	    xu->cr_version, xu->cr_uid, xu->cr_ngroups);
 
 	db_print_indent(indent);
 	db_printf("cr_groups: ");
 	sep = "";
 	for (grp = 0; grp < xu->cr_ngroups; grp++) {
 		db_printf("%s%u", sep, xu->cr_groups[grp]);
 		sep = ", ";
 	}
 	db_printf("\n");
 }
 
 /*
  * Print the addresses of the unpcbs linked on 'uh' through unp_reflink,
  * four per line, each line indented by 'indent' spaces.
  */
 static void
 db_print_unprefs(int indent, struct unp_head *uh)
 {
 	struct unpcb *unp;
 	int col, printed;
 
 	printed = 0;
 	LIST_FOREACH(unp, uh, unp_reflink) {
 		col = printed % 4;
 		if (col == 0)
 			db_print_indent(indent);
 		db_printf("%p  ", unp);
 		if (col == 3)
 			db_printf("\n");
 		printed++;
 	}
 	/* Terminate a final row that holds fewer than four entries. */
 	if (printed % 4 != 0)
 		db_printf("\n");
 }
 
 DB_SHOW_COMMAND(unpcb, db_show_unpcb)
 {
 	struct unpcb *unp;
 
         if (!have_addr) {
                 db_printf("usage: show unpcb <addr>\n");
                 return;
         }
         unp = (struct unpcb *)addr;
 
 	db_printf("unp_socket: %p   unp_vnode: %p\n", unp->unp_socket,
 	    unp->unp_vnode);
 
 	db_printf("unp_ino: %ju   unp_conn: %p\n", (uintmax_t)unp->unp_ino,
 	    unp->unp_conn);
 
 	db_printf("unp_refs:\n");
 	db_print_unprefs(2, &unp->unp_refs);
 
 	/* XXXRW: Would be nice to print the full address, if any. */
 	db_printf("unp_addr: %p\n", unp->unp_addr);
 
 	db_printf("unp_gencnt: %llu\n",
 	    (unsigned long long)unp->unp_gencnt);
 
 	db_printf("unp_flags: %x (", unp->unp_flags);
 	db_print_unpflags(unp->unp_flags);
 	db_printf(")\n");
 
 	db_printf("unp_peercred:\n");
 	db_print_xucred(2, &unp->unp_peercred);
 
 	db_printf("unp_refcount: %u\n", unp->unp_refcount);
 }
 #endif
Index: projects/clang380-import/sys/mips/mips/elf_machdep.c
===================================================================
--- projects/clang380-import/sys/mips/mips/elf_machdep.c	(revision 293686)
+++ projects/clang380-import/sys/mips/mips/elf_machdep.c	(revision 293687)
@@ -1,351 +1,353 @@
 /*-
  * Copyright 1996-1998 John D. Polstra.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  *	from: src/sys/i386/i386/elf_machdep.c,v 1.20 2004/08/11 02:35:05 marcel
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/linker.h>
 #include <sys/sysent.h>
 #include <sys/imgact_elf.h>
 #include <sys/proc.h>
 #include <sys/syscall.h>
 #include <sys/signalvar.h>
 #include <sys/vnode.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_param.h>
 
 #include <machine/elf.h>
 #include <machine/md_var.h>
 #include <machine/cache.h>
 
 #ifdef __mips_n64
 /*
  * System call vector for FreeBSD ELF64 executables; compiled only when
  * __mips_n64 is defined.  Hooks set to NULL are not provided by this ABI.
  */
 struct sysentvec elf64_freebsd_sysvec = {
 	.sv_size	= SYS_MAXSYSCALL,
 	.sv_table	= sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= 0,
 	.sv_errtbl	= NULL,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= __elfN(freebsd_fixup),
 	.sv_sendsig	= sendsig,
 	.sv_sigcode	= sigcode,
 	.sv_szsigcode	= &szsigcode,
 	.sv_name	= "FreeBSD ELF64",
 	.sv_coredump	= __elfN(coredump),
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings = exec_copyout_strings,
 	.sv_setregs	= exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_FREEBSD | SV_LP64,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = syscallnames,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
+	.sv_trap	= NULL,
 };
 
 /*
  * Brand entry associating FreeBSD-branded EM_MIPS ELF64 images with
  * elf64_freebsd_sysvec and the /libexec/ld-elf.so.1 interpreter.
  */
 static Elf64_Brandinfo freebsd_brand_info = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_MIPS,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/libexec/ld-elf.so.1",
 	.sysvec		= &elf64_freebsd_sysvec,
 	.interp_newpath	= NULL,
 	.flags		= 0
 };
 
 /* Register the brand through elf64_insert_brand_entry() at SI_SUB_EXEC. */
 SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_ANY,
     (sysinit_cfunc_t) elf64_insert_brand_entry,
     &freebsd_brand_info);
 
 /*
  * Machine-dependent thread dump hook for ELF64.  This implementation writes
  * nothing: all three arguments are unused and *off is left untouched.
  */
 void
 elf64_dump_thread(struct thread *td __unused, void *dst __unused,
     size_t *off __unused)
 {
 }
 #else
 /*
  * System call vector for FreeBSD ELF32 executables; compiled when
  * __mips_n64 is not defined.  Hooks set to NULL are not provided by this ABI.
  */
 struct sysentvec elf32_freebsd_sysvec = {
 	.sv_size	= SYS_MAXSYSCALL,
 	.sv_table	= sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= 0,
 	.sv_errtbl	= NULL,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= __elfN(freebsd_fixup),
 	.sv_sendsig	= sendsig,
 	.sv_sigcode	= sigcode,
 	.sv_szsigcode	= &szsigcode,
 	.sv_name	= "FreeBSD ELF32",
 	.sv_coredump	= __elfN(coredump),
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings = exec_copyout_strings,
 	.sv_setregs	= exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_FREEBSD | SV_ILP32,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = syscallnames,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
+	.sv_trap	= NULL,
 };
 
 /*
  * Brand entry associating FreeBSD-branded EM_MIPS ELF32 images with
  * elf32_freebsd_sysvec and the /libexec/ld-elf.so.1 interpreter.
  */
 static Elf32_Brandinfo freebsd_brand_info = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_MIPS,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/libexec/ld-elf.so.1",
 	.sysvec		= &elf32_freebsd_sysvec,
 	.interp_newpath	= NULL,
 	.flags		= 0
 };
 
 /* Register the brand through elf32_insert_brand_entry() at SI_SUB_EXEC. */
 SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST,
     (sysinit_cfunc_t) elf32_insert_brand_entry,
     &freebsd_brand_info);
 
 /*
  * Machine-dependent thread dump hook for ELF32.  This implementation writes
  * nothing: all three arguments are unused and *off is left untouched.
  */
 void
 elf32_dump_thread(struct thread *td __unused, void *dst __unused,
     size_t *off __unused)
 {
 }
 #endif
 
 /*
  * Process one elf relocation with addend.
  *
  * 'data' points to an Elf_Rel or Elf_Rela record as selected by 'type'
  * (ELF_RELOC_REL or ELF_RELOC_RELA); any other 'type' panics.  For REL
  * records the addend is read from the location being patched.  Symbol
  * values are obtained through 'lookup'.  'local' is accepted but not
  * consulted by this implementation.
  *
  * Returns 0 on success.  Returns -1 when a symbol lookup fails, when the
  * relocation type is not handled, or when a REL-style R_MIPS_LO16 is met
  * before any R_MIPS_HI16 has been recorded.
  */
 static int
 elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, int local, elf_lookup_fn lookup)
 {
 	Elf32_Addr *where = (Elf32_Addr *)NULL;
 	Elf_Addr addr;
 	Elf_Addr addend = (Elf_Addr)0;
 	Elf_Word rtype = (Elf_Word)0, symidx;
 	const Elf_Rel *rel = NULL;
 	const Elf_Rela *rela = NULL;
 	int error;
 
 	/*
 	 * Stash R_MIPS_HI16 info so we can use it when processing R_MIPS_LO16
 	 */
 	static Elf_Addr ahl;
 	static Elf32_Addr *where_hi16;
 
 	switch (type) {
 	case ELF_RELOC_REL:
 		rel = (const Elf_Rel *)data;
 		where = (Elf32_Addr *) (relocbase + rel->r_offset);
 		rtype = ELF_R_TYPE(rel->r_info);
 		symidx = ELF_R_SYM(rel->r_info);
 		/* The implicit addend is as wide as the field being patched. */
 		switch (rtype) {
 		case R_MIPS_64:
 			addend = *(Elf64_Addr *)where;
 			break;
 		default:
 			addend = *where;
 			break;
 		}
 
 		break;
 	case ELF_RELOC_RELA:
 		rela = (const Elf_Rela *)data;
 		where = (Elf32_Addr *) (relocbase + rela->r_offset);
 		addend = rela->r_addend;
 		rtype = ELF_R_TYPE(rela->r_info);
 		symidx = ELF_R_SYM(rela->r_info);
 		break;
 	default:
 		panic("unknown reloc type %d\n", type);
 	}
 
 	switch (rtype) {
 	case R_MIPS_NONE:	/* none */
 		break;
 
 	case R_MIPS_32:		/* S + A */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 		addr += addend;
 		if (*where != addr)
 			*where = (Elf32_Addr)addr;
 		break;
 
 	case R_MIPS_26:		/* ((A << 2) | (P & 0xf0000000) + S) >> 2 */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 
 		addend &= 0x03ffffff;
 		/*
 		 * Addendum for .rela R_MIPS_26 is not shifted right
 		 */
 		if (rela == NULL)
 			addend <<= 2;
 
 		addr += ((Elf_Addr)where & 0xf0000000) | addend;
 		addr >>= 2;
 
 		*where &= ~0x03ffffff;
 		*where |= addr & 0x03ffffff;
 		break;
 
 	case R_MIPS_64:		/* S + A */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 		addr += addend;
 		if (*(Elf64_Addr*)where != addr)
 			*(Elf64_Addr*)where = addr;
 		break;
 
 	case R_MIPS_HI16:	/* ((AHL + S) - ((short)(AHL + S)) >> 16 */
 		if (rela != NULL) {
 			error = lookup(lf, symidx, 1, &addr);
 			if (error != 0)
 				return (-1);
 			addr += addend;
 			*where &= 0xffff0000;
 			*where |= ((((long long) addr + 0x8000LL) >> 16) & 0xffff);
 		}
 		else {
 			/* REL: remember the high half until the LO16 arrives. */
 			ahl = addend << 16;
 			where_hi16 = where;
 		}
 		break;
 
 	case R_MIPS_LO16:	/* AHL + S */
 		if (rela != NULL) {
 			error = lookup(lf, symidx, 1, &addr);
 			if (error != 0)
 				return (-1);
 			addr += addend;
 			*where &= 0xffff0000;
 			*where |= addr & 0xffff;
 		}
 		else {
 			/*
 			 * The HI16 instruction recorded earlier is patched
 			 * below.  If none has been recorded yet the pointer
 			 * is still NULL, so fail the relocation instead of
 			 * dereferencing it.
 			 */
 			if (where_hi16 == NULL) {
 				printf("kldload: R_MIPS_LO16 without "
 				    "preceding R_MIPS_HI16\n");
 				return (-1);
 			}
 
 			ahl += (int16_t)addend;
 			error = lookup(lf, symidx, 1, &addr);
 			if (error != 0)
 				return (-1);
 
 			addend &= 0xffff0000;
 			addend |= (uint16_t)(ahl + addr);
 			*where = addend;
 
 			addend = *where_hi16;
 			addend &= 0xffff0000;
 			addend |= ((ahl + addr) - (int16_t)(ahl + addr)) >> 16;
 			*where_hi16 = addend;
 		}
 
 		break;
 
 	case R_MIPS_HIGHER:	/* %higher(A+S) */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 		addr += addend;
 		*where &= 0xffff0000;
 		*where |= (((long long)addr + 0x80008000LL) >> 32) & 0xffff;
 		break;
 
 	case R_MIPS_HIGHEST:	/* %highest(A+S) */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 		addr += addend;
 		*where &= 0xffff0000;
 		*where |= (((long long)addr + 0x800080008000LL) >> 48) & 0xffff;
 		break;
 
 	default:
 		printf("kldload: unexpected relocation type %d\n",
 			rtype);
 		return (-1);
 	}
 
 	return(0);
 }
 
 /*
  * Apply one relocation; forwards to elf_reloc_internal() with 'local' set
  * to 0 and returns its result (0 on success, -1 on failure).
  */
 int
 elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type,
     elf_lookup_fn lookup)
 {
 
 	return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup));
 }
 
 /*
  * Apply one relocation; forwards to elf_reloc_internal() with 'local' set
  * to 1 and returns its result (0 on success, -1 on failure).
  */
 int
 elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, elf_lookup_fn lookup)
 {
 
 	return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup));
 }
 
 /*
  * Machine-dependent hook for a loaded linker file.  The file argument is
  * unused; the instruction cache is synchronized and 0 is returned.
  */
 int
 elf_cpu_load_file(linker_file_t lf __unused)
 {
 
 	/*
 	 * Sync the I and D caches to make sure our relocations are visible.
 	 */
 	mips_icache_sync_all();
 
 	return (0);
 }
 
 /*
  * Machine-dependent hook for unloading a linker file.  Nothing is done on
  * this platform; the argument is unused and 0 is always returned.
  */
 int
 elf_cpu_unload_file(linker_file_t lf __unused)
 {
 
 	return (0);
 }
Index: projects/clang380-import/sys/mips/mips/freebsd32_machdep.c
===================================================================
--- projects/clang380-import/sys/mips/mips/freebsd32_machdep.c	(revision 293686)
+++ projects/clang380-import/sys/mips/mips/freebsd32_machdep.c	(revision 293687)
@@ -1,488 +1,489 @@
 /*-
  * Copyright (c) 2012 Juli Mallett <jmallett@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * Based on nwhitehorn's COMPAT_FREEBSD32 support code for PowerPC64.
  */
 
 #include "opt_compat.h"
 
 #define __ELF_WORD_SIZE 32
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/sysent.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/namei.h>
 #include <sys/fcntl.h>
 #include <sys/sysent.h>
 #include <sys/imgact_elf.h>
 #include <sys/syscall.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/vnode.h>
 #include <sys/linker.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 
 #include <machine/md_var.h>
 #include <machine/reg.h>
 #include <machine/sigframe.h>
 #include <machine/sysarch.h>
 
 #include <compat/freebsd32/freebsd32_signal.h>
 #include <compat/freebsd32/freebsd32_util.h>
 #include <compat/freebsd32/freebsd32_proto.h>
 
 static void freebsd32_exec_setregs(struct thread *, struct image_params *, u_long);
 static int get_mcontext32(struct thread *, mcontext32_t *, int);
 static int set_mcontext32(struct thread *, mcontext32_t *);
 static void freebsd32_sendsig(sig_t, ksiginfo_t *, sigset_t *);
 
 extern const char *freebsd32_syscallnames[];
 
 /*
  * System call vector for 32-bit FreeBSD ELF executables run through the
  * freebsd32 compatibility layer.  It uses the freebsd32 syscall table,
  * signal delivery, string copy-out and register setup routines, and limits
  * user addresses to below 0x80000000.
  */
 struct sysentvec elf32_freebsd_sysvec = {
 	.sv_size	= SYS_MAXSYSCALL,
 	.sv_table	= freebsd32_sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= 0,
 	.sv_errtbl	= NULL,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= __elfN(freebsd_fixup),
 	.sv_sendsig	= freebsd32_sendsig,
 	.sv_sigcode	= sigcode32,
 	.sv_szsigcode	= &szsigcode32,
 	.sv_name	= "FreeBSD ELF32",
 	.sv_coredump	= __elfN(coredump),
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= ((vm_offset_t)0x80000000),
 	.sv_usrstack	= FREEBSD32_USRSTACK,
 	.sv_psstrings	= FREEBSD32_PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings = freebsd32_copyout_strings,
 	.sv_setregs	= freebsd32_exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_FREEBSD | SV_ILP32,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = freebsd32_syscallnames,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
+	.sv_trap	= NULL,
 };
 INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec);
 
 /*
  * Brand entry associating FreeBSD-branded EM_MIPS ELF32 images with
  * elf32_freebsd_sysvec.  The interpreter path /libexec/ld-elf.so.1 has
  * /libexec/ld-elf32.so.1 registered as its replacement (interp_newpath).
  */
 static Elf32_Brandinfo freebsd_brand_info = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_MIPS,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/libexec/ld-elf.so.1",
 	.sysvec		= &elf32_freebsd_sysvec,
 	.interp_newpath	= "/libexec/ld-elf32.so.1",
 	.flags		= 0
 };
 
 /* Register the brand through elf32_insert_brand_entry() at SI_SUB_EXEC. */
 SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST,
     (sysinit_cfunc_t) elf32_insert_brand_entry,
     &freebsd_brand_info);
 
 /*
  * Register setup hook for 32-bit images: run the native exec_setregs(),
  * then lower the stack pointer by 64 KiB and clear MIPS_SR_UX in the
  * saved status register.
  */
 static void
 freebsd32_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
 {
 	exec_setregs(td, imgp, stack);
 
 	/*
 	 * See comment in exec_setregs about running 32-bit binaries with 64-bit
 	 * registers.
 	 */
 	td->td_frame->sp -= 65536;
 
 	/*
 	 * Clear extended address space bit for userland.
 	 */
 	td->td_frame->sr &= ~MIPS_SR_UX;
 }
 
 int
 set_regs32(struct thread *td, struct reg32 *regs)
 {
 	struct reg r;
 	unsigned i;
 
 	for (i = 0; i < NUMSAVEREGS; i++)
 		r.r_regs[i] = regs->r_regs[i];
 
 	return (set_regs(td, &r));
 }
 
 int
 fill_regs32(struct thread *td, struct reg32 *regs)
 {
 	struct reg r;
 	unsigned i;
 	int error;
 
 	error = fill_regs(td, &r);
 	if (error != 0)
 		return (error);
 
 	for (i = 0; i < NUMSAVEREGS; i++)
 		regs->r_regs[i] = r.r_regs[i];
 
 	return (0);
 }
 
 int
 set_fpregs32(struct thread *td, struct fpreg32 *fpregs)
 {
 	struct fpreg fp;
 	unsigned i;
 
 	for (i = 0; i < NUMFPREGS; i++)
 		fp.r_regs[i] = fpregs->r_regs[i];
 
 	return (set_fpregs(td, &fp));
 }
 
 int
 fill_fpregs32(struct thread *td, struct fpreg32 *fpregs)
 {
 	struct fpreg fp;
 	unsigned i;
 	int error;
 
 	error = fill_fpregs(td, &fp);
 	if (error != 0)
 		return (error);
 
 	for (i = 0; i < NUMFPREGS; i++)
 		fpregs->r_regs[i] = fp.r_regs[i];
 
 	return (0);
 }
 
 /*
  * Produce the 32-bit machine context of 'td' in '*mcp'.
  *
  * The native context is obtained with get_mcontext() using 'flags' and then
  * copied member by member into the 32-bit layout; mc_tls is narrowed to 32
  * bits.  Returns 0 on success or the error reported by get_mcontext(), in
  * which case '*mcp' is not written.
  */
 static int
 get_mcontext32(struct thread *td, mcontext32_t *mcp, int flags)
 {
 	mcontext_t mcp64;
 	unsigned i;
 	int error;
 
 	/*
 	 * Every member read below comes from this stack object, and callers
 	 * copy the result out to userland.  Start from zeroes so that any
 	 * member get_mcontext() leaves unset cannot carry stale kernel stack
 	 * contents.  (get_mcontext() is not visible here; confirm whether it
 	 * fills mc_fpregs when the FPU is unused.)
 	 */
 	bzero(&mcp64, sizeof(mcp64));
 
 	error = get_mcontext(td, &mcp64, flags);
 	if (error != 0)
 		return (error);
 
 	mcp->mc_onstack = mcp64.mc_onstack;
 	mcp->mc_pc = mcp64.mc_pc;
 	for (i = 0; i < 32; i++)
 		mcp->mc_regs[i] = mcp64.mc_regs[i];
 	mcp->sr = mcp64.sr;
 	mcp->mullo = mcp64.mullo;
 	mcp->mulhi = mcp64.mulhi;
 	mcp->mc_fpused = mcp64.mc_fpused;
 	for (i = 0; i < 33; i++)
 		mcp->mc_fpregs[i] = mcp64.mc_fpregs[i];
 	mcp->mc_fpc_eir = mcp64.mc_fpc_eir;
 	mcp->mc_tls = (int32_t)(intptr_t)mcp64.mc_tls;
 
 	return (0);
 }
 
 /*
  * Install the 32-bit machine context '*mcp' on 'td': widen it member by
  * member into a native mcontext_t and pass that to set_mcontext(), whose
  * return value is handed back.
  */
 static int
 set_mcontext32(struct thread *td, mcontext32_t *mcp)
 {
 	mcontext_t wide;
 	unsigned idx;
 
 	/* Scalar members. */
 	wide.mc_onstack = mcp->mc_onstack;
 	wide.mc_pc = mcp->mc_pc;
 	wide.sr = mcp->sr;
 	wide.mullo = mcp->mullo;
 	wide.mulhi = mcp->mulhi;
 	wide.mc_fpused = mcp->mc_fpused;
 	wide.mc_fpc_eir = mcp->mc_fpc_eir;
 	wide.mc_tls = (void *)(intptr_t)mcp->mc_tls;
 
 	/* Register arrays. */
 	for (idx = 0; idx < 32; idx++)
 		wide.mc_regs[idx] = mcp->mc_regs[idx];
 	for (idx = 0; idx < 33; idx++)
 		wide.mc_fpregs[idx] = mcp->mc_fpregs[idx];
 
 	return (set_mcontext(td, &wide));
 }
 
 /*
  * 32-bit sigreturn: copy in the ucontext32_t at uap->sigcntxp, install its
  * machine context and then its signal mask.
  *
  * Returns EFAULT if the context cannot be copied in, the error from
  * set_mcontext32() if the machine context is rejected, and EJUSTRETURN on
  * success.
  */
 int
 freebsd32_sigreturn(struct thread *td, struct freebsd32_sigreturn_args *uap)
 {
 	ucontext32_t uc;
 	int error;
 
 	CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp);
 
 	if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) {
 		CTR1(KTR_SIG, "sigreturn: efault td=%p", td);
 		return (EFAULT);
 	}
 
 	error = set_mcontext32(td, &uc.uc_mcontext);
 	if (error != 0)
 		return (error);
 
 	/* The mask is only changed once the machine context was accepted. */
 	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
 
 #if 0
 	CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x",
 	     td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]);
 #endif
 
 	return (EJUSTRETURN);
 }
 
 /*
  * The first two fields of a ucontext_t are the signal mask and the machine
  * context.  The next field is uc_link; we want to avoid destroying the link
  * when copying out contexts.
  */
 #define	UC32_COPY_SIZE	offsetof(ucontext32_t, uc_link)
 
 /*
  * 32-bit getcontext: store the calling thread's machine context and signal
  * mask in the user buffer uap->ucp.  Only the first UC32_COPY_SIZE bytes of
  * the context are written.
  *
  * Returns EINVAL when uap->ucp is NULL, otherwise the result of copyout().
  */
 int
 freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap)
 {
 	ucontext32_t uc;
 	int ret;
 
 	if (uap->ucp == NULL)
 		ret = EINVAL;
 	else {
 		/*
 		 * 'uc' lives on the kernel stack and is copied to userland.
 		 * get_mcontext32() assigns members one at a time, so clear
 		 * the structure first; otherwise padding and any member it
 		 * does not assign would disclose stale stack contents.
 		 */
 		bzero(&uc, sizeof(uc));
 		get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET);
 		PROC_LOCK(td->td_proc);
 		uc.uc_sigmask = td->td_sigmask;
 		PROC_UNLOCK(td->td_proc);
 		ret = copyout(&uc, uap->ucp, UC32_COPY_SIZE);
 	}
 	return (ret);
 }
 
 /*
  * 32-bit setcontext: read UC32_COPY_SIZE bytes of context from uap->ucp,
  * install the machine context and then the signal mask.
  *
  * Returns EINVAL for a NULL uap->ucp, the error from copyin() or
  * set_mcontext32() if either fails, and EJUSTRETURN on success.
  */
 int
 freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap)
 {
 	ucontext32_t uc;
 	int ret;
 
 	if (uap->ucp == NULL)
 		return (EINVAL);
 
 	ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE);
 	if (ret != 0)
 		return (ret);
 
 	ret = set_mcontext32(td, &uc.uc_mcontext);
 	if (ret != 0)
 		return (ret);
 
 	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
 	return (EJUSTRETURN);
 }
 
 /*
  * 32-bit swapcontext: save the calling thread's context to uap->oucp, then
  * load the context found at uap->ucp.  Only the first UC32_COPY_SIZE bytes
  * of each context are transferred.
  *
  * Returns EINVAL when either pointer is NULL, the error from copyout(),
  * copyin() or set_mcontext32() if one of them fails, and EJUSTRETURN on
  * success.
  */
 int
 freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap)
 {
 	ucontext32_t uc;
 	int ret;
 
 	if (uap->oucp == NULL || uap->ucp == NULL)
 		ret = EINVAL;
 	else {
 		/*
 		 * 'uc' lives on the kernel stack and is copied to userland.
 		 * get_mcontext32() assigns members one at a time, so clear
 		 * the structure first; otherwise padding and any member it
 		 * does not assign would disclose stale stack contents.
 		 */
 		bzero(&uc, sizeof(uc));
 		get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET);
 		PROC_LOCK(td->td_proc);
 		uc.uc_sigmask = td->td_sigmask;
 		PROC_UNLOCK(td->td_proc);
 		ret = copyout(&uc, uap->oucp, UC32_COPY_SIZE);
 		if (ret == 0) {
 			/* The old context is saved; now load the new one. */
 			ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE);
 			if (ret == 0) {
 				ret = set_mcontext32(td, &uc.uc_mcontext);
 				if (ret == 0) {
 					kern_sigprocmask(td, SIG_SETMASK,
 					    &uc.uc_sigmask, NULL, 0);
 				}
 			}
 		}
 	}
 	return (ret == 0 ? EJUSTRETURN : ret);
 }
 
 #define	UCONTEXT_MAGIC	0xACEDBADE
 
 /*
  * Send an interrupt to process.
  *
  * Stack is set up to allow sigcode stored
  * at top to call routine, followed by kcall
  * to sigreturn routine below.	After sigreturn
  * resets the signal mask, the stack, and the
  * frame pointer, it returns to the user
  * specified pc, psl.
  *
  * 'catcher' is the user handler, 'ksi' describes the signal and 'mask' is
  * the signal mask saved in the frame.  The process lock and the sigacts
  * mutex are asserted on entry, dropped around the copyout() and taken
  * again before returning.  If the frame cannot be copied out, sigexit() is
  * called with SIGILL.
  */
 static void
 freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
 {
 	struct proc *p;
 	struct thread *td;
 	struct fpreg32 fpregs;
 	struct reg32 regs;
 	struct sigacts *psp;
 	struct sigframe32 sf, *sfp;
 	int sig;
 	int oonstack;
 	unsigned i;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sig = ksi->ksi_signo;
 	psp = p->p_sigacts;
 	mtx_assert(&psp->ps_mtx, MA_OWNED);
 
 	fill_regs32(td, &regs);
 	/* Record whether the thread is already on the signal stack. */
 	oonstack = sigonstack(td->td_frame->sp);
 
 	/* save user context */
 	bzero(&sf, sizeof sf);
 	sf.sf_uc.uc_sigmask = *mask;
 	sf.sf_uc.uc_stack.ss_sp = (int32_t)(intptr_t)td->td_sigstk.ss_sp;
 	sf.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size;
 	sf.sf_uc.uc_stack.ss_flags = td->td_sigstk.ss_flags;
 	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 	sf.sf_uc.uc_mcontext.mc_pc = regs.r_regs[PC];
 	sf.sf_uc.uc_mcontext.mullo = regs.r_regs[MULLO];
 	sf.sf_uc.uc_mcontext.mulhi = regs.r_regs[MULHI];
 	sf.sf_uc.uc_mcontext.mc_tls = (int32_t)(intptr_t)td->td_md.md_tls;
 	/* Slot 0 holds the magic value, so the copy below starts at 1. */
 	sf.sf_uc.uc_mcontext.mc_regs[0] = UCONTEXT_MAGIC;  /* magic number */
 	for (i = 1; i < 32; i++)
 		sf.sf_uc.uc_mcontext.mc_regs[i] = regs.r_regs[i];
 	sf.sf_uc.uc_mcontext.mc_fpused = td->td_md.md_flags & MDTD_FPUSED;
 	if (sf.sf_uc.uc_mcontext.mc_fpused) {
 		/* if FPU has current state, save it first */
 		if (td == PCPU_GET(fpcurthread))
 			MipsSaveCurFPState(td);
 		fill_fpregs32(td, &fpregs);
 		for (i = 0; i < 33; i++)
 			sf.sf_uc.uc_mcontext.mc_fpregs[i] = fpregs.r_regs[i];
 	}
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Top of the alternate stack, rounded down to 8 bytes. */
 		sfp = (struct sigframe32 *)((vm_offset_t)(td->td_sigstk.ss_sp +
 		    td->td_sigstk.ss_size - sizeof(struct sigframe32))
 		    & ~(sizeof(__int64_t) - 1));
 	} else
 		/* Below the current stack pointer, rounded down to 8 bytes. */
 		sfp = (struct sigframe32 *)((vm_offset_t)(td->td_frame->sp - 
 		    sizeof(struct sigframe32)) & ~(sizeof(__int64_t) - 1));
 
 	/* Build the argument list for the signal handler. */
 	td->td_frame->a0 = sig;
 	td->td_frame->a2 = (register_t)(intptr_t)&sfp->sf_uc;
 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		td->td_frame->a1 = (register_t)(intptr_t)&sfp->sf_si;
 		/* sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; */
 
 		/* fill siginfo structure */
 		sf.sf_si.si_signo = sig;
 		sf.sf_si.si_code = ksi->ksi_code;
 		sf.sf_si.si_addr = td->td_frame->badvaddr;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		td->td_frame->a1 = ksi->ksi_code;
 		td->td_frame->a3 = td->td_frame->badvaddr;
 		/* sf.sf_ahu.sf_handler = catcher; */
 	}
 
 	/* Both locks are released for the duration of the copyout(). */
 	mtx_unlock(&psp->ps_mtx);
 	PROC_UNLOCK(p);
 
 	/*
 	 * Copy the sigframe out to the user's stack.
 	 */
 	if (copyout(&sf, sfp, sizeof(struct sigframe32)) != 0) {
 		/*
 		 * Something is wrong with the stack pointer.
 		 * ...Kill the process.
 		 */
 		PROC_LOCK(p);
 		sigexit(td, SIGILL);
 	}
 
 	/* Resume in the handler; t9 is given the same address as pc. */
 	td->td_frame->pc = (register_t)(intptr_t)catcher;
 	td->td_frame->t9 = (register_t)(intptr_t)catcher;
 	td->td_frame->sp = (register_t)(intptr_t)sfp;
 	/*
 	 * Signal trampoline code is at base of user stack.
 	 */
 	td->td_frame->ra = (register_t)(intptr_t)FREEBSD32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
 	/* Retake the locks in the order asserted on entry. */
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
 
 /*
  * 32-bit sysarch(2).  MIPS_SET_TLS stores uap->parms as the thread's TLS
  * base and returns 0; MIPS_GET_TLS copies the TLS base, narrowed to 32
  * bits, out to uap->parms and returns the copyout() result.  Every other
  * operation yields EINVAL.
  */
 int
 freebsd32_sysarch(struct thread *td, struct freebsd32_sysarch_args *uap)
 {
 	int32_t tlsbase;
 
 	if (uap->op == MIPS_SET_TLS) {
 		td->td_md.md_tls = (void *)(intptr_t)uap->parms;
 		return (0);
 	}
 	if (uap->op == MIPS_GET_TLS) {
 		tlsbase = (int32_t)(intptr_t)td->td_md.md_tls;
 		return (copyout(&tlsbase, uap->parms, sizeof(tlsbase)));
 	}
 	return (EINVAL);
 }
 
 /*
  * Machine-dependent thread dump hook for ELF32.  This implementation writes
  * nothing: all three arguments are unused and *off is left untouched.
  */
 void
 elf32_dump_thread(struct thread *td __unused, void *dst __unused,
     size_t *off __unused)
 {
 }
Index: projects/clang380-import/sys/net/if_arcsubr.c
===================================================================
--- projects/clang380-import/sys/net/if_arcsubr.c	(revision 293686)
+++ projects/clang380-import/sys/net/if_arcsubr.c	(revision 293687)
@@ -1,833 +1,832 @@
 /*	$NetBSD: if_arcsubr.c,v 1.36 2001/06/14 05:44:23 itojun Exp $	*/
 /*	$FreeBSD$ */
 
 /*-
  * Copyright (c) 1994, 1995 Ignatios Souvatzis
  * Copyright (c) 1982, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * from: NetBSD: if_ethersubr.c,v 1.9 1994/06/29 06:36:11 cgd Exp
  *       @(#)if_ethersubr.c	8.1 (Berkeley) 6/10/93
  *
  */
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/errno.h>
 #include <sys/syslog.h>
 
 #include <machine/cpu.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_arc.h>
 #include <net/if_arp.h>
 #include <net/bpf.h>
 #include <net/if_llatbl.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #endif
 
 #ifdef INET6
 #include <netinet6/nd6.h>
 #endif
 
 #define ARCNET_ALLOW_BROKEN_ARP
 
 /* Forward declarations for file-local helpers defined further down. */
 static struct mbuf *arc_defrag(struct ifnet *, struct mbuf *);
 static int arc_resolvemulti(struct ifnet *, struct sockaddr **,
 			    struct sockaddr *);
 
 /* Link-level address used for broadcast frames (address 0). */
 u_int8_t  arcbroadcastaddr = 0;
 
 /* The interface's link-level address viewed as a single byte (an lvalue). */
 #define ARC_LLADDR(ifp)	(*(u_int8_t *)IF_LLADDR(ifp))
 
 /* Store e in the local "error" and jump to the function's "bad" label. */
 #define senderr(e) { error = (e); goto bad;}
 /* View a generic sockaddr pointer as a const struct sockaddr_in pointer. */
 #define SIN(s)	((const struct sockaddr_in *)(s))
 
 /*
  * ARCnet output routine.
  * Encapsulate a packet of type family for the local net.
  * Assumes that ifp is actually pointer to arccom structure.
  *
  * ifp is the outgoing interface, m the packet, dst the next-hop address
  * (its sa_family selects the encapsulation) and ro an optional route
  * whose flags tell the resolvers whether the next hop is a gateway.
  *
  * Returns 0 on success (including the case where address resolution has
  * queued the packet and reported EWOULDBLOCK), otherwise an errno value.
  * The mbuf is consumed on every path: it is either handed on to the
  * resolver, if_simloop() or if_transmit(), or freed at the "bad" label.
  */
 int
 arc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
     struct route *ro)
 {
 	struct arc_header	*ah;
 	int			error;
 	u_int8_t		atype, adst;
 	/* -1: never loop a copy back; > 0: always loop a copy back. */
 	int			loop_copy = 0;
 	int			isphds;
 #if defined(INET) || defined(INET6)
 	int			is_gw = 0;
 #endif
 
 	/*
 	 * Interface is down or not running.  Leave through the common
 	 * error exit so that the packet is freed rather than leaked.
 	 */
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
 		senderr(ENETDOWN);
 
 	error = 0;
 #if defined(INET) || defined(INET6)
-	if (ro != NULL && ro->ro_rt != NULL &&
-	    (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0)
-		is_gw = 1;
+	if (ro != NULL)
+		is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
 #endif
 
 	/* Pick the destination station address and the ARCnet type. */
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 
 		/*
 		 * For now, use the simple IP addr -> ARCnet addr mapping
 		 */
 		if (m->m_flags & (M_BCAST|M_MCAST))
 			adst = arcbroadcastaddr; /* ARCnet broadcast address */
 		else if (ifp->if_flags & IFF_NOARP)
 			adst = ntohl(SIN(dst)->sin_addr.s_addr) & 0xFF;
 		else {
 			/* On error the mbuf is no longer ours to free. */
 			error = arpresolve(ifp, is_gw, m, dst, &adst, NULL);
 			if (error)
 				return (error == EWOULDBLOCK ? 0 : error);
 		}
 
 		atype = (ifp->if_flags & IFF_LINK0) ?
 			ARCTYPE_IP_OLD : ARCTYPE_IP;
 		break;
 	case AF_ARP:
 	{
 		struct arphdr *ah;
 		ah = mtod(m, struct arphdr *);
 		ah->ar_hrd = htons(ARPHRD_ARCNET);
 
 		loop_copy = -1; /* if this is for us, don't do it */
 
 		switch(ntohs(ah->ar_op)) {
 		case ARPOP_REVREQUEST:
 		case ARPOP_REVREPLY:
 			atype = ARCTYPE_REVARP;
 			break;
 		case ARPOP_REQUEST:
 		case ARPOP_REPLY:
 		default:
 			atype = ARCTYPE_ARP;
 			break;
 		}
 
 		if (m->m_flags & M_BCAST)
 			bcopy(ifp->if_broadcastaddr, &adst, ARC_ADDR_LEN);
 		else
 			bcopy(ar_tha(ah), &adst, ARC_ADDR_LEN);
         
 	}
 	break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		if ((m->m_flags & M_MCAST) != 0)
 			adst = arcbroadcastaddr; /* ARCnet broadcast address */
 		else {
 			/* On error the mbuf is no longer ours to free. */
 			error = nd6_resolve(ifp, is_gw, m, dst, &adst, NULL);
 			if (error != 0)
 				return (error == EWOULDBLOCK ? 0 : error);
 		}
 		atype = ARCTYPE_INET6;
 		break;
 #endif
 	case AF_UNSPEC:
 	    {
 		/* The caller supplies a ready-made ARCnet header in sa_data. */
 		const struct arc_header *ah;
 
 		loop_copy = -1;
 		ah = (const struct arc_header *)dst->sa_data;
 		adst = ah->arc_dhost;
 		atype = ah->arc_type;
 
 		if (atype == ARCTYPE_ARP) {
 			atype = (ifp->if_flags & IFF_LINK0) ?
 			    ARCTYPE_ARP_OLD: ARCTYPE_ARP;
 
 #ifdef ARCNET_ALLOW_BROKEN_ARP
 			/*
 			 * XXX It's not clear per RFC826 if this is needed, but
 			 * "assigned numbers" say this is wrong.
 			 * However, e.g., AmiTCP 3.0Beta used it... we make this
 			 * switchable for emergency cases. Not perfect, but...
 			 */
 			if (ifp->if_flags & IFF_LINK2)
 				mtod(m, struct arphdr *)->ar_pro = atype - 1;
 #endif
 		}
 		break;
 	    }
 	default:
 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
 		senderr(EAFNOSUPPORT);
 	}
 
 	/* Prepend the link header; PHDS types use the longer form. */
 	isphds = arc_isphds(atype);
 	M_PREPEND(m, isphds ? ARC_HDRNEWLEN : ARC_HDRLEN, M_NOWAIT);
 	if (m == 0)
 		senderr(ENOBUFS);
 	ah = mtod(m, struct arc_header *);
 	ah->arc_type = atype;
 	ah->arc_dhost = adst;
 	ah->arc_shost = ARC_LLADDR(ifp);
 	if (isphds) {
 		ah->arc_flag = 0;
 		ah->arc_seqid = 0;
 	}
 
 	if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) {
 		if ((m->m_flags & M_BCAST) || (loop_copy > 0)) {
 			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
 
 			/*
 			 * The copy is best effort: if no mbufs are available
 			 * skip the loopback rather than pass NULL on.
 			 */
 			if (n != NULL)
 				(void) if_simloop(ifp, n, dst->sa_family,
 				    ARC_HDRLEN);
 		} else if (ah->arc_dhost == ah->arc_shost) {
 			/* Addressed to ourselves: loop back, skip the wire. */
 			(void) if_simloop(ifp, m, dst->sa_family, ARC_HDRLEN);
 			return (0);     /* XXX */
 		}
 	}
 
 	BPF_MTAP(ifp, m);
 
 	error = ifp->if_transmit(ifp, m);
 
 	return (error);
 
 bad:
 	if (m)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * Clear the interface's "fragment in progress" pointer, so that the next
  * arc_frag_next() call dequeues a new packet from the send queue.
  */
 void
 arc_frag_init(struct ifnet *ifp)
 {
 
 	((struct arccom *)ifp->if_l2com)->curr_frag = 0;
 }
 
 /*
  * Return the next link-level frame to transmit on ifp, or 0 when the send
  * queue is empty or an mbuf operation fails.
  *
  * A packet whose type is not PHDS (see arc_isphds()) is returned exactly
  * as dequeued.  A PHDS packet has its leading ARC_HDRNEWLEN bytes removed
  * and is then handed back one piece of at most ARC_MAX_DATA bytes per
  * call, each piece getting a freshly prepended header that carries the
  * split flag and sequence id.  The part still to be sent is kept in
  * ac->curr_frag between calls.
  */
 struct mbuf *
 arc_frag_next(struct ifnet *ifp)
 {
 	struct arccom *ac;
 	struct mbuf *m;
 	struct arc_header *ah;
 
 	ac = (struct arccom *)ifp->if_l2com;
 	/* No packet in progress: start on a new one. */
 	if ((m = ac->curr_frag) == 0) {
 		int tfrags;
 
 		/* dequeue new packet */
 		IF_DEQUEUE(&ifp->if_snd, m);
 		if (m == 0)
 			return 0;
 
 		ah = mtod(m, struct arc_header *);
 		if (!arc_isphds(ah->arc_type))
 			return m;
 
 		++ac->ac_seqid;		/* make the seqid unique */
 		/* Number of ARC_MAX_DATA-sized pieces, rounded up. */
 		tfrags = (m->m_pkthdr.len + ARC_MAX_DATA - 1) / ARC_MAX_DATA;
 		/* fsflag is the flag value sent in the first piece. */
 		ac->fsflag = 2 * tfrags - 3;
 		ac->sflag = 0;
 		ac->rsflag = ac->fsflag;
 		/* Remember the addressing so every piece can repeat it. */
 		ac->arc_dhost = ah->arc_dhost;
 		ac->arc_shost = ah->arc_shost;
 		ac->arc_type = ah->arc_type;
 
 		m_adj(m, ARC_HDRNEWLEN);
 		ac->curr_frag = m;
 	}
 
 	/* split out next fragment and return it */
 	if (ac->sflag < ac->fsflag) {
 		/* we CAN'T have short packets here */
 		ac->curr_frag = m_split(m, ARC_MAX_DATA, M_NOWAIT);
 		if (ac->curr_frag == 0) {
 			m_freem(m);
 			return 0;
 		}
 
 		M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT);
 		if (m == 0) {
 			/* Give up on the rest of the packet as well. */
 			m_freem(ac->curr_frag);
 			ac->curr_frag = 0;
 			return 0;
 		}
 
 		ah = mtod(m, struct arc_header *);
 		ah->arc_flag = ac->rsflag;
 		ah->arc_seqid = ac->ac_seqid;
 
 		/* Later pieces carry the running sflag, stepping by 2. */
 		ac->sflag += 2;
 		ac->rsflag = ac->sflag;
 	} else if ((m->m_pkthdr.len >=
 	    ARC_MIN_FORBID_LEN - ARC_HDRNEWLEN + 2) &&
 	    (m->m_pkthdr.len <=
 	    ARC_MAX_FORBID_LEN - ARC_HDRNEWLEN + 2)) {
 		/*
 		 * Last (or only) piece whose length falls inside the
 		 * ARC_MIN_FORBID_LEN..ARC_MAX_FORBID_LEN window: use the
 		 * longer ARC_HDRNEWLEN_EXC header, with 0xFF/0xFFFF in the
 		 * first flag/seqid and the real values in the *2 fields.
 		 */
 		ac->curr_frag = 0;
 
 		M_PREPEND(m, ARC_HDRNEWLEN_EXC, M_NOWAIT);
 		if (m == 0)
 			return 0;
 
 		ah = mtod(m, struct arc_header *);
 		ah->arc_flag = 0xFF;
 		ah->arc_seqid = 0xFFFF;
 		ah->arc_type2 = ac->arc_type;
 		ah->arc_flag2 = ac->sflag;
 		ah->arc_seqid2 = ac->ac_seqid;
 	} else {
 		/* Last (or only) piece with an ordinary header. */
 		ac->curr_frag = 0;
 
 		M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT);
 		if (m == 0)
 			return 0;
 
 		ah = mtod(m, struct arc_header *);
 		ah->arc_flag = ac->sflag;
 		ah->arc_seqid = ac->ac_seqid;
 	}
 
 	/* Common to all three branches: restore the saved addressing. */
 	ah->arc_dhost = ac->arc_dhost;
 	ah->arc_shost = ac->arc_shost;
 	ah->arc_type = ac->arc_type;
 
 	return m;
 }
 
 /*
  * Defragmenter. Returns mbuf if last packet found, else
  * NULL. Frees incoming mbuf as necessary.
  *
  * Non-PHDS packets and unfragmented PHDS packets (flag 0) are returned
  * unchanged.  Fragments are collected per source address in
  * ac->ac_fragtab[]; the reassembled chain is returned once the final
  * fragment arrives.  Out-of-sequence input discards both the pending
  * chain and the new fragment and is logged.
  */
 
 static __inline struct mbuf *
 arc_defrag(struct ifnet *ifp, struct mbuf *m)
 {
 	struct arc_header *ah, *ah1;
 	struct arccom *ac;
 	struct ac_frag *af;
 	struct mbuf *m1;
 	char *s;
 	int newflen;
 	u_char src,dst,typ;
 
 	ac = (struct arccom *)ifp->if_l2com;
 
 	/* Make sure the full header is contiguous before reading it. */
 	if (m->m_len < ARC_HDRNEWLEN) {
 		m = m_pullup(m, ARC_HDRNEWLEN);
 		if (m == NULL) {
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 			return NULL;
 		}
 	}
 
 	ah = mtod(m, struct arc_header *);
 	typ = ah->arc_type;
 
 	if (!arc_isphds(typ))
 		return m;
 
 	src = ah->arc_shost;
 	dst = ah->arc_dhost;
 
 	/*
 	 * A flag of 0xff marks the longer exception header: drop the
 	 * first 4 bytes and re-read the header from the new start.
 	 */
 	if (ah->arc_flag == 0xff) {
 		m_adj(m, 4);
 
 		if (m->m_len < ARC_HDRNEWLEN) {
 			m = m_pullup(m, ARC_HDRNEWLEN);
 			if (m == NULL) {
 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 				return NULL;
 			}
 		}
 
 		ah = mtod(m, struct arc_header *);
 	}
 
 	/* Reassembly slot for this source, and its pending chain (if any). */
 	af = &ac->ac_fragtab[src];
 	m1 = af->af_packet;
 	s = "debug code error";
 
 	/* An odd flag marks the first fragment of a packet. */
 	if (ah->arc_flag & 1) {
 		/*
 		 * first fragment. We always initialize, which is
 		 * about the right thing to do, as we only want to
 		 * accept one fragmented packet per src at a time.
 		 */
 		if (m1 != NULL)
 			m_freem(m1);
 
 		af->af_packet = m;
 		m1 = m;
 		af->af_maxflag = ah->arc_flag;
 		af->af_lastseen = 0;
 		af->af_seqid = ah->arc_seqid;
 
 		return NULL;
 		/* notreached */
 	} else {
 		/* check for unfragmented packet */
 		if (ah->arc_flag == 0)
 			return m;
 
 		/* do we have a first packet from that src? */
 		if (m1 == NULL) {
 			s = "no first frag";
 			goto outofseq;
 		}
 
 		/* Header of the stored first fragment, for comparison. */
 		ah1 = mtod(m1, struct arc_header *);
 
 		if (ah->arc_seqid != ah1->arc_seqid) {
 			s = "seqid differs";
 			goto outofseq;
 		}
 
 		if (typ != ah1->arc_type) {
 			s = "type differs";
 			goto outofseq;
 		}
 
 		if (dst != ah1->arc_dhost) {
 			s = "dest host differs";
 			goto outofseq;
 		}
 
 		/* typ, seqid and dst are ok here. */
 
 		/* Repeat of the fragment already appended: drop it quietly. */
 		if (ah->arc_flag == af->af_lastseen) {
 			m_freem(m);
 			return NULL;
 		}
 
 		if (ah->arc_flag == af->af_lastseen + 2) {
 			/* ok, this is next fragment */
 			af->af_lastseen = ah->arc_flag;
 			m_adj(m,ARC_HDRNEWLEN);
 
 			/*
 			 * m_cat might free the first mbuf (with pkthdr)
 			 * in 2nd chain; therefore:
 			 */
 
 			newflen = m->m_pkthdr.len;
 
 			m_cat(m1,m);
 
 			m1->m_pkthdr.len += newflen;
 
 			/* is it the last one? */
 			if (af->af_lastseen > af->af_maxflag) {
 				af->af_packet = NULL;
 				return(m1);
 			} else
 				return NULL;
 		}
 		s = "other reason";
 		/* if all else fails, it is out of sequence, too */
 	}
 outofseq:
 	/* Discard the pending chain and the offending fragment, then log. */
 	if (m1) {
 		m_freem(m1);
 		af->af_packet = NULL;
 	}
 
 	if (m)
 		m_freem(m);
 
 	log(LOG_INFO,"%s: got out of seq. packet: %s\n",
 	    ifp->if_xname, s);
 
 	return NULL;
 }
 
 /*
  * Tell whether an ARCnet packet type uses the Packet Header Definition
  * Standard (PHDS) header: returns 1 if so, 0 if not.
  *
  * Only old-style IP, old-style ARP and the diagnose type are known not
  * to.  Lacking better information, every other type (new IP and ARP, and
  * by assumption IPX, APPLETALK and anything else) is treated as PHDS.
  */
 int
 arc_isphds(u_int8_t type)
 {
 	return (!(type == ARCTYPE_IP_OLD ||
 		  type == ARCTYPE_ARP_OLD ||
 		  type == ARCTYPE_DIAGNOSE));
 }
 
 /*
  * Process a received Arcnet packet;
  * the packet is in the mbuf chain m with
  * the ARCnet header.
  *
  * The packet is reassembled if needed, shown to BPF, filtered on its
  * destination address, stripped of its link header and dispatched to
  * the netisr chosen by its ARCnet type.  The mbuf is always consumed:
  * it is either dispatched or freed here.
  */
 void
 arc_input(struct ifnet *ifp, struct mbuf *m)
 {
 	struct arc_header *ah;
 	int isr;
 	u_int8_t atype;
 
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		return;
 	}
 
 	/* possibly defragment: */
 	m = arc_defrag(ifp, m);
 	/* NULL: fragment was stored or dropped; nothing to deliver yet. */
 	if (m == NULL)
 		return;
 
 	BPF_MTAP(ifp, m);
 
 	ah = mtod(m, struct arc_header *);
 	/* does this belong to us? */
 	if ((ifp->if_flags & IFF_PROMISC) == 0
 	    && ah->arc_dhost != arcbroadcastaddr
 	    && ah->arc_dhost != ARC_LLADDR(ifp)) {
 		m_freem(m);
 		return;
 	}
 
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 
 	if (ah->arc_dhost == arcbroadcastaddr) {
 		m->m_flags |= M_BCAST|M_MCAST;
 		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
 	}
 
 	/*
 	 * Strip the link header (ARC_HDRNEWLEN for the new types,
 	 * ARC_HDRLEN for the *_OLD types) and pick the protocol queue.
 	 */
 	atype = ah->arc_type;
 	switch (atype) {
 #ifdef INET
 	case ARCTYPE_IP:
 		m_adj(m, ARC_HDRNEWLEN);
 		isr = NETISR_IP;
 		break;
 
 	case ARCTYPE_IP_OLD:
 		m_adj(m, ARC_HDRLEN);
 		isr = NETISR_IP;
 		break;
 
 	case ARCTYPE_ARP:
 		if (ifp->if_flags & IFF_NOARP) {
 			/* Discard packet if ARP is disabled on interface */
 			m_freem(m);
 			return;
 		}
 		m_adj(m, ARC_HDRNEWLEN);
 		isr = NETISR_ARP;
 #ifdef ARCNET_ALLOW_BROKEN_ARP
 		/* Overwrite ar_pro with the IP ethertype before ARP sees it. */
 		mtod(m, struct arphdr *)->ar_pro = htons(ETHERTYPE_IP);
 #endif
 		break;
 
 	case ARCTYPE_ARP_OLD:
 		if (ifp->if_flags & IFF_NOARP) {
 			/* Discard packet if ARP is disabled on interface */
 			m_freem(m);
 			return;
 		}
 		m_adj(m, ARC_HDRLEN);
 		isr = NETISR_ARP;
 #ifdef ARCNET_ALLOW_BROKEN_ARP
 		/* Overwrite ar_pro with the IP ethertype before ARP sees it. */
 		mtod(m, struct arphdr *)->ar_pro = htons(ETHERTYPE_IP);
 #endif
 		break;
 #endif
 #ifdef INET6
 	case ARCTYPE_INET6:
 		m_adj(m, ARC_HDRNEWLEN);
 		isr = NETISR_IPV6;
 		break;
 #endif
 	default:
 		/* Unsupported type: drop. */
 		m_freem(m);
 		return;
 	}
 	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(isr, m);
 }
 
 /*
  * Register (new) link level address.
  */
 void
 arc_storelladdr(struct ifnet *ifp, u_int8_t lla)
 {
 	ARC_LLADDR(ifp) = lla;
 }
 
 /*
  * Perform common duties while attaching to interface list
  *
  * Attaches ifp, fills in the ARCnet link-layer defaults (one-byte
  * address, header length, MTU, baud rate when the driver left it at 0),
  * stores lla as the link level address and attaches BPF.
  */
 void
 arc_ifattach(struct ifnet *ifp, u_int8_t lla)
 {
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 	struct arccom *ac;
 
 	if_attach(ifp);
 	ifp->if_addrlen = 1;
 	ifp->if_hdrlen = ARC_HDRLEN;
 	ifp->if_mtu = 1500;
 	ifp->if_resolvemulti = arc_resolvemulti;
 	/* Keep a baud rate the driver already set. */
 	if (ifp->if_baudrate == 0)
 		ifp->if_baudrate = 2500000;
 	ifa = ifp->if_addr;
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 	/* Mark the interface's link-level sockaddr as ARCnet. */
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_ARCNET;
 	sdl->sdl_alen = ifp->if_addrlen;
 
 	if (ifp->if_flags & IFF_BROADCAST)
 		ifp->if_flags |= IFF_MULTICAST|IFF_ALLMULTI;
 
 	ac = (struct arccom *)ifp->if_l2com;
 	ac->ac_seqid = (time_second) & 0xFFFF; /* try to make seqid unique */
 	/* Address 0 is only warned about; it is still stored below. */
 	if (lla == 0) {
 		/* XXX this message isn't entirely clear, to me -- cgd */
 		log(LOG_ERR,"%s: link address 0 reserved for broadcasts.  Please change it and ifconfig %s down up\n",
 		   ifp->if_xname, ifp->if_xname);
 	}
 	arc_storelladdr(ifp, lla);
 
 	ifp->if_broadcastaddr = &arcbroadcastaddr;
 
 	bpfattach(ifp, DLT_ARCNET, ARC_HDRLEN);
 }
 
 /*
  * Undo arc_ifattach(): detach BPF from the interface, then detach the
  * interface itself.
  */
 void
 arc_ifdetach(struct ifnet *ifp)
 {
 	bpfdetach(ifp);
 	if_detach(ifp);
 }
 
 /*
  * Common ioctl handling for ARCnet interfaces.
  *
  * Handles SIOCSIFADDR, SIOCGIFADDR, SIOCADDMULTI/SIOCDELMULTI and
  * SIOCSIFMTU.  data is interpreted as a struct ifaddr or a struct ifreq
  * depending on the command.  Returns 0 or an errno value; note that a
  * command not listed here falls out of the switch and also returns 0.
  */
 int
 arc_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct ifaddr *ifa = (struct ifaddr *) data;
 	struct ifreq *ifr = (struct ifreq *) data;
 	int error = 0;
 
 	switch (command) {
 	case SIOCSIFADDR:
 		/* Assigning an address brings the interface up. */
 		ifp->if_flags |= IFF_UP;
 		switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 		case AF_INET:
 			ifp->if_init(ifp->if_softc);	/* before arpwhohas */
 			arp_ifinit(ifp, ifa);
 			break;
 #endif
 		default:
 			ifp->if_init(ifp->if_softc);
 			break;
 		}
 		break;
 
 	case SIOCGIFADDR:
 		{
 			struct sockaddr *sa;
 
 			/* Return the one-byte link address in sa_data[0]. */
 			sa = (struct sockaddr *) &ifr->ifr_data;
 			*(u_int8_t *)sa->sa_data = ARC_LLADDR(ifp);
 		}
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		/* Only the address family is checked here. */
 		if (ifr == NULL)
 			error = EAFNOSUPPORT;
 		else {
 			switch (ifr->ifr_addr.sa_family) {
 			case AF_INET:
 			case AF_INET6:
 				error = 0;
 				break;
 			default:
 				error = EAFNOSUPPORT;
 				break;
 			}
 		}
 		break;
 
 	case SIOCSIFMTU:
 		/*
 		 * Set the interface MTU.
 		 * mtu can't be larger than ARCMTU for RFC1051
 		 * and can't be larger than ARC_PHDS_MTU
 		 */
 		if (((ifp->if_flags & IFF_LINK0) && ifr->ifr_mtu > ARCMTU) ||
 		    ifr->ifr_mtu > ARC_PHDS_MAXMTU)
 			error = EINVAL;
 		else
 			ifp->if_mtu = ifr->ifr_mtu;
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Map a multicast address onto an ARCnet link-level address.
  * Based on ether_resolvemulti().
  *
  * sa is the address to resolve.  For AF_INET and AF_INET6 the buffer
  * that *llsa points to on entry is filled in with a link-level sockaddr
  * holding address 0 (the ARCnet broadcast address) and *llsa is left
  * pointing at it.  For AF_LINK, and for the unspecified IPv6 address,
  * *llsa is set to NULL because no mapping is needed.
  *
  * Returns 0 on success, EADDRNOTAVAIL if sa is not a multicast (or, for
  * AF_LINK, broadcast) address, and EAFNOSUPPORT for other families.
  */
 int
 arc_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
     struct sockaddr *sa)
 {
 	struct sockaddr_dl *sdl;
 #ifdef INET
 	struct sockaddr_in *sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *sin6;
 #endif
 
 	switch(sa->sa_family) {
 	case AF_LINK:
 		/*
 		* No mapping needed. Just check that it's a valid MC address.
 		*/
 		sdl = (struct sockaddr_dl *)sa;
 		if (*LLADDR(sdl) != arcbroadcastaddr)
 			return EADDRNOTAVAIL;
 		*llsa = 0;
 		return 0;
 #ifdef INET
 	case AF_INET:
 		sin = (struct sockaddr_in *)sa;
 		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 			return EADDRNOTAVAIL;
 		/*
 		 * Tag the address with the ARCnet interface type, matching
 		 * what arc_ifattach() stores in the interface's own sdl.
 		 */
 		sdl = link_init_sdl(ifp, *llsa, IFT_ARCNET);
 		sdl->sdl_alen = ARC_ADDR_LEN;
 		*LLADDR(sdl) = 0;
 		*llsa = (struct sockaddr *)sdl;
 		return 0;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		sin6 = (struct sockaddr_in6 *)sa;
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 			/*
 			 * An IP6 address of 0 means listen to all
 			 * of the link-level multicast addresses used for IP6.
 			 * (This is used for multicast routers.)
 			 */
 			ifp->if_flags |= IFF_ALLMULTI;
 			*llsa = 0;
 			return 0;
 		}
 		if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 			return EADDRNOTAVAIL;
 		/* Same ARCnet typing as in the AF_INET case. */
 		sdl = link_init_sdl(ifp, *llsa, IFT_ARCNET);
 		sdl->sdl_alen = ARC_ADDR_LEN;
 		*LLADDR(sdl) = 0;
 		*llsa = (struct sockaddr *)sdl;
 		return 0;
 #endif
 
 	default:
 		/*
 		 * Well, the text isn't quite right, but it's the name
 		 * that counts...
 		 */
 		return EAFNOSUPPORT;
 	}
 }
 
 static MALLOC_DEFINE(M_ARCCOM, "arccom", "ARCNET interface internals");
 
 /*
  * Allocate a zero-filled struct arccom for ifp and link it back to the
  * interface.  The type argument is not used.  Sleeps until memory is
  * available (M_WAITOK).
  */
 static void*
 arc_alloc(u_char type, struct ifnet *ifp)
 {
 	struct arccom	*com;
 
 	com = malloc(sizeof(*com), M_ARCCOM, M_WAITOK | M_ZERO);
 	com->ac_ifp = ifp;
 	return (com);
 }
 
 /*
  * Release a structure obtained from arc_alloc().  The type argument is
  * not used.
  */
 static void
 arc_free(void *com, u_char type)
 {
 
 	free(com, M_ARCCOM);
 }
 
 /*
  * Module event handler: on load, register arc_alloc()/arc_free() as the
  * common-structure allocator for IFT_ARCNET interfaces; on unload,
  * deregister it.  Any other event is rejected with EOPNOTSUPP.
  */
 static int
 arc_modevent(module_t mod, int type, void *data)
 {
 
 	if (type == MOD_LOAD) {
 		if_register_com_alloc(IFT_ARCNET, arc_alloc, arc_free);
 		return (0);
 	}
 	if (type == MOD_UNLOAD) {
 		if_deregister_com_alloc(IFT_ARCNET);
 		return (0);
 	}
 	return EOPNOTSUPP;
 }
 
 /* Module description: name, event handler, and no extra argument. */
 static moduledata_t arc_mod = {
 	"arcnet",
 	arc_modevent,
 	0
 };
 
 /* Declare the "arcnet" module and give it version 1. */
 DECLARE_MODULE(arcnet, arc_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
 MODULE_VERSION(arcnet, 1);
Index: projects/clang380-import/sys/net/if_ethersubr.c
===================================================================
--- projects/clang380-import/sys/net/if_ethersubr.c	(revision 293686)
+++ projects/clang380-import/sys/net/if_ethersubr.c	(revision 293687)
@@ -1,1200 +1,1198 @@
 /*-
  * Copyright (c) 1982, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if_ethersubr.c	8.1 (Berkeley) 6/10/93
  * $FreeBSD$
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_netgraph.h"
 #include "opt_mbuf_profiling.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mbuf.h>
 #include <sys/random.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/uuid.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/if_llc.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if_bridgevar.h>
 #include <net/if_vlan_var.h>
 #include <net/if_llatbl.h>
 #include <net/pfil.h>
 #include <net/rss_config.h>
 #include <net/vnet.h>
 
 #include <netpfil/pf/pf_mtag.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip_carp.h>
 #include <netinet/ip_var.h>
 #endif
 #ifdef INET6
 #include <netinet6/nd6.h>
 #endif
 #include <security/mac/mac_framework.h>
 
 /* Compile-time layout checks: the header is two addresses plus 2 bytes. */
 #ifdef CTASSERT
 CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2);
 CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN);
 #endif
 
 VNET_DEFINE(struct pfil_head, link_pfil_hook);	/* Packet filter hooks */
 
 /* netgraph node hooks for ng_ether(4) */
 void	(*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp);
 void	(*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m);
 int	(*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp);
 void	(*ng_ether_attach_p)(struct ifnet *ifp);
 void	(*ng_ether_detach_p)(struct ifnet *ifp);
 
 /* VLAN input hook (function pointer; not assigned in this part of the file). */
 void	(*vlan_input_p)(struct ifnet *, struct mbuf *);
 
 /* if_bridge(4) support */
 struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *); 
 int	(*bridge_output_p)(struct ifnet *, struct mbuf *, 
 		struct sockaddr *, struct rtentry *);
 void	(*bridge_dn_p)(struct mbuf *, struct ifnet *);
 
 /* if_lagg(4) support */
 struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); 
 
 /* The all-ones Ethernet broadcast address. */
 static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
 			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
 /* Forward declarations for file-local functions. */
 static	int ether_resolvemulti(struct ifnet *, struct sockaddr **,
 		struct sockaddr *);
 #ifdef VIMAGE
 static	void ether_reassign(struct ifnet *, struct vnet *, char *);
 #endif
 static	int ether_requestencap(struct ifnet *, struct if_encap_req *);
 
 /* True when addr equals the Ethernet broadcast address. */
 #define	ETHER_IS_BROADCAST(addr) \
 	(bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
 
 /* Store e in the local "error" and jump to the function's "bad" label. */
 #define senderr(e) do { error = (e); goto bad;} while (0)
 
 /*
  * For each checksum request flag set on src (CSUM_IP, CSUM_DELAY_DATA,
  * CSUM_SCTP), OR the corresponding "checked/valid" flags into dst.  When
  * CSUM_DATA_VALID is among them, dst's csum_data is set to 0xffff.
  *
  * All of src's flags are read before dst is written, so src and dst may
  * be the same mbuf.
  */
 static void
 update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
 {
 	int verified;
 
 	verified = 0;
 	if ((src->m_pkthdr.csum_flags & CSUM_IP) != 0)
 		verified |= CSUM_IP_CHECKED | CSUM_IP_VALID;
 	if ((src->m_pkthdr.csum_flags & CSUM_DELAY_DATA) != 0)
 		verified |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 	if ((src->m_pkthdr.csum_flags & CSUM_SCTP) != 0)
 		verified |= CSUM_SCTP_VALID;
 
 	dst->m_pkthdr.csum_flags |= verified;
 	if ((verified & CSUM_DATA_VALID) != 0)
 		dst->m_pkthdr.csum_data = 0xffff;
 }
 
 /*
  * Handle link-layer encapsulation requests.
  *
  * Builds an Ethernet header in req->buf for the address family in
  * req->family (AF_INET, AF_INET6 or AF_ARP), using req->lladdr as the
  * destination, or the interface broadcast address for an ARP request
  * flagged IFENCAP_FLAG_BROADCAST.  On success req->bufsize is set to the
  * header size and 0 is returned.
  *
  * Errors: EOPNOTSUPP for a request type other than IFENCAP_LL, ENOMEM
  * when the buffer is smaller than ETHER_HDR_LEN, EAFNOSUPPORT for any
  * other family.
  */
 static int
 ether_requestencap(struct ifnet *ifp, struct if_encap_req *req)
 {
 	struct ether_header *hdr;
 	struct arphdr *arp;
 	const u_char *dhost;
 	uint16_t arpop, type_be;
 
 	if (req->rtype != IFENCAP_LL)
 		return (EOPNOTSUPP);
 	if (req->bufsize < ETHER_HDR_LEN)
 		return (ENOMEM);
 
 	hdr = (struct ether_header *)req->buf;
 	dhost = req->lladdr;
 	req->lladdr_off = 0;
 
 	/* Choose the ethertype (kept in network byte order). */
 	if (req->family == AF_INET) {
 		type_be = htons(ETHERTYPE_IP);
 	} else if (req->family == AF_INET6) {
 		type_be = htons(ETHERTYPE_IPV6);
 	} else if (req->family == AF_ARP) {
 		arp = (struct arphdr *)req->hdata;
 		arp->ar_hrd = htons(ARPHRD_ETHER);
 
 		/* Reverse-ARP operations get their own ethertype. */
 		arpop = ntohs(arp->ar_op);
 		if (arpop == ARPOP_REVREQUEST || arpop == ARPOP_REVREPLY)
 			type_be = htons(ETHERTYPE_REVARP);
 		else
 			type_be = htons(ETHERTYPE_ARP);
 
 		if (req->flags & IFENCAP_FLAG_BROADCAST)
 			dhost = ifp->if_broadcastaddr;
 	} else
 		return (EAFNOSUPPORT);
 
 	memcpy(&hdr->ether_type, &type_be, sizeof(hdr->ether_type));
 	memcpy(hdr->ether_dhost, dhost, ETHER_ADDR_LEN);
 	memcpy(hdr->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
 	req->bufsize = sizeof(struct ether_header);
 
 	return (0);
 }
 
 
 /*
  * Produce the Ethernet header for sending m to dst on ifp.
  *
  * phdr is the caller's header buffer.  Unicast destinations are passed
  * to arpresolve()/nd6_resolve() together with phdr; broadcast and
  * multicast headers are built here directly.  ro, when non-NULL, is only
  * consulted to tell whether the route goes through a gateway.
  *
  * On success returns 0 and sets *pflags to RT_MAY_LOOP, plus RT_L2_ME
  * when the resolver reported LLE_IFADDR.  On failure returns an errno
  * value and leaves *pflags untouched; an unsupported address family
  * frees m and returns EAFNOSUPPORT.
  */
 static int
 ether_resolve_addr(struct ifnet *ifp, struct mbuf *m,
 	const struct sockaddr *dst, struct route *ro, u_char *phdr,
 	uint32_t *pflags)
 {
 	struct ether_header *eh;
-	struct rtentry *rt;
 	uint32_t lleflags = 0;
 	int error = 0;
 #if defined(INET) || defined(INET6)
 	uint16_t etype;
 #endif
 
 	eh = (struct ether_header *)phdr;
 
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
 			error = arpresolve(ifp, 0, m, dst, phdr, &lleflags);
 		else {
 			/* Broadcast or multicast: no lookup needed. */
 			if (m->m_flags & M_BCAST)
 				memcpy(eh->ether_dhost, ifp->if_broadcastaddr,
 				    ETHER_ADDR_LEN);
 			else {
 				const struct in_addr *a;
 				a = &(((const struct sockaddr_in *)dst)->sin_addr);
 				ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost);
 			}
 			etype = htons(ETHERTYPE_IP);
 			memcpy(&eh->ether_type, &etype, sizeof(etype));
 			memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
 		}
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		if ((m->m_flags & M_MCAST) == 0)
 			error = nd6_resolve(ifp, 0, m, dst, phdr, &lleflags);
 		else {
 			/* Multicast: derive the address from the IPv6 group. */
 			const struct in6_addr *a6;
 			a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr);
 			ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost);
 			etype = htons(ETHERTYPE_IPV6);
 			memcpy(&eh->ether_type, &etype, sizeof(etype));
 			memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
 		}
 		break;
 #endif
 	default:
 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
 		if (m != NULL)
 			m_freem(m);
 		return (EAFNOSUPPORT);
 	}
 
 	/* A down host behind a gateway is reported as unreachable. */
 	if (error == EHOSTDOWN) {
-		rt = (ro != NULL) ? ro->ro_rt : NULL;
-		if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) != 0)
+		if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0)
 			error = EHOSTUNREACH;
 	}
 
 	if (error != 0)
 		return (error);
 
 	*pflags = RT_MAY_LOOP;
 	if (lleflags & LLE_IFADDR)
 		*pflags |= RT_L2_ME;
 
 	return (0);
 }
 
 /*
  * Ethernet output routine.
  * Encapsulate a packet of type family for the local net.
  * Use trailer local net encapsulation if enough data in first
  * packet leaves a multiple of 512 bytes of data in remainder.
  *
  * ifp is the outgoing interface, m the packet and dst the next-hop
  * address.  ro is optional; when it carries prepend data (ro_prepend,
  * ro_plen, ro_flags) that is used as the link header, otherwise the
  * header is computed by ether_resolve_addr().
  *
  * Returns 0 on success, including when address resolution reported
  * EWOULDBLOCK, otherwise an errno value.  Paths that leave through the
  * "bad" label free m.
  */
 int
 ether_output(struct ifnet *ifp, struct mbuf *m,
 	const struct sockaddr *dst, struct route *ro)
 {
 	int error = 0;
 	char linkhdr[ETHER_HDR_LEN], *phdr;
 	struct ether_header *eh;
 	struct pf_mtag *t;
 	int loop_copy = 1;
 	int hlen;	/* link layer header length */
 	uint32_t pflags;
 
 	phdr = NULL;
 	pflags = 0;
 	/* Pick up a caller-supplied link header, if there is one. */
 	if (ro != NULL) {
 		phdr = ro->ro_prepend;
 		hlen = ro->ro_plen;
 		pflags = ro->ro_flags;
 	}
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error)
 		senderr(error);
 #endif
 
 	M_PROFILE(m);
 	if (ifp->if_flags & IFF_MONITOR)
 		senderr(ENETDOWN);
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
 		senderr(ENETDOWN);
 
 	if (phdr == NULL) {
 		/* No prepend data supplied. Try to calculate ourselves. */
 		phdr = linkhdr;
 		hlen = ETHER_HDR_LEN;
 		error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags);
 		/* m is not freed here on a resolver error. */
 		if (error != 0)
 			return (error == EWOULDBLOCK ? 0 : error);
 	}
 
 	/* Destination is one of our own addresses: loop it straight back. */
 	if ((pflags & RT_L2_ME) != 0) {
 		update_mbuf_csumflags(m, m);
 		return (if_simloop(ifp, m, dst->sa_family, 0));
 	}
 	loop_copy = pflags & RT_MAY_LOOP;
 
 	/*
 	 * Add local net header.  If no space in first mbuf,
 	 * allocate another.
 	 *
 	 * Note that we do prepend regardless of RT_HAS_HEADER flag.
 	 * This is done because BPF code shifts m_data pointer
 	 * to the end of ethernet header prior to calling if_output().
 	 */
 	M_PREPEND(m, hlen, M_NOWAIT);
 	if (m == NULL)
 		senderr(ENOBUFS);
 	if ((pflags & RT_HAS_HEADER) == 0) {
 		eh = mtod(m, struct ether_header *);
 		memcpy(eh, phdr, hlen);
 	}
 
 	/*
 	 * If a simplex interface, and the packet is being sent to our
 	 * Ethernet address or a broadcast address, loopback a copy.
 	 * XXX To make a simplex device behave exactly like a duplex
 	 * device, we should copy in the case of sending to our own
 	 * ethernet address (thus letting the original actually appear
 	 * on the wire). However, we don't do that here for security
 	 * reasons and compatibility with the original behavior.
 	 */
 	if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) &&
 	    ((t = pf_find_mtag(m)) == NULL || !t->routed)) {
 		struct mbuf *n;
 
 		/*
 		 * Because if_simloop() modifies the packet, we need a
 		 * writable copy through m_dup() instead of a readonly
 		 * one as m_copy[m] would give us. The alternative would
 		 * be to modify if_simloop() to handle the readonly mbuf,
 		 * but performancewise it is mostly equivalent (trading
 		 * extra data copying vs. extra locking).
 		 *
 		 * XXX This is a local workaround.  A number of less
 		 * often used kernel parts suffer from the same bug.
 		 * See PR kern/105943 for a proposed general solution.
 		 */
 		if ((n = m_dup(m, M_NOWAIT)) != NULL) {
 			update_mbuf_csumflags(m, n);
 			(void)if_simloop(ifp, n, dst->sa_family, hlen);
 		} else
 			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
 	}
 
        /*
 	* Bridges require special output handling.
 	*/
 	if (ifp->if_bridge) {
 		BRIDGE_OUTPUT(ifp, m, error);
 		return (error);
 	}
 
 #if defined(INET) || defined(INET6)
 	if (ifp->if_carp &&
 	    (error = (*carp_output_p)(ifp, m, dst)))
 		goto bad;
 #endif
 
 	/* Handle ng_ether(4) processing, if any */
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_output_p != NULL,
 		    ("ng_ether_output_p is NULL"));
 		if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) {
 			/*
 			 * Shared error exit: the senderr()/goto users above
 			 * jump into this block.
 			 */
 bad:			if (m != NULL)
 				m_freem(m);
 			return (error);
 		}
 		/* The hook took the packet without reporting an error. */
 		if (m == NULL)
 			return (0);
 	}
 
 	/* Continue with link-layer output */
 	return ether_output_frame(ifp, m);
 }
 
 /*
  * Ethernet link layer output routine to send a raw frame to the device.
  *
  * This assumes that the 14 byte Ethernet header is present and contiguous
  * in the first mbuf (if BRIDGE'ing).
  *
  * When link-level packet filter hooks are installed they run first: a
  * non-zero hook result yields EACCES, and a hook that leaves m NULL
  * yields 0.  Otherwise the result of the interface's if_transmit
  * method is returned.
  */
 int
 ether_output_frame(struct ifnet *ifp, struct mbuf *m)
 {
 	int rejected;
 
 	if (PFIL_HOOKED(&V_link_pfil_hook)) {
 		rejected = pfil_run_hooks(&V_link_pfil_hook, &m, ifp,
 		    PFIL_OUT, NULL);
 		if (rejected != 0)
 			return (EACCES);
 		/* Nothing left to transmit. */
 		if (m == NULL)
 			return (0);
 	}
 
 	/*
 	 * Queue message on interface, update output statistics if
 	 * successful, and start output if interface not yet active.
 	 */
 	return ((ifp->if_transmit)(ifp, m));
 }
 
 #if defined(INET) || defined(INET6)
 #endif
 
 /*
  * Process a received Ethernet packet; the packet is in the
  * mbuf chain m with the ethernet header at the front.
  *
  * ifp is the interface the frame arrived on and m is the frame itself.
  * Nothing is returned; every exit path either frees the mbuf, finds it
  * already taken (m == NULL) by lagg, netgraph, the bridge or a failed
  * pullup, or passes it on to ether_demux().
  *
  * Once CURVNET_SET_QUIET() has been executed, each return below is
  * preceded by CURVNET_RESTORE().
  */
 static void
 ether_input_internal(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ether_header *eh;
 	u_short etype;
 
 	/* Frames arriving on an interface that is not up are dropped silently. */
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		return;
 	}
 #ifdef DIAGNOSTIC
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 		if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n");
 		m_freem(m);
 		return;
 	}
 #endif
 	/* The header is read in place below, so it must sit in the first mbuf. */
 	if (m->m_len < ETHER_HDR_LEN) {
 		/* XXX maybe should pullup? */
 		if_printf(ifp, "discard frame w/o leading ethernet "
 				"header (len %u pkt len %u)\n",
 				m->m_len, m->m_pkthdr.len);
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		m_freem(m);
 		return;
 	}
 	eh = mtod(m, struct ether_header *);
 	/*
 	 * etype is sampled once here, in host byte order.  It is only used
 	 * for the software 802.1Q check further down and is not refreshed
 	 * after the tag has been stripped.
 	 */
 	etype = ntohs(eh->ether_type);
 	/* NOTE(review): presumably feeds the entropy harvester -- confirm. */
 	random_harvest_queue(m, sizeof(*m), 2, RANDOM_NET_ETHER);
 
 	CURVNET_SET_QUIET(ifp->if_vnet);
 
 	/* Broadcast is tested inside the multicast check; both count as IMCASTS. */
 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
 		if (ETHER_IS_BROADCAST(eh->ether_dhost))
 			m->m_flags |= M_BCAST;
 		else
 			m->m_flags |= M_MCAST;
 		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
 	}
 
 #ifdef MAC
 	/*
 	 * Tag the mbuf with an appropriate MAC label before any other
 	 * consumers can get to it.
 	 */
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/*
 	 * Give bpf a chance at the packet.
 	 */
 	ETHER_BPF_MTAP(ifp, m);
 
 	/*
 	 * If the CRC is still on the packet, trim it off. We do this once
 	 * and once only in case we are re-entered. Nothing else on the
 	 * Ethernet receive path expects to see the FCS.
 	 */
 	if (m->m_flags & M_HASFCS) {
 		m_adj(m, -ETHER_CRC_LEN);
 		m->m_flags &= ~M_HASFCS;
 	}
 
 	/* Input bytes are counted here unless IFCAP_HWSTATS is enabled. */
 	if (!(ifp->if_capenable & IFCAP_HWSTATS))
 		if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 
 	/* Allow monitor mode to claim this frame, after stats are updated. */
 	if (ifp->if_flags & IFF_MONITOR) {
 		m_freem(m);
 		CURVNET_RESTORE();
 		return;
 	}
 
 	/* Handle input from a lagg(4) port */
 	if (ifp->if_type == IFT_IEEE8023ADLAG) {
 		KASSERT(lagg_input_p != NULL,
 		    ("%s: if_lagg not loaded!", __func__));
 		m = (*lagg_input_p)(ifp, m);
 		/*
 		 * A returned mbuf continues on the interface recorded in its
 		 * packet header; NULL means lagg kept or dropped the frame.
 		 */
 		if (m != NULL)
 			ifp = m->m_pkthdr.rcvif;
 		else {
 			CURVNET_RESTORE();
 			return;
 		}
 	}
 
 	/*
 	 * If the hardware did not process an 802.1Q tag, do this now,
 	 * to allow 802.1P priority frames to be passed to the main input
 	 * path correctly.
 	 * TODO: Deal with Q-in-Q frames, but not arbitrary nesting levels.
 	 */
 	if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_VLAN) {
 		struct ether_vlan_header *evl;
 
 		/* On pullup failure m is NULL, so there is nothing left to free. */
 		if (m->m_len < sizeof(*evl) &&
 		    (m = m_pullup(m, sizeof(*evl))) == NULL) {
 #ifdef DIAGNOSTIC
 			if_printf(ifp, "cannot pullup VLAN header\n");
 #endif
 			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 			CURVNET_RESTORE();
 			return;
 		}
 
 		evl = mtod(m, struct ether_vlan_header *);
 		m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
 		m->m_flags |= M_VLANTAG;
 
 		/*
 		 * Slide the two MAC addresses forward by the size of the
 		 * 802.1Q encapsulation and then trim that many bytes from
 		 * the front, leaving a plain Ethernet header.  Source and
 		 * destination of the copy overlap.
 		 */
 		bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
 		    ETHER_HDR_LEN - ETHER_TYPE_LEN);
 		m_adj(m, ETHER_VLAN_ENCAP_LEN);
 		eh = mtod(m, struct ether_header *);
 	}
 
 	M_SETFIB(m, ifp->if_fib);
 
 	/* Allow ng_ether(4) to claim this frame. */
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_input_p != NULL,
 		    ("%s: ng_ether_input_p is NULL", __func__));
 		m->m_flags &= ~M_PROMISC;
 		(*ng_ether_input_p)(ifp, &m);
 		if (m == NULL) {
 			CURVNET_RESTORE();
 			return;
 		}
 		/* The hook received &m, so re-derive the header pointer. */
 		eh = mtod(m, struct ether_header *);
 	}
 
 	/*
 	 * Allow if_bridge(4) to claim this frame.
 	 * The BRIDGE_INPUT() macro will update ifp if the bridge changed it
 	 * and the frame should be delivered locally.
 	 */
 	if (ifp->if_bridge != NULL) {
 		m->m_flags &= ~M_PROMISC;
 		BRIDGE_INPUT(ifp, m);
 		if (m == NULL) {
 			CURVNET_RESTORE();
 			return;
 		}
 		eh = mtod(m, struct ether_header *);
 	}
 
 #if defined(INET) || defined(INET6)
 	/*
 	 * Clear M_PROMISC on frame so that carp(4) will see it when the
 	 * mbuf flows up to Layer 3.
 	 * FreeBSD's implementation of carp(4) uses the inprotosw
 	 * to dispatch IPPROTO_CARP. carp(4) also allocates its own
 	 * Ethernet addresses of the form 00:00:5e:00:01:xx, which
 	 * is outside the scope of the M_PROMISC test below.
 	 * TODO: Maintain a hash table of ethernet addresses other than
 	 * ether_dhost which may be active on this ifp.
 	 */
 	if (ifp->if_carp && (*carp_forus_p)(ifp, eh->ether_dhost)) {
 		m->m_flags &= ~M_PROMISC;
 	} else
 #endif
 	{
 		/*
 		 * If the frame received was not for our MAC address, set the
 		 * M_PROMISC flag on the mbuf chain. The frame may need to
 		 * be seen by the rest of the Ethernet input path in case of
 		 * re-entry (e.g. bridge, vlan, netgraph) but should not be
 		 * seen by upper protocol layers.
 		 */
 		if (!ETHER_IS_MULTICAST(eh->ether_dhost) &&
 		    bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0)
 			m->m_flags |= M_PROMISC;
 	}
 
 	/* Hand the frame to the upper-layer demultiplexer. */
 	ether_demux(ifp, m);
 	CURVNET_RESTORE();
 }
 
 /*
  * Ethernet input dispatch; by default, direct dispatch here regardless of
  * global configuration.  However, if RSS is enabled, hook up RSS affinity
  * so that when deferred or hybrid dispatch is enabled, we can redistribute
  * load based on RSS.
  *
  * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or
  * not it had already done work distribution via multi-queue.  Then we could
  * direct dispatch in the event load balancing was already complete and
  * handle the case of interfaces with different capabilities better.
  *
  * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions
  * at multiple layers?
  *
  * XXXRW: For now, enable all this only if RSS is compiled in, although it
  * works fine without RSS.  Need to characterise the performance overhead
  * of the detour through the netisr code in the event the result is always
  * direct dispatch.
  */
 static void
 ether_nh_input(struct mbuf *m)
 {
 	struct ifnet *rcvif;
 
 	/*
 	 * The netisr handler is given only the mbuf, so the receiving
 	 * interface is taken from the packet header.
 	 */
 	M_ASSERTPKTHDR(m);
 	rcvif = m->m_pkthdr.rcvif;
 	KASSERT(rcvif != NULL, ("%s: NULL interface pointer", __func__));
 	ether_input_internal(rcvif, m);
 }
 
 /*
  * netisr registration record for Ethernet input.  Dispatch is direct in
  * both configurations; only the policy (and, with RSS, the mbuf-to-CPU
  * mapping function) depends on whether RSS is compiled in.
  */
 static struct netisr_handler	ether_nh = {
 	.nh_name = "ether",
 	.nh_handler = ether_nh_input,
 	.nh_proto = NETISR_ETHER,
 	.nh_dispatch = NETISR_DISPATCH_DIRECT,
 #ifdef RSS
 	.nh_policy = NETISR_POLICY_CPU,
 	.nh_m2cpuid = rss_m2cpuid,
 #else
 	.nh_policy = NETISR_POLICY_SOURCE,
 #endif
 };
 
 /*
  * SYSINIT hook: register the Ethernet netisr handler.  The argument is
  * unused.
  */
 static void
 ether_init(__unused void *arg)
 {
 	netisr_register(&ether_nh);
 }
 SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL);
 
 /*
  * Per-vnet initialisation: set up and register the link-layer packet
  * filter hook head.  A registration failure is only reported on the
  * console; it is not propagated.
  */
 static void
 vnet_ether_init(__unused void *arg)
 {
 	int error;
 
 	/* Initialize packet filter hooks. */
 	V_link_pfil_hook.ph_type = PFIL_TYPE_AF;
 	V_link_pfil_hook.ph_af = AF_LINK;
 
 	error = pfil_head_register(&V_link_pfil_hook);
 	if (error == 0)
 		return;
 	printf("%s: WARNING: unable to register pfil link hook, "
 		"error %d\n", __func__, error);
 }
 VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
     vnet_ether_init, NULL);
  
 /*
  * Per-vnet teardown: unregister the link-layer packet filter hook head.
  * A failure is only reported on the console.
  */
 static void
 vnet_ether_destroy(__unused void *arg)
 {
 	int error;
 
 	error = pfil_head_unregister(&V_link_pfil_hook);
 	if (error == 0)
 		return;
 	printf("%s: WARNING: unable to unregister pfil link hook, "
 		"error %d\n", __func__, error);
 }
 VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
     vnet_ether_destroy, NULL);
 
 
 
 /*
  * Interface input method installed by ether_ifattach().
  *
  * Drivers may pass a chain of packets linked through m_nextpkt, which
  * lets them amortize the receive lock.  Each packet is unlinked from the
  * chain and dispatched on its own through the Ethernet netisr.
  */
 static void
 ether_input(struct ifnet *ifp, struct mbuf *m)
 {
 	struct mbuf *next;
 
 	for (; m != NULL; m = next) {
 		/* Detach this packet before it leaves our hands. */
 		next = m->m_nextpkt;
 		m->m_nextpkt = NULL;
 
 		/*
 		 * The deferred context relies on rcvif being set properly,
 		 * so assert it is correct here.
 		 */
 		KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__));
 		netisr_dispatch(NETISR_ETHER, m);
 	}
 }
 
 /*
  * Upper layer processing for a received Ethernet packet.
  */
 void
 ether_demux(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ether_header *eh;
 	int i, isr;
 	u_short ether_type;
 
 	KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__));
 
 	/* Do not grab PROMISC frames in case we are re-entered. */
 	if (PFIL_HOOKED(&V_link_pfil_hook) && !(m->m_flags & M_PROMISC)) {
 		i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_IN, NULL);
 
 		if (i != 0 || m == NULL)
 			return;
 	}
 
 	eh = mtod(m, struct ether_header *);
 	ether_type = ntohs(eh->ether_type);
 
 	/*
 	 * If this frame has a VLAN tag other than 0, call vlan_input()
 	 * if its module is loaded. Otherwise, drop.
 	 */
 	if ((m->m_flags & M_VLANTAG) &&
 	    EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) {
 		if (ifp->if_vlantrunk == NULL) {
 			if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
 			m_freem(m);
 			return;
 		}
 		KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!",
 		    __func__));
 		/* Clear before possibly re-entering ether_input(). */
 		m->m_flags &= ~M_PROMISC;
 		(*vlan_input_p)(ifp, m);
 		return;
 	}
 
 	/*
 	 * Pass promiscuously received frames to the upper layer if the user
 	 * requested this by setting IFF_PPROMISC. Otherwise, drop them.
 	 */
 	if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) {
 		m_freem(m);
 		return;
 	}
 
 	/*
 	 * Reset layer specific mbuf flags to avoid confusing upper layers.
 	 * Strip off Ethernet header.
 	 */
 	m->m_flags &= ~M_VLANTAG;
 	m_clrprotoflags(m);
 	m_adj(m, ETHER_HDR_LEN);
 
 	/*
 	 * Dispatch frame to upper layer.
 	 */
 	switch (ether_type) {
 #ifdef INET
 	case ETHERTYPE_IP:
 		isr = NETISR_IP;
 		break;
 
 	case ETHERTYPE_ARP:
 		if (ifp->if_flags & IFF_NOARP) {
 			/* Discard packet if ARP is disabled on interface */
 			m_freem(m);
 			return;
 		}
 		isr = NETISR_ARP;
 		break;
 #endif
 #ifdef INET6
 	case ETHERTYPE_IPV6:
 		isr = NETISR_IPV6;
 		break;
 #endif
 	default:
 		goto discard;
 	}
 	netisr_dispatch(isr, m);
 	return;
 
 discard:
 	/*
 	 * Packet is to be discarded.  If netgraph is present,
 	 * hand the packet to it for last chance processing;
 	 * otherwise dispose of it.
 	 */
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_input_orphan_p != NULL,
 		    ("ng_ether_input_orphan_p is NULL"));
 		/*
 		 * Put back the ethernet header so netgraph has a
 		 * consistent view of inbound packets.
 		 */
 		M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
 		(*ng_ether_input_orphan_p)(ifp, m);
 		return;
 	}
 	m_freem(m);
 }
 
 /*
  * Format an Ethernet address for printing or logging.
  *
  * The result lives in a single static buffer, so each call overwrites
  * the previous result.  This routine exists for compatibility; new code
  * is better off with
  *
  *	printf("%6D", <pointer to address>, ":");
  *
  * which involves no static buffer.
  */
 char *
 ether_sprintf(const u_char *ap)
 {
 	static char etherbuf[18];
 
 	snprintf(etherbuf, sizeof(etherbuf), "%6D", ap, ":");
 	return (&etherbuf[0]);
 }
 
 /*
  * Common attach-time work for an Ethernet interface.
  *
  * ifp is the interface being attached and lla points at its
  * ifp->if_addrlen byte link-level address.  The routine fills in the
  * Ethernet defaults and method pointers, stores the address in the
  * interface's link-level sockaddr, attaches bpf and (when loaded)
  * ng_ether, and announces the address unless it is all zeroes.
  */
 void
 ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
 {
 	struct sockaddr_dl *sdl;
 	struct ifaddr *ifa;
 	int idx;
 
 	ifp->if_addrlen = ETHER_ADDR_LEN;
 	ifp->if_hdrlen = ETHER_HDR_LEN;
 	if_attach(ifp);
 	ifp->if_mtu = ETHERMTU;
 	ifp->if_output = ether_output;
 	ifp->if_input = ether_input;
 	ifp->if_resolvemulti = ether_resolvemulti;
 	ifp->if_requestencap = ether_requestencap;
 #ifdef VIMAGE
 	ifp->if_reassign = ether_reassign;
 #endif
 	/* Only supply a baudrate when none has been set yet. */
 	if (ifp->if_baudrate == 0)
 		ifp->if_baudrate = IF_Mbps(10);		/* just a default */
 	ifp->if_broadcastaddr = etherbroadcastaddr;
 
 	/* Record the link-level address in the interface's sockaddr_dl. */
 	ifa = ifp->if_addr;
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_ETHER;
 	sdl->sdl_alen = ifp->if_addrlen;
 	bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
 
 	bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN);
 	if (ng_ether_attach_p != NULL)
 		(*ng_ether_attach_p)(ifp);
 
 	/* Announce Ethernet MAC address if non-zero. */
 	idx = 0;
 	while (idx < ifp->if_addrlen && lla[idx] == 0)
 		idx++;
 	if (idx != ifp->if_addrlen)
 		if_printf(ifp, "Ethernet address: %6D\n", lla, ":");
 
 	uuid_ether_add(LLADDR(sdl));
 }
 
 /*
  * Common detach-time work for an Ethernet interface: withdraw the
  * link-level address from the uuid code, detach ng_ether if it is
  * attached to this interface, then detach bpf and the interface itself.
  */
 void
 ether_ifdetach(struct ifnet *ifp)
 {
 	struct sockaddr_dl *lladdr_sdl;
 
 	lladdr_sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr);
 	uuid_ether_del(LLADDR(lladdr_sdl));
 
 	/* if_l2com being set means netgraph is attached to this ifnet. */
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_detach_p != NULL,
 		    ("ng_ether_detach_p is NULL"));
 		(*ng_ether_detach_p)(ifp);
 	}
 
 	bpfdetach(ifp);
 	if_detach(ifp);
 }
 
 #ifdef VIMAGE
 /*
  * Interface reassign method installed by ether_ifattach().  Any existing
  * ng_ether attachment is torn down, and, if ng_ether is loaded, it is
  * attached again with new_vnet as the current vnet.  The third argument
  * is unused.
  */
 void
 ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused)
 {
 
 	if (ifp->if_l2com != NULL) {
 		KASSERT(ng_ether_detach_p != NULL,
 		    ("ng_ether_detach_p is NULL"));
 		(*ng_ether_detach_p)(ifp);
 	}
 
 	/* Without ng_ether there is nothing to re-attach. */
 	if (ng_ether_attach_p == NULL)
 		return;
 
 	CURVNET_SET_QUIET(new_vnet);
 	(*ng_ether_attach_p)(ifp);
 	CURVNET_RESTORE();
 }
 #endif
 
 /*
  * Sysctl node named "ether" (description "Ethernet") created under the
  * _net_link parent, which is only declared here.
  */
 SYSCTL_DECL(_net_link);
 SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet");
 
 #if 0
 /*
  * This is for reference.  We have a table-driven version
  * of the little-endian crc32 generator, which is faster
  * than the double-loop.
  */
 uint32_t
 ether_crc32_le(const uint8_t *buf, size_t len)
 {
 	size_t i;
 	uint32_t crc, carry;
 	int bit;
 	uint8_t data;
 
 	crc = 0xffffffff;	/* initial value */
 
 	for (i = 0; i < len; i++) {
 		for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) {
 			carry = (crc ^ data) & 1;
 			crc >>= 1;
 			if (carry)
 				crc = (crc ^ ETHER_CRC_POLY_LE);
 		}
 	}
 
 	return (crc);
 }
 #else
 /*
  * Little-endian (reflected) CRC-32 over len bytes starting at buf.
  *
  * The register starts at 0xffffffff and is returned as-is, with no final
  * inversion; an empty buffer therefore yields 0xffffffff.  Each input
  * byte is folded in with two 4-bit table lookups.
  */
 uint32_t
 ether_crc32_le(const uint8_t *buf, size_t len)
 {
 	/* Remainders for each 4-bit value, low nibble first. */
 	static const uint32_t crctab[] = {
 		0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
 		0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
 		0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
 		0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c
 	};
 	size_t i;
 	uint32_t crc;
 
 	crc = 0xffffffff;	/* initial value */
 
 	for (i = 0; i < len; i++) {
 		crc ^= buf[i];
 		/* One lookup per nibble of the byte just mixed in. */
 		crc = (crc >> 4) ^ crctab[crc & 0xf];
 		crc = (crc >> 4) ^ crctab[crc & 0xf];
 	}
 
 	return (crc);
 }
 #endif
 
 /*
  * Big-endian CRC-32 over len bytes starting at buf, computed one bit at
  * a time.  Input bytes are consumed least significant bit first while
  * the register shifts left.  The register starts at 0xffffffff and is
  * returned without a final inversion.
  */
 uint32_t
 ether_crc32_be(const uint8_t *buf, size_t len)
 {
 	uint32_t crc = 0xffffffff;	/* initial value */
 	uint32_t msb, lsb;
 	uint8_t octet;
 	size_t pos;
 	int n;
 
 	for (pos = 0; pos < len; pos++) {
 		octet = buf[pos];
 		for (n = 0; n < 8; n++) {
 			msb = (crc & 0x80000000) ? 1 : 0;
 			lsb = octet & 0x01;
 			octet >>= 1;
 			crc <<= 1;
 			/* Feedback when the outgoing and incoming bits differ. */
 			if ((msb ^ lsb) != 0)
 				crc = (crc ^ ETHER_CRC_POLY_BE) | 1;
 		}
 	}
 
 	return (crc);
 }
 
 /*
  * Generic Ethernet ioctl handling.
  *
  * data is interpreted as a struct ifaddr for SIOCSIFADDR and as a
  * struct ifreq for SIOCGIFADDR and SIOCSIFMTU.  Returns 0 on success
  * and EINVAL for an MTU above ETHERMTU or for any other command.
  */
 int
 ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct ifreq *req = (struct ifreq *)data;
 	struct ifaddr *addr = (struct ifaddr *)data;
 	int error = 0;
 
 	if (command == SIOCSIFADDR) {
 		/* Setting an address marks the interface up and (re)inits it. */
 		ifp->if_flags |= IFF_UP;
 
 		switch (addr->ifa_addr->sa_family) {
 #ifdef INET
 		case AF_INET:
 			ifp->if_init(ifp->if_softc);	/* before arpwhohas */
 			arp_ifinit(ifp, addr);
 			break;
 #endif
 		default:
 			ifp->if_init(ifp->if_softc);
 			break;
 		}
 	} else if (command == SIOCGIFADDR) {
 		/* Copy the link-level address into the request's sockaddr. */
 		struct sockaddr *sa = (struct sockaddr *)&req->ifr_data;
 
 		bcopy(IF_LLADDR(ifp), (caddr_t)sa->sa_data, ETHER_ADDR_LEN);
 	} else if (command == SIOCSIFMTU) {
 		/* Set the interface MTU; only the upper bound is checked. */
 		if (req->ifr_mtu > ETHERMTU)
 			error = EINVAL;
 		else
 			ifp->if_mtu = req->ifr_mtu;
 	} else {
 		error = EINVAL;			/* XXX netbsd has ENOTTY??? */
 	}
 
 	return (error);
 }
 
 static int
 ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
 	struct sockaddr *sa)
 {
 	struct sockaddr_dl *sdl;
 #ifdef INET
 	struct sockaddr_in *sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *sin6;
 #endif
 	u_char *e_addr;
 
 	switch(sa->sa_family) {
 	case AF_LINK:
 		/*
 		 * No mapping needed. Just check that it's a valid MC address.
 		 */
 		sdl = (struct sockaddr_dl *)sa;
 		e_addr = LLADDR(sdl);
 		if (!ETHER_IS_MULTICAST(e_addr))
 			return EADDRNOTAVAIL;
 		*llsa = 0;
 		return 0;
 
 #ifdef INET
 	case AF_INET:
 		sin = (struct sockaddr_in *)sa;
 		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 			return EADDRNOTAVAIL;
 		sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
 		sdl->sdl_alen = ETHER_ADDR_LEN;
 		e_addr = LLADDR(sdl);
 		ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return 0;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		sin6 = (struct sockaddr_in6 *)sa;
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 			/*
 			 * An IP6 address of 0 means listen to all
 			 * of the Ethernet multicast address used for IP6.
 			 * (This is used for multicast routers.)
 			 */
 			ifp->if_flags |= IFF_ALLMULTI;
 			*llsa = 0;
 			return 0;
 		}
 		if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 			return EADDRNOTAVAIL;
 		sdl = link_init_sdl(ifp, *llsa, IFT_ETHER);
 		sdl->sdl_alen = ETHER_ADDR_LEN;
 		e_addr = LLADDR(sdl);
 		ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return 0;
 #endif
 
 	default:
 		/*
 		 * Well, the text isn't quite right, but it's the name
 		 * that counts...
 		 */
 		return EAFNOSUPPORT;
 	}
 }
 
 /* Module data for "ether"; only the name is set. */
 static moduledata_t ether_mod = { .name = "ether" };
 
 /*
  * Tap a frame whose VLAN tag is carried out of band (M_VLANTAG) to bpf
  * with an 802.1Q header rebuilt in front of the payload.
  *
  * bp is the bpf attachment, m the frame starting with its Ethernet
  * header, and data/dlen an optional extra data link header to present
  * ahead of the frame (data == NULL for none).  The mbuf's m_len and
  * m_data are adjusted for the duration of the tap and restored before
  * returning.
  */
 void
 ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen)
 {
 	struct ether_vlan_header vlan;
 	struct mbuf mv, mb;
 
 	KASSERT((m->m_flags & M_VLANTAG) != 0,
 	    ("%s: vlan information not present", __func__));
 	KASSERT(m->m_len >= sizeof(struct ether_header),
 	    ("%s: mbuf not large enough for header", __func__));
 	/*
 	 * Copy the plain Ethernet header into the start of the local VLAN
 	 * header, then move the type that landed in evl_encap_proto to
 	 * evl_proto and fill in the 802.1Q type and tag.
 	 */
 	bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header));
 	vlan.evl_proto = vlan.evl_encap_proto;
 	vlan.evl_encap_proto = htons(ETHERTYPE_VLAN);
 	vlan.evl_tag = htons(m->m_pkthdr.ether_vtag);
 	/* Temporarily hide the original header; undone at the end. */
 	m->m_len -= sizeof(struct ether_header);
 	m->m_data += sizeof(struct ether_header);
 	/*
 	 * If a data link has been supplied by the caller, then we will need to
 	 * re-create a stack allocated mbuf chain with the following structure:
 	 *
 	 * (1) mbuf #1 will contain the supplied data link
 	 * (2) mbuf #2 will contain the vlan header
 	 * (3) mbuf #3 will contain the original mbuf's packet data
 	 *
 	 * Otherwise, submit the packet and vlan header via bpf_mtap2().
 	 */
 	if (data != NULL) {
 		/* Only m_next, m_data and m_len of the stack mbufs are set. */
 		mv.m_next = m;
 		mv.m_data = (caddr_t)&vlan;
 		mv.m_len = sizeof(vlan);
 		mb.m_next = &mv;
 		mb.m_data = data;
 		mb.m_len = dlen;
 		bpf_mtap(bp, &mb);
 	} else
 		bpf_mtap2(bp, &vlan, sizeof(vlan), m);
 	/* Put the original Ethernet header back in view. */
 	m->m_len += sizeof(struct ether_header);
 	m->m_data -= sizeof(struct ether_header);
 }
 
 /*
  * Insert an 802.1Q header carrying tag into the Ethernet frame m.
  *
  * Returns the (possibly different) mbuf on success, or NULL when room
  * for the encapsulation could not be made or the header could not be
  * pulled up.
  */
 struct mbuf *
 ether_vlanencap(struct mbuf *m, uint16_t tag)
 {
 	struct ether_vlan_header *hdr;
 
 	/* M_PREPEND takes care of m_len, m_pkthdr.len for us */
 	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
 	if (m == NULL)
 		return (NULL);
 
 	/* The whole VLAN header is rewritten in place below. */
 	if (m->m_len < sizeof(*hdr) &&
 	    (m = m_pullup(m, sizeof(*hdr))) == NULL)
 		return (NULL);
 
 	/*
 	 * Move the MAC addresses to the new front of the frame, then
 	 * write the 802.1Q type and tag into the gap behind them.
 	 */
 	hdr = mtod(m, struct ether_vlan_header *);
 	bcopy((char *)hdr + ETHER_VLAN_ENCAP_LEN,
 	    (char *)hdr, ETHER_HDR_LEN - ETHER_TYPE_LEN);
 	hdr->evl_encap_proto = htons(ETHERTYPE_VLAN);
 	hdr->evl_tag = htons(tag);
 	return (m);
 }
 
 /* Declare the "ether" module, described by ether_mod, at version 1. */
 DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
 MODULE_VERSION(ether, 1);
Index: projects/clang380-import/sys/net/if_fddisubr.c
===================================================================
--- projects/clang380-import/sys/net/if_fddisubr.c	(revision 293686)
+++ projects/clang380-import/sys/net/if_fddisubr.c	(revision 293687)
@@ -1,671 +1,670 @@
 /*-
  * Copyright (c) 1995, 1996
  *	Matt Thomas <matt@3am-software.com>.  All rights reserved.
  * Copyright (c) 1982, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: if_ethersubr.c,v 1.5 1994/12/13 22:31:45 wollman Exp
  * $FreeBSD$
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_llc.h>
 #include <net/if_types.h>
 #include <net/if_llatbl.h>
 
 #include <net/ethernet.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/bpf.h>
 #include <net/fddi.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #endif
 #ifdef INET6
 #include <netinet6/nd6.h>
 #endif
 
 #ifdef DECNET
 #include <netdnet/dn.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 static const u_char fddibroadcastaddr[FDDI_ADDR_LEN] =
 			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
 static int fddi_resolvemulti(struct ifnet *, struct sockaddr **,
 			      struct sockaddr *);
 static int fddi_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
 		       struct route *); 
 static void fddi_input(struct ifnet *ifp, struct mbuf *m);
 
 #define	senderr(e)	do { error = (e); goto bad; } while (0)
 
 /*
  * FDDI output routine.
  * Encapsulate a packet of type family for the local net.
  * Use trailer local net encapsulation if enough data in first
  * packet leaves a multiple of 512 bytes of data in remainder.
  *
  * ifp is the outgoing interface, m the packet, dst the destination whose
  * sa_family selects the encapsulation, and ro an optional route that is
  * consulted only for its gateway flag.  Returns 0 or an errno value; on
  * the "bad" path the mbuf is freed and IFCOUNTER_OERRORS is bumped.
  */
 static int
 fddi_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 	struct route *ro)
 {
 	u_int16_t type;
 	int loop_copy = 0, error = 0, hdrcmplt = 0;
  	u_char esrc[FDDI_ADDR_LEN], edst[FDDI_ADDR_LEN];
 	struct fddi_header *fh;
 #if defined(INET) || defined(INET6)
 	int is_gw = 0;
 #endif
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error)
 		senderr(error);
 #endif
 
 	if (ifp->if_flags & IFF_MONITOR)
 		senderr(ENETDOWN);
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
 		senderr(ENETDOWN);
 	getmicrotime(&ifp->if_lastchange);
 
 #if defined(INET) || defined(INET6)
-	if (ro != NULL && ro->ro_rt != NULL &&
-	    (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0)
-		is_gw = 1;
+	if (ro != NULL)
+		is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
 #endif
 
 	/*
 	 * Resolve the destination MAC address (edst) and the protocol type
 	 * according to the address family of dst.
 	 */
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET: {
 		error = arpresolve(ifp, is_gw, m, dst, edst, NULL);
 		/* EWOULDBLOCK from the resolver is reported as success. */
 		if (error)
 			return (error == EWOULDBLOCK ? 0 : error);
 		type = htons(ETHERTYPE_IP);
 		break;
 	}
 	case AF_ARP:
 	{
 		struct arphdr *ah;
 		ah = mtod(m, struct arphdr *);
 		ah->ar_hrd = htons(ARPHRD_ETHER);
 
 		loop_copy = -1; /* if this is for us, don't do it */
 
 		switch (ntohs(ah->ar_op)) {
 		case ARPOP_REVREQUEST:
 		case ARPOP_REVREPLY:
 			type = htons(ETHERTYPE_REVARP);
 			break;
 		case ARPOP_REQUEST:
 		case ARPOP_REPLY:
 		default:
 			type = htons(ETHERTYPE_ARP);
 			break;
 		}
 
 		if (m->m_flags & M_BCAST)
 			bcopy(ifp->if_broadcastaddr, edst, FDDI_ADDR_LEN);
                 else
 			bcopy(ar_tha(ah), edst, FDDI_ADDR_LEN);
 
 	}
 	break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL);
 		if (error)
 			return (error == EWOULDBLOCK ? 0 : error);
 		type = htons(ETHERTYPE_IPV6);
 		break;
 #endif /* INET6 */
 	case pseudo_AF_HDRCMPLT:
 	{
 		const struct ether_header *eh;
 
 		/* Caller supplies the source address too; dst is set below. */
 		hdrcmplt = 1;
 		eh = (const struct ether_header *)dst->sa_data;
 		bcopy(eh->ether_shost, esrc, FDDI_ADDR_LEN);
 		/* FALLTHROUGH */
 	}
 
 	case AF_UNSPEC:
 	{
 		const struct ether_header *eh;
 
 		loop_copy = -1;
 		eh = (const struct ether_header *)dst->sa_data;
 		bcopy(eh->ether_dhost, edst, FDDI_ADDR_LEN);
 		/* Low bit of the first address byte set: group address. */
 		if (*edst & 1)
 			m->m_flags |= (M_BCAST|M_MCAST);
 		type = eh->ether_type;
 		break;
 	}
 
 	case AF_IMPLINK:
 	{
 		/*
 		 * The packet already starts with an FDDI header; only its
 		 * frame control byte is validated before it is queued.
 		 */
 		fh = mtod(m, struct fddi_header *);
 		error = EPROTONOSUPPORT;
 		switch (fh->fddi_fc & (FDDIFC_C|FDDIFC_L|FDDIFC_F)) {
 			case FDDIFC_LLC_ASYNC: {
 				/* legal priorities are 0 through 7 */
 				if ((fh->fddi_fc & FDDIFC_Z) > 7)
 			        	goto bad;
 				break;
 			}
 			case FDDIFC_LLC_SYNC: {
 				/* FDDIFC_Z bits reserved, must be zero */
 				if (fh->fddi_fc & FDDIFC_Z)
 					goto bad;
 				break;
 			}
 			case FDDIFC_SMT: {
 				/* FDDIFC_Z bits must be non zero */
 				if ((fh->fddi_fc & FDDIFC_Z) == 0)
 					goto bad;
 				break;
 			}
 			default: {
 				/* anything else is too dangerous */
                 	 		goto bad;
 			}
 		}
 		error = 0;
 		if (fh->fddi_dhost[0] & 1)
 			m->m_flags |= (M_BCAST|M_MCAST);
 		goto queue_it;
 	}
 	default:
 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
 		senderr(EAFNOSUPPORT);
 	}
 
 	/*
 	 * Add LLC header.
 	 *
 	 * NOTE(review): type was already passed through htons() in the
 	 * cases above (or copied from a header field), and is passed
 	 * through htons() again here -- confirm this is intended.
 	 */
 	if (type != 0) {
 		struct llc *l;
 		M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT);
 		if (m == 0)
 			senderr(ENOBUFS);
 		l = mtod(m, struct llc *);
 		l->llc_control = LLC_UI;
 		l->llc_dsap = l->llc_ssap = LLC_SNAP_LSAP;
 		l->llc_snap.org_code[0] =
 			l->llc_snap.org_code[1] =
 			l->llc_snap.org_code[2] = 0;
 		l->llc_snap.ether_type = htons(type);
 	}
 
 	/*
 	 * Add local net header.  If no space in first mbuf,
 	 * allocate another.
 	 */
 	M_PREPEND(m, FDDI_HDR_LEN, M_NOWAIT);
 	if (m == 0)
 		senderr(ENOBUFS);
 	fh = mtod(m, struct fddi_header *);
 	fh->fddi_fc = FDDIFC_LLC_ASYNC|FDDIFC_LLC_PRIO4;
 	bcopy((caddr_t)edst, (caddr_t)fh->fddi_dhost, FDDI_ADDR_LEN);
   queue_it:
 	/* Source is the caller's address when hdrcmplt, else our own. */
 	if (hdrcmplt)
 		bcopy((caddr_t)esrc, (caddr_t)fh->fddi_shost, FDDI_ADDR_LEN);
 	else
 		bcopy(IF_LLADDR(ifp), (caddr_t)fh->fddi_shost,
 			FDDI_ADDR_LEN);
 
 	/*
 	 * If a simplex interface, and the packet is being sent to our
 	 * Ethernet address or a broadcast address, loopback a copy.
 	 * XXX To make a simplex device behave exactly like a duplex
 	 * device, we should copy in the case of sending to our own
 	 * ethernet address (thus letting the original actually appear
 	 * on the wire). However, we don't do that here for security
 	 * reasons and compatibility with the original behavior.
 	 */
 	if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) {
 		if ((m->m_flags & M_BCAST) || (loop_copy > 0)) {
 			struct mbuf *n;
 			/*
 			 * NOTE(review): the result of m_copy() is handed to
 			 * if_simloop() without a NULL check -- confirm that
 			 * a failed copy is handled there.
 			 */
 			n = m_copy(m, 0, (int)M_COPYALL);
 			(void) if_simloop(ifp, n, dst->sa_family,
 					  FDDI_HDR_LEN);
 	     	} else if (bcmp(fh->fddi_dhost, fh->fddi_shost,
 				FDDI_ADDR_LEN) == 0) {
 			/* Addressed to ourselves: loop the original, skip the wire. */
 			(void) if_simloop(ifp, m, dst->sa_family,
 					  FDDI_HDR_LEN);
 			return (0);	/* XXX */
 		}
 	}
 
 	error = (ifp->if_transmit)(ifp, m);
 	if (error)
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 
 	return (error);
 
 bad:
 	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 	if (m)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * Process a received FDDI packet.
  *
  * ifp is the receiving interface and m the frame, starting with the
  * FDDI header.  After validation the FDDI and LLC/SNAP headers are
  * stripped and the payload is dispatched to the netisr selected by the
  * SNAP ether type.  The mbuf is always consumed; frames that cannot be
  * delivered are counted in IFCOUNTER_IQDROPS on the dropanyway path.
  */
 static void
 fddi_input(struct ifnet *ifp, struct mbuf *m)
 {
 	int isr;
 	struct llc *l;
 	struct fddi_header *fh;
 
 	/*
 	 * Do consistency checks to verify assumptions
 	 * made by code past this point.
 	 */
 	if ((m->m_flags & M_PKTHDR) == 0) {
 		if_printf(ifp, "discard frame w/o packet header\n");
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		m_freem(m);
 		return;
 	}
 	if (m->m_pkthdr.rcvif == NULL) {
 		if_printf(ifp, "discard frame w/o interface pointer\n");
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		m_freem(m);
 		return;
 	}
 
 	/* Make the FDDI header contiguous; m is NULL if that fails. */
 	m = m_pullup(m, FDDI_HDR_LEN);
 	if (m == NULL) {
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		goto dropanyway;
 	}
 	fh = mtod(m, struct fddi_header *);
 
 	/*
 	 * Discard packet if interface is not up.
 	 */
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
 		goto dropanyway;
 
 	/*
 	 * Give bpf a chance at the packet.
 	 */
 	BPF_MTAP(ifp, m);
 
 	/*
 	 * Interface marked for monitoring; discard packet.
 	 */
 	if (ifp->if_flags & IFF_MONITOR) {
 		m_freem(m);
 		return;
 	}
 
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/*
 	 * Update interface statistics.
 	 */
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	getmicrotime(&ifp->if_lastchange);
 
 	/*
 	 * Discard non local unicast packets when interface
 	 * is in promiscuous mode.
 	 */
 	if ((ifp->if_flags & IFF_PROMISC) && ((fh->fddi_dhost[0] & 1) == 0) &&
 	    (bcmp(IF_LLADDR(ifp), (caddr_t)fh->fddi_dhost,
 	     FDDI_ADDR_LEN) != 0))
 		goto dropanyway;
 
 	/*
 	 * Set mbuf flags for bcast/mcast.
 	 */
 	if (fh->fddi_dhost[0] & 1) {
 		if (bcmp(ifp->if_broadcastaddr, fh->fddi_dhost,
 		    FDDI_ADDR_LEN) == 0)
 			m->m_flags |= M_BCAST;
 		else
 			m->m_flags |= M_MCAST;
 		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
 	}
 
 #ifdef M_LINK0
 	/*
 	 * If this has a LLC priority of 0, then mark it so upper
 	 * layers have a hint that it really came via a FDDI/Ethernet
 	 * bridge.
 	 */
 	if ((fh->fddi_fc & FDDIFC_LLC_PRIO7) == FDDIFC_LLC_PRIO0)
 		m->m_flags |= M_LINK0;
 #endif
 
 	/* Strip off FDDI header. */
 	m_adj(m, FDDI_HDR_LEN);
 
 	/* The LLC/SNAP header is examined in place, so make it contiguous. */
 	m = m_pullup(m, LLC_SNAPFRAMELEN);
 	if (m == NULL) {
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		goto dropanyway;
 	}
 	l = mtod(m, struct llc *);
 
 	switch (l->llc_dsap) {
 	case LLC_SNAP_LSAP:
 	{
 		u_int16_t type;
 
 		/* Only UI frames with a SNAP SSAP and a zero OUI are accepted. */
 		if ((l->llc_control != LLC_UI) ||
 		    (l->llc_ssap != LLC_SNAP_LSAP)) {
 			if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
 			goto dropanyway;
 		}
 		if (l->llc_snap.org_code[0] != 0 ||
 		    l->llc_snap.org_code[1] != 0 ||
 		    l->llc_snap.org_code[2] != 0) {
 			if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
 			goto dropanyway;
 		}
 
 		/* Read the type before the header holding it is trimmed. */
 		type = ntohs(l->llc_snap.ether_type);
 		m_adj(m, LLC_SNAPFRAMELEN);
 
 		switch (type) {
 #ifdef INET
 		case ETHERTYPE_IP:
 			isr = NETISR_IP;
 			break;
 
 		case ETHERTYPE_ARP:
 			if (ifp->if_flags & IFF_NOARP)
 				goto dropanyway;
 			isr = NETISR_ARP;
 			break;
 #endif
 #ifdef INET6
 		case ETHERTYPE_IPV6:
 			isr = NETISR_IPV6;
 			break;
 #endif
 #ifdef DECNET
 		case ETHERTYPE_DECNET:
 			isr = NETISR_DECNET;
 			break;
 #endif
 		default:
 			/* printf("fddi_input: unknown protocol 0x%x\n", type); */
 			if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
 			goto dropanyway;
 		}
 		break;
 	}
 
 	default:
 		/* printf("fddi_input: unknown dsap 0x%x\n", l->llc_dsap); */
 		if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
 		goto dropanyway;
 	}
 	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(isr, m);
 	return;
 
 dropanyway:
 	/* m is NULL here when one of the pullups above failed. */
 	if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
 	if (m)
 		m_freem(m);
 }
 
 /*
  * fddi_ifattach: perform the common link-layer setup for an FDDI
  * interface and attach it to the interface list.
  *
  *	ifp	interface being attached
  *	lla	link-level address; if_addrlen (FDDI_ADDR_LEN) bytes are
  *		copied into the interface's link-level sockaddr
  *	bpf	non-zero to also attach the interface to BPF as DLT_FDDI
  */
 void
 fddi_ifattach(struct ifnet *ifp, const u_int8_t *lla, int bpf)
 {
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 
 	ifp->if_type = IFT_FDDI;
 	ifp->if_addrlen = FDDI_ADDR_LEN;
 	/* NOTE(review): presumably FDDI header plus LLC/SNAP header - confirm. */
 	ifp->if_hdrlen = 21;
 
 	if_attach(ifp);         /* Must be called before additional assignments */
 
 	ifp->if_mtu = FDDIMTU;
 	ifp->if_output = fddi_output;
 	ifp->if_input = fddi_input;
 	ifp->if_resolvemulti = fddi_resolvemulti;
 	ifp->if_broadcastaddr = fddibroadcastaddr;
 	ifp->if_baudrate = 100000000;
 #ifdef IFF_NOTRAILERS
 	ifp->if_flags |= IFF_NOTRAILERS;
 #endif
 	/* Fill in the link-level address record that if_attach() set up. */
 	ifa = ifp->if_addr;
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_FDDI;
 	sdl->sdl_alen = ifp->if_addrlen;
 	bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
 
 	if (bpf)
 		bpfattach(ifp, DLT_FDDI, FDDI_HDR_LEN);
 }
 
 /*
  * fddi_ifdetach: counterpart of fddi_ifattach().  Detaches the interface
  * from BPF (only when `bpf' is non-zero) and then from the interface list.
  */
 void
 fddi_ifdetach(struct ifnet *ifp, int bpf)
 {
 
 	if (bpf)
 		bpfdetach(ifp);
 
 	if_detach(ifp);
 }
 
 /*
  * fddi_ioctl: handle the ioctl requests common to FDDI interfaces.
  *
  *	SIOCSIFADDR	mark the interface up and run the driver's if_init;
  *			for AF_INET also call arp_ifinit()
  *	SIOCGIFADDR	copy the link-level address into the request
  *	SIOCSIFMTU	set the MTU; values above FDDIMTU are rejected
  *
  * `data' is interpreted as a struct ifaddr or a struct ifreq depending on
  * the command.  Returns 0 on success, or EINVAL for an oversized MTU or
  * an unhandled command.
  */
 int
 fddi_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct ifaddr *ifa;
 	struct ifreq *ifr;
 	int error;
 
 	ifa = (struct ifaddr *) data;
 	ifr = (struct ifreq *) data;
 	error = 0;
 
 	switch (command) {
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 
 		switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 		case AF_INET:	/* before arpwhohas */
 			ifp->if_init(ifp->if_softc);
 			arp_ifinit(ifp, ifa);
 			break;
 #endif
 		default:
 			ifp->if_init(ifp->if_softc);
 			break;
 		}
 		break;
 	case SIOCGIFADDR: {
 			struct sockaddr *sa;
 
 			/* The address is returned in the sa_data bytes. */
 			sa = (struct sockaddr *) & ifr->ifr_data;
 			bcopy(IF_LLADDR(ifp),
 			      (caddr_t) sa->sa_data, FDDI_ADDR_LEN);
 
 		}
 		break;
 	case SIOCSIFMTU:
 		/*
 		 * Set the interface MTU.
 		 */
 		if (ifr->ifr_mtu > FDDIMTU) {
 			error = EINVAL;
 		} else {
 			ifp->if_mtu = ifr->ifr_mtu;
 		}
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * fddi_resolvemulti: map a multicast address to a link-level address.
  *
  *	ifp	interface the address is being resolved for
  *	llsa	in/out; set to NULL when no mapping is needed, otherwise
  *		to the sockaddr_dl filled in via link_init_sdl()
  *	sa	address to resolve (AF_LINK, AF_INET or AF_INET6)
  *
  * Returns 0 on success, EADDRNOTAVAIL when `sa' is not a multicast
  * address, or EAFNOSUPPORT for any other address family.
  */
 static int
 fddi_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
     struct sockaddr *sa)
 {
 	struct sockaddr_dl *sdl;
 #ifdef INET
 	struct sockaddr_in *sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *sin6;
 #endif
 	u_char *e_addr;
 
 	switch(sa->sa_family) {
 	case AF_LINK:
 		/*
 		 * No mapping needed. Just check that it's a valid MC address.
 		 */
 		sdl = (struct sockaddr_dl *)sa;
 		e_addr = LLADDR(sdl);
 		/* The low bit of the first address byte must be set. */
 		if ((e_addr[0] & 1) != 1)
 			return (EADDRNOTAVAIL);
 		*llsa = 0;
 		return (0);
 
 #ifdef INET
 	case AF_INET:
 		sin = (struct sockaddr_in *)sa;
 		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 			return (EADDRNOTAVAIL);
 		sdl = link_init_sdl(ifp, *llsa, IFT_FDDI);
 		sdl->sdl_nlen = 0;
 		sdl->sdl_alen = FDDI_ADDR_LEN;
 		sdl->sdl_slen = 0;
 		e_addr = LLADDR(sdl);
 		ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return (0);
 #endif
 #ifdef INET6
 	case AF_INET6:
 		sin6 = (struct sockaddr_in6 *)sa;
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 			/*
 			 * An IP6 address of 0 means listen to all
 			 * of the Ethernet multicast address used for IP6.
 			 * (This is used for multicast routers.)
 			 */
 			ifp->if_flags |= IFF_ALLMULTI;
 			*llsa = 0;
 			return (0);
 		}
 		if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 			return (EADDRNOTAVAIL);
 		sdl = link_init_sdl(ifp, *llsa, IFT_FDDI);
 		sdl->sdl_nlen = 0;
 		sdl->sdl_alen = FDDI_ADDR_LEN;
 		sdl->sdl_slen = 0;
 		e_addr = LLADDR(sdl);
 		ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return (0);
 #endif
 
 	default:
 		/*
 		 * Well, the text isn't quite right, but it's the name
 		 * that counts...
 		 */
 		return (EAFNOSUPPORT);
 	}
 	/* Every case above returns; control never reaches this point. */
 }
 
 /*
  * Module glue: declares the "fddi" kernel module, version 1.  No event
  * handler is supplied.
  */
 static moduledata_t fddi_mod = {
 	"fddi",	/* module name */
 	NULL,	/* event handler */
 	0	/* extra data */
 };
 
 DECLARE_MODULE(fddi, fddi_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(fddi, 1);
Index: projects/clang380-import/sys/net/if_fwsubr.c
===================================================================
--- projects/clang380-import/sys/net/if_fwsubr.c	(revision 293686)
+++ projects/clang380-import/sys/net/if_fwsubr.c	(revision 293687)
@@ -1,860 +1,855 @@
 /*-
  * Copyright (c) 2004 Doug Rabson
  * Copyright (c) 1982, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/if_llc.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/bpf.h>
 #include <net/firewire.h>
 #include <net/if_llatbl.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #endif
 #ifdef INET6
 #include <netinet6/nd6.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 /* Malloc type used for the per-interface struct fw_com (see firewire_alloc). */
 static MALLOC_DEFINE(M_FWCOM, "fw_com", "firewire interface internals");
 
 /*
  * All-ones hardware address.  Installed as if_broadcastaddr by
  * firewire_ifattach() and used as the BPF pseudo-header destination for
  * non-unicast output.
  * NOTE(review): the initializer is positional; the field order of
  * struct fw_hwaddr is not visible here - confirm against its declaration.
  */
 struct fw_hwaddr firewire_broadcastaddr = {
 	0xffffffff,
 	0xffffffff,
 	0xff,
 	0xff,
 	0xffff,
 	0xffffffff
 };
 
 /*
  * firewire_output: if_output handler installed by firewire_ifattach().
  *
  * For unicast packets the destination hardware address is resolved
  * (arpresolve() for AF_INET, nd6_resolve() for AF_INET6, taken from the
  * ARP header for AF_ARP) and stored in an mbuf tag.  The datagram is then
  * given an encapsulation header and handed to ifp->if_transmit, being
  * split into link-level fragments first when it does not fit into a
  * single packet.
  *
  *	ifp	outgoing interface
  *	m	packet to send
  *	dst	destination address; its family selects the protocol
  *	ro	optional route; only its gateway flag is consulted
  *
  * Returns 0 on success (also when address resolution reports
  * EWOULDBLOCK), otherwise an errno value.  On the "bad" path any mbuf
  * still owned here is freed.
  */
 static int
 firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
     struct route *ro)
 {
 	struct fw_com *fc = IFP2FWC(ifp);
 	int error, type;
 	struct m_tag *mtag;
 	union fw_encap *enc;
 	struct fw_hwaddr *destfw;
 	uint8_t speed;
 	uint16_t psize, fsize, dsize;
 	struct mbuf *mtail;
 	int unicast, dgl, foff;
 	static int next_dgl;
 #if defined(INET) || defined(INET6)
 	int is_gw = 0;
 #endif
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error)
 		goto bad;
 #endif
 
 	if (!((ifp->if_flags & IFF_UP) &&
 	   (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
 		error = ENETDOWN;
 		goto bad;
 	}
 
 #if defined(INET) || defined(INET6)
-	if (ro != NULL && ro->ro_rt != NULL &&
-	    (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0)
-		is_gw = 1;
+	if (ro != NULL)
+		is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
 #endif
 	/*
 	 * For unicast, we make a tag to store the lladdr of the
 	 * destination. This might not be the first time we have seen
 	 * the packet (for instance, the arp code might be trying to
 	 * re-send it after receiving an arp reply) so we only
 	 * allocate a tag if there isn't one there already. For
 	 * multicast, we will eventually use a different tag to store
 	 * the channel number.
 	 */
 	unicast = !(m->m_flags & (M_BCAST | M_MCAST));
 	if (unicast) {
 		mtag = m_tag_locate(m, MTAG_FIREWIRE, MTAG_FIREWIRE_HWADDR, NULL);
 		if (!mtag) {
 			mtag = m_tag_alloc(MTAG_FIREWIRE, MTAG_FIREWIRE_HWADDR,
 			    sizeof (struct fw_hwaddr), M_NOWAIT);
 			if (!mtag) {
 				error = ENOMEM;
 				goto bad;
 			}
 			m_tag_prepend(m, mtag);
 		}
 		/* The tag payload directly follows the tag header. */
 		destfw = (struct fw_hwaddr *)(mtag + 1);
 	} else {
 		destfw = 0;
 	}
 
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		/*
 		 * Only bother with arp for unicast. Allocation of
 		 * channels etc. for firewire is quite different and
 		 * doesn't fit into the arp model.
 		 */
 		if (unicast) {
-			is_gw = 0;
-			if (ro != NULL && ro->ro_rt != NULL &&
-			    (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0)
-				is_gw = 1;
 			error = arpresolve(ifp, is_gw, m, dst, (u_char *) destfw, NULL);
 			if (error)
 				return (error == EWOULDBLOCK ? 0 : error);
 		}
 		type = ETHERTYPE_IP;
 		break;
 
 	case AF_ARP:
 	{
 		struct arphdr *ah;
 		ah = mtod(m, struct arphdr *);
 		ah->ar_hrd = htons(ARPHRD_IEEE1394);
 		type = ETHERTYPE_ARP;
 		if (unicast)
 			*destfw = *(struct fw_hwaddr *) ar_tha(ah);
 
 		/*
 		 * The standard arp code leaves a hole for the target
 		 * hardware address which we need to close up.
 		 */
 		bcopy(ar_tpa(ah), ar_tha(ah), ah->ar_pln);
 		m_adj(m, -ah->ar_hln);
 		break;
 	}
 #endif
 
 #ifdef INET6
 	case AF_INET6:
 		if (unicast) {
 			error = nd6_resolve(fc->fc_ifp, is_gw, m, dst,
 			    (u_char *) destfw, NULL);
 			if (error)
 				return (error == EWOULDBLOCK ? 0 : error);
 		}
 		type = ETHERTYPE_IPV6;
 		break;
 #endif
 
 	default:
 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
 		error = EAFNOSUPPORT;
 		goto bad;
 	}
 
 	/*
 	 * Let BPF tap off a copy before we encapsulate.
 	 */
 	if (bpf_peers_present(ifp->if_bpf)) {
 		struct fw_bpfhdr h;
 		if (unicast)
 			bcopy(destfw, h.firewire_dhost, 8);
 		else
 			bcopy(&firewire_broadcastaddr, h.firewire_dhost, 8);
 		bcopy(&fc->fc_hwaddr, h.firewire_shost, 8);
 		h.firewire_type = htons(type);
 		bpf_mtap2(ifp->if_bpf, &h, sizeof(h), m);
 	}
 
 	/*
 	 * Punt on MCAP for now and send all multicast packets on the
 	 * broadcast channel.
 	 */
 	if (m->m_flags & M_MCAST)
 		m->m_flags |= M_BCAST;
 
 	/*
 	 * Figure out what speed to use and what the largest supported
 	 * packet size is. For unicast, this is the minimum of what we
 	 * can speak and what they can hear. For broadcast, let's be
 	 * conservative and use S100. We could possibly improve that
 	 * by examining the bus manager's speed map or similar. We
 	 * also reduce the packet size for broadcast to account for
 	 * the GASP header.
 	 */
 	if (unicast) {
 		speed = min(fc->fc_speed, destfw->sspd);
 		psize = min(512 << speed, 2 << destfw->sender_max_rec);
 	} else {
 		speed = 0;
 		psize = 512 - 2*sizeof(uint32_t);
 	}
 
 	/*
 	 * Next, we encapsulate, possibly fragmenting the original
 	 * datagram if it won't fit into a single packet.
 	 */
 	if (m->m_pkthdr.len <= psize - sizeof(uint32_t)) {
 		/*
 		 * No fragmentation is necessary.
 		 */
 		M_PREPEND(m, sizeof(uint32_t), M_NOWAIT);
 		if (!m) {
 			error = ENOBUFS;
 			goto bad;
 		}
 		enc = mtod(m, union fw_encap *);
 		enc->unfrag.ether_type = type;
 		enc->unfrag.lf = FW_ENCAP_UNFRAG;
 		enc->unfrag.reserved = 0;
 
 		/*
 		 * Byte swap the encapsulation header manually.
 		 */
 		enc->ul[0] = htonl(enc->ul[0]);
 
 		error = (ifp->if_transmit)(ifp, m);
 		return (error);
 	} else {
 		/*
 		 * Fragment the datagram, making sure to leave enough
 		 * space for the encapsulation header in each packet.
 		 */
 		fsize = psize - 2*sizeof(uint32_t);
 		dgl = next_dgl++;
 		dsize = m->m_pkthdr.len;
 		foff = 0;
 		while (m) {
 			if (m->m_pkthdr.len > fsize) {
 				/*
 				 * Split off the tail segment from the
 				 * datagram, copying our tags over.
 				 */
 				mtail = m_split(m, fsize, M_NOWAIT);
 				if (mtail == NULL) {
 					/*
 					 * The split failed; drop what is
 					 * left of the datagram instead of
 					 * dereferencing a NULL tail.
 					 */
 					error = ENOBUFS;
 					goto bad;
 				}
 				m_tag_copy_chain(mtail, m, M_NOWAIT);
 			} else {
 				mtail = 0;
 			}
 
 			/*
 			 * Add our encapsulation header to this
 			 * fragment and hand it off to the link.
 			 */
 			M_PREPEND(m, 2*sizeof(uint32_t), M_NOWAIT);
 			if (!m) {
 				/*
 				 * m is already gone; the tail split off
 				 * above must be released here or it leaks.
 				 */
 				if (mtail)
 					m_freem(mtail);
 				error = ENOBUFS;
 				goto bad;
 			}
 			enc = mtod(m, union fw_encap *);
 			if (foff == 0) {
 				enc->firstfrag.lf = FW_ENCAP_FIRST;
 				enc->firstfrag.reserved1 = 0;
 				enc->firstfrag.reserved2 = 0;
 				enc->firstfrag.datagram_size = dsize - 1;
 				enc->firstfrag.ether_type = type;
 				enc->firstfrag.dgl = dgl;
 			} else {
 				if (mtail)
 					enc->nextfrag.lf = FW_ENCAP_NEXT;
 				else
 					enc->nextfrag.lf = FW_ENCAP_LAST;
 				enc->nextfrag.reserved1 = 0;
 				enc->nextfrag.reserved2 = 0;
 				enc->nextfrag.reserved3 = 0;
 				enc->nextfrag.datagram_size = dsize - 1;
 				enc->nextfrag.fragment_offset = foff;
 				enc->nextfrag.dgl = dgl;
 			}
 			/* Advance by the payload carried in this fragment. */
 			foff += m->m_pkthdr.len - 2*sizeof(uint32_t);
 
 			/*
 			 * Byte swap the encapsulation header manually.
 			 */
 			enc->ul[0] = htonl(enc->ul[0]);
 			enc->ul[1] = htonl(enc->ul[1]);
 
 			error = (ifp->if_transmit)(ifp, m);
 			if (error) {
 				if (mtail)
 					m_freem(mtail);
 				return (ENOBUFS);
 			}
 
 			m = mtail;
 		}
 
 		return (0);
 	}
 
 bad:
 	if (m)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * firewire_input_fragment: link-level reassembly of one received fragment.
  *
  *	fc	per-interface state holding the list of reassembly buffers
  *	m	fragment, starting at its two-word encapsulation header
  *		(already converted to host order by the caller)
  *	src	sender id; combined with the datagram label to form the
  *		reassembly key
  *
  * Returns the completed datagram, still carrying the first fragment's
  * encapsulation header, once all of its bytes have been merged.  Returns
  * 0 when the fragment was queued, when the partial reassembly had to be
  * restarted, or when no reassembly buffer could be allocated (in which
  * case m is freed).
  */
 static struct mbuf *
 firewire_input_fragment(struct fw_com *fc, struct mbuf *m, int src)
 {
 	union fw_encap *enc;
 	struct fw_reass *r;
 	struct mbuf *mf, *mprev;
 	int dsize;
 	int fstart, fend, start, end, islast;
 	uint32_t id;
 
 	/*
 	 * Find an existing reassembly buffer or create a new one.
 	 */
 	enc = mtod(m, union fw_encap *);
 	id = enc->firstfrag.dgl | (src << 16);
 	STAILQ_FOREACH(r, &fc->fc_frags, fr_link)
 		if (r->fr_id == id)
 			break;
 	if (!r) {
 		r = malloc(sizeof(struct fw_reass), M_TEMP, M_NOWAIT);
 		if (!r) {
 			m_freem(m);
 			return 0;
 		}
 		r->fr_id = id;
 		r->fr_frags = 0;
 		STAILQ_INSERT_HEAD(&fc->fc_frags, r, fr_link);
 	}
 
 	/*
 	 * If this fragment overlaps any other fragment, we must discard
 	 * the partial reassembly and start again.
 	 */
 	/* [fstart, fend) is the byte range this fragment covers. */
 	if (enc->firstfrag.lf == FW_ENCAP_FIRST)
 		fstart = 0;
 	else
 		fstart = enc->nextfrag.fragment_offset;
 	fend = fstart + m->m_pkthdr.len - 2*sizeof(uint32_t);
 	dsize = enc->nextfrag.datagram_size;
 	islast = (enc->nextfrag.lf == FW_ENCAP_LAST);
 
 	for (mf = r->fr_frags; mf; mf = mf->m_nextpkt) {
 		enc = mtod(mf, union fw_encap *);
 		if (enc->nextfrag.datagram_size != dsize) {
 			/*
 			 * This fragment must be from a different
 			 * packet.
 			 */
 			goto bad;
 		}
 		if (enc->firstfrag.lf == FW_ENCAP_FIRST)
 			start = 0;
 		else
 			start = enc->nextfrag.fragment_offset;
 		end = start + mf->m_pkthdr.len - 2*sizeof(uint32_t);
 		/* A second "last" fragment is treated like an overlap. */
 		if ((fstart < end && fend > start) ||
 		    (islast && enc->nextfrag.lf == FW_ENCAP_LAST)) {
 			/*
 			 * Overlap - discard reassembly buffer and start
 			 * again with this fragment.
 			 */
 			goto bad;
 		}
 	}
 
 	/*
 	 * Find where to put this fragment in the list.
 	 */
 	/* mprev ends up as the queued entry to insert after, or NULL. */
 	for (mf = r->fr_frags, mprev = NULL; mf;
 	    mprev = mf, mf = mf->m_nextpkt) {
 		enc = mtod(mf, union fw_encap *);
 		if (enc->firstfrag.lf == FW_ENCAP_FIRST)
 			start = 0;
 		else
 			start = enc->nextfrag.fragment_offset;
 		if (start >= fend)
 			break;
 	}
 
 	/*
 	 * If this is a last fragment and we are not adding at the end
 	 * of the list, discard the buffer.
 	 */
 	if (islast && mprev && mprev->m_nextpkt)
 		goto bad;
 
 	if (mprev) {
 		m->m_nextpkt = mprev->m_nextpkt;
 		mprev->m_nextpkt = m;
 
 		/*
 		 * Coalesce forwards and see if we can make a whole
 		 * datagram.
 		 */
 		enc = mtod(mprev, union fw_encap *);
 		if (enc->firstfrag.lf == FW_ENCAP_FIRST)
 			start = 0;
 		else
 			start = enc->nextfrag.fragment_offset;
 		end = start + mprev->m_pkthdr.len - 2*sizeof(uint32_t);
 		/* Merge for as long as the next entry starts where mprev ends. */
 		while (end == fstart) {
 			/*
 			 * Strip off the encap header from m and
 			 * append it to mprev, freeing m.
 			 */
 			m_adj(m, 2*sizeof(uint32_t));
 			mprev->m_nextpkt = m->m_nextpkt;
 			mprev->m_pkthdr.len += m->m_pkthdr.len;
 			m_cat(mprev, m);
 
 			/*
 			 * datagram_size holds the size minus one, and mprev
 			 * still carries its own encapsulation header.
 			 */
 			if (mprev->m_pkthdr.len == dsize + 1 + 2*sizeof(uint32_t)) {
 				/*
 				 * We have assembled a complete packet
 				 * we must be finished. Make sure we have
 				 * merged the whole chain.
 				 */
 				STAILQ_REMOVE(&fc->fc_frags, r, fw_reass, fr_link);
 				free(r, M_TEMP);
 				m = mprev->m_nextpkt;
 				while (m) {
 					mf = m->m_nextpkt;
 					m_freem(m);
 					m = mf;
 				}
 				mprev->m_nextpkt = NULL;
 
 				return (mprev);
 			}
 
 			/*
 			 * See if we can continue merging forwards.
 			 */
 			end = fend;
 			m = mprev->m_nextpkt;
 			if (m) {
 				enc = mtod(m, union fw_encap *);
 				if (enc->firstfrag.lf == FW_ENCAP_FIRST)
 					fstart = 0;
 				else
 					fstart = enc->nextfrag.fragment_offset;
 				fend = fstart + m->m_pkthdr.len
 				    - 2*sizeof(uint32_t);
 			} else {
 				break;
 			}
 		}
 	} else {
 		/*
 		 * NOTE(review): this branch is also taken when the list is
 		 * not empty and the new fragment sorts before its head; the
 		 * existing chain then appears to be replaced without being
 		 * freed - confirm.
 		 */
 		m->m_nextpkt = 0;
 		r->fr_frags = m;
 	}
 
 	return (0);
 
 bad:
 	/* Drop everything queued so far and restart with this fragment. */
 	while (r->fr_frags) {
 		mf = r->fr_frags;
 		r->fr_frags = mf->m_nextpkt;
 		m_freem(mf);
 	}
 	m->m_nextpkt = 0;
 	r->fr_frags = m;
 
 	return (0);
 }
 
 /*
  * firewire_input: process a packet handed up by the link-level driver.
  *
  *	ifp	receiving interface
  *	m	packet, starting at the encapsulation header
  *	src	sender id, used to key link-level reassembly
  *
  * The encapsulation header is converted to host order and removed,
  * fragments are passed through firewire_input_fragment(), BPF listeners
  * are given a copy, and the payload is dispatched to the netisr selected
  * by the ether type.  The mbuf is always consumed.
  */
 void
 firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src)
 {
 	struct fw_com *fc = IFP2FWC(ifp);
 	union fw_encap *enc;
 	int type, isr;
 
 	/*
 	 * The caller has already stripped off the packet header
 	 * (stream or wreqb) and marked the mbuf's M_BCAST flag
 	 * appropriately. We de-encapsulate the IP packet and pass it
 	 * up the line after handling link-level fragmentation.
 	 */
 	if (m->m_pkthdr.len < sizeof(uint32_t)) {
 		if_printf(ifp, "discarding frame without "
 		    "encapsulation header (len %u pkt len %u)\n",
 		    m->m_len, m->m_pkthdr.len);
 		/* Actually discard it, as the message says. */
 		m_freem(m);
 		return;
 	}
 
 	m = m_pullup(m, sizeof(uint32_t));
 	if (m == NULL)
 		return;
 	enc = mtod(m, union fw_encap *);
 
 	/*
 	 * Byte swap the encapsulation header manually.
 	 */
 	enc->ul[0] = ntohl(enc->ul[0]);
 
 	if (enc->unfrag.lf != 0) {
 		/* Fragments carry a two-word encapsulation header. */
 		m = m_pullup(m, 2*sizeof(uint32_t));
 		if (!m)
 			return;
 		enc = mtod(m, union fw_encap *);
 		enc->ul[1] = ntohl(enc->ul[1]);
 		m = firewire_input_fragment(fc, m, src);
 		if (!m)
 			return;
 		enc = mtod(m, union fw_encap *);
 		type = enc->firstfrag.ether_type;
 		m_adj(m, 2*sizeof(uint32_t));
 	} else {
 		type = enc->unfrag.ether_type;
 		m_adj(m, sizeof(uint32_t));
 	}
 
 	if (m->m_pkthdr.rcvif == NULL) {
 		if_printf(ifp, "discard frame w/o interface pointer\n");
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		m_freem(m);
 		return;
 	}
 #ifdef DIAGNOSTIC
 	if (m->m_pkthdr.rcvif != ifp) {
 		if_printf(ifp, "Warning, frame marked as received on %s\n",
 			m->m_pkthdr.rcvif->if_xname);
 	}
 #endif
 
 #ifdef MAC
 	/*
 	 * Tag the mbuf with an appropriate MAC label before any other
 	 * consumers can get to it.
 	 */
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/*
 	 * Give bpf a chance at the packet. The link-level driver
 	 * should have left us a tag with the EUID of the sender.
 	 */
 	if (bpf_peers_present(ifp->if_bpf)) {
 		struct fw_bpfhdr h;
 		struct m_tag *mtag;
 
 		mtag = m_tag_locate(m, MTAG_FIREWIRE, MTAG_FIREWIRE_SENDER_EUID, 0);
 		if (mtag)
 			bcopy(mtag + 1, h.firewire_shost, 8);
 		else {
 			/*
 			 * Sender unknown: fill in the source field so
 			 * that no uninitialised stack bytes reach BPF.
 			 */
 			bcopy(&firewire_broadcastaddr, h.firewire_shost, 8);
 		}
 		bcopy(&fc->fc_hwaddr, h.firewire_dhost, 8);
 		h.firewire_type = htons(type);
 		bpf_mtap2(ifp->if_bpf, &h, sizeof(h), m);
 	}
 
 	if (ifp->if_flags & IFF_MONITOR) {
 		/*
 		 * Interface marked for monitoring; discard packet.
 		 */
 		m_freem(m);
 		return;
 	}
 
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 
 	/* Discard packet if interface is not up */
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		return;
 	}
 
 	if (m->m_flags & (M_BCAST|M_MCAST))
 		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
 
 	switch (type) {
 #ifdef INET
 	case ETHERTYPE_IP:
 		isr = NETISR_IP;
 		break;
 
 	case ETHERTYPE_ARP:
 	{
 		struct arphdr *ah;
 		ah = mtod(m, struct arphdr *);
 
 		/*
 		 * Adjust the arp packet to insert an empty tha slot.
 		 */
 		m->m_len += ah->ar_hln;
 		m->m_pkthdr.len += ah->ar_hln;
 		bcopy(ar_tha(ah), ar_tpa(ah), ah->ar_pln);
 		isr = NETISR_ARP;
 		break;
 	}
 #endif
 
 #ifdef INET6
 	case ETHERTYPE_IPV6:
 		isr = NETISR_IPV6;
 		break;
 #endif
 
 	default:
 		/* Unsupported ether type: drop. */
 		m_freem(m);
 		return;
 	}
 
 	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(isr, m);
 }
 
 /*
  * Generic ioctl handling shared by FireWire network interfaces.
  * SIOCSIFADDR, SIOCGIFADDR and SIOCSIFMTU are supported; every other
  * request yields EINVAL.
  */
 int
 firewire_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct ifreq *req;
 	struct ifaddr *addr;
 	struct sockaddr *out;
 
 	req = (struct ifreq *) data;
 	addr = (struct ifaddr *) data;
 
 	if (command == SIOCSIFADDR) {
 		/* Bring the interface up, then (re)initialise the driver. */
 		ifp->if_flags |= IFF_UP;
 
 		switch (addr->ifa_addr->sa_family) {
 #ifdef INET
 		case AF_INET:
 			ifp->if_init(ifp->if_softc);	/* before arpwhohas */
 			arp_ifinit(ifp, addr);
 			break;
 #endif
 		default:
 			ifp->if_init(ifp->if_softc);
 			break;
 		}
 		return (0);
 	}
 
 	if (command == SIOCGIFADDR) {
 		/* Hand back our hardware address in the sa_data bytes. */
 		out = (struct sockaddr *) & req->ifr_data;
 		bcopy(&IFP2FWC(ifp)->fc_hwaddr,
 		    (caddr_t) out->sa_data, sizeof(struct fw_hwaddr));
 		return (0);
 	}
 
 	if (command == SIOCSIFMTU) {
 		/* Accept the new MTU unless it exceeds 1500. */
 		if (req->ifr_mtu > 1500)
 			return (EINVAL);
 		ifp->if_mtu = req->ifr_mtu;
 		return (0);
 	}
 
 	return (EINVAL);			/* XXX netbsd has ENOTTY??? */
 }
 
 /*
  * Validate a multicast address for a FireWire interface.  No link-level
  * mapping is ever produced: on success *llsa is cleared and 0 is
  * returned.  Failures are EADDRNOTAVAIL (not a multicast address) and
  * EAFNOSUPPORT (address family not handled); *llsa is left alone then.
  */
 static int
 firewire_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
     struct sockaddr *sa)
 {
 #ifdef INET
 	struct sockaddr_in *in4;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *in6;
 #endif
 
 	switch (sa->sa_family) {
 	case AF_LINK:
 		/* Link-level addresses need no mapping. */
 		break;
 
 #ifdef INET
 	case AF_INET:
 		in4 = (struct sockaddr_in *)sa;
 		if (!IN_MULTICAST(ntohl(in4->sin_addr.s_addr)))
 			return (EADDRNOTAVAIL);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		in6 = (struct sockaddr_in6 *)sa;
 		if (IN6_IS_ADDR_UNSPECIFIED(&in6->sin6_addr)) {
 			/*
 			 * The unspecified IP6 address asks for all IP6
 			 * multicast traffic (used by multicast routers).
 			 */
 			ifp->if_flags |= IFF_ALLMULTI;
 		} else if (!IN6_IS_ADDR_MULTICAST(&in6->sin6_addr)) {
 			return (EADDRNOTAVAIL);
 		}
 		break;
 #endif
 
 	default:
 		/*
 		 * The error text is not a perfect fit, but the name is
 		 * what matters.
 		 */
 		return (EAFNOSUPPORT);
 	}
 
 	*llsa = 0;
 	return (0);
 }
 
 /*
  * firewire_ifattach: common attach code for FireWire network interfaces.
  *
  *	ifp	interface being attached
  *	llc	hardware address of this node; copied into the interface's
  *		link-level sockaddr, and its sspd field becomes fc_speed
  *
  * Initialises the reassembly list, attaches the interface and its BPF
  * tap, installs the output and multicast-resolution handlers, and prints
  * the address details.
  */
 void
 firewire_ifattach(struct ifnet *ifp, struct fw_hwaddr *llc)
 {
 	struct fw_com *fc = IFP2FWC(ifp);
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 	const char *speedname;
 	static const char* speeds[] = {
 		"S100", "S200", "S400", "S800",
 		"S1600", "S3200"
 	};
 
 	fc->fc_speed = llc->sspd;
 	STAILQ_INIT(&fc->fc_frags);
 
 	ifp->if_addrlen = sizeof(struct fw_hwaddr);
 	ifp->if_hdrlen = 0;
 	if_attach(ifp);
 	ifp->if_mtu = 1500;	/* XXX */
 	ifp->if_output = firewire_output;
 	ifp->if_resolvemulti = firewire_resolvemulti;
 	ifp->if_broadcastaddr = (u_char *) &firewire_broadcastaddr;
 
 	/* Fill in the link-level address record that if_attach() set up. */
 	ifa = ifp->if_addr;
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_IEEE1394;
 	sdl->sdl_alen = ifp->if_addrlen;
 	bcopy(llc, LLADDR(sdl), ifp->if_addrlen);
 
 	bpfattach(ifp, DLT_APPLE_IP_OVER_IEEE1394,
 	    sizeof(struct fw_hwaddr));
 
 	/* Never index past the table for a speed code we do not know. */
 	if (llc->sspd < sizeof(speeds) / sizeof(speeds[0]))
 		speedname = speeds[llc->sspd];
 	else
 		speedname = "unknown";
 
 	if_printf(ifp, "Firewire address: %8D @ 0x%04x%08x, %s, maxrec %d\n",
 	    (uint8_t *) &llc->sender_unique_ID_hi, ":",
 	    ntohs(llc->sender_unicast_FIFO_hi),
 	    ntohl(llc->sender_unicast_FIFO_lo),
 	    speedname,
 	    (2 << llc->sender_max_rec));
 }
 
 /*
  * firewire_ifdetach: counterpart of firewire_ifattach().  Detaches the
  * interface from BPF first, then from the interface list.
  */
 void
 firewire_ifdetach(struct ifnet *ifp)
 {
 	bpfdetach(ifp);
 	if_detach(ifp);
 }
 
 void
 firewire_busreset(struct ifnet *ifp)
 {
 	struct fw_com *fc = IFP2FWC(ifp);
 	struct fw_reass *r;
 	struct mbuf *m;
 
 	/*
 	 * Discard any partial datagrams since the host ids may have changed.
 	 */
 	while ((r = STAILQ_FIRST(&fc->fc_frags))) {
 		STAILQ_REMOVE_HEAD(&fc->fc_frags, fr_link);
 		while (r->fr_frags) {
 			m = r->fr_frags;
 			r->fr_frags = m->m_nextpkt;
 			m_freem(m);
 		}
 		free(r, M_TEMP);
 	}
 }
 
 /*
  * Allocation hook registered for IFT_IEEE1394 by firewire_modevent():
  * returns a zero-filled struct fw_com whose fc_ifp points back at ifp.
  * The allocation uses M_WAITOK; `type' is not used.
  */
 static void *
 firewire_alloc(u_char type, struct ifnet *ifp)
 {
 	struct fw_com *com;
 
 	com = malloc(sizeof(*com), M_FWCOM, M_WAITOK | M_ZERO);
 	com->fc_ifp = ifp;
 	return (com);
 }
 
 /*
  * Release hook registered alongside firewire_alloc(): frees the
  * struct fw_com with the same malloc type it was allocated with.
  * `type' is not used.
  */
 static void
 firewire_free(void *com, u_char type)
 {
 
 	free(com, M_FWCOM);
 }
 
 /*
  * Module event handler.  Loading registers the fw_com allocation hooks
  * for IFT_IEEE1394 and unloading removes them; any other event is
  * answered with EOPNOTSUPP.
  */
 static int
 firewire_modevent(module_t mod, int type, void *data)
 {
 
 	if (type == MOD_LOAD)
 		if_register_com_alloc(IFT_IEEE1394,
 		    firewire_alloc, firewire_free);
 	else if (type == MOD_UNLOAD)
 		if_deregister_com_alloc(IFT_IEEE1394);
 	else
 		return (EOPNOTSUPP);
 
 	return (0);
 }
 
 /*
  * Module glue: declares the "if_firewire" kernel module, version 1,
  * with firewire_modevent() as its event handler.
  */
 static moduledata_t firewire_mod = {
 	"if_firewire",		/* module name */
 	firewire_modevent,	/* event handler */
 	0			/* extra data */
 };
 
 DECLARE_MODULE(if_firewire, firewire_mod, SI_SUB_INIT_IF, SI_ORDER_ANY);
 MODULE_VERSION(if_firewire, 1);
Index: projects/clang380-import/sys/net/if_iso88025subr.c
===================================================================
--- projects/clang380-import/sys/net/if_iso88025subr.c	(revision 293686)
+++ projects/clang380-import/sys/net/if_iso88025subr.c	(revision 293687)
@@ -1,701 +1,697 @@
 /*-
  * Copyright (c) 1998, Larry Lile
  * All rights reserved.
  *
  * For latest sources and information on this driver, please
  * go to http://anarchy.stdio.com.
  *
  * Questions, comments or suggestions should be directed to
  * Larry Lile <lile@stdio.com>.
  * 
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  *
  */
 
 /*
  *
  * General ISO 802.5 (Token Ring) support routines
  * 
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/socket.h>
 #include <sys/sockio.h> 
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_llc.h>
 #include <net/if_types.h>
 #include <net/if_llatbl.h>
 
 #include <net/ethernet.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/bpf.h>
 #include <net/iso88025.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #endif
 #ifdef INET6
 #include <netinet6/nd6.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 static const u_char iso88025_broadcastaddr[ISO88025_ADDR_LEN] =
 			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
 static int iso88025_resolvemulti (struct ifnet *, struct sockaddr **,
 				  struct sockaddr *);
 
 #define	senderr(e)	do { error = (e); goto bad; } while (0)
 
 /*
  * Common attach-time work for a Token Ring interface: set the link-layer
  * parameters, attach the ifnet, install the 802.5 handlers and defaults,
  * store the link-level address "lla", and attach BPF when "bpf" is
  * non-zero.
  */
 void
 iso88025_ifattach(struct ifnet *ifp, const u_int8_t *lla, int bpf)
 {
 	struct sockaddr_dl *sdl;
 	struct ifaddr *ifa;
 
 	ifp->if_type = IFT_ISO88025;
 	ifp->if_addrlen = ISO88025_ADDR_LEN;
 	ifp->if_hdrlen = ISO88025_HDR_LEN;
 
 	/* Everything below must be assigned after if_attach(). */
 	if_attach(ifp);
 
 	ifp->if_output = iso88025_output;
 	ifp->if_input = iso88025_input;
 	ifp->if_resolvemulti = iso88025_resolvemulti;
 	ifp->if_broadcastaddr = iso88025_broadcastaddr;
 
 	/* Only fill in defaults the driver did not already choose. */
 	if (ifp->if_baudrate == 0)
 		ifp->if_baudrate = TR_16MBPS;	/* 16Mbit should be safe */
 	if (ifp->if_mtu == 0)
 		ifp->if_mtu = ISO88025_DEFAULT_MTU;
 
 	ifa = ifp->if_addr;
 	KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__));
 
 	/* Record the hardware address in the interface's link-level sockaddr. */
 	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
 	sdl->sdl_type = IFT_ISO88025;
 	sdl->sdl_alen = ifp->if_addrlen;
 	bcopy(lla, LLADDR(sdl), ifp->if_addrlen);
 
 	if (bpf)
 		bpfattach(ifp, DLT_IEEE802, ISO88025_HDR_LEN);
 }
 
 /*
  * Perform common duties while detaching a Token Ring interface.
  *
  * ifp: interface being detached.
  * bpf: non-zero if BPF should be detached as well (the counterpart of
  *      the "bpf" argument to iso88025_ifattach()).
  */
 void
 iso88025_ifdetach(struct ifnet *ifp, int bpf)
 {
 
 	if (bpf)
 		bpfdetach(ifp);
 
 	if_detach(ifp);
 }
 
 /*
  * Common ioctl handling for Token Ring interfaces.
  *
  * Handles SIOCSIFADDR (mark the interface up and run its init routine,
  * plus ARP setup for AF_INET), SIOCGIFADDR (copy out the link-level
  * address) and SIOCSIFMTU (bounded by ISO88025_MAX_MTU).  Every other
  * command returns EINVAL.
  */
 int
 iso88025_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	/* "data" is viewed as an ifaddr or an ifreq depending on the command. */
 	struct ifaddr *ifa = (struct ifaddr *)data;
 	struct ifreq *ifr = (struct ifreq *)data;
 	struct sockaddr *sa;
 	int error = 0;
 
 	switch (command) {
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 
 		switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 		case AF_INET:
 			ifp->if_init(ifp->if_softc);	/* before arpwhohas */
 			arp_ifinit(ifp, ifa);
 			break;
 #endif	/* INET */
 		default:
 			ifp->if_init(ifp->if_softc);
 			break;
 		}
 		break;
 
 	case SIOCGIFADDR:
 		sa = (struct sockaddr *)&ifr->ifr_data;
 		bcopy(IF_LLADDR(ifp), (caddr_t)sa->sa_data,
 		    ISO88025_ADDR_LEN);
 		break;
 
 	case SIOCSIFMTU:
 		/* Set the interface MTU, refusing anything above the maximum. */
 		if (ifr->ifr_mtu <= ISO88025_MAX_MTU)
 			ifp->if_mtu = ifr->ifr_mtu;
 		else
 			error = EINVAL;
 		break;
 
 	default:
 		error = EINVAL;			/* XXX netbsd has ENOTTY??? */
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * ISO88025 encapsulation
  */
 int
 iso88025_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 	struct route *ro)
 {
 	u_int16_t snap_type = 0;
 	int loop_copy = 0, error = 0, rif_len = 0;
 	u_char edst[ISO88025_ADDR_LEN];
 	struct iso88025_header *th;
 	struct iso88025_header gen_th;
 	struct sockaddr_dl *sdl = NULL;
 	struct rtentry *rt0 = NULL;
 	int is_gw = 0;
 
-	if (ro != NULL) {
-		rt0 = ro->ro_rt;
-		if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0)
-			is_gw = 1;
-	}
-
+	if (ro != NULL)
+		is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error)
 		senderr(error);
 #endif
 
 	if (ifp->if_flags & IFF_MONITOR)
 		senderr(ENETDOWN);
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
 		senderr(ENETDOWN);
 	getmicrotime(&ifp->if_lastchange);
 
 	/* Calculate routing info length based on arp table entry */
 	/* XXX any better way to do this ? */
 
 	if (rt0 && (sdl = (struct sockaddr_dl *)rt0->rt_gateway))
 		if (SDL_ISO88025(sdl)->trld_rcf != 0)
 			rif_len = TR_RCF_RIFLEN(SDL_ISO88025(sdl)->trld_rcf);
 
 	/* Generate a generic 802.5 header for the packet */
 	gen_th.ac = TR_AC;
 	gen_th.fc = TR_LLC_FRAME;
 	(void)memcpy((caddr_t)gen_th.iso88025_shost, IF_LLADDR(ifp),
 		     ISO88025_ADDR_LEN);
 	if (rif_len) {
 		gen_th.iso88025_shost[0] |= TR_RII;
 		if (rif_len > 2) {
 			gen_th.rcf = SDL_ISO88025(sdl)->trld_rcf;
 			(void)memcpy((caddr_t)gen_th.rd,
 				(caddr_t)SDL_ISO88025(sdl)->trld_route,
 				rif_len - 2);
 		}
 	}
 	
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		error = arpresolve(ifp, is_gw, m, dst, edst, NULL);
 		if (error)
 			return (error == EWOULDBLOCK ? 0 : error);
 		snap_type = ETHERTYPE_IP;
 		break;
 	case AF_ARP:
 	{
 		struct arphdr *ah;
 		ah = mtod(m, struct arphdr *);
 		ah->ar_hrd = htons(ARPHRD_IEEE802);
 
 		loop_copy = -1; /* if this is for us, don't do it */
 
 		switch(ntohs(ah->ar_op)) {
 		case ARPOP_REVREQUEST:
 		case ARPOP_REVREPLY:
 			snap_type = ETHERTYPE_REVARP;
 			break;
 		case ARPOP_REQUEST:
 		case ARPOP_REPLY:
 		default:
 			snap_type = ETHERTYPE_ARP;
 			break;
 		}
 
 		if (m->m_flags & M_BCAST)
 			bcopy(ifp->if_broadcastaddr, edst, ISO88025_ADDR_LEN);
 		else
 			bcopy(ar_tha(ah), edst, ISO88025_ADDR_LEN);
 
 	}
 	break;
 #endif	/* INET */
 #ifdef INET6
 	case AF_INET6:
 		error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL);
 		if (error)
 			return (error == EWOULDBLOCK ? 0 : error);
 		snap_type = ETHERTYPE_IPV6;
 		break;
 #endif	/* INET6 */
 	case AF_UNSPEC:
 	{
 		const struct iso88025_sockaddr_data *sd;
 		/*
 		 * For AF_UNSPEC sockaddr.sa_data must contain all of the
 		 * mac information needed to send the packet.  This allows
 		 * full mac, llc, and source routing function to be controlled.
 		 * llc and source routing information must already be in the
 		 * mbuf provided, ac/fc are set in sa_data.  sockaddr.sa_data
 		 * should be an iso88025_sockaddr_data structure see iso88025.h
 		 */
                 loop_copy = -1;
 		sd = (const struct iso88025_sockaddr_data *)dst->sa_data;
 		gen_th.ac = sd->ac;
 		gen_th.fc = sd->fc;
 		(void)memcpy(edst, sd->ether_dhost, ISO88025_ADDR_LEN);
 		(void)memcpy(gen_th.iso88025_shost, sd->ether_shost,
 		    ISO88025_ADDR_LEN);
 		rif_len = 0;
 		break;
 	}
 	default:
 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
 		senderr(EAFNOSUPPORT);
 		break;
 	}
 
 	/*
 	 * Add LLC header.
 	 */
 	if (snap_type != 0) {
         	struct llc *l;
 		M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT);
 		if (m == 0)
 			senderr(ENOBUFS);
 		l = mtod(m, struct llc *);
 		l->llc_control = LLC_UI;
 		l->llc_dsap = l->llc_ssap = LLC_SNAP_LSAP;
 		l->llc_snap.org_code[0] =
 			l->llc_snap.org_code[1] =
 			l->llc_snap.org_code[2] = 0;
 		l->llc_snap.ether_type = htons(snap_type);
 	}
 
 	/*
 	 * Add local net header.  If no space in first mbuf,
 	 * allocate another.
 	 */
 	M_PREPEND(m, ISO88025_HDR_LEN + rif_len, M_NOWAIT);
 	if (m == 0)
 		senderr(ENOBUFS);
 	th = mtod(m, struct iso88025_header *);
 	bcopy((caddr_t)edst, (caddr_t)&gen_th.iso88025_dhost, ISO88025_ADDR_LEN);
 
 	/* Copy as much of the generic header as is needed into the mbuf */
 	memcpy(th, &gen_th, ISO88025_HDR_LEN + rif_len);
 
         /*
          * If a simplex interface, and the packet is being sent to our
          * Ethernet address or a broadcast address, loopback a copy.
          * XXX To make a simplex device behave exactly like a duplex
          * device, we should copy in the case of sending to our own
          * ethernet address (thus letting the original actually appear
          * on the wire). However, we don't do that here for security
          * reasons and compatibility with the original behavior.
          */     
         if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) {
                 if ((m->m_flags & M_BCAST) || (loop_copy > 0)) { 
                         struct mbuf *n;
 			n = m_copy(m, 0, (int)M_COPYALL);
                         (void) if_simloop(ifp, n, dst->sa_family,
 					  ISO88025_HDR_LEN);
                 } else if (bcmp(th->iso88025_dhost, th->iso88025_shost,
 				 ETHER_ADDR_LEN) == 0) {
 			(void) if_simloop(ifp, m, dst->sa_family,
 					  ISO88025_HDR_LEN);
                        	return(0);      /* XXX */
 		}       
         }      
 
 	IFQ_HANDOFF_ADJ(ifp, m, ISO88025_HDR_LEN + LLC_SNAPFRAMELEN, error);
 	if (error) {
 		printf("iso88025_output: packet dropped QFULL.\n");
 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 	}
 	return (error);
 
 bad:
 	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
 	if (m)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * ISO 88025 de-encapsulation
  *
  * Validate a received Token Ring frame, tap it to BPF, strip the MAC
  * header (including any routing information field) and the LLC/SNAP
  * header, and dispatch the payload to the matching netisr.  Frames that
  * cannot be handled are freed and counted in IFCOUNTER_IQDROPS via the
  * "dropanyway" label.
  */
 void
 iso88025_input(struct ifnet *ifp, struct mbuf *m)
 {
 	struct iso88025_header *th;
 	struct llc *l;
 	int isr;
 	int mac_hdr_len;
 
 	/*
 	 * Do consistency checks to verify assumptions
 	 * made by code past this point.
 	 */
 	if ((m->m_flags & M_PKTHDR) == 0) {
 		if_printf(ifp, "discard frame w/o packet header\n");
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		m_freem(m);
 		return;
 	}
 	if (m->m_pkthdr.rcvif == NULL) {
 		if_printf(ifp, "discard frame w/o interface pointer\n");
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		m_freem(m);
 		return;
 	}
 
 	m = m_pullup(m, ISO88025_HDR_LEN);
 	if (m == NULL) {
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		goto dropanyway;
 	}
 	th = mtod(m, struct iso88025_header *);
 
 	/*
 	 * Discard packet if interface is not up.
 	 */
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING)))
 		goto dropanyway;
 
 	/*
 	 * Give bpf a chance at the packet.
 	 */
 	BPF_MTAP(ifp, m);
 
 	/*
 	 * Interface marked for monitoring; discard packet.
 	 */
 	if (ifp->if_flags & IFF_MONITOR) {
 		m_freem(m);
 		return;
 	}
 
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/*
 	 * Update interface statistics.
 	 */
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	getmicrotime(&ifp->if_lastchange);
 
 	/*
 	 * Discard non local unicast packets when interface
 	 * is in promiscuous mode.
 	 */
 	if ((ifp->if_flags & IFF_PROMISC) &&
 	    ((th->iso88025_dhost[0] & 1) == 0) &&
 	     (bcmp(IF_LLADDR(ifp), (caddr_t) th->iso88025_dhost,
 	     ISO88025_ADDR_LEN) != 0))
 		goto dropanyway;
 
 	/*
 	 * Set mbuf flags for bcast/mcast.
 	 */
 	if (th->iso88025_dhost[0] & 1) {
 		if (bcmp(iso88025_broadcastaddr, th->iso88025_dhost,
 		    ISO88025_ADDR_LEN) == 0)
 			m->m_flags |= M_BCAST;
 		else
 			m->m_flags |= M_MCAST;
 		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
 	}
 
 	mac_hdr_len = ISO88025_HDR_LEN;
 	/*
 	 * Check for source routing info.
 	 * NOTE(review): only ISO88025_HDR_LEN bytes were pulled up above;
 	 * presumably th->rcf lies beyond them -- verify that it is always
 	 * contiguous in the first mbuf.
 	 */
 	if (th->iso88025_shost[0] & TR_RII)
 		mac_hdr_len += TR_RCF_RIFLEN(th->rcf);
 
 	/* Strip off ISO88025 header. */
 	m_adj(m, mac_hdr_len);
 
 	/*
 	 * NOTE(review): "th" was taken before the m_adj()/m_pullup() calls
 	 * here and is still dereferenced further down; confirm it cannot be
 	 * left dangling if m_pullup() has to replace the first mbuf.
 	 */
 	m = m_pullup(m, LLC_SNAPFRAMELEN);
 	if (m == 0) {
 		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
 		goto dropanyway;
 	}
 	l = mtod(m, struct llc *);
 
 	switch (l->llc_dsap) {
 	case LLC_SNAP_LSAP: {
 		u_int16_t type;
 		if ((l->llc_control != LLC_UI) ||
 		    (l->llc_ssap != LLC_SNAP_LSAP)) {
 			if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
 			goto dropanyway;
 		}
 
 		/* Only the all-zero organisation code is accepted. */
 		if (l->llc_snap.org_code[0] != 0 ||
 		    l->llc_snap.org_code[1] != 0 ||
 		    l->llc_snap.org_code[2] != 0) {
 			if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
 			goto dropanyway;
 		}
 
 		type = ntohs(l->llc_snap.ether_type);
 		m_adj(m, LLC_SNAPFRAMELEN);
 		switch (type) {
 #ifdef INET
 		case ETHERTYPE_IP:
 			th->iso88025_shost[0] &= ~(TR_RII);
 			isr = NETISR_IP;
 			break;
 
 		case ETHERTYPE_ARP:
 			if (ifp->if_flags & IFF_NOARP)
 				goto dropanyway;
 			isr = NETISR_ARP;
 			break;
 #endif	/* INET */
 #ifdef INET6
 		case ETHERTYPE_IPV6:
 			th->iso88025_shost[0] &= ~(TR_RII);
 			isr = NETISR_IPV6;
 			break;
 #endif	/* INET6 */
 		default:
 			printf("iso88025_input: unexpected llc_snap ether_type  0x%02x\n", type);
 			if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
 			goto dropanyway;
 		}
 		break;
 	}
 #ifdef ISO
 	case LLC_ISO_LSAP:
 		switch (l->llc_control) {
 		case LLC_UI:
 			if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
 			goto dropanyway;
 			break;
 		case LLC_XID:
 		case LLC_XID_P:
 			if(m->m_len < ISO88025_ADDR_LEN)
 				goto dropanyway;
 			l->llc_window = 0;
 			l->llc_fid = 9;
 			l->llc_class = 1;
 			l->llc_dsap = l->llc_ssap = 0;
 			/* Fall through to */
 		case LLC_TEST:
 		case LLC_TEST_P:
 		{
 			struct sockaddr sa;
 			struct iso88025_sockaddr_data *th2;
 			int i;
 			u_char c;
 
 			c = l->llc_dsap;
 
 			if (th->iso88025_shost[0] & TR_RII) { /* XXX */
 				printf("iso88025_input: dropping source routed LLC_TEST\n");
 				goto dropanyway;
 			}
 			/* Swap the SAPs and the MAC addresses to build the reply. */
 			l->llc_dsap = l->llc_ssap;
 			l->llc_ssap = c;
 			if (m->m_flags & (M_BCAST | M_MCAST))
 				bcopy((caddr_t)IF_LLADDR(ifp),
 				      (caddr_t)th->iso88025_dhost,
 					ISO88025_ADDR_LEN);
 			sa.sa_family = AF_UNSPEC;
 			sa.sa_len = sizeof(sa);
 			th2 = (struct iso88025_sockaddr_data *)sa.sa_data;
 			for (i = 0; i < ISO88025_ADDR_LEN; i++) {
 				th2->ether_shost[i] = c = th->iso88025_dhost[i];
 				th2->ether_dhost[i] = th->iso88025_dhost[i] =
 					th->iso88025_shost[i];
 				th->iso88025_shost[i] = c;
 			}
 			th2->ac = TR_AC;
 			th2->fc = TR_LLC_FRAME;
 			ifp->if_output(ifp, m, &sa, NULL);
 			return;
 		}
 		default:
 			printf("iso88025_input: unexpected llc control 0x%02x\n", l->llc_control);
 			if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
 			goto dropanyway;
 			break;
 		}
 		break;
 #endif	/* ISO */
 	default:
 		printf("iso88025_input: unknown dsap 0x%x\n", l->llc_dsap);
 		if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1);
 		goto dropanyway;
 		break;
 	}
 
 	M_SETFIB(m, ifp->if_fib);
 	netisr_dispatch(isr, m);
 	return;
 
 dropanyway:
 	/* m is NULL here when an m_pullup() above failed. */
 	if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
 	if (m)
 		m_freem(m);
 	return;
 }
 
 /*
  * Translate a multicast address "sa" into a link-level multicast
  * address for this interface.
  *
  * AF_LINK addresses are only validated (group bit must be set) and *llsa
  * is cleared.  AF_INET / AF_INET6 multicast addresses are mapped with the
  * Ethernet mapping macros into a sockaddr_dl obtained from
  * link_init_sdl(), which is returned through *llsa.  An unspecified IPv6
  * address switches the interface to IFF_ALLMULTI and clears *llsa.
  *
  * Returns 0 on success, EADDRNOTAVAIL if the address is not a multicast
  * address, or EAFNOSUPPORT for any other address family.
  */
 static int
 iso88025_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
     struct sockaddr *sa)
 {
 	struct sockaddr_dl *sdl;
 #ifdef INET
 	struct sockaddr_in *sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *sin6;
 #endif
 	u_char *e_addr;
 
 	switch(sa->sa_family) {
 	case AF_LINK:
 		/*
 		 * No mapping needed. Just check that it's a valid MC address.
 		 */
 		sdl = (struct sockaddr_dl *)sa;
 		e_addr = LLADDR(sdl);
 		if ((e_addr[0] & 1) != 1) {
 			return (EADDRNOTAVAIL);
 		}
 		*llsa = 0;
 		return (0);
 
 #ifdef INET
 	case AF_INET:
 		sin = (struct sockaddr_in *)sa;
 		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
 			return (EADDRNOTAVAIL);
 		}
 		sdl = link_init_sdl(ifp, *llsa, IFT_ISO88025);
 		sdl->sdl_alen = ISO88025_ADDR_LEN;
 		e_addr = LLADDR(sdl);
 		ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return (0);
 #endif
 #ifdef INET6
 	case AF_INET6:
 		sin6 = (struct sockaddr_in6 *)sa;
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 			/*
 			 * An IP6 address of 0 means listen to all
 			 * of the Ethernet multicast address used for IP6.
 			 * (This is used for multicast routers.)
 			 */
 			ifp->if_flags |= IFF_ALLMULTI;
 			*llsa = 0;
 			return (0);
 		}
 		if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
 			return (EADDRNOTAVAIL);
 		}
 		sdl = link_init_sdl(ifp, *llsa, IFT_ISO88025);
 		sdl->sdl_alen = ISO88025_ADDR_LEN;
 		e_addr = LLADDR(sdl);
 		ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return (0);
 #endif
 
 	default:
 		/*
 		 * Well, the text isn't quite right, but it's the name
 		 * that counts...
 		 */
 		return (EAFNOSUPPORT);
 	}
 
 	return (0);
 }
 
 /*
  * Module glue: only the module name is set (no event handler is
  * supplied), so this declaration just makes "iso88025" exist as a
  * module, ordered at SI_SUB_PSEUDO, with version 1.
  */
 static moduledata_t iso88025_mod = {
 	.name = "iso88025",
 };
 
 DECLARE_MODULE(iso88025, iso88025_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(iso88025, 1);
Index: projects/clang380-import/sys/net/if_loop.c
===================================================================
--- projects/clang380-import/sys/net/if_loop.c	(revision 293686)
+++ projects/clang380-import/sys/net/if_loop.c	(revision 293687)
@@ -1,450 +1,446 @@
 /*-
  * Copyright (c) 1982, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)if_loop.c	8.2 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 /*
  * Loopback interface driver for protocol testing and timing.
  */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <machine/bus.h>
 #include <sys/rman.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_clone.h>
 #include <net/if_types.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/bpf.h>
 #include <net/vnet.h>
 
 #ifdef	INET
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #endif
 
 #ifdef INET6
 #ifndef INET
 #include <netinet/in.h>
 #endif
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 #ifdef TINY_LOMTU
 #define	LOMTU	(1024+512)
 #elif defined(LARGE_LOMTU)
 #define LOMTU	131072
 #else
 #define LOMTU	16384
 #endif
 
 #define	LO_CSUM_FEATURES	(CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)
 #define	LO_CSUM_FEATURES6	(CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6)
 #define	LO_CSUM_SET		(CSUM_DATA_VALID | CSUM_DATA_VALID_IPV6 | \
 				    CSUM_PSEUDO_HDR | \
 				    CSUM_IP_CHECKED | CSUM_IP_VALID | \
 				    CSUM_SCTP_VALID)
 
 int		loioctl(struct ifnet *, u_long, caddr_t);
 int		looutput(struct ifnet *ifp, struct mbuf *m,
 		    const struct sockaddr *dst, struct route *ro);
 static int	lo_clone_create(struct if_clone *, int, caddr_t);
 static void	lo_clone_destroy(struct ifnet *);
 
 VNET_DEFINE(struct ifnet *, loif);	/* Used externally */
 
 #ifdef VIMAGE
 static VNET_DEFINE(struct if_clone *, lo_cloner);
 #define	V_lo_cloner		VNET(lo_cloner)
 #endif
 
 static struct if_clone *lo_cloner;
 static const char loname[] = "lo";
 
 /*
  * Cloner destroy callback for "lo" interfaces: detach BPF, detach the
  * ifnet and free it.
  */
 static void
 lo_clone_destroy(struct ifnet *ifp)
 {
 
 #ifndef VIMAGE
 	/* XXX: destroying lo0 will lead to panics. */
 	KASSERT(V_loif != ifp, ("%s: destroying lo0", __func__));
 #endif
 
 	bpfdetach(ifp);
 	if_detach(ifp);
 	if_free(ifp);
 }
 
 /*
  * Cloner create callback: allocate and attach loopback interface
  * "lo<unit>".  The first interface created while V_loif is still NULL
  * becomes V_loif.  Returns ENOSPC if the ifnet cannot be allocated,
  * otherwise 0.  The params argument is not used.
  */
 static int
 lo_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 	struct ifnet *lo;
 
 	lo = if_alloc(IFT_LOOP);
 	if (lo == NULL)
 		return (ENOSPC);
 
 	if_initname(lo, loname, unit);
 
 	/* Handlers and basic parameters. */
 	lo->if_ioctl = loioctl;
 	lo->if_output = looutput;
 	lo->if_flags = IFF_LOOPBACK | IFF_MULTICAST;
 	lo->if_mtu = LOMTU;
 	lo->if_snd.ifq_maxlen = ifqmaxlen;
 
 	/* All checksum "offload" capabilities are advertised and enabled. */
 	lo->if_capenable = IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
 	lo->if_capabilities = lo->if_capenable;
 	lo->if_hwassist = LO_CSUM_FEATURES | LO_CSUM_FEATURES6;
 
 	if_attach(lo);
 	bpfattach(lo, DLT_NULL, sizeof(u_int32_t));
 
 	if (V_loif == NULL)
 		V_loif = lo;
 
 	return (0);
 }
 
 /*
  * Per-vnet initialisation: register the "lo" cloner (asking for one
  * interface up front) and, with VIMAGE, remember it in the vnet-local
  * V_lo_cloner as well.
  */
 static void
 vnet_loif_init(const void *unused __unused)
 {
 
 	lo_cloner = if_clone_simple(loname, lo_clone_create, lo_clone_destroy,
 	    1);
 #ifdef VIMAGE
 	V_lo_cloner = lo_cloner;
 #endif
 }
 VNET_SYSINIT(vnet_loif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_loif_init, NULL);
 
 #ifdef VIMAGE
 /*
  * Per-vnet teardown (VIMAGE only): detach this vnet's "lo" cloner and
  * forget its loopback interface pointer.
  */
 static void
 vnet_loif_uninit(const void *unused __unused)
 {
 
 	if_clone_detach(V_lo_cloner);
 	V_loif = NULL;
 }
 VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
     vnet_loif_uninit, NULL);
 #endif
 
 /*
  * Module event handler.  Loading needs no work here; unloading is
  * refused with EINVAL after logging a message; every other event
  * returns EOPNOTSUPP.
  */
 static int
 loop_modevent(module_t mod, int type, void *data)
 {
 
 	if (type == MOD_LOAD)
 		return (0);
 
 	if (type == MOD_UNLOAD) {
 		printf("loop module unload - not possible for this module type\n");
 		return (EINVAL);
 	}
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Module glue: "if_lo" is declared with loop_modevent as its event
  * handler, ordered at SI_SUB_PROTO_IFATTACHDOMAIN.
  */
 static moduledata_t loop_mod = {
 	"if_lo",
 	loop_modevent,
 	0
 };
 
 DECLARE_MODULE(if_lo, loop_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
 
 /*
  * Output routine of the loopback interface.
  *
  * Honours reject/blackhole routes, accounts the packet as transmitted,
  * marks its checksums as already verified where applicable and feeds it
  * straight back into the stack through if_simloop().
  *
  * Returns 0 on success or for a blackholed packet, EHOSTUNREACH for a
  * rejected route, EAFNOSUPPORT for an unexpected address family, or the
  * error from the MAC transmit check.  The mbuf is always consumed.
  */
 int
 looutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
     struct route *ro)
 {
 	u_int32_t af;
-	struct rtentry *rt = NULL;
 #ifdef MAC
 	int error;
 #endif
 
 	M_ASSERTPKTHDR(m); /* check if we have the packet header */
 
-	if (ro != NULL)
-		rt = ro->ro_rt;
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error) {
 		m_freem(m);
 		return (error);
 	}
 #endif
 
 	/*
 	 * The new revision tests the flags cached in the route itself and no
 	 * longer distinguishes host from network routes: a rejected packet
 	 * always yields EHOSTUNREACH (the removed code could also return
 	 * ENETUNREACH).
 	 */
-	if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
+	if (ro != NULL && ro->ro_flags & (RT_REJECT|RT_BLACKHOLE)) {
 		m_freem(m);
-		return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
-		        rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
+		return (ro->ro_flags & RT_BLACKHOLE ? 0 : EHOSTUNREACH);
 	}
 
 	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
 
 	/*
 	 * BPF writes need to be handled specially: for them the real
 	 * address family is carried in the first bytes of sa_data.
 	 */
 	if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
 		af = dst->sa_family;
 
 #if 1	/* XXX */
 	switch (af) {
 	case AF_INET:
 		/* With RX checksumming enabled, claim every checksum is valid. */
 		if (ifp->if_capenable & IFCAP_RXCSUM) {
 			m->m_pkthdr.csum_data = 0xffff;
 			m->m_pkthdr.csum_flags = LO_CSUM_SET;
 		}
 		/* Drop any pending "compute this checksum" requests. */
 		m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES;
 		break;
 	case AF_INET6:
 #if 0
 		/*
 		 * XXX-BZ for now always claim the checksum is good despite
 		 * any interface flags.   This is a workaround for 9.1-R and
 		 * a proper solution ought to be sought later.
 		 */
 		if (ifp->if_capenable & IFCAP_RXCSUM_IPV6) {
 			m->m_pkthdr.csum_data = 0xffff;
 			m->m_pkthdr.csum_flags = LO_CSUM_SET;
 		}
 #else
 		m->m_pkthdr.csum_data = 0xffff;
 		m->m_pkthdr.csum_flags = LO_CSUM_SET;
 #endif
 		m->m_pkthdr.csum_flags &= ~LO_CSUM_FEATURES6;
 		break;
 	default:
 		printf("looutput: af=%d unexpected\n", af);
 		m_freem(m);
 		return (EAFNOSUPPORT);
 	}
 #endif
 	/* hlen == 0: there is no media header on a loopback packet. */
 	return (if_simloop(ifp, m, af, 0));
 }
 
 /*
  * if_simloop()
  *
  * This function is to support software emulation of hardware loopback,
  * i.e., for interfaces with the IFF_SIMPLEX attribute. Since they can't
  * hear their own broadcasts, we create a copy of the packet that we
  * would normally receive via a hardware loopback.
  *
  * This function expects the packet to include the media header of length hlen.
  */
 int
 if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen)
 {
 	int isr;
 
 	M_ASSERTPKTHDR(m);
 	m_tag_delete_nonpersistent(m);
 	m->m_pkthdr.rcvif = ifp;
 
 #ifdef MAC
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 
 	/*
 	 * Let BPF see incoming packet in the following manner:
 	 *  - Emulated packet loopback for a simplex interface
 	 *    (net/if_ethersubr.c)
 	 *	-> passes it to ifp's BPF
 	 *  - IPv4/v6 multicast packet loopback (netinet(6)/ip(6)_output.c)
 	 *	-> not passes it to any BPF
 	 *  - Normal packet loopback from myself to myself (net/if_loop.c)
 	 *	-> passes to lo0's BPF (even in case of IPv6, where ifp!=lo0)
 	 */
 	if (hlen > 0) {
 		if (bpf_peers_present(ifp->if_bpf)) {
 			bpf_mtap(ifp->if_bpf, m);
 		}
 	} else {
 		if (bpf_peers_present(V_loif->if_bpf)) {
 			if ((m->m_flags & M_MCAST) == 0 || V_loif == ifp) {
 				/* XXX beware sizeof(af) != 4 */
 				u_int32_t af1 = af;
 
 				/*
 				 * We need to prepend the address family.
 				 */
 				bpf_mtap2(V_loif->if_bpf, &af1, sizeof(af1), m);
 			}
 		}
 	}
 
 	/* Strip away media header */
 	if (hlen > 0) {
 		m_adj(m, hlen);
 #ifndef __NO_STRICT_ALIGNMENT
 		/*
 		 * Some archs do not like unaligned data, so
 		 * we move data down in the first mbuf.
 		 */
 		if (mtod(m, vm_offset_t) & 3) {
 			KASSERT(hlen >= 3, ("if_simloop: hlen too small"));
 			bcopy(m->m_data,
 			    (char *)(mtod(m, vm_offset_t)
 				- (mtod(m, vm_offset_t) & 3)),
 			    m->m_len);
 			m->m_data -= (mtod(m,vm_offset_t) & 3);
 		}
 #endif
 	}
 
 	/* Deliver to upper layer protocol */
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		isr = NETISR_IP;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		m->m_flags |= M_LOOP;
 		isr = NETISR_IPV6;
 		break;
 #endif
 	default:
 		printf("if_simloop: can't handle af=%d\n", af);
 		m_freem(m);
 		return (EAFNOSUPPORT);
 	}
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
 	netisr_queue(isr, m);	/* mbuf is free'd on failure. */
 	return (0);
 }
 
 /*
  * Process an ioctl request.
  *
  * Handles SIOCSIFADDR (mark the interface up and running),
  * SIOCADDMULTI/SIOCDELMULTI (accepted for AF_INET/AF_INET6 when compiled
  * in, EAFNOSUPPORT otherwise), SIOCSIFMTU, SIOCSIFFLAGS (no-op) and
  * SIOCSIFCAP.  Every other command returns EINVAL.
  *
  * For SIOCSIFCAP only the IPv4 RX/TX checksum capabilities may be
  * toggled.  A request that would change an IPv6 checksum capability is
  * rejected with EOPNOTSUPP before any state is modified, so
  * if_capenable and if_hwassist can never be left out of step by a
  * partially applied request.
  */
 /* ARGSUSED */
 int
 loioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct ifreq *ifr = (struct ifreq *)data;
 	int error = 0, mask;
 
 	switch (cmd) {
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 		ifp->if_drv_flags |= IFF_DRV_RUNNING;
 		/*
 		 * Everything else is done at a higher level.
 		 */
 		break;
 
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		if (ifr == 0) {
 			error = EAFNOSUPPORT;		/* XXX */
 			break;
 		}
 		switch (ifr->ifr_addr.sa_family) {
 
 #ifdef INET
 		case AF_INET:
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			break;
 #endif
 
 		default:
 			error = EAFNOSUPPORT;
 			break;
 		}
 		break;
 
 	case SIOCSIFMTU:
 		ifp->if_mtu = ifr->ifr_mtu;
 		break;
 
 	case SIOCSIFFLAGS:
 		break;
 
 	case SIOCSIFCAP:
 		/* Bits set in mask are the capabilities being toggled. */
 		mask = ifp->if_capenable ^ ifr->ifr_reqcap;
 
 		/*
 		 * Toggling the IPv6 checksum capabilities is not supported.
 		 * Check this first so that a mixed request does not flip the
 		 * IPv4 bits and then bail out without refreshing if_hwassist.
 		 */
 		if ((mask & (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6)) != 0) {
 			error = EOPNOTSUPP;
 			break;
 		}
 
 		if ((mask & IFCAP_RXCSUM) != 0)
 			ifp->if_capenable ^= IFCAP_RXCSUM;
 		if ((mask & IFCAP_TXCSUM) != 0)
 			ifp->if_capenable ^= IFCAP_TXCSUM;
 
 		/* Keep the checksum assist flags in step with TX checksumming. */
 		ifp->if_hwassist = 0;
 		if (ifp->if_capenable & IFCAP_TXCSUM)
 			ifp->if_hwassist = LO_CSUM_FEATURES;
 		break;
 
 	default:
 		error = EINVAL;
 	}
 	return (error);
 }
Index: projects/clang380-import/sys/net/radix_mpath.c
===================================================================
--- projects/clang380-import/sys/net/radix_mpath.c	(revision 293686)
+++ projects/clang380-import/sys/net/radix_mpath.c	(revision 293687)
@@ -1,294 +1,313 @@
 /*	$KAME: radix_mpath.c,v 1.17 2004/11/08 10:29:39 itojun Exp $	*/
 
 /*
  * Copyright (C) 2001 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE
  * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS
  * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL
  * PROPERTIES.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/domain.h>
 #include <sys/syslog.h>
 #include <net/radix.h>
 #include <net/radix_mpath.h>
 #include <net/route.h>
 #include <net/if.h>
 #include <net/if_var.h>
 
 /*
  * give some jitter to hash, to avoid synchronization between routers
  */
 static uint32_t hashjitter;
 
 int
 rn_mpath_capable(struct radix_node_head *rnh)
 {
 
 	return rnh->rnh_multipath;
 }
 
 struct radix_node *
 rn_mpath_next(struct radix_node *rn)
 {
 	struct radix_node *next;
 
 	if (!rn->rn_dupedkey)
 		return NULL;
 	next = rn->rn_dupedkey;
 	if (rn->rn_mask == next->rn_mask)
 		return next;
 	else
 		return NULL;
 }
 
 uint32_t
 rn_mpath_count(struct radix_node *rn)
 {
 	uint32_t i = 0;
 	struct rtentry *rt;
 	
 	while (rn != NULL) {
 		rt = (struct rtentry *)rn;
 		i += rt->rt_weight;
 		rn = rn_mpath_next(rn);
 	}
 	return (i);
 }
 
 struct rtentry *
 rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
 {
 	struct radix_node *rn;
 
 	if (!gate || !rt->rt_gateway)
 		return NULL;
 
 	/* beyond here, we use rn as the master copy */
 	rn = (struct radix_node *)rt;
 	do {
 		rt = (struct rtentry *)rn;
 		/*
 		 * we are removing an address alias that has 
 		 * the same prefix as another address
 		 * we need to compare the interface address because
 		 * rt_gateway is a special sockadd_dl structure
 		 */
 		if (rt->rt_gateway->sa_family == AF_LINK) {
 			if (!memcmp(rt->rt_ifa->ifa_addr, gate, gate->sa_len))
 				break;
 		}
 
 		/*
 		 * Check for other options:
 		 * 1) Routes with 'real' IPv4/IPv6 gateway
 		 * 2) Loopback host routes (another AF_LINK/sockadd_dl check)
 		 * */
 		if (rt->rt_gateway->sa_len == gate->sa_len &&
 		    !memcmp(rt->rt_gateway, gate, gate->sa_len))
 			break;
 	} while ((rn = rn_mpath_next(rn)) != NULL);
 
 	return (struct rtentry *)rn;
 }
 
 /* 
  * go through the chain and unlink "rt" from the list
  * the caller will free "rt"
  */
 int
 rt_mpath_deldup(struct rtentry *headrt, struct rtentry *rt)
 {
         struct radix_node *t, *tt;
 
         if (!headrt || !rt)
             return (0);
         t = (struct radix_node *)headrt;
         tt = rn_mpath_next(t);
         while (tt) {
             if (tt == (struct radix_node *)rt) {
                 t->rn_dupedkey = tt->rn_dupedkey;
                 tt->rn_dupedkey = NULL;
     	        tt->rn_flags &= ~RNF_ACTIVE;
 	        tt[1].rn_flags &= ~RNF_ACTIVE;
                 return (1);
             }
             t = tt;
             tt = rn_mpath_next((struct radix_node *)t);
         }
         return (0);
 }
 
 /*
  * check if we have the same key/mask/gateway on the table already.
  * Assume @rt rt_key host bits are cleared according to @netmask
  */
 int
 rt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt,
     struct sockaddr *netmask)
 {
 	struct radix_node *rn, *rn1;
 	struct rtentry *rt1;
 
 	rn = (struct radix_node *)rt;
 	rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
 	if (!rn1 || rn1->rn_flags & RNF_ROOT)
 		return (0);
 
 	/* key/mask are the same. compare gateway for all multipaths */
 	do {
 		rt1 = (struct rtentry *)rn1;
 
 		/* sanity: no use in comparing the same thing */
 		if (rn1 == rn)
 			continue;
         
 		if (rt1->rt_gateway->sa_family == AF_LINK) {
 			if (rt1->rt_ifa->ifa_addr->sa_len != rt->rt_ifa->ifa_addr->sa_len ||
 			    bcmp(rt1->rt_ifa->ifa_addr, rt->rt_ifa->ifa_addr, 
 			    rt1->rt_ifa->ifa_addr->sa_len))
 				continue;
 		} else {
 			if (rt1->rt_gateway->sa_len != rt->rt_gateway->sa_len ||
 			    bcmp(rt1->rt_gateway, rt->rt_gateway,
 			    rt1->rt_gateway->sa_len))
 				continue;
 		}
 
 		/* all key/mask/gateway are the same.  conflicting entry. */
 		return (EEXIST);
 	} while ((rn1 = rn_mpath_next(rn1)) != NULL);
 
 	return (0);
 }
 
-void
-rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
+/*
+ * Weighted pick from the multipath chain at rte; NULL if total weight is 0.
+ */
+static struct rtentry *
+rt_mpath_selectrte(struct rtentry *rte, uint32_t hash)
 {
 	struct radix_node *rn0, *rn;
 	u_int32_t n;
 	struct rtentry *rt;
 	int64_t weight;
 
+	/* beyond here, we use rn as the master copy */
+	rn0 = rn = (struct radix_node *)rte;
+	n = rn_mpath_count(rn0);
+	if (n == 0)		/* avoid dividing by zero below */
+		return (NULL);
+	/* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
+	hash = (hash + hashjitter) % n;
+	for (weight = hash, rt = rte; rt && weight >= rt->rt_weight; ) {
+		weight -= rt->rt_weight;	/* charge the skipped route */
+		/* stay within the multipath routes */
+		if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
+			break;
+		rn = rn->rn_dupedkey;
+		rt = (struct rtentry *)rn;
+	}
+	return (rt);
+}
+
+struct rtentry *
+rt_mpath_select(struct rtentry *rte, uint32_t hash)
+{
+	/* A lone route (no same-mask duplicate) is returned unchanged. */
+	if (rn_mpath_next((struct radix_node *)rte) != NULL)
+		return (rt_mpath_selectrte(rte, hash));
+	return (rte);
+}
+
+void
+rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
+{
+	struct rtentry *rt;
+
 	/*
 	 * XXX we don't attempt to lookup cached route again; what should
 	 * be done for sendto(3) case?
 	 */
 	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)
 	    && RT_LINK_IS_UP(ro->ro_rt->rt_ifp))
 		return;				 
 	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum);
 
 	/* if the route does not exist or it is not multipath, don't care */
 	if (ro->ro_rt == NULL)
 		return;
 	if (rn_mpath_next((struct radix_node *)ro->ro_rt) == NULL) {
 		RT_UNLOCK(ro->ro_rt);
 		return;
 	}
 
-	/* beyond here, we use rn as the master copy */
-	rn0 = rn = (struct radix_node *)ro->ro_rt;
-	n = rn_mpath_count(rn0);
-
-	/* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
-	hash += hashjitter;
-	hash %= n;
-	for (weight = abs((int32_t)hash), rt = ro->ro_rt;
-	     weight >= rt->rt_weight && rn; 
-	     weight -= rt->rt_weight) {
-		
-		/* stay within the multipath routes */
-		if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
-			break;
-		rn = rn->rn_dupedkey;
-		rt = (struct rtentry *)rn;
-	}
+	rt = rt_mpath_selectrte(ro->ro_rt, hash);
 	/* XXX try filling rt_gwroute and avoid unreachable gw  */
 
 	/* gw selection has failed - there must be only zero weight routes */
-	if (!rn) {
-		RT_UNLOCK(ro->ro_rt);
+	if (!rt) {
+		/* unlock and release the reference from rtalloc1_fib() */
+		RTFREE_LOCKED(ro->ro_rt);
 		ro->ro_rt = NULL;
 		return;
 	}
 	if (ro->ro_rt != rt) {
 		RTFREE_LOCKED(ro->ro_rt);
-		ro->ro_rt = (struct rtentry *)rn;
+		ro->ro_rt = rt;
 		RT_LOCK(ro->ro_rt);
 		RT_ADDREF(ro->ro_rt);
-
 	} 
 	RT_UNLOCK(ro->ro_rt);
 }
 
 extern int	in6_inithead(void **head, int off);
 extern int	in_inithead(void **head, int off);
 
 #ifdef INET
 int
 rn4_mpath_inithead(void **head, int off)
 {
 	struct radix_node_head *rnh;
 
 	hashjitter = arc4random();
 	if (in_inithead(head, off) == 1) {
 		rnh = (struct radix_node_head *)*head;
 		rnh->rnh_multipath = 1;
 		return 1;
 	} else
 		return 0;
 }
 #endif
 
 #ifdef INET6
 int
 rn6_mpath_inithead(void **head, int off)
 {
 	struct radix_node_head *rnh;
 
 	hashjitter = arc4random();
 	if (in6_inithead(head, off) == 1) {
 		rnh = (struct radix_node_head *)*head;
 		rnh->rnh_multipath = 1;
 		return 1;
 	} else
 		return 0;
 }
 
 #endif
Index: projects/clang380-import/sys/net/radix_mpath.h
===================================================================
--- projects/clang380-import/sys/net/radix_mpath.h	(revision 293686)
+++ projects/clang380-import/sys/net/radix_mpath.h	(revision 293687)
@@ -1,63 +1,64 @@
 /*	$KAME: radix_mpath.h,v 1.10 2004/11/06 15:44:28 itojun Exp $	*/
 
 /*
  * Copyright (C) 2001 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE
  * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS
  * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL
  * PROPERTIES.
  */
 /* $FreeBSD$ */
 
 #ifndef _NET_RADIX_MPATH_H_
 #define	_NET_RADIX_MPATH_H_
 
 #ifdef _KERNEL
 /*
  * Radix tree API with multipath support
  */
 struct route;
 struct rtentry;
 struct sockaddr;
 int	rn_mpath_capable(struct radix_node_head *);
 struct radix_node *rn_mpath_next(struct radix_node *);
 u_int32_t rn_mpath_count(struct radix_node *);
 struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *);
 int rt_mpath_conflict(struct radix_node_head *, struct rtentry *,
     struct sockaddr *);
 void rtalloc_mpath_fib(struct route *, u_int32_t, u_int);
 #define rtalloc_mpath(_route, _hash) rtalloc_mpath_fib((_route), (_hash), 0)
+struct rtentry *rt_mpath_select(struct rtentry *, uint32_t);
 struct radix_node *rn_mpath_lookup(void *, void *,
     struct radix_node_head *);
 int rt_mpath_deldup(struct rtentry *, struct rtentry *);
 int	rn4_mpath_inithead(void **, int);
 int	rn6_mpath_inithead(void **, int);
 
 #endif
 
 #endif /* _NET_RADIX_MPATH_H_ */
Index: projects/clang380-import/sys/net/route.c
===================================================================
--- projects/clang380-import/sys/net/route.c	(revision 293686)
+++ projects/clang380-import/sys/net/route.c	(revision 293687)
@@ -1,2323 +1,2312 @@
 /*-
  * Copyright (c) 1980, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)route.c	8.3.1.1 (Berkeley) 2/23/95
  * $FreeBSD$
  */
 /************************************************************************
  * Note: In this file a 'fib' is a "forwarding information base"	*
  * Which is the new name for an in kernel routing (next hop) table.	*
  ***********************************************************************/
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_route.h"
 #include "opt_sctp.h"
 #include "opt_mrouting.h"
 #include "opt_mpath.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/sysproto.h>
 #include <sys/proc.h>
 #include <sys/domain.h>
 #include <sys/kernel.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/vnet.h>
 #include <net/flowtable.h>
 
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
 
 #include <netinet/in.h>
 #include <netinet/ip_mroute.h>
 
 #include <vm/uma.h>
 
 #define	RT_MAXFIBS	UINT16_MAX
 
 /* Kernel config default option. */
 #ifdef ROUTETABLES
 #if ROUTETABLES <= 0
 #error "ROUTETABLES defined too low"
 #endif
 #if ROUTETABLES > RT_MAXFIBS
 #error "ROUTETABLES defined too big"
 #endif
 #define	RT_NUMFIBS	ROUTETABLES
 #endif /* ROUTETABLES */
 /* Initialize to default if not otherwise set. */
 #ifndef	RT_NUMFIBS
 #define	RT_NUMFIBS	1
 #endif
 
 #if defined(INET) || defined(INET6)
 #ifdef SCTP
 extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
 #endif /* SCTP */
 #endif
 
 
 /* This is read-only.. */
 u_int rt_numfibs = RT_NUMFIBS;
 SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RDTUN, &rt_numfibs, 0, "");
 
 /*
  * By default add routes to all fibs for new interfaces.
  * Once this is set to 0 then only allocate routes on interface
  * changes for the FIB of the caller when adding a new set of addresses
  * to an interface.  XXX this is a shotgun aproach to a problem that needs
  * a more fine grained solution.. that will come.
  * XXX also has the problems getting the FIB from curthread which will not
  * always work given the fib can be overridden and prefixes can be added
  * from the network stack context.
  */
 VNET_DEFINE(u_int, rt_add_addr_allfibs) = 1;
 SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RWTUN | CTLFLAG_VNET,
     &VNET_NAME(rt_add_addr_allfibs), 0, "");
 
 VNET_DEFINE(struct rtstat, rtstat);
 #define	V_rtstat	VNET(rtstat)
 
 VNET_DEFINE(struct radix_node_head *, rt_tables);
 #define	V_rt_tables	VNET(rt_tables)
 
 VNET_DEFINE(int, rttrash);		/* routes not in table but not freed */
 #define	V_rttrash	VNET(rttrash)
 
 
 /*
  * Convert a 'struct radix_node *' to a 'struct rtentry *'.
  * The operation can be done safely (in this code) because a
  * 'struct rtentry' starts with two 'struct radix_node''s, the first
  * one representing leaf nodes in the routing tree, which is
  * what the code in radix.c passes us as a 'struct radix_node'.
  *
  * But because there are a lot of assumptions in this conversion,
  * do not cast explicitly, but always use the macro below.
  */
 #define RNTORT(p)	((struct rtentry *)(p))
 
 static VNET_DEFINE(uma_zone_t, rtzone);		/* Routing table UMA zone. */
 #define	V_rtzone	VNET(rtzone)
 
 static int rtrequest1_fib_change(struct radix_node_head *, struct rt_addrinfo *,
     struct rtentry **, u_int);
 static void rt_setmetrics(const struct rt_addrinfo *, struct rtentry *);
 static int rt_ifdelroute(const struct rtentry *rt, void *arg);
 static struct rtentry *rt_unlinkrte(struct radix_node_head *rnh,
     struct rt_addrinfo *info, int *perror);
 static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info);
 #ifdef RADIX_MPATH
 static struct radix_node *rt_mpath_unlink(struct radix_node_head *rnh,
     struct rt_addrinfo *info, struct rtentry *rto, int *perror);
 #endif
 static int rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info,
     int flags);
 
 struct if_mtuinfo
 {
 	struct ifnet	*ifp;
 	int		mtu;
 };
 
 static int	if_updatemtu_cb(struct radix_node *, void *);
 
 /*
  * handler for net.my_fibnum
  */
 static int
 sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
 {
         int fibnum;
         int error;
  
         fibnum = curthread->td_proc->p_fibnum;
         error = sysctl_handle_int(oidp, &fibnum, 0, req);
         return (error);
 }
 
 SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD,
             NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller");
 
 static __inline struct radix_node_head **
 rt_tables_get_rnh_ptr(int table, int fam)
 {
 	struct radix_node_head **rnh;
 
 	KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.",
 	    __func__));
 	KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.",
 	    __func__));
 
 	/* rnh is [fib=0][af=0]. */
 	rnh = (struct radix_node_head **)V_rt_tables;
 	/* Get the offset to the requested table and fam. */
 	rnh += table * (AF_MAX+1) + fam;
 
 	return (rnh);
 }
 
 struct radix_node_head *
 rt_tables_get_rnh(int table, int fam)
 {
 
 	return (*rt_tables_get_rnh_ptr(table, fam));
 }
 
 /*
  * route initialization must occur before ip6_init2(), which happenas at
  * SI_ORDER_MIDDLE.
  */
 static void
 route_init(void)
 {
 
 	/* whack the tunable ints into  line. */
 	if (rt_numfibs > RT_MAXFIBS)
 		rt_numfibs = RT_MAXFIBS;
 	if (rt_numfibs == 0)
 		rt_numfibs = 1;
 }
 SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
 
 static int
 rtentry_zinit(void *mem, int size, int how)
 {
 	struct rtentry *rt = mem;
 
 	rt->rt_pksent = counter_u64_alloc(how);
 	if (rt->rt_pksent == NULL)
 		return (ENOMEM);
 
 	RT_LOCK_INIT(rt);
 
 	return (0);
 }
 
 static void
 rtentry_zfini(void *mem, int size)
 {
 	struct rtentry *rt = mem;
 
 	RT_LOCK_DESTROY(rt);
 	counter_u64_free(rt->rt_pksent);
 }
 
 static int
 rtentry_ctor(void *mem, int size, void *arg, int how)
 {
 	struct rtentry *rt = mem;
 
 	bzero(rt, offsetof(struct rtentry, rt_endzero));
 	counter_u64_zero(rt->rt_pksent);
 	rt->rt_chain = NULL;
 
 	return (0);
 }
 
 static void
 rtentry_dtor(void *mem, int size, void *arg)
 {
 	struct rtentry *rt = mem;
 
 	RT_UNLOCK_COND(rt);
 }
 
 static void
 vnet_route_init(const void *unused __unused)
 {
 	struct domain *dom;
 	struct radix_node_head **rnh;
 	int table;
 	int fam;
 
 	V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) *
 	    sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO);
 
 	V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
 	    rtentry_ctor, rtentry_dtor,
 	    rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0);
 	for (dom = domains; dom; dom = dom->dom_next) {
 		if (dom->dom_rtattach == NULL)
 			continue;
 
 		for  (table = 0; table < rt_numfibs; table++) {
 			fam = dom->dom_family;
 			if (table != 0 && fam != AF_INET6 && fam != AF_INET)
 				break;
 
 			rnh = rt_tables_get_rnh_ptr(table, fam);
 			if (rnh == NULL)
 				panic("%s: rnh NULL", __func__);
 			dom->dom_rtattach((void **)rnh, 0);
 		}
 	}
 }
 VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
     vnet_route_init, 0);
 
 #ifdef VIMAGE
 static void
 vnet_route_uninit(const void *unused __unused)
 {
 	int table;
 	int fam;
 	struct domain *dom;
 	struct radix_node_head **rnh;
 
 	for (dom = domains; dom; dom = dom->dom_next) {
 		if (dom->dom_rtdetach == NULL)
 			continue;
 
 		for (table = 0; table < rt_numfibs; table++) {
 			fam = dom->dom_family;
 
 			if (table != 0 && fam != AF_INET6 && fam != AF_INET)
 				break;
 
 			rnh = rt_tables_get_rnh_ptr(table, fam);
 			if (rnh == NULL)
 				panic("%s: rnh NULL", __func__);
 			dom->dom_rtdetach((void **)rnh, 0);
 		}
 	}
 
 	free(V_rt_tables, M_RTABLE);
 	uma_zdestroy(V_rtzone);
 }
 VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
     vnet_route_uninit, 0);
 #endif
 
 #ifndef _SYS_SYSPROTO_H_
 struct setfib_args {
 	int     fibnum;
 };
 #endif
 int
 sys_setfib(struct thread *td, struct setfib_args *uap)
 {
 	if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs)
 		return EINVAL;
 	td->td_proc->p_fibnum = uap->fibnum;
 	return (0);
 }
 
 /*
  * Packet routing routines.
  */
 void
 rtalloc(struct route *ro)
 {
 
 	rtalloc_ign_fib(ro, 0UL, RT_DEFAULT_FIB);
 }
 
 void
 rtalloc_fib(struct route *ro, u_int fibnum)
 {
 	rtalloc_ign_fib(ro, 0UL, fibnum);
 }
 
 void
 rtalloc_ign(struct route *ro, u_long ignore)
 {
 	struct rtentry *rt;
 
 	if ((rt = ro->ro_rt) != NULL) {
 		if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
 			return;
 		RTFREE(rt);
 		ro->ro_rt = NULL;
 	}
 	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB);
 	if (ro->ro_rt)
 		RT_UNLOCK(ro->ro_rt);
 }
 
 void
 rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum)
 {
 	struct rtentry *rt;
 
 	if ((rt = ro->ro_rt) != NULL) {
 		if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
 			return;
 		RTFREE(rt);
 		ro->ro_rt = NULL;
 	}
 	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum);
 	if (ro->ro_rt)
 		RT_UNLOCK(ro->ro_rt);
 }
 
 /*
  * Look up the route that matches the address given
  * Or, at least try.. Create a cloned route if needed.
  *
  * The returned route, if any, is locked.
  */
 struct rtentry *
 rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
 {
 
 	return (rtalloc1_fib(dst, report, ignflags, RT_DEFAULT_FIB));
 }
 
 struct rtentry *
 rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
 		    u_int fibnum)
 {
 	struct radix_node_head *rnh;
 	struct radix_node *rn;
 	struct rtentry *newrt;
 	struct rt_addrinfo info;
 	int err = 0, msgtype = RTM_MISS;
 	int needlock;
 
 	KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum"));
 	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
 	newrt = NULL;
 	if (rnh == NULL)
 		goto miss;
 
 	/*
 	 * Look up the address in the table for that Address Family
 	 */
 	needlock = !(ignflags & RTF_RNH_LOCKED);
 	if (needlock)
 		RADIX_NODE_HEAD_RLOCK(rnh);
 #ifdef INVARIANTS	
 	else
 		RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
 #endif
 	rn = rnh->rnh_matchaddr(dst, rnh);
 	if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
 		newrt = RNTORT(rn);
 		RT_LOCK(newrt);
 		RT_ADDREF(newrt);
 		if (needlock)
 			RADIX_NODE_HEAD_RUNLOCK(rnh);
 		goto done;
 
 	} else if (needlock)
 		RADIX_NODE_HEAD_RUNLOCK(rnh);
 	
 	/*
 	 * Either we hit the root or couldn't find any match,
 	 * Which basically means
 	 * "caint get there frm here"
 	 */
 miss:
 	V_rtstat.rts_unreach++;
 
 	if (report) {
 		/*
 		 * If required, report the failure to the supervising
 		 * Authorities.
 		 * For a delete, this is not an error. (report == 0)
 		 */
 		bzero(&info, sizeof(info));
 		info.rti_info[RTAX_DST] = dst;
 		rt_missmsg_fib(msgtype, &info, 0, err, fibnum);
 	}	
 done:
 	if (newrt)
 		RT_LOCK_ASSERT(newrt);
 	return (newrt);
 }
 
 /*
  * Remove a reference count from an rtentry.
  * If the count gets low enough, take it out of the routing table
  */
 void
 rtfree(struct rtentry *rt)
 {
 	struct radix_node_head *rnh;
 
 	KASSERT(rt != NULL,("%s: NULL rt", __func__));
 	rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family);
 	KASSERT(rnh != NULL,("%s: NULL rnh", __func__));
 
 	RT_LOCK_ASSERT(rt);
 
 	/*
 	 * The callers should use RTFREE_LOCKED() or RTFREE(), so
 	 * we should come here exactly with the last reference.
 	 */
 	RT_REMREF(rt);
 	if (rt->rt_refcnt > 0) {
 		log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt);
 		goto done;
 	}
 
 	/*
 	 * On last reference give the "close method" a chance
 	 * to cleanup private state.  This also permits (for
 	 * IPv4 and IPv6) a chance to decide if the routing table
 	 * entry should be purged immediately or at a later time.
 	 * When an immediate purge is to happen the close routine
 	 * typically calls rtexpunge which clears the RTF_UP flag
 	 * on the entry so that the code below reclaims the storage.
 	 */
 	if (rt->rt_refcnt == 0 && rnh->rnh_close)
 		rnh->rnh_close((struct radix_node *)rt, rnh);
 
 	/*
 	 * If we are no longer "up" (and ref == 0)
 	 * then we can free the resources associated
 	 * with the route.
 	 */
 	if ((rt->rt_flags & RTF_UP) == 0) {
 		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
 			panic("rtfree 2");
 		/*
 		 * the rtentry must have been removed from the routing table
 		 * so it is represented in rttrash.. remove that now.
 		 */
 		V_rttrash--;
 #ifdef	DIAGNOSTIC
 		if (rt->rt_refcnt < 0) {
 			printf("rtfree: %p not freed (neg refs)\n", rt);
 			goto done;
 		}
 #endif
 		/*
 		 * release references on items we hold them on..
 		 * e.g other routes and ifaddrs.
 		 */
 		if (rt->rt_ifa)
 			ifa_free(rt->rt_ifa);
 		/*
 		 * The key is separatly alloc'd so free it (see rt_setgate()).
 		 * This also frees the gateway, as they are always malloc'd
 		 * together.
 		 */
 		R_Free(rt_key(rt));
 
 		/*
 		 * and the rtentry itself of course
 		 */
 		uma_zfree(V_rtzone, rt);
 		return;
 	}
 done:
 	RT_UNLOCK(rt);
 }
 
 
 /*
  * Force a routing table entry to the specified
  * destination to go through the given gateway.
  * Normally called as a result of a routing redirect
  * message from the network layer.
  */
 void
 rtredirect(struct sockaddr *dst,
 	struct sockaddr *gateway,
 	struct sockaddr *netmask,
 	int flags,
 	struct sockaddr *src)
 {
 
 	rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB);
 }
 
 void
 rtredirect_fib(struct sockaddr *dst,
 	struct sockaddr *gateway,
 	struct sockaddr *netmask,
 	int flags,
 	struct sockaddr *src,
 	u_int fibnum)
 {
-	struct rtentry *rt, *rt0 = NULL;
+	struct rtentry *rt;
 	int error = 0;
 	short *stat = NULL;
 	struct rt_addrinfo info;
 	struct ifaddr *ifa;
 	struct radix_node_head *rnh;
 
 	ifa = NULL;
 	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
 	if (rnh == NULL) {
 		error = EAFNOSUPPORT;
 		goto out;
 	}
 
 	/* verify the gateway is directly reachable */
 	if ((ifa = ifa_ifwithnet(gateway, 0, fibnum)) == NULL) {
 		error = ENETUNREACH;
 		goto out;
 	}
 	rt = rtalloc1_fib(dst, 0, 0UL, fibnum);	/* NB: rt is locked */
 	/*
 	 * If the redirect isn't from our current router for this dst,
 	 * it's either old or wrong.  If it redirects us to ourselves,
 	 * we have a routing loop, perhaps as a result of an interface
 	 * going down recently.
 	 */
 	if (!(flags & RTF_DONE) && rt) {
 		if (!sa_equal(src, rt->rt_gateway)) {
 			error = EINVAL;
 			goto done;
 		}
 		if (rt->rt_ifa != ifa && ifa->ifa_addr->sa_family != AF_LINK) {
 			error = EINVAL;
 			goto done;
 		}
 	}
 	if ((flags & RTF_GATEWAY) && ifa_ifwithaddr_check(gateway)) {
 		error = EHOSTUNREACH;
 		goto done;
 	}
 	/*
 	 * Create a new entry if we just got back a wildcard entry
 	 * or the lookup failed.  This is necessary for hosts
 	 * which use routing redirects generated by smart gateways
 	 * to dynamically build the routing tables.
 	 */
 	if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
 		goto create;
 	/*
 	 * Don't listen to the redirect if it's
 	 * for a route to an interface.
 	 */
 	if (rt->rt_flags & RTF_GATEWAY) {
 		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
 			/*
 			 * Changing from route to net => route to host.
 			 * Create new route, rather than smashing route to net.
 			 */
 		create:
-			rt0 = rt;
+			/* rt is locked and referenced here, or NULL */
+			if (rt != NULL)
+				RTFREE_LOCKED(rt);
 			rt = NULL;
-		
 			flags |= RTF_DYNAMIC;
 			bzero((caddr_t)&info, sizeof(info));
 			info.rti_info[RTAX_DST] = dst;
 			info.rti_info[RTAX_GATEWAY] = gateway;
 			info.rti_info[RTAX_NETMASK] = netmask;
 			info.rti_ifa = ifa;
 			info.rti_flags = flags;
-			if (rt0 != NULL)
-				RT_UNLOCK(rt0);	/* drop lock to avoid LOR with RNH */
 			error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum);
 			if (rt != NULL) {
 				RT_LOCK(rt);
-				if (rt0 != NULL)
-					EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst);
 				flags = rt->rt_flags;
 			}
-			if (rt0 != NULL)
-				RTFREE(rt0);
 			
 			stat = &V_rtstat.rts_dynamic;
 		} else {
-			struct rtentry *gwrt;
-
 			/*
 			 * Smash the current notion of the gateway to
 			 * this destination.  Should check about netmask!!!
 			 */
 			if ((flags & RTF_GATEWAY) == 0)
 				rt->rt_flags &= ~RTF_GATEWAY;
 			rt->rt_flags |= RTF_MODIFIED;
 			flags |= RTF_MODIFIED;
 			stat = &V_rtstat.rts_newgateway;
 			/*
 			 * add the key and gateway (in one malloc'd chunk).
 			 */
 			RT_UNLOCK(rt);
 			RADIX_NODE_HEAD_LOCK(rnh);
 			RT_LOCK(rt);
 			rt_setgate(rt, rt_key(rt), gateway);
-			gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED);
 			RADIX_NODE_HEAD_UNLOCK(rnh);
-			EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst);
-			if (gwrt)
-				RTFREE_LOCKED(gwrt);
 		}
 	} else
 		error = EHOSTUNREACH;
 done:
 	if (rt)
 		RTFREE_LOCKED(rt);
 out:
 	if (error)
 		V_rtstat.rts_badredirect++;
 	else if (stat != NULL)
 		(*stat)++;
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_DST] = dst;
 	info.rti_info[RTAX_GATEWAY] = gateway;
 	info.rti_info[RTAX_NETMASK] = netmask;
 	info.rti_info[RTAX_AUTHOR] = src;
 	rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum);
 	if (ifa != NULL)
 		ifa_free(ifa);
 }
 
 int
 rtioctl(u_long req, caddr_t data)
 {
 
 	return (rtioctl_fib(req, data, RT_DEFAULT_FIB));
 }
 
 /*
  * Routing table ioctl interface.
  */
 int
 rtioctl_fib(u_long req, caddr_t data, u_int fibnum)
 {
 
 	/*
 	 * If more ioctl commands are added here, make sure the proper
 	 * super-user checks are being performed because it is possible for
 	 * prison-root to make it this far if raw sockets have been enabled
 	 * in jails.
 	 */
 #ifdef INET
 	/* Multicast goop, grrr... */
 	return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP;
 #else /* INET */
 	return ENXIO;
 #endif /* INET */
 }
 
 /*
  * Select the interface address to use for a route to @dst through
  * @gateway in fib @fibnum.
  *
  * Candidates are tried in this order:
  *  1. for non-gateway routes: the p2p peer matching @dst (host routes
  *     only), then the local address equal to @gateway;
  *     for gateway routes: the p2p peer matching @gateway;
  *  2. an address on the same network as @gateway;
  *  3. the ifa of the route that currently leads to @gateway, unless that
  *     route is the default route (all-zero key).
  * If the chosen ifa belongs to a different address family than @dst, an
  * address of @dst's family on the same interface is preferred.
  *
  * Returns NULL when no suitable address exists.  The ifa taken from the
  * routing table is explicitly referenced here, and a replaced ifa is
  * released with ifa_free(), so the returned ifa is expected to carry a
  * reference that the caller must drop.
  */
 struct ifaddr *
 ifa_ifwithroute(int flags, const struct sockaddr *dst, struct sockaddr *gateway,
 				u_int fibnum)
 {
 	struct ifaddr *ifa;
 	int not_found = 0;
 
 	if ((flags & RTF_GATEWAY) == 0) {
 		/*
 		 * If we are adding a route to an interface,
 		 * and the interface is a pt to pt link
 		 * we should search for the destination
 		 * as our clue to the interface.  Otherwise
 		 * we can use the local address.
 		 */
 		ifa = NULL;
 		if (flags & RTF_HOST)
 			ifa = ifa_ifwithdstaddr(dst, fibnum);
 		if (ifa == NULL)
 			ifa = ifa_ifwithaddr(gateway);
 	} else {
 		/*
 		 * If we are adding a route to a remote net
 		 * or host, the gateway may still be on the
 		 * other end of a pt to pt link.
 		 */
 		ifa = ifa_ifwithdstaddr(gateway, fibnum);
 	}
 	if (ifa == NULL)
 		ifa = ifa_ifwithnet(gateway, 0, fibnum);
 	if (ifa == NULL) {
 		/* Last resort: ask the routing table how to reach the gateway. */
 		struct rtentry *rt = rtalloc1_fib(gateway, 0, 0, fibnum);
 		if (rt == NULL)
 			return (NULL);
 		/*
 		 * dismiss a gateway that is reachable only
 		 * through the default router
 		 */
 		switch (gateway->sa_family) {
 		case AF_INET:
 			if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY)
 				not_found = 1;
 			break;
 		case AF_INET6:
 			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr))
 				not_found = 1;
 			break;
 		default:
 			break;
 		}
 		if (!not_found && rt->rt_ifa != NULL) {
 			/* Take our own ifa reference before letting go of the route. */
 			ifa = rt->rt_ifa;
 			ifa_ref(ifa);
 		}
 		/* Drop the route reference and lock obtained by the lookup. */
 		RT_REMREF(rt);
 		RT_UNLOCK(rt);
 		if (not_found || ifa == NULL)
 			return (NULL);
 	}
 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
 		/*
 		 * Prefer an address of dst's family on the same interface;
 		 * keep the original one if the interface has none.
 		 */
 		struct ifaddr *oifa = ifa;
 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
 		if (ifa == NULL)
 			ifa = oifa;
 		else
 			ifa_free(oifa);
 	}
 	return (ifa);
 }
 
 /*
  * Do appropriate manipulations of a routing tree given
  * all the bits of info needed
  */
 int
 rtrequest(int req,
 	struct sockaddr *dst,
 	struct sockaddr *gateway,
 	struct sockaddr *netmask,
 	int flags,
 	struct rtentry **ret_nrt)
 {
 
 	return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt,
 	    RT_DEFAULT_FIB));
 }
 
 /*
  * Perform routing request @req in fib @fibnum.
  *
  * Packs the individual sockaddr arguments and @flags into a zeroed
  * rt_addrinfo and passes it to rtrequest1_fib().  A destination with
  * sa_len of zero is rejected with EINVAL.
  */
 int
 rtrequest_fib(int req, struct sockaddr *dst, struct sockaddr *gateway,
     struct sockaddr *netmask, int flags, struct rtentry **ret_nrt,
     u_int fibnum)
 {
 	struct rt_addrinfo info;
 	int error;
 
 	if (dst->sa_len == 0)
 		return (EINVAL);
 
 	bzero((caddr_t)&info, sizeof(info));
 	info.rti_info[RTAX_DST] = dst;
 	info.rti_info[RTAX_GATEWAY] = gateway;
 	info.rti_info[RTAX_NETMASK] = netmask;
 	info.rti_flags = flags;
 
 	error = rtrequest1_fib(req, &info, ret_nrt, fibnum);
 	return (error);
 }
 
 
 /*
  * Copy most of @rt data into @info.
  *
  * If @flags contains NHR_COPY, copies dst, netmask and gw to the
  * buffers pointed to by the @info structure.  Such pointers are assumed
  * to be zeroed sockaddr-like structures with the sa_len field initialized
  * to reflect the size of the provided buffer.  If NHR_COPY is not
  * specified, the dst, netmask and gw fields of @info are pointed at the
  * corresponding @rt values instead.
  *
  * If @flags contains NHR_REF, a reference is taken on rt_ifp.
  *
  * Also exports the route MTU (when info->rti_rmx is set), flags, ifp
  * and ifa.
  *
  * Returns 0 on success, ENOMEM if a caller-supplied buffer is too small.
  */
 int
 rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags)
 {
 	struct rt_metrics *rmx;
 	struct sockaddr *src, *dst;
 	int sa_len;
 
 	if (flags & NHR_COPY) {
 		/* Copy destination if dst is non-zero */
 		src = rt_key(rt);
 		dst = info->rti_info[RTAX_DST];
 		/* Remember the key length; it is reused for the mask below. */
 		sa_len = src->sa_len;
-		if (src != NULL && dst != NULL) {
+		if (dst != NULL) {
 			if (src->sa_len > dst->sa_len)
 				return (ENOMEM);
 			memcpy(dst, src, src->sa_len);
 			info->rti_addrs |= RTA_DST;
 		}
 
 		/* Copy mask if set && dst is non-zero */
 		src = rt_mask(rt);
 		dst = info->rti_info[RTAX_NETMASK];
 		if (src != NULL && dst != NULL) {
 
 			/*
 			 * Radix stores different value in sa_len,
 			 * assume rt_mask() to have the same length
 			 * as rt_key()
 			 */
 			if (sa_len > dst->sa_len)
 				return (ENOMEM);
 			memcpy(dst, src, src->sa_len);
 			info->rti_addrs |= RTA_NETMASK;
 		}
 
 		/* Copy gateway if set && dst is non-zero */
 		src = rt->rt_gateway;
 		dst = info->rti_info[RTAX_GATEWAY];
 		if ((rt->rt_flags & RTF_GATEWAY) && src != NULL && dst != NULL){
 			if (src->sa_len > dst->sa_len)
 				return (ENOMEM);
 			memcpy(dst, src, src->sa_len);
 			info->rti_addrs |= RTA_GATEWAY;
 		}
 	} else {
 		/* No copy requested: hand out pointers into the rtentry. */
 		info->rti_info[RTAX_DST] = rt_key(rt);
 		info->rti_addrs |= RTA_DST;
 		if (rt_mask(rt) != NULL) {
 			info->rti_info[RTAX_NETMASK] = rt_mask(rt);
 			info->rti_addrs |= RTA_NETMASK;
 		}
 		if (rt->rt_flags & RTF_GATEWAY) {
 			info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 			info->rti_addrs |= RTA_GATEWAY;
 		}
 	}
 
 	/* Export the MTU only if the caller supplied a metrics structure. */
 	rmx = info->rti_rmx;
 	if (rmx != NULL) {
 		info->rti_mflags |= RTV_MTU;
 		rmx->rmx_mtu = rt->rt_mtu;
 	}
 
 	info->rti_flags = rt->rt_flags;
 	info->rti_ifp = rt->rt_ifp;
 	info->rti_ifa = rt->rt_ifa;
 
 	if (flags & NHR_REF) {
 		/* Do 'traditional' refcounting */
 		if_ref(info->rti_ifp);
 	}
 
 	return (0);
 }
 
 /*
  * Looks up the route entry for @dst in the RIB of fib @fibnum and
  * exports its data to @info using rt_exportinfo() in NHR_COPY mode.
  *
  * If @flags contains NHR_REF, a reference is taken on rt_ifp.
  *   All references can be released later by calling rib_free_info().
  *
  * Returns 0 on success.
  * Returns ENOENT for lookup failure, ENOMEM for export failure.
  */
 int
 rib_lookup_info(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags,
     uint32_t flowid, struct rt_addrinfo *info)
 {
 	struct radix_node_head *rh;
 	struct radix_node *rn;
 	struct rtentry *rt;
 	int error;
 
 	KASSERT((fibnum < rt_numfibs), ("rib_lookup_rte: bad fibnum"));
 	rh = rt_tables_get_rnh(fibnum, dst->sa_family);
 	if (rh == NULL)
 		return (ENOENT);
 
 	/* Assume failure until a usable entry has been exported. */
 	error = ENOENT;
 
 	RADIX_NODE_HEAD_RLOCK(rh);
 	rn = rh->rnh_matchaddr(__DECONST(void *, dst), rh);
 	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
 		rt = RNTORT(rn);
 		/* Ensure route & ifp is UP */
 		if (RT_LINK_IS_UP(rt->rt_ifp)) {
 			/* Only NHR_REF is honoured from the caller's flags. */
 			error = rt_exportinfo(rt, info,
 			    (flags & NHR_REF) | NHR_COPY);
 		}
 	}
 	RADIX_NODE_HEAD_RUNLOCK(rh);
 
 	return (error);
 }
 
 /*
  * Releases all references acquired by rib_lookup_info() when
  * called with NHR_REF flags.
  *
  * Currently this is the interface reference held through info->rti_ifp.
  */
 void
 rib_free_info(struct rt_addrinfo *info)
 {
 
 	if_rele(info->rti_ifp);
 }
 
 /*
  * Walks the routing tables of every fib in the system.
  *
  * For each existing table, @setwa_f (if not NULL) is called first with
  * the table head, fib number and address family, then @wa_f is called
  * for every element of the table with the table locked.
  * If @af is not AF_UNSPEC only tables of that address family are
  * visited, otherwise families 1..AF_MAX are.
  */
 void
 rt_foreach_fib_walk(int af, rt_setwarg_t *setwa_f, rt_walktree_f_t *wa_f,
     void *arg)
 {
 	struct radix_node_head *rnh;
 	uint32_t fibnum;
 	int family, first, last;
 
 	/* Do we want some specific family? */
 	if (af != AF_UNSPEC) {
 		first = af;
 		last = af;
 	} else {
 		first = 1;
 		last = AF_MAX;
 	}
 
 	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
 		for (family = first; family <= last; family++) {
 			rnh = rt_tables_get_rnh(fibnum, family);
 			if (rnh == NULL)
 				continue;
 			if (setwa_f != NULL)
 				setwa_f(rnh, fibnum, family, arg);
 
 			RADIX_NODE_HEAD_LOCK(rnh);
 			rnh->rnh_walktree(rnh, (walktree_f_t *)wa_f, arg);
 			RADIX_NODE_HEAD_UNLOCK(rnh);
 		}
 	}
 }
 
 /*
  * State shared between rt_foreach_fib_walk_del() and its tree-walk
  * callback rt_checkdelroute().
  */
 struct rt_delinfo
 {
 	struct rt_addrinfo info;	/* filter + per-entry dst/mask/gw */
 	struct radix_node_head *rnh;	/* table currently being walked */
 	struct rtentry *head;		/* unlinked entries, via rt_chain */
 };
 
 /*
  * Conditionally unlinks @rn from radix tree based
  * on info data passed in @arg.
  */
 static int
 rt_checkdelroute(struct radix_node *rn, void *arg)
 {
 	struct rt_delinfo *di;
 	struct rt_addrinfo *info;
 	struct rtentry *rt;
 	int error;
 
 	di = (struct rt_delinfo *)arg;
 	rt = (struct rtentry *)rn;
 	info = &di->info;
 	error = 0;
 
 	info->rti_info[RTAX_DST] = rt_key(rt);
 	info->rti_info[RTAX_NETMASK] = rt_mask(rt);
 	info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 
 	rt = rt_unlinkrte(di->rnh, info, &error);
 	if (rt == NULL) {
 		/* Either not allowed or not matched. Skip entry */
 		return (0);
 	}
 
 	/* Entry was unlinked. Add to the list and return */
 	rt->rt_chain = di->head;
 	di->head = rt;
 
 	return (0);
 }
 
 /*
  * Iterates over the routing tables of all fibs in the system and
  * deletes each element for which the @filter_f function returned
  * a non-zero value.
  * If @af is not AF_UNSPEC only tables of that address family are
  * processed, otherwise families 1..AF_MAX are.
  *
  * Matching entries are unlinked while the table is locked; delete
  * notification and the final free happen after the lock is dropped.
  */
 void
 rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg)
 {
 	struct radix_node_head *rnh;
 	struct rt_delinfo di;
 	struct rtentry *victim;
 	uint32_t fibnum;
 	int family, first, last;
 
 	bzero(&di, sizeof(di));
 	di.info.rti_filter = filter_f;
 	di.info.rti_filterdata = arg;
 
 	/* Do we want some specific family? */
 	if (af != AF_UNSPEC) {
 		first = af;
 		last = af;
 	} else {
 		first = 1;
 		last = AF_MAX;
 	}
 
 	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
 		for (family = first; family <= last; family++) {
 			rnh = rt_tables_get_rnh(fibnum, family);
 			if (rnh == NULL)
 				continue;
 			di.rnh = rnh;
 
 			RADIX_NODE_HEAD_LOCK(rnh);
 			rnh->rnh_walktree(rnh, rt_checkdelroute, &di);
 			RADIX_NODE_HEAD_UNLOCK(rnh);
 
 			/* Reclaim whatever the walk has unlinked, if anything */
 			while ((victim = di.head) != NULL) {
 				di.head = victim->rt_chain;
 				victim->rt_chain = NULL;
 
 				/* TODO std rt -> rt_addrinfo export */
 				di.info.rti_info[RTAX_DST] = rt_key(victim);
 				di.info.rti_info[RTAX_NETMASK] = rt_mask(victim);
 
 				rt_notifydelete(victim, &di.info);
 				RTFREE_LOCKED(victim);
 			}
 		}
 	}
 }
 
 /*
  * Delete Routes for a Network Interface
  *
  * Filter used by rt_flushifroutes() to select all route entries that
  * reference a detaching network interface.
  *
  * Arguments:
  *	rt	pointer to rtentry
  *	arg	the detaching interface (struct ifnet *)
  *
  * Returns:
  *	1	route uses the interface and is RTF_UP - delete it
  *	0	otherwise - keep it
  */
 static int
 rt_ifdelroute(const struct rtentry *rt, void *arg)
 {
 	struct ifnet	*ifp = arg;
 	int		 uses_ifp, is_up;
 
 	uses_ifp = (rt->rt_ifp == ifp);
 	/*
 	 * Protect (sorta) against walktree recursion problems
 	 * with cloned routes
 	 */
 	is_up = ((rt->rt_flags & RTF_UP) != 0);
 
 	return (uses_ifp && is_up);
 }
 
 /*
  * Delete all remaining routes using this interface.
  * Unfortunately the only way to do this is to slog through
  * the entire routing table looking for routes which point
  * to this interface...oh well...
  *
  * Walks every fib and address family, using rt_ifdelroute() as the
  * selection filter.
  */
 void
 rt_flushifroutes(struct ifnet *ifp)
 {
 
 	rt_foreach_fib_walk_del(AF_UNSPEC, rt_ifdelroute, ifp);
 }
 
 /*
  * Conditionally unlinks rtentry matching data inside @info from @rnh.
  * Returns unlinked, locked and referenced @rtentry on success,
  * Returns NULL and sets @perror to:
  * ESRCH - if prefix was not found,
  * EADDRINUSE - if trying to delete PINNED route without appropriate flag.
  * ENOENT - if supplied filter function returned 0 (not matched).
  *
  * The lookup uses RTAX_DST/RTAX_NETMASK from @info.  When a filter is
  * set and matches, RTAX_GATEWAY in @info is overwritten with the gateway
  * of the entry found.  No locking of @rnh is done here.
  */
 static struct rtentry *
 rt_unlinkrte(struct radix_node_head *rnh, struct rt_addrinfo *info, int *perror)
 {
 	struct sockaddr *dst, *netmask;
 	struct rtentry *rt;
 	struct radix_node *rn;
 
 	dst = info->rti_info[RTAX_DST];
 	netmask = info->rti_info[RTAX_NETMASK];
 
 	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, rnh);
 	if (rt == NULL) {
 		*perror = ESRCH;
 		return (NULL);
 	}
 
 	/* Pinned routes may only be removed by requests that carry RTF_PINNED. */
 	if ((info->rti_flags & RTF_PINNED) == 0) {
 		/* Check if target route can be deleted */
 		if (rt->rt_flags & RTF_PINNED) {
 			*perror = EADDRINUSE;
 			return (NULL);
 		}
 	}
 
 	if (info->rti_filter != NULL) {
 		if (info->rti_filter(rt, info->rti_filterdata) == 0) {
 			/* Not matched */
 			*perror = ENOENT;
 			return (NULL);
 		}
 
 		/*
 		 * Filter function requested rte deletion.
 		 * Ease the caller work by filling in remaining info
 		 * from that particular entry.
 		 */
 		info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
 	}
 
 	/*
 	 * Remove the item from the tree and return it.
 	 * Complain if it is not there and do no more processing.
 	 */
 	*perror = ESRCH;
 #ifdef RADIX_MPATH
 	if (rn_mpath_capable(rnh))
 		rn = rt_mpath_unlink(rnh, info, rt, perror);
 	else
 #endif
 	rn = rnh->rnh_deladdr(dst, netmask, rnh);
 	if (rn == NULL)
 		return (NULL);
 
 	/* A node that is still active or is a tree root must never come back. */
 	if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
 		panic ("rtrequest delete");
 
 	/* Hand the entry to the caller locked, referenced and marked down. */
 	rt = RNTORT(rn);
 	RT_LOCK(rt);
 	RT_ADDREF(rt);
 	rt->rt_flags &= ~RTF_UP;
 
 	*perror = 0;
 
 	return (rt);
 }
 
 /*
  * Post-unlink bookkeeping for a route that was removed from its table:
  * notifies the owning ifa through its ifa_rtrequest hook (RTM_DELETE)
  * and accounts for the entry in V_rttrash.
  */
 static void
 rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info)
 {
 	struct ifaddr *ifa;
 
 	/*
 	 * give the protocol a chance to keep things in sync.
 	 */
 	ifa = rt->rt_ifa;
 	if (ifa != NULL && ifa->ifa_rtrequest != NULL)
 		ifa->ifa_rtrequest(RTM_DELETE, rt, info);
 
 	/*
 	 * One more rtentry floating around that is not
 	 * linked to the routing table. rttrash will be decremented
 	 * when RTFREE(rt) is eventually called.
 	 */
 	V_rttrash++;
 }
 
 
 /*
  * These (questionable) definitions of apparent local variables apply
  * to every function between here and the matching #undef lines that
  * follow rtrequest1_fib(): rt_getifa_fib(), rt_mpath_unlink(),
  * rt_flowtable_check_route() and rtrequest1_fib().  XXXXXX!!!
  *
  * Each name expands to a field of the local variable "info", so an
  * assignment to e.g. "dst" modifies the caller-visible rt_addrinfo.
  */
 #define	dst	info->rti_info[RTAX_DST]
 #define	gateway	info->rti_info[RTAX_GATEWAY]
 #define	netmask	info->rti_info[RTAX_NETMASK]
 #define	ifaaddr	info->rti_info[RTAX_IFA]
 #define	ifpaddr	info->rti_info[RTAX_IFP]
 #define	flags	info->rti_flags
 
 /*
  * Look up rt_addrinfo for a specific fib.  Note that if rti_ifa is defined,
  * it will be referenced so the caller must free it.
  *
  * Fills in info->rti_ifp and info->rti_ifa where they are not already
  * set, using (in order) the RTAX_IFP link-level address, the RTAX_IFA
  * address, and finally the gateway/destination addresses.
  *
  * Returns 0 on success, ENETUNREACH if no interface address was found.
  * (dst, gateway, ifaaddr, ifpaddr and flags are macros over @info.)
  */
 int
 rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
 {
 	struct ifaddr *ifa;
 	int error = 0;
 
 	/*
 	 * ifp may be specified by sockaddr_dl
 	 * when protocol address is ambiguous.
 	 */
 	if (info->rti_ifp == NULL && ifpaddr != NULL &&
 	    ifpaddr->sa_family == AF_LINK &&
 	    (ifa = ifa_ifwithnet(ifpaddr, 0, fibnum)) != NULL) {
 		/* Only the ifp is wanted; drop the ifa reference right away. */
 		info->rti_ifp = ifa->ifa_ifp;
 		ifa_free(ifa);
 	}
 	if (info->rti_ifa == NULL && ifaaddr != NULL)
 		info->rti_ifa = ifa_ifwithaddr(ifaaddr);
 	if (info->rti_ifa == NULL) {
 		struct sockaddr *sa;
 
 		/* Best address to search by: IFA, then gateway, then dst. */
 		sa = ifaaddr != NULL ? ifaaddr :
 		    (gateway != NULL ? gateway : dst);
 		if (sa != NULL && info->rti_ifp != NULL)
 			info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
 		else if (dst != NULL && gateway != NULL)
 			info->rti_ifa = ifa_ifwithroute(flags, dst, gateway,
 							fibnum);
 		else if (sa != NULL)
 			info->rti_ifa = ifa_ifwithroute(flags, sa, sa,
 							fibnum);
 	}
 	if ((ifa = info->rti_ifa) != NULL) {
 		if (info->rti_ifp == NULL)
 			info->rti_ifp = ifa->ifa_ifp;
 	} else
 		error = ENETUNREACH;
 	return (error);
 }
 
 /*
  * Tree-walk callback: brings the MTU of route @rn in line with the
  * interface MTU described by @arg (a struct if_mtuinfo).
  *
  * Routes on other interfaces are ignored.  The route MTU is always
  * lowered to the interface MTU; it is raised only for routes that have
  * none of RTF_FIXEDMTU, RTF_GATEWAY and RTF_HOST set.
  * Always returns 0 so that the walk continues.
  */
 static int
 if_updatemtu_cb(struct radix_node *rn, void *arg)
 {
 	struct rtentry *rt = (struct rtentry *)rn;
 	struct if_mtuinfo *ifmtu = (struct if_mtuinfo *)arg;
 
 	if (rt->rt_ifp == ifmtu->ifp) {
 		/*
 		 * We have to decrease mtu regardless of flags.  A bigger
 		 * MTU is only accepted when the route neither carries a
 		 * user-supplied MTU nor is a non-interface route.
 		 */
 		if (rt->rt_mtu >= ifmtu->mtu ||
 		    (rt->rt_flags &
 		    (RTF_FIXEDMTU | RTF_GATEWAY | RTF_HOST)) == 0)
 			rt->rt_mtu = ifmtu->mtu;
 	}
 
 	return (0);
 }
 
 /*
  * Propagates the MTU of interface @ifp to the routes that use it.
  *
  * For every address family the family-specific interface MTU is
  * obtained with if_getmtu_family() and applied, via if_updatemtu_cb(),
  * to the matching table of each fib.
  */
 void
 rt_updatemtu(struct ifnet *ifp)
 {
 	struct if_mtuinfo ifmtu;
 	struct radix_node_head *rnh;
 	int family, fib;
 
 	ifmtu.ifp = ifp;
 
 	/*
 	 * Try to update rt_mtu for all routes using this interface
 	 * Unfortunately the only way to do this is to traverse all
 	 * routing tables in all fibs/domains.
 	 */
 	for (family = 1; family <= AF_MAX; family++) {
 		ifmtu.mtu = if_getmtu_family(ifp, family);
 		for (fib = 0; fib < rt_numfibs; fib++) {
 			rnh = rt_tables_get_rnh(fib, family);
 			if (rnh != NULL) {
 				RADIX_NODE_HEAD_LOCK(rnh);
 				rnh->rnh_walktree(rnh, if_updatemtu_cb, &ifmtu);
 				RADIX_NODE_HEAD_UNLOCK(rnh);
 			}
 		}
 	}
 }
 
 
 #if 0
 /*
  * Debugging helpers, compiled out.
  *
  * p_sockaddr() formats an AF_INET/AF_INET6 address into @buf and returns
  * the string length (0 for other families or on failure).
  * rt_print() formats a route as "dst[/mask][>gateway]" and returns the
  * number of characters written.
  * NOTE(review): rt_print() stores the '/' and '>' separators without
  * checking them against @buflen.
  */
 int p_sockaddr(char *buf, int buflen, struct sockaddr *s);
 int rt_print(char *buf, int buflen, struct rtentry *rt);
 
 int
 p_sockaddr(char *buf, int buflen, struct sockaddr *s)
 {
 	void *paddr = NULL;
 
 	switch (s->sa_family) {
 	case AF_INET:
 		paddr = &((struct sockaddr_in *)s)->sin_addr;
 		break;
 	case AF_INET6:
 		paddr = &((struct sockaddr_in6 *)s)->sin6_addr;
 		break;
 	}
 
 	if (paddr == NULL)
 		return (0);
 
 	if (inet_ntop(s->sa_family, paddr, buf, buflen) == NULL)
 		return (0);
 	
 	return (strlen(buf));
 }
 
 int
 rt_print(char *buf, int buflen, struct rtentry *rt)
 {
 	struct sockaddr *addr, *mask;
 	int i = 0;
 
 	addr = rt_key(rt);
 	mask = rt_mask(rt);
 
 	i = p_sockaddr(buf, buflen, addr);
 	if (!(rt->rt_flags & RTF_HOST)) {
 		buf[i++] = '/';
 		i += p_sockaddr(buf + i, buflen - i, mask);
 	}
 
 	if (rt->rt_flags & RTF_GATEWAY) {
 		buf[i++] = '>';
 		i += p_sockaddr(buf + i, buflen - i, rt->rt_gateway);
 	}
 
 	return (i);
 }
 #endif
 
 #ifdef RADIX_MPATH
 /*
  * Deletes key for single-path routes, unlinks rtentry with
  * gateway specified in @info from multi-path routes.
  *
  * @rto is the first entry of the chain found for the prefix.
  *
  * Returns unlinked entry. In case of failure, returns NULL
  * and sets @perror to ESRCH.
  * (dst and netmask below are macros over @info.)
  */
 static struct radix_node *
 rt_mpath_unlink(struct radix_node_head *rnh, struct rt_addrinfo *info,
     struct rtentry *rto, int *perror)
 {
 	/*
 	 * if we got multipath routes, we require users to specify
 	 * a matching RTAX_GATEWAY.
 	 */
 	struct rtentry *rt; // *rto = NULL;
 	struct radix_node *rn;
 	struct sockaddr *gw;
 
 	gw = info->rti_info[RTAX_GATEWAY];
 	rt = rt_mpath_matchgate(rto, gw);
 	if (rt == NULL) {
 		*perror = ESRCH;
 		return (NULL);
 	}
 
 	/*
 	 * this is the first entry in the chain
 	 */
 	if (rto == rt) {
 		rn = rn_mpath_next((struct radix_node *)rt);
 		/*
 		 * there is another entry, now it's active
 		 */
 		if (rn) {
 			rto = RNTORT(rn);
 			RT_LOCK(rto);
 			rto->rt_flags |= RTF_UP;
 			RT_UNLOCK(rto);
 		} else if (rt->rt_flags & RTF_GATEWAY) {
 			/*
 			 * For gateway routes, we need to
 			 * make sure that we are deleting
 			 * the correct gateway.
 			 * rt_mpath_matchgate() does not
 			 * check the case when there is only
 			 * one route in the chain.
 			 */
 			if (gw &&
 			    (rt->rt_gateway->sa_len != gw->sa_len ||
 				memcmp(rt->rt_gateway, gw, gw->sa_len))) {
 				*perror = ESRCH;
 				return (NULL);
 			}
 		}
 
 		/*
 		 * use the normal delete code to remove
 		 * the first entry
 		 */
 		rn = rnh->rnh_deladdr(dst, netmask, rnh);
 		*perror = 0;
 		return (rn);
 	}
 		
 	/*
 	 * if the entry is 2nd and on up
 	 */
 	if (rt_mpath_deldup(rto, rt) == 0)
 		panic ("rtrequest1: rt_mpath_deldup");
 	*perror = 0;
 	rn = (struct radix_node *)rt;
 	return (rn);
 }
 #endif
 
 #ifdef FLOWTABLE
 /*
  * Finds the existing route that currently matches the destination of a
  * route about to be added, so its flow-table entries can be flushed.
  *
  * Returns the matching rtentry with an extra reference held (the caller
  * releases it with RTFREE()), or NULL when nothing needs flushing: no
  * match, an unsupported family, an existing host route, or the mask
  * comparison below decided against it.
  * (dst, netmask and flags are macros over @info.)
  */
 static struct rtentry *
 rt_flowtable_check_route(struct radix_node_head *rnh, struct rt_addrinfo *info)
 {
 #if defined(INET6) || defined(INET)
 	struct radix_node *rn;
 #endif
 	struct rtentry *rt0;
 
 	rt0 = NULL;
 	/* "flow-table" only supports IPv6 and IPv4 at the moment. */
 	switch (dst->sa_family) {
 #ifdef INET6
 	case AF_INET6:
 #endif
 #ifdef INET
 	case AF_INET:
 #endif
 #if defined(INET6) || defined(INET)
 		rn = rnh->rnh_matchaddr(dst, rnh);
 		if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
 			struct sockaddr *mask;
 			u_char *m, *n;
 			int len;
 
 			/*
 			 * compare mask to see if the new route is
 			 * more specific than the existing one
 			 */
 			rt0 = RNTORT(rn);
 			RT_LOCK(rt0);
 			RT_ADDREF(rt0);
 			RT_UNLOCK(rt0);
 			/*
 			 * A host route is already present, so
 			 * leave the flow-table entries as is.
 			 */
 			if (rt0->rt_flags & RTF_HOST) {
 				RTFREE(rt0);
 				rt0 = NULL;
 			} else if (!(flags & RTF_HOST) && netmask) {
 				mask = rt_mask(rt0);
 				len = mask->sa_len;
 				m = (u_char *)mask;
 				n = (u_char *)netmask;
 				/* Advance to the first byte where the masks differ. */
 				while (len-- > 0) {
 					if (*n != *m)
 						break;
 					n++;
 					m++;
 				}
 				/*
 				 * NOTE(review): when the loop above runs to
 				 * completion (identical masks) len is -1, not
 				 * 0, and n/m point one byte past the compared
 				 * range, so "*n < *m" reads beyond it.
 				 * len == 0 is only reached when the masks
 				 * differ in their last byte.  Confirm the
 				 * intended condition.
 				 */
 				if (len == 0 || (*n < *m)) {
 					RTFREE(rt0);
 					rt0 = NULL;
 				}
 			}
 		}
 #endif/* INET6 || INET */
 	}
 
 	return (rt0);
 }
 #endif
 
 /*
  * Carry out routing request @req, described by @info, on fib @fibnum.
  *
  * Supported requests:
  *	RTM_DELETE	unlink the matching entry and notify its ifa
  *	RTM_RESOLVE	accepted for compatibility, does nothing
  *	RTM_ADD		allocate a new entry and insert it into the table
  *	RTM_CHANGE	handled by rtrequest1_fib_change()
  * Anything else yields EOPNOTSUPP.
  *
  * For address families other than AF_INET/AF_INET6 the request is
  * always applied to RT_DEFAULT_FIB.
  *
  * If @ret_nrt is not NULL the affected entry is returned through it:
  * for RTM_ADD with one additional reference, for RTM_DELETE unlocked
  * with the reference obtained from rt_unlinkrte(), which the caller
  * must release.
  *
  * Returns 0 on success or an errno value.
  * (dst, gateway, netmask and flags are macros over @info.)
  */
 int
 rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
 				u_int fibnum)
 {
 	int error = 0;
 	struct rtentry *rt, *rt_old;
 #ifdef FLOWTABLE
 	struct rtentry *rt0;
 #endif
 	struct radix_node *rn;
 	struct radix_node_head *rnh;
 	struct ifaddr *ifa;
 	struct sockaddr *ndst;
 	struct sockaddr_storage mdst;
 
 	KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
 	KASSERT((flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked"));
 	switch (dst->sa_family) {
 	case AF_INET6:
 	case AF_INET:
 		/* We support multiple FIBs. */
 		break;
 	default:
 		fibnum = RT_DEFAULT_FIB;
 		break;
 	}
 
 	/*
 	 * Find the correct routing tree to use for this Address Family
 	 */
 	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
 	if (rnh == NULL)
 		return (EAFNOSUPPORT);
 
 	/*
 	 * If we are adding a host route then we don't want to put
 	 * a netmask in the tree, nor do we want to clone it.
 	 */
 	if (flags & RTF_HOST)
 		netmask = NULL;
 
 	switch (req) {
 	case RTM_DELETE:
 		/*
 		 * Look the entry up by its masked destination.
 		 * NOTE(review): "dst" is a macro, so this leaves
 		 * info->rti_info[RTAX_DST] pointing at the stack-local
 		 * mdst after this function returns.
 		 */
 		if (netmask) {
 			rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
 			dst = (struct sockaddr *)&mdst;
 		}
 
 		RADIX_NODE_HEAD_LOCK(rnh);
 		rt = rt_unlinkrte(rnh, info, &error);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 		if (error != 0)
 			return (error);
 
 		rt_notifydelete(rt, info);
 
 		/*
 		 * If the caller wants it, then it can have it,
 		 * but it's up to it to free the rtentry as we won't be
 		 * doing it.
 		 */
 		if (ret_nrt) {
 			*ret_nrt = rt;
 			RT_UNLOCK(rt);
 		} else
 			RTFREE_LOCKED(rt);
 		break;
 	case RTM_RESOLVE:
 		/*
 		 * resolve was only used for route cloning
 		 * here for compat
 		 */
 		break;
 	case RTM_ADD:
 		if ((flags & RTF_GATEWAY) && !gateway)
 			return (EINVAL);
 		if (dst && gateway && (dst->sa_family != gateway->sa_family) && 
 		    (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
 			return (EINVAL);
 
 		/* Either way we end up holding one reference on the ifa. */
 		if (info->rti_ifa == NULL) {
 			error = rt_getifa_fib(info, fibnum);
 			if (error)
 				return (error);
 		} else
 			ifa_ref(info->rti_ifa);
 		ifa = info->rti_ifa;
 		rt = uma_zalloc(V_rtzone, M_NOWAIT);
 		if (rt == NULL) {
 			ifa_free(ifa);
 			return (ENOBUFS);
 		}
 		rt->rt_flags = RTF_UP | flags;
 		rt->rt_fibnum = fibnum;
 		/*
 		 * Add the gateway. Possibly re-malloc-ing the storage for it.
 		 */
 		if ((error = rt_setgate(rt, dst, gateway)) != 0) {
 			ifa_free(ifa);
 			uma_zfree(V_rtzone, rt);
 			return (error);
 		}
 
 		/*
 		 * point to the (possibly newly malloc'd) dest address.
 		 */
 		ndst = (struct sockaddr *)rt_key(rt);
 
 		/*
 		 * make sure it contains the value we want (masked if needed).
 		 */
 		if (netmask) {
 			rt_maskedcopy(dst, ndst, netmask);
 		} else
 			bcopy(dst, ndst, dst->sa_len);
 
 		/*
 		 * We use the ifa reference returned by rt_getifa_fib().
 		 * This moved from below so that rnh->rnh_addaddr() can
 		 * examine the ifa and  ifa->ifa_ifp if it so desires.
 		 */
 		rt->rt_ifa = ifa;
 		rt->rt_ifp = ifa->ifa_ifp;
 		rt->rt_weight = 1;
 
 		rt_setmetrics(info, rt);
 
 		RADIX_NODE_HEAD_LOCK(rnh);
 		RT_LOCK(rt);
 #ifdef RADIX_MPATH
 		/* do not permit exactly the same dst/mask/gw pair */
 		if (rn_mpath_capable(rnh) &&
 			rt_mpath_conflict(rnh, rt, netmask)) {
 			RADIX_NODE_HEAD_UNLOCK(rnh);
 
 			ifa_free(rt->rt_ifa);
 			R_Free(rt_key(rt));
 			uma_zfree(V_rtzone, rt);
 			return (EEXIST);
 		}
 #endif
 
 #ifdef FLOWTABLE
 		rt0 = rt_flowtable_check_route(rnh, info);
 #endif /* FLOWTABLE */
 
 		/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
 		rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
 
 		rt_old = NULL;
 		if (rn == NULL && (info->rti_flags & RTF_PINNED) != 0) {
 
 			/*
 			 * Force removal and re-try addition
 			 * TODO: better multipath&pinned support
 			 */
 			struct sockaddr *info_dst = info->rti_info[RTAX_DST];
 			info->rti_info[RTAX_DST] = ndst;
 			/* Do not delete existing PINNED(interface) routes */
 			info->rti_flags &= ~RTF_PINNED;
 			rt_old = rt_unlinkrte(rnh, info, &error);
 			info->rti_flags |= RTF_PINNED;
 			info->rti_info[RTAX_DST] = info_dst;
 			if (rt_old != NULL)
 				rn = rnh->rnh_addaddr(ndst, netmask, rnh,
 				    rt->rt_nodes);
 		}
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 
 		if (rt_old != NULL)
 			RT_UNLOCK(rt_old);
 
 		/*
 		 * If it still failed to go into the tree,
 		 * then un-make it (this should be a function)
 		 *
 		 * NOTE(review): if rt_old was unlinked above but the
 		 * second rnh_addaddr() failed too, rt_old is neither
 		 * notified nor released on this path.
 		 */
 		if (rn == NULL) {
 			ifa_free(rt->rt_ifa);
 			R_Free(rt_key(rt));
 			uma_zfree(V_rtzone, rt);
 #ifdef FLOWTABLE
 			if (rt0 != NULL)
 				RTFREE(rt0);
 #endif
 			return (EEXIST);
 		} 
 #ifdef FLOWTABLE
 		else if (rt0 != NULL) {
 			flowtable_route_flush(dst->sa_family, rt0);
 			RTFREE(rt0);
 		}
 #endif
 
 		/* The replaced route is gone for good: notify and release it. */
 		if (rt_old != NULL) {
 			rt_notifydelete(rt_old, info);
 			RTFREE(rt_old);
 		}
 
 		/*
 		 * If this protocol has something to add to this then
 		 * allow it to do that as well.
 		 */
 		if (ifa->ifa_rtrequest)
 			ifa->ifa_rtrequest(req, rt, info);
 
 		/*
 		 * actually return a resultant rtentry and
 		 * give the caller a single reference.
 		 */
 		if (ret_nrt) {
 			*ret_nrt = rt;
 			RT_ADDREF(rt);
 		}
 		RT_UNLOCK(rt);
 		break;
 	case RTM_CHANGE:
 		RADIX_NODE_HEAD_LOCK(rnh);
 		error = rtrequest1_fib_change(rnh, info, ret_nrt, fibnum);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 		break;
 	default:
 		error = EOPNOTSUPP;
 	}
 
 	return (error);
 }
 
 #undef dst
 #undef gateway
 #undef netmask
 #undef ifaaddr
 #undef ifpaddr
 #undef flags
 
 /*
  * RTM_CHANGE worker: updates the existing route matching
  * RTAX_DST/RTAX_NETMASK of @info in table @rnh.
  *
  * The following may be changed, depending on what @info carries:
  * metrics, gateway address, outgoing ifa/ifp, the flags covered by
  * RTF_FMASK, and the route MTU.
  *
  * The caller holds the @rnh lock (see rtrequest1_fib()).
  * If @ret_nrt is not NULL the entry is returned through it with one
  * additional reference.
  *
  * Returns 0 on success, ESRCH if no matching route exists, or the error
  * reported by rt_getifa_fib() / rt_setgate().
  */
 static int
 rtrequest1_fib_change(struct radix_node_head *rnh, struct rt_addrinfo *info,
     struct rtentry **ret_nrt, u_int fibnum)
 {
 	struct rtentry *rt = NULL;
 	struct ifaddr *old_ifa;
 	int error = 0;
 	int free_ifa = 0;
 	int family, mtu;
 	struct if_mtuinfo ifmtu;
 
 	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
 	    info->rti_info[RTAX_NETMASK], rnh);
 
 	if (rt == NULL)
 		return (ESRCH);
 
 #ifdef RADIX_MPATH
 	/*
 	 * If we got multipath routes,
 	 * we require users to specify a matching RTAX_GATEWAY.
 	 */
 	if (rn_mpath_capable(rnh)) {
 		rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]);
 		if (rt == NULL)
 			return (ESRCH);
 	}
 #endif
 
 	RT_LOCK(rt);
 
 	rt_setmetrics(info, rt);
 
 	/*
 	 * New gateway could require new ifaddr, ifp;
 	 * flags may also be different; ifp may be specified
 	 * by ll sockaddr when protocol address is ambiguous
 	 */
 	if (((rt->rt_flags & RTF_GATEWAY) &&
 	    info->rti_info[RTAX_GATEWAY] != NULL) ||
 	    info->rti_info[RTAX_IFP] != NULL ||
 	    (info->rti_info[RTAX_IFA] != NULL &&
 	     !sa_equal(info->rti_info[RTAX_IFA], rt->rt_ifa->ifa_addr))) {
 
 		error = rt_getifa_fib(info, fibnum);
 		/* Remember to drop the lookup's ifa reference on the way out. */
 		if (info->rti_ifa != NULL)
 			free_ifa = 1;
 
 		if (error != 0)
 			goto bad;
 	}
 
 	/*
 	 * Check if outgoing interface has changed.  Only the notification
 	 * is done here; the old ifa stays referenced and attached to the
 	 * route until the new one is installed below, so a failing
 	 * rt_setgate() cannot leave rt->rt_ifa pointing at a released ifa.
 	 */
 	if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa &&
 	    rt->rt_ifa != NULL && rt->rt_ifa->ifa_rtrequest != NULL)
 		rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, info);
 
 	/* Update gateway address */
 	if (info->rti_info[RTAX_GATEWAY] != NULL) {
 		error = rt_setgate(rt, rt_key(rt), info->rti_info[RTAX_GATEWAY]);
 		if (error != 0)
 			goto bad;
 
 		rt->rt_flags &= ~RTF_GATEWAY;
 		rt->rt_flags |= (RTF_GATEWAY & info->rti_flags);
 	}
 
 	if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa) {
 		/*
 		 * Install the new ifa, then release the route's reference
 		 * on the old one.  The release is unconditional: the old
 		 * reference must be dropped whether or not that ifa has
 		 * an ifa_rtrequest hook.
 		 */
 		old_ifa = rt->rt_ifa;
 		ifa_ref(info->rti_ifa);
 		rt->rt_ifa = info->rti_ifa;
 		rt->rt_ifp = info->rti_ifp;
 		if (old_ifa != NULL)
 			ifa_free(old_ifa);
 	}
 	/* Allow some flags to be toggled on change. */
 	rt->rt_flags &= ~RTF_FMASK;
 	rt->rt_flags |= info->rti_flags & RTF_FMASK;
 
 	if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest != NULL)
 	       rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, info);
 
 	/* Alter route MTU if necessary */
 	if (rt->rt_ifp != NULL) {
 		family = info->rti_info[RTAX_DST]->sa_family;
 		mtu = if_getmtu_family(rt->rt_ifp, family);
 		/* Set default MTU */
 		if (rt->rt_mtu == 0)
 			rt->rt_mtu = mtu;
 		if (rt->rt_mtu != mtu) {
 			/* Check if we really need to update */
 			ifmtu.ifp = rt->rt_ifp;
 			ifmtu.mtu = mtu;
 			if_updatemtu_cb(rt->rt_nodes, &ifmtu);
 		}
 	}
 
 	if (ret_nrt) {
 		*ret_nrt = rt;
 		RT_ADDREF(rt);
 	}
 bad:
 	RT_UNLOCK(rt);
 	if (free_ifa != 0)
 		ifa_free(info->rti_ifa);
 	return (error);
 }
 
 /*
  * Applies the metrics selected by info->rti_mflags to route @rt.
  *
  *	RTV_MTU		sets rt_mtu; a non-zero value also marks the route
  *			RTF_FIXEDMTU, zero clears that flag
  *	RTV_WEIGHT	sets rt_weight
  *	RTV_EXPIRE	sets rt_expire, converted between the time_second
  *			and time_uptime bases (0 stays 0)
  *
  * info->rti_rmx is only dereferenced when one of these bits is set.
  */
 static void
 rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt)
 {
 
 	if (info->rti_mflags & RTV_MTU) {
 		/*
 		 * A non-zero MTU was explicitly provided by the user and
 		 * is kept; an explicit 0 is taken as a rollback to the
 		 * default.
 		 */
 		if (info->rti_rmx->rmx_mtu == 0)
 			rt->rt_flags &= ~RTF_FIXEDMTU;
 		else
 			rt->rt_flags |= RTF_FIXEDMTU;
 		rt->rt_mtu = info->rti_rmx->rmx_mtu;
 	}
 
 	if (info->rti_mflags & RTV_WEIGHT)
 		rt->rt_weight = info->rti_rmx->rmx_weight;
 
 	/* Kernel -> userland timebase conversion. */
 	if (info->rti_mflags & RTV_EXPIRE) {
 		if (info->rti_rmx->rmx_expire)
 			rt->rt_expire = info->rti_rmx->rmx_expire -
 			    time_second + time_uptime;
 		else
 			rt->rt_expire = 0;
 	}
 }
 
 /*
  * Stores gateway @gate in route @rt, (re)allocating the combined
  * key+gateway buffer when there is none yet or the current gateway
  * slot is too small.  On reallocation the key is re-copied from @dst.
  *
  * Returns 0 on success or ENOBUFS if the allocation fails, in which
  * case @rt is left unchanged.
  */
 int
 rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
 {
 	/* XXX dst may be overwritten, can we move this to below */
 	int dlen = SA_SIZE(dst), glen = SA_SIZE(gate);
 
 	/*
 	 * Prepare to store the gateway in rt->rt_gateway.
 	 * Both dst and gateway are stored one after the other in the same
 	 * malloc'd chunk. If we have room, we can reuse the old buffer,
 	 * rt_gateway already points to the right place.
 	 * Otherwise, malloc a new block and update the 'dst' address.
 	 */
 	if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) {
 		caddr_t new;
 
 		R_Malloc(new, caddr_t, dlen + glen);
 		if (new == NULL)
 			return ENOBUFS;
 		/*
 		 * XXX note, we copy from *dst and not *rt_key(rt) because
 		 * rt_setgate() can be called to initialize a newly
 		 * allocated route entry, in which case rt_key(rt) == NULL
 		 * (and also rt->rt_gateway == NULL).
 		 * Free()/free() handle a NULL argument just fine.
 		 */
 		bcopy(dst, new, dlen);
 		R_Free(rt_key(rt));	/* free old block, if any */
 		rt_key(rt) = (struct sockaddr *)new;
 		rt->rt_gateway = (struct sockaddr *)(new + dlen);
 	}
 
 	/*
 	 * Copy the new gateway value into the memory chunk.
 	 */
 	bcopy(gate, rt->rt_gateway, glen);
 
 	return (0);
 }
 
 void
 rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask)
 {
 	u_char *cp1 = (u_char *)src;
 	u_char *cp2 = (u_char *)dst;
 	u_char *cp3 = (u_char *)netmask;
 	u_char *cplim = cp2 + *cp3;
 	u_char *cplim2 = cp2 + *cp1;
 
 	*cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
 	cp3 += 2;
 	if (cplim > cplim2)
 		cplim = cplim2;
 	while (cp2 < cplim)
 		*cp2++ = *cp1++ & *cp3++;
 	if (cp2 < cplim2)
 		bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2));
 }
 
 /*
  * Set up a routing table entry, normally
  * for an interface.
  *
  * Worker for rtinit(): issues 'cmd' (compared below against RTM_ADD and
  * RTM_DELETE) for the route belonging to interface address 'ifa', either
  * in the single FIB 'fibnum' or, when fibnum is RT_ALL_FIBS, in a range
  * of FIBs.  With RTF_HOST set in 'flags' the destination is ifa_dstaddr
  * and no netmask is used; otherwise it is ifa_addr with ifa_netmask.
  *
  * Returns EINVAL when the destination has sa_len == 0.  For RTM_DELETE
  * returns 0 if a route was removed from at least one table and
  * EHOSTUNREACH/ENETUNREACH otherwise.  For any other command returns the
  * most recent non-zero error produced by rtrequest1_fib(), or 0.
  */
 #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */
 static inline  int
 rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
 {
 	struct sockaddr *dst;
 	struct sockaddr *netmask;
 	struct rtentry *rt = NULL;
 	struct rt_addrinfo info;
 	int error = 0;
 	int startfib, endfib;
 	char tempbuf[_SOCKADDR_TMPSIZE];
 	int didwork = 0;
 	int a_failure = 0;
 	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
 	struct radix_node_head *rnh;
 
 	if (flags & RTF_HOST) {
 		dst = ifa->ifa_dstaddr;
 		netmask = NULL;
 	} else {
 		dst = ifa->ifa_addr;
 		netmask = ifa->ifa_netmask;
 	}
 	if (dst->sa_len == 0)
 		return(EINVAL);
 	/* Any family other than INET/INET6 is forced onto the default FIB. */
 	switch (dst->sa_family) {
 	case AF_INET6:
 	case AF_INET:
 		/* We support multiple FIBs. */
 		break;
 	default:
 		fibnum = RT_DEFAULT_FIB;
 		break;
 	}
 	/*
 	 * Work out the inclusive FIB range [startfib, endfib] to operate on.
 	 * An RT_ALL_FIBS add is narrowed to the interface's own FIB when
 	 * V_rt_add_addr_allfibs is 0; every other RT_ALL_FIBS request covers
 	 * all rt_numfibs tables.
 	 */
 	if (fibnum == RT_ALL_FIBS) {
 		if (V_rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD)
 			startfib = endfib = ifa->ifa_ifp->if_fib;
 		else {
 			startfib = 0;
 			endfib = rt_numfibs - 1;
 		}
 	} else {
 		KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum"));
 		startfib = fibnum;
 		endfib = fibnum;
 	}
 
 	/*
 	 * If it's a delete, check that if it exists,
 	 * it's on the correct interface or we might scrub
 	 * a route to another ifa which would
 	 * be confusing at best and possibly worse.
 	 */
 	if (cmd == RTM_DELETE) {
 		/*
 		 * It's a delete, so it should already exist..
 		 * If it's a net, mask off the host bits
 		 * (Assuming we have a mask)
 		 * XXX this is kinda inet specific..
 		 *
 		 * NOTE(review): rt_maskedcopy() writes up to dst->sa_len
 		 * bytes into tempbuf, which is _SOCKADDR_TMPSIZE bytes; no
 		 * check of sa_len against that size is visible here --
 		 * confirm callers never pass a longer sockaddr.
 		 */
 		if (netmask != NULL) {
 			rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask);
 			dst = (struct sockaddr *)tempbuf;
 		}
 	}
 	/*
 	 * Now go through all the requested tables (fibs) and do the
 	 * requested action. Realistically, this will either be fib 0
 	 * for protocols that don't do multiple tables or all the
 	 * tables for those that do.
 	 */
 	for ( fibnum = startfib; fibnum <= endfib; fibnum++) {
 		if (cmd == RTM_DELETE) {
 			struct radix_node *rn;
 			/*
 			 * Look up an rtentry that is in the routing tree and
 			 * contains the correct info.
 			 */
 			rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
 			if (rnh == NULL)
 				/* this table doesn't exist but others might */
 				continue;
 			RADIX_NODE_HEAD_RLOCK(rnh);
 			rn = rnh->rnh_lookup(dst, netmask, rnh);
 #ifdef RADIX_MPATH
 			if (rn_mpath_capable(rnh)) {
 
 				if (rn == NULL) 
 					error = ESRCH;
 				else {
 					rt = RNTORT(rn);
 					/*
 					 * for interface route the
 					 * rt->rt_gateway is sockaddr_intf
 					 * for cloning ARP entries, so
 					 * rt_mpath_matchgate must use the
 					 * interface address
 					 */
 					rt = rt_mpath_matchgate(rt,
 					    ifa->ifa_addr);
 					if (rt == NULL) 
 						error = ESRCH;
 				}
 			}
 #endif
 			/*
 			 * Skip this table unless the lookup found a non-root
 			 * node whose route belongs to this very ifa.
 			 * NOTE(review): this assignment overwrites any ESRCH
 			 * stored in 'error' by the RADIX_MPATH block above.
 			 */
 			error = (rn == NULL ||
 			    (rn->rn_flags & RNF_ROOT) ||
 			    RNTORT(rn)->rt_ifa != ifa);
 			RADIX_NODE_HEAD_RUNLOCK(rnh);
 			if (error) {
 				/* this is only an error if bad on ALL tables */
 				continue;
 			}
 		}
 		/*
 		 * Do the actual request
 		 */
 		bzero((caddr_t)&info, sizeof(info));
 		info.rti_ifa = ifa;
 		info.rti_flags = flags |
 		    (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED;
 		info.rti_info[RTAX_DST] = dst;
 		/*
 		 * doing this for compatibility reasons
 		 * (adds use an empty AF_LINK gateway, everything else uses
 		 * the interface address)
 		 */
 		if (cmd == RTM_ADD)
 			info.rti_info[RTAX_GATEWAY] =
 			    (struct sockaddr *)&null_sdl;
 		else
 			info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
 		info.rti_info[RTAX_NETMASK] = netmask;
 		error = rtrequest1_fib(cmd, &info, &rt, fibnum);
 
 		if (error == 0 && rt != NULL) {
 			/*
 			 * notify any listening routing agents of the change
 			 */
 			RT_LOCK(rt);
 #ifdef RADIX_MPATH
 			/*
 			 * in case address alias finds the first address
 			 * e.g. ifconfig bge0 192.0.2.246/24
 			 * e.g. ifconfig bge0 192.0.2.247/24
 			 * the address set in the route is 192.0.2.246
 			 * so we need to replace it with 192.0.2.247
 			 */
 			if (memcmp(rt->rt_ifa->ifa_addr,
 			    ifa->ifa_addr, ifa->ifa_addr->sa_len)) {
 				ifa_free(rt->rt_ifa);
 				ifa_ref(ifa);
 				rt->rt_ifp = ifa->ifa_ifp;
 				rt->rt_ifa = ifa;
 			}
 #endif
 			/*
 			 * doing this for compatibility reasons
 			 * (fill the link-level gateway with the interface's
 			 * type and index)
 			 */
 			if (cmd == RTM_ADD) {
 			    ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type  =
 				rt->rt_ifp->if_type;
 			    ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index =
 				rt->rt_ifp->if_index;
 			}
 			/*
 			 * Take an extra reference so the entry is still held
 			 * while the lock is dropped around the notification.
 			 */
 			RT_ADDREF(rt);
 			RT_UNLOCK(rt);
 			rt_newaddrmsg_fib(cmd, ifa, error, rt, fibnum);
 			RT_LOCK(rt);
 			RT_REMREF(rt);
 			if (cmd == RTM_DELETE) {
 				/*
 				 * If we are deleting, and we found an entry,
 				 * then it's been removed from the tree..
 				 * now throw it away.
 				 */
 				RTFREE_LOCKED(rt);
 			} else {
 				if (cmd == RTM_ADD) {
 					/*
 					 * We just wanted to add it..
 					 * we don't actually need a reference.
 					 */
 					RT_REMREF(rt);
 				}
 				RT_UNLOCK(rt);
 			}
 			didwork = 1;
 		}
 		/* Remember the most recent failure across all tables. */
 		if (error)
 			a_failure = error;
 	}
 	if (cmd == RTM_DELETE) {
 		if (didwork) {
 			error = 0;
 		} else {
 			/* we only give an error if it wasn't in any table */
 			error = ((flags & RTF_HOST) ?
 			    EHOSTUNREACH : ENETUNREACH);
 		}
 	} else {
 		if (a_failure) {
 			/* return an error if any of them failed */
 			error = a_failure;
 		}
 	}
 	return (error);
 }
 
 /*
  * Set up a routing table entry, normally for an interface.
  *
  * Picks the destination the same way rtinit1() does (ifa_dstaddr for
  * RTF_HOST, ifa_addr otherwise) and hands the request on with
  * RT_ALL_FIBS for AF_INET/AF_INET6 destinations, or RT_DEFAULT_FIB for
  * every other address family.
  */
 int
 rtinit(struct ifaddr *ifa, int cmd, int flags)
 {
 	struct sockaddr *dst;
 	int fib;
 
 	dst = (flags & RTF_HOST) ? ifa->ifa_dstaddr : ifa->ifa_addr;
 
 	/* We do support multiple FIBs, but only for the inet families. */
 	if (dst->sa_family == AF_INET || dst->sa_family == AF_INET6)
 		fib = RT_ALL_FIBS;
 	else
 		fib = RT_DEFAULT_FIB;
 
 	return (rtinit1(ifa, cmd, flags, fib));
 }
 
 /*
  * Announce interface address arrival/withdraw
  * Returns 0 on success.
  */
 int
 rt_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
 {
 
 	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
 	    ("unexpected cmd %d", cmd));
 	
 	KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
 	    ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
 
 #if defined(INET) || defined(INET6)
 #ifdef SCTP
 	/*
 	 * notify the SCTP stack
 	 * this will only get called when an address is added/deleted
 	 * XXX pass the ifaddr struct instead if ifa->ifa_addr...
 	 */
 	sctp_addr_change(ifa, cmd);
 #endif /* SCTP */
 #endif
 	return (rtsock_addrmsg(cmd, ifa, fibnum));
 }
 
 /*
  * Announce route addition/removal.
  * Users of this function MUST validate input data BEFORE calling.
  * However we have to be able to handle invalid data:
  * if some userland app sends us "invalid" route message (invalid mask,
  * no dst, wrong address families, etc...) we need to pass it back
  * to app (and any other rtsock consumers) with rtm_errno field set to
  * non-zero value.
  * Returns 0 on success.
  */
 int
 rt_routemsg(int cmd, struct ifnet *ifp, int error, struct rtentry *rt,
     int fibnum)
 {
 
 	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
 	    ("unexpected cmd %d", cmd));
 	
 	KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
 	    ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
 
 	KASSERT(rt_key(rt) != NULL, (":%s: rt_key must be supplied", __func__));
 
 	return (rtsock_routemsg(cmd, ifp, error, rt, fibnum));
 }
 
 /*
  * Convenience wrapper: forwards to rt_newaddrmsg_fib() with RT_ALL_FIBS
  * as the fib argument.
  */
 void
 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
 {
 
 	rt_newaddrmsg_fib(cmd, ifa, error, rt, RT_ALL_FIBS);
 }
 
 /*
  * This is called to generate messages from the routing socket
  * indicating a network interface has had addresses associated with it.
  *
  * 'cmd' must be RTM_ADD or RTM_DELETE and 'fibnum' must be RT_ALL_FIBS
  * or a valid fib index.  A route message is emitted only when 'rt' is
  * non-NULL; 'error' is passed through to it.  The two messages are sent
  * in opposite order for the two commands: address then route on add,
  * route then address on delete.
  */
 void
 rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt,
     int fibnum)
 {
 
 	/* cmd is a signed int, so print it with %d like the sibling asserts. */
 	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
 		("unexpected cmd %d", cmd));
 	KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
 	    ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
 
 	if (cmd == RTM_ADD) {
 		rt_addrmsg(cmd, ifa, fibnum);
 		if (rt != NULL)
 			rt_routemsg(cmd, ifa->ifa_ifp, error, rt, fibnum);
 	} else {
 		if (rt != NULL)
 			rt_routemsg(cmd, ifa->ifa_ifp, error, rt, fibnum);
 		rt_addrmsg(cmd, ifa, fibnum);
 	}
 }
 
Index: projects/clang380-import/sys/net/route.h
===================================================================
--- projects/clang380-import/sys/net/route.h	(revision 293686)
+++ projects/clang380-import/sys/net/route.h	(revision 293687)
@@ -1,475 +1,491 @@
 /*-
  * Copyright (c) 1980, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)route.h	8.4 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 #ifndef _NET_ROUTE_H_
 #define _NET_ROUTE_H_
 
 #include <sys/counter.h>
 #include <net/vnet.h>
 
 /*
  * Kernel resident routing tables.
  *
  * The routing tables are initialized when interface addresses
  * are set by making entries for all directly connected interfaces.
  */
 
 /*
  * Struct route consists of a destination address,
  * a route entry pointer, link-layer prepend data pointer along
  * with its length.
  */
 struct route {
 	struct	rtentry *ro_rt;
 	char		*ro_prepend;
 	uint16_t	ro_plen;
 	uint16_t	ro_flags;
 	uint16_t	ro_mtu;	/* saved ro_rt mtu */
 	uint16_t	spare;
 	struct	sockaddr ro_dst;
 };
 
 #define	RT_L2_ME_BIT		2	/* dst L2 addr is our address */
 #define	RT_MAY_LOOP_BIT		3	/* dst may require loop copy */
 #define	RT_HAS_HEADER_BIT	4	/* mbuf already have its header prepended */
 
 #define	RT_CACHING_CONTEXT	0x1	/* XXX: not used anywhere */
 #define	RT_NORTREF		0x2	/* doesn't hold reference on ro_rt */
-#define	RT_L2_ME		(1 << RT_L2_ME_BIT)
-#define	RT_MAY_LOOP		(1 << RT_MAY_LOOP_BIT)
-#define	RT_HAS_HEADER		(1 << RT_HAS_HEADER_BIT)
+#define	RT_L2_ME		(1 << RT_L2_ME_BIT)		/* 0x0004 */
+#define	RT_MAY_LOOP		(1 << RT_MAY_LOOP_BIT)		/* 0x0008 */
+#define	RT_HAS_HEADER		(1 << RT_HAS_HEADER_BIT)	/* 0x0010 */
 
+#define	RT_REJECT		0x0020		/* Destination is reject */
+#define	RT_BLACKHOLE		0x0040		/* Destination is blackhole */
+#define	RT_HAS_GW		0x0080		/* Destination has GW  */
+
 struct rt_metrics {
 	u_long	rmx_locks;	/* Kernel must leave these values alone */
 	u_long	rmx_mtu;	/* MTU for this path */
 	u_long	rmx_hopcount;	/* max hops expected */
 	u_long	rmx_expire;	/* lifetime for route, e.g. redirect */
 	u_long	rmx_recvpipe;	/* inbound delay-bandwidth product */
 	u_long	rmx_sendpipe;	/* outbound delay-bandwidth product */
 	u_long	rmx_ssthresh;	/* outbound gateway buffer limit */
 	u_long	rmx_rtt;	/* estimated round trip time */
 	u_long	rmx_rttvar;	/* estimated rtt variance */
 	u_long	rmx_pksent;	/* packets sent using this route */
 	u_long	rmx_weight;	/* route weight */
 	u_long	rmx_filler[3];	/* will be used for T/TCP later */
 };
 
 /*
  * rmx_rtt and rmx_rttvar are stored as microseconds;
  * RTTTOPRHZ(rtt) converts to a value suitable for use
  * by a protocol slowtimo counter.
  */
 #define	RTM_RTTUNIT	1000000	/* units for rtt, rttvar, as units per sec */
 #define	RTTTOPRHZ(r)	((r) / (RTM_RTTUNIT / PR_SLOWHZ))
 
 /* lle state is exported in rmx_state rt_metrics field */
 #define	rmx_state	rmx_weight
 
 #define	RT_DEFAULT_FIB	0	/* Explicitly mark fib=0 restricted cases */
 #define	RT_ALL_FIBS	-1	/* Announce event for every fib */
 #ifdef _KERNEL
 extern u_int rt_numfibs;	/* number of usable routing tables */
 VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */
 #define	V_rt_add_addr_allfibs	VNET(rt_add_addr_allfibs)
 #endif
 
 /*
  * We distinguish between routes to hosts and routes to networks,
  * preferring the former if available.  For each route we infer
  * the interface to use from the gateway address supplied when
  * the route was entered.  Routes that forward packets through
  * gateways are marked so that the output routines know to address the
  * gateway rather than the ultimate destination.
  */
 #ifndef RNF_NORMAL
 #include <net/radix.h>
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
 #endif
 
 #if defined(_KERNEL) || defined(_WANT_RTENTRY)
 struct rtentry {
 	struct	radix_node rt_nodes[2];	/* tree glue, and other values */
 	/*
 	 * XXX struct rtentry must begin with a struct radix_node (or two!)
 	 * because the code does some casts of a 'struct radix_node *'
 	 * to a 'struct rtentry *'
 	 */
 #define	rt_key(r)	(*((struct sockaddr **)(&(r)->rt_nodes->rn_key)))
 #define	rt_mask(r)	(*((struct sockaddr **)(&(r)->rt_nodes->rn_mask)))
 	struct	sockaddr *rt_gateway;	/* value */
 	struct	ifnet *rt_ifp;		/* the answer: interface to use */
 	struct	ifaddr *rt_ifa;		/* the answer: interface address to use */
 	int		rt_flags;	/* up/down?, host/net */
 	int		rt_refcnt;	/* # held references */
 	u_int		rt_fibnum;	/* which FIB */
 	u_long		rt_mtu;		/* MTU for this path */
 	u_long		rt_weight;	/* absolute weight */ 
 	u_long		rt_expire;	/* lifetime for route, e.g. redirect */
 #define	rt_endzero	rt_pksent
 	counter_u64_t	rt_pksent;	/* packets sent using this route */
 	struct mtx	rt_mtx;		/* mutex for routing entry */
 	struct rtentry	*rt_chain;	/* pointer to next rtentry to delete */
 };
 #endif /* _KERNEL || _WANT_RTENTRY */
 
 #define	RTF_UP		0x1		/* route usable */
 #define	RTF_GATEWAY	0x2		/* destination is a gateway */
 #define	RTF_HOST	0x4		/* host entry (net otherwise) */
 #define	RTF_REJECT	0x8		/* host or net unreachable */
 #define	RTF_DYNAMIC	0x10		/* created dynamically (by redirect) */
 #define	RTF_MODIFIED	0x20		/* modified dynamically (by redirect) */
 #define RTF_DONE	0x40		/* message confirmed */
 /*			0x80		   unused, was RTF_DELCLONE */
 /*			0x100		   unused, was RTF_CLONING */
 #define RTF_XRESOLVE	0x200		/* external daemon resolves name */
 #define RTF_LLINFO	0x400		/* DEPRECATED - exists ONLY for backward 
 					   compatibility */
 #define RTF_LLDATA	0x400		/* used by apps to add/del L2 entries */
 #define RTF_STATIC	0x800		/* manually added */
 #define RTF_BLACKHOLE	0x1000		/* just discard pkts (during updates) */
 #define RTF_PROTO2	0x4000		/* protocol specific routing flag */
 #define RTF_PROTO1	0x8000		/* protocol specific routing flag */
 /*			0x10000		   unused, was RTF_PRCLONING */
 /*			0x20000		   unused, was RTF_WASCLONED */
 #define RTF_PROTO3	0x40000		/* protocol specific routing flag */
 #define	RTF_FIXEDMTU	0x80000		/* MTU was explicitly specified */
 #define RTF_PINNED	0x100000	/* route is immutable */
 #define	RTF_LOCAL	0x200000 	/* route represents a local address */
 #define	RTF_BROADCAST	0x400000	/* route represents a bcast address */
 #define	RTF_MULTICAST	0x800000	/* route represents a mcast address */
 					/* 0x8000000 and up unassigned */
 #define	RTF_STICKY	 0x10000000	/* always route dst->src */
 
 #define	RTF_RNH_LOCKED	 0x40000000	/* radix node head is locked */
 
 #define	RTF_GWFLAG_COMPAT 0x80000000	/* a compatibility bit for interacting
 					   with existing routing apps */
 
 /* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */
 #define RTF_FMASK	\
 	(RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \
 	 RTF_REJECT | RTF_STATIC | RTF_STICKY)
 
 /*
  * fib_ nexthop API flags.
  */
 
 /* Consumer-visible nexthop info flags */
 #define	NHF_REJECT		0x0010	/* RTF_REJECT */
 #define	NHF_BLACKHOLE		0x0020	/* RTF_BLACKHOLE */
 #define	NHF_REDIRECT		0x0040	/* RTF_DYNAMIC|RTF_MODIFIED */
 #define	NHF_DEFAULT		0x0080	/* Default route */
 #define	NHF_BROADCAST		0x0100	/* RTF_BROADCAST */
 #define	NHF_GATEWAY		0x0200	/* RTF_GATEWAY */
 
 /* Nexthop request flags */
 #define	NHR_IFAIF		0x01	/* Return ifa_ifp interface */
 #define	NHR_REF			0x02	/* For future use */
 
 /* Control plane route request flags */
 #define	NHR_COPY		0x100	/* Copy rte data */
 
 /*
  * rte<>nhop translation: map the RTF_* bits of an rtentry's flags onto
  * the consumer-visible NHF_* nexthop flags.  RTF_DYNAMIC and RTF_MODIFIED
  * both map to NHF_REDIRECT; bits without an NHF_ counterpart are dropped.
  */
 static inline uint16_t
 fib_rte_to_nh_flags(int rt_flags)
 {
 	uint16_t nh_flags = 0;
 
 	if (rt_flags & RTF_REJECT)
 		nh_flags |= NHF_REJECT;
 	if (rt_flags & RTF_BLACKHOLE)
 		nh_flags |= NHF_BLACKHOLE;
 	if (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED))
 		nh_flags |= NHF_REDIRECT;
 	if (rt_flags & RTF_BROADCAST)
 		nh_flags |= NHF_BROADCAST;
 	if (rt_flags & RTF_GATEWAY)
 		nh_flags |= NHF_GATEWAY;
 
 	return (nh_flags);
 }
 
+#ifdef _KERNEL
+/* rte<>ro_flags translation
+ *
+ * Refresh the RT_REJECT, RT_BLACKHOLE and RT_HAS_GW bits of ro->ro_flags
+ * from the RTF_REJECT, RTF_BLACKHOLE and RTF_GATEWAY bits of the cached
+ * route's rt_flags.  The three bits are cleared first, so stale values do
+ * not survive; all other ro_flags bits are left untouched.
+ * ro->ro_rt is dereferenced unconditionally and must not be NULL.
+ */
+static inline void
+rt_update_ro_flags(struct route *ro)
+{
+	int rt_flags = ro->ro_rt->rt_flags;
+
+	ro->ro_flags &= ~ (RT_REJECT|RT_BLACKHOLE|RT_HAS_GW);
+
+	ro->ro_flags |= (rt_flags & RTF_REJECT) ? RT_REJECT : 0;
+	ro->ro_flags |= (rt_flags & RTF_BLACKHOLE) ? RT_BLACKHOLE : 0;
+	ro->ro_flags |= (rt_flags & RTF_GATEWAY) ? RT_HAS_GW : 0;
+}
+#endif
+
 /*
  * Routing statistics.
  */
 struct	rtstat {
 	short	rts_badredirect;	/* bogus redirect calls */
 	short	rts_dynamic;		/* routes created by redirects */
 	short	rts_newgateway;		/* routes modified by redirects */
 	short	rts_unreach;		/* lookups which failed */
 	short	rts_wildcard;		/* lookups satisfied by a wildcard */
 };
 /*
  * Structures for routing messages.
  */
 struct rt_msghdr {
 	u_short	rtm_msglen;	/* to skip over non-understood messages */
 	u_char	rtm_version;	/* future binary compatibility */
 	u_char	rtm_type;	/* message type */
 	u_short	rtm_index;	/* index for associated ifp */
 	int	rtm_flags;	/* flags, incl. kern & message, e.g. DONE */
 	int	rtm_addrs;	/* bitmask identifying sockaddrs in msg */
 	pid_t	rtm_pid;	/* identify sender */
 	int	rtm_seq;	/* for sender to identify action */
 	int	rtm_errno;	/* why failed */
 	int	rtm_fmask;	/* bitmask used in RTM_CHANGE message */
 	u_long	rtm_inits;	/* which metrics we are initializing */
 	struct	rt_metrics rtm_rmx; /* metrics themselves */
 };
 
 #define RTM_VERSION	5	/* Up the ante and ignore older versions */
 
 /*
  * Message types.
  */
 #define RTM_ADD		0x1	/* Add Route */
 #define RTM_DELETE	0x2	/* Delete Route */
 #define RTM_CHANGE	0x3	/* Change Metrics or flags */
 #define RTM_GET		0x4	/* Report Metrics */
 #define RTM_LOSING	0x5	/* Kernel Suspects Partitioning */
 #define RTM_REDIRECT	0x6	/* Told to use different route */
 #define RTM_MISS	0x7	/* Lookup failed on this address */
 #define RTM_LOCK	0x8	/* fix specified metrics */
 		    /*	0x9  */
 		    /*	0xa  */
 #define RTM_RESOLVE	0xb	/* req to resolve dst to LL addr */
 #define RTM_NEWADDR	0xc	/* address being added to iface */
 #define RTM_DELADDR	0xd	/* address being removed from iface */
 #define RTM_IFINFO	0xe	/* iface going up/down etc. */
 #define	RTM_NEWMADDR	0xf	/* mcast group membership being added to if */
 #define	RTM_DELMADDR	0x10	/* mcast group membership being deleted */
 #define	RTM_IFANNOUNCE	0x11	/* iface arrival/departure */
 #define	RTM_IEEE80211	0x12	/* IEEE80211 wireless event */
 
 /*
  * Bitmask values for rtm_inits and rmx_locks.
  */
 #define RTV_MTU		0x1	/* init or lock _mtu */
 #define RTV_HOPCOUNT	0x2	/* init or lock _hopcount */
 #define RTV_EXPIRE	0x4	/* init or lock _expire */
 #define RTV_RPIPE	0x8	/* init or lock _recvpipe */
 #define RTV_SPIPE	0x10	/* init or lock _sendpipe */
 #define RTV_SSTHRESH	0x20	/* init or lock _ssthresh */
 #define RTV_RTT		0x40	/* init or lock _rtt */
 #define RTV_RTTVAR	0x80	/* init or lock _rttvar */
 #define RTV_WEIGHT	0x100	/* init or lock _weight */
 
 /*
  * Bitmask values for rtm_addrs.
  */
 #define RTA_DST		0x1	/* destination sockaddr present */
 #define RTA_GATEWAY	0x2	/* gateway sockaddr present */
 #define RTA_NETMASK	0x4	/* netmask sockaddr present */
 #define RTA_GENMASK	0x8	/* cloning mask sockaddr present */
 #define RTA_IFP		0x10	/* interface name sockaddr present */
 #define RTA_IFA		0x20	/* interface addr sockaddr present */
 #define RTA_AUTHOR	0x40	/* sockaddr for author of redirect */
 #define RTA_BRD		0x80	/* for NEWADDR, broadcast or p-p dest addr */
 
 /*
  * Index offsets for sockaddr array for alternate internal encoding.
  */
 #define RTAX_DST	0	/* destination sockaddr present */
 #define RTAX_GATEWAY	1	/* gateway sockaddr present */
 #define RTAX_NETMASK	2	/* netmask sockaddr present */
 #define RTAX_GENMASK	3	/* cloning mask sockaddr present */
 #define RTAX_IFP	4	/* interface name sockaddr present */
 #define RTAX_IFA	5	/* interface addr sockaddr present */
 #define RTAX_AUTHOR	6	/* sockaddr for author of redirect */
 #define RTAX_BRD	7	/* for NEWADDR, broadcast or p-p dest addr */
 #define RTAX_MAX	8	/* size of array to allocate */
 
 typedef int rt_filter_f_t(const struct rtentry *, void *);
 
 /*
  * Argument bundle describing a route request: flags, the sockaddr array
  * indexed by the RTAX_* offsets, and optional interface, filter and
  * metrics information.
  */
 struct rt_addrinfo {
 	int	rti_addrs;			/* NOTE(review): presumably an RTA_ bitmask of rti_info[] entries -- confirm */
 	int	rti_flags;			/* Route RTF_ flags */
 	struct	sockaddr *rti_info[RTAX_MAX];	/* Sockaddr data */
 	struct	ifaddr *rti_ifa;		/* value of rt_ifa addr */
 	struct	ifnet *rti_ifp;			/* route interface */
 	rt_filter_f_t	*rti_filter;		/* filter function */
 	void	*rti_filterdata;		/* filter parameters */
 	u_long	rti_mflags;			/* metrics RTV_ flags */
 	u_long	rti_spare;			/* Will be used for fib */
 	struct	rt_metrics *rti_rmx;		/* Pointer to route metrics */
 };
 
 /*
  * This macro returns the size of a struct sockaddr when passed
  * through a routing socket. Basically we round up sa_len to
  * a multiple of sizeof(long), with a minimum of sizeof(long).
  * The check for a NULL pointer is just a convenience, probably never used.
  * The case sa_len == 0 should only apply to empty structures.
  */
 #define SA_SIZE(sa)						\
     (  (!(sa) || ((struct sockaddr *)(sa))->sa_len == 0) ?	\
 	sizeof(long)		:				\
 	1 + ( (((struct sockaddr *)(sa))->sa_len - 1) | (sizeof(long) - 1) ) )
 
 #define	sa_equal(a, b) (	\
     (((const struct sockaddr *)(a))->sa_len == ((const struct sockaddr *)(b))->sa_len) && \
     (bcmp((a), (b), ((const struct sockaddr *)(b))->sa_len) == 0))
 
 #ifdef _KERNEL
 
 #define RT_LINK_IS_UP(ifp)	(!((ifp)->if_capabilities & IFCAP_LINKSTATE) \
 				 || (ifp)->if_link_state == LINK_STATE_UP)
 
 /*
  * Per-rtentry mutex helpers.  All of them operate on the rt_mtx member of
  * the rtentry passed in.  RT_UNLOCK_COND releases the mutex only when
  * mtx_owned() reports it as held by the caller.
  */
 #define	RT_LOCK_INIT(_rt) \
 	mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK)
 #define	RT_LOCK(_rt)		mtx_lock(&(_rt)->rt_mtx)
 #define	RT_UNLOCK(_rt)		mtx_unlock(&(_rt)->rt_mtx)
 #define	RT_LOCK_DESTROY(_rt)	mtx_destroy(&(_rt)->rt_mtx)
 #define	RT_LOCK_ASSERT(_rt)	mtx_assert(&(_rt)->rt_mtx, MA_OWNED)
 #define	RT_UNLOCK_COND(_rt)	do {				\
 	if (mtx_owned(&(_rt)->rt_mtx))				\
 		mtx_unlock(&(_rt)->rt_mtx);			\
 } while (0)
 
 /*
  * Take a reference on a locked rtentry.  Asserts that the lock is held
  * and that the current count is not negative.
  */
 #define	RT_ADDREF(_rt)	do {					\
 	RT_LOCK_ASSERT(_rt);					\
 	KASSERT((_rt)->rt_refcnt >= 0,				\
 		("negative refcnt %d", (_rt)->rt_refcnt));	\
 	(_rt)->rt_refcnt++;					\
 } while (0)
 
 /*
  * Drop a reference on a locked rtentry without freeing it.  Asserts that
  * the lock is held and that there is a reference to drop.
  */
 #define	RT_REMREF(_rt)	do {					\
 	RT_LOCK_ASSERT(_rt);					\
 	KASSERT((_rt)->rt_refcnt > 0,				\
 		("bogus refcnt %d", (_rt)->rt_refcnt));	\
 	(_rt)->rt_refcnt--;					\
 } while (0)
 
 /*
  * Release a reference on a locked rtentry: hands the entry to rtfree()
  * when the count is 1 or less, otherwise decrements the count and
  * unlocks.  The argument is then set to 0, so it must be an lvalue.
  */
 #define	RTFREE_LOCKED(_rt) do {					\
 	if ((_rt)->rt_refcnt <= 1)				\
 		rtfree(_rt);					\
 	else {							\
 		RT_REMREF(_rt);					\
 		RT_UNLOCK(_rt);					\
 	}							\
 	/* guard against invalid refs */			\
 	_rt = 0;						\
 } while (0)
 
 /* Same as RTFREE_LOCKED, but takes the lock first (unlocked entry). */
 #define	RTFREE(_rt) do {					\
 	RT_LOCK(_rt);						\
 	RTFREE_LOCKED(_rt);					\
 } while (0)
 
 /*
  * Release the rtentry cached in a struct route, if any.  When RT_NORTREF
  * is set in ro_flags the flag is cleared and ro_rt is just set to NULL,
  * with no reference dropped; otherwise the entry is locked and released
  * through RTFREE_LOCKED.
  */
 #define	RO_RTFREE(_ro) do {					\
 	if ((_ro)->ro_rt) {					\
 		if ((_ro)->ro_flags & RT_NORTREF) {		\
 			(_ro)->ro_flags &= ~RT_NORTREF;		\
 			(_ro)->ro_rt = NULL;			\
 		} else {					\
 			RT_LOCK((_ro)->ro_rt);			\
 			RTFREE_LOCKED((_ro)->ro_rt);		\
 		}						\
 	}							\
 } while (0)
 
 struct radix_node_head *rt_tables_get_rnh(int, int);
 
 struct ifmultiaddr;
 
 void	 rt_ieee80211msg(struct ifnet *, int, void *, size_t);
 void	 rt_ifannouncemsg(struct ifnet *, int);
 void	 rt_ifmsg(struct ifnet *);
 void	 rt_missmsg(int, struct rt_addrinfo *, int, int);
 void	 rt_missmsg_fib(int, struct rt_addrinfo *, int, int, int);
 void	 rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *);
 void	 rt_newaddrmsg_fib(int, struct ifaddr *, int, struct rtentry *, int);
 int	 rt_addrmsg(int, struct ifaddr *, int);
 int	 rt_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
 void	 rt_newmaddrmsg(int, struct ifmultiaddr *);
 int	 rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
 void 	 rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *);
 
 int	rtsock_addrmsg(int, struct ifaddr *, int);
 int	rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);
 
 /*
  * Note the following locking behavior:
  *
  *    rtalloc_ign() and rtalloc() return ro->ro_rt unlocked
  *
  *    rtalloc1() returns a locked rtentry
  *
  *    rtfree() and RTFREE_LOCKED() require a locked rtentry
  *
  *    RTFREE() uses an unlocked entry.
  */
 
 int	 rt_expunge(struct radix_node_head *, struct rtentry *);
 void	 rtfree(struct rtentry *);
 int	 rt_check(struct rtentry **, struct rtentry **, struct sockaddr *);
 void	rt_updatemtu(struct ifnet *);
 
 typedef int rt_walktree_f_t(struct rtentry *, void *);
 typedef void rt_setwarg_t(struct radix_node_head *, uint32_t, int, void *);
 void	rt_foreach_fib_walk(int af, rt_setwarg_t *, rt_walktree_f_t *, void *);
 void	rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg);
 void	rt_flushifroutes(struct ifnet *ifp);
 
 /* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */
 /* These are used by old code not yet converted to use multiple FIBS */
 void	 rtalloc_ign(struct route *ro, u_long ignflags);
 void	 rtalloc(struct route *ro); /* XXX deprecated, use rtalloc_ign(ro, 0) */
 struct rtentry *rtalloc1(struct sockaddr *, int, u_long);
 int	 rtinit(struct ifaddr *, int, int);
 int	 rtioctl(u_long, caddr_t);
 void	 rtredirect(struct sockaddr *, struct sockaddr *,
 	    struct sockaddr *, int, struct sockaddr *);
 int	 rtrequest(int, struct sockaddr *,
 	    struct sockaddr *, struct sockaddr *, int, struct rtentry **);
 
 /* XXX MRT NEW VERSIONS THAT USE FIBs
  * For now the protocol independent versions are the same as the AF_INET ones
  * but this will change.. 
  */
 int	 rt_getifa_fib(struct rt_addrinfo *, u_int fibnum);
 void	 rtalloc_ign_fib(struct route *ro, u_long ignflags, u_int fibnum);
 void	 rtalloc_fib(struct route *ro, u_int fibnum);
 struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int);
 int	 rtioctl_fib(u_long, caddr_t, u_int);
 void	 rtredirect_fib(struct sockaddr *, struct sockaddr *,
 	    struct sockaddr *, int, struct sockaddr *, u_int);
 int	 rtrequest_fib(int, struct sockaddr *,
 	    struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int);
 int	 rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int);
 int	rib_lookup_info(uint32_t, const struct sockaddr *, uint32_t, uint32_t,
 	    struct rt_addrinfo *);
 void	rib_free_info(struct rt_addrinfo *info);
 
-#include <sys/eventhandler.h>
-typedef void (*rtevent_redirect_fn)(void *, struct rtentry *, struct rtentry *, struct sockaddr *);
-EVENTHANDLER_DECLARE(route_redirect_event, rtevent_redirect_fn);
 #endif
 
 #endif
Index: projects/clang380-import/sys/net80211/ieee80211_freebsd.c
===================================================================
--- projects/clang380-import/sys/net80211/ieee80211_freebsd.c	(revision 293686)
+++ projects/clang380-import/sys/net80211/ieee80211_freebsd.c	(revision 293687)
@@ -1,915 +1,916 @@
 /*-
  * Copyright (c) 2003-2009 Sam Leffler, Errno Consulting
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * IEEE 802.11 support (FreeBSD-specific code)
  */
 #include "opt_wlan.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h> 
+#include <sys/eventhandler.h>
 #include <sys/linker.h>
 #include <sys/mbuf.h>   
 #include <sys/module.h>
 #include <sys/proc.h>
 #include <sys/sysctl.h>
 
 #include <sys/socket.h>
 
 #include <net/bpf.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_clone.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 #include <net/ethernet.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <net80211/ieee80211_var.h>
 #include <net80211/ieee80211_input.h>
 
 /* Root of the net.wlan sysctl tree; per-vap nodes are added beneath it. */
 SYSCTL_NODE(_net, OID_AUTO, wlan, CTLFLAG_RD, 0, "IEEE 80211 parameters");
 
 #ifdef IEEE80211_DEBUG
 /* Global debug setting; copied into each vap's iv_debug at vattach time. */
 int	ieee80211_debug = 0;
 SYSCTL_INT(_net_wlan, OID_AUTO, debug, CTLFLAG_RW, &ieee80211_debug,
 	    0, "debugging printfs");
 #endif
 
 static MALLOC_DEFINE(M_80211_COM, "80211com", "802.11 com state");
 
 /* Interface name prefix and the cloner handle set up in wlan_modevent(). */
 static const char wlanname[] = "wlan";
 static struct if_clone *wlan_cloner;
 
 /*
  * Clone-create handler for "wlan" interfaces.
  *
  * Copies the clone parameters in from the caller, looks up the parent
  * device named there, checks the requested operating mode and flags
  * against the parent's capabilities and finally asks the parent to
  * create the vap.  Returns 0 on success or an errno value.
  */
 static int
 wlan_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
 	struct ieee80211_clone_params cp;
 	struct ieee80211com *ic;
 	struct ieee80211vap *vap;
 	int error, tdma_ok;
 
 	error = copyin(params, &cp, sizeof(cp));
 	if (error != 0)
 		return error;
 
 	ic = ieee80211_find_com(cp.icp_parent);
 	if (ic == NULL)
 		return ENXIO;
 
 	/* The opmode indexes the tables below, so range-check it first. */
 	if (cp.icp_opmode >= IEEE80211_OPMODE_MAX) {
 		ic_printf(ic, "%s: invalid opmode %d\n", __func__,
 		    cp.icp_opmode);
 		return EINVAL;
 	}
 	if ((ic->ic_caps & ieee80211_opcap[cp.icp_opmode]) == 0) {
 		ic_printf(ic, "%s mode not supported\n",
 		    ieee80211_opmode_name[cp.icp_opmode]);
 		return EOPNOTSUPP;
 	}
 
 	/* Without compiled-in TDMA support a TDMA request always fails. */
 #ifdef IEEE80211_SUPPORT_TDMA
 	tdma_ok = (ic->ic_caps & IEEE80211_C_TDMA) != 0;
 #else
 	tdma_ok = 0;
 #endif
 	if ((cp.icp_flags & IEEE80211_CLONE_TDMA) && !tdma_ok) {
 		ic_printf(ic, "TDMA not supported\n");
 		return EOPNOTSUPP;
 	}
 
 	/* Use the caller-supplied MAC address only when explicitly flagged. */
 	vap = ic->ic_vap_create(ic, wlanname, unit,
 			cp.icp_opmode, cp.icp_flags, cp.icp_bssid,
 			cp.icp_flags & IEEE80211_CLONE_MACADDR ?
 			    cp.icp_macaddr : ic->ic_macaddr);
 	if (vap == NULL)
 		return (EIO);
 	return (0);
 }
 
 /*
  * Clone-destroy handler: hand the vap stored in the interface's softc
  * back to its parent device for deletion.
  */
 static void
 wlan_clone_destroy(struct ifnet *ifp)
 {
 	struct ieee80211vap *vap;
 	struct ieee80211com *parent;
 
 	vap = ifp->if_softc;
 	parent = vap->iv_ic;
 	parent->ic_vap_delete(vap);
 }
 
 void
 ieee80211_vap_destroy(struct ieee80211vap *vap)
 {
 	CURVNET_SET(vap->iv_ifp->if_vnet);
 	if_clone_destroyif(wlan_cloner, vap->iv_ifp);
 	CURVNET_RESTORE();
 }
 
 /*
  * Sysctl handler for an int stored in ticks (arg1) but presented to the
  * user in milliseconds.  On a write the new value is converted back to
  * ticks and clamped to a minimum of one tick.
  */
 int
 ieee80211_sysctl_msecs_ticks(SYSCTL_HANDLER_ARGS)
 {
 	int *ticksp = (int *)arg1;
 	int msecs, error, t;
 
 	msecs = ticks_to_msecs(*ticksp);
 	error = sysctl_handle_int(oidp, &msecs, 0, req);
 	if (error || !req->newptr)
 		return error;
 
 	t = msecs_to_ticks(msecs);
 	if (t < 1)
 		t = 1;
 	*ticksp = t;
 	return 0;
 }
 
 /*
  * Sysctl handler for the inactivity counters.  The stored int (arg1) is
  * scaled by IEEE80211_INACT_WAIT when shown to the user and divided by
  * it again when a new value is written.
  */
 static int
 ieee80211_sysctl_inact(SYSCTL_HANDLER_ARGS)
 {
 	int *countp = (int *)arg1;
 	int inact, error;
 
 	inact = *countp * IEEE80211_INACT_WAIT;
 	error = sysctl_handle_int(oidp, &inact, 0, req);
 	if (error || !req->newptr)
 		return error;
 
 	*countp = inact / IEEE80211_INACT_WAIT;
 	return 0;
 }
 
 /*
  * Read-only sysctl handler that reports the name of the parent device
  * (the ieee80211com passed as arg1).
  */
 static int
 ieee80211_sysctl_parent(SYSCTL_HANDLER_ARGS)
 {
 	struct ieee80211com *parent;
 
 	parent = arg1;
 	return SYSCTL_OUT_STR(req, parent->ic_name);
 }
 
 static int
 ieee80211_sysctl_radar(SYSCTL_HANDLER_ARGS)
 {
 	struct ieee80211com *ic = arg1;
 	int t = 0, error;
 
 	error = sysctl_handle_int(oidp, &t, 0, req);
 	if (error || !req->newptr)
 		return error;
 	IEEE80211_LOCK(ic);
 	ieee80211_dfs_notify_radar(ic, ic->ic_curchan);
 	IEEE80211_UNLOCK(ic);
 	return 0;
 }
 
 /*
  * Per-com sysctl attach hook.  Currently a no-op: no com-level sysctl
  * state is created here.
  */
 void
 ieee80211_sysctl_attach(struct ieee80211com *ic)
 {
 }
 
 /*
  * Per-com sysctl detach hook.  Currently a no-op, matching
  * ieee80211_sysctl_attach().
  */
 void
 ieee80211_sysctl_detach(struct ieee80211com *ic)
 {
 }
 
 /*
  * Create the per-vap sysctl subtree under net.wlan.
  *
  * A private sysctl context is allocated for the vap and a node named
  * after the interface's unit number is added beneath net.wlan.  The
  * node is populated with the vap's tunables; the HT and DFS entries are
  * only added when the vap advertises the corresponding capability.
  * On success the context and node are recorded in vap->iv_sysctl and
  * vap->iv_oid.  If the context cannot be allocated a message is printed
  * and the vap is left without a sysctl subtree.
  */
 void
 ieee80211_sysctl_vattach(struct ieee80211vap *vap)
 {
 	struct ifnet *ifp = vap->iv_ifp;
 	struct sysctl_ctx_list *ctx;
 	struct sysctl_oid *oid;
 	char num[14];			/* sufficient for 32 bits */
 
 	ctx = (struct sysctl_ctx_list *) IEEE80211_MALLOC(sizeof(struct sysctl_ctx_list),
 		M_DEVBUF, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO);
 	if (ctx == NULL) {
 		if_printf(ifp, "%s: cannot allocate sysctl context!\n",
 			__func__);
 		return;
 	}
 	sysctl_ctx_init(ctx);
 	/* The node is named by unit number, e.g. net.wlan.<unit>. */
 	snprintf(num, sizeof(num), "%u", ifp->if_dunit);
 	/* NOTE(review): the result of SYSCTL_ADD_NODE is not checked for NULL. */
 	oid = SYSCTL_ADD_NODE(ctx, &SYSCTL_NODE_CHILDREN(_net, wlan),
 		OID_AUTO, num, CTLFLAG_RD, NULL, "");
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
 		"%parent", CTLTYPE_STRING | CTLFLAG_RD, vap->iv_ic, 0,
 		ieee80211_sysctl_parent, "A", "parent device");
 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
 		"driver_caps", CTLFLAG_RW, &vap->iv_caps, 0,
 		"driver capabilities");
 #ifdef IEEE80211_DEBUG
 	/* Start from the global debug setting; adjustable per vap afterwards. */
 	vap->iv_debug = ieee80211_debug;
 	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
 		"debug", CTLFLAG_RW, &vap->iv_debug, 0,
 		"control debugging printfs");
 #endif
 	SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
 		"bmiss_max", CTLFLAG_RW, &vap->iv_bmiss_max, 0,
 		"consecutive beacon misses before scanning");
 	/* XXX inherit from tunables */
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
 		"inact_run", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_run, 0,
 		ieee80211_sysctl_inact, "I",
 		"station inactivity timeout (sec)");
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
 		"inact_probe", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_probe, 0,
 		ieee80211_sysctl_inact, "I",
 		"station inactivity probe timeout (sec)");
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
 		"inact_auth", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_auth, 0,
 		ieee80211_sysctl_inact, "I",
 		"station authentication timeout (sec)");
 	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
 		"inact_init", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_init, 0,
 		ieee80211_sysctl_inact, "I",
 		"station initial state timeout (sec)");
 	/* A-MPDU traffic thresholds are only exposed on HT-capable vaps. */
 	if (vap->iv_htcaps & IEEE80211_HTC_HT) {
 		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
 			"ampdu_mintraffic_bk", CTLFLAG_RW,
 			&vap->iv_ampdu_mintraffic[WME_AC_BK], 0,
 			"BK traffic tx aggr threshold (pps)");
 		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
 			"ampdu_mintraffic_be", CTLFLAG_RW,
 			&vap->iv_ampdu_mintraffic[WME_AC_BE], 0,
 			"BE traffic tx aggr threshold (pps)");
 		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
 			"ampdu_mintraffic_vo", CTLFLAG_RW,
 			&vap->iv_ampdu_mintraffic[WME_AC_VO], 0,
 			"VO traffic tx aggr threshold (pps)");
 		SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
 			"ampdu_mintraffic_vi", CTLFLAG_RW,
 			&vap->iv_ampdu_mintraffic[WME_AC_VI], 0,
 			"VI traffic tx aggr threshold (pps)");
 	}
 	/* The radar simulation knob is only exposed on DFS-capable vaps. */
 	if (vap->iv_caps & IEEE80211_C_DFS) {
 		SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
 			"radar", CTLTYPE_INT | CTLFLAG_RW, vap->iv_ic, 0,
 			ieee80211_sysctl_radar, "I", "simulate radar event");
 	}
 	vap->iv_sysctl = ctx;
 	vap->iv_oid = oid;
 }
 
 /*
  * Tear down the per-vap sysctl context, if one was created, and clear
  * the vap's reference to it.
  */
 void
 ieee80211_sysctl_vdetach(struct ieee80211vap *vap)
 {
 
 	if (vap->iv_sysctl == NULL)
 		return;
 
 	sysctl_ctx_free(vap->iv_sysctl);
 	IEEE80211_FREE(vap->iv_sysctl, M_DEVBUF);
 	vap->iv_sysctl = NULL;
 }
 
 /*
  * Drop one reference on a node and test whether the count reached zero.
  *
  * The count is decremented and then a compare-and-set from 0 to 1 is
  * attempted; the result of that compare-and-set is returned, so a
  * non-zero return means the count was seen at zero and has been set
  * back to 1.
  *
  * NOTE(review): the decrement and the compare-and-set are two separate
  * atomic operations, not one atomic step (see the XXX below).
  */
 int
 ieee80211_node_dectestref(struct ieee80211_node *ni)
 {
 	/* XXX need equivalent of atomic_dec_and_test */
 	atomic_subtract_int(&ni->ni_refcnt, 1);
 	return atomic_cmpset_int(&ni->ni_refcnt, 0, 1);
 }
 
 /*
  * Empty an interface queue, releasing for every frame both the node
  * reference carried in m_pkthdr.rcvif and the mbuf itself.
  */
 void
 ieee80211_drain_ifq(struct ifqueue *ifq)
 {
 	struct ieee80211_node *ni;
 	struct mbuf *m;
 
 	IF_DEQUEUE(ifq, m);
 	while (m != NULL) {
 		/* The node reference travels in the rcvif field. */
 		ni = (struct ieee80211_node *)m->m_pkthdr.rcvif;
 		KASSERT(ni != NULL, ("frame w/o node"));
 		ieee80211_free_node(ni);
 		m->m_pkthdr.rcvif = NULL;
 
 		m_freem(m);
 
 		IF_DEQUEUE(ifq, m);
 	}
 }
 
 /*
  * Remove from an interface queue every frame whose node (carried in
  * m_pkthdr.rcvif) belongs to the given vap.  Each removed frame is
  * freed and its node reference released.  The whole operation runs
  * with the queue lock held, and the tail pointer is recomputed
  * afterwards since the last element may have been removed.
  */
 void
 ieee80211_flush_ifq(struct ifqueue *ifq, struct ieee80211vap *vap)
 {
 	struct ieee80211_node *ni;
 	struct mbuf *m, **mprev;
 
 	IF_LOCK(ifq);
 	/* mprev always points at the link that references the current mbuf. */
 	mprev = &ifq->ifq_head;
 	while ((m = *mprev) != NULL) {
 		ni = (struct ieee80211_node *)m->m_pkthdr.rcvif;
 		if (ni != NULL && ni->ni_vap == vap) {
 			*mprev = m->m_nextpkt;		/* remove from list */
 			ifq->ifq_len--;
 
 			m_freem(m);
 			ieee80211_free_node(ni);	/* reclaim ref */
 		} else
 			mprev = &m->m_nextpkt;
 	}
 	/* recalculate tail ptr */
 	m = ifq->ifq_head;
 	for (; m != NULL && m->m_nextpkt != NULL; m = m->m_nextpkt)
 		;
 	/* m is now the last mbuf, or NULL if the queue is empty. */
 	ifq->ifq_tail = m;
 	IF_UNLOCK(ifq);
 }
 
 /*
  * Place an object of `len' bytes at the end of an MCLBYTES-sized data
  * area: m_data is advanced by (MCLBYTES - len) rounded down to a
  * multiple of sizeof(long).
  *
  * Originally described as being for mbufs allocated with
  * m_gethdr/MGETHDR or initialized by M_COPY_PKTHDR; in this file it is
  * applied to the cluster mbuf obtained in ieee80211_getmgtframe().
  */
 #define	MC_ALIGN(m, len)						\
 do {									\
 	(m)->m_data += (MCLBYTES - (len)) &~ (sizeof(long) - 1);	\
 } while (/* CONSTCOND */ 0)
 
 /*
  * Allocate and set up an mbuf for a management frame.
  *
  * The requested headroom plus packet length is rounded up to a
  * multiple of 4 bytes.  Requests smaller than MINCLSIZE are satisfied
  * from a plain header mbuf, anything else from a cluster.  On success
  * the mbuf is returned with m_data advanced past the headroom and *frm
  * pointing at the start of the reserved data area; on allocation
  * failure NULL is returned and *frm is left untouched.
  *
  * The 32-bit alignment and 4-byte length rounding are there mainly so
  * beacon frames (which require them) can use this interface too.
  */
 struct mbuf *
 ieee80211_getmgtframe(uint8_t **frm, int headroom, int pktlen)
 {
 	struct mbuf *m;
 	u_int len;
 
 	/*
 	 * NB: we know the mbuf routines will align the data area
 	 *     so we don't need to do anything special.
 	 */
 	len = roundup2(headroom + pktlen, 4);
 	KASSERT(len <= MCLBYTES, ("802.11 mgt frame too large: %u", len));
 
 	if (len >= MINCLSIZE) {
 		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 		if (m == NULL)
 			return m;
 		MC_ALIGN(m, len);
 	} else {
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL)
 			return m;
 		/*
 		 * Align the data in case additional headers are added.
 		 * This should only happen when a WEP header is added
 		 * which only happens for shared key authentication mgt
 		 * frames which all fit in MHLEN.
 		 */
 		M_ALIGN(m, len);
 	}
 
 	/* Skip the caller's headroom and report where the frame starts. */
 	m->m_data += headroom;
 	*frm = m->m_data;
 	return m;
 }
 
 #ifndef __NO_STRICT_ALIGNMENT
 /*
  * Re-align the payload in the mbuf.  This is mainly used (right now)
  * to handle IP header alignment requirements on certain architectures.
  *
  * A new mbuf large enough for the packet plus `align' bytes is
  * allocated, the packet header is moved over and the payload copied in
  * at an offset chosen so that (data + align) is ALIGN()ed.  The
  * original mbuf chain is always freed.  Returns the new mbuf, or NULL
  * if none could be allocated (in which case the frame is discarded and
  * is_rx_badalign is incremented).
  */
 struct mbuf *
 ieee80211_realign(struct ieee80211vap *vap, struct mbuf *m, size_t align)
 {
 	int pktlen, space;
 	struct mbuf *n;
 
 	pktlen = m->m_pkthdr.len;
 	space = pktlen + align;
 	if (space < MINCLSIZE)
 		n = m_gethdr(M_NOWAIT, MT_DATA);
 	else {
 		/* Pick the smallest cluster size that fits `space' bytes. */
 		n = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
 		    space <= MCLBYTES ?     MCLBYTES :
 #if MJUMPAGESIZE != MCLBYTES
 		    space <= MJUMPAGESIZE ? MJUMPAGESIZE :
 #endif
 		    space <= MJUM9BYTES ?   MJUM9BYTES : MJUM16BYTES);
 	}
 	if (__predict_true(n != NULL)) {
 		m_move_pkthdr(n, m);
 		/* Offset the data start so that data + align is aligned. */
 		n->m_data = (caddr_t)(ALIGN(n->m_data + align) - align);
 		m_copydata(m, 0, pktlen, mtod(n, caddr_t));
 		n->m_len = pktlen;
 	} else {
 		IEEE80211_DISCARD(vap, IEEE80211_MSG_ANY,
 		    mtod(m, const struct ieee80211_frame *), NULL,
 		    "%s", "no mbuf to realign");
 		vap->iv_stats.is_rx_badalign++;
 	}
 	/* The original chain is released on both success and failure. */
 	m_freem(m);
 	return n;
 }
 #endif /* !__NO_STRICT_ALIGNMENT */
 
 /*
  * Attach a completion callback to an mbuf.
  *
  * The function and its argument are stored in a NET80211_TAG_CALLBACK
  * mbuf tag and the mbuf is flagged with M_TXCB.  Returns 1 on success
  * and 0 if the tag could not be allocated.
  */
 int
 ieee80211_add_callback(struct mbuf *m,
 	void (*func)(struct ieee80211_node *, void *, int), void *arg)
 {
 	struct ieee80211_cb *cb;
 	struct m_tag *tag;
 
 	tag = m_tag_alloc(MTAG_ABI_NET80211, NET80211_TAG_CALLBACK,
 			sizeof(struct ieee80211_cb), M_NOWAIT);
 	if (tag == NULL)
 		return 0;
 
 	/* The payload lives immediately after the tag header. */
 	cb = (struct ieee80211_cb *)(tag + 1);
 	cb->arg = arg;
 	cb->func = func;
 
 	m_tag_prepend(m, tag);
 	m->m_flags |= M_TXCB;
 	return 1;
 }
 
 /*
  * Attach a copy of the given transmit parameters to an mbuf as a
  * NET80211_TAG_XMIT_PARAMS tag.
  *
  * Returns 1 if OK, 0 if the tag could not be allocated.
  */
 int
 ieee80211_add_xmit_params(struct mbuf *m,
     const struct ieee80211_bpf_params *params)
 {
 	struct ieee80211_tx_params *txp;
 	struct m_tag *tag;
 
 	tag = m_tag_alloc(MTAG_ABI_NET80211, NET80211_TAG_XMIT_PARAMS,
 	    sizeof(struct ieee80211_tx_params), M_NOWAIT);
 	if (tag == NULL)
 		return (0);
 
 	/* The payload lives immediately after the tag header. */
 	txp = (struct ieee80211_tx_params *)(tag + 1);
 	memcpy(&txp->params, params, sizeof(struct ieee80211_bpf_params));
 	m_tag_prepend(m, tag);
 
 	return (1);
 }
 
 int
 ieee80211_get_xmit_params(struct mbuf *m,
     struct ieee80211_bpf_params *params)
 {
 	struct m_tag *mtag;
 	struct ieee80211_tx_params *tx;
 
 	mtag = m_tag_locate(m, MTAG_ABI_NET80211, NET80211_TAG_XMIT_PARAMS,
 	    NULL);
 	if (mtag == NULL)
 		return (-1);
 	tx = (struct ieee80211_tx_params *)(mtag + 1);
 	memcpy(params, &tx->params, sizeof(struct ieee80211_bpf_params));
 	return (0);
 }
 
 /*
  * Invoke the completion callback attached to an mbuf, if any, passing
  * it the node, the stored argument and the given status.
  */
 void
 ieee80211_process_callback(struct ieee80211_node *ni,
 	struct mbuf *m, int status)
 {
 	struct ieee80211_cb *cb;
 	struct m_tag *tag;
 
 	tag = m_tag_locate(m, MTAG_ABI_NET80211, NET80211_TAG_CALLBACK, NULL);
 	if (tag == NULL)
 		return;
 
 	cb = (struct ieee80211_cb *)(tag + 1);
 	cb->func(ni, cb->arg, status);
 }
 
 /*
  * Add RX parameters to the given mbuf.
  *
  * Returns 1 if OK, 0 on error.
  */
 int
 ieee80211_add_rx_params(struct mbuf *m, const struct ieee80211_rx_stats *rxs)
 {
 	struct m_tag *mtag;
 	struct ieee80211_rx_params *rx;
 
 	mtag = m_tag_alloc(MTAG_ABI_NET80211, NET80211_TAG_RECV_PARAMS,
 	    sizeof(struct ieee80211_rx_stats), M_NOWAIT);
 	if (mtag == NULL)
 		return (0);
 
 	rx = (struct ieee80211_rx_params *)(mtag + 1);
 	memcpy(&rx->params, rxs, sizeof(*rxs));
 	m_tag_prepend(m, mtag);
 	return (1);
 }
 
 int
 ieee80211_get_rx_params(struct mbuf *m, struct ieee80211_rx_stats *rxs)
 {
 	struct m_tag *mtag;
 	struct ieee80211_rx_params *rx;
 
 	mtag = m_tag_locate(m, MTAG_ABI_NET80211, NET80211_TAG_RECV_PARAMS,
 	    NULL);
 	if (mtag == NULL)
 		return (-1);
 	rx = (struct ieee80211_rx_params *)(mtag + 1);
 	memcpy(rxs, &rx->params, sizeof(*rxs));
 	return (0);
 }
 
 /*
  * Transmit a frame to the parent interface.
  *
  * Must be called with the IC TX lock held.  If the driver's transmit
  * method fails, the output error counter of the owning vap is bumped
  * and both the node reference (carried in m_pkthdr.rcvif) and the mbuf
  * are released here.  Returns the driver's error code.
  */
 int
 ieee80211_parent_xmitpkt(struct ieee80211com *ic, struct mbuf *m)
 {
 	struct ieee80211_node *ni;
 	int error;
 
 	/*
 	 * Assert the IC TX lock is held - this enforces the
 	 * processing -> queuing order is maintained
 	 */
 	IEEE80211_TX_LOCK_ASSERT(ic);
 
 	error = ic->ic_transmit(ic, m);
 	if (error == 0)
 		return (error);
 
 	/* Failure path: account for the error and clean up. */
 	ni = (struct ieee80211_node *)m->m_pkthdr.rcvif;
 
 	/* XXX number of fragments */
 	if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1);
 	ieee80211_free_node(ni);
 	ieee80211_free_mbuf(m);
 
 	return (error);
 }
 
 /*
  * Transmit a frame to the VAP interface.
  *
  * Hands the mbuf to the vap ifnet's if_transmit method and returns its
  * result.
  */
 int
 ieee80211_vap_xmitpkt(struct ieee80211vap *vap, struct mbuf *m)
 {
 	struct ifnet *ifp;
 
 	ifp = vap->iv_ifp;
 
 	/*
 	 * When transmitting via the VAP, we shouldn't hold
 	 * any IC TX lock as the VAP TX path will acquire it.
 	 */
 	IEEE80211_TX_UNLOCK_ASSERT(vap->iv_ic);
 
 	return (ifp->if_transmit(ifp, m));
 }
 
 #include <sys/libkern.h>
 
 void
 get_random_bytes(void *p, size_t n)
 {
 	uint8_t *dp = p;
 
 	while (n > 0) {
 		uint32_t v = arc4random();
 		size_t nb = n > sizeof(uint32_t) ? sizeof(uint32_t) : n;
 		bcopy(&v, dp, n > sizeof(uint32_t) ? sizeof(uint32_t) : n);
 		dp += sizeof(uint32_t), n -= nb;
 	}
 }
 
 /*
  * Helper function for events that pass just a single mac address.
  */
 static void
 notify_macaddr(struct ifnet *ifp, int op, const uint8_t mac[IEEE80211_ADDR_LEN])
 {
 	struct ieee80211_join_event iev;
 
 	CURVNET_SET(ifp->if_vnet);
 	memset(&iev, 0, sizeof(iev));
 	IEEE80211_ADDR_COPY(iev.iev_addr, mac);
 	rt_ieee80211msg(ifp, op, &iev, sizeof(iev));
 	CURVNET_RESTORE();
 }
 
 /*
  * Announce that a node has joined.
  *
  * For the vap's bss node an ASSOC/REASSOC event carrying the BSSID is
  * sent and the link is marked up; for any other node a JOIN/REJOIN
  * event carrying the node's MAC address is sent.  `newassoc' selects
  * the first or second event of each pair.
  */
 void
 ieee80211_notify_node_join(struct ieee80211_node *ni, int newassoc)
 {
 	struct ieee80211vap *vap = ni->ni_vap;
 	struct ifnet *ifp = vap->iv_ifp;
 	int op;
 
 	CURVNET_SET_QUIET(ifp->if_vnet);
 	IEEE80211_NOTE(vap, IEEE80211_MSG_NODE, ni, "%snode join",
 	    (ni == vap->iv_bss) ? "bss " : "");
 
 	if (ni != vap->iv_bss) {
 		op = newassoc ? RTM_IEEE80211_JOIN : RTM_IEEE80211_REJOIN;
 		notify_macaddr(ifp, op, ni->ni_macaddr);
 	} else {
 		op = newassoc ? RTM_IEEE80211_ASSOC : RTM_IEEE80211_REASSOC;
 		notify_macaddr(ifp, op, ni->ni_bssid);
 		if_link_state_change(ifp, LINK_STATE_UP);
 	}
 	CURVNET_RESTORE();
 }
 
 /*
  * Announce that a node has left.
  *
  * For the vap's bss node a DISASSOC event is sent and the link is
  * marked down; for any other node a LEAVE event carrying the node's
  * MAC address is sent.
  */
 void
 ieee80211_notify_node_leave(struct ieee80211_node *ni)
 {
 	struct ieee80211vap *vap = ni->ni_vap;
 	struct ifnet *ifp = vap->iv_ifp;
 
 	CURVNET_SET_QUIET(ifp->if_vnet);
 	IEEE80211_NOTE(vap, IEEE80211_MSG_NODE, ni, "%snode leave",
 	    (ni == vap->iv_bss) ? "bss " : "");
 
 	if (ni != vap->iv_bss) {
 		/* fire off wireless event station leaving */
 		notify_macaddr(ifp, RTM_IEEE80211_LEAVE, ni->ni_macaddr);
 	} else {
 		rt_ieee80211msg(ifp, RTM_IEEE80211_DISASSOC, NULL, 0);
 		if_link_state_change(ifp, LINK_STATE_DOWN);
 	}
 	CURVNET_RESTORE();
 }
 
 /*
  * Send a payload-less SCAN event on the vap's interface to indicate
  * that a scan has completed.
  */
 void
 ieee80211_notify_scan_done(struct ieee80211vap *vap)
 {
 	struct ifnet *ifp;
 
 	ifp = vap->iv_ifp;
 	IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s\n", "notify scan done");
 
 	/* dispatch wireless event indicating scan completed */
 	CURVNET_SET(ifp->if_vnet);
 	rt_ieee80211msg(ifp, RTM_IEEE80211_SCAN, NULL, 0);
 	CURVNET_RESTORE();
 }
 
 /*
  * Report that a replayed frame was detected.
  *
  * A debug note is logged and, when the vap has an interface, a REPLAY
  * event is sent describing the frame's addresses, the cipher, the key
  * index (the rx key index when one is set, otherwise the key index)
  * and both the expected and the received sequence counters for `tid'.
  */
 void
 ieee80211_notify_replay_failure(struct ieee80211vap *vap,
 	const struct ieee80211_frame *wh, const struct ieee80211_key *k,
 	u_int64_t rsc, int tid)
 {
 	struct ifnet *ifp = vap->iv_ifp;
 
 	/* %ju takes an unsigned argument, so cast to uintmax_t. */
 	IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_CRYPTO, wh->i_addr2,
 	    "%s replay detected tid %d <rsc %ju, csc %ju, keyix %u rxkeyix %u>",
 	    k->wk_cipher->ic_name, tid, (uintmax_t) rsc,
 	    (uintmax_t) k->wk_keyrsc[tid],
 	    k->wk_keyix, k->wk_rxkeyix);
 
 	if (ifp != NULL) {		/* NB: for cipher test modules */
 		struct ieee80211_replay_event iev;
 
 		/*
 		 * The whole structure is handed to rt_ieee80211msg(), so
 		 * clear it first (as the other notify routines do) to
 		 * keep any padding or unset bytes from carrying stale
 		 * stack contents.
 		 */
 		memset(&iev, 0, sizeof(iev));
 		IEEE80211_ADDR_COPY(iev.iev_dst, wh->i_addr1);
 		IEEE80211_ADDR_COPY(iev.iev_src, wh->i_addr2);
 		iev.iev_cipher = k->wk_cipher->ic_cipher;
 		if (k->wk_rxkeyix != IEEE80211_KEYIX_NONE)
 			iev.iev_keyix = k->wk_rxkeyix;
 		else
 			iev.iev_keyix = k->wk_keyix;
 		iev.iev_keyrsc = k->wk_keyrsc[tid];
 		iev.iev_rsc = rsc;
 		CURVNET_SET(ifp->if_vnet);
 		rt_ieee80211msg(ifp, RTM_IEEE80211_REPLAY, &iev, sizeof(iev));
 		CURVNET_RESTORE();
 	}
 }
 
 /*
  * Report a Michael MIC verification failure.
  *
  * A debug note is logged, the vap's is_rx_tkipmic statistic is bumped
  * and, when the vap has an interface, a MICHAEL event is sent
  * describing the frame's addresses, the cipher (TKIP) and the key
  * index.
  */
 void
 ieee80211_notify_michael_failure(struct ieee80211vap *vap,
 	const struct ieee80211_frame *wh, u_int keyix)
 {
 	struct ifnet *ifp = vap->iv_ifp;
 
 	IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_CRYPTO, wh->i_addr2,
 	    "michael MIC verification failed <keyix %u>", keyix);
 	vap->iv_stats.is_rx_tkipmic++;
 
 	if (ifp != NULL) {		/* NB: for cipher test modules */
 		struct ieee80211_michael_event iev;
 
 		/*
 		 * Clear the event before filling it in, as the other
 		 * notify routines do, so no uninitialized stack bytes are
 		 * handed to rt_ieee80211msg().
 		 */
 		memset(&iev, 0, sizeof(iev));
 		IEEE80211_ADDR_COPY(iev.iev_dst, wh->i_addr1);
 		IEEE80211_ADDR_COPY(iev.iev_src, wh->i_addr2);
 		iev.iev_cipher = IEEE80211_CIPHER_TKIP;
 		iev.iev_keyix = keyix;
 		CURVNET_SET(ifp->if_vnet);
 		rt_ieee80211msg(ifp, RTM_IEEE80211_MICHAEL, &iev, sizeof(iev));
 		CURVNET_RESTORE();
 	}
 }
 
 /*
  * Send a WDS event carrying the node's MAC address on the interface of
  * the node's vap.
  */
 void
 ieee80211_notify_wds_discover(struct ieee80211_node *ni)
 {
 	struct ifnet *ifp;
 
 	ifp = ni->ni_vap->iv_ifp;
 	notify_macaddr(ifp, RTM_IEEE80211_WDS, ni->ni_macaddr);
 }
 
 void
 ieee80211_notify_csa(struct ieee80211com *ic,
 	const struct ieee80211_channel *c, int mode, int count)
 {
 	struct ieee80211_csa_event iev;
 	struct ieee80211vap *vap;
 	struct ifnet *ifp;
 
 	memset(&iev, 0, sizeof(iev));
 	iev.iev_flags = c->ic_flags;
 	iev.iev_freq = c->ic_freq;
 	iev.iev_ieee = c->ic_ieee;
 	iev.iev_mode = mode;
 	iev.iev_count = count;
 	TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) {
 		ifp = vap->iv_ifp;
 		CURVNET_SET(ifp->if_vnet);
 		rt_ieee80211msg(ifp, RTM_IEEE80211_CSA, &iev, sizeof(iev));
 		CURVNET_RESTORE();
 	}
 }
 
 void
 ieee80211_notify_radar(struct ieee80211com *ic,
 	const struct ieee80211_channel *c)
 {
 	struct ieee80211_radar_event iev;
 	struct ieee80211vap *vap;
 	struct ifnet *ifp;
 
 	memset(&iev, 0, sizeof(iev));
 	iev.iev_flags = c->ic_flags;
 	iev.iev_freq = c->ic_freq;
 	iev.iev_ieee = c->ic_ieee;
 	TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) {
 		ifp = vap->iv_ifp;
 		CURVNET_SET(ifp->if_vnet);
 		rt_ieee80211msg(ifp, RTM_IEEE80211_RADAR, &iev, sizeof(iev));
 		CURVNET_RESTORE();
 	}
 }
 
 void
 ieee80211_notify_cac(struct ieee80211com *ic,
 	const struct ieee80211_channel *c, enum ieee80211_notify_cac_event type)
 {
 	struct ieee80211_cac_event iev;
 	struct ieee80211vap *vap;
 	struct ifnet *ifp;
 
 	memset(&iev, 0, sizeof(iev));
 	iev.iev_flags = c->ic_flags;
 	iev.iev_freq = c->ic_freq;
 	iev.iev_ieee = c->ic_ieee;
 	iev.iev_type = type;
 	TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) {
 		ifp = vap->iv_ifp;
 		CURVNET_SET(ifp->if_vnet);
 		rt_ieee80211msg(ifp, RTM_IEEE80211_CAC, &iev, sizeof(iev));
 		CURVNET_RESTORE();
 	}
 }
 
 /*
  * Log a "node deauth" note and send a DEAUTH event carrying the node's
  * MAC address on its vap's interface.
  */
 void
 ieee80211_notify_node_deauth(struct ieee80211_node *ni)
 {
 	struct ieee80211vap *vap;
 	struct ifnet *ifp;
 
 	vap = ni->ni_vap;
 	ifp = vap->iv_ifp;
 
 	IEEE80211_NOTE(vap, IEEE80211_MSG_NODE, ni, "%s", "node deauth");
 	notify_macaddr(ifp, RTM_IEEE80211_DEAUTH, ni->ni_macaddr);
 }
 
 /*
  * Log a "node auth" note and send an AUTH event carrying the node's
  * MAC address on its vap's interface.
  */
 void
 ieee80211_notify_node_auth(struct ieee80211_node *ni)
 {
 	struct ieee80211vap *vap;
 	struct ifnet *ifp;
 
 	vap = ni->ni_vap;
 	ifp = vap->iv_ifp;
 
 	IEEE80211_NOTE(vap, IEEE80211_MSG_NODE, ni, "%s", "node auth");
 	notify_macaddr(ifp, RTM_IEEE80211_AUTH, ni->ni_macaddr);
 }
 
 void
 ieee80211_notify_country(struct ieee80211vap *vap,
 	const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t cc[2])
 {
 	struct ifnet *ifp = vap->iv_ifp;
 	struct ieee80211_country_event iev;
 
 	memset(&iev, 0, sizeof(iev));
 	IEEE80211_ADDR_COPY(iev.iev_addr, bssid);
 	iev.iev_cc[0] = cc[0];
 	iev.iev_cc[1] = cc[1];
 	CURVNET_SET(ifp->if_vnet);
 	rt_ieee80211msg(ifp, RTM_IEEE80211_COUNTRY, &iev, sizeof(iev));
 	CURVNET_RESTORE();
 }
 
 void
 ieee80211_notify_radio(struct ieee80211com *ic, int state)
 {
 	struct ieee80211_radio_event iev;
 	struct ieee80211vap *vap;
 	struct ifnet *ifp;
 
 	memset(&iev, 0, sizeof(iev));
 	iev.iev_state = state;
 	TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) {
 		ifp = vap->iv_ifp;
 		CURVNET_SET(ifp->if_vnet);
 		rt_ieee80211msg(ifp, RTM_IEEE80211_RADIO, &iev, sizeof(iev));
 		CURVNET_RESTORE();
 	}
 }
 
 /*
  * Request that the named module be loaded.  Automatic loading is
  * compiled out (guarded by "notyet"), so for now this only prints a
  * message asking for the module to be loaded by hand.
  */
 void
 ieee80211_load_module(const char *modname)
 {
 
 #ifdef notyet
 	(void)kern_kldload(curthread, modname, NULL);
 #else
 	printf("%s: load the %s module by hand for now.\n", __func__, modname);
 #endif
 }
 
 static eventhandler_tag wlan_bpfevent;
 
 static void
 bpf_track(void *arg, struct ifnet *ifp, int dlt, int attach)
 {
 	/* NB: identify vap's by if_init */
 	if (dlt == DLT_IEEE802_11_RADIO &&
 	    ifp->if_init == ieee80211_init) {
 		struct ieee80211vap *vap = ifp->if_softc;
 		/*
 		 * Track bpf radiotap listener state.  We mark the vap
 		 * to indicate if any listener is present and the com
 		 * to indicate if any listener exists on any associated
 		 * vap.  This flag is used by drivers to prepare radiotap
 		 * state only when needed.
 		 */
 		if (attach) {
 			ieee80211_syncflag_ext(vap, IEEE80211_FEXT_BPF);
 			if (vap->iv_opmode == IEEE80211_M_MONITOR)
 				atomic_add_int(&vap->iv_ic->ic_montaps, 1);
 		} else if (!bpf_peers_present(vap->iv_rawbpf)) {
 			ieee80211_syncflag_ext(vap, -IEEE80211_FEXT_BPF);
 			if (vap->iv_opmode == IEEE80211_M_MONITOR)
 				atomic_subtract_int(&vap->iv_ic->ic_montaps, 1);
 		}
 	}
 }
 
 /*
  * Module glue.
  *
  * On load the bpf tracking handler and the "wlan" interface cloner are
  * registered; on unload they are removed again in the reverse order.
  * Any other event type is rejected with EINVAL.
  *
  * NB: the module name is "wlan" for compatibility with NetBSD.
  */
 static int
 wlan_modevent(module_t mod, int type, void *unused)
 {
 	if (type == MOD_LOAD) {
 		if (bootverbose)
 			printf("wlan: <802.11 Link Layer>\n");
 		wlan_bpfevent = EVENTHANDLER_REGISTER(bpf_track,
 		    bpf_track, 0, EVENTHANDLER_PRI_ANY);
 		wlan_cloner = if_clone_simple(wlanname, wlan_clone_create,
 		    wlan_clone_destroy, 0);
 		return 0;
 	}
 	if (type == MOD_UNLOAD) {
 		if_clone_detach(wlan_cloner);
 		EVENTHANDLER_DEREGISTER(bpf_track, wlan_bpfevent);
 		return 0;
 	}
 	return EINVAL;
 }
 
 /* Module descriptor: named "wlan", events handled by wlan_modevent(). */
 static moduledata_t wlan_mod = {
 	wlanname,
 	wlan_modevent,
 	0
 };
 DECLARE_MODULE(wlan, wlan_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);
 MODULE_VERSION(wlan, 1);
 /* Declared dependencies: ether always, alq only with IEEE80211_ALQ. */
 MODULE_DEPEND(wlan, ether, 1, 1, 1);
 #ifdef	IEEE80211_ALQ
 MODULE_DEPEND(wlan, alq, 1, 1, 1);
 #endif	/* IEEE80211_ALQ */
 
Index: projects/clang380-import/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
===================================================================
--- projects/clang380-import/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c	(revision 293686)
+++ projects/clang380-import/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c	(revision 293687)
@@ -1,3582 +1,3582 @@
 /*
  * ng_btsocket_rfcomm.c
  */
 
 /*-
  * Copyright (c) 2001-2003 Maksim Yevmenkin <m_evmenkin@yahoo.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $Id: ng_btsocket_rfcomm.c,v 1.28 2003/09/14 23:29:06 max Exp $
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bitstring.h>
 #include <sys/domain.h>
 #include <sys/endian.h>
 #include <sys/errno.h>
 #include <sys/filedesc.h>
 #include <sys/ioccom.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/uio.h>
 
 #include <net/vnet.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 #include <netgraph/bluetooth/include/ng_bluetooth.h>
 #include <netgraph/bluetooth/include/ng_hci.h>
 #include <netgraph/bluetooth/include/ng_l2cap.h>
 #include <netgraph/bluetooth/include/ng_btsocket.h>
 #include <netgraph/bluetooth/include/ng_btsocket_l2cap.h>
 #include <netgraph/bluetooth/include/ng_btsocket_rfcomm.h>
 
 /*
  * Malloc type used for RFCOMM socket allocations in this file.  With
  * NG_SEPARATE_MALLOC defined a dedicated type is declared; otherwise the
  * name is simply an alias for M_NETGRAPH.
  */
 #ifdef NG_SEPARATE_MALLOC
 static MALLOC_DEFINE(M_NETGRAPH_BTSOCKET_RFCOMM, "netgraph_btsocks_rfcomm",
 		"Netgraph Bluetooth RFCOMM sockets");
 #else
 #define M_NETGRAPH_BTSOCKET_RFCOMM M_NETGRAPH
 #endif /* NG_SEPARATE_MALLOC */
 
 /*
  * Debug logging macros, one per severity level.
  *
  * Each macro expands to an unbraced "if (...) printf", so it is used like
  * a printf() call: NG_BTSOCKET_RFCOMM_ERR("fmt", args).  Because the
  * expansion is a bare if statement, put braces around a call that forms
  * the body of an if which has an else; otherwise that else binds to the
  * if hidden inside the macro.
  *
  * A message is printed only when ng_btsocket_rfcomm_debug_level is at
  * least the level of the macro and ppsratecheck() allows it.  All four
  * macros share the same rate-limit state (ng_btsocket_rfcomm_lasttime and
  * ng_btsocket_rfcomm_curpps), so they throttle each other.
  */
 #define NG_BTSOCKET_RFCOMM_INFO \
 	if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_INFO_LEVEL && \
 	    ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_RFCOMM_WARN \
 	if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_WARN_LEVEL && \
 	    ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_RFCOMM_ERR \
 	if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_ERR_LEVEL && \
 	    ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_RFCOMM_ALERT \
 	if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_ALERT_LEVEL && \
 	    ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \
 		printf
 
 #define	ALOT	0x7fff
 
 /* Local prototypes */
 static int ng_btsocket_rfcomm_upcall
 	(struct socket *so, void *arg, int waitflag);
 static void ng_btsocket_rfcomm_sessions_task
 	(void *ctx, int pending);
 static void ng_btsocket_rfcomm_session_task
 	(ng_btsocket_rfcomm_session_p s);
 #define ng_btsocket_rfcomm_task_wakeup() \
 	taskqueue_enqueue(taskqueue_swi_giant, &ng_btsocket_rfcomm_task)
 
 static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_connect_ind
 	(ng_btsocket_rfcomm_session_p s, int channel);
 static void ng_btsocket_rfcomm_connect_cfm
 	(ng_btsocket_rfcomm_session_p s);
 
 static int ng_btsocket_rfcomm_session_create
 	(ng_btsocket_rfcomm_session_p *sp, struct socket *l2so,
 	 bdaddr_p src, bdaddr_p dst, struct thread *td);
 static int ng_btsocket_rfcomm_session_accept
 	(ng_btsocket_rfcomm_session_p s0);
 static int ng_btsocket_rfcomm_session_connect
 	(ng_btsocket_rfcomm_session_p s);
 static int ng_btsocket_rfcomm_session_receive
 	(ng_btsocket_rfcomm_session_p s);
 static int ng_btsocket_rfcomm_session_send
 	(ng_btsocket_rfcomm_session_p s);
 static void ng_btsocket_rfcomm_session_clean
 	(ng_btsocket_rfcomm_session_p s);
 static void ng_btsocket_rfcomm_session_process_pcb
 	(ng_btsocket_rfcomm_session_p s);
 static ng_btsocket_rfcomm_session_p ng_btsocket_rfcomm_session_by_addr
 	(bdaddr_p src, bdaddr_p dst);
 
 static int ng_btsocket_rfcomm_receive_frame
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_sabm
 	(ng_btsocket_rfcomm_session_p s, int dlci);
 static int ng_btsocket_rfcomm_receive_disc
 	(ng_btsocket_rfcomm_session_p s, int dlci);
 static int ng_btsocket_rfcomm_receive_ua
 	(ng_btsocket_rfcomm_session_p s, int dlci);
 static int ng_btsocket_rfcomm_receive_dm
 	(ng_btsocket_rfcomm_session_p s, int dlci);
 static int ng_btsocket_rfcomm_receive_uih
 	(ng_btsocket_rfcomm_session_p s, int dlci, int pf, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_mcc
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_test
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_fc
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_msc
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_rpn
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_rls
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_pn
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static void ng_btsocket_rfcomm_set_pn
 	(ng_btsocket_rfcomm_pcb_p pcb, u_int8_t cr, u_int8_t flow_control, 
 	 u_int8_t credits, u_int16_t mtu);
 
 static int ng_btsocket_rfcomm_send_command
 	(ng_btsocket_rfcomm_session_p s, u_int8_t type, u_int8_t dlci);
 static int ng_btsocket_rfcomm_send_uih
 	(ng_btsocket_rfcomm_session_p s, u_int8_t address, u_int8_t pf, 
 	 u_int8_t credits, struct mbuf *data);
 static int ng_btsocket_rfcomm_send_msc
 	(ng_btsocket_rfcomm_pcb_p pcb);
 static int ng_btsocket_rfcomm_send_pn
 	(ng_btsocket_rfcomm_pcb_p pcb);
 static int ng_btsocket_rfcomm_send_credits
 	(ng_btsocket_rfcomm_pcb_p pcb);
 
 static int ng_btsocket_rfcomm_pcb_send
 	(ng_btsocket_rfcomm_pcb_p pcb, int limit);
 static void ng_btsocket_rfcomm_pcb_kill
 	(ng_btsocket_rfcomm_pcb_p pcb, int error);
 static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_pcb_by_dlci
 	(ng_btsocket_rfcomm_session_p s, int dlci);
 static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_pcb_listener
 	(bdaddr_p src, int channel);
 
 static void ng_btsocket_rfcomm_timeout
 	(ng_btsocket_rfcomm_pcb_p pcb);
 static void ng_btsocket_rfcomm_untimeout
 	(ng_btsocket_rfcomm_pcb_p pcb);
 static void ng_btsocket_rfcomm_process_timeout
 	(void *xpcb);
 
 static struct mbuf * ng_btsocket_rfcomm_prepare_packet
 	(struct sockbuf *sb, int length);
 
 /*
  * Globals.
  *
  * The sessions list and the sockets list each have their own mutex.
  * ng_btsocket_rfcomm_lasttime and ng_btsocket_rfcomm_curpps are the
  * rate-limit state used by the NG_BTSOCKET_RFCOMM_* logging macros.
  */
 extern int					ifqmaxlen;
 static u_int32_t				ng_btsocket_rfcomm_debug_level;
 static u_int32_t				ng_btsocket_rfcomm_timo;
 struct task					ng_btsocket_rfcomm_task;
 static LIST_HEAD(, ng_btsocket_rfcomm_session)	ng_btsocket_rfcomm_sessions;
 static struct mtx				ng_btsocket_rfcomm_sessions_mtx;
 static LIST_HEAD(, ng_btsocket_rfcomm_pcb)	ng_btsocket_rfcomm_sockets;
 static struct mtx				ng_btsocket_rfcomm_sockets_mtx;
 static struct timeval				ng_btsocket_rfcomm_lasttime;
 static int					ng_btsocket_rfcomm_curpps;
 
 /*
  * Sysctl tree: exposes the debug level and the timeout as read/write
  * knobs under the "stream" node.
  * NOTE(review): the values actually in effect are assigned in
  * ng_btsocket_rfcomm_init() (WARN level, 60); confirm what the fourth
  * SYSCTL_UINT argument is meant to convey here.
  */
 SYSCTL_DECL(_net_bluetooth_rfcomm_sockets);
 static SYSCTL_NODE(_net_bluetooth_rfcomm_sockets, OID_AUTO, stream, CTLFLAG_RW,
 	0, "Bluetooth STREAM RFCOMM sockets family");
 SYSCTL_UINT(_net_bluetooth_rfcomm_sockets_stream, OID_AUTO, debug_level,
 	CTLFLAG_RW,
 	&ng_btsocket_rfcomm_debug_level, NG_BTSOCKET_INFO_LEVEL,
 	"Bluetooth STREAM RFCOMM sockets debug level");
 SYSCTL_UINT(_net_bluetooth_rfcomm_sockets_stream, OID_AUTO, timeout,
 	CTLFLAG_RW,
 	&ng_btsocket_rfcomm_timo, 60,
 	"Bluetooth STREAM RFCOMM sockets timeout");
 
 /*****************************************************************************
  *****************************************************************************
  **                              RFCOMM CRC
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Byte-wise CRC lookup table: ng_btsocket_rfcomm_crc() indexes it with
  * (running crc XOR next data byte) to obtain the next running crc.
  * NOTE(review): presumably the reflected CRC-8 table from TS 07.10 --
  * confirm against the specification before regenerating.
  */
 static u_int8_t	ng_btsocket_rfcomm_crc_table[256] = {
 	0x00, 0x91, 0xe3, 0x72, 0x07, 0x96, 0xe4, 0x75,
 	0x0e, 0x9f, 0xed, 0x7c, 0x09, 0x98, 0xea, 0x7b,
 	0x1c, 0x8d, 0xff, 0x6e, 0x1b, 0x8a, 0xf8, 0x69,
 	0x12, 0x83, 0xf1, 0x60, 0x15, 0x84, 0xf6, 0x67,
 
 	0x38, 0xa9, 0xdb, 0x4a, 0x3f, 0xae, 0xdc, 0x4d,
 	0x36, 0xa7, 0xd5, 0x44, 0x31, 0xa0, 0xd2, 0x43,
 	0x24, 0xb5, 0xc7, 0x56, 0x23, 0xb2, 0xc0, 0x51,
 	0x2a, 0xbb, 0xc9, 0x58, 0x2d, 0xbc, 0xce, 0x5f,
 
 	0x70, 0xe1, 0x93, 0x02, 0x77, 0xe6, 0x94, 0x05,
 	0x7e, 0xef, 0x9d, 0x0c, 0x79, 0xe8, 0x9a, 0x0b,
 	0x6c, 0xfd, 0x8f, 0x1e, 0x6b, 0xfa, 0x88, 0x19,
 	0x62, 0xf3, 0x81, 0x10, 0x65, 0xf4, 0x86, 0x17,
 
 	0x48, 0xd9, 0xab, 0x3a, 0x4f, 0xde, 0xac, 0x3d,
 	0x46, 0xd7, 0xa5, 0x34, 0x41, 0xd0, 0xa2, 0x33,
 	0x54, 0xc5, 0xb7, 0x26, 0x53, 0xc2, 0xb0, 0x21,
 	0x5a, 0xcb, 0xb9, 0x28, 0x5d, 0xcc, 0xbe, 0x2f,
 
 	0xe0, 0x71, 0x03, 0x92, 0xe7, 0x76, 0x04, 0x95,
 	0xee, 0x7f, 0x0d, 0x9c, 0xe9, 0x78, 0x0a, 0x9b,
 	0xfc, 0x6d, 0x1f, 0x8e, 0xfb, 0x6a, 0x18, 0x89,
 	0xf2, 0x63, 0x11, 0x80, 0xf5, 0x64, 0x16, 0x87,
 
 	0xd8, 0x49, 0x3b, 0xaa, 0xdf, 0x4e, 0x3c, 0xad,
 	0xd6, 0x47, 0x35, 0xa4, 0xd1, 0x40, 0x32, 0xa3,
 	0xc4, 0x55, 0x27, 0xb6, 0xc3, 0x52, 0x20, 0xb1,
 	0xca, 0x5b, 0x29, 0xb8, 0xcd, 0x5c, 0x2e, 0xbf,
 
 	0x90, 0x01, 0x73, 0xe2, 0x97, 0x06, 0x74, 0xe5,
 	0x9e, 0x0f, 0x7d, 0xec, 0x99, 0x08, 0x7a, 0xeb,
 	0x8c, 0x1d, 0x6f, 0xfe, 0x8b, 0x1a, 0x68, 0xf9,
 	0x82, 0x13, 0x61, 0xf0, 0x85, 0x14, 0x66, 0xf7,
 
 	0xa8, 0x39, 0x4b, 0xda, 0xaf, 0x3e, 0x4c, 0xdd,
 	0xa6, 0x37, 0x45, 0xd4, 0xa1, 0x30, 0x42, 0xd3,
 	0xb4, 0x25, 0x57, 0xc6, 0xb3, 0x22, 0x50, 0xc1,
 	0xba, 0x2b, 0x59, 0xc8, 0xbd, 0x2c, 0x5e, 0xcf
 };
 
 /*
  * Compute the table-driven CRC over "length" bytes starting at "data".
  *
  * The running value starts at 0xff and is replaced, for every input
  * byte, by the table entry at (crc XOR byte).  Returns the final running
  * value; the FCS helpers derive the frame check sequence from it.
  *
  * A zero or negative length processes no bytes and returns 0xff.  The
  * explicit "> 0" test matters: with a plain "while (length --)" a
  * negative length would never reach zero and the loop would read far
  * beyond the buffer.
  */
 static u_int8_t
 ng_btsocket_rfcomm_crc(u_int8_t *data, int length)
 {
 	u_int8_t	crc = 0xff;
 
 	while (length-- > 0)
 		crc = ng_btsocket_rfcomm_crc_table[crc ^ *data++];
 
 	return (crc);
 } /* ng_btsocket_rfcomm_crc */
 
 /*
  * Frame check sequence over the first two bytes at "data": 0xff minus
  * the CRC of those two bytes.
  */
 static u_int8_t
 ng_btsocket_rfcomm_fcs2(u_int8_t *data)
 {
 	u_int8_t	crc;
 
 	crc = ng_btsocket_rfcomm_crc(data, 2);
 
 	return (0xff - crc);
 } /* ng_btsocket_rfcomm_fcs2 */
   
 /*
  * Frame check sequence over the first three bytes at "data": 0xff minus
  * the CRC of those three bytes.
  */
 static u_int8_t
 ng_btsocket_rfcomm_fcs3(u_int8_t *data)
 {
 	u_int8_t	crc;
 
 	crc = ng_btsocket_rfcomm_crc(data, 3);
 
 	return (0xff - crc);
 } /* ng_btsocket_rfcomm_fcs3 */
 
 /* 
  * Check FCS
  *
  * From Bluetooth spec
  *
  * "... In 07.10, the frame check sequence (FCS) is calculated on different 
  * sets of fields for different frame types. These are the fields that the 
  * FCS are calculated on:
  *
  * For SABM, DISC, UA, DM frames: on Address, Control and length field.
  * For UIH frames: on Address and Control field.
  *
  * (This is stated here for clarification, and to set the standard for RFCOMM;
  * the fields included in FCS calculation have actually changed in version
  * 7.0.0 of TS 07.10, but RFCOMM will not change the FCS calculation scheme
  * from the one above.) ..."
  */
 
 /*
  * Verify a received FCS.  UIH frames are checked over two header bytes,
  * every other frame type over three.  Returns 0 when the FCS matches and
  * non-zero when it does not.
  */
 static int
 ng_btsocket_rfcomm_check_fcs(u_int8_t *data, int type, u_int8_t fcs)
 {
 	u_int8_t	expected;
 
 	if (type == RFCOMM_FRAME_UIH)
 		expected = ng_btsocket_rfcomm_fcs2(data);
 	else
 		expected = ng_btsocket_rfcomm_fcs3(data);
 
 	return (expected != fcs);
 } /* ng_btsocket_rfcomm_check_fcs */
 
 /*****************************************************************************
  *****************************************************************************
  **                              Socket interface
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Set up the RFCOMM socket layer globals: default debug level and
  * timeout, the RFCOMM task, and the session and socket lists together
  * with their mutexes.
  */
 
 void
 ng_btsocket_rfcomm_init(void)
 {
 
 	/* Globals are initialized for the default vnet instance only. */
 	if (IS_DEFAULT_VNET(curvnet)) {
 		/* Default knob values */
 		ng_btsocket_rfcomm_timo = 60;
 		ng_btsocket_rfcomm_debug_level = NG_BTSOCKET_WARN_LEVEL;
 
 		/* List of RFCOMM sockets and its lock */
 		mtx_init(&ng_btsocket_rfcomm_sockets_mtx,
 			"btsocks_rfcomm_sockets_mtx", NULL, MTX_DEF);
 		LIST_INIT(&ng_btsocket_rfcomm_sockets);
 
 		/* List of RFCOMM sessions and its lock */
 		mtx_init(&ng_btsocket_rfcomm_sessions_mtx,
 			"btsocks_rfcomm_sessions_mtx", NULL, MTX_DEF);
 		LIST_INIT(&ng_btsocket_rfcomm_sessions);
 
 		/* Task that services all RFCOMM sessions */
 		TASK_INIT(&ng_btsocket_rfcomm_task, 0,
 			ng_btsocket_rfcomm_sessions_task, NULL);
 	}
 } /* ng_btsocket_rfcomm_init */
 
 /*
  * Abort connection on socket: record ECONNABORTED as the socket error
  * and start a disconnect.  The result of the disconnect is deliberately
  * ignored because this function has no way to report it.
  */
 
 void
 ng_btsocket_rfcomm_abort(struct socket *so)
 {
 
 	so->so_error = ECONNABORTED;
 	(void)ng_btsocket_rfcomm_disconnect(so);
 } /* ng_btsocket_rfcomm_abort */
 
 /*
  * Close socket: start a disconnect and ignore its result, since this
  * function cannot return an error.
  */
 void
 ng_btsocket_rfcomm_close(struct socket *so)
 {
 
 	(void)ng_btsocket_rfcomm_disconnect(so);
 } /* ng_btsocket_rfcomm_close */
 
 /*
  * Accept connection on socket.  The socket must already be connected and
  * ready, so the only work left is to hand back the peer address.
  * Returns whatever ng_btsocket_rfcomm_peeraddr() returns.
  */
 
 int
 ng_btsocket_rfcomm_accept(struct socket *so, struct sockaddr **nam)
 {
 	int	error;
 
 	error = ng_btsocket_rfcomm_peeraddr(so, nam);
 
 	return (error);
 } /* ng_btsocket_rfcomm_accept */
 
 /*
  * Create a new RFCOMM PCB and attach it to the socket.
  *
  * Returns ESOCKTNOSUPPORT for anything but SOCK_STREAM, EISCONN when the
  * socket already has a PCB, the soreserve() error when buffer space
  * cannot be reserved, ENOMEM when the PCB cannot be allocated, and 0 on
  * success.
  */
 
 int
 ng_btsocket_rfcomm_attach(struct socket *so, int proto, struct thread *td)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = so2rfcomm_pcb(so);
 	int				error;
 
 	/* Only stream sockets are supported */
 	if (so->so_type != SOCK_STREAM)
 		return (ESOCKTNOSUPPORT);
 
 #if 0 /* XXX sonewconn() calls "pru_attach" with proto == 0 */
 	if (proto != 0) 
 		if (proto != BLUETOOTH_PROTO_RFCOMM)
 			return (EPROTONOSUPPORT);
 #endif /* XXX */
 
 	/* Refuse to attach twice */
 	if (pcb != NULL)
 		return (EISCONN);
 
 	/* Reserve buffer space unless both directions already have it */
 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 		error = soreserve(so, NG_BTSOCKET_RFCOMM_SENDSPACE,
 					NG_BTSOCKET_RFCOMM_RECVSPACE);
 		if (error != 0)
 			return (error);
 	}
 
 	/* Zeroed PCB; allocation must not sleep */
 	pcb = malloc(sizeof(*pcb), M_NETGRAPH_BTSOCKET_RFCOMM,
 		M_NOWAIT | M_ZERO);
 	if (pcb == NULL)
 		return (ENOMEM);
 
 	/* Cross-link the socket and the PCB */
 	so->so_pcb = (caddr_t) pcb;
 	pcb->so = so;
 
 	/* Initial DLC state: closed, credit based flow control requested */
 	pcb->state = NG_BTSOCKET_RFCOMM_DLC_CLOSED;
 	pcb->flags = NG_BTSOCKET_RFCOMM_DLC_CFC;
 
 	/* Local and remote modem signals start out identical */
 	pcb->rmodem = (RFCOMM_MODEM_RTC | RFCOMM_MODEM_RTR | RFCOMM_MODEM_DV);
 	pcb->lmodem = pcb->rmodem;
 
 	/* Default MTU and credits */
 	pcb->mtu = RFCOMM_DEFAULT_MTU;
 	pcb->tx_cred = 0;
 	pcb->rx_cred = RFCOMM_DEFAULT_CREDITS;
 
 	/* The PCB timeout callout is tied to the PCB mutex */
 	mtx_init(&pcb->pcb_mtx, "btsocks_rfcomm_pcb_mtx", NULL, MTX_DEF);
 	callout_init_mtx(&pcb->timo, &pcb->pcb_mtx, 0);
 
 	/* Publish the PCB on the global socket list */
 	mtx_lock(&ng_btsocket_rfcomm_sockets_mtx);
 	LIST_INSERT_HEAD(&ng_btsocket_rfcomm_sockets, pcb, next);
 	mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 
 	return (0);
 } /* ng_btsocket_rfcomm_attach */
 
 /*
  * Bind socket to a local address and channel.
  *
  * Returns EINVAL for a missing PCB or a malformed address, EAFNOSUPPORT
  * for a non-Bluetooth address family, EADDRINUSE when another socket
  * already uses the same non-zero channel on the same address, and 0 on
  * success.
  */
 
 int
 ng_btsocket_rfcomm_bind(struct socket *so, struct sockaddr *nam, 
 		struct thread *td)
 {
 	ng_btsocket_rfcomm_pcb_t	*pcb = so2rfcomm_pcb(so), *other;
 	struct sockaddr_rfcomm		*sa = (struct sockaddr_rfcomm *) nam;
 	int				 error = 0;
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	/* Sanity check the address before touching any state */
 	if (sa == NULL)
 		return (EINVAL);
 	if (sa->rfcomm_family != AF_BLUETOOTH)
 		return (EAFNOSUPPORT);
 	if (sa->rfcomm_len != sizeof(*sa))
 		return (EINVAL);
 	if (sa->rfcomm_channel > 30)
 		return (EINVAL);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	/* Channel zero is never checked for collisions */
 	if (sa->rfcomm_channel != 0) {
 		mtx_lock(&ng_btsocket_rfcomm_sockets_mtx);
 
 		LIST_FOREACH(other, &ng_btsocket_rfcomm_sockets, next) {
 			if (other->channel != sa->rfcomm_channel)
 				continue;
 			if (bcmp(&other->src, &sa->rfcomm_bdaddr,
 					sizeof(other->src)) != 0)
 				continue;
 
 			/* Same channel on the same address is taken */
 			error = EADDRINUSE;
 			break;
 		}
 
 		mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 	}
 
 	/* Record the binding only when no collision was found */
 	if (error == 0) {
 		bcopy(&sa->rfcomm_bdaddr, &pcb->src, sizeof(pcb->src));
 		pcb->channel = sa->rfcomm_channel;
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	return (error);
 } /* ng_btsocket_rfcomm_bind */
 
 /*
  * Connect socket.
  *
  * Validates the destination address, finds the RFCOMM session between
  * pcb->src and the destination (creating one on top of a fresh L2CAP
  * socket when none exists) and attaches this DLC to that session.
  *
  * Locks are taken in the order sessions_mtx -> session_mtx -> pcb_mtx
  * and released in the reverse order on every exit path.
  *
  * Returns 0 or an errno value: EINVAL/EAFNOSUPPORT/EDESTADDRREQ for a
  * bad address, the socreate() or session create error, EBUSY when the
  * DLCI is already in use on the session, ECONNRESET when the session is
  * in an unusable state.
  */
 
 int
 ng_btsocket_rfcomm_connect(struct socket *so, struct sockaddr *nam, 
 		struct thread *td)
 {
 	ng_btsocket_rfcomm_pcb_t	*pcb = so2rfcomm_pcb(so);
 	struct sockaddr_rfcomm		*sa = (struct sockaddr_rfcomm *) nam;
 	ng_btsocket_rfcomm_session_t	*s = NULL;
 	struct socket			*l2so = NULL;
 	int				 dlci, error = 0;
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	/* Verify address */
 	if (sa == NULL)
 		return (EINVAL);
 	if (sa->rfcomm_family != AF_BLUETOOTH)
 		return (EAFNOSUPPORT);
 	if (sa->rfcomm_len != sizeof(*sa))
 		return (EINVAL);
 	if (sa->rfcomm_channel > 30)
 		return (EINVAL);
 	/* A destination needs both a non-zero channel and a real address */
 	if (sa->rfcomm_channel == 0 ||
 	    bcmp(&sa->rfcomm_bdaddr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0)
 		return (EDESTADDRREQ);
 
 	/*
 	 * Note that we do not check for errors from socreate() here: even
 	 * if we failed to create the L2CAP socket at this point we might
 	 * still have an already open session. The error is kept and only
 	 * returned below if a new session turns out to be required.
 	 */
 
 	error = socreate(PF_BLUETOOTH, &l2so, SOCK_SEQPACKET,
 			BLUETOOTH_PROTO_L2CAP, td->td_ucred, td);
 
 	/* 
 	 * Look for session between "pcb->src" and "sa->rfcomm_bdaddr" (dst)
 	 */
 
 	mtx_lock(&ng_btsocket_rfcomm_sessions_mtx);
 
 	/* NOTE(review): pcb->src is read here before pcb_mtx is taken */
 	s = ng_btsocket_rfcomm_session_by_addr(&pcb->src, &sa->rfcomm_bdaddr);
 	if (s == NULL) {
 		/*
 		 * We need to create new RFCOMM session. Check if we have L2CAP
 		 * socket. If l2so == NULL then error has the error code from
 		 * socreate()
 		 */
 
 		if (l2so == NULL) {
 			mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 			return (error);
 		}
 
 		error = ng_btsocket_rfcomm_session_create(&s, l2so,
 				&pcb->src, &sa->rfcomm_bdaddr, td);
 		if (error != 0) {
 			/* Session was not created, so we still own l2so */
 			mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 			soclose(l2so);
 
 			return (error);
 		}
 	} else if (l2so != NULL)
 		soclose(l2so); /* we don't need new L2CAP socket */
 
 	/*
 	 * Check if we already have the same DLCI on the same session
 	 */
 
 	mtx_lock(&s->session_mtx);
 	mtx_lock(&pcb->pcb_mtx);
 
 	/* The DLCI is built from the inverse of our initiator flag */
 	dlci = RFCOMM_MKDLCI(!INITIATOR(s), sa->rfcomm_channel);
 
 	if (ng_btsocket_rfcomm_pcb_by_dlci(s, dlci) != NULL) {
 		mtx_unlock(&pcb->pcb_mtx);
 		mtx_unlock(&s->session_mtx);
 		mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 
 		return (EBUSY);
 	}
 
 	/*
 	 * Check session state and if it's not acceptable then refuse
 	 * the connection
 	 */
 
 	switch (s->state) {
 	case NG_BTSOCKET_RFCOMM_SESSION_CONNECTING:
 	case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED:
 	case NG_BTSOCKET_RFCOMM_SESSION_OPEN:
 		/*
 		 * Update destination address and channel and attach 
 		 * DLC to the session
 		 */
 
 		bcopy(&sa->rfcomm_bdaddr, &pcb->dst, sizeof(pcb->dst));
 		pcb->channel = sa->rfcomm_channel;
 		pcb->dlci = dlci;
 
 		LIST_INSERT_HEAD(&s->dlcs, pcb, session_next);
 		pcb->session = s;
 
 		ng_btsocket_rfcomm_timeout(pcb);
 		soisconnecting(pcb->so);
 
 		if (s->state == NG_BTSOCKET_RFCOMM_SESSION_OPEN) {
 			/*
 			 * Session is already open: inherit its MTU and the
 			 * source address of its L2CAP socket, then start
 			 * parameter negotiation right away.
 			 */
 			pcb->mtu = s->mtu;
 			bcopy(&so2l2cap_pcb(s->l2so)->src, &pcb->src,
 				sizeof(pcb->src));
 
 			pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONFIGURING;
 
 			error = ng_btsocket_rfcomm_send_pn(pcb);
 			if (error == 0)
 				error = ng_btsocket_rfcomm_task_wakeup();
 		} else
 			/* Session not open yet: wait for it to come up */
 			pcb->state = NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT;
 		break;
 
 	default:
 		error = ECONNRESET;
 		break;
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&s->session_mtx);
 	mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 
 	return (error);
 } /* ng_btsocket_rfcomm_connect */
 
 /*
  * Process ioctl calls on socket.  No ioctl is implemented: every request
  * is rejected with EINVAL and all arguments are ignored.
  * XXX FIXME this should provide interface to the RFCOMM multiplexor channel
  */
 
 int
 ng_btsocket_rfcomm_control(struct socket *so, u_long cmd, caddr_t data,
 		struct ifnet *ifp, struct thread *td)
 {
 	return (EINVAL);
 } /* ng_btsocket_rfcomm_control */
 
 /*
  * Process getsockopt/setsockopt system calls
  */
 
 int
 ng_btsocket_rfcomm_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	ng_btsocket_rfcomm_pcb_p		pcb = so2rfcomm_pcb(so);
 	struct ng_btsocket_rfcomm_fc_info	fcinfo;
 	int					error = 0;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (sopt->sopt_level != SOL_RFCOMM)
 		return (0);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case SO_RFCOMM_MTU:
 			error = sooptcopyout(sopt, &pcb->mtu, sizeof(pcb->mtu));
 			break;
 
 		case SO_RFCOMM_FC_INFO:
 			fcinfo.lmodem = pcb->lmodem;
 			fcinfo.rmodem = pcb->rmodem;
 			fcinfo.tx_cred = pcb->tx_cred;
 			fcinfo.rx_cred = pcb->rx_cred;
 			fcinfo.cfc = (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC)?
 				1 : 0;
 			fcinfo.reserved = 0;
 
 			error = sooptcopyout(sopt, &fcinfo, sizeof(fcinfo));
 			break;
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	return (error);
 } /* ng_btsocket_rfcomm_ctloutput */
 
 /*
  * Detach and destroy socket.
  *
  * Pushes an active DLC towards the closed state, sleeps until the DLC
  * state becomes CLOSED, then unlinks the PCB from the global socket
  * list and frees it.  The socket must have a PCB (asserted).
  */
 
 void
 ng_btsocket_rfcomm_detach(struct socket *so)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = so2rfcomm_pcb(so);
 
 	KASSERT(pcb != NULL, ("ng_btsocket_rfcomm_detach: pcb == NULL"));
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	switch (pcb->state) {
 	case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT:
 	case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING:
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTING:
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTED:
 		/* XXX What to do with pending request? */
 		if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)
 			ng_btsocket_rfcomm_untimeout(pcb);
 
 		/*
 		 * A DLC still waiting for its session is only flagged as
 		 * detached; any other active DLC is switched to the
 		 * disconnecting state.  The RFCOMM task does the rest.
 		 */
 		if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT)
 			pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_DETACHED;
 		else
 			pcb->state = NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING;
 
 		ng_btsocket_rfcomm_task_wakeup();
 		break;
 
 	case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING:
 		ng_btsocket_rfcomm_task_wakeup();
 		break;
 	}
 	
 	/*
 	 * Sleep on &pcb->state, with pcb_mtx as the interlock, until the
 	 * DLC is closed.  Presumably the RFCOMM task issues the matching
 	 * wakeup when it closes the DLC -- verify in the task code.
 	 */
 	while (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CLOSED)
 		msleep(&pcb->state, &pcb->pcb_mtx, PZERO, "rf_det", 0);
 
 	/* A closed DLC must have neither a session nor a pending timeout */
 	if (pcb->session != NULL)
 		panic("%s: pcb->session != NULL\n", __func__);
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)
 		panic("%s: timeout on closed DLC, flags=%#x\n",
 			__func__, pcb->flags);
 
 	mtx_lock(&ng_btsocket_rfcomm_sockets_mtx);
 	LIST_REMOVE(pcb, next);
 	mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	/* Wipe the PCB before freeing it */
 	mtx_destroy(&pcb->pcb_mtx);
 	bzero(pcb, sizeof(*pcb));
 	free(pcb, M_NETGRAPH_BTSOCKET_RFCOMM);
 
 	soisdisconnected(so);
 	so->so_pcb = NULL;
 } /* ng_btsocket_rfcomm_detach */
 
 /*
  * Disconnect socket
  */
 
 int
 ng_btsocket_rfcomm_disconnect(struct socket *so)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = so2rfcomm_pcb(so);
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING) {
 		mtx_unlock(&pcb->pcb_mtx);
 		return (EINPROGRESS);
 	}
 
 	/* XXX What to do with pending request? */
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)
 		ng_btsocket_rfcomm_untimeout(pcb);
 
 	switch (pcb->state) {
 	case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING: /* XXX can we get here? */
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTING: /* XXX can we get here? */
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTED:
 
 		/*
 		 * Just change DLC state and enqueue RFCOMM task. It will
 		 * queue and send DISC on the DLC.
 		 */ 
 
 		pcb->state = NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING;
 		soisdisconnecting(so);
 
 		ng_btsocket_rfcomm_task_wakeup();
 		break;
 
 	case NG_BTSOCKET_RFCOMM_DLC_CLOSED:
 	case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT:
 		break;
 
 	default:
 		panic("%s: Invalid DLC state=%d, flags=%#x\n",
 			__func__, pcb->state, pcb->flags);
 		break;
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	return (0);
 } /* ng_btsocket_rfcomm_disconnect */
 
 /*
  * Listen on socket. First call to listen() will create listening RFCOMM
  * session.
  *
  * When the socket has no channel yet (channel 0) the highest channel in
  * 1..30 not used by another socket with the same source address is
  * picked.  Returns 0 or an errno value: EINVAL when there is no PCB,
  * EADDRNOTAVAIL when no channel is available, or the error from
  * solisten_proto_check(), socreate() or the session create.
  */
 
 int
 ng_btsocket_rfcomm_listen(struct socket *so, int backlog, struct thread *td)
 {
 	ng_btsocket_rfcomm_pcb_p	 pcb = so2rfcomm_pcb(so), pcb1;
 	ng_btsocket_rfcomm_session_p	 s = NULL;
 	struct socket			*l2so = NULL;
 	int				 error, socreate_error, usedchannels;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (pcb->channel > 30)
 		return (EADDRNOTAVAIL);
 
 	/* Bitmask of channels in use: bit (n - 1) stands for channel n */
 	usedchannels = 0;
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (pcb->channel == 0) {
 		mtx_lock(&ng_btsocket_rfcomm_sockets_mtx);
 
 		/* Collect the channels taken on our source address */
 		LIST_FOREACH(pcb1, &ng_btsocket_rfcomm_sockets, next)
 			if (pcb1->channel != 0 &&
 			    bcmp(&pcb1->src, &pcb->src, sizeof(pcb->src)) == 0)
 				usedchannels |= (1 << (pcb1->channel - 1));
 
 		/* Walk down from 30; the loop ends at 0 if all are taken */
 		for (pcb->channel = 30; pcb->channel > 0; pcb->channel --)
 			if (!(usedchannels & (1 << (pcb->channel - 1))))
 				break;
 
 		if (pcb->channel == 0) {
 			mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 			mtx_unlock(&pcb->pcb_mtx);
 
 			return (EADDRNOTAVAIL);
 		}
 
 		mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	/*
 	 * Note that we do not check for errors from socreate() here: even
 	 * if we failed to create the L2CAP socket at this point we might
 	 * still have an already open session. The error is saved in
 	 * socreate_error and reported only if a session must be created.
 	 */
 
 	socreate_error = socreate(PF_BLUETOOTH, &l2so, SOCK_SEQPACKET,
 			BLUETOOTH_PROTO_L2CAP, td->td_ucred, td);
 
 	/*
 	 * Transition the socket and session into the LISTENING state.  Check
 	 * for collisions first, as there can only be one.
 	 */
 	mtx_lock(&ng_btsocket_rfcomm_sessions_mtx);
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
 	SOCK_UNLOCK(so);
 	if (error != 0)
 		goto out;
 
 	/* Look for an existing listening session; s is NULL if none */
 	LIST_FOREACH(s, &ng_btsocket_rfcomm_sessions, next)
 		if (s->state == NG_BTSOCKET_RFCOMM_SESSION_LISTENING)
 			break;
 
 	if (s == NULL) {
 		/*
 		 * We need to create default RFCOMM session. Check if we have 
 		 * L2CAP socket. If l2so == NULL then socreate_error has the
 		 * error code from socreate()
 		 */
 		if (l2so == NULL) {
 			error = socreate_error;
 			goto out;
 		}
 
 		/* 
 		 * Create default listen RFCOMM session. The default RFCOMM 
 		 * session will listen on ANY address.
 		 *
 		 * XXX FIXME Note that currently there is no way to adjust MTU
 		 * for the default session.
 		 */
 		error = ng_btsocket_rfcomm_session_create(&s, l2so,
 					NG_HCI_BDADDR_ANY, NULL, td);
 		if (error != 0)
 			goto out;
 		/* The session now owns l2so; do not close it below */
 		l2so = NULL;
 	}
 	SOCK_LOCK(so);
 	solisten_proto(so, backlog);
 	SOCK_UNLOCK(so);
 out:
 	mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 	/*
 	 * If we still have an l2so reference here, it's unneeded, so release
 	 * it.
 	 */
 	if (l2so != NULL)
 		soclose(l2so);
 	return (error);
 } /* ng_btsocket_rfcomm_listen */
 
 /*
  * Get peer address
  */
 
 int
 ng_btsocket_rfcomm_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = so2rfcomm_pcb(so);
 	struct sockaddr_rfcomm		sa;
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	bcopy(&pcb->dst, &sa.rfcomm_bdaddr, sizeof(sa.rfcomm_bdaddr));
 	sa.rfcomm_channel = pcb->channel;
 	sa.rfcomm_len = sizeof(sa);
 	sa.rfcomm_family = AF_BLUETOOTH;
 
 	*nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT);
 
 	return ((*nam == NULL)? ENOMEM : 0);
 } /* ng_btsocket_rfcomm_peeraddr */
 
 /*
  * Send data to socket.
  *
  * Appends the mbuf chain to the socket send buffer and wakes up the
  * RFCOMM task unless the DLC is already marked as sending.  Control
  * data is not supported.  Returns EINVAL for bad input, ENOTCONN when
  * the DLC is not connected, otherwise the result of the task wakeup
  * (or 0 when no wakeup was needed).  On every path any mbufs still owned
  * by this function are freed.
  */
 
 int
 ng_btsocket_rfcomm_send(struct socket *so, int flags, struct mbuf *m,
 		struct sockaddr *nam, struct mbuf *control, struct thread *td)
 {
 	ng_btsocket_rfcomm_pcb_t	*pcb = so2rfcomm_pcb(so);
 	int				 error = 0;
 
 	/* Check socket and input */
 	if (pcb == NULL || m == NULL || control != NULL) {
 		error = EINVAL;
 		goto drop;
 	}
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	/* Make sure DLC is connected */
 	if (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTED) {
 		mtx_unlock(&pcb->pcb_mtx);
 		error = ENOTCONN;
 		goto drop;
 	}
 
 	/* Put the packet on the socket's send queue and wakeup RFCOMM task */
-	sbappend(&pcb->so->so_snd, m);
+	sbappend(&pcb->so->so_snd, m, flags);
 	/* The send buffer owns the chain now; keep "drop" from freeing it */
 	m = NULL;
 	
 	/* Only one wakeup is issued while the DLC is marked as sending */
 	if (!(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_SENDING)) {
 		pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_SENDING;
 		error = ng_btsocket_rfcomm_task_wakeup();
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 drop:
 	NG_FREE_M(m); /* checks for != NULL */
 	NG_FREE_M(control);
 
 	return (error);
 } /* ng_btsocket_rfcomm_send */
 
 /*
  * Get socket address
  */
 
 int
 ng_btsocket_rfcomm_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = so2rfcomm_pcb(so);
 	struct sockaddr_rfcomm		sa;
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	bcopy(&pcb->src, &sa.rfcomm_bdaddr, sizeof(sa.rfcomm_bdaddr));
 	sa.rfcomm_channel = pcb->channel;
 	sa.rfcomm_len = sizeof(sa);
 	sa.rfcomm_family = AF_BLUETOOTH;
 
 	*nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT);
 
 	return ((*nam == NULL)? ENOMEM : 0);
 } /* ng_btsocket_rfcomm_sockaddr */
 
 /*
  * Upcall function for L2CAP sockets.  All it does is enqueue the RFCOMM
  * task; a failure to enqueue is logged at alert level.  Always returns
  * SU_OK.  A NULL socket panics.
  */
 
 static int
 ng_btsocket_rfcomm_upcall(struct socket *so, void *arg, int waitflag)
 {
 	int	rc;
 
 	if (so == NULL)
 		panic("%s: so == NULL\n", __func__);
 
 	rc = ng_btsocket_rfcomm_task_wakeup();
 	if (rc != 0) {
 		NG_BTSOCKET_RFCOMM_ALERT(
 "%s: Could not enqueue RFCOMM task, error=%d\n", __func__, rc);
 	}
 
 	return (SU_OK);
 } /* ng_btsocket_rfcomm_upcall */
 
 /*
  * RFCOMM task. Will handle all RFCOMM sessions in one pass.
  *
  * Walks the global session list with the sessions mutex held, runs the
  * per-session task on each entry and destroys every session that ends
  * up in the CLOSED state.
  * XXX FIXME does not scale very well
  */
 
 static void
 ng_btsocket_rfcomm_sessions_task(void *ctx, int pending)
 {
 	ng_btsocket_rfcomm_session_p	s = NULL, s_next = NULL;
 
 	mtx_lock(&ng_btsocket_rfcomm_sessions_mtx);
 
 	for (s = LIST_FIRST(&ng_btsocket_rfcomm_sessions); s != NULL; ) {
 		mtx_lock(&s->session_mtx);
 		/* Fetch the successor now: s may be freed further down */
 		s_next = LIST_NEXT(s, next);
 
 		ng_btsocket_rfcomm_session_task(s);
 
 		if (s->state == NG_BTSOCKET_RFCOMM_SESSION_CLOSED) {
 			/* Unlink and clean the session */
 			LIST_REMOVE(s, next);
 
 			NG_BT_MBUFQ_DRAIN(&s->outq);
 			if (!LIST_EMPTY(&s->dlcs))
 				panic("%s: DLC list is not empty\n", __func__);
 
 			/*
 			 * Close L2CAP socket. Both upcalls are cleared first,
 			 * each under its own socket buffer lock.
 			 */
 			SOCKBUF_LOCK(&s->l2so->so_rcv);
 			soupcall_clear(s->l2so, SO_RCV);
 			SOCKBUF_UNLOCK(&s->l2so->so_rcv);
 			SOCKBUF_LOCK(&s->l2so->so_snd);
 			soupcall_clear(s->l2so, SO_SND);
 			SOCKBUF_UNLOCK(&s->l2so->so_snd);
 			soclose(s->l2so);
 
 			mtx_unlock(&s->session_mtx);
 
 			/* Wipe the session before freeing it */
 			mtx_destroy(&s->session_mtx);
 			bzero(s, sizeof(*s));
 			free(s, M_NETGRAPH_BTSOCKET_RFCOMM);
 		} else
 			mtx_unlock(&s->session_mtx);
 
 		s = s_next;
 	}
 
 	mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 } /* ng_btsocket_rfcomm_sessions_task */
 
 /*
  * Process RFCOMM session. Will handle all RFCOMM sockets in one pass.
  *
  * The caller must hold s->session_mtx (asserted).  A session whose L2CAP
  * socket can no longer receive is closed first; the remaining work
  * depends on the session state.  Any failure while connecting,
  * receiving or sending closes and cleans the session.  An unknown
  * session state panics.
  */
 
 static void
 ng_btsocket_rfcomm_session_task(ng_btsocket_rfcomm_session_p s)
 {
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	if (s->l2so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: L2CAP connection has been terminated, so=%p, so_state=%#x, so_count=%d, " \
 "state=%d, flags=%#x\n", __func__, s->l2so, s->l2so->so_state, 
 			s->l2so->so_count, s->state, s->flags);
 
 		/* The switch below then takes the CLOSED case and does nothing */
 		s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 		ng_btsocket_rfcomm_session_clean(s);
 	}
 
 	/* Now process upcall */
 	switch (s->state) {
 	/* Try to accept new L2CAP connection(s) */
 	case NG_BTSOCKET_RFCOMM_SESSION_LISTENING:
 		/* Keep accepting until the accept call reports an error */
 		while (ng_btsocket_rfcomm_session_accept(s) == 0)
 			;
 		break;
 
 	/* Process the results of the L2CAP connect */
 	case NG_BTSOCKET_RFCOMM_SESSION_CONNECTING:
 		ng_btsocket_rfcomm_session_process_pcb(s);
 
 		if (ng_btsocket_rfcomm_session_connect(s) != 0) {
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 			ng_btsocket_rfcomm_session_clean(s);
 		} 
 		break;
 
 	/* Try to receive/send more data */
 	case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED:
 	case NG_BTSOCKET_RFCOMM_SESSION_OPEN:
 	case NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING:
 		ng_btsocket_rfcomm_session_process_pcb(s);
 
 		/* Receive first; send is attempted only if receive succeeded */
 		if (ng_btsocket_rfcomm_session_receive(s) != 0) {
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 			ng_btsocket_rfcomm_session_clean(s);
 		} else if (ng_btsocket_rfcomm_session_send(s) != 0) {
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 			ng_btsocket_rfcomm_session_clean(s);
 		}
 		break;
 
 	case NG_BTSOCKET_RFCOMM_SESSION_CLOSED:
 		break;
 
 	default:
 		panic("%s: Invalid session state=%d, flags=%#x\n",
 			__func__, s->state, s->flags);
 		break;
 	}
 } /* ng_btsocket_rfcomm_session_task */
 
 /*
  * Process RFCOMM connection indicator. Caller must hold s->session_mtx
  */
 
 static ng_btsocket_rfcomm_pcb_p
 ng_btsocket_rfcomm_connect_ind(ng_btsocket_rfcomm_session_p s, int channel)
 {
 	ng_btsocket_rfcomm_pcb_p	 pcb = NULL, pcb1 = NULL;
 	ng_btsocket_l2cap_pcb_p		 l2pcb = NULL;
 	struct socket			*so1 = NULL;
 
 	/*
 	 * s       - session the incoming connection arrived on
 	 * channel - RFCOMM channel the peer wants to connect to
 	 *
 	 * Returns the PCB of the newly created socket (unlocked, already
 	 * linked to s->dlcs), or NULL when there is no listener or no new
 	 * socket could be created.
 	 */
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * Try to find RFCOMM socket that listens on given source address 
 	 * and channel. This will return the best possible match.
 	 */
 
 	l2pcb = so2l2cap_pcb(s->l2so);
 	pcb = ng_btsocket_rfcomm_pcb_listener(&l2pcb->src, channel);
 	if (pcb == NULL)
 		return (NULL);
 
 	/*
 	 * Check the pending connections queue and if we have space then 
 	 * create new socket and set proper source and destination address,
 	 * and channel.
 	 */
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (pcb->so->so_qlen <= pcb->so->so_qlimit) {
 		/* sonewconn() is called in the listening socket's vnet */
 		CURVNET_SET(pcb->so->so_vnet);
 		so1 = sonewconn(pcb->so, 0);
 		CURVNET_RESTORE();
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	/* Queue was full or sonewconn() did not give us a socket */
 	if (so1 == NULL)
 		return (NULL);
 
 	/*
 	 * If we got here then we have created new socket. So complete the 
 	 * connection. Set source and destination address from the session.
 	 */
 
 	pcb1 = so2rfcomm_pcb(so1);
 	if (pcb1 == NULL)
 		panic("%s: pcb1 == NULL\n", __func__);
 
 	mtx_lock(&pcb1->pcb_mtx);
 
 	/* Addresses come from the session's L2CAP PCB */
 	bcopy(&l2pcb->src, &pcb1->src, sizeof(pcb1->src));
 	bcopy(&l2pcb->dst, &pcb1->dst, sizeof(pcb1->dst));
 	pcb1->channel = channel;
 
 	/* Link new DLC to the session. We already hold s->session_mtx */
 	LIST_INSERT_HEAD(&s->dlcs, pcb1, session_next);
 	pcb1->session = s;
 			
 	mtx_unlock(&pcb1->pcb_mtx);
 
 	return (pcb1);
 } /* ng_btsocket_rfcomm_connect_ind */
 
 /*
  * Process RFCOMM connect confirmation. Caller must hold s->session_mtx.
  */
 
 static void
 ng_btsocket_rfcomm_connect_cfm(ng_btsocket_rfcomm_session_p s)
 {
 	ng_btsocket_rfcomm_pcb_p	dlc = NULL, dlc_next = NULL;
 	int				error;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * Walk every DLC on the session and send a PN request for each one
 	 * that is still waiting for the session to connect.
 	 *
 	 * The successor is fetched before the DLC is processed because
 	 * ng_btsocket_rfcomm_pcb_kill will unlink the DLC from the session,
 	 * which rules out LIST_FOREACH.
 	 */
 
 	dlc = LIST_FIRST(&s->dlcs);
 	while (dlc != NULL) {
 		mtx_lock(&dlc->pcb_mtx);
 		dlc_next = LIST_NEXT(dlc, session_next);
 
 		if (dlc->state == NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT) {
 			/* Inherit MTU and local address from the session */
 			dlc->mtu = s->mtu;
 			bcopy(&so2l2cap_pcb(s->l2so)->src, &dlc->src,
 				sizeof(dlc->src));
 
 			error = ng_btsocket_rfcomm_send_pn(dlc);
 			if (error != 0)
 				ng_btsocket_rfcomm_pcb_kill(dlc, error);
 			else
 				dlc->state = NG_BTSOCKET_RFCOMM_DLC_CONFIGURING;
 		}
 
 		mtx_unlock(&dlc->pcb_mtx);
 		dlc = dlc_next;
 	}
 } /* ng_btsocket_rfcomm_connect_cfm */
 
 /*****************************************************************************
  *****************************************************************************
  **                              RFCOMM sessions
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Create new RFCOMM session. That function WILL NOT take ownership over l2so.
  * Caller MUST free l2so if function failed.
  */
 
 static int
 ng_btsocket_rfcomm_session_create(ng_btsocket_rfcomm_session_p *sp,
 		struct socket *l2so, bdaddr_p src, bdaddr_p dst,
 		struct thread *td)
 {
 	ng_btsocket_rfcomm_session_p	s = NULL;
 	struct sockaddr_l2cap		l2sa;
 	struct sockopt			l2sopt;
 	int				error;
 	u_int16_t			mtu;
 
 	/*
 	 * sp   - where to store the new session on success
 	 * l2so - L2CAP socket the session will run on
 	 * src  - local address to bind to (may be NULL, see below)
 	 * dst  - remote address to connect to; NULL means listen
 	 * td   - thread passed to sobind()/solisten()/soconnect()
 	 *
 	 * Returns 0 on success, ENOMEM if the session cannot be allocated,
 	 * or the error from sosetopt()/sobind()/solisten()/soconnect().
 	 * On failure the upcalls and SS_NBIO set here are undone and the
 	 * session is freed; l2so itself is left to the caller.
 	 */
 
 	mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED);
 
 	/* Allocate the RFCOMM session */
         s = malloc(sizeof(*s),
 		M_NETGRAPH_BTSOCKET_RFCOMM, M_NOWAIT | M_ZERO);
         if (s == NULL)
                 return (ENOMEM);
 
 	/* Set defaults */
 	s->mtu = RFCOMM_DEFAULT_MTU;
 	s->flags = 0;
 	s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 	NG_BT_MBUFQ_INIT(&s->outq, ifqmaxlen);
 
 	/*
 	 * XXX Mark session mutex as DUPOK to prevent "duplicated lock of 
 	 * the same type" message. When accepting new L2CAP connection
 	 * ng_btsocket_rfcomm_session_accept() holds both session mutexes 
 	 * for "old" (accepting) session and "new" (created) session.
 	 */
 
 	mtx_init(&s->session_mtx, "btsocks_rfcomm_session_mtx", NULL,
 		MTX_DEF|MTX_DUPOK);
 
 	LIST_INIT(&s->dlcs);
 
 	/*
 	 * Prepare L2CAP socket: install the RFCOMM upcall on both socket
 	 * buffers and switch the socket to non-blocking I/O.
 	 */
 	SOCKBUF_LOCK(&l2so->so_rcv);
 	soupcall_set(l2so, SO_RCV, ng_btsocket_rfcomm_upcall, NULL);
 	SOCKBUF_UNLOCK(&l2so->so_rcv);
 	SOCKBUF_LOCK(&l2so->so_snd);
 	soupcall_set(l2so, SO_SND, ng_btsocket_rfcomm_upcall, NULL);
 	SOCKBUF_UNLOCK(&l2so->so_snd);
 	l2so->so_state |= SS_NBIO;
 	s->l2so = l2so;
 
 	mtx_lock(&s->session_mtx);
 
 	/*
 	 * "src" == NULL and "dst" == NULL means just create session.
 	 * caller must do the rest
 	 */
 
 	if (src == NULL && dst == NULL)
 		goto done;
 
 	/*
 	 * Set incoming MTU on L2CAP socket. It is RFCOMM session default MTU 
 	 * plus 5 bytes: RFCOMM frame header, one extra byte for length and one
 	 * extra byte for credits.
 	 */
 
 	mtu = s->mtu + sizeof(struct rfcomm_frame_hdr) + 1 + 1;
 
 	l2sopt.sopt_dir = SOPT_SET;
 	l2sopt.sopt_level = SOL_L2CAP;
 	l2sopt.sopt_name = SO_L2CAP_IMTU;
 	l2sopt.sopt_val = (void *) &mtu;
 	l2sopt.sopt_valsize = sizeof(mtu);
 	l2sopt.sopt_td = NULL;
 
 	error = sosetopt(s->l2so, &l2sopt);
 	if (error != 0)
 		goto bad;
 
 	/*
 	 * Bind socket to "src" address. The RFCOMM PSM is used only when we
 	 * are going to listen (dst == NULL); otherwise PSM 0 is passed.
 	 */
 	l2sa.l2cap_len = sizeof(l2sa);
 	l2sa.l2cap_family = AF_BLUETOOTH;
 	l2sa.l2cap_psm = (dst == NULL)? htole16(NG_L2CAP_PSM_RFCOMM) : 0;
 	bcopy(src, &l2sa.l2cap_bdaddr, sizeof(l2sa.l2cap_bdaddr));
 	l2sa.l2cap_cid = 0;
 	l2sa.l2cap_bdaddr_type = BDADDR_BREDR;
 
 	error = sobind(s->l2so, (struct sockaddr *) &l2sa, td);
 	if (error != 0)
 		goto bad;
 
 	/* If "dst" is not NULL then initiate connect(), otherwise listen() */
 	if (dst == NULL) {
 		s->flags = 0;
 		s->state = NG_BTSOCKET_RFCOMM_SESSION_LISTENING;
 
 		error = solisten(s->l2so, 10, td);
 		if (error != 0)
 			goto bad;
 	} else {
 		/* We start the connection, so we are the session initiator */
 		s->flags = NG_BTSOCKET_RFCOMM_SESSION_INITIATOR;
 		s->state = NG_BTSOCKET_RFCOMM_SESSION_CONNECTING;
 
 		/* Reuse l2sa for the remote address on the RFCOMM PSM */
 		l2sa.l2cap_len = sizeof(l2sa);   
 		l2sa.l2cap_family = AF_BLUETOOTH;
 		l2sa.l2cap_psm = htole16(NG_L2CAP_PSM_RFCOMM);
 	        bcopy(dst, &l2sa.l2cap_bdaddr, sizeof(l2sa.l2cap_bdaddr));
 		l2sa.l2cap_cid = 0;
 		l2sa.l2cap_bdaddr_type = BDADDR_BREDR;
 
 		error = soconnect(s->l2so, (struct sockaddr *) &l2sa, td);
 		if (error != 0)
 			goto bad;
 	}
 
 done:
 	/* Publish the session on the global list and hand it to the caller */
 	LIST_INSERT_HEAD(&ng_btsocket_rfcomm_sessions, s, next);
 	*sp = s;
 
 	mtx_unlock(&s->session_mtx);
 
 	return (0);
 
 bad:
 	mtx_unlock(&s->session_mtx);
 
 	/* Return L2CAP socket back to its original state */
 	SOCKBUF_LOCK(&l2so->so_rcv);
 	soupcall_clear(s->l2so, SO_RCV);
 	SOCKBUF_UNLOCK(&l2so->so_rcv);
 	SOCKBUF_LOCK(&l2so->so_snd);
 	soupcall_clear(s->l2so, SO_SND);
 	SOCKBUF_UNLOCK(&l2so->so_snd);
 	l2so->so_state &= ~SS_NBIO;
 
 	/* The session was never linked to the list; destroy and free it */
 	mtx_destroy(&s->session_mtx);
 	bzero(s, sizeof(*s));
 	free(s, M_NETGRAPH_BTSOCKET_RFCOMM);
 
 	return (error);
 } /* ng_btsocket_rfcomm_session_create */
 
 /*
  * Process accept() on RFCOMM session
  * XXX FIXME locking for "l2so"?
  */
 
 static int
 ng_btsocket_rfcomm_session_accept(ng_btsocket_rfcomm_session_p s0)
 {
 	struct socket			*l2so = NULL;
 	struct sockaddr_l2cap		*l2sa = NULL;
 	ng_btsocket_l2cap_pcb_t		*l2pcb = NULL;
 	ng_btsocket_rfcomm_session_p	 s = NULL;
 	int				 error = 0;
 
 	/*
 	 * s0 - listening session whose L2CAP socket is checked for a
 	 *      completed connection.
 	 *
 	 * Returns 0 when a connection was accepted and a new session was
 	 * created for it. Returns EWOULDBLOCK when the completed queue is
 	 * empty, ECONNABORTED when it is empty and the socket cannot
 	 * receive any more, EBUSY when a session between the two addresses
 	 * already exists, or the error from the socket, soaccept() or
 	 * ng_btsocket_rfcomm_session_create().
 	 */
 
 	mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED);
 	mtx_assert(&s0->session_mtx, MA_OWNED);
 
 	/* Check if there is a complete L2CAP connection in the queue */
 	if ((error = s0->l2so->so_error) != 0) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not accept connection on L2CAP socket, error=%d\n", __func__, error);
 		s0->l2so->so_error = 0;
 
 		return (error);
 	}
 
 	ACCEPT_LOCK();
 	if (TAILQ_EMPTY(&s0->l2so->so_comp)) {
 		ACCEPT_UNLOCK();
 		if (s0->l2so->so_rcv.sb_state & SBS_CANTRCVMORE)
 			return (ECONNABORTED);
 		return (EWOULDBLOCK);
 	}
 
 	/* Accept incoming L2CAP connection */
 	l2so = TAILQ_FIRST(&s0->l2so->so_comp);
 	if (l2so == NULL)
 		panic("%s: l2so == NULL\n", __func__);
 
 	/* Take the socket off the completed queue and reference it */
 	TAILQ_REMOVE(&s0->l2so->so_comp, l2so, so_list);
 	s0->l2so->so_qlen --;
 	l2so->so_qstate &= ~SQ_COMP;
 	l2so->so_head = NULL;
 	SOCK_LOCK(l2so);
 	soref(l2so);
 	l2so->so_state |= SS_NBIO;
 	SOCK_UNLOCK(l2so);
 	ACCEPT_UNLOCK();
 
 	error = soaccept(l2so, (struct sockaddr **) &l2sa);
 
 	/*
 	 * soaccept() hands back the peer address in storage that belongs to
 	 * the caller. We take the addresses from the L2CAP PCB below and
 	 * never look at l2sa, so release it right away instead of leaking
 	 * it on every accepted connection.
 	 */
 
 	if (l2sa != NULL) {
 		free(l2sa, M_SONAME);
 		l2sa = NULL;
 	}
 
 	if (error != 0) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: soaccept() on L2CAP socket failed, error=%d\n", __func__, error);
 		soclose(l2so);
 
 		return (error);
 	}
 
 	/*
 	 * Check if there is already active RFCOMM session between two devices.
 	 * If so then close L2CAP connection. We only support one RFCOMM session
 	 * between each pair of devices. Note that here we assume session in any
 	 * state. The session even could be in the middle of disconnecting.
 	 */
 
 	l2pcb = so2l2cap_pcb(l2so);
 	s = ng_btsocket_rfcomm_session_by_addr(&l2pcb->src, &l2pcb->dst);
 	if (s == NULL) {
 		/* Create a new RFCOMM session */
 		error = ng_btsocket_rfcomm_session_create(&s, l2so, NULL, NULL,
 				curthread /* XXX */);
 		if (error == 0) {
 			mtx_lock(&s->session_mtx);
 
 			s->flags = 0;
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CONNECTED;
 
 			/*
 			 * Adjust MTU on incoming connection. Reserve 5 bytes:
 			 * RFCOMM frame header, one extra byte for length and 
 			 * one extra byte for credits.
 			 */
 
 			s->mtu = min(l2pcb->imtu, l2pcb->omtu) -
 					sizeof(struct rfcomm_frame_hdr) - 1 - 1;
 
 			mtx_unlock(&s->session_mtx);
 		} else {
 			NG_BTSOCKET_RFCOMM_ALERT(
 "%s: Failed to create new RFCOMM session, error=%d\n", __func__, error);
 
 			/* session_create() does not own l2so on failure */
 			soclose(l2so);
 		}
 	} else {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Rejecting duplicating RFCOMM session between src=%x:%x:%x:%x:%x:%x and " \
 "dst=%x:%x:%x:%x:%x:%x, state=%d, flags=%#x\n",	__func__,
 			l2pcb->src.b[5], l2pcb->src.b[4], l2pcb->src.b[3],
 			l2pcb->src.b[2], l2pcb->src.b[1], l2pcb->src.b[0],
 			l2pcb->dst.b[5], l2pcb->dst.b[4], l2pcb->dst.b[3],
 			l2pcb->dst.b[2], l2pcb->dst.b[1], l2pcb->dst.b[0],
 			s->state, s->flags);
 
 		error = EBUSY;
 		soclose(l2so);
 	}
 
 	return (error);
 } /* ng_btsocket_rfcomm_session_accept */
 
 /*
  * Process connect() on RFCOMM session
  * XXX FIXME locking for "l2so"?
  */
 
 static int
 ng_btsocket_rfcomm_session_connect(ng_btsocket_rfcomm_session_p s)
 {
 	ng_btsocket_l2cap_pcb_p	l2pcb = so2l2cap_pcb(s->l2so);
 	int			error;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/* A pending socket error means the connect failed: consume it */
 	error = s->l2so->so_error;
 	if (error != 0) {
 		s->l2so->so_error = 0;
 
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not connect RFCOMM session, error=%d, state=%d, flags=%#x\n",
 			__func__, error, s->state, s->flags);
 
 		return (error);
 	}
 
 	/* Nothing to do yet while the L2CAP connect is still in progress */
 	if ((s->l2so->so_state & SS_ISCONNECTING) != 0)
 		return (0);
 
 	/* L2CAP is up: the session is now connected */
 	s->state = NG_BTSOCKET_RFCOMM_SESSION_CONNECTED;
 
 	/*
 	 * Derive the session MTU from the smaller of the two L2CAP MTUs,
 	 * leaving room for the RFCOMM frame header, one extra length byte
 	 * and one credits byte.
 	 */
 
 	s->mtu = min(l2pcb->imtu, l2pcb->omtu) -
 			sizeof(struct rfcomm_frame_hdr) - 1 - 1;
 
 	/* Send SABM on DLCI 0 to open the multiplexor channel */
 	error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_SABM,0);
 	if (error != 0)
 		return (error);
 
 	return (ng_btsocket_rfcomm_task_wakeup());
 }/* ng_btsocket_rfcomm_session_connect */
 
 /*
  * Receive data on RFCOMM session
  * XXX FIXME locking for "l2so"?
  */
 
 static int
 ng_btsocket_rfcomm_session_receive(ng_btsocket_rfcomm_session_p s)
 {
 	struct mbuf	*m = NULL;
 	struct uio	 uio;
 	int		 more, flags, error;
 
 	/*
 	 * Reads packets from the session's L2CAP socket and passes each of
 	 * them to ng_btsocket_rfcomm_receive_frame().
 	 *
 	 * Returns 0 when there is nothing (more) to read, otherwise the
 	 * pending socket error or the error returned by soreceive().
 	 */
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/* Can we read from the L2CAP socket? */
 	if (!soreadable(s->l2so))
 		return (0);
 
 	/* First check for error on L2CAP socket */
 	if ((error = s->l2so->so_error) != 0) {
 		s->l2so->so_error = 0;
 
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not receive data from L2CAP socket, error=%d, state=%d, flags=%#x\n",
 			__func__, error, s->state, s->flags);
 
 		return (error);
 	}
 
 	/*
 	 * Read all packets from the L2CAP socket. 
 	 * XXX FIXME/VERIFY is that correct? For now use m->m_nextpkt as
 	 * indication that there is more packets on the socket's buffer.
 	 * Also what should we use in uio.uio_resid?
 	 * May be s->mtu + sizeof(struct rfcomm_frame_hdr) + 1 + 1?
 	 */
 
 	for (more = 1; more; ) {
 		/* Try to get next packet from socket */
 		bzero(&uio, sizeof(uio));
 /*		uio.uio_td = NULL; */
 		uio.uio_resid = 1000000000;
 		flags = MSG_DONTWAIT;
 
 		m = NULL;
 		error = soreceive(s->l2so, NULL, &uio, &m,
 		    (struct mbuf **) NULL, &flags);
 		if (error != 0) {
 			if (error == EWOULDBLOCK)
 				return (0); /* XXX can happen? */
 
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not receive data from L2CAP socket, error=%d\n", __func__, error);
 
 			return (error);
 		}
 
 		/*
 		 * soreceive() may succeed without handing back any mbuf
 		 * (for example when the socket has nothing left to read).
 		 * There is no frame to process then, so stop here instead
 		 * of dereferencing a NULL pointer below.
 		 */
 
 		if (m == NULL)
 			return (0);
 	
 		more = (m->m_nextpkt != NULL);
 		m->m_nextpkt = NULL;
 
 		/* The frame handler takes over the mbuf */
 		ng_btsocket_rfcomm_receive_frame(s, m);
 	}
 
 	return (0);
 } /* ng_btsocket_rfcomm_session_receive */
 
 /*
  * Send data on RFCOMM session
  * XXX FIXME locking for "l2so"?
  */
 
 static int
 ng_btsocket_rfcomm_session_send(ng_btsocket_rfcomm_session_p s)
 {
 	struct mbuf	*frame = NULL;
 	int		 error;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * Push queued frames to the L2CAP socket until the socket stops
 	 * being writeable, the session queue runs dry or an error shows up.
 	 */
 
 	for (;;) {
 		if (!sowriteable(s->l2so))
 			break;
 
 		/* Consume and report any error pending on the socket */
 		error = s->l2so->so_error;
 		if (error != 0) {
 			s->l2so->so_error = 0;
 
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Detected error=%d on L2CAP socket, state=%d, flags=%#x\n",
 				__func__, error, s->state, s->flags);
 
 			return (error);
 		}
 
 		/* Take the next frame; an empty queue means we are done */
 		NG_BT_MBUFQ_DEQUEUE(&s->outq, frame);
 		if (frame == NULL)
 			break;
 
 		/* Hand the frame to the protocol's send routine */
 		error = (*s->l2so->so_proto->pr_usrreqs->pru_send)(s->l2so,
 				0, frame, NULL, NULL, curthread /* XXX */);
 		if (error != 0) {
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not send data to L2CAP socket, error=%d\n", __func__, error);
 
 			return (error);
 		}
 	}
 
 	return (0);
 } /* ng_btsocket_rfcomm_session_send */
 
 /*
  * Close and disconnect all DLCs for the given session. Caller must hold
  * s->session_mtx. Will wakeup session.
  */
 
 static void
 ng_btsocket_rfcomm_session_clean(ng_btsocket_rfcomm_session_p s)
 {
 	ng_btsocket_rfcomm_pcb_p	dlc = NULL, dlc_next = NULL;
 	int				reason;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * Kill every DLC on the session. The successor is read before the
 	 * kill because ng_btsocket_rfcomm_pcb_kill will unlink DLC from the
 	 * session, so LIST_FOREACH cannot be used.
 	 */
 
 	dlc = LIST_FIRST(&s->dlcs);
 	while (dlc != NULL) {
 		mtx_lock(&dlc->pcb_mtx);
 		dlc_next = LIST_NEXT(dlc, session_next);
 
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: Disconnecting dlci=%d, state=%d, flags=%#x\n",
 			__func__, dlc->dlci, dlc->state, dlc->flags);
 
 		/* Connected DLCs see a reset, all others a refusal */
 		reason = (dlc->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED)?
 				ECONNRESET : ECONNREFUSED;
 
 		ng_btsocket_rfcomm_pcb_kill(dlc, reason);
 
 		mtx_unlock(&dlc->pcb_mtx);
 		dlc = dlc_next;
 	}
 } /* ng_btsocket_rfcomm_session_clean */
 
 /*
  * Process all DLCs on the session. Caller MUST hold s->session_mtx.
  */
 
 static void
 ng_btsocket_rfcomm_session_process_pcb(ng_btsocket_rfcomm_session_p s)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = NULL, pcb_next = NULL;
 	int				error;
 
 	/*
 	 * Walks all DLCs linked to session "s" and, depending on the state
 	 * of each DLC, kills it (detach, timeout, error), sends pending data
 	 * or sends a DISC frame. Each DLC is handled with its pcb_mtx held.
 	 */
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * Note: cannot use LIST_FOREACH because ng_btsocket_rfcomm_pcb_kill
 	 * will unlink DLC from the session
 	 */
 
 	for (pcb = LIST_FIRST(&s->dlcs); pcb != NULL; ) {
 		mtx_lock(&pcb->pcb_mtx);
 		/* Remember the successor before this DLC may get unlinked */
 		pcb_next = LIST_NEXT(pcb, session_next);
 
 		switch (pcb->state) {
 
 		/*
 		 * If DLC in W4_CONNECT state then we should check for both
 		 * timeout and detach.
 		 */
 
 		case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT:
 			if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_DETACHED)
 				ng_btsocket_rfcomm_pcb_kill(pcb, 0);
 			else if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT)
 				ng_btsocket_rfcomm_pcb_kill(pcb, ETIMEDOUT);
 			break;
 
 		/*
 		 * If DLC in CONFIGURING or CONNECTING state then we only
 		 * should check for timeout. If detach() was called then
 		 * DLC will be moved into DISCONNECTING state.
 		 */
 
 		case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING:
 		case NG_BTSOCKET_RFCOMM_DLC_CONNECTING:
 			if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT)
 				ng_btsocket_rfcomm_pcb_kill(pcb, ETIMEDOUT);
 			break;
 
 		/*
 		 * If DLC in CONNECTED state then we need to send data (if any)
 		 * from the socket's send queue. Note that we will send data
 		 * from either all sockets or none. This may overload session's
 		 * outgoing queue (but we do not check for that).
 		 *
 		 * XXX FIXME need scheduler for RFCOMM sockets
 		 */
 
 		case NG_BTSOCKET_RFCOMM_DLC_CONNECTED:
 			error = ng_btsocket_rfcomm_pcb_send(pcb, ALOT);
 			if (error != 0)
 				ng_btsocket_rfcomm_pcb_kill(pcb, error);
 			break;
 
 		/*
 		 * If DLC in DISCONNECTING state then we must send DISC frame.
 		 * Note that if DLC has timeout set then we do not need to 
 		 * resend DISC frame.
 		 *
 		 * XXX FIXME need to drain all data from the socket's queue
 		 * if LINGER option was set
 		 */
 
 		case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING:
 			if (!(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)) {
 				/* No timeout pending: send DISC and arm one */
 				error = ng_btsocket_rfcomm_send_command(
 						pcb->session, RFCOMM_FRAME_DISC,
 						pcb->dlci);
 				if (error == 0)
 					ng_btsocket_rfcomm_timeout(pcb);
 				else
 					ng_btsocket_rfcomm_pcb_kill(pcb, error);
 			} else if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT)
 				ng_btsocket_rfcomm_pcb_kill(pcb, ETIMEDOUT);
 			break;
 		
 /*		case NG_BTSOCKET_RFCOMM_DLC_CLOSED: */
 		default:
 			/* Any other state is not expected on a linked DLC */
 			panic("%s: Invalid DLC state=%d, flags=%#x\n",
 				__func__, pcb->state, pcb->flags);
 			break;
 		}
 
 		mtx_unlock(&pcb->pcb_mtx);
 		pcb = pcb_next;
 	}
 } /* ng_btsocket_rfcomm_session_process_pcb */
 
 /*
  * Find RFCOMM session between "src" and "dst".
  * Caller MUST hold ng_btsocket_rfcomm_sessions_mtx.
  */
 
 static ng_btsocket_rfcomm_session_p
 ng_btsocket_rfcomm_session_by_addr(bdaddr_p src, bdaddr_p dst)
 {
 	ng_btsocket_rfcomm_session_p	cur = NULL;
 	ng_btsocket_l2cap_pcb_p		l2pcb = NULL;
 	int				wildcard_src;
 
 	mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED);
 
 	/* A source equal to NG_HCI_BDADDR_ANY matches any local address */
 	wildcard_src = (bcmp(src, NG_HCI_BDADDR_ANY, sizeof(*src)) == 0);
 
 	LIST_FOREACH(cur, &ng_btsocket_rfcomm_sessions, next) {
 		l2pcb = so2l2cap_pcb(cur->l2so);
 
 		/* Destination address must always match */
 		if (bcmp(&l2pcb->dst, dst, sizeof(*dst)) != 0)
 			continue;
 
 		/* Source address must match unless it is a wildcard */
 		if (wildcard_src ||
 		    bcmp(&l2pcb->src, src, sizeof(*src)) == 0)
 			return (cur);
 	}
 
 	/* No session between "src" and "dst" */
 	return (NULL);
 } /* ng_btsocket_rfcomm_session_by_addr */
 
 /*****************************************************************************
  *****************************************************************************
  **                                  RFCOMM 
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Process incoming RFCOMM frame. Caller must hold s->session_mtx.
  * XXX FIXME check frame length
  */
 
 static int
 ng_btsocket_rfcomm_receive_frame(ng_btsocket_rfcomm_session_p s,
 		struct mbuf *m0)
 {
 	struct rfcomm_frame_hdr	*hdr = NULL;
 	struct mbuf		*m = NULL;
 	u_int16_t		 length;
 	u_int8_t		 dlci, type;
 	int			 error = 0;
 
 	/*
 	 * s  - session the frame was received on
 	 * m0 - the frame; always consumed (freed here or passed on to the
 	 *      MCC/UIH handlers)
 	 *
 	 * Returns 0 or the error from the frame handler, ENOBUFS when
 	 * m_pullup() fails and EINVAL for truncated frames, a bad checksum
 	 * or an unknown frame type.
 	 */
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * The shortest frame is the frame header (which includes one byte
 	 * of length) followed by the FCS byte. Anything shorter comes from
 	 * a broken or hostile peer and must be dropped before we look at
 	 * the header fields.
 	 */
 
 	if (m0->m_pkthdr.len < (int) (sizeof(*hdr) + 1)) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Invalid RFCOMM packet. Frame is too short, len=%d\n",
 			__func__, m0->m_pkthdr.len);
 		NG_FREE_M(m0);
 
 		return (EINVAL);
 	}
 
 	/* Pullup as much as we can into first mbuf (for direct access) */
 	length = min(m0->m_pkthdr.len, MHLEN);
 	if (m0->m_len < length) {
 		if ((m0 = m_pullup(m0, length)) == NULL) {
 			NG_BTSOCKET_RFCOMM_ALERT(
 "%s: m_pullup(%d) failed\n", __func__, length);
 
 			return (ENOBUFS);
 		}
 	}
 
 	hdr = mtod(m0, struct rfcomm_frame_hdr *);
 	dlci = RFCOMM_DLCI(hdr->address);
 	type = RFCOMM_TYPE(hdr->control);
 
 	/* Test EA bit in length. If not set then we have 2 bytes of length */
 	if (!RFCOMM_EA(hdr->length)) {
 		/* Second length byte plus FCS must be present as well */
 		if (m0->m_pkthdr.len < (int) (sizeof(*hdr) + 1 + 1)) {
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Invalid RFCOMM packet. Frame is too short, len=%d\n",
 				__func__, m0->m_pkthdr.len);
 			NG_FREE_M(m0);
 
 			return (EINVAL);
 		}
 
 		bcopy(&hdr->length, &length, sizeof(length));
 		length = le16toh(length) >> 1;
 		m_adj(m0, sizeof(*hdr) + 1);
 	} else {
 		length = hdr->length >> 1;
 		m_adj(m0, sizeof(*hdr));
 	}
 
 	/* hdr still points at the header bytes; m_adj() only trimmed them */
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got frame type=%#x, dlci=%d, length=%d, cr=%d, pf=%d, len=%d\n",
 		__func__, type, dlci, length, RFCOMM_CR(hdr->address),
 		RFCOMM_PF(hdr->control), m0->m_pkthdr.len);
 
 	/*
 	 * Get FCS (the last byte in the frame)
 	 * XXX this will not work if mbuf chain ends with empty mbuf.
 	 * In that case the frame is dropped; data received from the peer
 	 * must never be able to bring the system down.
 	 */
 
 	for (m = m0; m->m_next != NULL; m = m->m_next)
 		;
 	if (m->m_len <= 0) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Empty mbuf at the end of the chain, len=%d\n",
 			__func__, m->m_len);
 		NG_FREE_M(m0);
 
 		return (EINVAL);
 	}
 
 	/*
 	 * Check FCS. We only need to calculate FCS on first 2 or 3 bytes
 	 * and already m_pullup'ed mbuf chain, so it should be safe.
 	 */
 
 	if (ng_btsocket_rfcomm_check_fcs((u_int8_t *) hdr, type, m->m_data[m->m_len - 1])) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Invalid RFCOMM packet. Bad checksum\n", __func__);
 		NG_FREE_M(m0);
 
 		return (EINVAL);
 	}
 
 	m_adj(m0, -1); /* Trim FCS byte */
 
 	/*
 	 * Process RFCOMM frame.
 	 *
 	 * From TS 07.10 spec
 	 * 
 	 * "... In the case where a SABM or DISC command with the P bit set
 	 * to 0 is received then the received frame shall be discarded..."
 	 *
 	 * "... If a unsolicited DM response is received then the frame shall
 	 * be processed irrespective of the P/F setting... "
 	 *
 	 * "... The station may transmit response frames with the F bit set 
 	 * to 0 at any opportunity on an asynchronous basis. However, in the 
 	 * case where a UA response is received with the F bit set to 0 then 
 	 * the received frame shall be discarded..."
 	 *
 	 * From Bluetooth spec
 	 *
 	 * "... When credit based flow control is being used, the meaning of
 	 * the P/F bit in the control field of the RFCOMM header is redefined
 	 * for UIH frames..."
 	 */
 
 	switch (type) {
 	case RFCOMM_FRAME_SABM:
 		if (RFCOMM_PF(hdr->control))
 			error = ng_btsocket_rfcomm_receive_sabm(s, dlci);
 		break;
 
 	case RFCOMM_FRAME_DISC:
 		if (RFCOMM_PF(hdr->control))
 			error = ng_btsocket_rfcomm_receive_disc(s, dlci);
 		break;
 
 	case RFCOMM_FRAME_UA:
 		if (RFCOMM_PF(hdr->control))
 			error = ng_btsocket_rfcomm_receive_ua(s, dlci);
 		break;
 
 	case RFCOMM_FRAME_DM:
 		error = ng_btsocket_rfcomm_receive_dm(s, dlci);
 		break;
 
 	case RFCOMM_FRAME_UIH:
 		/* The MCC/UIH handlers take over m0, so do not free it here */
 		if (dlci == 0)
 			error = ng_btsocket_rfcomm_receive_mcc(s, m0);
 		else
 			error = ng_btsocket_rfcomm_receive_uih(s, dlci,
 					RFCOMM_PF(hdr->control), m0);
 
 		return (error);
 		/* NOT REACHED */
 
 	default:
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Invalid RFCOMM packet. Unknown type=%#x\n", __func__, type);
 		error = EINVAL;
 		break;
 	}
 
 	NG_FREE_M(m0);
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_frame */
 
 /*
  * Process RFCOMM SABM frame
  */
 
 static int
 ng_btsocket_rfcomm_receive_sabm(ng_btsocket_rfcomm_session_p s, int dlci)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = NULL;
 	int				error = 0;
 
 	/*
 	 * s    - session the SABM was received on (s->session_mtx held)
 	 * dlci - DLCI from the frame; 0 addresses the multiplexor channel
 	 *
 	 * Returns 0 on success, EINVAL/ENOENT when the session or the DLC
 	 * is in a state that does not allow SABM, or the error from sending
 	 * the response.
 	 */
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got SABM, session state=%d, flags=%#x, mtu=%d, dlci=%d\n",
 		__func__, s->state, s->flags, s->mtu, dlci);
 
 	/* DLCI == 0 means open multiplexor channel */
 	if (dlci == 0) {
 		switch (s->state) {
 		case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED:
 		case NG_BTSOCKET_RFCOMM_SESSION_OPEN:
 			/* Acknowledge with UA; on failure close the session */
 			error = ng_btsocket_rfcomm_send_command(s,
 					RFCOMM_FRAME_UA, dlci);
 			if (error == 0) {
 				s->state = NG_BTSOCKET_RFCOMM_SESSION_OPEN;
 				ng_btsocket_rfcomm_connect_cfm(s);
 			} else {
 				s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 				ng_btsocket_rfcomm_session_clean(s);
 			}
 			break;
 
 		default:
 			NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got SABM for session in invalid state state=%d, flags=%#x\n",
 				__func__, s->state, s->flags);
 			error = EINVAL;
 			break;
 		}
 
 		return (error);
 	}
 
 	/* Make sure multiplexor channel is open */
 	if (s->state != NG_BTSOCKET_RFCOMM_SESSION_OPEN) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got SABM for dlci=%d with mulitplexor channel closed, state=%d, " \
 "flags=%#x\n",		__func__, dlci, s->state, s->flags);
 
 		return (EINVAL);
 	}
 
 	/*
 	 * Check if we have this DLCI. This might happen when remote
 	 * peer uses PN command before actual open (SABM) happens.
 	 */
 
 	pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 	if (pcb != NULL) {
 		mtx_lock(&pcb->pcb_mtx);
 
 		/* Only a DLC in CONNECTING state may be opened by SABM */
 		if (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTING) {
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got SABM for dlci=%d in invalid state=%d, flags=%#x\n",
 				__func__, dlci, pcb->state, pcb->flags);
 			mtx_unlock(&pcb->pcb_mtx);
 
 			return (ENOENT);
 		}
 
 		ng_btsocket_rfcomm_untimeout(pcb);
 
 		/* Reply with UA, then MSC; any failure kills the DLC */
 		error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_UA,dlci);
 		if (error == 0)
 			error = ng_btsocket_rfcomm_send_msc(pcb);
 
 		if (error == 0) {
 			pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTED;
 			soisconnected(pcb->so);
 		} else
 			ng_btsocket_rfcomm_pcb_kill(pcb, error);
 
 		mtx_unlock(&pcb->pcb_mtx);
 
 		return (error);
 	}
 
 	/*
 	 * We do not have requested DLCI, so it must be an incoming connection
 	 * with default parameters. Try to accept it.
 	 */ 
 
 	pcb = ng_btsocket_rfcomm_connect_ind(s, RFCOMM_SRVCHANNEL(dlci));
 	if (pcb != NULL) {
 		mtx_lock(&pcb->pcb_mtx);
 
 		pcb->dlci = dlci;
 
 		/* Same UA + MSC handshake as for an already known DLC */
 		error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_UA,dlci);
 		if (error == 0)
 			error = ng_btsocket_rfcomm_send_msc(pcb);
 
 		if (error == 0) {
 			pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTED;
 			soisconnected(pcb->so);
 		} else
 			ng_btsocket_rfcomm_pcb_kill(pcb, error);
 
 		mtx_unlock(&pcb->pcb_mtx);
 	} else
 		/* Nobody is listen()ing on the requested DLCI */
 		error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_DM,dlci);
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_sabm */
 
 /*
  * Process RFCOMM DISC frame
  */
 
 static int
 ng_btsocket_rfcomm_receive_disc(ng_btsocket_rfcomm_session_p s, int dlci)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = NULL;
 	int				error = 0;
 
 	/*
 	 * s    - session the DISC was received on (s->session_mtx held)
 	 * dlci - DLCI from the frame; 0 addresses the multiplexor channel
 	 *
 	 * Returns the result of sending the response frame (UA, or DM when
 	 * the DLCI is not known).
 	 */
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got DISC, session state=%d, flags=%#x, mtu=%d, dlci=%d\n",
 		__func__, s->state, s->flags, s->mtu, dlci);
 
 	/* DLCI == 0 means close multiplexor channel */
 	if (dlci == 0) {
 		/* XXX FIXME assume that remote side will close the socket */
 		error = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_UA, 0);
 		if (error == 0) {
 			/*
 			 * A session that was already disconnecting becomes
 			 * closed, any other session starts disconnecting.
 			 */
 			if (s->state == NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING)
 				s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; /* XXX */
 			else
 				s->state = NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING;
 		} else
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; /* XXX */
 
 		/* In every case all DLCs on the session are disconnected */
 		ng_btsocket_rfcomm_session_clean(s);
 	} else {
 		pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 		if (pcb != NULL) {
 			int	err;
 
 			mtx_lock(&pcb->pcb_mtx);
 
 			NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got DISC for dlci=%d, state=%d, flags=%#x\n",
 				__func__, dlci, pcb->state, pcb->flags);
 
 			error = ng_btsocket_rfcomm_send_command(s,
 					RFCOMM_FRAME_UA, dlci);
 
 			/*
 			 * "err" is what the DLC is killed with; "error" (the
 			 * result of sending UA) is what we return.
 			 */
 			if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED)
 				err = 0;
 			else
 				err = ECONNREFUSED;
 
 			ng_btsocket_rfcomm_pcb_kill(pcb, err);
 
 			mtx_unlock(&pcb->pcb_mtx);
 		} else {
 			NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got DISC for non-existing dlci=%d\n", __func__, dlci);
 
 			/* Unknown DLCI is answered with DM */
 			error = ng_btsocket_rfcomm_send_command(s,
 					RFCOMM_FRAME_DM, dlci);
 		}
 	}
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_disc */
 
 /*
  * Process RFCOMM UA frame
  */
 
 static int
 ng_btsocket_rfcomm_receive_ua(ng_btsocket_rfcomm_session_p s, int dlci)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = NULL;
 	int				error = 0;
 
 	/*
 	 * s    - session the UA was received on (s->session_mtx held)
 	 * dlci - DLCI from the frame; 0 addresses the multiplexor channel
 	 *
 	 * Returns 0 on success, ENOENT when the session or the DLC is in a
 	 * state that does not expect UA, or the error from sending MSC/DM.
 	 */
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got UA, session state=%d, flags=%#x, mtu=%d, dlci=%d\n",
 		__func__, s->state, s->flags, s->mtu, dlci);
 
 	/* dlci == 0 means multiplexor channel */
 	if (dlci == 0) {
 		switch (s->state) {
 		case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED:
 			/* Multiplexor channel is open: start waiting DLCs */
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_OPEN;
 			ng_btsocket_rfcomm_connect_cfm(s);
 			break;
 
 		case NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING:
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 			ng_btsocket_rfcomm_session_clean(s);
 			break;
 
 		default:
 			NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UA for session in invalid state=%d(%d), flags=%#x, mtu=%d\n",
 				__func__, s->state, INITIATOR(s), s->flags,
 				s->mtu);
 			error = ENOENT;
 			break;
 		}
 
 		return (error);
 	}
 
 	/* Check if we have this DLCI */
 	pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 	if (pcb != NULL) {
 		mtx_lock(&pcb->pcb_mtx);
 
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got UA for dlci=%d, state=%d, flags=%#x\n",
 			__func__, dlci, pcb->state, pcb->flags);
 
 		switch (pcb->state) {
 		case NG_BTSOCKET_RFCOMM_DLC_CONNECTING:
 			ng_btsocket_rfcomm_untimeout(pcb);
 
 			error = ng_btsocket_rfcomm_send_msc(pcb);
 			if (error == 0) {
 				pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTED;
 				soisconnected(pcb->so);
 			} else
 				/*
 				 * The timeout was cancelled above. Leaving
 				 * the DLC in CONNECTING state would strand
 				 * it, so kill it the same way the SABM
 				 * handler does when it fails to send MSC.
 				 */
 				ng_btsocket_rfcomm_pcb_kill(pcb, error);
 			break;
 
 		case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING:
 			/* Peer confirmed our DISC: the DLC is gone */
 			ng_btsocket_rfcomm_pcb_kill(pcb, 0);
 			break;
 
 		default:
 			NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UA for dlci=%d in invalid state=%d, flags=%#x\n",
 				__func__, dlci, pcb->state, pcb->flags);
 			error = ENOENT;
 			break;
 		}
 
 		mtx_unlock(&pcb->pcb_mtx);
 	} else {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UA for non-existing dlci=%d\n", __func__, dlci);
 
 		/* Unknown DLCI is answered with DM */
 		error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_DM,dlci);
 	}
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_ua */
 
 /*
  * Process RFCOMM DM frame
  */
 
 static int
 ng_btsocket_rfcomm_receive_dm(ng_btsocket_rfcomm_session_p s, int dlci)
 {
 	ng_btsocket_rfcomm_pcb_p	dlc = NULL;
 	int				reason;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got DM, session state=%d, flags=%#x, mtu=%d, dlci=%d\n",
 		__func__, s->state, s->flags, s->mtu, dlci);
 
 	/* DM on the multiplexor channel (DLCI 0) closes the whole session */
 	if (dlci == 0) {
 		s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 		ng_btsocket_rfcomm_session_clean(s);
 
 		return (0);
 	}
 
 	/* Otherwise only the addressed DLC (if we know it) is affected */
 	dlc = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 	if (dlc == NULL) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got DM for non-existing dlci=%d\n", __func__, dlci);
 
 		return (0);
 	}
 
 	mtx_lock(&dlc->pcb_mtx);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got DM for dlci=%d, state=%d, flags=%#x\n",
 		__func__, dlci, dlc->state, dlc->flags);
 
 	/* Connected DLCs see a reset, all others a refusal */
 	reason = (dlc->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED)?
 			ECONNRESET : ECONNREFUSED;
 
 	ng_btsocket_rfcomm_pcb_kill(dlc, reason);
 
 	mtx_unlock(&dlc->pcb_mtx);
 
 	/* The frame is always reported as handled */
 	return (0);
 } /* ng_btsocket_rfcomm_receive_dm */
 
 /*
  * Process RFCOMM UIH frame (data)
  *
  * s    - session the frame arrived on; s->session_mtx must be held
  * dlci - DLC the frame is addressed to
  * pf   - when non-zero and the DLC uses credit based flow control, the
  *        first payload byte is consumed as a credit count
  * m0   - frame payload
  *
  * Every path either appends m0 to the socket receive queue or releases it
  * with NG_FREE_M() at the "drop" label, so the caller must not use m0 after
  * the call. Returns 0 or an errno value.
  */
 
 static int
 ng_btsocket_rfcomm_receive_uih(ng_btsocket_rfcomm_session_p s, int dlci,
 		int pf, struct mbuf *m0)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = NULL;
 	int				error = 0;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got UIH, session state=%d, flags=%#x, mtu=%d, dlci=%d, pf=%d, len=%d\n",
 		__func__, s->state, s->flags, s->mtu, dlci, pf,
 		m0->m_pkthdr.len);
 
 	/* XXX should we do it here? Check for session flow control */
 	if (s->flags & NG_BTSOCKET_RFCOMM_SESSION_LFC) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UIH with session flow control asserted, state=%d, flags=%#x\n",
 			__func__, s->state, s->flags);
 		goto drop;
 	}
 
 	/* Check if we have this dlci */
 	pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 	if (pcb == NULL) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UIH for non-existing dlci=%d\n", __func__, dlci);
 		error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_DM,dlci);
 		goto drop;
 	}
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	/* Check dlci state */
 	if (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTED) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UIH for dlci=%d in invalid state=%d, flags=%#x\n",
 			__func__, dlci, pcb->state, pcb->flags);
 		error = EINVAL;
 		goto drop1;
 	}
 
 	/*
 	 * Check dlci flow control. Note that error is still 0 on this path,
 	 * so the frame is dropped without reporting a failure to the caller.
 	 */
 	if (((pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) && pcb->rx_cred <= 0) ||
 	     (pcb->lmodem & RFCOMM_MODEM_FC)) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got UIH for dlci=%d with asserted flow control, state=%d, " \
 "flags=%#x, rx_cred=%d, lmodem=%#x\n",
 			__func__, dlci, pcb->state, pcb->flags,
 			pcb->rx_cred, pcb->lmodem);
 		goto drop1;
 	}
 
 	/*
 	 * Did we get any credits?
 	 * NOTE(review): the credit byte is read through mtod() without a
 	 * check in this function that m0 holds at least one byte - confirm
 	 * that the caller guarantees it.
 	 */
 	if ((pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) && pf) {
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got %d more credits for dlci=%d, state=%d, flags=%#x, " \
 "rx_cred=%d, tx_cred=%d\n",
 			__func__, *mtod(m0, u_int8_t *), dlci, pcb->state, 
 			pcb->flags, pcb->rx_cred, pcb->tx_cred);
 
 		/* Take the credits and strip the credit byte from the data */
 		pcb->tx_cred += *mtod(m0, u_int8_t *);
 		m_adj(m0, 1);
 
 		/* Send more from the DLC. XXX check for errors? */
 		ng_btsocket_rfcomm_pcb_send(pcb, ALOT);
 	} 
 
 	/* OK, the rest of the mbuf is the data */
 	if (m0->m_pkthdr.len > 0) {
 		/* If we are using credit flow control decrease rx_cred here */
 		if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) {
 			/* Give remote peer more credits (if needed) */
 			if (-- pcb->rx_cred <= RFCOMM_MAX_CREDITS / 2)
 				ng_btsocket_rfcomm_send_credits(pcb);
 			else
 				NG_BTSOCKET_RFCOMM_INFO(
 "%s: Remote side still has credits, dlci=%d, state=%d, flags=%#x, " \
 "rx_cred=%d, tx_cred=%d\n",		__func__, dlci, pcb->state, pcb->flags,
 					pcb->rx_cred, pcb->tx_cred);
 		}
 		
 		/* Check packet against mtu on dlci */
 		if (m0->m_pkthdr.len > pcb->mtu) {
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got oversized UIH for dlci=%d, state=%d, flags=%#x, mtu=%d, len=%d\n",
 				__func__, dlci, pcb->state, pcb->flags,
 				pcb->mtu, m0->m_pkthdr.len);
 
 			error = EMSGSIZE;
 		} else if (m0->m_pkthdr.len > sbspace(&pcb->so->so_rcv)) {
  
 			/*
 			 * This is really bad. Receive queue on socket does
 			 * not have enough space for the packet. We do not
 			 * have any other choice but drop the packet.
 			 */
  
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Not enough space in socket receive queue. Dropping UIH for dlci=%d, " \
 "state=%d, flags=%#x, len=%d, space=%ld\n",
 				__func__, dlci, pcb->state, pcb->flags,
 				m0->m_pkthdr.len, sbspace(&pcb->so->so_rcv));
 
 			error = ENOBUFS;
 		} else {
 			/*
 			 * Append packet to the socket receive queue. The
 			 * queue owns the mbuf from here on, so clear m0 to
 			 * keep the NG_FREE_M() below from releasing it.
 			 */
-			sbappend(&pcb->so->so_rcv, m0);
+			sbappend(&pcb->so->so_rcv, m0, 0);
 			m0 = NULL;
 
 			sorwakeup(pcb->so);
 		}
 	}
 drop1:
 	/* Paths that took pcb->pcb_mtx leave through here */
 	mtx_unlock(&pcb->pcb_mtx);
 drop:
 	NG_FREE_M(m0); /* checks for != NULL */
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_uih */
 
 /*
  * Process RFCOMM MCC command (Multiplexor)
  * 
  * From TS 07.10 spec
  *
  * "5.4.3.1 Information Data
  * 
  *  ...The frames (UIH) sent by the initiating station have the C/R bit set 
  *  to 1 and those sent by the responding station have the C/R bit set to 0..."
  *
  * "5.4.6.2 Operating procedures
  *
  *  Messages always exist in pairs; a command message and a corresponding 
  *  response message. If the C/R bit is set to 1 the message is a command, 
  *  if it is set to 0 the message is a response...
  *
  *  ...
  * 
  *  NOTE: Notice that when UIH frames are used to convey information on DLCI 0
  *  there are at least two different fields that contain a C/R bit, and the 
  *  bits are set of different form. The C/R bit in the Type field shall be set
  *  as it is stated above, while the C/R bit in the Address field (see subclause
  *  5.2.1.2) shall be set as it is described in subclause 5.4.3.1."
  */
 
 static int
 ng_btsocket_rfcomm_receive_mcc(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr	*hdr = NULL;
 	u_int8_t		 cr, type, length;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * We can access data directly in the first mbuf, because we have
 	 * m_pullup()'ed mbuf chain in ng_btsocket_rfcomm_receive_frame().
 	 * All MCC commands should fit into single mbuf (except probably TEST).
 	 */
 
 	hdr = mtod(m0, struct rfcomm_mcc_hdr *);
 	cr = RFCOMM_CR(hdr->type);
 	type = RFCOMM_MCC_TYPE(hdr->type);
 	length = RFCOMM_MCC_LENGTH(hdr->length);
 
 	/* Check MCC frame length: header plus declared payload, nothing else */
 	if (sizeof(*hdr) + length != m0->m_pkthdr.len) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Invalid MCC frame length=%d, len=%d\n",
 			__func__, length, m0->m_pkthdr.len);
 		NG_FREE_M(m0);
 
 		return (EMSGSIZE);
 	}
 
 	/*
 	 * Dispatch on the MCC type. m0 is handed over to the per-type handler;
 	 * the two cases handled inline either free it (NSC) or reuse it for
 	 * the reply (unknown type).
 	 */
 	switch (type) {
 	case RFCOMM_MCC_TEST:
 		return (ng_btsocket_rfcomm_receive_test(s, m0));
 		/* NOT REACHED */
 
 	case RFCOMM_MCC_FCON:
 	case RFCOMM_MCC_FCOFF:
 		return (ng_btsocket_rfcomm_receive_fc(s, m0));
 		/* NOT REACHED */
 
 	case RFCOMM_MCC_MSC:
 		return (ng_btsocket_rfcomm_receive_msc(s, m0));
 		/* NOT REACHED */
 
 	case RFCOMM_MCC_RPN:
 		return (ng_btsocket_rfcomm_receive_rpn(s, m0));
 		/* NOT REACHED */
 
 	case RFCOMM_MCC_RLS:
 		return (ng_btsocket_rfcomm_receive_rls(s, m0));
 		/* NOT REACHED */
 
 	case RFCOMM_MCC_PN:
 		return (ng_btsocket_rfcomm_receive_pn(s, m0));
 		/* NOT REACHED */
 
 	case RFCOMM_MCC_NSC:
 		/*
 		 * Only log and drop the frame. The logged type is taken from
 		 * the first payload byte after the MCC header.
 		 * NOTE(review): that byte is read even when length is 0 -
 		 * confirm this is acceptable.
 		 */
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got MCC NSC, type=%#x, cr=%d, length=%d, session state=%d, flags=%#x, " \
 "mtu=%d, len=%d\n",	__func__, RFCOMM_MCC_TYPE(*((u_int8_t *)(hdr + 1))), cr,
 			 length, s->state, s->flags, s->mtu, m0->m_pkthdr.len);
 		NG_FREE_M(m0);
 		break;
 
 	default:
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got unknown MCC, type=%#x, cr=%d, length=%d, session state=%d, " \
 "flags=%#x, mtu=%d, len=%d\n",
 			__func__, type, cr, length, s->state, s->flags,
 			s->mtu, m0->m_pkthdr.len);
 
 		/* Reuse mbuf to send NSC: truncate it to just the MCC header */
 		hdr = mtod(m0, struct rfcomm_mcc_hdr *);
 		m0->m_pkthdr.len = m0->m_len = sizeof(*hdr);
 
 		/* Create MCC NSC header */
 		hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_NSC);
 		hdr->length = RFCOMM_MKLEN8(1);
 
 		/*
 		 * Put back MCC command type we did not like.
 		 * NOTE(review): assumes the first mbuf has room for one byte
 		 * past the header - TODO confirm.
 		 */
 		m0->m_data[m0->m_len] = RFCOMM_MKMCC_TYPE(cr, type);
 		m0->m_pkthdr.len ++;
 		m0->m_len ++;
 
 		/* Send UIH frame */
 		return (ng_btsocket_rfcomm_send_uih(s,
 				RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0));
 		/* NOT REACHED */
 	}
 
 	/* Only the NSC case falls out of the switch */
 	return (0);
 } /* ng_btsocket_rfcomm_receive_mcc */
 
 /*
  * Receive RFCOMM TEST MCC command
  *
  * A TEST command is echoed back as a TEST response in the same mbuf; a TEST
  * response is dropped. m0 is consumed either way. Caller must hold
  * s->session_mtx.
  */
 
 static int
 ng_btsocket_rfcomm_receive_test(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr	*mcc = mtod(m0, struct rfcomm_mcc_hdr *);
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC TEST, cr=%d, length=%d, session state=%d, flags=%#x, mtu=%d, " \
 "len=%d\n",
 		__func__, RFCOMM_CR(mcc->type), RFCOMM_MCC_LENGTH(mcc->length),
 		s->state, s->flags, s->mtu, m0->m_pkthdr.len);
 
 	if (!RFCOMM_CR(mcc->type)) {
 		NG_FREE_M(m0); /* XXX ignore response */
 
 		return (0);
 	}
 
 	/* Command: flip it into a response and send the payload back */
 	mcc->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_TEST);
 
 	return (ng_btsocket_rfcomm_send_uih(s,
 			RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0));
 } /* ng_btsocket_rfcomm_receive_test */
 
 /*
  * Receive RFCOMM FCON/FCOFF MCC command
  *
  * A command updates the session's NG_BTSOCKET_RFCOMM_SESSION_RFC flag and
  * is answered with a response of the same type, reusing m0. A response is
  * dropped. Caller must hold s->session_mtx.
  */
 
 static int
 ng_btsocket_rfcomm_receive_fc(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr	*mcc = mtod(m0, struct rfcomm_mcc_hdr *);
 	u_int8_t		 fc = RFCOMM_MCC_TYPE(mcc->type);
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * Turn ON/OFF aggregate flow on the entire session. When remote peer
 	 * asserted flow control no transmission shall occur except on dlci 0
 	 * (control channel).
 	 */
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC FC%s, cr=%d, length=%d, session state=%d, flags=%#x, mtu=%d, " \
 "len=%d\n",
 		__func__, (fc == RFCOMM_MCC_FCON)? "ON" : "OFF",
 		RFCOMM_CR(mcc->type), RFCOMM_MCC_LENGTH(mcc->length),
 		s->state, s->flags, s->mtu, m0->m_pkthdr.len);
 
 	if (!RFCOMM_CR(mcc->type)) {
 		NG_FREE_M(m0); /* XXX ignore response */
 
 		return (0);
 	}
 
 	/* FCON clears remote flow control, anything else asserts it */
 	if (fc != RFCOMM_MCC_FCON)
 		s->flags |= NG_BTSOCKET_RFCOMM_SESSION_RFC;
 	else
 		s->flags &= ~NG_BTSOCKET_RFCOMM_SESSION_RFC;
 
 	mcc->type = RFCOMM_MKMCC_TYPE(0, fc);
 
 	return (ng_btsocket_rfcomm_send_uih(s,
 			RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0));
 } /* ng_btsocket_rfcomm_receive_fc  */
 
 /*
  * Receive RFCOMM MSC MCC command
  *
  * A command for a DLC in CONNECTING or CONNECTED state records the remote
  * modem signals in pcb->rmodem and is answered with an MSC response that
  * reuses m0. A command for an unknown dlci fails with ENOENT, one for a DLC
  * in any other state with EINVAL. Responses are dropped. m0 is consumed on
  * every path. Caller must hold s->session_mtx.
  */
 
 static int
 ng_btsocket_rfcomm_receive_msc(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr		*mcc = mtod(m0, struct rfcomm_mcc_hdr*);
 	struct rfcomm_mcc_msc		*body = (struct rfcomm_mcc_msc *)(mcc+1);
 	ng_btsocket_rfcomm_pcb_t	*dlc = NULL;
 	int				 rv;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC MSC, dlci=%d, cr=%d, length=%d, session state=%d, flags=%#x, " \
 "mtu=%d, len=%d\n",
 		__func__,  RFCOMM_DLCI(body->address), RFCOMM_CR(mcc->type),
 		RFCOMM_MCC_LENGTH(mcc->length), s->state, s->flags,
 		s->mtu, m0->m_pkthdr.len);
 
 	if (!RFCOMM_CR(mcc->type)) {
 		NG_FREE_M(m0); /* XXX ignore response */
 
 		return (0);
 	}
 
 	dlc = ng_btsocket_rfcomm_pcb_by_dlci(s, RFCOMM_DLCI(body->address));
 	if (dlc == NULL) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got MSC command for non-existing dlci=%d\n",
 			__func__, RFCOMM_DLCI(body->address));
 		NG_FREE_M(m0);
 
 		return (ENOENT);
 	}
 
 	mtx_lock(&dlc->pcb_mtx);
 
 	/* MSC is only meaningful while the DLC is coming up or is up */
 	if (!(dlc->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTING ||
 	      dlc->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED)) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got MSC on dlci=%d in invalid state=%d\n",
 			__func__, RFCOMM_DLCI(body->address),
 			dlc->state);
 
 		mtx_unlock(&dlc->pcb_mtx);
 		NG_FREE_M(m0);
 
 		return (EINVAL);
 	}
 
 	dlc->rmodem = body->modem; /* Update remote port signals */
 
 	mcc->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_MSC);
 	rv = ng_btsocket_rfcomm_send_uih(s,
 			RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0);
 
 #if 0 /* YYY */
 	/* Send more data from DLC. XXX check for errors? */
 	if (!(dlc->rmodem & RFCOMM_MODEM_FC) &&
 	    !(dlc->flags & NG_BTSOCKET_RFCOMM_DLC_CFC))
 		ng_btsocket_rfcomm_pcb_send(dlc, ALOT);
 #endif /* YYY */
 
 	mtx_unlock(&dlc->pcb_mtx);
 
 	return (rv);
 } /* ng_btsocket_rfcomm_receive_msc */
 
 /*
  * Receive RFCOMM RPN MCC command
  * XXX FIXME do we need htole16/le16toh for RPN param_mask?
  *
  * A command is answered with an RPN response built in place in m0. A
  * one-byte command (dlci only) is a request for the current settings and is
  * answered with the defaults. A full command is negotiated: every parameter
  * the peer flagged in its param_mask that differs from what we support is
  * replaced with our value and its bit is cleared in the mask we send back.
  * Responses are dropped. m0 is consumed on every path. Caller must hold
  * s->session_mtx. Returns the result of sending the response, or 0.
  */
 
 static int
 ng_btsocket_rfcomm_receive_rpn(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr	*hdr = mtod(m0, struct rfcomm_mcc_hdr *);
 	struct rfcomm_mcc_rpn	*rpn = (struct rfcomm_mcc_rpn *)(hdr + 1);
 	int			 error = 0;
 	u_int16_t		 param_mask;
 	u_int8_t		 bit_rate, data_bits, stop_bits, parity,
 				 flow_control, xon_char, xoff_char;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC RPN, dlci=%d, cr=%d, length=%d, session state=%d, flags=%#x, " \
 "mtu=%d, len=%d\n",
 		__func__, RFCOMM_DLCI(rpn->dlci), RFCOMM_CR(hdr->type),
 		RFCOMM_MCC_LENGTH(hdr->length), s->state, s->flags,
 		s->mtu, m0->m_pkthdr.len);
 
 	if (RFCOMM_CR(hdr->type)) {
 		/* Start by accepting everything; bits are cleared on reject */
 		param_mask = RFCOMM_RPN_PM_ALL;
 
 		if (RFCOMM_MCC_LENGTH(hdr->length) == 1) {
 			/* Request - return default setting */
 			bit_rate = RFCOMM_RPN_BR_115200;
 			data_bits = RFCOMM_RPN_DATA_8;
 			stop_bits = RFCOMM_RPN_STOP_1;
 			parity = RFCOMM_RPN_PARITY_NONE;
 			flow_control = RFCOMM_RPN_FLOW_NONE;
 			xon_char = RFCOMM_RPN_XON_CHAR;
 			xoff_char = RFCOMM_RPN_XOFF_CHAR;
 		} else {
 			/*
 			 * Ignore/accept bit_rate, 8 bits, 1 stop bit, no
 			 * parity, no flow control lines, default XON/XOFF
 			 * chars.
 			 */
 
 			bit_rate = rpn->bit_rate;
 			rpn->param_mask = le16toh(rpn->param_mask); /* XXX */
 
 			data_bits = RFCOMM_RPN_DATA_BITS(rpn->line_settings);
 			if (rpn->param_mask & RFCOMM_RPN_PM_DATA &&
 			    data_bits != RFCOMM_RPN_DATA_8) {
 				data_bits = RFCOMM_RPN_DATA_8;
 				param_mask ^= RFCOMM_RPN_PM_DATA;
 			}
 
 			stop_bits = RFCOMM_RPN_STOP_BITS(rpn->line_settings);
 			if (rpn->param_mask & RFCOMM_RPN_PM_STOP &&
 			    stop_bits != RFCOMM_RPN_STOP_1) {
 				stop_bits = RFCOMM_RPN_STOP_1;
 				param_mask ^= RFCOMM_RPN_PM_STOP;
 			}
 
 			parity = RFCOMM_RPN_PARITY(rpn->line_settings);
 			if (rpn->param_mask & RFCOMM_RPN_PM_PARITY &&
 			    parity != RFCOMM_RPN_PARITY_NONE) {
 				parity = RFCOMM_RPN_PARITY_NONE;
 				param_mask ^= RFCOMM_RPN_PM_PARITY;
 			}
 
 			flow_control = rpn->flow_control;
 			if (rpn->param_mask & RFCOMM_RPN_PM_FLOW &&
 			    flow_control != RFCOMM_RPN_FLOW_NONE) {
 				flow_control = RFCOMM_RPN_FLOW_NONE;
 				param_mask ^= RFCOMM_RPN_PM_FLOW;
 			}
 
 			xon_char = rpn->xon_char;
 			if (rpn->param_mask & RFCOMM_RPN_PM_XON &&
 			    xon_char != RFCOMM_RPN_XON_CHAR) {
 				xon_char = RFCOMM_RPN_XON_CHAR;
 				param_mask ^= RFCOMM_RPN_PM_XON;
 			}
 
 			xoff_char = rpn->xoff_char;
 			if (rpn->param_mask & RFCOMM_RPN_PM_XOFF &&
 			    xoff_char != RFCOMM_RPN_XOFF_CHAR) {
 				xoff_char = RFCOMM_RPN_XOFF_CHAR;
 				param_mask ^= RFCOMM_RPN_PM_XOFF;
 			}
 		}
 
 		/*
 		 * Build the response in place.
 		 * NOTE(review): for a one-byte request this writes past the
 		 * received payload; assumes the first mbuf has room for a
 		 * full rfcomm_mcc_rpn - TODO confirm.
 		 */
 		rpn->bit_rate = bit_rate;
 		rpn->line_settings = RFCOMM_MKRPN_LINE_SETTINGS(data_bits,
 						stop_bits, parity);
 		rpn->flow_control = flow_control;
 		rpn->xon_char = xon_char;
 		rpn->xoff_char = xoff_char;
 		rpn->param_mask = htole16(param_mask); /* XXX */
 
 		m0->m_pkthdr.len = m0->m_len = sizeof(*hdr) + sizeof(*rpn);
 
 		/*
 		 * The response always carries the complete parameter block,
 		 * so the MCC length field must be rewritten to match it.
 		 * Otherwise the reply to a one-byte request would still
 		 * advertise length 1 in front of a full-size payload.
 		 */
 		hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_RPN);
 		hdr->length = RFCOMM_MKLEN8(sizeof(*rpn));
 
 		error = ng_btsocket_rfcomm_send_uih(s,
 				RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0);
 	} else
 		NG_FREE_M(m0); /* XXX ignore response */
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_rpn */
 
 /*
  * Receive RFCOMM RLS MCC command
  *
  * A command is logged (with an error message when the low status bit is
  * set) and echoed back as an RLS response that reuses m0. Responses are
  * dropped. Caller must hold s->session_mtx.
  */
 
 static int
 ng_btsocket_rfcomm_receive_rls(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr	*mcc = mtod(m0, struct rfcomm_mcc_hdr *);
 	struct rfcomm_mcc_rls	*body = (struct rfcomm_mcc_rls *)(mcc + 1);
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * XXX FIXME Do we have to do anything else here? Remote peer tries to
 	 * tell us something about DLCI. Just report what we have received and
 	 * return back received values as required by TS 07.10 spec.
 	 */
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC RLS, dlci=%d, status=%#x, cr=%d, length=%d, session state=%d, " \
 "flags=%#x, mtu=%d, len=%d\n",
 		__func__, RFCOMM_DLCI(body->address), body->status,
 		RFCOMM_CR(mcc->type), RFCOMM_MCC_LENGTH(mcc->length),
 		s->state, s->flags, s->mtu, m0->m_pkthdr.len);
 
 	if (!RFCOMM_CR(mcc->type)) {
 		NG_FREE_M(m0); /* XXX ignore responses */
 
 		return (0);
 	}
 
 	/* Low bit set means the remaining status bits carry an error */
 	if (body->status & 0x1) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got RLS dlci=%d, error=%#x\n", __func__, RFCOMM_DLCI(body->address),
 			body->status >> 1);
 	}
 
 	mcc->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_RLS);
 
 	return (ng_btsocket_rfcomm_send_uih(s,
 			RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0));
 } /* ng_btsocket_rfcomm_receive_rls */
 
 /*
  * Receive RFCOMM PN MCC command
  *
  * Handles four cases, keyed on whether we already have a DLC for pn->dlci
  * and whether the message is a command or a response:
  *  - known dlci, command:    apply the parameters and send a PN response
  *  - known dlci, response:   if the DLC is CONFIGURING, apply the parameters
  *                            and proceed with SABM
  *  - unknown dlci, command:  treat as incoming connection
  *  - unknown dlci, response: drop
  * m0 is either reused for the response or freed. Caller must hold
  * s->session_mtx. Returns 0 or an errno value.
  *
  * NOTE(review): pn fields are read without a check in this function that
  * the payload is at least sizeof(*pn) bytes - confirm the caller ensures it.
  */
 
 static int
 ng_btsocket_rfcomm_receive_pn(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr		*hdr = mtod(m0, struct rfcomm_mcc_hdr*);
 	struct rfcomm_mcc_pn		*pn = (struct rfcomm_mcc_pn *)(hdr+1);
 	ng_btsocket_rfcomm_pcb_t	*pcb = NULL;
 	int				 error = 0;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC PN, dlci=%d, cr=%d, length=%d, flow_control=%#x, priority=%d, " \
 "ack_timer=%d, mtu=%d, max_retrans=%d, credits=%d, session state=%d, " \
 "flags=%#x, session mtu=%d, len=%d\n",
 		__func__, pn->dlci, RFCOMM_CR(hdr->type),
 		RFCOMM_MCC_LENGTH(hdr->length), pn->flow_control, pn->priority,
 		pn->ack_timer, le16toh(pn->mtu), pn->max_retrans, pn->credits,
 		s->state, s->flags, s->mtu, m0->m_pkthdr.len);
 
 	/* dlci 0 is rejected: PN is not accepted for it */
 	if (pn->dlci == 0) {
 		NG_BTSOCKET_RFCOMM_ERR("%s: Zero dlci in MCC PN\n", __func__);
 		NG_FREE_M(m0);
 
 		return (EINVAL);
 	}
 
 	/* Check if we have this dlci */
 	pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, pn->dlci);
 	if (pcb != NULL) {
 		mtx_lock(&pcb->pcb_mtx);
 
 		if (RFCOMM_CR(hdr->type)) {
 			/* PN Request */
 			ng_btsocket_rfcomm_set_pn(pcb, 1, pn->flow_control,
 				pn->credits, pn->mtu);
 
 			/* Answer according to what set_pn() settled on */
 			if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) {
 				pn->flow_control = 0xe0;
 				pn->credits = RFCOMM_DEFAULT_CREDITS;
 			} else {
 				pn->flow_control = 0;
 				pn->credits = 0;
 			}
 
 			/* Reuse m0 as the PN response */
 			hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_PN);
 			error = ng_btsocket_rfcomm_send_uih(s, 
 					RFCOMM_MKADDRESS(INITIATOR(s), 0),
 					0, 0, m0);
 		} else {
 			/* PN Response - proceed with SABM. Timeout still set */
 			if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_CONFIGURING) {
 				ng_btsocket_rfcomm_set_pn(pcb, 0,
 					pn->flow_control, pn->credits, pn->mtu);
 
 				pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTING;
 				error = ng_btsocket_rfcomm_send_command(s,
 						RFCOMM_FRAME_SABM, pn->dlci);
 			} else
 				NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got PN response for dlci=%d in invalid state=%d\n",
 					__func__, pn->dlci, pcb->state);
 
 			/* The response itself is not forwarded anywhere */
 			NG_FREE_M(m0);
 		}
 
 		mtx_unlock(&pcb->pcb_mtx);
 	} else if (RFCOMM_CR(hdr->type)) {
 		/* PN request to non-existing dlci - incoming connection */
 		pcb = ng_btsocket_rfcomm_connect_ind(s,
 				RFCOMM_SRVCHANNEL(pn->dlci));
 		if (pcb != NULL) {
 			mtx_lock(&pcb->pcb_mtx);
 
 			pcb->dlci = pn->dlci;
 
 			ng_btsocket_rfcomm_set_pn(pcb, 1, pn->flow_control,
 				pn->credits, pn->mtu);
 
 			if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) {
 				pn->flow_control = 0xe0;
 				pn->credits = RFCOMM_DEFAULT_CREDITS;
 			} else {
 				pn->flow_control = 0;
 				pn->credits = 0;
 			}
 
 			/* Reuse m0 as the PN response */
 			hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_PN);
 			error = ng_btsocket_rfcomm_send_uih(s, 
 					RFCOMM_MKADDRESS(INITIATOR(s), 0),
 					0, 0, m0);
 
 			/*
 			 * Response queued: arm the DLC timeout and wait for
 			 * the connection to proceed. Otherwise give up on
 			 * the new DLC.
 			 */
 			if (error == 0) {
 				ng_btsocket_rfcomm_timeout(pcb);
 				pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTING;
 				soisconnecting(pcb->so);
 			} else
 				ng_btsocket_rfcomm_pcb_kill(pcb, error);
 
 			mtx_unlock(&pcb->pcb_mtx);
 		} else {
 			/* Nobody is listen()ing on this channel */
 			error = ng_btsocket_rfcomm_send_command(s,
 					RFCOMM_FRAME_DM, pn->dlci);
 			NG_FREE_M(m0);
 		}
 	} else
 		NG_FREE_M(m0); /* XXX ignore response to non-existing dlci */
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_pn */
 
 /*
  * Set PN parameters for dlci. Caller must hold pcb->pcb_mtx.
  * 
  * From Bluetooth spec.
  * 
  * "... The CL1 - CL4 field is completely redefined. (In TS07.10 this defines 
  *  the convergence layer to use, which is not applicable to RFCOMM. In RFCOMM,
  *  in Bluetooth versions up to 1.0B, this field was forced to 0).
  *
  *  In the PN request sent prior to a DLC establishment, this field must contain
  *  the value 15 (0xF), indicating support of credit based flow control in the 
  *  sender. See Table 5.3 below. If the PN response contains any other value 
  *  than 14 (0xE) in this field, it is inferred that the peer RFCOMM entity is 
  *  not supporting the credit based flow control feature. (This is only possible
  *  if the peer RFCOMM implementation is only conforming to Bluetooth version 
  *  1.0B.) If a PN request is sent on an already open DLC, then this field must
  *  contain the value zero; it is not possible to set initial credits  more 
  *  than once per DLC activation. A responding implementation must set this 
  *  field in the PN response to 14 (0xE), if (and only if) the value in the PN 
  *  request was 15..."
  */
 
 static void
 ng_btsocket_rfcomm_set_pn(ng_btsocket_rfcomm_pcb_p pcb, u_int8_t cr,
 		u_int8_t flow_control, u_int8_t credits, u_int16_t mtu)
 {
 	/*
 	 * Value of the flow control field that turns credit based flow
 	 * control on: 0xf0 in a PN request (cr != 0), 0xe0 in a PN response.
 	 */
 	u_int8_t	cfc_value = cr? 0xf0 : 0xe0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* mtu arrives in little-endian (wire) byte order */
 	pcb->mtu = le16toh(mtu);
 
 	if (flow_control != cfc_value) {
 		/* No credit based flow control on this DLC */
 		pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_CFC;
 		pcb->tx_cred = 0;
 	} else {
 		pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_CFC;
 		pcb->tx_cred = credits;
 	}
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: cr=%d, dlci=%d, state=%d, flags=%#x, mtu=%d, rx_cred=%d, tx_cred=%d\n",
 		__func__, cr, pcb->dlci, pcb->state, pcb->flags, pcb->mtu,
 		pcb->rx_cred, pcb->tx_cred);
 } /* ng_btsocket_rfcomm_set_pn */
 
 /*
  * Send RFCOMM SABM/DISC/UA/DM frames. Caller must hold s->session_mtx
  *
  * Builds a header-only frame for the given dlci and puts it on the session
  * output queue. SABM and DISC use INITIATOR(s) as the C/R value, UA and DM
  * use its negation; any other type panics. Returns 0, or ENOBUFS when no
  * mbuf is available.
  */
 
 static int
 ng_btsocket_rfcomm_send_command(ng_btsocket_rfcomm_session_p s,
 		u_int8_t type, u_int8_t dlci)
 {
 	struct rfcomm_cmd_hdr	*cmd = NULL;
 	struct mbuf		*pkt = NULL;
 	int			 cr;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Sending command type %#x, session state=%d, flags=%#x, mtu=%d, dlci=%d\n",
 		__func__, type, s->state, s->flags, s->mtu, dlci);
 
 	if (type == RFCOMM_FRAME_SABM || type == RFCOMM_FRAME_DISC)
 		cr = INITIATOR(s);
 	else if (type == RFCOMM_FRAME_UA || type == RFCOMM_FRAME_DM)
 		cr = !INITIATOR(s);
 	else {
 		panic("%s: Invalid frame type=%#x\n", __func__, type);
 		return (EINVAL);
 		/* NOT REACHED */
 	}
 
 	MGETHDR(pkt, M_NOWAIT, MT_DATA);
 	if (pkt == NULL)
 		return (ENOBUFS);
 
 	/* The frame consists of the command header only */
 	pkt->m_len = sizeof(*cmd);
 	pkt->m_pkthdr.len = pkt->m_len;
 
 	cmd = mtod(pkt, struct rfcomm_cmd_hdr *);
 	cmd->address = RFCOMM_MKADDRESS(cr, dlci);
 	cmd->control = RFCOMM_MKCONTROL(type, 1);
 	cmd->length = RFCOMM_MKLEN8(0);
 	cmd->fcs = ng_btsocket_rfcomm_fcs3((u_int8_t *) cmd);
 
 	NG_BT_MBUFQ_ENQUEUE(&s->outq, pkt);
 
 	return (0);
 } /* ng_btsocket_rfcomm_send_command */
 
 /*
  * Send RFCOMM UIH frame. Caller must hold s->session_mtx
  *
  * s       - session to send the frame on
  * address - value for the address field of the frame header
  * pf      - passed to RFCOMM_MKCONTROL(); when non-zero a credits byte is
  *           placed right after the header
  * credits - value of that credits byte (only used when pf is non-zero)
  * data    - payload, may be NULL; consumed on every path, including errors
  *
  * The frame is assembled as header [+ credits] [+ data] + FCS and put on
  * the session output queue. Returns 0, or ENOBUFS when an mbuf could not be
  * allocated.
  */
 
 static int
 ng_btsocket_rfcomm_send_uih(ng_btsocket_rfcomm_session_p s, u_int8_t address,
 		u_int8_t pf, u_int8_t credits, struct mbuf *data)
 {
 	struct rfcomm_frame_hdr	*hdr = NULL;
 	struct mbuf		*m = NULL, *mcrc = NULL;
 	u_int16_t		 length;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/* mbuf for the frame header (and the optional credits byte) */
 	MGETHDR(m, M_NOWAIT, MT_DATA);
 	if (m == NULL) {
 		NG_FREE_M(data);
 		return (ENOBUFS);
 	}
 	m->m_pkthdr.len = m->m_len = sizeof(*hdr);
 
 	/* Separate one-byte mbuf for the trailing FCS */
 	MGET(mcrc, M_NOWAIT, MT_DATA);
 	if (mcrc == NULL) {
 		/* Release the header mbuf too, otherwise it is leaked */
 		NG_FREE_M(m);
 		NG_FREE_M(data);
 		return (ENOBUFS);
 	}
 	mcrc->m_len = 1;
 
 	/* Fill UIH frame header */
 	hdr = mtod(m, struct rfcomm_frame_hdr *);
 	hdr->address = address;
 	hdr->control = RFCOMM_MKCONTROL(RFCOMM_FRAME_UIH, pf);
 
 	/* Calculate FCS */
 	mcrc->m_data[0] = ng_btsocket_rfcomm_fcs2((u_int8_t *) hdr);
 
 	/*
 	 * Put length back. Payloads longer than 127 bytes need the two-byte
 	 * length form, which makes the header one byte longer.
 	 */
 	length = (data != NULL)? data->m_pkthdr.len : 0;
 	if (length > 127) {
 		u_int16_t	l = htole16(RFCOMM_MKLEN16(length));
 
 		bcopy(&l, &hdr->length, sizeof(l));
 		m->m_pkthdr.len ++;
 		m->m_len ++;
 	} else
 		hdr->length = RFCOMM_MKLEN8(length);
 
 	/* Credits byte goes right after the header */
 	if (pf) {
 		m->m_data[m->m_len] = credits;
 		m->m_pkthdr.len ++;
 		m->m_len ++;
 	}
 
 	/* Add payload */
 	if (data != NULL) {
 		m_cat(m, data);
 		m->m_pkthdr.len += length;
 	}
 
 	/* Put FCS back */
 	m_cat(m, mcrc);
 	m->m_pkthdr.len ++;
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Sending UIH state=%d, flags=%#x, address=%d, length=%d, pf=%d, " \
 "credits=%d, len=%d\n",
 		__func__, s->state, s->flags, address, length, pf, credits,
 		m->m_pkthdr.len);
 
 	NG_BT_MBUFQ_ENQUEUE(&s->outq, m);
 
 	return (0);
 } /* ng_btsocket_rfcomm_send_uih */
 
 /*
  * Send MSC request. Caller must hold pcb->pcb_mtx and pcb->session->session_mtx
  *
  * Builds an MSC command carrying the DLC's local modem signals
  * (pcb->lmodem) and sends it as a UIH frame on dlci 0 of the session.
  * Returns ENOBUFS when no mbuf is available, otherwise the result of
  * ng_btsocket_rfcomm_send_uih().
  */
 
 static int
 ng_btsocket_rfcomm_send_msc(ng_btsocket_rfcomm_pcb_p pcb)
 {
 	struct rfcomm_mcc_msc	*body = NULL;
 	struct rfcomm_mcc_hdr	*mcc = NULL;
 	struct mbuf		*pkt = NULL;
 
 	mtx_assert(&pcb->session->session_mtx, MA_OWNED);
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	MGETHDR(pkt, M_NOWAIT, MT_DATA);
 	if (pkt == NULL)
 		return (ENOBUFS);
 
 	/* MCC header immediately followed by the MSC body */
 	pkt->m_len = sizeof(*mcc) + sizeof(*body);
 	pkt->m_pkthdr.len = pkt->m_len;
 
 	mcc = mtod(pkt, struct rfcomm_mcc_hdr *);
 	mcc->type = RFCOMM_MKMCC_TYPE(1, RFCOMM_MCC_MSC);
 	mcc->length = RFCOMM_MKLEN8(sizeof(*body));
 
 	body = (struct rfcomm_mcc_msc *)(mcc + 1);
 	body->address = RFCOMM_MKADDRESS(1, pcb->dlci);
 	body->modem = pcb->lmodem;
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Sending MSC dlci=%d, state=%d, flags=%#x, address=%d, modem=%#x\n",
 		__func__, pcb->dlci, pcb->state, pcb->flags, body->address,
 		body->modem);
 
 	return (ng_btsocket_rfcomm_send_uih(pcb->session,
 			RFCOMM_MKADDRESS(INITIATOR(pcb->session), 0), 0, 0, pkt));
 } /* ng_btsocket_rfcomm_send_msc */
 
 /*
  * Send PN request. Caller must hold pcb->pcb_mtx and pcb->session->session_mtx
  *
  * Builds a PN command from the DLC's dlci, mtu and (when credit based flow
  * control is enabled on the DLC) its rx_cred, and sends it as a UIH frame
  * on dlci 0 of the session. Returns ENOBUFS when no mbuf is available,
  * otherwise the result of ng_btsocket_rfcomm_send_uih().
  */
 
 static int
 ng_btsocket_rfcomm_send_pn(ng_btsocket_rfcomm_pcb_p pcb)
 {
 	struct rfcomm_mcc_pn	*body = NULL;
 	struct rfcomm_mcc_hdr	*mcc = NULL;
 	struct mbuf		*pkt = NULL;
 
 	mtx_assert(&pcb->session->session_mtx, MA_OWNED);
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	MGETHDR(pkt, M_NOWAIT, MT_DATA);
 	if (pkt == NULL)
 		return (ENOBUFS);
 
 	/* MCC header immediately followed by the PN body */
 	pkt->m_len = sizeof(*mcc) + sizeof(*body);
 	pkt->m_pkthdr.len = pkt->m_len;
 
 	mcc = mtod(pkt, struct rfcomm_mcc_hdr *);
 	mcc->type = RFCOMM_MKMCC_TYPE(1, RFCOMM_MCC_PN);
 	mcc->length = RFCOMM_MKLEN8(sizeof(*body));
 
 	body = (struct rfcomm_mcc_pn *)(mcc + 1);
 	body->dlci = pcb->dlci;
 
 	/*
 	 * Set default DLCI priority as described in GSM 07.10
 	 * (ETSI TS 101 369) clause 5.6 page 42
 	 */
 
 	if (pcb->dlci < 56)
 		body->priority = ((pcb->dlci >> 3) << 3) + 7;
 	else
 		body->priority = 61;
 
 	body->ack_timer = 0;
 	body->mtu = htole16(pcb->mtu);
 	body->max_retrans = 0;
 
 	/* Offer credit based flow control only if the DLC is set up for it */
 	if (!(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC)) {
 		body->flow_control = 0;
 		body->credits = 0;
 	} else {
 		body->flow_control = 0xf0;
 		body->credits = pcb->rx_cred;
 	}
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Sending PN dlci=%d, state=%d, flags=%#x, mtu=%d, flow_control=%#x, " \
 "credits=%d\n",
 		__func__, pcb->dlci, pcb->state, pcb->flags, pcb->mtu,
 		body->flow_control, body->credits);
 
 	return (ng_btsocket_rfcomm_send_uih(pcb->session,
 			RFCOMM_MKADDRESS(INITIATOR(pcb->session), 0), 0, 0, pkt));
 } /* ng_btsocket_rfcomm_send_pn */
 
 /*
  * Calculate and send credits based on available space in receive buffer
  *
  * The number of credits is the number of mtu-sized packets that fit into
  * the free space of the socket receive buffer, limited so that rx_cred does
  * not go above RFCOMM_MAX_CREDITS. Nothing is sent when that number is 0.
  * pcb->rx_cred is only updated when the credits frame was queued.
  * Caller must hold pcb->pcb_mtx and pcb->session->session_mtx.
  * Returns 0 or the error from ng_btsocket_rfcomm_send_uih().
  */
 
 static int
 ng_btsocket_rfcomm_send_credits(ng_btsocket_rfcomm_pcb_p pcb)
 {
 	long		space, fit;
 	int		error = 0;
 	u_int8_t	credits;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 	mtx_assert(&pcb->session->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Sending more credits, dlci=%d, state=%d, flags=%#x, mtu=%d, " \
 "space=%ld, tx_cred=%d, rx_cred=%d\n",
 		__func__, pcb->dlci, pcb->state, pcb->flags, pcb->mtu,
 		sbspace(&pcb->so->so_rcv), pcb->tx_cred, pcb->rx_cred);
 
 	/*
 	 * pcb->mtu is taken from the peer's PN, so it can be zero; do not
 	 * divide by it. A full (or overcommitted) receive buffer has no room
 	 * for anything, so there are no credits to give either.
 	 */
 	space = sbspace(&pcb->so->so_rcv);
 	if (pcb->mtu == 0 || space <= 0)
 		return (0);
 
 	/*
 	 * Clamp before narrowing to u_int8_t: with a small mtu the quotient
 	 * does not fit into 8 bits and would otherwise be truncated.
 	 */
 	fit = space / pcb->mtu;
 	if (fit > RFCOMM_MAX_CREDITS)
 		fit = RFCOMM_MAX_CREDITS;
 	credits = (u_int8_t) fit;
 
 	if (credits > 0) {
 		if (pcb->rx_cred + credits > RFCOMM_MAX_CREDITS)
 			credits = RFCOMM_MAX_CREDITS - pcb->rx_cred;
 
 		error = ng_btsocket_rfcomm_send_uih(
 				pcb->session,
 				RFCOMM_MKADDRESS(INITIATOR(pcb->session),
 					pcb->dlci), 1, credits, NULL);
 		if (error == 0) {
 			pcb->rx_cred += credits;
 
 			NG_BTSOCKET_RFCOMM_INFO(
 "%s: Gave remote side %d more credits, dlci=%d, state=%d, flags=%#x, " \
 "rx_cred=%d, tx_cred=%d\n",	__func__, credits, pcb->dlci, pcb->state,
 				pcb->flags, pcb->rx_cred, pcb->tx_cred);
 		} else
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not send credits, error=%d, dlci=%d, state=%d, flags=%#x, " \
 "mtu=%d, space=%ld, tx_cred=%d, rx_cred=%d\n",
 				__func__, error, pcb->dlci, pcb->state,
 				pcb->flags, pcb->mtu, sbspace(&pcb->so->so_rcv),
 				pcb->tx_cred, pcb->rx_cred);
 	}
 
 	return (error);
 } /* ng_btsocket_rfcomm_send_credits */
 
 /*****************************************************************************
  *****************************************************************************
  **                              RFCOMM DLCs
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Send data from socket send buffer
  * Caller must hold pcb->pcb_mtx and pcb->session->session_mtx
  *
  * pcb   - DLC to send from
  * limit - maximum number of UIH frames to send in this call
  *
  * Takes up to "limit" chunks of at most pcb->mtu bytes from the socket send
  * buffer and sends each one as a UIH frame. With credit based flow control
  * the limit is further capped by pcb->tx_cred and tx_cred is decreased by
  * the number of frames sent. Returns 0 or an errno value.
  */
 
 static int
 ng_btsocket_rfcomm_pcb_send(ng_btsocket_rfcomm_pcb_p pcb, int limit)
 {
 	struct mbuf	*m = NULL;
 	int		 sent, length, error;
 
 	mtx_assert(&pcb->session->session_mtx, MA_OWNED);
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/*
 	 * Work out how many frames we may send: bounded by our credits with
 	 * credit based flow control, by RFCOMM_MAX_CREDITS when the remote
 	 * modem FC bit is clear, and nothing at all when it is set.
 	 */
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC)
 		limit = min(limit, pcb->tx_cred);
 	else if (!(pcb->rmodem & RFCOMM_MODEM_FC))
 		limit = min(limit, RFCOMM_MAX_CREDITS); /* XXX ??? */
 	else
 		limit = 0;
 
 	if (limit == 0) {
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: Could not send - remote flow control asserted, dlci=%d, flags=%#x, " \
 "rmodem=%#x, tx_cred=%d\n",
 			__func__, pcb->dlci, pcb->flags, pcb->rmodem,
 			pcb->tx_cred);
 
 		return (0);
 	}
 
 	for (error = 0, sent = 0; sent < limit; sent ++) { 
 		/* Next chunk: whatever is queued, but no more than one mtu */
 		length = min(pcb->mtu, sbavail(&pcb->so->so_snd));
 		if (length == 0)
 			break;
 
 		/* Get the chunk from the socket's send buffer */
 		m = ng_btsocket_rfcomm_prepare_packet(&pcb->so->so_snd, length);
 		if (m == NULL) {
 			error = ENOBUFS;
 			break;
 		}
 
 		/*
 		 * The data is removed from the send buffer before the send
 		 * is attempted, so it is not retried if the send fails.
 		 */
 		sbdrop(&pcb->so->so_snd, length);
 
 		error = ng_btsocket_rfcomm_send_uih(pcb->session,
 				RFCOMM_MKADDRESS(INITIATOR(pcb->session),
 					pcb->dlci), 0, 0, m);
 		if (error != 0)
 			break;
 	}
 
 	/* Account for the credits used by the frames that went out */
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC)
 		pcb->tx_cred -= sent;
 
 	/* Something was sent cleanly: clear SENDING and wake up writers */
 	if (error == 0 && sent > 0) {
 		pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_SENDING;
 		sowwakeup(pcb->so);
 	}
 
 	return (error);
 } /* ng_btsocket_rfcomm_pcb_send */
 
 /*
  * Unlink and disconnect DLC. The DLC is removed from its session, marked
  * closed, the given error is stored in the socket and everybody sleeping on
  * the DLC state is woken up. If this was the last DLC on a session we have
  * initiated, the session is shut down as well. The function does not return
  * a value.
  * Caller must hold pcb->pcb_mtx and pcb->session->session_mtx
  */
 
 static void
 ng_btsocket_rfcomm_pcb_kill(ng_btsocket_rfcomm_pcb_p pcb, int error)
 {
 	/* Remember the session: pcb->session is cleared below */
 	ng_btsocket_rfcomm_session_p	s = pcb->session;
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Killing DLC, so=%p, dlci=%d, state=%d, flags=%#x, error=%d\n",
 		__func__, pcb->so, pcb->dlci, pcb->state, pcb->flags, error);
 
 	if (pcb->session == NULL)
 		panic("%s: DLC without session, pcb=%p, state=%d, flags=%#x\n",
 			__func__, pcb, pcb->state, pcb->flags);
 
 	mtx_assert(&pcb->session->session_mtx, MA_OWNED);
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* Cancel a pending DLC timeout, if any */
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)
 		ng_btsocket_rfcomm_untimeout(pcb);
 
 	/* Detach DLC from the session. Does not matter which state DLC in */
 	LIST_REMOVE(pcb, session_next);
 	pcb->session = NULL;
 
 	/* Change DLC state and wakeup all sleepers */
 	pcb->state = NG_BTSOCKET_RFCOMM_DLC_CLOSED;
 	pcb->so->so_error = error;
 	soisdisconnected(pcb->so);
 	wakeup(&pcb->state);
 
 	/* Check if we have any DLCs left on the session */
 	if (LIST_EMPTY(&s->dlcs) && INITIATOR(s)) {
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: Disconnecting session, state=%d, flags=%#x, mtu=%d\n",
 			__func__, s->state, s->flags, s->mtu);
 
 		switch (s->state) {
 		case NG_BTSOCKET_RFCOMM_SESSION_CLOSED:
 		case NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING:
 			/*
 			 * Do not have to do anything here. We can get here
 			 * when L2CAP connection was terminated or we have
 			 * received DISC on multiplexor channel
 			 */
 			break;
 
 		case NG_BTSOCKET_RFCOMM_SESSION_OPEN:
 			/*
 			 * Send DISC on multiplexor channel. Note that this
 			 * reuses the "error" parameter for the send result.
 			 */
 			error = ng_btsocket_rfcomm_send_command(s,
 					RFCOMM_FRAME_DISC, 0);
 			if (error == 0) {
 				s->state = NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING;
 				break;
 			}
 			/* FALL THROUGH - could not send DISC, just close */
 
 		case NG_BTSOCKET_RFCOMM_SESSION_CONNECTING:
 		case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED:
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 			break;
 
 /*		case NG_BTSOCKET_RFCOMM_SESSION_LISTENING: */
 		default:
 			panic("%s: Invalid session state=%d, flags=%#x\n",
 				__func__, s->state, s->flags);
 			break;
 		}
 
 		/* Let the RFCOMM task act on the new session state */
 		ng_btsocket_rfcomm_task_wakeup();
 	}
 } /* ng_btsocket_rfcomm_pcb_kill */
 
 /*
  * Find the DLC with the given dlci on the given RFCOMM session.
  * Returns the matching pcb, or NULL when the session has no such DLC.
  * Caller must hold s->session_mtx
  */
 
 static ng_btsocket_rfcomm_pcb_p
 ng_btsocket_rfcomm_pcb_by_dlci(ng_btsocket_rfcomm_session_p s, int dlci)
 {
 	ng_btsocket_rfcomm_pcb_p	dlc;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/* Walk the session's DLC list and stop at the first match */
 	LIST_FOREACH(dlc, &s->dlcs, session_next) {
 		if (dlc->dlci == dlci)
 			return (dlc);
 	}
 
 	return (NULL);
 } /* ng_btsocket_rfcomm_pcb_by_dlci */
 
 /*
  * Find a listening socket for the given channel. A socket bound to exactly
  * "src" wins; otherwise the last listener seen that is bound to
  * NG_HCI_BDADDR_ANY is returned. Returns NULL when nothing matches.
  * The global socket list mutex is taken and released inside.
  */
 
 static ng_btsocket_rfcomm_pcb_p
 ng_btsocket_rfcomm_pcb_listener(bdaddr_p src, int channel)
 {
 	ng_btsocket_rfcomm_pcb_p	cur = NULL, exact = NULL, any = NULL;
 
 	mtx_lock(&ng_btsocket_rfcomm_sockets_mtx);
 
 	LIST_FOREACH(cur, &ng_btsocket_rfcomm_sockets, next) {
 		/* Only sockets listening on the requested channel qualify */
 		if (cur->channel == channel &&
 		    (cur->so->so_options & SO_ACCEPTCONN)) {
 			if (bcmp(&cur->src, src, sizeof(*src)) == 0) {
 				/* Exact source address match, stop looking */
 				exact = cur;
 				break;
 			}
 
 			/* Remember wildcard listener as a fallback */
 			if (bcmp(&cur->src, NG_HCI_BDADDR_ANY,
 			    sizeof(bdaddr_t)) == 0)
 				any = cur;
 		}
 	}
 
 	mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 
 	if (exact != NULL)
 		return (exact);
 
 	return (any);
 } /* ng_btsocket_rfcomm_pcb_listener */
 
 /*****************************************************************************
  *****************************************************************************
  **                              Misc. functions 
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Arm the pcb timeout: ng_btsocket_rfcomm_process_timeout() is scheduled
  * to run after ng_btsocket_rfcomm_timo * hz ticks. Panics if a timeout is
  * already armed. Caller MUST hold pcb_mtx
  */
 
 static void
 ng_btsocket_rfcomm_timeout(ng_btsocket_rfcomm_pcb_p pcb)
 {
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* Arming twice is a programming error */
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)
 		panic("%s: Duplicated socket timeout?!\n", __func__);
 
 	/* Mark timeout as pending and forget any earlier expiration */
 	pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_TIMO;
 	pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT;
 
 	callout_reset(&pcb->timo, ng_btsocket_rfcomm_timo * hz,
 	    ng_btsocket_rfcomm_process_timeout, pcb);
 } /* ng_btsocket_rfcomm_timeout */
 
 /*
  * Disarm the pcb timeout and clear both the "pending" and the "timed out"
  * flags. Panics if no timeout is armed. Caller MUST hold pcb_mtx
  */
 
 static void
 ng_btsocket_rfcomm_untimeout(ng_btsocket_rfcomm_pcb_p pcb)
 {
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* Disarming a timeout that was never set is a programming error */
 	if (!(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO))
 		panic("%s: No socket timeout?!\n", __func__);
 
 	callout_stop(&pcb->timo);
 	pcb->flags &= ~(NG_BTSOCKET_RFCOMM_DLC_TIMO |
 	    NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT);
 } /* ng_btsocket_rfcomm_untimeout */
 
 /*
  * Callout handler for the pcb timeout. Flags the DLC as timed out, pushes
  * DLCs that were still being set up into the DISCONNECTING state and wakes
  * up the RFCOMM task. Panics on a timeout in any unexpected DLC state.
  * Asserts that pcb_mtx is owned on entry.
  */
 
 static void
 ng_btsocket_rfcomm_process_timeout(void *xpcb)
 {
 	ng_btsocket_rfcomm_pcb_p	dlc = (ng_btsocket_rfcomm_pcb_p) xpcb;
 
 	mtx_assert(&dlc->pcb_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Timeout, so=%p, dlci=%d, state=%d, flags=%#x\n",
 		__func__, dlc->so, dlc->dlci, dlc->state, dlc->flags);
 
 	/* The timeout is no longer pending, it has fired */
 	dlc->flags = (dlc->flags & ~NG_BTSOCKET_RFCOMM_DLC_TIMO) |
 	    NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT;
 
 	switch (dlc->state) {
 	case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT:
 	case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING:
 		/* State is left as is */
 		break;
 
 	case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING:
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTING:
 		/* Setup did not finish in time, start disconnecting */
 		dlc->state = NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING;
 		break;
 
 	default:
 		panic(
 "%s: DLC timeout in invalid state, dlci=%d, state=%d, flags=%#x\n",
 			__func__, dlc->dlci, dlc->state, dlc->flags);
 		break;
 	}
 
 	ng_btsocket_rfcomm_task_wakeup();
 } /* ng_btsocket_rfcomm_process_timeout */
 
 /*
  * Get up to length bytes from the socket buffer.
  *
  * Copies (does not remove) "length" bytes from the mbufs queued on sb into
  * a freshly allocated mbuf chain with a packet header and returns the
  * chain. The copy continues across record boundaries (m_nextpkt) when one
  * record is exhausted. Returns NULL if an mbuf cannot be allocated.
  * Panics if the socket buffer holds fewer than "length" bytes.
  */
 
 static struct mbuf *
 ng_btsocket_rfcomm_prepare_packet(struct sockbuf *sb, int length)
 {
 	struct mbuf	*top = NULL, *m = NULL, *n = NULL, *nextpkt = NULL;
 	int		 mlen, noff, len;
 
 	MGETHDR(top, M_NOWAIT, MT_DATA);
 	if (top == NULL)
 		return (NULL);
 
 	top->m_pkthdr.len = length;
 	top->m_len = 0;
 	mlen = MHLEN;		/* capacity of the mbuf being filled */
 
 	m = top;		/* destination mbuf */
 	n = sb->sb_mb;		/* source mbuf */
 	/*
 	 * The socket buffer may be empty; do not dereference n in that
 	 * case so that the length checks below can report the problem.
 	 */
 	nextpkt = (n != NULL)? n->m_nextpkt : NULL;
 	noff = 0;		/* bytes of n already consumed */
 
 	while (length > 0 && n != NULL) {
 		/* Copy as much as fits in m and is left in n */
 		len = min(mlen - m->m_len, n->m_len - noff);
 		if (len > length)
 			len = length;
 
 		bcopy(mtod(n, caddr_t)+noff, mtod(m, caddr_t)+m->m_len, len);
 		m->m_len += len;
 		noff += len;
 		length -= len;
 
 		/* Destination mbuf is full and more data is wanted */
 		if (length > 0 && m->m_len == mlen) {
 			MGET(m->m_next, M_NOWAIT, MT_DATA);
 			if (m->m_next == NULL) {
 				NG_FREE_M(top);
 				return (NULL);
 			}
 
 			m = m->m_next;
 			m->m_len = 0;
 			mlen = MLEN;
 		}
 
 		/* Source mbuf is exhausted, move to next mbuf or record */
 		if (noff == n->m_len) {
 			noff = 0;
 			n = n->m_next;
 
 			if (n == NULL)
 				n = nextpkt;
 
 			nextpkt = (n != NULL)? n->m_nextpkt : NULL;
 		}
 	}
 
 	if (length < 0)
 		panic("%s: length=%d\n", __func__, length);
 	if (length > 0 && n == NULL)
 		panic("%s: bogus length=%d, n=%p\n", __func__, length, n);
 
 	return (top);
 } /* ng_btsocket_rfcomm_prepare_packet */
 
Index: projects/clang380-import/sys/netgraph/netflow/netflow.c
===================================================================
--- projects/clang380-import/sys/netgraph/netflow/netflow.c	(revision 293686)
+++ projects/clang380-import/sys/netgraph/netflow/netflow.c	(revision 293687)
@@ -1,1189 +1,1190 @@
 /*-
  * Copyright (c) 2010-2011 Alexander V. Chernikov <melifaro@ipfw.ru>
  * Copyright (c) 2004-2005 Gleb Smirnoff <glebius@FreeBSD.org>
  * Copyright (c) 2001-2003 Roman V. Palagin <romanp@unshadow.net>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $SourceForge: netflow.c,v 1.41 2004/09/05 11:41:10 glebius Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet6.h"
 #include "opt_route.h"
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/counter.h>
 #include <sys/kernel.h>
+#include <sys/ktr.h>
 #include <sys/limits.h>
 #include <sys/mbuf.h>
 #include <sys/syslog.h>
 #include <sys/socket.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/ethernet.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 
 #include <netgraph/netflow/netflow.h>
 #include <netgraph/netflow/netflow_v9.h>
 #include <netgraph/netflow/ng_netflow.h>
 
 #define	NBUCKETS	(65536)		/* must be power of 2 */
 
 /*
  * Macro arguments below are parenthesized so that compound expressions
  * (e.g. "a | b" or "&entry") expand with the intended precedence.
  */
 
 /* This hash is for TCP or UDP packets. */
 #define FULL_HASH(addr1, addr2, port1, port2)		\
 	((((addr1) ^ ((addr1) >> 16) ^ 			\
 	htons((addr2) ^ ((addr2) >> 16))) ^ 		\
 	(port1) ^ htons(port2)) &			\
 	(NBUCKETS - 1))
 
 /* This hash is for all other IP packets. */
 #define ADDR_HASH(addr1, addr2)				\
 	(((addr1) ^ ((addr1) >> 16) ^ 			\
 	htons((addr2) ^ ((addr2) >> 16))) &		\
 	(NBUCKETS - 1))
 
 /* Macros to shorten logical constructions */
 /* XXX: priv must exist in namespace */
 /* No packet seen for longer than the inactive timeout */
 #define	INACTIVE(fle)	(time_uptime - (fle)->f.last > priv->nfinfo_inact_t)
 /* Flow has existed for longer than the active timeout */
 #define	AGED(fle)	(time_uptime - (fle)->f.first > priv->nfinfo_act_t)
 #define	ISFREE(fle)	((fle)->f.packets == 0)
 
 /*
  * 4 is a magical number: statistically number of 4-packet flows is
  * bigger than 5,6,7...-packet flows by an order of magnitude. Most UDP/ICMP
  * scans are 1 packet (~ 90% of flow cache). TCP scans are 2-packet in case
  * of reachable host and 4-packet otherwise.
  */
 #define	SMALL(fle)	((fle)->f.packets <= 4)
 
 MALLOC_DEFINE(M_NETFLOW_HASH, "netflow_hash", "NetFlow hash");
 
 static int export_add(item_p, struct flow_entry *);
 static int export_send(priv_p, fib_export_p, item_p, int);
 
 static int hash_insert(priv_p, struct flow_hash_entry *, struct flow_rec *,
     int, uint8_t, uint8_t);
 #ifdef INET6
 static int hash6_insert(priv_p, struct flow_hash_entry *, struct flow6_rec *,
     int, uint8_t, uint8_t);
 #endif
 
 static void expire_flow(priv_p, fib_export_p, struct flow_entry *, int);
 
 /*
  * Compute the hash bucket index for an IPv4 flow record. TCP and UDP
  * records are hashed over addresses and ports, everything else over
  * addresses only.
  *
  * FIB is not used here, because:
  * most VRFS will carry public IPv4 addresses which are unique even
  * without FIB private addresses can overlap, but this is worked out
  * via flow_rec bcmp() containing fib id. In IPv6 world addresses are
  * all globally unique (it's not fully true, there is FC00::/7 for example,
  * but chances of address overlap are MUCH smaller)
  */
 static inline uint32_t
 ip_hash(struct flow_rec *r)
 {
 
 	if (r->r_ip_p == IPPROTO_TCP || r->r_ip_p == IPPROTO_UDP) {
 		/* Ports are part of the key for TCP and UDP */
 		return (FULL_HASH(r->r_src.s_addr, r->r_dst.s_addr,
 		    r->r_sport, r->r_dport));
 	}
 
 	return (ADDR_HASH(r->r_src.s_addr, r->r_dst.s_addr));
 }
 
 #ifdef INET6
 /*
  * Compute the hash bucket index for an IPv6 flow record. Only the lowest
  * 4 octets of each address take part; TCP and UDP records also mix in the
  * ports.
  */
 static inline uint32_t
 ip6_hash(struct flow6_rec *r)
 {
 
 	if (r->r_ip_p == IPPROTO_TCP || r->r_ip_p == IPPROTO_UDP) {
 		return (FULL_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3],
 		    r->dst.r_dst6.__u6_addr.__u6_addr32[3], r->r_sport,
 		    r->r_dport));
 	}
 
 	return (ADDR_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3],
 	    r->dst.r_dst6.__u6_addr.__u6_addr32[3]));
 }
 #endif
 
 /*
  * Take the pending NetFlow v5 export datagram away from the FIB export
  * state, if there is one. Otherwise allocate a new mbuf cluster, wrap it
  * in a netgraph data item and initialize the v5 header (zero records).
  * Returns NULL when either allocation fails.
  */
 static item_p
 get_export_dgram(priv_p priv, fib_export_p fe)
 {
 	struct netflow_v5_export_dgram *v5;
 	struct mbuf *cl;
 	item_p	pending;
 
 	/* Detach the cached item under the export lock */
 	mtx_lock(&fe->export_mtx);
 	pending = fe->exp.item;
 	if (pending != NULL)
 		fe->exp.item = NULL;
 	mtx_unlock(&fe->export_mtx);
 
 	if (pending != NULL)
 		return (pending);
 
 	/* Nothing cached: build an empty datagram from scratch */
 	cl = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 	if (cl == NULL)
 		return (NULL);
 
 	/* NOTE(review): presumably ng_package_data() disposes of the mbuf on failure - confirm */
 	pending = ng_package_data(cl, NG_NOFLAGS);
 	if (pending == NULL)
 		return (NULL);
 
 	v5 = mtod(cl, struct netflow_v5_export_dgram *);
 	v5->header.count = 0;
 	v5->header.version = htons(NETFLOW_V5);
 	v5->header.pad = 0;
 
 	return (pending);
 }
 
 /*
  * Hand an incomplete datagram back to the FIB export state so that it can
  * be filled further. If the slot is already occupied, the datagram is
  * sent out as is instead.
  */
 static void
 return_export_dgram(priv_p priv, fib_export_p fe, item_p item, int flags)
 {
 	int stored = 0;
 
 	/*
 	 * On SMP another thread may have parked its own item in the
 	 * meantime; only take the slot when it is still empty.
 	 */
 	mtx_lock(&fe->export_mtx);
 	if (fe->exp.item == NULL) {
 		fe->exp.item = item;
 		stored = 1;
 	}
 	mtx_unlock(&fe->export_mtx);
 
 	/* Slot was taken: send what we have to the collector */
 	if (!stored)
 		export_send(priv, fe, item, flags);
 }
 
 /*
  * The flow is over. Call export_add() and free it. If datagram is
  * full, then call export_send().
  *
  * The entry is offered to the NetFlow v5 exporter (IPv4 entries only, and
  * only when priv->export is set) and to the v9 exporter (when
  * priv->export9 is set). IPv4 and IPv6 entries are then released to the
  * UMA zone matching their IP version, so the caller must not touch fle
  * afterwards. "flags" is passed on to the send routines.
  */
 static void
 expire_flow(priv_p priv, fib_export_p fe, struct flow_entry *fle, int flags)
 {
 	struct netflow_export_item exp;
 	/* Saved up front; used to pick the zone when freeing the entry */
 	uint16_t version = fle->f.version;
 
 	/* NetFlow v5 export: IPv4 flows only */
 	if ((priv->export != NULL) && (version == IPVERSION)) {
 		exp.item = get_export_dgram(priv, fe);
 		if (exp.item == NULL) {
 			/*
 			 * No datagram available. The flow is dropped right
 			 * here, so it is counted as a v9 failure too when
 			 * the v9 exporter is configured.
 			 */
 			priv->nfinfo_export_failed++;
 			if (priv->export9 != NULL)
 				priv->nfinfo_export9_failed++;
 			/* fle definitely contains IPv4 flow. */
 			uma_zfree_arg(priv->zone, fle, priv);
 			return;
 		}
 
 		/* Send when export_add() reports > 0, else keep filling */
 		if (export_add(exp.item, fle) > 0)
 			export_send(priv, fe, exp.item, flags);
 		else
 			return_export_dgram(priv, fe, exp.item, NG_QUEUE);
 	}
 
 	/* NetFlow v9 export: any IP version */
 	if (priv->export9 != NULL) {
 		exp.item9 = get_export9_dgram(priv, fe, &exp.item9_opt);
 		if (exp.item9 == NULL) {
 			/* No datagram available: count, free the entry, bail */
 			priv->nfinfo_export9_failed++;
 			if (version == IPVERSION)
 				uma_zfree_arg(priv->zone, fle, priv);
 #ifdef INET6
 			else if (version == IP6VERSION)
 				uma_zfree_arg(priv->zone6, fle, priv);
 #endif
 			else
 				panic("ng_netflow: Unknown IP proto: %d",
 				    version);
 			return;
 		}
 
 		if (export9_add(exp.item9, exp.item9_opt, fle) > 0)
 			export9_send(priv, fe, exp.item9, exp.item9_opt, flags);
 		else
 			return_export9_dgram(priv, fe, exp.item9,
 			    exp.item9_opt, NG_QUEUE);
 	}
 
 	/*
 	 * Release the entry to the zone it came from. Unlike the failure
 	 * path above, an unknown version is silently ignored here.
 	 */
 	if (version == IPVERSION)
 		uma_zfree_arg(priv->zone, fle, priv);
 #ifdef INET6
 	else if (version == IP6VERSION)
 		uma_zfree_arg(priv->zone6, fle, priv);
 #endif
 }
 
 /*
  * Fill *i with a snapshot of the node statistics: configured timeouts,
  * plain failure counters, current zone usage and the per-CPU traffic
  * counters. nfinfo_used6 is only written when INET6 is compiled in.
  */
 void
 ng_netflow_copyinfo(priv_p priv, struct ng_netflow_info *i)
 {
 
 	/* Configured timeouts */
 	i->nfinfo_act_t = priv->nfinfo_act_t;
 	i->nfinfo_inact_t = priv->nfinfo_inact_t;
 
 	/* Plain counters kept directly in priv */
 	i->nfinfo_alloc_fibs = priv->nfinfo_alloc_fibs;
 	i->nfinfo_realloc_mbuf = priv->nfinfo_realloc_mbuf;
 	i->nfinfo_export9_failed = priv->nfinfo_export9_failed;
 	i->nfinfo_export_failed = priv->nfinfo_export_failed;
 	i->nfinfo_alloc_failed = priv->nfinfo_alloc_failed;
 
 	/* Number of flow entries currently allocated from each zone */
 	i->nfinfo_used = uma_zone_get_cur(priv->zone);
 #ifdef INET6
 	i->nfinfo_used6 = uma_zone_get_cur(priv->zone6);
 #endif
 
 	/* counter(9) based statistics */
 	i->nfinfo_packets = counter_u64_fetch(priv->nfinfo_packets);
 	i->nfinfo_bytes = counter_u64_fetch(priv->nfinfo_bytes);
 	i->nfinfo_packets6 = counter_u64_fetch(priv->nfinfo_packets6);
 	i->nfinfo_bytes6 = counter_u64_fetch(priv->nfinfo_bytes6);
 	i->nfinfo_spackets = counter_u64_fetch(priv->nfinfo_spackets);
 	i->nfinfo_sbytes = counter_u64_fetch(priv->nfinfo_sbytes);
 	i->nfinfo_spackets6 = counter_u64_fetch(priv->nfinfo_spackets6);
 	i->nfinfo_sbytes6 = counter_u64_fetch(priv->nfinfo_sbytes6);
 	i->nfinfo_act_exp = counter_u64_fetch(priv->nfinfo_act_exp);
 	i->nfinfo_inact_exp = counter_u64_fetch(priv->nfinfo_inact_exp);
 }
 
 /*
  * Insert a record into defined slot.
  *
  * First we get for us a free flow entry, then fill in all
  * possible fields in it.
  *
  * The entry is allocated zeroed: fle_o_ifx, next_hop, dst_mask and
  * src_mask are only written when the corresponding route lookup is
  * enabled and succeeds, and must not carry stale allocator memory
  * otherwise.
  *
  * Caller must hold hsh->mtx. Returns 0 on success, ENOMEM when no flow
  * entry could be allocated.
  *
  * TODO: consider dropping hash mutex while filling in datagram,
  * as this was done in previous version. Need to test & profile
  * to be sure.
  */
 static int
 hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r,
 	int plen, uint8_t flags, uint8_t tcp_flags)
 {
 	struct flow_entry *fle;
 	struct sockaddr_in sin;
 	struct rtentry *rt;
 
 	mtx_assert(&hsh->mtx, MA_OWNED);
 
 	fle = uma_zalloc_arg(priv->zone, priv, M_NOWAIT | M_ZERO);
 	if (fle == NULL) {
 		priv->nfinfo_alloc_failed++;
 		return (ENOMEM);
 	}
 
 	/*
 	 * Now fle is totally ours. It is detached from all lists,
 	 * we can safely edit it.
 	 */
 	fle->f.version = IPVERSION;
 	bcopy(r, &fle->f.r, sizeof(struct flow_rec));
 	fle->f.bytes = plen;
 	fle->f.packets = 1;
 	fle->f.tcp_flags = tcp_flags;
 
 	fle->f.first = fle->f.last = time_uptime;
 
 	/*
 	 * First we do route table lookup on destination address. So we can
 	 * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases.
 	 */
 	if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) {
 		bzero(&sin, sizeof(sin));
 		sin.sin_len = sizeof(struct sockaddr_in);
 		sin.sin_family = AF_INET;
 		sin.sin_addr = fle->f.r.r_dst;
 		rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, r->fib);
 		if (rt != NULL) {
 			fle->f.fle_o_ifx = rt->rt_ifp->if_index;
 
 			/* Next hop is only known for IPv4 gateway routes */
 			if (rt->rt_flags & RTF_GATEWAY &&
 			    rt->rt_gateway->sa_family == AF_INET)
 				fle->f.next_hop =
 				    ((struct sockaddr_in *)(rt->rt_gateway))->sin_addr;
 
 			/* Prefix length is the number of set mask bits */
 			if (rt_mask(rt))
 				fle->f.dst_mask =
 				    bitcount32(((struct sockaddr_in *)rt_mask(rt))->sin_addr.s_addr);
 			else if (rt->rt_flags & RTF_HOST)
 				/* Give up. We can't determine mask :( */
 				fle->f.dst_mask = 32;
 
 			RTFREE_LOCKED(rt);
 		}
 	}
 
 	/* Do route lookup on source address, to fill in src_mask. */
 	if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) {
 		bzero(&sin, sizeof(sin));
 		sin.sin_len = sizeof(struct sockaddr_in);
 		sin.sin_family = AF_INET;
 		sin.sin_addr = fle->f.r.r_src;
 		rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, r->fib);
 		if (rt != NULL) {
 			if (rt_mask(rt))
 				fle->f.src_mask =
 				    bitcount32(((struct sockaddr_in *)rt_mask(rt))->sin_addr.s_addr);
 			else if (rt->rt_flags & RTF_HOST)
 				/* Give up. We can't determine mask :( */
 				fle->f.src_mask = 32;
 
 			RTFREE_LOCKED(rt);
 		}
 	}
 
 	/* Push new flow at the end of hash. */
 	TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
 
 	return (0);
 }
 
 #ifdef INET6
 /* XXX: make normal function, instead of.. */
 /* Number of set bits in an IPv6 mask; parenthesized so it is one operand */
 #define ipv6_masklen(x)		(bitcount32((x).__u6_addr.__u6_addr32[0]) + \
 				bitcount32((x).__u6_addr.__u6_addr32[1]) + \
 				bitcount32((x).__u6_addr.__u6_addr32[2]) + \
 				bitcount32((x).__u6_addr.__u6_addr32[3]))
 #define RT_MASK6(x)	(ipv6_masklen(((struct sockaddr_in6 *)rt_mask(x))->sin6_addr))
 /*
  * IPv6 counterpart of hash_insert(): allocate a flow6 entry, fill it from
  * the record and the optional route lookups, and append it to the bucket.
  *
  * The entry is allocated zeroed: fle_o_ifx, next_hop6, dst_mask and
  * src_mask are only written when the corresponding route lookup is
  * enabled and succeeds, and must not carry stale allocator memory
  * otherwise.
  *
  * Caller must hold hsh6->mtx. Returns 0 on success, ENOMEM when no flow
  * entry could be allocated.
  */
 static int
 hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r,
 	int plen, uint8_t flags, uint8_t tcp_flags)
 {
 	struct flow6_entry *fle6;
 	struct sockaddr_in6 sin6;
 	struct rtentry *rt;
 
 	mtx_assert(&hsh6->mtx, MA_OWNED);
 
 	fle6 = uma_zalloc_arg(priv->zone6, priv, M_NOWAIT | M_ZERO);
 	if (fle6 == NULL) {
 		priv->nfinfo_alloc_failed++;
 		return (ENOMEM);
 	}
 
 	/*
 	 * Now fle is totally ours. It is detached from all lists,
 	 * we can safely edit it.
 	 */
 
 	fle6->f.version = IP6VERSION;
 	bcopy(r, &fle6->f.r, sizeof(struct flow6_rec));
 	fle6->f.bytes = plen;
 	fle6->f.packets = 1;
 	fle6->f.tcp_flags = tcp_flags;
 
 	fle6->f.first = fle6->f.last = time_uptime;
 
 	/*
 	 * First we do route table lookup on destination address. So we can
 	 * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases.
 	 */
 	if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) {
 		bzero(&sin6, sizeof(struct sockaddr_in6));
 		sin6.sin6_len = sizeof(struct sockaddr_in6);
 		sin6.sin6_family = AF_INET6;
 		sin6.sin6_addr = r->dst.r_dst6;
 
 		rt = rtalloc1_fib((struct sockaddr *)&sin6, 0, 0, r->fib);
 
 		if (rt != NULL) {
 			fle6->f.fle_o_ifx = rt->rt_ifp->if_index;
 
 			/* Next hop is only known for IPv6 gateway routes */
 			if (rt->rt_flags & RTF_GATEWAY &&
 			    rt->rt_gateway->sa_family == AF_INET6)
 				fle6->f.n.next_hop6 =
 				    ((struct sockaddr_in6 *)(rt->rt_gateway))->sin6_addr;
 
 			/* No mask on the route: treat it as a /128 */
 			if (rt_mask(rt))
 				fle6->f.dst_mask = RT_MASK6(rt);
 			else
 				fle6->f.dst_mask = 128;
 
 			RTFREE_LOCKED(rt);
 		}
 	}
 
 	if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) {
 		/* Do route lookup on source address, to fill in src_mask. */
 		bzero(&sin6, sizeof(struct sockaddr_in6));
 		sin6.sin6_len = sizeof(struct sockaddr_in6);
 		sin6.sin6_family = AF_INET6;
 		sin6.sin6_addr = r->src.r_src6;
 
 		rt = rtalloc1_fib((struct sockaddr *)&sin6, 0, 0, r->fib);
 
 		if (rt != NULL) {
 			if (rt_mask(rt))
 				fle6->f.src_mask = RT_MASK6(rt);
 			else
 				fle6->f.src_mask = 128;
 
 			RTFREE_LOCKED(rt);
 		}
 	}
 
 	/*
 	 * Push new flow at the end of hash. The bucket list is typed for
 	 * struct flow_entry, hence the cast.
 	 */
 	TAILQ_INSERT_TAIL(&hsh6->head, (struct flow_entry *)fle6, fle_hash);
 
 	return (0);
 }
 #undef ipv6_masklen
 #undef RT_MASK6
 #endif
 
 
 /*
  * Non-static functions called from ng_netflow.c
  */
 
 /*
  * Set up the flow cache for a node: UMA zones for flow entries, the hash
  * bucket arrays with one mutex per bucket, the statistics counters, and
  * finally the NetFlow v9 part of the cache.
  */
 void
 ng_netflow_cache_init(priv_p priv)
 {
 	struct flow_hash_entry *bucket;
 	int idx;
 
 	/* Zones for flow entries, each limited to CACHESIZE items. */
 	priv->zone = uma_zcreate("NetFlow IPv4 cache",
 	    sizeof(struct flow_entry), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_CACHE, 0);
 	uma_zone_set_max(priv->zone, CACHESIZE);
 #ifdef INET6
 	priv->zone6 = uma_zcreate("NetFlow IPv6 cache",
 	    sizeof(struct flow6_entry), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_CACHE, 0);
 	uma_zone_set_max(priv->zone6, CACHESIZE);
 #endif
 
 	/* IPv4 hash: zeroed bucket array, then per-bucket lock and list. */
 	priv->hash = malloc(NBUCKETS * sizeof(struct flow_hash_entry),
 	    M_NETFLOW_HASH, M_WAITOK | M_ZERO);
 
 	for (idx = 0; idx < NBUCKETS; idx++) {
 		bucket = &priv->hash[idx];
 		mtx_init(&bucket->mtx, "hash mutex", NULL, MTX_DEF);
 		TAILQ_INIT(&bucket->head);
 	}
 
 #ifdef INET6
 	/* IPv6 hash: same layout as the IPv4 one. */
 	priv->hash6 = malloc(NBUCKETS * sizeof(struct flow_hash_entry),
 	    M_NETFLOW_HASH, M_WAITOK | M_ZERO);
 
 	for (idx = 0; idx < NBUCKETS; idx++) {
 		bucket = &priv->hash6[idx];
 		mtx_init(&bucket->mtx, "hash mutex", NULL, MTX_DEF);
 		TAILQ_INIT(&bucket->head);
 	}
 #endif
 
 	/* Statistics counters. */
 	priv->nfinfo_bytes = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_packets = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_bytes6 = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_packets6 = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_sbytes = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_spackets = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_sbytes6 = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_spackets6 = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_act_exp = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_inact_exp = counter_u64_alloc(M_WAITOK);
 
 	ng_netflow_v9_cache_init(priv);
 	CTR0(KTR_NET, "ng_netflow startup()");
 }
 
 /*
  * Make sure export state exists for the given FIB (used by both v5 and
  * v9). Returns 0 when the state exists on return, whether it was already
  * there, was installed here, or was installed concurrently by someone
  * else. Returns ENOMEM when the allocation fails.
  */
 int
 ng_netflow_fib_init(priv_p priv, int fib)
 {
 	fib_export_p	fe;
 
 	fe = priv_to_fib(priv, fib);
 
 	CTR1(KTR_NET, "ng_netflow(): fib init: %d", fib);
 
 	/* Already set up, nothing to do */
 	if (fe != NULL)
 		return (0);
 
 	fe = malloc(sizeof(struct fib_export), M_NETGRAPH, M_NOWAIT | M_ZERO);
 	if (fe == NULL)
 		return (ENOMEM);
 
 	mtx_init(&fe->export_mtx, "export dgram lock", NULL, MTX_DEF);
 	mtx_init(&fe->export9_mtx, "export9 dgram lock", NULL, MTX_DEF);
 	fe->fib = fib;
 	fe->domain_id = fib;
 
 	/* Publish the new state only if the slot is still empty */
 	if (atomic_cmpset_ptr((volatile uintptr_t *)&priv->fib_data[fib],
 	    (uintptr_t)NULL, (uintptr_t)fe) != 0) {
 		/* Increase counter for statistics */
 		CTR3(KTR_NET, "ng_netflow(): fib %d setup to %p (%p)",
 		    fib, fe, priv_to_fib(priv, fib));
 		priv->nfinfo_alloc_fibs++;
 	} else {
 		/* FIB already set up by other ISR, discard our copy */
 		CTR3(KTR_NET, "ng_netflow(): fib init: %d setup %p but got %p",
 		    fib, fe, priv_to_fib(priv, fib));
 		mtx_destroy(&fe->export_mtx);
 		mtx_destroy(&fe->export9_mtx);
 		free(fe, M_NETGRAPH);
 	}
 
 	return (0);
 }
 
 /*
  * Free all flow cache memory. Called from node close method.
  *
  * Every cached flow is expired (and thereby exported) first, then the
  * zones, bucket mutexes and hash arrays are destroyed, pending export
  * datagrams of each FIB are sent, the per-FIB state and the counters are
  * freed, and finally the v9 cache is flushed.
  */
 void
 ng_netflow_cache_flush(priv_p priv)
 {
 	struct flow_entry	*fle, *fle1;
 	struct flow_hash_entry	*hsh;
 	struct netflow_export_item exp;
 	fib_export_p fe;
 	int i;
 
 	/* exp is zeroed here but not used again in this function */
 	bzero(&exp, sizeof(exp));
 
 	/*
 	 * We are going to free probably billable data.
 	 * Expire everything before freeing it.
 	 * No locking is required since callout is already drained.
 	 */
 	for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++)
 		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			fe = priv_to_fib(priv, fle->f.r.fib);
 			expire_flow(priv, fe, fle, NG_QUEUE);
 		}
 #ifdef INET6
 	/* Same for the IPv6 hash; entries are walked as struct flow_entry */
 	for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++)
 		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			fe = priv_to_fib(priv, fle->f.r.fib);
 			expire_flow(priv, fe, fle, NG_QUEUE);
 		}
 #endif
 
 	/* All entries went back to the zone above, it can go now */
 	uma_zdestroy(priv->zone);
 	/* Destroy hash mutexes. */
 	for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++)
 		mtx_destroy(&hsh->mtx);
 
 	/*
 	 * Free hash memory.
 	 * NOTE(review): the loops above already dereference priv->hash, so
 	 * this NULL check does not protect them.
 	 */
 	if (priv->hash != NULL)
 		free(priv->hash, M_NETFLOW_HASH);
 #ifdef INET6
 	uma_zdestroy(priv->zone6);
 	/* Destroy hash mutexes. */
 	for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++)
 		mtx_destroy(&hsh->mtx);
 
 	/* Free hash memory. */
 	if (priv->hash6 != NULL)
 		free(priv->hash6, M_NETFLOW_HASH);
 #endif
 
 	/* Push out partially filled datagrams and free per-FIB state */
 	for (i = 0; i < priv->maxfibs; i++) {
 		if ((fe = priv_to_fib(priv, i)) == NULL)
 			continue;
 
 		if (fe->exp.item != NULL)
 			export_send(priv, fe, fe->exp.item, NG_QUEUE);
 
 		if (fe->exp.item9 != NULL)
 			export9_send(priv, fe, fe->exp.item9,
 			    fe->exp.item9_opt, NG_QUEUE);
 
 		mtx_destroy(&fe->export_mtx);
 		mtx_destroy(&fe->export9_mtx);
 		free(fe, M_NETGRAPH);
 	}
 
 	/* Release the statistics counters */
 	counter_u64_free(priv->nfinfo_bytes);
 	counter_u64_free(priv->nfinfo_packets);
 	counter_u64_free(priv->nfinfo_bytes6);
 	counter_u64_free(priv->nfinfo_packets6);
 	counter_u64_free(priv->nfinfo_sbytes);
 	counter_u64_free(priv->nfinfo_spackets);
 	counter_u64_free(priv->nfinfo_sbytes6);
 	counter_u64_free(priv->nfinfo_spackets6);
 	counter_u64_free(priv->nfinfo_act_exp);
 	counter_u64_free(priv->nfinfo_inact_exp);
 
 	ng_netflow_v9_cache_flush(priv);
 }
 
 /*
  * Account an IPv4 packet in the flow cache.
  *
  * Builds a flow record from the IP header (and the TCP/UDP ports for
  * unfragmented or first-fragment packets), then either updates the
  * matching cache entry or inserts a new one. Stale entries met while
  * searching the bucket are expired on the way.
  *
  * Returns 0 on success, EINVAL for a non-IPv4 or malformed header, or the
  * error from hash_insert(). upper_ptr and upper_proto are not used here.
  */
 int
 ng_netflow_flow_add(priv_p priv, fib_export_p fe, struct ip *ip,
     caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags,
     unsigned int src_if_index)
 {
 	struct flow_entry	*fle, *fle1;
 	struct flow_hash_entry	*hsh;
 	struct flow_rec		r;
 	int			hlen, plen;
 	int			error = 0;
 	uint16_t		eproto;
 	uint8_t			tcp_flags = 0;
 
 	/* The record is compared with bcmp() below, so zero it fully */
 	bzero(&r, sizeof(r));
 
 	if (ip->ip_v != IPVERSION)
 		return (EINVAL);
 
 	/* ip_hl counts 32-bit words */
 	hlen = ip->ip_hl << 2;
 	if (hlen < sizeof(struct ip))
 		return (EINVAL);
 
 	/* eproto is assigned but not read again in this function */
 	eproto = ETHERTYPE_IP;
 	/* Assume L4 template by default */
 	r.flow_type = NETFLOW_V9_FLOW_V4_L4;
 
 	r.r_src = ip->ip_src;
 	r.r_dst = ip->ip_dst;
 	r.fib = fe->fib;
 
 	plen = ntohs(ip->ip_len);
 
 	r.r_ip_p = ip->ip_p;
 	r.r_tos = ip->ip_tos;
 
 	r.r_i_ifx = src_if_index;
 
 	/*
 	 * XXX NOTE: only first fragment of fragmented TCP, UDP and
 	 * ICMP packet will be recorded with proper s_port and d_port.
 	 * Following fragments will be recorded simply as IP packet with
 	 * ip_proto = ip->ip_p and s_port, d_port set to zero.
 	 * I know, it looks like bug. But I don't want to re-implement
 	 * ip packet assembling here. Anyway, (in)famous trafd works this way -
 	 * and nobody complains yet :)
 	 */
 	if ((ip->ip_off & htons(IP_OFFMASK)) == 0)
 		switch(r.r_ip_p) {
 		case IPPROTO_TCP:
 		    {
 			struct tcphdr *tcp;
 
 			tcp = (struct tcphdr *)((caddr_t )ip + hlen);
 			r.r_sport = tcp->th_sport;
 			r.r_dport = tcp->th_dport;
 			tcp_flags = tcp->th_flags;
 			break;
 		    }
 		case IPPROTO_UDP:
 			/* Both ports are copied with one 32-bit load */
 			r.r_ports = *(uint32_t *)((caddr_t )ip + hlen);
 			break;
 		}
 
 	counter_u64_add(priv->nfinfo_packets, 1);
 	counter_u64_add(priv->nfinfo_bytes, plen);
 
 	/* Find hash slot. */
 	hsh = &priv->hash[ip_hash(&r)];
 
 	mtx_lock(&hsh->mtx);
 
 	/*
 	 * Go through hash and find our entry. If we encounter an
 	 * entry, that should be expired, purge it. We search from the
 	 * tail because recently updated entries are moved to the tail
 	 * below, and most searches are done on most active entries.
 	 */
 	TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) {
 		if (bcmp(&r, &fle->f.r, sizeof(struct flow_rec)) == 0)
 			break;
 		if ((INACTIVE(fle) && SMALL(fle)) || AGED(fle)) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib),
 			    fle, NG_QUEUE);
 			counter_u64_add(priv->nfinfo_act_exp, 1);
 		}
 	}
 
 	/* fle is NULL here when the loop ran to the end without a match */
 	if (fle) {			/* An existent entry. */
 
 		fle->f.bytes += plen;
 		fle->f.packets ++;
 		fle->f.tcp_flags |= tcp_flags;
 		fle->f.last = time_uptime;
 
 		/*
 		 * We have the following reasons to expire flow in active way:
 		 * - it hit active timeout
 		 * - a TCP connection closed
 		 * - it is going to overflow counter
 		 */
 		if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle) ||
 		    (fle->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib),
 			    fle, NG_QUEUE);
 			counter_u64_add(priv->nfinfo_act_exp, 1);
 		} else {
 			/*
 			 * It is the newest, move it to the tail,
 			 * if it isn't there already. Next search will
 			 * locate it quicker.
 			 */
 			if (fle != TAILQ_LAST(&hsh->head, fhead)) {
 				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 				TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
 			}
 		}
 	} else				/* A new flow entry. */
 		error = hash_insert(priv, hsh, &r, plen, flags, tcp_flags);
 
 	mtx_unlock(&hsh->mtx);
 
 	return (error);
 }
 
 #ifdef INET6
 /*
  * Account an IPv6 packet in the flow cache.
  *
  * Builds a flow6 record from the IPv6 header and, unless the packet is
  * flagged NG_NETFLOW_IS_FRAG, from the upper layer header at upper_ptr.
  * Then either updates the matching cache entry or inserts a new one.
  * Stale entries met while searching the bucket are expired on the way.
  *
  * Returns 0 on success, EINVAL for a non-IPv6 header, or the error from
  * hash6_insert().
  */
 int
 ng_netflow_flow6_add(priv_p priv, fib_export_p fe, struct ip6_hdr *ip6,
     caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags,
     unsigned int src_if_index)
 {
 	struct flow_entry	*fle = NULL, *fle1;
 	struct flow6_entry	*fle6;
 	struct flow_hash_entry	*hsh;
 	struct flow6_rec	r;
 	int			plen;
 	int			error = 0;
 	uint8_t			tcp_flags = 0;
 
 	/* check version */
 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
 		return (EINVAL);
 
 	/* The record is compared with bcmp() below, so zero it fully */
 	bzero(&r, sizeof(r));
 
 	r.src.r_src6 = ip6->ip6_src;
 	r.dst.r_dst6 = ip6->ip6_dst;
 	r.fib = fe->fib;
 
 	/* Assume L4 template by default */
 	r.flow_type = NETFLOW_V9_FLOW_V6_L4;
 
 	/* ip6_plen is added to the fixed header size to get the total */
 	plen = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);
 
 #if 0
 	/* XXX: set DSCP/CoS value */
 	r.r_tos = ip->ip_tos;
 #endif
 	if ((flags & NG_NETFLOW_IS_FRAG) == 0) {
 		switch(upper_proto) {
 		case IPPROTO_TCP:
 		    {
 			struct tcphdr *tcp;
 
 			tcp = (struct tcphdr *)upper_ptr;
 			/* Both ports are copied with one 32-bit load */
 			r.r_ports = *(uint32_t *)upper_ptr;
 			tcp_flags = tcp->th_flags;
 			break;
 		    }
  		case IPPROTO_UDP:
 		case IPPROTO_SCTP:
 			r.r_ports = *(uint32_t *)upper_ptr;
 			break;
 		}
 	}	
 
 	r.r_ip_p = upper_proto;
 	r.r_i_ifx = src_if_index;
  
 	counter_u64_add(priv->nfinfo_packets6, 1);
 	counter_u64_add(priv->nfinfo_bytes6, plen);
 
 	/* Find hash slot. */
 	hsh = &priv->hash6[ip6_hash(&r)];
 
 	mtx_lock(&hsh->mtx);
 
 	/*
 	 * Go through hash and find our entry. If we encounter an
 	 * entry, that should be expired, purge it. We search from the
 	 * tail because recently updated entries are moved to the tail
 	 * below, and most searches are done on most active entries.
 	 */
 	TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) {
 		/* Skip anything that is not an IPv6 entry */
 		if (fle->f.version != IP6VERSION)
 			continue;
 		fle6 = (struct flow6_entry *)fle;
 		if (bcmp(&r, &fle6->f.r, sizeof(struct flow6_rec)) == 0)
 			break;
 		if ((INACTIVE(fle6) && SMALL(fle6)) || AGED(fle6)) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle,
 			    NG_QUEUE);
 			counter_u64_add(priv->nfinfo_act_exp, 1);
 		}
 	}
 
 	/* fle is NULL here when the loop ran to the end without a match */
 	if (fle != NULL) {			/* An existent entry. */
 		fle6 = (struct flow6_entry *)fle;
 
 		fle6->f.bytes += plen;
 		fle6->f.packets ++;
 		fle6->f.tcp_flags |= tcp_flags;
 		fle6->f.last = time_uptime;
 
 		/*
 		 * We have the following reasons to expire flow in active way:
 		 * - it hit active timeout
 		 * - a TCP connection closed
 		 * - it is going to overflow counter
 		 */
 		if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle6) ||
 		    (fle6->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle,
 			    NG_QUEUE);
 			counter_u64_add(priv->nfinfo_act_exp, 1);
 		} else {
 			/*
 			 * It is the newest, move it to the tail,
 			 * if it isn't there already. Next search will
 			 * locate it quicker.
 			 */
 			if (fle != TAILQ_LAST(&hsh->head, fhead)) {
 				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 				TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
 			}
 		}
 	} else				/* A new flow entry. */
 		error = hash6_insert(priv, hsh, &r, plen, flags, tcp_flags);
 
 	mtx_unlock(&hsh->mtx);
 
 	return (error);
 }
 #endif
 
 /*
  * Return records from cache to userland.
  *
  * Copies flow records of the address family selected by req->version
  * (4, or 6 when INET6 is compiled in) into the buffer that directly
  * follows the resp header, starting at bucket req->hash_id and list
  * position req->list_id.  At most NREC_AT_ONCE (NREC6_AT_ONCE for IPv6)
  * records are copied per call.  On return resp->hash_id/resp->list_id
  * hold the position userland should pass back to continue; both are
  * zero once the last bucket has been walked.
  *
  * Note that req->list_id is consumed (zeroed) while walking.
  *
  * Returns 0 on success, EINVAL for an out-of-range hash index or an
  * unsupported version.
  *
  * TODO: matching particular IP should be done in kernel, here.
  */
 int
 ng_netflow_flow_show(priv_p priv, struct ngnf_show_header *req,
 struct ngnf_show_header *resp)
 {
 	struct flow_hash_entry	*hsh;
 	struct flow_entry	*fle;
 	struct flow_entry_data	*data = (struct flow_entry_data *)(resp + 1);
 #ifdef INET6
 	struct flow6_entry_data	*data6 = (struct flow6_entry_data *)(resp + 1);
 #endif
 	int	i, max;
 
 	/*
 	 * Reject indexes outside the table.  The negative check covers a
 	 * userland-supplied hash_id that does not fit in an int and wraps
 	 * when converted; it would otherwise index before the table.
 	 */
 	i = req->hash_id;
 	if (i < 0 || i >= NBUCKETS)
 		return (EINVAL);
 
 #ifdef INET6
 	if (req->version == 6) {
 		resp->version = 6;
 		hsh = priv->hash6 + i;
 		max = NREC6_AT_ONCE;
 	} else
 #endif
 	if (req->version == 4) {
 		resp->version = 4;
 		hsh = priv->hash + i;
 		max = NREC_AT_ONCE;
 	} else
 		return (EINVAL);
 
 	/*
 	 * We will transfer not more than NREC_AT_ONCE. More data
 	 * will come in next message.
 	 * We send current hash index and current record number in list
 	 * to userland, and userland should return it back to us.
 	 * Then, we will restart with new entry.
 	 *
 	 * The resulting cache snapshot can be inaccurate if flow expiration
 	 * is taking place on hash item between userland data requests for
 	 * this hash item id.
 	 */
 	resp->nentries = 0;
 	for (; i < NBUCKETS; hsh++, i++) {
 		int list_id;
 
 		if (mtx_trylock(&hsh->mtx) == 0) {
 			/*
 			 * Requested hash index is not available,
 			 * relay decision to skip or re-request data
 			 * to userland.
 			 */
 			resp->hash_id = i;
 			resp->list_id = 0;
 			return (0);
 		}
 
 		list_id = 0;
 		TAILQ_FOREACH(fle, &hsh->head, fle_hash) {
 			/* Someone is waiting for this bucket: yield. */
 			if (hsh->mtx.mtx_lock & MTX_CONTESTED) {
 				resp->hash_id = i;
 				resp->list_id = list_id;
 				mtx_unlock(&hsh->mtx);
 				return (0);
 			}
 
 			list_id++;
 			/* Search for particular record in list. */
 			if (req->list_id > 0) {
 				if (list_id < req->list_id)
 					continue;
 
 				/* Requested list position found. */
 				req->list_id = 0;
 			}
 #ifdef INET6
 			if (req->version == 6) {
 				struct flow6_entry *fle6;
 
 				fle6 = (struct flow6_entry *)fle;
 				bcopy(&fle6->f, data6 + resp->nentries,
 				    sizeof(fle6->f));
 			} else
 #endif
 				bcopy(&fle->f, data + resp->nentries,
 				    sizeof(fle->f));
 			resp->nentries++;
 			if (resp->nentries == max) {
 				resp->hash_id = i;
 				/*
 				 * If it was the last item in list
 				 * we simply skip to next hash_id.
 				 */
 				resp->list_id = list_id + 1;
 				mtx_unlock(&hsh->mtx);
 				return (0);
 			}
 		}
 		/*
 		 * The requested list position is only meaningful for the
 		 * bucket the request started in.  If that bucket is shorter
 		 * than the position (e.g. we stopped on its last entry, or
 		 * entries expired meanwhile), drop the stale value so it
 		 * does not make us skip records in the following buckets.
 		 */
 		req->list_id = 0;
 		mtx_unlock(&hsh->mtx);
 	}
 
 	/* Whole cache walked: tell userland there is nothing to resume. */
 	resp->hash_id = resp->list_id = 0;
 
 	return (0);
 }
 
 /*
  * Finalize the NetFlow v5 datagram carried by item and push it out.
  *
  * The mbuf length and the export header are filled in from the record
  * count accumulated in the header; the item is then forwarded to the
  * export hook, or freed when no export hook is connected.
  * Returns the forwarding error, 0 otherwise.
  */
 static int
 export_send(priv_p priv, fib_export_p fe, item_p item, int flags)
 {
 	struct netflow_v5_export_dgram *pkt;
 	struct netflow_v5_header *hdr;
 	struct timespec now;
 	struct mbuf *mb;
 	int rc;
 
 	mb = NGI_M(item);
 	pkt = mtod(mb, struct netflow_v5_export_dgram *);
 	hdr = &pkt->header;
 	rc = 0;
 
 	/* Packet length: header plus the records collected so far. */
 	mb->m_pkthdr.len = sizeof(struct netflow_v5_record) *
 	    hdr->count + sizeof(struct netflow_v5_header);
 	mb->m_len = mb->m_pkthdr.len;
 
 	/* Timestamps and identification. */
 	hdr->sys_uptime = htonl(MILLIUPTIME(time_uptime));
 	getnanotime(&now);
 	hdr->unix_secs  = htonl(now.tv_sec);
 	hdr->unix_nsecs = htonl(now.tv_nsec);
 	hdr->engine_type = 0;
 	hdr->engine_id = fe->domain_id;
 	hdr->pad = 0;
 
 	/*
 	 * The sequence number advances by the number of records in this
 	 * datagram, so it must be taken while count is still host order.
 	 */
 	hdr->flow_seq = htonl(atomic_fetchadd_32(&fe->flow_seq,
 	    hdr->count));
 	hdr->count = htons(hdr->count);
 
 	if (priv->export == NULL) {
 		NG_FREE_ITEM(item);
 		return (rc);
 	}
 
 	NG_FWD_ITEM_HOOK_FLAGS(rc, item, priv->export, flags);
 
 	return (rc);
 }
 
 
 /*
  * Append one flow to the NetFlow v5 datagram held in item.
  * Returns 1 when the datagram has reached NETFLOW_V5_MAX_RECORDS and
  * must be sent, 0 while there is still room.
  */
 static int
 export_add(item_p item, struct flow_entry *fle)
 {
 	struct netflow_v5_export_dgram *pkt;
 	struct netflow_v5_header *hdr;
 	struct netflow_v5_record *slot;
 
 	pkt = mtod(NGI_M(item), struct netflow_v5_export_dgram *);
 	hdr = &pkt->header;
 
 	/* Claim the next free record slot. */
 	slot = &pkt->r[hdr->count++];
 
 	KASSERT(hdr->count <= NETFLOW_V5_MAX_RECORDS,
 	    ("ng_netflow: export too big"));
 
 	/* Addresses and ports are copied as stored in the flow key. */
 	slot->src_addr = fle->f.r.r_src.s_addr;
 	slot->dst_addr = fle->f.r.r_dst.s_addr;
 	slot->next_hop = fle->f.next_hop.s_addr;
 	slot->s_port   = fle->f.r.r_sport;
 	slot->d_port   = fle->f.r.r_dport;
 
 	/* Multi-byte host-order values are converted for the wire. */
 	slot->i_ifx    = htons(fle->f.fle_i_ifx);
 	slot->o_ifx    = htons(fle->f.fle_o_ifx);
 	slot->packets  = htonl(fle->f.packets);
 	slot->octets   = htonl(fle->f.bytes);
 	slot->first    = htonl(MILLIUPTIME(fle->f.first));
 	slot->last     = htonl(MILLIUPTIME(fle->f.last));
 
 	/* Single-byte fields. */
 	slot->flags    = fle->f.tcp_flags;
 	slot->prot     = fle->f.r.r_ip_p;
 	slot->tos      = fle->f.r.r_tos;
 	slot->dst_mask = fle->f.dst_mask;
 	slot->src_mask = fle->f.src_mask;
 	slot->pad1     = 0;
 	slot->pad2     = 0;
 
 	/* AS numbers are not supported. */
 	slot->dst_as = 0;
 	slot->src_as = 0;
 
 	/* Non-zero tells the caller the datagram is full. */
 	return (hdr->count == NETFLOW_V5_MAX_RECORDS);
 }
 
 /*
  * Periodic flow expiry run.
  *
  * Callout handler; arg is the node private data.  Walks every bucket of
  * the IPv4 cache, and of the IPv6 cache when INET6 is defined, passing
  * flows that meet the expiry conditions to expire_flow().  Buckets whose
  * mutex cannot be taken immediately are skipped for this run.  Before
  * returning, the handler re-arms itself to fire again after 1*hz ticks.
  */
 void
 ng_netflow_expire(void *arg)
 {
 	struct flow_entry	*fle, *fle1;
 	struct flow_hash_entry	*hsh;
 	priv_p			priv = (priv_p )arg;
 	int			used, i;
 
 	/*
 	 * Going through all the cache.
 	 *
 	 * "used" starts from the value reported by uma_zone_get_cur() for
 	 * the IPv4 zone and is decremented locally for each flow expired
 	 * below; it drives the "expire aggressively or not" decision.
 	 */
 	used = uma_zone_get_cur(priv->zone);
 	for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) {
 		/*
 		 * Skip entries, that are already being worked on.
 		 */
 		if (mtx_trylock(&hsh->mtx) == 0)
 			continue;
 
 		/* _SAFE variant: entries are unlinked while iterating. */
 		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
 			/*
 			 * Interrupt thread wants this entry!
 			 * Quick! Quick! Bail out!
 			 */
 			if (hsh->mtx.mtx_lock & MTX_CONTESTED)
 				break;
 
 			/*
 			 * Don't expire aggressively while hash collision
 			 * ratio is predicted small.
 			 * (Stops scanning this bucket at the first entry
 			 * that is not INACTIVE.)
 			 */
 			if (used <= (NBUCKETS*2) && !INACTIVE(fle))
 				break;
 
 			/*
 			 * Expire when the flow is inactive and either small
 			 * or the cache is above the 2*NBUCKETS mark, or when
 			 * the flow is aged regardless of activity.
 			 */
 			if ((INACTIVE(fle) && (SMALL(fle) ||
 			    (used > (NBUCKETS*2)))) || AGED(fle)) {
 				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 				expire_flow(priv, priv_to_fib(priv,
 				    fle->f.r.fib), fle, NG_NOFLAGS);
 				used--;
 				counter_u64_add(priv->nfinfo_inact_exp, 1);
 			}
 		}
 		mtx_unlock(&hsh->mtx);
 	}
 
 #ifdef INET6
 	/* Same pass over the IPv6 cache, using the IPv6 zone's usage. */
 	used = uma_zone_get_cur(priv->zone6);
 	for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++) {
 		struct flow6_entry	*fle6;
 
 		/*
 		 * Skip entries, that are already being worked on.
 		 */
 		if (mtx_trylock(&hsh->mtx) == 0)
 			continue;
 
 		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
 			/* The timing macros are applied to the IPv6 view. */
 			fle6 = (struct flow6_entry *)fle;
 			/*
 			 * Interrupt thread wants this entry!
 			 * Quick! Quick! Bail out!
 			 */
 			if (hsh->mtx.mtx_lock & MTX_CONTESTED)
 				break;
 
 			/*
 			 * Don't expire aggressively while hash collision
 			 * ratio is predicted small.
 			 */
 			if (used <= (NBUCKETS*2) && !INACTIVE(fle6))
 				break;
 
 			if ((INACTIVE(fle6) && (SMALL(fle6) ||
 			    (used > (NBUCKETS*2)))) || AGED(fle6)) {
 				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 				expire_flow(priv, priv_to_fib(priv,
 				    fle->f.r.fib), fle, NG_NOFLAGS);
 				used--;
 				counter_u64_add(priv->nfinfo_inact_exp, 1);
 			}
 		}
 		mtx_unlock(&hsh->mtx);
 	}
 #endif
 
 	/* Schedule next expire. */
 	callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire,
 	    (void *)priv);
 }
Index: projects/clang380-import/sys/netgraph/netflow/netflow_v9.c
===================================================================
--- projects/clang380-import/sys/netgraph/netflow/netflow_v9.c	(revision 293686)
+++ projects/clang380-import/sys/netgraph/netflow/netflow_v9.c	(revision 293687)
@@ -1,490 +1,491 @@
 /*-
  * Copyright (c) 2010 Alexander V. Chernikov <melifaro@ipfw.ru>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * 	$FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet6.h"
 #include "opt_route.h"
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/counter.h>
 #include <sys/kernel.h>
+#include <sys/ktr.h>
 #include <sys/limits.h>
 #include <sys/mbuf.h>
 #include <sys/syslog.h>
 #include <sys/socket.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <net/ethernet.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 
 #include <netgraph/netflow/netflow.h>
 #include <netgraph/netflow/ng_netflow.h>
 #include <netgraph/netflow/netflow_v9.h>
 
 MALLOC_DECLARE(M_NETFLOW_GENERAL);
 MALLOC_DEFINE(M_NETFLOW_GENERAL, "netflow_general", "plog, V9 templates data");
 
 /*
  * Base V9 templates for L4+ IPv4/IPv6 protocols
  *
  * Each entry is a { field id, field length in bytes } pair; the table is
  * terminated by a {0, 0} entry, which generate_v9_templates() uses as its
  * end marker.  The four packet/byte counters are declared with
  * sizeof(CNTR) bytes each.
  */
 struct netflow_v9_template _netflow_v9_record_ipv4_tcp[] =
 {
 	{ NETFLOW_V9_FIELD_IPV4_SRC_ADDR, 4},
 	{ NETFLOW_V9_FIELD_IPV4_DST_ADDR, 4},
 	{ NETFLOW_V9_FIELD_IPV4_NEXT_HOP, 4},
 	{ NETFLOW_V9_FIELD_INPUT_SNMP, 2},
 	{ NETFLOW_V9_FIELD_OUTPUT_SNMP, 2},
 	{ NETFLOW_V9_FIELD_IN_PKTS, sizeof(CNTR)},
 	{ NETFLOW_V9_FIELD_IN_BYTES, sizeof(CNTR)},
 	{ NETFLOW_V9_FIELD_OUT_PKTS, sizeof(CNTR)},
 	{ NETFLOW_V9_FIELD_OUT_BYTES, sizeof(CNTR)},
 	{ NETFLOW_V9_FIELD_FIRST_SWITCHED, 4},
 	{ NETFLOW_V9_FIELD_LAST_SWITCHED, 4},
 	{ NETFLOW_V9_FIELD_L4_SRC_PORT, 2},
 	{ NETFLOW_V9_FIELD_L4_DST_PORT, 2},
 	{ NETFLOW_V9_FIELD_TCP_FLAGS, 1},
 	{ NETFLOW_V9_FIELD_PROTOCOL, 1},
 	{ NETFLOW_V9_FIELD_TOS, 1},
 	{ NETFLOW_V9_FIELD_SRC_AS, 4},
 	{ NETFLOW_V9_FIELD_DST_AS, 4},
 	{ NETFLOW_V9_FIELD_SRC_MASK, 1},
 	{ NETFLOW_V9_FIELD_DST_MASK, 1},
 	{0, 0}	/* end of table */
 };
 
 /*
  * IPv6 counterpart of the base L4 template: the same field list with the
  * three address fields replaced by 16-byte IPv6 ones.  Terminated by a
  * {0, 0} entry.
  */
 struct netflow_v9_template _netflow_v9_record_ipv6_tcp[] =
 {
 	{ NETFLOW_V9_FIELD_IPV6_SRC_ADDR, 16},
 	{ NETFLOW_V9_FIELD_IPV6_DST_ADDR, 16},
 	{ NETFLOW_V9_FIELD_IPV6_NEXT_HOP, 16},
 	{ NETFLOW_V9_FIELD_INPUT_SNMP, 2},
 	{ NETFLOW_V9_FIELD_OUTPUT_SNMP, 2},
 	{ NETFLOW_V9_FIELD_IN_PKTS, sizeof(CNTR)},
 	{ NETFLOW_V9_FIELD_IN_BYTES, sizeof(CNTR)},
 	{ NETFLOW_V9_FIELD_OUT_PKTS, sizeof(CNTR)},
 	{ NETFLOW_V9_FIELD_OUT_BYTES, sizeof(CNTR)},
 	{ NETFLOW_V9_FIELD_FIRST_SWITCHED, 4},
 	{ NETFLOW_V9_FIELD_LAST_SWITCHED, 4},
 	{ NETFLOW_V9_FIELD_L4_SRC_PORT, 2},
 	{ NETFLOW_V9_FIELD_L4_DST_PORT, 2},
 	{ NETFLOW_V9_FIELD_TCP_FLAGS, 1},
 	{ NETFLOW_V9_FIELD_PROTOCOL, 1},
 	{ NETFLOW_V9_FIELD_TOS, 1},
 	{ NETFLOW_V9_FIELD_SRC_AS, 4},
 	{ NETFLOW_V9_FIELD_DST_AS, 4},
 	{ NETFLOW_V9_FIELD_SRC_MASK, 1},
 	{ NETFLOW_V9_FIELD_DST_MASK, 1},
 	{0, 0}	/* end of table */
 };
 
 /*
  * Pre-compiles flow exporter for all possible FlowSets
  * so we can add flowset to packet via simple memcpy()
  *
  * Builds a single Template FlowSet containing the IPv4 and the IPv6 L4
  * templates and stores it in priv->v9_flowsets[0].  The FlowSet length
  * written into its header is what get_export9_dgram() later copies into
  * outgoing packets, so the buffer must be at least that long.
  */
 static void
 generate_v9_templates(priv_p priv)
 {
 	uint16_t *p, *template_fields_cnt;
 	int cnt;
 
 	int flowset_size = sizeof(struct netflow_v9_flowset_header) +
 		_NETFLOW_V9_TEMPLATE_SIZE(_netflow_v9_record_ipv4_tcp) + /* netflow_v9_record_ipv4_tcp */
 		_NETFLOW_V9_TEMPLATE_SIZE(_netflow_v9_record_ipv6_tcp); /* netflow_v9_record_ipv6_tcp */
 
 	/*
 	 * Pad to a 4-byte boundary *before* allocating: the padded value is
 	 * the length advertised in the FlowSet header, and consumers copy
 	 * that many bytes out of the buffer.  M_ZERO keeps the pad zeroed.
 	 */
 	if (flowset_size % 4)
 		flowset_size += 4 - (flowset_size % 4); /* Padding to 4-byte boundary */
 
 	priv->v9_flowsets[0] = malloc(flowset_size, M_NETFLOW_GENERAL, M_WAITOK | M_ZERO);
 
 	priv->flowsets_count = 1;
 	p = (uint16_t *)priv->v9_flowsets[0];
 	*p++ = 0; /* Flowset ID, 0 is reserved for Template FlowSets  */
 	*p++ = htons(flowset_size); /* Total FlowSet length */
 
 	/*
 	 * Most common TCP/UDP IPv4 template, ID = 256
 	 */
 	*p++ = htons(NETFLOW_V9_MAX_RESERVED_FLOWSET + NETFLOW_V9_FLOW_V4_L4);
 	/* Field count is only known after the loop; remember its slot. */
 	template_fields_cnt = p++;
 	for (cnt = 0; _netflow_v9_record_ipv4_tcp[cnt].field_id != 0; cnt++) {
 		*p++ = htons(_netflow_v9_record_ipv4_tcp[cnt].field_id);
 		*p++ = htons(_netflow_v9_record_ipv4_tcp[cnt].field_length);
 	}
 	*template_fields_cnt = htons(cnt);
 
 	/*
 	 * TCP/UDP IPv6 template, ID = 257
 	 */
 	*p++ = htons(NETFLOW_V9_MAX_RESERVED_FLOWSET + NETFLOW_V9_FLOW_V6_L4);
 	template_fields_cnt = p++;
 	for (cnt = 0; _netflow_v9_record_ipv6_tcp[cnt].field_id != 0; cnt++) {
 		*p++ = htons(_netflow_v9_record_ipv6_tcp[cnt].field_id);
 		*p++ = htons(_netflow_v9_record_ipv6_tcp[cnt].field_length);
 	}
 	*template_fields_cnt = htons(cnt);
 
 	/* Two template records live in this FlowSet. */
 	priv->flowset_records[0] = 2;
 }
 
 /*
  * Closes current data flowset: pads the packet so the flowset ends on a
  * 4-byte boundary and writes the final flowset length into the flowset
  * header that starts at offset t->flow_header.
  */
 static void inline
 close_flowset(struct mbuf *m, struct netflow_v9_packet_opt *t)
 {
 	struct mbuf *holder;
 	uint16_t *lenp;
 	uint16_t fslen;
 	uint32_t fill = 0;
 	int off = 0;
 	int pad;
 
 	/*
 	 * Locate the length field first.  It is addressed through
 	 * m_getptr() because it is not guaranteed to sit in the first
 	 * mbuf of the chain.
 	 */
 	holder = m_getptr(m, t->flow_header +
 	    offsetof(struct netflow_v9_flowset_header, length), &off);
 	lenp = (uint16_t *)(mtod(holder, char *) + off);
 
 	fslen = (uint16_t)(m_pktlen(m) - t->flow_header);
 
 	/* Align on 4-byte boundary (RFC 3954, Clause 5.3) */
 	if ((fslen % 4) != 0) {
 		pad = 4 - (fslen % 4);
 		if (m_append(m, pad, (void *)&fill) != 1)
 			panic("ng_netflow: m_append() failed!");
 
 		fslen += pad;
 	}
 
 	*lenp = htons(fslen);
 }
 
 /*
  * Non-static functions called from ng_netflow.c
  */
 
 /*
  * We have full datagram in fib data. Send it to export hook.
  *
  * Closes the open data flowset if there is one, fills in the V9 export
  * header, and forwards the item to the export9 hook (or frees it when
  * that hook is not connected).  The packet options t are freed here.
  * An item without options is dropped and 0 is returned.
  * Otherwise returns the forwarding error, 0 if none.
  */
 int
 export9_send(priv_p priv, fib_export_p fe, item_p item, struct netflow_v9_packet_opt *t, int flags)
 {
 	struct mbuf *m = NGI_M(item);
 	struct netflow_v9_export_dgram *dgram = mtod(m,
 					struct netflow_v9_export_dgram *);
 	struct netflow_v9_header *header = &dgram->header;
 	struct timespec ts;
 	int error = 0;
 
 	if (t == NULL) {
 		CTR0(KTR_NET, "export9_send(): V9 export packet without tag");
 		NG_FREE_ITEM(item);
 		return (0);
 	}
 
 	/* Close flowset if not closed already */
 	if (m_pktlen(m) != t->flow_header)
 		close_flowset(m, t);
 
 	/*
 	 * Fill export header.  The record count is stored once, already in
 	 * network order.
 	 */
 	header->sys_uptime = htonl(MILLIUPTIME(time_uptime));
 	getnanotime(&ts);
 	header->unix_secs  = htonl(ts.tv_sec);
 	/* V9 sequence numbers advance by one per exported packet. */
 	header->seq_num = htonl(atomic_fetchadd_32(&fe->flow9_seq, 1));
 	header->count = htons(t->count);
 	header->source_id = htonl(fe->domain_id);
 
 	if (priv->export9 != NULL)
 		NG_FWD_ITEM_HOOK_FLAGS(error, item, priv->export9, flags);
 	else
 		NG_FREE_ITEM(item);
 
 	/* The options belong to this packet only; release them with it. */
 	free(t, M_NETFLOW_GENERAL);
 
 	return (error);
 }
 
 
 
 /*
  * Add V9 record to dgram.
  *
  * Converts fle into the data record matching its flow type and appends
  * it to the packet in item, opening a new data flowset first whenever
  * the flow type differs from the one currently open in t.
  *
  * Returns 1 when the packet should be sent (another maximum-size record
  * plus flowset header might not fit within the MTU recorded in t),
  * 0 otherwise.  0 is also returned, with nothing appended, when t is
  * NULL or the flow type is not handled.
  */
 int
 export9_add(item_p item, struct netflow_v9_packet_opt *t, struct flow_entry *fle)
 {
 	size_t len = 0;
 	struct netflow_v9_flowset_header fsh;
 	struct netflow_v9_record_general rg;
 	struct mbuf *m = NGI_M(item);
 	uint16_t flow_type;
 	struct flow_entry_data *fed;
 #ifdef INET6	
 	struct flow6_entry_data *fed6;
 #endif
 	if (t == NULL) {
 		CTR0(KTR_NET, "ng_netflow: V9 export packet without tag!");
 		return (0);
 	}
 
 	/* Prepare flow record */
 	fed = (struct flow_entry_data *)&fle->f;
 #ifdef INET6
 	fed6 = (struct flow6_entry_data *)&fle->f;
 #endif
 	/* We can use flow_type field since fle6 offset is equal to fle */
 	flow_type = fed->r.flow_type;
 
 	/* Build the record in the local scratch union rg. */
 	switch (flow_type) {
 	case NETFLOW_V9_FLOW_V4_L4:
 	{
 		/* IPv4 TCP/UDP/[SCTP] */
 		struct netflow_v9_record_ipv4_tcp *rec = &rg.rec.v4_tcp;
 		
 		rec->src_addr = fed->r.r_src.s_addr;
 		rec->dst_addr = fed->r.r_dst.s_addr;
 		rec->next_hop = fed->next_hop.s_addr;
 		rec->i_ifx    = htons(fed->fle_i_ifx);
 		rec->o_ifx    = htons(fed->fle_o_ifx);
 		/*
 		 * NOTE(review): counters go through htonl() while the
 		 * templates declare them as sizeof(CNTR) bytes - confirm
 		 * CNTR is 32 bits wide in this configuration.
 		 */
 		rec->i_packets  = htonl(fed->packets);
 		rec->i_octets   = htonl(fed->bytes);
 		/* Outgoing counters are always reported as zero. */
 		rec->o_packets  = htonl(0);
 		rec->o_octets   = htonl(0);
 		rec->first    = htonl(MILLIUPTIME(fed->first));
 		rec->last     = htonl(MILLIUPTIME(fed->last));
 		rec->s_port   = fed->r.r_sport;
 		rec->d_port   = fed->r.r_dport;
 		rec->flags    = fed->tcp_flags;
 		rec->prot     = fed->r.r_ip_p;
 		rec->tos      = fed->r.r_tos;
 		rec->dst_mask = fed->dst_mask;
 		rec->src_mask = fed->src_mask;
 
 		/* Not supported fields. */
 		rec->src_as = rec->dst_as = 0;
 
 		len = sizeof(struct netflow_v9_record_ipv4_tcp);
 		break;
 	}
 #ifdef INET6	
 	case NETFLOW_V9_FLOW_V6_L4:
 	{
 		/* IPv6 TCP/UDP/[SCTP] */
 		struct netflow_v9_record_ipv6_tcp *rec = &rg.rec.v6_tcp;
 
 		rec->src_addr = fed6->r.src.r_src6;
 		rec->dst_addr = fed6->r.dst.r_dst6;
 		rec->next_hop = fed6->n.next_hop6;
 		rec->i_ifx    = htons(fed6->fle_i_ifx);
 		rec->o_ifx    = htons(fed6->fle_o_ifx);
 		rec->i_packets  = htonl(fed6->packets);
 		rec->i_octets   = htonl(fed6->bytes);
 		rec->o_packets  = htonl(0);
 		rec->o_octets   = htonl(0);
 		rec->first    = htonl(MILLIUPTIME(fed6->first));
 		rec->last     = htonl(MILLIUPTIME(fed6->last));
 		rec->s_port   = fed6->r.r_sport;
 		rec->d_port   = fed6->r.r_dport;
 		rec->flags    = fed6->tcp_flags;
 		rec->prot     = fed6->r.r_ip_p;
 		rec->tos      = fed6->r.r_tos;
 		rec->dst_mask = fed6->dst_mask;
 		rec->src_mask = fed6->src_mask;
 
 		/* Not supported fields. */
 		rec->src_as = rec->dst_as = 0;
 
 		len = sizeof(struct netflow_v9_record_ipv6_tcp);
 		break;
 	}
 #endif	
 	default:
 	{
 		/* Unknown type: trace it and leave the packet untouched. */
 		CTR1(KTR_NET, "export9_add(): Don't know what to do with %d flow type!", flow_type);
 		return (0);
 	}
 	}
 
 	/* Check if new records has the same template */
 	if (flow_type != t->flow_type) {
 		/* close old flowset */
 		if (t->flow_type != 0)
 			close_flowset(m, t);
 
 		/* Remember where the new flowset header starts. */
 		t->flow_type = flow_type;
 		t->flow_header = m_pktlen(m);
 
 		/* Generate data flowset ID */
 		fsh.id = htons(NETFLOW_V9_MAX_RESERVED_FLOWSET + flow_type);
 		/* Length is filled in later by close_flowset(). */
 		fsh.length = 0;
 
 		/* m_append should not fail since all data is already allocated */
 		if (m_append(m, sizeof(fsh), (void *)&fsh) != 1)
 			panic("ng_netflow: m_append() failed");
 		
 	}
 
 	/* Only the len bytes of the selected record type are appended. */
 	if (m_append(m, len, (void *)&rg.rec) != 1)
 		panic("ng_netflow: m_append() failed");
 
 	t->count++;
 
 	/*
 	 * Report "full" while there is still room for one more record of
 	 * the largest kind together with a fresh flowset header.
 	 */
 	if (m_pktlen(m) + sizeof(struct netflow_v9_record_general) + sizeof(struct netflow_v9_flowset_header) >= _NETFLOW_V9_MAX_SIZE(t->mtu))
 		return (1); /* end of datagram */
 	return (0);
 }
 
 /*
  * Detach export datagram from fib instance, if there is any.
  * If there is no, allocate a new one.
  *
  * On success returns the item and stores the matching packet options in
  * *tt.  Returns NULL when a fresh datagram is needed and an allocation
  * fails; *tt is left untouched in that case.
  *
  * A freshly allocated packet starts with the V9 header and, when the
  * template refresh interval (by time or by packet count) has elapsed for
  * this fib, the pre-built template FlowSet.
  */
 item_p
 get_export9_dgram(priv_p priv, fib_export_p fe, struct netflow_v9_packet_opt **tt)
 {
 	item_p	item = NULL;
 	struct netflow_v9_packet_opt *t = NULL;
 
 	/* Take over a partially filled datagram parked on the fib. */
 	mtx_lock(&fe->export9_mtx);
 	if (fe->exp.item9 != NULL) {
 		item = fe->exp.item9;
 		fe->exp.item9 = NULL;
 		t = fe->exp.item9_opt;
 		fe->exp.item9_opt = NULL;
 	}
 	mtx_unlock(&fe->export9_mtx);
 
 	if (item == NULL) {
 		struct netflow_v9_export_dgram *dgram;
 		struct netflow_v9_flowset_header *fl;
 		struct mbuf *m;
 		uint16_t mtu = priv->mtu;
 
 		/* Allocate entire packet at once, allowing easy m_append() calls */
 		m = m_getm(NULL, mtu, M_NOWAIT, MT_DATA);
 		if (m == NULL)
 			return (NULL);
 
 		t = malloc(sizeof(struct netflow_v9_packet_opt), M_NETFLOW_GENERAL, M_NOWAIT | M_ZERO);
 		if (t == NULL) {
 			/*
 			 * m_getm() may have returned a chain of mbufs;
 			 * m_freem() releases all of them, m_free() would
 			 * release only the first one.
 			 */
 			m_freem(m);
 			return (NULL);
 		}
 
 		item = ng_package_data(m, NG_NOFLAGS);
 		if (item == NULL) {
 			/*
 			 * NOTE(review): m is not released here; presumably
 			 * ng_package_data() disposes of it on failure -
 			 * confirm against netgraph(9).
 			 */
 			free(t, M_NETFLOW_GENERAL);
 			return (NULL);
 		}
 
 		dgram = mtod(m, struct netflow_v9_export_dgram *);
 		dgram->header.count = 0;
 		dgram->header.version = htons(NETFLOW_V9);
 		/* Set mbuf current data length */
 		m->m_len = m->m_pkthdr.len = sizeof(struct netflow_v9_header);
 
 		t->count = 0;
 		t->mtu = mtu;
 		t->flow_header = m->m_len;
 
 		/*
 		 * Check if we need to insert templates into packet
 		 */
 		if ((time_uptime >= priv->templ_time + fe->templ_last_ts) ||
 				(fe->sent_packets >= priv->templ_packets + fe->templ_last_pkt)) {
 
 			fe->templ_last_ts = time_uptime;
 			fe->templ_last_pkt = fe->sent_packets;
 
 			fl = priv->v9_flowsets[0];
 			m_append(m, ntohs(fl->length), (void *)fl);
 			/*
 			 * Use the packet length, not the first mbuf's
 			 * length: the appended data may have gone into a
 			 * later mbuf of the chain, and the rest of this
 			 * file compares flow_header against m_pktlen().
 			 */
 			t->flow_header = m_pktlen(m);
 			t->count += priv->flowset_records[0];
 		}
 
 	}
 
 	*tt = t;
 	return (item);
 }
 
 /*
  * Hand an unfinished datagram back to the fib instance so later flows
  * can be added to it.  When the fib already holds a datagram, this one
  * is sent to the collector as is.
  */
 void
 return_export9_dgram(priv_p priv, fib_export_p fe, item_p item, struct netflow_v9_packet_opt *t, int flags)
 {
 	int parked = 0;
 
 	/*
 	 * On SMP another thread may have parked its own item in the
 	 * meantime; only an empty slot can take ours.
 	 */
 	mtx_lock(&fe->export9_mtx);
 	if (fe->exp.item9 == NULL) {
 		fe->exp.item9_opt = t;
 		fe->exp.item9 = item;
 		parked = 1;
 	}
 	mtx_unlock(&fe->export9_mtx);
 
 	/* Slot was taken: flush what we have, outside of the lock. */
 	if (!parked)
 		export9_send(priv, fe, item, t, flags);
 }
 
 /*
  * Set up the V9 part of the node: load the default template refresh
  * limits and MTU, and pre-build the template FlowSet.
  */
 void
 ng_netflow_v9_cache_init(priv_p priv)
 {
 	/* Defaults */
 	priv->mtu = BASE_MTU;
 	priv->templ_packets = NETFLOW_V9_MAX_PACKETS_TEMPL;
 	priv->templ_time = NETFLOW_V9_MAX_TIME_TEMPL;
 
 	/* Template FlowSet used by every exported packet stream */
 	generate_v9_templates(priv);
 }
 
 /*
  * Release every pre-built V9 FlowSet buffer.
  * Called from ng_netflow_cache_flush()
  */
 void
 ng_netflow_v9_cache_flush(priv_p priv)
 {
 	int idx = 0;
 
 	/* Walk the flowsets in allocation order. */
 	while (idx < priv->flowsets_count) {
 		free(priv->v9_flowsets[idx], M_NETFLOW_GENERAL);
 		idx++;
 	}
 }
 
 /*
  * Copy the current NetFlow v9 settings (MTU and the two template
  * refresh limits) from the node private data into *i.
  */
 void
 ng_netflow_copyv9info(priv_p priv, struct ng_netflow_v9info *i)
 {
 	i->mtu = priv->mtu;
 	i->templ_packets = priv->templ_packets;
 	i->templ_time = priv->templ_time;
 }
 
Index: projects/clang380-import/sys/netgraph/netflow/ng_netflow.c
===================================================================
--- projects/clang380-import/sys/netgraph/netflow/ng_netflow.c	(revision 293686)
+++ projects/clang380-import/sys/netgraph/netflow/ng_netflow.c	(revision 293687)
@@ -1,1036 +1,1037 @@
 /*-
  * Copyright (c) 2010-2011 Alexander V. Chernikov <melifaro@ipfw.ru>
  * Copyright (c) 2004-2005 Gleb Smirnoff <glebius@FreeBSD.org>
  * Copyright (c) 2001-2003 Roman V. Palagin <romanp@unshadow.net>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $SourceForge: ng_netflow.c,v 1.30 2004/09/05 11:37:43 glebius Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet6.h"
 #include "opt_route.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/counter.h>
 #include <sys/kernel.h>
+#include <sys/ktr.h>
 #include <sys/limits.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/syslog.h>
 #include <sys/ctype.h>
 
 #include <net/if.h>
 #include <net/ethernet.h>
 #include <net/route.h>
 #include <net/if_arp.h>
 #include <net/if_var.h>
 #include <net/if_vlan_var.h>
 #include <net/bpf.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 #include <netinet/sctp.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/ng_parse.h>
 #include <netgraph/netgraph.h>
 #include <netgraph/netflow/netflow.h>
 #include <netgraph/netflow/netflow_v9.h>
 #include <netgraph/netflow/ng_netflow.h>
 
 /*
  * Netgraph methods
  *
  * Forward declarations of the node's handlers; they are wired into
  * ng_netflow_typestruct below.
  */
 static ng_constructor_t	ng_netflow_constructor;
 static ng_rcvmsg_t	ng_netflow_rcvmsg;
 static ng_close_t	ng_netflow_close;
 static ng_shutdown_t	ng_netflow_rmnode;
 static ng_newhook_t	ng_netflow_newhook;
 static ng_rcvdata_t	ng_netflow_rcvdata;
 static ng_disconnect_t	ng_netflow_disconnect;
 
 /*
  * Parse types.
  *
  * Each definition below pairs a field table, expanded from the matching
  * NG_NETFLOW_*_TYPE macro, with the generic ng_parse_struct_type.  The
  * resulting types are referenced from ng_netflow_cmds as the argument or
  * response type of the corresponding control message.
  */
 
 /* Parse type for struct ng_netflow_info */
 static const struct ng_parse_struct_field ng_netflow_info_type_fields[]
 	= NG_NETFLOW_INFO_TYPE;
 static const struct ng_parse_type ng_netflow_info_type = {
 	&ng_parse_struct_type,
 	&ng_netflow_info_type_fields
 };
 
 /* Parse type for struct ng_netflow_ifinfo */
 static const struct ng_parse_struct_field ng_netflow_ifinfo_type_fields[]
 	= NG_NETFLOW_IFINFO_TYPE;
 static const struct ng_parse_type ng_netflow_ifinfo_type = {
 	&ng_parse_struct_type,
 	&ng_netflow_ifinfo_type_fields
 };
 
 /* Parse type for struct ng_netflow_setdlt */
 static const struct ng_parse_struct_field ng_netflow_setdlt_type_fields[]
 	= NG_NETFLOW_SETDLT_TYPE;
 static const struct ng_parse_type ng_netflow_setdlt_type = {
 	&ng_parse_struct_type,
 	&ng_netflow_setdlt_type_fields
 };
 
 /* Parse type for ng_netflow_setifindex */
 static const struct ng_parse_struct_field ng_netflow_setifindex_type_fields[]
 	= NG_NETFLOW_SETIFINDEX_TYPE;
 static const struct ng_parse_type ng_netflow_setifindex_type = {
 	&ng_parse_struct_type,
 	&ng_netflow_setifindex_type_fields
 };
 
 /* Parse type for ng_netflow_settimeouts */
 static const struct ng_parse_struct_field ng_netflow_settimeouts_type_fields[]
 	= NG_NETFLOW_SETTIMEOUTS_TYPE;
 static const struct ng_parse_type ng_netflow_settimeouts_type = {
 	&ng_parse_struct_type,
 	&ng_netflow_settimeouts_type_fields
 };
 
 /* Parse type for ng_netflow_setconfig */
 static const struct ng_parse_struct_field ng_netflow_setconfig_type_fields[]
 	= NG_NETFLOW_SETCONFIG_TYPE;
 static const struct ng_parse_type ng_netflow_setconfig_type = {
 	&ng_parse_struct_type,
 	&ng_netflow_setconfig_type_fields
 };
 
 /* Parse type for ng_netflow_settemplate */
 static const struct ng_parse_struct_field ng_netflow_settemplate_type_fields[]
 	= NG_NETFLOW_SETTEMPLATE_TYPE;
 static const struct ng_parse_type ng_netflow_settemplate_type = {
 	&ng_parse_struct_type,
 	&ng_netflow_settemplate_type_fields
 };
 
 /* Parse type for ng_netflow_setmtu */
 static const struct ng_parse_struct_field ng_netflow_setmtu_type_fields[]
 	= NG_NETFLOW_SETMTU_TYPE;
 static const struct ng_parse_type ng_netflow_setmtu_type = {
 	&ng_parse_struct_type,
 	&ng_netflow_setmtu_type_fields
 };
 
 /* Parse type for struct ng_netflow_v9info */
 static const struct ng_parse_struct_field ng_netflow_v9info_type_fields[]
 	= NG_NETFLOW_V9INFO_TYPE;
 static const struct ng_parse_type ng_netflow_v9info_type = {
 	&ng_parse_struct_type,
 	&ng_netflow_v9info_type_fields
 };
 
 /*
  * List of commands and how to convert arguments to/from ASCII
  *
  * Every entry is { cookie, command id, ASCII name, argument parse type,
  * response parse type }; NULL means the message carries no argument or
  * produces no response payload.  The list ends with a zeroed entry.
  */
 static const struct ng_cmdlist ng_netflow_cmds[] = {
        {
 	 NGM_NETFLOW_COOKIE,
 	 NGM_NETFLOW_INFO,
 	 "info",
 	 NULL,
 	 &ng_netflow_info_type
        },
        {
 	NGM_NETFLOW_COOKIE,
 	NGM_NETFLOW_IFINFO,
 	"ifinfo",
 	&ng_parse_uint16_type,	/* argument: interface index */
 	&ng_netflow_ifinfo_type
        },
        {
 	NGM_NETFLOW_COOKIE,
 	NGM_NETFLOW_SETDLT,
 	"setdlt",
 	&ng_netflow_setdlt_type,
 	NULL
        },
        {
 	NGM_NETFLOW_COOKIE,
 	NGM_NETFLOW_SETIFINDEX,
 	"setifindex",
 	&ng_netflow_setifindex_type,
 	NULL
        },
        {
 	NGM_NETFLOW_COOKIE,
 	NGM_NETFLOW_SETTIMEOUTS,
 	"settimeouts",
 	&ng_netflow_settimeouts_type,
 	NULL
        },
        {
 	NGM_NETFLOW_COOKIE,
 	NGM_NETFLOW_SETCONFIG,
 	"setconfig",
 	&ng_netflow_setconfig_type,
 	NULL
        },
        {
 	NGM_NETFLOW_COOKIE,
 	NGM_NETFLOW_SETTEMPLATE,
 	"settemplate",
 	&ng_netflow_settemplate_type,
 	NULL
        },
        {
 	NGM_NETFLOW_COOKIE,
 	NGM_NETFLOW_SETMTU,
 	"setmtu",
 	&ng_netflow_setmtu_type,
 	NULL
        },
        {
 	 NGM_NETFLOW_COOKIE,
 	 NGM_NETFLOW_V9INFO,
 	 "v9info",
 	 NULL,
 	 &ng_netflow_v9info_type
        },
        { 0 }	/* terminator */
 };
 
 
 /*
  * Netgraph node type descriptor
  *
  * Binds the node type name to the handlers declared above and to the
  * control message list, and registers the type via NETGRAPH_INIT().
  */
 static struct ng_type ng_netflow_typestruct = {
 	.version =	NG_ABI_VERSION,
 	.name =		NG_NETFLOW_NODE_TYPE,
 	.constructor =	ng_netflow_constructor,
 	.rcvmsg =	ng_netflow_rcvmsg,
 	.close =	ng_netflow_close,
 	.shutdown =	ng_netflow_rmnode,
 	.newhook =	ng_netflow_newhook,
 	.rcvdata =	ng_netflow_rcvdata,
 	.disconnect =	ng_netflow_disconnect,
 	.cmdlist =	ng_netflow_cmds,
 };
 NETGRAPH_INIT(netflow, &ng_netflow_typestruct);
 
 /*
  * Node constructor: allocates and initializes the per-node private
  * state, then sets up the flow cache.  Always returns 0; both
  * allocations use M_WAITOK.
  */
 static int
 ng_netflow_constructor(node_p node)
 {
 	priv_p nfp;
 	int n;
 
 	/* Private data, zero-filled */
 	nfp = malloc(sizeof(*nfp), M_NETGRAPH, M_WAITOK | M_ZERO);
 
 	/* One export slot pointer per routing table (fib) */
 	nfp->maxfibs = rt_numfibs;
 	nfp->fib_data = malloc(sizeof(fib_export_p) * nfp->maxfibs,
 	    M_NETGRAPH, M_WAITOK | M_ZERO);
 
 	/* Default timeouts */
 	nfp->nfinfo_act_t = ACTIVE_TIMEOUT;
 	nfp->nfinfo_inact_t = INACTIVE_TIMEOUT;
 
 	/* Cross-link the node and its private data */
 	nfp->node = node;
 	NG_NODE_SET_PRIVATE(node, nfp);
 
 	/* Every interface starts out accounting ingress traffic */
 	n = 0;
 	while (n < NG_NETFLOW_MAXIFACES) {
 		nfp->ifaces[n].info.conf = NG_NETFLOW_CONF_INGRESS;
 		n++;
 	}
 
 	/* Expiry callout handle */
 	callout_init(&nfp->exp_callout, 1);
 
 	/* Flow cache memory and state */
 	ng_netflow_cache_init(nfp);
 
 	return (0);
 }
 
 /*
  * Hook creation.  Four kinds of hook names are accepted:
  *
  *  NG_NETFLOW_HOOK_DATA<N>	data hook of interface N (iface->hook);
  *  NG_NETFLOW_HOOK_OUT<N>	out hook of interface N (iface->out);
  *  NG_NETFLOW_HOOK_EXPORT	export hook; refused when the counters are
  *				64 bit wide, since NetFlow version 5
  *				supports 32-bit counters only;
  *  NG_NETFLOW_HOOK_EXPORT9	export9 hook.
  *
  * <N> must be a decimal number without leading zeros that is smaller
  * than NG_NETFLOW_MAXIFACES.  Connecting either export hook (re)arms the
  * expiry callout.
  *
  * Returns 0 on success, EINVAL for an unknown or malformed hook name and
  * EISCONN if the requested hook is already connected.
  */
 static int
 ng_netflow_newhook(node_p node, hook_p hook, const char *name)
 {
 	const priv_p priv = NG_NODE_PRIVATE(node);
 
 	if (strncmp(name, NG_NETFLOW_HOOK_DATA,	/* an iface hook? */
 	    strlen(NG_NETFLOW_HOOK_DATA)) == 0) {
 		iface_p iface;
 		unsigned long ifnum;
 		const char *cp;
 		char *eptr;
 
 		/* The interface number follows the hook name prefix. */
 		cp = name + strlen(NG_NETFLOW_HOOK_DATA);
 		if (!isdigit(*cp) || (cp[0] == '0' && cp[1] != '\0'))
 			return (EINVAL);
 
 		/*
 		 * Range check the full unsigned long value.  Narrowing it
 		 * to int first would let a huge number wrap around into
 		 * the valid range where long is wider than int.
 		 */
 		ifnum = strtoul(cp, &eptr, 10);
 		if (*eptr != '\0' || ifnum >= NG_NETFLOW_MAXIFACES)
 			return (EINVAL);
 
 		/* See if hook is already connected */
 		if (priv->ifaces[ifnum].hook != NULL)
 			return (EISCONN);
 
 		iface = &priv->ifaces[ifnum];
 
 		/* Link private info and hook together */
 		NG_HOOK_SET_PRIVATE(hook, iface);
 		iface->hook = hook;
 
 		/*
 		 * In most cases traffic accounting is done on an
 		 * Ethernet interface, so default data link type
 		 * will be DLT_EN10MB.
 		 */
 		iface->info.ifinfo_dlt = DLT_EN10MB;
 
 	} else if (strncmp(name, NG_NETFLOW_HOOK_OUT,
 	    strlen(NG_NETFLOW_HOOK_OUT)) == 0) {
 		iface_p iface;
 		unsigned long ifnum;
 		const char *cp;
 		char *eptr;
 
 		/* The interface number follows the hook name prefix. */
 		cp = name + strlen(NG_NETFLOW_HOOK_OUT);
 		if (!isdigit(*cp) || (cp[0] == '0' && cp[1] != '\0'))
 			return (EINVAL);
 
 		/* Same unsigned range check as for the data hook above. */
 		ifnum = strtoul(cp, &eptr, 10);
 		if (*eptr != '\0' || ifnum >= NG_NETFLOW_MAXIFACES)
 			return (EINVAL);
 
 		/* See if hook is already connected */
 		if (priv->ifaces[ifnum].out != NULL)
 			return (EISCONN);
 
 		iface = &priv->ifaces[ifnum];
 
 		/* Link private info and hook together */
 		NG_HOOK_SET_PRIVATE(hook, iface);
 		iface->out = hook;
 
 	} else if (strcmp(name, NG_NETFLOW_HOOK_EXPORT) == 0) {
 
 		if (priv->export != NULL)
 			return (EISCONN);
 
 		/* Netflow version 5 supports 32-bit counters only */
 		if (CNTR_MAX == UINT64_MAX)
 			return (EINVAL);
 
 		priv->export = hook;
 
 		/* Exporter is ready. Let's schedule expiry. */
 		callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire,
 		    (void *)priv);
 	} else if (strcmp(name, NG_NETFLOW_HOOK_EXPORT9) == 0) {
 
 		if (priv->export9 != NULL)
 			return (EISCONN);
 
 		priv->export9 = hook;
 
 		/* Exporter is ready. Let's schedule expiry. */
 		callout_reset(&priv->exp_callout, (1*hz), &ng_netflow_expire,
 		    (void *)priv);
 	} else
 		return (EINVAL);
 
 	return (0);
 }
 
 /*
  * Get a netgraph control message.
  *
  * Only messages carrying NGM_NETFLOW_COOKIE are handled.  Query commands
  * (INFO, IFINFO, SHOW, V9INFO) build a response; the SET* commands
  * validate the argument length and contents and update the node or
  * per-interface configuration.
  *
  * Returns 0 on success, EINVAL for an unknown cookie/command or a bad
  * argument, and ENOMEM when the response could not be allocated.  The
  * message is always freed and the response (if any) is handed to
  * NG_RESPOND_MSG() before returning.
  */
 static int
 ng_netflow_rcvmsg (node_p node, item_p item, hook_p lasthook)
 {
 	const priv_p priv = NG_NODE_PRIVATE(node);
 	struct ng_mesg *resp = NULL;
 	int error = 0;
 	struct ng_mesg *msg;
 
 	NGI_GET_MSG(item, msg);
 
 	/* Deal with message according to cookie and command */
 	switch (msg->header.typecookie) {
 	case NGM_NETFLOW_COOKIE:
 		switch (msg->header.cmd) {
 		case NGM_NETFLOW_INFO:
 		    {
 			struct ng_netflow_info *i;
 
 			NG_MKRESPONSE(resp, msg, sizeof(struct ng_netflow_info),
 			    M_NOWAIT);
 			/* M_NOWAIT allocation may fail. */
 			if (resp == NULL)
 				ERROUT(ENOMEM);
 			i = (struct ng_netflow_info *)resp->data;
 			ng_netflow_copyinfo(priv, i);
 
 			break;
 		    }
 		case NGM_NETFLOW_IFINFO:
 		    {
 			struct ng_netflow_ifinfo *i;
 			const uint16_t *index;
 
 			if (msg->header.arglen != sizeof(uint16_t))
 				ERROUT(EINVAL);
 
 			index  = (uint16_t *)msg->data;
 			if (*index >= NG_NETFLOW_MAXIFACES)
 				ERROUT(EINVAL);
 
 			/* connected iface? */
 			if (priv->ifaces[*index].hook == NULL)
 				ERROUT(EINVAL);
 
 			NG_MKRESPONSE(resp, msg,
 			     sizeof(struct ng_netflow_ifinfo), M_NOWAIT);
 			/* M_NOWAIT allocation may fail. */
 			if (resp == NULL)
 				ERROUT(ENOMEM);
 			i = (struct ng_netflow_ifinfo *)resp->data;
 			memcpy((void *)i, (void *)&priv->ifaces[*index].info,
 			    sizeof(priv->ifaces[*index].info));
 
 			break;
 		    }
 		case NGM_NETFLOW_SETDLT:
 		    {
 			struct ng_netflow_setdlt *set;
 			struct ng_netflow_iface *iface;
 
 			if (msg->header.arglen !=
 			    sizeof(struct ng_netflow_setdlt))
 				ERROUT(EINVAL);
 
 			set = (struct ng_netflow_setdlt *)msg->data;
 			if (set->iface >= NG_NETFLOW_MAXIFACES)
 				ERROUT(EINVAL);
 			iface = &priv->ifaces[set->iface];
 
 			/* connected iface? */
 			if (iface->hook == NULL)
 				ERROUT(EINVAL);
 
 			/* Only Ethernet and raw IP link types are accepted. */
 			switch (set->dlt) {
 			case	DLT_EN10MB:
 				iface->info.ifinfo_dlt = DLT_EN10MB;
 				break;
 			case	DLT_RAW:
 				iface->info.ifinfo_dlt = DLT_RAW;
 				break;
 			default:
 				ERROUT(EINVAL);
 			}
 			break;
 		    }
 		case NGM_NETFLOW_SETIFINDEX:
 		    {
 			struct ng_netflow_setifindex *set;
 			struct ng_netflow_iface *iface;
 
 			if (msg->header.arglen !=
 			    sizeof(struct ng_netflow_setifindex))
 				ERROUT(EINVAL);
 
 			set = (struct ng_netflow_setifindex *)msg->data;
 			if (set->iface >= NG_NETFLOW_MAXIFACES)
 				ERROUT(EINVAL);
 			iface = &priv->ifaces[set->iface];
 
 			/* connected iface? */
 			if (iface->hook == NULL)
 				ERROUT(EINVAL);
 
 			iface->info.ifinfo_index = set->index;
 
 			break;
 		    }
 		case NGM_NETFLOW_SETTIMEOUTS:
 		    {
 			struct ng_netflow_settimeouts *set;
 
 			if (msg->header.arglen !=
 			    sizeof(struct ng_netflow_settimeouts))
 				ERROUT(EINVAL);
 
 			set = (struct ng_netflow_settimeouts *)msg->data;
 
 			priv->nfinfo_inact_t = set->inactive_timeout;
 			priv->nfinfo_act_t = set->active_timeout;
 
 			break;
 		    }
 		case NGM_NETFLOW_SETCONFIG:
 		    {
 			struct ng_netflow_setconfig *set;
 
 			if (msg->header.arglen !=
 			    sizeof(struct ng_netflow_setconfig))
 				ERROUT(EINVAL);
 
 			set = (struct ng_netflow_setconfig *)msg->data;
 
 			if (set->iface >= NG_NETFLOW_MAXIFACES)
 				ERROUT(EINVAL);
 
 			/* Unlike SETDLT, the iface need not be connected. */
 			priv->ifaces[set->iface].info.conf = set->conf;
 
 			break;
 		    }
 		case NGM_NETFLOW_SETTEMPLATE:
 		    {
 			struct ng_netflow_settemplate *set;
 
 			if (msg->header.arglen !=
 			    sizeof(struct ng_netflow_settemplate))
 				ERROUT(EINVAL);
 
 			set = (struct ng_netflow_settemplate *)msg->data;
 
 			priv->templ_packets = set->packets;
 			priv->templ_time = set->time;
 
 			break;
 		    }
 		case NGM_NETFLOW_SETMTU:
 		    {
 			struct ng_netflow_setmtu *set;
 
 			if (msg->header.arglen !=
 			    sizeof(struct ng_netflow_setmtu))
 				ERROUT(EINVAL);
 
 			set = (struct ng_netflow_setmtu *)msg->data;
 			if ((set->mtu < MIN_MTU) || (set->mtu > MAX_MTU))
 				ERROUT(EINVAL);
 
 			priv->mtu = set->mtu;
 
 			break;
 		    }
 		case NGM_NETFLOW_SHOW:
 			if (msg->header.arglen !=
 			    sizeof(struct ngnf_show_header))
 				ERROUT(EINVAL);
 
 			NG_MKRESPONSE(resp, msg, NGRESP_SIZE, M_NOWAIT);
 
 			if (!resp)
 				ERROUT(ENOMEM);
 
 			error = ng_netflow_flow_show(priv,
 			    (struct ngnf_show_header *)msg->data,
 			    (struct ngnf_show_header *)resp->data);
 
 			/* Do not send a half-built response on failure. */
 			if (error)
 				NG_FREE_MSG(resp);
 
 			break;
 		case NGM_NETFLOW_V9INFO:
 		    {
 			struct ng_netflow_v9info *i;
 
 			NG_MKRESPONSE(resp, msg,
 			    sizeof(struct ng_netflow_v9info), M_NOWAIT);
 			/* M_NOWAIT allocation may fail. */
 			if (resp == NULL)
 				ERROUT(ENOMEM);
 			i = (struct ng_netflow_v9info *)resp->data;
 			ng_netflow_copyv9info(priv, i);
 
 			break;
 		    }
 		default:
 			ERROUT(EINVAL);		/* unknown command */
 			break;
 		}
 		break;
 	default:
 		ERROUT(EINVAL);		/* incorrect cookie */
 		break;
 	}
 
 	/*
 	 * Take care of synchronous response, if any.
 	 * Free memory and return.
 	 */
 done:
 	NG_RESPOND_MSG(error, node, item, resp);
 	NG_FREE_MSG(msg);
 
 	return (error);
 }
 
 /*
  * Receive data on hook.
  *
  * Data arriving on one of the export hooks is rejected with EINVAL.
  * For data arriving on the "hook" or "out" side of the interface stored
  * as the hook's private data, the packet is either bypassed straight to
  * the opposite side (accounting disabled for this direction, or the
  * packet already carries a matching MTAG_NETFLOW_CALLED tag), or its
  * link/network/transport headers are pulled up and it is handed to
  * ng_netflow_flow_add() / ng_netflow_flow6_add() before being forwarded
  * to the opposite side.  If the opposite side is not connected, the item
  * and the mbuf are freed.
  *
  * Returns the result of forwarding the packet, or the error recorded
  * before the packet was freed.
  */
 static int
 ng_netflow_rcvdata (hook_p hook, item_p item)
 {
 	const node_p node = NG_HOOK_NODE(hook);
 	const priv_p priv = NG_NODE_PRIVATE(node);
 	const iface_p iface = NG_HOOK_PRIVATE(hook);
 	hook_p out;
 	struct mbuf *m = NULL, *m_old = NULL;
 	struct ip *ip = NULL;
 	struct ip6_hdr *ip6 = NULL;
 	struct m_tag *mtag;
 	int pullup_len = 0, off;
 	uint8_t acct = 0, bypass = 0, flags = 0, upper_proto = 0;
 	int error = 0, l3_off = 0;
 	unsigned int src_if_index;
 	caddr_t upper_ptr = NULL;
 	fib_export_p fe;	
 	uint32_t fib;
 
 	if ((hook == priv->export) || (hook == priv->export9)) {
 		/*
 		 * Data arrived on export hook.
 		 * This must not happen.
 		 */
 		log(LOG_ERR, "ng_netflow: incoming data on export hook!\n");
 		ERROUT(EINVAL);
 	};
 
 	/*
 	 * Pick the hook on the opposite side of the interface and check
 	 * whether accounting is enabled for this direction.
 	 */
 	if (hook == iface->hook) {
 		if ((iface->info.conf & NG_NETFLOW_CONF_INGRESS) == 0)
 			bypass = 1;
 		out = iface->out;
 	} else if (hook == iface->out) {
 		if ((iface->info.conf & NG_NETFLOW_CONF_EGRESS) == 0)
 			bypass = 1;
 		out = iface->hook;
 	} else
 		ERROUT(EINVAL);
 
 	/*
 	 * In the "once" modes, skip packets that already carry a
 	 * MTAG_NETFLOW_CALLED tag: any such tag with CONF_ONCE, or a tag
 	 * holding this node's ID otherwise.
 	 */
 	if ((!bypass) && (iface->info.conf &
 	    (NG_NETFLOW_CONF_ONCE | NG_NETFLOW_CONF_THISONCE))) {
 		mtag = m_tag_locate(NGI_M(item), MTAG_NETFLOW,
 		    MTAG_NETFLOW_CALLED, NULL);
 		while (mtag != NULL) {
 			if ((iface->info.conf & NG_NETFLOW_CONF_ONCE) ||
 			    ((ng_ID_t *)(mtag + 1))[0] == NG_NODE_ID(node)) {
 				bypass = 1;
 				break;
 			}
 			mtag = m_tag_locate(NGI_M(item), MTAG_NETFLOW,
 			    MTAG_NETFLOW_CALLED, mtag);
 		}
 	}
 	
 	/* No accounting wanted: forward the item untouched. */
 	if (bypass) {
 		if (out == NULL)
 			ERROUT(ENOTCONN);
 
 		NG_FWD_ITEM_HOOK(error, item, out);
 		return (error);
 	}
 	
 	/*
 	 * Tag the packet with this node's ID.  A failed tag allocation is
 	 * not treated as an error; the packet simply goes untagged.
 	 */
 	if (iface->info.conf &
 	    (NG_NETFLOW_CONF_ONCE | NG_NETFLOW_CONF_THISONCE)) {
 		mtag = m_tag_alloc(MTAG_NETFLOW, MTAG_NETFLOW_CALLED,
 		    sizeof(ng_ID_t), M_NOWAIT);
 		if (mtag) {
 			((ng_ID_t *)(mtag + 1))[0] = NG_NODE_ID(node);
 			m_tag_prepend(NGI_M(item), mtag);
 		}
 	}
 
 	/* Import configuration flags related to flow creation */
 	flags = iface->info.conf & NG_NETFLOW_FLOW_FLAGS;
 
 	/* m_old remembers the original mbuf to detect a later reallocation. */
 	NGI_GET_M(item, m);
 	m_old = m;
 
 	/* Increase counters. */
 	iface->info.ifinfo_packets++;
 
 	/*
 	 * Depending on interface data link type and packet contents
 	 * we pullup enough data, so that ng_netflow_flow_add() does not
 	 * need to know about mbuf at all. We keep current length of data
 	 * needed to be contiguous in pullup_len. mtod() is done at the
 	 * very end one more time, since m may have changed after the pullup.
 	 *
 	 * In case of unrecognized data we don't return error, but just
 	 * pass data to downstream hook, if it is available.
 	 */
 
 /*
  * Grow pullup_len by "length" and make that many bytes contiguous in m.
  * Jumps to "bypass" with EINVAL if the packet is shorter than that or the
  * length exceeds MHLEN, and to "done" with ENOBUFS if m_pullup() fails
  * (m is NULL then).  May replace m.
  */
 #define	M_CHECK(length)	do {					\
 	pullup_len += length;					\
 	if (((m)->m_pkthdr.len < (pullup_len)) ||		\
 	   ((pullup_len) > MHLEN)) {				\
 		error = EINVAL;					\
 		goto bypass;					\
 	} 							\
 	if ((m)->m_len < (pullup_len) &&			\
 	   (((m) = m_pullup((m),(pullup_len))) == NULL)) {	\
 		error = ENOBUFS;				\
 		goto done;					\
 	}							\
 } while (0)
 
 	switch (iface->info.ifinfo_dlt) {
 	case DLT_EN10MB:	/* Ethernet */
 	    {
 		struct ether_header *eh;
 		uint16_t etype;
 
 		M_CHECK(sizeof(struct ether_header));
 		eh = mtod(m, struct ether_header *);
 
 		/* Make sure this is IP frame. */
 		etype = ntohs(eh->ether_type);
 		switch (etype) {
 		case ETHERTYPE_IP:
 			M_CHECK(sizeof(struct ip));
 			eh = mtod(m, struct ether_header *);
 			ip = (struct ip *)(eh + 1);
 			l3_off = sizeof(struct ether_header);
 			break;
 #ifdef INET6
 		case ETHERTYPE_IPV6:
 			/*
 			 * m_pullup() called by M_CHECK() pullups
 			 * kern.ipc.max_protohdr (default 60 bytes)
 			 * which is enough.
 			 */
 			M_CHECK(sizeof(struct ip6_hdr));
 			eh = mtod(m, struct ether_header *);
 			ip6 = (struct ip6_hdr *)(eh + 1);
 			l3_off = sizeof(struct ether_header);
 			break;
 #endif
 		case ETHERTYPE_VLAN:
 		    {
 			struct ether_vlan_header *evh;
 
 			M_CHECK(sizeof(struct ether_vlan_header) -
 			    sizeof(struct ether_header));
 			evh = mtod(m, struct ether_vlan_header *);
 			etype = ntohs(evh->evl_proto);
 			l3_off = sizeof(struct ether_vlan_header);
 
 			/*
 			 * NOTE(review): unlike the untagged cases above, evh
 			 * is not re-read with mtod() after the M_CHECK()
 			 * below; if m_pullup() replaced the mbuf, ip/ip6
 			 * would stay stale until they are restored after
 			 * "loopend" -- confirm.
 			 */
 			if (etype == ETHERTYPE_IP) {
 				M_CHECK(sizeof(struct ip));
 				ip = (struct ip *)(evh + 1);
 				break;
 #ifdef INET6
 			} else if (etype == ETHERTYPE_IPV6) {
 				M_CHECK(sizeof(struct ip6_hdr));
 				ip6 = (struct ip6_hdr *)(evh + 1);
 				break;
 #endif
 			}
 			/* Not IP inside the VLAN tag: fall into default. */
 		    }
 		default:
 			goto bypass;	/* pass this frame */
 		}
 		break;
 	    }
 	case DLT_RAW:		/* IP packets */
 		M_CHECK(sizeof(struct ip));
 		ip = mtod(m, struct ip *);
 		/* l3_off is already zero */
 #ifdef INET6
 		/*
 		 * If INET6 is not defined IPv6 packets
 		 * will be discarded in ng_netflow_flow_add().
 		 */
 		if (ip->ip_v == IP6VERSION) {
 			ip = NULL;
 			M_CHECK(sizeof(struct ip6_hdr) - sizeof(struct ip));
 			ip6 = mtod(m, struct ip6_hdr *);
 		}
 #endif
 		break;
 	default:
 		goto bypass;
 		break;
 	}
 
 	off = pullup_len;
 
 	/* IPv4, first (or only) fragment: the upper layer header is present. */
 	if ((ip != NULL) && ((ip->ip_off & htons(IP_OFFMASK)) == 0)) {
 		if ((ip->ip_v != IPVERSION) ||
 		    ((ip->ip_hl << 2) < sizeof(struct ip)))
 			goto bypass;
 		/*
 		 * In case of IPv4 header with options, we haven't pulled
 		 * up enough, yet.
 		 */
 		M_CHECK((ip->ip_hl << 2) - sizeof(struct ip));
 
 		/* Save upper layer offset and proto */
 		off = pullup_len;
 		upper_proto = ip->ip_p;
 
 		/*
 		 * XXX: in case of wrong upper layer header we will
 		 * forward this packet but skip this record in netflow.
 		 */
 		switch (ip->ip_p) {
 		case IPPROTO_TCP:
 			M_CHECK(sizeof(struct tcphdr));
 			break;
 		case IPPROTO_UDP:
 			M_CHECK(sizeof(struct udphdr));
 			break;
 		case IPPROTO_SCTP:
 			M_CHECK(sizeof(struct sctphdr));
 			break;
 		}
 	} else if (ip != NULL) {
 		/*
 		 * Nothing to save except upper layer proto,
 		 * since this is a packet fragment.
 		 */
 		flags |= NG_NETFLOW_IS_FRAG;
 		upper_proto = ip->ip_p;
 		if ((ip->ip_v != IPVERSION) ||
 		    ((ip->ip_hl << 2) < sizeof(struct ip)))
 			goto bypass;
 #ifdef INET6
 	} else if (ip6 != NULL) {
 		int cur = ip6->ip6_nxt, hdr_off = 0;
 		struct ip6_ext *ip6e;
 		struct ip6_frag *ip6f;
 
 		/* IPv6 is accounted only when the export9 hook is connected. */
 		if (priv->export9 == NULL)
 			goto bypass;
 
 		/* Save upper layer info. */
 		off = pullup_len;
 		upper_proto = cur;
 
 		if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
 			goto bypass;
 
 		/*
 		 * Loop thru IPv6 extended headers to get upper
 		 * layer header / frag.
 		 */
 		for (;;) {
 			switch (cur) {
 			/*
 			 * Same as in IPv4, we can forward a 'bad'
 			 * packet without accounting.
 			 */
 			case IPPROTO_TCP:
 				M_CHECK(sizeof(struct tcphdr));
 				goto loopend;
 			case IPPROTO_UDP:
 				M_CHECK(sizeof(struct udphdr));
 				goto loopend;
 			case IPPROTO_SCTP:
 				M_CHECK(sizeof(struct sctphdr));
 				goto loopend;
 
 			/* Loop until 'real' upper layer headers */
 			case IPPROTO_HOPOPTS:
 			case IPPROTO_ROUTING:
 			case IPPROTO_DSTOPTS:
 				M_CHECK(sizeof(struct ip6_ext));
 				ip6e = (struct ip6_ext *)(mtod(m, caddr_t) +
 				    off);
 				upper_proto = ip6e->ip6e_nxt;
 				hdr_off = (ip6e->ip6e_len + 1) << 3;
 				break;
 
 			/* RFC4302, can be before DSTOPTS */
 			case IPPROTO_AH:
 				M_CHECK(sizeof(struct ip6_ext));
 				ip6e = (struct ip6_ext *)(mtod(m, caddr_t) +
 				    off);
 				upper_proto = ip6e->ip6e_nxt;
 				hdr_off = (ip6e->ip6e_len + 2) << 2;
 				break;
 
 			case IPPROTO_FRAGMENT:
 				M_CHECK(sizeof(struct ip6_frag));
 				ip6f = (struct ip6_frag *)(mtod(m, caddr_t) +
 				    off);
 				upper_proto = ip6f->ip6f_nxt;
 				hdr_off = sizeof(struct ip6_frag);
 				off += hdr_off;
 				flags |= NG_NETFLOW_IS_FRAG;
 				goto loopend;
 
 #if 0				
 			case IPPROTO_NONE:
 				goto loopend;
 #endif
 			/*
 			 * Any unknown header (new extension or IPv6/IPv4
 			 * header for tunnels) ends loop.
 			 */
 			default:
 				goto loopend;
 			}
 
 			/* Step over the extension header just examined. */
 			off += hdr_off;
 			cur = upper_proto;
 		}
 #endif
 	}
 #undef	M_CHECK
 
 #ifdef INET6
 loopend:
 #endif
 	/* Just in case of real reallocation in M_CHECK() / m_pullup() */
 	if (m != m_old) {
 		priv->nfinfo_realloc_mbuf++;
 		/* Restore ip/ipv6 pointer */
 		if (ip != NULL)
 			ip = (struct ip *)(mtod(m, caddr_t) + l3_off);
 		else if (ip6 != NULL)
 			ip6 = (struct ip6_hdr *)(mtod(m, caddr_t) + l3_off);
  	}
 
 	upper_ptr = (caddr_t)(mtod(m, caddr_t) + off);
 
 	/* Determine packet input interface. Prefer configured. */
 	src_if_index = 0;
 	if (hook == iface->out || iface->info.ifinfo_index == 0) {
 		if (m->m_pkthdr.rcvif != NULL)
 			src_if_index = m->m_pkthdr.rcvif->if_index;
 	} else
 		src_if_index = iface->info.ifinfo_index;
 	
 	/* Check packet FIB */
 	fib = M_GETFIB(m);
 	if (fib >= priv->maxfibs) {
 		CTR2(KTR_NET, "ng_netflow_rcvdata(): packet fib %d is out of "
 		    "range of available fibs: 0 .. %d",
 		    fib, priv->maxfibs);
 		goto bypass;
 	}
 
 	/* Per-FIB export state is set up on first use. */
 	if ((fe = priv_to_fib(priv, fib)) == NULL) {
 		/* Setup new FIB */
 		if (ng_netflow_fib_init(priv, fib) != 0) {
 			/* malloc() failed */
 			goto bypass;
 		}
 
 		fe = priv_to_fib(priv, fib);
 	}
 
 	if (ip != NULL)
 		error = ng_netflow_flow_add(priv, fe, ip, upper_ptr,
 		    upper_proto, flags, src_if_index);
 #ifdef INET6		
 	else if (ip6 != NULL)
 		error = ng_netflow_flow6_add(priv, fe, ip6, upper_ptr,
 		    upper_proto, flags, src_if_index);
 #endif
 	else
 		goto bypass;
 	
 	/* Reaching this point means the packet went through accounting. */
 	acct = 1;
 bypass:
 	if (out != NULL) {
 		if (acct == 0) {
 			/* Accounting failure */
 			if (ip != NULL) {
 				counter_u64_add(priv->nfinfo_spackets, 1);
 				counter_u64_add(priv->nfinfo_sbytes,
 				    m->m_pkthdr.len);
 			} else if (ip6 != NULL) {
 				counter_u64_add(priv->nfinfo_spackets6, 1);
 				counter_u64_add(priv->nfinfo_sbytes6,
 				    m->m_pkthdr.len);
 			}
 		}
 
 		/* XXX: error gets overwritten here */
 		NG_FWD_NEW_DATA(error, item, out, m);
 		return (error);
 	}
 	/* Nowhere to forward to (or an error occurred): free everything. */
 done:
 	if (item)
 		NG_FREE_ITEM(item);
 	if (m)
 		NG_FREE_M(m);
 
 	return (error);	
 }
 
 /*
  * The node is about to be shut down: stop the expiry callout and
  * flush the flow cache.  Always returns 0.
  */
 static int
 ng_netflow_close(node_p node)
 {
 	const priv_p nfp = NG_NODE_PRIVATE(node);
 
 	/* Drain the callout first, then flush the cache. */
 	callout_drain(&nfp->exp_callout);
 	ng_netflow_cache_flush(nfp);
 
 	return (0);
 }
 
 /*
  * Local shutdown processing: detach the private data from the node,
  * drop the node reference held through it and release the memory.
  * Always returns 0.
  */
 static int
 ng_netflow_rmnode(node_p node)
 {
 	const priv_p nfp = NG_NODE_PRIVATE(node);
 
 	/* Detach the private data before dropping the reference. */
 	NG_NODE_SET_PRIVATE(node, NULL);
 	NG_NODE_UNREF(nfp->node);
 
 	/* The per-FIB array goes first, then the structure that owns it. */
 	free(nfp->fib_data, M_NETGRAPH);
 	free(nfp, M_NETGRAPH);
 
 	return (0);
 }
 
 /*
  * Hook disconnection.  Forgets the hook in whichever slot referenced it
  * (interface hook/out, export, export9), stops the expiry callout once
  * no export hook remains and removes the node together with its last
  * hook.  Always returns 0.
  */
 static int
 ng_netflow_disconnect(hook_p hook)
 {
 	const node_p nd = NG_HOOK_NODE(hook);
 	const priv_p nfp = NG_NODE_PRIVATE(nd);
 	const iface_p ifc = NG_HOOK_PRIVATE(hook);
 
 	/* Interface hooks carry their iface as private data. */
 	if (ifc != NULL && ifc->hook == hook)
 		ifc->hook = NULL;
 	if (ifc != NULL && ifc->out == hook)
 		ifc->out = NULL;
 
 	/* Losing the last export hook stops the expire() runs. */
 	if (nfp->export == hook) {
 		if (nfp->export9 == NULL)
 			callout_drain(&nfp->exp_callout);
 		nfp->export = NULL;
 	}
 
 	if (nfp->export9 == hook) {
 		if (nfp->export == NULL)
 			callout_drain(&nfp->exp_callout);
 		nfp->export9 = NULL;
 	}
 
 	/* A node without hooks has no reason to exist. */
 	if (NG_NODE_NUMHOOKS(nd) == 0)
 		ng_rmnode_self(nd);
 
 	return (0);
 }
Index: projects/clang380-import/sys/netinet/in_fib.c
===================================================================
--- projects/clang380-import/sys/netinet/in_fib.c	(revision 293686)
+++ projects/clang380-import/sys/netinet/in_fib.c	(revision 293687)
@@ -1,225 +1,232 @@
 /*-
  * Copyright (c) 2015
  * 	Alexander V. Chernikov <melifaro@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_route.h"
 #include "opt_mpath.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/kernel.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_fib.h>
 
 #ifdef INET
 static void fib4_rte_to_nh_basic(struct rtentry *rte, struct in_addr dst,
     uint32_t flags, struct nhop4_basic *pnh4);
 static void fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst,
     uint32_t flags, struct nhop4_extended *pnh4);
 
 #define RNTORT(p)	((struct rtentry *)(p))
 
 /*
  * Fill the basic nexthop structure @pnh4 from route entry @rte that was
  * found for destination @dst.  @flags are the NHR_* lookup flags.
  */
 static void
 fib4_rte_to_nh_basic(struct rtentry *rte, struct in_addr dst,
     uint32_t flags, struct nhop4_basic *pnh4)
 {
 	struct sockaddr_in *sa;
 
 	/* NHR_IFAIF selects the interface of the route's address. */
 	pnh4->nh_ifp = ((flags & NHR_IFAIF) != 0) ?
 	    rte->rt_ifa->ifa_ifp : rte->rt_ifp;
 	/* The MTU is bounded by the route's interface in either case. */
 	pnh4->nh_mtu = min(rte->rt_mtu, rte->rt_ifp->if_mtu);
 
 	/* The next hop is the gateway if there is one, else @dst itself. */
 	if ((rte->rt_flags & RTF_GATEWAY) != 0) {
 		sa = (struct sockaddr_in *)rte->rt_gateway;
 		pnh4->nh_addr = sa->sin_addr;
 	} else {
 		pnh4->nh_addr = dst;
 	}
 
 	/* Convert route flags; an all-zero route key adds NHF_DEFAULT. */
 	pnh4->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
 	sa = (struct sockaddr_in *)rt_key(rte);
 	if (sa->sin_addr.s_addr == 0)
 		pnh4->nh_flags |= NHF_DEFAULT;
 	/* TODO: Handle RTF_BROADCAST here */
 }
 
 /*
  * Fill the extended nexthop structure @pnh4 from route entry @rte that
  * was found for destination @dst.  In addition to the basic data, the
  * address of the route's interface address is stored in nh_src.
  * @flags are the NHR_* lookup flags.
  */
 static void
 fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst,
     uint32_t flags, struct nhop4_extended *pnh4)
 {
 	struct sockaddr_in *sa;
 	struct in_ifaddr *ifaddr;
 
 	/* NHR_IFAIF selects the interface of the route's address. */
 	pnh4->nh_ifp = ((flags & NHR_IFAIF) != 0) ?
 	    rte->rt_ifa->ifa_ifp : rte->rt_ifp;
 	/* The MTU is bounded by the route's interface in either case. */
 	pnh4->nh_mtu = min(rte->rt_mtu, rte->rt_ifp->if_mtu);
 
 	/* The next hop is the gateway if there is one, else @dst itself. */
 	if ((rte->rt_flags & RTF_GATEWAY) != 0) {
 		sa = (struct sockaddr_in *)rte->rt_gateway;
 		pnh4->nh_addr = sa->sin_addr;
 	} else {
 		pnh4->nh_addr = dst;
 	}
 
 	/* Convert route flags; an all-zero route key adds NHF_DEFAULT. */
 	pnh4->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
 	sa = (struct sockaddr_in *)rt_key(rte);
 	if (sa->sin_addr.s_addr == 0)
 		pnh4->nh_flags |= NHF_DEFAULT;
 	/* XXX: Set RTF_BROADCAST if GW address is broadcast */
 
 	/* Source address comes from the route's interface address. */
 	ifaddr = ifatoia(rte->rt_ifa);
 	pnh4->nh_src = IA_SIN(ifaddr)->sin_addr;
 }
 
 /*
  * Performs IPv4 route table lookup on @dst. Returns 0 on success.
  * Stores nexthop info provided @pnh4 structure.
  * Note that
  * - nh_ifp cannot be safely dereferenced
  * - nh_ifp represents logical transmit interface (rt_ifp) (e.g. if
  *   looking up address on interface "ix0" pointer to "lo0" interface
  *   will be returned instead of "ix0")
  * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed
  * - howewer mtu from "transmit" interface will be returned.
  */
 int
 fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags,
     uint32_t flowid, struct nhop4_basic *pnh4)
 {
 	struct radix_node_head *rh;
 	struct radix_node *rn;
 	struct sockaddr_in sin;
 	struct rtentry *rte;
 
 	KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_basic: bad fibnum"));
 	rh = rt_tables_get_rnh(fibnum, AF_INET);
 	if (rh == NULL)
 		return (ENOENT);
 
 	/* Prepare lookup key */
 	memset(&sin, 0, sizeof(sin));
 	sin.sin_len = sizeof(struct sockaddr_in);
 	sin.sin_addr = dst;
 
 	RADIX_NODE_HEAD_RLOCK(rh);
 	rn = rh->rnh_matchaddr((void *)&sin, rh);
 	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
 		rte = RNTORT(rn);
 		/* Ensure route & ifp is UP */
 		if (RT_LINK_IS_UP(rte->rt_ifp)) {
 			fib4_rte_to_nh_basic(rte, dst, flags, pnh4);
 			RADIX_NODE_HEAD_RUNLOCK(rh);
 
 			return (0);
 		}
 	}
 	RADIX_NODE_HEAD_RUNLOCK(rh);
 
 	return (ENOENT);
 }
 
 /*
  * Performs IPv4 route table lookup on @dst in fib @fibnum.
  * Returns 0 on success and ENOENT if there is no table for the fib, no
  * matching route, or the link of the route's interface is not up.
  * Stores extended nexthop info in the provided @pnh4 structure.
  * With RADIX_MPATH the matched route is passed through rt_mpath_select()
  * together with @flowid (presumably to pick one of several multipath
  * routes -- confirm against radix_mpath).
  * Note that
  * - nh_ifp cannot be safely dereferenced unless NHR_REF is specified.
  * - in that case you need to call fib4_free_nh_ext()
  * - nh_ifp represents logical transmit interface (rt_ifp) (e.g. if
  *   looking up address of interface "ix0" pointer to "lo0" interface
  *   will be returned instead of "ix0")
  * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed
  * - however, the mtu of the "transmit" interface will be returned.
  */
 int
 fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags,
     uint32_t flowid, struct nhop4_extended *pnh4)
 {
 	struct radix_node_head *rh;
 	struct radix_node *rn;
 	struct sockaddr_in sin;
 	struct rtentry *rte;
 
 	KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_ext: bad fibnum"));
 	rh = rt_tables_get_rnh(fibnum, AF_INET);
 	if (rh == NULL)
 		return (ENOENT);
 
 	/* Prepare lookup key */
 	memset(&sin, 0, sizeof(sin));
 	sin.sin_len = sizeof(struct sockaddr_in);
 	sin.sin_addr = dst;
 
 	/* The read lock is held until the nexthop data has been copied out. */
 	RADIX_NODE_HEAD_RLOCK(rh);
 	rn = rh->rnh_matchaddr((void *)&sin, rh);
 	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
 		rte = RNTORT(rn);
+#ifdef RADIX_MPATH
+		rte = rt_mpath_select(rte, flowid);
+		if (rte == NULL) {
+			RADIX_NODE_HEAD_RUNLOCK(rh);
+			return (ENOENT);
+		}
+#endif
 		/* Ensure route & ifp is UP */
 		if (RT_LINK_IS_UP(rte->rt_ifp)) {
 			fib4_rte_to_nh_extended(rte, dst, flags, pnh4);
 			if ((flags & NHR_REF) != 0) {
 				/* TODO: lwref on egress ifp's ? */
 			}
 			RADIX_NODE_HEAD_RUNLOCK(rh);
 
 			return (0);
 		}
 	}
 	RADIX_NODE_HEAD_RUNLOCK(rh);
 
 	return (ENOENT);
 }
 
 /*
  * Counterpart of fib4_lookup_nh_ext() for lookups done with NHR_REF.
  * Currently does nothing, as the lookup does not take any reference yet.
  */
 void
 fib4_free_nh_ext(uint32_t fibnum, struct nhop4_extended *pnh4)
 {
 
 }
 
 #endif
Index: projects/clang380-import/sys/netinet/in_pcb.c
===================================================================
--- projects/clang380-import/sys/netinet/in_pcb.c	(revision 293686)
+++ projects/clang380-import/sys/netinet/in_pcb.c	(revision 293687)
@@ -1,2657 +1,2658 @@
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993, 1995
  *	The Regents of the University of California.
  * Copyright (c) 2007-2009 Robert N. M. Watson
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * All rights reserved.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_ipsec.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_pcbgroup.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/callout.h>
+#include <sys/eventhandler.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/refcount.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/rss_config.h>
 #include <net/vnet.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #endif
 #ifdef INET
 #include <netinet/in_var.h>
 #endif
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/ip6_var.h>
 #endif /* INET6 */
 
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/key.h>
 #endif /* IPSEC */
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Callout handle; presumably used to schedule ipport_tick() -- confirm
  * at its initialization site, which is not in this part of the file.
  */
 static struct callout	ipport_tick_callout;
 
 /*
  * These configure the range of local port addresses assigned to
  * "unspecified" outgoing connections/packets/whatever.
  *
  * in_pcb_lport() picks one of the three (first, last) pairs below based
  * on the INP_HIGHPORT / INP_LOWPORT flags of the pcb.
  */
 VNET_DEFINE(int, ipport_lowfirstauto) = IPPORT_RESERVED - 1;	/* 1023 */
 VNET_DEFINE(int, ipport_lowlastauto) = IPPORT_RESERVEDSTART;	/* 600 */
 VNET_DEFINE(int, ipport_firstauto) = IPPORT_EPHEMERALFIRST;	/* 10000 */
 VNET_DEFINE(int, ipport_lastauto) = IPPORT_EPHEMERALLAST;	/* 65535 */
 VNET_DEFINE(int, ipport_hifirstauto) = IPPORT_HIFIRSTAUTO;	/* 49152 */
 VNET_DEFINE(int, ipport_hilastauto) = IPPORT_HILASTAUTO;	/* 65535 */
 
 /*
  * Reserved ports accessible only to root. There are significant
  * security considerations that must be accounted for when changing these,
  * but the security benefits can be great. Please be careful.
  *
  * in_pcbbind_setup() requires PRIV_NETINET_RESERVEDPORT for an explicit
  * bind to a port within [ipport_reservedlow, ipport_reservedhigh].
  */
 VNET_DEFINE(int, ipport_reservedhigh) = IPPORT_RESERVED - 1;	/* 1023 */
 VNET_DEFINE(int, ipport_reservedlow);
 
 /* Variables dealing with random ephemeral port allocation. */
 VNET_DEFINE(int, ipport_randomized) = 1;	/* user controlled via sysctl */
 VNET_DEFINE(int, ipport_randomcps) = 10;	/* user controlled via sysctl */
 VNET_DEFINE(int, ipport_randomtime) = 45;	/* user controlled via sysctl */
 VNET_DEFINE(int, ipport_stoprandom);		/* toggled by ipport_tick */
 /* Bumped by in_pcb_lport() when it allocates a port. */
 VNET_DEFINE(int, ipport_tcpallocs);
 /* NOTE(review): looks like the previous ipport_tcpallocs sample -- confirm. */
 static VNET_DEFINE(int, ipport_tcplastcount);
 
 /* Per-vnet accessor for the file-local variable above. */
 #define	V_ipport_tcplastcount		VNET(ipport_tcplastcount)
 
 /*
  * Prototypes for file-local helpers that are used before the point where
  * they are defined.  The hash lookup helper is IPv4-only, hence INET.
  */
 static void	in_pcbremlists(struct inpcb *inp);
 #ifdef INET
 static struct inpcb	*in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo,
 			    struct in_addr faddr, u_int fport_arg,
 			    struct in_addr laddr, u_int lport_arg,
 			    int lookupflags, struct ifnet *ifp);
 
 /* Clamp an lvalue into the closed interval [min, max]. */
 #define RANGECHK(var, min, max) \
 	if ((var) < (min)) { (var) = (min); } \
 	else if ((var) > (max)) { (var) = (max); }
 
 /*
  * Handler for the integer port-range sysctls.  The generic integer
  * handler does the read/write; when it succeeds, every automatic
  * port-range bound is clamped back into its legal interval so that a
  * bad value written through any one OID cannot stick.
  */
 static int
 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
 {
 	int rv;
 
 	rv = sysctl_handle_int(oidp, arg1, arg2, req);
 	if (rv != 0)
 		return (rv);
 
 	/* Low (reserved) automatic range. */
 	RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
 	RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
 	/* Default automatic range. */
 	RANGECHK(V_ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX);
 	RANGECHK(V_ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX);
 	/* High automatic range. */
 	RANGECHK(V_ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX);
 	RANGECHK(V_ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX);
 
 	return (0);
 }
 
 #undef RANGECHK
 
 /*
  * net.inet.ip.portrange.*: tunables for automatic local port selection.
  *
  * Every OID below is backed by a VNET_DEFINE()d variable and therefore
  * must carry CTLFLAG_VNET so that the sysctl code resolves the address
  * against the calling vnet rather than using VNET_NAME() storage directly.
  */
 static SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0,
     "IP Ports");
 
 /* Range bounds: written through sysctl_net_ipport_check(), which clamps. */
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst,
 	CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
 	&VNET_NAME(ipport_lowfirstauto), 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast,
 	CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
 	&VNET_NAME(ipport_lowlastauto), 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first,
 	CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
 	&VNET_NAME(ipport_firstauto), 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last,
 	CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
 	&VNET_NAME(ipport_lastauto), 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst,
 	CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
 	&VNET_NAME(ipport_hifirstauto), 0, &sysctl_net_ipport_check, "I", "");
 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast,
 	CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
 	&VNET_NAME(ipport_hilastauto), 0, &sysctl_net_ipport_check, "I", "");
 /* Privileged-port window; CTLFLAG_SECURE marks both bounds. */
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh,
 	CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
 	&VNET_NAME(ipport_reservedhigh), 0, "");
 /*
  * reservedlow previously lacked CTLFLAG_VNET even though it points at
  * VNET_NAME() storage like its reservedhigh sibling; flag it the same way.
  */
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
 	CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_SECURE,
 	&VNET_NAME(ipport_reservedlow), 0, "");
 /* Random ephemeral port allocation controls. */
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized,
 	CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(ipport_randomized), 0, "Enable random port allocation");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomcps,
 	CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(ipport_randomcps), 0, "Maximum number of random port "
 	"allocations before switching to a sequental one");
 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime,
 	CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(ipport_randomtime), 0,
 	"Minimum time to keep sequental port "
 	"allocation before switching to a random one");
 #endif /* INET */
 
 /*
  * in_pcb.c: manage the Protocol Control Blocks.
  *
  * NOTE: It is assumed that most of these functions will be called with
  * the pcbinfo lock held, and often, the inpcb lock held, as these utility
  * functions often modify hash chains or addresses in pcbs.
  */
 
 /*
  * Initialize an inpcbinfo -- we should be able to reduce the number of
  * arguments in time.
  */
 void
 in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
     struct inpcbhead *listhead, int hash_nelements, int porthash_nelements,
     char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini,
     uint32_t inpcbzone_flags, u_int hashfields)
 {
 
 	/* Three locks: the info lock (named by caller), hash and list. */
 	INP_INFO_LOCK_INIT(pcbinfo, name);
 	INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash");	/* XXXRW: argument? */
 	INP_LIST_LOCK_INIT(pcbinfo, "pcbinfolist");
 #ifdef VIMAGE
 	/* Remember which vnet this pcbinfo was created in. */
 	pcbinfo->ipi_vnet = curvnet;
 #endif
 	/* The caller supplies the list head storage; start out empty. */
 	pcbinfo->ipi_listhead = listhead;
 	LIST_INIT(pcbinfo->ipi_listhead);
 	pcbinfo->ipi_count = 0;
 	/* Two hash tables: one for pcb lookup, one keyed for port lookup. */
 	pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB,
 	    &pcbinfo->ipi_hashmask);
 	pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
 	    &pcbinfo->ipi_porthashmask);
 #ifdef PCBGROUP
 	/* hashfields is only used when pcb groups are compiled in. */
 	in_pcbgroup_init(pcbinfo, hashfields, hash_nelements);
 #endif
 	/* Zone the inpcbs come from, capped at maxsockets entries. */
 	pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb),
 	    NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR,
 	    inpcbzone_flags);
 	uma_zone_set_max(pcbinfo->ipi_zone, maxsockets);
 	uma_zone_set_warning(pcbinfo->ipi_zone,
 	    "kern.ipc.maxsockets limit reached");
 }
 
 /*
  * Destroy an inpcbinfo.
  */
 void
 in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
 {
 
 	/* Callers must have freed every pcb before tearing the info down. */
 	KASSERT(pcbinfo->ipi_count == 0,
 	    ("%s: ipi_count = %u", __func__, pcbinfo->ipi_count));
 
 	/* Release what in_pcbinfo_init() set up: hashes, groups, zone, locks. */
 	hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
 	hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
 	    pcbinfo->ipi_porthashmask);
 #ifdef PCBGROUP
 	in_pcbgroup_destroy(pcbinfo);
 #endif
 	uma_zdestroy(pcbinfo->ipi_zone);
 	/* Locks go last, in the reverse of their initialization order. */
 	INP_LIST_LOCK_DESTROY(pcbinfo);
 	INP_HASH_LOCK_DESTROY(pcbinfo);
 	INP_INFO_LOCK_DESTROY(pcbinfo);
 }
 
 /*
  * Allocate a PCB and associate it with the socket.
  * On success return with the PCB locked.
  */
 int
 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
 {
 	struct inpcb *inp;
 	int error;
 
 #ifdef INVARIANTS
 	/* The TCP pcbinfo only needs a read lock here; all others a write lock. */
 	if (pcbinfo == &V_tcbinfo) {
 		INP_INFO_RLOCK_ASSERT(pcbinfo);
 	} else {
 		INP_INFO_WLOCK_ASSERT(pcbinfo);
 	}
 #endif
 
 	error = 0;
 	/* M_NOWAIT: fail with ENOBUFS instead of sleeping for memory. */
 	inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
 	if (inp == NULL)
 		return (ENOBUFS);
 	/* Only the first inp_zero_size bytes are cleared, not the whole pcb. */
 	bzero(inp, inp_zero_size);
 	inp->inp_pcbinfo = pcbinfo;
 	inp->inp_socket = so;
 	/* Take our own reference on the socket's credential. */
 	inp->inp_cred = crhold(so->so_cred);
 	inp->inp_inc.inc_fibnum = so->so_fibnum;
 #ifdef MAC
 	error = mac_inpcb_init(inp, M_NOWAIT);
 	if (error != 0)
 		goto out;
 	mac_inpcb_create(so, inp);
 #endif
 #ifdef IPSEC
 	error = ipsec_init_policy(so, &inp->inp_sp);
 	if (error != 0) {
 #ifdef MAC
 		/* Undo the MAC label set up just above before bailing out. */
 		mac_inpcb_destroy(inp);
 #endif
 		goto out;
 	}
 #endif /*IPSEC*/
 #ifdef INET6
 	if (INP_SOCKAF(so) == AF_INET6) {
 		inp->inp_vflag |= INP_IPV6PROTO;
 		if (V_ip6_v6only)
 			inp->inp_flags |= IN6P_IPV6_V6ONLY;
 	}
 #endif
 	/*
 	 * The pcb write lock taken here is not dropped in this function:
 	 * on success the caller receives the pcb locked.
 	 */
 	INP_WLOCK(inp);
 	/* Publish the pcb on the global list under the list lock. */
 	INP_LIST_WLOCK(pcbinfo);
 	LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
 	pcbinfo->ipi_count++;
 	so->so_pcb = (caddr_t)inp;
 #ifdef INET6
 	if (V_ip6_auto_flowlabel)
 		inp->inp_flags |= IN6P_AUTOFLOWLABEL;
 #endif
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 	refcount_init(&inp->inp_refcount, 1);	/* Reference from inpcbinfo */
 	INP_LIST_WUNLOCK(pcbinfo);
 #if defined(IPSEC) || defined(MAC)
 out:
 	/*
 	 * Reached with error != 0 only via the gotos above, i.e. before the
 	 * pcb was locked or linked; drop the credential and free the pcb.
 	 */
 	if (error != 0) {
 		crfree(inp->inp_cred);
 		uma_zfree(pcbinfo->ipi_zone, inp);
 	}
 #endif
 	return (error);
 }
 
 #ifdef INET
 /*
  * Bind a pcb to the local address/port described by nam (or pick both
  * automatically when nam is NULL) and enter it into the pcb hash.
  * Returns EINVAL if the pcb is already bound, EAGAIN if the hash insert
  * fails, or whatever in_pcbbind_setup() reports.
  */
 int
 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
 {
 	struct sockaddr_in *sin;
 	int error, wildport;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 
 	/* A pcb that already has a local address or port cannot be re-bound. */
 	if (inp->inp_laddr.s_addr != INADDR_ANY || inp->inp_lport != 0)
 		return (EINVAL);
 
 	/*
 	 * Sample "no port requested" now: in_pcbbind_setup() may clear
 	 * sin_port in the caller's sockaddr.
 	 */
 	sin = (struct sockaddr_in *)nam;
 	wildport = (sin == NULL || sin->sin_port == 0);
 
 	error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr,
 	    &inp->inp_lport, cred);
 	if (error != 0)
 		return (error);
 
 	if (in_pcbinshash(inp) != 0) {
 		/* Could not hash the pcb: roll the binding back. */
 		inp->inp_lport = 0;
 		inp->inp_laddr.s_addr = INADDR_ANY;
 		return (EAGAIN);
 	}
 
 	if (wildport)
 		inp->inp_flags |= INP_ANONPORT;
 	return (0);
 }
 #endif
 
 /*
  * Select a local port (number) to use.
  */
 #if defined(INET) || defined(INET6)
 /*
  * Pick an unused local port for inp.
  *
  * The candidate range is chosen from the INP_HIGHPORT / INP_LOWPORT flags
  * (INP_LOWPORT additionally requires PRIV_NETINET_RESERVEDPORT).  Ports
  * are then probed one at a time, starting after the per-range "last port"
  * cursor (randomized first when allowed), until the local lookup finds no
  * conflicting pcb.
  *
  * On success returns 0 and stores the port, in network byte order, in
  * *lportp; for an IPv4-only pcb *laddrp is written back with the value it
  * had on entry.  Returns the priv_check_cred() error for a refused low
  * port, or EADDRNOTAVAIL once every port in the range has been tried.
  * The range cursor in pcbinfo is advanced even on failure.
  */
 int
 in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp,
     struct ucred *cred, int lookupflags)
 {
 	struct inpcbinfo *pcbinfo;
 	struct inpcb *tmpinp;
 	unsigned short *lastport;
 	int count, dorandom, error;
 	u_short aux, first, last, lport;
 #ifdef INET
 	struct in_addr laddr;
 #endif
 
 	pcbinfo = inp->inp_pcbinfo;
 
 	/*
 	 * Because no actual state changes occur here, a global write lock on
 	 * the pcbinfo isn't required.
 	 */
 	INP_LOCK_ASSERT(inp);
 	INP_HASH_LOCK_ASSERT(pcbinfo);
 
 	if (inp->inp_flags & INP_HIGHPORT) {
 		first = V_ipport_hifirstauto;	/* sysctl */
 		last  = V_ipport_hilastauto;
 		lastport = &pcbinfo->ipi_lasthi;
 	} else if (inp->inp_flags & INP_LOWPORT) {
 		error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0);
 		if (error)
 			return (error);
 		first = V_ipport_lowfirstauto;	/* 1023 */
 		last  = V_ipport_lowlastauto;	/* 600 */
 		lastport = &pcbinfo->ipi_lastlow;
 	} else {
 		first = V_ipport_firstauto;	/* sysctl */
 		last  = V_ipport_lastauto;
 		lastport = &pcbinfo->ipi_lastport;
 	}
 	/*
 	 * For UDP(-Lite), use random port allocation as long as the user
 	 * allows it.  For TCP (and as of yet unknown) connections,
 	 * use random port allocation only if the user allows it AND
 	 * ipport_tick() allows it.
 	 */
 	if (V_ipport_randomized &&
 		(!V_ipport_stoprandom || pcbinfo == &V_udbinfo ||
 		pcbinfo == &V_ulitecbinfo))
 		dorandom = 1;
 	else
 		dorandom = 0;
 	/*
 	 * It makes no sense to do random port allocation if
 	 * we have the only port available.
 	 */
 	if (first == last)
 		dorandom = 0;
 	/*
 	 * Make sure to not include UDP(-Lite) packets in the count.
 	 * Both comparisons must hold: with "||" the test is always true
 	 * (pcbinfo cannot equal both addresses at once) and UDP(-Lite)
 	 * allocations would be counted too.
 	 */
 	if (pcbinfo != &V_udbinfo && pcbinfo != &V_ulitecbinfo)
 		V_ipport_tcpallocs++;
 	/*
 	 * Instead of having two loops further down counting up or down
 	 * make sure that first is always <= last and go with only one
 	 * code path implementing all logic.
 	 */
 	if (first > last) {
 		aux = first;
 		first = last;
 		last = aux;
 	}
 
 #ifdef INET
 	/* Make the compiler happy. */
 	laddr.s_addr = 0;
 	if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4) {
 		KASSERT(laddrp != NULL, ("%s: laddrp NULL for v4 inp %p",
 		    __func__, inp));
 		laddr = *laddrp;
 	}
 #endif
 	tmpinp = NULL;	/* Make compiler happy. */
 	lport = *lportp;
 
 	/*
 	 * dorandom implies first != last (and first < last after the swap
 	 * above), so the modulus is never zero.
 	 */
 	if (dorandom)
 		*lastport = first + (arc4random() % (last - first));
 
 	/* Upper bound on probes: one per port in [first, last]. */
 	count = last - first;
 
 	do {
 		if (count-- < 0)	/* completely used? */
 			return (EADDRNOTAVAIL);
 		/* Advance the cursor, wrapping back to the start of the range. */
 		++*lastport;
 		if (*lastport < first || *lastport > last)
 			*lastport = first;
 		lport = htons(*lastport);
 
 		/*
 		 * Look for a conflicting pcb: the IPv6 lookup for pcbs with
 		 * INP_IPV6 set, otherwise the IPv4 one.  The "else" only
 		 * exists when both families are compiled in.
 		 */
 #ifdef INET6
 		if ((inp->inp_vflag & INP_IPV6) != 0)
 			tmpinp = in6_pcblookup_local(pcbinfo,
 			    &inp->in6p_laddr, lport, lookupflags, cred);
 #endif
 #if defined(INET) && defined(INET6)
 		else
 #endif
 #ifdef INET
 			tmpinp = in_pcblookup_local(pcbinfo, laddr,
 			    lport, lookupflags, cred);
 #endif
 	} while (tmpinp != NULL);
 
 #ifdef INET
 	if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4)
 		laddrp->s_addr = laddr.s_addr;
 #endif
 	*lportp = lport;
 
 	return (0);
 }
 
 /*
  * Return cached socket options.
  */
 short
 inp_so_options(const struct inpcb *inp)
 {
 	short opts = 0;
 
 	/* Translate the cached INP_REUSE* bits back into SO_REUSE* bits. */
 	if (inp->inp_flags2 & INP_REUSEPORT)
 		opts |= SO_REUSEPORT;
 	if (inp->inp_flags2 & INP_REUSEADDR)
 		opts |= SO_REUSEADDR;
 
 	return (opts);
 }
 #endif /* INET || INET6 */
 
 /*
  * Check if a new BINDMULTI socket is allowed to be created.
  *
  * ni points to the new inp.
  * oi points to the existing inp.
  *
  * This checks whether the existing inp also has BINDMULTI and
  * whether the credentials match.
  */
 int
 in_pcbbind_check_bindmulti(const struct inpcb *ni, const struct inpcb *oi)
 {
 
 	/* Without INP_BINDMULTI on the new pcb there is nothing to check. */
 	if ((ni->inp_flags2 & INP_BINDMULTI) == 0)
 		return (1);
 
 	/* Both pcbs must belong to the same uid ... */
 	if (ni->inp_cred->cr_uid != oi->inp_cred->cr_uid)
 		return (0);
 
 	/* ... and the existing pcb must have opted in to BINDMULTI as well. */
 	if ((oi->inp_flags2 & INP_BINDMULTI) == 0)
 		return (0);
 
 	return (1);
 }
 
 #ifdef INET
 /*
  * Set up a bind operation on a PCB, performing port allocation
  * as required, but do not actually modify the PCB. Callers can
  * either complete the bind by setting inp_laddr/inp_lport and
  * calling in_pcbinshash(), or they can just use the resulting
  * port and address to authorise the sending of a once-off packet.
  *
  * On error, the values of *laddrp and *lportp are not changed.
  */
 int
 in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
     u_short *lportp, struct ucred *cred)
 {
 	struct socket *so = inp->inp_socket;
 	struct sockaddr_in *sin;
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct in_addr laddr;
 	u_short lport = 0;
 	int lookupflags = 0, reuseport = (so->so_options & SO_REUSEPORT);
 	int error;
 
 	/*
 	 * No state changes, so read locks are sufficient here.
 	 */
 	INP_LOCK_ASSERT(inp);
 	INP_HASH_LOCK_ASSERT(pcbinfo);
 
 	if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */
 		return (EADDRNOTAVAIL);
 	laddr.s_addr = *laddrp;
 	/* An explicit address may not be given once one is already set. */
 	if (nam != NULL && laddr.s_addr != INADDR_ANY)
 		return (EINVAL);
 	/* Without any reuse option, wildcard pcbs count as conflicts too. */
 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
 		lookupflags = INPLOOKUP_WILDCARD;
 	if (nam == NULL) {
 		/* No address requested: only the jail check applies. */
 		if ((error = prison_local_ip4(cred, &laddr)) != 0)
 			return (error);
 	} else {
 		sin = (struct sockaddr_in *)nam;
 		if (nam->sa_len != sizeof (*sin))
 			return (EINVAL);
 #ifdef notdef
 		/*
 		 * We should check the family, but old programs
 		 * incorrectly fail to initialize it.
 		 */
 		if (sin->sin_family != AF_INET)
 			return (EAFNOSUPPORT);
 #endif
 		error = prison_local_ip4(cred, &sin->sin_addr);
 		if (error)
 			return (error);
 		if (sin->sin_port != *lportp) {
 			/* Don't allow the port to change. */
 			if (*lportp != 0)
 				return (EINVAL);
 			lport = sin->sin_port;
 		}
 		/* NB: lport is left as 0 if the port isn't being changed. */
 		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
 			/*
 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
 			 * allow complete duplication of binding if
 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
 			 * and a multicast address is bound on both
 			 * new and duplicated sockets.
 			 */
 			if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
 		} else if (sin->sin_addr.s_addr != INADDR_ANY) {
 			/*
 			 * Note that this scribbles on the caller's sockaddr;
 			 * the requested port was saved in lport above.
 			 */
 			sin->sin_port = 0;		/* yech... */
 			bzero(&sin->sin_zero, sizeof(sin->sin_zero));
 			/*
 			 * Is the address a local IP address?
 			 * If INP_BINDANY is set, then the socket may be bound
 			 * to any endpoint address, local or not.
 			 */
 			if ((inp->inp_flags & INP_BINDANY) == 0 &&
 			    ifa_ifwithaddr_check((struct sockaddr *)sin) == 0) 
 				return (EADDRNOTAVAIL);
 		}
 		laddr = sin->sin_addr;
 		/* An explicit port was requested: run the conflict checks. */
 		if (lport) {
 			struct inpcb *t;
 			struct tcptw *tw;
 
 			/* GROSS */
 			/* Ports inside the reserved window need privilege. */
 			if (ntohs(lport) <= V_ipport_reservedhigh &&
 			    ntohs(lport) >= V_ipport_reservedlow &&
 			    priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT,
 			    0))
 				return (EACCES);
 			/*
 			 * Cross-uid check, skipped for multicast addresses
 			 * and for holders of PRIV_NETINET_REUSEPORT.
 			 */
 			if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
 			    priv_check_cred(inp->inp_cred,
 			    PRIV_NETINET_REUSEPORT, 0) != 0) {
 				t = in_pcblookup_local(pcbinfo, sin->sin_addr,
 				    lport, INPLOOKUP_WILDCARD, cred);
 	/*
 	 * XXX
 	 * This entire block sorely needs a rewrite.
 	 */
 				if (t &&
 				    ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
 				    ((t->inp_flags & INP_TIMEWAIT) == 0) &&
 				    (so->so_type != SOCK_STREAM ||
 				     ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
 				    (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
 				     ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
 				     (t->inp_flags2 & INP_REUSEPORT) == 0) &&
 				    (inp->inp_cred->cr_uid !=
 				     t->inp_cred->cr_uid))
 					return (EADDRINUSE);
 
 				/*
 				 * If the socket is a BINDMULTI socket, then
 				 * the credentials need to match and the
 				 * original socket also has to have been bound
 				 * with BINDMULTI.
 				 */
 				if (t && (! in_pcbbind_check_bindmulti(inp, t)))
 					return (EADDRINUSE);
 			}
 			/*
 			 * Second lookup, this time honouring the socket's
 			 * own reuse options through lookupflags.
 			 */
 			t = in_pcblookup_local(pcbinfo, sin->sin_addr,
 			    lport, lookupflags, cred);
 			if (t && (t->inp_flags & INP_TIMEWAIT)) {
 				/*
 				 * XXXRW: If an inpcb has had its timewait
 				 * state recycled, we treat the address as
 				 * being in use (for now).  This is better
 				 * than a panic, but not desirable.
 				 */
 				tw = intotw(t);
 				if (tw == NULL ||
 				    (reuseport & tw->tw_so_options) == 0)
 					return (EADDRINUSE);
 			} else if (t &&
 			    ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
 			    (reuseport & inp_so_options(t)) == 0) {
 				/*
 				 * With INET6 the return below is guarded by
 				 * this condition; without INET6 the return
 				 * is unconditional and the bindmulti check
 				 * after it is never reached.
 				 */
 #ifdef INET6
 				if (ntohl(sin->sin_addr.s_addr) !=
 				    INADDR_ANY ||
 				    ntohl(t->inp_laddr.s_addr) !=
 				    INADDR_ANY ||
 				    (inp->inp_vflag & INP_IPV6PROTO) == 0 ||
 				    (t->inp_vflag & INP_IPV6PROTO) == 0)
 #endif
 				return (EADDRINUSE);
 				if (t && (! in_pcbbind_check_bindmulti(inp, t)))
 					return (EADDRINUSE);
 			}
 		}
 	}
 	/* A port that was already set on entry always wins. */
 	if (*lportp != 0)
 		lport = *lportp;
 	/* Still no port: allocate one automatically. */
 	if (lport == 0) {
 		error = in_pcb_lport(inp, &laddr, &lport, cred, lookupflags);
 		if (error != 0)
 			return (error);
 
 	}
 	/* Outputs are written only here, on the success path. */
 	*laddrp = laddr.s_addr;
 	*lportp = lport;
 	return (0);
 }
 
 /*
  * Connect from a socket to a specified address.
  * Both address and port must be specified in argument sin.
  * If we don't have a local address for this socket yet,
  * then pick one.
  */
 int
 in_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
     struct ucred *cred, struct mbuf *m)
 {
 	u_short lport, fport;
 	in_addr_t laddr, faddr;
 	int anonport, error;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 
 	/*
 	 * Work on local copies; in_pcbconnect_setup() fills them in and the
 	 * pcb itself is only modified once it has succeeded.
 	 */
 	lport = inp->inp_lport;
 	laddr = inp->inp_laddr.s_addr;
 	/* No local port yet means one will be chosen automatically. */
 	anonport = (lport == 0);
 	error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport,
 	    NULL, cred);
 	if (error)
 		return (error);
 
 	/* Do the initial binding of the local address if required. */
 	if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) {
 		inp->inp_lport = lport;
 		inp->inp_laddr.s_addr = laddr;
 		if (in_pcbinshash(inp) != 0) {
 			/* Hash insert failed: restore the unbound state. */
 			inp->inp_laddr.s_addr = INADDR_ANY;
 			inp->inp_lport = 0;
 			return (EAGAIN);
 		}
 	}
 
 	/* Commit the remaining changes. */
 	inp->inp_lport = lport;
 	inp->inp_laddr.s_addr = laddr;
 	inp->inp_faddr.s_addr = faddr;
 	inp->inp_fport = fport;
 	/* Re-hash now that the foreign address and port are set. */
 	in_pcbrehash_mbuf(inp, m);
 
 	if (anonport)
 		inp->inp_flags |= INP_ANONPORT;
 	return (0);
 }
 
 int
 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
 {
 
 	return (in_pcbconnect_mbuf(inp, nam, cred, NULL));
 }
 
 /*
  * Do proper source address selection on an unbound socket in case
  * of connect. Take jails into account as well.
  */
 int
 in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
     struct ucred *cred)
 {
 	struct ifaddr *ifa;
 	struct sockaddr *sa;
 	struct sockaddr_in *sin;
 	struct route sro;
 	int error;
 
 	/*
 	 * Returns 0 with the chosen source address stored in *laddr,
 	 * ENETUNREACH when no suitable interface address can be found, or
 	 * the error from prison_get_ip4() on the jailed fallback paths.
 	 */
 	KASSERT(laddr != NULL, ("%s: laddr NULL", __func__));
 
 	/*
 	 * Bypass source address selection and use the primary jail IP
 	 * if requested.
 	 */
 	if (cred != NULL && !prison_saddrsel_ip4(cred, laddr))
 		return (0);
 
 	error = 0;
 	bzero(&sro, sizeof(sro));
 
 	/* Build the route lookup key for the foreign address. */
 	sin = (struct sockaddr_in *)&sro.ro_dst;
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(struct sockaddr_in);
 	sin->sin_addr.s_addr = faddr->s_addr;
 
 	/*
 	 * If route is known our src addr is taken from the i/f,
 	 * else punt.
 	 *
 	 * Find out route to destination.
 	 */
 	if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0)
 		in_rtalloc_ign(&sro, 0, inp->inp_inc.inc_fibnum);
 
 	/*
 	 * If we found a route, use the address corresponding to
 	 * the outgoing interface.
 	 *
 	 * Otherwise assume faddr is reachable on a directly connected
 	 * network and try to find a corresponding interface to take
 	 * the source address from.
 	 */
 	if (sro.ro_rt == NULL || sro.ro_rt->rt_ifp == NULL) {
 		struct in_ifaddr *ia;
 		struct ifnet *ifp;
 
 		/* Try a matching destination address first, then the network. */
 		ia = ifatoia(ifa_ifwithdstaddr((struct sockaddr *)sin,
 					inp->inp_socket->so_fibnum));
 		if (ia == NULL)
 			ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin, 0,
 						inp->inp_socket->so_fibnum));
 		if (ia == NULL) {
 			error = ENETUNREACH;
 			goto done;
 		}
 
 		/* Not jailed: take the address found, then release ia. */
 		if (cred == NULL || !prison_flag(cred, PR_IP4)) {
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			ifa_free(&ia->ia_ifa);
 			goto done;
 		}
 
 		/*
 		 * Jailed: only the interface is of interest from here on.
 		 * Release ia and scan that interface's address list for an
 		 * IPv4 address the jail is allowed to use.
 		 */
 		ifp = ia->ia_ifp;
 		ifa_free(&ia->ia_ifa);
 		ia = NULL;
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 
 			sa = ifa->ifa_addr;
 			if (sa->sa_family != AF_INET)
 				continue;
 			sin = (struct sockaddr_in *)sa;
 			if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
 				ia = (struct in_ifaddr *)ifa;
 				break;
 			}
 		}
 		if (ia != NULL) {
 			/* Copy the address out while the list lock is held. */
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			IF_ADDR_RUNLOCK(ifp);
 			goto done;
 		}
 		IF_ADDR_RUNLOCK(ifp);
 
 		/* 3. As a last resort return the 'default' jail address. */
 		error = prison_get_ip4(cred, laddr);
 		goto done;
 	}
 
 	/*
 	 * If the outgoing interface on the route found is not
 	 * a loopback interface, use the address from that interface.
 	 * In case of jails do those three steps:
 	 * 1. check if the interface address belongs to the jail. If so use it.
 	 * 2. check if we have any address on the outgoing interface
 	 *    belonging to this jail. If so use it.
 	 * 3. as a last resort return the 'default' jail address.
 	 */
 	if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) {
 		struct in_ifaddr *ia;
 		struct ifnet *ifp;
 
 		/* If not jailed, use the default returned. */
 		if (cred == NULL || !prison_flag(cred, PR_IP4)) {
 			ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			goto done;
 		}
 
 		/* Jailed. */
 		/* 1. Check if the iface address belongs to the jail. */
 		sin = (struct sockaddr_in *)sro.ro_rt->rt_ifa->ifa_addr;
 		if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
 			ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			goto done;
 		}
 
 		/*
 		 * 2. Check if we have any address on the outgoing interface
 		 *    belonging to this jail.
 		 */
 		ia = NULL;
 		ifp = sro.ro_rt->rt_ifp;
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			sa = ifa->ifa_addr;
 			if (sa->sa_family != AF_INET)
 				continue;
 			sin = (struct sockaddr_in *)sa;
 			if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
 				ia = (struct in_ifaddr *)ifa;
 				break;
 			}
 		}
 		if (ia != NULL) {
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			IF_ADDR_RUNLOCK(ifp);
 			goto done;
 		}
 		IF_ADDR_RUNLOCK(ifp);
 
 		/* 3. As a last resort return the 'default' jail address. */
 		error = prison_get_ip4(cred, laddr);
 		goto done;
 	}
 
 	/*
 	 * The outgoing interface is marked with 'loopback net', so a route
 	 * to ourselves is here.
 	 * Try to find the interface of the destination address and then
 	 * take the address from there. That interface is not necessarily
 	 * a loopback interface.
 	 * In case of jails, check that it is an address of the jail
 	 * and if we cannot find, fall back to the 'default' jail address.
 	 *
 	 * (The test below always holds at this point: the non-loopback
 	 * case above leaves through "done" on every path.)
 	 */
 	if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) {
 		struct sockaddr_in sain;
 		struct in_ifaddr *ia;
 
 		/* Fresh key: sin may have been repointed by earlier code. */
 		bzero(&sain, sizeof(struct sockaddr_in));
 		sain.sin_family = AF_INET;
 		sain.sin_len = sizeof(struct sockaddr_in);
 		sain.sin_addr.s_addr = faddr->s_addr;
 
 		/* Destination address, then network, then exact local address. */
 		ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sain),
 					inp->inp_socket->so_fibnum));
 		if (ia == NULL)
 			ia = ifatoia(ifa_ifwithnet(sintosa(&sain), 0,
 						inp->inp_socket->so_fibnum));
 		if (ia == NULL)
 			ia = ifatoia(ifa_ifwithaddr(sintosa(&sain)));
 
 		if (cred == NULL || !prison_flag(cred, PR_IP4)) {
 			if (ia == NULL) {
 				error = ENETUNREACH;
 				goto done;
 			}
 			laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 			ifa_free(&ia->ia_ifa);
 			goto done;
 		}
 
 		/* Jailed. */
 		if (ia != NULL) {
 			struct ifnet *ifp;
 
 			/* Same per-interface jail scan as in the cases above. */
 			ifp = ia->ia_ifp;
 			ifa_free(&ia->ia_ifa);
 			ia = NULL;
 			IF_ADDR_RLOCK(ifp);
 			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 
 				sa = ifa->ifa_addr;
 				if (sa->sa_family != AF_INET)
 					continue;
 				sin = (struct sockaddr_in *)sa;
 				if (prison_check_ip4(cred,
 				    &sin->sin_addr) == 0) {
 					ia = (struct in_ifaddr *)ifa;
 					break;
 				}
 			}
 			if (ia != NULL) {
 				laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
 				IF_ADDR_RUNLOCK(ifp);
 				goto done;
 			}
 			IF_ADDR_RUNLOCK(ifp);
 		}
 
 		/* 3. As a last resort return the 'default' jail address. */
 		error = prison_get_ip4(cred, laddr);
 		goto done;
 	}
 
 done:
 	/* Single exit point: release the route if the lookup produced one. */
 	if (sro.ro_rt != NULL)
 		RTFREE(sro.ro_rt);
 	return (error);
 }
 
 /*
  * Set up for a connect from a socket to the specified address.
  * On entry, *laddrp and *lportp should contain the current local
  * address and port for the PCB; these are updated to the values
  * that should be placed in inp_laddr and inp_lport to complete
  * the connect.
  *
  * On success, *faddrp and *fportp will be set to the remote address
  * and port. These are not updated in the error case.
  *
  * If the operation fails because the connection already exists,
  * *oinpp will be set to the PCB of that connection so that the
  * caller can decide to override it. In all other cases, *oinpp
  * is set to NULL.
  */
 int
 in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
     in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp,
     struct inpcb **oinpp, struct ucred *cred)
 {
 	struct rm_priotracker in_ifa_tracker;
 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
 	struct in_ifaddr *ia;
 	struct inpcb *oinp;
 	struct in_addr laddr, faddr;
 	u_short lport, fport;
 	int error;
 
 	/*
 	 * Because a global state change doesn't actually occur here, a read
 	 * lock is sufficient.
 	 */
 	INP_LOCK_ASSERT(inp);
 	INP_HASH_LOCK_ASSERT(inp->inp_pcbinfo);
 
 	/* oinpp is optional; when given it starts out as NULL. */
 	if (oinpp != NULL)
 		*oinpp = NULL;
 	/* Validate the destination: right size, AF_INET, non-zero port. */
 	if (nam->sa_len != sizeof (*sin))
 		return (EINVAL);
 	if (sin->sin_family != AF_INET)
 		return (EAFNOSUPPORT);
 	if (sin->sin_port == 0)
 		return (EADDRNOTAVAIL);
 	/* Work on local copies; outputs are stored only on success. */
 	laddr.s_addr = *laddrp;
 	lport = *lportp;
 	faddr = sin->sin_addr;
 	fport = sin->sin_port;
 
 	if (!TAILQ_EMPTY(&V_in_ifaddrhead)) {
 		/*
 		 * If the destination address is INADDR_ANY,
 		 * use the primary local address.
 		 * If the supplied address is INADDR_BROADCAST,
 		 * and the primary interface supports broadcast,
 		 * choose the broadcast address for that interface.
 		 */
 		if (faddr.s_addr == INADDR_ANY) {
 			IN_IFADDR_RLOCK(&in_ifa_tracker);
 			faddr =
 			    IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr;
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			/* With a credential, let the jail code adjust faddr. */
 			if (cred != NULL &&
 			    (error = prison_get_ip4(cred, &faddr)) != 0)
 				return (error);
 		} else if (faddr.s_addr == (u_long)INADDR_BROADCAST) {
 			IN_IFADDR_RLOCK(&in_ifa_tracker);
 			if (TAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags &
 			    IFF_BROADCAST)
 				faddr = satosin(&TAILQ_FIRST(
 				    &V_in_ifaddrhead)->ia_broadaddr)->sin_addr;
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 		}
 	}
 	/* No local address yet: select a source address for faddr. */
 	if (laddr.s_addr == INADDR_ANY) {
 		error = in_pcbladdr(inp, &faddr, &laddr, cred);
 		/*
 		 * If the destination address is multicast and an outgoing
 		 * interface has been set as a multicast option, prefer the
 		 * address of that interface as our source address.
 		 *
 		 * This may replace the in_pcbladdr() result above: error is
 		 * reset to 0 when an address is found on that interface and
 		 * set to EADDRNOTAVAIL when none is.
 		 */
 		if (IN_MULTICAST(ntohl(faddr.s_addr)) &&
 		    inp->inp_moptions != NULL) {
 			struct ip_moptions *imo;
 			struct ifnet *ifp;
 
 			imo = inp->inp_moptions;
 			if (imo->imo_multicast_ifp != NULL) {
 				ifp = imo->imo_multicast_ifp;
 				IN_IFADDR_RLOCK(&in_ifa_tracker);
 				/* First address on ifp that the jail permits. */
 				TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 					if ((ia->ia_ifp == ifp) &&
 					    (cred == NULL ||
 					    prison_check_ip4(cred,
 					    &ia->ia_addr.sin_addr) == 0))
 						break;
 				}
 				if (ia == NULL)
 					error = EADDRNOTAVAIL;
 				else {
 					laddr = ia->ia_addr.sin_addr;
 					error = 0;
 				}
 				IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			}
 		}
 		if (error)
 			return (error);
 	}
 	/* An existing pcb with the same 4-tuple means the address is in use. */
 	oinp = in_pcblookup_hash_locked(inp->inp_pcbinfo, faddr, fport,
 	    laddr, lport, 0, NULL);
 	if (oinp != NULL) {
 		if (oinpp != NULL)
 			*oinpp = oinp;
 		return (EADDRINUSE);
 	}
 	/* No local port yet: have the bind code pick one for laddr. */
 	if (lport == 0) {
 		error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport,
 		    cred);
 		if (error)
 			return (error);
 	}
 	*laddrp = laddr.s_addr;
 	*lportp = lport;
 	*faddrp = faddr.s_addr;
 	*fportp = fport;
 	return (0);
 }
 
 /*
  * Reset the foreign address and port of an inpcb to their wildcard values
  * and rehash it.  The caller must hold the inpcb write lock and the pcbinfo
  * hash write lock (both are asserted below).
  */
 void
 in_pcbdisconnect(struct inpcb *inp)
 {
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 
 	inp->inp_faddr.s_addr = INADDR_ANY;
 	inp->inp_fport = 0;
 	/* faddr/fport are part of the hash key, so move to the new bucket. */
 	in_pcbrehash(inp);
 }
 #endif /* INET */
 
 /*
  * in_pcbdetach() is responsibe for disassociating a socket from an inpcb.
  * For most protocols, this will be invoked immediately prior to calling
  * in_pcbfree().  However, with TCP the inpcb may significantly outlive the
  * socket, in which case in_pcbfree() is deferred.
  */
 void
 in_pcbdetach(struct inpcb *inp)
 {
 
 	KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__));
 
 	inp->inp_socket->so_pcb = NULL;
 	inp->inp_socket = NULL;
 }
 
 /*
  * in_pcbref() bumps the reference count on an inpcb in order to maintain
  * stability of an inpcb pointer despite the inpcb lock being released.  This
  * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
  * but where the inpcb lock may already be held, or when acquiring a
  * reference via a pcbgroup.
  *
  * in_pcbref() should be used only to provide brief memory stability, and
  * must always be followed by a call to INP_WLOCK() and in_pcbrele() to
  * garbage collect the inpcb if it has been in_pcbfree()'d from another
  * context.  Until in_pcbrele() has returned that the inpcb is still valid,
  * lock and rele are the *only* safe operations that may be performed on the
  * inpcb.
  *
  * While the inpcb will not be freed, releasing the inpcb lock means that the
  * connection's state may change, so the caller should be careful to
  * revalidate any cached state on reacquiring the lock.  Drop the reference
  * using in_pcbrele().
  */
 void
 in_pcbref(struct inpcb *inp)
 {
 
 	/* A reference may only be added to an inpcb that already has one. */
 	KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
 
 	refcount_acquire(&inp->inp_refcount);
 }
 
 /*
  * Release a reference obtained with in_pcbref() while holding the inpcb
  * read lock.  Since in_pcbfree() may have run between in_pcbref() and this
  * call, the return value tells the caller whether the inpcb is still usable:
  *
  *   0 - the inpcb remains valid and the read lock is still held;
  *   1 - the inpcb was marked INP_FREED, or this was the final reference and
  *       the inpcb has been returned to its zone.  The read lock has been
  *       dropped in either case.
  *
  * Unlike in_pcbref(), the inpcb lock must be held to drop a reference.
  * Historically more work was done here (actually, in in_pcbfree_internal())
  * but it has been moved to in_pcbfree() so that the pcbinfo lock is not
  * needed in in_pcbrele().  Deferring the free is entirely about memory
  * stability (and continued use of the lock).
  */
 int
 in_pcbrele_rlocked(struct inpcb *inp)
 {
 
 	KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
 
 	INP_RLOCK_ASSERT(inp);
 
 	if (!refcount_release(&inp->inp_refcount)) {
 		/*
 		 * Other references remain.  A freed inpcb is still reported
 		 * to the caller as gone, even though this was not the last
 		 * reference.
 		 */
 		if ((inp->inp_flags2 & INP_FREED) == 0)
 			return (0);
 		INP_RUNLOCK(inp);
 		return (1);
 	}
 
 	/* That was the last reference: unlock and give the memory back. */
 	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
 
 	INP_RUNLOCK(inp);
 	uma_zfree(inp->inp_pcbinfo->ipi_zone, inp);
 	return (1);
 }
 
 /*
  * Write-locked counterpart of in_pcbrele_rlocked(): drop a reference taken
  * with in_pcbref() while holding the inpcb write lock.
  *
  * Returns 0 if the inpcb is still valid (write lock still held).  Returns 1
  * if the inpcb was marked INP_FREED or if the last reference was dropped and
  * the inpcb was returned to its zone; the write lock has been released in
  * both of those cases.
  */
 int
 in_pcbrele_wlocked(struct inpcb *inp)
 {
 
 	KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
 
 	INP_WLOCK_ASSERT(inp);
 
 	if (!refcount_release(&inp->inp_refcount)) {
 		/*
 		 * Other references remain.  A freed inpcb is still reported
 		 * to the caller as gone, even though this was not the last
 		 * reference.
 		 */
 		if ((inp->inp_flags2 & INP_FREED) == 0)
 			return (0);
 		INP_WUNLOCK(inp);
 		return (1);
 	}
 
 	/* That was the last reference: unlock and give the memory back. */
 	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
 
 	INP_WUNLOCK(inp);
 	uma_zfree(inp->inp_pcbinfo->ipi_zone, inp);
 	return (1);
 }
 
 /*
  * Temporary wrapper.  Forwards to in_pcbrele_wlocked(), so the caller must
  * hold the inpcb write lock and the return value has the same meaning.
  */
 int
 in_pcbrele(struct inpcb *inp)
 {
 
 	return (in_pcbrele_wlocked(inp));
 }
 
 /*
  * Unconditionally schedule an inpcb to be freed by decrementing its
  * reference count, which should occur only after the inpcb has been detached
  * from its socket.  If another thread holds a temporary reference (acquired
  * using in_pcbref()) then the free is deferred until that reference is
  * released using in_pcbrele(), but the inpcb is still unlocked.  Almost all
  * work, including removal from global lists, is done in this context, where
  * the pcbinfo lock is held.
  *
  * The caller must hold the inpcb write lock; it is always released by the
  * time this function returns.
  */
 void
 in_pcbfree(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 
 	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
 
 #ifdef INVARIANTS
 	/*
 	 * V_tcbinfo is checked with INP_INFO_LOCK_ASSERT; every other
 	 * pcbinfo must hold the info lock for writing.
 	 */
 	if (pcbinfo == &V_tcbinfo) {
 		INP_INFO_LOCK_ASSERT(pcbinfo);
 	} else {
 		INP_INFO_WLOCK_ASSERT(pcbinfo);
 	}
 #endif
 	INP_WLOCK_ASSERT(inp);
 
 	/* XXXRW: Do as much as possible here. */
 #ifdef IPSEC
 	if (inp->inp_sp != NULL)
 		ipsec_delete_pcbpolicy(inp);
 #endif
 	/* Unlink from the global and hash lists under the list lock. */
 	INP_LIST_WLOCK(pcbinfo);
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 	in_pcbremlists(inp);
 	INP_LIST_WUNLOCK(pcbinfo);
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6PROTO) {
 		ip6_freepcbopts(inp->in6p_outputopts);
 		if (inp->in6p_moptions != NULL)
 			ip6_freemoptions(inp->in6p_moptions);
 	}
 #endif
 	if (inp->inp_options)
 		(void)m_free(inp->inp_options);
 #ifdef INET
 	if (inp->inp_moptions != NULL)
 		inp_freemoptions(inp->inp_moptions);
 #endif
 	inp->inp_vflag = 0;
 	/* in_pcbrele_*() reports INP_FREED to remaining reference holders. */
 	inp->inp_flags2 |= INP_FREED;
 	crfree(inp->inp_cred);
 #ifdef MAC
 	mac_inpcb_destroy(inp);
 #endif
 	/*
 	 * in_pcbrele_wlocked() returns non-zero once it has dropped the
 	 * inpcb lock itself; when it returns zero the lock is still ours
 	 * to release.
 	 */
 	if (!in_pcbrele_wlocked(inp))
 		INP_WUNLOCK(inp);
 }
 
 /*
  * in_pcbdrop() marks an inpcb INP_DROPPED and takes it off the hashed
  * lists, which releases its address and port reservation and keeps it from
  * being returned by inpcb lookups.
  *
  * TCP uses this to retire an inpcb whose socket is still open although the
  * connection has closed -- for instance after a shutdown()-initiated close
  * or a RST on the wire.  The port binding becomes reusable while so_pcb
  * continues to point at a valid inpcb until in_pcbdetach().
  *
  * XXXRW: Possibly in_pcbdrop() should also prevent future notifications by
  * in_pcbnotifyall() and in_pcbpurgeif0()?
  */
 void
 in_pcbdrop(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo;
 	struct inpcbport *porthead;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * XXXRW: Possibly we should protect the setting of INP_DROPPED with
 	 * the hash lock...?
 	 */
 	inp->inp_flags |= INP_DROPPED;
 
 	/* Nothing more to do when the inpcb is not on the hash lists. */
 	if ((inp->inp_flags & INP_INHASHLIST) == 0)
 		return;
 
 	pcbinfo = inp->inp_pcbinfo;
 	porthead = inp->inp_phd;
 
 	INP_HASH_WLOCK(pcbinfo);
 	LIST_REMOVE(inp, inp_hash);
 	LIST_REMOVE(inp, inp_portlist);
 	/* Release the per-port head once its last inpcb is gone. */
 	if (LIST_EMPTY(&porthead->phd_pcblist)) {
 		LIST_REMOVE(porthead, phd_hash);
 		free(porthead, M_PCB);
 	}
 	INP_HASH_WUNLOCK(pcbinfo);
 	inp->inp_flags &= ~INP_INHASHLIST;
 #ifdef PCBGROUP
 	in_pcbgroup_remove(inp);
 #endif
 }
 
 #ifdef INET
 /*
  * Common routines to return the socket addresses associated with inpcbs.
  *
  * in_sockaddr() allocates a zeroed sockaddr_in from M_SONAME (M_WAITOK),
  * fills it with the given port and address, and returns it as a generic
  * sockaddr pointer.
  */
 struct sockaddr *
 in_sockaddr(in_port_t port, struct in_addr *addr_p)
 {
 	struct sockaddr_in *sa4;
 
 	sa4 = malloc(sizeof(*sa4), M_SONAME, M_WAITOK | M_ZERO);
 	sa4->sin_len = sizeof(*sa4);
 	sa4->sin_family = AF_INET;
 	sa4->sin_port = port;
 	sa4->sin_addr = *addr_p;
 
 	return ((struct sockaddr *)sa4);
 }
 
 int
 in_getsockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct inpcb *inp;
 	struct in_addr addr;
 	in_port_t port;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL"));
 
 	INP_RLOCK(inp);
 	port = inp->inp_lport;
 	addr = inp->inp_laddr;
 	INP_RUNLOCK(inp);
 
 	*nam = in_sockaddr(port, &addr);
 	return 0;
 }
 
 int
 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
 {
 	struct inpcb *inp;
 	struct in_addr addr;
 	in_port_t port;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL"));
 
 	INP_RLOCK(inp);
 	port = inp->inp_fport;
 	addr = inp->inp_faddr;
 	INP_RUNLOCK(inp);
 
 	*nam = in_sockaddr(port, &addr);
 	return 0;
 }
 
 /*
  * Walk every inpcb on pcbinfo's list and call notify(inp, errno) on each
  * IPv4 inpcb that has a socket attached and whose foreign address equals
  * faddr.  The pcbinfo write lock is held for the whole walk and each inpcb
  * is write-locked while it is examined.
  */
 void
 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno,
     struct inpcb *(*notify)(struct inpcb *, int))
 {
 	struct inpcb *inp, *next;
 
 	INP_INFO_WLOCK(pcbinfo);
 	LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, next) {
 		INP_WLOCK(inp);
 #ifdef INET6
 		if (!(inp->inp_vflag & INP_IPV4)) {
 			INP_WUNLOCK(inp);
 			continue;
 		}
 #endif
 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
 		    inp->inp_socket != NULL) {
 			/*
 			 * The inpcb is unlocked here only when the callback
 			 * hands it back; a NULL return presumably means the
 			 * callback already released it -- confirm against
 			 * the notify implementations.
 			 */
 			if ((*notify)(inp, errno) == NULL)
 				continue;
 		}
 		INP_WUNLOCK(inp);
 	}
 	INP_INFO_WUNLOCK(pcbinfo);
 }
 
 /*
  * Remove references to an interface from the IPv4 multicast options of
  * every inpcb on pcbinfo's list: the selected outgoing multicast interface
  * is cleared if it is ifp, and group memberships joined through ifp are
  * dropped.  Runs with the pcbinfo write lock held and each inpcb
  * write-locked in turn.
  */
 void
 in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
 {
 	struct inpcb *inp;
 	struct ip_moptions *imo;
 	int i, gap;
 
 	INP_INFO_WLOCK(pcbinfo);
 	LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
 		INP_WLOCK(inp);
 		imo = inp->inp_moptions;
 		if ((inp->inp_vflag & INP_IPV4) &&
 		    imo != NULL) {
 			/*
 			 * Unselect the outgoing interface if it is being
 			 * detached.
 			 */
 			if (imo->imo_multicast_ifp == ifp)
 				imo->imo_multicast_ifp = NULL;
 
 			/*
 			 * Drop multicast group membership if we joined
 			 * through the interface being detached.
 			 *
 			 * 'gap' counts the entries removed so far; each
 			 * surviving entry is shifted down by that amount so
 			 * the membership array stays dense.
 			 */
 			for (i = 0, gap = 0; i < imo->imo_num_memberships;
 			    i++) {
 				if (imo->imo_membership[i]->inm_ifp == ifp) {
 					in_delmulti(imo->imo_membership[i]);
 					gap++;
 				} else if (gap != 0)
 					imo->imo_membership[i - gap] =
 					    imo->imo_membership[i];
 			}
 			imo->imo_num_memberships -= gap;
 		}
 		INP_WUNLOCK(inp);
 	}
 	INP_INFO_WUNLOCK(pcbinfo);
 }
 
 /*
  * Lookup a PCB based on the local address and port.  Caller must hold the
  * hash lock.  No inpcb locks or references are acquired.
  *
  * lookupflags may only contain INPLOOKUP_WILDCARD.  Without it, only an
  * unconnected inpcb whose local address and port match exactly is returned.
  * With it, the inpcbs bound to lport are scored and the one with the fewest
  * wildcard mismatches is returned.  When cred is non-NULL, candidates whose
  * prison does not compare equal (prison_equal_ip4()) are ignored.  Returns
  * NULL if nothing matches.
  */
 #define INP_LOOKUP_MAPPED_PCB_COST	3
 struct inpcb *
 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
     u_short lport, int lookupflags, struct ucred *cred)
 {
 	struct inpcb *inp;
 	/*
 	 * matchwild starts one above the highest score a candidate can reach
 	 * so that the first acceptable candidate always wins.
 	 */
 #ifdef INET6
 	int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST;
 #else
 	int matchwild = 3;
 #endif
 	int wildcard;
 
 	KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 
 	INP_HASH_LOCK_ASSERT(pcbinfo);
 
 	if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
 		struct inpcbhead *head;
 		/*
 		 * Look for an unconnected (wildcard foreign addr) PCB that
 		 * matches the local address and port we're looking for.
 		 */
 		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
 		    0, pcbinfo->ipi_hashmask)];
 		LIST_FOREACH(inp, head, inp_hash) {
 #ifdef INET6
 			/* XXX inp locking */
 			if ((inp->inp_vflag & INP_IPV4) == 0)
 				continue;
 #endif
 			if (inp->inp_faddr.s_addr == INADDR_ANY &&
 			    inp->inp_laddr.s_addr == laddr.s_addr &&
 			    inp->inp_lport == lport) {
 				/*
 				 * Found?
 				 */
 				if (cred == NULL ||
 				    prison_equal_ip4(cred->cr_prison,
 					inp->inp_cred->cr_prison))
 					return (inp);
 			}
 		}
 		/*
 		 * Not found.
 		 */
 		return (NULL);
 	} else {
 		struct inpcbporthead *porthash;
 		struct inpcbport *phd;
 		struct inpcb *match = NULL;
 		/*
 		 * Best fit PCB lookup.
 		 *
 		 * First see if this local port is in use by looking on the
 		 * port hash list.
 		 */
 		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
 		    pcbinfo->ipi_porthashmask)];
 		LIST_FOREACH(phd, porthash, phd_hash) {
 			if (phd->phd_port == lport)
 				break;
 		}
 		if (phd != NULL) {
 			/*
 			 * Port is in use by one or more PCBs. Look for best
 			 * fit.
 			 */
 			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
 				/* Score for this candidate; lower is better. */
 				wildcard = 0;
 				if (cred != NULL &&
 				    !prison_equal_ip4(inp->inp_cred->cr_prison,
 					cred->cr_prison))
 					continue;
 #ifdef INET6
 				/* XXX inp locking */
 				if ((inp->inp_vflag & INP_IPV4) == 0)
 					continue;
 				/*
 				 * We never select the PCB that has
 				 * INP_IPV6 flag and is bound to :: if
 				 * we have another PCB which is bound
 				 * to 0.0.0.0.  If a PCB has the
 				 * INP_IPV6 flag, then we set its cost
 				 * higher than IPv4 only PCBs.
 				 *
 				 * Note that the case only happens
 				 * when a socket is bound to ::, under
 				 * the condition that the use of the
 				 * mapped address is allowed.
 				 */
 				if ((inp->inp_vflag & INP_IPV6) != 0)
 					wildcard += INP_LOOKUP_MAPPED_PCB_COST;
 #endif
 				/* A connected PCB costs one point. */
 				if (inp->inp_faddr.s_addr != INADDR_ANY)
 					wildcard++;
 				/*
 				 * Exactly one side being INADDR_ANY costs one
 				 * point; two different specific local
 				 * addresses rule the candidate out.
 				 */
 				if (inp->inp_laddr.s_addr != INADDR_ANY) {
 					if (laddr.s_addr == INADDR_ANY)
 						wildcard++;
 					else if (inp->inp_laddr.s_addr != laddr.s_addr)
 						continue;
 				} else {
 					if (laddr.s_addr != INADDR_ANY)
 						wildcard++;
 				}
 				if (wildcard < matchwild) {
 					match = inp;
 					matchwild = wildcard;
 					/* A zero score cannot be beaten. */
 					if (matchwild == 0)
 						break;
 				}
 			}
 		}
 		return (match);
 	}
 }
 #undef INP_LOOKUP_MAPPED_PCB_COST
 
 #ifdef PCBGROUP
 /*
  * Lookup PCB in hash list, using pcbgroup tables.
  *
  * The group lock is taken for the duration of the search.  An exact 4-tuple
  * match in the group's hash is tried first; if INPLOOKUP_WILDCARD is set,
  * listening (wildcard foreign address) inpcbs are considered next.  On a
  * hit the inpcb is referenced, the group lock is dropped, and the inpcb is
  * locked according to INPLOOKUP_WLOCKPCB / INPLOOKUP_RLOCKPCB before being
  * returned.  Returns NULL if nothing matched or if the inpcb was freed
  * while its lock was being acquired.  The ifp argument is not referenced in
  * this function.
  */
 static struct inpcb *
 in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
     struct in_addr faddr, u_int fport_arg, struct in_addr laddr,
     u_int lport_arg, int lookupflags, struct ifnet *ifp)
 {
 	struct inpcbhead *head;
 	struct inpcb *inp, *tmpinp;
 	u_short fport = fport_arg, lport = lport_arg;
 
 	/*
 	 * First look for an exact match.
 	 */
 	tmpinp = NULL;
 	INP_GROUP_LOCK(pcbgroup);
 	head = &pcbgroup->ipg_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
 	    pcbgroup->ipg_hashmask)];
 	LIST_FOREACH(inp, head, inp_pcbgrouphash) {
 #ifdef INET6
 		/* XXX inp locking */
 		if ((inp->inp_vflag & INP_IPV4) == 0)
 			continue;
 #endif
 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
 		    inp->inp_laddr.s_addr == laddr.s_addr &&
 		    inp->inp_fport == fport &&
 		    inp->inp_lport == lport) {
 			/*
 			 * XXX We should be able to directly return
 			 * the inp here, without any checks.
 			 * Well unless both bound with SO_REUSEPORT?
 			 */
 			/*
 			 * A match with PR_IP4 set wins immediately;
 			 * otherwise remember the first match as fallback.
 			 */
 			if (prison_flag(inp->inp_cred, PR_IP4))
 				goto found;
 			if (tmpinp == NULL)
 				tmpinp = inp;
 		}
 	}
 	if (tmpinp != NULL) {
 		inp = tmpinp;
 		goto found;
 	}
 
 #ifdef	RSS
 	/*
 	 * For incoming connections, we may wish to do a wildcard
 	 * match for an RSS-local socket.
 	 */
 	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
 		struct inpcb *local_wild = NULL, *local_exact = NULL;
 #ifdef INET6
 		struct inpcb *local_wild_mapped = NULL;
 #endif
 		struct inpcb *jail_wild = NULL;
 		struct inpcbhead *head;
 		int injail;
 
 		/*
 		 * Order of socket selection - we always prefer jails.
 		 *      1. jailed, non-wild.
 		 *      2. jailed, wild.
 		 *      3. non-jailed, non-wild.
 		 *      4. non-jailed, wild.
 		 */
 
 		head = &pcbgroup->ipg_hashbase[INP_PCBHASH(INADDR_ANY,
 		    lport, 0, pcbgroup->ipg_hashmask)];
 		LIST_FOREACH(inp, head, inp_pcbgrouphash) {
 #ifdef INET6
 			/* XXX inp locking */
 			if ((inp->inp_vflag & INP_IPV4) == 0)
 				continue;
 #endif
 			if (inp->inp_faddr.s_addr != INADDR_ANY ||
 			    inp->inp_lport != lport)
 				continue;
 
 			injail = prison_flag(inp->inp_cred, PR_IP4);
 			if (injail) {
 				if (prison_check_ip4(inp->inp_cred,
 				    &laddr) != 0)
 					continue;
 			} else {
 				if (local_exact != NULL)
 					continue;
 			}
 
 			if (inp->inp_laddr.s_addr == laddr.s_addr) {
 				if (injail)
 					goto found;
 				else
 					local_exact = inp;
 			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
 #ifdef INET6
 				/* XXX inp locking, NULL check */
 				if (inp->inp_vflag & INP_IPV6PROTO)
 					local_wild_mapped = inp;
 				else
 #endif
 					if (injail)
 						jail_wild = inp;
 					else
 						local_wild = inp;
 			}
 		} /* LIST_FOREACH */
 
 		/* Pick the best remaining candidate in preference order. */
 		inp = jail_wild;
 		if (inp == NULL)
 			inp = local_exact;
 		if (inp == NULL)
 			inp = local_wild;
 #ifdef INET6
 		if (inp == NULL)
 			inp = local_wild_mapped;
 #endif
 		if (inp != NULL)
 			goto found;
 	}
 #endif
 
 	/*
 	 * Then look for a wildcard match, if requested.
 	 */
 	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
 		struct inpcb *local_wild = NULL, *local_exact = NULL;
 #ifdef INET6
 		struct inpcb *local_wild_mapped = NULL;
 #endif
 		struct inpcb *jail_wild = NULL;
 		struct inpcbhead *head;
 		int injail;
 
 		/*
 		 * Order of socket selection - we always prefer jails.
 		 *      1. jailed, non-wild.
 		 *      2. jailed, wild.
 		 *      3. non-jailed, non-wild.
 		 *      4. non-jailed, wild.
 		 */
 		/* This pass searches the pcbinfo-wide wildcard table. */
 		head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
 		    0, pcbinfo->ipi_wildmask)];
 		LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
 #ifdef INET6
 			/* XXX inp locking */
 			if ((inp->inp_vflag & INP_IPV4) == 0)
 				continue;
 #endif
 			if (inp->inp_faddr.s_addr != INADDR_ANY ||
 			    inp->inp_lport != lport)
 				continue;
 
 			injail = prison_flag(inp->inp_cred, PR_IP4);
 			if (injail) {
 				if (prison_check_ip4(inp->inp_cred,
 				    &laddr) != 0)
 					continue;
 			} else {
 				if (local_exact != NULL)
 					continue;
 			}
 
 			if (inp->inp_laddr.s_addr == laddr.s_addr) {
 				if (injail)
 					goto found;
 				else
 					local_exact = inp;
 			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
 #ifdef INET6
 				/* XXX inp locking, NULL check */
 				if (inp->inp_vflag & INP_IPV6PROTO)
 					local_wild_mapped = inp;
 				else
 #endif
 					if (injail)
 						jail_wild = inp;
 					else
 						local_wild = inp;
 			}
 		} /* LIST_FOREACH */
 		inp = jail_wild;
 		if (inp == NULL)
 			inp = local_exact;
 		if (inp == NULL)
 			inp = local_wild;
 #ifdef INET6
 		if (inp == NULL)
 			inp = local_wild_mapped;
 #endif
 		if (inp != NULL)
 			goto found;
 	} /* if (lookupflags & INPLOOKUP_WILDCARD) */
 	INP_GROUP_UNLOCK(pcbgroup);
 	return (NULL);
 
 found:
 	/*
 	 * Hold a reference across the gap between dropping the group lock
 	 * and acquiring the inpcb lock.  in_pcbrele_*() returns non-zero,
 	 * with the inpcb already unlocked, if it was freed in the meantime.
 	 */
 	in_pcbref(inp);
 	INP_GROUP_UNLOCK(pcbgroup);
 	if (lookupflags & INPLOOKUP_WLOCKPCB) {
 		INP_WLOCK(inp);
 		if (in_pcbrele_wlocked(inp))
 			return (NULL);
 	} else if (lookupflags & INPLOOKUP_RLOCKPCB) {
 		INP_RLOCK(inp);
 		if (in_pcbrele_rlocked(inp))
 			return (NULL);
 	} else
 		panic("%s: locking bug", __func__);
 	return (inp);
 }
 #endif /* PCBGROUP */
 
 /*
  * Lookup PCB in hash list, using pcbinfo tables.  This variation assumes
  * that the caller has locked the hash list, and will not perform any further
  * locking or reference operations on either the hash list or the connection.
  *
  * lookupflags may only contain INPLOOKUP_WILDCARD.  An exact 4-tuple match
  * is tried first; with INPLOOKUP_WILDCARD set, unconnected inpcbs bound to
  * lport are then considered in the preference order documented below.
  * Returns the matching inpcb, unlocked and unreferenced, or NULL.  The ifp
  * argument is not referenced in this function.
  */
 static struct inpcb *
 in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr,
     u_int fport_arg, struct in_addr laddr, u_int lport_arg, int lookupflags,
     struct ifnet *ifp)
 {
 	struct inpcbhead *head;
 	struct inpcb *inp, *tmpinp;
 	u_short fport = fport_arg, lport = lport_arg;
 
 	KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 
 	INP_HASH_LOCK_ASSERT(pcbinfo);
 
 	/*
 	 * First look for an exact match.
 	 */
 	tmpinp = NULL;
 	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
 	    pcbinfo->ipi_hashmask)];
 	LIST_FOREACH(inp, head, inp_hash) {
 #ifdef INET6
 		/* XXX inp locking */
 		if ((inp->inp_vflag & INP_IPV4) == 0)
 			continue;
 #endif
 		if (inp->inp_faddr.s_addr == faddr.s_addr &&
 		    inp->inp_laddr.s_addr == laddr.s_addr &&
 		    inp->inp_fport == fport &&
 		    inp->inp_lport == lport) {
 			/*
 			 * XXX We should be able to directly return
 			 * the inp here, without any checks.
 			 * Well unless both bound with SO_REUSEPORT?
 			 */
 			/*
 			 * A match with PR_IP4 set wins immediately;
 			 * otherwise remember the first match as fallback.
 			 */
 			if (prison_flag(inp->inp_cred, PR_IP4))
 				return (inp);
 			if (tmpinp == NULL)
 				tmpinp = inp;
 		}
 	}
 	if (tmpinp != NULL)
 		return (tmpinp);
 
 	/*
 	 * Then look for a wildcard match, if requested.
 	 */
 	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
 		struct inpcb *local_wild = NULL, *local_exact = NULL;
 #ifdef INET6
 		struct inpcb *local_wild_mapped = NULL;
 #endif
 		struct inpcb *jail_wild = NULL;
 		int injail;
 
 		/*
 		 * Order of socket selection - we always prefer jails.
 		 *      1. jailed, non-wild.
 		 *      2. jailed, wild.
 		 *      3. non-jailed, non-wild.
 		 *      4. non-jailed, wild.
 		 */
 
 		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
 		    0, pcbinfo->ipi_hashmask)];
 		LIST_FOREACH(inp, head, inp_hash) {
 #ifdef INET6
 			/* XXX inp locking */
 			if ((inp->inp_vflag & INP_IPV4) == 0)
 				continue;
 #endif
 			if (inp->inp_faddr.s_addr != INADDR_ANY ||
 			    inp->inp_lport != lport)
 				continue;
 
 			injail = prison_flag(inp->inp_cred, PR_IP4);
 			if (injail) {
 				/* Skip jails that may not use laddr. */
 				if (prison_check_ip4(inp->inp_cred,
 				    &laddr) != 0)
 					continue;
 			} else {
 				/*
 				 * Once a non-jailed exact match is known,
 				 * further non-jailed candidates are ignored.
 				 */
 				if (local_exact != NULL)
 					continue;
 			}
 
 			if (inp->inp_laddr.s_addr == laddr.s_addr) {
 				if (injail)
 					return (inp);
 				else
 					local_exact = inp;
 			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
 #ifdef INET6
 				/* XXX inp locking, NULL check */
 				if (inp->inp_vflag & INP_IPV6PROTO)
 					local_wild_mapped = inp;
 				else
 #endif
 					if (injail)
 						jail_wild = inp;
 					else
 						local_wild = inp;
 			}
 		} /* LIST_FOREACH */
 		/* Return the best remaining candidate in preference order. */
 		if (jail_wild != NULL)
 			return (jail_wild);
 		if (local_exact != NULL)
 			return (local_exact);
 		if (local_wild != NULL)
 			return (local_wild);
 #ifdef INET6
 		if (local_wild_mapped != NULL)
 			return (local_wild_mapped);
 #endif
 	} /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
 
 	return (NULL);
 }
 
 /*
  * Lookup PCB in hash list, using pcbinfo tables.  This variation takes the
  * hash list read lock itself and hands the inpcb back locked, so one of
  * INPLOOKUP_RLOCKPCB or INPLOOKUP_WLOCKPCB must be set in lookupflags.
  *
  * Returns NULL when nothing matches, or when the inpcb turned out to have
  * been freed by the time its lock was acquired.
  */
 static struct inpcb *
 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
     u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
     struct ifnet *ifp)
 {
 	struct inpcb *found;
 	int hashflags;
 
 	/* The locked lookup only accepts INPLOOKUP_WILDCARD. */
 	hashflags = lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB);
 
 	INP_HASH_RLOCK(pcbinfo);
 	found = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
 	    hashflags, ifp);
 	if (found == NULL) {
 		INP_HASH_RUNLOCK(pcbinfo);
 		return (NULL);
 	}
 
 	/*
 	 * Pin the inpcb with a reference while switching from the hash lock
 	 * to the inpcb lock; in_pcbrele_*() reports whether it was freed in
 	 * between (and has already unlocked it in that case).
 	 */
 	in_pcbref(found);
 	INP_HASH_RUNLOCK(pcbinfo);
 	if ((lookupflags & INPLOOKUP_WLOCKPCB) != 0) {
 		INP_WLOCK(found);
 		if (in_pcbrele_wlocked(found))
 			return (NULL);
 	} else if ((lookupflags & INPLOOKUP_RLOCKPCB) != 0) {
 		INP_RLOCK(found);
 		if (in_pcbrele_rlocked(found))
 			return (NULL);
 	} else
 		panic("%s: locking bug", __func__);
 	return (found);
 }
 
 /*
  * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
  * from which a pre-calculated hash value may be extracted.
  *
  * Possibly more of this logic should be in in_pcbgroup.c.
  *
  * in_pcblookup() is the variant without an mbuf.  lookupflags must be a
  * subset of INPLOOKUP_MASK and must include INPLOOKUP_RLOCKPCB or
  * INPLOOKUP_WLOCKPCB (both asserted below).
  */
 struct inpcb *
 in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport,
     struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp)
 {
 #if defined(PCBGROUP) && !defined(RSS)
 	struct inpcbgroup *pcbgroup;
 #endif
 
 	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
 	    ("%s: LOCKPCB not set", __func__));
 
 	/*
 	 * When not using RSS, use connection groups in preference to the
 	 * reservation table when looking up 4-tuples.  When using RSS, just
 	 * use the reservation table, due to the cost of the Toeplitz hash
 	 * in software.
 	 *
 	 * XXXRW: This policy belongs in the pcbgroup code, as in principle
 	 * we could be doing RSS with a non-Toeplitz hash that is affordable
 	 * in software.
 	 */
 #if defined(PCBGROUP) && !defined(RSS)
 	if (in_pcbgroup_enabled(pcbinfo)) {
 		pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
 		    fport);
 		return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
 		    laddr, lport, lookupflags, ifp));
 	}
 #endif
 	/* Otherwise use the pcbinfo-wide hash table. */
 	return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
 	    lookupflags, ifp));
 }
 
 /*
  * Variant of in_pcblookup() that may use the hash type and flow ID recorded
  * in mbuf m to select the connection group.  lookupflags must be a subset
  * of INPLOOKUP_MASK and must include INPLOOKUP_RLOCKPCB or
  * INPLOOKUP_WLOCKPCB (both asserted below).
  */
 struct inpcb *
 in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
     u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
     struct ifnet *ifp, struct mbuf *m)
 {
 #ifdef PCBGROUP
 	struct inpcbgroup *pcbgroup;
 #endif
 
 	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
 	    ("%s: LOCKPCB not set", __func__));
 
 #ifdef PCBGROUP
 	/*
 	 * If we can use a hardware-generated hash to look up the connection
 	 * group, use that connection group to find the inpcb.  Otherwise
 	 * fall back on a software hash -- or the reservation table if we're
 	 * using RSS.
 	 *
 	 * XXXRW: As above, that policy belongs in the pcbgroup code.
 	 */
 	if (in_pcbgroup_enabled(pcbinfo) &&
 	    !(M_HASHTYPE_TEST(m, M_HASHTYPE_NONE))) {
 		pcbgroup = in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
 		    m->m_pkthdr.flowid);
 		if (pcbgroup != NULL)
 			return (in_pcblookup_group(pcbinfo, pcbgroup, faddr,
 			    fport, laddr, lport, lookupflags, ifp));
 #ifndef RSS
 		/* No group for this hash: derive one from the 4-tuple. */
 		pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
 		    fport);
 		return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
 		    laddr, lport, lookupflags, ifp));
 #endif
 	}
 #endif
 	return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
 	    lookupflags, ifp));
 }
 #endif /* INET */
 
 /*
  * Insert PCB onto various hash lists.
  *
  * The inpcb is added to the connection hash bucket selected by its foreign
  * address, local port and foreign port, and to the per-port list for its
  * local port (a port head is allocated if none exists yet).  When
  * do_pcbgroup_update is non-zero and PCBGROUP is configured, the pcbgroup
  * is updated as well.  The caller must hold the inpcb write lock and the
  * hash write lock.
  *
  * Returns 0 on success, or ENOBUFS if a port head could not be allocated.
  */
 static int
 in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
 {
 	struct inpcbhead *pcbhash;
 	struct inpcbporthead *pcbporthash;
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct inpcbport *phd;
 	u_int32_t hashkey_faddr;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 
 	KASSERT((inp->inp_flags & INP_INHASHLIST) == 0,
 	    ("in_pcbinshash: INP_INHASHLIST"));
 
 	/* IPv6 inpcbs derive the key from the IPv6 foreign address. */
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6)
 		hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr);
 	else
 #endif
 	hashkey_faddr = inp->inp_faddr.s_addr;
 
 	pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
 		 inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
 
 	pcbporthash = &pcbinfo->ipi_porthashbase[
 	    INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
 
 	/*
 	 * Go through port list and look for a head for this lport.
 	 */
 	LIST_FOREACH(phd, pcbporthash, phd_hash) {
 		if (phd->phd_port == inp->inp_lport)
 			break;
 	}
 	/*
 	 * If none exists, malloc one and tack it on.
 	 */
 	if (phd == NULL) {
 		/* M_NOWAIT: the allocation can fail and is checked below. */
 		phd = malloc(sizeof(struct inpcbport), M_PCB, M_NOWAIT);
 		if (phd == NULL) {
 			return (ENOBUFS); /* XXX */
 		}
 		phd->phd_port = inp->inp_lport;
 		LIST_INIT(&phd->phd_pcblist);
 		LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
 	}
 	inp->inp_phd = phd;
 	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
 	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
 	inp->inp_flags |= INP_INHASHLIST;
 #ifdef PCBGROUP
 	if (do_pcbgroup_update)
 		in_pcbgroup_update(inp);
 #endif
 	return (0);
 }
 
 /*
  * For now, there are two public interfaces to insert an inpcb into the hash
  * lists -- one that does update pcbgroups, and one that doesn't.  The latter
  * is used only in the TCP syncache, where in_pcbinshash is called before the
  * full 4-tuple is set for the inpcb, and we don't want to install in the
  * pcbgroup until later.
  *
  * XXXRW: This seems like a misfeature.  in_pcbinshash should always update
  * connection groups, and partially initialised inpcbs should not be exposed
  * to either reservation hash tables or pcbgroups.
  *
  * in_pcbinshash() is the variant that updates pcbgroups.  It returns the
  * result of in_pcbinshash_internal().
  */
 int
 in_pcbinshash(struct inpcb *inp)
 {
 
 	return (in_pcbinshash_internal(inp, 1));
 }
 
 /*
  * Insert an inpcb into the hash lists without updating its pcbgroup.
  * Returns the result of in_pcbinshash_internal().
  */
 int
 in_pcbinshash_nopcbgroup(struct inpcb *inp)
 {
 
 	return (in_pcbinshash_internal(inp, 0));
 }
 
 /*
  * Move PCB to the proper hash bucket when { faddr, fport } have been
  * changed. NOTE: This does not handle the case of the lport changing (the
  * hashed port list would have to be updated as well), so the lport must
  * not change after in_pcbinshash() has been called.
  *
  * The caller must hold the inpcb write lock and the hash write lock, and
  * the inpcb must already be on the hash lists.  With PCBGROUP configured,
  * the pcbgroup is updated using the mbuf m when it is non-NULL, and without
  * it otherwise.
  */
 void
 in_pcbrehash_mbuf(struct inpcb *inp, struct mbuf *m)
 {
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	struct inpcbhead *head;
 	u_int32_t hashkey_faddr;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 
 	KASSERT(inp->inp_flags & INP_INHASHLIST,
 	    ("in_pcbrehash: !INP_INHASHLIST"));
 
 	/* IPv6 inpcbs derive the key from the IPv6 foreign address. */
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6)
 		hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr);
 	else
 #endif
 	hashkey_faddr = inp->inp_faddr.s_addr;
 
 	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
 		inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
 
 	/* Unlink from the old bucket and insert at the head of the new one. */
 	LIST_REMOVE(inp, inp_hash);
 	LIST_INSERT_HEAD(head, inp, inp_hash);
 
 #ifdef PCBGROUP
 	if (m != NULL)
 		in_pcbgroup_update_mbuf(inp, m);
 	else
 		in_pcbgroup_update(inp);
 #endif
 }
 
 /*
  * Rehash an inpcb without an mbuf; equivalent to
  * in_pcbrehash_mbuf(inp, NULL).
  */
 void
 in_pcbrehash(struct inpcb *inp)
 {
 
 	in_pcbrehash_mbuf(inp, NULL);
 }
 
 /*
  * Remove PCB from various lists.
  *
  * Takes the inpcb off the hash and port lists (if it is on them), off the
  * pcbinfo's global list, and -- with PCBGROUP configured -- out of its
  * pcbgroup.  The caller must hold the inpcb write lock and the pcbinfo list
  * write lock; the hash write lock is taken internally.
  */
 static void
 in_pcbremlists(struct inpcb *inp)
 {
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 
 #ifdef INVARIANTS
 	if (pcbinfo == &V_tcbinfo) {
 		INP_INFO_RLOCK_ASSERT(pcbinfo);
 	} else {
 		INP_INFO_WLOCK_ASSERT(pcbinfo);
 	}
 #endif
 
 	INP_WLOCK_ASSERT(inp);
 	INP_LIST_WLOCK_ASSERT(pcbinfo);
 
 	/* Stamp the inpcb with a new generation count. */
 	inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
 	if (inp->inp_flags & INP_INHASHLIST) {
 		struct inpcbport *phd = inp->inp_phd;
 
 		INP_HASH_WLOCK(pcbinfo);
 		LIST_REMOVE(inp, inp_hash);
 		LIST_REMOVE(inp, inp_portlist);
 		/* Free the per-port head once its last inpcb is gone. */
 		if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
 			LIST_REMOVE(phd, phd_hash);
 			free(phd, M_PCB);
 		}
 		INP_HASH_WUNLOCK(pcbinfo);
 		inp->inp_flags &= ~INP_INHASHLIST;
 	}
 	LIST_REMOVE(inp, inp_list);
 	pcbinfo->ipi_count--;
 #ifdef PCBGROUP
 	in_pcbgroup_remove(inp);
 #endif
 }
 
 /*
  * A set label operation has occurred at the socket layer, propagate the
  * label change into the in_pcb for the socket.
  *
  * This is a no-op unless MAC is configured.
  */
 void
 in_pcbsosetlabel(struct socket *so)
 {
 #ifdef MAC
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL"));
 
 	/* The inpcb write lock is taken before the socket lock. */
 	INP_WLOCK(inp);
 	SOCK_LOCK(so);
 	mac_inpcb_sosetlabel(so, inp);
 	SOCK_UNLOCK(so);
 	INP_WUNLOCK(inp);
 #endif
 }
 
 /*
  * ipport_tick runs once per second, determining if random port allocation
  * should be continued.  If more than ipport_randomcps ports have been
  * allocated in the last second, then we return to sequential port
  * allocation. We return to random allocation only once we drop below
  * ipport_randomcps for at least ipport_randomtime seconds.
  *
  * The xtp argument is unused.  The callout re-arms itself on every run.
  */
 static void
 ipport_tick(void *xtp)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);	/* XXX appease INVARIANTS here */
 		/*
 		 * At or below the rate limit since the last tick: count the
 		 * stop-random timer down.  Above it: restart the timer.
 		 */
 		if (V_ipport_tcpallocs <=
 		    V_ipport_tcplastcount + V_ipport_randomcps) {
 			if (V_ipport_stoprandom > 0)
 				V_ipport_stoprandom--;
 		} else
 			V_ipport_stoprandom = V_ipport_randomtime;
 		/* Remember the running total for the next comparison. */
 		V_ipport_tcplastcount = V_ipport_tcpallocs;
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 	/* Schedule the next run hz ticks from now. */
 	callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
 }
 
 /*
  * Stop the ipport_tick callout.  Registered by ipport_tick_init() as a
  * shutdown_pre_sync event handler.  The xtp argument is unused.
  */
 static void
 ip_fini(void *xtp)
 {
 
 	callout_stop(&ipport_tick_callout);
 }
 
 /*
  * The ipport_callout should start running at about the time we attach the
  * inet or inet6 domains.
  *
  * Initializes the callout, schedules the first ipport_tick() run one tick
  * from now, and registers ip_fini() to stop it at shutdown_pre_sync.
  */
 static void
 ipport_tick_init(const void *unused __unused)
 {
 
 	/* Start ipport_tick. */
 	callout_init(&ipport_tick_callout, 1);
 	callout_reset(&ipport_tick_callout, 1, ipport_tick, NULL);
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
 		SHUTDOWN_PRI_DEFAULT);
 }
 SYSINIT(ipport_tick_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, 
     ipport_tick_init, NULL);
 
 /*
  * Function wrapper around the INP_WLOCK() macro: write-lock the inpcb.
  */
 void
 inp_wlock(struct inpcb *inp)
 {
 
 	INP_WLOCK(inp);
 }
 
 /*
  * Function wrapper around the INP_WUNLOCK() macro: drop the inpcb write
  * lock.
  */
 void
 inp_wunlock(struct inpcb *inp)
 {
 
 	INP_WUNLOCK(inp);
 }
 
 /*
  * Function wrapper around the INP_RLOCK() macro: read-lock the inpcb.
  */
 void
 inp_rlock(struct inpcb *inp)
 {
 
 	INP_RLOCK(inp);
 }
 
 /*
  * Function wrapper around the INP_RUNLOCK() macro: drop the inpcb read
  * lock.
  */
 void
 inp_runlock(struct inpcb *inp)
 {
 
 	INP_RUNLOCK(inp);
 }
 
 #ifdef INVARIANTS
 /*
  * Function wrapper around INP_WLOCK_ASSERT().  Note that despite the generic
  * name it checks for the write lock.  Only built when INVARIANTS is defined.
  */
 void
 inp_lock_assert(struct inpcb *inp)
 {
 
 	INP_WLOCK_ASSERT(inp);
 }
 
 /*
  * Function wrapper around INP_UNLOCK_ASSERT().  Only built when INVARIANTS
  * is defined.
  */
 void
 inp_unlock_assert(struct inpcb *inp)
 {
 
 	INP_UNLOCK_ASSERT(inp);
 }
 #endif
 
 /*
  * Call func(inp, arg) for every inpcb on the V_tcbinfo list.
  *
  * The pcbinfo write lock is held for the whole walk, and each inpcb is
  * write-locked around its own callback, so func runs with both locks held.
  */
 void
 inp_apply_all(void (*func)(struct inpcb *, void *), void *arg)
 {
 	struct inpcb *inp;
 
 	INP_INFO_WLOCK(&V_tcbinfo);
 	LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
 		INP_WLOCK(inp);
 		func(inp, arg);
 		INP_WUNLOCK(inp);
 	}
 	INP_INFO_WUNLOCK(&V_tcbinfo);
 }
 
 /*
  * Return the socket pointer stored in the inpcb (inp_socket).
  * The caller must hold the inpcb write lock; this is asserted.
  */
 struct socket *
 inp_inpcbtosocket(struct inpcb *inp)
 {
 
 	INP_WLOCK_ASSERT(inp);
 	return (inp->inp_socket);
 }
 
 /*
  * Return inp_ppcb cast to a struct tcpcb pointer.  No check is made here
  * that the inpcb actually belongs to TCP.
  * The caller must hold the inpcb write lock; this is asserted.
  */
 struct tcpcb *
 inp_inpcbtotcpcb(struct inpcb *inp)
 {
 
 	INP_WLOCK_ASSERT(inp);
 	return ((struct tcpcb *)inp->inp_ppcb);
 }
 
 /*
  * Return the inpcb's inp_ip_tos value.  No lock assertion is made here.
  */
 int
 inp_ip_tos_get(const struct inpcb *inp)
 {
 
 	return (inp->inp_ip_tos);
 }
 
 /*
  * Store val into the inpcb's inp_ip_tos field.  No lock assertion and no
  * range check on val are made here.
  */
 void
 inp_ip_tos_set(struct inpcb *inp, int val)
 {
 
 	inp->inp_ip_tos = val;
 }
 
 /*
  * Copy the IPv4 connection 4-tuple out of the inpcb: local address/port
  * into *laddr and *lp, foreign address/port into *faddr and *fp.  The values
  * are copied exactly as stored; no byte-order conversion is applied.
  * The caller must hold the inpcb lock (INP_LOCK_ASSERT).
  */
 void
 inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
     uint32_t *faddr, uint16_t *fp)
 {
 
 	INP_LOCK_ASSERT(inp);
 	*laddr = inp->inp_laddr.s_addr;
 	*faddr = inp->inp_faddr.s_addr;
 	*lp = inp->inp_lport;
 	*fp = inp->inp_fport;
 }
 
 /*
  * Function wrapper around sotoinpcb(): return the inpcb for a socket.
  */
 struct inpcb *
 so_sotoinpcb(struct socket *so)
 {
 
 	return (sotoinpcb(so));
 }
 
 /*
  * Function wrapper around sototcpcb(): return the tcpcb for a socket.
  */
 struct tcpcb *
 so_sototcpcb(struct socket *so)
 {
 
 	return (sototcpcb(so));
 }
 
 #ifdef DDB
 /*
  * DDB helper: emit 'indent' spaces.  Nothing is printed when indent is
  * zero or negative.
  */
 static void
 db_print_indent(int indent)
 {
 
 	while (indent-- > 0)
 		db_printf(" ");
 }
 
 /*
  * DDB helper: print an in_conninfo.  A header line "<name> at <ptr>" is
  * printed at 'indent'; the local and foreign address/port pairs follow on
  * two lines indented two columns further.  Ports are passed through ntohs()
  * before printing.
  */
 static void
 db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent)
 {
 	char lbuf[48], fbuf[48];
 	int inner;
 
 	db_print_indent(indent);
 	db_printf("%s at %p\n", name, inc);
 
 	inner = indent + 2;
 
 	/* Format both endpoints; INC_ISIPV6 selects the address family. */
 #ifdef INET6
 	if ((inc->inc_flags & INC_ISIPV6) != 0) {
 		ip6_sprintf(lbuf, &inc->inc6_laddr);
 		ip6_sprintf(fbuf, &inc->inc6_faddr);
 	} else
 #endif
 	{
 		inet_ntoa_r(inc->inc_laddr, lbuf);
 		inet_ntoa_r(inc->inc_faddr, fbuf);
 	}
 
 	db_print_indent(inner);
 	db_printf("inc_laddr %s   inc_lport %u\n", lbuf,
 	    ntohs(inc->inc_lport));
 
 	db_print_indent(inner);
 	db_printf("inc_faddr %s   inc_fport %u\n", fbuf,
 	    ntohs(inc->inc_fport));
 }
 
 static void
 db_print_inpflags(int inp_flags)
 {
 	int comma;
 
 	comma = 0;
 	if (inp_flags & INP_RECVOPTS) {
 		db_printf("%sINP_RECVOPTS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_RECVRETOPTS) {
 		db_printf("%sINP_RECVRETOPTS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_RECVDSTADDR) {
 		db_printf("%sINP_RECVDSTADDR", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_HDRINCL) {
 		db_printf("%sINP_HDRINCL", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_HIGHPORT) {
 		db_printf("%sINP_HIGHPORT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_LOWPORT) {
 		db_printf("%sINP_LOWPORT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_ANONPORT) {
 		db_printf("%sINP_ANONPORT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_RECVIF) {
 		db_printf("%sINP_RECVIF", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_MTUDISC) {
 		db_printf("%sINP_MTUDISC", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_RECVTTL) {
 		db_printf("%sINP_RECVTTL", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_DONTFRAG) {
 		db_printf("%sINP_DONTFRAG", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_RECVTOS) {
 		db_printf("%sINP_RECVTOS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_IPV6_V6ONLY) {
 		db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_PKTINFO) {
 		db_printf("%sIN6P_PKTINFO", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_HOPLIMIT) {
 		db_printf("%sIN6P_HOPLIMIT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_HOPOPTS) {
 		db_printf("%sIN6P_HOPOPTS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_DSTOPTS) {
 		db_printf("%sIN6P_DSTOPTS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_RTHDR) {
 		db_printf("%sIN6P_RTHDR", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_RTHDRDSTOPTS) {
 		db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_TCLASS) {
 		db_printf("%sIN6P_TCLASS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_AUTOFLOWLABEL) {
 		db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & INP_TIMEWAIT) {
 		db_printf("%sINP_TIMEWAIT", comma ? ", " : "");
 		comma  = 1;
 	}
 	if (inp_flags & INP_ONESBCAST) {
 		db_printf("%sINP_ONESBCAST", comma ? ", " : "");
 		comma  = 1;
 	}
 	if (inp_flags & INP_DROPPED) {
 		db_printf("%sINP_DROPPED", comma ? ", " : "");
 		comma  = 1;
 	}
 	if (inp_flags & INP_SOCKREF) {
 		db_printf("%sINP_SOCKREF", comma ? ", " : "");
 		comma  = 1;
 	}
 	if (inp_flags & IN6P_RFC2292) {
 		db_printf("%sIN6P_RFC2292", comma ? ", " : "");
 		comma = 1;
 	}
 	if (inp_flags & IN6P_MTU) {
 		db_printf("IN6P_MTU%s", comma ? ", " : "");
 		comma = 1;
 	}
 }
 
 /*
  * DDB helper: print the names of the INP_IPV4, INP_IPV6 and INP_IPV6PROTO
  * bits that are set in inp_vflag as a ", "-separated list.  No newline is
  * printed.
  */
 static void
 db_print_inpvflag(u_char inp_vflag)
 {
 	const char *sep;
 
 	/* Empty before the first name, ", " before every later one. */
 	sep = "";
 	if ((inp_vflag & INP_IPV4) != 0) {
 		db_printf("%sINP_IPV4", sep);
 		sep = ", ";
 	}
 	if ((inp_vflag & INP_IPV6) != 0) {
 		db_printf("%sINP_IPV6", sep);
 		sep = ", ";
 	}
 	if ((inp_vflag & INP_IPV6PROTO) != 0)
 		db_printf("%sINP_IPV6PROTO", sep);
 }
 
 /*
  * DDB helper: dump an inpcb.  A header line "<name> at <ptr>" is printed at
  * 'indent'; every following line is indented two columns further.  The
  * embedded in_conninfo is printed via db_print_inconninfo(), and inp_flags
  * and inp_vflag are shown both in hex and decoded symbolically.
  *
  * For inpcbs with INP_IPV6 set (INET6 kernels only) the IPv6 option fields
  * are printed on two lines; otherwise the IPv4 option fields are printed.
  */
 static void
 db_print_inpcb(struct inpcb *inp, const char *name, int indent)
 {
 
 	db_print_indent(indent);
 	db_printf("%s at %p\n", name, inp);
 
 	indent += 2;
 
 	db_print_indent(indent);
 	db_printf("inp_flow: 0x%x\n", inp->inp_flow);
 
 	db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent);
 
 	db_print_indent(indent);
 	db_printf("inp_ppcb: %p   inp_pcbinfo: %p   inp_socket: %p\n",
 	    inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket);
 
 	db_print_indent(indent);
 	db_printf("inp_label: %p   inp_flags: 0x%x (",
 	    inp->inp_label, inp->inp_flags);
 	db_print_inpflags(inp->inp_flags);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("inp_sp: %p   inp_vflag: 0x%x (", inp->inp_sp,
 	    inp->inp_vflag);
 	db_print_inpvflag(inp->inp_vflag);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("inp_ip_ttl: %d   inp_ip_p: %d   inp_ip_minttl: %d\n",
 	    inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl);
 
 	db_print_indent(indent);
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6) {
 		db_printf("in6p_options: %p   in6p_outputopts: %p   "
 		    "in6p_moptions: %p\n", inp->in6p_options,
 		    inp->in6p_outputopts, inp->in6p_moptions);
 		/*
 		 * The indent printed above covers only the first line;
 		 * indent the second line as well so it stays aligned.
 		 */
 		db_print_indent(indent);
 		db_printf("in6p_icmp6filt: %p   in6p_cksum %d   "
 		    "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum,
 		    inp->in6p_hops);
 	} else
 #endif
 	{
 		db_printf("inp_ip_tos: %d   inp_ip_options: %p   "
 		    "inp_ip_moptions: %p\n", inp->inp_ip_tos,
 		    inp->inp_options, inp->inp_moptions);
 	}
 
 	db_print_indent(indent);
 	db_printf("inp_phd: %p   inp_gencnt: %ju\n", inp->inp_phd,
 	    (uintmax_t)inp->inp_gencnt);
 }
 
 /*
  * DDB "show inpcb <addr>": dump the inpcb located at the given address, or
  * print a usage line when no address was supplied.
  */
 DB_SHOW_COMMAND(inpcb, db_show_inpcb)
 {
 
 	if (have_addr)
 		db_print_inpcb((struct inpcb *)addr, "inpcb", 0);
 	else
 		db_printf("usage: show inpcb <addr>\n");
 }
 #endif /* DDB */
Index: projects/clang380-import/sys/netinet/ip_encap.c
===================================================================
--- projects/clang380-import/sys/netinet/ip_encap.c	(revision 293686)
+++ projects/clang380-import/sys/netinet/ip_encap.c	(revision 293687)
@@ -1,477 +1,479 @@
 /*	$KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $	*/
 
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 /*
  * My grandfather said that there's a devil inside tunnelling technology...
  *
  * We have surprisingly many protocols that want packets with IP protocol
  * #4 or #41.  Here's a list of protocols that want protocol #41:
  *	RFC1933 configured tunnel
  *	RFC1933 automatic tunnel
  *	RFC2401 IPsec tunnel
  *	RFC2473 IPv6 generic packet tunnelling
  *	RFC2529 6over4 tunnel
  *	mobile-ip6 (uses RFC2473)
  *	RFC3056 6to4 tunnel
  *	isatap tunnel
  * Here's a list of protocols that want protocol #4:
  *	RFC1853 IPv4-in-IPv4 tunnelling
  *	RFC2003 IPv4 encapsulation within IPv4
  *	RFC2344 reverse tunnelling for mobile-ip4
  *	RFC2401 IPsec tunnel
  * Well, what can I say.  They impose different en/decapsulation mechanisms
  * from each other, so they need separate protocol handlers.  The only one
  * we can easily determine by protocol # is IPsec, which always has
  * AH/ESP/IPComp header right after outer IP header.
  *
  * So, clearly good old protosw does not work for protocol #4 and #41.
  * The code will let you match protocol via src/dst address pair.
  */
 /* XXX is M_NETADDR correct? */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_mrouting.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
 #include <sys/errno.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
 
 #include <net/if.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_encap.h>
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #endif
 
 #include <machine/stdarg.h>
 
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 static MALLOC_DEFINE(M_NETADDR, "encap_export_host", "Export host address structure");
 
 static void encap_add(struct encaptab *);
 static int mask_match(const struct encaptab *, const struct sockaddr *,
 		const struct sockaddr *);
 static void encap_fillarg(struct mbuf *, void *);
 
 /*
  * All global variables in ip_encap.c are locked using encapmtx.
  */
 static struct mtx encapmtx;
 MTX_SYSINIT(encapmtx, &encapmtx, "encapmtx", MTX_DEF);
 static LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(encaptab);
 
 /*
  * We currently keep encap_init() for source code compatibility reasons --
  * it's referenced by KAME pieces in netinet6.  It intentionally does
  * nothing: the mutex and list head above are initialized statically.
  */
 void
 encap_init(void)
 {
 }
 
 #ifdef INET
 int
 encap4_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct ip *ip;
 	struct mbuf *m;
 	struct sockaddr_in s, d;
 	const struct protosw *psw;
 	struct encaptab *ep, *match;
 	void *arg;
 	int matchprio, off, prio;
 
 	m = *mp;
 	off = *offp;
 	ip = mtod(m, struct ip *);
 
 	bzero(&s, sizeof(s));
 	s.sin_family = AF_INET;
 	s.sin_len = sizeof(struct sockaddr_in);
 	s.sin_addr = ip->ip_src;
 	bzero(&d, sizeof(d));
 	d.sin_family = AF_INET;
 	d.sin_len = sizeof(struct sockaddr_in);
 	d.sin_addr = ip->ip_dst;
 
 	arg = NULL;
 	psw = NULL;
 	match = NULL;
 	matchprio = 0;
 	mtx_lock(&encapmtx);
 	LIST_FOREACH(ep, &encaptab, chain) {
 		if (ep->af != AF_INET)
 			continue;
 		if (ep->proto >= 0 && ep->proto != proto)
 			continue;
 		if (ep->func)
 			prio = (*ep->func)(m, off, proto, ep->arg);
 		else {
 			/*
 			 * it's inbound traffic, we need to match in reverse
 			 * order
 			 */
 			prio = mask_match(ep, (struct sockaddr *)&d,
 			    (struct sockaddr *)&s);
 		}
 
 		/*
 		 * We prioritize the matches by using bit length of the
 		 * matches.  mask_match() and user-supplied matching function
 		 * should return the bit length of the matches (for example,
 		 * if both src/dst are matched for IPv4, 64 should be returned).
 		 * 0 or negative return value means "it did not match".
 		 *
 		 * The question is, since we have two "mask" portion, we
 		 * cannot really define total order between entries.
 		 * For example, which of these should be preferred?
 		 * mask_match() returns 48 (32 + 16) for both of them.
 		 *	src=3ffe::/16, dst=3ffe:501::/32
 		 *	src=3ffe:501::/32, dst=3ffe::/16
 		 *
 		 * We need to loop through all the possible candidates
 		 * to get the best match - the search takes O(n) for
 		 * n attachments (i.e. interfaces).
 		 */
 		if (prio <= 0)
 			continue;
 		if (prio > matchprio) {
 			matchprio = prio;
 			match = ep;
 		}
 	}
 	if (match != NULL) {
 		psw = match->psw;
 		arg = match->arg;
 	}
 	mtx_unlock(&encapmtx);
 
 	if (match != NULL) {
 		/* found a match, "match" has the best one */
 		if (psw != NULL && psw->pr_input != NULL) {
 			encap_fillarg(m, arg);
 			(*psw->pr_input)(mp, offp, proto);
 		} else
 			m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	/* last resort: inject to raw socket */
 	return (rip_input(mp, offp, proto));
 }
 #endif
 
 #ifdef INET6
 int
 encap6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp;
 	struct ip6_hdr *ip6;
 	struct sockaddr_in6 s, d;
 	const struct protosw *psw;
 	struct encaptab *ep, *match;
 	void *arg;
 	int prio, matchprio;
 
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	bzero(&s, sizeof(s));
 	s.sin6_family = AF_INET6;
 	s.sin6_len = sizeof(struct sockaddr_in6);
 	s.sin6_addr = ip6->ip6_src;
 	bzero(&d, sizeof(d));
 	d.sin6_family = AF_INET6;
 	d.sin6_len = sizeof(struct sockaddr_in6);
 	d.sin6_addr = ip6->ip6_dst;
 
 	arg = NULL;
 	psw = NULL;
 	match = NULL;
 	matchprio = 0;
 	mtx_lock(&encapmtx);
 	LIST_FOREACH(ep, &encaptab, chain) {
 		if (ep->af != AF_INET6)
 			continue;
 		if (ep->proto >= 0 && ep->proto != proto)
 			continue;
 		if (ep->func)
 			prio = (*ep->func)(m, *offp, proto, ep->arg);
 		else {
 			/*
 			 * it's inbound traffic, we need to match in reverse
 			 * order
 			 */
 			prio = mask_match(ep, (struct sockaddr *)&d,
 			    (struct sockaddr *)&s);
 		}
 
 		/* see encap4_input() for issues here */
 		if (prio <= 0)
 			continue;
 		if (prio > matchprio) {
 			matchprio = prio;
 			match = ep;
 		}
 	}
 	if (match != NULL) {
 		psw = match->psw;
 		arg = match->arg;
 	}
 	mtx_unlock(&encapmtx);
 
 	if (match != NULL) {
 		/* found a match */
 		if (psw != NULL && psw->pr_input != NULL) {
 			encap_fillarg(m, arg);
 			return (*psw->pr_input)(mp, offp, proto);
 		} else {
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 	}
 
 	/* last resort: inject to raw socket */
 	return rip6_input(mp, offp, proto);
 }
 #endif
 
 /*
  * Insert ep at the head of the global encaptab list.
  * The caller must hold encapmtx; this is asserted.
  */
 /*lint -sem(encap_add, custodial(1)) */
 static void
 encap_add(struct encaptab *ep)
 {
 
 	mtx_assert(&encapmtx, MA_OWNED);
 	LIST_INSERT_HEAD(&encaptab, ep, chain);
 }
 
 /*
  * Register an address/mask based attachment.
  *
  * sp (src ptr) is always my side, and dp (dst ptr) is always remote side.
  * The masks sm and dm are assumed to have the same length as sp and dp.
  *
  * Returns the new entry, which is also the cookie later passed to
  * encap_detach().  Returns NULL when the arguments fail the sanity checks,
  * when an entry with exactly the same configuration already exists, or when
  * memory cannot be allocated.
  */
 const struct encaptab *
 encap_attach(int af, int proto, const struct sockaddr *sp,
     const struct sockaddr *sm, const struct sockaddr *dp,
     const struct sockaddr *dm, const struct protosw *psw, void *arg)
 {
 	struct encaptab *ep, *nep;
 
 	/*
 	 * Sanity-check the arguments: both addresses must fit the storage
 	 * in an entry, have equal length, and belong to family af.
 	 */
 	if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst) ||
 	    sp->sa_len != dp->sa_len ||
 	    af != sp->sa_family || af != dp->sa_family)
 		return (NULL);
 
 	nep = NULL;
 	mtx_lock(&encapmtx);
 
 	/* Refuse if someone has already attached with the same config. */
 	LIST_FOREACH(ep, &encaptab, chain) {
 		if (ep->af == af && ep->proto == proto &&
 		    ep->src.ss_len == sp->sa_len &&
 		    bcmp(&ep->src, sp, sp->sa_len) == 0 &&
 		    bcmp(&ep->srcmask, sm, sp->sa_len) == 0 &&
 		    ep->dst.ss_len == dp->sa_len &&
 		    bcmp(&ep->dst, dp, dp->sa_len) == 0 &&
 		    bcmp(&ep->dstmask, dm, dp->sa_len) == 0)
 			goto done;
 	}
 
 	/* Allocated with M_NOWAIT while encapmtx is held. */
 	nep = malloc(sizeof(*nep), M_NETADDR, M_NOWAIT);	/*XXX*/
 	if (nep == NULL)
 		goto done;
 	bzero(nep, sizeof(*nep));
 
 	nep->af = af;
 	nep->proto = proto;
 	bcopy(sp, &nep->src, sp->sa_len);
 	bcopy(sm, &nep->srcmask, sp->sa_len);
 	bcopy(dp, &nep->dst, dp->sa_len);
 	bcopy(dm, &nep->dstmask, dp->sa_len);
 	nep->psw = psw;
 	nep->arg = arg;
 
 	encap_add(nep);
 done:
 	mtx_unlock(&encapmtx);
 	return (nep);
 }
 
 /*
  * Register an attachment whose matching is done by a caller-supplied
  * function instead of address/mask pairs.  The input routines call func
  * with the mbuf, offset, protocol and arg, and treat its return value as
  * the match priority.
  *
  * Returns the new entry (the cookie for encap_detach()), or NULL when func
  * is NULL or memory cannot be allocated.  No duplicate check is performed.
  */
 const struct encaptab *
 encap_attach_func(int af, int proto,
     int (*func)(const struct mbuf *, int, int, void *),
     const struct protosw *psw, void *arg)
 {
 	struct encaptab *nep;
 
 	/* A match function is mandatory for this kind of attachment. */
 	if (func == NULL)
 		return (NULL);
 
 	nep = malloc(sizeof(*nep), M_NETADDR, M_NOWAIT);	/*XXX*/
 	if (nep == NULL)
 		return (NULL);
 	bzero(nep, sizeof(*nep));
 
 	nep->func = func;
 	nep->af = af;
 	nep->proto = proto;
 	nep->psw = psw;
 	nep->arg = arg;
 
 	/* The entry is fully built before it becomes visible on the list. */
 	mtx_lock(&encapmtx);
 	encap_add(nep);
 	mtx_unlock(&encapmtx);
 
 	return (nep);
 }
 
 int
 encap_detach(const struct encaptab *cookie)
 {
 	const struct encaptab *ep = cookie;
 	struct encaptab *p;
 
 	mtx_lock(&encapmtx);
 	LIST_FOREACH(p, &encaptab, chain) {
 		if (p == ep) {
 			LIST_REMOVE(p, chain);
 			mtx_unlock(&encapmtx);
 			free(p, M_NETADDR);	/*XXX*/
 			return 0;
 		}
 	}
 	mtx_unlock(&encapmtx);
 
 	return EINVAL;
 }
 
 static int
 mask_match(const struct encaptab *ep, const struct sockaddr *sp,
     const struct sockaddr *dp)
 {
 	struct sockaddr_storage s;
 	struct sockaddr_storage d;
 	int i;
 	const u_int8_t *p, *q;
 	u_int8_t *r;
 	int matchlen;
 
 	if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d))
 		return 0;
 	if (sp->sa_family != ep->af || dp->sa_family != ep->af)
 		return 0;
 	if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len)
 		return 0;
 
 	matchlen = 0;
 
 	p = (const u_int8_t *)sp;
 	q = (const u_int8_t *)&ep->srcmask;
 	r = (u_int8_t *)&s;
 	for (i = 0 ; i < sp->sa_len; i++) {
 		r[i] = p[i] & q[i];
 		/* XXX estimate */
 		matchlen += (q[i] ? 8 : 0);
 	}
 
 	p = (const u_int8_t *)dp;
 	q = (const u_int8_t *)&ep->dstmask;
 	r = (u_int8_t *)&d;
 	for (i = 0 ; i < dp->sa_len; i++) {
 		r[i] = p[i] & q[i];
 		/* XXX rough estimate */
 		matchlen += (q[i] ? 8 : 0);
 	}
 
 	/* need to overwrite len/family portion as we don't compare them */
 	s.ss_len = sp->sa_len;
 	s.ss_family = sp->sa_family;
 	d.ss_len = dp->sa_len;
 	d.ss_family = dp->sa_family;
 
 	if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 &&
 	    bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) {
 		return matchlen;
 	} else
 		return 0;
 }
 
 /*
  * Attach arg to the mbuf as a PACKET_TAG_ENCAP tag so that it can be
  * retrieved later with encap_getarg().  Nothing is attached when arg is
  * NULL or when the tag cannot be allocated; neither case is reported.
  */
 static void
 encap_fillarg(struct mbuf *m, void *arg)
 {
 	struct m_tag *tag;
 
 	if (arg == NULL)
 		return;
 
 	tag = m_tag_get(PACKET_TAG_ENCAP, sizeof(void *), M_NOWAIT);
 	if (tag == NULL)
 		return;
 
 	/* The pointer is stored immediately after the tag header. */
 	*(void **)(tag + 1) = arg;
 	m_tag_prepend(m, tag);
 }
 
 /*
  * Return the pointer stored in the mbuf's PACKET_TAG_ENCAP tag and delete
  * that tag, so a second call on the same mbuf returns NULL.  Returns NULL
  * when the mbuf carries no such tag.
  */
 void *
 encap_getarg(struct mbuf *m)
 {
 	struct m_tag *tag;
 	void *arg;
 
 	tag = m_tag_find(m, PACKET_TAG_ENCAP, NULL);
 	if (tag == NULL)
 		return (NULL);
 
 	/* Read the payload before the tag is deleted. */
 	arg = *(void **)(tag + 1);
 	m_tag_delete(m, tag);
 	return (arg);
 }
Index: projects/clang380-import/sys/netinet/ip_mroute.c
===================================================================
--- projects/clang380-import/sys/netinet/ip_mroute.c	(revision 293686)
+++ projects/clang380-import/sys/netinet/ip_mroute.c	(revision 293687)
@@ -1,2948 +1,2949 @@
 /*-
  * Copyright (c) 1989 Stephen Deering
  * Copyright (c) 1992, 1993
  *      The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Stephen Deering of Stanford University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *      @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93
  */
 
 /*
  * IP multicast forwarding procedures
  *
  * Written by David Waitzman, BBN Labs, August 1988.
  * Modified by Steve Deering, Stanford, February 1989.
  * Modified by Mark J. Steiglitz, Stanford, May, 1991
  * Modified by Van Jacobson, LBL, January 1993
  * Modified by Ajit Thyagarajan, PARC, August 1993
  * Modified by Bill Fenner, PARC, April 1995
  * Modified by Ahmed Helmy, SGI, June 1996
  * Modified by George Edmond Eddy (Rusty), ISI, February 1998
  * Modified by Pavlin Radoslavov, USC/ISI, May 1998, August 1999, October 2000
  * Modified by Hitoshi Asaeda, WIDE, August 2000
  * Modified by Pavlin Radoslavov, ICSI, October 2002
  *
  * MROUTING Revision: 3.5
  * and PIM-SMv2 and PIM-DM support, advanced API support,
  * bandwidth metering and signaling
  */
 
 /*
  * TODO: Prefix functions with ipmf_.
  * TODO: Maintain a refcount on if_allmulti() in ifnet or in the protocol
  * domain attachment (if_afdata) so we can track consumers of that service.
  * TODO: Deprecate routing socket path for SIOCGETSGCNT and SIOCGETVIFCNT,
  * move it to socket options.
  * TODO: Cleanup LSRR removal further.
  * TODO: Push RSVP stubs into raw_ip.c.
  * TODO: Use bitstring.h for vif set.
  * TODO: Fix mrt6_ioctl dangling ref when dynamically loaded.
  * TODO: Sync ip6_mroute.c with this file.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_mrouting.h"
 
 #define _PIM_VT 1
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/stddef.h>
+#include <sys/eventhandler.h>
 #include <sys/lock.h>
 #include <sys/ktr.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/time.h>
 #include <sys/counter.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/igmp.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_encap.h>
 #include <netinet/ip_mroute.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/pim.h>
 #include <netinet/pim_var.h>
 #include <netinet/udp.h>
 
 #include <machine/in_cksum.h>
 
 #ifndef KTR_IPMF
 #define KTR_IPMF KTR_INET
 #endif
 
 #define		VIFI_INVALID	((vifi_t) -1)
 
 static VNET_DEFINE(uint32_t, last_tv_sec); /* last time we processed this */
 #define	V_last_tv_sec	VNET(last_tv_sec)
 
 static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast forwarding cache");
 
 /*
  * Locking.  We use two locks: one for the virtual interface table and
  * one for the forwarding table.  These locks may be nested in which case
  * the VIF lock must always be taken first.  Note that each lock is used
  * to cover not only the specific data structure but also related data
  * structures.
  */
 
 static struct mtx mrouter_mtx;
 #define	MROUTER_LOCK()		mtx_lock(&mrouter_mtx)
 #define	MROUTER_UNLOCK()	mtx_unlock(&mrouter_mtx)
 #define	MROUTER_LOCK_ASSERT()	mtx_assert(&mrouter_mtx, MA_OWNED)
 #define	MROUTER_LOCK_INIT()						\
 	mtx_init(&mrouter_mtx, "IPv4 multicast forwarding", NULL, MTX_DEF)
 #define	MROUTER_LOCK_DESTROY()	mtx_destroy(&mrouter_mtx)
 
 static int ip_mrouter_cnt;	/* # of vnets with active mrouters */
 static int ip_mrouter_unloading; /* Allow no more V_ip_mrouter sockets */
 
 static VNET_PCPUSTAT_DEFINE(struct mrtstat, mrtstat);
 VNET_PCPUSTAT_SYSINIT(mrtstat);
 VNET_PCPUSTAT_SYSUNINIT(mrtstat);
 SYSCTL_VNET_PCPUSTAT(_net_inet_ip, OID_AUTO, mrtstat, struct mrtstat,
     mrtstat, "IPv4 Multicast Forwarding Statistics (struct mrtstat, "
     "netinet/ip_mroute.h)");
 
 static VNET_DEFINE(u_long, mfchash);
 #define	V_mfchash		VNET(mfchash)
 #define	MFCHASH(a, g)							\
 	((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \
 	  ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & V_mfchash)
 #define	MFCHASHSIZE	256
 
 static u_long mfchashsize;			/* Hash size */
 static VNET_DEFINE(u_char *, nexpire);		/* 0..mfchashsize-1 */
 #define	V_nexpire		VNET(nexpire)
 static VNET_DEFINE(LIST_HEAD(mfchashhdr, mfc)*, mfchashtbl);
 #define	V_mfchashtbl		VNET(mfchashtbl)
 
 static struct mtx mfc_mtx;
 #define	MFC_LOCK()		mtx_lock(&mfc_mtx)
 #define	MFC_UNLOCK()		mtx_unlock(&mfc_mtx)
 #define	MFC_LOCK_ASSERT()	mtx_assert(&mfc_mtx, MA_OWNED)
 #define	MFC_LOCK_INIT()							\
 	mtx_init(&mfc_mtx, "IPv4 multicast forwarding cache", NULL, MTX_DEF)
 #define	MFC_LOCK_DESTROY()	mtx_destroy(&mfc_mtx)
 
 static VNET_DEFINE(vifi_t, numvifs);
 #define	V_numvifs		VNET(numvifs)
 static VNET_DEFINE(struct vif, viftable[MAXVIFS]);
 #define	V_viftable		VNET(viftable)
 SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_VNET | CTLFLAG_RD,
     &VNET_NAME(viftable), sizeof(V_viftable), "S,vif[MAXVIFS]",
     "IPv4 Multicast Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)");
 
 static struct mtx vif_mtx;
 #define	VIF_LOCK()		mtx_lock(&vif_mtx)
 #define	VIF_UNLOCK()		mtx_unlock(&vif_mtx)
 #define	VIF_LOCK_ASSERT()	mtx_assert(&vif_mtx, MA_OWNED)
 #define	VIF_LOCK_INIT()							\
 	mtx_init(&vif_mtx, "IPv4 multicast interfaces", NULL, MTX_DEF)
 #define	VIF_LOCK_DESTROY()	mtx_destroy(&vif_mtx)
 
 static eventhandler_tag if_detach_event_tag = NULL;
 
 static VNET_DEFINE(struct callout, expire_upcalls_ch);
 #define	V_expire_upcalls_ch	VNET(expire_upcalls_ch)
 
 #define		EXPIRE_TIMEOUT	(hz / 4)	/* 4x / second		*/
 #define		UPCALL_EXPIRE	6		/* number of timeouts	*/
 
 /*
  * Bandwidth meter variables and constants
  */
 static MALLOC_DEFINE(M_BWMETER, "bwmeter", "multicast upcall bw meters");
 /*
  * Pending timeouts are stored in a hash table, the key being the
  * expiration time. Periodically, the entries are analysed and processed.
  */
 #define	BW_METER_BUCKETS	1024
 static VNET_DEFINE(struct bw_meter*, bw_meter_timers[BW_METER_BUCKETS]);
 #define	V_bw_meter_timers	VNET(bw_meter_timers)
 static VNET_DEFINE(struct callout, bw_meter_ch);
 #define	V_bw_meter_ch		VNET(bw_meter_ch)
 #define	BW_METER_PERIOD (hz)		/* periodical handling of bw meters */
 
 /*
  * Pending upcalls are stored in a vector which is flushed when
  * full, or periodically
  */
 static VNET_DEFINE(struct bw_upcall, bw_upcalls[BW_UPCALLS_MAX]);
 #define	V_bw_upcalls		VNET(bw_upcalls)
 static VNET_DEFINE(u_int, bw_upcalls_n); /* # of pending upcalls */
 #define	V_bw_upcalls_n    	VNET(bw_upcalls_n)
 static VNET_DEFINE(struct callout, bw_upcalls_ch);
 #define	V_bw_upcalls_ch		VNET(bw_upcalls_ch)
 
 #define BW_UPCALLS_PERIOD (hz)		/* periodical flush of bw upcalls */
 
 static VNET_PCPUSTAT_DEFINE(struct pimstat, pimstat);
 VNET_PCPUSTAT_SYSINIT(pimstat);
 VNET_PCPUSTAT_SYSUNINIT(pimstat);
 
 SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
 SYSCTL_VNET_PCPUSTAT(_net_inet_pim, PIMCTL_STATS, stats, struct pimstat,
     pimstat, "PIM Statistics (struct pimstat, netinet/pim_var.h)");
 
 static u_long	pim_squelch_wholepkt = 0;
 SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW,
     &pim_squelch_wholepkt, 0,
     "Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified");
 
 /*
  * Protocol switch entry describing PIM: a raw-socket protocol in the inet
  * domain whose input routine is pim_input() and whose output, ctloutput and
  * user-request handlers are the raw IP ones.
  * NOTE(review): presumably passed to the encap attach call that sets
  * pim_encap_cookie; that call is outside this view -- confirm.
  */
 extern  struct domain inetdomain;
 static const struct protosw in_pim_protosw = {
 	.pr_type =		SOCK_RAW,
 	.pr_domain =		&inetdomain,
 	.pr_protocol =		IPPROTO_PIM,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_LASTHDR,
 	.pr_input =		pim_input,
 	.pr_output =		rip_output,
 	.pr_ctloutput =		rip_ctloutput,
 	.pr_usrreqs =		&rip_usrreqs
 };
 static const struct encaptab *pim_encap_cookie;
 
 static int pim_encapcheck(const struct mbuf *, int, int, void *);
 
 /*
  * Note: the PIM Register encapsulation adds the following in front of a
  * data packet:
  *
  * struct pim_encap_hdr {
  *    struct ip ip;
  *    struct pim_encap_pimhdr  pim;
  * }
  *
  */
 
 /* PIM part of the Register encapsulation: PIM header plus a flags word. */
 struct pim_encap_pimhdr {
 	struct pim pim;		/* PIM header */
 	uint32_t   flags;	/* 32-bit flags word following the header */
 };
 #define		PIM_ENCAP_TTL	64
 
 /*
  * Template outer IP header for PIM Register encapsulation.
  *
  * The initializer is positional.  The first two values (header length in
  * 32-bit words and IPVERSION) are swapped according to BYTE_ORDER,
  * presumably to follow the declaration order of the corresponding
  * bit-fields in struct ip -- confirm against netinet/ip.h.  Members after
  * the checksum have no initializer and are therefore zero.
  */
 static struct ip pim_encap_iphdr = {
 #if BYTE_ORDER == LITTLE_ENDIAN
 	sizeof(struct ip) >> 2,
 	IPVERSION,
 #else
 	IPVERSION,
 	sizeof(struct ip) >> 2,
 #endif
 	0,			/* tos */
 	sizeof(struct ip),	/* total length */
 	0,			/* id */
 	0,			/* frag offset */
 	PIM_ENCAP_TTL,		/* ttl */
 	IPPROTO_PIM,		/* protocol */
 	0,			/* checksum */
 };
 
 /*
  * Template PIM header for Register encapsulation: version/type set to
  * PIM_VERSION/PIM_REGISTER, everything else zero.
  */
 static struct pim_encap_pimhdr pim_encap_pimhdr = {
     {
 	PIM_MAKE_VT(PIM_VERSION, PIM_REGISTER), /* PIM vers and message type */
 	0,			/* reserved */
 	0,			/* checksum */
     },
     0				/* flags */
 };
 
 static VNET_DEFINE(vifi_t, reg_vif_num) = VIFI_INVALID;
 #define	V_reg_vif_num		VNET(reg_vif_num)
 static VNET_DEFINE(struct ifnet, multicast_register_if);
 #define	V_multicast_register_if	VNET(multicast_register_if)
 
 /*
  * Private variables.
  */
 
 /* X_-prefixed entry points defined in this file. */
 static u_long	X_ip_mcast_src(int);
 static int	X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *,
 		    struct ip_moptions *);
 static int	X_ip_mrouter_done(void);
 static int	X_ip_mrouter_get(struct socket *, struct sockopt *);
 static int	X_ip_mrouter_set(struct socket *, struct sockopt *);
 static int	X_legal_vif_num(int);
 static int	X_mrt_ioctl(u_long, caddr_t, int);
 
 /* File-local helpers, listed alphabetically. */
 static int	add_bw_upcall(struct bw_upcall *);
 static int	add_mfc(struct mfcctl2 *);
 static int	add_vif(struct vifctl *);
 static void	bw_meter_prepare_upcall(struct bw_meter *, struct timeval *);
 static void	bw_meter_process(void);
 static void	bw_meter_receive_packet(struct bw_meter *, int,
 		    struct timeval *);
 static void	bw_upcalls_send(void);
 static int	del_bw_upcall(struct bw_upcall *);
 static int	del_mfc(struct mfcctl2 *);
 static int	del_vif(vifi_t);
 static int	del_vif_locked(vifi_t);
 static void	expire_bw_meter_process(void *);
 static void	expire_bw_upcalls_send(void *);
 static void	expire_mfc(struct mfc *);
 static void	expire_upcalls(void *);
 static void	free_bw_list(struct bw_meter *);
 static int	get_sg_cnt(struct sioc_sg_req *);
 static int	get_vif_cnt(struct sioc_vif_req *);
 static void	if_detached_event(void *, struct ifnet *);
 static int	ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t);
 static int	ip_mrouter_init(struct socket *, int);
 static __inline struct mfc *
 		mfc_find(struct in_addr *, struct in_addr *);
 static void	phyint_send(struct ip *, struct vif *, struct mbuf *);
 static struct mbuf *
 		pim_register_prepare(struct ip *, struct mbuf *);
 static int	pim_register_send(struct ip *, struct vif *,
 		    struct mbuf *, struct mfc *);
 static int	pim_register_send_rp(struct ip *, struct vif *,
 		    struct mbuf *, struct mfc *);
 static int	pim_register_send_upcall(struct ip *, struct vif *,
 		    struct mbuf *, struct mfc *);
 static void	schedule_bw_meter(struct bw_meter *, struct timeval *);
 static void	send_packet(struct vif *, struct mbuf *);
 static int	set_api_config(uint32_t *);
 static int	set_assert(int);
 static int	socket_send(struct socket *, struct mbuf *,
 		    struct sockaddr_in *);
 static void	unschedule_bw_meter(struct bw_meter *);
 
 /*
  * Kernel multicast forwarding API capabilities and setup.
  * If more API capabilities are added to the kernel, they should be
  * recorded in `mrt_api_support'.
  */
 /* Value reported to userland by the MRT_VERSION getsockopt. */
 #define MRT_API_VERSION		0x0305
 
 static const int mrt_api_version = MRT_API_VERSION;
 /* Capability bits the kernel offers; set_api_config() masks requests with it. */
 static const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF |
 					 MRT_MFC_FLAGS_BORDER_VIF |
 					 MRT_MFC_RP |
 					 MRT_MFC_BW_UPCALL);
 /* Per-vnet: capability bits currently enabled (see set_api_config()). */
 static VNET_DEFINE(uint32_t, mrt_api_config);
 #define	V_mrt_api_config	VNET(mrt_api_config)
 /* Per-vnet: non-zero when PIM assert (WRONGVIF) processing is enabled. */
 static VNET_DEFINE(int, pim_assert_enabled);
 #define	V_pim_assert_enabled	VNET(pim_assert_enabled)
 /* Minimum spacing handed to ratecheck() for WRONGVIF upcalls: 3 s, 0 us. */
 static struct timeval pim_assert_interval = { 3, 0 };	/* Rate limit */
 
 /*
  * Look up the forwarding cache entry for origin `o' and group `g'.
  *
  * Only entries whose stall queue is empty are considered, i.e. entries that
  * are still waiting on an upcall are never returned.  Returns NULL when no
  * such entry exists.  The MFC lock must be held; the caller is responsible
  * for any statistics.
  */
 static __inline struct mfc *
 mfc_find(struct in_addr *o, struct in_addr *g)
 {
 	struct mfc *entry;
 
 	MFC_LOCK_ASSERT();
 
 	LIST_FOREACH(entry, &V_mfchashtbl[MFCHASH(*o, *g)], mfc_hash) {
 		if (!in_hosteq(entry->mfc_origin, *o))
 			continue;
 		if (!in_hosteq(entry->mfc_mcastgrp, *g))
 			continue;
 		if (TAILQ_EMPTY(&entry->mfc_stall))
 			return (entry);
 	}
 
 	return (NULL);
 }
 
 /*
  * Handle MRT setsockopt commands to modify the multicast forwarding tables.
  */
 static int
 X_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
 {
     int	error, optval;
     vifi_t	vifi;
     struct	vifctl vifc;
     struct	mfcctl2 mfc;
     struct	bw_upcall bw_upcall;
     uint32_t	i;
 
     if (so != V_ip_mrouter && sopt->sopt_name != MRT_INIT)
 	return EPERM;
 
     error = 0;
     switch (sopt->sopt_name) {
     case MRT_INIT:
 	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 	if (error)
 	    break;
 	error = ip_mrouter_init(so, optval);
 	break;
 
     case MRT_DONE:
 	error = ip_mrouter_done();
 	break;
 
     case MRT_ADD_VIF:
 	error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc);
 	if (error)
 	    break;
 	error = add_vif(&vifc);
 	break;
 
     case MRT_DEL_VIF:
 	error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi);
 	if (error)
 	    break;
 	error = del_vif(vifi);
 	break;
 
     case MRT_ADD_MFC:
     case MRT_DEL_MFC:
 	/*
 	 * select data size depending on API version.
 	 */
 	if (sopt->sopt_name == MRT_ADD_MFC &&
 		V_mrt_api_config & MRT_API_FLAGS_ALL) {
 	    error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl2),
 				sizeof(struct mfcctl2));
 	} else {
 	    error = sooptcopyin(sopt, &mfc, sizeof(struct mfcctl),
 				sizeof(struct mfcctl));
 	    bzero((caddr_t)&mfc + sizeof(struct mfcctl),
 			sizeof(mfc) - sizeof(struct mfcctl));
 	}
 	if (error)
 	    break;
 	if (sopt->sopt_name == MRT_ADD_MFC)
 	    error = add_mfc(&mfc);
 	else
 	    error = del_mfc(&mfc);
 	break;
 
     case MRT_ASSERT:
 	error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
 	if (error)
 	    break;
 	set_assert(optval);
 	break;
 
     case MRT_API_CONFIG:
 	error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
 	if (!error)
 	    error = set_api_config(&i);
 	if (!error)
 	    error = sooptcopyout(sopt, &i, sizeof i);
 	break;
 
     case MRT_ADD_BW_UPCALL:
     case MRT_DEL_BW_UPCALL:
 	error = sooptcopyin(sopt, &bw_upcall, sizeof bw_upcall,
 				sizeof bw_upcall);
 	if (error)
 	    break;
 	if (sopt->sopt_name == MRT_ADD_BW_UPCALL)
 	    error = add_bw_upcall(&bw_upcall);
 	else
 	    error = del_bw_upcall(&bw_upcall);
 	break;
 
     default:
 	error = EOPNOTSUPP;
 	break;
     }
     return error;
 }
 
 /*
  * Handle MRT getsockopt commands
  */
 static int
 X_ip_mrouter_get(struct socket *so, struct sockopt *sopt)
 {
     int error;
 
     switch (sopt->sopt_name) {
     case MRT_VERSION:
 	error = sooptcopyout(sopt, &mrt_api_version, sizeof mrt_api_version);
 	break;
 
     case MRT_ASSERT:
 	error = sooptcopyout(sopt, &V_pim_assert_enabled,
 	    sizeof V_pim_assert_enabled);
 	break;
 
     case MRT_API_SUPPORT:
 	error = sooptcopyout(sopt, &mrt_api_support, sizeof mrt_api_support);
 	break;
 
     case MRT_API_CONFIG:
 	error = sooptcopyout(sopt, &V_mrt_api_config, sizeof V_mrt_api_config);
 	break;
 
     default:
 	error = EOPNOTSUPP;
 	break;
     }
     return error;
 }
 
 /*
  * ioctl entry point for reading counters out of the forwarding state.
  *
  * The caller must hold PRIV_NETINET_MROUTE; the priv_check() error is
  * returned otherwise.  SIOCGETVIFCNT and SIOCGETSGCNT are dispatched to
  * get_vif_cnt() and get_sg_cnt(); every other command yields EINVAL.
  */
 static int
 X_mrt_ioctl(u_long cmd, caddr_t data, int fibnum __unused)
 {
     int error;
 
     /*
      * rtioctl() is currently the only caller.  Creating the raw socket
      * normally requires root, but the request may originate from a
      * prison, so check the privilege explicitly.
      */
     error = priv_check(curthread, PRIV_NETINET_MROUTE);
     if (error != 0)
 	return (error);
 
     if (cmd == SIOCGETVIFCNT)
 	return get_vif_cnt((struct sioc_vif_req *)data);
     if (cmd == SIOCGETSGCNT)
 	return get_sg_cnt((struct sioc_sg_req *)data);
 
     return EINVAL;
 }
 
 /*
  * Report the packet, byte and wrong-interface counters of the cache entry
  * for the (source, group) pair in `req'.
  *
  * When no entry is found all three counters are set to 0xffffffff and
  * EADDRNOTAVAIL is returned; otherwise returns 0.
  */
 static int
 get_sg_cnt(struct sioc_sg_req *req)
 {
     struct mfc *entry;
 
     MFC_LOCK();
     entry = mfc_find(&req->src, &req->grp);
     if (entry != NULL) {
 	req->pktcnt = entry->mfc_pkt_cnt;
 	req->bytecnt = entry->mfc_byte_cnt;
 	req->wrong_if = entry->mfc_wrong_if;
     }
     MFC_UNLOCK();
 
     if (entry != NULL)
 	return 0;
 
     /* No such entry: poison the counters (written after the unlock). */
     req->wrong_if = 0xffffffff;
     req->bytecnt = req->wrong_if;
     req->pktcnt = req->bytecnt;
     return EADDRNOTAVAIL;
 }
 
 /*
  * returns the input and output packet and byte counts on the vif provided
  */
 static int
 get_vif_cnt(struct sioc_vif_req *req)
 {
     vifi_t vifi = req->vifi;
 
     VIF_LOCK();
     if (vifi >= V_numvifs) {
 	VIF_UNLOCK();
 	return EINVAL;
     }
 
     req->icount = V_viftable[vifi].v_pkt_in;
     req->ocount = V_viftable[vifi].v_pkt_out;
     req->ibytes = V_viftable[vifi].v_bytes_in;
     req->obytes = V_viftable[vifi].v_bytes_out;
     VIF_UNLOCK();
 
     return 0;
 }
 
 /*
  * Drop all multicast forwarding state that refers to `ifp'.
  *
  * Does nothing when no multicast router socket is registered.  Otherwise,
  * with the router, vif and MFC locks held, every vif bound to `ifp' has the
  * cache entries whose parent is that vif expired, and is then removed with
  * del_vif_locked().
  */
 static void
 if_detached_event(void *arg __unused, struct ifnet *ifp)
 {
     struct mfc *entry, *next;
     u_long bucket;
     vifi_t v;
 
     MROUTER_LOCK();
 
     if (V_ip_mrouter != NULL) {
 	VIF_LOCK();
 	MFC_LOCK();
 
 	for (v = 0; v < V_numvifs; v++) {
 	    if (V_viftable[v].v_ifp == ifp) {
 		/* Expire the cache entries parented on this vif... */
 		for (bucket = 0; bucket < mfchashsize; bucket++) {
 		    LIST_FOREACH_SAFE(entry, &V_mfchashtbl[bucket],
 			mfc_hash, next) {
 			if (entry->mfc_parent == v)
 			    expire_mfc(entry);
 		    }
 		}
 		/* ...then free the vif state itself. */
 		del_vif_locked(v);
 	    }
 	}
 
 	MFC_UNLOCK();
 	VIF_UNLOCK();
     }
 
     MROUTER_UNLOCK();
 }
                         
 /*
  * Enable multicast forwarding.
  */
 static int
 ip_mrouter_init(struct socket *so, int version)
 {
 
     CTR3(KTR_IPMF, "%s: so_type %d, pr_protocol %d", __func__,
         so->so_type, so->so_proto->pr_protocol);
 
     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_IGMP)
 	return EOPNOTSUPP;
 
     if (version != 1)
 	return ENOPROTOOPT;
 
     MROUTER_LOCK();
 
     if (ip_mrouter_unloading) {
 	MROUTER_UNLOCK();
 	return ENOPROTOOPT;
     }
 
     if (V_ip_mrouter != NULL) {
 	MROUTER_UNLOCK();
 	return EADDRINUSE;
     }
 
     V_mfchashtbl = hashinit_flags(mfchashsize, M_MRTABLE, &V_mfchash,
 	HASH_NOWAIT);
 
     callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls,
 	curvnet);
     callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send,
 	curvnet);
     callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process,
 	curvnet);
 
     V_ip_mrouter = so;
     ip_mrouter_cnt++;
 
     MROUTER_UNLOCK();
 
     CTR1(KTR_IPMF, "%s: done", __func__);
 
     return 0;
 }
 
 /*
  * Disable multicast forwarding.
  *
  * Returns EINVAL when no multicast router socket is registered.  Otherwise
  * unregisters the socket, resets the API configuration, turns off allmulti
  * on every physical vif, clears the vif table, stops the periodic callouts,
  * expires every forwarding cache entry, frees the hash table and returns 0.
  */
 static int
 X_ip_mrouter_done(void)
 {
     struct ifnet *ifp;
     u_long i;
     vifi_t vifi;
 
     MROUTER_LOCK();
 
     if (V_ip_mrouter == NULL) {
 	MROUTER_UNLOCK();
 	return EINVAL;
     }
 
     /*
      * Detach/disable hooks to the rest of the system.
      */
     V_ip_mrouter = NULL;
     ip_mrouter_cnt--;
     V_mrt_api_config = 0;
 
     VIF_LOCK();
 
     /*
      * For each phyint in use, disable promiscuous reception of all IP
      * multicasts.
      */
     for (vifi = 0; vifi < V_numvifs; vifi++) {
 	/* Slots with a null local address are unused; skip tunnel/register. */
 	if (!in_nullhost(V_viftable[vifi].v_lcl_addr) &&
 		!(V_viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) {
 	    ifp = V_viftable[vifi].v_ifp;
 	    if_allmulti(ifp, 0);
 	}
     }
     bzero((caddr_t)V_viftable, sizeof(V_viftable));
     V_numvifs = 0;
     V_pim_assert_enabled = 0;
     
     VIF_UNLOCK();
 
     callout_stop(&V_expire_upcalls_ch);
     callout_stop(&V_bw_upcalls_ch);
     callout_stop(&V_bw_meter_ch);
 
     MFC_LOCK();
 
     /*
      * Free all multicast forwarding cache entries.
      * Do not use hashdestroy(), as we must perform other cleanup.
      */
     for (i = 0; i < mfchashsize; i++) {
 	struct mfc *rt, *nrt;
 
 	LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
 		expire_mfc(rt);
 	}
     }
     free(V_mfchashtbl, M_MRTABLE);
     V_mfchashtbl = NULL;
 
     /* Reset the per-bucket pending-expiry counters. */
     bzero(V_nexpire, sizeof(V_nexpire[0]) * mfchashsize);
 
     V_bw_upcalls_n = 0;
     bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers));
 
     MFC_UNLOCK();
 
     V_reg_vif_num = VIFI_INVALID;
 
     MROUTER_UNLOCK();
 
     CTR1(KTR_IPMF, "%s: done", __func__);
 
     return 0;
 }
 
 /*
  * Turn PIM assert processing on (1) or off (0) for the current vnet.
  * Any other value is rejected with EINVAL and leaves the setting untouched.
  */
 static int
 set_assert(int i)
 {
     switch (i) {
     case 0:
     case 1:
 	V_pim_assert_enabled = i;
 	return 0;
 
     default:
 	return EINVAL;
     }
 }
 
 /*
  * Configure API capabilities
  *
  * On entry `*apival' holds the capability bits the caller asks for; on
  * return it holds the bits actually enabled (the request masked with
  * mrt_api_support), or 0 on failure.  Returns 0 on success and EPERM when
  * the configuration can no longer be changed.
  *
  * Declared static to match the forward declaration earlier in the file.
  */
 static int
 set_api_config(uint32_t *apival)
 {
     u_long i;
 
     /*
      * We can set the API capabilities only if it is the first operation
      * after MRT_INIT. I.e.:
      *  - there are no vifs installed
      *  - pim_assert is not enabled
      *  - the MFC table is empty
      */
     if (V_numvifs > 0) {
 	*apival = 0;
 	return EPERM;
     }
     if (V_pim_assert_enabled) {
 	*apival = 0;
 	return EPERM;
     }
 
     MFC_LOCK();
 
     /* Any non-empty hash bucket means the MFC table is already in use. */
     for (i = 0; i < mfchashsize; i++) {
 	if (LIST_FIRST(&V_mfchashtbl[i]) != NULL) {
 	    MFC_UNLOCK();
 	    *apival = 0;
 	    return EPERM;
 	}
     }
 
     MFC_UNLOCK();
 
     V_mrt_api_config = *apival & mrt_api_support;
     *apival = V_mrt_api_config;
 
     return 0;
 }
 
 /*
  * Add a vif to the vif table
  *
  * Returns:
  *   EINVAL        - vif index >= MAXVIFS, or a non-zero rate limit
  *   EADDRINUSE    - the slot already has a local address
  *   EADDRNOTAVAIL - null local address, or no interface owns the address
  *   EOPNOTSUPP    - tunnel vif requested, or interface is not multicast
  *   other         - error returned by if_allmulti()
  *   0             - vif installed
  */
 static int
 add_vif(struct vifctl *vifcp)
 {
     struct vif *vifp;
     struct sockaddr_in sin = {sizeof sin, AF_INET};
     struct ifaddr *ifa;
     struct ifnet *ifp;
     int error;
 
     VIF_LOCK();
     if (vifcp->vifc_vifi >= MAXVIFS) {
 	VIF_UNLOCK();
 	return EINVAL;
     }
     /* Form the table pointer only once the index is known to be valid. */
     vifp = &V_viftable[vifcp->vifc_vifi];
 
     /* rate limiting is no longer supported by this code */
     if (vifcp->vifc_rate_limit != 0) {
 	log(LOG_ERR, "rate limiting is no longer supported\n");
 	VIF_UNLOCK();
 	return EINVAL;
     }
     if (!in_nullhost(vifp->v_lcl_addr)) {
 	VIF_UNLOCK();
 	return EADDRINUSE;
     }
     if (in_nullhost(vifcp->vifc_lcl_addr)) {
 	VIF_UNLOCK();
 	return EADDRNOTAVAIL;
     }
 
     /* Find the interface with an address in AF_INET family */
     if (vifcp->vifc_flags & VIFF_REGISTER) {
 	/*
 	 * XXX: Because VIFF_REGISTER does not really need a valid
 	 * local interface (e.g. it could be 127.0.0.2), we don't
 	 * check its address.
 	 */
 	ifp = NULL;
     } else {
 	sin.sin_addr = vifcp->vifc_lcl_addr;
 	ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
 	if (ifa == NULL) {
 	    VIF_UNLOCK();
 	    return EADDRNOTAVAIL;
 	}
 	ifp = ifa->ifa_ifp;
 	ifa_free(ifa);
     }
 
     if ((vifcp->vifc_flags & VIFF_TUNNEL) != 0) {
 	CTR1(KTR_IPMF, "%s: tunnels are no longer supported", __func__);
 	VIF_UNLOCK();
 	return EOPNOTSUPP;
     } else if (vifcp->vifc_flags & VIFF_REGISTER) {
 	/* Register vifs are backed by the per-vnet pseudo interface. */
 	ifp = &V_multicast_register_if;
 	CTR2(KTR_IPMF, "%s: add register vif for ifp %p", __func__, ifp);
 	if (V_reg_vif_num == VIFI_INVALID) {
 	    if_initname(&V_multicast_register_if, "register_vif", 0);
 	    V_multicast_register_if.if_flags = IFF_LOOPBACK;
 	    V_reg_vif_num = vifcp->vifc_vifi;
 	}
     } else {		/* Make sure the interface supports multicast */
 	if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 	    VIF_UNLOCK();
 	    return EOPNOTSUPP;
 	}
 
 	/* Enable promiscuous reception of all IP multicasts from the if */
 	error = if_allmulti(ifp, 1);
 	if (error) {
 	    VIF_UNLOCK();
 	    return error;
 	}
     }
 
     vifp->v_flags     = vifcp->vifc_flags;
     vifp->v_threshold = vifcp->vifc_threshold;
     vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
     vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
     vifp->v_ifp       = ifp;
     /* initialize per vif pkt counters */
     vifp->v_pkt_in    = 0;
     vifp->v_pkt_out   = 0;
     vifp->v_bytes_in  = 0;
     vifp->v_bytes_out = 0;
 
     /* Adjust numvifs up if the vifi is higher than numvifs */
     if (V_numvifs <= vifcp->vifc_vifi)
 	V_numvifs = vifcp->vifc_vifi + 1;
 
     VIF_UNLOCK();
 
     CTR4(KTR_IPMF, "%s: add vif %d laddr %s thresh %x", __func__,
 	(int)vifcp->vifc_vifi, inet_ntoa(vifcp->vifc_lcl_addr),
 	(int)vifcp->vifc_threshold);
 
     return 0;
 }
 
 /*
  * Remove vif `vifi' from the vif table; the vif lock must be held.
  *
  * Returns EINVAL when the index is not below V_numvifs and EADDRNOTAVAIL
  * when the slot has no local address.  Otherwise allmulti is switched off
  * for a physical vif, the register-vif index is invalidated for a register
  * vif, the slot is zeroed, V_numvifs is shrunk past trailing empty slots,
  * and 0 is returned.
  */
 static int
 del_vif_locked(vifi_t vifi)
 {
     struct vif *slot;
     vifi_t top;
 
     VIF_LOCK_ASSERT();
 
     if (vifi >= V_numvifs)
 	return EINVAL;
 
     slot = V_viftable + vifi;
     if (in_nullhost(slot->v_lcl_addr))
 	return EADDRNOTAVAIL;
 
     if ((slot->v_flags & (VIFF_TUNNEL | VIFF_REGISTER)) == 0)
 	if_allmulti(slot->v_ifp, 0);
 
     if ((slot->v_flags & VIFF_REGISTER) != 0)
 	V_reg_vif_num = VIFI_INVALID;
 
     bzero((caddr_t)slot, sizeof (*slot));
 
     CTR2(KTR_IPMF, "%s: delete vif %d", __func__, (int)vifi);
 
     /* Lower numvifs to one past the highest slot still in use. */
     top = V_numvifs;
     while (top > 0 && in_nullhost(V_viftable[top - 1].v_lcl_addr))
 	top--;
     V_numvifs = top;
 
     return 0;
 }
 
 /*
  * Locking wrapper: take the vif lock, delete vif `vifi' through
  * del_vif_locked(), and hand back its result.
  */
 static int
 del_vif(vifi_t vifi)
 {
     int error;
 
     VIF_LOCK();
     error = del_vif_locked(vifi);
     VIF_UNLOCK();
 
     return (error);
 }
 
 /*
  * Refresh the routing parameters of cache entry `rt' from `mfccp' while
  * leaving its counters and (S,G) addresses alone.
  *
  * Only the first V_numvifs ttl/flag slots are written.  Flags are limited
  * to those both enabled in the API configuration and in MRT_MFC_FLAGS_ALL.
  * The RP address is taken from the request only when MRT_MFC_RP is enabled;
  * otherwise it is reset to INADDR_ANY.
  */
 static void
 update_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp)
 {
     int v;
 
     rt->mfc_parent = mfccp->mfcc_parent;
 
     v = 0;
     while (v < V_numvifs) {
 	rt->mfc_ttls[v] = mfccp->mfcc_ttls[v];
 	rt->mfc_flags[v] = mfccp->mfcc_flags[v] & V_mrt_api_config &
 	    MRT_MFC_FLAGS_ALL;
 	v++;
     }
 
     /* RP address: honoured only when the MRT_MFC_RP capability is on. */
     if ((V_mrt_api_config & MRT_MFC_RP) == 0)
 	rt->mfc_rp.s_addr = INADDR_ANY;
     else
 	rt->mfc_rp = mfccp->mfcc_rp;
 }
 
 /*
  * Set up cache entry `rt' from scratch using `mfccp': record the (S,G)
  * addresses, load the routing parameters via update_mfc_params(), zero the
  * per-entry counters and clear the last-assert timestamp.
  */
 static void
 init_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp)
 {
     /* (S,G) identity of the entry. */
     rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp;
     rt->mfc_origin = mfccp->mfcc_origin;
 
     /* Per source-group statistics start from zero. */
     rt->mfc_wrong_if = 0;
     rt->mfc_byte_cnt = 0;
     rt->mfc_pkt_cnt = 0;
     timevalclear(&rt->mfc_last_assert);
 
     /* Parent vif, ttls, flags and RP address. */
     update_mfc_params(rt, mfccp);
 }
 
 static void
 expire_mfc(struct mfc *rt)
 {
 	struct rtdetq *rte, *nrte;
 
 	MFC_LOCK_ASSERT();
 
 	free_bw_list(rt->mfc_bw_meter);
 
 	TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
 		m_freem(rte->m);
 		TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link);
 		free(rte, M_MRTABLE);
 	}
 
 	LIST_REMOVE(rt, mfc_hash);
 	free(rt, M_MRTABLE);
 }
 
 /*
  * Add an mfc entry
  *
  * Three cases are handled, in order:
  *  1. An established entry (empty stall queue) exists: only its routing
  *     parameters are updated.
  *  2. One or more entries with queued packets (pending upcall) match: each
  *     is fully initialized, its queued packets are forwarded and freed.
  *  3. Neither: an existing entry with the same (S,G) is re-initialized, or
  *     a new one is allocated and inserted.
  *
  * Returns 0 on success or ENOBUFS when a new entry cannot be allocated.
  */
 static int
 add_mfc(struct mfcctl2 *mfccp)
 {
     struct mfc *rt;
     struct rtdetq *rte, *nrte;
     u_long hash = 0;
     u_short nstl;
 
     VIF_LOCK();
     MFC_LOCK();
 
     rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp);
 
     /* If an entry already exists, just update the fields */
     if (rt) {
 	CTR4(KTR_IPMF, "%s: update mfc orig %s group %lx parent %x",
 	    __func__, inet_ntoa(mfccp->mfcc_origin),
 	    (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 	    mfccp->mfcc_parent);
 	update_mfc_params(rt, mfccp);
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 	return (0);
     }
 
     /*
      * Find the entry for which the upcall was made and update
      */
     nstl = 0;
     hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp);
     LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) {
 	if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
 	    in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) &&
 	    !TAILQ_EMPTY(&rt->mfc_stall)) {
 		CTR5(KTR_IPMF,
 		    "%s: add mfc orig %s group %lx parent %x qh %p",
 		    __func__, inet_ntoa(mfccp->mfcc_origin),
 		    (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 		    mfccp->mfcc_parent,
 		    TAILQ_FIRST(&rt->mfc_stall));
 		if (nstl++)
 			CTR1(KTR_IPMF, "%s: multiple matches", __func__);
 
 		init_mfc_params(rt, mfccp);
 		rt->mfc_expire = 0;	/* Don't clean this guy up */
 		V_nexpire[hash]--;
 
 		/* Free queued packets, but attempt to forward them first. */
 		TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) {
 			if (rte->ifp != NULL)
 				ip_mdq(rte->m, rte->ifp, rt, -1);
 			m_freem(rte->m);
 			TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link);
 			rt->mfc_nstall--;
 			free(rte, M_MRTABLE);
 		}
 	}
     }
 
     /*
      * It is possible that an entry is being inserted without an upcall
      */
     if (nstl == 0) {
 	CTR1(KTR_IPMF, "%s: adding mfc w/o upcall", __func__);
 	LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) {
 		if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) &&
 		    in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp)) {
 			init_mfc_params(rt, mfccp);
 			if (rt->mfc_expire)
 			    V_nexpire[hash]--;
 			rt->mfc_expire = 0;
 			break; /* XXX */
 		}
 	}
 
 	if (rt == NULL) {		/* no upcall, so make a new entry */
 	    /*
 	     * Zero the allocation: init_mfc_params() fills the ttl/flag
 	     * arrays only up to the current V_numvifs, and the forwarding
 	     * loop reads further slots once more vifs are added later.
 	     */
 	    rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE,
 		M_NOWAIT | M_ZERO);
 	    if (rt == NULL) {
 		MFC_UNLOCK();
 		VIF_UNLOCK();
 		return (ENOBUFS);
 	    }
 
 	    init_mfc_params(rt, mfccp);
 	    TAILQ_INIT(&rt->mfc_stall);
 	    rt->mfc_nstall = 0;
 
 	    rt->mfc_expire     = 0;
 	    rt->mfc_bw_meter = NULL;
 
 	    /* insert new entry at head of hash chain */
 	    LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash);
 	}
     }
 
     MFC_UNLOCK();
     VIF_UNLOCK();
 
     return (0);
 }
 
 /*
  * Delete an mfc entry
  */
 static int
 del_mfc(struct mfcctl2 *mfccp)
 {
     struct in_addr	origin;
     struct in_addr	mcastgrp;
     struct mfc		*rt;
 
     origin = mfccp->mfcc_origin;
     mcastgrp = mfccp->mfcc_mcastgrp;
 
     CTR3(KTR_IPMF, "%s: delete mfc orig %s group %lx", __func__,
 	inet_ntoa(origin), (u_long)ntohl(mcastgrp.s_addr));
 
     MFC_LOCK();
 
     rt = mfc_find(&origin, &mcastgrp);
     if (rt == NULL) {
 	MFC_UNLOCK();
 	return EADDRNOTAVAIL;
     }
 
     /*
      * free the bw_meter entries
      */
     free_bw_list(rt->mfc_bw_meter);
     rt->mfc_bw_meter = NULL;
 
     LIST_REMOVE(rt, mfc_hash);
     free(rt, M_MRTABLE);
 
     MFC_UNLOCK();
 
     return (0);
 }
 
 /*
  * Queue message `mm', tagged with source address `src', on the receive
  * buffer of the routing daemon's socket `s' and wake the reader.
  *
  * Returns 0 when the message was appended.  When `s' is NULL or the append
  * fails, `mm' is freed and -1 is returned.
  */
 static int
 socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
 {
     if (s == NULL) {
 	m_freem(mm);
 	return -1;
     }
 
     SOCKBUF_LOCK(&s->so_rcv);
     if (sbappendaddr_locked(&s->so_rcv, (struct sockaddr *)src, mm,
 	NULL) == 0) {
 	SOCKBUF_UNLOCK(&s->so_rcv);
 	m_freem(mm);
 	return -1;
     }
 
     /* Success: no explicit unlock here; the buffer is passed on locked. */
     sorwakeup_locked(s);
     return 0;
 }
 
 /*
  * IP multicast forwarding function. This function assumes that the packet
  * pointed to by "ip" has arrived on (or is about to be sent to) the interface
  * pointed to by "ifp", and the packet is to be relayed to other networks
  * that have members of the packet's destination IP multicast group.
  *
  * The packet is returned unscathed to the caller, unless it is
  * erroneous, in which case a non-zero return value tells the caller to
  * discard it.
  */
 
 #define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
 
 /*
  * Returns 0 when the caller may keep processing the packet, 1 for packets
  * that arrived through an (unsupported) source-route tunnel, ENOBUFS on
  * allocation failure or a full daemon socket, or the result of ip_mdq()
  * when the packet is handed to it.
  *
  * When no established cache entry exists, a copy of the packet is queued
  * on a pending entry and, for the first such packet, an IGMPMSG_NOCACHE
  * upcall carrying the IP header is sent to the routing daemon.
  */
 static int
 X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
     struct ip_moptions *imo)
 {
     struct mfc *rt;
     int error;
     vifi_t vifi;
 
     CTR3(KTR_IPMF, "ip_mforward: orig %s group %lx ifp %p",
 	inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr), ifp);
 
     /*
      * A header long enough to carry the tunnel option whose option type
      * is LSRR means the packet arrived through a source-route tunnel.
      * Source-route tunnels are no longer supported.  Anything else
      * arrived via a physical interface, an encapsulated tunnel or a
      * register_vif and is processed below.
      */
     if (ip->ip_hl >= (sizeof(struct ip) + TUNNEL_LEN) >> 2 &&
 		((u_char *)(ip + 1))[1] == IPOPT_LSRR)
 	return (1);
 
     VIF_LOCK();
     MFC_LOCK();
     /* The sender selected an explicit, valid vif: transmit on it only. */
     if (imo && ((vifi = imo->imo_multicast_vif) < V_numvifs)) {
 	if (ip->ip_ttl < MAXTTL)
 	    ip->ip_ttl++;	/* compensate for -1 in *_send routines */
 	error = ip_mdq(m, ifp, NULL, vifi);
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 	return error;
     }
 
     /*
      * Don't forward a packet with time-to-live of zero or one,
      * or a packet destined to a local-only group.
      */
     if (ip->ip_ttl <= 1 || IN_LOCAL_GROUP(ntohl(ip->ip_dst.s_addr))) {
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 	return 0;
     }
 
     /*
      * Determine forwarding vifs from the forwarding cache table
      */
     MRTSTAT_INC(mrts_mfc_lookups);
     rt = mfc_find(&ip->ip_src, &ip->ip_dst);
 
     /* Entry exists, so forward if necessary */
     if (rt != NULL) {
 	error = ip_mdq(m, ifp, rt, -1);
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 	return error;
     } else {
 	/*
 	 * If we don't have a route for packet's origin,
 	 * Make a copy of the packet & send message to routing daemon
 	 */
 
 	struct mbuf *mb0;
 	struct rtdetq *rte;
 	u_long hash;
 	int hlen = ip->ip_hl << 2;
 
 	MRTSTAT_INC(mrts_mfc_misses);
 	MRTSTAT_INC(mrts_no_route);
 	CTR2(KTR_IPMF, "ip_mforward: no mfc for (%s,%lx)",
 	    inet_ntoa(ip->ip_src), (u_long)ntohl(ip->ip_dst.s_addr));
 
 	/*
 	 * Allocate mbufs early so that we don't do extra work if we are
 	 * just going to fail anyway.  Make sure to pullup the header so
 	 * that other people can't step on it.
 	 */
 	rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE,
 	    M_NOWAIT|M_ZERO);
 	if (rte == NULL) {
 	    MFC_UNLOCK();
 	    VIF_UNLOCK();
 	    return ENOBUFS;
 	}
 
 	mb0 = m_copypacket(m, M_NOWAIT);
 	if (mb0 && (!M_WRITABLE(mb0) || mb0->m_len < hlen))
 	    mb0 = m_pullup(mb0, hlen);
 	if (mb0 == NULL) {
 	    free(rte, M_MRTABLE);
 	    MFC_UNLOCK();
 	    VIF_UNLOCK();
 	    return ENOBUFS;
 	}
 
 	/* is there an upcall waiting for this flow ? */
 	hash = MFCHASH(ip->ip_src, ip->ip_dst);
 	LIST_FOREACH(rt, &V_mfchashtbl[hash], mfc_hash) {
 		if (in_hosteq(ip->ip_src, rt->mfc_origin) &&
 		    in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) &&
 		    !TAILQ_EMPTY(&rt->mfc_stall))
 			break;
 	}
 
 	if (rt == NULL) {
 	    int i;
 	    struct igmpmsg *im;
 	    struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
 	    struct mbuf *mm;
 
 	    /*
 	     * Locate the vifi for the incoming interface for this packet.
 	     * If none found, drop packet.
 	     */
 	    for (vifi = 0; vifi < V_numvifs &&
 		    V_viftable[vifi].v_ifp != ifp; vifi++)
 		;
 	    if (vifi >= V_numvifs)	/* vif not found, drop packet */
 		goto non_fatal;
 
 	    /*
 	     * no upcall, so make a new entry.  Zero it: only the first
 	     * V_numvifs ttl/flag slots are initialized below, and later
 	     * code reads further slots once more vifs have been added.
 	     */
 	    rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE,
 		M_NOWAIT | M_ZERO);
 	    if (rt == NULL)
 		goto fail;
 
 	    /* Make a copy of the header to send to the user level process */
 	    mm = m_copy(mb0, 0, hlen);
 	    if (mm == NULL)
 		goto fail1;
 
 	    /*
 	     * Send message to routing daemon to install
 	     * a route into the kernel table
 	     */
 
 	    im = mtod(mm, struct igmpmsg *);
 	    im->im_msgtype = IGMPMSG_NOCACHE;
 	    im->im_mbz = 0;
 	    im->im_vif = vifi;
 
 	    MRTSTAT_INC(mrts_upcalls);
 
 	    k_igmpsrc.sin_addr = ip->ip_src;
 	    if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) {
 		CTR0(KTR_IPMF, "ip_mforward: socket queue full");
 		MRTSTAT_INC(mrts_upq_sockfull);
 		/* Shared failure path; entered via goto from above too. */
 fail1:
 		free(rt, M_MRTABLE);
 fail:
 		free(rte, M_MRTABLE);
 		m_freem(mb0);
 		MFC_UNLOCK();
 		VIF_UNLOCK();
 		return ENOBUFS;
 	    }
 
 	    /* insert new entry at head of hash chain */
 	    rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
 	    rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
 	    rt->mfc_expire	      = UPCALL_EXPIRE;
 	    V_nexpire[hash]++;
 	    for (i = 0; i < V_numvifs; i++) {
 		rt->mfc_ttls[i] = 0;
 		rt->mfc_flags[i] = 0;
 	    }
 	    rt->mfc_parent = -1;
 
 	    /* clear the RP address */
 	    rt->mfc_rp.s_addr = INADDR_ANY;
 	    rt->mfc_bw_meter = NULL;
 
 	    /* initialize pkt counters per src-grp */
 	    rt->mfc_pkt_cnt = 0;
 	    rt->mfc_byte_cnt = 0;
 	    rt->mfc_wrong_if = 0;
 	    timevalclear(&rt->mfc_last_assert);
 
 	    TAILQ_INIT(&rt->mfc_stall);
 	    rt->mfc_nstall = 0;
 
 	    /* link into table */
 	    LIST_INSERT_HEAD(&V_mfchashtbl[hash], rt, mfc_hash);
 	    TAILQ_INSERT_HEAD(&rt->mfc_stall, rte, rte_link);
 	    rt->mfc_nstall++;
 
 	} else {
 	    /* determine if queue has overflowed */
 	    if (rt->mfc_nstall > MAX_UPQ) {
 		MRTSTAT_INC(mrts_upq_ovflw);
 		/* Silent-drop path; also the target of goto non_fatal. */
 non_fatal:
 		free(rte, M_MRTABLE);
 		m_freem(mb0);
 		MFC_UNLOCK();
 		VIF_UNLOCK();
 		return (0);
 	    }
 	    TAILQ_INSERT_TAIL(&rt->mfc_stall, rte, rte_link);
 	    rt->mfc_nstall++;
 	}
 
 	/* The queue element now owns the packet copy. */
 	rte->m			= mb0;
 	rte->ifp		= ifp;
 
 	MFC_UNLOCK();
 	VIF_UNLOCK();
 
 	return 0;
     }
 }
 
 /*
  * Clean up the cache entry if upcall is not serviced
  *
  * Periodic callout handler.  `arg' is the vnet to run in.  Each pass
  * decrements the expiry counter of every entry that still has packets
  * queued on its stall queue and destroys those whose counter reaches
  * zero; the callout then re-arms itself.
  */
 static void
 expire_upcalls(void *arg)
 {
     u_long i;
 
     CURVNET_SET((struct vnet *) arg);
 
     MFC_LOCK();
 
     for (i = 0; i < mfchashsize; i++) {
 	struct mfc *rt, *nrt;
 
 	/* Skip buckets with no entries pending expiry. */
 	if (V_nexpire[i] == 0)
 	    continue;
 
 	LIST_FOREACH_SAFE(rt, &V_mfchashtbl[i], mfc_hash, nrt) {
 		/* Entries with an empty stall queue are left alone. */
 		if (TAILQ_EMPTY(&rt->mfc_stall))
 			continue;
 
 		/* Not subject to expiry (0), or still time left. */
 		if (rt->mfc_expire == 0 || --rt->mfc_expire > 0)
 			continue;
 
 		/*
 		 * free the bw_meter entries
 		 */
 		while (rt->mfc_bw_meter != NULL) {
 		    struct bw_meter *x = rt->mfc_bw_meter;
 
 		    rt->mfc_bw_meter = x->bm_mfc_next;
 		    free(x, M_BWMETER);
 		}
 
 		MRTSTAT_INC(mrts_cache_cleanups);
 		CTR3(KTR_IPMF, "%s: expire (%lx, %lx)", __func__,
 		    (u_long)ntohl(rt->mfc_origin.s_addr),
 		    (u_long)ntohl(rt->mfc_mcastgrp.s_addr));
 
 		expire_mfc(rt);
 	    }
     }
 
     MFC_UNLOCK();
 
     /* Schedule the next pass. */
     callout_reset(&V_expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls,
 	curvnet);
 
     CURVNET_RESTORE();
 }
 
 /*
  * Packet forwarding routine once entry in the cache is made
  *
  * m       - packet to forward; its data starts with the IP header
  * ifp     - interface the packet arrived on
  * rt      - forwarding cache entry; dereferenced whenever xmt_vif is not
  *           a valid vif index, so it must be non-NULL in that case
  * xmt_vif - vif to transmit on, or -1 to consult `rt'
  *
  * Returns 1 when the packet was sent on the explicitly requested vif,
  * ENOBUFS when a WRONGVIF upcall could not be built or queued, and 0
  * otherwise.  The vif lock must be held.
  */
 static int
 ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif)
 {
     struct ip  *ip = mtod(m, struct ip *);
     vifi_t vifi;
     int plen = ntohs(ip->ip_len);
 
     VIF_LOCK_ASSERT();
 
     /*
      * If xmt_vif is not -1, send on only the requested vif.
      *
      * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.)
      */
     if (xmt_vif < V_numvifs) {
 	if (V_viftable[xmt_vif].v_flags & VIFF_REGISTER)
 		pim_register_send(ip, V_viftable + xmt_vif, m, rt);
 	else
 		phyint_send(ip, V_viftable + xmt_vif, m);
 	return 1;
     }
 
     /*
      * Don't forward if it didn't arrive from the parent vif for its origin.
      */
     vifi = rt->mfc_parent;
     if ((vifi >= V_numvifs) || (V_viftable[vifi].v_ifp != ifp)) {
 	/*
 	 * vifi may be out of range here (e.g. a parent of -1), so only
 	 * index the vif table for the trace when the index is valid.
 	 */
 	CTR4(KTR_IPMF, "%s: rx on wrong ifp %p (vifi %d, v_ifp %p)",
 	    __func__, ifp, (int)vifi,
 	    (vifi < V_numvifs) ? V_viftable[vifi].v_ifp : NULL);
 	MRTSTAT_INC(mrts_wrong_if);
 	++rt->mfc_wrong_if;
 	/*
 	 * If we are doing PIM assert processing, send a message
 	 * to the routing daemon.
 	 *
 	 * XXX: A PIM-SM router needs the WRONGVIF detection so it
 	 * can complete the SPT switch, regardless of the type
 	 * of the iif (broadcast media, GRE tunnel, etc).
 	 */
 	if (V_pim_assert_enabled && (vifi < V_numvifs) &&
 	    V_viftable[vifi].v_ifp) {
 
 	    if (ifp == &V_multicast_register_if)
 		PIMSTAT_INC(pims_rcv_registers_wrongiif);
 
 	    /* Get vifi for the incoming packet */
 	    for (vifi = 0; vifi < V_numvifs && V_viftable[vifi].v_ifp != ifp;
 		vifi++)
 		;
 	    if (vifi >= V_numvifs)
 		return 0;	/* The iif is not found: ignore the packet. */
 
 	    if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_DISABLE_WRONGVIF)
 		return 0;	/* WRONGVIF disabled: ignore the packet */
 
 	    /* Rate-limit the upcalls per entry (pim_assert_interval). */
 	    if (ratecheck(&rt->mfc_last_assert, &pim_assert_interval)) {
 		struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
 		struct igmpmsg *im;
 		int hlen = ip->ip_hl << 2;
 		struct mbuf *mm = m_copy(m, 0, hlen);
 
 		if (mm && (!M_WRITABLE(mm) || mm->m_len < hlen))
 		    mm = m_pullup(mm, hlen);
 		if (mm == NULL)
 		    return ENOBUFS;
 
 		/* Overlay the upcall message on the copied IP header. */
 		im = mtod(mm, struct igmpmsg *);
 		im->im_msgtype	= IGMPMSG_WRONGVIF;
 		im->im_mbz		= 0;
 		im->im_vif		= vifi;
 
 		MRTSTAT_INC(mrts_upcalls);
 
 		k_igmpsrc.sin_addr = im->im_src;
 		if (socket_send(V_ip_mrouter, mm, &k_igmpsrc) < 0) {
 		    CTR1(KTR_IPMF, "%s: socket queue full", __func__);
 		    MRTSTAT_INC(mrts_upq_sockfull);
 		    return ENOBUFS;
 		}
 	    }
 	}
 	return 0;
     }
 
 
     /* If I sourced this packet, it counts as output, else it was input. */
     if (in_hosteq(ip->ip_src, V_viftable[vifi].v_lcl_addr)) {
 	V_viftable[vifi].v_pkt_out++;
 	V_viftable[vifi].v_bytes_out += plen;
     } else {
 	V_viftable[vifi].v_pkt_in++;
 	V_viftable[vifi].v_bytes_in += plen;
     }
     rt->mfc_pkt_cnt++;
     rt->mfc_byte_cnt += plen;
 
     /*
      * For each vif, decide if a copy of the packet should be forwarded.
      * Forward if:
      *		- the ttl exceeds the vif's threshold
      *		- there are group members downstream on interface
      */
     for (vifi = 0; vifi < V_numvifs; vifi++)
 	if ((rt->mfc_ttls[vifi] > 0) && (ip->ip_ttl > rt->mfc_ttls[vifi])) {
 	    V_viftable[vifi].v_pkt_out++;
 	    V_viftable[vifi].v_bytes_out += plen;
 	    if (V_viftable[vifi].v_flags & VIFF_REGISTER)
 		pim_register_send(ip, V_viftable + vifi, m, rt);
 	    else
 		phyint_send(ip, V_viftable + vifi, m);
 	}
 
     /*
      * Perform upcall-related bw measuring.
      */
     if (rt->mfc_bw_meter != NULL) {
 	struct bw_meter *x;
 	struct timeval now;
 
 	microtime(&now);
 	MFC_LOCK_ASSERT();
 	for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next)
 	    bw_meter_receive_packet(x, plen, &now);
     }
 
     return 0;
 }
 
 /*
  * Report whether a vif index lies inside the configured vif range.
  * This is used by in_mcast.c.
  *
  * Returns 1 when 0 <= vif < V_numvifs, 0 otherwise.  V_numvifs is
  * sampled with the VIF lock held; negative indexes are rejected
  * without taking the lock.
  */
 static int
 X_legal_vif_num(int vif)
 {
 	int legal = 0;
 
 	if (vif >= 0) {
 		VIF_LOCK();
 		if (vif < V_numvifs)
 			legal = 1;
 		VIF_UNLOCK();
 	}
 
 	return (legal);
 }
 
 /*
  * Look up the local address recorded for a vif.
  *
  * Returns V_viftable[vifi].v_lcl_addr.s_addr when 0 <= vifi < V_numvifs,
  * and INADDR_ANY otherwise.  The table is read with the VIF lock held.
  */
 static u_long
 X_ip_mcast_src(int vifi)
 {
 	in_addr_t lcl = INADDR_ANY;
 
 	if (vifi >= 0) {
 		VIF_LOCK();
 		if (vifi < V_numvifs)
 			lcl = V_viftable[vifi].v_lcl_addr.s_addr;
 		VIF_UNLOCK();
 	}
 
 	return (lcl);
 }
 
 /*
  * Duplicate a packet and transmit the duplicate on the given vif.
  *
  * The original mbuf chain is left with the caller.  If the duplicate
  * cannot be obtained, or its IP header cannot be made private and
  * contiguous, the packet is silently not sent.
  */
 static void
 phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
 {
     const int iphdrlen = ip->ip_hl << 2;
     struct mbuf *dup;
 
     VIF_LOCK_ASSERT();
 
     dup = m_copypacket(m, M_NOWAIT);
     if (dup == NULL)
 	return;
 
     /*
      * The IP header has to be really copied, not merely referenced,
      * so that ip_output() only scribbles on our duplicate.
      */
     if (!M_WRITABLE(dup) || dup->m_len < iphdrlen) {
 	dup = m_pullup(dup, iphdrlen);
 	if (dup == NULL)
 	    return;
     }
 
     send_packet(vifp, dup);
 }
 
 static void
 send_packet(struct vif *vifp, struct mbuf *m)
 {
 	struct ip_moptions imo;
 	struct in_multi *imm[2];
 	int error;
 
 	VIF_LOCK_ASSERT();
 
 	imo.imo_multicast_ifp  = vifp->v_ifp;
 	imo.imo_multicast_ttl  = mtod(m, struct ip *)->ip_ttl - 1;
 	imo.imo_multicast_loop = 1;
 	imo.imo_multicast_vif  = -1;
 	imo.imo_num_memberships = 0;
 	imo.imo_max_memberships = 2;
 	imo.imo_membership  = &imm[0];
 
 	/*
 	 * Re-entrancy should not be a problem here, because
 	 * the packets that we send out and are looped back at us
 	 * should get rejected because they appear to come from
 	 * the loopback interface, thus preventing looping.
 	 */
 	error = ip_output(m, NULL, NULL, IP_FORWARDING, &imo, NULL);
 	CTR3(KTR_IPMF, "%s: vif %td err %d", __func__,
 	    (ptrdiff_t)(vifp - V_viftable), error);
 }
 
 /*
  * Stubs for old RSVP socket shim implementation.
  */
 
 /*
  * RSVP vif socket-option stub.  Both arguments are ignored and the
  * call always fails with EOPNOTSUPP.
  */
 static int
 X_ip_rsvp_vif(struct socket *so __unused, struct sockopt *sopt __unused)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * RSVP teardown stub.  The socket argument is ignored and nothing is done.
  */
 static void
 X_ip_rsvp_force_done(struct socket *so __unused)
 {
 
 }
 
 /*
  * RSVP input stub.
  *
  * Takes ownership of the packet in *mp, clears *mp so the caller cannot
  * touch it again, and drops the packet.  offp and proto are not used.
  * Always returns IPPROTO_DONE.
  *
  * The packet is freed unconditionally: once *mp has been cleared this
  * function holds the only reference to the chain and nothing here
  * passes it on, so skipping the free when V_rsvp_on is set would leak
  * the mbuf.
  */
 static int
 X_rsvp_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m;
 
 	m = *mp;
 	*mp = NULL;
 	m_freem(m);
 	return (IPPROTO_DONE);
 }
 
 /*
  * Code for bandwidth monitors
  */
 
 /*
  * Define common interface for timeval-related methods
  */
 /* Compare *tvp with *uvp using the operator cmp; forwards to timevalcmp(). */
 #define	BW_TIMEVALCMP(tvp, uvp, cmp) timevalcmp((tvp), (uvp), cmp)
 /* Subtract *uvp from *vvp in place; forwards to timevalsub(). */
 #define	BW_TIMEVALDECR(vvp, uvp) timevalsub((vvp), (uvp))
 /* Add *uvp to *vvp in place; forwards to timevaladd(). */
 #define	BW_TIMEVALADD(vvp, uvp) timevaladd((vvp), (uvp))
 
 /*
  * Translate the unit (packets/bytes) and comparison (>=, <=) bits of a
  * bw_upcall request into the corresponding BW_METER_* flag bits.
  * Any other bit in req->bu_flags is ignored.
  */
 static uint32_t
 compute_bw_meter_flags(struct bw_upcall *req)
 {
     uint32_t meter_bits;
 
     meter_bits  = (req->bu_flags & BW_UPCALL_UNIT_PACKETS) ?
 	BW_METER_UNIT_PACKETS : 0;
     meter_bits |= (req->bu_flags & BW_UPCALL_UNIT_BYTES) ?
 	BW_METER_UNIT_BYTES : 0;
     meter_bits |= (req->bu_flags & BW_UPCALL_GEQ) ? BW_METER_GEQ : 0;
     meter_bits |= (req->bu_flags & BW_UPCALL_LEQ) ? BW_METER_LEQ : 0;
 
     return meter_bits;
 }
 
 /*
  * Add a bw_meter entry
  */
 static int
 add_bw_upcall(struct bw_upcall *req)
 {
     struct mfc *mfc;
     struct timeval delta = { BW_UPCALL_THRESHOLD_INTERVAL_MIN_SEC,
 		BW_UPCALL_THRESHOLD_INTERVAL_MIN_USEC };
     struct timeval now;
     struct bw_meter *x;
     uint32_t flags;
 
     if (!(V_mrt_api_config & MRT_MFC_BW_UPCALL))
 	return EOPNOTSUPP;
 
     /* Test if the flags are valid */
     if (!(req->bu_flags & (BW_UPCALL_UNIT_PACKETS | BW_UPCALL_UNIT_BYTES)))
 	return EINVAL;
     if (!(req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ)))
 	return EINVAL;
     if ((req->bu_flags & (BW_UPCALL_GEQ | BW_UPCALL_LEQ))
 	    == (BW_UPCALL_GEQ | BW_UPCALL_LEQ))
 	return EINVAL;
 
     /* Test if the threshold time interval is valid */
     if (BW_TIMEVALCMP(&req->bu_threshold.b_time, &delta, <))
 	return EINVAL;
 
     flags = compute_bw_meter_flags(req);
 
     /*
      * Find if we have already same bw_meter entry
      */
     MFC_LOCK();
     mfc = mfc_find(&req->bu_src, &req->bu_dst);
     if (mfc == NULL) {
 	MFC_UNLOCK();
 	return EADDRNOTAVAIL;
     }
     for (x = mfc->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) {
 	if ((BW_TIMEVALCMP(&x->bm_threshold.b_time,
 			   &req->bu_threshold.b_time, ==)) &&
 	    (x->bm_threshold.b_packets == req->bu_threshold.b_packets) &&
 	    (x->bm_threshold.b_bytes == req->bu_threshold.b_bytes) &&
 	    (x->bm_flags & BW_METER_USER_FLAGS) == flags)  {
 	    MFC_UNLOCK();
 	    return 0;		/* XXX Already installed */
 	}
     }
 
     /* Allocate the new bw_meter entry */
     x = (struct bw_meter *)malloc(sizeof(*x), M_BWMETER, M_NOWAIT);
     if (x == NULL) {
 	MFC_UNLOCK();
 	return ENOBUFS;
     }
 
     /* Set the new bw_meter entry */
     x->bm_threshold.b_time = req->bu_threshold.b_time;
     microtime(&now);
     x->bm_start_time = now;
     x->bm_threshold.b_packets = req->bu_threshold.b_packets;
     x->bm_threshold.b_bytes = req->bu_threshold.b_bytes;
     x->bm_measured.b_packets = 0;
     x->bm_measured.b_bytes = 0;
     x->bm_flags = flags;
     x->bm_time_next = NULL;
     x->bm_time_hash = BW_METER_BUCKETS;
 
     /* Add the new bw_meter entry to the front of entries for this MFC */
     x->bm_mfc = mfc;
     x->bm_mfc_next = mfc->mfc_bw_meter;
     mfc->mfc_bw_meter = x;
     schedule_bw_meter(x, &now);
     MFC_UNLOCK();
 
     return 0;
 }
 
 /*
  * Unschedule and free every bw_meter on a list linked through
  * bm_mfc_next.  A NULL list is a no-op.
  */
 static void
 free_bw_list(struct bw_meter *list)
 {
     struct bw_meter *cur, *next;
 
     for (cur = list; cur != NULL; cur = next) {
 	/* Fetch the successor before the entry is released. */
 	next = cur->bm_mfc_next;
 	unschedule_bw_meter(cur);
 	free(cur, M_BWMETER);
     }
 }
 
 /*
  * Remove one bw_meter, or all of them, from the MFC entry matching
  * req's source/group pair.
  *
  * Returns 0 on success, EOPNOTSUPP when MRT_MFC_BW_UPCALL is not set in
  * V_mrt_api_config, EADDRNOTAVAIL when no MFC entry matches, and EINVAL
  * when a single meter was requested but none matches.
  */
 static int
 del_bw_upcall(struct bw_upcall *req)
 {
     struct bw_meter *victim, *before;
     struct mfc *rt;
     uint32_t meter_flags;
 
     if ((V_mrt_api_config & MRT_MFC_BW_UPCALL) == 0)
 	return EOPNOTSUPP;
 
     MFC_LOCK();
 
     rt = mfc_find(&req->bu_src, &req->bu_dst);
     if (rt == NULL) {
 	MFC_UNLOCK();
 	return EADDRNOTAVAIL;
     }
 
     if (req->bu_flags & BW_UPCALL_DELETE_ALL) {
 	/* Detach the whole list first, then release it. */
 	struct bw_meter *all = rt->mfc_bw_meter;
 
 	rt->mfc_bw_meter = NULL;
 	free_bw_list(all);
 	MFC_UNLOCK();
 	return 0;
     }
 
     /* Single-entry delete: look for the meter described by req. */
     meter_flags = compute_bw_meter_flags(req);
 
     before = NULL;
     victim = rt->mfc_bw_meter;
     while (victim != NULL) {
 	if ((BW_TIMEVALCMP(&victim->bm_threshold.b_time,
 	    &req->bu_threshold.b_time, ==)) &&
 	    (victim->bm_threshold.b_packets == req->bu_threshold.b_packets) &&
 	    (victim->bm_threshold.b_bytes == req->bu_threshold.b_bytes) &&
 	    (victim->bm_flags & BW_METER_USER_FLAGS) == meter_flags)
 	    break;
 	before = victim;
 	victim = victim->bm_mfc_next;
     }
     if (victim == NULL) {
 	MFC_UNLOCK();
 	return EINVAL;
     }
 
     /* Unlink from the MFC's list: either the head or an inner node. */
     if (before == NULL)
 	victim->bm_mfc->mfc_bw_meter = victim->bm_mfc_next;
     else
 	before->bm_mfc_next = victim->bm_mfc_next;
 
     unschedule_bw_meter(victim);
     MFC_UNLOCK();
     /* The entry is unreachable now, so it is freed outside the lock. */
     free(victim, M_BWMETER);
     return 0;
 }
 
 /*
  * Perform bandwidth measurement processing that may result in an upcall
  *
  * x is the meter to update, plen is added to its measured byte count,
  * and *nowp is the current time.  Must be called with the MFC lock
  * held.  A meter with neither BW_METER_GEQ nor BW_METER_LEQ set is
  * left untouched.
  */
 static void
 bw_meter_receive_packet(struct bw_meter *x, int plen, struct timeval *nowp)
 {
     struct timeval delta;
 
     MFC_LOCK_ASSERT();
 
     /* delta = time elapsed since the current measuring interval began */
     delta = *nowp;
     BW_TIMEVALDECR(&delta, &x->bm_start_time);
 
     if (x->bm_flags & BW_METER_GEQ) {
 	/*
 	 * Processing for ">=" type of bw_meter entry
 	 */
 	if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) {
 	    /* Reset the bw_meter entry */
 	    x->bm_start_time = *nowp;
 	    x->bm_measured.b_packets = 0;
 	    x->bm_measured.b_bytes = 0;
 	    x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
 	}
 
 	/* Record that a packet is received */
 	x->bm_measured.b_packets++;
 	x->bm_measured.b_bytes += plen;
 
 	/*
 	 * Test if we should deliver an upcall
 	 */
 	/* At most one upcall is prepared per interval for ">=" meters. */
 	if (!(x->bm_flags & BW_METER_UPCALL_DELIVERED)) {
 	    if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
 		 (x->bm_measured.b_packets >= x->bm_threshold.b_packets)) ||
 		((x->bm_flags & BW_METER_UNIT_BYTES) &&
 		 (x->bm_measured.b_bytes >= x->bm_threshold.b_bytes))) {
 		/* Prepare an upcall for delivery */
 		bw_meter_prepare_upcall(x, nowp);
 		x->bm_flags |= BW_METER_UPCALL_DELIVERED;
 	    }
 	}
     } else if (x->bm_flags & BW_METER_LEQ) {
 	/*
 	 * Processing for "<=" type of bw_meter entry
 	 */
 	if (BW_TIMEVALCMP(&delta, &x->bm_threshold.b_time, >)) {
 	    /*
 	     * We are behind time with the multicast forwarding table
 	     * scanning for "<=" type of bw_meter entries, so test now
 	     * if we should deliver an upcall.
 	     */
 	    if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
 		 (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) ||
 		((x->bm_flags & BW_METER_UNIT_BYTES) &&
 		 (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) {
 		/* Prepare an upcall for delivery */
 		bw_meter_prepare_upcall(x, nowp);
 	    }
 	    /* Reschedule the bw_meter entry */
 	    /* schedule_bw_meter() also resets the start time and counters. */
 	    unschedule_bw_meter(x);
 	    schedule_bw_meter(x, nowp);
 	}
 
 	/* Record that a packet is received */
 	x->bm_measured.b_packets++;
 	x->bm_measured.b_bytes += plen;
 
 	/*
 	 * Test if we should restart the measuring interval
 	 */
 	if ((x->bm_flags & BW_METER_UNIT_PACKETS &&
 	     x->bm_measured.b_packets <= x->bm_threshold.b_packets) ||
 	    (x->bm_flags & BW_METER_UNIT_BYTES &&
 	     x->bm_measured.b_bytes <= x->bm_threshold.b_bytes)) {
 	    /* Don't restart the measuring interval */
 	} else {
 	    /* Do restart the measuring interval */
 	    /*
 	     * XXX: note that we don't unschedule and schedule, because this
 	     * might be too much overhead per packet. Instead, when we process
 	     * all entries for a given timer hash bin, we check whether it is
 	     * really a timeout. If not, we reschedule at that time.
 	     */
 	    x->bm_start_time = *nowp;
 	    x->bm_measured.b_packets = 0;
 	    x->bm_measured.b_bytes = 0;
 	    x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
 	}
     }
 }
 
 /*
  * Prepare a bandwidth-related upcall
  */
 static void
 bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp)
 {
     struct timeval delta;
     struct bw_upcall *u;
 
     MFC_LOCK_ASSERT();
 
     /*
      * Compute the measured time interval
      */
     delta = *nowp;
     BW_TIMEVALDECR(&delta, &x->bm_start_time);
 
     /*
      * If there are too many pending upcalls, deliver them now
      */
     if (V_bw_upcalls_n >= BW_UPCALLS_MAX)
 	bw_upcalls_send();
 
     /*
      * Set the bw_upcall entry
      */
     u = &V_bw_upcalls[V_bw_upcalls_n++];
     u->bu_src = x->bm_mfc->mfc_origin;
     u->bu_dst = x->bm_mfc->mfc_mcastgrp;
     u->bu_threshold.b_time = x->bm_threshold.b_time;
     u->bu_threshold.b_packets = x->bm_threshold.b_packets;
     u->bu_threshold.b_bytes = x->bm_threshold.b_bytes;
     u->bu_measured.b_time = delta;
     u->bu_measured.b_packets = x->bm_measured.b_packets;
     u->bu_measured.b_bytes = x->bm_measured.b_bytes;
     u->bu_flags = 0;
     if (x->bm_flags & BW_METER_UNIT_PACKETS)
 	u->bu_flags |= BW_UPCALL_UNIT_PACKETS;
     if (x->bm_flags & BW_METER_UNIT_BYTES)
 	u->bu_flags |= BW_UPCALL_UNIT_BYTES;
     if (x->bm_flags & BW_METER_GEQ)
 	u->bu_flags |= BW_UPCALL_GEQ;
     if (x->bm_flags & BW_METER_LEQ)
 	u->bu_flags |= BW_UPCALL_LEQ;
 }
 
 /*
  * Send the pending bandwidth-related upcalls
  */
 static void
 bw_upcalls_send(void)
 {
     struct mbuf *m;
     int len = V_bw_upcalls_n * sizeof(V_bw_upcalls[0]);
     struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
     static struct igmpmsg igmpmsg = { 0,		/* unused1 */
 				      0,		/* unused2 */
 				      IGMPMSG_BW_UPCALL,/* im_msgtype */
 				      0,		/* im_mbz  */
 				      0,		/* im_vif  */
 				      0,		/* unused3 */
 				      { 0 },		/* im_src  */
 				      { 0 } };		/* im_dst  */
 
     MFC_LOCK_ASSERT();
 
     if (V_bw_upcalls_n == 0)
 	return;			/* No pending upcalls */
 
     V_bw_upcalls_n = 0;
 
     /*
      * Allocate a new mbuf, initialize it with the header and
      * the payload for the pending calls.
      */
     m = m_gethdr(M_NOWAIT, MT_DATA);
     if (m == NULL) {
 	log(LOG_WARNING, "bw_upcalls_send: cannot allocate mbuf\n");
 	return;
     }
 
     m_copyback(m, 0, sizeof(struct igmpmsg), (caddr_t)&igmpmsg);
     m_copyback(m, sizeof(struct igmpmsg), len, (caddr_t)&V_bw_upcalls[0]);
 
     /*
      * Send the upcalls
      * XXX do we need to set the address in k_igmpsrc ?
      */
     MRTSTAT_INC(mrts_upcalls);
     if (socket_send(V_ip_mrouter, m, &k_igmpsrc) < 0) {
 	log(LOG_WARNING, "bw_upcalls_send: ip_mrouter socket queue full\n");
 	MRTSTAT_INC(mrts_upq_sockfull);
     }
 }
 
 /*
  * Compute the timeout hash value for the bw_meter entries
  *
  * Stores into (hash) the bucket index for the moment the meter's
  * interval ends: bm_start_time + bm_threshold.b_time, taken in whole
  * seconds and rounded up when the microsecond part is non-zero, modulo
  * BW_METER_BUCKETS.  (hash) must be an assignable lvalue.
  */
 #define	BW_METER_TIMEHASH(bw_meter, hash)				\
     do {								\
 	struct timeval next_timeval = (bw_meter)->bm_start_time;	\
 									\
 	BW_TIMEVALADD(&next_timeval, &(bw_meter)->bm_threshold.b_time); \
 	(hash) = next_timeval.tv_sec;					\
 	if (next_timeval.tv_usec)					\
 	    (hash)++; /* XXX: make sure we don't timeout early */	\
 	(hash) %= BW_METER_BUCKETS;					\
     } while (0)
 
 /*
  * Arm the periodic processing of a "<=" bw_meter: restart its measuring
  * interval at *nowp and link it at the head of the timer bucket that
  * corresponds to the end of that interval.  Meters without BW_METER_LEQ
  * are left untouched.  Must be called with the MFC lock held.
  */
 static void
 schedule_bw_meter(struct bw_meter *x, struct timeval *nowp)
 {
     int bucket;
 
     MFC_LOCK_ASSERT();
 
     if ((x->bm_flags & BW_METER_LEQ) == 0)
 	return;		/* XXX: we schedule timers only for "<=" entries */
 
     /* Start a fresh measuring interval. */
     x->bm_flags &= ~BW_METER_UPCALL_DELIVERED;
     x->bm_measured.b_bytes = 0;
     x->bm_measured.b_packets = 0;
     x->bm_start_time = *nowp;
 
     /* Pick the bucket from the new start time and push the entry. */
     BW_METER_TIMEHASH(x, bucket);
     x->bm_time_hash = bucket;
     x->bm_time_next = V_bw_meter_timers[bucket];
     V_bw_meter_timers[bucket] = x;
 }
 
 /*
  * Disarm the periodic processing of a "<=" bw_meter by unlinking it
  * from the timer bucket recorded in bm_time_hash.
  *
  * Meters without BW_METER_LEQ, and meters whose bm_time_hash is not a
  * valid bucket index (i.e. not scheduled), are left untouched.  Panics
  * if the meter claims to be in a bucket but is not on that bucket's
  * list.  Must be called with the MFC lock held.
  */
 static void
 unschedule_bw_meter(struct bw_meter *x)
 {
     struct bw_meter *cur, *before;
     int bucket;
 
     MFC_LOCK_ASSERT();
 
     if ((x->bm_flags & BW_METER_LEQ) == 0)
 	return;		/* XXX: we schedule timers only for "<=" entries */
 
     bucket = x->bm_time_hash;
     if (bucket >= BW_METER_BUCKETS)
 	return;		/* Entry was not scheduled */
 
     /* Walk the bucket, remembering the predecessor of x. */
     before = NULL;
     cur = V_bw_meter_timers[bucket];
     while (cur != NULL && cur != x) {
 	before = cur;
 	cur = cur->bm_time_next;
     }
 
     if (cur == NULL)
 	panic("unschedule_bw_meter: bw_meter entry not found");
 
     if (before == NULL)
 	V_bw_meter_timers[bucket] = x->bm_time_next;
     else
 	before->bm_time_next = x->bm_time_next;
 
     /* Mark the entry as unscheduled. */
     x->bm_time_hash = BW_METER_BUCKETS;
     x->bm_time_next = NULL;
 }
 
 
 /*
  * Process all "<=" type of bw_meter that should be processed now,
  * and for each entry prepare an upcall if necessary. Each processed
  * entry is rescheduled again for the (periodic) processing.
  *
  * This is run periodically (once per second normally). On each round,
  * all the potentially matching entries are in the hash slot that we are
  * looking at.
  *
  * Takes and releases the MFC lock itself.  Returns immediately when the
  * current second equals V_last_tv_sec; otherwise it visits one timer
  * bucket per elapsed second, capped at BW_METER_BUCKETS, and finally
  * sends whatever upcalls are pending.
  */
 static void
 bw_meter_process(void)
 {
     uint32_t loops;
     int i;
     struct timeval now, process_endtime;
 
     microtime(&now);
     if (V_last_tv_sec == now.tv_sec)
 	return;		/* nothing to do */
 
     /* Number of one-second buckets to catch up on since the last run. */
     loops = now.tv_sec - V_last_tv_sec;
     V_last_tv_sec = now.tv_sec;
     if (loops > BW_METER_BUCKETS)
 	loops = BW_METER_BUCKETS;
 
     MFC_LOCK();
     /*
      * Process all bins of bw_meter entries from the one after the last
      * processed to the current one. On entry, i points to the last bucket
      * visited, so we need to increment i at the beginning of the loop.
      */
     for (i = (now.tv_sec - loops) % BW_METER_BUCKETS; loops > 0; loops--) {
 	struct bw_meter *x, *tmp_list;
 
 	if (++i >= BW_METER_BUCKETS)
 	    i = 0;
 
 	/* Disconnect the list of bw_meter entries from the bin */
 	tmp_list = V_bw_meter_timers[i];
 	V_bw_meter_timers[i] = NULL;
 
 	/* Process the list of bw_meter entries */
 	while (tmp_list != NULL) {
 	    x = tmp_list;
 	    tmp_list = tmp_list->bm_time_next;
 
 	    /* Test if the time interval is over */
 	    process_endtime = x->bm_start_time;
 	    BW_TIMEVALADD(&process_endtime, &x->bm_threshold.b_time);
 	    if (BW_TIMEVALCMP(&process_endtime, &now, >)) {
 		/* Not yet: reschedule, but don't reset */
 		int time_hash;
 
 		BW_METER_TIMEHASH(x, time_hash);
 		if (time_hash == i && process_endtime.tv_sec == now.tv_sec) {
 		    /*
 		     * XXX: somehow the bin processing is a bit ahead of time.
 		     * Put the entry in the next bin.
 		     */
 		    if (++time_hash >= BW_METER_BUCKETS)
 			time_hash = 0;
 		}
 		/* Relink by hand so the measured counters are kept. */
 		x->bm_time_next = V_bw_meter_timers[time_hash];
 		V_bw_meter_timers[time_hash] = x;
 		x->bm_time_hash = time_hash;
 
 		continue;
 	    }
 
 	    /*
 	     * Test if we should deliver an upcall
 	     */
 	    if (((x->bm_flags & BW_METER_UNIT_PACKETS) &&
 		 (x->bm_measured.b_packets <= x->bm_threshold.b_packets)) ||
 		((x->bm_flags & BW_METER_UNIT_BYTES) &&
 		 (x->bm_measured.b_bytes <= x->bm_threshold.b_bytes))) {
 		/* Prepare an upcall for delivery */
 		bw_meter_prepare_upcall(x, &now);
 	    }
 
 	    /*
 	     * Reschedule for next processing
 	     */
 	    schedule_bw_meter(x, &now);
 	}
     }
 
     /* Send all upcalls that are pending delivery */
     bw_upcalls_send();
 
     MFC_UNLOCK();
 }
 
 /*
  * A periodic function for sending all upcalls that are pending delivery
  *
  * Callout handler: arg is the struct vnet to operate in.  Sends the
  * pending upcalls under the MFC lock, then re-arms V_bw_upcalls_ch to
  * call this function again after BW_UPCALLS_PERIOD.
  */
 static void
 expire_bw_upcalls_send(void *arg)
 {
     CURVNET_SET((struct vnet *) arg);
 
     MFC_LOCK();
     bw_upcalls_send();
     MFC_UNLOCK();
 
     /* Re-arm for the next period, handing the current vnet back as arg. */
     callout_reset(&V_bw_upcalls_ch, BW_UPCALLS_PERIOD, expire_bw_upcalls_send,
 	curvnet);
     CURVNET_RESTORE();
 }
 
 /*
  * A periodic function for periodic scanning of the multicast forwarding
  * table for processing all "<=" bw_meter entries.
  *
  * Callout handler: arg is the struct vnet to operate in.  The scan is
  * skipped while MRT_MFC_BW_UPCALL is not set in V_mrt_api_config, but
  * V_bw_meter_ch is re-armed for BW_METER_PERIOD either way.
  */
 static void
 expire_bw_meter_process(void *arg)
 {
     CURVNET_SET((struct vnet *) arg);
 
     if (V_mrt_api_config & MRT_MFC_BW_UPCALL)
 	bw_meter_process();
 
     /* Re-arm for the next period, handing the current vnet back as arg. */
     callout_reset(&V_bw_meter_ch, BW_METER_PERIOD, expire_bw_meter_process,
 	curvnet);
     CURVNET_RESTORE();
 }
 
 /*
  * End of bandwidth monitoring code
  */
 
 /*
  * Send a data packet out of a register vif: either as a whole-packet
  * upcall to the user daemon, or PIM Register encapsulated towards the
  * rendezvous point when MRT_MFC_RP is enabled and rt->mfc_rp is set.
  *
  * Returns 0 when the packet was handled or deliberately suppressed, and
  * ENOBUFS when no copy of the packet could be prepared.  Errors from
  * sending individual fragments are not reported.
  */
 static int
 pim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m,
     struct mfc *rt)
 {
     struct mbuf *frag, *rest;
 
     /*
      * Do not send IGMP_WHOLEPKT notifications to userland, if the
      * rendezvous point was unspecified, and we were told not to.
      */
     if (pim_squelch_wholepkt != 0 && (V_mrt_api_config & MRT_MFC_RP) &&
 	in_nullhost(rt->mfc_rp))
 	return 0;
 
     rest = pim_register_prepare(ip, m);
     if (rest == NULL)
 	return ENOBUFS;
 
     /*
      * Walk the m_nextpkt chain of fragments.  Each fragment is detached
      * before it is handed on; the sending machinery frees it.
      */
     while ((frag = rest) != NULL) {
 	rest = frag->m_nextpkt;
 	frag->m_nextpkt = NULL;
 
 	frag = m_pullup(frag, sizeof(struct ip));
 	if (frag == NULL)
 	    continue;
 
 	ip = mtod(frag, struct ip *);
 	if ((V_mrt_api_config & MRT_MFC_RP) == 0 || in_nullhost(rt->mfc_rp))
 	    pim_register_send_upcall(ip, vifp, frag, rt);
 	else
 	    pim_register_send_rp(ip, vifp, frag, rt);
     }
 
     return 0;
 }
 
 /*
  * Return a copy of the data packet that is ready for PIM Register
  * encapsulation.
  * XXX: Note that in the returned copy the IP header is a valid one.
  *
  * ip is the IP header of the original packet m.  The copy has its TTL
  * decremented.  If it does not fit in what remains of a 65535-byte
  * datagram after the encapsulating IP and PIM headers, it is handed to
  * ip_fragment().  Returns NULL when the copy, the pullup or the
  * fragmentation fails.
  */
 static struct mbuf *
 pim_register_prepare(struct ip *ip, struct mbuf *m)
 {
     struct mbuf *mb_copy = NULL;
     int mtu;
 
     /* Take care of delayed checksums */
     if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 	in_delayed_cksum(m);
 	m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
     }
 
     /*
      * Copy the old packet & pullup its IP header into the
      * new mbuf so we can modify it.
      */
     mb_copy = m_copypacket(m, M_NOWAIT);
     if (mb_copy == NULL)
 	return NULL;
     mb_copy = m_pullup(mb_copy, ip->ip_hl << 2);
     if (mb_copy == NULL)
 	return NULL;
 
     /* take care of the TTL */
     /* From here on ip refers to the header inside the copy. */
     ip = mtod(mb_copy, struct ip *);
     --ip->ip_ttl;
 
     /* Compute the MTU after the PIM Register encapsulation */
     mtu = 0xffff - sizeof(pim_encap_iphdr) - sizeof(pim_encap_pimhdr);
 
     if (ntohs(ip->ip_len) <= mtu) {
 	/* Turn the IP header into a valid one */
 	ip->ip_sum = 0;
 	ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
     } else {
 	/* Fragment the packet */
 	mb_copy->m_pkthdr.csum_flags |= CSUM_IP;
 	if (ip_fragment(ip, &mb_copy, mtu, 0) != 0) {
 	    m_freem(mb_copy);
 	    return NULL;
 	}
     }
     return mb_copy;
 }
 
 /*
  * Deliver a data packet to the user-level routing process as an
  * IGMPMSG_WHOLEPKT upcall: a new mbuf carrying a struct igmpmsg is
  * prepended to mb_copy and the chain is sent to V_ip_mrouter.
  *
  * Returns 0 on success and ENOBUFS when the header mbuf cannot be
  * allocated (mb_copy is freed here in that case) or when socket_send()
  * reports failure.  rt is not used.  The VIF lock must be held.
  */
 static int
 pim_register_send_upcall(struct ip *ip, struct vif *vifp,
     struct mbuf *mb_copy, struct mfc *rt)
 {
     struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET };
     const int pktlen = ntohs(ip->ip_len);
     struct igmpmsg *msg;
     struct mbuf *hdr;
 
     VIF_LOCK_ASSERT();
 
     /* Allocate the mbuf that carries the upcall header. */
     hdr = m_gethdr(M_NOWAIT, MT_DATA);
     if (hdr == NULL) {
 	m_freem(mb_copy);
 	return ENOBUFS;
     }
     hdr->m_next = mb_copy;
     hdr->m_data += max_linkhdr;
     hdr->m_len = sizeof(struct igmpmsg);
     hdr->m_pkthdr.len = pktlen + sizeof(struct igmpmsg);
 
     /* Describe the packet to the routing daemon. */
     msg = mtod(hdr, struct igmpmsg *);
     msg->im_dst = ip->ip_dst;
     msg->im_src = ip->ip_src;
     msg->im_vif = vifp - V_viftable;
     msg->im_mbz = 0;
     msg->im_msgtype = IGMPMSG_WHOLEPKT;
 
     k_igmpsrc.sin_addr = ip->ip_src;
 
     MRTSTAT_INC(mrts_upcalls);
 
     if (socket_send(V_ip_mrouter, hdr, &k_igmpsrc) < 0) {
 	CTR1(KTR_IPMF, "%s: socket queue full", __func__);
 	MRTSTAT_INC(mrts_upq_sockfull);
 	return ENOBUFS;
     }
 
     /* Account for the register that went out. */
     PIMSTAT_INC(pims_snd_registers_msgs);
     PIMSTAT_ADD(pims_snd_registers_bytes, pktlen);
 
     return 0;
 }
 
 /*
  * Encapsulate the data packet in PIM Register message and send it to the RP.
  *
  * ip is the header of the inner packet held in mb_copy; rt supplies the
  * RP address (mfc_rp) and the incoming vif (mfc_parent), whose local
  * address becomes the outer source.  Returns EADDRNOTAVAIL when that
  * vif is out of range or has a null local address, ENOBUFS when the
  * header mbuf cannot be allocated (mb_copy is freed in both cases),
  * and 0 otherwise.  The VIF lock must be held.
  */
 static int
 pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy,
     struct mfc *rt)
 {
     struct mbuf *mb_first;
     struct ip *ip_outer;
     struct pim_encap_pimhdr *pimhdr;
     int len = ntohs(ip->ip_len);
     vifi_t vifi = rt->mfc_parent;
 
     VIF_LOCK_ASSERT();
 
     if ((vifi >= V_numvifs) || in_nullhost(V_viftable[vifi].v_lcl_addr)) {
 	m_freem(mb_copy);
 	return EADDRNOTAVAIL;		/* The iif vif is invalid */
     }
 
     /*
      * Add a new mbuf with the encapsulating header
      */
     mb_first = m_gethdr(M_NOWAIT, MT_DATA);
     if (mb_first == NULL) {
 	m_freem(mb_copy);
 	return ENOBUFS;
     }
     mb_first->m_data += max_linkhdr;
     mb_first->m_len = sizeof(pim_encap_iphdr) + sizeof(pim_encap_pimhdr);
     mb_first->m_next = mb_copy;
 
     mb_first->m_pkthdr.len = len + mb_first->m_len;
 
     /*
      * Fill in the encapsulating IP and PIM header
      */
     ip_outer = mtod(mb_first, struct ip *);
     *ip_outer = pim_encap_iphdr;
     ip_outer->ip_len = htons(len + sizeof(pim_encap_iphdr) +
 	sizeof(pim_encap_pimhdr));
     ip_outer->ip_src = V_viftable[vifi].v_lcl_addr;
     ip_outer->ip_dst = rt->mfc_rp;
     /*
      * Copy the inner header TOS to the outer header, and take care of the
      * IP_DF bit.
      */
     ip_outer->ip_tos = ip->ip_tos;
     if (ip->ip_off & htons(IP_DF))
 	ip_outer->ip_off |= htons(IP_DF);
     ip_fillid(ip_outer);
     /* The PIM header sits directly behind the outer IP header. */
     pimhdr = (struct pim_encap_pimhdr *)((caddr_t)ip_outer
 					 + sizeof(pim_encap_iphdr));
     *pimhdr = pim_encap_pimhdr;
     /* If the iif crosses a border, set the Border-bit */
     if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_BORDER_VIF & V_mrt_api_config)
 	pimhdr->flags |= htonl(PIM_BORDER_REGISTER);
 
     /*
      * Temporarily point m_data at the PIM header so that in_cksum()
      * covers sizeof(pim_encap_pimhdr) bytes from there, then restore it.
      */
     mb_first->m_data += sizeof(pim_encap_iphdr);
     pimhdr->pim.pim_cksum = in_cksum(mb_first, sizeof(pim_encap_pimhdr));
     mb_first->m_data -= sizeof(pim_encap_iphdr);
 
     send_packet(vifp, mb_first);
 
     /* Keep statistics */
     PIMSTAT_INC(pims_snd_registers_msgs);
     PIMSTAT_ADD(pims_snd_registers_bytes, len);
 
     return 0;
 }
 
 /*
  * Encapsulation match callback, called from the encap4_input() path at
  * runtime to decide whether a packet is for PIM; this is what allows
  * PIM to be dynamically loaded into the kernel.
  *
  * Only proto is examined: the result is 64 (claim the datagram) for
  * IPPROTO_PIM and 0 (not for us; reject) for anything else.
  */
 static int
 pim_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
 {
 
 #ifdef DIAGNOSTIC
     KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM"));
 #endif
     return (proto == IPPROTO_PIM) ? 64 : 0;
 }
 
 /*
  * PIM-SMv2 and PIM-DM messages processing.
  * Receives and verifies the PIM control messages, and passes them
  * up to the listening socket, using rip_input().
  * The only message with special processing is the PIM_REGISTER message
  * (used by PIM-SM): the PIM header is stripped off, and the inner packet
  * is passed to if_simloop().
  *
  * *mp is the received packet and *offp the length of its IP header.
  * The packet is always consumed here (freed, looped back, or passed to
  * rip_input()), and the return value is always IPPROTO_DONE.
  */
 int
 pim_input(struct mbuf **mp, int *offp, int proto)
 {
     struct mbuf *m = *mp;
     struct ip *ip = mtod(m, struct ip *);
     struct pim *pim;
     int iphlen = *offp;
     int minlen;
     int datalen = ntohs(ip->ip_len) - iphlen;
     int ip_tos;
 
     *mp = NULL;
 
     /* Keep statistics */
     PIMSTAT_INC(pims_rcv_total_msgs);
     PIMSTAT_ADD(pims_rcv_total_bytes, datalen);
 
     /*
      * Validate lengths
      */
     if (datalen < PIM_MINLEN) {
 	PIMSTAT_INC(pims_rcv_tooshort);
 	CTR3(KTR_IPMF, "%s: short packet (%d) from %s",
 	    __func__, datalen, inet_ntoa(ip->ip_src));
 	m_freem(m);
 	return (IPPROTO_DONE);
     }
 
     /*
      * If the packet is at least as big as a REGISTER, go ahead
      * and grab the PIM REGISTER header size, to avoid another
      * possible m_pullup() later.
      *
      * PIM_MINLEN       == pimhdr + u_int32_t == 4 + 4 = 8
      * PIM_REG_MINLEN   == pimhdr + reghdr + encap_iphdr == 4 + 4 + 20 = 28
      */
     minlen = iphlen + (datalen >= PIM_REG_MINLEN ? PIM_REG_MINLEN : PIM_MINLEN);
     /*
      * Get the IP and PIM headers in contiguous memory, and
      * possibly the PIM REGISTER header.
      */
     if (m->m_len < minlen && (m = m_pullup(m, minlen)) == 0) {
 	CTR1(KTR_IPMF, "%s: m_pullup() failed", __func__);
 	return (IPPROTO_DONE);
     }
 
     /* m_pullup() may have given us a new mbuf so reset ip. */
     ip = mtod(m, struct ip *);
     ip_tos = ip->ip_tos;
 
     /* adjust mbuf to point to the PIM header */
     m->m_data += iphlen;
     m->m_len  -= iphlen;
     pim = mtod(m, struct pim *);
 
     /*
      * Validate checksum. If PIM REGISTER, exclude the data packet.
      *
      * XXX: some older PIMv2 implementations don't make this distinction,
      * so for compatibility reason perform the checksum over part of the
      * message, and if error, then over the whole message.
      */
     if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER && in_cksum(m, PIM_MINLEN) == 0) {
 	/* do nothing, checksum okay */
     } else if (in_cksum(m, datalen)) {
 	PIMSTAT_INC(pims_rcv_badsum);
 	CTR1(KTR_IPMF, "%s: invalid checksum", __func__);
 	m_freem(m);
 	return (IPPROTO_DONE);
     }
 
     /* PIM version check */
     if (PIM_VT_V(pim->pim_vt) < PIM_VERSION) {
 	PIMSTAT_INC(pims_rcv_badversion);
 	CTR3(KTR_IPMF, "%s: bad version %d expect %d", __func__,
 	    (int)PIM_VT_V(pim->pim_vt), PIM_VERSION);
 	m_freem(m);
 	return (IPPROTO_DONE);
     }
 
     /* restore mbuf back to the outer IP */
     m->m_data -= iphlen;
     m->m_len  += iphlen;
 
     if (PIM_VT_T(pim->pim_vt) == PIM_REGISTER) {
 	/*
 	 * Since this is a REGISTER, we'll make a copy of the register
 	 * headers ip + pim + u_int32 + encap_ip, to be passed up to the
 	 * routing daemon.
 	 */
 	struct sockaddr_in dst = { sizeof(dst), AF_INET };
 	struct mbuf *mcp;
 	struct ip *encap_ip;
 	u_int32_t *reghdr;
 	struct ifnet *vifp;
 
 	/* Look up the register vif's interface under the VIF lock. */
 	VIF_LOCK();
 	if ((V_reg_vif_num >= V_numvifs) || (V_reg_vif_num == VIFI_INVALID)) {
 	    VIF_UNLOCK();
 	    CTR2(KTR_IPMF, "%s: register vif not set: %d", __func__,
 		(int)V_reg_vif_num);
 	    m_freem(m);
 	    return (IPPROTO_DONE);
 	}
 	/* XXX need refcnt? */
 	vifp = V_viftable[V_reg_vif_num].v_ifp;
 	VIF_UNLOCK();
 
 	/*
 	 * Validate length
 	 */
 	if (datalen < PIM_REG_MINLEN) {
 	    PIMSTAT_INC(pims_rcv_tooshort);
 	    PIMSTAT_INC(pims_rcv_badregisters);
 	    CTR1(KTR_IPMF, "%s: register packet size too small", __func__);
 	    m_freem(m);
 	    return (IPPROTO_DONE);
 	}
 
 	/* The register header follows the PIM header, then the inner IP. */
 	reghdr = (u_int32_t *)(pim + 1);
 	encap_ip = (struct ip *)(reghdr + 1);
 
 	CTR3(KTR_IPMF, "%s: register: encap ip src %s len %d",
 	    __func__, inet_ntoa(encap_ip->ip_src), ntohs(encap_ip->ip_len));
 
 	/* verify the version number of the inner packet */
 	if (encap_ip->ip_v != IPVERSION) {
 	    PIMSTAT_INC(pims_rcv_badregisters);
 	    CTR1(KTR_IPMF, "%s: bad encap ip version", __func__);
 	    m_freem(m);
 	    return (IPPROTO_DONE);
 	}
 
 	/* verify the inner packet is destined to a mcast group */
 	if (!IN_MULTICAST(ntohl(encap_ip->ip_dst.s_addr))) {
 	    PIMSTAT_INC(pims_rcv_badregisters);
 	    CTR2(KTR_IPMF, "%s: bad encap ip dest %s", __func__,
 		inet_ntoa(encap_ip->ip_dst));
 	    m_freem(m);
 	    return (IPPROTO_DONE);
 	}
 
 	/* If a NULL_REGISTER, pass it to the daemon */
 	if ((ntohl(*reghdr) & PIM_NULL_REGISTER))
 	    goto pim_input_to_daemon;
 
 	/*
 	 * Copy the TOS from the outer IP header to the inner IP header.
 	 */
 	if (encap_ip->ip_tos != ip_tos) {
 	    /* Outer TOS -> inner TOS */
 	    encap_ip->ip_tos = ip_tos;
 	    /* Recompute the inner header checksum. Sigh... */
 
 	    /* adjust mbuf to point to the inner IP header */
 	    m->m_data += (iphlen + PIM_MINLEN);
 	    m->m_len  -= (iphlen + PIM_MINLEN);
 
 	    encap_ip->ip_sum = 0;
 	    encap_ip->ip_sum = in_cksum(m, encap_ip->ip_hl << 2);
 
 	    /* restore mbuf to point back to the outer IP header */
 	    m->m_data -= (iphlen + PIM_MINLEN);
 	    m->m_len  += (iphlen + PIM_MINLEN);
 	}
 
 	/*
 	 * Decapsulate the inner IP packet and loopback to forward it
 	 * as a normal multicast packet. Also, make a copy of the
 	 *     outer_iphdr + pimhdr + reghdr + encap_iphdr
 	 * to pass to the daemon later, so it can take the appropriate
 	 * actions (e.g., send back PIM_REGISTER_STOP).
 	 * XXX: here m->m_data points to the outer IP header.
 	 */
 	mcp = m_copy(m, 0, iphlen + PIM_REG_MINLEN);
 	if (mcp == NULL) {
 	    CTR1(KTR_IPMF, "%s: m_copy() failed", __func__);
 	    m_freem(m);
 	    return (IPPROTO_DONE);
 	}
 
 	/* Keep statistics */
 	/* XXX: registers_bytes include only the encap. mcast pkt */
 	PIMSTAT_INC(pims_rcv_registers_msgs);
 	PIMSTAT_ADD(pims_rcv_registers_bytes, ntohs(encap_ip->ip_len));
 
 	/*
 	 * forward the inner ip packet; point m_data at the inner ip.
 	 */
 	m_adj(m, iphlen + PIM_MINLEN);
 
 	CTR4(KTR_IPMF,
 	    "%s: forward decap'd REGISTER: src %lx dst %lx vif %d",
 	    __func__,
 	    (u_long)ntohl(encap_ip->ip_src.s_addr),
 	    (u_long)ntohl(encap_ip->ip_dst.s_addr),
 	    (int)V_reg_vif_num);
 
 	/* NB: vifp was collected above; can it change on us? */
 	if_simloop(vifp, m, dst.sin_family, 0);
 
 	/* prepare the register head to send to the mrouting daemon */
 	m = mcp;
     }
 
 pim_input_to_daemon:
     /*
      * Pass the PIM message up to the daemon; if it is a Register message,
      * pass the 'head' only up to the daemon. This includes the
      * outer IP header, PIM header, PIM-Register header and the
      * inner IP header.
      * XXX: the outer IP header pkt size of a Register is not adjust to
      * reflect the fact that the inner multicast data is truncated.
      */
     *mp = m;
     rip_input(mp, offp, proto);
 
     return (IPPROTO_DONE);
 }
 
 /*
  * Sysctl handler that copies out every struct mfc currently linked into
  * V_mfchashtbl, one bucket after another, while holding the MFC lock.
  *
  * Returns EPERM when the request carries a new value (req->newptr set),
  * 0 when the hash table has not been allocated, and otherwise the first
  * non-zero status from sysctl_wire_old_buffer() or SYSCTL_OUT(), or 0.
  */
 static int
 sysctl_mfctable(SYSCTL_HANDLER_ARGS)
 {
 	struct mfc	*entry;
 	int		 bucket, rc;
 
 	/* This node cannot be written to. */
 	if (req->newptr != NULL)
 		return (EPERM);
 	/* No table, nothing to export. */
 	if (V_mfchashtbl == NULL)	/* XXX unlocked */
 		return (0);
 	rc = sysctl_wire_old_buffer(req, 0);
 	if (rc != 0)
 		return (rc);
 
 	MFC_LOCK();
 	/* The rc test in the loop header ends the walk on the first failure. */
 	for (bucket = 0; rc == 0 && bucket < mfchashsize; bucket++) {
 		LIST_FOREACH(entry, &V_mfchashtbl[bucket], mfc_hash) {
 			rc = SYSCTL_OUT(req, entry, sizeof(struct mfc));
 			if (rc != 0)
 				break;
 		}
 	}
 	MFC_UNLOCK();
 	return (rc);
 }
 
 /*
  * Read-only (CTLFLAG_RD) sysctl node "mfctable" under _net_inet_ip; reads
  * are served by sysctl_mfctable().
  */
 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD,
     sysctl_mfctable, "IPv4 Multicast Forwarding Table "
     "(struct *mfc[mfchashsize], netinet/ip_mroute.h)");
 
 /*
  * Initialisation routine registered through VNET_SYSINIT below.
  *
  * Allocates the zero-filled V_nexpire array (mfchashsize bytes, M_WAITOK),
  * clears V_bw_meter_timers and initialises the three callouts used by this
  * file.  The argument is unused.
  */
 static void
 vnet_mroute_init(const void *unused __unused)
 {
 
 	MALLOC(V_nexpire, u_char *, mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO);
 	bzero(V_bw_meter_timers, sizeof(V_bw_meter_timers));
 	callout_init(&V_expire_upcalls_ch, 1);
 	callout_init(&V_bw_upcalls_ch, 1);
 	callout_init(&V_bw_meter_ch, 1);
 }
 
 /* Run vnet_mroute_init() at SI_SUB_PSEUDO / SI_ORDER_ANY. */
 VNET_SYSINIT(vnet_mroute_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mroute_init,
 	NULL);
 
 /*
  * Teardown counterpart of vnet_mroute_init(): releases the V_nexpire array
  * and clears the pointer so that no stale reference remains.  The argument
  * is unused.
  */
 static void
 vnet_mroute_uninit(const void *unused __unused)
 {
 
 	FREE(V_nexpire, M_MRTABLE);
 	V_nexpire = NULL;
 }
 
 /* Run vnet_mroute_uninit() at SI_SUB_PSEUDO / SI_ORDER_MIDDLE. */
 VNET_SYSUNINIT(vnet_mroute_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, 
 	vnet_mroute_uninit, NULL);
 
 /*
  * Module event handler for ip_mroute.
  *
  * MOD_LOAD:
  *	Initialises the MROUTER, MFC and VIF locks, registers the
  *	ifnet_departure_event handler, reads the net.inet.ip.mfchashsize and
  *	net.inet.pim.squelch_wholepkt tunables, attaches the PIM
  *	encapsulation handler and finally publishes the X_* entry points
  *	through the global function pointers.  Every failure path undoes all
  *	the steps that already succeeded and returns EINVAL.
  *
  * MOD_UNLOAD:
  *	Refused with EINVAL while ip_mrouter_cnt is non-zero.  Otherwise
  *	marks the module as unloading, deregisters the event handler,
  *	detaches the PIM encapsulation, clears the function pointers and
  *	destroys the locks.
  *
  * Any other event type yields EOPNOTSUPP.  The mod and unused arguments
  * are not referenced.
  */
 static int
 ip_mroute_modevent(module_t mod, int type, void *unused)
 {
 
     switch (type) {
     case MOD_LOAD:
 	MROUTER_LOCK_INIT();
 
 	if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
 	    if_detached_event, NULL, EVENTHANDLER_PRI_ANY);
 	if (if_detach_event_tag == NULL) {
 		printf("ip_mroute: unable to register "
 		    "ifnet_departure_event handler\n");
 		MROUTER_LOCK_DESTROY();
 		return (EINVAL);
 	}
 
 	MFC_LOCK_INIT();
 	VIF_LOCK_INIT();
 
 	/* Fall back to the default unless the tunable is a power of 2. */
 	mfchashsize = MFCHASHSIZE;
 	if (TUNABLE_ULONG_FETCH("net.inet.ip.mfchashsize", &mfchashsize) &&
 	    !powerof2(mfchashsize)) {
 		printf("WARNING: %s not a power of 2; using default\n",
 		    "net.inet.ip.mfchashsize");
 		mfchashsize = MFCHASHSIZE;
 	}
 
 	pim_squelch_wholepkt = 0;
 	TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt",
 	    &pim_squelch_wholepkt);
 
 	pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM,
 	    pim_encapcheck, &in_pim_protosw, NULL);
 	if (pim_encap_cookie == NULL) {
 		printf("ip_mroute: unable to attach pim encap\n");
 		/*
 		 * Undo everything done so far.  The departure event
 		 * handler must be removed as well: leaving it registered
 		 * after a failed load would let it run once the locks
 		 * below have been destroyed.
 		 */
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
 		    if_detach_event_tag);
 		VIF_LOCK_DESTROY();
 		MFC_LOCK_DESTROY();
 		MROUTER_LOCK_DESTROY();
 		return (EINVAL);
 	}
 
 	/* Publish the entry points only after all setup has succeeded. */
 	ip_mcast_src = X_ip_mcast_src;
 	ip_mforward = X_ip_mforward;
 	ip_mrouter_done = X_ip_mrouter_done;
 	ip_mrouter_get = X_ip_mrouter_get;
 	ip_mrouter_set = X_ip_mrouter_set;
 
 	ip_rsvp_force_done = X_ip_rsvp_force_done;
 	ip_rsvp_vif = X_ip_rsvp_vif;
 
 	legal_vif_num = X_legal_vif_num;
 	mrt_ioctl = X_mrt_ioctl;
 	rsvp_input_p = X_rsvp_input;
 	break;
 
     case MOD_UNLOAD:
 	/*
 	 * Typically module unload happens after the user-level
 	 * process has shutdown the kernel services (the check
 	 * below insures someone can't just yank the module out
 	 * from under a running process).  But if the module is
 	 * just loaded and then unloaded w/o starting up a user
 	 * process we still need to cleanup.
 	 */
 	MROUTER_LOCK();
 	if (ip_mrouter_cnt != 0) {
 	    MROUTER_UNLOCK();
 	    return (EINVAL);
 	}
 	ip_mrouter_unloading = 1;
 	MROUTER_UNLOCK();
 
 	EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
 
 	if (pim_encap_cookie) {
 	    encap_detach(pim_encap_cookie);
 	    pim_encap_cookie = NULL;
 	}
 
 	/* Withdraw the entry points before the locks go away. */
 	ip_mcast_src = NULL;
 	ip_mforward = NULL;
 	ip_mrouter_done = NULL;
 	ip_mrouter_get = NULL;
 	ip_mrouter_set = NULL;
 
 	ip_rsvp_force_done = NULL;
 	ip_rsvp_vif = NULL;
 
 	legal_vif_num = NULL;
 	mrt_ioctl = NULL;
 	rsvp_input_p = NULL;
 
 	VIF_LOCK_DESTROY();
 	MFC_LOCK_DESTROY();
 	MROUTER_LOCK_DESTROY();
 	break;
 
     default:
 	return EOPNOTSUPP;
     }
     return 0;
 }
 
 /*
  * Module description: name "ip_mroute", events delivered to
  * ip_mroute_modevent(), third field 0.
  */
 static moduledata_t ip_mroutemod = {
     "ip_mroute",
     ip_mroute_modevent,
     0
 };
 
 /* Declare the module at SI_SUB_PSEUDO / SI_ORDER_MIDDLE. */
 DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE);
Index: projects/clang380-import/sys/netinet/ip_output.c
===================================================================
--- projects/clang380-import/sys/netinet/ip_output.c	(revision 293686)
+++ projects/clang380-import/sys/netinet/ip_output.c	(revision 293687)
@@ -1,1391 +1,1392 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_ipfw.h"
 #include "opt_ipsec.h"
 #include "opt_mbuf_stress_test.h"
 #include "opt_mpath.h"
 #include "opt_route.h"
 #include "opt_sctp.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/rmlock.h>
 #include <sys/sdt.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/ucred.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_llatbl.h>
 #include <net/netisr.h>
 #include <net/pfil.h>
 #include <net/route.h>
 #include <net/flowtable.h>
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
 #include <net/rss_config.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_rss.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #ifdef SCTP
 #include <netinet/sctp.h>
 #include <netinet/sctp_crc32.h>
 #endif
 
 #ifdef IPSEC
 #include <netinet/ip_ipsec.h>
 #include <netipsec/ipsec.h>
 #endif /* IPSEC*/
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 #ifdef MBUF_STRESS_TEST
 /*
  * Stress-test knob (sysctl mbuf_frag_size under _net_inet_ip, read/write).
  * When non-zero, ip_output() passes packets longer than this value through
  * m_fragment() before handing them to the interface.
  */
 static int mbuf_frag_size = 0;
 SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
 	&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
 #endif
 
 /* Called by ip_output() to loop a multicast datagram back locally. */
 static void	ip_mloopback(struct ifnet *, const struct mbuf *, int);
 
 
 /* Defined elsewhere; in_mcast_loop is consulted when no ip_moptions exist. */
 extern int in_mcast_loop;
 extern	struct protosw inetsw[];
 
 /*
  * Run the outbound packet filter hooks on *mp and react to whatever they
  * did to the packet.
  *
  * Parameters:
  *	mp	in/out packet; re-read after the hooks run because they may
  *		replace it or set it to NULL
  *	ifp	outgoing interface, passed to the hooks
  *	inp	PCB passed to the hooks
  *	dst	destination sockaddr; rewritten when the hooks changed the
  *		IP destination or attached a PACKET_TAG_IPFORWARD tag
  *	fibnum	in/out FIB number; updated when the hooks changed the
  *		packet's FIB
  *	error	out; receives the hook result or the netisr_queue() result
  *
  * Returns:
  *	 1	finished: the hooks failed or consumed the packet, or the
  *		packet was queued to NETISR_IP for local delivery
  *	 0	nothing relevant changed; continue with normal output
  *	-1	dst or *fibnum was changed; the caller must redo its lookup
  */
 static inline int
 ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, struct inpcb *inp,
     struct sockaddr_in *dst, int *fibnum, int *error)
 {
 	struct m_tag *fwd_tag = NULL;
 	struct mbuf *m;
 	struct in_addr odst;
 	struct ip *ip;
 
 	m = *mp;
 	ip = mtod(m, struct ip *);
 
 	/* Run through list of hooks for output packets. */
 	/* Remember the original destination to detect a rewrite below. */
 	odst.s_addr = ip->ip_dst.s_addr;
 	*error = pfil_run_hooks(&V_inet_pfil_hook, mp, ifp, PFIL_OUT, inp);
 	m = *mp;
 	if ((*error) != 0 || m == NULL)
 		return 1; /* Finished */
 
 	/* The hooks may have replaced the mbuf; refetch the header. */
 	ip = mtod(m, struct ip *);
 
 	/* See if destination IP address was changed by packet filter. */
 	if (odst.s_addr != ip->ip_dst.s_addr) {
 		m->m_flags |= M_SKIP_FIREWALL;
 		/* If destination is now ourself drop to ip_input(). */
 		if (in_localip(ip->ip_dst)) {
 			m->m_flags |= M_FASTFWD_OURS;
 			if (m->m_pkthdr.rcvif == NULL)
 				m->m_pkthdr.rcvif = V_loif;
 			/* Mark pending checksums as already valid. */
 			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 				m->m_pkthdr.csum_flags |=
 					CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 				m->m_pkthdr.csum_data = 0xffff;
 			}
 			m->m_pkthdr.csum_flags |=
 				CSUM_IP_CHECKED | CSUM_IP_VALID;
 #ifdef SCTP
 			if (m->m_pkthdr.csum_flags & CSUM_SCTP)
 				m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
 #endif
 			*error = netisr_queue(NETISR_IP, m);
 			return 1; /* Finished */
 		}
 
 		/* Destination is remote: rebuild dst from the new header. */
 		bzero(dst, sizeof(*dst));
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = ip->ip_dst;
 
 		return -1; /* Reloop */
 	}
 	/* See if fib was changed by packet filter. */
 	if ((*fibnum) != M_GETFIB(m)) {
 		m->m_flags |= M_SKIP_FIREWALL;
 		*fibnum = M_GETFIB(m);
 		return -1; /* Reloop for FIB change */
 	}
 
 	/* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
 	if (m->m_flags & M_FASTFWD_OURS) {
 		if (m->m_pkthdr.rcvif == NULL)
 			m->m_pkthdr.rcvif = V_loif;
 		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 			m->m_pkthdr.csum_flags |=
 				CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 			m->m_pkthdr.csum_data = 0xffff;
 		}
 #ifdef SCTP
 		if (m->m_pkthdr.csum_flags & CSUM_SCTP)
 			m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
 #endif
 		m->m_pkthdr.csum_flags |=
 			CSUM_IP_CHECKED | CSUM_IP_VALID;
 
 		*error = netisr_queue(NETISR_IP, m);
 		return 1; /* Finished */
 	}
 	/* Or forward to some other address? */
 	if ((m->m_flags & M_IP_NEXTHOP) &&
 	    ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) {
 		/* The sockaddr_in is stored right after the tag header. */
 		bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
 		m->m_flags |= M_SKIP_FIREWALL;
 		m->m_flags &= ~M_IP_NEXTHOP;
 		m_tag_delete(m, fwd_tag);
 
 		return -1; /* Reloop for CHANGE of dst */
 	}
 
 	return 0;
 }
 
 /*
  * IP output.  The packet in mbuf chain m contains a skeletal IP
  * header (with len, off, ttl, proto, tos, src, dst).
  * The mbuf chain containing the packet will be freed.
  * The mbuf opt, if present, will not be freed.
  * If route ro is present and has ro_rt initialized, route lookup would be
  * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
  * then result of route lookup is stored in ro->ro_rt.
  *
  * In the IP forwarding case, the packet will arrive with options already
  * inserted, so must have a NULL opt pointer.
  *
  * Parameters:
  *	m	packet to send; must carry a packet header
  *	opt	optional IP options to insert, or NULL
  *	ro	optional route cache, or NULL to use a local one
  *	flags	IP_* output flags; this function tests IP_FORWARDING,
  *		IP_RAWOUTPUT, IP_SENDONES, IP_ROUTETOIF, IP_ALLOWBROADCAST
  *		and IP_NODEFAULTFLOWID
  *	imo	optional multicast options, or NULL
  *	inp	optional PCB (must be locked), or NULL
  *
  * Returns 0 on success or an errno value.  When fragmentation fails, every
  * fragment that was already built is freed here.
  */
 int
 ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
     struct ip_moptions *imo, struct inpcb *inp)
 {
 	struct rm_priotracker in_ifa_tracker;
 	struct ip *ip;
 	struct ifnet *ifp = NULL;	/* keep compiler happy */
 	struct mbuf *m0;
 	int hlen = sizeof (struct ip);
 	int mtu;
 	int error = 0;
 	struct sockaddr_in *dst;
 	const struct sockaddr_in *gw;
 	struct in_ifaddr *ia;
 	int isbroadcast;
 	uint16_t ip_len, ip_off;
 	struct route iproute;
 	struct rtentry *rte;	/* cache for ro->ro_rt */
 	uint32_t fibnum;
 	int have_ia_ref;
 #ifdef IPSEC
 	int no_route_but_check_spd = 0;
 #endif
 	M_ASSERTPKTHDR(m);
 
 	if (inp != NULL) {
 		INP_LOCK_ASSERT(inp);
 		M_SETFIB(m, inp->inp_inc.inc_fibnum);
 		if ((flags & IP_NODEFAULTFLOWID) == 0) {
 			m->m_pkthdr.flowid = inp->inp_flowid;
 			M_HASHTYPE_SET(m, inp->inp_flowtype);
 		}
 	}
 
 	/* Without a caller-supplied route cache, use a zeroed local one. */
 	if (ro == NULL) {
 		ro = &iproute;
 		bzero(ro, sizeof (*ro));
 	}
 
 #ifdef FLOWTABLE
 	if (ro->ro_rt == NULL)
 		(void )flowtable_lookup(AF_INET, m, ro);
 #endif
 
 	if (opt) {
 		int len = 0;
 		m = ip_insertoptions(m, opt, &len);
 		if (len != 0)
 			hlen = len; /* ip->ip_hl is updated above */
 	}
 	ip = mtod(m, struct ip *);
 	ip_len = ntohs(ip->ip_len);
 	ip_off = ntohs(ip->ip_off);
 
 	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = hlen >> 2;
 		ip_fillid(ip);
 		IPSTAT_INC(ips_localout);
 	} else {
 		/* Header already set, fetch hlen from there */
 		hlen = ip->ip_hl << 2;
 	}
 
 	/*
 	 * dst/gw handling:
 	 *
 	 * dst can be rewritten but always points to &ro->ro_dst.
 	 * gw is readonly but can point either to dst OR rt_gateway,
 	 * therefore we need restore gw if we're redoing lookup.
 	 */
 	gw = dst = (struct sockaddr_in *)&ro->ro_dst;
 	fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
 	rte = ro->ro_rt;
 	/*
 	 * The address family should also be checked in case of sharing
 	 * the cache with IPv6.
 	 */
 	if (rte == NULL || dst->sin_family != AF_INET) {
 		bzero(dst, sizeof(*dst));
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = ip->ip_dst;
 	}
 again:
 	ia = NULL;
 	have_ia_ref = 0;
 	/*
 	 * If routing to interface only, short circuit routing lookup.
 	 * The use of an all-ones broadcast address implies this; an
 	 * interface is specified by the broadcast address of an interface,
 	 * or the destination address of a ptp interface.
 	 */
 	if (flags & IP_SENDONES) {
 		if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst),
 						      M_GETFIB(m)))) == NULL &&
 		    (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
 						    M_GETFIB(m)))) == NULL) {
 			IPSTAT_INC(ips_noroute);
 			error = ENETUNREACH;
 			goto bad;
 		}
 		have_ia_ref = 1;
 		ip->ip_dst.s_addr = INADDR_BROADCAST;
 		dst->sin_addr = ip->ip_dst;
 		ifp = ia->ia_ifp;
 		ip->ip_ttl = 1;
 		isbroadcast = 1;
 	} else if (flags & IP_ROUTETOIF) {
 		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
 						    M_GETFIB(m)))) == NULL &&
 		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0,
 						M_GETFIB(m)))) == NULL) {
 			IPSTAT_INC(ips_noroute);
 			error = ENETUNREACH;
 			goto bad;
 		}
 		have_ia_ref = 1;
 		ifp = ia->ia_ifp;
 		ip->ip_ttl = 1;
 		isbroadcast = in_broadcast(dst->sin_addr, ifp);
 	} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
 	    imo != NULL && imo->imo_multicast_ifp != NULL) {
 		/*
 		 * Bypass the normal routing lookup for multicast
 		 * packets if the interface is specified.
 		 */
 		ifp = imo->imo_multicast_ifp;
 		IFP_TO_IA(ifp, ia, &in_ifa_tracker);
 		if (ia)
 			have_ia_ref = 1;
 		isbroadcast = 0;	/* fool gcc */
 	} else {
 		/*
 		 * We want to do any cloning requested by the link layer,
 		 * as this is probably required in all cases for correct
 		 * operation (as it is for ARP).
 		 */
 		if (rte == NULL) {
 #ifdef RADIX_MPATH
 			rtalloc_mpath_fib(ro,
 			    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
 			    fibnum);
 #else
 			in_rtalloc_ign(ro, 0, fibnum);
 #endif
 			rte = ro->ro_rt;
 		}
 		if (rte == NULL ||
 		    (rte->rt_flags & RTF_UP) == 0 ||
 		    rte->rt_ifp == NULL ||
 		    !RT_LINK_IS_UP(rte->rt_ifp)) {
 #ifdef IPSEC
 			/*
 			 * There is no route for this packet, but it is
 			 * possible that a matching SPD entry exists.
 			 */
 			no_route_but_check_spd = 1;
 			mtu = 0; /* Silence GCC warning. */
 			goto sendit;
 #endif
 			IPSTAT_INC(ips_noroute);
 			error = EHOSTUNREACH;
 			goto bad;
 		}
 		ia = ifatoia(rte->rt_ifa);
 		ifp = rte->rt_ifp;
 		counter_u64_add(rte->rt_pksent, 1);
+		rt_update_ro_flags(ro);
 		if (rte->rt_flags & RTF_GATEWAY)
 			gw = (struct sockaddr_in *)rte->rt_gateway;
 		if (rte->rt_flags & RTF_HOST)
 			isbroadcast = (rte->rt_flags & RTF_BROADCAST);
 		else
 			isbroadcast = in_broadcast(gw->sin_addr, ifp);
 	}
 
 	/*
 	 * Calculate MTU.  If we have a route that is up, use that,
 	 * otherwise use the interface's MTU.
 	 */
 	if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST)))
 		mtu = rte->rt_mtu;
 	else
 		mtu = ifp->if_mtu;
 	/* Catch a possible divide by zero later. */
 	KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p",
 	    __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp));
 
 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 		m->m_flags |= M_MCAST;
 		/*
 		 * IP destination address is multicast.  Make sure "gw"
 		 * still points to the address in "ro".  (It may have been
 		 * changed to point to a gateway address, above.)
 		 */
 		gw = dst;
 		/*
 		 * See if the caller provided any multicast options
 		 */
 		if (imo != NULL) {
 			ip->ip_ttl = imo->imo_multicast_ttl;
 			if (imo->imo_multicast_vif != -1)
 				ip->ip_src.s_addr =
 				    ip_mcast_src ?
 				    ip_mcast_src(imo->imo_multicast_vif) :
 				    INADDR_ANY;
 		} else
 			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
 		/*
 		 * Confirm that the outgoing interface supports multicast.
 		 */
 		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
 			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
 				IPSTAT_INC(ips_noroute);
 				error = ENETUNREACH;
 				goto bad;
 			}
 		}
 		/*
 		 * If source address not specified yet, use address
 		 * of outgoing interface.
 		 */
 		if (ip->ip_src.s_addr == INADDR_ANY) {
 			/* Interface may have no addresses. */
 			if (ia != NULL)
 				ip->ip_src = IA_SIN(ia)->sin_addr;
 		}
 
 		if ((imo == NULL && in_mcast_loop) ||
 		    (imo && imo->imo_multicast_loop)) {
 			/*
 			 * Loop back multicast datagram if not expressly
 			 * forbidden to do so, even if we are not a member
 			 * of the group; ip_input() will filter it later,
 			 * thus deferring a hash lookup and mutex acquisition
 			 * at the expense of a cheap copy using m_copym().
 			 */
 			ip_mloopback(ifp, m, hlen);
 		} else {
 			/*
 			 * If we are acting as a multicast router, perform
 			 * multicast forwarding as if the packet had just
 			 * arrived on the interface to which we are about
 			 * to send.  The multicast forwarding function
 			 * recursively calls this function, using the
 			 * IP_FORWARDING flag to prevent infinite recursion.
 			 *
 			 * Multicasts that are looped back by ip_mloopback(),
 			 * above, will be forwarded by the ip_input() routine,
 			 * if necessary.
 			 */
 			if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) {
 				/*
 				 * If rsvp daemon is not running, do not
 				 * set ip_moptions. This ensures that the packet
 				 * is multicast and not just sent down one link
 				 * as prescribed by rsvpd.
 				 */
 				if (!V_rsvp_on)
 					imo = NULL;
 				if (ip_mforward &&
 				    ip_mforward(ip, ifp, m, imo) != 0) {
 					m_freem(m);
 					goto done;
 				}
 			}
 		}
 
 		/*
 		 * Multicasts with a time-to-live of zero may be looped-
 		 * back, above, but must not be transmitted on a network.
 		 * Also, multicasts addressed to the loopback interface
 		 * are not sent -- the above call to ip_mloopback() will
 		 * loop back a copy. ip_input() will drop the copy if
 		 * this host does not belong to the destination group on
 		 * the loopback interface.
 		 */
 		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
 			m_freem(m);
 			goto done;
 		}
 
 		goto sendit;
 	}
 
 	/*
 	 * If the source address is not specified yet, use the address
 	 * of the outgoing interface.
 	 */
 	if (ip->ip_src.s_addr == INADDR_ANY) {
 		/* Interface may have no addresses. */
 		if (ia != NULL) {
 			ip->ip_src = IA_SIN(ia)->sin_addr;
 		}
 	}
 
 	/*
 	 * Look for broadcast address and
 	 * verify user is allowed to send
 	 * such a packet.
 	 */
 	if (isbroadcast) {
 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 		if ((flags & IP_ALLOWBROADCAST) == 0) {
 			error = EACCES;
 			goto bad;
 		}
 		/* don't allow broadcast messages to be fragmented */
 		if (ip_len > mtu) {
 			error = EMSGSIZE;
 			goto bad;
 		}
 		m->m_flags |= M_BCAST;
 	} else {
 		m->m_flags &= ~M_BCAST;
 	}
 
 sendit:
 #ifdef IPSEC
 	switch(ip_ipsec_output(&m, inp, &error)) {
 	case 1:
 		goto bad;
 	case -1:
 		goto done;
 	case 0:
 	default:
 		break;	/* Continue with packet processing. */
 	}
 	/*
 	 * Check if there was a route for this packet; return error if not.
 	 */
 	if (no_route_but_check_spd) {
 		IPSTAT_INC(ips_noroute);
 		error = EHOSTUNREACH;
 		goto bad;
 	}
 	/* Update variables that are affected by ipsec4_output(). */
 	ip = mtod(m, struct ip *);
 	hlen = ip->ip_hl << 2;
 #endif /* IPSEC */
 
 	/* Jump over all PFIL processing if hooks are not active. */
 	if (PFIL_HOOKED(&V_inet_pfil_hook)) {
 		switch (ip_output_pfil(&m, ifp, inp, dst, &fibnum, &error)) {
 		case 1: /* Finished */
 			goto done;
 
 		case 0: /* Continue normally */
 			ip = mtod(m, struct ip *);
 			break;
 
 		case -1: /* Need to try again */
 			/* Reset everything for a new round */
 			RO_RTFREE(ro);
 			if (have_ia_ref)
 				ifa_free(&ia->ia_ifa);
 			ro->ro_prepend = NULL;
 			rte = NULL;
 			gw = dst;
 			ip = mtod(m, struct ip *);
 			goto again;
 
 		}
 	}
 
 	/* 127/8 must not appear on wire - RFC1122. */
 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
 			IPSTAT_INC(ips_badaddr);
 			error = EADDRNOTAVAIL;
 			goto bad;
 		}
 	}
 
 	/* Compute in software any checksum the interface cannot offload. */
 	m->m_pkthdr.csum_flags |= CSUM_IP;
 	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
 		in_delayed_cksum(m);
 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 #ifdef SCTP
 	if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
 		sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
 		m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
 	}
 #endif
 
 	/*
 	 * If small enough for interface, or the interface will take
 	 * care of the fragmentation for us, we can just send directly.
 	 */
 	if (ip_len <= mtu ||
 	    (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
 		ip->ip_sum = 0;
 		if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
 			ip->ip_sum = in_cksum(m, hlen);
 			m->m_pkthdr.csum_flags &= ~CSUM_IP;
 		}
 
 		/*
 		 * Record statistics for this interface address.
 		 * With CSUM_TSO the byte/packet count will be slightly
 		 * incorrect because we count the IP+TCP headers only
 		 * once instead of for every generated packet.
 		 */
 		if (!(flags & IP_FORWARDING) && ia) {
 			if (m->m_pkthdr.csum_flags & CSUM_TSO)
 				counter_u64_add(ia->ia_ifa.ifa_opackets,
 				    m->m_pkthdr.len / m->m_pkthdr.tso_segsz);
 			else
 				counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
 
 			counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len);
 		}
 #ifdef MBUF_STRESS_TEST
 		if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
 			m = m_fragment(m, M_NOWAIT, mbuf_frag_size);
 #endif
 		/*
 		 * Reset layer specific mbuf flags
 		 * to avoid confusing lower layers.
 		 */
 		m_clrprotoflags(m);
 		IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
 		error = (*ifp->if_output)(ifp, m,
 		    (const struct sockaddr *)gw, ro);
 		goto done;
 	}
 
 	/* Balk when DF bit is set or the interface didn't support TSO. */
 	if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
 		error = EMSGSIZE;
 		IPSTAT_INC(ips_cantfrag);
 		goto bad;
 	}
 
 	/*
 	 * Too large for interface; fragment if possible. If successful,
 	 * on return, m will point to a list of packets to be sent.
 	 */
 	error = ip_fragment(ip, &m, mtu, ifp->if_hwassist);
 	if (error) {
 		/*
 		 * On failure ip_fragment() may hand back a partially built
 		 * list of fragments linked through m_nextpkt.  m_freem()
 		 * only follows m_next, so jumping to "bad" would release
 		 * the first packet and leak the rest; free the whole list.
 		 */
 		for (; m != NULL; m = m0) {
 			m0 = m->m_nextpkt;
 			m->m_nextpkt = NULL;
 			m_freem(m);
 		}
 		goto done;
 	}
 	/* Send the fragments in order; after a failure free the remainder. */
 	for (; m; m = m0) {
 		m0 = m->m_nextpkt;
 		m->m_nextpkt = 0;
 		if (error == 0) {
 			/* Record statistics for this interface address. */
 			if (ia != NULL) {
 				counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
 				counter_u64_add(ia->ia_ifa.ifa_obytes,
 				    m->m_pkthdr.len);
 			}
 			/*
 			 * Reset layer specific mbuf flags
 			 * to avoid confusing lower layers.
 			 */
 			m_clrprotoflags(m);
 
 			IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
 			error = (*ifp->if_output)(ifp, m,
 			    (const struct sockaddr *)gw, ro);
 		} else
 			m_freem(m);
 	}
 
 	if (error == 0)
 		IPSTAT_INC(ips_fragmented);
 
 done:
 	if (ro == &iproute)
 		RO_RTFREE(ro);
 	else if (rte == NULL)
 		/*
 		 * If the caller supplied a route but somehow the reference
 		 * to it has been released need to prevent the caller
 		 * calling RTFREE on it again.
 		 */
 		ro->ro_rt = NULL;
 	if (have_ia_ref)
 		ifa_free(&ia->ia_ifa);
 	return (error);
 bad:
 	m_freem(m);
 	goto done;
 }
 
 /*
  * Create a chain of fragments which fit the given mtu. m_frag points to the
  * mbuf to be fragmented; on return it points to the chain with the fragments.
  * Return 0 if no error. If error, m_frag may contain a partially built
  * chain of fragments that should be freed by the caller.
  *
  * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
  *
  * Parameters:
  *	ip			IP header of the packet being fragmented
  *	m_frag			in/out: packet on entry, fragment list
  *				(linked through m_nextpkt) on return
  *	mtu			maximum size of each fragment including
  *				its IP header
  *	if_hwassist_flags	an IP header checksum is computed here only
  *				when CSUM_IP is requested on the packet and
  *				absent from these flags
  *
  * Returns 0, EMSGSIZE (DF set, or fewer than 8 payload bytes fit) or
  * ENOBUFS (mbuf allocation or copy failed).
  */
 int
 ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
     u_long if_hwassist_flags)
 {
 	int error = 0;
 	int hlen = ip->ip_hl << 2;
 	int len = (mtu - hlen) & ~7;	/* size of payload in each fragment */
 	int off;
 	struct mbuf *m0 = *m_frag;	/* the original packet		*/
 	int firstlen;
 	struct mbuf **mnext;
 	int nfrags;
 	uint16_t ip_len, ip_off;
 
 	ip_len = ntohs(ip->ip_len);
 	ip_off = ntohs(ip->ip_off);
 
 	if (ip_off & IP_DF) {	/* Fragmentation not allowed */
 		IPSTAT_INC(ips_cantfrag);
 		return EMSGSIZE;
 	}
 
 	/*
 	 * Must be able to put at least 8 bytes per fragment.
 	 */
 	if (len < 8)
 		return EMSGSIZE;
 
 	/*
 	 * If the interface will not calculate checksums on
 	 * fragmented packets, then do it here.
 	 */
 	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
 		in_delayed_cksum(m0);
 		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 #ifdef SCTP
 	if (m0->m_pkthdr.csum_flags & CSUM_SCTP) {
 		sctp_delayed_cksum(m0, hlen);
 		m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
 	}
 #endif
 	if (len > PAGE_SIZE) {
 		/*
 		 * Fragment large datagrams such that each segment
 		 * contains a multiple of PAGE_SIZE amount of data,
 		 * plus headers. This enables a receiver to perform
 		 * page-flipping zero-copy optimizations.
 		 *
 		 * XXX When does this help given that sender and receiver
 		 * could have different page sizes, and also mtu could
 		 * be less than the receiver's page size ?
 		 */
 		int newlen;
 
 		off = MIN(mtu, m0->m_pkthdr.len);
 
 		/*
 		 * firstlen (off - hlen) must be aligned on an
 		 * 8-byte boundary
 		 */
 		if (off < hlen)
 			goto smart_frag_failure;
 		off = ((off - hlen) & ~7) + hlen;
 		/* Round mtu down to a whole number of pages. */
 		newlen = (~PAGE_MASK) & mtu;
 		if ((newlen + sizeof (struct ip)) > mtu) {
 			/* we failed, go back the default */
 smart_frag_failure:
 			newlen = len;
 			off = hlen + len;
 		}
 		len = newlen;
 
 	} else {
 		off = hlen + len;
 	}
 
 	/* off is now the byte offset in m0 where the second fragment starts. */
 	firstlen = off - hlen;
 	mnext = &m0->m_nextpkt;		/* pointer to next packet */
 
 	/*
 	 * Loop through length of segment after first fragment,
 	 * make new header and copy data of each part and link onto chain.
 	 * Here, m0 is the original packet, m is the fragment being created.
 	 * The fragments are linked off the m_nextpkt of the original
 	 * packet, which after processing serves as the first fragment.
 	 */
 	for (nfrags = 1; off < ip_len; off += len, nfrags++) {
 		struct ip *mhip;	/* ip header on the fragment */
 		struct mbuf *m;
 		int mhlen = sizeof (struct ip);
 
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL) {
 			error = ENOBUFS;
 			IPSTAT_INC(ips_odropped);
 			goto done;
 		}
 		/*
 		 * Make sure the complete packet header gets copied
 		 * from the originating mbuf to the newly created
 		 * mbuf. This also ensures that existing firewall
 		 * classification(s), VLAN tags and so on get copied
 		 * to the resulting fragmented packet(s):
 		 */
 		if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) {
 			m_free(m);
 			error = ENOBUFS;
 			IPSTAT_INC(ips_odropped);
 			goto done;
 		}
 		/*
 		 * In the first mbuf, leave room for the link header, then
 		 * copy the original IP header including options. The payload
 		 * goes into an additional mbuf chain returned by m_copym().
 		 */
 		m->m_data += max_linkhdr;
 		mhip = mtod(m, struct ip *);
 		*mhip = *ip;
 		if (hlen > sizeof (struct ip)) {
 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
 			mhip->ip_v = IPVERSION;
 			mhip->ip_hl = mhlen >> 2;
 		}
 		m->m_len = mhlen;
 		/* XXX do we need to add ip_off below ? */
 		/* Host byte order here; converted with htons() further down. */
 		mhip->ip_off = ((off - hlen) >> 3) + ip_off;
 		/* The last fragment is shortened; all others get IP_MF. */
 		if (off + len >= ip_len)
 			len = ip_len - off;
 		else
 			mhip->ip_off |= IP_MF;
 		mhip->ip_len = htons((u_short)(len + mhlen));
 		m->m_next = m_copym(m0, off, len, M_NOWAIT);
 		if (m->m_next == NULL) {	/* copy failed */
 			m_free(m);
 			error = ENOBUFS;	/* ??? */
 			IPSTAT_INC(ips_odropped);
 			goto done;
 		}
 		m->m_pkthdr.len = mhlen + len;
 #ifdef MAC
 		mac_netinet_fragment(m0, m);
 #endif
 		mhip->ip_off = htons(mhip->ip_off);
 		mhip->ip_sum = 0;
 		if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
 			mhip->ip_sum = in_cksum(m, mhlen);
 			m->m_pkthdr.csum_flags &= ~CSUM_IP;
 		}
 		/* Append the finished fragment to the m_nextpkt list. */
 		*mnext = m;
 		mnext = &m->m_nextpkt;
 	}
 	IPSTAT_ADD(ips_ofragments, nfrags);
 
 	/*
 	 * Update first fragment by trimming what's been copied out
 	 * and updating header.
 	 */
 	/* The argument is negative here, so m_adj() trims from the tail. */
 	m_adj(m0, hlen + firstlen - ip_len);
 	m0->m_pkthdr.len = hlen + firstlen;
 	ip->ip_len = htons((u_short)m0->m_pkthdr.len);
 	ip->ip_off = htons(ip_off | IP_MF);
 	ip->ip_sum = 0;
 	if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
 		ip->ip_sum = in_cksum(m0, hlen);
 		m0->m_pkthdr.csum_flags &= ~CSUM_IP;
 	}
 
 done:
 	/* Also reached on error, with whatever fragments were linked so far. */
 	*m_frag = m0;
 	return error;
 }
 
 /*
  * Compute in software the payload checksum of the IP packet in m and store
  * it in the packet.
  *
  * The sum covers the bytes from the end of the IP header up to ip_len.  It
  * is written at the offset given by m_pkthdr.csum_data, counted from the
  * end of the IP header.  For CSUM_UDP packets a computed value of 0 is
  * replaced by 0xffff.  The checksum field must lie entirely inside one
  * mbuf of the chain (asserted).
  */
 void
 in_delayed_cksum(struct mbuf *m)
 {
 	struct ip *iph;
 	uint16_t cksum, hdrlen, pktlen, where;
 
 	iph = mtod(m, struct ip *);
 	hdrlen = iph->ip_hl << 2 ;
 	pktlen = ntohs(iph->ip_len);
 
 	cksum = in_cksum_skip(m, pktlen, hdrlen);
 	if ((m->m_pkthdr.csum_flags & CSUM_UDP) && cksum == 0)
 		cksum = 0xffff;
 
 	/* Position of the checksum field relative to the start of the packet. */
 	where = hdrlen + m->m_pkthdr.csum_data;
 
 	/* Walk the chain until the mbuf containing that position is found. */
 	for (; m != NULL && where >= m->m_len; m = m->m_next)
 		where -= m->m_len;
 
 	KASSERT(m != NULL, ("in_delayed_cksum: checksum outside mbuf chain."));
 	KASSERT(where + sizeof(u_short) <= m->m_len, ("in_delayed_cksum: checksum split between mbufs."));
 	*(u_short *)(m->m_data + where) = cksum;
 }
 
 /*
  * IP socket option processing.
  *
  * so	socket the option request refers to; its inpcb is the object
  *	that is read or modified.
  * sopt	request descriptor: level, direction (SOPT_SET / SOPT_GET),
  *	option name and the user buffer.
  *
  * Requests at a level other than IPPROTO_IP are rejected with EINVAL,
  * except that SOL_SOCKET sets of SO_REUSEADDR, SO_REUSEPORT and SO_SETFIB
  * are mirrored from the socket into the inpcb.  Unknown IPPROTO_IP options
  * yield ENOPROTOOPT.  Returns 0 on success or an errno value.
  */
 int
 ip_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct	inpcb *inp = sotoinpcb(so);
 	int	error, optval;
 #ifdef	RSS
 	uint32_t rss_bucket;
 	int retval;
 #endif
 
 	error = optval = 0;
 	if (sopt->sopt_level != IPPROTO_IP) {
 		error = EINVAL;
 
 		/*
 		 * The socket layer has already updated so_options /
 		 * so_fibnum; copy the relevant state into the inpcb.
 		 */
 		if (sopt->sopt_level == SOL_SOCKET &&
 		    sopt->sopt_dir == SOPT_SET) {
 			switch (sopt->sopt_name) {
 			case SO_REUSEADDR:
 				INP_WLOCK(inp);
 				if ((so->so_options & SO_REUSEADDR) != 0)
 					inp->inp_flags2 |= INP_REUSEADDR;
 				else
 					inp->inp_flags2 &= ~INP_REUSEADDR;
 				INP_WUNLOCK(inp);
 				error = 0;
 				break;
 			case SO_REUSEPORT:
 				INP_WLOCK(inp);
 				if ((so->so_options & SO_REUSEPORT) != 0)
 					inp->inp_flags2 |= INP_REUSEPORT;
 				else
 					inp->inp_flags2 &= ~INP_REUSEPORT;
 				INP_WUNLOCK(inp);
 				error = 0;
 				break;
 			case SO_SETFIB:
 				INP_WLOCK(inp);
 				inp->inp_inc.inc_fibnum = so->so_fibnum;
 				INP_WUNLOCK(inp);
 				error = 0;
 				break;
 			default:
 				break;
 			}
 		}
 		return (error);
 	}
 
 	switch (sopt->sopt_dir) {
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case IP_OPTIONS:
 #ifdef notyet
 		case IP_RETOPTS:
 #endif
 		{
 			struct mbuf *m;
 			/* The options must fit into a single plain mbuf. */
 			if (sopt->sopt_valsize > MLEN) {
 				error = EMSGSIZE;
 				break;
 			}
 			m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
 			if (m == NULL) {
 				error = ENOBUFS;
 				break;
 			}
 			m->m_len = sopt->sopt_valsize;
 			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
 					    m->m_len);
 			if (error) {
 				m_free(m);
 				break;
 			}
 			/* The mbuf is handed over to ip_pcbopts(). */
 			INP_WLOCK(inp);
 			error = ip_pcbopts(inp, sopt->sopt_name, m);
 			INP_WUNLOCK(inp);
 			return (error);
 		}
 
 		case IP_BINDANY:
 			/* Only checked when a thread is attached to the request. */
 			if (sopt->sopt_td != NULL) {
 				error = priv_check(sopt->sopt_td,
 				    PRIV_NETINET_BINDANY);
 				if (error)
 					break;
 			}
 			/* FALLTHROUGH */
 		case IP_BINDMULTI:
 #ifdef	RSS
 		case IP_RSS_LISTEN_BUCKET:
 #endif
 		case IP_TOS:
 		case IP_TTL:
 		case IP_MINTTL:
 		case IP_RECVOPTS:
 		case IP_RECVRETOPTS:
 		case IP_RECVDSTADDR:
 		case IP_RECVTTL:
 		case IP_RECVIF:
 		case IP_ONESBCAST:
 		case IP_DONTFRAG:
 		case IP_RECVTOS:
 		case IP_RECVFLOWID:
 #ifdef	RSS
 		case IP_RECVRSSBUCKETID:
 #endif
 			/* All of these take a single int argument. */
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 
 			switch (sopt->sopt_name) {
 			case IP_TOS:
 				inp->inp_ip_tos = optval;
 				break;
 
 			case IP_TTL:
 				inp->inp_ip_ttl = optval;
 				break;
 
 			case IP_MINTTL:
 				if (optval >= 0 && optval <= MAXTTL)
 					inp->inp_ip_minttl = optval;
 				else
 					error = EINVAL;
 				break;
 
 /* Set or clear a bit in inp_flags according to optval, under the pcb lock. */
 #define	OPTSET(bit) do {						\
 	INP_WLOCK(inp);							\
 	if (optval)							\
 		inp->inp_flags |= bit;					\
 	else								\
 		inp->inp_flags &= ~bit;					\
 	INP_WUNLOCK(inp);						\
 } while (0)
 
 /* Same as OPTSET() but for inp_flags2 and with an explicit value. */
 #define	OPTSET2(bit, val) do {						\
 	INP_WLOCK(inp);							\
 	if (val)							\
 		inp->inp_flags2 |= bit;					\
 	else								\
 		inp->inp_flags2 &= ~bit;				\
 	INP_WUNLOCK(inp);						\
 } while (0)
 
 			case IP_RECVOPTS:
 				OPTSET(INP_RECVOPTS);
 				break;
 
 			case IP_RECVRETOPTS:
 				OPTSET(INP_RECVRETOPTS);
 				break;
 
 			case IP_RECVDSTADDR:
 				OPTSET(INP_RECVDSTADDR);
 				break;
 
 			case IP_RECVTTL:
 				OPTSET(INP_RECVTTL);
 				break;
 
 			case IP_RECVIF:
 				OPTSET(INP_RECVIF);
 				break;
 
 			case IP_ONESBCAST:
 				OPTSET(INP_ONESBCAST);
 				break;
 			case IP_DONTFRAG:
 				OPTSET(INP_DONTFRAG);
 				break;
 			case IP_BINDANY:
 				OPTSET(INP_BINDANY);
 				break;
 			case IP_RECVTOS:
 				OPTSET(INP_RECVTOS);
 				break;
 			case IP_BINDMULTI:
 				OPTSET2(INP_BINDMULTI, optval);
 				break;
 			case IP_RECVFLOWID:
 				OPTSET2(INP_RECVFLOWID, optval);
 				break;
 #ifdef	RSS
 			case IP_RSS_LISTEN_BUCKET:
 				if ((optval >= 0) &&
 				    (optval < rss_getnumbuckets())) {
 					inp->inp_rss_listen_bucket = optval;
 					OPTSET2(INP_RSS_BUCKET_SET, 1);
 				} else {
 					error = EINVAL;
 				}
 				break;
 			case IP_RECVRSSBUCKETID:
 				OPTSET2(INP_RECVRSSBUCKETID, optval);
 				break;
 #endif
 			}
 			break;
 #undef OPTSET
 #undef OPTSET2
 
 		/*
 		 * Multicast socket options are processed by the in_mcast
 		 * module.
 		 */
 		case IP_MULTICAST_IF:
 		case IP_MULTICAST_VIF:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
 		case IP_ADD_MEMBERSHIP:
 		case IP_DROP_MEMBERSHIP:
 		case IP_ADD_SOURCE_MEMBERSHIP:
 		case IP_DROP_SOURCE_MEMBERSHIP:
 		case IP_BLOCK_SOURCE:
 		case IP_UNBLOCK_SOURCE:
 		case IP_MSFILTER:
 		case MCAST_JOIN_GROUP:
 		case MCAST_LEAVE_GROUP:
 		case MCAST_JOIN_SOURCE_GROUP:
 		case MCAST_LEAVE_SOURCE_GROUP:
 		case MCAST_BLOCK_SOURCE:
 		case MCAST_UNBLOCK_SOURCE:
 			error = inp_setmoptions(inp, sopt);
 			break;
 
 		case IP_PORTRANGE:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 
 			/* INP_LOWPORT and INP_HIGHPORT are mutually exclusive. */
 			INP_WLOCK(inp);
 			switch (optval) {
 			case IP_PORTRANGE_DEFAULT:
 				inp->inp_flags &= ~(INP_LOWPORT);
 				inp->inp_flags &= ~(INP_HIGHPORT);
 				break;
 
 			case IP_PORTRANGE_HIGH:
 				inp->inp_flags &= ~(INP_LOWPORT);
 				inp->inp_flags |= INP_HIGHPORT;
 				break;
 
 			case IP_PORTRANGE_LOW:
 				inp->inp_flags &= ~(INP_HIGHPORT);
 				inp->inp_flags |= INP_LOWPORT;
 				break;
 
 			default:
 				error = EINVAL;
 				break;
 			}
 			INP_WUNLOCK(inp);
 			break;
 
 #ifdef IPSEC
 		case IP_IPSEC_POLICY:
 		{
 			caddr_t req;
 			struct mbuf *m;
 
 			if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
 				break;
 			if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
 				break;
 			req = mtod(m, caddr_t);
 			error = ipsec_set_policy(inp, sopt->sopt_name, req,
 			    m->m_len, (sopt->sopt_td != NULL) ?
 			    sopt->sopt_td->td_ucred : NULL);
 			m_freem(m);
 			break;
 		}
 #endif /* IPSEC */
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case IP_OPTIONS:
 		case IP_RETOPTS:
 			if (inp->inp_options)
 				error = sooptcopyout(sopt,
 						     mtod(inp->inp_options,
 							  char *),
 						     inp->inp_options->m_len);
 			else
 				sopt->sopt_valsize = 0;
 			break;
 
 		case IP_TOS:
 		case IP_TTL:
 		case IP_MINTTL:
 		case IP_RECVOPTS:
 		case IP_RECVRETOPTS:
 		case IP_RECVDSTADDR:
 		case IP_RECVTTL:
 		case IP_RECVIF:
 		case IP_PORTRANGE:
 		case IP_ONESBCAST:
 		case IP_DONTFRAG:
 		case IP_BINDANY:
 		case IP_RECVTOS:
 		case IP_BINDMULTI:
 		case IP_FLOWID:
 		case IP_FLOWTYPE:
 		case IP_RECVFLOWID:
 #ifdef	RSS
 		case IP_RSSBUCKETID:
 		case IP_RECVRSSBUCKETID:
 #endif
 			switch (sopt->sopt_name) {
 
 			case IP_TOS:
 				optval = inp->inp_ip_tos;
 				break;
 
 			case IP_TTL:
 				optval = inp->inp_ip_ttl;
 				break;
 
 			case IP_MINTTL:
 				optval = inp->inp_ip_minttl;
 				break;
 
 /* Report a flag bit from inp_flags / inp_flags2 as 0 or 1. */
 #define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
 #define	OPTBIT2(bit)	(inp->inp_flags2 & bit ? 1 : 0)
 
 			case IP_RECVOPTS:
 				optval = OPTBIT(INP_RECVOPTS);
 				break;
 
 			case IP_RECVRETOPTS:
 				optval = OPTBIT(INP_RECVRETOPTS);
 				break;
 
 			case IP_RECVDSTADDR:
 				optval = OPTBIT(INP_RECVDSTADDR);
 				break;
 
 			case IP_RECVTTL:
 				optval = OPTBIT(INP_RECVTTL);
 				break;
 
 			case IP_RECVIF:
 				optval = OPTBIT(INP_RECVIF);
 				break;
 
 			case IP_PORTRANGE:
 				if (inp->inp_flags & INP_HIGHPORT)
 					optval = IP_PORTRANGE_HIGH;
 				else if (inp->inp_flags & INP_LOWPORT)
 					optval = IP_PORTRANGE_LOW;
 				else
 					optval = 0;
 				break;
 
 			case IP_ONESBCAST:
 				optval = OPTBIT(INP_ONESBCAST);
 				break;
 			case IP_DONTFRAG:
 				optval = OPTBIT(INP_DONTFRAG);
 				break;
 			case IP_BINDANY:
 				optval = OPTBIT(INP_BINDANY);
 				break;
 			case IP_RECVTOS:
 				optval = OPTBIT(INP_RECVTOS);
 				break;
 			case IP_FLOWID:
 				optval = inp->inp_flowid;
 				break;
 			case IP_FLOWTYPE:
 				optval = inp->inp_flowtype;
 				break;
 			case IP_RECVFLOWID:
 				optval = OPTBIT2(INP_RECVFLOWID);
 				break;
 #ifdef	RSS
 			case IP_RSSBUCKETID:
 				retval = rss_hash2bucket(inp->inp_flowid,
 				    inp->inp_flowtype,
 				    &rss_bucket);
 				if (retval == 0)
 					optval = rss_bucket;
 				else
 					error = EINVAL;
 				break;
 			case IP_RECVRSSBUCKETID:
 				optval = OPTBIT2(INP_RECVRSSBUCKETID);
 				break;
 #endif
 			case IP_BINDMULTI:
 				optval = OPTBIT2(INP_BINDMULTI);
 				break;
 			}
 			/*
 			 * Only copy the value out if gathering it succeeded;
 			 * otherwise a successful copyout would hide the
 			 * failure recorded above (e.g. IP_RSSBUCKETID).
 			 */
 			if (error == 0)
 				error = sooptcopyout(sopt, &optval,
 				    sizeof optval);
 			break;
 
 		/*
 		 * Multicast socket options are processed by the in_mcast
 		 * module.
 		 */
 		case IP_MULTICAST_IF:
 		case IP_MULTICAST_VIF:
 		case IP_MULTICAST_TTL:
 		case IP_MULTICAST_LOOP:
 		case IP_MSFILTER:
 			error = inp_getmoptions(inp, sopt);
 			break;
 
 #ifdef IPSEC
 		case IP_IPSEC_POLICY:
 		{
 			struct mbuf *m = NULL;
 			caddr_t req = NULL;
 			size_t len = 0;
 
 			/*
 			 * No request buffer is passed in (req/len stay
 			 * NULL/0); the policy comes back in m.
 			 */
 			error = ipsec_get_policy(sotoinpcb(so), req, len, &m);
 			if (error == 0)
 				error = soopt_mcopyout(sopt, m); /* XXX */
 			if (error == 0)
 				m_freem(m);
 			break;
 		}
 #endif /* IPSEC */
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 	}
 	return (error);
 }
 
 /*
  * Loop a copy of an outgoing IP multicast packet back to the input queue
  * of the given interface.  Called from ip_output().  The loopback
  * "driver" output path is used via if_simloop(), even though ifp might
  * NOT be a loopback interface -- evil, but easier than replicating that
  * code here.
  *
  * ifp	interface the copy is looped back on.
  * m	original packet; it is never modified, only duplicated.
  * hlen	IP header length in bytes.
  *
  * If the copy cannot be allocated the loopback is silently skipped.
  */
 static void
 ip_mloopback(struct ifnet *ifp, const struct mbuf *m, int hlen)
 {
 	struct mbuf *dup;
 	struct ip *iph;
 
 	/*
 	 * Work on a deep copy: the checksum fields are rewritten below and
 	 * the caller's packet must stay untouched.
 	 */
 	dup = m_dup(m, M_NOWAIT);
 	if (dup == NULL)
 		return;
 
 	/* The IP header must be writable and contiguous. */
 	if (!M_WRITABLE(dup) || dup->m_len < hlen) {
 		dup = m_pullup(dup, hlen);
 		if (dup == NULL)
 			return;
 	}
 
 	/* If needed, compute the checksum and mark it as valid. */
 	if ((dup->m_pkthdr.csum_flags & CSUM_DELAY_DATA) != 0) {
 		in_delayed_cksum(dup);
 		dup->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 		dup->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 		dup->m_pkthdr.csum_data = 0xffff;
 	}
 
 	/*
 	 * We don't bother to fragment if the IP length is greater
 	 * than the interface's MTU.  Can this possibly matter?
 	 */
 	iph = mtod(dup, struct ip *);
 	iph->ip_sum = 0;
 	iph->ip_sum = in_cksum(dup, hlen);
 	if_simloop(ifp, dup, AF_INET, 0);
 }
Index: projects/clang380-import/sys/netinet/raw_ip.c
===================================================================
--- projects/clang380-import/sys/netinet/raw_ip.c	(revision 293686)
+++ projects/clang380-import/sys/netinet/raw_ip.c	(revision 293687)
@@ -1,1131 +1,1132 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 
 #include <sys/param.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
+#include <sys/eventhandler.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/rmlock.h>
 #include <sys/rwlock.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_mroute.h>
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #endif /*IPSEC*/
 
 #include <machine/stdarg.h>
 #include <security/mac/mac_framework.h>
 
 VNET_DEFINE(int, ip_defttl) = IPDEFTTL;
 SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(ip_defttl), 0,
     "Maximum TTL on IP packets");
 
 VNET_DEFINE(struct inpcbhead, ripcb);
 VNET_DEFINE(struct inpcbinfo, ripcbinfo);
 
 #define	V_ripcb			VNET(ripcb)
 #define	V_ripcbinfo		VNET(ripcbinfo)
 
 /*
  * Control and data hooks for ipfw, dummynet, divert and so on.
  * The data hooks are not used here but it is convenient
  * to keep them all in one place.
  */
 VNET_DEFINE(ip_fw_chk_ptr_t, ip_fw_chk_ptr) = NULL;
 VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL;
 
 int	(*ip_dn_ctl_ptr)(struct sockopt *);
 int	(*ip_dn_io_ptr)(struct mbuf **, int, struct ip_fw_args *);
 void	(*ip_divert_ptr)(struct mbuf *, int);
 int	(*ng_ipfw_input_p)(struct mbuf **, int,
 			struct ip_fw_args *, int);
 
 #ifdef INET
 /*
  * Hooks for multicast routing. They all default to NULL, so leave them not
  * initialized and rely on BSS being set to 0.
  */
 
 /*
  * The socket used to communicate with the multicast routing daemon.
  */
 VNET_DEFINE(struct socket *, ip_mrouter);
 
 /*
  * The various mrouter and rsvp functions.
  */
 int (*ip_mrouter_set)(struct socket *, struct sockopt *);
 int (*ip_mrouter_get)(struct socket *, struct sockopt *);
 int (*ip_mrouter_done)(void);
 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
 		   struct ip_moptions *);
 int (*mrt_ioctl)(u_long, caddr_t, int);
 int (*legal_vif_num)(int);
 u_long (*ip_mcast_src)(int);
 
 int (*rsvp_input_p)(struct mbuf **, int *, int);
 int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
 void (*ip_rsvp_force_done)(struct socket *);
 #endif /* INET */
 
 u_long	rip_sendspace = 9216;
 SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW,
     &rip_sendspace, 0, "Maximum outgoing raw IP datagram size");
 
 u_long	rip_recvspace = 9216;
 SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW,
     &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams");
 
 /*
  * Hash functions
  *
  * INP_PCBHASH_RAW_SIZE is the hash size handed to in_pcbinfo_init() by
  * rip_init().  INP_PCBHASH_RAW() sums protocol and both addresses, so the
  * result does not depend on which address is passed as laddr and which as
  * faddr.  The "+ 1" keeps the result away from bucket 0, which
  * rip_inshash() uses for pcbs without a complete protocol/laddr/faddr
  * triple.
  * NOTE(review): presumably mask is ipi_hashmask == size - 1, so the result
  * lies in 1..mask -- confirm against in_pcbinfo_init().
  */
 
 #define INP_PCBHASH_RAW_SIZE	256
 #define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \
         (((proto) + (laddr) + (faddr)) % (mask) + 1)
 
 #ifdef INET
 /*
  * Link a raw pcb into the hash table of its pcbinfo.  A pcb with a
  * protocol and both addresses set goes into the bucket selected by
  * INP_PCBHASH_RAW(); every other pcb goes into bucket 0.  Both the
  * pcbinfo write lock and the pcb write lock must be held.
  */
 static void
 rip_inshash(struct inpcb *inp)
 {
 	struct inpcbinfo *info;
 	int bucket;
 
 	info = inp->inp_pcbinfo;
 	INP_INFO_WLOCK_ASSERT(info);
 	INP_WLOCK_ASSERT(inp);
 
 	/* Bucket 0 collects everything that is not fully specified. */
 	bucket = 0;
 	if (inp->inp_ip_p != 0 &&
 	    inp->inp_laddr.s_addr != INADDR_ANY &&
 	    inp->inp_faddr.s_addr != INADDR_ANY)
 		bucket = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr,
 		    inp->inp_faddr.s_addr, info->ipi_hashmask);
 
 	LIST_INSERT_HEAD(&info->ipi_hashbase[bucket], inp, inp_hash);
 }
 
 /*
  * Unlink a raw pcb from whichever hash bucket it is currently on.  The
  * pcbinfo write lock and the pcb write lock must both be held.
  */
 static void
 rip_delhash(struct inpcb *inp)
 {
 
 	INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	LIST_REMOVE(inp, inp_hash);
 }
 #endif /* INET */
 
 /*
  * Raw interface to IP protocol.
  */
 
 /*
  * Initialize raw connection block q.
  */
 /*
  * Event handler registered by rip_init() for maxsockets_change: set the
  * limit of the raw pcb zone to the current value of maxsockets.  The tag
  * argument is not used.
  */
 static void
 rip_zone_change(void *tag)
 {
 
 	uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets);
 }
 
 /*
  * Item init callback that rip_init() passes to in_pcbinfo_init():
  * initializes the lock of the inpcb stored at mem.  The size and flags
  * arguments are not used.  Always returns 0.
  */
 static int
 rip_inpcb_init(void *mem, int size, int flags)
 {
 	struct inpcb *inp = mem;
 
 	INP_LOCK_INIT(inp, "inp", "rawinp");
 	return (0);
 }
 
 /*
  * Set up the raw IP pcbinfo (pcb list, hash table of INP_PCBHASH_RAW_SIZE
  * and the "ripcb" zone) and register rip_zone_change() so the zone limit
  * is updated on maxsockets_change events.
  */
 void
 rip_init(void)
 {
 
 	in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE,
 	    1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE,
 	    IPI_HASHFIELDS_NONE);
 	EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
 	    EVENTHANDLER_PRI_ANY);
 }
 
 #ifdef VIMAGE
 /*
  * Tear down the raw IP pcbinfo that rip_init() set up.  Only built when
  * VIMAGE is defined.
  */
 void
 rip_destroy(void)
 {
 
 	in_pcbinfo_destroy(&V_ripcbinfo);
 }
 #endif
 
 #ifdef INET
 /*
  * Deliver one packet to the socket behind the raw pcb "last", which the
  * caller must have locked.
  *
  * The packet is dropped when a delivery policy rejects it: the IPsec
  * inbound check (IPSEC), the MAC delivery check (MAC), or a TTL below the
  * socket's configured minimum.  Otherwise it is appended, together with
  * any control data, to the socket's receive buffer.  The mbuf chain n is
  * consumed in every case.
  *
  * Returns 1 if the packet was rejected by policy, 0 otherwise (including
  * when the receive buffer had no room and the packet was dropped).
  */
 static int
 rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n,
     struct sockaddr_in *ripsrc)
 {
 	struct mbuf *ctl;
 	struct socket *sock;
 	int reject;
 
 	INP_LOCK_ASSERT(last);
 
 	reject = 0;
 #ifdef IPSEC
 	/* check AH/ESP integrity. */
 	if (ipsec4_in_reject(n, last))
 		reject = 1;
 #endif /* IPSEC */
 #ifdef MAC
 	if (reject == 0 && mac_inpcb_check_deliver(last, n) != 0)
 		reject = 1;
 #endif
 	/* Check the minimum TTL for socket. */
 	if (last->inp_ip_minttl != 0 && last->inp_ip_minttl > ip->ip_ttl)
 		reject = 1;
 
 	if (reject != 0) {
 		m_freem(n);
 		return (reject);
 	}
 
 	ctl = NULL;
 	sock = last->inp_socket;
 	if ((last->inp_flags & INP_CONTROLOPTS) != 0 ||
 	    (sock->so_options & (SO_TIMESTAMP | SO_BINTIME)) != 0)
 		ip_savecontrol(last, &ctl, ip, n);
 
 	SOCKBUF_LOCK(&sock->so_rcv);
 	if (sbappendaddr_locked(&sock->so_rcv, (struct sockaddr *)ripsrc,
 	    n, ctl) != 0) {
 		/* Queued; the wakeup also drops the sockbuf lock. */
 		sorwakeup_locked(sock);
 		return (reject);
 	}
 
 	/* should notify about lost packet */
 	m_freem(n);
 	if (ctl != NULL)
 		m_freem(ctl);
 	SOCKBUF_UNLOCK(&sock->so_rcv);
 	return (reject);
 }
 
 /*
  * Setup generic address and protocol structures for raw_input routine, then
  * pass them along with mbuf chain.
  *
  * mp	in: the received packet (starting at the IP header); *mp is set to
  *	NULL because the packet is always consumed here.
  * offp	not used.
  * proto	IP protocol number of the packet.
  *
  * Two hash chains are searched: the chain selected by protocol and both
  * addresses, which holds fully specified pcbs, and chain 0, which holds
  * all others.  Every matching socket except the last one gets a copy; the
  * last match receives the original mbuf chain.  Always returns
  * IPPROTO_DONE.
  */
 int
 rip_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct ifnet *ifp;
 	struct mbuf *m = *mp;
 	struct ip *ip = mtod(m, struct ip *);
 	struct inpcb *inp, *last;
 	struct sockaddr_in ripsrc;
 	int hash;
 
 	*mp = NULL;
 
 	bzero(&ripsrc, sizeof(ripsrc));
 	ripsrc.sin_len = sizeof(ripsrc);
 	ripsrc.sin_family = AF_INET;
 	ripsrc.sin_addr = ip->ip_src;
 	last = NULL;
 
 	ifp = m->m_pkthdr.rcvif;
 
 	/* First pass: pcbs with protocol, local and foreign address set. */
 	hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr,
 	    ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
 	INP_INFO_RLOCK(&V_ripcbinfo);
 	LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) {
 		if (inp->inp_ip_p != proto)
 			continue;
 #ifdef INET6
 		/* XXX inp locking */
 		if ((inp->inp_vflag & INP_IPV4) == 0)
 			continue;
 #endif
 		if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr)
 			continue;
 		if (inp->inp_faddr.s_addr != ip->ip_src.s_addr)
 			continue;
 		if (jailed_without_vnet(inp->inp_cred)) {
 			/*
 			 * XXX: If faddr was bound to multicast group,
 			 * jailed raw socket will drop datagram.
 			 */
 			if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
 				continue;
 		}
 		/*
 		 * Delivery lags one match behind so that the final match
 		 * can be given the original chain instead of a copy.
 		 */
 		if (last != NULL) {
 			struct mbuf *n;
 
 			n = m_copy(m, 0, (int)M_COPYALL);
 			if (n != NULL)
 				(void) rip_append(last, ip, n, &ripsrc);
 			/* XXX count dropped packet */
 			INP_RUNLOCK(last);
 		}
 		INP_RLOCK(inp);
 		last = inp;
 	}
 	/* Second pass: chain 0, pcbs with wildcard protocol or addresses. */
 	LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) {
 		if (inp->inp_ip_p && inp->inp_ip_p != proto)
 			continue;
 #ifdef INET6
 		/* XXX inp locking */
 		if ((inp->inp_vflag & INP_IPV4) == 0)
 			continue;
 #endif
 		if (!in_nullhost(inp->inp_laddr) &&
 		    !in_hosteq(inp->inp_laddr, ip->ip_dst))
 			continue;
 		if (!in_nullhost(inp->inp_faddr) &&
 		    !in_hosteq(inp->inp_faddr, ip->ip_src))
 			continue;
 		if (jailed_without_vnet(inp->inp_cred)) {
 			/*
 			 * Allow raw socket in jail to receive multicast;
 			 * assume process had PRIV_NETINET_RAW at attach,
 			 * and fall through into normal filter path if so.
 			 */
 			if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
 			    prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0)
 				continue;
 		}
 		/*
 		 * If this raw socket has multicast state, and we
 		 * have received a multicast, check if this socket
 		 * should receive it, as multicast filtering is now
 		 * the responsibility of the transport layer.
 		 */
 		if (inp->inp_moptions != NULL &&
 		    IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
 			/*
 			 * If the incoming datagram is for IGMP, allow it
 			 * through unconditionally to the raw socket.
 			 *
 			 * In the case of IGMPv2, we may not have explicitly
 			 * joined the group, and may have set IFF_ALLMULTI
 			 * on the interface. imo_multi_filter() may discard
 			 * control traffic we actually need to see.
 			 *
 			 * Userland multicast routing daemons should continue
 			 * filter the control traffic appropriately.
 			 */
 			int blocked;
 
 			blocked = MCAST_PASS;
 			if (proto != IPPROTO_IGMP) {
 				struct sockaddr_in group;
 
 				bzero(&group, sizeof(struct sockaddr_in));
 				group.sin_len = sizeof(struct sockaddr_in);
 				group.sin_family = AF_INET;
 				group.sin_addr = ip->ip_dst;
 
 				blocked = imo_multi_filter(inp->inp_moptions,
 				    ifp,
 				    (struct sockaddr *)&group,
 				    (struct sockaddr *)&ripsrc);
 			}
 
 			if (blocked != MCAST_PASS) {
 				IPSTAT_INC(ips_notmember);
 				continue;
 			}
 		}
 		if (last != NULL) {
 			struct mbuf *n;
 
 			n = m_copy(m, 0, (int)M_COPYALL);
 			if (n != NULL)
 				(void) rip_append(last, ip, n, &ripsrc);
 			/* XXX count dropped packet */
 			INP_RUNLOCK(last);
 		}
 		INP_RLOCK(inp);
 		last = inp;
 	}
 	INP_INFO_RUNLOCK(&V_ripcbinfo);
 	if (last != NULL) {
 		/*
 		 * rip_append() returns non-zero when policy rejected the
 		 * packet, i.e. it was NOT delivered; take it back out of
 		 * the delivered count, exactly as the no-match case below
 		 * does.
 		 */
 		if (rip_append(last, ip, m, &ripsrc) != 0)
 			IPSTAT_DEC(ips_delivered);
 		INP_RUNLOCK(last);
 	} else {
 		m_freem(m);
 		IPSTAT_INC(ips_noproto);
 		IPSTAT_DEC(ips_delivered);
 	}
 	return (IPPROTO_DONE);
 }
 
 /*
  * Generate IP header and pass packet to ip_output.  Tack on options user may
  * have setup with control call.
  *
  * m	packet to send; always consumed, also on error.
  * so	raw socket the packet is sent on.
  * ...	a single u_long: the destination address, stored as-is into
  *	ip_dst when the header is built here.
  *
  * Without INP_HDRINCL an IP header is prepended and filled in from the
  * pcb.  With INP_HDRINCL the caller's header is used after sanity checks.
  * Returns 0 or an errno value (EMSGSIZE, ENOBUFS, EINVAL, a jail check
  * error, or whatever ip_output() returns).
  */
 int
 rip_output(struct mbuf *m, struct socket *so, ...)
 {
 	struct ip *ip;
 	int error;
 	struct inpcb *inp = sotoinpcb(so);
 	va_list ap;
 	u_long dst;
 	int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) |
 	    IP_ALLOWBROADCAST;
 
 	va_start(ap, so);
 	dst = va_arg(ap, u_long);
 	va_end(ap);
 
 	/*
 	 * If the user handed us a complete IP packet, use it.  Otherwise,
 	 * allocate an mbuf for a header and fill it in.
 	 */
 	if ((inp->inp_flags & INP_HDRINCL) == 0) {
 		if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) {
 			m_freem(m);
 			return(EMSGSIZE);
 		}
 		M_PREPEND(m, sizeof(struct ip), M_NOWAIT);
 		if (m == NULL)
 			return(ENOBUFS);
 
 		INP_RLOCK(inp);
 		ip = mtod(m, struct ip *);
 		ip->ip_tos = inp->inp_ip_tos;
 		if (inp->inp_flags & INP_DONTFRAG)
 			ip->ip_off = htons(IP_DF);
 		else
 			ip->ip_off = htons(0);
 		ip->ip_p = inp->inp_ip_p;
 		ip->ip_len = htons(m->m_pkthdr.len);
 		ip->ip_src = inp->inp_laddr;
 		ip->ip_dst.s_addr = dst;
 		if (jailed(inp->inp_cred)) {
 			/*
 			 * prison_local_ip4() would be good enough but would
 			 * let a source of INADDR_ANY pass, which we do not
 			 * want to see from jails.
 			 */
 			if (ip->ip_src.s_addr == INADDR_ANY) {
 				error = in_pcbladdr(inp, &ip->ip_dst, &ip->ip_src,
 				    inp->inp_cred);
 			} else {
 				error = prison_local_ip4(inp->inp_cred,
 				    &ip->ip_src);
 			}
 			if (error != 0) {
 				INP_RUNLOCK(inp);
 				m_freem(m);
 				return (error);
 			}
 		}
 		ip->ip_ttl = inp->inp_ip_ttl;
 	} else {
 		if (m->m_pkthdr.len > IP_MAXPACKET) {
 			m_freem(m);
 			return(EMSGSIZE);
 		}
 		/*
 		 * The header comes from the caller.  Make sure a complete
 		 * base header is present, and contiguous in the first mbuf,
 		 * before any of its fields are looked at.
 		 */
 		if (m->m_pkthdr.len < (int)sizeof(struct ip)) {
 			m_freem(m);
 			return (EINVAL);
 		}
 		if (m->m_len < (int)sizeof(struct ip)) {
 			/* m_pullup() frees the chain when it fails. */
 			m = m_pullup(m, sizeof(struct ip));
 			if (m == NULL)
 				return (ENOBUFS);
 		}
 		INP_RLOCK(inp);
 		ip = mtod(m, struct ip *);
 		error = prison_check_ip4(inp->inp_cred, &ip->ip_src);
 		if (error != 0) {
 			INP_RUNLOCK(inp);
 			m_freem(m);
 			return (error);
 		}
 
 		/*
 		 * Don't allow a header length below the minimum, don't
 		 * allow both user specified and setsockopt options,
 		 * and don't allow packet length sizes that will crash.
 		 */
 		if (((ip->ip_hl << 2) < (int)sizeof (*ip))
 		    || ((ip->ip_hl != (sizeof (*ip) >> 2)) && inp->inp_options)
 		    || (ntohs(ip->ip_len) > m->m_pkthdr.len)
 		    || (ntohs(ip->ip_len) < (ip->ip_hl << 2))) {
 			INP_RUNLOCK(inp);
 			m_freem(m);
 			return (EINVAL);
 		}
 		/*
 		 * This doesn't allow application to specify ID of zero,
 		 * but we got this limitation from the beginning of history.
 		 */
 		if (ip->ip_id == 0)
 			ip_fillid(ip);
 
 		/*
 		 * XXX prevent ip_output from overwriting header fields.
 		 */
 		flags |= IP_RAWOUTPUT;
 		IPSTAT_INC(ips_rawout);
 	}
 
 	if (inp->inp_flags & INP_ONESBCAST)
 		flags |= IP_SENDONES;
 
 #ifdef MAC
 	mac_inpcb_create_mbuf(inp, m);
 #endif
 
 	/* The pcb read lock taken in either branch is held across ip_output(). */
 	error = ip_output(m, inp->inp_options, NULL, flags,
 	    inp->inp_moptions, inp);
 	INP_RUNLOCK(inp);
 	return (error);
 }
 
 /*
  * Raw IP socket option processing.
  *
  * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could
  * only be created by a privileged process, and as such, socket option
  * operations to manage system properties on any raw socket were allowed to
  * take place without explicit additional access control checks.  However,
  * raw sockets can now also be created in jail(), and therefore explicit
  * checks are now required.  Likewise, raw sockets can be used by a process
  * after it gives up privilege, so some caution is required.  For options
  * passed down to the IP layer via ip_ctloutput(), checks are assumed to be
  * performed in ip_ctloutput() and therefore no check occurs here.
  * Unilaterally checking priv_check() here breaks normal IP socket option
  * operations on raw sockets.
  *
  * When adding new socket options here, make sure to add access control
  * checks here as necessary.
  *
  * XXX-BZ inp locking?
  */
 int
 rip_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct	inpcb *inp = sotoinpcb(so);
 	int	error, optval;
 
 	/*
 	 * Only IPPROTO_IP options are handled, with one exception:
 	 * SOL_SOCKET/SO_SETFIB copies the socket's FIB number into the pcb.
 	 */
 	if (sopt->sopt_level != IPPROTO_IP) {
 		if ((sopt->sopt_level == SOL_SOCKET) &&
 		    (sopt->sopt_name == SO_SETFIB)) {
 			inp->inp_inc.inc_fibnum = so->so_fibnum;
 			return (0);
 		}
 		return (EINVAL);
 	}
 
 	error = 0;
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case IP_HDRINCL:
 			/* Reports the masked flag bit itself, not 0/1. */
 			optval = inp->inp_flags & INP_HDRINCL;
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 
 		case IP_FW3:	/* generic ipfw v.3 functions */
 		case IP_FW_ADD:	/* ADD actually returns the body... */
 		case IP_FW_GET:
 		case IP_FW_TABLE_GETSIZE:
 		case IP_FW_TABLE_LIST:
 		case IP_FW_NAT_GET_CONFIG:
 		case IP_FW_NAT_GET_LOG:
 			/* Forwarded to the ipfw control hook if one is set. */
 			if (V_ip_fw_ctl_ptr != NULL)
 				error = V_ip_fw_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break;
 
 		case IP_DUMMYNET3:	/* generic dummynet v.3 functions */
 		case IP_DUMMYNET_GET:
 			/* Forwarded to the dummynet control hook if one is set. */
 			if (ip_dn_ctl_ptr != NULL)
 				error = ip_dn_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break ;
 
 		case MRT_INIT:
 		case MRT_DONE:
 		case MRT_ADD_VIF:
 		case MRT_DEL_VIF:
 		case MRT_ADD_MFC:
 		case MRT_DEL_MFC:
 		case MRT_VERSION:
 		case MRT_ASSERT:
 		case MRT_API_SUPPORT:
 		case MRT_API_CONFIG:
 		case MRT_ADD_BW_UPCALL:
 		case MRT_DEL_BW_UPCALL:
 			/* Multicast routing requests need PRIV_NETINET_MROUTE. */
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_mrouter_get ? ip_mrouter_get(so, sopt) :
 				EOPNOTSUPP;
 			break;
 
 		default:
 			/* Everything else is an ordinary IP-level option. */
 			error = ip_ctloutput(so, sopt);
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case IP_HDRINCL:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 					    sizeof optval);
 			if (error)
 				break;
 			/* Note: inp_flags is modified without the pcb lock here. */
 			if (optval)
 				inp->inp_flags |= INP_HDRINCL;
 			else
 				inp->inp_flags &= ~INP_HDRINCL;
 			break;
 
 		case IP_FW3:	/* generic ipfw v.3 functions */
 		case IP_FW_ADD:
 		case IP_FW_DEL:
 		case IP_FW_FLUSH:
 		case IP_FW_ZERO:
 		case IP_FW_RESETLOG:
 		case IP_FW_TABLE_ADD:
 		case IP_FW_TABLE_DEL:
 		case IP_FW_TABLE_FLUSH:
 		case IP_FW_NAT_CFG:
 		case IP_FW_NAT_DEL:
 			/* Forwarded to the ipfw control hook if one is set. */
 			if (V_ip_fw_ctl_ptr != NULL)
 				error = V_ip_fw_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT;
 			break;
 
 		case IP_DUMMYNET3:	/* generic dummynet v.3 functions */
 		case IP_DUMMYNET_CONFIGURE:
 		case IP_DUMMYNET_DEL:
 		case IP_DUMMYNET_FLUSH:
 			/* Forwarded to the dummynet control hook if one is set. */
 			if (ip_dn_ctl_ptr != NULL)
 				error = ip_dn_ctl_ptr(sopt);
 			else
 				error = ENOPROTOOPT ;
 			break ;
 
 		case IP_RSVP_ON:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_rsvp_init(so);
 			break;
 
 		case IP_RSVP_OFF:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_rsvp_done();
 			break;
 
 		case IP_RSVP_VIF_ON:
 		case IP_RSVP_VIF_OFF:
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			/* EINVAL when no ip_rsvp_vif handler is installed. */
 			error = ip_rsvp_vif ?
 				ip_rsvp_vif(so, sopt) : EINVAL;
 			break;
 
 		case MRT_INIT:
 		case MRT_DONE:
 		case MRT_ADD_VIF:
 		case MRT_DEL_VIF:
 		case MRT_ADD_MFC:
 		case MRT_DEL_MFC:
 		case MRT_VERSION:
 		case MRT_ASSERT:
 		case MRT_API_SUPPORT:
 		case MRT_API_CONFIG:
 		case MRT_ADD_BW_UPCALL:
 		case MRT_DEL_BW_UPCALL:
 			/* Multicast routing requests need PRIV_NETINET_MROUTE. */
 			error = priv_check(curthread, PRIV_NETINET_MROUTE);
 			if (error != 0)
 				return (error);
 			error = ip_mrouter_set ? ip_mrouter_set(so, sopt) :
 					EOPNOTSUPP;
 			break;
 
 		default:
 			/* Everything else is an ordinary IP-level option. */
 			error = ip_ctloutput(so, sopt);
 			break;
 		}
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * This function exists solely to receive the PRC_IFDOWN messages which are
  * sent by if_down().  It looks for an ifaddr whose ifa_addr is sa, and calls
  * in_ifadown() to remove all routes corresponding to that address.  It also
  * receives the PRC_IFUP messages from if_up() and reinstalls the interface
  * routes.
  */
 void
 rip_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 	struct rm_priotracker in_ifa_tracker;
 	struct in_ifaddr *ia;
 	struct ifnet *ifp;
 	int err;
 	int flags;
 
 	/* Commands other than PRC_IFDOWN and PRC_IFUP are ignored; vip is unused. */
 	switch (cmd) {
 	case PRC_IFDOWN:
 		IN_IFADDR_RLOCK(&in_ifa_tracker);
 		TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 			/* Matched by pointer identity, not by address value. */
 			if (ia->ia_ifa.ifa_addr == sa
 			    && (ia->ia_flags & IFA_ROUTE)) {
 				/*
 				 * Hold a reference so the address stays
 				 * valid once the list lock is dropped.
 				 */
 				ifa_ref(&ia->ia_ifa);
 				IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 				/*
 				 * in_scrubprefix() kills the interface route.
 				 */
 				in_scrubprefix(ia, 0);
 				/*
 				 * in_ifadown gets rid of all the rest of the
 				 * routes.  This is not quite the right thing
 				 * to do, but at least if we are running a
 				 * routing process they will come back.
 				 */
 				in_ifadown(&ia->ia_ifa, 0);
 				ifa_free(&ia->ia_ifa);
 				break;
 			}
 		}
 		if (ia == NULL)		/* If ia matched, already unlocked. */
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 		break;
 
 	case PRC_IFUP:
 		IN_IFADDR_RLOCK(&in_ifa_tracker);
 		TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
 			if (ia->ia_ifa.ifa_addr == sa)
 				break;
 		}
 		/* Nothing to do if unknown or the route is already installed. */
 		if (ia == NULL || (ia->ia_flags & IFA_ROUTE)) {
 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			return;
 		}
 		ifa_ref(&ia->ia_ifa);
 		IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 		flags = RTF_UP;
 		ifp = ia->ia_ifa.ifa_ifp;
 
 		/* Loopback and point-to-point interfaces get a host route. */
 		if ((ifp->if_flags & IFF_LOOPBACK)
 		    || (ifp->if_flags & IFF_POINTOPOINT))
 			flags |= RTF_HOST;
 
 		/* The result of the loopback route removal is not examined. */
 		err = ifa_del_loopback_route((struct ifaddr *)ia, sa);
 
 		err = rtinit(&ia->ia_ifa, RTM_ADD, flags);
 		if (err == 0)
 			ia->ia_flags |= IFA_ROUTE;
 
 		/* Likewise, the result of re-adding it is not examined. */
 		err = ifa_add_loopback_route((struct ifaddr *)ia, sa);
 
 		ifa_free(&ia->ia_ifa);
 		break;
 	}
 }
 
 /*
  * Attach a raw pcb to a freshly created socket.  Requires
  * PRIV_NETINET_RAW and a protocol number in [0, IPPROTO_MAX).  Reserves
  * socket buffer space, allocates the pcb, marks it IPv4, records protocol
  * and default TTL and puts it on the raw hash.  Returns 0 or an errno
  * value.
  */
 static int
 rip_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct inpcb *pcb;
 	int rv;
 
 	KASSERT(sotoinpcb(so) == NULL, ("rip_attach: inp != NULL"));
 
 	rv = priv_check(td, PRIV_NETINET_RAW);
 	if (rv != 0)
 		return (rv);
 	if (proto < 0 || proto >= IPPROTO_MAX)
 		return (EPROTONOSUPPORT);
 	rv = soreserve(so, rip_sendspace, rip_recvspace);
 	if (rv != 0)
 		return (rv);
 
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	rv = in_pcballoc(so, &V_ripcbinfo);
 	if (rv != 0) {
 		INP_INFO_WUNLOCK(&V_ripcbinfo);
 		return (rv);
 	}
 	pcb = sotoinpcb(so);
 	pcb->inp_vflag |= INP_IPV4;
 	pcb->inp_ip_p = proto;
 	pcb->inp_ip_ttl = V_ip_defttl;
 	rip_inshash(pcb);
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 	/* Release the pcb lock that is held on the new pcb at this point. */
 	INP_WUNLOCK(pcb);
 	return (0);
 }
 
 /*
  * Detach and free the raw pcb of a socket.  The pcb is expected to have
  * no foreign address left (asserted below).  If the socket is the
  * multicast router or RSVP daemon socket, that state is shut down first.
  */
 static void
 rip_detach(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip_detach: inp == NULL"));
 	KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, 
 	    ("rip_detach: not closed"));
 
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	INP_WLOCK(inp);
 	rip_delhash(inp);
 	/* Optional hooks: only called when a handler is installed. */
 	if (so == V_ip_mrouter && ip_mrouter_done)
 		ip_mrouter_done();
 	if (ip_rsvp_force_done)
 		ip_rsvp_force_done(so);
 	if (so == V_ip_rsvpd)
 		ip_rsvp_done();
 	in_pcbdetach(inp);
 	/* No INP_WUNLOCK() follows: the pcb is handed to in_pcbfree() locked. */
 	in_pcbfree(inp);
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 }
 
 /*
  * Common disconnect path: clear the foreign address, rehash the pcb under
  * its new (unconnected) identity and drop SS_ISCONNECTED from the socket.
  */
 static void
 rip_dodisconnect(struct socket *so, struct inpcb *inp)
 {
 	struct inpcbinfo *info = inp->inp_pcbinfo;
 
 	INP_INFO_WLOCK(info);
 	INP_WLOCK(inp);
 
 	/* The hash slot depends on the addresses, so remove and re-insert. */
 	rip_delhash(inp);
 	inp->inp_faddr.s_addr = INADDR_ANY;
 	rip_inshash(inp);
 
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTED;
 	SOCK_UNLOCK(so);
 
 	INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(info);
 }
 
 /*
  * pru_abort handler: an aborted raw socket is simply disconnected.
  */
 static void
 rip_abort(struct socket *so)
 {
 	struct inpcb *pcb = sotoinpcb(so);
 
 	KASSERT(pcb != NULL, ("rip_abort: inp == NULL"));
 	rip_dodisconnect(so, pcb);
 }
 
 /*
  * pru_close handler: a closing raw socket is simply disconnected.
  */
 static void
 rip_close(struct socket *so)
 {
 	struct inpcb *pcb = sotoinpcb(so);
 
 	KASSERT(pcb != NULL, ("rip_close: inp == NULL"));
 	rip_dodisconnect(so, pcb);
 }
 
 /*
  * pru_disconnect handler.  Returns ENOTCONN when the socket is not
  * connected, otherwise disconnects it and returns 0.
  */
 static int
 rip_disconnect(struct socket *so)
 {
 	struct inpcb *pcb;
 
 	if (!(so->so_state & SS_ISCONNECTED))
 		return (ENOTCONN);
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("rip_disconnect: inp == NULL"));
 	rip_dodisconnect(so, pcb);
 
 	return (0);
 }
 
 /*
  * pru_bind handler: set the local address of a raw IP socket.
  *
  * Returns EINVAL for a wrongly sized sockaddr, the prison_check_ip4()
  * error if the jail check fails, and EADDRNOTAVAIL when there are no
  * interfaces, the family is neither AF_INET nor AF_IMPLINK, or a non-zero
  * address is not configured locally and INP_BINDANY is not set.
  */
 static int
 rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in *sin;
 	struct inpcb *pcb;
 	int rc;
 
 	sin = (struct sockaddr_in *)nam;
 	if (nam->sa_len != sizeof(*sin))
 		return (EINVAL);
 
 	rc = prison_check_ip4(td->td_ucred, &sin->sin_addr);
 	if (rc != 0)
 		return (rc);
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("rip_bind: inp == NULL"));
 
 	if (TAILQ_EMPTY(&V_ifnet))
 		return (EADDRNOTAVAIL);
 	switch (sin->sin_family) {
 	case AF_INET:
 	case AF_IMPLINK:
 		break;
 	default:
 		return (EADDRNOTAVAIL);
 	}
 	/* A specific address must be ours unless INP_BINDANY is set. */
 	if (sin->sin_addr.s_addr != 0 &&
 	    (pcb->inp_flags & INP_BINDANY) == 0 &&
 	    ifa_ifwithaddr_check((struct sockaddr *)sin) == 0)
 		return (EADDRNOTAVAIL);
 
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	INP_WLOCK(pcb);
 	/* The hash slot depends on the addresses, so remove and re-insert. */
 	rip_delhash(pcb);
 	pcb->inp_laddr = sin->sin_addr;
 	rip_inshash(pcb);
 	INP_WUNLOCK(pcb);
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 
 	return (0);
 }
 
 /*
  * pru_connect handler: record the foreign address of a raw IP socket and
  * mark the socket connected.
  *
  * Returns EINVAL for a wrongly sized sockaddr, EADDRNOTAVAIL when no
  * interfaces exist and EAFNOSUPPORT for families other than AF_INET and
  * AF_IMPLINK.
  */
 static int
 rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in *sin;
 	struct inpcb *pcb;
 
 	sin = (struct sockaddr_in *)nam;
 	if (nam->sa_len != sizeof(*sin))
 		return (EINVAL);
 	if (TAILQ_EMPTY(&V_ifnet))
 		return (EADDRNOTAVAIL);
 	switch (sin->sin_family) {
 	case AF_INET:
 	case AF_IMPLINK:
 		break;
 	default:
 		return (EAFNOSUPPORT);
 	}
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("rip_connect: inp == NULL"));
 
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	INP_WLOCK(pcb);
 	/* The hash slot depends on the addresses, so remove and re-insert. */
 	rip_delhash(pcb);
 	pcb->inp_faddr = sin->sin_addr;
 	rip_inshash(pcb);
 	soisconnected(so);
 	INP_WUNLOCK(pcb);
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 
 	return (0);
 }
 
 /*
  * pru_shutdown handler: mark the socket as unable to send any more data.
  * Always returns 0.
  */
 static int
 rip_shutdown(struct socket *so)
 {
 	struct inpcb *pcb = sotoinpcb(so);
 
 	KASSERT(pcb != NULL, ("rip_shutdown: inp == NULL"));
 
 	INP_WLOCK(pcb);
 	socantsendmore(so);
 	INP_WUNLOCK(pcb);
 
 	return (0);
 }
 
 /*
  * pru_send handler: choose the destination address and hand the mbuf to
  * rip_output().
  *
  * A connected socket must not be given an explicit address (EISCONN) and
  * an unconnected one must be (ENOTCONN); in both error cases the mbuf
  * chain is freed here.
  */
 static int
 rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
     struct mbuf *control, struct thread *td)
 {
 	struct inpcb *pcb;
 	u_long dst;
 	int connected;
 
 	pcb = sotoinpcb(so);
 	KASSERT(pcb != NULL, ("rip_send: inp == NULL"));
 
 	/*
 	 * Note: the socket state and the pcb's foreign address are read
 	 * without holding any lock.
 	 */
 	connected = (so->so_state & SS_ISCONNECTED) != 0;
 	if (connected == (nam != NULL)) {
 		/* Address given while connected, or missing while not. */
 		m_freem(m);
 		return (connected ? EISCONN : ENOTCONN);
 	}
 	if (connected)
 		dst = pcb->inp_faddr.s_addr;	/* Unlocked read. */
 	else
 		dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr;
 
 	return (rip_output(m, so, dst));
 }
 #endif /* INET */
 
 static int
 rip_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	int error, i, n;
 	struct inpcb *inp, **inp_list;
 	inp_gen_t gencnt;
 	struct xinpgen xig;
 
 	/*
 	 * The process of preparing the TCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request.
 	 */
 	if (req->oldptr == 0) {
 		n = V_ripcbinfo.ipi_count;
 		n += imax(n / 8, 10);
 		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
 		return (0);
 	}
 
 	if (req->newptr != 0)
 		return (EPERM);
 
 	/*
 	 * OK, now we're committed to doing something.
 	 */
 	INP_INFO_RLOCK(&V_ripcbinfo);
 	gencnt = V_ripcbinfo.ipi_gencnt;
 	n = V_ripcbinfo.ipi_count;
 	INP_INFO_RUNLOCK(&V_ripcbinfo);
 
 	xig.xig_len = sizeof xig;
 	xig.xig_count = n;
 	xig.xig_gen = gencnt;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
 		return (error);
 
 	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
 	if (inp_list == 0)
 		return (ENOMEM);
 
 	INP_INFO_RLOCK(&V_ripcbinfo);
 	for (inp = LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n;
 	     inp = LIST_NEXT(inp, inp_list)) {
 		INP_WLOCK(inp);
 		if (inp->inp_gencnt <= gencnt &&
 		    cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
 			in_pcbref(inp);
 			inp_list[i++] = inp;
 		}
 		INP_WUNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK(&V_ripcbinfo);
 	n = i;
 
 	error = 0;
 	for (i = 0; i < n; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
 		if (inp->inp_gencnt <= gencnt) {
 			struct xinpcb xi;
 
 			bzero(&xi, sizeof(xi));
 			xi.xi_len = sizeof xi;
 			/* XXX should avoid extra copy */
 			bcopy(inp, &xi.xi_inp, sizeof *inp);
 			if (inp->inp_socket)
 				sotoxsocket(inp->inp_socket, &xi.xi_socket);
 			INP_RUNLOCK(inp);
 			error = SYSCTL_OUT(req, &xi, sizeof xi);
 		} else
 			INP_RUNLOCK(inp);
 	}
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	for (i = 0; i < n; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
 		if (!in_pcbrele_rlocked(inp))
 			INP_RUNLOCK(inp);
 	}
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.  If the
 		 * generation differs from what we told her before, she knows
 		 * that something happened while we were processing this
 		 * request, and it might be necessary to retry.
 		 */
 		INP_INFO_RLOCK(&V_ripcbinfo);
 		xig.xig_gen = V_ripcbinfo.ipi_gencnt;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = V_ripcbinfo.ipi_count;
 		INP_INFO_RUNLOCK(&V_ripcbinfo);
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 	free(inp_list, M_TEMP);
 	return (error);
 }
 
 /*
  * Read-only opaque sysctl node "pcblist" under _net_inet_raw, served by
  * rip_pcblist() and formatted as an array of struct xinpcb.
  */
 SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
     rip_pcblist, "S,xinpcb", "List of active raw IP sockets");
 
 #ifdef INET
 /*
  * User-request switch for raw IP sockets.  Abort and close both end up in
  * rip_dodisconnect(); control, peeraddr, sockaddr and sosetlabel use the
  * generic in_*() implementations rather than raw-IP-specific ones.
  */
 struct pr_usrreqs rip_usrreqs = {
 	.pru_abort =		rip_abort,
 	.pru_attach =		rip_attach,
 	.pru_bind =		rip_bind,
 	.pru_connect =		rip_connect,
 	.pru_control =		in_control,
 	.pru_detach =		rip_detach,
 	.pru_disconnect =	rip_disconnect,
 	.pru_peeraddr =		in_getpeeraddr,
 	.pru_send =		rip_send,
 	.pru_shutdown =		rip_shutdown,
 	.pru_sockaddr =		in_getsockaddr,
 	.pru_sosetlabel =	in_pcbsosetlabel,
 	.pru_close =		rip_close,
 };
 #endif /* INET */
Index: projects/clang380-import/sys/netinet/tcp_reass.c
===================================================================
--- projects/clang380-import/sys/netinet/tcp_reass.c	(revision 293686)
+++ projects/clang380-import/sys/netinet/tcp_reass.c	(revision 293687)
@@ -1,332 +1,333 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_input.c	8.12 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
+#include <sys/eventhandler.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet6/tcp6_var.h>
 #include <netinet/tcpip.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif /* TCPDEBUG */
 
 /* Parent node for the reassembly-queue sysctls declared below. */
 static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
     "TCP Segment Reassembly Queue");
 
 /*
  * Global cap on queued segments.  Exported with CTLFLAG_RDTUN; the value
  * is also fetched as the "net.inet.tcp.reass.maxsegments" tunable in
  * tcp_reass_global_init().
  */
 static int tcp_reass_maxseg = 0;
 SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
     &tcp_reass_maxseg, 0,
     "Global maximum number of TCP Segments in Reassembly Queue");
 
 /* UMA zone that backs every struct tseg_qent queue entry. */
 static uma_zone_t tcp_reass_zone;
 SYSCTL_UMA_CUR(_net_inet_tcp_reass, OID_AUTO, cursegments, 0,
     &tcp_reass_zone,
     "Global number of TCP Segments currently in Reassembly Queue");
 
 /*
  * Handler for the nmbclusters_change event: re-derive the reassembly zone
  * limit from the new nmbclusters value.  The tag argument is unused.
  */
 static void
 tcp_reass_zone_change(void *tag)
 {
 
 	/*
 	 * Ask for nmbclusters / 16 entries and remember the effective limit
 	 * that uma_zone_set_max() reports back.
 	 */
 	tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone, nmbclusters / 16);
 }
 
 /*
  * One-time setup of the TCP reassembly queue: pick the segment limit
  * (nmbclusters / 16 unless overridden by the tunable), create the UMA
  * zone for queue entries, apply the limit, and subscribe to nmbclusters
  * changes so the limit can follow them.
  */
 void
 tcp_reass_global_init(void)
 {
 	int limit;
 
 	limit = nmbclusters / 16;
 	TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments", &limit);
 
 	tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 
 	/* Apply the limit and keep whatever value the zone actually uses. */
 	tcp_reass_maxseg = uma_zone_set_max(tcp_reass_zone, limit);
 
 	EVENTHANDLER_REGISTER(nmbclusters_change,
 	    tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
 }
 
 /*
  * Discard every segment on a connection's reassembly queue, freeing both
  * the mbuf chains and the queue entries.  The caller must hold the inpcb
  * write lock.
  */
 void
 tcp_reass_flush(struct tcpcb *tp)
 {
 	struct tseg_qent *ent;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	while (!LIST_EMPTY(&tp->t_segq)) {
 		ent = LIST_FIRST(&tp->t_segq);
 		LIST_REMOVE(ent, tqe_q);
 		m_freem(ent->tqe_m);
 		uma_zfree(tcp_reass_zone, ent);
 		tp->t_segqlen--;
 	}
 
 	KASSERT((tp->t_segqlen == 0),
 	    ("TCP reass queue %p segment count is %d instead of 0 after flush.",
 	    tp, tp->t_segqlen));
 }
 
 /*
  * Insert a received segment into the connection's reassembly queue and
  * hand any data that has become contiguous at rcv_nxt to the socket.
  *
  * tp     connection; its inpcb write lock must be held.
  * th     TCP header of the segment, or NULL to skip queueing and only try
  *        to present already queued data.
  * tlenp  in: payload length of the segment; out: set to 0 when the segment
  *        is dropped because of a queue or zone limit, reduced by the
  *        number of bytes trimmed when the segment overlaps its predecessor.
  * m      mbuf chain holding the payload; freed here when the segment is
  *        dropped or is a complete duplicate.
  *
  * Returns the TH_FIN bit of the last segment passed to the socket buffer,
  * or 0 when nothing was presented.
  */
 int
 tcp_reass(struct tcpcb *tp, struct tcphdr *th, int *tlenp, struct mbuf *m)
 {
 	struct tseg_qent *q;
 	struct tseg_qent *p = NULL;
 	struct tseg_qent *nq;
 	struct tseg_qent *te = NULL;
 	struct socket *so = tp->t_inpcb->inp_socket;
 	char *s = NULL;
 	int flags;
 	struct tseg_qent tqs;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * XXX: tcp_reass() is rather inefficient with its data structures
 	 * and should be rewritten (see NetBSD for optimizations).
 	 */
 
 	/*
 	 * Call with th==NULL after become established to
 	 * force pre-ESTABLISHED data up to user socket.
 	 */
 	if (th == NULL)
 		goto present;
 
 	/*
 	 * Limit the number of segments that can be queued to reduce the
 	 * potential for mbuf exhaustion. For best performance, we want to be
 	 * able to queue a full window's worth of segments. The size of the
 	 * socket receive buffer determines our advertised window and grows
 	 * automatically when socket buffer autotuning is enabled. Use it as the
 	 * basis for our queue limit.
 	 * Always let the missing segment through which caused this queue.
 	 * NB: Access to the socket buffer is left intentionally unlocked as we
 	 * can tolerate stale information here.
 	 *
 	 * XXXLAS: Using sbspace(so->so_rcv) instead of so->so_rcv.sb_hiwat
 	 * should work but causes packets to be dropped when they shouldn't.
 	 * Investigate why and re-evaluate the below limit after the behaviour
 	 * is understood.
 	 */
 	if ((th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) &&
 	    tp->t_segqlen >= (so->so_rcv.sb_hiwat / tp->t_maxseg) + 1) {
 		TCPSTAT_INC(tcps_rcvreassfull);
 		*tlenp = 0;
 		if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: queue limit reached, "
 			    "segment dropped\n", s, __func__);
 			free(s, M_TCPLOG);
 		}
 		m_freem(m);
 		return (0);
 	}
 
 	/*
 	 * Allocate a new queue entry. If we can't, or hit the zone limit
 	 * just drop the pkt.
 	 *
 	 * Use a temporary structure on the stack for the missing segment
 	 * when the zone is exhausted. Otherwise we may get stuck.
 	 */
 	te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
 	if (te == NULL) {
 		if (th->th_seq != tp->rcv_nxt || !TCPS_HAVEESTABLISHED(tp->t_state)) {
 			TCPSTAT_INC(tcps_rcvmemdrop);
 			m_freem(m);
 			*tlenp = 0;
 			if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
 			    NULL))) {
 				log(LOG_DEBUG, "%s; %s: global zone limit "
 				    "reached, segment dropped\n", s, __func__);
 				free(s, M_TCPLOG);
 			}
 			return (0);
 		} else {
 			/*
 			 * The stack entry is only used for the segment at
 			 * rcv_nxt on an established connection, which the
 			 * "present" loop below dequeues again before this
 			 * function returns.
 			 */
 			bzero(&tqs, sizeof(struct tseg_qent));
 			te = &tqs;
 			if ((s = tcp_log_addrs(&tp->t_inpcb->inp_inc, th, NULL,
 			    NULL))) {
 				log(LOG_DEBUG,
 				    "%s; %s: global zone limit reached, using "
 				    "stack for missing segment\n", s, __func__);
 				free(s, M_TCPLOG);
 			}
 		}
 	}
 	/* Account for the new entry now; the duplicate path undoes this. */
 	tp->t_segqlen++;
 
 	/*
 	 * Find a segment which begins after this one does.
 	 */
 	LIST_FOREACH(q, &tp->t_segq, tqe_q) {
 		if (SEQ_GT(q->tqe_th->th_seq, th->th_seq))
 			break;
 		p = q;
 	}
 
 	/*
 	 * If there is a preceding segment, it may provide some of
 	 * our data already.  If so, drop the data from the incoming
 	 * segment.  If it provides all of our data, drop us.
 	 */
 	if (p != NULL) {
 		int i;
 		/* conversion to int (in i) handles seq wraparound */
 		i = p->tqe_th->th_seq + p->tqe_len - th->th_seq;
 		if (i > 0) {
 			if (i >= *tlenp) {
 				TCPSTAT_INC(tcps_rcvduppack);
 				TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
 				m_freem(m);
 				if (te != &tqs)
 					uma_zfree(tcp_reass_zone, te);
 				tp->t_segqlen--;
 				/*
 				 * Try to present any queued data
 				 * at the left window edge to the user.
 				 * This is needed after the 3-WHS
 				 * completes.
 				 */
 				goto present;	/* ??? */
 			}
 			m_adj(m, i);
 			*tlenp -= i;
 			th->th_seq += i;
 		}
 	}
 	tp->t_rcvoopack++;
 	TCPSTAT_INC(tcps_rcvoopack);
 	TCPSTAT_ADD(tcps_rcvoobyte, *tlenp);
 
 	/*
 	 * While we overlap succeeding segments trim them or,
 	 * if they are completely covered, dequeue them.
 	 */
 	while (q) {
 		int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
 		if (i <= 0)
 			break;
 		if (i < q->tqe_len) {
 			q->tqe_th->th_seq += i;
 			q->tqe_len -= i;
 			m_adj(q->tqe_m, i);
 			break;
 		}
 
 		nq = LIST_NEXT(q, tqe_q);
 		LIST_REMOVE(q, tqe_q);
 		m_freem(q->tqe_m);
 		uma_zfree(tcp_reass_zone, q);
 		tp->t_segqlen--;
 		q = nq;
 	}
 
 	/* Insert the new segment queue entry into place. */
 	te->tqe_m = m;
 	te->tqe_th = th;
 	te->tqe_len = *tlenp;
 
 	if (p == NULL) {
 		LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q);
 	} else {
 		KASSERT(te != &tqs, ("%s: temporary stack based entry not "
 		    "first element in queue", __func__));
 		LIST_INSERT_AFTER(p, te, tqe_q);
 	}
 
 present:
 	/*
 	 * Present data to user, advancing rcv_nxt through
 	 * completed sequence space.
 	 */
 	if (!TCPS_HAVEESTABLISHED(tp->t_state))
 		return (0);
 	q = LIST_FIRST(&tp->t_segq);
 	if (!q || q->tqe_th->th_seq != tp->rcv_nxt)
 		return (0);
 	SOCKBUF_LOCK(&so->so_rcv);
 	do {
 		tp->rcv_nxt += q->tqe_len;
 		flags = q->tqe_th->th_flags & TH_FIN;
 		nq = LIST_NEXT(q, tqe_q);
 		LIST_REMOVE(q, tqe_q);
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 			m_freem(q->tqe_m);
 		else
 			sbappendstream_locked(&so->so_rcv, q->tqe_m, 0);
 		/* The stack-based entry did not come from the zone. */
 		if (q != &tqs)
 			uma_zfree(tcp_reass_zone, q);
 		tp->t_segqlen--;
 		q = nq;
 	} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
 	/*
 	 * No SOCKBUF_UNLOCK() follows, so sorwakeup_locked() presumably
 	 * releases the receive buffer lock -- confirm in socketvar.h.
 	 */
 	sorwakeup_locked(so);
 	return (flags);
 }
Index: projects/clang380-import/sys/netinet/tcp_subr.c
===================================================================
--- projects/clang380-import/sys/netinet/tcp_subr.c	(revision 293686)
+++ projects/clang380-import/sys/netinet/tcp_subr.c	(revision 293687)
@@ -1,2920 +1,2921 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_subr.c	8.2 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/callout.h>
+#include <sys/eventhandler.h>
 #include <sys/hhook.h>
 #include <sys/kernel.h>
 #include <sys/khelp.h>
 #include <sys/sysctl.h>
 #include <sys/jail.h>
 #include <sys/malloc.h>
 #include <sys/refcount.h>
 #include <sys/mbuf.h>
 #ifdef INET6
 #include <sys/domain.h>
 #endif
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/sdt.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/random.h>
 
 #include <vm/uma.h>
 
 #include <net/route.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/vnet.h>
 
 #include <netinet/cc.h>
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_var.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/nd6.h>
 #endif
 
 #ifdef TCP_RFC7413
 #include <netinet/tcp_fastopen.h>
 #endif
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
 #include <netinet/tcpip.h>
 #ifdef TCPPCAP
 #include <netinet/tcp_pcap.h>
 #endif
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
 #ifdef INET6
 #include <netinet6/ip6protosw.h>
 #endif
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/xform.h>
 #ifdef INET6
 #include <netipsec/ipsec6.h>
 #endif
 #include <netipsec/key.h>
 #include <sys/syslog.h>
 #endif /*IPSEC*/
 
 #include <machine/in_cksum.h>
 #include <sys/md5.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Per-VNET default maximum segment sizes, adjustable through the mssdflt
  * and v6mssdflt sysctl handlers in this file.
  */
 VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS;
 #ifdef INET6
 VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS;
 #endif
 
 /*
  * Held in this file around walks and updates of the t_functions list and
  * around reads and writes of tcp_func_set_ptr.
  */
 struct rwlock tcp_function_lock;
 
 /*
  * Sysctl handler for the default IPv4 TCP MSS.  Reads report the current
  * value; writes are rejected with EINVAL when the new value is below
  * TCP_MINMSS.
  */
 static int
 sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS)
 {
 	int mss, rc;
 
 	mss = V_tcp_mssdflt;
 	rc = sysctl_handle_int(oidp, &mss, 0, req);
 	/* Done if the copy failed or this was a pure read. */
 	if (rc != 0 || !req->newptr)
 		return (rc);
 	if (mss < TCP_MINMSS)
 		return (EINVAL);
 	V_tcp_mssdflt = mss;
 	return (0);
 }
 
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_mssdflt), 0,
     &sysctl_net_inet_tcp_mss_check, "I",
     "Default TCP Maximum Segment Size");
 
 #ifdef INET6
 /*
  * Sysctl handler for the default IPv6 TCP MSS.  Reads report the current
  * value; writes are rejected with EINVAL when the new value is below
  * TCP_MINMSS.
  */
 static int
 sysctl_net_inet_tcp_mss_v6_check(SYSCTL_HANDLER_ARGS)
 {
 	int mss, rc;
 
 	mss = V_tcp_v6mssdflt;
 	rc = sysctl_handle_int(oidp, &mss, 0, req);
 	/* Done if the copy failed or this was a pure read. */
 	if (rc != 0 || !req->newptr)
 		return (rc);
 	if (mss < TCP_MINMSS)
 		return (EINVAL);
 	V_tcp_v6mssdflt = mss;
 	return (0);
 }
 
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_v6mssdflt), 0,
     &sysctl_net_inet_tcp_mss_v6_check, "I",
     "Default TCP Maximum Segment Size for IPv6");
 #endif /* INET6 */
 
 /*
  * Minimum MSS we accept and use. This prevents DoS attacks where
  * we are forced to a ridiculously low MSS like 20 and send hundreds
  * of packets instead of one. The effect scales with the available
  * bandwidth and quickly saturates the CPU and network interface
  * with packet generation and sending. Set to zero to disable MINMSS
  * checking. This setting prevents us from sending too small packets.
  */
 VNET_DEFINE(int, tcp_minmss) = TCP_MINMSS;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_VNET | CTLFLAG_RW,
      &VNET_NAME(tcp_minmss), 0,
     "Minimum TCP Maximum Segment Size");
 
 /* RFC 1323 extensions are enabled by default (per VNET). */
 VNET_DEFINE(int, tcp_do_rfc1323) = 1;
 SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_rfc1323), 0,
     "Enable rfc1323 (high performance TCP) extensions");
 
 /* Global (not per-VNET) switch; off by default. */
 static int	tcp_log_debug = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW,
     &tcp_log_debug, 0, "Log errors caused by incoming TCP segments");
 
 /*
  * Declared CTLFLAG_NOFETCH.
  * NOTE(review): tcp_init() names the tunable "net.inet.tcp.tcbhashsize",
  * so the value is presumably fetched by hand there -- confirm.
  */
 static int	tcp_tcbhashsize;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
     &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
 
 /* Global switch; on by default. */
 static int	do_tcpdrain = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0,
     "Enable tcp_drain routine for extra help when low on mbufs");
 
 /* Read-only view of the per-VNET tcbinfo pcb counter. */
 SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_VNET | CTLFLAG_RD,
     &VNET_NAME(tcbinfo.ipi_count), 0, "Number of active PCBs");
 
 /* Per-VNET; on by default.  V_icmp_may_rst is the accessor. */
 static VNET_DEFINE(int, icmp_may_rst) = 1;
 #define	V_icmp_may_rst			VNET(icmp_may_rst)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(icmp_may_rst), 0,
     "Certain ICMP unreachable messages may abort connections in SYN_SENT");
 
 /* Per-VNET interval in seconds; initialised to 0. */
 static VNET_DEFINE(int, tcp_isn_reseed_interval) = 0;
 #define	V_tcp_isn_reseed_interval	VNET(tcp_isn_reseed_interval)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_isn_reseed_interval), 0,
     "Seconds between reseeding of ISN secret");
 
 /* Global; declared CTLFLAG_RDTUN. */
 static int	tcp_soreceive_stream;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN,
     &tcp_soreceive_stream, 0, "Using soreceive_stream for TCP sockets");
 
 #ifdef TCP_SIGNATURE
 /* Only present in TCP_SIGNATURE kernels; on by default. */
 static int	tcp_sig_checksigs = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, signature_verify_input, CTLFLAG_RW,
     &tcp_sig_checksigs, 0, "Verify RFC2385 digests on inbound traffic");
 #endif
 
 /* Per-VNET UMA zone; presumably backs SACK hole records (by its name). */
 VNET_DEFINE(uma_zone_t, sack_hole_zone);
 #define	V_sack_hole_zone		VNET(sack_hole_zone)
 
 /* Per-VNET helper-hook heads, indexed by HHOOK_TCP_* type. */
 VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]);
 
 /* Forward declarations of file-local helpers. */
 static struct inpcb *tcp_notify(struct inpcb *, int);
 static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int);
 static void tcp_mtudisc(struct inpcb *, int);
 static char *	tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th,
 		    void *ip4hdr, const void *ip6hdr);
 static void	tcp_timer_discard(struct tcpcb *, uint32_t);
 
 
 /*
  * The built-in "default" TCP function block.  The initializer is
  * positional: the stack name, then tcp_output, tcp_do_segment and
  * tcp_default_ctloutput, followed by NULL for every remaining hook and 0
  * for the two trailing integer members.
  * NOTE(review): member order is taken from struct tcp_function_block,
  * which is not visible here -- presumably the name, output, do_segment
  * and ctloutput members come first; confirm against tcp_var.h.
  */
 static struct tcp_function_block tcp_def_funcblk = {
 	"default",
 	tcp_output,
 	tcp_do_segment,
 	tcp_default_ctloutput,
 	NULL,
 	NULL,	
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	0,
 	0
 };
 
 /* List of registered function blocks, walked with TAILQ_FOREACH. */
 struct tcp_funchead t_functions;
 /* Function block currently selected as the default; starts as built-in. */
 static struct tcp_function_block *tcp_func_set_ptr = &tcp_def_funcblk;
 
 /*
  * Look up a registered function block by the name in fs.  Returns the
  * block, or NULL when no registered block carries that name.  Callers in
  * this file hold tcp_function_lock around the call.
  */
 static struct tcp_function_block *
 find_tcp_functions_locked(struct tcp_function_set *fs)
 {
 	struct tcp_function *entry;
 
 	TAILQ_FOREACH(entry, &t_functions, tf_next) {
 		if (strcmp(entry->tf_fb->tfb_tcp_block_name,
 		    fs->function_set_name) == 0)
 			return (entry->tf_fb);
 	}
 	return (NULL);
 }
 
 /*
  * Check whether blk is on the list of registered function blocks.
  * Returns blk when it is, NULL otherwise.  When s is non-NULL and the
  * block is found, *s receives the list entry that refers to it.  Callers
  * in this file hold tcp_function_lock around the call.
  */
 static struct tcp_function_block *
 find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s)
 {
 	struct tcp_function *entry;
 
 	TAILQ_FOREACH(entry, &t_functions, tf_next) {
 		if (entry->tf_fb != blk)
 			continue;
 		if (s != NULL)
 			*s = entry;
 		return (blk);
 	}
 	return (NULL);
 }
 
 struct tcp_function_block *
 find_and_ref_tcp_functions(struct tcp_function_set *fs)
 {
 	struct tcp_function_block *blk;
 	
 	rw_rlock(&tcp_function_lock);	
 	blk = find_tcp_functions_locked(fs);
 	if (blk)
 		refcount_acquire(&blk->tfb_refcnt); 
 	rw_runlock(&tcp_function_lock);
 	return(blk);
 }
 
 /*
  * Confirm that blk is still registered and, if so, take a reference on
  * it.  Returns blk, or NULL when it is not on the registered list.
  */
 struct tcp_function_block *
 find_and_ref_tcp_fb(struct tcp_function_block *blk)
 {
 	struct tcp_function_block *found;
 
 	rw_rlock(&tcp_function_lock);
 	found = find_tcp_fb_locked(blk, NULL);
 	if (found != NULL)
 		refcount_acquire(&found->tfb_refcnt);
 	rw_runlock(&tcp_function_lock);
 
 	return (found);
 }
 
 
 static int
 sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS)
 {
 	int error=ENOENT;
 	struct tcp_function_set fs;
 	struct tcp_function_block *blk;
 
 	memset(&fs, 0, sizeof(fs));
 	rw_rlock(&tcp_function_lock);
 	blk = find_tcp_fb_locked(tcp_func_set_ptr, NULL);
 	if (blk) {
 		/* Found him */
 		strcpy(fs.function_set_name, blk->tfb_tcp_block_name);
 		fs.pcbcnt = blk->tfb_refcnt;
 	}
 	rw_runlock(&tcp_function_lock);	
 	error = sysctl_handle_string(oidp, fs.function_set_name,
 				     sizeof(fs.function_set_name), req);
 
 	/* Check for error or no change */
 	if (error != 0 || req->newptr == NULL)
 		return(error);
 
 	rw_wlock(&tcp_function_lock);
 	blk = find_tcp_functions_locked(&fs);
 	if ((blk == NULL) ||
 	    (blk->tfb_flags & TCP_FUNC_BEING_REMOVED)) { 
 		error = ENOENT; 
 		goto done;
 	}
 	tcp_func_set_ptr = blk;
 done:
 	rw_wunlock(&tcp_function_lock);
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_default,
 	    CTLTYPE_STRING | CTLFLAG_RW,
 	    NULL, 0, sysctl_net_inet_default_tcp_functions, "A",
 	    "Set/get the default TCP functions");
 
 static int
 sysctl_net_inet_list_available(SYSCTL_HANDLER_ARGS)
 {
 	int error, cnt, linesz;
 	struct tcp_function *f;
 	char *buffer, *cp;
 	size_t bufsz, outsz;
 
 	cnt = 0;
 	rw_rlock(&tcp_function_lock);
 	TAILQ_FOREACH(f, &t_functions, tf_next) {
 		cnt++;
 	}
 	rw_runlock(&tcp_function_lock);
 
 	bufsz = (cnt+2) * (TCP_FUNCTION_NAME_LEN_MAX + 12) + 1;
 	buffer = malloc(bufsz, M_TEMP, M_WAITOK);
 
 	error = 0;
 	cp = buffer;
 
 	linesz = snprintf(cp, bufsz, "\n%-32s%c %s\n", "Stack", 'D', "PCB count");
 	cp += linesz;
 	bufsz -= linesz;
 	outsz = linesz;
 
 	rw_rlock(&tcp_function_lock);	
 	TAILQ_FOREACH(f, &t_functions, tf_next) {
 		linesz = snprintf(cp, bufsz, "%-32s%c %u\n",
 		    f->tf_fb->tfb_tcp_block_name,
 		    (f->tf_fb == tcp_func_set_ptr) ? '*' : ' ',
 		    f->tf_fb->tfb_refcnt);
 		if (linesz >= bufsz) {
 			error = EOVERFLOW;
 			break;
 		}
 		cp += linesz;
 		bufsz -= linesz;
 		outsz += linesz;
 	}
 	rw_runlock(&tcp_function_lock);
 	if (error == 0)
 		error = sysctl_handle_string(oidp, buffer, outsz + 1, req);
 	free(buffer, M_TEMP);
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available,
 	    CTLTYPE_STRING|CTLFLAG_RD,
 	    NULL, 0, sysctl_net_inet_list_available, "A",
 	    "list available TCP Function sets");
 
 /*
  * Target size of TCP PCB hash tables. Must be a power of two.
  *
  * Note that this can be overridden by the kernel environment
  * variable net.inet.tcp.tcbhashsize
  */
 #ifndef TCBHASHSIZE
 #define TCBHASHSIZE	0
 #endif
 
 /*
  * XXX
  * Callouts should be moved into struct tcpcb directly.  They are currently
  * separate because the tcpcb structure is exported to userland for sysctl
  * parsing purposes, which do not know about callouts.
  *
  * struct tcpcb_mem bundles a tcpcb with its companion structures.
  */
 struct tcpcb_mem {
 	struct	tcpcb		tcb;	/* the control block itself */
 	struct	tcp_timer	tt;	/* timer state kept outside tcpcb */
 	struct	cc_var		ccv;	/* presumably congestion control state */
 	struct	osd		osd;	/* presumably object-specific data */
 };
 
 /* Per-VNET UMA zone; tcp_zone_change() caps it at maxsockets. */
 static VNET_DEFINE(uma_zone_t, tcpcb_zone);
 #define	V_tcpcb_zone			VNET(tcpcb_zone)
 
 /* malloc(9) types used by the TCP code. */
 MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
 MALLOC_DEFINE(M_TCPFUNCTIONS, "tcpfunc", "TCP function set memory");
 
 /* Mutex named "isn_mtx" (by name, guards ISN state -- confirm at users). */
 static struct mtx isn_mtx;
 
 /* Thin wrappers around mtx_init/mtx_lock/mtx_unlock on isn_mtx. */
 #define	ISN_LOCK_INIT()	mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF)
 #define	ISN_LOCK()	mtx_lock(&isn_mtx)
 #define	ISN_UNLOCK()	mtx_unlock(&isn_mtx)
 
 /*
  * TCP initialization.
  */
 
 /*
  * Re-apply the current maxsockets value as the limit of the inpcb zone
  * and the tcpcb zone, then let tcp_tw_zone_change() adjust its own state.
  * The tag argument is unused.
  * NOTE(review): the (void *tag) signature suggests an eventhandler
  * callback, presumably for maxsockets changes; the registration is not
  * visible here -- confirm in tcp_init().
  */
 static void
 tcp_zone_change(void *tag)
 {
 
 	uma_zone_set_max(V_tcbinfo.ipi_zone, maxsockets);
 	uma_zone_set_max(V_tcpcb_zone, maxsockets);
 	tcp_tw_zone_change();
 }
 
 /*
  * Initialise the lock of an inpcb stored at mem.  The size and flags
  * arguments are unused.  Always returns 0.
  * NOTE(review): the (mem, size, flags) signature matches a UMA zone init
  * callback; presumably passed to the inpcb zone setup -- confirm.
  */
 static int
 tcp_inpcb_init(void *mem, int size, int flags)
 {
 	struct inpcb *inp = mem;
 
 	INP_LOCK_INIT(inp, "inp", "tcpinp");
 	return (0);
 }
 
 /*
  * Round a requested size up to a power of two for the tcp_inpcb hash.
  *
  * The result is 1 << fls(size), i.e. the next power of two strictly
  * above the highest set bit of size.  If that shift overflows and the
  * result compares below size, one power of two less is returned instead.
  */
 static int
 maketcp_hashsize(int size)
 {
 	int bits, hashsize;
 
 	bits = fls(size);
 	hashsize = 1 << bits;
 	/* Overflowed: fall back to the next smaller power of two. */
 	if (hashsize < size)
 		hashsize = 1 << (bits - 1);
 	return (hashsize);
 }
 
 /*
  * Register an alternate TCP function block under its own name.
  *
  * blk   the block to register; tfb_tcp_output, tfb_tcp_do_segment,
  *       tfb_tcp_ctloutput and a non-empty name are mandatory, and the
  *       five timer hooks must be supplied either all together or not at
  *       all.
  * wait  malloc(9) wait flag passed through for the list entry.
  *
  * Returns 0 on success, EINVAL for an incomplete block or a name that
  * does not fit in a struct tcp_function_set, ENOMEM if the list entry
  * cannot be allocated, and EALREADY if the name is already registered.
  * On success the block's reference count and flags are reset to 0.
  */
 int
 register_tcp_functions(struct tcp_function_block *blk, int wait)
 {
 	struct tcp_function_block *lblk;
 	struct tcp_function *n;
 	struct tcp_function_set fs;
 
 	if ((blk->tfb_tcp_output == NULL) ||
 	    (blk->tfb_tcp_do_segment == NULL) ||
 	    (blk->tfb_tcp_ctloutput == NULL) ||
 	    (strlen(blk->tfb_tcp_block_name) == 0)) {
 		/*
 		 * These functions are required and you
 		 * need a name.
 		 */
 		return (EINVAL);
 	}
 	if (blk->tfb_tcp_timer_stop_all ||
 	    blk->tfb_tcp_timers_left ||
 	    blk->tfb_tcp_timer_activate ||
 	    blk->tfb_tcp_timer_active ||
 	    blk->tfb_tcp_timer_stop) {
 		/*
 		 * If you define one timer function you
 		 * must have them all.
 		 */
 		if ((blk->tfb_tcp_timer_stop_all == NULL) ||
 		    (blk->tfb_tcp_timers_left  == NULL) ||
 		    (blk->tfb_tcp_timer_activate == NULL) ||
 		    (blk->tfb_tcp_timer_active == NULL) ||
 		    (blk->tfb_tcp_timer_stop == NULL)) {
 			return (EINVAL);
 		}
 	}
 	/*
 	 * Copy the name with an explicit bound instead of strcpy(), and
 	 * refuse names that would be truncated: the duplicate check below
 	 * must compare the full name, and fs lives on the kernel stack.
 	 */
 	if (strlcpy(fs.function_set_name, blk->tfb_tcp_block_name,
 	    sizeof(fs.function_set_name)) >= sizeof(fs.function_set_name))
 		return (EINVAL);
 	n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait);
 	if (n == NULL) {
 		return (ENOMEM);
 	}
 	n->tf_fb = blk;
 	rw_wlock(&tcp_function_lock);
 	lblk = find_tcp_functions_locked(&fs);
 	if (lblk) {
 		/* Duplicate name space not allowed */
 		rw_wunlock(&tcp_function_lock);
 		free(n, M_TCPFUNCTIONS);
 		return (EALREADY);
 	}
 	refcount_init(&blk->tfb_refcnt, 0);
 	blk->tfb_flags = 0;
 	TAILQ_INSERT_TAIL(&t_functions, n, tf_next);
 	rw_wunlock(&tcp_function_lock);
 	return(0);
 }	
 
 /*
  * Remove a previously registered TCP function block from the list.
  *
  * Returns EPERM for the block named "default", EBUSY if the block is the
  * current default or still has references (in which case it is also
  * flagged TCP_FUNC_BEING_REMOVED), ENOENT if it is not on the list, and 0
  * once its list entry has been unlinked and freed.
  */
 int
 deregister_tcp_functions(struct tcp_function_block *blk)
 {
 	struct tcp_function *node;
 	int rv;
 
 	/* You can't un-register the default */
 	if (strcmp(blk->tfb_tcp_block_name, "default") == 0)
 		return (EPERM);
 
 	rw_wlock(&tcp_function_lock);
 	if (blk == tcp_func_set_ptr) {
 		/* You can't free the current default */
 		rv = EBUSY;
 	} else if (blk->tfb_refcnt != 0) {
 		/* Still tcb attached, mark it. */
 		blk->tfb_flags |= TCP_FUNC_BEING_REMOVED;
 		rv = EBUSY;
 	} else if (find_tcp_fb_locked(blk, &node) != NULL) {
 		/* Found */
 		TAILQ_REMOVE(&t_functions, node, tf_next);
 		node->tf_fb = NULL;
 		free(node, M_TCPFUNCTIONS);
 		rv = 0;
 	} else {
 		rv = ENOENT;
 	}
 	rw_wunlock(&tcp_function_lock);
 	return (rv);
 }
 
 /*
  * Initialize TCP for the current vnet: register the helper hook points,
  * size and create the inpcb hash and the tcpcb/sackhole zones, and start
  * the time-wait, syncache and host cache subsystems.  For the default vnet
  * only, it additionally sets the global timer defaults, registers the
  * default function block, validates header-size limits and installs the
  * shutdown and maxsockets event handlers.
  */
 void
 tcp_init(void)
 {
 	const char *tcbhash_tuneable;
 	int hashsize;
 
 	tcbhash_tuneable = "net.inet.tcp.tcbhashsize";
 
 	/* Failure to register a hook point is reported but not fatal. */
 	if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN,
 	    &V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
 		printf("%s: WARNING: unable to register helper hook\n", __func__);
 	if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT,
 	    &V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
 		printf("%s: WARNING: unable to register helper hook\n", __func__);
 	/* Compile-time default, optionally overridden by the loader tunable. */
 	hashsize = TCBHASHSIZE;
 	TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
 	if (hashsize == 0) {
 		/*
 		 * Auto tune the hash size based on maxsockets.
 		 * A perfect hash would have a 1:1 mapping
 		 * (hashsize = maxsockets) however it's been
 		 * suggested that O(2) average is better.
 		 */
 		hashsize = maketcp_hashsize(maxsockets / 4);
 		/*
 		 * Our historical default is 512,
 		 * do not autotune lower than this.
 		 */
 		if (hashsize < 512)
 			hashsize = 512;
 		if (bootverbose && IS_DEFAULT_VNET(curvnet))
 			printf("%s: %s auto tuned to %d\n", __func__,
 			    tcbhash_tuneable, hashsize);
 	}
 	/*
 	 * We require a hashsize to be a power of two.
 	 * Previously if it was not a power of two we would just reset it
 	 * back to 512, which could be a nasty surprise if you did not notice
 	 * the error message.
 	 * Instead we convert it with maketcp_hashsize().  Note that this
 	 * helper returns the next power of two above the requested value,
 	 * so the size is normally rounded up even though the warning
 	 * printed below describes it as "clipped".
 	 */
 	if (!powerof2(hashsize)) {
 		int oldhashsize = hashsize;
 
 		hashsize = maketcp_hashsize(hashsize);
 		/* prevent absurdly low value */
 		if (hashsize < 16)
 			hashsize = 16;
 		printf("%s: WARNING: TCB hash size not a power of 2, "
 		    "clipped from %d to %d.\n", __func__, oldhashsize,
 		    hashsize);
 	}
 	/* The same size is passed for both hash-size arguments. */
 	in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
 	    "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE,
 	    IPI_HASHFIELDS_4TUPLE);
 
 	/*
 	 * These have to be type stable for the benefit of the timers.
 	 */
 	V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uma_zone_set_max(V_tcpcb_zone, maxsockets);
 	uma_zone_set_warning(V_tcpcb_zone, "kern.ipc.maxsockets limit reached");
 
 	tcp_tw_init();
 	syncache_init();
 	tcp_hc_init();
 
 	TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack);
 	V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 
 	/* Skip initialization of globals for non-default instances. */
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 
 	tcp_reass_global_init();
 
 	/* XXX virtualize those below? */
 	tcp_delacktime = TCPTV_DELACK;
 	tcp_keepinit = TCPTV_KEEP_INIT;
 	tcp_keepidle = TCPTV_KEEP_IDLE;
 	tcp_keepintvl = TCPTV_KEEPINTVL;
 	tcp_maxpersistidle = TCPTV_KEEP_IDLE;
 	tcp_msl = TCPTV_MSL;
 	tcp_rexmit_min = TCPTV_MIN;
 	if (tcp_rexmit_min < 1)
 		tcp_rexmit_min = 1;
 	tcp_rexmit_slop = TCPTV_CPU_VAR;
 	tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
 	tcp_tcbhashsize = hashsize;
 	/* Setup the tcp function block list */
 	TAILQ_INIT(&t_functions);
 	rw_init_flags(&tcp_function_lock, "tcp_func_lock" , 0);
 	/* The return value of the registration is not checked here. */
 	register_tcp_functions(&tcp_def_funcblk, M_WAITOK);
 
 	/* Optionally switch the receive path to soreceive_stream(). */
 	if (tcp_soreceive_stream) {
 #ifdef INET
 		tcp_usrreqs.pru_soreceive = soreceive_stream;
 #endif
 #ifdef INET6
 		tcp6_usrreqs.pru_soreceive = soreceive_stream;
 #endif /* INET6 */
 	}
 
 	/*
 	 * Make max_protohdr cover the smallest TCP/IP header and insist
 	 * that link header plus that header fit in one mbuf header (MHLEN).
 	 */
 #ifdef INET6
 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
 #else /* INET6 */
 #define TCP_MINPROTOHDR (sizeof(struct tcpiphdr))
 #endif /* INET6 */
 	if (max_protohdr < TCP_MINPROTOHDR)
 		max_protohdr = TCP_MINPROTOHDR;
 	if (max_linkhdr + TCP_MINPROTOHDR > MHLEN)
 		panic("tcp_init");
 #undef TCP_MINPROTOHDR
 
 	ISN_LOCK_INIT();
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL,
 		SHUTDOWN_PRI_DEFAULT);
 	EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
 		EVENTHANDLER_PRI_ANY);
 #ifdef TCPPCAP
 	tcp_pcap_init();
 #endif
 
 #ifdef TCP_RFC7413
 	tcp_fastopen_init();
 #endif
 }
 
 #ifdef VIMAGE
 /*
  * Undo tcp_init() for the current vnet: shut down the optional fast-open
  * state, the host cache, syncache and time-wait subsystems, destroy the
  * pcbinfo and the two UMA zones, then deregister both helper hook points.
  * A hook point that fails to deregister is reported and otherwise ignored.
  */
 void
 tcp_destroy(void)
 {
 	static const int hook_ids[] = { HHOOK_TCP_EST_IN, HHOOK_TCP_EST_OUT };
 	unsigned int i;
 	int rc;
 
 #ifdef TCP_RFC7413
 	tcp_fastopen_destroy();
 #endif
 	tcp_hc_destroy();
 	syncache_destroy();
 	tcp_tw_destroy();
 	in_pcbinfo_destroy(&V_tcbinfo);
 	uma_zdestroy(V_sack_hole_zone);
 	uma_zdestroy(V_tcpcb_zone);
 
 	for (i = 0; i < sizeof(hook_ids) / sizeof(hook_ids[0]); i++) {
 		rc = hhook_head_deregister(V_tcp_hhh[hook_ids[i]]);
 		if (rc == 0)
 			continue;
 		printf("%s: WARNING: unable to deregister helper hook "
 		    "type=%d, id=%d: error %d returned\n", __func__,
 		    HHOOK_TYPE_TCP, hook_ids[i], rc);
 	}
 }
 #endif
 
 /*
  * shutdown_pre_sync event handler registered by tcp_init().  The body is
  * empty: no TCP work is currently done at shutdown.  xtp is unused.
  */
 void
 tcp_fini(void *xtp)
 {
 
 }
 
 /*
  * Fill in the IP and TCP headers for an outgoing packet, given the inpcb.
  * tcp_template used to store this data in mbufs, but we now recopy it out
  * of the tcpcb each time to conserve mbufs.
  *
  * ip_ptr and tcp_ptr point at caller-provided storage for the IP (or IPv6)
  * header and for the TCP header.  The inpcb must be write-locked.  Only
  * the addresses, ports and fixed per-protocol fields are set; sequence
  * numbers, flags, window, urgent pointer and checksum are left at zero.
  */
 void
 tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
 {
 	struct tcphdr *th = (struct tcphdr *)tcp_ptr;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * The preprocessor blocks below form a single if/else when both
 	 * INET6 and INET are configured, and a lone branch otherwise.
 	 */
 #ifdef INET6
 	if ((inp->inp_vflag & INP_IPV6) != 0) {
 		struct ip6_hdr *ip6;
 
 		ip6 = (struct ip6_hdr *)ip_ptr;
 		/* Replace only the masked bits; the rest of the word is kept. */
 		ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
 			(inp->inp_flow & IPV6_FLOWINFO_MASK);
 		ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
 			(IPV6_VERSION & IPV6_VERSION_MASK);
 		ip6->ip6_nxt = IPPROTO_TCP;
 		/* Payload length of a bare TCP header; the hop limit is not set. */
 		ip6->ip6_plen = htons(sizeof(struct tcphdr));
 		ip6->ip6_src = inp->in6p_laddr;
 		ip6->ip6_dst = inp->in6p_faddr;
 	}
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		struct ip *ip;
 
 		ip = (struct ip *)ip_ptr;
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = 5;		/* 5 32-bit words, i.e. no IP options */
 		ip->ip_tos = inp->inp_ip_tos;
 		ip->ip_len = 0;
 		ip->ip_id = 0;
 		ip->ip_off = 0;
 		ip->ip_ttl = inp->inp_ip_ttl;
 		ip->ip_sum = 0;
 		ip->ip_p = IPPROTO_TCP;
 		ip->ip_src = inp->inp_laddr;
 		ip->ip_dst = inp->inp_faddr;
 	}
 #endif /* INET */
 	th->th_sport = inp->inp_lport;
 	th->th_dport = inp->inp_fport;
 	th->th_seq = 0;
 	th->th_ack = 0;
 	th->th_x2 = 0;
 	th->th_off = 5;		/* 5 32-bit words, i.e. no TCP options */
 	th->th_flags = 0;
 	th->th_win = 0;
 	th->th_urp = 0;
 	th->th_sum = 0;		/* in_pseudo() is called later for ipv4 */
 }
 
 /*
  * Build a header template for sending TCP packets on a connection.
  * A struct tcptemp is allocated from M_TEMP without sleeping and its
  * skeletal IP and TCP headers are filled in by tcpip_fillheaders().
  * Returns the template, or NULL when the allocation fails.  The only
  * use for this function is in keepalives, which use tcp_respond.
  */
 struct tcptemp *
 tcpip_maketemplate(struct inpcb *inp)
 {
 	struct tcptemp *tmpl;
 
 	tmpl = malloc(sizeof(struct tcptemp), M_TEMP, M_NOWAIT);
 	if (tmpl != NULL)
 		tcpip_fillheaders(inp, (void *)&tmpl->tt_ipgen,
 		    (void *)&tmpl->tt_t);
 	return (tmpl);
 }
 
 /*
  * Send a single message to the TCP at address specified by
  * the given TCP/IP header.  If m == NULL, then we make a copy
  * of the tcpiphdr at th and send directly to the addressed host.
  * This is used to force keep alive messages out using the TCP
  * template for a connection.  If flags are given then we send
  * a message back to the TCP which originated the segment th,
  * and discard the mbuf containing it and any other attached mbufs.
  *
  * In any case the ack and sequence number of the transmitted
  * segment are as specified by the parameters.
  *
  * NOTE: If m != NULL, then th must point to *inside* the mbuf.
  *
  * At least one of tp and m must be non-NULL.  When tp is given its inpcb
  * must be write-locked.  In the m == NULL case the flags argument is
  * ignored and the segment is sent with TH_ACK only.  Nothing is returned;
  * an mbuf allocation failure and the result of the output routine are
  * both silently dropped.
  */
 void
 tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
     tcp_seq ack, tcp_seq seq, int flags)
 {
 	int tlen;
 	int win = 0;
 	struct ip *ip;
 	struct tcphdr *nth;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 	int isipv6;
 #endif /* INET6 */
 	int ipflags = 0;	/* never modified below; always passed as 0 */
 	struct inpcb *inp;
 
 	KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
 
 	/* The address family is taken from the version field of ipgen. */
 #ifdef INET6
 	isipv6 = ((struct ip *)ipgen)->ip_v == (IPV6_VERSION >> 4);
 	ip6 = ipgen;
 #endif /* INET6 */
 	ip = ipgen;
 
 	if (tp != NULL) {
 		inp = tp->t_inpcb;
 		KASSERT(inp != NULL, ("tcp control block w/o inpcb"));
 		INP_WLOCK_ASSERT(inp);
 	} else
 		inp = NULL;
 
 	/*
 	 * Advertise the receive buffer space, capped at the largest window
 	 * expressible with the connection's scale factor.  A RST, or a
 	 * reply without a tcpcb, keeps the window at zero.
 	 */
 	if (tp != NULL) {
 		if (!(flags & TH_RST)) {
 			win = sbspace(&inp->inp_socket->so_rcv);
 			if (win > (long)TCP_MAXWIN << tp->rcv_scale)
 				win = (long)TCP_MAXWIN << tp->rcv_scale;
 		}
 	}
 	if (m == NULL) {
 		/* Template case: build the packet in a freshly allocated mbuf. */
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL)
 			return;
 		tlen = 0;
 		/* Leave room in front for a link-layer header. */
 		m->m_data += max_linkhdr;
 #ifdef INET6
 		if (isipv6) {
 			bcopy((caddr_t)ip6, mtod(m, caddr_t),
 			      sizeof(struct ip6_hdr));
 			ip6 = mtod(m, struct ip6_hdr *);
 			nth = (struct tcphdr *)(ip6 + 1);
 		} else
 #endif /* INET6 */
 		{
 			bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
 			ip = mtod(m, struct ip *);
 			nth = (struct tcphdr *)(ip + 1);
 		}
 		bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
 		/* The caller's flags are overridden in this case. */
 		flags = TH_ACK;
 	} else {
 		/*
 		 * Reuse the mbuf: drop any chained mbufs and rewrite the
 		 * headers of the received packet in place.
 		 * XXX MRT We inherit the FIB, which is lucky.
 		 */
 		m_freem(m->m_next);
 		m->m_next = NULL;
 		m->m_data = (caddr_t)ipgen;
 		/* m_len is set later */
 		tlen = 0;
 		/* Swap source and destination so the reply goes to the sender. */
 #define xchg(a,b,type) { type t; t=a; a=b; b=t; }
 #ifdef INET6
 		if (isipv6) {
 			xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
 			nth = (struct tcphdr *)(ip6 + 1);
 		} else
 #endif /* INET6 */
 		{
 			xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
 			nth = (struct tcphdr *)(ip + 1);
 		}
 		if (th != nth) {
 			/*
 			 * this is usually a case when an extension header
 			 * exists between the IPv6 header and the
 			 * TCP header.
 			 */
 			nth->th_sport = th->th_sport;
 			nth->th_dport = th->th_dport;
 		}
 		xchg(nth->th_dport, nth->th_sport, uint16_t);
 #undef xchg
 	}
 	/* Either way the packet is now exactly one IP header plus one TCP header. */
 #ifdef INET6
 	if (isipv6) {
 		ip6->ip6_flow = 0;
 		ip6->ip6_vfc = IPV6_VERSION;
 		ip6->ip6_nxt = IPPROTO_TCP;
 		tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
 		ip6->ip6_plen = htons(tlen - sizeof(*ip6));
 	}
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		tlen += sizeof (struct tcpiphdr);
 		ip->ip_len = htons(tlen);
 		ip->ip_ttl = V_ip_defttl;
 		if (V_path_mtu_discovery)
 			ip->ip_off |= htons(IP_DF);
 	}
 #endif
 	m->m_len = tlen;
 	m->m_pkthdr.len = tlen;
 	m->m_pkthdr.rcvif = NULL;
 #ifdef MAC
 	if (inp != NULL) {
 		/*
 		 * Packet is associated with a socket, so allow the
 		 * label of the response to reflect the socket label.
 		 */
 		INP_WLOCK_ASSERT(inp);
 		mac_inpcb_create_mbuf(inp, m);
 	} else {
 		/*
 		 * Packet is not associated with a socket, so possibly
 		 * update the label in place.
 		 */
 		mac_netinet_tcp_reply(m);
 	}
 #endif
 	nth->th_seq = htonl(seq);
 	nth->th_ack = htonl(ack);
 	nth->th_x2 = 0;
 	nth->th_off = sizeof (struct tcphdr) >> 2;	/* header length in 32-bit words */
 	nth->th_flags = flags;
 	/* With a tcpcb the window goes on the wire scaled down by rcv_scale. */
 	if (tp != NULL)
 		nth->th_win = htons((u_short) (win >> tp->rcv_scale));
 	else
 		nth->th_win = htons((u_short)win);
 	nth->th_urp = 0;
 
 	/*
 	 * Only the pseudo-header sum is stored in th_sum; csum_flags and
 	 * csum_data request that the TCP checksum be completed later.
 	 */
 	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 #ifdef INET6
 	if (isipv6) {
 		m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
 		nth->th_sum = in6_cksum_pseudo(ip6,
 		    tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0);
 		ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb :
 		    NULL, NULL);
 	}
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		m->m_pkthdr.csum_flags = CSUM_TCP;
 		nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 		    htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p)));
 	}
 #endif /* INET */
 #ifdef TCPDEBUG
 	if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0);
 #endif
 	/*
 	 * NOTE(review): this is a transmit path, yet the debug probe fired
 	 * here is debug__input -- confirm debug__output was not intended.
 	 */
 	TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
 	if (flags & TH_RST)
 		TCP_PROBE5(accept__refused, NULL, NULL, mtod(m, const char *),
 		    tp, nth);
 
 	TCP_PROBE5(send, NULL, tp, mtod(m, const char *), tp, nth);
 	/* Hand the packet to the matching IP output routine; errors are ignored. */
 #ifdef INET6
 	if (isipv6)
 		(void) ip6_output(m, NULL, NULL, ipflags, NULL, NULL, inp);
 #endif /* INET6 */
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 		(void) ip_output(m, NULL, NULL, ipflags, NULL, inp);
 #endif
 }
 
 /*
  * Create a new TCP control block, making an
  * empty reassembly queue and hooking it to the argument
  * protocol control block.  The `inp' parameter must have
  * come from the zone allocator set up in tcp_init().
  *
  * The control block is allocated zeroed and without sleeping.  It is
  * bound to the current default function block and the current default
  * congestion control algorithm, takes a reference on inp, and is stored
  * in inp->inp_ppcb.  Returns the new tcpcb, or NULL if the allocation,
  * the congestion control init or the khelp init fails.
  */
 struct tcpcb *
 tcp_newtcpcb(struct inpcb *inp)
 {
 	struct tcpcb_mem *tm;
 	struct tcpcb *tp;
 #ifdef INET6
 	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
 
 	tm = uma_zalloc(V_tcpcb_zone, M_NOWAIT | M_ZERO);
 	if (tm == NULL)
 		return (NULL);
 	tp = &tm->tcb;
 
 	/* Initialise cc_var struct for this tcpcb. */
 	tp->ccv = &tm->ccv;
 	tp->ccv->type = IPPROTO_TCP;
 	tp->ccv->ccvc.tcp = tp;
 	/*
 	 * Bind to the current default function block and count this
 	 * tcpcb as a user of it, under the function list read lock.
 	 */
 	rw_rlock(&tcp_function_lock);
 	tp->t_fb = tcp_func_set_ptr;
 	refcount_acquire(&tp->t_fb->tfb_refcnt);
 	rw_runlock(&tcp_function_lock);
 	if (tp->t_fb->tfb_tcp_fb_init) {
 		(*tp->t_fb->tfb_tcp_fb_init)(tp);
 	}
 	/*
 	 * Use the current system default CC algorithm.
 	 */
 	CC_LIST_RLOCK();
 	KASSERT(!STAILQ_EMPTY(&cc_list), ("cc_list is empty!"));
 	CC_ALGO(tp) = CC_DEFAULT();
 	CC_LIST_RUNLOCK();
 
 	/* On failure, undo the function block setup before freeing. */
 	if (CC_ALGO(tp)->cb_init != NULL)
 		if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) {
 			if (tp->t_fb->tfb_tcp_fb_fini)
 				(*tp->t_fb->tfb_tcp_fb_fini)(tp);
 			refcount_release(&tp->t_fb->tfb_refcnt);
 			uma_zfree(V_tcpcb_zone, tm);
 			return (NULL);
 		}
 
 	tp->osd = &tm->osd;
 	/*
 	 * NOTE(review): this failure path does not call the CC algorithm's
 	 * cb_destroy although cb_init may have succeeded above -- confirm
 	 * that no CC state is left behind.
 	 */
 	if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) {
 		if (tp->t_fb->tfb_tcp_fb_fini)
 			(*tp->t_fb->tfb_tcp_fb_fini)(tp);
 		refcount_release(&tp->t_fb->tfb_refcnt);
 		uma_zfree(V_tcpcb_zone, tm);
 		return (NULL);
 	}
 
 #ifdef VIMAGE
 	tp->t_vnet = inp->inp_vnet;
 #endif
 	tp->t_timers = &tm->tt;
 	/*	LIST_INIT(&tp->t_segq); */	/* XXX covered by M_ZERO */
 	/* Start from the default MSS of the connection's address family. */
 	tp->t_maxseg =
 #ifdef INET6
 		isipv6 ? V_tcp_v6mssdflt :
 #endif /* INET6 */
 		V_tcp_mssdflt;
 
 	/* Set up our timeouts. */
 	callout_init(&tp->t_timers->tt_rexmt, 1);
 	callout_init(&tp->t_timers->tt_persist, 1);
 	callout_init(&tp->t_timers->tt_keep, 1);
 	callout_init(&tp->t_timers->tt_2msl, 1);
 	callout_init(&tp->t_timers->tt_delack, 1);
 
 	/* t_flags starts out zero (M_ZERO), so a plain assignment is enough. */
 	if (V_tcp_do_rfc1323)
 		tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
 	if (V_tcp_do_sack)
 		tp->t_flags |= TF_SACK_PERMIT;
 	TAILQ_INIT(&tp->snd_holes);
 	/*
 	 * The tcpcb will hold a reference on its inpcb until tcp_discardcb()
 	 * is called.
 	 */
 	in_pcbref(inp);	/* Reference for tcpcb */
 	tp->t_inpcb = inp;
 
 	/*
 	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
 	 * rtt estimate.  Set rttvar so that srtt + 4 * rttvar gives
 	 * reasonable initial retransmit time.
 	 */
 	tp->t_srtt = TCPTV_SRTTBASE;
 	tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
 	tp->t_rttmin = tcp_rexmit_min;
 	tp->t_rxtcur = TCPTV_RTOBASE;
 	/* Congestion window and slow-start threshold start at the maximum. */
 	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->t_rcvtime = ticks;
 	/*
 	 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
 	 * because the socket may be bound to an IPv6 wildcard address,
 	 * which may match an IPv4-mapped IPv6 address.
 	 */
 	inp->inp_ip_ttl = V_ip_defttl;
 	inp->inp_ppcb = tp;
 #ifdef TCPPCAP
 	/*
 	 * Init the TCP PCAP queues.
 	 */
 	tcp_pcap_tcpcb_init(tp);
 #endif
 	return (tp);		/* XXX */
 }
 
 /*
  * Called when a congestion control algorithm is about to be unloaded.
  * Every active control block in every network stack that still uses
  * `unload_algo' is moved back to NewReno, and the departing algorithm's
  * cb_destroy hook, if it has one, is run for that connection.  This keeps
  * the CC framework from being left with dangling pointers that would
  * trigger a panic.
  * A non-zero return would tell the CC framework that the unload is unsafe,
  * but nothing in this implementation can fail, so the result is always 0.
  */
 int
 tcp_ccalgounload(struct cc_algo *unload_algo)
 {
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	VNET_LIST_RLOCK();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		/*
 		 * Connections that are part way through being set up with
 		 * the departing algorithm cannot race with us, because
 		 * INP_INFO_WLOCK is held during initialisation; the walk
 		 * below only starts once the connection list is stable.
 		 */
 		INP_INFO_WLOCK(&V_tcbinfo);
 		LIST_FOREACH(inp, &V_tcb, inp_list) {
 			/*
 			 * Holding INP_WLOCK guarantees the connection is
 			 * not executing inside the CC module right now, so
 			 * switching algorithms is safe.
 			 */
 			INP_WLOCK(inp);
 			/* Important to skip tcptw structs. */
 			tp = NULL;
 			if ((inp->inp_flags & INP_TIMEWAIT) == 0)
 				tp = intotcpcb(inp);
 			if (tp != NULL && CC_ALGO(tp) == unload_algo) {
 				/* NewReno does not require any init. */
 				CC_ALGO(tp) = &newreno_cc_algo;
 				if (unload_algo->cb_destroy != NULL)
 					unload_algo->cb_destroy(tp->ccv);
 			}
 			INP_WUNLOCK(inp);
 		}
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK();
 
 	return (0);
 }
 
 /*
  * Abort a TCP connection and report `errno' on its socket.  A connection
  * that has already received a SYN is moved to CLOSED and the output
  * routine is run before closing.  A timeout is replaced by the recorded
  * soft error when there is one.  Returns whatever tcp_close() returns.
  */
 struct tcpcb *
 tcp_drop(struct tcpcb *tp, int errno)
 {
 	struct socket *so;
 
 	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	so = tp->t_inpcb->inp_socket;
 	if (!TCPS_HAVERCVDSYN(tp->t_state)) {
 		TCPSTAT_INC(tcps_conndrops);
 	} else {
 		tcp_state_change(tp, TCPS_CLOSED);
 		(void) tp->t_fb->tfb_tcp_output(tp);
 		TCPSTAT_INC(tcps_drops);
 	}
 	if (errno == ETIMEDOUT && tp->t_softerror != 0)
 		errno = tp->t_softerror;
 	so->so_error = errno;
 	return (tcp_close(tp));
 }
 
 /*
  * Tear down a TCP control block whose inpcb is write-locked: stop its
  * timers, optionally save its metrics in the host cache, release the
  * reassembly queue, SACK holes, congestion control and khelp state, and
  * detach it from the inpcb.  The tcpcb memory itself is freed here only
  * when no timer flag remains set and the function block reports no timers
  * left; otherwise it is left allocated on return.
  */
 void
 tcp_discardcb(struct tcpcb *tp)
 {
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so = inp->inp_socket;
 #ifdef INET6
 	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
 	int released;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * Make sure that all of our timers are stopped before we delete the
 	 * PCB.
 	 *
 	 * If stopping a timer fails, we schedule a discard function in same
 	 * callout, and the last discard function called will take care of
 	 * deleting the tcpcb.
 	 */
 	tcp_timer_stop(tp, TT_REXMT);
 	tcp_timer_stop(tp, TT_PERSIST);
 	tcp_timer_stop(tp, TT_KEEP);
 	tcp_timer_stop(tp, TT_2MSL);
 	tcp_timer_stop(tp, TT_DELACK);
 	if (tp->t_fb->tfb_tcp_timer_stop_all) {
 		/* Call the stop-all function of the methods */
 		tp->t_fb->tfb_tcp_timer_stop_all(tp);
 	}
 
 	/*
 	 * If we got enough samples through the srtt filter,
 	 * save the rtt and rttvar in the routing entry.
 	 * 'Enough' is arbitrarily defined as 4 rtt samples.
 	 * 4 samples is enough for the srtt filter to converge
 	 * to within enough % of the correct value; fewer samples
 	 * and we could save a bogus rtt. The danger is not high
 	 * as tcp quickly recovers from everything.
 	 * XXX: Works very well but needs some more statistics!
 	 */
 	if (tp->t_rttupdated >= 4) {
 		struct hc_metrics_lite metrics;
 		u_long ssthresh;
 
 		bzero(&metrics, sizeof(metrics));
 		/*
 		 * Update the ssthresh always when the conditions below
 		 * are satisfied. This gives us better new start value
 		 * for the congestion avoidance for new connections.
 		 * ssthresh is only set if packet loss occurred on a session.
 		 *
 		 * XXXRW: 'so' may be NULL here, and/or socket buffer may be
 		 * being torn down.  Ideally this code would not use 'so'.
 		 */
 		ssthresh = tp->snd_ssthresh;
 		if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) {
 			/*
 			 * convert the limit from user data bytes to
 			 * packets then to packet data bytes.
 			 */
 			ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg;
 			if (ssthresh < 2)
 				ssthresh = 2;
 			/* Per-packet size is t_maxseg plus the IP and TCP headers. */
 			ssthresh *= (u_long)(tp->t_maxseg +
 #ifdef INET6
 			    (isipv6 ? sizeof (struct ip6_hdr) +
 				sizeof (struct tcphdr) :
 #endif
 				sizeof (struct tcpiphdr)
 #ifdef INET6
 			    )
 #endif
 			    );
 		} else
 			ssthresh = 0;
 		metrics.rmx_ssthresh = ssthresh;
 
 		metrics.rmx_rtt = tp->t_srtt;
 		metrics.rmx_rttvar = tp->t_rttvar;
 		metrics.rmx_cwnd = tp->snd_cwnd;
 		metrics.rmx_sendpipe = 0;
 		metrics.rmx_recvpipe = 0;
 
 		tcp_hc_update(&inp->inp_inc, &metrics);
 	}
 
 	/* free the reassembly queue, if any */
 	tcp_reass_flush(tp);
 
 #ifdef TCP_OFFLOAD
 	/* Disconnect offload device, if any. */
 	if (tp->t_flags & TF_TOE)
 		tcp_offload_detach(tp);
 #endif
 		
 	tcp_free_sackholes(tp);
 
 #ifdef TCPPCAP
 	/* Free the TCP PCAP queues. */
 	tcp_pcap_drain(&(tp->t_inpkts));
 	tcp_pcap_drain(&(tp->t_outpkts));
 #endif
 
 	/* Allow the CC algorithm to clean up after itself. */
 	if (CC_ALGO(tp)->cb_destroy != NULL)
 		CC_ALGO(tp)->cb_destroy(tp->ccv);
 
 	khelp_destroy_osd(tp->osd);
 
 	/* From here on the inpcb no longer points at this tcpcb. */
 	CC_ALGO(tp) = NULL;
 	inp->inp_ppcb = NULL;
 	if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
 		/* We own the last reference on tcpcb, let's free it. */
 		if ((tp->t_fb->tfb_tcp_timers_left) &&
 		    (tp->t_fb->tfb_tcp_timers_left(tp))) {
 			    /* Some fb timers left running! */
 			    return;
 		}
 		if (tp->t_fb->tfb_tcp_fb_fini)
 			(*tp->t_fb->tfb_tcp_fb_fini)(tp);
 		refcount_release(&tp->t_fb->tfb_refcnt);
 		tp->t_inpcb = NULL;
 		/* tp is the first member of its tcpcb_mem, so it names the zone item. */
 		uma_zfree(V_tcpcb_zone, tp);
 		/* Drop the inpcb reference taken in tcp_newtcpcb(). */
 		released = in_pcbrele_wlocked(inp);
 		KASSERT(!released, ("%s: inp %p should not have been released "
 			"here", __func__, inp));
 	}
 }
 
 /* Callout entry point: run tcp_timer_discard() for the 2MSL timer. */
 void
 tcp_timer_2msl_discard(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 
 	tcp_timer_discard(tp, TT_2MSL);
 }
 
 /* Callout entry point: run tcp_timer_discard() for the keepalive timer. */
 void
 tcp_timer_keep_discard(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 
 	tcp_timer_discard(tp, TT_KEEP);
 }
 
 /* Callout entry point: run tcp_timer_discard() for the persist timer. */
 void
 tcp_timer_persist_discard(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 
 	tcp_timer_discard(tp, TT_PERSIST);
 }
 
 /* Callout entry point: run tcp_timer_discard() for the retransmit timer. */
 void
 tcp_timer_rexmt_discard(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 
 	tcp_timer_discard(tp, TT_REXMT);
 }
 
 /* Callout entry point: run tcp_timer_discard() for the delayed-ACK timer. */
 void
 tcp_timer_delack_discard(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 
 	tcp_timer_discard(tp, TT_DELACK);
 }
 
 /*
  * Common body of the per-timer *_discard functions.  Clears timer_type
  * from the tcpcb's timer flags and, once no TT_MASK bit remains and the
  * function block reports no timers left, finishes the teardown: runs the
  * function block's fini hook, drops its reference, frees the tcpcb and
  * releases the tcpcb's reference on the inpcb.
  */
 void
 tcp_timer_discard(struct tcpcb *tp, uint32_t timer_type)
 {
 	struct inpcb *inp;
 
 	CURVNET_SET(tp->t_vnet);
 	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
 		__func__, tp));
 	INP_WLOCK(inp);
 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0,
 		("%s: tcpcb has to be stopped here", __func__));
 	KASSERT((tp->t_timers->tt_flags & timer_type) != 0,
 		("%s: discard callout should be running", __func__));
 	/* This timer's discard has now run. */
 	tp->t_timers->tt_flags &= ~timer_type;
 	if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
 		/* We own the last reference on this tcpcb, let's free it. */
 		if ((tp->t_fb->tfb_tcp_timers_left) &&
 		    (tp->t_fb->tfb_tcp_timers_left(tp))) {
 			    /* Some fb timers left running! */
 			    goto leave;
 		}
 		if (tp->t_fb->tfb_tcp_fb_fini)
 			(*tp->t_fb->tfb_tcp_fb_fini)(tp);
 		refcount_release(&tp->t_fb->tfb_refcnt);
 		tp->t_inpcb = NULL;
 		uma_zfree(V_tcpcb_zone, tp);
 		/*
 		 * A true return is taken to mean the inpcb is gone, so its
 		 * lock is not touched again on this path.
 		 */
 		if (in_pcbrele_wlocked(inp)) {
 			INP_INFO_RUNLOCK(&V_tcbinfo);
 			CURVNET_RESTORE();
 			return;
 		}
 	}
 leave:
 	INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
 /*
  * Close a TCP control block: mark its inpcb as dropped and disconnect the
  * socket.  If the inpcb holds the protocol's reference on the socket
  * (INP_SOCKREF), that reference is given up, the inpcb is unlocked, the
  * socket is handed to sofree() and NULL is returned.  Otherwise tp is
  * returned with the inpcb still locked.
  */
 struct tcpcb *
 tcp_close(struct tcpcb *tp)
 {
 	struct inpcb *inp;
 	struct socket *so;
 
 	inp = tp->t_inpcb;
 	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 #ifdef TCP_OFFLOAD
 	if (tp->t_state == TCPS_LISTEN)
 		tcp_offload_listen_stop(tp);
 #endif
 #ifdef TCP_RFC7413
 	/*
 	 * Give back the TFO pending counter, both for TFO listen sockets
 	 * and for passively-created TFO sockets going from SYN_RECEIVED
 	 * to CLOSED.
 	 */
 	if (tp->t_tfo_pending != NULL) {
 		tcp_fastopen_decrement_counter(tp->t_tfo_pending);
 		tp->t_tfo_pending = NULL;
 	}
 #endif
 	in_pcbdrop(inp);
 	TCPSTAT_INC(tcps_closed);
 	so = inp->inp_socket;
 	KASSERT(so != NULL, ("tcp_close: inp_socket NULL"));
 	soisdisconnected(so);
 	if ((inp->inp_flags & INP_SOCKREF) == 0)
 		return (tp);
 
 	/* We hold the protocol reference on the socket: release it. */
 	KASSERT(so->so_state & SS_PROTOREF, ("tcp_close: !SS_PROTOREF"));
 	inp->inp_flags &= ~INP_SOCKREF;
 	INP_WUNLOCK(inp);
 	ACCEPT_LOCK();
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_PROTOREF;
 	sofree(so);
 	return (NULL);
 }
 
 /*
  * Release memory held by TCP connections in every network stack, unless
  * do_tcpdrain is off.  For each connection that is not in time-wait the
  * reassembly queue is flushed and the SACK report is cleaned.
  *
  * XXX: The "Net/3" implementation doesn't imply that the TCP
  *      reassembly queue should be flushed, but in a situation
  *      where we're really low on mbufs, this is potentially
  *      useful.
  */
 void
 tcp_drain(void)
 {
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	if (!do_tcpdrain)
 		return;
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		INP_INFO_WLOCK(&V_tcbinfo);
 		LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
 			/* Time-wait entries are skipped without locking. */
 			if ((inp->inp_flags & INP_TIMEWAIT) != 0)
 				continue;
 			INP_WLOCK(inp);
 			tp = intotcpcb(inp);
 			if (tp != NULL) {
 				tcp_reass_flush(tp);
 				tcp_clean_sackreport(tp);
 			}
 			INP_WUNLOCK(inp);
 		}
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Notify a tcp user of an asynchronous error;
  * store error as soft error, but wake up user
  * (for now, won't do anything until can select for soft error).
  *
  * Do not wake up user since there currently is no mechanism for
  * reporting soft errors (yet - a kqueue filter may be added).
  */
 static struct inpcb *
 tcp_notify(struct inpcb *inp, int error)
 {
 	struct tcpcb *tp;
 
 	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	if ((inp->inp_flags & INP_TIMEWAIT) ||
 	    (inp->inp_flags & INP_DROPPED))
 		return (inp);
 
 	tp = intotcpcb(inp);
 	KASSERT(tp != NULL, ("tcp_notify: tp == NULL"));
 
 	/*
 	 * Ignore some errors if we are hooked up.
 	 * If connection hasn't completed, has retransmitted several times,
 	 * and receives a second error, give up now.  This is better
 	 * than waiting a long time to establish a connection that
 	 * can never complete.
 	 */
 	if (tp->t_state == TCPS_ESTABLISHED &&
 	    (error == EHOSTUNREACH || error == ENETUNREACH ||
 	     error == EHOSTDOWN)) {
 		return (inp);
 	} else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
 	    tp->t_softerror) {
 		tp = tcp_drop(tp, error);
 		if (tp != NULL)
 			return (inp);
 		else
 			return (NULL);
 	} else {
 		tp->t_softerror = error;
 		return (inp);
 	}
 #if 0
 	wakeup( &so->so_timeo);
 	sorwakeup(so);
 	sowwakeup(so);
 #endif
 }
 
 static int
 tcp_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	int error, i, m, n, pcb_count;
 	struct inpcb *inp, **inp_list;
 	inp_gen_t gencnt;
 	struct xinpgen xig;
 
 	/*
 	 * The process of preparing the TCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request.
 	 */
 	if (req->oldptr == NULL) {
 		n = V_tcbinfo.ipi_count + syncache_pcbcount();
 		n += imax(n / 8, 10);
 		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
 		return (0);
 	}
 
 	if (req->newptr != NULL)
 		return (EPERM);
 
 	/*
 	 * OK, now we're committed to doing something.
 	 */
 	INP_LIST_RLOCK(&V_tcbinfo);
 	gencnt = V_tcbinfo.ipi_gencnt;
 	n = V_tcbinfo.ipi_count;
 	INP_LIST_RUNLOCK(&V_tcbinfo);
 
 	m = syncache_pcbcount();
 
 	error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
 		+ (n + m) * sizeof(struct xtcpcb));
 	if (error != 0)
 		return (error);
 
 	xig.xig_len = sizeof xig;
 	xig.xig_count = n + m;
 	xig.xig_gen = gencnt;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
 		return (error);
 
 	error = syncache_pcblist(req, m, &pcb_count);
 	if (error)
 		return (error);
 
 	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
 	if (inp_list == NULL)
 		return (ENOMEM);
 
 	INP_INFO_WLOCK(&V_tcbinfo);
 	for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
 	    inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) {
 		INP_WLOCK(inp);
 		if (inp->inp_gencnt <= gencnt) {
 			/*
 			 * XXX: This use of cr_cansee(), introduced with
 			 * TCP state changes, is not quite right, but for
 			 * now, better than nothing.
 			 */
 			if (inp->inp_flags & INP_TIMEWAIT) {
 				if (intotw(inp) != NULL)
 					error = cr_cansee(req->td->td_ucred,
 					    intotw(inp)->tw_cred);
 				else
 					error = EINVAL;	/* Skip this inp. */
 			} else
 				error = cr_canseeinpcb(req->td->td_ucred, inp);
 			if (error == 0) {
 				in_pcbref(inp);
 				inp_list[i++] = inp;
 			}
 		}
 		INP_WUNLOCK(inp);
 	}
 	INP_INFO_WUNLOCK(&V_tcbinfo);
 	n = i;
 
 	error = 0;
 	for (i = 0; i < n; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
 		if (inp->inp_gencnt <= gencnt) {
 			struct xtcpcb xt;
 			void *inp_ppcb;
 
 			bzero(&xt, sizeof(xt));
 			xt.xt_len = sizeof xt;
 			/* XXX should avoid extra copy */
 			bcopy(inp, &xt.xt_inp, sizeof *inp);
 			inp_ppcb = inp->inp_ppcb;
 			if (inp_ppcb == NULL)
 				bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
 			else if (inp->inp_flags & INP_TIMEWAIT) {
 				bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
 				xt.xt_tp.t_state = TCPS_TIME_WAIT;
 			} else {
 				bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
 				if (xt.xt_tp.t_timers)
 					tcp_timer_to_xtimer(&xt.xt_tp, xt.xt_tp.t_timers, &xt.xt_timer);
 			}
 			if (inp->inp_socket != NULL)
 				sotoxsocket(inp->inp_socket, &xt.xt_socket);
 			else {
 				bzero(&xt.xt_socket, sizeof xt.xt_socket);
 				xt.xt_socket.xso_protocol = IPPROTO_TCP;
 			}
 			xt.xt_inp.inp_gencnt = inp->inp_gencnt;
 			INP_RUNLOCK(inp);
 			error = SYSCTL_OUT(req, &xt, sizeof xt);
 		} else
 			INP_RUNLOCK(inp);
 	}
 	INP_INFO_RLOCK(&V_tcbinfo);
 	for (i = 0; i < n; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
 		if (!in_pcbrele_rlocked(inp))
 			INP_RUNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.
 		 * If the generation differs from what we told
 		 * her before, she knows that something happened
 		 * while we were processing this request, and it
 		 * might be necessary to retry.
 		 */
 		INP_LIST_RLOCK(&V_tcbinfo);
 		xig.xig_gen = V_tcbinfo.ipi_gencnt;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = V_tcbinfo.ipi_count + pcb_count;
 		INP_LIST_RUNLOCK(&V_tcbinfo);
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 	free(inp_list, M_TEMP);
 	return (error);
 }
 
 /*
  * Register tcp_pcblist() as the handler of the read-only, opaque
  * TCPCTL_PCBLIST node below _net_inet_tcp.  The format string advertises
  * the exported records as struct xtcpcb.
  */
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
     tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
 
 #ifdef INET
 static int
 tcp_getcred(SYSCTL_HANDLER_ARGS)
 {
 	struct xucred xuc;
 	struct sockaddr_in addrs[2];
 	struct inpcb *inp;
 	int error;
 
 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
 	if (error)
 		return (error);
 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
 	if (error)
 		return (error);
 	inp = in_pcblookup(&V_tcbinfo, addrs[1].sin_addr, addrs[1].sin_port,
 	    addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_RLOCKPCB, NULL);
 	if (inp != NULL) {
 		if (inp->inp_socket == NULL)
 			error = ENOENT;
 		if (error == 0)
 			error = cr_canseeinpcb(req->td->td_ucred, inp);
 		if (error == 0)
 			cru2x(inp->inp_cred, &xuc);
 		INP_RUNLOCK(inp);
 	} else
 		error = ENOENT;
 	if (error == 0)
 		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred,
     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
     tcp_getcred, "S,xucred", "Get the xucred of a TCP connection");
 #endif /* INET */
 
 #ifdef INET6
 static int
 tcp6_getcred(SYSCTL_HANDLER_ARGS)
 {
 	struct xucred xuc;
 	struct sockaddr_in6 addrs[2];
 	struct inpcb *inp;
 	int error;
 #ifdef INET
 	int mapped = 0;
 #endif
 
 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
 	if (error)
 		return (error);
 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
 	if (error)
 		return (error);
 	if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 ||
 	    (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) {
 		return (error);
 	}
 	if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) {
 #ifdef INET
 		if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr))
 			mapped = 1;
 		else
 #endif
 			return (EINVAL);
 	}
 
 #ifdef INET
 	if (mapped == 1)
 		inp = in_pcblookup(&V_tcbinfo,
 			*(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12],
 			addrs[1].sin6_port,
 			*(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12],
 			addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL);
 	else
 #endif
 		inp = in6_pcblookup(&V_tcbinfo,
 			&addrs[1].sin6_addr, addrs[1].sin6_port,
 			&addrs[0].sin6_addr, addrs[0].sin6_port,
 			INPLOOKUP_RLOCKPCB, NULL);
 	if (inp != NULL) {
 		if (inp->inp_socket == NULL)
 			error = ENOENT;
 		if (error == 0)
 			error = cr_canseeinpcb(req->td->td_ucred, inp);
 		if (error == 0)
 			cru2x(inp->inp_cred, &xuc);
 		INP_RUNLOCK(inp);
 	} else
 		error = ENOENT;
 	if (error == 0)
 		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred,
     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
     tcp6_getcred, "S,xucred", "Get the xucred of a TCP6 connection");
 #endif /* INET6 */
 
 
 #ifdef INET
 /*
  * Handle a protocol control request for TCP over IPv4.
  *
  * cmd	PRC_* request code
  * sa	destination the request refers to; anything but a non-wildcard
  *	AF_INET address is ignored
  * vip	when non-NULL, treated as the IP header embedded in a struct icmp
  *	(the enclosing ICMP header is recovered from it below) and followed
  *	by the TCP header of the offending segment; when NULL every TCP
  *	pcb for the destination is notified
  */
 void
 tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 	struct ip *ip = vip;
 	struct tcphdr *th;
 	struct in_addr faddr;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
 	struct icmp *icp;
 	struct in_conninfo inc;
 	tcp_seq icmp_tcp_seq;
 	int mtu;
 
 	faddr = ((struct sockaddr_in *)sa)->sin_addr;
 	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
 		return;
 
 	/* Select the per-connection notify routine from the request code. */
 	if (cmd == PRC_MSGSIZE)
 		notify = tcp_mtudisc_notify;
 	else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
 		cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && ip)
 		notify = tcp_drop_syn_sent;
 	/*
 	 * Redirects don't need to be handled up here.
 	 */
 	else if (PRC_IS_REDIRECT(cmd))
 		return;
 	/*
 	 * Hostdead is ugly because it goes linearly through all PCBs.
 	 * XXX: We never get this from ICMP, otherwise it makes an
 	 * excellent DoS attack on machines with many connections.
 	 */
 	else if (cmd == PRC_HOSTDEAD)
 		ip = NULL;
 	/* Ignore out-of-range codes and codes with no errno mapping. */
 	else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
 		return;
 
 	/* No quoted header: notify every TCP pcb talking to faddr. */
 	if (ip == NULL) {
 		in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify);
 		return;
 	}
 
 	/*
 	 * Step back from the quoted IP header to the ICMP header that
 	 * contains it, and forward past the IP header to the TCP header.
 	 */
 	icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip));
 	th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
 	INP_INFO_RLOCK(&V_tcbinfo);
 	/* A successful lookup returns the pcb write-locked. */
 	inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src,
 	    th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
 	if (inp != NULL)  {
 		if (!(inp->inp_flags & INP_TIMEWAIT) &&
 		    !(inp->inp_flags & INP_DROPPED) &&
 		    !(inp->inp_socket == NULL)) {
 			icmp_tcp_seq = ntohl(th->th_seq);
 			tp = intotcpcb(inp);
 			/*
 			 * Act only when the quoted sequence number lies in
 			 * [snd_una, snd_max).
 			 */
 			if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) &&
 			    SEQ_LT(icmp_tcp_seq, tp->snd_max)) {
 				if (cmd == PRC_MSGSIZE) {
 					/*
 					 * MTU discovery:
 					 * If we got a needfrag set the MTU
 					 * in the route to the suggested new
 					 * value (if given) and then notify.
 					 */
 				    	mtu = ntohs(icp->icmp_nextmtu);
 					/*
 					 * If no alternative MTU was
 					 * proposed, try the next smaller
 					 * one.
 					 */
 					if (!mtu)
 						mtu = ip_next_mtu(
 						    ntohs(ip->ip_len), 1);
 					/* Clamp to the configured minimum. */
 					if (mtu < V_tcp_minmss +
 					    sizeof(struct tcpiphdr))
 						mtu = V_tcp_minmss +
 						    sizeof(struct tcpiphdr);
 					/*
 					 * Only process the offered MTU if it
 					 * is smaller than the current one.
 					 */
 					if (mtu < tp->t_maxseg +
 					    sizeof(struct tcpiphdr)) {
 						bzero(&inc, sizeof(inc));
 						inc.inc_faddr = faddr;
 						inc.inc_fibnum =
 						    inp->inp_inc.inc_fibnum;
 						tcp_hc_updatemtu(&inc, mtu);
 						tcp_mtudisc(inp, mtu);
 					}
 				} else
 					/*
 					 * The notify routine may return NULL;
 					 * the pcb is then not unlocked below
 					 * (presumably it is already gone).
 					 */
 					inp = (*notify)(inp,
 					    inetctlerrmap[cmd]);
 			}
 		}
 		if (inp != NULL)
 			INP_WUNLOCK(inp);
 	} else {
 		/*
 		 * No pcb matched: pass the quoted endpoints to the
 		 * syncache instead.
 		 */
 		bzero(&inc, sizeof(inc));
 		inc.inc_fport = th->th_dport;
 		inc.inc_lport = th->th_sport;
 		inc.inc_faddr = faddr;
 		inc.inc_laddr = ip->ip_src;
 		syncache_unreach(&inc, th);
 	}
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 }
 #endif /* INET */
 
 #ifdef INET6
 /*
  * Handle a protocol control request for TCP over IPv6.
  *
  * cmd	PRC_* request code
  * sa	destination the request refers to; must be a full-sized
  *	AF_INET6 sockaddr or the request is ignored
  * d	optional struct ip6ctlparam supplying the offending packet (mbuf,
  *	IPv6 header, offset of the upper-layer header) and its source
  */
 void
 tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
 {
 	struct tcphdr th;
 	struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
 	struct ip6_hdr *ip6;
 	struct mbuf *m;
 	struct ip6ctlparam *ip6cp = NULL;
 	const struct sockaddr_in6 *sa6_src = NULL;
 	int off;
 	/* Only the two port fields are copied out of the quoted segment. */
 	struct tcp_portonly {
 		u_int16_t th_sport;
 		u_int16_t th_dport;
 	} *thp;
 
 	if (sa->sa_family != AF_INET6 ||
 	    sa->sa_len != sizeof(struct sockaddr_in6))
 		return;
 
 	/*
 	 * Redirects are let through here; other codes must be in range
 	 * and have an errno mapping.
 	 */
 	if (cmd == PRC_MSGSIZE)
 		notify = tcp_mtudisc_notify;
 	else if (!PRC_IS_REDIRECT(cmd) &&
 		 ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0))
 		return;
 
 	/* if the parameter is from icmp6, decode it. */
 	if (d != NULL) {
 		ip6cp = (struct ip6ctlparam *)d;
 		m = ip6cp->ip6c_m;
 		ip6 = ip6cp->ip6c_ip6;
 		off = ip6cp->ip6c_off;
 		sa6_src = ip6cp->ip6c_src;
 	} else {
 		m = NULL;
 		ip6 = NULL;
 		off = 0;	/* fool gcc */
 		sa6_src = &sa6_any;
 	}
 
 	if (ip6 != NULL) {
 		struct in_conninfo inc;
 		/*
 		 * XXX: We assume that when IPV6 is non NULL,
 		 * M and OFF are valid.
 		 */
 
 		/* check if we can safely examine src and dst ports */
 		if (m->m_pkthdr.len < off + sizeof(*thp))
 			return;
 
 		/* Zero the header, then fill in just the leading ports. */
 		bzero(&th, sizeof(th));
 		m_copydata(m, off, sizeof(*thp), (caddr_t)&th);
 
 		/* Notify the pcb(s) matching the quoted endpoints. */
 		in6_pcbnotify(&V_tcbinfo, sa, th.th_dport,
 		    (struct sockaddr *)ip6cp->ip6c_src,
 		    th.th_sport, cmd, NULL, notify);
 
 		/* Also report the same endpoints to the syncache. */
 		bzero(&inc, sizeof(inc));
 		inc.inc_fport = th.th_dport;
 		inc.inc_lport = th.th_sport;
 		inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr;
 		inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr;
 		inc.inc_flags |= INC_ISIPV6;
 		INP_INFO_RLOCK(&V_tcbinfo);
 		syncache_unreach(&inc, &th);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	} else
 		/* No packet available: notify with wildcard ports. */
 		in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src,
 			      0, cmd, NULL, notify);
 }
 #endif /* INET6 */
 
 
 /*
  * Following is where TCP initial sequence number generation occurs.
  *
  * There are two places where we must use initial sequence numbers:
  * 1.  In SYN-ACK packets.
  * 2.  In SYN packets.
  *
  * All ISNs for SYN-ACK packets are generated by the syncache.  See
  * tcp_syncache.c for details.
  *
  * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling
  * depends on this property.  In addition, these ISNs should be
  * unguessable so as to prevent connection hijacking.  To satisfy
  * the requirements of this situation, the algorithm outlined in
  * RFC 1948 is used, with only small modifications.
  *
  * Implementation details:
  *
  * Time is based off the system timer, and is corrected so that it
  * increases by one megabyte per second.  This allows for proper
  * recycling on high speed LANs while still leaving over an hour
  * before rollover.
  *
  * As reading the *exact* system time is too expensive to be done
  * whenever setting up a TCP connection, we increment the time
  * offset in two ways.  First, a small random positive increment
  * is added to isn_offset for each connection that is set up.
  * Second, the function tcp_isn_tick fires once per clock tick
  * and increments isn_offset as necessary so that sequence numbers
  * are incremented at approximately ISN_BYTES_PER_SECOND.  The
  * random positive increments serve only to ensure that the same
  * exact sequence number is never sent out twice (as could otherwise
  * happen when a port is recycled in less than the system tick
  * interval.)
  *
  * net.inet.tcp.isn_reseed_interval controls the number of seconds
  * between seeding of isn_secret.  This is normally set to zero,
  * as reseeding should not be necessary.
  *
  * Locking of the global variables isn_secret, isn_last, isn_last_reseed,
  * isn_offset, and isn_offset_old is performed using the ISN lock
  * (ISN_LOCK()/ISN_UNLOCK()), which tcp_new_isn() holds for the whole
  * computation.  The MD5 context isn_ctx is local to tcp_new_isn().
  */
 
 /* Rate at which tcp_new_isn() advances isn_offset with time (per hz). */
 #define ISN_BYTES_PER_SECOND 1048576
 /* Fixed amount added to isn_offset on every tcp_new_isn() call. */
 #define ISN_STATIC_INCREMENT 4096
 /* Mask applied to arc4random() for the random per-call increment. */
 #define ISN_RANDOM_INCREMENT (4096 - 1)
 
 /* Per-vnet ISN generator state, read and written by tcp_new_isn(). */
 static VNET_DEFINE(u_char, isn_secret[32]);	/* secret mixed into MD5 */
 static VNET_DEFINE(int, isn_last);		/* ticks at last time update */
 static VNET_DEFINE(int, isn_last_reseed);	/* ticks at last (re)seed */
 static VNET_DEFINE(u_int32_t, isn_offset);	/* current offset */
 static VNET_DEFINE(u_int32_t, isn_offset_old);	/* offset at isn_last */
 
 #define	V_isn_secret			VNET(isn_secret)
 #define	V_isn_last			VNET(isn_last)
 #define	V_isn_last_reseed		VNET(isn_last_reseed)
 #define	V_isn_offset			VNET(isn_offset)
 #define	V_isn_offset_old		VNET(isn_offset_old)
 
 /*
  * Produce the initial sequence number for the connection behind tp.
  * The result is the first 32-bit word of an MD5 over the connection's
  * ports, addresses and the per-vnet secret, plus the running
  * isn_offset.  The caller must hold the inpcb write lock.
  */
 tcp_seq
 tcp_new_isn(struct tcpcb *tp)
 {
 	MD5_CTX isn_ctx;
 	u_int32_t md5_buffer[4];
 	tcp_seq new_isn;
 	u_int32_t projected_offset;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	ISN_LOCK();
 	/* Seed if this is the first use, reseed if requested. */
 	if ((V_isn_last_reseed == 0) || ((V_tcp_isn_reseed_interval > 0) &&
 	     (((u_int)V_isn_last_reseed + (u_int)V_tcp_isn_reseed_interval*hz)
 		< (u_int)ticks))) {
 		read_random(&V_isn_secret, sizeof(V_isn_secret));
 		V_isn_last_reseed = ticks;
 	}
 
 	/* Compute the md5 hash and return the ISN. */
 	MD5Init(&isn_ctx);
 	MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_fport, sizeof(u_short));
 	MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_lport, sizeof(u_short));
 #ifdef INET6
 	if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) {
 		MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_faddr,
 			  sizeof(struct in6_addr));
 		MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_laddr,
 			  sizeof(struct in6_addr));
 	} else
 #endif
 	{
 		MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_faddr,
 			  sizeof(struct in_addr));
 		MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_laddr,
 			  sizeof(struct in_addr));
 	}
 	MD5Update(&isn_ctx, (u_char *) &V_isn_secret, sizeof(V_isn_secret));
 	MD5Final((u_char *) &md5_buffer, &isn_ctx);
 	new_isn = (tcp_seq) md5_buffer[0];
 	/* Per-call bump: a fixed step plus a masked random amount. */
 	V_isn_offset += ISN_STATIC_INCREMENT +
 		(arc4random() & ISN_RANDOM_INCREMENT);
 	/*
 	 * When the tick counter has moved since the last update, make sure
 	 * the offset has advanced at least as far as elapsed time implies,
 	 * then record the new reference point.
 	 */
 	if (ticks != V_isn_last) {
 		projected_offset = V_isn_offset_old +
 		    ISN_BYTES_PER_SECOND / hz * (ticks - V_isn_last);
 		if (SEQ_GT(projected_offset, V_isn_offset))
 			V_isn_offset = projected_offset;
 		V_isn_offset_old = V_isn_offset;
 		V_isn_last = ticks;
 	}
 	new_isn += V_isn_offset;
 	ISN_UNLOCK();
 	return (new_isn);
 }
 
 /*
  * When a specific ICMP unreachable message is received and the
  * connection state is SYN-SENT, drop the connection.  This behavior
  * is controlled by the icmp_may_rst sysctl.
  */
 struct inpcb *
 tcp_drop_syn_sent(struct inpcb *inp, int errno)
 {
 	struct tcpcb *tp;
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	/* Time-wait and already-dropped pcbs are left untouched. */
 	if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0)
 		return (inp);
 
 	/* Only a connection still in SYN-SENT is eligible. */
 	tp = intotcpcb(inp);
 	if (tp->t_state != TCPS_SYN_SENT)
 		return (inp);
 
 	/* Hand back NULL when tcp_drop() reports no surviving tcpcb. */
 	return (tcp_drop(tp, errno) != NULL ? inp : NULL);
 }
 
 /*
  * When `need fragmentation' ICMP is received, update our idea of the MSS
  * based on the new value. Also nudge TCP to send something, since we
  * know the packet we just sent was dropped.
  * This duplicates some code in the tcp_mss() function in tcp_input.c.
  */
 /*
  * Notify-callback adapter around tcp_mtudisc(): the error argument is
  * ignored and -1 is passed as the MTU offer.  Always returns inp.
  */
 static struct inpcb *
 tcp_mtudisc_notify(struct inpcb *inp, int error)
 {
 
 	tcp_mtudisc(inp, -1);
 	return (inp);
 }
 
 /*
  * Apply a path-MTU change to the connection behind inp and retransmit
  * from snd_una.  mtuoffer is forwarded to tcp_mss_update().  The inpcb
  * must be write-locked; time-wait and dropped pcbs are ignored.
  */
 static void
 tcp_mtudisc(struct inpcb *inp, int mtuoffer)
 {
 	struct socket *so;
 	struct tcpcb *tp;
 
 	INP_WLOCK_ASSERT(inp);
 	if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0)
 		return;
 
 	tp = intotcpcb(inp);
 	KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL"));
 
 	tcp_mss_update(tp, -1, mtuoffer, NULL, NULL);
 
 	/* Never let the segment size exceed the send buffer high-water mark. */
 	so = inp->inp_socket;
 	SOCKBUF_LOCK(&so->so_snd);
 	if (tp->t_maxseg > so->so_snd.sb_hiwat)
 		tp->t_maxseg = so->so_snd.sb_hiwat;
 	SOCKBUF_UNLOCK(&so->so_snd);
 
 	/* Rewind the send state to snd_una and push output. */
 	TCPSTAT_INC(tcps_mturesent);
 	tp->t_rtttime = 0;
 	tp->snd_nxt = tp->snd_una;
 	tcp_free_sackholes(tp);
 	tp->snd_recover = tp->snd_max;
 	if (tp->t_flags & TF_SACK_PERMIT)
 		EXIT_FASTRECOVERY(tp->t_flags);
 	tp->t_fb->tfb_tcp_output(tp);
 }
 
 #ifdef INET
 /*
  * Look-up the routing entry to the peer of this inpcb.  If no route
  * is found and it cannot be allocated, then return 0.  This routine
  * is called by TCP routines that access the rmx structure and by
  * tcp_mss_update to get the peer/interface MTU.
  */
 u_long
 tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap)
 {
 	struct sockaddr_in *dst;
 	struct ifnet *ifp;
 	struct route sro;
 	u_long maxmtu;
 
 	KASSERT(inc != NULL, ("tcp_maxmtu with NULL in_conninfo pointer"));
 
 	/* A route is only looked up for a non-wildcard foreign address. */
 	bzero(&sro, sizeof(sro));
 	if (inc->inc_faddr.s_addr != INADDR_ANY) {
 		dst = (struct sockaddr_in *)&sro.ro_dst;
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = inc->inc_faddr;
 		in_rtalloc_ign(&sro, 0, inc->inc_fibnum);
 	}
 	if (sro.ro_rt == NULL)
 		return (0);
 
 	/* Prefer the route MTU when set, but never exceed the interface's. */
 	ifp = sro.ro_rt->rt_ifp;
 	if (sro.ro_rt->rt_mtu != 0)
 		maxmtu = min(sro.ro_rt->rt_mtu, ifp->if_mtu);
 	else
 		maxmtu = ifp->if_mtu;
 
 	/* Report additional interface capabilities. */
 	if (cap != NULL && (ifp->if_capenable & IFCAP_TSO4) &&
 	    (ifp->if_hwassist & CSUM_TSO)) {
 		cap->ifcap |= CSUM_TSO;
 		cap->tsomax = ifp->if_hw_tsomax;
 		cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
 		cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
 	}
 	RTFREE(sro.ro_rt);
 	return (maxmtu);
 }
 #endif /* INET */
 
 #ifdef INET6
 /*
  * IPv6 counterpart of the peer-route MTU lookup: returns the usable MTU
  * towards inc's foreign address, or 0 when no route is available.  When
  * cap is non-NULL and the interface has TSO6 enabled, its TSO limits are
  * reported through cap.
  */
 u_long
 tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap)
 {
 	struct route_in6 sro6;
 	struct ifnet *ifp;
 	u_long maxmtu;
 
 	KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer"));
 
 	/* A route is only looked up for a specified foreign address. */
 	bzero(&sro6, sizeof(sro6));
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) {
 		sro6.ro_dst.sin6_family = AF_INET6;
 		sro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
 		sro6.ro_dst.sin6_addr = inc->inc6_faddr;
 		in6_rtalloc_ign(&sro6, 0, inc->inc_fibnum);
 	}
 	if (sro6.ro_rt == NULL)
 		return (0);
 
 	/* Prefer the route MTU when set, but never exceed the link MTU. */
 	ifp = sro6.ro_rt->rt_ifp;
 	if (sro6.ro_rt->rt_mtu != 0)
 		maxmtu = min(sro6.ro_rt->rt_mtu,
 		    IN6_LINKMTU(sro6.ro_rt->rt_ifp));
 	else
 		maxmtu = IN6_LINKMTU(sro6.ro_rt->rt_ifp);
 
 	/* Report additional interface capabilities. */
 	if (cap != NULL && (ifp->if_capenable & IFCAP_TSO6) &&
 	    (ifp->if_hwassist & CSUM_TSO)) {
 		cap->ifcap |= CSUM_TSO;
 		cap->tsomax = ifp->if_hw_tsomax;
 		cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
 		cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
 	}
 	RTFREE(sro6.ro_rt);
 
 	return (maxmtu);
 }
 #endif /* INET6 */
 
 /*
  * Calculate effective SMSS per RFC5681 definition for a given TCP
  * connection at its current state, taking into account SACK and etc.
  */
 u_int
 tcp_maxseg(const struct tcpcb *tp)
 {
 	u_int optlen;
 
 	/* With options disabled the whole t_maxseg is available. */
 	if (tp->t_flags & TF_NOOPT)
 		return (tp->t_maxseg);
 
 	/*
 	 * This is a cut-down version of what tcp_addoptions() does: no
 	 * real option loop, and most padding is hardcoded.  Padding may be
 	 * slightly off in edge cases, which is harmless because the result
 	 * only feeds cwnd and ssthresh estimations.
 	 */
 #define	PAD(len)	((((len) / 4) + !!((len) % 4)) * 4)
 	if (!TCPS_HAVEESTABLISHED(tp->t_state)) {
 		/* Not yet established: count the options we request. */
 		if (tp->t_flags & TF_REQ_TSTMP)
 			optlen = TCPOLEN_TSTAMP_APPA;
 		else
 			optlen = PAD(TCPOLEN_MAXSEG);
 		if (tp->t_flags & TF_REQ_SCALE)
 			optlen += PAD(TCPOLEN_WINDOW);
 #ifdef TCP_SIGNATURE
 		if (tp->t_flags & TF_SIGNATURE)
 			optlen += PAD(TCPOLEN_SIGNATURE);
 #endif
 		if (tp->t_flags & TF_SACK_PERMIT)
 			optlen += PAD(TCPOLEN_SACK_PERMITTED);
 	} else {
 		/* Established: timestamps, signature and pending SACKs. */
 		optlen = 0;
 		if (tp->t_flags & TF_RCVD_TSTMP)
 			optlen = TCPOLEN_TSTAMP_APPA;
 #ifdef TCP_SIGNATURE
 		if (tp->t_flags & TF_SIGNATURE)
 			optlen += PAD(TCPOLEN_SIGNATURE);
 #endif
 		if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) {
 			optlen += TCPOLEN_SACKHDR;
 			optlen += tp->rcv_numsacks * TCPOLEN_SACK;
 			optlen = PAD(optlen);
 		}
 	}
 #undef PAD
 	/* Option space is capped at TCP_MAXOLEN. */
 	optlen = min(optlen, TCP_MAXOLEN);
 	return (tp->t_maxseg - optlen);
 }
 
 #ifdef IPSEC
 /* compute ESP/AH header size for TCP, including outer IP header. */
 size_t
 ipsec_hdrsiz_tcp(struct tcpcb *tp)
 {
 	struct inpcb *inp;
 	struct tcphdr *th;
 	struct mbuf *m;
 	struct ip *ip;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 	size_t hdrsiz;
 
 	/* Nothing to add without a pcb or an outbound security policy. */
 	if (tp == NULL)
 		return (0);
 	inp = tp->t_inpcb;
 	if (inp == NULL)
 		return (0);
 	if (!key_havesp(IPSEC_DIR_OUTBOUND))
 		return (0);
 
 	/* Build template headers in a scratch mbuf and size them. */
 	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (0);
 
 #ifdef INET6
 	if ((inp->inp_vflag & INP_IPV6) != 0) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		th = (struct tcphdr *)(ip6 + 1);
 		m->m_len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
 		m->m_pkthdr.len = m->m_len;
 		tcpip_fillheaders(inp, ip6, th);
 	} else
 #endif /* INET6 */
 	{
 		ip = mtod(m, struct ip *);
 		th = (struct tcphdr *)(ip + 1);
 		m->m_len = sizeof(struct tcpiphdr);
 		m->m_pkthdr.len = m->m_len;
 		tcpip_fillheaders(inp, ip, th);
 	}
 	hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
 
 	m_free(m);
 	return (hdrsiz);
 }
 #endif /* IPSEC */
 
 #ifdef TCP_SIGNATURE
 /*
  * Callback function invoked by m_apply() to digest TCP segment data
  * contained within an mbuf chain.
  */
 /*
  * fstate is the MD5 context to update, data/len the bytes to feed into
  * it.  Always returns 0.
  */
 static int
 tcp_signature_apply(void *fstate, void *data, u_int len)
 {
 
 	MD5Update(fstate, (u_char *)data, len);
 	return (0);
 }
 
 /*
  * XXX The key is retrieved from the system's PF_KEY SADB, by keying a
  * search with the destination IP address, and a 'magic SPI' to be
  * determined by the application. This is hardcoded elsewhere to 1179
 */
 /*
  * Look up the security association used for TCP-MD5 on the packet in m.
  * The peer address is the IP source for IPSEC_DIR_INBOUND and the IP
  * destination otherwise.  Returns the SA, or NULL when the IP version is
  * not handled or the lookup fails (the latter is logged).
  */
 struct secasvar *
 tcp_get_sav(struct mbuf *m, u_int direction)
 {
 	union sockaddr_union dst;
 	struct secasvar *sav;
 	struct ip *ip;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 	char ip6buf[INET6_ADDRSTRLEN];
 #endif
 
 	/* Extract the destination from the IP header in the mbuf. */
 	bzero(&dst, sizeof(union sockaddr_union));
 	ip = mtod(m, struct ip *);
 #ifdef INET6
 	ip6 = NULL;	/* Make the compiler happy. */
 #endif
 	/* The version field is read through the IPv4 header overlay. */
 	switch (ip->ip_v) {
 #ifdef INET
 	case IPVERSION:
 		dst.sa.sa_len = sizeof(struct sockaddr_in);
 		dst.sa.sa_family = AF_INET;
 		dst.sin.sin_addr = (direction == IPSEC_DIR_INBOUND) ?
 		    ip->ip_src : ip->ip_dst;
 		break;
 #endif
 #ifdef INET6
 	case (IPV6_VERSION >> 4):
 		ip6 = mtod(m, struct ip6_hdr *);
 		dst.sa.sa_len = sizeof(struct sockaddr_in6);
 		dst.sa.sa_family = AF_INET6;
 		dst.sin6.sin6_addr = (direction == IPSEC_DIR_INBOUND) ?
 		    ip6->ip6_src : ip6->ip6_dst;
 		break;
 #endif
 	default:
 		return (NULL);
 		/* NOTREACHED */
 		break;
 	}
 
 	/* Look up an SADB entry which matches the address of the peer. */
 	sav = KEY_ALLOCSA(&dst, IPPROTO_TCP, htonl(TCP_SIG_SPI));
 	if (sav == NULL) {
 		/*
 		 * NOTE(review): the #ifdef below sits inside the argument
 		 * list of ipseclog(); if ipseclog is a function-like macro
 		 * this relies on compiler tolerance - confirm.
 		 */
 		ipseclog((LOG_ERR, "%s: SADB lookup failed for %s\n", __func__,
 		    (ip->ip_v == IPVERSION) ? inet_ntoa(dst.sin.sin_addr) :
 #ifdef INET6
 			(ip->ip_v == (IPV6_VERSION >> 4)) ?
 			    ip6_sprintf(ip6buf, &dst.sin6.sin6_addr) :
 #endif
 			"(unsupported)"));
 	}
 
 	return (sav);
 }
 
 /*
  * Compute TCP-MD5 hash of a TCP segment. (RFC2385)
  *
  * Parameters:
  * m		pointer to head of mbuf chain
  * len		length of TCP segment data, excluding options
  * optlen	length of TCP segment options
  * buf		pointer to storage for computed MD5 digest
  * sav		pointer to security association
  *
  * We do this over ip, tcphdr, segment data, and the key in the SADB.
  * When called from tcp_input(), we can be sure that th_sum has been
  * zeroed out and verified already.
  *
  * Releases reference to SADB key before return. 
  *
  * Return 0 if successful, otherwise return -1.
  *
  */
 int
 tcp_signature_do_compute(struct mbuf *m, int len, int optlen,
     u_char *buf, struct secasvar *sav)
 {
 #ifdef INET
 	struct ippseudo ippseudo;
 #endif
 	MD5_CTX ctx;
 	int doff;
 	struct ip *ip;
 #ifdef INET
 	struct ipovly *ipovly;
 #endif
 	struct tcphdr *th;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 	struct in6_addr in6;
 	uint32_t plen;
 	uint8_t nhdr;
 #endif
 	u_short savecsum;
 
 	KASSERT(m != NULL, ("NULL mbuf chain"));
 	KASSERT(buf != NULL, ("NULL signature pointer"));
 
 	/*
 	 * View the start of the packet as an IPv4 header; only its version
 	 * field is relied upon until the address family is known.
 	 */
 	ip = mtod(m, struct ip *);
 
 	MD5Init(&ctx);
 	/*
 	 * Step 1: Update MD5 hash with IP(v6) pseudo-header.
 	 *
 	 * XXX The ippseudo header MUST be digested in network byte order,
 	 * or else we'll fail the regression test. Assume all fields we've
 	 * been doing arithmetic on have been in host byte order.
 	 * XXX One cannot depend on ipovly->ih_len here. When called from
 	 * tcp_output(), the underlying ip_len member has not yet been set.
 	 */
 	switch (ip->ip_v) {
 #ifdef INET
 	case IPVERSION:
 		ipovly = (struct ipovly *)ip;
 		ippseudo.ippseudo_src = ipovly->ih_src;
 		ippseudo.ippseudo_dst = ipovly->ih_dst;
 		ippseudo.ippseudo_pad = 0;
 		ippseudo.ippseudo_p = IPPROTO_TCP;
 		ippseudo.ippseudo_len = htons(len + sizeof(struct tcphdr) +
 		    optlen);
 		MD5Update(&ctx, (char *)&ippseudo, sizeof(struct ippseudo));
 
 		th = (struct tcphdr *)((u_char *)ip + sizeof(struct ip));
 		doff = sizeof(struct ip) + sizeof(struct tcphdr) + optlen;
 		break;
 #endif
 #ifdef INET6
 	/*
 	 * RFC 2385, 2.0  Proposal
 	 * For IPv6, the pseudo-header is as described in RFC 2460, namely the
 	 * 128-bit source IPv6 address, 128-bit destination IPv6 address, zero-
 	 * extended next header value (to form 32 bits), and 32-bit segment
 	 * length.
 	 * Note: Upper-Layer Packet Length comes before Next Header.
 	 */
 	case (IPV6_VERSION >> 4):
 		/*
 		 * The IPv6 header pointer must be taken from the mbuf here;
 		 * it was previously left NULL and dereferenced below.
 		 */
 		ip6 = mtod(m, struct ip6_hdr *);
 		in6 = ip6->ip6_src;
 		in6_clearscope(&in6);
 		MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr));
 		in6 = ip6->ip6_dst;
 		in6_clearscope(&in6);
 		MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr));
 		plen = htonl(len + sizeof(struct tcphdr) + optlen);
 		MD5Update(&ctx, (char *)&plen, sizeof(uint32_t));
 		/*
 		 * Three zero bytes followed by the next-header value.  A
 		 * one-byte variable is hashed so the byte fed to MD5 does
 		 * not depend on host endianness.
 		 */
 		nhdr = 0;
 		MD5Update(&ctx, (char *)&nhdr, sizeof(nhdr));
 		MD5Update(&ctx, (char *)&nhdr, sizeof(nhdr));
 		MD5Update(&ctx, (char *)&nhdr, sizeof(nhdr));
 		nhdr = IPPROTO_TCP;
 		MD5Update(&ctx, (char *)&nhdr, sizeof(nhdr));
 
 		th = (struct tcphdr *)((u_char *)ip6 + sizeof(struct ip6_hdr));
 		doff = sizeof(struct ip6_hdr) + sizeof(struct tcphdr) + optlen;
 		break;
 #endif
 	default:
 		/* Unsupported IP version: drop the SA reference and fail. */
 		KEY_FREESAV(&sav);
 		return (-1);
 	}
 
 
 	/*
 	 * Step 2: Update MD5 hash with TCP header, excluding options.
 	 * The TCP checksum must be set to zero.
 	 */
 	savecsum = th->th_sum;
 	th->th_sum = 0;
 	MD5Update(&ctx, (char *)th, sizeof(struct tcphdr));
 	th->th_sum = savecsum;
 
 	/*
 	 * Step 3: Update MD5 hash with TCP segment data.
 	 *         Use m_apply() to avoid an early m_pullup().
 	 */
 	if (len > 0)
 		m_apply(m, doff, len, tcp_signature_apply, &ctx);
 
 	/*
 	 * Step 4: Update MD5 hash with shared secret.
 	 */
 	MD5Update(&ctx, sav->key_auth->key_data, _KEYLEN(sav->key_auth));
 	MD5Final(buf, &ctx);
 
 	/* Account the transfer on the SA, then release our reference. */
 	key_sa_recordxfer(sav, m);
 	KEY_FREESAV(&sav);
 	return (0);
 }
 
 /*
  * Compute TCP-MD5 hash of a TCP segment. (RFC2385)
  *
  * Return 0 if successful, otherwise return -1.
  */
 int
 tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen,
     u_char *buf, u_int direction)
 {
 	struct secasvar *sav;
 
 	/* Without a matching security association there is no key. */
 	sav = tcp_get_sav(m, direction);
 	if (sav == NULL)
 		return (-1);
 
 	/* The SA reference is handed over to the callee. */
 	return (tcp_signature_do_compute(m, len, optlen, buf, sav));
 }
 
 /*
  * Verify the TCP-MD5 hash of a TCP segment. (RFC2385)
  *
  * Parameters:
  * m		pointer to head of mbuf chain
  * off0		passed through to tcp_signature_compute(), which ignores it
  * tlen		length of TCP segment data, excluding options
  * optlen	length of TCP segment options
  * to		parsed TCP options; to_flags and to_signature are examined
  * th		pointer to the TCP header (not referenced by this function)
  * tcpbflag	connection flags; TF_SIGNATURE means a signature is expected
  *
  * Return 1 if successful, otherwise return 0.
  */
 int
 tcp_signature_verify(struct mbuf *m, int off0, int tlen, int optlen,
     struct tcpopt *to, struct tcphdr *th, u_int tcpbflag)
 {
 	char tmpdigest[TCP_SIGLEN];
 	int expected, present;
 
 	/* Verification can be switched off globally. */
 	if (tcp_sig_checksigs == 0)
 		return (1);
 
 	expected = (tcpbflag & TF_SIGNATURE) != 0;
 	present = (to->to_flags & TOF_SIGNATURE) != 0;
 
 	if (!expected) {
 		/* Signature is not expected, and not present in segment. */
 		if (!present)
 			return (1);
 
 		/*
 		 * The socket does not expect a signature but the segment
 		 * carries one: reject it.
 		 */
 		TCPSTAT_INC(tcps_sig_err_sigopt);
 		TCPSTAT_INC(tcps_sig_rcvbadsig);
 		return (0);
 	}
 
 	/*
 	 * The socket expects a signature but the segment has none:
 	 * reject it.
 	 */
 	if (!present) {
 		TCPSTAT_INC(tcps_sig_err_nosigopt);
 		TCPSTAT_INC(tcps_sig_rcvbadsig);
 		return (0);
 	}
 
 	/* Recompute the digest locally; failure to do so is a bad signature. */
 	if (tcp_signature_compute(m, off0, tlen, optlen, &tmpdigest[0],
 	    IPSEC_DIR_INBOUND) == -1) {
 		TCPSTAT_INC(tcps_sig_err_buildsig);
 		TCPSTAT_INC(tcps_sig_rcvbadsig);
 		return (0);
 	}
 
 	/* Compare the received digest with the computed one. */
 	if (bcmp(to->to_signature, &tmpdigest[0], TCP_SIGLEN) != 0) {
 		TCPSTAT_INC(tcps_sig_rcvbadsig);
 		return (0);
 	}
 	TCPSTAT_INC(tcps_sig_rcvgoodsig);
 	return (1);
 }
 #endif /* TCP_SIGNATURE */
 
 /*
  * Sysctl handler that drops the TCP connection identified by a pair of
  * socket addresses written by the caller.  The request must be
  * write-only (no old buffer) and carry at least two sockaddr_storage
  * structures.
  *
  * Returns 0 on success, EINVAL/EPERM/ENOMEM for malformed requests,
  * the SYSCTL_IN()/sa6_embedscope() error if one occurs, or ESRCH when
  * no matching pcb exists.
  */
 static int
 sysctl_drop(SYSCTL_HANDLER_ARGS)
 {
 	/* addrs[0] is a foreign socket, addrs[1] is a local one. */
 	struct sockaddr_storage addrs[2];
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct tcptw *tw;
 	struct sockaddr_in *fin, *lin;
 #ifdef INET6
 	struct sockaddr_in6 *fin6, *lin6;
 #endif
 	int error;
 
 	inp = NULL;
 	fin = lin = NULL;
 #ifdef INET6
 	fin6 = lin6 = NULL;
 #endif
 	error = 0;
 
 	/* Validate the shape of the request before copying anything in. */
 	if (req->oldptr != NULL || req->oldlen != 0)
 		return (EINVAL);
 	if (req->newptr == NULL)
 		return (EPERM);
 	if (req->newlen < sizeof(addrs))
 		return (ENOMEM);
 	error = SYSCTL_IN(req, &addrs, sizeof(addrs));
 	if (error)
 		return (error);
 
 	/* First pass: validate the addresses per family. */
 	switch (addrs[0].ss_family) {
 #ifdef INET6
 	case AF_INET6:
 		fin6 = (struct sockaddr_in6 *)&addrs[0];
 		lin6 = (struct sockaddr_in6 *)&addrs[1];
 		if (fin6->sin6_len != sizeof(struct sockaddr_in6) ||
 		    lin6->sin6_len != sizeof(struct sockaddr_in6))
 			return (EINVAL);
 		/*
 		 * A v4-mapped pair is converted in place to sockaddr_in;
 		 * NOTE(review): the second switch below then appears to
 		 * rely on the conversion having changed ss_family to
 		 * AF_INET - confirm against in6_sin6_2_sin_in_sock().
 		 */
 		if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) {
 			if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr))
 				return (EINVAL);
 			in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[0]);
 			in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[1]);
 			fin = (struct sockaddr_in *)&addrs[0];
 			lin = (struct sockaddr_in *)&addrs[1];
 			break;
 		}
 		error = sa6_embedscope(fin6, V_ip6_use_defzone);
 		if (error)
 			return (error);
 		error = sa6_embedscope(lin6, V_ip6_use_defzone);
 		if (error)
 			return (error);
 		break;
 #endif
 #ifdef INET
 	case AF_INET:
 		fin = (struct sockaddr_in *)&addrs[0];
 		lin = (struct sockaddr_in *)&addrs[1];
 		if (fin->sin_len != sizeof(struct sockaddr_in) ||
 		    lin->sin_len != sizeof(struct sockaddr_in))
 			return (EINVAL);
 		break;
 #endif
 	default:
 		return (EINVAL);
 	}
 	/* Second pass: look the pcb up; it is returned write-locked. */
 	INP_INFO_RLOCK(&V_tcbinfo);
 	switch (addrs[0].ss_family) {
 #ifdef INET6
 	case AF_INET6:
 		inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr,
 		    fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port,
 		    INPLOOKUP_WLOCKPCB, NULL);
 		break;
 #endif
 #ifdef INET
 	case AF_INET:
 		inp = in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port,
 		    lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL);
 		break;
 #endif
 	}
 	if (inp != NULL) {
 		if (inp->inp_flags & INP_TIMEWAIT) {
 			/*
 			 * XXXRW: There currently exists a state where an
 			 * inpcb is present, but its timewait state has been
 			 * discarded.  For now, don't allow dropping of this
 			 * type of inpcb.
 			 */
 			tw = intotw(inp);
 			/*
 			 * No explicit unlock follows tcp_twclose(); the pcb
 			 * lock is presumably released by it.
 			 */
 			if (tw != NULL)
 				tcp_twclose(tw, 0);
 			else
 				INP_WUNLOCK(inp);
 		} else if (!(inp->inp_flags & INP_DROPPED) &&
 			   !(inp->inp_socket->so_options & SO_ACCEPTCONN)) {
 			/* Live, non-listening connection: abort it. */
 			tp = intotcpcb(inp);
 			tp = tcp_drop(tp, ECONNABORTED);
 			/* Unlock only if tcp_drop() left a tcpcb behind. */
 			if (tp != NULL)
 				INP_WUNLOCK(inp);
 		} else
 			INP_WUNLOCK(inp);
 	} else
 		error = ESRCH;
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	return (error);
 }
 
 /* Write-only, hidden (CTLFLAG_SKIP) per-vnet node backed by sysctl_drop(). */
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop,
     CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP, NULL,
     0, sysctl_drop, "", "Drop TCP connection");
 
 /*
  * Generate a standardized TCP log line for use throughout the
  * tcp subsystem.  Memory allocation is done with M_NOWAIT to
  * allow use in the interrupt context.
  *
  * NB: The caller MUST free(s, M_TCPLOG) the returned string.
  * NB: The function may return NULL if memory allocation failed.
  *
  * Due to header inclusion and ordering limitations the struct ip
  * and ip6_hdr pointers have to be passed as void pointers.
  */
 char *
 tcp_log_vain(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
     const void *ip6hdr)
 {
 
 	/* Only format a line while tcp_log_in_vain is switched on. */
 	if (tcp_log_in_vain != 0)
 		return (tcp_log_addr(inc, th, ip4hdr, ip6hdr));
 
 	return (NULL);
 }
 
 /*
  * Same as tcp_log_addr(), but gated on tcp_log_debug: returns NULL
  * without formatting anything while debug logging is off.
  */
 char *
 tcp_log_addrs(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
     const void *ip6hdr)
 {
 
 	/* Only format a line while tcp_log_debug is switched on. */
 	if (tcp_log_debug != 0)
 		return (tcp_log_addr(inc, th, ip4hdr, ip6hdr));
 
 	return (NULL);
 }
 
 /*
  * Allocate (M_NOWAIT, zeroed) and format a log line describing a TCP
  * endpoint pair.  Addresses and ports come from inc when it is non-NULL,
  * otherwise from ip6hdr or ip4hdr together with th.  When th is non-NULL
  * the TCP flags are appended.
  *
  * Returns the string, which is allocated from M_TCPLOG, or NULL when the
  * allocation fails or no usable address source was supplied.
  */
 static char *
 tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
     const void *ip6hdr)
 {
 	char *s, *sp;
 	size_t size;
 	struct ip *ip;
 #ifdef INET6
 	const struct ip6_hdr *ip6;
 
 	ip6 = (const struct ip6_hdr *)ip6hdr;
 #endif /* INET6 */
 	ip = (struct ip *)ip4hdr;
 
 	/*
 	 * The log line looks like this:
 	 * "TCP: [1.2.3.4]:50332 to [1.2.3.4]:80 tcpflags 0x2<SYN>"
 	 */
 	/* Size the buffer for the template, flag names and two addresses. */
 	size = sizeof("TCP: []:12345 to []:12345 tcpflags 0x2<>") +
 	    sizeof(PRINT_TH_FLAGS) + 1 +
 #ifdef INET6
 	    2 * INET6_ADDRSTRLEN;
 #else
 	    2 * INET_ADDRSTRLEN;
 #endif /* INET6 */
 
 	s = malloc(size, M_TCPLOG, M_ZERO|M_NOWAIT);
 	if (s == NULL)
 		return (NULL);
 
 	/* The buffer is zeroed, so strcat() starts at offset 0. */
 	strcat(s, "TCP: [");
 	sp = s + strlen(s);
 
 	/* sp is re-pointed at the terminating NUL after every append. */
 	if (inc && ((inc->inc_flags & INC_ISIPV6) == 0)) {
 		inet_ntoa_r(inc->inc_faddr, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(inc->inc_fport));
 		sp = s + strlen(s);
 		inet_ntoa_r(inc->inc_laddr, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(inc->inc_lport));
 #ifdef INET6
 	} else if (inc) {
 		ip6_sprintf(sp, &inc->inc6_faddr);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(inc->inc_fport));
 		sp = s + strlen(s);
 		ip6_sprintf(sp, &inc->inc6_laddr);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(inc->inc_lport));
 	} else if (ip6 && th) {
 		ip6_sprintf(sp, &ip6->ip6_src);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(th->th_sport));
 		sp = s + strlen(s);
 		ip6_sprintf(sp, &ip6->ip6_dst);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(th->th_dport));
 #endif /* INET6 */
 #ifdef INET
 	} else if (ip && th) {
 		inet_ntoa_r(ip->ip_src, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(th->th_sport));
 		sp = s + strlen(s);
 		inet_ntoa_r(ip->ip_dst, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(th->th_dport));
 #endif /* INET */
 	} else {
 		/* Nothing to describe: release the buffer. */
 		free(s, M_TCPLOG);
 		return (NULL);
 	}
 	sp = s + strlen(s);
 	if (th)
 		sprintf(sp, " tcpflags 0x%b", th->th_flags, PRINT_TH_FLAGS);
 	/* The last byte must still be the NUL left by M_ZERO. */
 	if (*(s + size - 1) != '\0')
 		panic("%s: string too long", __func__);
 	return (s);
 }
 
 /*
  * A subroutine which makes it easy to track TCP state changes with DTrace.
  * This function shouldn't be called for t_state initializations that don't
  * correspond to actual TCP state transitions.
  */
 /*
  * Set tp->t_state to newstate and fire the state__change probe.  The
  * previous state is captured only when KDTRACE_HOOKS is defined.
  */
 void
 tcp_state_change(struct tcpcb *tp, int newstate)
 {
 #if defined(KDTRACE_HOOKS)
 	int pstate = tp->t_state;
 #endif
 
 	tp->t_state = newstate;
 	/*
 	 * NOTE(review): pstate exists only under KDTRACE_HOOKS, so
 	 * TCP_PROBE6 presumably discards its arguments otherwise - confirm.
 	 */
 	TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, pstate);
 }
Index: projects/clang380-import/sys/netinet/toecore.c
===================================================================
--- projects/clang380-import/sys/netinet/toecore.c	(revision 293686)
+++ projects/clang380-import/sys/netinet/toecore.c	(revision 293687)
@@ -1,584 +1,569 @@
 /*-
  * Copyright (c) 2012 Chelsio Communications, Inc.
  * All rights reserved.
  * Written by: Navdeep Parhar <np@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/types.h>
 #include <sys/sockopt.h>
 #include <sys/sysctl.h>
 #include <sys/socket.h>
 
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 #include <net/if_llatbl.h>
 #include <net/route.h>
 
 #include <netinet/if_ether.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/nd6.h>
 #define TCPSTATES
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
 #include <netinet/tcp_offload.h>
 #include <netinet/toecore.h>
 
 static struct mtx toedev_lock;
 static TAILQ_HEAD(, toedev) toedev_list;
 static eventhandler_tag listen_start_eh;
 static eventhandler_tag listen_stop_eh;
 static eventhandler_tag lle_event_eh;
-static eventhandler_tag route_redirect_eh;
 
 /*
  * Default tod_connect handler installed by init_toedev().  Ignores every
  * argument and returns ENOTSUP.
  */
 static int
 toedev_connect(struct toedev *tod __unused, struct socket *so __unused,
     struct rtentry *rt __unused, struct sockaddr *nam __unused)
 {
 
 	return (ENOTSUP);
 }
 
 /*
  * Default tod_listen_start handler installed by init_toedev().  Ignores its
  * arguments and returns ENOTSUP.
  */
 static int
 toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused)
 {
 
 	return (ENOTSUP);
 }
 
 /*
  * Default tod_listen_stop handler installed by init_toedev().  Ignores its
  * arguments and returns ENOTSUP.
  */
 static int
 toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused)
 {
 
 	return (ENOTSUP);
 }
 
 /*
  * Default tod_input handler installed by init_toedev(): the mbuf chain is
  * released with m_freem() and nothing else is done.
  */
 static void
 toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused,
     struct mbuf *m)
 {
 
 	m_freem(m);
 }
 
 /*
  * Default tod_rcvd handler installed by init_toedev(); does nothing.
  */
 static void
 toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused)
 {
 
 	/* Intentionally empty. */
 }
 
 /*
  * Default handler that init_toedev() installs for tod_output, tod_send_rst
  * and tod_send_fin.  Ignores its arguments and returns ENOTSUP.
  */
 static int
 toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused)
 {
 
 	return (ENOTSUP);
 }
 
 /*
  * Default tod_pcb_detach handler installed by init_toedev(); does nothing.
  */
 static void
 toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused)
 {
 
 	/* Intentionally empty. */
 }
 
 /*
  * Default tod_l2_update handler installed by init_toedev(); does nothing.
  */
 static void
 toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused,
     struct sockaddr *sa __unused, uint8_t *lladdr __unused,
     uint16_t vtag __unused)
 {
 
 	/* Intentionally empty. */
 }
 
 /*
  * Default tod_route_redirect handler installed by init_toedev(); does
  * nothing.
  */
 static void
 toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused,
     struct rtentry *rt0 __unused, struct rtentry *rt1 __unused)
 {
 
 	/* Intentionally empty. */
 }
 
 /*
  * Default tod_syncache_added handler installed by init_toedev(); does
  * nothing.
  */
 static void
 toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused)
 {
 
 	/* Intentionally empty. */
 }
 
 /*
  * Default tod_syncache_removed handler installed by init_toedev(); does
  * nothing.
  */
 static void
 toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused)
 {
 
 	/* Intentionally empty. */
 }
 
 /*
  * Default tod_syncache_respond handler installed by init_toedev().  Releases
  * the mbuf chain with m_freem() and returns 0.
  */
 static int
 toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused,
     struct mbuf *m)
 {
 
 	m_freem(m);
 	return (0);
 }
 
 /*
  * Default tod_offload_socket handler installed by init_toedev(); does
  * nothing.
  */
 static void
 toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused,
     struct socket *so __unused)
 {
 
 	/* Intentionally empty. */
 }
 
 /*
  * Default tod_ctloutput handler installed by init_toedev(); does nothing.
  */
 static void
 toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused,
     int sopt_dir __unused, int sopt_name __unused)
 {
 
 	/* Intentionally empty. */
 }
 
 /*
  * Inform one or more TOE devices about a listening socket.
  *
  * inp must be a write-locked TCP inpcb.  arg is either NULL, in which case
  * every device on toedev_list is notified, or a struct toedev pointer, in
  * which case only that device is.  Nothing happens unless the pcb is in
  * TCPS_LISTEN and has neither INP_DROPPED nor INP_TIMEWAIT set.
  */
 static void
 toe_listen_start(struct inpcb *inp, void *arg)
 {
 	struct toedev *only = arg;
 	struct toedev *cur;
 	struct tcpcb *tp;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(inp->inp_pcbinfo == &V_tcbinfo,
 	    ("%s: inp is not a TCP inp", __func__));
 
 	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) != 0)
 		return;
 
 	tp = intotcpcb(inp);
 	if (tp->t_state != TCPS_LISTEN)
 		return;
 
 	mtx_lock(&toedev_lock);
 	TAILQ_FOREACH(cur, &toedev_list, link) {
 		/* Skip devices other than the one explicitly requested. */
 		if (only != NULL && only != cur)
 			continue;
 		cur->tod_listen_start(cur, tp);
 	}
 	mtx_unlock(&toedev_lock);
 }
 
 /*
  * tcp_offload_listen_start event handler (registered in toecore_load()):
  * announces the listening tcpcb to every registered TOE device.
  */
 static void
 toe_listen_start_event(void *arg __unused, struct tcpcb *tp)
 {
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	KASSERT(tp->t_state == TCPS_LISTEN,
 	    ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
 
 	/* A NULL device filter selects all devices. */
 	toe_listen_start(tp->t_inpcb, NULL);
 }
 
 /*
  * tcp_offload_listen_stop event handler (registered in toecore_load()):
  * calls tod_listen_stop on every device on toedev_list for this tcpcb.
  */
 static void
 toe_listen_stop_event(void *arg __unused, struct tcpcb *tp)
 {
 	struct toedev *cur;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	KASSERT(tp->t_state == TCPS_LISTEN,
 	    ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
 
 	mtx_lock(&toedev_lock);
 	TAILQ_FOREACH(cur, &toedev_list, link) {
 		cur->tod_listen_stop(cur, tp);
 	}
 	mtx_unlock(&toedev_lock);
 }
 
 /*
  * Fill up a freshly allocated toedev struct with reasonable defaults: a NULL
  * softc and a default handler for each of the hooks assigned below.
  */
 void
 init_toedev(struct toedev *tod)
 {
 
 	tod->tod_softc = NULL;
 
 	/*
 	 * Every hook gets a harmless default so that the kernel can invoke
 	 * any toedev method without first testing whether the TOE driver
 	 * provided one.
 	 */
 
 	/* Connection setup and listeners. */
 	tod->tod_connect = toedev_connect;
 	tod->tod_listen_start = toedev_listen_start;
 	tod->tod_listen_stop = toedev_listen_stop;
 	tod->tod_offload_socket = toedev_offload_socket;
 
 	/* Syncache hooks. */
 	tod->tod_syncache_added = toedev_syncache_added;
 	tod->tod_syncache_removed = toedev_syncache_removed;
 	tod->tod_syncache_respond = toedev_syncache_respond;
 
 	/* Data path; RST and FIN share the generic output default. */
 	tod->tod_input = toedev_input;
 	tod->tod_rcvd = toedev_rcvd;
 	tod->tod_output = toedev_output;
 	tod->tod_send_rst = toedev_output;
 	tod->tod_send_fin = toedev_output;
 
 	/* Teardown, L2/route notifications and socket options. */
 	tod->tod_pcb_detach = toedev_pcb_detach;
 	tod->tod_l2_update = toedev_l2_update;
 	tod->tod_route_redirect = toedev_route_redirect;
 	tod->tod_ctloutput = toedev_ctloutput;
 }
 
 /*
  * Register an active TOE device with the system.  This allows it to receive
  * notifications from the kernel.
  *
  * Returns EEXIST if tod is already on toedev_list.  Otherwise tod is appended,
  * registered_toedevs is bumped, toe_listen_start() is applied to every inpcb
  * with tod as the device filter, and 0 is returned.
  */
 int
 register_toedev(struct toedev *tod)
 {
 	struct toedev *cur;
 
 	mtx_lock(&toedev_lock);
 
 	/* cur ends up NULL when the list holds no matching entry. */
 	TAILQ_FOREACH(cur, &toedev_list, link) {
 		if (cur == tod)
 			break;
 	}
 	if (cur != NULL) {
 		mtx_unlock(&toedev_lock);
 		return (EEXIST);
 	}
 
 	TAILQ_INSERT_TAIL(&toedev_list, tod, link);
 	registered_toedevs++;
 	mtx_unlock(&toedev_lock);
 
 	inp_apply_all(toe_listen_start, tod);
 
 	return (0);
 }
 
 /*
  * Remove the TOE device from the global list of active TOE devices.  It is the
  * caller's responsibility to ensure that the TOE device is quiesced prior to
  * this call.
  *
  * Returns 0 if tod was found and removed, ENODEV if it was not on the list.
  */
 int
 unregister_toedev(struct toedev *tod)
 {
 	struct toedev *cur;
 	int rc;
 
 	rc = ENODEV;
 	mtx_lock(&toedev_lock);
 
 	/* Locate the entry first; cur is NULL if the search falls through. */
 	TAILQ_FOREACH(cur, &toedev_list, link) {
 		if (cur == tod)
 			break;
 	}
 	if (cur != NULL) {
 		TAILQ_REMOVE(&toedev_list, tod, link);
 		registered_toedevs--;
 		rc = 0;
 	}
 
 	KASSERT(registered_toedevs >= 0,
 	    ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs));
 	mtx_unlock(&toedev_lock);
 	return (rc);
 }
 
 /*
  * Forward to syncache_add() on behalf of a TOE driver, supplying the socket
  * attached to the write-locked inp together with the driver's tod and todctx.
  */
 void
 toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
     struct inpcb *inp, void *tod, void *todctx)
 {
 	struct socket *lso;
 
 	INP_WLOCK_ASSERT(inp);
 
 	lso = inp->inp_socket;
 	syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx);
 }
 
 /*
  * Forward to syncache_expand() with a NULL final argument and return its
  * result unchanged.  The tcbinfo read lock must be held.
  */
 int
 toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
     struct tcphdr *th, struct socket **lsop)
 {
 	int rc;
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 	rc = syncache_expand(inc, to, th, lsop, NULL);
 	return (rc);
 }
 
 /*
  * General purpose check to see if a 4-tuple is in use by the kernel.  If a TCP
  * header (presumably for an incoming SYN) is also provided, an existing 4-tuple
  * in TIME_WAIT may be assassinated freeing it up for re-use.
  *
  * Note that the TCP header must have been run through tcp_fields_to_host() or
  * equivalent.
  *
  * Returns 0 when no matching pcb exists or tcp_twcheck() returned non-zero
  * for a TIME_WAIT pcb, EADDRINUSE otherwise.
  */
 int
 toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp)
 {
 	struct inpcb *inp;
 
 	/* The lookup hands back the pcb write-locked (INPLOOKUP_WLOCKPCB). */
 	if (inc->inc_flags & INC_ISIPV6) {
 		inp = in6_pcblookup(&V_tcbinfo, &inc->inc6_faddr,
 		    inc->inc_fport, &inc->inc6_laddr, inc->inc_lport,
 		    INPLOOKUP_WLOCKPCB, ifp);
 	} else {
 		inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport,
 		    inc->inc_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp);
 	}
 	if (inp == NULL)
 		return (0);
 
 	INP_WLOCK_ASSERT(inp);
 
 	/* Anything but "TIME_WAIT and a header to check" is simply in use. */
 	if ((inp->inp_flags & INP_TIMEWAIT) == 0 || th == NULL) {
 		INP_WUNLOCK(inp);
 		return (EADDRINUSE);
 	}
 
 	/*
 	 * NOTE(review): the pcb is not unlocked here on either outcome, so
 	 * tcp_twcheck() presumably releases it -- confirm.
 	 */
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* for twcheck */
 	if (!tcp_twcheck(inp, NULL, th, NULL, 0))
 		return (EADDRINUSE);
 
 	return (0);
 }
 
 /*
  * lle_event handler (registered in toecore_load()).  For an AF_INET or
  * AF_INET6 entry on an interface whose matching TOE capability is enabled
  * and which has a TOE device, passes the entry's address to the device's
  * tod_l2_update hook.  lladdr is the entry's link-layer address for
  * LLENTRY_RESOLVED and NULL for every other event.
  */
 static void
 toe_lle_event(void *arg __unused, struct llentry *lle, int evt)
 {
 	struct sockaddr_in6 sin6;	/* storage for the entry's address */
 	struct sockaddr *sa;
 	struct toedev *tod;
 	struct ifnet *ifp;
 	uint8_t *lladdr;
 	uint16_t vtag;
 	int family;
 
 	LLE_WLOCK_ASSERT(lle);
 
 	ifp = lltable_get_ifp(lle->lle_tbl);
 	family = lltable_get_af(lle->lle_tbl);
 
 	/*
 	 * Only IPv4/IPv6 entries matter, and only if the interface's TOE
 	 * capability for that family is enabled.
 	 */
 	switch (family) {
 	case AF_INET:
 		if ((ifp->if_capenable & IFCAP_TOE4) == 0)
 			return;
 		break;
 	case AF_INET6:
 		if ((ifp->if_capenable & IFCAP_TOE6) == 0)
 			return;
 		break;
 	default:
 		return;
 	}
 
 	tod = TOEDEV(ifp);
 	if (tod == NULL)
 		return;
 
 	sa = (struct sockaddr *)&sin6;
 	lltable_fill_sa_entry(lle, sa);
 
 	vtag = 0xfff;
 	if (evt == LLENTRY_RESOLVED) {
 
 		KASSERT(lle->la_flags & LLE_VALID,
 		    ("%s: %p resolved but not valid?", __func__, lle));
 
 		lladdr = (uint8_t *)lle->ll_addr;
 #ifdef VLAN_TAG
 		VLAN_TAG(ifp, &vtag);
 #endif
 	} else {
 
 		/*
 		 * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean
 		 * this entry is going to be deleted.
 		 */
 
 		lladdr = NULL;
 	}
 
 	tod->tod_l2_update(tod, ifp, sa, lladdr, vtag);
 }
 
 /*
- * XXX: implement.
- */
-static void
-toe_route_redirect_event(void *arg __unused, struct rtentry *rt0,
-    struct rtentry *rt1, struct sockaddr *sa)
-{
-
-	return;
-}
-
-/*
  * Returns 0 or EWOULDBLOCK on success (any other value is an error).  0 means
  * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's
  * tod_l2_update will be called later, when the entry is resolved or times out.
  */
 int
 toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
     uint8_t *lladdr, uint16_t *vtag)
 {
 	int rc;
 
 	/* Resolve through ARP or ND6 depending on the address family. */
 	switch (sa->sa_family) {
 #ifdef INET
 	case AF_INET:
 		rc = arpresolve(ifp, 0, NULL, sa, lladdr, NULL);
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		rc = nd6_resolve(ifp, 0, NULL, sa, lladdr, NULL);
 		break;
 #endif
 	default:
 		return (EPROTONOSUPPORT);
 	}
 
 	/* Anything other than 0 is handed straight back to the caller. */
 	if (rc != 0)
 		return (rc);
 
 	/*
 	 * Resolved.  Use the interface's VLAN tag when VLAN_TAG() is
 	 * available and succeeds; otherwise fall back to 0xfff.
 	 */
 #ifdef VLAN_TAG
 	if (VLAN_TAG(ifp, vtag) == 0)
 		return (0);
 #endif
 	*vtag = 0xfff;
 
 	return (0);
 }
 
 /*
  * Called with inp write-locked when an offloaded connection attempt failed
  * with error err.  A dropped pcb is left alone.  For EAGAIN the tcpcb is
  * detached from the TOE driver and handed back to the regular TCP output
  * path; for any other error the connection is dropped with tcp_drop().  The
  * inp is write-locked again on return in every case.
  */
 void
 toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err)
 {
 	struct tcpcb *tp;
 
 	INP_WLOCK_ASSERT(inp);
 
 	if ((inp->inp_flags & INP_DROPPED) == 0) {
 		tp = intotcpcb(inp);
 
 		KASSERT(tp->t_flags & TF_TOE,
 		    ("%s: tp %p not offloaded.", __func__, tp));
 
 		if (err != EAGAIN) {
 
 			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 			if (tcp_drop(tp, err) == NULL)
 				INP_WLOCK(inp);	/* re-acquire */
 		} else {
 
 			/*
 			 * Temporary failure during offload, take this PCB back.
 			 * Detach from the TOE driver and do the rest of what
 			 * TCP's pru_connect would have done if the connection
 			 * wasn't offloaded.
 			 */
 
 			tod->tod_pcb_detach(tod, tp);
 			KASSERT(!(tp->t_flags & TF_TOE),
 			    ("%s: tp %p still offloaded.", __func__, tp));
 			tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
 			(void) tp->t_fb->tfb_tcp_output(tp);
 		}
 	}
 
 	INP_WLOCK_ASSERT(inp);
 }
 
 /*
  * MOD_LOAD handler: initializes toedev_lock and the (empty) toedev_list, then
  * registers the listen start/stop and lle_event handlers, saving their tags
  * for toecore_unload().  Always returns 0.
  */
 static int
 toecore_load(void)
 {
 
 	mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF);
 	TAILQ_INIT(&toedev_list);
 
 	listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
 	    toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY);
 	listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
 	    toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY);
 	lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL,
 	    EVENTHANDLER_PRI_ANY);
-	route_redirect_eh = EVENTHANDLER_REGISTER(route_redirect_event,
-	    toe_route_redirect_event, NULL, EVENTHANDLER_PRI_ANY);
 
 	return (0);
 }
 
 /*
  * MOD_UNLOAD handler: refuses with EBUSY while any TOE device is still on
  * toedev_list; otherwise deregisters the event handlers set up by
  * toecore_load(), destroys toedev_lock and returns 0.
  */
 static int
 toecore_unload(void)
 {
 
 	mtx_lock(&toedev_lock);
 	if (!TAILQ_EMPTY(&toedev_list)) {
 		mtx_unlock(&toedev_lock);
 		return (EBUSY);
 	}
 
 	/*
 	 * NOTE(review): the handlers are deregistered with toedev_lock held;
 	 * confirm EVENTHANDLER_DEREGISTER cannot sleep in this context.
 	 */
 	EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh);
 	EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh);
 	EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
-	EVENTHANDLER_DEREGISTER(route_redirect_event, route_redirect_eh);
 
 	mtx_unlock(&toedev_lock);
 	mtx_destroy(&toedev_lock);
 
 	return (0);
 }
 
 /*
  * Module event handler: dispatches MOD_LOAD and MOD_UNLOAD to toecore_load()
  * and toecore_unload(); every other command yields EOPNOTSUPP.
  */
 static int
 toecore_mod_handler(module_t mod, int cmd, void *arg)
 {
 
 	switch (cmd) {
 	case MOD_LOAD:
 		return (toecore_load());
 	case MOD_UNLOAD:
 		return (toecore_unload());
 	default:
 		return (EOPNOTSUPP);
 	}
 }
 
 /*
  * Module description handed to DECLARE_MODULE below: the module name, its
  * event handler, and a zero third member.
  */
 static moduledata_t mod_data= {
 	"toecore",
 	toecore_mod_handler,
 	0
 };
 
 MODULE_VERSION(toecore, 1);
 DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
Index: projects/clang380-import/sys/netinet6/frag6.c
===================================================================
--- projects/clang380-import/sys/netinet6/frag6.c	(revision 293686)
+++ projects/clang380-import/sys/netinet6/frag6.c	(revision 293687)
@@ -1,819 +1,820 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: frag6.c,v 1.33 2002/01/07 11:34:48 kjc Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/domain.h>
+#include <sys/eventhandler.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/netisr.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet/icmp6.h>
 #include <netinet/in_systm.h>	/* for ECN definitions */
 #include <netinet/ip.h>		/* for ECN definitions */
 
 #include <security/mac/mac_framework.h>
 
 static void frag6_enq(struct ip6asfrag *, struct ip6asfrag *);
 static void frag6_deq(struct ip6asfrag *);
 static void frag6_insque(struct ip6q *, struct ip6q *);
 static void frag6_remque(struct ip6q *);
 static void frag6_freef(struct ip6q *);
 
 static struct mtx ip6qlock;
 /*
  * These fields all protected by ip6qlock.
  */
 static VNET_DEFINE(u_int, frag6_nfragpackets);
 static VNET_DEFINE(u_int, frag6_nfrags);
 static VNET_DEFINE(struct ip6q, ip6q);	/* ip6 reassemble queue */
 
 #define	V_frag6_nfragpackets		VNET(frag6_nfragpackets)
 #define	V_frag6_nfrags			VNET(frag6_nfrags)
 #define	V_ip6q				VNET(ip6q)
 
 #define	IP6Q_LOCK_INIT()	mtx_init(&ip6qlock, "ip6qlock", NULL, MTX_DEF);
 #define	IP6Q_LOCK()		mtx_lock(&ip6qlock)
 #define	IP6Q_TRYLOCK()		mtx_trylock(&ip6qlock)
 #define	IP6Q_LOCK_ASSERT()	mtx_assert(&ip6qlock, MA_OWNED)
 #define	IP6Q_UNLOCK()		mtx_unlock(&ip6qlock)
 
 static MALLOC_DEFINE(M_FTABLE, "fragment", "fragment reassembly header");
 
 /*
  * nmbclusters_change event handler (registered by frag6_init()): recomputes
  * both fragment limits as a quarter of nmbclusters.  The tag argument is
  * not used.
  */
 static void
 frag6_change(void *tag)
 {
 
 	V_ip6_maxfrags = nmbclusters / 4;
 	V_ip6_maxfragpackets = nmbclusters / 4;
 }
 
 /*
  * Initialise the reassembly queue head and the fragment limits for the
  * current vnet.  The nmbclusters_change handler and the global ip6qlock are
  * set up only when running in the default vnet.
  */
 void
 frag6_init(void)
 {
 
 	V_ip6_maxfragpackets = nmbclusters / 4;
 	V_ip6_maxfrags = nmbclusters / 4;
 
 	/* An empty queue is a head that points at itself. */
 	V_ip6q.ip6q_next = V_ip6q.ip6q_prev = &V_ip6q;
 
 	if (IS_DEFAULT_VNET(curvnet)) {
 		EVENTHANDLER_REGISTER(nmbclusters_change,
 		    frag6_change, NULL, EVENTHANDLER_PRI_ANY);
 
 		IP6Q_LOCK_INIT();
 	}
 }
 
 /*
  * In RFC2460, fragment and reassembly rule do not agree with each other,
  * in terms of next header field handling in fragment header.
  * While the sender will use the same value for all of the fragmented packets,
  * receiver is suggested not to check the consistency.
  *
  * fragment rule (p20):
  *	(2) A Fragment header containing:
  *	The Next Header value that identifies the first header of
  *	the Fragmentable Part of the original packet.
  *		-> next header field is same for all fragments
  *
  * reassembly rule (p21):
  *	The Next Header field of the last header of the Unfragmentable
  *	Part is obtained from the Next Header field of the first
  *	fragment's Fragment header.
  *		-> should grab it from the first fragment only
  *
  * The following note also contradicts the fragment rule - no one is going to
  * send different fragments with different next header fields.
  *
  * additional note (p22):
  *	The Next Header values in the Fragment headers of different
  *	fragments of the same original packet may differ.  Only the value
  *	from the Offset zero fragment packet is used for reassembly.
  *		-> should grab it from the first fragment only
  *
  * There is no explicit reason given in the RFC.  Historical reason maybe?
  */
 /*
  * Fragment input.
  *
  * Processes the IPv6 Fragment header located at *offp in *mp.  Atomic
  * fragments (offset 0 and M bit clear) are passed straight through; all
  * other fragments are queued, under ip6qlock, on the reassembly queue that
  * matches their ident/source/destination until the packet is complete.
  *
  * Returns the next header value to continue with (with *offp, and for a
  * reassembled packet also *mp, updated), or IPPROTO_DONE when there is
  * nothing further for the caller to process (fragment queued, dropped,
  * answered with an ICMPv6 error, or, with RSS, re-dispatched).
  */
 int
 frag6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp, *t;
 	struct ip6_hdr *ip6;
 	struct ip6_frag *ip6f;
 	struct ip6q *q6;
 	struct ip6asfrag *af6, *ip6af, *af6dwn;
 	struct in6_ifaddr *ia;
 	int offset = *offp, nxt, i, next;
 	int first_frag = 0;
 	int fragoff, frgpartlen;	/* must be larger than u_int16_t */
 	struct ifnet *dstifp;
 	u_int8_t ecn, ecn0;
 #ifdef RSS
 	struct m_tag *mtag;
 	struct ip6_direct_ctx *ip6dc;
 #endif
 
 #if 0
 	char ip6buf[INET6_ADDRSTRLEN];
 #endif
 
 	ip6 = mtod(m, struct ip6_hdr *);
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, offset, sizeof(struct ip6_frag), IPPROTO_DONE);
 	ip6f = (struct ip6_frag *)((caddr_t)ip6 + offset);
 #else
 	IP6_EXTHDR_GET(ip6f, struct ip6_frag *, m, offset, sizeof(*ip6f));
 	if (ip6f == NULL)
 		return (IPPROTO_DONE);
 #endif
 
 	dstifp = NULL;
 	/* find the destination interface of the packet. */
 	ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
 	if (ia != NULL) {
 		dstifp = ia->ia_ifp;
 		ifa_free(&ia->ia_ifa);
 	}
 	/* jumbo payload can't contain a fragment header */
 	if (ip6->ip6_plen == 0) {
 		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, offset);
 		in6_ifstat_inc(dstifp, ifs6_reass_fail);
 		return IPPROTO_DONE;
 	}
 
 	/*
 	 * check whether fragment packet's fragment length is
 	 * multiple of 8 octets.
 	 * sizeof(struct ip6_frag) == 8
 	 * sizeof(struct ip6_hdr) = 40
 	 */
 	if ((ip6f->ip6f_offlg & IP6F_MORE_FRAG) &&
 	    (((ntohs(ip6->ip6_plen) - offset) & 0x7) != 0)) {
 		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
 		    offsetof(struct ip6_hdr, ip6_plen));
 		in6_ifstat_inc(dstifp, ifs6_reass_fail);
 		return IPPROTO_DONE;
 	}
 
 	IP6STAT_INC(ip6s_fragments);
 	in6_ifstat_inc(dstifp, ifs6_reass_reqd);
 
 	/* offset now points to data portion */
 	offset += sizeof(struct ip6_frag);
 
 	/*
 	 * RFC 6946: Handle "atomic" fragments (offset and m bit set to 0)
 	 * upfront, unrelated to any reassembly.  Just skip the fragment header.
 	 */
 	if ((ip6f->ip6f_offlg & ~IP6F_RESERVED_MASK) == 0) {
 		/* XXX-BZ we want dedicated counters for this. */
 		IP6STAT_INC(ip6s_reassembled);
 		in6_ifstat_inc(dstifp, ifs6_reass_ok);
 		*offp = offset;
 		return (ip6f->ip6f_nxt);
 	}
 
 	IP6Q_LOCK();
 
 	/*
 	 * Enforce upper bound on number of fragments.
 	 * If maxfrag is 0, never accept fragments.
 	 * If maxfrag is -1, accept all fragments without limitation.
 	 */
 	if (V_ip6_maxfrags < 0)
 		;
 	else if (V_frag6_nfrags >= (u_int)V_ip6_maxfrags)
 		goto dropfrag;
 
 	/* Look for an existing reassembly queue for this packet. */
 	for (q6 = V_ip6q.ip6q_next; q6 != &V_ip6q; q6 = q6->ip6q_next)
 		if (ip6f->ip6f_ident == q6->ip6q_ident &&
 		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &q6->ip6q_src) &&
 		    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &q6->ip6q_dst)
 #ifdef MAC
 		    && mac_ip6q_match(m, q6)
 #endif
 		    )
 			break;
 
 	if (q6 == &V_ip6q) {
 		/*
 		 * the first fragment to arrive, create a reassembly queue.
 		 */
 		first_frag = 1;
 
 		/*
 		 * Enforce upper bound on number of fragmented packets
 		 * for which we attempt reassembly;
 		 * If maxfragpackets is 0, never accept fragments.
 		 * If maxfragpackets is -1, accept all fragments without
 		 * limitation.
 		 */
 		if (V_ip6_maxfragpackets < 0)
 			;
 		else if (V_frag6_nfragpackets >= (u_int)V_ip6_maxfragpackets)
 			goto dropfrag;
 		q6 = (struct ip6q *)malloc(sizeof(struct ip6q), M_FTABLE,
 		    M_NOWAIT);
 		if (q6 == NULL)
 			goto dropfrag;
 		bzero(q6, sizeof(*q6));
 #ifdef MAC
 		if (mac_ip6q_init(q6, M_NOWAIT) != 0) {
 			free(q6, M_FTABLE);
 			goto dropfrag;
 		}
 		mac_ip6q_create(m, q6);
 #endif
 		/*
 		 * Account for the queue only once it really exists.  Counting
 		 * it before the allocation / MAC setup would leak one slot of
 		 * the maxfragpackets budget on every failure, because the
 		 * dropfrag path does not decrement the counter.
 		 */
 		V_frag6_nfragpackets++;
 		frag6_insque(q6, &V_ip6q);
 
 		/* ip6q_nxt will be filled afterwards, from 1st fragment */
 		q6->ip6q_down	= q6->ip6q_up = (struct ip6asfrag *)q6;
 #ifdef notyet
 		q6->ip6q_nxtp	= (u_char *)nxtp;
 #endif
 		q6->ip6q_ident	= ip6f->ip6f_ident;
 		q6->ip6q_ttl	= IPV6_FRAGTTL;
 		q6->ip6q_src	= ip6->ip6_src;
 		q6->ip6q_dst	= ip6->ip6_dst;
 		q6->ip6q_ecn	=
 		    (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
 		q6->ip6q_unfrglen = -1;	/* The 1st fragment has not arrived. */
 
 		q6->ip6q_nfrag = 0;
 	}
 
 	/*
 	 * If it's the 1st fragment, record the length of the
 	 * unfragmentable part and the next header of the fragment header.
 	 */
 	fragoff = ntohs(ip6f->ip6f_offlg & IP6F_OFF_MASK);
 	if (fragoff == 0) {
 		q6->ip6q_unfrglen = offset - sizeof(struct ip6_hdr) -
 		    sizeof(struct ip6_frag);
 		q6->ip6q_nxt = ip6f->ip6f_nxt;
 	}
 
 	/*
 	 * Check that the reassembled packet would not exceed 65535 bytes
 	 * in size.
 	 * If it would exceed, discard the fragment and return an ICMP error.
 	 */
 	frgpartlen = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - offset;
 	if (q6->ip6q_unfrglen >= 0) {
 		/* The 1st fragment has already arrived. */
 		if (q6->ip6q_unfrglen + fragoff + frgpartlen > IPV6_MAXPACKET) {
 			icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
 			    offset - sizeof(struct ip6_frag) +
 			    offsetof(struct ip6_frag, ip6f_offlg));
 			IP6Q_UNLOCK();
 			return (IPPROTO_DONE);
 		}
 	} else if (fragoff + frgpartlen > IPV6_MAXPACKET) {
 		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER,
 		    offset - sizeof(struct ip6_frag) +
 		    offsetof(struct ip6_frag, ip6f_offlg));
 		IP6Q_UNLOCK();
 		return (IPPROTO_DONE);
 	}
 	/*
 	 * If it's the first fragment, do the above check for each
 	 * fragment already stored in the reassembly queue.
 	 */
 	if (fragoff == 0) {
 		for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
 		     af6 = af6dwn) {
 			af6dwn = af6->ip6af_down;
 
 			if (q6->ip6q_unfrglen + af6->ip6af_off + af6->ip6af_frglen >
 			    IPV6_MAXPACKET) {
 				struct mbuf *merr = IP6_REASS_MBUF(af6);
 				struct ip6_hdr *ip6err;
 				int erroff = af6->ip6af_offset;
 
 				/* dequeue the fragment. */
 				frag6_deq(af6);
 				free(af6, M_FTABLE);
 
 				/* adjust pointer. */
 				ip6err = mtod(merr, struct ip6_hdr *);
 
 				/*
 				 * Restore source and destination addresses
 				 * in the erroneous IPv6 header.
 				 */
 				ip6err->ip6_src = q6->ip6q_src;
 				ip6err->ip6_dst = q6->ip6q_dst;
 
 				icmp6_error(merr, ICMP6_PARAM_PROB,
 				    ICMP6_PARAMPROB_HEADER,
 				    erroff - sizeof(struct ip6_frag) +
 				    offsetof(struct ip6_frag, ip6f_offlg));
 			}
 		}
 	}
 
 	/* Allocate and fill the bookkeeping record for this fragment. */
 	ip6af = (struct ip6asfrag *)malloc(sizeof(struct ip6asfrag), M_FTABLE,
 	    M_NOWAIT);
 	if (ip6af == NULL)
 		goto dropfrag;
 	bzero(ip6af, sizeof(*ip6af));
 	ip6af->ip6af_mff = ip6f->ip6f_offlg & IP6F_MORE_FRAG;
 	ip6af->ip6af_off = fragoff;
 	ip6af->ip6af_frglen = frgpartlen;
 	ip6af->ip6af_offset = offset;
 	IP6_REASS_MBUF(ip6af) = m;
 
 	if (first_frag) {
 		af6 = (struct ip6asfrag *)q6;
 		goto insert;
 	}
 
 	/*
 	 * Handle ECN by comparing this segment with the first one;
 	 * if CE is set, do not lose CE.
 	 * drop if CE and not-ECT are mixed for the same packet.
 	 */
 	ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
 	ecn0 = q6->ip6q_ecn;
 	if (ecn == IPTOS_ECN_CE) {
 		if (ecn0 == IPTOS_ECN_NOTECT) {
 			free(ip6af, M_FTABLE);
 			goto dropfrag;
 		}
 		if (ecn0 != IPTOS_ECN_CE)
 			q6->ip6q_ecn = IPTOS_ECN_CE;
 	}
 	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
 		free(ip6af, M_FTABLE);
 		goto dropfrag;
 	}
 
 	/*
 	 * Find a segment which begins after this one does.
 	 */
 	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
 	     af6 = af6->ip6af_down)
 		if (af6->ip6af_off > ip6af->ip6af_off)
 			break;
 
 #if 0
 	/*
 	 * If there is a preceding segment, it may provide some of
 	 * our data already.  If so, drop the data from the incoming
 	 * segment.  If it provides all of our data, drop us.
 	 */
 	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
 		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
 			- ip6af->ip6af_off;
 		if (i > 0) {
 			if (i >= ip6af->ip6af_frglen)
 				goto dropfrag;
 			m_adj(IP6_REASS_MBUF(ip6af), i);
 			ip6af->ip6af_off += i;
 			ip6af->ip6af_frglen -= i;
 		}
 	}
 
 	/*
 	 * While we overlap succeeding segments trim them or,
 	 * if they are completely covered, dequeue them.
 	 */
 	while (af6 != (struct ip6asfrag *)q6 &&
 	       ip6af->ip6af_off + ip6af->ip6af_frglen > af6->ip6af_off) {
 		i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
 		if (i < af6->ip6af_frglen) {
 			af6->ip6af_frglen -= i;
 			af6->ip6af_off += i;
 			m_adj(IP6_REASS_MBUF(af6), i);
 			break;
 		}
 		af6 = af6->ip6af_down;
 		m_freem(IP6_REASS_MBUF(af6->ip6af_up));
 		frag6_deq(af6->ip6af_up);
 	}
 #else
 	/*
 	 * If the incoming fragment overlaps some existing fragments in
 	 * the reassembly queue, drop it, since it is dangerous to override
 	 * existing fragments from a security point of view.
 	 * We don't know which fragment is the bad guy - here we trust
 	 * fragment that came in earlier, with no real reason.
 	 *
 	 * Note: due to changes after disabling this part, mbuf passed to
 	 * m_adj() below now does not meet the requirement.
 	 */
 	if (af6->ip6af_up != (struct ip6asfrag *)q6) {
 		i = af6->ip6af_up->ip6af_off + af6->ip6af_up->ip6af_frglen
 			- ip6af->ip6af_off;
 		if (i > 0) {
 #if 0				/* suppress the noisy log */
 			log(LOG_ERR, "%d bytes of a fragment from %s "
 			    "overlaps the previous fragment\n",
 			    i, ip6_sprintf(ip6buf, &q6->ip6q_src));
 #endif
 			free(ip6af, M_FTABLE);
 			goto dropfrag;
 		}
 	}
 	if (af6 != (struct ip6asfrag *)q6) {
 		i = (ip6af->ip6af_off + ip6af->ip6af_frglen) - af6->ip6af_off;
 		if (i > 0) {
 #if 0				/* suppress the noisy log */
 			log(LOG_ERR, "%d bytes of a fragment from %s "
 			    "overlaps the succeeding fragment",
 			    i, ip6_sprintf(ip6buf, &q6->ip6q_src));
 #endif
 			free(ip6af, M_FTABLE);
 			goto dropfrag;
 		}
 	}
 #endif
 
 insert:
 #ifdef MAC
 	if (!first_frag)
 		mac_ip6q_update(m, q6);
 #endif
 
 	/*
 	 * Stick new segment in its place;
 	 * check for complete reassembly.
 	 * Move to front of packet queue, as we are
 	 * the most recently active fragmented packet.
 	 */
 	frag6_enq(ip6af, af6->ip6af_up);
 	V_frag6_nfrags++;
 	q6->ip6q_nfrag++;
 #if 0 /* xxx */
 	if (q6 != V_ip6q.ip6q_next) {
 		frag6_remque(q6);
 		frag6_insque(q6, &V_ip6q);
 	}
 #endif
 	/*
 	 * The packet is complete only if the queued fragments are contiguous
 	 * from offset 0 and the last one has the M bit clear.
 	 */
 	next = 0;
 	for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
 	     af6 = af6->ip6af_down) {
 		if (af6->ip6af_off != next) {
 			IP6Q_UNLOCK();
 			return IPPROTO_DONE;
 		}
 		next += af6->ip6af_frglen;
 	}
 	if (af6->ip6af_up->ip6af_mff) {
 		IP6Q_UNLOCK();
 		return IPPROTO_DONE;
 	}
 
 	/*
 	 * Reassembly is complete; concatenate fragments.
 	 */
 	ip6af = q6->ip6q_down;
 	t = m = IP6_REASS_MBUF(ip6af);
 	af6 = ip6af->ip6af_down;
 	frag6_deq(ip6af);
 	while (af6 != (struct ip6asfrag *)q6) {
 		af6dwn = af6->ip6af_down;
 		frag6_deq(af6);
 		while (t->m_next)
 			t = t->m_next;
 		m_adj(IP6_REASS_MBUF(af6), af6->ip6af_offset);
 		m_cat(t, IP6_REASS_MBUF(af6));
 		free(af6, M_FTABLE);
 		af6 = af6dwn;
 	}
 
 	/* adjust offset to point where the original next header starts */
 	offset = ip6af->ip6af_offset - sizeof(struct ip6_frag);
 	free(ip6af, M_FTABLE);
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6->ip6_plen = htons((u_short)next + offset - sizeof(struct ip6_hdr));
 	if (q6->ip6q_ecn == IPTOS_ECN_CE)
 		ip6->ip6_flow |= htonl(IPTOS_ECN_CE << 20);
 	nxt = q6->ip6q_nxt;
 #ifdef notyet
 	*q6->ip6q_nxtp = (u_char)(nxt & 0xff);
 #endif
 
 	if (ip6_deletefraghdr(m, offset, M_NOWAIT) != 0) {
 		frag6_remque(q6);
 		V_frag6_nfrags -= q6->ip6q_nfrag;
 #ifdef MAC
 		mac_ip6q_destroy(q6);
 #endif
 		free(q6, M_FTABLE);
 		V_frag6_nfragpackets--;
 
 		goto dropfrag;
 	}
 
 	/*
 	 * Store NXT to the original.
 	 */
 	{
 		char *prvnxtp = ip6_get_prevhdr(m, offset); /* XXX */
 		*prvnxtp = nxt;
 	}
 
 	frag6_remque(q6);
 	V_frag6_nfrags -= q6->ip6q_nfrag;
 #ifdef MAC
 	mac_ip6q_reassemble(q6, m);
 	mac_ip6q_destroy(q6);
 #endif
 	free(q6, M_FTABLE);
 	V_frag6_nfragpackets--;
 
 	if (m->m_flags & M_PKTHDR) { /* Isn't it always true? */
 		int plen = 0;
 		for (t = m; t; t = t->m_next)
 			plen += t->m_len;
 		m->m_pkthdr.len = plen;
 	}
 
 #ifdef RSS
 	mtag = m_tag_alloc(MTAG_ABI_IPV6, IPV6_TAG_DIRECT, sizeof(*ip6dc),
 	    M_NOWAIT);
 	if (mtag == NULL)
 		goto dropfrag;
 
 	ip6dc = (struct ip6_direct_ctx *)(mtag + 1);
 	ip6dc->ip6dc_nxt = nxt;
 	ip6dc->ip6dc_off = offset;
 
 	m_tag_prepend(m, mtag);
 #endif
 
 	IP6Q_UNLOCK();
 	IP6STAT_INC(ip6s_reassembled);
 	in6_ifstat_inc(dstifp, ifs6_reass_ok);
 
 #ifdef RSS
 	/*
 	 * Queue/dispatch for reprocessing.
 	 */
 	netisr_dispatch(NETISR_IPV6_DIRECT, m);
 	return IPPROTO_DONE;
 #endif
 
 	/*
 	 * Tell launch routine the next header
 	 */
 
 	*mp = m;
 	*offp = offset;
 
 	return nxt;
 
  dropfrag:
 	/* Common drop path: entered with ip6qlock held; frees the mbuf. */
 	IP6Q_UNLOCK();
 	in6_ifstat_inc(dstifp, ifs6_reass_fail);
 	IP6STAT_INC(ip6s_fragdropped);
 	m_freem(m);
 	return IPPROTO_DONE;
 }
 
 /*
  * Dispose of the reassembly queue q6 together with every fragment that is
  * still linked on it.  The first fragment (fragment offset 0) is used to
  * report an ICMPv6 "time exceeded in reassembly" error; all other
  * fragments are simply freed.  The IP6Q lock is asserted on entry.
  */
 void
 frag6_freef(struct ip6q *q6)
 {
 	struct ip6asfrag *frag, *nextfrag;
 	struct ip6_hdr *hdr;
 	struct mbuf *pkt;
 
 	IP6Q_LOCK_ASSERT();
 
 	frag = q6->ip6q_down;
 	while (frag != (struct ip6asfrag *)q6) {
 		/* Note the successor before this entry is unlinked. */
 		nextfrag = frag->ip6af_down;
 		pkt = IP6_REASS_MBUF(frag);
 		frag6_deq(frag);
 
 		if (frag->ip6af_off != 0) {
 			/* Not the 1st fragment: just drop the data. */
 			m_freem(pkt);
 		} else {
 			/*
 			 * 1st fragment: put the source and destination
 			 * addresses saved in the queue header back into the
 			 * packet and answer with an ICMP time exceeded error.
 			 */
 			hdr = mtod(pkt, struct ip6_hdr *);
 			hdr->ip6_src = q6->ip6q_src;
 			hdr->ip6_dst = q6->ip6q_dst;
 
 			icmp6_error(pkt, ICMP6_TIME_EXCEEDED,
 			    ICMP6_TIME_EXCEED_REASSEMBLY, 0);
 		}
 		free(frag, M_FTABLE);
 		frag = nextfrag;
 	}
 
 	/* The chain is empty now; retire the queue header itself. */
 	frag6_remque(q6);
 	V_frag6_nfrags -= q6->ip6q_nfrag;
 #ifdef MAC
 	mac_ip6q_destroy(q6);
 #endif
 	free(q6, M_FTABLE);
 	V_frag6_nfragpackets--;
 }
 
 /*
  * Link the fragment af6 into a reassembly chain directly below up6.
  * Same idea as insque, except that the link pointers are the
  * ip6af_up/ip6af_down members in the middle of the structure.
  */
 void
 frag6_enq(struct ip6asfrag *af6, struct ip6asfrag *up6)
 {
 	struct ip6asfrag *below;
 
 	IP6Q_LOCK_ASSERT();
 
 	/* The entry that currently follows up6 will follow af6 instead. */
 	below = up6->ip6af_down;
 
 	af6->ip6af_down = below;
 	af6->ip6af_up = up6;
 	below->ip6af_up = af6;
 	up6->ip6af_down = af6;
 }
 
 /*
  * Unlink the fragment af6 from its reassembly chain; the counterpart of
  * frag6_enq.  af6 itself is neither modified nor freed.
  */
 void
 frag6_deq(struct ip6asfrag *af6)
 {
 	struct ip6asfrag *above, *below;
 
 	IP6Q_LOCK_ASSERT();
 
 	above = af6->ip6af_up;
 	below = af6->ip6af_down;
 
 	/* Make the two neighbours point at each other. */
 	above->ip6af_down = below;
 	below->ip6af_up = above;
 }
 
 /*
  * Insert the reassembly queue header `new' into the queue list right
  * after `old'.
  */
 void
 frag6_insque(struct ip6q *new, struct ip6q *old)
 {
 	struct ip6q *succ;
 
 	IP6Q_LOCK_ASSERT();
 
 	/* The current successor of `old' becomes the successor of `new'. */
 	succ = old->ip6q_next;
 
 	new->ip6q_next = succ;
 	new->ip6q_prev = old;
 	succ->ip6q_prev = new;
 	old->ip6q_next = new;
 }
 
 /*
  * Remove the reassembly queue header p6 from the queue list.  p6 itself
  * is neither modified nor freed.
  */
 void
 frag6_remque(struct ip6q *p6)
 {
 	struct ip6q *pred, *succ;
 
 	IP6Q_LOCK_ASSERT();
 
 	pred = p6->ip6q_prev;
 	succ = p6->ip6q_next;
 
 	/* Bridge the gap left by p6. */
 	pred->ip6q_next = succ;
 	succ->ip6q_prev = pred;
 }
 
 /*
  * IPv6 reassembly timer.  For each VNET, decrement the time-to-live of
  * every reassembly queue and discard the queues whose counter reaches
  * zero.  Afterwards, if more packets are being reassembled than
  * V_ip6_maxfragpackets allows, discard queues until the limit is met.
  */
 void
 frag6_slowtimo(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 	struct ip6q *cur, *aged;
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	IP6Q_LOCK();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		cur = V_ip6q.ip6q_next;
 		if (cur != NULL) {
 			while (cur != &V_ip6q) {
 				/*
 				 * Step to the next queue first: the one
 				 * being aged may be freed below.
 				 */
 				aged = cur;
 				cur = cur->ip6q_next;
 				if (--aged->ip6q_ttl == 0) {
 					IP6STAT_INC(ip6s_fragtimeout);
 					/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
 					frag6_freef(aged);
 				}
 			}
 		}
 
 		/*
 		 * The limit may have been lowered since the queues were
 		 * created; drop queues from the tail of the list until we
 		 * are back under it.
 		 */
 		while (V_ip6q.ip6q_prev &&
 		    V_frag6_nfragpackets > (u_int)V_ip6_maxfragpackets) {
 			IP6STAT_INC(ip6s_fragoverflow);
 			/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
 			frag6_freef(V_ip6q.ip6q_prev);
 		}
 		CURVNET_RESTORE();
 	}
 	IP6Q_UNLOCK();
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Discard every pending reassembly queue in every VNET.  The IP6Q lock
  * is only tried, never waited for: if it cannot be taken, nothing is
  * drained.
  */
 void
 frag6_drain(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	if (IP6Q_TRYLOCK() != 0) {
 		VNET_FOREACH(vnet_iter) {
 			CURVNET_SET(vnet_iter);
 			/* Keep freeing the head until the list is empty. */
 			while (V_ip6q.ip6q_next != &V_ip6q) {
 				IP6STAT_INC(ip6s_fragdropped);
 				/* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
 				frag6_freef(V_ip6q.ip6q_next);
 			}
 			CURVNET_RESTORE();
 		}
 		IP6Q_UNLOCK();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 int
 ip6_deletefraghdr(struct mbuf *m, int offset, int wait)
 {
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct mbuf *t;
 
 	/* Delete frag6 header. */
 	if (m->m_len >= offset + sizeof(struct ip6_frag)) {
 		/* This is the only possible case with !PULLDOWN_TEST. */
 		bcopy(ip6, (char *)ip6 + sizeof(struct ip6_frag),
 		    offset);
 		m->m_data += sizeof(struct ip6_frag);
 		m->m_len -= sizeof(struct ip6_frag);
 	} else {
 		/* This comes with no copy if the boundary is on cluster. */
 		if ((t = m_split(m, offset, wait)) == NULL)
 			return (ENOMEM);
 		m_adj(t, sizeof(struct ip6_frag));
 		m_cat(m, t);
 	}
 
 	return (0);
 }
Index: projects/clang380-import/sys/netinet6/icmp6.c
===================================================================
--- projects/clang380-import/sys/netinet6/icmp6.c	(revision 293686)
+++ projects/clang380-import/sys/netinet6/icmp6.c	(revision 293687)
@@ -1,2869 +1,2870 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #define	MBUF_PRIVATE	/* XXXRW: Optimisation tries to avoid M_EXT mbufs */
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/domain.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 #include <sys/time.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/if_llatbl.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet/tcp_var.h>
 
 #include <netinet6/in6_fib.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6protosw.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/mld6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/send.h>
 
 extern struct domain inet6domain;
 
 /*
  * Per-VNET icmp6stat counters.  kmod_icmp6stat_inc() below indexes them
  * as an array.
  */
 VNET_PCPUSTAT_DEFINE(struct icmp6stat, icmp6stat);
 VNET_PCPUSTAT_SYSINIT(icmp6stat);
 
 #ifdef VIMAGE
 VNET_PCPUSTAT_SYSUNINIT(icmp6stat);
 #endif /* VIMAGE */
 
 /*
  * Virtualized variables used by this file.  The two VNET_DEFINE'd ones
  * are static to this file; the others are only declared here.
  */
 VNET_DECLARE(struct inpcbinfo, ripcbinfo);
 VNET_DECLARE(struct inpcbhead, ripcb);
 VNET_DECLARE(int, icmp6errppslim);
 static VNET_DEFINE(int, icmp6errpps_count) = 0;
 static VNET_DEFINE(struct timeval, icmp6errppslim_last);
 VNET_DECLARE(int, icmp6_nodeinfo);
 
 /* V_-prefixed shorthands for the VNET() accessors of the variables above. */
 #define	V_ripcbinfo			VNET(ripcbinfo)
 #define	V_ripcb				VNET(ripcb)
 #define	V_icmp6errppslim		VNET(icmp6errppslim)
 #define	V_icmp6errpps_count		VNET(icmp6errpps_count)
 #define	V_icmp6errppslim_last		VNET(icmp6errppslim_last)
 #define	V_icmp6_nodeinfo		VNET(icmp6_nodeinfo)
 
 /* Prototypes of the static helpers of this file. */
 static void icmp6_errcount(int, int);
 static int icmp6_rip6_input(struct mbuf **, int);
 static int icmp6_ratelimit(const struct in6_addr *, const int, const int);
 static const char *icmp6_redirect_diag(struct in6_addr *,
 	struct in6_addr *, struct in6_addr *);
 static struct mbuf *ni6_input(struct mbuf *, int);
 static struct mbuf *ni6_nametodns(const char *, int, int);
 static int ni6_dnsmatch(const char *, int, const char *, int);
 static int ni6_addrs(struct icmp6_nodeinfo *, struct mbuf *,
 			  struct ifnet **, struct in6_addr *);
 static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *,
 				struct ifnet *, int);
 static int icmp6_notify_error(struct mbuf **, int, int, int);
 
 /*
  * Kernel module interface for updating icmp6stat.  The argument is an index
  * into icmp6stat treated as an array of u_quad_t.  While this encodes the
  * general layout of icmp6stat into the caller, it doesn't encode its
  * location, so that future changes to add, for example, per-CPU stats
  * support won't cause binary compatibility problems for kernel modules.
  */
 void
 kmod_icmp6stat_inc(int statnum)
 {
 
 	counter_u64_add(VNET(icmp6stat)[statnum], 1);
 }
 
 /*
  * Bump the output statistics counter that corresponds to the type and
  * code of an ICMPv6 error.  A type/code pair without a counter of its
  * own is charged to icp6s_ounknown.
  */
 static void
 icmp6_errcount(int type, int code)
 {
 	int counted = 1;	/* cleared when no specific counter matches */
 
 	switch (type) {
 	case ICMP6_DST_UNREACH:
 		switch (code) {
 		case ICMP6_DST_UNREACH_NOROUTE:
 			ICMP6STAT_INC(icp6s_odst_unreach_noroute);
 			break;
 		case ICMP6_DST_UNREACH_ADMIN:
 			ICMP6STAT_INC(icp6s_odst_unreach_admin);
 			break;
 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
 			ICMP6STAT_INC(icp6s_odst_unreach_beyondscope);
 			break;
 		case ICMP6_DST_UNREACH_ADDR:
 			ICMP6STAT_INC(icp6s_odst_unreach_addr);
 			break;
 		case ICMP6_DST_UNREACH_NOPORT:
 			ICMP6STAT_INC(icp6s_odst_unreach_noport);
 			break;
 		default:
 			counted = 0;
 			break;
 		}
 		break;
 
 	case ICMP6_PACKET_TOO_BIG:
 		ICMP6STAT_INC(icp6s_opacket_too_big);
 		break;
 
 	case ICMP6_TIME_EXCEEDED:
 		switch (code) {
 		case ICMP6_TIME_EXCEED_TRANSIT:
 			ICMP6STAT_INC(icp6s_otime_exceed_transit);
 			break;
 		case ICMP6_TIME_EXCEED_REASSEMBLY:
 			ICMP6STAT_INC(icp6s_otime_exceed_reassembly);
 			break;
 		default:
 			counted = 0;
 			break;
 		}
 		break;
 
 	case ICMP6_PARAM_PROB:
 		switch (code) {
 		case ICMP6_PARAMPROB_HEADER:
 			ICMP6STAT_INC(icp6s_oparamprob_header);
 			break;
 		case ICMP6_PARAMPROB_NEXTHEADER:
 			ICMP6STAT_INC(icp6s_oparamprob_nextheader);
 			break;
 		case ICMP6_PARAMPROB_OPTION:
 			ICMP6STAT_INC(icp6s_oparamprob_option);
 			break;
 		default:
 			counted = 0;
 			break;
 		}
 		break;
 
 	case ND_REDIRECT:
 		ICMP6STAT_INC(icp6s_oredirect);
 		break;
 
 	default:
 		counted = 0;
 		break;
 	}
 
 	if (!counted) {
 		ICMP6STAT_INC(icp6s_ounknown);
 	}
 }
 
 /*
  * A wrapper function for icmp6_error() necessary when the erroneous packet
  * may not contain enough scope zone information.
  *
  * The scope zones of the source and destination addresses in the IPv6
  * header of m are set from ifp before the packet is passed on to
  * icmp6_error() with the same type, code and param.
  *
  * When ifp is NULL or a scope cannot be set, no error is generated and
  * the packet is freed here, so that these paths do not leave the mbuf
  * chain unreleased.
  */
 void
 icmp6_error2(struct mbuf *m, int type, int code, int param,
     struct ifnet *ifp)
 {
 	struct ip6_hdr *ip6;
 
 	if (ifp == NULL)
 		goto freeit;
 
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), );
 #else
 	if (m->m_len < sizeof(struct ip6_hdr)) {
 		m = m_pullup(m, sizeof(struct ip6_hdr));
 		if (m == NULL)
 			return;
 	}
 #endif
 
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0)
 		goto freeit;
 	if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
 		goto freeit;
 
 	icmp6_error(m, type, code, param);
 	return;
 
  freeit:
 	/* No error can be sent for this packet; release it. */
 	m_freem(m);
 }
 
 /*
  * Generate an ICMPv6 error message of the given type and code in response
  * to the bad IPv6 packet m.  param is stored, in network byte order, in
  * the icmp6_pptr field of the error.  When the error must not or cannot
  * be sent (see the checks below) the packet is freed instead; otherwise
  * it is prefixed with new IPv6 and ICMPv6 headers and handed to
  * icmp6_reflect().
  */
 void
 icmp6_error(struct mbuf *m, int type, int code, int param)
 {
 	struct ip6_hdr *orig_hdr, *err_hdr;
 	struct icmp6_hdr *err_icmp6;
 	int lastoff;
 	int lastproto;
 
 	ICMP6STAT_INC(icp6s_error);
 
 	/* per-type/code accounting */
 	icmp6_errcount(type, code);
 
 #ifdef M_DECRYPTED	/*not openbsd*/
 	if (m->m_flags & M_DECRYPTED) {
 		ICMP6STAT_INC(icp6s_canterror);
 		goto freeit;
 	}
 #endif
 
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), );
 #else
 	if (m->m_len < sizeof(struct ip6_hdr)) {
 		m = m_pullup(m, sizeof(struct ip6_hdr));
 		if (m == NULL)
 			return;
 	}
 #endif
 	orig_hdr = mtod(m, struct ip6_hdr *);
 
 	/*
 	 * RFC 2463, Section 2.4: basically no error is sent when the
 	 * offending packet was addressed to a multicast address or arrived
 	 * as a link-layer multicast/broadcast.  Item e.2 of that section
 	 * makes two exceptions:
 	 * - Packet Too Big, which path MTU discovery relies on;
 	 * - Parameter Problem for an option type that asks for an error.
 	 *   That request has been checked in ip6_unknown_opt() already, so
 	 *   looking at type and code is enough here.
 	 */
 	if ((m->m_flags & (M_BCAST|M_MCAST)) != 0 ||
 	    IN6_IS_ADDR_MULTICAST(&orig_hdr->ip6_dst)) {
 		if (type != ICMP6_PACKET_TOO_BIG &&
 		    !(type == ICMP6_PARAM_PROB &&
 		      code == ICMP6_PARAMPROB_OPTION))
 			goto freeit;
 	}
 
 	/*
 	 * RFC 2463, 2.4 (e.5): source address check.
 	 * XXX: the case of anycast source?
 	 */
 	if (IN6_IS_ADDR_UNSPECIFIED(&orig_hdr->ip6_src))
 		goto freeit;
 	if (IN6_IS_ADDR_MULTICAST(&orig_hdr->ip6_src))
 		goto freeit;
 
 	/*
 	 * Never answer an ICMPv6 error or a redirect with another ICMPv6
 	 * error.  Anything else (ICMPv6 informational messages, non-ICMPv6
 	 * packets) may trigger one.
 	 */
 	lastproto = -1;
 	lastoff = ip6_lasthdr(m, 0, IPPROTO_IPV6, &lastproto);
 	if (lastoff >= 0 && lastproto == IPPROTO_ICMPV6) {
 		struct icmp6_hdr *inner;
 
 #ifndef PULLDOWN_TEST
 		IP6_EXTHDR_CHECK(m, 0, lastoff + sizeof(struct icmp6_hdr), );
 		inner = (struct icmp6_hdr *)(mtod(m, caddr_t) + lastoff);
 #else
 		IP6_EXTHDR_GET(inner, struct icmp6_hdr *, m, lastoff,
 			sizeof(*inner));
 		if (inner == NULL) {
 			ICMP6STAT_INC(icp6s_tooshort);
 			return;
 		}
 #endif
 		/*
 		 * Types below ICMP6_ECHO_REQUEST are errors.  Redirect is
 		 * informational, but must not be answered either.
 		 */
 		if (inner->icmp6_type < ICMP6_ECHO_REQUEST ||
 		    inner->icmp6_type == ND_REDIRECT) {
 			ICMP6STAT_INC(icp6s_canterror);
 			goto freeit;
 		}
 	}
 
 	/* Fetch the header pointer again after the checks above. */
 	orig_hdr = mtod(m, struct ip6_hdr *);
 
 	/* Last hurdle: the rate limiter. */
 	if (icmp6_ratelimit(&orig_hdr->ip6_src, type, code)) {
 		ICMP6STAT_INC(icp6s_toofreq);
 		goto freeit;
 	}
 
 	/*
 	 * The error will be sent.  Trim the quoted packet from its tail
 	 * when it is ICMPV6_PLD_MAXLEN bytes or longer, then make room in
 	 * front of it for the new IPv6 and ICMPv6 headers.
 	 */
 	if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
 		m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);
 
 	/* FIB is also copied over. */
 	M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr),
 	    M_NOWAIT);
 	if (m == NULL) {
 		nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
 		return;
 	}
 
 	/* The new header starts out with the addresses of the old one. */
 	err_hdr = mtod(m, struct ip6_hdr *);
 	err_hdr->ip6_src  = orig_hdr->ip6_src;
 	err_hdr->ip6_dst  = orig_hdr->ip6_dst;
 
 	in6_clearscope(&orig_hdr->ip6_src);
 	in6_clearscope(&orig_hdr->ip6_dst);
 
 	err_icmp6 = (struct icmp6_hdr *)(err_hdr + 1);
 	err_icmp6->icmp6_type = type;
 	err_icmp6->icmp6_code = code;
 	err_icmp6->icmp6_pptr = htonl((u_int32_t)param);
 
 	/*
 	 * icmp6_reflect() is written for the input path, whereas this
 	 * function is called from both the input and the output path.  In
 	 * the output path rcvif could contain a bogus value, so clear it
 	 * for safety; the new IPv6 header should carry enough scope
 	 * information.
 	 */
 	m->m_pkthdr.rcvif = NULL;
 
 	ICMP6STAT_INC(icp6s_outhist[type]);
 	icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */
 
 	return;
 
   freeit:
 	/*
 	 * If we can't tell whether or not we can generate ICMP6, free it.
 	 */
 	m_freem(m);
 }
 
 /*
  * Process a received ICMP6 message.
  */
 int
 icmp6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp, *n;
 	struct ifnet *ifp;
 	struct ip6_hdr *ip6, *nip6;
 	struct icmp6_hdr *icmp6, *nicmp6;
 	int off = *offp;
 	int icmp6len = m->m_pkthdr.len - *offp;
 	int code, sum, noff;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 	int ip6len, error;
 
 	ifp = m->m_pkthdr.rcvif;
 
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE);
 	/* m might change if M_LOOP.  So, call mtod after this */
 #endif
 
 	/*
 	 * Locate icmp6 structure in mbuf, and check
 	 * that not corrupted and of at least minimum length
 	 */
 
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen);
 	if (icmp6len < sizeof(struct icmp6_hdr)) {
 		ICMP6STAT_INC(icp6s_tooshort);
 		goto freeit;
 	}
 
 	/*
 	 * Check multicast group membership.
 	 * Note: SSM filters are not applied for ICMPv6 traffic.
 	 */
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		struct in6_multi	*inm;
 
 		inm = in6m_lookup(ifp, &ip6->ip6_dst);
 		if (inm == NULL) {
 			IP6STAT_INC(ip6s_notmember);
 			in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
 			goto freeit;
 		}
 	}
 
 	/*
 	 * calculate the checksum
 	 */
 #ifndef PULLDOWN_TEST
 	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
 	if (icmp6 == NULL) {
 		ICMP6STAT_INC(icp6s_tooshort);
 		return IPPROTO_DONE;
 	}
 #endif
 	code = icmp6->icmp6_code;
 
 	if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
 		nd6log((LOG_ERR,
 		    "ICMP6 checksum error(%d|%x) %s\n",
 		    icmp6->icmp6_type, sum,
 		    ip6_sprintf(ip6bufs, &ip6->ip6_src)));
 		ICMP6STAT_INC(icp6s_checksum);
 		goto freeit;
 	}
 
 	ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]);
 	icmp6_ifstat_inc(ifp, ifs6_in_msg);
 	if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK)
 		icmp6_ifstat_inc(ifp, ifs6_in_error);
 
 	switch (icmp6->icmp6_type) {
 	case ICMP6_DST_UNREACH:
 		icmp6_ifstat_inc(ifp, ifs6_in_dstunreach);
 		switch (code) {
 		case ICMP6_DST_UNREACH_NOROUTE:
 			code = PRC_UNREACH_NET;
 			break;
 		case ICMP6_DST_UNREACH_ADMIN:
 			icmp6_ifstat_inc(ifp, ifs6_in_adminprohib);
 			code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
 			break;
 		case ICMP6_DST_UNREACH_ADDR:
 			code = PRC_HOSTDEAD;
 			break;
 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
 			/* I mean "source address was incorrect." */
 			code = PRC_PARAMPROB;
 			break;
 		case ICMP6_DST_UNREACH_NOPORT:
 			code = PRC_UNREACH_PORT;
 			break;
 		default:
 			goto badcode;
 		}
 		goto deliver;
 		break;
 
 	case ICMP6_PACKET_TOO_BIG:
 		icmp6_ifstat_inc(ifp, ifs6_in_pkttoobig);
 
 		/* validation is made in icmp6_mtudisc_update */
 
 		code = PRC_MSGSIZE;
 
 		/*
 		 * Updating the path MTU will be done after examining
 		 * intermediate extension headers.
 		 */
 		goto deliver;
 		break;
 
 	case ICMP6_TIME_EXCEEDED:
 		icmp6_ifstat_inc(ifp, ifs6_in_timeexceed);
 		switch (code) {
 		case ICMP6_TIME_EXCEED_TRANSIT:
 			code = PRC_TIMXCEED_INTRANS;
 			break;
 		case ICMP6_TIME_EXCEED_REASSEMBLY:
 			code = PRC_TIMXCEED_REASS;
 			break;
 		default:
 			goto badcode;
 		}
 		goto deliver;
 		break;
 
 	case ICMP6_PARAM_PROB:
 		icmp6_ifstat_inc(ifp, ifs6_in_paramprob);
 		switch (code) {
 		case ICMP6_PARAMPROB_NEXTHEADER:
 			code = PRC_UNREACH_PROTOCOL;
 			break;
 		case ICMP6_PARAMPROB_HEADER:
 		case ICMP6_PARAMPROB_OPTION:
 			code = PRC_PARAMPROB;
 			break;
 		default:
 			goto badcode;
 		}
 		goto deliver;
 		break;
 
 	case ICMP6_ECHO_REQUEST:
 		icmp6_ifstat_inc(ifp, ifs6_in_echo);
 		if (code != 0)
 			goto badcode;
 		if ((n = m_copy(m, 0, M_COPYALL)) == NULL) {
 			/* Give up remote */
 			break;
 		}
 		if (!M_WRITABLE(n)
 		 || n->m_len < off + sizeof(struct icmp6_hdr)) {
 			struct mbuf *n0 = n;
 			int n0len;
 
 			CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) <= MHLEN);
 			n = m_gethdr(M_NOWAIT, n0->m_type);
 			if (n == NULL) {
 				/* Give up remote */
 				m_freem(n0);
 				break;
 			}
 
 			m_move_pkthdr(n, n0);	/* FIB copied. */
 			n0len = n0->m_pkthdr.len;	/* save for use below */
 			/*
 			 * Copy IPv6 and ICMPv6 only.
 			 */
 			nip6 = mtod(n, struct ip6_hdr *);
 			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
 			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
 			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
 			noff = sizeof(struct ip6_hdr);
 			/* new mbuf contains only ipv6+icmpv6 headers */
 			n->m_len = noff + sizeof(struct icmp6_hdr);
 			/*
 			 * Adjust mbuf.  ip6_plen will be adjusted in
 			 * ip6_output().
 			 */
 			m_adj(n0, off + sizeof(struct icmp6_hdr));
 			/* recalculate complete packet size */
 			n->m_pkthdr.len = n0len + (noff - off);
 			n->m_next = n0;
 		} else {
 			nip6 = mtod(n, struct ip6_hdr *);
 			IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off,
 			    sizeof(*nicmp6));
 			noff = off;
 		}
 		nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
 		nicmp6->icmp6_code = 0;
 		if (n) {
 			ICMP6STAT_INC(icp6s_reflect);
 			ICMP6STAT_INC(icp6s_outhist[ICMP6_ECHO_REPLY]);
 			icmp6_reflect(n, noff);
 		}
 		break;
 
 	case ICMP6_ECHO_REPLY:
 		icmp6_ifstat_inc(ifp, ifs6_in_echoreply);
 		if (code != 0)
 			goto badcode;
 		break;
 
 	case MLD_LISTENER_QUERY:
 	case MLD_LISTENER_REPORT:
 	case MLD_LISTENER_DONE:
 	case MLDV2_LISTENER_REPORT:
 		/*
 		 * Drop MLD traffic which is not link-local, has a hop limit
 		 * of greater than 1 hop, or which does not have the
 		 * IPv6 HBH Router Alert option.
 		 * As IPv6 HBH options are stripped in ip6_input() we must
 		 * check an mbuf header flag.
 		 * XXX Should we also sanity check that these messages
 		 * were directed to a link-local multicast prefix?
 		 */
 		if ((ip6->ip6_hlim != 1) || (m->m_flags & M_RTALERT_MLD) == 0)
 			goto freeit;
 		if (mld_input(m, off, icmp6len) != 0)
 			return (IPPROTO_DONE);
 		/* m stays. */
 		break;
 
 	case ICMP6_WRUREQUEST:	/* ICMP6_FQDN_QUERY */
 	    {
 		enum { WRU, FQDN } mode;
 
 		if (!V_icmp6_nodeinfo)
 			break;
 
 		if (icmp6len == sizeof(struct icmp6_hdr) + 4)
 			mode = WRU;
 		else if (icmp6len >= sizeof(struct icmp6_nodeinfo))
 			mode = FQDN;
 		else
 			goto badlen;
 
 		if (mode == FQDN) {
 #ifndef PULLDOWN_TEST
 			IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo),
 			    IPPROTO_DONE);
 #endif
 			n = m_copy(m, 0, M_COPYALL);
 			if (n)
 				n = ni6_input(n, off);
 			/* XXX meaningless if n == NULL */
 			noff = sizeof(struct ip6_hdr);
 		} else {
 			struct prison *pr;
 			u_char *p;
 			int maxhlen, hlen;
 
 			/*
 			 * XXX: this combination of flags is pointless,
 			 * but should we keep this for compatibility?
 			 */
 			if ((V_icmp6_nodeinfo & 5) != 5)
 				break;
 
 			if (code != 0)
 				goto badcode;
 
 			CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) + 4 <= MHLEN);
 			n = m_gethdr(M_NOWAIT, m->m_type);
 			if (n == NULL) {
 				/* Give up remote */
 				break;
 			}
 			if (!m_dup_pkthdr(n, m, M_NOWAIT)) {
 				/*
 				 * Previous code did a blind M_COPY_PKTHDR
 				 * and said "just for rcvif".  If true, then
 				 * we could tolerate the dup failing (due to
 				 * the deep copy of the tag chain).  For now
 				 * be conservative and just fail.
 				 */
 				m_free(n);
 				n = NULL;
 			}
 			maxhlen = M_TRAILINGSPACE(n) -
 			    (sizeof(*nip6) + sizeof(*nicmp6) + 4);
 			pr = curthread->td_ucred->cr_prison;
 			mtx_lock(&pr->pr_mtx);
 			hlen = strlen(pr->pr_hostname);
 			if (maxhlen > hlen)
 				maxhlen = hlen;
 			/*
 			 * Copy IPv6 and ICMPv6 only.
 			 */
 			nip6 = mtod(n, struct ip6_hdr *);
 			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
 			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
 			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
 			p = (u_char *)(nicmp6 + 1);
 			bzero(p, 4);
 			/* meaningless TTL */
 			bcopy(pr->pr_hostname, p + 4, maxhlen);
 			mtx_unlock(&pr->pr_mtx);
 			noff = sizeof(struct ip6_hdr);
 			n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
 				sizeof(struct icmp6_hdr) + 4 + maxhlen;
 			nicmp6->icmp6_type = ICMP6_WRUREPLY;
 			nicmp6->icmp6_code = 0;
 		}
 		if (n) {
 			ICMP6STAT_INC(icp6s_reflect);
 			ICMP6STAT_INC(icp6s_outhist[ICMP6_WRUREPLY]);
 			icmp6_reflect(n, noff);
 		}
 		break;
 	    }
 
 	case ICMP6_WRUREPLY:
 		if (code != 0)
 			goto badcode;
 		break;
 
 	case ND_ROUTER_SOLICIT:
 		icmp6_ifstat_inc(ifp, ifs6_in_routersolicit);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_router_solicit))
 			goto badlen;
 		if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
 			/* give up local */
 
 			/* Send incoming SeND packet to user space. */
 			if (send_sendso_input_hook != NULL) {
 				IP6_EXTHDR_CHECK(m, off,
 				    icmp6len, IPPROTO_DONE);
 				error = send_sendso_input_hook(m, ifp,
 				    SND_IN, ip6len);
 				/* -1 == no app on SEND socket */
 				if (error == 0)
 					return (IPPROTO_DONE);
 				nd6_rs_input(m, off, icmp6len);
 			} else
 				nd6_rs_input(m, off, icmp6len);
 			m = NULL;
 			goto freeit;
 		}
 		if (send_sendso_input_hook != NULL) {
 			IP6_EXTHDR_CHECK(n, off,
 			    icmp6len, IPPROTO_DONE);
                         error = send_sendso_input_hook(n, ifp,
 			    SND_IN, ip6len);
 			if (error == 0)
 				goto freeit;
 			/* -1 == no app on SEND socket */
 			nd6_rs_input(n, off, icmp6len);
 		} else
 			nd6_rs_input(n, off, icmp6len);
 		/* m stays. */
 		break;
 
 	case ND_ROUTER_ADVERT:
 		icmp6_ifstat_inc(ifp, ifs6_in_routeradvert);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_router_advert))
 			goto badlen;
 		if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
 
 			/* Send incoming SeND-protected/ND packet to user space. */
 			if (send_sendso_input_hook != NULL) {
 				error = send_sendso_input_hook(m, ifp,
 				    SND_IN, ip6len);
 				if (error == 0)
 					return (IPPROTO_DONE);
 				nd6_ra_input(m, off, icmp6len);
 			} else
 				nd6_ra_input(m, off, icmp6len);
 			m = NULL;
 			goto freeit;
 		}
 		if (send_sendso_input_hook != NULL) {
 			error = send_sendso_input_hook(n, ifp,
 			    SND_IN, ip6len);
 			if (error == 0)
 				goto freeit;
 			nd6_ra_input(n, off, icmp6len);
 		} else
 			nd6_ra_input(n, off, icmp6len);
 		/* m stays. */
 		break;
 
 	case ND_NEIGHBOR_SOLICIT:
 		icmp6_ifstat_inc(ifp, ifs6_in_neighborsolicit);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_neighbor_solicit))
 			goto badlen;
 		if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
 			if (send_sendso_input_hook != NULL) {
 				error = send_sendso_input_hook(m, ifp,
 				    SND_IN, ip6len);
 				if (error == 0)
 					return (IPPROTO_DONE);
 				nd6_ns_input(m, off, icmp6len);
 			} else
 				nd6_ns_input(m, off, icmp6len);
 			m = NULL;
 			goto freeit;
 		}
 		if (send_sendso_input_hook != NULL) {
 			error = send_sendso_input_hook(n, ifp,
 			    SND_IN, ip6len);
 			if (error == 0)
 				goto freeit;
 			nd6_ns_input(n, off, icmp6len);
 		} else
 			nd6_ns_input(n, off, icmp6len);
 		/* m stays. */
 		break;
 
 	case ND_NEIGHBOR_ADVERT:
 		icmp6_ifstat_inc(ifp, ifs6_in_neighboradvert);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_neighbor_advert))
 			goto badlen;
 		if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
 
 			/* Send incoming SeND-protected/ND packet to user space. */
 			if (send_sendso_input_hook != NULL) {
 				error = send_sendso_input_hook(m, ifp,
 				    SND_IN, ip6len);
 				if (error == 0)
 					return (IPPROTO_DONE);
 				nd6_na_input(m, off, icmp6len);
 			} else
 				nd6_na_input(m, off, icmp6len);
 			m = NULL;
 			goto freeit;
 		}
 		if (send_sendso_input_hook != NULL) {
 			error = send_sendso_input_hook(n, ifp,
 			    SND_IN, ip6len);
 			if (error == 0)
 				goto freeit;
 			nd6_na_input(n, off, icmp6len);
 		} else
 			nd6_na_input(n, off, icmp6len);
 		/* m stays. */
 		break;
 
 	case ND_REDIRECT:
 		icmp6_ifstat_inc(ifp, ifs6_in_redirect);
 		if (code != 0)
 			goto badcode;
 		if (icmp6len < sizeof(struct nd_redirect))
 			goto badlen;
 		if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
 			if (send_sendso_input_hook != NULL) {
 				error = send_sendso_input_hook(m, ifp,
 				    SND_IN, ip6len);
 		 		if (error == 0)
 					return (IPPROTO_DONE);
 			    icmp6_redirect_input(m, off);
 			} else
 				icmp6_redirect_input(m, off);
 			m = NULL;
 			goto freeit;
 		}
 		if (send_sendso_input_hook != NULL) {
 			error = send_sendso_input_hook(n, ifp,
 			    SND_IN, ip6len);
 			if (error == 0)
 				goto freeit;
 			icmp6_redirect_input(n, off);
 		} else
 			icmp6_redirect_input(n, off);
 		/* m stays. */
 		break;
 
 	case ICMP6_ROUTER_RENUMBERING:
 		if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
 		    code != ICMP6_ROUTER_RENUMBERING_RESULT)
 			goto badcode;
 		if (icmp6len < sizeof(struct icmp6_router_renum))
 			goto badlen;
 		break;
 
 	default:
 		nd6log((LOG_DEBUG,
 		    "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n",
 		    icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src),
 		    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
 		    ifp ? ifp->if_index : 0));
 		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
 			/* ICMPv6 error: MUST deliver it by spec... */
 			code = PRC_NCMDS;
 			/* deliver */
 		} else {
 			/* ICMPv6 informational: MUST not deliver */
 			break;
 		}
 	deliver:
 		if (icmp6_notify_error(&m, off, icmp6len, code) != 0) {
 			/* In this case, m should've been freed. */
 			return (IPPROTO_DONE);
 		}
 		break;
 
 	badcode:
 		ICMP6STAT_INC(icp6s_badcode);
 		break;
 
 	badlen:
 		ICMP6STAT_INC(icp6s_badlen);
 		break;
 	}
 
 	/* deliver the packet to appropriate sockets */
 	icmp6_rip6_input(&m, *offp);
 
 	return IPPROTO_DONE;
 
  freeit:
 	m_freem(m);
 	return IPPROTO_DONE;
 }
 
 /*
  * Hand a received ICMPv6 error message to the upper-layer protocol that
  * the embedded (offending) packet belongs to.
  *
  * mp       - in/out pointer to the mbuf holding the ICMPv6 message
  * off      - offset of the ICMPv6 header from the start of the mbuf data
  * icmp6len - length of the ICMPv6 message; it must cover at least an
  *            ICMPv6 header plus an embedded IPv6 header
  * code     - command value passed through to the protocol's pr_ctlinput
  *
  * The extension header chain of the embedded packet is walked to find the
  * final next-header value and, when a type 0 routing header with segments
  * left is present, the final destination.  For ICMP6_PACKET_TOO_BIG,
  * icmp6_mtudisc_update() is called before the pr_ctlinput handler looked
  * up in inet6sw[].
  *
  * Returns 0 with *mp set to the mbuf, or -1 on failure.  The "freeit"
  * path frees the mbuf; the caller in this file also treats the mbuf as
  * already freed whenever -1 is returned.
  */
 static int
 icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
 {
 	struct mbuf *m = *mp;
 	struct icmp6_hdr *icmp6;
 	struct ip6_hdr *eip6;
 	u_int32_t notifymtu;
 	struct sockaddr_in6 icmp6src, icmp6dst;
 
 	if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
 		ICMP6STAT_INC(icp6s_tooshort);
 		goto freeit;
 	}
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off,
 	    sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1);
 	icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
 #else
 	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
 	    sizeof(*icmp6) + sizeof(struct ip6_hdr));
 	if (icmp6 == NULL) {
 		ICMP6STAT_INC(icp6s_tooshort);
 		return (-1);
 	}
 #endif
 	/* The embedded IPv6 header immediately follows the ICMPv6 header. */
 	eip6 = (struct ip6_hdr *)(icmp6 + 1);
 
 	/* Detect the upper level protocol */
 	{
 		void (*ctlfunc)(int, struct sockaddr *, void *);
 		u_int8_t nxt = eip6->ip6_nxt;
 		/* eoff: offset of the header currently being examined */
 		int eoff = off + sizeof(struct icmp6_hdr) +
 		    sizeof(struct ip6_hdr);
 		struct ip6ctlparam ip6cp;
 		struct in6_addr *finaldst = NULL;
 		int icmp6type = icmp6->icmp6_type;
 		struct ip6_frag *fh;
 		struct ip6_rthdr *rth;
 		struct ip6_rthdr0 *rth0;
 		int rthlen;
 
 		while (1) { /* XXX: should avoid infinite loop explicitly? */
 			struct ip6_ext *eh;
 
 			switch (nxt) {
 			case IPPROTO_HOPOPTS:
 			case IPPROTO_DSTOPTS:
 			case IPPROTO_AH:
 #ifndef PULLDOWN_TEST
 				IP6_EXTHDR_CHECK(m, 0,
 				    eoff + sizeof(struct ip6_ext), -1);
 				eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff);
 #else
 				IP6_EXTHDR_GET(eh, struct ip6_ext *, m,
 				    eoff, sizeof(*eh));
 				if (eh == NULL) {
 					ICMP6STAT_INC(icp6s_tooshort);
 					return (-1);
 				}
 #endif
 
 				/*
 				 * AH is skipped as (ip6e_len + 2) 4-octet
 				 * units, the option headers as
 				 * (ip6e_len + 1) 8-octet units.
 				 */
 				if (nxt == IPPROTO_AH)
 					eoff += (eh->ip6e_len + 2) << 2;
 				else
 					eoff += (eh->ip6e_len + 1) << 3;
 				nxt = eh->ip6e_nxt;
 				break;
 			case IPPROTO_ROUTING:
 				/*
 				 * When the erroneous packet contains a
 				 * routing header, we should examine the
 				 * header to determine the final destination.
 				 * Otherwise, we can't properly update
 				 * information that depends on the final
 				 * destination (e.g. path MTU).
 				 */
 #ifndef PULLDOWN_TEST
 				IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1);
 				rth = (struct ip6_rthdr *)
 				    (mtod(m, caddr_t) + eoff);
 #else
 				IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m,
 				    eoff, sizeof(*rth));
 				if (rth == NULL) {
 					ICMP6STAT_INC(icp6s_tooshort);
 					return (-1);
 				}
 #endif
 				rthlen = (rth->ip6r_len + 1) << 3;
 				/*
 				 * XXX: currently there is no
 				 * officially defined type other
 				 * than type-0.
 				 * Note that if the segment left field
 				 * is 0, all intermediate hops must
 				 * have been passed.
 				 */
 				if (rth->ip6r_segleft &&
 				    rth->ip6r_type == IPV6_RTHDR_TYPE_0) {
 					int hops;
 
 #ifndef PULLDOWN_TEST
 					IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1);
 					rth0 = (struct ip6_rthdr0 *)
 					    (mtod(m, caddr_t) + eoff);
 #else
 					IP6_EXTHDR_GET(rth0,
 					    struct ip6_rthdr0 *, m,
 					    eoff, rthlen);
 					if (rth0 == NULL) {
 						ICMP6STAT_INC(icp6s_tooshort);
 						return (-1);
 					}
 #endif
 					/* just ignore a bogus header */
 					/*
 					 * An even, non-zero length yields
 					 * len / 2 address slots after the
 					 * fixed header; the last slot is
 					 * taken as the final destination.
 					 */
 					if ((rth0->ip6r0_len % 2) == 0 &&
 					    (hops = rth0->ip6r0_len/2))
 						finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1);
 				}
 				eoff += rthlen;
 				nxt = rth->ip6r_nxt;
 				break;
 			case IPPROTO_FRAGMENT:
 #ifndef PULLDOWN_TEST
 				IP6_EXTHDR_CHECK(m, 0, eoff +
 				    sizeof(struct ip6_frag), -1);
 				fh = (struct ip6_frag *)(mtod(m, caddr_t) +
 				    eoff);
 #else
 				IP6_EXTHDR_GET(fh, struct ip6_frag *, m,
 				    eoff, sizeof(*fh));
 				if (fh == NULL) {
 					ICMP6STAT_INC(icp6s_tooshort);
 					return (-1);
 				}
 #endif
 				/*
 				 * Data after a fragment header is meaningless
 				 * unless it is the first fragment, but
 				 * we'll go to the notify label for path MTU
 				 * discovery.
 				 */
 				if (fh->ip6f_offlg & IP6F_OFF_MASK)
 					goto notify;
 
 				eoff += sizeof(struct ip6_frag);
 				nxt = fh->ip6f_nxt;
 				break;
 			default:
 				/*
 				 * This case includes ESP and the No Next
 				 * Header.  In such cases going to the notify
 				 * label does not have any meaning
 				 * (i.e. ctlfunc will be NULL), but we go
 				 * anyway since we might have to update
 				 * path MTU information.
 				 */
 				goto notify;
 			}
 		}
 	  notify:
 		/*
 		 * Recompute the ICMPv6 header pointer from the mbuf.
 		 * NOTE(review): presumably the checks in the loop above may
 		 * have changed the mbuf data layout - confirm.
 		 */
 #ifndef PULLDOWN_TEST
 		icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
 #else
 		IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
 		    sizeof(*icmp6) + sizeof(struct ip6_hdr));
 		if (icmp6 == NULL) {
 			ICMP6STAT_INC(icp6s_tooshort);
 			return (-1);
 		}
 #endif
 
 		/*
 		 * retrieve parameters from the inner IPv6 header, and convert
 		 * them into sockaddr structures.
 		 * XXX: there is no guarantee that the source or destination
 		 * addresses of the inner packet are in the same scope as
 		 * the addresses of the icmp packet.  But there is no other
 		 * way to determine the zone.
 		 */
 		eip6 = (struct ip6_hdr *)(icmp6 + 1);
 
 		bzero(&icmp6dst, sizeof(icmp6dst));
 		icmp6dst.sin6_len = sizeof(struct sockaddr_in6);
 		icmp6dst.sin6_family = AF_INET6;
 		if (finaldst == NULL)
 			icmp6dst.sin6_addr = eip6->ip6_dst;
 		else
 			icmp6dst.sin6_addr = *finaldst;
 		if (in6_setscope(&icmp6dst.sin6_addr, m->m_pkthdr.rcvif, NULL))
 			goto freeit;
 		bzero(&icmp6src, sizeof(icmp6src));
 		icmp6src.sin6_len = sizeof(struct sockaddr_in6);
 		icmp6src.sin6_family = AF_INET6;
 		icmp6src.sin6_addr = eip6->ip6_src;
 		if (in6_setscope(&icmp6src.sin6_addr, m->m_pkthdr.rcvif, NULL))
 			goto freeit;
 		icmp6src.sin6_flowinfo =
 		    (eip6->ip6_flow & IPV6_FLOWLABEL_MASK);
 
 		/* Collect everything the protocol handler may need. */
 		if (finaldst == NULL)
 			finaldst = &eip6->ip6_dst;
 		ip6cp.ip6c_m = m;
 		ip6cp.ip6c_icmp6 = icmp6;
 		ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1);
 		ip6cp.ip6c_off = eoff;
 		ip6cp.ip6c_finaldst = finaldst;
 		ip6cp.ip6c_src = &icmp6src;
 		ip6cp.ip6c_nxt = nxt;
 
 		if (icmp6type == ICMP6_PACKET_TOO_BIG) {
 			notifymtu = ntohl(icmp6->icmp6_mtu);
 			ip6cp.ip6c_cmdarg = (void *)&notifymtu;
 			icmp6_mtudisc_update(&ip6cp, 1);	/*XXX*/
 		}
 
 		/* Dispatch to the control-input handler for protocol nxt. */
 		ctlfunc = (void (*)(int, struct sockaddr *, void *))
 		    (inet6sw[ip6_protox[nxt]].pr_ctlinput);
 		if (ctlfunc) {
 			(void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst,
 			    &ip6cp);
 		}
 	}
 	*mp = m;
 	return (0);
 
   freeit:
 	m_freem(m);
 	return (-1);
 }
 
 /*
  * Act on the MTU advertised by an ICMPv6 "packet too big" message.
  *
  * ip6cp     - control parameters; the ICMPv6 header (ip6c_icmp6), the
  *             final destination (ip6c_finaldst) and the mbuf (ip6c_m)
  *             are read
  * validated - non-zero when the caller vouches for the message; nothing
  *             is recorded otherwise
  *
  * When the advertised value is smaller than what tcp_maxmtu6() reports
  * for the destination, it is passed to tcp_hc_updatemtu() and the
  * icp6s_pmtuchg statistic is incremented.
  */
 void
 icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated)
 {
 	struct in_conninfo inc;
 	struct in6_addr *finaldst;
 	struct mbuf *pkt;	/* will be necessary for scope issue */
 	u_int mtu;
 
 	finaldst = ip6cp->ip6c_finaldst;
 	pkt = ip6cp->ip6c_m;
 	mtu = ntohl(ip6cp->ip6c_icmp6->icmp6_mtu);
 
 #if 0
 	/*
 	 * RFC2460 section 5, last paragraph.
 	 * even though minimum link MTU for IPv6 is IPV6_MMTU,
 	 * we may see ICMPv6 too big with mtu < IPV6_MMTU
 	 * due to packet translator in the middle.
 	 * see ip6_output() and ip6_getpmtu() "alwaysfrag" case for
 	 * special handling.
 	 */
 	if (mtu < IPV6_MMTU)
 		return;
 #endif
 
 	/*
 	 * Ignore messages the caller did not validate, as well as those
 	 * advertising an abnormally small value.
 	 * XXX what is the good definition of "abnormally small"?
 	 */
 	if (!validated ||
 	    mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8)
 		return;
 
 	/*
 	 * A value below IPV6_MMTU only needs to be remembered as such
 	 * (the "alwaysfrag" case mentioned above), so store a fixed
 	 * marker just under the minimum.
 	 * Try to be as close to the spec as possible.
 	 */
 	if (mtu < IPV6_MMTU)
 		mtu = IPV6_MMTU - 8;
 
 	/* Describe the destination for the lookup and update below. */
 	bzero(&inc, sizeof(inc));
 	inc.inc_fibnum = M_GETFIB(pkt);
 	inc.inc_flags |= INC_ISIPV6;
 	inc.inc6_faddr = *finaldst;
 	if (in6_setscope(&inc.inc6_faddr, pkt->m_pkthdr.rcvif, NULL) != 0)
 		return;
 
 	/* Only ever lower the recorded value. */
 	if (mtu >= tcp_maxmtu6(&inc, NULL))
 		return;
 	tcp_hc_updatemtu(&inc, mtu);
 	ICMP6STAT_INC(icp6s_pmtuchg);
 }
 
 /*
  * Process a Node Information Query packet, based on
  * draft-ietf-ipngwg-icmp-name-lookups-07.
  *
  * Spec incompatibilities:
  * - IPv6 Subject address handling
  * - IPv4 Subject address handling support missing
  * - Proxy reply (answer even if it's not for me)
  * - joins NI group address at in6_ifattach() time only, does not cope
  *   with hostname changes by sethostname(3)
  *
  * m   - the received query; its data starts with the IPv6 header
  * off - offset of the ICMPv6 node information header within m
  *
  * Returns a newly allocated mbuf holding the reply (a copy of the query's
  * IPv6 header, the node information header with ni_type set to
  * ICMP6_NI_REPLY, and any reply data), or NULL when the query is refused
  * or a resource is unavailable.  m is consumed on every path.
  */
 static struct mbuf *
 ni6_input(struct mbuf *m, int off)
 {
 	struct icmp6_nodeinfo *ni6, *nni6;
 	struct mbuf *n = NULL;
 	struct prison *pr;
 	u_int16_t qtype;
 	int subjlen;
 	int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
 	struct ni_reply_fqdn *fqdn;
 	int addrs;		/* for NI_QTYPE_NODEADDR */
 	struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */
 	struct in6_addr in6_subj; /* subject address */
 	struct ip6_hdr *ip6;
 	int oldfqdn = 0;	/* if 1, return pascal string (03 draft) */
 	char *subj = NULL;
 	struct in6_ifaddr *ia6 = NULL;
 
 	ip6 = mtod(m, struct ip6_hdr *);
 #ifndef PULLDOWN_TEST
 	ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off);
 #else
 	IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6));
 	if (ni6 == NULL) {
 		/* m is already reclaimed */
 		return (NULL);
 	}
 #endif
 
 	/*
 	 * Validate IPv6 source address.
 	 * The default configuration MUST be to refuse answering queries from
 	 * global-scope addresses according to RFC4602.
 	 * Notes:
 	 *  - it's not very clear what "refuse" means; this implementation
 	 *    simply drops it.
 	 *  - it's not very easy to identify global-scope (unicast) addresses
 	 *    since there are many prefixes for them.  It should be safer
 	 *    and in practice sufficient to check "all" but loopback and
 	 *    link-local (note that site-local unicast was deprecated and
 	 *    ULA is defined as global scope-wise)
 	 */
 	if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_GLOBALOK) == 0 &&
 	    !IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) &&
 	    !IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
 		goto bad;
 
 	/*
 	 * Validate IPv6 destination address.
 	 *
 	 * The Responder must discard the Query without further processing
 	 * unless it is one of the Responder's unicast or anycast addresses, or
 	 * a link-local scope multicast address which the Responder has joined.
 	 * [RFC4602, Section 5.]
 	 */
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst))
 			goto bad;
 		/* else it's a link-local multicast, fine */
 	} else {		/* unicast or anycast */
 		ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
 		if (ia6 == NULL)
 			goto bad; /* XXX impossible */
 
 		/* Queries to temporary addresses are answered only if enabled. */
 		if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) &&
 		    !(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) {
 			ifa_free(&ia6->ia_ifa);
 			nd6log((LOG_DEBUG, "ni6_input: ignore node info to "
 				"a temporary address in %s:%d",
 			       __FILE__, __LINE__));
 			goto bad;
 		}
 		ifa_free(&ia6->ia_ifa);
 	}
 
 	/* validate query Subject field. */
 	qtype = ntohs(ni6->ni_qtype);
 	/* subjlen: bytes following the fixed node information header */
 	subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo);
 	switch (qtype) {
 	case NI_QTYPE_NOOP:
 	case NI_QTYPE_SUPTYPES:
 		/* 07 draft */
 		if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0)
 			break;
 		/* FALLTHROUGH */
 	case NI_QTYPE_FQDN:
 	case NI_QTYPE_NODEADDR:
 	case NI_QTYPE_IPV4ADDR:
 		switch (ni6->ni_code) {
 		case ICMP6_NI_SUBJ_IPV6:
 #if ICMP6_NI_SUBJ_IPV6 != 0
 		case 0:
 #endif
 			/*
 			 * backward compatibility - try to accept 03 draft
 			 * format, where no Subject is present.
 			 */
 			if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 &&
 			    subjlen == 0) {
 				oldfqdn++;
 				break;
 			}
 #if ICMP6_NI_SUBJ_IPV6 != 0
 			if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6)
 				goto bad;
 #endif
 
 			if (subjlen != sizeof(struct in6_addr))
 				goto bad;
 
 			/*
 			 * Validate Subject address.
 			 *
 			 * Not sure what exactly "address belongs to the node"
 			 * means in the spec, is it just unicast, or what?
 			 *
 			 * At this moment we consider Subject address as
 			 * "belong to the node" if the Subject address equals
 			 * to the IPv6 destination address; validation for
 			 * IPv6 destination address should have done enough
 			 * check for us.
 			 *
 			 * We do not do proxy at this moment.
 			 */
 			/* m_pulldown instead of copy? */
 			m_copydata(m, off + sizeof(struct icmp6_nodeinfo),
 			    subjlen, (caddr_t)&in6_subj);
 			if (in6_setscope(&in6_subj, m->m_pkthdr.rcvif, NULL))
 				goto bad;
 
 			subj = (char *)&in6_subj;
 			if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj))
 				break;
 
 			/*
 			 * XXX if we are to allow other cases, we should really
 			 * be careful about scope here.
 			 * basically, we should disallow queries toward IPv6
 			 * destination X with subject Y,
 			 * if scope(X) > scope(Y).
 			 * if we allow scope(X) > scope(Y), it will result in
 			 * information leakage across scope boundary.
 			 */
 			goto bad;
 
 		case ICMP6_NI_SUBJ_FQDN:
 			/*
 			 * Validate Subject name with gethostname(3).
 			 *
 			 * The behavior may need some debate, since:
 			 * - we are not sure if the node has FQDN as
 			 *   hostname (returned by gethostname(3)).
 			 * - the code does wildcard match for truncated names.
 			 *   however, we are not sure if we want to perform
 			 *   wildcard match, if gethostname(3) side has
 			 *   truncated hostname.
 			 */
 			/*
 			 * n temporarily holds our own hostname in DNS form
 			 * for the comparison; it is released again before
 			 * the reply mbuf is allocated into the same variable.
 			 */
 			pr = curthread->td_ucred->cr_prison;
 			mtx_lock(&pr->pr_mtx);
 			n = ni6_nametodns(pr->pr_hostname,
 			    strlen(pr->pr_hostname), 0);
 			mtx_unlock(&pr->pr_mtx);
 			if (!n || n->m_next || n->m_len == 0)
 				goto bad;
 			IP6_EXTHDR_GET(subj, char *, m,
 			    off + sizeof(struct icmp6_nodeinfo), subjlen);
 			if (subj == NULL)
 				goto bad;
 			if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *),
 			    n->m_len)) {
 				goto bad;
 			}
 			m_freem(n);
 			n = NULL;
 			break;
 
 		case ICMP6_NI_SUBJ_IPV4:	/* XXX: to be implemented? */
 		default:
 			goto bad;
 		}
 		break;
 	}
 
 	/* refuse based on configuration.  XXX ICMP6_NI_REFUSED? */
 	switch (qtype) {
 	case NI_QTYPE_FQDN:
 		if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_FQDNOK) == 0)
 			goto bad;
 		break;
 	case NI_QTYPE_NODEADDR:
 	case NI_QTYPE_IPV4ADDR:
 		if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_NODEADDROK) == 0)
 			goto bad;
 		break;
 	}
 
 	/* guess reply length */
 	switch (qtype) {
 	case NI_QTYPE_NOOP:
 		break;		/* no reply data */
 	case NI_QTYPE_SUPTYPES:
 		replylen += sizeof(u_int32_t);
 		break;
 	case NI_QTYPE_FQDN:
 		/* XXX will append an mbuf */
 		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
 		break;
 	case NI_QTYPE_NODEADDR:
 		/* one (TTL, address) pair per address counted by ni6_addrs() */
 		addrs = ni6_addrs(ni6, m, &ifp, (struct in6_addr *)subj);
 		if ((replylen += addrs * (sizeof(struct in6_addr) +
 		    sizeof(u_int32_t))) > MCLBYTES)
 			replylen = MCLBYTES; /* XXX: will truncate pkt later */
 		break;
 	case NI_QTYPE_IPV4ADDR:
 		/* unsupported - should respond with unknown Qtype? */
 		break;
 	default:
 		/*
 		 * XXX: We must return a reply with the ICMP6 code
 		 * `unknown Qtype' in this case.  However we regard the case
 		 * as an FQDN query for backward compatibility.
 		 * Older versions set a random value to this field,
 		 * so it rarely varies in the defined qtypes.
 		 * But the mechanism is not reliable...
 		 * maybe we should obsolete older versions.
 		 */
 		qtype = NI_QTYPE_FQDN;
 		/* XXX will append an mbuf */
 		replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
 		oldfqdn++;
 		break;
 	}
 
 	/* Allocate an mbuf to reply. */
 	if (replylen > MCLBYTES) {
 		/*
 		 * XXX: should we try to allocate more? But MCLBYTES
 		 * is probably much larger than IPV6_MMTU...
 		 */
 		goto bad;
 	}
 	if (replylen > MHLEN)
 		n = m_getcl(M_NOWAIT, m->m_type, M_PKTHDR);
 	else
 		n = m_gethdr(M_NOWAIT, m->m_type);
 	if (n == NULL) {
 		m_freem(m);
 		return (NULL);
 	}
 	m_move_pkthdr(n, m); /* just for recvif and FIB */
 	n->m_pkthdr.len = n->m_len = replylen;
 
 	/* copy mbuf header and IPv6 + Node Information base headers */
 	bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr));
 	nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1);
 	bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo));
 
 	/* qtype dependent procedure */
 	switch (qtype) {
 	case NI_QTYPE_NOOP:
 		nni6->ni_code = ICMP6_NI_SUCCESS;
 		nni6->ni_flags = 0;
 		break;
 	case NI_QTYPE_SUPTYPES:
 	{
 		u_int32_t v;
 		nni6->ni_code = ICMP6_NI_SUCCESS;
 		nni6->ni_flags = htons(0x0000);	/* raw bitmap */
 		/* supports NOOP, SUPTYPES, FQDN, and NODEADDR */
 		v = (u_int32_t)htonl(0x0000000f);
 		bcopy(&v, nni6 + 1, sizeof(u_int32_t));
 		break;
 	}
 	case NI_QTYPE_FQDN:
 		nni6->ni_code = ICMP6_NI_SUCCESS;
 		fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) +
 		    sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo));
 		nni6->ni_flags = 0; /* XXX: meaningless TTL */
 		fqdn->ni_fqdn_ttl = 0;	/* ditto. */
 		/*
 		 * XXX do we really have FQDN in hostname?
 		 */
 		/* The encoded name is chained on as a second mbuf. */
 		pr = curthread->td_ucred->cr_prison;
 		mtx_lock(&pr->pr_mtx);
 		n->m_next = ni6_nametodns(pr->pr_hostname,
 		    strlen(pr->pr_hostname), oldfqdn);
 		mtx_unlock(&pr->pr_mtx);
 		if (n->m_next == NULL)
 			goto bad;
 		/* XXX we assume that n->m_next is not a chain */
 		if (n->m_next->m_next != NULL)
 			goto bad;
 		n->m_pkthdr.len += n->m_next->m_len;
 		break;
 	case NI_QTYPE_NODEADDR:
 	{
 		int lenlim, copied;
 
 		nni6->ni_code = ICMP6_NI_SUCCESS;
 		/*
 		 * Shrink the length to the two headers first so that
 		 * M_TRAILINGSPACE() reports the room left for addresses.
 		 */
 		n->m_pkthdr.len = n->m_len =
 		    sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
 		lenlim = M_TRAILINGSPACE(n);
 		copied = ni6_store_addrs(ni6, nni6, ifp, lenlim);
 		/* XXX: reset mbuf length */
 		n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
 		    sizeof(struct icmp6_nodeinfo) + copied;
 		break;
 	}
 	default:
 		break;		/* XXX impossible! */
 	}
 
 	nni6->ni_type = ICMP6_NI_REPLY;
 	m_freem(m);
 	return (n);
 
   bad:
 	/* Drop the query along with any partially built mbuf in n. */
 	m_freem(m);
 	if (n)
 		m_freem(n);
 	return (NULL);
 }
 
 /*
  * Build an mbuf holding "name" as a DNS-encoded string (a sequence of
  * length-prefixed labels).  No compression support.
  *
  * name, namelen - the host name and its length in bytes
  * old           - if non-zero, produce a pascal string instead: a single
  *                 length octet followed by the name as-is
  *
  * XXX a name with fewer than 2 dots (like "foo" or "foo.section") is
  * treated as a truncated name and gets two \0 at the end rather than one.
  * This is a wild guess.
  *
  * An empty name yields an mbuf of length 0.  Returns NULL if no mbuf can
  * be allocated, if a label is empty or longer than 63 octets, or if the
  * result does not fit into the mbuf.
  */
 static struct mbuf *
 ni6_nametodns(const char *name, int namelen, int old)
 {
 	struct mbuf *m;
 	const char *src, *scan, *end;
 	char *out, *limit;
 	int len, lablen, ndots, nzero;
 
 	len = old ? namelen + 1 : MCLBYTES;
 
 	/* Because MAXHOSTNAMELEN is usually 256, we use cluster mbuf. */
 	m = (len > MLEN) ? m_getcl(M_NOWAIT, MT_DATA, 0) :
 	    m_get(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		goto fail;
 
 	if (old) {
 		/* pascal string: length octet first, then the raw name */
 		out = mtod(m, char *);
 		m->m_len = len;
 		out[0] = namelen;
 		bcopy(name, out + 1, namelen);
 		return (m);
 	}
 
 	m->m_len = 0;
 	out = mtod(m, char *);
 	limit = out + M_TRAILINGSPACE(m);
 
 	/* if not certain about my name, return empty buffer */
 	if (namelen == 0)
 		return (m);
 
 	end = name + namelen;
 
 	/*
 	 * guess if it looks like shortened hostname, or FQDN.
 	 * shortened hostname needs two trailing "\0".
 	 */
 	ndots = 0;
 	for (scan = name; scan < end; scan++)
 		if (*scan == '.')
 			ndots++;
 	nzero = (ndots < 2) ? 2 : 1;
 
 	/* Emit one length-prefixed label per dot-separated component. */
 	src = name;
 	while (out < limit && src < end) {
 		scan = src;
 		while (scan < end && *scan != '\0' && *scan != '.')
 			scan++;
 		lablen = scan - src;
 
 		/* result does not fit into mbuf */
 		if (out + lablen + 1 >= limit)
 			goto fail;
 		/*
 		 * DNS label length restriction, RFC1035 page 8.
 		 * An empty label is refused as well, so that "foo..bar"
 		 * never produces a 0-length label.
 		 */
 		if (lablen <= 0 || lablen >= 64)
 			goto fail;
 
 		*out++ = lablen;
 		bcopy(src, out, lablen);
 		out += lablen;
 
 		/* step over the separating dot, if there is one */
 		src = scan;
 		if (src < end && *src == '.')
 			src++;
 	}
 
 	/* termination */
 	if (out + nzero >= limit)
 		goto fail;
 	for (; nzero > 0; nzero--)
 		*out++ = '\0';
 	m->m_len = out - mtod(m, char *);
 	return (m);
 
 	panic("should not reach here");
 	/* NOTREACHED */
 
  fail:
 	if (m)
 		m_freem(m);
 	return (NULL);
 }
 
 /*
  * Decide whether two DNS-encoded names (sequences of length-prefixed
  * labels) match.  Returns 1 on a match and 0 otherwise.
  *
  * a, alen - first encoded name and its length in bytes
  * b, blen - second encoded name and its length in bytes
  *
  * Besides plain equality, a name in truncated form (ending in \0\0)
  * matches any name that starts with the same labels.  No compression
  * support.
  * XXX upper/lowercase match (see RFC2065)
  */
 static int
 ni6_dnsmatch(const char *a, int alen, const char *b, int blen)
 {
 	int pos, lablen;
 
 	/* simplest case - need validation? */
 	if (alen == blen && bcmp(a, b, alen) == 0)
 		return (1);
 
 	/* termination is mandatory */
 	if (alen < 2 || blen < 2)
 		return (0);
 	if (a[alen - 1] != '\0' || b[blen - 1] != '\0')
 		return (0);
 
 	/* leave the final terminator out of the label walk */
 	alen--;
 	blen--;
 
 	/*
 	 * Both names are consumed in lockstep (a label is only skipped
 	 * after it compared equal), so one offset serves both buffers.
 	 */
 	for (pos = 0; pos < alen && pos < blen; pos += 1 + lablen) {
 		if ((signed char)a[pos] < 0 || (signed char)b[pos] < 0)
 			return (0);
 		/* we don't support compression yet */
 		if (a[pos] >= 64 || b[pos] >= 64)
 			return (0);
 
 		/* truncated case */
 		if (a[pos] == 0 && pos == alen - 1)
 			return (1);
 		if (b[pos] == 0 && pos == blen - 1)
 			return (1);
 		if (a[pos] == 0 || b[pos] == 0)
 			return (0);
 
 		/* labels must agree in length and in content */
 		if (a[pos] != b[pos])
 			return (0);
 		lablen = a[pos];
 		if (pos + 1 + lablen > alen || pos + 1 + lablen > blen)
 			return (0);
 		if (bcmp(a + pos + 1, b + pos + 1, lablen) != 0)
 			return (0);
 	}
 
 	/* a match requires both names to be used up completely */
 	return ((pos == alen && pos == blen) ? 1 : 0);
 }
 
 /*
  * calculate the number of addresses to be returned in the node info reply.
  *
  * ni6  - the query header; ni_flags and ni_code are consulted
  * m    - not referenced in this function
  * ifpp - out: set to the interface that owns subj, when one is found
  * subj - the subject address; required unless NI_NODEADDR_FLAG_ALL is set
  *
  * Without NI_NODEADDR_FLAG_ALL only ICMP6_NI_SUBJ_IPV6 queries are
  * supported (0 is returned otherwise), and the result is the number of
  * eligible addresses on the interface that has subj assigned.  If no
  * interface has subj assigned, or if NI_NODEADDR_FLAG_ALL is set, the
  * eligible addresses of all interfaces are counted and *ifpp is left
  * untouched.
  *
  * The eligibility filters here mirror those in ni6_store_addrs().
  */
 static int
 ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp,
     struct in6_addr *subj)
 {
 	struct ifnet *ifp;
 	struct in6_ifaddr *ifa6;
 	struct ifaddr *ifa;
 	int addrs = 0, addrsofif, iffound = 0;
 	int niflags = ni6->ni_flags;
 
 	if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) {
 		switch (ni6->ni_code) {
 		case ICMP6_NI_SUBJ_IPV6:
 			if (subj == NULL) /* must be impossible... */
 				return (0);
 			break;
 		default:
 			/*
 			 * XXX: we only support IPv6 subject address for
 			 * this Qtype.
 			 */
 			return (0);
 		}
 	}
 
 	IFNET_RLOCK_NOSLEEP();
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		addrsofif = 0;
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET6)
 				continue;
 			ifa6 = (struct in6_ifaddr *)ifa;
 
 			/*
 			 * Note the owning interface even if this particular
 			 * address is filtered out of the count below.
 			 */
 			if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 &&
 			    IN6_ARE_ADDR_EQUAL(subj, &ifa6->ia_addr.sin6_addr))
 				iffound = 1;
 
 			/*
 			 * IPv4-mapped addresses can only be returned by a
 			 * Node Information proxy, since they represent
 			 * addresses of IPv4-only nodes, which perforce do
 			 * not implement this protocol.
 			 * [icmp-name-lookups-07, Section 5.4]
 			 * So we don't support NI_NODEADDR_FLAG_COMPAT in
 			 * this function at this moment.
 			 */
 
 			/* What do we have to do about ::1? */
 			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
 			case IPV6_ADDR_SCOPE_LINKLOCAL:
 				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
 					continue;
 				break;
 			case IPV6_ADDR_SCOPE_SITELOCAL:
 				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
 					continue;
 				break;
 			case IPV6_ADDR_SCOPE_GLOBAL:
 				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
 					continue;
 				break;
 			default:
 				continue;
 			}
 
 			/*
 			 * check if anycast is okay.
 			 * XXX: just experimental.  not in the spec.
 			 */
 			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
 			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
 				continue; /* we need only unicast addresses */
 			if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
 			    (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) {
 				continue;
 			}
 			addrsofif++; /* count the address */
 		}
 		IF_ADDR_RUNLOCK(ifp);
 		/* Found the subject's interface: report only its addresses. */
 		if (iffound) {
 			*ifpp = ifp;
 			IFNET_RUNLOCK_NOSLEEP();
 			return (addrsofif);
 		}
 
 		addrs += addrsofif;
 	}
 	IFNET_RUNLOCK_NOSLEEP();
 
 	return (addrs);
 }
 
 /*
  * Write the address list of a node information reply.
  *
  * ni6   - the query header; its ni_flags select which addresses qualify
  * nni6  - the reply header; entries are written directly after it, and
  *         NI_NODEADDR_FLAG_TRUNCATE is set in its ni_flags when the
  *         space runs out
  * ifp0  - restrict the search to this interface, or NULL for all
  *         interfaces (only meaningful with NI_NODEADDR_FLAG_ALL)
  * resid - number of bytes available after nni6
  *
  * Each entry is a 32-bit TTL followed by the address with its scope
  * cleared.  Two passes are made so that non-deprecated addresses come
  * before deprecated ones.
  *
  * Returns the number of bytes written.
  */
 static int
 ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6,
     struct ifnet *ifp0, int resid)
 {
 	struct ifnet *ifp;
 	struct in6_ifaddr *ifa6;
 	struct ifaddr *ifa;
 	struct ifnet *ifp_dep = NULL;
 	int copied = 0, allow_deprecated = 0;
 	u_char *cp = (u_char *)(nni6 + 1);
 	int niflags = ni6->ni_flags;
 	u_int32_t ltime;
 
 	if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL))
 		return (0);	/* needless to copy */
 
 	IFNET_RLOCK_NOSLEEP();
 	ifp = ifp0 ? ifp0 : TAILQ_FIRST(&V_ifnet);
   again:
 
 	for (; ifp; ifp = TAILQ_NEXT(ifp, if_link)) {
 		IF_ADDR_RLOCK(ifp);
 		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
 			if (ifa->ifa_addr->sa_family != AF_INET6)
 				continue;
 			ifa6 = (struct in6_ifaddr *)ifa;
 
 			if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 &&
 			    allow_deprecated == 0) {
 				/*
 				 * preferred address should be put before
 				 * deprecated addresses.
 				 */
 
 				/* record the interface for later search */
 				if (ifp_dep == NULL)
 					ifp_dep = ifp;
 
 				continue;
 			} else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 &&
 			    allow_deprecated != 0)
 				continue; /* we now collect deprecated addrs */
 
 			/* What do we have to do about ::1? */
 			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
 			case IPV6_ADDR_SCOPE_LINKLOCAL:
 				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
 					continue;
 				break;
 			case IPV6_ADDR_SCOPE_SITELOCAL:
 				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
 					continue;
 				break;
 			case IPV6_ADDR_SCOPE_GLOBAL:
 				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
 					continue;
 				break;
 			default:
 				continue;
 			}
 
 			/*
 			 * check if anycast is okay.
 			 * XXX: just experimental.  not in the spec.
 			 */
 			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
 			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
 				continue;
 			if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
 			    (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) {
 				continue;
 			}
 
 			/* now we can copy the address */
 			if (resid < sizeof(struct in6_addr) +
 			    sizeof(u_int32_t)) {
 				IF_ADDR_RUNLOCK(ifp);
 				/*
 				 * We give up much more copy.
 				 * Set the truncate flag and return.
 				 */
 				nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE;
 				IFNET_RUNLOCK_NOSLEEP();
 				return (copied);
 			}
 
 			/*
 			 * Set the TTL of the address.
 			 * The TTL value should be one of the following
 			 * according to the specification:
 			 *
 			 * 1. The remaining lifetime of a DHCP lease on the
 			 *    address, or
 			 * 2. The remaining Valid Lifetime of a prefix from
 			 *    which the address was derived through Stateless
 			 *    Autoconfiguration.
 			 *
 			 * Note that we currently do not support stateful
 			 * address configuration by DHCPv6, so the former
 			 * case can't happen.
 			 */
 			/*
 			 * An expiry of 0 is reported as an infinite
 			 * lifetime; an expiry already in the past as 0.
 			 */
 			if (ifa6->ia6_lifetime.ia6t_expire == 0)
 				ltime = ND6_INFINITE_LIFETIME;
 			else {
 				if (ifa6->ia6_lifetime.ia6t_expire >
 				    time_uptime)
 					ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_uptime);
 				else
 					ltime = 0;
 			}
 
 			bcopy(&ltime, cp, sizeof(u_int32_t));
 			cp += sizeof(u_int32_t);
 
 			/* copy the address itself */
 			bcopy(&ifa6->ia_addr.sin6_addr, cp,
 			    sizeof(struct in6_addr));
 			in6_clearscope((struct in6_addr *)cp); /* XXX */
 			cp += sizeof(struct in6_addr);
 
 			resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
 			copied += (sizeof(struct in6_addr) + sizeof(u_int32_t));
 		}
 		IF_ADDR_RUNLOCK(ifp);
 		if (ifp0)	/* we need search only on the specified IF */
 			break;
 	}
 
 	/*
 	 * Second pass: collect the deprecated addresses, restarting at the
 	 * first interface on which one was seen.
 	 */
 	if (allow_deprecated == 0 && ifp_dep != NULL) {
 		ifp = ifp_dep;
 		allow_deprecated = 1;
 
 		goto again;
 	}
 
 	IFNET_RUNLOCK_NOSLEEP();
 
 	return (copied);
 }
 
 /*
  * Deliver a received ICMPv6 packet to the raw sockets interested in it.
  *
  * mp  - pointer to the mbuf holding the packet, IPv6 header first
  * off - offset of the ICMPv6 header; everything before it is stripped
  *       from what is queued on a socket
  *
  * A socket qualifies when it is an IPv6 pcb for IPPROTO_ICMPV6, its bound
  * local and foreign addresses (if any) equal the packet's destination and
  * source, and its ICMPv6 filter does not block the message type.  Every
  * qualifying socket but the last receives a copy; the last one receives
  * the original mbuf.  If no socket qualifies the mbuf is freed and
  * ip6s_delivered is decremented.
  *
  * Always returns IPPROTO_DONE; the mbuf is consumed on every path.
  *
  * XXX almost dup'ed code with rip6_input.
  */
 static int
 icmp6_rip6_input(struct mbuf **mp, int off)
 {
 	struct mbuf *m = *mp;
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct inpcb *in6p;
 	struct inpcb *last = NULL;
 	struct sockaddr_in6 fromsa;
 	struct icmp6_hdr *icmp6;
 	struct mbuf *opts = NULL;
 
 #ifndef PULLDOWN_TEST
 	/* this is assumed to be safe. */
 	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
 	if (icmp6 == NULL) {
 		/* m is already reclaimed */
 		return (IPPROTO_DONE);
 	}
 #endif
 
 	/*
 	 * XXX: the address may have embedded scope zone ID, which should be
 	 * hidden from applications.
 	 */
 	bzero(&fromsa, sizeof(fromsa));
 	fromsa.sin6_family = AF_INET6;
 	fromsa.sin6_len = sizeof(struct sockaddr_in6);
 	fromsa.sin6_addr = ip6->ip6_src;
 	if (sa6_recoverscope(&fromsa)) {
 		m_freem(m);
 		return (IPPROTO_DONE);
 	}
 
 	INP_INFO_RLOCK(&V_ripcbinfo);
 	LIST_FOREACH(in6p, &V_ripcb, inp_list) {
 		if ((in6p->inp_vflag & INP_IPV6) == 0)
 			continue;
 		if (in6p->inp_ip_p != IPPROTO_ICMPV6)
 			continue;
 		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
 		   !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
 			continue;
 		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
 		   !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
 			continue;
 		/*
 		 * A pcb that passes the filter below stays read-locked while
 		 * it is remembered in "last"; it is unlocked after delivery.
 		 */
 		INP_RLOCK(in6p);
 		if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
 		    in6p->in6p_icmp6filt)) {
 			INP_RUNLOCK(in6p);
 			continue;
 		}
 		/*
 		 * Delivery trails the search by one match: the previous
 		 * match gets a copy now, the final match gets the original
 		 * mbuf after the loop.
 		 */
 		if (last != NULL) {
 			struct	mbuf *n = NULL;
 
 			/*
 			 * Recent network drivers tend to allocate a single
 			 * mbuf cluster, rather than to make a couple of
 			 * mbufs without clusters.  Also, since the IPv6 code
 			 * path tries to avoid m_pullup(), it is highly
 			 * probable that we still have an mbuf cluster here
 			 * even though the necessary length can be stored in an
 			 * mbuf's internal buffer.
 			 * Meanwhile, the default size of the receive socket
 			 * buffer for raw sockets is not so large.  This means
 			 * the possibility of packet loss is relatively higher
 			 * than before.  To avoid this scenario, we copy the
 			 * received data to a separate mbuf that does not use
 			 * a cluster, if possible.
 			 * XXX: it is better to copy the data after stripping
 			 * intermediate headers.
 			 */
 			if ((m->m_flags & M_EXT) && m->m_next == NULL &&
 			    m->m_len <= MHLEN) {
 				n = m_get(M_NOWAIT, m->m_type);
 				if (n != NULL) {
 					if (m_dup_pkthdr(n, m, M_NOWAIT)) {
 						bcopy(m->m_data, n->m_data,
 						      m->m_len);
 						n->m_len = m->m_len;
 					} else {
 						m_free(n);
 						n = NULL;
 					}
 				}
 			}
 			/* Fall back to m_copy() if the compact copy was not made. */
 			if (n != NULL ||
 			    (n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
 				if (last->inp_flags & INP_CONTROLOPTS)
 					ip6_savecontrol(last, n, &opts);
 				/* strip intermediate headers */
 				m_adj(n, off);
 				SOCKBUF_LOCK(&last->inp_socket->so_rcv);
 				if (sbappendaddr_locked(
 				    &last->inp_socket->so_rcv,
 				    (struct sockaddr *)&fromsa, n, opts)
 				    == 0) {
 					/* should notify about lost packet */
 					m_freem(n);
 					if (opts) {
 						m_freem(opts);
 					}
 					SOCKBUF_UNLOCK(
 					    &last->inp_socket->so_rcv);
 				} else
 					sorwakeup_locked(last->inp_socket);
 				opts = NULL;
 			}
 			INP_RUNLOCK(last);
 		}
 		last = in6p;
 	}
 	INP_INFO_RUNLOCK(&V_ripcbinfo);
 	if (last != NULL) {
 		if (last->inp_flags & INP_CONTROLOPTS)
 			ip6_savecontrol(last, m, &opts);
 		/* strip intermediate headers */
 		m_adj(m, off);
 
 		/* avoid using mbuf clusters if possible (see above) */
 		if ((m->m_flags & M_EXT) && m->m_next == NULL &&
 		    m->m_len <= MHLEN) {
 			struct mbuf *n;
 
 			n = m_get(M_NOWAIT, m->m_type);
 			if (n != NULL) {
 				if (m_dup_pkthdr(n, m, M_NOWAIT)) {
 					bcopy(m->m_data, n->m_data, m->m_len);
 					n->m_len = m->m_len;
 
 					m_freem(m);
 					m = n;
 				} else {
 					m_freem(n);
 					n = NULL;
 				}
 			}
 		}
 		SOCKBUF_LOCK(&last->inp_socket->so_rcv);
 		if (sbappendaddr_locked(&last->inp_socket->so_rcv,
 		    (struct sockaddr *)&fromsa, m, opts) == 0) {
 			m_freem(m);
 			if (opts)
 				m_freem(opts);
 			SOCKBUF_UNLOCK(&last->inp_socket->so_rcv);
 		} else
 			sorwakeup_locked(last->inp_socket);
 		INP_RUNLOCK(last);
 	} else {
 		/* Nobody wanted the packet. */
 		m_freem(m);
 		IP6STAT_DEC(ip6s_delivered);
 	}
 	return IPPROTO_DONE;
 }
 
 /*
  * Reflect the ip6 packet back to the source.
  * OFF points to the icmp6 header, counted from the top of the mbuf.
  */
 void
 icmp6_reflect(struct mbuf *m, size_t off)
 {
-	struct in6_addr src, *srcp = NULL;
+	struct in6_addr src6, *srcp;
 	struct ip6_hdr *ip6;
 	struct icmp6_hdr *icmp6;
 	struct in6_ifaddr *ia = NULL;
 	struct ifnet *outif = NULL;
 	int plen;
-	int type, code;
+	int type, code, hlim;
 
 	/* too short to reflect */
 	if (off < sizeof(struct ip6_hdr)) {
 		nd6log((LOG_DEBUG,
 		    "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
 		    (u_long)off, (u_long)sizeof(struct ip6_hdr),
 		    __FILE__, __LINE__));
 		goto bad;
 	}
 
 	/*
 	 * If there are extra headers between IPv6 and ICMPv6, strip
 	 * off that header first.
 	 */
 #ifdef DIAGNOSTIC
 	if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN)
 		panic("assumption failed in icmp6_reflect");
 #endif
 	if (off > sizeof(struct ip6_hdr)) {
 		size_t l;
 		struct ip6_hdr nip6;
 
 		l = off - sizeof(struct ip6_hdr);
 		m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6);
 		m_adj(m, l);
 		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
 		if (m->m_len < l) {
 			if ((m = m_pullup(m, l)) == NULL)
 				return;
 		}
 		bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6));
 	} else /* off == sizeof(struct ip6_hdr) */ {
 		size_t l;
 		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
 		if (m->m_len < l) {
 			if ((m = m_pullup(m, l)) == NULL)
 				return;
 		}
 	}
 	plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6->ip6_nxt = IPPROTO_ICMPV6;
 	icmp6 = (struct icmp6_hdr *)(ip6 + 1);
 	type = icmp6->icmp6_type; /* keep type for statistics */
 	code = icmp6->icmp6_code; /* ditto. */
+	hlim = 0;
+	srcp = NULL;
 
 	/*
 	 * If the incoming packet was addressed directly to us (i.e. unicast),
 	 * use dst as the src for the reply.
 	 * The IN6_IFF_NOTREADY case should be VERY rare, but is possible
 	 * (for example) when we encounter an error while forwarding procedure
 	 * destined to a duplicated address of ours.
 	 */
 	if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
 		if (ia != NULL && !(ia->ia6_flags &
-		    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)))
-			srcp = &ia->ia_addr.sin6_addr;
+		    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) {
+			src6 = ia->ia_addr.sin6_addr;
+			srcp = &src6;
+
+			if (m->m_pkthdr.rcvif != NULL) {
+				/* XXX: This may not be the outgoing interface */
+				hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim;
+			} else
+				hlim = V_ip6_defhlim;
+		}
+		if (ia != NULL)
+			ifa_free(&ia->ia_ifa);
 	}
 
 	if (srcp == NULL) {
-		int e;
-		struct sockaddr_in6 sin6;
+		int error;
+		struct in6_addr dst6;
+		uint32_t scopeid;
 
 		/*
 		 * This case matches to multicasts, our anycast, or unicasts
 		 * that we do not own.  Select a source address based on the
 		 * source address of the erroneous packet.
 		 */
-		bzero(&sin6, sizeof(sin6));
-		sin6.sin6_family = AF_INET6;
-		sin6.sin6_len = sizeof(sin6);
-		sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */
+		in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid);
+		error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6,
+		    scopeid, NULL, &src6, &hlim);
 
-		e = in6_selectsrc(&sin6, NULL, NULL, NULL, &outif, &src);
-		if (e) {
+		if (error) {
 			char ip6buf[INET6_ADDRSTRLEN];
 			nd6log((LOG_DEBUG,
 			    "icmp6_reflect: source can't be determined: "
 			    "dst=%s, error=%d\n",
-			    ip6_sprintf(ip6buf, &sin6.sin6_addr), e));
+			    ip6_sprintf(ip6buf, &ip6->ip6_dst), error));
 			goto bad;
 		}
-		srcp = &src;
+		srcp = &src6;
 	}
 	/*
 	 * ip6_input() drops a packet if its src is multicast.
 	 * So, the src is never multicast.
 	 */
 	ip6->ip6_dst = ip6->ip6_src;
 	ip6->ip6_src = *srcp;
 	ip6->ip6_flow = 0;
 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
 	ip6->ip6_vfc |= IPV6_VERSION;
 	ip6->ip6_nxt = IPPROTO_ICMPV6;
-	if (outif)
-		ip6->ip6_hlim = ND_IFINFO(outif)->chlim;
-	else if (m->m_pkthdr.rcvif) {
-		/* XXX: This may not be the outgoing interface */
-		ip6->ip6_hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim;
-	} else
-		ip6->ip6_hlim = V_ip6_defhlim;
+	ip6->ip6_hlim = hlim;
 
 	icmp6->icmp6_cksum = 0;
 	icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
 	    sizeof(struct ip6_hdr), plen);
 
 	/*
 	 * XXX option handling
 	 */
 
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 
 	ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
 	if (outif)
 		icmp6_ifoutstat_inc(outif, type, code);
 
-	if (ia != NULL)
-		ifa_free(&ia->ia_ifa);
 	return;
 
  bad:
-	if (ia != NULL)
-		ifa_free(&ia->ia_ifa);
 	m_freem(m);
 	return;
 }
 
 void
 icmp6_fasttimo(void)
 {
 
 	mld_fasttimo();
 }
 
 void
 icmp6_slowtimo(void)
 {
 
 	mld_slowtimo();
 }
 
 static const char *
 icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6,
     struct in6_addr *tgt6)
 {
 	static char buf[1024];
 	char ip6bufs[INET6_ADDRSTRLEN];
 	char ip6bufd[INET6_ADDRSTRLEN];
 	char ip6buft[INET6_ADDRSTRLEN];
 	snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)",
 	    ip6_sprintf(ip6bufs, src6), ip6_sprintf(ip6bufd, dst6),
 	    ip6_sprintf(ip6buft, tgt6));
 	return buf;
 }
 
 void
 icmp6_redirect_input(struct mbuf *m, int off)
 {
 	struct ifnet *ifp;
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct nd_redirect *nd_rd;
 	int icmp6len = ntohs(ip6->ip6_plen);
 	char *lladdr = NULL;
 	int lladdrlen = 0;
 	int is_router;
 	int is_onlink;
 	struct in6_addr src6 = ip6->ip6_src;
 	struct in6_addr redtgt6;
 	struct in6_addr reddst6;
 	union nd_opts ndopts;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	M_ASSERTPKTHDR(m);
 	KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: no rcvif", __func__));
 
 	ifp = m->m_pkthdr.rcvif;
 
 	/* XXX if we are router, we don't update route by icmp6 redirect */
 	if (V_ip6_forwarding)
 		goto freeit;
 	if (!V_icmp6_rediraccept)
 		goto freeit;
 
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, icmp6len,);
 	nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
 	if (nd_rd == NULL) {
 		ICMP6STAT_INC(icp6s_tooshort);
 		return;
 	}
 #endif
 	redtgt6 = nd_rd->nd_rd_target;
 	reddst6 = nd_rd->nd_rd_dst;
 
 	if (in6_setscope(&redtgt6, m->m_pkthdr.rcvif, NULL) ||
 	    in6_setscope(&reddst6, m->m_pkthdr.rcvif, NULL)) {
 		goto freeit;
 	}
 
 	/* validation */
 	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
 		nd6log((LOG_ERR,
 		    "ICMP6 redirect sent from %s rejected; "
 		    "must be from linklocal\n",
 		    ip6_sprintf(ip6buf, &src6)));
 		goto bad;
 	}
 	if (ip6->ip6_hlim != 255) {
 		nd6log((LOG_ERR,
 		    "ICMP6 redirect sent from %s rejected; "
 		    "hlim=%d (must be 255)\n",
 		    ip6_sprintf(ip6buf, &src6), ip6->ip6_hlim));
 		goto bad;
 	}
     {
 	/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
 	struct nhop6_basic nh6;
 	struct in6_addr kdst;
 	uint32_t scopeid;
 
 	in6_splitscope(&reddst6, &kdst, &scopeid);
 	if (fib6_lookup_nh_basic(RT_DEFAULT_FIB, &kdst, scopeid, 0, 0,&nh6)==0){
 		if ((nh6.nh_flags & NHF_GATEWAY) == 0) {
 			nd6log((LOG_ERR,
 			    "ICMP6 redirect rejected; no route "
 			    "with inet6 gateway found for redirect dst: %s\n",
 			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 			goto bad;
 		}
 
 		if (IN6_ARE_ADDR_EQUAL(&src6, &nh6.nh_addr) == 0) {
 			nd6log((LOG_ERR,
 			    "ICMP6 redirect rejected; "
 			    "not equal to gw-for-src=%s (must be same): "
 			    "%s\n",
 			    ip6_sprintf(ip6buf, &nh6.nh_addr),
 			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 			goto bad;
 		}
 	} else {
 		nd6log((LOG_ERR,
 		    "ICMP6 redirect rejected; "
 		    "no route found for redirect dst: %s\n",
 		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 		goto bad;
 	}
     }
 	if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
 		nd6log((LOG_ERR,
 		    "ICMP6 redirect rejected; "
 		    "redirect dst must be unicast: %s\n",
 		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 		goto bad;
 	}
 
 	is_router = is_onlink = 0;
 	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
 		is_router = 1;	/* router case */
 	if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
 		is_onlink = 1;	/* on-link destination case */
 	if (!is_router && !is_onlink) {
 		nd6log((LOG_ERR,
 		    "ICMP6 redirect rejected; "
 		    "neither router case nor onlink case: %s\n",
 		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 		goto bad;
 	}
 
 	icmp6len -= sizeof(*nd_rd);
 	nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
 	if (nd6_options(&ndopts) < 0) {
 		nd6log((LOG_INFO, "%s: invalid ND option, rejected: %s\n",
 		    __func__, icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 		/* nd6_options have incremented stats */
 		goto freeit;
 	}
 
 	if (ndopts.nd_opts_tgt_lladdr) {
 		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
 		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
 	}
 
 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
 		nd6log((LOG_INFO, "%s: lladdrlen mismatch for %s "
 		    "(if %d, icmp6 packet %d): %s\n",
 		    __func__, ip6_sprintf(ip6buf, &redtgt6),
 		    ifp->if_addrlen, lladdrlen - 2,
 		    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
 		goto bad;
 	}
 
 	/* Validation passed. */
 
 	/* RFC 2461 8.3 */
 	nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
 	    is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);
 
 	/*
 	 * Install a gateway route in the better-router case or an interface
 	 * route in the on-link-destination case.
 	 */
 	{
 		struct sockaddr_in6 sdst;
 		struct sockaddr_in6 sgw;
 		struct sockaddr_in6 ssrc;
 		struct sockaddr *gw;
 		int rt_flags;
 		u_int fibnum;
 
 		bzero(&sdst, sizeof(sdst));
 		bzero(&ssrc, sizeof(ssrc));
 		sdst.sin6_family = ssrc.sin6_family = AF_INET6;
 		sdst.sin6_len = ssrc.sin6_len = sizeof(struct sockaddr_in6);
 		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
 		bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
 		rt_flags = RTF_HOST;
 		if (is_router) {
 			bzero(&sgw, sizeof(sgw));
 			sgw.sin6_family = AF_INET6;
 			sgw.sin6_len = sizeof(struct sockaddr_in6);
 			bcopy(&redtgt6, &sgw.sin6_addr,
 				sizeof(struct in6_addr));
 			gw = (struct sockaddr *)&sgw;
 			rt_flags |= RTF_GATEWAY;
 		} else
 			gw = ifp->if_addr->ifa_addr;
 		for (fibnum = 0; fibnum < rt_numfibs; fibnum++)
 			in6_rtredirect((struct sockaddr *)&sdst, gw,
 			    (struct sockaddr *)NULL, rt_flags,
 			    (struct sockaddr *)&ssrc, fibnum);
 	}
 	/* finally update cached route in each socket via pfctlinput */
     {
 	struct sockaddr_in6 sdst;
 
 	bzero(&sdst, sizeof(sdst));
 	sdst.sin6_family = AF_INET6;
 	sdst.sin6_len = sizeof(struct sockaddr_in6);
 	bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
 	pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
     }
 
  freeit:
 	m_freem(m);
 	return;
 
  bad:
 	ICMP6STAT_INC(icp6s_badredirect);
 	m_freem(m);
 }
 
 void
 icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
 {
 	struct ifnet *ifp;	/* my outgoing interface */
 	struct in6_addr *ifp_ll6;
 	struct in6_addr *router_ll6;
 	struct ip6_hdr *sip6;	/* m0 as struct ip6_hdr */
 	struct mbuf *m = NULL;	/* newly allocated one */
 	struct m_tag *mtag;
 	struct ip6_hdr *ip6;	/* m as struct ip6_hdr */
 	struct nd_redirect *nd_rd;
 	struct llentry *ln = NULL;
 	size_t maxlen;
 	u_char *p;
 	struct ifnet *outif = NULL;
 	struct sockaddr_in6 src_sa;
 
 	icmp6_errcount(ND_REDIRECT, 0);
 
 	/* if we are not router, we don't send icmp6 redirect */
 	if (!V_ip6_forwarding)
 		goto fail;
 
 	/* sanity check */
 	if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp))
 		goto fail;
 
 	/*
 	 * Address check:
 	 *  the source address must identify a neighbor, and
 	 *  the destination address must not be a multicast address
 	 *  [RFC 2461, sec 8.2]
 	 */
 	sip6 = mtod(m0, struct ip6_hdr *);
 	bzero(&src_sa, sizeof(src_sa));
 	src_sa.sin6_family = AF_INET6;
 	src_sa.sin6_len = sizeof(src_sa);
 	src_sa.sin6_addr = sip6->ip6_src;
 	if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
 		goto fail;
 	if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
 		goto fail;	/* what should we do here? */
 
 	/* rate limit */
 	if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
 		goto fail;
 
 	/*
 	 * Since we are going to append up to 1280 bytes (= IPV6_MMTU),
 	 * we almost always ask for an mbuf cluster for simplicity.
 	 * (MHLEN < IPV6_MMTU is almost always true)
 	 */
 #if IPV6_MMTU >= MCLBYTES
 # error assumption failed about IPV6_MMTU and MCLBYTES
 #endif
 	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 	if (m == NULL)
 		goto fail;
 	M_SETFIB(m, rt->rt_fibnum);
 	maxlen = M_TRAILINGSPACE(m);
 	maxlen = min(IPV6_MMTU, maxlen);
 	/* just for safety */
 	if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) +
 	    ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
 		goto fail;
 	}
 
 	{
 		/* get ip6 linklocal address for ifp(my outgoing interface). */
 		struct in6_ifaddr *ia;
 		if ((ia = in6ifa_ifpforlinklocal(ifp,
 						 IN6_IFF_NOTREADY|
 						 IN6_IFF_ANYCAST)) == NULL)
 			goto fail;
 		ifp_ll6 = &ia->ia_addr.sin6_addr;
 		/* XXXRW: reference released prematurely. */
 		ifa_free(&ia->ia_ifa);
 	}
 
 	/* get ip6 linklocal address for the router. */
 	if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
 		struct sockaddr_in6 *sin6;
 		sin6 = (struct sockaddr_in6 *)rt->rt_gateway;
 		router_ll6 = &sin6->sin6_addr;
 		if (!IN6_IS_ADDR_LINKLOCAL(router_ll6))
 			router_ll6 = (struct in6_addr *)NULL;
 	} else
 		router_ll6 = (struct in6_addr *)NULL;
 
 	/* ip6 */
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6->ip6_flow = 0;
 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
 	ip6->ip6_vfc |= IPV6_VERSION;
 	/* ip6->ip6_plen will be set later */
 	ip6->ip6_nxt = IPPROTO_ICMPV6;
 	ip6->ip6_hlim = 255;
 	/* ip6->ip6_src must be linklocal addr for my outgoing if. */
 	bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
 	bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));
 
 	/* ND Redirect */
 	nd_rd = (struct nd_redirect *)(ip6 + 1);
 	nd_rd->nd_rd_type = ND_REDIRECT;
 	nd_rd->nd_rd_code = 0;
 	nd_rd->nd_rd_reserved = 0;
 	if (rt->rt_flags & RTF_GATEWAY) {
 		/*
 		 * nd_rd->nd_rd_target must be a link-local address in
 		 * better router cases.
 		 */
 		if (!router_ll6)
 			goto fail;
 		bcopy(router_ll6, &nd_rd->nd_rd_target,
 		    sizeof(nd_rd->nd_rd_target));
 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
 		    sizeof(nd_rd->nd_rd_dst));
 	} else {
 		/* make sure redtgt == reddst */
 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
 		    sizeof(nd_rd->nd_rd_target));
 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
 		    sizeof(nd_rd->nd_rd_dst));
 	}
 
 	p = (u_char *)(nd_rd + 1);
 
 	if (!router_ll6)
 		goto nolladdropt;
 
 	{
 		/* target lladdr option */
 		int len;
 		struct nd_opt_hdr *nd_opt;
 		char *lladdr;
 
 		IF_AFDATA_RLOCK(ifp);
 		ln = nd6_lookup(router_ll6, 0, ifp);
 		IF_AFDATA_RUNLOCK(ifp);
 		if (ln == NULL)
 			goto nolladdropt;
 
 		len = sizeof(*nd_opt) + ifp->if_addrlen;
 		len = (len + 7) & ~7;	/* round by 8 */
 		/* safety check */
 		if (len + (p - (u_char *)ip6) > maxlen) 			
 			goto nolladdropt;
 
 		if (ln->la_flags & LLE_VALID) {
 			nd_opt = (struct nd_opt_hdr *)p;
 			nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
 			nd_opt->nd_opt_len = len >> 3;
 			lladdr = (char *)(nd_opt + 1);
 			bcopy(ln->ll_addr, lladdr, ifp->if_addrlen);
 			p += len;
 		}
 	}
 nolladdropt:
 	if (ln != NULL)
 		LLE_RUNLOCK(ln);
 		
 	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
 
 	/* just to be safe */
 #ifdef M_DECRYPTED	/*not openbsd*/
 	if (m0->m_flags & M_DECRYPTED)
 		goto noredhdropt;
 #endif
 	if (p - (u_char *)ip6 > maxlen)
 		goto noredhdropt;
 
 	{
 		/* redirected header option */
 		int len;
 		struct nd_opt_rd_hdr *nd_opt_rh;
 
 		/*
 		 * compute the maximum size for icmp6 redirect header option.
 		 * XXX room for auth header?
 		 */
 		len = maxlen - (p - (u_char *)ip6);
 		len &= ~7;
 
 		/* This is just for simplicity. */
 		if (m0->m_pkthdr.len != m0->m_len) {
 			if (m0->m_next) {
 				m_freem(m0->m_next);
 				m0->m_next = NULL;
 			}
 			m0->m_pkthdr.len = m0->m_len;
 		}
 
 		/*
 		 * Redirected header option spec (RFC2461 4.6.3) talks nothing
 		 * about padding/truncate rule for the original IP packet.
 		 * From the discussion on IPv6imp in Feb 1999,
 		 * the consensus was:
 		 * - "attach as much as possible" is the goal
 		 * - pad if not aligned (original size can be guessed by
 		 *   original ip6 header)
 		 * Following code adds the padding if it is simple enough,
 		 * and truncates if not.
 		 */
 		if (m0->m_next || m0->m_pkthdr.len != m0->m_len)
 			panic("assumption failed in %s:%d", __FILE__,
 			    __LINE__);
 
 		if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
 			/* not enough room, truncate */
 			m0->m_pkthdr.len = m0->m_len = len -
 			    sizeof(*nd_opt_rh);
 		} else {
 			/* enough room, pad or truncate */
 			size_t extra;
 
 			extra = m0->m_pkthdr.len % 8;
 			if (extra) {
 				/* pad if easy enough, truncate if not */
 				if (8 - extra <= M_TRAILINGSPACE(m0)) {
 					/* pad */
 					m0->m_len += (8 - extra);
 					m0->m_pkthdr.len += (8 - extra);
 				} else {
 					/* truncate */
 					m0->m_pkthdr.len -= extra;
 					m0->m_len -= extra;
 				}
 			}
 			len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
 			m0->m_pkthdr.len = m0->m_len = len -
 			    sizeof(*nd_opt_rh);
 		}
 
 		nd_opt_rh = (struct nd_opt_rd_hdr *)p;
 		bzero(nd_opt_rh, sizeof(*nd_opt_rh));
 		nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
 		nd_opt_rh->nd_opt_rh_len = len >> 3;
 		p += sizeof(*nd_opt_rh);
 		m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
 
 		/* connect m0 to m */
 		m_tag_delete_chain(m0, NULL);
 		m0->m_flags &= ~M_PKTHDR;
 		m->m_next = m0;
 		m->m_pkthdr.len = m->m_len + m0->m_len;
 		m0 = NULL;
 	}
 noredhdropt:;
 	if (m0) {
 		m_freem(m0);
 		m0 = NULL;
 	}
 
 	/* XXX: clear embedded link IDs in the inner header */
 	in6_clearscope(&sip6->ip6_src);
 	in6_clearscope(&sip6->ip6_dst);
 	in6_clearscope(&nd_rd->nd_rd_target);
 	in6_clearscope(&nd_rd->nd_rd_dst);
 
 	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
 
 	nd_rd->nd_rd_cksum = 0;
 	nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6,
 	    sizeof(*ip6), ntohs(ip6->ip6_plen));
 
         if (send_sendso_input_hook != NULL) {
 		mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short),
 			M_NOWAIT);
 		if (mtag == NULL)
 			goto fail;
 		*(unsigned short *)(mtag + 1) = nd_rd->nd_rd_type;
 		m_tag_prepend(m, mtag);
 	}
 
 	/* send the packet to outside... */
 	ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
 	if (outif) {
 		icmp6_ifstat_inc(outif, ifs6_out_msg);
 		icmp6_ifstat_inc(outif, ifs6_out_redirect);
 	}
 	ICMP6STAT_INC(icp6s_outhist[ND_REDIRECT]);
 
 	return;
 
 fail:
 	if (m)
 		m_freem(m);
 	if (m0)
 		m_freem(m0);
 }
 
 /*
  * ICMPv6 socket option processing.
  */
 int
 icmp6_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	int error = 0;
 	int optlen;
 	struct inpcb *inp = sotoinpcb(so);
 	int level, op, optname;
 
 	if (sopt) {
 		level = sopt->sopt_level;
 		op = sopt->sopt_dir;
 		optname = sopt->sopt_name;
 		optlen = sopt->sopt_valsize;
 	} else
 		level = op = optname = optlen = 0;
 
 	if (level != IPPROTO_ICMPV6) {
 		return EINVAL;
 	}
 
 	switch (op) {
 	case PRCO_SETOPT:
 		switch (optname) {
 		case ICMP6_FILTER:
 		    {
 			struct icmp6_filter ic6f;
 
 			if (optlen != sizeof(ic6f)) {
 				error = EMSGSIZE;
 				break;
 			}
 			error = sooptcopyin(sopt, &ic6f, optlen, optlen);
 			if (error == 0) {
 				INP_WLOCK(inp);
 				*inp->in6p_icmp6filt = ic6f;
 				INP_WUNLOCK(inp);
 			}
 			break;
 		    }
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	case PRCO_GETOPT:
 		switch (optname) {
 		case ICMP6_FILTER:
 		    {
 			struct icmp6_filter ic6f;
 
 			INP_RLOCK(inp);
 			ic6f = *inp->in6p_icmp6filt;
 			INP_RUNLOCK(inp);
 			error = sooptcopyout(sopt, &ic6f, sizeof(ic6f));
 			break;
 		    }
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Perform rate limit check.
  * Returns 0 if it is okay to send the icmp6 packet.
  * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate
  * limitation.
  *
  * XXX per-destination/type check necessary?
  *
  * dst - not used at this moment
  * type - not used at this moment
  * code - not used at this moment
  */
 static int
 icmp6_ratelimit(const struct in6_addr *dst, const int type,
     const int code)
 {
 	int ret;
 
 	ret = 0;	/* okay to send */
 
 	/* PPS limit */
 	if (!ppsratecheck(&V_icmp6errppslim_last, &V_icmp6errpps_count,
 	    V_icmp6errppslim)) {
 		/* The packet is subject to rate limit */
 		ret++;
 	}
 
 	return ret;
 }
Index: projects/clang380-import/sys/netinet6/in6_fib.c
===================================================================
--- projects/clang380-import/sys/netinet6/in6_fib.c	(revision 293686)
+++ projects/clang380-import/sys/netinet6/in6_fib.c	(revision 293687)
@@ -1,268 +1,275 @@
 /*-
  * Copyright (c) 2015
  * 	Alexander V. Chernikov <melifaro@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_route.h"
 #include "opt_mpath.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/kernel.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_mroute.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_fib.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/scope6_var.h>
 
 #include <net/if_types.h>
 
 #ifdef INET6
 static void fib6_rte_to_nh_extended(struct rtentry *rte,
     const struct in6_addr *dst, uint32_t flags, struct nhop6_extended *pnh6);
 static void fib6_rte_to_nh_basic(struct rtentry *rte, const struct in6_addr *dst,
     uint32_t flags, struct nhop6_basic *pnh6);
 static struct ifnet *fib6_get_ifaifp(struct rtentry *rte);
 #define RNTORT(p)	((struct rtentry *)(p))
 
 /*
  * Gets real interface for the @rte.
  * Returns rt_ifp for !IFF_LOOPBACK routers.
  * Extracts "real" address interface from interface address
  * loopback routes.
  */
 static struct ifnet *
 fib6_get_ifaifp(struct rtentry *rte)
 {
 	struct ifnet *ifp;
 	struct sockaddr_dl *sdl;
 
 	ifp = rte->rt_ifp;
 	if ((ifp->if_flags & IFF_LOOPBACK) &&
 	    rte->rt_gateway->sa_family == AF_LINK) {
 		sdl = (struct sockaddr_dl *)rte->rt_gateway;
 		return (ifnet_byindex(sdl->sdl_index));
 	}
 
 	return (ifp);
 }
 
 static void
 fib6_rte_to_nh_basic(struct rtentry *rte, const struct in6_addr *dst,
     uint32_t flags, struct nhop6_basic *pnh6)
 {
 	struct sockaddr_in6 *gw;
 
 	/* Do explicit nexthop zero unless we're copying it */
 	memset(pnh6, 0, sizeof(*pnh6));
 
 	if ((flags & NHR_IFAIF) != 0)
 		pnh6->nh_ifp = fib6_get_ifaifp(rte);
 	else
 		pnh6->nh_ifp = rte->rt_ifp;
 
 	pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp));
 	if (rte->rt_flags & RTF_GATEWAY) {
 		gw = (struct sockaddr_in6 *)rte->rt_gateway;
 		pnh6->nh_addr = gw->sin6_addr;
 		in6_clearscope(&pnh6->nh_addr);
 	} else
 		pnh6->nh_addr = *dst;
 	/* Set flags */
 	pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
 	gw = (struct sockaddr_in6 *)rt_key(rte);
 	if (IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr))
 		pnh6->nh_flags |= NHF_DEFAULT;
 }
 
 static void
 fib6_rte_to_nh_extended(struct rtentry *rte, const struct in6_addr *dst,
     uint32_t flags, struct nhop6_extended *pnh6)
 {
 	struct sockaddr_in6 *gw;
 
 	/* Do explicit nexthop zero unless we're copying it */
 	memset(pnh6, 0, sizeof(*pnh6));
 
 	if ((flags & NHR_IFAIF) != 0)
 		pnh6->nh_ifp = fib6_get_ifaifp(rte);
 	else
 		pnh6->nh_ifp = rte->rt_ifp;
 
 	pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp));
 	if (rte->rt_flags & RTF_GATEWAY) {
 		gw = (struct sockaddr_in6 *)rte->rt_gateway;
 		pnh6->nh_addr = gw->sin6_addr;
 		in6_clearscope(&pnh6->nh_addr);
 	} else
 		pnh6->nh_addr = *dst;
 	/* Set flags */
 	pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
 	gw = (struct sockaddr_in6 *)rt_key(rte);
 	if (IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr))
 		pnh6->nh_flags |= NHF_DEFAULT;
 }
 
 /*
  * Performs IPv6 route table lookup on @dst. Returns 0 on success.
  * Stores basic nexthop info into provided @pnh6 structure.
  * Note that
  * - nh_ifp represents logical transmit interface (rt_ifp) by default
  * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed
  * - mtu from logical transmit interface will be returned.
  * - nh_ifp cannot be safely dereferenced
  * - nh_ifp represents rt_ifp (e.g. if looking up address on
  *   interface "ix0" pointer to "ix0" interface will be returned instead
  *   of "lo0")
  * - howewer mtu from "transmit" interface will be returned.
  * - scope will be embedded in nh_addr
  */
 int
 fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid,
     uint32_t flags, uint32_t flowid, struct nhop6_basic *pnh6)
 {
 	struct radix_node_head *rh;
 	struct radix_node *rn;
 	struct sockaddr_in6 sin6;
 	struct rtentry *rte;
 
 	KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_basic: bad fibnum"));
 	rh = rt_tables_get_rnh(fibnum, AF_INET6);
 	if (rh == NULL)
 		return (ENOENT);
 
 	/* Prepare lookup key */
 	memset(&sin6, 0, sizeof(sin6));
 	sin6.sin6_addr = *dst;
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	/* Assume scopeid is valid and embed it directly */
 	if (IN6_IS_SCOPE_LINKLOCAL(dst))
 		sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
 
 	RADIX_NODE_HEAD_RLOCK(rh);
 	rn = rh->rnh_matchaddr((void *)&sin6, rh);
 	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
 		rte = RNTORT(rn);
 		/* Ensure route & ifp is UP */
 		if (RT_LINK_IS_UP(rte->rt_ifp)) {
 			fib6_rte_to_nh_basic(rte, &sin6.sin6_addr, flags, pnh6);
 			RADIX_NODE_HEAD_RUNLOCK(rh);
 			return (0);
 		}
 	}
 	RADIX_NODE_HEAD_RUNLOCK(rh);
 
 	return (ENOENT);
 }
 
 /*
  * Performs IPv6 route table lookup on @dst. Returns 0 on success.
  * Stores extended nexthop info into provided @pnh6 structure.
  * Note that
  * - nh_ifp cannot be safely dereferenced unless NHR_REF is specified.
  * - in that case you need to call fib6_free_nh_ext()
  * - nh_ifp represents logical transmit interface (rt_ifp) by default
  * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed
  * - mtu from logical transmit interface will be returned.
  * - scope will be embedded in nh_addr
  */
 int
 fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid,
     uint32_t flags, uint32_t flowid, struct nhop6_extended *pnh6)
 {
 	struct radix_node_head *rh;
 	struct radix_node *rn;
 	struct sockaddr_in6 sin6;
 	struct rtentry *rte;
 
 	KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_ext: bad fibnum"));
 	rh = rt_tables_get_rnh(fibnum, AF_INET6);
 	if (rh == NULL)
 		return (ENOENT);
 
 	/* Prepare lookup key */
 	memset(&sin6, 0, sizeof(sin6));
 	sin6.sin6_len = sizeof(struct sockaddr_in6);
 	sin6.sin6_addr = *dst;
 	/* Assume scopeid is valid and embed it directly */
 	if (IN6_IS_SCOPE_LINKLOCAL(dst))
 		sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
 
 	RADIX_NODE_HEAD_RLOCK(rh);
 	rn = rh->rnh_matchaddr((void *)&sin6, rh);
 	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
 		rte = RNTORT(rn);
+#ifdef RADIX_MPATH
+		rte = rt_mpath_select(rte, flowid);
+		if (rte == NULL) {
+			RADIX_NODE_HEAD_RUNLOCK(rh);
+			return (ENOENT);
+		}
+#endif
 		/* Ensure route & ifp is UP */
 		if (RT_LINK_IS_UP(rte->rt_ifp)) {
 			fib6_rte_to_nh_extended(rte, &sin6.sin6_addr, flags,
 			    pnh6);
 			if ((flags & NHR_REF) != 0) {
 				/* TODO: Do lwref on egress ifp's */
 			}
 			RADIX_NODE_HEAD_RUNLOCK(rh);
 
 			return (0);
 		}
 	}
 	RADIX_NODE_HEAD_RUNLOCK(rh);
 
 	return (ENOENT);
 }
 
 void
 fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6)
 {
 
 }
 
 #endif
 
Index: projects/clang380-import/sys/netinet6/in6_pcb.c
===================================================================
--- projects/clang380-import/sys/netinet6/in6_pcb.c	(revision 293686)
+++ projects/clang380-import/sys/netinet6/in6_pcb.c	(revision 293687)
@@ -1,1281 +1,1275 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * All rights reserved.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in_pcb.c	8.2 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_pcbgroup.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/jail.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/tcp_var.h>
 #include <netinet/ip6.h>
 #include <netinet/ip_var.h>
 
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet/in_pcb.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/scope6_var.h>
 
 static struct inpcb *in6_pcblookup_hash_locked(struct inpcbinfo *,
     struct in6_addr *, u_int, struct in6_addr *, u_int, int, struct ifnet *);
 
 /*
  * Bind the local address and/or port of an IPv6 PCB.
  *
  * When nam is NULL only prison_local_ip6() is applied to the PCB's current
  * local address and the port is left to in6_pcbsetport().  Otherwise nam
  * must be a sockaddr_in6; its address and port are validated against
  * interface addresses and already-bound PCBs before being stored in inp.
  *
  * The caller must hold the inpcb write lock and the pcbinfo hash write
  * lock (asserted below).  Returns 0 on success or an errno value.
  */
 int
 in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam,
     struct ucred *cred)
 {
 	struct socket *so = inp->inp_socket;
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL;
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	u_short	lport = 0;
 	int error, lookupflags = 0;
 	int reuseport = (so->so_options & SO_REUSEPORT);
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 
 	if (TAILQ_EMPTY(&V_in6_ifaddrhead))	/* XXX broken! */
 		return (EADDRNOTAVAIL);
 	/* Refuse to rebind a PCB that already has a local port or address. */
 	if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
 		return (EINVAL);
 	/* Without any reuse option, conflicts are searched with wildcards. */
 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
 		lookupflags = INPLOOKUP_WILDCARD;
 	if (nam == NULL) {
 		if ((error = prison_local_ip6(cred, &inp->in6p_laddr,
 		    ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
 			return (error);
 	} else {
 		sin6 = (struct sockaddr_in6 *)nam;
 		if (nam->sa_len != sizeof(*sin6))
 			return (EINVAL);
 		/*
 		 * family check.
 		 */
 		if (nam->sa_family != AF_INET6)
 			return (EAFNOSUPPORT);
 
 		if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
 			return(error);
 
 		if ((error = prison_local_ip6(cred, &sin6->sin6_addr,
 		    ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
 			return (error);
 
 		lport = sin6->sin6_port;
 		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
 			/*
 			 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
 			 * allow complete duplication of binding if
 			 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
 			 * and a multicast address is bound on both
 			 * new and duplicated sockets.
 			 */
 			if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0)
 				reuseport = SO_REUSEADDR|SO_REUSEPORT;
 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 			struct ifaddr *ifa;
 
 			sin6->sin6_port = 0;		/* yech... */
 			if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) ==
 			    NULL &&
 			    (inp->inp_flags & INP_BINDANY) == 0) {
 				return (EADDRNOTAVAIL);
 			}
 
 			/*
 			 * XXX: bind to an anycast address might accidentally
 			 * cause sending a packet with anycast source address.
 			 * We should allow to bind to a deprecated address, since
 			 * the application dares to use it.
 			 */
 			if (ifa != NULL &&
 			    ((struct in6_ifaddr *)ifa)->ia6_flags &
 			    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) {
 				ifa_free(ifa);
 				return (EADDRNOTAVAIL);
 			}
 			/* The ifaddr was only needed for the flag check. */
 			if (ifa != NULL)
 				ifa_free(ifa);
 		}
 		if (lport) {
 			struct inpcb *t;
 			struct tcptw *tw;
 
 			/* GROSS */
 			if (ntohs(lport) <= V_ipport_reservedhigh &&
 			    ntohs(lport) >= V_ipport_reservedlow &&
 			    priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT,
 			    0))
 				return (EACCES);
 			/*
 			 * Cross-uid conflict checks; skipped for multicast
 			 * addresses and for credentials that hold
 			 * PRIV_NETINET_REUSEPORT.
 			 */
 			if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) &&
 			    priv_check_cred(inp->inp_cred,
 			    PRIV_NETINET_REUSEPORT, 0) != 0) {
 				t = in6_pcblookup_local(pcbinfo,
 				    &sin6->sin6_addr, lport,
 				    INPLOOKUP_WILDCARD, cred);
 				if (t &&
 				    ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
 				    ((t->inp_flags & INP_TIMEWAIT) == 0) &&
 				    (so->so_type != SOCK_STREAM ||
 				     IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
 				    (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
 				     !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
 				     (t->inp_flags2 & INP_REUSEPORT) == 0) &&
 				    (inp->inp_cred->cr_uid !=
 				     t->inp_cred->cr_uid))
 					return (EADDRINUSE);
 
 				/*
 				 * If the socket is a BINDMULTI socket, then
 				 * the credentials need to match and the
 				 * original socket also has to have been bound
 				 * with BINDMULTI.
 				 */
 				if (t && (! in_pcbbind_check_bindmulti(inp, t)))
 					return (EADDRINUSE);
 
 #ifdef INET
 				/*
 				 * A non-V6ONLY bind to the unspecified address
 				 * is also checked against IPv4 PCBs.
 				 */
 				if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
 				    IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 					struct sockaddr_in sin;
 
 					in6_sin6_2_sin(&sin, sin6);
 					t = in_pcblookup_local(pcbinfo,
 					    sin.sin_addr, lport,
 					    INPLOOKUP_WILDCARD, cred);
 					if (t &&
 					    ((inp->inp_flags2 & INP_BINDMULTI) == 0) &&
 					    ((t->inp_flags &
 					      INP_TIMEWAIT) == 0) &&
 					    (so->so_type != SOCK_STREAM ||
 					     ntohl(t->inp_faddr.s_addr) ==
 					      INADDR_ANY) &&
 					    (inp->inp_cred->cr_uid !=
 					     t->inp_cred->cr_uid))
 						return (EADDRINUSE);
 
 					if (t && (! in_pcbbind_check_bindmulti(inp, t)))
 						return (EADDRINUSE);
 				}
 #endif
 			}
 			/* Reuse-option based conflict check against IPv6 PCBs. */
 			t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr,
 			    lport, lookupflags, cred);
 			if (t && (t->inp_flags & INP_TIMEWAIT)) {
 				/*
 				 * XXXRW: If an inpcb has had its timewait
 				 * state recycled, we treat the address as
 				 * being in use (for now).  This is better
 				 * than a panic, but not desirable.
 				 */
 				tw = intotw(t);
 				if (tw == NULL ||
 				    (reuseport & tw->tw_so_options) == 0)
 					return (EADDRINUSE);
 			} else if (t && (reuseport & inp_so_options(t)) == 0) {
 				return (EADDRINUSE);
 			}
 #ifdef INET
 			/* Same check against IPv4 PCBs for dual-stack binds. */
 			if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 &&
 			    IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 				struct sockaddr_in sin;
 
 				in6_sin6_2_sin(&sin, sin6);
 				t = in_pcblookup_local(pcbinfo, sin.sin_addr,
 				    lport, lookupflags, cred);
 				if (t && t->inp_flags & INP_TIMEWAIT) {
 					tw = intotw(t);
 					if (tw == NULL)
 						return (EADDRINUSE);
 					if ((reuseport & tw->tw_so_options) == 0
 					    && (ntohl(t->inp_laddr.s_addr) !=
 					     INADDR_ANY || ((inp->inp_vflag &
 					     INP_IPV6PROTO) ==
 					     (t->inp_vflag & INP_IPV6PROTO))))
 						return (EADDRINUSE);
 				} else if (t &&
 				    (reuseport & inp_so_options(t)) == 0 &&
 				    (ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
 				    (t->inp_vflag & INP_IPV6PROTO) != 0))
 					return (EADDRINUSE);
 			}
 #endif
 		}
 		/* All checks passed: commit the requested local address. */
 		inp->in6p_laddr = sin6->sin6_addr;
 	}
 	if (lport == 0) {
 		if ((error = in6_pcbsetport(&inp->in6p_laddr, inp, cred)) != 0) {
 			/* Undo an address bind that may have occurred. */
 			inp->in6p_laddr = in6addr_any;
 			return (error);
 		}
 	} else {
 		inp->inp_lport = lport;
 		if (in_pcbinshash(inp) != 0) {
 			/* Hash insertion failed: roll back address and port. */
 			inp->in6p_laddr = in6addr_any;
 			inp->inp_lport = 0;
 			return (EAGAIN);
 		}
 	}
 	return (0);
 }
 
 /*
  *   Transform old in6_pcbconnect() into an inner subroutine for new
  *   in6_pcbconnect(): Do some validity-checking on the remote
  *   address (in mbuf 'nam') and then determine local host address
  *   (i.e., which interface) to use to access that remote host.
  *
  *   This preserves definition of in6_pcbconnect(), while supporting a
  *   slightly different version for T/TCP.  (This is more than
  *   a bit of a kludge, but cleaning up the internal interfaces would
  *   have forced minor changes in every protocol).
  */
 static int
 in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam,
     struct in6_addr *plocal_addr6)
 {
 	register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
 	int error = 0;
-	struct ifnet *ifp = NULL;
 	int scope_ambiguous = 0;
 	struct in6_addr in6a;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);	/* XXXRW: why? */
 
 	/* Basic sanity checks on the destination sockaddr. */
 	if (nam->sa_len != sizeof (*sin6))
 		return (EINVAL);
 	if (sin6->sin6_family != AF_INET6)
 		return (EAFNOSUPPORT);
 	if (sin6->sin6_port == 0)
 		return (EADDRNOTAVAIL);
 
 	/*
 	 * Note whether the caller left the zone unspecified while default
 	 * zones are disabled; the source-selection step below consumes this.
 	 */
 	if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone)
 		scope_ambiguous = 1;
 	if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
 		return(error);
 
 	if (!TAILQ_EMPTY(&V_in6_ifaddrhead)) {
 		/*
 		 * If the destination address is UNSPECIFIED addr,
 		 * use the loopback addr, e.g ::1.
 		 */
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
 			sin6->sin6_addr = in6addr_loopback;
 	}
 	if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0)
 		return (error);
 
-	error = in6_selectsrc(sin6, inp->in6p_outputopts,
-	    inp, inp->inp_cred, &ifp, &in6a);
+	error = in6_selectsrc_socket(sin6, inp->in6p_outputopts,
+	    inp, inp->inp_cred, scope_ambiguous, &in6a, NULL);
 	if (error)
 		return (error);
 
-	if (ifp && scope_ambiguous &&
-	    (error = in6_setscope(&sin6->sin6_addr, ifp, NULL)) != 0) {
-		return(error);
-	}
-
 	/*
 	 * Do not update this earlier, in case we return with an error.
 	 *
-	 * XXX: this in6_selectsrc result might replace the bound local
+	 * XXX: this in6_selectsrc_socket result might replace the bound local
 	 * address with the address specified by setsockopt(IPV6_PKTINFO).
 	 * Is it the intended behavior?
 	 */
 	*plocal_addr6 = in6a;
 
 	/*
 	 * Don't do pcblookup call here; return the selected source
 	 * address in plocal_addr6
 	 * and exit to caller, that will do the lookup.
 	 */
 
 	return (0);
 }
 
 /*
  * Outer subroutine:
  * Connect from a socket to a specified address.
  * Both address and port must be specified in argument nam.
  * If the socket does not have a local address yet, then pick one.
  *
  * The caller must hold the inpcb write lock and the pcbinfo hash write
  * lock (asserted below).  The mbuf m is only handed on to
  * in_pcbrehash_mbuf().  Returns 0 on success, EADDRINUSE when the
  * resulting address/port combination is already present in the hash, or
  * the error reported by in6_pcbladdr() / in6_pcbbind().
  */
 int
 in6_pcbconnect_mbuf(register struct inpcb *inp, struct sockaddr *nam,
     struct ucred *cred, struct mbuf *m)
 {
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 	register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
 	struct in6_addr addr6;
 	int error;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 
 	/*
 	 * Call inner routine, to assign local interface address.
 	 * in6_pcbladdr() may automatically fill in sin6_scope_id.
 	 */
 	if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0)
 		return (error);
 
 	/*
 	 * Reject the connect if a PCB with the same foreign/local pair
 	 * already exists; the local address is the bound one if set,
 	 * otherwise the one just selected.
 	 */
 	if (in6_pcblookup_hash_locked(pcbinfo, &sin6->sin6_addr,
 			       sin6->sin6_port,
 			      IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
 			      ? &addr6 : &inp->in6p_laddr,
 			      inp->inp_lport, 0, NULL) != NULL) {
 		return (EADDRINUSE);
 	}
 	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
 		/* No local port yet: bind with a NULL address first. */
 		if (inp->inp_lport == 0) {
 			error = in6_pcbbind(inp, (struct sockaddr *)0, cred);
 			if (error)
 				return (error);
 		}
 		inp->in6p_laddr = addr6;
 	}
 	inp->in6p_faddr = sin6->sin6_addr;
 	inp->inp_fport = sin6->sin6_port;
 	/* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
 	inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
 	if (inp->inp_flags & IN6P_AUTOFLOWLABEL)
 		inp->inp_flow |=
 		    (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
 
 	/* The addresses and ports changed, so rehash the PCB. */
 	in_pcbrehash_mbuf(inp, m);
 
 	return (0);
 }
 
 /*
  * Convenience wrapper: in6_pcbconnect_mbuf() without an mbuf (m == NULL).
  */
 int
 in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
 {
 
 	return (in6_pcbconnect_mbuf(inp, nam, cred, NULL));
 }
 
 /*
  * Drop the foreign half of a connection: clear the foreign address and
  * port and the flow label, then rehash the PCB.  The caller must hold the
  * inpcb write lock and the pcbinfo hash write lock.
  */
 void
 in6_pcbdisconnect(struct inpcb *inp)
 {
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 
 	inp->inp_fport = 0;
 	memset(&inp->in6p_faddr, 0, sizeof(inp->in6p_faddr));
 	/* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
 	inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
 
 	/* Rehash only after every field above has been reset. */
 	in_pcbrehash(inp);
 }
 
 /*
  * Allocate (M_SONAME, M_WAITOK) and fill in a sockaddr_in6 for the given
  * port and address.  sa6_recoverscope() is applied to the result; its
  * return value is ignored.
  */
 struct sockaddr *
 in6_sockaddr(in_port_t port, struct in6_addr *addr_p)
 {
 	struct sockaddr_in6 *sa6;
 
 	/* M_ZERO gives a zero-filled structure to start from. */
 	sa6 = malloc(sizeof(*sa6), M_SONAME, M_WAITOK | M_ZERO);
 	sa6->sin6_len = sizeof(*sa6);
 	sa6->sin6_family = AF_INET6;
 	sa6->sin6_addr = *addr_p;
 	sa6->sin6_port = port;
 	(void)sa6_recoverscope(sa6); /* XXX: should catch errors */
 
 	return ((struct sockaddr *)sa6);
 }
 
 /*
  * Build a temporary sockaddr_in from the given IPv4 address and port and
  * convert it with in6_sin_2_v4mapsin6() into a newly allocated
  * (M_SONAME, M_WAITOK) sockaddr_in6, which is returned.
  */
 struct sockaddr *
 in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p)
 {
 	struct sockaddr_in sin4;
 	struct sockaddr_in6 *mapped;
 
 	memset(&sin4, 0, sizeof(sin4));
 	sin4.sin_len = sizeof(sin4);
 	sin4.sin_family = AF_INET;
 	sin4.sin_addr = *addr_p;
 	sin4.sin_port = port;
 
 	mapped = malloc(sizeof(*mapped), M_SONAME, M_WAITOK);
 	in6_sin_2_v4mapsin6(&sin4, mapped);
 
 	return ((struct sockaddr *)mapped);
 }
 
 int
 in6_getsockaddr(struct socket *so, struct sockaddr **nam)
 {
 	register struct inpcb *inp;
 	struct in6_addr addr;
 	in_port_t port;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL"));
 
 	INP_RLOCK(inp);
 	port = inp->inp_lport;
 	addr = inp->in6p_laddr;
 	INP_RUNLOCK(inp);
 
 	*nam = in6_sockaddr(port, &addr);
 	return 0;
 }
 
 int
 in6_getpeeraddr(struct socket *so, struct sockaddr **nam)
 {
 	struct inpcb *inp;
 	struct in6_addr addr;
 	in_port_t port;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("in6_getpeeraddr: inp == NULL"));
 
 	INP_RLOCK(inp);
 	port = inp->inp_fport;
 	addr = inp->in6p_faddr;
 	INP_RUNLOCK(inp);
 
 	*nam = in6_sockaddr(port, &addr);
 	return 0;
 }
 
 /*
  * Local-address getter for sockets that may be IPv4-only: with INET
  * compiled in, a PCB whose vflag has INP_IPV4 but not INP_IPV6 gets its
  * IPv4 address fetched and converted to v4-mapped form in place; every
  * other PCB goes through in6_getsockaddr().
  */
 int
 in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct inpcb *inp;
 	int error;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("in6_mapped_sockaddr: inp == NULL"));
 
 #ifdef INET
 	if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) {
 		error = in_getsockaddr(so, nam);
 		if (error == 0)
 			in6_sin_2_v4mapsin6_in_sock(nam);
 		return (error);
 	}
 #endif
 	/* scope issues will be handled in in6_getsockaddr(). */
 	error = in6_getsockaddr(so, nam);
 	return (error);
 }
 
 /*
  * Foreign-address getter for sockets that may be IPv4-only: with INET
  * compiled in, a PCB whose vflag has INP_IPV4 but not INP_IPV6 gets its
  * IPv4 peer address fetched and converted to v4-mapped form in place;
  * every other PCB goes through in6_getpeeraddr().
  */
 int
 in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	struct inpcb *inp;
 	int error;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("in6_mapped_peeraddr: inp == NULL"));
 
 #ifdef INET
 	if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) {
 		error = in_getpeeraddr(so, nam);
 		if (error == 0)
 			in6_sin_2_v4mapsin6_in_sock(nam);
 		return (error);
 	}
 #endif
 	/* scope issues will be handled in in6_getpeeraddr(). */
 	error = in6_getpeeraddr(so, nam);
 	return (error);
 }
 
 /*
  * Pass some notification to all connections of a protocol
  * associated with address dst.  The local address and/or port numbers
  * may be specified to limit the search.  The "usual action" will be
  * taken, depending on the ctlinput cmd.  The caller must filter any
  * cmds that are uninteresting (e.g., no error in the map).
  * Call the protocol specific routine (if any) to report
  * any errors for each matching socket.
  */
 void
 in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst,
     u_int fport_arg, const struct sockaddr *src, u_int lport_arg,
     int cmd, void *cmdarg,
     struct inpcb *(*notify)(struct inpcb *, int))
 {
 	struct inpcb *inp, *inp_temp;
 	struct sockaddr_in6 sa6_src, *sa6_dst;
 	u_short	fport = fport_arg, lport = lport_arg;
 	u_int32_t flowinfo;
 	int errno;
 
 	/* Ignore out-of-range commands and non-IPv6 destinations. */
 	if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6)
 		return;
 
 	sa6_dst = (struct sockaddr_in6 *)dst;
 	if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr))
 		return;
 
 	/*
 	 * note that src can be NULL when we get notify by local fragmentation.
 	 */
 	sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src;
 	flowinfo = sa6_src.sin6_flowinfo;
 
 	/*
 	 * Redirects go to all references to the destination,
 	 * and use in6_rtchange to invalidate the route cache.
 	 * Dead host indications: also use in6_rtchange to invalidate
 	 * the cache, and deliver the error to all the sockets.
 	 * Otherwise, if we have knowledge of the local port and address,
 	 * deliver only to that socket.
 	 */
 	if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
 		fport = 0;
 		lport = 0;
 		bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr));
 
 		if (cmd != PRC_HOSTDEAD)
 			notify = in6_rtchange;
 	}
 	errno = inet6ctlerrmap[cmd];
 	INP_INFO_WLOCK(pcbinfo);
 	/* The _SAFE variant tolerates removal of inp during the callback. */
 	LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
 		INP_WLOCK(inp);
 		if ((inp->inp_vflag & INP_IPV6) == 0) {
 			INP_WUNLOCK(inp);
 			continue;
 		}
 
 		/*
 		 * If the error designates a new path MTU for a destination
 		 * and the application (associated with this socket) wanted to
 		 * know the value, notify.
 		 * XXX: should we avoid to notify the value to TCP sockets?
 		 */
 		if (cmd == PRC_MSGSIZE && cmdarg != NULL)
 			ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst,
 					*(u_int32_t *)cmdarg);
 
 		/*
 		 * Detect if we should notify the error. If no source and
 		 * destination ports are specified, but non-zero flowinfo and
 		 * local address match, notify the error. This is the case
 		 * when the error is delivered with an encrypted buffer
 		 * by ESP. Otherwise, just compare addresses and ports
 		 * as usual.
 		 */
 		if (lport == 0 && fport == 0 && flowinfo &&
 		    inp->inp_socket != NULL &&
 		    flowinfo == (inp->inp_flow & IPV6_FLOWLABEL_MASK) &&
 		    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr))
 			goto do_notify;
 		else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
 					     &sa6_dst->sin6_addr) ||
 			 inp->inp_socket == 0 ||
 			 (lport && inp->inp_lport != lport) ||
 			 (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) &&
 			  !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
 					      &sa6_src.sin6_addr)) ||
 			 (fport && inp->inp_fport != fport)) {
 			INP_WUNLOCK(inp);
 			continue;
 		}
 
 	  do_notify:
 		/*
 		 * The inpcb is unlocked here only when the callback returns
 		 * non-NULL; a NULL return is presumably the callback's way
 		 * of saying it already released the inpcb (confirm against
 		 * the notify routines).
 		 */
 		if (notify) {
 			if ((*notify)(inp, errno))
 				INP_WUNLOCK(inp);
 		} else
 			INP_WUNLOCK(inp);
 	}
 	INP_INFO_WUNLOCK(pcbinfo);
 }
 
 /*
  * Lookup a PCB based on the local address and port.  Caller must hold the
  * hash lock.  No inpcb locks or references are acquired.
  *
  * Without INPLOOKUP_WILDCARD only an unconnected PCB whose local address
  * and port match exactly is returned.  With INPLOOKUP_WILDCARD the PCB on
  * the port with the fewest wildcard mismatches is returned.  When cred is
  * non-NULL, PCBs for which prison_equal_ip6() fails are ignored.
  * Returns NULL when nothing matches.
  */
 struct inpcb *
 in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
     u_short lport, int lookupflags, struct ucred *cred)
 {
 	register struct inpcb *inp;
 	/* 3 is larger than any score computed below (at most 2). */
 	int matchwild = 3, wildcard;
 
 	KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 
 	if ((lookupflags & INPLOOKUP_WILDCARD) == 0) {
 		struct inpcbhead *head;
 		/*
 		 * Look for an unconnected (wildcard foreign addr) PCB that
 		 * matches the local address and port we're looking for.
 		 */
 		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
 		    INP6_PCBHASHKEY(&in6addr_any), lport, 0,
 		    pcbinfo->ipi_hashmask)];
 		LIST_FOREACH(inp, head, inp_hash) {
 			/* XXX inp locking */
 			if ((inp->inp_vflag & INP_IPV6) == 0)
 				continue;
 			if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) &&
 			    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
 			    inp->inp_lport == lport) {
 				/* Found. */
 				if (cred == NULL ||
 				    prison_equal_ip6(cred->cr_prison,
 					inp->inp_cred->cr_prison))
 					return (inp);
 			}
 		}
 		/*
 		 * Not found.
 		 */
 		return (NULL);
 	} else {
 		struct inpcbporthead *porthash;
 		struct inpcbport *phd;
 		struct inpcb *match = NULL;
 		/*
 		 * Best fit PCB lookup.
 		 *
 		 * First see if this local port is in use by looking on the
 		 * port hash list.
 		 */
 		porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
 		    pcbinfo->ipi_porthashmask)];
 		LIST_FOREACH(phd, porthash, phd_hash) {
 			if (phd->phd_port == lport)
 				break;
 		}
 		if (phd != NULL) {
 			/*
 			 * Port is in use by one or more PCBs. Look for best
 			 * fit.
 			 */
 			LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
 				/*
 				 * wildcard counts mismatches: +1 for a
 				 * connected PCB, +1 when exactly one of the
 				 * two local addresses is unspecified.
 				 */
 				wildcard = 0;
 				if (cred != NULL &&
 				    !prison_equal_ip6(cred->cr_prison,
 					inp->inp_cred->cr_prison))
 					continue;
 				/* XXX inp locking */
 				if ((inp->inp_vflag & INP_IPV6) == 0)
 					continue;
 				if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr))
 					wildcard++;
 				if (!IN6_IS_ADDR_UNSPECIFIED(
 					&inp->in6p_laddr)) {
 					if (IN6_IS_ADDR_UNSPECIFIED(laddr))
 						wildcard++;
 					else if (!IN6_ARE_ADDR_EQUAL(
 					    &inp->in6p_laddr, laddr))
 						continue;
 				} else {
 					if (!IN6_IS_ADDR_UNSPECIFIED(laddr))
 						wildcard++;
 				}
 				/* Keep the lowest score; 0 cannot be beaten. */
 				if (wildcard < matchwild) {
 					match = inp;
 					matchwild = wildcard;
 					if (matchwild == 0)
 						break;
 				}
 			}
 		}
 		return (match);
 	}
 }
 
 /*
  * Remove references to ifp from the IPv6 multicast options of every PCB
  * on pcbinfo's list: the outgoing multicast interface is cleared and
  * group memberships joined through ifp are left.  Runs with the pcbinfo
  * write lock held and takes each inpcb's write lock in turn.
  */
 void
 in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
 {
 	struct inpcb *in6p;
 	struct ip6_moptions *im6o;
 	int i, gap;
 
 	INP_INFO_WLOCK(pcbinfo);
 	LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
 		INP_WLOCK(in6p);
 		im6o = in6p->in6p_moptions;
 		if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) {
 			/*
 			 * Unselect the outgoing ifp for multicast if it
 			 * is being detached.
 			 */
 			if (im6o->im6o_multicast_ifp == ifp)
 				im6o->im6o_multicast_ifp = NULL;
 			/*
 			 * Drop multicast group membership if we joined
 			 * through the interface being detached.
 			 */
 			/*
 			 * gap counts the entries removed so far; surviving
 			 * entries are shifted down by that amount so the
 			 * membership array stays dense.
 			 */
 			gap = 0;
 			for (i = 0; i < im6o->im6o_num_memberships; i++) {
 				if (im6o->im6o_membership[i]->in6m_ifp ==
 				    ifp) {
 					in6_mc_leave(im6o->im6o_membership[i],
 					    NULL);
 					gap++;
 				} else if (gap != 0) {
 					im6o->im6o_membership[i - gap] =
 					    im6o->im6o_membership[i];
 				}
 			}
 			im6o->im6o_num_memberships -= gap;
 		}
 		INP_WUNLOCK(in6p);
 	}
 	INP_INFO_WUNLOCK(pcbinfo);
 }
 
 /*
  * Hook invoked when a higher level complains about service problems.
  * It used to invalidate cached routing information; since no route
  * pointer is cached any more, it is now a no-op.
  */
 void
 in6_losing(struct inpcb *in6p)
 {
 
 	/* Nothing to invalidate: route pointers are no longer stored. */
 }
 
 /*
  * Notification routine for routing changes.  It used to flush the old
  * route and allocate a new one; since no route pointer is cached any
  * more, it simply hands the inpcb back unchanged.
  */
 struct inpcb *
 in6_rtchange(struct inpcb *inp, int errno)
 {
 
 	/* Nothing to flush: route pointers are no longer stored. */
 	return (inp);
 }
 
 #ifdef PCBGROUP
 /*
  * Lookup PCB in hash list, using pcbgroup tables.
  *
  * Search order:
  *   1. exact match on both addresses and both ports in the pcbgroup hash;
  *   2. with INPLOOKUP_WILDCARD, unconnected PCBs in the pcbgroup hash;
  *   3. with INPLOOKUP_WILDCARD, unconnected PCBs in the pcbinfo wildcard
  *      hash.
  *
  * lookupflags must contain INPLOOKUP_WLOCKPCB or INPLOOKUP_RLOCKPCB; the
  * matching inpcb is returned locked accordingly.  NULL is returned when
  * nothing matches, or when in_pcbrele_wlocked()/in_pcbrele_rlocked()
  * returns non-zero once the lock has been taken.  The ifp argument is not
  * used.
  */
 static struct inpcb *
 in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
     struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr,
     u_int lport_arg, int lookupflags, struct ifnet *ifp)
 {
 	struct inpcbhead *head;
 	struct inpcb *inp, *tmpinp;
 	u_short fport = fport_arg, lport = lport_arg;
 
 	/*
 	 * First look for an exact match.
 	 */
 	tmpinp = NULL;
 	INP_GROUP_LOCK(pcbgroup);
 	head = &pcbgroup->ipg_hashbase[INP_PCBHASH(
 	    INP6_PCBHASHKEY(faddr), lport, fport, pcbgroup->ipg_hashmask)];
 	LIST_FOREACH(inp, head, inp_pcbgrouphash) {
 		/* XXX inp locking */
 		if ((inp->inp_vflag & INP_IPV6) == 0)
 			continue;
 		if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
 		    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
 		    inp->inp_fport == fport &&
 		    inp->inp_lport == lport) {
 			/*
 			 * XXX We should be able to directly return
 			 * the inp here, without any checks.
 			 * Well unless both bound with SO_REUSEPORT?
 			 */
 			if (prison_flag(inp->inp_cred, PR_IP6))
 				goto found;
 			/* Remember the first non-jailed exact match. */
 			if (tmpinp == NULL)
 				tmpinp = inp;
 		}
 	}
 	if (tmpinp != NULL) {
 		inp = tmpinp;
 		goto found;
 	}
 
 	/*
 	 * Then look for a wildcard match in the pcbgroup.
 	 */
 	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
 		struct inpcb *local_wild = NULL, *local_exact = NULL;
 		struct inpcb *jail_wild = NULL;
 		int injail;
 
 		/*
 		 * Order of socket selection - we always prefer jails.
 		 *      1. jailed, non-wild.
 		 *      2. jailed, wild.
 		 *      3. non-jailed, non-wild.
 		 *      4. non-jailed, wild.
 		 */
 		head = &pcbgroup->ipg_hashbase[INP_PCBHASH(
 		    INP6_PCBHASHKEY(&in6addr_any), lport, 0,
 		    pcbgroup->ipg_hashmask)];
 		LIST_FOREACH(inp, head, inp_pcbgrouphash) {
 			/* XXX inp locking */
 			if ((inp->inp_vflag & INP_IPV6) == 0)
 				continue;
 
 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
 			    inp->inp_lport != lport) {
 				continue;
 			}
 
 			injail = prison_flag(inp->inp_cred, PR_IP6);
 			if (injail) {
 				if (prison_check_ip6(inp->inp_cred,
 				    laddr) != 0)
 					continue;
 			} else {
 				if (local_exact != NULL)
 					continue;
 			}
 
 			if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
 				if (injail)
 					goto found;
 				else
 					local_exact = inp;
 			} else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
 				if (injail)
 					jail_wild = inp;
 				else
 					local_wild = inp;
 			}
 		} /* LIST_FOREACH */
 
 		/* Jailed non-wild already returned above; take the rest in order. */
 		inp = jail_wild;
 		if (inp == NULL)
 			inp = local_exact;
 		if (inp == NULL)
 			inp = local_wild;
 		if (inp != NULL)
 			goto found;
 	}
 
 	/*
 	 * Then look for a wildcard match, if requested.
 	 */
 	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
 		struct inpcb *local_wild = NULL, *local_exact = NULL;
 		struct inpcb *jail_wild = NULL;
 		int injail;
 
 		/*
 		 * Order of socket selection - we always prefer jails.
 		 *      1. jailed, non-wild.
 		 *      2. jailed, wild.
 		 *      3. non-jailed, non-wild.
 		 *      4. non-jailed, wild.
 		 */
 		head = &pcbinfo->ipi_wildbase[INP_PCBHASH(
 		    INP6_PCBHASHKEY(&in6addr_any), lport, 0,
 		    pcbinfo->ipi_wildmask)];
 		LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
 			/* XXX inp locking */
 			if ((inp->inp_vflag & INP_IPV6) == 0)
 				continue;
 
 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
 			    inp->inp_lport != lport) {
 				continue;
 			}
 
 			injail = prison_flag(inp->inp_cred, PR_IP6);
 			if (injail) {
 				if (prison_check_ip6(inp->inp_cred,
 				    laddr) != 0)
 					continue;
 			} else {
 				if (local_exact != NULL)
 					continue;
 			}
 
 			if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
 				if (injail)
 					goto found;
 				else
 					local_exact = inp;
 			} else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
 				if (injail)
 					jail_wild = inp;
 				else
 					local_wild = inp;
 			}
 		} /* LIST_FOREACH */
 
 		/* Jailed non-wild already returned above; take the rest in order. */
 		inp = jail_wild;
 		if (inp == NULL)
 			inp = local_exact;
 		if (inp == NULL)
 			inp = local_wild;
 		if (inp != NULL)
 			goto found;
 	} /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
 	INP_GROUP_UNLOCK(pcbgroup);
 	return (NULL);
 
 found:
 	/*
 	 * Take a reference before dropping the group lock, then acquire the
 	 * requested inpcb lock and release the reference again.
 	 */
 	in_pcbref(inp);
 	INP_GROUP_UNLOCK(pcbgroup);
 	if (lookupflags & INPLOOKUP_WLOCKPCB) {
 		INP_WLOCK(inp);
 		if (in_pcbrele_wlocked(inp))
 			return (NULL);
 	} else if (lookupflags & INPLOOKUP_RLOCKPCB) {
 		INP_RLOCK(inp);
 		if (in_pcbrele_rlocked(inp))
 			return (NULL);
 	} else
 		panic("%s: locking bug", __func__);
 	return (inp);
 }
 #endif /* PCBGROUP */
 
 /*
  * Lookup PCB in hash list.
  *
  * The pcbinfo hash lock must be held by the caller (asserted below); no
  * inpcb lock or reference is taken.  An exact match on both addresses and
  * both ports is tried first; with INPLOOKUP_WILDCARD, unconnected PCBs on
  * the local port are considered next.  Returns NULL when nothing matches.
  * The ifp argument is not used.
  */
 static struct inpcb *
 in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
     u_int fport_arg, struct in6_addr *laddr, u_int lport_arg,
     int lookupflags, struct ifnet *ifp)
 {
 	struct inpcbhead *head;
 	struct inpcb *inp, *tmpinp;
 	u_short fport = fport_arg, lport = lport_arg;
 
 	KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 
 	INP_HASH_LOCK_ASSERT(pcbinfo);
 
 	/*
 	 * First look for an exact match.
 	 */
 	tmpinp = NULL;
 	head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
 	    INP6_PCBHASHKEY(faddr), lport, fport, pcbinfo->ipi_hashmask)];
 	LIST_FOREACH(inp, head, inp_hash) {
 		/* XXX inp locking */
 		if ((inp->inp_vflag & INP_IPV6) == 0)
 			continue;
 		if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
 		    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
 		    inp->inp_fport == fport &&
 		    inp->inp_lport == lport) {
 			/*
 			 * XXX We should be able to directly return
 			 * the inp here, without any checks.
 			 * Well unless both bound with SO_REUSEPORT?
 			 */
 			if (prison_flag(inp->inp_cred, PR_IP6))
 				return (inp);
 			/* Remember the first non-jailed exact match. */
 			if (tmpinp == NULL)
 				tmpinp = inp;
 		}
 	}
 	if (tmpinp != NULL)
 		return (tmpinp);
 
 	/*
 	 * Then look for a wildcard match, if requested.
 	 */
 	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
 		struct inpcb *local_wild = NULL, *local_exact = NULL;
 		struct inpcb *jail_wild = NULL;
 		int injail;
 
 		/*
 		 * Order of socket selection - we always prefer jails.
 		 *      1. jailed, non-wild.
 		 *      2. jailed, wild.
 		 *      3. non-jailed, non-wild.
 		 *      4. non-jailed, wild.
 		 */
 		head = &pcbinfo->ipi_hashbase[INP_PCBHASH(
 		    INP6_PCBHASHKEY(&in6addr_any), lport, 0,
 		    pcbinfo->ipi_hashmask)];
 		LIST_FOREACH(inp, head, inp_hash) {
 			/* XXX inp locking */
 			if ((inp->inp_vflag & INP_IPV6) == 0)
 				continue;
 
 			/* Only unconnected PCBs on this local port qualify. */
 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
 			    inp->inp_lport != lport) {
 				continue;
 			}
 
 			injail = prison_flag(inp->inp_cred, PR_IP6);
 			if (injail) {
 				if (prison_check_ip6(inp->inp_cred,
 				    laddr) != 0)
 					continue;
 			} else {
 				/* A non-jailed exact match was already found. */
 				if (local_exact != NULL)
 					continue;
 			}
 
 			if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
 				if (injail)
 					return (inp);
 				else
 					local_exact = inp;
 			} else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
 				if (injail)
 					jail_wild = inp;
 				else
 					local_wild = inp;
 			}
 		} /* LIST_FOREACH */
 
 		/* Jailed non-wild already returned; take the rest in order. */
 		if (jail_wild != NULL)
 			return (jail_wild);
 		if (local_exact != NULL)
 			return (local_exact);
 		if (local_wild != NULL)
 			return (local_wild);
 	} /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
 
 	/*
 	 * Not found.
 	 */
 	return (NULL);
 }
 
 /*
  * Lookup PCB in hash list, using pcbinfo tables.  This variation locks the
  * hash list lock, and will return the inpcb locked (i.e., requires
  * INPLOOKUP_LOCKPCB).
  */
 static struct inpcb *
 in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
     u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags,
     struct ifnet *ifp)
 {
 	struct inpcb *inp;
 
 	INP_HASH_RLOCK(pcbinfo);
 	inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
 	    (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
 	if (inp != NULL) {
 		in_pcbref(inp);
 		INP_HASH_RUNLOCK(pcbinfo);
 		if (lookupflags & INPLOOKUP_WLOCKPCB) {
 			INP_WLOCK(inp);
 			if (in_pcbrele_wlocked(inp))
 				return (NULL);
 		} else if (lookupflags & INPLOOKUP_RLOCKPCB) {
 			INP_RLOCK(inp);
 			if (in_pcbrele_rlocked(inp))
 				return (NULL);
 		} else
 			panic("%s: locking bug", __func__);
 	} else
 		INP_HASH_RUNLOCK(pcbinfo);
 	return (inp);
 }
 
 /*
  * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
  * from which a pre-calculated hash value may be extracted.
  *
  * Possibly more of this logic should be in in6_pcbgroup.c.
  */
 struct inpcb *
 in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport,
     struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp)
 {
 #if defined(PCBGROUP) && !defined(RSS)
 	struct inpcbgroup *pcbgroup;
 #endif
 
 	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
 	    ("%s: LOCKPCB not set", __func__));
 
 	/*
 	 * When not using RSS, use connection groups in preference to the
 	 * reservation table when looking up 4-tuples.  When using RSS, just
 	 * use the reservation table, due to the cost of the Toeplitz hash
 	 * in software.
 	 *
 	 * XXXRW: This policy belongs in the pcbgroup code, as in principle
 	 * we could be doing RSS with a non-Toeplitz hash that is affordable
 	 * in software.
 	 */
 #if defined(PCBGROUP) && !defined(RSS)
 	if (in_pcbgroup_enabled(pcbinfo)) {
 		pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
 		    fport);
 		return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
 		    laddr, lport, lookupflags, ifp));
 	}
 #endif
 	return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
 	    lookupflags, ifp));
 }
 
 struct inpcb *
 in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
     u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags,
     struct ifnet *ifp, struct mbuf *m)
 {
 #ifdef PCBGROUP
 	struct inpcbgroup *pcbgroup;
 #endif
 
 	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
 	    ("%s: LOCKPCB not set", __func__));
 
 #ifdef PCBGROUP
 	/*
 	 * If we can use a hardware-generated hash to look up the connection
 	 * group, use that connection group to find the inpcb.  Otherwise
 	 * fall back on a software hash -- or the reservation table if we're
 	 * using RSS.
 	 *
 	 * XXXRW: As above, that policy belongs in the pcbgroup code.
 	 */
 	if (in_pcbgroup_enabled(pcbinfo) &&
 	    M_HASHTYPE_TEST(m, M_HASHTYPE_NONE) == 0) {
 		pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
 		    m->m_pkthdr.flowid);
 		if (pcbgroup != NULL)
 			return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr,
 			    fport, laddr, lport, lookupflags, ifp));
 #ifndef RSS
 		pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
 		    fport);
 		return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
 		    laddr, lport, lookupflags, ifp));
 #endif
 	}
 #endif
 	return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
 	    lookupflags, ifp));
 }
 
 void
 init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m)
 {
 	struct ip6_hdr *ip;
 
 	ip = mtod(m, struct ip6_hdr *);
 	bzero(sin6, sizeof(*sin6));
 	sin6->sin6_len = sizeof(*sin6);
 	sin6->sin6_family = AF_INET6;
 	sin6->sin6_addr = ip->ip6_src;
 
 	(void)sa6_recoverscope(sin6); /* XXX: should catch errors... */
 
 	return;
 }
Index: projects/clang380-import/sys/netinet6/in6_rmx.c
===================================================================
--- projects/clang380-import/sys/netinet6/in6_rmx.c	(revision 293686)
+++ projects/clang380-import/sys/netinet6/in6_rmx.c	(revision 293687)
@@ -1,311 +1,283 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: in6_rmx.c,v 1.11 2001/07/26 06:53:16 jinmei Exp $
  */
 
 /*-
  * Copyright 1994, 1995 Massachusetts Institute of Technology
  *
  * Permission to use, copy, modify, and distribute this software and
  * its documentation for any purpose and without fee is hereby
  * granted, provided that both the above copyright notice and this
  * permission notice appear in all copies, that both the above
  * copyright notice and this permission notice appear in all
  * supporting documentation, and that the name of M.I.T. not be used
  * in advertising or publicity pertaining to distribution of the
  * software without specific, written prior permission.  M.I.T. makes
  * no representations about the suitability of this software for any
  * purpose.  It is provided "as is" without express or implied
  * warranty.
  *
  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/mbuf.h>
 #include <sys/rwlock.h>
 #include <sys/syslog.h>
 #include <sys/callout.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/ip_var.h>
 #include <netinet/in_var.h>
 
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 
 #include <netinet/icmp6.h>
 #include <netinet6/nd6.h>
 
 #include <netinet/tcp.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 
 extern int	in6_inithead(void **head, int off);
 #ifdef VIMAGE
 extern int	in6_detachhead(void **head, int off);
 #endif
 
 /*
  * Do what we need to do when inserting a route.
  */
 static struct radix_node *
 in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
     struct radix_node *treenodes)
 {
 	struct rtentry *rt = (struct rtentry *)treenodes;
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt);
-	struct radix_node *ret;
 
 	RADIX_NODE_HEAD_WLOCK_ASSERT(head);
 	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 		rt->rt_flags |= RTF_MULTICAST;
 
 	/*
 	 * A little bit of help for both IPv6 output and input:
 	 *   For local addresses, we make sure that RTF_LOCAL is set,
 	 *   with the thought that this might one day be used to speed up
 	 *   ip_input().
 	 *
 	 * We also mark routes to multicast addresses as such, because
 	 * it's easy to do and might be useful (but this is much more
 	 * dubious since it's so easy to inspect the address).  (This
 	 * is done above.)
 	 *
 	 * XXX
 	 * should elaborate the code.
 	 */
 	if (rt->rt_flags & RTF_HOST) {
 		if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr)
 					->sin6_addr,
 				       &sin6->sin6_addr)) {
 			rt->rt_flags |= RTF_LOCAL;
 		}
 	}
 
 	if (rt->rt_ifp != NULL) {
 
 		/*
 		 * Check route MTU:
 		 * inherit interface MTU if not set or
 		 * check if MTU is too large.
 		 */
 		if (rt->rt_mtu == 0) {
 			rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp);
 		} else if (rt->rt_mtu > IN6_LINKMTU(rt->rt_ifp))
 			rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp);
 	}
 
-	ret = rn_addroute(v_arg, n_arg, head, treenodes);
-	if (ret == NULL) {
-		struct rtentry *rt2;
-		/*
-		 * We are trying to add a net route, but can't.
-		 * The following case should be allowed, so we'll make a
-		 * special check for this:
-		 *	Two IPv6 addresses with the same prefix is assigned
-		 *	to a single interrface.
-		 *	# ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1)
-		 *	# ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2)
-		 *	In this case, (*1) and (*2) want to add the same
-		 *	net route entry, 3ffe:0501:: -> if0.
-		 *	This case should not raise an error.
-		 */
-		rt2 = in6_rtalloc1((struct sockaddr *)sin6, 0, RTF_RNH_LOCKED,
-		    rt->rt_fibnum);
-		if (rt2) {
-			if (((rt2->rt_flags & (RTF_HOST|RTF_GATEWAY)) == 0)
-			 && rt2->rt_gateway
-			 && rt2->rt_gateway->sa_family == AF_LINK
-			 && rt2->rt_ifp == rt->rt_ifp) {
-				ret = rt2->rt_nodes;
-			}
-			RTFREE_LOCKED(rt2);
-		}
-	}
-	return (ret);
+	return (rn_addroute(v_arg, n_arg, head, treenodes));
 }
 
 /*
  * Age old PMTUs.
  */
 struct mtuex_arg {
 	struct radix_node_head *rnh;
 	time_t nextstop;
 };
 static VNET_DEFINE(struct callout, rtq_mtutimer);
 #define	V_rtq_mtutimer			VNET(rtq_mtutimer)
 
 static int
 in6_mtuexpire(struct rtentry *rt, void *rock)
 {
 	struct mtuex_arg *ap = rock;
 
 	if (rt->rt_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
 		if (rt->rt_expire <= time_uptime) {
 			rt->rt_flags |= RTF_PROBEMTU;
 		} else {
 			ap->nextstop = lmin(ap->nextstop, rt->rt_expire);
 		}
 	}
 
 	return (0);
 }
 
 #define	MTUTIMO_DEFAULT	(60*1)
 
 static void
 in6_mtutimo_setwa(struct radix_node_head *rnh, uint32_t fibum, int af,
     void *_arg)
 {
 	struct mtuex_arg *arg;
 
 	arg = (struct mtuex_arg *)_arg;
 
 	arg->rnh = rnh;
 }
 
 static void
 in6_mtutimo(void *rock)
 {
 	CURVNET_SET_QUIET((struct vnet *) rock);
 	struct timeval atv;
 	struct mtuex_arg arg;
 
 	rt_foreach_fib_walk(AF_INET6, in6_mtutimo_setwa, in6_mtuexpire, &arg);
 
 	atv.tv_sec = MTUTIMO_DEFAULT;
 	atv.tv_usec = 0;
 	callout_reset(&V_rtq_mtutimer, tvtohz(&atv), in6_mtutimo, rock);
 	CURVNET_RESTORE();
 }
 
 /*
  * Initialize our routing tree.
  */
 static VNET_DEFINE(int, _in6_rt_was_here);
 #define	V__in6_rt_was_here	VNET(_in6_rt_was_here)
 
 int
 in6_inithead(void **head, int off)
 {
 	struct radix_node_head *rnh;
 
 	if (!rn_inithead(head, offsetof(struct sockaddr_in6, sin6_addr) << 3))
 		return (0);
 
 	rnh = *head;
 	RADIX_NODE_HEAD_LOCK_INIT(rnh);
 
 	rnh->rnh_addaddr = in6_addroute;
 
 	if (V__in6_rt_was_here == 0) {
 		callout_init(&V_rtq_mtutimer, 1);
 		in6_mtutimo(curvnet);	/* kick off timeout first time */
 		V__in6_rt_was_here = 1;
 	}
 
 	return (1);
 }
 
 #ifdef VIMAGE
 int
 in6_detachhead(void **head, int off)
 {
 
 	callout_drain(&V_rtq_mtutimer);
 	return (rn_detachhead(head));
 }
 #endif
 
 /*
  * Extended API for IPv6 FIB support.
  */
 void
 in6_rtredirect(struct sockaddr *dst, struct sockaddr *gw, struct sockaddr *nm,
     int flags, struct sockaddr *src, u_int fibnum)
 {
 
 	rtredirect_fib(dst, gw, nm, flags, src, fibnum);
 }
 
 int
 in6_rtrequest(int req, struct sockaddr *dst, struct sockaddr *gw,
     struct sockaddr *mask, int flags, struct rtentry **ret_nrt, u_int fibnum)
 {
 
 	return (rtrequest_fib(req, dst, gw, mask, flags, ret_nrt, fibnum));
 }
 
 void
 in6_rtalloc(struct route_in6 *ro, u_int fibnum)
 {
 
 	rtalloc_ign_fib((struct route *)ro, 0ul, fibnum);
 }
 
 void
 in6_rtalloc_ign(struct route_in6 *ro, u_long ignflags, u_int fibnum)
 {
 
 	rtalloc_ign_fib((struct route *)ro, ignflags, fibnum);
 }
 
 struct rtentry *
 in6_rtalloc1(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum)
 {
 
 	return (rtalloc1_fib(dst, report, ignflags, fibnum));
 }
Index: projects/clang380-import/sys/netinet6/in6_src.c
===================================================================
--- projects/clang380-import/sys/netinet6/in6_src.c	(revision 293686)
+++ projects/clang380-import/sys/netinet6/in6_src.c	(revision 293687)
@@ -1,1168 +1,1244 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)in_pcb.c	8.2 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_mpath.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/errno.h>
 #include <sys/time.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/rmlock.h>
 #include <sys/sx.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/route.h>
 #include <net/if_llatbl.h>
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 
 #include <netinet6/in6_var.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_fib.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/nd6.h>
 
 static struct mtx addrsel_lock;
 #define	ADDRSEL_LOCK_INIT()	mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF)
 #define	ADDRSEL_LOCK()		mtx_lock(&addrsel_lock)
 #define	ADDRSEL_UNLOCK()	mtx_unlock(&addrsel_lock)
 #define	ADDRSEL_LOCK_ASSERT()	mtx_assert(&addrsel_lock, MA_OWNED)
 
 static struct sx addrsel_sxlock;
 #define	ADDRSEL_SXLOCK_INIT()	sx_init(&addrsel_sxlock, "addrsel_sxlock")
 #define	ADDRSEL_SLOCK()		sx_slock(&addrsel_sxlock)
 #define	ADDRSEL_SUNLOCK()	sx_sunlock(&addrsel_sxlock)
 #define	ADDRSEL_XLOCK()		sx_xlock(&addrsel_sxlock)
 #define	ADDRSEL_XUNLOCK()	sx_xunlock(&addrsel_sxlock)
 
 #define ADDR_LABEL_NOTAPP (-1)
 static VNET_DEFINE(struct in6_addrpolicy, defaultaddrpolicy);
 #define	V_defaultaddrpolicy		VNET(defaultaddrpolicy)
 
 VNET_DEFINE(int, ip6_prefer_tempaddr) = 0;
 
 static int selectroute(struct sockaddr_in6 *, struct ip6_pktopts *,
 	struct ip6_moptions *, struct route_in6 *, struct ifnet **,
 	struct rtentry **, int, u_int);
 static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
 	struct ip6_moptions *, struct ifnet **,
 	struct ifnet *, u_int);
+static int in6_selectsrc(uint32_t, struct sockaddr_in6 *,
+	struct ip6_pktopts *, struct inpcb *, struct ucred *,
+	struct ifnet **, struct in6_addr *);
 
 static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *);
 
 static void init_policy_queue(void);
 static int add_addrsel_policyent(struct in6_addrpolicy *);
 static int delete_addrsel_policyent(struct in6_addrpolicy *);
 static int walk_addrsel_policy(int (*)(struct in6_addrpolicy *, void *),
 	void *);
 static int dump_addrsel_policyent(struct in6_addrpolicy *, void *);
 static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
 
 /*
  * Return an IPv6 address, which is the most appropriate for a given
  * destination and user specified options.
  * If necessary, this function lookups the routing table and returns
  * an entry to the caller for later use.
  */
 #define REPLACE(r) do {\
 	IP6STAT_INC(ip6s_sources_rule[(r)]); \
 	rule = (r);	\
 	/* { \
 	char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \
 	printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \
 	} */ \
 	goto replace; \
 } while(0)
 #define NEXT(r) do {\
 	/* { \
 	char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \
 	printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \
 	} */ \
 	goto next;		/* XXX: we can't use 'continue' here */ \
 } while(0)
 #define BREAK(r) do { \
 	IP6STAT_INC(ip6s_sources_rule[(r)]); \
 	rule = (r);	\
 	goto out;		/* XXX: we can't use 'break' here */ \
 } while(0)
 
-int
-in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
-    struct inpcb *inp, struct ucred *cred,
+static int
+in6_selectsrc(uint32_t fibnum, struct sockaddr_in6 *dstsock,
+    struct ip6_pktopts *opts, struct inpcb *inp, struct ucred *cred,
     struct ifnet **ifpp, struct in6_addr *srcp)
 {
 	struct rm_priotracker in6_ifa_tracker;
 	struct in6_addr dst, tmp;
 	struct ifnet *ifp = NULL, *oifp = NULL;
 	struct in6_ifaddr *ia = NULL, *ia_best = NULL;
 	struct in6_pktinfo *pi = NULL;
 	int dst_scope = -1, best_scope = -1, best_matchlen = -1;
 	struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
 	u_int32_t odstzone;
 	int prefer_tempaddr;
 	int error, rule;
 	struct ip6_moptions *mopts;
 
 	KASSERT(srcp != NULL, ("%s: srcp is NULL", __func__));
 
 	dst = dstsock->sin6_addr; /* make a copy for local operation */
 	if (ifpp) {
 		/*
 		 * Save a possibly passed in ifp for in6_selectsrc. Only
 		 * neighbor discovery code should use this feature, where
 		 * we may know the interface but not the FIB number holding
 		 * the connected subnet in case someone deleted it from the
 		 * default FIB and we need to check the interface.
 		 */
 		if (*ifpp != NULL)
 			oifp = *ifpp;
 		*ifpp = NULL;
 	}
 
 	if (inp != NULL) {
 		INP_LOCK_ASSERT(inp);
 		mopts = inp->in6p_moptions;
 	} else {
 		mopts = NULL;
 	}
 
 	/*
 	 * If the source address is explicitly specified by the caller,
 	 * check if the requested source address is indeed a unicast address
 	 * assigned to the node, and can be used as the packet's source
 	 * address.  If everything is okay, use the address as source.
 	 */
 	if (opts && (pi = opts->ip6po_pktinfo) &&
 	    !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
 		struct sockaddr_in6 srcsock;
 		struct in6_ifaddr *ia6;
 
 		/* get the outgoing interface */
 		if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp,
-		    (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB))
+		    fibnum))
 		    != 0)
 			return (error);
 
 		/*
 		 * determine the appropriate zone id of the source based on
 		 * the zone of the destination and the outgoing interface.
 		 * If the specified address is ambiguous wrt the scope zone,
 		 * the interface must be specified; otherwise, ifa_ifwithaddr()
 		 * will fail matching the address.
 		 */
 		bzero(&srcsock, sizeof(srcsock));
 		srcsock.sin6_family = AF_INET6;
 		srcsock.sin6_len = sizeof(srcsock);
 		srcsock.sin6_addr = pi->ipi6_addr;
 		if (ifp) {
 			error = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
 			if (error)
 				return (error);
 		}
 		if (cred != NULL && (error = prison_local_ip6(cred,
 		    &srcsock.sin6_addr, (inp != NULL &&
 		    (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
 			return (error);
 
 		ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(
 		    (struct sockaddr *)&srcsock);
 		if (ia6 == NULL ||
 		    (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) {
 			if (ia6 != NULL)
 				ifa_free(&ia6->ia_ifa);
 			return (EADDRNOTAVAIL);
 		}
 		pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */
 		if (ifpp)
 			*ifpp = ifp;
 		bcopy(&ia6->ia_addr.sin6_addr, srcp, sizeof(*srcp));
 		ifa_free(&ia6->ia_ifa);
 		return (0);
 	}
 
 	/*
 	 * Otherwise, if the socket has already bound the source, just use it.
 	 */
 	if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
 		if (cred != NULL &&
 		    (error = prison_local_ip6(cred, &inp->in6p_laddr,
 		    ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0)
 			return (error);
 		bcopy(&inp->in6p_laddr, srcp, sizeof(*srcp));
 		return (0);
 	}
 
 	/*
 	 * Bypass source address selection and use the primary jail IP
 	 * if requested.
 	 */
 	if (cred != NULL && !prison_saddrsel_ip6(cred, srcp))
 		return (0);
 
 	/*
 	 * If the address is not specified, choose the best one based on
 	 * the outgoing interface and the destination address.
 	 */
 	/* get the outgoing interface */
 	if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp,
 	    (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) != 0)
 		return (error);
 
 #ifdef DIAGNOSTIC
 	if (ifp == NULL)	/* this should not happen */
 		panic("in6_selectsrc: NULL ifp");
 #endif
 	error = in6_setscope(&dst, ifp, &odstzone);
 	if (error)
 		return (error);
 
 	rule = 0;
 	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
 	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
 		int new_scope = -1, new_matchlen = -1;
 		struct in6_addrpolicy *new_policy = NULL;
 		u_int32_t srczone, osrczone, dstzone;
 		struct in6_addr src;
 		struct ifnet *ifp1 = ia->ia_ifp;
 
 		/*
 		 * We'll never take an address that breaks the scope zone
 		 * of the destination.  We also skip an address if its zone
 		 * does not contain the outgoing interface.
 		 * XXX: we should probably use sin6_scope_id here.
 		 */
 		if (in6_setscope(&dst, ifp1, &dstzone) ||
 		    odstzone != dstzone) {
 			continue;
 		}
 		src = ia->ia_addr.sin6_addr;
 		if (in6_setscope(&src, ifp, &osrczone) ||
 		    in6_setscope(&src, ifp1, &srczone) ||
 		    osrczone != srczone) {
 			continue;
 		}
 
 		/* avoid unusable addresses */
 		if ((ia->ia6_flags &
 		     (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) {
 				continue;
 		}
 		if (!V_ip6_use_deprecated && IFA6_IS_DEPRECATED(ia))
 			continue;
 
 		/* If jailed only take addresses of the jail into account. */
 		if (cred != NULL &&
 		    prison_check_ip6(cred, &ia->ia_addr.sin6_addr) != 0)
 			continue;
 
 		/* Rule 1: Prefer same address */
 		if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) {
 			ia_best = ia;
 			BREAK(1); /* there should be no better candidate */
 		}
 
 		if (ia_best == NULL)
 			REPLACE(0);
 
 		/* Rule 2: Prefer appropriate scope */
 		if (dst_scope < 0)
 			dst_scope = in6_addrscope(&dst);
 		new_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
 		if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) {
 			if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0)
 				REPLACE(2);
 			NEXT(2);
 		} else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) {
 			if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0)
 				NEXT(2);
 			REPLACE(2);
 		}
 
 		/*
 		 * Rule 3: Avoid deprecated addresses.  Note that the case of
 		 * !ip6_use_deprecated is already rejected above.
 		 */
 		if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia))
 			NEXT(3);
 		if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia))
 			REPLACE(3);
 
 		/* Rule 4: Prefer home addresses */
 		/*
 		 * XXX: This is a TODO.  We should probably merge the MIP6
 		 * case above.
 		 */
 
 		/* Rule 5: Prefer outgoing interface */
 		if (!(ND_IFINFO(ifp)->flags & ND6_IFF_NO_PREFER_IFACE)) {
 			if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
 				NEXT(5);
 			if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
 				REPLACE(5);
 		}
 
 		/*
 		 * Rule 6: Prefer matching label
 		 * Note that best_policy should be non-NULL here.
 		 */
 		if (dst_policy == NULL)
 			dst_policy = lookup_addrsel_policy(dstsock);
 		if (dst_policy->label != ADDR_LABEL_NOTAPP) {
 			new_policy = lookup_addrsel_policy(&ia->ia_addr);
 			if (dst_policy->label == best_policy->label &&
 			    dst_policy->label != new_policy->label)
 				NEXT(6);
 			if (dst_policy->label != best_policy->label &&
 			    dst_policy->label == new_policy->label)
 				REPLACE(6);
 		}
 
 		/*
 		 * Rule 7: Prefer public addresses.
 		 * We allow users to reverse the logic by configuring
 		 * a sysctl variable, so that privacy conscious users can
 		 * always prefer temporary addresses.
 		 */
 		if (opts == NULL ||
 		    opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) {
 			prefer_tempaddr = V_ip6_prefer_tempaddr;
 		} else if (opts->ip6po_prefer_tempaddr ==
 		    IP6PO_TEMPADDR_NOTPREFER) {
 			prefer_tempaddr = 0;
 		} else
 			prefer_tempaddr = 1;
 		if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
 		    (ia->ia6_flags & IN6_IFF_TEMPORARY)) {
 			if (prefer_tempaddr)
 				REPLACE(7);
 			else
 				NEXT(7);
 		}
 		if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
 		    !(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
 			if (prefer_tempaddr)
 				NEXT(7);
 			else
 				REPLACE(7);
 		}
 
 		/*
 		 * Rule 8: prefer addresses on alive interfaces.
 		 * This is a KAME specific rule.
 		 */
 		if ((ia_best->ia_ifp->if_flags & IFF_UP) &&
 		    !(ia->ia_ifp->if_flags & IFF_UP))
 			NEXT(8);
 		if (!(ia_best->ia_ifp->if_flags & IFF_UP) &&
 		    (ia->ia_ifp->if_flags & IFF_UP))
 			REPLACE(8);
 
 		/*
 		 * Rule 9: prefer address with better virtual status.
 		 */
 		if (ifa_preferred(&ia_best->ia_ifa, &ia->ia_ifa))
 			REPLACE(9);
 		if (ifa_preferred(&ia->ia_ifa, &ia_best->ia_ifa))
 			NEXT(9);
 
 		/*
 		 * Rule 10: prefer address with `prefer_source' flag.
 		 */
 		if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0 &&
 		    (ia->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0)
 			REPLACE(10);
 		if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0 &&
 		    (ia->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0)
 			NEXT(10);
 
 		/*
 		 * Rule 14: Use longest matching prefix.
 		 * Note: in the address selection draft, this rule is
 		 * documented as "Rule 8".  However, since it is also
 		 * documented that this rule can be overridden, we assign
 		 * a large number so that it is easy to assign smaller numbers
 		 * to more preferred rules.
 		 */
 		new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst);
 		if (best_matchlen < new_matchlen)
 			REPLACE(14);
 		if (new_matchlen < best_matchlen)
 			NEXT(14);
 
 		/* Rule 15 is reserved. */
 
 		/*
 		 * Last resort: just keep the current candidate.
 		 * Or, do we need more rules?
 		 */
 		continue;
 
 	  replace:
 		ia_best = ia;
 		best_scope = (new_scope >= 0 ? new_scope :
 			      in6_addrscope(&ia_best->ia_addr.sin6_addr));
 		best_policy = (new_policy ? new_policy :
 			       lookup_addrsel_policy(&ia_best->ia_addr));
 		best_matchlen = (new_matchlen >= 0 ? new_matchlen :
 				 in6_matchlen(&ia_best->ia_addr.sin6_addr,
 					      &dst));
 
 	  next:
 		continue;
 
 	  out:
 		break;
 	}
 
 	if ((ia = ia_best) == NULL) {
 		IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 		IP6STAT_INC(ip6s_sources_none);
 		return (EADDRNOTAVAIL);
 	}
 
 	/*
 	 * At this point at least one of the addresses belonged to the jail
 	 * but it could still be, that we want to further restrict it, e.g.
 	 * theoretically IN6_IS_ADDR_LOOPBACK.
 	 * It must not be IN6_IS_ADDR_UNSPECIFIED anymore.
 	 * prison_local_ip6() will fix an IN6_IS_ADDR_LOOPBACK but should
 	 * let all others previously selected pass.
 	 * Use tmp to not change ::1 on lo0 to the primary jail address.
 	 */
 	tmp = ia->ia_addr.sin6_addr;
 	if (cred != NULL && prison_local_ip6(cred, &tmp, (inp != NULL &&
 	    (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) {
 		IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 		IP6STAT_INC(ip6s_sources_none);
 		return (EADDRNOTAVAIL);
 	}
 
 	if (ifpp)
 		*ifpp = ifp;
 
 	bcopy(&tmp, srcp, sizeof(*srcp));
 	if (ia->ia_ifp == ifp)
 		IP6STAT_INC(ip6s_sources_sameif[best_scope]);
 	else
 		IP6STAT_INC(ip6s_sources_otherif[best_scope]);
 	if (dst_scope == best_scope)
 		IP6STAT_INC(ip6s_sources_samescope[best_scope]);
 	else
 		IP6STAT_INC(ip6s_sources_otherscope[best_scope]);
 	if (IFA6_IS_DEPRECATED(ia))
 		IP6STAT_INC(ip6s_sources_deprecated[best_scope]);
 	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
 	return (0);
+}
+
+/*
+ * Socket-oriented front end to in6_selectsrc().
+ *
+ * The FIB is taken from @inp when one is supplied, otherwise the default
+ * FIB is used.  The chosen source address is written to @srcp.  When @hlim
+ * is non-NULL it receives the hop limit computed by in6_selecthlim() for
+ * @inp and the selected outgoing interface.  When @scope_ambiguous is
+ * non-zero and an outgoing interface was determined, the zone of that
+ * interface is embedded into @dstsock->sin6_addr.
+ *
+ * Returns 0 on success, or the error reported by in6_selectsrc() or
+ * in6_setscope().
+ */
+int
+in6_selectsrc_socket(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
+    struct inpcb *inp, struct ucred *cred, int scope_ambiguous,
+    struct in6_addr *srcp, int *hlim)
+{
+	struct ifnet *outifp = NULL;
+	uint32_t fib;
+	int rc;
+
+	if (inp != NULL)
+		fib = inp->inp_inc.inc_fibnum;
+	else
+		fib = RT_DEFAULT_FIB;
+
+	rc = in6_selectsrc(fib, dstsock, opts, inp, cred, &outifp, srcp);
+	if (rc != 0)
+		return (rc);
+
+	if (hlim != NULL)
+		*hlim = in6_selecthlim(inp, outifp);
+
+	/*
+	 * Applications are expected to supply a proper zone ID, or default
+	 * zone IDs should be enabled.  Some applications do neither, so as
+	 * a workaround the zone ID is derived from the outgoing interface
+	 * whenever that interface could be determined.
+	 */
+	if (outifp != NULL && scope_ambiguous != 0)
+		return (in6_setscope(&dstsock->sin6_addr, outifp, NULL));
+
+	return (0);
+}
+
+/*
+ * Address-oriented front end to in6_selectsrc() for consumers that have
+ * no socket (mostly the ND code).
+ *
+ * A temporary sockaddr_in6 is built from @dst and @scopeid, with the scope
+ * embedded via sa6_embedscope(), and handed to in6_selectsrc() together
+ * with @fibnum.  @ifp seeds the interface pointer passed to
+ * in6_selectsrc().  The chosen source address is written to @srcp.
+ *
+ * When @hlim is non-NULL it is filled in from in6_selecthlim() regardless
+ * of whether the selection succeeded.
+ *
+ * Returns 0 on success, otherwise the error from in6_selectsrc().
+ */
+int
+in6_selectsrc_addr(uint32_t fibnum, const struct in6_addr *dst,
+    uint32_t scopeid, struct ifnet *ifp, struct in6_addr *srcp,
+    int *hlim)
+{
+	struct sockaddr_in6 sin6;
+	struct ifnet *outifp = ifp;
+	int rc;
+
+	bzero(&sin6, sizeof(sin6));
+	sin6.sin6_len = sizeof(sin6);
+	sin6.sin6_family = AF_INET6;
+	sin6.sin6_scope_id = scopeid;
+	sin6.sin6_addr = *dst;
+	sa6_embedscope(&sin6, 0);
+
+	rc = in6_selectsrc(fibnum, &sin6, NULL, NULL, NULL, &outifp, srcp);
+
+	if (hlim != NULL)
+		*hlim = in6_selecthlim(NULL, outifp);
+
+	return (rc);
 }
 
 /*
  * Choose the outgoing interface and/or route for the destination @dstsock.
  *
  * Hints are consulted in this order:
  *  1. an explicit interface index in @opts->ip6po_pktinfo;
  *  2. the multicast interface in @mopts (multicast destinations only);
  *  3. the zone id embedded in a link-local or node-local multicast
  *     destination;
  *  4. an explicit next hop in @opts->ip6po_nexthop;
  *  5. the route cached in @ro, or a fresh lookup into @ro using @fibnum.
  *
  * @norouteok allows an explicitly requested interface to be returned
  * without a route lookup.  @retifp and @retrt are optional outputs and may
  * each be NULL; *@retrt may be set to NULL even on success.
  *
  * Returns 0 or EHOSTUNREACH.  Every EHOSTUNREACH is counted in
  * ip6s_noroute.
  */
 static int
 selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
     struct ip6_moptions *mopts, struct route_in6 *ro,
     struct ifnet **retifp, struct rtentry **retrt, int norouteok, u_int fibnum)
 {
 	int error = 0;
 	struct ifnet *ifp = NULL;
 	struct rtentry *rt = NULL;
 	struct sockaddr_in6 *sin6_next;
 	struct in6_pktinfo *pi = NULL;
 	struct in6_addr *dst = &dstsock->sin6_addr;
 	uint32_t zoneid;
 #if 0
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	if (dstsock->sin6_addr.s6_addr32[0] == 0 &&
 	    dstsock->sin6_addr.s6_addr32[1] == 0 &&
 	    !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) {
 		printf("in6_selectroute: strange destination %s\n",
 		       ip6_sprintf(ip6buf, &dstsock->sin6_addr));
 	} else {
 		printf("in6_selectroute: destination = %s%%%d\n",
 		       ip6_sprintf(ip6buf, &dstsock->sin6_addr),
 		       dstsock->sin6_scope_id); /* for debug */
 	}
 #endif
 
 	/* If the caller specifies the outgoing interface explicitly, use it. */
 	if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) {
 		/* XXX boundary check is assumed to be already done. */
 		ifp = ifnet_byindex(pi->ipi6_ifindex);
 		if (ifp != NULL &&
 		    (norouteok || retrt == NULL ||
 		    IN6_IS_ADDR_MULTICAST(dst))) {
 			/*
 			 * No route is needed when the caller accepts the
 			 * lack of one, does not ask for a route at all, or
 			 * the destination is multicast.
 			 */
 			goto done;
 		} else
 			goto getroute;
 	}
 	/*
 	 * If the destination address is a multicast address and the outgoing
 	 * interface for the address is specified by the caller, use it.
 	 */
 	if (IN6_IS_ADDR_MULTICAST(dst) &&
 	    mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) {
 		goto done; /* we do not need a route for multicast. */
 	}
 	/*
 	 * If the destination is a link-local or node-local multicast
 	 * address, use its embedded scope zone id to determine the outgoing
 	 * interface.
 	 * NOTE(review): only the multicast scopes are tested here; a
 	 * unicast link-local destination falls through to the route lookup.
 	 */
 	if (IN6_IS_ADDR_MC_LINKLOCAL(dst) ||
 	    IN6_IS_ADDR_MC_NODELOCAL(dst)) {
 		zoneid = ntohs(in6_getscope(dst));
 		if (zoneid > 0) {
 			ifp = in6_getlinkifnet(zoneid);
 			goto done;
 		}
 	}
 
   getroute:
 	/*
 	 * If the next hop address for the packet is specified by the caller,
 	 * use it as the gateway.
 	 */
 	if (opts && opts->ip6po_nexthop) {
 		struct route_in6 *ron;
 
 		sin6_next = satosin6(opts->ip6po_nexthop);
 		if (IN6_IS_ADDR_LINKLOCAL(&sin6_next->sin6_addr)) {
 			/*
 			 * Next hop is LLA, thus it should be neighbor.
 			 * Determine outgoing interface by zone index.
 			 */
 			zoneid = ntohs(in6_getscope(&sin6_next->sin6_addr));
 			if (zoneid > 0) {
 				ifp = in6_getlinkifnet(zoneid);
 				goto done;
 			}
 		}
 		ron = &opts->ip6po_nextroute;
 		/* Drop the cached next-hop route if it is no longer valid. */
 		if (ron->ro_rt != NULL && (
 		    (ron->ro_rt->rt_flags & RTF_UP) == 0 ||
 		    ron->ro_dst.sin6_family != AF_INET6 ||
 		    !IN6_ARE_ADDR_EQUAL(&ron->ro_dst.sin6_addr,
 			&sin6_next->sin6_addr)))
 			RO_RTFREE(ron);
 		if (ron->ro_rt == NULL) {
 			ron->ro_dst = *sin6_next;
 			in6_rtalloc(ron, fibnum); /* multi path case? */
 		}
 		/*
 		 * The node identified by that address must be a
 		 * neighbor of the sending host.
 		 */
 		if (ron->ro_rt == NULL ||
 		    (ron->ro_rt->rt_flags & RTF_GATEWAY) != 0)
 			error = EHOSTUNREACH;
 		else {
 			/*
 			 * Hand the validated next-hop route to the caller.
 			 * Without this, ifp and rt could both still be NULL
 			 * at "done" and a usable next hop would be reported
 			 * as EHOSTUNREACH.
 			 */
 			rt = ron->ro_rt;
 			ifp = rt->rt_ifp;
 		}
 		goto done;
 	}
 
 	/*
 	 * Use a cached route if it exists and is valid, else try to allocate
 	 * a new one.  Note that we should check the address family of the
 	 * cached destination, in case of sharing the cache with IPv4.
 	 */
 	if (ro) {
 		if (ro->ro_rt &&
 		    (!(ro->ro_rt->rt_flags & RTF_UP) ||
 		     ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 ||
 		     !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr,
 		     dst))) {
 			RTFREE(ro->ro_rt);
 			ro->ro_rt = (struct rtentry *)NULL;
 		}
 		if (ro->ro_rt == (struct rtentry *)NULL) {
 			struct sockaddr_in6 *sa6;
 
 			/* No route yet, so try to acquire one */
 			bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
 			sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
 			*sa6 = *dstsock;
 			sa6->sin6_scope_id = 0;
 
 #ifdef RADIX_MPATH
 				rtalloc_mpath_fib((struct route *)ro,
 				    ntohl(sa6->sin6_addr.s6_addr32[3]), fibnum);
 #else			
 				ro->ro_rt = in6_rtalloc1((struct sockaddr *)
 				    &ro->ro_dst, 0, 0UL, fibnum);
 				if (ro->ro_rt)
 					RT_UNLOCK(ro->ro_rt);
 #endif
 		}
 				
 		/*
 		 * do not care about the result if we have the nexthop
 		 * explicitly specified.
 		 * NOTE(review): the next-hop branch above always jumps to
 		 * "done", so this test cannot be true here.
 		 */
 		if (opts && opts->ip6po_nexthop)
 			goto done;
 
 		if (ro->ro_rt) {
 			ifp = ro->ro_rt->rt_ifp;
 
 			if (ifp == NULL) { /* can this really happen? */
 				RTFREE(ro->ro_rt);
 				ro->ro_rt = NULL;
 			}
 		}
 		if (ro->ro_rt == NULL)
 			error = EHOSTUNREACH;
 		rt = ro->ro_rt;
 
 		/*
 		 * Check if the outgoing interface conflicts with
 		 * the interface specified by ipi6_ifindex (if specified).
 		 * Note that loopback interface is always okay.
 		 * (this may happen when we are sending a packet to one of
 		 *  our own addresses.)
 		 */
 		if (ifp && opts && opts->ip6po_pktinfo &&
 		    opts->ip6po_pktinfo->ipi6_ifindex) {
 			if (!(ifp->if_flags & IFF_LOOPBACK) &&
 			    ifp->if_index !=
 			    opts->ip6po_pktinfo->ipi6_ifindex) {
 				error = EHOSTUNREACH;
 				goto done;
 			}
 		}
 	}
 
   done:
 	if (ifp == NULL && rt == NULL) {
 		/*
 		 * This can happen if the caller did not pass a cached route
 		 * nor any other hints.  We treat this case as an error.
 		 */
 		error = EHOSTUNREACH;
 	}
 	if (error == EHOSTUNREACH)
 		IP6STAT_INC(ip6s_noroute);
 
 	if (retifp != NULL) {
 		*retifp = ifp;
 
 		/*
 		 * Adjust the "outgoing" interface.  If we're going to loop
 		 * the packet back to ourselves, the ifp would be the loopback
 		 * interface. However, we'd rather know the interface associated
 		 * to the destination address (which should probably be one of
 		 * our own addresses.)
 		 */
 		if (rt) {
 			if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) &&
 			    (rt->rt_gateway->sa_family == AF_LINK))
 				*retifp = 
 					ifnet_byindex(((struct sockaddr_dl *)
 						       rt->rt_gateway)->sdl_index);
 		}
 	}
 
 	if (retrt != NULL)
 		*retrt = rt;	/* rt may be NULL */
 
 	return (error);
 }
 
 /*
  * Determine only the outgoing interface for @dstsock and store it in
  * *@retifp, which must not be NULL.
  *
  * selectroute() is called with a temporary route_in6 and norouteok set.
  * The route it returns is released here when it is the one held in that
  * temporary structure; only its flags are kept for the check below.
  *
  * If selectroute() fails, and the caller supplied @oifp while using the
  * default FIB, @oifp is returned and the error is cleared.  Otherwise the
  * error is returned.  A route flagged REJECT or BLACKHOLE yields
  * EHOSTUNREACH for a host route and ENETUNREACH otherwise.
  */
 static int
 in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
     struct ip6_moptions *mopts, struct ifnet **retifp,
     struct ifnet *oifp, u_int fibnum)
 {
 	int error;
 	struct route_in6 sro;
 	struct rtentry *rt = NULL;
 	int rt_flags;
 
 	KASSERT(retifp != NULL, ("%s: retifp is NULL", __func__));
 
 	bzero(&sro, sizeof(sro));
 	rt_flags = 0;
 
 	error = selectroute(dstsock, opts, mopts, &sro, retifp, &rt, 1, fibnum);
 
 	/* Save the flags before the route may be released. */
 	if (rt)
 		rt_flags = rt->rt_flags;
 	if (rt && rt == sro.ro_rt)
 		RTFREE(rt);
 
 	if (error != 0) {
 		/* Help ND. See oifp comment in in6_selectsrc(). */
 		if (oifp != NULL && fibnum == RT_DEFAULT_FIB) {
 			*retifp = oifp;
 			error = 0;
 		}
 		return (error);
 	}
 
 	/*
 	 * do not use a rejected or black hole route.
 	 * XXX: this check should be done in the L2 output routine.
 	 * However, if we skipped this check here, we'd see the following
 	 * scenario:
 	 * - install a rejected route for a scoped address prefix
 	 *   (like fe80::/10)
 	 * - send a packet to a destination that matches the scoped prefix,
 	 *   with ambiguity about the scope zone.
 	 * - pick the outgoing interface from the route, and disambiguate the
 	 *   scope zone with the interface.
 	 * - ip6_output() would try to get another route with the "new"
 	 *   destination, which may be valid.
 	 * - we'd see no error on output.
 	 * Although this may not be very harmful, it should still be confusing.
 	 * We thus reject the case here.
 	 */
 
 	if (rt_flags & (RTF_REJECT | RTF_BLACKHOLE)) {
 		error = (rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
 		return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Public wrapper function to selectroute().
  *
  * XXX-BZ in6_selectroute() should and will grow the FIB argument. The
  * in6_selectroute_fib() function is only there for backward compat on stable.
  */
 int
 in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
     struct ip6_moptions *mopts, struct route_in6 *ro,
     struct ifnet **retifp, struct rtentry **retrt)
 {
 
 	return (selectroute(dstsock, opts, mopts, ro, retifp,
 	    retrt, 0, RT_DEFAULT_FIB));
 }
 
 #ifndef BURN_BRIDGES
 /*
  * Same as in6_selectroute(), but the lookup is done in the caller-supplied
  * FIB @fibnum instead of the default one.
  */
 int
 in6_selectroute_fib(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
     struct ip6_moptions *mopts, struct route_in6 *ro,
     struct ifnet **retifp, struct rtentry **retrt, u_int fibnum)
 {
 	int error;
 
 	error = selectroute(dstsock, opts, mopts, ro, retifp, retrt, 0,
 	    fibnum);
 	return (error);
 }
 #endif
 
 /*
  * Default hop limit selection. The precedence is as follows:
  * 1. Hoplimit value specified via ioctl.
  * 2. (If the outgoing interface is detected) the current
  *     hop limit of the interface specified by router advertisement.
  * 3. The system default hoplimit.
  */
 int
 in6_selecthlim(struct inpcb *in6p, struct ifnet *ifp)
 {
 
 	if (in6p && in6p->in6p_hops >= 0)
 		return (in6p->in6p_hops);
 	else if (ifp)
 		return (ND_IFINFO(ifp)->chlim);
 	else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) {
 		struct nhop6_basic nh6;
 		struct in6_addr dst;
 		uint32_t fibnum, scopeid;
 		int hlim;
 
 		fibnum = in6p->inp_inc.inc_fibnum;
 		in6_splitscope(&in6p->in6p_faddr, &dst, &scopeid);
 		if (fib6_lookup_nh_basic(fibnum, &dst, scopeid, 0, 0, &nh6)==0){
 			hlim = ND_IFINFO(nh6.nh_ifp)->chlim;
 			return (hlim);
 		}
 	}
 	return (V_ip6_defhlim);
 }
 
 /*
  * Assign an anonymous local port to @inp and insert it into the PCB hash.
  *
  * @laddr is first passed through prison_local_ip6() for @cred, honouring
  * the PCB's IN6P_IPV6_V6ONLY flag.  The port is then obtained from
  * in_pcb_lport().  If inserting into the hash fails, the PCB's local
  * address is reset to in6addr_any, its local port to 0, and EAGAIN is
  * returned.
  *
  * The caller must hold the inpcb write lock and the PCB hash write lock
  * (both are asserted).  Returns 0 on success or an errno value.
  *
  * XXX: this is borrowed from in6_pcbbind(). If possible, we should
  * share this function by all *bsd*...
  */
 int
 in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred)
 {
 	struct socket *so = inp->inp_socket;
 	u_int16_t lport = 0;
 	int error, lookupflags = 0;
 #ifdef INVARIANTS
 	/* Only referenced by the hash lock assertion below. */
 	struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
 #endif
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(pcbinfo);
 
 	error = prison_local_ip6(cred, laddr,
 	    ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0));
 	if (error)
 		return(error);
 
 	/* XXX: this is redundant when called from in6_pcbbind */
 	/* Wildcard lookup unless address or port reuse was requested. */
 	if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
 		lookupflags = INPLOOKUP_WILDCARD;
 
 	/* NOTE(review): the flag stays set if in_pcb_lport() fails below. */
 	inp->inp_flags |= INP_ANONPORT;
 
 	error = in_pcb_lport(inp, NULL, &lport, cred, lookupflags);
 	if (error != 0)
 		return (error);
 
 	inp->inp_lport = lport;
 	if (in_pcbinshash(inp) != 0) {
 		/* Roll back the binding when the hash insert fails. */
 		inp->in6p_laddr = in6addr_any;
 		inp->inp_lport = 0;
 		return (EAGAIN);
 	}
 
 	return (0);
 }
 
 /*
  * Initialize address selection policy state for the current vnet: the
  * policy queue and the default ("last resort") policy.  The ADDRSEL locks
  * are initialized only when running in the default vnet.
  */
 void
 addrsel_policy_init(void)
 {
 
 	init_policy_queue();
 
 	/* initialize the "last resort" policy */
 	bzero(&V_defaultaddrpolicy, sizeof(V_defaultaddrpolicy));
 	V_defaultaddrpolicy.label = ADDR_LABEL_NOTAPP;
 
 	/* The locks below are set up once, by the default vnet only. */
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 
 	ADDRSEL_LOCK_INIT();
 	ADDRSEL_SXLOCK_INIT();
 }
 
 /*
  * Return the address selection policy that matches @key, falling back to
  * the per-vnet default policy when the table has no match.  The use
  * counter of a matched table entry is incremented.  The lookup and the
  * counter update both happen under ADDRSEL_LOCK.
  */
 static struct in6_addrpolicy *
 lookup_addrsel_policy(struct sockaddr_in6 *key)
 {
 	struct in6_addrpolicy *pol;
 
 	ADDRSEL_LOCK();
 	pol = match_addrsel_policy(key);
 	if (pol != NULL)
 		pol->use++;
 	else
 		pol = &V_defaultaddrpolicy;
 	ADDRSEL_UNLOCK();
 
 	return (pol);
 }
 
 /*
  * Subroutines to manage the address selection policy table via sysctl.
  */
 /* Context handed to the policy-table walk callback. */
 struct walkarg {
 	struct sysctl_req *w_req;	/* sysctl request receiving the output */
 };
 
 /*
  * Read-only sysctl node "addrctlpolicy" under _net_inet6_ip6, served by
  * in6_src_sysctl().
  */
 static int in6_src_sysctl(SYSCTL_HANDLER_ARGS);
 SYSCTL_DECL(_net_inet6_ip6);
 static SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy,
 	CTLFLAG_RD, in6_src_sysctl, "");
 
 static int
 in6_src_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct walkarg w;
 
 	if (req->newptr)
 		return EPERM;
 
 	bzero(&w, sizeof(w));
 	w.w_req = req;
 
 	return (walk_addrsel_policy(dump_addrsel_policyent, &w));
 }
 
 /*
  * Handle the ioctls that add (SIOCAADDRCTL_POLICY) or delete
  * (SIOCDADDRCTL_POLICY) an address selection policy entry.  @data points
  * to a struct in6_addrpolicy, which is copied and sanitized before use.
  *
  * Returns EOPNOTSUPP for any other command, EINVAL for an entry carrying
  * the reserved ADDR_LABEL_NOTAPP label or a non-contiguous prefix mask,
  * and otherwise the result of the add or delete operation.
  */
 int
 in6_src_ioctl(u_long cmd, caddr_t data)
 {
 	struct in6_addrpolicy pol;
 
 	switch (cmd) {
 	case SIOCAADDRCTL_POLICY:
 	case SIOCDADDRCTL_POLICY:
 		break;
 	default:
 		return (EOPNOTSUPP); /* check for safety */
 	}
 
 	pol = *(struct in6_addrpolicy *)data;
 
 	if (pol.label == ADDR_LABEL_NOTAPP)
 		return (EINVAL);
 
 	/* The prefix mask must consist of consecutive leading ones. */
 	if (in6_mask2len(&pol.addrmask.sin6_addr, NULL) < 0)
 		return (EINVAL);
 
 	/* Clear any bits of the prefix address that lie outside the mask. */
 	IN6_MASK_ADDR(&pol.addr.sin6_addr, &pol.addrmask.sin6_addr);
 	pol.use = 0;
 
 	if (cmd == SIOCAADDRCTL_POLICY)
 		return (add_addrsel_policyent(&pol));
 	return (delete_addrsel_policyent(&pol));
 }
 
 /*
  * What follows is an implementation of the policy table using a
  * simple tail queue.
  * XXX such details should be hidden.
  * XXX implementation using binary tree should be more efficient.
  */
 /* One entry of the policy table: a queue linkage plus the policy itself. */
 struct addrsel_policyent {
 	TAILQ_ENTRY(addrsel_policyent) ape_entry;
 	struct in6_addrpolicy ape_policy;
 };
 
 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent);
 
 /* Per-vnet head of the policy table. */
 static VNET_DEFINE(struct addrsel_policyhead, addrsel_policytab);
 #define	V_addrsel_policytab		VNET(addrsel_policytab)
 
 /* Initialize the current vnet's policy table to an empty tail queue. */
 static void
 init_policy_queue(void)
 {
 
 	TAILQ_INIT(&V_addrsel_policytab);
 }
 
 /*
  * Append a copy of @newpolicy to the policy table.
  *
  * Returns EEXIST, without modifying the table, when an entry with the
  * same address and mask is already present; returns 0 otherwise.
  */
 static int
 add_addrsel_policyent(struct in6_addrpolicy *newpolicy)
 {
 	struct addrsel_policyent *new, *pol;
 
 	/*
 	 * Allocate before taking the locks.
 	 * NOTE(review): presumably because M_WAITOK may sleep -- confirm.
 	 */
 	new = malloc(sizeof(*new), M_IFADDR,
 	       M_WAITOK);
 	ADDRSEL_XLOCK();
 	ADDRSEL_LOCK();
 
 	/* duplication check */
 	TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) {
 		if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr,
 				       &pol->ape_policy.addr.sin6_addr) &&
 		    IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr,
 				       &pol->ape_policy.addrmask.sin6_addr)) {
 			/* Release in reverse order of acquisition. */
 			ADDRSEL_UNLOCK();
 			ADDRSEL_XUNLOCK();
 			free(new, M_IFADDR);
 			return (EEXIST);	/* or override it? */
 		}
 	}
 
 	bzero(new, sizeof(*new));
 
 	/* XXX: should validate entry */
 	new->ape_policy = *newpolicy;
 
 	TAILQ_INSERT_TAIL(&V_addrsel_policytab, new, ape_entry);
 	ADDRSEL_UNLOCK();
 	ADDRSEL_XUNLOCK();
 
 	return (0);
 }
 
 /*
  * Remove from the policy table the entry whose address and mask equal
  * those of @key, and free it.
  *
  * Returns ESRCH when no such entry exists, 0 on success.
  */
 static int
 delete_addrsel_policyent(struct in6_addrpolicy *key)
 {
 	struct addrsel_policyent *pol;
 
 	ADDRSEL_XLOCK();
 	ADDRSEL_LOCK();
 
 	/* search for the entry in the table */
 	TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) {
 		if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr,
 		    &pol->ape_policy.addr.sin6_addr) &&
 		    IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr,
 		    &pol->ape_policy.addrmask.sin6_addr)) {
 			break;
 		}
 	}
 	/* pol is NULL here when the loop ran off the end of the table. */
 	if (pol == NULL) {
 		ADDRSEL_UNLOCK();
 		ADDRSEL_XUNLOCK();
 		return (ESRCH);
 	}
 
 	TAILQ_REMOVE(&V_addrsel_policytab, pol, ape_entry);
 	ADDRSEL_UNLOCK();
 	ADDRSEL_XUNLOCK();
 	/* The entry is freed only after both locks have been dropped. */
 	free(pol, M_IFADDR);
 
 	return (0);
 }
 
 /*
  * Invoke @callback on every policy in the table, in queue order, passing
  * @w through unchanged.  The walk runs under the shared ADDRSEL lock and
  * stops at the first callback that returns non-zero.
  *
  * Returns that non-zero value, or 0 when every callback succeeded.
  */
 static int
 walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), void *w)
 {
 	struct addrsel_policyent *ent;
 	int rc = 0;
 
 	ADDRSEL_SLOCK();
 	TAILQ_FOREACH(ent, &V_addrsel_policytab, ape_entry) {
 		rc = (*callback)(&ent->ape_policy, w);
 		if (rc != 0)
 			break;
 	}
 	ADDRSEL_SUNLOCK();
 
 	return (rc);
 }
 
 /*
  * Walk callback: copy one policy entry out through the sysctl request
  * carried in @arg (a struct walkarg).  Returns the SYSCTL_OUT() result.
  */
 static int
 dump_addrsel_policyent(struct in6_addrpolicy *pol, void *arg)
 {
 	struct walkarg *wa = arg;
 
 	return (SYSCTL_OUT(wa->w_req, pol, sizeof(*pol)));
 }
 
 /*
  * Find the policy table entry whose prefix matches @key with the longest
  * mask.
  *
  * Each entry is compared byte by byte: the key byte, masked with the
  * entry's mask byte, must equal the entry's address byte.  The comparison
  * stops at the first zero mask byte or after 16 bytes.  When several
  * entries match with the same length, the one found first is kept.
  *
  * Returns NULL when nothing matches.  No lock is taken here;
  * lookup_addrsel_policy() calls this with ADDRSEL_LOCK held.
  */
 static struct in6_addrpolicy *
 match_addrsel_policy(struct sockaddr_in6 *key)
 {
 	struct addrsel_policyent *pent;
 	struct in6_addrpolicy *bestpol = NULL, *pol;
 	int matchlen, bestmatchlen = -1;
 	u_char *mp, *ep, *k, *p, m;
 
 	TAILQ_FOREACH(pent, &V_addrsel_policytab, ape_entry) {
 		matchlen = 0;
 
 		pol = &pent->ape_policy;
 		mp = (u_char *)&pol->addrmask.sin6_addr;
 		ep = mp + 16;	/* XXX: scope field? */
 		k = (u_char *)&key->sin6_addr;
 		p = (u_char *)&pol->addr.sin6_addr;
 		/* Walk mask, key and policy address in lock step. */
 		for (; mp < ep && *mp; mp++, k++, p++) {
 			m = *mp;
 			if ((*k & m) != *p)
 				goto next; /* not match */
 			if (m == 0xff) /* short cut for a typical case */
 				matchlen += 8;
 			else {
 				/* Count the leading one bits of a partial mask byte. */
 				while (m >= 0x80) {
 					matchlen++;
 					m <<= 1;
 				}
 			}
 		}
 
 		/* matched.  check if this is better than the current best. */
 		if (bestpol == NULL ||
 		    matchlen > bestmatchlen) {
 			bestpol = pol;
 			bestmatchlen = matchlen;
 		}
 
 	  next:
 		continue;
 	}
 
 	return (bestpol);
 }
Index: projects/clang380-import/sys/netinet6/ip6_var.h
===================================================================
--- projects/clang380-import/sys/netinet6/ip6_var.h	(revision 293686)
+++ projects/clang380-import/sys/netinet6/ip6_var.h	(revision 293687)
@@ -1,435 +1,436 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: ip6_var.h,v 1.62 2001/05/03 14:51:48 itojun Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ip_var.h	8.1 (Berkeley) 6/10/93
  * $FreeBSD$
  */
 
 #ifndef _NETINET6_IP6_VAR_H_
 #define _NETINET6_IP6_VAR_H_
 
 /*
  * IP6 reassembly queue structure.  Each fragment
  * being reassembled is attached to one of these structures.
  * NOTE(review): the field notes marked "presumably" are inferred from the
  * field names only; confirm against the reassembly code.
  */
 struct	ip6q {
 	struct ip6asfrag *ip6q_down;	/* presumably links to this packet's */
 	struct ip6asfrag *ip6q_up;	/* fragment list */
 	u_int32_t	ip6q_ident;
 	u_int8_t	ip6q_nxt;
 	u_int8_t	ip6q_ecn;
 	u_int8_t	ip6q_ttl;
 	struct in6_addr ip6q_src, ip6q_dst;
 	struct ip6q	*ip6q_next;	/* presumably links between queues */
 	struct ip6q	*ip6q_prev;
 	int		ip6q_unfrglen;	/* len of unfragmentable part */
 #ifdef notyet
 	u_char		*ip6q_nxtp;
 #endif
 	int		ip6q_nfrag;	/* # of fragments */
 	struct label	*ip6q_label;
 };
 
 /*
  * Descriptor for a single fragment; struct ip6q points at these through
  * its ip6q_down/ip6q_up members.
  */
 struct	ip6asfrag {
 	struct ip6asfrag *ip6af_down;
 	struct ip6asfrag *ip6af_up;
 	struct mbuf	*ip6af_m;
 	int		ip6af_offset;	/* offset in ip6af_m to next header */
 	int		ip6af_frglen;	/* fragmentable part length */
 	int		ip6af_off;	/* fragment offset */
 	u_int16_t	ip6af_mff;	/* more fragment bit in frag off */
 };
 
 /* Lvalue access to the mbuf pointer stored in a fragment descriptor. */
 #define IP6_REASS_MBUF(ip6af) (*(struct mbuf **)&((ip6af)->ip6af_m))
 
 /*
  * IP6 reinjecting structure: records which header to process next and
  * where it starts.
  */
 struct ip6_direct_ctx {
 	uint32_t	ip6dc_nxt;	/* next header to process */
 	uint32_t	ip6dc_off;	/* offset to next header */
 };
 
 /*
  * Structure attached to inpcb.in6p_moptions and
  * passed to ip6_output when IPv6 multicast options are in use.
  * This structure is lazy-allocated.
  */
 struct ip6_moptions {
 	struct	ifnet *im6o_multicast_ifp; /* ifp for outgoing multicasts */
 	u_char	im6o_multicast_hlim;	/* hoplimit for outgoing multicasts */
 	u_char	im6o_multicast_loop;	/* 1 => hear sends if a member */
 	u_short	im6o_num_memberships;	/* no. memberships this socket */
 	u_short	im6o_max_memberships;	/* max memberships this socket */
 	struct	in6_multi **im6o_membership;	/* group memberships */
 	struct	in6_mfilter *im6o_mfilters;	/* source filters */
 };
 
 /*
  * Control options for outgoing packets
  */
 
 /*
  * Routing header related info.  The two macros below are shorthand for
  * these members when reached through struct ip6_pktopts.
  */
 struct	ip6po_rhinfo {
 	struct	ip6_rthdr *ip6po_rhi_rthdr; /* Routing header */
 	struct	route_in6 ip6po_rhi_route; /* Route to the 1st hop */
 };
 #define ip6po_rthdr	ip6po_rhinfo.ip6po_rhi_rthdr
 #define ip6po_route	ip6po_rhinfo.ip6po_rhi_route
 
 /*
  * Nexthop related info.  The two macros below are shorthand for these
  * members when reached through struct ip6_pktopts.
  */
 struct	ip6po_nhinfo {
 	struct	sockaddr *ip6po_nhi_nexthop;	/* caller-specified next hop */
 	struct	route_in6 ip6po_nhi_route; /* Route to the nexthop */
 };
 #define ip6po_nexthop	ip6po_nhinfo.ip6po_nhi_nexthop
 #define ip6po_nextroute	ip6po_nhinfo.ip6po_nhi_route
 
 /*
  * Per-packet (or per-socket) options applied to outgoing IPv6 packets.
  * The constants defined next to a field are the values that field takes.
  */
 struct	ip6_pktopts {
 	struct	mbuf *ip6po_m;	/* Pointer to mbuf storing the data */
 	int	ip6po_hlim;	/* Hoplimit for outgoing packets */
 
 	/* Outgoing IF/address information */
 	struct	in6_pktinfo *ip6po_pktinfo;
 
 	/* Next-hop address information */
 	struct	ip6po_nhinfo ip6po_nhinfo;
 
 	struct	ip6_hbh *ip6po_hbh; /* Hop-by-Hop options header */
 
 	/* Destination options header (before a routing header) */
 	struct	ip6_dest *ip6po_dest1;
 
 	/* Routing header related info. */
 	struct	ip6po_rhinfo ip6po_rhinfo;
 
 	/* Destination options header (after a routing header) */
 	struct	ip6_dest *ip6po_dest2;
 
 	int	ip6po_tclass;	/* traffic class */
 
 	int	ip6po_minmtu;  /* fragment vs PMTU discovery policy */
 #define IP6PO_MINMTU_MCASTONLY	-1 /* default; send at min MTU for multicast*/
 #define IP6PO_MINMTU_DISABLE	 0 /* always perform pmtu disc */
 #define IP6PO_MINMTU_ALL	 1 /* always send at min MTU */
 
 	int	ip6po_prefer_tempaddr;  /* whether temporary addresses are
 					   preferred as source address */
 #define IP6PO_TEMPADDR_SYSTEM	-1 /* follow the system default */
 #define IP6PO_TEMPADDR_NOTPREFER 0 /* not prefer temporary address */
 #define IP6PO_TEMPADDR_PREFER	 1 /* prefer temporary address */
 
 	int ip6po_flags;	/* bitmask of the IP6PO_* flags below */
 #if 0	/* parameters in this block are obsolete. do not reuse the values. */
 #define IP6PO_REACHCONF	0x01	/* upper-layer reachability confirmation. */
 #define IP6PO_MINMTU	0x02	/* use minimum MTU (IPV6_USE_MIN_MTU) */
 #endif
 #define IP6PO_DONTFRAG	0x04	/* disable fragmentation (IPV6_DONTFRAG) */
 #define IP6PO_USECOA	0x08	/* use care of address */
 };
 
 /*
  * Control options for incoming packets
  */
 
 /*
  * IPv6 protocol statistics.  Every member is a 64-bit counter or an array
  * of such counters.
  */
 struct	ip6stat {
 	uint64_t ip6s_total;		/* total packets received */
 	uint64_t ip6s_tooshort;		/* packet too short */
 	uint64_t ip6s_toosmall;		/* not enough data */
 	uint64_t ip6s_fragments;	/* fragments received */
 	uint64_t ip6s_fragdropped;	/* frags dropped(dups, out of space) */
 	uint64_t ip6s_fragtimeout;	/* fragments timed out */
 	uint64_t ip6s_fragoverflow;	/* fragments that exceeded limit */
 	uint64_t ip6s_forward;		/* packets forwarded */
 	uint64_t ip6s_cantforward;	/* packets rcvd for unreachable dest */
 	uint64_t ip6s_redirectsent;	/* packets forwarded on same net */
 	uint64_t ip6s_delivered;	/* datagrams delivered to upper level*/
 	uint64_t ip6s_localout;		/* total ip packets generated here */
 	uint64_t ip6s_odropped;		/* lost packets due to nobufs, etc. */
 	uint64_t ip6s_reassembled;	/* total packets reassembled ok */
 	uint64_t ip6s_fragmented;	/* datagrams successfully fragmented */
 	uint64_t ip6s_ofragments;	/* output fragments created */
 	uint64_t ip6s_cantfrag;		/* don't fragment flag was set, etc. */
 	uint64_t ip6s_badoptions;	/* error in option processing */
 	uint64_t ip6s_noroute;		/* packets discarded due to no route */
 	uint64_t ip6s_badvers;		/* ip6 version != 6 */
 	uint64_t ip6s_rawout;		/* total raw ip packets generated */
 	uint64_t ip6s_badscope;		/* scope error */
 	uint64_t ip6s_notmember;	/* don't join this multicast group */
 #define	IP6S_HDRCNT		256	/* headers count */
 	uint64_t ip6s_nxthist[IP6S_HDRCNT]; /* next header history */
 	uint64_t ip6s_m1;		/* one mbuf */
 #define	IP6S_M2MMAX		32
 	uint64_t ip6s_m2m[IP6S_M2MMAX];	/* two or more mbuf */
 	uint64_t ip6s_mext1;		/* one ext mbuf */
 	uint64_t ip6s_mext2m;		/* two or more ext mbuf */
 	uint64_t ip6s_exthdrtoolong;	/* ext hdr are not contiguous */
 	uint64_t ip6s_nogif;		/* no match gif found */
 	uint64_t ip6s_toomanyhdr;	/* discarded due to too many headers */
 
 	/*
 	 * statistics for improvement of the source address selection
 	 * algorithm:
 	 * XXX: hardcoded 16 = # of ip6 multicast scope types + 1
 	 */
 #define	IP6S_RULESMAX		16
 #define	IP6S_SCOPECNT		16
 	/* number of times that address selection fails */
 	uint64_t ip6s_sources_none;
 	/* number of times that an address on the outgoing I/F is chosen */
 	uint64_t ip6s_sources_sameif[IP6S_SCOPECNT];
 	/* number of times that an address on a non-outgoing I/F is chosen */
 	uint64_t ip6s_sources_otherif[IP6S_SCOPECNT];
 	/*
 	 * number of times that an address that has the same scope
 	 * from the destination is chosen.
 	 */
 	uint64_t ip6s_sources_samescope[IP6S_SCOPECNT];
 	/*
 	 * number of times that an address that has a different scope
 	 * from the destination is chosen.
 	 */
 	uint64_t ip6s_sources_otherscope[IP6S_SCOPECNT];
 	/* number of times that a deprecated address is chosen */
 	uint64_t ip6s_sources_deprecated[IP6S_SCOPECNT];
 
 	/* number of times that each rule of source selection is applied. */
 	uint64_t ip6s_sources_rule[IP6S_RULESMAX];
 };
 
 #ifdef _KERNEL
 #include <sys/counter.h>
 
 /*
  * Kernel-side accessors for struct ip6stat.  IP6STAT_ADD is the primitive;
  * SUB, INC and DEC are expressed in terms of it.
  */
 VNET_PCPUSTAT_DECLARE(struct ip6stat, ip6stat);
 #define	IP6STAT_ADD(name, val)	\
     VNET_PCPUSTAT_ADD(struct ip6stat, ip6stat, name, (val))
 #define	IP6STAT_SUB(name, val)	IP6STAT_ADD(name, -(val))
 #define	IP6STAT_INC(name)	IP6STAT_ADD(name, 1)
 #define	IP6STAT_DEC(name)	IP6STAT_SUB(name, 1)
 #endif
 
 #ifdef _KERNEL
 /* flags passed to ip6_output as last parameter */
 #define	IPV6_UNSPECSRC		0x01	/* allow :: as the source address */
 #define	IPV6_FORWARDING		0x02	/* most of IPv6 header exists */
 #define	IPV6_MINMTU		0x04	/* use minimum MTU (IPV6_USE_MIN_MTU) */
 
 #ifdef __NO_STRICT_ALIGNMENT
 #define IP6_HDR_ALIGNED_P(ip)	1
 #else
 #define IP6_HDR_ALIGNED_P(ip)	((((intptr_t) (ip)) & 3) == 0)
 #endif
 
 VNET_DECLARE(int, ip6_defhlim);		/* default hop limit */
 VNET_DECLARE(int, ip6_defmcasthlim);	/* default multicast hop limit */
 VNET_DECLARE(int, ip6_forwarding);	/* act as router? */
 VNET_DECLARE(int, ip6_use_deprecated);	/* allow deprecated addr as source */
 VNET_DECLARE(int, ip6_rr_prune);	/* router renumbering prefix
 					 * walk list every 5 sec.    */
 VNET_DECLARE(int, ip6_mcast_pmtu);	/* enable pMTU discovery for multicast? */
 VNET_DECLARE(int, ip6_v6only);
 #define	V_ip6_defhlim			VNET(ip6_defhlim)
 #define	V_ip6_defmcasthlim		VNET(ip6_defmcasthlim)
 #define	V_ip6_forwarding		VNET(ip6_forwarding)
 #define	V_ip6_use_deprecated		VNET(ip6_use_deprecated)
 #define	V_ip6_rr_prune			VNET(ip6_rr_prune)
 #define	V_ip6_mcast_pmtu		VNET(ip6_mcast_pmtu)
 #define	V_ip6_v6only			VNET(ip6_v6only)
 
 VNET_DECLARE(struct socket *, ip6_mrouter);	/* multicast routing daemon */
 VNET_DECLARE(int, ip6_sendredirects);	/* send IP redirects when forwarding? */
 VNET_DECLARE(int, ip6_maxfragpackets);	/* Maximum packets in reassembly
 					 * queue */
 VNET_DECLARE(int, ip6_maxfrags);	/* Maximum fragments in reassembly
 					 * queue */
 VNET_DECLARE(int, ip6_accept_rtadv);	/* Acts as a host not a router */
 VNET_DECLARE(int, ip6_no_radr);		/* No defroute from RA */
 VNET_DECLARE(int, ip6_norbit_raif);	/* Disable R-bit in NA on RA
 					 * receiving IF. */
 VNET_DECLARE(int, ip6_rfc6204w3);	/* Accept defroute from RA even when
 					   forwarding enabled */
 VNET_DECLARE(int, ip6_log_interval);
 VNET_DECLARE(time_t, ip6_log_time);
 VNET_DECLARE(int, ip6_hdrnestlimit);	/* upper limit of # of extension
 					 * headers */
 VNET_DECLARE(int, ip6_dad_count);	/* DupAddrDetectionTransmits */
 #define	V_ip6_mrouter			VNET(ip6_mrouter)
 #define	V_ip6_sendredirects		VNET(ip6_sendredirects)
 #define	V_ip6_maxfragpackets		VNET(ip6_maxfragpackets)
 #define	V_ip6_maxfrags			VNET(ip6_maxfrags)
 #define	V_ip6_accept_rtadv		VNET(ip6_accept_rtadv)
 #define	V_ip6_no_radr			VNET(ip6_no_radr)
 #define	V_ip6_norbit_raif		VNET(ip6_norbit_raif)
 #define	V_ip6_rfc6204w3			VNET(ip6_rfc6204w3)
 #define	V_ip6_log_interval		VNET(ip6_log_interval)
 #define	V_ip6_log_time			VNET(ip6_log_time)
 #define	V_ip6_hdrnestlimit		VNET(ip6_hdrnestlimit)
 #define	V_ip6_dad_count			VNET(ip6_dad_count)
 
 VNET_DECLARE(int, ip6_auto_flowlabel);
 VNET_DECLARE(int, ip6_auto_linklocal);
 #define	V_ip6_auto_flowlabel		VNET(ip6_auto_flowlabel)
 #define	V_ip6_auto_linklocal		VNET(ip6_auto_linklocal)
 
 VNET_DECLARE(int, ip6_use_tempaddr);	/* Whether to use temporary addresses */
 VNET_DECLARE(int, ip6_prefer_tempaddr);	/* Whether to prefer temporary
 					 * addresses in the source address
 					 * selection */
 #define	V_ip6_use_tempaddr		VNET(ip6_use_tempaddr)
 #define	V_ip6_prefer_tempaddr		VNET(ip6_prefer_tempaddr)
 
 VNET_DECLARE(int, ip6_use_defzone);	/* Whether to use the default scope
 					 * zone when unspecified */
 #define	V_ip6_use_defzone		VNET(ip6_use_defzone)
 
 VNET_DECLARE (struct pfil_head, inet6_pfil_hook);	/* packet filter hooks */
 #define	V_inet6_pfil_hook	VNET(inet6_pfil_hook)
 #ifdef IPSTEALTH
 VNET_DECLARE(int, ip6stealth);
 #define	V_ip6stealth			VNET(ip6stealth)
 #endif
 
 extern struct	pr_usrreqs rip6_usrreqs;
 struct sockopt;
 
 struct inpcb;
 
 int	icmp6_ctloutput(struct socket *, struct sockopt *sopt);
 
 struct in6_ifaddr;
 void	ip6_init(void);
 #ifdef VIMAGE
 void	ip6_destroy(void);
 #endif
 int	ip6proto_register(short);
 int	ip6proto_unregister(short);
 
 void	ip6_input(struct mbuf *);
 void	ip6_direct_input(struct mbuf *);
 void	ip6_freepcbopts(struct ip6_pktopts *);
 
 int	ip6_unknown_opt(u_int8_t *, struct mbuf *, int);
 char *	ip6_get_prevhdr(const struct mbuf *, int);
 int	ip6_nexthdr(const struct mbuf *, int, int, int *);
 int	ip6_lasthdr(const struct mbuf *, int, int, int *);
 
 extern int	(*ip6_mforward)(struct ip6_hdr *, struct ifnet *,
     struct mbuf *);
 
 int	ip6_process_hopopts(struct mbuf *, u_int8_t *, int, u_int32_t *,
 				 u_int32_t *);
 struct mbuf	**ip6_savecontrol_v4(struct inpcb *, struct mbuf *,
 	    struct mbuf **, int *);
 void	ip6_savecontrol(struct inpcb *, struct mbuf *, struct mbuf **);
 void	ip6_notify_pmtu(struct inpcb *, struct sockaddr_in6 *, u_int32_t);
 int	ip6_sysctl(int *, u_int, void *, size_t *, void *, size_t);
 
 void	ip6_forward(struct mbuf *, int);
 
 void	ip6_mloopback(struct ifnet *, const struct mbuf *);
 int	ip6_output(struct mbuf *, struct ip6_pktopts *,
 			struct route_in6 *,
 			int,
 			struct ip6_moptions *, struct ifnet **,
 			struct inpcb *);
 int	ip6_ctloutput(struct socket *, struct sockopt *);
 int	ip6_raw_ctloutput(struct socket *, struct sockopt *);
 void	ip6_initpktopts(struct ip6_pktopts *);
 int	ip6_setpktopts(struct mbuf *, struct ip6_pktopts *,
 	struct ip6_pktopts *, struct ucred *, int);
 void	ip6_clearpktopts(struct ip6_pktopts *, int);
 struct ip6_pktopts *ip6_copypktopts(struct ip6_pktopts *, int);
 int	ip6_optlen(struct inpcb *);
 int	ip6_deletefraghdr(struct mbuf *, int, int);
 int	ip6_fragment(struct ifnet *, struct mbuf *, int, u_char, int,
 			uint32_t);
 
 int	route6_input(struct mbuf **, int *, int);
 
 void	frag6_init(void);
 int	frag6_input(struct mbuf **, int *, int);
 void	frag6_slowtimo(void);
 void	frag6_drain(void);
 
 void	rip6_init(void);
 int	rip6_input(struct mbuf **, int *, int);
 void	rip6_ctlinput(int, struct sockaddr *, void *);
 int	rip6_ctloutput(struct socket *, struct sockopt *);
 int	rip6_output(struct mbuf *, struct socket *, ...);
 int	rip6_usrreq(struct socket *,
 	    int, struct mbuf *, struct mbuf *, struct mbuf *, struct thread *);
 
 int	dest6_input(struct mbuf **, int *, int);
 int	none_input(struct mbuf **, int *, int);
 
-int	in6_selectsrc(struct sockaddr_in6 *, struct ip6_pktopts *,
-	struct inpcb *inp, struct ucred *cred,
-	struct ifnet **, struct in6_addr *);
+int	in6_selectsrc_socket(struct sockaddr_in6 *, struct ip6_pktopts *,
+    struct inpcb *, struct ucred *, int, struct in6_addr *, int *);
+int	in6_selectsrc_addr(uint32_t, const struct in6_addr *,
+    uint32_t, struct ifnet *, struct in6_addr *, int *);
 int in6_selectroute(struct sockaddr_in6 *, struct ip6_pktopts *,
 	struct ip6_moptions *, struct route_in6 *, struct ifnet **,
 	struct rtentry **);
 int	in6_selectroute_fib(struct sockaddr_in6 *, struct ip6_pktopts *,
 	    struct ip6_moptions *, struct route_in6 *, struct ifnet **,
 	    struct rtentry **, u_int);
 u_int32_t ip6_randomid(void);
 u_int32_t ip6_randomflowlabel(void);
 void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset);
 #endif /* _KERNEL */
 
 #endif /* !_NETINET6_IP6_VAR_H_ */
Index: projects/clang380-import/sys/netinet6/nd6_nbr.c
===================================================================
--- projects/clang380-import/sys/netinet6/nd6_nbr.c	(revision 293686)
+++ projects/clang380-import/sys/netinet6/nd6_nbr.c	(revision 293687)
@@ -1,1592 +1,1583 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: nd6_nbr.c,v 1.86 2002/01/21 02:33:04 jinmei Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_mpath.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/libkern.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/time.h>
 #include <sys/kernel.h>
 #include <sys/errno.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/queue.h>
 #include <sys/callout.h>
 #include <sys/refcount.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_dl.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #ifdef RADIX_MPATH
 #include <net/radix_mpath.h>
 #endif
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <net/if_llatbl.h>
 #include <netinet6/in6_var.h>
 #include <netinet6/in6_ifattach.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet/icmp6.h>
 #include <netinet/ip_carp.h>
 #include <netinet6/send.h>
 
 /*
  * Cast an arbitrary pointer expression to a link-level socket address.
  * The argument is parenthesised so that expressions such as SDL(p + n)
  * are cast as a whole instead of casting only "p".
  */
 #define SDL(s) ((struct sockaddr_dl *)(s))
 
 /*
  * Forward declarations for the file-local (static) DAD helpers and the
  * FIB-aware NA/NS output routines.
  */
 struct dadq;
 static struct dadq *nd6_dad_find(struct ifaddr *, struct nd_opt_nonce *);
 static void nd6_dad_add(struct dadq *dp);
 static void nd6_dad_del(struct dadq *dp);
 static void nd6_dad_rele(struct dadq *);
 static void nd6_dad_starttimer(struct dadq *, int, int);
 static void nd6_dad_stoptimer(struct dadq *);
 static void nd6_dad_timer(struct dadq *);
 static void nd6_dad_duplicated(struct ifaddr *, struct dadq *);
 static void nd6_dad_ns_output(struct dadq *);
 static void nd6_dad_ns_input(struct ifaddr *, struct nd_opt_nonce *);
 static void nd6_dad_na_input(struct ifaddr *);
 static void nd6_na_output_fib(struct ifnet *, const struct in6_addr *,
     const struct in6_addr *, u_long, int, struct sockaddr *, u_int);
 static void nd6_ns_output_fib(struct ifnet *, const struct in6_addr *,
     const struct in6_addr *, const struct in6_addr *, uint8_t *, u_int);
 
 /*
  * Enhanced DAD switch, on (1) by default.  Exported read-write below as
  * the dad_enhanced node under _net_inet6_ip6.
  */
 static VNET_DEFINE(int, dad_enhanced) = 1;
 #define	V_dad_enhanced			VNET(dad_enhanced)
 
 SYSCTL_DECL(_net_inet6_ip6);
 SYSCTL_INT(_net_inet6_ip6, OID_AUTO, dad_enhanced, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(dad_enhanced), 0,
     "Enable Enhanced DAD, which adds a random nonce to NS messages for DAD.");
 
 /* Default is 15; no sysctl is attached to this one in the lines above. */
 static VNET_DEFINE(int, dad_maxtry) = 15;	/* max # of *tries* to
 						   transmit DAD packet */
 #define	V_dad_maxtry			VNET(dad_maxtry)
 
 /*
  * Input a Neighbor Solicitation Message.
  *
  * Based on RFC 2461
  * Based on RFC 2462 (duplicate address detection)
  *
  *        m - mbuf holding the received packet; the IPv6 header is read
  *            from its start and m->m_pkthdr.rcvif is the receiving
  *            interface.
  *      off - byte offset from the IPv6 header to the NS header.
  * icmp6len - length of the ICMPv6 message; whatever follows the fixed NS
  *            header is parsed as ND options.
  *
  * Both exit labels ("freeit" and "bad") drop the ifaddr reference, if one
  * was taken, and free m.  "bad" additionally logs the source, destination
  * and target addresses and increments icp6s_badns.
  */
 void
 nd6_ns_input(struct mbuf *m, int off, int icmp6len)
 {
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct nd_neighbor_solicit *nd_ns;
 	struct in6_addr saddr6 = ip6->ip6_src;
 	struct in6_addr daddr6 = ip6->ip6_dst;
 	struct in6_addr taddr6;
 	struct in6_addr myaddr6;
 	char *lladdr = NULL;
 	struct ifaddr *ifa = NULL;
 	int lladdrlen = 0;
 	int anycast = 0, proxy = 0, tentative = 0;
 	int tlladdr;
 	int rflag;
 	union nd_opts ndopts;
 	struct sockaddr_dl proxydl;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 
 	/*
 	 * The router flag is advertised only while forwarding, and is
 	 * suppressed when the interface accepts RAs and ip6_norbit_raif
 	 * is set.
 	 */
 	rflag = (V_ip6_forwarding) ? ND_NA_FLAG_ROUTER : 0;
 	if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && V_ip6_norbit_raif)
 		rflag = 0;
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, icmp6len,);
 	nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(nd_ns, struct nd_neighbor_solicit *, m, off, icmp6len);
 	if (nd_ns == NULL) {
 		/*
 		 * NOTE(review): returns without m_freem(); presumably
 		 * IP6_EXTHDR_GET has already disposed of m -- confirm.
 		 */
 		ICMP6STAT_INC(icp6s_tooshort);
 		return;
 	}
 #endif
 	ip6 = mtod(m, struct ip6_hdr *); /* adjust pointer for safety */
 	taddr6 = nd_ns->nd_ns_target;
 	if (in6_setscope(&taddr6, ifp, NULL) != 0)
 		goto bad;
 
 	/* Only hop limit 255 is accepted. */
 	if (ip6->ip6_hlim != 255) {
 		nd6log((LOG_ERR,
 		    "nd6_ns_input: invalid hlim (%d) from %s to %s on %s\n",
 		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
 		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
 		goto bad;
 	}
 
 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
 		/* dst has to be a solicited node multicast address. */
 		if (daddr6.s6_addr16[0] == IPV6_ADDR_INT16_MLL &&
 		    /* don't check ifindex portion */
 		    daddr6.s6_addr32[1] == 0 &&
 		    daddr6.s6_addr32[2] == IPV6_ADDR_INT32_ONE &&
 		    daddr6.s6_addr8[12] == 0xff) {
 			; /* good */
 		} else {
 			nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet "
 			    "(wrong ip6 dst)\n"));
 			goto bad;
 		}
 	} else if (!V_nd6_onlink_ns_rfc4861) {
 		struct sockaddr_in6 src_sa6;
 
 		/*
 		 * According to recent IETF discussions, it is not a good idea
 		 * to accept a NS from an address which would not be deemed
 		 * to be a neighbor otherwise.  This point is expected to be
 		 * clarified in future revisions of the specification.
 		 */
 		bzero(&src_sa6, sizeof(src_sa6));
 		src_sa6.sin6_family = AF_INET6;
 		src_sa6.sin6_len = sizeof(src_sa6);
 		src_sa6.sin6_addr = saddr6;
 		if (nd6_is_addr_neighbor(&src_sa6, ifp) == 0) {
 			nd6log((LOG_INFO, "nd6_ns_input: "
 				"NS packet from non-neighbor\n"));
 			goto bad;
 		}
 	}
 
 	if (IN6_IS_ADDR_MULTICAST(&taddr6)) {
 		nd6log((LOG_INFO, "nd6_ns_input: bad NS target (multicast)\n"));
 		goto bad;
 	}
 
 	/* Parse the ND options that follow the fixed NS header. */
 	icmp6len -= sizeof(*nd_ns);
 	nd6_option_init(nd_ns + 1, icmp6len, &ndopts);
 	if (nd6_options(&ndopts) < 0) {
 		nd6log((LOG_INFO,
 		    "nd6_ns_input: invalid ND option, ignored\n"));
 		/* nd6_options have incremented stats */
 		goto freeit;
 	}
 
 	if (ndopts.nd_opts_src_lladdr) {
 		/* The address follows the option header; length is scaled by 8. */
 		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
 		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
 	}
 
 	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && lladdr) {
 		nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet "
 		    "(link-layer address option)\n"));
 		goto bad;
 	}
 
 	/*
 	 * Attaching target link-layer address to the NA?
 	 * (RFC 2461 7.2.4)
 	 *
 	 * NS IP dst is unicast/anycast			MUST NOT add
 	 * NS IP dst is solicited-node multicast	MUST add
 	 *
 	 * In implementation, we add target link-layer address by default.
 	 * We do not add one in MUST NOT cases.
 	 */
 	if (!IN6_IS_ADDR_MULTICAST(&daddr6))
 		tlladdr = 0;
 	else
 		tlladdr = 1;
 
 	/*
 	 * Target address (taddr6) must be either:
 	 * (1) Valid unicast/anycast address for my receiving interface,
 	 * (2) Unicast address for which I'm offering proxy service, or
 	 * (3) "tentative" address on which DAD is being performed.
 	 */
 	/* (1) and (3) check. */
 	if (ifp->if_carp)
 		ifa = (*carp_iamatch6_p)(ifp, &taddr6);
 	else
 		ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
 
 	/* (2) check. */
 	if (ifa == NULL) {
 		struct sockaddr_dl rt_gateway;
 		struct rt_addrinfo info;
 		struct sockaddr_in6 dst6;
 
 		bzero(&dst6, sizeof(dst6));
 		dst6.sin6_len = sizeof(struct sockaddr_in6);
 		dst6.sin6_family = AF_INET6;
 		dst6.sin6_addr = taddr6;
 
 		/* Caller-provided storage in which the lookup returns the gateway. */
 		bzero(&rt_gateway, sizeof(rt_gateway));
 		rt_gateway.sdl_len = sizeof(rt_gateway);
 		bzero(&info, sizeof(info));
 		info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&rt_gateway;
 
 		/* Always use the default FIB. */
 		if (rib_lookup_info(RT_DEFAULT_FIB, (struct sockaddr *)&dst6,
 		    0, 0, &info) == 0) {
 			if ((info.rti_flags & RTF_ANNOUNCE) != 0 &&
 			    rt_gateway.sdl_family == AF_LINK) {
 
 				/*
 				 * proxy NDP for single entry
 				 */
 				proxydl = *SDL(&rt_gateway);
 				ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(
 				    ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
 				if (ifa)
 					proxy = 1;
 			}
 		}
 	}
 	if (ifa == NULL) {
 		/*
 		 * We've got an NS packet, and we don't have that address
 		 * assigned for us.  We MUST silently ignore it.
 		 * See RFC2461 7.2.3.
 		 */
 		goto freeit;
 	}
 	myaddr6 = *IFA_IN6(ifa);
 	anycast = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST;
 	tentative = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE;
 	if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DUPLICATED)
 		goto freeit;
 
 	/*
 	 * The option must be exactly the interface's address length plus
 	 * 2 bytes (presumably the option's type and length octets), rounded
 	 * up to a multiple of 8.
 	 */
 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
 		nd6log((LOG_INFO, "nd6_ns_input: lladdrlen mismatch for %s "
 		    "(if %d, NS packet %d)\n",
 		    ip6_sprintf(ip6bufs, &taddr6),
 		    ifp->if_addrlen, lladdrlen - 2));
 		goto bad;
 	}
 
 	/* The solicitation claims to come from the matched local address. */
 	if (IN6_ARE_ADDR_EQUAL(&myaddr6, &saddr6)) {
 		nd6log((LOG_INFO, "nd6_ns_input: duplicate IP6 address %s\n",
 		    ip6_sprintf(ip6bufs, &saddr6)));
 		goto freeit;
 	}
 
 	/*
 	 * We have neighbor solicitation packet, with target address equals to
 	 * one of my tentative address.
 	 *
 	 * src addr	how to process?
 	 * ---		---
 	 * multicast	of course, invalid (rejected in ip6_input)
 	 * unicast	somebody is doing address resolution -> ignore
 	 * unspec	dup address detection
 	 *
 	 * The processing is defined in RFC 2462.
 	 */
 	if (tentative) {
 		/*
 		 * If source address is unspecified address, it is for
 		 * duplicate address detection.
 		 *
 		 * If not, the packet is for address resolution;
 		 * silently ignore it.
 		 */
 		if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
 			nd6_dad_ns_input(ifa, ndopts.nd_opts_nonce);
 
 		goto freeit;
 	}
 
 	/*
 	 * If the source address is unspecified address, entries must not
 	 * be created or updated.
 	 * It looks that sender is performing DAD.  Output NA toward
 	 * all-node multicast address, to tell the sender that I'm using
 	 * the address.
 	 * S bit ("solicited") must be zero.
 	 */
 	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
 		struct in6_addr in6_all;
 
 		in6_all = in6addr_linklocal_allnodes;
 		if (in6_setscope(&in6_all, ifp, NULL) != 0)
 			goto bad;
 		nd6_na_output_fib(ifp, &in6_all, &taddr6,
 		    ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) |
 		    rflag, tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL,
 		    M_GETFIB(m));
 		goto freeit;
 	}
 
 	/* Ordinary solicitation: record the sender, then answer it. */
 	nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen,
 	    ND_NEIGHBOR_SOLICIT, 0);
 
 	/* The reply is sent in the FIB the solicitation's mbuf carries. */
 	nd6_na_output_fib(ifp, &saddr6, &taddr6,
 	    ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) |
 	    rflag | ND_NA_FLAG_SOLICITED, tlladdr,
 	    proxy ? (struct sockaddr *)&proxydl : NULL, M_GETFIB(m));
 	/* Common exit: drop the ifaddr reference (if any) and the packet. */
  freeit:
 	if (ifa != NULL)
 		ifa_free(ifa);
 	m_freem(m);
 	return;
 
 	/* Error exit: log the addresses, count the bad NS, then clean up. */
  bad:
 	nd6log((LOG_ERR, "nd6_ns_input: src=%s\n",
 		ip6_sprintf(ip6bufs, &saddr6)));
 	nd6log((LOG_ERR, "nd6_ns_input: dst=%s\n",
 		ip6_sprintf(ip6bufs, &daddr6)));
 	nd6log((LOG_ERR, "nd6_ns_input: tgt=%s\n",
 		ip6_sprintf(ip6bufs, &taddr6)));
 	ICMP6STAT_INC(icp6s_badns);
 	if (ifa != NULL)
 		ifa_free(ifa);
 	m_freem(m);
 }
 
 /*
  * Output a Neighbor Solicitation Message. Caller specifies:
  *	- ICMP6 header source IP6 address
  *	- ND6 header target IP6 address
  *	- ND6 header source datalink address
  *
  * Based on RFC 2461
  * Based on RFC 2462 (duplicate address detection)
  *
  *    ifp - outgoing interface.
  * saddr6 - source address of the packet prompting the solicitation, or
  *          NULL; it is used as the NS source only if it is assigned to ifp.
  * daddr6 - destination address; if NULL, the solicited-node multicast
  *          address derived from taddr6 is used.
  * taddr6 - target address; nothing is sent if it is multicast.
  *  nonce - If non-NULL, NS is used for duplicate address detection and
  *         the value (length is ND_OPT_NONCE_LEN) is used as a random nonce.
  * fibnum - FIB the mbuf is tagged with; also used for source address
  *          selection so both refer to the same routing table.
  *
  * Failures (mbuf shortage, scope or source selection errors, tag
  * allocation failure) are silent apart from one debug log: the packet is
  * dropped and the function returns.
  */
 static void
 nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *saddr6,
     const struct in6_addr *daddr6, const struct in6_addr *taddr6,
     uint8_t *nonce, u_int fibnum)
 {
 	struct mbuf *m;
 	struct m_tag *mtag;
 	struct ip6_hdr *ip6;
 	struct nd_neighbor_solicit *nd_ns;
 	struct ip6_moptions im6o;
 	int icmp6len;
 	int maxlen;
 	caddr_t mac;
 
 	if (IN6_IS_ADDR_MULTICAST(taddr6))
 		return;
 
 	/* estimate the size of message */
 	maxlen = sizeof(*ip6) + sizeof(*nd_ns);
 	maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
 	KASSERT(max_linkhdr + maxlen <= MCLBYTES, (
 	    "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)",
 	    __func__, max_linkhdr, maxlen, MCLBYTES));
 
 	/* Use a cluster only when the estimate does not fit a header mbuf. */
 	if (max_linkhdr + maxlen > MHLEN)
 		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 	else
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return;
 	M_SETFIB(m, fibnum);
 
 	/*
 	 * im6o is filled in only for a multicast destination.
 	 * NOTE(review): it is still handed to ip6_output() for unicast
 	 * destinations; presumably ip6_output() ignores it then -- confirm.
 	 */
 	if (daddr6 == NULL || IN6_IS_ADDR_MULTICAST(daddr6)) {
 		m->m_flags |= M_MCAST;
 		im6o.im6o_multicast_ifp = ifp;
 		im6o.im6o_multicast_hlim = 255;
 		im6o.im6o_multicast_loop = 0;
 	}
 
 	icmp6len = sizeof(*nd_ns);
 	m->m_pkthdr.len = m->m_len = sizeof(*ip6) + icmp6len;
 	m->m_data += max_linkhdr;	/* or M_ALIGN() equivalent? */
 
 	/* fill neighbor solicitation packet */
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6->ip6_flow = 0;
 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
 	ip6->ip6_vfc |= IPV6_VERSION;
 	/* ip6->ip6_plen will be set later */
 	ip6->ip6_nxt = IPPROTO_ICMPV6;
 	ip6->ip6_hlim = 255;
 	if (daddr6)
 		ip6->ip6_dst = *daddr6;
 	else {
 		/* Build the solicited-node multicast address for taddr6. */
 		ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
 		ip6->ip6_dst.s6_addr16[1] = 0;
 		ip6->ip6_dst.s6_addr32[1] = 0;
 		ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_ONE;
 		ip6->ip6_dst.s6_addr32[3] = taddr6->s6_addr32[3];
 		ip6->ip6_dst.s6_addr8[12] = 0xff;
 		if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
 			goto bad;
 	}
 	if (nonce == NULL) {
 		struct ifaddr *ifa = NULL;
 
 		/*
 		 * RFC2461 7.2.2:
 		 * "If the source address of the packet prompting the
 		 * solicitation is the same as one of the addresses assigned
 		 * to the outgoing interface, that address SHOULD be placed
 		 * in the IP Source Address of the outgoing solicitation.
 		 * Otherwise, any one of the addresses assigned to the
 		 * interface should be used."
 		 *
 		 * We use the source address for the prompting packet
 		 * (saddr6), if saddr6 belongs to the outgoing interface.
 		 * Otherwise, we perform the source address selection as usual.
 		 */
 
 		if (saddr6 != NULL)
 			ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, saddr6);
 		if (ifa != NULL) {
 			/* saddr6 is assigned to ifp: use it as the source. */
 			ip6->ip6_src = *saddr6;
 			ifa_free(ifa);
 		} else {
 			int error;
-			struct sockaddr_in6 dst_sa;
-			struct in6_addr src_in;
-			struct ifnet *oifp;
+			struct in6_addr dst6, src6;
+			uint32_t scopeid;
 
-			bzero(&dst_sa, sizeof(dst_sa));
-			dst_sa.sin6_family = AF_INET6;
-			dst_sa.sin6_len = sizeof(dst_sa);
-			dst_sa.sin6_addr = ip6->ip6_dst;
-
-			oifp = ifp;
-			error = in6_selectsrc(&dst_sa, NULL,
-			    NULL, NULL, &oifp, &src_in);
+			/*
+			 * Select the source in the same FIB the packet is
+			 * tagged with (fibnum), not the default FIB.
+			 */
+			in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid);
+			error = in6_selectsrc_addr(fibnum, &dst6,
+			    scopeid, ifp, &src6, NULL);
 			if (error) {
 				char ip6buf[INET6_ADDRSTRLEN];
 				nd6log((LOG_DEBUG, "%s: source can't be "
 				    "determined: dst=%s, error=%d\n", __func__,
-				    ip6_sprintf(ip6buf, &dst_sa.sin6_addr),
+				    ip6_sprintf(ip6buf, &dst6),
 				    error));
 				goto bad;
 			}
-			ip6->ip6_src = src_in;
+			ip6->ip6_src = src6;
 		}
 	} else {
 		/*
 		 * Source address for DAD packet must always be IPv6
 		 * unspecified address. (0::0)
 		 * Nothing earlier in this function clears ip6_src, so
 		 * zero it explicitly here.
 		 */
 		bzero(&ip6->ip6_src, sizeof(ip6->ip6_src));
 	}
 	nd_ns = (struct nd_neighbor_solicit *)(ip6 + 1);
 	nd_ns->nd_ns_type = ND_NEIGHBOR_SOLICIT;
 	nd_ns->nd_ns_code = 0;
 	nd_ns->nd_ns_reserved = 0;
 	nd_ns->nd_ns_target = *taddr6;
 	in6_clearscope(&nd_ns->nd_ns_target); /* XXX */
 
 	/*
 	 * Add source link-layer address option.
 	 *
 	 *				spec		implementation
 	 *				---		---
 	 * DAD packet			MUST NOT	do not add the option
 	 * there's no link layer address:
 	 *				impossible	do not add the option
 	 * there's link layer address:
 	 *	Multicast NS		MUST add one	add the option
 	 *	Unicast NS		SHOULD add one	add the option
 	 */
 	if (nonce == NULL && (mac = nd6_ifptomac(ifp))) {
 		int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen;
 		struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
 		/* 8 byte alignments... */
 		optlen = (optlen + 7) & ~7;
 
 		m->m_pkthdr.len += optlen;
 		m->m_len += optlen;
 		icmp6len += optlen;
 		bzero((caddr_t)nd_opt, optlen);
 		nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
 		nd_opt->nd_opt_len = optlen >> 3;
 		bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen);
 	}
 	/*
 	 * Add a Nonce option (RFC 3971) to detect looped back NS messages.
 	 * This behavior is documented as Enhanced Duplicate Address
 	 * Detection in RFC 7527.
 	 * net.inet6.ip6.dad_enhanced=0 disables this.
 	 *
 	 * This and the link-layer option above are mutually exclusive
 	 * (nonce != NULL here, nonce == NULL there), so both may start
 	 * right after the NS header.
 	 */
 	if (V_dad_enhanced != 0 && nonce != NULL) {
 		int optlen = sizeof(struct nd_opt_hdr) + ND_OPT_NONCE_LEN;
 		struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);
 		/* 8-byte alignment is required. */
 		optlen = (optlen + 7) & ~7;
 
 		m->m_pkthdr.len += optlen;
 		m->m_len += optlen;
 		icmp6len += optlen;
 		bzero((caddr_t)nd_opt, optlen);
 		nd_opt->nd_opt_type = ND_OPT_NONCE;
 		nd_opt->nd_opt_len = optlen >> 3;
 		bcopy(nonce, (caddr_t)(nd_opt + 1), ND_OPT_NONCE_LEN);
 	}
 	/* Lengths are final now: fill in the payload length and checksum. */
 	ip6->ip6_plen = htons((u_short)icmp6len);
 	nd_ns->nd_ns_cksum = 0;
 	nd_ns->nd_ns_cksum =
 	    in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), icmp6len);
 
 	/* When the SeND hook is installed, tag the packet with its ND type. */
 	if (send_sendso_input_hook != NULL) {
 		mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
 			sizeof(unsigned short), M_NOWAIT);
 		if (mtag == NULL)
 			goto bad;
 		*(unsigned short *)(mtag + 1) = nd_ns->nd_ns_type;
 		m_tag_prepend(m, mtag);
 	}
 
 	/* DAD probes carry the unspecified source, which must be allowed. */
 	ip6_output(m, NULL, NULL, (nonce != NULL) ? IPV6_UNSPECSRC : 0,
 	    &im6o, NULL, NULL);
 	icmp6_ifstat_inc(ifp, ifs6_out_msg);
 	icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit);
 	ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]);
 
 	return;
 
   bad:
 	m_freem(m);
 	return;
 }
 
 #ifndef BURN_BRIDGES
 /*
  * Compatibility wrapper, compiled only when BURN_BRIDGES is not defined:
  * sends a Neighbor Solicitation through nd6_ns_output_fib() using
  * RT_DEFAULT_FIB.  All other arguments are passed through unchanged.
  */
 void
 nd6_ns_output(struct ifnet *ifp, const struct in6_addr *saddr6,
     const struct in6_addr *daddr6, const struct in6_addr *taddr6,uint8_t *nonce)
 {
 
 	nd6_ns_output_fib(ifp, saddr6, daddr6, taddr6, nonce, RT_DEFAULT_FIB);
 }
 #endif
 /*
  * Neighbor advertisement input handling.
  *
  * Based on RFC 2461
  * Based on RFC 2462 (duplicate address detection)
  *
  * the following items are not implemented yet:
  * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD)
  * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD)
  */
 void
 nd6_na_input(struct mbuf *m, int off, int icmp6len)
 {
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	struct nd_neighbor_advert *nd_na;
 	struct in6_addr daddr6 = ip6->ip6_dst;
 	struct in6_addr taddr6;
 	int flags;
 	int is_router;
 	int is_solicited;
 	int is_override;
 	char *lladdr = NULL;
 	int lladdrlen = 0;
 	int checklink = 0;
 	struct ifaddr *ifa;
 	struct llentry *ln = NULL;
 	union nd_opts ndopts;
 	struct mbuf *chain = NULL;
 	struct sockaddr_in6 sin6;
 	u_char linkhdr[LLE_MAX_LINKHDR];
 	size_t linkhdrsize;
 	int lladdr_off;
 	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
 
 	if (ip6->ip6_hlim != 255) {
 		nd6log((LOG_ERR,
 		    "nd6_na_input: invalid hlim (%d) from %s to %s on %s\n",
 		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
 		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
 		goto bad;
 	}
 
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, icmp6len,);
 	nd_na = (struct nd_neighbor_advert *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(nd_na, struct nd_neighbor_advert *, m, off, icmp6len);
 	if (nd_na == NULL) {
 		ICMP6STAT_INC(icp6s_tooshort);
 		return;
 	}
 #endif
 
 	flags = nd_na->nd_na_flags_reserved;
 	is_router = ((flags & ND_NA_FLAG_ROUTER) != 0);
 	is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0);
 	is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0);
 	memset(&sin6, 0, sizeof(sin6));
 
 	taddr6 = nd_na->nd_na_target;
 	if (in6_setscope(&taddr6, ifp, NULL))
 		goto bad;	/* XXX: impossible */
 
 	if (IN6_IS_ADDR_MULTICAST(&taddr6)) {
 		nd6log((LOG_ERR,
 		    "nd6_na_input: invalid target address %s\n",
 		    ip6_sprintf(ip6bufs, &taddr6)));
 		goto bad;
 	}
 	if (IN6_IS_ADDR_MULTICAST(&daddr6))
 		if (is_solicited) {
 			nd6log((LOG_ERR,
 			    "nd6_na_input: a solicited adv is multicasted\n"));
 			goto bad;
 		}
 
 	icmp6len -= sizeof(*nd_na);
 	nd6_option_init(nd_na + 1, icmp6len, &ndopts);
 	if (nd6_options(&ndopts) < 0) {
 		nd6log((LOG_INFO,
 		    "nd6_na_input: invalid ND option, ignored\n"));
 		/* nd6_options have incremented stats */
 		goto freeit;
 	}
 
 	if (ndopts.nd_opts_tgt_lladdr) {
 		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
 		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
 	}
 
 	/*
 	 * This effectively disables the DAD check on a non-master CARP
 	 * address.
 	 */
 	if (ifp->if_carp)
 		ifa = (*carp_iamatch6_p)(ifp, &taddr6);
 	else
 		ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
 
 	/*
 	 * Target address matches one of my interface addresses.
 	 *
 	 * If my address is tentative, this means that there's somebody
 	 * already using the same address as mine.  This indicates DAD failure.
 	 * This is defined in RFC 2462.
 	 *
 	 * Otherwise, process as defined in RFC 2461.
 	 */
 	if (ifa
 	 && (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE)) {
 		nd6_dad_na_input(ifa);
 		ifa_free(ifa);
 		goto freeit;
 	}
 
 	/* Just for safety, maybe unnecessary. */
 	if (ifa) {
 		ifa_free(ifa);
 		log(LOG_ERR,
 		    "nd6_na_input: duplicate IP6 address %s\n",
 		    ip6_sprintf(ip6bufs, &taddr6));
 		goto freeit;
 	}
 
 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
 		nd6log((LOG_INFO, "nd6_na_input: lladdrlen mismatch for %s "
 		    "(if %d, NA packet %d)\n", ip6_sprintf(ip6bufs, &taddr6),
 		    ifp->if_addrlen, lladdrlen - 2));
 		goto bad;
 	}
 
 	/*
 	 * If no neighbor cache entry is found, NA SHOULD silently be
 	 * discarded.
 	 */
 	IF_AFDATA_RLOCK(ifp);
 	ln = nd6_lookup(&taddr6, LLE_EXCLUSIVE, ifp);
 	IF_AFDATA_RUNLOCK(ifp);
 	if (ln == NULL) {
 		goto freeit;
 	}
 
 	if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
 		/*
 		 * If the link-layer has address, and no lladdr option came,
 		 * discard the packet.
 		 */
 		if (ifp->if_addrlen && lladdr == NULL) {
 			goto freeit;
 		}
 
 		/*
 		 * Record link-layer address, and update the state.
 		 */
 		linkhdrsize = sizeof(linkhdr);
 		if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
 		    linkhdr, &linkhdrsize, &lladdr_off) != 0)
 			return;
 
 		if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
 		    lladdr_off) == 0) {
 			ln = NULL;
 			goto freeit;
 		}
 		EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
 		if (is_solicited)
 			nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE);
 		else
 			nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
 		if ((ln->ln_router = is_router) != 0) {
 			/*
 			 * This means a router's state has changed from
 			 * non-reachable to probably reachable, and might
 			 * affect the status of associated prefixes..
 			 */
 			checklink = 1;
 		}
 	} else {
 		int llchange;
 
 		/*
 		 * Check if the link-layer address has changed or not.
 		 */
 		if (lladdr == NULL)
 			llchange = 0;
 		else {
 			if (ln->la_flags & LLE_VALID) {
 				if (bcmp(lladdr, ln->ll_addr, ifp->if_addrlen))
 					llchange = 1;
 				else
 					llchange = 0;
 			} else
 				llchange = 1;
 		}
 
 		/*
 		 * This is VERY complex.  Look at it with care.
 		 *
 		 * override solicit lladdr llchange	action
 		 *					(L: record lladdr)
 		 *
 		 *	0	0	n	--	(2c)
 		 *	0	0	y	n	(2b) L
 		 *	0	0	y	y	(1)    REACHABLE->STALE
 		 *	0	1	n	--	(2c)   *->REACHABLE
 		 *	0	1	y	n	(2b) L *->REACHABLE
 		 *	0	1	y	y	(1)    REACHABLE->STALE
 		 *	1	0	n	--	(2a)
 		 *	1	0	y	n	(2a) L
 		 *	1	0	y	y	(2a) L *->STALE
 		 *	1	1	n	--	(2a)   *->REACHABLE
 		 *	1	1	y	n	(2a) L *->REACHABLE
 		 *	1	1	y	y	(2a) L *->REACHABLE
 		 */
 		if (!is_override && (lladdr != NULL && llchange)) {  /* (1) */
 			/*
 			 * If state is REACHABLE, make it STALE.
 			 * no other updates should be done.
 			 */
 			if (ln->ln_state == ND6_LLINFO_REACHABLE)
 				nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
 			goto freeit;
 		} else if (is_override				   /* (2a) */
 			|| (!is_override && (lladdr != NULL && !llchange)) /* (2b) */
 			|| lladdr == NULL) {			   /* (2c) */
 			/*
 			 * Update link-local address, if any.
 			 */
 			if (lladdr != NULL) {
 				linkhdrsize = sizeof(linkhdr);
 				if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
 				    linkhdr, &linkhdrsize, &lladdr_off) != 0)
 					goto freeit;
 				if (lltable_try_set_entry_addr(ifp, ln, linkhdr,
 				    linkhdrsize, lladdr_off) == 0) {
 					ln = NULL;
 					goto freeit;
 				}
 				EVENTHANDLER_INVOKE(lle_event, ln,
 				    LLENTRY_RESOLVED);
 			}
 
 			/*
 			 * If solicited, make the state REACHABLE.
 			 * If not solicited and the link-layer address was
 			 * changed, make it STALE.
 			 */
 			if (is_solicited)
 				nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE);
 			else {
 				if (lladdr != NULL && llchange)
 					nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
 			}
 		}
 
 		if (ln->ln_router && !is_router) {
 			/*
 			 * The peer dropped the router flag.
 			 * Remove the sender from the Default Router List and
 			 * update the Destination Cache entries.
 			 */
 			struct nd_defrouter *dr;
 			struct in6_addr *in6;
 			struct ifnet *nd6_ifp;
 
 			in6 = &ln->r_l3addr.addr6;
 
 			/*
 			 * Lock to protect the default router list.
 			 * XXX: this might be unnecessary, since this function
 			 * is only called under the network software interrupt
 			 * context.  However, we keep it just for safety.
 			 */
 			nd6_ifp = lltable_get_ifp(ln->lle_tbl);
 			dr = defrouter_lookup(in6, nd6_ifp);
 			if (dr)
 				defrtrlist_del(dr);
 			else if (ND_IFINFO(nd6_ifp)->flags &
 			    ND6_IFF_ACCEPT_RTADV) {
 				/*
 				 * Even if the neighbor is not in the default
 				 * router list, the neighbor may be used
 				 * as a next hop for some destinations
 				 * (e.g. redirect case). So we must
 				 * call rt6_flush explicitly.
 				 */
 				rt6_flush(&ip6->ip6_src, ifp);
 			}
 		}
 		ln->ln_router = is_router;
 	}
         /* XXX - QL
 	 *  Does this matter?
 	 *  rt->rt_flags &= ~RTF_REJECT;
 	 */
 	ln->la_asked = 0;
 	if (ln->la_hold != NULL)
 		nd6_grab_holdchain(ln, &chain, &sin6);
  freeit:
 	if (ln != NULL)
 		LLE_WUNLOCK(ln);
 
 	if (chain != NULL)
 		nd6_flush_holdchain(ifp, ifp, chain, &sin6);
 
 	if (checklink)
 		pfxlist_onlink_check();
 
 	m_freem(m);
 	return;
 
  bad:
 	if (ln != NULL)
 		LLE_WUNLOCK(ln);
 
 	ICMP6STAT_INC(icp6s_badna);
 	m_freem(m);
 }
 
 /*
  * Neighbor advertisement output handling.
  *
  * Based on RFC 2461
  *
  * the following items are not implemented yet:
  * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD)
  * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD)
  *
  * ifp - interface used for scoping, multicast options, the link-layer
  *       address lookup and the output statistics
  * daddr6_0 - destination; copied locally, and rewritten below when it is
  *       the unspecified address (reply to DAD)
  * taddr6 - target address placed in the NA (scope cleared)
  * flags - NA flags; SOLICITED is cleared for DAD replies and OVERRIDE is
  *       cleared when no target link-layer address option is attached
  * tlladdr - 1 if include target link-layer address
  * sdl0 - sockaddr_dl (= proxy NA) or NULL
  * fibnum - FIB number stamped on the outgoing mbuf
  *
  * The function returns nothing; on any failure the mbuf is freed and the
  * advertisement is silently dropped.
  */
 static void
 nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
     const struct in6_addr *taddr6, u_long flags, int tlladdr,
     struct sockaddr *sdl0, u_int fibnum)
 {
 	struct mbuf *m;
 	struct m_tag *mtag;
-	struct ifnet *oifp;
 	struct ip6_hdr *ip6;
 	struct nd_neighbor_advert *nd_na;
 	struct ip6_moptions im6o;
-	struct in6_addr src, daddr6;
-	struct sockaddr_in6 dst_sa;
+	struct in6_addr daddr6, dst6, src6;
+	uint32_t scopeid;
+
 	int icmp6len, maxlen, error;
 	caddr_t mac = NULL;
 
 	daddr6 = *daddr6_0;	/* make a local copy for modification */
 
 	/* estimate the size of message */
 	maxlen = sizeof(*ip6) + sizeof(*nd_na);
 	maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
 	KASSERT(max_linkhdr + maxlen <= MCLBYTES, (
 	    "%s: max_linkhdr + maxlen > MCLBYTES (%d + %d > %d)",
 	    __func__, max_linkhdr, maxlen, MCLBYTES));
 
 	/* Use a cluster only when the packet does not fit a header mbuf. */
 	if (max_linkhdr + maxlen > MHLEN)
 		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 	else
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return;
 	M_SETFIB(m, fibnum);
 
 	/*
 	 * NOTE(review): im6o is filled in only when daddr6 is multicast on
 	 * entry.  The DAD-reply branch below turns daddr6 into a multicast
 	 * address after this test, yet &im6o is still handed to
 	 * ip6_output() -- confirm the uninitialized fields cannot be read.
 	 */
 	if (IN6_IS_ADDR_MULTICAST(&daddr6)) {
 		m->m_flags |= M_MCAST;
 		im6o.im6o_multicast_ifp = ifp;
 		im6o.im6o_multicast_hlim = 255;
 		im6o.im6o_multicast_loop = 0;
 	}
 
 	icmp6len = sizeof(*nd_na);
 	m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + icmp6len;
 	m->m_data += max_linkhdr;	/* or M_ALIGN() equivalent? */
 
 	/* fill neighbor advertisement packet */
 	ip6 = mtod(m, struct ip6_hdr *);
 	ip6->ip6_flow = 0;
 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
 	ip6->ip6_vfc |= IPV6_VERSION;
 	ip6->ip6_nxt = IPPROTO_ICMPV6;
 	ip6->ip6_hlim = 255;
 	if (IN6_IS_ADDR_UNSPECIFIED(&daddr6)) {
 		/* reply to DAD */
 		daddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
 		daddr6.s6_addr16[1] = 0;
 		daddr6.s6_addr32[1] = 0;
 		daddr6.s6_addr32[2] = 0;
 		daddr6.s6_addr32[3] = IPV6_ADDR_INT32_ONE;
 		if (in6_setscope(&daddr6, ifp, NULL))
 			goto bad;
 
 		flags &= ~ND_NA_FLAG_SOLICITED;
 	}
 	ip6->ip6_dst = daddr6;
-	bzero(&dst_sa, sizeof(struct sockaddr_in6));
-	dst_sa.sin6_family = AF_INET6;
-	dst_sa.sin6_len = sizeof(struct sockaddr_in6);
-	dst_sa.sin6_addr = daddr6;
 
 	/*
 	 * Select a source whose scope is the same as that of the dest.
 	 *
 	 * NOTE(review): the lookup is done with RT_DEFAULT_FIB although the
 	 * mbuf was tagged with the caller's fibnum above -- confirm whether
 	 * fibnum was intended here.
 	 */
-	oifp = ifp;
-	error = in6_selectsrc(&dst_sa, NULL, NULL, NULL, &oifp, &src);
+	in6_splitscope(&daddr6, &dst6, &scopeid);
+	error = in6_selectsrc_addr(RT_DEFAULT_FIB, &dst6,
+	    scopeid, ifp, &src6, NULL);
 	if (error) {
 		char ip6buf[INET6_ADDRSTRLEN];
 		nd6log((LOG_DEBUG, "nd6_na_output: source can't be "
 		    "determined: dst=%s, error=%d\n",
-		    ip6_sprintf(ip6buf, &dst_sa.sin6_addr), error));
+		    ip6_sprintf(ip6buf, &daddr6), error));
 		goto bad;
 	}
-	ip6->ip6_src = src;
+	ip6->ip6_src = src6;
 	nd_na = (struct nd_neighbor_advert *)(ip6 + 1);
 	nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
 	nd_na->nd_na_code = 0;
 	nd_na->nd_na_target = *taddr6;
 	in6_clearscope(&nd_na->nd_na_target); /* XXX */
 
 	/*
 	 * "tlladdr" indicates NS's condition for adding tlladdr or not.
 	 * see nd6_ns_input() for details.
 	 * Basically, if NS packet is sent to unicast/anycast addr,
 	 * target lladdr option SHOULD NOT be included.
 	 */
 	if (tlladdr) {
 		/*
 		 * sdl0 != NULL indicates proxy NA.  If we do proxy, use
 		 * lladdr in sdl0.  If we are not proxying (sending NA for
 		 * my address) use lladdr configured for the interface.
 		 */
 		if (sdl0 == NULL) {
 			/* A CARP match, when present, wins over the ifp MAC. */
 			if (ifp->if_carp)
 				mac = (*carp_macmatch6_p)(ifp, m, taddr6);
 			if (mac == NULL)
 				mac = nd6_ifptomac(ifp);
 		} else if (sdl0->sa_family == AF_LINK) {
 			struct sockaddr_dl *sdl;
 			sdl = (struct sockaddr_dl *)sdl0;
 			/* Only accept an address of the interface's length. */
 			if (sdl->sdl_alen == ifp->if_addrlen)
 				mac = LLADDR(sdl);
 		}
 	}
 	if (tlladdr && mac) {
 		int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen;
 		struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_na + 1);
 
 		/* roundup to 8 bytes alignment! */
 		optlen = (optlen + 7) & ~7;
 
 		m->m_pkthdr.len += optlen;
 		m->m_len += optlen;
 		icmp6len += optlen;
 		bzero((caddr_t)nd_opt, optlen);
 		nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
 		nd_opt->nd_opt_len = optlen >> 3;	/* in units of 8 bytes */
 		bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen);
 	} else
 		flags &= ~ND_NA_FLAG_OVERRIDE;
 
 	ip6->ip6_plen = htons((u_short)icmp6len);
 	nd_na->nd_na_flags_reserved = flags;
 	/* The checksum field must be zero while the checksum is computed. */
 	nd_na->nd_na_cksum = 0;
 	nd_na->nd_na_cksum =
 	    in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), icmp6len);
 
 	/* Tag the packet with its ND type when the SEND hook is installed. */
 	if (send_sendso_input_hook != NULL) {
 		mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
 		    sizeof(unsigned short), M_NOWAIT);
 		if (mtag == NULL)
 			goto bad;
 		*(unsigned short *)(mtag + 1) = nd_na->nd_na_type;
 		m_tag_prepend(m, mtag);
 	}
 
 	/* The ip6_output() result is ignored; counters are bumped regardless. */
 	ip6_output(m, NULL, NULL, 0, &im6o, NULL, NULL);
 	icmp6_ifstat_inc(ifp, ifs6_out_msg);
 	icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert);
 	ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_ADVERT]);
 
 	return;
 
   bad:
 	m_freem(m);
 	return;
 }
 
 #ifndef BURN_BRIDGES
 /*
  * Wrapper around nd6_na_output_fib() that keeps the FIB-less signature:
  * all arguments are forwarded unchanged and RT_DEFAULT_FIB is supplied as
  * the FIB number.  Compiled only when BURN_BRIDGES is not defined.
  */
 void
 nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0,
     const struct in6_addr *taddr6, u_long flags, int tlladdr,
     struct sockaddr *sdl0)
 {
 
 	nd6_na_output_fib(ifp, daddr6_0, taddr6, flags, tlladdr, sdl0,
 	    RT_DEFAULT_FIB);
 }
 #endif
 
 /*
  * Return the link-layer address of ifp when its if_type is one of the
  * types listed below; every other interface type yields NULL.
  */
 caddr_t
 nd6_ifptomac(struct ifnet *ifp)
 {
 	caddr_t lladdr;
 
 	switch (ifp->if_type) {
 	case IFT_ARCNET:
 	case IFT_ETHER:
 	case IFT_FDDI:
 	case IFT_IEEE1394:
 	case IFT_L2VLAN:
 	case IFT_IEEE80211:
 	case IFT_INFINIBAND:
 	case IFT_BRIDGE:
 	case IFT_ISO88025:
 		lladdr = IF_LLADDR(ifp);
 		break;
 	default:
 		lladdr = NULL;
 		break;
 	}
 
 	return (lladdr);
 }
 
 /*
  * Per-address Duplicate Address Detection state.  One entry is allocated
  * by nd6_dad_start() for each address being probed and is linked on the
  * per-vnet DAD queue.
  */
 struct dadq {
 	TAILQ_ENTRY(dadq) dad_list;	/* linkage on the DAD queue */
 	struct ifaddr *dad_ifa;	/* address under test; referenced by entry */
 	int dad_count;		/* max NS to send */
 	int dad_ns_tcount;	/* # of trials to send NS */
 	int dad_ns_ocount;	/* NS sent so far */
 	int dad_ns_icount;	/* bumped by nd6_dad_ns_input() */
 	int dad_na_icount;	/* bumped by nd6_dad_na_input() */
 	int dad_ns_lcount;	/* looped back NS */
 	int dad_loopbackprobe;	/* probing state for loopback detection */
 	struct callout dad_timer_ch;	/* callout that runs nd6_dad_timer() */
 	struct vnet *dad_vnet;	/* curvnet at nd6_dad_start() (VIMAGE) */
 	u_int dad_refcnt;	/* entry is freed when this drops to zero */
 /* Number of 32-bit words needed to hold ND_OPT_NONCE_LEN bytes. */
 #define	ND_OPT_NONCE_LEN32 \
 		((ND_OPT_NONCE_LEN + sizeof(uint32_t) - 1)/sizeof(uint32_t))
 	uint32_t dad_nonce[ND_OPT_NONCE_LEN32];	/* nonce of the latest NS */
 };
 
 /* Per-vnet queue of in-progress DAD entries and the rwlock guarding it. */
 static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq);
 static VNET_DEFINE(struct rwlock, dad_rwlock);
 #define	V_dadq			VNET(dadq)
 #define	V_dad_rwlock		VNET(dad_rwlock)
 
 /* Shorthand for taking/dropping the DAD queue lock in read or write mode. */
 #define	DADQ_RLOCK()		rw_rlock(&V_dad_rwlock)	
 #define	DADQ_RUNLOCK()		rw_runlock(&V_dad_rwlock)	
 #define	DADQ_WLOCK()		rw_wlock(&V_dad_rwlock)	
 #define	DADQ_WUNLOCK()		rw_wunlock(&V_dad_rwlock)	
 
 /*
  * Append dp to the tail of the DAD queue under the queue write lock.
  * No reference is taken here; the caller is expected to have initialized
  * dp->dad_refcnt already.
  */
 static void
 nd6_dad_add(struct dadq *dp)
 {
 
 	DADQ_WLOCK();
 	TAILQ_INSERT_TAIL(&V_dadq, dp, dad_list);
 	DADQ_WUNLOCK();
 }
 
 /*
  * Unlink dp from the DAD queue under the queue write lock, then drop one
  * reference on it (presumably the one that represented queue membership).
  * dp may be freed by the time this returns.
  */
 static void
 nd6_dad_del(struct dadq *dp)
 {
 
 	DADQ_WLOCK();
 	TAILQ_REMOVE(&V_dadq, dp, dad_list);
 	DADQ_WUNLOCK();
 	/* Released after unlocking; this may free dp. */
 	nd6_dad_rele(dp);
 }
 
 static struct dadq *
 nd6_dad_find(struct ifaddr *ifa, struct nd_opt_nonce *n)
 {
 	struct dadq *dp;
 
 	DADQ_RLOCK();
 	TAILQ_FOREACH(dp, &V_dadq, dad_list) {
 		if (dp->dad_ifa != ifa)
 			continue;
 		/*
 		 * Skip if the nonce matches the received one.
 		 * +2 in the length is required because of type and
 		 * length fields are included in a header.
 		 */
 		if (n != NULL &&
 		    n->nd_opt_nonce_len == (ND_OPT_NONCE_LEN + 2) / 8 &&
 		    memcmp(&n->nd_opt_nonce[0], &dp->dad_nonce[0],
 		        ND_OPT_NONCE_LEN) == 0) {
 			dp->dad_ns_lcount++;
 			continue;
 		}
 		refcount_acquire(&dp->dad_refcnt);
 		break;
 	}
 	DADQ_RUNLOCK();
 
 	return (dp);
 }
 
 /*
  * Optionally transmit a DAD NS for dp, then (re)arm its callout so that
  * nd6_dad_timer() runs with dp as argument after the given number of ticks.
  *
  * send_ns - non-zero to send an NS before arming the timer
  */
 static void
 nd6_dad_starttimer(struct dadq *dp, int ticks, int send_ns)
 {
 
 	if (send_ns != 0)
 		nd6_dad_ns_output(dp);
 	/* nd6_dad_timer takes a struct dadq *, hence the function cast. */
 	callout_reset(&dp->dad_timer_ch, ticks,
 	    (void (*)(void *))nd6_dad_timer, (void *)dp);
 }
 
 /*
  * Stop the DAD callout of dp using callout_drain().
  */
 static void
 nd6_dad_stoptimer(struct dadq *dp)
 {
 
 	callout_drain(&dp->dad_timer_ch);
 }
 
 /*
  * Drop one reference on dp.  When the last reference goes away the
  * interface address held by the entry is released and the entry itself
  * is freed.
  */
 static void
 nd6_dad_rele(struct dadq *dp)
 {
 
 	/* Somebody else still holds the entry: nothing more to do. */
 	if (!refcount_release(&dp->dad_refcnt))
 		return;
 
 	ifa_free(dp->dad_ifa);
 	free(dp, M_IP6NDP);
 }
 
 /*
  * Initialize the DAD queue lock and the (empty) DAD queue.
  */
 void
 nd6_dad_init(void)
 {
 
 	rw_init(&V_dad_rwlock, "nd6 DAD queue");
 	TAILQ_INIT(&V_dadq);
 }
 
 /*
  * Start Duplicate Address Detection (DAD) for specified interface address.
  *
  * ifa   - the address to probe; it must be marked IN6_IFF_TENTATIVE
  * delay - ticks to wait before the DAD timer first fires.  When 0, the
  *         first NS is sent immediately and the timer is armed with the
  *         interface's retransmission interval instead.
  *
  * On success a new DAD queue entry holding a reference on ifa is queued
  * and its timer is running.  Allocation failure is logged and otherwise
  * ignored.
  */
 void
 nd6_dad_start(struct ifaddr *ifa, int delay)
 {
 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
 	struct dadq *dp;
 	char ip6buf[INET6_ADDRSTRLEN];
 	int send_ns;
 
 	/*
 	 * If we don't need DAD, don't do it.
 	 * There are several cases:
 	 * - DAD is disabled (ip6_dad_count == 0)
 	 * - the interface address is anycast
 	 */
 	if (!(ia->ia6_flags & IN6_IFF_TENTATIVE)) {
 		log(LOG_DEBUG,
 			"nd6_dad_start: called with non-tentative address "
 			"%s(%s)\n",
 			ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
 		return;
 	}
 	if (ia->ia6_flags & IN6_IFF_ANYCAST) {
 		ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
 		return;
 	}
 	if (!V_ip6_dad_count) {
 		ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
 		return;
 	}
 	if (ifa->ifa_ifp == NULL)
 		panic("nd6_dad_start: ifa->ifa_ifp == NULL");
 	/* DAD administratively disabled on this interface. */
 	if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_NO_DAD) {
 		ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
 		return;
 	}
 	/* Interface not usable yet: leave the address tentative. */
 	if (!(ifa->ifa_ifp->if_flags & IFF_UP) ||
 	    !(ifa->ifa_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
 	    (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED)) {
 		ia->ia6_flags |= IN6_IFF_TENTATIVE;
 		return;
 	}
 	if ((dp = nd6_dad_find(ifa, NULL)) != NULL) {
 		/*
 		 * DAD is already in progress.  Let the existing entry
 		 * finish it.  nd6_dad_find() returned the entry with an
 		 * extra reference held, which must be dropped here or the
 		 * entry (and the ifaddr it pins) could never be freed.
 		 */
 		nd6_dad_rele(dp);
 		return;
 	}
 
 	dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT | M_ZERO);
 	if (dp == NULL) {
 		log(LOG_ERR, "nd6_dad_start: memory allocation failed for "
 			"%s(%s)\n",
 			ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
 		return;
 	}
 	callout_init(&dp->dad_timer_ch, 0);
 #ifdef VIMAGE
 	dp->dad_vnet = curvnet;
 #endif
 	nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp),
 	    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
 
 	/*
 	 * Send NS packet for DAD, ip6_dad_count times.
 	 * Note that we must delay the first transmission, if this is the
 	 * first packet to be sent from the interface after interface
 	 * (re)initialization.
 	 */
 	dp->dad_ifa = ifa;
 	ifa_ref(dp->dad_ifa);
 	dp->dad_count = V_ip6_dad_count;
 	dp->dad_ns_icount = dp->dad_na_icount = 0;
 	dp->dad_ns_ocount = dp->dad_ns_tcount = 0;
 	dp->dad_ns_lcount = dp->dad_loopbackprobe = 0;
 	/* The initial reference is the one dropped by nd6_dad_del(). */
 	refcount_init(&dp->dad_refcnt, 1);
 	nd6_dad_add(dp);
 	send_ns = 0;
 	if (delay == 0) {
 		/* No delay requested: probe now, then wait one retrans period. */
 		send_ns = 1;
 		delay = (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000;
 	}
 	nd6_dad_starttimer(dp, delay, send_ns);
 }
 
 /*
  * terminate DAD unconditionally.  used for address removals.
  *
  * Looks up the DAD entry for ifa, drains its timer and, if the entry is
  * still queued afterwards, removes it from the queue.  Does nothing when
  * no entry exists for ifa.
  */
 void
 nd6_dad_stop(struct ifaddr *ifa)
 {
 	struct dadq *dp;
 
 	/* A successful lookup returns dp with an extra reference held. */
 	dp = nd6_dad_find(ifa, NULL);
 	if (!dp) {
 		/* DAD wasn't started yet */
 		return;
 	}
 
 	nd6_dad_stoptimer(dp);
 
 	/*
 	 * The DAD queue entry may have been removed by nd6_dad_timer() while
 	 * we were waiting for it to stop, so re-do the lookup.
 	 */
 	/* Drop the reference from the first lookup; this may free dp. */
 	nd6_dad_rele(dp);
 	if (nd6_dad_find(ifa, NULL) == NULL)
 		return;
 
 	/*
 	 * Still queued: nd6_dad_del() unlinks the entry and drops one
 	 * reference, and the release below balances the second lookup.
 	 */
 	nd6_dad_del(dp);
 	nd6_dad_rele(dp);
 }
 
 /*
  * DAD callout handler for the queue entry dp.
  *
  * Runs in the vnet recorded in dp->dad_vnet.  Depending on the state of
  * the address and interface it either re-arms the timer (sending another
  * NS), declares the address duplicated, or finishes DAD successfully.
  * Every path except the re-arming ones ends at "err", which removes dp
  * from the DAD queue; "done" leaves dp queued.
  */
 static void
 nd6_dad_timer(struct dadq *dp)
 {
 	CURVNET_SET(dp->dad_vnet);
 	struct ifaddr *ifa = dp->dad_ifa;
 	struct ifnet *ifp = dp->dad_ifa->ifa_ifp;
 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	/*
 	 * Sanity check
 	 * NOTE(review): dp->dad_ifa has already been dereferenced in the
 	 * initializer of ifp above, so this test cannot protect against a
 	 * NULL dad_ifa -- confirm whether it is still needed.
 	 */
 	if (ia == NULL) {
 		log(LOG_ERR, "nd6_dad_timer: called with null parameter\n");
 		goto err;
 	}
 	if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
 		/* Do not need DAD for ifdisabled interface. */
 		log(LOG_ERR, "nd6_dad_timer: cancel DAD on %s because of "
 		    "ND6_IFF_IFDISABLED.\n", ifp->if_xname);
 		goto err;
 	}
 	if (ia->ia6_flags & IN6_IFF_DUPLICATED) {
 		log(LOG_ERR, "nd6_dad_timer: called with duplicated address "
 			"%s(%s)\n",
 			ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
 		goto err;
 	}
 	if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) {
 		log(LOG_ERR, "nd6_dad_timer: called with non-tentative address "
 			"%s(%s)\n",
 			ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
 		goto err;
 	}
 
 	/* Stop DAD if the interface is down even after dad_maxtry attempts. */
 	if ((dp->dad_ns_tcount > V_dad_maxtry) &&
 	    (((ifp->if_flags & IFF_UP) == 0) ||
 	     ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))) {
 		nd6log((LOG_INFO, "%s: could not run DAD "
 		    "because the interface was down or not running.\n",
 		    if_name(ifa->ifa_ifp)));
 		goto err;
 	}
 
 	/* Need more checks? */
 	if (dp->dad_ns_ocount < dp->dad_count) {
 		/*
 		 * We have more NS to go.  Send NS packet for DAD.
 		 */
 		nd6_dad_starttimer(dp,
 		    (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, 1);
 		goto done;
 	} else {
 		/*
 		 * We have transmitted sufficient number of DAD packets.
 		 * See what we've got.
 		 */
 		if (dp->dad_ns_icount > 0 || dp->dad_na_icount > 0)
 			/* We've seen NS or NA, means DAD has failed. */
 			nd6_dad_duplicated(ifa, dp);
 		else if (V_dad_enhanced != 0 &&
 		    dp->dad_ns_lcount > 0 &&
 		    dp->dad_ns_lcount > dp->dad_loopbackprobe) {
 			/*
 			 * Sec. 4.1 in RFC 7527 requires transmission of
 			 * additional probes until the loopback condition
 			 * becomes clear when a looped back probe is detected.
 			 */
 			log(LOG_ERR, "%s: a looped back NS message is "
 			    "detected during DAD for %s.  "
 			    "Another DAD probes are being sent.\n",
 			    if_name(ifa->ifa_ifp),
 			    ip6_sprintf(ip6buf, IFA_IN6(ifa)));
 			/* Remember how many loopbacks were seen so far. */
 			dp->dad_loopbackprobe = dp->dad_ns_lcount;
 			/*
 			 * Send an NS immediately and increase dad_count by
 			 * V_nd6_mmaxtries - 1.
 			 */
 			dp->dad_count =
 			    dp->dad_ns_ocount + V_nd6_mmaxtries - 1;
 			nd6_dad_starttimer(dp,
 			    (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000,
 			    1);
 			goto done;
 		} else {
 			/*
 			 * We are done with DAD.  No NA came, no NS came.
 			 * No duplicate address found.  Check IFDISABLED flag
 			 * again in case that it is changed between the
 			 * beginning of this function and here.
 			 */
 			if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) == 0)
 				ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
 
 			nd6log((LOG_DEBUG,
 			    "%s: DAD complete for %s - no duplicates found\n",
 			    if_name(ifa->ifa_ifp),
 			    ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
 			if (dp->dad_ns_lcount > 0)
 				log(LOG_ERR, "%s: DAD completed while "
 				    "a looped back NS message is detected "
 				    "during DAD for %s.\n",
 				    if_name(ifa->ifa_ifp),
 				    ip6_sprintf(ip6buf, IFA_IN6(ifa)));
 		}
 	}
 	/*
 	 * Reached by every terminal outcome (including the duplicated and
 	 * the successful-completion cases, which fall through): the entry
 	 * is unlinked and one reference on it is dropped.
 	 */
 err:
 	nd6_dad_del(dp);
 	/* Timer re-armed paths jump here and keep the entry queued. */
 done:
 	CURVNET_RESTORE();
 }
 
 static void
 nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp)
 {
 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
 	struct ifnet *ifp;
 	char ip6buf[INET6_ADDRSTRLEN];
 
 	log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: "
 	    "NS in/out/loopback=%d/%d/%d, NA in=%d\n",
 	    if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
 	    dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_ns_lcount,
 	    dp->dad_na_icount);
 
 	ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
 	ia->ia6_flags |= IN6_IFF_DUPLICATED;
 
 	ifp = ifa->ifa_ifp;
 	log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n",
 	    if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr));
 	log(LOG_ERR, "%s: manual intervention required\n",
 	    if_name(ifp));
 
 	/*
 	 * If the address is a link-local address formed from an interface
 	 * identifier based on the hardware address which is supposed to be
 	 * uniquely assigned (e.g., EUI-64 for an Ethernet interface), IP
 	 * operation on the interface SHOULD be disabled.
 	 * [RFC 4862, Section 5.4.5]
 	 */
 	if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) {
 		struct in6_addr in6;
 
 		/*
 		 * To avoid over-reaction, we only apply this logic when we are
 		 * very sure that hardware addresses are supposed to be unique.
 		 */
 		switch (ifp->if_type) {
 		case IFT_ETHER:
 		case IFT_FDDI:
 		case IFT_ATM:
 		case IFT_IEEE1394:
 		case IFT_IEEE80211:
 		case IFT_INFINIBAND:
 			in6 = ia->ia_addr.sin6_addr;
 			if (in6_get_hw_ifid(ifp, &in6) == 0 &&
 			    IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &in6)) {
 				ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED;
 				log(LOG_ERR, "%s: possible hardware address "
 				    "duplication detected, disable IPv6\n",
 				    if_name(ifp));
 			}
 			break;
 		}
 	}
 }
 
 /*
  * Transmit one DAD Neighbor Solicitation for the address tracked by dp.
  *
  * The attempt counter is always advanced; the NS itself is sent (and the
  * sent counter advanced) only while the interface is both up and running.
  * With enhanced DAD enabled a fresh random nonce is generated first.
  */
 static void
 nd6_dad_ns_output(struct dadq *dp)
 {
 	struct ifnet *ifp;
 	struct in6_ifaddr *ia6;
 	int idx;
 
 	ifp = dp->dad_ifa->ifa_ifp;
 	ia6 = (struct in6_ifaddr *)dp->dad_ifa;
 
 	dp->dad_ns_tcount++;
 	if ((ifp->if_flags & IFF_UP) == 0 ||
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return;
 
 	dp->dad_ns_ocount++;
 	if (V_dad_enhanced != 0) {
 		/*
 		 * XXXHRS: Note that in the case that
 		 * DupAddrDetectTransmits > 1, multiple NS messages with
 		 * different nonces can be looped back in an unexpected
 		 * order.  The current implementation recognizes only
 		 * the latest nonce on the sender side.  Practically it
 		 * should work well in almost all cases.
 		 */
 		idx = 0;
 		while (idx < ND_OPT_NONCE_LEN32) {
 			dp->dad_nonce[idx] = arc4random();
 			idx++;
 		}
 	}
 	nd6_ns_output(ifp, NULL, NULL, &ia6->ia_addr.sin6_addr,
 	    (uint8_t *)&dp->dad_nonce[0]);
 }
 
 /*
  * Account for a Neighbor Solicitation received for an address that is
  * undergoing DAD.
  *
  * ifa         - the address the NS targets; must not be NULL (panics)
  * ndopt_nonce - nonce option from the NS, or NULL; ignored when enhanced
  *               DAD is disabled
  *
  * If a DAD entry is found for ifa (one whose nonce does not match the
  * received one), its received-NS counter is incremented.  The reference
  * returned by nd6_dad_find() is dropped before returning.
  */
 static void
 nd6_dad_ns_input(struct ifaddr *ifa, struct nd_opt_nonce *ndopt_nonce)
 {
 	struct dadq *dp;
 
 	if (ifa == NULL)
 		panic("ifa == NULL in nd6_dad_ns_input");
 
 	/* Ignore Nonce option when Enhanced DAD is disabled. */
 	if (V_dad_enhanced == 0)
 		ndopt_nonce = NULL;
 	dp = nd6_dad_find(ifa, ndopt_nonce);
 	if (dp == NULL)
 		return;
 
 	dp->dad_ns_icount++;
 	nd6_dad_rele(dp);
 }
 
 /*
  * Account for a Neighbor Advertisement received for an address that is
  * undergoing DAD: bump the received-NA counter of its DAD entry, if one
  * exists.  ifa must not be NULL (panics otherwise).
  */
 static void
 nd6_dad_na_input(struct ifaddr *ifa)
 {
 	struct dadq *entry;
 
 	if (ifa == NULL)
 		panic("ifa == NULL in nd6_dad_na_input");
 
 	entry = nd6_dad_find(ifa, NULL);
 	if (entry == NULL)
 		return;
 
 	entry->dad_na_icount++;
 	/* Drop the reference taken by the lookup. */
 	nd6_dad_rele(entry);
 }
Index: projects/clang380-import/sys/netinet6/raw_ip6.c
===================================================================
--- projects/clang380-import/sys/netinet6/raw_ip6.c	(revision 293686)
+++ projects/clang380-import/sys/netinet6/raw_ip6.c	(revision 293687)
@@ -1,924 +1,905 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)raw_ip.c	8.2 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ipsec.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/syslog.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_pcb.h>
 
 #include <netinet/icmp6.h>
 #include <netinet/ip6.h>
 #include <netinet/ip_var.h>
 #include <netinet6/ip6protosw.h>
 #include <netinet6/ip6_mroute.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet6/raw_ip6.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/send.h>
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/ipsec6.h>
 #endif /* IPSEC */
 
 #include <machine/stdarg.h>
 
 /* Cast helpers from the generic sockaddr/ifaddr to their IPv6 forms. */
 #define	satosin6(sa)	((struct sockaddr_in6 *)(sa))
 #define	ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
 
 /*
  * Raw interface to IP6 protocol.
  */
 
 /*
  * Raw socket PCB list and PCB info; only declared here, so the storage
  * lives in another file (presumably the IPv4 raw socket code -- verify).
  */
 VNET_DECLARE(struct inpcbhead, ripcb);
 VNET_DECLARE(struct inpcbinfo, ripcbinfo);
 #define	V_ripcb				VNET(ripcb)
 #define	V_ripcbinfo			VNET(ripcbinfo)
 
 /* Raw socket buffer sizes, defined outside this file. */
 extern u_long	rip_sendspace;
 extern u_long	rip_recvspace;
 
 /* Per-CPU raw IPv6 statistics (updated through RIP6STAT_INC below). */
 VNET_PCPUSTAT_DEFINE(struct rip6stat, rip6stat);
 VNET_PCPUSTAT_SYSINIT(rip6stat);
 
 #ifdef VIMAGE
 VNET_PCPUSTAT_SYSUNINIT(rip6stat);
 #endif /* VIMAGE */
 
 /*
  * Hooks for multicast routing. They all default to NULL, so leave them not
  * initialized and rely on BSS being set to 0.
  */
 
 /*
  * The socket used to communicate with the multicast routing daemon.
  */
 VNET_DEFINE(struct socket *, ip6_mrouter);
 
 /*
  * The various mrouter functions.
  */
 int (*ip6_mrouter_set)(struct socket *, struct sockopt *);
 int (*ip6_mrouter_get)(struct socket *, struct sockopt *);
 int (*ip6_mrouter_done)(void);
 int (*ip6_mforward)(struct ip6_hdr *, struct ifnet *, struct mbuf *);
 int (*mrt6_ioctl)(u_long, caddr_t);
 
 /*
  * Setup generic address and protocol structures for raw_input routine, then
  * pass them along with mbuf chain.
  *
  * mp    - in: the received packet (*mp)
  * offp  - in: offset of the upper-layer payload within the packet
  * proto - upper-layer protocol number of the payload
  *
  * Every raw IPv6 PCB that matches the packet receives it.  Delivery runs
  * one match behind: the previous match ("last") is handed a copy when the
  * next match is found, and the final match gets the original mbuf.  If no
  * PCB matches, the packet is freed (IPPROTO_NONE) or answered with an
  * ICMPv6 parameter problem.  Always returns IPPROTO_DONE.
  */
 int
 rip6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct ifnet *ifp;
 	struct mbuf *m = *mp;
 	register struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
 	register struct inpcb *in6p;
 	struct inpcb *last = 0;
 	struct mbuf *opts = NULL;
 	struct sockaddr_in6 fromsa;
 
 	RIP6STAT_INC(rip6s_ipackets);
 
 	init_sin6(&fromsa, m); /* general init */
 
 	ifp = m->m_pkthdr.rcvif;
 
 	INP_INFO_RLOCK(&V_ripcbinfo);
 	LIST_FOREACH(in6p, &V_ripcb, inp_list) {
 		/* XXX inp locking */
 		/* Cheap filters first: family, protocol, bound addresses. */
 		if ((in6p->inp_vflag & INP_IPV6) == 0)
 			continue;
 		if (in6p->inp_ip_p &&
 		    in6p->inp_ip_p != proto)
 			continue;
 		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
 		    !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
 			continue;
 		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
 		    !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
 			continue;
 		if (jailed_without_vnet(in6p->inp_cred)) {
 			/*
 			 * Allow raw socket in jail to receive multicast;
 			 * assume process had PRIV_NETINET_RAW at attach,
 			 * and fall through into normal filter path if so.
 			 */
 			if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
 			    prison_check_ip6(in6p->inp_cred,
 			    &ip6->ip6_dst) != 0)
 				continue;
 		}
 		/*
 		 * From here on the PCB is read-locked; every "continue"
 		 * below must unlock it first.  A PCB that passes all checks
 		 * stays locked while it is remembered in "last".
 		 */
 		INP_RLOCK(in6p);
 		/* in6p_cksum == -1 means no checksum verification. */
 		if (in6p->in6p_cksum != -1) {
 			RIP6STAT_INC(rip6s_isum);
 			if (in6_cksum(m, proto, *offp,
 			    m->m_pkthdr.len - *offp)) {
 				INP_RUNLOCK(in6p);
 				RIP6STAT_INC(rip6s_badsum);
 				continue;
 			}
 		}
 		/*
 		 * If this raw socket has multicast state, and we
 		 * have received a multicast, check if this socket
 		 * should receive it, as multicast filtering is now
 		 * the responsibility of the transport layer.
 		 */
 		if (in6p->in6p_moptions &&
 		    IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 			/*
 			 * If the incoming datagram is for MLD, allow it
 			 * through unconditionally to the raw socket.
 			 *
 			 * Use the M_RTALERT_MLD flag to check for MLD
 			 * traffic without having to inspect the mbuf chain
 			 * more deeply, as all MLDv1/v2 host messages MUST
 			 * contain the Router Alert option.
 			 *
 			 * In the case of MLDv1, we may not have explicitly
 			 * joined the group, and may have set IFF_ALLMULTI
 			 * on the interface. im6o_mc_filter() may discard
 			 * control traffic we actually need to see.
 			 *
 			 * Userland multicast routing daemons should continue
 			 * filter the control traffic appropriately.
 			 */
 			int blocked;
 
 			blocked = MCAST_PASS;
 			if ((m->m_flags & M_RTALERT_MLD) == 0) {
 				struct sockaddr_in6 mcaddr;
 
 				bzero(&mcaddr, sizeof(struct sockaddr_in6));
 				mcaddr.sin6_len = sizeof(struct sockaddr_in6);
 				mcaddr.sin6_family = AF_INET6;
 				mcaddr.sin6_addr = ip6->ip6_dst;
 
 				blocked = im6o_mc_filter(in6p->in6p_moptions,
 				    ifp,
 				    (struct sockaddr *)&mcaddr,
 				    (struct sockaddr *)&fromsa);
 			}
 			if (blocked != MCAST_PASS) {
 				IP6STAT_INC(ip6s_notmember);
 				INP_RUNLOCK(in6p);
 				continue;
 			}
 		}
 		/*
 		 * in6p matches.  Deliver a copy of the packet to the
 		 * previously matched PCB, if any, before replacing it.
 		 */
 		if (last != NULL) {
 			struct mbuf *n = m_copy(m, 0, (int)M_COPYALL);
 
 #ifdef IPSEC
 			/*
 			 * Check AH/ESP integrity.
 			 */
 			if (n && ipsec6_in_reject(n, last)) {
 				m_freem(n);
 				/* Do not inject data into pcb. */
 			} else
 #endif /* IPSEC */
 			if (n) {
 				if (last->inp_flags & INP_CONTROLOPTS ||
 				    last->inp_socket->so_options & SO_TIMESTAMP)
 					ip6_savecontrol(last, n, &opts);
 				/* strip intermediate headers */
 				m_adj(n, *offp);
 				/* A zero return means the append failed. */
 				if (sbappendaddr(&last->inp_socket->so_rcv,
 						(struct sockaddr *)&fromsa,
 						 n, opts) == 0) {
 					m_freem(n);
 					if (opts)
 						m_freem(opts);
 					RIP6STAT_INC(rip6s_fullsock);
 				} else
 					sorwakeup(last->inp_socket);
 				/* opts was consumed or freed above. */
 				opts = NULL;
 			}
 			INP_RUNLOCK(last);
 		}
 		last = in6p;
 	}
 	INP_INFO_RUNLOCK(&V_ripcbinfo);
 	/* The final match (if any) is still locked and gets the original m. */
 #ifdef IPSEC
 	/*
 	 * Check AH/ESP integrity.
 	 */
 	if ((last != NULL) && ipsec6_in_reject(m, last)) {
 		m_freem(m);
 		IP6STAT_DEC(ip6s_delivered);
 		/* Do not inject data into pcb. */
 		INP_RUNLOCK(last);
 	} else
 #endif /* IPSEC */
 	if (last != NULL) {
 		if (last->inp_flags & INP_CONTROLOPTS ||
 		    last->inp_socket->so_options & SO_TIMESTAMP)
 			ip6_savecontrol(last, m, &opts);
 		/* Strip intermediate headers. */
 		m_adj(m, *offp);
 		if (sbappendaddr(&last->inp_socket->so_rcv,
 		    (struct sockaddr *)&fromsa, m, opts) == 0) {
 			m_freem(m);
 			if (opts)
 				m_freem(opts);
 			RIP6STAT_INC(rip6s_fullsock);
 		} else
 			sorwakeup(last->inp_socket);
 		INP_RUNLOCK(last);
 	} else {
 		/* Nobody wanted the packet. */
 		RIP6STAT_INC(rip6s_nosock);
 		if (m->m_flags & M_MCAST)
 			RIP6STAT_INC(rip6s_nosockmcast);
 		if (proto == IPPROTO_NONE)
 			m_freem(m);
 		else {
 			/* Report the offending Next Header field's offset. */
 			char *prvnxtp = ip6_get_prevhdr(m, *offp); /* XXX */
 			icmp6_error(m, ICMP6_PARAM_PROB,
 			    ICMP6_PARAMPROB_NEXTHEADER,
 			    prvnxtp - mtod(m, char *));
 		}
 		IP6STAT_DEC(ip6s_delivered);
 	}
 	return (IPPROTO_DONE);
 }
 
 /*
  * Control-input handler for raw IPv6 sockets.
  *
  * cmd - PRC_* command; out-of-range values and commands with no entry in
  *       inet6ctlerrmap are ignored (redirects and PRC_HOSTDEAD are always
  *       processed)
  * sa  - address the event refers to; must be a full-size AF_INET6 sockaddr
  * d   - optional struct ip6ctlparam from icmp6; discarded for redirects
  *       and PRC_HOSTDEAD
  *
  * Matching raw PCBs are notified through in6_pcbnotify() with
  * in6_rtchange as the callback.
  */
 void
 rip6_ctlinput(int cmd, struct sockaddr *sa, void *d)
 {
 	struct ip6ctlparam *ip6cp;
 	const struct sockaddr_in6 *sa6_src;
 	void *cmdarg;
 	struct inpcb *(*notify)(struct inpcb *, int) = in6_rtchange;
 
 	if (sa->sa_family != AF_INET6 ||
 	    sa->sa_len != sizeof(struct sockaddr_in6))
 		return;
 
 	if ((unsigned)cmd >= PRC_NCMDS)
 		return;
 	if (PRC_IS_REDIRECT(cmd)) {
 		notify = in6_rtchange;
 		d = NULL;
 	} else if (cmd == PRC_HOSTDEAD)
 		d = NULL;
 	else if (inet6ctlerrmap[cmd] == 0)
 		return;
 
 	/*
 	 * If the parameter is from icmp6, decode it.  Only the command
 	 * argument and the source address are needed here.
 	 */
 	if (d != NULL) {
 		ip6cp = (struct ip6ctlparam *)d;
 		cmdarg = ip6cp->ip6c_cmdarg;
 		sa6_src = ip6cp->ip6c_src;
 	} else {
 		cmdarg = NULL;
 		sa6_src = &sa6_any;
 	}
 
 	(void) in6_pcbnotify(&V_ripcbinfo, sa, 0,
 	    (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify);
 }
 
 /*
  * Generate IPv6 header and pass packet to ip6_output.  Tack on options user
  * may have set up with control call.
  */
 int
 rip6_output(struct mbuf *m, struct socket *so, ...)
 {
 	struct mbuf *control;
 	struct m_tag *mtag;
 	struct sockaddr_in6 *dstsock;
 	struct in6_addr *dst;
 	struct ip6_hdr *ip6;
 	struct inpcb *in6p;
 	u_int	plen = m->m_pkthdr.len;
 	int error = 0;
 	struct ip6_pktopts opt, *optp;
 	struct ifnet *oifp = NULL;
 	int type = 0, code = 0;		/* for ICMPv6 output statistics only */
 	int scope_ambiguous = 0;
 	int use_defzone = 0;
+	int hlim = 0;
 	struct in6_addr in6a;
 	va_list ap;
 
 	/*
 	 * The variadic tail carries exactly two arguments, in this order:
 	 * the destination sockaddr_in6 and the (possibly NULL) control mbuf.
 	 */
 	va_start(ap, so);
 	dstsock = va_arg(ap, struct sockaddr_in6 *);
 	control = va_arg(ap, struct mbuf *);
 	va_end(ap);
 
 	in6p = sotoinpcb(so);
 	INP_WLOCK(in6p);
 
 	dst = &dstsock->sin6_addr;
 	/*
 	 * Per-call options from the control mbuf are parsed into the local
 	 * 'opt'; without a control mbuf the socket's sticky options are used.
 	 */
 	if (control != NULL) {
 		if ((error = ip6_setpktopts(control, &opt,
 		    in6p->in6p_outputopts, so->so_cred,
 		    so->so_proto->pr_protocol)) != 0) {
 			goto bad;
 		}
 		optp = &opt;
 	} else
 		optp = in6p->in6p_outputopts;
 
 	/*
 	 * Check and convert scope zone ID into internal form.
 	 *
 	 * XXX: we may still need to determine the zone later.
 	 */
 	if (!(so->so_state & SS_ISCONNECTED)) {
 		if (!optp || !optp->ip6po_pktinfo ||
 		    !optp->ip6po_pktinfo->ipi6_ifindex)
 			use_defzone = V_ip6_use_defzone;
 		if (dstsock->sin6_scope_id == 0 && !use_defzone)
 			scope_ambiguous = 1;
 		if ((error = sa6_embedscope(dstsock, use_defzone)) != 0)
 			goto bad;
 	}
 
 	/*
 	 * For an ICMPv6 packet, we should know its type and code to update
 	 * statistics.
 	 */
 	if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
 		struct icmp6_hdr *icmp6;
 		if (m->m_len < sizeof(struct icmp6_hdr) &&
 		    (m = m_pullup(m, sizeof(struct icmp6_hdr))) == NULL) {
 			error = ENOBUFS;
 			goto bad;
 		}
 		icmp6 = mtod(m, struct icmp6_hdr *);
 		type = icmp6->icmp6_type;
 		code = icmp6->icmp6_code;
 	}
 
 	/* Make room for the IPv6 header in front of the payload. */
 	M_PREPEND(m, sizeof(*ip6), M_NOWAIT);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto bad;
 	}
 	ip6 = mtod(m, struct ip6_hdr *);
 
 	/*
 	 * Source address selection.
 	 */
-	error = in6_selectsrc(dstsock, optp, in6p, so->so_cred,
-	    &oifp, &in6a);
+	error = in6_selectsrc_socket(dstsock, optp, in6p, so->so_cred,
+	    scope_ambiguous, &in6a, &hlim);
+
 	if (error)
 		goto bad;
 	error = prison_check_ip6(in6p->inp_cred, &in6a);
 	if (error != 0)
 		goto bad;
 	ip6->ip6_src = in6a;
 
-	if (oifp && scope_ambiguous) {
-		/*
-		 * Application should provide a proper zone ID or the use of
-		 * default zone IDs should be enabled.  Unfortunately, some
-		 * applications do not behave as it should, so we need a
-		 * workaround.  Even if an appropriate ID is not determined
-		 * (when it's required), if we can determine the outgoing
-		 * interface. determine the zone ID based on the interface.
-		 */
-		error = in6_setscope(&dstsock->sin6_addr, oifp, NULL);
-		if (error != 0)
-			goto bad;
-	}
 	ip6->ip6_dst = dstsock->sin6_addr;
 
 	/*
 	 * Fill in the rest of the IPv6 header fields.
 	 */
 	ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
 	    (in6p->inp_flow & IPV6_FLOWINFO_MASK);
 	ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
 	    (IPV6_VERSION & IPV6_VERSION_MASK);
 
 	/*
 	 * ip6_plen is filled in by ip6_output(), so do not set it here.
 	 */
 	ip6->ip6_nxt = in6p->inp_ip_p;
-	ip6->ip6_hlim = in6_selecthlim(in6p, oifp);
+	ip6->ip6_hlim = hlim;
 
 	/*
 	 * A checksum is always computed for ICMPv6; for other protocols
 	 * only when the socket has a checksum offset set (in6p_cksum != -1).
 	 */
 	if (so->so_proto->pr_protocol == IPPROTO_ICMPV6 ||
 	    in6p->in6p_cksum != -1) {
 		struct mbuf *n;
 		int off;
 		u_int16_t *p;
 
 		/* Compute checksum. */
 		if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
 			off = offsetof(struct icmp6_hdr, icmp6_cksum);
 		else
 			off = in6p->in6p_cksum;
 		/* The 16-bit checksum field must lie within the payload. */
 		if (plen < off + 1) {
 			error = EINVAL;
 			goto bad;
 		}
 		off += sizeof(struct ip6_hdr);
 
 		/* Walk the chain to the mbuf that holds the checksum field. */
 		n = m;
 		while (n && n->m_len <= off) {
 			off -= n->m_len;
 			n = n->m_next;
 		}
 		/*
 		 * NOTE(review): error is still 0 on this path, so the packet
 		 * is freed but the caller is told the send succeeded --
 		 * confirm whether an errno should be set here.
 		 */
 		if (!n)
 			goto bad;
 		p = (u_int16_t *)(mtod(n, caddr_t) + off);
 		*p = 0;
 		*p = in6_cksum(m, ip6->ip6_nxt, sizeof(*ip6), plen);
 	}
 
 	/*
 	 * Send RA/RS messages to user land for protection, before sending
 	 * them to rtadvd/rtsol.
 	 */
 	if ((send_sendso_input_hook != NULL) &&
 	    so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
 		switch (type) {
 		case ND_ROUTER_ADVERT:
 		case ND_ROUTER_SOLICIT:
 			mtag = m_tag_get(PACKET_TAG_ND_OUTGOING,
 				sizeof(unsigned short), M_NOWAIT);
 			/*
 			 * NOTE(review): as above, this drops the packet with
 			 * error == 0 -- confirm whether ENOBUFS is intended.
 			 */
 			if (mtag == NULL)
 				goto bad;
 			m_tag_prepend(m, mtag);
 		}
 	}
 
 	/* oifp is written by ip6_output() and used only for the statistics. */
 	error = ip6_output(m, optp, NULL, 0, in6p->in6p_moptions, &oifp, in6p);
 	if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) {
 		if (oifp)
 			icmp6_ifoutstat_inc(oifp, type, code);
 		ICMP6STAT_INC(icp6s_outhist[type]);
 	} else
 		RIP6STAT_INC(rip6s_opackets);
 
 	/* Skip the m_freem() below; presumably ip6_output() consumed m. */
 	goto freectl;
 
  bad:
 	if (m)
 		m_freem(m);
 
  freectl:
 	/* 'opt' is only ever filled in when a control mbuf was supplied. */
 	if (control != NULL) {
 		ip6_clearpktopts(&opt, -1);
 		m_freem(control);
 	}
 	INP_WUNLOCK(in6p);
 	return (error);
 }
 
 /*
  * Raw IPv6 socket option processing.
  */
 int
 rip6_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct inpcb *inp;
 	int error;
 
 	if (sopt->sopt_level == IPPROTO_ICMPV6)
 		/*
 		 * XXX: is it better to call icmp6_ctloutput() directly
 		 * from protosw?
 		 */
 		return (icmp6_ctloutput(so, sopt));
 	else if (sopt->sopt_level != IPPROTO_IPV6) {
 		if (sopt->sopt_level == SOL_SOCKET &&
 		    sopt->sopt_name == SO_SETFIB) {
 			inp = sotoinpcb(so);
 			INP_WLOCK(inp);
 			inp->inp_inc.inc_fibnum = so->so_fibnum;
 			INP_WUNLOCK(inp);
 			return (0);
 		}
 		return (EINVAL);
 	}
 
 	error = 0;
 
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case MRT6_INIT:
 		case MRT6_DONE:
 		case MRT6_ADD_MIF:
 		case MRT6_DEL_MIF:
 		case MRT6_ADD_MFC:
 		case MRT6_DEL_MFC:
 		case MRT6_PIM:
 			error = ip6_mrouter_get ?  ip6_mrouter_get(so, sopt) :
 			    EOPNOTSUPP;
 			break;
 		case IPV6_CHECKSUM:
 			error = ip6_raw_ctloutput(so, sopt);
 			break;
 		default:
 			error = ip6_ctloutput(so, sopt);
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case MRT6_INIT:
 		case MRT6_DONE:
 		case MRT6_ADD_MIF:
 		case MRT6_DEL_MIF:
 		case MRT6_ADD_MFC:
 		case MRT6_DEL_MFC:
 		case MRT6_PIM:
 			error = ip6_mrouter_set ?  ip6_mrouter_set(so, sopt) :
 			    EOPNOTSUPP;
 			break;
 		case IPV6_CHECKSUM:
 			error = ip6_raw_ctloutput(so, sopt);
 			break;
 		default:
 			error = ip6_ctloutput(so, sopt);
 			break;
 		}
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Attach a protocol control block to a newly created raw IPv6 socket.
  *
  * Requires PRIV_NETINET_RAW, reserves socket buffer space, allocates an
  * ICMPv6 filter, and allocates the inpcb on the raw-IP PCB list.  Returns
  * 0 on success or the first errno encountered; on failure nothing
  * allocated here stays attached to the socket.
  */
 static int
 rip6_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct inpcb *inp;
 	struct icmp6_filter *filter;
 	int error;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp == NULL, ("rip6_attach: inp != NULL"));
 
 	error = priv_check(td, PRIV_NETINET_RAW);
 	if (error)
 		return (error);
 	error = soreserve(so, rip_sendspace, rip_recvspace);
 	if (error)
 		return (error);
 	/* Allocate the filter before taking the pcbinfo lock. */
 	filter = malloc(sizeof(struct icmp6_filter), M_PCB, M_NOWAIT);
 	if (filter == NULL)
 		return (ENOMEM);
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	error = in_pcballoc(so, &V_ripcbinfo);
 	if (error) {
 		INP_INFO_WUNLOCK(&V_ripcbinfo);
 		free(filter, M_PCB);
 		return (error);
 	}
 	inp = (struct inpcb *)so->so_pcb;
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 	/*
 	 * There is no INP_WLOCK() in this function to match the
 	 * INP_WUNLOCK() below; presumably in_pcballoc() returns the new
 	 * inpcb write-locked -- TODO confirm.
 	 */
 	inp->inp_vflag |= INP_IPV6;
 	inp->inp_ip_p = (long)proto;
 	inp->in6p_hops = -1;	/* use kernel default */
 	inp->in6p_cksum = -1;	/* -1 is tested as "no checksum offset" in rip6_output() */
 	inp->in6p_icmp6filt = filter;
 	ICMP6_FILTER_SETPASSALL(inp->in6p_icmp6filt);
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 /*
  * Tear down the protocol control block of a raw IPv6 socket: shut down
  * multicast routing if this socket is the multicast router, free the
  * ICMPv6 filter, then detach and free the inpcb.
  */
 static void
 rip6_detach(struct socket *so)
 {
 	struct inpcb *inp;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip6_detach: inp == NULL"));
 
 	/* ip6_mrouter_done is an optional hook; it may be NULL. */
 	if (so == V_ip6_mrouter && ip6_mrouter_done)
 		ip6_mrouter_done();
 	/* xxx: RSVP */
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	INP_WLOCK(inp);
 	free(inp->in6p_icmp6filt, M_PCB);
 	in_pcbdetach(inp);
 	/*
 	 * No INP_WUNLOCK() follows; presumably in_pcbfree() releases the
 	 * inpcb lock as part of freeing it -- TODO confirm.
 	 */
 	in_pcbfree(inp);
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 }
 
 /*
  * Mark the socket as disconnected.
  *
  * XXXRW: This can't ever be called (as the pru_abort entry); note that
  * rip6_disconnect() in this file does invoke it directly.
  */
 static void
 rip6_abort(struct socket *so)
 {
 
 	KASSERT(sotoinpcb(so) != NULL, ("rip6_abort: inp == NULL"));
 	soisdisconnected(so);
 }
 
 /*
  * Close handler for raw IPv6 sockets: only marks the socket as
  * disconnected; the inpcb itself is not touched here.
  */
 static void
 rip6_close(struct socket *so)
 {
 
 	KASSERT(sotoinpcb(so) != NULL, ("rip6_close: inp == NULL"));
 	soisdisconnected(so);
 }
 
 /*
  * Disconnect a connected raw IPv6 socket by clearing its foreign address
  * and marking it disconnected.  Returns ENOTCONN if the socket is not
  * connected, 0 otherwise.
  */
 static int
 rip6_disconnect(struct socket *so)
 {
 	struct inpcb *pcb = sotoinpcb(so);
 
 	KASSERT(pcb != NULL, ("rip6_disconnect: inp == NULL"));
 
 	if (so->so_state & SS_ISCONNECTED) {
 		pcb->in6p_faddr = in6addr_any;
 		rip6_abort(so);
 		return (0);
 	}
 	return (ENOTCONN);
 }
 
 static int
 rip6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp;
 	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam;
 	struct ifaddr *ifa = NULL;
 	int error = 0;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip6_bind: inp == NULL"));
 
 	if (nam->sa_len != sizeof(*addr))
 		return (EINVAL);
 	if ((error = prison_check_ip6(td->td_ucred, &addr->sin6_addr)) != 0)
 		return (error);
 	if (TAILQ_EMPTY(&V_ifnet) || addr->sin6_family != AF_INET6)
 		return (EADDRNOTAVAIL);
 	if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0)
 		return (error);
 
 	if (!IN6_IS_ADDR_UNSPECIFIED(&addr->sin6_addr) &&
 	    (ifa = ifa_ifwithaddr((struct sockaddr *)addr)) == NULL)
 		return (EADDRNOTAVAIL);
 	if (ifa != NULL &&
 	    ((struct in6_ifaddr *)ifa)->ia6_flags &
 	    (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|
 	     IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) {
 		ifa_free(ifa);
 		return (EADDRNOTAVAIL);
 	}
 	if (ifa != NULL)
 		ifa_free(ifa);
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	INP_WLOCK(inp);
 	inp->in6p_laddr = addr->sin6_addr;
 	INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 	return (0);
 }
 
 /*
  * Connect a raw IPv6 socket to the peer address in nam.
  *
  * Validates the sockaddr, embeds the scope zone, selects a source
  * address, then records the foreign and local addresses in the inpcb and
  * marks the socket connected.  Returns 0 on success or an errno.
  */
 static int
 rip6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp;
 	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)nam;
 	struct in6_addr in6a;
-	struct ifnet *ifp = NULL;
 	int error = 0, scope_ambiguous = 0;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip6_connect: inp == NULL"));
 
 	if (nam->sa_len != sizeof(*addr))
 		return (EINVAL);
 	if (TAILQ_EMPTY(&V_ifnet))
 		return (EADDRNOTAVAIL);
 	if (addr->sin6_family != AF_INET6)
 		return (EAFNOSUPPORT);
 
 	/*
 	 * Application should provide a proper zone ID or the use of default
 	 * zone IDs should be enabled.  Unfortunately, some applications do
 	 * not behave as it should, so we need a workaround.  Even if an
 	 * appropriate ID is not determined, we'll see if we can determine
 	 * the outgoing interface.  If we can, determine the zone ID based on
 	 * the interface below.
 	 */
 	if (addr->sin6_scope_id == 0 && !V_ip6_use_defzone)
 		scope_ambiguous = 1;
 	if ((error = sa6_embedscope(addr, V_ip6_use_defzone)) != 0)
 		return (error);
 
 	INP_INFO_WLOCK(&V_ripcbinfo);
 	INP_WLOCK(inp);
 	/* Source address selection. XXX: need pcblookup? */
 	/*
 	 * NOTE(review): scope_ambiguous is handed to the selection routine;
 	 * presumably it now performs the interface-based zone-ID fixup
 	 * described above -- confirm.  The hop-limit output is not needed
 	 * here, hence the NULL last argument.
 	 */
-	error = in6_selectsrc(addr, inp->in6p_outputopts,
-	    inp, so->so_cred, &ifp, &in6a);
+	error = in6_selectsrc_socket(addr, inp->in6p_outputopts,
+	    inp, so->so_cred, scope_ambiguous, &in6a, NULL);
 	if (error) {
 		INP_WUNLOCK(inp);
 		INP_INFO_WUNLOCK(&V_ripcbinfo);
 		return (error);
 	}
 
-	/* XXX: see above */
-	if (ifp && scope_ambiguous &&
-	    (error = in6_setscope(&addr->sin6_addr, ifp, NULL)) != 0) {
-		INP_WUNLOCK(inp);
-		INP_INFO_WUNLOCK(&V_ripcbinfo);
-		return (error);
-	}
 	inp->in6p_faddr = addr->sin6_addr;
 	inp->in6p_laddr = in6a;
 	soisconnected(so);
 	INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(&V_ripcbinfo);
 	return (0);
 }
 
 /*
  * Shut down the sending side of a raw IPv6 socket.  The inpcb write lock
  * is held around socantsendmore().  Always returns 0.
  */
 static int
 rip6_shutdown(struct socket *so)
 {
 	struct inpcb *pcb = sotoinpcb(so);
 
 	KASSERT(pcb != NULL, ("rip6_shutdown: inp == NULL"));
 
 	INP_WLOCK(pcb);
 	socantsendmore(so);
 	INP_WUNLOCK(pcb);
 
 	return (0);
 }
 
 static int
 rip6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
     struct mbuf *control, struct thread *td)
 {
 	struct inpcb *inp;
 	struct sockaddr_in6 tmp;
 	struct sockaddr_in6 *dst;
 	int ret;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("rip6_send: inp == NULL"));
 
 	/* Always copy sockaddr to avoid overwrites. */
 	/* Unlocked read. */
 	if (so->so_state & SS_ISCONNECTED) {
 		if (nam) {
 			m_freem(m);
 			return (EISCONN);
 		}
 		/* XXX */
 		bzero(&tmp, sizeof(tmp));
 		tmp.sin6_family = AF_INET6;
 		tmp.sin6_len = sizeof(struct sockaddr_in6);
 		INP_RLOCK(inp);
 		bcopy(&inp->in6p_faddr, &tmp.sin6_addr,
 		    sizeof(struct in6_addr));
 		INP_RUNLOCK(inp);
 		dst = &tmp;
 	} else {
 		if (nam == NULL) {
 			m_freem(m);
 			return (ENOTCONN);
 		}
 		if (nam->sa_len != sizeof(struct sockaddr_in6)) {
 			m_freem(m);
 			return (EINVAL);
 		}
 		tmp = *(struct sockaddr_in6 *)nam;
 		dst = &tmp;
 
 		if (dst->sin6_family == AF_UNSPEC) {
 			/*
 			 * XXX: we allow this case for backward
 			 * compatibility to buggy applications that
 			 * rely on old (and wrong) kernel behavior.
 			 */
 			log(LOG_INFO, "rip6 SEND: address family is "
 			    "unspec. Assume AF_INET6\n");
 			dst->sin6_family = AF_INET6;
 		} else if (dst->sin6_family != AF_INET6) {
 			m_freem(m);
 			return(EAFNOSUPPORT);
 		}
 	}
 	ret = rip6_output(m, so, dst, control);
 	return (ret);
 }
 
 /*
  * User-request dispatch table for raw IPv6 sockets.  Entries are listed
  * alphabetically by member name; members not named here keep the
  * zero-initialised default of a designated initializer.
  */
 struct pr_usrreqs rip6_usrreqs = {
 	.pru_abort =		rip6_abort,
 	.pru_attach =		rip6_attach,
 	.pru_bind =		rip6_bind,
 	.pru_close =		rip6_close,
 	.pru_connect =		rip6_connect,
 	.pru_control =		in6_control,
 	.pru_detach =		rip6_detach,
 	.pru_disconnect =	rip6_disconnect,
 	.pru_peeraddr =		in6_getpeeraddr,
 	.pru_send =		rip6_send,
 	.pru_shutdown =		rip6_shutdown,
 	.pru_sockaddr =		in6_getsockaddr,
 };
Index: projects/clang380-import/sys/netinet6/udp6_usrreq.c
===================================================================
--- projects/clang380-import/sys/netinet6/udp6_usrreq.c	(revision 293686)
+++ projects/clang380-import/sys/netinet6/udp6_usrreq.c	(revision 293687)
@@ -1,1285 +1,1279 @@
 /*-
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * Copyright (c) 2014 Kevin Lo
  * All rights reserved.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the project nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $
  *	$KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $
  */
 
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)udp_usrreq.c	8.6 (Berkeley) 5/23/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipfw.h"
 #include "opt_ipsec.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/rss_config.h>
 
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp_var.h>
 #include <netinet/icmp6.h>
 #include <netinet/ip_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #include <netinet/udplite.h>
 
 #include <netinet6/ip6protosw.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/in6_rss.h>
 #include <netinet6/udp6_var.h>
 #include <netinet6/scope6_var.h>
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/ipsec6.h>
 #endif /* IPSEC */
 
 #include <security/mac/mac_framework.h>
 
 /*
  * UDP protocol implementation.
  * Per RFC 768, August, 1980.
  */
 
 extern struct protosw	inetsw[];
 static void		udp6_detach(struct socket *so);
 
 /*
  * Deliver one received datagram (mbuf n) to the socket behind inp.
  *
  * off is the offset of the UDP header within n and fromsa the sender's
  * address.  The caller must hold the inpcb lock.  If the PCB has a
  * tunneling function installed, the packet is handed to it instead of
  * the socket buffer.
  *
  * Returns the result of in_pcbrele_rlocked() on the tunneling path and 0
  * on every other path.  Callers in this file treat a non-zero return as
  * "inp is gone, do not unlock it".
  */
 static int
 udp6_append(struct inpcb *inp, struct mbuf *n, int off,
     struct sockaddr_in6 *fromsa)
 {
 	struct socket *so;
 	struct mbuf *opts;
 	struct udpcb *up;
 
 	INP_LOCK_ASSERT(inp);
 
 	/*
 	 * Engage the tunneling protocol.
 	 */
 	up = intoudpcb(inp);
 	if (up->u_tun_func != NULL) {
 		/*
 		 * Hold a reference so the PCB survives while its lock is
 		 * dropped around the callback, then re-lock and release
 		 * the reference.
 		 */
 		in_pcbref(inp);
 		INP_RUNLOCK(inp);
 		(*up->u_tun_func)(n, off, inp, (struct sockaddr *)fromsa,
 		    up->u_tun_ctx);
 		INP_RLOCK(inp);
 		return (in_pcbrele_rlocked(inp));
 	}
 #ifdef IPSEC
 	/* Check AH/ESP integrity. */
 	if (ipsec6_in_reject(n, inp)) {
 		m_freem(n);
 		return (0);
 	}
 #endif /* IPSEC */
 #ifdef MAC
 	if (mac_inpcb_check_deliver(inp, n) != 0) {
 		m_freem(n);
 		return (0);
 	}
 #endif
 	/* Collect ancillary data only if the socket asked for it. */
 	opts = NULL;
 	if (inp->inp_flags & INP_CONTROLOPTS ||
 	    inp->inp_socket->so_options & SO_TIMESTAMP)
 		ip6_savecontrol(inp, n, &opts);
 	/* Strip everything up to and including the UDP header. */
 	m_adj(n, off + sizeof(struct udphdr));
 
 	so = inp->inp_socket;
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)fromsa, n,
 	    opts) == 0) {
 		/* Receive buffer full: drop the data and control mbufs. */
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		m_freem(n);
 		if (opts)
 			m_freem(opts);
 		UDPSTAT_INC(udps_fullsock);
 	} else
 		/* presumably sorwakeup_locked() drops the sockbuf lock -- TODO confirm */
 		sorwakeup_locked(so);
 	return (0);
 }
 
 /*
  * Input routine for UDP and UDP-Lite over IPv6.
  *
  * *mp is the received packet, *offp the offset of the UDP header within
  * it, and proto the upper-layer protocol number (compared against
  * IPPROTO_UDPLITE below).  The packet is validated (destination port,
  * length, checksum) and then delivered either to every matching PCB
  * (multicast destination) or to the single PCB found by lookup.
  *
  * Always returns IPPROTO_DONE; the mbuf is consumed on every path.
  */
 int
 udp6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp;
 	struct ifnet *ifp;
 	struct ip6_hdr *ip6;
 	struct udphdr *uh;
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	struct udpcb *up;
 	int off = *offp;
 	int cscov_partial;
 	int plen, ulen;
 	struct sockaddr_in6 fromsa;
 	struct m_tag *fwd_tag;
 	uint16_t uh_sum;
 	uint8_t nxt;
 
 	ifp = m->m_pkthdr.rcvif;
 	ip6 = mtod(m, struct ip6_hdr *);
 
 #ifndef PULLDOWN_TEST
 	IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE);
 	ip6 = mtod(m, struct ip6_hdr *);
 	uh = (struct udphdr *)((caddr_t)ip6 + off);
 #else
 	IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(*uh));
 	if (!uh)
 		return (IPPROTO_DONE);
 #endif
 
 	UDPSTAT_INC(udps_ipackets);
 
 	/*
 	 * Destination port of 0 is illegal, based on RFC768.
 	 */
 	if (uh->uh_dport == 0)
 		goto badunlocked;
 
 	/* plen: bytes from the UDP header to the end of the IPv6 payload. */
 	plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6);
 	ulen = ntohs((u_short)uh->uh_ulen);
 
 	nxt = proto;
 	cscov_partial = (nxt == IPPROTO_UDPLITE) ? 1 : 0;
 	if (nxt == IPPROTO_UDPLITE) {
 		/* Zero means checksum over the complete packet. */
 		if (ulen == 0)
 			ulen = plen;
 		if (ulen == plen)
 			cscov_partial = 0;
 		if ((ulen < sizeof(struct udphdr)) || (ulen > plen)) {
 			/* XXX: What is the right UDPLite MIB counter? */
 			goto badunlocked;
 		}
 		if (uh->uh_sum == 0) {
 			/* XXX: What is the right UDPLite MIB counter? */
 			goto badunlocked;
 		}
 	} else {
 		if ((ulen < sizeof(struct udphdr)) || (plen != ulen)) {
 			UDPSTAT_INC(udps_badlen);
 			goto badunlocked;
 		}
 		if (uh->uh_sum == 0) {
 			UDPSTAT_INC(udps_nosum);
 			goto badunlocked;
 		}
 	}
 
 	/*
 	 * Use the checksum precomputed in the packet header when the
 	 * CSUM_DATA_VALID_IPV6 flag is set and coverage is complete;
 	 * otherwise compute it in software over the covered length.
 	 */
 	if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) &&
 	    !cscov_partial) {
 		if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 			uh_sum = m->m_pkthdr.csum_data;
 		else
 			uh_sum = in6_cksum_pseudo(ip6, ulen, nxt,
 			    m->m_pkthdr.csum_data);
 		uh_sum ^= 0xffff;
 	} else
 		uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen);
 
 	if (uh_sum != 0) {
 		UDPSTAT_INC(udps_badsum);
 		goto badunlocked;
 	}
 
 	/*
 	 * Construct sockaddr format source address.
 	 */
 	init_sin6(&fromsa, m);
 	fromsa.sin6_port = uh->uh_sport;
 
 	pcbinfo = udp_get_inpcbinfo(nxt);
 	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 		struct inpcb *last;
 		struct inpcbhead *pcblist;
 		struct ip6_moptions *imo;
 
 		INP_INFO_RLOCK(pcbinfo);
 		/*
 		 * In the event that laddr should be set to the link-local
 		 * address (this happens in RIPng), the multicast address
 		 * specified in the received packet will not match laddr.  To
 		 * handle this situation, matching is relaxed if the
 		 * receiving interface is the same as one specified in the
 		 * socket and if the destination multicast address matches
 		 * one of the multicast groups specified in the socket.
 		 */
 
 		/*
 		 * KAME note: traditionally we dropped udpiphdr from mbuf
 		 * here.  We need udphdr for IPsec processing so we do that
 		 * later.
 		 */
 		pcblist = udp_get_pcblist(nxt);
 		last = NULL;
 		LIST_FOREACH(inp, pcblist, inp_list) {
 			if ((inp->inp_vflag & INP_IPV6) == 0)
 				continue;
 			if (inp->inp_lport != uh->uh_dport)
 				continue;
 			if (inp->inp_fport != 0 &&
 			    inp->inp_fport != uh->uh_sport)
 				continue;
 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
 				if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
 							&ip6->ip6_dst))
 					continue;
 			}
 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 				if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
 							&ip6->ip6_src) ||
 				    inp->inp_fport != uh->uh_sport)
 					continue;
 			}
 
 			/*
 			 * XXXRW: Because we weren't holding either the inpcb
 			 * or the hash lock when we checked for a match
 			 * before, we should probably recheck now that the
 			 * inpcb lock is (supposed to be) held.
 			 */
 
 			/*
 			 * Handle socket delivery policy for any-source
 			 * and source-specific multicast. [RFC3678]
 			 */
 			imo = inp->in6p_moptions;
 			if (imo && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
 				struct sockaddr_in6	 mcaddr;
 				int			 blocked;
 
 				INP_RLOCK(inp);
 
 				bzero(&mcaddr, sizeof(struct sockaddr_in6));
 				mcaddr.sin6_len = sizeof(struct sockaddr_in6);
 				mcaddr.sin6_family = AF_INET6;
 				mcaddr.sin6_addr = ip6->ip6_dst;
 
 				blocked = im6o_mc_filter(imo, ifp,
 					(struct sockaddr *)&mcaddr,
 					(struct sockaddr *)&fromsa);
 				if (blocked != MCAST_PASS) {
 					if (blocked == MCAST_NOTGMEMBER)
 						IP6STAT_INC(ip6s_notmember);
 					if (blocked == MCAST_NOTSMEMBER ||
 					    blocked == MCAST_MUTED)
 						UDPSTAT_INC(udps_filtermcast);
 					INP_RUNLOCK(inp); /* XXX */
 					continue;
 				}
 
 				INP_RUNLOCK(inp);
 			}
 			/*
 			 * Delivery lags one match behind: the previous match
 			 * gets a copy, so that the final match can be given
 			 * the original mbuf after the loop.
 			 */
 			if (last != NULL) {
 				struct mbuf *n;
 
 				if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
 					INP_RLOCK(last);
 					UDP_PROBE(receive, NULL, last, ip6,
 					    last, uh);
 					/*
 					 * A non-zero return means 'last' is
 					 * gone and must not be unlocked.  The
 					 * pcbinfo read lock is still held and
 					 * the original mbuf has not been
 					 * handed to anyone, so leave through
 					 * the path that releases both.
 					 */
 					if (udp6_append(last, n, off, &fromsa))
 						goto badheadlocked;
 					INP_RUNLOCK(last);
 				}
 			}
 			last = inp;
 			/*
 			 * Don't look for additional matches if this one does
 			 * not have either the SO_REUSEPORT or SO_REUSEADDR
 			 * socket options set.  This heuristic avoids
 			 * searching through all pcbs in the common case of a
 			 * non-shared port.  It assumes that an application
 			 * will never clear these options after setting them.
 			 */
 			if ((last->inp_socket->so_options &
 			     (SO_REUSEPORT|SO_REUSEADDR)) == 0)
 				break;
 		}
 
 		if (last == NULL) {
 			/*
 			 * No matching pcb found; discard datagram.  (No need
 			 * to send an ICMP Port Unreachable for a broadcast
 			 * or multicast datagram.)
 			 */
 			UDPSTAT_INC(udps_noport);
 			UDPSTAT_INC(udps_noportmcast);
 			goto badheadlocked;
 		}
 		INP_RLOCK(last);
 		INP_INFO_RUNLOCK(pcbinfo);
 		UDP_PROBE(receive, NULL, last, ip6, last, uh);
 		/* On a non-zero return 'last' is already gone; skip unlock. */
 		if (udp6_append(last, m, off, &fromsa) == 0)
 			INP_RUNLOCK(last);
 		return (IPPROTO_DONE);
 	}
 	/*
 	 * Locate pcb for datagram.
 	 */
 
 	/*
 	 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
 	 */
 	if ((m->m_flags & M_IP6_NEXTHOP) &&
 	    (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
 		struct sockaddr_in6 *next_hop6;
 
 		next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1);
 
 		/*
 		 * Transparently forwarded. Pretend to be the destination.
 		 * Already got one like this?
 		 */
 		inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src,
 		    uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
 		    INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m);
 		if (!inp) {
 			/*
 			 * It's new.  Try to find the ambushing socket.
 			 * Because we've rewritten the destination address,
 			 * any hardware-generated hash is ignored.
 			 */
 			inp = in6_pcblookup(pcbinfo, &ip6->ip6_src,
 			    uh->uh_sport, &next_hop6->sin6_addr,
 			    next_hop6->sin6_port ? htons(next_hop6->sin6_port) :
 			    uh->uh_dport, INPLOOKUP_WILDCARD |
 			    INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif);
 		}
 		/* Remove the tag from the packet. We don't need it anymore. */
 		m_tag_delete(m, fwd_tag);
 		m->m_flags &= ~M_IP6_NEXTHOP;
 	} else
 		inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src,
 		    uh->uh_sport, &ip6->ip6_dst, uh->uh_dport,
 		    INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB,
 		    m->m_pkthdr.rcvif, m);
 	if (inp == NULL) {
 		if (udp_log_in_vain) {
 			char ip6bufs[INET6_ADDRSTRLEN];
 			char ip6bufd[INET6_ADDRSTRLEN];
 
 			log(LOG_INFO,
 			    "Connection attempt to UDP [%s]:%d from [%s]:%d\n",
 			    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
 			    ntohs(uh->uh_dport),
 			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
 			    ntohs(uh->uh_sport));
 		}
 		UDPSTAT_INC(udps_noport);
 		if (m->m_flags & M_MCAST) {
 			printf("UDP6: M_MCAST is set in a unicast packet.\n");
 			UDPSTAT_INC(udps_noportmcast);
 			goto badunlocked;
 		}
 		if (V_udp_blackhole)
 			goto badunlocked;
 		if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0)
 			goto badunlocked;
 		icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0);
 		return (IPPROTO_DONE);
 	}
 	INP_RLOCK_ASSERT(inp);
 	up = intoudpcb(inp);
 	/*
 	 * For partial-coverage UDP-Lite, drop the packet when the socket's
 	 * u_rxcslen is zero or exceeds the coverage the packet provides.
 	 */
 	if (cscov_partial) {
 		if (up->u_rxcslen == 0 || up->u_rxcslen > ulen) {
 			INP_RUNLOCK(inp);
 			m_freem(m);
 			return (IPPROTO_DONE);
 		}
 	}
 	UDP_PROBE(receive, NULL, inp, ip6, inp, uh);
 	/* On a non-zero return inp is already gone; skip the unlock. */
 	if (udp6_append(inp, m, off, &fromsa) == 0)
 		INP_RUNLOCK(inp);
 	return (IPPROTO_DONE);
 
 badheadlocked:
 	INP_INFO_RUNLOCK(pcbinfo);
 badunlocked:
 	if (m)
 		m_freem(m);
 	return (IPPROTO_DONE);
 }
 
 /*
  * Shared control-input handler for UDP and UDP-Lite over IPv6.
  *
  * cmd is a PRC_* code, sa the IPv6 address concerned, d an optional
  * struct ip6ctlparam, and pcbinfo the PCB table to notify.  When the
  * parameter block carries an IPv6 header, the port pair is read from the
  * packet at the given offset and used to narrow the notification;
  * otherwise the notification is sent with both ports zero.
  */
 static void
 udp6_common_ctlinput(int cmd, struct sockaddr *sa, void *d,
     struct inpcbinfo *pcbinfo)
 {
 	struct udp_portonly {
 		u_int16_t uh_sport;
 		u_int16_t uh_dport;
 	};
 	struct inpcb *(*notify)(struct inpcb *, int);
 	const struct sockaddr_in6 *src;
 	struct ip6ctlparam *ip6cp;
 	struct udphdr uh;
 	struct mbuf *m;
 	void *arg;
 	int off;
 
 	if (sa->sa_family != AF_INET6 ||
 	    sa->sa_len != sizeof(struct sockaddr_in6))
 		return;
 	if ((unsigned)cmd >= PRC_NCMDS)
 		return;
 
 	notify = udp_notify;
 	if (PRC_IS_REDIRECT(cmd)) {
 		notify = in6_rtchange;
 		d = NULL;
 	} else if (cmd == PRC_HOSTDEAD) {
 		d = NULL;
 	} else if (inet6ctlerrmap[cmd] == 0) {
 		return;
 	}
 
 	/* If the parameter is from icmp6, decode it. */
 	ip6cp = (struct ip6ctlparam *)d;
 	if (ip6cp != NULL && ip6cp->ip6c_ip6 != NULL) {
 		/*
 		 * XXX: We assume that when the IPv6 header pointer is
 		 * non-NULL, the mbuf and the offset are valid.
 		 */
 		m = ip6cp->ip6c_m;
 		off = ip6cp->ip6c_off;
 
 		/* Check if we can safely examine src and dst ports. */
 		if (m->m_pkthdr.len < off + sizeof(struct udp_portonly))
 			return;
 
 		bzero(&uh, sizeof(uh));
 		m_copydata(m, off, sizeof(struct udp_portonly), (caddr_t)&uh);
 
 		(void)in6_pcbnotify(pcbinfo, sa, uh.uh_dport,
 		    (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd,
 		    ip6cp->ip6c_cmdarg, notify);
 		return;
 	}
 
 	/* No packet to inspect: notify with wildcard ports. */
 	if (ip6cp != NULL) {
 		arg = ip6cp->ip6c_cmdarg;
 		src = ip6cp->ip6c_src;
 	} else {
 		arg = NULL;
 		src = &sa6_any;
 	}
 	(void)in6_pcbnotify(pcbinfo, sa, 0,
 	    (const struct sockaddr *)src, 0, cmd, arg, notify);
 }
 
 /*
  * Control-input entry point for UDP over IPv6: forwards to the shared
  * handler with the UDP PCB table.
  */
 void
 udp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
 {
 
 	/* A void function must not return an expression in ISO C. */
 	udp6_common_ctlinput(cmd, sa, d, &V_udbinfo);
 }
 
 /*
  * Control-input entry point for UDP-Lite over IPv6: forwards to the
  * shared handler with the UDP-Lite PCB table.
  */
 void
 udplite6_ctlinput(int cmd, struct sockaddr *sa, void *d)
 {
 
 	/* A void function must not return an expression in ISO C. */
 	udp6_common_ctlinput(cmd, sa, d, &V_ulitecbinfo);
 }
 
 /*
  * Sysctl handler that reports the credentials owning a UDP/IPv6 socket.
  *
  * The request must supply exactly two struct sockaddr_in6 values as new
  * data and provide room for exactly one struct xucred as old data.
  * addrs[1] is handed to in6_pcblookup() first and addrs[0] second.
  * The caller needs PRIV_NETINET_GETCRED.
  *
  * Returns 0 on success, EINVAL on a size mismatch, ENOENT when no matching
  * PCB (or no socket attached to it) is found, or the errno produced by one
  * of the helper calls.
  */
 static int
 udp6_getcred(SYSCTL_HANDLER_ARGS)
 {
 	struct xucred xuc;
 	struct sockaddr_in6 addrs[2];
 	struct inpcb *inp;
 	int error;
 
 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
 	if (error)
 		return (error);
 
 	/* Both buffer sizes are fixed; reject anything else up front. */
 	if (req->newlen != sizeof(addrs))
 		return (EINVAL);
 	if (req->oldlen != sizeof(struct xucred))
 		return (EINVAL);
 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
 	if (error)
 		return (error);
 	if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 ||
 	    (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) {
 		return (error);
 	}
 	/* INPLOOKUP_RLOCKPCB: a found PCB comes back read-locked. */
 	inp = in6_pcblookup(&V_udbinfo, &addrs[1].sin6_addr,
 	    addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port,
 	    INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
 	if (inp != NULL) {
 		INP_RLOCK_ASSERT(inp);
 		if (inp->inp_socket == NULL)
 			error = ENOENT;
 		if (error == 0)
 			error = cr_canseesocket(req->td->td_ucred,
 			    inp->inp_socket);
 		if (error == 0)
 			cru2x(inp->inp_cred, &xuc);
 		INP_RUNLOCK(inp);
 	} else
 		error = ENOENT;
 	/* xuc is only filled in (and only copied out) when error == 0. */
 	if (error == 0)
 		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0,
     0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection");
 
 static int
 udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
     struct mbuf *control, struct thread *td)
 {
 	u_int32_t ulen = m->m_pkthdr.len;
 	u_int32_t plen = sizeof(struct udphdr) + ulen;
 	struct ip6_hdr *ip6;
 	struct udphdr *udp6;
 	struct in6_addr *laddr, *faddr, in6a;
 	struct sockaddr_in6 *sin6 = NULL;
-	struct ifnet *oifp = NULL;
 	int cscov_partial = 0;
 	int scope_ambiguous = 0;
 	u_short fport;
 	int error = 0;
 	uint8_t nxt;
 	uint16_t cscov = 0;
 	struct ip6_pktopts *optp, opt;
 	int af = AF_INET6, hlen = sizeof(struct ip6_hdr);
 	int flags;
 	struct sockaddr_in6 tmp;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
 
 	if (addr6) {
 		/* addr6 has been validated in udp6_send(). */
 		sin6 = (struct sockaddr_in6 *)addr6;
 
 		/* protect *sin6 from overwrites */
 		tmp = *sin6;
 		sin6 = &tmp;
 
 		/*
 		 * Application should provide a proper zone ID or the use of
 		 * default zone IDs should be enabled.  Unfortunately, some
 		 * applications do not behave as it should, so we need a
 		 * workaround.  Even if an appropriate ID is not determined,
 		 * we'll see if we can determine the outgoing interface.  If we
 		 * can, determine the zone ID based on the interface below.
 		 */
 		if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone)
 			scope_ambiguous = 1;
 		if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
 			return (error);
 	}
 
 	nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
 	    IPPROTO_UDP : IPPROTO_UDPLITE;
 	if (control) {
 		if ((error = ip6_setpktopts(control, &opt,
 		    inp->in6p_outputopts, td->td_ucred, nxt)) != 0)
 			goto release;
 		optp = &opt;
 	} else
 		optp = inp->in6p_outputopts;
 
 	if (sin6) {
 		faddr = &sin6->sin6_addr;
 
 		/*
 		 * Since we saw no essential reason for calling in_pcbconnect,
 		 * we get rid of such kind of logic, and call in6_selectsrc
 		 * and in6_pcbsetport in order to fill in the local address
 		 * and the local port.
 		 */
 		if (sin6->sin6_port == 0) {
 			error = EADDRNOTAVAIL;
 			goto release;
 		}
 
 		if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 			/* how about ::ffff:0.0.0.0 case? */
 			error = EISCONN;
 			goto release;
 		}
 
 		fport = sin6->sin6_port; /* allow 0 port */
 
 		if (IN6_IS_ADDR_V4MAPPED(faddr)) {
 			if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
 				/*
 				 * I believe we should explicitly discard the
 				 * packet when mapped addresses are disabled,
 				 * rather than send the packet as an IPv6 one.
 				 * If we chose the latter approach, the packet
 				 * might be sent out on the wire based on the
 				 * default route, the situation which we'd
 				 * probably want to avoid.
 				 * (20010421 jinmei@kame.net)
 				 */
 				error = EINVAL;
 				goto release;
 			}
 			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
 			    !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) {
 				/*
 				 * when remote addr is an IPv4-mapped address,
 				 * local addr should not be an IPv6 address,
 				 * since you cannot determine how to map IPv6
 				 * source address to IPv4.
 				 */
 				error = EINVAL;
 				goto release;
 			}
 
 			af = AF_INET;
 		}
 
 		if (!IN6_IS_ADDR_V4MAPPED(faddr)) {
-			error = in6_selectsrc(sin6, optp, inp,
-			    td->td_ucred, &oifp, &in6a);
+			error = in6_selectsrc_socket(sin6, optp, inp,
+			    td->td_ucred, scope_ambiguous, &in6a, NULL);
 			if (error)
 				goto release;
-			if (oifp && scope_ambiguous &&
-			    (error = in6_setscope(&sin6->sin6_addr,
-			    oifp, NULL))) {
-				goto release;
-			}
 			laddr = &in6a;
 		} else
 			laddr = &inp->in6p_laddr;	/* XXX */
 		if (laddr == NULL) {
 			if (error == 0)
 				error = EADDRNOTAVAIL;
 			goto release;
 		}
 		if (inp->inp_lport == 0 &&
 		    (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0) {
 			/* Undo an address bind that may have occurred. */
 			inp->in6p_laddr = in6addr_any;
 			goto release;
 		}
 	} else {
 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 			error = ENOTCONN;
 			goto release;
 		}
 		if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) {
 			if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
 				/*
 				 * XXX: this case would happen when the
 				 * application sets the V6ONLY flag after
 				 * connecting the foreign address.
 				 * Such applications should be fixed,
 				 * so we bark here.
 				 */
 				log(LOG_INFO, "udp6_output: IPV6_V6ONLY "
 				    "option was set for a connected socket\n");
 				error = EINVAL;
 				goto release;
 			} else
 				af = AF_INET;
 		}
 		laddr = &inp->in6p_laddr;
 		faddr = &inp->in6p_faddr;
 		fport = inp->inp_fport;
 	}
 
 	if (af == AF_INET)
 		hlen = sizeof(struct ip);
 
 	/*
 	 * Calculate data length and get a mbuf
 	 * for UDP and IP6 headers.
 	 */
 	M_PREPEND(m, hlen + sizeof(struct udphdr), M_NOWAIT);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto release;
 	}
 
 	/*
 	 * Stuff checksum and output datagram.
 	 */
 	udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen);
 	udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */
 	udp6->uh_dport = fport;
 	if (nxt == IPPROTO_UDPLITE) {
 		struct udpcb *up;
 
 		up = intoudpcb(inp);
 		cscov = up->u_txcslen;
 		if (cscov >= plen)
 			cscov = 0;
 		udp6->uh_ulen = htons(cscov);
 		/*
 		 * For UDP-Lite, checksum coverage length of zero means
 		 * the entire UDPLite packet is covered by the checksum.
 		 */
 		cscov_partial = (cscov == 0) ? 0 : 1;
 	} else if (plen <= 0xffff)
 		udp6->uh_ulen = htons((u_short)plen);
 	else
 		udp6->uh_ulen = 0;
 	udp6->uh_sum = 0;
 
 	switch (af) {
 	case AF_INET6:
 		ip6 = mtod(m, struct ip6_hdr *);
 		ip6->ip6_flow	= inp->inp_flow & IPV6_FLOWINFO_MASK;
 		ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
 		ip6->ip6_vfc	|= IPV6_VERSION;
 		ip6->ip6_plen	= htons((u_short)plen);
 		ip6->ip6_nxt	= nxt;
 		ip6->ip6_hlim	= in6_selecthlim(inp, NULL);
 		ip6->ip6_src	= *laddr;
 		ip6->ip6_dst	= *faddr;
 
 		if (cscov_partial) {
 			if ((udp6->uh_sum = in6_cksum_partial(m, nxt,
 			    sizeof(struct ip6_hdr), plen, cscov)) == 0)
 				udp6->uh_sum = 0xffff;
 		} else {
 			udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0);
 			m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
 			m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
 		}
 
 #ifdef	RSS
 		{
 			uint32_t hash_val, hash_type;
 			uint8_t pr;
 
 			pr = inp->inp_socket->so_proto->pr_protocol;
 			/*
 			 * Calculate an appropriate RSS hash for UDP and
 			 * UDP Lite.
 			 *
 			 * The called function will take care of figuring out
 			 * whether a 2-tuple or 4-tuple hash is required based
 			 * on the currently configured scheme.
 			 *
 			 * Later later on connected socket values should be
 			 * cached in the inpcb and reused, rather than constantly
 			 * re-calculating it.
 			 *
 			 * UDP Lite is a different protocol number and will
 			 * likely end up being hashed as a 2-tuple until
 			 * RSS / NICs grow UDP Lite protocol awareness.
 			 */
 			if (rss_proto_software_hash_v6(faddr, laddr, fport,
 			    inp->inp_lport, pr, &hash_val, &hash_type) == 0) {
 				m->m_pkthdr.flowid = hash_val;
 				M_HASHTYPE_SET(m, hash_type);
 			}
 		}
 #endif
 		flags = 0;
 #ifdef	RSS
 		/*
 		 * Don't override with the inp cached flowid.
 		 *
 		 * Until the whole UDP path is vetted, it may actually
 		 * be incorrect.
 		 */
 		flags |= IP_NODEFAULTFLOWID;
 #endif
 
 		UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
 		UDPSTAT_INC(udps_opackets);
 		error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions,
 		    NULL, inp);
 		break;
 	case AF_INET:
 		error = EAFNOSUPPORT;
 		goto release;
 	}
 	goto releaseopt;
 
 release:
 	m_freem(m);
 
 releaseopt:
 	if (control) {
 		ip6_clearpktopts(&opt, -1);
 		m_freem(control);
 	}
 	return (error);
 }
 
 /*
  * pru_abort handler: drop whatever association the socket has.
  *
  * On INET kernels, a PCB flagged INP_IPV4 is handed to the pru_abort
  * routine of the matching IPv4 protocol once the inpcb lock has been
  * released.  Otherwise a PCB with a foreign address set is disconnected,
  * its local address is reset to the wildcard, and the socket is marked
  * disconnected.
  */
 static void
 udp6_abort(struct socket *so)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_abort: inp == NULL"));
 
 	INP_WLOCK(inp);
 #ifdef INET
 	if (inp->inp_vflag & INP_IPV4) {
 		struct pr_usrreqs *pru;
 		uint8_t nxt;
 
 		nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
 		    IPPROTO_UDP : IPPROTO_UDPLITE;
 		/* Unlock first: the IPv4 routine is called without our lock. */
 		INP_WUNLOCK(inp);
 		pru = inetsw[ip_protox[nxt]].pr_usrreqs;
 		(*pru->pru_abort)(so);
 		return;
 	}
 #endif
 
 	/* Nothing to undo unless a foreign address is set. */
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 		INP_HASH_WLOCK(pcbinfo);
 		in6_pcbdisconnect(inp);
 		inp->in6p_laddr = in6addr_any;
 		INP_HASH_WUNLOCK(pcbinfo);
 		soisdisconnected(so);
 	}
 	INP_WUNLOCK(inp);
 }
 
 /*
  * pru_attach handler: allocate and initialise the PCB for a new socket.
  *
  * Reserves socket buffer space if either high-water mark is still zero,
  * allocates an inpcb under the pcbinfo write lock, marks it IPv6 (and
  * also IPv4 unless IN6P_IPV6_V6ONLY is set) and attaches a udpcb through
  * udp_newudpcb().  Returns 0 on success or the error of the failing step.
  */
 static int
 udp6_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	int error;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp == NULL, ("udp6_attach: inp != NULL"));
 
 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 		error = soreserve(so, udp_sendspace, udp_recvspace);
 		if (error)
 			return (error);
 	}
 	INP_INFO_WLOCK(pcbinfo);
 	error = in_pcballoc(so, pcbinfo);
 	if (error) {
 		INP_INFO_WUNLOCK(pcbinfo);
 		return (error);
 	}
 	/*
 	 * NOTE(review): inp is unlocked below without a matching lock call
 	 * in this function, so in_pcballoc() presumably returns it
 	 * write-locked -- confirm.
 	 */
 	inp = (struct inpcb *)so->so_pcb;
 	inp->inp_vflag |= INP_IPV6;
 	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
 		inp->inp_vflag |= INP_IPV4;
 	inp->in6p_hops = -1;	/* use kernel default */
 	inp->in6p_cksum = -1;	/* just to be sure */
 	/*
 	 * XXX: ugly!!
 	 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
 	 * because the socket may be bound to an IPv6 wildcard address,
 	 * which may match an IPv4-mapped IPv6 address.
 	 */
 	inp->inp_ip_ttl = V_ip_defttl;
 
 	error = udp_newudpcb(inp);
 	if (error) {
 		/* Tear the freshly allocated PCB down again. */
 		in_pcbdetach(inp);
 		in_pcbfree(inp);
 		INP_INFO_WUNLOCK(pcbinfo);
 		return (error);
 	}
 	INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(pcbinfo);
 	return (0);
 }
 
 /*
  * pru_bind handler: bind the socket to the local address in nam.
  *
  * The PCB starts out as IPv6-only.  With IN6P_IPV6_V6ONLY clear, binding
  * to the unspecified address re-enables INP_IPV4 as well, and (INET
  * kernels only) an IPv4-mapped address is converted to a sockaddr_in and
  * bound through in_pcbbind() as a pure IPv4 PCB.  Every other case goes
  * through in6_pcbbind().  Returns the result of the bind routine used.
  */
 static int
 udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	int error;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_bind: inp == NULL"));
 
 	INP_WLOCK(inp);
 	INP_HASH_WLOCK(pcbinfo);
 	inp->inp_vflag &= ~INP_IPV4;
 	inp->inp_vflag |= INP_IPV6;
 	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
 		struct sockaddr_in6 *sin6_p;
 
 		/*
 		 * NOTE(review): nam is read as a sockaddr_in6 here without a
 		 * length/family check in this function -- presumably done by
 		 * the caller or by in6_pcbbind(); confirm.
 		 */
 		sin6_p = (struct sockaddr_in6 *)nam;
 
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr))
 			inp->inp_vflag |= INP_IPV4;
 #ifdef INET
 		else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) {
 			struct sockaddr_in sin;
 
 			in6_sin6_2_sin(&sin, sin6_p);
 			inp->inp_vflag |= INP_IPV4;
 			inp->inp_vflag &= ~INP_IPV6;
 			error = in_pcbbind(inp, (struct sockaddr *)&sin,
 			    td->td_ucred);
 			goto out;
 		}
 #endif
 	}
 
 	error = in6_pcbbind(inp, nam, td->td_ucred);
 #ifdef INET
 out:
 #endif
 	INP_HASH_WUNLOCK(pcbinfo);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 /*
  * pru_close handler: disconnect the socket on close.
  *
  * On INET kernels, a PCB flagged INP_IPV4 is handed to the pru_disconnect
  * routine of the matching IPv4 protocol once the inpcb lock has been
  * released.  Otherwise a PCB with a foreign address set is disconnected,
  * its local address is reset to the wildcard, and the socket is marked
  * disconnected.
  */
 static void
 udp6_close(struct socket *so)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_close: inp == NULL"));
 
 	INP_WLOCK(inp);
 #ifdef INET
 	if (inp->inp_vflag & INP_IPV4) {
 		struct pr_usrreqs *pru;
 		uint8_t nxt;
 
 		nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
 		    IPPROTO_UDP : IPPROTO_UDPLITE;
 		/* Unlock first: the IPv4 routine is called without our lock. */
 		INP_WUNLOCK(inp);
 		pru = inetsw[ip_protox[nxt]].pr_usrreqs;
 		/* Note: the IPv4 disconnect (not close) routine is used. */
 		(*pru->pru_disconnect)(so);
 		return;
 	}
 #endif
 	/* Nothing to undo unless a foreign address is set. */
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 		INP_HASH_WLOCK(pcbinfo);
 		in6_pcbdisconnect(inp);
 		inp->in6p_laddr = in6addr_any;
 		INP_HASH_WUNLOCK(pcbinfo);
 		soisdisconnected(so);
 	}
 	INP_WUNLOCK(inp);
 }
 
 /*
  * pru_connect handler: set the socket's foreign address to nam.
  *
  * On INET kernels an IPv4-mapped destination is converted to a
  * sockaddr_in and connected through in_pcbconnect() as a pure IPv4 PCB;
  * this is refused with EINVAL when IN6P_IPV6_V6ONLY is set.  All other
  * destinations are connected through in6_pcbconnect() as a pure IPv6 PCB.
  * Either path fails with EISCONN if a foreign address is already set and
  * runs the prison_remote_ip{4,6}() check before connecting.
  *
  * Returns 0 on success (the socket is then marked connected) or an errno.
  */
 static int
 udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	struct sockaddr_in6 *sin6;
 	int error;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	sin6 = (struct sockaddr_in6 *)nam;
 	KASSERT(inp != NULL, ("udp6_connect: inp == NULL"));
 
 	/*
 	 * XXXRW: Need to clarify locking of v4/v6 flags.
 	 */
 	INP_WLOCK(inp);
 #ifdef INET
 	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 		struct sockaddr_in sin;
 
 		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
 			error = EINVAL;
 			goto out;
 		}
 		if (inp->inp_faddr.s_addr != INADDR_ANY) {
 			error = EISCONN;
 			goto out;
 		}
 		in6_sin6_2_sin(&sin, sin6);
 		/* From here on the PCB is treated as IPv4 only. */
 		inp->inp_vflag |= INP_IPV4;
 		inp->inp_vflag &= ~INP_IPV6;
 		error = prison_remote_ip4(td->td_ucred, &sin.sin_addr);
 		if (error != 0)
 			goto out;
 		INP_HASH_WLOCK(pcbinfo);
 		error = in_pcbconnect(inp, (struct sockaddr *)&sin,
 		    td->td_ucred);
 		INP_HASH_WUNLOCK(pcbinfo);
 		if (error == 0)
 			soisconnected(so);
 		goto out;
 	}
 #endif
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 		error = EISCONN;
 		goto out;
 	}
 	/* From here on the PCB is treated as IPv6 only. */
 	inp->inp_vflag &= ~INP_IPV4;
 	inp->inp_vflag |= INP_IPV6;
 	error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr);
 	if (error != 0)
 		goto out;
 	INP_HASH_WLOCK(pcbinfo);
 	error = in6_pcbconnect(inp, nam, td->td_ucred);
 	INP_HASH_WUNLOCK(pcbinfo);
 	if (error == 0)
 		soisconnected(so);
 out:
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 /*
  * pru_detach handler: release the PCB and the UDP control block that
  * belong to the socket.
  */
 static void
 udp6_detach(struct socket *so)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	struct udpcb *up;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_detach: inp == NULL"));
 
 	INP_INFO_WLOCK(pcbinfo);
 	INP_WLOCK(inp);
 	/* Fetch the udpcb pointer before the inpcb goes away. */
 	up = intoudpcb(inp);
 	KASSERT(up != NULL, ("%s: up == NULL", __func__));
 	in_pcbdetach(inp);
 	/*
 	 * NOTE(review): there is no INP_WUNLOCK() in this function, so
 	 * in_pcbfree() presumably consumes the inpcb lock -- confirm.
 	 */
 	in_pcbfree(inp);
 	INP_INFO_WUNLOCK(pcbinfo);
 	udp_discardcb(up);
 }
 
 /*
  * pru_disconnect handler: clear the socket's foreign address.
  *
  * On INET kernels, a PCB flagged INP_IPV4 is handed to the pru_disconnect
  * routine of the matching IPv4 protocol once the inpcb lock has been
  * released; that routine's result is deliberately ignored and 0 is
  * returned.  Otherwise the PCB is disconnected, its local address is
  * reset to the wildcard and SS_ISCONNECTED is cleared on the socket.
  *
  * Returns 0 on success or ENOTCONN when no foreign address is set.
  */
 static int
 udp6_disconnect(struct socket *so)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	int error;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL"));
 
 	INP_WLOCK(inp);
 #ifdef INET
 	if (inp->inp_vflag & INP_IPV4) {
 		struct pr_usrreqs *pru;
 		uint8_t nxt;
 
 		nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
 		    IPPROTO_UDP : IPPROTO_UDPLITE;
 		/* Unlock first: the IPv4 routine is called without our lock. */
 		INP_WUNLOCK(inp);
 		pru = inetsw[ip_protox[nxt]].pr_usrreqs;
 		(void)(*pru->pru_disconnect)(so);
 		return (0);
 	}
 #endif
 
 	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
 		error = ENOTCONN;
 		goto out;
 	}
 
 	INP_HASH_WLOCK(pcbinfo);
 	in6_pcbdisconnect(inp);
 	inp->in6p_laddr = in6addr_any;
 	INP_HASH_WUNLOCK(pcbinfo);
 	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTED;		/* XXX */
 	SOCK_UNLOCK(so);
 	error = 0;
 out:
 	INP_WUNLOCK(inp);
 	/* Report ENOTCONN instead of discarding it with a constant 0. */
 	return (error);
 }
 
 /*
  * pru_send handler: validate the optional destination and transmit m.
  *
  * A supplied addr must be a struct sockaddr_in6 of family AF_INET6,
  * otherwise the mbuf is freed and EINVAL / EAFNOSUPPORT is returned.
  * On INET kernels with IN6P_IPV6_V6ONLY clear, a send that targets an
  * IPv4-mapped address (or, without addr, a PCB flagged INP_IPV4) is
  * forwarded to the pru_send routine of the matching IPv4 protocol.
  * Everything else goes to udp6_output() with the inpcb and hash write
  * locks held.
  */
 static int
 udp6_send(struct socket *so, int flags, struct mbuf *m,
     struct sockaddr *addr, struct mbuf *control, struct thread *td)
 {
 	struct inpcb *inp;
 	struct inpcbinfo *pcbinfo;
 	int error = 0;
 
 	pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("udp6_send: inp == NULL"));
 
 	INP_WLOCK(inp);
 	if (addr) {
 		if (addr->sa_len != sizeof(struct sockaddr_in6)) {
 			error = EINVAL;
 			goto bad;
 		}
 		if (addr->sa_family != AF_INET6) {
 			error = EAFNOSUPPORT;
 			goto bad;
 		}
 	}
 
 #ifdef INET
 	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
 		int hasv4addr;
 		struct sockaddr_in6 *sin6 = 0;
 
 		/* Decide whether this datagram is really an IPv4 send. */
 		if (addr == 0)
 			hasv4addr = (inp->inp_vflag & INP_IPV4);
 		else {
 			sin6 = (struct sockaddr_in6 *)addr;
 			hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)
 			    ? 1 : 0;
 		}
 		if (hasv4addr) {
 			struct pr_usrreqs *pru;
 			uint8_t nxt;
 
 			nxt = (inp->inp_socket->so_proto->pr_protocol ==
 			    IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE;
 			/*
 			 * XXXRW: We release UDP-layer locks before calling
 			 * udp_send() in order to avoid recursion.  However,
 			 * this does mean there is a short window where inp's
 			 * fields are unstable.  Could this lead to a
 			 * potential race in which the factors causing us to
 			 * select the UDPv4 output routine are invalidated?
 			 */
 			INP_WUNLOCK(inp);
 			/* Rewrite the caller's sockaddr in place as IPv4. */
 			if (sin6)
 				in6_sin6_2_sin_in_sock(addr);
 			pru = inetsw[ip_protox[nxt]].pr_usrreqs;
 			/* addr will just be freed in sendit(). */
 			return ((*pru->pru_send)(so, flags, m, addr, control,
 			    td));
 		}
 	}
 #endif
 #ifdef MAC
 	mac_inpcb_create_mbuf(inp, m);
 #endif
 	/* udp6_output() asserts both of these locks. */
 	INP_HASH_WLOCK(pcbinfo);
 	error = udp6_output(inp, m, addr, control, td);
 	INP_HASH_WUNLOCK(pcbinfo);
 	INP_WUNLOCK(inp);
 	return (error);
 
 bad:
 	/* Validation failed before anything consumed the data mbuf. */
 	INP_WUNLOCK(inp);
 	m_freem(m);
 	return (error);
 }
 
 /*
  * User-request dispatch table for UDP over IPv6.  The udp6_* entries are
  * the handlers defined in this file; the remaining entries name routines
  * defined elsewhere.
  */
 struct pr_usrreqs udp6_usrreqs = {
 	.pru_abort =		udp6_abort,
 	.pru_attach =		udp6_attach,
 	.pru_bind =		udp6_bind,
 	.pru_connect =		udp6_connect,
 	.pru_control =		in6_control,
 	.pru_detach =		udp6_detach,
 	.pru_disconnect =	udp6_disconnect,
 	.pru_peeraddr =		in6_mapped_peeraddr,
 	.pru_send =		udp6_send,
 	.pru_shutdown =		udp_shutdown,
 	.pru_sockaddr =		in6_mapped_sockaddr,
 	.pru_soreceive =	soreceive_dgram,
 	.pru_sosend =		sosend_dgram,
 	.pru_sosetlabel =	in_pcbsosetlabel,
 	.pru_close =		udp6_close
 };
Index: projects/clang380-import/sys/netpfil/ipfw/ip_fw_table.c
===================================================================
--- projects/clang380-import/sys/netpfil/ipfw/ip_fw_table.c	(revision 293686)
+++ projects/clang380-import/sys/netpfil/ipfw/ip_fw_table.c	(revision 293687)
@@ -1,3552 +1,3552 @@
 /*-
  * Copyright (c) 2004 Ruslan Ermilov and Vsevolod Lobko.
  * Copyright (c) 2014 Yandex LLC
  * Copyright (c) 2014 Alexander V. Chernikov
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Lookup table support for ipfw.
  *
  * This file contains handlers for all generic tables' operations:
  * add/del/flush entries, list/dump tables etc..
  *
  * Table data modification is protected by both UH and runtime lock
  * while reading configuration/data is protected by UH lock.
  *
  * Lookup algorithms for all table types are located in ip_fw_table_algo.c
  */
 
 #include "opt_ipfw.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/queue.h>
 #include <net/if.h>	/* ip_fw.h requires IFNAMSIZ */
 
 #include <netinet/in.h>
 #include <netinet/ip_var.h>	/* struct ipfw_rule_ref */
 #include <netinet/ip_fw.h>
 
 #include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/ip_fw_table.h>
 
  /*
  * Table has the following `type` concepts:
  *
  * `no.type` represents lookup key type (addr, ifp, uid, etc..)
  * vmask represents bitmask of table values which are present at the moment.
  * Special IPFW_VTYPE_LEGACY ( (uint32_t)-1 ) represents old
  * single-value-for-all approach.
  */
 /* Per-table configuration and runtime bookkeeping. */
 struct table_config {
 	/* Embedded named object; its subtype and refcnt are used below. */
 	struct named_object	no;
 	uint8_t		tflags;		/* type flags */
 	uint8_t		locked;		/* 1 if locked from changes */
 	uint8_t		linked;		/* 1 if already linked */
 	uint8_t		ochanged;	/* used by set swapping */
 	uint8_t		vshared;	/* 1 if using shared value array */
 	uint8_t		spare[3];
 	uint32_t	count;		/* Number of records */
 	uint32_t	limit;		/* Max number of records */
 	uint32_t	vmask;		/* bitmask with supported values */
 	uint32_t	ocount;		/* used by set swapping */
 	uint64_t	gencnt;		/* generation count */
 	char		tablename[64];	/* table name */
 	struct table_algo	*ta;	/* Callbacks for given algo */
 	void		*astate;	/* algorithm state */
 	struct table_info	ti_copy;	/* data to put to table_info */
 	/* NOTE(review): presumably the value-object instance -- confirm. */
 	struct namedobj_instance	*vi;
 };
 
 static int find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
     struct table_config **tc);
 static struct table_config *find_table(struct namedobj_instance *ni,
     struct tid_info *ti);
 static struct table_config *alloc_table_config(struct ip_fw_chain *ch,
     struct tid_info *ti, struct table_algo *ta, char *adata, uint8_t tflags);
 static void free_table_config(struct namedobj_instance *ni,
     struct table_config *tc);
 static int create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
     char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int ref);
 static void link_table(struct ip_fw_chain *ch, struct table_config *tc);
 static void unlink_table(struct ip_fw_chain *ch, struct table_config *tc);
 static int find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
     struct tentry_info *tei, uint32_t count, int op, struct table_config **ptc);
 #define	OP_ADD	1
 #define	OP_DEL	0
 static int export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
     struct sockopt_data *sd);
 static void export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
     ipfw_xtable_info *i);
 static int dump_table_tentry(void *e, void *arg);
 static int dump_table_xentry(void *e, void *arg);
 
 static int swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
     struct tid_info *b);
 
 static int check_table_name(const char *name);
 static int check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
     struct table_config *tc, struct table_info *ti, uint32_t count);
 static int destroy_table(struct ip_fw_chain *ch, struct tid_info *ti);
 
 static struct table_algo *find_table_algo(struct tables_config *tableconf,
     struct tid_info *ti, char *name);
 
 static void objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti);
 static void ntlv_to_ti(struct _ipfw_obj_ntlv *ntlv, struct tid_info *ti);
 
 #define	CHAIN_TO_NI(chain)	(CHAIN_TO_TCFG(chain)->namehash)
 #define	KIDX_TO_TI(ch, k)	(&(((struct table_info *)(ch)->tablestate)[k]))
 
 #define	TA_BUF_SZ	128	/* On-stack buffer for add/delete state */
 
 void
 rollback_toperation_state(struct ip_fw_chain *ch, void *object)
 {
 	struct tables_config *tcfg;
 	struct op_state *os;
 
 	tcfg = CHAIN_TO_TCFG(ch);
 	TAILQ_FOREACH(os, &tcfg->state_list, next)
 		os->func(object, os);
 }
 
 /*
  * Registers @ts at the head of the chain's list of in-flight table
  * operation states.
  */
 void
 add_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
 {
 	struct tables_config *cfg = CHAIN_TO_TCFG(ch);
 
 	TAILQ_INSERT_HEAD(&cfg->state_list, &ts->opstate, next);
 }
 
 /*
  * Removes @ts from the chain's list of in-flight table operation states.
  */
 void
 del_toperation_state(struct ip_fw_chain *ch, struct tableop_state *ts)
 {
 	struct tables_config *cfg = CHAIN_TO_TCFG(ch);
 
 	TAILQ_REMOVE(&cfg->state_list, &ts->opstate, next);
 }
 
 /* Takes one additional reference on table @tc. */
 void
 tc_ref(struct table_config *tc)
 {
 
 	tc->no.refcnt += 1;
 }
 
 /* Drops one reference on table @tc. */
 void
 tc_unref(struct table_config *tc)
 {
 
 	tc->no.refcnt -= 1;
 }
 
 /*
  * Returns a pointer to slot @kidx of the chain's value array.
  * @tc is accepted but not consulted.
  */
 static struct table_value *
 get_table_value(struct ip_fw_chain *ch, struct table_config *tc, uint32_t kidx)
 {
 	struct table_value *values = (struct table_value *)ch->valuestate;
 
 	return (values + kidx);
 }
 
 
 /*
  * Checks whether entry @tei may be inserted into / updated in table @tc
  * with respect to the table's record limit.
  * May set flags in @tei to report the limit error or to restrict the
  * operation to updates only.
  *
  * Returns 0 if the operation can be performed, EFBIG otherwise.
  */
 static int
 check_table_limit(struct table_config *tc, struct tentry_info *tei)
 {
 	int limit_hit;
 
 	/* A limit of 0 means "unlimited". */
 	limit_hit = (tc->limit != 0 && tc->count >= tc->limit);
 	if (!limit_hit)
 		return (0);
 
 	if ((tei->flags & TEI_FLAGS_UPDATE) != 0) {
 		/*
 		 * The table is full but the caller asked for an update:
 		 * allow changing an existing record, forbid adding one.
 		 */
 		tei->flags |= TEI_FLAGS_DONTADD;
 		return (0);
 	}
 
 	/* Plain add on a full table: tell userland why it failed. */
 	tei->flags |= TEI_FLAGS_LIMIT;
 	return (EFBIG);
 }
 
 /*
  * Translates the return code of an algorithm add/del callback into the
  * matching TEI_FLAGS_* result bit and ORs it into @tei->flags, so that
  * userland can tell what happened to the entry.
  */
 static void
 store_tei_result(struct tentry_info *tei, int op, int error, uint32_t num)
 {
 	int result;
 
 	if (error == ENOENT)
 		result = TEI_FLAGS_NOTFOUND;
 	else if (error == EEXIST)
 		result = TEI_FLAGS_EXISTS;
 	else if (error != 0)
 		result = TEI_FLAGS_ERROR;
 	else if (op == OP_DEL)
 		result = TEI_FLAGS_DELETED;
 	else if (op == OP_ADD && num != 0)
 		result = TEI_FLAGS_ADDED;
 	else
 		result = 0;	/* successful add that inserted nothing new */
 
 	tei->flags |= result;
 }
 
 /*
  * Creates and references a table with default parameters and the legacy
  * value mask.  The allocated kidx is reported through @pkidx by
  * create_table_internal().
  * Used for table auto-creation to support old binaries.
  *
  * Returns 0 on success.
  */
 static int
 create_table_compat(struct ip_fw_chain *ch, struct tid_info *ti,
     uint16_t *pkidx)
 {
 	ipfw_xtable_info defaults;
 
 	/* Everything zeroed except the value mask legacy clients expect. */
 	memset(&defaults, 0, sizeof(defaults));
 	defaults.vmask = IPFW_VTYPE_LEGACY;
 
 	return (create_table_internal(ch, ti, NULL, &defaults, pkidx, 1));
 }
 
 /*
  * Find and reference existing table optionally
  * creating new one.
  *
  * @ti identifies the table, @tei/@count describe the entries about to be
  * processed and @op is OP_ADD or OP_DEL.
  *
  * Saves found table config into @ptc.
  * Note function may drop/acquire UH_WLOCK.
  * Returns 0 if table was found/created and referenced
  * or non-zero return code: EINVAL on a table type mismatch, EACCES if the
  * table is locked, EFBIG if a single add would exceed the table limit,
  * ESRCH if the table does not exist and is not auto-created, or the
  * error returned by table creation.
  */
 static int
 find_ref_table(struct ip_fw_chain *ch, struct tid_info *ti,
     struct tentry_info *tei, uint32_t count, int op,
     struct table_config **ptc)
 {
 	struct namedobj_instance *ni;
 	struct table_config *tc;
 	uint16_t kidx;
 	int error;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	ni = CHAIN_TO_NI(ch);
 	tc = NULL;
 	if ((tc = find_table(ni, ti)) != NULL) {
 		/* check table type */
 		if (tc->no.subtype != ti->type)
 			return (EINVAL);
 
 		if (tc->locked != 0)
 			return (EACCES);
 
 		/* Try to exit early on limit hit */
 		if (op == OP_ADD && count == 1 &&
 		    check_table_limit(tc, tei) != 0)
 			return (EFBIG);
 
 		/* Reference and return */
 		tc->no.refcnt++;
 		*ptc = tc;
 		return (0);
 	}
 
 	/* Nothing to delete from a table that does not exist. */
 	if (op == OP_DEL)
 		return (ESRCH);
 
 	/* Compatibility mode: create new table for old clients */
 	if ((tei->flags & TEI_FLAGS_COMPAT) == 0)
 		return (ESRCH);
 
 	/* Table creation is done with the UH lock dropped. */
 	IPFW_UH_WUNLOCK(ch);
 	error = create_table_compat(ch, ti, &kidx);
 	IPFW_UH_WLOCK(ch);
 	
 	if (error != 0)
 		return (error);
 
 	tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, kidx);
 	KASSERT(tc != NULL, ("create_table_compat returned bad idx %d", kidx));
 
 	/* OK, now we've got referenced table. */
 	*ptc = tc;
 	return (0);
 }
 
 /*
  * Rolls back already @added to @tc entries using state array @ta_buf_m.
  * Assume the following layout:
  * 1) ADD state (ta_buf_m[0] ... ta_buf_m[added - 1]) for handling update
  *   cases
  * 2) DEL state (ta_buf_m[count] ... ta_buf_m[count + added - 1])
  *   for storing deleted state
  *
  * Each slot is ta->ta_buf_size bytes wide.  Must be called with the UH
  * write lock held (asserted below).
  */
 static void
 rollback_added_entries(struct ip_fw_chain *ch, struct table_config *tc,
     struct table_info *tinfo, struct tentry_info *tei, caddr_t ta_buf_m,
     uint32_t count, uint32_t added)
 {
 	struct table_algo *ta;
 	struct tentry_info *ptei;
 	caddr_t v, vv;
 	size_t ta_buf_sz;
 	int error, i;
 	uint32_t num;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	ta = tc->ta;
 	ta_buf_sz = ta->ta_buf_size;
 	/* v walks the ADD slots, vv the DEL slots of the same entries. */
 	v = ta_buf_m;
 	vv = v + count * ta_buf_sz;
 	for (i = 0; i < added; i++, v += ta_buf_sz, vv += ta_buf_sz) {
 		ptei = &tei[i];
 		if ((ptei->flags & TEI_FLAGS_UPDATED) != 0) {
 
 			/*
 			 * We have old value stored by previous
 			 * call in @ptei->value. Do add once again
 			 * to restore it.
 			 */
 			error = ta->add(tc->astate, tinfo, ptei, v, &num);
 			KASSERT(error == 0, ("rollback UPDATE fail"));
 			KASSERT(num == 0, ("rollback UPDATE fail2"));
 			continue;
 		}
 
 		/* Entry was newly inserted: delete it again. */
 		error = ta->prepare_del(ch, ptei, vv);
 		KASSERT(error == 0, ("pre-rollback INSERT failed"));
 		error = ta->del(tc->astate, tinfo, ptei, vv, &num);
 		KASSERT(error == 0, ("rollback INSERT failed"));
 		tc->count -= num;
 	}
 }
 
 /*
  * Prepares add/del state for all @count entries in @tei.
  * Uses either stack buffer (@ta_buf) or allocates a new one.
  * Stores pointer to allocated buffer back to @ta_buf.
  *
  * The buffer pointer is stored back even when an error is returned, so
  * the caller still sees any buffer that was allocated here.
  *
  * Returns 0 on success, or the first error reported by the algorithm's
  * prepare_add/prepare_del callback.
  */
 static int
 prepare_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
     struct tentry_info *tei, uint32_t count, int op, caddr_t *ta_buf)
 {
 	caddr_t ta_buf_m, v;
 	size_t ta_buf_sz, sz;
 	struct tentry_info *ptei;
 	int error, i;
 
 	error = 0;
 	ta_buf_sz = ta->ta_buf_size;
 	if (count == 1) {
 		/* Single add/delete, use on-stack buffer */
 		memset(*ta_buf, 0, TA_BUF_SZ);
 		ta_buf_m = *ta_buf;
 	} else {
 
 		/*
 		 * Multiple adds/deletes, allocate larger buffer
 		 *
 		 * Note we need 2xcount buffer for add case:
 		 * we have to hold both ADD state
 		 * and DELETE state (this may be needed
 		 * if we need to rollback all changes)
 		 */
 		sz = count * ta_buf_sz;
 		ta_buf_m = malloc((op == OP_ADD) ? sz * 2 : sz, M_TEMP,
 		    M_WAITOK | M_ZERO);
 	}
 
 	/* One ta_buf_sz-sized slot per entry, in @tei order. */
 	v = ta_buf_m;
 	for (i = 0; i < count; i++, v += ta_buf_sz) {
 		ptei = &tei[i];
 		error = (op == OP_ADD) ?
 		    ta->prepare_add(ch, ptei, v) : ta->prepare_del(ch, ptei, v);
 
 		/*
 		 * Some syntax error (incorrect mask, or address, or
 		 * anything). Return error regardless of atomicity
 		 * settings.
 		 */
 		if (error != 0)
 			break;
 	}
 
 	*ta_buf = ta_buf_m;
 	return (error);
 }
 
 /*
  * Flushes allocated state for each of the @count entries in @tei.
  * When @rollback is non-zero the second ("deleted") half of the buffer
  * is flushed as well.
  * Frees @ta_buf_m if it differs from the stack buffer @ta_buf.
  */
 static void
 flush_batch_buffer(struct ip_fw_chain *ch, struct table_algo *ta,
     struct tentry_info *tei, uint32_t count, int rollback,
     caddr_t ta_buf_m, caddr_t ta_buf)
 {
 	caddr_t v;
 	struct tentry_info *ptei;
 	size_t ta_buf_sz;
 	int i;
 
 	ta_buf_sz = ta->ta_buf_size;
 
 	/* Run cleaning callback anyway */
 	v = ta_buf_m;
 	for (i = 0; i < count; i++, v += ta_buf_sz) {
 		ptei = &tei[i];
 		ta->flush_entry(ch, ptei, v);
 		/* Release the per-entry ptv allocation, if one is attached. */
 		if (ptei->ptv != NULL) {
 			free(ptei->ptv, M_IPFW);
 			ptei->ptv = NULL;
 		}
 	}
 
 	/* Clean up "deleted" state in case of rollback */
 	if (rollback != 0) {
 		/* The DEL slots start right after the @count ADD slots. */
 		v = ta_buf_m + count * ta_buf_sz;
 		for (i = 0; i < count; i++, v += ta_buf_sz)
 			ta->flush_entry(ch, &tei[i], v);
 	}
 
 	/* Only a heap-allocated batch buffer needs freeing. */
 	if (ta_buf_m != ta_buf)
 		free(ta_buf_m, M_TEMP);
 }
 
 
 /*
  * Operation-state rollback callback for an in-flight add.
  *
  * Does nothing unless @object is the table or the chain recorded in the
  * state.  Otherwise rolls back the table values held by the state and
  * sets its modified flag.  Requires the UH write lock (asserted below).
  */
 static void
 rollback_add_entry(void *object, struct op_state *_state)
 {
 	struct ip_fw_chain *ch;
 	struct tableop_state *ts;
 
 	ts = (struct tableop_state *)_state;
 
 	/* Not our table and not our chain: nothing to roll back. */
 	if (ts->tc != object && ts->ch != object)
 		return;
 
 	ch = ts->ch;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	/* Call specific unlockers */
 	rollback_table_values(ts);
 
 	/* Indicate we've called */
 	ts->modified = 1;
 }
 
 /*
  * Adds/updates one or more entries in table @ti.
  *
  * Function may drop/reacquire UH wlock multiple times due to
  * items alloc, algorithm callbacks (check_space), value linkage
  * (new values, value storage realloc), etc..
  * Other processes like other adds (which may involve storage resize),
  * table swaps (which changes table data and may change algo type),
  * table modify (which may change value mask) may be executed
  * simultaneously so we need to deal with it.
  *
  * The following approach was implemented:
  * we have per-chain linked list, protected with UH lock.
  * add_table_entry prepares special on-stack structure which is passed
  * to its descendants. Users add this structure to this list before unlock.
  * After performing needed operations and acquiring UH lock back, each user
  * checks if structure has changed. If true, it rolls local state back and
  * returns without error to the caller.
  * add_table_entry() on its own checks if structure has changed and restarts
  * its operation from the beginning (goto restart).
  *
  * Functions which are modifying fields of interest (currently
  *   resize_shared_value_storage() and swap_tables() )
  * traverse the given list while holding UH lock immediately before
  * performing their operations, calling the function provided by each list
  * entry ( currently rollback_add_entry ) which performs rollback for all
  * necessary state and sets appropriate values in structure indicating
  * rollback has happened.
  *
  * Algo interaction:
  * Function references @ti first to ensure table won't
  * disappear or change its type.
  * After that, prepare_add callback is called for each @tei entry.
  * Next, we try to add each entry under UH+WLOCK
  * using add() callback.
  * Finally, we free all state by calling flush_entry callback
  * for each @tei.
  *
  * @flags: when IPFW_CTF_ATOMIC is set, the first failing entry causes
  *   all entries added so far to be rolled back; otherwise processing
  *   continues with the next entry.
  * @count: number of entries in @tei.
  *
  * Returns 0 on success, otherwise the first error encountered.
  */
 int
 add_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
     struct tentry_info *tei, uint8_t flags, uint32_t count)
 {
 	struct table_config *tc;
 	struct table_algo *ta;
 	uint16_t kidx;
 	int error, first_error, i, rollback;
 	uint32_t num, numadd;
 	struct tentry_info *ptei;
 	struct tableop_state ts;
 	char ta_buf[TA_BUF_SZ];
 	caddr_t ta_buf_m, v;
 
 	memset(&ts, 0, sizeof(ts));
 	ta = NULL;
 	IPFW_UH_WLOCK(ch);
 
 	/*
 	 * Find and reference existing table.
 	 */
 restart:
 	/*
 	 * Non-zero ts.modified means we got here via "goto restart":
 	 * release everything prepared by the previous attempt (with the
 	 * UH lock dropped) and start over with a clean state.
 	 */
 	if (ts.modified != 0) {
 		IPFW_UH_WUNLOCK(ch);
 		flush_batch_buffer(ch, ta, tei, count, rollback,
 		    ta_buf_m, ta_buf);
 		memset(&ts, 0, sizeof(ts));
 		ta = NULL;
 		IPFW_UH_WLOCK(ch);
 	}
 
 	error = find_ref_table(ch, ti, tei, count, OP_ADD, &tc);
 	if (error != 0) {
 		IPFW_UH_WUNLOCK(ch);
 		return (error);
 	}
 	ta = tc->ta;
 
 	/* Fill in tablestate */
 	ts.ch = ch;
 	ts.opstate.func = rollback_add_entry;
 	ts.tc = tc;
 	ts.vshared = tc->vshared;
 	ts.vmask = tc->vmask;
 	ts.ta = ta;
 	ts.tei = tei;
 	ts.count = count;
 	rollback = 0;
 	/* Register the state so concurrent modifiers can flag us */
 	add_toperation_state(ch, &ts);
 	IPFW_UH_WUNLOCK(ch);
 
 	/* Allocate memory and prepare record(s) */
 	/* Pass stack buffer by default */
 	ta_buf_m = ta_buf;
 	error = prepare_batch_buffer(ch, ta, tei, count, OP_ADD, &ta_buf_m);
 
 	IPFW_UH_WLOCK(ch);
 	del_toperation_state(ch, &ts);
 	/* Drop reference we've used in first search */
 	tc->no.refcnt--;
 
 	/* Check prepare_batch_buffer() error */
 	if (error != 0)
 		goto cleanup;
 
 	/*
 	 * Check if table swap has happened.
 	 * (so table algo might be changed).
 	 * Restart operation to achieve consistent behavior.
 	 */
 	if (ts.modified != 0)
 		goto restart;
 
 	/*
 	 * Link all values to shared/per-table value array.
 	 *
 	 * May release/reacquire UH_WLOCK.
 	 */
 	error = ipfw_link_table_values(ch, &ts);
 	if (error != 0)
 		goto cleanup;
 	if (ts.modified != 0)
 		goto restart;
 
 	/*
 	 * Ensure we are able to add all entries without additional
 	 * memory allocations. May release/reacquire UH_WLOCK.
 	 */
 	kidx = tc->no.kidx;
 	error = check_table_space(ch, &ts, tc, KIDX_TO_TI(ch, kidx), count);
 	if (error != 0)
 		goto cleanup;
 	if (ts.modified != 0)
 		goto restart;
 
 	/* We've got valid table in @tc. Let's try to add data */
 	kidx = tc->no.kidx;
 	ta = tc->ta;
 	numadd = 0;
 	first_error = 0;
 
 	IPFW_WLOCK(ch);
 
 	/* @v walks the prepared per-entry state, one slot per entry */
 	v = ta_buf_m;
 	for (i = 0; i < count; i++, v += ta->ta_buf_size) {
 		ptei = &tei[i];
 		num = 0;
 		/* check limit before adding */
 		if ((error = check_table_limit(tc, ptei)) == 0) {
 			error = ta->add(tc->astate, KIDX_TO_TI(ch, kidx),
 			    ptei, v, &num);
 			/* Set status flag to inform userland */
 			store_tei_result(ptei, OP_ADD, error, num);
 		}
 		if (error == 0) {
 			/* Update number of records to ease limit checking */
 			tc->count += num;
 			numadd += num;
 			continue;
 		}
 
 		/* Remember only the first failure; it is what we return */
 		if (first_error == 0)
 			first_error = error;
 
 		/*
 		 * Some error has happened. Check our atomicity
 		 * settings: continue if atomicity is not required,
 		 * rollback changes otherwise.
 		 */
 		if ((flags & IPFW_CTF_ATOMIC) == 0)
 			continue;
 
 		rollback_added_entries(ch, tc, KIDX_TO_TI(ch, kidx),
 		    tei, ta_buf_m, count, i);
 
 		rollback = 1;
 		break;
 	}
 
 	IPFW_WUNLOCK(ch);
 
 	ipfw_garbage_table_values(ch, tc, tei, count, rollback);
 
 	/* Permit post-add algorithm grow/rehash. */
 	if (numadd != 0)
 		check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
 
 	/* Return first error to user, if any */
 	error = first_error;
 
 cleanup:
 	/* Every path reaching this label holds the UH write lock */
 	IPFW_UH_WUNLOCK(ch);
 
 	flush_batch_buffer(ch, ta, tei, count, rollback, ta_buf_m, ta_buf);
 	
 	return (error);
 }
 
 /*
  * Deletes one or more entries in table @ti.
  *
  * The table is looked up and referenced under the UH write lock, the
  * per-entry state is prepared with the lock dropped, and the entries
  * are then removed under UH+WLOCK using the algorithm del() callback.
  * The reference is dropped on every path once the lock is re-acquired.
  *
  * @tei:   array of @count entries to delete; per-entry results are
  *         recorded via store_tei_result().
  * @flags: not used by this function.
  *
  * Returns 0 on success, EINVAL if the table algorithm changed while the
  * lock was dropped, otherwise the lookup/prepare error or the first
  * per-entry deletion error.
  */
 int
 del_table_entry(struct ip_fw_chain *ch, struct tid_info *ti,
     struct tentry_info *tei, uint8_t flags, uint32_t count)
 {
 	struct table_config *tc;
 	struct table_algo *ta;
 	struct tentry_info *ptei;
 	uint16_t kidx;
 	int error, first_error, i;
 	uint32_t num, numdel;
 	char ta_buf[TA_BUF_SZ];
 	caddr_t ta_buf_m, v;
 
 	/*
 	 * Find and reference existing table.
 	 */
 	IPFW_UH_WLOCK(ch);
 	error = find_ref_table(ch, ti, tei, count, OP_DEL, &tc);
 	if (error != 0) {
 		IPFW_UH_WUNLOCK(ch);
 		return (error);
 	}
 	ta = tc->ta;
 	IPFW_UH_WUNLOCK(ch);
 
 	/* Allocate memory and prepare record(s) */
 	/* Pass stack buffer by default */
 	ta_buf_m = ta_buf;
 	error = prepare_batch_buffer(ch, ta, tei, count, OP_DEL, &ta_buf_m);
 
 	IPFW_UH_WLOCK(ch);
 
 	/*
 	 * Drop reference we've used in first search.
 	 * This has to happen even if preparation failed, otherwise the
 	 * table stays referenced forever.
 	 */
 	tc->no.refcnt--;
 
 	/* Check prepare_batch_buffer() error */
 	if (error != 0) {
 		IPFW_UH_WUNLOCK(ch);
 		goto cleanup;
 	}
 
 	/*
 	 * Check if table algo is still the same.
 	 * (changed ta may be the result of table swap).
 	 */
 	if (ta != tc->ta) {
 		IPFW_UH_WUNLOCK(ch);
 		error = EINVAL;
 		goto cleanup;
 	}
 
 	kidx = tc->no.kidx;
 	numdel = 0;
 	first_error = 0;
 
 	IPFW_WLOCK(ch);
 	/* @v walks the prepared per-entry state, one slot per entry */
 	v = ta_buf_m;
 	for (i = 0; i < count; i++, v += ta->ta_buf_size) {
 		ptei = &tei[i];
 		num = 0;
 		error = ta->del(tc->astate, KIDX_TO_TI(ch, kidx), ptei, v,
 		    &num);
 		/* Save state for userland */
 		store_tei_result(ptei, OP_DEL, error, num);
 		if (error != 0 && first_error == 0)
 			first_error = error;
 		tc->count -= num;
 		numdel += num;
 	}
 	IPFW_WUNLOCK(ch);
 
 	/* Unlink non-used values */
 	ipfw_garbage_table_values(ch, tc, tei, count, 0);
 
 	if (numdel != 0) {
 		/* Run post-del hook to permit shrinking */
 		check_table_space(ch, NULL, tc, KIDX_TO_TI(ch, kidx), 0);
 	}
 
 	IPFW_UH_WUNLOCK(ch);
 
 	/* Return first error to user, if any */
 	error = first_error;
 
 cleanup:
 	/* Reached with the UH lock released on every path */
 	flush_batch_buffer(ch, ta, tei, count, 0, ta_buf_m, ta_buf);
 
 	return (error);
 }
 
 /*
  * Ensure that table @tc has enough space to add @count entries without
  * need for reallocation.
  *
  * Must be called with UH_WLOCK held. The lock may be dropped and
  * re-acquired inside, but it is always held again on return.
  * If @ts is non-NULL it is registered as an operation state while the
  * lock is dropped; the loop stops as soon as @ts gets marked modified.
  *
  * Callbacks order:
  * 0) need_modify (UH_WLOCK) - checks if @count items can be added w/o resize.
  *
  * 1) prepare_mod (no locks) - alloc new state based on @pflags.
  * 2) fill_mod (UH_WLOCK) - copy old data into new storage
  * 3) modify (UH_WLOCK + WLOCK) - switch pointers
  * 4) flush_mod (UH_WLOCK) - free state, if needed
  *
  * Returns 0 on success.
  */
 static int
 check_table_space(struct ip_fw_chain *ch, struct tableop_state *ts,
     struct table_config *tc, struct table_info *ti, uint32_t count)
 {
 	struct table_algo *ta;
 	uint64_t pflags;
 	char ta_buf[TA_BUF_SZ];
 	int error;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	error = 0;
 	ta = tc->ta;
 	/* Algorithms without need_modify never resize */
 	if (ta->need_modify == NULL)
 		return (0);
 
 	/* Acquire reference not to lose @tc between locks/unlocks */
 	tc->no.refcnt++;
 
 	/*
 	 * TODO: think about avoiding race between large add/large delete
 	 * operation on algorithm which implements shrinking along with
 	 * growing.
 	 */
 	while (true) {
 		pflags = 0;
 		if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
 			error = 0;
 			break;
 		}
 
 		/* We have to shrink/grow table */
 		if (ts != NULL)
 			add_toperation_state(ch, ts);
 		IPFW_UH_WUNLOCK(ch);
 
 		memset(&ta_buf, 0, sizeof(ta_buf));
 		error = ta->prepare_mod(ta_buf, &pflags);
 
 		IPFW_UH_WLOCK(ch);
 		if (ts != NULL)
 			del_toperation_state(ch, ts);
 
 		if (error != 0)
 			break;
 
 		if (ts != NULL && ts->modified != 0) {
 
 			/*
 			 * Swap operation has happened
 			 * so we're currently operating on other
 			 * table data. Stop doing this.
 			 */
 			ta->flush_mod(ta_buf);
 			break;
 		}
 
 		/* Check if we still need to alter table */
 		ti = KIDX_TO_TI(ch, tc->no.kidx);
 		if (ta->need_modify(tc->astate, ti, count, &pflags) == 0) {
 
 			/*
 			 * Other thread has already performed resize.
 			 * Flush our state and return.
 			 *
 			 * Keep UH_WLOCK held: the reference is dropped
 			 * below and callers unlock on their own.
 			 */
 			ta->flush_mod(ta_buf);
 			break;
 		}
 
 		error = ta->fill_mod(tc->astate, ti, ta_buf, &pflags);
 		if (error == 0) {
 			/* Do actual modification */
 			IPFW_WLOCK(ch);
 			ta->modify(tc->astate, ti, ta_buf, pflags);
 			IPFW_WUNLOCK(ch);
 		}
 
 		/* Anyway, flush data and retry */
 		ta->flush_mod(ta_buf);
 	}
 
 	tc->no.refcnt--;
 	return (error);
 }
 
 /*
  * Adds or deletes a single record in a table (legacy request format).
  * Data layout (v0):
  * Request: [ ip_fw3_opheader ipfw_table_xentry ]
  *
  * The opcode selects the operation: IP_FW_TABLE_XADD adds the record,
  * anything else deletes it.
  *
  * Returns 0 on success, EINVAL on malformed request size.
  */
 static int
 manage_table_ent_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	ipfw_table_xentry *xent;
 	struct tentry_info tei;
 	struct tid_info ti;
 	struct table_value v;
 	int hdrlen, read;
 
 	/* Fixed part of the xentry, i.e. everything before the key */
 	hdrlen = offsetof(ipfw_table_xentry, k);
 	read = sizeof(ip_fw3_opheader);
 
 	/* The buffer must hold at least both headers */
 	if (sd->valsize < (sizeof(*op3) + hdrlen))
 		return (EINVAL);
 
 	/* The entry follows the opheader; validate its own length field */
 	xent = (ipfw_table_xentry *)(op3 + 1);
 	if (xent->len < hdrlen)
 		return (EINVAL);
 	if (xent->len + read > sd->valsize)
 		return (EINVAL);
 
 	/* Describe the table being addressed */
 	memset(&ti, 0, sizeof(ti));
 	ti.uidx = xent->tbl;
 	ti.type = xent->type;
 
 	/* Describe the entry itself */
 	memset(&tei, 0, sizeof(tei));
 	tei.paddr = &xent->k;
 	tei.masklen = xent->masklen;
 	ipfw_import_table_value_legacy(xent->value, &v);
 	tei.pvalue = &v;
 	/* Mark the request as coming through the compat interface */
 	tei.flags = TEI_FLAGS_COMPAT;
 	/* For address tables the key size tells IPv4 from IPv6 */
 	if (xent->type == IPFW_TABLE_ADDR)
 		tei.subtype = (xent->len - hdrlen == sizeof(in_addr_t)) ?
 		    AF_INET : AF_INET6;
 
 	if (op3->opcode == IP_FW_TABLE_XADD)
 		return (add_table_entry(ch, &ti, &tei, 0, 1));
 
 	return (del_table_entry(ch, &ti, &tei, 0, 1));
 }
 
 /*
  * Adds or deletes record in table.
  * Data layout (v1)(current):
  * Request: [ ipfw_obj_header
  *   ipfw_obj_ctlv(IPFW_TLV_TBLENT_LIST) [ ipfw_obj_tentry x N ]
  * ]
  *
  * All entries must address the same table index. The opcode selects
  * the operation: IP_FW_TABLE_XADD adds, anything else deletes.
  * Per-entry results and values are written back into the request
  * buffer.
  *
  * Returns 0 on success, EINVAL on malformed request, ENOTSUP if the
  * entries address different tables, otherwise the add/delete error.
  */
 static int
 manage_table_ent_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	ipfw_obj_tentry *tent, *ptent;
 	ipfw_obj_ctlv *ctlv;
 	ipfw_obj_header *oh;
 	struct tentry_info *ptei, tei, *tei_buf;
 	struct tid_info ti;
 	int error, i, kidx, read;
 
 	/* Check minimum header size */
 	if (sd->valsize < (sizeof(*oh) + sizeof(*ctlv)))
 		return (EINVAL);
 
 	/* Check if passed data is too long */
 	if (sd->valsize != sd->kavail)
 		return (EINVAL);
 
 	oh = (ipfw_obj_header *)sd->kbuf;
 
 	/* Basic length checks for TLVs */
 	if (oh->ntlv.head.length != sizeof(oh->ntlv))
 		return (EINVAL);
 
 	read = sizeof(*oh);
 
 	ctlv = (ipfw_obj_ctlv *)(oh + 1);
 	if (ctlv->head.length + read != sd->valsize)
 		return (EINVAL);
 
 	read += sizeof(*ctlv);
 	tent = (ipfw_obj_tentry *)(ctlv + 1);
 
 	/*
 	 * The entry count must match the buffer size exactly.
 	 * Compare against the count derived by division first, so that a
 	 * huge user-supplied count cannot wrap the multiplication where
 	 * size_t is 32 bits wide. @read never exceeds valsize here due to
 	 * the minimum header size check above.
 	 */
 	if (ctlv->count != (sd->valsize - read) / sizeof(*tent) ||
 	    ctlv->count * sizeof(*tent) + read != sd->valsize)
 		return (EINVAL);
 
 	if (ctlv->count == 0)
 		return (0);
 
 	/*
 	 * Mark entire buffer as "read".
 	 * This instructs sopt api write it back
 	 * after function return.
 	 */
 	ipfw_get_sopt_header(sd, sd->valsize);
 
 	/* Perform basic checks for each entry */
 	ptent = tent;
 	kidx = tent->idx;
 	for (i = 0; i < ctlv->count; i++, ptent++) {
 		if (ptent->head.length != sizeof(*ptent))
 			return (EINVAL);
 		/* Only one table per request is supported */
 		if (ptent->idx != kidx)
 			return (ENOTSUP);
 	}
 
 	/* Convert data into kernel request objects */
 	objheader_to_ti(oh, &ti);
 	ti.type = oh->ntlv.type;
 	ti.uidx = kidx;
 
 	/* Use on-stack buffer for single add/del */
 	if (ctlv->count == 1) {
 		memset(&tei, 0, sizeof(tei));
 		tei_buf = &tei;
 	} else
 		tei_buf = malloc(ctlv->count * sizeof(tei), M_TEMP,
 		    M_WAITOK | M_ZERO);
 
 	ptei = tei_buf;
 	ptent = tent;
 	for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
 		ptei->paddr = &ptent->k;
 		ptei->subtype = ptent->subtype;
 		ptei->masklen = ptent->masklen;
 		if (ptent->head.flags & IPFW_TF_UPDATE)
 			ptei->flags |= TEI_FLAGS_UPDATE;
 
 		/* The value is converted in place inside the request buffer */
 		ipfw_import_table_value_v1(&ptent->v.value);
 		ptei->pvalue = (struct table_value *)&ptent->v.value;
 	}
 
 	error = (oh->opheader.opcode == IP_FW_TABLE_XADD) ?
 	    add_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count) :
 	    del_table_entry(ch, &ti, tei_buf, ctlv->flags, ctlv->count);
 
 	/* Translate result back to userland */
 	ptei = tei_buf;
 	ptent = tent;
 	for (i = 0; i < ctlv->count; i++, ptent++, ptei++) {
 		/* First matching flag wins; result is untouched if none is set */
 		if (ptei->flags & TEI_FLAGS_ADDED)
 			ptent->result = IPFW_TR_ADDED;
 		else if (ptei->flags & TEI_FLAGS_DELETED)
 			ptent->result = IPFW_TR_DELETED;
 		else if (ptei->flags & TEI_FLAGS_UPDATED)
 			ptent->result = IPFW_TR_UPDATED;
 		else if (ptei->flags & TEI_FLAGS_LIMIT)
 			ptent->result = IPFW_TR_LIMIT;
 		else if (ptei->flags & TEI_FLAGS_ERROR)
 			ptent->result = IPFW_TR_ERROR;
 		else if (ptei->flags & TEI_FLAGS_NOTFOUND)
 			ptent->result = IPFW_TR_NOTFOUND;
 		else if (ptei->flags & TEI_FLAGS_EXISTS)
 			ptent->result = IPFW_TR_EXISTS;
 		ipfw_export_table_value_v1(ptei->pvalue, &ptent->v.value);
 	}
 
 	/* Only the multi-entry buffer came from malloc */
 	if (tei_buf != &tei)
 		free(tei_buf, M_TEMP);
 
 	return (error);
 }
 
 /*
  * Looks up an entry in given table.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ipfw_obj_tentry ]
  * Reply: [ ipfw_obj_header ipfw_obj_tentry ]
  *
  * The lookup runs under the UH read lock, which is released on every
  * return path.
  *
  * Returns 0 on success, EINVAL on malformed request or table type
  * mismatch, ESRCH if the table does not exist, ENOTSUP if the table
  * algorithm has no find_tentry callback, otherwise the callback result.
  */
 static int
 find_table_entry(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	ipfw_obj_tentry *tent;
 	ipfw_obj_header *oh;
 	struct tid_info ti;
 	struct table_config *tc;
 	struct table_algo *ta;
 	struct table_info *kti;
 	struct namedobj_instance *ni;
 	int error;
 	size_t sz;
 
 	/* Request must consist of exactly one header and one entry */
 	sz = sizeof(*oh) + sizeof(*tent);
 	if (sd->valsize != sz)
 		return (EINVAL);
 
 	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
 	tent = (ipfw_obj_tentry *)(oh + 1);
 
 	/* Basic length checks for TLVs */
 	if (oh->ntlv.head.length != sizeof(oh->ntlv))
 		return (EINVAL);
 
 	objheader_to_ti(oh, &ti);
 	ti.type = oh->ntlv.type;
 	ti.uidx = tent->idx;
 
 	IPFW_UH_RLOCK(ch);
 	ni = CHAIN_TO_NI(ch);
 
 	/*
 	 * Find existing table and check its type.
 	 */
 	if ((tc = find_table(ni, &ti)) == NULL) {
 		IPFW_UH_RUNLOCK(ch);
 		return (ESRCH);
 	}
 
 	/* check table type */
 	if (tc->no.subtype != ti.type) {
 		IPFW_UH_RUNLOCK(ch);
 		return (EINVAL);
 	}
 
 	kti = KIDX_TO_TI(ch, tc->no.kidx);
 	ta = tc->ta;
 
 	/* Algorithm may not support single-entry lookups */
 	if (ta->find_tentry == NULL) {
 		/* Do not leave the function with the read lock held */
 		IPFW_UH_RUNLOCK(ch);
 		return (ENOTSUP);
 	}
 
 	error = ta->find_tentry(tc->astate, kti, tent);
 
 	IPFW_UH_RUNLOCK(ch);
 
 	return (error);
 }
 
 /*
  * Flushes all entries of the given table or destroys it, depending on
  * the request opcode (IP_FW_TABLE_XFLUSH / IP_FW_TABLE_XDESTROY).
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ]
  *
  * Returns 0 on success, EINVAL on wrong request size, ENOTSUP for any
  * other opcode.
  */
 static int
 flush_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_header *oh;
 	struct tid_info ti;
 
 	/* Request is a bare object header */
 	if (sd->valsize != sizeof(*oh))
 		return (EINVAL);
 
 	oh = (struct _ipfw_obj_header *)op3;
 	objheader_to_ti(oh, &ti);
 
 	switch (op3->opcode) {
 	case IP_FW_TABLE_XDESTROY:
 		return (destroy_table(ch, &ti));
 	case IP_FW_TABLE_XFLUSH:
 		return (flush_table(ch, &ti));
 	default:
 		return (ENOTSUP);
 	}
 }
 
 /*
  * Operation-state callback installed by flush_table().
  * Marks the state as modified when @object is the table recorded in it.
  */
 static void
 restart_flush(void *object, struct op_state *_state)
 {
 	struct tableop_state *ts = (struct tableop_state *)_state;
 
 	/* Tell the owner that we have been called for its table */
 	if (ts->tc == object)
 		ts->modified = 1;
 }
 
 /*
  * Flushes given table.
  *
  * Function create new table instance with the same
  * parameters, swaps it with old one and
  * flushes state without holding runtime WLOCK.
  *
  * Returns 0 on success.
  */
 int
 flush_table(struct ip_fw_chain *ch, struct tid_info *ti)
 {
 	struct namedobj_instance *ni;
 	struct table_config *tc;
 	struct table_algo *ta;
 	struct table_info ti_old, ti_new, *tablestate;
 	void *astate_old, *astate_new;
 	char algostate[64], *pstate;
 	struct tableop_state ts;
 	int error, need_gc;
 	uint16_t kidx;
 	uint8_t tflags;
 
 	/*
 	 * Stage 1: save table algoritm.
 	 * Reference found table to ensure it won't disappear.
 	 */
 	IPFW_UH_WLOCK(ch);
 	ni = CHAIN_TO_NI(ch);
 	if ((tc = find_table(ni, ti)) == NULL) {
 		IPFW_UH_WUNLOCK(ch);
 		return (ESRCH);
 	}
 	need_gc = 0;
 	astate_new = NULL;
 	memset(&ti_new, 0, sizeof(ti_new));
 restart:
 	/* Set up swap handler */
 	memset(&ts, 0, sizeof(ts));
 	ts.opstate.func = restart_flush;
 	ts.tc = tc;
 
 	ta = tc->ta;
 	/* Do not flush readonly tables */
 	if ((ta->flags & TA_FLAG_READONLY) != 0) {
 		IPFW_UH_WUNLOCK(ch);
 		return (EACCES);
 	}
 	/* Save startup algo parameters */
 	if (ta->print_config != NULL) {
 		ta->print_config(tc->astate, KIDX_TO_TI(ch, tc->no.kidx),
 		    algostate, sizeof(algostate));
 		pstate = algostate;
 	} else
 		pstate = NULL;
 	tflags = tc->tflags;
 	tc->no.refcnt++;
 	add_toperation_state(ch, &ts);
 	IPFW_UH_WUNLOCK(ch);
 
 	/*
 	 * Stage 1.5: if this is not the first attempt, destroy previous state
 	 */
 	if (need_gc != 0) {
 		ta->destroy(astate_new, &ti_new);
 		need_gc = 0;
 	}
 
 	/*
 	 * Stage 2: allocate new table instance using same algo.
 	 */
 	memset(&ti_new, 0, sizeof(struct table_info));
 	error = ta->init(ch, &astate_new, &ti_new, pstate, tflags);
 
 	/*
 	 * Stage 3: swap old state pointers with newly-allocated ones.
 	 * Decrease refcount.
 	 */
 	IPFW_UH_WLOCK(ch);
 	tc->no.refcnt--;
 	del_toperation_state(ch, &ts);
 
 	if (error != 0) {
 		IPFW_UH_WUNLOCK(ch);
 		return (error);
 	}
 
 	/*
 	 * Restart operation if table swap has happened:
 	 * even if algo may be the same, algo init parameters
 	 * may change. Restart operation instead of doing
 	 * complex checks.
 	 */
 	if (ts.modified != 0) {
 		/* Delay destroying data since we're holding UH lock */
 		need_gc = 1;
 		goto restart;
 	}
 
 	ni = CHAIN_TO_NI(ch);
 	kidx = tc->no.kidx;
 	tablestate = (struct table_info *)ch->tablestate;
 
 	IPFW_WLOCK(ch);
 	ti_old = tablestate[kidx];
 	tablestate[kidx] = ti_new;
 	IPFW_WUNLOCK(ch);
 
 	astate_old = tc->astate;
 	tc->astate = astate_new;
 	tc->ti_copy = ti_new;
 	tc->count = 0;
 
 	/* Notify algo on real @ti address */
 	if (ta->change_ti != NULL)
 		ta->change_ti(tc->astate, &tablestate[kidx]);
 
 	/*
 	 * Stage 4: unref values.
 	 */
 	ipfw_unref_table_values(ch, tc, ta, astate_old, &ti_old);
 	IPFW_UH_WUNLOCK(ch);
 
 	/*
 	 * Stage 5: perform real flush/destroy.
 	 */
 	ta->destroy(astate_old, &ti_old);
 
 	return (0);
 }
 
 /*
  * Sockopt handler: swaps the two tables named in the request.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ipfw_obj_ntlv ]
  *
  * The first table is taken from the name TLV embedded in the header,
  * the second one from the TLV following the header.
  *
  * Returns 0 on success, EINVAL on wrong request size, otherwise the
  * swap_tables() result.
  */
 static int
 swap_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_header *oh;
 	ipfw_obj_ntlv *second;
 	struct tid_info ti_a, ti_b;
 
 	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_obj_ntlv))
 		return (EINVAL);
 
 	oh = (struct _ipfw_obj_header *)op3;
 	second = (ipfw_obj_ntlv *)(oh + 1);
 
 	ntlv_to_ti(&oh->ntlv, &ti_a);
 	ntlv_to_ti(second, &ti_b);
 
 	return (swap_tables(ch, &ti_a, &ti_b));
 }
 
 /*
  * Swaps two tables of the same type/valtype.
  *
  * Checks if tables are compatible and limits
  * permits swap, than actually perform swap.
  *
  * Each table consists of 2 different parts:
  * config:
  *   @tc (with name, set, kidx) and rule bindings, which is "stable".
  *   number of items
  *   table algo
  * runtime:
  *   runtime data @ti (ch->tablestate)
  *   runtime cache in @tc
  *   algo-specific data (@tc->astate)
  *
  * So we switch:
  *  all runtime data
  *   number of items
  *   table algo
  *
  * After that we call @ti change handler for each table.
  *
  * Note that referencing @tc won't protect tc->ta from change.
  * XXX: Do we need to restrict swap between locked tables?
  * XXX: Do we need to exchange ftype?
  *
  * Returns 0 on success.
  */
 static int
 swap_tables(struct ip_fw_chain *ch, struct tid_info *a,
     struct tid_info *b)
 {
 	struct namedobj_instance *ni;
 	struct table_config *tc_a, *tc_b;
 	struct table_algo *ta;
 	struct table_info ti, *tablestate;
 	void *astate;
 	uint32_t count;
 
 	/*
 	 * Stage 1: find both tables and ensure they are of
 	 * the same type.
 	 */
 	IPFW_UH_WLOCK(ch);
 	ni = CHAIN_TO_NI(ch);
 	if ((tc_a = find_table(ni, a)) == NULL) {
 		IPFW_UH_WUNLOCK(ch);
 		return (ESRCH);
 	}
 	if ((tc_b = find_table(ni, b)) == NULL) {
 		IPFW_UH_WUNLOCK(ch);
 		return (ESRCH);
 	}
 
 	/* It is very easy to swap between the same table */
 	if (tc_a == tc_b) {
 		IPFW_UH_WUNLOCK(ch);
 		return (0);
 	}
 
 	/* Check type and value are the same */
 	if (tc_a->no.subtype!=tc_b->no.subtype || tc_a->tflags!=tc_b->tflags) {
 		IPFW_UH_WUNLOCK(ch);
 		return (EINVAL);
 	}
 
 	/* Check limits before swap */
 	if ((tc_a->limit != 0 && tc_b->count > tc_a->limit) ||
 	    (tc_b->limit != 0 && tc_a->count > tc_b->limit)) {
 		IPFW_UH_WUNLOCK(ch);
 		return (EFBIG);
 	}
 
 	/* Check if one of the tables is readonly */
 	if (((tc_a->ta->flags | tc_b->ta->flags) & TA_FLAG_READONLY) != 0) {
 		IPFW_UH_WUNLOCK(ch);
 		return (EACCES);
 	}
 
 	/* Notify we're going to swap */
 	rollback_toperation_state(ch, tc_a);
 	rollback_toperation_state(ch, tc_b);
 
 	/* Everything is fine, prepare to swap */
 	tablestate = (struct table_info *)ch->tablestate;
 	ti = tablestate[tc_a->no.kidx];
 	ta = tc_a->ta;
 	astate = tc_a->astate;
 	count = tc_a->count;
 
 	IPFW_WLOCK(ch);
 	/* a <- b */
 	tablestate[tc_a->no.kidx] = tablestate[tc_b->no.kidx];
 	tc_a->ta = tc_b->ta;
 	tc_a->astate = tc_b->astate;
 	tc_a->count = tc_b->count;
 	/* b <- a */
 	tablestate[tc_b->no.kidx] = ti;
 	tc_b->ta = ta;
 	tc_b->astate = astate;
 	tc_b->count = count;
 	IPFW_WUNLOCK(ch);
 
 	/* Ensure tc.ti copies are in sync */
 	tc_a->ti_copy = tablestate[tc_a->no.kidx];
 	tc_b->ti_copy = tablestate[tc_b->no.kidx];
 
 	/* Notify both tables on @ti change */
 	if (tc_a->ta->change_ti != NULL)
 		tc_a->ta->change_ti(tc_a->astate, &tablestate[tc_a->no.kidx]);
 	if (tc_b->ta->change_ti != NULL)
 		tc_b->ta->change_ti(tc_b->astate, &tablestate[tc_b->no.kidx]);
 
 	IPFW_UH_WUNLOCK(ch);
 
 	return (0);
 }
 
 /*
  * Destroys table specified by @ti.
  * Data layout (v0)(current):
  * Request: [ ip_fw3_opheader ]
  *
  * Returns 0 on success
  */
 static int
 destroy_table(struct ip_fw_chain *ch, struct tid_info *ti)
 {
 	struct namedobj_instance *ni;
 	struct table_config *tc;
 
 	IPFW_UH_WLOCK(ch);
 
 	ni = CHAIN_TO_NI(ch);
 	if ((tc = find_table(ni, ti)) == NULL) {
 		IPFW_UH_WUNLOCK(ch);
 		return (ESRCH);
 	}
 
 	/* Do not permit destroying referenced tables */
 	if (tc->no.refcnt > 0) {
 		IPFW_UH_WUNLOCK(ch);
 		return (EBUSY);
 	}
 
 	IPFW_WLOCK(ch);
 	unlink_table(ch, tc);
 	IPFW_WUNLOCK(ch);
 
 	/* Free obj index */
 	if (ipfw_objhash_free_idx(ni, tc->no.kidx) != 0)
 		printf("Error unlinking kidx %d from table %s\n",
 		    tc->no.kidx, tc->tablename);
 
 	/* Unref values used in tables while holding UH lock */
 	ipfw_unref_table_values(ch, tc, tc->ta, tc->astate, &tc->ti_copy);
 	IPFW_UH_WUNLOCK(ch);
 
 	free_table_config(ni, tc);
 
 	return (0);
 }
 
 /*
  * Rounds @v up to the nearest power of 2.
  * Values which already are a power of 2 are returned unchanged.
  * 0 and values above 2^31 wrap around to 0.
  */
 static uint32_t
 roundup2p(uint32_t v)
 {
 	unsigned int shift;
 
 	v -= 1;
 	/* Smear the highest set bit into every lower position */
 	for (shift = 1; shift <= 16; shift *= 2)
 		v |= v >> shift;
 	v += 1;
 
 	return (v);
 }
 
 /*
  * Grow tables index.
  *
  * Returns 0 on success.
  */
 int
 ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables)
 {
 	unsigned int ntables_old, tbl;
 	struct namedobj_instance *ni;
 	void *new_idx, *old_tablestate, *tablestate;
 	struct table_info *ti;
 	struct table_config *tc;
 	int i, new_blocks;
 
 	/* Check new value for validity */
 	if (ntables == 0)
 		return (EINVAL);
 	if (ntables > IPFW_TABLES_MAX)
 		ntables = IPFW_TABLES_MAX;
 	/* Alight to nearest power of 2 */
 	ntables = (unsigned int)roundup2p(ntables); 
 
 	/* Allocate new pointers */
 	tablestate = malloc(ntables * sizeof(struct table_info),
 	    M_IPFW, M_WAITOK | M_ZERO);
 
 	ipfw_objhash_bitmap_alloc(ntables, (void *)&new_idx, &new_blocks);
 
 	IPFW_UH_WLOCK(ch);
 
 	tbl = (ntables >= V_fw_tables_max) ? V_fw_tables_max : ntables;
 	ni = CHAIN_TO_NI(ch);
 
 	/* Temporary restrict decreasing max_tables */
 	if (ntables < V_fw_tables_max) {
 
 		/*
 		 * FIXME: Check if we really can shrink
 		 */
 		IPFW_UH_WUNLOCK(ch);
 		return (EINVAL);
 	}
 
 	/* Copy table info/indices */
 	memcpy(tablestate, ch->tablestate, sizeof(struct table_info) * tbl);
 	ipfw_objhash_bitmap_merge(ni, &new_idx, &new_blocks);
 
 	IPFW_WLOCK(ch);
 
 	/* Change pointers */
 	old_tablestate = ch->tablestate;
 	ch->tablestate = tablestate;
 	ipfw_objhash_bitmap_swap(ni, &new_idx, &new_blocks);
 
 	ntables_old = V_fw_tables_max;
 	V_fw_tables_max = ntables;
 
 	IPFW_WUNLOCK(ch);
 
 	/* Notify all consumers that their @ti pointer has changed */
 	ti = (struct table_info *)ch->tablestate;
 	for (i = 0; i < tbl; i++, ti++) {
 		if (ti->lookup == NULL)
 			continue;
 		tc = (struct table_config *)ipfw_objhash_lookup_kidx(ni, i);
 		if (tc == NULL || tc->ta->change_ti == NULL)
 			continue;
 
 		tc->ta->change_ti(tc->astate, ti);
 	}
 
 	IPFW_UH_WUNLOCK(ch);
 
 	/* Free old pointers */
 	free(old_tablestate, M_IPFW);
 	ipfw_objhash_bitmap_free(new_idx, new_blocks);
 
 	return (0);
 }
 
 /*
  * Switch between "set 0" and "rule's set" table binding,
  * Check all ruleset bindings and permits changing
  * IFF each binding has both rule AND table in default set (set 0).
  *
  * Returns 0 on success.
  */
 int
 ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int sets)
 {
 	struct namedobj_instance *ni;
 	struct named_object *no;
 	struct ip_fw *rule;
 	ipfw_insn *cmd;
 	int cmdlen, i, l;
 	uint16_t kidx;
 
 	IPFW_UH_WLOCK(ch);
 
 	if (V_fw_tables_sets == sets) {
 		IPFW_UH_WUNLOCK(ch);
 		return (0);
 	}
 
 	ni = CHAIN_TO_NI(ch);
 
 	/*
 	 * Scan all rules and examine tables opcodes.
 	 */
 	for (i = 0; i < ch->n_rules; i++) {
 		rule = ch->map[i];
 
 		l = rule->cmd_len;
 		cmd = rule->cmd;
 		cmdlen = 0;
 		for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
 			cmdlen = F_LEN(cmd);
 
 			if (classify_opcode_kidx(cmd, &kidx) != 0)
 				continue;
 
 			no = ipfw_objhash_lookup_kidx(ni, kidx);
 
 			/* Check if both table object and rule has the set 0 */
 			if (no->set != 0 || rule->set != 0) {
 				IPFW_UH_WUNLOCK(ch);
 				return (EBUSY);
 			}
 
 		}
 	}
 	V_fw_tables_sets = sets;
 
 	IPFW_UH_WUNLOCK(ch);
 
 	return (0);
 }
 
 /*
  * Looks up IPv4 address @addr in the table with kernel index @tbl
  * using the lookup handler of that table.
  * The value found is stored in @val.
  *
  * Returns 1 if @addr was found.
  */
 int
 ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
     uint32_t *val)
 {
 	struct table_info *ti = KIDX_TO_TI(ch, tbl);
 
 	/* The key is passed by address together with its size */
 	return (ti->lookup(ti, &addr, sizeof(addr), val));
 }
 
 /*
  * Looks up an arbitrary key @paddr of length @plen in the table with
  * kernel index @tbl using the lookup handler of that table.
  * The value found is stored in @val.
  *
  * Returns 1 if key was found.
  */
 int
 ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen,
     void *paddr, uint32_t *val)
 {
 	struct table_info *ti = KIDX_TO_TI(ch, tbl);
 
 	return (ti->lookup(ti, paddr, plen, val));
 }
 
 /*
  * Info/List/dump support for tables.
  *
  */
 
 /*
  * High-level 'get' cmds sysctl handlers
  */
 
 /*
  * Lists all tables currently available in kernel.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
  * Reply: [ ipfw_obj_lheader ipfw_xtable_info x N ]
  *
  * The export itself runs under the UH read lock.
  *
  * Returns 0 on success, EINVAL if the header cannot be obtained or the
  * supplied buffer is smaller than the size stated in the header.
  */
 static int
 list_tables(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_lheader *olh;
 	int error;
 
 	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
 	if (olh == NULL || sd->valsize < olh->size)
 		return (EINVAL);
 
 	IPFW_UH_RLOCK(ch);
 	error = export_tables(ch, olh, sd);
 	IPFW_UH_RUNLOCK(ch);
 
 	return (error);
 }
 
 /*
  * Stores info about a single table into the buffer provided by @sd.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ipfw_xtable_info(empty)]
  * Reply: [ ipfw_obj_header ipfw_xtable_info ]
  *
  * Returns 0 on success, EINVAL if the header cannot be obtained,
  * ESRCH if the table does not exist.
  */
 static int
 describe_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_header *oh;
 	struct table_config *tc;
 	struct tid_info ti;
 	size_t sz;
 
 	/* Header plus room for the info structure to fill in */
 	sz = sizeof(*oh) + sizeof(ipfw_xtable_info);
 	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
 	if (oh == NULL)
 		return (EINVAL);
 
 	objheader_to_ti(oh, &ti);
 
 	/* Look the table up and export it under the UH read lock */
 	IPFW_UH_RLOCK(ch);
 	tc = find_table(CHAIN_TO_NI(ch), &ti);
 	if (tc != NULL)
 		export_table_info(ch, tc, (ipfw_xtable_info *)(oh + 1));
 	IPFW_UH_RUNLOCK(ch);
 
 	return ((tc != NULL) ? 0 : ESRCH);
 }
 
 /*
  * Modifies existing table: updates its limit and/or its locked state,
  * as selected by the mflags field of the request.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ipfw_xtable_info ]
  *
  * Returns 0 on success, EINVAL on wrong request size or bad table
  * name, ESRCH if the table does not exist, EACCES for readonly tables.
  */
 static int
 modify_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_header *oh;
 	ipfw_xtable_info *i;
 	struct tid_info ti;
 	struct table_config *tc;
 	int error;
 
 	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
 		return (EINVAL);
 
 	oh = (struct _ipfw_obj_header *)sd->kbuf;
 	i = (ipfw_xtable_info *)(oh + 1);
 
 	/* The user-supplied table name has to pass validation */
 	if (check_table_name(oh->ntlv.name) != 0)
 		return (EINVAL);
 
 	objheader_to_ti(oh, &ti);
 	ti.type = i->type;
 
 	error = 0;
 	IPFW_UH_WLOCK(ch);
 
 	tc = find_table(CHAIN_TO_NI(ch), &ti);
 	if (tc == NULL) {
 		error = ESRCH;
 		goto done;
 	}
 
 	/* Tables using a readonly algorithm cannot be modified at all */
 	if ((tc->ta->flags & TA_FLAG_READONLY) != 0) {
 		error = EACCES;
 		goto done;
 	}
 
 	/* Apply only the fields selected in mflags */
 	if ((i->mflags & IPFW_TMFLAGS_LIMIT) != 0)
 		tc->limit = i->limit;
 	if ((i->mflags & IPFW_TMFLAGS_LOCK) != 0)
 		tc->locked = ((i->flags & IPFW_TGFLAGS_LOCKED) != 0);
 
 done:
 	IPFW_UH_WUNLOCK(ch);
 
 	return (error);
 }
 
 /*
  * Creates new table.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_header ipfw_xtable_info ]
  *
  * An empty algorithm name selects the default algorithm.
  *
  * Returns 0 on success, EINVAL on wrong request size, bad table name
  * or unterminated algorithm name, EEXIST if the table already exists,
  * otherwise the create_table_internal() result.
  */
 static int
 create_table(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_header *oh;
 	ipfw_xtable_info *i;
 	char *aname;
 	struct tid_info ti;
 	struct namedobj_instance *ni;
 	int exists;
 
 	if (sd->valsize != sizeof(*oh) + sizeof(ipfw_xtable_info))
 		return (EINVAL);
 
 	oh = (struct _ipfw_obj_header *)sd->kbuf;
 	i = (ipfw_xtable_info *)(oh + 1);
 
 	/* Validate the user-supplied table name first... */
 	if (check_table_name(oh->ntlv.name) != 0)
 		return (EINVAL);
 
 	/* ...then make sure the algorithm name is null-terminated */
 	aname = i->algoname;
 	if (strnlen(aname, sizeof(i->algoname)) == sizeof(i->algoname))
 		return (EINVAL);
 
 	/* Zero-length name: let the default algorithm be used */
 	if (aname[0] == '\0')
 		aname = NULL;
 
 	objheader_to_ti(oh, &ti);
 	ti.type = i->type;
 
 	ni = CHAIN_TO_NI(ch);
 
 	/* Refuse to create a table which is already there */
 	IPFW_UH_RLOCK(ch);
 	exists = (find_table(ni, &ti) != NULL);
 	IPFW_UH_RUNLOCK(ch);
 	if (exists)
 		return (EEXIST);
 
 	return (create_table_internal(ch, &ti, aname, i, NULL, 0));
 }
 
 /*
  * Creates new table based on @ti and @aname.
  *
  * Relies on table name checking inside find_name_tlv()
  * Assume @aname to be checked and valid.
  * Stores allocated table kidx inside @pkidx (if non-NULL).
  * Reference created table if @compat is non-zero.
  *
  * The table config is allocated before the UH write lock is taken and
  * any config that ends up unused is freed after the lock is dropped.
  *
  * Returns 0 on success, ENOTSUP if no matching algorithm is found,
  * ENOMEM if the config cannot be allocated, EEXIST if the table is
  * already present (and @compat does not allow reusing it), EBUSY if
  * no table index is available.
  */
 static int
 create_table_internal(struct ip_fw_chain *ch, struct tid_info *ti,
     char *aname, ipfw_xtable_info *i, uint16_t *pkidx, int compat)
 {
 	struct namedobj_instance *ni;
 	struct table_config *tc, *tc_new, *tmp;
 	struct table_algo *ta;
 	uint16_t kidx;
 
 	ni = CHAIN_TO_NI(ch);
 
 	ta = find_table_algo(CHAIN_TO_TCFG(ch), ti, aname);
 	if (ta == NULL)
 		return (ENOTSUP);
 
 	tc = alloc_table_config(ch, ti, ta, aname, i->tflags);
 	if (tc == NULL)
 		return (ENOMEM);
 
 	tc->vmask = i->vmask;
 	tc->limit = i->limit;
 	/* Tables backed by read-only algorithms are always locked */
 	if (ta->flags & TA_FLAG_READONLY)
 		tc->locked = 1;
 	else
 		tc->locked = (i->flags & IPFW_TGFLAGS_LOCKED) != 0;
 
 	IPFW_UH_WLOCK(ch);
 
 	/* Check if table has been already created */
 	tc_new = find_table(ni, ti);
 	if (tc_new != NULL) {
 
 		/*
 		 * Compat: do not fail if we're
 		 * requesting to create existing table
 		 * which has the same type
 		 */
 		if (compat == 0 || tc_new->no.subtype != tc->no.subtype) {
 			IPFW_UH_WUNLOCK(ch);
 			free_table_config(ni, tc);
 			return (EEXIST);
 		}
 
 		/* Exchange tc and tc_new for proper refcounting & freeing */
 		tmp = tc;
 		tc = tc_new;
 		tc_new = tmp;
 	} else {
 		/* New table */
 		if (ipfw_objhash_alloc_idx(ni, &kidx) != 0) {
 			IPFW_UH_WUNLOCK(ch);
 			/* Terminate the line so console output stays readable */
 			printf("Unable to allocate table index."
 			    " Consider increasing net.inet.ip.fw.tables_max\n");
 			free_table_config(ni, tc);
 			return (EBUSY);
 		}
 		tc->no.kidx = kidx;
 		tc->no.etlv = IPFW_TLV_TBL_NAME;
 
 		IPFW_WLOCK(ch);
 		link_table(ch, tc);
 		IPFW_WUNLOCK(ch);
 	}
 
 	if (compat != 0)
 		tc->no.refcnt++;
 	if (pkidx != NULL)
 		*pkidx = tc->no.kidx;
 
 	IPFW_UH_WUNLOCK(ch);
 
 	/* Drop the freshly allocated config if an existing table was reused */
 	if (tc_new != NULL)
 		free_table_config(ni, tc_new);
 
 	return (0);
 }
 
 /*
  * Initializes @ti from a single name TLV: the structure is zeroed and
  * then set/index are copied, with @ntlv itself used as the TLV buffer.
  */
 static void
 ntlv_to_ti(ipfw_obj_ntlv *ntlv, struct tid_info *ti)
 {
 
 	memset(ti, 0, sizeof(*ti));
 	ti->tlvs = ntlv;
 	ti->tlen = ntlv->head.length;
 	ti->uidx = ntlv->idx;
 	ti->set = ntlv->set;
 }
 
 static void
 objheader_to_ti(struct _ipfw_obj_header *oh, struct tid_info *ti)
 {
 
 	ntlv_to_ti(&oh->ntlv, ti);
 }
 
 /*
  * Returns the named object instance that holds tables of @ch.
  */
 struct namedobj_instance *
 ipfw_get_table_objhash(struct ip_fw_chain *ch)
 {
 	struct namedobj_instance *ni;
 
 	ni = CHAIN_TO_NI(ch);
 	return (ni);
 }
 
 /*
  * Writes the name TLV of the table with kernel index @kidx into
  * the sockopt buffer @sd.
  * Used inside dump_static_rules() to provide info
  * about all tables referenced by current ruleset.
  *
  * Returns 0 on success, ENOMEM if @sd has no room for the TLV.
  */
 int
 ipfw_export_table_ntlv(struct ip_fw_chain *ch, uint16_t kidx,
     struct sockopt_data *sd)
 {
 	struct named_object *obj;
 	ipfw_obj_ntlv *out;
 
 	obj = ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch), kidx);
 	KASSERT(obj != NULL, ("invalid table kidx passed"));
 
 	out = (ipfw_obj_ntlv *)ipfw_get_sopt_space(sd, sizeof(*out));
 	if (out == NULL)
 		return (ENOMEM);
 
 	/* Fixed-size name TLV carrying kernel index and table name */
 	out->head.type = IPFW_TLV_TBL_NAME;
 	out->head.length = sizeof(*out);
 	out->idx = obj->kidx;
 	strlcpy(out->name, obj->name, sizeof(out->name));
 
 	return (0);
 }
 
 /*
  * Shared state passed as @arg to the ta->foreach() callbacks below
  * (count_ext_entries, dump_table_entry, dump_table_tentry,
  * dump_table_xentry, prepare_table_tentry).
  */
 struct dump_args {
 	struct ip_fw_chain *ch;		/* chain the table belongs to */
 	struct table_info *ti;		/* runtime table info handed to algo */
 	struct table_config *tc;	/* config of the table being walked */
 	struct sockopt_data *sd;	/* output buffer (v0/v1 dumps) */
 	uint32_t cnt;			/* entries counted/stored so far */
 	uint16_t uidx;			/* index copied into exported entries */
 	int error;			/* set to ENOMEM by dump_table_tentry */
 	uint32_t size;			/* max entries for dump_table_entry */
 	ipfw_table_entry *ent;		/* next free legacy entry slot */
 	ta_foreach_f *f;		/* consumer for prepare_table_tentry */
 	void *farg;			/* opaque argument passed to @f */
 	ipfw_obj_tentry tent;		/* scratch entry for format conversion */
 };
 
 /*
  * ta->foreach() callback: bumps the entry counter in the dump_args
  * passed as @arg. The entry @e itself is not examined.
  */
 static int
 count_ext_entries(void *e, void *arg)
 {
 
 	((struct dump_args *)arg)->cnt++;
 	return (0);
 }
 
 /*
  * Gets number of items from table either using
  * internal counter or calling algo callback for
  * externally-managed tables.
  *
  * Returns number of records.
  */
 static uint32_t
 table_get_count(struct ip_fw_chain *ch, struct table_config *tc)
 {
 	struct table_info *ti;
 	struct table_algo *ta;
 	struct dump_args da;
 
 	ti = KIDX_TO_TI(ch, tc->no.kidx);
 	ta = tc->ta;
 
 	/* Use internal counter for self-managed tables */
 	if ((ta->flags & TA_FLAG_READONLY) == 0)
 		return (tc->count);
 
 	/* Use callback to quickly get number of items */
 	if ((ta->flags & TA_FLAG_EXTCOUNTER) != 0)
 		return (ta->get_count(tc->astate, ti));
 
 	/* Count number of iterms ourselves */
 	memset(&da, 0, sizeof(da));
 	ta->foreach(tc->astate, ti, count_ext_entries, &da);
 
 	return (da.cnt);
 }
 
 /*
  * Exports table @tc info into standard ipfw_xtable_info format.
  *
  * @i is filled in place. Note that i->flags is OR-ed rather than
  * assigned, so bits already present in @i are preserved.
  */
 static void
 export_table_info(struct ip_fw_chain *ch, struct table_config *tc,
     ipfw_xtable_info *i)
 {
 	struct table_info *ti;
 	struct table_algo *ta;
 	
 	i->type = tc->no.subtype;
 	i->tflags = tc->tflags;
 	i->vmask = tc->vmask;
 	i->set = tc->no.set;
 	i->kidx = tc->no.kidx;
 	i->refcnt = tc->no.refcnt;
 	i->count = table_get_count(ch, tc);
 	i->limit = tc->limit;
 	i->flags |= (tc->locked != 0) ? IPFW_TGFLAGS_LOCKED : 0;
 	/* Size of a full dump: all entries plus header and table info */
-	i->size = tc->count * sizeof(ipfw_obj_tentry);
+	i->size = i->count * sizeof(ipfw_obj_tentry);
 	i->size += sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
 	strlcpy(i->tablename, tc->tablename, sizeof(i->tablename));
 	ti = KIDX_TO_TI(ch, tc->no.kidx);
 	ta = tc->ta;
 	if (ta->print_config != NULL) {
 		/* Use algo function to print table config to string */
 		ta->print_config(tc->astate, ti, i->algoname,
 		    sizeof(i->algoname));
 	} else
 		strlcpy(i->algoname, ta->name, sizeof(i->algoname));
 	/* Dump algo-specific data, if possible */
 	if (ta->dump_tinfo != NULL) {
 		ta->dump_tinfo(tc->astate, ti, &i->ta_info);
 		i->ta_info.flags |= IPFW_TATFLAGS_DATA;
 	}
 }
 
 /*
  * Argument bundle for export_table_internal(), the per-table
  * callback used by export_tables().
  */
 struct dump_table_args {
 	struct ip_fw_chain *ch;		/* chain owning the tables */
 	struct sockopt_data *sd;	/* buffer receiving ipfw_xtable_info */
 };
 
 static void
 export_table_internal(struct namedobj_instance *ni, struct named_object *no,
     void *arg)
 {
 	ipfw_xtable_info *i;
 	struct dump_table_args *dta;
 
 	dta = (struct dump_table_args *)arg;
 
 	i = (ipfw_xtable_info *)ipfw_get_sopt_space(dta->sd, sizeof(*i));
 	KASSERT(i != 0, ("previously checked buffer is not enough"));
 
 	export_table_info(dta->ch, (struct table_config *)no, i);
 }
 
 /*
  * Exports every table as an ipfw_xtable_info structure into
  * the storage provided by @sd.
  *
  * The list header @olh always receives the object count, object size
  * and required total size. If the size supplied in @olh is too small
  * nothing else is written and ENOMEM is returned.
  * Returns 0 on success.
  */
 static int
 export_tables(struct ip_fw_chain *ch, ipfw_obj_lheader *olh,
     struct sockopt_data *sd)
 {
 	struct dump_table_args dta;
 	struct namedobj_instance *ni;
 	uint32_t count, size;
 	int fits;
 
 	ni = CHAIN_TO_NI(ch);
 	count = ipfw_objhash_count(ni);
 	size = count * sizeof(ipfw_xtable_info) + sizeof(ipfw_obj_lheader);
 
 	/* Header is filled in regardless of buffer size */
 	olh->count = count;
 	olh->objsize = sizeof(ipfw_xtable_info);
 
 	/* Compare against the caller's size before overwriting it */
 	fits = (size <= olh->size);
 	olh->size = size;
 	if (!fits)
 		return (ENOMEM);
 
 	dta.ch = ch;
 	dta.sd = sd;
 	ipfw_objhash_foreach(ni, export_table_internal, &dta);
 
 	return (0);
 }
 
 /*
  * Dumps all table data
  * Data layout (v1)(current):
  * Request: [ ipfw_obj_header ], size = ipfw_xtable_info.size
  * Reply: [ ipfw_obj_header ipfw_xtable_info ipfw_obj_tentry x N ]
  *
  * Returns 0 on success, EINVAL if the header cannot be obtained,
  * ESRCH if the table is not found, ENOMEM if the buffer is too small.
  */
 static int
 dump_table_v1(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_header *oh;
 	ipfw_xtable_info *info;
 	struct tid_info ti;
 	struct table_config *tc;
 	struct dump_args da;
 	uint32_t sz;
 	int error;
 
 	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_xtable_info);
 	oh = (struct _ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
 	if (oh == NULL)
 		return (EINVAL);
 
 	info = (ipfw_xtable_info *)(oh + 1);
 	objheader_to_ti(oh, &ti);
 
 	IPFW_UH_RLOCK(ch);
 	tc = find_table(CHAIN_TO_NI(ch), &ti);
 	if (tc == NULL) {
 		error = ESRCH;
 		goto done;
 	}
 
 	export_table_info(ch, tc, info);
 	if (sd->valsize < info->size) {
 		/*
 		 * Submitted buffer is too small. @info already holds
 		 * the relevant table info including the required size,
 		 * so just report the condition to the caller.
 		 */
 		error = ENOMEM;
 		goto done;
 	}
 
 	/* Do the actual dump in eXtended format */
 	memset(&da, 0, sizeof(da));
 	da.ch = ch;
 	da.ti = KIDX_TO_TI(ch, tc->no.kidx);
 	da.tc = tc;
 	da.sd = sd;
 
 	tc->ta->foreach(tc->astate, da.ti, dump_table_tentry, &da);
 	error = da.error;
 done:
 	IPFW_UH_RUNLOCK(ch);
 
 	return (error);
 }
 
 /*
  * Dumps all table data
  * Data layout (version 0)(legacy):
  * Request: [ ipfw_xtable ], size = IP_FW_TABLE_XGETSIZE()
  * Reply: [ ipfw_xtable ipfw_table_xentry x N ]
  *
  * Returns 0 on success (also when the table does not exist),
  * EINVAL if the header cannot be obtained, ENOMEM if the buffer
  * is too small.
  */
 static int
 dump_table_v0(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	ipfw_xtable *xtbl;
 	struct tid_info ti;
 	struct table_config *tc;
 	struct dump_args da;
 	size_t count, sz;
 	int error;
 
 	xtbl = (ipfw_xtable *)ipfw_get_sopt_header(sd, sizeof(ipfw_xtable));
 	if (xtbl == NULL)
 		return (EINVAL);
 
 	memset(&ti, 0, sizeof(ti));
 	ti.uidx = xtbl->tbl;
 
 	error = 0;
 	IPFW_UH_RLOCK(ch);
 	tc = find_table(CHAIN_TO_NI(ch), &ti);
 	if (tc == NULL)
 		goto done;
 
 	count = table_get_count(ch, tc);
 	sz = count * sizeof(ipfw_table_xentry) + sizeof(ipfw_xtable);
 
 	xtbl->cnt = count;
 	xtbl->size = sz;
 	xtbl->type = tc->no.subtype;
 	xtbl->tbl = ti.uidx;
 
 	if (sd->valsize < sz) {
 		/*
 		 * Submitted buffer is too small. @xtbl already holds
 		 * the relevant table info including the required size,
 		 * so just report the condition to the caller.
 		 */
 		error = ENOMEM;
 		goto done;
 	}
 
 	/* Do the actual dump in eXtended format */
 	memset(&da, 0, sizeof(da));
 	da.ch = ch;
 	da.ti = KIDX_TO_TI(ch, tc->no.kidx);
 	da.tc = tc;
 	da.sd = sd;
 
 	tc->ta->foreach(tc->astate, da.ti, dump_table_xentry, &da);
 done:
 	IPFW_UH_RUNLOCK(ch);
 
 	return (error);
 }
 
 /*
  * Legacy handler: reads a table index placed right after the op3
  * header and replaces it with the value computed by
  * ipfw_count_xtable() for that table.
  */
 static int
 get_table_size(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	ip_fw3_opheader *hdr;
 	uint32_t *val;
 	struct tid_info ti;
 	int error;
 
 	hdr = (ip_fw3_opheader *)ipfw_get_sopt_header(sd,
 	    sizeof(*hdr) + sizeof(uint32_t));
 	if (hdr == NULL)
 		return (EINVAL);
 
 	/* The 32-bit value follows the header; it is both input and output */
 	val = (uint32_t *)(hdr + 1);
 	memset(&ti, 0, sizeof(ti));
 	ti.uidx = *val;
 
 	IPFW_UH_RLOCK(ch);
 	error = ipfw_count_xtable(ch, &ti, val);
 	IPFW_UH_RUNLOCK(ch);
 
 	return (error);
 }
 
 /*
  * Legacy IP_FW_TABLE_GETSIZE handler.
  * Stores the number of items of the table described by @ti in @cnt.
  *
  * Returns 0 on success, ESRCH if the table is not found.
  */
 int
 ipfw_count_table(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt)
 {
 	struct table_config *tc;
 
 	tc = find_table(CHAIN_TO_NI(ch), ti);
 	if (tc == NULL)
 		return (ESRCH);
 
 	*cnt = table_get_count(ch, tc);
 
 	return (0);
 }
 
 /*
  * Legacy IP_FW_TABLE_XGETSIZE handler.
  * Stores in @cnt the number of bytes needed for an eXtended dump:
  * one ipfw_table_xentry per item, plus the ipfw_xtable header when
  * the table is not empty. A missing table yields 0.
  *
  * Always returns 0.
  */
 int
 ipfw_count_xtable(struct ip_fw_chain *ch, struct tid_info *ti, uint32_t *cnt)
 {
 	struct table_config *tc;
 	uint32_t nitems, bytes;
 
 	tc = find_table(CHAIN_TO_NI(ch), ti);
 	if (tc == NULL) {
 		/* 'table all list' requires success */
 		*cnt = 0;
 		return (0);
 	}
 
 	nitems = table_get_count(ch, tc);
 	bytes = nitems * sizeof(ipfw_table_xentry);
 	if (nitems > 0)
 		bytes += sizeof(ipfw_xtable);
 	*cnt = bytes;
 
 	return (0);
 }
 
 /*
  * ta->foreach() callback that stores one entry in the legacy
  * ipfw_table_entry format (see ipfw_dump_table_legacy()).
  *
  * Returns 0 on success, 1 once da->size entries have been stored,
  * or the error returned by the algo's dump_tentry() callback.
  */
 static int
 dump_table_entry(void *e, void *arg)
 {
 	struct dump_args *da;
 	struct table_config *tc;
 	struct table_algo *ta;
 	ipfw_table_entry *ent;
 	struct table_value *pval;
 	int error;
 
 	da = (struct dump_args *)arg;
 
 	tc = da->tc;
 	ta = tc->ta;
 
 	/* Out of memory, returning */
 	if (da->cnt == da->size)
 		return (1);
 	/* The slot is consumed and counted before the entry is converted */
 	ent = da->ent++;
 	ent->tbl = da->uidx;
 	da->cnt++;
 
 	error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
 	if (error != 0)
 		return (error);
 
 	/* Convert the current entry format to the legacy IPv4-only one */
 	ent->addr = da->tent.k.addr.s_addr;
 	ent->masklen = da->tent.masklen;
 	pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
 	ent->value = ipfw_export_table_value_legacy(pval);
 
 	return (0);
 }
 
 /*
  * Dumps table in pre-8.1 legacy format.
  *
  * At most tbl->size entries are written to tbl->ent[] and tbl->cnt
  * receives the number stored. Missing tables and tables that are
  * not of IPFW_TABLE_ADDR type produce an empty dump.
  *
  * Always returns 0.
  */
 int
 ipfw_dump_table_legacy(struct ip_fw_chain *ch, struct tid_info *ti,
     ipfw_table *tbl)
 {
 	struct table_config *tc;
 	struct dump_args da;
 
 	tbl->cnt = 0;
 
 	/*
 	 * XXX: We should return ESRCH for a missing table.
 	 * This dump format supports IPv4 only.
 	 */
 	tc = find_table(CHAIN_TO_NI(ch), ti);
 	if (tc == NULL || tc->no.subtype != IPFW_TABLE_ADDR)
 		return (0);
 
 	memset(&da, 0, sizeof(da));
 	da.ch = ch;
 	da.tc = tc;
 	da.ti = KIDX_TO_TI(ch, tc->no.kidx);
 	da.size = tbl->size;
 	da.ent = &tbl->ent[0];
 
 	tc->ta->foreach(tc->astate, da.ti, dump_table_entry, &da);
 	tbl->cnt = da.cnt;
 
 	return (0);
 }
 
 /*
  * Dumps table entry in eXtended format (v1)(current).
  *
  * ta->foreach() callback: reserves one ipfw_obj_tentry in da->sd and
  * lets the algorithm fill it in.
  *
  * Returns 0 on success, 1 (with da->error set to ENOMEM) if no buffer
  * space is left, or the error returned by the algo's dump_tentry().
  * NOTE(review): a dump_tentry() failure is not stored in da->error,
  * so dump_table_v1() does not see it - confirm this is intended.
  */
 static int
 dump_table_tentry(void *e, void *arg)
 {
 	struct dump_args *da;
 	struct table_config *tc;
 	struct table_algo *ta;
 	struct table_value *pval;
 	ipfw_obj_tentry *tent;
 	int error;
 
 	da = (struct dump_args *)arg;
 
 	tc = da->tc;
 	ta = tc->ta;
 
 	tent = (ipfw_obj_tentry *)ipfw_get_sopt_space(da->sd, sizeof(*tent));
 	/* Out of memory, returning */
 	if (tent == NULL) {
 		da->error = ENOMEM;
 		return (1);
 	}
 	tent->head.length = sizeof(ipfw_obj_tentry);
 	tent->idx = da->uidx;
 
 	error = ta->dump_tentry(tc->astate, da->ti, e, tent);
 	if (error != 0)
 		return (error);
 
 	/* Replace the value index with the exported value itself */
 	pval = get_table_value(da->ch, da->tc, tent->v.kidx);
 	ipfw_export_table_value_v1(pval, &tent->v.value);
 
 	return (0);
 }
 
 /*
  * Dumps table entry in eXtended format (v0).
  *
  * ta->foreach() callback: reserves one ipfw_table_xentry in da->sd,
  * has the algorithm export the entry into the scratch da->tent and
  * converts that to the older layout.
  *
  * Returns 0 on success, 1 if no buffer space is left, or the error
  * returned by the algo's dump_tentry() callback.
  */
 static int
 dump_table_xentry(void *e, void *arg)
 {
 	struct dump_args *da;
 	struct table_config *tc;
 	struct table_algo *ta;
 	ipfw_table_xentry *xent;
 	ipfw_obj_tentry *tent;
 	struct table_value *pval;
 	int error;
 
 	da = (struct dump_args *)arg;
 
 	tc = da->tc;
 	ta = tc->ta;
 
 	xent = (ipfw_table_xentry *)ipfw_get_sopt_space(da->sd, sizeof(*xent));
 	/* Out of memory, returning */
 	if (xent == NULL)
 		return (1);
 	xent->len = sizeof(ipfw_table_xentry);
 	xent->tbl = da->uidx;
 
 	/* Scratch entry is reused for every record, so clear it first */
 	memset(&da->tent, 0, sizeof(da->tent));
 	tent = &da->tent;
 	error = ta->dump_tentry(tc->astate, da->ti, e, tent);
 	if (error != 0)
 		return (error);
 
 	/* Convert current format to previous one */
 	xent->masklen = tent->masklen;
 	pval = get_table_value(da->ch, da->tc, da->tent.v.kidx);
 	xent->value = ipfw_export_table_value_legacy(pval);
 	/*
 	 * Apply some hacks: IPv4 keys of address tables are stored in
 	 * the last 32-bit word of the IPv6 key and flagged with
 	 * IPFW_TCF_INET; every other key is copied verbatim.
 	 */
 	if (tc->no.subtype == IPFW_TABLE_ADDR && tent->subtype == AF_INET) {
 		xent->k.addr6.s6_addr32[3] = tent->k.addr.s_addr;
 		xent->flags = IPFW_TCF_INET;
 	} else
 		memcpy(&xent->k, &tent->k, sizeof(xent->k));
 
 	return (0);
 }
 
 /*
  * Helper function to export table algo data
  * to tentry format before calling user function.
  *
  * Returns 0 on success.
  */
 static int
 prepare_table_tentry(void *e, void *arg)
 {
 	struct dump_args *da;
 	struct table_config *tc;
 	struct table_algo *ta;
 	int error;
 
 	da = (struct dump_args *)arg;
 
 	tc = da->tc;
 	ta = tc->ta;
 
 	error = ta->dump_tentry(tc->astate, da->ti, e, &da->tent);
 	if (error != 0)
 		return (error);
 
 	da->f(&da->tent, da->farg);
 
 	return (0);
 }
 
 /*
  * Allow external consumers to read table entries in standard format:
  * calls @f with @arg for every entry of the table with kernel
  * index @kidx.
  *
  * Returns 0 on success, ESRCH if no table has that index.
  */
 int
 ipfw_foreach_table_tentry(struct ip_fw_chain *ch, uint16_t kidx,
     ta_foreach_f *f, void *arg)
 {
 	struct table_config *tc;
 	struct dump_args da;
 
 	tc = (struct table_config *)ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch),
 	    kidx);
 	if (tc == NULL)
 		return (ESRCH);
 
 	memset(&da, 0, sizeof(da));
 	da.ch = ch;
 	da.tc = tc;
 	da.ti = KIDX_TO_TI(ch, tc->no.kidx);
 	da.farg = arg;
 	da.f = f;
 
 	tc->ta->foreach(tc->astate, da.ti, prepare_table_tentry, &da);
 
 	return (0);
 }
 
 /*
  * Table algorithms
  */ 
 
 /*
  * Finds algorithm by index, table type or supplied name.
  *
  * Lookup order: explicit algo index (ti->atype), then the default
  * algorithm for ti->type when @name is NULL, then a search by name.
  *
  * Returns pointer to algo or NULL.
  */
 static struct table_algo *
 find_table_algo(struct tables_config *tcfg, struct tid_info *ti, char *name)
 {
 	int i, l;
 	struct table_algo *ta;
 
 	if (ti->type > IPFW_TABLE_MAXTYPE)
 		return (NULL);
 
 	/* Search by index */
 	if (ti->atype != 0) {
 		if (ti->atype > tcfg->algo_count)
 			return (NULL);
 		return (tcfg->algo[ti->atype]);
 	}
 
 	if (name == NULL) {
 		/* Return default algorithm for given type if set */
 		return (tcfg->def_algo[ti->type]);
 	}
 
 	/* Search by name */
 	/* TODO: better search */
 	for (i = 1; i <= tcfg->algo_count; i++) {
 		ta = tcfg->algo[i];
 
 		/*
 		 * One can supply additional algorithm
 		 * parameters so we compare only the first word
 		 * of supplied name:
 		 * 'addr:chash hsize=32'
 		 * '^^^^^^^^^'
 		 *
 		 */
 		l = strlen(ta->name);
 		if (strncmp(name, ta->name, l) != 0)
 			continue;
 		/* The match must end at a word boundary */
 		if (name[l] != '\0' && name[l] != ' ')
 			continue;
 		/* Check if we're requesting proper table type */
 		if (ti->type != 0 && ti->type != ta->type)
 			return (NULL);
 		return (ta);
 	}
 
 	return (NULL);
 }
 
 /*
  * Register new table algo @ta.
  * @size is the number of bytes of @ta to copy; it must not exceed
  * sizeof(struct table_algo).
  * Stores algo id inside @idx.
  *
  * Returns 0 on success, EINVAL if @size or the algo's ta_buf_size
  * is too large.
  */
 int
 ipfw_add_table_algo(struct ip_fw_chain *ch, struct table_algo *ta, size_t size,
     int *idx)
 {
 	struct tables_config *tcfg;
 	struct table_algo *ta_new;
 	size_t sz;
 
 	if (size > sizeof(struct table_algo))
 		return (EINVAL);
 
 	/* Check for the required on-stack size for add/del */
 	sz = roundup2(ta->ta_buf_size, sizeof(void *));
 	if (sz > TA_BUF_SZ)
 		return (EINVAL);
 
 	KASSERT(ta->type <= IPFW_TABLE_MAXTYPE,("Increase IPFW_TABLE_MAXTYPE"));
 
 	/* Copy algorithm data to stable storage. */
 	ta_new = malloc(sizeof(struct table_algo), M_IPFW, M_WAITOK | M_ZERO);
 	memcpy(ta_new, ta, size);
 
 	tcfg = CHAIN_TO_TCFG(ch);
 
 	KASSERT(tcfg->algo_count < 255, ("Increase algo array size"));
 
 	/* Algo ids start at 1: the counter is bumped before use */
 	tcfg->algo[++tcfg->algo_count] = ta_new;
 	ta_new->idx = tcfg->algo_count;
 
 	/* Set algorithm as default one for given type */
 	if ((ta_new->flags & TA_FLAG_DEFAULT) != 0 &&
 	    tcfg->def_algo[ta_new->type] == NULL)
 		tcfg->def_algo[ta_new->type] = ta_new;
 
 	*idx = ta_new->idx;
 	
 	return (0);
 }
 
 /*
  * Unregisters table algo using @idx as id.
  * XXX: It is NOT safe to call this function in any place
  * other than ipfw instance destroy handler.
  *
  * NOTE(review): the tcfg->algo[idx] slot is left pointing at the
  * freed structure and algo_count is not decremented, which is
  * presumably why the restriction above exists - confirm.
  */
 void
 ipfw_del_table_algo(struct ip_fw_chain *ch, int idx)
 {
 	struct tables_config *tcfg;
 	struct table_algo *ta;
 
 	tcfg = CHAIN_TO_TCFG(ch);
 
 	KASSERT(idx <= tcfg->algo_count, ("algo idx %d out of range 1..%d",
 	    idx, tcfg->algo_count));
 
 	ta = tcfg->algo[idx];
 	KASSERT(ta != NULL, ("algo idx %d is NULL", idx));
 
 	/* Forget the algo if it was the default one for its type */
 	if (tcfg->def_algo[ta->type] == ta)
 		tcfg->def_algo[ta->type] = NULL;
 
 	free(ta, M_IPFW);
 }
 
 /*
  * Lists all table algorithms currently available.
  * Data layout (v0)(current):
  * Request: [ ipfw_obj_lheader ], size = ipfw_obj_lheader.size
  * Reply: [ ipfw_obj_lheader ipfw_ta_info x N ]
  *
  * Returns 0 on success, EINVAL on a malformed request, ENOMEM if
  * the size given in the header is too small (the header then holds
  * the required size).
  */
 static int
 list_table_algo(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
     struct sockopt_data *sd)
 {
 	struct _ipfw_obj_lheader *olh;
 	struct tables_config *tcfg;
 	struct table_algo *ta;
 	ipfw_ta_info *info;
 	uint32_t count, n, size;
 	int error;
 
 	olh = (struct _ipfw_obj_lheader *)ipfw_get_sopt_header(sd,sizeof(*olh));
 	if (olh == NULL)
 		return (EINVAL);
 	if (sd->valsize < olh->size)
 		return (EINVAL);
 
 	error = 0;
 	IPFW_UH_RLOCK(ch);
 	tcfg = CHAIN_TO_TCFG(ch);
 	count = tcfg->algo_count;
 	size = count * sizeof(ipfw_ta_info) + sizeof(ipfw_obj_lheader);
 
 	/* Header is filled in regardless of buffer size */
 	olh->count = count;
 	olh->objsize = sizeof(ipfw_ta_info);
 
 	if (size > olh->size)
 		error = ENOMEM;
 	olh->size = size;
 
 	if (error == 0) {
 		/* Algorithms are stored starting from index 1 */
 		for (n = 1; n <= count; n++) {
 			info = (ipfw_ta_info *)ipfw_get_sopt_space(sd,
 			    sizeof(*info));
 			KASSERT(info != NULL,
 			    ("previously checked buffer is not enough"));
 			ta = tcfg->algo[n];
 			strlcpy(info->algoname, ta->name,
 			    sizeof(info->algoname));
 			info->type = ta->type;
 			info->refcnt = ta->refcnt;
 		}
 	}
 	IPFW_UH_RUNLOCK(ch);
 
 	return (error);
 }
 
 /*
  * Classifier for src/dst lookup opcodes.
  * Stores the table index (cmd->arg1) in @puidx and the table type
  * in @ptype: IPFW_TABLE_ADDR unless the instruction is a generic
  * lookup with selector 2..6, which maps to IPFW_TABLE_NUMBER.
  *
  * Always returns 0.
  */
 static int
 classify_srcdst(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
 {
 	int v;
 
 	/* Basic IPv4/IPv6 or u32 lookups */
 	*puidx = cmd->arg1;
 	/* Assume ADDR by default */
 	*ptype = IPFW_TABLE_ADDR;
 
 	if (F_LEN(cmd) <= F_INSN_SIZE(ipfw_insn_u32))
 		return (0);
 
 	/*
 	 * generic lookup. The key must be
 	 * in 32bit big-endian format.
 	 *
 	 * Selectors: 0/1 - IPv4 src/dst (keep ADDR), 2/3 - src/dst port,
 	 * 4 - uid/gid, 5 - jid, 6 - dscp (all numeric tables).
 	 */
 	v = ((ipfw_insn_u32 *)cmd)->d[1];
 	if (v >= 2 && v <= 6)
 		*ptype = IPFW_TABLE_NUMBER;
 
 	return (0);
 }
 
 /*
  * Classifier for interface opcodes.
  * The instruction references a table only when the first byte of
  * the interface name is '\1'; in that case the table type and index
  * are stored in @ptype and @puidx.
  *
  * Returns 0 if a table is referenced, 1 otherwise.
  */
 static int
 classify_via(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
 {
 	ipfw_insn_if *ifcmd;
 
 	ifcmd = (ipfw_insn_if *)cmd;
 	if (ifcmd->name[0] == '\1') {
 		/* Interface table */
 		*ptype = IPFW_TABLE_INTERFACE;
 		*puidx = ifcmd->p.kidx;
 		return (0);
 	}
 
 	return (1);
 }
 
 /*
  * Classifier for flow lookup opcodes: the table is always of
  * IPFW_TABLE_FLOW type and its index is taken from cmd->arg1.
  *
  * Always returns 0.
  */
 static int
 classify_flow(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
 {
 
 	*ptype = IPFW_TABLE_FLOW;
 	*puidx = cmd->arg1;
 
 	return (0);
 }
 
 /*
  * Stores table index @idx in the arg1 field of instruction @cmd.
  */
 static void
 update_arg1(ipfw_insn *cmd, uint16_t idx)
 {
 
 	cmd->arg1 = idx;
 }
 
 /*
  * Stores table index @idx in the p.kidx field of interface
  * instruction @cmd.
  */
 static void
 update_via(ipfw_insn *cmd, uint16_t idx)
 {
 
 	((ipfw_insn_if *)cmd)->p.kidx = idx;
 }
 
 /*
  * Looks up a table by the name/index data in @ti and stores its
  * named object in @pno. Must be called with the UH write lock held.
  *
  * Returns 0 or the error of find_table_err(), in which case @pno
  * is left untouched.
  */
 static int
 table_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
     struct named_object **pno)
 {
 	struct table_config *tc;
 	int error;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	error = find_table_err(CHAIN_TO_NI(ch), ti, &tc);
 	if (error == 0)
 		*pno = &tc->no;
 
 	return (error);
 }
 
 /*
  * Returns the named object of the table with kernel index @idx.
  * Must be called with the UH write lock held; the table is
  * asserted to exist.
  *
  * XXX: sets-sets!
  */
 static struct named_object *
 table_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
 {
 	struct table_config *tc;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 	tc = (struct table_config *)ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch),
 	    idx);
 	KASSERT(tc != NULL, ("Table with index %d not found", idx));
 
 	return (&tc->no);
 }
 
 /*
  * Opcodes that may reference a table. Each entry lists the opcode,
  * the TLV type used for table names, the classifier extracting the
  * table index/type from an instruction, the function storing a new
  * index into it, and the lookup-by-name, lookup-by-kidx and
  * auto-create handlers.
  * NOTE(review): field order presumably follows the declaration of
  * struct opcode_obj_rewrite - confirm against its definition.
  */
 static struct opcode_obj_rewrite opcodes[] = {
 	{
 		O_IP_SRC_LOOKUP, IPFW_TLV_TBL_NAME,
 		classify_srcdst, update_arg1,
 		table_findbyname, table_findbykidx, create_table_compat
 	},
 	{
 		O_IP_DST_LOOKUP, IPFW_TLV_TBL_NAME,
 		classify_srcdst, update_arg1,
 		table_findbyname, table_findbykidx, create_table_compat
 	},
 	{
 		O_IP_FLOW_LOOKUP, IPFW_TLV_TBL_NAME,
 		classify_flow, update_arg1,
 		table_findbyname, table_findbykidx, create_table_compat
 	},
 	{
 		O_XMIT, IPFW_TLV_TBL_NAME,
 		classify_via, update_via,
 		table_findbyname, table_findbykidx, create_table_compat
 	},
 	{
 		O_RECV, IPFW_TLV_TBL_NAME,
 		classify_via, update_via,
 		table_findbyname, table_findbykidx, create_table_compat
 	},
 	{
 		O_VIA, IPFW_TLV_TBL_NAME,
 		classify_via, update_via,
 		table_findbyname, table_findbykidx, create_table_compat
 	},
 };
 
 
 /*
  * Checks table name for validity by delegating to the generic
  * object name check; anything more elaborate is left to userland.
  *
  * Returns 0 if name is considered valid.
  */
 static int
 check_table_name(const char *name)
 {
 	int error;
 
 	/*
 	 * TODO: do some more complicated checks
 	 */
 	error = ipfw_check_object_name_generic(name);
 
 	return (error);
 }
 
 /*
  * Find tablename TLV by @uidx.
  * Check @tlvs for valid data inside.
  *
  * @tlvs points to a buffer of @len bytes holding fixed-size
  * ipfw_obj_ntlv records. The scan stops with NULL on the first record
  * whose length is not sizeof(ipfw_obj_ntlv), on a record that does
  * not fit entirely inside the buffer, or when the matching record
  * carries an invalid table name.
  *
  * Returns pointer to found TLV or NULL.
  */
 static ipfw_obj_ntlv *
 find_name_tlv(void *tlvs, int len, uint16_t uidx)
 {
 	ipfw_obj_ntlv *ntlv;
 	uintptr_t pa, pe;
 	int l;
 
 	/* Nothing to scan; also keeps the pointer math below sane */
 	if (len <= 0)
 		return (NULL);
 
 	pa = (uintptr_t)tlvs;
 	pe = pa + len;
 	l = 0;
 	for (; pa < pe; pa += l) {
 		/*
 		 * Never touch a TLV that is not fully inside the
 		 * buffer: @len may come from unchecked userland data.
 		 */
 		if (pe - pa < sizeof(*ntlv))
 			return (NULL);
 
 		ntlv = (ipfw_obj_ntlv *)pa;
 		l = ntlv->head.length;
 
 		if (l != sizeof(*ntlv))
 			return (NULL);
 
 		if (ntlv->head.type != IPFW_TLV_TBL_NAME)
 			continue;
 
 		if (ntlv->idx != uidx)
 			continue;
 
 		if (check_table_name(ntlv->name) != 0)
 			return (NULL);
 
 		return (ntlv);
 	}
 
 	return (NULL);
 }
 
 /*
  * Finds table config based on either legacy index
  * or name in ntlv.
  * Note @ti structure contains unchecked data from userland.
  *
  * Returns 0 and stores the lookup result in @tc (NULL when no such
  * table exists), or EINVAL if @ti carries TLVs but none of them
  * is a valid name TLV for ti->uidx.
  */
 static int
 find_table_err(struct namedobj_instance *ni, struct tid_info *ti,
     struct table_config **tc)
 {
 	char numname[16], *name;
 	ipfw_obj_ntlv *ntlv;
 	uint32_t set;
 
 	if (ti->tlvs == NULL) {
 		/* Legacy request: the table name is its number, set is 0 */
 		snprintf(numname, sizeof(numname), "%d", ti->uidx);
 		name = numname;
 		set = 0;
 	} else {
 		ntlv = find_name_tlv(ti->tlvs, ti->tlen, ti->uidx);
 		if (ntlv == NULL)
 			return (EINVAL);
 
 		/*
 		 * Use set provided by @ti instead of @ntlv one.
 		 * This is needed due to different sets behavior
 		 * controlled by V_fw_tables_sets.
 		 */
 		name = ntlv->name;
 		set = ti->set;
 	}
 
 	*tc = (struct table_config *)ipfw_objhash_lookup_name(ni, set, name);
 
 	return (0);
 }
 
 /*
  * Finds table config based on either legacy index
  * or name in ntlv.
  * Note @ti structure contains unchecked data from userland.
  *
  * Returns pointer to table_config or NULL.
  */
 static struct table_config *
 find_table(struct namedobj_instance *ni, struct tid_info *ti)
 {
 	struct table_config *tc;
 
 	if (find_table_err(ni, ti, &tc) != 0)
 		return (NULL);
 
 	return (tc);
 }
 
 /*
  * Allocate new table config structure using
  * specified @algo and @aname.
  *
  * The table name and set come from the name TLV in @ti, or from the
  * numeric index (set 0) for legacy requests without TLVs. The algo's
  * init() callback is invoked to preallocate its state.
  *
  * NOTE(review): the set is taken from the TLV here, whereas
  * find_table_err() deliberately uses ti->set - confirm the two
  * cannot disagree.
  *
  * Returns pointer to config or NULL.
  */
 static struct table_config *
 alloc_table_config(struct ip_fw_chain *ch, struct tid_info *ti,
     struct table_algo *ta, char *aname, uint8_t tflags)
 {
 	char *name, bname[16];
 	struct table_config *tc;
 	int error;
 	ipfw_obj_ntlv *ntlv;
 	uint32_t set;
 
 	if (ti->tlvs != NULL) {
 		ntlv = find_name_tlv(ti->tlvs, ti->tlen, ti->uidx);
 		if (ntlv == NULL)
 			return (NULL);
 		name = ntlv->name;
 		set = ntlv->set;
 	} else {
 		/* Compat part: convert number to string representation */
 		snprintf(bname, sizeof(bname), "%d", ti->uidx);
 		name = bname;
 		set = 0;
 	}
 
 	tc = malloc(sizeof(struct table_config), M_IPFW, M_WAITOK | M_ZERO);
 	/* Object name points into the config's own storage */
 	tc->no.name = tc->tablename;
 	tc->no.subtype = ta->type;
 	tc->no.set = set;
 	tc->tflags = tflags;
 	tc->ta = ta;
 	strlcpy(tc->tablename, name, sizeof(tc->tablename));
 	/* Set "shared" value type by default */
 	tc->vshared = 1;
 
 	/* Preallocate data structures for new tables */
 	error = ta->init(ch, &tc->astate, &tc->ti_copy, aname, tflags);
 	if (error != 0) {
 		free(tc, M_IPFW);
 		return (NULL);
 	}
 	
 	return (tc);
 }
 
 /*
  * Destroys table state and config.
  *
  * @tc must already be unlinked (asserted). The algo's destroy()
  * callback releases the algorithm state before @tc itself is freed.
  */
 static void
 free_table_config(struct namedobj_instance *ni, struct table_config *tc)
 {
 
 	KASSERT(tc->linked == 0, ("free() on linked config"));
 	/* UH lock MUST NOT be held */
 
 	/*
 	 * We're using ta without any locking/referencing.
 	 * TODO: fix this if we're going to use unloadable algos.
 	 */
 	tc->ta->destroy(tc->astate, &tc->ti_copy);
 	free(tc, M_IPFW);
 }
 
 /*
  * Links @tc to @chain table named instance.
  * Sets appropriate type/states in @chain table info.
  *
  * Requires both the UH write lock and the runtime write lock
  * (asserted). Also takes a reference on the table algorithm.
  */
 static void
 link_table(struct ip_fw_chain *ch, struct table_config *tc)
 {
 	struct namedobj_instance *ni;
 	struct table_info *ti;
 	uint16_t kidx;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 	IPFW_WLOCK_ASSERT(ch);
 
 	ni = CHAIN_TO_NI(ch);
 	kidx = tc->no.kidx;
 
 	ipfw_objhash_add(ni, &tc->no);
 
 	/* Publish the preallocated table info in the chain's slot */
 	ti = KIDX_TO_TI(ch, kidx);
 	*ti = tc->ti_copy;
 
 	/* Notify algo on real @ti address */
 	if (tc->ta->change_ti != NULL)
 		tc->ta->change_ti(tc->astate, ti);
 
 	tc->linked = 1;
 	tc->ta->refcnt++;
 }
 
 /*
  * Unlinks @tc from @chain table named instance.
  * Zeroes the table info slot in @chain; the copy kept inside @tc
  * stays as is.
  *
  * Requires both the UH write lock and the runtime write lock
  * (asserted). Also drops the reference on the table algorithm.
  */
 static void
 unlink_table(struct ip_fw_chain *ch, struct table_config *tc)
 {
 	struct namedobj_instance *ni;
 	struct table_info *ti;
 	uint16_t kidx;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 	IPFW_WLOCK_ASSERT(ch);
 
 	ni = CHAIN_TO_NI(ch);
 	kidx = tc->no.kidx;
 
 	/* Clear state. @ti copy is already saved inside @tc */
 	ipfw_objhash_del(ni, &tc->no);
 	ti = KIDX_TO_TI(ch, kidx);
 	memset(ti, 0, sizeof(struct table_info));
 	tc->linked = 0;
 	tc->ta->refcnt--;
 
 	/* Notify algo that the real @ti address is gone (NULL) */
 	if (tc->ta->change_ti != NULL)
 		tc->ta->change_ti(tc->astate, NULL);
 }
 
 /*
  * Arguments for swap_table_set(), filled in by
  * ipfw_swap_tables_sets().
  */
 struct swap_table_args {
 	int set;	/* first set; its tables go to new_set */
 	int new_set;	/* second set; its tables go to set unless mv != 0 */
 	int mv;		/* non-zero: one-way move, new_set tables untouched */
 };
 
 /*
  * Change set for each matching table.
  *
  * Tables in sta->set always match; tables in sta->new_set match only
  * when this is a swap (sta->mv == 0). Each table is dispatched once:
  * the ochanged field marks tables that were already processed.
  */
 static void
 swap_table_set(struct namedobj_instance *ni, struct named_object *no,
     void *arg)
 {
 	struct table_config *tc;
 	struct swap_table_args *sta;
 	int in_old, in_new_swap;
 
 	tc = (struct table_config *)no;
 	sta = (struct swap_table_args *)arg;
 
 	in_old = (no->set == sta->set);
 	in_new_swap = (no->set == sta->new_set && sta->mv == 0);
 	if (!in_old && !in_new_swap)
 		return;
 
 	/* Already moved during this pass */
 	if (tc->ochanged != 0)
 		return;
 	tc->ochanged = 1;
 
 	/* Re-insert the object so it is hashed under its new set */
 	ipfw_objhash_del(ni, no);
 	no->set = (no->set == sta->set) ? sta->new_set : sta->set;
 	ipfw_objhash_add(ni, no);
 }
 
 /*
  * Cleans up ochanged field for all tables.
  *
  * ipfw_objhash_foreach() callback run after swap_table_set() so the
  * "already dispatched" marker does not leak into the next swap.
  * @ni and @arg are part of the callback signature and are not used.
  */
 static void
 clean_table_set_data(struct namedobj_instance *ni, struct named_object *no,
     void *arg)
 {
 	struct table_config *tc;
 
 	tc = (struct table_config *)no;
 	tc->ochanged = 0;
 }
 
 /*
  * Swaps tables within two sets, or moves tables from @set to
  * @new_set only when @mv is non-zero.
  * Must be called with the UH write lock held.
  */
 void
 ipfw_swap_tables_sets(struct ip_fw_chain *ch, uint32_t set,
     uint32_t new_set, int mv)
 {
 	struct namedobj_instance *ni;
 	struct swap_table_args sta;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	sta.mv = mv;
 	sta.new_set = new_set;
 	sta.set = set;
 
 	/* First pass changes sets, second pass clears per-table markers */
 	ni = CHAIN_TO_NI(ch);
 	ipfw_objhash_foreach(ni, swap_table_set, &sta);
 	ipfw_objhash_foreach(ni, clean_table_set_data, &sta);
 }
 
 /*
  * Move all tables which are referenced by rules in @rt to set @new_set.
  * Makes sure that all relevant tables are referenced ONLY by given rules.
  *
  * Works in three passes over the matching rules: count references
  * per table (tc->ocount), compare the counts with the tables'
  * refcnt, then either change the set or just reset the counters.
  * Must be called with the UH write lock held.
  *
  * Returns 0 on success, 1 if some table is also referenced by
  * rules outside of @rt (nothing is moved in that case).
  */
 int
 ipfw_move_tables_sets(struct ip_fw_chain *ch, ipfw_range_tlv *rt,
     uint32_t new_set)
 {
 	struct ip_fw *rule;
 	struct table_config *tc;
 	struct named_object *no;
 	struct namedobj_instance *ni;
 	int bad, i, l, cmdlen;
 	uint16_t kidx;
 	ipfw_insn *cmd;
 
 	IPFW_UH_WLOCK_ASSERT(ch);
 
 	ni = CHAIN_TO_NI(ch);
 
 	/* Stage 1: count number of references by given rules */
 	for (i = 0; i < ch->n_rules - 1; i++) {
 		rule = ch->map[i];
 		if (ipfw_match_range(rule, rt) == 0)
 			continue;
 
 		l = rule->cmd_len;
 		cmd = rule->cmd;
 		cmdlen = 0;
 		for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
 			cmdlen = F_LEN(cmd);
 			if (classify_opcode_kidx(cmd, &kidx) != 0)
 				continue;
 			no = ipfw_objhash_lookup_kidx(ni, kidx);
 			KASSERT(no != NULL, 
 			    ("objhash lookup failed on index %d", kidx));
 			tc = (struct table_config *)no;
 			tc->ocount++;
 		}
 
 	}
 
 	/* Stage 2: verify "ownership" */
 	bad = 0;
 	for (i = 0; i < ch->n_rules - 1; i++) {
 		rule = ch->map[i];
 		if (ipfw_match_range(rule, rt) == 0)
 			continue;
 
 		l = rule->cmd_len;
 		cmd = rule->cmd;
 		cmdlen = 0;
 		for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
 			cmdlen = F_LEN(cmd);
 			if (classify_opcode_kidx(cmd, &kidx) != 0)
 				continue;
 			no = ipfw_objhash_lookup_kidx(ni, kidx);
 			KASSERT(no != NULL, 
 			    ("objhash lookup failed on index %d", kidx));
 			tc = (struct table_config *)no;
 			if (tc->no.refcnt != tc->ocount) {
 
 				/*
 				 * Number of references differ:
 				 * Other rule(s) are holding reference to given
 				 * table, so it is not possible to change its set.
 				 *
 				 * Note that refcnt may account
 				 * references to some going-to-be-added rules.
 				 * Since we don't know their numbers (and even
 				 * if they will be added) it is perfectly OK
 				 * to return error here.
 				 */
 				bad = 1;
 				break;
 			}
 		}
 
 		if (bad != 0)
 			break;
 	}
 
 	/*
 	 * Stage 3: change set or cleanup.
 	 * The counters are reset even on failure so that the next
 	 * call starts from zero.
 	 */
 	for (i = 0; i < ch->n_rules - 1; i++) {
 		rule = ch->map[i];
 		if (ipfw_match_range(rule, rt) == 0)
 			continue;
 
 		l = rule->cmd_len;
 		cmd = rule->cmd;
 		cmdlen = 0;
 		for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
 			cmdlen = F_LEN(cmd);
 			if (classify_opcode_kidx(cmd, &kidx) != 0)
 				continue;
 			no = ipfw_objhash_lookup_kidx(ni, kidx);
 			KASSERT(no != NULL, 
 			    ("objhash lookup failed on index %d", kidx));
 			tc = (struct table_config *)no;
 
 			tc->ocount = 0;
 			if (bad != 0)
 				continue;
 
 			/* Actually change set. */
 			ipfw_objhash_del(ni, no);
 			no->set = new_set;
 			ipfw_objhash_add(ni, no);
 		}
 	}
 
 	return (bad);
 }
 
 /*
  * Finds and bumps refcount for objects referenced by given @rule.
  * Auto-creates non-existing tables.
  * Fills in @oib array with userland/kernel indexes.
  *
  * Takes and drops the UH write lock itself; object auto-creation is done
  * after the lock has been released.
  * On return ci->object_opcodes holds the number of @oib slots actually
  * used (not updated when referencing fails).
  *
  * Returns 0 on success.
  */
 static int
 ref_rule_objects(struct ip_fw_chain *ch, struct ip_fw *rule,
     struct rule_check_info *ci, struct obj_idx *oib, struct tid_info *ti)
 {
 	int cmdlen, error, l, numnew;
 	ipfw_insn *cmd;
 	struct obj_idx *pidx;
 	int found, unresolved;
 
 	/* pidx points to the next free slot in @oib */
 	pidx = oib;
 	l = rule->cmd_len;
 	cmd = rule->cmd;
 	cmdlen = 0;
 	error = 0;
 	numnew = 0;
 	found = 0;
 	unresolved = 0;
 
 	IPFW_UH_WLOCK(ch);
 
 	/* Increase refcount on each existing referenced table. */
 	for ( ;	l > 0 ; l -= cmdlen, cmd += cmdlen) {
 		cmdlen = F_LEN(cmd);
 
 		error = ref_opcode_object(ch, cmd, ti, pidx, &found, &unresolved);
 		if (error != 0)
 			break;
 		if (found || unresolved) {
 			/* Remember opcode position relative to rule->cmd */
 			pidx->off = rule->cmd_len - l;
 			pidx++;
 		}
 		/*
 		 * Compatibility stuff for old clients:
 		 * prepare to manually create non-existing objects.
 		 */
 		if (unresolved)
 			numnew++;
 	}
 
 	if (error != 0) {
 		/* Unref everything we have already done */
 		unref_oib_objects(ch, rule->cmd, oib, pidx);
 		IPFW_UH_WUNLOCK(ch);
 		return (error);
 	}
 	IPFW_UH_WUNLOCK(ch);
 
 	/* Perform auto-creation for non-existing objects */
 	if (numnew != 0)
 		error = create_objects_compat(ch, rule->cmd, oib, pidx, ti);
 
 	/* Calculate real number of dynamic objects */
 	ci->object_opcodes = (uint16_t)(pidx - oib);
 
 	return (error);
 }
 
 /*
  * Resolves every object referenced by the rule described in @ci and
  * rewrites the user-supplied index of each such opcode with the kernel one.
  * Objects are looked up and referenced by ref_rule_objects().
  *
  * Returns 0 on success and appropriate error code otherwise.
  */
 int
 ipfw_rewrite_rule_uidx(struct ip_fw_chain *chain,
     struct rule_check_info *ci)
 {
 	int error;
 	ipfw_insn *cmd;
 	struct obj_idx *p, *pidx_first, *pidx_last;
 	struct tid_info ti;
 
 	/*
 	 * Prepare an array for storing opcode indices.
 	 * Use the buffer embedded in @ci when it is large enough and
 	 * fall back to a heap allocation otherwise.
 	 */
 	if (ci->object_opcodes <= (sizeof(ci->obuf)/sizeof(ci->obuf[0]))) {
 		/* Embedded buffer */
 		pidx_first = ci->obuf;
 	} else
 		pidx_first = malloc(ci->object_opcodes * sizeof(struct obj_idx),
 		    M_IPFW, M_WAITOK | M_ZERO);
 
 	memset(&ti, 0, sizeof(ti));
 
 	/*
 	 * Use default set for looking up tables (old way) or
 	 * use set rule is assigned to (new way).
 	 */
 	ti.set = (V_fw_tables_sets != 0) ? ci->krule->set : 0;
 	if (ci->ctlv != NULL) {
 		/* Name TLVs follow the container header */
 		ti.tlvs = (void *)(ci->ctlv + 1);
 		ti.tlen = ci->ctlv->head.length - sizeof(ipfw_obj_ctlv);
 	}
 
 	/* Reference all used tables and other objects */
 	error = ref_rule_objects(chain, ci->krule, ci, pidx_first, &ti);
 	if (error != 0)
 		goto free;
 	/*
 	 * Note that ref_rule_objects() might have updated ci->object_opcodes
 	 * to reflect actual number of object opcodes.
 	 */
 
 	/* Perform rule rewrite */
 	pidx_last = pidx_first + ci->object_opcodes;
 	for (p = pidx_first; p < pidx_last; p++) {
 		cmd = ci->krule->cmd + p->off;
 		update_opcode_kidx(cmd, p->kidx);
 	}
 
 free:
 	/* Only the heap-allocated array needs to be released */
 	if (pidx_first != ci->obuf)
 		free(pidx_first, M_IPFW);
 
 	return (error);
 }
 
 /*
  * Sockopt handlers provided by the tables module.
  * Registered in ipfw_init_tables() and removed in ipfw_destroy_tables().
  * NOTE(review): columns look like { opcode, version, direction, handler } -
  * confirm against struct ipfw_sopt_handler.
  */
 static struct ipfw_sopt_handler	scodes[] = {
 	{ IP_FW_TABLE_XCREATE,	0,	HDIR_SET,	create_table },
 	{ IP_FW_TABLE_XDESTROY,	0,	HDIR_SET,	flush_table_v0 },
 	{ IP_FW_TABLE_XFLUSH,	0,	HDIR_SET,	flush_table_v0 },
 	{ IP_FW_TABLE_XMODIFY,	0,	HDIR_BOTH,	modify_table },
 	{ IP_FW_TABLE_XINFO,	0,	HDIR_GET,	describe_table },
 	{ IP_FW_TABLES_XLIST,	0,	HDIR_GET,	list_tables },
 	{ IP_FW_TABLE_XLIST,	0,	HDIR_GET,	dump_table_v0 },
 	{ IP_FW_TABLE_XLIST,	1,	HDIR_GET,	dump_table_v1 },
 	{ IP_FW_TABLE_XADD,	0,	HDIR_BOTH,	manage_table_ent_v0 },
 	{ IP_FW_TABLE_XADD,	1,	HDIR_BOTH,	manage_table_ent_v1 },
 	{ IP_FW_TABLE_XDEL,	0,	HDIR_BOTH,	manage_table_ent_v0 },
 	{ IP_FW_TABLE_XDEL,	1,	HDIR_BOTH,	manage_table_ent_v1 },
 	{ IP_FW_TABLE_XFIND,	0,	HDIR_GET,	find_table_entry },
 	{ IP_FW_TABLE_XSWAP,	0,	HDIR_SET,	swap_table },
 	{ IP_FW_TABLES_ALIST,	0,	HDIR_GET,	list_table_algo },
 	{ IP_FW_TABLE_XGETSIZE,	0,	HDIR_GET,	get_table_size },
 };
 
 /*
  * Objhash iteration callback used on module shutdown: unlinks table @no
  * from the chain passed in @arg, releases its kernel index and frees
  * the table config. A failure to release the index is only logged.
  */
 static void
 destroy_table_locked(struct namedobj_instance *ni, struct named_object *no,
     void *arg)
 {
 	struct ip_fw_chain *chain;
 	struct table_config *tcfg;
 
 	chain = (struct ip_fw_chain *)arg;
 	tcfg = (struct table_config *)no;
 
 	unlink_table(chain, tcfg);
 	if (ipfw_objhash_free_idx(ni, no->kidx) != 0) {
 		printf("Error unlinking kidx %d from table %s\n",
 		    no->kidx, no->name);
 	}
 	free_table_config(ni, tcfg);
 }
 
 /*
  * Shuts tables module down.
  *
  * Unregisters the sockopt handlers and opcode rewriters, destroys every
  * table under both chain locks and then releases the per-chain table
  * state, the value and algo subsystems and the name hash.
  * @last is passed through to the unregister macros and to
  * ipfw_table_value_destroy().
  */
 void
 ipfw_destroy_tables(struct ip_fw_chain *ch, int last)
 {
 
 	IPFW_DEL_SOPT_HANDLER(last, scodes);
 	IPFW_DEL_OBJ_REWRITER(last, opcodes);
 
 	/* Remove all tables from working set */
 	IPFW_UH_WLOCK(ch);
 	IPFW_WLOCK(ch);
 	ipfw_objhash_foreach(CHAIN_TO_NI(ch), destroy_table_locked, ch);
 	IPFW_WUNLOCK(ch);
 	IPFW_UH_WUNLOCK(ch);
 
 	/* Free pointers itself */
 	free(ch->tablestate, M_IPFW);
 
 	ipfw_table_value_destroy(ch, last);
 	ipfw_table_algo_destroy(ch);
 
 	/* Name hash goes first, then the config structure that owns it */
 	ipfw_objhash_destroy(CHAIN_TO_NI(ch));
 	free(CHAIN_TO_TCFG(ch), M_IPFW);
 }
 
 /*
  * Starts tables module.
  *
  * Allocates the per-chain table state array and table config (with its
  * name hash), initializes the value and algo subsystems and registers
  * the opcode rewriters and sockopt handlers.
  * @first is passed through to ipfw_table_value_init() and to the
  * register macros.
  *
  * Always returns 0.
  */
 int
 ipfw_init_tables(struct ip_fw_chain *ch, int first)
 {
 	struct tables_config *cfg;
 
 	/* Runtime state array, V_fw_tables_max zeroed slots */
 	ch->tablestate = malloc(sizeof(struct table_info) * V_fw_tables_max,
 	    M_IPFW, M_WAITOK | M_ZERO);
 
 	/* Table config with the name hash attached */
 	cfg = malloc(sizeof(*cfg), M_IPFW, M_WAITOK | M_ZERO);
 	cfg->namehash = ipfw_objhash_create(V_fw_tables_max);
 	ch->tblcfg = cfg;
 
 	ipfw_table_value_init(ch, first);
 	ipfw_table_algo_init(ch);
 
 	IPFW_ADD_OBJ_REWRITER(first, opcodes);
 	IPFW_ADD_SOPT_HANDLER(first, scodes);
 
 	return (0);
 }
 
 
 
Index: projects/clang380-import/sys/netpfil/ipfw/ip_fw_table_algo.c
===================================================================
--- projects/clang380-import/sys/netpfil/ipfw/ip_fw_table_algo.c	(revision 293686)
+++ projects/clang380-import/sys/netpfil/ipfw/ip_fw_table_algo.c	(revision 293687)
@@ -1,4082 +1,4106 @@
 /*-
  * Copyright (c) 2014 Yandex LLC
  * Copyright (c) 2014 Alexander V. Chernikov
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Lookup table algorithms.
  *
  */
 
 #include "opt_ipfw.h"
 #include "opt_inet.h"
 #ifndef INET
 #error IPFIREWALL requires INET.
 #endif /* INET */
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/queue.h>
 #include <net/if.h>	/* ip_fw.h requires IFNAMSIZ */
 #include <net/radix.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
+#include <netinet/in_fib.h>
 #include <netinet/ip_var.h>	/* struct ipfw_rule_ref */
 #include <netinet/ip_fw.h>
+#include <netinet6/in6_fib.h>
 
 #include <netpfil/ipfw/ip_fw_private.h>
 #include <netpfil/ipfw/ip_fw_table.h>
 
 
 /*
  * IPFW table lookup algorithms.
  *
  * What is needed to add another table algo?
  *
  * Algo init:
  * * struct table_algo has to be filled with:
  *   name: "type:algoname" format, e.g. "addr:radix". Currently
  *     there are the following types: "addr", "iface", "number" and "flow".
  *   type: one of IPFW_TABLE_* types
  *   flags: one or more TA_FLAGS_*
  *   ta_buf_size: size of structure used to store add/del item state.
  *     Needs to be less than TA_BUF_SZ.
  *   callbacks: see below for description.
  * * ipfw_add_table_algo / ipfw_del_table_algo has to be called
  *
  * Callbacks description:
  *
  * -init: request to initialize new table instance.
  * typedef int (ta_init)(struct ip_fw_chain *ch, void **ta_state,
  *     struct table_info *ti, char *data, uint8_t tflags);
  * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success.
  *
  *  Allocate all structures needed for normal operations.
  *  * Caller may want to parse @data for some algo-specific
  *    options provided by userland.
  *  * Caller may want to save configuration state pointer to @ta_state
  *  * Caller needs to save desired runtime structure pointer(s)
  *    inside @ti fields. Note that it is not correct to save
  *    @ti pointer at this moment. Use -change_ti hook for that.
  *  * Caller has to fill in ti->lookup to appropriate function
  *    pointer.
  *
  *
  *
  * -destroy: request to destroy table instance.
  * typedef void (ta_destroy)(void *ta_state, struct table_info *ti);
  * MANDATORY, unlocked. (M_WAITOK).
  *
  * Frees all table entries and all tables structures allocated by -init.
  *
  *
  *
  * -prepare_add: request to allocate state for adding new entry.
  * typedef int (ta_prepare_add)(struct ip_fw_chain *ch, struct tentry_info *tei,
  *     void *ta_buf);
  * MANDATORY, unlocked. (M_WAITOK). Returns 0 on success.
  *
  * Allocates state and fills it in with all necessary data (EXCEPT value)
  * from @tei to minimize operations needed to be done under WLOCK.
  * "value" field has to be copied to new entry in @add callback.
  * Buffer ta_buf of size ta->ta_buf_size may be used to store
  * allocated state.
  *
  *
  *
  * -prepare_del: request to set state for deleting existing entry.
  * typedef int (ta_prepare_del)(struct ip_fw_chain *ch, struct tentry_info *tei,
  *     void *ta_buf);
  * MANDATORY, locked, UH. (M_NOWAIT). Returns 0 on success.
  *
  * Buffer ta_buf of size ta->ta_buf_size may be used to store
  * allocated state. Caller should use on-stack ta_buf allocation
  * instead of doing malloc().
  *
  *
  *
  * -add: request to insert new entry into runtime/config structures.
  *  typedef int (ta_add)(void *ta_state, struct table_info *ti,
  *     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
  * MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success.
  *
  * Insert new entry using previously-allocated state in @ta_buf.
  * * @tei may have the following flags:
  *   TEI_FLAGS_UPDATE: request to add or update entry.
  *   TEI_FLAGS_DONTADD: request to update (but not add) entry.
  * * Caller is required to do the following:
  *   copy real entry value from @tei
  *   entry added: return 0, set 1 to @pnum
  *   entry updated: return 0, store 0 to @pnum, store old value in @tei,
  *     add TEI_FLAGS_UPDATED flag to @tei.
  *   entry exists: return EEXIST
  *   entry not found: return ENOENT
  *   other error: return non-zero error code.
  *
  *
  *
  * -del: request to delete existing entry from runtime/config structures.
  *  typedef int (ta_del)(void *ta_state, struct table_info *ti,
  *     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
  *  MANDATORY, UH+WLOCK. (M_NOWAIT). Returns 0 on success.
  *
  *  Delete entry using previously set up in @ta_buf.
  * * Caller is required to do the following:
  *   entry deleted: return 0, set 1 to @pnum, store old value in @tei.
  *   entry not found: return ENOENT
  *   other error: return non-zero error code.
  *
  *
  *
  * -flush_entry: flush entry state created by -prepare_add / -del / others
  *  typedef void (ta_flush_entry)(struct ip_fw_chain *ch,
  *      struct tentry_info *tei, void *ta_buf);
  *  MANDATORY, may be locked. (M_NOWAIT).
  *
  *  Delete state allocated by:
  *  -prepare_add (-add returned EEXIST|UPDATED)
  *  -prepare_del (if any)
  *  -del
  *  * Caller is required to handle empty @ta_buf correctly.
  *
  *
  * -find_tentry: finds entry specified by key @tent
  *  typedef int ta_find_tentry(void *ta_state, struct table_info *ti,
  *      ipfw_obj_tentry *tent);
  *  OPTIONAL, locked (UH). (M_NOWAIT). Returns 0 on success.
  *
  *  Finds entry specified by given key.
  *  * Caller is required to do the following:
  *    entry found: returns 0, export entry to @tent
  *    entry not found: returns ENOENT
  *
  *
  * -need_modify: checks if @ti has enough space to hold another @count items.
  *  typedef int (ta_need_modify)(void *ta_state, struct table_info *ti,
  *      uint32_t count, uint64_t *pflags);
  *  OPTIONAL, locked (UH). (M_NOWAIT). Returns 0 if has.
  *
  *  Checks if given table has enough space to add @count items without
  *  resize. Caller may use @pflags to store desired modification data.
  *
  *
  *
  * -prepare_mod: allocate structures for table modification.
  *  typedef int (ta_prepare_mod)(void *ta_buf, uint64_t *pflags);
  * OPTIONAL(need_modify), unlocked. (M_WAITOK). Returns 0 on success.
  *
  * Allocate all needed state for table modification. Caller
  * should use `struct mod_item` to store new state in @ta_buf.
  * Up to TA_BUF_SZ (128 bytes) can be stored in @ta_buf.
  * 
  *
  *
  * -fill_mod: copy some data to new state.
  *  typedef int (ta_fill_mod)(void *ta_state, struct table_info *ti,
  *      void *ta_buf, uint64_t *pflags);
  * OPTIONAL(need_modify), locked (UH). (M_NOWAIT). Returns 0 on success.
  *
  * Copy as much data as we can to minimize changes under WLOCK.
  * For example, array can be merged inside this callback.
  *
  *
  *
  * -modify: perform final modification.
  *  typedef void (ta_modify)(void *ta_state, struct table_info *ti,
  *      void *ta_buf, uint64_t pflags);
  * OPTIONAL(need_modify), locked (UH+WLOCK). (M_NOWAIT). 
  *
  * Performs all changes necessary to switch to new structures.
  * * Caller should save old pointers to @ta_buf storage.
  *
  *
  *
  * -flush_mod: flush table modification state.
  *  typedef void (ta_flush_mod)(void *ta_buf);
  * OPTIONAL(need_modify), unlocked. (M_WAITOK).
  *
  * Performs flush for the following:
  *   - prepare_mod (modification was not necessary)
  *   - modify (for the old state)
  *
  *
  *
  * -change_ti: monitor table info pointer changes
  * typedef void (ta_change_ti)(void *ta_state, struct table_info *ti);
  * OPTIONAL, locked (UH). (M_NOWAIT).
  *
  * Called on @ti pointer changed. Called immediately after -init
  * to set initial state.
  *
  *
  *
  * -foreach: calls @f for each table entry
  *  typedef void ta_foreach(void *ta_state, struct table_info *ti,
  *      ta_foreach_f *f, void *arg);
  * MANDATORY, locked(UH). (M_NOWAIT).
  *
  * Runs callback with specified argument for each table entry,
  * Typically used for dumping table entries.
  *
  *
  *
  * -dump_tentry: dump table entry in current @tentry format.
  *  typedef int ta_dump_tentry(void *ta_state, struct table_info *ti, void *e,
  *      ipfw_obj_tentry *tent);
  * MANDATORY, locked(UH). (M_NOWAIT). Returns 0 on success.
  *
  * Dumps entry @e to @tent.
  *
  *
  * -print_config: prints custom algorithm options into buffer.
  *  typedef void (ta_print_config)(void *ta_state, struct table_info *ti,
  *      char *buf, size_t bufsize);
  * OPTIONAL. locked(UH). (M_NOWAIT).
  *
  * Prints custom algorithm options in the format suitable to pass
  * back to -init callback.
  *
  *
  *
  * -dump_tinfo: dumps algo-specific info.
  *  typedef void ta_dump_tinfo(void *ta_state, struct table_info *ti,
  *      ipfw_ta_tinfo *tinfo);
  * OPTIONAL. locked(UH). (M_NOWAIT).
  *
  * Dumps options like items size/hash size, etc.
  */
 
 MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables");
 
 /*
  * Utility structures/functions common to more than one algo
  */
 
 /*
  * Generic table modification state; the callback description above
  * suggests storing it in @ta_buf from -prepare_mod.
  * NOTE(review): the main_ptr/size pair presumably describes the new IPv4
  * (or only) storage and main_ptr6/size6 the IPv6 one - confirm against
  * the algos using it.
  */
 struct mod_item {
 	void	*main_ptr;
 	size_t	size;
 	void	*main_ptr6;
 	size_t	size6;
 };
 
 static int badd(const void *key, void *item, void *base, size_t nmemb,
     size_t size, int (*compar) (const void *, const void *));
 static int bdel(const void *key, void *base, size_t nmemb, size_t size,
     int (*compar) (const void *, const void *));
 
 
 /*
  * ADDR implementation using radix
  *
  */
 
 /*
  * The radix code expects addr and mask to be array of bytes,
  * with the first byte being the length of the array. rn_inithead
  * is called with the offset in bits of the lookup key within the
  * array. If we use a sockaddr_in as the underlying type,
  * sin_len is conveniently located at offset 0, sin_addr is at
  * offset 4 and normally aligned.
  * But for portability, let's avoid assumption and make the code explicit
  */
 /* Leading length byte of key/mask structure @v; usable as an lvalue */
 #define KEY_LEN(v)	(*((uint8_t *)&(v)))
 /*
  * Do not require radix to compare more than actual IPv4/IPv6 address
  */
 #define KEY_LEN_INET	(offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
 #define KEY_LEN_INET6	(offsetof(struct sa_in6, sin6_addr) + sizeof(struct in6_addr))
 
 /* Offset (in bits) of the address within the key, as passed to rn_inithead() */
 #define OFF_LEN_INET	(8 * offsetof(struct sockaddr_in, sin_addr))
 #define OFF_LEN_INET6	(8 * offsetof(struct sa_in6, sin6_addr))
 
 /*
  * IPv4 entry of an addr:radix table.
  * Radix node pointers returned by the tree are cast directly to this
  * structure, so rn[] has to stay the first member.
  */
 struct radix_addr_entry {
 	struct radix_node	rn[2];
 	struct sockaddr_in	addr;		/* lookup key */
 	uint32_t		value;		/* value returned by lookup */
 	uint8_t			masklen;	/* prefix length */
 };
 
 /*
  * Compact sockaddr-like structure used as IPv6 radix key/mask:
  * length and family bytes, padding, then the address itself.
  */
 struct sa_in6 {
 	uint8_t			sin6_len;
 	uint8_t			sin6_family;
 	uint8_t			pad[2];
 	struct in6_addr		sin6_addr;
 };
 
 /*
  * IPv6 entry of an addr:radix table.
  * Same layout idea as struct radix_addr_entry: rn[] stays first because
  * radix node pointers are cast to this structure.
  */
 struct radix_addr_xentry {
 	struct radix_node	rn[2];
 	struct sa_in6		addr6;		/* lookup key */
 	uint32_t		value;		/* value returned by lookup */
 	uint8_t			masklen;	/* prefix length */
 };
 
 /*
  * Per-table addr:radix configuration state (stored as ta_state).
  * NOTE(review): head4/head6 are not referenced by the radix callbacks
  * visible here, which use ti->state / ti->xstate instead - confirm usage.
  */
 struct radix_cfg {
 	struct radix_node_head	*head4;
 	struct radix_node_head	*head6;
 	size_t			count4;	/* number of IPv4 entries */
 	size_t			count6;	/* number of IPv6 entries */
 };
 
 /*
  * Add/delete state kept in @ta_buf between the -prepare_*, -add/-del
  * and -flush_entry callbacks of addr:radix.
  */
 struct ta_buf_radix
 {
 	/* Entry to free in -flush_entry: unused new entry or deleted one */
 	void *ent_ptr;
 	/* Key passed to the radix code */
 	struct sockaddr	*addr_ptr;
 	/* Mask passed to the radix code; set only for non-host prefixes */
 	struct sockaddr	*mask_ptr;
 	/* Storage for the key/mask built by tei_to_sockaddr_ent() */
 	union {
 		struct {
 			struct sockaddr_in sa;
 			struct sockaddr_in ma;
 		} a4;
 		struct {
 			struct sa_in6 sa;
 			struct sa_in6 ma;
 		} a6;
 	} addr;
 };
 
 static int ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val);
 static int ta_init_radix(struct ip_fw_chain *ch, void **ta_state,
     struct table_info *ti, char *data, uint8_t tflags);
 static int flush_radix_entry(struct radix_node *rn, void *arg);
 static void ta_destroy_radix(void *ta_state, struct table_info *ti);
 static void ta_dump_radix_tinfo(void *ta_state, struct table_info *ti,
     ipfw_ta_tinfo *tinfo);
 static int ta_dump_radix_tentry(void *ta_state, struct table_info *ti,
     void *e, ipfw_obj_tentry *tent);
 static int ta_find_radix_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent);
 static void ta_foreach_radix(void *ta_state, struct table_info *ti,
     ta_foreach_f *f, void *arg);
 static void tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa,
     struct sockaddr *ma, int *set_mask);
 static int ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_add_radix(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static int ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_del_radix(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static void ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_need_modify_radix(void *ta_state, struct table_info *ti,
     uint32_t count, uint64_t *pflags);
 
 /*
  * Runtime lookup for addr:radix tables.
  * @keylen equal to sizeof(in_addr_t) selects the IPv4 tree (ti->state),
  * anything else is treated as an IPv6 address and looked up in
  * ti->xstate.
  *
  * Returns 1 and stores the entry value in @val on match, 0 otherwise.
  */
 static int
 ta_lookup_radix(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val)
 {
 	struct radix_node_head *rnh;
 
 	if (keylen != sizeof(in_addr_t)) {
 		/* IPv6 */
 		struct radix_addr_xentry *e6;
 		struct sa_in6 key6;
 
 		KEY_LEN(key6) = KEY_LEN_INET6;
 		memcpy(&key6.sin6_addr, key, sizeof(struct in6_addr));
 		rnh = (struct radix_node_head *)ti->xstate;
 		e6 = (struct radix_addr_xentry *)(rnh->rnh_matchaddr(&key6, rnh));
 		if (e6 == NULL)
 			return (0);
 		*val = e6->value;
 		return (1);
 	} else {
 		/* IPv4 */
 		struct radix_addr_entry *e4;
 		struct sockaddr_in key4;
 
 		KEY_LEN(key4) = KEY_LEN_INET;
 		key4.sin_addr.s_addr = *((in_addr_t *)key);
 		rnh = (struct radix_node_head *)ti->state;
 		e4 = (struct radix_addr_entry *)(rnh->rnh_matchaddr(&key4, rnh));
 		if (e4 == NULL)
 			return (0);
 		*val = e4->value;
 		return (1);
 	}
 }
 
 /*
  * -init callback: creates a new addr:radix table instance.
  * Sets up the IPv4 and IPv6 radix heads in @ti, allocates the zeroed
  * radix_cfg returned via @ta_state and installs the lookup function.
  * @ch, @data and @tflags are not used.
  *
  * Returns 0 on success, ENOMEM if a radix head cannot be initialized.
  */
 static int
 ta_init_radix(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
     char *data, uint8_t tflags)
 {
 	struct radix_cfg *rcfg;
 
 	/* IPv4 tree */
 	if (rn_inithead(&ti->state, OFF_LEN_INET) == 0)
 		return (ENOMEM);
 
 	/* IPv6 tree; roll the IPv4 one back on failure */
 	if (rn_inithead(&ti->xstate, OFF_LEN_INET6) == 0) {
 		rn_detachhead(&ti->state);
 		return (ENOMEM);
 	}
 
 	rcfg = malloc(sizeof(*rcfg), M_IPFW, M_WAITOK | M_ZERO);
 
 	ti->lookup = ta_lookup_radix;
 	*ta_state = rcfg;
 
 	return (0);
 }
 
 /*
  * Tree walker callback: removes node @rn from the radix head passed in
  * @arg and frees the entry if the removal returned one.
  * Always returns 0.
  */
 static int
 flush_radix_entry(struct radix_node *rn, void *arg)
 {
 	struct radix_node_head *head;
 	struct radix_addr_entry *victim;
 
 	head = arg;
 	victim = (struct radix_addr_entry *)
 	    head->rnh_deladdr(rn->rn_key, rn->rn_mask, head);
 	if (victim == NULL)
 		return (0);
 
 	free(victim, M_IPFW_TBL);
 	return (0);
 }
 
 /*
  * -destroy callback: flushes every entry out of the IPv4 and IPv6 trees,
  * detaches both radix heads and frees the config in @ta_state.
  */
 static void
 ta_destroy_radix(void *ta_state, struct table_info *ti)
 {
 	struct radix_node_head *head;
 
 	/* IPv4 tree */
 	head = (struct radix_node_head *)(ti->state);
 	head->rnh_walktree(head, flush_radix_entry, head);
 	rn_detachhead(&ti->state);
 
 	/* IPv6 tree */
 	head = (struct radix_node_head *)(ti->xstate);
 	head->rnh_walktree(head, flush_radix_entry, head);
 	rn_detachhead(&ti->xstate);
 
 	free((struct radix_cfg *)ta_state, M_IPFW);
 }
 
 /*
  * -dump_tinfo callback: reports algo class, entry counts and item sizes
  * for both address families.
  */
 static void
 ta_dump_radix_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
 {
 	struct radix_cfg *rcfg = (struct radix_cfg *)ta_state;
 
 	tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM;
 
 	/* IPv4 part */
 	tinfo->taclass4 = IPFW_TACLASS_RADIX;
 	tinfo->itemsize4 = sizeof(struct radix_addr_entry);
 	tinfo->count4 = rcfg->count4;
 
 	/* IPv6 part */
 	tinfo->taclass6 = IPFW_TACLASS_RADIX;
 	tinfo->itemsize6 = sizeof(struct radix_addr_xentry);
 	tinfo->count6 = rcfg->count6;
 }
 
 /*
  * -dump_tentry callback: exports radix entry @e into @tent.
  * The address family is taken from the family field of the stored key.
  * Without INET6 a non-AF_INET entry leaves @tent untouched.
  *
  * Always returns 0.
  */
 static int
 ta_dump_radix_tentry(void *ta_state, struct table_info *ti, void *e,
     ipfw_obj_tentry *tent)
 {
 	struct radix_addr_entry *n;
 #ifdef INET6
 	struct radix_addr_xentry *xn;
 #endif
 
 	n = (struct radix_addr_entry *)e;
 
 	/* Guess IPv4/IPv6 radix by sockaddr family */
 	if (n->addr.sin_family == AF_INET) {
 		tent->k.addr.s_addr = n->addr.sin_addr.s_addr;
 		tent->masklen = n->masklen;
 		tent->subtype = AF_INET;
 		tent->v.kidx = n->value;
 #ifdef INET6
 	} else {
 		/* Not IPv4: reinterpret the same pointer as an IPv6 entry */
 		xn = (struct radix_addr_xentry *)e;
 		memcpy(&tent->k, &xn->addr6.sin6_addr, sizeof(struct in6_addr));
 		tent->masklen = xn->masklen;
 		tent->subtype = AF_INET6;
 		tent->v.kidx = xn->value;
 #endif
 	}
 
 	return (0);
 }
 
 /*
  * -find_tentry callback: looks up the address stored in @tent
  * (IPv4 when tent->subtype is AF_INET, IPv6 otherwise) and, on match,
  * exports the matching entry back into @tent.
  *
  * Returns 0 if an entry was found, ENOENT otherwise.
  */
 static int
 ta_find_radix_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent)
 {
 	struct radix_node_head *rnh;
 	void *match;
 
 	if (tent->subtype != AF_INET) {
 		struct sa_in6 key6;
 
 		KEY_LEN(key6) = KEY_LEN_INET6;
 		memcpy(&key6.sin6_addr, &tent->k.addr6, sizeof(struct in6_addr));
 		rnh = (struct radix_node_head *)ti->xstate;
 		match = rnh->rnh_matchaddr(&key6, rnh);
 	} else {
 		struct sockaddr_in key4;
 
 		KEY_LEN(key4) = KEY_LEN_INET;
 		key4.sin_addr.s_addr = tent->k.addr.s_addr;
 		rnh = (struct radix_node_head *)ti->state;
 		match = rnh->rnh_matchaddr(&key4, rnh);
 	}
 
 	if (match == NULL)
 		return (ENOENT);
 
 	ta_dump_radix_tentry(ta_state, ti, match, tent);
 	return (0);
 }
 
 /*
  * -foreach callback: runs @f with @arg over every entry of the IPv4
  * tree and then over every entry of the IPv6 tree.
  */
 static void
 ta_foreach_radix(void *ta_state, struct table_info *ti, ta_foreach_f *f,
     void *arg)
 {
 	struct radix_node_head *head;
 	walktree_f_t *walker;
 
 	walker = (walktree_f_t *)f;
 
 	head = (struct radix_node_head *)(ti->state);
 	head->rnh_walktree(head, walker, arg);
 
 	head = (struct radix_node_head *)(ti->xstate);
 	head->rnh_walktree(head, walker, arg);
 }
 
 
 #ifdef INET6
 static inline void ipv6_writemask(struct in6_addr *addr6, uint8_t mask);
 
 /*
  * Writes an IPv6 netmask with @mask leading one-bits into @addr6
  * (network byte order).
  *
  * Full 32-bit words are set to all-ones, then the word holding the
  * prefix boundary is written. Words after that one are left untouched,
  * so @addr6 is expected to be zeroed by the caller.
  * Never writes past the end of @addr6: for @mask >= 128 only the four
  * address words are filled.
  */
 static inline void
 ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
 {
 	uint32_t *cp, *end;
 
 	cp = (uint32_t *)addr6;
 	end = cp + sizeof(*addr6) / sizeof(*cp);
 
 	for (; mask >= 32 && cp < end; mask -= 32)
 		*cp++ = 0xFFFFFFFF;
 	/* Boundary word exists only if the prefix did not fill the address */
 	if (cp < end)
 		*cp = htonl(mask ? ~((1U << (32 - mask)) - 1) : 0);
 }
 #endif
 
 /*
  * Builds radix key @sa and mask @ma from the address and prefix length
  * in @tei. The address is masked with the generated netmask before it
  * is stored in @sa.
  *
  * @set_mask is set to 1 when the prefix is shorter than a host address
  * (i.e. the mask has to be passed to the radix code) and to 0 for host
  * entries. It is left untouched for unsupported address families.
  * Prefix length is not validated here; callers check it beforehand.
  */
 static void
 tei_to_sockaddr_ent(struct tentry_info *tei, struct sockaddr *sa,
     struct sockaddr *ma, int *set_mask)
 {
 	int mlen;
 #ifdef INET
 	struct sockaddr_in *addr, *mask;
 #endif
 #ifdef INET6
 	struct sa_in6 *addr6, *mask6;
 #endif
 	in_addr_t a4;
 
 	mlen = tei->masklen;
 
 	if (tei->subtype == AF_INET) {
 #ifdef INET
 		addr = (struct sockaddr_in *)sa;
 		mask = (struct sockaddr_in *)ma;
 		/* Set 'total' structure length */
 		KEY_LEN(*addr) = KEY_LEN_INET;
 		KEY_LEN(*mask) = KEY_LEN_INET;
 		addr->sin_family = AF_INET;
 		/* Unsigned shift: (1 << 31) would overflow a signed int */
 		mask->sin_addr.s_addr =
 		    htonl(mlen ? ~((1U << (32 - mlen)) - 1) : 0);
 		a4 = *((in_addr_t *)tei->paddr);
 		addr->sin_addr.s_addr = a4 & mask->sin_addr.s_addr;
 		if (mlen != 32)
 			*set_mask = 1;
 		else
 			*set_mask = 0;
 #endif
 #ifdef INET6
 	} else if (tei->subtype == AF_INET6) {
 		/* IPv6 case */
 		addr6 = (struct sa_in6 *)sa;
 		mask6 = (struct sa_in6 *)ma;
 		/* Set 'total' structure length */
 		KEY_LEN(*addr6) = KEY_LEN_INET6;
 		KEY_LEN(*mask6) = KEY_LEN_INET6;
 		addr6->sin6_family = AF_INET6;
 		ipv6_writemask(&mask6->sin6_addr, mlen);
 		memcpy(&addr6->sin6_addr, tei->paddr, sizeof(struct in6_addr));
 		APPLY_MASK(&addr6->sin6_addr, &mask6->sin6_addr);
 		if (mlen != 128)
 			*set_mask = 1;
 		else
 			*set_mask = 0;
 #endif
 	}
 }
 
 /*
  * -prepare_add callback: allocates a new IPv4/IPv6 entry for the key in
  * @tei and fills in its key and prefix length. The entry pointer and the
  * key/mask pointers are saved in @ta_buf for ta_add_radix().
  * The entry value is not copied here.
  *
  * Returns 0 on success, EINVAL on bad prefix length or unknown address
  * family (nothing is allocated in that case).
  */
 static int
 ta_prepare_add_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_radix *tb;
 	struct radix_addr_entry *ent;
 #ifdef INET6
 	struct radix_addr_xentry *xent;
 #endif
 	struct sockaddr *addr, *mask;
 	int mlen, set_mask;
 
 	tb = (struct ta_buf_radix *)ta_buf;
 
 	mlen = tei->masklen;
 	set_mask = 0;
 	
 	if (tei->subtype == AF_INET) {
 #ifdef INET
 		if (mlen > 32)
 			return (EINVAL);
 		ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO);
 		ent->masklen = mlen;
 
 		/* Key lives inside the entry, mask in the temporary buffer */
 		addr = (struct sockaddr *)&ent->addr;
 		mask = (struct sockaddr *)&tb->addr.a4.ma;
 		tb->ent_ptr = ent;
 #endif
 #ifdef INET6
 	} else if (tei->subtype == AF_INET6) {
 		/* IPv6 case */
 		if (mlen > 128)
 			return (EINVAL);
 		xent = malloc(sizeof(*xent), M_IPFW_TBL, M_WAITOK | M_ZERO);
 		xent->masklen = mlen;
 
 		addr = (struct sockaddr *)&xent->addr6;
 		mask = (struct sockaddr *)&tb->addr.a6.ma;
 		tb->ent_ptr = xent;
 #endif
 	} else {
 		/* Unknown CIDR type */
 		return (EINVAL);
 	}
 
 	tei_to_sockaddr_ent(tei, addr, mask, &set_mask);
 	/* Set pointers */
 	tb->addr_ptr = addr;
 	/*
 	 * mask_ptr is left as is for host entries.
 	 * NOTE(review): presumably relies on @ta_buf being zeroed by the
 	 * caller - confirm.
 	 */
 	if (set_mask != 0)
 		tb->mask_ptr = mask;
 
 	return (0);
 }
 
 /*
  * -add callback: inserts the entry prepared by ta_prepare_add_radix()
  * (kept in @ta_buf) into the IPv4 or IPv6 tree, or updates the value of
  * an already existing entry.
  *
  * Returns:
  *  0      - entry added (*pnum = 1) or updated (*pnum = 0, old value is
  *           stored in @tei and TEI_FLAGS_UPDATED is set),
  *  EEXIST - entry exists and TEI_FLAGS_UPDATE was not requested,
  *  EFBIG  - entry does not exist and TEI_FLAGS_DONTADD is set,
  *  EINVAL - radix insertion failed.
  */
 static int
 ta_add_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct radix_cfg *cfg;
 	struct radix_node_head *rnh;
 	struct radix_node *rn;
 	struct ta_buf_radix *tb;
 	uint32_t *old_value, value;
 
 	cfg = (struct radix_cfg *)ta_state;
 	tb = (struct ta_buf_radix *)ta_buf;
 
 	/* Save current entry value from @tei */
 	if (tei->subtype == AF_INET) {
 		rnh = ti->state;
 		((struct radix_addr_entry *)tb->ent_ptr)->value = tei->value;
 	} else {
 		rnh = ti->xstate;
 		((struct radix_addr_xentry *)tb->ent_ptr)->value = tei->value;
 	}
 
 	/* Search for an entry first */
 	rn = rnh->rnh_lookup(tb->addr_ptr, tb->mask_ptr, rnh);
 	if (rn != NULL) {
 		if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
 			return (EEXIST);
 		/* Record already exists. Update value if we're asked to */
 		if (tei->subtype == AF_INET)
 			old_value = &((struct radix_addr_entry *)rn)->value;
 		else
 			old_value = &((struct radix_addr_xentry *)rn)->value;
 
 		/* Swap: new value goes into the tree, old one back to @tei */
 		value = *old_value;
 		*old_value = tei->value;
 		tei->value = value;
 
 		/* Indicate that update has happened instead of addition */
 		tei->flags |= TEI_FLAGS_UPDATED;
 		*pnum = 0;
 
 		/* tb->ent_ptr stays set: the unused new entry is freed on flush */
 		return (0);
 	}
 
 	if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
 		return (EFBIG);
 
 	rn = rnh->rnh_addaddr(tb->addr_ptr, tb->mask_ptr, rnh, tb->ent_ptr);
 	if (rn == NULL) {
 		/* Unknown error */
 		return (EINVAL);
 	}
 	
 	if (tei->subtype == AF_INET)
 		cfg->count4++;
 	else
 		cfg->count6++;
 	/* Entry is owned by the tree now; keep -flush_entry from freeing it */
 	tb->ent_ptr = NULL;
 	*pnum = 1;
 
 	return (0);
 }
 
 /*
  * -prepare_del callback: builds the radix key/mask for the entry
  * described by @tei inside @ta_buf; no memory is allocated.
  *
  * Returns 0 on success, EINVAL on bad prefix length or unknown address
  * family.
  */
 static int
 ta_prepare_del_radix(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_radix *tb;
 	struct sockaddr *addr, *mask;
 	int mlen, set_mask;
 
 	tb = (struct ta_buf_radix *)ta_buf;
 
 	mlen = tei->masklen;
 	set_mask = 0;
 
 	if (tei->subtype == AF_INET) {
 		if (mlen > 32)
 			return (EINVAL);
 
 		/* Both key and mask live in the temporary buffer */
 		addr = (struct sockaddr *)&tb->addr.a4.sa;
 		mask = (struct sockaddr *)&tb->addr.a4.ma;
 #ifdef INET6
 	} else if (tei->subtype == AF_INET6) {
 		if (mlen > 128)
 			return (EINVAL);
 
 		addr = (struct sockaddr *)&tb->addr.a6.sa;
 		mask = (struct sockaddr *)&tb->addr.a6.ma;
 #endif
 	} else
 		return (EINVAL);
 
 	tei_to_sockaddr_ent(tei, addr, mask, &set_mask);
 	tb->addr_ptr = addr;
 	/* mask_ptr is set only for non-host prefixes */
 	if (set_mask != 0)
 		tb->mask_ptr = mask;
 
 	return (0);
 }
 
 /*
  * -del callback: removes the entry described by the key/mask prepared
  * in @ta_buf from the IPv4 or IPv6 tree.
  * On success the removed entry is saved in @ta_buf (to be freed by
  * -flush_entry), its value is stored in @tei, the per-family counter
  * is decremented and *pnum is set to 1.
  *
  * Returns 0 on success, ENOENT if there is no such entry.
  */
 static int
 ta_del_radix(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct radix_cfg *rcfg;
 	struct radix_node_head *head;
 	struct radix_node *node;
 	struct ta_buf_radix *buf;
 	int is_v4;
 
 	rcfg = (struct radix_cfg *)ta_state;
 	buf = (struct ta_buf_radix *)ta_buf;
 	is_v4 = (tei->subtype == AF_INET);
 
 	if (is_v4)
 		head = ti->state;
 	else
 		head = ti->xstate;
 
 	node = head->rnh_deladdr(buf->addr_ptr, buf->mask_ptr, head);
 	if (node == NULL)
 		return (ENOENT);
 
 	/* Report the old value back and account for the removal */
 	if (is_v4) {
 		tei->value = ((struct radix_addr_entry *)node)->value;
 		rcfg->count4--;
 	} else {
 		tei->value = ((struct radix_addr_xentry *)node)->value;
 		rcfg->count6--;
 	}
 
 	buf->ent_ptr = node;
 	*pnum = 1;
 
 	return (0);
 }
 
 /*
  * Release whatever entry is still parked in the request buffer
  * (tb->ent_ptr), if any.
  */
 static void
 ta_flush_radix_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_radix *rbuf = (struct ta_buf_radix *)ta_buf;
 
 	if (rbuf->ent_ptr == NULL)
 		return;
 
 	free(rbuf->ent_ptr, M_IPFW_TBL);
 }
 
 /*
  * Tell the caller whether the table has to be resized before adding
  * @count entries.  For radix the answer is always "no" (0); @pflags is
  * left untouched.
  */
 static int
 ta_need_modify_radix(void *ta_state, struct table_info *ti, uint32_t count,
     uint64_t *pflags)
 {
 
 	/*
 	 * The radix tree needs no memory beyond the nodes themselves.
 	 * Adding new masks to the tree does, but there is no API to call
 	 * for that (and we do not know which sizes would be needed).
 	 */
 	return (0);
 }
 
 /*
  * Algorithm descriptor for "addr:radix": address tables backed by radix
  * trees.  Carries TA_FLAG_DEFAULT.  No print_config or table-modification
  * callbacks other than need_modify are provided.
  */
 struct table_algo addr_radix = {
 	.name		= "addr:radix",
 	.type		= IPFW_TABLE_ADDR,
 	.flags		= TA_FLAG_DEFAULT,
 	.ta_buf_size	= sizeof(struct ta_buf_radix),
 	.init		= ta_init_radix,
 	.destroy	= ta_destroy_radix,
 	.prepare_add	= ta_prepare_add_radix,
 	.prepare_del	= ta_prepare_del_radix,
 	.add		= ta_add_radix,
 	.del		= ta_del_radix,
 	.flush_entry	= ta_flush_radix_entry,
 	.foreach	= ta_foreach_radix,
 	.dump_tentry	= ta_dump_radix_tentry,
 	.find_tentry	= ta_find_radix_tentry,
 	.dump_tinfo	= ta_dump_radix_tinfo,
 	.need_modify	= ta_need_modify_radix,
 };
 
 
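 /*
  * addr:hash cmds
  *
  *
  * ti->data:
  * [inv.mask4][inv.mask6][log2hsize4][log2hsize6]
  * [        8][        8][         8][         8]
  *
  * inv.mask4: 32 - mask
  * inv.mask6:
  * 1) _slow lookup: mask
  * 2) _aligned: mask / 8 (prefix length in bytes)
  * 3) _64: 128 - mask (stored, but not read by the /64 lookup)
  *
  *
  * pflags (table resize request):
  * [new hsize4][new hsize6]
  * [        16][        16]
  * A zero field means "do not resize that hash".
  */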
 
 struct chashentry;
 
 /* Hash bucket: head of a singly-linked list of chashentry */
 SLIST_HEAD(chashbhead, chashentry);
 
 /* Per-table state of an addr:hash table (the algo's ta_state) */
 struct chash_cfg {
 	struct chashbhead *head4;	/* IPv4 bucket array */
 	struct chashbhead *head6;	/* IPv6 bucket array */
 	size_t	size4;			/* Number of IPv4 buckets */
 	size_t	size6;			/* Number of IPv6 buckets */
 	size_t	items4;			/* Number of IPv4 entries stored */
 	size_t	items6;			/* Number of IPv6 entries stored */
 	uint8_t	mask4;			/* Prefix length of all IPv4 entries */
 	uint8_t	mask6;			/* Prefix length of all IPv6 entries */
 };
 
 /* Single table record */
 struct chashentry {
 	SLIST_ENTRY(chashentry)	next;	/* Bucket linkage */
 	uint32_t	value;		/* Value associated with the key */
 	uint32_t	type;		/* AF_INET or AF_INET6 */
 	union {
 		/* Host format, shifted right by (32 - prefix length) */
 		uint32_t	a4;
 		/* Network format, already masked */
 		struct in6_addr	a6;
 	} a;
 };
 
 /* Per-request scratch buffer passed between prepare/add/del/flush */
 struct ta_buf_chash
 {
 	/* Heap entry to be freed by flush (unused new or removed old one) */
 	void *ent_ptr;
 	/* Search key built by the delete-prepare step */
 	struct chashentry ent;
 };
 
 /*
  * Forward declarations for the addr:hash implementation: hashing helpers
  * (compiled per address family), the three lookup variants, option
  * parsing and the table_algo callbacks.
  */
 #ifdef INET
 static __inline uint32_t hash_ip(uint32_t addr, int hsize);
 #endif
 #ifdef INET6
 static __inline uint32_t hash_ip6(struct in6_addr *addr6, int hsize);
 static __inline uint16_t hash_ip64(struct in6_addr *addr6, int hsize);
 static __inline uint32_t hash_ip6_slow(struct in6_addr *addr6, void *key,
     int mask, int hsize);
 static __inline uint32_t hash_ip6_al(struct in6_addr *addr6, void *key, int mask,
     int hsize);
 #endif
 static int ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val);
 static int ta_lookup_chash_aligned(struct table_info *ti, void *key,
     uint32_t keylen, uint32_t *val);
 static int ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val);
 static int chash_parse_opts(struct chash_cfg *cfg, char *data);
 static void ta_print_chash_config(void *ta_state, struct table_info *ti,
     char *buf, size_t bufsize);
 static int ta_log2(uint32_t v);
 static int ta_init_chash(struct ip_fw_chain *ch, void **ta_state,
     struct table_info *ti, char *data, uint8_t tflags);
 static void ta_destroy_chash(void *ta_state, struct table_info *ti);
 static void ta_dump_chash_tinfo(void *ta_state, struct table_info *ti,
     ipfw_ta_tinfo *tinfo);
 static int ta_dump_chash_tentry(void *ta_state, struct table_info *ti,
     void *e, ipfw_obj_tentry *tent);
 static uint32_t hash_ent(struct chashentry *ent, int af, int mlen,
     uint32_t size);
 static int tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent);
 static int ta_find_chash_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent);
 static void ta_foreach_chash(void *ta_state, struct table_info *ti,
     ta_foreach_f *f, void *arg);
 static int ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_add_chash(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static int ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_del_chash(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static void ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_need_modify_chash(void *ta_state, struct table_info *ti,
     uint32_t count, uint64_t *pflags);
 static int ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags);
 static int ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t *pflags);
 static void ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t pflags);
 static void ta_flush_mod_chash(void *ta_buf);
 
 
 #ifdef INET
 /*
  * Map an IPv4 key to a bucket index.  The modulus is @hsize - 1, so the
  * result is always below @hsize - 1.
  */
 static __inline uint32_t
 hash_ip(uint32_t addr, int hsize)
 {
 	uint32_t modulus;
 
 	modulus = hsize - 1;
 
 	return (addr % modulus);
 }
 #endif
 
 #ifdef INET6
 /*
  * Map a full IPv6 address to a bucket index: XOR of its four 32-bit
  * words, reduced modulo @hsize - 1.
  */
 static __inline uint32_t
 hash_ip6(struct in6_addr *addr6, int hsize)
 {
 	uint32_t folded;
 	int w;
 
 	folded = 0;
 	for (w = 0; w < 4; w++)
 		folded ^= addr6->s6_addr32[w];
 
 	return (folded % (hsize - 1));
 }
 
 
 /*
  * Map the upper 64 bits of an IPv6 address to a bucket index: XOR of
  * the first two 32-bit words, reduced modulo @hsize - 1.
  */
 static __inline uint16_t
 hash_ip64(struct in6_addr *addr6, int hsize)
 {
 	uint32_t folded;
 
 	folded = addr6->s6_addr32[0];
 	folded ^= addr6->s6_addr32[1];
 
 	return (folded % (hsize - 1));
 }
 
 
 /*
  * Copy @key into @addr6, mask it down to @mask prefix bits and return
  * the bucket index of the masked address.  @addr6 is left holding the
  * masked address so the caller can compare it against entries.
  */
 static __inline uint32_t
 hash_ip6_slow(struct in6_addr *addr6, void *key, int mask, int hsize)
 {
 	struct in6_addr prefix_mask;
 
 	memcpy(addr6, key, sizeof(struct in6_addr));
 	ipv6_writemask(&prefix_mask, mask);
 	APPLY_MASK(addr6, &prefix_mask);
 
 	return (hash_ip6(addr6, hsize));
 }
 
 /*
  * Byte-aligned variant: zero @addr6, copy the first @mask bytes of @key
  * into it and return the bucket index of the result.  Here @mask is a
  * byte count, not a bit count.
  */
 static __inline uint32_t
 hash_ip6_al(struct in6_addr *addr6, void *key, int mask, int hsize)
 {
 
 	memset(addr6, 0, 2 * sizeof(uint64_t));
 	memcpy(addr6, key, mask);
 
 	return (hash_ip6(addr6, hsize));
 }
 #endif
 
 /*
  * Lookup handler used when the IPv6 prefix length is not a multiple
  * of 8.  IPv4 keys (keylen == sizeof(in_addr_t)) are shifted right by the
  * inverted mask kept in ti->data; IPv6 keys are masked bit-wise.
  *
  * Returns 1 and stores the value in *val on a match, 0 otherwise.
  */
 static int
 ta_lookup_chash_slow(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val)
 {
 	struct chashbhead *buckets;
 	struct chashentry *cur;
 	uint16_t slot, nbuckets;
 	uint8_t mparam;
 
 	if (keylen == sizeof(in_addr_t)) {
 #ifdef INET
 		uint32_t a4;
 
 		buckets = (struct chashbhead *)ti->state;
 		mparam = ti->data >> 24;
 		nbuckets = 1 << ((ti->data & 0xFFFF) >> 8);
 		a4 = ntohl(*((in_addr_t *)key));
 		a4 >>= mparam;
 		slot = hash_ip(a4, nbuckets);
 		SLIST_FOREACH(cur, &buckets[slot], next) {
 			if (cur->a.a4 != a4)
 				continue;
 			*val = cur->value;
 			return (1);
 		}
 #endif
 		return (0);
 	}
 
 #ifdef INET6
 	{
 		/* IPv6: worst scenario: non-round mask */
 		struct in6_addr masked;
 
 		buckets = (struct chashbhead *)ti->xstate;
 		mparam = (ti->data & 0xFF0000) >> 16;
 		nbuckets = 1 << (ti->data & 0xFF);
 		slot = hash_ip6_slow(&masked, key, mparam, nbuckets);
 		SLIST_FOREACH(cur, &buckets[slot], next) {
 			if (memcmp(&cur->a.a6, &masked, 16) != 0)
 				continue;
 			*val = cur->value;
 			return (1);
 		}
 	}
 #endif
 
 	return (0);
 }
 
 /*
  * Lookup handler used when the IPv6 prefix length is a multiple of 8
  * (other than 64).  The IPv6 mask parameter read from ti->data is passed
  * to hash_ip6_al() as the number of key bytes to keep.
  *
  * Returns 1 and stores the value in *val on a match, 0 otherwise.
  */
 static int
 ta_lookup_chash_aligned(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val)
 {
 	struct chashbhead *buckets;
 	struct chashentry *cur;
 	uint16_t slot, nbuckets;
 	uint8_t mparam;
 
 	if (keylen == sizeof(in_addr_t)) {
 #ifdef INET
 		uint32_t a4;
 
 		buckets = (struct chashbhead *)ti->state;
 		mparam = ti->data >> 24;
 		nbuckets = 1 << ((ti->data & 0xFFFF) >> 8);
 		a4 = ntohl(*((in_addr_t *)key));
 		a4 >>= mparam;
 		slot = hash_ip(a4, nbuckets);
 		SLIST_FOREACH(cur, &buckets[slot], next) {
 			if (cur->a.a4 != a4)
 				continue;
 			*val = cur->value;
 			return (1);
 		}
 #endif
 		return (0);
 	}
 
 #ifdef INET6
 	{
 		/* IPv6: aligned to 8bit mask */
 		struct in6_addr masked;
 		uint64_t *want, *have;
 
 		buckets = (struct chashbhead *)ti->xstate;
 		mparam = (ti->data & 0xFF0000) >> 16;
 		nbuckets = 1 << (ti->data & 0xFF);
 
 		slot = hash_ip6_al(&masked, key, mparam, nbuckets);
 		want = (uint64_t *)&masked;
 		SLIST_FOREACH(cur, &buckets[slot], next) {
 			have = (uint64_t *)&cur->a.a6;
 			if (want[0] != have[0] || want[1] != have[1])
 				continue;
 			*val = cur->value;
 			return (1);
 		}
 	}
 #endif
 
 	return (0);
 }
 
 /*
  * Lookup handler used when the IPv6 prefix length is exactly 64: only
  * the first 8 bytes of the key are hashed and compared.
  *
  * Returns 1 and stores the value in *val on a match, 0 otherwise.
  */
 static int
 ta_lookup_chash_64(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val)
 {
 	struct chashbhead *buckets;
 	struct chashentry *cur;
 	uint16_t slot, nbuckets;
 	uint8_t mparam;
 
 	if (keylen == sizeof(in_addr_t)) {
 #ifdef INET
 		uint32_t a4;
 
 		buckets = (struct chashbhead *)ti->state;
 		mparam = ti->data >> 24;
 		nbuckets = 1 << ((ti->data & 0xFFFF) >> 8);
 		a4 = ntohl(*((in_addr_t *)key));
 		a4 >>= mparam;
 		slot = hash_ip(a4, nbuckets);
 		SLIST_FOREACH(cur, &buckets[slot], next) {
 			if (cur->a.a4 != a4)
 				continue;
 			*val = cur->value;
 			return (1);
 		}
 #endif
 		return (0);
 	}
 
 #ifdef INET6
 	{
 		/* IPv6: /64 */
 		uint64_t upper, *stored;
 
 		buckets = (struct chashbhead *)ti->xstate;
 		nbuckets = 1 << (ti->data & 0xFF);
 		upper = *((uint64_t *)key);
 		slot = hash_ip64((struct in6_addr *)key, nbuckets);
 		SLIST_FOREACH(cur, &buckets[slot], next) {
 			stored = (uint64_t *)&cur->a.a6;
 			if (upper != *stored)
 				continue;
 			*val = cur->value;
 			return (1);
 		}
 	}
 #endif
 
 	return (0);
 }
 
 /*
  * Parse the optional "masks=/XX[,/YY]" argument that may follow the
  * algorithm name in @data and store the prefix lengths in @cfg.
  *
  * @data is modified in place: the masks= token is NUL-terminated at the
  * next space.  A NULL @data, or one with no space after the algorithm
  * name, leaves @cfg untouched and succeeds.
  *
  * Returns 0 on success or EINVAL on malformed input: unknown option,
  * missing '/', missing digits, trailing garbage, or a value outside
  * 0..32 (IPv4) / 0..128 (IPv6).  @cfg is written only on success.
  */
 static int
 chash_parse_opts(struct chash_cfg *cfg, char *data)
 {
 	char *pdel, *pend, *s;
 	long mask4, mask6;
 
 	mask4 = cfg->mask4;
 	mask6 = cfg->mask6;
 
 	if (data == NULL)
 		return (0);
 	if ((pdel = strchr(data, ' ')) == NULL)
 		return (0);
 	while (*pdel == ' ')
 		pdel++;
 	if (strncmp(pdel, "masks=", 6) != 0)
 		return (EINVAL);
 	/* Cut the token off at the next space, if any */
 	if ((s = strchr(pdel, ' ')) != NULL)
 		*s = '\0';
 
 	pdel += 6;
 	/* Need /XX[,/YY] */
 	if (*pdel++ != '/')
 		return (EINVAL);
 	mask4 = strtol(pdel, &pend, 10);
 	/* strtol() consumed nothing: "masks=/" must not mean /0 */
 	if (pend == pdel)
 		return (EINVAL);
 	if (*pend == ',') {
 		/* ,/YY */
 		pdel = pend + 1;
 		if (*pdel++ != '/')
 			return (EINVAL);
 		mask6 = strtol(pdel, &pend, 10);
 		if (pend == pdel)
 			return (EINVAL);
 		if (*pend != '\0')
 			return (EINVAL);
 	} else if (*pend != '\0')
 		return (EINVAL);
 
 	/*
 	 * Range-check while the values are still long so that a huge
 	 * number cannot wrap into the valid range when narrowed.
 	 */
 	if (mask4 < 0 || mask4 > 32 || mask6 < 0 || mask6 > 128)
 		return (EINVAL);
 
 	cfg->mask4 = mask4;
 	cfg->mask6 = mask6;
 
 	return (0);
 }
 
 /*
  * Write the algorithm name into @buf, followed by the masks= option
  * when either prefix length differs from the /32,/128 defaults.
  */
 static void
 ta_print_chash_config(void *ta_state, struct table_info *ti, char *buf,
     size_t bufsize)
 {
 	struct chash_cfg *cfg = (struct chash_cfg *)ta_state;
 
 	if (cfg->mask4 == 32 && cfg->mask6 == 128) {
 		snprintf(buf, bufsize, "%s", "addr:hash");
 		return;
 	}
 
 	snprintf(buf, bufsize, "%s masks=/%d,/%d", "addr:hash",
 	    cfg->mask4, cfg->mask6);
 }
 
 /*
  * Integer base-2 logarithm, rounded down.  Returns 0 for both 0 and 1.
  */
 static int
 ta_log2(uint32_t v)
 {
 	uint32_t bits;
 
 	for (bits = 0; v > 1; v >>= 1)
 		bits++;
 
 	return (bits);
 }
 
 /*
  * New table.
  * We assume 'data' to be either NULL or the following format:
  * 'addr:hash [masks=/32[,/128]]'
  */
 static int
 ta_init_chash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
     char *data, uint8_t tflags)
 {
 	int error, i;
 	uint32_t hsize;
 	struct chash_cfg *cfg;
 
 	cfg = malloc(sizeof(struct chash_cfg), M_IPFW, M_WAITOK | M_ZERO);
 
 	cfg->mask4 = 32;
 	cfg->mask6 = 128;
 
 	if ((error = chash_parse_opts(cfg, data)) != 0) {
 		free(cfg, M_IPFW);
 		return (error);
 	}
 
 	cfg->size4 = 128;
 	cfg->size6 = 128;
 
 	cfg->head4 = malloc(sizeof(struct chashbhead) * cfg->size4, M_IPFW,
 	    M_WAITOK | M_ZERO);
 	cfg->head6 = malloc(sizeof(struct chashbhead) * cfg->size6, M_IPFW,
 	    M_WAITOK | M_ZERO);
 	for (i = 0; i < cfg->size4; i++)
 		SLIST_INIT(&cfg->head4[i]);
 	for (i = 0; i < cfg->size6; i++)
 		SLIST_INIT(&cfg->head6[i]);
 
 
 	*ta_state = cfg;
 	ti->state = cfg->head4;
 	ti->xstate = cfg->head6;
 
 	/* Store data depending on v6 mask length */
 	hsize = ta_log2(cfg->size4) << 8 | ta_log2(cfg->size6);
 	if (cfg->mask6 == 64) {
 		ti->data = (32 - cfg->mask4) << 24 | (128 - cfg->mask6) << 16|
 		    hsize;
 		ti->lookup = ta_lookup_chash_64;
 	} else if ((cfg->mask6  % 8) == 0) {
 		ti->data = (32 - cfg->mask4) << 24 |
 		    cfg->mask6 << 13 | hsize;
 		ti->lookup = ta_lookup_chash_aligned;
 	} else {
 		/* don't do that! */
 		ti->data = (32 - cfg->mask4) << 24 |
 		    cfg->mask6 << 16 | hsize;
 		ti->lookup = ta_lookup_chash_slow;
 	}
 
 	return (0);
 }
 
 static void
 ta_destroy_chash(void *ta_state, struct table_info *ti)
 {
 	struct chash_cfg *cfg;
 	struct chashentry *ent, *ent_next;
 	int i;
 
 	cfg = (struct chash_cfg *)ta_state;
 
 	for (i = 0; i < cfg->size4; i++)
 		SLIST_FOREACH_SAFE(ent, &cfg->head4[i], next, ent_next)
 			free(ent, M_IPFW_TBL);
 
 	for (i = 0; i < cfg->size6; i++)
 		SLIST_FOREACH_SAFE(ent, &cfg->head6[i], next, ent_next)
 			free(ent, M_IPFW_TBL);
 
 	free(cfg->head4, M_IPFW);
 	free(cfg->head6, M_IPFW);
 
 	free(cfg, M_IPFW);
 }
 
 /*
  * Provide algo-specific table info: per-family hash size, item count
  * and item size.
  */
 static void
 ta_dump_chash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
 {
 	struct chash_cfg *cfg = (struct chash_cfg *)ta_state;
 
 	tinfo->flags = IPFW_TATFLAGS_AFDATA | IPFW_TATFLAGS_AFITEM;
 
 	/* IPv4 hash */
 	tinfo->taclass4 = IPFW_TACLASS_HASH;
 	tinfo->itemsize4 = sizeof(struct chashentry);
 	tinfo->size4 = cfg->size4;
 	tinfo->count4 = cfg->items4;
 
 	/* IPv6 hash */
 	tinfo->taclass6 = IPFW_TACLASS_HASH;
 	tinfo->itemsize6 = sizeof(struct chashentry);
 	tinfo->size6 = cfg->size6;
 	tinfo->count6 = cfg->items6;
 }
 
 /*
  * Export hash entry @e into the userland record @tent.  The prefix
  * length comes from the table config.  Always returns 0.
  */
 static int
 ta_dump_chash_tentry(void *ta_state, struct table_info *ti, void *e,
     ipfw_obj_tentry *tent)
 {
 	struct chash_cfg *cfg = (struct chash_cfg *)ta_state;
 	struct chashentry *src = (struct chashentry *)e;
 
 	if (src->type == AF_INET) {
 		/* Undo the right shift applied when the entry was built */
 		tent->k.addr.s_addr = htonl(src->a.a4 << (32 - cfg->mask4));
 		tent->masklen = cfg->mask4;
 		tent->subtype = AF_INET;
 		tent->v.kidx = src->value;
 		return (0);
 	}
 
 #ifdef INET6
 	memcpy(&tent->k, &src->a.a6, sizeof(struct in6_addr));
 	tent->masklen = cfg->mask6;
 	tent->subtype = AF_INET6;
 	tent->v.kidx = src->value;
 #endif
 
 	return (0);
 }
 
 /*
  * Compute the bucket index of @ent in a hash of @size buckets.
  * IPv6 entries with a /64 prefix use the 64-bit hash, other IPv6
  * entries the full-address hash.  Returns 0 when support for the
  * family is not compiled in.
  */
 static uint32_t
 hash_ent(struct chashentry *ent, int af, int mlen, uint32_t size)
 {
 
 	if (af == AF_INET) {
 #ifdef INET
 		return (hash_ip(ent->a.a4, size));
 #else
 		return (0);
 #endif
 	}
 
 #ifdef INET6
 	if (mlen == 64)
 		return (hash_ip64(&ent->a.a6, size));
 	return (hash_ip6(&ent->a.a6, size));
 #else
 	return (0);
 #endif
 }
 
 /*
  * Fill the key part of @ent (type and masked address) from @tei.
  * IPv4 keys are stored in host order, shifted right by the number of
  * host bits; IPv6 keys are stored masked in network order.
  *
  * Returns 0 on success, EINVAL on a too-long prefix or unknown family.
  */
 static int
 tei_to_chash_ent(struct tentry_info *tei, struct chashentry *ent)
 {
 	int prefix_len;
 #ifdef INET6
 	struct in6_addr prefix_mask;
 #endif
 
 	prefix_len = tei->masklen;
 
 	switch (tei->subtype) {
 	case AF_INET:
 #ifdef INET
 		if (prefix_len > 32)
 			return (EINVAL);
 		ent->type = AF_INET;
 
 		/* Calculate masked address */
 		ent->a.a4 = ntohl(*((in_addr_t *)tei->paddr)) >>
 		    (32 - prefix_len);
 #endif
 		break;
 #ifdef INET6
 	case AF_INET6:
 		if (prefix_len > 128)
 			return (EINVAL);
 		ent->type = AF_INET6;
 
 		ipv6_writemask(&prefix_mask, prefix_len);
 		memcpy(&ent->a.a6, tei->paddr, sizeof(struct in6_addr));
 		APPLY_MASK(&ent->a.a6, &prefix_mask);
 		break;
 #endif
 	default:
 		/* Unknown CIDR type */
 		return (EINVAL);
 	}
 
 	return (0);
 }
 
 /*
  * Look up the address carried in @tent, using the table's configured
  * prefix length, and overwrite @tent with the matching entry.
  *
  * Returns 0 when found, ENOENT when not, or the error from
  * tei_to_chash_ent().
  */
 static int
 ta_find_chash_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent)
 {
 	struct chash_cfg *cfg = (struct chash_cfg *)ta_state;
 	struct chashbhead *buckets;
 	struct chashentry probe, *cur;
 	struct tentry_info tei;
 	uint32_t slot;
 	int error, is_v4;
 
 	memset(&probe, 0, sizeof(probe));
 	memset(&tei, 0, sizeof(tei));
 
 	/* Build a temporary request describing the key */
 	is_v4 = (tent->subtype == AF_INET);
 	if (is_v4) {
 		tei.paddr = &tent->k.addr;
 		tei.masklen = cfg->mask4;
 		tei.subtype = AF_INET;
 	} else {
 		tei.paddr = &tent->k.addr6;
 		tei.masklen = cfg->mask6;
 		tei.subtype = AF_INET6;
 	}
 
 	error = tei_to_chash_ent(&tei, &probe);
 	if (error != 0)
 		return (error);
 
 	if (is_v4) {
 		buckets = cfg->head4;
 		slot = hash_ent(&probe, AF_INET, cfg->mask4, cfg->size4);
 	} else {
 		buckets = cfg->head6;
 		slot = hash_ent(&probe, AF_INET6, cfg->mask6, cfg->size6);
 	}
 
 	/* Check for existence */
 	SLIST_FOREACH(cur, &buckets[slot], next) {
 		if (is_v4) {
 			if (cur->a.a4 != probe.a.a4)
 				continue;
 		} else if (memcmp(&cur->a.a6, &probe.a.a6, 16) != 0)
 			continue;
 
 		ta_dump_chash_tentry(ta_state, ti, cur, tent);
 		return (0);
 	}
 
 	return (ENOENT);
 }
 
 /*
  * Invoke @f(entry, @arg) for every IPv4 entry, then for every IPv6
  * entry.  The next pointer is fetched before the callback runs.
  */
 static void
 ta_foreach_chash(void *ta_state, struct table_info *ti, ta_foreach_f *f,
     void *arg)
 {
 	struct chash_cfg *cfg = (struct chash_cfg *)ta_state;
 	struct chashentry *cur, *nxt;
 	int b;
 
 	for (b = 0; b < cfg->size4; b++) {
 		SLIST_FOREACH_SAFE(cur, &cfg->head4[b], next, nxt) {
 			f(cur, arg);
 		}
 	}
 
 	for (b = 0; b < cfg->size6; b++) {
 		SLIST_FOREACH_SAFE(cur, &cfg->head6[b], next, nxt) {
 			f(cur, arg);
 		}
 	}
 }
 
 static int
 ta_prepare_add_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_chash *tb;
 	struct chashentry *ent;
 	int error;
 
 	tb = (struct ta_buf_chash *)ta_buf;
 
 	ent = malloc(sizeof(*ent), M_IPFW_TBL, M_WAITOK | M_ZERO);
 
 	error = tei_to_chash_ent(tei, ent);
 	if (error != 0) {
 		free(ent, M_IPFW_TBL);
 		return (error);
 	}
 	tb->ent_ptr = ent;
 
 	return (0);
 }
 
 /*
  * Insert the entry prepared in @ta_buf, or update an existing one.
  *
  * Returns:
  *  EINVAL - prefix length differs from the table's configured one
  *  EEXIST - key already present and TEI_FLAGS_UPDATE is not set
  *  EFBIG  - key absent and TEI_FLAGS_DONTADD is set
  *  0      - success; *pnum is 1 for an insert, 0 for an update.  On
  *           update the old value is handed back in tei->value and
  *           TEI_FLAGS_UPDATED is set.
  */
 static int
 ta_add_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct chash_cfg *cfg = (struct chash_cfg *)ta_state;
 	struct ta_buf_chash *tb = (struct ta_buf_chash *)ta_buf;
 	struct chashbhead *buckets;
 	struct chashentry *fresh, *cur, *dup;
 	uint32_t slot, old_value;
 	int is_v4;
 
 	fresh = (struct chashentry *)tb->ent_ptr;
 	dup = NULL;
 	is_v4 = (tei->subtype == AF_INET);
 
 	/* Read current value from @tei */
 	fresh->value = tei->value;
 
 	/* Locate the bucket and look for an entry with the same key */
 	if (is_v4) {
 		if (tei->masklen != cfg->mask4)
 			return (EINVAL);
 		buckets = cfg->head4;
 		slot = hash_ent(fresh, AF_INET, cfg->mask4, cfg->size4);
 
 		SLIST_FOREACH(cur, &buckets[slot], next) {
 			if (cur->a.a4 != fresh->a.a4)
 				continue;
 			dup = cur;
 			break;
 		}
 	} else {
 		if (tei->masklen != cfg->mask6)
 			return (EINVAL);
 		buckets = cfg->head6;
 		slot = hash_ent(fresh, AF_INET6, cfg->mask6, cfg->size6);
 
 		SLIST_FOREACH(cur, &buckets[slot], next) {
 			if (memcmp(&cur->a.a6, &fresh->a.a6, 16) != 0)
 				continue;
 			dup = cur;
 			break;
 		}
 	}
 
 	if (dup != NULL) {
 		if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
 			return (EEXIST);
 
 		/* Record already exists: swap values with @tei */
 		old_value = dup->value;
 		dup->value = tei->value;
 		tei->value = old_value;
 
 		/* Indicate that update has happened instead of addition */
 		tei->flags |= TEI_FLAGS_UPDATED;
 		*pnum = 0;
 
 		return (0);
 	}
 
 	if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
 		return (EFBIG);
 
 	/* The hash now owns the entry; keep flush from freeing it */
 	SLIST_INSERT_HEAD(&buckets[slot], fresh, next);
 	tb->ent_ptr = NULL;
 	*pnum = 1;
 
 	/* Update counters */
 	if (is_v4)
 		cfg->items4++;
 	else
 		cfg->items6++;
 
 	return (0);
 }
 
 static int
 ta_prepare_del_chash(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_chash *tb;
 
 	tb = (struct ta_buf_chash *)ta_buf;
 
 	return (tei_to_chash_ent(tei, &tb->ent));
 }
 
 static int
 ta_del_chash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct chash_cfg *cfg;
 	struct chashbhead *head;
 	struct chashentry *tmp, *tmp_next, *ent;
 	struct ta_buf_chash *tb;
 	uint32_t hash;
 
 	cfg = (struct chash_cfg *)ta_state;
 	tb = (struct ta_buf_chash *)ta_buf;
 	ent = &tb->ent;
 
 	if (tei->subtype == AF_INET) {
 		if (tei->masklen != cfg->mask4)
 			return (EINVAL);
 		head = cfg->head4;
 		hash = hash_ent(ent, AF_INET, cfg->mask4, cfg->size4);
 
 		SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) {
 			if (tmp->a.a4 != ent->a.a4)
 				continue;
 
 			SLIST_REMOVE(&head[hash], tmp, chashentry, next);
 			cfg->items4--;
 			tb->ent_ptr = tmp;
 			tei->value = tmp->value;
 			*pnum = 1;
 			return (0);
 		}
 	} else {
 		if (tei->masklen != cfg->mask6)
 			return (EINVAL);
 		head = cfg->head6;
 		hash = hash_ent(ent, AF_INET6, cfg->mask6, cfg->size6);
 		SLIST_FOREACH_SAFE(tmp, &head[hash], next, tmp_next) {
 			if (memcmp(&tmp->a.a6, &ent->a.a6, 16) != 0)
 				continue;
 
 			SLIST_REMOVE(&head[hash], tmp, chashentry, next);
 			cfg->items6--;
 			tb->ent_ptr = tmp;
 			tei->value = tmp->value;
 			*pnum = 1;
 			return (0);
 		}
 	}
 
 	return (ENOENT);
 }
 
 /*
  * Release whatever entry is still parked in the request buffer
  * (tb->ent_ptr), if any.
  */
 static void
 ta_flush_chash_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_chash *tb = (struct ta_buf_chash *)ta_buf;
 
 	if (tb->ent_ptr == NULL)
 		return;
 
 	free(tb->ent_ptr, M_IPFW_TBL);
 }
 
 /*
  * Hash growing callbacks.
  */
 
 /*
  * Decide whether either hash should be grown.
  *
  * A hash is grown (doubled) when it holds more items than it has buckets.
  * The request is returned in *pflags as two 16-bit fields, the new IPv4
  * size in bits 16..31 and the new IPv6 size in bits 0..15, which is the
  * layout ta_prepare_mod_chash() decodes.
  *
  * Growth stops once the doubled size would no longer fit in 16 bits.
  * Asking for 65536 buckets used to produce a request that decoded to
  * "no resize" (and, for IPv6, spilled into the IPv4 field), so the table
  * never grew while this function kept reporting that it should.
  *
  * Returns 1 with *pflags set when a resize is wanted, 0 otherwise
  * (*pflags is then left untouched).
  */
 static int
 ta_need_modify_chash(void *ta_state, struct table_info *ti, uint32_t count,
     uint64_t *pflags)
 {
 	struct chash_cfg *cfg;
 	uint64_t data;
 
 	/*
 	 * Since we don't know exact number of IPv4/IPv6 records in @count,
 	 * ignore non-zero @count value at all. Check current hash sizes
 	 * and return appropriate data.
 	 */
 
 	cfg = (struct chash_cfg *)ta_state;
 
 	data = 0;
 	if (cfg->items4 > cfg->size4 && cfg->size4 * 2 <= 0xFFFF)
 		data |= (uint64_t)(cfg->size4 * 2) << 16;
 	if (cfg->items6 > cfg->size6 && cfg->size6 * 2 <= 0xFFFF)
 		data |= cfg->size6 * 2;
 
 	if (data != 0) {
 		*pflags = data;
 		return (1);
 	}
 
 	return (0);
 }
 
 /*
  * Allocate new, larger chash.
  *
  * The requested sizes are decoded from *pflags: bits 16..31 for IPv4,
  * bits 0..15 for IPv6.  A zero size means that hash is not resized and
  * nothing is allocated for it.  Always returns 0.
  */
 static int
 ta_prepare_mod_chash(void *ta_buf, uint64_t *pflags)
 {
 	struct mod_item *mi = (struct mod_item *)ta_buf;
 	struct chashbhead *buckets;
 	int b;
 
 	memset(mi, 0, sizeof(struct mod_item));
 	mi->size = (*pflags >> 16) & 0xFFFF;
 	mi->size6 = *pflags & 0xFFFF;
 
 	/* IPv4 array */
 	if (mi->size > 0) {
 		buckets = malloc(sizeof(struct chashbhead) * mi->size,
 		    M_IPFW, M_WAITOK | M_ZERO);
 		for (b = 0; b < mi->size; b++)
 			SLIST_INIT(&buckets[b]);
 		mi->main_ptr = buckets;
 	}
 
 	/* IPv6 array */
 	if (mi->size6 > 0) {
 		buckets = malloc(sizeof(struct chashbhead) * mi->size6,
 		    M_IPFW, M_WAITOK | M_ZERO);
 		for (b = 0; b < mi->size6; b++)
 			SLIST_INIT(&buckets[b]);
 		mi->main_ptr6 = buckets;
 	}
 
 	return (0);
 }
 
 /*
  * Copy data from old runtime array to new one.
  *
  * Intentionally does nothing and returns 0; the entries are moved in
  * ta_modify_chash() instead.
  */
 static int
 ta_fill_mod_chash(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t *pflags)
 {
 
 	/* It is not possible to do rehash if we're not holding WLOCK. */
 	return (0);
 }
 
 /*
  * Switch old & new arrays.
  *
  * For each family whose new array is still larger than the current one,
  * move every entry into the new array (rehashing for the new size),
  * publish the new array through @ti and the config, and leave the old
  * array in @ta_buf so that ta_flush_mod_chash() frees it.  If a resize is
  * no longer needed, the unused new array stays in @ta_buf and is freed
  * the same way.
  *
  * Finally the two log2(size) bytes of ti->data are refreshed.
  */
 static void
 ta_modify_chash(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t pflags)
 {
 	struct mod_item *mi;
 	struct chash_cfg *cfg;
 	struct chashbhead *old_head, *new_head;
 	struct chashentry *ent, *ent_next;
 	int af, i, mlen;
 	uint32_t nhash;
 	size_t old_size, new_size;
 
 	mi = (struct mod_item *)ta_buf;
 	cfg = (struct chash_cfg *)ta_state;
 
 	/* Check which hash we need to grow and do we still need that */
 	if (mi->size > 0 && cfg->size4 < mi->size) {
 		new_head = (struct chashbhead *)mi->main_ptr;
 		new_size = mi->size;
 		old_size = cfg->size4;
 		old_head = ti->state;
 		mlen = cfg->mask4;
 		af = AF_INET;
 
 		/* _SAFE: inserting into the new list rewrites ent's link */
 		for (i = 0; i < old_size; i++) {
 			SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) {
 				nhash = hash_ent(ent, af, mlen, new_size);
 				SLIST_INSERT_HEAD(&new_head[nhash], ent, next);
 			}
 		}
 
 		ti->state = new_head;
 		cfg->head4 = new_head;
 		cfg->size4 = mi->size;
 		mi->main_ptr = old_head;
 	}
 
 	if (mi->size6 > 0 && cfg->size6 < mi->size6) {
 		new_head = (struct chashbhead *)mi->main_ptr6;
 		new_size = mi->size6;
 		old_size = cfg->size6;
 		old_head = ti->xstate;
 		mlen = cfg->mask6;
 		af = AF_INET6;
 
 		for (i = 0; i < old_size; i++) {
 			SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) {
 				nhash = hash_ent(ent, af, mlen, new_size);
 				SLIST_INSERT_HEAD(&new_head[nhash], ent, next);
 			}
 		}
 
 		ti->xstate = new_head;
 		cfg->head6 = new_head;
 		cfg->size6 = mi->size6;
 		mi->main_ptr6 = old_head;
 	}
 
 	/*
 	 * Update only the two log2(size) bytes (bits 0..15).  Bits 16..31
 	 * hold the mask data stored by ta_init_chash() and read by the
 	 * lookup handlers; clearing the whole low word here used to wipe
 	 * them and break masked lookups after the first resize.
 	 */
 	ti->data &= ~(uint64_t)0xFFFF;
 	ti->data |= ta_log2(cfg->size4) << 8 | ta_log2(cfg->size6);
 }
 
 /*
  * Free unneeded array(s): whatever bucket arrays are left in the
  * modification buffer.
  */
 static void
 ta_flush_mod_chash(void *ta_buf)
 {
 	struct mod_item *mi = (struct mod_item *)ta_buf;
 
 	if (mi->main_ptr != NULL) {
 		free(mi->main_ptr, M_IPFW);
 	}
 	if (mi->main_ptr6 != NULL) {
 		free(mi->main_ptr6, M_IPFW);
 	}
 }
 
 /*
  * Algorithm descriptor for "addr:hash": address tables backed by two
  * chained hashes (one per address family) with a fixed prefix length per
  * family.  Provides the full set of table-modification callbacks.
  */
 struct table_algo addr_hash = {
 	.name		= "addr:hash",
 	.type		= IPFW_TABLE_ADDR,
 	.ta_buf_size	= sizeof(struct ta_buf_chash),
 	.init		= ta_init_chash,
 	.destroy	= ta_destroy_chash,
 	.prepare_add	= ta_prepare_add_chash,
 	.prepare_del	= ta_prepare_del_chash,
 	.add		= ta_add_chash,
 	.del		= ta_del_chash,
 	.flush_entry	= ta_flush_chash_entry,
 	.foreach	= ta_foreach_chash,
 	.dump_tentry	= ta_dump_chash_tentry,
 	.find_tentry	= ta_find_chash_tentry,
 	.print_config	= ta_print_chash_config,
 	.dump_tinfo	= ta_dump_chash_tinfo,
 	.need_modify	= ta_need_modify_chash,
 	.prepare_mod	= ta_prepare_mod_chash,
 	.fill_mod	= ta_fill_mod_chash,
 	.modify		= ta_modify_chash,
 	.flush_mod	= ta_flush_mod_chash,
 };
 
 
 /*
  * Iface table cmds.
  *
  * Implementation:
  *
  * Runtime part:
  * - sorted array of "struct ifidx" pointed by ti->state.
  *   Array is allocated with rounding up to IFIDX_CHUNK. Only existing
  *   interfaces are stored in array, however its allocated size is
  *   sufficient to hold all table records if needed.
  * - current array size is stored in ti->data
  *
  * Table data:
  * - "struct iftable_cfg" is allocated to store table state (ta_state).
  * - All table records are stored inside namedobj instance.
  *
  */
 
 /* Runtime array element: one record of the sorted lookup array */
 struct ifidx {
 	uint16_t	kidx;	/* Sort/search key (see compare_ifidx()) */
 	uint16_t	spare;
 	uint32_t	value;	/* Value returned by the lookup */
 };
 /* Initial number of array slots and size hint for the name hash */
 #define	DEFAULT_IFIDX_SIZE	64
 
 struct iftable_cfg;
 
 /* Table record, stored inside the table's namedobj instance */
 struct ifentry {
 	struct named_object	no;	/* Name points at the iface name */
 	struct ipfw_ifc		ic;	/* Interface tracking state/callback */
 	struct iftable_cfg	*icfg;	/* Owning table config */
 	uint32_t		value;	/* Value associated with interface */
 	int			linked;	/* NOTE(review): presumably set while
 					 * the record is in the runtime
 					 * array - confirm */
 };
 
 /* Per-table state of an iface table (the algo's ta_state) */
 struct iftable_cfg {
 	struct namedobj_instance	*ii;	/* All records, by name */
 	struct ip_fw_chain	*ch;	/* Owning chain */
 	struct table_info	*ti;	/* Current tableinfo pointer */
 	void	*main_ptr;	/* Array of struct ifidx */
 	size_t	size;	/* Number of items allocated in array */
 	size_t	count;	/* Number of all items */
 	size_t	used;	/* Number of items _active_ now */
 };
 
 /* Per-request scratch buffer passed between prepare/add/del/flush */
 struct ta_buf_ifidx
 {
 	struct ifentry *ife;
 	uint32_t value;
 };
 
 /*
  * Forward declarations for the iface table implementation: array search
  * helpers, the lookup handler, the interface notifier and the table_algo
  * callbacks.  compare_ifidx() is the only one with external linkage.
  */
 int compare_ifidx(const void *k, const void *v);
 static struct ifidx * ifidx_find(struct table_info *ti, void *key);
 static int ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val);
 static int ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state,
     struct table_info *ti, char *data, uint8_t tflags);
 static void ta_change_ti_ifidx(void *ta_state, struct table_info *ti);
 static void destroy_ifidx_locked(struct namedobj_instance *ii,
     struct named_object *no, void *arg);
 static void ta_destroy_ifidx(void *ta_state, struct table_info *ti);
 static void ta_dump_ifidx_tinfo(void *ta_state, struct table_info *ti,
     ipfw_ta_tinfo *tinfo);
 static int ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_add_ifidx(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static int ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_del_ifidx(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static void ta_flush_ifidx_entry(struct ip_fw_chain *ch,
     struct tentry_info *tei, void *ta_buf);
 static void if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex);
 static int ta_need_modify_ifidx(void *ta_state, struct table_info *ti,
     uint32_t count, uint64_t *pflags);
 static int ta_prepare_mod_ifidx(void *ta_buf, uint64_t *pflags);
 static int ta_fill_mod_ifidx(void *ta_state, struct table_info *ti,
     void *ta_buf, uint64_t *pflags);
 static void ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t pflags);
 static void ta_flush_mod_ifidx(void *ta_buf);
 static int ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e,
     ipfw_obj_tentry *tent);
 static int ta_find_ifidx_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent);
 static void foreach_ifidx(struct namedobj_instance *ii, struct named_object *no,
     void *arg);
 static void ta_foreach_ifidx(void *ta_state, struct table_info *ti,
     ta_foreach_f *f, void *arg);
 
 /*
  * bsearch()-style comparator: @k points to a uint16_t key, @v to a
  * struct ifidx.  Returns -1, 0 or 1 as the key is less than, equal to
  * or greater than the element's kidx.
  */
 int
 compare_ifidx(const void *k, const void *v)
 {
 	uint16_t want, have;
 
 	want = *((const uint16_t *)k);
 	have = ((const struct ifidx *)v)->kidx;
 
 	return ((want > have) - (want < have));
 }
 
 /*
  * Adds item @item with key @key into ascending-sorted array @base.
  * Assumes @base has enough additional storage.
  *
  * Returns 1 on success, 0 on duplicate key.
  */
 static int
 badd(const void *key, void *item, void *base, size_t nmemb,
     size_t size, int (*compar) (const void *, const void *))
 {
 	int min, max, mid, shift, res;
 	caddr_t paddr;
 
 	if (nmemb == 0) {
 		memcpy(base, item, size);
 		return (1);
 	}
 
 	/* Binary search */
 	min = 0;
 	max = nmemb - 1;
 	mid = 0;
 	while (min <= max) {
 		mid = (min + max) / 2;
 		res = compar(key, (const void *)((caddr_t)base + mid * size));
 		if (res == 0)
 			return (0);
 
 		if (res > 0)
 			min = mid + 1;
 		else
 			max = mid - 1;
 	}
 
 	/* Item not found. */
 	res = compar(key, (const void *)((caddr_t)base + mid * size));
 	if (res > 0)
 		shift = mid + 1;
 	else
 		shift = mid;
 
 	paddr = (caddr_t)base + shift * size;
 	if (nmemb > shift)
 		memmove(paddr + size, paddr, (nmemb - shift) * size);
 
 	memcpy(paddr, item, size);
 
 	return (1);
 }
 
 /*
  * Deletes item with key @key from ascending-sorted array @base of
  * @nmemb elements, @size bytes each, closing the gap by shifting the
  * following elements down.  Nothing at or beyond @base + @nmemb * @size
  * is read; the last slot keeps its old contents.
  *
  * Returns 1 on success, 0 for non-existent key.
  */
 static int
 bdel(const void *key, void *base, size_t nmemb, size_t size,
     int (*compar) (const void *, const void *))
 {
 	caddr_t item, next, end;
 
 	item = (caddr_t)bsearch(key, base, nmemb, size, compar);
 
 	if (item == NULL)
 		return (0);
 
 	/*
 	 * Only the elements after the deleted one have to move.  Counting
 	 * the deleted element itself in the length made memmove() read one
 	 * element past the end of the array.
 	 */
 	next = item + size;
 	end = (caddr_t)base + nmemb * size;
 
 	if (end > next)
 		memmove(item, next, end - next);
 
 	return (1);
 }
 
 /*
  * Binary-search the runtime array (ti->state, ti->data elements) for
  * @key.  Returns the matching element or NULL.
  */
 static struct ifidx *
 ifidx_find(struct table_info *ti, void *key)
 {
 
 	return (bsearch(key, ti->state, ti->data, sizeof(struct ifidx),
 	    compare_ifidx));
 }
 
 /*
  * Lookup handler for iface tables.
  * Returns 1 and stores the value in *val on a match, 0 otherwise.
  */
 static int
 ta_lookup_ifidx(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val)
 {
 	struct ifidx *hit;
 
 	hit = ifidx_find(ti, key);
 	if (hit == NULL)
 		return (0);
 
 	*val = hit->value;
 	return (1);
 }
 
 /*
  * Create a new iface table: allocate the config, the name hash and a
  * zeroed runtime array of DEFAULT_IFIDX_SIZE slots, then publish the
  * array and the lookup handler through @ti.  Always returns 0.
  */
 static int
 ta_init_ifidx(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
     char *data, uint8_t tflags)
 {
 	struct iftable_cfg *cfg;
 
 	cfg = malloc(sizeof(*cfg), M_IPFW, M_WAITOK | M_ZERO);
 
 	cfg->ch = ch;
 	cfg->ii = ipfw_objhash_create(DEFAULT_IFIDX_SIZE);
 	cfg->size = DEFAULT_IFIDX_SIZE;
 	cfg->main_ptr = malloc(sizeof(struct ifidx) * cfg->size, M_IPFW,
 	    M_WAITOK | M_ZERO);
 
 	ti->state = cfg->main_ptr;
 	ti->lookup = ta_lookup_ifidx;
 	*ta_state = cfg;
 
 	return (0);
 }
 
 /*
  * Handle tableinfo @ti pointer change (on table array resize):
  * remember the new pointer in the table config.
  */
 static void
 ta_change_ti_ifidx(void *ta_state, struct table_info *ti)
 {
 	struct iftable_cfg *cfg = (struct iftable_cfg *)ta_state;
 
 	cfg->ti = ti;
 }
 
 /*
  * Object-hash iterator callback: detach the record from interface
  * notifications, drop its interface reference and free it.
  * @arg is the owning struct ip_fw_chain.
  */
 static void
 destroy_ifidx_locked(struct namedobj_instance *ii, struct named_object *no,
     void *arg)
 {
 	struct ip_fw_chain *chain = (struct ip_fw_chain *)arg;
 	struct ifentry *rec = (struct ifentry *)no;
 
 	ipfw_iface_del_notify(chain, &rec->ic);
 	ipfw_iface_unref(chain, &rec->ic);
 	free(rec, M_IPFW_TBL);
 }
 
 
 /*
  * Destroys table @ti: frees the runtime array, releases every record
  * under the UH write lock, then destroys the name hash and the config.
  */
 static void
 ta_destroy_ifidx(void *ta_state, struct table_info *ti)
 {
 	struct iftable_cfg *cfg = (struct iftable_cfg *)ta_state;
 	struct ip_fw_chain *chain = cfg->ch;
 
 	if (cfg->main_ptr != NULL) {
 		free(cfg->main_ptr, M_IPFW);
 	}
 
 	/* Records are torn down one by one with the lock held */
 	IPFW_UH_WLOCK(chain);
 	ipfw_objhash_foreach(cfg->ii, destroy_ifidx_locked, chain);
 	IPFW_UH_WUNLOCK(chain);
 
 	ipfw_objhash_destroy(cfg->ii);
 	free(cfg, M_IPFW);
 }
 
 /*
  * Provide algo-specific table info: array class, allocated size,
  * number of active items and the size of a single item.
  */
 static void
 ta_dump_ifidx_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
 {
 	struct iftable_cfg *icfg = (struct iftable_cfg *)ta_state;
 
 	tinfo->taclass4 = IPFW_TACLASS_ARRAY;
 	tinfo->itemsize4 = sizeof(struct ifidx);
 	tinfo->size4 = icfg->size;
 	tinfo->count4 = icfg->used;
 }
 
 /*
  * Prepare state to add to the table:
  * validate the interface name, allocate an ifentry and take a
  * reference on the named interface. The new entry is handed over
  * to ta_add_ifidx() through @ta_buf.
  * Returns 0 on success or EINVAL on a bad name / failed reference.
  */
 static int
 ta_prepare_add_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ifentry *entry;
 	char *name;
 
 	/* The name has to be NUL-terminated within IF_NAMESIZE bytes */
 	name = (char *)tei->paddr;
 	if (strnlen(name, IF_NAMESIZE) == IF_NAMESIZE)
 		return (EINVAL);
 
 	entry = malloc(sizeof(*entry), M_IPFW_TBL, M_WAITOK | M_ZERO);
 	entry->ic.cb = if_notifier;
 	entry->ic.cbdata = entry;
 
 	if (ipfw_iface_ref(ch, name, &entry->ic) != 0) {
 		free(entry, M_IPFW_TBL);
 		return (EINVAL);
 	}
 
 	/* Use ipfw_iface 'ifname' field as stable storage */
 	entry->no.name = entry->ic.iface->ifname;
 
 	((struct ta_buf_ifidx *)ta_buf)->ife = entry;
 
 	return (0);
 }
 
 /*
  * Add the interface named by @tei->paddr to the table, or update the
  * value of an already existing entry when TEI_FLAGS_UPDATE is set.
  *
  * The entry prepared by ta_prepare_add_ifidx() is taken from @ta_buf.
  * On insertion it is linked into the name hash and tb->ife is cleared;
  * on every other path tb->ife is left set.
  *
  * Returns 0 on success (*pnum is 1 for an insertion, 0 for an update),
  * EEXIST if the entry exists and no update was requested, or EFBIG if
  * TEI_FLAGS_DONTADD forbids adding a new entry.
  */
 static int
 ta_add_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct iftable_cfg *icfg;
 	struct ifentry *ife, *tmp;
 	struct ta_buf_ifidx *tb;
 	struct ipfw_iface *iif;
 	struct ifidx *ifi;
 	char *ifname;
 	uint32_t value;
 
 	tb = (struct ta_buf_ifidx *)ta_buf;
 	ifname = (char *)tei->paddr;
 	icfg = (struct iftable_cfg *)ta_state;
 	ife = tb->ife;
 
 	ife->icfg = icfg;
 	ife->value = tei->value;
 
 	tmp = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0, ifname);
 
 	if (tmp != NULL) {
 		if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
 			return (EEXIST);
 
 		/* Exchange values in @tmp and @tei */
 		value = tmp->value;
 		tmp->value = tei->value;
 		tei->value = value;
 
 		iif = tmp->ic.iface;
 		if (iif->resolved != 0) {
 			/* We have to update runtime value, too */
 			/*
 			 * NOTE(review): assumes a resolved interface always
 			 * has a runtime array element; ifi is not checked
 			 * for NULL -- confirm.
 			 */
 			ifi = ifidx_find(ti, &iif->ifindex);
 			ifi->value = ife->value;
 		}
 
 		/* Indicate that update has happened instead of addition */
 		tei->flags |= TEI_FLAGS_UPDATED;
 		*pnum = 0;
 		return (0);
 	}
 
 	if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
 		return (EFBIG);
 
 	/* Link to internal list */
 	ipfw_objhash_add(icfg->ii, &ife->no);
 
 	/* Link notifier (possible running its callback) */
 	ipfw_iface_add_notify(icfg->ch, &ife->ic);
 	icfg->count++;
 
 	/* Entry is owned by the table now: keep flush_entry from freeing it */
 	tb->ife = NULL;
 	*pnum = 1;
 
 	return (0);
 }
 
 /*
  * Prepare to delete key from table.
  * Only checks that the interface name is NUL-terminated within
  * IF_NAMESIZE bytes; returns EINVAL if it is not, 0 otherwise.
  */
 static int
 ta_prepare_del_ifidx(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	char *name;
 
 	name = (char *)tei->paddr;
 
 	return ((strnlen(name, IF_NAMESIZE) == IF_NAMESIZE) ? EINVAL : 0);
 }
 
 /*
  * Remove key from both configuration list and
  * runtime array. Removes interface notification and drops the
  * interface reference; the unlinked entry is returned via @ta_buf
  * so that it can be freed later by the flush handler.
  * Returns 0 on success or ENOENT if the name is not in the table.
  */
 static int
 ta_del_ifidx(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct iftable_cfg *icfg = (struct iftable_cfg *)ta_state;
 	struct ta_buf_ifidx *tb = (struct ta_buf_ifidx *)ta_buf;
 	struct ifentry *entry;
 	uint16_t ifindex;
 	int res;
 
 	entry = (struct ifentry *)ipfw_objhash_lookup_name(icfg->ii, 0,
 	    (char *)tei->paddr);
 	if (entry == NULL)
 		return (ENOENT);
 
 	if (entry->linked != 0) {
 		/* We have to remove item from runtime */
 		ifindex = entry->ic.iface->ifindex;
 
 		res = bdel(&ifindex, icfg->main_ptr, icfg->used,
 		    sizeof(struct ifidx), compare_ifidx);
 		KASSERT(res == 1, ("index %d does not exist", ifindex));
 
 		icfg->used--;
 		ti->data = icfg->used;
 		entry->linked = 0;
 	}
 
 	/* Unlink from local list */
 	ipfw_objhash_del(icfg->ii, &entry->no);
 	/* Unlink notifier and deref */
 	ipfw_iface_del_notify(icfg->ch, &entry->ic);
 	ipfw_iface_unref(icfg->ch, &entry->ic);
 	icfg->count--;
 
 	/* Report the removed value and pass the entry on for freeing */
 	tei->value = entry->value;
 	tb->ife = entry;
 	*pnum = 1;
 
 	return (0);
 }
 
 /*
  * Flush entry state left in @ta_buf after add/delete:
  * frees the ifentry, if any. No interface reference is dropped here.
  */
 static void
 ta_flush_ifidx_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_ifidx *buf = (struct ta_buf_ifidx *)ta_buf;
 
 	if (buf->ife == NULL)
 		return;
 
 	free(buf->ife, M_IPFW_TBL);
 }
 
 
 /*
  * Handle interface announce/withdrawal for particular table.
  * Every real runtime array modification happens here.
  *
  * A non-zero @ifindex for an entry that is not linked yet inserts it
  * into the runtime array; a zero @ifindex for a linked entry removes
  * it. Any other combination is a no-op.
  */
 static void
 if_notifier(struct ip_fw_chain *ch, void *cbdata, uint16_t ifindex)
 {
 	struct ifentry *entry = (struct ifentry *)cbdata;
 	struct iftable_cfg *cfg = entry->icfg;
 	struct table_info *tinfo = cfg->ti;
 	struct ifidx item;
 	uint16_t idx;
 	int res;
 
 	KASSERT(tinfo != NULL, ("ti=NULL, check change_ti handler"));
 
 	if (entry->linked == 0) {
 		if (ifindex == 0)
 			return;
 
 		/* Interface announce */
 		idx = ifindex;
 		item.kidx = idx;
 		item.spare = 0;
 		item.value = entry->value;
 		res = badd(&idx, &item, cfg->main_ptr, cfg->used,
 		    sizeof(struct ifidx), compare_ifidx);
 		KASSERT(res == 1, ("index %d already exists", idx));
 		cfg->used++;
 		tinfo->data = cfg->used;
 		entry->linked = 1;
 	} else {
 		if (ifindex != 0)
 			return;
 
 		/* Interface withdrawal */
 		idx = entry->ic.iface->ifindex;
 		res = bdel(&idx, cfg->main_ptr, cfg->used,
 		    sizeof(struct ifidx), compare_ifidx);
 		KASSERT(res == 1, ("index %d does not exist", idx));
 		cfg->used--;
 		tinfo->data = cfg->used;
 		entry->linked = 0;
 	}
 }
 
 
 /*
  * Table growing callbacks.
  */
 
 /*
  * Check whether the runtime array has to grow to hold @count more
  * configured entries. If so, stores the new size (current size
  * doubled until it fits) in @pflags and returns 1; otherwise 0.
  */
 static int
 ta_need_modify_ifidx(void *ta_state, struct table_info *ti, uint32_t count,
     uint64_t *pflags)
 {
 	struct iftable_cfg *icfg = (struct iftable_cfg *)ta_state;
 	uint32_t want;
 
 	want = icfg->size;
 	while (want < icfg->count + count)
 		want *= 2;
 
 	if (want == icfg->size)
 		return (0);
 
 	*pflags = want;
 	return (1);
 }
 
 /*
  * Allocate new, larger runtime ifidx array.
  * The requested item count is read from @pflags; the zeroed array
  * and its size are stored in the mod_item at @ta_buf.
  */
 static int
 ta_prepare_mod_ifidx(void *ta_buf, uint64_t *pflags)
 {
 	struct mod_item *item = (struct mod_item *)ta_buf;
 
 	memset(item, 0, sizeof(*item));
 	item->size = *pflags;
 	item->main_ptr = malloc(item->size * sizeof(struct ifidx), M_IPFW,
 	    M_WAITOK | M_ZERO);
 
 	return (0);
 }
 
 /*
  * Copy data from old runtime array to new one.
  * If the current array is already at least as large as the prepared
  * one, nothing is copied and *pflags is cleared.
  */
 static int
 ta_fill_mod_ifidx(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t *pflags)
 {
 	struct iftable_cfg *cfg = (struct iftable_cfg *)ta_state;
 	struct mod_item *item = (struct mod_item *)ta_buf;
 
 	if (cfg->size < item->size) {
 		/* Still need to grow: carry over the active items */
 		memcpy(item->main_ptr, cfg->main_ptr,
 		    cfg->used * sizeof(struct ifidx));
 	} else
 		*pflags = 0;
 
 	return (0);
 }
 
 /*
  * Switch old & new arrays.
  * The new array becomes the active one (config and @ti are both
  * updated); the old array is left in @ta_buf for the flush handler.
  */
 static void
 ta_modify_ifidx(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t pflags)
 {
 	struct iftable_cfg *cfg = (struct iftable_cfg *)ta_state;
 	struct mod_item *item = (struct mod_item *)ta_buf;
 	void *prev;
 
 	prev = cfg->main_ptr;
 
 	cfg->size = item->size;
 	cfg->main_ptr = item->main_ptr;
 	ti->state = cfg->main_ptr;
 
 	item->main_ptr = prev;
 }
 
 /*
  * Free unneeded array left in the mod_item at @ta_buf.
  */
 static void
 ta_flush_mod_ifidx(void *ta_buf)
 {
 	struct mod_item *item = (struct mod_item *)ta_buf;
 
 	if (item->main_ptr == NULL)
 		return;
 
 	free(item->main_ptr, M_IPFW);
 }
 
 /*
  * Export the ifentry @e into @tent: copies IF_NAMESIZE bytes of the
  * interface name into the key, sets the mask length to the full name
  * width and stores the entry value. Always returns 0.
  */
 static int
 ta_dump_ifidx_tentry(void *ta_state, struct table_info *ti, void *e,
     ipfw_obj_tentry *tent)
 {
 	struct ifentry *entry = (struct ifentry *)e;
 
 	memcpy(&tent->k, entry->no.name, IF_NAMESIZE);
 	tent->masklen = 8 * IF_NAMESIZE;
 	tent->v.kidx = entry->value;
 
 	return (0);
 }
 
 /*
  * Find the entry for the interface name in @tent->k.iface and, if it
  * exists, dump it back into @tent.
  * Returns 0 on success, EINVAL for an unterminated name, ENOENT if
  * the name is not in the table.
  */
 static int
 ta_find_ifidx_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent)
 {
 	struct iftable_cfg *cfg = (struct iftable_cfg *)ta_state;
 	struct ifentry *entry;
 	char *name;
 
 	name = tent->k.iface;
 	if (strnlen(name, IF_NAMESIZE) == IF_NAMESIZE)
 		return (EINVAL);
 
 	entry = (struct ifentry *)ipfw_objhash_lookup_name(cfg->ii, 0, name);
 	if (entry == NULL)
 		return (ENOENT);
 
 	ta_dump_ifidx_tentry(ta_state, ti, entry, tent);
 	return (0);
 }
 
 /*
  * Walk arguments passed through ipfw_objhash_foreach() to
  * foreach_ifidx(): the user callback and its opaque argument.
  */
 struct wa_ifidx {
 	ta_foreach_f	*f;
 	void		*arg;
 };
 
 /*
  * ipfw_objhash_foreach() adapter: invokes the user callback stored in
  * the struct wa_ifidx at @arg on the ifentry behind @no.
  */
 static void
 foreach_ifidx(struct namedobj_instance *ii, struct named_object *no,
     void *arg)
 {
 	struct wa_ifidx *walk = (struct wa_ifidx *)arg;
 
 	walk->f((struct ifentry *)no, walk->arg);
 }
 
 /*
  * Call @f with @arg for every configured interface entry
  * (walks the name hash, not the runtime array).
  */
 static void
 ta_foreach_ifidx(void *ta_state, struct table_info *ti, ta_foreach_f *f,
     void *arg)
 {
 	struct iftable_cfg *cfg = (struct iftable_cfg *)ta_state;
 	struct wa_ifidx walk;
 
 	walk.arg = arg;
 	walk.f = f;
 
 	ipfw_objhash_foreach(cfg->ii, foreach_ifidx, &walk);
 }
 
 struct table_algo iface_idx = {
 	.name		= "iface:array",
 	.type		= IPFW_TABLE_INTERFACE,
 	.flags		= TA_FLAG_DEFAULT,
 	.ta_buf_size	= sizeof(struct ta_buf_ifidx),
 	.init		= ta_init_ifidx,
 	.destroy	= ta_destroy_ifidx,
 	.prepare_add	= ta_prepare_add_ifidx,
 	.prepare_del	= ta_prepare_del_ifidx,
 	.add		= ta_add_ifidx,
 	.del		= ta_del_ifidx,
 	.flush_entry	= ta_flush_ifidx_entry,
 	.foreach	= ta_foreach_ifidx,
 	.dump_tentry	= ta_dump_ifidx_tentry,
 	.find_tentry	= ta_find_ifidx_tentry,
 	.dump_tinfo	= ta_dump_ifidx_tinfo,
 	.need_modify	= ta_need_modify_ifidx,
 	.prepare_mod	= ta_prepare_mod_ifidx,
 	.fill_mod	= ta_fill_mod_ifidx,
 	.modify		= ta_modify_ifidx,
 	.flush_mod	= ta_flush_mod_ifidx,
 	.change_ti	= ta_change_ti_ifidx,
 };
 
 /*
  * Number array cmds.
  *
  * Implementation:
  *
  * Runtime part:
  * - sorted array of "struct numarray" pointed by ti->state.
  *   Array is allocated with rounding up to NUMARRAY_CHUNK.
  * - current array size is stored in ti->data
  *
  */
 
 /* One element of the sorted runtime array: key and associated value */
 struct numarray {
 	uint32_t	number;
 	uint32_t	value;
 };
 
 /* Per-table state of the number:array algorithm */
 struct numarray_cfg {
 	void	*main_ptr;	/* Runtime array of struct numarray */
 	size_t	size;	/* Number of items allocated in array */
 	size_t	used;	/* Number of items _active_ now */
 };
 
 /* Scratch buffer carried from the prepare_* handlers to add/del */
 struct ta_buf_numarray
 {
 	struct numarray na;
 };
 
 /* Forward declarations for the number:array algorithm handlers */
 int compare_numarray(const void *k, const void *v);
 static struct numarray *numarray_find(struct table_info *ti, void *key);
 static int ta_lookup_numarray(struct table_info *ti, void *key,
     uint32_t keylen, uint32_t *val);
 static int ta_init_numarray(struct ip_fw_chain *ch, void **ta_state,
     struct table_info *ti, char *data, uint8_t tflags);
 static void ta_destroy_numarray(void *ta_state, struct table_info *ti);
 static void ta_dump_numarray_tinfo(void *ta_state, struct table_info *ti,
     ipfw_ta_tinfo *tinfo);
 static int ta_prepare_add_numarray(struct ip_fw_chain *ch,
     struct tentry_info *tei, void *ta_buf);
 static int ta_add_numarray(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static int ta_del_numarray(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static void ta_flush_numarray_entry(struct ip_fw_chain *ch,
     struct tentry_info *tei, void *ta_buf);
 static int ta_need_modify_numarray(void *ta_state, struct table_info *ti,
     uint32_t count, uint64_t *pflags);
 static int ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags);
 static int ta_fill_mod_numarray(void *ta_state, struct table_info *ti,
     void *ta_buf, uint64_t *pflags);
 static void ta_modify_numarray(void *ta_state, struct table_info *ti,
     void *ta_buf, uint64_t pflags);
 static void ta_flush_mod_numarray(void *ta_buf);
 static int ta_dump_numarray_tentry(void *ta_state, struct table_info *ti,
     void *e, ipfw_obj_tentry *tent);
 static int ta_find_numarray_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent);
 static void ta_foreach_numarray(void *ta_state, struct table_info *ti,
     ta_foreach_f *f, void *arg);
 
 /*
  * Comparator for the numarray runtime array: @k points to a uint32_t
  * key, @v to a struct numarray element.
  * Returns -1, 0 or 1 if the key is less than, equal to or greater
  * than the element's number.
  */
 int
 compare_numarray(const void *k, const void *v)
 {
 	const struct numarray *item = (const struct numarray *)v;
 	uint32_t num = *((const uint32_t *)k);
 
 	if (num == item->number)
 		return (0);
 
 	return ((num < item->number) ? -1 : 1);
 }
 
 static struct numarray *
 numarray_find(struct table_info *ti, void *key)
 {
 	struct numarray *ri;
 
 	ri = bsearch(key, ti->state, ti->data, sizeof(struct numarray),
 	    compare_ifidx);
 
 	return (ri);
 }
 
 /*
  * Runtime lookup handler: search the number array for @key.
  * On a hit stores the element's value in @val and returns 1;
  * otherwise returns 0 and leaves @val untouched. @keylen is unused.
  */
 static int
 ta_lookup_numarray(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val)
 {
 	struct numarray *match;
 
 	match = numarray_find(ti, key);
 	if (match == NULL)
 		return (0);
 
 	*val = match->value;
 	return (1);
 }
 
 /*
  * Create a new number table instance with a zeroed runtime array of
  * 16 items and publish the array and lookup handler through @ti.
  * Always returns 0 (allocations use M_WAITOK).
  */
 static int
 ta_init_numarray(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
     char *data, uint8_t tflags)
 {
 	struct numarray_cfg *ncfg;
 
 	ncfg = malloc(sizeof(struct numarray_cfg), M_IPFW, M_WAITOK | M_ZERO);
 	ncfg->size = 16;
 	ncfg->main_ptr = malloc(ncfg->size * sizeof(struct numarray), M_IPFW,
 	    M_WAITOK | M_ZERO);
 
 	ti->lookup = ta_lookup_numarray;
 	ti->state = ncfg->main_ptr;
 	*ta_state = ncfg;
 
 	return (0);
 }
 
 /*
  * Destroys table @ti: frees the runtime array and the config.
  */
 static void
 ta_destroy_numarray(void *ta_state, struct table_info *ti)
 {
 	struct numarray_cfg *ncfg = (struct numarray_cfg *)ta_state;
 
 	if (ncfg->main_ptr != NULL)
 		free(ncfg->main_ptr, M_IPFW);
 	free(ncfg, M_IPFW);
 }
 
 /*
  * Provide algo-specific table info: array class, allocated size,
  * number of active items and the size of a single item.
  */
 static void
 ta_dump_numarray_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
 {
 	struct numarray_cfg *ncfg = (struct numarray_cfg *)ta_state;
 
 	tinfo->taclass4 = IPFW_TACLASS_ARRAY;
 	tinfo->itemsize4 = sizeof(struct numarray);
 	tinfo->size4 = ncfg->size;
 	tinfo->count4 = ncfg->used;
 }
 
 /*
  * Prepare for addition/deletion to an array:
  * copy the uint32_t key from @tei->paddr into the buffer at @ta_buf.
  */
 static int
 ta_prepare_add_numarray(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_numarray *buf = (struct ta_buf_numarray *)ta_buf;
 
 	buf->na.number = *((uint32_t *)tei->paddr);
 
 	return (0);
 }
 
 /*
  * Add the number prepared in @ta_buf to the table, or update the
  * value of an already existing element when TEI_FLAGS_UPDATE is set.
  *
  * Returns 0 on success (*pnum is 1 for an insertion, 0 for an update;
  * on update the old value is returned in @tei->value and
  * TEI_FLAGS_UPDATED is set), EEXIST if the number exists and no update
  * was requested, or EFBIG if TEI_FLAGS_DONTADD forbids adding.
  */
 static int
 ta_add_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct numarray_cfg *cfg;
 	struct ta_buf_numarray *tb;
 	struct numarray *ri;
 	int res;
 	uint32_t value;
 
 	tb = (struct ta_buf_numarray *)ta_buf;
 	cfg = (struct numarray_cfg *)ta_state;
 
 	/* Read current value from @tei */
 	tb->na.value = tei->value;
 
 	ri = numarray_find(ti, &tb->na.number);
 
 	if (ri != NULL) {
 		if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
 			return (EEXIST);
 
 		/* Exchange values between ri and @tei */
 		value = ri->value;
 		ri->value = tei->value;
 		tei->value = value;
 		/* Indicate that update has happened instead of addition */
 		tei->flags |= TEI_FLAGS_UPDATED;
 		*pnum = 0;
 		return (0);
 	}
 
 	if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
 		return (EFBIG);
 
 	/* Insert into the sorted runtime array */
 	res = badd(&tb->na.number, &tb->na, cfg->main_ptr, cfg->used,
 	    sizeof(struct numarray), compare_numarray);
 
 	/* The key is uint32_t: print it unsigned, as ta_del_numarray() does */
 	KASSERT(res == 1, ("number %u already exists", tb->na.number));
 	cfg->used++;
 	ti->data = cfg->used;
 	*pnum = 1;
 
 	return (0);
 }
 
 /*
  * Remove the number prepared in @ta_buf from the runtime array.
  * The removed element's value is returned in @tei->value.
  * Returns 0 on success or ENOENT if the number is not in the table.
  */
 static int
 ta_del_numarray(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct numarray_cfg *ncfg = (struct numarray_cfg *)ta_state;
 	struct ta_buf_numarray *buf = (struct ta_buf_numarray *)ta_buf;
 	struct numarray *found;
 	int res;
 
 	found = numarray_find(ti, &buf->na.number);
 	if (found == NULL)
 		return (ENOENT);
 
 	/* Save the value before the element is removed */
 	tei->value = found->value;
 
 	res = bdel(&buf->na.number, ncfg->main_ptr, ncfg->used,
 	    sizeof(struct numarray), compare_numarray);
 	KASSERT(res == 1, ("number %u does not exist", buf->na.number));
 
 	ncfg->used--;
 	ti->data = ncfg->used;
 	*pnum = 1;
 
 	return (0);
 }
 
 /*
  * Flush handler for add/delete buffers. The number:array buffer holds
  * the element by value, so there is nothing to release.
  */
 static void
 ta_flush_numarray_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 
 	/* We don't have any state, do nothing */
 }
 
 
 /*
  * Table growing callbacks.
  */
 
 /*
  * Check whether the runtime array has to grow to hold @count more
  * items. If so, stores the new size (current size doubled until it
  * fits) in @pflags and returns 1; otherwise returns 0.
  */
 static int
 ta_need_modify_numarray(void *ta_state, struct table_info *ti, uint32_t count,
     uint64_t *pflags)
 {
 	struct numarray_cfg *ncfg = (struct numarray_cfg *)ta_state;
 	size_t want;
 
 	want = ncfg->size;
 	while (want < ncfg->used + count)
 		want *= 2;
 
 	if (want == ncfg->size)
 		return (0);
 
 	*pflags = want;
 	return (1);
 }
 
 /*
  * Allocate new, larger runtime array.
  * The requested item count is read from @pflags; the zeroed array
  * and its size are stored in the mod_item at @ta_buf.
  */
 static int
 ta_prepare_mod_numarray(void *ta_buf, uint64_t *pflags)
 {
 	struct mod_item *item = (struct mod_item *)ta_buf;
 
 	memset(item, 0, sizeof(*item));
 	item->size = *pflags;
 	item->main_ptr = malloc(item->size * sizeof(struct numarray), M_IPFW,
 	    M_WAITOK | M_ZERO);
 
 	return (0);
 }
 
 /*
  * Copy data from old runtime array to new one.
  * If the current array is already at least as large as the prepared
  * one, nothing is copied and *pflags is cleared.
  */
 static int
 ta_fill_mod_numarray(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t *pflags)
 {
 	struct numarray_cfg *ncfg = (struct numarray_cfg *)ta_state;
 	struct mod_item *item = (struct mod_item *)ta_buf;
 
 	if (ncfg->size < item->size) {
 		/* Still need to grow: carry over the active items */
 		memcpy(item->main_ptr, ncfg->main_ptr,
 		    ncfg->used * sizeof(struct numarray));
 	} else
 		*pflags = 0;
 
 	return (0);
 }
 
 /*
  * Switch old & new arrays.
  * The new array becomes the active one (config and @ti are both
  * updated); the old array is left in @ta_buf for the flush handler.
  */
 static void
 ta_modify_numarray(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t pflags)
 {
 	struct numarray_cfg *ncfg = (struct numarray_cfg *)ta_state;
 	struct mod_item *item = (struct mod_item *)ta_buf;
 	void *prev;
 
 	prev = ncfg->main_ptr;
 
 	ncfg->size = item->size;
 	ncfg->main_ptr = item->main_ptr;
 	ti->state = ncfg->main_ptr;
 
 	item->main_ptr = prev;
 }
 
 /*
  * Free unneeded array left in the mod_item at @ta_buf.
  */
 static void
 ta_flush_mod_numarray(void *ta_buf)
 {
 	struct mod_item *item = (struct mod_item *)ta_buf;
 
 	if (item->main_ptr == NULL)
 		return;
 
 	free(item->main_ptr, M_IPFW);
 }
 
 /*
  * Export the numarray element @e into @tent (key and value).
  * Always returns 0.
  */
 static int
 ta_dump_numarray_tentry(void *ta_state, struct table_info *ti, void *e,
     ipfw_obj_tentry *tent)
 {
 	struct numarray *item = (struct numarray *)e;
 
 	tent->v.kidx = item->value;
 	tent->k.key = item->number;
 
 	return (0);
 }
 
 /*
  * Find the element whose number equals @tent->k.key and dump it back
  * into @tent. Returns 0 on success or ENOENT if it is not present.
  */
 static int
 ta_find_numarray_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent)
 {
 	struct numarray *found;
 
 	found = numarray_find(ti, &tent->k.key);
 	if (found == NULL)
 		return (ENOENT);
 
 	ta_dump_numarray_tentry(ta_state, ti, found, tent);
 	return (0);
 }
 
 /*
  * Call @f with @arg for every active element of the runtime array,
  * in array order.
  */
 static void
 ta_foreach_numarray(void *ta_state, struct table_info *ti, ta_foreach_f *f,
     void *arg)
 {
 	struct numarray_cfg *ncfg = (struct numarray_cfg *)ta_state;
 	struct numarray *items = ncfg->main_ptr;
 	int idx;
 
 	idx = 0;
 	while (idx < ncfg->used) {
 		f(&items[idx], arg);
 		idx++;
 	}
 }
 
 /*
  * "number:array" algorithm descriptor for number tables.
  */
 struct table_algo number_array = {
 	/* Identification */
 	.name		= "number:array",
 	.type		= IPFW_TABLE_NUMBER,
 	.ta_buf_size	= sizeof(struct ta_buf_numarray),
 	/* Instance lifetime */
 	.init		= ta_init_numarray,
 	.destroy	= ta_destroy_numarray,
 	/* Entry addition / deletion; both share one prepare handler */
 	.prepare_add	= ta_prepare_add_numarray,
 	.add		= ta_add_numarray,
 	.prepare_del	= ta_prepare_add_numarray,
 	.del		= ta_del_numarray,
 	.flush_entry	= ta_flush_numarray_entry,
 	/* Listing and info */
 	.foreach	= ta_foreach_numarray,
 	.dump_tentry	= ta_dump_numarray_tentry,
 	.find_tentry	= ta_find_numarray_tentry,
 	.dump_tinfo	= ta_dump_numarray_tinfo,
 	/* Runtime array growing */
 	.need_modify	= ta_need_modify_numarray,
 	.prepare_mod	= ta_prepare_mod_numarray,
 	.fill_mod	= ta_fill_mod_numarray,
 	.modify		= ta_modify_numarray,
 	.flush_mod	= ta_flush_mod_numarray,
 };
 
 /*
  * flow:hash cmds
  *
  *
  * ti->data:
  * [inv.mask4][inv.mask6][log2hsize4][log2hsize6]
  * [        8][        8][         8][         8]
  *
  * inv.mask4: 32 - mask
  * inv.mask6:
  * 1) _slow lookup: mask
  * 2) _aligned: (128 - mask) / 8
  * 3) _64: 8
  *
  *
  * pflags:
  * [hsize4][hsize6]
  * [    16][    16]
  */
 
 struct fhashentry;
 
 /* Hash bucket head: singly-linked list of flow entries */
 SLIST_HEAD(fhashbhead, fhashentry);
 
 /*
  * Address-family independent part of a flow entry.
  * cmp_flow_ent() compares the 8 bytes following 'next' as one 64-bit
  * word, so the order and size of the fields after it matter.
  */
 struct fhashentry {
 	SLIST_ENTRY(fhashentry)	next;
 	uint8_t		af;
 	uint8_t		proto;
 	uint16_t	spare0;
 	uint16_t	dport;
 	uint16_t	sport;
 	uint32_t	value;
 	uint32_t	spare1;
 };
 
 /* IPv4 flow entry: common header followed by dst and src addresses */
 struct fhashentry4 {
 	struct fhashentry	e;
 	struct in_addr		dip;
 	struct in_addr		sip;
 };
 
 /* IPv6 flow entry: common header followed by dst and src addresses */
 struct fhashentry6 {
 	struct fhashentry	e;
 	struct in6_addr		dip6;
 	struct in6_addr		sip6;
 };
 
 /*
  * Per-table state of the flow:hash algorithm.
  * fe4/fe6 hold the field masks built in ta_init_fhash(); the lookup
  * path reaches fe6 as the object placed right after fe4, so the two
  * members have to stay adjacent and in this order.
  */
 struct fhash_cfg {
 	struct fhashbhead	*head;	/* Bucket array */
 	size_t			size;	/* Number of buckets */
 	size_t			items;	/* Number of entries stored */
 	struct fhashentry4	fe4;	/* IPv4 mask */
 	struct fhashentry6	fe6;	/* IPv6 mask */
 };
 
 /*
  * Scratch buffer carried from the prepare_* handlers to add/del:
  * ent_ptr is an allocated entry to insert or a removed entry to free,
  * fe6 is in-place key storage used by the delete path.
  */
 struct ta_buf_fhash {
 	void	*ent_ptr;
 	struct fhashentry6 fe6;
 };
 
 /* Forward declarations for the flow:hash algorithm handlers */
 static __inline int cmp_flow_ent(struct fhashentry *a,
     struct fhashentry *b, size_t sz);
 static __inline uint32_t hash_flow4(struct fhashentry4 *f, int hsize);
 static __inline uint32_t hash_flow6(struct fhashentry6 *f, int hsize);
 static uint32_t hash_flow_ent(struct fhashentry *ent, uint32_t size);
 static int ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val);
 static int ta_init_fhash(struct ip_fw_chain *ch, void **ta_state,
 struct table_info *ti, char *data, uint8_t tflags);
 static void ta_destroy_fhash(void *ta_state, struct table_info *ti);
 static void ta_dump_fhash_tinfo(void *ta_state, struct table_info *ti,
     ipfw_ta_tinfo *tinfo);
 static int ta_dump_fhash_tentry(void *ta_state, struct table_info *ti,
     void *e, ipfw_obj_tentry *tent);
 static int tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent);
 static int ta_find_fhash_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent);
 static void ta_foreach_fhash(void *ta_state, struct table_info *ti,
     ta_foreach_f *f, void *arg);
 static int ta_prepare_add_fhash(struct ip_fw_chain *ch,
     struct tentry_info *tei, void *ta_buf);
 static int ta_add_fhash(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static int ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_del_fhash(void *ta_state, struct table_info *ti,
     struct tentry_info *tei, void *ta_buf, uint32_t *pnum);
 static void ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf);
 static int ta_need_modify_fhash(void *ta_state, struct table_info *ti,
     uint32_t count, uint64_t *pflags);
 static int ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags);
 static int ta_fill_mod_fhash(void *ta_state, struct table_info *ti,
     void *ta_buf, uint64_t *pflags);
 static void ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t pflags);
 static void ta_flush_mod_fhash(void *ta_buf);
 
 /*
  * Compare two flow entries for key equality.
  * @sz is the number of address bytes stored right after the common
  * struct fhashentry header (callers pass twice the address size).
  * Returns 1 if the keys match, 0 otherwise. The 'value' and 'spare1'
  * fields take no part in the comparison.
  */
 static __inline int
 cmp_flow_ent(struct fhashentry *a, struct fhashentry *b, size_t sz)
 {
 	uint64_t *ka, *kb;
 
 	/*
 	 * Point at the 8 bytes that follow the 'next' link and compare
 	 * them as a single word.
 	 * NOTE(review): presumably these are exactly af, proto, spare0,
 	 * dport and sport with no padding in between -- confirm.
 	 */
 	ka = (uint64_t *)(&a->next + 1);
 	kb = (uint64_t *)(&b->next + 1);
 
 	/* a + 1 / b + 1 are the address bytes that trail the header */
 	if (*ka == *kb && (memcmp(a + 1, b + 1, sz) == 0))
 		return (1);
 
 	return (0);
 }
 
 /*
  * Hash an IPv4 flow entry: XOR of both addresses and both ports,
  * reduced modulo (@hsize - 1).
  * NOTE(review): the modulus is hsize - 1, not hsize, so @hsize has to
  * be at least 2 -- confirm this is intended.
  */
 static __inline uint32_t
 hash_flow4(struct fhashentry4 *f, int hsize)
 {
 	uint32_t h;
 
 	h = f->dip.s_addr;
 	h ^= f->sip.s_addr;
 	h ^= f->e.dport;
 	h ^= f->e.sport;
 
 	return (h % (hsize - 1));
 }
 
 /*
  * Hash an IPv6 flow entry: XOR of the two low 32-bit words of each
  * address and both ports, reduced modulo (@hsize - 1).
  * NOTE(review): the modulus is hsize - 1, not hsize, so @hsize has to
  * be at least 2 -- confirm this is intended.
  */
 static __inline uint32_t
 hash_flow6(struct fhashentry6 *f, int hsize)
 {
 	uint32_t h;
 
 	h = f->dip6.__u6_addr.__u6_addr32[2];
 	h ^= f->dip6.__u6_addr.__u6_addr32[3];
 	h ^= f->sip6.__u6_addr.__u6_addr32[2];
 	h ^= f->sip6.__u6_addr.__u6_addr32[3];
 	h ^= f->e.dport;
 	h ^= f->e.sport;
 
 	return (h % (hsize - 1));
 }
 
 /*
  * Hash a flow entry with the function matching its address family:
  * hash_flow4() for AF_INET, hash_flow6() for everything else.
  */
 static uint32_t
 hash_flow_ent(struct fhashentry *ent, uint32_t size)
 {
 
 	if (ent->af == AF_INET)
 		return (hash_flow4((struct fhashentry4 *)ent, size));
 
 	return (hash_flow6((struct fhashentry6 *)ent, size));
 }
 
 /*
  * Runtime lookup handler for flow tables.
  *
  * @key points to a struct ipfw_flow_id. The table mask (ti->xstate)
  * is copied and ANDed with the packet's addresses, ports and protocol
  * to form the lookup key, which is then hashed and searched for in
  * its bucket. On a hit stores the entry's value in @val and returns
  * 1; returns 0 otherwise, including for unknown address types.
  *
  * The bucket count and the hash are kept as uint32_t: the hash
  * helpers return uint32_t, and add/delete hash with the untruncated
  * bucket count, so narrowing either one here would make lookups
  * disagree with insertions once the count no longer fits in 16 bits.
  */
 static int
 ta_lookup_fhash(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val)
 {
 	struct fhashbhead *head;
 	struct fhashentry *ent;
 	struct fhashentry4 *m4;
 	struct ipfw_flow_id *id;
 	uint32_t hash, hsize;
 
 	id = (struct ipfw_flow_id *)key;
 	head = (struct fhashbhead *)ti->state;
 	hsize = ti->data;
 	m4 = (struct fhashentry4 *)ti->xstate;
 
 	if (id->addr_type == 4) {
 		struct fhashentry4 f;
 
 		/* Copy hash mask */
 		f = *m4;
 
 		f.dip.s_addr &= id->dst_ip;
 		f.sip.s_addr &= id->src_ip;
 		f.e.dport &= id->dst_port;
 		f.e.sport &= id->src_port;
 		f.e.proto &= id->proto;
 		hash = hash_flow4(&f, hsize);
 		SLIST_FOREACH(ent, &head[hash], next) {
 			if (cmp_flow_ent(ent, &f.e, 2 * 4) != 0) {
 				*val = ent->value;
 				return (1);
 			}
 		}
 	} else if (id->addr_type == 6) {
 		struct fhashentry6 f;
 		uint64_t *fp, *idp;
 
 		/* Copy hash mask: the IPv6 mask is stored right after m4 */
 		f = *((struct fhashentry6 *)(m4 + 1));
 
 		/* Handle lack of __u6_addr.__u6_addr64 */
 		fp = (uint64_t *)&f.dip6;
 		idp = (uint64_t *)&id->dst_ip6;
 		/* src IPv6 is stored after dst IPv6 */
 		*fp++ &= *idp++;
 		*fp++ &= *idp++;
 		*fp++ &= *idp++;
 		*fp &= *idp;
 		f.e.dport &= id->dst_port;
 		f.e.sport &= id->src_port;
 		f.e.proto &= id->proto;
 		hash = hash_flow6(&f, hsize);
 		SLIST_FOREACH(ent, &head[hash], next) {
 			if (cmp_flow_ent(ent, &f.e, 2 * 16) != 0) {
 				*val = ent->value;
 				return (1);
 			}
 		}
 	}
 
 	return (0);
 }
 
 /*
  * New table.
  * Allocates the config and 512 empty buckets, builds the IPv4/IPv6
  * field masks from @tflags (a selected field is all ones, the rest
  * stay zero) and publishes buckets, masks, bucket count and lookup
  * handler through @ti. Always returns 0.
  */
 static int
 ta_init_fhash(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
     char *data, uint8_t tflags)
 {
 	struct fhash_cfg *cfg;
 	struct fhashentry4 *m4;
 	struct fhashentry6 *m6;
 	int bucket;
 
 	cfg = malloc(sizeof(*cfg), M_IPFW, M_WAITOK | M_ZERO);
 	cfg->size = 512;
 	cfg->head = malloc(cfg->size * sizeof(struct fhashbhead), M_IPFW,
 	    M_WAITOK | M_ZERO);
 	for (bucket = 0; bucket < cfg->size; bucket++) {
 		SLIST_INIT(&cfg->head[bucket]);
 	}
 
 	/* Fill in fe masks based on @tflags */
 	m4 = &cfg->fe4;
 	m6 = &cfg->fe6;
 	m4->e.af = AF_INET;
 	m6->e.af = AF_INET6;
 
 	if ((tflags & IPFW_TFFLAG_SRCIP) != 0) {
 		memset(&m4->sip, 0xFF, sizeof(m4->sip));
 		memset(&m6->sip6, 0xFF, sizeof(m6->sip6));
 	}
 	if ((tflags & IPFW_TFFLAG_DSTIP) != 0) {
 		memset(&m4->dip, 0xFF, sizeof(m4->dip));
 		memset(&m6->dip6, 0xFF, sizeof(m6->dip6));
 	}
 	if ((tflags & IPFW_TFFLAG_SRCPORT) != 0) {
 		memset(&m4->e.sport, 0xFF, sizeof(m4->e.sport));
 		memset(&m6->e.sport, 0xFF, sizeof(m6->e.sport));
 	}
 	if ((tflags & IPFW_TFFLAG_DSTPORT) != 0) {
 		memset(&m4->e.dport, 0xFF, sizeof(m4->e.dport));
 		memset(&m6->e.dport, 0xFF, sizeof(m6->e.dport));
 	}
 	if ((tflags & IPFW_TFFLAG_PROTO) != 0) {
 		memset(&m4->e.proto, 0xFF, sizeof(m4->e.proto));
 		memset(&m6->e.proto, 0xFF, sizeof(m6->e.proto));
 	}
 
 	/* Export state to the caller and to the runtime lookup path */
 	ti->lookup = ta_lookup_fhash;
 	ti->data = cfg->size;
 	ti->xstate = &cfg->fe4;
 	ti->state = cfg->head;
 	*ta_state = cfg;
 
 	return (0);
 }
 
 static void
 ta_destroy_fhash(void *ta_state, struct table_info *ti)
 {
 	struct fhash_cfg *cfg;
 	struct fhashentry *ent, *ent_next;
 	int i;
 
 	cfg = (struct fhash_cfg *)ta_state;
 
 	for (i = 0; i < cfg->size; i++)
 		SLIST_FOREACH_SAFE(ent, &cfg->head[i], next, ent_next)
 			free(ent, M_IPFW_TBL);
 
 	free(cfg->head, M_IPFW);
 	free(cfg, M_IPFW);
 }
 
 /*
  * Provide algo-specific table info: hash class, bucket count, number
  * of stored items and the per-family item sizes.
  */
 static void
 ta_dump_fhash_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
 {
 	struct fhash_cfg *fcfg = (struct fhash_cfg *)ta_state;
 
 	tinfo->flags = IPFW_TATFLAGS_AFITEM;
 	tinfo->taclass4 = IPFW_TACLASS_HASH;
 	tinfo->itemsize4 = sizeof(struct fhashentry4);
 	tinfo->itemsize6 = sizeof(struct fhashentry6);
 	tinfo->size4 = fcfg->size;
 	tinfo->count4 = fcfg->items;
 }
 
 /*
  * Export the flow entry @e into @tent.
  * Ports and IPv4 addresses are converted with htons()/htonl(); IPv6
  * addresses are copied as-is. The address family is reported in both
  * the flow key and tent->subtype. Non-IPv4 entries are exported as
  * IPv6 only when INET6 is compiled in. Always returns 0.
  */
 static int
 ta_dump_fhash_tentry(void *ta_state, struct table_info *ti, void *e,
     ipfw_obj_tentry *tent)
 {
 	struct fhash_cfg *cfg;
 	struct fhashentry *ent;
 	struct fhashentry4 *fe4;
 #ifdef INET6
 	struct fhashentry6 *fe6;
 #endif
 	struct tflow_entry *tfe;
 
 	cfg = (struct fhash_cfg *)ta_state;
 	ent = (struct fhashentry *)e;
 	tfe = &tent->k.flow;
 
 	/* Address-family independent part */
 	tfe->af = ent->af;
 	tfe->proto = ent->proto;
 	tfe->dport = htons(ent->dport);
 	tfe->sport = htons(ent->sport);
 	tent->v.kidx = ent->value;
 	tent->subtype = ent->af;
 
 	if (ent->af == AF_INET) {
 		fe4 = (struct fhashentry4 *)ent;
 		tfe->a.a4.sip.s_addr = htonl(fe4->sip.s_addr);
 		tfe->a.a4.dip.s_addr = htonl(fe4->dip.s_addr);
 		tent->masklen = 32;
 #ifdef INET6
 	} else {
 		fe6 = (struct fhashentry6 *)ent;
 		tfe->a.a6.sip6 = fe6->sip6;
 		tfe->a.a6.dip6 = fe6->dip6;
 		tent->masklen = 128;
 #endif
 	}
 
 	return (0);
 }
 
 /*
  * Convert the flow key at @tei->paddr (struct tflow_entry) into the
  * hash entry @ent, which has to be large enough for the address
  * family in @tei->subtype.
  * Ports and IPv4 addresses are converted with ntohs()/ntohl(); IPv6
  * addresses are copied as-is.
  * Returns 0 on success or EINVAL for an unsupported subtype (which
  * includes AF_INET6 when INET6 is not compiled in).
  */
 static int
 tei_to_fhash_ent(struct tentry_info *tei, struct fhashentry *ent)
 {
 #ifdef INET
 	struct fhashentry4 *fe4;
 #endif
 #ifdef INET6
 	struct fhashentry6 *fe6;
 #endif
 	struct tflow_entry *tfe;
 
 	tfe = (struct tflow_entry *)tei->paddr;
 
 	/* Address-family independent part */
 	ent->af = tei->subtype;
 	ent->proto = tfe->proto;
 	ent->dport = ntohs(tfe->dport);
 	ent->sport = ntohs(tfe->sport);
 
 	if (tei->subtype == AF_INET) {
 #ifdef INET
 		fe4 = (struct fhashentry4 *)ent;
 		fe4->sip.s_addr = ntohl(tfe->a.a4.sip.s_addr);
 		fe4->dip.s_addr = ntohl(tfe->a.a4.dip.s_addr);
 #endif
 #ifdef INET6
 	} else if (tei->subtype == AF_INET6) {
 		fe6 = (struct fhashentry6 *)ent;
 		fe6->sip6 = tfe->a.a6.sip6;
 		fe6->dip6 = tfe->a.a6.dip6;
 #endif
 	} else {
 		/* Unknown CIDR type */
 		return (EINVAL);
 	}
 
 	return (0);
 }
 
 
 /*
  * Find the entry matching the flow key in @tent and dump it back into
  * @tent. Returns 0 on success, ENOENT if there is no such entry, or
  * the error from tei_to_fhash_ent() for an unsupported key.
  */
 static int
 ta_find_fhash_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent)
 {
 	struct fhash_cfg *cfg = (struct fhash_cfg *)ta_state;
 	struct fhashentry6 key6;
 	struct fhashentry *key, *cur;
 	struct tentry_info tei;
 	size_t addrsz;
 	uint32_t bucket;
 	int error;
 
 	/* Build a lookup key in local storage large enough for IPv6 */
 	memset(&key6, 0, sizeof(key6));
 	memset(&tei, 0, sizeof(tei));
 	key = &key6.e;
 	tei.paddr = &tent->k.flow;
 	tei.subtype = tent->subtype;
 
 	error = tei_to_fhash_ent(&tei, key);
 	if (error != 0)
 		return (error);
 
 	bucket = hash_flow_ent(key, cfg->size);
 	addrsz = (tei.subtype == AF_INET) ?
 	    2 * sizeof(struct in_addr) : 2 * sizeof(struct in6_addr);
 
 	/* Check for existence */
 	SLIST_FOREACH(cur, &cfg->head[bucket], next) {
 		if (cmp_flow_ent(cur, key, addrsz) == 0)
 			continue;
 
 		ta_dump_fhash_tentry(ta_state, ti, cur, tent);
 		return (0);
 	}
 
 	return (ENOENT);
 }
 
 /*
  * Call @f with @arg for every entry stored in the table, bucket by
  * bucket. The safe list iterator is used for the walk.
  */
 static void
 ta_foreach_fhash(void *ta_state, struct table_info *ti, ta_foreach_f *f,
     void *arg)
 {
 	struct fhash_cfg *cfg = (struct fhash_cfg *)ta_state;
 	struct fhashentry *cur, *nxt;
 	int bucket;
 
 	for (bucket = 0; bucket < cfg->size; bucket++) {
 		SLIST_FOREACH_SAFE(cur, &cfg->head[bucket], next, nxt) {
 			f(cur, arg);
 		}
 	}
 }
 
 static int
 ta_prepare_add_fhash(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_fhash *tb;
 	struct fhashentry *ent;
 	size_t sz;
 	int error;
 
 	tb = (struct ta_buf_fhash *)ta_buf;
 
 	if (tei->subtype == AF_INET)
 		sz = sizeof(struct fhashentry4);
 	else if (tei->subtype == AF_INET6)
 		sz = sizeof(struct fhashentry6);
 	else
 		return (EINVAL);
 
 	ent = malloc(sz, M_IPFW_TBL, M_WAITOK | M_ZERO);
 
 	error = tei_to_fhash_ent(tei, ent);
 	if (error != 0) {
 		free(ent, M_IPFW_TBL);
 		return (error);
 	}
 	tb->ent_ptr = ent;
 
 	return (0);
 }
 
 /*
  * Add the entry prepared in @ta_buf to the table, or update the value
  * of an already existing entry when TEI_FLAGS_UPDATE is set.
  *
  * On insertion the entry is linked into its bucket and tb->ent_ptr is
  * cleared; on every other path tb->ent_ptr is left set.
  *
  * Returns 0 on success (*pnum is 1 for an insertion, 0 for an update;
  * on update the old value is returned in @tei->value and
  * TEI_FLAGS_UPDATED is set), EEXIST if the entry exists and no update
  * was requested, or EFBIG if TEI_FLAGS_DONTADD forbids adding.
  */
 static int
 ta_add_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct fhash_cfg *cfg = (struct fhash_cfg *)ta_state;
 	struct ta_buf_fhash *buf = (struct ta_buf_fhash *)ta_buf;
 	struct fhashentry *newent, *cur;
 	struct fhashbhead *slot;
 	uint32_t oldval;
 	size_t addrsz;
 
 	newent = (struct fhashentry *)buf->ent_ptr;
 
 	/* Read current value from @tei */
 	newent->value = tei->value;
 
 	slot = &cfg->head[hash_flow_ent(newent, cfg->size)];
 
 	addrsz = (tei->subtype == AF_INET) ?
 	    2 * sizeof(struct in_addr) : 2 * sizeof(struct in6_addr);
 
 	/* Check for existence; @cur is NULL after the walk if not found */
 	SLIST_FOREACH(cur, slot, next) {
 		if (cmp_flow_ent(cur, newent, addrsz) != 0)
 			break;
 	}
 
 	if (cur != NULL) {
 		/* Record already exists. Update value if we're asked to */
 		if ((tei->flags & TEI_FLAGS_UPDATE) == 0)
 			return (EEXIST);
 
 		/* Exchange values between cur and @tei */
 		oldval = cur->value;
 		cur->value = tei->value;
 		tei->value = oldval;
 		/* Indicate that update has happened instead of addition */
 		tei->flags |= TEI_FLAGS_UPDATED;
 		*pnum = 0;
 
 		return (0);
 	}
 
 	if ((tei->flags & TEI_FLAGS_DONTADD) != 0)
 		return (EFBIG);
 
 	SLIST_INSERT_HEAD(slot, newent, next);
 	buf->ent_ptr = NULL;
 	*pnum = 1;
 
 	/* Update counters and check if we need to grow hash */
 	cfg->items++;
 
 	return (0);
 }
 
 static int
 ta_prepare_del_fhash(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_fhash *tb;
 
 	tb = (struct ta_buf_fhash *)ta_buf;
 
 	return (tei_to_fhash_ent(tei, &tb->fe6.e));
 }
 
 static int
 ta_del_fhash(void *ta_state, struct table_info *ti, struct tentry_info *tei,
     void *ta_buf, uint32_t *pnum)
 {
 	struct fhash_cfg *cfg;
 	struct fhashbhead *head;
 	struct fhashentry *ent, *tmp;
 	struct ta_buf_fhash *tb;
 	uint32_t hash;
 	size_t sz;
 
 	cfg = (struct fhash_cfg *)ta_state;
 	tb = (struct ta_buf_fhash *)ta_buf;
 	ent = &tb->fe6.e;
 
 	head = cfg->head;
 	hash = hash_flow_ent(ent, cfg->size);
 
 	if (tei->subtype == AF_INET)
 		sz = 2 * sizeof(struct in_addr);
 	else
 		sz = 2 * sizeof(struct in6_addr);
 
 	/* Check for existence */
 	SLIST_FOREACH(tmp, &head[hash], next) {
 		if (cmp_flow_ent(tmp, ent, sz) == 0)
 			continue;
 
 		SLIST_REMOVE(&head[hash], tmp, fhashentry, next);
 		tei->value = tmp->value;
 		*pnum = 1;
 		cfg->items--;
 		tb->ent_ptr = tmp;
 		return (0);
 	}
 
 	return (ENOENT);
 }
 
 static void
 ta_flush_fhash_entry(struct ip_fw_chain *ch, struct tentry_info *tei,
     void *ta_buf)
 {
 	struct ta_buf_fhash *tb;
 
 	tb = (struct ta_buf_fhash *)ta_buf;
 
 	if (tb->ent_ptr != NULL)
 		free(tb->ent_ptr, M_IPFW_TBL);
 }
 
 /*
  * Hash growing callbacks.
  */
 
 static int
 ta_need_modify_fhash(void *ta_state, struct table_info *ti, uint32_t count,
     uint64_t *pflags)
 {
 	struct fhash_cfg *cfg;
 
 	cfg = (struct fhash_cfg *)ta_state;
 
 	if (cfg->items > cfg->size && cfg->size < 65536) {
 		*pflags = cfg->size * 2;
 		return (1);
 	}
 
 	return (0);
 }
 
 /*
  * Allocate new, larger fhash.
  */
 static int
 ta_prepare_mod_fhash(void *ta_buf, uint64_t *pflags)
 {
 	struct mod_item *mi;
 	struct fhashbhead *head;
 	int i;
 
 	mi = (struct mod_item *)ta_buf;
 
 	memset(mi, 0, sizeof(struct mod_item));
 	mi->size = *pflags;
 	head = malloc(sizeof(struct fhashbhead) * mi->size, M_IPFW,
 	    M_WAITOK | M_ZERO);
 	for (i = 0; i < mi->size; i++)
 		SLIST_INIT(&head[i]);
 
 	mi->main_ptr = head;
 
 	return (0);
 }
 
 /*
  * Copy data from old runtime array to new one.
  */
 static int
 ta_fill_mod_fhash(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t *pflags)
 {
 
 	/* In is not possible to do rehash if we're not holidng WLOCK. */
 	return (0);
 }
 
 /*
  * Switch old & new arrays.
  */
 static void
 ta_modify_fhash(void *ta_state, struct table_info *ti, void *ta_buf,
     uint64_t pflags)
 {
 	struct mod_item *mi;
 	struct fhash_cfg *cfg;
 	struct fhashbhead *old_head, *new_head;
 	struct fhashentry *ent, *ent_next;
 	int i;
 	uint32_t nhash;
 	size_t old_size;
 
 	mi = (struct mod_item *)ta_buf;
 	cfg = (struct fhash_cfg *)ta_state;
 
 	old_size = cfg->size;
 	old_head = ti->state;
 
 	new_head = (struct fhashbhead *)mi->main_ptr;
 	for (i = 0; i < old_size; i++) {
 		SLIST_FOREACH_SAFE(ent, &old_head[i], next, ent_next) {
 			nhash = hash_flow_ent(ent, mi->size);
 			SLIST_INSERT_HEAD(&new_head[nhash], ent, next);
 		}
 	}
 
 	ti->state = new_head;
 	ti->data = mi->size;
 	cfg->head = new_head;
 	cfg->size = mi->size;
 
 	mi->main_ptr = old_head;
 }
 
 /*
  * Free unneded array.
  */
 static void
 ta_flush_mod_fhash(void *ta_buf)
 {
 	struct mod_item *mi;
 
 	mi = (struct mod_item *)ta_buf;
 	if (mi->main_ptr != NULL)
 		free(mi->main_ptr, M_IPFW);
 }
 
 struct table_algo flow_hash = {
 	.name		= "flow:hash",
 	.type		= IPFW_TABLE_FLOW,
 	.flags		= TA_FLAG_DEFAULT,
 	.ta_buf_size	= sizeof(struct ta_buf_fhash),
 	.init		= ta_init_fhash,
 	.destroy	= ta_destroy_fhash,
 	.prepare_add	= ta_prepare_add_fhash,
 	.prepare_del	= ta_prepare_del_fhash,
 	.add		= ta_add_fhash,
 	.del		= ta_del_fhash,
 	.flush_entry	= ta_flush_fhash_entry,
 	.foreach	= ta_foreach_fhash,
 	.dump_tentry	= ta_dump_fhash_tentry,
 	.find_tentry	= ta_find_fhash_tentry,
 	.dump_tinfo	= ta_dump_fhash_tinfo,
 	.need_modify	= ta_need_modify_fhash,
 	.prepare_mod	= ta_prepare_mod_fhash,
 	.fill_mod	= ta_fill_mod_fhash,
 	.modify		= ta_modify_fhash,
 	.flush_mod	= ta_flush_mod_fhash,
 };
 
 /*
  * Kernel fibs bindings.
  *
  * Implementation:
  *
  * Runtime part:
  * - fully relies on route API
  * - fib number is stored in ti->data
  *
  */
 
-static struct rtentry *lookup_kfib(void *key, int keylen, int fib);
 static int ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val);
 static int kfib_parse_opts(int *pfib, char *data);
 static void ta_print_kfib_config(void *ta_state, struct table_info *ti,
     char *buf, size_t bufsize);
 static int ta_init_kfib(struct ip_fw_chain *ch, void **ta_state,
     struct table_info *ti, char *data, uint8_t tflags);
 static void ta_destroy_kfib(void *ta_state, struct table_info *ti);
 static void ta_dump_kfib_tinfo(void *ta_state, struct table_info *ti,
     ipfw_ta_tinfo *tinfo);
 static int contigmask(uint8_t *p, int len);
 static int ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e,
     ipfw_obj_tentry *tent);
+static int ta_dump_kfib_tentry_int(struct sockaddr *paddr,
+    struct sockaddr *pmask, ipfw_obj_tentry *tent);
 static int ta_find_kfib_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent);
 static void ta_foreach_kfib(void *ta_state, struct table_info *ti,
     ta_foreach_f *f, void *arg);
 
-static struct rtentry *
-lookup_kfib(void *key, int keylen, int fib)
-{
-	struct sockaddr *s;
 
-	if (keylen == 4) {
-		struct sockaddr_in sin;
-		bzero(&sin, sizeof(sin));
-		sin.sin_len = sizeof(struct sockaddr_in);
-		sin.sin_family = AF_INET;
-		sin.sin_addr.s_addr = *(in_addr_t *)key;
-		s = (struct sockaddr *)&sin;
-	} else {
-		struct sockaddr_in6 sin6;
-		bzero(&sin6, sizeof(sin6));
-		sin6.sin6_len = sizeof(struct sockaddr_in6);
-		sin6.sin6_family = AF_INET6;
-		sin6.sin6_addr = *(struct in6_addr *)key;
-		s = (struct sockaddr *)&sin6;
-	}
-
-	return (rtalloc1_fib(s, 0, 0, fib));
-}
-
 static int
 ta_lookup_kfib(struct table_info *ti, void *key, uint32_t keylen,
     uint32_t *val)
 {
-	struct rtentry *rte;
+#ifdef INET
+	struct nhop4_basic nh4;
+	struct in_addr in;
+#endif
+#ifdef INET6
+	struct nhop6_basic nh6;
+#endif
+	int error;
 
-	if ((rte = lookup_kfib(key, keylen, ti->data)) == NULL)
+	error = ENOENT;
+#ifdef INET
+	if (keylen == sizeof(in_addr_t)) {
+		in.s_addr = *(in_addr_t *)key;
+		error = fib4_lookup_nh_basic(ti->data,
+		    in, 0, 0, &nh4);
+	}
+#endif
+#ifdef INET6
+	if (keylen == sizeof(struct in6_addr))
+		error = fib6_lookup_nh_basic(ti->data,
+		    (struct in6_addr *)key, 0, 0, 0, &nh6);
+#endif
+
+	if (error != 0)
 		return (0);
 
 	*val = 0;
-	RTFREE_LOCKED(rte);
 
 	return (1);
 }
 
 /* Parse 'fib=%d' */
 static int
 kfib_parse_opts(int *pfib, char *data)
 {
 	char *pdel, *pend, *s;
 	int fibnum;
 
 	if (data == NULL)
 		return (0);
 	if ((pdel = strchr(data, ' ')) == NULL)
 		return (0);
 	while (*pdel == ' ')
 		pdel++;
 	if (strncmp(pdel, "fib=", 4) != 0)
 		return (EINVAL);
 	if ((s = strchr(pdel, ' ')) != NULL)
 		*s++ = '\0';
 
 	pdel += 4;
 	/* Need \d+ */
 	fibnum = strtol(pdel, &pend, 10);
 	if (*pend != '\0')
 		return (EINVAL);
 
 	*pfib = fibnum;
 
 	return (0);
 }
 
 static void
 ta_print_kfib_config(void *ta_state, struct table_info *ti, char *buf,
     size_t bufsize)
 {
 
 	if (ti->data != 0)
 		snprintf(buf, bufsize, "%s fib=%lu", "addr:kfib", ti->data);
 	else
 		snprintf(buf, bufsize, "%s", "addr:kfib");
 }
 
 static int
 ta_init_kfib(struct ip_fw_chain *ch, void **ta_state, struct table_info *ti,
     char *data, uint8_t tflags)
 {
 	int error, fibnum;
 
 	fibnum = 0;
 	if ((error = kfib_parse_opts(&fibnum, data)) != 0)
 		return (error);
 
 	if (fibnum >= rt_numfibs)
 		return (E2BIG);
 
 	ti->data = fibnum;
 	ti->lookup = ta_lookup_kfib;
 
 	return (0);
 }
 
 /*
  * Destroys table @ti
  */
 static void
 ta_destroy_kfib(void *ta_state, struct table_info *ti)
 {
 
 }
 
 /*
  * Provide algo-specific table info
  */
 static void
 ta_dump_kfib_tinfo(void *ta_state, struct table_info *ti, ipfw_ta_tinfo *tinfo)
 {
 
 	tinfo->flags = IPFW_TATFLAGS_AFDATA;
 	tinfo->taclass4 = IPFW_TACLASS_RADIX;
 	tinfo->count4 = 0;
 	tinfo->itemsize4 = sizeof(struct rtentry);
 	tinfo->taclass6 = IPFW_TACLASS_RADIX;
 	tinfo->count6 = 0;
 	tinfo->itemsize6 = sizeof(struct rtentry);
 }
 
 static int
 contigmask(uint8_t *p, int len)
 {
 	int i, n;
 
 	for (i = 0; i < len ; i++)
 		if ( (p[i/8] & (1 << (7 - (i%8)))) == 0) /* first bit unset */
 			break;
 	for (n= i + 1; n < len; n++)
 		if ( (p[n/8] & (1 << (7 - (n % 8)))) != 0)
 			return (-1); /* mask not contiguous */
 	return (i);
 }
 
 
 static int
 ta_dump_kfib_tentry(void *ta_state, struct table_info *ti, void *e,
     ipfw_obj_tentry *tent)
 {
 	struct rtentry *rte;
+
+	rte = (struct rtentry *)e;
+
+	return (ta_dump_kfib_tentry_int(rt_key(rte), rt_mask(rte), tent));
+}
+
+static int
+ta_dump_kfib_tentry_int(struct sockaddr *paddr, struct sockaddr *pmask,
+    ipfw_obj_tentry *tent)
+{
 #ifdef INET
 	struct sockaddr_in *addr, *mask;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *addr6, *mask6;
 #endif
 	int len;
 
-	rte = (struct rtentry *)e;
-	addr = (struct sockaddr_in *)rt_key(rte);
-	mask = (struct sockaddr_in *)rt_mask(rte);
 	len = 0;
 
 	/* Guess IPv4/IPv6 radix by sockaddr family */
 #ifdef INET
-	if (addr->sin_family == AF_INET) {
+	if (paddr->sa_family == AF_INET) {
+		addr = (struct sockaddr_in *)paddr;
+		mask = (struct sockaddr_in *)pmask;
 		tent->k.addr.s_addr = addr->sin_addr.s_addr;
 		len = 32;
 		if (mask != NULL)
 			len = contigmask((uint8_t *)&mask->sin_addr, 32);
 		if (len == -1)
 			len = 0;
 		tent->masklen = len;
 		tent->subtype = AF_INET;
 		tent->v.kidx = 0; /* Do we need to put GW here? */
 	}
 #endif
 #ifdef INET6
-	if (addr->sin_family == AF_INET6) {
-		addr6 = (struct sockaddr_in6 *)addr;
-		mask6 = (struct sockaddr_in6 *)mask;
+	if (paddr->sa_family == AF_INET6) {
+		addr6 = (struct sockaddr_in6 *)paddr;
+		mask6 = (struct sockaddr_in6 *)pmask;
 		memcpy(&tent->k, &addr6->sin6_addr, sizeof(struct in6_addr));
 		len = 128;
 		if (mask6 != NULL)
 			len = contigmask((uint8_t *)&mask6->sin6_addr, 128);
 		if (len == -1)
 			len = 0;
 		tent->masklen = len;
 		tent->subtype = AF_INET6;
 		tent->v.kidx = 0;
 	}
 #endif
 
 	return (0);
 }
 
 static int
 ta_find_kfib_tentry(void *ta_state, struct table_info *ti,
     ipfw_obj_tentry *tent)
 {
-	struct rtentry *rte;
-	void *key;
-	int keylen;
+	struct rt_addrinfo info;
+	struct sockaddr_in6 key6, dst6, mask6;
+	struct sockaddr *dst, *key, *mask;
 
+	/* Prepare sockaddr for prefix/mask and info */
+	bzero(&dst6, sizeof(dst6));
+	dst6.sin6_len = sizeof(dst6);
+	dst = (struct sockaddr *)&dst6;
+	bzero(&mask6, sizeof(mask6));
+	mask6.sin6_len = sizeof(mask6);
+	mask = (struct sockaddr *)&mask6;
+
+	bzero(&info, sizeof(info));
+	info.rti_info[RTAX_DST] = dst;
+	info.rti_info[RTAX_NETMASK] = mask;
+
+	/* Prepare the lookup key */
+	bzero(&key6, sizeof(key6));
+	key6.sin6_family = tent->subtype;
+	key = (struct sockaddr *)&key6;
+
 	if (tent->subtype == AF_INET) {
-		key = &tent->k.addr;
-		keylen = sizeof(struct in_addr);
+		((struct sockaddr_in *)&key6)->sin_addr = tent->k.addr;
+		key6.sin6_len = sizeof(struct sockaddr_in);
 	} else {
-		key = &tent->k.addr6;
-		keylen = sizeof(struct in6_addr);
+		key6.sin6_addr = tent->k.addr6;
+		key6.sin6_len = sizeof(struct sockaddr_in6);
 	}
 
-	if ((rte = lookup_kfib(key, keylen, ti->data)) == NULL)
-		return (0);
+	if (rib_lookup_info(ti->data, key, 0, 0, &info) != 0)
+		return (ENOENT);
+	if ((info.rti_addrs & RTA_NETMASK) == 0)
+		mask = NULL;
 
-	if (rte != NULL) {
-		ta_dump_kfib_tentry(ta_state, ti, rte, tent);
-		RTFREE_LOCKED(rte);
-		return (0);
-	}
+	ta_dump_kfib_tentry_int(dst, mask, tent);
 
-	return (ENOENT);
+	return (0);
 }
 
 static void
 ta_foreach_kfib(void *ta_state, struct table_info *ti, ta_foreach_f *f,
     void *arg)
 {
 	struct radix_node_head *rnh;
 	int error;
 
 	rnh = rt_tables_get_rnh(ti->data, AF_INET);
 	if (rnh != NULL) {
 		RADIX_NODE_HEAD_RLOCK(rnh); 
 		error = rnh->rnh_walktree(rnh, (walktree_f_t *)f, arg);
 		RADIX_NODE_HEAD_RUNLOCK(rnh);
 	}
 
 	rnh = rt_tables_get_rnh(ti->data, AF_INET6);
 	if (rnh != NULL) {
 		RADIX_NODE_HEAD_RLOCK(rnh); 
 		error = rnh->rnh_walktree(rnh, (walktree_f_t *)f, arg);
 		RADIX_NODE_HEAD_RUNLOCK(rnh);
 	}
 }
 
 struct table_algo addr_kfib = {
 	.name		= "addr:kfib",
 	.type		= IPFW_TABLE_ADDR,
 	.flags		= TA_FLAG_READONLY,
 	.ta_buf_size	= 0,
 	.init		= ta_init_kfib,
 	.destroy	= ta_destroy_kfib,
 	.foreach	= ta_foreach_kfib,
 	.dump_tentry	= ta_dump_kfib_tentry,
 	.find_tentry	= ta_find_kfib_tentry,
 	.dump_tinfo	= ta_dump_kfib_tinfo,
 	.print_config	= ta_print_kfib_config,
 };
 
 void
 ipfw_table_algo_init(struct ip_fw_chain *ch)
 {
 	size_t sz;
 
 	/*
 	 * Register all algorithms presented here.
 	 */
 	sz = sizeof(struct table_algo);
 	ipfw_add_table_algo(ch, &addr_radix, sz, &addr_radix.idx);
 	ipfw_add_table_algo(ch, &addr_hash, sz, &addr_hash.idx);
 	ipfw_add_table_algo(ch, &iface_idx, sz, &iface_idx.idx);
 	ipfw_add_table_algo(ch, &number_array, sz, &number_array.idx);
 	ipfw_add_table_algo(ch, &flow_hash, sz, &flow_hash.idx);
 	ipfw_add_table_algo(ch, &addr_kfib, sz, &addr_kfib.idx);
 }
 
 void
 ipfw_table_algo_destroy(struct ip_fw_chain *ch)
 {
 
 	ipfw_del_table_algo(ch, addr_radix.idx);
 	ipfw_del_table_algo(ch, addr_hash.idx);
 	ipfw_del_table_algo(ch, iface_idx.idx);
 	ipfw_del_table_algo(ch, number_array.idx);
 	ipfw_del_table_algo(ch, flow_hash.idx);
 	ipfw_del_table_algo(ch, addr_kfib.idx);
 }
 
 
Index: projects/clang380-import/sys/netpfil/pf/pf_if.c
===================================================================
--- projects/clang380-import/sys/netpfil/pf/pf_if.c	(revision 293686)
+++ projects/clang380-import/sys/netpfil/pf/pf_if.c	(revision 293687)
@@ -1,862 +1,863 @@
 /*-
  * Copyright (c) 2001 Daniel Hartmeier
  * Copyright (c) 2003 Cedric Berger
  * Copyright (c) 2005 Henning Brauer <henning@openbsd.org>
  * Copyright (c) 2005 Ryan McBride <mcbride@openbsd.org>
  * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  *    - Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
  *    - Redistributions in binary form must reproduce the above
  *      copyright notice, this list of conditions and the following
  *      disclaimer in the documentation and/or other materials provided
  *      with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  *	$OpenBSD: pf_if.c,v 1.54 2008/06/14 16:55:28 mk Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
+#include <sys/eventhandler.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/vnet.h>
 #include <net/pfvar.h>
 #include <net/route.h>
 
 VNET_DEFINE(struct pfi_kif *,	 pfi_all);
 static VNET_DEFINE(long, pfi_update);
 #define	V_pfi_update	VNET(pfi_update)
 #define PFI_BUFFER_MAX	0x10000
 
 static VNET_DEFINE(struct pfr_addr *, pfi_buffer);
 static VNET_DEFINE(int, pfi_buffer_cnt);
 static VNET_DEFINE(int,	pfi_buffer_max);
 #define	V_pfi_buffer		 VNET(pfi_buffer)
 #define	V_pfi_buffer_cnt	 VNET(pfi_buffer_cnt)
 #define	V_pfi_buffer_max	 VNET(pfi_buffer_max)
 
 eventhandler_tag	 pfi_attach_cookie;
 eventhandler_tag	 pfi_detach_cookie;
 eventhandler_tag	 pfi_attach_group_cookie;
 eventhandler_tag	 pfi_change_group_cookie;
 eventhandler_tag	 pfi_detach_group_cookie;
 eventhandler_tag	 pfi_ifaddr_event_cookie;
 
 static void	 pfi_attach_ifnet(struct ifnet *);
 static void	 pfi_attach_ifgroup(struct ifg_group *);
 
 static void	 pfi_kif_update(struct pfi_kif *);
 static void	 pfi_dynaddr_update(struct pfi_dynaddr *dyn);
 static void	 pfi_table_update(struct pfr_ktable *, struct pfi_kif *, int,
 		    int);
 static void	 pfi_instance_add(struct ifnet *, int, int);
 static void	 pfi_address_add(struct sockaddr *, int, int);
 static int	 pfi_if_compare(struct pfi_kif *, struct pfi_kif *);
 static int	 pfi_skip_if(const char *, struct pfi_kif *);
 static int	 pfi_unmask(void *);
 static void	 pfi_attach_ifnet_event(void * __unused, struct ifnet *);
 static void	 pfi_detach_ifnet_event(void * __unused, struct ifnet *);
 static void	 pfi_attach_group_event(void *, struct ifg_group *);
 static void	 pfi_change_group_event(void *, char *);
 static void	 pfi_detach_group_event(void *, struct ifg_group *);
 static void	 pfi_ifaddr_event(void * __unused, struct ifnet *);
 
 RB_HEAD(pfi_ifhead, pfi_kif);
 static RB_PROTOTYPE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare);
 static RB_GENERATE(pfi_ifhead, pfi_kif, pfik_tree, pfi_if_compare);
 static VNET_DEFINE(struct pfi_ifhead, pfi_ifs);
 #define	V_pfi_ifs	VNET(pfi_ifs)
 
 #define	PFI_BUFFER_MAX		0x10000
 MALLOC_DEFINE(PFI_MTYPE, "pf_ifnet", "pf(4) interface database");
 
 LIST_HEAD(pfi_list, pfi_kif);
 static VNET_DEFINE(struct pfi_list, pfi_unlinked_kifs);
 #define	V_pfi_unlinked_kifs	VNET(pfi_unlinked_kifs)
 static struct mtx pfi_unlnkdkifs_mtx;
 MTX_SYSINIT(pfi_unlnkdkifs_mtx, &pfi_unlnkdkifs_mtx, "pf unlinked interfaces",
     MTX_DEF);
 
 void
 pfi_initialize(void)
 {
 	struct ifg_group *ifg;
 	struct ifnet *ifp;
 	struct pfi_kif *kif;
 
 	V_pfi_buffer_max = 64;
 	V_pfi_buffer = malloc(V_pfi_buffer_max * sizeof(*V_pfi_buffer),
 	    PFI_MTYPE, M_WAITOK);
 
 	kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
 	PF_RULES_WLOCK();
 	V_pfi_all = pfi_kif_attach(kif, IFG_ALL);
 	PF_RULES_WUNLOCK();
 
 	IFNET_RLOCK();
 	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
 		pfi_attach_ifgroup(ifg);
 	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
 		pfi_attach_ifnet(ifp);
 	IFNET_RUNLOCK();
 
 	pfi_attach_cookie = EVENTHANDLER_REGISTER(ifnet_arrival_event,
 	    pfi_attach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
 	pfi_detach_cookie = EVENTHANDLER_REGISTER(ifnet_departure_event,
 	    pfi_detach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
 	pfi_attach_group_cookie = EVENTHANDLER_REGISTER(group_attach_event,
 	    pfi_attach_group_event, curvnet, EVENTHANDLER_PRI_ANY);
 	pfi_change_group_cookie = EVENTHANDLER_REGISTER(group_change_event,
 	    pfi_change_group_event, curvnet, EVENTHANDLER_PRI_ANY);
 	pfi_detach_group_cookie = EVENTHANDLER_REGISTER(group_detach_event,
 	    pfi_detach_group_event, curvnet, EVENTHANDLER_PRI_ANY);
 	pfi_ifaddr_event_cookie = EVENTHANDLER_REGISTER(ifaddr_event,
 	    pfi_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY);
 }
 
 void
 pfi_cleanup(void)
 {
 	struct pfi_kif *p;
 
 	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, pfi_attach_cookie);
 	EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfi_detach_cookie);
 	EVENTHANDLER_DEREGISTER(group_attach_event, pfi_attach_group_cookie);
 	EVENTHANDLER_DEREGISTER(group_change_event, pfi_change_group_cookie);
 	EVENTHANDLER_DEREGISTER(group_detach_event, pfi_detach_group_cookie);
 	EVENTHANDLER_DEREGISTER(ifaddr_event, pfi_ifaddr_event_cookie);
 
 	V_pfi_all = NULL;
 	while ((p = RB_MIN(pfi_ifhead, &V_pfi_ifs))) {
 		RB_REMOVE(pfi_ifhead, &V_pfi_ifs, p);
 		free(p, PFI_MTYPE);
 	}
 
 	while ((p = LIST_FIRST(&V_pfi_unlinked_kifs))) {
 		LIST_REMOVE(p, pfik_list);
 		free(p, PFI_MTYPE);
 	}
 
 	free(V_pfi_buffer, PFI_MTYPE);
 }
 
 struct pfi_kif *
 pfi_kif_find(const char *kif_name)
 {
 	struct pfi_kif_cmp s;
 
 	PF_RULES_ASSERT();
 
 	bzero(&s, sizeof(s));
 	strlcpy(s.pfik_name, kif_name, sizeof(s.pfik_name));
 
 	return (RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&s));
 }
 
 struct pfi_kif *
 pfi_kif_attach(struct pfi_kif *kif, const char *kif_name)
 {
 	struct pfi_kif *kif1;
 
 	PF_RULES_WASSERT();
 	KASSERT(kif != NULL, ("%s: null kif", __func__));
 
 	kif1 = pfi_kif_find(kif_name);
 	if (kif1 != NULL) {
 		free(kif, PFI_MTYPE);
 		return (kif1);
 	}
 
 	bzero(kif, sizeof(*kif));
 	strlcpy(kif->pfik_name, kif_name, sizeof(kif->pfik_name));
 	/*
 	 * It seems that the value of time_second is in unintialzied state
 	 * when pf sets interface statistics clear time in boot phase if pf
 	 * was statically linked to kernel. Instead of setting the bogus
 	 * time value have pfi_get_ifaces handle this case. In
 	 * pfi_get_ifaces it uses time_second if it sees the time is 0.
 	 */
 	kif->pfik_tzero = time_second > 1 ? time_second : 0;
 	TAILQ_INIT(&kif->pfik_dynaddrs);
 
 	RB_INSERT(pfi_ifhead, &V_pfi_ifs, kif);
 
 	return (kif);
 }
 
 void
 pfi_kif_ref(struct pfi_kif *kif)
 {
 
 	PF_RULES_WASSERT();
 	kif->pfik_rulerefs++;
 }
 
 void
 pfi_kif_unref(struct pfi_kif *kif)
 {
 
 	PF_RULES_WASSERT();
 	KASSERT(kif->pfik_rulerefs > 0, ("%s: %p has zero refs", __func__, kif));
 
 	kif->pfik_rulerefs--;
 
 	if (kif->pfik_rulerefs > 0)
 		return;
 
 	/* kif referencing an existing ifnet or group should exist. */
 	if (kif->pfik_ifp != NULL || kif->pfik_group != NULL || kif == V_pfi_all)
 		return;
 
 	RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif);
 
 	kif->pfik_flags |= PFI_IFLAG_REFS;
 
 	mtx_lock(&pfi_unlnkdkifs_mtx);
 	LIST_INSERT_HEAD(&V_pfi_unlinked_kifs, kif, pfik_list);
 	mtx_unlock(&pfi_unlnkdkifs_mtx);
 }
 
 void
 pfi_kif_purge(void)
 {
 	struct pfi_kif *kif, *kif1;
 
 	/*
 	 * Do naive mark-and-sweep garbage collecting of old kifs.
 	 * Reference flag is raised by pf_purge_expired_states().
 	 */
 	mtx_lock(&pfi_unlnkdkifs_mtx);
 	LIST_FOREACH_SAFE(kif, &V_pfi_unlinked_kifs, pfik_list, kif1) {
 		if (!(kif->pfik_flags & PFI_IFLAG_REFS)) {
 			LIST_REMOVE(kif, pfik_list);
 			free(kif, PFI_MTYPE);
 		} else
 			kif->pfik_flags &= ~PFI_IFLAG_REFS;
 	}
 	mtx_unlock(&pfi_unlnkdkifs_mtx);
 }
 
 int
 pfi_kif_match(struct pfi_kif *rule_kif, struct pfi_kif *packet_kif)
 {
 	struct ifg_list	*p;
 
 	if (rule_kif == NULL || rule_kif == packet_kif)
 		return (1);
 
 	if (rule_kif->pfik_group != NULL)
 		/* XXXGL: locking? */
 		TAILQ_FOREACH(p, &packet_kif->pfik_ifp->if_groups, ifgl_next)
 			if (p->ifgl_group == rule_kif->pfik_group)
 				return (1);
 
 	return (0);
 }
 
 static void
 pfi_attach_ifnet(struct ifnet *ifp)
 {
 	struct pfi_kif *kif;
 
 	kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
 
 	PF_RULES_WLOCK();
 	V_pfi_update++;
 	kif = pfi_kif_attach(kif, ifp->if_xname);
 
 	kif->pfik_ifp = ifp;
 	ifp->if_pf_kif = kif;
 
 	pfi_kif_update(kif);
 	PF_RULES_WUNLOCK();
 }
 
 static void
 pfi_attach_ifgroup(struct ifg_group *ifg)
 {
 	struct pfi_kif *kif;
 
 	kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
 
 	PF_RULES_WLOCK();
 	V_pfi_update++;
 	kif = pfi_kif_attach(kif, ifg->ifg_group);
 
 	kif->pfik_group = ifg;
 	ifg->ifg_pf_kif = kif;
 	PF_RULES_WUNLOCK();
 }
 
 int
 pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af)
 {
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		switch (dyn->pfid_acnt4) {
 		case 0:
 			return (0);
 		case 1:
 			return (PF_MATCHA(0, &dyn->pfid_addr4,
 			    &dyn->pfid_mask4, a, AF_INET));
 		default:
 			return (pfr_match_addr(dyn->pfid_kt, a, AF_INET));
 		}
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		switch (dyn->pfid_acnt6) {
 		case 0:
 			return (0);
 		case 1:
 			return (PF_MATCHA(0, &dyn->pfid_addr6,
 			    &dyn->pfid_mask6, a, AF_INET6));
 		default:
 			return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6));
 		}
 		break;
 #endif /* INET6 */
 	default:
 		return (0);
 	}
 }
 
 int
 pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af)
 {
 	struct pfi_dynaddr	*dyn;
 	char			 tblname[PF_TABLE_NAME_SIZE];
 	struct pf_ruleset	*ruleset = NULL;
 	struct pfi_kif		*kif;
 	int			 rv = 0;
 
 	PF_RULES_WASSERT();
 	KASSERT(aw->type == PF_ADDR_DYNIFTL, ("%s: type %u",
 	    __func__, aw->type));
 	KASSERT(aw->p.dyn == NULL, ("%s: dyn is %p", __func__, aw->p.dyn));
 
 	if ((dyn = malloc(sizeof(*dyn), PFI_MTYPE, M_NOWAIT | M_ZERO)) == NULL)
 		return (ENOMEM);
 
 	if ((kif = malloc(sizeof(*kif), PFI_MTYPE, M_NOWAIT)) == NULL) {
 		free(dyn, PFI_MTYPE);
 		return (ENOMEM);
 	}
 
 	if (!strcmp(aw->v.ifname, "self"))
 		dyn->pfid_kif = pfi_kif_attach(kif, IFG_ALL);
 	else
 		dyn->pfid_kif = pfi_kif_attach(kif, aw->v.ifname);
 	pfi_kif_ref(dyn->pfid_kif);
 
 	dyn->pfid_net = pfi_unmask(&aw->v.a.mask);
 	if (af == AF_INET && dyn->pfid_net == 32)
 		dyn->pfid_net = 128;
 	strlcpy(tblname, aw->v.ifname, sizeof(tblname));
 	if (aw->iflags & PFI_AFLAG_NETWORK)
 		strlcat(tblname, ":network", sizeof(tblname));
 	if (aw->iflags & PFI_AFLAG_BROADCAST)
 		strlcat(tblname, ":broadcast", sizeof(tblname));
 	if (aw->iflags & PFI_AFLAG_PEER)
 		strlcat(tblname, ":peer", sizeof(tblname));
 	if (aw->iflags & PFI_AFLAG_NOALIAS)
 		strlcat(tblname, ":0", sizeof(tblname));
 	if (dyn->pfid_net != 128)
 		snprintf(tblname + strlen(tblname),
 		    sizeof(tblname) - strlen(tblname), "/%d", dyn->pfid_net);
 	if ((ruleset = pf_find_or_create_ruleset(PF_RESERVED_ANCHOR)) == NULL) {
 		rv = ENOMEM;
 		goto _bad;
 	}
 
 	if ((dyn->pfid_kt = pfr_attach_table(ruleset, tblname)) == NULL) {
 		rv = ENOMEM;
 		goto _bad;
 	}
 
 	dyn->pfid_kt->pfrkt_flags |= PFR_TFLAG_ACTIVE;
 	dyn->pfid_iflags = aw->iflags;
 	dyn->pfid_af = af;
 
 	TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry);
 	aw->p.dyn = dyn;
 	pfi_kif_update(dyn->pfid_kif);
 
 	return (0);
 
 _bad:
 	if (dyn->pfid_kt != NULL)
 		pfr_detach_table(dyn->pfid_kt);
 	if (ruleset != NULL)
 		pf_remove_if_empty_ruleset(ruleset);
 	if (dyn->pfid_kif != NULL)
 		pfi_kif_unref(dyn->pfid_kif);
 	free(dyn, PFI_MTYPE);
 
 	return (rv);
 }
 
 static void
 pfi_kif_update(struct pfi_kif *kif)
 {
 	struct ifg_list		*ifgl;
 	struct pfi_dynaddr	*p;
 
 	PF_RULES_WASSERT();
 
 	/* update all dynaddr */
 	TAILQ_FOREACH(p, &kif->pfik_dynaddrs, entry)
 		pfi_dynaddr_update(p);
 
 	/* again for all groups kif is member of */
 	if (kif->pfik_ifp != NULL) {
 		IF_ADDR_RLOCK(kif->pfik_ifp);
 		TAILQ_FOREACH(ifgl, &kif->pfik_ifp->if_groups, ifgl_next)
 			pfi_kif_update((struct pfi_kif *)
 			    ifgl->ifgl_group->ifg_pf_kif);
 		IF_ADDR_RUNLOCK(kif->pfik_ifp);
 	}
 }
 
 static void
 pfi_dynaddr_update(struct pfi_dynaddr *dyn)
 {
 	struct pfi_kif		*kif;
 	struct pfr_ktable	*kt;
 
 	PF_RULES_WASSERT();
 	KASSERT(dyn && dyn->pfid_kif && dyn->pfid_kt,
 	    ("%s: bad argument", __func__));
 
 	kif = dyn->pfid_kif;
 	kt = dyn->pfid_kt;
 
 	if (kt->pfrkt_larg != V_pfi_update) {
 		/* this table needs to be brought up-to-date */
 		pfi_table_update(kt, kif, dyn->pfid_net, dyn->pfid_iflags);
 		kt->pfrkt_larg = V_pfi_update;
 	}
 	pfr_dynaddr_update(kt, dyn);
 }
 
 static void
 pfi_table_update(struct pfr_ktable *kt, struct pfi_kif *kif, int net, int flags)
 {
 	int			 e, size2 = 0;
 	struct ifg_member	*ifgm;
 
 	V_pfi_buffer_cnt = 0;
 
 	if (kif->pfik_ifp != NULL)
 		pfi_instance_add(kif->pfik_ifp, net, flags);
 	else if (kif->pfik_group != NULL) {
 		IFNET_RLOCK_NOSLEEP();
 		TAILQ_FOREACH(ifgm, &kif->pfik_group->ifg_members, ifgm_next)
 			pfi_instance_add(ifgm->ifgm_ifp, net, flags);
 		IFNET_RUNLOCK_NOSLEEP();
 	}
 
 	if ((e = pfr_set_addrs(&kt->pfrkt_t, V_pfi_buffer, V_pfi_buffer_cnt, &size2,
 	    NULL, NULL, NULL, 0, PFR_TFLAG_ALLMASK)))
 		printf("%s: cannot set %d new addresses into table %s: %d\n",
 		    __func__, V_pfi_buffer_cnt, kt->pfrkt_name, e);
 }
 
 static void
 pfi_instance_add(struct ifnet *ifp, int net, int flags)
 {
 	struct ifaddr	*ia;
 	int		 got4 = 0, got6 = 0;
 	int		 net2, af;
 
 	IF_ADDR_RLOCK(ifp);
 	TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_list) {
 		if (ia->ifa_addr == NULL)
 			continue;
 		af = ia->ifa_addr->sa_family;
 		if (af != AF_INET && af != AF_INET6)
 			continue;
 		/*
 		 * XXX: For point-to-point interfaces, (ifname:0) and IPv4,
 		 *      jump over addresses without a proper route to work
 		 *      around a problem with ppp not fully removing the
 		 *      address used during IPCP.
 		 */
 		if ((ifp->if_flags & IFF_POINTOPOINT) &&
 		    !(ia->ifa_flags & IFA_ROUTE) &&
 		    (flags & PFI_AFLAG_NOALIAS) && (af == AF_INET))
 			continue;
 		if ((flags & PFI_AFLAG_BROADCAST) && af == AF_INET6)
 			continue;
 		if ((flags & PFI_AFLAG_BROADCAST) &&
 		    !(ifp->if_flags & IFF_BROADCAST))
 			continue;
 		if ((flags & PFI_AFLAG_PEER) &&
 		    !(ifp->if_flags & IFF_POINTOPOINT))
 			continue;
 		if ((flags & PFI_AFLAG_NETWORK) && af == AF_INET6 &&
 		    IN6_IS_ADDR_LINKLOCAL(
 		    &((struct sockaddr_in6 *)ia->ifa_addr)->sin6_addr))
 			continue;
 		if (flags & PFI_AFLAG_NOALIAS) {
 			if (af == AF_INET && got4)
 				continue;
 			if (af == AF_INET6 && got6)
 				continue;
 		}
 		if (af == AF_INET)
 			got4 = 1;
 		else if (af == AF_INET6)
 			got6 = 1;
 		net2 = net;
 		if (net2 == 128 && (flags & PFI_AFLAG_NETWORK)) {
 			if (af == AF_INET)
 				net2 = pfi_unmask(&((struct sockaddr_in *)
 				    ia->ifa_netmask)->sin_addr);
 			else if (af == AF_INET6)
 				net2 = pfi_unmask(&((struct sockaddr_in6 *)
 				    ia->ifa_netmask)->sin6_addr);
 		}
 		if (af == AF_INET && net2 > 32)
 			net2 = 32;
 		if (flags & PFI_AFLAG_BROADCAST)
 			pfi_address_add(ia->ifa_broadaddr, af, net2);
 		else if (flags & PFI_AFLAG_PEER)
 			pfi_address_add(ia->ifa_dstaddr, af, net2);
 		else
 			pfi_address_add(ia->ifa_addr, af, net2);
 	}
 	IF_ADDR_RUNLOCK(ifp);
 }
 
 /*
  * Append one address entry to the V_pfi_buffer array, doubling the array
  * first when it is already full.
  *
  * sa:  address to record; read as a sockaddr_in or sockaddr_in6 depending
  *      on af.
  * af:  AF_INET or AF_INET6; for any other value no address is copied and
  *      the entry holds only af/net.
  * net: prefix length in bits, stored in pfra_net and used to mask the
  *      stored address.
  *
  * If the array cannot grow (PFI_BUFFER_MAX would be exceeded, or the
  * allocation fails) a message is printed and the address is dropped.
  */
 static void
 pfi_address_add(struct sockaddr *sa, int af, int net)
 {
 	struct pfr_addr	*p;
 	int		 i;
 
 	if (V_pfi_buffer_cnt >= V_pfi_buffer_max) {
 		/* Buffer full: try to double its capacity. */
 		int		 new_max = V_pfi_buffer_max * 2;
 
 		if (new_max > PFI_BUFFER_MAX) {
 			printf("%s: address buffer full (%d/%d)\n", __func__,
 			    V_pfi_buffer_cnt, PFI_BUFFER_MAX);
 			return;
 		}
 		/* M_NOWAIT: the allocation may fail, handled just below. */
 		p = malloc(new_max * sizeof(*V_pfi_buffer), PFI_MTYPE,
 		    M_NOWAIT);
 		if (p == NULL) {
 			printf("%s: no memory to grow buffer (%d/%d)\n",
 			    __func__, V_pfi_buffer_cnt, PFI_BUFFER_MAX);
 			return;
 		}
 		/* Carry the old entries over, then switch to the new array. */
 		memcpy(p, V_pfi_buffer, V_pfi_buffer_max * sizeof(*V_pfi_buffer));
 		/* no need to zero buffer */
 		free(V_pfi_buffer, PFI_MTYPE);
 		V_pfi_buffer = p;
 		V_pfi_buffer_max = new_max;
 	}
 	/*
 	 * NOTE(review): an IPv4 prefix longer than 32 is rewritten to 128,
 	 * which skips the partial-byte mask below and stores 128 in
 	 * pfra_net -- confirm this is intended rather than a clamp to 32.
 	 */
 	if (af == AF_INET && net > 32)
 		net = 128;
 	/* Claim the next free slot and start from an all-zero entry. */
 	p = V_pfi_buffer + V_pfi_buffer_cnt++;
 	bzero(p, sizeof(*p));
 	p->pfra_af = af;
 	p->pfra_net = net;
 	if (af == AF_INET)
 		p->pfra_ip4addr = ((struct sockaddr_in *)sa)->sin_addr;
 	else if (af == AF_INET6) {
 		p->pfra_ip6addr = ((struct sockaddr_in6 *)sa)->sin6_addr;
 		/* Clear the second 16-bit word of scope-embedded addresses. */
 		if (IN6_IS_SCOPE_EMBED(&p->pfra_ip6addr))
 			p->pfra_ip6addr.s6_addr16[1] = 0;
 	}
 	/* mask network address bits */
 	/*
 	 * The entry is indexed as raw bytes from offset 0, so this presumably
 	 * relies on pfra_u being the first member of struct pfr_addr -- verify.
 	 * First clear the host bits of the byte that straddles the prefix,
 	 * then zero every byte that lies entirely past the prefix.
 	 */
 	if (net < 128)
 		((caddr_t)p)[p->pfra_net/8] &= ~(0xFF >> (p->pfra_net%8));
 	for (i = (p->pfra_net+7)/8; i < sizeof(p->pfra_u); i++)
 		((caddr_t)p)[i] = 0;
 }
 
 void
 pfi_dynaddr_remove(struct pfi_dynaddr *dyn)
 {
 
 	KASSERT(dyn->pfid_kif != NULL, ("%s: null pfid_kif", __func__));
 	KASSERT(dyn->pfid_kt != NULL, ("%s: null pfid_kt", __func__));
 
 	TAILQ_REMOVE(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry);
 	pfi_kif_unref(dyn->pfid_kif);
 	pfr_detach_table(dyn->pfid_kt);
 	free(dyn, PFI_MTYPE);
 }
 
 void
 pfi_dynaddr_copyout(struct pf_addr_wrap *aw)
 {
 
 	KASSERT(aw->type == PF_ADDR_DYNIFTL,
 	    ("%s: type %u", __func__, aw->type));
 
 	if (aw->p.dyn == NULL || aw->p.dyn->pfid_kif == NULL)
 		return;
 	aw->p.dyncnt = aw->p.dyn->pfid_acnt4 + aw->p.dyn->pfid_acnt6;
 }
 
 /*
  * Ordering function for interface entries: compares the names of p and q
  * over at most IFNAMSIZ characters and returns the strncmp() result.
  */
 static int
 pfi_if_compare(struct pfi_kif *p, struct pfi_kif *q)
 {
 	int	order;
 
 	order = strncmp(p->pfik_name, q->pfik_name, IFNAMSIZ);
 	return (order);
 }
 
 /*
  * Accumulate or reset the packet/byte counters of the interface (or of all
  * members of the interface group) called "name".
  *
  * name: interface or group name to look up in V_pfi_ifs; nothing happens
  *       when it is not found.
  * pfs:  when non-NULL, its pcounters/bcounters are zeroed and then filled
  *       with the sum over all matching interfaces; when NULL, the
  *       counters of every matching interface are cleared instead and
  *       their pfik_tzero is set to the current time.
  */
 void
 pfi_update_status(const char *name, struct pf_status *pfs)
 {
 	struct pfi_kif		*p;
 	struct pfi_kif_cmp	 key;
 	struct ifg_member	 p_member, *ifgm;
 	TAILQ_HEAD(, ifg_member) ifg_members;
 	int			 i, j, k;
 
 	strlcpy(key.pfik_name, name, sizeof(key.pfik_name));
 	p = RB_FIND(pfi_ifhead, &V_pfi_ifs, (struct pfi_kif *)&key);
 	if (p == NULL)
 		return;
 
 	if (p->pfik_group != NULL) {
 		/* Group entry: walk the group's own member list. */
 		bcopy(&p->pfik_group->ifg_members, &ifg_members,
 		    sizeof(ifg_members));
 	} else {
 		/* build a temporary list for p only */
 		bzero(&p_member, sizeof(p_member));
 		p_member.ifgm_ifp = p->pfik_ifp;
 		TAILQ_INIT(&ifg_members);
 		TAILQ_INSERT_TAIL(&ifg_members, &p_member, ifgm_next);
 	}
 	if (pfs) {
 		bzero(pfs->pcounters, sizeof(pfs->pcounters));
 		bzero(pfs->bcounters, sizeof(pfs->bcounters));
 	}
 	TAILQ_FOREACH(ifgm, &ifg_members, ifgm_next) {
 		/*
 		 * Skip members with no ifnet, and ifnets whose pf state is
 		 * gone: the detach handler sets if_pf_kif to NULL, so it
 		 * must not be dereferenced blindly.
 		 */
 		if (ifgm->ifgm_ifp == NULL ||
 		    ifgm->ifgm_ifp->if_pf_kif == NULL)
 			continue;
 		p = (struct pfi_kif *)ifgm->ifgm_ifp->if_pf_kif;
 
 		/* just clear statistics */
 		if (pfs == NULL) {
 			bzero(p->pfik_packets, sizeof(p->pfik_packets));
 			bzero(p->pfik_bytes, sizeof(p->pfik_bytes));
 			p->pfik_tzero = time_second;
 			continue;
 		}
 		/*
 		 * Packets keep all three indices; bytes are folded over the
 		 * last index into the two-dimensional bcounters.
 		 */
 		for (i = 0; i < 2; i++)
 			for (j = 0; j < 2; j++)
 				for (k = 0; k < 2; k++) {
 					pfs->pcounters[i][j][k] +=
 						p->pfik_packets[i][j][k];
 					pfs->bcounters[i][j] +=
 						p->pfik_bytes[i][j][k];
 				}
 	}
 }
 
 void
 pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size)
 {
 	struct pfi_kif	*p, *nextp;
 	int		 n = 0;
 
 	for (p = RB_MIN(pfi_ifhead, &V_pfi_ifs); p; p = nextp) {
 		nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p);
 		if (pfi_skip_if(name, p))
 			continue;
 		if (*size <= n++)
 			break;
 		if (!p->pfik_tzero)
 			p->pfik_tzero = time_second;
 		bcopy(p, buf++, sizeof(*buf));
 		nextp = RB_NEXT(pfi_ifhead, &V_pfi_ifs, p);
 	}
 	*size = n;
 }
 
 /*
  * Decide whether interface entry p is rejected by "filter".
  *
  * Returns 0 (keep) when the filter is NULL or empty, when it equals the
  * entry's name, or when it does not end in a digit and the name is the
  * filter followed immediately by a digit.  Returns 1 (skip) otherwise.
  */
 static int
 pfi_skip_if(const char *filter, struct pfi_kif *p)
 {
 	int	len;
 	char	last, next;
 
 	if (filter == NULL || filter[0] == '\0')
 		return (0);
 	if (strcmp(p->pfik_name, filter) == 0)
 		return (0);	/* exact match */
 	len = strlen(filter);
 	if (len < 1 || len >= IFNAMSIZ)
 		return (1);	/* sanity check */
 	last = filter[len - 1];
 	if (last >= '0' && last <= '9')
 		return (1);	/* only do exact match in that case */
 	if (strncmp(p->pfik_name, filter, len) != 0)
 		return (1);	/* prefix doesn't match */
 	next = p->pfik_name[len];
 	return (next < '0' || next > '9');
 }
 
 /*
  * OR "flags" into pfik_flags of every interface entry accepted by the
  * "name" filter.  Always returns 0.
  */
 int
 pfi_set_flags(const char *name, int flags)
 {
 	struct pfi_kif	*kif;
 
 	RB_FOREACH(kif, pfi_ifhead, &V_pfi_ifs) {
 		if (!pfi_skip_if(name, kif))
 			kif->pfik_flags |= flags;
 	}
 	return (0);
 }
 
 /*
  * Clear the bits in "flags" from pfik_flags of every interface entry
  * accepted by the "name" filter.  Always returns 0.
  */
 int
 pfi_clear_flags(const char *name, int flags)
 {
 	struct pfi_kif	*kif;
 
 	RB_FOREACH(kif, pfi_ifhead, &V_pfi_ifs) {
 		if (!pfi_skip_if(name, kif))
 			kif->pfik_flags &= ~flags;
 	}
 	return (0);
 }
 
 /* from pf_print_state.c */
 static int
 pfi_unmask(void *addr)
 {
 	struct pf_addr *m = addr;
 	int i = 31, j = 0, b = 0;
 	u_int32_t tmp;
 
 	while (j < 4 && m->addr32[j] == 0xffffffff) {
 		b += 32;
 		j++;
 	}
 	if (j < 4) {
 		tmp = ntohl(m->addr32[j]);
 		for (i = 31; tmp & (1 << i); --i)
 			b++;
 	}
 	return (b);
 }
 
 /*
  * Event handler for a newly arrived interface: attaches ifp to pf within
  * the interface's vnet and, when ALTQ is compiled in, reports the event to
  * ALTQ (second argument 0) under the rules write lock.
  */
 static void
 pfi_attach_ifnet_event(void *arg __unused, struct ifnet *ifp)
 {
 
 	CURVNET_SET(ifp->if_vnet);
 	pfi_attach_ifnet(ifp);
 #ifdef ALTQ
 	PF_RULES_WLOCK();
 	pf_altq_ifnet_event(ifp, 0);
 	PF_RULES_WUNLOCK();
 #endif
 	CURVNET_RESTORE();
 }
 
 /*
  * Event handler for a departing interface: under the rules write lock,
  * bumps the update counter, refreshes the interface's pf state one last
  * time, then severs the links between the ifnet and its pfi_kif in both
  * directions.  With ALTQ compiled in, the event is also reported to ALTQ
  * (second argument 1).
  *
  * NOTE(review): kif is used without a NULL check; confirm that
  * ifp->if_pf_kif is always set when this handler runs.
  */
 static void
 pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp)
 {
 	struct pfi_kif *kif = (struct pfi_kif *)ifp->if_pf_kif;
 
 	CURVNET_SET(ifp->if_vnet);
 	PF_RULES_WLOCK();
 	V_pfi_update++;
 	pfi_kif_update(kif);
 
 	/* Break the ifnet <-> kif association in both directions. */
 	kif->pfik_ifp = NULL;
 	ifp->if_pf_kif = NULL;
 #ifdef ALTQ
 	pf_altq_ifnet_event(ifp, 1);
 #endif
 	PF_RULES_WUNLOCK();
 	CURVNET_RESTORE();
 }
 
 /*
  * Event handler for a new interface group: attaches ifg to pf.  arg is
  * used as the vnet to run in.
  */
 static void
 pfi_attach_group_event(void *arg , struct ifg_group *ifg)
 {
 
 	CURVNET_SET((struct vnet *)arg);
 	pfi_attach_ifgroup(ifg);
 	CURVNET_RESTORE();
 }
 
 /*
  * Event handler for a change to the interface group named gname: under the
  * rules write lock, bumps the update counter and refreshes the pf entry
  * for the group.  arg is used as the vnet to run in.
  */
 static void
 pfi_change_group_event(void *arg, char *gname)
 {
 	struct pfi_kif *kif;
 
 	/* Allocate with M_WAITOK before the rules lock is taken. */
 	kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
 
 	CURVNET_SET((struct vnet *)arg);
 	PF_RULES_WLOCK();
 	V_pfi_update++;
 	/*
 	 * presumably pfi_kif_attach() returns the entry actually registered
 	 * under gname (possibly an existing one) and takes ownership of the
 	 * allocation passed in -- verify against its definition.
 	 */
 	kif = pfi_kif_attach(kif, gname);
 	pfi_kif_update(kif);
 	PF_RULES_WUNLOCK();
 	CURVNET_RESTORE();
 }
 
 /*
  * Event handler for a departing interface group: under the rules write
  * lock, bumps the update counter and severs the links between the group
  * and its pfi_kif in both directions.  arg is used as the vnet to run in.
  *
  * NOTE(review): kif is used without a NULL check; confirm that
  * ifg->ifg_pf_kif is always set when this handler runs.
  */
 static void
 pfi_detach_group_event(void *arg, struct ifg_group *ifg)
 {
 	struct pfi_kif *kif = (struct pfi_kif *)ifg->ifg_pf_kif;
 
 	CURVNET_SET((struct vnet *)arg);
 	PF_RULES_WLOCK();
 	V_pfi_update++;
 
 	kif->pfik_group = NULL;
 	ifg->ifg_pf_kif = NULL;
 	PF_RULES_WUNLOCK();
 	CURVNET_RESTORE();
 }
 
 /*
  * Event handler for an address change on ifp: when the interface is known
  * to pf, bumps the update counter and refreshes its pf state under the
  * rules write lock.  A NULL ifp is ignored.
  */
 static void
 pfi_ifaddr_event(void *arg __unused, struct ifnet *ifp)
 {
 
 	/*
 	 * Test ifp before CURVNET_SET() dereferences it; testing it only
 	 * afterwards could never protect anything.
 	 */
 	if (ifp == NULL)
 		return;
 
 	CURVNET_SET(ifp->if_vnet);
 	PF_RULES_WLOCK();
 	if (ifp->if_pf_kif) {
 		V_pfi_update++;
 		pfi_kif_update(ifp->if_pf_kif);
 	}
 	PF_RULES_WUNLOCK();
 	CURVNET_RESTORE();
 }
Index: projects/clang380-import/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- projects/clang380-import/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c	(revision 293686)
+++ projects/clang380-import/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c	(revision 293687)
@@ -1,1544 +1,1540 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
  * General Public License (GPL) Version 2, available from the file
  * COPYING in the main directory of this source tree, or the
  * OpenIB.org BSD license below:
  *
  *     Redistribution and use in source and binary forms, with or
  *     without modification, are permitted provided that the following
  *     conditions are met:
  *
  *      - Redistributions of source code must retain the above
  *        copyright notice, this list of conditions and the following
  *        disclaimer.
  *
  *      - Redistributions in binary form must reproduce the above
  *        copyright notice, this list of conditions and the following
  *        disclaimer in the documentation and/or other materials
  *        provided with the distribution.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 
 #include "ipoib.h"
 
 static	int ipoib_resolvemulti(struct ifnet *, struct sockaddr **,
 		struct sockaddr *);
 
 
 #include <linux/module.h>
 
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/vmalloc.h>
 
 #include <linux/if_arp.h>	/* For ARPHRD_xxx */
 #include <linux/if_vlan.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
 MODULE_LICENSE("Dual BSD/GPL");
 
 int ipoib_sendq_size = IPOIB_TX_RING_SIZE;
 int ipoib_recvq_size = IPOIB_RX_RING_SIZE;
 
 module_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
 MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
 module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
 MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 int ipoib_debug_level = 1;
 
 module_param_named(debug_level, ipoib_debug_level, int, 0644);
 MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
 #endif
 
 /*
  * Cursor over a device's path tree.  It holds a by-value copy of the path
  * it currently points at, so the copy stays readable after priv->lock is
  * dropped.
  */
 struct ipoib_path_iter {
 	struct ipoib_dev_priv *priv;	/* device whose paths are walked */
 	struct ipoib_path  path;	/* snapshot of the current path */
 };
 
 /*
  * 20-byte template for the IPv4 broadcast link-layer address.  It is
  * copied into each device's broadcastaddr; bytes 8 and 9 are later
  * overwritten with the port's P_Key.
  */
 static const u8 ipv4_bcast_addr[] = {
 	0x00, 0xff, 0xff, 0xff,
 	0xff, 0x12, 0x40, 0x1b,	0x00, 0x00, 0x00, 0x00,
 	0x00, 0x00, 0x00, 0x00,	0xff, 0xff, 0xff, 0xff
 };
 
 struct workqueue_struct *ipoib_workqueue;
 
 struct ib_sa_client ipoib_sa_client;
 
 static void ipoib_add_one(struct ib_device *device);
 static void ipoib_remove_one(struct ib_device *device);
 static void ipoib_start(struct ifnet *dev);
 static int ipoib_output(struct ifnet *ifp, struct mbuf *m,
 	    const struct sockaddr *dst, struct route *ro);
 static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data);
 static void ipoib_input(struct ifnet *ifp, struct mbuf *m);
 
 /*
  * Hand mbuf _m to BPF listeners on _ifp, if there are any.  The mbuf must
  * begin with an ipoib_header (see ipoib_mtap_mb).
  */
 #define	IPOIB_MTAP(_ifp, _m)					\
 do {								\
 	if (bpf_peers_present((_ifp)->if_bpf)) {		\
 		M_ASSERTVALID(_m);				\
 		ipoib_mtap_mb((_ifp), (_m));			\
 	}							\
 } while (0)
 
 /*
  * This is for clients that have an ipoib_header in the mbuf.
  */
 static void
 ipoib_mtap_mb(struct ifnet *ifp, struct mbuf *mb)
 {
 	struct ipoib_header *ih;
 	struct ether_header eh;
 
 	ih = mtod(mb, struct ipoib_header *);
 	eh.ether_type = ih->proto;
 	bcopy(ih->hwaddr, &eh.ether_dhost, ETHER_ADDR_LEN);
 	bzero(&eh.ether_shost, ETHER_ADDR_LEN);
 	mb->m_data += sizeof(struct ipoib_header);
 	mb->m_len -= sizeof(struct ipoib_header);
 	bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
 	mb->m_data -= sizeof(struct ipoib_header);
 	mb->m_len += sizeof(struct ipoib_header);
 }
 
 /*
  * Tap a packet that carries no ipoib_header: a synthetic Ethernet header
  * with both addresses zeroed and ether_type set to proto is passed to BPF
  * in front of the mbuf.
  */
 void
 ipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto)
 {
 	struct ether_header hdr;
 
 	bzero(&hdr.ether_dhost, ETHER_ADDR_LEN);
 	bzero(&hdr.ether_shost, ETHER_ADDR_LEN);
 	hdr.ether_type = proto;
 	bpf_mtap2(ifp->if_bpf, &hdr, sizeof(hdr), mb);
 }
 
 /*
  * IB client descriptor for this driver, naming the per-device add and
  * remove callbacks.
  */
 static struct ib_client ipoib_client = {
 	.name   = "ipoib",
 	.add    = ipoib_add_one,
 	.remove = ipoib_remove_one
 };
 
 /*
  * Bring the interface up.
  *
  * Sets IPOIB_FLAG_ADMIN_UP, opens and starts the IB side, then (for a
  * non-sub-interface) opens every child interface that is not yet running,
  * and finally marks the ifnet running and not output-active.
  *
  * Returns 0 on success.  Also returns 0, without marking the interface
  * running, when ipoib_pkey_dev_delay_open() returns nonzero.  Returns
  * -EINVAL, with IPOIB_FLAG_ADMIN_UP cleared, when opening or starting the
  * IB side fails.
  */
 int
 ipoib_open(struct ipoib_dev_priv *priv)
 {
 	struct ifnet *dev = priv->dev;
 
 	ipoib_dbg(priv, "bringing up interface\n");
 
 	set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
 
 	if (ipoib_pkey_dev_delay_open(priv))
 		return 0;
 
 	if (ipoib_ib_dev_open(priv))
 		goto err_disable;
 
 	if (ipoib_ib_dev_up(priv))
 		goto err_stop;
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
 		struct ipoib_dev_priv *cpriv;
 
 		/* Bring up any child interfaces too */
 		mutex_lock(&priv->vlan_mutex);
 		list_for_each_entry(cpriv, &priv->child_intfs, list)
 			if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) == 0)
 				ipoib_open(cpriv);
 		mutex_unlock(&priv->vlan_mutex);
 	}
 	dev->if_drv_flags |= IFF_DRV_RUNNING;
 	dev->if_drv_flags &= ~IFF_DRV_OACTIVE;
 
 	return 0;
 
 err_stop:
 	/* The IB side was opened but could not be started: undo the open. */
 	ipoib_ib_dev_stop(priv, 1);
 
 err_disable:
 	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
 
 	return -EINVAL;
 }
 
 /*
  * if_init handler; arg is the device's ipoib_dev_priv.  Opens the
  * interface when it is not already running, then queues the light flush
  * work item.
  */
 static void
 ipoib_init(void *arg)
 {
 	struct ipoib_dev_priv *priv = arg;
 
 	if (!(priv->dev->if_drv_flags & IFF_DRV_RUNNING))
 		ipoib_open(priv);
 	queue_work(ipoib_workqueue, &priv->flush_light);
 }
 
 
 /*
  * Take the interface down: clears IPOIB_FLAG_ADMIN_UP and the ifnet's
  * running/output-active flags, brings down and stops the IB side, then
  * (for a non-sub-interface) stops every child interface that is still
  * running.  Always returns 0.
  */
 static int
 ipoib_stop(struct ipoib_dev_priv *priv)
 {
 	struct ifnet *dev = priv->dev;
 
 	ipoib_dbg(priv, "stopping interface\n");
 
 	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
 
 	dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
 
 	ipoib_ib_dev_down(priv, 0);
 	ipoib_ib_dev_stop(priv, 0);
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
 		struct ipoib_dev_priv *cpriv;
 
 		/* Bring down any child interfaces too */
 		mutex_lock(&priv->vlan_mutex);
 		list_for_each_entry(cpriv, &priv->child_intfs, list)
 			if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) != 0)
 				ipoib_stop(cpriv);
 		mutex_unlock(&priv->vlan_mutex);
 	}
 
 	return 0;
 }
 
 /*
  * Change the interface MTU.
  *
  * With connected mode administratively enabled, new_mtu is checked against
  * the connected-mode maximum and applied directly; a warning is logged
  * when it exceeds the multicast MTU.  Otherwise new_mtu is checked against
  * the datagram maximum for the port, recorded as admin_mtu, the effective
  * MTU becomes the smaller of admin_mtu and mcast_mtu, and a light flush is
  * queued.
  *
  * Returns 0 on success, -EINVAL when new_mtu exceeds the applicable limit.
  */
 int
 ipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu)
 {
 	struct ifnet *dev = priv->dev;
 
 	/* dev->if_mtu > 2K ==> connected mode */
 	if (ipoib_cm_admin_enabled(priv)) {
 		if (new_mtu > IPOIB_CM_MTU(ipoib_cm_max_mtu(priv)))
 			return -EINVAL;
 
 		if (new_mtu > priv->mcast_mtu)
 			ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
 				   priv->mcast_mtu);
 
 		dev->if_mtu = new_mtu;
 		return 0;
 	}
 
 	if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu))
 		return -EINVAL;
 
 	priv->admin_mtu = new_mtu;
 
 	dev->if_mtu = min(priv->mcast_mtu, priv->admin_mtu);
 
 	queue_work(ipoib_workqueue, &priv->flush_light);
 
 	return 0;
 }
 
 /*
  * if_ioctl handler.
  *
  * Handles interface flag changes (open/stop), multicast list changes
  * (queues the restart task while running), address assignment, link-level
  * address retrieval and MTU changes.  Any other command yields EINVAL.
  *
  * ipoib_open() and ipoib_change_mtu() return negative error values, so
  * their results are negated to obtain the positive value returned here.
  */
 static int
 ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
 {
 	struct ipoib_dev_priv *priv = ifp->if_softc;
 	/* data is viewed as an ifaddr or an ifreq depending on command. */
 	struct ifaddr *ifa = (struct ifaddr *) data;
 	struct ifreq *ifr = (struct ifreq *) data;
 	int error = 0;
 
 	switch (command) {
 	case SIOCSIFFLAGS:
 		if (ifp->if_flags & IFF_UP) {
 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 				error = -ipoib_open(priv);
 		} else
 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 				ipoib_stop(priv);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 			queue_work(ipoib_workqueue, &priv->restart_task);
 		break;
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
 
 		switch (ifa->ifa_addr->sa_family) {
 #ifdef INET
 		case AF_INET:
 			ifp->if_init(ifp->if_softc);	/* before arpwhohas */
 			arp_ifinit(ifp, ifa);
 			break;
 #endif
 		default:
 			ifp->if_init(ifp->if_softc);
 			break;
 		}
 		break;
 
 	case SIOCGIFADDR:
 		{
 			struct sockaddr *sa;
 
 			/*
 			 * NOTE(review): INFINIBAND_ALEN bytes are copied
 			 * into sa_data; confirm the destination inside
 			 * struct ifreq is large enough for that.
 			 */
 			sa = (struct sockaddr *) & ifr->ifr_data;
 			bcopy(IF_LLADDR(ifp),
 			      (caddr_t) sa->sa_data, INFINIBAND_ALEN);
 		}
 		break;
 
 	case SIOCSIFMTU:
 		/*
 		 * Set the interface MTU.
 		 */
 		error = -ipoib_change_mtu(priv, ifr->ifr_mtu);
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 	return (error);
 }
 
 
 /*
  * Look up the path whose destination GID equals the sizeof(union ib_gid)
  * bytes at gid.  Walks the device's path tree, ordered by memcmp() of the
  * GID.  Returns the path, or NULL when there is none.
  */
 static struct ipoib_path *
 __path_find(struct ipoib_dev_priv *priv, void *gid)
 {
 	struct rb_node *node;
 	struct ipoib_path *cand;
 	int cmp;
 
 	node = priv->path_tree.rb_node;
 	while (node != NULL) {
 		cand = rb_entry(node, struct ipoib_path, rb_node);
 		cmp = memcmp(gid, cand->pathrec.dgid.raw,
 		    sizeof (union ib_gid));
 		if (cmp == 0)
 			return cand;
 		node = (cmp < 0) ? node->rb_left : node->rb_right;
 	}
 
 	return NULL;
 }
 
 /*
  * Insert path into the device's path tree (keyed by destination GID) and
  * append it to the device's path list.
  *
  * Returns 0 on success, or -EEXIST when a path with the same destination
  * GID is already in the tree; in that case neither the tree nor the list
  * is modified.
  */
 static int
 __path_add(struct ipoib_dev_priv *priv, struct ipoib_path *path)
 {
 	struct rb_node **n = &priv->path_tree.rb_node;
 	struct rb_node *pn = NULL;
 	struct ipoib_path *tpath;
 	int ret;
 
 	/* Descend to the empty link where the new node belongs. */
 	while (*n) {
 		pn = *n;
 		tpath = rb_entry(pn, struct ipoib_path, rb_node);
 
 		ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw,
 			     sizeof (union ib_gid));
 		if (ret < 0)
 			n = &pn->rb_left;
 		else if (ret > 0)
 			n = &pn->rb_right;
 		else
 			return -EEXIST;
 	}
 
 	rb_link_node(&path->rb_node, pn, n);
 	rb_insert_color(&path->rb_node, &priv->path_tree);
 
 	list_add_tail(&path->list, &priv->path_list);
 
 	return 0;
 }
 
 /*
  * Release a path: drains its pending-packet queue, drops its address
  * handle if it has one, destroys its connected-mode TX context if it has
  * one, and frees the structure.  The path is not unlinked from the tree
  * or list here.
  */
 void
 ipoib_path_free(struct ipoib_dev_priv *priv, struct ipoib_path *path)
 {
 
 	_IF_DRAIN(&path->queue);
 
 	if (path->ah)
 		ipoib_put_ah(path->ah);
 	if (ipoib_cm_get(path))
 		ipoib_cm_destroy_tx(ipoib_cm_get(path));
 
 	kfree(path);
 }
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
 
 /*
  * Create a path iterator for priv, positioned on the first path whose
  * destination GID compares greater than the all-zero GID.
  *
  * Returns the iterator (to be released by the caller), or NULL when
  * allocation fails or no such path exists.
  */
 struct ipoib_path_iter *
 ipoib_path_iter_init(struct ipoib_dev_priv *priv)
 {
 	struct ipoib_path_iter *iter;
 
 	iter = kmalloc(sizeof *iter, GFP_KERNEL);
 	if (!iter)
 		return NULL;
 
 	iter->priv = priv;
 	/* Start from the all-zero GID so the first advance finds the first path. */
 	memset(iter->path.pathrec.dgid.raw, 0, 16);
 
 	if (ipoib_path_iter_next(iter)) {
 		kfree(iter);
 		return NULL;
 	}
 
 	return iter;
 }
 
 /*
  * Advance the iterator to the first path, in tree order, whose destination
  * GID compares greater than that of the iterator's current snapshot, and
  * copy that path into the iterator.  The tree is walked from its first
  * node under priv->lock.
  *
  * Returns 0 when a path was found, 1 when there is none.
  */
 int
 ipoib_path_iter_next(struct ipoib_path_iter *iter)
 {
 	struct ipoib_dev_priv *priv = iter->priv;
 	struct ipoib_path *cur;
 	struct rb_node *node;
 	int rc = 1;
 
 	spin_lock_irq(&priv->lock);
 
 	for (node = rb_first(&priv->path_tree); node != NULL;
 	    node = rb_next(node)) {
 		cur = rb_entry(node, struct ipoib_path, rb_node);
 
 		if (memcmp(iter->path.pathrec.dgid.raw, cur->pathrec.dgid.raw,
 		    sizeof (union ib_gid)) >= 0)
 			continue;
 
 		iter->path = *cur;
 		rc = 0;
 		break;
 	}
 
 	spin_unlock_irq(&priv->lock);
 
 	return rc;
 }
 
 /*
  * Copy the iterator's current path snapshot into *path.
  */
 void
 ipoib_path_iter_read(struct ipoib_path_iter *iter, struct ipoib_path *path)
 {
 	*path = iter->path;
 }
 
 #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
 
 /*
  * Clear the valid flag of every path on the device's path list, under
  * priv->lock.  The paths themselves stay in place.
  */
 void
 ipoib_mark_paths_invalid(struct ipoib_dev_priv *priv)
 {
 	struct ipoib_path *entry;
 
 	spin_lock_irq(&priv->lock);
 
 	list_for_each_entry(entry, &priv->path_list, list) {
 		ipoib_dbg(priv, "mark path LID 0x%04x GID %16D invalid\n",
 			be16_to_cpu(entry->pathrec.dlid),
 			entry->pathrec.dgid.raw, ":");
 		entry->valid = 0;
 	}
 
 	spin_unlock_irq(&priv->lock);
 }
 
 /*
  * Remove and free every path of the device.
  *
  * Under priv->lock the whole path list is moved to a private list and each
  * path is erased from the tree.  Then, for each path, any outstanding
  * query is cancelled, the lock is dropped while waiting for the path's
  * completion and freeing it, and the lock is retaken for the next one.
  */
 void
 ipoib_flush_paths(struct ipoib_dev_priv *priv)
 {
 	struct ipoib_path *path, *tp;
 	LIST_HEAD(remove_list);
 	unsigned long flags;
 
 	spin_lock_irqsave(&priv->lock, flags);
 
 	/* Detach all paths at once; priv->path_list is left empty. */
 	list_splice_init(&priv->path_list, &remove_list);
 
 	list_for_each_entry(path, &remove_list, list)
 		rb_erase(&path->rb_node, &priv->path_tree);
 
 	list_for_each_entry_safe(path, tp, &remove_list, list) {
 		if (path->query)
 			ib_sa_cancel_query(path->query_id, path->query);
 		/* The wait below is done with the lock released. */
 		spin_unlock_irqrestore(&priv->lock, flags);
 		wait_for_completion(&path->done);
 		ipoib_path_free(priv, path);
 		spin_lock_irqsave(&priv->lock, flags);
 	}
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
 /*
  * Completion callback for a path record query started by path_rec_start().
  *
  * status:   0 on success, nonzero otherwise.
  * pathrec:  the returned path record (used only when status is 0).
  * path_ptr: the ipoib_path the query was issued for.
  *
  * On success an address handle is created from the record.  If that
  * worked, then under priv->lock the path adopts the record and the new
  * handle, its queued packets are moved to a local queue, a connected-mode
  * TX context is created when applicable, and the path is marked valid.
  * In every case the query is marked finished and path->done is completed.
  * After the lock is released, the previous address handle is dropped and
  * the moved packets are resubmitted through if_transmit.
  */
 static void
 path_rec_completion(int status, struct ib_sa_path_rec *pathrec, void *path_ptr)
 {
 	struct ipoib_path *path = path_ptr;
 	struct ipoib_dev_priv *priv = path->priv;
 	struct ifnet *dev = priv->dev;
 	struct ipoib_ah *ah = NULL;
 	struct ipoib_ah *old_ah = NULL;
 	struct ifqueue mbqueue;
 	struct mbuf *mb;
 	unsigned long flags;
 
 	if (!status)
 		ipoib_dbg(priv, "PathRec LID 0x%04x for GID %16D\n",
 			  be16_to_cpu(pathrec->dlid), pathrec->dgid.raw, ":");
 	else
 		ipoib_dbg(priv, "PathRec status %d for GID %16D\n",
 			  status, path->pathrec.dgid.raw, ":");
 
 	/* Local holding queue for packets to resend once unlocked. */
 	bzero(&mbqueue, sizeof(mbqueue));
 
 	if (!status) {
 		struct ib_ah_attr av;
 
 		if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
 			ah = ipoib_create_ah(priv, priv->pd, &av);
 	}
 
 	spin_lock_irqsave(&priv->lock, flags);
 
 	if (ah) {
 		path->pathrec = *pathrec;
 
 		old_ah   = path->ah;
 		path->ah = ah;
 
 		ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
 			  ah, be16_to_cpu(pathrec->dlid), pathrec->sl);
 
 		/* Move every pending packet from the path to mbqueue. */
 		for (;;) {
 			_IF_DEQUEUE(&path->queue, mb);
 			if (mb == NULL)
 				break;
 			_IF_ENQUEUE(&mbqueue, mb);
 		}
 
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
 		if (ipoib_cm_enabled(priv, path->hwaddr) && !ipoib_cm_get(path))
 			ipoib_cm_set(path, ipoib_cm_create_tx(priv, path));
 #endif
 
 		path->valid = 1;
 	}
 
 	/* Query is over either way; wake anyone waiting on the path. */
 	path->query = NULL;
 	complete(&path->done);
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	if (old_ah)
 		ipoib_put_ah(old_ah);
 
 	/* Resubmit the packets that were waiting for this path. */
 	for (;;) {
 		_IF_DEQUEUE(&mbqueue, mb);
 		if (mb == NULL)
 			break;
 		mb->m_pkthdr.rcvif = dev;
 		if (dev->if_transmit(dev, mb))
 			ipoib_warn(priv, "dev_queue_xmit failed "
 				   "to requeue packet\n");
 	}
 }
 
 /*
  * Allocate and pre-fill a path for the destination given by hwaddr.
  *
  * The destination GID is taken from hwaddr starting at byte 4; the source
  * GID and P_Key come from priv, and the traffic class from the broadcast
  * group.  The path is not inserted into the tree and no query is started.
  *
  * Returns the new path, or NULL when the device has no broadcast group
  * or the allocation fails.
  */
 static struct ipoib_path *
 path_rec_create(struct ipoib_dev_priv *priv, uint8_t *hwaddr)
 {
 	struct ipoib_path *path;
 
 	if (!priv->broadcast)
 		return NULL;
 
 	/* GFP_ATOMIC: presumably may be called with priv->lock held -- verify. */
 	path = kzalloc(sizeof *path, GFP_ATOMIC);
 	if (!path)
 		return NULL;
 
 	path->priv = priv;
 
 	bzero(&path->queue, sizeof(path->queue));
 
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
 	memcpy(&path->hwaddr, hwaddr, INFINIBAND_ALEN);
 #endif
 	memcpy(path->pathrec.dgid.raw, &hwaddr[4], sizeof (union ib_gid));
 	path->pathrec.sgid	    = priv->local_gid;
 	path->pathrec.pkey	    = cpu_to_be16(priv->pkey);
 	path->pathrec.numb_path     = 1;
 	path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;
 
 	return path;
 }
 
 /*
  * Start an asynchronous path record query for path; the result is
  * delivered to path_rec_completion().
  *
  * The query asks for an MTU greater than the IB MTU one step below the
  * power of two that holds the interface MTU plus encapsulation; for sizes
  * outside 512..4096 the MTU is not constrained at all.
  *
  * Returns 0 when the query was started.  On failure, returns the negative
  * value from ib_sa_path_rec_get(), with path->query cleared and
  * path->done completed.
  */
 static int
 path_rec_start(struct ipoib_dev_priv *priv, struct ipoib_path *path)
 {
 	struct ifnet *dev = priv->dev;
 
 	ib_sa_comp_mask comp_mask = IB_SA_PATH_REC_MTU_SELECTOR | IB_SA_PATH_REC_MTU;
 	struct ib_sa_path_rec p_rec;
 
 	/* Query with a private copy so the path's own record is untouched. */
 	p_rec = path->pathrec;
 	p_rec.mtu_selector = IB_SA_GT;
 
 	switch (roundup_pow_of_two(dev->if_mtu + IPOIB_ENCAP_LEN)) {
 	case 512:
 		p_rec.mtu = IB_MTU_256;
 		break;
 	case 1024:
 		p_rec.mtu = IB_MTU_512;
 		break;
 	case 2048:
 		p_rec.mtu = IB_MTU_1024;
 		break;
 	case 4096:
 		p_rec.mtu = IB_MTU_2048;
 		break;
 	default:
 		/* Wildcard everything */
 		comp_mask = 0;
 		p_rec.mtu = 0;
 		p_rec.mtu_selector = 0;
 	}
 
 	ipoib_dbg(priv, "Start path record lookup for %16D MTU > %d\n",
 		  p_rec.dgid.raw, ":",
 		  comp_mask ? ib_mtu_enum_to_int(p_rec.mtu) : 0);
 
 	init_completion(&path->done);
 
 	path->query_id =
 		ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port,
 				   &p_rec, comp_mask		|
 				   IB_SA_PATH_REC_DGID		|
 				   IB_SA_PATH_REC_SGID		|
 				   IB_SA_PATH_REC_NUMB_PATH	|
 				   IB_SA_PATH_REC_TRAFFIC_CLASS |
 				   IB_SA_PATH_REC_PKEY,
 				   1000, GFP_ATOMIC,
 				   path_rec_completion,
 				   path, &path->query);
 	if (path->query_id < 0) {
 		ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id);
 		/* No callback will run: mark the path idle and completed here. */
 		path->query = NULL;
 		complete(&path->done);
 		return path->query_id;
 	}
 
 	return 0;
 }
 
 /*
  * Send a unicast packet to the destination named in its IPoIB header.
  *
  * mb:   packet to send; ownership passes to this function.
  * priv: device to send on.
  * eh:   the packet's IPoIB header; the destination GID is read from
  *       eh->hwaddr starting at byte 4.
  *
  * If no valid path to the destination exists yet, one is created when
  * needed, the packet is parked on the path's pending queue and a path
  * record query is started.  Otherwise the packet goes out through the
  * connected-mode channel or the path's address handle.  A packet that can
  * be neither sent nor queued is freed and counted as an output error.
  *
  * Called from _ipoib_start() with priv->lock held.  The lock is never
  * released here, because the caller unlocks it after the send loop.
  */
 static void
 ipoib_unicast_send(struct mbuf *mb, struct ipoib_dev_priv *priv, struct ipoib_header *eh)
 {
 	struct ipoib_path *path;
 
 	path = __path_find(priv, eh->hwaddr + 4);
 	if (!path || !path->valid) {
 		int new_path = 0;
 
 		if (!path) {
 			path = path_rec_create(priv, eh->hwaddr);
 			new_path = 1;
 		}
 		if (path) {
 			/*
 			 * Park the packet until the path resolves, but cap
 			 * the backlog exactly as the resolved-path branch
 			 * below does.
 			 */
 			if (path->queue.ifq_len < IPOIB_MAX_PATH_REC_QUEUE) {
 				_IF_ENQUEUE(&path->queue, mb);
 			} else {
 				if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1);
 				m_freem(mb);
 			}
 			if (!path->query && path_rec_start(priv, path)) {
 				/*
 				 * Query could not be started.  A path created
 				 * above was never added to the tree, so free
 				 * it (which also drains its queue).
 				 */
 				if (new_path)
 					ipoib_path_free(priv, path);
 				return;
 			} else
 				__path_add(priv, path);
 		} else {
 			if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1);
 			m_freem(mb);
 		}
 
 		return;
 	}
 
 	if (ipoib_cm_get(path) && ipoib_cm_up(path)) {
 		ipoib_cm_send(priv, mb, ipoib_cm_get(path));
 	} else if (path->ah) {
 		ipoib_send(priv, mb, path->ah, IPOIB_QPN(eh->hwaddr));
 	} else if ((path->query || !path_rec_start(priv, path)) &&
 		    path->queue.ifq_len < IPOIB_MAX_PATH_REC_QUEUE) {
 		_IF_ENQUEUE(&path->queue, mb);
 	} else {
 		if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1);
 		m_freem(mb);
 	}
 }
 
 /*
  * Dispatch one outgoing packet according to the destination hardware
  * address in its IPoIB header.  Multicast packets get the device P_Key
  * written into bytes 8 and 9 of the address and go to the multicast send
  * path; everything else goes to the unicast send path.  Always returns 0.
  */
 static int
 ipoib_send_one(struct ipoib_dev_priv *priv, struct mbuf *mb)
 {
 	struct ipoib_header *hdr;
 
 	hdr = mtod(mb, struct ipoib_header *);
 	if (!IPOIB_IS_MULTICAST(hdr->hwaddr)) {
 		ipoib_unicast_send(mb, priv, hdr);
 		return 0;
 	}
 
 	/* Add in the P_Key for multicast*/
 	hdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
 	hdr->hwaddr[9] = priv->pkey & 0xff;
 	ipoib_mcast_send(priv, hdr->hwaddr + 4, mb);
 
 	return 0;
 }
 
 
 /*
  * Drain the send queue of dev through priv.
  *
  * Does nothing unless the interface is running and not output-active.
  * Otherwise, with priv->lock held for the whole loop, packets are dequeued
  * one at a time, shown to BPF and handed to ipoib_send_one(), until the
  * queue is empty or the interface becomes output-active.
  */
 static void
 _ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv)
 {
 	struct mbuf *mb;
 
 	if ((dev->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
 	    IFF_DRV_RUNNING)
 		return;
 
 	spin_lock(&priv->lock);
 	while (!IFQ_DRV_IS_EMPTY(&dev->if_snd) &&
 	    (dev->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
 		IFQ_DRV_DEQUEUE(&dev->if_snd, mb);
 		if (mb == NULL)
 			break;
 		IPOIB_MTAP(dev, mb);
 		ipoib_send_one(priv, mb);
 	}
 	spin_unlock(&priv->lock);
 }
 
 /*
  * if_start handler: drains the send queue using the private data stored
  * in the ifnet's if_softc.
  */
 static void
 ipoib_start(struct ifnet *dev)
 {
 	_ipoib_start(dev, dev->if_softc);
 }
 
 /*
  * Start routine that takes its private data from the ifnet's VLAN cookie.
  * With a cookie present the send queue is drained normally; without one,
  * every queued packet is freed and counted as an output error.
  */
 static void
 ipoib_vlan_start(struct ifnet *dev)
 {
 	struct ipoib_dev_priv *priv;
 	struct mbuf *mb;
 
 	priv = VLAN_COOKIE(dev);
 	if (priv != NULL) {
 		_ipoib_start(dev, priv);
 		return;
 	}
 
 	for (;;) {
 		if (IFQ_DRV_IS_EMPTY(&dev->if_snd))
 			break;
 		IFQ_DRV_DEQUEUE(&dev->if_snd, mb);
 		if (mb == NULL)
 			break;
 		m_freem(mb);
 		if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
 	}
 }
 
 int
 ipoib_dev_init(struct ipoib_dev_priv *priv, struct ib_device *ca, int port)
 {
 
 	/* Allocate RX/TX "rings" to hold queued mbs */
 	priv->rx_ring =	kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
 				GFP_KERNEL);
 	if (!priv->rx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
 		       ca->name, ipoib_recvq_size);
 		goto out;
 	}
 
 	priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, GFP_KERNEL);
 	if (!priv->tx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
 		       ca->name, ipoib_sendq_size);
 		goto out_rx_ring_cleanup;
 	}
 	memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring);
 
 	/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
 
 	if (ipoib_ib_dev_init(priv, ca, port))
 		goto out_tx_ring_cleanup;
 
 	return 0;
 
 out_tx_ring_cleanup:
 	kfree(priv->tx_ring);
 
 out_rx_ring_cleanup:
 	kfree(priv->rx_ring);
 
 out:
 	return -ENOMEM;
 }
 
 /*
  * Detach priv from the network stack and free it.  A regular interface
  * has its BPF attachment and ifnet detached and freed; a sub-interface
  * only has its VLAN cookie cleared.  priv itself is freed in both cases
  * and must not be used afterwards.
  */
 static void
 ipoib_detach(struct ipoib_dev_priv *priv)
 {
 	struct ifnet *dev;
 
 	dev = priv->dev;
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
 		bpfdetach(dev);
 		if_detach(dev);
 		if_free(dev);
 	} else
 		VLAN_SETCOOKIE(priv->dev, NULL);
 
 	free(priv, M_TEMP);
 }
 
 /*
  * Release the resources of priv set up by ipoib_dev_init(): every child
  * interface is cleaned up and detached first, then the IB side is cleaned
  * up and both rings are freed and reset to NULL.  priv itself is not
  * freed.
  */
 void
 ipoib_dev_cleanup(struct ipoib_dev_priv *priv)
 {
 	struct ipoib_dev_priv *cpriv, *tcpriv;
 
 	/* Delete any child interfaces first */
 	/*
 	 * NOTE(review): ipoib_detach() frees cpriv, but nothing visible here
 	 * unlinks it from child_intfs -- confirm the list is not used again.
 	 */
 	list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
 		ipoib_dev_cleanup(cpriv);
 		ipoib_detach(cpriv);
 	}
 
 	ipoib_ib_dev_cleanup(priv);
 
 	kfree(priv->rx_ring);
 	kfree(priv->tx_ring);
 
 	priv->rx_ring = NULL;
 	priv->tx_ring = NULL;
 }
 
 static volatile int ipoib_unit;
 
 /*
  * Allocate a zeroed ipoib_dev_priv and initialise its locks, list heads
  * and work items, and seed its broadcast address from ipv4_bcast_addr.
  * No ifnet is attached.  The allocation uses M_WAITOK, and the result is
  * returned without a NULL check.
  */
 static struct ipoib_dev_priv *
 ipoib_priv_alloc(void)
 {
 	struct ipoib_dev_priv *priv;
 
 	priv = malloc(sizeof(struct ipoib_dev_priv), M_TEMP, M_ZERO|M_WAITOK);
 	spin_lock_init(&priv->lock);
 	spin_lock_init(&priv->drain_lock);
 	mutex_init(&priv->vlan_mutex);
 	INIT_LIST_HEAD(&priv->path_list);
 	INIT_LIST_HEAD(&priv->child_intfs);
 	INIT_LIST_HEAD(&priv->dead_ahs);
 	INIT_LIST_HEAD(&priv->multicast_list);
 	INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
 	INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
 	INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
 	INIT_WORK(&priv->flush_light,   ipoib_ib_dev_flush_light);
 	INIT_WORK(&priv->flush_normal,   ipoib_ib_dev_flush_normal);
 	INIT_WORK(&priv->flush_heavy,   ipoib_ib_dev_flush_heavy);
 	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
 	INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
 	memcpy(priv->broadcastaddr, ipv4_bcast_addr, INFINIBAND_ALEN);
 
 	return (priv);
 }
 
 /*
  * Create a new IPoIB interface: allocates the private data and an
  * IFT_INFINIBAND ifnet, names it "name" plus the next unit number,
  * attaches it to the stack, installs the driver's handlers, sets the link
  * state to down and attaches BPF.
  *
  * Returns the private data, or NULL when the ifnet cannot be allocated.
  */
 struct ipoib_dev_priv *
 ipoib_intf_alloc(const char *name)
 {
 	struct ipoib_dev_priv *priv;
 	struct sockaddr_dl *sdl;
 	struct ifnet *dev;
 
 	priv = ipoib_priv_alloc();
 	dev = priv->dev = if_alloc(IFT_INFINIBAND);
 	if (!dev) {
 		free(priv, M_TEMP);
 		return NULL;
 	}
 	dev->if_softc = priv;
 	/* Unit numbers come from a shared counter, incremented atomically. */
 	if_initname(dev, name, atomic_fetchadd_int(&ipoib_unit, 1));
 	dev->if_flags = IFF_BROADCAST | IFF_MULTICAST;
 	dev->if_addrlen = INFINIBAND_ALEN;
 	dev->if_hdrlen = IPOIB_HEADER_LEN;
 	if_attach(dev);
 	dev->if_init = ipoib_init;
 	dev->if_ioctl = ipoib_ioctl;
 	dev->if_start = ipoib_start;
 	dev->if_output = ipoib_output;
 	dev->if_input = ipoib_input;
 	dev->if_resolvemulti = ipoib_resolvemulti;
 	dev->if_baudrate = IF_Gbps(10);
 	dev->if_broadcastaddr = priv->broadcastaddr;
 	dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2;
 	/* Describe the link-level address as InfiniBand-typed. */
 	sdl = (struct sockaddr_dl *)dev->if_addr->ifa_addr;
 	sdl->sdl_type = IFT_INFINIBAND;
 	sdl->sdl_alen = dev->if_addrlen;
 	priv->dev = dev;
 	if_link_state_change(dev, LINK_STATE_DOWN);
 	/* BPF sees Ethernet-style frames; see ipoib_mtap_mb(). */
 	bpfattach(dev, DLT_EN10MB, ETHER_HDR_LEN);
 
 	return dev->if_softc;
 }
 
 /*
  * Query the capabilities of hca and derive the ifnet's offload and
  * capability flags from them.
  *
  * The device capability flags are saved in priv->hca_caps.  When
  * connected mode is not compiled in and the HCA supports UD IP checksum
  * offload, checksum offload is enabled.  VLAN hardware tagging, VLAN MTU
  * and link state capabilities are always advertised, and every advertised
  * capability is enabled.
  *
  * Returns 0 on success, -ENOMEM when the temporary attribute buffer cannot
  * be allocated, or the nonzero result of ib_query_device().
  */
 int
 ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
 {
 	struct ib_device_attr *device_attr;
 	int result = -ENOMEM;
 
 	device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
 	if (!device_attr) {
 		printk(KERN_WARNING "%s: allocation of %zu bytes failed\n",
 		       hca->name, sizeof *device_attr);
 		return result;
 	}
 
 	result = ib_query_device(hca, device_attr);
 	if (result) {
 		printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n",
 		       hca->name, result);
 		kfree(device_attr);
 		return result;
 	}
 	priv->hca_caps = device_attr->device_cap_flags;
 
 	kfree(device_attr);
 
 	priv->dev->if_hwassist = 0;
 	priv->dev->if_capabilities = 0;
 
 #ifndef CONFIG_INFINIBAND_IPOIB_CM
 	if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
 		set_bit(IPOIB_FLAG_CSUM, &priv->flags);
 		priv->dev->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP;
 		priv->dev->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
 	}
 
 #if 0
 	if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO) {
 		priv->dev->if_capabilities |= IFCAP_TSO4;
 		priv->dev->if_hwassist |= CSUM_TSO;
 	}
 #endif
 #endif
 	priv->dev->if_capabilities |=
 	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_LINKSTATE;
 	priv->dev->if_capenable = priv->dev->if_capabilities;
 
 	return 0;
 }
 
 
 /*
  * Create and initialise the IPoIB interface for one port of an HCA.
  *
  * format: base name for the new interface.
  * hca:    IB device the port belongs to.
  * port:   port number on that device.
  *
  * Queries the port's maximum MTU, P_Key and GID, derives the interface
  * MTU, device features, broadcast address and link-level address from
  * them, initialises the device and registers the IB event handler.
  *
  * Returns the new ifnet on success.  On failure everything set up so far
  * is torn down and an ERR_PTR() carrying a negative error value is
  * returned; every failure path leaves a nonzero value in "result", so the
  * return value is never ERR_PTR(0).
  */
 static struct ifnet *
 ipoib_add_port(const char *format, struct ib_device *hca, u8 port)
 {
 	struct ipoib_dev_priv *priv;
 	struct ib_port_attr attr;
 	int result = -ENOMEM;
 
 	priv = ipoib_intf_alloc(format);
 	if (!priv)
 		goto alloc_mem_failed;
 
 	if (!ib_query_port(hca, port, &attr))
 		priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
 	else {
 		printk(KERN_WARNING "%s: ib_query_port %d failed\n",
 		       hca->name, port);
 		goto device_init_failed;
 	}
 
 	/* MTU will be reset when mcast join happens */
 	priv->dev->if_mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
 	priv->mcast_mtu = priv->admin_mtu = priv->dev->if_mtu;
 
 	result = ib_query_pkey(hca, port, 0, &priv->pkey);
 	if (result) {
 		printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
 		       hca->name, port, result);
 		goto device_init_failed;
 	}
 
 	/*
 	 * Keep the error value: "result" is 0 at this point, and jumping to
 	 * the failure label without updating it would hand ERR_PTR(0), i.e.
 	 * NULL, to a caller that only tests IS_ERR().
 	 */
 	result = ipoib_set_dev_features(priv, hca);
 	if (result)
 		goto device_init_failed;
 
 	/*
 	 * Set the full membership bit, so that we join the right
 	 * broadcast group, etc.
 	 */
 	priv->pkey |= 0x8000;
 
 	priv->broadcastaddr[8] = priv->pkey >> 8;
 	priv->broadcastaddr[9] = priv->pkey & 0xff;
 
 	result = ib_query_gid(hca, port, 0, &priv->local_gid);
 	if (result) {
 		printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
 		       hca->name, port, result);
 		goto device_init_failed;
 	}
 	/* The port GID forms the link-level address from byte 4 onwards. */
 	memcpy(IF_LLADDR(priv->dev) + 4, priv->local_gid.raw, sizeof (union ib_gid));
 
 	result = ipoib_dev_init(priv, hca, port);
 	if (result < 0) {
 		printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
 		       hca->name, port, result);
 		goto device_init_failed;
 	}
 	if (ipoib_cm_admin_enabled(priv))
 		priv->dev->if_mtu = IPOIB_CM_MTU(ipoib_cm_max_mtu(priv));
 
 	INIT_IB_EVENT_HANDLER(&priv->event_handler,
 			      priv->ca, ipoib_event);
 	result = ib_register_event_handler(&priv->event_handler);
 	if (result < 0) {
 		printk(KERN_WARNING "%s: ib_register_event_handler failed for "
 		       "port %d (ret = %d)\n",
 		       hca->name, port, result);
 		goto event_failed;
 	}
 	if_printf(priv->dev, "Attached to %s port %d\n", hca->name, port);
 
 	return priv->dev;
 
 event_failed:
 	ipoib_dev_cleanup(priv);
 
 device_init_failed:
 	ipoib_detach(priv);
 
 alloc_mem_failed:
 	return ERR_PTR(result);
 }
 
 /*
  * IB client "add" callback: creates an IPoIB interface for every port of
  * "device" whose link layer is InfiniBand and records them in a list
  * stored as the client data of the device.
  *
  * Nothing is done for devices whose transport is not IB, or when the list
  * head cannot be allocated.  Ports whose interface fails to come up are
  * skipped.
  */
 static void
 ipoib_add_one(struct ib_device *device)
 {
 	struct list_head *dev_list;
 	struct ifnet *dev;
 	struct ipoib_dev_priv *priv;
 	int s, e, p;
 
 	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
 		return;
 
 	dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
 	if (!dev_list)
 		return;
 
 	INIT_LIST_HEAD(dev_list);
 
 	/* A switch exposes only port 0; other nodes use ports 1..phys_port_cnt. */
 	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		s = 0;
 		e = 0;
 	} else {
 		s = 1;
 		e = device->phys_port_cnt;
 	}
 
 	for (p = s; p <= e; ++p) {
 		if (rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND)
 			continue;
 		dev = ipoib_add_port("ib", device, p);
 		if (!IS_ERR(dev)) {
 			priv = dev->if_softc;
 			list_add_tail(&priv->list, dev_list);
 		}
 	}
 
 	ib_set_client_data(device, &ipoib_client, dev_list);
 }
 
 static void
 ipoib_remove_one(struct ib_device *device)
 {
 	struct ipoib_dev_priv *priv, *tmp;
 	struct list_head *dev_list;
 
 	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
 		return;
 
 	dev_list = ib_get_client_data(device, &ipoib_client);
 
 	list_for_each_entry_safe(priv, tmp, dev_list, list) {
 		if (rdma_port_get_link_layer(device, priv->port) != IB_LINK_LAYER_INFINIBAND)
 			continue;
 
 		ipoib_stop(priv);
 
 		ib_unregister_event_handler(&priv->event_handler);
 
 		/* dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); */
 
 		flush_workqueue(ipoib_workqueue);
 
 		ipoib_dev_cleanup(priv);
 		ipoib_detach(priv);
 	}
 
 	kfree(dev_list);
 }
 
 static void
 ipoib_config_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
 {
 	struct ipoib_dev_priv *parent;
 	struct ipoib_dev_priv *priv;
 	struct ifnet *dev;
 	uint16_t pkey;
 	int error;
 
 	if (ifp->if_type != IFT_INFINIBAND)
 		return;
 	dev = VLAN_DEVAT(ifp, vtag);
 	if (dev == NULL)
 		return;
 	priv = NULL;
 	error = 0;
 	parent = ifp->if_softc;
 	/* We only support 15 bits of pkey. */
 	if (vtag & 0x8000)
 		return;
 	pkey = vtag | 0x8000;	/* Set full membership bit. */
 	if (pkey == parent->pkey)
 		return;
 	/* Check for dups */
 	mutex_lock(&parent->vlan_mutex);
 	list_for_each_entry(priv, &parent->child_intfs, list) {
 		if (priv->pkey == pkey) {
 			priv = NULL;
 			error = EBUSY;
 			goto out;
 		}
 	}
 	priv = ipoib_priv_alloc();
 	priv->dev = dev;
 	priv->max_ib_mtu = parent->max_ib_mtu;
 	priv->mcast_mtu = priv->admin_mtu = parent->dev->if_mtu;
 	set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
 	error = ipoib_set_dev_features(priv, parent->ca);
 	if (error)
 		goto out;
 	priv->pkey = pkey;
 	priv->broadcastaddr[8] = pkey >> 8;
 	priv->broadcastaddr[9] = pkey & 0xff;
 	dev->if_broadcastaddr = priv->broadcastaddr;
 	error = ipoib_dev_init(priv, parent->ca, parent->port);
 	if (error)
 		goto out;
 	priv->parent = parent->dev;
 	list_add_tail(&priv->list, &parent->child_intfs);
 	VLAN_SETCOOKIE(dev, priv);
 	dev->if_start = ipoib_vlan_start;
 	dev->if_drv_flags &= ~IFF_DRV_RUNNING;
 	dev->if_hdrlen = IPOIB_HEADER_LEN;
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 		ipoib_open(priv);
 	mutex_unlock(&parent->vlan_mutex);
 	return;
 out:
 	mutex_unlock(&parent->vlan_mutex);
 	if (priv)
 		free(priv, M_TEMP);
 	if (error)
 		ipoib_warn(parent,
 		    "failed to initialize subinterface: device %s, port %d vtag 0x%X",
 		    parent->ca->name, parent->port, vtag);
 	return;
 }
 
 static void
 ipoib_unconfig_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
 {
 	struct ipoib_dev_priv *parent;
 	struct ipoib_dev_priv *priv;
 	struct ifnet *dev;
 	uint16_t pkey;
 
 	if (ifp->if_type != IFT_INFINIBAND)
 		return;
 
 	dev = VLAN_DEVAT(ifp, vtag);
 	if (dev)
 		VLAN_SETCOOKIE(dev, NULL);
 	pkey = vtag | 0x8000;
 	parent = ifp->if_softc;
 	mutex_lock(&parent->vlan_mutex);
 	list_for_each_entry(priv, &parent->child_intfs, list) {
 		if (priv->pkey == pkey) {
 			ipoib_dev_cleanup(priv);
 			list_del(&priv->list);
 			break;
 		}
 	}
 	mutex_unlock(&parent->vlan_mutex);
 }
 
 eventhandler_tag ipoib_vlan_attach;
 eventhandler_tag ipoib_vlan_detach;
 
 static int __init
 ipoib_init_module(void)
 {
 	int ret;
 
 	ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size);
 	ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE);
 	ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE);
 
 	ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
 	ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
 	ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE,
 						     IPOIB_MIN_QUEUE_SIZE));
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
 	ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
 #endif
 
 	ipoib_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 		ipoib_config_vlan, NULL, EVENTHANDLER_PRI_FIRST);
 	ipoib_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
 		ipoib_unconfig_vlan, NULL, EVENTHANDLER_PRI_FIRST);
 
 	/*
 	 * We create our own workqueue mainly because we want to be
 	 * able to flush it when devices are being removed.  We can't
 	 * use schedule_work()/flush_scheduled_work() because both
 	 * unregister_netdev() and linkwatch_event take the rtnl lock,
 	 * so flush_scheduled_work() can deadlock during device
 	 * removal.
 	 */
 	ipoib_workqueue = create_singlethread_workqueue("ipoib");
 	if (!ipoib_workqueue) {
 		ret = -ENOMEM;
 		goto err_fs;
 	}
 
 	ib_sa_register_client(&ipoib_sa_client);
 
 	ret = ib_register_client(&ipoib_client);
 	if (ret)
 		goto err_sa;
 
 	return 0;
 
 err_sa:
 	ib_sa_unregister_client(&ipoib_sa_client);
 	destroy_workqueue(ipoib_workqueue);
 
 err_fs:
 	return ret;
 }
 
 static void __exit
 ipoib_cleanup_module(void)
 {
 
 	EVENTHANDLER_DEREGISTER(vlan_config, ipoib_vlan_attach);
 	EVENTHANDLER_DEREGISTER(vlan_unconfig, ipoib_vlan_detach);
 	ib_unregister_client(&ipoib_client);
 	ib_sa_unregister_client(&ipoib_sa_client);
 	destroy_workqueue(ipoib_workqueue);
 }
 
 /*
  * Infiniband output routine.
  */
 static int
 ipoib_output(struct ifnet *ifp, struct mbuf *m,
 	const struct sockaddr *dst, struct route *ro)
 {
 	u_char edst[INFINIBAND_ALEN];
 #if defined(INET) || defined(INET6)
 	struct llentry *lle = NULL;
 #endif
-	struct rtentry *rt0 = NULL;
 	struct ipoib_header *eh;
 	int error = 0, is_gw = 0;
 	short type;
 
-	if (ro != NULL) {
-		rt0 = ro->ro_rt;
-		if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0)
-			is_gw = 1;
-	}
+	if (ro != NULL)
+		is_gw = (ro->ro_flags & RT_HAS_GW) != 0;
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
 	if (error)
 		goto bad;
 #endif
 
 	M_PROFILE(m);
 	if (ifp->if_flags & IFF_MONITOR) {
 		error = ENETDOWN;
 		goto bad;
 	}
 	if (!((ifp->if_flags & IFF_UP) &&
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
 		error = ENETDOWN;
 		goto bad;
 	}
 
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
 		if (lle != NULL && (lle->la_flags & LLE_VALID))
 			memcpy(edst, lle->ll_addr, sizeof(edst));
 		else if (m->m_flags & M_MCAST)
 			ip_ib_mc_map(((struct sockaddr_in *)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst);
 		else
 			error = arpresolve(ifp, is_gw, m, dst, edst, NULL);
 		if (error)
 			return (error == EWOULDBLOCK ? 0 : error);
 		type = htons(ETHERTYPE_IP);
 		break;
 	case AF_ARP:
 	{
 		struct arphdr *ah;
 		ah = mtod(m, struct arphdr *);
 		ah->ar_hrd = htons(ARPHRD_INFINIBAND);
 
 		switch(ntohs(ah->ar_op)) {
 		case ARPOP_REVREQUEST:
 		case ARPOP_REVREPLY:
 			type = htons(ETHERTYPE_REVARP);
 			break;
 		case ARPOP_REQUEST:
 		case ARPOP_REPLY:
 		default:
 			type = htons(ETHERTYPE_ARP);
 			break;
 		}
 
 		if (m->m_flags & M_BCAST)
 			bcopy(ifp->if_broadcastaddr, edst, INFINIBAND_ALEN);
 		else
 			bcopy(ar_tha(ah), edst, INFINIBAND_ALEN);
 
 	}
 	break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		if (lle != NULL && (lle->la_flags & LLE_VALID))
 			memcpy(edst, lle->ll_addr, sizeof(edst));
 		else if (m->m_flags & M_MCAST)
 			ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst);
 		else
 			error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL);
 		if (error)
 			return error;
 		type = htons(ETHERTYPE_IPV6);
 		break;
 #endif
 
 	default:
 		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
 		error = EAFNOSUPPORT;
 		goto bad;
 	}
 
 	/*
 	 * Add local net header.  If no space in first mbuf,
 	 * allocate another.
 	 */
 	M_PREPEND(m, IPOIB_HEADER_LEN, M_NOWAIT);
 	if (m == NULL) {
 		error = ENOBUFS;
 		goto bad;
 	}
 	eh = mtod(m, struct ipoib_header *);
 	(void)memcpy(&eh->proto, &type, sizeof(eh->proto));
 	(void)memcpy(&eh->hwaddr, edst, sizeof (edst));
 
 	/*
 	 * Queue message on interface, update output statistics if
 	 * successful, and start output if interface not yet active.
 	 */
 	return ((ifp->if_transmit)(ifp, m));
 bad:
 	if (m != NULL)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * Upper layer processing for a received Infiniband packet.
  */
 void
 ipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto)
 {
 	int isr;
 
 #ifdef MAC
 	/*
 	 * Tag the mbuf with an appropriate MAC label before any other
 	 * consumers can get to it.
 	 */
 	mac_ifnet_create_mbuf(ifp, m);
 #endif
 	/* Allow monitor mode to claim this frame, after stats are updated. */
 	if (ifp->if_flags & IFF_MONITOR) {
 		if_printf(ifp, "discard frame at IFF_MONITOR\n");
 		m_freem(m);
 		return;
 	}
 	/*
 	 * Dispatch frame to upper layer.
 	 */
 	switch (proto) {
 #ifdef INET
 	case ETHERTYPE_IP:
 		isr = NETISR_IP;
 		break;
 
 	case ETHERTYPE_ARP:
 		if (ifp->if_flags & IFF_NOARP) {
 			/* Discard packet if ARP is disabled on interface */
 			m_freem(m);
 			return;
 		}
 		isr = NETISR_ARP;
 		break;
 #endif
 #ifdef INET6
 	case ETHERTYPE_IPV6:
 		isr = NETISR_IPV6;
 		break;
 #endif
 	default:
 		goto discard;
 	}
 	netisr_dispatch(isr, m);
 	return;
 
 discard:
 	m_freem(m);
 }
 
 /*
  * Process a received Infiniband packet.
  */
 static void
 ipoib_input(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ipoib_header *eh;
 
 	if ((ifp->if_flags & IFF_UP) == 0) {
 		m_freem(m);
 		return;
 	}
 	CURVNET_SET_QUIET(ifp->if_vnet);
 
 	/* Let BPF have it before we strip the header. */
 	IPOIB_MTAP(ifp, m);
 	eh = mtod(m, struct ipoib_header *);
 	/*
 	 * Reset layer specific mbuf flags to avoid confusing upper layers.
 	 * Strip off Infiniband header.
 	 */
 	m->m_flags &= ~M_VLANTAG;
 	m_clrprotoflags(m);
 	m_adj(m, IPOIB_HEADER_LEN);
 
 	if (IPOIB_IS_MULTICAST(eh->hwaddr)) {
 		if (memcmp(eh->hwaddr, ifp->if_broadcastaddr,
 		    ifp->if_addrlen) == 0)
 			m->m_flags |= M_BCAST;
 		else
 			m->m_flags |= M_MCAST;
 		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
 	}
 
 	ipoib_demux(ifp, m, ntohs(eh->proto));
 	CURVNET_RESTORE();
 }
 
 static int
 ipoib_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
 	struct sockaddr *sa)
 {
 	struct sockaddr_dl *sdl;
 #ifdef INET
 	struct sockaddr_in *sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *sin6;
 #endif
 	u_char *e_addr;
 
 	switch(sa->sa_family) {
 	case AF_LINK:
 		/*
 		 * No mapping needed. Just check that it's a valid MC address.
 		 */
 		sdl = (struct sockaddr_dl *)sa;
 		e_addr = LLADDR(sdl);
 		if (!IPOIB_IS_MULTICAST(e_addr))
 			return EADDRNOTAVAIL;
 		*llsa = 0;
 		return 0;
 
 #ifdef INET
 	case AF_INET:
 		sin = (struct sockaddr_in *)sa;
 		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 			return EADDRNOTAVAIL;
 		sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
 		sdl->sdl_alen = INFINIBAND_ALEN;
 		e_addr = LLADDR(sdl);
 		ip_ib_mc_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr,
 		    e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return 0;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		sin6 = (struct sockaddr_in6 *)sa;
 		/*
 		 * An IP6 address of 0 means listen to all
 		 * of the multicast address used for IP6.  
 		 * This has no meaning in ipoib.
 		 */
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
 			return EADDRNOTAVAIL;
 		if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
 			return EADDRNOTAVAIL;
 		sdl = link_init_sdl(ifp, *llsa, IFT_INFINIBAND);
 		sdl->sdl_alen = INFINIBAND_ALEN;
 		e_addr = LLADDR(sdl);
 		ipv6_ib_mc_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr);
 		*llsa = (struct sockaddr *)sdl;
 		return 0;
 #endif
 
 	default:
 		return EAFNOSUPPORT;
 	}
 }
 
 module_init(ipoib_init_module);
 module_exit(ipoib_cleanup_module);
 
 static int
 ipoib_evhand(module_t mod, int event, void *arg)
 {
 	                return (0);
 }
 
 static moduledata_t ipoib_mod = {
 	                .name = "ipoib",
 			                .evhand = ipoib_evhand,
 };
 
 DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_SMP, SI_ORDER_ANY);
 MODULE_DEPEND(ipoib, ibcore, 1, 1, 1);
 MODULE_DEPEND(ipoib, linuxkpi, 1, 1, 1);
Index: projects/clang380-import/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c
===================================================================
--- projects/clang380-import/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c	(revision 293686)
+++ projects/clang380-import/sys/ofed/drivers/infiniband/ulp/sdp/sdp_rx.c	(revision 293687)
@@ -1,782 +1,782 @@
 /*
  * Copyright (c) 2009 Mellanox Technologies Ltd.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
  * General Public License (GPL) Version 2, available from the file
  * COPYING in the main directory of this source tree, or the
  * OpenIB.org BSD license below:
  *
  *     Redistribution and use in source and binary forms, with or
  *     without modification, are permitted provided that the following
  *     conditions are met:
  *
  *      - Redistributions of source code must retain the above
  *        copyright notice, this list of conditions and the following
  *        disclaimer.
  *
  *      - Redistributions in binary form must reproduce the above
  *        copyright notice, this list of conditions and the following
  *        disclaimer in the documentation and/or other materials
  *        provided with the distribution.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 #include "sdp.h"
 
 SDP_MODPARAM_INT(rcvbuf_initial_size, 32 * 1024,
 		"Receive buffer initial size in bytes.");
 SDP_MODPARAM_SINT(rcvbuf_scale, 0x8,
 		"Receive buffer size scale factor.");
 
 /* Like tcp_fin - called when SDP_MID_DISCONNECT is received */
 static void
 sdp_handle_disconn(struct sdp_sock *ssk)
 {
 
 	sdp_dbg(ssk->socket, "%s\n", __func__);
 
 	SDP_WLOCK_ASSERT(ssk);
 	if (TCPS_HAVERCVDFIN(ssk->state) == 0)
 		socantrcvmore(ssk->socket);
 
 	switch (ssk->state) {
 	case TCPS_SYN_RECEIVED:
 	case TCPS_ESTABLISHED:
 		ssk->state = TCPS_CLOSE_WAIT;
 		break;
 
 	case TCPS_FIN_WAIT_1:
 		/* Received a reply FIN - start Infiniband tear down */
 		sdp_dbg(ssk->socket,
 		    "%s: Starting Infiniband tear down sending DREQ\n",
 		    __func__);
 
 		sdp_cancel_dreq_wait_timeout(ssk);
 		ssk->qp_active = 0;
 		if (ssk->id) {
 			struct rdma_cm_id *id;
 
 			id = ssk->id;
 			SDP_WUNLOCK(ssk);
 			rdma_disconnect(id);
 			SDP_WLOCK(ssk);
 		} else {
 			sdp_warn(ssk->socket,
 			    "%s: ssk->id is NULL\n", __func__);
 			return;
 		}
 		break;
 	case TCPS_TIME_WAIT:
 		/* This is a mutual close situation and we've got the DREQ from
 		   the peer before the SDP_MID_DISCONNECT */
 		break;
 	case TCPS_CLOSED:
 		/* FIN arrived after IB teardown started - do nothing */
 		sdp_dbg(ssk->socket, "%s: fin in state %s\n",
 		    __func__, sdp_state_str(ssk->state));
 		return;
 	default:
 		sdp_warn(ssk->socket,
 		    "%s: FIN in unexpected state. state=%d\n",
 		    __func__, ssk->state);
 		break;
 	}
 }
 
 static int
 sdp_post_recv(struct sdp_sock *ssk)
 {
 	struct sdp_buf *rx_req;
 	int i, rc;
 	u64 addr;
 	struct ib_device *dev;
 	struct ib_recv_wr rx_wr = { NULL };
 	struct ib_sge ibsge[SDP_MAX_RECV_SGES];
 	struct ib_sge *sge = ibsge;
 	struct ib_recv_wr *bad_wr;
 	struct mbuf *mb, *m;
 	struct sdp_bsdh *h;
 	int id = ring_head(ssk->rx_ring);
 
 	/* Now, allocate and repost recv */
 	sdp_prf(ssk->socket, mb, "Posting mb");
 	mb = m_getm2(NULL, ssk->recv_bytes, M_NOWAIT, MT_DATA, M_PKTHDR);
 	if (mb == NULL) {
 		/* Retry so we can't stall out with no memory. */
 		if (!rx_ring_posted(ssk))
 			queue_work(rx_comp_wq, &ssk->rx_comp_work);
 		return -1;
 	}
 	for (m = mb; m != NULL; m = m->m_next) {
 		m->m_len = M_SIZE(m);
 		mb->m_pkthdr.len += m->m_len;
 	}
 	h = mtod(mb, struct sdp_bsdh *);
 	rx_req = ssk->rx_ring.buffer + (id & (SDP_RX_SIZE - 1));
 	rx_req->mb = mb;
 	dev = ssk->ib_device;
         for (i = 0;  mb != NULL; i++, mb = mb->m_next, sge++) {
 		addr = ib_dma_map_single(dev, mb->m_data, mb->m_len,
 		    DMA_TO_DEVICE);
 		/* TODO: proper error handling */
 		BUG_ON(ib_dma_mapping_error(dev, addr));
 		BUG_ON(i >= SDP_MAX_RECV_SGES);
 		rx_req->mapping[i] = addr;
 		sge->addr = addr;
 		sge->length = mb->m_len;
 		sge->lkey = ssk->sdp_dev->mr->lkey;
         }
 
 	rx_wr.next = NULL;
 	rx_wr.wr_id = id | SDP_OP_RECV;
 	rx_wr.sg_list = ibsge;
 	rx_wr.num_sge = i;
 	rc = ib_post_recv(ssk->qp, &rx_wr, &bad_wr);
 	if (unlikely(rc)) {
 		sdp_warn(ssk->socket, "ib_post_recv failed. status %d\n", rc);
 
 		sdp_cleanup_sdp_buf(ssk, rx_req, DMA_FROM_DEVICE);
 		m_freem(mb);
 
 		sdp_notify(ssk, ECONNRESET);
 
 		return -1;
 	}
 
 	atomic_inc(&ssk->rx_ring.head);
 	SDPSTATS_COUNTER_INC(post_recv);
 
 	return 0;
 }
 
 static inline int
 sdp_post_recvs_needed(struct sdp_sock *ssk)
 {
 	unsigned long bytes_in_process;
 	unsigned long max_bytes;
 	int buffer_size;
 	int posted;
 
 	if (!ssk->qp_active || !ssk->socket)
 		return 0;
 
 	posted = rx_ring_posted(ssk);
 	if (posted >= SDP_RX_SIZE)
 		return 0;
 	if (posted < SDP_MIN_TX_CREDITS)
 		return 1;
 
 	buffer_size = ssk->recv_bytes;
 	max_bytes = max(ssk->socket->so_snd.sb_hiwat,
 	    (1 + SDP_MIN_TX_CREDITS) * buffer_size);
 	max_bytes *= rcvbuf_scale;
 	/*
 	 * Compute bytes in the receive queue and socket buffer.
 	 */
 	bytes_in_process = (posted - SDP_MIN_TX_CREDITS) * buffer_size;
 	bytes_in_process += sbused(&ssk->socket->so_rcv);
 
 	return bytes_in_process < max_bytes;
 }
 
 static inline void
 sdp_post_recvs(struct sdp_sock *ssk)
 {
 
 	while (sdp_post_recvs_needed(ssk))
 		if (sdp_post_recv(ssk))
 			return;
 }
 
 static inline struct mbuf *
 sdp_sock_queue_rcv_mb(struct socket *sk, struct mbuf *mb)
 {
 	struct sdp_sock *ssk = sdp_sk(sk);
 	struct sdp_bsdh *h;
 
 	h = mtod(mb, struct sdp_bsdh *);
 
 #ifdef SDP_ZCOPY
 	SDP_SKB_CB(mb)->seq = rcv_nxt(ssk);
 	if (h->mid == SDP_MID_SRCAVAIL) {
 		struct sdp_srcah *srcah = (struct sdp_srcah *)(h+1);
 		struct rx_srcavail_state *rx_sa;
 		
 		ssk->srcavail_cancel_mseq = 0;
 
 		ssk->rx_sa = rx_sa = RX_SRCAVAIL_STATE(mb) = kzalloc(
 				sizeof(struct rx_srcavail_state), M_NOWAIT);
 
 		rx_sa->mseq = ntohl(h->mseq);
 		rx_sa->used = 0;
 		rx_sa->len = mb_len = ntohl(srcah->len);
 		rx_sa->rkey = ntohl(srcah->rkey);
 		rx_sa->vaddr = be64_to_cpu(srcah->vaddr);
 		rx_sa->flags = 0;
 
 		if (ssk->tx_sa) {
 			sdp_dbg_data(ssk->socket, "got RX SrcAvail while waiting "
 					"for TX SrcAvail. waking up TX SrcAvail"
 					"to be aborted\n");
 			wake_up(sk->sk_sleep);
 		}
 
 		atomic_add(mb->len, &ssk->rcv_nxt);
 		sdp_dbg_data(sk, "queueing SrcAvail. mb_len = %d vaddr = %lld\n",
 			mb_len, rx_sa->vaddr);
 	} else
 #endif
 	{
 		atomic_add(mb->m_pkthdr.len, &ssk->rcv_nxt);
 	}
 
 	m_adj(mb, SDP_HEAD_SIZE);
 	SOCKBUF_LOCK(&sk->so_rcv);
 	if (unlikely(h->flags & SDP_OOB_PRES))
 		sdp_urg(ssk, mb);
-	sbappend_locked(&sk->so_rcv, mb);
+	sbappend_locked(&sk->so_rcv, mb, 0);
 	sorwakeup_locked(sk);
 	return mb;
 }
 
 static int
 sdp_get_recv_bytes(struct sdp_sock *ssk, u32 new_size)
 {
 
 	return MIN(new_size, SDP_MAX_PACKET);
 }
 
 int
 sdp_init_buffers(struct sdp_sock *ssk, u32 new_size)
 {
 
 	ssk->recv_bytes = sdp_get_recv_bytes(ssk, new_size);
 	sdp_post_recvs(ssk);
 
 	return 0;
 }
 
 int
 sdp_resize_buffers(struct sdp_sock *ssk, u32 new_size)
 {
 	u32 curr_size = ssk->recv_bytes;
 	u32 max_size = SDP_MAX_PACKET;
 
 	if (new_size > curr_size && new_size <= max_size) {
 		ssk->recv_bytes = sdp_get_recv_bytes(ssk, new_size);
 		return 0;
 	}
 	return -1;
 }
 
 static void
 sdp_handle_resize_request(struct sdp_sock *ssk, struct sdp_chrecvbuf *buf)
 {
 	if (sdp_resize_buffers(ssk, ntohl(buf->size)) == 0)
 		ssk->recv_request_head = ring_head(ssk->rx_ring) + 1;
 	else
 		ssk->recv_request_head = ring_tail(ssk->rx_ring);
 	ssk->recv_request = 1;
 }
 
 static void
 sdp_handle_resize_ack(struct sdp_sock *ssk, struct sdp_chrecvbuf *buf)
 {
 	u32 new_size = ntohl(buf->size);
 
 	if (new_size > ssk->xmit_size_goal)
 		ssk->xmit_size_goal = new_size;
 }
 
 static struct mbuf *
 sdp_recv_completion(struct sdp_sock *ssk, int id)
 {
 	struct sdp_buf *rx_req;
 	struct ib_device *dev;
 	struct mbuf *mb;
 
 	if (unlikely(id != ring_tail(ssk->rx_ring))) {
 		printk(KERN_WARNING "Bogus recv completion id %d tail %d\n",
 			id, ring_tail(ssk->rx_ring));
 		return NULL;
 	}
 
 	dev = ssk->ib_device;
 	rx_req = &ssk->rx_ring.buffer[id & (SDP_RX_SIZE - 1)];
 	mb = rx_req->mb;
 	sdp_cleanup_sdp_buf(ssk, rx_req, DMA_FROM_DEVICE);
 
 	atomic_inc(&ssk->rx_ring.tail);
 	atomic_dec(&ssk->remote_credits);
 	return mb;
 }
 
 /* socket lock should be taken before calling this */
 static int
 sdp_process_rx_ctl_mb(struct sdp_sock *ssk, struct mbuf *mb)
 {
 	struct sdp_bsdh *h;
 	struct socket *sk;
 
 	SDP_WLOCK_ASSERT(ssk);
 	sk = ssk->socket;
  	h = mtod(mb, struct sdp_bsdh *);
 	switch (h->mid) {
 	case SDP_MID_DATA:
 	case SDP_MID_SRCAVAIL:
 		sdp_dbg(sk, "DATA after socket rcv was shutdown\n");
 
 		/* got data in RCV_SHUTDOWN */
 		if (ssk->state == TCPS_FIN_WAIT_1) {
 			sdp_dbg(sk, "RX data when state = FIN_WAIT1\n");
 			sdp_notify(ssk, ECONNRESET);
 		}
 		m_freem(mb);
 
 		break;
 #ifdef SDP_ZCOPY
 	case SDP_MID_RDMARDCOMPL:
 		m_freem(mb);
 		break;
 	case SDP_MID_SENDSM:
 		sdp_handle_sendsm(ssk, ntohl(h->mseq_ack));
 		m_freem(mb);
 		break;
 	case SDP_MID_SRCAVAIL_CANCEL:
 		sdp_dbg_data(sk, "Handling SrcAvailCancel\n");
 		sdp_prf(sk, NULL, "Handling SrcAvailCancel");
 		if (ssk->rx_sa) {
 			ssk->srcavail_cancel_mseq = ntohl(h->mseq);
 			ssk->rx_sa->flags |= RX_SA_ABORTED;
 			ssk->rx_sa = NULL; /* TODO: change it into SDP_MID_DATA and get 
 			                      the dirty logic from recvmsg */
 		} else {
 			sdp_dbg(sk, "Got SrcAvailCancel - "
 					"but no SrcAvail in process\n");
 		}
 		m_freem(mb);
 		break;
 	case SDP_MID_SINKAVAIL:
 		sdp_dbg_data(sk, "Got SinkAvail - not supported: ignored\n");
 		sdp_prf(sk, NULL, "Got SinkAvail - not supported: ignored");
 		/* FALLTHROUGH */
 #endif
 	case SDP_MID_ABORT:
 		sdp_dbg_data(sk, "Handling ABORT\n");
 		sdp_prf(sk, NULL, "Handling ABORT");
 		sdp_notify(ssk, ECONNRESET);
 		m_freem(mb);
 		break;
 	case SDP_MID_DISCONN:
 		sdp_dbg_data(sk, "Handling DISCONN\n");
 		sdp_prf(sk, NULL, "Handling DISCONN");
 		sdp_handle_disconn(ssk);
 		break;
 	case SDP_MID_CHRCVBUF:
 		sdp_dbg_data(sk, "Handling RX CHRCVBUF\n");
 		sdp_handle_resize_request(ssk, (struct sdp_chrecvbuf *)(h+1));
 		m_freem(mb);
 		break;
 	case SDP_MID_CHRCVBUF_ACK:
 		sdp_dbg_data(sk, "Handling RX CHRCVBUF_ACK\n");
 		sdp_handle_resize_ack(ssk, (struct sdp_chrecvbuf *)(h+1));
 		m_freem(mb);
 		break;
 	default:
 		/* TODO: Handle other messages */
 		sdp_warn(sk, "SDP: FIXME MID %d\n", h->mid);
 		m_freem(mb);
 	}
 
 	return 0;
 }
 
 static int
 sdp_process_rx_mb(struct sdp_sock *ssk, struct mbuf *mb)
 {
 	struct socket *sk;
 	struct sdp_bsdh *h;
 	unsigned long mseq_ack;
 	int credits_before;
 
 	h = mtod(mb, struct sdp_bsdh *);
 	sk = ssk->socket;
 	/*
 	 * If another thread is in so_pcbfree this may be partially torn
 	 * down but no further synchronization is required as the destroying
 	 * thread will wait for receive to shutdown before discarding the
 	 * socket.
 	 */
 	if (sk == NULL) {
 		m_freem(mb);
 		return 0;
 	}
 
 	SDPSTATS_HIST_LINEAR(credits_before_update, tx_credits(ssk));
 
 	mseq_ack = ntohl(h->mseq_ack);
 	credits_before = tx_credits(ssk);
 	atomic_set(&ssk->tx_ring.credits, mseq_ack - ring_head(ssk->tx_ring) +
 			1 + ntohs(h->bufs));
 	if (mseq_ack >= ssk->nagle_last_unacked)
 		ssk->nagle_last_unacked = 0;
 
 	sdp_prf1(ssk->socket, mb, "RX %s +%d c:%d->%d mseq:%d ack:%d\n",
 		mid2str(h->mid), ntohs(h->bufs), credits_before,
 		tx_credits(ssk), ntohl(h->mseq), ntohl(h->mseq_ack));
 
 	if (unlikely(h->mid == SDP_MID_DATA &&
 	    mb->m_pkthdr.len == SDP_HEAD_SIZE)) {
 		/* Credit update is valid even after RCV_SHUTDOWN */
 		m_freem(mb);
 		return 0;
 	}
 
 	if ((h->mid != SDP_MID_DATA && h->mid != SDP_MID_SRCAVAIL) ||
 	    TCPS_HAVERCVDFIN(ssk->state)) {
 		sdp_prf(sk, NULL, "Control mb - queing to control queue");
 #ifdef SDP_ZCOPY
 		if (h->mid == SDP_MID_SRCAVAIL_CANCEL) {
 			sdp_dbg_data(sk, "Got SrcAvailCancel. "
 					"seq: 0x%d seq_ack: 0x%d\n",
 					ntohl(h->mseq), ntohl(h->mseq_ack));
 			ssk->srcavail_cancel_mseq = ntohl(h->mseq);
 		}
 
 
 		if (h->mid == SDP_MID_RDMARDCOMPL) {
 			struct sdp_rrch *rrch = (struct sdp_rrch *)(h+1);
 			sdp_dbg_data(sk, "RdmaRdCompl message arrived\n");
 			sdp_handle_rdma_read_compl(ssk, ntohl(h->mseq_ack),
 					ntohl(rrch->len));
 		}
 #endif
 		mb->m_nextpkt = NULL;
 		if (ssk->rx_ctl_tail)
 			ssk->rx_ctl_tail->m_nextpkt = mb;
 		else
 			ssk->rx_ctl_q = mb;
 		ssk->rx_ctl_tail = mb;
 
 		return 0;
 	}
 
 	sdp_prf1(sk, NULL, "queueing %s mb\n", mid2str(h->mid));
 	mb = sdp_sock_queue_rcv_mb(sk, mb);
 
 
 	return 0;
 }
 
 /* called only from irq */
 static struct mbuf *
 sdp_process_rx_wc(struct sdp_sock *ssk, struct ib_wc *wc)
 {
 	struct mbuf *mb;
 	struct sdp_bsdh *h;
 	struct socket *sk = ssk->socket;
 	int mseq;
 
 	mb = sdp_recv_completion(ssk, wc->wr_id);
 	if (unlikely(!mb))
 		return NULL;
 
 	if (unlikely(wc->status)) {
 		if (ssk->qp_active && sk) {
 			sdp_dbg(sk, "Recv completion with error. "
 					"Status %d, vendor: %d\n",
 				wc->status, wc->vendor_err);
 			sdp_abort(sk);
 			ssk->qp_active = 0;
 		}
 		m_freem(mb);
 		return NULL;
 	}
 
 	sdp_dbg_data(sk, "Recv completion. ID %d Length %d\n",
 			(int)wc->wr_id, wc->byte_len);
 	if (unlikely(wc->byte_len < sizeof(struct sdp_bsdh))) {
 		sdp_warn(sk, "SDP BUG! byte_len %d < %zd\n",
 				wc->byte_len, sizeof(struct sdp_bsdh));
 		m_freem(mb);
 		return NULL;
 	}
 	/* Use m_adj to trim the tail of data we didn't use. */
 	m_adj(mb, -(mb->m_pkthdr.len - wc->byte_len));
 	h = mtod(mb, struct sdp_bsdh *);
 
 	SDP_DUMP_PACKET(ssk->socket, "RX", mb, h);
 
 	ssk->rx_packets++;
 	ssk->rx_bytes += mb->m_pkthdr.len;
 
 	mseq = ntohl(h->mseq);
 	atomic_set(&ssk->mseq_ack, mseq);
 	if (mseq != (int)wc->wr_id)
 		sdp_warn(sk, "SDP BUG! mseq %d != wrid %d\n",
 				mseq, (int)wc->wr_id);
 
 	return mb;
 }
 
 /* Wakeup writers if we now have credits. */
 static void
 sdp_bzcopy_write_space(struct sdp_sock *ssk)
 {
 	struct socket *sk = ssk->socket;
 
 	if (tx_credits(ssk) >= ssk->min_bufs && sk)
 		sowwakeup(sk);
 }
 
 /* only from interrupt. */
 static int
 sdp_poll_rx_cq(struct sdp_sock *ssk)
 {
 	struct ib_cq *cq = ssk->rx_ring.cq;
 	struct ib_wc ibwc[SDP_NUM_WC];
 	int n, i;
 	int wc_processed = 0;
 	struct mbuf *mb;
 
 	do {
 		n = ib_poll_cq(cq, SDP_NUM_WC, ibwc);
 		for (i = 0; i < n; ++i) {
 			struct ib_wc *wc = &ibwc[i];
 
 			BUG_ON(!(wc->wr_id & SDP_OP_RECV));
 			mb = sdp_process_rx_wc(ssk, wc);
 			if (!mb)
 				continue;
 
 			sdp_process_rx_mb(ssk, mb);
 			wc_processed++;
 		}
 	} while (n == SDP_NUM_WC);
 
 	if (wc_processed)
 		sdp_bzcopy_write_space(ssk);
 
 	return wc_processed;
 }
 
 static void
 sdp_rx_comp_work(struct work_struct *work)
 {
 	struct sdp_sock *ssk = container_of(work, struct sdp_sock,
 			rx_comp_work);
 
 	sdp_prf(ssk->socket, NULL, "%s", __func__);
 
 	SDP_WLOCK(ssk);
 	if (unlikely(!ssk->qp)) {
 		sdp_prf(ssk->socket, NULL, "qp was destroyed");
 		goto out;
 	}
 	if (unlikely(!ssk->rx_ring.cq)) {
 		sdp_prf(ssk->socket, NULL, "rx_ring.cq is NULL");
 		goto out;
 	}
 
 	if (unlikely(!ssk->poll_cq)) {
 		struct rdma_cm_id *id = ssk->id;
 		if (id && id->qp)
 			rdma_notify(id, IB_EVENT_COMM_EST);
 		goto out;
 	}
 
 	sdp_do_posts(ssk);
 out:
 	SDP_WUNLOCK(ssk);
 }
 
 void
 sdp_do_posts(struct sdp_sock *ssk)
 {
 	struct socket *sk = ssk->socket;
 	int xmit_poll_force;
 	struct mbuf *mb;
 
 	SDP_WLOCK_ASSERT(ssk);
 	if (!ssk->qp_active) {
 		sdp_dbg(sk, "QP is deactivated\n");
 		return;
 	}
 
 	while ((mb = ssk->rx_ctl_q)) {
 		ssk->rx_ctl_q = mb->m_nextpkt;
 		mb->m_nextpkt = NULL;
 		sdp_process_rx_ctl_mb(ssk, mb);
 	}
 
 	if (ssk->state == TCPS_TIME_WAIT)
 		return;
 
 	if (!ssk->rx_ring.cq || !ssk->tx_ring.cq)
 		return;
 
 	sdp_post_recvs(ssk);
 
 	if (tx_ring_posted(ssk))
 		sdp_xmit_poll(ssk, 1);
 
 	sdp_post_sends(ssk, M_NOWAIT);
 
 	xmit_poll_force = tx_credits(ssk) < SDP_MIN_TX_CREDITS;
 
 	if (credit_update_needed(ssk) || xmit_poll_force) {
 		/* if has pending tx because run out of tx_credits - xmit it */
 		sdp_prf(sk, NULL, "Processing to free pending sends");
 		sdp_xmit_poll(ssk,  xmit_poll_force);
 		sdp_prf(sk, NULL, "Sending credit update");
 		sdp_post_sends(ssk, M_NOWAIT);
 	}
 
 }
 
 int
 sdp_process_rx(struct sdp_sock *ssk)
 {
 	int wc_processed = 0;
 	int credits_before;
 
 	if (!rx_ring_trylock(&ssk->rx_ring)) {
 		sdp_dbg(ssk->socket, "ring destroyed. not polling it\n");
 		return 0;
 	}
 
 	credits_before = tx_credits(ssk);
 
 	wc_processed = sdp_poll_rx_cq(ssk);
 	sdp_prf(ssk->socket, NULL, "processed %d", wc_processed);
 
 	if (wc_processed) {
 		sdp_prf(ssk->socket, NULL, "credits:  %d -> %d",
 				credits_before, tx_credits(ssk));
 		queue_work(rx_comp_wq, &ssk->rx_comp_work);
 	}
 	sdp_arm_rx_cq(ssk);
 
 	rx_ring_unlock(&ssk->rx_ring);
 
 	return (wc_processed);
 }
 
 static void
 sdp_rx_irq(struct ib_cq *cq, void *cq_context)
 {
 	struct socket *sk = cq_context;
 	struct sdp_sock *ssk = sdp_sk(sk);
 
 	if (cq != ssk->rx_ring.cq) {
 		sdp_dbg(sk, "cq = %p, ssk->cq = %p\n", cq, ssk->rx_ring.cq);
 		return;
 	}
 
 	SDPSTATS_COUNTER_INC(rx_int_count);
 
 	sdp_prf(sk, NULL, "rx irq");
 
 	sdp_process_rx(ssk);
 }
 
 static
 void sdp_rx_ring_purge(struct sdp_sock *ssk)
 {
 	while (rx_ring_posted(ssk) > 0) {
 		struct mbuf *mb;
 		mb = sdp_recv_completion(ssk, ring_tail(ssk->rx_ring));
 		if (!mb)
 			break;
 		m_freem(mb);
 	}
 }
 
 void
 sdp_rx_ring_init(struct sdp_sock *ssk)
 {
 	ssk->rx_ring.buffer = NULL;
 	ssk->rx_ring.destroyed = 0;
 	rw_init(&ssk->rx_ring.destroyed_lock, "sdp rx lock");
 }
 
 static void
 sdp_rx_cq_event_handler(struct ib_event *event, void *data)
 {
 }
 
 int
 sdp_rx_ring_create(struct sdp_sock *ssk, struct ib_device *device)
 {
 	struct ib_cq *rx_cq;
 	int rc = 0;
 
 
 	sdp_dbg(ssk->socket, "rx ring created");
 	INIT_WORK(&ssk->rx_comp_work, sdp_rx_comp_work);
 	atomic_set(&ssk->rx_ring.head, 1);
 	atomic_set(&ssk->rx_ring.tail, 1);
 
 	ssk->rx_ring.buffer = kmalloc(
 			sizeof *ssk->rx_ring.buffer * SDP_RX_SIZE, GFP_KERNEL);
 	if (!ssk->rx_ring.buffer) {
 		sdp_warn(ssk->socket,
 			"Unable to allocate RX Ring size %zd.\n",
 			 sizeof(*ssk->rx_ring.buffer) * SDP_RX_SIZE);
 
 		return -ENOMEM;
 	}
 
 	rx_cq = ib_create_cq(device, sdp_rx_irq, sdp_rx_cq_event_handler,
 			  ssk->socket, SDP_RX_SIZE, 0);
 
 	if (IS_ERR(rx_cq)) {
 		rc = PTR_ERR(rx_cq);
 		sdp_warn(ssk->socket, "Unable to allocate RX CQ: %d.\n", rc);
 		goto err_cq;
 	}
 
 	sdp_sk(ssk->socket)->rx_ring.cq = rx_cq;
 	sdp_arm_rx_cq(ssk);
 
 	return 0;
 
 err_cq:
 	kfree(ssk->rx_ring.buffer);
 	ssk->rx_ring.buffer = NULL;
 	return rc;
 }
 
 void
 sdp_rx_ring_destroy(struct sdp_sock *ssk)
 {
 
 	cancel_work_sync(&ssk->rx_comp_work);
 	rx_ring_destroy_lock(&ssk->rx_ring);
 
 	if (ssk->rx_ring.buffer) {
 		sdp_rx_ring_purge(ssk);
 
 		kfree(ssk->rx_ring.buffer);
 		ssk->rx_ring.buffer = NULL;
 	}
 
 	if (ssk->rx_ring.cq) {
 		if (ib_destroy_cq(ssk->rx_ring.cq)) {
 			sdp_warn(ssk->socket, "destroy cq(%p) failed\n",
 				ssk->rx_ring.cq);
 		} else {
 			ssk->rx_ring.cq = NULL;
 		}
 	}
 
 	WARN_ON(ring_head(ssk->rx_ring) != ring_tail(ssk->rx_ring));
 }
Index: projects/clang380-import/sys/powerpc/aim/locore32.S
===================================================================
--- projects/clang380-import/sys/powerpc/aim/locore32.S	(revision 293686)
+++ projects/clang380-import/sys/powerpc/aim/locore32.S	(revision 293687)
@@ -1,176 +1,115 @@
 /* $FreeBSD$ */
-/* $NetBSD: locore.S,v 1.24 2000/05/31 05:09:17 thorpej Exp $ */
 
 /*-
- * Copyright (C) 2001 Benno Rice
+ * Copyright (C) 2010-2016 Nathan Whitehorn
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
- * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-/*-
- * Copyright (C) 1995, 1996 Wolfgang Solfrank.
- * Copyright (C) 1995, 1996 TooLs GmbH.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by TooLs GmbH.
- * 4. The name of TooLs GmbH may not be used to endorse or promote products
- *    derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
 
 #include "assym.s"
 
 #include <sys/syscall.h>
 
 #include <machine/trap.h>
 #include <machine/param.h>
 #include <machine/spr.h>
 #include <machine/asm.h>
 #include "opt_platform.h"
 
 /* Locate the per-CPU data structure */
 #define GET_CPUINFO(r)  \
         mfsprg0  r
 
 /*
  * Compiled KERNBASE location and the kernel load address
  */
         .globl  kernbase
         .set    kernbase, KERNBASE
 
 /*
  * Globals
  */
 	.data
 	.align 3
 GLOBAL(__startkernel)
 	.long	begin
 GLOBAL(__endkernel)
 	.long	end
 	.align	4
 #define	TMPSTKSZ	8192		/* 8K temporary stack */
 GLOBAL(tmpstk)
 	.space	TMPSTKSZ
 
 	.text
 	.globl	btext
 btext:
 
 /*
- * This symbol is here for the benefit of kvm_mkdb, and is supposed to
- * mark the start of kernel text.
+ * Main kernel entry point.
  */
-	.globl	kernel_text
-kernel_text:
-
-/*
- * Startup entry.  Note, this must be the first thing in the text
- * segment!
- */
 	.text
 	.globl	__start
 __start:
 	/* Figure out where we are */
 	bl	1f
 	.long	_DYNAMIC-.
 	.long	_GLOBAL_OFFSET_TABLE_-.
 	.long	tmpstk-.
 1:	mflr	%r30
 
 	/* Set up temporary stack pointer */
 	lwz	%r1,8(%r30)
 	add	%r1,%r1,%r30
 	addi	%r1,%r1,(8+TMPSTKSZ-32)
 
 	/* Relocate self */
 	stw	%r3,16(%r1)
 	stw	%r4,20(%r1)
 	stw	%r5,24(%r1)
 	stw	%r6,28(%r1)
 
 	lwz	%r3,0(%r30) /* _DYNAMIC in %r3 */
 	add	%r3,%r3,%r30
 	lwz	%r4,4(%r30) /* GOT pointer */
 	add	%r4,%r4,%r30
 	lwz	%r4,4(%r4)  /* got[0] is _DYNAMIC link addr */
 	subf	%r4,%r4,%r3 /* subtract to calculate relocbase */
 	bl	elf_reloc_self
 	
 	lwz	%r3,16(%r1)
 	lwz	%r4,20(%r1)
 	lwz	%r5,24(%r1)
 	lwz	%r6,28(%r1)
 
 	/* MD setup */
 	bl	powerpc_init
 
 	/* Set stack pointer to new value and branch to mi_startup */
 	mr	%r1, %r3
 	li	%r3, 0
 	stw	%r3, 0(%r1)
 	bl	mi_startup
 
-	/* If mi_startup somehow returns, exit. This would be bad. */
-	b	OF_exit
-
-/*
- * int setfault()
- *
- * Similar to setjmp to setup for handling faults on accesses to user memory.
- * Any routine using this may only call bcopy, either the form below,
- * or the (currently used) C code optimized, so it doesn't use any non-volatile
- * registers.
- */
-	.globl	setfault
-setfault:
-	mflr	0
-	mfcr	12
-	mfsprg	4,0
-	lwz	4,TD_PCB(2)	/* curthread = r2 */
-	stw	3,PCB_ONFAULT(4)
-	stw	0,0(3)
-	stw	1,4(3)
-	stw	2,8(3)
-	stmw	12,12(3)
-	xor	3,3,3
-	blr
+	/* mi_startup() does not return */
+	b	.
 
 #include <powerpc/aim/trap_subr32.S>
Index: projects/clang380-import/sys/powerpc/aim/locore64.S
===================================================================
--- projects/clang380-import/sys/powerpc/aim/locore64.S	(revision 293686)
+++ projects/clang380-import/sys/powerpc/aim/locore64.S	(revision 293687)
@@ -1,218 +1,131 @@
 /* $FreeBSD$ */
-/* $NetBSD: locore.S,v 1.24 2000/05/31 05:09:17 thorpej Exp $ */
 
 /*-
- * Copyright (C) 2001 Benno Rice
+ * Copyright (C) 2010-2016 Nathan Whitehorn
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
- * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/*-
- * Copyright (C) 1995, 1996 Wolfgang Solfrank.
- * Copyright (C) 1995, 1996 TooLs GmbH.
- * All rights reserved.
  *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by TooLs GmbH.
- * 4. The name of TooLs GmbH may not be used to endorse or promote products
- *    derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * $FreeBSD$
  */
 
 #include "assym.s"
 
 #include <sys/syscall.h>
 
 #include <machine/trap.h>
 #include <machine/param.h>
 #include <machine/spr.h>
 #include <machine/asm.h>
 
 #ifdef _CALL_ELF
 .abiversion _CALL_ELF
 #endif
 
-/* Locate the per-CPU data structure */
-#define GET_CPUINFO(r)  \
-        mfsprg0  r
-#define GET_TOCBASE(r)  \
-	li	r,TRAP_TOCBASE;	/* Magic address for TOC */ \
-	ld	r,0(r)
-
 /* Glue for linker script */
 .globl  kernbase
 .set    kernbase, KERNBASE
 
 /*
  * Globals
  */
 	.data
 	.align 3
 GLOBAL(__startkernel)
 	.llong	begin
 GLOBAL(__endkernel)
 	.llong	end
 
 	.align	4
 #define	TMPSTKSZ	16384		/* 16K temporary stack */
 GLOBAL(tmpstk)
 	.space	TMPSTKSZ
 
 TOC_ENTRY(tmpstk)
 
 	.text
 	.globl	btext
 btext:
 
 /*
- * This symbol is here for the benefit of kvm_mkdb, and is supposed to
- * mark the start of kernel text.
- */
-	.globl	kernel_text
-kernel_text:
-
-/*
- * Startup entry.  Note, this must be the first thing in the text
- * segment!
+ * Main kernel entry point.
  *
  * Calling convention:
  * r3: Flattened Device Tree pointer (or zero)
  * r4: ignored
  * r5: OF client interface pointer (or zero)
  * r6: Loader metadata pointer (or zero)
  */
 	.text
 ASENTRY_NOPROF(__start)
 	/* Set up the TOC pointer */
 	b	0f
 	.align 3
 0:	nop
 	bl	1f
 	.llong	__tocbase + 0x8000 - .
 1:	mflr	%r2
 	ld	%r1,0(%r2)
 	add	%r2,%r1,%r2
 
 	/* Get load offset */
 	ld	%r31,-0x8000(%r2) /* First TOC entry is TOC base */
 	subf    %r31,%r31,%r2	/* Subtract from real TOC base to get base */
 
 	/* Set up the stack pointer */
 	ld	%r1,TOC_REF(tmpstk)(%r2)
 	addi	%r1,%r1,TMPSTKSZ-96
 	add	%r1,%r1,%r31
 
 	/* Relocate kernel */
 	std	%r3,48(%r1)
 	std	%r4,56(%r1)
 	std	%r5,64(%r1)
 	std	%r6,72(%r1)
 	bl	1f
 	.llong _DYNAMIC-.
 1:	mflr	%r3
 	ld	%r4,0(%r3)
 	add	%r3,%r4,%r3
 	mr	%r4,%r31
 	bl	elf_reloc_self
 	nop
 	ld	%r3,48(%r1)
 	ld	%r4,56(%r1)
 	ld	%r5,64(%r1)
 	ld	%r6,72(%r1)
 
 	/* Begin CPU init */
 	mr	%r4,%r2 /* Replace ignored r4 with tocbase for trap handlers */
 	bl	powerpc_init
 	nop
 
 	/* Set stack pointer to new value and branch to mi_startup */
 	mr	%r1, %r3
 	li	%r3, 0
 	std	%r3, 0(%r1)
 	bl	mi_startup
 	nop
 
-	/* If this returns (it won't), go back to firmware */
-	b	OF_exit
-	nop
-
-/*
- * int setfault()
- *
- * Similar to setjmp to setup for handling faults on accesses to user memory.
- * Any routine using this may only call bcopy, either the form below,
- * or the (currently used) C code optimized, so it doesn't use any non-volatile
- * registers.
- */
-ASENTRY_NOPROF(setfault)
-	mflr	0
-	mfcr	12
-	mfsprg	4,0
-	ld	4,TD_PCB(13)	/* curthread = r13 */
-	std	3,PCB_ONFAULT(4)
-	std	0,0(3)
-	std	1,8(3)
-	std	2,16(3)
-
-	std	%r12,24(%r3)	/* Save the non-volatile GP regs. */
-	std	%r13,24+1*8(%r3)	
-	std	%r14,24+2*8(%r3)	
-	std	%r15,24+3*8(%r3)	
-	std	%r16,24+4*8(%r3)	
-	std	%r17,24+5*8(%r3)	
-	std	%r18,24+6*8(%r3)	
-	std	%r19,24+7*8(%r3)	
-	std	%r20,24+8*8(%r3)	
-	std	%r21,24+9*8(%r3)	
-	std	%r22,24+10*8(%r3)	
-	std	%r23,24+11*8(%r3)	
-	std	%r24,24+12*8(%r3)	
-	std	%r25,24+13*8(%r3)	
-	std	%r26,24+14*8(%r3)	
-	std	%r27,24+15*8(%r3)	
-	std	%r28,24+16*8(%r3)
-	std	%r29,24+17*8(%r3)	
-	std	%r30,24+18*8(%r3)	
-	std	%r31,24+19*8(%r3)	
-
-	xor	3,3,3
-	blr
+	/* Unreachable */
+	b	.
 
 #include <powerpc/aim/trap_subr64.S>
Index: projects/clang380-import/sys/powerpc/aim/trap_subr64.S
===================================================================
--- projects/clang380-import/sys/powerpc/aim/trap_subr64.S	(revision 293686)
+++ projects/clang380-import/sys/powerpc/aim/trap_subr64.S	(revision 293687)
@@ -1,866 +1,869 @@
 /* $FreeBSD$ */
 /* $NetBSD: trap_subr.S,v 1.20 2002/04/22 23:20:08 kleink Exp $	*/
 
 /*-
  * Copyright (C) 1995, 1996 Wolfgang Solfrank.
  * Copyright (C) 1995, 1996 TooLs GmbH.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by TooLs GmbH.
  * 4. The name of TooLs GmbH may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * NOTICE: This is not a standalone file.  to use it, #include it in
  * your port's locore.S, like so:
  *
  *	#include <powerpc/aim/trap_subr.S>
  */
 
-/*
- * Save/restore segment registers
- */
+/* Locate the per-CPU data structure */
+#define GET_CPUINFO(r)  \
+        mfsprg0  r
+#define GET_TOCBASE(r)  \
+	li	r,TRAP_TOCBASE;	/* Magic address for TOC */ \
+	ld	r,0(r)
 
 /*
  * Restore SRs for a pmap
  *
  * Requires that r28-r31 be scratch, with r28 initialized to the SLB cache
  */
 
 /*
  * User SRs are loaded through a pointer to the current pmap.
  */
 restore_usersrs:
 	GET_CPUINFO(%r28)
 	ld	%r28,PC_USERSLB(%r28)
 	li	%r29, 0			/* Set the counter to zero */
 
 	slbia
 	slbmfee	%r31,%r29		
 	clrrdi	%r31,%r31,28
 	slbie	%r31
 1:	ld	%r31, 0(%r28)		/* Load SLB entry pointer */
 	cmpdi	%r31, 0			/* If NULL, stop */
 	beqlr
 
 	ld	%r30, 0(%r31)		/* Load SLBV */
 	ld	%r31, 8(%r31)		/* Load SLBE */
 	or	%r31, %r31, %r29	/*  Set SLBE slot */
 	slbmte	%r30, %r31		/* Install SLB entry */
 
 	addi	%r28, %r28, 8		/* Advance pointer */
 	addi	%r29, %r29, 1
 	b	1b			/* Repeat */
 
 /*
  * Kernel SRs are loaded directly from the PCPU fields
  */
 restore_kernsrs:
 	GET_CPUINFO(%r28)
 	addi	%r28,%r28,PC_KERNSLB
 	li	%r29, 0			/* Set the counter to zero */
 
 	slbia
 	slbmfee	%r31,%r29		
 	clrrdi	%r31,%r31,28
 	slbie	%r31
 1:	cmpdi	%r29, USER_SLB_SLOT	/* Skip the user slot */
 	beq-	2f
 
 	ld	%r31, 8(%r28)		/* Load SLBE */
 	cmpdi	%r31, 0			/* If SLBE is not valid, stop */
 	beqlr
 	ld	%r30, 0(%r28)		/* Load SLBV  */
 	slbmte	%r30, %r31		/* Install SLB entry */
 
 2:	addi	%r28, %r28, 16		/* Advance pointer */
 	addi	%r29, %r29, 1
 	cmpdi	%r29, 64		/* Repeat if we are not at the end */
 	blt	1b 
 	blr
 
 /*
  * FRAME_SETUP assumes:
  *	SPRG1		SP (1)
  * 	SPRG3		trap type
  *	savearea	r27-r31,DAR,DSISR   (DAR & DSISR only for DSI traps)
  *	r28		LR
  *	r29		CR
  *	r30		scratch
  *	r31		scratch
  *	r1		kernel stack
  *	SRR0/1		as at start of trap
  *
  * NOTE: SPRG1 is never used while the MMU is on, making it safe to reuse
  * in any real-mode fault handler, including those handling double faults.
  */
 #define	FRAME_SETUP(savearea)						\
 /* Have to enable translation to allow access of kernel stack: */	\
 	GET_CPUINFO(%r31);						\
 	mfsrr0	%r30;							\
 	std	%r30,(savearea+CPUSAVE_SRR0)(%r31);	/* save SRR0 */	\
 	mfsrr1	%r30;							\
 	std	%r30,(savearea+CPUSAVE_SRR1)(%r31);	/* save SRR1 */	\
 	mfsprg1	%r31;			/* get saved SP (clears SPRG1) */ \
 	mfmsr	%r30;							\
 	ori	%r30,%r30,(PSL_DR|PSL_IR|PSL_RI)@l; /* relocation on */	\
 	mtmsr	%r30;			/* stack can now be accessed */	\
 	isync;								\
 	stdu	%r31,-(FRAMELEN+288)(%r1); /* save it in the callframe */ \
 	std	%r0, FRAME_0+48(%r1);	/* save r0 in the trapframe */	\
 	std	%r31,FRAME_1+48(%r1);	/* save SP   "      "       */	\
 	std	%r2, FRAME_2+48(%r1);	/* save r2   "      "       */	\
 	std	%r28,FRAME_LR+48(%r1);	/* save LR   "      "       */	\
 	std	%r29,FRAME_CR+48(%r1);	/* save CR   "      "       */	\
 	GET_CPUINFO(%r2);						\
 	ld	%r27,(savearea+CPUSAVE_R27)(%r2); /* get saved r27 */	\
 	ld	%r28,(savearea+CPUSAVE_R28)(%r2); /* get saved r28 */	\
 	ld	%r29,(savearea+CPUSAVE_R29)(%r2); /* get saved r29 */	\
 	ld	%r30,(savearea+CPUSAVE_R30)(%r2); /* get saved r30 */	\
 	ld	%r31,(savearea+CPUSAVE_R31)(%r2); /* get saved r31 */	\
 	std	%r3,  FRAME_3+48(%r1);	/* save r3-r31 */		\
 	std	%r4,  FRAME_4+48(%r1);					\
 	std	%r5,  FRAME_5+48(%r1);					\
 	std	%r6,  FRAME_6+48(%r1);					\
 	std	%r7,  FRAME_7+48(%r1);					\
 	std	%r8,  FRAME_8+48(%r1);					\
 	std	%r9,  FRAME_9+48(%r1);					\
 	std	%r10, FRAME_10+48(%r1);					\
 	std	%r11, FRAME_11+48(%r1);					\
 	std	%r12, FRAME_12+48(%r1);					\
 	std	%r13, FRAME_13+48(%r1);					\
 	std	%r14, FRAME_14+48(%r1);					\
 	std	%r15, FRAME_15+48(%r1);					\
 	std	%r16, FRAME_16+48(%r1);					\
 	std	%r17, FRAME_17+48(%r1);					\
 	std	%r18, FRAME_18+48(%r1);					\
 	std	%r19, FRAME_19+48(%r1);					\
 	std	%r20, FRAME_20+48(%r1);					\
 	std	%r21, FRAME_21+48(%r1);					\
 	std	%r22, FRAME_22+48(%r1);					\
 	std	%r23, FRAME_23+48(%r1);					\
 	std	%r24, FRAME_24+48(%r1);					\
 	std	%r25, FRAME_25+48(%r1);					\
 	std	%r26, FRAME_26+48(%r1);					\
 	std	%r27, FRAME_27+48(%r1);					\
 	std	%r28, FRAME_28+48(%r1);					\
 	std	%r29, FRAME_29+48(%r1);					\
 	std	%r30, FRAME_30+48(%r1);					\
 	std	%r31, FRAME_31+48(%r1);					\
 	ld	%r28,(savearea+CPUSAVE_AIM_DAR)(%r2);  /* saved DAR */	\
 	ld	%r29,(savearea+CPUSAVE_AIM_DSISR)(%r2);/* saved DSISR */\
 	ld	%r30,(savearea+CPUSAVE_SRR0)(%r2); /* saved SRR0 */	\
 	ld	%r31,(savearea+CPUSAVE_SRR1)(%r2); /* saved SRR1 */	\
 	mfxer	%r3;							\
 	mfctr	%r4;							\
 	mfsprg3	%r5;							\
 	std	%r3, FRAME_XER+48(1);	/* save xer/ctr/exc */		\
 	std	%r4, FRAME_CTR+48(1);					\
 	std	%r5, FRAME_EXC+48(1);					\
 	std	%r28,FRAME_AIM_DAR+48(1);				\
 	std	%r29,FRAME_AIM_DSISR+48(1); /* save dsisr/srr0/srr1 */	\
 	std	%r30,FRAME_SRR0+48(1);					\
 	std	%r31,FRAME_SRR1+48(1);					\
 	ld	%r13,PC_CURTHREAD(%r2)	/* set kernel curthread */
 
 #define	FRAME_LEAVE(savearea)						\
 /* Disable exceptions: */						\
 	mfmsr	%r2;							\
 	andi.	%r2,%r2,~PSL_EE@l;					\
 	mtmsr	%r2;							\
 	isync;								\
 /* Now restore regs: */							\
 	ld	%r2,FRAME_SRR0+48(%r1);					\
 	ld	%r3,FRAME_SRR1+48(%r1);					\
 	ld	%r4,FRAME_CTR+48(%r1);					\
 	ld	%r5,FRAME_XER+48(%r1);					\
 	ld	%r6,FRAME_LR+48(%r1);					\
 	GET_CPUINFO(%r7);						\
 	std	%r2,(savearea+CPUSAVE_SRR0)(%r7); /* save SRR0 */	\
 	std	%r3,(savearea+CPUSAVE_SRR1)(%r7); /* save SRR1 */	\
 	ld	%r7,FRAME_CR+48(%r1);					\
 	mtctr	%r4;							\
 	mtxer	%r5;							\
 	mtlr	%r6;							\
 	mtsprg2	%r7;			/* save cr */			\
 	ld	%r31,FRAME_31+48(%r1);   /* restore r0-31 */		\
 	ld	%r30,FRAME_30+48(%r1);					\
 	ld	%r29,FRAME_29+48(%r1);					\
 	ld	%r28,FRAME_28+48(%r1);					\
 	ld	%r27,FRAME_27+48(%r1);					\
 	ld	%r26,FRAME_26+48(%r1);					\
 	ld	%r25,FRAME_25+48(%r1);					\
 	ld	%r24,FRAME_24+48(%r1);					\
 	ld	%r23,FRAME_23+48(%r1);					\
 	ld	%r22,FRAME_22+48(%r1);					\
 	ld	%r21,FRAME_21+48(%r1);					\
 	ld	%r20,FRAME_20+48(%r1);					\
 	ld	%r19,FRAME_19+48(%r1);					\
 	ld	%r18,FRAME_18+48(%r1);					\
 	ld	%r17,FRAME_17+48(%r1);					\
 	ld	%r16,FRAME_16+48(%r1);					\
 	ld	%r15,FRAME_15+48(%r1);					\
 	ld	%r14,FRAME_14+48(%r1);					\
 	ld	%r13,FRAME_13+48(%r1);					\
 	ld	%r12,FRAME_12+48(%r1);					\
 	ld	%r11,FRAME_11+48(%r1);					\
 	ld	%r10,FRAME_10+48(%r1);					\
 	ld	%r9, FRAME_9+48(%r1);					\
 	ld	%r8, FRAME_8+48(%r1);					\
 	ld	%r7, FRAME_7+48(%r1);					\
 	ld	%r6, FRAME_6+48(%r1);					\
 	ld	%r5, FRAME_5+48(%r1);					\
 	ld	%r4, FRAME_4+48(%r1);					\
 	ld	%r3, FRAME_3+48(%r1);					\
 	ld	%r2, FRAME_2+48(%r1);					\
 	ld	%r0, FRAME_0+48(%r1);					\
 	ld	%r1, FRAME_1+48(%r1);					\
 /* Can't touch %r1 from here on */					\
 	mtsprg3	%r3;			/* save r3 */			\
 /* Disable translation, machine check and recoverability: */		\
 	mfmsr	%r3;							\
 	andi.	%r3,%r3,~(PSL_DR|PSL_IR|PSL_ME|PSL_RI)@l;		\
 	mtmsr	%r3;							\
 	isync;								\
 /* Decide whether we return to user mode: */				\
 	GET_CPUINFO(%r3);						\
 	ld	%r3,(savearea+CPUSAVE_SRR1)(%r3);			\
 	mtcr	%r3;							\
 	bf	17,1f;			/* branch if PSL_PR is false */	\
 /* Restore user SRs */							\
 	GET_CPUINFO(%r3);						\
 	std	%r27,(savearea+CPUSAVE_R27)(%r3);			\
 	std	%r28,(savearea+CPUSAVE_R28)(%r3);			\
 	std	%r29,(savearea+CPUSAVE_R29)(%r3);			\
 	std	%r30,(savearea+CPUSAVE_R30)(%r3);			\
 	std	%r31,(savearea+CPUSAVE_R31)(%r3);			\
 	mflr	%r27;			/* preserve LR */		\
 	bl	restore_usersrs;	/* uses r28-r31 */		\
 	mtlr	%r27;							\
 	ld	%r31,(savearea+CPUSAVE_R31)(%r3);			\
 	ld	%r30,(savearea+CPUSAVE_R30)(%r3);			\
 	ld	%r29,(savearea+CPUSAVE_R29)(%r3);			\
 	ld	%r28,(savearea+CPUSAVE_R28)(%r3);			\
 	ld	%r27,(savearea+CPUSAVE_R27)(%r3);			\
 1:	mfsprg2	%r3;			/* restore cr */		\
 	mtcr	%r3;							\
 	GET_CPUINFO(%r3);						\
 	ld	%r3,(savearea+CPUSAVE_SRR0)(%r3); /* restore srr0 */	\
 	mtsrr0	%r3;							\
 	GET_CPUINFO(%r3);						\
 	ld	%r3,(savearea+CPUSAVE_SRR1)(%r3); /* restore srr1 */	\
 	mtsrr1	%r3;							\
 	mfsprg3	%r3			/* restore r3 */
 
 #ifdef KDTRACE_HOOKS
 	.data
 	.globl	dtrace_invop_calltrap_addr
 	.align	8
 	.type	dtrace_invop_calltrap_addr, @object
         .size	dtrace_invop_calltrap_addr, 8
 dtrace_invop_calltrap_addr:
 	.word	0
 	.word	0
 
 	.text
 #endif
 
 /*
  * Processor reset exception handler. These are typically
  * the first instructions the processor executes after a
  * software reset. We do this in two bits so that we are
  * not still hanging around in the trap handling region
  * once the MMU is turned on.
  */
 	.globl	CNAME(rstcode), CNAME(rstcodeend)
 CNAME(rstcode):
 	/* Explicitly set MSR[SF] */
 	mfmsr	%r9
 	li	%r8,1
 	insrdi	%r9,%r8,1,0
 	mtmsrd	%r9
 	isync
 	bl	1f
 	.llong	cpu_reset
 1:	mflr	%r9
 	ld	%r9,0(%r9)
 	mtlr	%r9
 
 	blr
 CNAME(rstcodeend):
 
 cpu_reset:
 	GET_TOCBASE(%r2)
 
 	ld	%r1,TOC_REF(tmpstk)(%r2)	/* get new SP */
 	addi	%r1,%r1,(TMPSTKSZ-48)
 
 	bl	CNAME(cpudep_ap_early_bootstrap) /* Set PCPU */
 	nop
 	lis	%r3,1@l
 	bl	CNAME(pmap_cpu_bootstrap)	/* Turn on virtual memory */
 	nop
 	bl	CNAME(cpudep_ap_bootstrap)	/* Set up PCPU and stack */
 	nop
 	mr	%r1,%r3				/* Use new stack */
 	bl	CNAME(cpudep_ap_setup)
 	nop
 	GET_CPUINFO(%r5)
 	ld	%r3,(PC_RESTORE)(%r5)
 	cmpldi	%cr0,%r3,0
 	beq	%cr0,2f
 	nop
 	li	%r4,1
 	b	CNAME(longjmp)
 	nop
 2:
 #ifdef SMP
 	bl	CNAME(machdep_ap_bootstrap)	/* And away! */
 	nop
 #endif
 
 	/* Should not be reached */
 9:
 	b	9b
 
 /*
  * This code gets copied to all the trap vectors
  * (except ISI/DSI, ALI, and the interrupts). Has to fit in 8 instructions!
  */
 
 	.globl	CNAME(trapcode),CNAME(trapcodeend)
 	.p2align 3
 CNAME(trapcode):
 	mtsprg1	%r1			/* save SP */
 	mflr	%r1			/* Save the old LR in r1 */
 	mtsprg2 %r1			/* And then in SPRG2 */
 	ld	%r1,TRAP_GENTRAP(0)
 	mtlr	%r1
 	li	%r1, 0xe0		/* How to get the vector from LR */
 	blrl				/* Branch to generictrap */
 CNAME(trapcodeend):
 
 /*
  * For SLB misses: do special things for the kernel
  *
  * Note: SPRG1 is always safe to overwrite any time the MMU is on, which is
  * the only time this can be called.
  */
 	.globl	CNAME(slbtrap),CNAME(slbtrapend)
 	.p2align 3
 CNAME(slbtrap):
 	mtsprg1	%r1			/* save SP */
 	GET_CPUINFO(%r1)
 	std	%r2,(PC_SLBSAVE+16)(%r1)
 	mfcr	%r2			/* save CR */
 	std	%r2,(PC_SLBSAVE+104)(%r1)
 	mfsrr1	%r2			/* test kernel mode */
 	mtcr	%r2
 	bf	17,2f			/* branch if PSL_PR is false */
 	/* User mode */
 	ld	%r2,(PC_SLBSAVE+104)(%r1) /* Restore CR */
 	mtcr	%r2
 	ld	%r2,(PC_SLBSAVE+16)(%r1) /* Restore R2 */
 	mflr	%r1			/* Save the old LR in r1 */
 	mtsprg2 %r1			/* And then in SPRG2 */
 					/* 52 bytes so far */
 	bl	1f
 	.llong	generictrap
 1:	mflr	%r1
 	ld	%r1,0(%r1)
 	mtlr	%r1
 	li	%r1, 0x80		/* How to get the vector from LR */
 	blrl				/* Branch to generictrap */
 					/* 84 bytes */
 2:	mflr	%r2			/* Save the old LR in r2 */
 	nop
 	bl	3f			/* Begin dance to jump to kern_slbtrap*/
 	.llong	kern_slbtrap
 3:	mflr	%r1
 	ld	%r1,0(%r1)
 	mtlr	%r1
 	GET_CPUINFO(%r1)
 	blrl				/* 124 bytes -- 4 to spare */
 CNAME(slbtrapend):
 
 kern_slbtrap:
 	std	%r2,(PC_SLBSAVE+136)(%r1) /* old LR */
 	std	%r3,(PC_SLBSAVE+24)(%r1) /* save R3 */
 
 	/* Check if this needs to be handled as a regular trap (userseg miss) */
 	mflr	%r2
 	andi.	%r2,%r2,0xff80
 	cmpwi	%r2,0x380
 	bne	1f
 	mfdar	%r2
 	b	2f
 1:	mfsrr0	%r2
 2:	/* r2 now contains the fault address */
 	lis	%r3,SEGMENT_MASK@highesta
 	ori	%r3,%r3,SEGMENT_MASK@highera
 	sldi	%r3,%r3,32
 	oris	%r3,%r3,SEGMENT_MASK@ha
 	ori	%r3,%r3,SEGMENT_MASK@l
 	and	%r2,%r2,%r3	/* R2 = segment base address */
 	lis	%r3,USER_ADDR@highesta
 	ori	%r3,%r3,USER_ADDR@highera
 	sldi	%r3,%r3,32
 	oris	%r3,%r3,USER_ADDR@ha
 	ori	%r3,%r3,USER_ADDR@l
 	cmpd	%r2,%r3		/* Compare fault base to USER_ADDR */
 	bne	3f
 
 	/* User seg miss, handle as a regular trap */
 	ld	%r2,(PC_SLBSAVE+104)(%r1) /* Restore CR */
 	mtcr	%r2
 	ld	%r2,(PC_SLBSAVE+16)(%r1) /* Restore R2,R3 */
 	ld	%r3,(PC_SLBSAVE+24)(%r1)
 	ld	%r1,(PC_SLBSAVE+136)(%r1) /* Save the old LR in r1 */
 	mtsprg2 %r1			/* And then in SPRG2 */
 	li	%r1, 0x80		/* How to get the vector from LR */
 	b	generictrap		/* Retain old LR using b */
 	
 3:	/* Real kernel SLB miss */
 	std	%r0,(PC_SLBSAVE+0)(%r1)	/* free all volatile regs */
 	mfsprg1	%r2			/* Old R1 */
 	std	%r2,(PC_SLBSAVE+8)(%r1)
 	/* R2,R3 already saved */
 	std	%r4,(PC_SLBSAVE+32)(%r1)
 	std	%r5,(PC_SLBSAVE+40)(%r1)
 	std	%r6,(PC_SLBSAVE+48)(%r1)
 	std	%r7,(PC_SLBSAVE+56)(%r1)
 	std	%r8,(PC_SLBSAVE+64)(%r1)
 	std	%r9,(PC_SLBSAVE+72)(%r1)
 	std	%r10,(PC_SLBSAVE+80)(%r1)
 	std	%r11,(PC_SLBSAVE+88)(%r1)
 	std	%r12,(PC_SLBSAVE+96)(%r1)
 	/* CR already saved */
 	mfxer	%r2			/* save XER */
 	std	%r2,(PC_SLBSAVE+112)(%r1)
 	mflr	%r2			/* save LR (SP already saved) */
 	std	%r2,(PC_SLBSAVE+120)(%r1)
 	mfctr	%r2			/* save CTR */
 	std	%r2,(PC_SLBSAVE+128)(%r1)
 
 	/* Call handler */
 	addi	%r1,%r1,PC_SLBSTACK-48+1024
 	li	%r2,~15
 	and	%r1,%r1,%r2
 	GET_TOCBASE(%r2)
 	mflr	%r3
 	andi.	%r3,%r3,0xff80
 	mfdar	%r4
 	mfsrr0	%r5
 	bl	handle_kernel_slb_spill
 	nop
 
 	/* Save r28-31, restore r4-r12 */
 	GET_CPUINFO(%r1)
 	ld	%r4,(PC_SLBSAVE+32)(%r1)
 	ld	%r5,(PC_SLBSAVE+40)(%r1)
 	ld	%r6,(PC_SLBSAVE+48)(%r1)
 	ld	%r7,(PC_SLBSAVE+56)(%r1)
 	ld	%r8,(PC_SLBSAVE+64)(%r1)
 	ld	%r9,(PC_SLBSAVE+72)(%r1)
 	ld	%r10,(PC_SLBSAVE+80)(%r1)
 	ld	%r11,(PC_SLBSAVE+88)(%r1)
 	ld	%r12,(PC_SLBSAVE+96)(%r1)
 	std	%r28,(PC_SLBSAVE+64)(%r1)
 	std	%r29,(PC_SLBSAVE+72)(%r1)
 	std	%r30,(PC_SLBSAVE+80)(%r1)
 	std	%r31,(PC_SLBSAVE+88)(%r1)
 
 	/* Restore kernel mapping */
 	bl	restore_kernsrs
 
 	/* Restore remaining registers */
 	ld	%r28,(PC_SLBSAVE+64)(%r1)
 	ld	%r29,(PC_SLBSAVE+72)(%r1)
 	ld	%r30,(PC_SLBSAVE+80)(%r1)
 	ld	%r31,(PC_SLBSAVE+88)(%r1)
 
 	ld	%r2,(PC_SLBSAVE+104)(%r1)
 	mtcr	%r2
 	ld	%r2,(PC_SLBSAVE+112)(%r1)
 	mtxer	%r2
 	ld	%r2,(PC_SLBSAVE+120)(%r1)
 	mtlr	%r2
 	ld	%r2,(PC_SLBSAVE+128)(%r1)
 	mtctr	%r2
 	ld	%r2,(PC_SLBSAVE+136)(%r1)
 	mtlr	%r2
 
 	/* Restore r0-r3 */
 	ld	%r0,(PC_SLBSAVE+0)(%r1)
 	ld	%r2,(PC_SLBSAVE+16)(%r1)
 	ld	%r3,(PC_SLBSAVE+24)(%r1)
 	mfsprg1	%r1
 
 	/* Back to whatever we were doing */
 	rfid
 
 /*
  * For ALI: has to save DSISR and DAR
  */
 	.globl	CNAME(alitrap),CNAME(aliend)
 CNAME(alitrap):
 	mtsprg1	%r1			/* save SP */
 	GET_CPUINFO(%r1)
 	std	%r27,(PC_TEMPSAVE+CPUSAVE_R27)(%r1)	/* free r27-r31 */
 	std	%r28,(PC_TEMPSAVE+CPUSAVE_R28)(%r1)
 	std	%r29,(PC_TEMPSAVE+CPUSAVE_R29)(%r1)
 	std	%r30,(PC_TEMPSAVE+CPUSAVE_R30)(%r1)
 	std	%r31,(PC_TEMPSAVE+CPUSAVE_R31)(%r1)
 	mfdar	%r30
 	mfdsisr	%r31
 	std	%r30,(PC_TEMPSAVE+CPUSAVE_AIM_DAR)(%r1)
 	std	%r31,(PC_TEMPSAVE+CPUSAVE_AIM_DSISR)(%r1)
 	mfsprg1	%r1			/* restore SP, in case of branch */
 	mflr	%r28			/* save LR */
 	mfcr	%r29			/* save CR */
 
 	/* Begin dance to branch to s_trap in a bit */
 	b	1f
 	.p2align 3
 1:	nop
 	bl	1f
 	.llong	s_trap
 1:	mflr	%r31
 	ld	%r31,0(%r31)
 	mtlr	%r31
 
 	/* Put our exception vector in SPRG3 */
 	li	%r31, EXC_ALI
 	mtsprg3	%r31
 
 	/* Test whether we already had PR set */
 	mfsrr1	%r31
 	mtcr	%r31
 	blrl
 CNAME(aliend):
 
 /*
  * Similar to the above for DSI
  * Has to handle standard pagetable spills
  */
 	.globl	CNAME(dsitrap),CNAME(dsiend)
 CNAME(dsitrap):
 	mtsprg1	%r1			/* save SP */
 	GET_CPUINFO(%r1)
 	std	%r27,(PC_DISISAVE+CPUSAVE_R27)(%r1)	/* free r27-r31 */
 	std	%r28,(PC_DISISAVE+CPUSAVE_R28)(%r1)
 	std	%r29,(PC_DISISAVE+CPUSAVE_R29)(%r1)
 	std	%r30,(PC_DISISAVE+CPUSAVE_R30)(%r1)
 	std	%r31,(PC_DISISAVE+CPUSAVE_R31)(%r1)
 	mfcr	%r29			/* save CR */
 	mfxer	%r30			/* save XER */
 	mtsprg2	%r30			/* in SPRG2 */
 	mfsrr1	%r31			/* test kernel mode */
 	mtcr	%r31
 	mflr	%r28			/* save LR (SP already saved) */
 	bl	1f			/* Begin branching to disitrap */
 	.llong	disitrap
 1:	mflr	%r1
 	ld	%r1,0(%r1)
 	mtlr	%r1
 	blrl				/* Branch to generictrap */
 CNAME(dsiend):
 
 /*
  * Preamble code for DSI/ISI traps.
  *
  * Records the trap vector (LR & 0xff00) in SPRG3, copies the r27-r31 save
  * slots from the per-CPU PC_DISISAVE area into PC_TEMPSAVE and stores DAR
  * and DSISR next to them.  With KDB, a kernel-mode fault whose DAR lies
  * within 4096 bytes of the page-aligned old SP (SPRG1) is taken to be a
  * kernel stack overflow: the saved state is copied to PC_DBSAVE and control
  * goes to dbtrap.  Every other case continues at realtrap.
  */
 disitrap:
 	/* Write the trap vector to SPRG3 by computing LR & 0xff00 */
 	mflr	%r1
 	andi.	%r1,%r1,0xff00
 	mtsprg3	%r1
 	
 	/* Move the register save slots from DISISAVE to TEMPSAVE */
 	GET_CPUINFO(%r1)
 	ld	%r31,(PC_DISISAVE+CPUSAVE_R27)(%r1)
 	std	%r31,(PC_TEMPSAVE+CPUSAVE_R27)(%r1)
 	ld	%r30,(PC_DISISAVE+CPUSAVE_R28)(%r1)
 	std	%r30,(PC_TEMPSAVE+CPUSAVE_R28)(%r1)
 	ld	%r31,(PC_DISISAVE+CPUSAVE_R29)(%r1)
 	std	%r31,(PC_TEMPSAVE+CPUSAVE_R29)(%r1)
 	ld	%r30,(PC_DISISAVE+CPUSAVE_R30)(%r1)
 	std	%r30,(PC_TEMPSAVE+CPUSAVE_R30)(%r1)
 	ld	%r31,(PC_DISISAVE+CPUSAVE_R31)(%r1)
 	std	%r31,(PC_TEMPSAVE+CPUSAVE_R31)(%r1)
 	/* Record the fault address and status; r30 keeps DAR for the probe below */
 	mfdar	%r30
 	mfdsisr	%r31
 	std	%r30,(PC_TEMPSAVE+CPUSAVE_AIM_DAR)(%r1)
 	std	%r31,(PC_TEMPSAVE+CPUSAVE_AIM_DSISR)(%r1)
 
 #ifdef KDB
 	/* Try to detect a kernel stack overflow */
 	mfsrr1	%r31
 	mtcr	%r31
 	bt	17,realtrap		/* branch if user mode */
 	mfsprg1	%r31			/* get old SP */
 	clrrdi	%r31,%r31,12		/* Round SP down to nearest page */
 	sub.	%r30,%r31,%r30		/* SP - DAR */
 	bge	1f
 	neg	%r30,%r30		/* absolute value of the distance */
 1:	cmpldi	%cr0,%r30,4096		/* is DAR within a page of SP? */
 	bge	%cr0,realtrap		/* no, too far away. */
 
 	/* Now convert this DSI into a DDB trap.  */
 	GET_CPUINFO(%r1)
 	ld	%r30,(PC_TEMPSAVE+CPUSAVE_AIM_DAR)(%r1) /* get DAR */
 	std	%r30,(PC_DBSAVE  +CPUSAVE_AIM_DAR)(%r1) /* save DAR */
 	ld	%r30,(PC_TEMPSAVE+CPUSAVE_AIM_DSISR)(%r1) /* get DSISR */
 	std	%r30,(PC_DBSAVE  +CPUSAVE_AIM_DSISR)(%r1) /* save DSISR */
 	ld	%r31,(PC_DISISAVE+CPUSAVE_R27)(%r1) /* get  r27 */
 	std	%r31,(PC_DBSAVE  +CPUSAVE_R27)(%r1) /* save r27 */
 	ld	%r30,(PC_DISISAVE+CPUSAVE_R28)(%r1) /* get  r28 */
 	std	%r30,(PC_DBSAVE  +CPUSAVE_R28)(%r1) /* save r28 */
 	ld	%r31,(PC_DISISAVE+CPUSAVE_R29)(%r1) /* get  r29 */
 	std	%r31,(PC_DBSAVE  +CPUSAVE_R29)(%r1) /* save r29 */
 	ld	%r30,(PC_DISISAVE+CPUSAVE_R30)(%r1) /* get  r30 */
 	std	%r30,(PC_DBSAVE  +CPUSAVE_R30)(%r1) /* save r30 */
 	ld	%r31,(PC_DISISAVE+CPUSAVE_R31)(%r1) /* get  r31 */
 	std	%r31,(PC_DBSAVE  +CPUSAVE_R31)(%r1) /* save r31 */
 	b	dbtrap
 #endif
 
 	/* XXX need stack probe here */
 /*
  * realtrap: entered by fall-through from disitrap and by branch from the
  * KDB code.  Reloads CR from SRR1 and SP from SPRG1, then goes straight to
  * k_trap for kernel-mode traps.  For user-mode traps it switches r1 to the
  * current PCB, calls restore_kernsrs and continues at s_trap.
  *
  * NOTE(review): s_trap repeats the PSL_PR test and, if the bit is still set
  * in CR, performs the same PC_CURPCB load and restore_kernsrs call again --
  * confirm that the duplication is intentional.
  */
 realtrap:
 /* Test whether we already had PR set */
 	mfsrr1	%r1
 	mtcr	%r1
 	mfsprg1	%r1			/* restore SP (might have been
 					   overwritten) */
 	bf	17,k_trap		/* branch if PSL_PR is false */
 	GET_CPUINFO(%r1)
 	ld	%r1,PC_CURPCB(%r1)
 	mr	%r27,%r28		/* park r28 (saved LR) in r27 ... */
 	mtsprg2	%r29			/* ... and r29 in SPRG2 across the call */
 	bl	restore_kernsrs		/* enable kernel mapping */
 	mfsprg2	%r29			/* get r29 back */
 	mr	%r28,%r27		/* get r28 back */
 	b	s_trap
 
 /*
  * generictrap does some standard setup for trap handling to minimize
  * the code that need be installed in the actual vectors. It expects
  * the following conditions.
  *
  * R1 - Trap vector = LR & (0xff00 | R1)
  * SPRG1 - Original R1 contents
  * SPRG2 - Original LR
  *
  * On reaching s_trap: r27-r31 and DAR have been stored in PC_TEMPSAVE,
  * r1 holds the original SP (from SPRG1), r28 the original LR (from SPRG2),
  * r29 the original CR, SPRG3 the computed trap vector, and CR has been
  * loaded from SRR1 so that bit 17 reflects PSL_PR.
  */
 
 	.globl	CNAME(generictrap)
 generictrap:
 	/* Save R1 for computing the exception vector */
 	mtsprg3 %r1
 
 	/* Save interesting registers */
 	GET_CPUINFO(%r1)
 	std	%r27,(PC_TEMPSAVE+CPUSAVE_R27)(%r1)	/* free r27-r31 */
 	std	%r28,(PC_TEMPSAVE+CPUSAVE_R28)(%r1)
 	std	%r29,(PC_TEMPSAVE+CPUSAVE_R29)(%r1)
 	std	%r30,(PC_TEMPSAVE+CPUSAVE_R30)(%r1)
 	std	%r31,(PC_TEMPSAVE+CPUSAVE_R31)(%r1)
 	mfdar	%r30
 	std	%r30,(PC_TEMPSAVE+CPUSAVE_AIM_DAR)(%r1)
 	mfsprg1	%r1			/* restore SP, in case of branch */
 	mfsprg2	%r28			/* save LR */
 	mfcr	%r29			/* save CR */
 
 	/* Compute the exception vector from the link register */
 	mfsprg3 %r31			/* R1 value passed in by the vector stub */
 	ori	%r31,%r31,0xff00	/* mask = 0xff00 | R1 */
 	mflr	%r30
 	addi	%r30,%r30,-4 /* The branch instruction, not the next */
 	and	%r30,%r30,%r31
 	mtsprg3	%r30
 
 	/* Test whether we already had PR set */
 	mfsrr1	%r31
 	mtcr	%r31
 
 s_trap:
 	bf	17,k_trap		/* branch if PSL_PR is false */
 	GET_CPUINFO(%r1)
 u_trap:
 	ld	%r1,PC_CURPCB(%r1)
 	mr	%r27,%r28		/* park r28 (saved LR) in r27 ... */
 	mtsprg2	%r29			/* ... and r29 (saved CR) in SPRG2 */
 	bl	restore_kernsrs		/* enable kernel mapping */
 	mfsprg2	%r29			/* get r29 back */
 	mr	%r28,%r27		/* get r28 back */
 
 /*
  * Now the common trap catching code.
  *
  * Builds the trap frame from PC_TEMPSAVE, calls powerpc_interrupt() with a
  * pointer 48 bytes above the stack pointer as its argument, and returns
  * through trapexit.  trapexit disables external interrupts and, when the
  * frame's SRR1 has PSL_PR set and the thread flags at TD_FLAGS(%r13) have
  * TDF_ASTPENDING or TDF_NEEDRESCHED set, re-enables interrupts, calls
  * ast() and loops back to trapexit.  Otherwise it unwinds the frame and
  * executes rfid.
  */
 k_trap:
 	FRAME_SETUP(PC_TEMPSAVE)
 /* Call C interrupt dispatcher: */
 trapagain:
 	GET_TOCBASE(%r2)
 	addi	%r3,%r1,48		/* arg0 = r1 + 48 */
 	bl	CNAME(powerpc_interrupt)
 	nop
 
 	.globl	CNAME(trapexit)	/* backtrace code sentinel */
 CNAME(trapexit):
 /* Disable interrupts: */
 	mfmsr	%r3
 	andi.	%r3,%r3,~PSL_EE@l
 	mtmsr	%r3
 	isync
 /* Test AST pending: */
 	ld	%r5,FRAME_SRR1+48(%r1)
 	mtcr	%r5
 	bf	17,1f			/* branch if PSL_PR is false */
 
 	/*
 	 * NOTE(review): no instruction visible here reads r3 before it is
 	 * overwritten below; the flags are fetched through r13 instead.
 	 */
 	GET_CPUINFO(%r3)		/* get per-CPU pointer */
 	lwz	%r4, TD_FLAGS(%r13)	/* get thread flags value */
 	lis	%r5, (TDF_ASTPENDING|TDF_NEEDRESCHED)@h
 	ori	%r5,%r5, (TDF_ASTPENDING|TDF_NEEDRESCHED)@l
 	and.	%r4,%r4,%r5
 	beq	1f			/* neither flag set: return from trap */
 	mfmsr	%r3			/* re-enable interrupts */
 	ori	%r3,%r3,PSL_EE@l
 	mtmsr	%r3
 	isync
 	GET_TOCBASE(%r2)
 	addi	%r3,%r1,48		/* arg0 = r1 + 48 */
 	bl	CNAME(ast)
 	nop
 	.globl	CNAME(asttrapexit)	/* backtrace code sentinel #2 */
 CNAME(asttrapexit):
 	b	trapexit		/* test ast ret value ? */
 1:
 	FRAME_LEAVE(PC_TEMPSAVE)
 	rfid
 
 #if defined(KDB)
 /*
  * Deliberate entry to dbtrap
  *
  * Saves SP in SPRG1 and the current MSR in SRR1, clears PSL_EE and PSL_ME,
  * stores r27-r31 in the per-CPU PC_DBSAVE area, puts the caller's return
  * address in SRR0 and r28, loads LR with EXC_BPT and r29 with CR, then
  * falls through into dbtrap.
  */
 ASENTRY_NOPROF(breakpoint)
 	mtsprg1	%r1
 	mfmsr	%r3
 	mtsrr1	%r3
 	andi.	%r3,%r3,~(PSL_EE|PSL_ME)@l
 	mtmsr	%r3			/* disable interrupts */
 	isync
 	GET_CPUINFO(%r3)
 	std	%r27,(PC_DBSAVE+CPUSAVE_R27)(%r3)
 	std	%r28,(PC_DBSAVE+CPUSAVE_R28)(%r3)
 	std	%r29,(PC_DBSAVE+CPUSAVE_R29)(%r3)
 	std	%r30,(PC_DBSAVE+CPUSAVE_R30)(%r3)
 	std	%r31,(PC_DBSAVE+CPUSAVE_R31)(%r3)
 	mflr	%r28			/* r28 = return address of the caller */
 	li	%r29,EXC_BPT
 	mtlr	%r29			/* dbtrap derives the vector from LR */
 	mfcr	%r29			/* r29 = CR */
 	mtsrr0	%r28			/* resume at the caller's return address */
 
 /*
  * Now the kdb trap catching code.
  *
  * Records the trap vector in SPRG3, switches to the top of tmpstk, builds
  * a frame from PC_DBSAVE and calls db_trap_glue().  A non-zero return means
  * the debugger handled the trap and we return via dbleave.  A zero return
  * means it was not for KDB: the frame is unwound, r27-r31 are re-saved in
  * PC_TEMPSAVE, and the trap is replayed through realtrap.
  */
 dbtrap:
 	/* Write the trap vector to SPRG3 by computing LR & 0xff00 */
 	mflr	%r1
 	andi.	%r1,%r1,0xff00
 	mtsprg3	%r1
 
 	ld	%r1,TRAP_TOCBASE(0)		/* get new SP */
 	ld	%r1,TOC_REF(tmpstk)(%r1)
 	addi	%r1,%r1,(TMPSTKSZ-48)
 
 	FRAME_SETUP(PC_DBSAVE)
 /* Call C trap code: */
 	GET_TOCBASE(%r2)
 	addi	%r3,%r1,48		/* arg0 = r1 + 48 */
 	bl	CNAME(db_trap_glue)
 	nop
 	or.	%r3,%r3,%r3		/* set CR0 from the return value */
 	bne	dbleave			/* non-zero: handled by the debugger */
 /* This wasn't for KDB, so switch to real trap: */
 	ld	%r3,FRAME_EXC+48(%r1)	/* save exception */
 	GET_CPUINFO(%r4)
 	std	%r3,(PC_DBSAVE+CPUSAVE_R31)(%r4)	/* stash it in the DBSAVE r31 slot */
 	FRAME_LEAVE(PC_DBSAVE)
 	mtsprg1	%r1			/* prepare for entrance to realtrap */
 	GET_CPUINFO(%r1)
 	std	%r27,(PC_TEMPSAVE+CPUSAVE_R27)(%r1)
 	std	%r28,(PC_TEMPSAVE+CPUSAVE_R28)(%r1)
 	std	%r29,(PC_TEMPSAVE+CPUSAVE_R29)(%r1)
 	std	%r30,(PC_TEMPSAVE+CPUSAVE_R30)(%r1)
 	std	%r31,(PC_TEMPSAVE+CPUSAVE_R31)(%r1)
 	mflr	%r28
 	mfcr	%r29
 	ld	%r31,(PC_DBSAVE+CPUSAVE_R31)(%r1)	/* exception stashed above */
 	mtsprg3	%r31			/* SPRG3 was clobbered by FRAME_LEAVE */
 	mfsprg1	%r1
 	b	realtrap
 dbleave:
 	FRAME_LEAVE(PC_DBSAVE)
 	rfid
 
 /*
  * In case of KDB we want a separate trap catcher for it
  *
  * The code between dblow and dbend saves SP in SPRG1 and tests PSL_PR in
  * SRR1.  Traps from user mode are handed to generictrap (with the original
  * LR in SPRG2 and r1 = 0); traps from kernel mode save r27-r31 in
  * PC_DBSAVE and branch to dbtrap.
  */
 	.globl	CNAME(dblow),CNAME(dbend)
 CNAME(dblow):
 	mtsprg1	%r1			/* save SP */
 	mtsprg2	%r29			/* save r29 */
 	mfcr	%r29			/* save CR in r29 */
 	mfsrr1	%r1
 	mtcr	%r1
 	bf	17,1f			/* branch if privileged */
 
 	/* Unprivileged case */
 	mtcr	%r29			/* put the condition register back */
         mfsprg2	%r29			/* ... and r29 */
         mflr	%r1			/* save LR */
 	mtsprg2 %r1			/* And then in SPRG2 */
 
 	ld	%r1, TRAP_GENTRAP(0)	/* Get branch address */
 	mtlr	%r1
 	li	%r1, 0	 		/* How to get the vector from LR */
 	blrl				/* Branch to generictrap */
 
 1:
 	GET_CPUINFO(%r1)
 	std	%r27,(PC_DBSAVE+CPUSAVE_R27)(%r1)	/* free r27 */
 	std	%r28,(PC_DBSAVE+CPUSAVE_R28)(%r1)	/* free r28 */
         mfsprg2	%r28				/* original r29 (r29 holds CR) */
         std	%r28,(PC_DBSAVE+CPUSAVE_R29)(%r1)	/* free r29 */
         std	%r30,(PC_DBSAVE+CPUSAVE_R30)(%r1)	/* free r30 */
         std	%r31,(PC_DBSAVE+CPUSAVE_R31)(%r1)	/* free r31 */
         mflr	%r28					/* save LR */
 	bl	9f					/* Begin branch */
 	.llong	dbtrap
 9:	mflr	%r1			/* address of the .llong above */
 	ld	%r1,0(%r1)		/* absolute address of dbtrap */
 	mtlr	%r1
 	blrl				/* Branch to dbtrap */
 CNAME(dbend):
 #endif /* KDB */
Index: projects/clang380-import/sys/powerpc/booke/booke_machdep.c
===================================================================
--- projects/clang380-import/sys/powerpc/booke/booke_machdep.c	(revision 293686)
+++ projects/clang380-import/sys/powerpc/booke/booke_machdep.c	(revision 293687)
@@ -1,411 +1,410 @@
 /*-
  * Copyright (C) 2006-2012 Semihalf
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
  * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 /*-
  * Copyright (C) 2001 Benno Rice
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $
  */
 /*-
  * Copyright (C) 1995, 1996 Wolfgang Solfrank.
  * Copyright (C) 1995, 1996 TooLs GmbH.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *      This product includes software developed by TooLs GmbH.
  * 4. The name of TooLs GmbH may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_ddb.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_kstack_pages.h"
 #include "opt_platform.h"
 
 #include <sys/cdefs.h>
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
 #include <sys/time.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/bus.h>
 #include <sys/cons.h>
 #include <sys/cpu.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/rwlock.h>
 #include <sys/sysctl.h>
 #include <sys/exec.h>
 #include <sys/ktr.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/sysent.h>
 #include <sys/imgact.h>
 #include <sys/msgbuf.h>
 #include <sys/ptrace.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_page.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 
 #include <machine/cpu.h>
 #include <machine/kdb.h>
 #include <machine/reg.h>
 #include <machine/vmparam.h>
 #include <machine/spr.h>
 #include <machine/hid.h>
 #include <machine/psl.h>
 #include <machine/trap.h>
 #include <machine/md_var.h>
 #include <machine/mmuvar.h>
 #include <machine/sigframe.h>
 #include <machine/machdep.h>
 #include <machine/metadata.h>
 #include <machine/platform.h>
 
 #include <sys/linker.h>
 #include <sys/reboot.h>
 
 #include <contrib/libfdt/libfdt.h>
 #include <dev/fdt/fdt_common.h>
 #include <dev/ofw/openfirm.h>
 
 #if defined(MPC85XX) || defined(QORIQ_DPAA)
 #include <powerpc/mpc85xx/mpc85xx.h>
 #endif
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #ifdef  DEBUG
 #define debugf(fmt, args...) printf(fmt, ##args)
 #else
 #define debugf(fmt, args...)
 #endif
 
-extern unsigned char kernel_text[];
 extern unsigned char _etext[];
 extern unsigned char _edata[];
 extern unsigned char __bss_start[];
 extern unsigned char __sbss_start[];
 extern unsigned char __sbss_end[];
 extern unsigned char _end[];
 extern vm_offset_t __endkernel;
 
 /*
  * Bootinfo is passed to us by legacy loaders. Save the address of the
  * structure to handle backward compatibility.
  */
 uint32_t *bootinfo;
 
 void print_kernel_section_addr(void);
 void print_kenv(void);
 uintptr_t booke_init(u_long, u_long);
 void ivor_setup(void);
 
 extern void *interrupt_vector_base;
 extern void *int_critical_input;
 extern void *int_machine_check;
 extern void *int_data_storage;
 extern void *int_instr_storage;
 extern void *int_external_input;
 extern void *int_alignment;
 extern void *int_fpu;
 extern void *int_program;
 extern void *int_syscall;
 extern void *int_decrementer;
 extern void *int_fixed_interval_timer;
 extern void *int_watchdog;
 extern void *int_data_tlb_error;
 extern void *int_inst_tlb_error;
 extern void *int_debug;
 extern void *int_vec;
 extern void *int_vecast;
 #ifdef HWPMC_HOOKS
 extern void *int_performance_counter;
 #endif
 
 /*
  * Program the interrupt vector offset register 'ivor' with the low 16 bits
  * of the address of 'handler', after asserting that the handler shares its
  * upper address bits with interrupt_vector_base.
  *
  * Wrapped in do { } while (0) so that the expansion is a single statement
  * and is safe under an unbraced if/else; callers supply the semicolon.
  */
 #define SET_TRAP(ivor, handler) do { \
 	KASSERT(((uintptr_t)(&(handler)) & ~0xffffUL) == \
 	    ((uintptr_t)(&interrupt_vector_base) & ~0xffffUL), \
 	    ("Handler " #handler " too far from interrupt vector base")); \
 	mtspr(ivor, (uintptr_t)(&(handler)) & 0xffffUL); \
 } while (0)
 
 uintptr_t powerpc_init(vm_offset_t fdt, vm_offset_t, vm_offset_t, void *mdp);
 void booke_cpu_init(void);
 
 /*
  * Flag the CPU as a Book-E part in cpu_features and install the Book-E
  * MMU implementation.
  */
 void
 booke_cpu_init(void)
 {
 	/* Advertise the Book-E feature bit. */
 	cpu_features = cpu_features | PPC_FEATURE_BOOKE;
 
 	/* Select the Book-E pmap at generic probe priority. */
 	(void)pmap_mmu_install(MMU_TYPE_BOOKE, BUS_PROBE_GENERIC);
 }
 
 void
 ivor_setup(void)
 {
 
 	mtspr(SPR_IVPR, ((uintptr_t)&interrupt_vector_base) & 0xffff0000);
 
 	SET_TRAP(SPR_IVOR0, int_critical_input);
 	SET_TRAP(SPR_IVOR1, int_machine_check);
 	SET_TRAP(SPR_IVOR2, int_data_storage);
 	SET_TRAP(SPR_IVOR3, int_instr_storage);
 	SET_TRAP(SPR_IVOR4, int_external_input);
 	SET_TRAP(SPR_IVOR5, int_alignment);
 	SET_TRAP(SPR_IVOR6, int_program);
 	SET_TRAP(SPR_IVOR8, int_syscall);
 	SET_TRAP(SPR_IVOR10, int_decrementer);
 	SET_TRAP(SPR_IVOR11, int_fixed_interval_timer);
 	SET_TRAP(SPR_IVOR12, int_watchdog);
 	SET_TRAP(SPR_IVOR13, int_data_tlb_error);
 	SET_TRAP(SPR_IVOR14, int_inst_tlb_error);
 	SET_TRAP(SPR_IVOR15, int_debug);
 #ifdef HWPMC_HOOKS
 	SET_TRAP(SPR_IVOR35, int_performance_counter);
 #endif
 	switch ((mfpvr() >> 16) & 0xffff) {
 	case FSL_E6500:
 		SET_TRAP(SPR_IVOR32, int_vec);
 		SET_TRAP(SPR_IVOR33, int_vecast);
 		/* FALLTHROUGH */
 	case FSL_E500mc:
 	case FSL_E5500:
 		SET_TRAP(SPR_IVOR7, int_fpu);
 	}
 }
 
 /*
  * Decide whether the loader's first argument is the address of a
  * flattened device tree.  One page at arg1 is mapped with
  * pmap_early_io_map() and checked with fdt_check_header().
  *
  * Returns 0 and stores the mapped address in *dtbp when the header checks
  * out; returns -1 (leaving *dtbp untouched) otherwise.
  */
 static int
 booke_check_for_fdt(uint32_t arg1, vm_offset_t *dtbp)
 {
 	vm_offset_t va;
 
 	/* Reject addresses that are not a multiple of 8. */
 	if ((arg1 & 7) != 0)
 		return (-1);
 
 	va = (vm_offset_t)pmap_early_io_map(arg1, PAGE_SIZE);
 	if (fdt_check_header((void *)va) != 0)
 		return (-1);
 
 	*dtbp = va;
 	return (0);
 }
 
 /*
  * Early machine-dependent initialisation, called from locore with the two
  * loader-supplied arguments.
  *
  * Zeroes .sbss and .bss, initialises TLB1, works out which loader
  * convention is in use (to find either the metadata pointer or an FDT
  * blob), sets cacheline_size for the cores listed below, calls
  * powerpc_init() and finally enables the L1/L2 caches and branch
  * prediction.
  *
  * Returns the value returned by powerpc_init(); locore loads it into r1 as
  * the new stack pointer.
  */
 uintptr_t
 booke_init(u_long arg1, u_long arg2)
 {
 	uintptr_t ret;
 	void *mdp;
 	vm_offset_t dtbp, end;
 
 	end = (uintptr_t)_end;
 	dtbp = (vm_offset_t)NULL;
 
 	/* Set up TLB initially */
 	bootinfo = NULL;
 	bzero(__sbss_start, __sbss_end - __sbss_start);
 	bzero(__bss_start, _end - __bss_start);
 	tlb1_init();
 
 	/*
 	 * Handle the various ways we can get loaded and started:
 	 *  -	FreeBSD's loader passes the pointer to the metadata
 	 *	in arg1, with arg2 undefined. arg1 has a value that's
 	 *	relative to the kernel's link address (i.e. larger
 	 *	than 0xc0000000).
 	 *  -	Juniper's loader passes the metadata pointer in arg2
 	 *	and sets arg1 to zero. This is to signal that the
 	 *	loader maps the kernel and starts it at its link
 	 *	address (unlike the FreeBSD loader).
 	 *  -	U-Boot passes the standard argc and argv parameters
 	 *	in arg1 and arg2 (resp). arg1 is between 1 and some
 	 *	relatively small number, such as 64K. arg2 is the
 	 *	physical address of the argv vector.
 	 *  -   ePAPR loaders pass an FDT blob in r3 (arg1) and the magic hex
 	 *      string 0x45504150 ('EPAP') in r6 (which has been lost by now).
 	 *      r4 (arg2) is supposed to be set to zero, but is not always.
 	 */
 	
 	if (arg1 == 0)				/* Juniper loader */
 		mdp = (void *)arg2;
 	else if (booke_check_for_fdt(arg1, &dtbp) == 0) { /* ePAPR */
 		/*
 		 * Copy the blob to the first 8-byte boundary past the
 		 * kernel image and move the end-of-kernel marker beyond it.
 		 *
 		 * NOTE(review): booke_check_for_fdt() asked for a PAGE_SIZE
 		 * mapping at arg1, while fdt_totalsize() bytes are copied
 		 * here -- confirm the early mapping covers larger blobs.
 		 */
 		end = roundup(end, 8);
 		memmove((void *)end, (void *)dtbp, fdt_totalsize((void *)dtbp));
 		dtbp = end;
 		end += fdt_totalsize((void *)dtbp);
 		__endkernel = end;
 		mdp = NULL;
-	} else if (arg1 > (uintptr_t)kernel_text)	/* FreeBSD loader */
+	} else if (arg1 > (uintptr_t)btext)	/* FreeBSD loader */
 		mdp = (void *)arg1;
 	else					/* U-Boot */
 		mdp = NULL;
 
 	/*
 	 * The cores listed here use 64-byte cache lines; for any other core
 	 * cacheline_size keeps whatever value it already has (presumably the
 	 * 32-byte default -- the initial value is not set in this function).
 	 */
 	switch ((mfpvr()) >> 16) {
 	case FSL_E500mc:
 	case FSL_E5500:
 	case FSL_E6500:
 		cacheline_size = 64;
 		break;
 	}
 
 	ret = powerpc_init(dtbp, 0, 0, mdp);
 
 	/* Enable caches */
 	booke_enable_l1_cache();
 	booke_enable_l2_cache();
 
 	booke_enable_bpred();
 
 	return (ret);
 }
 
 #define RES_GRANULE 32
 extern uint32_t tlb0_miss_locks[];
 
 /*
  * Fill in the Book-E specific fields of a freshly allocated struct pcpu.
  *
  * The TID allocator is reset to TID_MIN.  On SMP kernels the CPU is also
  * handed its own RES_GRANULE-sized slice of tlb0_miss_locks[]: word 0 is
  * the lock, word 1 the recursion counter.  'sz' is not used.
  */
 void
 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t sz)
 {
 
 	pcpu->pc_tid_next = TID_MIN;
 
 #ifdef SMP
 	const int gran_words = RES_GRANULE / sizeof(uint32_t);
 	uint32_t *lock = tlb0_miss_locks + cpuid * gran_words;
 
 	pcpu->pc_booke_tlb_lock = lock;
 	lock[0] = TLB_UNLOCKED;
 	lock[1] = 0;		/* recurse counter */
 #endif
 }
 
 /* Shutdown the CPU as much as possible. */
 void
 cpu_halt(void)
 {
 
 	mtmsr(mfmsr() & ~(PSL_CE | PSL_EE | PSL_ME | PSL_DE));
 	while (1)
 		;
 }
 
 /*
  * Arm single-stepping for 'td': set PSL_DE in the saved SRR1 and the
  * DBCR0_IDM and DBCR0_IC bits in the saved DBCR0 of its trap frame.
  * Always returns 0.
  */
 int
 ptrace_single_step(struct thread *td)
 {
 	struct trapframe *frame = td->td_frame;
 
 	frame->srr1 = frame->srr1 | PSL_DE;
 	frame->cpu.booke.dbcr0 = frame->cpu.booke.dbcr0 |
 	    (DBCR0_IDM | DBCR0_IC);
 
 	return (0);
 }
 
 /*
  * Disarm single-stepping for 'td': clear PSL_DE in the saved SRR1 and the
  * DBCR0_IDM and DBCR0_IC bits in the saved DBCR0 of its trap frame.
  * Always returns 0.
  */
 int
 ptrace_clear_single_step(struct thread *td)
 {
 	struct trapframe *frame = td->td_frame;
 
 	frame->srr1 = frame->srr1 & ~PSL_DE;
 	frame->cpu.booke.dbcr0 = frame->cpu.booke.dbcr0 &
 	    ~(DBCR0_IDM | DBCR0_IC);
 
 	return (0);
 }
 
 /*
  * Stop single-stepping on behalf of the kernel debugger: clear DBCR0_IC in
  * the DBCR0 register and PSL_DE in the saved SRR1 of kdb_frame.
  */
 void
 kdb_cpu_clear_singlestep(void)
 {
 	register_t dbcr0;
 
 	dbcr0 = mfspr(SPR_DBCR0);
 	dbcr0 &= ~DBCR0_IC;
 	mtspr(SPR_DBCR0, dbcr0);
 
 	kdb_frame->srr1 = kdb_frame->srr1 & ~PSL_DE;
 }
 
 /*
  * Start single-stepping on behalf of the kernel debugger: set DBCR0_IC and
  * DBCR0_IDM in the DBCR0 register and PSL_DE in the saved SRR1 of
  * kdb_frame.
  */
 void
 kdb_cpu_set_singlestep(void)
 {
 	register_t dbcr0;
 
 	dbcr0 = mfspr(SPR_DBCR0);
 	dbcr0 |= DBCR0_IC | DBCR0_IDM;
 	mtspr(SPR_DBCR0, dbcr0);
 
 	kdb_frame->srr1 = kdb_frame->srr1 | PSL_DE;
 }
 
Index: projects/clang380-import/sys/powerpc/booke/locore.S
===================================================================
--- projects/clang380-import/sys/powerpc/booke/locore.S	(revision 293686)
+++ projects/clang380-import/sys/powerpc/booke/locore.S	(revision 293687)
@@ -1,884 +1,861 @@
 /*-
  * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com>
  * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
  * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include "assym.s"
 
 #include "opt_hwpmc_hooks.h"
 
 #include <machine/asm.h>
 #include <machine/hid.h>
 #include <machine/param.h>
 #include <machine/spr.h>
 #include <machine/pte.h>
 #include <machine/trap.h>
 #include <machine/vmparam.h>
 #include <machine/tlb.h>
 
 #define TMPSTACKSZ	16384
 
 	.text
 	.globl	btext
 btext:
 
 /*
  * This symbol is here for the benefit of kvm_mkdb, and is supposed to
  * mark the start of kernel text.
  */
 	.globl	kernel_text
 kernel_text:
 
 /*
  * Startup entry.  Note, this must be the first thing in the text segment!
  */
 	.text
 	.globl	__start
 __start:
 
 /*
  * Assumptions on the boot loader:
  *  - System memory starts from physical address 0
  *  - It's mapped by a single TLB1 entry
  *  - TLB1 mapping is 1:1 pa to va
  *  - Kernel is loaded at 64MB boundary
  *  - All PID registers are set to the same value
  *  - CPU is running in AS=0
  *
  * Registers contents provided by the loader(8):
  *	r1	: stack pointer
  *	r3	: metadata pointer
  *
  * We rearrange the TLB1 layout as follows:
  *  - Find TLB1 entry we started in
  *  - Make sure it's protected, invalidate other entries
  *  - Create temp entry in the second AS (make sure it's not TLB[1])
  *  - Switch to temp mapping
  *  - Map 64MB of RAM in TLB1[1]
  *  - Use AS=1, set EPN to KERNBASE and RPN to kernel load address
  *  - Switch to TLB1[1] mapping
  *  - Invalidate temp mapping
  *
  * locore registers use:
  *	r1	: stack pointer
  *	r2	: trace pointer (AP only, for early diagnostics)
  *	r3-r27	: scratch registers
  *	r28	: temp TLB1 entry
  *	r29	: initial TLB1 entry we started in
  *	r30-r31	: arguments (metadata pointer)
  */
 
 /*
  * Keep arguments in r30 & r31 for later use.
  */
 	mr	%r30, %r3
 	mr	%r31, %r4
 
 /*
  * Initial cleanup
  */
 	li	%r3, PSL_DE	/* Keep debug exceptions for CodeWarrior. */
 	mtmsr	%r3
 	isync
 
 	mfpvr	%r3
 	rlwinm	%r3, %r3, 16, 16, 31
 
 	lis	%r4, HID0_E500_DEFAULT_SET@h
 	ori	%r4, %r4, HID0_E500_DEFAULT_SET@l
 
 	/* Check for e500mc and e5500 */
 	cmpli	0, 0, %r3, FSL_E500mc
 	bne	2f
 
 	lis	%r4, HID0_E500MC_DEFAULT_SET@h
 	ori	%r4, %r4, HID0_E500MC_DEFAULT_SET@l
 	b	3f
 2:
 	cmpli	0, 0, %r3, FSL_E5500
 	bne	3f
 
 	lis	%r4, HID0_E5500_DEFAULT_SET@h
 	ori	%r4, %r4, HID0_E5500_DEFAULT_SET@l
 
 3:
 	mtspr	SPR_HID0, %r4
 	isync
 
 /*
  * E500mc and E5500 do not have HID1 register, so skip HID1 setup on
  * this core.
  */
 	cmpli	0, 0, %r3, FSL_E500mc
 	beq	1f
 	cmpli	0, 0, %r3, FSL_E5500
 	beq	1f
 
 	lis	%r3, HID1_E500_DEFAULT_SET@h
 	ori	%r3, %r3, HID1_E500_DEFAULT_SET@l
 	mtspr	SPR_HID1, %r3
 	isync
 1:
 	/* Invalidate all entries in TLB0 */
 	li	%r3, 0
 	bl	tlb_inval_all
 
 	cmpwi	%r30, 0
 	beq	done_mapping
 
 /*
  * Locate the TLB1 entry that maps this code
  */
 	bl	1f
 1:	mflr	%r3
 	bl	tlb1_find_current	/* the entry found is returned in r29 */
 
 	bl	tlb1_inval_all_but_current
 
 /*
  * Create temporary mapping in AS=1 and switch to it
  */
 	addi	%r3, %r29, 1
 	bl	tlb1_temp_mapping_as1
 
 	mfmsr	%r3
 	ori	%r3, %r3, (PSL_IS | PSL_DS)
 	bl	2f
 2:	mflr	%r4
 	addi	%r4, %r4, 20
 	mtspr	SPR_SRR0, %r4
 	mtspr	SPR_SRR1, %r3
 	rfi				/* Switch context */
 
 /*
  * Invalidate initial entry
  */
 	mr	%r3, %r29
 	bl	tlb1_inval_entry
 
 /*
  * Setup final mapping in TLB1[1] and switch to it
  *
  * NOTE(review): the headings in this section say TLB1[1], but the ESEL
  * value inserted into MAS0 below is 0 -- confirm which is intended.
  */
 	/* Final kernel mapping, map in 64 MB of RAM */
 	lis	%r3, MAS0_TLBSEL1@h	/* Select TLB1 */
 	li	%r4, 0			/* Entry 0 */
 	rlwimi	%r3, %r4, 16, 10, 15
 	mtspr	SPR_MAS0, %r3
 	isync
 
 	li	%r3, (TLB_SIZE_64M << MAS1_TSIZE_SHIFT)@l
 	oris	%r3, %r3, (MAS1_VALID | MAS1_IPROT)@h
 	mtspr	SPR_MAS1, %r3		/* note TS was not filled, so it's TS=0 */
 	isync
 
 	lis	%r3, KERNBASE@h
 	ori	%r3, %r3, KERNBASE@l	/* EPN = KERNBASE */
 #ifdef SMP
 	ori	%r3, %r3, MAS2_M@l	/* WIMGE = 0b00100 */
 #endif
 	mtspr	SPR_MAS2, %r3
 	isync
 
 	/* Discover phys load address */
 	bl	3f
 3:	mflr	%r4			/* Use current address */
 	rlwinm	%r4, %r4, 0, 0, 5	/* 64MB alignment mask */
 	ori	%r4, %r4, (MAS3_SX | MAS3_SW | MAS3_SR)@l
 	mtspr	SPR_MAS3, %r4		/* Set RPN and protection */
 	isync
 	bl	zero_mas7
 	bl	zero_mas8
 	tlbwe
 	isync
 	msync
 
 	/* Switch to the above TLB1[1] mapping */
 	bl	4f
 4:	mflr	%r4
 	/*
 	 * NOTE(review): this mask keeps the low 24 bits (16 MB) of the
 	 * current address, while the mapping and alignment mask above are
 	 * 64 MB -- confirm the code is assumed to sit in the first 16 MB.
 	 */
 	rlwinm	%r4, %r4, 0, 8, 31	/* Current offset from kernel load address */
 	rlwinm	%r3, %r3, 0, 0, 19	/* r3 still holds MAS2: keep the EPN bits only */
 	add	%r4, %r4, %r3		/* Convert to kernel virtual address */
 	addi	%r4, %r4, 36		/* 9 instructions from label 4: resume after rfi */
 	li	%r3, PSL_DE		/* Note AS=0 */
 	mtspr   SPR_SRR0, %r4
 	mtspr   SPR_SRR1, %r3
 	rfi
 
 /*
  * Invalidate temp mapping
  */
 	mr	%r3, %r28
 	bl	tlb1_inval_entry
 
 done_mapping:
 
 /*
  * Setup a temporary stack
  */
 	bl	1f
 	.long tmpstack-.
 1:	mflr	%r1
 	lwz	%r2,0(%r1)
 	add	%r1,%r1,%r2
 	addi	%r1, %r1, (TMPSTACKSZ - 16)
 
 /*
  * Relocate kernel
  */
 	bl      1f
 	.long   _DYNAMIC-.
 	.long   _GLOBAL_OFFSET_TABLE_-.
 1:	mflr    %r5
 	lwz	%r3,0(%r5)	/* _DYNAMIC in %r3 */
 	add	%r3,%r3,%r5
 	lwz	%r4,4(%r5)	/* GOT pointer */
 	add	%r4,%r4,%r5
 	lwz	%r4,4(%r4)	/* got[0] is _DYNAMIC link addr */
 	subf	%r4,%r4,%r3	/* subtract to calculate relocbase */
 	bl	elf_reloc_self
 
 /*
  * Initialise exception vector offsets
  */
 	bl	ivor_setup
 
 /*
  * Set up arguments and jump to system initialization code
  */
 	mr	%r3, %r30
 	mr	%r4, %r31
 
 	/* Prepare core */
 	bl	booke_init
 
 	/* Switch to thread0.td_kstack now */
 	mr	%r1, %r3
 	li	%r3, 0
 	stw	%r3, 0(%r1)
 
 	/* Machine independent part, does not return */
 	bl	mi_startup
 	/* NOT REACHED */
 5:	b	5b
 
 
 #ifdef SMP
 /************************************************************************/
 /* AP Boot page */
 /************************************************************************/
 	.text
 	.globl	__boot_page
 	.align	12
 __boot_page:
 	bl	1f
 
 	.globl	bp_ntlb1s
 bp_ntlb1s:
 	.long	0
 
 	.globl	bp_tlb1
 bp_tlb1:
 	.space	4 * 3 * 64
 
 	.globl	bp_tlb1_end
 bp_tlb1_end:
 
 /*
  * Initial configuration
  */
 1:	mflr	%r31		/* r31 hold the address of bp_ntlb1s */
 
 	/* Set HIDs */
 	mfpvr	%r3
 	rlwinm	%r3, %r3, 16, 16, 31
 
 	/* HID0 for E500 is default */
 	lis	%r4, HID0_E500_DEFAULT_SET@h
 	ori	%r4, %r4, HID0_E500_DEFAULT_SET@l
 
 	cmpli	0, 0, %r3, FSL_E500mc
 	bne	2f
 	lis	%r4, HID0_E500MC_DEFAULT_SET@h
 	ori	%r4, %r4, HID0_E500MC_DEFAULT_SET@l
 	b	3f
 2:
 	cmpli	0, 0, %r3, FSL_E5500
 	bne	3f
 	lis	%r4, HID0_E5500_DEFAULT_SET@h
 	ori	%r4, %r4, HID0_E5500_DEFAULT_SET@l
 3:
 	mtspr	SPR_HID0, %r4
 	isync
 /*
  * E500mc and E5500 do not have HID1 register, so skip HID1 setup on
  * this core.
  */
 	cmpli	0, 0, %r3, FSL_E500mc
 	beq	1f
 	cmpli	0, 0, %r3, FSL_E5500
 	beq	1f
 
 	lis	%r3, HID1_E500_DEFAULT_SET@h
 	ori	%r3, %r3, HID1_E500_DEFAULT_SET@l
 	mtspr	SPR_HID1, %r3
 	isync
 1:
 	/* Enable branch prediction */
 	li	%r3, BUCSR_BPEN
 	mtspr	SPR_BUCSR, %r3
 	isync
 
 	/* Invalidate all entries in TLB0 */
 	li	%r3, 0
 	bl	tlb_inval_all
 
 /*
  * Find TLB1 entry which is translating us now
  */
 	bl	2f
 2:	mflr	%r3
 	bl	tlb1_find_current	/* the entry number found is in r29 */
 
 	bl	tlb1_inval_all_but_current
 
 /*
  * Create temporary translation in AS=1 and switch to it
  */
 	lwz	%r3, 0(%r31)
 	bl	tlb1_temp_mapping_as1
 
 	mfmsr	%r3
 	ori	%r3, %r3, (PSL_IS | PSL_DS)
 	bl	3f
 3:	mflr	%r4
 	addi	%r4, %r4, 20
 	mtspr	SPR_SRR0, %r4
 	mtspr	SPR_SRR1, %r3
 	rfi				/* Switch context */
 
 /*
  * Invalidate initial entry
  */
 	mr	%r3, %r29
 	bl	tlb1_inval_entry
 
 /*
  * Setup final mapping in TLB1[1] and switch to it
  */
 	lwz	%r6, 0(%r31)
 	addi	%r5, %r31, 4
 	li	%r4, 0
 
 4:	lis	%r3, MAS0_TLBSEL1@h
 	rlwimi	%r3, %r4, 16, 12, 15
 	mtspr	SPR_MAS0, %r3
 	isync
 	lwz	%r3, 0(%r5)
 	mtspr	SPR_MAS1, %r3
 	isync
 	lwz	%r3, 4(%r5)
 	mtspr	SPR_MAS2, %r3
 	isync
 	lwz	%r3, 8(%r5)
 	mtspr	SPR_MAS3, %r3
 	isync
 	tlbwe
 	isync
 	msync
 	addi	%r5, %r5, 12
 	addi	%r4, %r4, 1
 	cmpw	%r4, %r6
 	blt	4b
 
 	/*
 	 * Switch to the final mapping.
 	 *
 	 * The word after the first 'bl' holds the PC-relative offset of
 	 * __boot_page; LR (copied to r5) is the address of that word, so the
 	 * offset must be loaded through r5.  r3 cannot be used as the base:
 	 * at this point it still holds the last MAS3 value loaded by the
 	 * TLB1 setup loop above.
 	 */
 	bl	5f
 	.long __boot_page-.
 5:	mflr	%r5			/* address of the offset word */
 	lwz	%r3,0(%r5)		/* offset from that word to __boot_page */
 	add	%r5,%r5,%r3		/* __boot_page in r5 */
 	bl	6f
 6:	mflr	%r3
 	rlwinm	%r3, %r3, 0, 0xfff	/* Offset from boot page start */
 	add	%r3, %r3, %r5		/* Make this virtual address */
 	addi	%r3, %r3, 32		/* 8 instructions from label 6: resume after rfi */
 	li	%r4, 0			/* Note AS=0 */
 	mtspr	SPR_SRR0, %r3
 	mtspr	SPR_SRR1, %r4
 	rfi
 
 /*
  * At this point we're running at virtual addresses KERNBASE and beyond so
  * it's allowed to directly access all locations the kernel was linked
  * against.
  */
 
 /*
  * Invalidate temp mapping
  */
 	mr	%r3, %r28
 	bl	tlb1_inval_entry
 
 /*
  * Setup a temporary stack
  */
 	bl	1f
 	.long tmpstack-.
 1:	mflr	%r1
 	lwz	%r2,0(%r1)
 	add	%r1,%r1,%r2
 	addi	%r1, %r1, (TMPSTACKSZ - 16)
 
 /*
  * Initialise exception vector offsets
  */
 	bl	ivor_setup
 
 	/*
 	 * Assign our pcpu instance
 	 */
 	bl	1f
 	.long ap_pcpu-.
 1:	mflr	%r4
 	lwz	%r3, 0(%r4)
 	add	%r3, %r3, %r4
 	lwz	%r3, 0(%r3)
 	mtsprg0	%r3
 
 	bl	pmap_bootstrap_ap
 
 	bl	cpudep_ap_bootstrap
 	/* Switch to the idle thread's kstack */
 	mr	%r1, %r3
 	
 	bl	machdep_ap_bootstrap
 
 	/* NOT REACHED */
 6:	b	6b
 #endif /* SMP */
 
 /*
  * Invalidate all entries in the given TLB.
  *
  * r3	TLBSEL (in); overwritten with the tlbivax operand
  *
  * Builds the tlbivax effective address from the TLB select bit and the
  * invalidate-all bit, issues the invalidate and waits for it with tlbsync.
  */
 tlb_inval_all:
 	rlwinm	%r3, %r3, 3, (1 << 3)	/* TLBSEL */
 	ori	%r3, %r3, (1 << 2)	/* INVALL */
 	tlbivax	0, %r3
 	isync
 	msync
 
 	tlbsync
 	msync
 	blr
 
 /*
  * expects address to look up in r3, returns entry number in r29
  *
  * Loads MAS6 with the search PID taken from PID0, searches with tlbsx,
  * extracts MAS0[ESEL] into r29, then sets IPROT on the entry that was
  * found and writes it back.
  *
  * r17 is scratched.
  *
  * FIXME: the hidden assumption is we are now running in AS=0, but we should
  * retrieve actual AS from MSR[IS|DS] and put it in MAS6[SAS]
  */
 tlb1_find_current:
 	mfspr	%r17, SPR_PID0
 	slwi	%r17, %r17, MAS6_SPID0_SHIFT
 	mtspr	SPR_MAS6, %r17
 	isync
 	tlbsx	0, %r3
 	mfspr	%r17, SPR_MAS0
 	rlwinm	%r29, %r17, 16, 20, 31		/* MAS0[ESEL] -> r29 */
 
 	/* Make sure we have IPROT set on the entry */
 	mfspr	%r17, SPR_MAS1
 	oris	%r17, %r17, MAS1_IPROT@h
 	mtspr	SPR_MAS1, %r17
 	isync
 	tlbwe
 	isync
 	msync
 	blr
 
 /*
  * Invalidates a single entry in TLB1.
  *
  * The entry is selected and read with tlbre, then written back with MAS1
  * set to zero, so every MAS1 field is cleared, not only the valid bit.
  *
  * r3		ESEL
  * r4-r5	scratched
  */
 tlb1_inval_entry:
 	lis	%r4, MAS0_TLBSEL1@h	/* Select TLB1 */
 	rlwimi	%r4, %r3, 16, 10, 15	/* Select our entry */
 	mtspr	SPR_MAS0, %r4
 	isync
 	tlbre
 	li	%r5, 0			/* MAS1[V] = 0 */
 	mtspr	SPR_MAS1, %r5
 	isync
 	tlbwe
 	isync
 	msync
 	blr
 
 /*
  * Clone the current TLB1 translation into another TLB1 entry, with the
  * address space bit set (AS=1) and TID cleared.
  *
  * r3		entry of temp translation (in); holds the return address
  *		on exit
  * r29		entry of current translation
  * r28		returns temp entry passed in r3
  * r4-r5	scratched
  * r20		scratched by zero_mas7/zero_mas8
  */
 tlb1_temp_mapping_as1:
 	mr	%r28, %r3
 
 	/* Read our current translation */
 	lis	%r3, MAS0_TLBSEL1@h	/* Select TLB1 */
 	rlwimi	%r3, %r29, 16, 10, 15	/* Select our current entry */
 	mtspr	SPR_MAS0, %r3
 	isync
 	tlbre
 
 	/* Prepare and write temp entry */
 	lis	%r3, MAS0_TLBSEL1@h	/* Select TLB1 */
 	rlwimi	%r3, %r28, 16, 10, 15	/* Select temp entry */
 	mtspr	SPR_MAS0, %r3
 	isync
 	mfspr	%r5, SPR_MAS1
 	li	%r4, 1			/* AS=1 */
 	rlwimi	%r5, %r4, 12, 19, 19
 	li	%r4, 0			/* Global mapping, TID=0 */
 	rlwimi	%r5, %r4, 16, 8, 15
 	oris	%r5, %r5, (MAS1_VALID | MAS1_IPROT)@h
 	mtspr	SPR_MAS1, %r5
 	isync
 	mflr	%r3			/* keep our return address across the calls */
 	bl	zero_mas7
 	bl	zero_mas8
 	mtlr	%r3
 	tlbwe
 	isync
 	msync
 	blr
 
 /*
  * Loops over TLB1, invalidates all entries skipping the one which currently
  * maps this code.
  *
  * The entry count is read from TLB1CFG; each entry other than r29 is read
  * with tlbre and written back with its VALID and IPROT bits cleared.
  *
  * r29		current entry
  * r3-r6	scratched
  */
 tlb1_inval_all_but_current:
 	mr	%r6, %r3		/* NOTE(review): r6 is not read back in this routine */
 	mfspr	%r3, SPR_TLB1CFG	/* Get number of entries */
 	andi.	%r3, %r3, TLBCFG_NENTRY_MASK@l
 	li	%r4, 0			/* Start from Entry 0 */
 1:	lis	%r5, MAS0_TLBSEL1@h
 	rlwimi	%r5, %r4, 16, 10, 15
 	mtspr	SPR_MAS0, %r5
 	isync
 	tlbre
 	mfspr	%r5, SPR_MAS1
 	cmpw	%r4, %r29		/* our current entry? */
 	beq	2f
 	rlwinm	%r5, %r5, 0, 2, 31	/* clear VALID and IPROT bits */
 	mtspr	SPR_MAS1, %r5
 	isync
 	tlbwe
 	isync
 	msync
 2:	addi	%r4, %r4, 1
 	cmpw	%r4, %r3		/* Check if this is the last entry */
 	bne	1b
 	blr
 
 /*
  * MAS7 and MAS8 conditional zeroing.
  */
 .globl zero_mas7
 zero_mas7:
 	mfpvr	%r20
 	rlwinm	%r20, %r20, 16, 16, 31
 	cmpli	0, 0, %r20, FSL_E500v1
 	beq	1f
 
 	li	%r20, 0
 	mtspr	SPR_MAS7, %r20
 	isync
 1:
 	blr
 
 .globl zero_mas8
 zero_mas8:
 	mfpvr	%r20
 	rlwinm	%r20, %r20, 16, 16, 31
 	cmpli	0, 0, %r20, FSL_E500mc
 	beq	1f
 	cmpli	0, 0, %r20, FSL_E5500
 	beq	1f
 
 	blr
 1:
 	li	%r20, 0
 	mtspr	SPR_MAS8, %r20
 	isync
 	blr
 
 #ifdef SMP
 __boot_page_padding:
 	/*
 	 * Boot page needs to be exactly 4K, with the last word of this page
 	 * acting as the reset vector, so we need to stuff the remainder.
 	 * Upon release from holdoff CPU fetches the last word of the boot
 	 * page.
 	 */
 	.space	4092 - (__boot_page_padding - __boot_page)
 	b	__boot_page
 #endif /* SMP */
 
 /************************************************************************/
 /* locore subroutines */
 /************************************************************************/
 
 /*
  * Cache disable/enable/inval sequences according
  * to section 2.16 of E500CORE RM.
  */
 ENTRY(dcache_inval)
 	/* Invalidate d-cache */
 	mfspr	%r3, SPR_L1CSR0
 	ori	%r3, %r3, (L1CSR0_DCFI | L1CSR0_DCLFR)@l
 	msync
 	isync
 	mtspr	SPR_L1CSR0, %r3
 	isync
 1:	mfspr	%r3, SPR_L1CSR0
 	andi.	%r3, %r3, L1CSR0_DCFI
 	bne	1b
 	blr
 
 ENTRY(dcache_disable)
 	/* Disable d-cache */
 	mfspr	%r3, SPR_L1CSR0
 	li	%r4, L1CSR0_DCE@l
 	not	%r4, %r4
 	and	%r3, %r3, %r4
 	msync
 	isync
 	mtspr	SPR_L1CSR0, %r3
 	isync
 	blr
 
 ENTRY(dcache_enable)
 	/* Enable d-cache */
 	mfspr	%r3, SPR_L1CSR0
 	oris	%r3, %r3, (L1CSR0_DCPE | L1CSR0_DCE)@h
 	ori	%r3, %r3, (L1CSR0_DCPE | L1CSR0_DCE)@l
 	msync
 	isync
 	mtspr	SPR_L1CSR0, %r3
 	isync
 	blr
 
 ENTRY(icache_inval)
 	/* Invalidate i-cache */
 	mfspr	%r3, SPR_L1CSR1
 	ori	%r3, %r3, (L1CSR1_ICFI | L1CSR1_ICLFR)@l
 	isync
 	mtspr	SPR_L1CSR1, %r3
 	isync
 1:	mfspr	%r3, SPR_L1CSR1
 	andi.	%r3, %r3, L1CSR1_ICFI
 	bne	1b
 	blr
 
 ENTRY(icache_disable)
 	/* Disable i-cache */
 	mfspr	%r3, SPR_L1CSR1
 	li	%r4, L1CSR1_ICE@l
 	not	%r4, %r4
 	and	%r3, %r3, %r4
 	isync
 	mtspr	SPR_L1CSR1, %r3
 	isync
 	blr
 
 ENTRY(icache_enable)
 	/* Enable i-cache */
 	mfspr	%r3, SPR_L1CSR1
 	oris	%r3, %r3, (L1CSR1_ICPE | L1CSR1_ICE)@h
 	ori	%r3, %r3, (L1CSR1_ICPE | L1CSR1_ICE)@l
 	isync
 	mtspr	SPR_L1CSR1, %r3
 	isync
 	blr
 
 /*
  * L2 cache disable/enable/inval sequences for E500mc.
  */
 
 ENTRY(l2cache_inval)
 	mfspr	%r3, SPR_L2CSR0
 	oris	%r3, %r3, (L2CSR0_L2FI | L2CSR0_L2LFC)@h
 	ori	%r3, %r3, (L2CSR0_L2FI | L2CSR0_L2LFC)@l
 	isync
 	mtspr	SPR_L2CSR0, %r3
 	isync
 1:	mfspr   %r3, SPR_L2CSR0
 	andis.	%r3, %r3, L2CSR0_L2FI@h
 	bne	1b
 	blr
 
 ENTRY(l2cache_enable)
 	mfspr	%r3, SPR_L2CSR0
 	oris	%r3, %r3, (L2CSR0_L2E | L2CSR0_L2PE)@h
 	isync
 	mtspr	SPR_L2CSR0, %r3
 	isync
 	blr
 
 /*
  * Branch predictor setup.
  */
 ENTRY(bpred_enable)
 	mfspr	%r3, SPR_BUCSR
 	ori	%r3, %r3, BUCSR_BBFI
 	isync
 	mtspr	SPR_BUCSR, %r3
 	isync
 	ori	%r3, %r3, BUCSR_BPEN
 	isync
 	mtspr	SPR_BUCSR, %r3
 	isync
 	blr
 
 ENTRY(dataloss_erratum_access)
 	/* Lock two cache lines into I-Cache */
 	sync
 	mfspr	%r11, SPR_L1CSR1
 	rlwinm	%r11, %r11, 0, ~L1CSR1_ICUL
 	sync
 	isync
 	mtspr	SPR_L1CSR1, %r11
 	isync
 
 	mflr	%r9
 	bl	1f
 	.long 2f-.
 1:
 	mflr	%r5
 	lwz	%r8, 0(%r5)
 	mtlr	%r9
 	add	%r8, %r8, %r5
 	icbtls	0, 0, %r8
 	addi	%r9, %r8, 64
 
 	sync
 	mfspr	%r11, SPR_L1CSR1
 3:	andi.	%r11, %r11, L1CSR1_ICUL
 	bne	3b
 
 	icbtls	0, 0, %r9
 
 	sync
 	mfspr	%r11, SPR_L1CSR1
 3:	andi.	%r11, %r11, L1CSR1_ICUL
 	bne	3b
 
 	b	2f
 	.align	6
 	/* Inside a locked cacheline, wait a while, write, then wait a while */
 2:	sync
 
 	mfspr	%r5, TBR_TBL
 4:	addis	%r11, %r5, 0x100000@h	/* wait around one million timebase ticks */
 	mfspr	%r5, TBR_TBL
 	subf.	%r5, %r5, %r11
 	bgt	4b
 
 	stw	%r4, 0(%r3)
 
 	mfspr	%r5, TBR_TBL
 4:	addis	%r11, %r5, 0x100000@h	/* wait around one million timebase ticks */
 	mfspr	%r5, TBR_TBL
 	subf.	%r5, %r5, %r11
 	bgt	4b
 
 	sync
 
 	/*
 	 * Fill out the rest of this cache line and the next with nops,
 	 * to ensure that nothing outside the locked area will be
 	 * fetched due to a branch.
 	 */
 	.rept 19
 	nop
 	.endr
 
 	icblc	0, 0, %r8
 	icblc	0, 0, %r9
 
 	blr
 
-/*
- * int setfault()
- *
- * Similar to setjmp to setup for handling faults on accesses to user memory.
- * Any routine using this may only call bcopy, either the form below,
- * or the (currently used) C code optimized, so it doesn't use any non-volatile
- * registers.
- */
-	.globl	setfault
-setfault:
-	mflr	%r0
-	mfsprg0	%r4
-	lwz	%r4, TD_PCB(%r2)
-	stw	%r3, PCB_ONFAULT(%r4)
-	mfcr	%r4
-	stw	%r0, 0(%r3)
-	stw	%r1, 4(%r3)
-	stw	%r2, 8(%r3)
-	stw	%r4, 12(%r3)
-	stmw	%r13, 16(%r3)		/* store CR, CTR, XER, [r13 .. r31] */
-	li	%r3, 0			/* return FALSE */
-	blr
-
 /************************************************************************/
 /* Data section								*/
 /************************************************************************/
 	.data
 	.align 3
 GLOBAL(__startkernel)
 	.long   begin
 GLOBAL(__endkernel)
 	.long   end
 	.align	4
 tmpstack:
 	.space	TMPSTACKSZ
 tmpstackbound:
 	.space 10240	/* XXX: this really should not be necessary */
 
 /*
  * Compiled KERNBASE locations
  */
 	.globl	kernbase
 	.set	kernbase, KERNBASE
 
 #include <powerpc/booke/trap_subr.S>
Index: projects/clang380-import/sys/powerpc/include/pcb.h
===================================================================
--- projects/clang380-import/sys/powerpc/include/pcb.h	(revision 293686)
+++ projects/clang380-import/sys/powerpc/include/pcb.h	(revision 293687)
@@ -1,98 +1,98 @@
 /*-
  * Copyright (C) 1995, 1996 Wolfgang Solfrank.
  * Copyright (C) 1995, 1996 TooLs GmbH.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by TooLs GmbH.
  * 4. The name of TooLs GmbH may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  *	$NetBSD: pcb.h,v 1.4 2000/06/04 11:57:17 tsubai Exp $
  * $FreeBSD$
  */
 
 #ifndef _MACHINE_PCB_H_
 #define	_MACHINE_PCB_H_
 
-typedef register_t faultbuf[25];
+#include <machine/setjmp.h>
 
 struct pcb {
 	register_t	pcb_context[20];	/* non-volatile r14-r31 */
 	register_t	pcb_cr;			/* Condition register */
 	register_t	pcb_sp;			/* stack pointer */
 	register_t	pcb_toc;		/* toc pointer */
 	register_t	pcb_lr;			/* link register */
 	struct		pmap *pcb_pm;		/* pmap of our vmspace */
-	faultbuf	*pcb_onfault;		/* For use during
+	jmp_buf		*pcb_onfault;		/* For use during
 						    copyin/copyout */
 	int		pcb_flags;
 #define	PCB_FPU		1	/* Process uses FPU */
 #define	PCB_FPREGS	2	/* Process had FPU registers initialized */
 #define	PCB_VEC		4	/* Process had Altivec initialized */
 #define	PCB_VSX		8	/* Process had VSX initialized */
 	struct fpu {
 		union {
 			double fpr;
 			uint32_t vsr[4];
 		} fpr[32];
 		double	fpscr;	/* FPSCR stored as double for easier access */
 	} pcb_fpu;		/* Floating point processor */
 	unsigned int	pcb_fpcpu;		/* which CPU had our FPU
 							stuff. */
 	struct vec {
 		uint32_t vr[32][4];
 		uint32_t spare[2];
 		uint32_t vrsave;
 		uint32_t vscr;	/* aligned at vector element 3 */
 	} pcb_vec __aligned(16);	/* Vector processor */
 	unsigned int	pcb_veccpu;		/* which CPU had our vector
 							stuff. */
 
 	union {
 		struct {
 			vm_offset_t	usr_segm;	/* Base address */
 			register_t	usr_vsid;	/* USER_SR segment */
 		} aim;
 		struct {
 			register_t	dbcr0;
 		} booke;
 	} pcb_cpu;
 };
 
 #ifdef	_KERNEL
 
 struct trapframe;
 
 #ifndef curpcb
 extern struct pcb *curpcb;
 #endif
 
 extern struct pmap *curpm;
 extern struct proc *fpuproc;
 
 void	makectx(struct trapframe *, struct pcb *);
 void	savectx(struct pcb *) __returns_twice;
 
 #endif
 #endif	/* _MACHINE_PCB_H_ */
Index: projects/clang380-import/sys/powerpc/include/setjmp.h
===================================================================
--- projects/clang380-import/sys/powerpc/include/setjmp.h	(revision 293686)
+++ projects/clang380-import/sys/powerpc/include/setjmp.h	(revision 293687)
@@ -1,24 +1,28 @@
 /*-
  *	$NetBSD: setjmp.h,v 1.3 1998/09/16 23:51:27 thorpej Exp $
  * $FreeBSD$
  */
 
 #ifndef _MACHINE_SETJMP_H_
 #define	_MACHINE_SETJMP_H_
 
 #include <sys/cdefs.h>
 
+#ifdef _KERNEL
+#define	_JBLEN	25	/* Kernel doesn't save FP and Altivec regs */
+#else
 #define	_JBLEN	100
+#endif
 
 /*
  * jmp_buf and sigjmp_buf are encapsulated in different structs to force
  * compile-time diagnostics for mismatches.  The structs are the same
  * internally to avoid some run-time errors for mismatches.
  */
 #if __BSD_VISIBLE || __POSIX_VISIBLE || __XSI_VISIBLE
 typedef	struct _sigjmp_buf { long _sjb[_JBLEN + 1]; } sigjmp_buf[1];
 #endif
 
 typedef	struct _jmp_buf { long _jb[_JBLEN + 1]; } jmp_buf[1];
 
 #endif /* !_MACHINE_SETJMP_H_ */
Index: projects/clang380-import/sys/powerpc/ofw/rtas.c
===================================================================
--- projects/clang380-import/sys/powerpc/ofw/rtas.c	(revision 293686)
+++ projects/clang380-import/sys/powerpc/ofw/rtas.c	(revision 293687)
@@ -1,272 +1,271 @@
 /*-
  * Copyright (c) 2011 Nathan Whitehorn
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 #include <vm/pmap.h>
 
 #include <machine/bus.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/pmap.h>
 #include <machine/rtas.h>
 #include <machine/stdarg.h>
 
 #include <dev/ofw/openfirm.h>
 
 static MALLOC_DEFINE(M_RTAS, "rtas", "Run Time Abstraction Service");
 
 static vm_offset_t	rtas_bounce_phys;
 static caddr_t		rtas_bounce_virt;
 static off_t		rtas_bounce_offset;
 static size_t		rtas_bounce_size;
 static uintptr_t	rtas_private_data;
 static struct mtx	rtas_mtx;
 static phandle_t	rtas;
 
 /* From ofwcall.S */
 int rtascall(vm_offset_t callbuffer, uintptr_t rtas_privdat);
 extern uintptr_t	rtas_entry;
 extern register_t	rtasmsr;
 
-int setfault(faultbuf);             /* defined in locore.S */
-
 /*
  * After the VM is up, allocate RTAS memory and instantiate it
  */
 
 static void rtas_setup(void *);
 
 SYSINIT(rtas_setup, SI_SUB_KMEM, SI_ORDER_ANY, rtas_setup, NULL);
 
 static void
 rtas_setup(void *junk)
 {
 	ihandle_t rtasi;
 	cell_t rtas_size = 0, rtas_ptr;
 	char path[31];
 	int result;
 
 	rtas = OF_finddevice("/rtas");
 	if (rtas == -1) {
 		rtas = 0;
 		return;
 	}
 	OF_package_to_path(rtas, path, sizeof(path));
 
 	mtx_init(&rtas_mtx, "RTAS", NULL, MTX_SPIN);
 
 	/* RTAS must be called with everything turned off in MSR */
 	rtasmsr = mfmsr();
 	rtasmsr &= ~(PSL_IR | PSL_DR | PSL_EE | PSL_SE);
 	#ifdef __powerpc64__
 	rtasmsr &= ~PSL_SF;
 	#endif
 
 	/*
 	 * Allocate rtas_size + one page of contiguous, wired physical memory
 	 * that can fit into a 32-bit address space and accessed from real mode.
 	 * This is used both to bounce arguments and for RTAS private data.
 	 *
 	 * It must be 4KB-aligned and not cross a 256 MB boundary.
 	 */
 
 	OF_getencprop(rtas, "rtas-size", &rtas_size, sizeof(rtas_size));
 	rtas_size = round_page(rtas_size);
 	rtas_bounce_virt = contigmalloc(rtas_size + PAGE_SIZE, M_RTAS, 0, 0,
 	    ulmin(platform_real_maxaddr(), BUS_SPACE_MAXADDR_32BIT),
 	    4096, 256*1024*1024);
 
 	rtas_private_data = vtophys(rtas_bounce_virt);
 	rtas_bounce_virt += rtas_size;	/* Actual bounce area */
 	rtas_bounce_phys = vtophys(rtas_bounce_virt);
 	rtas_bounce_size = PAGE_SIZE;
 
 	/*
 	 * Instantiate RTAS. We always use the 32-bit version.
 	 */
 
 	if (OF_hasprop(rtas, "linux,rtas-entry") &&
 	    OF_hasprop(rtas, "linux,rtas-base")) {
 		OF_getencprop(rtas, "linux,rtas-base", &rtas_ptr,
 		    sizeof(rtas_ptr));
 		rtas_private_data = rtas_ptr;
 		OF_getencprop(rtas, "linux,rtas-entry", &rtas_ptr,
 		    sizeof(rtas_ptr));
 	} else {
 		rtasi = OF_open(path);
 		if (rtasi == 0) {
 			rtas = 0;
 			printf("Error initializing RTAS: could not open "
 			    "node\n");
 			return;
 		}
 
 		result = OF_call_method("instantiate-rtas", rtasi, 1, 1,
 		    (cell_t)rtas_private_data, &rtas_ptr);
 		OF_close(rtasi);
 
 		if (result != 0) {
 			rtas = 0;
 			rtas_ptr = 0;
 			printf("Error initializing RTAS (%d)\n", result);
 			return;
 		}
 	}
 
 	rtas_entry = (uintptr_t)(rtas_ptr);
 }
 
 static cell_t
 rtas_real_map(const void *buf, size_t len)
 {
 	cell_t phys;
 
 	mtx_assert(&rtas_mtx, MA_OWNED);
 
 	/*
 	 * Make sure the bounce page offset satisfies any reasonable
 	 * alignment constraint.
 	 */
 	rtas_bounce_offset += sizeof(register_t) -
 	    (rtas_bounce_offset % sizeof(register_t));
 
 	if (rtas_bounce_offset + len > rtas_bounce_size) {
 		panic("Oversize RTAS call!");
 		return 0;
 	}
 
 	if (buf != NULL)
 		memcpy(rtas_bounce_virt + rtas_bounce_offset, buf, len);
 	else
 		return (0);
 
 	phys = rtas_bounce_phys + rtas_bounce_offset;
 	rtas_bounce_offset += len;
 
 	return (phys);
 }
 
 static void
 rtas_real_unmap(cell_t physaddr, void *buf, size_t len)
 {
 	mtx_assert(&rtas_mtx, MA_OWNED);
 
 	if (physaddr == 0)
 		return;
 
 	memcpy(buf, rtas_bounce_virt + (physaddr - rtas_bounce_phys), len);
 }
 
 /* Check if we have RTAS */
 int
 rtas_exists(void)
 {
 	return (rtas != 0);
 }
 
 /* Call an RTAS method by token */
 int
 rtas_call_method(cell_t token, int nargs, int nreturns, ...)
 {
 	vm_offset_t argsptr;
-	faultbuf env, *oldfaultbuf;
+	jmp_buf env, *oldfaultbuf;
 	va_list ap;
 	struct {
 		cell_t token;
 		cell_t nargs;
 		cell_t nreturns;
 		cell_t args_n_results[12];
 	} args;
 	int n, result;
 
 	if (!rtas_exists() || nargs + nreturns > 12)
 		return (-1);
 
 	args.token = token;
 	va_start(ap, nreturns);
 
 	mtx_lock_spin(&rtas_mtx);
 	rtas_bounce_offset = 0;
 
 	args.nargs = nargs;
 	args.nreturns = nreturns;
 
 	for (n = 0; n < nargs; n++)
 		args.args_n_results[n] = va_arg(ap, cell_t);
 
 	argsptr = rtas_real_map(&args, sizeof(args));
 
 	/* Get rid of any stale machine checks that have been waiting.  */
 	__asm __volatile ("sync; isync");
 	oldfaultbuf = curthread->td_pcb->pcb_onfault;
-        if (!setfault(env)) {
+	curthread->td_pcb->pcb_onfault = &env;
+	if (!setjmp(env)) {
 		__asm __volatile ("sync");
 		result = rtascall(argsptr, rtas_private_data);
 		__asm __volatile ("sync; isync");
 	} else {
 		result = RTAS_HW_ERROR;
 	}
 	curthread->td_pcb->pcb_onfault = oldfaultbuf;
 	__asm __volatile ("sync");
 
 	rtas_real_unmap(argsptr, &args, sizeof(args));
 	mtx_unlock_spin(&rtas_mtx);
 
 	if (result < 0)
 		return (result);
 
 	for (n = nargs; n < nargs + nreturns; n++)
 		*va_arg(ap, cell_t *) = args.args_n_results[n];
 	return (result);
 }
 
 /* Look up an RTAS token */
 cell_t
 rtas_token_lookup(const char *method)
 {
 	cell_t token;
 	
 	if (!rtas_exists())
 		return (-1);
 
 	if (OF_getencprop(rtas, method, &token, sizeof(token)) == -1)
 		return (-1);
 
 	return (token);
 }
 
 
Index: projects/clang380-import/sys/powerpc/powermac/grackle.c
===================================================================
--- projects/clang380-import/sys/powerpc/powermac/grackle.c	(revision 293686)
+++ projects/clang380-import/sys/powerpc/powermac/grackle.c	(revision 293687)
@@ -1,325 +1,324 @@
 /*-
  * Copyright 2003 by Peter Grehan. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/module.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 
 #include <dev/ofw/openfirm.h>
 #include <dev/ofw/ofw_pci.h>
 #include <dev/ofw/ofw_bus.h>
 #include <dev/ofw/ofw_bus_subr.h>
 
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 
 #include <machine/bus.h>
 #include <machine/intr_machdep.h>
 #include <machine/md_var.h>
 #include <machine/pio.h>
 #include <machine/resource.h>
 
 #include <sys/rman.h>
 
 #include <powerpc/ofw/ofw_pci.h>
 #include <powerpc/powermac/gracklevar.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 
 #include "pcib_if.h"
 
 /*
  * Device interface.
  */
 static int		grackle_probe(device_t);
 static int		grackle_attach(device_t);
 
 /*
  * pcib interface.
  */
 static u_int32_t	grackle_read_config(device_t, u_int, u_int, u_int,
 			    u_int, int);
 static void		grackle_write_config(device_t, u_int, u_int, u_int,
 			    u_int, u_int32_t, int);
 
 /*
  * Local routines.
  */
 static int		grackle_enable_config(struct grackle_softc *, u_int,
 			    u_int, u_int, u_int);
 static void		grackle_disable_config(struct grackle_softc *);
 static int		badaddr(void *, size_t);
 
-int			setfault(faultbuf);	/* defined in locore.S */
-
 /*
  * Driver methods.
  */
 static device_method_t	grackle_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		grackle_probe),
 	DEVMETHOD(device_attach,	grackle_attach),
 
 	/* pcib interface */
 	DEVMETHOD(pcib_read_config,	grackle_read_config),
 	DEVMETHOD(pcib_write_config,	grackle_write_config),
 
 	DEVMETHOD_END
 };
 
 static devclass_t	grackle_devclass;
 DEFINE_CLASS_1(pcib, grackle_driver, grackle_methods,
     sizeof(struct grackle_softc), ofw_pci_driver);
 DRIVER_MODULE(grackle, ofwbus, grackle_driver, grackle_devclass, 0, 0);
 
 static int
 grackle_probe(device_t dev)
 {
 	const char	*type, *compatible;
 
 	type = ofw_bus_get_type(dev);
 	compatible = ofw_bus_get_compat(dev);
 
 	if (type == NULL || compatible == NULL)
 		return (ENXIO);
 
 	if (strcmp(type, "pci") != 0 || strcmp(compatible, "grackle") != 0)
 		return (ENXIO);
 
 	device_set_desc(dev, "MPC106 (Grackle) Host-PCI bridge");
 	return (0);
 }
 
 static int
 grackle_attach(device_t dev)
 {
 	struct		grackle_softc *sc;
 
 	sc = device_get_softc(dev);
 
 	/*
 	 * The Grackle PCI config addr/data registers are actually in
 	 * PCI space, but since they are needed to actually probe the
 	 * PCI bus, use the fact that they are also available directly
 	 * on the processor bus and map them
 	 */
 	sc->sc_addr = (vm_offset_t)pmap_mapdev(GRACKLE_ADDR, PAGE_SIZE);
 	sc->sc_data = (vm_offset_t)pmap_mapdev(GRACKLE_DATA, PAGE_SIZE);
 
 	return (ofw_pci_attach(dev));
 }
 
 static u_int32_t
 grackle_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg,
     int width)
 {
 	struct		grackle_softc *sc;
 	vm_offset_t	caoff;
 	u_int32_t	retval = 0xffffffff;
 
 	sc = device_get_softc(dev);
 	caoff = sc->sc_data + (reg & 0x03);
 
 	if (grackle_enable_config(sc, bus, slot, func, reg) != 0) {
 
 		/*
 		 * Config probes to non-existent devices on the
 		 * secondary bus generates machine checks. Be sure
 		 * to catch these.
 		 */
 		if (bus > 0) {
 		  if (badaddr((void *)sc->sc_data, 4)) {
 			  return (retval);
 		  }
 		}
 
 		switch (width) {
 		case 1:
 			retval = (in8rb(caoff));
 			break;
 		case 2:
 			retval = (in16rb(caoff));
 			break;
 		case 4:
 			retval = (in32rb(caoff));
 			break;
 		}
 	}
 	grackle_disable_config(sc);
 
 	return (retval);
 }
 
 static void
 grackle_write_config(device_t dev, u_int bus, u_int slot, u_int func,
     u_int reg, u_int32_t val, int width)
 {
 	struct		grackle_softc *sc;
 	vm_offset_t	caoff;
 
 	sc = device_get_softc(dev);
 	caoff = sc->sc_data + (reg & 0x03);
 
 	if (grackle_enable_config(sc, bus, slot, func, reg)) {
 		switch (width) {
 		case 1:
 			out8rb(caoff, val);
 			(void)in8rb(caoff);
 			break;
 		case 2:
 			out16rb(caoff, val);
 			(void)in16rb(caoff);
 			break;
 		case 4:
 			out32rb(caoff, val);
 			(void)in32rb(caoff);
 			break;
 		}
 	}
 	grackle_disable_config(sc);
 }
 
 static int
 grackle_enable_config(struct grackle_softc *sc, u_int bus, u_int slot,
     u_int func, u_int reg)
 {
 	u_int32_t	cfgval;
 
 	/*
 	 * Unlike UniNorth, the format of the config word is the same
 	 * for local (0) and remote busses.
 	 */
 	cfgval = (bus << 16) | (slot << 11) | (func << 8) | (reg & 0xFC)
 	    | GRACKLE_CFG_ENABLE;
 
 	out32rb(sc->sc_addr, cfgval);
 	(void) in32rb(sc->sc_addr);
 
 	return (1);
 }
 
 static void
 grackle_disable_config(struct grackle_softc *sc)
 {
 	/*
 	 * Clear the GRACKLE_CFG_ENABLE bit to prevent stray
 	 * accesses from causing config cycles
 	 */
 	out32rb(sc->sc_addr, 0);
 }
 
 static int
 badaddr(void *addr, size_t size)
 {
 	struct thread	*td;
-	faultbuf	env, *oldfaultbuf;
+	jmp_buf		env, *oldfaultbuf;
 	int		x;
 
 	/* Get rid of any stale machine checks that have been waiting.  */
 	__asm __volatile ("sync; isync");
 
 	td = curthread;
 
 	oldfaultbuf = td->td_pcb->pcb_onfault;
-	if (setfault(env)) {
+	td->td_pcb->pcb_onfault = &env;
+	if (setjmp(env)) {
 		td->td_pcb->pcb_onfault = oldfaultbuf;
 		__asm __volatile ("sync");
 		return 1;
 	}
 
 	__asm __volatile ("sync");
 
 	switch (size) {
 	case 1:
 		x = *(volatile int8_t *)addr;
 		break;
 	case 2:
 		x = *(volatile int16_t *)addr;
 		break;
 	case 4:
 		x = *(volatile int32_t *)addr;
 		break;
 	default:
 		panic("badaddr: invalid size (%zd)", size);
 	}
 
 	/* Make sure we took the machine check, if we caused one. */
 	__asm __volatile ("sync; isync");
 
 	td->td_pcb->pcb_onfault = oldfaultbuf;
 	__asm __volatile ("sync");	/* To be sure. */
 
 	return (0);
 }
 
 /*
  * Driver to swallow Grackle host bridges from the PCI bus side.
  */
 static int
 grackle_hb_probe(device_t dev)
 {
 
 	if (pci_get_devid(dev) == 0x00021057) {
 		device_set_desc(dev, "Grackle Host to PCI bridge");
 		device_quiet(dev);
 		return (0);
 	}
 
 	return (ENXIO);
 }
 
 static int
 grackle_hb_attach(device_t dev)
 {
 
 	return (0);
 }
 
 static device_method_t grackle_hb_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,         grackle_hb_probe),
 	DEVMETHOD(device_attach,        grackle_hb_attach),
 
 	{ 0, 0 }
 };
 
 static driver_t grackle_hb_driver = {
 	"grackle_hb",
 	grackle_hb_methods,
 	1,
 };
 static devclass_t grackle_hb_devclass;
 
 DRIVER_MODULE(grackle_hb, pci, grackle_hb_driver, grackle_hb_devclass, 0, 0);
Index: projects/clang380-import/sys/powerpc/powerpc/copyinout.c
===================================================================
--- projects/clang380-import/sys/powerpc/powerpc/copyinout.c	(revision 293686)
+++ projects/clang380-import/sys/powerpc/powerpc/copyinout.c	(revision 293687)
@@ -1,611 +1,621 @@
 /*-
  * Copyright (C) 2002 Benno Rice
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 /*-
  * Copyright (C) 1993 Wolfgang Solfrank.
  * Copyright (C) 1993 TooLs GmbH.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by TooLs GmbH.
  * 4. The name of TooLs GmbH may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 
 #include <machine/pcb.h>
 #include <machine/sr.h>
 #include <machine/slb.h>
 #include <machine/vmparam.h>
 
-int	setfault(faultbuf);	/* defined in locore.S */
-
 #ifdef AIM
 /*
  * Makes sure that the right segment of userspace is mapped in.
  */
 
 #ifdef __powerpc64__
 static __inline void
 set_user_sr(pmap_t pm, volatile const void *addr)
 {
 	struct slb *slb;
 	register_t slbv;
 
 	/* Try lockless look-up first */
 	slb = user_va_to_slb_entry(pm, (vm_offset_t)addr);
 
 	if (slb == NULL) {
 		/* If it isn't there, we need to pre-fault the VSID */
 		PMAP_LOCK(pm);
 		slbv = va_to_vsid(pm, (vm_offset_t)addr) << SLBV_VSID_SHIFT;
 		PMAP_UNLOCK(pm);
 	} else {
 		slbv = slb->slbv;
 	}
 
 	/* Mark segment no-execute */
 	slbv |= SLBV_N;
 
 	/* If we have already set this VSID, we can just return */
 	if (curthread->td_pcb->pcb_cpu.aim.usr_vsid == slbv) 
 		return;
 
 	__asm __volatile("isync");
 	curthread->td_pcb->pcb_cpu.aim.usr_segm =
 	    (uintptr_t)addr >> ADDR_SR_SHFT;
 	curthread->td_pcb->pcb_cpu.aim.usr_vsid = slbv;
 	__asm __volatile ("slbie %0; slbmte %1, %2; isync" ::
 	    "r"(USER_ADDR), "r"(slbv), "r"(USER_SLB_SLBE));
 }
 #else
 static __inline void
 set_user_sr(pmap_t pm, volatile const void *addr)
 {
 	register_t vsid;
 
 	vsid = va_to_vsid(pm, (vm_offset_t)addr);
 
 	/* Mark segment no-execute */
 	vsid |= SR_N;
 
 	/* If we have already set this VSID, we can just return */
 	if (curthread->td_pcb->pcb_cpu.aim.usr_vsid == vsid)
 		return;
 
 	__asm __volatile("isync");
 	curthread->td_pcb->pcb_cpu.aim.usr_segm =
 	    (uintptr_t)addr >> ADDR_SR_SHFT;
 	curthread->td_pcb->pcb_cpu.aim.usr_vsid = vsid;
 	__asm __volatile("mtsr %0,%1; isync" :: "n"(USER_SR), "r"(vsid));
 }
 #endif
 
 static __inline int
 map_user_ptr(pmap_t pm, volatile const void *uaddr, void **kaddr, size_t ulen,
     size_t *klen)
 {
 	size_t l;
 
 	*kaddr = (char *)USER_ADDR + ((uintptr_t)uaddr & ~SEGMENT_MASK);
 
 	l = ((char *)USER_ADDR + SEGMENT_LENGTH) - (char *)(*kaddr);
 	if (l > ulen)
 		l = ulen;
 	if (klen)
 		*klen = l;
 	else if (l != ulen)
 		return (EFAULT);
 
 	set_user_sr(pm, uaddr);
 
 	return (0);
 }
 #else /* Book-E uses a combined kernel/user mapping */
 static __inline int
 map_user_ptr(pmap_t pm, volatile const void *uaddr, void **kaddr, size_t ulen,
     size_t *klen)
 {
 
 	if ((uintptr_t)uaddr + ulen > VM_MAXUSER_ADDRESS + PAGE_SIZE)
 		return (EFAULT);
 
 	*kaddr = (void *)(uintptr_t)uaddr;
 	if (klen)
 		*klen = ulen;
 
 	return (0);
 }
 #endif
 
 int
 copyout(const void *kaddr, void *udaddr, size_t len)
 {
 	struct		thread *td;
 	pmap_t		pm;
-	faultbuf	env;
+	jmp_buf		env;
 	const char	*kp;
 	char		*up, *p;
 	size_t		l;
 
 	td = curthread;
 	pm = &td->td_proc->p_vmspace->vm_pmap;
 
-	if (setfault(env)) {
+	td->td_pcb->pcb_onfault = &env;
+	if (setjmp(env)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (EFAULT);
 	}
 
 	kp = kaddr;
 	up = udaddr;
 
 	while (len > 0) {
-		if (map_user_ptr(pm, udaddr, (void **)&p, len, &l)) {
+		if (map_user_ptr(pm, up, (void **)&p, len, &l)) {
 			td->td_pcb->pcb_onfault = NULL;
 			return (EFAULT);
 		}
 
 		bcopy(kp, p, l);
 
-		up += l;
+		up += l;	/* user address of the next chunk */
 		kp += l;
 		len -= l;
 	}
 
 	td->td_pcb->pcb_onfault = NULL;
 	return (0);
 }
 
 int
 copyin(const void *udaddr, void *kaddr, size_t len)
 {
 	struct		thread *td;
 	pmap_t		pm;
-	faultbuf	env;
+	jmp_buf		env;
 	const char	*up;
 	char		*kp, *p;
 	size_t		l;
 
 	td = curthread;
 	pm = &td->td_proc->p_vmspace->vm_pmap;
 
-	if (setfault(env)) {
+	td->td_pcb->pcb_onfault = &env;
+	if (setjmp(env)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (EFAULT);
 	}
 
 	kp = kaddr;
 	up = udaddr;
 
 	while (len > 0) {
-		if (map_user_ptr(pm, udaddr, (void **)&p, len, &l)) {
+		if (map_user_ptr(pm, up, (void **)&p, len, &l)) {
 			td->td_pcb->pcb_onfault = NULL;
 			return (EFAULT);
 		}
 
 		bcopy(p, kp, l);
 
-		up += l;
+		up += l;	/* user address of the next chunk */
 		kp += l;
 		len -= l;
 	}
 
 	td->td_pcb->pcb_onfault = NULL;
 	return (0);
 }
 
 int
 copyinstr(const void *udaddr, void *kaddr, size_t len, size_t *done)
 {
 	const char	*up;
 	char		*kp;
 	size_t		l;
 	int		rv, c;
 
 	kp = kaddr;
 	up = udaddr;
 
 	rv = ENAMETOOLONG;
 
 	for (l = 0; len-- > 0; l++) {
 		if ((c = fubyte(up++)) < 0) {
 			rv = EFAULT;
 			break;
 		}
 
 		if (!(*kp++ = c)) {
 			l++;
 			rv = 0;
 			break;
 		}
 	}
 
 	if (done != NULL) {
 		*done = l;
 	}
 
 	return (rv);
 }
 
 int
 subyte(volatile void *addr, int byte)
 {
 	struct		thread *td;
 	pmap_t		pm;
-	faultbuf	env;
+	jmp_buf		env;
 	char		*p;
 
 	td = curthread;
 	pm = &td->td_proc->p_vmspace->vm_pmap;
 
-	if (setfault(env)) {
+	td->td_pcb->pcb_onfault = &env;
+	if (setjmp(env)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	if (map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	*p = (char)byte;
 
 	td->td_pcb->pcb_onfault = NULL;
 	return (0);
 }
 
 #ifdef __powerpc64__
 int
 suword32(volatile void *addr, int word)
 {
 	struct		thread *td;
 	pmap_t		pm;
-	faultbuf	env;
+	jmp_buf		env;
 	int		*p;
 
 	td = curthread;
 	pm = &td->td_proc->p_vmspace->vm_pmap;
 
-	if (setfault(env)) {
+	td->td_pcb->pcb_onfault = &env;
+	if (setjmp(env)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	if (map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	*p = word;
 
 	td->td_pcb->pcb_onfault = NULL;
 	return (0);
 }
 #endif
 
 int
 suword(volatile void *addr, long word)
 {
 	struct		thread *td;
 	pmap_t		pm;
-	faultbuf	env;
+	jmp_buf		env;
 	long		*p;
 
 	td = curthread;
 	pm = &td->td_proc->p_vmspace->vm_pmap;
 
-	if (setfault(env)) {
+	td->td_pcb->pcb_onfault = &env;
+	if (setjmp(env)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	if (map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	*p = word;
 
 	td->td_pcb->pcb_onfault = NULL;
 	return (0);
 }
 
 #ifdef __powerpc64__
 int
 suword64(volatile void *addr, int64_t word)
 {
 	return (suword(addr, (long)word));
 }
 #else
 int
 suword32(volatile void *addr, int32_t word)
 {
 	return (suword(addr, (long)word));
 }
 #endif
 
 int
 fubyte(volatile const void *addr)
 {
 	struct		thread *td;
 	pmap_t		pm;
-	faultbuf	env;
+	jmp_buf		env;
 	u_char		*p;
 	int		val;
 
 	td = curthread;
 	pm = &td->td_proc->p_vmspace->vm_pmap;
 
-	if (setfault(env)) {
+	td->td_pcb->pcb_onfault = &env;
+	if (setjmp(env)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	if (map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	val = *p;
 
 	td->td_pcb->pcb_onfault = NULL;
 	return (val);
 }
 
 int
 fuword16(volatile const void *addr)
 {
 	struct		thread *td;
 	pmap_t		pm;
-	faultbuf	env;
+	jmp_buf		env;
 	uint16_t	*p, val;
 
 	td = curthread;
 	pm = &td->td_proc->p_vmspace->vm_pmap;
 
-	if (setfault(env)) {
+	td->td_pcb->pcb_onfault = &env;
+	if (setjmp(env)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	if (map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	val = *p;
 
 	td->td_pcb->pcb_onfault = NULL;
 	return (val);
 }
 
 int
 fueword32(volatile const void *addr, int32_t *val)
 {
 	struct		thread *td;
 	pmap_t		pm;
-	faultbuf	env;
+	jmp_buf		env;
 	int32_t		*p;
 
 	td = curthread;
 	pm = &td->td_proc->p_vmspace->vm_pmap;
 
-	if (setfault(env)) {
+	td->td_pcb->pcb_onfault = &env;
+	if (setjmp(env)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	if (map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	*val = *p;
 
 	td->td_pcb->pcb_onfault = NULL;
 	return (0);
 }
 
 #ifdef __powerpc64__
 int
 fueword64(volatile const void *addr, int64_t *val)
 {
 	struct		thread *td;
 	pmap_t		pm;
-	faultbuf	env;
+	jmp_buf		env;
 	int64_t		*p;
 
 	td = curthread;
 	pm = &td->td_proc->p_vmspace->vm_pmap;
 
-	if (setfault(env)) {
+	td->td_pcb->pcb_onfault = &env;
+	if (setjmp(env)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	if (map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	*val = *p;
 
 	td->td_pcb->pcb_onfault = NULL;
 	return (0);
 }
 #endif
 
 int
 fueword(volatile const void *addr, long *val)
 {
 	struct		thread *td;
 	pmap_t		pm;
-	faultbuf	env;
+	jmp_buf		env;
 	long		*p;
 
 	td = curthread;
 	pm = &td->td_proc->p_vmspace->vm_pmap;
 
-	if (setfault(env)) {
+	td->td_pcb->pcb_onfault = &env;
+	if (setjmp(env)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	if (map_user_ptr(pm, addr, (void **)&p, sizeof(*p), NULL)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	*val = *p;
 
 	td->td_pcb->pcb_onfault = NULL;
 	return (0);
 }
 
 int
 casueword32(volatile uint32_t *addr, uint32_t old, uint32_t *oldvalp,
     uint32_t new)
 {
 	struct thread *td;
 	pmap_t pm;
-	faultbuf env;
+	jmp_buf		env;
 	uint32_t *p, val;
 
 	td = curthread;
 	pm = &td->td_proc->p_vmspace->vm_pmap;
 
-	if (setfault(env)) {
+	td->td_pcb->pcb_onfault = &env;
+	if (setjmp(env)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	if (map_user_ptr(pm, (void *)(uintptr_t)addr, (void **)&p, sizeof(*p),
 	    NULL)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	__asm __volatile (
 		"1:\tlwarx %0, 0, %2\n\t"	/* load old value */
 		"cmplw %3, %0\n\t"		/* compare */
 		"bne 2f\n\t"			/* exit if not equal */
 		"stwcx. %4, 0, %2\n\t"      	/* attempt to store */
 		"bne- 1b\n\t"			/* spin if failed */
 		"b 3f\n\t"			/* we've succeeded */
 		"2:\n\t"
 		"stwcx. %0, 0, %2\n\t"       	/* clear reservation (74xx) */
 		"3:\n\t"
 		: "=&r" (val), "=m" (*p)
 		: "r" (p), "r" (old), "r" (new), "m" (*p)
 		: "cr0", "memory");
 
 	td->td_pcb->pcb_onfault = NULL;
 
 	*oldvalp = val;
 	return (0);
 }
 
 #ifndef __powerpc64__
 int
 casueword(volatile u_long *addr, u_long old, u_long *oldvalp, u_long new)
 {
 
 	return (casueword32((volatile uint32_t *)addr, old,
 	    (uint32_t *)oldvalp, new));
 }
 #else
 int
 casueword(volatile u_long *addr, u_long old, u_long *oldvalp, u_long new)
 {
 	struct thread *td;
 	pmap_t pm;
-	faultbuf env;
+	jmp_buf		env;
 	u_long *p, val;
 
 	td = curthread;
 	pm = &td->td_proc->p_vmspace->vm_pmap;
 
-	if (setfault(env)) {
+	td->td_pcb->pcb_onfault = &env;
+	if (setjmp(env)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	if (map_user_ptr(pm, (void *)(uintptr_t)addr, (void **)&p, sizeof(*p),
 	    NULL)) {
 		td->td_pcb->pcb_onfault = NULL;
 		return (-1);
 	}
 
 	__asm __volatile (
 		"1:\tldarx %0, 0, %2\n\t"	/* load old value */
 		"cmpld %3, %0\n\t"		/* compare */
 		"bne 2f\n\t"			/* exit if not equal */
 		"stdcx. %4, 0, %2\n\t"      	/* attempt to store */
 		"bne- 1b\n\t"			/* spin if failed */
 		"b 3f\n\t"			/* we've succeeded */
 		"2:\n\t"
 		"stdcx. %0, 0, %2\n\t"       	/* clear reservation (74xx) */
 		"3:\n\t"
 		: "=&r" (val), "=m" (*p)
 		: "r" (p), "r" (old), "r" (new), "m" (*p)
 		: "cr0", "memory");
 
 	td->td_pcb->pcb_onfault = NULL;
 
 	*oldvalp = val;
 	return (0);
 }
 #endif
Index: projects/clang380-import/sys/powerpc/powerpc/elf32_machdep.c
===================================================================
--- projects/clang380-import/sys/powerpc/powerpc/elf32_machdep.c	(revision 293686)
+++ projects/clang380-import/sys/powerpc/powerpc/elf32_machdep.c	(revision 293687)
@@ -1,320 +1,321 @@
 /*-
  * Copyright 1996-1998 John D. Polstra.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 
 #define __ELF_WORD_SIZE 32
 
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/namei.h>
 #include <sys/fcntl.h>
 #include <sys/sysent.h>
 #include <sys/imgact_elf.h>
 #include <sys/syscall.h>
 #include <sys/signalvar.h>
 #include <sys/vnode.h>
 #include <sys/linker.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 
 #include <machine/altivec.h>
 #include <machine/cpu.h>
 #include <machine/elf.h>
 #include <machine/reg.h>
 #include <machine/md_var.h>
 
 #ifdef __powerpc64__
 #include <compat/freebsd32/freebsd32_proto.h>
 #include <compat/freebsd32/freebsd32_util.h>
 
 extern const char *freebsd32_syscallnames[];
 #endif
 
 struct sysentvec elf32_freebsd_sysvec = {
 	.sv_size	= SYS_MAXSYSCALL,
 #ifdef __powerpc64__
 	.sv_table	= freebsd32_sysent,
 #else
 	.sv_table	= sysent,
 #endif
 	.sv_mask	= 0,
 	.sv_errsize	= 0,
 	.sv_errtbl	= NULL,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= __elfN(freebsd_fixup),
 	.sv_sendsig	= sendsig,
 	.sv_sigcode	= sigcode32,
 	.sv_szsigcode	= &szsigcode32,
 	.sv_name	= "FreeBSD ELF32",
 	.sv_coredump	= __elfN(coredump),
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_stackprot	= VM_PROT_ALL,
 #ifdef __powerpc64__
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= FREEBSD32_USRSTACK,
 	.sv_psstrings	= FREEBSD32_PS_STRINGS,
 	.sv_copyout_strings = freebsd32_copyout_strings,
 	.sv_setregs	= ppc32_setregs,
 	.sv_syscallnames = freebsd32_syscallnames,
 #else
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_copyout_strings = exec_copyout_strings,
 	.sv_setregs	= exec_setregs,
 	.sv_syscallnames = syscallnames,
 #endif
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_FREEBSD | SV_ILP32 | SV_SHP,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_shared_page_base = FREEBSD32_SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
+	.sv_trap	= NULL,
 };
 INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec);
 
 static Elf32_Brandinfo freebsd_brand_info = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_PPC,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/libexec/ld-elf.so.1",
 	.sysvec		= &elf32_freebsd_sysvec,
 #ifdef __powerpc64__
 	.interp_newpath	= "/libexec/ld-elf32.so.1",
 #else
 	.interp_newpath	= NULL,
 #endif
 	.brand_note	= &elf32_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST,
     (sysinit_cfunc_t) elf32_insert_brand_entry,
     &freebsd_brand_info);
 
 static Elf32_Brandinfo freebsd_brand_oinfo = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_PPC,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/usr/libexec/ld-elf.so.1",
 	.sysvec		= &elf32_freebsd_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf32_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 SYSINIT(oelf32, SI_SUB_EXEC, SI_ORDER_ANY,
 	(sysinit_cfunc_t) elf32_insert_brand_entry,
 	&freebsd_brand_oinfo);
 
 void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase);
 
 void
 elf32_dump_thread(struct thread *td, void *dst, size_t *off)
 {
 	size_t len;
 	struct pcb *pcb;
 
 	len = 0;
 	pcb = td->td_pcb;
 	if (pcb->pcb_flags & PCB_VEC) {
 		save_vec_nodrop(td);
 		if (dst != NULL) {
 			len += elf32_populate_note(NT_PPC_VMX,
 			    &pcb->pcb_vec, dst,
 			    sizeof(pcb->pcb_vec), NULL);
 		} else
 			len += elf32_populate_note(NT_PPC_VMX, NULL, NULL,
 			    sizeof(pcb->pcb_vec), NULL);
 	}
 	*off = len;
 }
 
 #ifndef __powerpc64__
 /* Process one elf relocation with addend. */
 static int
 elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, int local, elf_lookup_fn lookup)
 {
 	Elf_Addr *where;
 	Elf_Half *hwhere;
 	Elf_Addr addr;
 	Elf_Addr addend;
 	Elf_Word rtype, symidx;
 	const Elf_Rela *rela;
 	int error;
 
 	switch (type) {
 	case ELF_RELOC_REL:
 		panic("PPC only supports RELA relocations");
 		break;
 	case ELF_RELOC_RELA:
 		rela = (const Elf_Rela *)data;
 		where = (Elf_Addr *) ((uintptr_t)relocbase + rela->r_offset);
 		hwhere = (Elf_Half *) ((uintptr_t)relocbase + rela->r_offset);
 		addend = rela->r_addend;
 		rtype = ELF_R_TYPE(rela->r_info);
 		symidx = ELF_R_SYM(rela->r_info);
 		break;
 	default:
 		panic("elf_reloc: unknown relocation mode %d\n", type);
 	}
 
 	switch (rtype) {
 
 	case R_PPC_NONE:
 		break;
 
 	case R_PPC_ADDR32: /* word32 S + A */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return -1;
 		*where = elf_relocaddr(lf, addr + addend);
 			break;
 
 	case R_PPC_ADDR16_LO: /* #lo(S) */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return -1;
 		/*
 		 * addend values are sometimes relative to sections
 		 * (i.e. .rodata) in rela, where in reality they
 		 * are relative to relocbase. Detect this condition.
 		 */
 		if (addr > relocbase && addr <= (relocbase + addend))
 			addr = relocbase;
 		addr = elf_relocaddr(lf, addr + addend);
 		*hwhere = addr & 0xffff;
 		break;
 
 	case R_PPC_ADDR16_HA: /* #ha(S) */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return -1;
 		/*
 		 * addend values are sometimes relative to sections
 		 * (i.e. .rodata) in rela, where in reality they
 		 * are relative to relocbase. Detect this condition.
 		 */
 		if (addr > relocbase && addr <= (relocbase + addend))
 			addr = relocbase;
 		addr = elf_relocaddr(lf, addr + addend);
 		*hwhere = ((addr >> 16) + ((addr & 0x8000) ? 1 : 0))
 		    & 0xffff;
 		break;
 
 	case R_PPC_RELATIVE: /* word32 B + A */
 		*where = elf_relocaddr(lf, relocbase + addend);
 		break;
 
 	default:
 		printf("kldload: unexpected relocation type %d\n",
 		    (int) rtype);
 		return -1;
 	}
 	return(0);
 }
 
 void
 elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase)
 {
 	Elf_Rela *rela = 0, *relalim;
 	Elf_Addr relasz = 0;
 	Elf_Addr *where;
 
 	/*
 	 * Extract the rela/relasz values from the dynamic section
 	 */
 	for (; dynp->d_tag != DT_NULL; dynp++) {
 		switch (dynp->d_tag) {
 		case DT_RELA:
 			rela = (Elf_Rela *)(relocbase+dynp->d_un.d_ptr);
 			break;
 		case DT_RELASZ:
 			relasz = dynp->d_un.d_val;
 			break;
 		}
 	}
 
 	/*
 	 * Relocate these values
 	 */
 	relalim = (Elf_Rela *)((caddr_t)rela + relasz);
 	for (; rela < relalim; rela++) {
 		if (ELF_R_TYPE(rela->r_info) != R_PPC_RELATIVE)
 			continue;
 		where = (Elf_Addr *)(relocbase + rela->r_offset);
 		*where = (Elf_Addr)(relocbase + rela->r_addend);
 	}
 }
 
 int
 elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type,
     elf_lookup_fn lookup)
 {
 
 	return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup));
 }
 
 int
 elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, elf_lookup_fn lookup)
 {
 
 	return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup));
 }
 
 int
 elf_cpu_load_file(linker_file_t lf)
 {
 	/* Only sync the cache for non-kernel modules */
 	if (lf->id != 1)
 		__syncicache(lf->address, lf->size);
 	return (0);
 }
 
 int
 elf_cpu_unload_file(linker_file_t lf __unused)
 {
 
 	return (0);
 }
 #endif
Index: projects/clang380-import/sys/powerpc/powerpc/elf64_machdep.c
===================================================================
--- projects/clang380-import/sys/powerpc/powerpc/elf64_machdep.c	(revision 293686)
+++ projects/clang380-import/sys/powerpc/powerpc/elf64_machdep.c	(revision 293687)
@@ -1,374 +1,375 @@
 /*-
  * Copyright 1996-1998 John D. Polstra.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/namei.h>
 #include <sys/fcntl.h>
 #include <sys/sysent.h>
 #include <sys/imgact_elf.h>
 #include <sys/syscall.h>
 #include <sys/signalvar.h>
 #include <sys/vnode.h>
 #include <sys/linker.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 
 #include <machine/altivec.h>
 #include <machine/cpu.h>
 #include <machine/elf.h>
 #include <machine/md_var.h>
 
 static void exec_setregs_funcdesc(struct thread *td, struct image_params *imgp,
     u_long stack);
 
 struct sysentvec elf64_freebsd_sysvec_v1 = {
 	.sv_size	= SYS_MAXSYSCALL,
 	.sv_table	= sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= 0,
 	.sv_errtbl	= NULL,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= __elfN(freebsd_fixup),
 	.sv_sendsig	= sendsig,
 	.sv_sigcode	= sigcode64,
 	.sv_szsigcode	= &szsigcode64,
 	.sv_name	= "FreeBSD ELF64",
 	.sv_coredump	= __elfN(coredump),
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings = exec_copyout_strings,
 	.sv_setregs	= exec_setregs_funcdesc,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_FREEBSD | SV_LP64 | SV_SHP,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = syscallnames,
 	.sv_shared_page_base = SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
+	.sv_trap	= NULL,
 };
 INIT_SYSENTVEC(elf64_sysvec_v1, &elf64_freebsd_sysvec_v1);
 
 struct sysentvec elf64_freebsd_sysvec_v2 = {
 	.sv_size	= SYS_MAXSYSCALL,
 	.sv_table	= sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= 0,
 	.sv_errtbl	= NULL,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= __elfN(freebsd_fixup),
 	.sv_sendsig	= sendsig,
 	.sv_sigcode	= sigcode64_elfv2,
 	.sv_szsigcode	= &szsigcode64_elfv2,
 	.sv_name	= "FreeBSD ELF64 V2",
 	.sv_coredump	= __elfN(coredump),
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings = exec_copyout_strings,
 	.sv_setregs	= exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_FREEBSD | SV_LP64 | SV_SHP,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = syscallnames,
 	.sv_shared_page_base = SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
 };
 INIT_SYSENTVEC(elf64_sysvec_v2, &elf64_freebsd_sysvec_v2);
 
 static boolean_t ppc64_elfv1_header_match(struct image_params *params);
 static boolean_t ppc64_elfv2_header_match(struct image_params *params);
 
 static Elf64_Brandinfo freebsd_brand_info_elfv1 = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_PPC64,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/libexec/ld-elf.so.1",
 	.sysvec		= &elf64_freebsd_sysvec_v1,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf64_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
 	.header_supported = &ppc64_elfv1_header_match
 };
 
 SYSINIT(elf64v1, SI_SUB_EXEC, SI_ORDER_ANY,
     (sysinit_cfunc_t) elf64_insert_brand_entry,
     &freebsd_brand_info_elfv1);
 
 static Elf64_Brandinfo freebsd_brand_info_elfv2 = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_PPC64,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/libexec/ld-elf.so.1",
 	.sysvec		= &elf64_freebsd_sysvec_v2,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf64_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
 	.header_supported = &ppc64_elfv2_header_match
 };
 
 SYSINIT(elf64v2, SI_SUB_EXEC, SI_ORDER_ANY,
     (sysinit_cfunc_t) elf64_insert_brand_entry,
     &freebsd_brand_info_elfv2);
 
 static Elf64_Brandinfo freebsd_brand_oinfo = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_PPC64,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/usr/libexec/ld-elf.so.1",
 	.sysvec		= &elf64_freebsd_sysvec_v1,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf64_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
 	.header_supported = &ppc64_elfv1_header_match
 };
 
 SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY,
 	(sysinit_cfunc_t) elf64_insert_brand_entry,
 	&freebsd_brand_oinfo);
 
 void elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase);
 
 static boolean_t
 ppc64_elfv1_header_match(struct image_params *params)
 {
 	const Elf64_Ehdr *hdr = (const Elf64_Ehdr *)params->image_header;
 	int abi = (hdr->e_flags & 3);
 
 	return (abi == 0 || abi == 1);
 }
 
 static boolean_t
 ppc64_elfv2_header_match(struct image_params *params)
 {
 	const Elf64_Ehdr *hdr = (const Elf64_Ehdr *)params->image_header;
 	int abi = (hdr->e_flags & 3);
 
 	return (abi == 2);
 }
 
 static void  
 exec_setregs_funcdesc(struct thread *td, struct image_params *imgp,
     u_long stack)
 {
 	struct trapframe *tf;
 	register_t entry_desc[3];
 
 	tf = trapframe(td);
 	exec_setregs(td, imgp, stack);
 
 	/*
 	 * For 64-bit ELFv1, we need to disentangle the function
 	 * descriptor
 	 *
 	 * 0. entry point
 	 * 1. TOC value (r2)
 	 * 2. Environment pointer (r11)
 	 */
 
 	(void)copyin((void *)imgp->entry_addr, entry_desc,
 	    sizeof(entry_desc));
 	tf->srr0 = entry_desc[0] + imgp->reloc_base;
 	tf->fixreg[2] = entry_desc[1] + imgp->reloc_base;
 	tf->fixreg[11] = entry_desc[2] + imgp->reloc_base;
 }
 
 void
 elf64_dump_thread(struct thread *td, void *dst, size_t *off)
 {
 	size_t len;
 	struct pcb *pcb;
 
 	len = 0;
 	pcb = td->td_pcb;
 	if (pcb->pcb_flags & PCB_VEC) {
 		save_vec_nodrop(td);
 		if (dst != NULL) {
 			len += elf64_populate_note(NT_PPC_VMX,
 			    &pcb->pcb_vec, dst,
 			    sizeof(pcb->pcb_vec), NULL);
 		} else
 			len += elf64_populate_note(NT_PPC_VMX, NULL, NULL,
 			    sizeof(pcb->pcb_vec), NULL);
 	}
 	*off = len;
 }
 
 
 /* Process one elf relocation with addend. */
 static int
 elf_reloc_internal(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, int local, elf_lookup_fn lookup)
 {
 	Elf_Addr *where;
 	Elf_Addr addr;
 	Elf_Addr addend;
 	Elf_Word rtype, symidx;
 	const Elf_Rela *rela;
 	int error;
 
 	switch (type) {
 	case ELF_RELOC_REL:
 		panic("PPC only supports RELA relocations");
 		break;
 	case ELF_RELOC_RELA:
 		rela = (const Elf_Rela *)data;
 		where = (Elf_Addr *) (relocbase + rela->r_offset);
 		addend = rela->r_addend;
 		rtype = ELF_R_TYPE(rela->r_info);
 		symidx = ELF_R_SYM(rela->r_info);
 		break;
 	default:
 		panic("elf_reloc: unknown relocation mode %d\n", type);
 	}
 
 	switch (rtype) {
 
 	case R_PPC_NONE:
 		break;
 
 	case R_PPC64_ADDR64:	/* doubleword64 S + A */
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return -1;
 		addr += addend;
 		*where = addr;
 		break;
 
 	case R_PPC_RELATIVE:	/* doubleword64 B + A */
 		*where = elf_relocaddr(lf, relocbase + addend);
 		break;
 
 	case R_PPC_JMP_SLOT:	/* function descriptor copy */
 		lookup(lf, symidx, 1, &addr);
 #if !defined(_CALL_ELF) || _CALL_ELF == 1
 		memcpy(where, (Elf_Addr *)addr, 3*sizeof(Elf_Addr));
 #else
 		memcpy(where, (Elf_Addr *)addr, sizeof(Elf_Addr));
 #endif
 		__asm __volatile("dcbst 0,%0; sync" :: "r"(where) : "memory");
 		break;
 
 	default:
 		printf("kldload: unexpected relocation type %d\n",
 		    (int) rtype);
 		return -1;
 	}
 	return(0);
 }
 
 void
 elf_reloc_self(Elf_Dyn *dynp, Elf_Addr relocbase)
 {
 	Elf_Rela *rela = 0, *relalim;
 	Elf_Addr relasz = 0;
 	Elf_Addr *where;
 
 	/*
 	 * Extract the rela/relasz values from the dynamic section
 	 */
 	for (; dynp->d_tag != DT_NULL; dynp++) {
 		switch (dynp->d_tag) {
 		case DT_RELA:
 			rela = (Elf_Rela *)(relocbase+dynp->d_un.d_ptr);
 			break;
 		case DT_RELASZ:
 			relasz = dynp->d_un.d_val;
 			break;
 		}
 	}
 
 	/*
 	 * Relocate these values
 	 */
 	relalim = (Elf_Rela *)((caddr_t)rela + relasz);
 	for (; rela < relalim; rela++) {
 		if (ELF_R_TYPE(rela->r_info) != R_PPC_RELATIVE)
 			continue;
 		where = (Elf_Addr *)(relocbase + rela->r_offset);
 		*where = (Elf_Addr)(relocbase + rela->r_addend);
 	}
 }
 
 int
 elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type,
     elf_lookup_fn lookup)
 {
 
 	return (elf_reloc_internal(lf, relocbase, data, type, 0, lookup));
 }
 
 int
 elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, elf_lookup_fn lookup)
 {
 
 	return (elf_reloc_internal(lf, relocbase, data, type, 1, lookup));
 }
 
 int
 elf_cpu_load_file(linker_file_t lf)
 {
 	/* Only sync the cache for non-kernel modules */
 	if (lf->id != 1)
 		__syncicache(lf->address, lf->size);
 	return (0);
 }
 
 int
 elf_cpu_unload_file(linker_file_t lf __unused)
 {
 
 	return (0);
 }
Index: projects/clang380-import/sys/powerpc/powerpc/mp_machdep.c
===================================================================
--- projects/clang380-import/sys/powerpc/powerpc/mp_machdep.c	(revision 293686)
+++ projects/clang380-import/sys/powerpc/powerpc/mp_machdep.c	(revision 293687)
@@ -1,375 +1,374 @@
 /*-
  * Copyright (c) 2008 Marcel Moolenaar
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/bus.h>
 #include <sys/cpuset.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_kern.h>
 
 #include <machine/bus.h>
 #include <machine/cpu.h>
 #include <machine/intr_machdep.h>
 #include <machine/pcb.h>
 #include <machine/platform.h>
 #include <machine/md_var.h>
 #include <machine/setjmp.h>
 #include <machine/smp.h>
 
 #include "pic_if.h"
 
 extern struct pcpu __pcpu[MAXCPU];
 
 volatile static int ap_awake;
 volatile static u_int ap_letgo;
 volatile static u_quad_t ap_timebase;
 static u_int ipi_msg_cnt[32];
 static struct mtx ap_boot_mtx;
 struct pcb stoppcbs[MAXCPU];
-int longfault(faultbuf, int);
 
 /*
  * Machine-dependent bootstrap run on an application processor (AP).
  *
  * The AP records its PIR, flags itself awake, then spins until ap_letgo
  * becomes non-zero.  After that it loads the time base from ap_timebase,
  * initializes its decrementer, runs the platform AP hook, bumps ap_awake
  * under ap_boot_mtx, starts its event timers and calls sched_throw().
  */
 void
 machdep_ap_bootstrap(void)
 {
 
 	/* Set PIR */
 	PCPU_SET(pir, mfspr(SPR_PIR));
 	/* pc_awake is what cpu_mp_unleash() polls while waiting for this CPU */
 	PCPU_SET(awake, 1);
 	__asm __volatile("msync; isync");
 
 	/* Busy-wait until ap_letgo is set to a non-zero value */
 	while (ap_letgo == 0)
 		;
 
 	/* Initialize DEC and TB, sync with the BSP values */
 #ifdef __powerpc64__
 	/* Writing to the time base register is hypervisor-privileged */
 	if (mfmsr() & PSL_HV)
 		mttb(ap_timebase);
 #else
 	mttb(ap_timebase);
 #endif
 	decr_ap_init();
 
 	/* Give platform code a chance to do anything necessary */
 	platform_smp_ap_init();
 
 	/* Serialize console output and AP count increment */
 	mtx_lock_spin(&ap_boot_mtx);
 	ap_awake++;
 	printf("SMP: AP CPU #%d launched\n", PCPU_GET(cpuid));
 	mtx_unlock_spin(&ap_boot_mtx);
 
 	/* Start per-CPU event timers. */
 	cpu_initclocks_ap();
 
 	/* Announce ourselves awake, and enter the scheduler */
 	sched_throw(NULL);
 }
 
 void
 cpu_mp_setmaxid(void)
 {
 	struct cpuref cpuref;
 	int error;
 
 	mp_ncpus = 0;
 	error = platform_smp_first_cpu(&cpuref);
 	while (!error) {
 		mp_ncpus++;
 		error = platform_smp_next_cpu(&cpuref);
 	}
 	/* Sanity. */
 	if (mp_ncpus == 0)
 		mp_ncpus = 1;
 
 	/*
 	 * Set the largest cpuid we're going to use. This is necessary
 	 * for VM initialization.
 	 */
 	mp_maxid = min(mp_ncpus, MAXCPU) - 1;
 }
 
 /*
  * Report whether SMP should be enabled: returns non-zero only when
  * mp_ncpus is greater than 1.
  */
 int
 cpu_mp_probe(void)
 {
 
 	/*
 	 * We're not going to enable SMP if there's only 1 processor.
 	 */
 	return (mp_ncpus > 1);
 }
 
 void
 cpu_mp_start(void)
 {
 	struct cpuref bsp, cpu;
 	struct pcpu *pc;
 	int error;
 
 	error = platform_smp_get_bsp(&bsp);
 	KASSERT(error == 0, ("Don't know BSP"));
 	KASSERT(bsp.cr_cpuid == 0, ("%s: cpuid != 0", __func__));
 
 	error = platform_smp_first_cpu(&cpu);
 	while (!error) {
 		if (cpu.cr_cpuid >= MAXCPU) {
 			printf("SMP: cpu%d: skipped -- ID out of range\n",
 			    cpu.cr_cpuid);
 			goto next;
 		}
 		if (CPU_ISSET(cpu.cr_cpuid, &all_cpus)) {
 			printf("SMP: cpu%d: skipped - duplicate ID\n",
 			    cpu.cr_cpuid);
 			goto next;
 		}
 		if (cpu.cr_cpuid != bsp.cr_cpuid) {
 			void *dpcpu;
 
 			pc = &__pcpu[cpu.cr_cpuid];
 			dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
 			    M_WAITOK | M_ZERO);
 			pcpu_init(pc, cpu.cr_cpuid, sizeof(*pc));
 			dpcpu_init(dpcpu, cpu.cr_cpuid);
 		} else {
 			pc = pcpup;
 			pc->pc_cpuid = bsp.cr_cpuid;
 			pc->pc_bsp = 1;
 		}
 		pc->pc_hwref = cpu.cr_hwref;
 		CPU_SET(pc->pc_cpuid, &all_cpus);
 next:
 		error = platform_smp_next_cpu(&cpu);
 	}
 }
 
 /*
  * Print one line per known CPU id in 0..mp_maxid giving its hardware
  * reference, with a " (BSP)" tag on the boot processor.
  */
 void
 cpu_mp_announce(void)
 {
 	struct pcpu *pc;
 	int id;
 
 	for (id = 0; id <= mp_maxid; id++) {
 		pc = pcpu_find(id);
 		if (pc != NULL) {
 			printf("cpu%d: dev=%x", id, (int)pc->pc_hwref);
 			if (pc->pc_bsp)
 				printf(" (BSP)");
 			printf("\n");
 		}
 	}
 }
 
 /*
  * Start the application processors and wait for them to check in.
  *
  * Does nothing when mp_ncpus <= 1.  Otherwise every non-BSP CPU on the
  * cpuhead list is started through platform_smp_start_cpu() and polled for
  * up to about two seconds for its pc_awake flag.  CPUs that never wake are
  * added to stopped_cpus.  The time base value is then published, the APs
  * are released through ap_letgo, and smp_started is set once ap_awake has
  * reached smp_cpus.  The 'dummy' argument is not used.
  */
 static void
 cpu_mp_unleash(void *dummy)
 {
 	struct pcpu *pc;
 	int cpus, timeout;
 
 	if (mp_ncpus <= 1)
 		return;
 
 	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
 
 	/* cpus counts every pcpu on the list; smp_cpus only the awake ones */
 	cpus = 0;
 	smp_cpus = 0;
 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
 		cpus++;
 		if (!pc->pc_bsp) {
 			if (bootverbose)
 				printf("Waking up CPU %d (dev=%x)\n",
 				    pc->pc_cpuid, (int)pc->pc_hwref);
 
 			platform_smp_start_cpu(pc);
 			
 			timeout = 2000;	/* wait 2sec for the AP */
 			while (!pc->pc_awake && --timeout > 0)
 				DELAY(1000);
 
 		} else {
 			/* The BSP is the CPU running this code */
 			PCPU_SET(pir, mfspr(SPR_PIR));
 			pc->pc_awake = 1;
 		}
 		if (pc->pc_awake) {
 			if (bootverbose)
 				printf("Adding CPU %d, pir=%x, awake=%x\n",
 				    pc->pc_cpuid, pc->pc_pir, pc->pc_awake);
 			smp_cpus++;
 		} else
 			CPU_SET(pc->pc_cpuid, &stopped_cpus);
 	}
 
 	/* Start the count at 1; each AP increments it as it comes up */
 	ap_awake = 1;
 
 	/* Provide our current DEC and TB values for APs */
 	ap_timebase = mftb() + 10;
 	__asm __volatile("msync; isync");
 	
 	/* Let APs continue */
 	atomic_store_rel_int(&ap_letgo, 1);
 
 #ifdef __powerpc64__
 	/* Writing to the time base register is hypervisor-privileged */
 	if (mfmsr() & PSL_HV)
 		mttb(ap_timebase);
 #else
 	mttb(ap_timebase);
 #endif
 
 	/* Spin until every CPU counted in smp_cpus has bumped ap_awake */
 	while (ap_awake < smp_cpus)
 		;
 
 	if (smp_cpus != cpus || cpus != mp_ncpus) {
 		printf("SMP: %d CPUs found; %d CPUs usable; %d CPUs woken\n",
 		    mp_ncpus, cpus, smp_cpus);
 	}
 
 	/* Let the APs get into the scheduler */
 	DELAY(10000);
 
 	/* XXX Atomic set operation? */
 	smp_started = 1;
 }
 
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, cpu_mp_unleash, NULL);
 
 /*
  * Interrupt filter for inter-processor interrupts.
  *
  * Atomically fetches and clears this CPU's pending IPI mask, then handles
  * each set bit from the lowest upward.  Returns FILTER_STRAY when no bit
  * was pending and FILTER_HANDLED otherwise.  'arg' is not used.
  */
 int
 powerpc_ipi_handler(void *arg)
 {
 	u_int cpuid;
 	uint32_t ipimask;
 	int msg;
 
 	CTR2(KTR_SMP, "%s: MSR 0x%08x", __func__, mfmsr());
 
 	ipimask = atomic_readandclear_32(&(pcpup->pc_ipimask));
 	if (ipimask == 0)
 		return (FILTER_STRAY);
 	/* ffs() is 1-based and returns 0 for an empty mask, hence the -1 */
 	while ((msg = ffs(ipimask) - 1) != -1) {
 		ipimask &= ~(1u << msg);
 		ipi_msg_cnt[msg]++;
 		switch (msg) {
 		case IPI_AST:
 			/* Nothing to do here beyond the trace record */
 			CTR1(KTR_SMP, "%s: IPI_AST", __func__);
 			break;
 		case IPI_PREEMPT:
 			CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__);
 			sched_preempt(curthread);
 			break;
 		case IPI_RENDEZVOUS:
 			CTR1(KTR_SMP, "%s: IPI_RENDEZVOUS", __func__);
 			smp_rendezvous_action();
 			break;
 		case IPI_STOP:
 
 			/*
 			 * IPI_STOP_HARD is mapped to IPI_STOP so it is not
 			 * necessary to add such case in the switch.
 			 */
 			CTR1(KTR_SMP, "%s: IPI_STOP or IPI_STOP_HARD (stop)",
 			    __func__);
 			cpuid = PCPU_GET(cpuid);
 			savectx(&stoppcbs[cpuid]);
 			savectx(PCPU_GET(curpcb));
 			/* Mark stopped, then spin until set in started_cpus */
 			CPU_SET_ATOMIC(cpuid, &stopped_cpus);
 			while (!CPU_ISSET(cpuid, &started_cpus))
 				cpu_spinwait();
 			CPU_CLR_ATOMIC(cpuid, &stopped_cpus);
 			CPU_CLR_ATOMIC(cpuid, &started_cpus);
 			CTR1(KTR_SMP, "%s: IPI_STOP (restart)", __func__);
 			break;
 		case IPI_HARDCLOCK:
 			CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
 			hardclockintr();
 			break;
 		}
 	}
 
 	return (FILTER_HANDLED);
 }
 
 /*
  * Post IPI number 'ipi' to the CPU described by 'pc': set the matching bit
  * in that CPU's pending mask, then ask the root PIC to interrupt it.
  */
 static void
 ipi_send(struct pcpu *pc, int ipi)
 {
 
 	CTR4(KTR_SMP, "%s: pc=%p, targetcpu=%d, IPI=%d", __func__,
 	    pc, pc->pc_cpuid, ipi);
 
 	/*
 	 * Build the mask as unsigned, the same way powerpc_ipi_handler()
 	 * does when it clears bits, so bit 31 never needs a signed shift.
 	 */
 	atomic_set_32(&pc->pc_ipimask, (1u << ipi));
 	/*
 	 * NOTE(review): the sync sits between the mask update and the
 	 * interrupt request, presumably to order the two -- confirm.
 	 */
 	powerpc_sync();
 	PIC_IPI(root_pic, pc->pc_cpuid);
 
 	CTR1(KTR_SMP, "%s: sent", __func__);
 }
 
 /*
  * Deliver 'ipi' to every CPU on the cpuhead list whose id is a member of
  * the set 'cpus'.
  */
 void
 ipi_selected(cpuset_t cpus, int ipi)
 {
 	struct pcpu *target;
 
 	STAILQ_FOREACH(target, &cpuhead, pc_allcpu) {
 		if (!CPU_ISSET(target->pc_cpuid, &cpus))
 			continue;
 		ipi_send(target, ipi);
 	}
 }
 
 /*
  * Send an IPI to a specific CPU.
  *
  * 'cpu' indexes cpuid_to_pcpu[] directly; no range check is made here.
  */
 void
 ipi_cpu(int cpu, u_int ipi)
 {
 
 	ipi_send(cpuid_to_pcpu[cpu], ipi);
 }
 
 /*
  * Deliver 'ipi' to every CPU on the cpuhead list other than the one
  * executing this call.
  */
 void
 ipi_all_but_self(int ipi)
 {
 	struct pcpu *target;
 
 	STAILQ_FOREACH(target, &cpuhead, pc_allcpu) {
 		if (target == pcpup)
 			continue;
 		ipi_send(target, ipi);
 	}
 }
Index: projects/clang380-import/sys/powerpc/powerpc/setjmp.S
===================================================================
--- projects/clang380-import/sys/powerpc/powerpc/setjmp.S	(revision 293686)
+++ projects/clang380-import/sys/powerpc/powerpc/setjmp.S	(revision 293687)
@@ -1,115 +1,114 @@
 /*	$FreeBSD$  */
 /*	from:	NetBSD: setjmp.S,v 1.1 1998/01/27 15:13:12 sakamoto Exp $  */
 /*	from:	OpenBSD: setjmp.S,v 1.2 1996/12/28 06:22:18 rahnds Exp 	*/
 /* kernel version of this file, does not have signal goop */
 /* int setjmp(jmp_buf env) */
 
 #include <machine/asm.h>
 
 #ifdef __powerpc64__
 #define LD_REG	ld
 #define	ST_REG	std
 #define	REGWIDTH 8
 #else
 #define	LD_REG	lwz
 #define	ST_REG	stw
 #define	REGWIDTH 4
 #endif
 
 #define JMP_r1	1*REGWIDTH
 #define JMP_r2	2*REGWIDTH
 #define JMP_r14	3*REGWIDTH
 #define JMP_r15 4*REGWIDTH
 #define JMP_r16 5*REGWIDTH
 #define JMP_r17 6*REGWIDTH
 #define JMP_r18 7*REGWIDTH
 #define JMP_r19 8*REGWIDTH
 #define JMP_r20 9*REGWIDTH
 #define JMP_r21 10*REGWIDTH
 #define JMP_r22 11*REGWIDTH
 #define JMP_r23 12*REGWIDTH
 #define JMP_r24 13*REGWIDTH
 #define JMP_r25 14*REGWIDTH
 #define JMP_r26 15*REGWIDTH
 #define JMP_r27 16*REGWIDTH
 #define JMP_r28 17*REGWIDTH
 #define JMP_r29 18*REGWIDTH
 #define JMP_r30 19*REGWIDTH
 #define JMP_r31 20*REGWIDTH
 #define JMP_lr 	21*REGWIDTH
 #define JMP_cr	22*REGWIDTH
 #define JMP_ctr	23*REGWIDTH
 #define JMP_xer	24*REGWIDTH
-#define JMP_sig	25*REGWIDTH
 
 /*
  * setjmp: store r1, r2, r14-r31, cr, lr, ctr and xer into the buffer
  * addressed by r3, at the JMP_* offsets defined above, and return 0 in r3.
  * r0 is used as scratch for the special-purpose registers.
  */
 ASENTRY_NOPROF(setjmp)
 	ST_REG 31, JMP_r31(3)
 	/* r1, r2, r14-r30 */
 	ST_REG 1,  JMP_r1 (3)
 	ST_REG 2,  JMP_r2 (3)
 	ST_REG 14, JMP_r14(3)
 	ST_REG 15, JMP_r15(3)
 	ST_REG 16, JMP_r16(3)
 	ST_REG 17, JMP_r17(3)
 	ST_REG 18, JMP_r18(3)
 	ST_REG 19, JMP_r19(3)
 	ST_REG 20, JMP_r20(3)
 	ST_REG 21, JMP_r21(3)
 	ST_REG 22, JMP_r22(3)
 	ST_REG 23, JMP_r23(3)
 	ST_REG 24, JMP_r24(3)
 	ST_REG 25, JMP_r25(3)
 	ST_REG 26, JMP_r26(3)
 	ST_REG 27, JMP_r27(3)
 	ST_REG 28, JMP_r28(3)
 	ST_REG 29, JMP_r29(3)
 	ST_REG 30, JMP_r30(3)
 	/* cr, lr, ctr, xer */
 	mfcr 0
 	ST_REG 0, JMP_cr(3)
 	mflr 0
 	ST_REG 0, JMP_lr(3)
 	mfctr 0
 	ST_REG 0, JMP_ctr(3)
 	mfxer 0
 	ST_REG 0, JMP_xer(3)
 	/* f14-f31, fpscr: placeholder only, no floating-point state is saved */
 	li 3, 0
 	blr
 
 
 .extern sigsetmask
 /*
  * longjmp: reload r1, r2, r14-r31, cr, lr, ctr and xer from the buffer
  * addressed by r3, copy r4 into r3 and branch to the restored lr.
  * r0 is used as scratch for the special-purpose registers.
  */
 ASENTRY_NOPROF(longjmp)
 	LD_REG 31, JMP_r31(3)
 	/* r1, r2, r14-r30 */
 	LD_REG 1,  JMP_r1 (3)
 	LD_REG 2,  JMP_r2 (3)
 	LD_REG 14, JMP_r14(3)
 	LD_REG 15, JMP_r15(3)
 	LD_REG 16, JMP_r16(3)
 	LD_REG 17, JMP_r17(3)
 	LD_REG 18, JMP_r18(3)
 	LD_REG 19, JMP_r19(3)
 	LD_REG 20, JMP_r20(3)
 	LD_REG 21, JMP_r21(3)
 	LD_REG 22, JMP_r22(3)
 	LD_REG 23, JMP_r23(3)
 	LD_REG 24, JMP_r24(3)
 	LD_REG 25, JMP_r25(3)
 	LD_REG 26, JMP_r26(3)
 	LD_REG 27, JMP_r27(3)
 	LD_REG 28, JMP_r28(3)
 	LD_REG 29, JMP_r29(3)
 	LD_REG 30, JMP_r30(3)
 	/* cr, lr, ctr, xer */
 	LD_REG 0, JMP_cr(3)
 	mtcr 0
 	LD_REG 0, JMP_lr(3)
 	mtlr 0
 	LD_REG 0, JMP_ctr(3)
 	mtctr 0
 	LD_REG 0, JMP_xer(3)
 	mtxer 0
 	/* f14-f31, fpscr: placeholder only, no floating-point state is restored */
 	/* Second argument (r4) becomes the value returned in r3 */
 	mr 3, 4
 	blr
Index: projects/clang380-import/sys/powerpc/powerpc/trap.c
===================================================================
--- projects/clang380-import/sys/powerpc/powerpc/trap.c	(revision 293686)
+++ projects/clang380-import/sys/powerpc/powerpc/trap.c	(revision 293687)
@@ -1,825 +1,827 @@
 /*-
  * Copyright (C) 1995, 1996 Wolfgang Solfrank.
  * Copyright (C) 1995, 1996 TooLs GmbH.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by TooLs GmbH.
  * 4. The name of TooLs GmbH may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $NetBSD: trap.c,v 1.58 2002/03/04 04:07:35 dbj Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kdb.h>
 #include <sys/proc.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/pioctl.h>
 #include <sys/ptrace.h>
 #include <sys/reboot.h>
 #include <sys/syscall.h>
 #include <sys/sysent.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/uio.h>
 #include <sys/signalvar.h>
 #include <sys/vmmeter.h>
 
 #include <security/audit/audit.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 
 #include <machine/_inttypes.h>
 #include <machine/altivec.h>
 #include <machine/cpu.h>
 #include <machine/db_machdep.h>
 #include <machine/fpu.h>
 #include <machine/frame.h>
 #include <machine/pcb.h>
 #include <machine/pmap.h>
 #include <machine/psl.h>
 #include <machine/trap.h>
 #include <machine/spr.h>
 #include <machine/sr.h>
 
-#define	FAULTBUF_LR	0
+/* Below matches setjmp.S */
+#define	FAULTBUF_LR	21
 #define	FAULTBUF_R1	1
 #define	FAULTBUF_R2	2
-#define	FAULTBUF_CR	3
-#define	FAULTBUF_R13	4
+#define	FAULTBUF_CR	22
+#define	FAULTBUF_R14	3
 
 static void	trap_fatal(struct trapframe *frame);
 static void	printtrap(u_int vector, struct trapframe *frame, int isfatal,
 		    int user);
 static int	trap_pfault(struct trapframe *frame, int user);
 static int	fix_unaligned(struct thread *td, struct trapframe *frame);
 static int	handle_onfault(struct trapframe *frame);
 static void	syscall(struct trapframe *frame);
 
 #ifdef __powerpc64__
        void	handle_kernel_slb_spill(int, register_t, register_t);
 static int	handle_user_slb_spill(pmap_t pm, vm_offset_t addr);
 extern int	n_slbs;
 #endif
 
 /* One row of the exception-vector-to-name table used by trapname(). */
 struct powerpc_exception {
 	u_int	vector;		/* exception vector value (EXC_*) */
 	char	*name;		/* printable name; NULL in the end marker */
 };
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 
 int (*dtrace_invop_jump_addr)(struct trapframe *);
 #endif
 
 /*
  * Printable names for the exception vectors, searched linearly by
  * trapname().  The EXC_LAST entry terminates the table and must stay last.
  */
 static struct powerpc_exception powerpc_exceptions[] = {
 	{ EXC_CRIT,	"critical input" },
 	{ EXC_RST,	"system reset" },
 	{ EXC_MCHK,	"machine check" },
 	{ EXC_DSI,	"data storage interrupt" },
 	{ EXC_DSE,	"data segment exception" },
 	{ EXC_ISI,	"instruction storage interrupt" },
 	{ EXC_ISE,	"instruction segment exception" },
 	{ EXC_EXI,	"external interrupt" },
 	{ EXC_ALI,	"alignment" },
 	{ EXC_PGM,	"program" },
 	{ EXC_FPU,	"floating-point unavailable" },
 	{ EXC_APU,	"auxiliary proc unavailable" },
 	{ EXC_DECR,	"decrementer" },
 	{ EXC_FIT,	"fixed-interval timer" },
 	{ EXC_WDOG,	"watchdog timer" },
 	{ EXC_SC,	"system call" },
 	{ EXC_TRC,	"trace" },
 	{ EXC_FPA,	"floating-point assist" },
 	{ EXC_DEBUG,	"debug" },
 	{ EXC_PERF,	"performance monitoring" },
 	{ EXC_VEC,	"altivec unavailable" },
 	{ EXC_VSX,	"vsx unavailable" },
 	{ EXC_ITMISS,	"instruction tlb miss" },
 	{ EXC_DLMISS,	"data load tlb miss" },
 	{ EXC_DSMISS,	"data store tlb miss" },
 	{ EXC_BPT,	"instruction breakpoint" },
 	{ EXC_SMI,	"system management" },
 	{ EXC_VECAST_G4,	"altivec assist" },
 	{ EXC_THRM,	"thermal management" },
 	{ EXC_RUNMODETRC,	"run mode/trace" },
 	{ EXC_LAST,	NULL }
 };
 
 /*
  * Map an exception vector to its printable name from powerpc_exceptions[].
  * Returns "unknown" when the vector is not in the table.
  */
 static const char *
 trapname(u_int vector)
 {
 	struct	powerpc_exception *entry = powerpc_exceptions;
 
 	/* Linear scan up to, but not including, the EXC_LAST end marker. */
 	while (entry->vector != EXC_LAST) {
 		if (entry->vector == vector)
 			return (entry->name);
 		entry++;
 	}
 
 	return ("unknown");
 }
 
 /*
  * C-level trap handler.
  *
  * The exception type is taken from frame->exc, and the trap is treated as
  * a user-mode trap when PSL_PR is set in frame->srr1.
  *
  * User mode: the exception is handled according to its type.  When a
  * handler leaves 'sig' non-zero, that signal (with 'ucode' as its code) is
  * posted to the thread before returning through userret().  Unknown types
  * go to trap_fatal().
  *
  * Kernel mode: the cases that can be recovered return directly.  Anything
  * else ends up in trap_fatal().
  */
 void
 trap(struct trapframe *frame)
 {
 	struct thread	*td;
 	struct proc	*p;
 #ifdef KDTRACE_HOOKS
 	uint32_t inst;
 #endif
 	int		sig, type, user;
 	u_int		ucode;
 	ksiginfo_t	ksi;
 
 	PCPU_INC(cnt.v_trap);
 
 	td = curthread;
 	p = td->td_proc;
 
 	type = ucode = frame->exc;
 	sig = 0;
 	user = frame->srr1 & PSL_PR;
 
 	CTR3(KTR_TRAP, "trap: %s type=%s (%s)", td->td_name,
 	    trapname(type), user ? "user" : "kernel");
 
 #ifdef KDTRACE_HOOKS
 	/*
 	 * A trap can occur while DTrace executes a probe. Before
 	 * executing the probe, DTrace blocks re-scheduling and sets
 	 * a flag in its per-cpu flags to indicate that it doesn't
 	 * want to fault. On returning from the probe, the no-fault
 	 * flag is cleared and finally re-scheduling is enabled.
 	 *
 	 * If the DTrace kernel module has registered a trap handler,
 	 * call it and if it returns non-zero, assume that it has
 	 * handled the trap and modified the trap frame so that this
 	 * function can return normally.
 	 */
 	if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type) != 0)
 		return;
 #endif
 
 	if (user) {
 		td->td_pticks = 0;
 		td->td_frame = frame;
 		if (td->td_cowgen != p->p_cowgen)
 			thread_cow_update(td);
 
 		/* User Mode Traps */
 		switch (type) {
 		case EXC_RUNMODETRC:
 		case EXC_TRC:
 			frame->srr1 &= ~PSL_SE;
 			sig = SIGTRAP;
 			ucode = TRAP_TRACE;
 			break;
 
 #ifdef __powerpc64__
 		case EXC_ISE:
 		case EXC_DSE:
 			if (handle_user_slb_spill(&p->p_vmspace->vm_pmap,
 			    (type == EXC_ISE) ? frame->srr0 : frame->dar) != 0){
 				sig = SIGSEGV;
 				ucode = SEGV_MAPERR;
 			}
 			break;
 #endif
 		case EXC_DSI:
 		case EXC_ISI:
 			sig = trap_pfault(frame, 1);
 			if (sig == SIGSEGV)
 				ucode = SEGV_MAPERR;
 			break;
 
 		case EXC_SC:
 			syscall(frame);
 			break;
 
 		case EXC_FPU:
 			KASSERT((td->td_pcb->pcb_flags & PCB_FPU) != PCB_FPU,
 			    ("FPU already enabled for thread"));
 			enable_fpu(td);
 			break;
 
 		case EXC_VEC:
 			KASSERT((td->td_pcb->pcb_flags & PCB_VEC) != PCB_VEC,
 			    ("Altivec already enabled for thread"));
 			enable_vec(td);
 			break;
 
 		case EXC_VSX:
 			KASSERT((td->td_pcb->pcb_flags & PCB_VSX) != PCB_VSX,
 			    ("VSX already enabled for thread"));
 			if (!(td->td_pcb->pcb_flags & PCB_VEC))
 				enable_vec(td);
 			if (!(td->td_pcb->pcb_flags & PCB_FPU))
 				save_fpu(td);
 			td->td_pcb->pcb_flags |= PCB_VSX;
 			enable_fpu(td);
 			break;
 
 		case EXC_VECAST_E:
 		case EXC_VECAST_G4:
 		case EXC_VECAST_G5:
 			/*
 			 * We get a VPU assist exception for IEEE mode
 			 * vector operations on denormalized floats.
 			 * Emulating this is a giant pain, so for now,
 			 * just switch off IEEE mode and treat them as
 			 * zero.
 			 */
 
 			save_vec(td);
 			td->td_pcb->pcb_vec.vscr |= ALTIVEC_VSCR_NJ;
 			enable_vec(td);
 			break;
 
 		case EXC_ALI:
 			if (fix_unaligned(td, frame) != 0) {
 				sig = SIGBUS;
 				ucode = BUS_ADRALN;
 			}
 			else
 				frame->srr0 += 4;
 			break;
 
 		case EXC_DEBUG:	/* Single stepping */
 			mtspr(SPR_DBSR, mfspr(SPR_DBSR));
 			frame->srr1 &= ~PSL_DE;
 			/*
 			 * Clear both DBCR0 bits.  This must be a bitwise OR:
 			 * a logical OR collapses the mask to 1 and leaves
 			 * both bits set.
 			 */
 			frame->cpu.booke.dbcr0 &= ~(DBCR0_IDM | DBCR0_IC);
 			sig = SIGTRAP;
 			ucode = TRAP_TRACE;
 			break;
 
 		case EXC_PGM:
 			/* Identify the trap reason */
 #ifdef AIM
 			if (frame->srr1 & EXC_PGM_TRAP) {
 #else
 			if (frame->cpu.booke.esr & ESR_PTR) {
 #endif
 #ifdef KDTRACE_HOOKS
 				inst = fuword32((const void *)frame->srr0);
 				if (inst == 0x0FFFDDDD &&
 				    dtrace_pid_probe_ptr != NULL) {
 					struct reg regs;
 					fill_regs(td, &regs);
 					(*dtrace_pid_probe_ptr)(&regs);
 					break;
 				}
 #endif
 				sig = SIGTRAP;
 				ucode = TRAP_BRKPT;
 			} else {
 				sig = ppc_instr_emulate(frame, td->td_pcb);
 				if (sig == SIGILL) {
 					if (frame->srr1 & EXC_PGM_PRIV)
 						ucode = ILL_PRVOPC;
 					else if (frame->srr1 & EXC_PGM_ILLEGAL)
 						ucode = ILL_ILLOPC;
 				} else if (sig == SIGFPE)
 					ucode = FPE_FLTINV;	/* Punt for now, invalid operation. */
 			}
 			break;
 
 		case EXC_MCHK:
 			/*
 			 * Note that this may not be recoverable for the user
 			 * process, depending on the type of machine check,
 			 * but it at least prevents the kernel from dying.
 			 */
 			sig = SIGBUS;
 			ucode = BUS_OBJERR;
 			break;
 
 		default:
 			trap_fatal(frame);
 		}
 	} else {
 		/* Kernel Mode Traps */
 
 		KASSERT(cold || td->td_ucred != NULL,
 		    ("kernel trap doesn't have ucred"));
 		switch (type) {
 #ifdef KDTRACE_HOOKS
 		case EXC_PGM:
 			if (frame->srr1 & EXC_PGM_TRAP) {
 				if (*(uint32_t *)frame->srr0 == EXC_DTRACE) {
 					if (dtrace_invop_jump_addr != NULL) {
 						dtrace_invop_jump_addr(frame);
 						return;
 					}
 				}
 			}
 			break;
 #endif
 #ifdef __powerpc64__
 		case EXC_DSE:
 			if ((frame->dar & SEGMENT_MASK) == USER_ADDR) {
 				__asm __volatile ("slbmte %0, %1" ::
 					"r"(td->td_pcb->pcb_cpu.aim.usr_vsid),
 					"r"(USER_SLB_SLBE));
 				return;
 			}
 			break;
 #endif
 		case EXC_DSI:
 			if (trap_pfault(frame, 0) == 0)
 				return;
 			break;
 		case EXC_MCHK:
 			if (handle_onfault(frame))
 				return;
 			break;
 		default:
 			break;
 		}
 		/* Nothing above recovered the trap. */
 		trap_fatal(frame);
 	}
 
 	if (sig != 0) {
 		if (p->p_sysent->sv_transtrap != NULL)
 			sig = (p->p_sysent->sv_transtrap)(sig, type);
 		ksiginfo_init_trap(&ksi);
 		ksi.ksi_signo = sig;
 		ksi.ksi_code = (int) ucode; /* XXX, not POSIX */
 		/* ksi.ksi_addr = ? */
 		ksi.ksi_trapno = type;
 		trapsignal(td, &ksi);
 	}
 
 	userret(td, frame);
 }
 
 /*
  * Report an unrecoverable trap: print the trap details and panic.  With
  * KDB compiled in, the debugger is offered the trap first (when
  * debugger_on_panic is set or kdb is already active); if kdb_trap()
  * returns non-zero this function returns instead of panicking.
  */
 static void
 trap_fatal(struct trapframe *frame)
 {
 
 	printtrap(frame->exc, frame, 1, (frame->srr1 & PSL_PR));
 #ifdef KDB
 	if ((debugger_on_panic || kdb_active) &&
 	    kdb_trap(frame->exc, 0, frame))
 		return;
 #endif
 	panic("%s trap", trapname(frame->exc));
 }
 
 /*
  * Print a description of a trap to the console.
  *
  *	vector	exception vector, also used to pick the extra fields shown
  *	frame	trap frame the register values are read from
  *	isfatal	non-zero prints "fatal", zero prints "handled"
  *	user	non-zero prints "user", zero prints "kernel"
  */
 static void
 printtrap(u_int vector, struct trapframe *frame, int isfatal, int user)
 {
 	uint16_t ver;
 #ifdef BOOKE
 	vm_paddr_t pa;
 #endif
 
 	printf("\n");
 	printf("%s %s trap:\n", isfatal ? "fatal" : "handled",
 	    user ? "user" : "kernel");
 	printf("\n");
 	printf("   exception       = 0x%x (%s)\n", vector, trapname(vector));
 	/* Vector-specific details */
 	switch (vector) {
 	case EXC_DSE:
 	case EXC_DSI:
 	case EXC_DTMISS:
 		/* Data-side faults: the address comes from DAR */
 		printf("   virtual address = 0x%" PRIxPTR "\n", frame->dar);
 #ifdef AIM
 		printf("   dsisr           = 0x%lx\n",
 		    (u_long)frame->cpu.aim.dsisr);
 #endif
 		break;
 	case EXC_ISE:
 	case EXC_ISI:
 	case EXC_ITMISS:
 		/* Instruction-side faults: the address is srr0 */
 		printf("   virtual address = 0x%" PRIxPTR "\n", frame->srr0);
 		break;
 	case EXC_MCHK:
 		/* Upper 16 bits of the PVR, tested by MPC745X_P() on AIM */
 		ver = mfpvr() >> 16;
 #if defined(AIM)
 		if (MPC745X_P(ver))
 			printf("    msssr0         = 0x%lx\n",
 			    (u_long)mfspr(SPR_MSSSR0));
 #elif defined(BOOKE)
 		/* Combine MCARU (upper half) and MCAR (lower half) */
 		pa = mfspr(SPR_MCARU);
 		pa = (pa << 32) | mfspr(SPR_MCAR);
 		printf("   mcsr            = 0x%lx\n", (u_long)mfspr(SPR_MCSR));
 		printf("   mcar            = 0x%jx\n", (uintmax_t)pa);
 #endif
 		break;
 	}
 #ifdef BOOKE
 	printf("   esr             = 0x%" PRIxPTR "\n",
 	    frame->cpu.booke.esr);
 #endif
 	/* Fields printed for every trap */
 	printf("   srr0            = 0x%" PRIxPTR "\n", frame->srr0);
 	printf("   srr1            = 0x%lx\n", (u_long)frame->srr1);
 	printf("   lr              = 0x%" PRIxPTR "\n", frame->lr);
 	printf("   curthread       = %p\n", curthread);
 	if (curthread != NULL)
 		printf("          pid = %d, comm = %s\n",
 		    curthread->td_proc->p_pid, curthread->td_name);
 	printf("\n");
 }
 
 /*
  * Handles a fatal fault when we have onfault state to recover.  Returns
  * non-zero if there was onfault recovery state available.
  *
  * When the current thread's pcb_onfault is set, the trap frame is rewritten
  * from that saved buffer: srr0 from the saved LR slot, r1, r2 and CR from
  * their slots, plus a run of saved general registers.  r3 is forced to 1.
  * The FAULTBUF_* indices select the slots within the buffer.
  */
 static int
 handle_onfault(struct trapframe *frame)
 {
 	struct		thread *td;
-	faultbuf	*fb;
+	jmp_buf		*fb;
 
 	td = curthread;
 	fb = td->td_pcb->pcb_onfault;
 	if (fb != NULL) {
-		frame->srr0 = (*fb)[FAULTBUF_LR];
-		frame->fixreg[1] = (*fb)[FAULTBUF_R1];
-		frame->fixreg[2] = (*fb)[FAULTBUF_R2];
+		frame->srr0 = (*fb)->_jb[FAULTBUF_LR];
+		frame->fixreg[1] = (*fb)->_jb[FAULTBUF_R1];
+		frame->fixreg[2] = (*fb)->_jb[FAULTBUF_R2];
 		frame->fixreg[3] = 1;
-		frame->cr = (*fb)[FAULTBUF_CR];
-		bcopy(&(*fb)[FAULTBUF_R13], &frame->fixreg[13],
-		    19 * sizeof(register_t));
+		frame->cr = (*fb)->_jb[FAULTBUF_CR];
+		bcopy(&(*fb)->_jb[FAULTBUF_R14], &frame->fixreg[14],
+		    18 * sizeof(register_t));
+		td->td_pcb->pcb_onfault = NULL; /* Returns twice, not thrice */
 		return (1);
 	}
 	return (0);
 }
 
 /*
  * Decode the system call number and arguments of the current trap frame
  * into 'sa'.
  *
  * The code comes from r0 and the arguments from the argument registers
  * starting at FIRSTARG.  SYS_syscall and SYS___syscall take the real code
  * from the first argument(s).  Arguments that do not fit in the NARGREG
  * registers are copied in from the user stack.  For ILP32 processes each
  * register value is truncated to 32 bits.
  *
  * Returns 0 on success or the copyin() error.  On success td_retval[] is
  * preset for the system call.
  */
 int
 cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
 {
 	struct proc *p;
 	struct trapframe *frame;
 	caddr_t	params;
 	size_t argsz;
 	int error, n, i;
 
 	p = td->td_proc;
 	frame = td->td_frame;
 
 	sa->code = frame->fixreg[0];
 	params = (caddr_t)(frame->fixreg + FIRSTARG);
 	n = NARGREG;
 
 	if (sa->code == SYS_syscall) {
 		/*
 		 * code is first argument,
 		 * followed by actual args.
 		 */
 		sa->code = *(register_t *) params;
 		params += sizeof(register_t);
 		n -= 1;
 	} else if (sa->code == SYS___syscall) {
 		/*
 		 * Like syscall, but code is a quad,
 		 * so as to maintain quad alignment
 		 * for the rest of the args.
 		 */
 		if (SV_PROC_FLAG(p, SV_ILP32)) {
 			params += sizeof(register_t);
 			sa->code = *(register_t *) params;
 			params += sizeof(register_t);
 			n -= 2;
 		} else {
 			sa->code = *(register_t *) params;
 			params += sizeof(register_t);
 			n -= 1;
 		}
 	}
 
 	if (p->p_sysent->sv_mask)
 		sa->code &= p->p_sysent->sv_mask;
 	/* Out-of-range codes are redirected to entry 0 of the table. */
 	if (sa->code >= p->p_sysent->sv_size)
 		sa->callp = &p->p_sysent->sv_table[0];
 	else
 		sa->callp = &p->p_sysent->sv_table[sa->code];
 
 	sa->narg = sa->callp->sy_narg;
 
 	if (SV_PROC_FLAG(p, SV_ILP32)) {
 		argsz = sizeof(uint32_t);
 
 		for (i = 0; i < n; i++)
 			sa->args[i] = ((u_register_t *)(params))[i] &
 			    0xffffffff;
 	} else {
 		argsz = sizeof(uint64_t);
 
 		for (i = 0; i < n; i++)
 			sa->args[i] = ((u_register_t *)(params))[i];
 	}
 
 	/* Arguments beyond the register set live on the user stack. */
 	if (sa->narg > n)
 		error = copyin(MOREARGS(frame->fixreg[1]), sa->args + n,
 			       (sa->narg - n) * argsz);
 	else
 		error = 0;
 
 #ifdef __powerpc64__
 	if (SV_PROC_FLAG(p, SV_ILP32) && sa->narg > n) {
 		/*
 		 * Expand the size of arguments copied from the stack.
 		 *
 		 * The (narg - n) 32-bit values were packed at &sa->args[n].
 		 * Widen them in place from the last one down so that no
 		 * value is overwritten before it is read.  The last valid
 		 * slot is narg - 1: starting at narg would read a word that
 		 * was never copied in and store past the last argument.
 		 */
 		for (i = sa->narg - 1; i >= n; i--)
 			sa->args[i] = ((uint32_t *)(&sa->args[n]))[i-n];
 	}
 #endif
 
 	if (error == 0) {
 		td->td_retval[0] = 0;
 		td->td_retval[1] = frame->fixreg[FIRSTARG + 1];
 	}
 	return (error);
 }
 
 #include "../../kern/subr_syscall.c"
 
 /*
  * System call entry from trap(): records the trap frame in the current
  * thread, then runs the generic syscallenter()/syscallret() pair.
  */
 void
 syscall(struct trapframe *frame)
 {
 	struct thread *td;
 	struct syscall_args sa;
 	int error;
 
 	td = curthread;
 	td->td_frame = frame;
 
 #ifdef __powerpc64__
 	/*
 	 * Speculatively restore last user SLB segment, which we know is
 	 * invalid already, since we are likely to do copyin()/copyout().
 	 */
 	__asm __volatile ("slbmte %0, %1; isync" ::
             "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE));
 #endif
 
 	/* The result of syscallenter() is passed straight to syscallret() */
 	error = syscallenter(td, &sa);
 	syscallret(td, error, &sa);
 }
 
 #ifdef __powerpc64__
 /*
  * Handle kernel SLB faults -- runs in real mode, all seat belts off
  *
  *	type	exception type; EXC_ISE selects srr0 as the faulting address,
  *		anything else selects dar
  *	dar	data address register value at the fault
  *	srr0	srr0 value at the fault
  *
  * Only the per-CPU SLB cache is updated here.
  */
 void
 handle_kernel_slb_spill(int type, register_t dar, register_t srr0)
 {
 	struct slb *slbcache;
 	uint64_t slbe, slbv;
 	uint64_t esid, addr;
 	int i;
 
 	addr = (type == EXC_ISE) ? srr0 : dar;
 	slbcache = PCPU_GET(slb);
 	esid = (uintptr_t)addr >> ADDR_SR_SHFT;
 	slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
 	
 	/* See if the hardware flushed this somehow (can happen in LPARs) */
 	for (i = 0; i < n_slbs; i++)
 		if (slbcache[i].slbe == (slbe | (uint64_t)i))
 			return;
 
 	/* Not in the map, needs to actually be added */
 	slbv = kernel_va_to_slbv(addr);
 	if (slbcache[USER_SLB_SLOT].slbe == 0) {
 		/* Look for an invalid slot other than the user slot */
 		for (i = 0; i < n_slbs; i++) {
 			if (i == USER_SLB_SLOT)
 				continue;
 			if (!(slbcache[i].slbe & SLBE_VALID))
 				goto fillkernslb;
 		}
 
 		/*
 		 * No free slot was found.
 		 * NOTE(review): storing 1 in the user slot presumably makes
 		 * later calls skip this search -- confirm.
 		 */
 		if (i == n_slbs)
 			slbcache[USER_SLB_SLOT].slbe = 1;
 	}
 
 	/* Sacrifice a random SLB entry that is not the user entry */
 	i = mftb() % n_slbs;
 	if (i == USER_SLB_SLOT)
 		i = (i+1) % n_slbs;
 
 fillkernslb:
 	/* Write new entry */
 	slbcache[i].slbv = slbv;
 	slbcache[i].slbe = slbe | (uint64_t)i;
 
 	/* Trap handler will restore from cache on exit */
 }
 
 /*
  * Handle an SLB miss on user address 'addr' for pmap 'pm'.
  *
  * With the pmap locked, either allocates a VSID for the segment (when the
  * pmap has no entry for it) or inserts the existing entry into the pmap's
  * SLB cache unless it is already there.  Always returns 0.
  */
 static int 
 handle_user_slb_spill(pmap_t pm, vm_offset_t addr)
 {
 	struct slb *user_entry;
 	uint64_t esid;
 	int i;
 
 	esid = (uintptr_t)addr >> ADDR_SR_SHFT;
 
 	PMAP_LOCK(pm);
 	user_entry = user_va_to_slb_entry(pm, addr);
 
 	if (user_entry == NULL) {
 		/* allocate_vsid auto-spills it */
 		(void)allocate_user_vsid(pm, esid, 0);
 	} else {
 		/*
 		 * Check that another CPU has not already mapped this.
 		 * XXX: Per-thread SLB caches would be better.
 		 */
 		for (i = 0; i < pm->pm_slb_len; i++)
 			if (pm->pm_slb[i] == user_entry)
 				break;
 
 		/* Loop ran to the end: the entry is not cached yet */
 		if (i == pm->pm_slb_len)
 			slb_insert_user(pm, user_entry);
 	}
 	PMAP_UNLOCK(pm);
 
 	return (0);
 }
 #endif
 
 /*
  * Resolve a page fault described by 'frame'.
  *
  *	user	non-zero when the fault was taken in user mode
  *
  * Returns 0 when vm_fault() succeeded, or when a kernel-mode fault was
  * recovered through handle_onfault().  Returns SIGSEGV otherwise.
  */
 static int
 trap_pfault(struct trapframe *frame, int user)
 {
 	vm_offset_t	eva, va;
 	struct		thread *td;
 	struct		proc *p;
 	vm_map_t	map;
 	vm_prot_t	ftype;
 	int		rv;
 #ifdef AIM
 	register_t	user_sr;
 #endif
 
 	td = curthread;
 	p = td->td_proc;
 	/* Work out the faulting address and the access type */
 	if (frame->exc == EXC_ISI) {
 		eva = frame->srr0;
 		ftype = VM_PROT_EXECUTE;
 		if (frame->srr1 & SRR1_ISI_PFAULT)
 			ftype |= VM_PROT_READ;
 	} else {
 		eva = frame->dar;
 #ifdef BOOKE
 		if (frame->cpu.booke.esr & ESR_ST)
 #else
 		if (frame->cpu.aim.dsisr & DSISR_STORE)
 #endif
 			ftype = VM_PROT_WRITE;
 		else
 			ftype = VM_PROT_READ;
 	}
 
 	/* Pick the VM map the address belongs to */
 	if (user) {
 		KASSERT(p->p_vmspace != NULL, ("trap_pfault: vmspace  NULL"));
 		map = &p->p_vmspace->vm_map;
 	} else {
 #ifdef BOOKE
 		if (eva < VM_MAXUSER_ADDRESS) {
 #else
 		if ((eva >> ADDR_SR_SHFT) == (USER_ADDR >> ADDR_SR_SHFT)) {
 #endif
 			/* Kernel access to a user address: use the process map */
 			map = &p->p_vmspace->vm_map;
 
 #ifdef AIM
 			/*
 			 * Replace the segment part of the address with the
 			 * user segment recorded in the PCB.
 			 */
 			user_sr = td->td_pcb->pcb_cpu.aim.usr_segm;
 			eva &= ADDR_PIDX | ADDR_POFF;
 			eva |= user_sr << ADDR_SR_SHFT;
 #endif
 		} else {
 			map = kernel_map;
 		}
 	}
 	va = trunc_page(eva);
 
 	/* Fault in the page. */
 	rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
 	/*
 	 * XXXDTRACE: add dtrace_doubletrap_func here?
 	 */
 
 	if (rv == KERN_SUCCESS)
 		return (0);
 
 	/* A failed kernel-mode fault may still have onfault recovery state */
 	if (!user && handle_onfault(frame))
 		return (0);
 
 	return (SIGSEGV);
 }
 
 /*
  * For now, this only deals with the particular unaligned access case
  * that gcc tends to generate.  Eventually it should handle all of the
  * possibilities that can happen on a 32-bit PowerPC in big-endian mode.
  */
 
 static int
 fix_unaligned(struct thread *td, struct trapframe *frame)
 {
 	struct thread	*fputhread;
 	int		indicator, reg;
 	double		*fpr;
 
 	indicator = EXC_ALI_OPCODE_INDICATOR(frame->cpu.aim.dsisr);
 
 	switch (indicator) {
 	case EXC_ALI_LFD:
 	case EXC_ALI_STFD:
 		reg = EXC_ALI_RST(frame->cpu.aim.dsisr);
 		fpr = &td->td_pcb->pcb_fpu.fpr[reg].fpr;
 		fputhread = PCPU_GET(fputhread);
 
 		/* Juggle the FPU to ensure that we've initialized
 		 * the FPRs, and that their current state is in
 		 * the PCB.
 		 */
 		if (fputhread != td) {
 			if (fputhread)
 				save_fpu(fputhread);
 			enable_fpu(td);
 		}
 		save_fpu(td);
 
 		if (indicator == EXC_ALI_LFD) {
 			if (copyin((void *)frame->dar, fpr,
 			    sizeof(double)) != 0)
 				return (-1);
 			enable_fpu(td);
 		} else {
 			if (copyout(fpr, (void *)frame->dar,
 			    sizeof(double)) != 0)
 				return (-1);
 		}
 		return (0);
 		break;
 	}
 
 	return (-1);
 }
 
 #ifdef KDB
 int db_trap_glue(struct trapframe *);		/* Called from trap_subr.S */
 
 /*
  * Decide whether a trap should be handed to the kernel debugger.
  *
  * Only kernel-mode traps (PSL_PR clear) of the types listed in the
  * condition below are considered.  Returns the result of kdb_trap() for
  * those, and 0 for everything else, including DTrace trap instructions.
  */
 int
 db_trap_glue(struct trapframe *frame)
 {
 	if (!(frame->srr1 & PSL_PR)
 	    && (frame->exc == EXC_TRC || frame->exc == EXC_RUNMODETRC
 #ifdef AIM
 		|| (frame->exc == EXC_PGM
 		    && (frame->srr1 & EXC_PGM_TRAP))
 #else
 		|| (frame->exc == EXC_DEBUG)
 #endif
 		|| frame->exc == EXC_BPT
 		|| frame->exc == EXC_DSI)) {
 		int type = frame->exc;
 
 		/* Ignore DTrace traps. */
 		if (*(uint32_t *)frame->srr0 == EXC_DTRACE)
 			return (0);
 		/* Trap instructions are reported to kdb as breakpoints */
 #ifdef AIM
 		if (type == EXC_PGM && (frame->srr1 & EXC_PGM_TRAP)) {
 #else
 		if (frame->cpu.booke.esr & ESR_PTR) {
 #endif
 			type = T_BREAKPOINT;
 		}
 		return (kdb_trap(type, 0, frame));
 	}
 
 	return (0);
 }
 #endif
Index: projects/clang380-import/sys/sparc64/sparc64/elf_machdep.c
===================================================================
--- projects/clang380-import/sys/sparc64/sparc64/elf_machdep.c	(revision 293686)
+++ projects/clang380-import/sys/sparc64/sparc64/elf_machdep.c	(revision 293687)
@@ -1,423 +1,424 @@
 /*-
  * Copyright (c) 2001 Jake Burkholder.
  * Copyright (c) 2000 Eduardo Horvath.
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Paul Kranenburg.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  *	from: NetBSD: mdreloc.c,v 1.42 2008/04/28 20:23:04 martin Exp
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/linker.h>
 #include <sys/proc.h>
 #include <sys/sysent.h>
 #include <sys/imgact_elf.h>
 #include <sys/syscall.h>
 #include <sys/signalvar.h>
 #include <sys/vnode.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 
 #include <machine/elf.h>
 
 #include "linker_if.h"
 
 /*
  * System entry vector for "FreeBSD ELF64" images: it uses the sysent
  * table with SYS_MAXSYSCALL entries and is flagged
  * SV_ABI_FREEBSD | SV_LP64.  Both brand descriptors in this file point
  * at it.  Members that are not provided are set to 0 or NULL explicitly.
  */
 static struct sysentvec elf64_freebsd_sysvec = {
 	.sv_size	= SYS_MAXSYSCALL,
 	.sv_table	= sysent,
 	.sv_mask	= 0,
 	.sv_errsize	= 0,
 	.sv_errtbl	= NULL,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= __elfN(freebsd_fixup),
 	.sv_sendsig	= sendsig,
 	.sv_sigcode	= NULL,
 	.sv_szsigcode	= NULL,
 	.sv_name	= "FreeBSD ELF64",
 	.sv_coredump	= __elfN(coredump),
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= MINSIGSTKSZ,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_stackprot	= VM_PROT_READ | VM_PROT_WRITE,
 	.sv_copyout_strings = exec_copyout_strings,
 	.sv_setregs	= exec_setregs,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= SV_ABI_FREEBSD | SV_LP64,
 	.sv_set_syscall_retval = cpu_set_syscall_retval,
 	.sv_fetch_syscall_args = cpu_fetch_syscall_args,
 	.sv_syscallnames = syscallnames,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
+	.sv_trap	= NULL,
 };
 
 /*
  * Brand descriptor for ELFOSABI_FREEBSD / EM_SPARCV9 images with
  * interp_path "/libexec/ld-elf.so.1".  It is handed to
  * elf64_insert_brand_entry() at SI_SUB_EXEC, SI_ORDER_FIRST.
  */
 static Elf64_Brandinfo freebsd_brand_info = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_SPARCV9,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/libexec/ld-elf.so.1",
 	.sysvec		= &elf64_freebsd_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf64_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST,
     (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info);
 
 /*
  * Second brand descriptor, identical to freebsd_brand_info except that
  * interp_path is "/usr/libexec/ld-elf.so.1" and it is inserted at
  * SI_ORDER_ANY rather than SI_ORDER_FIRST.
  */
 static Elf64_Brandinfo freebsd_brand_oinfo = {
 	.brand		= ELFOSABI_FREEBSD,
 	.machine	= EM_SPARCV9,
 	.compat_3_brand	= "FreeBSD",
 	.emul_path	= NULL,
 	.interp_path	= "/usr/libexec/ld-elf.so.1",
 	.sysvec		= &elf64_freebsd_sysvec,
 	.interp_newpath	= NULL,
 	.brand_note	= &elf64_freebsd_brandnote,
 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
 };
 
 SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY,
     (sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_oinfo);
 
 /*
  * Deliberately empty: none of the arguments are used and nothing is
  * written through dst or off.
  * NOTE(review): presumably the machine-dependent per-thread core dump
  * hook -- confirm against the ELF image activator.
  */
 void
 elf64_dump_thread(struct thread *td __unused, void *dst __unused,
     size_t *off __unused)
 {
 
 }
 
 /*
  * The following table holds for each relocation type:
  *	- the width in bits of the memory location the relocation
  *	  applies to (elf_reloc() uses it to choose between a 32-bit
  *	  and a 64-bit store)
  *	- the number of bits the relocation value must be shifted to the
  *	  right (i.e. discard least significant bits) to fit into
  *	  the appropriate field in the instruction word.
  *	- flags indicating whether
  *		* the relocation involves a symbol
  *		* the relocation is relative to the current position
  *		* the relocation is for a GOT entry
  *		* the relocation is relative to the load address
  *
  */
 /*
  * Encoding of one reloc_target_flags[] entry: bits 0-7 hold the right
  * shift (_RF_RS), bits 8-15 hold the target size in bits (_RF_SZ), and
  * the single-bit _RF_* flags occupy bit 22 and above.
  */
 #define	_RF_S		0x80000000		/* Resolve symbol */
 #define	_RF_A		0x40000000		/* Use addend */
 #define	_RF_P		0x20000000		/* Location relative */
 #define	_RF_G		0x10000000		/* GOT offset */
 #define	_RF_B		0x08000000		/* Load address relative */
 #define	_RF_U		0x04000000		/* Unaligned */
 #define	_RF_X		0x02000000		/* Bare symbols, needs proc */
 #define	_RF_D		0x01000000		/* Use dynamic TLS offset */
 #define	_RF_O		0x00800000		/* Use static TLS offset */
 #define	_RF_I		0x00400000		/* Use TLS object ID */
 #define	_RF_SZ(s)	(((s) & 0xff) << 8)	/* memory target size */
 #define	_RF_RS(s)	( (s) & 0xff)		/* right shift */
 /*
  * Per-relocation-type flags, indexed by relocation type number (the
  * trailing comment on each row gives the type name).  The TLS rows are
  * compiled out with #if 0, so the table ends at UA16.
  */
 static const int reloc_target_flags[] = {
 	0,							/* NONE */
 	_RF_S|_RF_A|		_RF_SZ(8)  | _RF_RS(0),		/* 8 */
 	_RF_S|_RF_A|		_RF_SZ(16) | _RF_RS(0),		/* 16 */
 	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* 32 */
 	_RF_S|_RF_A|_RF_P|	_RF_SZ(8)  | _RF_RS(0),		/* DISP_8 */
 	_RF_S|_RF_A|_RF_P|	_RF_SZ(16) | _RF_RS(0),		/* DISP_16 */
 	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(0),		/* DISP_32 */
 	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* WDISP_30 */
 	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* WDISP_22 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(10),	/* HI22 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(0),		/* 22 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(0),		/* 13 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(0),		/* LO10 */
 	_RF_G|			_RF_SZ(32) | _RF_RS(0),		/* GOT10 */
 	_RF_G|			_RF_SZ(32) | _RF_RS(0),		/* GOT13 */
 	_RF_G|			_RF_SZ(32) | _RF_RS(10),	/* GOT22 */
 	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(0),		/* PC10 */
 	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(10),	/* PC22 */
 	      _RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* WPLT30 */
 				_RF_SZ(32) | _RF_RS(0),		/* COPY */
 	_RF_S|_RF_A|		_RF_SZ(64) | _RF_RS(0),		/* GLOB_DAT */
 				_RF_SZ(32) | _RF_RS(0),		/* JMP_SLOT */
 	      _RF_A|	_RF_B|	_RF_SZ(64) | _RF_RS(0),		/* RELATIVE */
 	_RF_S|_RF_A|	_RF_U|	_RF_SZ(32) | _RF_RS(0),		/* UA_32 */
 
 	      _RF_A|		_RF_SZ(32) | _RF_RS(0),		/* PLT32 */
 	      _RF_A|		_RF_SZ(32) | _RF_RS(10),	/* HIPLT22 */
 	      _RF_A|		_RF_SZ(32) | _RF_RS(0),		/* LOPLT10 */
 	      _RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(0),		/* PCPLT32 */
 	      _RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(10),	/* PCPLT22 */
 	      _RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(0),		/* PCPLT10 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(0),		/* 10 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(0),		/* 11 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(64) | _RF_RS(0),		/* 64 */
 	_RF_S|_RF_A|/*extra*/	_RF_SZ(32) | _RF_RS(0),		/* OLO10 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(42),	/* HH22 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(32),	/* HM10 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(10),	/* LM22 */
 	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(42),	/* PC_HH22 */
 	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(32),	/* PC_HM10 */
 	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(10),	/* PC_LM22 */
 	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* WDISP16 */
 	_RF_S|_RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* WDISP19 */
 	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* GLOB_JMP */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(0),		/* 7 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(0),		/* 5 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(0),		/* 6 */
 	_RF_S|_RF_A|_RF_P|	_RF_SZ(64) | _RF_RS(0),		/* DISP64 */
 	      _RF_A|		_RF_SZ(64) | _RF_RS(0),		/* PLT64 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(10),	/* HIX22 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(0),		/* LOX10 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(22),	/* H44 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(12),	/* M44 */
 	_RF_S|_RF_A|_RF_X|	_RF_SZ(32) | _RF_RS(0),		/* L44 */
 	_RF_S|_RF_A|		_RF_SZ(64) | _RF_RS(0),		/* REGISTER */
 	_RF_S|_RF_A|	_RF_U|	_RF_SZ(64) | _RF_RS(0),		/* UA64 */
 	_RF_S|_RF_A|	_RF_U|	_RF_SZ(16) | _RF_RS(0),		/* UA16 */
 
 #if 0
 	/* TLS */
 	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(10),	/* GD_HI22 */
 	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* GD_LO10 */
 	0,							/* GD_ADD */
 	      _RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* GD_CALL */
 	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(10),	/* LDM_HI22 */
 	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* LDM_LO10 */
 	0,							/* LDM_ADD */
 	      _RF_A|_RF_P|	_RF_SZ(32) | _RF_RS(2),		/* LDM_CALL */
 	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(10),	/* LDO_HIX22 */
 	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* LDO_LOX10 */
 	0,							/* LDO_ADD */
 	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(10),	/* IE_HI22 */
 	_RF_S|_RF_A|		_RF_SZ(32) | _RF_RS(0),		/* IE_LO10 */
 	0,							/* IE_LD */
 	0,							/* IE_LDX */
 	0,							/* IE_ADD */
 	_RF_S|_RF_A|	_RF_O|	_RF_SZ(32) | _RF_RS(10),	/* LE_HIX22 */
 	_RF_S|_RF_A|	_RF_O|	_RF_SZ(32) | _RF_RS(0),		/* LE_LOX10 */
 	_RF_S|		_RF_I|	_RF_SZ(32) | _RF_RS(0),		/* DTPMOD32 */
 	_RF_S|		_RF_I|	_RF_SZ(64) | _RF_RS(0),		/* DTPMOD64 */
 	_RF_S|_RF_A|	_RF_D|	_RF_SZ(32) | _RF_RS(0),		/* DTPOFF32 */
 	_RF_S|_RF_A|	_RF_D|	_RF_SZ(64) | _RF_RS(0),		/* DTPOFF64 */
 	_RF_S|_RF_A|	_RF_O|	_RF_SZ(32) | _RF_RS(0),		/* TPOFF32 */
 	_RF_S|_RF_A|	_RF_O|	_RF_SZ(64) | _RF_RS(0)		/* TPOFF64 */
 #endif
 };
 
 #if 0
 /*
  * Printable names of the relocation types, in the same order as
  * reloc_target_flags[] (index == relocation type number).  Entries 27-29
  * are PCPLT32, PCPLT22 and PCPLT10, and entry 61 is LDM_LO10, matching
  * the row comments of that table.
  */
 static const char *const reloc_names[] = {
 	"NONE", "8", "16", "32", "DISP_8", "DISP_16", "DISP_32", "WDISP_30",
 	"WDISP_22", "HI22", "22", "13", "LO10", "GOT10", "GOT13", "GOT22",
 	"PC10", "PC22", "WPLT30", "COPY", "GLOB_DAT", "JMP_SLOT", "RELATIVE",
 	"UA_32", "PLT32", "HIPLT22", "LOPLT10", "PCPLT32", "PCPLT22",
 	"PCPLT10", "10", "11", "64", "OLO10", "HH22", "HM10", "LM22",
 	"PC_HH22", "PC_HM10", "PC_LM22", "WDISP16", "WDISP19", "GLOB_JMP",
 	"7", "5", "6", "DISP64", "PLT64", "HIX22", "LOX10", "H44", "M44",
 	"L44", "REGISTER", "UA64", "UA16", "GD_HI22", "GD_LO10", "GD_ADD",
 	"GD_CALL", "LDM_HI22", "LDM_LO10", "LDM_ADD", "LDM_CALL", "LDO_HIX22",
 	"LDO_LOX10", "LDO_ADD", "IE_HI22", "IE_LO10", "IE_LD", "IE_LDX",
 	"IE_ADD", "LE_HIX22", "LE_LOX10", "DTPMOD32", "DTPMOD64", "DTPOFF32",
 	"DTPOFF64", "TPOFF32", "TPOFF64"
 };
 #endif
 
 /*
  * Accessors for reloc_target_flags[].  The first nine test one _RF_*
  * flag each; the last two extract the size (bits 8-15) and right-shift
  * (bits 0-7) fields.  None of them range-checks t, so the caller must
  * pass a valid table index.
  */
 #define	RELOC_RESOLVE_SYMBOL(t)		((reloc_target_flags[t] & _RF_S) != 0)
 #define	RELOC_PC_RELATIVE(t)		((reloc_target_flags[t] & _RF_P) != 0)
 #define	RELOC_BASE_RELATIVE(t)		((reloc_target_flags[t] & _RF_B) != 0)
 #define	RELOC_UNALIGNED(t)		((reloc_target_flags[t] & _RF_U) != 0)
 #define	RELOC_USE_ADDEND(t)		((reloc_target_flags[t] & _RF_A) != 0)
 #define	RELOC_BARE_SYMBOL(t)		((reloc_target_flags[t] & _RF_X) != 0)
 #define	RELOC_USE_TLS_DOFF(t)		((reloc_target_flags[t] & _RF_D) != 0)
 #define	RELOC_USE_TLS_OFF(t)		((reloc_target_flags[t] & _RF_O) != 0)
 #define	RELOC_USE_TLS_ID(t)		((reloc_target_flags[t] & _RF_I) != 0)
 #define	RELOC_TARGET_SIZE(t)		((reloc_target_flags[t] >> 8) & 0xff)
 #define	RELOC_VALUE_RIGHTSHIFT(t)	(reloc_target_flags[t] & 0xff)
 
 /*
  * Per-relocation-type mask of the bits the relocation may modify in the
  * target word, indexed by relocation type number.  _BM(x) yields a mask
  * with the low x bits set; -1 selects every bit.  The TLS entries are
  * compiled out with #if 0.  elf_reloc() also uses the element count of
  * this table as the upper bound for accepted relocation types.
  */
 static const long reloc_target_bitmask[] = {
 #define	_BM(x)	(~(-(1ULL << (x))))
 	0,				/* NONE */
 	_BM(8), _BM(16), _BM(32),	/* 8, 16, 32 */
 	_BM(8), _BM(16), _BM(32),	/* DISP8, DISP16, DISP32 */
 	_BM(30), _BM(22),		/* WDISP30, WDISP22 */
 	_BM(22), _BM(22),		/* HI22, 22 */
 	_BM(13), _BM(10),		/* 13, LO10 */
 	_BM(10), _BM(13), _BM(22),	/* GOT10, GOT13, GOT22 */
 	_BM(10), _BM(22),		/* PC10, PC22 */
 	_BM(30), 0,			/* WPLT30, COPY */
 	_BM(32), _BM(32), _BM(32),	/* GLOB_DAT, JMP_SLOT, RELATIVE */
 	_BM(32), _BM(32),		/* UA32, PLT32 */
 	_BM(22), _BM(10),		/* HIPLT22, LOPLT10 */
 	_BM(32), _BM(22), _BM(10),	/* PCPLT32, PCPLT22, PCPLT10 */
 	_BM(10), _BM(11), -1,		/* 10, 11, 64 */
 	_BM(13), _BM(22),		/* OLO10, HH22 */
 	_BM(10), _BM(22),		/* HM10, LM22 */
 	_BM(22), _BM(10), _BM(22),	/* PC_HH22, PC_HM10, PC_LM22 */
 	_BM(16), _BM(19),		/* WDISP16, WDISP19 */
 	-1,				/* GLOB_JMP */
 	_BM(7), _BM(5), _BM(6),		/* 7, 5, 6 */
 	-1, -1,				/* DISP64, PLT64 */
 	_BM(22), _BM(13),		/* HIX22, LOX10 */
 	_BM(22), _BM(10), _BM(13),	/* H44, M44, L44 */
 	-1, -1, _BM(16),		/* REGISTER, UA64, UA16 */
 #if 0
 	_BM(22), _BM(10), 0, _BM(30),	/* GD_HI22, GD_LO10, GD_ADD, GD_CALL */
 	_BM(22), _BM(10), 0,		/* LDM_HI22, LDM_LO10, LDM_ADD */
 	_BM(30),			/* LDM_CALL */
 	_BM(22), _BM(10), 0,		/* LDO_HIX22, LDO_LOX10, LDO_ADD */
 	_BM(22), _BM(10), 0, 0,		/* IE_HI22, IE_LO10, IE_LD, IE_LDX */
 	0,				/* IE_ADD */
 	_BM(22), _BM(13),		/* LE_HIX22, LE_LOX10 */
 	_BM(32), -1,			/* DTPMOD32, DTPMOD64 */
 	_BM(32), -1,			/* DTPOFF32, DTPOFF64 */
 	_BM(32), -1			/* TPOFF32, TPOFF64 */
 #endif
 #undef _BM
 };
 #define	RELOC_VALUE_BITMASK(t)	(reloc_target_bitmask[t])
 
 /*
  * Apply a single R_SPARC_RELATIVE relocation.
  *
  * Anything that is not an ELF_RELOC_RELA record of type R_SPARC_RELATIVE
  * is refused with -1.  Otherwise the word at relocbase + r_offset is set
  * to elf_relocaddr(lf, r_addend + relocbase) and 0 is returned.  The
  * lookup callback is not used.
  */
 int
 elf_reloc_local(linker_file_t lf, Elf_Addr relocbase, const void *data,
     int type, elf_lookup_fn lookup __unused)
 {
 	const Elf_Rela *entry = (const Elf_Rela *)data;
 	Elf_Addr *target;
 
 	/* The record is only inspected once it is known to be a RELA. */
 	if (type != ELF_RELOC_RELA ||
 	    ELF64_R_TYPE_ID(entry->r_info) != R_SPARC_RELATIVE)
 		return (-1);
 
 	target = (Elf_Addr *)(relocbase + entry->r_offset);
 	*target = elf_relocaddr(lf, entry->r_addend + relocbase);
 	return (0);
 }
 
 /*
  * Process one ELF relocation with addend.
  *
  * Only ELF_RELOC_RELA records are accepted.  R_SPARC_NONE and
  * R_SPARC_RELATIVE are skipped and report success.  R_SPARC_JMP_SLOT,
  * R_SPARC_COPY, any type beyond the end of reloc_target_bitmask[] and
  * any type flagged _RF_U are rejected with a message.  For the remaining
  * types the value is built from the addend (plus the symbol address when
  * the type asks for one), adjusted, shifted right, masked, and merged
  * into the 32-bit or 64-bit word at relocbase + r_offset.
  *
  * Returns 0 on success and -1 on rejection or failed symbol lookup.
  */
 int
 elf_reloc(linker_file_t lf, Elf_Addr relocbase, const void *data, int type,
     elf_lookup_fn lookup)
 {
 	const Elf_Rela *rela;
 	Elf_Word *where32;
 	Elf_Addr *where;
 	Elf_Size rtype, symidx;
 	Elf_Addr value;
 	Elf_Addr mask;
 	Elf_Addr addr;
 	int error;
 
 	if (type != ELF_RELOC_RELA)
 		return (-1);
 
 	/* Same target address viewed as a 64-bit and as a 32-bit word. */
 	rela = (const Elf_Rela *)data;
 	where = (Elf_Addr *)(relocbase + rela->r_offset);
 	where32 = (Elf_Word *)where;
 	rtype = ELF64_R_TYPE_ID(rela->r_info);
 	symidx = ELF_R_SYM(rela->r_info);
 
 	/* Nothing to do here for these two types. */
 	if (rtype == R_SPARC_NONE || rtype == R_SPARC_RELATIVE)
 		return (0);
 
 	/*
 	 * The bound check must precede every RELOC_*() lookup below, since
 	 * those macros index the tables without checking.
 	 */
 	if (rtype == R_SPARC_JMP_SLOT || rtype == R_SPARC_COPY ||
 	    rtype >= sizeof(reloc_target_bitmask) /
 	    sizeof(*reloc_target_bitmask)) {
 		printf("kldload: unexpected relocation type %ld\n", rtype);
 		return (-1);
 	}
 
 	if (RELOC_UNALIGNED(rtype)) {
 		printf("kldload: unaligned relocation type %ld\n", rtype);
 		return (-1);
 	}
 
 	value = rela->r_addend;
 
 	/* Add the symbol address when the type refers to a symbol. */
 	if (RELOC_RESOLVE_SYMBOL(rtype)) {
 		error = lookup(lf, symidx, 1, &addr);
 		if (error != 0)
 			return (-1);
 		value += addr;
 		if (RELOC_BARE_SYMBOL(rtype))
 			value = elf_relocaddr(lf, value);
 	}
 
 	/* OLO10: low 10 bits of the value plus the data field of r_info. */
 	if (rtype == R_SPARC_OLO10)
 		value = (value & 0x3ff) + ELF64_R_TYPE_DATA(rela->r_info);
 
 	/* HIX22 works on the one's complement of the value. */
 	if (rtype == R_SPARC_HIX22)
 		value ^= 0xffffffffffffffff;
 
 	if (RELOC_PC_RELATIVE(rtype))
 		value -= (Elf_Addr)where;
 
 	if (RELOC_BASE_RELATIVE(rtype))
 		value = elf_relocaddr(lf, value + relocbase);
 
 	/* Reduce the value to the field the instruction or datum holds. */
 	mask = RELOC_VALUE_BITMASK(rtype);
 	value >>= RELOC_VALUE_RIGHTSHIFT(rtype);
 	value &= mask;
 
 	/* LOX10 forces bits 10-12 of the field to one. */
 	if (rtype == R_SPARC_LOX10)
 		value |= 0x1c00;
 
 	/* Merge the field in, preserving the bits outside the mask. */
 	if (RELOC_TARGET_SIZE(rtype) > 32) {
 		*where &= ~mask;
 		*where |= value;
 	} else {
 		*where32 &= ~mask;
 		*where32 |= value;
 	}
 
 	return (0);
 }
 
 /* No-op: ignores lf and always returns 0. */
 int
 elf_cpu_load_file(linker_file_t lf __unused)
 {
 
 	return (0);
 }
 
 /* No-op: ignores lf and always returns 0. */
 int
 elf_cpu_unload_file(linker_file_t lf __unused)
 {
 
 	return (0);
 }
Index: projects/clang380-import/sys/sys/mbuf.h
===================================================================
--- projects/clang380-import/sys/sys/mbuf.h	(revision 293686)
+++ projects/clang380-import/sys/sys/mbuf.h	(revision 293687)
@@ -1,1311 +1,1313 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)mbuf.h	8.5 (Berkeley) 2/19/95
  * $FreeBSD$
  */
 
 #ifndef _SYS_MBUF_H_
 #define	_SYS_MBUF_H_
 
 /* XXX: These includes suck. Sorry! */
 #include <sys/queue.h>
 #ifdef _KERNEL
 #include <sys/systm.h>
 #include <vm/uma.h>
 #ifdef WITNESS
 #include <sys/lock.h>
 #endif
 #endif
 
 /*
  * Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead.
  * An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in
  * sys/param.h), which has no additional overhead and is used instead of the
  * internal data area; this is done when at least MINCLSIZE of data must be
  * stored.  Additionally, it is possible to allocate a separate buffer
  * externally and attach it to the mbuf in a way similar to that of mbuf
  * clusters.
  *
  * NB: These calculations do not take actual compiler-induced alignment and
  * padding inside the complete struct mbuf into account.  Appropriate
  * attention is required when changing members of struct mbuf.
  *
  * MLEN is data length in a normal mbuf.
  * MHLEN is data length in an mbuf with pktheader.
  * MINCLSIZE is a smallest amount of data that should be put into cluster.
  *
  * Compile-time assertions in uipc_mbuf.c test these values to ensure that
  * they are sensible.
  */
 struct mbuf;
 #define	MHSIZE		offsetof(struct mbuf, m_dat)
 #define	MPKTHSIZE	offsetof(struct mbuf, m_pktdat)
 #define	MLEN		((int)(MSIZE - MHSIZE))
 #define	MHLEN		((int)(MSIZE - MPKTHSIZE))
 #define	MINCLSIZE	(MHLEN + 1)
 
 #ifdef _KERNEL
 /*-
  * Macro for type conversion: convert mbuf pointer to data pointer of correct
  * type:
  *
  * mtod(m, t)	-- Convert mbuf pointer to data pointer of correct type.
  * mtodo(m, o) -- Return a void pointer to offset 'o' within the mbuf data.
  */
 #define	mtod(m, t)	((t)((m)->m_data))
 #define	mtodo(m, o)	((void *)(((m)->m_data) + (o)))
 
 /*
  * Argument structure passed to UMA routines during mbuf and packet
  * allocations.
  */
 struct mb_args {
 	int	flags;	/* Flags for mbuf being allocated */
 	short	type;	/* Type of mbuf being allocated */
 };
 #endif /* _KERNEL */
 
 /*
  * Packet tag structure (see below for details).
  */
 struct m_tag {
 	SLIST_ENTRY(m_tag)	m_tag_link;	/* List of packet tags */
 	u_int16_t		m_tag_id;	/* Tag ID */
 	u_int16_t		m_tag_len;	/* Length of data */
 	u_int32_t		m_tag_cookie;	/* ABI/Module ID */
 	void			(*m_tag_free)(struct m_tag *);
 };
 
 /*
  * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set.
  * Size ILP32: 48
  *	 LP64: 56
  * Compile-time assertions in uipc_mbuf.c test these values to ensure that
  * they are correct.
  */
 struct pkthdr {
 	struct ifnet	*rcvif;		/* rcv interface */
 	SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */
 	int32_t		 len;		/* total packet length */
 
 	/* Layer crossing persistent information. */
 	uint32_t	 flowid;	/* packet's 4-tuple system */
 	uint64_t	 csum_flags;	/* checksum and offload features */
 	uint16_t	 fibnum;	/* this packet should use this fib */
 	uint8_t		 cosqos;	/* class/quality of service */
 	uint8_t		 rsstype;	/* hash type */
 	uint8_t		 l2hlen;	/* layer 2 header length */
 	uint8_t		 l3hlen;	/* layer 3 header length */
 	uint8_t		 l4hlen;	/* layer 4 header length */
 	uint8_t		 l5hlen;	/* layer 5 header length */
 	union {
 		uint8_t  eight[8];
 		uint16_t sixteen[4];
 		uint32_t thirtytwo[2];
 		uint64_t sixtyfour[1];
 		uintptr_t unintptr[1];
 		void	*ptr;
 	} PH_per;
 
 	/* Layer specific non-persistent local storage for reassembly, etc. */
 	union {
 		uint8_t  eight[8];
 		uint16_t sixteen[4];
 		uint32_t thirtytwo[2];
 		uint64_t sixtyfour[1];
 		uintptr_t unintptr[1];
 		void 	*ptr;
 	} PH_loc;
 };
 /*
  * Shorthand names for slots of the PH_per union in struct pkthdr.
  * vt_nrecs expands to a bare member name and is written after PH_vt
  * (which is PH_per itself), so PH_vt.vt_nrecs and ether_vtag both name
  * PH_per.sixteen[0].  csum_phsum (sixteen[2]) overlaps the storage of
  * csum_data (thirtytwo[1]).
  */
 #define	ether_vtag	PH_per.sixteen[0]
 #define	PH_vt		PH_per
 #define	vt_nrecs	sixteen[0]
 #define	tso_segsz	PH_per.sixteen[1]
 #define	csum_phsum	PH_per.sixteen[2]
 #define	csum_data	PH_per.thirtytwo[1]
 
 /*
  * Description of external storage mapped into mbuf; valid only if M_EXT is
  * set.
  * Size ILP32: 28
  *	 LP64: 48
  * Compile-time assertions in uipc_mbuf.c test these values to ensure that
  * they are correct.
  */
 struct m_ext {
 	volatile u_int	*ext_cnt;	/* pointer to ref count info */
 	caddr_t		 ext_buf;	/* start of buffer */
 	uint32_t	 ext_size;	/* size of buffer, for ext_free */
 	uint32_t	 ext_type:8,	/* type of external storage */
 			 ext_flags:24;	/* external storage mbuf flags */
 	void		(*ext_free)	/* free routine if not the usual */
 			    (struct mbuf *, void *, void *);
 	void		*ext_arg1;	/* optional argument pointer */
 	void		*ext_arg2;	/* optional argument pointer */
 };
 
 /*
  * The core of the mbuf object along with some shortcut defines for practical
  * purposes.
  */
 struct mbuf {
 	/*
 	 * Header present at the beginning of every mbuf.
 	 * Size ILP32: 24
 	 *      LP64: 32
 	 * Compile-time assertions in uipc_mbuf.c test these values to ensure
 	 * that they are correct.
 	 */
 	/*
 	 * The members of each anonymous union below share storage: the
 	 * plain pointer and the queue(3) entries are alternative views of
 	 * the same link field.
 	 */
 	union {	/* next buffer in chain */
 		struct mbuf		*m_next;
 		SLIST_ENTRY(mbuf)	m_slist;
 		STAILQ_ENTRY(mbuf)	m_stailq;
 	};
 	union {	/* next chain in queue/record */
 		struct mbuf		*m_nextpkt;
 		SLIST_ENTRY(mbuf)	m_slistpkt;
 		STAILQ_ENTRY(mbuf)	m_stailqpkt;
 	};
 	caddr_t		 m_data;	/* location of data */
 	int32_t		 m_len;		/* amount of data in this mbuf */
 	uint32_t	 m_type:8,	/* type of data in this mbuf */
 			 m_flags:24;	/* flags; see below */
 #if !defined(__LP64__)
 	uint32_t	 m_pad;		/* pad for 64bit alignment */
 #endif
 
 	/*
 	 * A set of optional headers (packet header, external storage header)
 	 * and internal data storage.  Historically, these arrays were sized
 	 * to MHLEN (space left after a packet header) and MLEN (space left
 	 * after only a regular mbuf header); they are now variable size in
 	 * order to support future work on variable-size mbufs.
 	 */
 	/*
 	 * m_dat and m_pktdat are zero-length arrays: they reserve no space
 	 * and only name the offsets used by MHSIZE and MPKTHSIZE.  m_ext
 	 * shares its storage with m_pktdat, and the packet header shares
 	 * its storage with the start of m_dat.
 	 */
 	union {
 		struct {
 			struct pkthdr	m_pkthdr;	/* M_PKTHDR set */
 			union {
 				struct m_ext	m_ext;	/* M_EXT set */
 				char		m_pktdat[0];
 			};
 		};
 		char	m_dat[0];			/* !M_PKTHDR, !M_EXT */
 	};
 };
 
 /*
  * mbuf flags of global significance and layer crossing.
  * Those of only protocol/layer specific significance are to be mapped
  * to M_PROTO[1-12] and cleared at layer handoff boundaries.
  * NB: Limited to the lower 24 bits.
  */
 #define	M_EXT		0x00000001 /* has associated external storage */
 #define	M_PKTHDR	0x00000002 /* start of record */
 #define	M_EOR		0x00000004 /* end of record */
 #define	M_RDONLY	0x00000008 /* associated data is marked read-only */
 #define	M_BCAST		0x00000010 /* send/received as link-level broadcast */
 #define	M_MCAST		0x00000020 /* send/received as link-level multicast */
 #define	M_PROMISC	0x00000040 /* packet was not for us */
 #define	M_VLANTAG	0x00000080 /* ether_vtag is valid */
 #define	M_UNUSED_8	0x00000100 /* --available-- */
 #define	M_NOFREE	0x00000200 /* do not free mbuf, embedded in cluster */
 
 #define	M_PROTO1	0x00001000 /* protocol-specific */
 #define	M_PROTO2	0x00002000 /* protocol-specific */
 #define	M_PROTO3	0x00004000 /* protocol-specific */
 #define	M_PROTO4	0x00008000 /* protocol-specific */
 #define	M_PROTO5	0x00010000 /* protocol-specific */
 #define	M_PROTO6	0x00020000 /* protocol-specific */
 #define	M_PROTO7	0x00040000 /* protocol-specific */
 #define	M_PROTO8	0x00080000 /* protocol-specific */
 #define	M_PROTO9	0x00100000 /* protocol-specific */
 #define	M_PROTO10	0x00200000 /* protocol-specific */
 #define	M_PROTO11	0x00400000 /* protocol-specific */
 #define	M_PROTO12	0x00800000 /* protocol-specific */
 
 /*
  * Flags to purge when crossing layers.
  */
 #define	M_PROTOFLAGS \
     (M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8|\
      M_PROTO9|M_PROTO10|M_PROTO11|M_PROTO12)
 
 /*
  * Flags preserved when copying m_pkthdr.
  */
 #define M_COPYFLAGS \
     (M_PKTHDR|M_EOR|M_RDONLY|M_BCAST|M_MCAST|M_PROMISC|M_VLANTAG| \
      M_PROTOFLAGS)
 
 /*
  * Mbuf flag description for use with printf(9) %b identifier.
  */
 #define	M_FLAG_BITS \
     "\20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_BCAST\6M_MCAST" \
     "\7M_PROMISC\10M_VLANTAG"
 #define	M_FLAG_PROTOBITS \
     "\15M_PROTO1\16M_PROTO2\17M_PROTO3\20M_PROTO4\21M_PROTO5" \
     "\22M_PROTO6\23M_PROTO7\24M_PROTO8\25M_PROTO9\26M_PROTO10" \
     "\27M_PROTO11\30M_PROTO12"
 #define	M_FLAG_PRINTF (M_FLAG_BITS M_FLAG_PROTOBITS)
 
 /*
  * Network interface cards are able to hash protocol fields (such as IPv4
  * addresses and TCP port numbers) to classify packets into flows.  These flows
  * can then be used to maintain ordering while delivering packets to the OS
  * via parallel input queues, as well as to provide a stateless affinity
  * model.  NIC drivers can pass up the hash via m->m_pkthdr.flowid, and set
  * m_flag fields to indicate how the hash should be interpreted by the
  * network stack.
  *
  * Most NICs support RSS, which provides ordering and explicit affinity, and
  * use the hash m_flag bits to indicate what header fields were covered by
  * the hash.  M_HASHTYPE_OPAQUE can be set by non-RSS cards or configurations
  * that provide an opaque flow identifier, allowing for ordering and
  * distribution without explicit affinity.
  */
 /* Microsoft RSS standard hash types */
 #define	M_HASHTYPE_NONE			0
 #define	M_HASHTYPE_RSS_IPV4		1	/* IPv4 2-tuple */
 #define	M_HASHTYPE_RSS_TCP_IPV4		2	/* TCPv4 4-tuple */
 #define	M_HASHTYPE_RSS_IPV6		3	/* IPv6 2-tuple */
 #define	M_HASHTYPE_RSS_TCP_IPV6		4	/* TCPv6 4-tuple */
 #define	M_HASHTYPE_RSS_IPV6_EX		5	/* IPv6 2-tuple + ext hdrs */
 #define	M_HASHTYPE_RSS_TCP_IPV6_EX	6	/* TCPv6 4-tuple + ext hdrs */
 /* Non-standard RSS hash types */
 #define	M_HASHTYPE_RSS_UDP_IPV4		7	/* IPv4 UDP 4-tuple */
 #define	M_HASHTYPE_RSS_UDP_IPV4_EX	8	/* IPv4 UDP 4-tuple + ext hdrs */
 #define	M_HASHTYPE_RSS_UDP_IPV6		9	/* IPv6 UDP 4-tuple */
 #define	M_HASHTYPE_RSS_UDP_IPV6_EX	10	/* IPv6 UDP 4-tuple + ext hdrs */
 
 #define	M_HASHTYPE_OPAQUE		255	/* ordering, not affinity */
 
 #define	M_HASHTYPE_CLEAR(m)	((m)->m_pkthdr.rsstype = 0)
 #define	M_HASHTYPE_GET(m)	((m)->m_pkthdr.rsstype)
 #define	M_HASHTYPE_SET(m, v)	((m)->m_pkthdr.rsstype = (v))
 #define	M_HASHTYPE_TEST(m, v)	(M_HASHTYPE_GET(m) == (v))
 
 /*
  * COS/QOS class and quality of service tags.
  * It uses DSCP code points as base.
  */
 #define	QOS_DSCP_CS0		0x00
 #define	QOS_DSCP_DEF		QOS_DSCP_CS0
 #define	QOS_DSCP_CS1		0x20
 #define	QOS_DSCP_AF11		0x28
 #define	QOS_DSCP_AF12		0x30
 #define	QOS_DSCP_AF13		0x38
 #define	QOS_DSCP_CS2		0x40
 #define	QOS_DSCP_AF21		0x48
 #define	QOS_DSCP_AF22		0x50
 #define	QOS_DSCP_AF23		0x58
 #define	QOS_DSCP_CS3		0x60
 #define	QOS_DSCP_AF31		0x68
 #define	QOS_DSCP_AF32		0x70
 #define	QOS_DSCP_AF33		0x78
 #define	QOS_DSCP_CS4		0x80
 #define	QOS_DSCP_AF41		0x88
 #define	QOS_DSCP_AF42		0x90
 #define	QOS_DSCP_AF43		0x98
 #define	QOS_DSCP_CS5		0xa0
 #define	QOS_DSCP_EF		0xb8
 #define	QOS_DSCP_CS6		0xc0
 #define	QOS_DSCP_CS7		0xe0
 
 /*
  * External mbuf storage buffer types.
  */
 #define	EXT_CLUSTER	1	/* mbuf cluster */
-#define	EXT_SFBUF	2	/* sendfile(2)'s sf_bufs */
+#define	EXT_SFBUF	2	/* sendfile(2)'s sf_buf */
 #define	EXT_JUMBOP	3	/* jumbo cluster page sized */
 #define	EXT_JUMBO9	4	/* jumbo cluster 9216 bytes */
 #define	EXT_JUMBO16	5	/* jumbo cluster 16384 bytes */
 #define	EXT_PACKET	6	/* mbuf+cluster from packet zone */
 #define	EXT_MBUF	7	/* external mbuf reference (M_IOVEC) */
+#define	EXT_SFBUF_NOCACHE 8	/* sendfile(2)'s sf_buf not to be cached */
 
 #define	EXT_VENDOR1	224	/* for vendor-internal use */
 #define	EXT_VENDOR2	225	/* for vendor-internal use */
 #define	EXT_VENDOR3	226	/* for vendor-internal use */
 #define	EXT_VENDOR4	227	/* for vendor-internal use */
 
 #define	EXT_EXP1	244	/* for experimental use */
 #define	EXT_EXP2	245	/* for experimental use */
 #define	EXT_EXP3	246	/* for experimental use */
 #define	EXT_EXP4	247	/* for experimental use */
 
 #define	EXT_NET_DRV	252	/* custom ext_buf provided by net driver(s) */
 #define	EXT_MOD_TYPE	253	/* custom module's ext_buf type */
 #define	EXT_DISPOSABLE	254	/* can throw this buffer away w/page flipping */
 #define	EXT_EXTREF	255	/* has externally maintained ext_cnt ptr */
 
 /*
  * Flags for external mbuf buffer types.
  * NB: limited to the lower 24 bits.
  */
 #define	EXT_FLAG_EMBREF		0x000001	/* embedded ext_cnt, notyet */
 #define	EXT_FLAG_EXTREF		0x000002	/* external ext_cnt, notyet */
 #define	EXT_FLAG_NOFREE		0x000010	/* don't free mbuf to pool, notyet */
 
 #define	EXT_FLAG_VENDOR1	0x010000	/* for vendor-internal use */
 #define	EXT_FLAG_VENDOR2	0x020000	/* for vendor-internal use */
 #define	EXT_FLAG_VENDOR3	0x040000	/* for vendor-internal use */
 #define	EXT_FLAG_VENDOR4	0x080000	/* for vendor-internal use */
 
 #define	EXT_FLAG_EXP1		0x100000	/* for experimental use */
 #define	EXT_FLAG_EXP2		0x200000	/* for experimental use */
 #define	EXT_FLAG_EXP3		0x400000	/* for experimental use */
 #define	EXT_FLAG_EXP4		0x800000	/* for experimental use */
 
 /*
  * EXT flag description for use with printf(9) %b identifier.
  */
 #define	EXT_FLAG_BITS \
     "\20\1EXT_FLAG_EMBREF\2EXT_FLAG_EXTREF\5EXT_FLAG_NOFREE" \
     "\21EXT_FLAG_VENDOR1\22EXT_FLAG_VENDOR2\23EXT_FLAG_VENDOR3" \
     "\24EXT_FLAG_VENDOR4\25EXT_FLAG_EXP1\26EXT_FLAG_EXP2\27EXT_FLAG_EXP3" \
     "\30EXT_FLAG_EXP4"
 
 /*
  * External reference/free functions.
  */
 void sf_ext_ref(void *, void *);
 void sf_ext_free(void *, void *);
+void sf_ext_free_nocache(void *, void *);
 
 /*
  * Flags indicating checksum, segmentation and other offload work to be
  * done, or already done, by hardware or lower layers.  It is split into
  * separate inbound and outbound flags.
  *
  * Outbound flags that are set by upper protocol layers requesting lower
  * layers, or ideally the hardware, to perform these offloading tasks.
  * For outbound packets this field and its flags can be directly tested
  * against ifnet if_hwassist.
  */
 #define	CSUM_IP			0x00000001	/* IP header checksum offload */
 #define	CSUM_IP_UDP		0x00000002	/* UDP checksum offload */
 #define	CSUM_IP_TCP		0x00000004	/* TCP checksum offload */
 #define	CSUM_IP_SCTP		0x00000008	/* SCTP checksum offload */
 #define	CSUM_IP_TSO		0x00000010	/* TCP segmentation offload */
 #define	CSUM_IP_ISCSI		0x00000020	/* iSCSI checksum offload */
 
 #define	CSUM_IP6_UDP		0x00000200	/* UDP checksum offload */
 #define	CSUM_IP6_TCP		0x00000400	/* TCP checksum offload */
 #define	CSUM_IP6_SCTP		0x00000800	/* SCTP checksum offload */
 #define	CSUM_IP6_TSO		0x00001000	/* TCP segmentation offload */
 #define	CSUM_IP6_ISCSI		0x00002000	/* iSCSI checksum offload */
 
 /* Inbound checksum support where the checksum was verified by hardware. */
 #define	CSUM_L3_CALC		0x01000000	/* calculated layer 3 csum */
 #define	CSUM_L3_VALID		0x02000000	/* checksum is correct */
 #define	CSUM_L4_CALC		0x04000000	/* calculated layer 4 csum */
 #define	CSUM_L4_VALID		0x08000000	/* checksum is correct */
 #define	CSUM_L5_CALC		0x10000000	/* calculated layer 5 csum */
 #define	CSUM_L5_VALID		0x20000000	/* checksum is correct */
 #define	CSUM_COALESED		0x40000000	/* contains merged segments */
 
 /*
  * CSUM flag description for use with printf(9) %b identifier.
  */
 #define	CSUM_BITS \
     "\20\1CSUM_IP\2CSUM_IP_UDP\3CSUM_IP_TCP\4CSUM_IP_SCTP\5CSUM_IP_TSO" \
     "\6CSUM_IP_ISCSI" \
     "\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP\15CSUM_IP6_TSO" \
     "\16CSUM_IP6_ISCSI" \
     "\31CSUM_L3_CALC\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID" \
     "\35CSUM_L5_CALC\36CSUM_L5_VALID\37CSUM_COALESED"
 
 /* CSUM flags compatibility mappings. */
 #define	CSUM_IP_CHECKED		CSUM_L3_CALC
 #define	CSUM_IP_VALID		CSUM_L3_VALID
 #define	CSUM_DATA_VALID		CSUM_L4_VALID
 #define	CSUM_PSEUDO_HDR		CSUM_L4_CALC
 #define	CSUM_SCTP_VALID		CSUM_L4_VALID
 #define	CSUM_DELAY_DATA		(CSUM_TCP|CSUM_UDP)
 #define	CSUM_DELAY_IP		CSUM_IP		/* Only v4, no v6 IP hdr csum */
 #define	CSUM_DELAY_DATA_IPV6	(CSUM_TCP_IPV6|CSUM_UDP_IPV6)
 #define	CSUM_DATA_VALID_IPV6	CSUM_DATA_VALID
 #define	CSUM_TCP		CSUM_IP_TCP
 #define	CSUM_UDP		CSUM_IP_UDP
 #define	CSUM_SCTP		CSUM_IP_SCTP
 #define	CSUM_TSO		(CSUM_IP_TSO|CSUM_IP6_TSO)
 #define	CSUM_UDP_IPV6		CSUM_IP6_UDP
 #define	CSUM_TCP_IPV6		CSUM_IP6_TCP
 #define	CSUM_SCTP_IPV6		CSUM_IP6_SCTP
 
 /*
  * mbuf types describing the content of the mbuf (including external storage).
  */
 #define	MT_NOTMBUF	0	/* USED INTERNALLY ONLY! Object is not mbuf */
 #define	MT_DATA		1	/* dynamic (data) allocation */
 #define	MT_HEADER	MT_DATA	/* packet header, use M_PKTHDR instead */
 
 #define	MT_VENDOR1	4	/* for vendor-internal use */
 #define	MT_VENDOR2	5	/* for vendor-internal use */
 #define	MT_VENDOR3	6	/* for vendor-internal use */
 #define	MT_VENDOR4	7	/* for vendor-internal use */
 
 #define	MT_SONAME	8	/* socket name */
 
 #define	MT_EXP1		9	/* for experimental use */
 #define	MT_EXP2		10	/* for experimental use */
 #define	MT_EXP3		11	/* for experimental use */
 #define	MT_EXP4		12	/* for experimental use */
 
 #define	MT_CONTROL	14	/* extra-data protocol message */
 #define	MT_OOBDATA	15	/* expedited data  */
 #define	MT_NTYPES	16	/* number of mbuf types for mbtypes[] */
 
 #define	MT_NOINIT	255	/* Not a type but a flag to allocate
 				   a non-initialized mbuf */
 
 /*
  * String names of mbuf-related UMA(9) and malloc(9) types.  Exposed to
  * !_KERNEL so that monitoring tools can look up the zones with
  * libmemstat(3).
  */
 #define	MBUF_MEM_NAME		"mbuf"
 #define	MBUF_CLUSTER_MEM_NAME	"mbuf_cluster"
 #define	MBUF_PACKET_MEM_NAME	"mbuf_packet"
 #define	MBUF_JUMBOP_MEM_NAME	"mbuf_jumbo_page"
 #define	MBUF_JUMBO9_MEM_NAME	"mbuf_jumbo_9k"
 #define	MBUF_JUMBO16_MEM_NAME	"mbuf_jumbo_16k"
 #define	MBUF_TAG_MEM_NAME	"mbuf_tag"
 #define	MBUF_EXTREFCNT_MEM_NAME	"mbuf_ext_refcnt"
 
 #ifdef _KERNEL
 
 #ifdef WITNESS
 /*
  * With WITNESS enabled, issue a WITNESS_WARN() (WARN_GIANTOK |
  * WARN_SLEEPOK) whenever the allocation mode is M_WAITOK.  The argument
  * is parenthesized so that a compound expression is compared against
  * M_WAITOK as a whole.
  */
 #define	MBUF_CHECKSLEEP(how) do {					\
 	if ((how) == M_WAITOK)						\
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,		\
 		    "Sleeping in \"%s\"", __func__);			\
 } while (0)
 #else
 #define	MBUF_CHECKSLEEP(how)
 #endif
 
 /*
  * Network buffer allocation API
  *
  * The rest of it is defined in kern/kern_mbuf.c
  */
 extern uma_zone_t	zone_mbuf;
 extern uma_zone_t	zone_clust;
 extern uma_zone_t	zone_pack;
 extern uma_zone_t	zone_jumbop;
 extern uma_zone_t	zone_jumbo9;
 extern uma_zone_t	zone_jumbo16;
 extern uma_zone_t	zone_ext_refcnt;
 
 void		 mb_dupcl(struct mbuf *, const struct mbuf *);
 void		 mb_free_ext(struct mbuf *);
 int		 m_pkthdr_init(struct mbuf *, int);
 
 /*
  * Map a buffer size to its EXT_* storage type.  Only the exact sizes
  * MSIZE, MCLBYTES, MJUMPAGESIZE, MJUM9BYTES and MJUM16BYTES are accepted;
  * any other value panics.
  */
 static __inline int
 m_gettype(int size)
 {
 
 	switch (size) {
 	case MSIZE:
 		return (EXT_MBUF);
 	case MCLBYTES:
 		return (EXT_CLUSTER);
 #if MJUMPAGESIZE != MCLBYTES
 	case MJUMPAGESIZE:
 		return (EXT_JUMBOP);
 #endif
 	case MJUM9BYTES:
 		return (EXT_JUMBO9);
 	case MJUM16BYTES:
 		return (EXT_JUMBO16);
 	default:
 		panic("%s: invalid cluster size %d", __func__, size);
 	}
 }
 
 /*
  * Associate an externally reference-counted buffer with an mbuf.
  *
  * The caller supplies the reference counter; it must not be NULL (asserted)
  * and is atomically incremented here on behalf of this mbuf.  The mbuf is
  * marked M_EXT with storage type EXT_EXTREF and no ext flags, its data
  * pointer is reset to the start of buf, and size, freef, arg1 and arg2 are
  * recorded in m_ext.
  */
 static __inline void
 m_extaddref(struct mbuf *m, caddr_t buf, u_int size, u_int *ref_cnt,
     void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2)
 {
 
 	KASSERT(ref_cnt != NULL, ("%s: ref_cnt not provided", __func__));
 
 	/* Take this mbuf's reference on the shared counter. */
 	atomic_add_int(ref_cnt, 1);
 	m->m_flags |= M_EXT;
 	m->m_ext.ext_buf = buf;
 	m->m_ext.ext_cnt = ref_cnt;
 	m->m_data = m->m_ext.ext_buf;
 	m->m_ext.ext_size = size;
 	m->m_ext.ext_free = freef;
 	m->m_ext.ext_arg1 = arg1;
 	m->m_ext.ext_arg2 = arg2;
 	m->m_ext.ext_type = EXT_EXTREF;
 	m->m_ext.ext_flags = 0;
 }
 
 /*
  * Map a cluster size to the UMA zone that backs it.  Only MCLBYTES,
  * MJUMPAGESIZE, MJUM9BYTES and MJUM16BYTES are valid; any other size
  * panics.
  */
 static __inline uma_zone_t
 m_getzone(int size)
 {
 
 	switch (size) {
 	case MCLBYTES:
 		return (zone_clust);
 #if MJUMPAGESIZE != MCLBYTES
 	case MJUMPAGESIZE:
 		return (zone_jumbop);
 #endif
 	case MJUM9BYTES:
 		return (zone_jumbo9);
 	case MJUM16BYTES:
 		return (zone_jumbo16);
 	default:
 		panic("%s: invalid cluster size %d", __func__, size);
 	}
 }
 
 /*
  * Initialize an mbuf with linear storage.
  *
  * Inline because the consumer text overhead will be roughly the same to
  * initialize or call a function with this many parameters and M_PKTHDR
  * should go away with constant propagation for !MGETHDR.
  */
 static __inline int
 m_init(struct mbuf *m, uma_zone_t zone __unused, int size __unused, int how,
     short type, int flags)
 {
 
 	/* Common fields: an empty mbuf using its internal data area. */
 	m->m_type = type;
 	m->m_flags = flags;
 	m->m_len = 0;
 	m->m_data = m->m_dat;
 	m->m_nextpkt = NULL;
 	m->m_next = NULL;
 
 	/* Plain mbufs are done; packet headers need m_pkthdr_init(). */
 	if ((flags & M_PKTHDR) == 0)
 		return (0);
 	return (m_pkthdr_init(m, how));
 }
 
 static __inline struct mbuf *
 m_get(int how, short type)
 {
 	struct mb_args args;
 
 	args.flags = 0;
 	args.type = type;
 	return (uma_zalloc_arg(zone_mbuf, &args, how));
 }
 
 /*
  * XXX This should be deprecated, very little use.
  */
 static __inline struct mbuf *
 m_getclr(int how, short type)
 {
 	struct mbuf *m;
 	struct mb_args args;
 
 	args.flags = 0;
 	args.type = type;
 	m = uma_zalloc_arg(zone_mbuf, &args, how);
 	if (m != NULL)
 		bzero(m->m_data, MLEN);
 	return (m);
 }
 
 static __inline struct mbuf *
 m_gethdr(int how, short type)
 {
 	struct mb_args args;
 
 	args.flags = M_PKTHDR;
 	args.type = type;
 	return (uma_zalloc_arg(zone_mbuf, &args, how));
 }
 
 static __inline struct mbuf *
 m_getcl(int how, short type, int flags)
 {
 	struct mb_args args;
 
 	args.flags = flags;
 	args.type = type;
 	return (uma_zalloc_arg(zone_pack, &args, how));
 }
 
 /*
  * Try to attach a cluster from zone_clust to an mbuf that has no external
  * storage yet (asserted).  Returns m_flags & M_EXT, i.e. non-zero only if
  * the mbuf ended up with M_EXT set.
  *
  * NOTE(review): presumably the zone constructor fills in m_ext and sets
  * M_EXT on success -- confirm in kern/kern_mbuf.c.
  */
 static __inline int
 m_clget(struct mbuf *m, int how)
 {
 
 	KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT",
 	    __func__, m));
 	/* Cleared first so a failed allocation can be detected below. */
 	m->m_ext.ext_buf = (char *)NULL;
 	/* The mbuf itself is the zone argument; the return value is unused. */
 	uma_zalloc_arg(zone_clust, m, how);
 	/*
 	 * On a cluster allocation failure, drain the packet zone and retry,
 	 * we might be able to loosen a few clusters up on the drain.
 	 */
 	if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) {
 		zone_drain(zone_pack);
 		uma_zalloc_arg(zone_clust, m, how);
 	}
 	return (m->m_flags & M_EXT);
 }
 
 /*
  * m_cljget() is different from m_clget() as it can allocate clusters without
  * attaching them to an mbuf.  In that case the return value is the pointer
  * to the cluster of the requested size.  If an mbuf was specified, it gets
  * the cluster attached to it and the return value can be safely ignored.
  * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
  */
 static __inline void *
 m_cljget(struct mbuf *m, int how, int size)
 {
 
 	/* An mbuf is optional; when given it must not have storage yet. */
 	if (m != NULL) {
 		KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT",
 		    __func__, m));
 		m->m_ext.ext_buf = NULL;
 	}
 
 	/* m_getzone() panics on an unsupported size. */
 	return (uma_zalloc_arg(m_getzone(size), m, how));
 }
 
 /*
  * Attach an already-allocated cluster "cl" of storage type "type" to mbuf
  * m.  The type selects the buffer size and the zone from which the
  * cluster's reference counter is looked up; an unknown type panics.  The
  * mbuf's data pointer is set to the start of the cluster and M_EXT is set.
  */
 static __inline void
 m_cljset(struct mbuf *m, void *cl, int type)
 {
 	uma_zone_t zone;
 	int size;
 
 	switch (type) {
 	case EXT_CLUSTER:
 		size = MCLBYTES;
 		zone = zone_clust;
 		break;
 #if MJUMPAGESIZE != MCLBYTES
 	case EXT_JUMBOP:
 		size = MJUMPAGESIZE;
 		zone = zone_jumbop;
 		break;
 #endif
 	case EXT_JUMBO9:
 		size = MJUM9BYTES;
 		zone = zone_jumbo9;
 		break;
 	case EXT_JUMBO16:
 		size = MJUM16BYTES;
 		zone = zone_jumbo16;
 		break;
 	default:
 		panic("%s: unknown cluster type %d", __func__, type);
 		break;
 	}
 
 	m->m_data = m->m_ext.ext_buf = cl;
 	/*
 	 * Clear each field separately: ext_free is a function pointer while
 	 * the two args are object pointers, so a chained assignment would
 	 * hand a void * value (not a null pointer constant) to ext_free.
 	 */
 	m->m_ext.ext_free = NULL;
 	m->m_ext.ext_arg1 = NULL;
 	m->m_ext.ext_arg2 = NULL;
 	m->m_ext.ext_size = size;
 	m->m_ext.ext_type = type;
 	m->m_ext.ext_flags = 0;
 	m->m_ext.ext_cnt = uma_find_refcnt(zone, cl);
 	m->m_flags |= M_EXT;
 }
 
 /*
  * Set the type field of mbuf m to new_type.  Nothing else is touched.
  */
 static __inline void
 m_chtype(struct mbuf *m, short new_type)
 {
 
 	m->m_type = new_type;
 }
 
 static __inline void
 m_clrprotoflags(struct mbuf *m)
 {
 
 	while (m) {
 		m->m_flags &= ~M_PROTOFLAGS;
 		m = m->m_next;
 	}
 }
 
 static __inline struct mbuf *
 m_last(struct mbuf *m)
 {
 
 	while (m->m_next)
 		m = m->m_next;
 	return (m);
 }
 
 /*
  * mbuf, cluster, and external object allocation macros (for compatibility
  * purposes).
  */
 #define	M_MOVE_PKTHDR(to, from)	m_move_pkthdr((to), (from))
 #define	MGET(m, how, type)	((m) = m_get((how), (type)))
 #define	MGETHDR(m, how, type)	((m) = m_gethdr((how), (type)))
 #define	MCLGET(m, how)		m_clget((m), (how))
 #define	MEXTADD(m, buf, size, free, arg1, arg2, flags, type)		\
     (void )m_extadd((m), (caddr_t)(buf), (size), (free), (arg1), (arg2),\
     (flags), (type), M_NOWAIT)
 #define	m_getm(m, len, how, type)					\
     m_getm2((m), (len), (how), (type), M_PKTHDR)
 
 /*
  * Evaluate TRUE if it's safe to write to the mbuf m's data region (this can
  * be both the local data payload, or an external buffer area, depending on
  * whether M_EXT is set).  An mbuf is writable when M_RDONLY is clear and
  * either it has no external storage or the external reference count is
  * exactly 1.
  */
 #define	M_WRITABLE(m)	(!((m)->m_flags & M_RDONLY) &&			\
 			 (!(((m)->m_flags & M_EXT)) ||			\
 			 (*((m)->m_ext.ext_cnt) == 1)))
 
 /* Check if the supplied mbuf has a packet header, or else panic. */
 #define	M_ASSERTPKTHDR(m)						\
 	KASSERT((m) != NULL && (m)->m_flags & M_PKTHDR,			\
 	    ("%s: no mbuf packet header!", __func__))
 
 /*
  * Ensure that the supplied mbuf is a valid, non-free mbuf.
  *
  * XXX: Broken at the moment.  Need some UMA magic to make it work again.
  * The flags are masked with 0, so the assertion can never fire; the macro
  * argument is parenthesized so the cast applies to the whole expression.
  */
 #define	M_ASSERTVALID(m)						\
 	KASSERT((((struct mbuf *)(m))->m_flags & 0) == 0,		\
 	    ("%s: attempted use of a free mbuf!", __func__))
 
 /*
  * Return the address of the start of the buffer associated with an mbuf,
  * handling external storage, packet-header mbufs, and regular data mbufs.
  */
 #define	M_START(m)							\
 	(((m)->m_flags & M_EXT) ? (m)->m_ext.ext_buf :			\
 	 ((m)->m_flags & M_PKTHDR) ? &(m)->m_pktdat[0] :		\
 	 &(m)->m_dat[0])
 
 /*
  * Return the size of the buffer associated with an mbuf, handling external
  * storage, packet-header mbufs, and regular data mbufs.
  */
 #define	M_SIZE(m)							\
 	(((m)->m_flags & M_EXT) ? (m)->m_ext.ext_size :			\
 	 ((m)->m_flags & M_PKTHDR) ? MHLEN :				\
 	 MLEN)
 
 /*
  * Set the m_data pointer of a newly allocated mbuf to place an object of the
  * specified size at the end of the mbuf, longword aligned.
  *
  * NB: Historically, we had M_ALIGN(), MH_ALIGN(), and MEXT_ALIGN() as
  * separate macros, each asserting that it was called at the proper moment.
  * This required callers to themselves test the storage type and call the
  * right one.  Rather than require callers to be aware of those layout
  * decisions, we centralize here.
  */
 static __inline void
 m_align(struct mbuf *m, int len)
 {
 #ifdef INVARIANTS
 	/* Only referenced by the KASSERT below (presumably compiled out
 	   without INVARIANTS, hence the guard). */
 	const char *msg = "%s: not a virgin mbuf";
 #endif
 	int adjust;
 
 	/* m_data must still point at the very start of the buffer. */
 	KASSERT(m->m_data == M_START(m), (msg, __func__));
 
 	/*
 	 * Advance m_data by the unused space, rounded down to a multiple of
 	 * sizeof(long), leaving at least len bytes before the buffer end.
 	 * NOTE(review): len > M_SIZE(m) is not checked here -- confirm that
 	 * callers guarantee it.
 	 */
 	adjust = M_SIZE(m) - len;
 	m->m_data += adjust &~ (sizeof(long)-1);
 }
 
 #define	M_ALIGN(m, len)		m_align(m, len)
 #define	MH_ALIGN(m, len)	m_align(m, len)
 #define	MEXT_ALIGN(m, len)	m_align(m, len)
 
 /*
  * Compute the amount of space available before the current start of data in
  * an mbuf.
  *
  * The M_WRITABLE() is a temporary, conservative safety measure: the burden
  * of checking writability of the mbuf data area rests solely with the caller.
  *
  * NB: In previous versions, M_LEADINGSPACE() would only check M_WRITABLE()
  * for mbufs with external storage.  We now allow mbuf-embedded data to be
  * read-only as well.
  */
 #define	M_LEADINGSPACE(m)						\
 	(M_WRITABLE(m) ? ((m)->m_data - M_START(m)) : 0)
 
 /*
  * Compute the amount of space available after the end of data in an mbuf.
  *
  * The M_WRITABLE() is a temporary, conservative safety measure: the burden
  * of checking writability of the mbuf data area rests solely with the caller.
  *
  * NB: In previous versions, M_TRAILINGSPACE() would only check M_WRITABLE()
  * for mbufs with external storage.  We now allow mbuf-embedded data to be
  * read-only as well.
  */
 #define	M_TRAILINGSPACE(m)						\
 	(M_WRITABLE(m) ?						\
 	    ((M_START(m) + M_SIZE(m)) - ((m)->m_data + (m)->m_len)) : 0)
 
 /*
  * Arrange to prepend space of size plen to mbuf m.  If a new mbuf must be
  * allocated, how specifies whether to wait.  If the allocation fails, the
  * original mbuf chain is freed and m is set to NULL.
  *
  * plen and how are captured in locals and expanded exactly once; the
  * sleep check uses the captured copy of how, so an argument with side
  * effects is not evaluated a second time when WITNESS is enabled.
  */
 #define	M_PREPEND(m, plen, how) do {					\
 	struct mbuf **_mmp = &(m);					\
 	struct mbuf *_mm = *_mmp;					\
 	int _mplen = (plen);						\
 	int __mhow = (how);						\
 									\
 	MBUF_CHECKSLEEP(__mhow);					\
 	if (M_LEADINGSPACE(_mm) >= _mplen) {				\
 		_mm->m_data -= _mplen;					\
 		_mm->m_len += _mplen;					\
 	} else								\
 		_mm = m_prepend(_mm, _mplen, __mhow);			\
 	if (_mm != NULL && _mm->m_flags & M_PKTHDR)			\
 		_mm->m_pkthdr.len += _mplen;				\
 	*_mmp = _mm;							\
 } while (0)
 
 /*
  * Change mbuf to new type.  This is a relatively expensive operation and
  * should be avoided.
  */
 #define	MCHTYPE(m, t)	m_chtype((m), (t))
 
 /* Length to m_copy to copy all. */
 #define	M_COPYALL	1000000000
 
 /* Compatibility with 4.3. */
 #define	m_copy(m, o, l)	m_copym((m), (o), (l), M_NOWAIT)
 
 extern int		max_datalen;	/* MHLEN - max_hdr */
 extern int		max_hdr;	/* Largest link + protocol header */
 extern int		max_linkhdr;	/* Largest link-level header */
 extern int		max_protohdr;	/* Largest protocol header */
 extern int		nmbclusters;	/* Maximum number of clusters */
 
 struct uio;
 
 void		 m_adj(struct mbuf *, int);
 int		 m_apply(struct mbuf *, int, int,
 		    int (*)(void *, void *, u_int), void *);
 int		 m_append(struct mbuf *, int, c_caddr_t);
 void		 m_cat(struct mbuf *, struct mbuf *);
 void		 m_catpkt(struct mbuf *, struct mbuf *);
 int		 m_extadd(struct mbuf *, caddr_t, u_int,
 		    void (*)(struct mbuf *, void *, void *), void *, void *,
 		    int, int, int);
 struct mbuf	*m_collapse(struct mbuf *, int, int);
 void		 m_copyback(struct mbuf *, int, int, c_caddr_t);
 void		 m_copydata(const struct mbuf *, int, int, caddr_t);
 struct mbuf	*m_copym(const struct mbuf *, int, int, int);
 struct mbuf	*m_copypacket(struct mbuf *, int);
 void		 m_copy_pkthdr(struct mbuf *, struct mbuf *);
 struct mbuf	*m_copyup(struct mbuf *, int, int);
 struct mbuf	*m_defrag(struct mbuf *, int);
 void		 m_demote_pkthdr(struct mbuf *);
 void		 m_demote(struct mbuf *, int, int);
 struct mbuf	*m_devget(char *, int, int, struct ifnet *,
 		    void (*)(char *, caddr_t, u_int));
 struct mbuf	*m_dup(const struct mbuf *, int);
 int		 m_dup_pkthdr(struct mbuf *, const struct mbuf *, int);
 u_int		 m_fixhdr(struct mbuf *);
 struct mbuf	*m_fragment(struct mbuf *, int, int);
 void		 m_freem(struct mbuf *);
 struct mbuf	*m_get2(int, int, short, int);
 struct mbuf	*m_getjcl(int, short, int, int);
 struct mbuf	*m_getm2(struct mbuf *, int, int, short, int);
 struct mbuf	*m_getptr(struct mbuf *, int, int *);
 u_int		 m_length(struct mbuf *, struct mbuf **);
 int		 m_mbuftouio(struct uio *, struct mbuf *, int);
 void		 m_move_pkthdr(struct mbuf *, struct mbuf *);
 struct mbuf	*m_prepend(struct mbuf *, int, int);
 void		 m_print(const struct mbuf *, int);
 struct mbuf	*m_pulldown(struct mbuf *, int, int, int *);
 struct mbuf	*m_pullup(struct mbuf *, int);
 int		 m_sanity(struct mbuf *, int);
 struct mbuf	*m_split(struct mbuf *, int, int);
 struct mbuf	*m_uiotombuf(struct uio *, int, int, int, int);
 struct mbuf	*m_unshare(struct mbuf *, int);
 
 /*-
  * Network packets may have annotations attached by affixing a list of
  * "packet tags" to the pkthdr structure.  Packet tags are dynamically
  * allocated semi-opaque data structures that have a fixed header
  * (struct m_tag) that specifies the size of the memory block and a
  * <cookie,type> pair that identifies it.  The cookie is a 32-bit unique
  * unsigned value used to identify a module or ABI.  By convention this value
  * is chosen as the date+time that the module is created, expressed as the
  * number of seconds since the epoch (e.g., using date -u +'%s').  The type
  * value is an ABI/module-specific value that identifies a particular
  * annotation and is private to the module.  For compatibility with systems
  * like OpenBSD that define packet tags w/o an ABI/module cookie, the value
  * PACKET_ABI_COMPAT is used to implement m_tag_get and m_tag_find
  * compatibility shim functions and several tag types are defined below.
  * Users that do not require compatibility should use a private cookie value
  * so that packet tag-related definitions can be maintained privately.
  *
  * Note that the packet tag returned by m_tag_alloc has the default memory
  * alignment implemented by malloc.  To reference private data one can use a
  * construct like:
  *
  *	struct m_tag *mtag = m_tag_alloc(...);
  *	struct foo *p = (struct foo *)(mtag+1);
  *
  * if the alignment of struct m_tag is sufficient for referencing members of
  * struct foo.  Otherwise it is necessary to embed struct m_tag within the
  * private data structure to ensure proper alignment; e.g.,
  *
  *	struct foo {
  *		struct m_tag	tag;
  *		...
  *	};
  *	struct foo *p = (struct foo *) m_tag_alloc(...);
  *	struct m_tag *mtag = &p->tag;
  */
 
 /*
  * Persistent tags stay with an mbuf until the mbuf is reclaimed.  Otherwise
  * tags are expected to ``vanish'' when they pass through a network
  * interface.  For most interfaces this happens normally as the tags are
  * reclaimed when the mbuf is free'd.  However in some special cases
  * reclaiming must be done manually.  An example is packets that pass through
  * the loopback interface.  Also, one must be careful to do this when
  * ``turning around'' packets (e.g., icmp_reflect).
  *
  * To mark a tag persistent bit-or this flag in when defining the tag id.
  * The tag will then be treated as described above.
  */
 #define	MTAG_PERSISTENT				0x800
 
 #define	PACKET_TAG_NONE				0  /* Nadda */
 
 /* Packet tags for use with PACKET_ABI_COMPAT. */
 #define	PACKET_TAG_IPSEC_IN_DONE		1  /* IPsec applied, in */
 #define	PACKET_TAG_IPSEC_OUT_DONE		2  /* IPsec applied, out */
 #define	PACKET_TAG_IPSEC_IN_CRYPTO_DONE		3  /* NIC IPsec crypto done */
 #define	PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED	4  /* NIC IPsec crypto req'ed */
 #define	PACKET_TAG_IPSEC_IN_COULD_DO_CRYPTO	5  /* NIC notifies IPsec */
 #define	PACKET_TAG_IPSEC_PENDING_TDB		6  /* Reminder to do IPsec */
 #define	PACKET_TAG_BRIDGE			7  /* Bridge processing done */
 #define	PACKET_TAG_GIF				8  /* GIF processing done */
 #define	PACKET_TAG_GRE				9  /* GRE processing done */
 #define	PACKET_TAG_IN_PACKET_CHECKSUM		10 /* NIC checksumming done */
 #define	PACKET_TAG_ENCAP			11 /* Encap.  processing */
 #define	PACKET_TAG_IPSEC_SOCKET			12 /* IPSEC socket ref */
 #define	PACKET_TAG_IPSEC_HISTORY		13 /* IPSEC history */
 #define	PACKET_TAG_IPV6_INPUT			14 /* IPV6 input processing */
 #define	PACKET_TAG_DUMMYNET			15 /* dummynet info */
 #define	PACKET_TAG_DIVERT			17 /* divert info */
 #define	PACKET_TAG_IPFORWARD			18 /* ipforward info */
 #define	PACKET_TAG_MACLABEL	(19 | MTAG_PERSISTENT) /* MAC label */
 #define	PACKET_TAG_PF		(21 | MTAG_PERSISTENT) /* PF/ALTQ information */
 #define	PACKET_TAG_RTSOCKFAM			25 /* rtsock sa family */
 #define	PACKET_TAG_IPOPTIONS			27 /* Saved IP options */
 #define	PACKET_TAG_CARP				28 /* CARP info */
 #define	PACKET_TAG_IPSEC_NAT_T_PORTS		29 /* two uint16_t */
 #define	PACKET_TAG_ND_OUTGOING			30 /* ND outgoing */
 
 /* Specific cookies and tags. */
 
 /* Packet tag routines. */
 struct m_tag	*m_tag_alloc(u_int32_t, int, int, int);
 void		 m_tag_delete(struct mbuf *, struct m_tag *);
 void		 m_tag_delete_chain(struct mbuf *, struct m_tag *);
 void		 m_tag_free_default(struct m_tag *);
 struct m_tag	*m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *);
 struct m_tag	*m_tag_copy(struct m_tag *, int);
 int		 m_tag_copy_chain(struct mbuf *, const struct mbuf *, int);
 void		 m_tag_delete_nonpersistent(struct mbuf *);
 
 /*
  * Initialize the list of tags associated with an mbuf.
  */
 static __inline void
 m_tag_init(struct mbuf *m)
 {
 
 	SLIST_INIT(&m->m_pkthdr.tags);
 }
 
 /*
  * Set up the contents of a tag.  Note that this does not fill in the free
  * method; the caller is expected to do that.
  *
  * XXX probably should be called m_tag_init, but that was already taken.
  */
 static __inline void
 m_tag_setup(struct m_tag *t, u_int32_t cookie, int type, int len)
 {
 
 	/* Identity first (<cookie, type> pair), then the payload length. */
 	t->m_tag_cookie = cookie;
 	t->m_tag_id = type;
 	t->m_tag_len = len;
 }
 
 /*
  * Reclaim resources associated with a tag.
  */
 static __inline void
 m_tag_free(struct m_tag *t)
 {
 
 	/* Dispatch to the free method stored in the tag itself. */
 	t->m_tag_free(t);
 }
 
 /*
  * Return the first tag associated with an mbuf.
  */
 static __inline struct m_tag *
 m_tag_first(struct mbuf *m)
 {
 
 	return (SLIST_FIRST(&m->m_pkthdr.tags));
 }
 
 /*
  * Return the next tag in the list of tags associated with an mbuf.
  */
 static __inline struct m_tag *
 m_tag_next(struct mbuf *m __unused, struct m_tag *t)
 {
 
 	return (SLIST_NEXT(t, m_tag_link));
 }
 
 /*
  * Prepend a tag to the list of tags associated with an mbuf.
  */
 static __inline void
 m_tag_prepend(struct mbuf *m, struct m_tag *t)
 {
 
 	SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link);
 }
 
 /*
  * Unlink a tag from the list of tags associated with an mbuf.
  */
 static __inline void
 m_tag_unlink(struct mbuf *m, struct m_tag *t)
 {
 
 	SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link);
 }
 
 /* These are for OpenBSD compatibility. */
 #define	MTAG_ABI_COMPAT		0		/* compatibility ABI */
 
 static __inline struct m_tag *
 m_tag_get(int type, int length, int wait)
 {
 	return (m_tag_alloc(MTAG_ABI_COMPAT, type, length, wait));
 }
 
 /*
  * Look up a tag of the given type under the MTAG_ABI_COMPAT cookie,
  * starting the search at "start".  Returns NULL without calling
  * m_tag_locate() when the mbuf's tag list is empty.
  */
 static __inline struct m_tag *
 m_tag_find(struct mbuf *m, int type, struct m_tag *start)
 {
 
 	if (SLIST_EMPTY(&m->m_pkthdr.tags))
 		return ((struct m_tag *)NULL);
 	return (m_tag_locate(m, MTAG_ABI_COMPAT, type, start));
 }
 
 /*
  * Release a single mbuf and return the mbuf that followed it (m_next),
  * which may be NULL.
  */
 static __inline struct mbuf *
 m_free(struct mbuf *m)
 {
 	/* Saved up front: m may be released below. */
 	struct mbuf *n = m->m_next;
 
 	/* Header mbufs that are also M_NOFREE get their tag chain deleted here. */
 	if ((m->m_flags & (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE))
 		m_tag_delete_chain(m, NULL);
 	/*
 	 * Mbufs with external storage go through mb_free_ext(); all others
 	 * are returned to zone_mbuf unless they are marked M_NOFREE.
 	 */
 	if (m->m_flags & M_EXT)
 		mb_free_ext(m);
 	else if ((m->m_flags & M_NOFREE) == 0)
 		uma_zfree(zone_mbuf, m);
 	return (n);
 }
 
 /*
  * Return the FIB number stored in the packet header of m.  The mbuf must
  * have M_PKTHDR set (asserted).
  */
 static __inline int
 rt_m_getfib(struct mbuf *m)
 {
 
 	KASSERT(m->m_flags & M_PKTHDR,
 	    ("Attempt to get FIB from non header mbuf."));
 	return (m->m_pkthdr.fibnum);
 }
 
 #define M_GETFIB(_m)   rt_m_getfib(_m)
 
 #define M_SETFIB(_m, _fib) do {						\
         KASSERT((_m)->m_flags & M_PKTHDR, ("Attempt to set FIB on non header mbuf."));	\
 	((_m)->m_pkthdr.fibnum) = (_fib);				\
 } while (0)
 
 /* flags passed as first argument for "m_ether_tcpip_hash()" */
 #define	MBUF_HASHFLAG_L2	(1 << 2)
 #define	MBUF_HASHFLAG_L3	(1 << 3)
 #define	MBUF_HASHFLAG_L4	(1 << 4)
 
 /* mbuf hashing helper routines */
 uint32_t	m_ether_tcpip_hash_init(void);
 uint32_t	m_ether_tcpip_hash(const uint32_t, const struct mbuf *, const uint32_t);
 
 #ifdef MBUF_PROFILING
  void m_profile(struct mbuf *m);
  #define M_PROFILE(m) m_profile(m)
 #else
  #define M_PROFILE(m)
 #endif
 
 struct mbufq {
 	STAILQ_HEAD(, mbuf)	mq_head;
 	int			mq_len;
 	int			mq_maxlen;
 };
 
 /*
  * Initialize an empty mbuf queue that mbufq_enqueue() will fill up to
  * maxlen packets.
  */
 static inline void
 mbufq_init(struct mbufq *mq, int maxlen)
 {
 
 	mq->mq_len = 0;
 	mq->mq_maxlen = maxlen;
 	STAILQ_INIT(&mq->mq_head);
 }
 
 /*
  * Detach everything from the queue and return the former first packet
  * (NULL if the queue was empty).  The packets are not freed here.
  */
 static inline struct mbuf *
 mbufq_flush(struct mbufq *mq)
 {
 	struct mbuf *head;
 
 	/* Grab the head before the list is reinitialized. */
 	head = STAILQ_FIRST(&mq->mq_head);
 	mq->mq_len = 0;
 	STAILQ_INIT(&mq->mq_head);
 	return (head);
 }
 
 /*
  * Empty the queue and free every packet that was on it with m_freem().
  */
 static inline void
 mbufq_drain(struct mbufq *mq)
 {
 	struct mbuf *pkt, *next;
 
 	for (pkt = mbufq_flush(mq); pkt != NULL; pkt = next) {
 		/* Fetch the successor before the packet is freed. */
 		next = STAILQ_NEXT(pkt, m_stailqpkt);
 		m_freem(pkt);
 	}
 }
 
 /*
  * Return the first packet on the queue without removing it.
  */
 static inline struct mbuf *
 mbufq_first(const struct mbufq *mq)
 {
 
 	return (STAILQ_FIRST(&mq->mq_head));
 }
 
 /*
  * Return the last packet on the queue without removing it.
  */
 static inline struct mbuf *
 mbufq_last(const struct mbufq *mq)
 {
 
 	return (STAILQ_LAST(&mq->mq_head, mbuf, m_stailqpkt));
 }
 
 /*
  * Return non-zero when the queue length has reached (or exceeded) the
  * configured maximum.
  */
 static inline int
 mbufq_full(const struct mbufq *mq)
 {
 
 	return (mq->mq_len >= mq->mq_maxlen);
 }
 
 /*
  * Return the number of packets currently accounted for in the queue.
  */
 static inline int
 mbufq_len(const struct mbufq *mq)
 {
 
 	return (mq->mq_len);
 }
 
 /*
  * Append packet m to the tail of the queue.  Returns 0 on success or
  * ENOBUFS, leaving the queue untouched, when the queue is already full.
  */
 static inline int
 mbufq_enqueue(struct mbufq *mq, struct mbuf *m)
 {
 
 	if (mq->mq_len >= mq->mq_maxlen)
 		return (ENOBUFS);
 	mq->mq_len++;
 	STAILQ_INSERT_TAIL(&mq->mq_head, m, m_stailqpkt);
 	return (0);
 }
 
 /*
  * Remove and return the first packet on the queue, or NULL if the queue
  * is empty.  The returned packet has its m_nextpkt link cleared.
  */
 static inline struct mbuf *
 mbufq_dequeue(struct mbufq *mq)
 {
 	struct mbuf *pkt;
 
 	pkt = STAILQ_FIRST(&mq->mq_head);
 	if (pkt == NULL)
 		return (NULL);
 
 	STAILQ_REMOVE_HEAD(&mq->mq_head, m_stailqpkt);
 	pkt->m_nextpkt = NULL;
 	mq->mq_len--;
 	return (pkt);
 }
 
 /*
  * Insert packet m at the head of the queue.  Unlike mbufq_enqueue(), the
  * maximum queue length is not checked here.
  */
 static inline void
 mbufq_prepend(struct mbufq *mq, struct mbuf *m)
 {
 
 	STAILQ_INSERT_HEAD(&mq->mq_head, m, m_stailqpkt);
 	mq->mq_len++;
 }
 #endif /* _KERNEL */
 #endif /* !_SYS_MBUF_H_ */
Index: projects/clang380-import/sys/sys/sf_buf.h
===================================================================
--- projects/clang380-import/sys/sys/sf_buf.h	(revision 293686)
+++ projects/clang380-import/sys/sys/sf_buf.h	(revision 293687)
@@ -1,196 +1,203 @@
 /*-
  * Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
  * Copyright (c) 2003-2004 Alan L. Cox <alc@cs.rice.edu>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_SF_BUF_H_
 #define _SYS_SF_BUF_H_
 
 struct sfstat {				/* sendfile statistics */
+	uint64_t	sf_syscalls;	/* times sendfile was called */
+	uint64_t	sf_noiocnt;	/* times sendfile didn't require I/O */
 	uint64_t	sf_iocnt;	/* times sendfile had to do disk I/O */
+	uint64_t	sf_pages_read;	/* pages read as part of a request */
+	uint64_t	sf_pages_valid;	/* pages were valid for a request */
+	uint64_t	sf_rhpages_requested;	/* readahead pages requested */
+	uint64_t	sf_rhpages_read;	/* readahead pages read */
+	uint64_t	sf_busy;	/* times aborted on a busy page */
 	uint64_t	sf_allocfail;	/* times sfbuf allocation failed */
 	uint64_t	sf_allocwait;	/* times sfbuf allocation had to wait */
 };
 
 #ifdef _KERNEL
 #include <sys/types.h>
 #include <sys/systm.h>
 #include <sys/counter.h>
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_page.h>
 
 /*
  * Sf_bufs, or sendfile(2) buffers provide a vm_page that is mapped
  * into kernel address space. Note, that they aren't used only
  * by sendfile(2)!
  *
  * Sf_bufs could be implemented as a feature of vm_page_t, but that
  * would require growth of the structure. That's why they are implemented
  * as a separate hash indexed by vm_page address. Implementation lives in
  * kern/subr_sfbuf.c. Meanwhile, most 64-bit machines have a physical map,
  * so they don't require this hash at all, thus ignore subr_sfbuf.c.
  *
  * Different 32-bit architectures demand different requirements on sf_buf
  * hash and functions. They request features in machine/vmparam.h, which
  * enable parts of this file. They can also optionally provide helpers in
  * machine/sf_buf.h
  *
  * Defines are:
  * SFBUF		This machine requires sf_buf hash.
  * 			subr_sfbuf.c should be compiled.
  * SFBUF_CPUSET		This machine can perform SFB_CPUPRIVATE mappings,
  *			that do not invalidate the cache on the rest of CPUs.
  * SFBUF_NOMD		This machine doesn't have machine/sf_buf.h
  *
  * SFBUF_OPTIONAL_DIRECT_MAP	Value of this define is used as boolean
  *				variable that tells whether machine is
  *				capable of direct map or not at runtime.
  * SFBUF_MAP		This machine provides its own sf_buf_map() and
  *			sf_buf_unmap().
  * SFBUF_PROCESS_PAGE	This machine provides sf_buf_process_page()
  *			function.
  */
 
 #ifdef SFBUF
 #if defined(SMP) && defined(SFBUF_CPUSET)
 #include <sys/_cpuset.h>
 #endif
 #include <sys/queue.h>
 
 struct sf_buf {
 	LIST_ENTRY(sf_buf)	list_entry;	/* list of buffers */
 	TAILQ_ENTRY(sf_buf)	free_entry;	/* list of buffers */
 	vm_page_t		m;		/* currently mapped page */
 	vm_offset_t		kva;		/* va of mapping */
 	int			ref_count;	/* usage of this mapping */
 #if defined(SMP) && defined(SFBUF_CPUSET)
 	cpuset_t		cpumask;	/* where mapping is valid */
 #endif
 };
 #else /* ! SFBUF */
 struct sf_buf;
 #endif /* SFBUF */
 
 #ifndef SFBUF_NOMD
 #include <machine/sf_buf.h>
 #endif
 #ifdef SFBUF_OPTIONAL_DIRECT_MAP
 #include <machine/md_var.h>
 #endif
 
 #ifdef SFBUF
 struct sf_buf *sf_buf_alloc(struct vm_page *, int);
 void sf_buf_free(struct sf_buf *);
 void sf_buf_ref(struct sf_buf *);
 
 /*
  * Return the kernel virtual address for an sf_buf.  When
  * SFBUF_OPTIONAL_DIRECT_MAP is defined and evaluates true, the sf_buf
  * pointer is treated as a vm_page_t and its physical address is
  * translated with SFBUF_PHYS_DMAP(); otherwise the kva recorded in the
  * sf_buf is returned.
  */
 static inline vm_offset_t
 sf_buf_kva(struct sf_buf *sf)
 {
 #ifdef SFBUF_OPTIONAL_DIRECT_MAP
 	if (SFBUF_OPTIONAL_DIRECT_MAP)
 		return (SFBUF_PHYS_DMAP(VM_PAGE_TO_PHYS((vm_page_t)sf)));
 #endif
 
 	return (sf->kva);
 }
 
 /*
  * Return the vm_page behind an sf_buf.  When SFBUF_OPTIONAL_DIRECT_MAP is
  * defined and evaluates true, the sf_buf pointer itself is returned cast
  * to vm_page_t; otherwise the page recorded in the sf_buf is returned.
  */
 static inline vm_page_t
 sf_buf_page(struct sf_buf *sf)
 {
 #ifdef SFBUF_OPTIONAL_DIRECT_MAP
 	if (SFBUF_OPTIONAL_DIRECT_MAP)
 		return ((vm_page_t)sf);
 #endif
 
 	return (sf->m);
 }
 
 #ifndef SFBUF_MAP
 #include <vm/pmap.h>
 
 /*
  * Default mapping helper, used when the machine does not define SFBUF_MAP:
  * enters the single page sf->m at sf->kva with pmap_qenter().  The flags
  * argument is not used by this version.
  */
 static inline void
 sf_buf_map(struct sf_buf *sf, int flags)
 {
 
 	pmap_qenter(sf->kva, &sf->m, 1);
 }
 
 /*
  * Default unmapping helper, used when the machine does not define
  * SFBUF_MAP: does nothing and always returns 0.
  * NOTE(review): the meaning of the return value is defined by the caller
  * (not visible in this header) -- confirm against kern/subr_sfbuf.c.
  */
 static inline int
 sf_buf_unmap(struct sf_buf *sf)
 {
 
 	return (0);
 }
 #endif /* SFBUF_MAP */
 
 #if defined(SMP) && defined(SFBUF_CPUSET)
 void sf_buf_shootdown(struct sf_buf *, int);
 #endif
 
 #ifdef SFBUF_PROCESS_PAGE
 boolean_t sf_buf_process_page(vm_page_t, void (*)(struct sf_buf *));
 #endif
 
 #else /* ! SFBUF */
 
 /*
  * Without SFBUF no separate sf_buf objects exist: the vm_page pointer is
  * simply returned cast to struct sf_buf *.  The pri argument is ignored.
  */
 static inline struct sf_buf *
 sf_buf_alloc(struct vm_page *m, int pri)
 {
 
 	return ((struct sf_buf *)m);
 }
 
 /*
  * Without SFBUF there is nothing to release; this is a no-op.
  */
 static inline void
 sf_buf_free(struct sf_buf *sf)
 {
 }
 
 /*
  * Without SFBUF there is no reference count to bump; this is a no-op.
  */
 static inline void
 sf_buf_ref(struct sf_buf *sf)
 {
 }
 #endif /* SFBUF */
 
 /*
  * Options to sf_buf_alloc() are specified through its flags argument.  This
  * argument's value should be the result of a bitwise or'ing of one or more
  * of the following values.
  */
 #define	SFB_CATCH	1		/* Check signals if the allocation
 					   sleeps. */
 #define	SFB_CPUPRIVATE	2		/* Create a CPU private mapping. */
 #define	SFB_DEFAULT	0
 #define	SFB_NOWAIT	4		/* Return NULL if all bufs are used. */
 
 extern counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
 #define	SFSTAT_ADD(name, val)	\
     counter_u64_add(sfstat[offsetof(struct sfstat, name) / sizeof(uint64_t)],\
 	(val))
 #define	SFSTAT_INC(name)	SFSTAT_ADD(name, 1)
 #endif /* _KERNEL */
 #endif /* !_SYS_SF_BUF_H_ */
Index: projects/clang380-import/sys/sys/sockbuf.h
===================================================================
--- projects/clang380-import/sys/sys/sockbuf.h	(revision 293686)
+++ projects/clang380-import/sys/sys/sockbuf.h	(revision 293687)
@@ -1,252 +1,252 @@
 /*-
  * Copyright (c) 1982, 1986, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)socketvar.h	8.3 (Berkeley) 2/19/95
  *
  * $FreeBSD$
  */
 #ifndef _SYS_SOCKBUF_H_
 #define _SYS_SOCKBUF_H_
 #include <sys/selinfo.h>		/* for struct selinfo */
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/_sx.h>
 
 #define	SB_MAX		(2*1024*1024)	/* default for max chars in sockbuf */
 
 /*
  * Constants for sb_flags field of struct sockbuf.
  */
 #define	SB_WAIT		0x04		/* someone is waiting for data/space */
 #define	SB_SEL		0x08		/* someone is selecting */
 #define	SB_ASYNC	0x10		/* ASYNC I/O, need signals */
 #define	SB_UPCALL	0x20		/* someone wants an upcall */
 #define	SB_NOINTR	0x40		/* operations not interruptible */
 #define	SB_AIO		0x80		/* AIO operations queued */
 #define	SB_KNOTE	0x100		/* kernel note attached */
 #define	SB_NOCOALESCE	0x200		/* don't coalesce new data into existing mbufs */
 #define	SB_IN_TOE	0x400		/* socket buffer is in the middle of an operation */
 #define	SB_AUTOSIZE	0x800		/* automatically size socket buffer */
 #define	SB_STOP		0x1000		/* backpressure indicator */
 
 #define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
 #define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
 #define	SBS_RCVATMARK		0x0040	/* at mark on input */
 
 struct mbuf;
 struct sockaddr;
 struct socket;
 struct thread;
 
 struct	xsockbuf {
 	u_int	sb_cc;
 	u_int	sb_hiwat;
 	u_int	sb_mbcnt;
 	u_int   sb_mcnt;
 	u_int   sb_ccnt;
 	u_int	sb_mbmax;
 	int	sb_lowat;
 	int	sb_timeo;
 	short	sb_flags;
 };
 
 /*
  * Variables for socket buffering.
  */
 struct	sockbuf {
 	struct	selinfo sb_sel;	/* process selecting read/write */
 	struct	mtx sb_mtx;	/* sockbuf lock */
 	struct	sx sb_sx;	/* prevent I/O interlacing */
 	short	sb_state;	/* (c/d) socket state on sockbuf */
 #define	sb_startzero	sb_mb
 	struct	mbuf *sb_mb;	/* (c/d) the mbuf chain */
 	struct	mbuf *sb_mbtail; /* (c/d) the last mbuf in the chain */
 	struct	mbuf *sb_lastrecord;	/* (c/d) first mbuf of last
 					 * record in socket buffer */
 	struct	mbuf *sb_sndptr; /* (c/d) pointer into mbuf chain */
 	struct	mbuf *sb_fnrdy;	/* (c/d) pointer to first not ready buffer */
 	u_int	sb_sndptroff;	/* (c/d) byte offset of ptr into chain */
 	u_int	sb_acc;		/* (c/d) available chars in buffer */
 	u_int	sb_ccc;		/* (c/d) claimed chars in buffer */
 	u_int	sb_hiwat;	/* (c/d) max actual char count */
 	u_int	sb_mbcnt;	/* (c/d) chars of mbufs used */
 	u_int   sb_mcnt;        /* (c/d) number of mbufs in buffer */
 	u_int   sb_ccnt;        /* (c/d) number of clusters in buffer */
 	u_int	sb_mbmax;	/* (c/d) max chars of mbufs to use */
 	u_int	sb_ctl;		/* (c/d) non-data chars in buffer */
 	int	sb_lowat;	/* (c/d) low water mark */
 	sbintime_t	sb_timeo;	/* (c/d) timeout for read/write */
 	short	sb_flags;	/* (c/d) flags, see below */
 	int	(*sb_upcall)(struct socket *, void *, int); /* (c/d) */
 	void	*sb_upcallarg;	/* (c/d) */
 };
 
 #ifdef _KERNEL
 
 /*
  * Per-socket buffer mutex used to protect most fields in the socket
  * buffer.
  */
 #define	SOCKBUF_MTX(_sb)		(&(_sb)->sb_mtx)
 #define	SOCKBUF_LOCK_INIT(_sb, _name) \
 	mtx_init(SOCKBUF_MTX(_sb), _name, NULL, MTX_DEF)
 #define	SOCKBUF_LOCK_DESTROY(_sb)	mtx_destroy(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_LOCK(_sb)		mtx_lock(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_OWNED(_sb)		mtx_owned(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_UNLOCK(_sb)		mtx_unlock(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_LOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED)
 #define	SOCKBUF_UNLOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED)
 
 /*
  * Socket buffer private mbuf(9) flags.
  */
 #define	M_NOTREADY	M_PROTO1	/* m_data not populated yet */
 #define	M_BLOCKED	M_PROTO2	/* M_NOTREADY in front of m */
 #define	M_NOTAVAIL	(M_NOTREADY | M_BLOCKED)
 
-void	sbappend(struct sockbuf *sb, struct mbuf *m);
-void	sbappend_locked(struct sockbuf *sb, struct mbuf *m);
+void	sbappend(struct sockbuf *sb, struct mbuf *m, int flags);
+void	sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags);
 void	sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags);
 void	sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags);
 int	sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
 	    struct mbuf *m0, struct mbuf *control);
 int	sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
 	    struct mbuf *m0, struct mbuf *control);
 int	sbappendaddr_nospacecheck_locked(struct sockbuf *sb,
 	    const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control);
 int	sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
 	    struct mbuf *control);
 int	sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
 	    struct mbuf *control);
 void	sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
 void	sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
 void	sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
 struct mbuf *
 	sbcreatecontrol(caddr_t p, int size, int type, int level);
 void	sbdestroy(struct sockbuf *sb, struct socket *so);
 void	sbdrop(struct sockbuf *sb, int len);
 void	sbdrop_locked(struct sockbuf *sb, int len);
 struct mbuf *
 	sbcut_locked(struct sockbuf *sb, int len);
 void	sbdroprecord(struct sockbuf *sb);
 void	sbdroprecord_locked(struct sockbuf *sb);
 void	sbflush(struct sockbuf *sb);
 void	sbflush_locked(struct sockbuf *sb);
 void	sbrelease(struct sockbuf *sb, struct socket *so);
 void	sbrelease_internal(struct sockbuf *sb, struct socket *so);
 void	sbrelease_locked(struct sockbuf *sb, struct socket *so);
 int	sbreserve(struct sockbuf *sb, u_long cc, struct socket *so,
 	    struct thread *td);
 int	sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
 	    struct thread *td);
 struct mbuf *
 	sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff);
 struct mbuf *
 	sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff);
 void	sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb);
 int	sbwait(struct sockbuf *sb);
 int	sblock(struct sockbuf *sb, int flags);
 void	sbunlock(struct sockbuf *sb);
 void	sballoc(struct sockbuf *, struct mbuf *);
 void	sbfree(struct sockbuf *, struct mbuf *);
 int	sbready(struct sockbuf *, struct mbuf *, int);
 
 /*
  * Return how much data is available to be taken out of socket
  * buffer right now.
  */
 static inline u_int
 sbavail(struct sockbuf *sb)
 {
 
 #if 0
 	SOCKBUF_LOCK_ASSERT(sb);
 #endif
 	return (sb->sb_acc);
 }
 
 /*
  * Return how much data sits there in the socket buffer
  * It might be that some data is not yet ready to be read.
  */
 static inline u_int
 sbused(struct sockbuf *sb)
 {
 
 #if 0
 	SOCKBUF_LOCK_ASSERT(sb);
 #endif
 	return (sb->sb_ccc);
 }
 
 /*
  * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
  * This is problematical if the fields are unsigned, as the space might
  * still be negative (ccc > hiwat or mbcnt > mbmax).
  */
 static inline long
 sbspace(struct sockbuf *sb)
 {
 	int bleft, mleft;		/* size should match sockbuf fields */
 
 #if 0
 	SOCKBUF_LOCK_ASSERT(sb);
 #endif
 
 	if (sb->sb_flags & SB_STOP)
 		return(0);
 
 	bleft = sb->sb_hiwat - sb->sb_ccc;
 	mleft = sb->sb_mbmax - sb->sb_mbcnt;
 
 	return ((bleft < mleft) ? bleft : mleft);
 }
 
 #define SB_EMPTY_FIXUP(sb) do {						\
 	if ((sb)->sb_mb == NULL) {					\
 		(sb)->sb_mbtail = NULL;					\
 		(sb)->sb_lastrecord = NULL;				\
 	}								\
 } while (/*CONSTCOND*/0)
 
 #ifdef SOCKBUF_DEBUG
 void	sblastrecordchk(struct sockbuf *, const char *, int);
 void	sblastmbufchk(struct sockbuf *, const char *, int);
 void	sbcheck(struct sockbuf *, const char *, int);
 #define	SBLASTRECORDCHK(sb)	sblastrecordchk((sb), __FILE__, __LINE__)
 #define	SBLASTMBUFCHK(sb)	sblastmbufchk((sb), __FILE__, __LINE__)
 #define	SBCHECK(sb)		sbcheck((sb), __FILE__, __LINE__)
 #else
 #define	SBLASTRECORDCHK(sb)	do {} while (0)
 #define	SBLASTMBUFCHK(sb)	do {} while (0)
 #define	SBCHECK(sb)		do {} while (0)
 #endif /* SOCKBUF_DEBUG */
 
 #endif /* _KERNEL */
 
 #endif /* _SYS_SOCKBUF_H_ */
Index: projects/clang380-import/sys/sys/socket.h
===================================================================
--- projects/clang380-import/sys/sys/socket.h	(revision 293686)
+++ projects/clang380-import/sys/sys/socket.h	(revision 293687)
@@ -1,672 +1,675 @@
 /*-
  * Copyright (c) 1982, 1985, 1986, 1988, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)socket.h	8.4 (Berkeley) 2/21/94
  * $FreeBSD$
  */
 
 #ifndef _SYS_SOCKET_H_
 #define	_SYS_SOCKET_H_
 
 #include <sys/cdefs.h>
 #include <sys/_types.h>
 #include <sys/_iovec.h>
 #include <machine/_align.h>
 
 /*
  * Definitions related to sockets: types, address families, options.
  */
 
 /*
  * Data types.
  */
 #if __BSD_VISIBLE
 #ifndef _GID_T_DECLARED
 typedef	__gid_t		gid_t;
 #define	_GID_T_DECLARED
 #endif
 
 #ifndef _OFF_T_DECLARED
 typedef	__off_t		off_t;
 #define	_OFF_T_DECLARED
 #endif
 
 #ifndef _PID_T_DECLARED
 typedef	__pid_t		pid_t;
 #define	_PID_T_DECLARED
 #endif
 #endif
 
 #ifndef _SA_FAMILY_T_DECLARED
 typedef	__sa_family_t	sa_family_t;
 #define	_SA_FAMILY_T_DECLARED
 #endif
 
 #ifndef _SOCKLEN_T_DECLARED
 typedef	__socklen_t	socklen_t;
 #define	_SOCKLEN_T_DECLARED
 #endif
  
 #ifndef _SSIZE_T_DECLARED
 typedef	__ssize_t	ssize_t;
 #define	_SSIZE_T_DECLARED
 #endif
 
 #if __BSD_VISIBLE 
 #ifndef _UID_T_DECLARED
 typedef	__uid_t		uid_t;
 #define	_UID_T_DECLARED
 #endif
 #endif
 
 #ifndef _UINT32_T_DECLARED
 typedef	__uint32_t	uint32_t;
 #define	_UINT32_T_DECLARED
 #endif
 
 #ifndef _UINTPTR_T_DECLARED
 typedef	__uintptr_t	uintptr_t;
 #define	_UINTPTR_T_DECLARED
 #endif
 
 /*
  * Types
  */
 #define	SOCK_STREAM	1		/* stream socket */
 #define	SOCK_DGRAM	2		/* datagram socket */
 #define	SOCK_RAW	3		/* raw-protocol interface */
 #if __BSD_VISIBLE
 #define	SOCK_RDM	4		/* reliably-delivered message */
 #endif
 #define	SOCK_SEQPACKET	5		/* sequenced packet stream */
 
 #if __BSD_VISIBLE
 /*
  * Creation flags, OR'ed into socket() and socketpair() type argument.
  */
 #define	SOCK_CLOEXEC	0x10000000
 #define	SOCK_NONBLOCK	0x20000000
 #endif
 
 /*
  * Option flags per-socket.
  */
 #define	SO_DEBUG	0x0001		/* turn on debugging info recording */
 #define	SO_ACCEPTCONN	0x0002		/* socket has had listen() */
 #define	SO_REUSEADDR	0x0004		/* allow local address reuse */
 #define	SO_KEEPALIVE	0x0008		/* keep connections alive */
 #define	SO_DONTROUTE	0x0010		/* just use interface addresses */
 #define	SO_BROADCAST	0x0020		/* permit sending of broadcast msgs */
 #if __BSD_VISIBLE
 #define	SO_USELOOPBACK	0x0040		/* bypass hardware when possible */
 #endif
 #define	SO_LINGER	0x0080		/* linger on close if data present */
 #define	SO_OOBINLINE	0x0100		/* leave received OOB data in line */
 #if __BSD_VISIBLE
 #define	SO_REUSEPORT	0x0200		/* allow local address & port reuse */
 #define	SO_TIMESTAMP	0x0400		/* timestamp received dgram traffic */
 #define	SO_NOSIGPIPE	0x0800		/* no SIGPIPE from EPIPE */
 #define	SO_ACCEPTFILTER	0x1000		/* there is an accept filter */
 #define	SO_BINTIME	0x2000		/* timestamp received dgram traffic */
 #endif
 #define	SO_NO_OFFLOAD	0x4000		/* socket cannot be offloaded */
 #define	SO_NO_DDP	0x8000		/* disable direct data placement */
 
 /*
  * Additional options, not kept in so_options.
  */
 #define	SO_SNDBUF	0x1001		/* send buffer size */
 #define	SO_RCVBUF	0x1002		/* receive buffer size */
 #define	SO_SNDLOWAT	0x1003		/* send low-water mark */
 #define	SO_RCVLOWAT	0x1004		/* receive low-water mark */
 #define	SO_SNDTIMEO	0x1005		/* send timeout */
 #define	SO_RCVTIMEO	0x1006		/* receive timeout */
 #define	SO_ERROR	0x1007		/* get error status and clear */
 #define	SO_TYPE		0x1008		/* get socket type */
 #if __BSD_VISIBLE
 #define	SO_LABEL	0x1009		/* socket's MAC label */
 #define	SO_PEERLABEL	0x1010		/* socket's peer's MAC label */
 #define	SO_LISTENQLIMIT	0x1011		/* socket's backlog limit */
 #define	SO_LISTENQLEN	0x1012		/* socket's complete queue length */
 #define	SO_LISTENINCQLEN	0x1013	/* socket's incomplete queue length */
 #define	SO_SETFIB	0x1014		/* use this FIB to route */
 #define	SO_USER_COOKIE	0x1015		/* user cookie (dummynet etc.) */
 #define	SO_PROTOCOL	0x1016		/* get socket protocol (Linux name) */
 #define	SO_PROTOTYPE	SO_PROTOCOL	/* alias for SO_PROTOCOL (SunOS name) */
 #endif
 
 /*
  * Space reserved for new socket options added by third-party vendors.
  * This range applies to all socket option levels.  New socket options
  * in FreeBSD should always use an option value less than SO_VENDOR.
  */
 #if __BSD_VISIBLE
 #define	SO_VENDOR	0x80000000
 #endif
 
 /*
  * Structure used for manipulating linger option.
  */
 struct linger {
 	int	l_onoff;		/* option on/off */
 	int	l_linger;		/* linger time */
 };
 
 #if __BSD_VISIBLE
 struct accept_filter_arg {
 	char	af_name[16];
 	char	af_arg[256-16];
 };
 #endif
 
 /*
  * Level number for (get/set)sockopt() to apply to socket itself.
  */
 #define	SOL_SOCKET	0xffff		/* options for socket level */
 
 /*
  * Address families.
  */
 #define	AF_UNSPEC	0		/* unspecified */
 #if __BSD_VISIBLE
 #define	AF_LOCAL	AF_UNIX		/* local to host (pipes, portals) */
 #endif
 #define	AF_UNIX		1		/* standardized name for AF_LOCAL */
 #define	AF_INET		2		/* internetwork: UDP, TCP, etc. */
 #if __BSD_VISIBLE
 #define	AF_IMPLINK	3		/* arpanet imp addresses */
 #define	AF_PUP		4		/* pup protocols: e.g. BSP */
 #define	AF_CHAOS	5		/* mit CHAOS protocols */
 #define	AF_NETBIOS	6		/* SMB protocols */
 #define	AF_ISO		7		/* ISO protocols */
 #define	AF_OSI		AF_ISO
 #define	AF_ECMA		8		/* European computer manufacturers */
 #define	AF_DATAKIT	9		/* datakit protocols */
 #define	AF_CCITT	10		/* CCITT protocols, X.25 etc */
 #define	AF_SNA		11		/* IBM SNA */
 #define AF_DECnet	12		/* DECnet */
 #define AF_DLI		13		/* DEC Direct data link interface */
 #define AF_LAT		14		/* LAT */
 #define	AF_HYLINK	15		/* NSC Hyperchannel */
 #define	AF_APPLETALK	16		/* Apple Talk */
 #define	AF_ROUTE	17		/* Internal Routing Protocol */
 #define	AF_LINK		18		/* Link layer interface */
 #define	pseudo_AF_XTP	19		/* eXpress Transfer Protocol (no AF) */
 #define	AF_COIP		20		/* connection-oriented IP, aka ST II */
 #define	AF_CNT		21		/* Computer Network Technology */
 #define pseudo_AF_RTIP	22		/* Help Identify RTIP packets */
 #define	AF_IPX		23		/* Novell Internet Protocol */
 #define	AF_SIP		24		/* Simple Internet Protocol */
 #define	pseudo_AF_PIP	25		/* Help Identify PIP packets */
 #define	AF_ISDN		26		/* Integrated Services Digital Network*/
 #define	AF_E164		AF_ISDN		/* CCITT E.164 recommendation */
 #define	pseudo_AF_KEY	27		/* Internal key-management function */
 #endif
 #define	AF_INET6	28		/* IPv6 */
 #if __BSD_VISIBLE
 #define	AF_NATM		29		/* native ATM access */
 #define	AF_ATM		30		/* ATM */
 #define pseudo_AF_HDRCMPLT 31		/* Used by BPF to not rewrite headers
 					 * in interface output routine
 					 */
 #define	AF_NETGRAPH	32		/* Netgraph sockets */
 #define	AF_SLOW		33		/* 802.3ad slow protocol */
 #define	AF_SCLUSTER	34		/* Sitara cluster protocol */
 #define	AF_ARP		35
 #define	AF_BLUETOOTH	36		/* Bluetooth sockets */
 #define	AF_IEEE80211	37		/* IEEE 802.11 protocol */
 #define	AF_INET_SDP	40		/* OFED Socket Direct Protocol ipv4 */
 #define	AF_INET6_SDP	42		/* OFED Socket Direct Protocol ipv6 */
 #define	AF_MAX		42
 /*
  * When allocating a new AF_ constant, please only allocate
  * even numbered constants for FreeBSD until 134 as odd numbered AF_
  * constants 39-133 are now reserved for vendors.
  */
 #define AF_VENDOR00 39
 #define AF_VENDOR01 41
 #define AF_VENDOR02 43
 #define AF_VENDOR03 45
 #define AF_VENDOR04 47
 #define AF_VENDOR05 49
 #define AF_VENDOR06 51
 #define AF_VENDOR07 53
 #define AF_VENDOR08 55
 #define AF_VENDOR09 57
 #define AF_VENDOR10 59
 #define AF_VENDOR11 61
 #define AF_VENDOR12 63
 #define AF_VENDOR13 65
 #define AF_VENDOR14 67
 #define AF_VENDOR15 69
 #define AF_VENDOR16 71
 #define AF_VENDOR17 73
 #define AF_VENDOR18 75
 #define AF_VENDOR19 77
 #define AF_VENDOR20 79
 #define AF_VENDOR21 81
 #define AF_VENDOR22 83
 #define AF_VENDOR23 85
 #define AF_VENDOR24 87
 #define AF_VENDOR25 89
 #define AF_VENDOR26 91
 #define AF_VENDOR27 93
 #define AF_VENDOR28 95
 #define AF_VENDOR29 97
 #define AF_VENDOR30 99
 #define AF_VENDOR31 101
 #define AF_VENDOR32 103
 #define AF_VENDOR33 105
 #define AF_VENDOR34 107
 #define AF_VENDOR35 109
 #define AF_VENDOR36 111
 #define AF_VENDOR37 113
 #define AF_VENDOR38 115
 #define AF_VENDOR39 117
 #define AF_VENDOR40 119
 #define AF_VENDOR41 121
 #define AF_VENDOR42 123
 #define AF_VENDOR43 125
 #define AF_VENDOR44 127
 #define AF_VENDOR45 129
 #define AF_VENDOR46 131
 #define AF_VENDOR47 133
 #endif
 
 /*
  * Structure used by kernel to store most
  * addresses.
  */
 struct sockaddr {
 	unsigned char	sa_len;		/* total length */
 	sa_family_t	sa_family;	/* address family */
 	char		sa_data[14];	/* actually longer; address value */
 };
 #if __BSD_VISIBLE
 #define	SOCK_MAXADDRLEN	255		/* longest possible addresses */
 
 /*
  * Structure used by kernel to pass protocol
  * information in raw sockets.
  */
 struct sockproto {
 	unsigned short	sp_family;		/* address family */
 	unsigned short	sp_protocol;		/* protocol */
 };
 #endif
 
 #include <sys/_sockaddr_storage.h>
 
 #if __BSD_VISIBLE
 /*
  * Protocol families, same as address families for now.
  */
 #define	PF_UNSPEC	AF_UNSPEC
 #define	PF_LOCAL	AF_LOCAL
 #define	PF_UNIX		PF_LOCAL	/* backward compatibility */
 #define	PF_INET		AF_INET
 #define	PF_IMPLINK	AF_IMPLINK
 #define	PF_PUP		AF_PUP
 #define	PF_CHAOS	AF_CHAOS
 #define	PF_NETBIOS	AF_NETBIOS
 #define	PF_ISO		AF_ISO
 #define	PF_OSI		AF_ISO
 #define	PF_ECMA		AF_ECMA
 #define	PF_DATAKIT	AF_DATAKIT
 #define	PF_CCITT	AF_CCITT
 #define	PF_SNA		AF_SNA
 #define PF_DECnet	AF_DECnet
 #define PF_DLI		AF_DLI
 #define PF_LAT		AF_LAT
 #define	PF_HYLINK	AF_HYLINK
 #define	PF_APPLETALK	AF_APPLETALK
 #define	PF_ROUTE	AF_ROUTE
 #define	PF_LINK		AF_LINK
 #define	PF_XTP		pseudo_AF_XTP	/* really just proto family, no AF */
 #define	PF_COIP		AF_COIP
 #define	PF_CNT		AF_CNT
 #define	PF_SIP		AF_SIP
 #define	PF_IPX		AF_IPX
 #define PF_RTIP		pseudo_AF_RTIP	/* same format as AF_INET */
 #define PF_PIP		pseudo_AF_PIP
 #define	PF_ISDN		AF_ISDN
 #define	PF_KEY		pseudo_AF_KEY
 #define	PF_INET6	AF_INET6
 #define	PF_NATM		AF_NATM
 #define	PF_ATM		AF_ATM
 #define	PF_NETGRAPH	AF_NETGRAPH
 #define	PF_SLOW		AF_SLOW
 #define PF_SCLUSTER	AF_SCLUSTER
 #define	PF_ARP		AF_ARP
 #define	PF_BLUETOOTH	AF_BLUETOOTH
 #define	PF_IEEE80211	AF_IEEE80211
 #define	PF_INET_SDP	AF_INET_SDP
 #define	PF_INET6_SDP	AF_INET6_SDP
 
 #define	PF_MAX		AF_MAX
 
 /*
  * Definitions for network related sysctl, CTL_NET.
  *
  * Second level is protocol family.
  * Third level is protocol number.
  *
  * Further levels are defined by the individual families.
  */
 
 /*
  * PF_ROUTE - Routing table
  *
  * Three additional levels are defined:
  *	Fourth: address family, 0 is wildcard
  *	Fifth: type of info, defined below
  *	Sixth: flag(s) to mask with for NET_RT_FLAGS
  */
 #define NET_RT_DUMP	1		/* dump; may limit to a.f. */
 #define NET_RT_FLAGS	2		/* by flags, e.g. RESOLVING */
 #define NET_RT_IFLIST	3		/* survey interface list */
 #define	NET_RT_IFMALIST	4		/* return multicast address list */
 #define	NET_RT_IFLISTL	5		/* Survey interface list, using 'l'en
 					 * versions of msghdr structs. */
 #endif /* __BSD_VISIBLE */
 
 /*
  * Maximum queue length specifiable by listen.
  */
 #define	SOMAXCONN	128
 
 /*
  * Message header for recvmsg and sendmsg calls.
  * Used value-result for recvmsg, value only for sendmsg.
  */
 struct msghdr {
 	void		*msg_name;		/* optional address */
 	socklen_t	 msg_namelen;		/* size of address */
 	struct iovec	*msg_iov;		/* scatter/gather array */
 	int		 msg_iovlen;		/* # elements in msg_iov */
 	void		*msg_control;		/* ancillary data, see below */
 	socklen_t	 msg_controllen;	/* ancillary data buffer len */
 	int		 msg_flags;		/* flags on received message */
 };
 
 #define	MSG_OOB		0x1		/* process out-of-band data */
 #define	MSG_PEEK	0x2		/* peek at incoming message */
 #define	MSG_DONTROUTE	0x4		/* send without using routing tables */
 #define	MSG_EOR		0x8		/* data completes record */
 #define	MSG_TRUNC	0x10		/* data discarded before delivery */
 #define	MSG_CTRUNC	0x20		/* control data lost before delivery */
 #define	MSG_WAITALL	0x40		/* wait for full request or error */
 #if __POSIX_VISIBLE >= 200809
 #define	MSG_NOSIGNAL	0x20000		/* do not generate SIGPIPE on EOF */
 #endif
 #if __BSD_VISIBLE
 #define	MSG_DONTWAIT	0x80		/* this message should be nonblocking */
 #define	MSG_EOF		0x100		/* data completes connection */
 #define	MSG_NOTIFICATION 0x2000         /* SCTP notification */
 #define	MSG_NBIO	0x4000		/* FIONBIO mode, used by fifofs */
 #define	MSG_COMPAT      0x8000		/* used in sendit() */
 #define	MSG_CMSG_CLOEXEC 0x40000	/* make received fds close-on-exec */
 #endif
 #ifdef _KERNEL
 #define	MSG_SOCALLBCK   0x10000		/* for use by socket callbacks - soreceive (TCP) */
 #endif
 
 /*
  * Header for ancillary data objects in msg_control buffer.
  * Used for additional information with/about a datagram
  * not expressible by flags.  The format is a sequence
  * of message elements headed by cmsghdr structures.
  */
 struct cmsghdr {
 	socklen_t	cmsg_len;		/* data byte count, including hdr */
 	int		cmsg_level;		/* originating protocol */
 	int		cmsg_type;		/* protocol-specific type */
 /* followed by	u_char  cmsg_data[]; */
 };
 
 #if __BSD_VISIBLE
 /*
  * While we may have more groups than this, the cmsgcred struct must
  * be able to fit in an mbuf and we have historically supported a
  * maximum of 16 groups.
 */
 #define CMGROUP_MAX 16
 
 /*
  * Credentials structure, used to verify the identity of a peer
  * process that has sent us a message. This is allocated by the
  * peer process but filled in by the kernel. This prevents the
  * peer from lying about its identity. (Note that cmcred_groups[0]
  * is the effective GID.)
  */
 struct cmsgcred {
 	pid_t	cmcred_pid;		/* PID of sending process */
 	uid_t	cmcred_uid;		/* real UID of sending process */
 	uid_t	cmcred_euid;		/* effective UID of sending process */
 	gid_t	cmcred_gid;		/* real GID of sending process */
 	short	cmcred_ngroups;		/* number or groups */
 	gid_t	cmcred_groups[CMGROUP_MAX];	/* groups */
 };
 
 /*
  * Socket credentials.
  */
 struct sockcred {
 	uid_t	sc_uid;			/* real user id */
 	uid_t	sc_euid;		/* effective user id */
 	gid_t	sc_gid;			/* real group id */
 	gid_t	sc_egid;		/* effective group id */
 	int	sc_ngroups;		/* number of supplemental groups */
 	gid_t	sc_groups[1];		/* variable length */
 };
 
 /*
  * Compute size of a sockcred structure with groups.
  */
 #define	SOCKCREDSIZE(ngrps) \
 	(sizeof(struct sockcred) + (sizeof(gid_t) * ((ngrps) - 1)))
 
 #endif /* __BSD_VISIBLE */
 
 /* given pointer to struct cmsghdr, return pointer to data */
 #define	CMSG_DATA(cmsg)		((unsigned char *)(cmsg) + \
 				 _ALIGN(sizeof(struct cmsghdr)))
 
 /* given pointer to struct cmsghdr, return pointer to next cmsghdr */
 #define	CMSG_NXTHDR(mhdr, cmsg)	\
 	((char *)(cmsg) == NULL ? CMSG_FIRSTHDR(mhdr) : \
 	    ((char *)(cmsg) + _ALIGN(((struct cmsghdr *)(cmsg))->cmsg_len) + \
 	  _ALIGN(sizeof(struct cmsghdr)) > \
 	    (char *)(mhdr)->msg_control + (mhdr)->msg_controllen) ? \
 	    (struct cmsghdr *)0 : \
 	    (struct cmsghdr *)(void *)((char *)(cmsg) + \
 	    _ALIGN(((struct cmsghdr *)(cmsg))->cmsg_len)))
 
 /*
  * RFC 2292 requires to check msg_controllen, in case that the kernel returns
  * an empty list for some reasons.
  */
 #define	CMSG_FIRSTHDR(mhdr) \
 	((mhdr)->msg_controllen >= sizeof(struct cmsghdr) ? \
 	 (struct cmsghdr *)(mhdr)->msg_control : \
 	 (struct cmsghdr *)NULL)
 
 #if __BSD_VISIBLE
 /* RFC 2292 additions */
 #define	CMSG_SPACE(l)		(_ALIGN(sizeof(struct cmsghdr)) + _ALIGN(l))
 #define	CMSG_LEN(l)		(_ALIGN(sizeof(struct cmsghdr)) + (l))
 #endif
 
 #ifdef _KERNEL
 #define	CMSG_ALIGN(n)	_ALIGN(n)
 #endif
 
 /* "Socket"-level control message types: */
 #define	SCM_RIGHTS	0x01		/* access rights (array of int) */
 #if __BSD_VISIBLE
 #define	SCM_TIMESTAMP	0x02		/* timestamp (struct timeval) */
 #define	SCM_CREDS	0x03		/* process creds (struct cmsgcred) */
 #define	SCM_BINTIME	0x04		/* timestamp (struct bintime) */
 #endif
 
 #if __BSD_VISIBLE
 /*
  * 4.3 compat sockaddr, move to compat file later
  */
 struct osockaddr {
 	unsigned short sa_family;	/* address family */
 	char	sa_data[14];		/* up to 14 bytes of direct address */
 };
 
 /*
  * 4.3-compat message header (move to compat file later).
  */
 struct omsghdr {
 	char	*msg_name;		/* optional address */
 	int	msg_namelen;		/* size of address */
 	struct	iovec *msg_iov;		/* scatter/gather array */
 	int	msg_iovlen;		/* # elements in msg_iov */
 	char	*msg_accrights;		/* access rights sent/received */
 	int	msg_accrightslen;
 };
 #endif
 
 /*
  * howto arguments for shutdown(2), specified by Posix.1g.
  */
 #define	SHUT_RD		0		/* shut down the reading side */
 #define	SHUT_WR		1		/* shut down the writing side */
 #define	SHUT_RDWR	2		/* shut down both sides */
 
 #if __BSD_VISIBLE
 /* for SCTP */
 /* we cheat and use the SHUT_XX defines for these */
 #define PRU_FLUSH_RD     SHUT_RD
 #define PRU_FLUSH_WR     SHUT_WR
 #define PRU_FLUSH_RDWR   SHUT_RDWR
 #endif
 
 
 #if __BSD_VISIBLE
 /*
  * sendfile(2) header/trailer struct
  */
 struct sf_hdtr {
 	struct iovec *headers;	/* pointer to an array of header struct iovec's */
 	int hdr_cnt;		/* number of header iovec's */
 	struct iovec *trailers;	/* pointer to an array of trailer struct iovec's */
 	int trl_cnt;		/* number of trailer iovec's */
 };
 
 /*
  * Sendfile-specific flag(s)
  */
 #define	SF_NODISKIO     0x00000001
-#define	SF_MNOWAIT	0x00000002
+#define	SF_MNOWAIT	0x00000002	/* obsolete */
 #define	SF_SYNC		0x00000004
+#define	SF_NOCACHE	0x00000010
+#define	SF_FLAGS(rh, flags)	(((rh) << 16) | (flags))
 
 #ifdef _KERNEL
 #define	SFK_COMPAT	0x00000001
+#define	SF_READAHEAD(flags)	((flags) >> 16)
 #endif /* _KERNEL */
 #endif /* __BSD_VISIBLE */
 
 #ifndef	_KERNEL
 
 #include <sys/cdefs.h>
 
 __BEGIN_DECLS
 int	accept(int, struct sockaddr * __restrict, socklen_t * __restrict);
 int	bind(int, const struct sockaddr *, socklen_t);
 int	connect(int, const struct sockaddr *, socklen_t);
 #if __BSD_VISIBLE
 int	accept4(int, struct sockaddr * __restrict, socklen_t * __restrict, int);
 int	bindat(int, int, const struct sockaddr *, socklen_t);
 int	connectat(int, int, const struct sockaddr *, socklen_t);
 #endif
 int	getpeername(int, struct sockaddr * __restrict, socklen_t * __restrict);
 int	getsockname(int, struct sockaddr * __restrict, socklen_t * __restrict);
 int	getsockopt(int, int, int, void * __restrict, socklen_t * __restrict);
 int	listen(int, int);
 ssize_t	recv(int, void *, size_t, int);
 ssize_t	recvfrom(int, void *, size_t, int, struct sockaddr * __restrict, socklen_t * __restrict);
 ssize_t	recvmsg(int, struct msghdr *, int);
 ssize_t	send(int, const void *, size_t, int);
 ssize_t	sendto(int, const void *,
 	    size_t, int, const struct sockaddr *, socklen_t);
 ssize_t	sendmsg(int, const struct msghdr *, int);
 #if __BSD_VISIBLE
 int	sendfile(int, int, off_t, size_t, struct sf_hdtr *, off_t *, int);
 int	setfib(int);
 #endif
 int	setsockopt(int, int, int, const void *, socklen_t);
 int	shutdown(int, int);
 int	sockatmark(int);
 int	socket(int, int, int);
 int	socketpair(int, int, int, int *);
 __END_DECLS
 
 #endif /* !_KERNEL */
 
 #ifdef _KERNEL
 struct socket;
 
 struct tcpcb *so_sototcpcb(struct socket *so);
 struct inpcb *so_sotoinpcb(struct socket *so);
 struct sockbuf *so_sockbuf_snd(struct socket *);
 struct sockbuf *so_sockbuf_rcv(struct socket *);
 
 int so_state_get(const struct socket *);
 void so_state_set(struct socket *, int);
 
 int so_options_get(const struct socket *);
 void so_options_set(struct socket *, int);
 
 int so_error_get(const struct socket *);
 void so_error_set(struct socket *, int);
 
 int so_linger_get(const struct socket *);
 void so_linger_set(struct socket *, int);
 
 struct protosw *so_protosw_get(const struct socket *);
 void so_protosw_set(struct socket *, struct protosw *);
 
 void so_sorwakeup_locked(struct socket *so);
 void so_sowwakeup_locked(struct socket *so);
 
 void so_sorwakeup(struct socket *so);
 void so_sowwakeup(struct socket *so);
 
 void so_lock(struct socket *so);
 void so_unlock(struct socket *so);
 
 void so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg);
 
 #endif
 
 
 #endif /* !_SYS_SOCKET_H_ */
Index: projects/clang380-import/sys/sys/sysent.h
===================================================================
--- projects/clang380-import/sys/sys/sysent.h	(revision 293686)
+++ projects/clang380-import/sys/sys/sysent.h	(revision 293687)
@@ -1,278 +1,279 @@
 /*-
  * Copyright (c) 1982, 1988, 1991 The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_SYSENT_H_
 #define	_SYS_SYSENT_H_
 
 #include <bsm/audit.h>
 
 struct rlimit;
 struct sysent;
 struct thread;
 struct ksiginfo;
 struct syscall_args;
 
 enum systrace_probe_t {
 	SYSTRACE_ENTRY,
 	SYSTRACE_RETURN,
 };
 
 typedef	int	sy_call_t(struct thread *, void *);
 
 typedef	void	(*systrace_probe_func_t)(struct syscall_args *,
 		    enum systrace_probe_t, int);
 typedef	void	(*systrace_args_func_t)(int, void *, uint64_t *, int *);
 
 extern systrace_probe_func_t	systrace_probe_func;
 
 struct sysent {			/* system call table */
 	int	sy_narg;	/* number of arguments */
 	sy_call_t *sy_call;	/* implementing function */
 	au_event_t sy_auevent;	/* audit event associated with syscall */
 	systrace_args_func_t sy_systrace_args_func;
 				/* optional argument conversion function. */
 	u_int32_t sy_entry;	/* DTrace entry ID for systrace. */
 	u_int32_t sy_return;	/* DTrace return ID for systrace. */
 	u_int32_t sy_flags;	/* General flags for system calls. */
 	u_int32_t sy_thrcnt;
 };
 
 /*
  * A system call is permitted in capability mode.
  */
 #define	SYF_CAPENABLED	0x00000001
 
 #define	SY_THR_FLAGMASK	0x7
 #define	SY_THR_STATIC	0x1
 #define	SY_THR_DRAINING	0x2
 #define	SY_THR_ABSENT	0x4
 #define	SY_THR_INCR	0x8
 
 #ifdef KLD_MODULE
 #define	SY_THR_STATIC_KLD	0
 #else
 #define	SY_THR_STATIC_KLD	SY_THR_STATIC
 #endif
 
 struct image_params;
 struct __sigset;
 struct trapframe;
 struct vnode;
 
 struct sysentvec {
 	int		sv_size;	/* number of entries */
 	struct sysent	*sv_table;	/* pointer to sysent */
 	u_int		sv_mask;	/* optional mask to index */
 	int		sv_errsize;	/* size of errno translation table */
 	int 		*sv_errtbl;	/* errno translation table */
 	int		(*sv_transtrap)(int, int);
 					/* translate trap-to-signal mapping */
 	int		(*sv_fixup)(register_t **, struct image_params *);
 					/* stack fixup function */
 	void		(*sv_sendsig)(void (*)(int), struct ksiginfo *, struct __sigset *);
 			    		/* send signal */
 	char 		*sv_sigcode;	/* start of sigtramp code */
 	int 		*sv_szsigcode;	/* size of sigtramp code */
 	char		*sv_name;	/* name of binary type */
 	int		(*sv_coredump)(struct thread *, struct vnode *, off_t, int);
 					/* function to dump core, or NULL */
 	int		(*sv_imgact_try)(struct image_params *);
 	int		sv_minsigstksz;	/* minimum signal stack size */
 	int		sv_pagesize;	/* pagesize */
 	vm_offset_t	sv_minuser;	/* VM_MIN_ADDRESS */
 	vm_offset_t	sv_maxuser;	/* VM_MAXUSER_ADDRESS */
 	vm_offset_t	sv_usrstack;	/* USRSTACK */
 	vm_offset_t	sv_psstrings;	/* PS_STRINGS */
 	int		sv_stackprot;	/* vm protection for stack */
 	register_t	*(*sv_copyout_strings)(struct image_params *);
 	void		(*sv_setregs)(struct thread *, struct image_params *,
 			    u_long);
 	void		(*sv_fixlimit)(struct rlimit *, int);
 	u_long		*sv_maxssiz;
 	u_int		sv_flags;
 	void		(*sv_set_syscall_retval)(struct thread *, int);
 	int		(*sv_fetch_syscall_args)(struct thread *, struct
 			    syscall_args *);
 	const char	**sv_syscallnames;
 	vm_offset_t	sv_timekeep_base;
 	vm_offset_t	sv_shared_page_base;
 	vm_offset_t	sv_shared_page_len;
 	vm_offset_t	sv_sigcode_base;
 	void		*sv_shared_page_obj;
 	void		(*sv_schedtail)(struct thread *);
 	void		(*sv_thread_detach)(struct thread *);
+	int		(*sv_trap)(struct thread *);
 };
 
 #define	SV_ILP32	0x000100	/* 32-bit executable. */
 #define	SV_LP64		0x000200	/* 64-bit executable. */
 #define	SV_IA32		0x004000	/* Intel 32-bit executable. */
 #define	SV_AOUT		0x008000	/* a.out executable. */
 #define	SV_SHP		0x010000	/* Shared page. */
 #define	SV_CAPSICUM	0x020000	/* Force cap_enter() on startup. */
 #define	SV_TIMEKEEP	0x040000
 
 #define	SV_ABI_MASK	0xff
 #define	SV_PROC_FLAG(p, x)	((p)->p_sysent->sv_flags & (x))
 #define	SV_PROC_ABI(p)		((p)->p_sysent->sv_flags & SV_ABI_MASK)
 #define	SV_CURPROC_FLAG(x)	SV_PROC_FLAG(curproc, x)
 #define	SV_CURPROC_ABI()	SV_PROC_ABI(curproc)
 /* same as ELFOSABI_XXX, to prevent header pollution */
 #define	SV_ABI_LINUX	3
 #define	SV_ABI_FREEBSD 	9
 #define	SV_ABI_CLOUDABI	17
 #define	SV_ABI_UNDEF	255
 
 #ifdef _KERNEL
 extern struct sysentvec aout_sysvec;
 extern struct sysentvec elf_freebsd_sysvec;
 extern struct sysentvec null_sysvec;
 extern struct sysent sysent[];
 extern const char *syscallnames[];
 
 #if defined(__amd64__)
 extern int i386_read_exec;
 #endif
 
 #define	NO_SYSCALL (-1)
 
 struct module;
 
 struct syscall_module_data {
 	int	(*chainevh)(struct module *, int, void *); /* next handler */
 	void	*chainarg;		/* arg for next event handler */
 	int	*offset;		/* offset into sysent */
 	struct sysent *new_sysent;	/* new sysent */
 	struct sysent old_sysent;	/* old sysent */
 	int	flags;			/* flags for syscall_register */
 };
 
 #define	MAKE_SYSENT(syscallname)				\
 static struct sysent syscallname##_sysent = {			\
 	(sizeof(struct syscallname ## _args )			\
 	    / sizeof(register_t)),				\
 	(sy_call_t *)& sys_##syscallname,	       		\
 	SYS_AUE_##syscallname					\
 }
 
 #define	MAKE_SYSENT_COMPAT(syscallname)				\
 static struct sysent syscallname##_sysent = {			\
 	(sizeof(struct syscallname ## _args )			\
 	    / sizeof(register_t)),				\
 	(sy_call_t *)& syscallname,				\
 	SYS_AUE_##syscallname					\
 }
 
 #define SYSCALL_MODULE(name, offset, new_sysent, evh, arg)	\
 static struct syscall_module_data name##_syscall_mod = {	\
 	evh, arg, offset, new_sysent, { 0, NULL, AUE_NULL }	\
 };								\
 								\
 static moduledata_t name##_mod = {				\
 	"sys/" #name,						\
 	syscall_module_handler,					\
 	&name##_syscall_mod					\
 };								\
 DECLARE_MODULE(name, name##_mod, SI_SUB_SYSCALLS, SI_ORDER_MIDDLE)
 
 #define	SYSCALL_MODULE_HELPER(syscallname)			\
 static int syscallname##_syscall = SYS_##syscallname;		\
 MAKE_SYSENT(syscallname);					\
 SYSCALL_MODULE(syscallname,					\
     & syscallname##_syscall, & syscallname##_sysent,		\
     NULL, NULL)
 
 #define	SYSCALL_MODULE_PRESENT(syscallname)				\
 	(sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmnosys &&	\
 	sysent[SYS_##syscallname].sy_call != (sy_call_t *)lkmressys)
 
 /*
  * Syscall registration helpers with resource allocation handling.
  */
 struct syscall_helper_data {
 	struct sysent new_sysent;
 	struct sysent old_sysent;
 	int syscall_no;
 	int registered;
 };
 #define SYSCALL_INIT_HELPER(syscallname) {			\
     .new_sysent = {						\
 	.sy_narg = (sizeof(struct syscallname ## _args )	\
 	    / sizeof(register_t)),				\
 	.sy_call = (sy_call_t *)& sys_ ## syscallname,		\
 	.sy_auevent = SYS_AUE_##syscallname			\
     },								\
     .syscall_no = SYS_##syscallname				\
 }
 #define SYSCALL_INIT_HELPER_COMPAT(syscallname) {		\
     .new_sysent = {						\
 	.sy_narg = (sizeof(struct syscallname ## _args )	\
 	    / sizeof(register_t)),				\
 	.sy_call = (sy_call_t *)& syscallname,			\
 	.sy_auevent = SYS_AUE_##syscallname			\
     },								\
     .syscall_no = SYS_##syscallname				\
 }
 #define SYSCALL_INIT_LAST {					\
     .syscall_no = NO_SYSCALL					\
 }
 
 int	syscall_register(int *offset, struct sysent *new_sysent,
 	    struct sysent *old_sysent, int flags);
 int	syscall_deregister(int *offset, struct sysent *old_sysent);
 int	syscall_module_handler(struct module *mod, int what, void *arg);
 int	syscall_helper_register(struct syscall_helper_data *sd, int flags);
 int	syscall_helper_unregister(struct syscall_helper_data *sd);
 
 struct proc;
 const char *syscallname(struct proc *p, u_int code);
 
 /* Special purpose system call functions. */
 struct nosys_args;
 
 int	lkmnosys(struct thread *, struct nosys_args *);
 int	lkmressys(struct thread *, struct nosys_args *);
 
 int	syscall_thread_enter(struct thread *td, struct sysent *se);
 void	syscall_thread_exit(struct thread *td, struct sysent *se);
 
 int shared_page_alloc(int size, int align);
 int shared_page_fill(int size, int align, const void *data);
 void shared_page_write(int base, int size, const void *data);
 void exec_sysvec_init(void *param);
 void exec_inittk(void);
 
 #define INIT_SYSENTVEC(name, sv)					\
     SYSINIT(name, SI_SUB_EXEC, SI_ORDER_ANY,				\
 	(sysinit_cfunc_t)exec_sysvec_init, sv);
 
 #endif /* _KERNEL */
 
 #endif /* !_SYS_SYSENT_H_ */
Index: projects/clang380-import/sys/vm/vm_domain.c
===================================================================
--- projects/clang380-import/sys/vm/vm_domain.c	(revision 293686)
+++ projects/clang380-import/sys/vm/vm_domain.c	(revision 293687)
@@ -1,374 +1,384 @@
 /*-
  * Copyright (c) 2015 Adrian Chadd <adrian@FreeBSD.org>.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer,
  *    without modification.
  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
  *    similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
  *    redistribution must be conditioned upon including a substantially
  *    similar Disclaimer requirement for further binary redistribution.
  *
  * NO WARRANTY
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
  * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGES.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_vm.h"
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #if MAXMEMDOM > 1
 #include <sys/proc.h>
 #endif
 #include <sys/queue.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/tree.h>
 #include <sys/vmmeter.h>
 #include <sys/seq.h>
 
 #include <ddb/ddb.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_phys.h>
 
 #include <vm/vm_domain.h>
 
 static __inline int
-vm_domain_rr_selectdomain(void)
+vm_domain_rr_selectdomain(int skip_domain)
 {
 #if MAXMEMDOM > 1
 	struct thread *td;
 
 	td = curthread;
 
 	td->td_dom_rr_idx++;
 	td->td_dom_rr_idx %= vm_ndomains;
+
+	/*
+	 * If skip_domain is >= 0 (callers pass -1 for "none") and
+	 * the index just chosen equals it, advance once more.  For
+	 * round robin variants which first try a fixed domain.
+	 */
+	if ((skip_domain > -1) && (td->td_dom_rr_idx == skip_domain)) {
+		td->td_dom_rr_idx++;
+		td->td_dom_rr_idx %= vm_ndomains;
+	}
 	return (td->td_dom_rr_idx);
 #else
 	return (0);
 #endif
 }
 
 /*
  * This implements a very simple set of VM domain memory allocation
  * policies and iterators.
  */
 
 /*
  * A VM domain policy represents a desired VM domain policy.
  * Iterators implement searching through VM domains in a specific
  * order.
  */
 
 /*
  * When setting a policy, the caller must establish their own
  * exclusive write protection for the contents of the domain
  * policy.
  */
 int
 vm_domain_policy_init(struct vm_domain_policy *vp)
 {
 
 	bzero(vp, sizeof(*vp));
 	vp->p.policy = VM_POLICY_NONE;
 	vp->p.domain = -1;
 	return (0);
 }
 
 int
 vm_domain_policy_set(struct vm_domain_policy *vp,
     vm_domain_policy_type_t vt, int domain)
 {
 
 	seq_write_begin(&vp->seq);
 	vp->p.policy = vt;
 	vp->p.domain = domain;
 	seq_write_end(&vp->seq);
 	return (0);
 }
 
 /*
  * Take a local copy of a policy.
  *
  * The destination policy isn't write-barriered; this is used
  * for doing local copies into something that isn't shared.
  */
 void
 vm_domain_policy_localcopy(struct vm_domain_policy *dst,
     const struct vm_domain_policy *src)
 {
 	seq_t seq;
 
 	for (;;) {
 		seq = seq_read(&src->seq);
 		*dst = *src;
 		if (seq_consistent(&src->seq, seq))
 			return;
 		cpu_spinwait();
 	}
 }
 
 /*
  * Take a write-barrier copy of a policy.
  *
  * The destination policy is write -barriered; this is used
  * for doing copies into policies that may be read by other
  * threads.
  */
 void
 vm_domain_policy_copy(struct vm_domain_policy *dst,
     const struct vm_domain_policy *src)
 {
 	seq_t seq;
 	struct vm_domain_policy d;
 
 	for (;;) {
 		seq = seq_read(&src->seq);
 		d = *src;
 		if (seq_consistent(&src->seq, seq)) {
 			seq_write_begin(&dst->seq);
 			dst->p.domain = d.p.domain;
 			dst->p.policy = d.p.policy;
 			seq_write_end(&dst->seq);
 			return;
 		}
 		cpu_spinwait();
 	}
 }
 
 int
 vm_domain_policy_validate(const struct vm_domain_policy *vp)
 {
 
 	switch (vp->p.policy) {
 	case VM_POLICY_NONE:
 	case VM_POLICY_ROUND_ROBIN:
 	case VM_POLICY_FIRST_TOUCH:
 	case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
 		if (vp->p.domain == -1)
 			return (0);
 		return (-1);
 	case VM_POLICY_FIXED_DOMAIN:
 	case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN:
 		if (vp->p.domain >= 0 && vp->p.domain < vm_ndomains)
 			return (0);
 		return (-1);
 	default:
 		return (-1);
 	}
 	return (-1);
 }
 
 int
 vm_domain_policy_cleanup(struct vm_domain_policy *vp)
 {
 
 	/* For now, empty */
 	return (0);
 }
 
 int
 vm_domain_iterator_init(struct vm_domain_iterator *vi)
 {
 
 	/* Nothing to do for now */
 	return (0);
 }
 
 /*
  * Manually setup an iterator with the given details.
  */
 int
 vm_domain_iterator_set(struct vm_domain_iterator *vi,
     vm_domain_policy_type_t vt, int domain)
 {
 
 	switch (vt) {
 	case VM_POLICY_FIXED_DOMAIN:
 		vi->policy = VM_POLICY_FIXED_DOMAIN;
 		vi->domain = domain;
 		vi->n = 1;
 		break;
 	case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN:
 		vi->policy = VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN;
 		vi->domain = domain;
 		vi->n = vm_ndomains;
 		break;
 	case VM_POLICY_FIRST_TOUCH:
 		vi->policy = VM_POLICY_FIRST_TOUCH;
 		vi->domain = PCPU_GET(domain);
 		vi->n = 1;
 		break;
 	case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
 		vi->policy = VM_POLICY_FIRST_TOUCH_ROUND_ROBIN;
 		vi->domain = PCPU_GET(domain);
 		vi->n = vm_ndomains;
 		break;
 	case VM_POLICY_ROUND_ROBIN:
 	default:
 		vi->policy = VM_POLICY_ROUND_ROBIN;
 		vi->domain = -1;
 		vi->n = vm_ndomains;
 		break;
 	}
 	return (0);
 }
 
 /*
  * Setup an iterator based on the given policy.
  */
 static inline void
 _vm_domain_iterator_set_policy(struct vm_domain_iterator *vi,
     const struct vm_domain_policy *vt)
 {
 	/*
 	 * Initialise the iterator.
 	 *
 	 * For first-touch, the initial domain is set
 	 * via the current thread CPU domain.
 	 *
 	 * For fixed-domain, it's assumed that the
 	 * caller has initialised the specific domain
 	 * it is after.
 	 */
 	switch (vt->p.policy) {
 	case VM_POLICY_FIXED_DOMAIN:
 		vi->policy = vt->p.policy;
 		vi->domain = vt->p.domain;
 		vi->n = 1;
 		break;
 	case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN:
 		vi->policy = vt->p.policy;
 		vi->domain = vt->p.domain;
 		vi->n = vm_ndomains;
 		break;
 	case VM_POLICY_FIRST_TOUCH:
 		vi->policy = vt->p.policy;
 		vi->domain = PCPU_GET(domain);
 		vi->n = 1;
 		break;
 	case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
 		vi->policy = vt->p.policy;
 		vi->domain = PCPU_GET(domain);
 		vi->n = vm_ndomains;
 		break;
 	case VM_POLICY_ROUND_ROBIN:
 	default:
 		/*
 		 * Default to round-robin policy.
 		 */
 		vi->policy = VM_POLICY_ROUND_ROBIN;
 		vi->domain = -1;
 		vi->n = vm_ndomains;
 		break;
 	}
 }
 
 void
 vm_domain_iterator_set_policy(struct vm_domain_iterator *vi,
     const struct vm_domain_policy *vt)
 {
 	seq_t seq;
 	struct vm_domain_policy vt_lcl;
 
 	for (;;) {
 		seq = seq_read(&vt->seq);
 		vt_lcl = *vt;
 		if (seq_consistent(&vt->seq, seq)) {
 			_vm_domain_iterator_set_policy(vi, &vt_lcl);
 			return;
 		}
 		cpu_spinwait();
 	}
 }
 
 /*
  * Return the next VM domain to use.
  *
  * Returns 0 w/ domain set to the next domain to use, or
  * -1 to indicate no more domains are available.
  */
 int
 vm_domain_iterator_run(struct vm_domain_iterator *vi, int *domain)
 {
 
 	/* General catch-all */
 	if (vi->n <= 0)
 		return (-1);
 
 	switch (vi->policy) {
 	case VM_POLICY_FIXED_DOMAIN:
 	case VM_POLICY_FIRST_TOUCH:
 		*domain = vi->domain;
 		vi->n--;
 		break;
 	case VM_POLICY_FIXED_DOMAIN_ROUND_ROBIN:
 	case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
 		/*
 		 * XXX TODO: skip over the rr'ed domain
 		 * if it equals the one we started with.
 		 */
 		if (vi->n == vm_ndomains)
 			*domain = vi->domain;
 		else
-			*domain = vm_domain_rr_selectdomain();
+			*domain = vm_domain_rr_selectdomain(vi->domain);
 		vi->n--;
 		break;
 	case VM_POLICY_ROUND_ROBIN:
 	default:
-		*domain = vm_domain_rr_selectdomain();
+		*domain = vm_domain_rr_selectdomain(-1);
 		vi->n--;
 		break;
 	}
 
 	return (0);
 }
 
 /*
  * Returns 1 if the iteration is done, or 0 if it has not.
 
  * This can only be called after at least one loop through
  * the iterator.  Ie, it's designed to be used as a tail
  * check of a loop, not the head check of a loop.
  */
 int
 vm_domain_iterator_isdone(struct vm_domain_iterator *vi)
 {
 
 	return (vi->n <= 0);
 }
 
 int
 vm_domain_iterator_cleanup(struct vm_domain_iterator *vi)
 {
 
 	return (0);
 }
Index: projects/clang380-import/sys
===================================================================
--- projects/clang380-import/sys	(revision 293686)
+++ projects/clang380-import/sys	(revision 293687)

Property changes on: projects/clang380-import/sys
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys:r293430-293685
Index: projects/clang380-import/tools/build/options/WITHOUT_LLVM_LIBUNWIND
===================================================================
--- projects/clang380-import/tools/build/options/WITHOUT_LLVM_LIBUNWIND	(nonexistent)
+++ projects/clang380-import/tools/build/options/WITHOUT_LLVM_LIBUNWIND	(revision 293687)
@@ -0,0 +1,2 @@
+.\" $FreeBSD$
+Set to use GCC's stack unwinder (instead of LLVM's libunwind).

Property changes on: projects/clang380-import/tools/build/options/WITHOUT_LLVM_LIBUNWIND
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/clang380-import/tools/build/options/WITH_LLVM_LIBUNWIND
===================================================================
--- projects/clang380-import/tools/build/options/WITH_LLVM_LIBUNWIND	(nonexistent)
+++ projects/clang380-import/tools/build/options/WITH_LLVM_LIBUNWIND	(revision 293687)
@@ -0,0 +1,2 @@
+.\" $FreeBSD$
+Set to use LLVM's libunwind stack unwinder (instead of GCC's unwinder).

Property changes on: projects/clang380-import/tools/build/options/WITH_LLVM_LIBUNWIND
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/clang380-import/tools/regression/geom_concat/conf.sh
===================================================================
--- projects/clang380-import/tools/regression/geom_concat/conf.sh	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_concat/conf.sh	(revision 293687)
@@ -1,8 +1,15 @@
 #!/bin/sh
 # $FreeBSD$
 
 name="$(mktemp -u concat.XXXXXX)"
 class="concat"
 base=`basename $0`
 
+gconcat_test_cleanup()
+{
+	[ -c /dev/$class/$name ] && gconcat destroy $name
+	geom_test_cleanup
+}
+trap gconcat_test_cleanup ABRT EXIT INT TERM
+
 . `dirname $0`/../geom_subr.sh
Index: projects/clang380-import/tools/regression/geom_concat/test-1.t
===================================================================
--- projects/clang380-import/tools/regression/geom_concat/test-1.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_concat/test-1.t	(revision 293687)
@@ -1,30 +1,23 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo '1..1'
 
-us=45
+us0=$(attach_md -t malloc -s 1M) || exit 1
+us1=$(attach_md -t malloc -s 2M) || exit 1
+us2=$(attach_md -t malloc -s 3M) || exit 1
 
-mdconfig -a -t malloc -s 1M -u $us || exit 1
-mdconfig -a -t malloc -s 2M -u `expr $us + 1` || exit 1
-mdconfig -a -t malloc -s 3M -u `expr $us + 2` || exit 1
-
-gconcat create $name /dev/md${us} /dev/md`expr $us + 1` /dev/md`expr $us + 2` || exit 1
+gconcat create $name /dev/$us0 /dev/$us1 /dev/$us2 || exit 1
 devwait
 
 # Size of created device should be 1MB + 2MB + 3MB.
 
 size=`diskinfo /dev/concat/${name} | awk '{print $3}'`
 
 if [ $size -eq 6291456 ]; then
 	echo "ok - Size is 6291456"
 else
 	echo "not ok - Size is 6291456"
 fi
-
-gconcat destroy $name
-mdconfig -d -u $us
-mdconfig -d -u `expr $us + 1`
-mdconfig -d -u `expr $us + 2`
Index: projects/clang380-import/tools/regression/geom_concat/test-2.t
===================================================================
--- projects/clang380-import/tools/regression/geom_concat/test-2.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_concat/test-2.t	(revision 293687)
@@ -1,35 +1,30 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo '1..1'
 
-us=45
 tsize=6
-src=`mktemp /tmp/$base.XXXXXX` || exit 1
-dst=`mktemp /tmp/$base.XXXXXX` || exit 1
+src=`mktemp $base.XXXXXX` || exit 1
+dst=`mktemp $base.XXXXXX` || exit 1
 
+us0=$(attach_md -t malloc -s 1M) || exit 1
+us1=$(attach_md -t malloc -s 2M) || exit 1
+us2=$(attach_md -t malloc -s 3M) || exit 1
+
 dd if=/dev/random of=${src} bs=1m count=$tsize >/dev/null 2>&1
 
-mdconfig -a -t malloc -s 1M -u $us || exit 1
-mdconfig -a -t malloc -s 2M -u `expr $us + 1` || exit 1
-mdconfig -a -t malloc -s 3M -u `expr $us + 2` || exit 1
-
-gconcat create $name /dev/md${us} /dev/md`expr $us + 1` /dev/md`expr $us + 2` || exit 1
+gconcat create $name /dev/$us0 /dev/$us1 /dev/$us2 || exit 1
 devwait
 
 dd if=${src} of=/dev/concat/${name} bs=1m count=$tsize >/dev/null 2>&1
 dd if=/dev/concat/${name} of=${dst} bs=1m count=$tsize >/dev/null 2>&1
 
 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then
 	echo "not ok - md5 checksum comparison"
 else
 	echo "ok - md5 checksum comparison"
 fi
 
-gconcat destroy $name
-mdconfig -d -u $us
-mdconfig -d -u `expr $us + 1`
-mdconfig -d -u `expr $us + 2`
 rm -f ${src} ${dst}
Index: projects/clang380-import/tools/regression/geom_eli/attach-d.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/attach-d.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/attach-d.t	(revision 293687)
@@ -1,38 +1,38 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
-keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1
+keyfile=`mktemp $base.XXXXXX` || exit 1
 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1
 
 echo "1..3"
 
 dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1
 
 geli init -B none -P -K $keyfile md${no}
 geli attach -d -p -k $keyfile md${no}
 if [ -c /dev/md${no}.eli ]; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 # Be sure it doesn't detach on read.
 dd if=/dev/md${no}.eli of=/dev/null 2>/dev/null
 sleep 1
 if [ -c /dev/md${no}.eli ]; then
 	echo "ok 2"
 else
 	echo "not ok 2"
 fi
 true > /dev/md${no}.eli
 sleep 1
 if [ ! -c /dev/md${no}.eli ]; then
 	echo "ok 3"
 else
 	echo "not ok 3"
 fi
 
-mdconfig -d -u $no
 rm -f $keyfile
Index: projects/clang380-import/tools/regression/geom_eli/conf.sh
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/conf.sh	(nonexistent)
+++ projects/clang380-import/tools/regression/geom_eli/conf.sh	(revision 293687)
@@ -0,0 +1,21 @@
+#!/bin/sh
+# $FreeBSD$
+
+class="eli"
+base=`basename $0`
+
+# We need to use linear probing in order to detect the first available md(4)
+# device instead of using mdconfig -a -t, because geli(8) attaches md(4) devices
+no=0
+while [ -c /dev/md$no ]; do
+	: $(( no += 1 ))
+done
+
+geli_test_cleanup()
+{
+	[ -c /dev/md${no}.eli ] && geli detach md${no}.eli
+	mdconfig -d -u $no
+}
+trap geli_test_cleanup ABRT EXIT INT TERM
+
+. `dirname $0`/../geom_subr.sh

Property changes on: projects/clang380-import/tools/regression/geom_eli/conf.sh
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/clang380-import/tools/regression/geom_eli/configure-b-B.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/configure-b-B.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/configure-b-B.t	(revision 293687)
@@ -1,130 +1,129 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1
 
 echo "1..17"
 
 geli init -B none -P -K /dev/null md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 
 geli dump md${no} | egrep 'flags: 0x0$' >/dev/null
 if [ $? -eq 0 ]; then
 	echo "ok 2"
 else
 	echo "not ok 2"
 fi
 
 geli init -B none -b -P -K /dev/null md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 3"
 else
 	echo "not ok 3"
 fi
 
 geli dump md${no} | egrep 'flags: 0x2$' >/dev/null
 if [ $? -eq 0 ]; then
 	echo "ok 4"
 else
 	echo "not ok 4"
 fi
 
 geli configure -B md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 5"
 else
 	echo "not ok 5"
 fi
 
 geli dump md${no} | egrep 'flags: 0x0$' >/dev/null
 if [ $? -eq 0 ]; then
 	echo "ok 6"
 else
 	echo "not ok 6"
 fi
 
 geli configure -b md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 7"
 else
 	echo "not ok 7"
 fi
 
 geli dump md${no} | egrep 'flags: 0x2$' >/dev/null
 if [ $? -eq 0 ]; then
 	echo "ok 8"
 else
 	echo "not ok 8"
 fi
 
 geli attach -p -k /dev/null md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 9"
 else
 	echo "not ok 9"
 fi
 
 geli list md${no}.eli | egrep '^Flags: .*BOOT' >/dev/null
 if [ $? -eq 0 ]; then
 	echo "ok 10"
 else
 	echo "not ok 10"
 fi
 
 geli configure -B md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 11"
 else
 	echo "not ok 11"
 fi
 
 geli list md${no}.eli | egrep '^Flags: .*BOOT' >/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 12"
 else
 	echo "not ok 12"
 fi
 
 geli dump md${no} | egrep 'flags: 0x0$' >/dev/null
 if [ $? -eq 0 ]; then
 	echo "ok 13"
 else
 	echo "not ok 13"
 fi
 
 geli configure -b md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 14"
 else
 	echo "not ok 14"
 fi
 
 geli list md${no}.eli | egrep '^Flags: .*BOOT' >/dev/null
 if [ $? -eq 0 ]; then
 	echo "ok 15"
 else
 	echo "not ok 15"
 fi
 
 geli dump md${no} | egrep 'flags: 0x2$' >/dev/null
 if [ $? -eq 0 ]; then
 	echo "ok 16"
 else
 	echo "not ok 16"
 fi
 
 geli detach md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 17"
 else
 	echo "not ok 17"
 fi
-
-mdconfig -d -u $no
Index: projects/clang380-import/tools/regression/geom_eli/delkey.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/delkey.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/delkey.t	(revision 293687)
@@ -1,140 +1,140 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
-keyfile1=`mktemp /tmp/$base.XXXXXX` || exit 1
-keyfile2=`mktemp /tmp/$base.XXXXXX` || exit 1
-keyfile3=`mktemp /tmp/$base.XXXXXX` || exit 1
-keyfile4=`mktemp /tmp/$base.XXXXXX` || exit 1
+keyfile1=`mktemp $base.XXXXXX` || exit 1
+keyfile2=`mktemp $base.XXXXXX` || exit 1
+keyfile3=`mktemp $base.XXXXXX` || exit 1
+keyfile4=`mktemp $base.XXXXXX` || exit 1
 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1
 
 echo "1..14"
 
 dd if=/dev/random of=${keyfile1} bs=512 count=16 >/dev/null 2>&1
 dd if=/dev/random of=${keyfile2} bs=512 count=16 >/dev/null 2>&1
 dd if=/dev/random of=${keyfile3} bs=512 count=16 >/dev/null 2>&1
 dd if=/dev/random of=${keyfile4} bs=512 count=16 >/dev/null 2>&1
 
 geli init -B none -P -K $keyfile1 md${no}
 geli attach -p -k $keyfile1 md${no}
 geli setkey -n 1 -P -K $keyfile2 md${no}
 
 # Remove key 0 for attached provider.
 geli delkey -n 0 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 geli detach md${no}
 
 # We cannot use keyfile1 anymore.
 geli attach -p -k $keyfile1 md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 2"
 else
 	echo "not ok 2"
 fi
 
 # Attach with key 1.
 geli attach -p -k $keyfile2 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 3"
 else
 	echo "not ok 3"
 fi
 
 # We cannot remove last key without -f option (for attached provider).
 geli delkey -n 1 md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 4"
 else
 	echo "not ok 4"
 fi
 
 # Remove last key for attached provider.
 geli delkey -f -n 1 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 5"
 else
 	echo "not ok 5"
 fi
 
 # If there are no valid keys, but provider is attached, we can save situation.
 geli setkey -n 0 -P -K $keyfile3 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 6"
 else
 	echo "not ok 6"
 fi
 geli detach md${no}
 
 # We cannot use keyfile2 anymore.
 geli attach -p -k $keyfile2 md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 7"
 else
 	echo "not ok 7"
 fi
 
 # Attach with key 0.
 geli attach -p -k $keyfile3 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 8"
 else
 	echo "not ok 8"
 fi
 
 # Setup key 1.
 geli setkey -n 1 -P -K $keyfile4 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 9"
 else
 	echo "not ok 9"
 fi
 geli detach md${no}
 
 # Remove key 1 for detached provider.
 geli delkey -n 1 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 10"
 else
 	echo "not ok 10"
 fi
 
 # We cannot use keyfile4 anymore.
 geli attach -p -k $keyfile4 md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 11"
 else
 	echo "not ok 11"
 fi
 
 # We cannot remove last key without -f option (for detached provider).
 geli delkey -n 0 md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 12"
 else
 	echo "not ok 12"
 fi
 
 # Remove last key for detached provider.
 geli delkey -f -n 0 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 13"
 else
 	echo "not ok 13"
 fi
 
 # We cannot use keyfile3 anymore.
 geli attach -p -k $keyfile3 md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 14"
 else
 	echo "not ok 14"
 fi
 
-mdconfig -d -u $no
 rm -f $keyfile1 $keyfile2 $keyfile3 $keyfile4
Index: projects/clang380-import/tools/regression/geom_eli/detach-l.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/detach-l.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/detach-l.t	(revision 293687)
@@ -1,44 +1,44 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
-keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1
+keyfile=`mktemp $base.XXXXXX` || exit 1
 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1
 
 echo "1..4"
 
 dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1
 
 geli init -B none -P -K $keyfile md${no}
 geli attach -p -k $keyfile md${no}
 if [ -c /dev/md${no}.eli ]; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 # Be sure it doesn't detach before 'detach -l'.
 dd if=/dev/md${no}.eli of=/dev/null 2>/dev/null
 sleep 1
 if [ -c /dev/md${no}.eli ]; then
 	echo "ok 2"
 else
 	echo "not ok 2"
 fi
 geli detach -l md${no}
 if [ -c /dev/md${no}.eli ]; then
 	echo "ok 3"
 else
 	echo "not ok 3"
 fi
 dd if=/dev/md${no}.eli of=/dev/null 2>/dev/null
 sleep 1
 if [ ! -c /dev/md${no}.eli ]; then
 	echo "ok 4"
 else
 	echo "not ok 4"
 fi
 
-mdconfig -d -u $no
 rm -f $keyfile
Index: projects/clang380-import/tools/regression/geom_eli/init-B.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/init-B.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/init-B.t	(revision 293687)
@@ -1,106 +1,104 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
-keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1
-backupfile=`mktemp /tmp/$base.XXXXXX` || exit 1
+keyfile=`mktemp $base.XXXXXX` || exit 1
+backupfile=`mktemp $base.XXXXXX` || exit 1
 
 echo "1..13"
 
 dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1
 
 mdconfig -a -t malloc -s $sectors -u $no || exit 1
 
 # -B none
 rm -f /var/backups/md${no}.eli
 geli init -B none -P -K $keyfile md${no} 2>/dev/null
 if [ ! -f /var/backups/md${no}.eli ]; then
 	echo "ok 1 - -B none"
 else
 	echo "not ok 1 - -B none"
 fi
 
 # no -B
 rm -f /var/backups/md${no}.eli
 geli init -P -K $keyfile md${no} >/dev/null 2>&1
 if [ -f /var/backups/md${no}.eli ]; then
 	echo "ok 2 - no -B"
 else
 	echo "not ok 2 - no -B"
 fi
 geli clear md${no}
 geli attach -p -k $keyfile md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 3 - no -B"
 else
 	echo "not ok 3 - no -B"
 fi
 if [ ! -c /dev/md${no}.eli ]; then
 	echo "ok 4 - no -B"
 else
 	echo "not ok 4 - no -B"
 fi
 geli restore /var/backups/md${no}.eli md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 5 - no -B"
 else
 	echo "not ok 5 - no -B"
 fi
 geli attach -p -k $keyfile md${no} 2>/dev/null
 if [ $? -eq 0 ]; then
 	echo "ok 6 - no -B"
 else
 	echo "not ok 6 - no -B"
 fi
 if [ -c /dev/md${no}.eli ]; then
 	echo "ok 7 - no -B"
 else
 	echo "not ok 7 - no -B"
 fi
 geli detach md${no}
 rm -f /var/backups/md${no}.eli
 
 # -B file
 rm -f $backupfile
 geli init -B $backupfile -P -K $keyfile md${no} >/dev/null 2>&1
 if [ -f $backupfile ]; then
 	echo "ok 8 - -B file"
 else
 	echo "not ok 8 - -B file"
 fi
 geli clear md${no}
 geli attach -p -k $keyfile md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 9 - -B file"
 else
 	echo "not ok 9 - -B file"
 fi
 if [ ! -c /dev/md${no}.eli ]; then
 	echo "ok 10 - -B file"
 else
 	echo "not ok 10 - -B file"
 fi
 geli restore $backupfile md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 11 - -B file"
 else
 	echo "not ok 11 - -B file"
 fi
 geli attach -p -k $keyfile md${no} 2>/dev/null
 if [ $? -eq 0 ]; then
 	echo "ok 12 - -B file"
 else
 	echo "not ok 12 - -B file"
 fi
 if [ -c /dev/md${no}.eli ]; then
 	echo "ok 13 - -B file"
 else
 	echo "not ok 13 - -B file"
 fi
-geli detach md${no}
-rm -f $backupfile
 
-mdconfig -d -u $no
-rm -f $keyfile
+rm -f $backupfile $keyfile
Index: projects/clang380-import/tools/regression/geom_eli/init-J.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/init-J.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/init-J.t	(revision 293687)
@@ -1,126 +1,126 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
-keyfile0=`mktemp /tmp/$base.XXXXXX` || exit 1
-keyfile1=`mktemp /tmp/$base.XXXXXX` || exit 1
-passfile0=`mktemp /tmp/$base.XXXXXX` || exit 1
-passfile1=`mktemp /tmp/$base.XXXXXX` || exit 1
+keyfile0=`mktemp $base.XXXXXX` || exit 1
+keyfile1=`mktemp $base.XXXXXX` || exit 1
+passfile0=`mktemp $base.XXXXXX` || exit 1
+passfile1=`mktemp $base.XXXXXX` || exit 1
 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1
 
 echo "1..150"
 
 dd if=/dev/random of=${keyfile0} bs=512 count=16 >/dev/null 2>&1
 dd if=/dev/random of=${keyfile1} bs=512 count=16 >/dev/null 2>&1
 dd if=/dev/random bs=512 count=16 2>/dev/null | sha1 > ${passfile0}
 dd if=/dev/random bs=512 count=16 2>/dev/null | sha1 > ${passfile1}
 
 i=1
 for iter in -1 0 64; do
 	geli init -i ${iter} -B none -J ${passfile0} -P md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli init -i ${iter} -B none -J ${passfile0} -P -K ${keyfile0} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli init -i ${iter} -B none -J ${passfile0} -K ${keyfile0} md${no} 2>/dev/null || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -k ${keyfile0} -p md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -j ${passfile0} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -j ${keyfile0} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -k ${passfile0} -p md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -j ${keyfile0} -k ${passfile0} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -j ${keyfile0} -k ${keyfile0} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -j ${passfile0} -k ${passfile0} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -j ${passfile0} -k ${keyfile0} md${no} 2>/dev/null || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli detach md${no} || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	cat ${keyfile0} | geli attach -j ${passfile0} -k - md${no} 2>/dev/null || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli detach md${no} || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	cat ${passfile0} | geli attach -j - -k ${keyfile0} md${no} 2>/dev/null || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli detach md${no} || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 
 	geli init -i ${iter} -B none -J ${passfile0} -J ${passfile1} -P md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli init -i ${iter} -B none -J ${passfile0} -J ${passfile1} -P -K ${keyfile0} -K ${keyfile1} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli init -i ${iter} -B none -J ${passfile0} -J ${passfile1} -K ${keyfile0} -K ${keyfile1} md${no} 2>/dev/null || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -k ${keyfile0} -p md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -k ${keyfile1} -p md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -j ${passfile0} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -j ${passfile1} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -k ${keyfile0} -k ${keyfile1} -p md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -j ${passfile0} -j ${passfile1} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -k ${keyfile0} -j ${passfile0} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -k ${keyfile0} -j ${passfile1} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -k ${keyfile1} -j ${passfile0} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -k ${keyfile1} -j ${passfile1} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -k ${keyfile0} -j ${passfile0} -j ${passfile1} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -k ${keyfile1} -j ${passfile0} -j ${passfile1} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -k ${keyfile0} -k ${keyfile1} -j ${passfile0} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -k ${keyfile0} -k ${keyfile1} -j ${passfile1} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -k ${keyfile1} -k ${keyfile0} -j ${passfile0} -j ${passfile1} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -k ${keyfile0} -k ${keyfile1} -j ${passfile1} -j ${passfile0} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -k ${keyfile1} -k ${keyfile0} -j ${passfile1} -j ${passfile0} md${no} 2>/dev/null && echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli attach -j ${passfile0} -j ${passfile1} -k ${keyfile0} -k ${keyfile1} md${no} 2>/dev/null || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli detach md${no} || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	cat ${passfile0} | geli attach -j - -j ${passfile1} -k ${keyfile0} -k ${keyfile1} md${no} 2>/dev/null || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli detach md${no} || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	cat ${passfile1} | geli attach -j ${passfile0} -j - -k ${keyfile0} -k ${keyfile1} md${no} 2>/dev/null || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli detach md${no} || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	cat ${keyfile0} | geli attach -j ${passfile0} -j ${passfile1} -k - -k ${keyfile1} md${no} 2>/dev/null || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli detach md${no} || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	cat ${keyfile1} | geli attach -j ${passfile0} -j ${passfile1} -k ${keyfile0} -k - md${no} 2>/dev/null || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli detach md${no} || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	cat ${keyfile0} ${keyfile1} | geli attach -j ${passfile0} -j ${passfile1} -k - md${no} 2>/dev/null || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli detach md${no} || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	cat ${passfile0} ${passfile1} | awk '{printf "%s", $0}' | geli attach -j - -k ${keyfile0} -k ${keyfile1} md${no} 2>/dev/null || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 	geli detach md${no} || echo -n "not "
 	echo "ok ${i}"; i=$((i+1))
 done
 
-mdconfig -d -u $no
 rm -f ${keyfile0} ${keyfile1} ${passfile0} ${passfile1}
Index: projects/clang380-import/tools/regression/geom_eli/init-a.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/init-a.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/init-a.t	(revision 293687)
@@ -1,59 +1,60 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
-keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1
+keyfile=`mktemp $base.XXXXXX` || exit 1
 
 echo "1..1380"
 
 i=1
 for cipher in aes:0 aes:128 aes:256 \
     aes-xts:0 aes-xts:128 aes-xts:256 \
     aes-cbc:0 aes-cbc:128 aes-cbc:192 aes-cbc:256 \
     3des:0 3des:192 \
     3des-cbc:0 3des-cbc:192 \
     blowfish:0 blowfish:128 blowfish:160 blowfish:192 blowfish:224 \
     blowfish:256 blowfish:288 blowfish:320 blowfish:352 blowfish:384 \
     blowfish:416 blowfish:448 \
     blowfish-cbc:0 blowfish-cbc:128 blowfish-cbc:160 blowfish-cbc:192 blowfish-cbc:224 \
     blowfish-cbc:256 blowfish-cbc:288 blowfish-cbc:320 blowfish-cbc:352 blowfish-cbc:384 \
     blowfish-cbc:416 blowfish-cbc:448 \
     camellia:0 camellia:128 camellia:192 camellia:256 \
     camellia-cbc:0 camellia-cbc:128 camellia-cbc:192 camellia-cbc:256; do
 	ealgo=${cipher%%:*}
 	keylen=${cipher##*:}
 	for aalgo in hmac/md5 hmac/sha1 hmac/ripemd160 hmac/sha256 hmac/sha384 hmac/sha512; do
 		for secsize in 512 1024 2048 4096 8192; do
-			rnd=`mktemp /tmp/$base.XXXXXX` || exit 1
+			rnd=`mktemp $base.XXXXXX` || exit 1
 			mdconfig -a -t malloc -s `expr $secsize \* $sectors + 512`b -u $no || exit 1
 
 			dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1
 
 			geli init -B none -a $aalgo -e $ealgo -l $keylen -P -K $keyfile -s $secsize md${no} 2>/dev/null
 			geli attach -p -k $keyfile md${no}
 
 			secs=`diskinfo /dev/md${no}.eli | awk '{print $4}'`
 
 			dd if=/dev/random of=${rnd} bs=${secsize} count=${secs} >/dev/null 2>&1
 			dd if=${rnd} of=/dev/md${no}.eli bs=${secsize} count=${secs} 2>/dev/null
 
 			md_rnd=`dd if=${rnd} bs=${secsize} count=${secs} 2>/dev/null | md5`
 			md_ddev=`dd if=/dev/md${no}.eli bs=${secsize} count=${secs} 2>/dev/null | md5`
 
 			if [ ${md_rnd} = ${md_ddev} ]; then
 				echo "ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			else
 				echo "not ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			fi
 			i=$((i+1))
 
 			geli detach md${no}
 			rm -f $rnd
 			mdconfig -d -u $no
 		done
 	done
 done
 
 rm -f $keyfile
Index: projects/clang380-import/tools/regression/geom_eli/init-i-P.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/init-i-P.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/init-i-P.t	(revision 293687)
@@ -1,22 +1,22 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
-keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1
+keyfile=`mktemp $base.XXXXXX` || exit 1
 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1
 
 echo "1..1"
 
 dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1
 
 geli init -B none -i 64 -P -K ${keyfile} md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 
-mdconfig -d -u $no
 rm -f $keyfile
Index: projects/clang380-import/tools/regression/geom_eli/init.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/init.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/init.t	(revision 293687)
@@ -1,64 +1,65 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
-keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1
+keyfile=`mktemp $base.XXXXXX` || exit 1
 
 echo "1..460"
 
 i=1
 for cipher in aes:0 aes:128 aes:256 \
     aes-xts:0 aes-xts:128 aes-xts:256 \
     aes-cbc:0 aes-cbc:128 aes-cbc:192 aes-cbc:256 \
     3des:0 3des:192 \
     3des-cbc:0 3des-cbc:192 \
     blowfish:0 blowfish:128 blowfish:160 blowfish:192 blowfish:224 \
     blowfish:256 blowfish:288 blowfish:320 blowfish:352 blowfish:384 \
     blowfish:416 blowfish:448 \
     blowfish-cbc:0 blowfish-cbc:128 blowfish-cbc:160 blowfish-cbc:192 blowfish-cbc:224 \
     blowfish-cbc:256 blowfish-cbc:288 blowfish-cbc:320 blowfish-cbc:352 blowfish-cbc:384 \
     blowfish-cbc:416 blowfish-cbc:448 \
     camellia:0 camellia:128 camellia:192 camellia:256 \
     camellia-cbc:0 camellia-cbc:128 camellia-cbc:192 camellia-cbc:256; do
 	ealgo=${cipher%%:*}
 	keylen=${cipher##*:}
 	for secsize in 512 1024 2048 4096 8192; do
-		rnd=`mktemp /tmp/$base.XXXXXX` || exit 1
+		rnd=`mktemp $base.XXXXXX` || exit 1
 		mdconfig -a -t malloc -s `expr $secsize \* $sectors + 512`b -u $no || exit 1
 
 		dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1
 
 		geli init -B none -e $ealgo -l $keylen -P -K $keyfile -s $secsize md${no} 2>/dev/null
 		geli attach -p -k $keyfile md${no}
 
 		secs=`diskinfo /dev/md${no}.eli | awk '{print $4}'`
 
 		dd if=/dev/random of=${rnd} bs=${secsize} count=${secs} >/dev/null 2>&1
 		dd if=${rnd} of=/dev/md${no}.eli bs=${secsize} count=${secs} 2>/dev/null
 
 		md_rnd=`dd if=${rnd} bs=${secsize} count=${secs} 2>/dev/null | md5`
 		md_ddev=`dd if=/dev/md${no}.eli bs=${secsize} count=${secs} 2>/dev/null | md5`
 		md_edev=`dd if=/dev/md${no} bs=${secsize} count=${secs} 2>/dev/null | md5`
 
 		if [ ${md_rnd} = ${md_ddev} ]; then
 			echo "ok $i - ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 		else
 			echo "not ok $i - ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 		fi
 		i=$((i+1))
 		if [ ${md_rnd} != ${md_edev} ]; then
 			echo "ok $i - ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 		else
 			echo "not ok $i - ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 		fi
 		i=$((i+1))
 
 		geli detach md${no}
 		rm -f $rnd
 		mdconfig -d -u $no
 	done
 done
 
 rm -f $keyfile
Index: projects/clang380-import/tools/regression/geom_eli/integrity-copy.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/integrity-copy.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/integrity-copy.t	(revision 293687)
@@ -1,98 +1,99 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
-keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1
-sector=`mktemp /tmp/$base.XXXXXX` || exit 1
+keyfile=`mktemp $base.XXXXXX` || exit 1
+sector=`mktemp $base.XXXXXX` || exit 1
 
 echo "1..5520"
 
 i=1
 for cipher in aes:0 aes:128 aes:256 \
     aes-xts:0 aes-xts:128 aes-xts:256 \
     aes-cbc:0 aes-cbc:128 aes-cbc:192 aes-cbc:256 \
     3des:0 3des:192 \
     3des-cbc:0 3des-cbc:192 \
     blowfish:0 blowfish:128 blowfish:160 blowfish:192 blowfish:224 \
     blowfish:256 blowfish:288 blowfish:320 blowfish:352 blowfish:384 \
     blowfish:416 blowfish:448 \
     blowfish-cbc:0 blowfish-cbc:128 blowfish-cbc:160 blowfish-cbc:192 blowfish-cbc:224 \
     blowfish-cbc:256 blowfish-cbc:288 blowfish-cbc:320 blowfish-cbc:352 blowfish-cbc:384 \
     blowfish-cbc:416 blowfish-cbc:448 \
     camellia:0 camellia:128 camellia:192 camellia:256 \
     camellia-cbc:0 camellia-cbc:128 camellia-cbc:192 camellia-cbc:256; do
 	ealgo=${cipher%%:*}
 	keylen=${cipher##*:}
 	for aalgo in hmac/md5 hmac/sha1 hmac/ripemd160 hmac/sha256 hmac/sha384 hmac/sha512; do
 		for secsize in 512 1024 2048 4096 8192; do
 			#mdconfig -a -t malloc -s `expr $secsize \* 2 + 512`b -u $no || exit 1
 			mdconfig -a -t malloc -s $sectors -u $no || exit 1
 
 			dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1
 
 			geli init -B none -a $aalgo -e $ealgo -l $keylen -P -K $keyfile -s $secsize md${no} 2>/dev/null
 			geli attach -p -k $keyfile md${no}
 
 			dd if=/dev/random of=/dev/md${no}.eli bs=${secsize} count=1 >/dev/null 2>&1
 
 			dd if=/dev/md${no}.eli bs=${secsize} count=1 >/dev/null 2>&1
 			if [ $? -eq 0 ]; then
 				echo "ok $i - small 1 aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			else
 				echo "not ok $i - small 1 aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			fi
 			i=$((i+1))
 
 			geli detach md${no}
 			# Copy first small sector to the second small sector.
 			# This should be detected as corruption.
 			dd if=/dev/md${no} of=${sector} bs=512 count=1 >/dev/null 2>&1
 			dd if=${sector} of=/dev/md${no} bs=512 count=1 seek=1 >/dev/null 2>&1
 			geli attach -p -k $keyfile md${no}
 
 			dd if=/dev/md${no}.eli of=/dev/null bs=${secsize} count=1 >/dev/null 2>&1
 			if [ $? -ne 0 ]; then
 				echo "ok $i - small 2 aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			else
 				echo "not ok $i - small 2 aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			fi
 			i=$((i+1))
 
 			ms=`diskinfo /dev/md${no} | awk '{print $3 - 512}'`
 			ns=`diskinfo /dev/md${no}.eli | awk '{print $4}'`
 			usecsize=`echo "($ms / $ns) - (($ms / $ns) % 512)" | bc`
 
 			dd if=/dev/random of=/dev/md${no}.eli bs=${secsize} count=2 >/dev/null 2>&1
 
 			dd if=/dev/md${no}.eli bs=${secsize} count=2 >/dev/null 2>&1
 			if [ $? -eq 0 ]; then
 				echo "ok $i - big 1 aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			else
 				echo "not ok $i - big 1 aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			fi
 			i=$((i+1))
 
 			geli detach md${no}
 			# Copy first big sector to the second big sector.
 			# This should be detected as corruption.
 			dd if=/dev/md${no} of=${sector} bs=${usecsize} count=1 >/dev/null 2>&1
 			dd if=${sector} of=/dev/md${no} bs=${usecsize} count=1 seek=1 >/dev/null 2>&1
 			geli attach -p -k $keyfile md${no}
 
 			dd if=/dev/md${no}.eli of=/dev/null bs=${secsize} count=2 >/dev/null 2>&1
 			if [ $? -ne 0 ]; then
 				echo "ok $i - big 2 aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			else
 				echo "not ok $i - big 2 aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			fi
 			i=$((i+1))
 
 			geli detach md${no}
 			mdconfig -d -u $no
 		done
 	done
 done
 
 rm -f $keyfile $sector
Index: projects/clang380-import/tools/regression/geom_eli/integrity-data.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/integrity-data.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/integrity-data.t	(revision 293687)
@@ -1,68 +1,69 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
-keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1
-sector=`mktemp /tmp/$base.XXXXXX` || exit 1
+keyfile=`mktemp $base.XXXXXX` || exit 1
+sector=`mktemp $base.XXXXXX` || exit 1
 
 echo "1..2760"
 
 i=1
 for cipher in aes:0 aes:128 aes:256 \
     aes-xts:0 aes-xts:128 aes-xts:256 \
     aes-cbc:0 aes-cbc:128 aes-cbc:192 aes-cbc:256 \
     3des:0 3des:192 \
     3des-cbc:0 3des-cbc:192 \
     blowfish:0 blowfish:128 blowfish:160 blowfish:192 blowfish:224 \
     blowfish:256 blowfish:288 blowfish:320 blowfish:352 blowfish:384 \
     blowfish:416 blowfish:448 \
     blowfish-cbc:0 blowfish-cbc:128 blowfish-cbc:160 blowfish-cbc:192 blowfish-cbc:224 \
     blowfish-cbc:256 blowfish-cbc:288 blowfish-cbc:320 blowfish-cbc:352 blowfish-cbc:384 \
     blowfish-cbc:416 blowfish-cbc:448 \
     camellia:0 camellia:128 camellia:192 camellia:256 \
     camellia-cbc:0 camellia-cbc:128 camellia-cbc:192 camellia-cbc:256; do
 	ealgo=${cipher%%:*}
 	keylen=${cipher##*:}
 	for aalgo in hmac/md5 hmac/sha1 hmac/ripemd160 hmac/sha256 hmac/sha384 hmac/sha512; do
 		for secsize in 512 1024 2048 4096 8192; do
 			mdconfig -a -t malloc -s `expr $secsize \* 2 + 512`b -u $no || exit 1
 
 			dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1
 
 			geli init -B none -a $aalgo -e $ealgo -l $keylen -P -K $keyfile -s $secsize md${no} 2>/dev/null
 			geli attach -p -k $keyfile md${no}
 
 			dd if=/dev/random of=/dev/md${no}.eli bs=${secsize} count=1 >/dev/null 2>&1
 
 			dd if=/dev/md${no}.eli bs=${secsize} count=1 >/dev/null 2>&1
 			if [ $? -eq 0 ]; then
 				echo "ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			else
 				echo "not ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			fi
 			i=$((i+1))
 
 			geli detach md${no}
 			# Corrupt 8 bytes of data.
 			dd if=/dev/md${no} of=${sector} bs=512 count=1 >/dev/null 2>&1
 			dd if=/dev/random of=${sector} bs=1 count=8 seek=64 conv=notrunc >/dev/null 2>&1
 			dd if=${sector} of=/dev/md${no} bs=512 count=1 >/dev/null 2>&1
 			geli attach -p -k $keyfile md${no}
 
 			dd if=/dev/md${no}.eli of=/dev/null bs=${secsize} count=1 >/dev/null 2>&1
 			if [ $? -ne 0 ]; then
 				echo "ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			else
 				echo "not ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			fi
 			i=$((i+1))
 
 			geli detach md${no}
 			mdconfig -d -u $no
 		done
 	done
 done
 
 rm -f $keyfile $sector
Index: projects/clang380-import/tools/regression/geom_eli/integrity-hmac.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/integrity-hmac.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/integrity-hmac.t	(revision 293687)
@@ -1,68 +1,69 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
-keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1
-sector=`mktemp /tmp/$base.XXXXXX` || exit 1
+keyfile=`mktemp $base.XXXXXX` || exit 1
+sector=`mktemp $base.XXXXXX` || exit 1
 
 echo "1..2760"
 
 i=1
 for cipher in aes:0 aes:128 aes:256 \
     aes-xts:0 aes-xts:128 aes-xts:256 \
     aes-cbc:0 aes-cbc:128 aes-cbc:192 aes-cbc:256 \
     3des:0 3des:192 \
     3des-cbc:0 3des-cbc:192 \
     blowfish:0 blowfish:128 blowfish:160 blowfish:192 blowfish:224 \
     blowfish:256 blowfish:288 blowfish:320 blowfish:352 blowfish:384 \
     blowfish:416 blowfish:448 \
     blowfish-cbc:0 blowfish-cbc:128 blowfish-cbc:160 blowfish-cbc:192 blowfish-cbc:224 \
     blowfish-cbc:256 blowfish-cbc:288 blowfish-cbc:320 blowfish-cbc:352 blowfish-cbc:384 \
     blowfish-cbc:416 blowfish-cbc:448 \
     camellia:0 camellia:128 camellia:192 camellia:256 \
     camellia-cbc:0 camellia-cbc:128 camellia-cbc:192 camellia-cbc:256; do
 	ealgo=${cipher%%:*}
 	keylen=${cipher##*:}
 	for aalgo in hmac/md5 hmac/sha1 hmac/ripemd160 hmac/sha256 hmac/sha384 hmac/sha512; do
 		for secsize in 512 1024 2048 4096 8192; do
 			mdconfig -a -t malloc -s `expr $secsize \* 2 + 512`b -u $no || exit 1
 
 			dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1
 
 			geli init -B none -a $aalgo -e $ealgo -l $keylen -P -K $keyfile -s $secsize md${no} 2>/dev/null
 			geli attach -p -k $keyfile md${no}
 
 			dd if=/dev/random of=/dev/md${no}.eli bs=${secsize} count=1 >/dev/null 2>&1
 
 			dd if=/dev/md${no}.eli bs=${secsize} count=1 >/dev/null 2>&1
 			if [ $? -eq 0 ]; then
 				echo "ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			else
 				echo "not ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			fi
 			i=$((i+1))
 
 			geli detach md${no}
-			# Corrupt 8 bytes of HMAC.
+			# Corrupt 16 bytes of HMAC.
 			dd if=/dev/md${no} of=${sector} bs=512 count=1 >/dev/null 2>&1
 			dd if=/dev/random of=${sector} bs=1 count=16 conv=notrunc >/dev/null 2>&1
 			dd if=${sector} of=/dev/md${no} bs=512 count=1 >/dev/null 2>&1
 			geli attach -p -k $keyfile md${no}
 
 			dd if=/dev/md${no}.eli bs=${secsize} count=1 >/dev/null 2>&1
 			if [ $? -ne 0 ]; then
 				echo "ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			else
 				echo "not ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			fi
 			i=$((i+1))
 
 			geli detach md${no}
 			mdconfig -d -u $no
 		done
 	done
 done
 
 rm -f $keyfile $sector
Index: projects/clang380-import/tools/regression/geom_eli/kill.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/kill.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/kill.t	(revision 293687)
@@ -1,97 +1,97 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
-keyfile1=`mktemp /tmp/$base.XXXXXX` || exit 1
-keyfile2=`mktemp /tmp/$base.XXXXXX` || exit 1
+keyfile1=`mktemp $base.XXXXXX` || exit 1
+keyfile2=`mktemp $base.XXXXXX` || exit 1
 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1
 
 echo "1..9"
 
 dd if=/dev/random of=${keyfile1} bs=512 count=16 >/dev/null 2>&1
 dd if=/dev/random of=${keyfile2} bs=512 count=16 >/dev/null 2>&1
 
 geli init -B none -P -K $keyfile1 md${no}
 geli attach -p -k $keyfile1 md${no}
 geli setkey -n 1 -P -K $keyfile2 md${no}
 
 # Kill attached provider.
 geli kill md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 sleep 1
 # Provider should be automatically detached.
 if [ ! -c /dev/md{$no}.eli ]; then
 	echo "ok 2"
 else
 	echo "not ok 2"
 fi
 
 # We cannot use keyfile1 anymore.
 geli attach -p -k $keyfile1 md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 3"
 else
 	echo "not ok 3"
 fi
 
 # We cannot use keyfile2 anymore.
 geli attach -p -k $keyfile2 md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 4"
 else
 	echo "not ok 4"
 fi
 
 geli init -B none -P -K $keyfile1 md${no}
 geli setkey -n 1 -p -k $keyfile1 -P -K $keyfile2 md${no}
 
 # Should be possible to attach with keyfile1.
 geli attach -p -k $keyfile1 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 5"
 else
 	echo "not ok 5"
 fi
 geli detach md${no}
 
 # Should be possible to attach with keyfile2.
 geli attach -p -k $keyfile2 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 6"
 else
 	echo "not ok 6"
 fi
 geli detach md${no}
 
 # Kill detached provider.
 geli kill md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 7"
 else
 	echo "not ok 7"
 fi
 
 # We cannot use keyfile1 anymore.
 geli attach -p -k $keyfile1 md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 8"
 else
 	echo "not ok 8"
 fi
 
 # We cannot use keyfile2 anymore.
 geli attach -p -k $keyfile2 md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 9"
 else
 	echo "not ok 9"
 fi
 
-mdconfig -d -u $no
 rm -f $keyfile1 $keyfile2
Index: projects/clang380-import/tools/regression/geom_eli/nokey.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/nokey.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/nokey.t	(revision 293687)
@@ -1,65 +1,65 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
-keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1
+keyfile=`mktemp $base.XXXXXX` || exit 1
 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1
 
 echo "1..8"
 
 geli init -B none -P md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 
 dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1
 
 geli init -B none -P -K ${keyfile} md${no} 2>/dev/null
 if [ $? -eq 0 ]; then
 	echo "ok 2"
 else
 	echo "not ok 2"
 fi
 geli attach -p md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 3"
 else
 	echo "not ok 3"
 fi
 geli attach -p -k ${keyfile} md${no} 2>/dev/null
 if [ $? -eq 0 ]; then
 	echo "ok 4"
 else
 	echo "not ok 4"
 fi
 geli setkey -n 0 -P md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 5"
 else
 	echo "not ok 5"
 fi
 geli detach md${no} 2>/dev/null
 if [ $? -eq 0 ]; then
 	echo "ok 6"
 else
 	echo "not ok 6"
 fi
 geli setkey -n 0 -p -P -K ${keyfile} md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 7"
 else
 	echo "not ok 7"
 fi
 geli setkey -n 0 -p -k ${keyfile} -P md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 8"
 else
 	echo "not ok 8"
 fi
 
-mdconfig -d -u $no
 rm -f $keyfile
Index: projects/clang380-import/tools/regression/geom_eli/onetime-a.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/onetime-a.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/onetime-a.t	(revision 293687)
@@ -1,53 +1,54 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
 
 echo "1..1380"
 
 i=1
 for cipher in aes:0 aes:128 aes:256 \
     aes-xts:0 aes-xts:128 aes-xts:256 \
     aes-cbc:0 aes-cbc:128 aes-cbc:192 aes-cbc:256 \
     3des:0 3des:192 \
     3des-cbc:0 3des-cbc:192 \
     blowfish:0 blowfish:128 blowfish:160 blowfish:192 blowfish:224 \
     blowfish:256 blowfish:288 blowfish:320 blowfish:352 blowfish:384 \
     blowfish:416 blowfish:448 \
     blowfish-cbc:0 blowfish-cbc:128 blowfish-cbc:160 blowfish-cbc:192 blowfish-cbc:224 \
     blowfish-cbc:256 blowfish-cbc:288 blowfish-cbc:320 blowfish-cbc:352 blowfish-cbc:384 \
     blowfish-cbc:416 blowfish-cbc:448 \
     camellia:0 camellia:128 camellia:192 camellia:256 \
     camellia-cbc:0 camellia-cbc:128 camellia-cbc:192 camellia-cbc:256; do
 	ealgo=${cipher%%:*}
 	keylen=${cipher##*:}
 	for aalgo in hmac/md5 hmac/sha1 hmac/ripemd160 hmac/sha256 hmac/sha384 hmac/sha512; do
 		for secsize in 512 1024 2048 4096 8192; do
-			rnd=`mktemp /tmp/$base.XXXXXX` || exit 1
+			rnd=`mktemp $base.XXXXXX` || exit 1
 			mdconfig -a -t malloc -s `expr $secsize \* $sectors + 512`b -u $no || exit 1
 
 			geli onetime -a $aalgo -e $ealgo -l $keylen -s $secsize md${no} 2>/dev/null
 
 			secs=`diskinfo /dev/md${no}.eli | awk '{print $4}'`
 
 			dd if=/dev/random of=${rnd} bs=${secsize} count=${secs} >/dev/null 2>&1
 			dd if=${rnd} of=/dev/md${no}.eli bs=${secsize} count=${secs} 2>/dev/null
 
 			md_rnd=`dd if=${rnd} bs=${secsize} count=${secs} 2>/dev/null | md5`
 			md_ddev=`dd if=/dev/md${no}.eli bs=${secsize} count=${secs} 2>/dev/null | md5`
 
 			if [ ${md_rnd} = ${md_ddev} ]; then
 				echo "ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			else
 				echo "not ok $i - aalgo=${aalgo} ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 			fi
 			i=$((i+1))
 
 			geli detach md${no}
 			rm -f $rnd
 			mdconfig -d -u $no
 		done
 	done
 done
Index: projects/clang380-import/tools/regression/geom_eli/onetime-d.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/onetime-d.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/onetime-d.t	(revision 293687)
@@ -1,33 +1,34 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
 mdconfig -a -t malloc -s $sectors -u $no || exit 1
 
 echo "1..3"
 
 geli onetime -d md${no}
 if [ -c /dev/md${no}.eli ]; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 # Be sure it doesn't detach on read.
 dd if=/dev/md${no}.eli of=/dev/null 2>/dev/null
 sleep 1
 if [ -c /dev/md${no}.eli ]; then
 	echo "ok 2"
 else
 	echo "not ok 2"
 fi
 true > /dev/md${no}.eli
 sleep 1
 if [ ! -c /dev/md${no}.eli ]; then
 	echo "ok 3"
 else
 	echo "not ok 3"
 fi
 
 mdconfig -d -u $no
Index: projects/clang380-import/tools/regression/geom_eli/onetime.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/onetime.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/onetime.t	(revision 293687)
@@ -1,58 +1,59 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
 
 echo "1..460"
 
 i=1
 for cipher in aes:0 aes:128 aes:256 \
     aes-xts:0 aes-xts:128 aes-xts:256 \
     aes-cbc:0 aes-cbc:128 aes-cbc:192 aes-cbc:256 \
     3des:0 3des:192 \
     3des-cbc:0 3des-cbc:192 \
     blowfish:0 blowfish:128 blowfish:160 blowfish:192 blowfish:224 \
     blowfish:256 blowfish:288 blowfish:320 blowfish:352 blowfish:384 \
     blowfish:416 blowfish:448 \
     blowfish-cbc:0 blowfish-cbc:128 blowfish-cbc:160 blowfish-cbc:192 blowfish-cbc:224 \
     blowfish-cbc:256 blowfish-cbc:288 blowfish-cbc:320 blowfish-cbc:352 blowfish-cbc:384 \
     blowfish-cbc:416 blowfish-cbc:448 \
     camellia:0 camellia:128 camellia:192 camellia:256 \
     camellia-cbc:0 camellia-cbc:128 camellia-cbc:192 camellia-cbc:256; do
 	ealgo=${cipher%%:*}
 	keylen=${cipher##*:}
 	for secsize in 512 1024 2048 4096 8192; do
-		rnd=`mktemp /tmp/$base.XXXXXX` || exit 1
+		rnd=`mktemp $base.XXXXXX` || exit 1
 		mdconfig -a -t malloc -s `expr $secsize \* $sectors`b -u $no || exit 1
 
 		geli onetime -e $ealgo -l $keylen -s $secsize md${no} 2>/dev/null
 
 		secs=`diskinfo /dev/md${no}.eli | awk '{print $4}'`
 
 		dd if=/dev/random of=${rnd} bs=${secsize} count=${secs} >/dev/null 2>&1
 		dd if=${rnd} of=/dev/md${no}.eli bs=${secsize} count=${secs} 2>/dev/null
 
 		md_rnd=`dd if=${rnd} bs=${secsize} count=${secs} 2>/dev/null | md5`
 		md_ddev=`dd if=/dev/md${no}.eli bs=${secsize} count=${secs} 2>/dev/null | md5`
 		md_edev=`dd if=/dev/md${no} bs=${secsize} count=${secs} 2>/dev/null | md5`
 
 		if [ ${md_rnd} = ${md_ddev} ]; then
 			echo "ok $i - ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 		else
 			echo "not ok $i - ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 		fi
 		i=$((i+1))
 		if [ ${md_rnd} != ${md_edev} ]; then
 			echo "ok $i - ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 		else
 			echo "not ok $i - ealgo=${ealgo} keylen=${keylen} sec=${secsize}"
 		fi
 		i=$((i+1))
 
 		geli detach md${no}
 		rm -f $rnd
 		mdconfig -d -u $no
 	done
 done
Index: projects/clang380-import/tools/regression/geom_eli/readonly.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/readonly.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/readonly.t	(revision 293687)
@@ -1,93 +1,94 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
-keyfile=`mktemp /tmp/$base.XXXXXX` || exit 1
+keyfile=`mktemp $base.XXXXXX` || exit 1
 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1
 
 echo "1..11"
 
 dd if=/dev/random of=${keyfile} bs=512 count=16 >/dev/null 2>&1
 
 geli init -B none -P -K $keyfile md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 
 geli attach -r -p -k $keyfile md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 2"
 else
 	echo "not ok 2"
 fi
 
 sh -c "true >/dev/md${no}.eli" 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 3"
 else
 	echo "not ok 3"
 fi
 
 geli kill md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 4"
 else
 	echo "not ok 4"
 fi
 
 # kill should detach provider...
-if [ ! -c /dev/md{$no}.eli ]; then
+if [ ! -c /dev/md${no}.eli ]; then
 	echo "ok 5"
 else
 	echo "not ok 5"
 fi
 
 # ...but not destroy the metadata.
 geli attach -r -p -k $keyfile md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 6"
 else
 	echo "not ok 6"
 fi
 
 geli setkey -n 1 -P -K /dev/null md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 7"
 else
 	echo "not ok 7"
 fi
 
 geli delkey -n 0 md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 8"
 else
 	echo "not ok 8"
 fi
 
 geli delkey -f -n 0 md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 9"
 else
 	echo "not ok 9"
 fi
 
 geli list md${no}.eli | egrep '^Flags: .*READ-ONLY' >/dev/null
 if [ $? -eq 0 ]; then
 	echo "ok 10"
 else
 	echo "not ok 10"
 fi
 
 geli detach md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 11"
 else
 	echo "not ok 11"
 fi
 
 mdconfig -d -u $no
 rm -f $keyfile
Index: projects/clang380-import/tools/regression/geom_eli/resize.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/resize.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/resize.t	(revision 293687)
@@ -1,150 +1,148 @@
-#! /bin/sh
-#
+#!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 echo 1..27
 
 BLK=512
 BLKS_PER_MB=2048
 
 md=$(mdconfig -s40m) || exit 1
 unit=${md#md}
 i=1
 
 setsize() {
     partszMB=$1 unitszMB=$2
 
     {
 	echo a: $(($partszMB * $BLKS_PER_MB)) 0 4.2BSD 1024 8192
 	echo c: $(($unitszMB * $BLKS_PER_MB)) 0 unused 0 0
     } | disklabel -R $md /dev/stdin
 }
 
 # Initialise
 
-kldload geom_eli >/dev/null 2>&1
-
 setsize 10 40 || echo -n "not "
 echo ok $i - "Sized ${md}a to 10m"
 i=$((i + 1))
 
 echo secret >tmp.key
 geli init -Bnone -PKtmp.key ${md}a || echo -n "not "
 echo ok $i - "Initialised geli on ${md}a"
 i=$((i + 1))
 geli attach -pk tmp.key ${md}a || echo -n "not "
 echo ok $i - "Attached ${md}a as ${md}a.eli"
 i=$((i + 1))
 
 newfs -U ${md}a.eli >/dev/null || echo -n "not "
 echo ok $i - "Initialised the filesystem on ${md}a.eli"
 i=$((i + 1))
 out=$(fsck -tufs -y ${md}a.eli)
 echo "$out" | fgrep -q MODIFIED && echo -n "not "
 echo ok $i - "fsck says ${md}a.eli is clean," $(echo $(echo "$out" | wc -l)) \
     "lines of output"
 i=$((i + 1))
 
 
 # Doing a backup, resize & restore must be forced (with -f) as geli
 # verifies that the provider size in the metadata matches the consumer.
 
 geli backup ${md}a tmp.meta || echo -n "not "
 echo ok $i - "Backed up ${md}a metadata"
 i=$((i + 1))
 
 geli detach ${md}a.eli || echo -n "not "
 echo ok $i - "Detached ${md}a.eli"
 i=$((i + 1))
 
 setsize 20 40 || echo -n "not "
 echo ok $i - "Sized ${md}a to 20m"
 i=$((i + 1))
 geli attach -pktmp.key ${md}a && echo -n "not "
 echo ok $i - "Attaching ${md}a fails after resizing the consumer"
 i=$((i + 1))
 
 geli restore tmp.meta ${md}a && echo -n "not "
 echo ok $i - "Restoring metadata on ${md}a.eli fails without -f"
 i=$((i + 1))
 geli restore -f tmp.meta ${md}a || echo -n "not "
 echo ok $i - "Restoring metadata on ${md}a.eli can be forced"
 i=$((i + 1))
 
 geli attach -pktmp.key ${md}a || echo -n "not "
 echo ok $i - "Attaching ${md}a is now possible"
 i=$((i + 1))
 
 growfs -y ${md}a.eli >/dev/null || echo -n "not "
 echo ok $i - "Extended the filesystem on ${md}a.eli"
 i=$((i + 1))
 
 out=$(fsck -tufs -y ${md}a.eli)
 echo "$out" | fgrep -q MODIFIED && echo -n "not "
 echo ok $i - "fsck says ${md}a.eli is clean," $(echo $(echo "$out" | wc -l)) \
     "lines of output"
 i=$((i + 1))
 
 
 # Now do the resize properly
 
 geli detach ${md}a.eli || echo -n "not "
 echo ok $i - "Detached ${md}a.eli"
 i=$((i + 1))
 
 setsize 30 40 || echo -n "not "
 echo ok $i - "Sized ${md}a to 30m"
 i=$((i + 1))
 
 geli resize -s20m ${md}a || echo -n "not "
 echo ok $i - "Resizing works ok"
 i=$((i + 1))
 geli resize -s20m ${md}a && echo -n "not "
 echo ok $i - "Resizing doesn't work a 2nd time (no old metadata)"
 i=$((i + 1))
 
 geli attach -pktmp.key ${md}a || echo -n "not "
 echo ok $i - "Attaching ${md}a works ok"
 i=$((i + 1))
 
 growfs -y ${md}a.eli >/dev/null || echo -n "not "
 echo ok $i - "Extended the filesystem on ${md}a.eli"
 i=$((i + 1))
 
 out=$(fsck -tufs -y ${md}a.eli)
 echo "$out" | fgrep -q MODIFIED && echo -n "not "
 echo ok $i - "fsck says ${md}a.eli is clean," $(echo $(echo "$out" | wc -l)) \
     "lines of output"
 i=$((i + 1))
 
 geli detach ${md}a.eli
 gpart destroy -F $md >/dev/null
 
 
 # Verify that the man page example works, changing ada0 to $md,
 # 1g to 20m, 2g to 30m and keyfile to tmp.key, and adding -B none
 # to geli init.
 
 gpart create -s GPT $md || echo -n "not "
 echo ok $i - "Installed a GPT on ${md}"
 i=$((i + 1))
 gpart add -s 20m -t freebsd-ufs -i 1 $md || echo -n "not "
 echo ok $i - "Added a 20m partition in slot 1"
 i=$((i + 1))
 geli init -B none -K tmp.key -P ${md}p1 || echo -n "not "
 echo ok $i - "Initialised geli on ${md}p1"
 i=$((i + 1))
 gpart resize -s 30m -i 1 $md || echo -n "not "
 echo ok $i - "Resized partition ${md}p1 to 30m"
 i=$((i + 1))
 geli resize -s 20m ${md}p1 || echo -n "not "
 echo ok $i - "Resized geli on ${md}p1 to 30m"
 i=$((i + 1))
 geli attach -k tmp.key -p ${md}p1 || echo -n "not "
 echo ok $i - "Attached ${md}p1.eli"
 i=$((i + 1))
 
 geli detach ${md}p1.eli
-mdconfig -du$unit
 
 rm tmp.*
Index: projects/clang380-import/tools/regression/geom_eli/setkey.t
===================================================================
--- projects/clang380-import/tools/regression/geom_eli/setkey.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_eli/setkey.t	(revision 293687)
@@ -1,156 +1,156 @@
 #!/bin/sh
 # $FreeBSD$
 
+. $(dirname $0)/conf.sh
+
 base=`basename $0`
-no=45
 sectors=100
-rnd=`mktemp /tmp/$base.XXXXXX` || exit 1
-keyfile1=`mktemp /tmp/$base.XXXXXX` || exit 1
-keyfile2=`mktemp /tmp/$base.XXXXXX` || exit 1
-keyfile3=`mktemp /tmp/$base.XXXXXX` || exit 1
-keyfile4=`mktemp /tmp/$base.XXXXXX` || exit 1
-keyfile5=`mktemp /tmp/$base.XXXXXX` || exit 1
+rnd=`mktemp $base.XXXXXX` || exit 1
+keyfile1=`mktemp $base.XXXXXX` || exit 1
+keyfile2=`mktemp $base.XXXXXX` || exit 1
+keyfile3=`mktemp $base.XXXXXX` || exit 1
+keyfile4=`mktemp $base.XXXXXX` || exit 1
+keyfile5=`mktemp $base.XXXXXX` || exit 1
 mdconfig -a -t malloc -s `expr $sectors + 1` -u $no || exit 1
 
 echo "1..16"
 
 dd if=/dev/random of=${rnd} bs=512 count=${sectors} >/dev/null 2>&1
 hash1=`dd if=${rnd} bs=512 count=${sectors} 2>/dev/null | md5`
 dd if=/dev/random of=${keyfile1} bs=512 count=16 >/dev/null 2>&1
 dd if=/dev/random of=${keyfile2} bs=512 count=16 >/dev/null 2>&1
 dd if=/dev/random of=${keyfile3} bs=512 count=16 >/dev/null 2>&1
 dd if=/dev/random of=${keyfile4} bs=512 count=16 >/dev/null 2>&1
 dd if=/dev/random of=${keyfile5} bs=512 count=16 >/dev/null 2>&1
 
 geli init -B none -P -K $keyfile1 md${no}
 geli attach -p -k $keyfile1 md${no}
 
 dd if=${rnd} of=/dev/md${no}.eli bs=512 count=${sectors} 2>/dev/null
 rm -f $rnd
 hash2=`dd if=/dev/md${no}.eli bs=512 count=${sectors} 2>/dev/null | md5`
 
 # Change current key (0) for attached provider.
 geli setkey -P -K $keyfile2 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 geli detach md${no}
 
 # We cannot use keyfile1 anymore.
 geli attach -p -k $keyfile1 md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 2"
 else
 	echo "not ok 2"
 fi
 
 # Attach with new key.
 geli attach -p -k $keyfile2 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 3"
 else
 	echo "not ok 3"
 fi
 hash3=`dd if=/dev/md${no}.eli bs=512 count=${sectors} 2>/dev/null | md5`
 
 # Change key 1 for attached provider.
 geli setkey -n 1 -P -K $keyfile3 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 4"
 else
 	echo "not ok 4"
 fi
 geli detach md${no}
 
 # Attach with key 1.
 geli attach -p -k $keyfile3 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 5"
 else
 	echo "not ok 5"
 fi
 hash4=`dd if=/dev/md${no}.eli bs=512 count=${sectors} 2>/dev/null | md5`
 geli detach md${no}
 
 # Change current (1) key for detached provider.
 geli setkey -p -k $keyfile3 -P -K $keyfile4 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 6"
 else
 	echo "not ok 6"
 fi
 
 # We cannot use keyfile3 anymore.
 geli attach -p -k $keyfile3 md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 7"
 else
 	echo "not ok 7"
 fi
 
 # Attach with key 1.
 geli attach -p -k $keyfile4 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 8"
 else
 	echo "not ok 8"
 fi
 hash5=`dd if=/dev/md${no}.eli bs=512 count=${sectors} 2>/dev/null | md5`
 geli detach md${no}
 
 # Change key 0 for detached provider.
 geli setkey -n 0 -p -k $keyfile4 -P -K $keyfile5 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 9"
 else
 	echo "not ok 9"
 fi
 
 # We cannot use keyfile2 anymore.
 geli attach -p -k $keyfile2 md${no} 2>/dev/null
 if [ $? -ne 0 ]; then
 	echo "ok 10"
 else
 	echo "not ok 10"
 fi
 
 # Attach with key 0.
 geli attach -p -k $keyfile5 md${no}
 if [ $? -eq 0 ]; then
 	echo "ok 11"
 else
 	echo "not ok 11"
 fi
 hash6=`dd if=/dev/md${no}.eli bs=512 count=${sectors} 2>/dev/null | md5`
 geli detach md${no}
 
 if [ ${hash1} = ${hash2} ]; then
 	echo "ok 12"
 else
 	echo "not ok 12"
 fi
 if [ ${hash1} = ${hash3} ]; then
 	echo "ok 13"
 else
 	echo "not ok 13"
 fi
 if [ ${hash1} = ${hash4} ]; then
 	echo "ok 14"
 else
 	echo "not ok 14"
 fi
 if [ ${hash1} = ${hash5} ]; then
 	echo "ok 15"
 else
 	echo "not ok 15"
 fi
 if [ ${hash1} = ${hash6} ]; then
 	echo "ok 16"
 else
 	echo "not ok 16"
 fi
 
-mdconfig -d -u $no
 rm -f $keyfile1 $keyfile2 $keyfile3 $keyfile4 $keyfile5
Index: projects/clang380-import/tools/regression/geom_gate/runtests.sh
===================================================================
--- projects/clang380-import/tools/regression/geom_gate/runtests.sh	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_gate/runtests.sh	(nonexistent)
@@ -1,8 +0,0 @@
-#!/bin/sh
-# $FreeBSD$
-
-dir=`dirname $0`
-
-for ts in `dirname $0`/test-*.sh; do
-	sh $ts
-done

Property changes on: projects/clang380-import/tools/regression/geom_gate/runtests.sh
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: projects/clang380-import/tools/regression/geom_gate/test-1.sh
===================================================================
--- projects/clang380-import/tools/regression/geom_gate/test-1.sh	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_gate/test-1.sh	(nonexistent)
@@ -1,36 +0,0 @@
-#!/bin/sh
-# $FreeBSD$
-
-base=`basename $0`
-us=45
-work="/dev/md${us}"
-src="/dev/md`expr $us + 1`"
-conf=`mktemp /tmp/$base.XXXXXX` || exit 1
-
-mdconfig -a -t malloc -s 1M -u $us || exit 1
-mdconfig -a -t malloc -s 1M -u `expr $us + 1` || exit 1
-dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1
-dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1
-sum=`cat $src | md5 -q`
-
-echo "127.0.0.1 RW $work" > $conf
-ggated $conf
-ggatec create -u $us 127.0.0.1 $work
-
-dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1
-
-if [ `cat $work | md5 -q` != $sum ]; then
-	echo "FAIL"
-else
-	if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then
-		echo "FAIL"
-	else
-		echo "PASS"
-	fi
-fi
-
-ggatec destroy -u $us
-mdconfig -d -u $us
-mdconfig -d -u `expr $us + 1`
-pkill ggated $conf
-rm -f $conf

Property changes on: projects/clang380-import/tools/regression/geom_gate/test-1.sh
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: projects/clang380-import/tools/regression/geom_gate/test-2.sh
===================================================================
--- projects/clang380-import/tools/regression/geom_gate/test-2.sh	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_gate/test-2.sh	(nonexistent)
@@ -1,28 +0,0 @@
-#!/bin/sh
-# $FreeBSD$
-
-base=`basename $0`
-us=45
-work=`mktemp /tmp/$base.XXXXXX` || exit 1
-src=`mktemp /tmp/$base.XXXXXX` || exit 1
-
-dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1
-dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1
-sum=`md5 -q $src`
-
-ggatel create -u $us $work
-
-dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1
-
-if [ `md5 -q $work` != $sum ]; then
-	echo "FAIL"
-else
-	if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then
-		echo "FAIL"
-	else
-		echo "PASS"
-	fi
-fi
-
-ggatel destroy -u $us
-rm -f $work $src

Property changes on: projects/clang380-import/tools/regression/geom_gate/test-2.sh
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: projects/clang380-import/tools/regression/geom_gate/test-3.sh
===================================================================
--- projects/clang380-import/tools/regression/geom_gate/test-3.sh	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_gate/test-3.sh	(nonexistent)
@@ -1,31 +0,0 @@
-#!/bin/sh
-# $FreeBSD$
-
-base=`basename $0`
-us=45
-work="/dev/md${us}"
-src="/dev/md`expr $us + 1`"
-
-mdconfig -a -t malloc -s 1M -u $us || exit 1
-mdconfig -a -t malloc -s 1M -u `expr $us + 1` || exit 1
-dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1
-dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1
-sum=`cat $src | md5 -q`
-
-ggatel create -u $us $work
-
-dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1
-
-if [ `cat $work | md5 -q` != $sum ]; then
-	echo "FAIL"
-else
-	if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then
-		echo "FAIL"
-	else
-		echo "PASS"
-	fi
-fi
-
-ggatel destroy -u $us
-mdconfig -d -u $us
-mdconfig -d -u `expr $us + 1`

Property changes on: projects/clang380-import/tools/regression/geom_gate/test-3.sh
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: projects/clang380-import/tools/regression/geom_gate/Makefile
===================================================================
--- projects/clang380-import/tools/regression/geom_gate/Makefile	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_gate/Makefile	(nonexistent)
@@ -1,8 +0,0 @@
-#
-# $FreeBSD$
-#
-# Regression tests for geom_gate.
-#
-
-test:
-	@sh runtests.sh

Property changes on: projects/clang380-import/tools/regression/geom_gate/Makefile
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: projects/clang380-import/tools/regression/geom_gate/test-1.t
===================================================================
--- projects/clang380-import/tools/regression/geom_gate/test-1.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_gate/test-1.t	(revision 293687)
@@ -1,40 +1,62 @@
 #!/bin/sh
 # $FreeBSD$
 
+. `dirname $0`/conf.sh
+
+echo '1..2'
+
 base=`basename $0`
-us=45
-work="/dev/md${us}"
-src="/dev/md`expr $us + 1`"
-conf=`mktemp /tmp/$base.XXXXXX` || exit 1
+us=0
+while [ -c /dev/ggate${us} ]; do
+	: $(( us += 1 ))
+done
+conf=`mktemp $base.XXXXXX` || exit 1
+pidfile=/var/run/ggated.pid
+port=33080
 
-mdconfig -a -t malloc -s 1M -u $us || exit 1
-mdconfig -a -t malloc -s 1M -u `expr $us + 1` || exit 1
-dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1
-dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1
-sum=`cat $src | md5 -q`
+work=$(attach_md -t malloc -s 1M)
+src=$(attach_md -t malloc -s 1M)
 
-echo "127.0.0.1 RW $work" > $conf
-ggated $conf
-ggatec create -u $us 127.0.0.1 $work
+test_cleanup()
+{
+	ggatec destroy -f -u $us
+	pkill -F $pidfile
+	geom_test_cleanup
+}
+trap test_cleanup ABRT EXIT INT TERM
 
-dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1
+dd if=/dev/random of=/dev/$work bs=1m count=1 conv=sync
+dd if=/dev/random of=/dev/$src bs=1m count=1 conv=sync
+src_checksum=$(md5 -q /dev/$src)
 
-echo '1..2'
+echo "127.0.0.1 RW /dev/$work" > $conf
 
-if [ `cat $work | md5 -q` != $sum ]; then
-	echo 'not ok 1 - md5 checksum'
+if ! ggated -p $port $conf; then
+	echo 'ggated failed to start'
+	echo 'Bail out!'
+	exit 1
+fi
+sleep 1
+if ! ggatec create -p $port -u $us 127.0.0.1 /dev/$work; then
+	echo 'ggatec create failed'
+	echo 'Bail out!'
+	exit 1
+fi
+
+dd if=/dev/${src} of=/dev/ggate${us} bs=1m count=1
+sleep 1
+
+work_checksum=$(md5 -q /dev/$work)
+if [ "$work_checksum" != "$src_checksum" ]; then
+	echo "not ok 1 - md5 checksums didn't match ($work_checksum != $src_checksum)"
+	echo "not ok 2 # SKIP"
 else
 	echo 'ok 1 - md5 checksum'
 
-	if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then
-		echo 'not ok 2 - md5 checksum'
+	ggate_checksum=$(md5 -q /dev/ggate${us})
+	if [ "$ggate_checksum" != "$src_checksum" ]; then
+		echo "not ok 2 - md5 checksums didn't match ($ggate_checksum != $src_checksum)"
 	else
 		echo 'ok 2 - md5 checksum'
 	fi
 fi
-
-ggatec destroy -u $us
-mdconfig -d -u $us
-mdconfig -d -u `expr $us + 1`
-pkill ggated $conf
-rm -f $conf
Index: projects/clang380-import/tools/regression/geom_gate/test-2.t
===================================================================
--- projects/clang380-import/tools/regression/geom_gate/test-2.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_gate/test-2.t	(revision 293687)
@@ -1,31 +1,48 @@
 #!/bin/sh
 # $FreeBSD$
 
+. `dirname $0`/conf.sh
+
 base=`basename $0`
-us=45
-work=`mktemp /tmp/$base.XXXXXX` || exit 1
-src=`mktemp /tmp/$base.XXXXXX` || exit 1
+us=46
+work=`mktemp -u $base.XXXXXX` || exit 1
+src=`mktemp -u $base.XXXXXX` || exit 1
 
-dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1
-dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1
-sum=`md5 -q $src`
+test_cleanup()
+{
+	ggatel destroy -f -u $us
+	rm -f $work $src
 
-ggatel create -u $us $work
+	geom_test_cleanup
+}
+trap test_cleanup ABRT EXIT INT TERM
 
-dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1
+dd if=/dev/random of=$work bs=1m count=1 conv=sync
+dd if=/dev/random of=$src bs=1m count=1 conv=sync
 
+if ! ggatel create -u $us $work; then
+	echo 'ggatel create failed'
+	echo 'Bail out!'
+	exit 1
+fi
+
+dd if=${src} of=/dev/ggate${us} bs=1m count=1
+sleep 1
+
 echo '1..2'
 
-if [ `md5 -q $work` != $sum ]; then
-	echo 'not ok 1 - md5 checksum'
+src_checksum=$(md5 -q $src)
+work_checksum=$(md5 -q $work)
+if [ "$work_checksum" != "$src_checksum" ]; then
+	echo "not ok 1 - md5 checksums didn't match ($work_checksum != $src_checksum) # TODO: bug 204616"
+	echo 'not ok 2 # SKIP'
 else
 	echo 'ok 1 - md5 checksum'
-	if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then
-		echo 'not ok 2 - md5 checksum'
+
+	ggate_checksum=$(md5 -q /dev/ggate${us})
+	if [ "$ggate_checksum" != "$src_checksum" ]; then
+		echo "not ok 2 - md5 checksums didn't match ($ggate_checksum != $src_checksum)"
 	else
 		echo 'ok 2 - md5 checksum'
 	fi
 fi
-
-ggatel destroy -u $us
-rm -f $work $src
Index: projects/clang380-import/tools/regression/geom_gate/test-3.t
===================================================================
--- projects/clang380-import/tools/regression/geom_gate/test-3.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_gate/test-3.t	(revision 293687)
@@ -1,34 +1,48 @@
 #!/bin/sh
 # $FreeBSD$
 
+. `dirname $0`/conf.sh
+
 base=`basename $0`
-us=45
-work="/dev/md${us}"
-src="/dev/md`expr $us + 1`"
+us=47
 
-mdconfig -a -t malloc -s 1M -u $us || exit 1
-mdconfig -a -t malloc -s 1M -u `expr $us + 1` || exit 1
-dd if=/dev/random of=$work bs=1m count=1 >/dev/null 2>&1
-dd if=/dev/random of=$src bs=1m count=1 >/dev/null 2>&1
-sum=`cat $src | md5 -q`
+test_cleanup()
+{
+	ggatel destroy -f -u $us
 
-ggatel create -u $us $work
+	geom_test_cleanup
+}
+trap test_cleanup ABRT EXIT INT TERM
 
-dd if=${src} of=/dev/ggate${us} bs=1m count=1 >/dev/null 2>&1
+work=$(attach_md -t malloc -s 1M)
+src=$(attach_md -t malloc -s 1M)
 
+dd if=/dev/random of=/dev/$work bs=1m count=1 conv=sync
+dd if=/dev/random of=/dev/$src bs=1m count=1 conv=sync
+src_checksum=$(md5 -q /dev/$src)
+
+if ! ggatel create -u $us /dev/$work; then
+	echo 'ggatel create failed'
+	echo 'Bail out!'
+	exit 1
+fi
+
+dd if=/dev/${src} of=/dev/ggate${us} bs=1m count=1 conv=sync
+sleep 1
+
 echo '1..2'
 
-if [ `cat $work | md5 -q` != $sum ]; then
-	echo 'not ok 1 - md5 checksum'
+work_checksum=$(md5 -q /dev/$work)
+if [ "$work_checksum" != "$src_checksum" ]; then
+	echo "not ok 1 - md5 checksums didn't match ($work_checksum != $src_checksum)"
+	echo 'not ok 2 # SKIP'
 else
 	echo 'ok 1 - md5 checksum'
-	if [ `cat /dev/ggate${us} | md5 -q` != $sum ]; then
-		echo 'not ok 2 - md5 checksum'
+
+	ggate_checksum=$(md5 -q /dev/ggate${us})
+	if [ "$ggate_checksum" != "$src_checksum" ]; then
+		echo "not ok 2 - md5 checksums didn't match ($ggate_checksum != $src_checksum)"
 	else
 		echo 'ok 2 - md5 checksum'
 	fi
 fi
-
-ggatel destroy -u $us
-mdconfig -d -u $us
-mdconfig -d -u `expr $us + 1`
Index: projects/clang380-import/tools/regression/geom_nop/conf.sh
===================================================================
--- projects/clang380-import/tools/regression/geom_nop/conf.sh	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_nop/conf.sh	(revision 293687)
@@ -1,7 +1,14 @@
 #!/bin/sh
 # $FreeBSD$
 
 class="nop"
 base=`basename $0`
 
+gnop_test_cleanup()
+{
+	[ -c /dev/${us}.nop ] && gnop destroy ${us}.nop
+	geom_test_cleanup
+}
+trap gnop_test_cleanup ABRT EXIT INT TERM
+
 . `dirname $0`/../geom_subr.sh
Index: projects/clang380-import/tools/regression/geom_nop/test-1.t
===================================================================
--- projects/clang380-import/tools/regression/geom_nop/test-1.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_nop/test-1.t	(revision 293687)
@@ -1,25 +1,20 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..1"
 
-us=45
+us=$(attach_md -t malloc -s 1M) || exit 1
 
-mdconfig -a -t malloc -s 1M -u $us || exit 1
+gnop create /dev/${us} || exit 1
 
-gnop create /dev/md${us} || exit 1
-
 # Size of created device should be 1MB.
 
-size=`diskinfo /dev/md${us}.nop | awk '{print $3}'`
+size=`diskinfo /dev/${us}.nop | awk '{print $3}'`
 
 if [ $size -eq 1048576 ]; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
-
-gnop destroy md${us}.nop
-mdconfig -d -u $us
Index: projects/clang380-import/tools/regression/geom_nop/test-2.t
===================================================================
--- projects/clang380-import/tools/regression/geom_nop/test-2.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_nop/test-2.t	(revision 293687)
@@ -1,29 +1,26 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
-us=45
-src=`mktemp /tmp/$base.XXXXXX` || exit 1
-dst=`mktemp /tmp/$base.XXXXXX` || exit 1
+src=`mktemp $base.XXXXXX` || exit 1
+dst=`mktemp $base.XXXXXX` || exit 1
 
 echo "1..1"
 
 dd if=/dev/random of=${src} bs=1m count=1 >/dev/null 2>&1
 
-mdconfig -a -t malloc -s 1M -u $us || exit 1
+us=$(attach_md -t malloc -s 1M) || exit 1
 
-gnop create /dev/md${us} || exit 1
+gnop create /dev/${us} || exit 1
 
-dd if=${src} of=/dev/md${us}.nop bs=1m count=1 >/dev/null 2>&1
-dd if=/dev/md${us}.nop of=${dst} bs=1m count=1 >/dev/null 2>&1
+dd if=${src} of=/dev/${us}.nop bs=1m count=1 >/dev/null 2>&1
+dd if=/dev/${us}.nop of=${dst} bs=1m count=1 >/dev/null 2>&1
 
 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then
 	echo "not ok 1"
 else
 	echo "ok 1"
 fi
 
-gnop destroy md${us}.nop
-mdconfig -d -u $us
 rm -f ${src} ${dst}
Index: projects/clang380-import/tools/regression/geom_raid3/conf.sh
===================================================================
--- projects/clang380-import/tools/regression/geom_raid3/conf.sh	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_raid3/conf.sh	(revision 293687)
@@ -1,8 +1,15 @@
 #!/bin/sh
 # $FreeBSD$
 
 name="$(mktemp -u graid3.XXXXXX)"
 class="raid3"
 base=`basename $0`
 
+graid3_test_cleanup()
+{
+	[ -c /dev/$class/$name ] && graid3 stop $name
+	geom_test_cleanup
+}
+trap graid3_test_cleanup ABRT EXIT INT TERM
+
 . `dirname $0`/../geom_subr.sh
Index: projects/clang380-import/tools/regression/geom_raid3/test-1.t
===================================================================
--- projects/clang380-import/tools/regression/geom_raid3/test-1.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_raid3/test-1.t	(revision 293687)
@@ -1,37 +1,28 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..2"
 
-us0=45
-us1=`expr $us0 + 1`
-us2=`expr $us0 + 2`
+us0=$(attach_md -t malloc -s 1M) || exit 1
+us1=$(attach_md -t malloc -s 2M) || exit 1
+us2=$(attach_md -t malloc -s 3M) || exit 1
 
-mdconfig -a -t malloc -s 1M -u $us0 || exit 1
-mdconfig -a -t malloc -s 2M -u $us1 || exit 1
-mdconfig -a -t malloc -s 3M -u $us2 || exit 1
-
-graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} 2>/dev/null || exit 1
+graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} 2>/dev/null || exit 1
 devwait
 
 # Size of created device should be 2MB - 1024B.
 
 mediasize=`diskinfo /dev/raid3/${name} | awk '{print $3}'`
 if [ $mediasize -eq 2096128 ]; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 sectorsize=`diskinfo /dev/raid3/${name} | awk '{print $2}'`
 if [ $sectorsize -eq 1024 ]; then
 	echo "ok 2"
 else
 	echo "not ok 2"
 fi
-
-graid3 stop $name
-mdconfig -d -u $us0
-mdconfig -d -u $us1
-mdconfig -d -u $us2
Index: projects/clang380-import/tools/regression/geom_raid3/test-10.t
===================================================================
--- projects/clang380-import/tools/regression/geom_raid3/test-10.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_raid3/test-10.t	(revision 293687)
@@ -1,39 +1,32 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..1"
 
-us0=45
-us1=`expr $us0 + 1`
-us2=`expr $us0 + 2`
 ddbs=2048
 nblocks1=1024
 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)`
-src=`mktemp /tmp/$base.XXXXXX` || exit 1
-dst=`mktemp /tmp/$base.XXXXXX` || exit 1
+src=`mktemp $base.XXXXXX` || exit 1
+dst=`mktemp $base.XXXXXX` || exit 1
 
+us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+
 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1
-
-graid3 label -r $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1
+graid3 label -r $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1
 devwait
 
 dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then
 	echo "not ok 1"
 else
 	echo "ok 1"
 fi
 
-graid3 stop $name
-mdconfig -d -u $us0
-mdconfig -d -u $us1
-mdconfig -d -u $us2
 rm -f ${src} ${dst}
Index: projects/clang380-import/tools/regression/geom_raid3/test-11.t
===================================================================
--- projects/clang380-import/tools/regression/geom_raid3/test-11.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_raid3/test-11.t	(revision 293687)
@@ -1,39 +1,32 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..1"
 
-us0=45
-us1=`expr $us0 + 1`
-us2=`expr $us0 + 2`
 ddbs=2048
 nblocks1=1024
 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)`
-src=`mktemp /tmp/$base.XXXXXX` || exit 1
-dst=`mktemp /tmp/$base.XXXXXX` || exit 1
+src=`mktemp $base.XXXXXX` || exit 1
+dst=`mktemp $base.XXXXXX` || exit 1
 
+us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+
 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1
-
-graid3 label -w $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1
+graid3 label -w $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1
 devwait
 
 dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then
 	echo "not ok 1"
 else
 	echo "ok 1"
 fi
 
-graid3 stop $name
-mdconfig -d -u $us0
-mdconfig -d -u $us1
-mdconfig -d -u $us2
 rm -f ${src} ${dst}
Index: projects/clang380-import/tools/regression/geom_raid3/test-12.t
===================================================================
--- projects/clang380-import/tools/regression/geom_raid3/test-12.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_raid3/test-12.t	(revision 293687)
@@ -1,46 +1,38 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..1"
 
-us0=45
-us1=`expr $us0 + 1`
-us2=`expr $us0 + 2`
 nblocks1=9
 nblocks2=`expr $nblocks1 - 1`
 nblocks3=`expr $nblocks2 / 2`
 
-mdconfig -a -t malloc -s $nblocks1 -u $us0 || exit 1
-mdconfig -a -t malloc -s $nblocks1 -u $us1 || exit 1
-mdconfig -a -t malloc -s $nblocks1 -u $us2 || exit 1
+us0=$(attach_md -t malloc -s $nblocks1) || exit 1
+us1=$(attach_md -t malloc -s $nblocks1) || exit 1
+us2=$(attach_md -t malloc -s $nblocks1) || exit 1
 
-dd if=/dev/random of=/dev/md${us0} count=$nblocks1 >/dev/null 2>&1
-dd if=/dev/random of=/dev/md${us1} count=$nblocks1 >/dev/null 2>&1
-dd if=/dev/random of=/dev/md${us2} count=$nblocks1 >/dev/null 2>&1
+dd if=/dev/random of=/dev/${us0} count=$nblocks1 >/dev/null 2>&1
+dd if=/dev/random of=/dev/${us1} count=$nblocks1 >/dev/null 2>&1
+dd if=/dev/random of=/dev/${us2} count=$nblocks1 >/dev/null 2>&1
 
-graid3 label -w $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1
+graid3 label -w $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1
 devwait
 # Wait for synchronization.
 sleep 2
 graid3 stop $name
 # Break one component.
-dd if=/dev/random of=/dev/md${us1} count=$nblocks2 >/dev/null 2>&1
+dd if=/dev/random of=/dev/${us1} count=$nblocks2 >/dev/null 2>&1
 # Provoke retaste of the rest components.
-true > /dev/md${us0}
-true > /dev/md${us2}
+true > /dev/${us0}
+true > /dev/${us2}
 sleep 1
 
 dd if=/dev/raid3/${name} of=/dev/null bs=1k count=$nblocks3 >/dev/null 2>&1
 ec=$?
 if [ $ec -eq 0 ]; then
 	echo "not ok 1"
 else
 	echo "ok 1"
 fi
-
-graid3 stop $name
-mdconfig -d -u $us0
-mdconfig -d -u $us1
-mdconfig -d -u $us2
Index: projects/clang380-import/tools/regression/geom_raid3/test-2.t
===================================================================
--- projects/clang380-import/tools/regression/geom_raid3/test-2.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_raid3/test-2.t	(revision 293687)
@@ -1,39 +1,32 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..1"
 
-us0=45
-us1=`expr $us0 + 1`
-us2=`expr $us0 + 2`
 ddbs=2048
 nblocks1=1024
 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)`
-src=`mktemp /tmp/$base.XXXXXX` || exit 1
-dst=`mktemp /tmp/$base.XXXXXX` || exit 1
+src=`mktemp $base.XXXXXX` || exit 1
+dst=`mktemp $base.XXXXXX` || exit 1
 
+us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+
 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1
-
-graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1
+graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1
 devwait
 
 dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then
 	echo "not ok 1"
 else
 	echo "ok 1"
 fi
 
-graid3 stop $name
-mdconfig -d -u $us0
-mdconfig -d -u $us1
-mdconfig -d -u $us2
 rm -f ${src} ${dst}
Index: projects/clang380-import/tools/regression/geom_raid3/test-3.t
===================================================================
--- projects/clang380-import/tools/regression/geom_raid3/test-3.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_raid3/test-3.t	(revision 293687)
@@ -1,43 +1,36 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..1"
 
-us0=45
-us1=`expr $us0 + 1`
-us2=`expr $us0 + 2`
 ddbs=2048
 nblocks1=1024
 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)`
-src=`mktemp /tmp/$base.XXXXXX` || exit 1
-dst=`mktemp /tmp/$base.XXXXXX` || exit 1
+src=`mktemp $base.XXXXXX` || exit 1
+dst=`mktemp $base.XXXXXX` || exit 1
 
+us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+
 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1
-
-graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1
+graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1
 devwait
 
 dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
 #
 # Reading without one DATA component (so with parity).
 #
 graid3 remove -n 1 $name
 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then
 	echo "not ok 1"
 else
 	echo "ok 1"
 fi
 
-graid3 stop $name
-mdconfig -d -u $us0
-mdconfig -d -u $us1
-mdconfig -d -u $us2
 rm -f ${src} ${dst}
Index: projects/clang380-import/tools/regression/geom_raid3/test-4.t
===================================================================
--- projects/clang380-import/tools/regression/geom_raid3/test-4.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_raid3/test-4.t	(revision 293687)
@@ -1,43 +1,36 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..1"
 
-us0=45
-us1=`expr $us0 + 1`
-us2=`expr $us0 + 2`
 ddbs=2048
 nblocks1=1024
 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)`
-src=`mktemp /tmp/$base.XXXXXX` || exit 1
-dst=`mktemp /tmp/$base.XXXXXX` || exit 1
+src=`mktemp $base.XXXXXX` || exit 1
+dst=`mktemp $base.XXXXXX` || exit 1
 
+us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+
 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1
-
-graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1
+graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1
 devwait
 
 #
 # Writing without one DATA component.
 #
 graid3 remove -n 1 $name
 dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then
 	echo "not ok 1"
 else
 	echo "ok 1"
 fi
 
-graid3 stop $name
-mdconfig -d -u $us0
-mdconfig -d -u $us1
-mdconfig -d -u $us2
 rm -f ${src} ${dst}
Index: projects/clang380-import/tools/regression/geom_raid3/test-5.t
===================================================================
--- projects/clang380-import/tools/regression/geom_raid3/test-5.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_raid3/test-5.t	(revision 293687)
@@ -1,43 +1,36 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..1"
 
-us0=45
-us1=`expr $us0 + 1`
-us2=`expr $us0 + 2`
 ddbs=2048
 nblocks1=1024
 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)`
-src=`mktemp /tmp/$base.XXXXXX` || exit 1
-dst=`mktemp /tmp/$base.XXXXXX` || exit 1
+src=`mktemp $base.XXXXXX` || exit 1
+dst=`mktemp $base.XXXXXX` || exit 1
 
+us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+
 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1
-
-graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1
+graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1
 devwait
 
 #
 # Writing without PARITY component.
 #
 graid3 remove -n 2 $name
 dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then
 	echo "not ok 1"
 else
 	echo "ok 1"
 fi
 
-graid3 stop $name
-mdconfig -d -u $us0
-mdconfig -d -u $us1
-mdconfig -d -u $us2
 rm -f ${src} ${dst}
Index: projects/clang380-import/tools/regression/geom_raid3/test-6.t
===================================================================
--- projects/clang380-import/tools/regression/geom_raid3/test-6.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_raid3/test-6.t	(revision 293687)
@@ -1,47 +1,40 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..1"
 
-us0=45
-us1=`expr $us0 + 1`
-us2=`expr $us0 + 2`
 ddbs=2048
 nblocks1=1024
 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)`
-src=`mktemp /tmp/$base.XXXXXX` || exit 1
-dst=`mktemp /tmp/$base.XXXXXX` || exit 1
+src=`mktemp $base.XXXXXX` || exit 1
+dst=`mktemp $base.XXXXXX` || exit 1
 
+us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+
 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1
-
-graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1
+graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1
 devwait
 
 dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
 #
 # Rebuild of DATA component.
 #
 graid3 remove -n 1 $name
-dd if=/dev/zero of=/dev/md${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1
+dd if=/dev/zero of=/dev/${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1
 graid3 insert -n 1 $name md${us1}
 sleep 1
 
 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then
 	echo "not ok 1"
 else
 	echo "ok 1"
 fi
 
-graid3 stop $name
-mdconfig -d -u $us0
-mdconfig -d -u $us1
-mdconfig -d -u $us2
 rm -f ${src} ${dst}
Index: projects/clang380-import/tools/regression/geom_raid3/test-7.t
===================================================================
--- projects/clang380-import/tools/regression/geom_raid3/test-7.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_raid3/test-7.t	(revision 293687)
@@ -1,50 +1,43 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..1"
 
-us0=45
-us1=`expr $us0 + 1`
-us2=`expr $us0 + 2`
 ddbs=2048
 nblocks1=1024
 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)`
-src=`mktemp /tmp/$base.XXXXXX` || exit 1
-dst=`mktemp /tmp/$base.XXXXXX` || exit 1
+src=`mktemp $base.XXXXXX` || exit 1
+dst=`mktemp $base.XXXXXX` || exit 1
 
+us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+
 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1
-
-graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1
+graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1
 devwait
 
 dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
 #
 # Rebuild of PARITY component.
 #
 graid3 remove -n 2 $name
-dd if=/dev/zero of=/dev/md${us2} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1
+dd if=/dev/zero of=/dev/${us2} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1
 graid3 insert -n 2 $name md${us2}
 sleep 1
 # Remove DATA component, so PARITY component can be used while reading.
 graid3 remove -n 1 $name
-dd if=/dev/zero of=/dev/md${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1
+dd if=/dev/zero of=/dev/${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1
 
 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then
 	echo "not ok 1"
 else
 	echo "ok 1"
 fi
 
-graid3 stop $name
-mdconfig -d -u $us0
-mdconfig -d -u $us1
-mdconfig -d -u $us2
 rm -f ${src} ${dst}
Index: projects/clang380-import/tools/regression/geom_raid3/test-8.t
===================================================================
--- projects/clang380-import/tools/regression/geom_raid3/test-8.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_raid3/test-8.t	(revision 293687)
@@ -1,46 +1,39 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..1"
 
-us0=45
-us1=`expr $us0 + 1`
-us2=`expr $us0 + 2`
 ddbs=2048
 nblocks1=1024
 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)`
-src=`mktemp /tmp/$base.XXXXXX` || exit 1
-dst=`mktemp /tmp/$base.XXXXXX` || exit 1
+src=`mktemp $base.XXXXXX` || exit 1
+dst=`mktemp $base.XXXXXX` || exit 1
 
+us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+
 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1
-
-graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1
+graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1
 devwait
 
 #
 # Writing without DATA component and rebuild of DATA component.
 #
 graid3 remove -n 1 $name
-dd if=/dev/zero of=/dev/md${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1
+dd if=/dev/zero of=/dev/${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1
 dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 graid3 insert -n 1 $name md${us1}
 sleep 1
 
 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then
 	echo "not ok 1"
 else
 	echo "ok 1"
 fi
 
-graid3 stop $name
-mdconfig -d -u $us0
-mdconfig -d -u $us1
-mdconfig -d -u $us2
 rm -f ${src} ${dst}
Index: projects/clang380-import/tools/regression/geom_raid3/test-9.t
===================================================================
--- projects/clang380-import/tools/regression/geom_raid3/test-9.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_raid3/test-9.t	(revision 293687)
@@ -1,49 +1,42 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..1"
 
-us0=45
-us1=`expr $us0 + 1`
-us2=`expr $us0 + 2`
 ddbs=2048
 nblocks1=1024
 nblocks2=`expr $nblocks1 / \( $ddbs / 512 \)`
-src=`mktemp /tmp/$base.XXXXXX` || exit 1
-dst=`mktemp /tmp/$base.XXXXXX` || exit 1
+src=`mktemp $base.XXXXXX` || exit 1
+dst=`mktemp $base.XXXXXX` || exit 1
 
+us0=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us1=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+us2=$(attach_md -t malloc -s $(expr $nblocks1 + 1)) || exit 1
+
 dd if=/dev/random of=${src} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us0 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us1 || exit 1
-mdconfig -a -t malloc -s `expr $nblocks1 + 1` -u $us2 || exit 1
-
-graid3 label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1
+graid3 label $name /dev/${us0} /dev/${us1} /dev/${us2} || exit 1
 devwait
 
 #
 # Writing without PARITY component and rebuild of PARITY component.
 #
 graid3 remove -n 2 $name
-dd if=/dev/zero of=/dev/md${us2} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1
+dd if=/dev/zero of=/dev/${us2} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1
 dd if=${src} of=/dev/raid3/${name} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 graid3 insert -n 2 $name md${us2}
 sleep 1
 # Remove DATA component, so PARITY component can be used while reading.
 graid3 remove -n 1 $name
-dd if=/dev/zero of=/dev/md${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1
+dd if=/dev/zero of=/dev/${us1} bs=512 count=`expr $nblocks1 + 1` >/dev/null 2>&1
 
 dd if=/dev/raid3/${name} of=${dst} bs=$ddbs count=$nblocks2 >/dev/null 2>&1
 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then
 	echo "not ok 1"
 else
 	echo "ok 1"
 fi
 
-graid3 stop $name
-mdconfig -d -u $us0
-mdconfig -d -u $us1
-mdconfig -d -u $us2
 rm -f ${src} ${dst}
Index: projects/clang380-import/tools/regression/geom_shsec/conf.sh
===================================================================
--- projects/clang380-import/tools/regression/geom_shsec/conf.sh	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_shsec/conf.sh	(revision 293687)
@@ -1,8 +1,15 @@
 #!/bin/sh
 # $FreeBSD$
 
 name="$(mktemp -u shsec.XXXXXX)"
 class="shsec"
 base=`basename $0`
 
+shsec_test_cleanup()
+{
+	[ -c /dev/$class/$name ] && gshsec stop $name
+	geom_test_cleanup
+}
+trap shsec_test_cleanup ABRT EXIT INT TERM
+
 . `dirname $0`/../geom_subr.sh
Index: projects/clang380-import/tools/regression/geom_shsec/test-1.t
===================================================================
--- projects/clang380-import/tools/regression/geom_shsec/test-1.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_shsec/test-1.t	(revision 293687)
@@ -1,37 +1,28 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..2"
 
-us0=45
-us1=`expr $us0 + 1`
-us2=`expr $us0 + 2`
+us0=$(attach_md -t malloc -s 1M) || exit 1
+us1=$(attach_md -t malloc -s 2M) || exit 1
+us2=$(attach_md -t malloc -s 3M) || exit 1
 
-mdconfig -a -t malloc -s 1M -u $us0 || exit 1
-mdconfig -a -t malloc -s 2M -u $us1 || exit 1
-mdconfig -a -t malloc -s 3M -u $us2 || exit 1
-
-gshsec label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} 2>/dev/null || exit 1
+gshsec label $name /dev/${us0} /dev/${us1} /dev/${us2} 2>/dev/null || exit 1
 devwait
 
 # Size of created device should be 1MB - 512B.
 
 mediasize=`diskinfo /dev/shsec/${name} | awk '{print $3}'`
 if [ $mediasize -eq 1048064 ]; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
 sectorsize=`diskinfo /dev/shsec/${name} | awk '{print $2}'`
 if [ $sectorsize -eq 512 ]; then
 	echo "ok 2"
 else
 	echo "not ok 2"
 fi
-
-gshsec stop $name
-mdconfig -d -u $us0
-mdconfig -d -u $us1
-mdconfig -d -u $us2
Index: projects/clang380-import/tools/regression/geom_shsec/test-2.t
===================================================================
--- projects/clang380-import/tools/regression/geom_shsec/test-2.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_shsec/test-2.t	(revision 293687)
@@ -1,59 +1,52 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..4"
 
-us0=45
-us1=`expr $us0 + 1`
-us2=`expr $us0 + 2`
 nblocks1=1024
 nblocks2=`expr $nblocks1 + 1`
-src=`mktemp /tmp/$base.XXXXXX` || exit 1
-dst=`mktemp /tmp/$base.XXXXXX` || exit 1
+src=`mktemp $base.XXXXXX` || exit 1
+dst=`mktemp $base.XXXXXX` || exit 1
 
 dd if=/dev/random of=${src} count=$nblocks1 >/dev/null 2>&1
 
-mdconfig -a -t malloc -s $nblocks2 -u $us0 || exit 1
-mdconfig -a -t malloc -s $nblocks2 -u $us1 || exit 1
-mdconfig -a -t malloc -s $nblocks2 -u $us2 || exit 1
+us0=$(attach_md -t malloc -s $nblocks2) || exit 1
+us1=$(attach_md -t malloc -s $nblocks2) || exit 1
+us2=$(attach_md -t malloc -s $nblocks2) || exit 1
 
-gshsec label $name /dev/md${us0} /dev/md${us1} /dev/md${us2} || exit 1
+gshsec label $name /dev/$us0 /dev/$us1 /dev/$us2 || exit 1
 devwait
 
 dd if=${src} of=/dev/shsec/${name} count=$nblocks1 >/dev/null 2>&1
 
 dd if=/dev/shsec/${name} of=${dst} count=$nblocks1 >/dev/null 2>&1
 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then
 	echo "not ok 1"
 else
 	echo "ok 1"
 fi
 
-dd if=/dev/md${us0} of=${dst} count=$nblocks1 >/dev/null 2>&1
+dd if=/dev/${us0} of=${dst} count=$nblocks1 >/dev/null 2>&1
 if [ `md5 -q ${src}` = `md5 -q ${dst}` ]; then
 	echo "not ok 2"
 else
 	echo "ok 2"
 fi
 
-dd if=/dev/md${us1} of=${dst} count=$nblocks1 >/dev/null 2>&1
+dd if=/dev/${us1} of=${dst} count=$nblocks1 >/dev/null 2>&1
 if [ `md5 -q ${src}` = `md5 -q ${dst}` ]; then
 	echo "not ok 3"
 else
 	echo "ok 3"
 fi
 
-dd if=/dev/md${us2} of=${dst} count=$nblocks1 >/dev/null 2>&1
+dd if=/dev/${us2} of=${dst} count=$nblocks1 >/dev/null 2>&1
 if [ `md5 -q ${src}` = `md5 -q ${dst}` ]; then
 	echo "not ok 4"
 else
 	echo "ok 4"
 fi
 
-gshsec stop $name
-mdconfig -d -u $us0
-mdconfig -d -u $us1
-mdconfig -d -u $us2
 rm -f ${src} ${dst}
Index: projects/clang380-import/tools/regression/geom_stripe/conf.sh
===================================================================
--- projects/clang380-import/tools/regression/geom_stripe/conf.sh	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_stripe/conf.sh	(revision 293687)
@@ -1,8 +1,15 @@
 #!/bin/sh
 # $FreeBSD$
 
 name="$(mktemp -u stripe.XXXXXX)"
 class="stripe"
 base=`basename $0`
 
+gstripe_test_cleanup()
+{
+	[ -c /dev/$class/$name ] && gstripe destroy $name
+	geom_test_cleanup
+}
+trap gstripe_test_cleanup ABRT EXIT INT TERM
+
 . `dirname $0`/../geom_subr.sh
Index: projects/clang380-import/tools/regression/geom_stripe/test-1.t
===================================================================
--- projects/clang380-import/tools/regression/geom_stripe/test-1.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_stripe/test-1.t	(revision 293687)
@@ -1,30 +1,23 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..1"
 
-us=45
+us0=$(attach_md -t malloc -s 1M) || exit 1
+us1=$(attach_md -t malloc -s 2M) || exit 1
+us2=$(attach_md -t malloc -s 3M) || exit 1
 
-mdconfig -a -t malloc -s 1M -u $us || exit 1
-mdconfig -a -t malloc -s 2M -u `expr $us + 1` || exit 1
-mdconfig -a -t malloc -s 3M -u `expr $us + 2` || exit 1
-
-gstripe create -s 16384 $name /dev/md${us} /dev/md`expr $us + 1` /dev/md`expr $us + 2` || exit 1
+gstripe create -s 16384 $name /dev/$us0 /dev/$us1 /dev/$us2 || exit 1
 devwait
 
 # Size of created device should be 1MB * 3.
 
 size=`diskinfo /dev/stripe/${name} | awk '{print $3}'`
 
 if [ $size -eq 3145728 ]; then
 	echo "ok 1"
 else
 	echo "not ok 1"
 fi
-
-gstripe destroy $name
-mdconfig -d -u $us
-mdconfig -d -u `expr $us + 1`
-mdconfig -d -u `expr $us + 2`
Index: projects/clang380-import/tools/regression/geom_stripe/test-2.t
===================================================================
--- projects/clang380-import/tools/regression/geom_stripe/test-2.t	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_stripe/test-2.t	(revision 293687)
@@ -1,35 +1,30 @@
 #!/bin/sh
 # $FreeBSD$
 
 . `dirname $0`/conf.sh
 
 echo "1..1"
 
-us=45
 tsize=3
-src=`mktemp /tmp/$base.XXXXXX` || exit 1
-dst=`mktemp /tmp/$base.XXXXXX` || exit 1
+src=`mktemp $base.XXXXXX` || exit 1
+dst=`mktemp $base.XXXXXX` || exit 1
 
 dd if=/dev/random of=${src} bs=1m count=$tsize >/dev/null 2>&1
 
-mdconfig -a -t malloc -s 1M -u $us || exit 1
-mdconfig -a -t malloc -s 2M -u `expr $us + 1` || exit 1
-mdconfig -a -t malloc -s 3M -u `expr $us + 2` || exit 1
+us0=$(attach_md -t malloc -s 1M) || exit 1
+us1=$(attach_md -t malloc -s 2M) || exit 1
+us2=$(attach_md -t malloc -s 3M) || exit 1
 
-gstripe create -s 8192 $name /dev/md${us} /dev/md`expr $us + 1` /dev/md`expr $us + 2` || exit 1
+gstripe create -s 8192 $name /dev/$us0 /dev/$us1 /dev/$us2 || exit 1
 devwait
 
 dd if=${src} of=/dev/stripe/${name} bs=1m count=$tsize >/dev/null 2>&1
 dd if=/dev/stripe/${name} of=${dst} bs=1m count=$tsize >/dev/null 2>&1
 
 if [ `md5 -q ${src}` != `md5 -q ${dst}` ]; then
 	echo "not ok 1"
 else
 	echo "ok 1"
 fi
 
-gstripe destroy $name
-mdconfig -d -u $us
-mdconfig -d -u `expr $us + 1`
-mdconfig -d -u `expr $us + 2`
 rm -f ${src} ${dst}
Index: projects/clang380-import/tools/regression/geom_subr.sh
===================================================================
--- projects/clang380-import/tools/regression/geom_subr.sh	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_subr.sh	(revision 293687)
@@ -1,49 +1,60 @@
 #!/bin/sh
 # $FreeBSD$
 
-if [ $(id -u) -ne 0 ]; then
-	echo 'Tests must be run as root'
-	echo 'Bail out!'
-	exit 1
-fi
-kldstat -q -m g_${class} || geom ${class} load || exit 1
-
 devwait()
 {
 	while :; do
 		if [ -c /dev/${class}/${name} ]; then
 			return
 		fi
 		sleep 0.2
 	done
 }
 
-# Need to keep track of the test md devices to avoid the scenario where a test
-# failing will cause the other tests to bomb out, or a test failing will leave
-# a large number of md(4) devices lingering around
-: ${TMPDIR=/tmp}
-export TMPDIR
-TEST_MDS_FILE=$(mktemp ${TMPDIR}/test_mds.XXXXXX) || exit 1
-
 attach_md()
 {
 	local test_md
 
 	test_md=$(mdconfig -a "$@") || exit
 	echo $test_md >> $TEST_MDS_FILE || exit
 	echo $test_md
 }
 
 geom_test_cleanup()
 {
 	local test_md
 
-	if [ -f $TEST_MDS_FILE ]; then
+	if [ -f "$TEST_MDS_FILE" ]; then
 		while read test_md; do
 			# The "#" tells the TAP parser this is a comment
 			echo "# Removing test memory disk: $test_md"
 			mdconfig -d -u $test_md
 		done < $TEST_MDS_FILE
 	fi
-	rm -f $TEST_MDS_FILE
+	rm -f "$TEST_MDS_FILE"
 }
+
+if [ $(id -u) -ne 0 ]; then
+	echo 'Tests must be run as root'
+	echo 'Bail out!'
+	exit 1
+fi
+# If the geom class isn't already loaded, try loading it.
+if ! kldstat -q -m g_${class}; then
+	if ! geom ${class} load; then
+		echo "Could not load module for geom class=${class}"
+		echo 'Bail out!'
+		exit 1
+	fi
+fi
+
+# Need to keep track of the test md devices to avoid the scenario where a test
+# failing will cause the other tests to bomb out, or a test failing will leave
+# a large number of md(4) devices lingering around
+: ${TMPDIR=/tmp}
+export TMPDIR
+if ! TEST_MDS_FILE=$(mktemp ${TMPDIR}/test_mds.XXXXXX); then
+	echo 'Failed to create temporary file for tracking the test md(4) devices'
+	echo 'Bail out!'
+	exit 1
+fi
Index: projects/clang380-import/tools/regression/geom_uzip/runtests.sh
===================================================================
--- projects/clang380-import/tools/regression/geom_uzip/runtests.sh	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_uzip/runtests.sh	(nonexistent)
@@ -1,10 +0,0 @@
-#!/bin/sh
-#
-# $FreeBSD$
-#
-
-dir=`dirname $0`
-
-for ts in `dirname $0`/test-*.sh; do
-	sh $ts
-done

Property changes on: projects/clang380-import/tools/regression/geom_uzip/runtests.sh
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: projects/clang380-import/tools/regression/geom_uzip/test-1.sh
===================================================================
--- projects/clang380-import/tools/regression/geom_uzip/test-1.sh	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_uzip/test-1.sh	(nonexistent)
@@ -1,36 +0,0 @@
-#!/bin/sh
-#
-# $FreeBSD$
-#
-
-mntpoint="/mnt/test-1"
-
-#
-# prepare
-kldload geom_uzip
-uudecode test-1.img.uzip.uue
-num=`mdconfig -an -f test-1.img.uzip` || exit 1
-sleep 1
-
-#
-# mount
-mkdir -p "${mntpoint}"
-mount -o ro /dev/md${num}.uzip "${mntpoint}" || exit 1
-
-#
-# compare
-#cat "${mntpoint}/etalon.txt"
-diff -u etalon/etalon.txt "${mntpoint}/etalon.txt"
-if [ $? -eq 0 ]; then
-	echo "PASS"
-else
-	echo "FAIL"
-fi
-
-#
-# cleanup
-umount "${mntpoint}"
-rmdir "${mntpoint}"
-mdconfig -d -u ${num}
-sleep 1
-kldunload geom_uzip

Property changes on: projects/clang380-import/tools/regression/geom_uzip/test-1.sh
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: projects/clang380-import/tools/regression/geom_uzip/test-2.sh
===================================================================
--- projects/clang380-import/tools/regression/geom_uzip/test-2.sh	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_uzip/test-2.sh	(nonexistent)
@@ -1,15 +0,0 @@
-#!/bin/sh
-#
-# $FreeBSD$
-#
-
-#
-# prepare
-kldload geom_uzip
-uudecode test-1.img.uzip.uue
-num=`mdconfig -an -f test-1.img.uzip` || exit 1
-sleep 1
-
-#
-# destroy
-kldunload geom_uzip

Property changes on: projects/clang380-import/tools/regression/geom_uzip/test-2.sh
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: projects/clang380-import/tools/regression/geom_uzip/Makefile
===================================================================
--- projects/clang380-import/tools/regression/geom_uzip/Makefile	(revision 293686)
+++ projects/clang380-import/tools/regression/geom_uzip/Makefile	(revision 293687)
@@ -1,23 +1,23 @@
 #
 # $FreeBSD$
 #
 # Regression test for geom_ugz.
 #
 
 IMAGE=		test-1.img
 ZIMAGE=		${IMAGE}.uzip
 UZIMAGE=	${ZIMAGE}.uue
 
 test:
-	@sh runtests.sh
+	prove -rv ./test-1.t
 
 image:
 	makefs -s 1048576 ${IMAGE} etalon
 	printf "#\n# $$" >${UZIMAGE}
 	printf "FreeBSD$$\n#\n\n" >> ${UZIMAGE}
 	mkuzip -o ${ZIMAGE} ${IMAGE}
 	uuencode ${ZIMAGE} ${ZIMAGE} >>${UZIMAGE}
 	rm ${ZIMAGE}
 
 clean:
 	rm -f ${IMAGE} ${ZIMAGE}
Index: projects/clang380-import/tools/regression/geom_uzip/conf.sh
===================================================================
--- projects/clang380-import/tools/regression/geom_uzip/conf.sh	(nonexistent)
+++ projects/clang380-import/tools/regression/geom_uzip/conf.sh	(revision 293687)
@@ -0,0 +1,24 @@
+#!/bin/sh
+# $FreeBSD$
+
+class="uzip"
+base=`basename $0`
+
+# Exit/signal handler: unmount and remove the scratch mount point (if one was
+# created), then let geom_test_cleanup detach the tracked md(4) devices.
+uzip_test_cleanup()
+{
+	# $mntpoint is only assigned at the bottom of this file, so it is still
+	# empty if geom_subr.sh bailed out while being sourced.
+	if [ -n "$mntpoint" ]; then
+		umount "$mntpoint"
+		rmdir "$mntpoint"
+	fi
+	geom_test_cleanup
+}
+trap uzip_test_cleanup ABRT EXIT INT TERM
+
+. `dirname $0`/../geom_subr.sh
+
+# NOTE: make sure $TMPDIR has been set by geom_subr.sh if unset [by kyua, etc]
+mntpoint=$(mktemp -d tmp.XXXXXX) || exit

Property changes on: projects/clang380-import/tools/regression/geom_uzip/conf.sh
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/clang380-import/tools/regression/geom_uzip/test-1.t
===================================================================
--- projects/clang380-import/tools/regression/geom_uzip/test-1.t	(nonexistent)
+++ projects/clang380-import/tools/regression/geom_uzip/test-1.t	(revision 293687)
@@ -0,0 +1,22 @@
+#!/bin/sh
+# $FreeBSD$
+
+testsdir=$(dirname $0)
+. $testsdir/conf.sh
+
+echo "1..1"
+
+UUE=$testsdir/test-1.img.uzip.uue
+uudecode $UUE
+us0=$(attach_md -f $(basename $UUE .uue)) || exit 1
+sleep 1
+
+mount -o ro /dev/${us0}.uzip "${mntpoint}" || exit 1
+
+#cat "${mntpoint}/etalon.txt"
+diff -I '\$FreeBSD.*\$' -u $testsdir/etalon/etalon.txt "${mntpoint}/etalon.txt"
+if [ $? -eq 0 ]; then
+	echo "ok 1"
+else
+	echo "not ok 1"
+fi

Property changes on: projects/clang380-import/tools/regression/geom_uzip/test-1.t
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: projects/clang380-import/usr.bin/netstat/mbuf.c
===================================================================
--- projects/clang380-import/usr.bin/netstat/mbuf.c	(revision 293686)
+++ projects/clang380-import/usr.bin/netstat/mbuf.c	(revision 293687)
@@ -1,339 +1,359 @@
 /*-
  * Copyright (c) 1983, 1988, 1993
  *	The Regents of the University of California.
  * Copyright (c) 2005 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if 0
 #ifndef lint
 static char sccsid[] = "@(#)mbuf.c	8.1 (Berkeley) 6/6/93";
 #endif /* not lint */
 #endif
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/sf_buf.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 
 #include <err.h>
 #include <kvm.h>
 #include <memstat.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdbool.h>
 #include <string.h>
 #include <libxo/xo.h>
 #include "netstat.h"
 
 /*
  * Print mbuf statistics.
  */
 void
 mbpr(void *kvmd, u_long mbaddr)
 {
 	struct memory_type_list *mtlp;
 	struct memory_type *mtp;
 	uintmax_t mbuf_count, mbuf_bytes, mbuf_free, mbuf_failures, mbuf_size;
 	uintmax_t mbuf_sleeps;
 	uintmax_t cluster_count, cluster_limit, cluster_free;
 	uintmax_t cluster_failures, cluster_size, cluster_sleeps;
 	uintmax_t packet_count, packet_bytes, packet_free, packet_failures;
 	uintmax_t packet_sleeps;
 	uintmax_t tag_bytes;
 	uintmax_t jumbop_count, jumbop_limit, jumbop_free;
 	uintmax_t jumbop_failures, jumbop_sleeps, jumbop_size;
 	uintmax_t jumbo9_count, jumbo9_limit, jumbo9_free;
 	uintmax_t jumbo9_failures, jumbo9_sleeps, jumbo9_size;
 	uintmax_t jumbo16_count, jumbo16_limit, jumbo16_free;
 	uintmax_t jumbo16_failures, jumbo16_sleeps, jumbo16_size;
 	uintmax_t bytes_inuse, bytes_incache, bytes_total;
 	int nsfbufs, nsfbufspeak, nsfbufsused;
 	struct sfstat sfstat;
 	size_t mlen;
 	int error;
 
 	mtlp = memstat_mtl_alloc();
 	if (mtlp == NULL) {
 		xo_warn("memstat_mtl_alloc");
 		return;
 	}
 
 	/*
 	 * Use memstat_*_all() because some mbuf-related memory is in uma(9),
 	 * and some malloc(9).
 	 */
 	if (live) {
 		if (memstat_sysctl_all(mtlp, 0) < 0) {
 			xo_warnx("memstat_sysctl_all: %s",
 			    memstat_strerror(memstat_mtl_geterror(mtlp)));
 			goto out;
 		}
 	} else {
 		if (memstat_kvm_all(mtlp, kvmd) < 0) {
 			error = memstat_mtl_geterror(mtlp);
 			if (error == MEMSTAT_ERROR_KVM)
 				xo_warnx("memstat_kvm_all: %s",
 				    kvm_geterr(kvmd));
 			else
 				xo_warnx("memstat_kvm_all: %s",
 				    memstat_strerror(error));
 			goto out;
 		}
 	}
 
 	mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_MEM_NAME);
 	if (mtp == NULL) {
 		xo_warnx("memstat_mtl_find: zone %s not found", MBUF_MEM_NAME);
 		goto out;
 	}
 	mbuf_count = memstat_get_count(mtp);
 	mbuf_bytes = memstat_get_bytes(mtp);
 	mbuf_free = memstat_get_free(mtp);
 	mbuf_failures = memstat_get_failures(mtp);
 	mbuf_sleeps = memstat_get_sleeps(mtp);
 	mbuf_size = memstat_get_size(mtp);
 
 	mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_PACKET_MEM_NAME);
 	if (mtp == NULL) {
 		xo_warnx("memstat_mtl_find: zone %s not found",
 		    MBUF_PACKET_MEM_NAME);
 		goto out;
 	}
 	packet_count = memstat_get_count(mtp);
 	packet_bytes = memstat_get_bytes(mtp);
 	packet_free = memstat_get_free(mtp);
 	packet_sleeps = memstat_get_sleeps(mtp);
 	packet_failures = memstat_get_failures(mtp);
 
 	mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_CLUSTER_MEM_NAME);
 	if (mtp == NULL) {
 		xo_warnx("memstat_mtl_find: zone %s not found",
 		    MBUF_CLUSTER_MEM_NAME);
 		goto out;
 	}
 	cluster_count = memstat_get_count(mtp);
 	cluster_limit = memstat_get_countlimit(mtp);
 	cluster_free = memstat_get_free(mtp);
 	cluster_failures = memstat_get_failures(mtp);
 	cluster_sleeps = memstat_get_sleeps(mtp);
 	cluster_size = memstat_get_size(mtp);
 
 	mtp = memstat_mtl_find(mtlp, ALLOCATOR_MALLOC, MBUF_TAG_MEM_NAME);
 	if (mtp == NULL) {
 		xo_warnx("memstat_mtl_find: malloc type %s not found",
 		    MBUF_TAG_MEM_NAME);
 		goto out;
 	}
 	tag_bytes = memstat_get_bytes(mtp);
 
 	mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_JUMBOP_MEM_NAME);
 	if (mtp == NULL) {
 		xo_warnx("memstat_mtl_find: zone %s not found",
 		    MBUF_JUMBOP_MEM_NAME);
 		goto out;
 	}
 	jumbop_count = memstat_get_count(mtp);
 	jumbop_limit = memstat_get_countlimit(mtp);
 	jumbop_free = memstat_get_free(mtp);
 	jumbop_failures = memstat_get_failures(mtp);
 	jumbop_sleeps = memstat_get_sleeps(mtp);
 	jumbop_size = memstat_get_size(mtp);
 
 	mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_JUMBO9_MEM_NAME);
 	if (mtp == NULL) {
 		xo_warnx("memstat_mtl_find: zone %s not found",
 		    MBUF_JUMBO9_MEM_NAME);
 		goto out;
 	}
 	jumbo9_count = memstat_get_count(mtp);
 	jumbo9_limit = memstat_get_countlimit(mtp);
 	jumbo9_free = memstat_get_free(mtp);
 	jumbo9_failures = memstat_get_failures(mtp);
 	jumbo9_sleeps = memstat_get_sleeps(mtp);
 	jumbo9_size = memstat_get_size(mtp);
 
 	mtp = memstat_mtl_find(mtlp, ALLOCATOR_UMA, MBUF_JUMBO16_MEM_NAME);
 	if (mtp == NULL) {
 		xo_warnx("memstat_mtl_find: zone %s not found",
 		    MBUF_JUMBO16_MEM_NAME);
 		goto out;
 	}
 	jumbo16_count = memstat_get_count(mtp);
 	jumbo16_limit = memstat_get_countlimit(mtp);
 	jumbo16_free = memstat_get_free(mtp);
 	jumbo16_failures = memstat_get_failures(mtp);
 	jumbo16_sleeps = memstat_get_sleeps(mtp);
 	jumbo16_size = memstat_get_size(mtp);
 
 	xo_open_container("mbuf-statistics");
 
 	xo_emit("{:mbuf-current/%ju}/{:mbuf-cache/%ju}/{:mbuf-total/%ju} "
 	    "{N:mbufs in use (current\\/cache\\/total)}\n",
 	    mbuf_count + packet_count, mbuf_free + packet_free,
 	    mbuf_count + packet_count + mbuf_free + packet_free);
 
 	xo_emit("{:cluster-current/%ju}/{:cluster-cache/%ju}/"
 	    "{:cluster-total/%ju}/{:cluster-max/%ju} "
 	    "{N:mbuf clusters in use (current\\/cache\\/total\\/max)}\n",
 	    cluster_count - packet_free, cluster_free + packet_free,
 	    cluster_count + cluster_free, cluster_limit);
 
 	xo_emit("{:packet-count/%ju}/{:packet-free/%ju} "
 	    "{N:mbuf+clusters out of packet secondary zone in use "
 	    "(current\\/cache)}\n",
 	    packet_count, packet_free);
 
 	xo_emit("{:jumbo-count/%ju}/{:jumbo-cache/%ju}/{:jumbo-total/%ju}/"
 	    "{:jumbo-max/%ju} {:jumbo-page-size/%ju}{U:k} {N:(page size)} "
 	    "{N:jumbo clusters in use (current\\/cache\\/total\\/max)}\n",
 	    jumbop_count, jumbop_free, jumbop_count + jumbop_free,
 	    jumbop_limit, jumbop_size / 1024);
 
 	xo_emit("{:jumbo9-count/%ju}/{:jumbo9-cache/%ju}/"
 	    "{:jumbo9-total/%ju}/{:jumbo9-max/%ju} "
 	    "{N:9k jumbo clusters in use (current\\/cache\\/total\\/max)}\n",
 	    jumbo9_count, jumbo9_free, jumbo9_count + jumbo9_free,
 	    jumbo9_limit);
 
 	xo_emit("{:jumbo16-count/%ju}/{:jumbo16-cache/%ju}/"
 	    "{:jumbo16-total/%ju}/{:jumbo16-limit/%ju} "
 	    "{N:16k jumbo clusters in use (current\\/cache\\/total\\/max)}\n",
 	    jumbo16_count, jumbo16_free, jumbo16_count + jumbo16_free,
 	    jumbo16_limit);
 
 #if 0
 	xo_emit("{:tag-count/%ju} {N:mbuf tags in use}\n", tag_count);
 #endif
 
 	/*-
 	 * Calculate in-use bytes as:
 	 * - straight mbuf memory
 	 * - mbuf memory in packets
 	 * - the clusters attached to packets
 	 * - and the rest of the non-packet-attached clusters.
 	 * - m_tag memory
 	 * This avoids counting the clusters attached to packets in the cache.
 	 * This currently excludes sf_buf space.
 	 */
 	bytes_inuse =
 	    mbuf_bytes +			/* straight mbuf memory */
 	    packet_bytes +			/* mbufs in packets */
 	    (packet_count * cluster_size) +	/* clusters in packets */
 	    /* other clusters */
 	    ((cluster_count - packet_count - packet_free) * cluster_size) +
 	    tag_bytes +
 	    (jumbop_count * jumbop_size) +	/* jumbo clusters */
 	    (jumbo9_count * jumbo9_size) +
 	    (jumbo16_count * jumbo16_size);
 
 	/*
 	 * Calculate in-cache bytes as:
 	 * - cached straught mbufs
 	 * - cached packet mbufs
 	 * - cached packet clusters
 	 * - cached straight clusters
 	 * This currently excludes sf_buf space.
 	 */
 	bytes_incache =
 	    (mbuf_free * mbuf_size) +		/* straight free mbufs */
 	    (packet_free * mbuf_size) +		/* mbufs in free packets */
 	    (packet_free * cluster_size) +	/* clusters in free packets */
 	    (cluster_free * cluster_size) +	/* free clusters */
 	    (jumbop_free * jumbop_size) +	/* jumbo clusters */
 	    (jumbo9_free * jumbo9_size) +
 	    (jumbo16_free * jumbo16_size);
 
 	/*
 	 * Total is bytes in use + bytes in cache.  This doesn't take into
 	 * account various other misc data structures, overhead, etc, but
 	 * gives the user something useful despite that.
 	 */
 	bytes_total = bytes_inuse + bytes_incache;
 
 	xo_emit("{:bytes-in-use/%ju}{U:K}/{:bytes-in-cache/%ju}{U:K}/"
 	    "{:bytes-total/%ju}{U:K} "
 	    "{N:bytes allocated to network (current\\/cache\\/total)}\n",
 	    bytes_inuse / 1024, bytes_incache / 1024, bytes_total / 1024);
 
 	xo_emit("{:mbuf-failures/%ju}/{:cluster-failures/%ju}/"
 	    "{:packet-failures/%ju} {N:requests for mbufs denied "
 	    "(mbufs\\/clusters\\/mbuf+clusters)}\n",
 	    mbuf_failures, cluster_failures, packet_failures);
 	xo_emit("{:mbuf-sleeps/%ju}/{:cluster-sleeps/%ju}/{:packet-sleeps/%ju} "
 	    "{N:requests for mbufs delayed "
 	    "(mbufs\\/clusters\\/mbuf+clusters)}\n",
 	    mbuf_sleeps, cluster_sleeps, packet_sleeps);
 
 	xo_emit("{:jumbop-sleeps/%ju}/{:jumbo9-sleeps/%ju}/"
 	    "{:jumbo16-sleeps/%ju} {N:/requests for jumbo clusters delayed "
 	    "(%juk\\/9k\\/16k)}\n",
 	    jumbop_sleeps, jumbo9_sleeps, jumbo16_sleeps, jumbop_size / 1024);
 	xo_emit("{:jumbop-failures/%ju}/{:jumbo9-failures/%ju}/"
 	    "{:jumbo16-failures/%ju} {N:/requests for jumbo clusters denied "
 	    "(%juk\\/9k\\/16k)}\n",
 	    jumbop_failures, jumbo9_failures, jumbo16_failures,
 	    jumbop_size / 1024);
 
 	mlen = sizeof(nsfbufs);
 	if (live &&
 	    sysctlbyname("kern.ipc.nsfbufs", &nsfbufs, &mlen, NULL, 0) == 0 &&
 	    sysctlbyname("kern.ipc.nsfbufsused", &nsfbufsused, &mlen,
 	    NULL, 0) == 0 &&
 	    sysctlbyname("kern.ipc.nsfbufspeak", &nsfbufspeak, &mlen,
 	    NULL, 0) == 0)
 		xo_emit("{:nsfbufs-current/%d}/{:nsfbufs-peak/%d}/"
 		    "{:nsfbufs/%d} "
 		    "{N:sfbufs in use (current\\/peak\\/max)}\n",
 		    nsfbufsused, nsfbufspeak, nsfbufs);
 
 	if (fetch_stats("kern.ipc.sfstat", mbaddr, &sfstat, sizeof(sfstat),
 	    kread_counters) != 0)
 		goto out;
 
+        xo_emit("{:sendfile-syscalls/%ju} {N:sendfile syscalls}\n",
+	    (uintmax_t)sfstat.sf_syscalls); 
+        xo_emit("{:sendfile-no-io/%ju} "
+	    "{N:sendfile syscalls completed without I\\/O request}\n", 
+            (uintmax_t)sfstat.sf_noiocnt);
+	xo_emit("{:sendfile-io-count/%ju} "
+	    "{N:requests for I\\/O initiated by sendfile}\n",
+	    (uintmax_t)sfstat.sf_iocnt);
+        xo_emit("{:sendfile-pages-sent/%ju} "
+	    "{N:pages read by sendfile as part of a request}\n",
+            (uintmax_t)sfstat.sf_pages_read);
+        xo_emit("{:sendfile-pages-valid/%ju} "
+	    "{N:pages were valid at time of a sendfile request}\n",
+            (uintmax_t)sfstat.sf_pages_valid);
+        xo_emit("{:sendfile-requested-readahead/%ju} "
+	    "{N:pages were requested for read ahead by applications}\n",
+            (uintmax_t)sfstat.sf_rhpages_requested);
+        xo_emit("{:sendfile-readahead/%ju} "
+	    "{N:pages were read ahead by sendfile}\n",
+            (uintmax_t)sfstat.sf_rhpages_read);
+	xo_emit("{:sendfile-busy-encounters/%ju} "
+	    "{N:times sendfile encountered an already busy page}\n",
+	    (uintmax_t)sfstat.sf_busy);
 	xo_emit("{:sfbufs-alloc-failed/%ju} {N:requests for sfbufs denied}\n",
 	    (uintmax_t)sfstat.sf_allocfail);
 	xo_emit("{:sfbufs-alloc-wait/%ju} {N:requests for sfbufs delayed}\n",
 	    (uintmax_t)sfstat.sf_allocwait);
-	xo_emit("{:sfbufs-io-count/%ju} "
-	    "{N:requests for I\\/O initiated by sendfile}\n",
-	    (uintmax_t)sfstat.sf_iocnt);
 out:
 	xo_close_container("mbuf-statistics");
 	memstat_mtl_free(mtlp);
 }
Index: projects/clang380-import/usr.sbin/bhyve/pci_virtio_net.c
===================================================================
--- projects/clang380-import/usr.sbin/bhyve/pci_virtio_net.c	(revision 293686)
+++ projects/clang380-import/usr.sbin/bhyve/pci_virtio_net.c	(revision 293687)
@@ -1,730 +1,972 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/linker_set.h>
 #include <sys/select.h>
 #include <sys/uio.h>
 #include <sys/ioctl.h>
 #include <machine/atomic.h>
 #include <net/ethernet.h>
+#ifndef NETMAP_WITH_LIBS
+#define NETMAP_WITH_LIBS
+#endif
+#include <net/netmap_user.h>
 
 #include <errno.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
 #include <strings.h>
 #include <unistd.h>
 #include <assert.h>
 #include <md5.h>
 #include <pthread.h>
 #include <pthread_np.h>
 
 #include "bhyverun.h"
 #include "pci_emul.h"
 #include "mevent.h"
 #include "virtio.h"
 
 #define VTNET_RINGSZ	1024
 
 #define VTNET_MAXSEGS	256
 
 /*
  * Host capabilities.  Note that we only offer a few of these.
  */
 #define	VIRTIO_NET_F_CSUM	(1 <<  0) /* host handles partial cksum */
 #define	VIRTIO_NET_F_GUEST_CSUM	(1 <<  1) /* guest handles partial cksum */
 #define	VIRTIO_NET_F_MAC	(1 <<  5) /* host supplies MAC */
 #define	VIRTIO_NET_F_GSO_DEPREC	(1 <<  6) /* deprecated: host handles GSO */
 #define	VIRTIO_NET_F_GUEST_TSO4	(1 <<  7) /* guest can rcv TSOv4 */
 #define	VIRTIO_NET_F_GUEST_TSO6	(1 <<  8) /* guest can rcv TSOv6 */
 #define	VIRTIO_NET_F_GUEST_ECN	(1 <<  9) /* guest can rcv TSO with ECN */
 #define	VIRTIO_NET_F_GUEST_UFO	(1 << 10) /* guest can rcv UFO */
 #define	VIRTIO_NET_F_HOST_TSO4	(1 << 11) /* host can rcv TSOv4 */
 #define	VIRTIO_NET_F_HOST_TSO6	(1 << 12) /* host can rcv TSOv6 */
 #define	VIRTIO_NET_F_HOST_ECN	(1 << 13) /* host can rcv TSO with ECN */
 #define	VIRTIO_NET_F_HOST_UFO	(1 << 14) /* host can rcv UFO */
 #define	VIRTIO_NET_F_MRG_RXBUF	(1 << 15) /* host can merge RX buffers */
 #define	VIRTIO_NET_F_STATUS	(1 << 16) /* config status field available */
 #define	VIRTIO_NET_F_CTRL_VQ	(1 << 17) /* control channel available */
 #define	VIRTIO_NET_F_CTRL_RX	(1 << 18) /* control channel RX mode support */
 #define	VIRTIO_NET_F_CTRL_VLAN	(1 << 19) /* control channel VLAN filtering */
 #define	VIRTIO_NET_F_GUEST_ANNOUNCE \
 				(1 << 21) /* guest can send gratuitous pkts */
 
 #define VTNET_S_HOSTCAPS      \
   ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \
     VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)
 
 /*
  * PCI config-space "registers"
  */
 struct virtio_net_config {
 	uint8_t  mac[6];
 	uint16_t status;
 } __packed;
 
 /*
  * Queue definitions.
  */
 #define VTNET_RXQ	0
 #define VTNET_TXQ	1
 #define VTNET_CTLQ	2	/* NB: not yet supported */
 
 #define VTNET_MAXQ	3
 
 /*
  * Fixed network header size
  */
 struct virtio_net_rxhdr {
 	uint8_t		vrh_flags;
 	uint8_t		vrh_gso_type;
 	uint16_t	vrh_hdr_len;
 	uint16_t	vrh_gso_size;
 	uint16_t	vrh_csum_start;
 	uint16_t	vrh_csum_offset;
 	uint16_t	vrh_bufs;
 } __packed;
 
 /*
  * Debug printf
  */
 static int pci_vtnet_debug;
 #define DPRINTF(params) if (pci_vtnet_debug) printf params
 #define WPRINTF(params) printf params
 
 /*
  * Per-device softc
  */
 struct pci_vtnet_softc {
 	struct virtio_softc vsc_vs;
 	struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
 	pthread_mutex_t vsc_mtx;
 	struct mevent	*vsc_mevp;
 
 	int		vsc_tapfd;
+	struct nm_desc	*vsc_nmd;
+
 	int		vsc_rx_ready;
 	volatile int	resetting;	/* set and checked outside lock */
 
 	uint64_t	vsc_features;	/* negotiated features */
 	
 	struct virtio_net_config vsc_config;
 
 	pthread_mutex_t	rx_mtx;
 	int		rx_in_progress;
 	int		rx_vhdrlen;
 	int		rx_merge;	/* merged rx bufs in use */
 
 	pthread_t 	tx_tid;
 	pthread_mutex_t	tx_mtx;
 	pthread_cond_t	tx_cond;
 	int		tx_in_progress;
+
+	void (*pci_vtnet_rx)(struct pci_vtnet_softc *sc);
+	void (*pci_vtnet_tx)(struct pci_vtnet_softc *sc, struct iovec *iov,
+			     int iovcnt, int len);
 };
 
 static void pci_vtnet_reset(void *);
 /* static void pci_vtnet_notify(void *, struct vqueue_info *); */
 static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
 static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
 static void pci_vtnet_neg_features(void *, uint64_t);
 
 static struct virtio_consts vtnet_vi_consts = {
 	"vtnet",		/* our name */
 	VTNET_MAXQ - 1,		/* we currently support 2 virtqueues */
 	sizeof(struct virtio_net_config), /* config reg size */
 	pci_vtnet_reset,	/* reset */
 	NULL,			/* device-wide qnotify -- not used */
 	pci_vtnet_cfgread,	/* read PCI config */
 	pci_vtnet_cfgwrite,	/* write PCI config */
 	pci_vtnet_neg_features,	/* apply negotiated features */
 	VTNET_S_HOSTCAPS,	/* our capabilities */
 };
 
 /*
  * If the transmit thread is active then stall until it is done.
  */
 static void
 pci_vtnet_txwait(struct pci_vtnet_softc *sc)
 {
 
 	pthread_mutex_lock(&sc->tx_mtx);
 	while (sc->tx_in_progress) {
 		pthread_mutex_unlock(&sc->tx_mtx);
 		usleep(10000);
 		pthread_mutex_lock(&sc->tx_mtx);
 	}
 	pthread_mutex_unlock(&sc->tx_mtx);
 }
 
 /*
  * If the receive thread is active then stall until it is done.
  */
 static void
 pci_vtnet_rxwait(struct pci_vtnet_softc *sc)
 {
 
 	pthread_mutex_lock(&sc->rx_mtx);
 	while (sc->rx_in_progress) {
 		pthread_mutex_unlock(&sc->rx_mtx);
 		usleep(10000);
 		pthread_mutex_lock(&sc->rx_mtx);
 	}
 	pthread_mutex_unlock(&sc->rx_mtx);
 }
 
 static void
 pci_vtnet_reset(void *vsc)
 {
 	struct pci_vtnet_softc *sc = vsc;
 
 	DPRINTF(("vtnet: device reset requested !\n"));
 
 	sc->resetting = 1;
 
 	/*
 	 * Wait for the transmit and receive threads to finish their
 	 * processing.
 	 */
 	pci_vtnet_txwait(sc);
 	pci_vtnet_rxwait(sc);
 
 	sc->vsc_rx_ready = 0;
 	sc->rx_merge = 1;
 	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
 
 	/* now reset rings, MSI-X vectors, and negotiated capabilities */
 	vi_reset_dev(&sc->vsc_vs);
 
 	sc->resetting = 0;
 }
 
 /*
  * Called to send a buffer chain out to the tap device
  */
 static void
 pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
 		 int len)
 {
 	static char pad[60]; /* all zero bytes */
 
 	if (sc->vsc_tapfd == -1)
 		return;
 
 	/*
 	 * If the length is < 60, pad out to that and add the
 	 * extra zero'd segment to the iov. It is guaranteed that
 	 * there is always an extra iov available by the caller.
 	 */
 	if (len < 60) {
 		iov[iovcnt].iov_base = pad;
 		iov[iovcnt].iov_len = 60 - len;
 		iovcnt++;
 	}
 	(void) writev(sc->vsc_tapfd, iov, iovcnt);
 }
 
 /*
  *  Called when there is read activity on the tap file descriptor.
  * Each buffer posted by the guest is assumed to be able to contain
  * an entire ethernet frame + rx header.
  *  MP note: the dummybuf is only used for discarding frames, so there
  * is no need for it to be per-vtnet or locked.
  */
 static uint8_t dummybuf[2048];
 
 static __inline struct iovec *
 rx_iov_trim(struct iovec *iov, int *niov, int tlen)
 {
 	struct iovec *riov;
 
 	/* XXX short-cut: assume first segment is >= tlen */
 	assert(iov[0].iov_len >= tlen);
 
 	iov[0].iov_len -= tlen;
 	if (iov[0].iov_len == 0) {
 		assert(*niov > 1);
 		*niov -= 1;
 		riov = &iov[1];
 	} else {
 		iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen);
 		riov = &iov[0];
 	}
 
 	return (riov);
 }
 
 static void
 pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
 {
 	struct iovec iov[VTNET_MAXSEGS], *riov;
 	struct vqueue_info *vq;
 	void *vrx;
 	int len, n;
 	uint16_t idx;
 
 	/*
 	 * Should never be called without a valid tap fd
 	 */
 	assert(sc->vsc_tapfd != -1);
 
 	/*
 	 * But, will be called when the rx ring hasn't yet
 	 * been set up or the guest is resetting the device.
 	 */
 	if (!sc->vsc_rx_ready || sc->resetting) {
 		/*
 		 * Drop the packet and try later.
 		 */
 		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
 		return;
 	}
 
 	/*
 	 * Check for available rx buffers
 	 */
 	vq = &sc->vsc_queues[VTNET_RXQ];
 	if (!vq_has_descs(vq)) {
 		/*
 		 * Drop the packet and try later.  Interrupt on
 		 * empty, if that's negotiated.
 		 */
 		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
 		vq_endchains(vq, 1);
 		return;
 	}
 
 	do {
 		/*
 		 * Get descriptor chain.
 		 */
 		n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
 		assert(n >= 1 && n <= VTNET_MAXSEGS);
 
 		/*
 		 * Get a pointer to the rx header, and use the
 		 * data immediately following it for the packet buffer.
 		 */
 		vrx = iov[0].iov_base;
 		riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
 
 		len = readv(sc->vsc_tapfd, riov, n);
 
 		if (len < 0 && errno == EWOULDBLOCK) {
 			/*
 			 * No more packets, but still some avail ring
 			 * entries.  Interrupt if needed/appropriate.
 			 */
 			vq_retchain(vq);
 			vq_endchains(vq, 0);
 			return;
 		}
 
 		/*
 		 * The only valid field in the rx packet header is the
 		 * number of buffers if merged rx bufs were negotiated.
 		 */
 		memset(vrx, 0, sc->rx_vhdrlen);
 
 		if (sc->rx_merge) {
 			struct virtio_net_rxhdr *vrxh;
 
 			vrxh = vrx;
 			vrxh->vrh_bufs = 1;
 		}
 
 		/*
 		 * Release this chain and handle more chains.
 		 */
 		vq_relchain(vq, idx, len + sc->rx_vhdrlen);
 	} while (vq_has_descs(vq));
 
 	/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
 	vq_endchains(vq, 1);
 }
 
+/* Copy iov into one free netmap TX slot and sync; returns bytes queued. */
+static int
+pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+{
+	int r, i, len = 0;
+
+	for (r = nmd->cur_tx_ring; ; ) {
+		struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r);
+		uint32_t cur;
+		char *buf;
+
+		if (nm_ring_empty(ring)) {
+			/* Try the next TX ring; give up after a full lap. */
+			if (++r > nmd->last_tx_ring)
+				r = nmd->first_tx_ring;
+			if (r == nmd->cur_tx_ring)
+				break;
+			continue;
+		}
+		cur = ring->cur;
+		buf = NETMAP_BUF(ring, ring->slot[cur].buf_idx);
+		for (i = 0; i < iovcnt; i++) {
+			if (len + iov[i].iov_len > ring->nr_buf_size)
+				break;	/* would overrun the slot buffer */
+			memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len);
+			len += iov[i].iov_len;
+		}
+		ring->slot[cur].len = len;
+		ring->head = ring->cur = nm_ring_next(ring, cur);
+		nmd->cur_tx_ring = r;
+		ioctl(nmd->fd, NIOCTXSYNC, NULL);
+		break;
+	}
+
+	return (len);
+}
+
+/* Copy one frame from a netmap RX ring into iov; returns bytes copied. */
+static inline int
+pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
+{
+	struct netmap_ring *rxr;
+	uint32_t slot;
+	size_t remaining;
+	char *src;
+	int total = 0, k = 0, ringno = nmd->cur_rx_ring;
+
+	/* Starting at the current ring, look for one that is not empty. */
+	while (nm_ring_empty(rxr = NETMAP_RXRING(nmd->nifp, ringno))) {
+		if (++ringno > nmd->last_rx_ring)
+			ringno = nmd->first_rx_ring;
+		if (ringno == nmd->cur_rx_ring) {
+			rxr = NULL;	/* full lap: nothing pending */
+			break;
+		}
+	}
+	if (rxr != NULL) {
+		slot = rxr->cur;
+		src = NETMAP_BUF(rxr, rxr->slot[slot].buf_idx);
+		remaining = rxr->slot[slot].len;
+
+		/* Scatter the slot data, clamping each segment to what is left. */
+		while (k < iovcnt && remaining > 0) {
+			if (iov[k].iov_len > remaining)
+				iov[k].iov_len = remaining;
+			memcpy(iov[k].iov_base, &src[total], iov[k].iov_len);
+			total += iov[k].iov_len;
+			remaining -= iov[k].iov_len;
+			k++;
+		}
+		rxr->head = rxr->cur = nm_ring_next(rxr, slot);
+		nmd->cur_rx_ring = ringno;
+		ioctl(nmd->fd, NIOCRXSYNC, NULL);
+	}
+	/* Mark every segment the copy did not reach as empty. */
+	while (k < iovcnt)
+		iov[k++].iov_len = 0;
+
+	return (total);
+}
+
+/*
+ * Called to send a buffer chain out to the vale port
+ */
 static void
-pci_vtnet_tap_callback(int fd, enum ev_type type, void *param)
+pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
+		    int len)
 {
+	static char pad[60]; /* all zero bytes */
+
+	if (sc->vsc_nmd == NULL)
+		return;
+
+	/*
+	 * If the length is < 60, pad out to that and add the
+	 * extra zero'd segment to the iov. It is guaranteed that
+	 * there is always an extra iov available by the caller.
+	 */
+	if (len < 60) {
+		iov[iovcnt].iov_base = pad;
+		iov[iovcnt].iov_len = 60 - len;
+		iovcnt++;
+	}
+	(void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt);
+}
+
+/*
+ * Called when the netmap descriptor is readable: copy pending frames into
+ * guest rx chains, or drop one frame if the guest cannot accept it yet.
+ */
+static void
+pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc)
+{
+	struct iovec iov[VTNET_MAXSEGS], *riov;
+	struct vqueue_info *vq;
+	void *vrx;
+	int len, n;
+	uint16_t idx;
+
+	/* Should never be called without a valid netmap descriptor. */
+	assert(sc->vsc_nmd != NULL);
+
+	/*
+	 * But, will be called when the rx ring hasn't yet
+	 * been set up or the guest is resetting the device.
+	 */
+	if (!sc->vsc_rx_ready || sc->resetting) {
+		/* Drop the packet and try later. */
+		(void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+		return;
+	}
+
+	/*
+	 * Check for available rx buffers
+	 */
+	vq = &sc->vsc_queues[VTNET_RXQ];
+	if (!vq_has_descs(vq)) {
+		/*
+		 * Drop the packet and try later.  Interrupt on
+		 * empty, if that's negotiated.
+		 */
+		(void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
+		vq_endchains(vq, 1);
+		return;
+	}
+
+	do {
+		/*
+		 * Get descriptor chain.
+		 */
+		n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
+		assert(n >= 1 && n <= VTNET_MAXSEGS);
+
+		/*
+		 * Get a pointer to the rx header, and use the
+		 * data immediately following it for the packet buffer.
+		 */
+		vrx = iov[0].iov_base;
+		riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
+
+		len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n);
+		if (len == 0) {
+			/*
+			 * No more packets: give the unused chain back and
+			 * interrupt if needed/appropriate.
+			 */
+			vq_retchain(vq);
+			vq_endchains(vq, 0);
+			return;
+		}
+
+		/*
+		 * The only valid field in the rx packet header is the
+		 * number of buffers if merged rx bufs were negotiated.
+		 */
+		memset(vrx, 0, sc->rx_vhdrlen);
+
+		if (sc->rx_merge) {
+			struct virtio_net_rxhdr *vrxh;
+
+			vrxh = vrx;
+			vrxh->vrh_bufs = 1;
+		}
+
+		/*
+		 * Release this chain and handle more chains.
+		 */
+		vq_relchain(vq, idx, len + sc->rx_vhdrlen);
+	} while (vq_has_descs(vq));
+
+	/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
+	vq_endchains(vq, 1);
+}
+
+static void
+pci_vtnet_rx_callback(int fd, enum ev_type type, void *param)
+{
 	struct pci_vtnet_softc *sc = param;
 
 	pthread_mutex_lock(&sc->rx_mtx);
 	sc->rx_in_progress = 1;
-	pci_vtnet_tap_rx(sc);
+	sc->pci_vtnet_rx(sc);
 	sc->rx_in_progress = 0;
 	pthread_mutex_unlock(&sc->rx_mtx);
 
 }
 
 static void
 pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
 {
 	struct pci_vtnet_softc *sc = vsc;
 
 	/*
 	 * A qnotify means that the rx process can now begin
 	 */
 	if (sc->vsc_rx_ready == 0) {
 		sc->vsc_rx_ready = 1;
 		vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
 	}
 }
 
 static void
 pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
 {
 	struct iovec iov[VTNET_MAXSEGS + 1];
 	int i, n;
 	int plen, tlen;
 	uint16_t idx;
 
 	/*
 	 * Obtain chain of descriptors.  The first one is
 	 * really the header descriptor, so we need to sum
 	 * up two lengths: packet length and transfer length.
 	 */
 	n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
 	assert(n >= 1 && n <= VTNET_MAXSEGS);
 	plen = 0;
 	tlen = iov[0].iov_len;
 	for (i = 1; i < n; i++) {
 		plen += iov[i].iov_len;
 		tlen += iov[i].iov_len;
 	}
 
 	DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n));
-	pci_vtnet_tap_tx(sc, &iov[1], n - 1, plen);
+	sc->pci_vtnet_tx(sc, &iov[1], n - 1, plen);
 
 	/* chain is processed, release it and set tlen */
 	vq_relchain(vq, idx, tlen);
 }
 
 static void
 pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
 {
 	struct pci_vtnet_softc *sc = vsc;
 
 	/*
 	 * Any ring entries to process?
 	 */
 	if (!vq_has_descs(vq))
 		return;
 
 	/* Signal the tx thread for processing */
 	pthread_mutex_lock(&sc->tx_mtx);
 	vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
 	if (sc->tx_in_progress == 0)
 		pthread_cond_signal(&sc->tx_cond);
 	pthread_mutex_unlock(&sc->tx_mtx);
 }
 
 /*
  * Thread which will handle processing of TX desc
  */
 static void *
 pci_vtnet_tx_thread(void *param)
 {
 	struct pci_vtnet_softc *sc = param;
 	struct vqueue_info *vq;
 	int error;
 
 	vq = &sc->vsc_queues[VTNET_TXQ];
 
 	/*
 	 * Let us wait till the tx queue pointers get initialised &
 	 * first tx signaled
 	 */
 	pthread_mutex_lock(&sc->tx_mtx);
 	error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
 	assert(error == 0);
 
 	for (;;) {
 		/* note - tx mutex is locked here */
 		while (sc->resetting || !vq_has_descs(vq)) {
 			vq->vq_used->vu_flags &= ~VRING_USED_F_NO_NOTIFY;
 			mb();
 			if (!sc->resetting && vq_has_descs(vq))
 				break;
 
 			sc->tx_in_progress = 0;
 			error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
 			assert(error == 0);
 		}
 		vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
 		sc->tx_in_progress = 1;
 		pthread_mutex_unlock(&sc->tx_mtx);
 
 		do {
 			/*
 			 * Run through entries, placing them into
 			 * iovecs and sending when an end-of-packet
 			 * is found
 			 */
 			pci_vtnet_proctx(sc, vq);
 		} while (vq_has_descs(vq));
 
 		/*
 		 * Generate an interrupt if needed.
 		 */
 		vq_endchains(vq, 1);
 
 		pthread_mutex_lock(&sc->tx_mtx);
 	}
 }
 
 #ifdef notyet
 static void
 pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
 {
 
 	DPRINTF(("vtnet: control qnotify!\n\r"));
 }
 #endif
 
 /*
  * Parse a "mac=<address>" option string.
  *
  * mac_str is modified in place by strsep().  When the option key is "mac"
  * and the address parses as a non-zero, non-multicast Ethernet address, the
  * six octets are copied to mac_addr.  An option that is not "mac=..." is
  * ignored and leaves mac_addr untouched.
  *
  * Returns 0 on success (or when the option is ignored) and EINVAL for an
  * unparsable, multicast or all-zero address.
  */
 static int
 pci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr)
 {
 	char null_mac[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
 	struct ether_addr *parsed;
 	char *key;
 
 	/* Split "key=value": key is returned, mac_str advances to value. */
 	key = strsep(&mac_str, "=");
 	if (mac_str == NULL || strcmp(key, "mac") != 0)
 		return (0);
 
 	parsed = ether_aton(mac_str);
 	if (parsed == NULL || ETHER_IS_MULTICAST(parsed->octet) ||
 	    memcmp(parsed->octet, null_mac, ETHER_ADDR_LEN) == 0) {
 		fprintf(stderr, "Invalid MAC %s\n", mac_str);
 		return (EINVAL);
 	}
 
 	memcpy(mac_addr, parsed->octet, ETHER_ADDR_LEN);
 	return (0);
 }
 
+/*
+ * Attach the device to the tap-style backend /dev/<devname>.
+ *
+ * Installs the tap rx/tx handlers, opens the device node, switches it to
+ * non-blocking mode and registers it for read events with mevent.  On any
+ * failure a warning is printed and sc->vsc_tapfd is left at -1; nothing is
+ * returned, callers inspect sc->vsc_tapfd.
+ */
+static void
+pci_vtnet_tap_setup(struct pci_vtnet_softc *sc, char *devname)
+{
+	char tbuf[80];
+	int opt;
+
+	/* Over-long names are truncated to fit tbuf, never overflowed. */
+	snprintf(tbuf, sizeof(tbuf), "/dev/%s", devname);
+
+	sc->pci_vtnet_rx = pci_vtnet_tap_rx;
+	sc->pci_vtnet_tx = pci_vtnet_tap_tx;
+
+	sc->vsc_tapfd = open(tbuf, O_RDWR);
+	if (sc->vsc_tapfd == -1) {
+		WPRINTF(("open of tap device %s failed\n", tbuf));
+		return;
+	}
+
+	/*
+	 * Set non-blocking and register for read
+	 * notifications with the event loop
+	 */
+	opt = 1;
+	if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
+		WPRINTF(("tap device O_NONBLOCK failed\n"));
+		close(sc->vsc_tapfd);
+		sc->vsc_tapfd = -1;
+		/* Do not register (or later close) an invalid descriptor. */
+		return;
+	}
+
+	sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
+				  EVF_READ,
+				  pci_vtnet_rx_callback,
+				  sc);
+	if (sc->vsc_mevp == NULL) {
+		WPRINTF(("Could not register event\n"));
+		close(sc->vsc_tapfd);
+		sc->vsc_tapfd = -1;
+	}
+}
+
+/*
+ * Attach the device to the netmap port named by ifname.
+ *
+ * Installs the netmap rx/tx handlers, opens the port with nm_open() and
+ * registers its descriptor for read events.  On failure a warning is
+ * printed and sc->vsc_nmd is left NULL.
+ */
+static void
+pci_vtnet_netmap_setup(struct pci_vtnet_softc *sc, char *ifname)
+{
+	sc->pci_vtnet_tx = pci_vtnet_netmap_tx;
+	sc->pci_vtnet_rx = pci_vtnet_netmap_rx;
+
+	if ((sc->vsc_nmd = nm_open(ifname, NULL, 0, 0)) == NULL) {
+		WPRINTF(("open of netmap device %s failed\n", ifname));
+		return;
+	}
+
+	sc->vsc_mevp = mevent_add(sc->vsc_nmd->fd, EVF_READ,
+	    pci_vtnet_rx_callback, sc);
+	if (sc->vsc_mevp != NULL)
+		return;
+
+	/* Registration failed: undo the open so the port reads as absent. */
+	WPRINTF(("Could not register event\n"));
+	nm_close(sc->vsc_nmd);
+	sc->vsc_nmd = NULL;
+}
+
+/*
+ * Initialise a virtio-net device instance.
+ *
+ * opts, when not NULL, is "<backend>[,<option>]".  A backend name starting
+ * with "vale" is opened through netmap; one starting with "tap" or "vmnet"
+ * is opened as /dev/<backend>; any other name leaves the device without a
+ * backend.  The text after the first comma is handed to
+ * pci_vtnet_parsemac().  When no such text is given, the MAC is 00:a0:98
+ * followed by three bytes of an MD5 over the PCI slot, function and VM name.
+ *
+ * Returns 0 on success, the pci_vtnet_parsemac() error for a bad MAC
+ * option, or 1 when the softc allocation or interrupt setup fails.
+ */
 static int
 pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
 {
 	MD5_CTX mdctx;
 	unsigned char digest[16];
 	char nstr[80];
 	char tname[MAXCOMLEN + 1];
 	struct pci_vtnet_softc *sc;
 	char *devname;
 	char *vtopts;
 	int mac_provided;
 
 	sc = calloc(1, sizeof(struct pci_vtnet_softc));
+	if (sc == NULL) {
+		/* Everything below dereferences sc. */
+		WPRINTF(("virtio-net: softc allocation failed\n"));
+		return (1);
+	}
 
 	pthread_mutex_init(&sc->vsc_mtx, NULL);
 
 	vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues);
 	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
 
 	sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
 	sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
 	sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
 	sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
 #ifdef notyet
 	sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
         sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
 #endif
  
 	/*
 	 * Attempt to open the tap device and read the MAC address
 	 * if specified
 	 */
 	mac_provided = 0;
 	sc->vsc_tapfd = -1;
+	sc->vsc_nmd = NULL;
 	if (opts != NULL) {
-		char tbuf[80];
 		int err;
 
 		devname = vtopts = strdup(opts);
 		(void) strsep(&vtopts, ",");
 
 		if (vtopts != NULL) {
 			err = pci_vtnet_parsemac(vtopts, sc->vsc_config.mac);
 			if (err != 0) {
 				free(devname);
 				return (err);
 			}
 			mac_provided = 1;
 		}
 
-		strcpy(tbuf, "/dev/");
-		strlcat(tbuf, devname, sizeof(tbuf));
+		if (strncmp(devname, "vale", 4) == 0)
+			pci_vtnet_netmap_setup(sc, devname);
+		if (strncmp(devname, "tap", 3) == 0 ||
+		    strncmp(devname, "vmnet", 5) == 0)
+			pci_vtnet_tap_setup(sc, devname);
 
 		free(devname);
-
-		sc->vsc_tapfd = open(tbuf, O_RDWR);
-		if (sc->vsc_tapfd == -1) {
-			WPRINTF(("open of tap device %s failed\n", tbuf));
-		} else {
-			/*
-			 * Set non-blocking and register for read
-			 * notifications with the event loop
-			 */
-			int opt = 1;
-			if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
-				WPRINTF(("tap device O_NONBLOCK failed\n"));
-				close(sc->vsc_tapfd);
-				sc->vsc_tapfd = -1;
-			}
-
-			sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
-						  EVF_READ,
-						  pci_vtnet_tap_callback,
-						  sc);
-			if (sc->vsc_mevp == NULL) {
-				WPRINTF(("Could not register event\n"));
-				close(sc->vsc_tapfd);
-				sc->vsc_tapfd = -1;
-			}
-		}		
 	}
 
 	/*
 	 * The default MAC address is the standard NetApp OUI of 00-a0-98,
 	 * followed by an MD5 of the PCI slot/func number and dev name
 	 */
 	if (!mac_provided) {
 		snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
 		    pi->pi_func, vmname);
 
 		MD5Init(&mdctx);
 		MD5Update(&mdctx, nstr, strlen(nstr));
 		MD5Final(digest, &mdctx);
 
 		sc->vsc_config.mac[0] = 0x00;
 		sc->vsc_config.mac[1] = 0xa0;
 		sc->vsc_config.mac[2] = 0x98;
 		sc->vsc_config.mac[3] = digest[0];
 		sc->vsc_config.mac[4] = digest[1];
 		sc->vsc_config.mac[5] = digest[2];
 	}
 
 	/* initialize config space */
 	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
 	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
 	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
 	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
 
-	/* Link is up if we managed to open tap device. */
-	sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0);
+	/*
+	 * Link is up if we managed to open the tap device or the netmap
+	 * port; a netmap backend leaves vsc_tapfd at -1, so it has to be
+	 * checked through vsc_nmd.
+	 */
+	sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0 ||
+	    sc->vsc_nmd != NULL);
 	
 	/* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
 	if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
 		return (1);
 
 	/* use BAR 0 to map config regs in IO space */
 	vi_set_io_bar(&sc->vsc_vs, 0);
 
 	sc->resetting = 0;
 
 	sc->rx_merge = 1;
 	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
 	sc->rx_in_progress = 0;
 	pthread_mutex_init(&sc->rx_mtx, NULL); 
 
 	/* 
 	 * Initialize tx semaphore & spawn TX processing thread.
 	 * As of now, only one thread for TX desc processing is
 	 * spawned. 
 	 */
 	sc->tx_in_progress = 0;
 	pthread_mutex_init(&sc->tx_mtx, NULL);
 	pthread_cond_init(&sc->tx_cond, NULL);
 	pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
 	snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
 	    pi->pi_func);
         pthread_set_name_np(sc->tx_tid, tname);
 
 	return (0);
 }
 
 /*
  * Device-specific config write handler.
  *
  * Writes that start inside the first six bytes (the MAC address) are
  * applied and must not run past byte six; any other offset is ignored
  * apart from a debug message.  Always returns 0.
  */
 static int
 pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
 {
 	struct pci_vtnet_softc *softc = vsc;
 
 	if (offset >= 6) {
 		/* silently ignore other writes */
 		DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
 		return (0);
 	}
 
 	/* The driver is allowed to change the MAC address */
 	assert(offset + size <= 6);
 	memcpy(&softc->vsc_config.mac[offset], &value, size);
 
 	return (0);
 }
 
 /*
  * Device-specific config read handler: copy size bytes starting at byte
  * offset of sc->vsc_config into *retval.  Always returns 0.
  */
 static int
 pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
 {
 	struct pci_vtnet_softc *softc = vsc;
 	uint8_t *cfg;
 
 	cfg = (uint8_t *)&softc->vsc_config;
 	memcpy(retval, cfg + offset, size);
 	return (0);
 }
 
 /*
  * Record the negotiated feature bits.  When VIRTIO_NET_F_MRG_RXBUF is
  * absent, mergeable rx buffers are turned off and the rx header length is
  * reduced by two bytes.
  */
 static void
 pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
 {
 	struct pci_vtnet_softc *softc = vsc;
 
 	softc->vsc_features = negotiated_features;
 
 	if ((softc->vsc_features & VIRTIO_NET_F_MRG_RXBUF) != 0)
 		return;
 
 	softc->rx_merge = 0;
 	/* non-merge rx header is 2 bytes shorter */
 	softc->rx_vhdrlen -= 2;
 }
 
 /*
  * Emulation descriptor for the "virtio-net" device: pci_vtnet_init() is the
  * init hook and BAR accesses go to vi_pci_write()/vi_pci_read().  It is
  * registered through PCI_EMUL_SET() below.
  */
 struct pci_devemu pci_de_vnet = {
 	.pe_emu = 	"virtio-net",
 	.pe_init =	pci_vtnet_init,
 	.pe_barwrite =	vi_pci_write,
 	.pe_barread =	vi_pci_read
 };
 PCI_EMUL_SET(pci_de_vnet);
Index: projects/clang380-import/usr.sbin/bhyve
===================================================================
--- projects/clang380-import/usr.sbin/bhyve	(revision 293686)
+++ projects/clang380-import/usr.sbin/bhyve	(revision 293687)

Property changes on: projects/clang380-import/usr.sbin/bhyve
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/usr.sbin/bhyve:r293016-293685
Index: projects/clang380-import/usr.sbin/bsdconfig/share/dialog.subr
===================================================================
--- projects/clang380-import/usr.sbin/bsdconfig/share/dialog.subr	(revision 293686)
+++ projects/clang380-import/usr.sbin/bsdconfig/share/dialog.subr	(revision 293687)
@@ -1,2341 +1,2340 @@
 if [ ! "$_DIALOG_SUBR" ]; then _DIALOG_SUBR=1
 #
 # Copyright (c) 2006-2015 Devin Teske
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
 # 1. Redistributions of source code must retain the above copyright
 #    notice, this list of conditions and the following disclaimer.
 # 2. Redistributions in binary form must reproduce the above copyright
 #    notice, this list of conditions and the following disclaimer in the
 #    documentation and/or other materials provided with the distribution.
 #
 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 # ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.
 #
 # $FreeBSD$
 #
 ############################################################ INCLUDES
 
 BSDCFG_SHARE="/usr/share/bsdconfig"
 . $BSDCFG_SHARE/common.subr || exit 1
 f_dprintf "%s: loading includes..." dialog.subr
 f_include $BSDCFG_SHARE/strings.subr
 f_include $BSDCFG_SHARE/variable.subr
 
 BSDCFG_LIBE="/usr/libexec/bsdconfig"
 f_include_lang $BSDCFG_LIBE/include/messages.subr
 
 ############################################################ CONFIGURATION
 
 #
 # Default file descriptor to link to stdout for dialog(1) passthru allowing
 # execution of dialog from within a sub-shell (so-long as its standard output
 # is explicitly redirected to this file descriptor).
 #
 : ${DIALOG_TERMINAL_PASSTHRU_FD:=${TERMINAL_STDOUT_PASSTHRU:-3}}
 
 ############################################################ GLOBALS
 
 #
 # Default name of dialog(1) utility
 # NOTE: This is changed to "Xdialog" by the optional `-X' argument
 #
 DIALOG="dialog"
 
 #
 # Default dialog(1) title and backtitle text
 #
 DIALOG_TITLE="$pgm"
 DIALOG_BACKTITLE="bsdconfig"
 
 #
 # Settings used while interacting with dialog(1)
 #
 DIALOG_MENU_TAGS="123456789ABCDEFGHIJKLMNOPQRSTUVWYZabcdefghijklmnopqrstuvwxyz"
 
 #
 # Declare that we are fully-compliant with Xdialog(1) by unset'ing all
 # compatibility settings.
 #
 unset XDIALOG_HIGH_DIALOG_COMPAT
 unset XDIALOG_FORCE_AUTOSIZE
 unset XDIALOG_INFOBOX_TIMEOUT
 
 #
 # Exit codes for [X]dialog(1)
 #
 # DIALOG_OK and DIALOG_CANCEL follow $SUCCESS and $FAILURE when those are
 # set (0 and 1 otherwise). Each code is assigned exactly once.
 #
 DIALOG_OK=${SUCCESS:-0}
 DIALOG_CANCEL=${FAILURE:-1}
 DIALOG_HELP=2
 DIALOG_EXTRA=3
 DIALOG_ITEM_HELP=4
 export DIALOG_ERROR=254 # sh(1) can't handle the default of `-1'
 DIALOG_ESC=255
 
 #
 # Default behavior is to call f_dialog_init() automatically when loaded.
 #
 : ${DIALOG_SELF_INITIALIZE=1}
 
 #
 # Default terminal size (used if/when running without a controlling terminal)
 #
 : ${DEFAULT_TERMINAL_SIZE:=24 80}
 
 #
 # Minimum width(s) for various dialog(1) implementations (sensible global
 # default(s) for all widgets of a given variant)
 #
 : ${DIALOG_MIN_WIDTH:=24}
 : ${XDIALOG_MIN_WIDTH:=35}
 
 #
 # When manually sizing Xdialog(1) widgets such as calendar and timebox, you'll
 # need to know the size of the embedded GUI objects because the height passed
 # to Xdialog(1) for these widgets has to be tall enough to accommodate them.
 #
 # These values are helpful when manually sizing with dialog(1) too, but in a
 # different way. dialog(1) does not make you accommodate the custom items in the
 # height (but does for width) -- a height of 3 will display three lines and a
 # full calendar, for example (whereas Xdialog will truncate the calendar if
 # given a height of 3). For dialog(1), use these values for making sure that
 # the height does not exceed max_height (obtained by f_dialog_max_size()).
 #
 DIALOG_CALENDAR_HEIGHT=15
 DIALOG_TIMEBOX_HEIGHT=6
 
 ############################################################ GENERIC FUNCTIONS
 
 # f_dialog_data_sanitize $var_to_edit ...
 #
 # When using dialog(1) or Xdialog(1) sometimes unintended warnings or errors
 # are generated from underlying libraries. For example, if $LANG is set to an
 # invalid or unknown locale, the warnings from the Xdialog(1) libraries will
 # clutter the output. This function helps by providing a centralized function
 # that removes spurious warnings from the dialog(1) (or Xdialog(1)) response.
 #
 # Simply pass the name of one or more variables that need to be sanitized.
 # After execution, the variables will hold their newly-sanitized data.
 #
 # Returns $FAILURE (after logging a debug message) when called without
 # arguments.
 #
 f_dialog_data_sanitize()
 {
 	if [ "$#" -eq 0 ]; then
 		# Report under this function's own name
 		f_dprintf "%s: called with zero arguments" \
 		          f_dialog_data_sanitize
 		return $FAILURE
 	fi
 
 	local __var_to_edit
 	for __var_to_edit in $*; do
 		# Drop leading blank lines and Gdk warnings; once the first
 		# data line is seen every remaining line is kept verbatim
 		setvar $__var_to_edit "$( f_getvar $__var_to_edit | awk '
 			BEGIN { data = 0 }
 			{
 				if ( ! data )
 				{
 					if ( $0 ~ /^$/ ) next
 					if ( $0 ~ /^Gdk-WARNING \*\*:/ ) next
 					data = 1
 				}
 				print
 			}
 		' )"
 	done
 }
 
 # f_dialog_line_sanitize $var_to_edit ...
 #
 # When using dialog(1) or Xdialog(1) sometimes unintended warnings or errors
 # are generated from underlying libraries. For example, if $LANG is set to an
 # invalid or unknown locale, the warnings from the Xdialog(1) libraries will
 # clutter the output. This function helps by providing a centralized function
 # that removes spurious warnings from the dialog(1) (or Xdialog(1)) response.
 #
 # Simply pass the name of one or more variables that need to be sanitized.
 # After execution, the variables will hold their newly-sanitized data.
 #
 # This function, unlike f_dialog_data_sanitize(), also removes leading/trailing
 # whitespace from each line.
 #
 # Returns $FAILURE (after logging a debug message) when called without
 # arguments.
 #
 f_dialog_line_sanitize()
 {
 	if [ "$#" -eq 0 ]; then
 		# Report under this function's own name
 		f_dprintf "%s: called with zero arguments" \
 		          f_dialog_line_sanitize
 		return $FAILURE
 	fi
 
 	local __var_to_edit
 	for __var_to_edit in $*; do
 		# Drop leading blank lines and Gdk warnings, then trim
 		# leading/trailing whitespace from every remaining line
 		setvar $__var_to_edit "$( f_getvar $__var_to_edit | awk '
 			BEGIN { data = 0 }
 			{
 				if ( ! data )
 				{
 					if ( $0 ~ /^$/ ) next
 					if ( $0 ~ /^Gdk-WARNING \*\*:/ ) next
 					data = 1
 				}
 				sub(/^[[:space:]]*/, "")
 				sub(/[[:space:]]*$/, "")
 				print
 			}
 		' )"
 	done
 }
 
 ############################################################ TITLE FUNCTIONS
 
 # f_dialog_title [$new_title]
 #
 # Get or set the title of future dialog(1) invocations ($DIALOG_TITLE); when
 # $USE_XDIALOG is set the Xdialog(1) backtitle ($DIALOG_BACKTITLE) is used
 # instead. Called without any argument, the current value is printed. Called
 # with an argument (even an empty one), that argument becomes the new value.
 #
 # Before a new value is stored the current one is saved, allowing a one-time
 # (single-level) restoration using f_dialog_title_restore() (below).
 #
 f_dialog_title()
 {
 	local new_title="$1"
 
 	if [ ! "${1+set}" ]; then
 		# No argument at all: report the current value
 		if [ "$USE_XDIALOG" ]; then
 			echo "$DIALOG_BACKTITLE"
 		else
 			echo "$DIALOG_TITLE"
 		fi
 	elif [ "$USE_XDIALOG" ]; then
 		_DIALOG_BACKTITLE="$DIALOG_BACKTITLE"
 		DIALOG_BACKTITLE="$new_title"
 	else
 		_DIALOG_TITLE="$DIALOG_TITLE"
 		DIALOG_TITLE="$new_title"
 	fi
 }
 
 # f_dialog_title_restore
 #
 # Restore the previous title set by the last call to f_dialog_title().
 # Restoration is non-recursive and only works to restore the most-recent title.
 #
 f_dialog_title_restore()
 {
 	# Copy the saved value back: the backtitle when $USE_XDIALOG is set,
 	# the title otherwise
 	case "$USE_XDIALOG" in
 	"") DIALOG_TITLE="$_DIALOG_TITLE" ;;
 	*)  DIALOG_BACKTITLE="$_DIALOG_BACKTITLE" ;;
 	esac
 }
 
 # f_dialog_backtitle [$new_backtitle]
 #
 # Get or set the backtitle of future dialog(1) invocations
 # ($DIALOG_BACKTITLE); when $USE_XDIALOG is set the Xdialog(1) title
 # ($DIALOG_TITLE) is used instead. Called without any argument, the current
 # value is printed. Called with an argument (even an empty one), the current
 # value is saved and the argument becomes the new value.
 #
 f_dialog_backtitle()
 {
 	local new_backtitle="$1"
 
 	if [ ! "${1+set}" ]; then
 		# No argument at all: report the current value
 		if [ "$USE_XDIALOG" ]; then
 			echo "$DIALOG_TITLE"
 		else
 			echo "$DIALOG_BACKTITLE"
 		fi
 	elif [ "$USE_XDIALOG" ]; then
 		_DIALOG_TITLE="$DIALOG_TITLE"
 		DIALOG_TITLE="$new_backtitle"
 	else
 		_DIALOG_BACKTITLE="$DIALOG_BACKTITLE"
 		DIALOG_BACKTITLE="$new_backtitle"
 	fi
 }
 
 # f_dialog_backtitle_restore
 #
 # Restore the previous backtitle set by the last call to f_dialog_backtitle().
 # Restoration is non-recursive and only works to restore the most-recent
 # backtitle.
 #
 f_dialog_backtitle_restore()
 {
 	# Copy the saved value back: the title when $USE_XDIALOG is set, the
 	# backtitle otherwise
 	case "$USE_XDIALOG" in
 	"") DIALOG_BACKTITLE="$_DIALOG_BACKTITLE" ;;
 	*)  DIALOG_TITLE="$_DIALOG_TITLE" ;;
 	esac
 }
 
 ############################################################ SIZE FUNCTIONS
 
 # f_dialog_max_size $var_height $var_width
 #
 # Get the maximum height and width for a dialog widget and store the values in
 # $var_height and $var_width (respectively).
 #
 f_dialog_max_size()
 {
 	local funcname=f_dialog_max_size
 	local __var_height="$1" __var_width="$2" __max_size
 	# At least one output variable name must be given
 	[ "$__var_height" -o "$__var_width" ] || return $FAILURE
 	if [ "$USE_XDIALOG" ]; then
 		__max_size="$XDIALOG_MAXSIZE" # see CONFIGURATION
 	else
 		# Capture what dialog(1) prints on stderr while its stdout
 		# goes to the terminal passthru descriptor
 		if __max_size=$( $DIALOG --print-maxsize \
 			2>&1 >&$DIALOG_TERMINAL_PASSTHRU_FD )
 		then
 			f_dprintf "$funcname: %s --print-maxsize = [%s]" \
 			          "$DIALOG" "$__max_size"
 			# usually "MaxSize: 24, 80"
 			__max_size="${__max_size#*: }"
 			f_replaceall "$__max_size" "," "" __max_size
 		else
 			f_eval_catch -dk __max_size $funcname stty \
 				'stty size' || __max_size=
 			# usually "24 80"
 		fi
 		: ${__max_size:=$DEFAULT_TERMINAL_SIZE}
 	fi
 	if [ "$__var_height" ]; then
 		# Height is the first whitespace-separated field
 		local __height="${__max_size%%[$IFS]*}"
 		#
 		# If we're not using Xdialog(1), we should assume that $DIALOG
 		# will render --backtitle behind the widget. In such a case, we
 		# should prevent a widget from obscuring the backtitle (unless
 		# $NO_BACKTITLE is set and non-NULL, allowing a trap-door).
 		#
 		if [ ! "$USE_XDIALOG" ] && [ ! "$NO_BACKTITLE" ]; then
 			#
 			# If use_shadow (in ~/.dialogrc) is OFF, we need to
 			# subtract 4, otherwise 5. However, don't check this
 			# every time, rely on an initialization variable set
 			# by f_dialog_init().
 			#
 			local __adjust=5
 			[ "$NO_SHADOW" ] && __adjust=4
 
 			# Don't adjust height if already too small (allowing
 			# obscured backtitle for small values of __height).
 			[ ${__height:-0} -gt 11 ] &&
 				__height=$(( $__height - $__adjust ))
 		fi
 		setvar "$__var_height" "$__height"
 	fi
 	# Width is the last whitespace-separated field. Use a full `if' so
 	# that requesting only the height does not make the function return
 	# a failure status.
 	if [ "$__var_width" ]; then
 		setvar "$__var_width" "${__max_size##*[$IFS]}"
 	fi
 }
 
 # f_dialog_size_constrain $var_height $var_width [$min_height [$min_width]]
 #
 # Modify $var_height to be no-less-than $min_height (if given; zero otherwise)
 # and no-greater-than terminal height (or screen height if $USE_XDIALOG is
 # set).
 #
 # Also modify $var_width to be no-less-than $DIALOG_MIN_WIDTH (or
 # $XDIALOG_MIN_WIDTH if $USE_XDIALOG is set) and no-greater-than terminal
 # or screen width. The use of $[X]DIALOG_MIN_WIDTH can be overridden by
 # passing $min_width.
 #
 # Return status is success unless one of the passed arguments is invalid
 # or all of the $var_* arguments are either NULL or missing.
 #
 f_dialog_size_constrain()
 {
 	local __var_height="$1" __var_width="$2"
 	local __min_height="$3" __min_width="$4"
 	local __retval=$SUCCESS
 
 	# Return failure unless at least one var_* argument is passed
 	[ "$__var_height" -o "$__var_width" ] || return $FAILURE
 
 	#
 	# Print debug warnings if any given (non-NULL) argument are invalid
 	# NOTE: Don't change the name of $__{var,min,}{height,width}
 	#
 	local __height __width
 	local __arg __cp __fname=f_dialog_size_constrain
 	for __arg in height width; do
 		# __cp receives the caller's variable name; its current value
 		# is then copied into the local __height or __width
 		debug= f_getvar __var_$__arg __cp
 		[ "$__cp" ] || continue
 		if ! debug= f_getvar "$__cp" __$__arg; then
 			f_dprintf "%s: var_%s variable \`%s' not set" \
 			          $__fname $__arg "$__cp"
 			__retval=$FAILURE
 		elif ! eval f_isinteger \$__$__arg; then
 			f_dprintf "%s: var_%s variable value not a number" \
 			          $__fname $__arg
 			__retval=$FAILURE
 		fi
 	done
 	for __arg in height width; do
 		# A non-numeric minimum is reported and then discarded
 		debug= f_getvar __min_$__arg __cp
 		[ "$__cp" ] || continue
 		f_isinteger "$__cp" && continue
 		f_dprintf "%s: min_%s value not a number" $__fname $__arg
 		__retval=$FAILURE
 		setvar __min_$__arg ""
 	done
 
 	# Obtain maximum height and width values
 	# NOTE: Function name appended to prevent __var_{height,width} values
 	#       from becoming local (and thus preventing setvar from working).
 	# Both names must match the ones handed to f_dialog_max_size below,
 	# otherwise the results are stored outside this function's scope.
 	local __max_height_size_constrain __max_width_size_constrain
 	f_dialog_max_size \
 		__max_height_size_constrain __max_width_size_constrain
 
 	# Adjust height if desired
 	if [ "$__var_height" ]; then
 		if [ $__height -lt ${__min_height:-0} ]; then
 			setvar "$__var_height" $__min_height
 		elif [ $__height -gt $__max_height_size_constrain ]; then
 			setvar "$__var_height" $__max_height_size_constrain
 		fi
 	fi
 
 	# Adjust width if desired
 	if [ "$__var_width" ]; then
 		if [ "$USE_XDIALOG" ]; then
 			: ${__min_width:=${XDIALOG_MIN_WIDTH:-35}}
 		else
 			: ${__min_width:=${DIALOG_MIN_WIDTH:-24}}
 		fi
 		if [ $__width -lt $__min_width ]; then
 			setvar "$__var_width" $__min_width
 		elif [ $__width -gt $__max_width_size_constrain ]; then
 			setvar "$__var_width" $__max_width_size_constrain
 		fi
 	fi
 
 	if [ "$debug" ]; then
 		# Print final constrained values to debugging
 		[ "$__var_height" ] && f_quietly f_getvar "$__var_height"
 		[ "$__var_width"  ] && f_quietly f_getvar "$__var_width"
 	fi
 
 	return $__retval # success if no debug warnings were printed
 }
 
 # f_dialog_menu_constrain $var_height $var_width $var_rows "$prompt" \
 #                         [$min_height [$min_width [$min_rows]]]
 #
 # Modify $var_height to be no-less-than $min_height (if given; zero otherwise)
 # and no-greater-than terminal height (or screen height if $USE_XDIALOG is
 # set).
 #
 # Also modify $var_width to be no-less-than $DIALOG_MIN_WIDTH (or
 # $XDIALOG_MIN_WIDTH if $USE_XDIALOG is set) and no-greater-than terminal
 # or screen width. The use of $[X]DIALOG_MIN_WIDTH can be overridden by
 # passing $min_width.
 #
 # Last, modify $var_rows to be no-less-than $min_rows (if specified; else one
 # when $USE_XDIALOG is set, zero otherwise) and no-greater-than
 # (max_height - 7) when $__prompt_len is greater than zero, or
 # (max_height - 6) otherwise.
 # NOTE(review): $prompt is accepted but not examined here, and $__prompt_len
 # is read without being assigned in this function -- presumably it is
 # inherited from the caller's scope; confirm.
 #
 # Return status is success unless one of the passed arguments is invalid
 # or all of the $var_* arguments are either NULL or missing.
 #
 f_dialog_menu_constrain()
 {
 	local __var_height="$1" __var_width="$2" __var_rows="$3" __prompt="$4"
 	local __min_height="$5" __min_width="$6" __min_rows="$7"
 	# Start from success, like f_dialog_size_constrain(), so the final
 	# `return' never sees an unset or inherited value
 	local __retval=$SUCCESS
 
 	# Return failure unless at least one var_* argument is passed
 	[ "$__var_height" -o "$__var_width" -o "$__var_rows" ] ||
 		return $FAILURE
 
 	#
 	# Print debug warnings if any given (non-NULL) argument are invalid
 	# NOTE: Don't change the name of $__{var,min,}{height,width,rows}
 	#
 	local __height_menu_constrain __width_menu_constrain
 	local __rows_menu_constrain
 	local __arg __cp __fname=f_dialog_menu_constrain
 	for __arg in height width rows; do
 		# __cp receives the caller's variable name; its current value
 		# is then copied into the matching __*_menu_constrain local
 		debug= f_getvar __var_$__arg __cp
 		[ "$__cp" ] || continue
 		if ! debug= f_getvar "$__cp" __${__arg}_menu_constrain; then
 			f_dprintf "%s: var_%s variable \`%s' not set" \
 			          $__fname $__arg "$__cp"
 			__retval=$FAILURE
 		elif ! eval f_isinteger \$__${__arg}_menu_constrain; then
 			f_dprintf "%s: var_%s variable value not a number" \
 			          $__fname $__arg
 			__retval=$FAILURE
 		fi
 	done
 	for __arg in height width rows; do
 		# A non-numeric minimum is reported and then discarded
 		debug= f_getvar __min_$__arg __cp
 		[ "$__cp" ] || continue
 		f_isinteger "$__cp" && continue
 		f_dprintf "%s: min_%s value not a number" $__fname $__arg
 		__retval=$FAILURE
 		setvar __min_$__arg ""
 	done
 
 	# Obtain maximum height and width values
 	# NOTE: Function name appended to prevent __var_{height,width} values
 	#       from becoming local (and thus preventing setvar from working).
 	local __max_height_menu_constrain __max_width_menu_constrain
 	f_dialog_max_size \
 		__max_height_menu_constrain __max_width_menu_constrain
 
 	# Adjust height if desired
 	if [ "$__var_height" ]; then
 		if [ $__height_menu_constrain -lt ${__min_height:-0} ]; then
 			setvar "$__var_height" $__min_height
 		elif [ $__height_menu_constrain -gt \
 		       $__max_height_menu_constrain ]
 		then
 			setvar "$__var_height" $__max_height_menu_constrain
 		fi
 	fi
 
 	# Adjust width if desired
 	if [ "$__var_width" ]; then
 		if [ "$USE_XDIALOG" ]; then
 			: ${__min_width:=${XDIALOG_MIN_WIDTH:-35}}
 		else
 			: ${__min_width:=${DIALOG_MIN_WIDTH:-24}}
 		fi
 		if [ $__width_menu_constrain -lt $__min_width ]; then
 			setvar "$__var_width" $__min_width
 		elif [ $__width_menu_constrain -gt \
 		       $__max_width_menu_constrain ]
 		then
 			setvar "$__var_width" $__max_width_menu_constrain
 		fi
 	fi
 
 	# Adjust rows if desired
 	if [ "$__var_rows" ]; then
 		if [ "$USE_XDIALOG" ]; then
 			: ${__min_rows:=1}
 		else
 			: ${__min_rows:=0}
 		fi
 
 		local __max_rows_menu_constrain=$((
 			$__max_height_menu_constrain - 7
 		))
 		# If prompt_len is zero (no prompt), bump the max-rows by 1
 		# Default assumption is (if no argument) that there's no prompt
 		[ ${__prompt_len:-0} -gt 0 ] || __max_rows_menu_constrain=$((
 			$__max_rows_menu_constrain + 1
 		))
 
 		if [ $__rows_menu_constrain -lt $__min_rows ]; then
 			setvar "$__var_rows" $__min_rows
 		elif [ $__rows_menu_constrain -gt $__max_rows_menu_constrain ]
 		then
 			setvar "$__var_rows" $__max_rows_menu_constrain
 		fi
 	fi
 
 	if [ "$debug" ]; then
 		# Print final constrained values to debugging
 		[ "$__var_height" ] && f_quietly f_getvar "$__var_height"
 		[ "$__var_width"  ] && f_quietly f_getvar "$__var_width"
 		[ "$__var_rows"   ] && f_quietly f_getvar "$__var_rows"
 	fi
 
 	return $__retval # success if no debug warnings were printed
 }
 
 # f_dialog_infobox_size [-n] $var_height $var_width \
 #                       $title $backtitle $prompt [$hline]
 #
 # Not all versions of dialog(1) perform auto-sizing of the width and height of
 # `--infobox' boxes sensibly.
 #
 # This function helps solve this issue by taking two sets of sequential
 # arguments. The first set of arguments are the variable names to use when
 # storing the calculated height and width. The second set of arguments are the
 # title, backtitle, prompt, and [optionally] hline. The optimal height and
 # width for the described widget (not exceeding the actual terminal height or
 # width) is stored in $var_height and $var_width (respectively).
 #
 # If the first argument is `-n', the calculated sizes ($var_height and
 # $var_width) are not constrained to minimum/maximum values.
 #
 # Newline character sequences (``\n'') in $prompt are expanded as-is done by
 # dialog(1).
 #
 f_dialog_infobox_size()
 {
 	local __constrain=1
 	[ "$1" = "-n" ] && __constrain= && shift 1 # -n
 	local __var_height="$1" __var_width="$2"
 	local __title="$3" __btitle="$4" __prompt="$5" __hline="$6"
 
 	# Return unless at least one size aspect has been requested
 	[ "$__var_height" -o "$__var_width" ] || return $FAILURE
 
 	# Default height/width of zero for auto-sizing
 	local __height=0 __width=0 __n
 
 	# Adjust height if desired
 	if [ "$__var_height" ]; then
 		#
 		# Set height based on number of rows in prompt
 		#
 		__n=$( echo -n "$__prompt" | f_number_of_lines )
 		__n=$(( $__n + 2 )) # two more than the prompt's line count
 		[ $__n -gt $__height ] && __height=$__n
 
 		#
 		# For Xdialog(1) bump height if backtitle is enabled (displayed
 		# in the X11 window with a separator line between the backtitle
 		# and msg text).
 		#
 		if [ "$USE_XDIALOG" -a "$__btitle" ]; then
 			__n=$( echo "$__btitle" | f_number_of_lines )
 			__height=$(( $__height + $__n + 2 ))
 		fi
 
 		# Store the result in the caller-named variable
 		setvar "$__var_height" $__height
 	fi
 
 	# Adjust width if desired
 	if [ "$__var_width" ]; then
 		#
 		# Bump width for long titles
 		#
 		__n=$(( ${#__title} + 4 ))
 		[ $__n -gt $__width ] && __width=$__n
 
 		#
 		# If using Xdialog(1), bump width for long backtitles (which
 		# appear within the window).
 		#
 		if [ "$USE_XDIALOG" ]; then
 			__n=$(( ${#__btitle} + 4 ))
 			[ $__n -gt $__width ] && __width=$__n
 		fi
 
 		#
 		# Bump width for long prompts
 		#
 		__n=$( echo "$__prompt" | f_longest_line_length )
 		__n=$(( $__n + 4 )) # add width for border
 		[ $__n -gt $__width ] && __width=$__n
 
 		#
 		# Bump width for long hlines. Xdialog(1) supports `--hline' but
 		# it's currently not used (so don't do anything here if using
 		# Xdialog(1)).
 		#
 		if [ ! "$USE_XDIALOG" ]; then
 			__n=$(( ${#__hline} + 10 ))
 			[ $__n -gt $__width ] && __width=$__n
 		fi
 
 		# Bump width by 16.6% if using Xdialog(1)
 		[ "$USE_XDIALOG" ] && __width=$(( $__width + $__width / 6 ))
 
 		# Store the result in the caller-named variable
 		setvar "$__var_width" $__width
 	fi
 
 	# Constrain values to sensible minimums/maximums unless `-n' was passed
 	# Return success if no-constrain, else return status from constrain
 	[ ! "$__constrain" ] ||
 		f_dialog_size_constrain "$__var_height" "$__var_width"
 }
 
 # f_dialog_buttonbox_size [-n] $var_height $var_width \
 #                         $title $backtitle $prompt [$hline]
 #
 # Not all versions of dialog(1) perform auto-sizing of the width and height of
 # `--msgbox' and `--yesno' boxes sensibly.
 #
 # This function helps solve this issue by taking two sets of sequential
 # arguments. The first set of arguments are the variable names to use when
 # storing the calculated height and width. The second set of arguments are the
 # title, backtitle, prompt, and [optionally] hline. The optimal height and
 # width for the described widget (not exceeding the actual terminal height or
 # width) is stored in $var_height and $var_width (respectively).
 #
 # If the first argument is `-n', the calculated sizes ($var_height and
 # $var_width) are not constrained to minimum/maximum values.
 #
 # Newline character sequences (``\n'') in $prompt are expanded as-is done by
 # dialog(1).
 #
 f_dialog_buttonbox_size()
 {
 	local __constrain=1
 	[ "$1" = "-n" ] && __constrain= && shift 1 # -n
 	local __var_height="$1" __var_width="$2"
 	local __title="$3" __btitle="$4" __prompt="$5" __hline="$6"
 
 	# Return unless at least one size aspect has been requested
 	[ "$__var_height" -o "$__var_width" ] || return $FAILURE
 
 	# Calculate height/width of infobox (adjusted/constrained below)
 	# NOTE: Function name appended to prevent __var_{height,width} values
 	#       from becoming local (and thus preventing setvar from working).
 	# Each ${var:+name} passes the scratch variable name only for an
 	# aspect that the caller requested (an empty string otherwise).
 	local __height_bbox_size __width_bbox_size
 	f_dialog_infobox_size -n \
 		"${__var_height:+__height_bbox_size}" \
 		"${__var_width:+__width_bbox_size}" \
 		"$__title" "$__btitle" "$__prompt" "$__hline"
 
 	# Adjust height if desired
 	if [ "$__var_height" ]; then
 		# Add height to accommodate the buttons
 		__height_bbox_size=$(( $__height_bbox_size + 2 ))
 
 		# Adjust for clipping with Xdialog(1) on Linux/GTK2
 		[ "$USE_XDIALOG" ] &&
 			__height_bbox_size=$(( $__height_bbox_size + 3 ))
 
 		setvar "$__var_height" $__height_bbox_size
 	fi
 
 	# No adjustments to width, just pass-thru the infobox width
 	if [ "$__var_width" ]; then
 		setvar "$__var_width" $__width_bbox_size
 	fi
 
 	# Constrain values to sensible minimums/maximums unless `-n' was passed
 	# Return success if no-constrain, else return status from constrain
 	[ ! "$__constrain" ] ||
 		f_dialog_size_constrain "$__var_height" "$__var_width"
 }
 
 # f_dialog_inputbox_size [-n] $var_height $var_width \
 #                        $title $backtitle $prompt $init [$hline]
 #
 # Not all versions of dialog(1) perform auto-sizing of the width and height of
 # `--inputbox' boxes sensibly.
 #
 # This function helps solve this issue by taking two sets of sequential
 # arguments. The first set of arguments are the variable names to use when
 # storing the calculated height and width. The second set of arguments are the
 # title, backtitle, prompt, initial text, and [optionally] hline. The optimal
 # height and width for the described widget (not exceeding the actual terminal
 # height or width) is stored in $var_height and $var_width (respectively).
 #
 # If the first argument is `-n', the calculated sizes ($var_height and
 # $var_width) are not constrained to minimum/maximum values.
 #
 # Newline character sequences (``\n'') in $prompt are expanded as-is done by
 # dialog(1).
 #
 f_dialog_inputbox_size()
 {
 	# Constraining is on by default; a leading `-n' argument disables it
 	local __constrain=1
 	if [ "$1" = "-n" ]; then
 		__constrain=
 		shift 1 # -n
 	fi
 
 	# Positional arguments: output variable names, then the widget text
 	local __var_height="$1"
 	local __var_width="$2"
 	local __title="$3"
 	local __btitle="$4"
 	local __prompt="$5"
 	local __init="$6"
 	local __hline="$7"
 
 	# Nothing to do if the caller asked for neither height nor width
 	[ "$__var_height" -o "$__var_width" ] ||
 		return $FAILURE
 
 	# Start from the unconstrained buttonbox dimensions.
 	# NOTE: The scratch variables carry a function-specific suffix so they
 	#       cannot shadow the caller-supplied variable names (a collision
 	#       would make setvar write to our local instead).
 	local __height_ibox_size
 	local __width_ibox_size
 	f_dialog_buttonbox_size -n \
 		"${__var_height:+__height_ibox_size}" \
 		"${__var_width:+__width_ibox_size}" \
 		"$__title" "$__btitle" "$__prompt" "$__hline"
 
 	# Height: dialog(1) needs three more lines for the input box itself;
 	# Xdialog(1) does not
 	if [ "$__var_height" ]; then
 		if [ ! "$USE_XDIALOG" ]; then
 			__height_ibox_size=$(( $__height_ibox_size + 3 ))
 		fi
 		setvar "$__var_height" $__height_ibox_size
 	fi
 
 	# Width: make room for the initial text (neither dialog(1) nor
 	# Xdialog(1) does this on its own); Xdialog(1) gets another 16.6%
 	if [ "$__var_width" ]; then
 		local __n
 		__n=$(( ${#__init} + 7 ))
 		if [ "$USE_XDIALOG" ]; then
 			__n=$(( $__n + $__n / 6 ))
 		fi
 		if [ $__n -gt $__width_ibox_size ]; then
 			__width_ibox_size=$__n
 		fi
 		setvar "$__var_width" $__width_ibox_size
 	fi
 
 	# With `-n' we are done (success); otherwise clamp the stored values to
 	# sensible minimums/maximums and hand back the status of that step
 	if [ "$__constrain" ]; then
 		f_dialog_size_constrain "$__var_height" "$__var_width"
 	fi
 }
 
 # f_xdialog_2inputsbox_size [-n] $var_height $var_width \
 #                           $title $backtitle $prompt \
 #                           $label1 $init1 $label2 $init2
 #
 # Xdialog(1) does not perform auto-sizing of the width and height of
 # `--2inputsbox' boxes sensibly.
 #
 # This function helps solve this issue by taking two sets of sequential
 # arguments. The first set of arguments are the variable names to use when
 # storing the calculated height and width. The second set of arguments are the
 # title, backtitle, prompt, label for the first field, initial text for said
 # field, label for the second field, and initial text for said field. The
 # optimal height and width for the described widget (not exceeding the actual
 # terminal height or width) is stored in $var_height and $var_width
 # (respectively).
 #
 # If the first argument is `-n', the calculated sizes ($var_height and
 # $var_width) are not constrained to minimum/maximum values.
 #
 # Newline character sequences (``\n'') in $prompt are expanded as-is done by
 # Xdialog(1).
 #
 f_xdialog_2inputsbox_size()
 {
 	local __constrain=1
 	[ "$1" = "-n" ] && __constrain= && shift 1 # -n
 	local __var_height="$1" __var_width="$2"
 	local __title="$3" __btitle="$4" __prompt="$5"
 	local __label1="$6" __init1="$7" __label2="$8" __init2="$9"
 
 	# Return unless at least one size aspect has been requested
 	[ "$__var_height" -o "$__var_width" ] || return $FAILURE
 
 	# Calculate height/width of inputbox (adjusted/constrained below)
 	# NOTE: Function name appended to prevent __var_{height,width} values
 	#       from becoming local (and thus preventing setvar from working).
 	# NOTE: f_dialog_inputbox_size takes $init *before* its optional $hline.
 	#       This widget has no hline argument, so the first field's initial
 	#       text goes in the $init position and $hline is left off.
 	local __height_2ibox_size __width_2ibox_size
 	f_dialog_inputbox_size -n \
 		"${__var_height:+__height_2ibox_size}" \
 		"${__var_width:+__width_2ibox_size}" \
 		"$__title" "$__btitle" "$__prompt" "$__init1"
 
 	# Adjust height if desired
 	if [ "$__var_height" ]; then
 		# Add height for 1st label, 2nd label, and 2nd input box
 		__height_2ibox_size=$(( $__height_2ibox_size + 2 + 2 + 2  ))
 		setvar "$__var_height" $__height_2ibox_size
 	fi
 
 	# Adjust width if desired
 	if [ "$__var_width" ]; then
 		local __n
 
 		# Bump width for first label text (+16.6% since Xdialog(1))
 		__n=$(( ${#__label1} + 7 ))
 		__n=$(( $__n + $__n / 6 ))
 		[ $__n -gt $__width_2ibox_size ] && __width_2ibox_size=$__n
 
 		# Bump width for second label text (+16.6% since Xdialog(1))
 		__n=$(( ${#__label2} + 7 ))
 		__n=$(( $__n + $__n / 6 ))
 		[ $__n -gt $__width_2ibox_size ] && __width_2ibox_size=$__n
 
 		# Bump width for 2nd initial text (something neither dialog(1)
 		# nor Xdialog(1) do, but worth it!; +16.6% since Xdialog(1))
 		__n=$(( ${#__init2} + 7 ))
 		__n=$(( $__n + $__n / 6 ))
 		[ $__n -gt $__width_2ibox_size ] && __width_2ibox_size=$__n
 
 		setvar "$__var_width" $__width_2ibox_size
 	fi
 
 	# Constrain values to sensible minimums/maximums unless `-n' was passed
 	# Return success if no-constrain, else return status from constrain
 	[ ! "$__constrain" ] ||
 		f_dialog_size_constrain "$__var_height" "$__var_width"
 }
 
 # f_dialog_menu_size [-n] $var_height $var_width $var_rows \
 #                    $title $backtitle $prompt $hline \
 #                    $tag1 $item1 $tag2 $item2 ...
 #
 # Not all versions of dialog(1) perform auto-sizing of the width and height of
 # `--menu' boxes sensibly.
 #
 # This function helps solve this issue by taking three sets of sequential
 # arguments. The first set of arguments are the variable names to use when
 # storing the calculated height, width, and rows. The second set of arguments
 # are the title, backtitle, prompt, and hline. The [optional] third set of
 # arguments are the menu list itself (comprised of tag/item couplets). The
 # optimal height, width, and rows for the described widget (not exceeding the
 # actual terminal height or width) is stored in $var_height, $var_width, and
 # $var_rows (respectively).
 #
 # If the first argument is `-n', the calculated sizes ($var_height, $var_width,
 # and $var_rows) are not constrained to minimum/maximum values.
 #
 f_dialog_menu_size()
 {
 	# Constraining is on by default; a leading `-n' argument disables it
 	local __constrain=1
 	if [ "$1" = "-n" ]; then
 		__constrain=
 		shift 1 # -n
 	fi
 
 	# Positional arguments: output variable names, then the widget text;
 	# whatever remains after the shift is the tag/item list
 	local __var_height="$1"
 	local __var_width="$2"
 	local __var_rows="$3"
 	local __title="$4"
 	local __btitle="$5"
 	local __prompt="$6"
 	local __hline="$7"
 	shift 7 # var_height/var_width/var_rows/title/btitle/prompt/hline
 
 	# Nothing to do if the caller asked for no size aspect at all
 	[ "$__var_height" -o "$__var_width" -o "$__var_rows" ] ||
 		return $FAILURE
 
 	# Start from the unconstrained infobox dimensions.
 	# NOTE: The scratch variables carry a function-specific suffix so they
 	#       cannot shadow the caller-supplied variable names (a collision
 	#       would make setvar write to our local instead).
 	local __height_menu_size
 	local __width_menu_size
 	f_dialog_infobox_size -n \
 		"${__var_height:+__height_menu_size}" \
 		"${__var_width:+__width_menu_size}" \
 		"$__title" "$__btitle" "$__prompt" "$__hline"
 
 	#
 	# Walk the tag/item couplets unconditionally, recording the longest
 	# tag and the longest item (both widen the menu) and counting the
 	# couplets (each one is a row that adds to the height).
 	#
 	local __longest_tag=0
 	local __longest_item=0
 	local __rows=0
 	while [ $# -ge 2 ]; do
 		local __tag="$1"
 		local __item="$2"
 		if [ ${#__tag} -gt $__longest_tag ]; then
 			__longest_tag=${#__tag}
 		fi
 		if [ ${#__item} -gt $__longest_item ]; then
 			__longest_item=${#__item}
 		fi
 		__rows=$(( $__rows + 1 ))
 		shift 2 # tag/item
 	done
 
 	# Xdialog(1) gets one extra row as a visual aid; settle this before the
 	# upcoming height calculation (only relevant to height and rows)
 	if [ "$USE_XDIALOG" ] && [ "$__var_height" -o "$__var_rows" ]; then
 		__rows=$(( $__rows + 1 ))
 	fi
 
 	# Height: infobox height plus rows plus 4 (7 when using Xdialog(1))
 	if [ "$__var_height" ]; then
 		__height_menu_size=$((
 			$__height_menu_size + $__rows + 4 ${USE_XDIALOG:++3}
 		))
 		setvar "$__var_height" $__height_menu_size
 	fi
 
 	# Width: widen to fit longest tag + longest item + 10 when that sum
 	# (plus 16.6% under Xdialog(1)) exceeds the infobox width
 	if [ "$__var_width" ]; then
 		local __n
 		__n=$(( $__longest_tag + $__longest_item + 10 ))
 		if [ "$USE_XDIALOG" ]; then
 			__n=$(( $__n + $__n / 6 )) # plus 16.6%
 		fi
 		if [ $__n -gt $__width_menu_size ]; then
 			__width_menu_size=$__n
 		fi
 		setvar "$__var_width" $__width_menu_size
 	fi
 
 	# Rows: hand back the (possibly adjusted) count if requested
 	if [ "$__var_rows" ]; then
 		setvar "$__var_rows" $__rows
 	fi
 
 	# With `-n' we are done (success); otherwise clamp height, width, and
 	# rows to sensible values and hand back the status of that step
 	if [ "$__constrain" ]; then
 		f_dialog_menu_constrain \
 			"$__var_height" "$__var_width" "$__var_rows" "$__prompt"
 	fi
 }
 
 # f_dialog_menu_with_help_size [-n] $var_height $var_width $var_rows \
 #                              $title $backtitle $prompt $hline \
 #                              $tag1 $item1 $help1 $tag2 $item2 $help2 ...
 #
 # Not all versions of dialog(1) perform auto-sizing of the width and height of
 # `--menu' boxes sensibly.
 #
 # This function helps solve this issue by taking three sets of sequential
 # arguments. The first set of arguments are the variable names to use when
 # storing the calculated height, width, and rows. The second set of arguments
 # are the title, backtitle, prompt, and hline. The [optional] third set of
 # arguments are the menu list itself (comprised of tag/item/help triplets). The
 # optimal height, width, and rows for the described widget (not exceeding the
 # actual terminal height or width) is stored in $var_height, $var_width, and
 # $var_rows (respectively).
 #
 # If the first argument is `-n', the calculated sizes ($var_height, $var_width,
 # and $var_rows) are not constrained to minimum/maximum values.
 #
 f_dialog_menu_with_help_size()
 {
 	# Constraining is on by default; a leading `-n' argument disables it
 	local __constrain=1
 	if [ "$1" = "-n" ]; then
 		__constrain=
 		shift 1 # -n
 	fi
 
 	# Positional arguments: output variable names, then the widget text;
 	# whatever remains after the shift is the tag/item/help list
 	local __var_height="$1"
 	local __var_width="$2"
 	local __var_rows="$3"
 	local __title="$4"
 	local __btitle="$5"
 	local __prompt="$6"
 	local __hline="$7"
 	shift 7 # var_height/var_width/var_rows/title/btitle/prompt/hline
 
 	# Nothing to do if the caller asked for no size aspect at all
 	[ "$__var_height" -o "$__var_width" -o "$__var_rows" ] ||
 		return $FAILURE
 
 	# Start from the unconstrained infobox dimensions.
 	# NOTE: The scratch variables carry a function-specific suffix so they
 	#       cannot shadow the caller-supplied variable names (a collision
 	#       would make setvar write to our local instead).
 	local __height_menu_with_help_size
 	local __width_menu_with_help_size
 	f_dialog_infobox_size -n \
 		"${__var_height:+__height_menu_with_help_size}" \
 		"${__var_width:+__width_menu_with_help_size}" \
 		"$__title" "$__btitle" "$__prompt" "$__hline"
 
 	#
 	# Walk the tag/item/help triplets unconditionally, recording the
 	# longest tag, item, and help string (all may widen the menu; the help
 	# length only matters under Xdialog(1), which shows the help string
 	# inside the widget) and counting the triplets (each one is a row that
 	# adds to the height).
 	#
 	local __longest_tag=0
 	local __longest_item=0
 	local __longest_help=0
 	local __rows=0
 	while [ $# -ge 3 ]; do
 		local __tag="$1"
 		local __item="$2"
 		local __help="$3"
 		if [ ${#__tag} -gt $__longest_tag ]; then
 			__longest_tag=${#__tag}
 		fi
 		if [ ${#__item} -gt $__longest_item ]; then
 			__longest_item=${#__item}
 		fi
 		if [ ${#__help} -gt $__longest_help ]; then
 			__longest_help=${#__help}
 		fi
 		__rows=$(( $__rows + 1 ))
 		shift 3 # tag/item/help
 	done
 
 	# Xdialog(1) gets one extra row as a visual aid; settle this before the
 	# upcoming height calculation (only relevant to height and rows)
 	if [ "$USE_XDIALOG" ] && [ "$__var_height" -o "$__var_rows" ]; then
 		__rows=$(( $__rows + 1 ))
 	fi
 
 	# Height: infobox height plus rows plus 4 (8 when using Xdialog(1))
 	if [ "$__var_height" ]; then
 		__height_menu_with_help_size=$((
 			$__height_menu_with_help_size + $__rows + 4
 			${USE_XDIALOG:++4}
 		))
 		setvar "$__var_height" $__height_menu_with_help_size
 	fi
 
 	# Width: widen to fit longest tag + longest item + 10 and, under
 	# Xdialog(1) only, longest help + 10 (each plus 16.6% for Xdialog(1))
 	if [ "$__var_width" ]; then
 		local __n
 		__n=$(( $__longest_tag + $__longest_item + 10 ))
 		if [ "$USE_XDIALOG" ]; then
 			__n=$(( $__n + $__n / 6 )) # plus 16.6%
 		fi
 		if [ $__n -gt $__width_menu_with_help_size ]; then
 			__width_menu_with_help_size=$__n
 		fi
 
 		if [ "$USE_XDIALOG" ]; then
 			__n=$(( $__longest_help + 10 ))
 			__n=$(( $__n + $__n / 6 )) # plus 16.6%
 			if [ $__n -gt $__width_menu_with_help_size ]; then
 				__width_menu_with_help_size=$__n
 			fi
 		fi
 
 		setvar "$__var_width" $__width_menu_with_help_size
 	fi
 
 	# Rows: hand back the (possibly adjusted) count if requested
 	if [ "$__var_rows" ]; then
 		setvar "$__var_rows" $__rows
 	fi
 
 	# With `-n' we are done (success); otherwise clamp height, width, and
 	# rows to sensible values and hand back the status of that step
 	if [ "$__constrain" ]; then
 		f_dialog_menu_constrain \
 			"$__var_height" "$__var_width" "$__var_rows" "$__prompt"
 	fi
 }
 
 # f_dialog_radiolist_size [-n] $var_height $var_width $var_rows \
 #                         $title $backtitle $prompt $hline \
 #                         $tag1 $item1 $status1 $tag2 $item2 $status2 ...
 #
 # Not all versions of dialog(1) perform auto-sizing of the width and height of
 # `--radiolist' boxes sensibly.
 #
 # This function helps solve this issue by taking three sets of sequential
 # arguments. The first set of arguments are the variable names to use when
 # storing the calculated height, width, and rows. The second set of arguments
 # are the title, backtitle, prompt, and hline. The [optional] third set of
 # arguments are the radio list itself (comprised of tag/item/status triplets).
 # The optimal height, width, and rows for the described widget (not exceeding
 # the actual terminal height or width) is stored in $var_height, $var_width,
 # and $var_rows (respectively).
 #
 # If the first argument is `-n', the calculated sizes ($var_height, $var_width,
 # and $var_rows) are not constrained to minimum/maximum values.
 #
 f_dialog_radiolist_size()
 {
 	# Constraining is on by default; a leading `-n' argument disables it
 	local __constrain=1
 	if [ "$1" = "-n" ]; then
 		__constrain=
 		shift 1 # -n
 	fi
 
 	# Positional arguments: output variable names, then the widget text;
 	# whatever remains after the shift is the tag/item/status list
 	local __var_height="$1"
 	local __var_width="$2"
 	local __var_rows="$3"
 	local __title="$4"
 	local __btitle="$5"
 	local __prompt="$6"
 	local __hline="$7"
 	shift 7 # var_height/var_width/var_rows/title/btitle/prompt/hline
 
 	# Nothing to do if the caller asked for no size aspect at all
 	[ "$__var_height" -o "$__var_width" -o "$__var_rows" ] ||
 		return $FAILURE
 
 	# Start from the unconstrained infobox dimensions.
 	# NOTE: The scratch variables carry a function-specific suffix so they
 	#       cannot shadow the caller-supplied variable names (a collision
 	#       would make setvar write to our local instead).
 	local __height_rlist_size
 	local __width_rlist_size
 	f_dialog_infobox_size -n \
 		"${__var_height:+__height_rlist_size}" \
 		"${__var_width:+__width_rlist_size}" \
 		"$__title" "$__btitle" "$__prompt" "$__hline"
 
 	#
 	# Walk the tag/item/status triplets unconditionally, recording the
 	# longest tag and the longest item (both widen the list) and counting
 	# the triplets (each one is a row that adds to the height). The status
 	# element does not influence the size.
 	#
 	local __longest_tag=0
 	local __longest_item=0
 	local __rows_rlist_size=0
 	while [ $# -ge 3 ]; do
 		local __tag="$1"
 		local __item="$2"
 		if [ ${#__tag} -gt $__longest_tag ]; then
 			__longest_tag=${#__tag}
 		fi
 		if [ ${#__item} -gt $__longest_item ]; then
 			__longest_item=${#__item}
 		fi
 		__rows_rlist_size=$(( $__rows_rlist_size + 1 ))
 		shift 3 # tag/item/status
 	done
 
 	# Xdialog(1) gets one extra row as a visual aid; settle this before the
 	# upcoming height calculation (only relevant to height and rows)
 	if [ "$USE_XDIALOG" ] && [ "$__var_height" -o "$__var_rows" ]; then
 		__rows_rlist_size=$(( $__rows_rlist_size + 1 ))
 	fi
 
 	# Height: infobox height plus rows plus 4 (7 when using Xdialog(1))
 	if [ "$__var_height" ]; then
 		__height_rlist_size=$((
 			$__height_rlist_size + $__rows_rlist_size + 4
 			${USE_XDIALOG:++3}
 		))
 		setvar "$__var_height" $__height_rlist_size
 	fi
 
 	# Width: widen to fit longest tag + longest item + 13 (the extra room
 	# covers the radio button) when that sum (plus 16.6% under Xdialog(1))
 	# exceeds the infobox width
 	if [ "$__var_width" ]; then
 		local __n
 		__n=$(( $__longest_tag + $__longest_item + 13 ))
 		if [ "$USE_XDIALOG" ]; then
 			__n=$(( $__n + $__n / 6 )) # plus 16.6%
 		fi
 		if [ $__n -gt $__width_rlist_size ]; then
 			__width_rlist_size=$__n
 		fi
 		setvar "$__var_width" $__width_rlist_size
 	fi
 
 	# Rows: hand back the (possibly adjusted) count if requested
 	if [ "$__var_rows" ]; then
 		setvar "$__var_rows" $__rows_rlist_size
 	fi
 
 	# With `-n' we are done (success); otherwise clamp height, width, and
 	# rows to sensible values and hand back the status of that step
 	if [ "$__constrain" ]; then
 		f_dialog_menu_constrain \
 			"$__var_height" "$__var_width" "$__var_rows" "$__prompt"
 	fi
 }
 
 # f_dialog_checklist_size [-n] $var_height $var_width $var_rows \
 #                         $title $backtitle $prompt $hline \
 #                         $tag1 $item1 $status1 $tag2 $item2 $status2 ...
 #
 # Not all versions of dialog(1) perform auto-sizing of the width and height of
 # `--checklist' boxes sensibly.
 #
 # This function helps solve this issue by taking three sets of sequential
 # arguments. The first set of arguments are the variable names to use when
 # storing the calculated height, width, and rows. The second set of arguments
 # are the title, backtitle, prompt, and hline. The [optional] third set of
 # arguments are the check list itself (comprised of tag/item/status triplets).
 # The optimal height, width, and rows for the described widget (not exceeding
 # the actual terminal height or width) is stored in $var_height, $var_width,
 # and $var_rows (respectively). 
 #
 # If the first argument is `-n', the calculated sizes ($var_height, $var_width,
 # and $var_rows) are not constrained to minimum/maximum values.
 #
 f_dialog_checklist_size()
 {
 	# Pure pass-through: every argument (including an optional leading
 	# `-n') is forwarded untouched to f_dialog_radiolist_size, whose exit
 	# status becomes ours.
 	f_dialog_radiolist_size "$@"
 }
 
 # f_dialog_radiolist_with_help_size [-n] $var_height $var_width $var_rows \
 #                                   $title $backtitle $prompt $hline \
 #                                   $tag1 $item1 $status1 $help1 \
 #                                   $tag2 $item2 $status2 $help2 ...
 #
 # Not all versions of dialog(1) perform auto-sizing of the width and height of
 # `--radiolist' boxes sensibly.
 #
 # This function helps solve this issue by taking three sets of sequential
 # arguments. The first set of arguments are the variable names to use when
 # storing the calculated height, width, and rows. The second set of arguments
 # are the title, backtitle, prompt, and hline. The [optional] third set of
 # arguments are the radio list itself (comprised of tag/item/status/help
 # quadruplets). The optimal height, width, and rows for the described widget
 # (not exceeding the actual terminal height or width) is stored in $var_height,
 # $var_width, and $var_rows (respectively).
 #
 # If the first argument is `-n', the calculated sizes ($var_height, $var_width,
 # and $var_rows) are not constrained to minimum/maximum values.
 #
 f_dialog_radiolist_with_help_size()
 {
 	local __constrain=1
 	[ "$1" = "-n" ] && __constrain= && shift 1 # -n
 	local __var_height="$1" __var_width="$2" __var_rows="$3"
 	local __title="$4" __btitle="$5" __prompt="$6" __hline="$7"
 	shift 7 # var_height/var_width/var_rows/title/btitle/prompt/hline
 
 	# Return unless at least one size aspect has been requested
 	[ "$__var_height" -o "$__var_width" -o "$__var_rows" ] ||
 		return $FAILURE
 
 	# Calculate height/width of infobox (adjusted/constrained below)
 	# NOTE: Function name appended to prevent __var_{height,width} values
 	#       from becoming local (and thus preventing setvar from working).
 	local __height_rlist_with_help_size __width_rlist_with_help_size
 	f_dialog_infobox_size -n \
 		"${__var_height:+__height_rlist_with_help_size}" \
 		"${__var_width:+__width_rlist_with_help_size}" \
 		"$__title" "$__btitle" "$__prompt" "$__hline"
 
 	#
 	# Always process the menu-item arguments to get the longest tag-length,
 	# longest item-length, longest help-length (help-length only considered
 	# if using Xdialog(1), as it places the help string in the widget) --
 	# all used to bump the width -- and the number of rows (used to bump
 	# the height). The status element (3rd of each quadruplet) has no
 	# bearing on the size and is skipped.
 	#
 	local __longest_tag=0 __longest_item=0 __longest_help=0
 	local __rows_rlist_with_help_size=0
 	while [ $# -ge 4 ]; do
 		local __tag="$1" __item="$2" __help="$4"
 		shift 4 # tag/item/status/help
 		[ ${#__tag} -gt $__longest_tag ] && __longest_tag=${#__tag}
 		[ ${#__item} -gt $__longest_item ] && __longest_item=${#__item}
 		[ ${#__help} -gt $__longest_help ] && __longest_help=${#__help}
 		__rows_rlist_with_help_size=$((
 			$__rows_rlist_with_help_size + 1
 		))
 	done
 
 	# Adjust rows early (for up-coming height calculation)
 	if [ "$__var_height" -o "$__var_rows" ]; then
 		# Add a row for visual aid if using Xdialog(1)
 		[ "$USE_XDIALOG" ] &&
 			__rows_rlist_with_help_size=$((
 				$__rows_rlist_with_help_size + 1
 			))
 	fi
 
 	# Adjust height if desired
 	if [ "$__var_height" ]; then
 		# Add rows to height
 		if [ "$USE_XDIALOG" ]; then
 			__height_rlist_with_help_size=$((
 				$__height_rlist_with_help_size +
 				$__rows_rlist_with_help_size + 7
 			))
 		else
 			__height_rlist_with_help_size=$((
 				$__height_rlist_with_help_size +
 				$__rows_rlist_with_help_size + 4
 			))
 		fi
 		# Store the height computed above (the suffixed scratch
 		# variable, not a bare $__height which is never set here)
 		setvar "$__var_height" $__height_rlist_with_help_size
 	fi
 
 	# Adjust width if desired
 	if [ "$__var_width" ]; then
 		# Sum total between longest tag-length, longest item-length,
 		# and radio-button width should be used to bump menu width
 		local __n=$(( $__longest_tag + $__longest_item + 13 ))
 		[ "$USE_XDIALOG" ] && __n=$(( $__n + $__n / 6 )) # plus 16.6%
 		[ $__n -gt $__width_rlist_with_help_size ] &&
 			__width_rlist_with_help_size=$__n
 
 		# Update width for help text if using Xdialog(1)
 		if [ "$USE_XDIALOG" ]; then
 			__n=$(( $__longest_help + 10 ))
 			__n=$(( $__n + $__n / 6 )) # plus 16.6%
 			[ $__n -gt $__width_rlist_with_help_size ] &&
 				__width_rlist_with_help_size=$__n
 		fi
 
 		setvar "$__var_width" $__width_rlist_with_help_size
 	fi
 
 	# Store adjusted rows if desired
 	[ "$__var_rows" ] && setvar "$__var_rows" $__rows_rlist_with_help_size
 
 	# Constrain height, width, and rows to sensible minimum/maximum values
 	# Return success if no-constrain, else return status from constrain
 	[ ! "$__constrain" ] || f_dialog_menu_constrain \
 		"$__var_height" "$__var_width" "$__var_rows" "$__prompt"
 }
 
 # f_dialog_checklist_with_help_size [-n] $var_height $var_width $var_rows \
 #                                   $title $backtitle $prompt $hline \
 #                                   $tag1 $item1 $status1 $help1 \
 #                                   $tag2 $item2 $status2 $help2 ...
 #
 # Not all versions of dialog(1) perform auto-sizing of the width and height of
 # `--checklist' boxes sensibly.
 #
 # This function helps solve this issue by taking three sets of sequential
 # arguments. The first set of arguments are the variable names to use when
 # storing the calculated height, width, and rows. The second set of arguments
 # are the title, backtitle, prompt, and hline. The [optional] third set of
 # arguments are the check list itself (comprised of tag/item/status/help
 # quadruplets). The optimal height, width, and rows for the described widget
 # (not exceeding the actual terminal height or width) is stored in $var_height,
 # $var_width, and $var_rows (respectively).
 #
 # If the first argument is `-n', the calculated sizes ($var_height, $var_width,
 # and $var_rows) are not constrained to minimum/maximum values.
 #
 f_dialog_checklist_with_help_size()
 {
 	# Pure pass-through: every argument (including an optional leading
 	# `-n') is forwarded untouched to f_dialog_radiolist_with_help_size,
 	# whose exit status becomes ours.
 	f_dialog_radiolist_with_help_size "$@"
 }
 
 # f_dialog_calendar_size [-n] $var_height $var_width \
 #                        $title $backtitle $prompt [$hline]
 #
 # Not all versions of dialog(1) perform auto-sizing of the width and height of
 # `--calendar' boxes sensibly.
 #
 # This function helps solve this issue by taking two sets of sequential
 # arguments. The first set of arguments are the variable names to use when
 # storing the calculated height and width. The second set of arguments are the
 # title, backtitle, prompt, and [optionally] hline. The optimal height and
 # width for the described widget (not exceeding the actual terminal height or
 # width) is stored in $var_height and $var_width (respectively).
 #
 # If the first argument is `-n', the calculated sizes ($var_height and
 # $var_width) are not constrained to minimum/maximum values.
 #
 # Newline character sequences (``\n'') in $prompt are expanded as-is done by
 # dialog(1).
 #
 f_dialog_calendar_size()
 {
 	local __constrain=1
 	[ "$1" = "-n" ] && __constrain= && shift 1 # -n
 	local __var_height="$1" __var_width="$2"
 	local __title="$3" __btitle="$4" __prompt="$5" __hline="$6"
 
 	# Return unless at least one size aspect has been requested
 	[ "$__var_height" -o "$__var_width" ] || return $FAILURE
 
 	#
 	# Obtain/Adjust minimum and maximum thresholds
 	# NOTE: Function name appended to prevent __var_{height,width} values
 	#       from becoming local (and thus preventing setvar from working).
 	#
 	local __max_height_cal_size __max_width_cal_size
 	f_dialog_max_size __max_height_cal_size __max_width_cal_size
 	__max_width_cal_size=$(( $__max_width_cal_size - 2 ))
 		# the calendar box will refuse to display if too wide
 	local __min_width
 	if [ "$USE_XDIALOG" ]; then
 		__min_width=55
 	else
 		__min_width=40
 		__max_height_cal_size=$((
 			$__max_height_cal_size - $DIALOG_CALENDAR_HEIGHT ))
 		# When using dialog(1), we can't predict whether the user has
 		# disabled shadows in their `$HOME/.dialogrc' file, so we'll
 		# subtract one for the potential shadow around the widget
 		__max_height_cal_size=$(( $__max_height_cal_size - 1 ))
 	fi
 
 	# Calculate height if desired
 	if [ "$__var_height" ]; then
 		local __height
 		__height=$( echo "$__prompt" | f_number_of_lines )
 
 		if [ "$USE_XDIALOG" ]; then
 			# Add height to accommodate the embedded calendar widget
 			__height=$(( $__height + $DIALOG_CALENDAR_HEIGHT - 1 ))
 
 			# Also, bump height if backtitle is enabled
 			if [ "$__btitle" ]; then
 				local __n
 				__n=$( echo "$__btitle" | f_number_of_lines )
 				__height=$(( $__height + $__n + 2 ))
 			fi
 		else
 			[ "$__prompt" ] && __height=$(( $__height + 1 ))
 		fi
 
 		# Enforce maximum height, unless `-n' was passed
 		[ "$__constrain" -a $__height -gt $__max_height_cal_size ] &&
 			__height=$__max_height_cal_size
 
 		setvar "$__var_height" $__height
 	fi
 
 	# Calculate width if desired
 	if [ "$__var_width" ]; then
 		# NOTE: Function name appended to prevent __var_{height,width}
 		#       values from becoming local (and thus preventing setvar
 		#       from working).
 		local __width_cal_size
 		f_dialog_infobox_size -n "" __width_cal_size \
 			"$__title" "$__btitle" "$__prompt" "$__hline"
 
 		# Enforce minimum/maximum width, unless `-n' was passed
 		if [ "$__constrain" ]; then
 			if [ $__width_cal_size -lt $__min_width ]; then
 				__width_cal_size=$__min_width
 			elif [ $__width_cal_size -gt $__max_width_cal_size ]
 			then
 				# Clamp to the maximum computed above (must be
 				# the `_cal_size'-suffixed variable)
 				__width_cal_size=$__max_width_cal_size
 			fi
 		fi
 
 		setvar "$__var_width" $__width_cal_size
 	fi
 
 	return $SUCCESS
 }
 
 # f_dialog_timebox_size [-n] $var_height $var_width \
 #                       $title $backtitle $prompt [$hline]
 #
 # Not all versions of dialog(1) perform auto-sizing of the width and height of
 # `--timebox' boxes sensibly.
 #
 # This function helps solve this issue by taking two sets of sequential
 # arguments. The first set of arguments are the variable names to use when
 # storing the calculated height and width. The second set of arguments are the
 # title, backtitle, prompt, and [optionally] hline. The optimal height and
 # width for the described widget (not exceeding the actual terminal height or
 # width) is stored in $var_height and $var_width (respectively).
 #
 # If the first argument is `-n', the calculated sizes ($var_height and
 # $var_width) are not constrained to minimum/maximum values.
 #
 # Newline character sequences (``\n'') in $prompt are expanded as-is done by
 # dialog(1).
 #
 f_dialog_timebox_size()
 {
 	# Constraining is on by default; a leading `-n' argument disables it
 	local __constrain=1
 	if [ "$1" = "-n" ]; then
 		__constrain=
 		shift 1 # -n
 	fi
 
 	# Positional arguments: output variable names, then the widget text
 	local __var_height="$1"
 	local __var_width="$2"
 	local __title="$3"
 	local __btitle="$4"
 	local __prompt="$5"
 	local __hline="$6"
 
 	# Nothing to do if the caller asked for neither height nor width
 	[ "$__var_height" -o "$__var_width" ] ||
 		return $FAILURE
 
 	#
 	# Work out the minimum/maximum thresholds.
 	# NOTE: The scratch variables carry a function-specific suffix so they
 	#       cannot shadow the caller-supplied variable names (a collision
 	#       would make setvar write to our local instead).
 	#
 	local __max_height_tbox_size
 	local __max_width_tbox_size
 	f_dialog_max_size __max_height_tbox_size __max_width_tbox_size
 	# The timebox widget refuses to display if too wide
 	__max_width_tbox_size=$(( $__max_width_tbox_size - 2 ))
 	local __min_width
 	if [ ! "$USE_XDIALOG" ]; then
 		__min_width=20
 		# Leave room for the time widget itself ...
 		__max_height_tbox_size=$((
 			$__max_height_tbox_size - $DIALOG_TIMEBOX_HEIGHT
 		))
 		# ... and one more line for a potential shadow, since we can't
 		# predict whether shadows are disabled in `$HOME/.dialogrc'
 		__max_height_tbox_size=$(( $__max_height_tbox_size - 1 ))
 	else
 		__min_width=40
 	fi
 
 	# Height, if requested
 	if [ "$__var_height" ]; then
 		if [ "$USE_XDIALOG" ]; then
 			# The height appears to have no effect with Xdialog(1);
 			# all values give the same result
 			setvar "$__var_height" 0 # autosize
 		else
 			# Lines in the prompt, plus one if there is a prompt at
 			# all, plus one
 			local __height
 			__height=$( echo "$__prompt" | f_number_of_lines )
 			__height=$(( $__height ${__prompt:++1} + 1 ))
 
 			# Cap at the maximum height, unless `-n' was passed
 			if [ "$__constrain" -a \
 				$__height -gt $__max_height_tbox_size ]
 			then
 				__height=$__max_height_tbox_size
 			fi
 
 			setvar "$__var_height" $__height
 		fi
 	fi
 
 	# Width, if requested
 	if [ "$__var_width" ]; then
 		# NOTE: Suffixed name for the same reason as given above
 		local __width_tbox_size
 		f_dialog_infobox_size -n "" __width_tbox_size \
 			"$__title" "$__btitle" "$__prompt" "$__hline"
 
 		# Keep within the minimum/maximum width, unless `-n' was passed
 		if [ "$__constrain" ]; then
 			if [ $__width_tbox_size -lt $__min_width ]; then
 				__width_tbox_size=$__min_width
 			elif [ $__width_tbox_size -ge $__max_width_tbox_size ]
 			then
 				__width_tbox_size=$__max_width_tbox_size
 			fi
 		fi
 
 		setvar "$__var_width" $__width_tbox_size
 	fi
 
 	return $SUCCESS
 }
 
 ############################################################ CLEAR FUNCTIONS
 
 # f_dialog_clear
 #
 # Clears any/all previous dialog(1) displays.
 #
 f_dialog_clear()
 {
 	# $DIALOG is expanded unquoted, so it is subject to word splitting;
 	# the exit status of this command is the function's return status.
 	$DIALOG --clear
 }
 
 ############################################################ INFO FUNCTIONS
 
 # f_dialog_info $info_text ...
 #
 # Throw up a dialog(1) infobox. The infobox remains until another dialog is
 # displayed or `dialog --clear' (or f_dialog_clear) is called.
 #
 f_dialog_info()
 {
 	# "$*" joins every argument into a single string for the infobox text
 	local info_text="$*" height width
 	# height/width are passed by name; they are read back below as the
 	# size arguments of the infobox
 	f_dialog_infobox_size height width \
 		"$DIALOG_TITLE" "$DIALOG_BACKTITLE" "$info_text"
 	# NB: The two ${USE_XDIALOG:+...} words below expand to nothing unless
 	#     $USE_XDIALOG is set and non-NULL
 	$DIALOG \
 		--title "$DIALOG_TITLE"         \
 		--backtitle "$DIALOG_BACKTITLE" \
 		${USE_XDIALOG:+--ignore-eof}    \
 		${USE_XDIALOG:+--no-buttons}    \
 		--infobox "$info_text" $height $width
 }
 
 # f_xdialog_info $info_text ...
 #
 # Throw up an Xdialog(1) infobox and do not dismiss it until stdin produces
 # EOF. This implies that you must execute this either as an rvalue to a pipe,
 # lvalue to indirection or in a sub-shell that provides data on stdin.
 #
 # To open an Xdialog(1) infobox that does not disappear until explicitly dis-
 # missed, use the following:
 #
 # 	f_xdialog_info "$info_text" < /dev/tty &
 # 	pid=$!
 # 	# Perform some lengthy actions
 # 	kill $pid
 #
 # NB: Check $USE_XDIALOG if you need to support both dialog(1) and Xdialog(1).
 #
 f_xdialog_info()
 {
 	# "$*" joins every argument into a single string for the infobox text
 	local info_text="$*" height width
 	f_dialog_infobox_size height width \
 		"$DIALOG_TITLE" "$DIALOG_BACKTITLE" "$info_text"
 	# NB: `exec' replaces the running shell with $DIALOG, so nothing after
 	#     this command is executed when the exec succeeds
 	exec $DIALOG \
 		--title "$DIALOG_TITLE"               \
 		--backtitle "$DIALOG_BACKTITLE"       \
 		--no-close --no-buttons               \
 		--infobox "$info_text" $height $width \
 		-1 # timeout of -1 means abort when EOF on stdin
 }
 
 ############################################################ PAUSE FUNCTIONS
 
 # f_dialog_pause $msg_text $duration [$hline]
 #
 # Display a message in a widget with a progress bar that runs backward for
 # $duration seconds.
 #
 f_dialog_pause()
 {
 	local pause_text="$1" duration="$2" hline="$3" height width
 	# Return $FAILURE right away if f_isinteger rejects $duration
 	f_isinteger "$duration" || return $FAILURE
 	f_dialog_buttonbox_size height width \
 		"$DIALOG_TITLE" "$DIALOG_BACKTITLE" "$pause_text" "$hline"
 	# NB: ${noCancel:+--no-cancel} below adds `--no-cancel' only when
 	#     $noCancel is set and non-NULL; this function never sets it
 	#     itself (f_dialog_pause_no_cancel supplies it)
 	if [ "$USE_XDIALOG" ]; then
 		# Xdialog(1) branch: a `--yesno' widget with `--timeout' is
 		# used here instead of `--pause'; $hline is not passed
 		$DIALOG \
 			--title "$DIALOG_TITLE"         \
 			--backtitle "$DIALOG_BACKTITLE" \
 			--ok-label "$msg_skip"          \
 			--cancel-label "$msg_cancel"    \
 			${noCancel:+--no-cancel}        \
 			--timeout "$duration"           \
 			--yesno "$pause_text"           \
 			$height $width
 	else
 		# dialog(1) branch: a positive duration is reduced by one
 		# before being handed to `--pause'
 		[ $duration -gt 0 ] && duration=$(( $duration - 1 ))
-		[ $duration -gt 1 ] && duration=$(( $duration - 1 ))
 		height=$(( $height + 3 )) # Add height for progress bar
 		$DIALOG \
 			--title "$DIALOG_TITLE"         \
 			--backtitle "$DIALOG_BACKTITLE" \
 			--hline "$hline"                \
 			--ok-label "$msg_skip"          \
 			--cancel-label "$msg_cancel"    \
 			${noCancel:+--no-cancel}        \
 			--pause "$pause_text"           \
 			$height $width "$duration"
 	fi
 }
 
 # f_dialog_pause_no_cancel $msg_text $duration [$hline]
 #
 # Display a message in a widget with a progress bar that runs backward for
 # $duration seconds. No cancel button is provided. Always returns success.
 #
 f_dialog_pause_no_cancel()
 {
 	# noCancel=1 is supplied as an assignment prefix on the call; it is
 	# what f_dialog_pause tests with ${noCancel:+--no-cancel}
 	noCancel=1 f_dialog_pause "$@"
 	# The exit status of f_dialog_pause is discarded
 	return $SUCCESS
 }
 
 ############################################################ MSGBOX FUNCTIONS
 
 # f_dialog_msgbox $msg_text [$hline]
 #
 # Throw up a dialog(1) msgbox. The msgbox remains until the user presses ENTER
 # or ESC, acknowledging the modal dialog.
 #
 # If the user presses ENTER, the exit status is zero (success), otherwise if
 # the user presses ESC the exit status is 255.
 #
 f_dialog_msgbox()
 {
 	local msg_text="$1" hline="$2" height width
 	# height/width are passed by name; they are read back below as the
 	# size arguments of the msgbox
 	f_dialog_buttonbox_size height width \
 		"$DIALOG_TITLE" "$DIALOG_BACKTITLE" "$msg_text" "$hline"
 	# $DIALOG is the last command, so its exit status is the function's
 	$DIALOG \
 		--title "$DIALOG_TITLE"         \
 		--backtitle "$DIALOG_BACKTITLE" \
 		--hline "$hline"                \
 		--ok-label "$msg_ok"            \
 		--msgbox "$msg_text" $height $width
 }
 
 ############################################################ TEXTBOX FUNCTIONS
 
 # f_dialog_textbox $file
 #
 # Display the contents of $file (or an error if $file does not exist, etc.) in
 # a dialog(1) textbox (which has a scrollable region for the text). The textbox
 # remains until the user presses ENTER or ESC, acknowledging the modal dialog.
 #
 # If the user presses ENTER, the exit status is zero (success), otherwise if
 # the user presses ESC the exit status is 255.
 #
 f_dialog_textbox()
 {
 	local file="$1"
 	local contents height width retval
 
 	# Capture the text of $file -- or, because of `2>&1', the error
 	# message from cat(1) -- and remember the exit status of cat(1)
 	contents=$( cat "$file" 2>&1 )
 	retval=$?
 
 	# The widget is sized from $contents in both of the cases below
 	f_dialog_buttonbox_size height width \
 		"$DIALOG_TITLE" "$DIALOG_BACKTITLE" "$contents"
 
 	if [ $retval -eq $SUCCESS ]; then
 		# cat(1) succeeded: hand the file itself to `--textbox'
 		$DIALOG \
 			--title "$DIALOG_TITLE"         \
 			--backtitle "$DIALOG_BACKTITLE" \
 			--exit-label "$msg_ok"          \
 			--no-cancel                     \
 			--textbox "$file" $height $width
 	else
 		# cat(1) failed: show the captured error text in a msgbox
 		$DIALOG \
 			--title "$DIALOG_TITLE"         \
 			--backtitle "$DIALOG_BACKTITLE" \
 			--ok-label "$msg_ok"            \
 			--msgbox "$contents" $height $width
 	fi
 }
 
 ############################################################ YESNO FUNCTIONS
 
 # f_dialog_yesno $msg_text [$hline]
 #
 # Display a dialog(1) Yes/No prompt to allow the user to make some decision.
 # The yesno prompt remains until the user presses ENTER or ESC, acknowledging
 # the modal dialog.
 #
 # If the user chooses YES the exit status is zero, or chooses NO the exit
 # status is one, or presses ESC the exit status is 255.
 #
 f_dialog_yesno()
 {
 	local msg_text="$1" height width
 	# NB: `${2-...}' applies the default only when $2 is unset, so a NULL
 	#     hline that was passed explicitly is kept as-is
 	local hline="${2-$hline_arrows_tab_enter}"
 
 	f_interactive || return 0 # If non-interactive, return YES all the time
 
 	f_dialog_buttonbox_size height width \
 		"$DIALOG_TITLE" "$DIALOG_BACKTITLE" "$msg_text" "$hline"
 
 	# The two branches differ only in the options used to label the
 	# buttons: --ok-label/--cancel-label when $USE_XDIALOG is non-NULL,
 	# --yes-label/--no-label otherwise
 	if [ "$USE_XDIALOG" ]; then
 		$DIALOG \
 			--title "$DIALOG_TITLE"         \
 			--backtitle "$DIALOG_BACKTITLE" \
 			--hline "$hline"                \
 			--ok-label "$msg_yes"           \
 			--cancel-label "$msg_no"        \
 			--yesno "$msg_text" $height $width
 	else
 		$DIALOG \
 			--title "$DIALOG_TITLE"         \
 			--backtitle "$DIALOG_BACKTITLE" \
 			--hline "$hline"                \
 			--yes-label "$msg_yes"          \
 			--no-label "$msg_no"            \
 			--yesno "$msg_text" $height $width
 	fi
 }
 
 # f_dialog_noyes $msg_text [$hline]
 #
 # Display a dialog(1) No/Yes prompt to allow the user to make some decision.
 # The noyes prompt remains until the user presses ENTER or ESC, acknowledging
 # the modal dialog.
 #
 # If the user chooses YES the exit status is zero, or chooses NO the exit
 # status is one, or presses ESC the exit status is 255.
 #
 # NOTE: This is just like the f_dialog_yesno function except "No" is default.
 #
 f_dialog_noyes()
 {
 	local msg_text="$1" height width
 	# NB: `${2-...}' applies the default only when $2 is unset, so a NULL
 	#     hline that was passed explicitly is kept as-is
 	local hline="${2-$hline_arrows_tab_enter}"
 
 	f_interactive || return 1 # If non-interactive, return NO all the time
 
 	f_dialog_buttonbox_size height width \
 		"$DIALOG_TITLE" "$DIALOG_BACKTITLE" "$msg_text" "$hline"
 
 	# The two branches differ in the spelling of the "default to No"
 	# option (--default-no versus --defaultno) and in the options used
 	# to label the buttons
 	if [ "$USE_XDIALOG" ]; then
 		$DIALOG \
 			--title "$DIALOG_TITLE"         \
 			--backtitle "$DIALOG_BACKTITLE" \
 			--hline "$hline"                \
 			--default-no                    \
 			--ok-label "$msg_yes"           \
 			--cancel-label "$msg_no"        \
 			--yesno "$msg_text" $height $width
 	else
 		$DIALOG \
 			--title "$DIALOG_TITLE"         \
 			--backtitle "$DIALOG_BACKTITLE" \
 			--hline "$hline"                \
 			--defaultno                     \
 			--yes-label "$msg_yes"          \
 			--no-label "$msg_no"            \
 			--yesno "$msg_text" $height $width
 	fi
 }
 
 ############################################################ INPUT FUNCTIONS
 
 # f_dialog_inputstr_store [-s] $text
 #
 # Store some text from a dialog(1) inputbox to be retrieved later by
 # f_dialog_inputstr_fetch(). If the first argument is `-s', the text is
 # sanitized before being stored.
 #
 f_dialog_inputstr_store()
 {
 	local sanitize=
 	# A leading `-s' enables sanitizing and is shifted away, so the text
 	# to store is always found in $1 afterwards
 	[ "$1" = "-s" ] && sanitize=1 && shift 1 # -s
 	local text="$1"
 
 	# Sanitize the line before storing it if desired
 	# NB: The variable is passed to f_dialog_line_sanitize by name
 	[ "$sanitize" ] && f_dialog_line_sanitize text
 
 	# The name of the storage variable ends in $$ (this shell's PID)
 	setvar DIALOG_INPUTBOX_$$ "$text"
 }
 
 # f_dialog_inputstr_fetch [$var_to_set]
 #
 # Obtain the inputstr entered by the user from the most recently displayed
 # dialog(1) inputbox (previously stored with f_dialog_inputstr_store() above).
 # If $var_to_set is NULL or missing, output is printed to stdout (which is less
 # recommended due to performance degradation; in a loop for example).
 #
 f_dialog_inputstr_fetch()
 {
 	local __var_to_set="$1" __cp
 
 	# The destination is the caller's variable when one was named, else
 	# the local __cp (which is echoed further down)
 	# NB: debug is NULL for the duration of the f_getvar call; presumably
 	#     this keeps the value out of debug output -- TODO confirm
 	debug= f_getvar DIALOG_INPUTBOX_$$ "${__var_to_set:-__cp}" # get data
 	setvar DIALOG_INPUTBOX_$$ "" # scrub memory in case data was sensitive
 
 	# Return the line on standard-out if desired
 	[ "$__var_to_set" ] || echo "$__cp"
 
 	return $SUCCESS
 }
 
 # f_dialog_input $var_to_set $prompt [$init [$hline]]
 #
 # Prompt the user with a dialog(1) inputbox to enter some value. The inputbox
 # remains until the user presses ENTER or ESC, or otherwise ends the
 # editing session (by selecting `Cancel' for example).
 #
 # If the user presses ENTER, the exit status is zero (success), otherwise if
 # the user presses ESC the exit status is 255, or if the user chose Cancel, the
 # exit status is instead 1.
 #
 # NOTE: The hline should correspond to the type of data you want from the user.
 # NOTE: Should not be used to edit multiline values.
 #
 f_dialog_input()
 {
 	local __var_to_set="$1" __prompt="$2" __init="$3" __hline="$4"
 
 	# NOTE: Function name appended to prevent __var_{height,width} values
 	#       from becoming local (and thus preventing setvar from working).
 	local __height_input __width_input
 	f_dialog_inputbox_size __height_input __width_input \
 		"$DIALOG_TITLE" "$DIALOG_BACKTITLE" \
 		"$__prompt" "$__init" "$__hline"
 
 	# A `--' is placed ahead of the initial value, except when
 	# $USE_XDIALOG is non-NULL (then nothing is inserted)
 	local __opterm="--"
 	[ "$USE_XDIALOG" ] && __opterm=
 
 	# NB: Redirection order matters: `2>&1' first points stderr at the
 	#     command substitution (so it is what gets captured), then stdout
 	#     is sent to file descriptor $DIALOG_TERMINAL_PASSTHRU_FD
 	# NB: The assignment is kept separate from `local' so that $? below
 	#     is the exit status of the command substitution
 	local __dialog_input
 	__dialog_input=$(
 		$DIALOG \
 			--title "$DIALOG_TITLE"         \
 			--backtitle "$DIALOG_BACKTITLE" \
 			--hline "$__hline"              \
 			--ok-label "$msg_ok"            \
 			--cancel-label "$msg_cancel"    \
 			--inputbox "$__prompt"          \
 			$__height_input $__width_input  \
 			$__opterm "$__init"             \
 			2>&1 >&$DIALOG_TERMINAL_PASSTHRU_FD
 	)
 	local __retval=$?
 
 	# Remove warnings and leading/trailing whitespace from user input
 	f_dialog_line_sanitize __dialog_input
 
 	# The caller's variable is set regardless of the exit status, which
 	# is then passed back unchanged
 	setvar "$__var_to_set" "$__dialog_input"
 	return $__retval
 }
 
 ############################################################ MENU FUNCTIONS
 
 # f_dialog_menutag_store [-s] $text
 #
 # Store some text from a dialog(1) menu to be retrieved later by
 # f_dialog_menutag_fetch(). If the first argument is `-s', the text is
 # sanitized before being stored.
 #
 f_dialog_menutag_store()
 {
 	local sanitize=
 	# A leading `-s' enables sanitizing and is shifted away, so the text
 	# to store is always found in $1 afterwards
 	[ "$1" = "-s" ] && sanitize=1 && shift 1 # -s
 	local text="$1"
 
 	# Sanitize the menutag before storing it if desired
 	# NB: The variable is passed to f_dialog_data_sanitize by name
 	[ "$sanitize" ] && f_dialog_data_sanitize text
 
 	# The name of the storage variable ends in $$ (this shell's PID)
 	setvar DIALOG_MENU_$$ "$text"
 }
 
 # f_dialog_menutag_fetch [$var_to_set]
 #
 # Obtain the menutag chosen by the user from the most recently displayed
 # dialog(1) menu (previously stored with f_dialog_menutag_store() above). If
 # $var_to_set is NULL or missing, output is printed to stdout (which is less
 # recommended due to performance degradation; in a loop for example).
 #
 f_dialog_menutag_fetch()
 {
 	local __var_to_set="$1" __cp
 
 	# The destination is the caller's variable when one was named, else
 	# the local __cp (which is echoed further down)
 	debug= f_getvar DIALOG_MENU_$$ "${__var_to_set:-__cp}" # get the data
 	setvar DIALOG_MENU_$$ "" # scrub memory in case data was sensitive
 
 	# Return the data on standard-out if desired
 	[ "$__var_to_set" ] || echo "$__cp"
 
 	return $SUCCESS
 }
 
 # f_dialog_menuitem_store [-s] $text
 #
 # Store the item from a dialog(1) menu (see f_dialog_menutag2item()) to be
 # retrieved later by f_dialog_menuitem_fetch(). If the first argument is `-s',
 # the text is sanitized before being stored.
 #
 f_dialog_menuitem_store()
 {
 	local sanitize=
 	# A leading `-s' enables sanitizing and is shifted away, so the text
 	# to store is always found in $1 afterwards
 	[ "$1" = "-s" ] && sanitize=1 && shift 1 # -s
 	local text="$1"
 
 	# Sanitize the menuitem before storing it if desired
 	# NB: The variable is passed to f_dialog_data_sanitize by name
 	[ "$sanitize" ] && f_dialog_data_sanitize text
 
 	# The name of the storage variable ends in $$ (this shell's PID)
 	setvar DIALOG_MENUITEM_$$ "$text"
 }
 
 # f_dialog_menuitem_fetch [$var_to_set]
 #
 # Obtain the menuitem chosen by the user from the most recently displayed
 # dialog(1) menu (previously stored with f_dialog_menuitem_store() above). If
 # $var_to_set is NULL or missing, output is printed to stdout (which is less
 # recommended due to performance degradation; in a loop for example).
 #
 f_dialog_menuitem_fetch()
 {
 	local __var_to_set="$1" __cp
 
 	# The destination is the caller's variable when one was named, else
 	# the local __cp (which is echoed further down)
 	debug= f_getvar DIALOG_MENUITEM_$$ "${__var_to_set:-__cp}" # get data
 	setvar DIALOG_MENUITEM_$$ "" # scrub memory in case data was sensitive
 
 	# Return the data on standard-out if desired
 	[ "$__var_to_set" ] || echo "$__cp"
 
 	return $SUCCESS
 }
 
 # f_dialog_default_store [-s] $text
 #
 # Store some text to be used later as the --default-item argument to dialog(1)
 # (or Xdialog(1)) for --menu, --checklist, and --radiolist widgets. Retrieve
 # the text later with f_dialog_default_fetch(). If the first argument is `-s',
 # the text is sanitized before being stored.
 #
 f_dialog_default_store()
 {
 	local sanitize=
 	# A leading `-s' enables sanitizing and is shifted away, so the text
 	# to store is always found in $1 afterwards
 	[ "$1" = "-s" ] && sanitize=1 && shift 1 # -s
 	local text="$1"
 
 	# Sanitize the default item before storing it if desired
 	[ "$sanitize" ] && f_dialog_data_sanitize text
 
 	# The name of the storage variable ends in $$ (this shell's PID)
 	setvar DEFAULTITEM_$$ "$text"
 }
 
 # f_dialog_default_fetch [$var_to_set]
 #
 # Obtain text to be used with the --default-item argument of dialog(1) (or
 # Xdialog(1)) (previously stored with f_dialog_default_store() above). If
 # $var_to_set is NULL or missing, output is printed to stdout (which is less
 # recommended due to performance degradation; in a loop for example).
 #
 f_dialog_default_fetch()
 {
 	local __var_to_set="$1" __cp
 
 	# The destination is the caller's variable when one was named, else
 	# the local __cp (which is echoed further down)
 	debug= f_getvar DEFAULTITEM_$$ "${__var_to_set:-__cp}" # get the data
 	setvar DEFAULTITEM_$$ "" # scrub memory in case data was sensitive
 
 	# Return the data on standard-out if desired
 	[ "$__var_to_set" ] || echo "$__cp"
 
 	return $SUCCESS
 }
 
 # f_dialog_menutag2item $tag_chosen $tag1 $item1 $tag2 $item2 ...
 #
 # To use the `--menu' option of dialog(1) you must pass an ordered list of
 # tag/item pairs on the command-line. When the user selects a menu option the
 # tag for that item is printed to stderr.
 #
 # This function allows you to dereference the tag chosen by the user back into
 # the item associated with said tag.
 #
 # Pass the tag chosen by the user as the first argument, followed by the
 # ordered list of tag/item pairs (HINT: use the same tag/item list as was
 # passed to dialog(1) for consistency).
 #
 # If the tag cannot be found, NULL is returned.
 #
 f_dialog_menutag2item()
 {
 	local tag="$1" tagn item
 	shift 1 # tag
 
 	# Consume the remaining arguments two at a time (tag, item) and print
 	# the item of the first pair whose tag equals the chosen one
 	while [ $# -gt 0 ]; do
 		tagn="$1"
 		item="$2"
 		shift 2 # tagn/item
 
 		if [ "$tag" = "$tagn" ]; then
 			echo "$item"
 			return $SUCCESS
 		fi
 	done
 	# No pair matched; nothing has been printed
 	return $FAILURE
 }
 
 # f_dialog_menutag2item_with_help $tag_chosen $tag1 $item1 $help1 \
 #                                             $tag2 $item2 $help2 ...
 #
 # To use the `--menu' option of dialog(1) with the `--item-help' option, you
 # must pass an ordered list of tag/item/help triplets on the command-line. When
 # the user selects a menu option the tag for that item is printed to stderr.
 #
 # This function allows you to dereference the tag chosen by the user back into
 # the item associated with said tag (help is discarded/ignored).
 #
 # Pass the tag chosen by the user as the first argument, followed by the
 # ordered list of tag/item/help triplets (HINT: use the same tag/item/help list
 # as was passed to dialog(1) for consistency).
 #
 # If the tag cannot be found, NULL is returned.
 #
 f_dialog_menutag2item_with_help()
 {
 	local tag="$1" tagn item
 	shift 1 # tag
 
 	# Consume the remaining arguments three at a time (tag, item, help)
 	# and print the item of the first triplet whose tag equals the chosen
 	# one; the help argument is never read
 	while [ $# -gt 0 ]; do
 		tagn="$1"
 		item="$2"
 		shift 3 # tagn/item/help
 
 		if [ "$tag" = "$tagn" ]; then
 			echo "$item"
 			return $SUCCESS
 		fi
 	done
 	# No triplet matched; nothing has been printed
 	return $FAILURE
 }
 
 # f_dialog_menutag2index $tag_chosen $tag1 $item1 $tag2 $item2 ...
 #
 # To use the `--menu' option of dialog(1) you must pass an ordered list of
 # tag/item pairs on the command-line. When the user selects a menu option the
 # tag for that item is printed to stderr.
 #
 # This function allows you to dereference the tag chosen by the user back into
 # the index associated with said tag. The index is the one-based tag/item pair
 # array position within the ordered list of tag/item pairs passed to dialog(1).
 #
 # Pass the tag chosen by the user as the first argument, followed by the
 # ordered list of tag/item pairs (HINT: use the same tag/item list as was
 # passed to dialog(1) for consistency).
 #
 # If the tag cannot be found, NULL is returned.
 #
 f_dialog_menutag2index()
 {
 	# n counts the pairs examined so far, starting at one
 	local tag="$1" tagn n=1
 	shift 1 # tag
 
 	# Consume the remaining arguments two at a time (tag, item) and print
 	# the position of the first pair whose tag equals the chosen one
 	while [ $# -gt 0 ]; do
 		tagn="$1"
 		shift 2 # tagn/item
 
 		if [ "$tag" = "$tagn" ]; then
 			echo $n
 			return $SUCCESS
 		fi
 		n=$(( $n + 1 ))
 	done
 	# No pair matched; nothing has been printed
 	return $FAILURE
 }
 
 # f_dialog_menutag2index_with_help $tag_chosen $tag1 $item1 $help1 \
 #                                              $tag2 $item2 $help2 ...
 #
 # To use the `--menu' option of dialog(1) with the `--item-help' option, you
 # must pass an ordered list of tag/item/help triplets on the command-line. When
 # the user selects a menu option the tag for that item is printed to stderr.
 #
 # This function allows you to dereference the tag chosen by the user back into
 # the index associated with said tag. The index is the one-based tag/item/help
 # triplet array position within the ordered list of tag/item/help triplets
 # passed to dialog(1).
 #
 # Pass the tag chosen by the user as the first argument, followed by the
 # ordered list of tag/item/help triplets (HINT: use the same tag/item/help list
 # as was passed to dialog(1) for consistency).
 #
 # If the tag cannot be found, NULL is returned.
 #
 f_dialog_menutag2index_with_help()
 {
 	# n counts the triplets examined so far, starting at one
 	local tag="$1" tagn n=1
 	shift 1 # tag
 
 	# Consume the remaining arguments three at a time (tag, item, help)
 	# and print the position of the first triplet whose tag equals the
 	# chosen one
 	while [ $# -gt 0 ]; do
 		tagn="$1"
 		shift 3 # tagn/item/help
 
 		if [ "$tag" = "$tagn" ]; then
 			echo $n
 			return $SUCCESS
 		fi
 		n=$(( $n + 1 ))
 	done
 	# No triplet matched; nothing has been printed
 	return $FAILURE
 }
 
 # f_dialog_menutag2help $tag_chosen $tag1 $item1 $help1 $tag2 $item2 $help2 ...
 #
 # To use the `--menu' option of dialog(1) with the `--item-help' option, you
 # must pass an ordered list of tag/item/help triplets on the command-line. When
 # the user selects a menu option the tag for that item is printed to stderr.
 #
 # This function allows you to dereference the tag chosen by the user back into
 # the help associated with said tag (item is discarded/ignored).
 #
 # Pass the tag chosen by the user as the first argument, followed by the
 # ordered list of tag/item/help triplets (HINT: use the same tag/item/help list
 # as was passed to dialog(1) for consistency).
 #
 # If the tag cannot be found, NULL is returned.
 #
 f_dialog_menutag2help()
 {
 	local tag="$1" tagn help
 	shift 1 # tag
 
 	# Consume the remaining arguments three at a time (tag, item, help)
 	# and print the help of the first triplet whose tag equals the chosen
 	# one; the item argument is never read
 	while [ $# -gt 0 ]; do
 		tagn="$1"
 		help="$3"
 		shift 3 # tagn/item/help
 
 		if [ "$tag" = "$tagn" ]; then
 			echo "$help"
 			return $SUCCESS
 		fi
 	done
 	# No triplet matched; nothing has been printed
 	return $FAILURE
 }
 
 ############################################################ INIT FUNCTIONS
 
 # f_dialog_init
 #
 # Initialize (or re-initialize) the dialog module after setting/changing any
 # of the following environment variables:
 #
 # 	USE_XDIALOG   Either NULL or Non-NULL. If given a value will indicate
 # 	              that Xdialog(1) should be used instead of dialog(1).
 #
 # 	SECURE        Either NULL or Non-NULL. If given a value will indicate
 # 	              that (while running as root) sudo(8) authentication is
 # 	              required to proceed.
 #
 # Also reads ~/.dialogrc for the following information:
 #
 # 	NO_SHADOW     Either NULL or Non-NULL. If use_shadow is OFF (case-
 # 	              insensitive) in ~/.dialogrc this is set to "1" (otherwise
 # 	              unset).
 #
 f_dialog_init()
 {
 	local funcname=f_dialog_init
 
 	# Clear the self-initialize request and start out with USE_DIALOG
 	# set; USE_DIALOG is cleared again below if Xdialog(1) is selected
 	DIALOG_SELF_INITIALIZE=
 	USE_DIALOG=1
 
 	#
 	# Clone terminal stdout so we can redirect to it from within sub-shells
 	#
 	# NB: eval is used so that $DIALOG_TERMINAL_PASSTHRU_FD is expanded to
 	#     its value before the (escaped) redirection operator is parsed
 	#
 	eval exec $DIALOG_TERMINAL_PASSTHRU_FD\>\&1
 
 	#
 	# Add `-S' and `-X' to the list of standard arguments supported by all
 	#
 	case "$GETOPTS_STDARGS" in
 	*SX*) : good ;; # already present
 	   *) GETOPTS_STDARGS="${GETOPTS_STDARGS}SX"
 	esac
 
 	#
 	# Process stored command-line arguments
 	#
 	# NB: Using backticks instead of $(...) for portability since Linux
 	#     bash(1) balks at the right parentheses encountered in the case-
 	#     statement (incorrectly interpreting it as the close of $(...)).
 	#
 	# NB: Each scan below runs inside a command substitution, so the
 	#     `set -- $ARGV' and OPTIND=1 there do not alter this shell's own
 	#     positional parameters; a `1' is printed per matching flag.
 	#
 	f_dprintf "f_dialog_init: ARGV=[%s] GETOPTS_STDARGS=[%s]" \
 	          "$ARGV" "$GETOPTS_STDARGS"
 	SECURE=`set -- $ARGV
 		OPTIND=1
 		while getopts \
 			"$GETOPTS_STDARGS$GETOPTS_EXTRA$GETOPTS_ALLFLAGS" \
 		flag > /dev/null; do
 			case "$flag" in
 			S) echo 1 ;;
 			esac
 		done
 	` # END-BACKTICK
 	# NB: The pattern below is `S|X', so `-S' turns on USE_XDIALOG as well
 	USE_XDIALOG=`set -- $ARGV
 		OPTIND=1
 		while getopts \
 			"$GETOPTS_STDARGS$GETOPTS_EXTRA$GETOPTS_ALLFLAGS" \
 		flag > /dev/null; do
 			case "$flag" in
 			S|X) echo 1 ;;
 			esac
 		done
 	` # END-BACKTICK
 	f_dprintf "f_dialog_init: SECURE=[%s] USE_XDIALOG=[%s]" \
 	          "$SECURE" "$USE_XDIALOG"
 
 	#
 	# Process `-X' command-line option
 	#
 	[ "$USE_XDIALOG" ] && DIALOG=Xdialog USE_DIALOG=
 
 	#
 	# Sanity check, or die gracefully
 	#
 	# NB: USE_XDIALOG is unset and DIALOG is put back to `dialog' before
 	#     f_die is called; the name that failed is kept for the message
 	#
 	if ! f_have $DIALOG; then
 		unset USE_XDIALOG
 		local failed_dialog="$DIALOG"
 		DIALOG=dialog
 		f_die 1 "$msg_no_such_file_or_directory" "$pgm" "$failed_dialog"
 	fi
 
 	#
 	# Read ~/.dialogrc (unless using Xdialog(1)) for properties
 	#
 	# NB: The awk(1) script prints `NO_SHADOW=1;' for a line that is not
 	#     blank or a comment, whose first field begins with use_shadow
 	#     (compared in lower-case), and whose value after the `=' is OFF
 	#     (compared in upper-case); eval then performs that assignment
 	#
 	if [ -f ~/.dialogrc -a ! "$USE_XDIALOG" ]; then
 		eval "$(
 			awk -v param=use_shadow -v expect=OFF \
 			    -v set="NO_SHADOW=1" '
 			!/^[[:space:]]*(#|$)/ && \
 			tolower($1) ~ "^"param"(=|$)" && \
 			/[^#]*=/ {
 				sub(/^[^=]*=[[:space:]]*/, "")
 				if ( toupper($1) == expect ) print set";"
 			}' ~/.dialogrc
 		)"
 	fi
 
 	#
 	# If we're already running as root but we got there by way of sudo(8)
 	# and we have X11, we should merge the xauth(1) credentials from our
 	# original user.
 	#
 	if [ "$USE_XDIALOG" ] &&
 	   [ "$( id -u )" = "0" ] &&
 	   [ "$SUDO_USER" -a "$DISPLAY" ]
 	then
 		if ! f_have xauth; then
 			# Die gracefully, as we [likely] can't use Xdialog(1)
 			unset USE_XDIALOG
 			DIALOG=dialog
 			f_die 1 "$msg_no_such_file_or_directory" "$pgm" "xauth"
 		fi
 		# NB: HOSTNAME is not declared local, so the assignment is
 		#     visible outside of this function
 		HOSTNAME=$( hostname )
 		# Everything up to and including the first `:' is removed
 		local displaynum="${DISPLAY#*:}"
 		# NB: eval lets the escaped \~$SUDO_USER be tilde-expanded
 		#     after $SUDO_USER has been substituted
 		eval xauth -if \~$SUDO_USER/.Xauthority extract - \
 			\"\$HOSTNAME/unix:\$displaynum\" \
 			\"\$HOSTNAME:\$displaynum\" | sudo sh -c 'xauth -ivf \
 			~root/.Xauthority merge - > /dev/null 2>&1'
 	fi
 
 	#
 	# Probe Xdialog(1) for maximum height/width constraints, or die
 	# gracefully
 	#
 	if [ "$USE_XDIALOG" ]; then
 		local maxsize
 		if ! f_eval_catch -dk maxsize $funcname "$DIALOG" \
 			'LANG= LC_ALL= %s --print-maxsize' "$DIALOG"
 		then
 			# Xdialog(1) failed, fall back to dialog(1)
 			unset USE_XDIALOG
 
 			# Display the error message produced by Xdialog(1)
 			# NB: dialog(1) is named literally here, not $DIALOG
 			local height width
 			f_dialog_buttonbox_size height width \
 				"$DIALOG_TITLE" "$DIALOG_BACKTITLE" "$maxsize"
 			dialog \
 				--title "$DIALOG_TITLE"         \
 				--backtitle "$DIALOG_BACKTITLE" \
 				--ok-label "$msg_ok"            \
 				--msgbox "$maxsize" $height $width
 			exit $FAILURE
 		fi
 
 		# Keep what follows the last `:' in $maxsize, split it into
 		# words, strip a trailing `,' from the first word, and store
 		# "height width"; the assignments to height and width happen
 		# inside the command substitution only
 		XDIALOG_MAXSIZE=$(
 			set -- ${maxsize##*:}
 
 			height=${1%,}
 			width=$2
 
 			echo $height $width
 		)
 	fi
 
 	#
 	# If using Xdialog(1), swap DIALOG_TITLE with DIALOG_BACKTITLE.
 	# The reason for this is because many dialog(1) applications use
 	# --backtitle for the program name (which is better suited as
 	# --title with Xdialog(1)).
 	#
 	if [ "$USE_XDIALOG" ]; then
 		local _DIALOG_TITLE="$DIALOG_TITLE"
 		DIALOG_TITLE="$DIALOG_BACKTITLE"
 		DIALOG_BACKTITLE="$_DIALOG_TITLE"
 	fi
 
 	f_dprintf "f_dialog_init: dialog(1) API initialized."
 }
 
 ############################################################ MAIN
 
 #
 # Self-initialize unless requested otherwise
 #
 f_dprintf "%s: DIALOG_SELF_INITIALIZE=[%s]" \
           dialog.subr "$DIALOG_SELF_INITIALIZE"
 # NULL, `0', and the words no, off and false (in any letter case) skip the
 # initialization; every other value runs f_dialog_init
 case "$DIALOG_SELF_INITIALIZE" in
 ""|0|[Nn][Oo]|[Oo][Ff][Ff]|[Ff][Aa][Ll][Ss][Ee]) : do nothing ;;
 *) f_dialog_init
 esac
 
 f_dprintf "%s: Successfully loaded." dialog.subr
 
 fi # ! $_DIALOG_SUBR
Index: projects/clang380-import/usr.sbin/ntp/scripts/mkver
===================================================================
--- projects/clang380-import/usr.sbin/ntp/scripts/mkver	(revision 293686)
+++ projects/clang380-import/usr.sbin/ntp/scripts/mkver	(revision 293687)
@@ -1,44 +1,44 @@
 #!/bin/sh
 #
 # $FreeBSD$
 #
 # Build a version string for the program named by $1, print it, and write
 # it to version.c in the current directory. The run counter kept in
 # ./.version is incremented on every invocation.
 #
 # NB: `${1-UNKNOWN}' applies the default only when $1 is unset
 PROG=${1-UNKNOWN}
 
 ConfStr="$PROG"
 
-ConfStr="$ConfStr 4.2.8p4"
+ConfStr="$ConfStr 4.2.8p5"
 
 # Append "@$CSET" unless $CSET expands to the empty string
 case "$CSET" in
  '') ;;
  *) ConfStr="$ConfStr@$CSET" ;;
 esac
 
 # NOTE(review): both case words below are string literals, so as written the
 # outer '' branch and the inner `*' branch are always taken and "-a" is always
 # appended; presumably the literals were filled in when this script was
 # generated -- confirm.
 case "" in
  '')
     case "1" in
      '') ;;
      *) ConfStr="${ConfStr}-a" ;;
     esac
     ;;
  *)  ConfStr="${ConfStr}-r" ;;
 esac
 
 # Create the run counter with a value of 0 if missing, then add one to it
 if [ ! -f .version ]; then
   echo 0 > .version
 fi
 RUN="`cat .version`"
 RUN="`expr $RUN + 1`"
 echo $RUN > .version
 
 ConfStr="$ConfStr (${RUN})"
 
 echo "Version <${ConfStr}>";
 
 # Regenerate version.c; the here-document delimiter (-EoF-) is unquoted, so
 # $PROG and ${ConfStr} are expanded in the text written to the file
 rm -f version.c
 cat > version.c << -EoF-
 /*
  * version file for $PROG
  */
 #include <config.h>
 const char * Version = "${ConfStr}";
 -EoF-
Index: projects/clang380-import
===================================================================
--- projects/clang380-import	(revision 293686)
+++ projects/clang380-import	(revision 293687)

Property changes on: projects/clang380-import
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r293430-293685